From addefc2bea67e49c6a987c430f5140e6429c7dcd Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Wed, 17 Jul 2024 11:56:34 +0100 Subject: [PATCH 1/2] docs: Rename upgrading page --- doc/source/operations/index.rst | 2 +- .../operations/{upgrading.rst => upgrading-openstack.rst} | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) rename doc/source/operations/{upgrading.rst => upgrading-openstack.rst} (99%) diff --git a/doc/source/operations/index.rst b/doc/source/operations/index.rst index 39f1bb847..28f02d5a9 100644 --- a/doc/source/operations/index.rst +++ b/doc/source/operations/index.rst @@ -13,4 +13,4 @@ This guide is for operators of the StackHPC Kayobe configuration project. rabbitmq secret-rotation tempest - upgrading + upgrading-openstack diff --git a/doc/source/operations/upgrading.rst b/doc/source/operations/upgrading-openstack.rst similarity index 99% rename from doc/source/operations/upgrading.rst rename to doc/source/operations/upgrading-openstack.rst index 3e7bbc933..23fc8b67f 100644 --- a/doc/source/operations/upgrading.rst +++ b/doc/source/operations/upgrading-openstack.rst @@ -1,6 +1,6 @@ -========= -Upgrading -========= +=================== +Upgrading OpenStack +=================== This section describes how to upgrade from the |previous_release| OpenStack release series to |current_release|. 
It is based on the :kayobe-doc:`upstream From 95fcfa599d7bd1f9d4b778d7820d7be6a3796a35 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Wed, 17 Jul 2024 11:56:55 +0100 Subject: [PATCH 2/2] docs: Add a page on upgrading Ceph --- doc/source/conf.py | 4 + doc/source/configuration/cephadm.rst | 17 +++ doc/source/operations/index.rst | 1 + doc/source/operations/upgrading-ceph.rst | 174 +++++++++++++++++++++++ 4 files changed, 196 insertions(+) create mode 100644 doc/source/operations/upgrading-ceph.rst diff --git a/doc/source/conf.py b/doc/source/conf.py index 4be81ba29..fab5a56b9 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -32,16 +32,19 @@ current_series = "2023.1" previous_series = "zed" branch = f"stackhpc/{current_series}" +ceph_series = "quincy" # Substitutions loader rst_prolog = """ .. |current_release| replace:: {current_release} .. |current_release_git_branch_name| replace:: {current_release_git_branch_name} .. |previous_release| replace:: {previous_release} +.. |ceph_series| replace:: {ceph_series} """.format( # noqa: E501 current_release_git_branch_name=branch, current_release=current_series, previous_release=previous_series, + ceph_series=ceph_series, ) # -- General configuration ---------------------------------------------------- @@ -125,3 +128,4 @@ extlinks["skc-doc"] = (f"https://stackhpc-kayobe-config.readthedocs.io/en/stackhpc-{current_series}/", "%s documentation") extlinks["kayobe-renos"] = (f"https://docs.openstack.org/releasenotes/kayobe/{current_series}.html", "%s release notes") extlinks["kolla-ansible-renos"] = (f"https://docs.openstack.org/releasenotes/kolla-ansible/{current_series}.html", "%s release notes") +extlinks["ceph-doc"] = (f"https://docs.ceph.com/en/{ceph_series}/", "%s documentation") diff --git a/doc/source/configuration/cephadm.rst b/doc/source/configuration/cephadm.rst index 76077511e..a259b42bc 100644 --- a/doc/source/configuration/cephadm.rst +++ b/doc/source/configuration/cephadm.rst @@ -103,6 +103,23 @@ 
Default variables for configuring Ceph are provided in but you will likely need to set ``cephadm_osd_spec`` to define the OSD specification. +Ceph release +~~~~~~~~~~~~ + +The Ceph release series is not strictly dependent upon the StackHPC OpenStack +release, however this configuration does define a default Ceph release series +and container image tag. The default release series is currently |ceph_series|. + +If you wish to use a different Ceph release series, set +``cephadm_ceph_release``. + +If you wish to use different Ceph container image tags, set the following +variables: + +* ``cephadm_image_tag`` +* ``cephadm_haproxy_image_tag`` +* ``cephadm_keepalived_image_tag`` + OSD specification ~~~~~~~~~~~~~~~~~ diff --git a/doc/source/operations/index.rst b/doc/source/operations/index.rst index 28f02d5a9..825384c4b 100644 --- a/doc/source/operations/index.rst +++ b/doc/source/operations/index.rst @@ -14,3 +14,4 @@ This guide is for operators of the StackHPC Kayobe configuration project. secret-rotation tempest upgrading-openstack + upgrading-ceph diff --git a/doc/source/operations/upgrading-ceph.rst b/doc/source/operations/upgrading-ceph.rst new file mode 100644 index 000000000..838a518a4 --- /dev/null +++ b/doc/source/operations/upgrading-ceph.rst @@ -0,0 +1,174 @@ +============== +Upgrading Ceph +============== + +This section describes how to upgrade from one version of Ceph to another. +The Ceph upgrade procedure is described :ceph-doc:`here <cephadm/upgrade>`. + +The Ceph release series is not strictly dependent upon the StackHPC OpenStack +release, however this configuration does define a default Ceph release series +and container image tag. The default release series is currently |ceph_series|. + +Prerequisites +============= + +Before starting the upgrade, ensure any appropriate prerequisites are +satisfied. These will be specific to each deployment, but here are some +suggestions: + +* Ensure that expected test suites are passing, e.g. Tempest. 
+* Resolve any Prometheus alerts. +* Check for unexpected ``ERROR`` or ``CRITICAL`` messages in OpenSearch + Dashboards. +* Check Grafana dashboards. + +Consider whether the Ceph cluster needs to be upgraded within or outside of a +maintenance/change window. + +Preparation +=========== + +Ensure that the local Kayobe configuration environment is up to date. + +If you wish to use a different Ceph release series, set +``cephadm_ceph_release``. + +If you wish to use different Ceph container image tags, set the following +variables: + +* ``cephadm_image_tag`` (`tags `__) +* ``cephadm_haproxy_image_tag`` (`tags `__) +* ``cephadm_keepalived_image_tag`` (`tags `__) + +Be sure to use a tag that `matches the release series +`__. + +Upgrading Host Packages +======================= + +Prior to upgrading the Ceph storage cluster, it may be desirable to upgrade +system packages on the hosts. + +Note that these commands do not affect packages installed in containers, only +those installed on the host. + +In order to avoid downtime, it is important to control how package updates are +rolled out. In general, Ceph monitor hosts should be updated *one by one*. For +Ceph OSD hosts it may be possible to update packages in batches of hosts, +provided there is sufficient capacity to maintain data availability. + +For each host or batch of hosts, perform the following steps. + +Place the host or batch of hosts into maintenance mode: + +.. code-block:: console + + sudo cephadm shell -- ceph orch host maintenance enter <host> + +To update all eligible packages, use ``*``, escaping if necessary: + +.. code-block:: console + + kayobe overcloud host package update --packages "*" --limit <host> + +If the kernel has been upgraded, reboot the host or batch of hosts to pick up +the change: + +.. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/reboot.yml -l <host> + +Remove the host or batch of hosts from maintenance mode: + +.. 
code-block:: console + + sudo cephadm shell -- ceph orch host maintenance exit <host> + +Wait for Ceph health to return to ``HEALTH_OK``: + +.. code-block:: console + + ceph -s + +Wait for Prometheus alerts and errors in OpenSearch Dashboards to resolve, or +address them. + +Once happy that the system has been restored to full health, move on to the +next host or batch of hosts. + +Sync container images +===================== + +If using the local Pulp server to host Ceph images +(``stackhpc_sync_ceph_images`` is ``true``), sync the new Ceph images into the +local Pulp: + +.. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/pulp-container-{sync,publish}.yml -e stackhpc_pulp_images_kolla_filter=none + +Upgrade Ceph services +===================== + +Start the upgrade. If using the local Pulp server to host Ceph images: + +.. code-block:: console + + sudo cephadm shell -- ceph orch upgrade start --image <registry>/ceph/ceph:<tag> + +Otherwise: + +.. code-block:: console + + sudo cephadm shell -- ceph orch upgrade start --image quay.io/ceph/ceph:<tag> + +The tag should match the ``cephadm_image_tag`` variable set in `preparation +<#preparation>`_. The registry should be the address and port of the local Pulp +server. + +Check the upgrade status: + +.. code-block:: console + + ceph orch upgrade status + +Wait for Ceph health to return to ``HEALTH_OK``: + +.. code-block:: console + + ceph -s + +Watch the cephadm logs: + +.. code-block:: console + + ceph -W cephadm + +Upgrade Cephadm +=============== + +Update the Cephadm package: + +.. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/cephadm-deploy.yml -e cephadm_package_update=true + +Testing +======= + +At this point it is recommended to perform a thorough test of the system to +catch any unexpected issues. 
This may include: + +* Check Prometheus, OpenSearch Dashboards and Grafana +* Smoke tests +* All applicable tempest tests +* Horizon UI inspection + +Cleaning up +=========== + +Prune unused container images: + +.. code-block:: console + + kayobe overcloud host command run -b --command "docker image prune -a -f" -l ceph