diff --git a/doc/source/operations/upgrading-ceph.rst b/doc/source/operations/upgrading-ceph.rst index 838a518a4..5ce711011 100644 --- a/doc/source/operations/upgrading-ceph.rst +++ b/doc/source/operations/upgrading-ceph.rst @@ -63,7 +63,7 @@ Place the host or batch of hosts into maintenance mode: .. code-block:: console - sudo cephadm shell -- ceph orch host maintenance enter <host> + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/ceph-enter-maintenance.yml -l <host> To update all eligible packages, use ``*``, escaping if necessary: @@ -72,7 +72,8 @@ To update all eligible packages, use ``*``, escaping if necessary: kayobe overcloud host package update --packages "*" --limit <host> If the kernel has been upgraded, reboot the host or batch of hosts to pick up -the change: +the change. While running this playbook, consider setting ``ANSIBLE_SERIAL`` to +the maximum number of hosts that can safely reboot concurrently. .. code-block:: console @@ -82,7 +83,7 @@ Remove the host or batch of hosts from maintenance mode: .. code-block:: console - sudo cephadm shell -- ceph orch host maintenance exit <host> + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/ceph-exit-maintenance.yml -l <host> Wait for Ceph health to return to ``HEALTH_OK``: diff --git a/etc/kayobe/ansible/ceph-enter-maintenance.yml b/etc/kayobe/ansible/ceph-enter-maintenance.yml new file mode 100644 index 000000000..c119583e1 --- /dev/null +++ b/etc/kayobe/ansible/ceph-enter-maintenance.yml @@ -0,0 +1,13 @@ +--- +- name: Ensure a Ceph host has entered maintenance + gather_facts: true + any_errors_fatal: true + # We need to check whether it is OK to stop hosts after previous hosts have + # entered maintenance.
+ serial: 1 + hosts: ceph + become: true + tasks: + - name: Ensure a Ceph host has entered maintenance + ansible.builtin.import_role: + name: stackhpc.cephadm.enter_maintenance diff --git a/etc/kayobe/ansible/ceph-exit-maintenance.yml b/etc/kayobe/ansible/ceph-exit-maintenance.yml new file mode 100644 index 000000000..a16f338d9 --- /dev/null +++ b/etc/kayobe/ansible/ceph-exit-maintenance.yml @@ -0,0 +1,12 @@ +--- +- name: Ensure a Ceph host has exited maintenance + gather_facts: true + any_errors_fatal: true + hosts: ceph + # The role currently requires hosts to exit maintenance serially. + serial: 1 + become: true + tasks: + - name: Ensure a Ceph host has exited maintenance + ansible.builtin.import_role: + name: stackhpc.cephadm.exit_maintenance diff --git a/etc/kayobe/ansible/cis.yml b/etc/kayobe/ansible/cis.yml index 48022278e..7f41ad9fa 100644 --- a/etc/kayobe/ansible/cis.yml +++ b/etc/kayobe/ansible/cis.yml @@ -35,9 +35,7 @@ - include_role: name: ansible-lockdown.rhel9_cis when: ansible_facts.os_family == 'RedHat' and ansible_facts.distribution_major_version == '9' - tags: always - include_role: name: ansible-lockdown.ubuntu22_cis when: ansible_facts.distribution == 'Ubuntu' and ansible_facts.distribution_major_version == '22' - tags: always diff --git a/etc/kayobe/ansible/prometheus-network-names.yml b/etc/kayobe/ansible/prometheus-network-names.yml index 915f57ca1..5a1f83711 100644 --- a/etc/kayobe/ansible/prometheus-network-names.yml +++ b/etc/kayobe/ansible/prometheus-network-names.yml @@ -1,3 +1,4 @@ +--- - name: Prometheus friendly network names hosts: overcloud gather_facts: no diff --git a/etc/kayobe/ansible/reboot.yml b/etc/kayobe/ansible/reboot.yml index 8810afd7f..92603ade2 100644 --- a/etc/kayobe/ansible/reboot.yml +++ b/etc/kayobe/ansible/reboot.yml @@ -2,9 +2,26 @@ - name: Reboot the host hosts: seed-hypervisor:seed:overcloud:infra-vms serial: "{{ lookup('env', 'ANSIBLE_SERIAL') | default(1, true) }}" + gather_facts: false + vars: + 
reboot_timeout_s: "{{ 20 * 60 }}" + reboot_with_bootstrap_user: false + ansible_user: "{{ bootstrap_user if reboot_with_bootstrap_user | bool else kayobe_ansible_user }}" + ansible_ssh_common_args: "{{ '-o StrictHostKeyChecking=no' if reboot_with_bootstrap_user | bool else '' }}" + ansible_python_interpreter: "/usr/bin/python3" tags: - reboot tasks: - name: Reboot and wait become: true reboot: + reboot_timeout: "{{ reboot_timeout_s }}" + search_paths: + # Systems running molly-guard hang waiting for confirmation before rebooting without this. + - "/lib/molly-guard" + # Default list: + - "/sbin" + - "/bin" + - "/usr/sbin" + - "/usr/bin" + - "/usr/local/sbin" diff --git a/etc/kayobe/ansible/requirements.yml b/etc/kayobe/ansible/requirements.yml index 90e653ab2..2bb4047ce 100644 --- a/etc/kayobe/ansible/requirements.yml +++ b/etc/kayobe/ansible/requirements.yml @@ -1,7 +1,7 @@ --- collections: - name: stackhpc.cephadm - version: 1.15.1 + version: 1.18.0 # NOTE: Pinning pulp.squeezer to 0.0.13 because 0.0.14+ depends on the # pulp_glue Python library being installed. 
- name: pulp.squeezer diff --git a/etc/kayobe/ansible/stackhpc-openstack-tests.yml b/etc/kayobe/ansible/stackhpc-openstack-tests.yml index 51fd3afdf..b99b9f91d 100644 --- a/etc/kayobe/ansible/stackhpc-openstack-tests.yml +++ b/etc/kayobe/ansible/stackhpc-openstack-tests.yml @@ -31,7 +31,7 @@ depth: 1 single_branch: true - - name: Ensure the latest versions of pip and setuptools are installed # noqa package-latest + - name: Ensure the latest versions of pip and setuptools are installed # noqa package-latest ansible.builtin.pip: name: "{{ item.name }}" state: latest diff --git a/etc/kayobe/ansible/templates/wazuh-secrets.yml.j2 b/etc/kayobe/ansible/templates/wazuh-secrets.yml.j2 index 887cc6b44..583c1efa4 100644 --- a/etc/kayobe/ansible/templates/wazuh-secrets.yml.j2 +++ b/etc/kayobe/ansible/templates/wazuh-secrets.yml.j2 @@ -7,7 +7,7 @@ secrets_wazuh: # Strengthen default wazuh api user pass wazuh_api_users: - username: "wazuh" - password: "{{ secrets_wazuh.wazuh_api_users[0].password | default(lookup('community.general.random_string', min_lower=1, min_upper=1, min_special=1, min_numeric=1, length=30)) }}" + password: "{{ secrets_wazuh.wazuh_api_users[0].password | default(lookup('community.general.random_string', min_lower=1, min_upper=1, min_special=1, min_numeric=1, length=30, override_special=override_special_characters)) }}" # OpenSearch 'admin' user pass opendistro_admin_password: "{{ secrets_wazuh.opendistro_admin_password | default(lookup('password', '/dev/null'), true) }}" # OpenSearch 'kibanaserver' user pass diff --git a/etc/kayobe/ansible/ubuntu-upgrade.yml b/etc/kayobe/ansible/ubuntu-upgrade.yml index 66ed49643..b7cfe7338 100644 --- a/etc/kayobe/ansible/ubuntu-upgrade.yml +++ b/etc/kayobe/ansible/ubuntu-upgrade.yml @@ -40,6 +40,15 @@ reboot: reboot_timeout: "{{ reboot_timeout_s }}" connect_timeout: 600 + search_paths: + # Systems running molly-guard hang waiting for confirmation before rebooting without this. 
+ - "/lib/molly-guard" + # Default list: + - "/sbin" + - "/bin" + - "/usr/sbin" + - "/usr/bin" + - "/usr/local/sbin" become: true when: file_status.stat.exists @@ -101,6 +110,15 @@ reboot: reboot_timeout: "{{ reboot_timeout_s }}" connect_timeout: 600 + search_paths: + # Systems running molly-guard hang waiting for confirmation before rebooting without this. + - "/lib/molly-guard" + # Default list: + - "/sbin" + - "/bin" + - "/usr/sbin" + - "/usr/bin" + - "/usr/local/sbin" become: true - name: Update distribution facts diff --git a/etc/kayobe/ansible/wazuh-secrets.yml b/etc/kayobe/ansible/wazuh-secrets.yml index a1b725aba..16b0a09f7 100644 --- a/etc/kayobe/ansible/wazuh-secrets.yml +++ b/etc/kayobe/ansible/wazuh-secrets.yml @@ -3,6 +3,7 @@ gather_facts: false vars: wazuh_secrets_path: "{{ kayobe_env_config_path }}/wazuh-secrets.yml" + override_special_characters: '"#$%&()*+,-./:;<=>?@[\]^_{|}~' tasks: - name: install passlib[bcrypt] pip: diff --git a/releasenotes/notes/ceph-maintenance-4c4eb0a4f7149665.yaml b/releasenotes/notes/ceph-maintenance-4c4eb0a4f7149665.yaml new file mode 100644 index 000000000..285e396f8 --- /dev/null +++ b/releasenotes/notes/ceph-maintenance-4c4eb0a4f7149665.yaml @@ -0,0 +1,15 @@ +--- +features: + - | + Adds two new custom playbooks for placing Ceph hosts into and removing them + from maintenance: + + - ``ceph-enter-maintenance.yml`` + - ``ceph-exit-maintenance.yml`` +upgrade: + - | + Updates the ``stackhpc.cephadm`` collection to version ``1.18.0``. +fixes: + - | + Fixes an issue with idempotency in the ``stackhpc.cephadm.cephadm_keys`` + plugin. diff --git a/terraform/aio/vm.tf b/terraform/aio/vm.tf index 50c0cc3dd..a0d097cbe 100644 --- a/terraform/aio/vm.tf +++ b/terraform/aio/vm.tf @@ -35,7 +35,7 @@ variable "aio_vm_subnet" { variable "aio_vm_volume_size" { type = number - default = 35 + default = 40 } variable "aio_vm_tags" {