From 36dc6d10405f7181da29051230f0830d66992062 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Fri, 17 May 2024 11:54:24 +0100 Subject: [PATCH 1/3] Add diagnostics.sh This is a copy of roles/kayobe-diagnostics/files/get_logs.sh in Kayobe. --- tools/diagnostics.sh | 147 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 tools/diagnostics.sh diff --git a/tools/diagnostics.sh b/tools/diagnostics.sh new file mode 100644 index 000000000..639969575 --- /dev/null +++ b/tools/diagnostics.sh @@ -0,0 +1,147 @@ +#!/bin/bash + +# NOTE(mgoddard): This has been adapted from tests/get_logs.sh in Kolla +# Ansible. + +# Environment variables: +# $LOG_DIR is the directory to copy logs to. +# $CONFIG_DIR is the directory to copy configuration from. +# $PREVIOUS_CONFIG_DIR is the directory to copy previous configuration, prior +# to an upgrade, from. + +set +o errexit + +copy_logs() { + cp -rnL /var/lib/docker/volumes/kolla_logs/_data/* ${LOG_DIR}/kolla/ + if [[ -d ${CONFIG_DIR} ]]; then + cp -rnL ${CONFIG_DIR}/etc/kayobe/* ${LOG_DIR}/kayobe_configs + cp -rnL ${CONFIG_DIR}/etc/kolla/* ${LOG_DIR}/kolla_configs + cp -rnL /etc/kolla/* ${LOG_DIR}/kolla_node_configs + # Don't save the IPA images. + rm ${LOG_DIR}/kayobe_configs/kolla/config/ironic/ironic-agent.{kernel,initramfs} + rm ${LOG_DIR}/kolla_configs/config/ironic/ironic-agent.{kernel,initramfs} + rm ${LOG_DIR}/kolla_node_configs/ironic-http/ironic-agent.{kernel,initramfs} + rm ${LOG_DIR}/kolla_node_configs/ironic-tftp/ironic-agent.{kernel,initramfs} + fi + if [[ -n ${PREVIOUS_CONFIG_DIR} ]] && [[ -d ${PREVIOUS_CONFIG_DIR} ]]; then + mkdir -p ${LOG_DIR}/previous_{kayobe,kolla}_configs + cp -rnL ${PREVIOUS_CONFIG_DIR}/etc/kayobe/* ${LOG_DIR}/previous_kayobe_configs + cp -rnL ${PREVIOUS_CONFIG_DIR}/etc/kolla/* ${LOG_DIR}/previous_kolla_configs + # NOTE: we can't save node configs in /etc/kolla for the pervious + # release since they'll have been overwritten at this point. 
+ # Don't save the IPA images. + rm ${LOG_DIR}/previous_kayobe_configs/kolla/config/ironic/ironic-agent.{kernel,initramfs} + rm ${LOG_DIR}/previous_kolla_configs/config/ironic/ironic-agent.{kernel,initramfs} + fi + + if [[ -d /opt/kayobe/etc/kolla ]]; then + cp -rnL /opt/kayobe/etc/kolla/* ${LOG_DIR}/kolla_build_configs/ + fi + + cp -rvnL /var/log/* ${LOG_DIR}/system_logs/ + + if [[ -x "$(command -v journalctl)" ]]; then + journalctl --no-pager > ${LOG_DIR}/system_logs/syslog.txt + journalctl --no-pager -u docker.service > ${LOG_DIR}/system_logs/docker.log + journalctl --no-pager -u vbmcd.service > ${LOG_DIR}/system_logs/vbmcd.log + journalctl --no-pager -u NetworkManager.service > ${LOG_DIR}/system_logs/NetworkManager.log + else + cp /var/log/upstart/docker.log ${LOG_DIR}/system_logs/docker.log + fi + + if [[ -d /etc/sysconfig/network-scripts/ ]]; then + cp -r /etc/sysconfig/network-scripts/ ${LOG_DIR}/system_logs/ + fi + + if [[ -d /etc/NetworkManager/system-connections/ ]]; then + cp -r /etc/NetworkManager/system-connections/ ${LOG_DIR}/system_logs/ + fi + + if [[ -d /etc/yum.repos.d/ ]]; then + cp -r /etc/yum.repos.d/ ${LOG_DIR}/system_logs/ + fi + + if [[ -d /etc/apt/sources.list.d/ ]]; then + cp -r /etc/apt/sources.list.d/ ${LOG_DIR}/system_logs/ + fi + + if [[ -d /etc/systemd/ ]]; then + cp -rL /etc/systemd/ ${LOG_DIR}/system_logs/ + fi + + df -h > ${LOG_DIR}/system_logs/df.txt + # Gather disk usage statistics for files and directories larger than 1MB + du -d 5 -hx / | sort -hr | grep '^[0-9\.]*[MGT]' > ${LOG_DIR}/system_logs/du.txt + free > ${LOG_DIR}/system_logs/free.txt + cat /etc/hosts > ${LOG_DIR}/system_logs/hosts.txt + parted -l > ${LOG_DIR}/system_logs/parted-l.txt + mount > ${LOG_DIR}/system_logs/mount.txt + env > ${LOG_DIR}/system_logs/env.txt + ip address > ${LOG_DIR}/system_logs/ip-address.txt + ip route > ${LOG_DIR}/system_logs/ip-route.txt + ip route show table all > ${LOG_DIR}/system_logs/ip-route-all-tables.txt + ip rule list > 
${LOG_DIR}/system_logs/ip-rule-list.txt + + iptables-save > ${LOG_DIR}/system_logs/iptables.txt + + if [ `command -v dpkg` ]; then + dpkg -l > ${LOG_DIR}/system_logs/dpkg-l.txt + fi + if [ `command -v rpm` ]; then + rpm -qa > ${LOG_DIR}/system_logs/rpm-qa.txt + fi + + # final memory usage and process list + ps -eo user,pid,ppid,lwp,%cpu,%mem,size,rss,cmd > ${LOG_DIR}/system_logs/ps.txt + + # available entropy + cat /proc/sys/kernel/random/entropy_avail > ${LOG_DIR}/system_logs/entropy_avail.txt + + # docker related information + (docker info && docker images && docker ps -a) > ${LOG_DIR}/system_logs/docker-info.txt + + for container in $(docker ps -a --format "{{.Names}}"); do + docker logs --tail all ${container} &> ${LOG_DIR}/docker_logs/${container}.txt + done + + # Bifrost: grab config files and logs from the container. + if [[ $(docker ps -q -f name=bifrost_deploy) ]]; then + for service in dnsmasq ironic-api ironic-conductor ironic-inspector mariadb nginx rabbitmq-server; do + mkdir -p ${LOG_DIR}/kolla/$service + docker exec bifrost_deploy \ + systemctl status $service -l -n 10000 > ${LOG_DIR}/kolla/$service/${service}-systemd-status.txt + docker exec bifrost_deploy \ + journalctl -u $service --no-pager > ${LOG_DIR}/kolla/$service/${service}-journal.txt + done + docker exec -it bifrost_deploy \ + journalctl --no-pager > ${LOG_DIR}/kolla/bifrost-journal.log + for d in dnsmasq.conf ironic ironic-inspector nginx/nginx.conf; do + docker cp bifrost_deploy:/etc/$d ${LOG_DIR}/kolla_node_configs/bifrost/ + done + docker cp bifrost_deploy:/var/log/mariadb/mariadb.log ${LOG_DIR}/kolla/mariadb/ + fi + + # IPA build logs + if [[ -f /opt/kayobe/images/ipa/ipa.stderr ]] || [[ -f /opt/kayobe/images/ipa/ipa.stdout ]]; then + mkdir -p ${LOG_DIR}/kayobe + cp /opt/kayobe/images/ipa/ipa.stderr /opt/kayobe/images/ipa/ipa.stdout ${LOG_DIR}/kayobe/ + fi + + # Overcloud host image build logs + if [[ -f /opt/kayobe/images/deployment_image/deployment_image.stderr ]] || [[ -f 
/opt/kayobe/images/deployment_image/deployment_image.stdout ]]; then + mkdir -p ${LOG_DIR}/kayobe + cp /opt/kayobe/images/deployment_image/deployment_image.stderr /opt/kayobe/images/deployment_image/deployment_image.stdout ${LOG_DIR}/kayobe/ + fi + + # Rename files to .txt; this is so that when displayed via + # logs.openstack.org clicking results in the browser shows the + # files, rather than trying to send it to another app or make you + # download it, etc. + for f in $(find ${LOG_DIR}/{system_logs,kolla,docker_logs} -name "*.log"); do + mv $f ${f/.log/.txt} + done + + chmod -R 777 ${LOG_DIR} +} + +copy_logs From 03bf7f01f5f16a6735dcdac3ff5406b6a5cdd591 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Fri, 17 May 2024 11:56:07 +0100 Subject: [PATCH 2/3] Add diagnostics.yml playbook This playbook runs a script that collects diagnostic information from hosts. The diagnostics are aggregated to a directory (diagnostics_path_local/inventory_hostname) on localhost. NOTE: The diagnostic information contains sensitive information such as passwords in configuration files. --- etc/kayobe/ansible/diagnostics.yml | 70 ++++++++++++++++ .../notes/diagnostics-378a6693a64d0b3c.yaml | 14 ++++ tools/diagnostics.sh | 81 +++++++------------ 3 files changed, 113 insertions(+), 52 deletions(-) create mode 100644 etc/kayobe/ansible/diagnostics.yml create mode 100644 releasenotes/notes/diagnostics-378a6693a64d0b3c.yaml diff --git a/etc/kayobe/ansible/diagnostics.yml b/etc/kayobe/ansible/diagnostics.yml new file mode 100644 index 000000000..7764698cb --- /dev/null +++ b/etc/kayobe/ansible/diagnostics.yml @@ -0,0 +1,70 @@ +--- +# This playbook runs a script that collects diagnostic information from hosts. +# The diagnostics are aggregated to a directory +# (diagnostics_path_local/inventory_hostname) on localhost. +# +# NOTE: The diagnostic information contains sensitive information such as +# passwords in configuration files. 
+ +- name: Collect diagnostic information + hosts: seed-hypervisor:seed:overcloud:infra-vms + vars: + diagnostics_path_local: "{{ lookup('env', 'PWD') }}/diagnostics" + tasks: + - block: + - name: Create a temporary directory for diagnostics + ansible.builtin.tempfile: + state: directory + suffix: diagnostics + register: diagnostics_tmpdir + + - name: Write host variables to a file + ansible.builtin.copy: + content: "{{ hostvars[inventory_hostname].ansible_facts | to_nice_json }}" + dest: "{{ diagnostics_tmpdir.path }}/facts.json" + + - name: Run diagnostics script + ansible.builtin.script: "{{ kayobe_config_path }}/../../tools/diagnostics.sh" + become: true + failed_when: diagnostics_result.rc is not defined + register: diagnostics_result + environment: + LOG_DIR: "{{ diagnostics_tmpdir.path }}" + CONFIG_DIR: "{{ kayobe_config_path }}/../.." + + - name: Download diagnostic logs to localhost + ansible.posix.synchronize: + src: "{{ diagnostics_tmpdir.path }}/" + dest: "{{ diagnostics_path_local }}/{{ inventory_hostname }}" + mode: pull + archive: false + recursive: true + copy_links: true + verify_host: true + # For jump host + use_ssh_args: true + always: + - name: Clean up temporary directory + ansible.builtin.file: + path: "{{ diagnostics_tmpdir.path }}" + state: absent + + - name: Display diagnostics collection stdout + ansible.builtin.debug: + msg: "{{ diagnostics_result.stdout }}" + when: diagnostics_result.stdout is defined + + - name: Display diagnostics collection stderr + ansible.builtin.debug: + msg: "{{ diagnostics_result.stderr }}" + when: diagnostics_result.stderr is defined + + - name: Fail if diagnostics collection failed + ansible.builtin.fail: + msg: Diagnostics collection failed + when: (diagnostics_result.rc | default(1)) != 0 # rc is undefined when the script never ran; treat that as a failure + + - name: Display location of diagnostics archive + ansible.builtin.debug: + msg: >- + Wrote diagnostics to {{ diagnostics_path_local }} on localhost diff --git a/releasenotes/notes/diagnostics-378a6693a64d0b3c.yaml
b/releasenotes/notes/diagnostics-378a6693a64d0b3c.yaml new file mode 100644 index 000000000..4e8d45ee7 --- /dev/null +++ b/releasenotes/notes/diagnostics-378a6693a64d0b3c.yaml @@ -0,0 +1,14 @@ +--- +features: + - | + Adds a new ``diagnostics.yml`` playbook that collects diagnostic + information from hosts. The diagnostics are aggregated to a directory + (``$PWD/diagnostics/`` by default) on localhost. The diagnostics include: + + * Docker container logs + * Kolla configuration files + * Log files + + *The collected diagnostic information contains sensitive information such + as passwords in configuration files.* + diff --git a/tools/diagnostics.sh b/tools/diagnostics.sh index 639969575..73d61775a 100644 --- a/tools/diagnostics.sh +++ b/tools/diagnostics.sh @@ -1,53 +1,34 @@ #!/bin/bash -# NOTE(mgoddard): This has been adapted from tests/get_logs.sh in Kolla -# Ansible. +# NOTE(mgoddard): This has been adapted from +# roles/kayobe-diagnostics/files/get_logs.sh in Kayobe. # Environment variables: # $LOG_DIR is the directory to copy logs to. -# $CONFIG_DIR is the directory to copy configuration from. -# $PREVIOUS_CONFIG_DIR is the directory to copy previous configuration, prior -# to an upgrade, from. +# TODO: Make this script more robust and use set -e. set +o errexit +set -u copy_logs() { - cp -rnL /var/lib/docker/volumes/kolla_logs/_data/* ${LOG_DIR}/kolla/ - if [[ -d ${CONFIG_DIR} ]]; then - cp -rnL ${CONFIG_DIR}/etc/kayobe/* ${LOG_DIR}/kayobe_configs - cp -rnL ${CONFIG_DIR}/etc/kolla/* ${LOG_DIR}/kolla_configs - cp -rnL /etc/kolla/* ${LOG_DIR}/kolla_node_configs - # Don't save the IPA images. 
- rm ${LOG_DIR}/kayobe_configs/kolla/config/ironic/ironic-agent.{kernel,initramfs} - rm ${LOG_DIR}/kolla_configs/config/ironic/ironic-agent.{kernel,initramfs} - rm ${LOG_DIR}/kolla_node_configs/ironic-http/ironic-agent.{kernel,initramfs} - rm ${LOG_DIR}/kolla_node_configs/ironic-tftp/ironic-agent.{kernel,initramfs} - fi - if [[ -n ${PREVIOUS_CONFIG_DIR} ]] && [[ -d ${PREVIOUS_CONFIG_DIR} ]]; then - mkdir -p ${LOG_DIR}/previous_{kayobe,kolla}_configs - cp -rnL ${PREVIOUS_CONFIG_DIR}/etc/kayobe/* ${LOG_DIR}/previous_kayobe_configs - cp -rnL ${PREVIOUS_CONFIG_DIR}/etc/kolla/* ${LOG_DIR}/previous_kolla_configs - # NOTE: we can't save node configs in /etc/kolla for the pervious - # release since they'll have been overwritten at this point. - # Don't save the IPA images. - rm ${LOG_DIR}/previous_kayobe_configs/kolla/config/ironic/ironic-agent.{kernel,initramfs} - rm ${LOG_DIR}/previous_kolla_configs/config/ironic/ironic-agent.{kernel,initramfs} - fi + mkdir -p ${LOG_DIR}/{docker_logs,kolla_node_configs,system_logs} + + cp -rnL /etc/kolla/* ${LOG_DIR}/kolla_node_configs + # Don't save the IPA images. 
+ rm ${LOG_DIR}/kolla_node_configs/ironic-http/ironic-agent.{kernel,initramfs} + rm ${LOG_DIR}/kolla_node_configs/ironic-tftp/ironic-agent.{kernel,initramfs} if [[ -d /opt/kayobe/etc/kolla ]]; then + mkdir -p ${LOG_DIR}/kolla_build_configs cp -rnL /opt/kayobe/etc/kolla/* ${LOG_DIR}/kolla_build_configs/ fi cp -rvnL /var/log/* ${LOG_DIR}/system_logs/ - if [[ -x "$(command -v journalctl)" ]]; then - journalctl --no-pager > ${LOG_DIR}/system_logs/syslog.txt - journalctl --no-pager -u docker.service > ${LOG_DIR}/system_logs/docker.log - journalctl --no-pager -u vbmcd.service > ${LOG_DIR}/system_logs/vbmcd.log - journalctl --no-pager -u NetworkManager.service > ${LOG_DIR}/system_logs/NetworkManager.log - else - cp /var/log/upstart/docker.log ${LOG_DIR}/system_logs/docker.log - fi + journalctl --no-pager > ${LOG_DIR}/system_logs/syslog.log + journalctl --no-pager -u docker.service > ${LOG_DIR}/system_logs/docker.log + journalctl --no-pager -u vbmcd.service > ${LOG_DIR}/system_logs/vbmcd.log + journalctl --no-pager -u NetworkManager.service > ${LOG_DIR}/system_logs/NetworkManager.log if [[ -d /etc/sysconfig/network-scripts/ ]]; then cp -r /etc/sysconfig/network-scripts/ ${LOG_DIR}/system_logs/ @@ -81,6 +62,9 @@ copy_logs() { ip route > ${LOG_DIR}/system_logs/ip-route.txt ip route show table all > ${LOG_DIR}/system_logs/ip-route-all-tables.txt ip rule list > ${LOG_DIR}/system_logs/ip-rule-list.txt + pvs > ${LOG_DIR}/system_logs/pvs.txt + vgs > ${LOG_DIR}/system_logs/vgs.txt + lvs > ${LOG_DIR}/system_logs/lvs.txt iptables-save > ${LOG_DIR}/system_logs/iptables.txt @@ -106,42 +90,35 @@ copy_logs() { # Bifrost: grab config files and logs from the container. 
if [[ $(docker ps -q -f name=bifrost_deploy) ]]; then + mkdir -p ${LOG_DIR}/bifrost for service in dnsmasq ironic-api ironic-conductor ironic-inspector mariadb nginx rabbitmq-server; do - mkdir -p ${LOG_DIR}/kolla/$service + mkdir -p ${LOG_DIR}/bifrost/$service docker exec bifrost_deploy \ - systemctl status $service -l -n 10000 > ${LOG_DIR}/kolla/$service/${service}-systemd-status.txt + systemctl status $service -l -n 10000 > ${LOG_DIR}/bifrost/$service/${service}-systemd-status.txt docker exec bifrost_deploy \ - journalctl -u $service --no-pager > ${LOG_DIR}/kolla/$service/${service}-journal.txt + journalctl -u $service --no-pager > ${LOG_DIR}/bifrost/$service/${service}-journal.txt done docker exec -it bifrost_deploy \ - journalctl --no-pager > ${LOG_DIR}/kolla/bifrost-journal.log + journalctl --no-pager > ${LOG_DIR}/bifrost/bifrost-journal.log for d in dnsmasq.conf ironic ironic-inspector nginx/nginx.conf; do docker cp bifrost_deploy:/etc/$d ${LOG_DIR}/kolla_node_configs/bifrost/ done - docker cp bifrost_deploy:/var/log/mariadb/mariadb.log ${LOG_DIR}/kolla/mariadb/ + docker cp bifrost_deploy:/var/log/mariadb/mariadb.log ${LOG_DIR}/bifrost/mariadb/ fi # IPA build logs if [[ -f /opt/kayobe/images/ipa/ipa.stderr ]] || [[ -f /opt/kayobe/images/ipa/ipa.stdout ]]; then - mkdir -p ${LOG_DIR}/kayobe - cp /opt/kayobe/images/ipa/ipa.stderr /opt/kayobe/images/ipa/ipa.stdout ${LOG_DIR}/kayobe/ + mkdir -p ${LOG_DIR}/ipa + cp /opt/kayobe/images/ipa/ipa.stderr /opt/kayobe/images/ipa/ipa.stdout ${LOG_DIR}/ipa/ fi # Overcloud host image build logs if [[ -f /opt/kayobe/images/deployment_image/deployment_image.stderr ]] || [[ -f /opt/kayobe/images/deployment_image/deployment_image.stdout ]]; then - mkdir -p ${LOG_DIR}/kayobe - cp /opt/kayobe/images/deployment_image/deployment_image.stderr /opt/kayobe/images/deployment_image/deployment_image.stdout ${LOG_DIR}/kayobe/ + mkdir -p ${LOG_DIR}/deployment_image + cp /opt/kayobe/images/deployment_image/deployment_image.stderr 
/opt/kayobe/images/deployment_image/deployment_image.stdout ${LOG_DIR}/deployment_image/ fi - # Rename files to .txt; this is so that when displayed via - # logs.openstack.org clicking results in the browser shows the - # files, rather than trying to send it to another app or make you - # download it, etc. - for f in $(find ${LOG_DIR}/{system_logs,kolla,docker_logs} -name "*.log"); do - mv $f ${f/.log/.txt} - done - - chmod -R 777 ${LOG_DIR} + chown -R stack: ${LOG_DIR} } copy_logs From 834110b5fa320930d4dda65ffc56f1d72c7414b9 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Sat, 13 Jan 2024 14:05:50 +0000 Subject: [PATCH 3/3] CI: Collect diagnostic information at end of aio jobs Use the new diagnostics.yml playbook. --- .github/workflows/stackhpc-all-in-one.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/stackhpc-all-in-one.yml b/.github/workflows/stackhpc-all-in-one.yml index d9bdf2fa0..3206a2cd9 100644 --- a/.github/workflows/stackhpc-all-in-one.yml +++ b/.github/workflows/stackhpc-all-in-one.yml @@ -179,6 +179,7 @@ jobs: OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }} - name: Terraform Apply + id: tf_apply run: | for attempt in $(seq 5); do if terraform apply -auto-approve; then @@ -355,6 +356,7 @@ jobs: if: inputs.upgrade - name: Tempest tests + id: tempest run: | mkdir -p tempest-artifacts docker run -t --rm \ @@ -380,13 +382,29 @@ jobs: env: KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }} + - name: Collect diagnostic information + id: diagnostics + run: | + mkdir -p diagnostics + sudo -E docker run -t --rm \ + -v $(pwd):/stack/kayobe-automation-env/src/kayobe-config \ + -v $(pwd)/diagnostics:/stack/diagnostics \ + -e KAYOBE_ENVIRONMENT -e KAYOBE_VAULT_PASSWORD -e KAYOBE_AUTOMATION_SSH_PRIVATE_KEY \ + $KAYOBE_IMAGE \ + /stack/kayobe-automation-env/src/kayobe-config/.automation/pipeline/playbook-run.sh '$KAYOBE_CONFIG_PATH/ansible/diagnostics.yml' + env: + 
KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }} + if: ${{ !cancelled() && steps.tf_apply.outcome == 'success' }} + - name: Upload test result artifacts uses: actions/upload-artifact@v4 with: name: test-results-${{ inputs.os_distribution }}-${{ inputs.os_release }}-${{ inputs.neutron_plugin }}${{ inputs.upgrade && '-upgrade' || '' }} path: | + diagnostics/ tempest-artifacts/ sot-results/ + if: ${{ !cancelled() && (steps.tempest.outcome == 'success' || steps.stackhpc-openstack-tests.outcome == 'success' || steps.diagnostics.outcome == 'success') }} - name: Fail if any Tempest tests failed run: |