diff --git a/ansible/adhoc/cudatests.yml b/ansible/adhoc/cudatests.yml
index f571f8a89..8c325158a 100644
--- a/ansible/adhoc/cudatests.yml
+++ b/ansible/adhoc/cudatests.yml
@@ -7,3 +7,8 @@
     - ansible.builtin.import_role:
         name: cuda
         tasks_from: samples.yml
+
+    - name: Run CUDA bandwidth tasks
+      ansible.builtin.import_role:
+        name: cuda
+        tasks_from: bandwidth.yml
diff --git a/ansible/roles/cuda/defaults/main.yml b/ansible/roles/cuda/defaults/main.yml
index 692301d23..14d3d90f7 100644
--- a/ansible/roles/cuda/defaults/main.yml
+++ b/ansible/roles/cuda/defaults/main.yml
@@ -16,3 +16,8 @@ cuda_samples_programs:
   - bandwidthTest
 # cuda_devices: # discovered from deviceQuery run
 cuda_persistenced_state: started
+# variables for nvbandwidth (for bandwidth.yml tasks run in cudatests.yml)
+cuda_bandwidth_path: "/var/lib/{{ ansible_user }}/cuda_bandwidth"
+# release tag without the leading "v"; bandwidth.yml also uses it to locate the unpacked nvbandwidth-<version> directory
+cuda_bandwidth_version: "0.8"
+cuda_bandwidth_release_url: "https://github.com/NVIDIA/nvbandwidth/archive/refs/tags/v{{ cuda_bandwidth_version }}.tar.gz"
diff --git a/ansible/roles/cuda/tasks/bandwidth.yml b/ansible/roles/cuda/tasks/bandwidth.yml
new file mode 100644
index 000000000..0d18088f6
--- /dev/null
+++ b/ansible/roles/cuda/tasks/bandwidth.yml
@@ -0,0 +1,74 @@
+---
+# Download, build and run NVIDIA's nvbandwidth benchmark on the target hosts,
+# then save its output on the Ansible control host.
+# Reads cuda_bandwidth_path, cuda_bandwidth_version and
+# cuda_bandwidth_release_url (role defaults) plus appliances_environment_root.
+
+- name: Ensure cuda_bandwidth_path exists
+  ansible.builtin.file:
+    state: directory
+    path: "{{ cuda_bandwidth_path }}"
+    owner: "{{ ansible_user }}"
+    group: "{{ ansible_user }}"
+    mode: "0755"
+
+# `creates` skips the download once the release has been unpacked.
+- name: Download CUDA bandwidth test release
+  ansible.builtin.unarchive:
+    remote_src: true
+    src: "{{ cuda_bandwidth_release_url }}"
+    dest: "{{ cuda_bandwidth_path }}"
+    owner: "{{ ansible_user }}"
+    group: "{{ ansible_user }}"
+    creates: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}"
+
+- name: Creates CUDA bandwidth test build directory
+  ansible.builtin.file:
+    state: directory
+    path: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}/build"
+    owner: "{{ ansible_user }}"
+    group: "{{ ansible_user }}"
+    mode: "0755"
+
+# The results file is written on the control host, so its directory is created there.
+- name: Ensure cudatests directory exists
+  ansible.builtin.file:
+    path: "{{ appliances_environment_root }}/cudatests"
+    state: directory
+    mode: "0755"
+  delegate_to: localhost
+
+# `source` is a bash builtin, so bash is requested explicitly rather than
+# relying on /bin/sh. `creates` skips the build once the binary exists.
+- name: Build CUDA bandwidth test
+  ansible.builtin.shell:
+    cmd: |
+      source /cvmfs/software.eessi.io/versions/2023.06/init/bash &&
+      module load Boost/1.82.0-GCC-12.3.0 &&
+      . /etc/profile.d/sh.local && cmake .. &&
+      make -j {{ ansible_processor_vcpus }}
+    chdir: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}/build"
+    creates: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}/build/nvbandwidth"
+    executable: /bin/bash
+
+# NOTE(review): the library paths below are pinned to the EESSI x86_64/amd/zen4
+# tree - confirm this is correct for every host the play targets.
+# The task only produces output for the next task, so it never reports "changed".
+- name: Run CUDA bandwidth test
+  ansible.builtin.shell:
+    cmd: |
+      export LD_LIBRARY_PATH=/cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen4/software/GCCcore/12.3.0/lib64:\
+      /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen4/software/Boost/1.82.0-GCC-12.3.0/lib
+      ./nvbandwidth
+    chdir: "{{ cuda_bandwidth_path }}/nvbandwidth-{{ cuda_bandwidth_version }}/build/"
+  register: cuda_bandwidth_output
+  changed_when: false
+
+# NOTE(review): dest does not vary per host, so when the play targets more than
+# one host the results presumably overwrite each other - consider a per-host filename.
+- name: Save CUDA bandwidth output to bandwidth_results.txt
+  ansible.builtin.copy:
+    content: "{{ cuda_bandwidth_output.stdout }}"
+    dest: "{{ appliances_environment_root }}/cudatests/bandwidth_results.txt"
+    mode: "0644"
+  delegate_to: localhost