# NOTE: the next three comment lines are web-page text captured along with this file; they are not workflow content.
# Improved SHMEM_SYMMETRIC_SIZE Validation #579
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow header: triggers, default shell, and environment shared by all jobs.
name: SOS-github-actions

on:
  - push
  - pull_request

# Every `run:` step is executed with bash unless a step overrides it.
defaults:
  run:
    shell: bash

env:
  # Install prefixes, all placed under the workspace so they can be cached.
  SOS_INSTALL_DIR: ${{ github.workspace }}/install/sos
  LIBFABRIC_INSTALL_DIR: ${{ github.workspace }}/install/libfabric
  UCX_INSTALL_DIR: ${{ github.workspace }}/install/ucx
  PORTALS4_INSTALL_DIR: ${{ github.workspace }}/install/portals4
  LIBEVENT_INSTALL_DIR: ${{ github.workspace }}/install/libevent
  PRRTE_INSTALL_DIR: ${{ github.workspace }}/install/prrte
  PMIX_INSTALL_DIR: ${{ github.workspace }}/install/pmix
  SOS_BUILD_STATIC_OPTS: --disable-shared --enable-static
  XPMEM_INSTALL_DIR: ${{ github.workspace }}/install/xpmem
  # Launcher placed at the front of TEST_RUNNER in the test steps.
  SOS_PM: mpiexec.hydra
  # Expanded as a shell command at the end of the RPM test step; `true` is a
  # no-op. The matching SOS_PM_PRE is not defined here, so it expands to
  # nothing. Quoted so the value is unambiguously a string.
  SOS_PM_POST: "true"
# Jobs: each one builds SOS against a transport and runs its test suite.
jobs:
  # OFI (libfabric) transport; one job per `include` entry below.
  OFI:
    runs-on: ubuntu-24.04
    strategy:
      fail-fast: false
      matrix:
        # Keys an entry may set:
        #   config_name       - label shown in the job name
        #   sos_config        - extra arguments passed to SOS's configure
        #   libfabric_version - libfabric git ref to check out (also in the cache key)
        #   env_setup         - shell text pasted into the test step before the
        #                       tests run; variables must be `export`ed or the
        #                       test processes will not see them
        #   xpmem_version     - XPMEM git ref; the XPMEM steps run only when set
        #   rpm_build         - the RPM test step runs only when true
        include:
          - config_name: PMI simple
            sos_config: --enable-pmi-simple
            libfabric_version: v1.7.x
          - config_name: PMI simple
            sos_config: --enable-pmi-simple
            libfabric_version: v1.9.x
          - config_name: PMI simple
            sos_config: --enable-pmi-simple
            libfabric_version: v1.11.x
          - config_name: PMI simple
            sos_config: --enable-pmi-simple
            libfabric_version: v1.13.x
          - config_name: PMI simple
            sos_config: --enable-pmi-simple
            libfabric_version: v2.1.x
          - config_name: Deprecated tests
            sos_config: --enable-pmi-simple --enable-deprecated-tests
            libfabric_version: v2.1.x
          - config_name: MR-Basic, AV-map, memcpy
            sos_config: >-
              --enable-ofi-mr=basic --enable-av-map --disable-cxx --enable-memcpy
              --enable-pmi-simple --with-hwloc=no
            libfabric_version: v2.1.x
          - config_name: PMI MPI
            sos_config: --disable-fortran --enable-pmi-mpi CC=mpicc
            libfabric_version: v2.1.x
          # NOTE(review): "Remove" is presumably a typo for "Remote". Left as-is
          # because the job name may be referenced by required status checks.
          - config_name: Remove Virtual Addressing (RVA)
            sos_config: >-
              --disable-fortran --enable-error-checking --enable-remote-virtual-addressing
              --disable-aslr-check --enable-pmi-simple
            libfabric_version: v2.1.x
          - config_name: heap use malloc
            env_setup: export SHMEM_SYMMETRIC_HEAP_USE_MALLOC=1
            sos_config: --disable-threads --enable-error-checking --enable-pmi-simple --with-hwloc=no
            libfabric_version: v2.1.x
          # too slow, times out on Github (but passes on another Ubuntu 20.04 system)...
          # - config_name: CMA, MR Basic, RVA
          #   sos_config: --disable-fortran --with-cma --enable-error-checking --enable-profiling
          #               --enable-ofi-mr=basic --enable-av-map --enable-remote-virtual-addressing
          #               --enable-pmi-simple
          #   libfabric_version: v2.1.x
          - config_name: XPMEM RVA
            xpmem_version: master
            sos_config: >-
              --with-xpmem=${XPMEM_INSTALL_DIR} --enable-error-checking
              --enable-remote-virtual-addressing --enable-pmi-simple
            libfabric_version: v2.1.x
          - config_name: XPMEM shared atomics
            xpmem_version: master
            sos_config: >-
              --with-xpmem=${XPMEM_INSTALL_DIR} --enable-shr-atomics
              --enable-error-checking --enable-pmi-simple
            libfabric_version: v2.1.x
          - config_name: RVA, thread completion
            sos_config: >-
              --enable-error-checking --enable-remote-virtual-addressing
              --enable-thread-completion --enable-pmi-simple
            libfabric_version: v2.1.x
          - config_name: huge pages, zero bounce
            # Fixed: without `export` these were plain shell variables and
            # were never passed to the processes started by `make check`.
            env_setup: >-
              export SHMEM_SYMMETRIC_HEAP_USE_HUGE_PAGES=1;
              export SHMEM_BOUNCE_SIZE=0
            sos_config: >-
              --enable-error-checking --enable-remote-virtual-addressing
              --enable-pmi-simple --enable-ofi-fence --with-hwloc=no
            libfabric_version: v2.1.x
          - config_name: auto algorithms
            env_setup: >-
              export SHMEM_BARRIER_ALGORITHM=auto;
              export SHMEM_BCAST_ALGORITHM=auto;
              export SHMEM_REDUCE_ALGORITHM=auto;
              export SHMEM_COLLECT_ALGORITHM=auto;
              export SHMEM_FCOLLECT_ALGORITHM=auto
            sos_config: >-
              --enable-error-checking --enable-remote-virtual-addressing
              --enable-thread-completion --enable-pmi-simple
            libfabric_version: v2.1.x
          - config_name: linear algorithms
            env_setup: >-
              export SHMEM_BARRIER_ALGORITHM=linear;
              export SHMEM_BCAST_ALGORITHM=linear;
              export SHMEM_REDUCE_ALGORITHM=linear;
              export SHMEM_COLLECT_ALGORITHM=linear;
              export SHMEM_FCOLLECT_ALGORITHM=linear
            sos_config: >-
              --enable-error-checking --enable-remote-virtual-addressing
              --enable-pmi-simple --with-hwloc=no
            libfabric_version: v2.1.x
          - config_name: tree algorithms
            env_setup: >-
              export SHMEM_BARRIER_ALGORITHM=tree;
              export SHMEM_BCAST_ALGORITHM=tree;
              export SHMEM_REDUCE_ALGORITHM=tree;
              export SHMEM_OFI_STX_MAX=0
            sos_config: >-
              --enable-error-checking --enable-remote-virtual-addressing
              --enable-pmi-simple --enable-manual-progress
            libfabric_version: v2.1.x
          - config_name: dissem/recdbl algorithms
            env_setup: >-
              export SHMEM_BARRIER_ALGORITHM=dissem;
              export SHMEM_REDUCE_ALGORITHM=recdbl;
              export SHMEM_FCOLLECT_ALGORITHM=recdbl;
              export SHMEM_OFI_STX_AUTO=1
            sos_config: >-
              --enable-error-checking --enable-remote-virtual-addressing
              --enable-pmi-simple --enable-manual-progress --enable-hard-polling --with-hwloc=no
            libfabric_version: v2.1.x
          - config_name: ring reduce algorithm
            # Fixed: the `;` between the two commands was missing, so the
            # second `export` word was being passed to the first as a name.
            env_setup: >-
              export SHMEM_REDUCE_ALGORITHM=ring;
              export SHMEM_SCAN_ALGORITHM=ring
            sos_config: --enable-error-checking --enable-pmi-simple
            libfabric_version: v2.1.x
          - config_name: ring fcollect algorithm, tx/rx single poll limit
            env_setup: >-
              export SHMEM_FCOLLECT_ALGORITHM=ring;
              export SHMEM_OFI_TX_POLL_LIMIT=1;
              export SHMEM_OFI_RX_POLL_LIMIT=1;
              export SHMEM_OFI_STX_THRESHOLD=1024
            sos_config: >-
              --enable-error-checking --enable-remote-virtual-addressing
              --enable-pmi-simple
            libfabric_version: v2.1.x
          - config_name: STX random
            env_setup: >-
              export SHMEM_OFI_STX_MAX=8;
              export SHMEM_OFI_STX_ALLOCATOR=random
            sos_config: >-
              --enable-error-checking --enable-remote-virtual-addressing
              --enable-pmi-simple
            libfabric_version: v2.1.x
          - config_name: RPM build
            env_setup: export SOS_CHECK_TARBALL_RPM=1
            sos_config: --enable-pmi-simple
            rpm_build: true
            libfabric_version: v2.1.x
          - config_name: Lengthy tests
            sos_config: --enable-lengthy-tests --enable-pmi-simple
            libfabric_version: v2.1.x
          - config_name: Manpages
            sos_config: --enable-manpages --enable-pmi-simple
            libfabric_version: v2.1.x
          - config_name: Without OFI inject
            sos_config: --disable-ofi-inject --enable-pmi-simple
            libfabric_version: v2.1.x
          - config_name: Without Non-fetch AMO
            sos_config: --disable-nonfetch-amo --enable-pmi-simple
            libfabric_version: v2.1.x
    name: OFI ${{ matrix.libfabric_version }} (${{ matrix.config_name }})
# OFI job steps: record the environment, build libfabric/XPMEM/SOS, test, archive.
    steps:
      # OS_NAME becomes part of the cache keys below.
      - name: Checking OS version
        run: |
          echo "OS_NAME=$(lsb_release -si)-$(lsb_release -sr)" >> $GITHUB_ENV

      # Condense the configure arguments into CONFIG_PARAM, which is used in
      # the artifact name: spaces removed, runs of '-' collapsed, and any
      # character outside [=a-zA-Z0-9._-] replaced by '+'.
      - name: Save Config Parameters
        run: |
          DEMANGLE_CONFIG=$(echo ${{ matrix.sos_config }} | sed 's/ //g' | sed -E 's/(-)\1+/-/g' | sed -E 's/[^=a-zA-Z0-9._\-]/\+/g')
          echo "config param is $DEMANGLE_CONFIG"
          echo "CONFIG_PARAM=$DEMANGLE_CONFIG" >> $GITHUB_ENV

      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'true'

      - name: Install dependencies
        run: |
          sudo apt-get -y update
          sudo apt-get install -y gfortran rpm mpich libmpich-dev libhwloc-dev gdb
          # Relax ptrace restrictions and turn off address-space randomization.
          sudo sysctl -w kernel.yama.ptrace_scope=0
          sudo sysctl -w kernel.randomize_va_space=0

      # LIBFABRIC: checked out and built only when the cache misses.
      - name: Cache libfabric install
        id: cache-libfabric
        uses: actions/cache@v4
        with:
          path: ${{ env.LIBFABRIC_INSTALL_DIR }}
          key: libfabric-${{ matrix.libfabric_version }}-${{ env.OS_NAME }}

      - name: Checkout libfabric
        if: steps.cache-libfabric.outputs.cache-hit != 'true'
        uses: actions/checkout@v4
        with:
          repository: ofiwg/libfabric
          path: repos/libfabric
          ref: ${{ matrix.libfabric_version }}

      - name: Build libfabric
        if: steps.cache-libfabric.outputs.cache-hit != 'true'
        run: |
          cd repos/libfabric
          ./autogen.sh
          mkdir build
          cd build
          ../configure --prefix=${LIBFABRIC_INSTALL_DIR}
          make -j
          make install

      # XPMEM: these three steps run only for entries that set xpmem_version.
      - name: Cache XPMEM install
        if: ${{ matrix.xpmem_version }}
        id: cache-xpmem
        uses: actions/cache@v4
        with:
          path: ${{ env.XPMEM_INSTALL_DIR }}
          key: xpmem-${{ matrix.xpmem_version }}-${{ env.OS_NAME }}

      - name: Checkout XPMEM
        if: ${{ matrix.xpmem_version }}
        uses: actions/checkout@v4
        with:
          repository: openucx/xpmem
          path: repos/xpmem
          ref: ${{ matrix.xpmem_version }}

      # Builds the kernel module, loads it, and hands /dev/xpmem to the
      # current user.
      - name: Build XPMEM
        if: ${{ matrix.xpmem_version }}
        run: |
          cd repos/xpmem
          sudo apt-get -y install linux-headers-$(uname -r)
          ./autogen.sh
          ./configure --prefix=${XPMEM_INSTALL_DIR}
          make -j
          sudo make install
          sudo insmod ${XPMEM_INSTALL_DIR}/lib/modules/$(uname -r)/kernel/xpmem/xpmem.ko
          sudo chown $(whoami) /dev/xpmem

      # SOS
      - name: Build SOS (${{ matrix.sos_config }})
        run: |
          git submodule update --remote
          ./autogen.sh
          mkdir build
          cd build
          ../configure --prefix=${SOS_INSTALL_DIR} --with-ofi=${LIBFABRIC_INSTALL_DIR} ${{ matrix.sos_config }}
          make -j
          make install

      - name: Configure Core Analysis (${{ matrix.sos_config }})
        run: |
          bash ${GITHUB_WORKSPACE}/.github/scripts/scan_core.sh init

      # `make check TESTS=` only builds the test programs; the second
      # `make ... check` runs them through TEST_RUNNER. The matrix entry's
      # env_setup text is pasted in between.
      - name: Test SOS (${{ matrix.sos_config }})
        run: |
          cd build
          make check TESTS= -j
          ${{ matrix.env_setup }}
          ulimit -c unlimited
          ulimit -a
          SHMEM_DEBUG=1 SHMEM_INFO=1 SHMEM_OFI_PROVIDER=sockets FI_PROVIDER=sockets \
          make VERBOSE=1 TEST_RUNNER="${SOS_PM} -np 2 timeout --signal=ABRT 15m" check
          cat modules/tests-sos/test/unit/hello.log

      - name: Scanning for Core Dumps (${{ matrix.sos_config }})
        if: always()
        run: |
          bash ${GITHUB_WORKSPACE}/.github/scripts/scan_core.sh scan

      # Runs only for the entry that sets rpm_build: builds the dist tarball
      # and RPM, then configures, builds and tests from the unpacked tarball.
      - name: Test RPM (${{ matrix.rpm_build }})
        if: ${{ matrix.rpm_build }}
        run: |
          # FIXME: RPM with a prefix not working on Ubuntu 20.04 (related to _docdir?)
          #./configure --with-ofi=${LIBFABRIC_INSTALL_DIR} ${{ matrix.sos_config }} --enable-rpm-prefix
          #make dist
          #rpmbuild -ta ./sandia-openshmem-*.tar.gz --define "configargs --with-ofi=${LIBFABRIC_INSTALL_DIR} ${{ matrix.sos_config }}" --define "_prefix /usr/shmem"
          #make clean
          ./configure --with-ofi=${LIBFABRIC_INSTALL_DIR} ${{ matrix.sos_config }}
          make dist
          rpmbuild -ta ./sandia-openshmem-*.tar.gz --define "configargs --with-ofi=${LIBFABRIC_INSTALL_DIR} ${{ matrix.sos_config }}"
          # Sanity check distribution tarball
          tar zxvf sandia-openshmem-*.tar.gz
          sos_version=$(grep AC_INIT configure.ac | awk -F"]" '{print substr($2, 4, length($2)-2)}')
          cd "sandia-openshmem-"$sos_version
          ./autogen.sh
          mkdir build
          cd build
          ../configure --with-ofi=${LIBFABRIC_INSTALL_DIR} ${{ matrix.sos_config }}
          make -j check TESTS=
          ${SOS_PM_PRE}
          ulimit -c unlimited
          ulimit -a
          SHMEM_DEBUG=1 SHMEM_INFO=1 make VERBOSE=1 TEST_RUNNER="${SOS_PM} -np 2" check
          ${SOS_PM_POST}

      # Uploaded even when earlier steps fail; a missing directory is ignored.
      - name: Archive Artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: debug-artifacts-ofi-${{ matrix.libfabric_version }}-${{ env.CONFIG_PARAM }}-${{ github.run_number }}.${{ github.run_attempt }}
          path: ${{ github.workspace }}/archives
          if-no-files-found: ignore
# PMIx: | |
# runs-on: ubuntu-24.04 | |
# strategy: | |
# fail-fast: false | |
# matrix: | |
# include: | |
# - sos_transport: libfabric | |
# libfabric_version: v1.7.x | |
# sos_transport_config: --with-ofi=${LIBFABRIC_INSTALL_DIR} --without-ucx --without-portals4 | |
# run_all_tests: true | |
# libevent_version: release-2.1.10-stable | |
# pmix_version: v4.1.1rc2 | |
# prrte_version: v2.0 | |
# sos_pm: prun | |
# sos_pm_pre: prte --daemonize --host localhost:4 | |
# sos_pm_post: pterm | |
# - sos_transport: ucx | |
# ucx_version: v1.9.0 | |
# sos_transport_config: --with-ucx=${UCX_INSTALL_DIR} --without-ofi --without-portals4 | |
# run_all_tests: true | |
# libevent_version: release-2.1.10-stable | |
# pmix_version: v4.1.1rc2 | |
# prrte_version: v2.0 | |
# sos_pm: prun | |
# sos_pm_pre: prte --daemonize --host localhost:4 | |
# sos_pm_post: pterm | |
# - sos_transport: portals4 | |
# portals4_version: master | |
# sos_transport_config: --with-portals4=${PORTALS4_INSTALL_DIR} --without-ofi --without-ucx | |
# run_all_tests: false | |
# libevent_version: release-2.1.10-stable | |
# pmix_version: v4.1.1rc2 | |
# prrte_version: v2.0 | |
# sos_pm: prun | |
# sos_pm_pre: prte --daemonize --host localhost:4 | |
# sos_pm_post: pterm | |
# | |
# name: PMIx (${{ matrix.sos_transport }}) | |
# | |
# steps: | |
# - name: Checking OS version | |
# run: | | |
# echo "OS_NAME=$(lsb_release -si)-$(lsb_release -sr)" >> $GITHUB_ENV
# - uses: actions/checkout@v2 | |
# - name: Install dependencies | |
# run: | | |
# sudo apt-get -y update | |
# sudo apt-get install -y gfortran libhwloc-dev libev-dev libev-libevent-dev | |
# | |
# # LIBFABRIC | |
# - name: Cache libfabric install | |
# if: ${{ matrix.sos_transport == 'libfabric' }} | |
# id: cache-libfabric | |
# uses: actions/cache@v4 | |
# with: | |
# path: ${{ env.LIBFABRIC_INSTALL_DIR }} | |
# key: libfabric-${{ matrix.libfabric_version }}-${{ env.OS_NAME }} | |
# - name: Checkout libfabric | |
# if: ${{ matrix.sos_transport == 'libfabric' && steps.cache-libfabric.outputs.cache-hit != 'true' }} | |
# uses: actions/checkout@v2 | |
# with: | |
# repository: ofiwg/libfabric | |
# path: repos/libfabric | |
# ref: ${{ matrix.libfabric_version }} | |
# - name: Build libfabric | |
# if: ${{ matrix.sos_transport == 'libfabric' && steps.cache-libfabric.outputs.cache-hit != 'true' }} | |
# run: | | |
# cd repos/libfabric | |
# ./autogen.sh | |
# mkdir build; cd build | |
# ../configure --prefix=${LIBFABRIC_INSTALL_DIR} | |
# make -j | |
# make install | |
# | |
# # UCX | |
# - name: Cache UCX install | |
# if: ${{ matrix.sos_transport == 'ucx' }} | |
# id: cache-ucx | |
# uses: actions/cache@v4 | |
# with: | |
# path: ${{ env.UCX_INSTALL_DIR }} | |
# key: ucx-${{ matrix.ucx_version }}-${{ env.OS_NAME }} | |
# - name: Checkout UCX | |
# if: ${{ matrix.sos_transport == 'ucx' && steps.cache-ucx.outputs.cache-hit != 'true' }} | |
# uses: actions/checkout@v2 | |
# with: | |
# repository: openucx/ucx | |
# path: repos/ucx | |
# ref: ${{ matrix.ucx_version }} | |
# - name: Build UCX | |
# if: ${{ matrix.sos_transport == 'ucx' && steps.cache-ucx.outputs.cache-hit != 'true' }} | |
# run: | | |
# cd repos/ucx | |
# ./autogen.sh | |
# mkdir build; cd build | |
# ../configure --prefix=${UCX_INSTALL_DIR} --enable-mt --disable-numa --without-java | |
# make -j | |
# make install | |
# | |
# ## Portals4 | |
# - name: Cache Portals4 install | |
# if: ${{ matrix.sos_transport == 'portals4' }} | |
# id: cache-portals4 | |
# uses: actions/cache@v4 | |
# with: | |
# path: ${{ env.PORTALS4_INSTALL_DIR }} | |
# key: portals4-${{ matrix.portals4_version }}-${{ env.OS_NAME }} | |
# - name: Checkout Portals4 | |
# if: ${{ matrix.sos_transport == 'portals4' && steps.cache-portals4.outputs.cache-hit != 'true' }} | |
# uses: actions/checkout@v2 | |
# with: | |
# repository: regrant/portals4 | |
# path: repos/portals4 | |
# ref: ${{ matrix.portals4_version }} | |
# - name: Build Portals4 | |
# if: ${{ matrix.sos_transport == 'portals4' && steps.cache-portals4.outputs.cache-hit != 'true' }} | |
# run: | | |
# cd repos/portals4 | |
# ./autogen.sh | |
# mkdir build; cd build | |
# ../configure --prefix=${PORTALS4_INSTALL_DIR} --enable-zero-mrs --enable-reliable-udp --disable-pmi-from-portals ${SOS_BUILD_STATIC_OPTS} | |
# make -j | |
# make install | |
# | |
# # Libevent | |
# - name: Cache libevent install | |
# id: cache-libevent | |
# uses: actions/cache@v4 | |
# with: | |
# path: ${{ env.LIBEVENT_INSTALL_DIR }} | |
# key: libevent-${{ matrix.libevent_version }}-${{ env.OS_NAME }} | |
# - name: Checkout libevent | |
# if: steps.cache-libevent.outputs.cache-hit != 'true' | |
# uses: actions/checkout@v2 | |
# with: | |
# repository: libevent/libevent | |
# path: repos/libevent | |
# ref: ${{ matrix.libevent_version }} | |
# - name: Build libevent | |
# if: steps.cache-libevent.outputs.cache-hit != 'true' | |
# run: | | |
# cd repos/libevent | |
# ./autogen.sh | |
# ./configure --prefix=${LIBEVENT_INSTALL_DIR} | |
# make clean all install | |
# | |
# # PMIx | |
# - name: Cache PMIx install | |
# id: cache-pmix | |
# uses: actions/cache@v4 | |
# with: | |
# path: ${{ env.PMIX_INSTALL_DIR }} | |
# key: pmix-${{ matrix.pmix_version }}-${{ env.OS_NAME }} | |
# - name: Checkout PMIx | |
# if: steps.cache-pmix.outputs.cache-hit != 'true' | |
# uses: actions/checkout@v2 | |
# with: | |
# repository: openpmix/pmix | |
# path: repos/pmix | |
# ref: ${{ matrix.pmix_version }} | |
# - name: Build PMIx | |
# if: steps.cache-pmix.outputs.cache-hit != 'true' | |
# run: | | |
# cd repos/pmix | |
# ./autogen.pl | |
# ./configure --prefix=${PMIX_INSTALL_DIR} --with-libevent=${LIBEVENT_INSTALL_DIR} --without-libev --disable-debug CFLAGS=-O3 ${SOS_BUILD_STATIC_OPTS} | |
# make install | |
# | |
# # Build PRRTE | |
# - name: Cache PRRTE install | |
# id: cache-prrte | |
# uses: actions/cache@v4 | |
# with: | |
# path: ${{ env.PRRTE_INSTALL_DIR }} | |
# key: prrte-${{ matrix.prrte_version}}-${{ env.OS_NAME }} | |
# - name: Checkout PRRTE | |
# if: steps.cache-prrte.outputs.cache-hit != 'true' | |
# uses: actions/checkout@v2 | |
# with: | |
# repository: openpmix/prrte | |
# path: repos/prrte | |
# ref: ${{ matrix.prrte_version }} | |
# - name: Build PRRTE | |
# if: steps.cache-prrte.outputs.cache-hit != 'true' | |
# run: | | |
# cd repos/prrte | |
# ./autogen.pl | |
# ./configure --prefix=${PRRTE_INSTALL_DIR} --with-pmix=${PMIX_INSTALL_DIR} --without-slurm --with-libevent=${LIBEVENT_INSTALL_DIR} --without-libev ${SOS_BUILD_STATIC_OPTS} | |
# make install | |
# | |
# # SOS | |
# - name: Build SOS | |
# run: | | |
# ./autogen.sh | |
# mkdir build; cd build | |
# ../configure --prefix=${SOS_INSTALL_DIR} ${{ matrix.sos_transport_config }} --with-pmix=${PMIX_INSTALL_DIR} | |
# make -j | |
# make install | |
# - name: Test SOS (all tests) | |
# if: ${{ matrix.sos_transport == 'libfabric' || matrix.sos_transport == 'ucx' }} | |
# run: | | |
# cd build | |
# make check TESTS= -j | |
# export PATH=${PRRTE_INSTALL_DIR}/bin:$PATH | |
# ${{ matrix.sos_pm_pre }} | |
# SHMEM_INFO=1 make VERBOSE=1 TEST_RUNNER="${{ matrix.sos_pm }} -np 2" check | |
# cat modules/tests-sos/test/unit/hello.log | |
# ${{ matrix.sos_pm_post }} | |
# - name: Test SOS (hello) | |
# if: ${{ matrix.sos_transport == 'portals4' }} | |
# run: | | |
# cd build | |
# make check TESTS= -j | |
# export PATH=${PRRTE_INSTALL_DIR}/bin:$PATH | |
# ${{ matrix.sos_pm_pre }} | |
# SHMEM_INFO=1 ${{ matrix.sos_pm }} -np 1 modules/tests-sos/test/unit/hello | |
# ${{ matrix.sos_pm_post }} | |
# | |
# UCX transport: builds XPMEM and UCX, then SOS once per sos_config value.
  UCX:
    runs-on: ubuntu-24.04
    strategy:
      fail-fast: false
      matrix:
        include:
          - config_name: ucx-1.18.0
            ucx_version: v1.18.0
            xpmem_version: master
        # NOTE(review): the original indentation was not recoverable.
        # sos_config is laid out as a matrix axis (sibling of `include`)
        # because the steps interpolate it as a single string - confirm.
        sos_config:
          - '--enable-pmi-simple --disable-fortran'
          - >-
            --with-cma --enable-error-checking --enable-profiling
            --enable-pmi-simple --disable-fortran --with-hwloc=no
          - '--with-xpmem --enable-error-checking --enable-pmi-simple --with-hwloc=no'
    steps:
      # OS_NAME becomes part of the UCX cache key.
      - name: Checking OS version
        run: |
          echo "OS_NAME=$(lsb_release -si)-$(lsb_release -sr)" >> $GITHUB_ENV

      # Condense the configure arguments into CONFIG_PARAM for the artifact name.
      - name: Save Config Parameters
        run: |
          DEMANGLE_CONFIG=$(echo ${{ matrix.sos_config }} | sed 's/ //g' | sed -E 's/(-)\1+/-/g' | sed -E 's/[^=a-zA-Z0-9._\-]/\+/g')
          echo "config param is $DEMANGLE_CONFIG"
          echo "CONFIG_PARAM=$DEMANGLE_CONFIG" >> $GITHUB_ENV

      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'true'

      - name: Install dependencies
        run: |
          sudo apt-get -y update
          sudo apt-get install -y gfortran mpich libmpich-dev gdb
          # Relax ptrace restrictions and turn off address-space randomization.
          sudo sysctl -w kernel.yama.ptrace_scope=0
          sudo sysctl -w kernel.randomize_va_space=0

      # XPMEM: installed under /usr, kernel module loaded on every run.
      - name: Checkout XPMEM
        uses: actions/checkout@v4
        with:
          repository: openucx/xpmem
          path: repos/xpmem
          ref: ${{ matrix.xpmem_version }}

      - name: Build XPMEM
        run: |
          cd repos/xpmem
          sudo apt-get -y install linux-headers-$(uname -r)
          ./autogen.sh
          ./configure --prefix=/usr
          make -j
          sudo make install
          sudo insmod /usr/lib/modules/$(uname -r)/kernel/xpmem/xpmem.ko
          sudo chown $(whoami) /dev/xpmem

      # UCX: checked out and built only when the cache misses.
      - name: Cache UCX install
        id: cache-ucx
        uses: actions/cache@v4
        with:
          path: ${{ env.UCX_INSTALL_DIR }}
          key: ucx-${{ matrix.ucx_version }}-${{ env.OS_NAME }}

      - name: Checkout UCX
        if: steps.cache-ucx.outputs.cache-hit != 'true'
        uses: actions/checkout@v4
        with:
          repository: openucx/ucx
          path: repos/ucx
          ref: ${{ matrix.ucx_version }}

      - name: Build UCX
        if: steps.cache-ucx.outputs.cache-hit != 'true'
        run: |
          cd repos/ucx
          ./autogen.sh
          mkdir build
          cd build
          ../configure --prefix=${UCX_INSTALL_DIR} --enable-mt --disable-numa --without-java
          make -j
          make install

      # SOS
      - name: Build SOS (${{ matrix.sos_config }})
        run: |
          git submodule update --remote
          ./autogen.sh
          mkdir build
          cd build
          ../configure --prefix=${SOS_INSTALL_DIR} --with-ucx=${UCX_INSTALL_DIR} ${{ matrix.sos_config }}
          make -j
          make install

      - name: Configure Core Analysis (${{ matrix.sos_config }})
        run: |
          bash ${GITHUB_WORKSPACE}/.github/scripts/scan_core.sh init

      # Test failures are tolerated here (continue-on-error), so the job
      # proceeds to the scan and archive steps either way.
      - name: Test SOS (${{ matrix.sos_config }})
        continue-on-error: true
        run: |
          cd build
          make check TESTS= -j
          ulimit -c unlimited
          ulimit -a
          SHMEM_DEBUG=1 SHMEM_INFO=1 make VERBOSE=1 TEST_RUNNER="${SOS_PM} -np 2 timeout --signal=ABRT 15m" check
          cat modules/tests-sos/test/unit/hello.log

      - name: Scanning for Core Dumps (${{ matrix.sos_config }})
        if: always()
        run: |
          bash ${GITHUB_WORKSPACE}/.github/scripts/scan_core.sh scan

      - name: Archive Artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: debug-artifacts-${{ matrix.config_name }}-${{ env.CONFIG_PARAM }}-${{ github.run_number }}.${{ github.run_attempt }}
          path: ${{ github.workspace }}/archives
          if-no-files-found: ignore
# Portals4 transport: builds XPMEM and Portals4 (cached), then SOS.
  Portals4:
    runs-on: ubuntu-24.04
    strategy:
      fail-fast: false
      matrix:
        include:
          - config_name: XPMEM with Shared Atomics
            sos_config: --with-xpmem --enable-shr-atomics --enable-error-checking --enable-pmi-simple
            portals4_version: master
            xpmem_version: master
          # - name: Heap use malloc
          #   env_setup: export SHMEM_SYMMETRIC_HEAP_USE_MALLOC=1
          #   sos_config: --disable-threads --enable-error-checking --enable-pmi-simple --with-hwloc=no
          # - name: Heap use huge pages, zero bounce
          #   env_setup: export SHMEM_SYMMETRIC_HEAP_USE_HUGE_PAGES=1; export SHMEM_BOUNCE_SIZE=0
          #   sos_config: --enable-error-checking --enable-remote-virtual-addressing --enable-pmi-simple --enable-ofi-fence
          # sos_config: [--enable-pmi-simple --with-hwloc=no,
          #              --disable-fortran --enable-error-checking --enable-remote-virtual-addressing
          #              --disable-aslr-check --enable-pmi-simple,
          #              --with-cma --enable-error-checking --enable-profiling
          #              --enable-remote-virtual-addressing --enable-pmi-simple,
          #              --enable-pmi-mpi CC=mpicc --disable-fortran --with-hwloc=no]
    steps:
      # OS_NAME becomes part of the Portals4 cache key.
      - name: Checking OS version
        run: |
          echo "OS_NAME=$(lsb_release -si)-$(lsb_release -sr)" >> $GITHUB_ENV

      # Condense the configure arguments into CONFIG_PARAM for the artifact name.
      - name: Save Config Parameters
        run: |
          DEMANGLE_CONFIG=$(echo ${{ matrix.sos_config }} | sed 's/ //g' | sed -E 's/(-)\1+/-/g' | sed -E 's/[^=a-zA-Z0-9._\-]/\+/g')
          echo "config param is $DEMANGLE_CONFIG"
          echo "CONFIG_PARAM=$DEMANGLE_CONFIG" >> $GITHUB_ENV

      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'true'

      - name: Install dependencies
        run: |
          sudo apt-get -y update
          sudo apt-get install -y gfortran mpich libmpich-dev libev-dev libev-libevent-dev libhwloc-dev gdb
          # Relax ptrace restrictions and turn off address-space randomization.
          sudo sysctl -w kernel.yama.ptrace_scope=0
          sudo sysctl -w kernel.randomize_va_space=0

      # XPMEM: installed under /usr, kernel module loaded on every run.
      - name: Checkout XPMEM
        uses: actions/checkout@v4
        with:
          repository: openucx/xpmem
          path: repos/xpmem
          ref: ${{ matrix.xpmem_version }}

      - name: Build XPMEM
        run: |
          cd repos/xpmem
          sudo apt-get -y install linux-headers-`uname -r`
          ./autogen.sh
          ./configure --prefix=/usr
          make -j
          sudo make install
          sudo insmod /usr/lib/modules/`uname -r`/kernel/xpmem/xpmem.ko
          sudo chown `whoami` /dev/xpmem

      # Portals4: checked out and built only when the cache misses.
      - name: Cache Portals4 install
        id: cache-portals4
        uses: actions/cache@v4
        with:
          path: ${{ env.PORTALS4_INSTALL_DIR }}
          key: portals4-${{ matrix.portals4_version }}-${{ env.OS_NAME }}

      - name: Checkout Portals4
        if: steps.cache-portals4.outputs.cache-hit != 'true'
        uses: actions/checkout@v4
        with:
          repository: regrant/portals4
          path: repos/portals4
          ref: ${{ matrix.portals4_version }}

      # The sed line rewrites the `abort_state` definition in ptl_gbl.h to be
      # `static` before building.
      - name: Build Portals4
        if: steps.cache-portals4.outputs.cache-hit != 'true'
        run: |
          cd repos/portals4
          ./autogen.sh
          sed -i 's/^struct ptl_abort_state abort_state;/static struct ptl_abort_state abort_state;/' src/ib/ptl_gbl.h
          mkdir build; cd build
          ../configure --prefix=${PORTALS4_INSTALL_DIR} --enable-zero-mrs --enable-reliable-udp --disable-pmi-from-portals
          make -j
          make install

      # SOS
      # Fixed: the step names used `matrix.name`, which this matrix does not
      # define (the key is `config_name`), so they rendered as "Build SOS ()".
      - name: Build SOS (${{ matrix.config_name }})
        run: |
          git submodule update --remote
          ./autogen.sh
          mkdir build; cd build
          ../configure --prefix=${SOS_INSTALL_DIR} --with-portals4=${PORTALS4_INSTALL_DIR} ${{ matrix.sos_config }}
          make -j
          make install

      - name: Configure Core Analysis (${{ matrix.sos_config }})
        run: |
          bash ${GITHUB_WORKSPACE}/.github/scripts/scan_core.sh init

      # Test failures are tolerated here (continue-on-error).
      - name: Test SOS (${{ matrix.config_name }})
        continue-on-error: true
        run: |
          cd build
          make check TESTS= -j
          ulimit -c unlimited
          ulimit -a
          SHMEM_DEBUG=1 SHMEM_INFO=1 make VERBOSE=1 TEST_RUNNER="${SOS_PM} -np 2 timeout --signal=ABRT 15m" check
          ${SOS_PM} -np 1 modules/tests-sos/test/unit/hello

      # NOTE(review): unlike the other jobs, this one runs `scan_core.sh init`
      # but never `scan_core.sh scan` - confirm whether that is intentional.
      - name: Archive Artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: debug-artifacts-${{ matrix.config_name }}-${{ env.CONFIG_PARAM }}-${{ github.run_number }}.${{ github.run_attempt }}
          path: ${{ github.workspace }}/archives
          if-no-files-found: ignore
# XPMEM only: SOS is configured with XPMEM and no --with-<transport> option.
  XPMEM_Only:
    runs-on: ubuntu-24.04
    strategy:
      fail-fast: false
      matrix:
        include:
          - config_name: transport_none
            xpmem_version: master
        # NOTE(review): the original indentation was not recoverable.
        # sos_config is laid out as a matrix axis (sibling of `include`)
        # because the steps interpolate it as a single string - confirm.
        sos_config:
          - '--with-xpmem --enable-shr-atomics --enable-error-checking --enable-pmi-simple'
    steps:
      - name: Checking OS version
        run: |
          echo "OS_NAME=$(lsb_release -si)-$(lsb_release -sr)" >> $GITHUB_ENV

      # Added: the artifact name below reads env.CONFIG_PARAM, but this job
      # never set it, leaving an empty segment in the name. Same step as in
      # the other jobs.
      - name: Save Config Parameters
        run: |
          DEMANGLE_CONFIG=$(echo ${{ matrix.sos_config }} | sed 's/ //g' | sed -E 's/(-)\1+/-/g' | sed -E 's/[^=a-zA-Z0-9._\-]/\+/g')
          echo "config param is $DEMANGLE_CONFIG"
          echo "CONFIG_PARAM=$DEMANGLE_CONFIG" >> $GITHUB_ENV

      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'true'

      - name: Install dependencies
        run: |
          sudo apt-get -y update
          sudo apt-get install -y gfortran mpich libmpich-dev libev-dev libev-libevent-dev libhwloc-dev gdb
          # Relax ptrace restrictions and turn off address-space randomization.
          sudo sysctl -w kernel.yama.ptrace_scope=0
          sudo sysctl -w kernel.randomize_va_space=0

      # XPMEM: installed under /usr, kernel module loaded on every run.
      - name: Checkout XPMEM
        uses: actions/checkout@v4
        with:
          repository: openucx/xpmem
          path: repos/xpmem
          ref: ${{ matrix.xpmem_version }}

      - name: Build XPMEM
        run: |
          cd repos/xpmem
          sudo apt-get -y install linux-headers-`uname -r`
          ./autogen.sh
          ./configure --prefix=/usr
          make -j
          sudo make install
          sudo insmod /usr/lib/modules/`uname -r`/kernel/xpmem/xpmem.ko
          sudo chown `whoami` /dev/xpmem

      # SOS
      # Fixed: the step names used `matrix.name`, which this matrix does not
      # define (the key is `config_name`), so they rendered as "Build SOS ()".
      - name: Build SOS (${{ matrix.config_name }})
        run: |
          git submodule update --remote
          ./autogen.sh
          mkdir build; cd build
          ../configure --prefix=${SOS_INSTALL_DIR} ${{ matrix.sos_config }}
          make -j
          make install

      - name: Configure Core Analysis (${{ matrix.sos_config }})
        run: |
          bash ${GITHUB_WORKSPACE}/.github/scripts/scan_core.sh init

      - name: Test SOS (${{ matrix.config_name }})
        run: |
          cd build
          make check TESTS= -j
          ulimit -c unlimited
          ulimit -a
          SHMEM_DEBUG=1 SHMEM_INFO=1 make VERBOSE=1 TEST_RUNNER="${SOS_PM} -np 2 timeout --signal=ABRT 15m" check
          ${SOS_PM} -np 1 modules/tests-sos/test/unit/hello

      - name: Scanning for Core Dumps (${{ matrix.sos_config }})
        if: always()
        run: |
          bash ${GITHUB_WORKSPACE}/.github/scripts/scan_core.sh scan

      - name: Archive Artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: debug-artifacts-${{ matrix.config_name }}-${{ env.CONFIG_PARAM }}-${{ github.run_number }}.${{ github.run_attempt }}
          path: ${{ github.workspace }}/archives
          if-no-files-found: ignore