Skip to content

Commit 644f4de

Browse files
authored
Merge branch E3SM-Project/ndk/machinefiles/update-muller-machine2 (PR #7590)
Update muller-cpu and muller-gpu modules
2 parents 9dfb146 + adf6048 commit 644f4de

File tree

5 files changed

+63
-49
lines changed

5 files changed

+63
-49
lines changed

cime_config/machines/Depends.alvarez-cpu.gnu.cmake

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,3 @@ if (NOT DEBUG)
77
e3sm_deoptimize_file("${ITEM}")
88
endforeach()
99
endif()
10-
11-
12-
13-

cime_config/machines/cmake_macros/intel_muller-cpu.cmake

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ string(APPEND CMAKE_CXX_FLAGS " -fp-model=precise") # and manually add precise
2525
#message(STATUS "ndk CXXFLAGS=${CXXFLAGS}")
2626

2727
string(APPEND CMAKE_Fortran_FLAGS " -fp-model=consistent -fimf-use-svml")
28-
# string(APPEND FFLAGS " -qno-opt-dynamic-align")
29-
string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -g -traceback")
30-
string(APPEND CMAKE_CXX_FLAGS_RELEASE " -g -traceback")
28+
# string(APPEND FFLAGS " -qno-opt-dynamic-align")
29+
string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -g -traceback")
30+
#string(APPEND CMAKE_CXX_FLAGS_RELEASE " -g -traceback")
3131
string(APPEND CMAKE_Fortran_FLAGS " -DHAVE_ERF_INTRINSICS")
3232
string(APPEND CMAKE_CXX_FLAGS " -fp-model=consistent")

cime_config/machines/cmake_macros/nvidia_muller-cpu.cmake

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ endif()
55
string(APPEND CMAKE_C_FLAGS_RELEASE " -O2")
66
string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2")
77
string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -g")
8+
9+
# Currently, there is a known issue with the nvidia compiler installation (not seeing all relevant include files)
10+
# and this is a temporary work-around: github.com/E3SM-Project/E3SM/issues/7003
11+
string(APPEND CMAKE_CXX_FLAGS_RELEASE " --gcc-toolchain=/usr/bin/gcc")
12+
string(APPEND CMAKE_CXX_FLAGS_DEBUG " --gcc-toolchain=/usr/bin/gcc")
13+
814
if (compile_threaded)
915
string(APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_OPENMP=Off") # work-around for nvidia as kokkos is not passing "-mp" for threaded build
1016
endif()

cime_config/machines/config_batch.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,8 @@
478478
<queues>
479479
<queue walltimemax="00:45:00" nodemax="150" default="true">regular</queue>
480480
<queue walltimemax="00:45:00" nodemax="150" strict="true">preempt</queue>
481+
<queue walltimemax="00:30:00" nodemax="150" strict="true">shared</queue>
482+
<queue walltimemax="00:30:00" nodemax="150" strict="true">overrun</queue>
481483
<queue walltimemax="00:15:00" nodemax="8" strict="true">debug</queue>
482484
</queues>
483485
</batch_system>
@@ -490,6 +492,8 @@
490492
<!-- Note: walltime is not the max walltime, but the default - see NERSC docs for Q limits, https://docs.nersc.gov/jobs/policy/ -->
491493
<queue walltimemax="00:30:00" nodemax="256" default="true">regular</queue>
492494
<queue walltimemax="00:30:00" nodemax="256" strict="true">preempt</queue>
495+
<queue walltimemax="00:30:00" nodemax="256" strict="true">shared</queue>
496+
<queue walltimemax="00:30:00" nodemax="256" strict="true">overrun</queue>
493497
<queue walltimemax="00:30:00" nodemax="8" strict="true">debug</queue>
494498
</queues>
495499
</batch_system>
@@ -519,6 +523,8 @@
519523
<queues>
520524
<queue walltimemax="00:45:00" nodemax="150" default="true">regular</queue>
521525
<queue walltimemax="00:45:00" nodemax="150" strict="true">preempt</queue>
526+
<queue walltimemax="00:30:00" nodemax="150" strict="true">shared</queue>
527+
<queue walltimemax="00:30:00" nodemax="150" strict="true">overrun</queue>
522528
<queue walltimemax="00:15:00" nodemax="8" strict="true">debug</queue>
523529
</queues>
524530
</batch_system>
@@ -529,6 +535,9 @@
529535
</directives>
530536
<queues>
531537
<queue walltimemax="00:30:00" nodemax="256" default="true">regular</queue>
538+
<queue walltimemax="00:30:00" nodemax="256" strict="true">preempt</queue>
539+
<queue walltimemax="00:30:00" nodemax="256" strict="true">shared</queue>
540+
<queue walltimemax="00:30:00" nodemax="256" strict="true">overrun</queue>
532541
<queue walltimemax="00:30:00" nodemax="8" strict="true">debug</queue>
533542
</queues>
534543
</batch_system>

cime_config/machines/config_machines.xml

Lines changed: 45 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -530,12 +530,12 @@
530530
</arguments>
531531
</mpirun>
532532
<module_system type="module" allow_error="true">
533-
<init_path lang="perl">/usr/share/lmod/8.3.1/init/perl</init_path>
534-
<init_path lang="python">/usr/share/lmod/8.3.1/init/python</init_path>
535-
<init_path lang="sh">/usr/share/lmod/8.3.1/init/sh</init_path>
536-
<init_path lang="csh">/usr/share/lmod/8.3.1/init/csh</init_path>
537-
<cmd_path lang="perl">/usr/share/lmod/lmod/libexec/lmod perl</cmd_path>
538-
<cmd_path lang="python">/usr/share/lmod/lmod/libexec/lmod python</cmd_path>
533+
<init_path lang="perl">/opt/cray/pe/lmod/8.7.19/init/perl</init_path>
534+
<init_path lang="python">/opt/cray/pe/lmod/8.7.19/init/python</init_path>
535+
<init_path lang="sh">/opt/cray/pe/lmod/8.7.19/init/sh</init_path>
536+
<init_path lang="csh">/opt/cray/pe/lmod/8.7.19/init/csh</init_path>
537+
<cmd_path lang="perl">/opt/cray/pe/lmod/lmod/libexec/lmod perl</cmd_path>
538+
<cmd_path lang="python">/opt/cray/pe/lmod/lmod/libexec/lmod python</cmd_path>
539539
<cmd_path lang="sh">module</cmd_path>
540540
<cmd_path lang="csh">module</cmd_path>
541541

@@ -569,8 +569,8 @@
569569

570570
<modules compiler="gnu">
571571
<command name="load">PrgEnv-gnu/8.5.0</command>
572-
<command name="load">gcc-native/12.3</command>
573-
<command name="load">cray-libsci/23.12.5</command>
572+
<command name="load">gcc-native/13.2</command>
573+
<command name="load">cray-libsci/24.07.0</command>
574574
</modules>
575575

576576
<modules compiler="intel">
@@ -580,24 +580,27 @@
580580

581581
<modules compiler="nvidia">
582582
<command name="load">PrgEnv-nvidia</command>
583-
<command name="load">nvidia/24.5</command>
584-
<command name="load">cray-libsci/23.12.5</command>
583+
<command name="load">nvidia/25.5</command>
584+
<command name="load">cray-libsci/24.07.0</command>
585585
</modules>
586586

587587
<modules compiler="amdclang">
588588
<command name="load">PrgEnv-aocc</command>
589589
<command name="load">aocc/4.1.0</command>
590-
<command name="load">cray-libsci/23.12.5</command>
590+
<command name="load">cray-libsci/24.07.0</command>
591591
</modules>
592592

593593
<modules>
594594
<command name="load">craype-accel-host</command>
595-
<command name="load">craype/2.7.30</command>
596-
<command name="load">cray-mpich/8.1.28</command>
595+
<command name="load">craype/2.7.32</command>
596+
<command name="load">cray-mpich/8.1.30</command>
597+
<!-- like alvarez-cpu, we still can't use the latest hdf5 as something is not right with the env -->
598+
<!-- <command name="load">cray-hdf5-parallel/1.14.3.1</command> -->
599+
<!-- <command name="load">cray-netcdf-hdf5parallel/4.9.0.13</command> -->
597600
<command name="load">cray-hdf5-parallel/1.12.2.9</command>
598601
<command name="load">cray-netcdf-hdf5parallel/4.9.0.9</command>
599-
<command name="load">cray-parallel-netcdf/1.12.3.9</command>
600-
<command name="load">cmake/3.24.3</command>
602+
<command name="load">cray-parallel-netcdf/1.12.3.13</command>
603+
<command name="load">cmake/3.30.2</command>
601604
</modules>
602605
</module_system>
603606

@@ -620,6 +623,8 @@
620623
<env name="NETCDF_PATH">$ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX}</env>
621624
<env name="PNETCDF_PATH">$ENV{CRAY_PARALLEL_NETCDF_PREFIX}</env>
622625
<env name="GATOR_INITIAL_MB">4000MB</env>
626+
<env name="LD_LIBRARY_PATH">$ENV{CRAY_LD_LIBRARY_PATH}:$ENV{LD_LIBRARY_PATH}</env> <!-- https://github.com/E3SM-Project/E3SM/issues/7117 -->
627+
<env name="MPICH_SMP_SINGLE_COPY_MODE">CMA</env> <!-- https://github.com/E3SM-Project/E3SM/issues/7207 -->
623628
</environment_variables>
624629
<environment_variables compiler="intel" mpilib="mpich">
625630
<env name="PKG_CONFIG_PATH">/global/cfs/cdirs/e3sm/3rdparty/protobuf/21.6/intel-2023.2.0/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}</env>
@@ -670,7 +675,6 @@
670675
<environment_variables compiler="gnu">
671676
<env name="MOAB_ROOT">$SHELL{if [ -z "$MOAB_ROOT" ]; then echo /global/cfs/cdirs/e3sm/software/moab/gnu; else echo "$MOAB_ROOT"; fi}</env>
672677
</environment_variables>
673-
674678
<resource_limits>
675679
<resource name="RLIMIT_STACK">-1</resource>
676680
</resource_limits>
@@ -716,12 +720,12 @@
716720
</arguments>
717721
</mpirun>
718722
<module_system type="module" allow_error="true">
719-
<init_path lang="perl">/usr/share/lmod/8.3.1/init/perl</init_path>
720-
<init_path lang="python">/usr/share/lmod/8.3.1/init/python</init_path>
721-
<init_path lang="sh">/usr/share/lmod/8.3.1/init/sh</init_path>
722-
<init_path lang="csh">/usr/share/lmod/8.3.1/init/csh</init_path>
723-
<cmd_path lang="perl">/usr/share/lmod/lmod/libexec/lmod perl</cmd_path>
724-
<cmd_path lang="python">/usr/share/lmod/lmod/libexec/lmod python</cmd_path>
723+
<init_path lang="perl">/opt/cray/pe/lmod/8.7.19/init/perl</init_path>
724+
<init_path lang="python">/opt/cray/pe/lmod/8.7.19/init/python</init_path>
725+
<init_path lang="sh">/opt/cray/pe/lmod/8.7.19/init/sh</init_path>
726+
<init_path lang="csh">/opt/cray/pe/lmod/8.7.19/init/csh</init_path>
727+
<cmd_path lang="perl">/opt/cray/pe/lmod/lmod/libexec/lmod perl</cmd_path>
728+
<cmd_path lang="python">/opt/cray/pe/lmod/lmod/libexec/lmod python</cmd_path>
725729
<cmd_path lang="sh">module</cmd_path>
726730
<cmd_path lang="csh">module</cmd_path>
727731

@@ -755,23 +759,23 @@
755759

756760
<modules compiler="gnu.*">
757761
<command name="load">PrgEnv-gnu/8.5.0</command>
758-
<command name="load">gcc-native/12.3</command>
762+
<command name="load">gcc-native/13.2</command>
759763
</modules>
760764

761765
<modules compiler="nvidia.*">
762766
<command name="load">PrgEnv-nvidia</command>
763-
<command name="load">nvidia/24.5</command>
767+
<command name="load">nvidia/25.5</command>
764768
</modules>
765769

766770
<modules compiler="gnugpu">
767-
<command name="load">cudatoolkit/12.2</command>
771+
<command name="load">cudatoolkit/12.9</command>
768772
<command name="load">craype-accel-nvidia80</command>
769773
</modules>
770774

771775
<modules compiler="nvidiagpu">
772-
<command name="load">cudatoolkit/12.2</command>
776+
<command name="load">cudatoolkit/12.9</command>
773777
<command name="load">craype-accel-nvidia80</command>
774-
<command name="load">gcc-native-mixed/12.3</command>
778+
<command name="load">gcc-native-mixed/13.2</command>
775779
</modules>
776780

777781
<modules compiler="gnu">
@@ -783,13 +787,14 @@
783787
</modules>
784788

785789
<modules>
786-
<command name="load">cray-libsci/23.12.5</command>
787-
<command name="load">craype/2.7.30</command>
788-
<command name="load">cray-mpich/8.1.28</command>
789-
<command name="load">cray-hdf5-parallel/1.12.2.9</command>
790-
<command name="load">cray-netcdf-hdf5parallel/4.9.0.9</command>
791-
<command name="load">cray-parallel-netcdf/1.12.3.9</command>
790+
<command name="load">cray-libsci/24.07.0</command>
791+
<command name="load">craype/2.7.32</command>
792+
<command name="load">cray-mpich/8.1.30</command>
793+
<command name="load">cray-hdf5-parallel/1.14.3.1</command>
794+
<command name="load">cray-netcdf-hdf5parallel/4.9.0.13</command>
795+
<command name="load">cray-parallel-netcdf/1.12.3.13</command>
792796
<command name="load">cmake/3.24.3</command>
797+
<!--command name="load">cmake/3.30.2</command-->
793798
</modules>
794799
</module_system>
795800

@@ -924,7 +929,6 @@
924929
<modules compiler="intel">
925930
<command name="load">PrgEnv-intel/8.5.0</command>
926931
<command name="load">intel/2024.1.0</command>
927-
<!--command name="load">intel/2023.2.0</command-->
928932
</modules>
929933

930934
<modules compiler="nvidia">
@@ -972,7 +976,7 @@
972976
<env name="PNETCDF_PATH">$ENV{CRAY_PARALLEL_NETCDF_PREFIX}</env>
973977
<env name="GATOR_INITIAL_MB">4000MB</env>
974978
<env name="LD_LIBRARY_PATH">$ENV{CRAY_LD_LIBRARY_PATH}:$ENV{LD_LIBRARY_PATH}</env> <!-- https://github.com/E3SM-Project/E3SM/issues/7117 -->
975-
<env name="MPICH_SMP_SINGLE_COPY_MODE">XPMEM</env> <!-- https://github.com/E3SM-Project/E3SM/issues/7207 -->
979+
<env name="MPICH_SMP_SINGLE_COPY_MODE">CMA</env> <!-- https://github.com/E3SM-Project/E3SM/issues/7207 -->
976980
</environment_variables>
977981
<environment_variables compiler="intel" mpilib="mpich">
978982
<env name="PKG_CONFIG_PATH">/global/cfs/cdirs/e3sm/3rdparty/protobuf/21.6/intel-2023.2.0/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}</env>
@@ -1023,7 +1027,6 @@
10231027
<environment_variables compiler="gnu">
10241028
<env name="MOAB_ROOT">$SHELL{if [ -z "$MOAB_ROOT" ]; then echo /global/cfs/cdirs/e3sm/software/moab/gnu; else echo "$MOAB_ROOT"; fi}</env>
10251029
</environment_variables>
1026-
10271030
<resource_limits>
10281031
<resource name="RLIMIT_STACK">-1</resource>
10291032
</resource_limits>
@@ -1069,12 +1072,12 @@
10691072
</arguments>
10701073
</mpirun>
10711074
<module_system type="module" allow_error="true">
1072-
<init_path lang="perl">/usr/share/lmod/8.3.1/init/perl</init_path>
1073-
<init_path lang="python">/usr/share/lmod/8.3.1/init/python</init_path>
1074-
<init_path lang="sh">/usr/share/lmod/8.3.1/init/sh</init_path>
1075-
<init_path lang="csh">/usr/share/lmod/8.3.1/init/csh</init_path>
1076-
<cmd_path lang="perl">/usr/share/lmod/lmod/libexec/lmod perl</cmd_path>
1077-
<cmd_path lang="python">/usr/share/lmod/lmod/libexec/lmod python</cmd_path>
1075+
<init_path lang="perl">/opt/cray/pe/lmod/8.7.19/init/perl</init_path>
1076+
<init_path lang="python">/opt/cray/pe/lmod/8.7.19/init/python</init_path>
1077+
<init_path lang="sh">/opt/cray/pe/lmod/8.7.19/init/sh</init_path>
1078+
<init_path lang="csh">/opt/cray/pe/lmod/8.7.19/init/csh</init_path>
1079+
<cmd_path lang="perl">/opt/cray/pe/lmod/lmod/libexec/lmod perl</cmd_path>
1080+
<cmd_path lang="python">/opt/cray/pe/lmod/lmod/libexec/lmod python</cmd_path>
10781081
<cmd_path lang="sh">module</cmd_path>
10791082
<cmd_path lang="csh">module</cmd_path>
10801083

0 commit comments

Comments
 (0)