Skip to content

Commit 203df99

Browse files
authored
Merge branch 'bartgol/eamxx/homme-pd-remap-fix' (PR #7477)
When updating the views after subview info changes, we also need to update the stored subview (for later check). Also, modify model_restart test, so that we don't re-run the 1st part of the run. Instead, make the base run also write the restart file (like CIME does). Note: this requires running less than 2*rest_freq steps, or else the rpointer file will be nuked, and the rest run will NOT find the first .r file. Hence, run 2+1 steps (with restart every 2) rather than 1+1.
2 parents 8edd04b + 6985863 commit 203df99

File tree

9 files changed

+86
-280
lines changed

9 files changed

+86
-280
lines changed

components/eamxx/src/dynamics/homme/physics_dynamics_remapper.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ subfields_info_has_changed (const std::map<int,SubviewInfo>& subfield_info,
181181
}
182182

183183
void PhysicsDynamicsRemapper::
184-
update_subfields_views (const std::map<int,SubviewInfo>& subfield_info,
184+
update_subfields_views (std::map<int,SubviewInfo>& subfield_info,
185185
const ViewsRepo& repo,
186186
const std::vector<Field>& fields) const
187187
{
@@ -205,10 +205,12 @@ update_subfields_views (const std::map<int,SubviewInfo>& subfield_info,
205205
}
206206
};
207207

208-
for (const auto& it : subfield_info) {
209-
const auto& f = fields[it.first];
210-
if ( not(it.second==f.get_header().get_alloc_properties().get_subview_info()) ){
211-
get_view(it.first,fields[it.first]);
208+
for (auto& [fname, svi] : subfield_info) {
209+
const auto& f = fields[fname];
210+
const auto& new_svi = f.get_header().get_alloc_properties().get_subview_info();
211+
if ( not(svi==new_svi) ) {
212+
get_view(fname,f);
213+
svi = new_svi;
212214
}
213215
}
214216
Kokkos::deep_copy(repo.views, repo.h_views);

components/eamxx/src/dynamics/homme/physics_dynamics_remapper.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ class PhysicsDynamicsRemapper : public AbstractRemapper
151151

152152
bool subfields_info_has_changed (const std::map<int,SubviewInfo>& subfield_info,
153153
const std::vector<Field>& fields) const;
154-
void update_subfields_views (const std::map<int,SubviewInfo>& subfield_info,
154+
void update_subfields_views (std::map<int,SubviewInfo>& subfield_info,
155155
const ViewsRepo& repo,
156156
const std::vector<Field>& fields) const;
157157

components/eamxx/src/share/field/field_utils_impl.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,6 +1005,32 @@ void print_field_hyperslab (const Field& f,
10051005
}
10061006
break;
10071007
}
1008+
case 5: // rank-5 layout: one header line per (i,j,k,l), then the 5th extent printed in rows of max_per_line
1009+
{
1010+
dims_str[dims_left[4]] = ":"; // the 5th remaining dim is shown as ':' in the header built below
1011+
auto v = f.get_strided_view<const T*****,Host>();
1012+
for (int i=0; i<layout.dim(0); ++i) {
1013+
dims_str[dims_left[0]] = std::to_string(i);
1014+
for (int j=0; j<layout.dim(1); ++j) {
1015+
dims_str[dims_left[1]] = std::to_string(j);
1016+
for (int k=0; k<layout.dim(2); ++k) {
1017+
dims_str[dims_left[2]] = std::to_string(k);
1018+
for (int l=0; l<layout.dim(3); ++l) {
1019+
dims_str[dims_left[3]] = std::to_string(l);
1020+
out << " " << f.name() << "(" << ekat::join(dims_str,",") << ")";
1021+
for (int m=0; m<layout.dim(4); ++m) { // m is the 5th index of v, so it is bounded by extent 4 (was dim(3): wrong extent)
1022+
if (m%max_per_line==0) {
1023+
out << "\n ";
1024+
}
1025+
out << v(i,j,k,l,m) << ", ";
1026+
}
1027+
out << "\n";
1028+
}
1029+
}
1030+
}
1031+
}
1032+
break;
1033+
}
10081034
default:
10091035
EKAT_ERROR_MSG (
10101036
"Unsupported rank in print_field_hyperslab.\n"

components/eamxx/tests/multi-process/dynamics_physics/model_restart/CMakeLists.txt

Lines changed: 40 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -8,68 +8,66 @@ BuildCprnc()
88
# HOMME_TARGET NP PLEV QSIZE_D
99
CreateDynamicsLib("theta-l_kokkos" 4 72 10)
1010

11-
# We have 3 runs:
12-
# 1) run for 2*N time steps starting from t=0 (baseline run)
13-
# 2) run for N time steps starting from t=0 (init run)
14-
# 3) run for N time steps re-starting from t=N*dt (restarted run)
15-
# We can use the same namelist for all tests, using 3 different input yaml files
11+
# We have 2 runs:
12+
# 1) run for 2*N time steps starting from t=0 (base run), write restart files at Nth step
13+
# 2) run for N time steps re-starting from t=N*dt (rest run)
14+
# We can use the same namelist for all tests. For input/output yaml files, we use the same
15+
# yaml template, but need to configure them b/c the run_t0 and the output file prefix differ
16+
# in the 2 runs.
1617

1718
# Create a single executable for both runs
1819
CreateADUnitTestExec(model_restart
1920
LIBS cld_fraction ${dynLibName} shoc p3 scream_rrtmgp)
2021

2122
# Set time integration options
2223
set (CASE_T0 2023-01-01-00000)
23-
set (CASE_TN 2023-01-01-00060)
24+
set (RUN_DT 30)
2425

25-
# Create the baseline (run all 6 timsteps in a single run)
26-
CreateUnitTestFromExec(model_baseline model_restart
27-
EXE_ARGS "--args -ifile=input_baseline.yaml"
28-
MPI_RANKS ${SCREAM_TEST_MAX_RANKS}
29-
FIXTURES_SETUP baseline_run)
26+
set (HIST_FREQ 90)
27+
set (HIST_FREQ_UNITS nsecs)
28+
set (REST_FREQ 60)
29+
set (REST_FREQ_UNITS nsecs)
3030

31-
# Start a simulation, but only run half of the time steps
32-
CreateUnitTestFromExec(model_initial model_restart
33-
EXE_ARGS "--args -ifile=input_initial.yaml"
31+
# Create the baseline (run all time steps in a single run) as well as the restart files
32+
set(SUFFIX "base")
33+
set (RUN_T0 2023-01-01-00000)
34+
set (RUN_NSTEPS 3)
35+
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input.yaml
36+
${CMAKE_CURRENT_BINARY_DIR}/input_base.yaml)
37+
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/output.yaml
38+
${CMAKE_CURRENT_BINARY_DIR}/output_base.yaml)
39+
CreateUnitTestFromExec(model_restart_base model_restart
40+
EXE_ARGS "--args -ifile=input_base.yaml"
3441
MPI_RANKS ${SCREAM_TEST_MAX_RANKS}
35-
FIXTURES_SETUP initial_run
42+
FIXTURES_SETUP base_run
3643
PROPERTIES RESOURCE_LOCK rpointer_file)
3744

3845
# Restart the simulation, and run the second half of the time steps
39-
CreateUnitTestFromExec(model_restart model_restart
40-
EXE_ARGS "--args -ifile=input_restarted.yaml"
46+
set(SUFFIX "rest")
47+
set (RUN_T0 2023-01-01-00060)
48+
set (RUN_NSTEPS 1)
49+
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input.yaml
50+
${CMAKE_CURRENT_BINARY_DIR}/input_rest.yaml)
51+
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/output.yaml
52+
${CMAKE_CURRENT_BINARY_DIR}/output_rest.yaml)
53+
CreateUnitTestFromExec(model_restart_rest model_restart
54+
EXE_ARGS "--args -ifile=input_rest.yaml"
4155
MPI_RANKS ${SCREAM_TEST_MAX_RANKS}
42-
FIXTURES_REQUIRED initial_run
43-
FIXTURES_SETUP restarted_run
56+
FIXTURES_REQUIRED base_run
57+
FIXTURES_SETUP rest_run
4458
PROPERTIES RESOURCE_LOCK rpointer_file)
4559

46-
# Finally, compare the nc outputs generated by the basline and restarted runs
47-
# IMPORTANT: make sure these file names match what baseline/restarted runs produce
48-
set (SRC_FILE model_output_baseline.AVERAGE.nmins_x1.np${SCREAM_TEST_MAX_RANKS}.${CASE_T0}.nc)
49-
set (TGT_FILE model_output.AVERAGE.nmins_x1.np${SCREAM_TEST_MAX_RANKS}.${CASE_T0}.nc)
60+
# Finally, compare the nc outputs generated by the base and rest runs
61+
# IMPORTANT: make sure these file names match what base/rest runs produced
62+
set (SRC_FILE model_output_base.AVERAGE.${HIST_FREQ_UNITS}_x${HIST_FREQ}.np${SCREAM_TEST_MAX_RANKS}.${CASE_T0}.nc)
63+
set (TGT_FILE model_output_rest.AVERAGE.${HIST_FREQ_UNITS}_x${HIST_FREQ}.np${SCREAM_TEST_MAX_RANKS}.${CASE_T0}.nc)
5064

51-
add_test (NAME restarted_vs_monolithic_check_np${SCREAM_TEST_MAX_RANKS}
65+
add_test (NAME model_restart_check_np${SCREAM_TEST_MAX_RANKS}
5266
COMMAND cmake -P ${CMAKE_BINARY_DIR}/bin/CprncTest.cmake ${SRC_FILE} ${TGT_FILE}
5367
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
54-
set_tests_properties (restarted_vs_monolithic_check_np${SCREAM_TEST_MAX_RANKS} PROPERTIES
68+
set_tests_properties (model_restart_check_np${SCREAM_TEST_MAX_RANKS} PROPERTIES
5569
RESOURCE_GROUPS "devices:1"
56-
FIXTURES_REQUIRED "baseline_run;restarted_run")
57-
58-
# Configure yaml input file to run directory
59-
set (RUN_T0 2023-01-01-00000)
60-
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input_baseline.yaml
61-
${CMAKE_CURRENT_BINARY_DIR}/input_baseline.yaml)
62-
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input_initial.yaml
63-
${CMAKE_CURRENT_BINARY_DIR}/input_initial.yaml)
64-
set (RUN_T0 2023-01-01-00030)
65-
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input_restarted.yaml
66-
${CMAKE_CURRENT_BINARY_DIR}/input_restarted.yaml)
67-
68-
# The two yaml files that control the output streams (for the baseline and restart runs)
69-
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/model_output.yaml
70-
${CMAKE_CURRENT_BINARY_DIR}/model_output.yaml COPYONLY)
71-
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/model_restart_output.yaml
72-
${CMAKE_CURRENT_BINARY_DIR}/model_restart_output.yaml COPYONLY)
70+
FIXTURES_REQUIRED "base_run;rest_run")
7371

7472
# Set homme's test options, so that we can configure the namelist correctly
7573
# Discretization/algorithm settings
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ driver_options:
44
atmosphere_dag_verbosity_level: 5
55

66
time_stepping:
7-
time_step: 30
8-
number_of_steps: 1
9-
run_t0: 2023-01-01-00000 # YYYY-MM-DD-XXXXX
7+
time_step: ${RUN_DT}
8+
number_of_steps: ${RUN_NSTEPS}
9+
run_t0: ${RUN_T0} # YYYY-MM-DD-XXXXX
1010
case_t0: 2023-01-01-00000 # YYYY-MM-DD-XXXXX
1111

1212
initial_conditions:
@@ -68,13 +68,13 @@ grids_manager:
6868
type: homme
6969
physics_grid_type: gll
7070
dynamics_namelist_file_name: namelist.nl
71-
vertical_coordinate_filename: IC_FILE
71+
vertical_coordinate_filename: ${SCREAM_DATA_DIR}/init/${EAMxx_tests_IC_FILE_72lev}
7272

7373
# List all the yaml files with the output parameters
7474
scorpio:
7575
model_restart:
7676
output_control:
77-
frequency: 30
78-
frequency_units: nsecs
79-
output_yaml_files: ["model_restart_output.yaml"]
77+
frequency: ${REST_FREQ}
78+
frequency_units: ${REST_FREQ_UNITS}
79+
output_yaml_files: ["output_${SUFFIX}.yaml"]
8080
...

components/eamxx/tests/multi-process/dynamics_physics/model_restart/input_baseline.yaml

Lines changed: 0 additions & 76 deletions
This file was deleted.

components/eamxx/tests/multi-process/dynamics_physics/model_restart/input_restarted.yaml

Lines changed: 0 additions & 60 deletions
This file was deleted.

0 commit comments

Comments
 (0)