diff --git a/components/eamxx/docs/user/io_aliases.md b/components/eamxx/docs/user/io_aliases.md new file mode 100644 index 000000000000..ad7848f74576 --- /dev/null +++ b/components/eamxx/docs/user/io_aliases.md @@ -0,0 +1,83 @@ +# EAMxx Field Aliasing Feature + +This document describes the field aliasing feature for EAMxx I/O operations. + +## Overview + +The field aliasing feature allows users to specify custom names for +variables in NetCDF output files while maintaining the original +internal field names in the model. This is useful for: + +- Creating shorter, more convenient variable names for output +- Maintaining compatibility with external tools that expect specific variable names +- Providing user-friendly names for complex internal field names + +## Syntax + +The alias syntax uses the delimiter `:=` to separate the alias name +from the internal field name (whitespace around either name is ignored): + +```yaml +alias_name:=internal_field_name +``` + +## YAML Configuration Examples + +### Basic Usage + +```yaml +field_names: + - "LWP:=LiqWaterPath" # Alias LWP for LiqWaterPath + - "SWP:=SolidWaterPath" # Alias SWP for SolidWaterPath + - "T:=T_mid" # Alias T for T_mid + - "qv" # Regular field name (no alias) +``` + +### Mixed Usage + +You can mix aliased and non-aliased fields in the same configuration: + +```yaml +field_names: + - "T_mid" # Regular field name + - "LWP:=LiqWaterPath" # Aliased field + - "p_mid" # Regular field name + - "RH:=RelativeHumidity" # Aliased field +``` + +## Output Behavior + +When using aliases: + +1. **NetCDF Variables**: The NetCDF file will contain variables +named according to the aliases + + - `LWP` instead of `LiqWaterPath` + - `T` instead of `T_mid` + - `RH` instead of `RelativeHumidity` + +2. **Internal Processing**: All internal model operations use the +original field names + + - Field validation uses `LiqWaterPath`, `T_mid`, etc. + - Diagnostic calculations use original names + - Memory management uses original field structures + +3. 
**Metadata**: Variable attributes (units, long_name, etc.) +are preserved from the original fields, and `eamxx_name` +is added to the netcdf files to document aliasing + +## Caveats + +Currently, a field can be requested only once in a single stream, +and either the original name or the alias name counts. + +```yaml +field_names: + - "LWP:=" # Error: empty field name + - ":=LiqWaterPath" # Error: empty alias name + - "LWP:=Field1" # OK + - "LWP:=Field2" # Error: duplicate alias LWP + - "LWP1:=LiqWaterPath" # OK + - "LWP2:=LiqWaterPath" # Error: duplicate field LiqWaterPath +``` diff --git a/components/eamxx/mkdocs.yml b/components/eamxx/mkdocs.yml index db8f8c17db71..2d4ba4a22104 100644 --- a/components/eamxx/mkdocs.yml +++ b/components/eamxx/mkdocs.yml @@ -19,6 +19,7 @@ nav: - 'Overview': 'user/diags/index.md' - 'Field contraction diagnostics': 'user/diags/field_contraction.md' - 'Presentations': 'user/presentations.md' + - 'IO Aliases': 'user/io_aliases.md' - 'Developer Guide': - 'Quick-start Guide': 'developer/dev_quickstart.md' - 'Code Structure and Organization': 'developer/code_structure.md' diff --git a/components/eamxx/src/share/io/eamxx_io_utils.cpp b/components/eamxx/src/share/io/eamxx_io_utils.cpp index 52d5a399da10..e3848a7622f1 100644 --- a/components/eamxx/src/share/io/eamxx_io_utils.cpp +++ b/components/eamxx/src/share/io/eamxx_io_utils.cpp @@ -5,6 +5,8 @@ #include "share/util/eamxx_utils.hpp" #include "share/eamxx_config.hpp" +#include + #include #include @@ -231,4 +233,64 @@ create_diagnostic (const std::string& diag_field_name, return diag; } +// Split a field specification into {alias, field_name}, trimming surrounding +// whitespace from both. A specification without ':=' is returned as both alias +// and field name. Errors out (EKAT_REQUIRE_MSG) if a resulting name is empty or +// if ':=' appears more than once. +std::pair +parse_field_alias (const std::string& field_spec) +{ + const std::string delimiter = ":="; + auto pos = field_spec.find(delimiter); + + if (pos == std::string::npos) { + // No alias found, return the field_spec as both alias and field name + std::string trimmed = ekat::trim(field_spec); + // An empty (or all-whitespace) entry cannot name a field: reject it here, as + // the aliased form does below, instead of returning an empty field name. + EKAT_REQUIRE_MSG(!trimmed.empty(), + "Error! Invalid field specification: '" + field_spec + "'\n" + "Expected a non-empty field name, or 'alias:=field_name'.\n"); + return {trimmed, trimmed}; + } + + // Extract and trim alias and field name + std::string alias = 
ekat::trim(field_spec.substr(0, pos)); + std::string field_name = ekat::trim(field_spec.substr(pos + delimiter.length())); + + EKAT_REQUIRE_MSG(!alias.empty() && !field_name.empty(), + "Error! Invalid field alias specification: '" + field_spec + "'\n" + "Expected format: 'alias:=field_name' where both alias and field_name are non-empty.\n"); + + auto another_pos = field_name.find(delimiter); + EKAT_REQUIRE_MSG(another_pos == std::string::npos, + "Error! Invalid field alias specification: '" + field_spec + "'\n" + "Multiple ':=' tokens found.\n"); + return {alias, field_name}; +} + +// Parse every specification in field_specs. Returns the alias -> field name map, +// together with the aliases in input order. Errors out if an alias is repeated. +std::pair, std::vector> +process_field_aliases (const std::vector& field_specs) +{ + std::map alias_to_field_map; + std::vector alias_names; + + for (const auto& spec : field_specs) { + auto [alias, field_name] = parse_field_alias(spec); + + // Store the mapping and detect duplicate aliases with a single lookup: + // emplace leaves the map unchanged, and reports it, if the alias already exists. + const bool is_new_alias = alias_to_field_map.emplace(alias, field_name).second; + EKAT_REQUIRE_MSG(is_new_alias, + "Error! Duplicate field alias found: '" + alias + "'\n" + "Each alias must be unique within the field list.\n"); + + alias_names.push_back(alias); + } + + return {alias_to_field_map, alias_names}; +} + } // namespace scream diff --git a/components/eamxx/src/share/io/eamxx_io_utils.hpp b/components/eamxx/src/share/io/eamxx_io_utils.hpp index 05fe8771f620..4a4343a5dbdd 100644 --- a/components/eamxx/src/share/io/eamxx_io_utils.hpp +++ b/components/eamxx/src/share/io/eamxx_io_utils.hpp @@ -89,5 +89,16 @@ std::shared_ptr create_diagnostic (const std::string& diag_name, const std::shared_ptr& grid); +// Parse field alias from field specification string. +// Format: "alias:=field_name" returns {alias, field_name} +// For non-aliased fields, returns {field_name, field_name} +std::pair +parse_field_alias (const std::string& field_spec); + +// Process a list of field specifications with potential aliases. 
+// Returns a map from alias_name -> internal_field_name and a vector of alias names +std::pair, std::vector> +process_field_aliases (const std::vector& field_specs); + } // namespace scream #endif // SCREAM_IO_UTILS_HPP diff --git a/components/eamxx/src/share/io/scorpio_output.cpp b/components/eamxx/src/share/io/scorpio_output.cpp index 365667072a33..82aa2cd6c4aa 100644 --- a/components/eamxx/src/share/io/scorpio_output.cpp +++ b/components/eamxx/src/share/io/scorpio_output.cpp @@ -51,6 +51,7 @@ AtmosphereOutput (const ekat::Comm& comm, for (auto f : fields) { fm->add_field(f); m_fields_names.push_back(f.name()); + m_alias_names.push_back(f.name()); // Use field name as alias (no aliasing) } // No remaps: set all FM except the one for scorpio (created in init()) @@ -81,11 +82,13 @@ AtmosphereOutput (const ekat::Comm& comm, const ekat::ParameterList& params, // By default, IO is done directly on the field mgr grid auto fm_grid = field_mgr->get_grids_manager()->get_grid(grid_name); std::string io_grid_name = fm_grid->name(); + std::vector field_specs; // Raw field specifications from YAML (may include aliases) + if (params.isParameter("field_names")) { // This simple parameter list option does *not* allow to remap fields // to an io grid different from that of the field manager. 
In order to // use that functionality, you need the full syntax - m_fields_names = params.get("field_names"); + field_specs = params.get("field_names"); } else if (params.isSublist("fields")){ const auto& f_pl = params.sublist("fields"); bool grid_found = false; @@ -94,11 +97,11 @@ AtmosphereOutput (const ekat::Comm& comm, const ekat::ParameterList& params, grid_found = true; const auto& pl = f_pl.sublist(grid_name); if (pl.isType("field_names")) { - m_fields_names = pl.get("field_names"); + field_specs = pl.get("field_names"); } else if (pl.isType("field_names")) { - m_fields_names.resize(1, pl.get("field_names")); - if (m_fields_names[0]=="NONE") { - m_fields_names.clear(); + field_specs.resize(1, pl.get("field_names")); + if (field_specs[0]=="NONE") { + field_specs.clear(); } } @@ -113,6 +116,25 @@ AtmosphereOutput (const ekat::Comm& comm, const ekat::ParameterList& params, "Error! Bad formatting of output yaml file. Missing 'fields->$grid_name` sublist.\n"); } + // Process field specifications to extract aliases and internal field names + auto [alias_to_field_map, alias_names] = process_field_aliases(field_specs); + m_alias_to_field_map = alias_to_field_map; + m_alias_names = alias_names; + + // Extract internal field names for further processing + m_fields_names.clear(); + for (const auto& spec : field_specs) { + auto [alias, field_name] = parse_field_alias(spec); + m_fields_names.push_back(field_name); + } + + // TODO: allow users to request the same field more than once via different aliases + // TODO: currently, that would result in issues downstream, and so it must be done + // TODO: more carefully. The rationale is to enable users to debug their aliasing, etc. + EKAT_REQUIRE_MSG (not has_duplicates(m_alias_names), + "[AtmosphereOutput] Error! 
One of the output yaml files has duplicate field alias entries.\n" + " - yaml file: " + params.name() + "\n" + " - alias names: " + ekat::join(m_alias_names,",") + "\n"); EKAT_REQUIRE_MSG (not has_duplicates(m_fields_names), "[AtmosphereOutput] Error! One of the output yaml files has duplicate field entries.\n" " - yaml file: " + params.name() + "\n" @@ -265,7 +287,9 @@ void AtmosphereOutput::init() // Create FM for scorpio. The fields in this FM are guaranteed to NOT have parents/padding auto fm_scorpio = m_field_mgrs[Scorpio] = std::make_shared(fm_after_hr->get_grid(),RepoState::Closed); - for (const auto& fname : m_fields_names) { + for (size_t i = 0; i < m_fields_names.size(); ++i) { + const auto& fname = m_fields_names[i]; + const auto& alias = m_alias_names[i]; const auto& f = fm_after_hr->get_field(fname); const auto& fh = f.get_header(); const auto& fid = fh.get_identifier(); @@ -288,31 +312,31 @@ void AtmosphereOutput::init() fm_scorpio->add_field(f); } - // Store the field layout, so that calls to setup_output_file are easier + // Store the field layout using alias name, so that calls to setup_output_file are easier const auto& layout = fid.get_layout(); - m_vars_dims[fname] = get_var_dimnames(layout); + m_vars_dims[alias] = get_var_dimnames(layout); // Now check that all the dims of this field are already set to be registered. const auto& tags = layout.tags(); const auto& dims = layout.dims(); - for (int i=0; ihas_special_tag_name(tags[i]) - ? m_io_grid->get_special_tag_name(tags[i]) - : layout.names()[i]; + std::string dimname = m_io_grid->has_special_tag_name(tags[j]) + ? m_io_grid->get_special_tag_name(tags[j]) + : layout.names()[j]; // If t==CMP, and the name stored in the layout is "dim" (the default) or "bin", // we append also the extent, to allow different vector dims in the file // TODO: generalize this to all tags, for now hardcoding to dim and bin only - dimname += (dimname=="dim" or dimname=="bin") ? 
std::to_string(dims[i]) : ""; + dimname += (dimname=="dim" or dimname=="bin") ? std::to_string(dims[j]) : ""; - auto is_partitioned = m_io_grid->get_partitioned_dim_tag()==tags[i]; + auto is_partitioned = m_io_grid->get_partitioned_dim_tag()==tags[j]; int dimlen = is_partitioned ? m_io_grid->get_partitioned_dim_global_size() - : layout.dim(i); + : layout.dim(j); auto it_bool = m_dims_len.emplace(dimname,dimlen); EKAT_REQUIRE_MSG(it_bool.second or it_bool.first->second==dimlen, - "Error! Dimension " + dimname + " on field " + fname + " has conflicting lengths.\n" + "Error! Dimension " + dimname + " on field " + fname + " (alias: " + alias + ") has conflicting lengths.\n" " - old length: " + std::to_string(it_bool.first->second) + "\n" " - new length: " + std::to_string(dimlen) + "\n" "If same name applies to different dims (e.g. PhysicsGLL and PhysicsPG2 define " @@ -457,10 +481,13 @@ run (const std::string& filename, } // Take care of updating and possibly writing fields. - for (auto const& name : m_fields_names) { + for (size_t i = 0; i < m_fields_names.size(); ++i) { + const auto& field_name = m_fields_names[i]; + const auto& alias_name = m_alias_names[i]; + // Get all the info for this field. 
- const auto& f_in = fm_after_hr->get_field(name); - auto& f_out = fm_scorpio->get_field(name); + const auto& f_in = fm_after_hr->get_field(field_name); + auto& f_out = fm_scorpio->get_field(field_name); switch (m_avg_type) { case OutputAvgType::Instant: @@ -480,7 +507,7 @@ run (const std::string& filename, if (output_step and m_avg_type==OutputAvgType::Average) { // Even if m_track_avg_cnt=true, this field may not need it if (m_track_avg_cnt) { - auto avg_count = m_field_to_avg_count.at(name); + auto avg_count = m_field_to_avg_count.at(field_name); f_out.scale_inv(avg_count); @@ -495,9 +522,9 @@ run (const std::string& filename, // Bring data to host f_out.sync_to_host(); - // Write + // Write using alias name for netcdf variable auto func_start = std::chrono::steady_clock::now(); - scorpio::write_var(filename,name,f_out.get_internal_view_data()); + scorpio::write_var(filename,alias_name,f_out.get_internal_view_data()); auto func_finish = std::chrono::steady_clock::now(); auto duration_loc = std::chrono::duration_cast(func_finish - func_start); duration_write += duration_loc.count(); @@ -629,11 +656,13 @@ register_variables(const std::string& filename, " - input value: " + fp_precision + "\n" " - supported values: float, single, double, real\n"); - // Cycle through all fields and register. - for (auto const& name : m_fields_names) { - const auto& f = m_field_mgrs[Scorpio]->get_field(name); + // Cycle through all fields and register using alias names. + for (size_t i = 0; i < m_fields_names.size(); ++i) { + const auto& field_name = m_fields_names[i]; + const auto& alias_name = m_alias_names[i]; + const auto& f = m_field_mgrs[Scorpio]->get_field(field_name); const auto& fid = f.get_header().get_identifier(); - const auto& dimnames = m_vars_dims.at(name); + const auto& dimnames = m_vars_dims.at(alias_name); std::string units = fid.get_units().to_string(); // TODO Need to change dtype to allow for other variables. 
@@ -642,31 +671,31 @@ register_variables(const std::string& filename, if (mode==scorpio::FileMode::Append) { // Simply check that the var is in the file, and has the right properties - EKAT_REQUIRE_MSG (scorpio::has_var(filename,name), + EKAT_REQUIRE_MSG (scorpio::has_var(filename,alias_name), "Error! Cannot append, due to variable missing from the file.\n" " - filename : " + filename + "\n" - " - varname : " + name + "\n"); - const auto& var = scorpio::get_var(filename,name); + " - varname : " + alias_name + "\n"); + const auto& var = scorpio::get_var(filename,alias_name); EKAT_REQUIRE_MSG (var.dim_names()==dimnames, "Error! Cannot append, due to variable dimensions mismatch.\n" " - filename : " + filename + "\n" - " - varname : " + name + "\n" + " - varname : " + alias_name + "\n" " - var dims : " + ekat::join(dimnames,",") + "\n" " - var dims from file: " + ekat::join(var.dim_names(),",") + "\n"); EKAT_REQUIRE_MSG (var.units==units, "Error! Cannot append, due to variable units mismatch.\n" " - filename : " + filename + "\n" - " - varname : " + name + "\n" + " - varname : " + alias_name + "\n" " - var units: " + units + "\n" " - var units from file: " + var.units + "\n"); EKAT_REQUIRE_MSG (var.time_dep==m_add_time_dim, "Error! Cannot append, due to time dependency mismatch.\n" " - filename : " + filename + "\n" - " - varname : " + name + "\n" + " - varname : " + alias_name + "\n" " - var time dep: " + (m_add_time_dim ? "yes" : "no") + "\n" " - var time dep from file: " + (var.time_dep ? 
"yes" : "no") + "\n"); } else { - scorpio::define_var (filename, name, units, dimnames, + scorpio::define_var (filename, alias_name, units, dimnames, "real",fp_precision, m_add_time_dim); // Add FillValue as an attribute of each variable @@ -674,10 +703,10 @@ register_variables(const std::string& filename, if (fp_precision=="double" or (fp_precision=="real" and std::is_same::value)) { double fill_value = m_fill_value; - scorpio::set_attribute(filename, name, "_FillValue",fill_value); + scorpio::set_attribute(filename, alias_name, "_FillValue",fill_value); } else { float fill_value = m_fill_value; - scorpio::set_attribute(filename, name, "_FillValue",fill_value); + scorpio::set_attribute(filename, alias_name, "_FillValue",fill_value); } // If this is has subfields, add list of its children @@ -694,32 +723,37 @@ register_variables(const std::string& filename, children_list.pop_back(); children_list.pop_back(); children_list += " ]"; - scorpio::set_attribute(filename,name,"sub_fields",children_list); + scorpio::set_attribute(filename,alias_name,"sub_fields",children_list); } // If tracking average count variables then add the name of the tracking variable for this variable - if (m_field_to_avg_count.count(name)) { - const auto& count = m_field_to_avg_count.at(name); - scorpio::set_attribute(filename,name,"averaging_count_tracker",count.name()); + if (m_field_to_avg_count.count(field_name)) { + const auto& count = m_field_to_avg_count.at(field_name); + scorpio::set_attribute(filename,alias_name,"averaging_count_tracker",count.name()); } // Atm procs may have set some request for metadata. 
using stratts_t = std::map; const auto& str_atts = f.get_header().get_extra_data("io: string attributes"); for (const auto& [att_name,att_val] : str_atts) { - scorpio::set_attribute(filename,name,att_name,att_val); + scorpio::set_attribute(filename,alias_name,att_name,att_val); } // Gather longname (if not already in the io: string attributes) if (str_atts.count("long_name")==0) { - auto longname = m_default_metadata.get_longname(name); - scorpio::set_attribute(filename, name, "long_name", longname); + auto longname = m_default_metadata.get_longname(field_name); + scorpio::set_attribute(filename, alias_name, "long_name", longname); } // Gather standard name, CF-Compliant (if not already in the io: string attributes) if (str_atts.count("standard_name")==0) { - auto standardname = m_default_metadata.get_standardname(name); - scorpio::set_attribute(filename, name, "standard_name", standardname); + auto standardname = m_default_metadata.get_standardname(field_name); + scorpio::set_attribute(filename, alias_name, "standard_name", standardname); + } + + // Add alias information if variable name differs from field name + if (alias_name != field_name) { + scorpio::set_attribute(filename, alias_name, "eamxx_name", field_name); } } } diff --git a/components/eamxx/src/share/io/scorpio_output.hpp b/components/eamxx/src/share/io/scorpio_output.hpp index 4a0f153369d9..f68ff5213b7b 100644 --- a/components/eamxx/src/share/io/scorpio_output.hpp +++ b/components/eamxx/src/share/io/scorpio_output.hpp @@ -213,6 +213,10 @@ class AtmosphereOutput strmap_t m_dims_len; std::list m_diagnostics; + // Field aliasing support + strmap_t m_alias_to_field_map; // Map from alias names to internal field names + strvec_t m_alias_names; // List of alias names (for netcdf variables) + DefaultMetadata m_default_metadata; // Use float, so that if output fp_precision=float, this is a representable value. 
diff --git a/components/eamxx/src/share/io/tests/CMakeLists.txt b/components/eamxx/src/share/io/tests/CMakeLists.txt index 90b5b3a589fb..efb462958111 100644 --- a/components/eamxx/src/share/io/tests/CMakeLists.txt +++ b/components/eamxx/src/share/io/tests/CMakeLists.txt @@ -17,6 +17,11 @@ CreateUnitTest(io_utils "io_utils.cpp" PROPERTIES RESOURCE_LOCK rpointer_file ) +## Test field aliasing functionality +CreateUnitTest(io_alias "io_alias.cpp" + LIBS scream_io LABELS io +) + # Test creation of diagnostic from diag_field_name CreateUnitTest(create_diag "create_diag.cpp" LIBS diagnostics scream_io diff --git a/components/eamxx/src/share/io/tests/io_alias.cpp b/components/eamxx/src/share/io/tests/io_alias.cpp new file mode 100644 index 000000000000..ddb4da6b622a --- /dev/null +++ b/components/eamxx/src/share/io/tests/io_alias.cpp @@ -0,0 +1,317 @@ +#include + +// Test file for EAMxx field aliasing functionality +// This file tests the field aliasing feature that allows users to specify +// alternative names for fields in output files using the syntax "ALIAS:=FIELDNAME" +// +// Tests include: +// 1. Parsing of alias specifications with various formats +// 2. Processing of mixed alias and non-alias field lists +// 3. Integration with OutputManager to create files with aliased names +// 4. 
Verification that aliased names appear correctly in NetCDF output files + +#include "share/eamxx_types.hpp" +#include "share/field/field.hpp" +#include "share/field/field_manager.hpp" +#include "share/field/field_utils.hpp" +#include "share/grid/mesh_free_grids_manager.hpp" +#include "share/io/eamxx_io_utils.hpp" +#include "share/io/eamxx_output_manager.hpp" +#include "share/io/eamxx_scorpio_interface.hpp" +#include "share/io/scorpio_input.hpp" +#include "share/util/eamxx_time_stamp.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace scream { + +// Helper function to create test fields and field manager +std::shared_ptr +create_test_field_manager(const std::shared_ptr &grid, + const util::TimeStamp &t0, int ncols, int nlevs) { + + using namespace ShortFieldTagsNames; + using namespace ekat::units; + + auto fm = std::make_shared(grid); + + // Create some test fields with realistic EAMxx names + FieldIdentifier fid1("qv", {{COL, LEV}, {ncols, nlevs}}, kg / kg, grid->name()); + FieldIdentifier fid2("T_mid", {{COL, LEV}, {ncols, nlevs}}, K, grid->name()); + FieldIdentifier fid3("ps", {{COL}, {ncols}}, Pa, grid->name()); + + Field qv(fid1); + Field T_mid(fid2); + Field ps(fid3); + + qv.allocate_view(); + T_mid.allocate_view(); + ps.allocate_view(); + + // Initialize with dummy data + qv.deep_copy(0.01); + T_mid.deep_copy(280.0); + ps.deep_copy(101325.0); + + // Update timestamps + qv.get_header().get_tracking().update_time_stamp(t0); + T_mid.get_header().get_tracking().update_time_stamp(t0); + ps.get_header().get_tracking().update_time_stamp(t0); + + fm->add_field(qv); + fm->add_field(T_mid); + fm->add_field(ps); + + return fm; +} + +TEST_CASE("io_field_aliasing") { + SECTION("parse_field_alias") { + // Test normal case with alias + auto [alias1, field1] = parse_field_alias("LWP:=LiqWaterPath"); + REQUIRE(alias1 == "LWP"); + REQUIRE(field1 == "LiqWaterPath"); + + // Test with spaces + auto [alias2, field2] = 
parse_field_alias(" SWP := SolidWaterPath "); + REQUIRE(alias2 == "SWP"); + REQUIRE(field2 == "SolidWaterPath"); + + // Test no alias + auto [alias3, field3] = parse_field_alias("Temperature"); + REQUIRE(alias3 == "Temperature"); + REQUIRE(field3 == "Temperature"); + + // Test complex field names + auto [alias4, field4] = parse_field_alias("RH:=RelativeHumidity"); + REQUIRE(alias4 == "RH"); + REQUIRE(field4 == "RelativeHumidity"); + + // Test error cases + REQUIRE_THROWS(parse_field_alias("LWP:=")); + REQUIRE_THROWS(parse_field_alias(":=LiqWaterPath")); + REQUIRE_THROWS(parse_field_alias(" := ")); + } + + SECTION("process_field_aliases") { + std::vector field_specs = { + "T", // No alias + "LWP:=LiqWaterPath", // With alias + "SWP:=SolidWaterPath", // With alias + "qv", // No alias + "RH:=RelativeHumidity" // With alias + }; + + auto [alias_map, alias_names] = process_field_aliases(field_specs); + + REQUIRE(alias_map.size() == 5); + REQUIRE(alias_names.size() == 5); + + // Check mappings + REQUIRE(alias_map["T"] == "T"); + REQUIRE(alias_map["LWP"] == "LiqWaterPath"); + REQUIRE(alias_map["SWP"] == "SolidWaterPath"); + REQUIRE(alias_map["qv"] == "qv"); + REQUIRE(alias_map["RH"] == "RelativeHumidity"); + + // Check order preservation + REQUIRE(alias_names[0] == "T"); + REQUIRE(alias_names[1] == "LWP"); + REQUIRE(alias_names[2] == "SWP"); + REQUIRE(alias_names[3] == "qv"); + REQUIRE(alias_names[4] == "RH"); + } + + SECTION("duplicate_alias_detection") { + std::vector duplicate_specs = { + "LWP:=LiqWaterPath", + "LWP:=SolidWaterPath" // Duplicate alias + }; + REQUIRE_THROWS(process_field_aliases(duplicate_specs)); + } + + SECTION("mixed_aliases_and_regular_fields") { + std::vector mixed_specs = {"temperature", "LWP:=LiqWaterPath", "pressure", + "RH:=RelativeHumidity"}; + + auto [alias_map, alias_names] = process_field_aliases(mixed_specs); + + REQUIRE(alias_map.size() == 4); + REQUIRE(alias_map["temperature"] == "temperature"); + REQUIRE(alias_map["LWP"] == 
"LiqWaterPath"); + REQUIRE(alias_map["pressure"] == "pressure"); + REQUIRE(alias_map["RH"] == "RelativeHumidity"); + } + + SECTION("empty_field_list") { + std::vector empty_specs; + auto [alias_map, alias_names] = process_field_aliases(empty_specs); + + REQUIRE(alias_map.empty()); + REQUIRE(alias_names.empty()); + } +} + +// Separate test case with PIO initialization to avoid re-initialization issues +TEST_CASE("output_aliases_file_verification", "[io][alias]") { + using namespace ShortFieldTagsNames; + using namespace ekat::units; + + // Create a simple grid and field manager + ekat::Comm comm(MPI_COMM_WORLD); + + // Initialize the PIO subsystem for this test + scorpio::init_subsystem(comm); + + const int ncols = 10; + const int nlevs = 5; + + auto gm = create_mesh_free_grids_manager(comm, 0, 0, nlevs, ncols); + gm->build_grids(); + + auto grid = gm->get_grid("point_grid"); + util::TimeStamp t0({2023, 1, 1}, {0, 0, 0}); + auto fm = create_test_field_manager(grid, t0, ncols, nlevs); + + // Test mixed alias and non-alias field specifications with actual file writing + SECTION("mixed_aliases_file_output") { + ekat::ParameterList params; + params.set("filename_prefix", "io_alias_mixed"); + params.set("averaging_type", "INSTANT"); + + std::vector mixed_specs = { + "qv", // No alias - use original name + "TEMP:=T_mid", // With alias + "PSURF:=ps" // With alias + }; + params.set("field_names", mixed_specs); + + auto &ctrl_pl = params.sublist("output_control"); + ctrl_pl.set("frequency_units", "nsteps"); + ctrl_pl.set("frequency", 1); + ctrl_pl.set("save_grid_data", false); + + // Set max snapshots to ensure file gets written and closed + params.set("max_snapshots_per_file", 2); + params.set("floating_point_precision", "single"); + + OutputManager om; + om.initialize(comm, params, t0, false); + om.setup(fm, gm->get_grid_names()); + + // Run enough timesteps to fill the file and force it to be written + auto dt = 1; // 1 second timestep for nsteps frequency + auto t = t0; 
+ const int nsteps = + 3; // Run 3 steps with frequency=1 to get 3 outputs, exceeding max_snapshots=2 + + for (int n = 0; n < nsteps; ++n) { + om.init_timestep(t, dt); + t += dt; + + // Update field values to make them different each timestep + // Get the fields from the field manager to ensure we're updating the right ones + auto qv_field = fm->get_field("qv"); + auto T_mid_field = fm->get_field("T_mid"); + auto ps_field = fm->get_field("ps"); + + qv_field.deep_copy(0.01 + n * 0.001); + T_mid_field.deep_copy(280.0 + n * 1.0); + ps_field.deep_copy(101325.0 + n * 100.0); + + // Update field timestamps + qv_field.get_header().get_tracking().update_time_stamp(t); + T_mid_field.get_header().get_tracking().update_time_stamp(t); + ps_field.get_header().get_tracking().update_time_stamp(t); + + om.run(t); + } + + // Check that the file was closed since we exceeded max_snapshots + const auto &file_specs = om.output_file_specs(); + REQUIRE(not file_specs.is_open); + + om.finalize(); + + // Verify the NetCDF file was created with the expected filename + std::string expected_filename = "io_alias_mixed.INSTANT.nsteps_x1.np" + + std::to_string(comm.size()) + "." 
+ t0.to_string() + ".nc"; + + // First check if the file actually exists + std::ifstream file_check(expected_filename); + if (file_check.good()) { + file_check.close(); + } else { + REQUIRE(false); // Fail the test if file doesn't exist + } + + // Now verify the file was created and contains the correct aliased field names + // Create a new field manager with fields using aliased names for reading + auto read_fm = std::make_shared(grid); + + // Create fields with aliased names for reading back + FieldIdentifier qv_read_id("qv", {{COL, LEV}, {ncols, nlevs}}, kg / kg, grid->name()); + FieldIdentifier temp_read_id("TEMP", {{COL, LEV}, {ncols, nlevs}}, K, grid->name()); + FieldIdentifier psurf_read_id("PSURF", {{COL}, {ncols}}, Pa, grid->name()); + + Field qv_read(qv_read_id); + Field temp_read(temp_read_id); + Field psurf_read(psurf_read_id); + + qv_read.allocate_view(); + temp_read.allocate_view(); + psurf_read.allocate_view(); + + qv_read.get_header().get_tracking().update_time_stamp(t0); + temp_read.get_header().get_tracking().update_time_stamp(t0); + psurf_read.get_header().get_tracking().update_time_stamp(t0); + + read_fm->add_field(qv_read); + read_fm->add_field(temp_read); + read_fm->add_field(psurf_read); + + // Set up reader parameter list + ekat::ParameterList reader_pl; + reader_pl.set("filename", expected_filename); + + // Use the aliased names in the field list for reading + std::vector aliased_names = {"qv", "TEMP", "PSURF"}; + reader_pl.set("field_names", aliased_names); + + // Try to read the file - this will fail if the aliases weren't written correctly + REQUIRE_NOTHROW([&]() { + AtmosphereInput reader(reader_pl, read_fm); + reader.read_variables(0); // Read first timestep + + // Verify that we can access the fields we just read + auto qv_field_read = read_fm->get_field("qv"); + auto temp_field_read = read_fm->get_field("TEMP"); + auto psurf_field_read = read_fm->get_field("PSURF"); + + // Check that fields have reasonable data + 
REQUIRE(qv_field_read.get_header().get_alloc_properties().get_num_scalars() > 0); + REQUIRE(temp_field_read.get_header().get_alloc_properties().get_num_scalars() > 0); + REQUIRE(psurf_field_read.get_header().get_alloc_properties().get_num_scalars() > 0); + + // The fact that we reached here means: + // 1. The NetCDF file was created successfully + // 2. The file contains variables with the aliased names (qv, TEMP, PSURF) + // 3. The aliases were properly applied during output + // 4. We can successfully read back the data using the aliased names + }()); + + // Clean up PIO subsystem + scorpio::finalize_subsystem(); + } +} + +} // namespace scream