diff --git a/.cspell/ok-unknown-words.txt b/.cspell/ok-unknown-words.txt index d2ede678a..a565c6ce9 100644 --- a/.cspell/ok-unknown-words.txt +++ b/.cspell/ok-unknown-words.txt @@ -551,5 +551,11 @@ zfactor zonavg zsurf zxvf +donwell +hartfield +longbourn +pemberley +openedfile +lonxlat njobs -RDHPCS +RDHPCS \ No newline at end of file diff --git a/fre/app/freapp.py b/fre/app/freapp.py index 1c2bfd4f2..3f5802393 100644 --- a/fre/app/freapp.py +++ b/fre/app/freapp.py @@ -65,6 +65,10 @@ def remap(input_dir, output_dir, begin_date, current_chunk, ts_workaround, ens_mem) @app_cli.command() +@click.option("--yamlfile", + type = str, + help = "Path to yaml configuration file", + required = True) @click.option("-i", "--input_dir", type = str, help = "`inputDir` / `input_dir` (env var) specifies input directory to regrid, " + \ @@ -75,14 +79,9 @@ def remap(input_dir, output_dir, begin_date, current_chunk, help = "`outputDir` / `output_dir` (env var) specifies target location for output" + \ " regridded files", required = True) -@click.option("-b", "--begin", - type = str, - help = "`begin` / `begin` (env var) ISO8601 datetime format specification for" + \ - " starting date of data, part of input target file name", - required = True) -@click.option("-tmp", "--tmp_dir", +@click.option("-w", "--work_dir", type = str, - help = "`TMPDIR` / `tmp_dir` (env var) temp directory for location of file " + \ + help = "`TMPDIR` / `workdir_dir` (env var) work directory for location of file " + \ "read/writes", required = True) @click.option("-rd", "--remap_dir", @@ -96,19 +95,16 @@ def remap(input_dir, output_dir, begin_date, current_chunk, "within input directory to target for regridding. 
the value for `source` " + \ "must be present in at least one component's configuration fields", required = True) -@click.option("-g", "--grid_spec", +@click.option("-id", "--input_date", type = str, - help = "`gridSpec` / `grid_spec` (env var) file containing mosaic for regridding", - required = True) -@click.option("--rose_config", - type = str, - help = "Path to Rose app configuration (to be removed soon)", - required = True) -def regrid( input_dir, output_dir, begin, tmp_dir, - remap_dir, source, grid_spec, rose_config ): + help = "`input_date` / `input_date` (env var) ISO8601 datetime format specification for" + \ + " starting date of data, part of input target file name") +def regrid(yamlfile, input_dir, output_dir, work_dir, + remap_dir, source, input_date): ''' regrid target netcdf file ''' - regrid_xy( input_dir, output_dir, begin, tmp_dir, - remap_dir, source, grid_spec, rose_config ) + regrid_xy(yamlfile, input_dir, output_dir, work_dir, + remap_dir, source, input_date) + @app_cli.command() @click.option("-i", "--infile", diff --git a/fre/app/regrid_xy/regrid_xy.py b/fre/app/regrid_xy/regrid_xy.py index b5654e27b..acc269567 100644 --- a/fre/app/regrid_xy/regrid_xy.py +++ b/fre/app/regrid_xy/regrid_xy.py @@ -1,11 +1,13 @@ import logging +import os from pathlib import Path -import shutil import subprocess import tarfile import xarray as xr import yaml +from fre.app import helpers + fre_logger = logging.getLogger(__name__) # list of variables/fields that will not be regridded @@ -50,28 +52,32 @@ def get_grid_spec(datadict: dict) -> str: Gets the grid_spec.nc file from the tar file specified in yaml["postprocess"]["settings"]["pp_grid_spec"] - :datadict: dictionary containing relevant regrid parameters + :param datadict: dictionary containing relevant regrid parameters :type datadict: dict - :raises IOError: Error if grid_spec.nc file cannot be found in the tar file + :raises IOError: Error if grid_spec.nc file cannot be found in the + current directory 
:return: grid_spec filename :rtype: str .. note:: All grid_spec files are expected to be named "grid_spec.nc". - The grid_spec file is required in order to obtain the + The grid_spec file is required in order to determine the input mosaic filename """ + #grid spec filename grid_spec = "grid_spec.nc" + #get tar file containing the grid_spec file pp_grid_spec_tar = datadict["yaml"]["postprocess"]["settings"]["pp_grid_spec"] - # untar grid_spec tar file + #untar grid_spec tar file into the current work directory if tarfile.is_tarfile(pp_grid_spec_tar): with tarfile.open(pp_grid_spec_tar, "r") as tar: tar.extractall() + #error if grid_spec file is not found after extracting from tar file if not Path(grid_spec).exists(): raise IOError(f"Cannot find {grid_spec} in tar file {pp_grid_spec_tar}") @@ -82,69 +88,67 @@ def get_input_mosaic(datadict: dict) -> str: """ Gets the input mosaic filename from the grid_spec file. - If the input mosaic file is not in input_dir, this function will copy the - input mosaic file to input_dir. - :datadict: dictionary containing relevant regrid parameters + :param datadict: dictionary containing relevant regrid parameters :type datadict: dict - :raises IOError: Error if the input mosaic file cannot be found in the - current or input directory + :raises IOError: Error if the input mosaic file cannot be found in the + current work directory - :return: input_mosaic file path as a Path object - :rtype: Path + :return: input_mosaic file + :rtype: str .. note:: The input mosaic filename is a required input argument for fregrid. The input mosaic contains the input grid information. 
""" - input_dir = Path(datadict["input_dir"]) grid_spec = datadict["grid_spec"] - match datadict["component"]["inputRealm"]: + #gridspec variable name holding the mosaic filename information + match datadict["inputRealm"]: case "atmos": mosaic_key = "atm_mosaic_file" case "ocean": mosaic_key = "ocn_mosaic_file" case "land": mosaic_key = "lnd_mosaic_file" + #get mosaic filename with xr.open_dataset(grid_spec) as dataset: - mosaic_file = Path(str(dataset[mosaic_key].data.astype(str))) + mosaic_file = str(dataset[mosaic_key].data.astype(str)) - if not (input_dir/mosaic_file).exists(): - if mosaic_file.exists(): - shutil.copy(mosaic_file, input_dir/mosaic_file) - fre_logger.warning(f"Copying {mosaic_file} to input directory {input_dir}") - else: - raise IOError((f"Cannot find input mosaic file {mosaic_file} " - "in current or input directory {input_dir}")) + #check if the mosaic file exists in the current directory + if not Path(mosaic_file).exists(): + raise IOError(f"Cannot find mosaic file {mosaic_file} in current work directory {work_dir}") - return str(input_dir/mosaic_file) + return mosaic_file -def get_input_file_argument(datadict: dict, history_file: str) -> str: +def get_input_file(datadict: dict, source: str) -> str: """ Formats the input file name where the input file contains the variable data that will be regridded. - :datadict: dictionary containing relevant regrid parameters + :param datadict: dictionary containing relevant regrid parameters :type datadict:dict - :history_file: history file type - :type history_file: str + :param source: history file type + :type source: str :return: formatted input file name :rtype: str - .. note:: The input filenames are required arguments for fregrid and refer to the history files containing the - data that will be regridded. 
A time series of history files exist for regridding:.e.g., - 20250805.atmos_daily_cmip.tile1.nc, 20250805.atmos_daily_cmip.tile2.nc, ..., 20250805.atmos_daily_cmip.tile6.nc, - The yaml configuration does not contain the exact history filenames and the filenames need to be constructed by - (1) extracting the history file "type" from the yaml configuration. This type corresponds to the field value of - yaml["postprocess"]["components"]["sources"]["history_file"] and for example, be "atmos_daily_cmip" - (2) prepending YYYYMMDD to the filename. This function will prepend the date if the date string was passed to the - entrypoint function regrid_xy of this module: i.e., this function will return "20250805.atmos_daily_cmip" - (3) Fregrid will append the tile numbers ("tile1.nc") for reading in the data + .. note:: The input filename is a required argument for fregrid and refers to the history files containing + the data that will be regridded. A history file is typically named, for example, as + 20250805.atmos_daily_cmip.tile1.nc, 20250805.atmos_daily_cmip.tile2.nc, ..., + The yaml configuration does not contain the exact history filenames and the filenames need to be + constructed by: + (1) extracting the history file "type" from the yaml configuration. This type corresponds + to the field value of yaml["postprocess"]["components"]["sources"]["history_file"] and may, for example, + be "atmos_daily_cmip" + (2) prepending YYYYMMDD to the filename. 
This function will prepend the date if the date + string was passed to the entrypoint function regrid_xy of this module: i.e., this function + will return "20250805.atmos_daily_cmip" + (3) Fregrid will append the tile numbers ("tile1.nc") for reading in the data """ input_date = datadict["input_date"] - return history_file if input_date is None else f"{input_date}.{history_file}" + return source if input_date is None else f"{input_date}.{source}" def get_remap_file(datadict: dict) -> str: @@ -155,10 +159,9 @@ def get_remap_file(datadict: dict) -> str: C96_mosaicX180x288_conserve_order1.nc where the input mosaic filename is C96_mosaic.nc and the output grid size has 180 longitude cells and 288 latitude cells. - This function will also copy the remap file to the input directory if the remap file had - been generated and saved in the output directory from remapping previous components + The remap_file will be read from, or outputted to the remap_dir. - :datadict: dictionary containing relevant regrid parameters + :param datadict: dictionary containing relevant regrid parameters :type datadict: dict :return: remap filename @@ -168,28 +171,26 @@ def get_remap_file(datadict: dict) -> str: fregrid will read in the remapping parameters (the exchange grid for conservative methods) from the remap_file for regridding the variables. If the remap_file does not exist, fregrid will compute the remapping parameters and save them to the remap_file + for future use. 
""" - input_dir = Path(datadict["input_dir"]) input_mosaic = Path(datadict["input_mosaic"]) + remap_dir = Path(datadict["remap_dir"]) nlon = datadict["output_nlon"] nlat = datadict["output_nlat"] interp_method = datadict["interp_method"] - remap_file = Path(f"{input_mosaic.stem}X{nlon}by{nlat}_{interp_method}.nc") + #define remap filename + remap_file = remap_dir/Path(f"{input_mosaic.stem}X{nlon}by{nlat}_{interp_method}.nc") - if not (input_dir/remap_file).exists(): - if (remap_file).exists(): - shutil.copy(remap_file, input_dir/remap_file) - fre_logger.info(f"Remap file {remap_file} copied to input directory {input_dir}") - else: - fre_logger.warning( - f"Cannot find specified remap_file {remap_file}\n" - "Remap file {remap_file} will be generated and saved to the input directory" - f"{input_dir}" - ) + #check if remap file exists in remap_dir + if not remap_file.exists(): + fre_logger.warning( + f"Cannot find remap_file {remap_file}\n" \ + f"Remap file {remap_file} will be generated and saved to directory {remap_dir}" + ) - return str(input_dir/remap_file) + return str(remap_file) def get_scalar_fields(datadict: dict) -> tuple[str, bool]: @@ -199,10 +200,10 @@ def get_scalar_fields(datadict: dict) -> tuple[str, bool]: Scalar_fields is a string of comma separated list of variables that will be regridded - :datadict: dictionary containing relevant regrid parameters + :param datadict: dictionary containing relevant regrid parameters :type datadict: dict - :return: tuple of a string of scalar fields and a boolean indicating whether regridding is needed + :return: (string of scalar fields, boolean indicating whether regridding is needed) :rtype: tuple[str, bool] .. 
note:: With the exception of the variables in the list @@ -214,8 +215,9 @@ def get_scalar_fields(datadict: dict) -> tuple[str, bool]: mosaic_file = datadict["input_mosaic"] input_file = datadict["input_file"] + #add the proper suffix to the input filename with xr.open_dataset(mosaic_file) as dataset: - input_file += ".tile1.nc" if dataset.sizes["ntiles"] > 1 else ".nc" + input_file += ".tile1.nc" if dataset.sizes["ntiles"] > 1 else ".nc" # xarray gives an error if variables in non_regriddable_variables do not exist in the dataset # The errors="ignore" overrides the error @@ -235,11 +237,12 @@ def write_summary(datadict): Logs a summary of the component that will be regridded in a human-readable format This function will log only if the logging level is set to INFO or lower - :datadict: dictionary containing relevant regrid parameters + :param datadict: dictionary containing relevant regrid parameters :type datadict: dict """ fre_logger.info("COMPONENT SUMMARY") + fre_logger.info(f"FREGRID work_directory: {datadict['work_dir']}") fre_logger.info(f"FREGRID input directory: {datadict['input_dir']}") fre_logger.info(f"FREGRID output_directory: {datadict['output_dir']}") fre_logger.info(f"FREGRID input mosaic file: {datadict['input_mosaic']}") @@ -252,74 +255,103 @@ def write_summary(datadict): fre_logger.info(f"FREGRID scalar_fields: {datadict['scalar_field']}") -def regrid_xy( - yamlfile: str, - input_dir: str, - output_dir: str, - components: list[str] = None, - input_date: str = None, +def regrid_xy(yamlfile: str, + input_dir: str, + output_dir: str, + work_dir: str, + remap_dir: str, + source: str, + input_date: str = None, ): """ - Submits a fregrid job for each regriddable component in the model yaml file. - - :yamlfile: yaml file containing specifications for yaml["postprocess"]["settings"]["pp_grid_spec"] - and yaml["postprocess"]["components"] - - :Input_dir: Name of the input directory containing the input mosaic file, remap file, - and input/history files. 
Fregrid will look for all input files in input_dir. - :Output_dir: Name of the output directory where fregrid outputs will be saved - :Components: List of component 'types' to regrid, e.g., components = ['aerosol', 'atmos_diurnal, 'land'] - If components is not specified, all components in the yaml file with postprocess_on = true - will be remapped - :Input_date: Datestring in the format of YYYYMMDD that corresponds to the date prefix of the history files, - e.g., input_date=20250730 where the history filename is 20250730.atmos_month_aer.tile1.nc + Calls fregrid to regrid data in the specified source data file. + + :param yamlfile: yaml file containing specifications for yaml["postprocess"]["settings"]["pp_grid_spec"] + and yaml["postprocess"]["components"] + :type yamlfile: str + :param input_dir: Name of the input directory containing the input/history files, + Fregrid will look for all input history files in input_dir. + :type input_dir: str + :param output_dir: Name of the output directory where fregrid outputs will be saved + :type output_dir: str + :param work_dir: Directory that will contain the extracted files from the grid_spec tar + :type work_dir: str + :param remap_dir: Directory that will contain the generated remap file + :type remap_dir: str + :param source: The stem of the history file to regrid + :type source:str + :param input_date: Datestring where the first 8 characters correspond to YYYYMMDD + Input_date[:8] represents the date prefix in the history files, + e.g., input_date=20250730T0000Z where the history filename is + 20250730.atmos_month_aer.tile1.nc + :type input_date: str + + .. 
note:: All directories should be given as absolute paths """ - datadict = {} - - # load yamlfile to yamldict - with open(yamlfile, "r") as openedfile: - yamldict = yaml.safe_load(openedfile) - - # save arguments to datadict - datadict["yaml"] = yamldict - datadict["grid_spec"] = get_grid_spec(datadict) - datadict["input_dir"] = input_dir - datadict["output_dir"] = output_dir - datadict["input_date"] = input_date - - # get list of components to regrid - components_list = yamldict["postprocess"]["components"] - if components is not None: - for component in components_list: - if component["type"] not in components: - components_list.remove(component) - - # submit fregrid job for each component - for component in components_list: - - if not component["postprocess_on"]: - fre_logger.warning(f"skipping component {component['type']}") - continue - - datadict["component"] = component - datadict["input_mosaic"] = get_input_mosaic(datadict) - datadict["output_nlat"], datadict["output_nlon"] = component["xyInterp"].split(",") - datadict["interp_method"] = component["interpMethod"] - - # iterate over each history file in the component - for history_dict in component["sources"]: - - datadict["input_file"] = get_input_file_argument(datadict, history_dict["history_file"]) - datadict["scalar_field"], regrid = get_scalar_fields(datadict) - - if not regrid: continue - + #check if input_dir exists + if not Path(input_dir).exists(): + raise RuntimeError(f"Input directory {input_dir} containing the input data files does not exist") + + #check if output_dir exists + if not Path(output_dir).exists(): + raise RuntimeError(f"Output directory {output_dir} where regridded data" \ + "will be outputted does not exist") + + #check if work_dir exists + if not Path(work_dir).exists(): + raise RuntimeError(f"Specified working directory {work_dir} does not exist") + + #work in working directory + with helpers.change_directory(work_dir): + + #initialize datadict + datadict = {} + + # load yamlfile to 
yamldict + with open(yamlfile, "r") as openedfile: + yamldict = yaml.safe_load(openedfile) + + # save arguments to datadict + datadict["yaml"] = yamldict + datadict["grid_spec"] = get_grid_spec(datadict) + datadict["input_dir"] = input_dir + datadict["output_dir"] = output_dir + datadict["work_dir"] = work_dir + datadict["remap_dir"] = remap_dir + datadict["input_date"] = input_date[:8] + + components = [] + for component in yamldict["postprocess"]["components"]: + for this_source in component["sources"]: + if this_source["history_file"] == source: + components.append(component) + + # submit fregrid job for each component + for component in components: + + # skip component if postprocess_on = False + if not component["postprocess_on"]: + fre_logger.warning(f"postprocess_on=False for {source} in component {component['type']}." \ + "Skipping {source}") + continue + + datadict["inputRealm"] = component["inputRealm"] + datadict["input_mosaic"] = get_input_mosaic(datadict) + datadict["output_nlat"], datadict["output_nlon"] = component["xyInterp"].split(",") + datadict["interp_method"] = component["interpMethod"] datadict["remap_file"] = get_remap_file(datadict) + datadict["input_file"] = get_input_file(datadict, source) + datadict["scalar_field"], regrid = get_scalar_fields(datadict) - write_summary(datadict) + # skip if there are no variables to regrid + if regrid: + write_summary(datadict) + else: + continue + #construct fregrid command fregrid_command = [ "fregrid", "--debug", @@ -335,8 +367,10 @@ def regrid_xy( "--output_dir", output_dir, ] + #execute fregrid command fregrid_job = subprocess.run(fregrid_command, capture_output=True, text=True) + #print job useful information if fregrid_job.returncode == 0: fre_logger.info(fregrid_job.stdout.split("\n")[-3:]) else: diff --git a/fre/app/regrid_xy/tests/generate_files.py b/fre/app/regrid_xy/tests/generate_files.py index 633849db5..a39de7b12 100644 --- a/fre/app/regrid_xy/tests/generate_files.py +++ 
b/fre/app/regrid_xy/tests/generate_files.py @@ -1,31 +1,33 @@ import numpy as np from pathlib import Path import shutil +import tarfile import yaml import xarray as xr -N = 20 -Np = N + 1 +nxy = 20 +nxyp = nxy + 1 ntiles = 6 -n_components = 3 -skip_component = -99 date = "20250729" yamlfile = "test_yaml.yaml" -grid_spec = "grid_spec.nc" -input_grid = f"C{N}" +grid_spec_tar = "grid_spec.tar" +input_grid = f"C{nxy}" input_dir = "test_inputs" -inputRealm = "atmos" input_mosaic = f"{input_grid}_mosaic.nc" -input_files = ["atmos_daily_cmip", "atmos_diurnal"] +components: dict = None +tar_list: list = None def cleanup(): if Path(yamlfile).exists(): Path(yamlfile).unlink() - if Path(grid_spec).exists(): - Path(grid_spec).unlink() + if Path("grid_spec.nc").exists(): + Path("grid_spec.nc").unlink() + + if Path(grid_spec_tar).exists(): + Path(grid_spec_tar).unlink() if Path(input_mosaic).exists(): Path(input_mosaic).unlink() @@ -33,63 +35,54 @@ def cleanup(): if Path(input_dir).exists(): shutil.rmtree(input_dir) + for i in range(1, ntiles+1): + gridfile = Path(f"{input_grid}.tile{i}.nc") + if gridfile.exists(): gridfile.unlink() + -def set_test(N_in: int = None, +def set_test(components_in: dict, + nxy_in: int = None, ntiles_in: int = None, date_in: str = None, yamlfile_in: str = None, - grid_spec_in: str = None, - n_components_in: int = None, - skip_component_in: int = None, - inputRealm_in: str = None, + grid_spec_tar_in: str = None, input_mosaic_in: str = None, input_grid_in: str = None, - input_dir_in: str = None, - input_files_in: list[str] = None): - - global Np, N, ntiles, grid_spec, input_grid, input_files - global n_components, date, input_mosaic, source_gridtype - global skip_component, input_dir, yamlfile - - if N_in is not None: N, Np = N_in, N_in+1 + input_dir_in: str = None): + + global components + global nxyp, nxy, ntiles, grid_spec_tar, input_grid + global date, input_mosaic + global input_dir, yamlfile + global tar_list + + components = components_in + if 
nxy_in is not None: + nxy = nxy_in + nxyp = nxy_in+1 + input_grid = f"C{nxy}" if ntiles_in is not None: ntiles = ntiles_in if date_in is not None: date = date_in - if n_components_in is not None: n_components = n_components_in - if skip_component_in is not None: skip_component = skip_component_in if yamlfile_in is not None: yamlfile = yamlfile_in - if grid_spec_in is not None: grid_spec = grid_spec_in + if grid_spec_tar_in is not None: grid_spec_tar = grid_spec_tar_in if input_grid_in is not None: input_grid = input_grid_in if input_mosaic_in is not None: input_mosaic = input_mosaic_in - if inputRealm_in is not None: inputRealm = inputRealm_in - if input_files_in is not None: input_files = input_files if input_dir_in is not None: input_dir = input_dir_in + tar_list = [] def make_yaml(): ppyaml = {} - ppyaml["name"] = "regrid_xy_test" + ppyaml["name"] = yamlfile directories = ppyaml["directories"] = {} directories["history_dir"] = "./" directories["pp_dir"] = "./" postprocess = ppyaml["postprocess"] = {} - - postprocess["settings"] = {"pp_grid_spec": grid_spec} - - components = postprocess["components"] = [] - for i in range(1,n_components+1): - - component = {"xyInterp": f"{N},{N}", - "interpMethod": "conserve_order2", - "inputRealm": f"{inputRealm}", - "type": f"faketype{i}", - "sources": [{"history_file": f"{ifile}{i}"} for ifile in input_files], - "postprocess_on": True} - - if i == skip_component: component["postprocess_on"] = False - components.append(component) + postprocess["settings"] = {"pp_grid_spec": grid_spec_tar} + postprocess["components"] = components with open(yamlfile, "w") as openedfile: yaml.dump(ppyaml, openedfile, sort_keys=False) @@ -99,7 +92,9 @@ def make_grid_spec(): xr.Dataset(data_vars={"atm_mosaic_file": f"{input_mosaic}".encode(), "lnd_mosaic_file": f"{input_mosaic}".encode(), "ocn_mosaic_file": "ocean_mosaic.nc".encode()} - ).to_netcdf(grid_spec) + ).to_netcdf("grid_spec.nc") + + tar_list.append("grid_spec.nc") def make_mosaic(): @@ 
-115,13 +110,15 @@ def make_mosaic(): gridtiles = xr.DataArray(gridtiles, dims=["ntiles"]).astype("|S255") ) - xr.Dataset(data_vars=data).to_netcdf(f"{input_dir}/{input_mosaic}") + xr.Dataset(data_vars=data).to_netcdf(f"{input_mosaic}") + + tar_list.append(f"{input_mosaic}") def make_grid(): - xy = np.arange(0, Np, 1, dtype=np.float64) - area = np.ones((N, N), dtype=np.float64) + xy = np.arange(0, nxyp, 1, dtype=np.float64) + area = np.ones((nxy, nxy), dtype=np.float64) x, y = np.meshgrid(xy, xy) @@ -132,24 +129,29 @@ def make_grid(): for i in range(1, ntiles+1): data["tile"] = xr.DataArray(f"tile{i}".encode()).astype("|S255") - xr.Dataset(data).to_netcdf(f"{input_dir}/{input_grid}.tile{i}.nc") + xr.Dataset(data).to_netcdf(f"{input_grid}.tile{i}.nc") + + tar_list.append(f"{input_grid}.tile{i}.nc") def make_data(): data = {} - data["mister"] = xr.DataArray(np.full((N,N), 1.0, dtype=np.float64), dims=["ny", "nx"]) - data["darcy"] = xr.DataArray(np.full((N,N), 2.0, dtype=np.float64), dims=["ny", "nx"]) - data["wet_c"] = xr.DataArray(np.full((N,N), 5.0, dtype=np.float64), dims=["ny", "nx"]) + data["mister"] = xr.DataArray(np.full((nxy,nxy), 1.0, dtype=np.float64), dims=["ny", "nx"]) + data["darcy"] = xr.DataArray(np.full((nxy,nxy), 2.0, dtype=np.float64), dims=["ny", "nx"]) + data["wins"] = xr.DataArray(np.full((nxy,nxy), 3.0, dtype=np.float64), dims=["ny", "nx"]) + data["wet_c"] = xr.DataArray(np.full((nxy,nxy), 5.0, dtype=np.float64), dims=["ny", "nx"]) - coords = {"nx": np.arange(1,Np, dtype=np.float64), - "ny": np.arange(1,Np, dtype=np.float64)} + coords = {"nx": np.arange(1,nxyp, dtype=np.float64), + "ny": np.arange(1,nxyp, dtype=np.float64)} dataset = xr.Dataset(data_vars=data, coords=coords) - for ifile in input_files: - for icomponent in range(1,n_components+1): - for i in range(1, ntiles+1): dataset.to_netcdf(f"{input_dir}/{date}.{ifile}{icomponent}.tile{i}.nc") + for component in components: + for source in component["sources"]: + history_file = 
source["history_file"] + for i in range(1, ntiles+1): + dataset.to_netcdf(f"{input_dir}/{date}.{history_file}.tile{i}.nc") def make_all(): @@ -158,3 +160,9 @@ def make_all(): make_mosaic() make_grid() make_data() + + with tarfile.open(grid_spec_tar, "w") as tar: + for ifile in tar_list: tar.add(ifile) + + for ifile in tar_list: + Path(ifile).unlink() diff --git a/fre/app/regrid_xy/tests/test_regrid_xy.py b/fre/app/regrid_xy/tests/test_regrid_xy.py index 60c4834a2..397b26926 100644 --- a/fre/app/regrid_xy/tests/test_regrid_xy.py +++ b/fre/app/regrid_xy/tests/test_regrid_xy.py @@ -1,4 +1,5 @@ import numpy as np +import os from pathlib import Path import shutil import xarray as xr @@ -7,100 +8,149 @@ import fre.app.regrid_xy.tests.generate_files as generate_files -def test_regrid_xy(): - - """ - Tests the main function regrid_xy and ensures - data is regridded correctly - """ - - date = "20250729" - n_components = 5 - skip_component = 3 - input_files = ["atmos_daily_cmip", "atmos_diurnal"] - yamlfile = "test_yaml.yaml" - input_dir = Path("test_inputs") - output_dir = Path("test_outputs") +nxy = 20 +date = "20250729" + +curr_dir = os.getcwd() +yamlfile = Path(curr_dir)/"test_yaml.yaml" +grid_spec_tar = Path(curr_dir)/"grid_spec.tar" +input_dir = Path(curr_dir)/"test_inputs" +output_dir = Path(curr_dir)/"test_outputs" +remap_dir= Path(curr_dir)/"test_remap" +work_dir = Path(curr_dir)/"test_work" + +components = [] +pp_input_files = [{"history_file":"pemberley"}, {"history_file":"longbourn"}] +components.append({"xyInterp": f"{nxy},{nxy}", + "interpMethod": "conserve_order2", + "inputRealm": "atmos", + "type": f"pride_and_prejudice", + "sources": pp_input_files, + "postprocess_on": True} +) +emma_input_files = [{"history_file":"hartfield"}, {"history_file":"donwell_abbey"}] +components.append({"xyInterp": f"{nxy},{nxy}", + "interpMethod": "conserve_order2", + "inputRealm": "atmos", + "type": f"emma", + "sources": emma_input_files, + "postprocess_on": True} +) 
+here_input_files = [{"history_file":"gfdl"}, {"history_file":"princeton"}] +components.append({"xyInterp": f"{nxy},{nxy}", + "interpMethod": "conserve_order2", + "inputRealm": "atmos", + "type": "here", + "sources": here_input_files, + "postprocess_on": False} +) + + +def setup_test(): input_dir.mkdir(exist_ok=True) output_dir.mkdir(exist_ok=True) + remap_dir.mkdir(exist_ok=True) + work_dir.mkdir(exist_ok=True) #generate test files - generate_files.set_test(date_in=date, - yamlfile_in=yamlfile, - input_files_in=input_files, - n_components_in=n_components, - input_dir_in=input_dir, - skip_component_in=skip_component) - + generate_files.set_test(components_in=components, + date_in=date, + grid_spec_tar_in=str(grid_spec_tar), + yamlfile_in=str(yamlfile), + input_dir_in=str(input_dir)) generate_files.make_all() - regrid_xy.regrid_xy(yamlfile=yamlfile, - input_dir=input_dir.name, - output_dir=output_dir.name, - input_date=date) - - #check answers, for the third component, postprocess_on = False - checkfiles = [output_dir/f"{date}.{ifile}{i}.nc" for ifile in input_files - for i in range(1,n_components+1) if i!=skip_component] - for outfile in checkfiles: - - checkme = xr.load_dataset(outfile) - - assert "wet_c" not in checkme - assert "mister" in checkme - assert "darcy" in checkme - - assert np.all(checkme["mister"].values==np.float64(1.0)) - assert np.all(checkme["darcy"].values==np.float64(2.0)) - #third component should not have been regridded - for ifile in input_files: - assert not (output_dir/f"{date}.{ifile}{skip_component}.nc").exists() +def cleanup_test(): - shutil.rmtree(output_dir) + #remove test directories + if output_dir.exists(): shutil.rmtree(output_dir) + if remap_dir.exists(): shutil.rmtree(remap_dir) + if work_dir.exists(): shutil.rmtree(work_dir) generate_files.cleanup() + +def test_regrid_xy(): + + """ + Tests the main function regrid_xy and ensures + data is regridded correctly + """ + setup_test() + + #modify generate_files to change sources + 
for source_dict in pp_input_files + emma_input_files + here_input_files: + source = source_dict["history_file"] + regrid_xy.regrid_xy(yamlfile=str(yamlfile), + input_dir=str(input_dir), + output_dir=str(output_dir), + work_dir=str(work_dir), + remap_dir=str(remap_dir), + source=source, + input_date=date+"TTTT") + + #check answers + for source_dict in pp_input_files + emma_input_files: + outfile = output_dir/f"{date}.{source_dict['history_file']}.nc" + + test = xr.load_dataset(outfile) + + assert "wet_c" not in test + assert "mister" in test + assert "darcy" in test + assert "wins" in test + + assert np.all(test["mister"].values==np.float64(1.0)) + assert np.all(test["darcy"].values==np.float64(2.0)) + assert np.all(test["wins"].values==np.float64(3.0)) + + #check answers, these shouldn't have been regridded + for source_dict in here_input_files: + ifile = source_dict["history_file"] + assert not (output_dir/f"{date}.{ifile}.nc").exists() + + #check remap_file exists and is not empty + remap_file = remap_dir/f"C{nxy}_mosaicX{nxy}by{nxy}_conserve_order2.nc" + assert remap_file.exists() + + cleanup_test() + + def test_get_input_mosaic(): """ Tests get_input_mosaic correctly copies the mosaic file to the input directory """ - input_dir = Path("input_dir") grid_spec = Path("grid_spec.nc") mosaic_file = Path("ocean_mosaic.nc") generate_files.make_grid_spec() mosaic_file.touch() - input_dir.mkdir(exist_ok=True) - datadict=dict(input_dir=input_dir, grid_spec=grid_spec, component={"inputRealm":"ocean"}) + datadict=dict(grid_spec=grid_spec, inputRealm="ocean") - #copy mosaic_file to input_dir and return mosaic_file/input_dir - check = regrid_xy.get_input_mosaic(datadict) - assert check == str(input_dir/mosaic_file) - assert Path(check).exists() + assert regrid_xy.get_input_mosaic(datadict) == str(mosaic_file) mosaic_file.unlink() #clean up grid_spec.unlink() #clean up - shutil.rmtree(input_dir) #clean up -def test_get_input_file_argument(): +def test_get_input_file(): """ 
Tests get_input_file """ input_date = "20250807" - history_file = "pemberley" + source = "pemberley" datadict = {"input_date": input_date} - assert regrid_xy.get_input_file_argument(datadict, history_file) == input_date + "." + history_file + assert regrid_xy.get_input_file(datadict, source) == input_date + "." + source datadict["input_date"] = None - assert regrid_xy.get_input_file_argument(datadict, history_file) == history_file + assert regrid_xy.get_input_file(datadict, source) == source def test_get_remap_file(): @@ -109,29 +159,26 @@ def test_get_remap_file(): Tests get_remap_file """ - input_dir = Path("input_dir") + remap_dir = Path("remap_dir") input_mosaic = "C20_mosaic" nlon = 40 nlat = 10 interp_method = "conserve_order1" - datadict = {"input_dir": input_dir.name, + datadict = {"remap_dir": remap_dir.name, "input_mosaic": input_mosaic+".nc", "output_nlon": nlon, "output_nlat": nlat, "interp_method": interp_method} - input_dir.mkdir(exist_ok=True) - #check remap file from current directory is copied to input directory - remap_file = Path(f"{input_mosaic}X{nlon}by{nlat}_{interp_method}.nc") - remap_file.touch() + remap_file = Path(f"remap_dir/{input_mosaic}X{nlon}by{nlat}_{interp_method}.nc") - check = regrid_xy.get_remap_file(datadict) + regrid_xy.get_remap_file(datadict) == str(remap_dir/remap_file) - assert check == str(input_dir/remap_file) - assert Path(check).exists() + remap_dir.mkdir(exist_ok=True) + remap_file.touch() + regrid_xy.get_remap_file(datadict) == str(remap_dir/remap_file) Path(remap_file).unlink() - shutil.rmtree(input_dir) - + shutil.rmtree(remap_dir) diff --git a/fre/tests/test_fre_app_cli.py b/fre/tests/test_fre_app_cli.py index e1d2350c7..7dadfad77 100644 --- a/fre/tests/test_fre_app_cli.py +++ b/fre/tests/test_fre_app_cli.py @@ -78,55 +78,29 @@ def test_cli_fre_app_regrid_opt_dne(capfd): assert result.exit_code == 2 _out, _err = capfd.readouterr() -@pytest.mark.skip(reason="needs rework") def 
test_cli_fre_app_regrid_test_case_1(capfd): - """ fre cmor run --help """ - - import fre.app.regrid_xy.tests.test_regrid_xy as t_rgxy - assert t_rgxy is not None - - # input files for this test are locked up in here as well - if not Path( t_rgxy.TEST_DIR+'/in-dir' ).exists(): - assert Path(t_rgxy.TAR_IN_DIR).exists() - ex = [ "tar", "-C", t_rgxy.TEST_DIR, "-zxvf", t_rgxy.TAR_IN_DIR ] - sp = subprocess.run( ex ) - assert all ( [ sp.returncode == 0, - Path(t_rgxy.IN_DIR).exists() ] ) - - # for the time being, still a little dependent on rose for configuration value passing - if Path(os.getcwd()+'/rose-app-run.conf').exists(): - Path(os.getcwd()+'/rose-app-run.conf').unlink() - - with open(os.getcwd()+'/rose-app-run.conf','a',encoding='utf-8') as rose_app_run_config: - rose_app_run_config.write( '[command]\n' ) - rose_app_run_config.write( 'default=regrid-xy\n' ) - rose_app_run_config.write( '\n' ) - rose_app_run_config.write( f'[{t_rgxy.COMPONENT}]\n' ) - rose_app_run_config.write( f'sources={t_rgxy.SOURCE}\n' ) - rose_app_run_config.write( f'inputGrid={t_rgxy.INPUT_GRID}\n' ) - rose_app_run_config.write( f'inputRealm={t_rgxy.INPUT_REALM}\n' ) - rose_app_run_config.write( f'interpMethod={t_rgxy.INTERP_METHOD}\n' ) - rose_app_run_config.write( f'outputGridLon={t_rgxy.NLON}\n' ) - rose_app_run_config.write( f'outputGridLat={t_rgxy.NLAT}\n' ) - rose_app_run_config.write( '\n' ) - assert Path('./rose-app-run.conf').exists() + """ fre app regrid, run end-to-end on generated test files """ + import fre.app.regrid_xy.tests.test_regrid_xy as test_regrid_xy + test_regrid_xy.setup_test() + args_list = ["app", "regrid", - "--input_dir", f"{t_rgxy.WORK_YYYYMMDD_DIR}", - "--output_dir", f"{t_rgxy.TEST_OUT_DIR}", - "--begin", f"{t_rgxy.YYYYMMDD}T000000", - "--tmp_dir", f"{t_rgxy.TEST_DIR}", - "--remap_dir", f"{t_rgxy.REMAP_DIR}", - "--source", f"{t_rgxy.SOURCE}", - "--grid_spec", f"{t_rgxy.GOLD_GRID_SPEC_NO_TAR}", - "--def_xy_interp", f'"{t_rgxy.NLON},{t_rgxy.NLAT}"' ] - click.echo(f'args_list = \n 
{args_list}') - click.echo('fre ' + ' '.join(args_list)) + "--yamlfile", str(test_regrid_xy.yamlfile), + "--input_dir", str(test_regrid_xy.input_dir), + "--output_dir", str(test_regrid_xy.output_dir), + "--work_dir", str(test_regrid_xy.work_dir), + "--remap_dir", str(test_regrid_xy.remap_dir), + "--source", "pemberley", + "--input_date", test_regrid_xy.date+"T000000"] + click.echo(f"args_list = \n {args_list}") + click.echo("fre " + ' '.join(args_list)) result = runner.invoke(fre.fre, args=args_list ) assert result.exit_code == 0 _out, _err = capfd.readouterr() + test_regrid_xy.cleanup_test() + # fre app remap def test_cli_fre_app_remap(capfd): """ fre app remap """