diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 161612f1..70e4a233 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -24,7 +24,7 @@ jobs: run: tox -e ${{ matrix.tox-env }} pip: - name: Pip with Python${{ matrix.python-version }} + name: Pip with Python${{ matrix.python-version }} (GDAL=${{ matrix.uses-gdal }}) needs: black runs-on: ubuntu-latest strategy: @@ -32,24 +32,39 @@ jobs: include: - tox-env: py37 python-version: 3.7 + uses-gdal: true - tox-env: py38 python-version: 3.8 + uses-gdal: true + - tox-env: py38 + python-version: 3.8 + uses-gdal: false - tox-env: py39 python-version: 3.9 + uses-gdal: true steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} + - name: Install NetCDF and GEOS + run: | + sudo apt-get update + sudo apt-get install libnetcdf-dev libgeos-dev - name: Install GDAL + if: ${{ matrix.uses-gdal }} run: | sudo apt-get update sudo apt-get install libgdal-dev - name: Install tox run: pip install tox - - name: Test with tox - run: env GDAL_VERSION="$(gdal-config --version)" tox -e ${{ matrix.tox-env }} + - name: Test with tox (GDAL) + if: ${{ matrix.uses-gdal }} + run: env GDAL_VERSION="$(gdal-config --version)" tox -e ${{ matrix.tox-env }}-gdal-slow + - name: Test with tox (no GDAL) + if: ${{ !matrix.uses-gdal }} + run: tox -e ${{ matrix.tox-env }}-slow conda: name: Conda diff --git a/MANIFEST.in b/MANIFEST.in index 52433481..b7538c5d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,7 +5,6 @@ include LICENSE include README.rst include requirements_dev.txt include requirements_docs.txt -include requirements_gis.txt recursive-include tests * recursive-exclude * __pycache__ diff --git a/environment.yml b/environment.yml index b2e0c2d0..0a357c35 100644 --- a/environment.yml +++ b/environment.yml @@ -6,14 +6,10 @@ dependencies: - raven-hydro ==3.0.4.322 - ostrich ==21.03.16 - python >=3.7 - - affine - cf_xarray - click - climpred >=2.1 - dask - - fiona - - gdal >=3.0.0 - - geopandas >=0.9 - haversine - holoviews - hvplot @@ -25,16 +21,15 @@ dependencies: - pip - pre-commit - pydantic + - pygml - pyproj>=3 - - rasterio - requests - - rioxarray - scikit-learn ==0.24.2 - scipy - setuptools >50.0 - shapely - statsmodels - xarray >=0.18 - - xclim >=0.26.1,<0.29.0 + - xclim >=0.28.1,<0.29.0 - xskillscore - wheel diff --git a/ravenpy/cli/collect_subbasins_upstream_of_gauge.py b/ravenpy/cli/collect_subbasins_upstream_of_gauge.py index 09b4c0e7..dd6a4adf 100644 --- a/ravenpy/cli/collect_subbasins_upstream_of_gauge.py +++ b/ravenpy/cli/collect_subbasins_upstream_of_gauge.py @@ -2,7 +2,10 @@ from pathlib import Path import click -import geopandas as gpd + +from ravenpy.utilities.vector import archive_sniffer, vector_to_dataframe + +# import geopandas as gpd @click.command() @@ -22,11 +25,16 @@ def collect_subbasins_upstream_of_gauge( GAUGE_ID: ID of the target gauge, to be found in the "Obs_NM" column (e.g. "02LE024"). 
""" - if Path(input_file).suffix == ".zip": - input_file = f"zip://{input_file}" - gdf = gpd.read_file(input_file) + input_file = Path(input_file) - if gauge_id not in gdf.Obs_NM.values: + if input_file.suffix == ".zip": + input_file = next(iter(archive_sniffer(input_file))) + if input_file.suffix.lower() == ".shp": + df = vector_to_dataframe(input_file.as_posix()) + else: + raise FileNotFoundError() + + if gauge_id not in df.Obs_NM.values: raise click.ClickException(f"Cannot find gauge {gauge_id} in `Obs_NM` column") if not output: @@ -37,11 +45,11 @@ def collect_subbasins_upstream_of_gauge( downsubid_to_subids = defaultdict(set) - for _, r in gdf.iterrows(): + for _, r in df.iterrows(): downsubid_to_subids[r.DowSubId].add(r.SubId) # Starting from the SubId of the gauge, iteratively expand the set of upstream subbasin SubIds - upstream_subids = {gdf[gdf.Obs_NM == gauge_id].iloc[0].SubId} + upstream_subids = {df[df.Obs_NM == gauge_id].iloc[0].SubId} prev = upstream_subids.copy() while True: curr = set() @@ -53,10 +61,12 @@ def collect_subbasins_upstream_of_gauge( else: break - gdf_upstream = gdf[gdf.SubId.isin(upstream_subids)] + df_upstream = df[df.SubId.isin(upstream_subids)] + + df_upstream.to_json(output_file) - gpd.GeoDataFrame(gdf_upstream).to_file(output_file) + # pd.GeoDataFrame(gdf_upstream).to_file(output_file) click.echo( - f"Found {len(gdf_upstream)} upstream subbasins, saved them in {output_file}" + f"Found {len(df_upstream)} upstream subbasins, saved them in {output_file}" ) diff --git a/ravenpy/cli/generate_grid_weights.py b/ravenpy/cli/generate_grid_weights.py index ba09bd10..b9c384c8 100644 --- a/ravenpy/cli/generate_grid_weights.py +++ b/ravenpy/cli/generate_grid_weights.py @@ -4,6 +4,8 @@ from ravenpy.extractors.routing_product import RoutingProductGridWeightExtractor +WGS84 = 4326 + @click.command() @click.argument("input-file", type=click.Path(exists=True)) @@ -74,6 +76,22 @@ type=click.Path(), help="Text field that will contain the results as a single :GridWeights Raven command containing the weights.", ) +@click.option( + "-i", + "--input-shape-crs", + type=int, + default=WGS84, + show_default=True, + help="Coordinate reference system (EPSG) of the shapefile INPUT_FILE.", +) +@click.option( + "-r", + "--routing-shape-crs", + type=int, + default=WGS84, + show_default=True, + help="Coordinate reference system (EPSG) of the shapefile ROUTING_FILE.", +) def generate_grid_weights( input_file, routing_file, @@ -85,6 +103,8 @@ def generate_grid_weights( sub_ids, area_error_threshold, output, + input_shape_crs, + routing_shape_crs, ): """ Generate grid weights in various formats. 
@@ -119,6 +139,8 @@ def generate_grid_weights( gauge_ids, sub_ids, area_error_threshold, + input_shape_crs, + routing_shape_crs, ) gw_cmd = extractor.extract() diff --git a/ravenpy/data/hydrobasins_domains/hybas_lake_ar_lev01_v1c.zip b/ravenpy/data/hydrobasins_domains/hybas_lake_ar_lev01_v1c.zip deleted file mode 100644 index 1a760963..00000000 Binary files a/ravenpy/data/hydrobasins_domains/hybas_lake_ar_lev01_v1c.zip and /dev/null differ diff --git a/ravenpy/data/hydrobasins_domains/hybas_lake_na_lev01_v1c.zip b/ravenpy/data/hydrobasins_domains/hybas_lake_na_lev01_v1c.zip deleted file mode 100644 index bac4b576..00000000 Binary files a/ravenpy/data/hydrobasins_domains/hybas_lake_na_lev01_v1c.zip and /dev/null differ diff --git a/ravenpy/extractors/routing_product.py b/ravenpy/extractors/routing_product.py index c0d7da65..922430a4 100644 --- a/ravenpy/extractors/routing_product.py +++ b/ravenpy/extractors/routing_product.py @@ -3,18 +3,10 @@ from pathlib import Path from typing import List -from ravenpy.utilities import gis_import_error_message - -try: - import geopandas - from osgeo import __version__ as osgeo_version # noqa - from osgeo import ogr, osr # noqa -except (ImportError, ModuleNotFoundError) as e: - msg = gis_import_error_message.format(Path(__file__).stem) - raise ImportError(msg) from e - import netCDF4 as nc4 import numpy as np +import shapely.geometry as sgeo +from shapely.strtree import STRtree from ravenpy.config.commands import ( ChannelProfileCommand, @@ -23,6 +15,22 @@ ReservoirCommand, SubBasinsCommand, ) +from ravenpy.utilities.vector import ( + archive_sniffer, + geom_transform, + vector_to_dataframe, +) + +try: + import geopandas + from osgeo import __version__ as osgeo_version # noqa + from osgeo import ogr, osr +except (ImportError, ModuleNotFoundError): + warnings.warn( + "GIS libraries not found. Continuing without. Expect slower CRS transformation speeds." 
+ ) + geopandas = None + osgeo_version = None class RoutingProductShapefileExtractor: @@ -49,11 +57,17 @@ def __init__( routing_product_version=ROUTING_PRODUCT_VERSION, ): if isinstance(shapefile_path, (Path, str)): - if Path(shapefile_path).suffix == ".zip": - shapefile_path = f"zip://{shapefile_path}" - self._df = geopandas.read_file(shapefile_path) - elif isinstance(shapefile_path, geopandas.GeoDataFrame): - self._df = shapefile_path + shapefile = Path(next(iter(archive_sniffer(shapefile_path)))) + if shapefile.suffix.lower() == ".shp": + self._df = vector_to_dataframe(shapefile.as_posix()) + else: + raise ValueError("Shapefile not found.") + + # if Path(shapefile_path).suffix == ".zip": + # shapefile_path = f"zip://{shapefile_path}" + # self._df = geopandas.read_file(shapefile_path) + # elif isinstance(shapefile_path, geopandas.GeoDataFrame): + # self._df = shapefile_path self.hru_aspect_convention = hru_aspect_convention self.routing_product_version = routing_product_version @@ -95,14 +109,15 @@ def extract(self, model=None): hru_recs = [] # Collect all subbasin_ids for fast lookup in next loop - subbasin_ids = {int(row["SubId"]) for _, row in self._df.iterrows()} + subbasin_ids = set(self._df["SubId"].unique()) - for _, row in self._df.iterrows(): + for _, feat_properties in self._df.iterrows(): + # feat_properties = feat.properties # HRU - hru_recs.append(self._extract_hru(row)) + hru_recs.append(self._extract_hru(feat_properties)) - subbasin_id = int(row["SubId"]) + subbasin_id = int(feat_properties["SubId"]) is_lake = False @@ -110,21 +125,21 @@ def extract(self, model=None): "IsLake" if self.routing_product_version == "1.0" else "Lake_Cat" ) - if row[lake_field] > 0 and row["HRU_IsLake"] > 0: + if feat_properties[lake_field] > 0 and feat_properties["HRU_IsLake"] > 0: lake_sb_ids.append(subbasin_id) - reservoir_cmds.append(self._extract_reservoir(row)) + reservoir_cmds.append(self._extract_reservoir(feat_properties)) is_lake = True - elif row[lake_field] > 0: + elif feat_properties[lake_field] > 0: continue else: land_sb_ids.append(subbasin_id) # Subbasin - sb = self._extract_subbasin(row, is_lake, subbasin_ids) + sb = self._extract_subbasin(feat_properties, is_lake, subbasin_ids) subbasin_recs.append(sb) # ChannelProfile - channel_profile_cmds.append(self._extract_channel_profile(row)) + channel_profile_cmds.append(self._extract_channel_profile(feat_properties)) return dict( subbasins=subbasin_recs, @@ -319,6 +334,8 @@ def __init__( gauge_ids=None, sub_ids=None, area_error_threshold=AREA_ERROR_THRESHOLD, + input_shape_crs=CRS_LLDEG, + routing_shape_crs=CRS_LLDEG, ): self._dim_names = tuple(dim_names) self._var_names = tuple(var_names) @@ -327,33 +344,70 @@ def __init__( self._gauge_ids = gauge_ids or [] self._sub_ids = sub_ids or [] self._area_error_threshold = area_error_threshold + self._input_data_crs = input_shape_crs + self._routing_data_crs = routing_shape_crs assert not ( self._gauge_ids and self._sub_ids ), "Only one of gauge_ids or sub_ids can be specified" - self._input_is_netcdf = True - input_file_path = Path(input_file_path) - if input_file_path.suffix == ".nc": + if input_file_path.suffix.lower() in {".nc", ".nc4"}: # Note that we cannot use xarray because it complains about variables and dimensions # having the same name. 
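# --- Editor's note (illustrative only; file and variable names are hypothetical):
# this is the situation the comment above refers to. A multi-dimensional variable
# that shares its name with a dimension trips xarray's decoder, while the
# low-level netCDF4 API reads it without complaint.
import netCDF4 as nc4

ds_demo = nc4.Dataset("model_grid.nc")
lon_demo = ds_demo.variables["lon"][:]  # plain masked arrays, no CF decoding
lat_demo = ds_demo.variables["lat"][:]
ds_demo.close()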
self._input_data = nc4.Dataset(input_file_path) - elif input_file_path.suffix == ".zip": - self._input_data = geopandas.read_file(f"zip://{input_file_path}") - self._input_is_netcdf = False - elif input_file_path.suffix == ".shp": - self._input_data = geopandas.read_file(input_file_path) - self._input_is_netcdf = False + self._input_is_netcdf = True + elif input_file_path.suffix.lower() in {".zip", ".shp"}: + # self._input_data = geopandas.read_file(f"zip://{input_file_path}") + if input_file_path.suffix.lower() == ".zip": + input_file_path = Path(next(iter(archive_sniffer(input_file_path)))) + if input_file_path.suffix.lower() == ".shp": + # self._input_data = geopandas.read_file(input_file_path) + # self._input_data = geojson.loads( + # json.dumps(Reader(input_file_path.as_posix()).__geo_interface__) + # ) + self._input_data = vector_to_dataframe(input_file_path.as_posix()) + self._input_is_netcdf = False + + if self._input_data_crs != self.CRS_LLDEG: + if geopandas: + self._input_data.to_crs(self.CRS_LLDEG, inplace=True) + else: + self._input_data.geometry = self._input_data.geometry.apply( + geom_transform, + source_crs=self._input_data_crs, + target_crs=self.CRS_LLDEG, + ) + self._input_data_crs = self.CRS_LLDEG + else: raise ValueError( - "The input file must be a shapefile (.shp or .zip) or NetCDF" + "The input file must be a shapefile (.shp or .zip) or a NetCDF." ) routing_file_path = Path(routing_file_path) - if routing_file_path.suffix == ".zip": - routing_file_path = f"zip://{routing_file_path}" - self._routing_data = geopandas.read_file(routing_file_path) + # if routing_file_path.suffix == ".zip": + # routing_file_path = f"zip://{routing_file_path}" + # self._routing_data = geopandas.read_file(routing_file_path) + + if routing_file_path.suffix.lower() == ".zip": + # self._input_data = geopandas.read_file(f"zip://{input_file_path}") + routing_file_path = Path(next(iter(archive_sniffer(routing_file_path)))) + if routing_file_path.suffix.lower() == ".shp": + self._routing_data = vector_to_dataframe(routing_file_path.as_posix()) + + if self._routing_data_crs != self.CRS_LLDEG: + if geopandas: + self._routing_data.to_crs(epsg=self.CRS_LLDEG, inplace=True) + else: + self._routing_data.geometry = self._routing_data.geometry.apply( + geom_transform, + source_crs=self._routing_data_crs, + target_crs=self.CRS_LLDEG, + ) + self._routing_data_crs = self.CRS_LLDEG + else: + raise ValueError("The routing file must be a shapefile (.shp or .zip).") def extract(self) -> GridWeightsCommand: self._prepare_input_data() @@ -361,9 +415,25 @@ def extract(self) -> GridWeightsCommand: # Read routing data # WGS 84 / North Pole LAEA Canada - self._routing_data = self._routing_data.to_crs( - epsg=RoutingProductGridWeightExtractor.CRS_CAEA - ) + # self._routing_data = self._routing_data.to_crs( + # epsg=RoutingProductGridWeightExtractor.CRS_CAEA + # ) + # self._routing_data = geojson_object_transform( + # self._routing_data, + # self._routing_data_crs, + # RoutingProductGridWeightExtractor.CRS_CAEA, + # ) + + if geopandas: + self._routing_data.to_crs( + epsg=RoutingProductGridWeightExtractor.CRS_CAEA, inplace=True + ) + else: + self._routing_data.geometry = self._routing_data.geometry.apply( + geom_transform, + source_crs=self._routing_data_crs, + target_crs=RoutingProductGridWeightExtractor.CRS_CAEA, + ) def keep_only_valid_downsubid_and_obs_nm(g): """ @@ -393,7 +463,7 @@ def keep_only_valid_downsubid_and_obs_nm(g): return row - # Remove duplicate HRU_IDs while making sure that we keed relevant DowSubId 
and Obs_NM values + # Remove duplicate HRU_IDs while making sure that we keep relevant DowSubId and Obs_NM values self._routing_data = self._routing_data.groupby(self._routing_id_field).apply( keep_only_valid_downsubid_and_obs_nm ) @@ -419,6 +489,7 @@ def keep_only_valid_downsubid_and_obs_nm(g): # starting from the list supplied by the user (either directly, or via their gauge IDs).. We first # build a map of downSubID -> subID for efficient lookup downsubid_to_subids = defaultdict(set) + for _, r in self._routing_data.iterrows(): downsubid_to_subids[r.DowSubId].add(r.SubId) @@ -452,50 +523,70 @@ def keep_only_valid_downsubid_and_obs_nm(g): # Derive overlay and calculate weights # ------------------------------- - grid_weights = [] + grid_weights = list() for _, row in self._routing_data.iterrows(): - poly = ogr.CreateGeometryFromWkt(row.geometry.to_wkt()) - - area_basin = poly.Area() - # bounding box around basin (for easy check of proximity) - enve_basin = poly.GetEnvelope() + if osgeo_version: + poly = ogr.CreateGeometryFromWkt(row.geometry.to_wkt()) + area_basin = poly.Area() + else: + poly = row.geometry.buffer(0.0) + area_basin = poly.area area_all = 0.0 - # ncells = 0 + row_grid_weights = list() - row_grid_weights = [] + if osgeo_version: + enve_basin = poly.GetEnvelope() + for ilat in range(self._nlat): + for ilon in range(self._nlon): - for ilat in range(self._nlat): - for ilon in range(self._nlon): + # bounding box around grid-cell (for easy check of proximity) + enve_gridcell = grid_cell_geom_gpd_wkt[ilat][ilon].GetEnvelope() - # bounding box around grid-cell (for easy check of proximity) - enve_gridcell = grid_cell_geom_gpd_wkt[ilat][ilon].GetEnvelope() + grid_is_close = self._check_proximity_of_envelops( + enve_gridcell, enve_basin + ) - grid_is_close = self._check_proximity_of_envelops( - enve_gridcell, enve_basin - ) + # this check decreases runtime DRASTICALLY (from ~6h to ~1min) + if not grid_is_close: + continue - # this check decreases runtime DRASTICALLY (from ~6h to ~1min) - if not grid_is_close: - continue + # "fake" buffer to avoid invalid polygons and weirdos dumped by ArcGIS + inter = grid_cell_geom_gpd_wkt[ilat][ilon].Intersection( + poly.Buffer(0.0) + ) - # grid_cell_area = grid_cell_geom_gpd_wkt[ilat][ilon].Area() + area_intersect = inter.Area() - # "fake" buffer to avoid invalid polygons and weirdos dumped by ArcGIS - inter = grid_cell_geom_gpd_wkt[ilat][ilon].Intersection( - poly.Buffer(0.0) - ) + if area_intersect > 0: + area_all += area_intersect + hru_id = int(row[self._routing_id_field]) + cell_id = ilat * self._nlon + ilon + weight = area_intersect / area_basin + row_grid_weights.append((hru_id, cell_id, weight)) + else: + + # leverage shapely STRtree for optimized performance (requires GEOS) + enve_gridcells = STRtree(sum(grid_cell_geom_gpd_wkt, [])) - area_intersect = inter.Area() + # find all overlapping geometries + intersecting_cells = enve_gridcells.query(poly) - area_all += area_intersect + for enve_gridcell in intersecting_cells: + area_intersect = enve_gridcell.intersection(poly).area + # meed to traverse the nested loops to find the ilon and ilat if area_intersect > 0: + area_all += area_intersect hru_id = int(row[self._routing_id_field]) - cell_id = ilat * self._nlon + ilon weight = area_intersect / area_basin + cell_id = list() + for ilat in range(self._nlat): + for ilon in range(self._nlon): + if grid_cell_geom_gpd_wkt[ilat][ilon] == enve_gridcell: + cell_id = ilat * self._nlon + ilon row_grid_weights.append((hru_id, cell_id, weight)) # 
mismatch between area of subbasin (routing product) and sum of all contributions of grid cells (model output) @@ -574,19 +665,38 @@ def _prepare_input_data(self): # input data is a shapefile - self._input_data = self._input_data.to_crs( - epsg=RoutingProductGridWeightExtractor.CRS_CAEA - ) + if geopandas: + self._input_data.to_crs( + epsg=RoutingProductGridWeightExtractor.CRS_CAEA, inplace=True + ) + else: + self._input_data.geometry = self._input_data.geometry.apply( + geom_transform, + source_crs=self._input_data_crs, + target_crs=RoutingProductGridWeightExtractor.CRS_CAEA, + ) self._nlon = 1 # only for consistency + # FIXME: This is not counting shapes, this is counting the number of features with geometry fields. # number of shapes in model "discretization" shapefile (not routing toolbox shapefile) - self._nlat = self._input_data.geometry.count() # only for consistency + # self._nlat = len(self._input_data.geometry.count() # only for consistency - def _compute_grid_cell_polygons(self): + # FIXME: This counts the number of shapes but is not needed. + # self._nlat = 0 + # for i in self._input_data.features: + # geom_type = sgeo.shape(i.geometry).geometryType() + # if geom_type == "MultiPolygon": + # self._nlat += len(list(sgeo.shape(i.geometry))) + # else: + # self._nlat += 1 - grid_cell_geom_gpd_wkt: List[List[List[ogr.Geometry]]] = [ - [[] for ilon in range(self._nlon)] for ilat in range(self._nlat) + self._nlat = len(self._input_data) + + def _compute_grid_cell_polygons(self, source_crs=CRS_LLDEG): + + grid_cell_geom_gpd_wkt: List[List[List[sgeo.shape]]] = [ + [[] for _ in range(self._nlon)] for _ in range(self._nlat) ] if self._input_is_netcdf: @@ -620,7 +730,7 @@ def _compute_grid_cell_polygons(self): # Graham Python 3.6.3 GDAL 2.2.1 --> lon/lat (Julie) # Ubuntu 18.04.2 LTS Python 3.6.8 GDAL 2.2.3 --> lon/lat (Etienne) # - if osgeo_version < "3.0": + if not osgeo_version or (osgeo_version < "3.0"): gridcell_edges = [ [ lonh[ilat, ilon], @@ -641,37 +751,34 @@ def _compute_grid_cell_polygons(self): [lath[ilat, ilon + 1], lonh[ilat, ilon + 1]], ] - tmp = self._shape_to_geometry( - gridcell_edges, epsg=RoutingProductGridWeightExtractor.CRS_CAEA + tmp = self._gridcells_to_projected_geometry( + gridcell_edges, + source_crs=source_crs, + target_crs=RoutingProductGridWeightExtractor.CRS_CAEA, ) grid_cell_geom_gpd_wkt[ilat][ilon] = tmp else: for ishape in range(self._nlat): - idx = np.where(self._input_data[self._netcdf_input_field] == ishape)[0] if len(idx) == 0: - # print( - # "Polygon ID = {} not found in '{}'. Numbering of shapefile attribute '{}' needs to be [0 ... {}-1].".format( - # ishape, input_file, key_colname_model, nshapes - # ) - # ) raise ValueError("Polygon ID not found.") if len(idx) > 1: - # print( - # "Polygon ID = {} found multiple times in '{}' but needs to be unique. Numbering of shapefile attribute '{}' needs to be [0 ... 
{}-1].".format( - # ishape, input_file, key_colname_model, nshapes - # ) - # ) raise ValueError("Polygon ID not unique.") - idx = idx[0] - poly = self._input_data.loc[idx].geometry - grid_cell_geom_gpd_wkt[ishape][0] = ogr.CreateGeometryFromWkt( - poly.to_wkt() - ).Buffer( - 0.0 - ) # We add an empty buffer here to fix problems with bad polygon topology (actually caused by ESRI's historical incompetence) + + if osgeo_version: + idx = idx[0] + poly = self._input_data.loc[idx].geometry + grid_cell_geom_gpd_wkt[ishape][0] = ogr.CreateGeometryFromWkt( + poly.to_wkt() + ).Buffer( + 0.0 + ) # We add an empty buffer here to fix problems with bad polygon topology (actually caused by ESRI's historical incompetence) + else: + grid_cell_geom_gpd_wkt[ishape][0] = self._input_data.geometry[ + idx[0] + ] return grid_cell_geom_gpd_wkt @@ -719,35 +826,50 @@ def _create_gridcells_from_centers(self, lat, lon): return [lath, lonh] - def _shape_to_geometry(self, shape_from_jsonfile, epsg=None): + @staticmethod + def _gridcells_to_projected_geometry( + shape_from_jsonfile, source_crs: int = CRS_LLDEG, target_crs: int = None + ): # converts shape read from shapefile to geometry - # epsg :: integer EPSG code - ring_shape = ogr.Geometry(ogr.wkbLinearRing) + if osgeo_version: + ring_shape = ogr.Geometry(ogr.wkbLinearRing) - for ii in shape_from_jsonfile: - ring_shape.AddPoint_2D(ii[0], ii[1]) - # close ring - ring_shape.AddPoint_2D(shape_from_jsonfile[0][0], shape_from_jsonfile[0][1]) + for ii in shape_from_jsonfile: + ring_shape.AddPoint_2D(ii[0], ii[1]) + # close ring + ring_shape.AddPoint_2D(shape_from_jsonfile[0][0], shape_from_jsonfile[0][1]) - poly_shape = ogr.Geometry(ogr.wkbPolygon) - poly_shape.AddGeometry(ring_shape) + poly_shape = ogr.Geometry(ogr.wkbPolygon) + poly_shape.AddGeometry(ring_shape) - if epsg: - source = osr.SpatialReference() - # usual lat/lon projection - source.ImportFromEPSG(RoutingProductGridWeightExtractor.CRS_LLDEG) + else: + poly_shape = sgeo.Polygon(shape_from_jsonfile) + + if target_crs: + if osgeo_version: + source = osr.SpatialReference() + # usual lat/lon projection + source.ImportFromEPSG(source_crs) - target = osr.SpatialReference() - target.ImportFromEPSG(epsg) # any projection to convert to + target = osr.SpatialReference() + target.ImportFromEPSG(target_crs) # any projection to convert to - transform = osr.CoordinateTransformation(source, target) - poly_shape.Transform(transform) + transform = osr.CoordinateTransformation(source, target) + poly_shape.Transform(transform) + else: + poly_shape = geom_transform( + poly_shape, + source_crs=source_crs, + target_crs=target_crs, + always_xy=True, # This kwarg is VERY important. No touching! 
+ ) return poly_shape - def _check_proximity_of_envelops(self, gridcell_envelop, shape_envelop): + @staticmethod + def _check_proximity_of_envelops(gridcell_envelop, shape_envelop): # checks if two envelops are in proximity (intersect) @@ -755,7 +877,6 @@ def _check_proximity_of_envelops(self, gridcell_envelop, shape_envelop): # maxX --> env[1] # minY --> env[2] # maxY --> env[3] - return ( gridcell_envelop[0] <= shape_envelop[1] and gridcell_envelop[1] >= shape_envelop[0] @@ -763,9 +884,8 @@ def _check_proximity_of_envelops(self, gridcell_envelop, shape_envelop): and gridcell_envelop[3] >= shape_envelop[2] ) - def _check_gridcell_in_proximity_of_shape( - self, gridcell_edges, shape_from_jsonfile - ): + @staticmethod + def _check_gridcell_in_proximity_of_shape(gridcell_edges, shape_from_jsonfile): # checks if a grid cell falls into the bounding box of the shape # does not mean it intersects but it is a quick and cheap way to diff --git a/ravenpy/utilities/__init__.py b/ravenpy/utilities/__init__.py index ea30306a..0748b256 100644 --- a/ravenpy/utilities/__init__.py +++ b/ravenpy/utilities/__init__.py @@ -1,12 +1,5 @@ from .regionalization import read_gauged_params, read_gauged_properties, regionalize -# TODO: Merge these into one message. -gis_import_error_message = ( - "`{}` requires installation of the RavenPy GIS libraries. These can be installed using the" - " `pip install ravenpy[gis]` recipe or via Anaconda (`conda env update -n ravenpy-env -f environment.yml`)" - " from the RavenPy repository source files." -) - dev_import_error_message = ( "`{}` requires installation of the RavenPy development libraries. These can be installed using the" " `pip install ravenpy[dev]` recipe or via Anaconda (`conda env update -n ravenpy-env -f environment.yml`)" diff --git a/ravenpy/utilities/analysis.py b/ravenpy/utilities/analysis.py deleted file mode 100644 index d05cfa16..00000000 --- a/ravenpy/utilities/analysis.py +++ /dev/null @@ -1,260 +0,0 @@ -import logging -import math -import tempfile -from pathlib import Path -from typing import List, Optional, Union - -import numpy as np - -from . import gis_import_error_message - -try: - import rasterio - from osgeo.gdal import Dataset, DEMProcessing - from shapely.geometry import GeometryCollection, MultiPolygon, Polygon, shape -except (ImportError, ModuleNotFoundError) as e: - msg = gis_import_error_message.format(Path(__file__).stem) - raise ImportError(msg) from e - -from ravenpy.utilities.geo import generic_raster_clip - -# See: https://kokoalberti.com/articles/geotiff-compression-optimization-guide/ -# or 'compress=deflate' or 'compress=zstd' or 'compress=lerc' or others -GDAL_TIFF_COMPRESSION_OPTION = "compress=lzw" - -LOGGER = logging.getLogger("RavenPy") - - -def geom_prop(geom: Union[Polygon, MultiPolygon, GeometryCollection]) -> dict: - """Return a dictionary of geometry properties. - - Parameters - ---------- - geom : Union[Polygon, MultiPolygon, GeometryCollection] - Geometry to analyze. - - Returns - ------- - dict - Dictionary storing polygon area, centroid location, perimeter and gravelius shape index. - - Notes - ----- - Some of the properties should be computed using an equal-area projection. 
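# --- Editor's sketch (assumptions flagged below): the equal-area computation that
# the note above recommends can be done without GDAL, using the same pyproj/shapely
# pattern the new `geom_transform`-based code paths in this patch rely on.
# EPSG:6931 is only an example of an equal-area CRS; the polygon is made up.
from pyproj import Transformer
from shapely.geometry import Polygon
from shapely.ops import transform

poly_lldeg = Polygon([(-75.0, 45.0), (-74.0, 45.0), (-74.0, 46.0), (-75.0, 46.0)])
to_equal_area = Transformer.from_crs(4326, 6931, always_xy=True).transform
poly_eq = transform(to_equal_area, poly_lldeg)
area_m2 = poly_eq.area        # square metres
perimeter_m = poly_eq.length  # metres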
- """ - - geom = shape(geom) - lon, lat = geom.centroid.x, geom.centroid.y - if (lon > 180) or (lon < -180) or (lat > 90) or (lat < -90): - LOGGER.warning("Shape centroid is not in decimal degrees.") - area = geom.area - length = geom.length - gravelius = length / 2 / math.sqrt(math.pi * area) - parameters = { - "area": area, - "centroid": (lon, lat), - "perimeter": length, - "gravelius": gravelius, - } - return parameters - - -def dem_prop( - dem: Union[str, Path], - geom: Union[Polygon, MultiPolygon, List[Union[Polygon, MultiPolygon]]] = None, - directory: Union[str, Path] = None, -) -> dict: - """Return raster properties for each geometry. - - This - - Parameters - ---------- - dem : Union[str, Path] - DEM raster in reprojected coordinates. - geom : Union[Polygon, MultiPolygon, List[Union[Polygon, MultiPolygon]]] - Geometry over which aggregate properties will be computed. If None compute properties over entire raster. - directory : Union[str, Path] - Folder to save the GDAL terrain analysis outputs. - - Returns - ------- - dict - Dictionary storing mean elevation [m], slope [deg] and aspect [deg]. - """ - - fns = dict() - fns["dem"] = ( - tempfile.NamedTemporaryFile( - prefix="dem", suffix=".tiff", dir=directory, delete=False - ).name - if geom is not None - else dem - ) - for key in ["slope", "aspect"]: - fns[key] = tempfile.NamedTemporaryFile( - prefix=key, suffix=".tiff", dir=directory, delete=False - ).name - - # Clip to relevant area or read original raster - if geom is None: - with rasterio.open(dem) as f: - elevation = f.read(1, masked=True) - else: - generic_raster_clip(raster=dem, output=fns["dem"], geometry=geom) - with rasterio.open(fns["dem"]) as f: - elevation = f.read(1, masked=True) - - # Compute slope - slope = gdal_slope_analysis(fns["dem"], set_output=fns["slope"]) - - # Compute aspect - aspect = gdal_aspect_analysis(fns["dem"], set_output=fns["aspect"]) - aspect_mean = circular_mean_aspect(aspect) - - return {"elevation": elevation.mean(), "slope": slope.mean(), "aspect": aspect_mean} - - -def gdal_slope_analysis( - dem: Union[str, Path], - set_output: Optional[Union[str, Path]] = None, - units: str = "degree", -) -> np.ndarray: - """Return the slope of the terrain from the DEM. - - The slope is the magnitude of the gradient of the elevation. - - Parameters - ---------- - dem : Union[str, Path] - Path to file storing DEM. - set_output : Union[str, Path] - If set to a valid filepath, will write to this path, otherwise will use an in-memory gdal.Dataset. - units : str - Slope units. Default: 'degree'. - - Returns - ------- - np.ndarray - Slope array. - - Notes - ----- - Ensure that the DEM is in a *projected coordinate*, not a geographic coordinate system, so that the - horizontal scale is the same as the vertical scale (m). 
- - """ - if isinstance(dem, Path): - dem = str(dem) - if set_output: - if isinstance(set_output, (str, Path)): - set_output = str(set_output) - DEMProcessing( - set_output, - dem, - "slope", - slopeFormat=units, - format="GTiff", - band=1, - creationOptions=[GDAL_TIFF_COMPRESSION_OPTION], - ) - with rasterio.open(set_output) as src: - return np.ma.masked_values(src.read(1), value=-9999) - else: - raise ValueError() - else: - set_output = DEMProcessing( - "", - dem, - "slope", - slopeFormat=units, - format="MEM", - band=1, - ) - return np.ma.masked_values(set_output.ReadAsArray(), value=-9999) - - -def gdal_aspect_analysis( - dem: Union[str, Path], - set_output: Union[str, Path, bool] = False, - flat_values_are_zero: bool = False, -) -> Union[np.ndarray, Dataset]: - """Return the aspect of the terrain from the DEM. - - The aspect is the compass direction of the steepest slope (0: North, 90: East, 180: South, 270: West). - - Parameters - ---------- - dem : Union[str, Path] - Path to file storing DEM. - set_output : Union[str, Path, bool] - If set to a valid filepath, will write to this path, otherwise will use an in-memory gdal.Dataset. - flat_values_are_zero: bool - Designate flat values with value zero. Default: -9999. - - Returns - ------- - np.ndarray - Aspect array. - - Notes - ----- - Ensure that the DEM is in a *projected coordinate*, not a geographic coordinate system, so that the - horizontal scale is the same as the vertical scale (m). - """ - if isinstance(dem, Path): - dem = str(dem) - if set_output: - if isinstance(set_output, (str, Path)): - set_output = str(set_output) - DEMProcessing( - destName=set_output, - srcDS=dem, - processing="aspect", - zeroForFlat=flat_values_are_zero, - format="GTiff", - band=1, - creationOptions=[GDAL_TIFF_COMPRESSION_OPTION], - ) - with rasterio.open(set_output) as src: - return np.ma.masked_values(src.read(1), value=-9999) - else: - raise ValueError() - - else: - set_output = DEMProcessing( - destName="", - srcDS=dem, - processing="aspect", - zeroForFlat=flat_values_are_zero, - format="MEM", - band=1, - ) - return np.ma.masked_values(set_output.ReadAsArray(), value=-9999) - - -def circular_mean_aspect(angles: np.ndarray) -> np.ndarray: - """Return the mean angular aspect based on circular arithmetic approach - - Parameters - ---------- - angles: np.ndarray - Array of aspect angles - - Returns - ------- - np.ndarray - Circular mean of aspect array. - """ - # Circular statistics needed for mean angular aspect - # Example from: https://gis.stackexchange.com/a/147135/65343 - - n = len(angles) - sine_mean = np.divide(np.sum(np.sin(np.radians(np.ma.masked_array(angles)))), n) - cosine_mean = np.divide(np.sum(np.cos(np.radians(np.ma.masked_array(angles)))), n) - vector_mean = np.arctan2(sine_mean, cosine_mean) - degrees = np.degrees(vector_mean) - - if degrees < 0: - return degrees + 360 - return degrees diff --git a/ravenpy/utilities/checks.py b/ravenpy/utilities/checks.py deleted file mode 100644 index 5235dbe3..00000000 --- a/ravenpy/utilities/checks.py +++ /dev/null @@ -1,175 +0,0 @@ -""" -Checks for various geospatial and IO conditions. -""" - -import collections -import logging -import warnings -from pathlib import Path -from typing import Any, List, Sequence, Tuple, Union - -from . 
import gis_import_error_message - -try: - import fiona - import rasterio - from pyproj import CRS - from pyproj.exceptions import CRSError - from shapely.geometry import GeometryCollection, MultiPolygon, Point, shape -except (ImportError, ModuleNotFoundError) as e: - msg = gis_import_error_message.format(Path(__file__).stem) - raise ImportError(msg) from e - -import ravenpy.utilities.io as io - -LOGGER = logging.getLogger("RavenPy") - - -def single_file_check(file_list: List[Union[str, Path]]) -> Any: - """Return the first element of a file list. Raise an error if the list is empty or contains more than one element. - - Parameters - ---------- - file_list : List[Union[str, Path]] - """ - if isinstance(file_list, (str, Path)): - return file_list - - try: - if len(file_list) > 1: - msg = "Multi-file handling for file is not supported. Exiting." - raise NotImplementedError(msg) - elif len(file_list) == 0: - msg = "No files found. Exiting." - raise FileNotFoundError(msg) - return file_list[0] - except (FileNotFoundError, NotImplementedError) as e: - LOGGER.error(e) - raise - - -def boundary_check( - *args: Sequence[Union[str, Path]], - max_y: Union[int, float] = 60, - min_y: Union[int, float] = -60, -) -> None: - """Verify that boundaries do not exceed specific latitudes for geographic coordinate data. Raise a warning if so. - - Parameters - ---------- - *args : Sequence[Union[str, Path]] - listing of strings or paths to files - max_y : Union[int, float] - Maximum value allowed for latitude. Default: 60. - min_y : Union[int, float] - Minimum value allowed for latitude. Default: -60. - """ - vectors = (".gml", ".shp", ".geojson", ".gpkg", ".json") - rasters = (".tif", ".tiff") - - if len(args) == 1 and not isinstance(args[0], str): - args = args[0] - - for file in args: - try: - if str(file).lower().endswith(vectors): - src = fiona.open(file, "r") - elif str(file).lower().endswith(rasters): - src = rasterio.open(file, "r") - else: - raise FileNotFoundError() - - try: - geographic = CRS(src.crs).is_geographic - except CRSError: - geographic = True - src_min_y, src_max_y = src.bounds[1], src.bounds[3] - if geographic and (src_max_y > max_y or src_min_y < min_y): - msg = ( - f"Vector {file} contains geometries in high latitudes." - " Verify choice of projected CRS is appropriate for analysis." - ) - LOGGER.warning(msg) - warnings.warn(msg, UserWarning) - if not geographic: - msg = f"Vector {file} is not in a geographic coordinate system." - LOGGER.warning(msg) - warnings.warn(msg, UserWarning) - src.close() - - except FileNotFoundError: - msg = f"Unable to read boundaries from {file}" - LOGGER.error(msg) - raise - return - - -def multipolygon_check(geom: GeometryCollection) -> None: - """Perform a check to verify a geometry is a MultiPolygon - - Parameters - ---------- - geom : GeometryCollection - - Returns - ------- - None - """ - if not isinstance(geom, GeometryCollection): - try: - geom = shape(geom) - except AttributeError: - LOGGER.error("Unable to load argument as shapely.geometry.shape().") - raise - - if isinstance(geom, MultiPolygon): - LOGGER.warning("Shape is a Multipolygon.") - return - - -def feature_contains( - point: Union[Tuple[Union[int, float, str], Union[str, float, int]], Point], - shp: Union[str, Path, List[Union[str, Path]]], -) -> Union[dict, bool]: - """Return the first feature containing a location. - - Parameters - ---------- - point : Union[Tuple[Union[int, float, str], Union[str, float, int]], Point] - Geographic coordinates of a point (lon, lat) or a shapely Point. 
- shp : Union[str, Path, List[str, Path]] - Path to the file storing the geometries. - - Returns - ------- - Union[dict, bool] - The feature found. - - Notes - ----- - This is really slow. Another approach is to use the `fiona.Collection.filter` method. - """ - - if isinstance(point, collections.abc.Sequence) and not isinstance(point, str): - for coord in point: - if isinstance(coord, (int, float)): - pass - point = Point(point) - elif isinstance(point, Point): - pass - else: - raise ValueError( - f"point should be shapely.Point or tuple of coordinates, got : {point} of type({type(point)})" - ) - - shape_crs = io.crs_sniffer(single_file_check(shp)) - - if isinstance(shp, list): - shp = shp[0] - - for i, layer_name in enumerate(fiona.listlayers(str(shp))): - with fiona.open(shp, "r", crs=shape_crs, layer=i) as vector: - for f in vector.filter(bbox=(point.x, point.y, point.x, point.y)): - return f - - return False diff --git a/ravenpy/utilities/forecasting.py b/ravenpy/utilities/forecasting.py index 93e0684b..aa900d3a 100644 --- a/ravenpy/utilities/forecasting.py +++ b/ravenpy/utilities/forecasting.py @@ -8,25 +8,15 @@ import datetime as dt import logging -import re import warnings -from pathlib import Path -from typing import List, Tuple +from typing import Dict, List, Tuple -# import climpred +import climpred import numpy as np import pandas as pd import xarray as xr from climpred import HindcastEnsemble -from . import gis_import_error_message - -try: - import rioxarray -except (ImportError, ModuleNotFoundError) as e: - msg = gis_import_error_message.format(Path(__file__).stem) - raise ImportError(msg) from e - from ravenpy.models.emulators import get_model LOGGER = logging.getLogger("PYWPS") @@ -35,25 +25,25 @@ # TODO: Complete docstrings # This function gets model states after running the model (i.e. states at the end of the run). -def get_raven_states(model, workdir=None, **kwds): +def get_raven_states(model: str, workdir=None, **kwargs) -> Dict: """Get the RAVEN states file (.rvc file) after a model run. Parameters ---------- model : {'HMETS', 'GR4JCN', 'MOHYSE', 'HBVEC'} Model name. - kwds : {} + kwargs : dict Model configuration parameters, including the forcing files (ts). Returns ------- - rvc : {} + dict Raven model forcing file """ - # Run the model and get the rvc file for future hotstart. + # Run the model and get the rvc file for future hot start. m = get_model(model)(workdir=workdir) - m(overwrite=True, **kwds) + m(overwrite=True, **kwargs) rvc = m.outputs["solution"] return rvc @@ -78,8 +68,12 @@ def perform_forecasting_step(rvc, model, workdir=None, **kwds): def perform_climatology_esp( - model_name, forecast_date, forecast_duration, workdir=None, **kwds -): + model_name: str, + forecast_date: dt.datetime, + forecast_duration: int, + workdir=None, + **kwargs, +) -> xr.DataArray: """ This function takes the model setup and name as well as forecast data and duration and returns an ESP forecast netcdf. The data comes from the climatology data and thus there is a mechanism @@ -93,17 +87,17 @@ def perform_climatology_esp( Date of the forecast issue. forecast_duration : int Number of days of forecast, forward looking. - kwds : dict + kwargs : dict Raven model configuration parameters. 
Returns ------- - qsims + xarray.DataArray Array of streamflow values from the ESP method along with list of member years """ # Get the timeseries - tsnc = xr.open_dataset(kwds["ts"]) + tsnc = xr.open_dataset(kwargs["ts"]) # Prepare model instance m = get_model(model_name)(workdir=workdir) @@ -113,7 +107,7 @@ def perform_climatology_esp( start_date = pd.to_datetime(tsnc["time"][0].values) start_date = start_date.to_pydatetime() - kwds["start_date"] = start_date + kwargs["start_date"] = start_date # Forecasting from Feb 29th is not ideal, we will replace with Feb 28th. # Should not change much in a climatological forecast. @@ -144,14 +138,14 @@ def perform_climatology_esp( # Update the forecast end-date, which will be the day prior to the forecast date. # So forecasts warm-up will be from day 1 in the dataset to the forecast date. - kwds["end_date"] = forecast_date - dt.timedelta(days=1) + kwargs["end_date"] = forecast_date - dt.timedelta(days=1) # Get RVC file if it exists, else compute it. - if "rvc" in kwds and len(kwds["rvc"]) > 0: - rvc = kwds.pop("rvc") + if "rvc" in kwargs and len(kwargs["rvc"]) > 0: + rvc = kwargs.pop("rvc") else: # Run model to get rvc file after warm-up using base meteo - rvc = get_raven_states(model_name, workdir=workdir, **kwds) + rvc = get_raven_states(model_name, workdir=workdir, **kwargs) # We need to check which years are long enough (ex: wrapping years, 365-day forecast starting in # September 2015 will need data up to August 2016 at least) @@ -172,13 +166,13 @@ def perform_climatology_esp( # Replace the forecast period start and end dates with the climatological ESP dates for the # current member (year) forecast_date = forecast_date.replace(year=years) - kwds["start_date"] = forecast_date - kwds["end_date"] = forecast_date + dt.timedelta(days=forecast_duration - 1) + kwargs["start_date"] = forecast_date + kwargs["end_date"] = forecast_date + dt.timedelta(days=forecast_duration - 1) # Setup the initial states from the warm-up and run the model. # Note that info on start/end dates and timeseries are in the kwds. m.resume(rvc) - m(run_name=f"run_{years}", **kwds) + m(run_name=f"run_{years}", **kwargs) # Add member to the ensemble and retag the dates to the real forecast dates # (or else we will get dates from the climate dataset that cover all years) @@ -198,172 +192,9 @@ def perform_climatology_esp( return qsims -def get_hindcast_day(region_coll, date, climate_model="GEPS"): - """ - This function generates a forecast dataset that can be used to run raven. - Data comes from the CASPAR archive and must be aggregated such that each file - contains forecast data for a single day, but for all forecast timesteps and - all members. - - The code takes the region shapefile, the forecast date required, and the - climate_model to use, here GEPS by default, but eventually could be GEPS, GDPS, REPS or RDPS. 
- """ - - # Get the file locations and filenames as a function of the climate model and date - [ds, times] = get_CASPAR_dataset(climate_model, date) - - return get_subsetted_forecast(region_coll, ds, times, True) - - -def get_CASPAR_dataset(climate_model, date): - """Return Caspar Dataset.""" - - if climate_model == "GEPS": - d = dt.datetime.strftime(date, "%Y%m%d") - file_url = f"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/caspar/daily/GEPS_{d}.nc" - ds = xr.open_dataset(file_url) - # Here we also extract the times at 6-hour intervals as Raven must have - # constant timesteps and GEPS goes to 6 hours - start = pd.to_datetime(ds.time[0].values) - times = [start + dt.timedelta(hours=n) for n in range(0, 384, 6)] - else: - # Eventually: GDPS, RDPS and REPS - raise NotImplementedError("Only the GEPS model is currently supported") - - # Checking that these exist. - for f in ["pr", "tas"]: - if f not in ds: - raise AttributeError(f"'{f}' not present in dataset") - - return ds, times - - -def get_ECCC_dataset(climate_model): - """Return latest GEPS forecast Dataset.""" - if climate_model == "GEPS": - # Eventually the file will find a permanent home, until then let's use the test folder. - file_url = "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/datasets/forecasts/eccc_geps/GEPS_latest.ncml" - - ds = xr.open_dataset(file_url) - # Here we also extract the times at 6-hour intervals as Raven must have - # constant timesteps and GEPS goes to 6 hours - start = pd.to_datetime(ds.time[0].values) - times = [start + dt.timedelta(hours=n) for n in range(0, 384, 6)] - else: - # Eventually: GDPS, RDPS and REPS - raise NotImplementedError("Only the GEPS model is currently supported") - - # Checking that these exist. IF the files are still processing, possible that one or both are not available! - for f in ["pr", "tas"]: - if f not in ds: - raise AttributeError(f"'{f}' not present in dataset") - - return ds, times - - -def get_recent_ECCC_forecast(region_coll, climate_model="GEPS"): - """ - This function generates a forecast dataset that can be used to run raven. - Data comes from the ECCC datamart and collected daily. It is aggregated - such that each file contains forecast data for a single day, but for all - forecast timesteps and all members. - - The code takes the region shapefile and the climate_model to use, here GEPS - by default, but eventually could be GEPS, GDPS, REPS or RDPS. - - Parameters - ---------- - region_coll : fiona.collection.Collection - The region vectors. - climate_model : str - Type of climate model, for now only "GEPS" is supported. - - Returns - ------- - forecast : xararray.Dataset - The forecast dataset. - - """ - - [ds, times] = get_ECCC_dataset(climate_model) - - return get_subsetted_forecast(region_coll, ds, times, False) - - -def get_subsetted_forecast(region_coll, ds, times, is_caspar): - """ - This function takes a dataset, a region and the time sampling array and returns - the subsetted values for the given region and times - - Parameters - ---------- - region_coll : fiona.collection.Collection - The region vectors. - ds : xarray.Dataset - The dataset containing the raw, worldwide forecast data - times: dt.datetime - The array of times required to do the forecast. - is_caspar: boolean - True if the data comes from Caspar, false otherwise. Used to define - lat/lon on rotated grid. - - Returns - ------- - forecast : xararray.Dataset - The forecast dataset. 
- - """ - # Extract the bounding box to subset the entire forecast grid to something - # more manageable - lon_min = region_coll.bounds[0] - lon_max = region_coll.bounds[2] - lat_min = region_coll.bounds[1] - lat_max = region_coll.bounds[3] - - # Add a very simple lon wraparound if data suggests for it - if ((ds.lon.min() >= 0) and (ds.lon.max() <= 360)) and (lon_max < 0): - lon_min += 360 - lon_max += 360 - - # Subset the data to the desired location (bounding box) and times - ds = ds.where( - (ds.lon <= lon_max) - & (ds.lon >= lon_min) - & (ds.lat <= lat_max) - & (ds.lat >= lat_min), - drop=True, - ).sel(time=times) - - # Rioxarray requires CRS definitions for variables - # Get CRS, e.g. 4326 - crs = int(re.match(r"epsg:(\d+)", region_coll.crs["init"]).group(1)) - - # Here the name of the variable could differ based on the Caspar file processing - tas = ds.tas.rio.write_crs(crs) - pr = ds.pr.rio.write_crs(crs) - ds = xr.merge([tas, pr]) - - # Now apply the mask of the basin contour and average the values to get a single time series - if is_caspar: - ds.rio.set_spatial_dims("rlon", "rlat") - ds["rlon"] = ds["rlon"] - 360 - # clip the netcdf and average across space. - shdf = [next(iter(region_coll))["geometry"]] - forecast = ds.rio.clip(shdf, crs=crs) - forecast = forecast.mean(dim={"rlat", "rlon"}, keep_attrs=True) - - else: - ds.rio.set_spatial_dims("lon", "lat") - ds["lon"] = ds["lon"] - 360 - # clip the netcdf and average across space. - shdf = [next(iter(region_coll))["geometry"]] - forecast = ds.rio.clip(shdf, crs=crs) - forecast = forecast.mean(dim={"lat", "lon"}, keep_attrs=True) - - return forecast - - -def make_climpred_hindcast_object(hindcast, observations): +def make_climpred_hindcast_object( + hindcast: xr.Dataset, observations: xr.Dataset +) -> climpred.HindcastEnsemble: """ This function takes a hindcasting dataset of streamflow as well as associated observations and creates a hindcasting object that can be used by the @@ -378,7 +209,7 @@ def make_climpred_hindcast_object(hindcast, observations): Returns ------- - hindcast_obj : climpred.HindcastEnsemble object + climpred.HindcastEnsemble The hindcast ensemble formatted to be used in climpred. """ @@ -395,7 +226,11 @@ def make_climpred_hindcast_object(hindcast, observations): def make_ESP_hindcast_dataset( - model_name, forecast_date, included_years, forecast_duration, **kwargs + model_name: str, + forecast_date: dt.datetime, + included_years: List[int], + forecast_duration: int, + **kwargs, ) -> Tuple[xr.Dataset, xr.Dataset]: """Make a hindcast using ESP dataset. @@ -406,7 +241,7 @@ def make_ESP_hindcast_dataset( Parameters ---------- - model_name: string + model_name: str Name of the RAVEN model setup (GR4JCN, MOHYSE, HMETS or HBVEC). forecast_date: datetime.datetime Calendar date that is used as the base to perform hindcasts. @@ -415,14 +250,14 @@ def make_ESP_hindcast_dataset( the years that we want to perform a hindcasting for, on the calendar date in "foreacast_date" forecast_duration: int Duration of the forecast, in days. Refers to the longest lead-time required. - kwargs: dict + **kwargs All other parameters needed to run RAVEN. Returns ------- - xarray.Dataset + qsims: xarray.Dataset The dataset containing the (init, member, lead) dimensions ready for using in climpred. (qsim) - xarray.Dataset + qobs: xarray.Dataset The dataset containing all observations over the verification period. The only dimension is 'time', as required by climpred. 
No other processing is required as climpred will find corresponding verification dates on its own. (qobs) diff --git a/ravenpy/utilities/geo.py b/ravenpy/utilities/geo.py deleted file mode 100644 index 7a8f0da0..00000000 --- a/ravenpy/utilities/geo.py +++ /dev/null @@ -1,244 +0,0 @@ -""" -Tools for performing geospatial translations and transformations. -""" - -import collections -import json -import logging -from pathlib import Path -from typing import List, Union - -from . import gis_import_error_message - -try: - import fiona - import rasterio - import rasterio.mask - import rasterio.vrt - import rasterio.warp - from pyproj import CRS - from shapely.geometry import ( - GeometryCollection, - MultiPolygon, - Polygon, - mapping, - shape, - ) - from shapely.ops import transform -except (ImportError, ModuleNotFoundError) as e: - msg = gis_import_error_message.format(Path(__file__).stem) - raise ImportError(msg) from e - -RASTERIO_TIFF_COMPRESSION = "lzw" -LOGGER = logging.getLogger("RavenPy") -WGS84 = 4326 - - -def geom_transform( - geom: Union[GeometryCollection, shape], - source_crs: Union[str, int, CRS] = WGS84, - target_crs: Union[str, int, CRS] = None, -) -> GeometryCollection: - """Change the projection of a geometry. - - Assuming a geometry's coordinates are in a `source_crs`, compute the new coordinates under the `target_crs`. - - Parameters - ---------- - geom : Union[GeometryCollection, shape] - Source geometry. - source_crs : Union[str, int, CRS] - Projection identifier (proj4) for the source geometry, e.g. '+proj=longlat +datum=WGS84 +no_defs'. - target_crs : Union[str, int, CRS] - Projection identifier (proj4) for the target geometry. - - Returns - ------- - GeometryCollection - Reprojected geometry. - """ - try: - from functools import partial - - from pyproj import Transformer # noqa - - if isinstance(source_crs, int or str): - source = CRS.from_epsg(source_crs) - else: - source = source_crs - - if isinstance(target_crs, int or str): - target = CRS.from_epsg(target_crs) - else: - target = target_crs - - transform_func = Transformer.from_crs(source, target, always_xy=True) - reprojected = transform(transform_func.transform, geom) - - return reprojected - except Exception as err: - msg = f"{err}: Failed to reproject geometry" - LOGGER.error(msg) - raise Exception(msg) - - -def generic_raster_clip( - raster: Union[str, Path], - output: Union[str, Path], - geometry: Union[Polygon, MultiPolygon, List[Union[Polygon, MultiPolygon]]], - touches: bool = False, - fill_with_nodata: bool = True, - padded: bool = True, - raster_compression: str = RASTERIO_TIFF_COMPRESSION, -) -> None: - """ - Crop a raster file to a given geometry. - - Parameters - ---------- - raster : Union[str, Path] - Path to input raster. - output : Union[str, Path] - Path to output raster. - geometry : Union[Polygon, MultiPolygon, List[Union[Polygon, MultiPolygon]] - Geometry defining the region to crop. - touches : bool - Whether or not to include cells that intersect the geometry. Default: True. - fill_with_nodata: bool - Whether or not to keep pixel values for regions outside of shape or set as nodata. Default: True. - padded: bool - Whether or not to add a half-pixel buffer to shape before masking - raster_compression : str - Level of data compression. Default: 'lzw'. 
- - Returns - ------- - None - """ - if not isinstance(geometry, collections.abc.Iterable): - geometry = [geometry] - - with rasterio.open(raster, "r") as src: - mask_image, mask_affine = rasterio.mask.mask( - src, - geometry, - crop=True, - pad=padded, - all_touched=touches, - filled=fill_with_nodata, - ) - mask_meta = src.meta.copy() - mask_meta.update( - { - "driver": "GTiff", - "height": mask_image.shape[1], - "width": mask_image.shape[2], - "transform": mask_affine, - "compress": raster_compression, - } - ) - - # Write the new masked image - with rasterio.open(output, "w", **mask_meta) as dst: - dst.write(mask_image) - - -def generic_raster_warp( - raster: Union[str, Path], - output: Union[str, Path], - target_crs: Union[str, dict, CRS], - raster_compression: str = RASTERIO_TIFF_COMPRESSION, -) -> None: - """ - Reproject a raster file. - - Parameters - ---------- - raster : Union[str, Path] - Path to input raster. - output : Union[str, Path] - Path to output raster. - target_crs : str or dict - Target projection identifier. - raster_compression: str - Level of data compression. Default: 'lzw'. - - Returns - ------- - None - """ - with rasterio.open(raster, "r") as src: - # Reproject raster using WarpedVRT class - with rasterio.vrt.WarpedVRT(src, crs=target_crs) as vrt: - # Calculate grid properties based on projection - affine, width, height = rasterio.warp.calculate_default_transform( - src.crs, target_crs, src.width, src.height, *src.bounds - ) - - # Copy relevant metadata from parent raster - metadata = src.meta.copy() - metadata.update( - { - "driver": "GTiff", - "height": height, - "width": width, - "transform": affine, - "crs": target_crs, - "compress": raster_compression, - } - ) - data = vrt.read() - - with rasterio.open(output, "w", **metadata) as dst: - dst.write(data) - - -def generic_vector_reproject( - vector: Union[str, Path], - projected: Union[str, Path], - source_crs: Union[str, CRS] = WGS84, - target_crs: Union[str, CRS] = None, -) -> None: - """Reproject all features and layers within a vector file and return a GeoJSON - - Parameters - ---------- - vector : Union[str, Path] - Path to a file containing a valid vector layer. - projected: Union[str, Path] - Path to a file to be written. - source_crs : Union[str, dict, CRS] - Projection identifier (proj4) for the source geometry, Default: '+proj=longlat +datum=WGS84 +no_defs'. - target_crs : Union[str, dict, CRS] - Projection identifier (proj4) for the target geometry. - - Returns - ------- - None - """ - - if target_crs is None: - raise ValueError("No target CRS is defined.") - - output = {"type": "FeatureCollection", "features": list()} - - if isinstance(vector, Path): - vector = vector.as_posix() - - for i, layer_name in enumerate(fiona.listlayers(vector)): - with fiona.open(vector, "r", layer=i) as src: - with open(projected, "w") as sink: - for feature in src: - # Perform vector reprojection using Shapely on each feature - try: - geom = shape(feature["geometry"]) - transformed = geom_transform(geom, source_crs, target_crs) - feature["geometry"] = mapping(transformed) - output["features"].append(feature) - except Exception as err: - LOGGER.exception( - "{}: Unable to reproject feature {}".format(err, feature) - ) - raise - - sink.write(f"{json.dumps(output)}") diff --git a/ravenpy/utilities/geoserver.py b/ravenpy/utilities/geoserver.py deleted file mode 100644 index 59099cdb..00000000 --- a/ravenpy/utilities/geoserver.py +++ /dev/null @@ -1,691 +0,0 @@ -""" -GeoServer interaction operations. 
- -Working assumptions for this module: -* Point coordinates are passed as shapely.geometry.Point instances. -* BBox coordinates are passed as (lon1, lat1, lon2, lat2). -* Shapes (polygons) are passed as shapely.geometry.shape parsable objects. -* All functions that require a CRS have a CRS argument with a default set to WGS84. -* GEO_URL points to the GeoServer instance hosting all files. - -TODO: Refactor to remove functions that are just 2-lines of code. -For example, many function's logic essentially consists in creating the layer name. -We could have a function that returns the layer name, and then other functions expect the layer name. -""" -import inspect -import json -import os -import warnings -from pathlib import Path -from typing import Iterable, Optional, Sequence, Tuple, Union -from urllib.parse import urljoin - -from requests import Request - -from . import gis_import_error_message - -try: - import fiona - import geopandas as gpd - import pandas as pd - from lxml import etree - from owslib.fes import PropertyIsEqualTo, PropertyIsLike - from owslib.wcs import WebCoverageService - from owslib.wfs import WebFeatureService - from shapely.geometry import Point, shape -except (ImportError, ModuleNotFoundError) as e: - msg = gis_import_error_message.format(Path(__file__).stem) - raise ImportError(msg) from e - -try: - from owslib.fes2 import Intersects - from owslib.gml import Point as wfs_Point -except (ImportError, ModuleNotFoundError): - warnings.warn("WFS point spatial filtering requires OWSLib>0.24.1.") - Intersects = None - wfs_Point = None - -# Do not remove the trailing / otherwise `urljoin` will remove the geoserver path. -# Can be set at runtime with `$ env GEO_URL=https://xx.yy.zz/geoserver/ ...`. -GEO_URL = os.getenv("GEO_URL", "https://pavics.ouranos.ca/geoserver/") - -# We store the contour of different hydrobasins domains -hybas_dir = Path(__file__).parent.parent / "data" / "hydrobasins_domains" -hybas_pat = "hybas_lake_{}_lev01_v1c.zip" - -# This could be inferred from existing files in hybas_dir -hybas_regions = ["na", "ar"] -hybas_domains = {dom: hybas_dir / hybas_pat.format(dom) for dom in hybas_regions} - - -def _get_location_wfs( - bbox: Optional[ - Tuple[ - Union[str, float, int], - Union[str, float, int], - Union[str, float, int], - Union[str, float, int], - ] - ] = None, - point: Optional[ - Tuple[ - Union[str, float, int], - Union[str, float, int], - ] - ] = None, - layer: str = None, - geoserver: str = GEO_URL, -) -> str: - """Return leveled features from a hosted data set using bounding box coordinates and WFS 1.1.0 protocol. - - For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based - on projected coordinate system (Easting, Northing) boundaries. - - Parameters - ---------- - bbox : Optional[Tuple[Union[str, float, int], Union[str, float, int], Union[str, float, int], Union[str, float, int]]] - Geographic coordinates of the bounding box (left, down, right, up). - point : Optional[Tuple[Union[str, float, int], Union[str, float, int]]] - Geographic coordinates of an intersecting point (lon, lat). - layer : str - The WFS/WMS layer name requested. - geoserver: str - The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. - - Returns - ------- - str - A GeoJSON-encoded vector feature. 
- - """ - wfs = WebFeatureService(url=urljoin(geoserver, "wfs"), version="2.0.0", timeout=30) - - if bbox and point: - raise NotImplementedError("Provide either 'bbox' or 'point'.") - if bbox: - kwargs = dict(bbox=bbox) - elif point: - # FIXME: Remove this once OWSlib > 0.24.1 is released. - if not Intersects and not wfs_Point: - raise NotImplementedError( - f"{inspect.stack()[1][3]} with point filtering requires OWSLib>0.24.1.", - ) - - p = wfs_Point( - id="feature", - srsName="http://www.opengis.net/gml/srs/epsg.xml#4326", - pos=point, - ) - f = Intersects(propertyname="the_geom", geometry=p) - intersects = f.toXML() - kwargs = dict(filter=intersects) - else: - raise ValueError() - - resp = wfs.getfeature( - typename=layer, outputFormat="application/json", method="POST", **kwargs - ) - - data = json.loads(resp.read()) - return data - - -def _get_feature_attributes_wfs( - attribute: Sequence[str], - layer: str = None, - geoserver: str = GEO_URL, -) -> str: - """Return WFS GetFeature URL request for attribute values. - - Making this request will return a JSON response. - - Parameters - ---------- - attribute : list - Attribute/field names. - layer : str - Name of geographic layer queried. - geoserver: str - The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. - - Returns - ------- - str - WFS request URL. - - Notes - ----- - Non-existent attributes will raise a cryptic DriverError from fiona. - - """ - params = dict( - service="WFS", - version="2.0.0", - request="GetFeature", - typename=layer, - outputFormat="application/json", - propertyName=",".join(attribute), - ) - - return Request("GET", url=urljoin(geoserver, "wfs"), params=params).prepare().url - - -def _filter_feature_attributes_wfs( - attribute: str, - value: Union[str, float, int], - layer: str, - geoserver: str = GEO_URL, -) -> str: - """Return WFS GetFeature URL request filtering geographic features based on a property's value. - - Parameters - ---------- - attribute : str - Attribute/field name. - value: Union[str, float, int] - Value for attribute queried. - layer : str - Name of geographic layer queried. - geoserver: str - The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. - - Returns - ------- - str - WFS request URL. - """ - - try: - attribute = str(attribute) - value = str(value) - - except ValueError: - raise Exception("Unable to cast attribute/filter to string.") - - filter_request = PropertyIsLike(propertyname=attribute, literal=value, wildCard="*") - filterxml = etree.tostring(filter_request.toXML()).decode("utf-8") - params = dict( - service="WFS", - version="1.1.0", - request="GetFeature", - typename=layer, - outputFormat="application/json", - filter=filterxml, - ) - - return Request("GET", url=urljoin(geoserver, "wfs"), params=params).prepare().url - - -def _determine_upstream_ids( - fid: str, - df: pd.DataFrame, - basin_field: str = None, - downstream_field: str = None, - basin_family: Optional[str] = None, -) -> pd.DataFrame: - """Return a list of upstream features by evaluating the downstream networks. - - Parameters - ---------- - fid : str - feature ID of the downstream feature of interest. - df : pd.DataFrame - Dataframe comprising the watershed attributes. - basin_field: str - The field used to determine the id of the basin according to hydro project. - downstream_field: str - The field identifying the downstream sub-basin for the hydro project. 
- basin_family: str, optional - Regional watershed code (For HydroBASINS dataset). - - Returns - ------- - pd.DataFrame - Basins ids including `fid` and its upstream contributors. - """ - - def upstream_ids(bdf, bid): - return bdf[bdf[downstream_field] == bid][basin_field] - - # Note: Hydro Routing `SubId` is a float for some reason and Python float != GeoServer double. Cast them to int. - if isinstance(fid, float): - fid = int(fid) - df[basin_field] = df[basin_field].astype(int) - df[downstream_field] = df[downstream_field].astype(int) - - # Locate the downstream feature - ds = df.set_index(basin_field).loc[fid] - if basin_family is not None: - # Do a first selection on the main basin ID of the downstream feature. - sub = df[df[basin_family] == ds[basin_family]] - else: - sub = None - - # Find upstream basins - up = [fid] - for b in up: - tmp = upstream_ids(sub if sub is not None else df, b) - if len(tmp): - up.extend(tmp) - - return ( - sub[sub[basin_field].isin(up)] - if sub is not None - else df[df[basin_field].isin(up)] - ) - - -def get_raster_wcs( - coordinates: Union[Iterable, Sequence[Union[float, str]]], - geographic: bool = True, - layer: str = None, - geoserver: str = GEO_URL, -) -> bytes: - """Return a subset of a raster image from the local GeoServer via WCS 2.0.1 protocol. - - For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based - on projected coordinate system (Easting, Northing) boundaries. - - Parameters - ---------- - coordinates : Sequence[Union[int, float, str]] - Geographic coordinates of the bounding box (left, down, right, up) - geographic : bool - If True, uses "Long" and "Lat" in WCS call. Otherwise uses "E" and "N". - layer : str - Layer name of raster exposed on GeoServer instance, e.g. 'public:CEC_NALCMS_LandUse_2010' - geoserver: str - The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. - - Returns - ------- - bytes - A GeoTIFF array. - - """ - (left, down, right, up) = coordinates - - if geographic: - x, y = "Long", "Lat" - else: - x, y = "E", "N" - - wcs = WebCoverageService(url=urljoin(geoserver, "ows"), version="2.0.1") - - try: - resp = wcs.getCoverage( - identifier=[layer], - format="image/tiff", - subsets=[(x, left, right), (y, down, up)], - timeout=120, - ) - - except Exception: - raise - - data = resp.read() - - try: - etree.fromstring(data) - # The response is an XML file describing the server error. - raise ChildProcessError(data) - - except etree.XMLSyntaxError: - # The response is the DEM array. - return data - - -# ~~~~ HydroBASINS functions ~~~~ # - - -def hydrobasins_upstream(feature: dict, domain: str) -> pd.DataFrame: - """Return a list of hydrobasins features located upstream. - - Parameters - ---------- - feature : dict - Basin feature attributes, including the fields ["HYBAS_ID", "NEXT_DOWN", "MAIN_BAS"]. - domain: {"na", "ar"} - Domain of the feature, North America or Arctic. - - Returns - ------- - pd.Series - Basins ids including `fid` and its upstream contributors. - """ - basin_field = "HYBAS_ID" - downstream_field = "NEXT_DOWN" - basin_family = "MAIN_BAS" - - # This does not work with `wfs.getfeature`. No filtering occurs when asking for specific attributes. 
- # wfs = WebFeatureService(url=urljoin(geoserver, "wfs"), version="2.0.0", timeout=30) - # layer = f"public:USGS_HydroBASINS_{'lake_' if lakes else ''}{domain}_lev{str(level).zfill(2)}" - # filter = PropertyIsEqualTo(propertyname=basin_family, literal=feature[basin_family]) - - # Fetch all features in the same basin - req = filter_hydrobasins_attributes_wfs( - attribute=basin_family, value=feature[basin_family], domain=domain - ) - df = gpd.read_file(req) - - # Filter upstream watersheds - return _determine_upstream_ids( - fid=feature[basin_field], - df=df, - basin_field=basin_field, - downstream_field=downstream_field, - ) - - -def hydrobasins_aggregate(gdf: pd.DataFrame) -> pd.DataFrame: - """Aggregate multiple HydroBASINS watersheds into a single geometry. - - Parameters - ---------- - gdf : pd.DataFrame - Watershed attributes indexed by HYBAS_ID - - Returns - ------- - pd.DataFrame - """ - i0 = gdf.index[0] - - # TODO: Review. Not sure it all makes sense. --> Looks fine to me? (TJS) - def aggfunc(x): - if x.name in ["COAST", "DIST_MAIN", "DIST_SINK"]: - return x.min() - elif x.name in ["SUB_AREA", "LAKE"]: - return x.sum() - else: - return x.loc[i0] - - # Buffer function to fix invalid geometries - gdf["geometry"] = gdf.buffer(0) - - return gdf.dissolve(by="MAIN_BAS", aggfunc=aggfunc) - - -def select_hybas_domain( - bbox: Optional[ - Tuple[ - Union[int, float], Union[int, float], Union[int, float], Union[int, float] - ] - ] = None, - point: Optional[Tuple[Union[int, float], Union[int, float]]] = None, -) -> str: - """ - Provided a given coordinate or boundary box, return the domain name of the geographic region - the coordinate is located within. - - Parameters - ---------- - bbox : Optional[Tuple[Union[float, int], Union[float, int], Union[float, int], Union[float, int]]] - Geographic coordinates of the bounding box (left, down, right, up). - point : Optional[Tuple[Union[float, int], Union[float, int]]] - Geographic coordinates of an intersecting point (lon, lat). - - Returns - ------- - str - The domain that the coordinate falls within. Possible results: "na", "ar". - """ - if bbox and point: - raise NotImplementedError("Provide either 'bbox' or 'point'.") - if point: - bbox = point * 2 - - for dom, fn in hybas_domains.items(): - with open(fn, "rb") as f: - zf = fiona.io.ZipMemoryFile(f) - coll = zf.open(fn.stem + ".shp") - for _ in coll.filter(bbox=bbox): - return dom - - raise LookupError(f"Could not find feature containing bbox: {bbox}.") - - -def filter_hydrobasins_attributes_wfs( - attribute: str, - value: Union[str, float, int], - domain: str, - geoserver: str = GEO_URL, -) -> str: - """Return a URL that formats and returns a remote GetFeatures request from the USGS HydroBASINS dataset. - - For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based - on projected coordinate system (Easting, Northing) boundaries. - - Parameters - ---------- - attribute : str - Attribute/field to be queried. - value: Union[str, float, int] - Value for attribute queried. - domain : {"na", "ar"} - The domain of the HydroBASINS data. - geoserver: str - The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. - - Returns - ------- - str - URL to the GeoJSON-encoded WFS response. 
- - """ - lakes = True - level = 12 - - layer = f"public:USGS_HydroBASINS_{'lake_' if lakes else ''}{domain}_lev{str(level).zfill(2)}" - q = _filter_feature_attributes_wfs( - attribute=attribute, value=value, layer=layer, geoserver=geoserver - ) - - return q - - -def get_hydrobasins_location_wfs( - coordinates: Tuple[ - Union[str, float, int], - Union[str, float, int], - ], - domain: str = None, - geoserver: str = GEO_URL, -) -> str: - """Return features from the USGS HydroBASINS data set using bounding box coordinates. - - For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based - on projected coordinate system (Easting, Northing) boundaries. - - Parameters - ---------- - coordinates : Tuple[Union[str, float, int], Union[str, float, int]] - Geographic coordinates of the bounding box (left, down, right, up). - domain : {"na", "ar"} - The domain of the HydroBASINS data. - geoserver: str - The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. - - Returns - ------- - str - A GeoJSON-encoded vector feature. - - """ - lakes = True - level = 12 - layer = f"public:USGS_HydroBASINS_{'lake_' if lakes else ''}{domain}_lev{str(level).zfill(2)}" - if not wfs_Point and not Intersects: - data = _get_location_wfs(bbox=coordinates * 2, layer=layer, geoserver=geoserver) - else: - data = _get_location_wfs(point=coordinates, layer=layer, geoserver=geoserver) - - return data - - -# ~~~~ Hydro Routing ~~~~ # - - -def hydro_routing_upstream( - fid: Union[str, float, int], - level: int = 12, - lakes: str = "1km", - geoserver: str = GEO_URL, -) -> pd.Series: - """Return a list of hydro routing features located upstream. - - Parameters - ---------- - fid : Union[str, float, int] - Basin feature ID code of the downstream feature. - level : int - Level of granularity requested for the lakes vector (range(7,13)). Default: 12. - lakes : {"1km", "all"} - Query the version of dataset with lakes under 1km in width removed ("1km") or return all lakes ("all"). - geoserver: str - The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. - - Returns - ------- - pd.Series - Basins ids including `fid` and its upstream contributors. - """ - wfs = WebFeatureService(url=urljoin(geoserver, "wfs"), version="2.0.0", timeout=30) - layer = f"public:routing_{lakes}Lakes_{str(level).zfill(2)}" - - # Get attributes necessary to identify upstream watersheds - resp = wfs.getfeature( - typename=layer, - propertyname=["SubId", "DowSubId"], - outputFormat="application/json", - ) - df = gpd.read_file(resp) - - # Identify upstream features - df_upstream = _determine_upstream_ids( - fid=fid, - df=df, - basin_field="SubId", - downstream_field="DowSubId", - ) - - # Fetch upstream features - resp = wfs.getfeature( - typename=layer, - featureid=df_upstream["id"].tolist(), - outputFormat="application/json", - ) - - return gpd.read_file(resp.read().decode()) - - -def get_hydro_routing_attributes_wfs( - attribute: Sequence[str], - level: int = 12, - lakes: str = "1km", - geoserver: str = GEO_URL, -) -> str: - """Return a URL that formats and returns a remote GetFeatures request from hydro routing dataset. - - For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based - on projected coordinate system (Easting, Northing) boundaries. - - Parameters - ---------- - attribute : list - Attributes/fields to be queried. 
- level : int - Level of granularity requested for the lakes vector (range(7,13)). Default: 12. - lakes : {"1km", "all"} - Query the version of dataset with lakes under 1km in width removed ("1km") or return all lakes ("all"). - geoserver: str - The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. - - Returns - ------- - str - URL to the GeoJSON-encoded WFS response. - - """ - layer = f"public:routing_{lakes}Lakes_{str(level).zfill(2)}" - return _get_feature_attributes_wfs( - attribute=attribute, layer=layer, geoserver=geoserver - ) - - -def filter_hydro_routing_attributes_wfs( - attribute: str = None, - value: Union[str, float, int] = None, - level: int = 12, - lakes: str = "1km", - geoserver: str = GEO_URL, -) -> str: - """Return a URL that formats and returns a remote GetFeatures request from hydro routing dataset. - - For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based - on projected coordinate system (Easting, Northing) boundaries. - - Parameters - ---------- - attribute : list - Attributes/fields to be queried. - value: str or int or float - The requested value for the attribute. - level : int - Level of granularity requested for the lakes vector (range(7,13)). Default: 12. - lakes : {"1km", "all"} - Query the version of dataset with lakes under 1km in width removed ("1km") or return all lakes ("all"). - geoserver: str - The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. - - Returns - ------- - str - URL to the GeoJSON-encoded WFS response. - - """ - layer = f"public:routing_{lakes}Lakes_{str(level).zfill(2)}" - return _filter_feature_attributes_wfs( - attribute=attribute, value=value, layer=layer, geoserver=geoserver - ) - - -def get_hydro_routing_location_wfs( - coordinates: Tuple[ - Union[int, float, str], - Union[str, float, int], - ], - lakes: str, - level: int = 12, - geoserver: str = GEO_URL, -) -> str: - """Return features from the hydro routing data set using bounding box coordinates. - - For geographic rasters, subsetting is based on WGS84 (Long, Lat) boundaries. If not geographic, subsetting based - on projected coordinate system (Easting, Northing) boundaries. - - Parameters - ---------- - coordinates : Tuple[Union[str, float, int], Union[str, float, int]] - Geographic coordinates of the bounding box (left, down, right, up). - lakes : {"1km", "all"} - Query the version of dataset with lakes under 1km in width removed ("1km") or return all lakes ("all"). - level : int - Level of granularity requested for the lakes vector (range(7,13)). Default: 12. - geoserver: str - The address of the geoserver housing the layer to be queried. Default: https://pavics.ouranos.ca/geoserver/. - - Returns - ------- - str - A GML-encoded vector feature. - - """ - layer = f"public:routing_{lakes}Lakes_{str(level).zfill(2)}" - - if not wfs_Point and not Intersects: - data = _get_location_wfs(bbox=coordinates * 2, layer=layer, geoserver=geoserver) - else: - data = _get_location_wfs(point=coordinates, layer=layer, geoserver=geoserver) - - return data diff --git a/ravenpy/utilities/io.py b/ravenpy/utilities/io.py deleted file mode 100644 index 35d442d6..00000000 --- a/ravenpy/utilities/io.py +++ /dev/null @@ -1,268 +0,0 @@ -""" -Tools for reading and writing geospatial data formats. 
-""" -import logging -import tarfile -import tempfile -import warnings -import zipfile -from pathlib import Path -from re import search -from typing import Iterable, List, Optional, Sequence, Union - -from . import gis_import_error_message - -try: - import fiona - import rasterio - from pyproj import CRS - from shapely.geometry import shape -except (ImportError, ModuleNotFoundError) as e: - msg = gis_import_error_message.format(Path(__file__).stem) - raise ImportError(msg) from e - -LOGGER = logging.getLogger("RavenPy") -WGS84 = 4326 - - -def address_append(address: Union[str, Path]) -> str: - """ - Formats a URL/URI to be more easily read with libraries such as "rasterstats" - - Parameters - ---------- - address: Union[str, Path] - URL/URI to a potential zip or tar file - - Returns - ------- - str - URL/URI prefixed for archive type - """ - zipped = search(r"(\.zip)", str(address)) - tarred = search(r"(\.tar)", str(address)) - - try: - if zipped: - return f"zip://{address}" - elif tarred: - return f"tar://{address}" - else: - LOGGER.info("No prefixes needed for address.") - return str(address) - except Exception: - LOGGER.error("Failed to prefix or parse URL %s." % address) - raise - - -def generic_extract_archive( - resources: Union[str, Path, List[Union[bytes, str, Path]]], - output_dir: Optional[Union[str, Path]] = None, -) -> List[str]: - """Extract archives (tar/zip) to a working directory. - - Parameters - ---------- - resources: Union[str, Path, List[Union[bytes, str, Path]]] - list of archive files (if netCDF files are in list, they are passed and returned as well in the return). - output_dir: Optional[Union[str, Path]] - string or Path to a working location (default: temporary folder). - - Returns - ------- - list - List of original or of extracted files - """ - - archive_types = [".tar", ".zip", ".7z"] - output_dir = output_dir or tempfile.gettempdir() - - if not isinstance(resources, list): - resources = [resources] - - files = list() - - for arch in resources: - if any(ext in str(arch).lower() for ext in archive_types): - try: - LOGGER.debug("archive=%s", arch) - file = Path(arch).name - - if file.endswith(".nc"): - files.append(Path(output_dir.join(arch))) - elif file.endswith(".tar"): - with tarfile.open(arch, mode="r") as tar: - tar.extractall(path=output_dir) - files.extend( - [str(Path(output_dir).joinpath(f)) for f in tar.getnames()] - ) - elif file.endswith(".zip"): - with zipfile.ZipFile(arch, mode="r") as zf: - zf.extractall(path=output_dir) - files.extend( - [str(Path(output_dir).joinpath(f)) for f in zf.namelist()] - ) - elif file.endswith(".7z"): - msg = "7z file extraction is not supported at this time." - LOGGER.warning(msg) - warnings.warn(msg, UserWarning) - else: - LOGGER.debug('File extension "%s" unknown' % file) - except Exception as e: - LOGGER.error( - "Failed to extract sub archive {{{}}}: {{{}}}".format(arch, e) - ) - else: - LOGGER.warning("No archives found. Continuing...") - return resources - - return files - - -def archive_sniffer( - archives: Union[str, Path, List[Union[str, Path]]], - working_dir: Optional[Union[str, Path]] = None, - extensions: Optional[Iterable[str]] = None, -) -> List[Union[str, Path]]: - """Return a list of locally unarchived files that match the desired extensions. 
- - Parameters - ---------- - archives : Union[str, Path, List[Union[str, Path]]] - archive location or list of archive locations - working_dir : Union[str, path] - string or Path to a working location - extensions : List[str] - list of accepted extensions - - Returns - ------- - List[Union[str, Path]] - List of files with matching accepted extensions - """ - potential_files = list() - - if not extensions: - extensions = [".gml", ".shp", ".geojson", ".gpkg", ".json"] - - decompressed_files = generic_extract_archive(archives, output_dir=working_dir) - for file in decompressed_files: - if any(ext in Path(file).suffix for ext in extensions): - potential_files.append(file) - return potential_files - - -def crs_sniffer( - *args: Union[str, Path, Sequence[Union[str, Path]]] -) -> Union[List[Union[str, int]], str, int]: - """Return the list of CRS found in files. - - Parameters - ---------- - args : Union[str, Path, Sequence[Union[str, Path]]] - Path(s) to the file(s) to examine. - - Returns - ------- - Union[List[str], str] - Returns either a list of CRSes or a single CRS definition, depending on the number of instances found. - """ - crs_list = list() - vectors = (".gml", ".shp", ".geojson", ".gpkg", ".json") - rasters = (".tif", ".tiff") - all_files = vectors + rasters - - for file in args: - found_crs = False - suffix = Path(file).suffix.lower() - try: - if suffix == ".zip": - file = archive_sniffer(file, extensions=all_files)[0] - suffix = Path(file).suffix.lower() - - if suffix in vectors: - if suffix == ".gpkg": - if len(fiona.listlayers(file)) > 1: - raise NotImplementedError - with fiona.open(file, "r") as src: - found_crs = CRS.from_wkt(src.crs_wkt).to_epsg() - elif suffix in rasters: - with rasterio.open(file, "r") as src: - found_crs = CRS.from_user_input(src.crs).to_epsg() - else: - raise FileNotFoundError("Invalid filename suffix") - except FileNotFoundError as e: - msg = f"{e}: Unable to open file {args}" - LOGGER.warning(msg) - raise Exception(msg) - except NotImplementedError as e: - msg = f"{e}: Multilayer GeoPackages are currently unsupported" - LOGGER.error(msg) - raise Exception(msg) - except RuntimeError: - pass - - crs_list.append(found_crs) - - if crs_list is None: - msg = f"No CRS definitions found in {args}." - raise FileNotFoundError(msg) - - if len(crs_list) == 1: - if not crs_list[0]: - msg = f"No CRS definitions found in {args}. Assuming {WGS84}." - LOGGER.warning(msg) - warnings.warn(msg, UserWarning) - return WGS84 - return crs_list[0] - return crs_list - - -def raster_datatype_sniffer(file: Union[str, Path]) -> str: - """Return the type of the raster stored in the file. - - Parameters - ---------- - file : Union[str, Path] - Path to file. - - Returns - ------- - str - rasterio datatype of array values - """ - try: - with rasterio.open(file, "r") as src: - dtype = src.dtypes[0] - return dtype - except rasterio.errors.RasterioError: - msg = f"Unable to read data type from {file}." - LOGGER.exception(msg) - raise ValueError(msg) - - -def get_bbox(vector: Union[str, Path], all_features: bool = True) -> tuple: - """Return bounding box of all features or the first feature in file. - - Parameters - ---------- - vector : str - Path to file storing vector features. - all_features : bool - Return the bounding box for all features. Default: True. - - Returns - ------- - tuple - Geographic coordinates of the bounding box (lon0, lat0, lon1, lat1). 
- - """ - - if not all_features: - with fiona.open(vector, "r") as src: - for feature in src: - geom = shape(feature["geometry"]) - return geom.bounds - - with fiona.open(vector, "r") as src: - return src.bounds diff --git a/ravenpy/utilities/nb_graphs.py b/ravenpy/utilities/nb_graphs.py index 96261c2f..1875c478 100644 --- a/ravenpy/utilities/nb_graphs.py +++ b/ravenpy/utilities/nb_graphs.py @@ -4,8 +4,6 @@ The graphic outputs are meant to be displayed in a notebook. In a console, use `hvplot.show(fig)` to render the figures. """ -from pathlib import Path - import numpy as np import xarray as xr from matplotlib import pyplot as plt diff --git a/ravenpy/utilities/vector.py b/ravenpy/utilities/vector.py new file mode 100644 index 00000000..4c06a963 --- /dev/null +++ b/ravenpy/utilities/vector.py @@ -0,0 +1,370 @@ +""" +Tools for performing geospatial translations and transformations. +""" +import json +import logging +import math +import tarfile +import tempfile +import warnings +import zipfile +from os import PathLike +from pathlib import Path +from typing import Iterable, List, Optional, Union + +import geojson +import pandas as pd +import pygml +import shapefile +from pyproj import CRS +from shapely.geometry import ( + GeometryCollection, + MultiPolygon, + Polygon, + box, + mapping, + shape, +) +from shapely.ops import transform + +RASTERIO_TIFF_COMPRESSION = "lzw" +LOGGER = logging.getLogger("RavenPy") +WGS84 = 4326 + + +def geom_properties(geom: Union[Polygon, MultiPolygon, GeometryCollection]) -> dict: + """Return a dictionary of geometry properties. + + Parameters + ---------- + geom : Union[Polygon, MultiPolygon, GeometryCollection] + Geometry to analyze. + + Returns + ------- + dict + Dictionary storing polygon area, centroid location, perimeter and gravelius shape index. + + Notes + ----- + Some of the properties should be computed using an equal-area projection. + """ + + geom = shape(geom) + lon, lat = geom.centroid.x, geom.centroid.y + if (lon > 180) or (lon < -180) or (lat > 90) or (lat < -90): + LOGGER.warning("Shape centroid is not in decimal degrees.") + area = geom.area + length = geom.length + gravelius = length / 2 / math.sqrt(math.pi * area) + parameters = { + "area": area, + "centroid": (lon, lat), + "perimeter": length, + "gravelius": gravelius, + } + return parameters + + +def geom_transform( + geom: Union[GeometryCollection, shape], + source_crs: Union[str, int, CRS] = WGS84, + target_crs: Union[str, int, CRS] = None, + always_xy: bool = True, +) -> GeometryCollection: + """Change the projection of a geometry. + + Assuming a geometry's coordinates are in a `source_crs`, compute the new coordinates under the `target_crs`. + + Parameters + ---------- + geom : shapely.geometry.GeometryCollection or shapely.geometry.shape + Source geometry. + source_crs : int or str or pyproj.CRS + Projection identifier (proj4) for the source geometry, e.g. '+proj=longlat +datum=WGS84 +no_defs'. + target_crs : int or str or pyproj.CRS + Projection identifier (proj4) for the target geometry. + always_xy : bool + + Returns + ------- + shapely.geometry.GeometryCollection + Reprojected geometry. 
+    """
+    try:
+        from pyproj import Transformer  # noqa
+
+        if isinstance(source_crs, (int, str)):
+            source = CRS.from_user_input(source_crs)
+        else:
+            source = source_crs
+
+        if isinstance(target_crs, (int, str)):
+            target = CRS.from_user_input(target_crs)
+        else:
+            target = target_crs
+
+        transform_func = Transformer.from_crs(source, target, always_xy=always_xy)
+        reprojected = transform(transform_func.transform, geom)
+
+        return reprojected
+    except Exception as err:
+        msg = f"{err}: Failed to reproject geometry."
+        LOGGER.error(msg)
+        raise Exception(msg)
+
+
+def geojson_object_transform(
+    collection: geojson.FeatureCollection,
+    source_crs: Union[int, str, CRS],
+    target_crs: Union[int, str, CRS],
+    always_xy: bool = True,
+) -> geojson.FeatureCollection:
+    """Reproject all features of a GeoJSON FeatureCollection.
+
+    Parameters
+    ----------
+    collection : geojson.FeatureCollection
+        Features to be reprojected.
+    source_crs : int or str or pyproj.CRS
+        Projection identifier for the source geometries.
+    target_crs : int or str or pyproj.CRS
+        Projection identifier for the target geometries.
+    always_xy : bool
+        Use the traditional (longitude, latitude) axis order regardless of CRS conventions.
+
+    Returns
+    -------
+    geojson.FeatureCollection
+        The reprojected features.
+    """
+    output = list()
+    for feature in collection.features:
+        try:
+            geom = shape(feature.geometry)
+            transformed = geom_transform(
+                geom, source_crs, target_crs, always_xy=always_xy
+            )
+            feature.geometry = mapping(transformed)
+            if hasattr(feature, "bbox"):
+                bbox = box(*feature.bbox)
+                transformed = geom_transform(
+                    bbox, source_crs, target_crs, always_xy=always_xy
+                )
+                feature.bbox = mapping(transformed)
+            output.append(feature)
+
+        except Exception as err:
+            LOGGER.exception(
+                "{}: Unable to reproject feature {}".format(err, collection)
+            )
+            raise
+
+    return geojson.FeatureCollection(output)
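A minimal sketch of how these helpers chain together; the polygon coordinates and the EPSG:6622 target (an equal-area CRS covering Quebec) are illustrative assumptions, not values taken from the patch:

    from shapely.geometry import Polygon

    from ravenpy.utilities.vector import WGS84, geom_properties, geom_transform

    # A small lon/lat square somewhere in southern Quebec (illustrative only).
    poly = Polygon([(-72.0, 46.0), (-71.0, 46.0), (-71.0, 47.0), (-72.0, 47.0)])

    # Reproject to an equal-area CRS before measuring, since geom_properties notes
    # that area and perimeter are only meaningful in projected coordinates.
    projected = geom_transform(poly, source_crs=WGS84, target_crs=6622)

    props = geom_properties(projected)
    print(props["area"], props["perimeter"], props["gravelius"])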
+
+
+def generic_vector_file_transform(
+    vector: Union[str, PathLike],
+    projected: Union[str, PathLike],
+    source_crs: Union[str, int, CRS] = WGS84,
+    target_crs: Union[str, int, CRS] = None,
+) -> Union[str, PathLike]:
+    """Reproject all features and layers within a vector file and write them out as GeoJSON.
+
+    Parameters
+    ----------
+    vector : str or PathLike
+        Path to a file containing a valid vector layer (GeoJSON, Shapefile, or vector GML).
+    projected : str or PathLike
+        Path to a file to be written.
+    source_crs : int or str or pyproj.CRS
+        Projection identifier for the source geometry. Default: 4326 (WGS84).
+    target_crs : int or str or pyproj.CRS
+        Projection identifier for the target geometry.
+
+    Returns
+    -------
+    str or PathLike
+        Path to the written GeoJSON file (same as `projected`).
+    """
+
+    if target_crs is None:
+        raise ValueError("No target CRS is defined.")
+
+    if Path(vector).suffix in [".tar", ".zip", ".7z"]:
+        warnings.warn(
+            f"File {Path(vector).name} should be extracted from archive before reprojection."
+        )
+        vector = next(iter(archive_sniffer(vector)))
+
+    if isinstance(vector, Path):
+        vector = vector.as_posix()
+
+    if vector.lower().endswith(".shp"):
+        src = geojson.loads(json.dumps(shapefile.Reader(vector).__geo_interface__))
+    elif vector.lower().endswith("json"):
+        src = geojson.load(open(vector))
+    elif vector.lower().endswith("gml"):
+        # Intended approach once GML support is implemented:
+        # src = geojson.loads(
+        #     json.dumps(pygml.parse(open(vector, "rb").read()).__geo_interface__)
+        # )
+        raise NotImplementedError("GML reprojection is not supported yet.")
+    else:
+        raise FileNotFoundError(f"{vector} is not a valid GeoJSON or Shapefile.")
+
+    with open(projected, "w") as sink:
+        output = geojson_object_transform(src, source_crs, target_crs)
+        sink.write(json.dumps(output))
+
+    return projected
+
+
+def _generic_extract_archive(
+    resources: Union[str, PathLike, List[Union[bytes, str, PathLike]]],
+    output_dir: Optional[Union[str, PathLike]] = None,
+) -> List[str]:
+    """Extract archives (tar/zip) to a working directory.
+
+    Parameters
+    ----------
+    resources : Union[str, Path, List[Union[bytes, str, Path]]]
+        List of archive files (netCDF files in the list are passed through and returned as-is).
+    output_dir : Optional[Union[str, Path]]
+        String or Path to a working location (default: temporary folder).
+
+    Returns
+    -------
+    list
+        List of original or of extracted files.
+    """
+
+    archive_types = [".tar", ".zip", ".7z"]
+    output_dir = output_dir or tempfile.gettempdir()
+
+    if not isinstance(resources, list):
+        resources = [resources]
+
+    files = list()
+
+    for arch in resources:
+        if any(ext in str(arch).lower() for ext in archive_types):
+            try:
+                LOGGER.debug("archive=%s", arch)
+                file = Path(arch).name
+
+                if file.endswith(".nc"):
+                    files.append(Path(output_dir).joinpath(arch))
+                elif file.endswith(".tar"):
+                    with tarfile.open(arch, mode="r") as tar:
+                        tar.extractall(path=output_dir)
+                        files.extend(
+                            [str(Path(output_dir).joinpath(f)) for f in tar.getnames()]
+                        )
+                elif file.endswith(".zip"):
+                    with zipfile.ZipFile(arch, mode="r") as zf:
+                        zf.extractall(path=output_dir)
+                        files.extend(
+                            [str(Path(output_dir).joinpath(f)) for f in zf.namelist()]
+                        )
+                elif file.endswith(".7z"):
+                    msg = "7z file extraction is not supported at this time."
+                    LOGGER.warning(msg)
+                    warnings.warn(msg, UserWarning)
+                else:
+                    LOGGER.debug('File extension "%s" unknown' % file)
+            except Exception as e:
+                LOGGER.error(
+                    "Failed to extract sub archive {{{}}}: {{{}}}".format(arch, e)
+                )
+        else:
+            LOGGER.warning("No archives found. Continuing...")
+            return resources
+
+    return files
+
+
+def archive_sniffer(
+    archives: Union[str, PathLike, Iterable[Union[str, PathLike]]],
+    working_dir: Optional[Union[str, PathLike]] = None,
+    extensions: Optional[Iterable[str]] = None,
+) -> List[Union[str, PathLike]]:
+    """Return a list of locally unarchived files that match the desired extensions.
+
+    Parameters
+    ----------
+    archives : Union[str, Path, List[Union[str, Path]]]
+        Archive location or list of archive locations.
+    working_dir : Union[str, Path]
+        String or Path to a working location.
+    extensions : List[str]
+        List of accepted extensions.
+
+    Returns
+    -------
+    List[Union[str, Path]]
+        List of files with matching accepted extensions.
+    """
+    potential_files = list()
+
+    if not extensions:
+        extensions = [".gml", ".shp", ".geojson", ".gpkg", ".json"]
+
+    decompressed_files = _generic_extract_archive(archives, output_dir=working_dir)
+    for file in decompressed_files:
+        if any(ext in Path(file).suffix for ext in extensions):
+            potential_files.append(file)
+    return potential_files
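With the GIS extras removed, a zipped Shapefile can be unpacked with archive_sniffer and loaded through vector_to_dataframe (defined just below), which falls back to pyshp when geopandas is unavailable. A minimal sketch, assuming a hypothetical archive name routing_product.zip:

    from pathlib import Path

    from ravenpy.utilities.vector import archive_sniffer, vector_to_dataframe

    # "routing_product.zip" is a placeholder; point this at a real zipped Shapefile.
    candidates = archive_sniffer("routing_product.zip")
    shp = next(f for f in candidates if Path(f).suffix.lower() == ".shp")

    # Attribute table plus a shapely 'geometry' column, without fiona/GDAL.
    df = vector_to_dataframe(shp)
    print(len(df), list(df.columns))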
+
+
+def vector_to_dataframe(
+    shp_path: Union[str, PathLike, shapefile.Shape, geojson.feature.FeatureCollection]
+) -> pd.DataFrame:
+    """
+    Read a vector source into a pandas DataFrame with a 'geometry' column holding
+    the geometry information. Uses geopandas when it is installed and falls back to
+    the pyshp and geojson packages otherwise.
+
+    Parameters
+    ----------
+    shp_path : str or os.PathLike or shapefile.Shape or geojson.feature.FeatureCollection
+
+    Returns
+    -------
+    pd.DataFrame
+
+    Notes
+    -----
+    Example adapted from https://gist.github.com/aerispaha/f098916ac041c286ae92d037ba5c37ba
+    """
+    if isinstance(shp_path, (str, PathLike)):
+        shp_path = Path(shp_path)
+
+    try:
+        import geopandas as gpd
+
+        df = gpd.read_file(shp_path)
+
+    except ModuleNotFoundError:
+        try:
+            if isinstance(shp_path, shapefile.Shape):
+                sf = shp_path
+            else:
+                sf = shapefile.Reader(shp_path.as_posix())
+
+            fields = [x[0] for x in sf.fields][1:]
+            records = sf.records()
+            geoms = [s for s in sf.shapes()]
+
+            # write into a dataframe
+            df = pd.DataFrame(columns=fields, data=records)
+            df = df.assign(geometry=geoms)
+            df.geometry = df.geometry.apply(shape)
+
+        except shapefile.ShapefileException:
+            try:
+                if shp_path.suffix.lower() in {".json", ".geojson"}:
+                    shp_path = geojson.load(open(shp_path))
+
+                if isinstance(shp_path, geojson.feature.FeatureCollection):
+                    df = pd.json_normalize(shp_path.features, max_level=0)
+                    df.geometry = df.geometry.apply(shape)
+                else:
+                    raise FileNotFoundError()
+
+            except (FileNotFoundError, UnicodeDecodeError) as e:
+                raise ValueError(
+                    "Unable to find vector in {}".format(str(shp_path))
+                ) from e
+
+    return df
diff --git a/requirements_gis.txt b/requirements_gis.txt
deleted file mode 100644
index f8a05ab0..00000000
--- a/requirements_gis.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-affine
-fiona
-geopandas>=0.9.0
-lxml
-owslib
-pyproj>=3.0.0
-rasterio
-rioxarray
-shapely
diff --git a/setup.py b/setup.py
index 79274a6c..50cb8196 100644
--- a/setup.py
+++ b/setup.py
@@ -28,14 +28,19 @@
     "click",
     "climpred>=2.1",
     "dask",
+    "geojson",
     "haversine",
     "matplotlib",
     "netCDF4",
     "numpy",
     "pandas",
     "pydantic",
+    "pygml",
+    "pyproj",
+    "pyshp",
     "requests",
     "scipy",
+    "shapely",
     "statsmodels",
     "xarray>=0.18",
     "cf-xarray",
@@ -52,22 +57,9 @@
     dependency for dependency in open("requirements_docs.txt").readlines()
 ]
 
-gis_requirements = [
-    dependency for dependency in open("requirements_gis.txt").readlines()
+dev_requirements = [
+    dependency for dependency in open("requirements_dev.txt").readlines()
 ]
-
-# Special GDAL handling
-try:
-    gdal_version = subprocess.run(
-        ["gdal-config", "--version"], capture_output=True
-    ).stdout.decode("utf-8")
-    gis_requirements.append(f"gdal=={gdal_version}")
-except subprocess.CalledProcessError:
-    pass
-
-dev_requirements = gis_requirements.copy()
-dev_requirements.extend(
-    [dependency for dependency in open("requirements_dev.txt").readlines()]
-)
 
 # Idea taken from:
https://stackoverflow.com/a/25176606/787842 @@ -206,7 +198,7 @@ def run(self): setup( author="David Huard", author_email="huard.david@ouranos.ca", - python_requires=">=3.6", + python_requires=">=3.7", classifiers=[ "Development Status :: 2 - Pre-Alpha", "Intended Audience :: Developers", @@ -242,7 +234,6 @@ def run(self): extras_require=dict( dev=dev_requirements, docs=docs_requirements, - gis=gis_requirements, ), url="https://github.com/CSHS-CWRA/ravenpy", version="0.7.2", diff --git a/tests/conftest.py b/tests/conftest.py index ec24ef0a..379d03bf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ import pytest import xarray as xr -from xclim.indicators.land._streamflow import fit, stats +from xclim.indicators.land import fit, stats from ravenpy.utilities.testdata import get_local_testdata diff --git a/tests/test_base.py b/tests/test_base.py index fbf8a8f9..7a08eb6b 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -4,7 +4,6 @@ import numpy as np import pytest -import ravenpy from ravenpy.models import Ostrich, Raven, RavenError from ravenpy.models.base import get_diff_level from ravenpy.utilities.testdata import get_local_testdata diff --git a/tests/test_blended.py b/tests/test_blended.py index f836c4c1..3409b0b5 100644 --- a/tests/test_blended.py +++ b/tests/test_blended.py @@ -1,5 +1,4 @@ import datetime as dt -import tempfile import numpy as np @@ -7,8 +6,6 @@ from ravenpy.models import BLENDED, BLENDED_OST from ravenpy.utilities.testdata import get_local_testdata -from .common import _convert_2d - TS = get_local_testdata( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) diff --git a/tests/test_canadianshield.py b/tests/test_canadianshield.py index ad9dca8b..ecaf686a 100644 --- a/tests/test_canadianshield.py +++ b/tests/test_canadianshield.py @@ -1,15 +1,12 @@ import datetime as dt -import tempfile import numpy as np import pytest from ravenpy.config import ConfigError -from ravenpy.models import CANADIANSHIELD, CANADIANSHIELD_OST, HRU, LU +from ravenpy.models import CANADIANSHIELD, CANADIANSHIELD_OST, LU from ravenpy.utilities.testdata import get_local_testdata -from .common import _convert_2d - TS = get_local_testdata( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) diff --git a/tests/test_cli.py b/tests/test_cli.py index ea6037ef..ac1c6bd2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,6 +1,7 @@ import re import netCDF4 as nc4 +import pytest from click.testing import CliRunner from ravenpy.cli import aggregate_forcings_to_hrus, generate_grid_weights @@ -67,6 +68,7 @@ def test_generate_grid_weights_with_multiple_subids(self, tmp_path): weight = float(re.search("1 52 (.+)", output).group(1)) assert abs(weight - 0.2610203097218425) < 1e-04 + @pytest.mark.slow def test_generate_grid_weights_with_nc_input_and_1d_coords(self, tmp_path): runner = CliRunner() output_path = tmp_path / "bla.rvt" @@ -96,6 +98,7 @@ def test_generate_grid_weights_with_nc_input_and_1d_coords(self, tmp_path): weight = float(re.search("4 3731 (.+)", output).group(1)) assert abs(weight - 0.0034512752779023515) < 1e-04 + @pytest.mark.slow def test_generate_grid_weights_with_shp_input(self, tmp_path): runner = CliRunner() output_path = tmp_path / "bla.rvt" @@ -104,6 +107,8 @@ def test_generate_grid_weights_with_shp_input(self, tmp_path): get_local_testdata("raven-routing-sample/finalcat_hru_info.zip"), "-o", output_path, + "-i", + 3162, ] params = map(str, params) @@ -122,6 +127,7 @@ def 
test_generate_grid_weights_with_shp_input(self, tmp_path): weight = float(re.search("13 238 (.+)", output).group(1)) assert abs(weight - 0.5761414847779369) < 1e-04 + @pytest.mark.slow def test_generate_grid_weights_with_weight_rescaling(self, tmp_path): runner = CliRunner() output_path = tmp_path / "bla.rvt" @@ -132,6 +138,8 @@ def test_generate_grid_weights_with_weight_rescaling(self, tmp_path): "0.42", "-o", output_path, + "--input-shape-crs", + 3162, ] params = map(str, params) diff --git a/tests/test_data_assimilation.py b/tests/test_data_assimilation.py index 33a834b9..e9dd01d0 100644 --- a/tests/test_data_assimilation.py +++ b/tests/test_data_assimilation.py @@ -3,11 +3,8 @@ import matplotlib.pyplot as plt import numpy as np -import pytest import xarray as xr -from ravenpy.config.commands import BasinIndexCommand, HRUState -from ravenpy.config.rvs import RVC from ravenpy.models import GR4JCN from ravenpy.utilities.data_assimilation import assimilate, perturbation from ravenpy.utilities.testdata import get_local_testdata diff --git a/tests/test_emulators.py b/tests/test_emulators.py index 3de47c88..5186d42c 100644 --- a/tests/test_emulators.py +++ b/tests/test_emulators.py @@ -1,27 +1,19 @@ import datetime as dt import os -import tempfile import zipfile from dataclasses import astuple, replace from pathlib import Path import numpy as np import pytest -import xarray as xr -from ravenpy.config.commands import ( +from ravenpy.config.commands import ( # GriddedForcingCommand,; LandUseClassesCommand,; ObservationDataCommand,; SoilClassesCommand,; SoilProfilesCommand,; VegetationClassesCommand, ChannelProfileCommand, EvaluationPeriod, - GriddedForcingCommand, GridWeightsCommand, HRUStateVariableTableCommand, - LandUseClassesCommand, - ObservationDataCommand, SBGroupPropertyMultiplierCommand, - SoilClassesCommand, - SoilProfilesCommand, Sub, - VegetationClassesCommand, ) from ravenpy.config.rvs import RVI from ravenpy.models import ( diff --git a/tests/test_external_dataset_access.py b/tests/test_external_dataset_access.py deleted file mode 100644 index 2be5f7aa..00000000 --- a/tests/test_external_dataset_access.py +++ /dev/null @@ -1,22 +0,0 @@ -import datetime as dt - -import numpy as np -import pytest - -from ravenpy.utilities.forecasting import get_CASPAR_dataset, get_ECCC_dataset - - -@pytest.mark.online -def test_get_CASPAR_dataset(): - ds, _ = get_CASPAR_dataset("GEPS", dt.datetime(2018, 8, 31)) - - -@pytest.mark.online -@pytest.mark.xfail(error=OSError, reason="Network may be unreliable") -def test_get_ECCC_dataset(): - ds, _ = get_ECCC_dataset("GEPS") - - ns = np.datetime64("now") - ds.time.isel(time=0).values - n_hours = ns / np.timedelta64(1, "h") - - assert n_hours <= 36 diff --git a/tests/test_geo_utilities.py b/tests/test_geo_utilities.py deleted file mode 100644 index 58292d20..00000000 --- a/tests/test_geo_utilities.py +++ /dev/null @@ -1,344 +0,0 @@ -import tempfile -from pathlib import Path - -import numpy as np -import pytest - -from ravenpy.utilities.testdata import get_local_testdata - - -class TestOperations: - analysis = pytest.importorskip("ravenpy.utilities.analysis") - io = pytest.importorskip("ravenpy.utilities.io") - - zipped_file = get_local_testdata("polygons/mars.zip") - non_zipped_file = get_local_testdata("polygons/mars.geojson") - - def test_circular_mean_aspect(self): - northern_angles = np.array([330, 30, 15, 345]) - slight_northeast_angles = np.append(northern_angles, [0.000001]) - eastern_angles = np.arange(45, 125, 1.25) - southwest_angles = 
np.array([181, 182.25, 183.5, 222]) - - assert self.analysis.circular_mean_aspect(northern_angles) == 360 - np.testing.assert_almost_equal( - self.analysis.circular_mean_aspect(slight_northeast_angles), 0, decimal=3 - ) - assert self.analysis.circular_mean_aspect(eastern_angles) == 84.375 - np.testing.assert_almost_equal( - self.analysis.circular_mean_aspect(southwest_angles), 191.88055987 - ) - - def test_address_append(self): - non_existing_tarred_file = "polygons.tar" - - assert "zip://" in self.io.address_append(self.zipped_file) - assert "tar://" in self.io.address_append(non_existing_tarred_file) - assert not self.io.address_append(self.non_zipped_file).startswith( - ("zip://", "tar://") - ) - - def test_archive_sniffer(self, tmp_path): - probable_shp = self.io.archive_sniffer(self.zipped_file) - assert Path(probable_shp[0]).name == "mars.shp" - - probable_shp = self.io.archive_sniffer(self.zipped_file, working_dir=tmp_path) - assert Path(probable_shp[0]).name == "mars.shp" - - def test_archive_extract(self, tmp_path): - assert self.zipped_file.exists() - - files = list() - with tempfile.TemporaryDirectory(dir=tmp_path) as tdir: - files.extend( - self.io.generic_extract_archive(self.zipped_file, output_dir=tdir) - ) - assert len(files) == 5 - for f in files: - assert Path(f).exists() - assert not np.any([Path(f).exists() for f in files]) - - files = self.io.generic_extract_archive(self.zipped_file) - assert np.all([Path(f).exists() for f in files]) - - -class TestFileInfoFuncs: - checks = pytest.importorskip("ravenpy.utilities.checks") - io = pytest.importorskip("ravenpy.utilities.io") - - zipped_file = get_local_testdata("polygons/mars.zip") - geojson_file = get_local_testdata("polygons/mars.geojson") - raster_file = get_local_testdata( - "nasa/Mars_MGS_MOLA_DEM_georeferenced_region_compressed.tiff" - ) - - non_existing_file = "unreal.zip" - - def test_raster_datatype_sniffer(self): - datatype = self.io.raster_datatype_sniffer(self.raster_file) - assert datatype.lower() == "uint8" - - def test_crs_sniffer(self): - assert self.io.crs_sniffer(self.zipped_file) == 4326 - assert set(self.io.crs_sniffer(self.geojson_file, self.raster_file)) == {4326} - - def test_single_file_check(self): - one = [Path(__file__).parent / "__init__.py"] - zero = list() - three = [1, Path().root, 2.333] - - assert self.checks.single_file_check(one) == one[0] - - with pytest.raises(FileNotFoundError): - self.checks.single_file_check(zero) - - with pytest.raises(NotImplementedError): - self.checks.single_file_check(three) - - def test_boundary_check(self): - # NOTE: does not presently accept zipped files. 
- - with pytest.warns(None): - self.checks.boundary_check([self.geojson_file, self.raster_file], max_y=80) - - with pytest.warns(UserWarning): - self.checks.boundary_check([self.geojson_file, self.raster_file], max_y=15) - - with pytest.raises(FileNotFoundError): - self.checks.boundary_check([self.non_existing_file]) - - @pytest.mark.skip(reason="Not presently testable") - def test_multipolygon_check(self): - pass - - -class TestGdalOgrFunctions: - analysis = pytest.importorskip("ravenpy.utilities.analysis") - fiona = pytest.importorskip("fiona") - sgeo = pytest.importorskip("shapely.geometry") - - geojson_file = get_local_testdata("polygons/mars.geojson") - raster_file = get_local_testdata( - "nasa/Mars_MGS_MOLA_DEM_georeferenced_region_compressed.tiff" - ) - - def test_gdal_aspect_not_projected(self, tmp_path): - aspect_grid = self.analysis.gdal_aspect_analysis(self.raster_file) - np.testing.assert_almost_equal( - self.analysis.circular_mean_aspect(aspect_grid), 10.9119033 - ) - - # test with creation of a temporary file - aspect_tempfile = tempfile.NamedTemporaryFile( - prefix="aspect_", suffix=".tiff", delete=False, dir=tmp_path - ).name - aspect_grid = self.analysis.gdal_aspect_analysis( - self.raster_file, set_output=aspect_tempfile - ) - np.testing.assert_almost_equal( - self.analysis.circular_mean_aspect(aspect_grid), 10.9119033 - ) - assert Path(aspect_tempfile).stat().st_size > 0 - - # Slope values are high due to data values using Geographic CRS - def test_gdal_slope_not_projected(self, tmp_path): - slope_grid = self.analysis.gdal_slope_analysis(self.raster_file) - np.testing.assert_almost_equal(slope_grid.min(), 0.0) - np.testing.assert_almost_equal(slope_grid.mean(), 64.4365427) - np.testing.assert_almost_equal(slope_grid.max(), 89.71747, 5) - - slope_tempfile = tempfile.NamedTemporaryFile( - prefix="slope_", suffix=".tiff", delete=False, dir=tmp_path - ).name - slope_grid = self.analysis.gdal_slope_analysis( - self.raster_file, set_output=slope_tempfile - ) - np.testing.assert_almost_equal(slope_grid.mean(), 64.4365427) - assert Path(slope_tempfile).stat().st_size > 0 - - # Slope values are high due to data values using Geographic CRS - def test_dem_properties(self): - dem_properties = self.analysis.dem_prop(self.raster_file) - np.testing.assert_almost_equal(dem_properties["aspect"], 10.911, 3) - np.testing.assert_almost_equal(dem_properties["elevation"], 79.0341, 4) - np.testing.assert_almost_equal(dem_properties["slope"], 64.43654, 5) - - with self.fiona.open(self.geojson_file) as gj: - feature = next(iter(gj)) - geom = self.sgeo.shape(feature["geometry"]) - - region_dem_properties = self.analysis.dem_prop(self.raster_file, geom=geom) - np.testing.assert_almost_equal(region_dem_properties["aspect"], 280.681, 3) - np.testing.assert_almost_equal(region_dem_properties["elevation"], 145.8899, 4) - np.testing.assert_almost_equal(region_dem_properties["slope"], 61.26508, 5) - - # Slope values are high due to data values using Geographic CRS - def test_geom_properties(self): - with self.fiona.open(self.geojson_file) as gj: - iterable = iter(gj) - feature_1 = next(iterable) - feature_2 = next(iterable) - geom_1 = self.sgeo.shape(feature_1["geometry"]) - geom_2 = self.sgeo.shape(feature_2["geometry"]) - - geom_1_properties = self.analysis.geom_prop(geom_1) - np.testing.assert_almost_equal(geom_1_properties["area"], 357.9811899) - np.testing.assert_almost_equal( - geom_1_properties["centroid"], (-128.3959836, 19.1572278) - ) - 
np.testing.assert_almost_equal(geom_1_properties["perimeter"], 68.4580077) - np.testing.assert_almost_equal(geom_1_properties["gravelius"], 1.0206790) - - geom_2_properties = self.analysis.geom_prop(geom_2) - np.testing.assert_almost_equal(geom_2_properties["area"], 361.5114221) - np.testing.assert_almost_equal( - geom_2_properties["centroid"], (-70.2394629, 45.7698029) - ) - np.testing.assert_almost_equal(geom_2_properties["perimeter"], 96.1035859) - np.testing.assert_almost_equal(geom_2_properties["gravelius"], 1.4258493) - - -class TestGenericGeoOperations: - analysis = pytest.importorskip("ravenpy.utilities.analysis") - geo = pytest.importorskip("ravenpy.utilities.geo") - - fiona = pytest.importorskip("fiona") - rasterio = pytest.importorskip("rasterio") - sgeo = pytest.importorskip("shapely.geometry") - - geojson_file = get_local_testdata("polygons/mars.geojson") - raster_file = get_local_testdata( - "nasa/Mars_MGS_MOLA_DEM_georeferenced_region_compressed.tiff" - ) - - def test_vector_reprojection(self, tmp_path): - # TODO: It would be awesome if this returned a temporary filepath if no file given. - reproj_file = tempfile.NamedTemporaryFile( - prefix="reproj_", suffix=".geojson", delete=False, dir=tmp_path - ).name - self.geo.generic_vector_reproject( - self.geojson_file, projected=reproj_file, target_crs="EPSG:3348" - ) - - with self.fiona.open(reproj_file) as gj: - iterable = iter(gj) - feature = next(iterable) - geom = self.sgeo.shape(feature["geometry"]) - - geom_properties = self.analysis.geom_prop(geom) - np.testing.assert_almost_equal(geom_properties["area"], 6450001762792, 0) - np.testing.assert_almost_equal( - geom_properties["centroid"], (1645777.7589835, -933242.1203143) - ) - np.testing.assert_almost_equal(geom_properties["perimeter"], 9194343.1759303) - np.testing.assert_almost_equal(geom_properties["gravelius"], 1.0212589) - - def test_raster_warp(self, tmp_path): - # TODO: It would be awesome if this returned a temporary filepath if no file given. - # TODO: either use `output` or `reprojected/warped` for these functions. - reproj_file = tempfile.NamedTemporaryFile( - prefix="reproj_", suffix=".tiff", delete=False, dir=tmp_path - ).name - self.geo.generic_raster_warp( - self.raster_file, output=reproj_file, target_crs="EPSG:3348" - ) - - # EPSG:3348 is a very general transformation; Some tolerance should be allowed. 
- with self.rasterio.open(reproj_file) as gt: - assert gt.crs.to_epsg() == 3348 - np.testing.assert_allclose(gt.bounds.left, -2077535, atol=3) - np.testing.assert_allclose(gt.bounds.right, 15591620, atol=3) - np.testing.assert_allclose(gt.bounds.bottom, -4167898, atol=3) - np.testing.assert_allclose(gt.bounds.top, 5817014, atol=3) - - data = gt.read(1) # read band 1 (red) - assert data.min() == 0 - assert data.max() == 255 - np.testing.assert_almost_equal(data.mean(), 60.729, 3) - - def test_warped_raster_slope(self, tmp_path): - reproj_file = tempfile.NamedTemporaryFile( - prefix="reproj_", suffix=".tiff", delete=False, dir=tmp_path - ).name - self.geo.generic_raster_warp( - self.raster_file, output=reproj_file, target_crs="EPSG:3348" - ) - slope_grid = self.analysis.gdal_slope_analysis(reproj_file) - - np.testing.assert_almost_equal(slope_grid.min(), 0.0) - np.testing.assert_almost_equal(slope_grid.mean(), 0.0034991) - np.testing.assert_almost_equal(slope_grid.max(), 0.3523546) - - def test_warped_raster_aspect(self, tmp_path): - reproj_file = tempfile.NamedTemporaryFile( - prefix="reproj_", suffix=".tiff", delete=False, dir=tmp_path - ).name - self.geo.generic_raster_warp( - self.raster_file, output=reproj_file, target_crs="EPSG:3348" - ) - aspect_grid = self.analysis.gdal_aspect_analysis(reproj_file) - - np.testing.assert_almost_equal( - self.analysis.circular_mean_aspect(aspect_grid), 7.780, decimal=3 - ) - - def test_raster_clip(self, tmp_path): - with self.fiona.open(self.geojson_file) as gj: - feature = next(iter(gj)) - geom = self.sgeo.shape(feature["geometry"]) - - clipped_file = tempfile.NamedTemporaryFile( - prefix="reproj_", suffix=".tiff", delete=False, dir=tmp_path - ).name - self.geo.generic_raster_clip(self.raster_file, clipped_file, geometry=geom) - - with self.rasterio.open(clipped_file) as gt: - assert gt.crs.to_epsg() == 4326 - - data = gt.read(1) # read band 1 (red) - assert data.min() == 0 - assert data.max() == 255 - np.testing.assert_almost_equal(data.mean(), 102.8222965) - - def test_shapely_pyproj_transform(self): - with self.fiona.open(self.geojson_file) as gj: - feature = next(iter(gj)) - geom = self.sgeo.shape(feature["geometry"]) - - transformed = self.geo.geom_transform(geom, target_crs="EPSG:3348") - np.testing.assert_almost_equal( - transformed.bounds, - (188140.3820599, -2374936.1363096, 3086554.0207066, 409691.2180337), - ) - np.testing.assert_almost_equal(transformed.centroid.x, 1645777.7589835) - np.testing.assert_almost_equal(transformed.centroid.y, -933242.1203143) - np.testing.assert_almost_equal(transformed.area, 6450001762792, 0) - - -class TestGIS: - checks = pytest.importorskip("ravenpy.utilities.checks") - io = pytest.importorskip("ravenpy.utilities.io") - sgeo = pytest.importorskip("shapely.geometry") - - vector_file = get_local_testdata("polygons/mars.geojson") - - def test_get_bbox_single(self): - w, s, n, e = self.io.get_bbox(self.vector_file, all_features=False) - np.testing.assert_almost_equal(w, -139.8514262) - np.testing.assert_almost_equal(s, 8.3754794) - np.testing.assert_almost_equal(n, -117.4753973) - np.testing.assert_almost_equal(e, 29.6327068) - - def test_get_bbox_all(self): - w, s, n, e = self.io.get_bbox(self.vector_file) - np.testing.assert_almost_equal(w, -139.8514262) - np.testing.assert_almost_equal(s, 8.3754794) - np.testing.assert_almost_equal(n, -38.7397456) - np.testing.assert_almost_equal(e, 64.1757015) - - def test_feature_contains(self): - point = -69.0, 45 - assert isinstance(self.checks.feature_contains(point, 
self.vector_file), dict) - assert isinstance( - self.checks.feature_contains(self.sgeo.Point(point), self.vector_file), dict - ) diff --git a/tests/test_geoserver.py b/tests/test_geoserver.py deleted file mode 100644 index 9f977a19..00000000 --- a/tests/test_geoserver.py +++ /dev/null @@ -1,213 +0,0 @@ -import tempfile - -import numpy as np -import pytest - -from ravenpy.utilities.testdata import get_local_testdata - -pytestmark = pytest.mark.online - -# FIXME: Remove XFAIL marks once OWSLib > 0.24.1 is released. - - -class TestHydroBASINS: - geoserver = pytest.importorskip("ravenpy.utilities.geoserver") - - fiona = pytest.importorskip("fiona") - gpd = pytest.importorskip("geopandas") - sgeo = pytest.importorskip("shapely.geometry") - - def test_select_hybas_na_domain(self): - bbox = (-68.0, 50.0) * 2 - dom = self.geoserver.select_hybas_domain(bbox) - assert dom == "na" - - def test_select_hybas_ar_domain(self): - bbox = (-114.65, 61.35) * 2 - dom = self.geoserver.select_hybas_domain(bbox) - assert dom == "ar" - - @pytest.mark.xfail(reason="OWSLib>0.24.1 is needed.") - def test_get_hydrobasins_location_wfs(self, tmp_path): - lake_winnipeg = ( - -98.03575958286369, - 52.88238524279493, - ) - resp = self.geoserver.get_hydrobasins_location_wfs( - coordinates=lake_winnipeg, domain="na" - ) - feat = self.gpd.GeoDataFrame.from_features(resp) - geom = self.sgeo.shape(feat["geometry"][0]) - assert geom.bounds == (-99.2731, 50.3603, -96.2578, 53.8705) - np.testing.assert_almost_equal(geom.area, 3.2530867) - - @pytest.mark.xfail(reason="OWSLib>0.24.1 is needed.") - def test_get_hydrobasins_attributes_wfs(self, tmp_path): - rio_grande = (-80.475, 8.4) - resp = self.geoserver.get_hydrobasins_location_wfs( - coordinates=rio_grande * 2, domain="na" - ) - feat = self.gpd.GeoDataFrame.from_features(resp) - main_bas = feat["MAIN_BAS"][0] - - region_url = self.geoserver.filter_hydrobasins_attributes_wfs( - attribute="MAIN_BAS", value=main_bas, domain="na" - ) - gdf = self.gpd.read_file(region_url) - - assert len(gdf) == 18 - assert gdf.crs.to_epsg() == 4326 - assert set(gdf["MAIN_BAS"].values) == {7120000210} - - basin = gdf.dissolve(by="MAIN_BAS") - np.testing.assert_equal( - basin.geometry.bounds.values, - np.array([[-80.8542, 8.2459, -80.1375, 8.7004]]), - ) - - @pytest.mark.xfail(reason="OWSLib>0.24.1 is needed.") - def test_hydrobasins_upstream_aggregate(self, tmp_path): - puerto_cortes = (-83.525, 8.96, -83.520, 8.97) - resp = self.geoserver.get_hydrobasins_location_wfs( - coordinates=puerto_cortes, domain="na" - ) - feat = self.gpd.GeoDataFrame.from_features(resp) - - gdf_upstream = self.geoserver.hydrobasins_upstream(feat.loc[0], domain="na") - assert len(gdf_upstream) == 73 - aggregated = self.geoserver.hydrobasins_aggregate(gdf_upstream) - - assert len(aggregated) == 1 - assert float(aggregated.SUB_AREA.values) == 4977.8 - np.testing.assert_equal( - aggregated.geometry.bounds.values, - np.array([[-83.8167, 8.7625, -82.7125, 9.5875]]), - ) - - -class TestHydroRouting: - geoserver = pytest.importorskip("ravenpy.utilities.geoserver") - - fiona = pytest.importorskip("fiona") - gpd = pytest.importorskip("geopandas") - sgeo = pytest.importorskip("shapely.geometry") - - @pytest.mark.xfail(reason="OWSLib>0.24.1 is needed.") - def test_hydro_routing_locations(self, tmp_path): - lake_winnipeg = ( - -98.03575958286369, - 52.88238524279493, - ) - resp = self.geoserver.get_hydro_routing_location_wfs( - coordinates=lake_winnipeg, lakes="all" - ) - feat = self.gpd.GeoDataFrame.from_features(resp) - geom = 
feat["geometry"][0] - assert geom.bounds == (-99.3083, 50.1875, -95.9875, 54.0542) - # Note: This value is not in sq. km. - np.testing.assert_almost_equal(geom.area, 4.0978987) - - @pytest.mark.slow - def test_get_hydro_routing_attributes_wfs(self): - region_url = self.geoserver.filter_hydro_routing_attributes_wfs( - attribute="IsLake", value="1.0", lakes="1km", level="07" - ) - gdf = self.gpd.read_file(region_url) - assert len(gdf) == 11415 - - @pytest.mark.slow - @pytest.mark.xfail(reason="OWSLib>0.24.1 is needed.") - def test_hydro_routing_upstream(self, tmp_path): - amadjuak = (-71.225, 65.05, -71.220, 65.10) - resp = self.geoserver.get_hydro_routing_location_wfs( - coordinates=amadjuak, lakes="1km", level=7 - ) - feat = self.gpd.GeoDataFrame.from_features(resp) - subbasin_id = feat["SubId"][0] - - gdf_upstream = self.geoserver.hydro_routing_upstream( - subbasin_id, lakes="1km", level=7 - ) - - assert len(gdf_upstream) == 33 # TODO: Verify this with the model maintainers. - - -class TestWFS: - geoserver = pytest.importorskip("ravenpy.utilities.geoserver") - - fiona = pytest.importorskip("fiona") - gpd = pytest.importorskip("geopandas") - sgeo = pytest.importorskip("shapely.geometry") - - @pytest.mark.xfail(reason="OWSLib>0.24.1 is needed.") - def test_get_location_wfs_point(self, tmp_path): - las_vegas = (-115.136389, 36.175) - usa_admin_bounds = "public:usa_admin_boundaries" - resp = self.geoserver._get_location_wfs(point=las_vegas, layer=usa_admin_bounds) - feat = self.gpd.GeoDataFrame.from_features(resp) - - geom = feat["geometry"][0] - assert geom.bounds == (-120.001, 35.0019, -114.0417, 41.9948) - # Note: This value is not in sq. km. - np.testing.assert_almost_equal(geom.area, 29.9690150) - - def test_get_location_wfs_bbox(self, tmp_path): - new_vegas = (-115.697, 34.742, -114.279, 36.456) - usa_admin_bounds = "public:usa_admin_boundaries" - resp = self.geoserver._get_location_wfs(bbox=new_vegas, layer=usa_admin_bounds) - feat = self.gpd.GeoDataFrame.from_features(resp) - - assert len(feat["geometry"]) == 3 - assert set(feat.STATE_NAME.unique()) == {"Nevada", "California", "Arizona"} - - def test_get_feature_attributes_wfs(self): - state_name = "Nevada" - usa_admin_bounds = "public:usa_admin_boundaries" - - vector_url = self.geoserver._filter_feature_attributes_wfs( - attribute="STATE_NAME", value=state_name, layer=usa_admin_bounds - ) - gdf = self.gpd.read_file(vector_url) - assert len(gdf) == 1 - assert gdf.STATE_NAME.unique() == "Nevada" - - -class TestWCS: - io = pytest.importorskip("ravenpy.utilities.io") - geoserver = pytest.importorskip("ravenpy.utilities.geoserver") - geo = pytest.importorskip("ravenpy.utilities.geo") - rasterio = pytest.importorskip("rasterio") - - vector_file = get_local_testdata("polygons/Saskatoon.geojson") - - def test_get_raster_wcs(self, tmp_path): - # TODO: This CRS needs to be redefined using modern pyproj-compatible strings. 
- nalcms_crs = "+proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs=True" - - with tempfile.NamedTemporaryFile( - prefix="reprojected_", suffix=".json", dir=tmp_path - ) as projected: - self.geo.generic_vector_reproject( - self.vector_file, projected.name, target_crs=nalcms_crs - ) - bbox = self.io.get_bbox(projected.name) - - raster_url = "public:CEC_NALCMS_LandUse_2010" - raster_bytes = self.geoserver.get_raster_wcs( - bbox, geographic=False, layer=raster_url - ) - - with tempfile.NamedTemporaryFile( - prefix="wcs_", suffix=".tiff", dir=tmp_path - ) as raster_file: - with open(raster_file.name, "wb") as rf: - rf.write(raster_bytes) - rf.close() - with self.rasterio.open(rf.name) as src: - assert src.width == 650 - assert src.height == 745 - np.testing.assert_array_equal( - src.lnglat(), (-106.64193764047552, 52.1564202369763) - ) - data = src.read() - assert np.unique(data).tolist() == [1, 5, 8, 10, 14, 15, 16, 17, 18] diff --git a/tests/test_hypr.py b/tests/test_hypr.py index f43cf09a..e8bc7302 100644 --- a/tests/test_hypr.py +++ b/tests/test_hypr.py @@ -1,13 +1,10 @@ import datetime as dt -import tempfile import numpy as np -from ravenpy.models import HRU, HYPR, HYPR_OST, LU +from ravenpy.models import HYPR, HYPR_OST, LU from ravenpy.utilities.testdata import get_local_testdata -from .common import _convert_2d - TS = get_local_testdata( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) diff --git a/tests/test_raven_multi_model.py b/tests/test_raven_multi_model.py index b0bb0230..f4d29e60 100644 --- a/tests/test_raven_multi_model.py +++ b/tests/test_raven_multi_model.py @@ -1,7 +1,7 @@ import datetime as dt import zipfile -from ravenpy.models import GR4JCN, RavenMultiModel +from ravenpy.models import RavenMultiModel from ravenpy.utilities.testdata import get_local_testdata diff --git a/tests/test_ravenio.py b/tests/test_ravenio.py index 9d5e293b..1e89cad3 100644 --- a/tests/test_ravenio.py +++ b/tests/test_ravenio.py @@ -1,63 +1,59 @@ from ravenpy.utilities import ravenio from ravenpy.utilities.testdata import get_local_testdata +# import pytest # from ravenpy.models import raven_templates - -""" -class TestStartDate: - - def test_grj4_cemaneige(self): - start, end = ravenio.start_end_date(TESTDATA['gr4j-cemaneige'].values()) - assert start == dt.datetime(2000, 7, 1) - assert end == dt.datetime(2002, 7, 1) - - def test_raven_gr4j_cemaneige(self): - start, end = ravenio.start_end_date([TESTDATA['raven-gr4j-cemaneige-nc-ts'],]) - assert start == dt.datetime(1954, 1, 1) - assert end == dt.datetime(2010, 12, 31) - -pytest.mark.skip - - -class TestSetupModel: - gr4j = odict( - rvi=dict(run_name='run1', Start_Date='2000-01-01 00:00:00', End_Date=None, Duration=10, TimeStep=1.0, - EvaluationMetrics='NASH_SUTCLIFFE RMSE'), - rvp=odict(GR4J_X1=.7, GR4J_X2=.7, GR4J_X3=19., GR4J_X4=2.09, AvgAnnualSnow=123, AirSnowCoeff=.75), - rvc=odict(SOIL_0=1, SOIL_1=2), - rvh=dict(NAME='Test', AREA=45, ELEVATION=3, LATITUDE=45, LONGITUDE=-154), - rvt=dict(pr='data/data.nc', prsn='data/data.nc', tasmin='data/data.nc', tasmax='data/data.nc', - evspsbl='data/data.nc', water_volume_transport_in_river_channel='data/data.nc', - pr_var='rain', prsn_var='snow', tasmin_var='tmin', tasmax_var='tmax', evspsbl_var='pet', - water_volume_transport_in_river_channel_var='qobs') - ) - - def test_gr4j(self): - name = 'raven-gr4j-cemaneige' - outpath = tempfile.mkdtemp() - ravenio.setup_model(name, outpath, self.gr4j) - - # Make sure there are no {} in the filled templates - for fn 
in glob.glob(os.path.join(outpath, 'model', '*.rv?')): - with open(fn) as f: - txt = f.read() - assert '{' not in txt - assert '}' not in txt - - -@pytest.mark.skip -class TestReadDiagnostics: - def test_simple(self): - import StringIO - diag = StringIO.StringIO() - diag.write('observed data series,filename,DIAG_NASH_SUTCLIFFE,DIAG_RMSE,\n' - 'HYDROGRAPH,data_obs/Salmon-River-Near-Prince-George_meteo_daily.nc,-0.746096,62.1502,') - diag.seek(0) - - out = ravenio.read_diagnostics(diag) - assert out['DIAG_RMSE'] == 62.1502 - -""" +# +# class TestStartDate: +# +# def test_grj4_cemaneige(self): +# start, end = ravenio.start_end_date(TESTDATA['gr4j-cemaneige'].values()) +# assert start == dt.datetime(2000, 7, 1) +# assert end == dt.datetime(2002, 7, 1) +# +# def test_raven_gr4j_cemaneige(self): +# start, end = ravenio.start_end_date([TESTDATA['raven-gr4j-cemaneige-nc-ts'],]) +# assert start == dt.datetime(1954, 1, 1) +# assert end == dt.datetime(2010, 12, 31) +# +# @pytest.mark.skip +# class TestSetupModel: +# gr4j = odict( +# rvi=dict(run_name='run1', Start_Date='2000-01-01 00:00:00', End_Date=None, Duration=10, TimeStep=1.0, +# EvaluationMetrics='NASH_SUTCLIFFE RMSE'), +# rvp=odict(GR4J_X1=.7, GR4J_X2=.7, GR4J_X3=19., GR4J_X4=2.09, AvgAnnualSnow=123, AirSnowCoeff=.75), +# rvc=odict(SOIL_0=1, SOIL_1=2), +# rvh=dict(NAME='Test', AREA=45, ELEVATION=3, LATITUDE=45, LONGITUDE=-154), +# rvt=dict(pr='data/data.nc', prsn='data/data.nc', tasmin='data/data.nc', tasmax='data/data.nc', +# evspsbl='data/data.nc', water_volume_transport_in_river_channel='data/data.nc', +# pr_var='rain', prsn_var='snow', tasmin_var='tmin', tasmax_var='tmax', evspsbl_var='pet', +# water_volume_transport_in_river_channel_var='qobs') +# ) +# +# def test_gr4j(self): +# name = 'raven-gr4j-cemaneige' +# outpath = tempfile.mkdtemp() +# ravenio.setup_model(name, outpath, self.gr4j) +# +# # Make sure there are no {} in the filled templates +# for fn in glob.glob(os.path.join(outpath, 'model', '*.rv?')): +# with open(fn) as f: +# txt = f.read() +# assert '{' not in txt +# assert '}' not in txt +# +# +# @pytest.mark.skip +# class TestReadDiagnostics: +# def test_simple(self): +# import StringIO +# diag = StringIO.StringIO() +# diag.write('observed data series,filename,DIAG_NASH_SUTCLIFFE,DIAG_RMSE,\n' +# 'HYDROGRAPH,data_obs/Salmon-River-Near-Prince-George_meteo_daily.nc,-0.746096,62.1502,') +# diag.seek(0) +# +# out = ravenio.read_diagnostics(diag) +# assert out['DIAG_RMSE'] == 62.1502 class TestParseConfiguration: diff --git a/tests/test_rv.py b/tests/test_rv.py index 60bdefbe..2cf802e9 100644 --- a/tests/test_rv.py +++ b/tests/test_rv.py @@ -1,18 +1,13 @@ import datetime as dt import re -from collections import namedtuple -from pathlib import Path import pytest -import ravenpy -from ravenpy.config.commands import ( - BasinStateVariablesCommand, +from ravenpy.config.commands import ( # BasinStateVariablesCommand,; HRUStateVariableTableCommand, EvaluationPeriod, GriddedForcingCommand, - HRUStateVariableTableCommand, ) -from ravenpy.config.rvs import OST, RVC, RVH, RVI, RVP, RVT, Config +from ravenpy.config.rvs import OST, RVC, RVH, RVI, RVP, Config from ravenpy.extractors import ( RoutingProductGridWeightExtractor, RoutingProductShapefileExtractor, @@ -71,7 +66,7 @@ def test_evaluation_metric_multiplier(self): with pytest.raises(ValueError): config.rvi.evaluation_metrics = ["PCT_BIAS"] - config.ost.evaluation_metric_multiplier + _ = config.ost.evaluation_metric_multiplier class TestRVI: diff --git a/tests/test_vector_utils.py 
b/tests/test_vector_utils.py new file mode 100644 index 00000000..ae36e7a4 --- /dev/null +++ b/tests/test_vector_utils.py @@ -0,0 +1,92 @@ +import tempfile + +import geojson +import numpy as np +import pytest +import shapely.geometry as sgeo + +from ravenpy.utilities import vector +from ravenpy.utilities.testdata import get_local_testdata + + +class TestVectorUtils: + geojson_file = get_local_testdata("polygons/mars.geojson") + routing_product_shapefile = get_local_testdata( + "raven-routing-sample/finalcat_hru_info.zip" + ) + + def test_shapely_pyproj_geojson_transform(self): + feat = geojson.load(open(self.geojson_file)) + geom = sgeo.shape(feat[0].geometry) + + transformed = vector.geom_transform(geom, target_crs="EPSG:3348") + np.testing.assert_almost_equal( + transformed.bounds, (188140, -2374936, 3086554, 409691), 0 + ) + np.testing.assert_almost_equal(transformed.centroid.x, 1645777, 0) + np.testing.assert_almost_equal(transformed.centroid.y, -933242, 0) + np.testing.assert_almost_equal(transformed.area, 6450001868342, 0) + + @pytest.mark.slow + def test_shapely_pyproj_shapefile_transform_properties(self, tmp_path): + reproj_file = tempfile.NamedTemporaryFile( + prefix="reproj_", suffix=".geojson", delete=False, dir=tmp_path + ).name + vector.generic_vector_file_transform( + self.routing_product_shapefile, + projected=reproj_file, + target_crs="EPSG:3348", + ) + + feat = geojson.load(open(reproj_file)) + geom = sgeo.shape(feat[0].geometry) + + geom_properties = vector.geom_properties(geom) + np.testing.assert_almost_equal(geom_properties["area"], 310646674, 0) + np.testing.assert_almost_equal( + geom_properties["centroid"], (7460914.0, 1413210.6), 1 + ) + np.testing.assert_almost_equal(geom_properties["perimeter"], 371435.385, 3) + np.testing.assert_almost_equal(geom_properties["gravelius"], 5.9449059) + + def test_shapely_pyproj_geojson_transform_properties(self, tmp_path): + # TODO: It would be awesome if this returned a temporary filepath if no file given. 
+ reproj_file = tempfile.NamedTemporaryFile( + prefix="reproj_", suffix=".geojson", delete=False, dir=tmp_path + ).name + vector.generic_vector_file_transform( + self.geojson_file, projected=reproj_file, target_crs="EPSG:3348" + ) + + feat = geojson.load(open(reproj_file)) + geom = sgeo.shape(feat[0].geometry) + + geom_properties = vector.geom_properties(geom) + np.testing.assert_almost_equal(geom_properties["area"], 6450001868342, 0) + np.testing.assert_almost_equal( + geom_properties["centroid"], (1645777.7, -933242.1), 1 + ) + np.testing.assert_almost_equal(geom_properties["perimeter"], 9194343, 0) + np.testing.assert_almost_equal(geom_properties["gravelius"], 1.0212589) + + # Slope values are high due to data values using Geographic CRS + def test_geom_properties_multi_feature(self): + feat = geojson.load(open(self.geojson_file)) + geom_0 = sgeo.shape(feat[0].geometry) + geom_1 = sgeo.shape(feat[1].geometry) + + geom_1_properties = vector.geom_properties(geom_0) + np.testing.assert_almost_equal(geom_1_properties["area"], 357.981, 3) + np.testing.assert_almost_equal( + geom_1_properties["centroid"], (-128.396, 19.157), 3 + ) + np.testing.assert_almost_equal(geom_1_properties["perimeter"], 68.458, 3) + np.testing.assert_almost_equal(geom_1_properties["gravelius"], 1.021, 3) + + geom_2_properties = vector.geom_properties(geom_1) + np.testing.assert_almost_equal(geom_2_properties["area"], 361.511, 3) + np.testing.assert_almost_equal( + geom_2_properties["centroid"], (-70.239, 45.770), 3 + ) + np.testing.assert_almost_equal(geom_2_properties["perimeter"], 96.104, 3) + np.testing.assert_almost_equal(geom_2_properties["gravelius"], 1.426, 3) diff --git a/tox.ini b/tox.ini index 2088a834..e9cb33fa 100644 --- a/tox.ini +++ b/tox.ini @@ -36,13 +36,15 @@ deps = ; requirements.txt with the pinned versions and uncomment the following line: ; -r{toxinidir}/requirements.txt commands = - # Deal with some GDAL silliness - python -m pip install --upgrade --force-reinstall --no-deps --no-cache-dir GDAL=={env:GDAL_VERSION} --global-option=build_ext --global-option="-I/usr/include/gdal" + # Deal with some GDAL silliness and install geopandas if needed + gdal: python -m pip install --upgrade --force-reinstall --no-deps --no-cache-dir GDAL=={env:GDAL_VERSION} --global-option=build_ext --global-option="-I/usr/include/gdal" + gdal: python -m pip install geopandas # Install the Raven and Ostrich binaries python -m pip install --no-user --verbose . --install-option="--with-binaries" # Clone the testing support data git clone https://github.com/Ouranosinc/raven-testdata {envtmpdir}/raven-testdata - env RAVENPY_TESTDATA_PATH={envtmpdir}/raven-testdata pytest --cov tests + !slow: env RAVENPY_TESTDATA_PATH={envtmpdir}/raven-testdata pytest --cov tests -m "not slow" + slow: env RAVENPY_TESTDATA_PATH={envtmpdir}/raven-testdata pytest --cov tests - coveralls allowlist_externals = make
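
Note on the `gravelius` values asserted in the new tests/test_vector_utils.py: they are consistent with the standard Gravelius compactness coefficient, G = P / (2 * sqrt(pi * A)), applied to the `perimeter` and `area` fields returned by `vector.geom_properties`. The implementation inside `ravenpy.utilities.vector` is not shown in this diff, so the helper below is only a sketch of that relationship; the function name `gravelius` and its signature are assumptions made for illustration, not the library's API.

import math

def gravelius(perimeter: float, area: float) -> float:
    """Gravelius compactness coefficient: shape perimeter divided by the
    perimeter of a circle having the same area (assumed standard definition)."""
    return perimeter / (2 * math.sqrt(math.pi * area))

# Cross-check against the values asserted in test_geom_properties_multi_feature
# (units are degrees, since mars.geojson is in a geographic CRS):
assert round(gravelius(68.458, 357.981), 3) == 1.021   # first feature
assert round(gravelius(96.104, 361.511), 3) == 1.426   # second feature

The same relation holds for the reprojected EPSG:3348 case (perimeter 9194343 m, area 6450001868342 m^2 gives roughly 1.0213), which matches the asserted `gravelius` of 1.0212589.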