Skip to content

Commit c1b8676

Browse files
use default multiprocessing in gdal.py and remove all logging from module
1 parent 5739cb6 commit c1b8676

File tree

1 file changed

+7
-50
lines changed
  • openeogeotrellis/integrations

1 file changed

+7
-50
lines changed

openeogeotrellis/integrations/gdal.py

+7-50
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import logging
21
import multiprocessing
32
from dataclasses import dataclass
43
from pathlib import Path
@@ -11,8 +10,6 @@
1110

1211
from openeogeotrellis.utils import get_s3_binary_file_contents, _make_set_for_key
1312

14-
logger = logging.getLogger(__name__)
15-
1613

1714
"""Output from GDAL.Info.
1815
@@ -111,9 +108,10 @@ def _extract_gdal_asset_raster_metadata(
111108

112109

113110
def error_handler(e):
114-
logger.warning(f"Error while looking up result metadata, may be incomplete. {str(e)}")
111+
#logger.warning(f"Error while looking up result metadata, may be incomplete. {str(e)}")
112+
pass
115113

116-
pool = multiprocessing.get_context("spawn").Pool(10)
114+
pool = multiprocessing.Pool(10)
117115
job = [pool.apply_async(_get_metadata_callback, (asset_path, asset_md,job_dir,), error_callback=error_handler) for asset_path, asset_md in asset_metadata.items()]
118116
pool.close()
119117
pool.join()
@@ -139,7 +137,6 @@ def error_handler(e):
139137
is_some_raster_md_missing = True
140138

141139
except Exception as e:
142-
logger.warning("Could not retrieve raster metadata: " + str(e))
143140
is_some_raster_md_missing = True
144141

145142
#
@@ -149,39 +146,25 @@ def error_handler(e):
149146
def _get_metadata_callback(asset_path: str, asset_md: Dict[str, str], job_dir: Path):
150147

151148
mime_type: str = asset_md.get("type", "")
152-
logger.debug(
153-
f"_export_result_metadata: {asset_path=}, "
154-
+ f"file's MIME type: {mime_type}, "
155-
+ f"job dir (based on output file): {job_dir=}"
156-
)
157149

158150
# Skip assets that are clearly not images.
159151
if asset_path.endswith(".json"):
160-
logger.info(f"_export_result_metadata: Asset file is not an image but JSON, {asset_path=}")
161152
return None
162153

163154
# The asset path should be relative to the job directory.
164155
abs_asset_path: Path = get_abs_path_of_asset(asset_path, job_dir)
165-
logger.debug(f"{asset_path=} maps to absolute path: {abs_asset_path=} , " + f"{abs_asset_path.exists()=}")
166156

167157
asset_href: str = asset_md.get("href", "")
168158
if not abs_asset_path.exists() and asset_href.startswith("s3://"):
169159
try:
170160
abs_asset_path.write_bytes(get_s3_binary_file_contents(asset_href))
171161
except Exception as exc:
172-
logger.error(
173-
"Could not download asset from object storage: "
174-
+ f"asset={asset_path}, href={asset_href!r}, exception: {exc!r}"
175-
)
162+
pass
176163

177164
asset_gdal_metadata: AssetRasterMetadata = read_gdal_raster_metadata(abs_asset_path)
178165
# If gdal could not extract the projection metadata from the file
179166
# (perhaps the file is corrupt).
180167
if asset_gdal_metadata.could_not_read_file:
181-
logger.warning(
182-
"Could not get projection extension metadata for following asset:"
183-
+ f" '{asset_path}', {abs_asset_path=}"
184-
)
185168
return None
186169
else:
187170
return (asset_path, asset_gdal_metadata.to_dict())
@@ -219,7 +202,6 @@ def read_gdal_raster_metadata(asset_path: Union[str, Path]) -> AssetRasterMetada
219202
and in that version the gdal.Info function include these properties directly
220203
in the key "stac" of the dictionary it returns.
221204
"""
222-
logger.debug(f"{__name__}.read_projection_extension_metadata: {asset_path=}")
223205
return parse_gdal_raster_metadata(read_gdal_info(str(asset_path)))
224206

225207

@@ -301,7 +283,6 @@ def _process_gdalinfo_for_netcdf_subdatasets(
301283
sub_datasets_proj[sub_ds_uri] = sub_ds_md
302284

303285
stats_info = _get_raster_statistics(sub_ds_gdal_info, band_name)
304-
logger.info(f"_process_gdalinfo_for_netcdf_subdatasets:: {stats_info=}")
305286
sub_datasets_stats[sub_ds_uri] = stats_info
306287

307288
proj_info = {}
@@ -318,27 +299,18 @@ def _process_gdalinfo_for_netcdf_subdatasets(
318299
proj_info["proj:epsg"] = epsg_codes.pop()
319300

320301
ds_band_names = [band for bands in sub_datasets_stats.values() for band in bands.keys()]
321-
logger.debug(f"{ds_band_names=}")
322302

323303
all_raster_stats = {}
324304

325305
# We can only copy each band's stats if there are no duplicate bands across
326306
# the subdatasets. If we find duplicate bands there is likely a bug.
327307
# Besides it is not obvious how we would need to merge statistics across
328308
# subdatasets, if the bands occur multiple times.
329-
if sorted(set(ds_band_names)) != sorted(ds_band_names):
330-
logger.warning(f"There are duplicate bands in {ds_band_names=}, Can not merge the bands' statistics.")
331-
else:
332-
logger.info(f"There are no duplicate bands in {ds_band_names=}, Will use all bands' statistics in result.")
333-
for bands in sub_datasets_stats.values():
334-
for band_name, stats in bands.items():
335-
all_raster_stats[band_name] = stats
336-
337-
logger.debug(f"{all_raster_stats=}")
309+
for bands in sub_datasets_stats.values():
310+
for band_name, stats in bands.items():
311+
all_raster_stats[band_name] = stats
338312

339313
result = AssetRasterMetadata(gdal_info=gdal_info, projection=proj_info, statistics=all_raster_stats)
340-
logger.debug(f"_process_gdalinfo_for_netcdf_subdatasets:: returning {result=}")
341-
342314
return AssetRasterMetadata(gdal_info=gdal_info, projection=proj_info, statistics=all_raster_stats)
343315

344316

@@ -417,12 +389,6 @@ def get_abs_path_of_asset(asset_filename: str, job_dir: Union[str, Path]) -> Pat
417389
418390
:return: the absolute path to the asset file, inside job_dir.
419391
"""
420-
logger.debug(
421-
f"{__name__}.get_abs_path_of_asset: {asset_filename=}, {job_dir=}, {Path.cwd()=}, "
422-
+ f"{Path(job_dir).is_absolute()=}, {Path(job_dir).exists()=}, "
423-
+ f"{Path(asset_filename).exists()=}"
424-
)
425-
426392
abs_asset_path = Path(asset_filename)
427393
if not abs_asset_path.is_absolute():
428394
abs_asset_path = Path(job_dir).resolve() / asset_filename
@@ -449,8 +415,6 @@ def read_gdal_info(asset_uri: str) -> GDALInfo:
449415
:return:
450416
GDALInfo: which is a dictionary that contains the output from `gdal.Info()`.
451417
"""
452-
logger.debug(f"{__name__}.read_gdal_info: {asset_uri=}")
453-
454418
# By default, gdal does not raise exceptions but returns error codes and prints
455419
# error info on stdout. We don't want that. At the very least, it should go to the logs.
456420
# See https://gdal.org/api/python_gotchas.html
@@ -464,15 +428,8 @@ def read_gdal_info(asset_uri: str) -> GDALInfo:
464428
except Exception as exc:
465429
# TODO: Specific exception type(s) would be better, but we weren't able to find which
466430
# specific exceptions gdal.Info might raise.
467-
logger.warning(
468-
"Could not get projection extension metadata, "
469-
+ f"gdal.Info failed for following asset: '{asset_uri}' . "
470-
+ "Either file does not exist or else it is probably not a raster. "
471-
+ f"Exception from GDAL: {exc}"
472-
)
473431
return {}
474432
else:
475-
logger.debug(f"{asset_uri=}, {data_gdalinfo=}")
476433
return data_gdalinfo
477434

478435

0 commit comments

Comments
 (0)