1
- import logging
2
1
import multiprocessing
3
2
from dataclasses import dataclass
4
3
from pathlib import Path
11
10
12
11
from openeogeotrellis .utils import get_s3_binary_file_contents , _make_set_for_key
13
12
14
- logger = logging .getLogger (__name__ )
15
-
16
13
17
14
"""Output from GDAL.Info.
18
15
@@ -111,9 +108,10 @@ def _extract_gdal_asset_raster_metadata(
111
108
112
109
113
110
def error_handler(e):
    """Swallow an error raised by an async per-asset metadata lookup.

    Used as the ``error_callback`` for ``pool.apply_async``: a failure to
    extract metadata for one asset must not abort the whole batch, so the
    error is deliberately ignored and the result metadata is simply allowed
    to be incomplete.

    :param e: the exception raised in the worker; intentionally unused.
    """
    # Deliberate best-effort: drop the error. (Commented-out logging removed;
    # delete dead code rather than keeping it commented out.)
    pass
115
113
116
- pool = multiprocessing .get_context ( "spawn" ). Pool (10 )
114
+ pool = multiprocessing .Pool (10 )
117
115
job = [pool .apply_async (_get_metadata_callback , (asset_path , asset_md ,job_dir ,), error_callback = error_handler ) for asset_path , asset_md in asset_metadata .items ()]
118
116
pool .close ()
119
117
pool .join ()
@@ -139,7 +137,6 @@ def error_handler(e):
139
137
is_some_raster_md_missing = True
140
138
141
139
except Exception as e :
142
- logger .warning ("Could not retrieve raster metadata: " + str (e ))
143
140
is_some_raster_md_missing = True
144
141
145
142
#
@@ -149,39 +146,25 @@ def error_handler(e):
149
146
def _get_metadata_callback (asset_path : str , asset_md : Dict [str , str ], job_dir : Path ):
150
147
151
148
mime_type : str = asset_md .get ("type" , "" )
152
- logger .debug (
153
- f"_export_result_metadata: { asset_path = } , "
154
- + f"file's MIME type: { mime_type } , "
155
- + f"job dir (based on output file): { job_dir = } "
156
- )
157
149
158
150
# Skip assets that are clearly not images.
159
151
if asset_path .endswith (".json" ):
160
- logger .info (f"_export_result_metadata: Asset file is not an image but JSON, { asset_path = } " )
161
152
return None
162
153
163
154
# The asset path should be relative to the job directory.
164
155
abs_asset_path : Path = get_abs_path_of_asset (asset_path , job_dir )
165
- logger .debug (f"{ asset_path = } maps to absolute path: { abs_asset_path = } , " + f"{ abs_asset_path .exists ()= } " )
166
156
167
157
asset_href : str = asset_md .get ("href" , "" )
168
158
if not abs_asset_path .exists () and asset_href .startswith ("s3://" ):
169
159
try :
170
160
abs_asset_path .write_bytes (get_s3_binary_file_contents (asset_href ))
171
161
except Exception as exc :
172
- logger .error (
173
- "Could not download asset from object storage: "
174
- + f"asset={ asset_path } , href={ asset_href !r} , exception: { exc !r} "
175
- )
162
+ pass
176
163
177
164
asset_gdal_metadata : AssetRasterMetadata = read_gdal_raster_metadata (abs_asset_path )
178
165
# If gdal could not extract the projection metadata from the file
179
166
# (The file is corrupt perhaps?).
180
167
if asset_gdal_metadata .could_not_read_file :
181
- logger .warning (
182
- "Could not get projection extension metadata for following asset:"
183
- + f" '{ asset_path } ', { abs_asset_path = } "
184
- )
185
168
return None
186
169
else :
187
170
return (asset_path , asset_gdal_metadata .to_dict ())
@@ -219,7 +202,6 @@ def read_gdal_raster_metadata(asset_path: Union[str, Path]) -> AssetRasterMetada
219
202
and in that version the gdal.Info function include these properties directly
220
203
in the key "stac" of the dictionary it returns.
221
204
"""
222
- logger .debug (f"{ __name__ } .read_projection_extension_metadata: { asset_path = } " )
223
205
return parse_gdal_raster_metadata (read_gdal_info (str (asset_path )))
224
206
225
207
@@ -301,7 +283,6 @@ def _process_gdalinfo_for_netcdf_subdatasets(
301
283
sub_datasets_proj [sub_ds_uri ] = sub_ds_md
302
284
303
285
stats_info = _get_raster_statistics (sub_ds_gdal_info , band_name )
304
- logger .info (f"_process_gdalinfo_for_netcdf_subdatasets:: { stats_info = } " )
305
286
sub_datasets_stats [sub_ds_uri ] = stats_info
306
287
307
288
proj_info = {}
@@ -318,27 +299,18 @@ def _process_gdalinfo_for_netcdf_subdatasets(
318
299
proj_info ["proj:epsg" ] = epsg_codes .pop ()
319
300
320
301
ds_band_names = [band for bands in sub_datasets_stats .values () for band in bands .keys ()]
321
- logger .debug (f"{ ds_band_names = } " )
322
302
323
303
all_raster_stats = {}
324
304
325
305
# We can only copy each band's stats if there are no duplicate bands across
326
306
# the subdatasets. If we find duplicate bands there is likely a bug.
327
307
# Besides it is not obvious how we would need to merge statistics across
328
308
# subdatasets, if the bands occur multiple times.
329
- if sorted (set (ds_band_names )) != sorted (ds_band_names ):
330
- logger .warning (f"There are duplicate bands in { ds_band_names = } , Can not merge the bands' statistics." )
331
- else :
332
- logger .info (f"There are no duplicate bands in { ds_band_names = } , Will use all bands' statistics in result." )
333
- for bands in sub_datasets_stats .values ():
334
- for band_name , stats in bands .items ():
335
- all_raster_stats [band_name ] = stats
336
-
337
- logger .debug (f"{ all_raster_stats = } " )
309
+ for bands in sub_datasets_stats .values ():
310
+ for band_name , stats in bands .items ():
311
+ all_raster_stats [band_name ] = stats
338
312
339
313
result = AssetRasterMetadata (gdal_info = gdal_info , projection = proj_info , statistics = all_raster_stats )
340
- logger .debug (f"_process_gdalinfo_for_netcdf_subdatasets:: returning { result = } " )
341
-
342
314
return AssetRasterMetadata (gdal_info = gdal_info , projection = proj_info , statistics = all_raster_stats )
343
315
344
316
@@ -417,12 +389,6 @@ def get_abs_path_of_asset(asset_filename: str, job_dir: Union[str, Path]) -> Pat
417
389
418
390
:return: the absolute path to the asset file, inside job_dir.
419
391
"""
420
- logger .debug (
421
- f"{ __name__ } .get_abs_path_of_asset: { asset_filename = } , { job_dir = } , { Path .cwd ()= } , "
422
- + f"{ Path (job_dir ).is_absolute ()= } , { Path (job_dir ).exists ()= } , "
423
- + f"{ Path (asset_filename ).exists ()= } "
424
- )
425
-
426
392
abs_asset_path = Path (asset_filename )
427
393
if not abs_asset_path .is_absolute ():
428
394
abs_asset_path = Path (job_dir ).resolve () / asset_filename
@@ -449,8 +415,6 @@ def read_gdal_info(asset_uri: str) -> GDALInfo:
449
415
:return:
450
416
GDALInfo: which is a dictionary that contains the output from `gdal.Info()`.
451
417
"""
452
- logger .debug (f"{ __name__ } .read_gdal_info: { asset_uri = } " )
453
-
454
418
# By default, gdal does not raise exceptions but returns error codes and prints
455
419
# error info on stdout. We don't want that. At the least it should go to the logs.
456
420
# See https://gdal.org/api/python_gotchas.html
@@ -464,15 +428,8 @@ def read_gdal_info(asset_uri: str) -> GDALInfo:
464
428
except Exception as exc :
465
429
# TODO: Specific exception type(s) would be better but Wasn't able to find what
466
430
# specific exceptions gdal.Info might raise.
467
- logger .warning (
468
- "Could not get projection extension metadata, "
469
- + f"gdal.Info failed for following asset: '{ asset_uri } ' . "
470
- + "Either file does not exist or else it is probably not a raster. "
471
- + f"Exception from GDAL: { exc } "
472
- )
473
431
return {}
474
432
else :
475
- logger .debug (f"{ asset_uri = } , { data_gdalinfo = } " )
476
433
return data_gdalinfo
477
434
478
435
0 commit comments