@@ -316,21 +316,19 @@ def process_formulae(
         if not formula_fields:
             return
 
-        if conf.UPLOAD_LOG_VERBOSITY >= 1:
-            self.logger.info(
-                f"Found {len(formula_fields)} {entity_type.upper()} field/s with {formula_type} in the scheming_yaml"
-            )
+        self.logger.info(
+            f"Found {len(formula_fields)} {entity_type.upper()} field/s with {formula_type} in the scheming_yaml"
+        )
 
         jinja2_formulae = {}
         for schema_field in formula_fields:
             field_name = schema_field["field_name"]
             template = schema_field[formula_type]
             jinja2_formulae[field_name] = template
 
-            if conf.UPLOAD_LOG_VERBOSITY >= 2:
-                self.logger.debug(
-                    f'Jinja2 {formula_type} for {entity_type.upper()} field "{field_name}": {template}'
-                )
+            self.logger.debug(
+                f'Jinja2 {formula_type} for {entity_type.upper()} field "{field_name}": {template}'
+            )
 
         context = {"package": self.package, "resource": self.resource_fields_stats}
         context.update(jinja2_formulae)
@@ -344,10 +342,9 @@ def process_formulae(
                 rendered_formula = formula.render(**context)
                 updates[field_name] = rendered_formula
 
-                if conf.UPLOAD_LOG_VERBOSITY >= 2:
-                    self.logger.debug(
-                        f'Evaluated jinja2 {formula_type} for {entity_type.upper()} field "{field_name}": {rendered_formula}'
-                    )
+                self.logger.debug(
+                    f'Evaluated jinja2 {formula_type} for {entity_type.upper()} field "{field_name}": {rendered_formula}'
+                )
             except Exception as e:
                 self.logger.error(
                     f'Error evaluating jinja2 {formula_type} for {entity_type.upper()} field "{field_name}": {str(e)}'
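Both hunks above drop the hand-rolled `conf.UPLOAD_LOG_VERBOSITY` gates and call `self.logger.info`/`self.logger.debug` unconditionally: the standard `logging` module already filters records by level, so a `debug()` call is effectively a no-op when the logger sits at INFO or above. A minimal sketch of that behavior (standalone; the logger name is illustrative):

    import logging

    # level-based filtering, which the patch now relies on instead of verbosity checks
    logger = logging.getLogger("dpp.sketch")
    logger.addHandler(logging.StreamHandler())

    logger.setLevel(logging.INFO)
    logger.info("emitted: INFO meets the INFO threshold")
    logger.debug("dropped: DEBUG is below the INFO threshold")

    logger.setLevel(logging.DEBUG)
    logger.debug("emitted now that the threshold is DEBUG")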
@@ -445,9 +442,15 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
     handler = utils.StoringHandler(task_id, input)
     logger = logging.getLogger(task_id)
     logger.addHandler(handler)
+
     # also show logs on stderr
     logger.addHandler(logging.StreamHandler())
-    logger.setLevel(logging.DEBUG)
+    log_level = getattr(logging, conf.UPLOAD_LOG_LEVEL.upper())
+
+    # set the log level to the config upload_log_level
+    logger.setLevel(logging.INFO)
+    logger.info(f"Setting log level to {logging.getLevelName(int(log_level))}")
+    logger.setLevel(log_level)
 
     # check if conf.QSV_BIN and conf.FILE_BIN exists
     # qsv_path = Path(conf.QSV_BIN)
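The new bootstrap resolves the configured level name to its numeric constant with `getattr(logging, name.upper())`, pins the logger to INFO just long enough to announce the change, then applies the configured level. A hedged sketch, assuming `UPLOAD_LOG_LEVEL` is a plain string such as "debug" or "WARNING" (the setting name comes from the patch; the value below is made up):

    import logging

    UPLOAD_LOG_LEVEL = "warning"  # hypothetical config value

    logger = logging.getLogger("dpp.sketch")
    logger.addHandler(logging.StreamHandler())

    # "warning" -> logging.WARNING (30)
    log_level = getattr(logging, UPLOAD_LOG_LEVEL.upper())

    # announce at INFO first so the message survives even a quieter target level
    logger.setLevel(logging.INFO)
    logger.info(f"Setting log level to {logging.getLevelName(log_level)}")
    logger.setLevel(log_level)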
@@ -478,8 +481,8 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
     qsv_semver = qsv_version_info[
         qsv_version_info.find(" ") : qsv_version_info.find("-")
     ].lstrip()
-    if conf.UPLOAD_LOG_VERBOSITY >= 1:
-        logger.info("qsv version found: {}".format(qsv_semver))
+
+    logger.info("qsv version found: {}".format(qsv_semver))
     try:
         if semver.compare(qsv_semver, conf.MINIMUM_QSV_VERSION) < 0:
             raise utils.JobError(
@@ -520,8 +523,7 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
     timer_start = time.perf_counter()
 
     # fetch the resource data
-    if conf.UPLOAD_LOG_VERBOSITY >= 1:
-        logger.info("Fetching from: {0}...".format(resource_url))
+    logger.info("Fetching from: {0}...".format(resource_url))
     headers = {}
     if resource.get("url_type") == "upload":
         # If this is an uploaded file to CKAN, authenticate the request,
@@ -538,8 +540,7 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
             scheme=rewrite_url.scheme, netloc=rewrite_url.netloc
         )
         resource_url = new_url.geturl()
-        if conf.UPLOAD_LOG_VERBOSITY >= 1:
-            logger.info("Rewritten resource url to: {0}".format(resource_url))
+        logger.info("Rewritten resource url to: {0}".format(resource_url))
 
     try:
         kwargs = {
@@ -600,11 +601,9 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
 
         # download the file
         if cl:
-            if conf.UPLOAD_LOG_VERBOSITY >= 1:
-                logger.info("Downloading {:.2MB} file...".format(DataSize(int(cl))))
+            logger.info("Downloading {:.2MB} file...".format(DataSize(int(cl))))
         else:
-            if conf.UPLOAD_LOG_VERBOSITY >= 1:
-                logger.info("Downloading file of unknown size...")
+            logger.info("Downloading file of unknown size...")
 
         with open(tmp, "wb") as tmp_file:
             for chunk in response.iter_content(conf.CHUNK_SIZE):
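The `{:.2MB}` spec works because `DataSize` (presumably the `datasize` package; its import is outside this diff) implements `__format__` with a unit suffix in the spec, here two decimal places in SI megabytes. A small sketch under that assumption:

    # assumes: pip install datasize
    from datasize import DataSize

    cl = "123456789"  # hypothetical Content-Length header value, in bytes
    print("Downloading {:.2MB} file...".format(DataSize(int(cl))))
    # -> roughly "Downloading 123.46MB file..." (bytes / 10^6 per SI megabyte)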
@@ -667,6 +666,20 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
             )
         )
 
+    # # Check if the file is a zip file
+    # if resource_format.upper() == "ZIP":
+    #     logger.info("ZIP file detected...")
+    #     # get the zip file's metadata
+    #     # zip_metadata = zipfile.ZipFile(tmp, "r").infolist()
+    #     # logger.info("ZIP file metadata: {}".format(zip_metadata))
+
+    #     # # get the zip file's contents
+    #     # zip_contents = zipfile.ZipFile(tmp, "r").namelist()
+    #     # logger.info("ZIP file contents: {}".format(zip_contents))
+
+    #     extracted_metadata = dph.extract_zip_or_metadata(tmp, os.path.join(temp_dir, "zip_metadata.csv"))
+    #     logger.info("Extracted metadata: {}".format(extracted_metadata))
+
     # ===================================================================================
     # ANALYZE WITH QSV
     # ===================================================================================
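The block above is scaffolding that stays commented out: listing a ZIP's members is plain stdlib, while `dph.extract_zip_or_metadata` is a datapusher-plus helper not shown in this diff. For reference, a stdlib-only sketch of the inspection the comments describe (the path is hypothetical):

    import zipfile

    tmp = "/tmp/resource.zip"  # hypothetical downloaded file
    with zipfile.ZipFile(tmp, "r") as zf:
        print("ZIP file contents:", zf.namelist())  # member names
        for info in zf.infolist():                  # per-member metadata
            print(info.filename, info.file_size, info.date_time)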
@@ -862,16 +875,12 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
     qsv_input_csv = os.path.join(temp_dir, "qsv_input.csv")
     # if resource_format is CSV we don't need to normalize
     if resource_format.upper() == "CSV":
-        if conf.UPLOAD_LOG_VERBOSITY >= 1:
-            logger.info(
-                "Normalizing/UTF-8 transcoding {}...".format(resource_format)
-            )
+        logger.info("Normalizing/UTF-8 transcoding {}...".format(resource_format))
     else:
         # if not CSV (e.g. TSV, TAB, etc.) we need to normalize to CSV
-        if conf.UPLOAD_LOG_VERBOSITY >= 1:
-            logger.info(
-                "Normalizing/UTF-8 transcoding {} to CSV...".format(resource_format)
-            )
+        logger.info(
+            "Normalizing/UTF-8 transcoding {} to CSV...".format(resource_format)
+        )
 
     qsv_input_utf_8_encoded_csv = os.path.join(
         temp_dir, "qsv_input_utf_8_encoded.csv"
@@ -884,10 +893,7 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
         capture_output=True,
         text=True,
     )
-    if conf.UPLOAD_LOG_VERBOSITY >= 1:
-        logger.info(
-            "Identified encoding of the file: {}".format(file_encoding.stdout)
-        )
+    logger.info("Identified encoding of the file: {}".format(file_encoding.stdout))
 
     # trim the encoding string
     file_encoding.stdout = file_encoding.stdout.strip()
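`file_encoding.stdout` is the captured output of an encoding probe (the invocation sits above this hunk; datapusher-plus configures a `FILE_BIN` binary). With `capture_output=True, text=True`, `subprocess.run` hands back stdout as a string that ends in a newline, which is why the `.strip()` follows. A hedged sketch of such a probe using GNU `file`'s `--mime-encoding` flag (the extension's exact flags may differ):

    import subprocess

    FILE_BIN = "/usr/bin/file"  # hypothetical path, stands in for conf.FILE_BIN
    file_encoding = subprocess.run(
        [FILE_BIN, "-b", "--mime-encoding", "/tmp/qsv_input.csv"],
        capture_output=True,
        text=True,
    )
    encoding = file_encoding.stdout.strip()  # e.g. "utf-8" or "us-ascii"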
@@ -1258,10 +1264,9 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
             if type_override in list(conf.TYPE_MAPPING.values()):
                 h["type"] = type_override
 
-    if conf.UPLOAD_LOG_VERBOSITY >= 1:
-        logger.info(
-            "Determined headers and types: {headers}...".format(headers=headers_dicts)
-        )
+    logger.info(
+        "Determined headers and types: {headers}...".format(headers=headers_dicts)
+    )
 
     # save stats to the datastore by loading qsv_stats_csv directly using COPY
     stats_table = sql.Identifier(resource_id + "-druf-stats")
@@ -1318,13 +1323,14 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
     )
 
     # Copy stats CSV to /tmp directory for debugging purposes
-    if conf.UPLOAD_LOG_VERBOSITY >= 2:
+    more_debug_info = logger.getEffectiveLevel() <= logging.DEBUG
+    if more_debug_info:
         try:
             debug_stats_path = os.path.join("/tmp", os.path.basename(qsv_stats_csv))
             shutil.copy2(qsv_stats_csv, debug_stats_path)
-            logger.info(f"Copied stats CSV to {debug_stats_path} for debugging")
+            logger.debug(f"Copied stats CSV to {debug_stats_path} for debugging")
         except Exception as e:
-            logger.warning(f"Failed to copy stats CSV to /tmp for debugging: {e}")
+            logger.debug(f"Failed to copy stats CSV to /tmp for debugging: {e}")
 
     try:
         with open(qsv_stats_csv, "r") as f:
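`more_debug_info` gates the /tmp copies on whether DEBUG output is actually enabled. Standard levels grow with severity (DEBUG=10, INFO=20, WARNING=30), so "debug enabled" means the effective level is at or below `logging.DEBUG`; `logger.isEnabledFor(logging.DEBUG)` expresses the same test. A quick sketch:

    import logging

    logger = logging.getLogger("dpp.sketch")

    logger.setLevel(logging.INFO)
    assert not logger.isEnabledFor(logging.DEBUG)        # INFO (20) filters DEBUG (10)

    logger.setLevel(logging.DEBUG)
    assert logger.getEffectiveLevel() <= logging.DEBUG   # the gate used above
    assert logger.isEnabledFor(logging.DEBUG)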
@@ -1373,13 +1379,13 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
     )
 
     # Copy frequency CSV to /tmp directory for debugging purposes
-    if conf.UPLOAD_LOG_VERBOSITY >= 2:
+    if more_debug_info:
         try:
             debug_freq_path = os.path.join("/tmp", os.path.basename(qsv_freq_csv))
             shutil.copy2(qsv_freq_csv, debug_freq_path)
-            logger.info(f"Copied frequency CSV to {debug_freq_path} for debugging")
+            logger.debug(f"Copied frequency CSV to {debug_freq_path} for debugging")
         except Exception as e:
-            logger.warning(f"Failed to copy frequency CSV to /tmp for debugging: {e}")
+            logger.debug(f"Failed to copy frequency CSV to /tmp for debugging: {e}")
 
     # load the frequency table using COPY
     copy_sql = sql.SQL("COPY {} FROM STDIN WITH (FORMAT CSV, HEADER TRUE)").format(
@@ -1833,8 +1839,7 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
     package_id = resource["package_id"]
     scheming_yaml, package = get_scheming_yaml(package_id, scheming_yaml_type="dataset")
 
-    if conf.UPLOAD_LOG_VERBOSITY >= 2:
-        logger.debug(f"package: {package}")
+    logger.debug(f"package: {package}")
 
     # Initialize the formula processor
     formula_processor = FormulaProcessor(
@@ -1853,8 +1858,7 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
         package.update(package_updates)
         try:
             patched_package = patch_package(package)
-            if conf.UPLOAD_LOG_VERBOSITY >= 2:
-                logger.debug(f"Package after patching: {patched_package}")
+            logger.debug(f"Package after patching: {patched_package}")
             package = patched_package
             logger.info("PACKAGE formulae processed...")
         except Exception as e:
@@ -1882,8 +1886,7 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
             revised_package = revise_package(
                 package_id, update={"dpp_suggestions": revise_update_content}
             )
-            if conf.UPLOAD_LOG_VERBOSITY >= 2:
-                logger.debug(f"Package after revising: {revised_package}")
+            logger.debug(f"Package after revising: {revised_package}")
             package = revised_package
             logger.info("PACKAGE suggestion formulae processed...")
         except Exception as e:
@@ -1912,8 +1915,7 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
             revised_package = revise_package(
                 package_id, update={"dpp_suggestions": revise_update_content}
             )
-            if conf.UPLOAD_LOG_VERBOSITY >= 2:
-                logger.debug(f"Package after revising: {revised_package}")
+            logger.debug(f"Package after revising: {revised_package}")
             package = revised_package
             logger.info("RESOURCE suggestion formulae processed...")
         except Exception as e: