@@ -89,6 +89,8 @@ def convert_archive(
89
89
90
90
if not jsonl_path :
91
91
jsonl_path = archive_path / "optimade.jsonl"
92
+ if jsonl_path .exists () and not overwrite :
93
+ raise RuntimeError (f"Not overwriting existing file at { jsonl_path } " )
92
94
93
95
# if the config specifies just a JSON-L, then extract any archives
94
96
# and return the JSONL path
@@ -397,6 +399,10 @@ def _parse_and_assign_properties(
397
399
if not property_matches_by_file :
398
400
return
399
401
402
+ optimade_immutable_ids = {
403
+ entry ["attributes" ].get ("immutable_id" ) for entry in optimade_entries .values ()
404
+ }
405
+
400
406
for archive_file in property_matches_by_file :
401
407
for _path in tqdm .tqdm (
402
408
property_matches_by_file [archive_file ],
@@ -409,6 +415,14 @@ def _parse_and_assign_properties(
409
415
for id in properties :
410
416
parsed_properties [id ].update (properties [id ])
411
417
all_property_fields |= set (properties [id ].keys ())
418
+ if (
419
+ id not in optimade_entries
420
+ and id not in optimade_immutable_ids
421
+ ):
422
+ warnings .warn (
423
+ f"Could not find entry { id !r} in OPTIMADE entries." ,
424
+ )
425
+ continue
412
426
break
413
427
except Exception as exc :
414
428
errors .append (exc )
@@ -430,9 +444,13 @@ def _parse_and_assign_properties(
430
444
expected_property_fields = set (property_def_dict .keys ())
431
445
432
446
if expected_property_fields != all_property_fields :
433
- warnings .warn (
434
- f"Found { all_property_fields = } in data but { expected_property_fields } in config"
435
- )
447
+ warning_message = "Mismatch between parsed property fields (A) and those defined in config (B)."
448
+ if all_property_fields - expected_property_fields :
449
+ warning_message += f"\n (A - B) = { all_property_fields - expected_property_fields } (will be omitted from API; if intended this can be ignored)."
450
+ if expected_property_fields - all_property_fields :
451
+ warning_message += f"\n (B - A) = { expected_property_fields - all_property_fields } (configured, but missing; check for typos or missing aliases)"
452
+
453
+ warnings .warn (warning_message )
436
454
437
455
# Look for precisely matching IDs, or 'filename' matches
438
456
for id in optimade_entries :
@@ -442,6 +460,14 @@ def _parse_and_assign_properties(
442
460
# try to find a matching ID based on the filename
443
461
property_entry_id = id .split ("/" )[- 1 ].split ("." )[0 ]
444
462
463
+ if (property_entry_id not in parsed_properties ) and (
464
+ id not in parsed_properties
465
+ ):
466
+ warnings .warn (
467
+ f"Could not find entry { id !r} (or fully-qualified { property_entry_id !r} ) in parsed properties" ,
468
+ )
469
+ continue
470
+
445
471
# Loop over all defined properties and assign them to the entry, setting to None if missing
446
472
# Also cast types if provided
447
473
for property in all_property_fields :
@@ -451,10 +477,15 @@ def _parse_and_assign_properties(
451
477
property , None
452
478
) or parsed_properties .get (id , {}).get (property , None )
453
479
if property not in property_def_dict :
454
- warnings . warn ( f"Missing property definition for { property = } " )
480
+ # These are already warned about above: fields that are not configured but are present in the property file
455
481
continue
456
482
if value is not None and property_def_dict [property ].type in TYPE_MAP :
457
- value = TYPE_MAP [property_def_dict [property ].type ](value )
483
+ try :
484
+ value = TYPE_MAP [property_def_dict [property ].type ](value )
485
+ except Exception as exc :
486
+ raise RuntimeError (
487
+ f"Could not cast { value = } for { property = } to type { property_def_dict [property ].type !r} for entry { id !r} "
488
+ ) from exc
458
489
459
490
optimade_entries [id ]["attributes" ][f"_{ provider_prefix } _{ property } " ] = value
460
491
0 commit comments