@@ -1377,7 +1377,7 @@ def run_training_data_prep_child_mortality(
1377
1377
df_exploded ["age_year" ] = df_exploded ["age_year" ].astype (int )
1378
1378
df_exploded ["age_month_at_year_end" ] = df_exploded ["age_month_at_year_end" ].astype (int )
1379
1379
# override int_year, used to get climate vars
1380
- df_exploded . rename ( columns = { "years_to_expand" : " int_year"}, inplace = True )
1380
+ df_exploded [ " int_year"] = df_exploded [ "years_to_expand" ]. astype ( int )
1381
1381
1382
1382
# for rows with child_alive==0, replace with child_alive=1 if int_year < year_of_recorded_age
1383
1383
df_exploded ["child_alive" ] = df_exploded ["child_alive" ].astype (int )
@@ -1412,7 +1412,7 @@ def run_training_data_prep_child_mortality(
1412
1412
logging .info ("Processing climate data..." )
1413
1413
climate_vars = get_climate_vars_for_dataframe (df_exploded )
1414
1414
df_climate = merge_left_without_inflating (df_exploded , climate_vars , on = ["int_year" , "lat" , "long" ])
1415
-
1415
+
1416
1416
logging .info ("Adding elevation data..." )
1417
1417
df_climate = get_elevation_for_dataframe (df_climate )
1418
1418
@@ -1421,15 +1421,18 @@ def run_training_data_prep_child_mortality(
1421
1421
#Write to output
1422
1422
for measure in MEASURES_IN_SOURCE [data_source_type ]:
1423
1423
measure_df = df_climate [df_climate [measure ].notna ()].copy ()
1424
- measure_df ["measure" ] = measure
1424
+ measure_df ["measure" ] = data_source_type
1425
1425
measure_df ["value" ] = measure_df [measure ]
1426
- measure_root = Path (output_root ) / measure
1426
+ measure_root = Path (output_root ) / data_source_type
1427
+ os .makedirs (measure_root , exist_ok = True ,mode = 0o777 )
1428
+ os .makedirs (Path (measure_root ) / "training_data" , exist_ok = True ,mode = 0o777 )
1427
1429
cm_data = ClimateMalnutritionData (measure_root )
1428
- logging .info (f"Saving data for { measure } to { measure_root } { len (measure_df )} rows" )
1430
+ logging .info (f"Saving data for { data_source_type } to { measure_root } { len (measure_df )} rows" )
1429
1431
for ldi_col in ['ldipc_weighted_no_match' ]: #ldi_cols:
1430
1432
measure_df ['ldi_pc_pd' ] = measure_df [ldi_col ] / 365
1431
1433
version = cm_data .new_training_version ()
1432
- logging .info (f"Saving data for { measure } to version { version } with { ldi_col } as LDI" )
1434
+ os .makedirs (Path (measure_root ) / "training_data" / version , exist_ok = True , mode = 0o777 )
1435
+ logging .info (f"Saving data for { data_source_type } to version { version } with { ldi_col } as LDI" )
1433
1436
cm_data .save_training_data (measure_df , version )
1434
1437
message = "Used " + ldi_col + " as LDI"
1435
1438
# Save a small file with a record of which ldi column was used for this version
0 commit comments