Skip to content

Commit 087bc68

Browse files
author
Elye Bliss
committed
initial commit of notebook exploring child mortality data
1 parent d18e822 commit 087bc68

File tree

1 file changed

+100
-0
lines changed

1 file changed

+100
-0
lines changed
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import seaborn as sns
2+
import plotly.express as px
3+
import pandas as pd
4+
import matplotlib.pyplot as plt
5+
import matplotlib.ticker as mticker
6+
7+
8+
# Parquet snapshot of the child-mortality training data explored below.
DATA_PATH = "/mnt/team/rapidresponse/pub/population/modeling/climate_malnutrition/child_mortality/training_data/2025_09_15.01/data.parquet"

df = pd.read_parquet(DATA_PATH)

# Cast line_id to int before it is stringified into the individual key
# (presumably so the key component has no float formatting -- confirm).
df["line_id"] = df["line_id"].astype(int)

# Build a per-individual key by joining nid, psu, hh_id and line_id with "_".
# Column-wise string concatenation yields the same values as a row-wise
# "_".join, but avoids calling a Python function once per row.
id_parts = df[["nid", "psu", "hh_id", "line_id"]].astype(str)
df["indv_id"] = (
    id_parts["nid"]
    + "_"
    + id_parts["psu"]
    + "_"
    + id_parts["hh_id"]
    + "_"
    + id_parts["line_id"]
)
print(f"{df['indv_id'].nunique():,} unique individuals in data")

# flip child_alive so 1 = died, 0 = alive for easier interpretation
df["child_mortality"] = 1 - df["child_alive"]
19+
20+
# Count distinct individuals (indv_id) for each value of int_year.
agg_yr = (
    df.groupby(["int_year"])["indv_id"]
    .nunique()
    .reset_index()
    .rename(columns={"indv_id": "unique_individuals"})
)

# Create the figure once and hand its axes to pandas. Calling plt.figure()
# and then DataFrame.plot() without ax= leaves an empty figure behind and
# draws the bars on a second, default-sized one.
fig, ax = plt.subplots(figsize=(20, 5))
agg_yr.plot(x="int_year", y="unique_individuals", kind="bar", ax=ax)
ax.set_title("Unique Individuals per Year")
plt.setp(ax.get_xticklabels(), fontsize=5, rotation=45, ha="right")
# Thousands separators on the y axis.
ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, p: format(int(x), ",")))
fig.tight_layout()
plt.show()
34+
35+
# Among children with no child_alive == 0 record, report how many were
# under 5 at survey.
children_died_ids = df.query("child_alive == 0")["indv_id"].unique()
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of df (avoids SettingWithCopyWarning).
children_alive = df.query("indv_id not in @children_died_ids").copy()
children_alive["age_month_original"] = children_alive["age_month_original"].astype(int)
# NOTE(review): the strict "> 60" puts a child aged exactly 60 months in the
# under-5 group -- confirm this boundary is intended.
children_alive["over_5_at_survey"] = children_alive["age_month_original"] > 60

is_over5 = children_alive["over_5_at_survey"]
over5_at_survey = children_alive.loc[is_over5, "indv_id"].nunique()
under5_at_survey = children_alive.loc[~is_over5, "indv_id"].nunique()

# Compute the denominator once and guard against an empty selection.
total_alive = over5_at_survey + under5_at_survey
under5_share = under5_at_survey / total_alive if total_alive else float("nan")
print(
    f"{under5_at_survey:,} children out of {total_alive:,} "
    f"who did not die were under 5 at survey ({under5_share:.1%})"
)
50+
51+
# Aggregate data: mean of every numeric column per (nid, ihme_loc_id, int_year),
# with two columns renamed for plotting.
agg_df = (
    df.groupby(["nid", "ihme_loc_id", "int_year"], as_index=False)
    .mean(numeric_only=True)
    .rename(
        columns={
            "ldipc_weighted_no_match": "consumption",
            "child_mortality": "total_mortality",
        }
    )
)
# Show the figure explicitly: a bare px.scatter(...) expression is discarded
# when the file runs top to bottom, so nothing would be rendered.
px.scatter(
    agg_df, x="mean_temperature", y="total_mortality", color="ihme_loc_id"
).show()
63+
64+
65+
# Columns that each get a quantile-binned "<name>_bin" companion column.
columns_to_bin = [
    "mean_temperature",
    "total_precipitation",
    "relative_humidity",
    "mean_high_temperature",
    "mean_low_temperature",
    "precipitation_days",
    "days_over_30C",
    "days_over_26C",
]

# Work on a copy so the binned columns are not added to df itself.
# (To look at a single location, filter before copying,
# e.g. df.query("ihme_loc_id == 'ETH'").)
heatmap_df = df.copy()

# Up to 10 quantile bins per column; duplicate bin edges are dropped, so a
# column with many tied values may end up with fewer bins.
for col in columns_to_bin:
    binned = pd.qcut(heatmap_df[col], q=10, duplicates="drop")
    heatmap_df[f"{col}_bin"] = binned

# Quantile-bin ldipc_weighted_no_match into "consumption", keeping the bin
# edges in ldi_bins.
consumption_bins, ldi_bins = pd.qcut(
    heatmap_df["ldipc_weighted_no_match"], q=10, retbins=True
)
heatmap_df["consumption"] = consumption_bins
83+
# Heatmaps of mean child_mortality by consumption bin (rows) and the binned
# values of one climate column (columns).
for col in ("days_over_30C", "mean_temperature"):
    # observed=False keeps every bin combination and makes the choice
    # explicit instead of relying on the deprecated implicit default for
    # categorical groupers.
    mortality_grid = (
        heatmap_df.groupby(["consumption", f"{col}_bin"], observed=False)[
            "child_mortality"
        ]
        .mean()
        .unstack()
    )
    # Start a new figure per heatmap; otherwise the second heatmap is drawn
    # on top of the first on the same axes.
    plt.figure()
    sns.heatmap(
        mortality_grid,
        annot=True,
        fmt=".2f",
        cmap="YlOrBr",
    )
    plt.show()

0 commit comments

Comments
 (0)