Skip to content

Commit 2ac35d8

Browse files
author
Elye Bliss
committed
attempting quick Cox survival model in python and R
1 parent 087bc68 commit 2ac35d8

File tree

2 files changed

+103
-0
lines changed

2 files changed

+103
-0
lines changed

notebooks/2025_09_16_child_mortality_investigation.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
import pandas as pd
44
import matplotlib.pyplot as plt
55
import matplotlib.ticker as mticker
6+
from lifelines import CoxPHFitter # for Cox survival models
67

78

89
DATA_PATH = "/mnt/team/rapidresponse/pub/population/modeling/climate_malnutrition/child_mortality/training_data/2025_09_15.01/data.parquet"

df = pd.read_parquet(DATA_PATH)

## 1. Get basic info about data
df["line_id"] = df["line_id"].astype(int)

# Build a per-individual key by joining the four identifier columns with "_".
# Series.str.cat concatenates whole columns in one vectorized call, instead of
# invoking "_".join once per row through agg(axis=1); the resulting strings are
# the same.
df["indv_id"] = (
    df["nid"]
    .astype(str)
    .str.cat([df[col].astype(str) for col in ("psu", "hh_id", "line_id")], sep="_")
)
@@ -48,6 +50,7 @@
4850
f"who did not die were under 5 at survey ({under5_at_survey/(over5_at_survey + under5_at_survey):.1%})"
4951
)
5052

53+
## 2. Make scatterplots and heatmaps based on raw data
5154
# Aggregate data
5255
agg_df = (
5356
df.groupby(["nid", "ihme_loc_id", "int_year"], as_index=False)
@@ -98,3 +101,28 @@
98101
fmt=".2f",
99102
cmap="YlOrBr",
100103
)
104+
105+
## 3. Make simple model
106+
# Give the consumption measure a short name; the rename mutates df in place.
df.rename(columns={"ldipc_weighted_no_match": "consumption"}, inplace=True)

# Column roles used by the survival model below.
event_col = "child_mortality"
id_col = "indv_id"
time_col = "age_month_at_year_end"
covariate_cols = [
    "consumption",
    "ihme_loc_id",  # r.e. not yet supported in lifelines package
    "mean_temperature",
    # "total_precipitation",
    # "relative_humidity",
    # "mean_high_temperature",
    # "mean_low_temperature",
    # "precipitation_days",
    "days_over_30C",
    # "days_over_26C",
]

# Restrict the frame to the event, duration and covariate columns, since the
# fitter is handed only this subset.
df_model_data = df.loc[:, [event_col, time_col, *covariate_cols]]

# Fit a Cox proportional hazards model and print its coefficient table.
cph = CoxPHFitter().fit(
    df_model_data,
    duration_col=time_col,
    event_col=event_col,
)
cph.print_summary()
127+
128+
## 4. Plot data sources by country
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
################################################################################
2+
# DESCRIPTION: Test R's coxme package to perform a Cox proportional hazard model
3+
# with mixed effects (random effect on location, not yet available in Python packages)
4+
# PROJECT: Climate nutrition
5+
# DATE: 2025-09-16
6+
################################################################################
7+
8+
#==============================================================================
9+
# SECTION 0: PACKAGE LOADING AND ENVIRONMENT SETUP
10+
#==============================================================================
11+
# The workspace is intentionally not cleared here: `rm(list = ls())` would
# delete the caller's objects whenever this script is source()'d, and it does
# not detach packages or reset options. Run the script in a fresh R session
# when a clean environment is needed.

# Username is pulled automatically
username <- Sys.info()[["user"]]

# Single-letter path prefixes. On Linux they are POSIX paths (with `h` built
# from the current username); on any other OS they fall back to drive letters.
if (Sys.info()[["sysname"]] == "Linux") {
  j <- "/home/j/"
  h <- paste0("/homes/", username, "/")
  r <- "/mnt/"
  l <- "/ihme/limited_use/"
} else {
  j <- "J:/"
  h <- "H:/"
  r <- "R:/"
  l <- "L:/"
}
27+
28+
# coxme (survival analysis with mixed effects) is installed into and loaded
# from one explicit library path, kept in a single variable so the two calls
# cannot drift apart.
coxme_lib <- "/homes/elyeb/rlibs"

# Install only when coxme cannot be loaded from that library, so repeated runs
# do not re-download and re-install the package every time.
if (!requireNamespace("coxme", lib.loc = coxme_lib, quietly = TRUE)) {
  install.packages("coxme", lib = coxme_lib)
}
library(coxme, lib.loc = coxme_lib)
library(data.table)
library(arrow) # to read parquet

options(scipen = 999) # turn off scientific notation
34+
35+
#==============================================================================
36+
# SECTION 1: DATA LOADING AND PREPROCESSING
37+
#==============================================================================
38+
39+
# Read the training data and turn it into a data.table in a single step.
df <- data.table(
  read_parquet(
    "/mnt/team/rapidresponse/pub/population/modeling/climate_malnutrition/child_mortality/training_data/2025_09_15.01/data.parquet"
  )
)

# Event indicator: child_alive flipped, so that 1 = died and 0 = alive.
df[, child_mortality := 1 - child_alive]

# Shorter name for the consumption measure (renamed by reference).
setnames(df, "ldipc_weighted_no_match", "consumption")

# Keep only the columns that enter the model formula.
df_model <- df[
  ,
  c(
    "child_mortality",
    "age_month_at_year_end",
    "sex_id",
    "ihme_loc_id",
    "consumption",
    "mean_temperature",
    "days_over_30C"
  ),
  with = FALSE
]
49+
50+
#==============================================================================
51+
# SECTION 2: MAIN PROCESSING
52+
#==============================================================================
53+
54+
# Cox proportional hazards model with fixed effects for consumption, mean
# temperature, days over 30C and sex, plus a random intercept per ihme_loc_id.
fit <- coxme(
  Surv(age_month_at_year_end, child_mortality) ~
    consumption + mean_temperature + days_over_30C + sex_id +
    (1 | ihme_loc_id),
  data = df_model
)

# Print explicitly: a bare top-level `summary(fit)` is auto-printed in an
# interactive or Rscript run, but is silently discarded when the script is
# executed through source() with its default arguments.
print(summary(fit))
56+
# Mixed effects coxme model
57+
# Formula: Surv(age_month_at_year_end, child_mortality) ~ consumption + mean_temperature + days_over_30C + sex_id + (1 | ihme_loc_id)
58+
# Data: df_model
59+
#
60+
# events, n = 42130, 4893786
61+
#
62+
# Random effects:
63+
# group variable sd variance
64+
# 1 ihme_loc_id Intercept 1.135688 1.289787
65+
# Chisq df p AIC BIC
66+
# Integrated loglik 16118 5.00 0 16108 16065
67+
# Penalized loglik 16241 18.78 0 16204 16041
68+
#
69+
# Fixed effects:
70+
# coef exp(coef) se(coef) z p
71+
# consumption -0.000229703 0.999770323 0.000004872 -47.15 <0.0000000000000002
72+
# mean_temperature -0.015280408 0.984835745 0.001395616 -10.95 <0.0000000000000002
73+
# days_over_30C 0.003450759 1.003456720 0.000152407 22.64 <0.0000000000000002
74+
# sex_id -0.017863722 0.982294889 0.009749483 -1.83 0.0669
75+

0 commit comments

Comments
 (0)