
Commit ad636bd

Merge branch 'main' into s-bias-bounds

2 parents: ebdc682 + 4ced5e3

14 files changed: +163 −190 lines

.coverage

68 KB
Binary file not shown.

doubleml/double_ml.py

Lines changed: 8 additions & 5 deletions

@@ -1109,7 +1109,7 @@ def evaluate_learners(self, learners=None, metric=_rmse):
         where ``n`` specifies the number of observations. Remark that some models like IRM are
         not able to provide all values for ``y_true`` for all learners and might contain
         some ``nan`` values in the target vector.
-        Default is the euclidean distance.
+        Default is the root-mean-square error.
 
         Returns
         -------
@@ -1130,10 +1130,13 @@ def evaluate_learners(self, learners=None, metric=_rmse):
         >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd')
         >>> dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m)
         >>> dml_irm_obj.fit()
-        >>> dml_irm_obj.evaluate_learners(metric=mean_absolute_error)
-        {'ml_g0': array([[1.13318973]]),
-        'ml_g1': array([[0.91659939]]),
-        'ml_m': array([[0.36350912]])}
+        >>> def mae(y_true, y_pred):
+        >>>     subset = np.logical_not(np.isnan(y_true))
+        >>>     return mean_absolute_error(y_true[subset], y_pred[subset])
+        >>> dml_irm_obj.evaluate_learners(metric=mae)
+        {'ml_g0': array([[0.85974356]]),
+        'ml_g1': array([[0.85280376]]),
+        'ml_m': array([[0.35365143]])}
         """
         # if no learners are provided try to evaluate all learners
         if learners is None:
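The updated docstring example masks ``nan`` targets before scoring. A self-contained sketch of that pattern; the seed, data-generating call, and learners below are illustrative stand-ins, not taken from this commit:

import numpy as np
import doubleml as dml
from doubleml.datasets import make_irm_data
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# IRM fits each outcome learner on one treatment arm only, so y_true
# contains nan entries that the metric has to mask before scoring.
def mae(y_true, y_pred):
    subset = np.logical_not(np.isnan(y_true))
    return mean_absolute_error(y_true[subset], y_pred[subset])

np.random.seed(3141)
data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type='DataFrame')
obj_dml_data = dml.DoubleMLData(data, 'y', 'd')
dml_irm_obj = dml.DoubleMLIRM(obj_dml_data,
                              ml_g=RandomForestRegressor(),
                              ml_m=RandomForestClassifier())
dml_irm_obj.fit()
print(dml_irm_obj.evaluate_learners(metric=mae))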

doubleml/double_ml_pliv.py

Lines changed: 0 additions & 46 deletions

@@ -283,17 +283,6 @@ def _check_data(self, obj_dml_data):
                              'use DoubleMLPLR instead of DoubleMLPLIV.')
         return
 
-    # To be removed in version 0.6.0
-    def set_ml_nuisance_params(self, learner, treat_var, params):
-        if isinstance(self.score, str) & (self.score == 'partialling out') & (learner == 'ml_g'):
-            warnings.warn(("Learner ml_g was renamed to ml_l. "
-                           "Please adapt the argument learner accordingly. "
-                           "The provided parameters are set for ml_l. "
-                           "The redirection will be removed in a future version."),
-                          DeprecationWarning, stacklevel=2)
-            learner = 'ml_l'
-        super(DoubleMLPLIV, self).set_ml_nuisance_params(learner, treat_var, params)
-
     def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
         if self.partialX & (not self.partialZ):
             psi_elements, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv, return_models)
@@ -525,41 +514,6 @@ def _nuisance_est_partial_xz(self, smpls, n_jobs_cv, return_models=False):
 
         return psi_elements, preds
 
-    # To be removed in version 0.6.0
-    def tune(self,
-             param_grids,
-             tune_on_folds=False,
-             scoring_methods=None,  # if None the estimator's score method is used
-             n_folds_tune=5,
-             search_mode='grid_search',
-             n_iter_randomized_search=100,
-             n_jobs_cv=None,
-             set_as_params=True,
-             return_tune_res=False):
-
-        if isinstance(self.score, str) and (self.score == 'partialling out') and (param_grids is not None) and \
-                ('ml_g' in param_grids) and ('ml_l' not in param_grids):
-            warnings.warn(("Learner ml_g was renamed to ml_l. "
-                           "Please adapt the key of param_grids accordingly. "
-                           "The provided param_grids for ml_g are set for ml_l. "
-                           "The redirection will be removed in a future version."),
-                          DeprecationWarning, stacklevel=2)
-            param_grids['ml_l'] = param_grids.pop('ml_g')
-
-        if isinstance(self.score, str) and (self.score == 'partialling out') and (scoring_methods is not None) and \
-                ('ml_g' in scoring_methods) and ('ml_l' not in scoring_methods):
-            warnings.warn(("Learner ml_g was renamed to ml_l. "
-                           "Please adapt the key of scoring_methods accordingly. "
-                           "The provided scoring_methods for ml_g are set for ml_l. "
-                           "The redirection will be removed in a future version."),
-                          DeprecationWarning, stacklevel=2)
-            scoring_methods['ml_l'] = scoring_methods.pop('ml_g')
-
-        tune_res = super(DoubleMLPLIV, self).tune(param_grids, tune_on_folds, scoring_methods, n_folds_tune,
-                                                  search_mode, n_iter_randomized_search, n_jobs_cv, set_as_params,
-                                                  return_tune_res)
-        return tune_res
-
     def _nuisance_tuning_partial_x(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
                                    search_mode, n_iter_randomized_search):
         x, y = check_X_y(self._dml_data.x, self._dml_data.y,
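With the redirection gone (here and in doubleml/double_ml_plr.py below), tuning grids and nuisance parameters must address the learner by its current name; an 'ml_g' key is no longer silently moved to 'ml_l'. A minimal sketch of the post-removal usage under the 'partialling out' score, with illustrative grid values:

import numpy as np
import doubleml as dml
from doubleml.datasets import make_pliv_CHS2015
from sklearn.ensemble import RandomForestRegressor

np.random.seed(3141)
dml_data = make_pliv_CHS2015(n_obs=200, dim_x=20, alpha=0.5, dim_z=1)
dml_pliv_obj = dml.DoubleMLPLIV(dml_data,
                                ml_l=RandomForestRegressor(),
                                ml_m=RandomForestRegressor(),
                                ml_r=RandomForestRegressor())
# Grids are keyed by the current learner names ml_l, ml_m, ml_r.
param_grids = {'ml_l': {'n_estimators': [5, 10]},
               'ml_m': {'n_estimators': [5, 10]},
               'ml_r': {'n_estimators': [5, 10]}}
dml_pliv_obj.tune(param_grids)
dml_pliv_obj.set_ml_nuisance_params('ml_l', 'd', {'n_estimators': 10})
dml_pliv_obj.fit()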

doubleml/double_ml_plr.py

Lines changed: 0 additions & 45 deletions

@@ -166,17 +166,6 @@ def _check_data(self, obj_dml_data):
                              'To fit a partially linear IV regression model use DoubleMLPLIV instead of DoubleMLPLR.')
         return
 
-    # To be removed in version 0.6.0
-    def set_ml_nuisance_params(self, learner, treat_var, params):
-        if isinstance(self.score, str) & (self.score == 'partialling out') & (learner == 'ml_g'):
-            warnings.warn(("Learner ml_g was renamed to ml_l. "
-                           "Please adapt the argument learner accordingly. "
-                           "The provided parameters are set for ml_l. "
-                           "The redirection will be removed in a future version."),
-                          DeprecationWarning, stacklevel=2)
-            learner = 'ml_l'
-        super(DoubleMLPLR, self).set_ml_nuisance_params(learner, treat_var, params)
-
     def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
         x, y = check_X_y(self._dml_data.x, self._dml_data.y,
                          force_all_finite=False)
@@ -255,40 +244,6 @@ def _score_elements(self, y, d, l_hat, m_hat, g_hat, smpls):
 
         return psi_a, psi_b
 
-    # To be removed in version 0.6.0
-    def tune(self,
-             param_grids,
-             tune_on_folds=False,
-             scoring_methods=None,  # if None the estimator's score method is used
-             n_folds_tune=5,
-             search_mode='grid_search',
-             n_iter_randomized_search=100,
-             n_jobs_cv=None,
-             set_as_params=True,
-             return_tune_res=False):
-
-        if isinstance(self.score, str) and (self.score == 'partialling out') and (param_grids is not None) and \
-                ('ml_g' in param_grids) and ('ml_l' not in param_grids):
-            warnings.warn(("Learner ml_g was renamed to ml_l. "
-                           "Please adapt the key of param_grids accordingly. "
-                           "The provided param_grids for ml_g are set for ml_l. "
-                           "The redirection will be removed in a future version."),
-                          DeprecationWarning, stacklevel=2)
-            param_grids['ml_l'] = param_grids.pop('ml_g')
-
-        if isinstance(self.score, str) and (self.score == 'partialling out') and (scoring_methods is not None) and \
-                ('ml_g' in scoring_methods) and ('ml_l' not in scoring_methods):
-            warnings.warn(("Learner ml_g was renamed to ml_l. "
-                           "Please adapt the key of scoring_methods accordingly. "
-                           "The provided scoring_methods for ml_g are set for ml_l. "
-                           "The redirection will be removed in a future version."),
-                          DeprecationWarning, stacklevel=2)
-            scoring_methods['ml_l'] = scoring_methods.pop('ml_g')
-
-        tune_res = super(DoubleMLPLR, self).tune(param_grids, tune_on_folds, scoring_methods, n_folds_tune, search_mode,
-                                                 n_iter_randomized_search, n_jobs_cv, set_as_params, return_tune_res)
-        return tune_res
-
     def _sensitivity_element_est(self, preds):
         # set elments for readability
         y = self._dml_data.y

doubleml/tests/_utils_lpq_manual.py

Lines changed: 7 additions & 6 deletions

@@ -11,6 +11,7 @@ def fit_lpq(y, x, d, z, quantile,
             learner_g, learner_m, all_smpls, treatment, dml_procedure, n_rep=1,
             trimming_rule='truncate',
             trimming_threshold=1e-2,
+            kde=_default_kde,
             normalize_ipw=True, m_z_params=None,
             m_d_z0_params=None, m_d_z1_params=None,
             g_du_z0_params=None, g_du_z1_params=None):
@@ -37,10 +38,10 @@ def fit_lpq(y, x, d, z, quantile,
                             g_du_z1_params=g_du_z1_params)
         if dml_procedure == 'dml1':
             lpqs[i_rep], ses[i_rep] = lpq_dml1(y, d, z, m_z_hat, g_du_z0_hat, g_du_z1_hat, comp_prob_hat,
-                                               treatment, quantile, ipw_vec, coef_bounds, smpls)
+                                               treatment, quantile, ipw_vec, coef_bounds, smpls, kde)
         else:
             lpqs[i_rep], ses[i_rep] = lpq_dml2(y, d, z, m_z_hat, g_du_z0_hat, g_du_z1_hat, comp_prob_hat,
-                                               treatment, quantile, ipw_vec, coef_bounds)
+                                               treatment, quantile, ipw_vec, coef_bounds, kde)
 
     lpq = np.median(lpqs)
     se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(lpqs - lpq, 2)) / n_obs)
@@ -200,7 +201,7 @@ def ipw_score(theta):
     return m_z_hat, g_du_z0_hat, g_du_z1_hat, comp_prob_hat, ipw_vec, coef_bounds
 
 
-def lpq_dml1(y, d, z, m_z, g_du_z0, g_du_z1, comp_prob, treatment, quantile, ipw_vec, coef_bounds, smpls):
+def lpq_dml1(y, d, z, m_z, g_du_z0, g_du_z1, comp_prob, treatment, quantile, ipw_vec, coef_bounds, smpls, kde):
    thetas = np.zeros(len(smpls))
    n_obs = len(y)
    ipw_est = ipw_vec.mean()
@@ -211,17 +212,17 @@ def lpq_dml1(y, d, z, m_z, g_du_z0, g_du_z1, comp_prob, treatment, quantile, ipw
 
     theta_hat = np.mean(thetas)
 
-    se = np.sqrt(lpq_var_est(theta_hat, m_z, g_du_z0, g_du_z1, comp_prob, d, y, z, treatment, quantile, n_obs))
+    se = np.sqrt(lpq_var_est(theta_hat, m_z, g_du_z0, g_du_z1, comp_prob, d, y, z, treatment, quantile, n_obs, kde))
 
     return theta_hat, se
 
 
-def lpq_dml2(y, d, z, m_z, g_du_z0, g_du_z1, comp_prob, treatment, quantile, ipw_vec, coef_bounds):
+def lpq_dml2(y, d, z, m_z, g_du_z0, g_du_z1, comp_prob, treatment, quantile, ipw_vec, coef_bounds, kde):
     n_obs = len(y)
     ipw_est = ipw_vec.mean()
     theta_hat = lpq_est(m_z, g_du_z0, g_du_z1, comp_prob, d, y, z, treatment, quantile, ipw_est, coef_bounds)
 
-    se = np.sqrt(lpq_var_est(theta_hat, m_z, g_du_z0, g_du_z1, comp_prob, d, y, z, treatment, quantile, n_obs))
+    se = np.sqrt(lpq_var_est(theta_hat, m_z, g_du_z0, g_du_z1, comp_prob, d, y, z, treatment, quantile, n_obs, kde))
 
     return theta_hat, se
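The kde hook threaded through these helpers defaults to _default_kde. A sketch of a drop-in replacement, assuming (as in the DoubleML quantile models) that the callable receives residuals u and weights and returns the estimated density at zero:

import numpy as np
from scipy.stats import gaussian_kde

# Hypothetical alternative kde; assumes the expected signature is
# kde(u, weights) -> weighted density estimate of u evaluated at 0.
def custom_kde(u, weights):
    dens = gaussian_kde(u, bw_method='silverman', weights=weights)
    return dens.evaluate(0)

# Illustrative standalone check; in the test utility above it would be
# passed as fit_lpq(..., kde=custom_kde).
u = np.random.normal(size=500)
print(custom_kde(u, np.ones(500)))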

doubleml/tests/test_blp.py

Lines changed: 8 additions & 2 deletions

@@ -1,6 +1,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+import copy
 
 import doubleml as dml
 
@@ -26,7 +27,10 @@ def dml_blp_fixture(ci_joint, ci_level):
     random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 3)))
     random_signal = np.random.normal(0, 1, size=(n, ))
 
-    blp = dml.DoubleMLBLP(random_signal, random_basis).fit()
+    blp = dml.DoubleMLBLP(random_signal, random_basis)
+
+    blp_obj = copy.copy(blp)
+    blp.fit()
     blp_manual = fit_blp(random_signal, random_basis)
 
     np.random.seed(42)
@@ -47,7 +51,8 @@ def dml_blp_fixture(ci_joint, ci_level):
            'ci_1': ci_1,
            'ci_2': ci_2,
            'ci_manual': ci_manual,
-           'blp_model': blp}
+           'blp_model': blp,
+           'unfitted_blp_model': blp_obj}
 
     return res_dict
 
@@ -91,6 +96,7 @@ def test_dml_blp_ci_2(dml_blp_fixture):
 def test_dml_blp_return_types(dml_blp_fixture):
     assert isinstance(dml_blp_fixture['blp_model'].__str__(), str)
     assert isinstance(dml_blp_fixture['blp_model'].summary, pd.DataFrame)
+    assert isinstance(dml_blp_fixture['unfitted_blp_model'].summary, pd.DataFrame)
 
 
 @pytest.mark.ci

doubleml/tests/test_cvar_tune.py

Lines changed: 0 additions & 5 deletions

@@ -3,8 +3,6 @@
 import math
 
 from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 
 import doubleml as dml
@@ -58,9 +56,6 @@ def tune_on_folds(request):
 def get_par_grid(learner):
     if learner.__class__ in [RandomForestRegressor, RandomForestClassifier]:
         par_grid = {'n_estimators': [5, 10, 15, 20]}
-    else:
-        assert learner.__class__ in [LogisticRegression]
-        par_grid = {'C': np.logspace(-4, 2, 10)}
     return par_grid
 
 

doubleml/tests/test_dml_data.py

Lines changed: 29 additions & 1 deletion

@@ -5,9 +5,27 @@
 from doubleml import DoubleMLData, DoubleMLPLR, DoubleMLClusterData, DoubleMLDIDCS
 from doubleml.datasets import make_plr_CCDDHNR2018, _make_pliv_data, make_pliv_CHS2015,\
     make_pliv_multiway_cluster_CKMS2021, make_did_SZ2020
+from doubleml.double_ml_data import DoubleMLBaseData
+
 from sklearn.linear_model import Lasso, LogisticRegression
 
 
+class DummyDataClass(DoubleMLBaseData):
+    def __init__(self, data):
+        DoubleMLBaseData.__init__(self, data)
+
+    @property
+    def n_coefs(self):
+        return 1
+
+
+@pytest.mark.ci
+def test_doubleml_basedata():
+    dummy_dml_data = DummyDataClass(pd.DataFrame(np.zeros((100, 10))))
+    assert dummy_dml_data.d_cols[0] == 'theta'
+    assert dummy_dml_data.n_treat == 1
+    assert dummy_dml_data.n_coefs == 1
+
 @pytest.fixture(scope="module")
 def dml_data_fixture(generate_data1):
     data = generate_data1
@@ -157,12 +175,22 @@ def test_dml_data_no_instr_no_time():
 
 
 @pytest.mark.ci
-def test_dml_cluster_summary_with_time():
+def test_dml_summary_with_time():
     dml_data_did_cs = make_did_SZ2020(n_obs=200, cross_sectional_data=True)
     dml_did_cs = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression())
     assert isinstance(dml_did_cs.__str__(), str)
     assert isinstance(dml_did_cs.summary, pd.DataFrame)
 
+    dml_data = make_plr_CCDDHNR2018(n_obs=100)
+    df = dml_data.data.copy().iloc[:, :11]
+    df.columns = [f'X{i + 1}' for i in np.arange(8)] + ['y', 'd1', 'd2']
+    print(df)
+    dml_data = DoubleMLClusterData(df, 'y', ['d1', 'd2'],
+                                   cluster_cols=[f'X{i + 1}' for i in [5, 6]],
+                                   x_cols=[f'X{i + 1}' for i in np.arange(5)],
+                                   t_col='X8')
+    assert isinstance(dml_data._data_summary_str(), str)
+
 
 @pytest.mark.ci
 def test_x_cols_setter_defaults():
