Skip to content

Commit afbc4f3

Browse files
fix: Keep staterror from affecting multiple samples (#1965)
* Fix staterror bug introduced after v0.6.3 where staterror erroneously affected multiple samples. - Amends PR #1639 which introduced regression. - c.f. Issue #1720 * Remove staterror_combined.__staterror_uncrt as it is unused by the codebase. * Add test to verify that staterror does not affect more samples than shapesys equivalently. Co-authored-by: Giordon Stark <kratsg@gmail.com>
1 parent d4e1fc2 commit afbc4f3

File tree

4 files changed

+112
-17
lines changed

4 files changed

+112
-17
lines changed

src/pyhf/modifiers/staterror.py

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,12 @@ def finalize(self):
100100
],
101101
axis=0,
102102
)
103+
# here relerrs still has all the bins, while the staterror are usually per-channel
104+
# so we need to pick out the masks for this modifier to extract the
105+
# modifier configuration (sigmas, etc..)
106+
# so loop over samples and extract the first mask
107+
# while making sure any subsequent mask is consistent
103108
relerrs = default_backend.sqrt(relerrs)
104-
105109
masks = {}
106110
for modifier_data in self.builder_data[modname].values():
107111
mask_this_sample = default_backend.astensor(
@@ -113,12 +117,14 @@ def finalize(self):
113117
else:
114118
assert (mask_this_sample == masks[modname]).all()
115119

116-
for modifier_data in self.builder_data[modname].values():
117-
modifier_data['data']['mask'] = masks[modname]
120+
# extract sigmas using this modifiers mask
118121
sigmas = relerrs[masks[modname]]
122+
119123
# list of bools, consistent with other modifiers (no numpy.bool_)
120124
fixed = default_backend.tolist(sigmas == 0)
121-
# ensures non-Nan constraint term, but in a future PR we need to remove constraints for these
125+
# FIXME: sigmas that are zero will be fixed to 1.0 arbitrarily to ensure
126+
# non-Nan constraint term, but in a future PR need to remove constraints
127+
# for these
122128
sigmas[fixed] = 1.0
123129
self.required_parsets.setdefault(parname, [required_parset(sigmas, fixed)])
124130
return self.builder_data
@@ -145,18 +151,6 @@ def __init__(self, modifiers, pdfconfig, builder_data, batch_size=None):
145151
[[builder_data[m][s]['data']['mask']] for s in pdfconfig.samples]
146152
for m in keys
147153
]
148-
self.__staterror_uncrt = default_backend.astensor(
149-
[
150-
[
151-
[
152-
builder_data[m][s]['data']['uncrt'],
153-
builder_data[m][s]['data']['nom_data'],
154-
]
155-
for s in pdfconfig.samples
156-
]
157-
for m in keys
158-
]
159-
)
160154
global_concatenated_bin_indices = [
161155
[[j for c in pdfconfig.channels for j in range(pdfconfig.channel_nbins[c])]]
162156
]

tests/test_modifiers.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,3 +193,55 @@ def test_invalid_bin_wise_modifier(datadir, patch_file):
193193

194194
with pytest.raises(pyhf.exceptions.InvalidModifier):
195195
pyhf.Model(bad_spec)
196+
197+
198+
def test_issue1720_staterror_builder_mask(datadir):
199+
with open(datadir.joinpath("issue1720_greedy_staterror.json")) as spec_file:
200+
spec = json.load(spec_file)
201+
202+
spec["channels"][0]["samples"][1]["modifiers"][0]["type"] = "staterror"
203+
config = pyhf.pdf._ModelConfig(spec)
204+
builder = pyhf.modifiers.staterror.staterror_builder(config)
205+
206+
channel = spec["channels"][0]
207+
sig_sample = channel["samples"][0]
208+
bkg_sample = channel["samples"][1]
209+
modifier = bkg_sample["modifiers"][0]
210+
211+
assert channel["name"] == "channel"
212+
assert sig_sample["name"] == "signal"
213+
assert bkg_sample["name"] == "bkg"
214+
assert modifier["type"] == "staterror"
215+
216+
builder.append("staterror/NP", "channel", "bkg", modifier, bkg_sample)
217+
collected_bkg = builder.collect(modifier, bkg_sample["data"])
218+
assert collected_bkg == {"mask": [True], "nom_data": [1], "uncrt": [1.5]}
219+
220+
builder.append("staterror/NP", "channel", "signal", None, sig_sample)
221+
collected_sig = builder.collect(None, sig_sample["data"])
222+
assert collected_sig == {"mask": [False], "nom_data": [5], "uncrt": [0.0]}
223+
224+
finalized = builder.finalize()
225+
assert finalized["staterror/NP"]["bkg"]["data"]["mask"].tolist() == [True]
226+
assert finalized["staterror/NP"]["signal"]["data"]["mask"].tolist() == [False]
227+
228+
229+
@pytest.mark.parametrize(
230+
"inits",
231+
[[-2.0], [-1.0], [0.0], [1.0], [2.0]],
232+
)
233+
def test_issue1720_greedy_staterror(datadir, inits):
234+
"""
235+
Test that the staterror does not affect more samples than shapesys equivalently.
236+
"""
237+
with open(datadir.joinpath("issue1720_greedy_staterror.json")) as spec_file:
238+
spec = json.load(spec_file)
239+
240+
model_shapesys = pyhf.Workspace(spec).model()
241+
expected_shapesys = model_shapesys.expected_actualdata(inits)
242+
243+
spec["channels"][0]["samples"][1]["modifiers"][0]["type"] = "staterror"
244+
model_staterror = pyhf.Workspace(spec).model()
245+
expected_staterror = model_staterror.expected_actualdata(inits)
246+
247+
assert expected_staterror == expected_shapesys
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{
2+
"channels": [
3+
{
4+
"name": "channel",
5+
"samples": [
6+
{
7+
"data": [
8+
5
9+
],
10+
"modifiers": [],
11+
"name": "signal"
12+
},
13+
{
14+
"data": [
15+
1
16+
],
17+
"modifiers": [
18+
{
19+
"data": [
20+
1.5
21+
],
22+
"name": "NP",
23+
"type": "shapesys"
24+
}
25+
],
26+
"name": "bkg"
27+
}
28+
]
29+
}
30+
],
31+
"measurements": [
32+
{
33+
"config": {
34+
"parameters": [],
35+
"poi": "NP"
36+
},
37+
"name": ""
38+
}
39+
],
40+
"observations": [
41+
{
42+
"data": [
43+
0
44+
],
45+
"name": "channel"
46+
}
47+
],
48+
"version": "1.0.0"
49+
}

tests/test_scripts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ def test_testpoi(tmpdir, script_runner):
297297
command = f'pyhf xml2json validation/xmlimport_input/config/example.xml --basedir validation/xmlimport_input/ --output-file {temp.strpath:s}'
298298
ret = script_runner.run(*shlex.split(command))
299299

300-
pois = [1.0, 0.5, 0.0]
300+
pois = [1.0, 0.5, 0.001]
301301
results_exp = []
302302
results_obs = []
303303
for test_poi in pois:

0 commit comments

Comments
 (0)