-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_norm_stats.py
More file actions
47 lines (38 loc) · 1.3 KB
/
generate_norm_stats.py
File metadata and controls
47 lines (38 loc) · 1.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""
This script gives an example of how to create normalization statistics
for the MuonSpDataset.
Author: Davide Di Croce
Date: 2025-04-15
"""
from Classifier.data.MuonSpDataset import MuonSpDataset
import numpy as np
# Location of the dataset parts; replace the placeholder with the real path.
dataset_desc = "<path/to/your/dataset>/data/muon_sp_FCG_6vars_all"
dataset = MuonSpDataset.load_dataset_parts(dataset_desc, "train")
data_list = dataset.data_list

# Output file for the statistics. Because the saved object is a dict,
# np.save pickles it into a 0-d object array; read it back with
#     np.load(normalization_file, allow_pickle=True).item()
normalization_file = "<path/to/your/dataset>/data/norm_stats.npy"

# Variable name -> column index inside each entry's "features" matrix.
variables_to_normalize = {
    "localPositionX": 0,
    "localPositionY": 1,
    "localPositionZ": 2,
    "id_stationIndex": 3,
    "driftR": 4,
    "relative_layer": 5,
}

# Collect one column slice per entry and variable. Appending whole slices
# (instead of extending a Python list value by value) avoids boxing every
# number into a separate Python object before the reductions below.
# NOTE(review): assumes data["features"] is a 2-D CPU tensor exposing
# .numpy() -- confirm against MuonSpDataset.
column_chunks = {var: [] for var in variables_to_normalize}
for data in data_list:
    features = data["features"].numpy()
    for var, idx in variables_to_normalize.items():
        column_chunks[var].append(features[:, idx])

# Fail early with a clear message rather than NumPy's generic
# "zero-size array" error when there is nothing to aggregate.
if any(not chunks for chunks in column_chunks.values()):
    raise ValueError(
        f"No entries found in the 'train' part of {dataset_desc}; "
        "cannot compute normalization statistics."
    )

# One contiguous 1-D array per variable, holding every value in the split.
data_values = {
    var: np.concatenate(chunks) for var, chunks in column_chunks.items()
}

# Per-variable summary statistics over the whole training split.
normalization_stats = {
    var: {
        "mean": np.mean(data_values[var]),
        "std": np.std(data_values[var]),
        "q3": np.percentile(data_values[var], 75),
        "q1": np.percentile(data_values[var], 25),
        "min": np.min(data_values[var]),
        "max": np.max(data_values[var]),
    }
    for var in variables_to_normalize
}

np.save(normalization_file, normalization_stats)
print(f"Normalization statistics saved to {normalization_file}")