From c44f5364fc88da996ddc41fefb8f98cfd23eac96 Mon Sep 17 00:00:00 2001
From: Bear Carlson
Date: Thu, 27 Feb 2025 14:11:20 -0800
Subject: [PATCH 1/3] Add point diagnostics, bullet-proof completeness
 diagnostics

---
 spine/ana/diag/__init__.py |  2 ++
 spine/ana/diag/track.py    | 48 ++++++++++++++++++++++++++-----------
 2 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/spine/ana/diag/__init__.py b/spine/ana/diag/__init__.py
index 63f7c32c..50a44157 100644
--- a/spine/ana/diag/__init__.py
+++ b/spine/ana/diag/__init__.py
@@ -5,8 +5,10 @@
 - Track energy reconstruction
 - Track completeness
 - Shower start dE/dx
+- Point purity and efficiency
 - ...
 '''
 
 from .shower import *
 from .track import *
+from .point import *
diff --git a/spine/ana/diag/track.py b/spine/ana/diag/track.py
index 0bc721be..da260680 100644
--- a/spine/ana/diag/track.py
+++ b/spine/ana/diag/track.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 from scipy.spatial.distance import cdist
+from sklearn.cluster import DBSCAN
 
 from spine.ana.base import AnaBase
 
@@ -25,19 +26,23 @@ class TrackCompletenessAna(AnaBase):
     name = 'track_completeness'
 
     def __init__(self, time_window=None, run_mode='both',
-                 truth_point_mode='points', **kwargs):
+                 truth_point_mode='points', cluster_method='cheb', **kwargs):
         """Initialize the analysis script.
 
         Parameters
         ----------
         time_window : List[float]
             Time window within which to include particles (only works for `truth`)
+        cluster_method : str
+            Method to use for clustering, either 'cheb' or 'DBSCAN'
         **kwargs : dict, optional
             Additional arguments to pass to :class:`AnaBase`
         """
         # Initialize the parent class
         super().__init__('particle', run_mode, truth_point_mode, **kwargs)
 
+        self.cluster_method = cluster_method
+
         # Store the time window
         self.time_window = time_window
         assert time_window is None or len(time_window) == 2, (
@@ -79,8 +84,6 @@
             if part.t < self.time_window[0] or part.t > self.time_window[1]:
                 continue
 
-            # Initialize the particle dictionary
-            comp_dict = {'particle_id': part.id}
 
             # Fetch the particle point coordinates
             points = self.get_points(part)
@@ -92,8 +95,14 @@
             # Add the direction of the track
             vec = end - start
             length = np.linalg.norm(vec)
-            if length:
+            if length > 0:
                 vec /= length
+            else:  # Track has no length, skip it
+                continue
+
+            # Initialize the particle dictionary
+            comp_dict = {'particle_id': part.id}
+
             comp_dict['size'] = len(points)
             comp_dict['length'] = length
@@ -102,7 +111,7 @@
 
             # Chunk out the track along gaps, estimate gap length
             chunk_labels = self.cluster_track_chunks(
-                points, start, end, pixel_size)
+                points, start, end, pixel_size, self.cluster_method)
             gaps = self.sequential_cluster_distances(
                 points, chunk_labels, start)
@@ -119,7 +128,7 @@
             self.append(prefix, **comp_dict)
 
     @staticmethod
-    def cluster_track_chunks(points, start_point, end_point, pixel_size):
+    def cluster_track_chunks(points, start_point, end_point, pixel_size, method='cheb'):
         """Find points where the track is broken and divide the track
         into self-contained chunks which are Linf-connected (Moore neighbors).
@@ -134,21 +143,30 @@ def cluster_track_chunks(points, start_point, end_point, pixel_size):
         pixel_size : float
             Dimension of one pixel, used to identify what is big enough
             to constitute a break
+        method : str
+            Method to use for clustering, either 'cheb' or 'DBSCAN'
 
         Returns
         -------
         np.ndarray
             (N) Track chunk labels
         """
-        # Project and cluster on the projected axis
-        direction = (end_point-start_point)/np.linalg.norm(end_point-start_point)
-        projs = np.dot(points - start_point, direction)
-        perm = np.argsort(projs)
-        seps = projs[perm][1:] - projs[perm][:-1]
-        breaks = np.where(seps > pixel_size*1.1)[0] + 1
-        cluster_labels = np.empty(len(projs), dtype=int)
-        for i, index in enumerate(np.split(np.arange(len(projs)), breaks)):
-            cluster_labels[perm[index]] = i
+        if method == 'cheb':
+            # Project and cluster on the projected axis
+            direction = (end_point-start_point)/np.linalg.norm(end_point-start_point)
+            projs = np.dot(points - start_point, direction)
+            perm = np.argsort(projs)
+            seps = projs[perm][1:] - projs[perm][:-1]
+            breaks = np.where(seps > pixel_size*1.1)[0] + 1
+            cluster_labels = np.empty(len(projs), dtype=int)
+            for i, index in enumerate(np.split(np.arange(len(projs)), breaks)):
+                cluster_labels[perm[index]] = i
+        elif method == 'DBSCAN':
+            clustering = DBSCAN(eps=1.1*pixel_size, min_samples=1, metric='chebyshev').fit(points)
+            cluster_labels = clustering.labels_
+        else:
+            raise ValueError(f"Invalid clustering method: {method}")
+
         return cluster_labels

From fed63a6209515434aca5b8784cde2f26a8f3fba0 Mon Sep 17 00:00:00 2001
From: Bear Carlson
Date: Thu, 27 Feb 2025 14:19:22 -0800
Subject: [PATCH 2/3] Add point diagnostics, bullet-proof completeness
 diagnostics

---
 spine/ana/diag/point.py | 163 ++++++++++++++++++++++++++++++++++++++++
 spine/ana/diag/track.py |  38 +++----------
 2 files changed, 175 insertions(+), 26 deletions(-)
 create mode 100644 spine/ana/diag/point.py

diff --git a/spine/ana/diag/point.py b/spine/ana/diag/point.py
new file mode 100644
index 00000000..b48dd6d3
--- /dev/null
+++ b/spine/ana/diag/point.py
@@ -0,0 +1,163 @@
+"""Module to evaluate diagnostic metrics on points."""
+
+import numpy as np
+from scipy.spatial import cKDTree
+from scipy.spatial.distance import cdist
+
+from spine.ana.base import AnaBase
+
+
+__all__ = ['PointMetricsAna']
+
+
+class PointMetricsAna(AnaBase):
+    """This analysis script evaluates the purity and efficiency of space
+    points by comparing the truth points (SED) to the reconstructed
+    points (cluster3d)."""
+
+    # Name of the analysis script (as specified in the configuration)
+    name = 'point_metrics'
+
+    def __init__(self, time_window=None, run_mode='both',
+                 truth_point_mode='points', **kwargs):
+        """Initialize the analysis script.
+
+        Parameters
+        ----------
+        time_window : List[float]
+            Time window within which to include particles (only works for `truth`)
+        run_mode : str
+            Mode to run the analysis in, either 'both', 'reco' or 'truth'
+        truth_point_mode : str
+            Which truth point attribute to use, either 'points' or 'clusters'
+        **kwargs : dict, optional
+            Additional arguments to pass to :class:`AnaBase`
+        """
+        # Initialize the parent class
+        super().__init__('particle', run_mode, truth_point_mode, **kwargs)
+
+        # Store the time window
+        self.time_window = time_window
+        assert time_window is None or len(time_window) == 2, (
+            "Time window must be specified as an array of two values.")
+        assert time_window is None or run_mode == 'truth', (
+            "Time of reconstructed particle is unknown.")
+
+        # Make sure the metadata is provided (rasterization needed)
+        self.update_keys({'meta': True})
+
+        # Initialize the CSV writer(s) you want
+        for prefix in self.prefixes:
+            self.initialize_writer(prefix)
+
+    def process(self, data):
+        """Evaluate point purity and efficiency for particles in one entry.
+
+        Parameters
+        ----------
+        data : dict
+            Dictionary of data products
+        """
+        # Fetch the pixel size in this image (assume cubic cells)
+        pixel_size = data['meta'].size[0]
+
+        # Loop over the types of particle data products
+        for key in self.obj_keys:
+            # Fetch the prefix ('reco' or 'truth')
+            prefix = key.split('_')[0]
+
+            # Loop over the particle objects
+            for part in data[key]:
+                # If needed, check on the particle time
+                if self.time_window is not None:
+                    if part.t < self.time_window[0] or part.t > self.time_window[1]:
+                        continue
+
+                # Fetch the reconstructed and true (SED) point coordinates
+                points = self.get_points(part)
+                sed_points = part.points_g4
+                if len(points) == 0 or len(sed_points) == 0:
+                    continue
+
+                # Get the purity and efficiency. Use a matching distance of
+                # 3*pixel_size so points can match across the corner of a voxel
+                purity, efficiency = self.reco_and_true_matching(
+                    points, sed_points, distance=3*pixel_size)
+
+                # Initialize the particle dictionary
+                comp_dict = {'particle_id': part.id}
+                comp_dict['purity'] = purity
+                comp_dict['efficiency'] = efficiency
+                comp_dict['num_points'] = len(points)
+                comp_dict['num_sed_points'] = len(sed_points)
+                comp_dict['shape'] = part.shape
+
+                # Add length and direction for tracks
+                if part.shape == 'track':
+                    start = points[np.argmin(cdist([part.start_point], points))]
+                    end = points[np.argmin(cdist([part.end_point], points))]
+                    vec = end - start
+                    length = np.linalg.norm(vec)
+                    if length > 0:
+                        vec /= length
+                    comp_dict['length'] = length
+                    comp_dict.update(
+                            {'dir_x': vec[0], 'dir_y': vec[1], 'dir_z': vec[2]})
+
+                # Append the dictionary to the CSV log
+                self.append(prefix, **comp_dict)
+
+    @staticmethod
+    def reco_and_true_matching(reco_noghost, true, distance=3):
+        """Calculate purity and efficiency by matching true voxel locations
+        to reco voxel locations and vice versa.
+
+        Parameters
+        ----------
+        reco_noghost : np.ndarray
+            (N, 3) coordinates of the reconstructed (deghosted) voxels
+        true : np.ndarray
+            (M, 3) coordinates of the true voxels
+        distance : float
+            Threshold distance between matched voxels. The default of 3
+            allows a match across the corner of a voxel;
+            scale it by the pixel size if the coordinates are in cm.
+
+        Returns
+        -------
+        pur : float
+            Purity, the fraction of reco voxels matched to a true voxel
+        eff : float
+            Efficiency, the fraction of true voxels matched to a reco voxel
+        """
+        small = 1e-5  # Offset to absorb floating-point precision
+
+        # Purity: match each reco voxel to its nearest true voxel
+        tree = cKDTree(true)
+        distances, _ = tree.query(reco_noghost, k=1)
+        reco_tagged = np.sum(distances - small <= distance)
+        pur = reco_tagged/len(reco_noghost) if len(reco_noghost) else 0.
+
+        # Efficiency: match each true voxel to its nearest reco voxel
+        tree = cKDTree(reco_noghost)
+        distances, _ = tree.query(true, k=1)
+        true_tagged = np.sum(distances - small <= distance)
+        eff = true_tagged/len(true) if len(true) else 0.
+
+        return pur, eff
diff --git a/spine/ana/diag/track.py b/spine/ana/diag/track.py
index da260680..3fd8bbd9 100644
--- a/spine/ana/diag/track.py
+++ b/spine/ana/diag/track.py
@@ -26,23 +26,18 @@ class TrackCompletenessAna(AnaBase):
     name = 'track_completeness'
 
     def __init__(self, time_window=None, run_mode='both',
-                 truth_point_mode='points', cluster_method='cheb', **kwargs):
+                 truth_point_mode='points', **kwargs):
         """Initialize the analysis script.
 
         Parameters
         ----------
         time_window : List[float]
             Time window within which to include particles (only works for `truth`)
-        cluster_method : str
-            Method to use for clustering, either 'cheb' or 'DBSCAN'
         **kwargs : dict, optional
             Additional arguments to pass to :class:`AnaBase`
         """
         # Initialize the parent class
         super().__init__('particle', run_mode, truth_point_mode, **kwargs)
-
-        self.cluster_method = cluster_method
-
         # Store the time window
         self.time_window = time_window
         assert time_window is None or len(time_window) == 2, (
@@ -111,7 +106,7 @@
 
             # Chunk out the track along gaps, estimate gap length
             chunk_labels = self.cluster_track_chunks(
-                points, start, end, pixel_size, self.cluster_method)
+                points, start, end, pixel_size)
             gaps = self.sequential_cluster_distances(
                 points, chunk_labels, start)
@@ -128,7 +123,7 @@
             self.append(prefix, **comp_dict)
 
     @staticmethod
-    def cluster_track_chunks(points, start_point, end_point, pixel_size, method='cheb'):
+    def cluster_track_chunks(points, start_point, end_point, pixel_size):
         """Find points where the track is broken and divide the track
         into self-contained chunks which are Linf-connected (Moore neighbors).
@@ -143,31 +138,22 @@ def cluster_track_chunks(points, start_point, end_point, pixel_size):
         pixel_size : float
             Dimension of one pixel, used to identify what is big enough
             to constitute a break
-        method : str
-            Method to use for clustering, either 'cheb' or 'DBSCAN'
 
         Returns
        -------
         np.ndarray
             (N) Track chunk labels
         """
-        if method == 'cheb':
-            # Project and cluster on the projected axis
-            direction = (end_point-start_point)/np.linalg.norm(end_point-start_point)
-            projs = np.dot(points - start_point, direction)
-            perm = np.argsort(projs)
-            seps = projs[perm][1:] - projs[perm][:-1]
-            breaks = np.where(seps > pixel_size*1.1)[0] + 1
-            cluster_labels = np.empty(len(projs), dtype=int)
-            for i, index in enumerate(np.split(np.arange(len(projs)), breaks)):
-                cluster_labels[perm[index]] = i
-        elif method == 'DBSCAN':
-            clustering = DBSCAN(eps=1.1*pixel_size, min_samples=1, metric='chebyshev').fit(points)
-            cluster_labels = clustering.labels_
-        else:
-            raise ValueError(f"Invalid clustering method: {method}")
+        # Project and cluster on the projected axis
+        direction = (end_point-start_point)/np.linalg.norm(end_point-start_point)
+        projs = np.dot(points - start_point, direction)
+        perm = np.argsort(projs)
+        seps = projs[perm][1:] - projs[perm][:-1]
+        breaks = np.where(seps > pixel_size*1.1)[0] + 1
+        cluster_labels = np.empty(len(projs), dtype=int)
+        for i, index in enumerate(np.split(np.arange(len(projs)), breaks)):
+            cluster_labels[perm[index]] = i
-
         return cluster_labels
 
 
     @staticmethod

From d1e6260975736403b428a54c7ee63d818a187b2e Mon Sep 17 00:00:00 2001
From: Bear Carlson
Date: Thu, 27 Feb 2025 15:02:19 -0800
Subject: [PATCH 3/3] Remove unused DBSCAN import

---
 spine/ana/diag/track.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/spine/ana/diag/track.py b/spine/ana/diag/track.py
index 3fd8bbd9..d05e58da 100644
--- a/spine/ana/diag/track.py
+++ b/spine/ana/diag/track.py
@@ -2,7 +2,6 @@
 
 import numpy as np
 from scipy.spatial.distance import cdist
-from sklearn.cluster import DBSCAN
 
 from spine.ana.base import AnaBase
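
--

As a standalone sanity check of the two routines this series introduces, here is a minimal sketch on synthetic data. The condensed helper names and the synthetic track are illustrative only and not part of the spine package; the bodies mirror the projection-based gap clustering of `cluster_track_chunks` and the two-way KD-tree matching of `reco_and_true_matching`:

import numpy as np
from scipy.spatial import cKDTree

def cluster_track_chunks(points, start_point, end_point, pixel_size):
    # Project points on the track axis and split wherever consecutive
    # projections are separated by more than ~one pixel
    direction = (end_point - start_point)/np.linalg.norm(end_point - start_point)
    projs = np.dot(points - start_point, direction)
    perm = np.argsort(projs)
    seps = projs[perm][1:] - projs[perm][:-1]
    breaks = np.where(seps > pixel_size*1.1)[0] + 1
    labels = np.empty(len(projs), dtype=int)
    for i, index in enumerate(np.split(np.arange(len(projs)), breaks)):
        labels[perm[index]] = i
    return labels

def purity_efficiency(reco, true, distance):
    # Purity: fraction of reco points within `distance` of a true point;
    # efficiency: fraction of true points within `distance` of a reco point
    small = 1e-5
    pur = np.mean(cKDTree(true).query(reco, k=1)[0] - small <= distance)
    eff = np.mean(cKDTree(reco).query(true, k=1)[0] - small <= distance)
    return pur, eff

# Synthetic straight track along x with a 3-pixel gap after x=9
xs = np.concatenate([np.arange(0., 10.), np.arange(13., 20.)])
points = np.stack([xs, np.zeros_like(xs), np.zeros_like(xs)], axis=1)

labels = cluster_track_chunks(points, points[0], points[-1], pixel_size=1.)
print(len(np.unique(labels)))  # -> 2 chunks, split at the gap

# Reco points offset from truth by half a pixel in every coordinate
pur, eff = purity_efficiency(points + 0.5, points, distance=3.)
print(pur, eff)                # -> 1.0 1.0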