From d3aba7129395d52accfbf090efeb4e5d1fa2bf14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Il=C3=ADdio?= Date: Tue, 15 Apr 2025 18:38:48 +0200 Subject: [PATCH 1/8] fix overlapping crossbars --- scikit_posthocs/_plotting.py | 83 ++++++++++++++++++++---------------- tests/test_posthocs.py | 68 +++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 37 deletions(-) diff --git a/scikit_posthocs/_plotting.py b/scikit_posthocs/_plotting.py index 20be9ca..64d0aaa 100644 --- a/scikit_posthocs/_plotting.py +++ b/scikit_posthocs/_plotting.py @@ -1,5 +1,5 @@ -from copy import deepcopy from typing import Dict, List, Optional, Set, Tuple, Union +from itertools import combinations import numpy as np from matplotlib import colors, pyplot @@ -42,13 +42,12 @@ def sign_array(p_values: Union[List, np.ndarray, DataFrame], alpha: float = 0.05 [ 1, -1, 0], [ 1, 0, -1]]) """ - sig_array = deepcopy(np.array(p_values)) - sig_array[sig_array == 0] = 1e-10 - sig_array[sig_array > alpha] = 0 - sig_array[(sig_array < alpha) & (sig_array > 0)] = 1 - np.fill_diagonal(sig_array, -1) - - return sig_array + p_values = np.asarray(p_values) + if (p_values < 0).any(): + raise ValueError("P values matrix must be non-negative") + result = (p_values <= alpha).astype(np.int8) # Returns a copy + np.fill_diagonal(result, -1) + return result def sign_table( @@ -518,41 +517,51 @@ def critical_difference_diagram( ranks.iloc[: len(ranks) // 2], ranks.iloc[len(ranks) // 2 :], ) - # points_left, points_right = np.array_split(ranks.sort_values(), 2) # Sets of points under the same crossbar - crossbar_sets = _find_maximal_cliques(adj_matrix) + crossbar_sets = [bar for bar in _find_maximal_cliques(adj_matrix) if len(bar) > 1] - # Sort by lowest rank and filter single-valued sets - crossbar_sets = sorted( - (x for x in crossbar_sets if len(x) > 1), key=lambda x: ranks[list(x)].min() - ) + if not crossbar_sets: # All points are significantly different + # The list of crossbars is left empty + 
lowest_crossbar_ypos = -1 + else: + crossbar_min_max = [ # Will be used to check if two crossbars intersect + ranks.reindex(bar).agg(["min", "max"]) + for bar in crossbar_sets + ] + + # Create an adjacency matrix of the crossbars, where 1 means that the two + # crossbars do not intersect, meaning that they can be plotted on the same + # level. + n_bars = len(crossbar_sets) + on_same_level = DataFrame(True, index=range(n_bars), columns=range(n_bars)) + + for (i, bar_i), (j, bar_j) in combinations(enumerate(crossbar_min_max), 2): + on_same_level.loc[i, j] = on_same_level.loc[j, i] = ( + (bar_i["max"] < bar_j["min"]) or (bar_i["min"] > bar_j["max"]) + ) - # Create stacking of crossbars: for each level, try to fit the crossbar, - # so that it does not intersect with any other in the level. If it does not - # fit in any level, create a new level for it. - crossbar_levels: list[list[set]] = [] - for bar in crossbar_sets: + # The levels are the maximal cliques of the crossbar adjacency matrix. + crossbar_levels = _find_maximal_cliques(on_same_level) + + # Plot the crossbars in each level + crossbars = [] for level, bars_in_level in enumerate(crossbar_levels): - if not any(bool(bar & bar_in_lvl) for bar_in_lvl in bars_in_level): - ypos = -level - 1 - bars_in_level.append(bar) - break - else: - ypos = -len(crossbar_levels) - 1 - crossbar_levels.append([bar]) - - crossbars.append( - ax.plot( - # Adding a separate line between each pair enables showing a - # marker over each elbow with crossbar_props={'marker': 'o'}. - [ranks[i] for i in bar], - [ypos] * len(bar), - **crossbar_props, - ) - ) + plotted_bars_in_level = [] + for bar_index in bars_in_level: + bar = crossbar_sets[bar_index] + plotted_bar, *_ = ax.plot( + # We could plot a single line segment between min and max. However, + # adding a separate segment between each pair enables showing a + # marker over each elbow, e.g. crossbar_props={'marker': 'o'}. 
+ [ranks[i] for i in bar], + [-level - 1] * len(bar), + **crossbar_props, + ) + plotted_bars_in_level.append(plotted_bar) + crossbars.append(plotted_bars_in_level) - lowest_crossbar_ypos = -len(crossbar_levels) + lowest_crossbar_ypos = -len(crossbar_levels) def plot_items(points, xpos, label_fmt, color_palette, label_props): """Plot each marker + elbow + label.""" diff --git a/tests/test_posthocs.py b/tests/test_posthocs.py index ef0965c..8ba1bdb 100644 --- a/tests/test_posthocs.py +++ b/tests/test_posthocs.py @@ -160,6 +160,20 @@ def test_find_maximal_cliques_6x6(self): set(map(frozenset, expected)), ) + def test_cd_diagram_single_bar(self): + index = list("abcdef") + ranks = Series([2.1, 1.2, 4.5, 3.2, 5.7, 6.5], index=index) + sig_matrix = DataFrame( + 1, # No significant differences + index=index, + columns=index, + ) + output = splt.critical_difference_diagram(ranks, sig_matrix) + self.assertEqual(len(output["markers"]), len(ranks)) + self.assertEqual(len(output["elbows"]), len(ranks)) + self.assertEqual(len(output["labels"]), len(ranks)) + self.assertEqual(len(output["crossbars"]), 1) + def test_cd_diagram_number_of_artists(self): index = list("abcdef") ranks = Series([2.1, 1.2, 4.5, 3.2, 5.7, 6.5], index=index) @@ -182,6 +196,60 @@ def test_cd_diagram_number_of_artists(self): self.assertEqual(len(output["labels"]), len(ranks)) self.assertEqual(len(output["crossbars"]), 2) + def test_cd_diagram_all_significant(self): + index = list("abcdef") + ranks = Series(np.arange(len(index)), index=index) + sig_matrix = DataFrame( + np.eye(len(index)), # All significant + index=index, + columns=index, + ) + output = splt.critical_difference_diagram(ranks, sig_matrix) + self.assertEqual(len(output["markers"]), len(ranks)) + self.assertEqual(len(output["elbows"]), len(ranks)) + self.assertEqual(len(output["labels"]), len(ranks)) + self.assertEqual(len(output["crossbars"]), 0) + + def test_cd_diagram_non_intersecting_crossbars(self): + index = list("abcdef") + # Swap the 
ranks of 'c' and 'd' + ranks = Series([0, 1, 3, 2, 4, 5], index=index) + sig_matrix = DataFrame( + [ + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + ], + index=index, + columns=index, + ) + output = splt.critical_difference_diagram(ranks, sig_matrix) + crossbars = output["crossbars"] + y_positions = set(bar.get_ydata()[0] for level in crossbars for bar in level) + self.assertEqual(len(crossbars), len(y_positions)) + + def test_cd_diagram_normal_distributions(self): + rng = np.random.default_rng(0) + experiment_values = rng.normal( + loc=[-5.2, -6, -2.1, -1.7, -6.4], + scale=np.full(fill_value=.1, shape=(10, 1)), + ) + df = DataFrame(experiment_values, columns=["A", "B", "C", "D", "E"]) + + test_result = sp.posthoc_conover_friedman(df.to_numpy()) + average_ranks = df.rank(ascending=False, axis=1).mean(axis=0) + + output = splt.critical_difference_diagram( + ranks=average_ranks, sig_matrix=test_result + ) + self.assertEqual(len(output["markers"]), df.shape[1]) + self.assertEqual(len(output["elbows"]), df.shape[1]) + self.assertEqual(len(output["labels"]), df.shape[1]) + self.assertEqual(len(output["crossbars"]), 0) + # Outliers tests def test_outliers_iqr(self): x = np.array([4, 5, 6, 10, 12, 4, 3, 1, 2, 3, 23, 5, 3]) From db276c77cc60ac02bd08ac503b4a63cb54474ac8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Il=C3=ADdio?= Date: Tue, 15 Apr 2025 18:46:50 +0200 Subject: [PATCH 2/8] remove unnecessary line --- scikit_posthocs/_plotting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scikit_posthocs/_plotting.py b/scikit_posthocs/_plotting.py index 64d0aaa..3db7b13 100644 --- a/scikit_posthocs/_plotting.py +++ b/scikit_posthocs/_plotting.py @@ -545,7 +545,6 @@ def critical_difference_diagram( crossbar_levels = _find_maximal_cliques(on_same_level) # Plot the crossbars in each level - crossbars = [] for level, bars_in_level in enumerate(crossbar_levels): plotted_bars_in_level = [] 
for bar_index in bars_in_level: From a652eb4ece6902ab2cb528d85101ef447df6e701 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Il=C3=ADdio?= Date: Tue, 15 Apr 2025 19:46:46 +0200 Subject: [PATCH 3/8] remove unnecessary lines --- scikit_posthocs/_plotting.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scikit_posthocs/_plotting.py b/scikit_posthocs/_plotting.py index 3db7b13..5c401ca 100644 --- a/scikit_posthocs/_plotting.py +++ b/scikit_posthocs/_plotting.py @@ -521,10 +521,7 @@ def critical_difference_diagram( # Sets of points under the same crossbar crossbar_sets = [bar for bar in _find_maximal_cliques(adj_matrix) if len(bar) > 1] - if not crossbar_sets: # All points are significantly different - # The list of crossbars is left empty - lowest_crossbar_ypos = -1 - else: + if crossbar_sets: # If there are any crossbars to plot crossbar_min_max = [ # Will be used to check if two crossbars intersect ranks.reindex(bar).agg(["min", "max"]) for bar in crossbar_sets @@ -560,7 +557,7 @@ def critical_difference_diagram( plotted_bars_in_level.append(plotted_bar) crossbars.append(plotted_bars_in_level) - lowest_crossbar_ypos = -len(crossbar_levels) + lowest_crossbar_ypos = -len(crossbar_levels) def plot_items(points, xpos, label_fmt, color_palette, label_props): """Plot each marker + elbow + label.""" From 63ba9ff7d13b7d45cef8ae1989b7496ba824df7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Il=C3=ADdio?= Date: Tue, 15 Apr 2025 19:48:27 +0200 Subject: [PATCH 4/8] minor fix --- scikit_posthocs/_plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scikit_posthocs/_plotting.py b/scikit_posthocs/_plotting.py index 5c401ca..f8a8740 100644 --- a/scikit_posthocs/_plotting.py +++ b/scikit_posthocs/_plotting.py @@ -557,7 +557,7 @@ def critical_difference_diagram( plotted_bars_in_level.append(plotted_bar) crossbars.append(plotted_bars_in_level) - lowest_crossbar_ypos = -len(crossbar_levels) + lowest_crossbar_ypos = -len(crossbars) 
def plot_items(points, xpos, label_fmt, color_palette, label_props): """Plot each marker + elbow + label.""" From 4b07e1c29d231078a714969f42193553bac9d15a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Il=C3=ADdio?= Date: Thu, 17 Apr 2025 19:44:34 +0200 Subject: [PATCH 5/8] Fix point order in each crossbar (#89) Pointed out here: https://github.com/maximtrp/scikit-posthocs/pull/87#issuecomment-2812740771 And also in #89. --- scikit_posthocs/_plotting.py | 72 +++++++++++++++++------------------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/scikit_posthocs/_plotting.py b/scikit_posthocs/_plotting.py index f8a8740..9cedd89 100644 --- a/scikit_posthocs/_plotting.py +++ b/scikit_posthocs/_plotting.py @@ -519,49 +519,45 @@ def critical_difference_diagram( ) # Sets of points under the same crossbar - crossbar_sets = [bar for bar in _find_maximal_cliques(adj_matrix) if len(bar) > 1] - - if crossbar_sets: # If there are any crossbars to plot - crossbar_min_max = [ # Will be used to check if two crossbars intersect - ranks.reindex(bar).agg(["min", "max"]) - for bar in crossbar_sets + crossbar_ranks = ( + ranks.reindex(bar).sort_values().values + for bar in _find_maximal_cliques(adj_matrix) + if len(bar) > 1 + ) + # Try to fit wider crossbars first + crossbar_ranks = list(sorted(crossbar_ranks, key=lambda x: x[0] - x[-1])) + + # If any crossbar is found, plot them + if crossbar_ranks: + # Create stacking of crossbars: for each level, try to fit the crossbar, + # so that it does not intersect with any other in the level. If it does not + # fit in any level, create a new level for it. 
+ crossbar_levels: list[list[np.ndarray]] = [] + for bar_i in crossbar_ranks: + for bars_in_level in crossbar_levels: + if all( + (bar_i[-1] < bar_j[0]) or (bar_i[0] > bar_j[-1]) + for bar_j in bars_in_level + ): + bars_in_level.append(bar_i) + break + else: + crossbar_levels.append([bar_i]) # Create a new level + + # Plot crossbars + # We could plot a single line segment between min and max. However, + # adding a separate segment between each pair enables showing a + # marker over each elbow, e.g. crossbar_props={'marker': 'o'}. + crossbars = [ + [ax.plot(bar, [-i] * len(bar), **crossbar_props) for bar in level] + for i, level in enumerate(crossbar_levels) ] - # Create an adjacency matrix of the crossbars, where 1 means that the two - # crossbars do not intersect, meaning that they can be plotted on the same - # level. - n_bars = len(crossbar_sets) - on_same_level = DataFrame(True, index=range(n_bars), columns=range(n_bars)) - - for (i, bar_i), (j, bar_j) in combinations(enumerate(crossbar_min_max), 2): - on_same_level.loc[i, j] = on_same_level.loc[j, i] = ( - (bar_i["max"] < bar_j["min"]) or (bar_i["min"] > bar_j["max"]) - ) - - # The levels are the maximal cliques of the crossbar adjacency matrix. - crossbar_levels = _find_maximal_cliques(on_same_level) - - # Plot the crossbars in each level - for level, bars_in_level in enumerate(crossbar_levels): - plotted_bars_in_level = [] - for bar_index in bars_in_level: - bar = crossbar_sets[bar_index] - plotted_bar, *_ = ax.plot( - # We could plot a single line segment between min and max. However, - # adding a separate segment between each pair enables showing a - # marker over each elbow, e.g. crossbar_props={'marker': 'o'}. 
- [ranks[i] for i in bar], - [-level - 1] * len(bar), - **crossbar_props, - ) - plotted_bars_in_level.append(plotted_bar) - crossbars.append(plotted_bars_in_level) - - lowest_crossbar_ypos = -len(crossbars) + elbow_start_y = -len(crossbars) def plot_items(points, xpos, label_fmt, color_palette, label_props): """Plot each marker + elbow + label.""" - ypos = lowest_crossbar_ypos - 1 + ypos = elbow_start_y for idx, (label, rank) in enumerate(points.items()): if not color_palette or len(color_palette) == 0: elbow, *_ = ax.plot( From fae42cc1842b67b9395687d92085a6729679c62f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Il=C3=ADdio?= Date: Thu, 17 Apr 2025 20:02:05 +0200 Subject: [PATCH 6/8] minor refactoring --- scikit_posthocs/_plotting.py | 54 +++++++++++++++++------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/scikit_posthocs/_plotting.py b/scikit_posthocs/_plotting.py index 9cedd89..18022a4 100644 --- a/scikit_posthocs/_plotting.py +++ b/scikit_posthocs/_plotting.py @@ -518,40 +518,36 @@ def critical_difference_diagram( ranks.iloc[len(ranks) // 2 :], ) - # Sets of points under the same crossbar + # Arrays of ranks for each crossbar (each crossbar is a maximal clique) crossbar_ranks = ( ranks.reindex(bar).sort_values().values for bar in _find_maximal_cliques(adj_matrix) if len(bar) > 1 ) - # Try to fit wider crossbars first - crossbar_ranks = list(sorted(crossbar_ranks, key=lambda x: x[0] - x[-1])) - - # If any crossbar is found, plot them - if crossbar_ranks: - # Create stacking of crossbars: for each level, try to fit the crossbar, - # so that it does not intersect with any other in the level. If it does not - # fit in any level, create a new level for it. 
- crossbar_levels: list[list[np.ndarray]] = [] - for bar_i in crossbar_ranks: - for bars_in_level in crossbar_levels: - if all( - (bar_i[-1] < bar_j[0]) or (bar_i[0] > bar_j[-1]) - for bar_j in bars_in_level - ): - bars_in_level.append(bar_i) - break - else: - crossbar_levels.append([bar_i]) # Create a new level - - # Plot crossbars - # We could plot a single line segment between min and max. However, - # adding a separate segment between each pair enables showing a - # marker over each elbow, e.g. crossbar_props={'marker': 'o'}. - crossbars = [ - [ax.plot(bar, [-i] * len(bar), **crossbar_props) for bar in level] - for i, level in enumerate(crossbar_levels) - ] + + # Create stacking of crossbars: for each level, try to fit the widest crossbar, + # so that it does not intersect with any other in the level. If it does not + # fit in any level, create a new level for it. + crossbar_levels: list[list[np.ndarray]] = [] + for bar_i in sorted(crossbar_ranks, key=lambda x: x[0] - x[-1]): + for bars_in_level in crossbar_levels: + if all( + (bar_i[-1] < bar_j[0]) or (bar_i[0] > bar_j[-1]) + for bar_j in bars_in_level + ): + bars_in_level.append(bar_i) + break + else: + crossbar_levels.append([bar_i]) # Create a new level + + # Plot crossbars. + # We could plot a single line segment for the whole crossbar. However, + # we add a separate segment between each elbow, enabling the display of a + # marker over each elbow, e.g. crossbar_props={'marker': 'o'}. 
+ crossbars = [ + [ax.plot(bar, [-i] * len(bar), **crossbar_props) for bar in level] + for i, level in enumerate(crossbar_levels) + ] elbow_start_y = -len(crossbars) From 21b3a0526af79466b07c9366fed27aa5169c9d29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Il=C3=ADdio?= Date: Thu, 17 Apr 2025 20:08:28 +0200 Subject: [PATCH 7/8] comment tweak --- scikit_posthocs/_plotting.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scikit_posthocs/_plotting.py b/scikit_posthocs/_plotting.py index 18022a4..bbe9775 100644 --- a/scikit_posthocs/_plotting.py +++ b/scikit_posthocs/_plotting.py @@ -532,7 +532,7 @@ def critical_difference_diagram( for bar_i in sorted(crossbar_ranks, key=lambda x: x[0] - x[-1]): for bars_in_level in crossbar_levels: if all( - (bar_i[-1] < bar_j[0]) or (bar_i[0] > bar_j[-1]) + (bar_i[-1] < bar_j[0]) or (bar_i[0] > bar_j[-1]) # True if no intersection for bar_j in bars_in_level ): bars_in_level.append(bar_i) @@ -541,8 +541,7 @@ def critical_difference_diagram( crossbar_levels.append([bar_i]) # Create a new level # Plot crossbars. - # We could plot a single line segment for the whole crossbar. However, - # we add a separate segment between each elbow, enabling the display of a + # We add a separate segment between each elbow, enabling the display of a # marker over each elbow, e.g. crossbar_props={'marker': 'o'}. 
crossbars = [ [ax.plot(bar, [-i] * len(bar), **crossbar_props) for bar in level] From a8d50cb03eda1f97c40e823ac84f30b097933dc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Il=C3=ADdio?= Date: Thu, 17 Apr 2025 20:09:32 +0200 Subject: [PATCH 8/8] remove unnecessary line --- scikit_posthocs/_plotting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scikit_posthocs/_plotting.py b/scikit_posthocs/_plotting.py index bbe9775..eacde5f 100644 --- a/scikit_posthocs/_plotting.py +++ b/scikit_posthocs/_plotting.py @@ -499,7 +499,6 @@ def critical_difference_diagram( markers = [] elbows = [] labels = [] - crossbars = [] # True if pairwise comparison is NOT significant adj_matrix = DataFrame(