Skip to content

Commit d29cead

Browse files
TonyBagnallLinGinQiuMatthewMiddlehurst
authored and committed
[ENH] New experimental module: imbalance in collection transformers (#2498)
* first draft * [ENH] wrapper for smote and adasyn of the imbalance module in collection transformers (#2501) * smote & adasyn in aeon.transformation.imbalance * smote & adasyn in aeon.transformation.imbalance * smote & adasyn in aeon.transformation.imbalance * smote & adasyn in aeon.transformation.imbalance * make experimental * inherit from SMOTE * test equivalence to imblearn * move tests * format * import * add test parameters * Ported OHIT (#2573) * Automatic `pre-commit` fixes * docstrings * remove import * remove incorrect test parameters * docstrings * examples * examples * refactor variable name * format comments and reference * typos --------- Co-authored-by: Chuanhang Qiu <80885865+LinGinQiu@users.noreply.github.com> Co-authored-by: MatthewMiddlehurst <25731235+MatthewMiddlehurst@users.noreply.github.com>
1 parent a7c6723 commit d29cead

File tree

9 files changed

+815
-0
lines changed

9 files changed

+815
-0
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
"""Supervised transformers to rebalance collections of time series."""
2+
3+
__all__ = ["ADASYN", "SMOTE", "OHIT"]
4+
5+
from aeon.transformations.collection.imbalance._adasyn import ADASYN
6+
from aeon.transformations.collection.imbalance._ohit import OHIT
7+
from aeon.transformations.collection.imbalance._smote import SMOTE
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
"""ADASYN over sampling algorithm.
2+
3+
See more in imblearn.over_sampling.ADASYN
4+
original authors:
5+
# Guillaume Lemaitre <g.lemaitre58@gmail.com>
6+
# Fernando Nogueira
7+
# Christos Aridas
8+
# Dzianis Dudnik
9+
# License: MIT
10+
"""
11+
12+
import numpy as np
13+
from sklearn.utils import check_random_state
14+
15+
from aeon.transformations.collection.imbalance._smote import SMOTE
16+
17+
__maintainer__ = ["TonyBagnall"]
18+
__all__ = ["ADASYN"]
19+
20+
21+
class ADASYN(SMOTE):
    """
    Over-sample minority classes with Adaptive Synthetic Sampling (ADASYN).

    ADASYN builds on SMOTE. Synthetic series are still interpolated between a
    minority case and one of its same-class neighbours, but the number of
    synthetic series created around each minority case is proportional to the
    share of its nearest neighbours that carry a different label. Minority
    cases that are harder to learn (i.e. surrounded by more majority samples)
    therefore receive more synthetic samples.

    The implementation is adapted from imbalanced-learn's
    `imblearn.over_sampling.ADASYN`.

    Parameters
    ----------
    random_state : int or None, optional (default=None)
        Random seed for reproducibility.
    k_neighbors : int, optional (default=5)
        Number of nearest neighbours used to construct synthetic samples.

    References
    ----------
    .. [1] He, H., Bai, Y., Garcia, E. A., & Li, S. (2008).
        ADASYN: Adaptive synthetic sampling approach for imbalanced learning.
        In IEEE International Joint Conference on Neural Networks, pp. 1322-1328.
        https://doi.org/10.1109/IJCNN.2008.4633969

    Examples
    --------
    >>> from aeon.transformations.collection.imbalance import ADASYN
    >>> import numpy as np
    >>> X = np.random.random(size=(100,1,50))
    >>> y = np.array([0] * 90 + [1] * 10)
    >>> sampler = ADASYN(random_state=49)
    >>> X_res, y_res = sampler.fit_transform(X, y)
    """

    def __init__(self, random_state=None, k_neighbors=5):
        super().__init__(random_state=random_state, k_neighbors=k_neighbors)

    def _transform(self, X, y=None):
        """Append ADASYN-generated series to ``X`` and their labels to ``y``.

        Parameters
        ----------
        X : np.ndarray
            Collection whose axis 1 has length one; that axis is squeezed out
            before resampling and restored on the result.
        y : np.ndarray
            Class labels aligned with ``X``. Although the signature defaults to
            ``None``, the labels are copied and indexed, so they must be given.

        Returns
        -------
        X_resampled : np.ndarray
            Copy of the input cases followed by the synthetic cases, with the
            singleton axis 1 re-inserted.
        y_resampled : np.ndarray
            Copy of the input labels followed by the labels of the synthetic
            cases.

        Raises
        ------
        RuntimeError
            If, for a class to over-sample, none of the neighbours of its cases
            has a different label (the weights could not be normalised).
        ValueError
            If rounding the per-case allocation leaves nothing to generate.
        """
        X_flat = np.squeeze(X, axis=1)
        rng = check_random_state(self.random_state)
        X_parts = [X_flat.copy()]
        y_parts = [y.copy()]

        # Each entry maps a class label to the number of series requested for it.
        for label, n_requested in self.sampling_strategy_.items():
            if n_requested == 0:
                continue

            label_positions = np.flatnonzero(y == label)
            X_label = X_flat[label_positions]

            # Neighbourhoods over the whole collection measure how strongly each
            # case of this class is surrounded by other classes. The first
            # neighbour column is discarded (presumably the query case itself).
            self.nn_.fit(X_flat)
            global_nbrs = self.nn_.kneighbors(X_label, return_distance=False)[:, 1:]
            # One-vs-rest ratio: every label other than ``label`` counts as
            # "majority", which avoids an extra parameter in multi-class data.
            n_used_neighbours = self.nn_.n_neighbors - 1
            foreign_counts = np.sum(y[global_nbrs] != label, axis=1)
            weights = foreign_counts / n_used_neighbours
            weight_total = np.sum(weights)
            if not weight_total:
                raise RuntimeError(
                    "Not any neighbours belong to the majority"
                    " class. This case will induce a NaN case"
                    " with a division by zero. ADASYN is not"
                    " suited for this specific dataset."
                    " Use SMOTE instead."
                )
            weights = weights / weight_total
            per_case_counts = np.rint(weights * n_requested).astype(int)
            # Rounding can make the realised total differ from the request.
            n_generated = np.sum(per_case_counts)
            if not n_generated:
                raise ValueError(
                    "No samples will be generated with the provided ratio settings."
                )

            # Interpolation partners must come from the same class, so the
            # neighbour search is refitted on this class only.
            self.nn_.fit(X_label)
            class_nbrs = self.nn_.kneighbors(X_label, return_distance=False)[:, 1:]

            # Repeat each case index as many times as it has series allocated.
            seed_rows = np.repeat(np.arange(len(label_positions)), per_case_counts)
            # Draw order matters for reproducibility: neighbour picks first,
            # interpolation steps second.
            nbr_cols = rng.choice(n_used_neighbours, size=n_generated)
            seeds = X_label[seed_rows]
            offsets = X_label[class_nbrs[seed_rows, nbr_cols]] - seeds
            fractions = rng.uniform(size=(n_generated, 1))
            synthetic = (seeds + fractions * offsets).astype(X_flat.dtype)

            X_parts.append(synthetic)
            y_parts.append(np.full(n_generated, fill_value=label, dtype=y.dtype))

        X_resampled = np.vstack(X_parts)
        y_resampled = np.hstack(y_parts)

        # Restore the singleton axis removed at the start.
        return X_resampled[:, np.newaxis, :], y_resampled

0 commit comments

Comments
 (0)