Skip to content

Commit 3d13428

Browse files
authored
fix statistics (#523)
* fix statistics
* fix old test and style
* changelog
1 parent d80a910 commit 3d13428

File tree

4 files changed

+38
-32
lines changed

4 files changed

+38
-32
lines changed

CHANGELOG.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3131

3232
### Fixed
3333
-
34-
-
35-
-
34+
- Fixed adding unnecessary lag=1 in statistics ([#523](https://github.com/tinkoff-ai/etna/pull/523))
35+
- Fixed wrong MeanTransform behaviour when using alpha parameter ([#523](https://github.com/tinkoff-ai/etna/pull/523))
3636
- Fix processing add_noise=True parameter in datasets generation ([#520](https://github.com/tinkoff-ai/etna/pull/520))
37-
-
38-
-
39-
-
40-
-
41-
-
37+
-
38+
-
39+
-
40+
-
41+
-
4242

4343
## [1.6.2] - 2022-02-09
4444
### Added

etna/transforms/math/statistics.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,8 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
7777
"""
7878
features = (
7979
df.xs(self.in_column, level=1, axis=1)
80-
.shift(1)
8180
.rolling(
82-
window=self.seasonality * self.window if self.window != -1 else len(df) - 1,
81+
window=self.seasonality * self.window if self.window != -1 else len(df),
8382
min_periods=self.min_required_len,
8483
)
8584
.aggregate(self._aggregate_window)
@@ -167,7 +166,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
167166
result: pd.DataFrame
168167
dataframe with results
169168
"""
170-
size = self.window if self.window != -1 else len(df) - 1
169+
size = self.window if self.window != -1 else len(df)
171170
self._alpha_range = [self.alpha ** i for i in range(0, size)]
172171
return super().transform(df=df)
173172

@@ -177,7 +176,7 @@ def _aggregate_window(self, series: pd.Series) -> float:
177176
raise ValueError("Something went wrong generating the alphas!")
178177
tmp_series = self._get_required_lags(series)
179178
size = len(tmp_series)
180-
tmp = tmp_series * self._alpha_range[-size:]
179+
tmp = tmp_series * self._alpha_range[:size]
181180
return tmp.mean(**self.kwargs)
182181

183182

tests/test_transforms/test_encoders/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,11 @@ def transformed_simple_df() -> pd.DataFrame:
4444
df_1["segment"] = "Moscow"
4545
df_1["target"] = [1.0, 2.0, 3.0, 4.0, 5.0, np.NAN, np.NAN]
4646
df_1["exog"] = [6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
47-
df_1["regressor_segment_mean"] = [0, 1, 1.5, 2, 2.5, 3, 3]
47+
df_1["regressor_segment_mean"] = [1, 1.5, 2, 2.5, 3, 3, 3]
4848
df_2["segment"] = "Omsk"
4949
df_2["target"] = [10.0, 20.0, 30.0, 40.0, 50.0, np.NAN, np.NAN]
5050
df_2["exog"] = [60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0]
51-
df_2["regressor_segment_mean"] = [0.0, 10.0, 15.0, 20.0, 25.0, 30, 30]
51+
df_2["regressor_segment_mean"] = [10.0, 15.0, 20.0, 25.0, 30, 30, 30]
5252
classic_df = pd.concat([df_1, df_2], ignore_index=True)
5353
df = TSDataset.to_dataset(classic_df)
5454
return df

tests/test_transforms/test_math/test_statistics_transform.py

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,19 @@ def test_interface_quantile(simple_df_for_agg: pd.DataFrame, out_column: str):
7979
@pytest.mark.parametrize(
8080
"window,seasonality,alpha,periods,fill_na,expected",
8181
(
82-
(10, 1, 1, 1, 0, np.array([0, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])),
83-
(-1, 1, 1, 1, 0, np.array([0, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])),
84-
(3, 1, 1, 1, -17, np.array([-17, 0, 0.5, 1, 2, 3, 4, 5, 6, 7])),
85-
(3, 1, 0.5, 1, -17, np.array([-17, 0, 0.5, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3])),
86-
(3, 1, 0.5, 3, -12, np.array([-12, -12, -12, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3])),
87-
(3, 2, 1, 1, -17, np.array([-17, 0, 1, 1, 2, 2, 3, 4, 5, 6])),
82+
(10, 1, 1, 1, 0, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])),
83+
(-1, 1, 1, 1, 0, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])),
84+
(3, 1, 1, 1, -17, np.array([0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8])),
85+
(3, 1, 0.5, 1, -17, np.array([0, 0.5, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3])),
86+
(
87+
3,
88+
1,
89+
0.5,
90+
3,
91+
-12,
92+
np.array([-12, -12, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3]),
93+
),
94+
(3, 2, 1, 1, -17, np.array([0, 1, 1, 2, 2, 3, 4, 5, 6, 7])),
8895
),
8996
)
9097
def test_mean_feature(
@@ -115,8 +122,8 @@ def test_mean_feature(
115122
(
116123
(10, 1, 1, 0, np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])),
117124
(-1, 1, 1, 0, np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])),
118-
(3, 1, 1, -17, np.array([-17, 0, 0, 0, 1, 2, 3, 4, 5, 6])),
119-
(3, 2, 1, -17, np.array([-17, 0, 1, 0, 1, 0, 1, 2, 3, 4])),
125+
(3, 1, 1, -17, np.array([0, 0, 0, 1, 2, 3, 4, 5, 6, 7])),
126+
(3, 2, 1, -17, np.array([0, 1, 0, 1, 0, 1, 2, 3, 4, 5])),
120127
),
121128
)
122129
def test_min_feature(
@@ -138,9 +145,9 @@ def test_min_feature(
138145
@pytest.mark.parametrize(
139146
"window,periods,fill_na,expected",
140147
(
141-
(10, 1, 0, np.array([0, 0, 1, 2, 3, 4, 5, 6, 7, 8])),
142-
(-1, 1, 0, np.array([0, 0, 1, 2, 3, 4, 5, 6, 7, 8])),
143-
(3, 2, -17, np.array([-17, -17, 1, 2, 3, 4, 5, 6, 7, 8])),
148+
(10, 1, 0, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
149+
(-1, 1, 0, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
150+
(3, 2, -17, np.array([-17, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
144151
),
145152
)
146153
def test_max_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array):
@@ -155,8 +162,8 @@ def test_max_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int,
155162
@pytest.mark.parametrize(
156163
"window,periods,fill_na,expected",
157164
(
158-
(3, 3, -17, np.array([-17, -17, -17, 1, 2, 3, 4, 5, 6, 7])),
159-
(-1, 1, -17, np.array([-17, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])),
165+
(3, 3, -17, np.array([-17, -17, 1, 2, 3, 4, 5, 6, 7, 8])),
166+
(-1, 1, -17, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])),
160167
),
161168
)
162169
def test_median_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array):
@@ -171,8 +178,8 @@ def test_median_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: i
171178
@pytest.mark.parametrize(
172179
"window,periods,fill_na,expected",
173180
(
174-
(3, 3, -17, np.array([-17, -17, -17, 1, 1, 1, 1, 1, 1, 1])),
175-
(3, 1, -17, np.array([-17, -17, np.sqrt(0.5 ** 2 * 2), 1, 1, 1, 1, 1, 1, 1])),
181+
(3, 3, -17, np.array([-17, -17, 1, 1, 1, 1, 1, 1, 1, 1])),
182+
(3, 1, -17, np.array([-17, np.sqrt(0.5 ** 2 * 2), 1, 1, 1, 1, 1, 1, 1, 1])),
176183
),
177184
)
178185
def test_std_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array):
@@ -187,9 +194,9 @@ def test_std_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int,
187194
@pytest.mark.parametrize(
188195
"window,periods,fill_na,expected",
189196
(
190-
(3, 3, -17, [-17, -17, -17, 4 / 3, 2 / 3, 2 / 3, 8 / 3, 2, 14 / 9, 10 / 9]),
191-
(4, 1, -17, [-17, 0, 1, 4 / 3, 1.25, 1, 2.25, 2.75, 2, 1.5]),
192-
(-1, 1, 0, [0, 0, 1, 4 / 3, 1.25, 1.44, 7 / 3, 138 / 49, 2.625, 208 / 81]),
197+
(3, 3, -17, [-17, -17, 4 / 3, 2 / 3, 2 / 3, 8 / 3, 2, 14 / 9, 10 / 9, 22 / 9]),
198+
(4, 1, -17, [0, 1, 4 / 3, 1.25, 1, 2.25, 2.75, 2, 1.5, 9.5 / 4]),
199+
(-1, 1, 0, [0, 1, 4 / 3, 1.25, 1.44, 7 / 3, 138 / 49, 2.625, 208 / 81, 27 / 10]),
193200
),
194201
)
195202
def test_mad_transform(df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.ndarray):
@@ -202,7 +209,7 @@ def test_mad_transform(df_for_agg: pd.DataFrame, window: int, periods: int, fill
202209

203210
@pytest.mark.parametrize(
204211
"window,periods,fill_na,expected",
205-
((3, 3, -17, [-17, -17, -17, 4 / 3, -17, -17, -17, 2, 14 / 9, 10 / 9]),),
212+
((3, 3, -17, [-17, -17, 4 / 3, -17, -17, -17, 2, 14 / 9, 10 / 9, 22 / 9]),),
206213
)
207214
def test_mad_transform_with_nans(
208215
df_for_agg_with_nan: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.ndarray

0 commit comments

Comments
 (0)