Time Series: More compatibility with pandas 2.0 (#496)

amotl · web-flow · commit 1effc388a9b8 · 2024-06-19T08:51:15.000+02:00
> `groupby.mean()` has `numeric_only=` argument whose default value was
> `True` in the past but since pandas 2.0, its default value is `False`.
>
> An implication is that string columns are not dropped when a
> statistical method such as `mean` or `std` is called on the `groupby`
> object (as was done in the past). To solve the issue, pass
> `numeric_only=True`.
>
> -- https://stackoverflow.com/a/76597931
diff --git a/topic/timeseries/exploratory_data_analysis.ipynb b/topic/timeseries/exploratory_data_analysis.ipynb
@@ -560,7 +560,7 @@
     "df_berlin.index = pd.to_datetime(df_berlin.index)\n",
     "\n",
     "# Now aggregate to daily averages\n",
-    "df_berlin_daily_avg = df_berlin.resample('D').mean()\n",
+    "df_berlin_daily_avg = df_berlin.resample('D').mean(numeric_only=True)\n",
     "\n",
     "df_berlin_daily_avg.reset_index(inplace=True)\n",
     "\n",
diff --git a/topic/timeseries/test.py b/topic/timeseries/test.py
@@ -19,8 +19,5 @@ def test_notebook(notebook):
             raise pytest.skip(f"Kaggle dataset can not be tested "
                               f"without authentication: {notebook.name}")
 
-    if notebook.name in ["exploratory_data_analysis.ipynb", "time-series-decomposition.ipynb"]:
-        raise pytest.skip(f"Notebook is not compatible with pandas 2.x: {notebook.name}")
-
     with testbook(notebook) as tb:
         tb.execute()
diff --git a/topic/timeseries/time-series-decomposition.ipynb b/topic/timeseries/time-series-decomposition.ipynb
@@ -566,7 +566,7 @@
     "df_berlin.index = pd.to_datetime(df_berlin.index)\n",
     "\n",
     "# Now aggregate to daily averages\n",
-    "df_berlin_daily_avg = df_berlin.resample('D').mean()\n",
+    "df_berlin_daily_avg = df_berlin.resample('D').mean(numeric_only=True)\n",
     "\n",
     "df_berlin_daily_avg.reset_index(inplace=True)\n",
     "\n",