Merge pull request #66 from ourownstory/events_regularization

ourownstory · web-flow · commit d90aadf29fa8 · 2020-09-28T18:01:19.000-07:00
regularization for events
diff --git a/neuralprophet/neural_prophet.py b/neuralprophet/neural_prophet.py
@@ -460,9 +460,11 @@ def _add_batch_regualarizations(self, loss, reg_lambda_ar):
                 reg_loss += l_season * reg_season
                 loss += l_season * reg_season
 
-        # Regularize holidays: sparsify holiday features coefficients
+        # Regularize events: sparsify events features coefficients
         if self.events_config is not None or self.country_holidays_config is not None:
-            pass
+            reg_events_loss = utils.reg_func_events(self.events_config, self.country_holidays_config, self.model)
+            reg_loss += reg_events_loss
+            loss += reg_events_loss
 
         return loss, reg_loss
 
@@ -1059,12 +1061,13 @@ def plot_last_forecast(self, fcst, ax=None, xlabel='ds', ylabel='y', figsize=(10
             highlight_forecast=self.forecast_in_focus, line_per_origin=True,
         )
 
-    def plot_components(self, fcst, figsize=(10, 6)):
+    def plot_components(self, fcst, figsize=None):
         """Plot the NeuralProphet forecast components.
 
         Args:
             fcst (pd.DataFrame): output of self.predict
-            figsize (tuple):   width, height in inches. default: (10, 6)
+            figsize (tuple):   width, height in inches.
+                None (default):  automatic (10, 3 * npanel)
             crop_last_n (int): number of samples to plot (combined future and past)
                 None (default) includes entire history. ignored for seasonality.
         Returns:
@@ -1077,15 +1080,16 @@ def plot_components(self, fcst, figsize=(10, 6)):
             forecast_in_focus=self.forecast_in_focus,
         )
 
-    def plot_parameters(self, weekly_start=0, yearly_start=0, figsize=(10, 6)):
+    def plot_parameters(self, weekly_start=0, yearly_start=0, figsize=None):
         """Plot the NeuralProphet forecast components.
 
         Args:
             weekly_start (int): specifying the start day of the weekly seasonality plot.
                 0 (default) starts the week on Sunday. 1 shifts by 1 day to Monday, and so on.
             yearly_start (int): specifying the start day of the yearly seasonality plot.
                 0 (default) starts the year on Jan 1. 1 shifts by 1 day to Jan 2, and so on.
-            figsize (tuple):   width, height in inches. default: (10, 6)
+            figsize (tuple):   width, height in inches.
+                None (default):  automatic (10, 3 * npanel)
         Returns:
             A matplotlib figure.
         """
diff --git a/neuralprophet/plotting_utils.py b/neuralprophet/plotting_utils.py
@@ -106,14 +106,15 @@ def plot(fcst, ax=None, xlabel='ds', ylabel='y', highlight_forecast=None, line_p
     return fig
 
 
-def plot_components(m, fcst, forecast_in_focus=None, figsize=(10, 6)):
+def plot_components(m, fcst, forecast_in_focus=None, figsize=None):
     """Plot the NeuralProphet forecast components.
 
     Args:
         m (NeuralProphet): fitted model.
         fcst (pd.DataFrame):  output of m.predict.
         forecast_in_focus (int): n-th step ahead forecast AR-coefficients to plot
         figsize (tuple): width, height in inches.
+                None (default):  automatic (10, 3 * npanel)
 
     Returns:
         A matplotlib figure.
@@ -177,7 +178,7 @@ def plot_components(m, fcst, forecast_in_focus=None, figsize=(10, 6)):
                            'bar': True})
 
     npanel = len(components)
-    figsize = figsize if figsize else (9, 3 * npanel)
+    figsize = figsize if figsize else (10, 3 * npanel)
     fig, axes = plt.subplots(npanel, 1, facecolor='w', figsize=figsize)
     if npanel == 1:
         axes = [axes]
@@ -217,7 +218,8 @@ def plot_forecast_component(fcst, comp_name, plot_name=None, ax=None, figsize=(1
         comp_name (str): Name of the component to plot.
         plot_name (str): Name of the plot Title.
         ax (matplotlib axis): matplotlib Axes to plot on.
-        figsize (tuple): width, height in inches. default: (10, 6)
+        figsize (tuple): width, height in inches. Ignored if ax is not None.
+            default: (10, 6)
         multiplicative (bool): set y axis as percentage
         bar (bool): make barplot
         rolling (int): rolling average underplot
@@ -267,7 +269,8 @@ def plot_multiforecast_component(fcst, comp_name, plot_name=None, ax=None, figsi
         comp_name (str): Name of the component to plot.
         plot_name (str): Name of the plot Title.
         ax (matplotlib axis): matplotlib Axes to plot on.
-        figsize (tuple): width, height in inches. default: (10, 6)
+        figsize (tuple): width, height in inches. Ignored if ax is not None.
+             default: (10, 6)
         multiplicative (bool): set y axis as percentage
         bar (bool): make barplot
         focus (int): forecast number to portray in detail.
@@ -315,7 +318,7 @@ def plot_multiforecast_component(fcst, comp_name, plot_name=None, ax=None, figsi
     return artists
 
 
-def plot_parameters(m, forecast_in_focus=None, weekly_start=0, yearly_start=0, figsize=(10, 6)):
+def plot_parameters(m, forecast_in_focus=None, weekly_start=0, yearly_start=0, figsize=None):
     """Plot the parameters that the model is composed of, visually.
 
     Args:
@@ -327,7 +330,8 @@ def plot_parameters(m, forecast_in_focus=None, weekly_start=0, yearly_start=0, f
         yearly_start (int): specifying the start day of the yearly seasonality plot.
             0 (default) starts the year on Jan 1.
             1 shifts by 1 day to Jan 2, and so on.
-        figsize (tuple): width, height in inches.default: (10, 6)
+        figsize (tuple): width, height in inches.
+            None (default):  automatic (10, 3 * npanel)
 
     Returns:
         A matplotlib figure.
@@ -412,7 +416,7 @@ def plot_parameters(m, forecast_in_focus=None, weekly_start=0, yearly_start=0, f
         components.append({'plot_name': 'Multiplicative event'})
 
     npanel = len(components)
-    figsize = figsize if figsize else (9, 3 * npanel)
+    figsize = figsize if figsize else (10, 3 * npanel)
     fig, axes = plt.subplots(npanel, 1, facecolor='w', figsize=figsize)
     if npanel == 1:
         axes = [axes]
@@ -462,7 +466,8 @@ def plot_trend_change(m, ax=None, plot_name='Trend Change', figsize=(10, 6)):
         ax (matplotlib axis): matplotlib Axes to plot on.
             One will be created if this is not provided.
         plot_name (str): Name of the plot Title.
-        figsize (tuple): width, height in inches. default: (10, 6)
+        figsize (tuple): width, height in inches. Ignored if ax is not None.
+             default: (10, 6)
 
     Returns:
         a list of matplotlib artists
@@ -490,7 +495,8 @@ def plot_trend(m, ax=None, plot_name='Trend', figsize=(10, 6)):
         ax (matplotlib axis): matplotlib Axes to plot on.
             One will be created if this is not provided.
         plot_name (str): Name of the plot Title.
-        figsize (tuple): width, height in inches.
+        figsize (tuple): width, height in inches. Ignored if ax is not None.
+             default: (10, 6)
 
     Returns:
         a list of matplotlib artists
@@ -534,7 +540,8 @@ def plot_scalar_weights(weights, plot_name, focus=None, ax=None, figsize=(10, 6)
             One will be created if this is not provided.
         focus (int): if provided, show weights for this forecast
             None (default) plot average
-        figsize (tuple): width, height in inches. default: (10, 6)
+        figsize (tuple): width, height in inches. Ignored if ax is not None.
+             default: (10, 6)
     Returns:
         a list of matplotlib artists
     """
@@ -560,7 +567,10 @@ def plot_scalar_weights(weights, plot_name, focus=None, ax=None, figsize=(10, 6)
     artists += ax.bar(names, values, width=0.8, color='#0072B2')
     ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
     ax.set_xlabel(plot_name + " name")
-    plt.xticks(rotation=90)
+    # only rotates last subplot!
+    # TODO fix
+    if len("_".join(names)) > 100:
+        plt.xticks(rotation=45)
     if focus is None:
         ax.set_ylabel(plot_name + ' weight (avg)')
     else:
@@ -578,7 +588,8 @@ def plot_lagged_weights(weights, comp_name, focus=None, ax=None, figsize=(10, 6)
             None (default) sum over all forecasts and plot as relative percentage
         ax (matplotlib axis): matplotlib Axes to plot on.
             One will be created if this is not provided.
-        figsize (tuple): width, height in inches. default: (10, 6)
+        figsize (tuple): width, height in inches. Ignored if ax is not None.
+             default: (10, 6)
     Returns:
         a list of matplotlib artists
     """
@@ -606,7 +617,7 @@ def plot_lagged_weights(weights, comp_name, focus=None, ax=None, figsize=(10, 6)
     return artists
 
 
-def plot_custom_season(m, ax=None, comp_name=None):
+def plot_custom_season():
     raise NotImplementedError
 
 
@@ -620,7 +631,8 @@ def plot_yearly(m, ax=None, yearly_start=0, figsize=(10, 6), comp_name='yearly')
         yearly_start (int): specifying the start day of the yearly seasonality plot.
             0 (default) starts the year on Jan 1.
             1 shifts by 1 day to Jan 2, and so on.
-        figsize (tuple): width, height in inches. default: (10, 6)
+        figsize (tuple): width, height in inches. Ignored if ax is not None.
+             default: (10, 6)
         comp_name (str): Name of seasonality component if previously changed from default 'yearly'.
 
     Returns:
@@ -656,7 +668,8 @@ def plot_weekly(m, ax=None, weekly_start=0, figsize=(10, 6), comp_name='weekly')
         weekly_start (int): specifying the start day of the weekly seasonality plot.
             0 (default) starts the week on Sunday.
             1 shifts by 1 day to Monday, and so on.
-        figsize (tuple): width, height in inches. default: (10, 6)
+        figsize (tuple): width, height in inches. Ignored if ax is not None.
+             default: (10, 6)
         comp_name (str): Name of seasonality component if previously changed from default 'weekly'.
 
     Returns:
@@ -680,3 +693,7 @@ def plot_weekly(m, ax=None, weekly_start=0, figsize=(10, 6), comp_name='weekly')
     if m.season_config.mode == 'multiplicative':
         ax = set_y_as_percent(ax)
     return artists
+
+
+def plot_daily():
+    raise NotImplementedError
diff --git a/neuralprophet/test_debug.py b/neuralprophet/test_debug.py
@@ -137,7 +137,7 @@ def test_lag_reg(verbose=True):
         plt.show()
 
 
-def test_holidays(verbose=True):
+def test_events(verbose=True):
     df = pd.read_csv('../data/example_wp_log_peyton_manning.csv')
     playoffs = pd.DataFrame({
         'event': 'playoff',
@@ -162,10 +162,10 @@ def test_holidays(verbose=True):
         daily_seasonality=False
     )
     # set event windows
-    m = m.add_events(["superbowl", "playoff"], lower_window=-1, upper_window=1, mode="additive")
+    m = m.add_events(["superbowl", "playoff"], lower_window=-1, upper_window=1, mode="multiplicative", regularization=0.5)
 
     # add the country specific holidays
-    m = m.add_country_holidays("US", mode="multiplicative")
+    m = m.add_country_holidays("US", mode="additive", regularization=0.5)
 
     history_df = m.create_df_with_events(df, events_df)
     m.fit(history_df)
@@ -234,7 +234,7 @@ def test_all(verbose=False):
     test_ar_net(verbose)
     test_seasons(verbose)
     test_lag_reg(verbose)
-    test_holidays(verbose)
+    test_events(verbose)
     test_predict(verbose)
 
 
@@ -251,9 +251,9 @@ def test_all(verbose=False):
     # test_ar_net()
     # test_seasons()
     # test_lag_reg()
-    # test_holidays()
+    test_events()
     # test_predict()
-    test_plot()
+    # test_plot()
 
     # test cases: predict (on fitting data, on future data, on completely new data), train_eval, test function, get_last_forecasts, plotting
 
diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py
@@ -194,17 +194,17 @@ def _stride_lagged_features(df_col_name, feature_dims):
         events = OrderedDict({})
         if n_lags == 0:
             if additive_events is not None:
-                events["additive_events"] = np.expand_dims(additive_events, axis=1)
+                events["additive"] = np.expand_dims(additive_events, axis=1)
             if multiplicative_events is not None:
-                events["multiplicative_events"] = np.expand_dims(multiplicative_events, axis=1)
+                events["multiplicative"] = np.expand_dims(multiplicative_events, axis=1)
         else:
             if additive_events is not None:
                 additive_event_feature_windows = []
                 for i in range(0, additive_events.shape[1]):
                     # stride into num_forecast at dim=1 for each sample, just like we did with time
                     additive_event_feature_windows.append(_stride_time_features_for_forecasts(additive_events[:, i]))
                 additive_events = np.dstack(additive_event_feature_windows)
-                events["additive_events"] = additive_events
+                events["additive"] = additive_events
 
             if multiplicative_events is not None:
                 multiplicative_event_feature_windows = []
@@ -213,7 +213,7 @@ def _stride_lagged_features(df_col_name, feature_dims):
                     multiplicative_event_feature_windows.append(
                         _stride_time_features_for_forecasts(multiplicative_events[:, i]))
                 multiplicative_events = np.dstack(multiplicative_event_feature_windows)
-                events["multiplicative_events"] = multiplicative_events
+                events["multiplicative"] = multiplicative_events
 
         inputs["events"] = events
 
diff --git a/neuralprophet/time_net.py b/neuralprophet/time_net.py
@@ -118,8 +118,8 @@ def __init__(self,
                 else:
                     n_multiplicative_event_params += len(configs['event_indices'])
 
-            self.event_params["additive_event_params"] = new_param(dims=[n_additive_event_params])
-            self.event_params["multiplicative_event_params"] = new_param(dims=[n_multiplicative_event_params])
+            self.event_params["additive"] = new_param(dims=[n_additive_event_params])
+            self.event_params["multiplicative"] = new_param(dims=[n_multiplicative_event_params])
         else:
             self.event_params = None
 
@@ -203,9 +203,9 @@ def get_event_weights(self, name):
         mode = event_dims["mode"]
 
         if mode == "additive":
-            event_params = self.event_params["additive_event_params"]
+            event_params = self.event_params["additive"]
         if mode == "multiplicative":
-            event_params = self.event_params["multiplicative_event_params"]
+            event_params = self.event_params["multiplicative"]
 
         event_param_dict = OrderedDict({})
         for event_delim, indices in zip(event_dims["event_delim"], event_dims["event_indices"]):
@@ -408,12 +408,12 @@ def forward(self, inputs):
         # else: assert self.season_dims is None
 
         if 'events' in inputs:
-            if "additive_events" in inputs["events"].keys():
+            if "additive" in inputs["events"].keys():
                 additive_components += self.event_effects(
-                    inputs["events"]["additive_events"], self.event_params["additive_event_params"])
-            if "multiplicative_events" in inputs["events"].keys():
+                    inputs["events"]["additive"], self.event_params["additive"])
+            if "multiplicative" in inputs["events"].keys():
                 multiplicative_components += self.event_effects(
-                    inputs["events"]["multiplicative_events"], self.event_params["multiplicative_event_params"])
+                    inputs["events"]["multiplicative"], self.event_params["multiplicative"])
 
         out = trend + trend * multiplicative_components + additive_components
 
@@ -452,21 +452,21 @@ def compute_components(self, inputs):
             for name, lags in inputs['covariates'].items():
                 components['covar_{}'.format(name)] = self.covariate(lags=lags, name=name)
         if "events" in inputs:
-            if 'additive_events' in inputs["events"].keys():
-                components['events_additive'] = self.event_effects(features=inputs["events"]["additive_events"],
-                                                               params=self.event_params["additive_event_params"])
-            if 'multiplicative_events' in inputs["events"].keys():
-                components['events_multiplicative'] = self.event_effects(features=inputs["events"]["multiplicative_events"],
-                                                                     params=self.event_params["multiplicative_event_params"])
+            if 'additive' in inputs["events"].keys():
+                components['events_additive'] = self.event_effects(features=inputs["events"]["additive"],
+                                                               params=self.event_params["additive"])
+            if 'multiplicative' in inputs["events"].keys():
+                components['events_multiplicative'] = self.event_effects(features=inputs["events"]["multiplicative"],
+                                                                     params=self.event_params["multiplicative"])
             for event, configs in self.events_dims.items():
                 mode = configs["mode"]
                 indices = configs["event_indices"]
                 if mode == "additive":
-                    features = inputs["events"]["additive_events"]
-                    params = self.event_params["additive_event_params"]
+                    features = inputs["events"]["additive"]
+                    params = self.event_params["additive"]
                 else:
-                    features = inputs["events"]["multiplicative_events"]
-                    params = self.event_params["multiplicative_event_params"]
+                    features = inputs["events"]["multiplicative"]
+                    params = self.event_params["multiplicative"]
                 components['event_{}'.format(event)] = self.event_effects(features=features, params=params, indices=indices)
         return components
 
diff --git a/neuralprophet/utils.py b/neuralprophet/utils.py
@@ -73,8 +73,38 @@ def reg_func_season(weights):
     return reg_func_abs(weights)
 
 
-def reg_func_holidays(weights):
-    return reg_func_abs(weights)
+def reg_func_events(events_config, country_holidays_config, model):
+    """
+    Regularization of events coefficients to induce sparcity
+
+    Args:
+        events_config (OrderedDict): Configurations (upper, lower windows, regularization) for user specified events
+        country_holidays_config (OrderedDict): Configurations (holiday_names, upper, lower windows, regularization)
+            for country specific holidays
+        model (TimeNet): The TimeNet model object
+
+    Returns:
+        regularization loss, scalar
+    """
+    reg_events_loss = 0.0
+    if events_config is not None:
+        for event, configs in events_config.items():
+            reg_lambda = configs["reg_lambda"]
+            if reg_lambda is not None:
+                weights = model.get_event_weights(event)
+                for offset in weights.keys():
+                    reg_events_loss += reg_lambda * reg_func_abs(weights[offset])
+
+    if country_holidays_config is not None:
+        reg_lambda = country_holidays_config["reg_lambda"]
+        if reg_lambda is not None:
+            for holiday in country_holidays_config["holiday_names"]:
+                weights = model.get_event_weights(holiday)
+                for offset in weights.keys():
+                    reg_events_loss += reg_lambda * reg_func_abs(weights[offset])
+
+    return reg_events_loss
+
 
 
 def symmetric_total_percentage_error(values, estimates):