7676from evalml .utils .gen_utils import contains_all_ts_parameters
7777
7878DECOMPOSER_PERIOD_CAP = 1000
79+ MULTISERIES_SEPARATOR_SYMBOL = "|"
7980
8081
8182def _get_label_encoder (X , y , problem_type , estimator_class , sampler_name = None ):
@@ -1418,7 +1419,7 @@ def unstack_multiseries(
14181419 for column_name in full_dataset .columns .drop ([time_index , series_id ]):
14191420 new_column = single_series [column_name ]
14201421 new_column .index = new_time_index
1421- new_column .name = f"{ column_name } _ { s_id } "
1422+ new_column .name = f"{ column_name } { MULTISERIES_SEPARATOR_SYMBOL } { s_id } "
14221423
14231424 if column_name == target_name :
14241425 y_unstacked_cols .append (new_column )
@@ -1435,11 +1436,15 @@ def unstack_multiseries(
14351436 # Reset the axes now that they've been unstacked, keep time info in X
14361437 X_unstacked = X_unstacked .reset_index ()
14371438 y_unstacked = y_unstacked .reset_index (drop = True )
1438-
14391439 return X_unstacked , y_unstacked
14401440
14411441
1442- def stack_data (data , include_series_id = False , series_id_name = None , starting_index = None ):
1442+ def stack_data (
1443+ data ,
1444+ include_series_id = False ,
1445+ series_id_name = None ,
1446+ starting_index = None ,
1447+ ):
14431448 """Stacks the given DataFrame back into a single Series, or a DataFrame if include_series_id is True.
14441449
14451450 Should only be used for data that is expected to be a single series. To stack multiple unstacked columns,
@@ -1464,7 +1469,9 @@ def stack_data(data, include_series_id=False, series_id_name=None, starting_inde
14641469
14651470 # Extract the original column name
14661471 series_id_with_name = stacked_series .index .droplevel ()
1467- stacked_series .name = "_" .join (series_id_with_name [0 ].split ("_" )[:- 1 ])
1472+ stacked_series .name = MULTISERIES_SEPARATOR_SYMBOL .join (
1473+ series_id_with_name [0 ].split (MULTISERIES_SEPARATOR_SYMBOL )[:- 1 ],
1474+ )
14681475
14691476 # If the index is the time index, keep it
14701477 if not data .index .is_numeric () and starting_index is None :
@@ -1481,11 +1488,14 @@ def stack_data(data, include_series_id=False, series_id_name=None, starting_inde
14811488 # Pull out the series id information, if requested
14821489 if include_series_id :
14831490 series_id_col = pd .Series (
1484- series_id_with_name .map (lambda col_name : col_name .split ("_" )[- 1 ]),
1491+ series_id_with_name .map (
1492+ lambda col_name : col_name .split (MULTISERIES_SEPARATOR_SYMBOL )[- 1 ],
1493+ ),
14851494 name = series_id_name or "series_id" ,
14861495 index = stacked_series .index ,
14871496 )
14881497 stacked_series = pd .concat ([series_id_col , stacked_series ], axis = 1 )
1498+
14891499 return stacked_series
14901500
14911501
@@ -1511,8 +1521,8 @@ def stack_X(X, series_id_name, time_index, starting_index=None, series_id_values
15111521 for col in X .columns :
15121522 if col == time_index :
15131523 continue
1514- separated_name = col .split ("_" )
1515- original_columns .add ("_" .join (separated_name [:- 1 ]))
1524+ separated_name = col .split (MULTISERIES_SEPARATOR_SYMBOL )
1525+ original_columns .add (MULTISERIES_SEPARATOR_SYMBOL .join (separated_name [:- 1 ]))
15161526 series_ids .add (separated_name [- 1 ])
15171527
15181528 if len (series_ids ) == 0 :
0 commit comments