@@ -157,6 +157,17 @@ def __str__(self):
     def __repr__(self):
         return self.__str__()

+    # def __iter__(self):
+    #     """
+    #     Iterate over rows in the dataframe
+
+    #     :return: DESCRIPTION
+    #     :rtype: TYPE
+
+    #     """
+
+    #     return self.df.iterrows()[0]
+
     @property
     def df(self):
         return self._df
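If the commented-out __iter__ above is ever enabled, note that DataFrame.iterrows() returns a generator of (index, Series) pairs, so subscripting it with [0] would fail. A minimal hypothetical sketch of row iteration, not part of this commit, could look like:

import pandas as pd

class _Sketch:
    # hypothetical stand-in for KernelDataset, only to illustrate row iteration
    def __init__(self, df):
        self.df = df

    def __iter__(self):
        # iterrows() yields (index, Series) pairs and is itself an iterator
        return self.df.iterrows()

for idx, row in _Sketch(pd.DataFrame({"run": ["001", "002"]})):
    print(idx, row.run)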
@@ -182,7 +193,9 @@ def df(self, value):

             raise TypeError(msg)

-        self._df = self._set_datetime_columns(self._add_columns(value))
+        self._df = self._add_duration_column(
+            self._set_datetime_columns(self._add_columns(value)), inplace=False
+        )

     def _set_datetime_columns(self, df):
         """
@@ -217,15 +230,15 @@ def _add_columns(self, df):
                     raise ValueError(
                         f"{col} must be a filled column in the dataframe"
                     )
+
+                if isinstance(dtype, object):
+                    df[col] = None
                 else:
-                    if isinstance(dtype, object):
-                        df[col] = None
-                    else:
-                        df[col] = dtype(0)
-                    logger.warning(
-                        f"KernelDataset DataFrame needs column {col}, adding "
-                        f"and setting dtype to {dtype}."
-                    )
+                    df[col] = dtype(0)
+                logger.warning(
+                    f"KernelDataset DataFrame needs column {col}, adding "
+                    f"and setting dtype to {dtype}."
+                )
         return df

     def from_run_summary(
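A standalone sketch of the fill-missing-columns pattern above, with an invented schema (these are not the real KernelDataset columns). Note that in plain Python isinstance(dtype, object) is true for every value, so this sketch compares dtype to object directly to reach the dtype(0) branch; the apparent intent is that object-dtyped columns default to None and numeric ones to zero.

import numpy as np
import pandas as pd

df = pd.DataFrame({"station": ["mt01"]})
required = {"sample_rate": np.float64, "mth5_path": object}  # invented schema
for col, dtype in required.items():
    if col not in df.columns:
        if dtype is object:
            df[col] = None      # object columns start out empty
        else:
            df[col] = dtype(0)  # numeric columns start at zero
print(df.dtypes.to_dict())  # station/mth5_path -> object, sample_rate -> float64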
@@ -313,19 +326,32 @@ def local_survey_metadata(self) -> mt_metadata.timeseries.Survey:
             logger.warning(msg)
             return self.survey_metadata["0"]

-    def _add_duration_column(self) -> None:
+    def _add_duration_column(self, df, inplace=True) -> None:
         """adds a column to self.df with times end-start (in seconds)"""
-        timedeltas = self.df.end - self.df.start
+
+        timedeltas = df.end - df.start
         durations = [x.total_seconds() for x in timedeltas]
-        self.df["duration"] = durations
-        return
+        if inplace:
+            df["duration"] = durations
+            return df
+        else:
+            new_df = df.copy()
+            new_df["duration"] = durations
+            return new_df

-    def _update_duration_column(self) -> None:
+    def _update_duration_column(self, inplace=True) -> None:
         """calls add_duration_column (after possible manual manipulation of start/end"""
-        self._add_duration_column()
+
+        if inplace:
+            self._df = self._add_duration_column(self._df, inplace)
+        else:
+            return self._add_duration_column(self._df, inplace)

     def drop_runs_shorter_than(
-        self, minimum_duration: float, units="s"
+        self,
+        minimum_duration: float,
+        units="s",
+        inplace=True,
     ) -> None:
         """
         Drop runs from df that are inconsequentially short
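For reference, a minimal standalone sketch of the duration computation introduced above (data invented): subtracting two datetime columns yields Timedelta values whose total_seconds() fill the new column, and inplace=False operates on a copy.

import pandas as pd

def add_duration_column(df, inplace=True):
    # end - start gives a Series of Timedelta objects
    timedeltas = df.end - df.start
    durations = [x.total_seconds() for x in timedeltas]
    new_df = df if inplace else df.copy()
    new_df["duration"] = durations
    return new_df

df = pd.DataFrame(
    {
        "start": pd.to_datetime(["2020-01-01 00:00:00", "2020-01-01 01:00:00"]),
        "end": pd.to_datetime(["2020-01-01 00:30:00", "2020-01-01 01:00:05"]),
    }
)
print(add_duration_column(df, inplace=False).duration.tolist())  # [1800.0, 5.0]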
@@ -344,12 +370,18 @@ def drop_runs_shorter_than(
         if units != "s":
             msg = "Expected units are seconds : units='s'"
             raise NotImplementedError(msg)
-        if "duration" not in self.df.columns:
-            self._add_duration_column()
+
         drop_cond = self.df.duration < minimum_duration
-        self.df.drop(self.df[drop_cond].index, inplace=True)
-        self.df.reset_index(drop=True, inplace=True)
-        return
+        if inplace:
+            self._update_duration_column(inplace)
+            self.df.drop(self.df[drop_cond].index, inplace=inplace)
+            self.df.reset_index(drop=True, inplace=True)
+            return
+        else:
+            new_df = self._update_duration_column(inplace)
+            new_df = self.df.drop(self.df[drop_cond].index)
+            new_df.reset_index(drop=True, inplace=True)
+            return new_df

     def select_station_runs(
         self,
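A minimal pandas-only sketch of the drop logic (invented data, not the KernelDataset API itself): rows whose duration falls below the threshold are dropped and the index is reset, either in place or on a copy.

import pandas as pd

df = pd.DataFrame({"run": ["001", "002", "003"], "duration": [5.0, 3600.0, 12.0]})
minimum_duration = 60.0
drop_cond = df.duration < minimum_duration

trimmed = df.drop(df[drop_cond].index).reset_index(drop=True)  # inplace=False style
df.drop(df[drop_cond].index, inplace=True)                     # inplace=True style
df.reset_index(drop=True, inplace=True)
print(trimmed.run.tolist(), df.run.tolist())  # ['002'] ['002']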
@@ -525,17 +557,13 @@ def get_station_metadata(self, local_station_id: str):
         run_ids = sub_df.run.unique()
         assert len(run_ids) == len(sub_df)

-        # iterate over these runs, packing metadata into
-        # get run metadata from the group object instead of loading the runTS
-        # object, should be much faster.
-        station_metadata = None
+        station_metadata = sub_df.mth5_obj[0].from_reference(
+            sub_df.station_hdf5_reference[0]
+        )
+        station_metadata.runs = ListDict()
         for i, row in sub_df.iterrows():
             local_run_obj = self.get_run_object(row)
-            if station_metadata is None:
-                station_metadata = local_run_obj.station_metadata
-                station_metadata.runs = ListDict()
-            run_metadata = local_run_obj.metadata
-            station_metadata.add_run(run_metadata)
+            station_metadata.add_run(local_run_obj.metadata)
         return station_metadata

     def get_run_object(
@@ -697,36 +725,6 @@ def add_columns_for_processing(self, mth5_objs) -> None:
         for i, station_id in enumerate(self.df["station"]):
             mth5_obj_column[i] = mth5_objs[station_id]
         self.df["mth5_obj"] = mth5_obj_column
-        # for column_name in columns_to_add:
-        #     self.df[column_name] = None
-
-    # def get_run_object(
-    #     self, index_or_row: Union[int, pd.Series]
-    # ) -> mt_metadata.timeseries.Run:
-    #     """
-    #     Gets the run object associated with a row of the df
-
-    #     Development Notes:
-    #      TODO: This appears to be unused except by get_station_metadata.
-    #       Delete or integrate if desired.
-    #      - This has likely been deprecated by direct calls to
-    #       run_obj = row.mth5_obj.from_reference(row.run_reference) in pipelines.
-
-    #     Parameters
-    #     ----------
-    #     index_or_row: integer index of df, or pd.Series object
-
-    #     Returns
-    #     -------
-    #     run_obj: mt_metadata.timeseries.Run
-    #         The run associated with the row of the df.
-    #     """
-    #     if isinstance(index_or_row, int):
-    #         row = self.df.loc[index_or_row]
-    #     else:
-    #         row = index_or_row
-    #     run_obj = row.mth5_obj.from_reference(row.run_reference)
-    #     return run_obj

     def close_mth5s(self) -> None:
         """