ssec-jhu
diff --git a/‎dplutils/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎dplutils/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dplutils/cli.py‎
Lines changed: 6 additions & 5 deletions b/‎dplutils/cli.py‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎dplutils/observer/__init__.py‎
Lines changed: 14 additions & 10 deletions b/‎dplutils/observer/__init__.py‎
Lines changed: 14 additions & 10 deletions
diff --git a/‎dplutils/observer/aim.py‎
Lines changed: 1 addition & 0 deletions b/‎dplutils/observer/aim.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎dplutils/observer/mlflow.py‎
Lines changed: 3 additions & 2 deletions b/‎dplutils/observer/mlflow.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎dplutils/observer/ray.py‎
Lines changed: 4 additions & 1 deletion b/‎dplutils/observer/ray.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎dplutils/pipeline/__init__.py‎
Lines changed: 3 additions & 3 deletions b/‎dplutils/pipeline/__init__.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎dplutils/pipeline/executor.py‎
Lines changed: 25 additions & 20 deletions b/‎dplutils/pipeline/executor.py‎
Lines changed: 25 additions & 20 deletions
diff --git a/‎dplutils/pipeline/graph.py‎
Lines changed: 14 additions & 10 deletions b/‎dplutils/pipeline/graph.py‎
Lines changed: 14 additions & 10 deletions
@@ -3,4 +3,4 @@
 try:
     from ._version import __version__
 except ImportError:
-    __version__ = ''
+    __version__ = ""
@@ -1,5 +1,6 @@
 import json
 from argparse import ArgumentParser, Namespace
+
 from dplutils.pipeline import PipelineExecutor
 
 
@@ -16,9 +17,9 @@ def add_generic_args(argparser):
         argparser: The :class:`ArgumentParser<argparse.ArgumentParser>` instance
           to add args to.
     """
-    argparser.add_argument('-c', '--set-context', action='append', default=[], help='set context parameter')
-    argparser.add_argument('-s', '--set-config', action='append', default=[], help='set configuration parameter')
-    argparser.add_argument('-o', '--out-dir', default='.', help='write results to directory')
+    argparser.add_argument("-c", "--set-context", action="append", default=[], help="set context parameter")
+    argparser.add_argument("-s", "--set-config", action="append", default=[], help="set configuration parameter")
+    argparser.add_argument("-o", "--out-dir", default=".", help="write results to directory")
 
 
 def get_argparser(**kwargs):
@@ -40,7 +41,7 @@ def get_argparser(**kwargs):
 
 
 def parse_config_element(conf):
-    k,v = conf.split('=', 1)
+    k, v = conf.split("=", 1)
     try:
         v = json.loads(v)
     except json.decoder.JSONDecodeError:
@@ -67,7 +68,7 @@ def set_config_from_args(pipeline: PipelineExecutor, args: Namespace):
         pipeline.set_config(*parse_config_element(conf))
 
 
-def cli_run(pipeline: PipelineExecutor, args: Namespace|None = None,  **argparse_kwargs):
+def cli_run(pipeline: PipelineExecutor, args: Namespace | None = None, **argparse_kwargs):
     """Run pipeline from cli args
 
     If ``args`` is None, this function runs the pipeline for the standard set of
 
@@ -17,6 +17,7 @@ class Timer:
         with observer.timer('calltime'):
             <<do something>>
     """
+
     def __init__(self, observer, name, **kwargs):
         self.observer = observer
         self.name = name
@@ -36,7 +37,7 @@ def stop(self):
 
     def complete(self):
         if not self.started:
-            raise ValueError('Timer not started!')
+            raise ValueError("Timer not started!")
         self.stop()
         self.observer.observe(self.name, self.accum, **self.kwargs)
 
@@ -59,6 +60,7 @@ class Observer(ABC):
     While implementations are required to implement ``observe``, ``increment``
     and ``param``, there may be legitimate cases where the recording of
     """
+
     @abstractmethod
     def observe(self, name, value, **kwargs):
         """Observe a metric value
@@ -130,6 +132,7 @@ class NoOpObserver(Observer):
     This is akin to the ``NullHandler<logging.NullHandler>`` in the logging
     module and is the default upon initialization.
     """
+
     def observe(*args):
         """This method does nothing"""
         pass
@@ -150,6 +153,7 @@ class InMemoryObserver(Observer):
     each element in the list is a tuple (recorded_unix_time, value). Params are
     stored in a separate dict keyed by the parameter ``name``.
     """
+
     def __init__(self):
         self.metrics = defaultdict(list)
         self.params = {}
@@ -168,39 +172,39 @@ def param(self, name, value, **kwargs):
         self.params[name] = value
 
     def dump(self):
-        return {'params': self.params, 'metrics': self.metrics}
+        return {"params": self.params, "metrics": self.metrics}
 
 
 observer_map = {
-    'root': NoOpObserver(),
+    "root": NoOpObserver(),
 }
 
 
-def set_observer(obs, key='root'):
+def set_observer(obs, key="root"):
     """Set the global observer at ``key``"""
     observer_map[key] = obs
 
 
-def get_observer(key='root'):
+def get_observer(key="root"):
     """Get the global observer at ``key``"""
-    return observer_map.get(key, observer_map['root'])
+    return observer_map.get(key, observer_map["root"])
 
 
 def observe(*args, **kwargs):
     """call observe on the root observer"""
-    observer_map['root'].observe(*args, **kwargs)
+    observer_map["root"].observe(*args, **kwargs)
 
 
 def increment(*args, **kwargs):
     """call increment on the root observer"""
-    observer_map['root'].increment(*args, **kwargs)
+    observer_map["root"].increment(*args, **kwargs)
 
 
 def param(*args, **kwargs):
     """call param on the root observer"""
-    observer_map['root'].param(*args, **kwargs)
+    observer_map["root"].param(*args, **kwargs)
 
 
 def timer(*args, **kwargs):
     """call timer on the root observer"""
-    return observer_map['root'].timer(*args, **kwargs)
+    return observer_map["root"].timer(*args, **kwargs)
@@ -22,6 +22,7 @@ class AimObserver(Observer):
         Aim does not track the time with metric, only the step and this
         implementation uses the default auto-increment step counter.
     """
+
     def __init__(self, run=None, **aim_kwargs):
         if run is not None:
             self.run = run
 
@@ -24,12 +24,13 @@ class MlflowObserver(Observer):
             will be passed to its instantiation, using
             ``mlflow.MlflowClient.create_run``.
     """
+
     def __init__(self, run=None, experiment=None, tracking_uri=None, **mlflow_kwargs):
         if mlflow is None:
             raise ImportError("mlflow must be installed to create observer run!")
 
         tracking_uri = tracking_uri or mlflow.get_tracking_uri()
-        self.mlflow_client = mlflow.MlflowClient(tracking_uri = tracking_uri)
+        self.mlflow_client = mlflow.MlflowClient(tracking_uri=tracking_uri)
 
         if run is not None:
             self.run = run
@@ -41,7 +42,7 @@ def __init__(self, run=None, experiment=None, tracking_uri=None, **mlflow_kwargs
                     expid = exp.experiment_id
                 else:
                     expid = self.mlflow_client.create_experiment(experiment)
-            self.run = self.mlflow_client.create_run(experiment_id = expid, **mlflow_kwargs)
+            self.run = self.mlflow_client.create_run(experiment_id=expid, **mlflow_kwargs)
 
         self.run_id = self.run.info.run_id
         self._countercache = {}
 
@@ -1,5 +1,6 @@
 import ray
 from ray.util.metrics import Counter, Gauge
+
 from dplutils.observer import Observer
 
 
@@ -15,6 +16,7 @@ class RayActorWrappedObserver(Observer):
         *args: Args to pass to ``cls`` instantiation
         **kwargs: Keyword args to pass to ``cls`` instantiation
     """
+
     def __init__(self, cls, *args, **kwargs):
         self.actor = ray.remote(cls).remote(*args, **kwargs)
         self._wait = False  # for testing purposes. If true wait instead of fire-and-forget
@@ -41,14 +43,15 @@ class RayMetricsObserver(Observer):
     objects, this can be used directly having copies per worker (so does not
     need to be wrapped in actor).
     """
+
     def __init__(self):
         self.mmap = {}
 
     def _get_or_set_as(self, name, kind):
         if name in self.mmap:
             metric = self.mmap[name]
             if not isinstance(metric, kind):
-                raise TypeError(f'setting metric requires {kind}, but {name} is {type(metric)}')
+                raise TypeError(f"setting metric requires {kind}, but {name} is {type(metric)}")
         else:
             metric = kind(name)
             self.mmap[name] = metric
 
@@ -1,5 +1,5 @@
-from .task import PipelineTask
-from .executor import PipelineExecutor, OutputBatch
+from .executor import OutputBatch, PipelineExecutor
 from .graph import PipelineGraph
+from .task import PipelineTask
 
-__all__ = ['PipelineTask', 'PipelineExecutor', 'OutputBatch', 'PipelineGraph']
+__all__ = ["PipelineTask", "PipelineExecutor", "OutputBatch", "PipelineGraph"]
@@ -1,12 +1,14 @@
 import uuid
-import pandas as pd
-import yaml
 from abc import ABC, abstractmethod
+from collections.abc import Iterable
 from copy import deepcopy
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any
-from collections.abc import Iterable
+
+import pandas as pd
+import yaml
+
 from dplutils.pipeline.graph import PipelineGraph
 from dplutils.pipeline.utils import dict_from_coord
 
@@ -27,6 +29,7 @@ class PipelineExecutor(ABC):
     to execute the pipeline and return a generator of dataframes of the final
     tasks in the graph.
     """
+
     def __init__(self, graph: PipelineGraph):
         if isinstance(graph, list):
             self.graph = PipelineGraph(deepcopy(graph))
@@ -36,21 +39,21 @@ def __init__(self, graph: PipelineGraph):
         self._run_id = None
 
     @classmethod
-    def from_graph(cls, graph: PipelineGraph) -> 'PipelineExecutor':
+    def from_graph(cls, graph: PipelineGraph) -> "PipelineExecutor":
         return cls(graph)
 
     @property
     def tasks_idx(self):  # for back compat
         return self.graph.task_map
 
-    def set_context(self, key, value) -> 'PipelineExecutor':
+    def set_context(self, key, value) -> "PipelineExecutor":
         self.ctx[key] = value
         return self
 
-    def set_config_from_dict(self, config) -> 'PipelineExecutor':
+    def set_config_from_dict(self, config) -> "PipelineExecutor":
         for task_name, confs in config.items():
             if task_name not in self.tasks_idx:
-                raise ValueError(f'no such task: {task_name}')
+                raise ValueError(f"no such task: {task_name}")
             for key, value in confs.items():
                 task = self.tasks_idx[task_name]
                 task_val = getattr(task, key)
@@ -61,11 +64,11 @@ def set_config_from_dict(self, config) -> 'PipelineExecutor':
         return self
 
     def set_config(
-            self,
-            coord: str|dict|None = None,
-            value: Any|None = None,
-            from_yaml: str|Path|None = None,
-    ) -> 'PipelineExecutor':
+        self,
+        coord: str | dict | None = None,
+        value: Any | None = None,
+        from_yaml: str | Path | None = None,
+    ) -> "PipelineExecutor":
         """Set task configuration options for this instance.
 
         This applies configurations to :class:`PipelineTask
@@ -90,8 +93,8 @@ def set_config(
         """
         if coord is None:
             if from_yaml is None:
-                raise ValueError('one of dict/string coordinate and value/file input is required')
-            with open(from_yaml, 'r') as f:
+                raise ValueError("one of dict/string coordinate and value/file input is required")
+            with open(from_yaml, "r") as f:
                 return self.set_config_from_dict(yaml.load(f, yaml.SafeLoader))
         if isinstance(coord, dict):
             return self.set_config_from_dict(coord)
@@ -106,7 +109,7 @@ def validate(self) -> None:
             except ValueError as e:
                 excs.append(str(e))
         if len(excs) > 0:
-            raise ValueError('Errors in validation:\n    - ' + '\n    - '.join(excs))
+            raise ValueError("Errors in validation:\n    - " + "\n    - ".join(excs))
 
     @property
     def run_id(self) -> str:
@@ -147,7 +150,9 @@ def run(self) -> Iterable[OutputBatch]:
         self._run_id = None  # force reallocation
         return self.execute()
 
-    def writeto(self, outdir: Path|str, partition_by_task: bool|None = None, task_partition_name: str = 'task') -> None:
+    def writeto(
+        self, outdir: Path | str, partition_by_task: bool | None = None, task_partition_name: str = "task"
+    ) -> None:
         """Run pipeline, writing results to parquet table.
 
         args:
@@ -166,10 +171,10 @@ def writeto(self, outdir: Path|str, partition_by_task: bool|None = None, task_pa
         Path(outdir).mkdir(parents=True, exist_ok=True)
         for c, batch in enumerate(self.run()):
             if partition_by_task:
-                part_name = batch.task or '__HIVE_DEFAULT_PARTITION__'
-                part_path = Path(outdir) / f'{task_partition_name}={part_name}'
+                part_name = batch.task or "__HIVE_DEFAULT_PARTITION__"
+                part_path = Path(outdir) / f"{task_partition_name}={part_name}"
                 part_path.mkdir(exist_ok=True)
-                outfile = part_path / f'{self.run_id}-{c}.parquet'
+                outfile = part_path / f"{self.run_id}-{c}.parquet"
             else:
-                outfile = Path(outdir) / f'{self.run_id}-{c}.parquet'
+                outfile = Path(outdir) / f"{self.run_id}-{c}.parquet"
             batch.data.to_parquet(outfile, index=False)
@@ -1,11 +1,13 @@
 from enum import Enum
-from networkx import DiGraph, path_graph, all_simple_paths, is_directed_acyclic_graph, bfs_edges
+
+from networkx import DiGraph, all_simple_paths, bfs_edges, is_directed_acyclic_graph, path_graph
+
 from dplutils.pipeline.task import PipelineTask
 
 
 class TRM(Enum):
-    sink = 'sink'
-    source = 'source'
+    sink = "sink"
+    source = "source"
 
 
 class PipelineGraph(DiGraph):
@@ -18,37 +20,37 @@ class PipelineGraph(DiGraph):
       graph: This is either a list of :class:`PipelineTask` objects representing a
         simple-graph, or anything that is legal input to :class:`networkx.DiGraph`.
     """
+
     def __init__(self, graph=None):
         if isinstance(graph, list) and isinstance(graph[0], PipelineTask):
             graph = path_graph(graph, DiGraph)
         super().__init__(graph)
         if not is_directed_acyclic_graph(self):
-            raise ValueError('cycles detected in graph')
+            raise ValueError("cycles detected in graph")
 
     @property
     def task_map(self):
         return {i.name: i for i in self}
 
     @property
     def source_tasks(self):
-        return [n for n,d in self.in_degree() if d == 0]
+        return [n for n, d in self.in_degree() if d == 0]
 
     @property
     def sink_tasks(self):
-        return [n for n,d in self.out_degree() if d == 0]
+        return [n for n, d in self.out_degree() if d == 0]
 
     def to_list(self):
-        """Return list representation of task iff it is a simple-path graph
-        """
+        """Return list representation of task iff it is a simple-path graph"""
         if len(self.source_tasks) != 1 or len(self.sink_tasks) != 1:
-            raise ValueError('to_list requires a graph with only one start and end task')
+            raise ValueError("to_list requires a graph with only one start and end task")
         source = self.source_tasks[0]
         sink = self.sink_tasks[0]
         if source == sink:
             return [source]
         paths = list(all_simple_paths(self, source, sink))
         if len(paths) != 1:
-            raise ValueError('to_list requires a single path from start to end task, found {len(paths)}')
+            raise ValueError(f"to_list requires a single path from start to end task, found {len(paths)}")
         return paths[0]
 
     def with_terminals(self):
@@ -59,11 +61,13 @@ def with_terminals(self):
 
     def _walk(self, source, back=False, sort_key=None):
         graph = self.with_terminals()
+
         # doubly wrap the sort key function for convenience (since bfs search
         # takes list, not sort key) and to inject the ignoring of terminal
         # nodes. This makes the walk sort key behave a bit more like `sorted()`
         def _sort_key(x):
             return 0 if isinstance(x, TRM) else sort_key(x)
+
         sorter = (lambda x: sorted(x, key=_sort_key)) if sort_key else None
         for _, node in bfs_edges(graph, source, reverse=back, sort_neighbors=sorter):
             if not isinstance(node, TRM):