Open-EO
diff --git a/openeo/extra/job_management/__init__.py b/openeo/extra/job_management/__init__.py
Lines changed: 30 additions & 38 deletions
diff --git a/openeo/extra/job_management/_thread_worker.py b/openeo/extra/job_management/_thread_worker.py
Lines changed: 27 additions & 25 deletions
@@ -33,9 +33,10 @@
 from urllib3.util import Retry
 
 from openeo import BatchJob, Connection
-from openeo.extra.job_management._thread_worker import ( _JobManagerWorkerThreadPool,
-                                                         _JobStartTask)
-
+from openeo.extra.job_management._thread_worker import (
+    _JobManagerWorkerThreadPool,
+    _JobStartTask,
+)
 from openeo.internal.processes.parse import (
     Parameter,
     Process,
@@ -527,8 +528,7 @@ def run_jobs(
             time.sleep(self.poll_sleep)
             stats["sleep"] += 1
 
-        
-        # TODO; run post process after shutdown once more to ensure completion? 
+        # TODO; run post process after shutdown once more to ensure completion?
         self._worker_pool.shutdown()
 
         return stats
@@ -571,7 +571,7 @@ def _job_update_loop(
                         total_added += 1
 
         self._process_threadworker_updates(self._worker_pool, job_db, stats)
-        
+
         # TODO: move this back closer to the `_track_statuses` call above, once job done/error handling is also handled in threads?
         for job, row in jobs_done:
             self.on_job_done(job, row)
@@ -644,7 +644,7 @@ def _launch_job(self, start_job, df, i, backend_name, stats: Optional[dict] = No
                             )
                             _log.info(f"Submitting task {task} to thread pool")
                             self._worker_pool.submit_task(task)
-                            
+
                             stats["job_queued_for_start"] += 1
                             df.loc[i, "status"] = "queued_for_start"
                         except OpenEoApiError as e:
@@ -660,59 +660,55 @@ def _process_threadworker_updates(
         self,
         worker_pool: _JobManagerWorkerThreadPool,
         job_db: JobDatabaseInterface,
-        stats: dict
+        stats: dict,
     ) -> None:
-        """Processes asynchronous job updates from worker threads and applies them to the job database and statistics.
-        
+        """
+        Processes asynchronous job updates from worker threads and applies them to the job database and statistics.
+
         This wrapper function is responsible for:
         1. Collecting completed results from the worker thread pool
         2. applying database updates for each job result
         3. applying statistics updates
         4. Handles errors with comprehensive logging
-        
+
         :param worker_pool:
             Thread pool instance managing the asynchronous job operations.
             Should provide a `process_futures()` method returning completed job results.
-            
+
         :param job_db:
             Job database implementing the :py:class:`JobDatabaseInterface` interface.
             Used to persist job status updates and metadata.
             Must support the `_update_row(job_id: str, updates: dict)` method.
-            
+
         :param stats:
             Dictionary tracking operational statistics that will be updated in-place.
             Expected to handle string keys with integer values.
             Statistics will be updated with counts from completed job results.
-            
-        :return: 
+
+        :return:
             None: All updates are applied in-place to the job_db and stats parameters.
-.
         """
         results = worker_pool.process_futures()
         stats_updates = collections.defaultdict(int)
-        
-        for result in results: 
+
+        for result in results:
             try:
                 # Handle job database updates
                 if result.db_update:
                     _log.debug(f"Processing update for job {result.job_id}")
                     job_db._update_row(job_id=result.job_id, updates=result.db_update)
-                
+
                 # Aggregate statistics updates
                 if result.stats_update:
                     for key, count in result.stats_update.items():
                         stats_updates[key] += int(count)
-                        
-    
+
             except Exception as e:
-                _log.error(
-                    f"Failed aggregating the updates for update for job {result.job_id}: {str(e)}")
-        
+                _log.error(f"Failed aggregating the updates for update for job {result.job_id}: {str(e)}")
+
         # Apply all stat updates
         for key, count in stats_updates.items():
             stats[key] = stats.get(key, 0) + count
-                
-
 
     def on_job_done(self, job: BatchJob, row):
         """
@@ -877,6 +873,7 @@ def _track_statuses(self, job_db: JobDatabaseInterface, stats: Optional[dict] =
 
         return jobs_done, jobs_error, jobs_cancel
 
+
 def _format_usage_stat(job_metadata: dict, field: str) -> str:
     value = deep_get(job_metadata, "usage", field, "value", default=0)
     unit = deep_get(job_metadata, "usage", field, "unit", default="")
@@ -986,29 +983,29 @@ def _update_row(self, job_id: str, updates: dict):
         # Create boolean mask for target row
         mask = self._df["id"] == job_id
         match_count = mask.sum()
-        
+
         # Handle row identification issues
-        #TODO: make this more robust, e.g. falling back on the row index?
+        # TODO: make this more robust, e.g. falling back on the row index?
         if match_count == 0:
             _log.error(f"Job {job_id!r} not found in database")
             return
         if match_count > 1:
             _log.error(f"Duplicate job ID {job_id!r} found in database")
             return
 
-        # Get valid columns 
-        valid_columns = set(self._df.columns)  
+        # Get valid columns
+        valid_columns = set(self._df.columns)
         filtered_updates = {}
-        
+
         # Validate update keys s
         for key, value in updates.items():
             if key in valid_columns:
                 filtered_updates[key] = value
             else:
                 _log.warning(f"Ignoring invalid column {key!r} in update for job {job_id}")
 
-        # Bulk update 
-        if  not filtered_updates:
+        # Bulk update
+        if not filtered_updates:
             return
         try:
             # Update all columns in a single operation
@@ -1017,9 +1014,6 @@ def _update_row(self, job_id: str, updates: dict):
         except Exception as e:
             _log.error(f"Failed to persist row update for job {job_id}: {e}")
 
-    
-
-
 
 class CsvJobDatabase(FullDataFrameJobDatabase):
     """
@@ -1075,8 +1069,6 @@ def persist(self, df: pd.DataFrame):
         self.path.parent.mkdir(parents=True, exist_ok=True)
         self.df.to_csv(self.path, index=False)
 
-    
-
 
 class ParquetJobDatabase(FullDataFrameJobDatabase):
     """
 
@@ -1,13 +1,14 @@
 import concurrent.futures
 import logging
-from dataclasses import dataclass, field
-from typing import Optional, Any, List, Dict, Tuple
-import openeo
 from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Tuple
 
+import openeo
 
 _log = logging.getLogger(__name__)
 
+
 @dataclass
 class _TaskResult:
     """
@@ -25,10 +26,12 @@ class _TaskResult:
         Optional dictionary capturing statistical counters or metrics,
         e.g., number of successful starts or errors. Defaults to an empty dict.
     """
+
     job_id: str  # Mandatory
     db_update: Dict[str, Any] = field(default_factory=dict)  # Optional
     stats_update: Dict[str, int] = field(default_factory=dict)  # Optional
 
+
 class Task(ABC):
     """
     Abstract base class for asynchronous tasks.
@@ -38,12 +41,13 @@ class Task(ABC):
 
     Implementations must override the `execute` method to define the task logic.
     """
-    
+
     @abstractmethod
     def execute(self) -> _TaskResult:
         """Execute the task and return a raw result"""
         pass
-    
+
+
 @dataclass
 class _JobStartTask(Task):
     """
@@ -75,10 +79,10 @@ class _JobStartTask(Task):
     :raises ValueError:
         If any of the input parameters are invalid (e.g., empty strings).
     """
+
     job_id: str
     root_url: str
     bearer_token: Optional[str]
-    
 
     def __post_init__(self) -> None:
         # Validation remains unchanged
@@ -115,10 +119,10 @@ def execute(self) -> _TaskResult:
         except Exception as e:
             _log.error(f"Failed to start job {self.job_id}: {e}")
             return _TaskResult(
-                job_id=self.job_id,
-                db_update={"status": "start_failed"},  
-                stats_update={"start_job error": 1})
-        
+                job_id=self.job_id, db_update={"status": "start_failed"}, stats_update={"start_job error": 1}
+            )
+
+
 class _JobManagerWorkerThreadPool:
     """
     Thread pool-based worker that manages the execution of asynchronous tasks.
@@ -130,6 +134,7 @@ class _JobManagerWorkerThreadPool:
         Maximum number of concurrent threads to use for execution.
         Defaults to 2.
     """
+
     def __init__(self, max_workers: int = 2):
         self._executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers)
         self._future_task_pairs: List[Tuple[concurrent.futures.Future, Task]] = []
@@ -147,7 +152,7 @@ def submit_task(self, task: Task) -> None:
         future = self._executor.submit(task.execute)
         self._future_task_pairs.append((future, task))  # Track pairs
 
-    def process_futures(self) -> List[ _TaskResult]:
+    def process_futures(self) -> List[_TaskResult]:
         """
         Process and retrieve results from completed tasks.
 
@@ -157,34 +162,31 @@ def process_futures(self) -> List[ _TaskResult]:
         :returns:
             A list of `_TaskResult` objects from completed tasks.
         """
-        results = []  
-        to_keep = [] 
+        results = []
+        to_keep = []
 
         # Use timeout=0 to avoid blocking and check for completed futures
         done, _ = concurrent.futures.wait(
-            [f for f, _ in self._future_task_pairs], timeout=0,
-            return_when=concurrent.futures.FIRST_COMPLETED
+            [f for f, _ in self._future_task_pairs], timeout=0, return_when=concurrent.futures.FIRST_COMPLETED
         )
 
         # Process completed futures and their tasks
         for future, task in self._future_task_pairs:
             if future in done:
                 try:
                     result = future.result()
-                    
-                except Exception as e:
 
+                except Exception as e:
                     _log.exception(f"Error processing task: {e}")
-                    result =  _TaskResult(
-                                job_id=task.job_id,
-                                db_update={"status": "start_failed"},  
-                                stats_update={"start_job error": 1})
-                    
+                    result = _TaskResult(
+                        job_id=task.job_id, db_update={"status": "start_failed"}, stats_update={"start_job error": 1}
+                    )
+
                 results.append(result)
-            else:  
-                to_keep.append((future, task))  
+            else:
+                to_keep.append((future, task))
 
-        self._future_task_pairs = to_keep  
+        self._future_task_pairs = to_keep
         return results
 
     def shutdown(self) -> None: