Benchmark Failure: eurac_pv_farm_detection

Scenario ID: eurac_pv_farm_detection
Backend System: openeofed.dataspace.copernicus.eu
Failure Count: 1
Timestamp: 2025-04-21 02:30:41

Links:

Contact Information

Point of Contact:
{ "pvfarm": { "process_id": "eurac_pv_farm_detection", "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/main/algorithm_catalog/eurac/eurac_pv_farm_detection/openeo_udp/eurac_pv_farm_detection.json", "arguments": { "bbox": { "east": 16.414, "north": 48.008, "south": 47.962, "west": 16.342 }, "temporal_extent": [ "2023-05-01", "2023-09-30" ] }, "result": true } }
Error Logs

```
scenario = BenchmarkScenario(id='eurac_pv_farm_detection', description='ML photovoltaic farm detection, developed by EURAC', back...al_extent': ['2023-05-01', '2023-09-30']}, 'result': True}}, job_options=None, reference_data={}, reference_options={})
connection_factory = <function connection_factory.<locals>.get_connection at 0x7f9e0407b920>
tmp_path = PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_eurac_pv_fa0')
track_metric = <function track_metric.<locals>.append at 0x7f9e0407bba0>
upload_assets_on_fail = <function upload_assets_on_fail.<locals>.collect at 0x7f9e0407b880>
request = <FixtureRequest for <Function test_run_benchmark[eurac_pv_farm_detection]>>

    @pytest.mark.parametrize(
        "scenario",
        [
            # Use scenario id as parameterization id to give nicer test names.
            pytest.param(uc, id=uc.id)
            for uc in get_benchmark_scenarios()
        ],
    )
    def test_run_benchmark(
        scenario: BenchmarkScenario,
        connection_factory,
        tmp_path: Path,
        track_metric,
        upload_assets_on_fail,
        request
    ):
        track_metric("scenario_id", scenario.id)

        # Check if a backend override has been provided via cli options.
        override_backend = request.config.getoption("--override-backend")
        backend = scenario.backend
        if override_backend:
            _log.info(f"Overriding backend URL with {override_backend!r}")
            backend = override_backend

        connection: openeo.Connection = connection_factory(url=backend)

        # TODO #14 scenario option to use synchronous instead of batch job mode?
        job = connection.create_job(
            process_graph=scenario.process_graph,
            title=f"APEx benchmark {scenario.id}",
            additional=scenario.job_options,
        )
        track_metric("job_id", job.job_id)

        # TODO: monitor timing and progress
        # TODO: abort excessively long batch jobs? https://github.com/Open-EO/openeo-python-client/issues/589
>       job.start_and_wait()

tests/test_benchmarks.py:56:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <BatchJob job_id='cdse-j-25042102214744579d368bfd182ed7e3'>
print = <built-in function print>, max_poll_interval = 60
connection_retry_interval = 30, soft_error_max = 10, show_error_logs = True

    def start_and_wait(
        self,
        print=print,
        max_poll_interval: float = DEFAULT_JOB_STATUS_POLL_INTERVAL_MAX,
        connection_retry_interval: float = DEFAULT_JOB_STATUS_POLL_CONNECTION_RETRY_INTERVAL,
        soft_error_max: int = DEFAULT_JOB_STATUS_POLL_SOFT_ERROR_MAX,
        show_error_logs: bool = True,
    ) -> BatchJob:
        """
        Start the batch job, poll its status and wait till it finishes (or fails)

        :param print: print/logging function to show progress/status
        :param max_poll_interval: maximum number of seconds to sleep between job status polls
        :param connection_retry_interval: how long to wait when status poll failed due to connection issue
        :param soft_error_max: maximum number of soft errors (e.g. temporary connection glitches) to allow
        :param show_error_logs: whether to automatically print error logs when the batch job failed.

        :return: Handle to the job created at the backend.

        .. versionchanged:: 0.37.0
            Added argument ``show_error_logs``.
        """
        # TODO rename `connection_retry_interval` to something more generic?
        start_time = time.time()

        def elapsed() -> str:
            return str(datetime.timedelta(seconds=time.time() - start_time)).rsplit(".")[0]

        def print_status(msg: str):
            print("{t} Job {i!r}: {m}".format(t=elapsed(), i=self.job_id, m=msg))

        # TODO: make `max_poll_interval`, `connection_retry_interval` class constants or instance properties?
        print_status("send 'start'")
        self.start()

        # TODO: also add `wait` method so you can track a job that already has started explicitly
        #   or just rename this method to `wait` and automatically do start if not started yet?

        # Start with fast polling.
        poll_interval = min(5, max_poll_interval)
        status = None
        _soft_error_count = 0

        def soft_error(message: str):
            """Non breaking error (unless we had too much of them)"""
            nonlocal _soft_error_count
            _soft_error_count += 1
            if _soft_error_count > soft_error_max:
                raise OpenEoClientException("Excessive soft errors")
            print_status(message)
            time.sleep(connection_retry_interval)

        while True:
            # TODO: also allow a hard time limit on this infinite poll loop?
            try:
                job_info = self.describe()
            except requests.ConnectionError as e:
                soft_error("Connection error while polling job status: {e}".format(e=e))
                continue
            except OpenEoApiPlainError as e:
                if e.http_status_code in [502, 503]:
                    soft_error("Service availability error while polling job status: {e}".format(e=e))
                    continue
                else:
                    raise
            status = job_info.get("status", "N/A")

            progress = job_info.get("progress")
            if isinstance(progress, int):
                progress = f"{progress:d}%"
            elif isinstance(progress, float):
                progress = f"{progress:.1f}%"
            else:
                progress = "N/A"
            print_status(f"{status} (progress {progress})")

            if status not in ('submitted', 'created', 'queued', 'running'):
                break

            # Sleep for next poll (and adaptively make polling less frequent)
            time.sleep(poll_interval)
            poll_interval = min(1.25 * poll_interval, max_poll_interval)

        if status != "finished":
            # TODO: render logs jupyter-aware in a notebook context?
            if show_error_logs:
                print(f"Your batch job {self.job_id!r} failed. Error logs:")
                print(self.logs(level=logging.ERROR))
                print(
                    f"Full logs can be inspected in an openEO (web) editor or with `connection.job({self.job_id!r}).logs()`."
                )
>           raise JobFailedException(
                f"Batch job {self.job_id!r} didn't finish successfully. Status: {status} (after {elapsed()}).",
                job=self,
            )
E           openeo.rest.JobFailedException: Batch job 'cdse-j-25042102214744579d368bfd182ed7e3' didn't finish successfully. Status: error (after 0:08:50).

/opt/hostedtoolcache/Python/3.12.10/x64/lib/python3.12/site-packages/openeo/rest/job.py:350: JobFailedException
----------------------------- Captured stdout call -----------------------------
0:00:00 Job 'cdse-j-25042102214744579d368bfd182ed7e3': send 'start'
0:00:13 Job 'cdse-j-25042102214744579d368bfd182ed7e3': created (progress 0%)
0:00:19 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:00:25 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:00:33 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:00:44 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:00:56 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:01:12 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:01:31 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:01:55 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:02:25 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:03:03 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:03:50 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:04:48 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:05:48 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:06:49 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:07:49 Job 'cdse-j-25042102214744579d368bfd182ed7e3': running (progress N/A)
0:08:49 Job 'cdse-j-25042102214744579d368bfd182ed7e3': error (progress N/A)
Your batch job 'cdse-j-25042102214744579d368bfd182ed7e3' failed. Error logs:
[{'id': '[1745202588028, 731661]', 'time': '2025-04-21T02:29:48.028Z', 'level': 'error', 'message': 'Task 0 in stage 52.0 failed 4 times; aborting job'},
 {'id': '[1745202588044, 836925]', 'time': '2025-04-21T02:29:48.044Z', 'level': 'error', 'message': 'Stage error: Job aborted due to stage failure: Task 0 in stage 52.0 failed 4 times, most recent failure: Lost task 0.3 in stage 52.0 (TID 2495) (10.42.13.135 executor 8): org.apache.spark.api.python.PythonException: Traceback (most recent call last):\n File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1247, in main\n process()\n File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1239, in process\n serializer.dump_stream(out_iter, outfile)\n File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 146, in dump_stream\n for obj in iterator:\n File "/usr/local/spark/python/lib/pyspark.zip/pyspark/util.py", line 83, in wrapper\n return f(*args, **kwargs)\n File "/opt/openeo/lib/python3.8/site-packages/openeogeotrellis/utils.py", line 64, in memory_logging_wrapper\n return function(*args, **kwargs)\n File "/opt/openeo/lib/python3.8/site-packages/epsel.py", line 44, in wrapper\n return _FUNCTION_POINTERS[key](*args, **kwargs)\n File "/opt/openeo/lib/python3.8/site-packages/epsel.py", line 37, in first_time\n return f(*args, **kwargs)\n File "/opt/openeo/lib/python3.8/site-packages/openeogeotrellis/geopysparkdatacube.py", line 791, in tile_function\n result_data = run_udf_code(code=udf_code, data=data)\n File "/opt/openeo/lib/python3.8/site-packages/epsel.py", line 44, in wrapper\n return _FUNCTION_POINTERS[key](*args, **kwargs)\n File "/opt/openeo/lib/python3.8/site-packages/epsel.py", line 37, in first_time\n return f(*args, **kwargs)\n File "/opt/openeo/lib/python3.8/site-packages/openeogeotrellis/udf.py", line 67, in run_udf_code\n return openeo.udf.run_udf_code(code=code, data=data)\n File "/opt/openeo/lib/python3.8/site-packages/openeo/udf/run_code.py", line 195, in run_udf_code\n result_cube: xarray.DataArray = func(cube=data.get_datacube_list()[0].get_array(), context=data.user_context)\n File "<string>", line 106, in apply_datacube\n File "<string>", line 78, in apply_model\n File "<string>", line 21, in load_onnx_model\n File "onnx_deps/onnxruntime/capi/onnxruntime_inference_collection.py", line 419, in __init__\n self._create_inference_session(providers, provider_options, disabled_optimizers)\n File "onnx_deps/onnxruntime/capi/onnxruntime_inference_collection.py", line 452, in _create_inference_session\n sess = C.InferenceSession(session_options, self._model_path, True, self._read_config_from_model)\nonnxruntime.capi.onnxruntime_pybind11_state.NoSuchFile: [ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from onnx_models/EURAC_pvfarm_rf_1_median_depth_15.onnx failed:Load model onnx_models/EURAC_pvfarm_rf_1_median_depth_15.onnx failed. File doesn\'t exist\n\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:572)\n\tat org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:784)\n\tat org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:766)\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:525)\n\tat org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)\n\tat org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)\n\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:104)\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)\n\tat org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:141)\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)\n\tat org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)\n\tat org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)\n\tat java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)\n\tat java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)\n\tat java.base/java.lang.Thread.run(Thread.java:829)\n\nDriver stacktrace:'},
 {'id': '[1745202590066, 104502]', 'time': '2025-04-21T02:29:50.066Z', 'level': 'error', 'message': 'OpenEO batch job failed: UDF exception while evaluating processing graph. Please check your user defined functions. stacktrace:\n File "<string>", line 106, in apply_datacube\n File "<string>", line 78, in apply_model\n File "<string>", line 21, in load_onnx_model\n File "onnx_deps/onnxruntime/capi/onnxruntime_inference_collection.py", line 419, in __init__\n self._create_inference_session(providers, provider_options, disabled_optimizers)\n File "onnx_deps/onnxruntime/capi/onnxruntime_inference_collection.py", line 452, in _create_inference_session\n sess = C.InferenceSession(session_options, self._model_path, True, self._read_config_from_model)\nonnxruntime.capi.onnxruntime_pybind11_state.NoSuchFile: [ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from onnx_models/EURAC_pvfarm_rf_1_median_depth_15.onnx failed:Load model onnx_models/EURAC_pvfarm_rf_1_median_depth_15.onnx failed. File doesn\'t exist'}]
Full logs can be inspected in an openEO (web) editor or with `connection.job('cdse-j-25042102214744579d368bfd182ed7e3').logs()`.
------------------------------ Captured log call -------------------------------
INFO conftest:conftest.py:125 Connecting to 'openeofed.dataspace.copernicus.eu'
INFO openeo.config:config.py:193 Loaded openEO client config from sources: []
INFO conftest:conftest.py:138 Checking for auth_env_var='OPENEO_AUTH_CLIENT_CREDENTIALS_CDSEFED' to drive auth against url='openeofed.dataspace.copernicus.eu'.
INFO conftest:conftest.py:142 Extracted provider_id='CDSE' client_id='openeo-apex-benchmarks-service-account' from auth_env_var='OPENEO_AUTH_CLIENT_CREDENTIALS_CDSEFED'
INFO openeo.rest.connection:connection.py:232 Found OIDC providers: ['CDSE']
INFO openeo.rest.auth.oidc:oidc.py:404 Doing 'client_credentials' token request 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token' with post data fields ['grant_type', 'client_id', 'client_secret', 'scope'] (client_id 'openeo-apex-benchmarks-service-account')
INFO openeo.rest.connection:connection.py:329 Obtained tokens: ['access_token', 'id_token']
- Generated track_metrics report: report/metrics.json, _ParquetS3StorageSettings(bucket='apex-benchmarks', key='metrics/v1/metrics.parquet')
- -------------------- `upload_assets` stats: {'uploaded': 0} --------------------
- tests/test_benchmarks.py::test_run_benchmark[eurac_pv_farm_detection]:
- Generated html report: file:///home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/report/report.html
```
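The decisive error is onnxruntime's `NoSuchFile`: `load_onnx_model` (frame `<string>`, line 21 of the UDF) builds an `InferenceSession` from the relative path `onnx_models/EURAC_pvfarm_rf_1_median_depth_15.onnx`, which is not present in the executor's working directory. As a sketch only (the function name matches the traceback frame, but the body and the early existence check are illustrative assumptions, not the actual UDF code), the failing pattern looks like:

```python
from pathlib import Path

import onnxruntime

# Path taken verbatim from the error logs above.
MODEL_PATH = "onnx_models/EURAC_pvfarm_rf_1_median_depth_15.onnx"


def load_onnx_model(model_path: str = MODEL_PATH) -> onnxruntime.InferenceSession:
    # Assumption: the model archive is expected to be unpacked into the batch
    # job's working directory on each executor before the UDF runs.
    path = Path(model_path)
    if not path.is_file():
        # Failing early gives a clearer message than onnxruntime's NoSuchFile.
        raise FileNotFoundError(f"ONNX model missing at {path.resolve()}")
    return onnxruntime.InferenceSession(str(path))
```

This suggests the failure is environmental (the model file was not delivered to the executors) rather than a change in the algorithm itself.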