1
1
from __future__ import annotations
2
2
import contextlib
3
- from copy import deepcopy
3
+ import datetime as dt
4
4
import json
5
5
import logging
6
6
import random
7
7
from datetime import datetime , timedelta
8
8
from decimal import Decimal
9
- from pathlib import Path
10
9
from typing import Any , List , Dict , Callable , Union , Optional , Iterator , Tuple
11
10
12
11
import kazoo
@@ -155,9 +154,6 @@ def set_dependencies(self, job_id: str, user_id: str, dependencies: List[Dict[st
155
154
def remove_dependencies (self , job_id : str , user_id : str ):
156
155
self .patch (job_id , user_id , dependencies = None , dependency_status = None )
157
156
158
- def set_results_metadata_uri (self , job_id : str , user_id : str , results_metadata_uri : str ) -> None :
159
- self .patch (job_id , user_id , results_metadata_uri = results_metadata_uri )
160
-
161
157
def patch (
162
158
self , job_id : str , user_id : str , auto_mark_done : bool = True , ** kwargs
163
159
) -> None :
@@ -627,15 +623,15 @@ def create_job(
627
623
"api_version" : api_version ,
628
624
"job_options" : job_options ,
629
625
}
630
- return deepcopy ( self .db [job_id ])
626
+ return self .db [job_id ]
631
627
632
628
def get_job (self , job_id : str , * , user_id : Optional [str ] = None ) -> JobDict :
633
629
job = self .db .get (job_id )
634
630
635
631
if not job or (user_id is not None and job ['user_id' ] != user_id ):
636
632
raise JobNotFoundException (job_id = job_id )
637
633
638
- return deepcopy ( job )
634
+ return job
639
635
640
636
def delete_job (self , job_id : str , * , user_id : Optional [str ] = None ) -> None :
641
637
self .get_job (job_id = job_id , user_id = user_id ) # will raise on job not found
@@ -644,7 +640,7 @@ def delete_job(self, job_id: str, *, user_id: Optional[str] = None) -> None:
644
640
def _update (self , job_id : str , ** kwargs ) -> JobDict :
645
641
assert job_id in self .db
646
642
self .db [job_id ].update (** kwargs )
647
- return deepcopy ( self .db [job_id ])
643
+ return self .db [job_id ]
648
644
649
645
def set_status (
650
646
self ,
@@ -695,10 +691,7 @@ def set_results_metadata(
695
691
usage : dict ,
696
692
results_metadata : Optional [Dict [str , Any ]] = None ,
697
693
) -> None :
698
- if results_metadata :
699
- self ._update (job_id = job_id , costs = costs , usage = usage , results_metadata = results_metadata )
700
- else :
701
- self ._update (job_id = job_id , costs = costs , usage = usage )
694
+ self ._update (job_id = job_id , costs = costs , usage = usage , results_metadata = results_metadata )
702
695
703
696
def set_results_metadata_uri (
704
697
self , job_id : str , * , user_id : Optional [str ] = None , results_metadata_uri : str
@@ -714,7 +707,7 @@ def list_user_jobs(
714
707
request_parameters : Optional [dict ] = None ,
715
708
# TODO #959 settle on returning just `JobListing` and eliminate other options/code paths.
716
709
) -> Union [JobListing , List [JobDict ]]:
717
- jobs = [deepcopy ( job ) for job in self .db .values () if job ["user_id" ] == user_id ]
710
+ jobs = [job for job in self .db .values () if job ["user_id" ] == user_id ]
718
711
if limit :
719
712
pagination_param = "page"
720
713
page_number = int ((request_parameters or {}).get (pagination_param , 0 ))
@@ -741,7 +734,7 @@ def list_active_jobs(
741
734
active = [JOB_STATUS .CREATED , JOB_STATUS .QUEUED , JOB_STATUS .RUNNING ]
742
735
# TODO: implement support for max_age, max_updated_ago, fields
743
736
return [
744
- deepcopy ( job )
737
+ job
745
738
for job in self .db .values ()
746
739
if job ["status" ] in active and (not require_application_id or job .get ("application_id" ) is not None )
747
740
]
@@ -874,64 +867,10 @@ def get_job_metadata(self, job_id: str, user_id: str) -> BatchJobMetadata:
874
867
with contextlib .suppress (JobNotFoundException ):
875
868
ejr_job_info = self .elastic_job_registry .get_job (job_id = job_id , user_id = user_id )
876
869
877
- # TODO: replace with getter once introduced?
878
- results_metadata = self ._load_results_metadata_from_uri (
879
- ejr_job_info .get ("results_metadata_uri" ), job_id
880
- )
881
- if results_metadata is not None :
882
- ejr_job_info ["results_metadata" ] = results_metadata
883
-
884
870
self ._check_zk_ejr_job_info (job_id = job_id , zk_job_info = zk_job_info , ejr_job_info = ejr_job_info )
885
871
job_metadata = zk_job_info_to_metadata (zk_job_info ) if zk_job_info else ejr_job_info_to_metadata (ejr_job_info )
886
872
return job_metadata
887
873
888
- @staticmethod
889
- def _load_results_metadata_from_uri (results_metadata_uri : Optional [str ], job_id : str ) -> Optional [dict ]:
890
- # TODO: reduce code duplication with openeogeotrellis.backend.GpsBatchJobs._load_results_metadata_from_uri
891
- from openeogeotrellis .integrations .s3proxy .asset_urls import PresignedS3AssetUrls
892
- from openeogeotrellis .utils import get_s3_file_contents
893
- import botocore .exceptions
894
- from urllib .parse import urlparse
895
-
896
- if results_metadata_uri is None :
897
- return None
898
-
899
- _log .debug (f"Loading results metadata from URI { results_metadata_uri } " , extra = {"job_id" : job_id })
900
-
901
- uri_parts = urlparse (results_metadata_uri )
902
-
903
- if uri_parts .scheme == "file" :
904
- file_path = Path (uri_parts .path )
905
- try :
906
- with open (file_path ) as f :
907
- return json .load (f )
908
- except FileNotFoundError :
909
- _log .debug (
910
- f"File with results metadata { file_path } does not exist; this is expected and not "
911
- f"an error if the batch job did not have the chance to write it yet." ,
912
- exc_info = True ,
913
- extra = {"job_id" : job_id },
914
- )
915
- return None
916
-
917
- if uri_parts .scheme == "s3" :
918
- bucket , key = PresignedS3AssetUrls .get_bucket_key_from_uri (results_metadata_uri )
919
- try :
920
- return json .loads (get_s3_file_contents (key , bucket ))
921
- except botocore .exceptions .ClientError as e :
922
- if e .response ["Error" ]["Code" ] != "NoSuchKey" :
923
- raise
924
-
925
- _log .debug (
926
- f"Object with results metadata { key } does not exist in bucket { bucket } ; this is "
927
- f"expected and not an error if the batch job did not have the chance to write it yet." ,
928
- exc_info = True ,
929
- extra = {"job_id" : job_id },
930
- )
931
- return None
932
-
933
- raise ValueError (f"Unsupported results metadata URI: { results_metadata_uri } " )
934
-
935
874
def _check_zk_ejr_job_info (self , job_id : str , zk_job_info : Union [dict , None ], ejr_job_info : Union [dict , None ]):
936
875
# TODO #236/#498 For now: compare job metadata between Zk and EJR
937
876
fields = ["job_id" , "status" , "created" ]
@@ -1020,17 +959,6 @@ def set_application_id(self, job_id: str, *, user_id: Optional[str] = None, appl
1020
959
if self .elastic_job_registry :
1021
960
self .elastic_job_registry .set_application_id (job_id = job_id , user_id = user_id , application_id = application_id )
1022
961
1023
- def set_results_metadata_uri (self , job_id : str , * , user_id : Optional [str ] = None , results_metadata_uri : str ):
1024
- if self .zk_job_registry :
1025
- assert user_id , "user_id is required in ZkJobRegistry"
1026
- self .zk_job_registry .set_results_metadata_uri (
1027
- job_id = job_id , user_id = user_id , results_metadata_uri = results_metadata_uri
1028
- )
1029
- if self .elastic_job_registry :
1030
- self .elastic_job_registry .set_results_metadata_uri (
1031
- job_id = job_id , user_id = user_id , results_metadata_uri = results_metadata_uri
1032
- )
1033
-
1034
962
def mark_ongoing (self , job_id : str , user_id : str ) -> None :
1035
963
# TODO #863/#1123 can this method be eliminated (e.g. integrate it directly in ZkJobRegistry.set_status)?
1036
964
if self .zk_job_registry :
@@ -1099,9 +1027,8 @@ def set_results_metadata(
1099
1027
) -> None :
1100
1028
if self .zk_job_registry :
1101
1029
assert user_id , "user_id is required in ZkJobRegistry"
1102
- self .zk_job_registry .patch (
1103
- job_id = job_id , user_id = user_id , ** dict (results_metadata or {}, costs = costs , usage = usage )
1104
- )
1030
+ self .zk_job_registry .patch (job_id = job_id , user_id = user_id ,
1031
+ ** dict (results_metadata , costs = costs , usage = usage ))
1105
1032
1106
1033
if self .elastic_job_registry :
1107
1034
self .elastic_job_registry .set_results_metadata (
0 commit comments