3
3
import datetime
4
4
import json
5
5
import logging
6
+ import re
6
7
import time
7
8
import typing
8
- import urllib .error
9
9
from pathlib import Path
10
10
from typing import Dict , List , Optional , Union
11
- from urllib .error import HTTPError
12
11
13
12
import requests
14
13
import shutil
35
34
from openeo .rest .models .logs import LogEntry , log_level_name , normalize_log_level
36
35
from openeo .util import ensure_dir
37
36
38
- MAX_RETRIES_DOWNLOAD = 3
39
-
40
37
if typing .TYPE_CHECKING :
41
38
# Imports for type checking only (circular import issue at runtime).
42
39
from openeo .rest .connection import Connection
45
42
46
43
47
44
# Default file name for the job results document (JSON, going by the extension).
DEFAULT_JOB_RESULTS_FILENAME: str = "job-results.json"

# Total number of attempts `_download_chunked` makes for a single byte-range chunk.
MAX_RETRIES_PER_CHUNK: int = 3

# HTTP status codes after which `_download_chunked` repeats a failed chunk request.
RETRIABLE_STATUSCODES: List[int] = [408, 429, 500, 501, 502, 503, 504]
49
47
50
48
class BatchJob :
51
49
"""
@@ -407,40 +405,9 @@ def download(
407
405
target = target / self .name
408
406
ensure_dir (target .parent )
409
407
logger .info ("Downloading Job result asset {n!r} from {h!s} to {t!s}" .format (n = self .name , h = self .href , t = target ))
410
- self . _download_chunked (target , chunk_size )
408
+ _download_chunked (self . href , target , chunk_size )
411
409
return target
412
410
413
- def _download_chunked (self , target : Path , chunk_size : int ):
414
- file_size = None
415
- try :
416
- head = requests .head (self .href , stream = True )
417
- if head .ok :
418
- file_size = int (head .headers ['Content-Length' ])
419
- else :
420
- head .raise_for_status ()
421
- with target .open ('wb' ) as f :
422
- for from_byte_index in range (0 , file_size , chunk_size ):
423
- to_byte_index = min (from_byte_index + chunk_size - 1 , file_size - 1 )
424
- tries_left = MAX_RETRIES_DOWNLOAD
425
- while tries_left > 0 :
426
- try :
427
- range_headers = {"Range" : f"bytes={ from_byte_index } -{ to_byte_index } " }
428
- with requests .get (self .href , headers = range_headers , stream = True ) as r :
429
- if r .ok :
430
- shutil .copyfileobj (r .raw , f )
431
- break
432
- else :
433
- r .raise_for_status ()
434
- except requests .exceptions .HTTPError as error :
435
- tries_left -= 1
436
- if tries_left < 1 :
437
- raise error
438
- else :
439
- logger .warning (f"Failed to retrieve chunk { from_byte_index } -{ to_byte_index } from { self .href } (status { error .response .status_code } ) - retrying" )
440
- continue
441
- except requests .exceptions .HTTPError as http_error :
442
- raise OpenEoApiPlainError (message = f"Failed to download { self .href } " , http_status_code = http_error .response .status_code , error_message = http_error .response .text )
443
-
444
411
def _get_response(self, stream=True) -> requests.Response:
    """
    Do a GET request for this asset's ``href`` through ``self.job.connection``.

    :param stream: passed on as the ``stream`` argument of the connection's ``get``.
    :return: the response returned by the connection's ``get`` call.
    """
    connection = self.job.connection
    return connection.get(self.href, stream=stream)
446
413
@@ -457,6 +424,51 @@ def load_bytes(self) -> bytes:
457
424
# TODO: more `load` methods e.g.: load GTiff asset directly as numpy array
458
425
459
426
427
def _download_chunked(url: str, target: Path, chunk_size: int):
    """
    Download the resource at ``url`` to ``target`` with a series of HTTP range requests.

    The total size is obtained from ``_determine_content_length``. The resource is
    then requested in consecutive byte ranges of at most ``chunk_size`` bytes,
    which are written to ``target`` in order.

    Each chunk gets up to ``MAX_RETRIES_PER_CHUNK`` attempts. A failed attempt is
    only repeated when its HTTP status code is listed in ``RETRIABLE_STATUSCODES``.

    :param url: URL of the resource to download.
    :param target: path of the file to write (opened in ``"wb"`` mode).
    :param chunk_size: maximum number of bytes to ask for per range request.
    :raises OpenEoApiPlainError: when a request ends with an HTTP error status that
        is not retriable, or when all attempts for a chunk have been used up.
    """
    try:
        file_size = _determine_content_length(url)
        with target.open("wb") as f:
            for from_byte_index in range(0, file_size, chunk_size):
                to_byte_index = min(from_byte_index + chunk_size - 1, file_size - 1)
                range_headers = {"Range": f"bytes={from_byte_index}-{to_byte_index}"}
                tries_left = MAX_RETRIES_PER_CHUNK
                while tries_left > 0:
                    try:
                        with requests.get(url, headers=range_headers, stream=True) as r:
                            r.raise_for_status()
                            if r.status_code == 200:
                                # A server is allowed to ignore the Range header and
                                # answer 200 with the complete resource. Appending that
                                # body once per chunk would corrupt the file, so treat
                                # it as the whole download and stop here.
                                f.seek(0)
                                f.truncate()
                                shutil.copyfileobj(r.raw, f)
                                return
                            shutil.copyfileobj(r.raw, f)
                            break
                    except requests.exceptions.HTTPError as error:
                        tries_left -= 1
                        if tries_left > 0 and error.response.status_code in RETRIABLE_STATUSCODES:
                            logger.warning(f"Failed to retrieve chunk {from_byte_index}-{to_byte_index} from {url} (status {error.response.status_code}) - retrying")
                            continue
                        # Out of attempts or not a retriable status: hand over to the outer handler.
                        raise
    except requests.exceptions.HTTPError as http_error:
        raise OpenEoApiPlainError(
            message=f"Failed to download {url}",
            http_status_code=http_error.response.status_code,
            error_message=http_error.response.text,
        ) from http_error
452
+
453
+
454
def _determine_content_length(url: str) -> int:
    """
    Determine the size in bytes of the resource at ``url``.

    A one-byte range request (``bytes=0-0``) is tried first: a ``206`` response
    reports the total size in its ``Content-Range`` header (``bytes 0-0/<size>``).
    When that does not give a numeric size, the ``Content-Length`` header of a
    ``HEAD`` request is used instead.

    :param url: URL of the resource.
    :return: size of the resource in bytes.
    :raises requests.exceptions.HTTPError: when the ``HEAD`` fallback gets an error status.
    """
    # Stream the probe so the body is not read: a server that ignores the Range
    # header would otherwise make us download and buffer the whole resource here.
    with requests.get(url, headers={"Range": "bytes=0-0"}, stream=True) as range_0_0_response:
        if range_0_0_response.status_code == 206:
            content_range_header = range_0_0_response.headers.get("Content-Range")
            # The header can be missing, or report an unknown size ("bytes 0-0/*");
            # in both cases fall through to the HEAD request instead of failing.
            match = re.match(r"^bytes \d+-\d+/(\d+)$", content_range_header or "")
            if match:
                return int(match.group(1))

    head = requests.head(url, stream=True)
    head.raise_for_status()
    return int(head.headers['Content-Length'])
470
+
471
+
460
472
class MultipleAssetException(OpenEoClientException):
    """
    Client-side exception concerning multiple assets.

    NOTE(review): the conditions under which this is raised are not visible
    in this part of the file; confirm against the raising code.
    """
462
474
0 commit comments