Skip to content

Commit f350ef8

Browse files
committed
Replace dependency on tar on system with tarfile module
1 parent dda9909 commit f350ef8

File tree

2 files changed

+34
-16
lines changed

2 files changed

+34
-16
lines changed

examples/example_import_ERA5.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
from metocean_api import ts
22

33
# Define TimeSeries-object
4-
df_ts = ts.TimeSeries(lon=109.94, lat=15.51,start_time='2000-01-01', end_time='2000-12-31' , product='ERA5',
4+
df_ts = ts.TimeSeries(lon=109.94, lat=15.51,start_time='2000-01-01', end_time='2000-12-31' , product='ERA5',
55
variable=[ 'significant_height_of_combined_wind_waves_and_swell',
66
'mean_wave_direction',
77
'peak_wave_period'])
88

99

10-
#df_ts = ts.TimeSeries(lon=6, lat=55.7,start_time='2012-01-01', end_time='2012-12-31' , product='GTSM',
10+
#df_ts = ts.TimeSeries(lon=6, lat=55.7,start_time='2012-01-01', end_time='2012-01-31' , product='GTSM',
1111
# variable=['storm_surge_residual','tidal_elevation','total_water_level'])
1212

1313
# list of wind and wave parameters in ERA5:
@@ -20,7 +20,7 @@
2020
# ]
2121

2222
# Import data from thredds.met.no and save it as csv
23-
df_ts.import_data(save_csv=True)
23+
df_ts.import_data(save_csv=True,use_cache=True)
2424

2525
# Load data from a local csv-file
2626
#df_ts.load_data(local_file=df_ts.datafile)

metocean_api/ts/internal/ec/ec_products.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22
from typing import TYPE_CHECKING, Tuple, List
33
import os
4-
import subprocess
4+
import tarfile
55
import pandas as pd
66
import xarray as xr
77
import numpy as np
@@ -99,7 +99,7 @@ def __download_era5_from_cds(self,start_time, end_time, lon, lat, variable, fol
9999
Downloads ERA5 data from the Copernicus Climate Data Store for a
100100
given point and time period
101101
"""
102-
import cdsapi
102+
import cdsapi #Optional dependency
103103
start_time = pd.Timestamp(start_time)
104104
end_time = pd.Timestamp(end_time)
105105
c = cdsapi.Client()
@@ -164,19 +164,15 @@ def import_data(self, ts: TimeSeries, save_csv=True, save_nc=False, use_cache=Fa
164164
"tidal_elevation",
165165
"total_water_level",
166166
]
167-
filenames = self.__download_gtsm_from_cds(ts.start_time, ts.end_time, ts.variable, folder='cache')
167+
filenames = self.__download_gtsm_from_cds(ts.start_time, ts.end_time, ts.variable, folder='cache',use_cache=use_cache)
168168

169169
if not isinstance(filenames, list):
170170
filenames = [filenames]
171171

172172
all_nc_files = []
173173
for filename in filenames:
174-
temppath = os.path.dirname(filename)
175174
# Unpack the tar.gz file.
176-
nc_files = subprocess.run(['tar', '-ztf', filename], stdout=subprocess.PIPE, check=True).stdout.decode('utf-8').split('\n')[0:-1]
177-
nc_files = sorted([ff.strip('\r') for ff in nc_files])
178-
subprocess.run(['tar', '-xzvf', filename, '--directory', temppath], stdout=subprocess.PIPE, check=True) # Extract tar file
179-
all_nc_files.extend([os.path.join(temppath, file) for file in nc_files])
175+
all_nc_files.extend(self.__unpack_files(filename,use_cache))
180176

181177
# Open multiple netCDF files as a single xarray dataset
182178
with xr.open_mfdataset(all_nc_files) as ds:
@@ -193,19 +189,37 @@ def import_data(self, ts: TimeSeries, save_csv=True, save_nc=False, use_cache=Fa
193189

194190
return df
195191

196-
def __download_gtsm_from_cds(self,start_time, end_time, variable, folder='cache') -> str:
192+
def __unpack_files(self, filename, use_cache) -> List[str]:
    """Extract the netCDF members of a downloaded CDS tar archive.

    Uses the stdlib ``tarfile`` module (replacing the former dependency on a
    system ``tar`` binary) and extracts into the same folder as the archive.

    Parameters:
        filename: path to the downloaded ``.tar``/``.tar.gz`` archive.
        use_cache: when True, members already present on disk are reused
            instead of being extracted again.

    Returns:
        Paths of the ``.nc`` files belonging to the archive, in archive order.

    Raises:
        RuntimeError: if the archive cannot be opened or extracted — seen with
            corrupted/partial downloads from CDS; the message suggests deleting
            the file so it is downloaded again.
    """
    temppath = os.path.dirname(filename)
    files_from_tar = []
    # NOTE: tarfile.open is inside the try so that a corrupted archive that
    # fails at open time is also converted into the explanatory RuntimeError.
    try:
        with tarfile.open(filename, "r:*") as tar:
            for member in tar.getmembers():
                if not member.name.endswith(".nc"):
                    continue
                nc_file = os.path.join(temppath, member.name)
                if use_cache and os.path.exists(nc_file):
                    print(f"Reusing cached file {nc_file}")
                else:
                    print(f"Extracting {member.name} from {filename}")
                    tar.extract(member, path=temppath)
                files_from_tar.append(nc_file)
    except (tarfile.TarError, OSError) as e:
        # Seen with corrupted tar files from CDS
        raise RuntimeError(
            f"Error extracting {filename}: {e}, consider deleting the file to download it again"
        ) from e
    return files_from_tar
210+
211+
def __download_gtsm_from_cds(self,start_time, end_time, variable, folder='cache',use_cache=True) -> List[str]:
197212
"""
198213
Downloads GTSM model water level data from the Copernicus Climate Data Store for a
199214
given point and time period
200215
"""
201-
import cdsapi
216+
import cdsapi #Optional dependency
202217
filename = []
203218
filename_list = []
204219
start_time = pd.Timestamp(start_time)
205220
end_time = pd.Timestamp(end_time)
206221
c = cdsapi.Client()
207222

208-
209223
days = pd.date_range(start=start_time , end=end_time, freq='D')
210224
years = days.year
211225
years = years.unique()
@@ -241,8 +255,12 @@ def __download_gtsm_from_cds(self,start_time, end_time, variable, folder='cache
241255

242256
}
243257
print('Download variable:',var, year)
244-
c.retrieve('sis-water-level-change-timeseries-cmip6', cds_command, filename)
245-
filename_list.append(filename)
258+
if use_cache and os.path.exists(filename):
259+
print(f'Reusing cached file {filename}')
260+
else:
261+
print(f'Download variable {var} for year {year}')
262+
c.retrieve('sis-water-level-change-timeseries-cmip6', cds_command, filename)
263+
filename_list.append(filename)
246264
return filename_list
247265

248266
def download_temporary_files(self, ts: TimeSeries, use_cache: bool = False) -> Tuple[List[str], float, float]:

0 commit comments

Comments
 (0)