Skip to content

FileNotFoundError when downloading LLC4320 grid file (link works in browser) #356

@RanFeng2

Description

@RanFeng2

Hi, thank you for maintaining this project! 🙏
While downloading hourly LLC4320 data with ECCOPortalLLC4320Model, I consistently hit a
FileNotFoundError for the grid file hFacW.data, even though I can successfully download the same URL from a web browser.

It looks like the HTTP request inside xmitgcm fails with a 502 Bad Gateway, and then raises FileNotFoundError, but the file is actually present on the ECCO server.

from xmitgcm.llcreader import ECCOPortalLLC4320Model
import os
from datetime import datetime, timedelta
from tqdm import tqdm

# Download hourly LLC4320 U/V/W snapshots (top 20 levels) from the ECCO data
# portal and store each variable of each snapshot in its own NetCDF file:
#   <save_root>/<VAR>/<VAR>_<YYYYMMDD_HH>.nc
# The script is resumable: files that already exist are skipped, and files are
# only given their final name after they have been written completely.
import time

#! ==== STEP 1: Define the model ====
model = ECCOPortalLLC4320Model()

#! ==== STEP 2: Get the dataset ====
save_root = r'D:\LLC4320'
varnames = ['U', 'V', 'W']
for name in varnames:
    os.makedirs(os.path.join(save_root, name), exist_ok=True)

delta_t = model.delta_t
time_step = model.iter_step
time_start = model.iter_start                                                       # 10368
time_end = model.iter_stop                                                          # 1495153

time_unit = model.time_units                                                        # 'seconds since 2011-09-10'
start_time_str = time_unit.split('since')[1].strip()
start_date = datetime.strptime(start_time_str, '%Y-%m-%d')                          # 2011-09-10

# The ECCO portal intermittently answers with "502 Bad Gateway"; fsspec reports
# that as FileNotFoundError (an OSError). Retry a few times before giving up.
max_attempts = 5
retry_wait_seconds = 30

for iter_index in tqdm(range(time_start, time_end, time_step), desc='Downloading hourly LLC4320'):
    # Model time is counted from the reference date in `time_units`, i.e.
    # iteration * delta_t seconds (not relative to the first available
    # iteration), so that file names match the dataset's own time coordinate.
    time_seconds = iter_index * delta_t
    time_str = (start_date + timedelta(seconds=time_seconds)).strftime('%Y%m%d_%H')

    paths = {
        name: os.path.join(save_root, name, f'{name}_{time_str}.nc')
        for name in varnames
    }
    missing = [name for name in varnames if not os.path.exists(paths[name])]
    if not missing:
        continue

    # Only request the variables that still have to be written.
    ds = model.get_dataset(
        varnames=missing,
        k_levels=list(range(20)),
        iter_start=iter_index,
        iter_stop=iter_index + time_step
    )

    for name in missing:
        # Write to a temporary name first so that an interrupted download never
        # leaves a truncated file that a later run would mistake for complete.
        tmp_path = paths[name] + '.part'
        for attempt in range(1, max_attempts + 1):
            try:
                # ds[[name]] keeps just this variable (plus its coordinates)
                # instead of storing all three variables in every file.
                ds[[name]].to_netcdf(tmp_path)
                break
            except OSError:
                if attempt == max_attempts:
                    raise
                time.sleep(retry_wait_seconds * attempt)
        os.replace(tmp_path, paths[name])


Downloading hourly LLC4320: 0%| | 0/10312 [01:01<?, ?it/s]
Traceback (most recent call last):
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\fsspec\implementations\http.py", line 437, in _info
await _file_info(
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\fsspec\implementations\http.py", line 853, in _file_info
r.raise_for_status()
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\aiohttp\client_reqrep.py", line 629, in raise_for_status
raise ClientResponseError(
aiohttp.client_exceptions.ClientResponseError: 502, message='Bad Gateway', url='https://data.nas.nasa.gov/ecco/download_data.php?file=/eccodata/llc_4320/grid/hFacW.data'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "g:\datasets\VW_decoupling\utils\data_preprocess\1_llc_reader_get_mitgcm4320.py", line 64, in <module>
ds.to_netcdf(u_path)
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\xarray\core\dataset.py", line 2030, in to_netcdf
return to_netcdf( # type: ignore[return-value] # mypy cannot resolve the overloads:(
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\xarray\backends\api.py", line 1937, in to_netcdf
writes = writer.sync(compute=compute)
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\xarray\backends\common.py", line 357, in sync
delayed_store = chunkmanager.store(
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\xarray\namedarray\daskmanager.py", line 247, in store
return store(
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\dask\array\core.py", line 1227, in store
dask.compute(arrays, **kwargs)
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\dask\base.py", line 681, in compute
results = schedule(expr, keys, **kwargs)
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\xmitgcm\llcreader\llcmodel.py", line 459, in _get_facet_chunk
file = fs.open(path)
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\fsspec\spec.py", line 1338, in open
f = self._open(
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\fsspec\implementations\http.py", line 376, in _open
size = size or info.update(self.info(path, **kwargs)) or info["size"]
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\fsspec\asyn.py", line 118, in wrapper
return sync(self.loop, func, *args, **kwargs)
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\fsspec\asyn.py", line 103, in sync
raise return_result
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\fsspec\asyn.py", line 56, in _runner
result[0] = await coro
File "F:\anaconda3\envs\vw_decoup\lib\site-packages\fsspec\implementations\http.py", line 450, in _info
raise FileNotFoundError(url) from exc
FileNotFoundError: https://data.nas.nasa.gov/ecco/download_data.php?file=/eccodata/llc_4320/grid/hFacW.data

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions