-
Notifications
You must be signed in to change notification settings - Fork 7
Open
Description
I'm not sure if this is a bug report, feature request, or user error. I'm trying to access a giant dataset from the NCAR RDA in a smart way (only downloading what's necessary for the calculation), but a large data request is made anyway that exceeds the server's 500 MB limit.
Here's my code:
import numpy as np
import xarray as xr
from dask.diagnostics import ProgressBar
import intake
# THREDDS catalog URL for the 2006 PGW3D files of dataset ds612.0 on
# rda.ucar.edu.
wrf_url = ('https://rda.ucar.edu/thredds/catalog/files/g/ds612.0/'
'PGW3D/2006/catalog.xml')
# One intake source per wind component. The glob patterns select the U and V
# files whose names contain "2006060" (presumably 1-9 June 2006 -- confirm
# against the catalog listing).
catalog_u = intake.open_thredds_merged(wrf_url, path=['*_U_2006060*'])
catalog_v = intake.open_thredds_merged(wrf_url, path=['*_V_2006060*'])
# to_dask() is expected to return an xarray Dataset (the result is indexed by
# variable name and passed to xr.merge below). The wind variable is then
# re-chunked with "auto" chunk sizing.
# NOTE(review): chunk("auto") is applied after the dataset has already been
# opened; whether that limits the size of the individual OPeNDAP requests is
# the open question of this report.
ds_u = catalog_u.to_dask()
ds_u['U'] = ds_u.U.chunk("auto")
ds_v = catalog_v.to_dask()
ds_v['V'] = ds_v.V.chunk("auto")
# Combine the U and V datasets into a single dataset.
ds = xr.merge((ds_u, ds_v))
def unstagger(ds, var, coord, new_coord):
    """Average neighbouring points of ``ds[var]`` along ``coord``.

    Two views of the variable are taken along ``coord``: one without the
    last point and one without the first point. Their mean is returned with
    the ``coord`` dimension renamed to ``new_coord``.

    Parameters
    ----------
    ds
        Mapping-like dataset that holds ``var`` (an xarray Dataset in the
        calling script).
    var
        Name of the variable to unstagger.
    coord
        Name of the staggered dimension to average along.
    new_coord
        Name given to that dimension in the result.

    Returns
    -------
    The averaged variable, carrying ``new_coord`` in place of ``coord``.
    """
    staggered = ds[var]
    # Drop the last point for one view and the first point for the other,
    # so the two views pair each point with its neighbour along ``coord``.
    leading = staggered.isel({coord: slice(None, -1)})
    trailing = staggered.isel({coord: slice(1, None)})
    midpoint = (leading + trailing) / 2
    return midpoint.rename({coord: new_coord})
# Derive the unstaggered wind components and the wind speed, then plot a single
# level (bottom_top index 10) at a single time.
# NOTE(review): indentation was lost in this paste, so it is unclear which of
# the statements below belong to the ProgressBar block -- confirm against the
# original script.
with ProgressBar():
ds['U_unstaggered'] = unstagger(ds, 'U', 'west_east_stag', 'west_east')
ds['V_unstaggered'] = unstagger(ds, 'V', 'south_north_stag', 'south_north')
ds['speed'] = np.hypot(ds.U_unstaggered, ds.V_unstaggered)
# The traceback in this report starts at this line: .plot() calls compute() on
# the selected array, which is where the remote data is actually requested.
ds.speed.isel(bottom_top=10).sel(Time='2006-06-07T18:00').plot()
This fails with
Traceback (most recent call last):
File "/home/decker/classes/met325/rda_plot.py", line 29, in <module>
ds.speed.isel(bottom_top=10).sel(Time='2006-06-07T18:00').plot()
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/plot/plot.py", line 862, in __call__
return plot(self._da, **kwargs)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/plot/plot.py", line 293, in plot
darray = darray.squeeze().compute()
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/dataarray.py", line 951, in compute
return new.load(**kwargs)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/dataarray.py", line 925, in load
ds = self._to_temp_dataset().load(**kwargs)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/dataset.py", line 862, in load
evaluated_data = da.compute(*lazy_data.values(), **kwargs)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/base.py", line 571, in compute
results = schedule(dsk, keys, **kwargs)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/threaded.py", line 79, in get
results = get_async(
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/local.py", line 507, in get_async
raise_exception(exc, tb)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/local.py", line 315, in reraise
raise exc
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/local.py", line 220, in execute_task
result = _execute_task(task, data)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/array/core.py", line 116, in getter
c = np.asarray(c)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 357, in __array__
return np.asarray(self.array, dtype=dtype)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 521, in __array__
return np.asarray(self.array, dtype=dtype)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 422, in __array__
return np.asarray(array[self.key], dtype=None)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/conventions.py", line 62, in __getitem__
return np.asarray(self.array[key], dtype=self.dtype)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 422, in __array__
return np.asarray(array[self.key], dtype=None)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/backends/pydap_.py", line 39, in __getitem__
return indexing.explicit_indexing_adapter(
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 711, in explicit_indexing_adapter
result = raw_indexing_method(raw_key.tuple)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/backends/pydap_.py", line 47, in _getitem
result = robust_getitem(array, key, catch=ValueError)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/backends/common.py", line 64, in robust_getitem
return array[key]
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/pydap/model.py", line 323, in __getitem__
out.data = self._get_data_index(index)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/pydap/model.py", line 353, in _get_data_index
return self._data[index]
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/pydap/handlers/dap.py", line 170, in __getitem__
raise_for_status(r)
File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/pydap/net.py", line 38, in raise_for_status
raise HTTPError(
webob.exc.HTTPError: 403 403
because the data request is too large.
Folks at NCAR tell me the request comes across as
rda.ucar.edu/thredds/dodsC/files/g/ds612.0/PGW3D/2006/wrf3d_d01_PGW_U_20060607.nc.dods?U%5B0:1:7%5D%5B0:1:49%5D%5B0:1:1014%5D%5B0:1:1359%5D
essentially pulling an entire variable.
Is what I'm trying to do supposed to work?
I can use siphon directly w/o issue:
import numpy as np
import matplotlib.pyplot as plt
from siphon.catalog import TDSCatalog
# THREDDS catalog URL for the 2006 PGW3D files of dataset ds612.0 on
# rda.ucar.edu.
catUrl = ('https://rda.ucar.edu/thredds/catalog/files/g/ds612.0/'
'PGW3D/2006/catalog.xml')
catalog = TDSCatalog(catUrl)
# One U file and one V file; both file names carry the date stamp 20060718.
U_file = 'wrf3d_d01_PGW_U_20060718.nc'
V_file = 'wrf3d_d01_PGW_V_20060718.nc'
# Open each file for remote access and keep a handle to its wind variable.
# The names `ds` and `dataset` are reused for the second file.
ds = catalog.datasets[U_file]
dataset = ds.remote_access()
u = dataset.variables['U']
ds = catalog.datasets[V_file]
dataset = ds.remote_access()
v = dataset.variables['V']
# Index both variables explicitly: index 1 on the first axis, index 10 on the
# second, and a 1014 x 1359 window on the last two axes (presumably time,
# vertical level, south-north, west-east -- confirm).
# NOTE(review): this takes raw grid values inside that window rather than
# averaging neighbouring points, so it is not numerically identical to an
# unstaggering calculation.
speed = np.hypot(u[1, 10, 0:1014, 0:1359], v[1, 10, 0:1014, 0:1359])
plt.imshow(speed)
plt.show()
but in that case I don't have all the xarray niceties w/o extra work.
Metadata
Metadata
Assignees
Labels
No labels