Skip to content

Unnecessarily Large Data Request #74

@sgdecker

Description

@sgdecker

I'm not sure if this is a bug report, feature request, or user error. I'm trying to access a giant dataset from the NCAR RDA in a smart way (only downloading what's necessary for the calculation), but a large data request is made anyway that exceeds the server's 500 MB limit.

Here's my code:

import numpy as np
import xarray as xr
from dask.diagnostics import ProgressBar
import intake


# THREDDS catalog URL for the 2006 directory of RDA dataset ds612.0 (PGW3D).
wrf_url = ('https://rda.ucar.edu/thredds/catalog/files/g/ds612.0/'
           'PGW3D/2006/catalog.xml')
# One merged intake source per wind component. The `path` entries look like
# glob patterns selecting the U / V files for 2006-06-0x -- confirm against
# the intake-thredds documentation.
catalog_u = intake.open_thredds_merged(wrf_url, path=['*_U_2006060*'])
catalog_v = intake.open_thredds_merged(wrf_url, path=['*_V_2006060*'])

# Open each source as a dataset (presumably lazily; nothing is computed
# here), re-chunk the wind variable with "auto" chunk sizing, then combine
# both components into a single dataset.
ds_u = catalog_u.to_dask()
ds_u['U'] = ds_u.U.chunk("auto")
ds_v = catalog_v.to_dask()
ds_v['V'] = ds_v.V.chunk("auto")
ds = xr.merge((ds_u, ds_v))


def unstagger(ds, var, coord, new_coord):
    """Average neighbouring points of ``ds[var]`` along ``coord``.

    Args:
        ds: Mapping-like dataset; ``ds[var]`` must support ``isel``.
        var: Name of the variable to average.
        coord: Name of the dimension to average along.
        new_coord: Name given to ``coord`` in the result.

    Returns:
        The mean of each adjacent pair along ``coord``, with that
        dimension renamed to ``new_coord``.
    """
    field = ds[var]
    # Two views offset by one index: all-but-last and all-but-first.
    # NOTE(review): assumes the addition pairs the two views element by
    # element (i.e. no index labels on `coord` that would cause
    # label-based alignment) -- confirm for the dataset in use.
    leading = field.isel({coord: slice(None, -1)})
    trailing = field.isel({coord: slice(1, None)})
    midpoint = (leading + trailing) / 2
    return midpoint.rename({coord: new_coord})


# Run the calculation inside dask's ProgressBar context so that any
# computation triggered here reports progress.
with ProgressBar():
    # Average each staggered wind component along its staggered dimension
    # and rename that dimension to the unstaggered name.
    ds['U_unstaggered'] = unstagger(ds, 'U', 'west_east_stag', 'west_east')
    ds['V_unstaggered'] = unstagger(ds, 'V', 'south_north_stag', 'south_north')
    # Wind speed from the two unstaggered components.
    ds['speed'] = np.hypot(ds.U_unstaggered, ds.V_unstaggered)
    # Select index 10 along bottom_top and a single Time value, then plot.
    # This is the line named in the reported traceback: plot() calls
    # compute(), which is where the data request is actually issued.
    ds.speed.isel(bottom_top=10).sel(Time='2006-06-07T18:00').plot()

This fails with

Traceback (most recent call last):
  File "/home/decker/classes/met325/rda_plot.py", line 29, in <module>
    ds.speed.isel(bottom_top=10).sel(Time='2006-06-07T18:00').plot()
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/plot/plot.py", line 862, in __call__
    return plot(self._da, **kwargs)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/plot/plot.py", line 293, in plot
    darray = darray.squeeze().compute()
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/dataarray.py", line 951, in compute
    return new.load(**kwargs)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/dataarray.py", line 925, in load
    ds = self._to_temp_dataset().load(**kwargs)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/dataset.py", line 862, in load
    evaluated_data = da.compute(*lazy_data.values(), **kwargs)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/base.py", line 571, in compute
    results = schedule(dsk, keys, **kwargs)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/threaded.py", line 79, in get
    results = get_async(
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/local.py", line 507, in get_async
    raise_exception(exc, tb)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/local.py", line 315, in reraise
    raise exc
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/local.py", line 220, in execute_task
    result = _execute_task(task, data)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/core.py", line 119, in _execute_task
    return func(*(_execute_task(a, cache) for a in args))
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/dask/array/core.py", line 116, in getter
    c = np.asarray(c)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 357, in __array__
    return np.asarray(self.array, dtype=dtype)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 521, in __array__
    return np.asarray(self.array, dtype=dtype)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 422, in __array__
    return np.asarray(array[self.key], dtype=None)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/conventions.py", line 62, in __getitem__
    return np.asarray(self.array[key], dtype=self.dtype)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 422, in __array__
    return np.asarray(array[self.key], dtype=None)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/backends/pydap_.py", line 39, in __getitem__
    return indexing.explicit_indexing_adapter(
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/core/indexing.py", line 711, in explicit_indexing_adapter
    result = raw_indexing_method(raw_key.tuple)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/backends/pydap_.py", line 47, in _getitem
    result = robust_getitem(array, key, catch=ValueError)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/xarray/backends/common.py", line 64, in robust_getitem
    return array[key]
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/pydap/model.py", line 323, in __getitem__
    out.data = self._get_data_index(index)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/pydap/model.py", line 353, in _get_data_index
    return self._data[index]
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/pydap/handlers/dap.py", line 170, in __getitem__
    raise_for_status(r)
  File "/home/decker/local/miniconda3/envs/met325/lib/python3.10/site-packages/pydap/net.py", line 38, in raise_for_status
    raise HTTPError(
webob.exc.HTTPError: 403 403

because the data request is too large.

Folks at NCAR tell me the request comes across as

rda.ucar.edu/thredds/dodsC/files/g/ds612.0/PGW3D/2006/wrf3d_d01_PGW_U_20060607.nc.dods?U%5B0:1:7%5D%5B0:1:49%5D%5B0:1:1014%5D%5B0:1:1359%5D

essentially pulling an entire variable.

Is what I'm trying to do supposed to work?

I can use siphon directly w/o issue:

import numpy as np
import matplotlib.pyplot as plt
from siphon.catalog import TDSCatalog

# Same THREDDS catalog URL as in the intake example above.
catUrl = ('https://rda.ucar.edu/thredds/catalog/files/g/ds612.0/'
          'PGW3D/2006/catalog.xml')
catalog = TDSCatalog(catUrl)
# Note: these are the 2006-07-18 files, not the 2006-06-07 ones used above.
U_file = 'wrf3d_d01_PGW_U_20060718.nc'
V_file = 'wrf3d_d01_PGW_V_20060718.nc'
# Open the U file remotely and keep a handle to its 'U' variable.
ds = catalog.datasets[U_file]
dataset = ds.remote_access()
u = dataset.variables['U']
# `ds` and `dataset` are reused for the V file; `u` still refers to the
# variable obtained from the first dataset.
ds = catalog.datasets[V_file]
dataset = ds.remote_access()
v = dataset.variables['V']
# Index a single 2-D slab from each variable before computing the speed.
# Presumably the axes are (Time, bottom_top, south_north, west_east) --
# confirm against the file metadata. Both components are cut to the same
# 1014 x 1359 window rather than being unstaggered.
speed = np.hypot(u[1, 10, 0:1014, 0:1359], v[1, 10, 0:1014, 0:1359])
plt.imshow(speed)
plt.show()

but in that case I don't have all the xarray niceties w/o extra work.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions