Skip to content

Commit 8f36f89

Browse files
committed
First draft of top level masking functions for bitmask and enumerated masks
1 parent a00c882 commit 8f36f89

File tree

3 files changed

+165
-0
lines changed

3 files changed

+165
-0
lines changed

odc/geo/_xr_interop.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
resolve_fill_value,
4949
resolve_nodata,
5050
)
51+
from .masking import bits_to_bool, enum_to_bool, scale_and_offset, scale_and_offset_dataset
5152
from .overlap import compute_output_geobox
5253
from .roi import roi_is_empty
5354
from .types import Nodata, Resolution, SomeNodata, SomeResolution, SomeShape, xy_
@@ -1053,11 +1054,18 @@ def nodata(self, value: Nodata):
10531054

10541055
colorize = _wrap_op(colorize)
10551056

1057+
scale_and_offset = _wrap_op(scale_and_offset)
1058+
1059+
bits_to_bool = _wrap_op(bits_to_bool)
1060+
1061+
enum_to_bool = _wrap_op(enum_to_bool)
1062+
10561063
if have.rasterio:
10571064
write_cog = _wrap_op(write_cog)
10581065
to_cog = _wrap_op(to_cog)
10591066
compress = _wrap_op(compress)
10601067
add_to = _wrap_op(add_to)
1068+
10611069

10621070

10631071
@xarray.register_dataset_accessor("odc")
@@ -1092,6 +1100,8 @@ def to_rgba(
10921100
vmax: Optional[float] = None,
10931101
) -> xarray.DataArray:
10941102
return to_rgba(self._xx, bands=bands, vmin=vmin, vmax=vmax)
1103+
1104+
scale_and_offset = _wrap_op(scale_and_offset_dataset)
10951105

10961106

10971107
ODCExtensionDs.to_rgba.__doc__ = to_rgba.__doc__
@@ -1314,3 +1324,4 @@ def rasterize(
13141324
invert=value_inside,
13151325
)
13161326
return wrap_xr(pix, geobox)
1327+

odc/geo/masking.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# This file is part of the Open Data Cube, see https://opendatacube.org for more information
2+
#
3+
# Copyright (c) 2015-2020 ODC Contributors
4+
# SPDX-License-Identifier: Apache-2.0
5+
"""
6+
Functions around supporting cloud masking.
7+
"""
8+
from xarray import DataArray, Dataset
9+
10+
def bits_to_bool(
    xx: DataArray,
    bits: list | None = None,
    bitflags: int | None = None,
    invert: bool = False,
) -> DataArray:
    """
    Convert integer array into boolean array using bitmasks.

    An element of the result is ``True`` when at least one of the selected
    bits is set in the corresponding element of ``xx``. Exactly one of
    ``bits`` or ``bitflags`` must be supplied.

    :param xx: DataArray with integer values
    :param bits: List of bit positions to convert to a bitflag mask (e.g. [0, 1, 2] -> 0b111)
    :param bitflags: Integer value with bits set that will be used to extract the boolean mask (e.g. 0b00011000)
    :param invert: Invert the mask
    :return: DataArray with boolean values
    :raises ValueError: If neither, or both, of ``bits`` and ``bitflags`` are provided
    """
    # Explicit raises instead of ``assert``: assertions are removed when Python
    # runs with -O, which would let invalid input through to the loop below.
    if bits is None and bitflags is None:
        raise ValueError("Either bits or bitflags must be provided")
    if bits is not None and bitflags is not None:
        raise ValueError("Only one of bits or bitflags can be provided")

    if bitflags is None:
        # Fold the individual bit positions into a single integer mask.
        bitflags = 0
        for bit in bits:
            bitflags |= 1 << bit

    mask = (xx & bitflags) != 0

    return ~mask if invert else mask
34+
35+
36+
def enum_to_bool(xx: DataArray, values: list, invert: bool = False) -> DataArray:
    """
    Build a boolean mask from an array of enumerated values.

    An element of the result is ``True`` when the corresponding element of
    ``xx`` is one of ``values``.

    :param xx: DataArray holding enumerated (categorical) values
    :param values: Values that should map to ``True`` in the mask
    :param invert: Return the complement of the mask instead
    :return: DataArray with boolean values
    """
    selected = xx.isin(values)
    return ~selected if invert else selected
52+
53+
54+
def scale_and_offset(
    xx: DataArray,
    scale: float | None = None,
    offset: float | None = None,
    ignore_missing: bool = False,
) -> DataArray:
    """
    Apply scale and offset to the DataArray, computing ``xx * scale + offset``.

    Leave ``scale`` and/or ``offset`` as ``None`` to fall back to the values
    stored in the DataArray's attrs under ``"scales"`` and ``"offsets"``.
    When only one of the two can be resolved, the other defaults to the
    identity (``scale=1.0``, ``offset=0.0``).

    :param xx: DataArray with integer values
    :param scale: Scale factor
    :param offset: Offset
    :param ignore_missing: When neither scale nor offset can be resolved,
        return ``xx`` unchanged instead of raising
    :return: DataArray with scaled and offset values
    :raises ValueError: If neither scale nor offset is provided or found in
        attrs, and ``ignore_missing`` is false
    """
    # "scales" and "offsets" are the names used by GDAL. Loaders should make
    # sure these attrs are set.
    if scale is None:
        scale = xx.attrs.get("scales")

    if offset is None:
        offset = xx.attrs.get("offsets")

    if scale is None and offset is None:
        if ignore_missing:
            return xx
        raise ValueError(
            "Scale and offset not provided and not found in attrs.scales and attrs.offsets"
        )

    # Only one of the two was resolved: use the identity for the other.
    if scale is None:
        scale = 1.0
    if offset is None:
        offset = 0.0

    return xx * scale + offset
86+
87+
88+
def scale_and_offset_dataset(
    xx: Dataset,
    scale: float | None = None,
    offset: float | None = None,
) -> Dataset:
    """
    Apply scale and offset to every data variable of the Dataset.

    Leave ``scale`` and ``offset`` as ``None`` to use the values from each
    DataArray's attrs. Variables for which neither value can be resolved are
    passed through unchanged (``ignore_missing=True``).

    The input Dataset is not modified; a new Dataset is returned.

    :param xx: Dataset with integer values
    :param scale: Scale factor
    :param offset: Offset
    :return: Dataset with scaled and offset values
    """
    # Shallow copy so that re-assigning variables below does not alter the
    # caller's Dataset.
    out = xx.copy()

    for name in xx.data_vars:
        out[name] = scale_and_offset(xx[name], scale, offset, ignore_missing=True)

    return out

tests/test_masking.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from odc.geo.masking import bits_to_bool, enum_to_bool, scale_and_offset
2+
3+
from xarray import DataArray
4+
5+
# Top left is cloud, top right is cloud shadow
6+
# Bottom left is both cloud and cloud shadow, bottom right is neither
7+
xx_bits = DataArray(
    [
        [0b00010000, 0b00001000],
        [0b00011000, 0b00000000],
    ],
    dims=("y", "x"),
)

# 2x2 integer DataArray of enumerated values:
# 3 marks shadow, 9 marks high confidence cloud.
xx_values = DataArray([[3, 9], [3, 0]], dims=("y", "x"))
14+
15+
16+
# Test bits_to_bool
17+
def test_bits_to_bool():
    """Check ``bits_to_bool`` with bit positions and with a raw bitflag, plain and inverted."""
    dims = ("y", "x")
    either_bit_set = DataArray([[True, True], [True, False]], dims=dims)

    # Selecting by bit position
    from_bits = bits_to_bool(xx_bits, bits=[4, 3], bitflags=None)
    assert from_bits.equals(either_bit_set)

    # Selecting by an equivalent integer bitflag
    from_flags = bits_to_bool(xx_bits, bits=None, bitflags=0b00011000)
    assert from_flags.equals(either_bit_set)

    # Inverted, both bits selected
    inverted = bits_to_bool(xx_bits, bits=[4, 3], bitflags=None, invert=True)
    assert inverted.equals(DataArray([[False, False], [False, True]], dims=dims))

    # Inverted, only bit 4 selected
    inverted = bits_to_bool(xx_bits, bits=None, bitflags=0b00010000, invert=True)
    assert inverted.equals(DataArray([[False, True], [False, True]], dims=dims))
32+
33+
34+
# Test enum_to_bool
35+
def test_enum_to_bool():
    """Check ``enum_to_bool`` for a list of values, plain and inverted."""
    dims = ("y", "x")

    selected = enum_to_bool(xx_values, values=[3, 9])
    assert selected.equals(DataArray([[True, True], [True, False]], dims=dims))

    unselected = enum_to_bool(xx_values, values=[3, 9], invert=True)
    assert unselected.equals(DataArray([[False, False], [False, True]], dims=dims))
41+
42+
43+
# Test scale_and_offset
44+
def test_scale_and_offset():
    """Check ``scale_and_offset`` for identity, missing-and-ignored, and real scaling."""
    dims = ("y", "x")
    unchanged = DataArray([[3, 9], [3, 0]], dims=dims)

    # Identity scale and offset leave the values as they are
    scaled = scale_and_offset(xx_values, scale=1.0, offset=0.0)
    assert scaled.equals(unchanged)

    # Nothing supplied and nothing in attrs, but told to ignore that
    scaled = scale_and_offset(xx_values, scale=None, offset=None, ignore_missing=True)
    assert scaled.equals(unchanged)

    # value * 2 + 1
    scaled = scale_and_offset(xx_values, scale=2.0, offset=1.0)
    assert scaled.equals(DataArray([[7, 19], [7, 1]], dims=dims))

0 commit comments

Comments
 (0)