Skip to content

Commit 1e042a1

Browse files
authored
Support binops between float scalar to decimal column (#20199)
This change removes a workaround in PDSH Q1 for cudf.pandas.

```bash
~/cudf$ python $PDSH_FILE $queries --scale=0.1 --iterations 1 --path "$dataset_path"
Empty DataFrame
Columns: []
Index: []
Query 0 - Iteration 0 finished in 0.0007s
  l_returnflag l_linestatus     sum_qty  sum_base_price  sum_disc_price    sum_charge  count_order
0            A            F  3774200.00   5320753880.69   -5.054096e+09 -5.256751e+09       147790
1            N            F    95257.00    133737795.84   -1.271324e+08 -1.322863e+08         3765
2            N            O  7459297.00  10512270008.90   -9.986238e+09 -1.038558e+10       292000
3            R            F  3785523.00   5337950526.47   -5.071819e+09 -5.274406e+09       148301
Query 1 - Iteration 0 finished in 0.2097s
Iteration Summary
=======================================
query: 0
path: /home/nfs/mroeschke/cudf/sf0.1
scale_factor: 0.1
executor: in-memory
iterations: 1
---------------------------------------
min time : 0.0007
max time : 0.0007
mean time: 0.0007
=======================================
query: 1
path: /home/nfs/mroeschke/cudf/sf0.1
scale_factor: 0.1
executor: in-memory
iterations: 1
---------------------------------------
min time : 0.2097
max time : 0.2097
mean time: 0.2097
=======================================
Total mean time across all queries: 0.2103 seconds
```

Authors:
- Matthew Roeschke (https://github.com/mroeschke)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #20199
1 parent ce5ebcd commit 1e042a1

File tree

3 files changed

+15
-13
lines changed

3 files changed

+15
-13
lines changed

python/cudf/cudf/core/column/decimal.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from cudf.api.types import is_scalar
1818
from cudf.core._internals import binaryop
1919
from cudf.core.buffer import acquire_spill_lock
20-
from cudf.core.column.column import ColumnBase
20+
from cudf.core.column.column import ColumnBase, as_column
2121
from cudf.core.column.numerical_base import NumericalBaseColumn
2222
from cudf.core.dtypes import (
2323
Decimal32Dtype,
@@ -217,7 +217,9 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str):
217217
if isinstance(other, ColumnBase):
218218
if not isinstance(other, NumericalBaseColumn):
219219
return NotImplemented
220-
elif other.dtype.kind in {"f", "b"}:
220+
elif other.dtype.kind == "f":
221+
return self.astype(other.dtype)._binaryop(other, op)
222+
elif other.dtype.kind == "b":
221223
raise TypeError(
222224
"Decimal columns only support binary operations with "
223225
"integer numerical columns."
@@ -234,6 +236,8 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str):
234236
other_cudf_dtype = other.dtype
235237
elif isinstance(other, (int, Decimal)):
236238
other_cudf_dtype = self.dtype._from_decimal(Decimal(other))
239+
elif isinstance(other, float):
240+
return self._binaryop(as_column(other, length=len(self)), op)
237241
elif is_na_like(other):
238242
other = pa.scalar(None, type=cudf_dtype_to_pa_type(self.dtype))
239243
other_cudf_dtype = self.dtype

python/cudf/cudf/pandas/_benchmarks/pdsh.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,6 @@ def q1(run_config: RunConfig) -> pd.DataFrame:
4343
line_item_ds = get_data(
4444
run_config.dataset_path, "lineitem", run_config.suffix
4545
)
46-
# TODO: Remove float64 casting once cuDF supports subtraction between float and decimal
47-
line_item_ds["l_quantity"] = line_item_ds["l_quantity"].astype(
48-
"float64"
49-
)
50-
line_item_ds["l_extendedprice"] = line_item_ds[
51-
"l_extendedprice"
52-
].astype("float64")
53-
line_item_ds["l_discount"] = line_item_ds["l_discount"].astype(
54-
"float64"
55-
)
56-
line_item_ds["l_tax"] = line_item_ds["l_tax"].astype("float64")
5746

5847
var1 = date(1998, 9, 2)
5948

python/cudf/cudf/tests/series/test_binops.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3098,3 +3098,12 @@ def test_singleton_array(binary_op, xp):
30983098
expect = binary_op(lhs.to_pandas(), rhs_host)
30993099
got = binary_op(lhs, rhs_device)
31003100
assert_eq(expect, got)
3101+
3102+
3103+
def test_binops_float_scalar_decimal():
    """Check subtraction between a Python float scalar and a decimal Series.

    Both operand orders are exercised so that a reflected operation
    (``scalar - series``) cannot be silently evaluated as the non-reflected
    one (``series - scalar``). The result is expected to be ``float64`` and
    the null entry is expected to stay null.
    """
    decimal_series = cudf.Series(
        [decimal.Decimal("1"), decimal.Decimal("-2.5"), None],
        dtype=cudf.Decimal32Dtype(3, 2),
    )

    # Reflected: 1.0 - 1 == 0.0 and 1.0 - (-2.5) == 3.5.
    # NOTE(review): the earlier expectation of -3.5 is the value of
    # ``series - 1.0``; if this assertion fails, the float paths of the
    # decimal column's ``_binaryop`` are probably dropping the reflected
    # operand order -- confirm there.
    result = 1.0 - decimal_series
    expected = cudf.Series([0.0, 3.5, None], dtype="float64")
    assert_eq(result, expected)

    # Non-reflected: 1 - 1.0 == 0.0 and -2.5 - 1.0 == -3.5.
    result = decimal_series - 1.0
    expected = cudf.Series([0.0, -3.5, None], dtype="float64")
    assert_eq(result, expected)

0 commit comments

Comments
 (0)