|
21 | 21 | import pandas as pd
|
22 | 22 | from pandas.api.types import CategoricalDtype
|
23 | 23 |
|
24 |
| -from pyspark.pandas.base import column_op, IndexOpsMixin |
| 24 | +from pyspark.pandas.base import column_op, IndexOpsMixin, numpy_column_op |
25 | 25 | from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
|
26 | 26 | from pyspark.pandas.data_type_ops.base import (
|
27 | 27 | DataTypeOps,
|
|
35 | 35 | _is_boolean_type,
|
36 | 36 | )
|
37 | 37 | from pyspark.pandas.typedef.typehints import as_spark_type, extension_dtypes, pandas_on_spark_type
|
| 38 | +from pyspark.pandas.utils import is_ansi_mode_enabled |
38 | 39 | from pyspark.sql import functions as F, Column as PySparkColumn
|
39 | 40 | from pyspark.sql.types import BooleanType, StringType
|
40 | 41 | from pyspark.errors import PySparkValueError
|
@@ -136,13 +137,21 @@ def mod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
136 | 137 | raise TypeError(
|
137 | 138 | "Modulo can not be applied to %s and the given type." % self.pretty_name
|
138 | 139 | )
|
| 140 | + spark_session = left._internal.spark_frame.sparkSession |
| 141 | + |
def safe_mod(left_col: PySparkColumn, right_val: Any) -> PySparkColumn:
    """Build the Spark expression for ``left_col % right_val`` with floored modulo.

    Parameters
    ----------
    left_col
        Spark column holding the (already numeric) left operand.
    right_val
        Divisor: either a Python number or a Spark column.

    Returns
    -------
    Spark column whose result takes the sign of the divisor, as Python's and
    pandas' ``%`` do. When ANSI mode is enabled on ``spark_session``, rows
    with a zero divisor evaluate to NULL.
    """
    # Spark's `%` keeps the sign of the dividend; adding the divisor and
    # reducing once more yields the floored result (sign of the divisor).
    floored = ((left_col % right_val) + right_val) % right_val
    if is_ansi_mode_enabled(spark_session):
        # Guard the zero divisor explicitly so ANSI mode does not raise a
        # divide-by-zero error; `right_val == 0` is a Python bool for scalar
        # divisors and a Column for column divisors, F.lit accepts both.
        return F.when(F.lit(right_val == 0), F.lit(None)).otherwise(floored)
    return floored
| 147 | + |
139 | 148 | if isinstance(right, numbers.Number):
|
140 | 149 | left = transform_boolean_operand_to_numeric(left, spark_type=as_spark_type(type(right)))
|
141 |
| - return left % right |
| 150 | + return numpy_column_op(safe_mod)(left, right) |
142 | 151 | else:
|
143 | 152 | assert isinstance(right, IndexOpsMixin)
|
144 | 153 | left = transform_boolean_operand_to_numeric(left, spark_type=right.spark.data_type)
|
145 |
| - return left % right |
| 154 | + return numpy_column_op(safe_mod)(left, right) |
146 | 155 |
|
147 | 156 | def pow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
148 | 157 | _sanitize_list_like(right)
|
@@ -226,7 +235,17 @@ def rmod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
|
226 | 235 | _sanitize_list_like(right)
|
227 | 236 | if isinstance(right, numbers.Number) and not isinstance(right, bool):
|
228 | 237 | left = transform_boolean_operand_to_numeric(left, spark_type=as_spark_type(type(right)))
|
229 |
| - return right % left |
| 238 | + spark_session = left._internal.spark_frame.sparkSession |
| 239 | + |
def safe_rmod(left_col, right):
    """Build the Spark expression for the reflected modulo ``right % left_col``.

    Parameters
    ----------
    left_col
        Spark column holding the (already numeric) divisor.
    right
        Python number used as the dividend.

    Returns
    -------
    Spark column with a non-negative-style (floored) remainder. When ANSI
    mode is enabled on ``spark_session``, rows whose divisor is zero
    evaluate to NULL.
    """
    if is_ansi_mode_enabled(spark_session):
        # Only evaluate the modulo where the divisor is non-zero so ANSI mode
        # does not raise a divide-by-zero error.
        return F.when(left_col != 0, F.pmod(F.lit(right), left_col)).otherwise(F.lit(None))
    # Operate on the column parameter, not on the enclosing method's `left`
    # Series/Index: this helper must return a Spark Column. The add-and-reduce
    # form turns Spark's dividend-signed `%` into a floored modulo.
    return ((right % left_col) + left_col) % left_col
| 247 | + |
| 248 | + return numpy_column_op(safe_rmod)(left, right) |
230 | 249 | else:
|
231 | 250 | raise TypeError(
|
232 | 251 | "Modulo can not be applied to %s and the given type." % self.pretty_name
|
|
0 commit comments