rapidsai
diff --git a/‎cpp/include/cudf/binaryop.hpp‎
Lines changed: 2 additions & 1 deletion b/‎cpp/include/cudf/binaryop.hpp‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎docs/cudf/source/conf.py‎
Lines changed: 3 additions & 3 deletions b/‎docs/cudf/source/conf.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎docs/cudf/source/pylibcudf/api_docs/io/index.rst‎
Lines changed: 1 addition & 0 deletions b/‎docs/cudf/source/pylibcudf/api_docs/io/index.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/cudf/source/pylibcudf/api_docs/io/orc.rst‎
Lines changed: 6 additions & 0 deletions b/‎docs/cudf/source/pylibcudf/api_docs/io/orc.rst‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎python/cudf/cudf/core/column/column.py‎
Lines changed: 4 additions & 2 deletions b/‎python/cudf/cudf/core/column/column.py‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎python/cudf/cudf/core/column/string.py‎
Lines changed: 54 additions & 13 deletions b/‎python/cudf/cudf/core/column/string.py‎
Lines changed: 54 additions & 13 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -292,6 +292,7 @@ namespace binops {
 /**
  * @brief Returns true if the binary operator is supported for the given input types.
  *
+ * @ingroup transformation_binaryops
  * @param out The output data type
  * @param lhs The left-hand cudf::data_type
  * @param rhs The right-hand cudf::data_type
 
@@ -26,7 +26,7 @@
 import tempfile
 import warnings
 import xml.etree.ElementTree as ET
-from enum import IntEnum
+from enum import IntEnum, IntFlag
 from typing import Any
 
 import cudf
@@ -684,7 +684,7 @@ def can_document_member(
     ) -> bool:
         try:
             return issubclass(
-                member, IntEnum
+                member, (IntEnum, IntFlag)
             ) and member.__module__.startswith("pylibcudf")
         except TypeError:
             return False
@@ -703,7 +703,7 @@ def add_content(self, more_content) -> None:
 
         if self.object.__name__ != "Kind":
             self.add_line(
-                f"See also :cpp:enum:`cudf::{self.object.__name__}`.",
+                f"See also :cpp:enum:`{self.object.__name__}`.",
                 source_name,
             )
         self.add_line("", source_name)
 
@@ -18,6 +18,7 @@ I/O Functions
     avro
     csv
     json
+    orc
     parquet
     parquet_metadata
     text
 
@@ -0,0 +1,6 @@
+===
+ORC
+===
+
+.. automodule:: pylibcudf.io.orc
+   :members:
@@ -399,7 +399,7 @@ def set_mask(self, value) -> Self:
                 mask = as_buffer(dbuf)
 
         if mask is not None:
-            new_mask: plc.gpumemoryview | None = plc.gpumemoryview(mask)
+            new_mask = plc.gpumemoryview(mask)
             new_null_count = plc.null_mask.null_count(
                 new_mask,
                 0,
@@ -1346,6 +1346,7 @@ def fillna(
         input_col = self.nans_to_nulls()
 
         with acquire_spill_lock():
+            plc_replace: plc.replace.ReplacePolicy | plc.Scalar
             if method:
                 plc_replace = (
                     plc.replace.ReplacePolicy.PRECEDING
@@ -2045,7 +2046,7 @@ def _process_for_reduction(
                 return _get_nan_for_dtype(self.dtype)
         return col
 
-    def _reduction_result_dtype(self, reduction_op: str) -> Dtype:
+    def _reduction_result_dtype(self, reduction_op: str) -> DtypeObj:
         """
         Determine the correct dtype to pass to libcudf based on
         the input dtype, data dtype, and specific reduction op
@@ -2353,6 +2354,7 @@ def _cast_self_and_other_for_where(
                         f"Type-casting from {other_col.dtype} "
                         f"to {self.dtype}, there could be potential data loss"
                     )
+            other_out: plc.Scalar | ColumnBase
             if other_is_scalar:
                 other_out = pa_scalar_to_plc_scalar(
                     pa.scalar(other, type=cudf_dtype_to_pa_type(self.dtype))
 
@@ -3,7 +3,9 @@
 from __future__ import annotations
 
 import itertools
-from functools import cached_property
+import re
+from collections.abc import Callable
+from functools import cached_property, lru_cache
 from typing import TYPE_CHECKING, cast
 
 import numpy as np
@@ -52,6 +54,29 @@
     from cudf.core.dtypes import DecimalDtype
 
 
+# Maps each supported Python `re` flag to the same-named pylibcudf RegexFlags member.
+# If the names ever diverge, replace the getattr lookup with an explicit mapping.
+_FLAG_MAP = {
+    getattr(re, flag): getattr(plc.strings.regex_flags.RegexFlags, flag)
+    for flag in ("MULTILINE", "DOTALL")
+}
+
+
+@lru_cache
+def plc_flags_from_re_flags(
+    flags: re.RegexFlag,
+) -> plc.strings.regex_flags.RegexFlags:
+    # Translate a Python `re` flag bitmask into pylibcudf RegexFlags; raises ValueError for flags absent from _FLAG_MAP.
+    plc_flags = plc.strings.regex_flags.RegexFlags(0)  # start from the zero flag value
+    for re_flag, plc_flag in _FLAG_MAP.items():
+        if flags & re_flag:
+            plc_flags |= plc_flag
+            flags &= ~re_flag  # clear the handled bit so only unmapped flags remain
+    if flags:  # anything left over has no entry in _FLAG_MAP
+        raise ValueError(f"Unsupported re flags: {flags}")
+    return plc_flags
+
+
 class StringColumn(ColumnBase):
     """
     Implements operations for Columns of String type
@@ -323,7 +348,9 @@ def as_numerical_column(self, dtype: np.dtype) -> NumericalColumn:
             if not is_pandas_nullable_extension_dtype(dtype):
                 result = result.fillna(False)
             return result._with_type_metadata(dtype)  # type: ignore[return-value]
-        elif dtype.kind in {"i", "u"}:
+
+        cast_func: Callable[[plc.Column, plc.DataType], plc.Column]
+        if dtype.kind in {"i", "u"}:
             if not self.is_integer().all():
                 raise ValueError(
                     "Could not convert strings to integer "
@@ -362,7 +389,9 @@ def strptime(
             raise ValueError(
                 "Cannot convert `None` value to datetime or timedelta."
             )
-        elif dtype.kind == "M":  # type: ignore[union-attr]
+
+        casting_func: Callable[[plc.Column, plc.DataType, str], plc.Column]
+        if dtype.kind == "M":  # type: ignore[union-attr]
             if format.endswith("%z"):
                 raise NotImplementedError(
                     "cuDF does not yet support timezone-aware datetimes"
@@ -587,10 +616,10 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
             }:
                 if isinstance(other, pa.Scalar):
                     other = pa_scalar_to_plc_scalar(other)
-                lhs, rhs = (other, self) if reflect else (self, other)
+                lhs_op, rhs_op = (other, self) if reflect else (self, other)
                 return binaryop.binaryop(
-                    lhs=lhs,
-                    rhs=rhs,
+                    lhs=lhs_op,
+                    rhs=rhs_op,
                     op=op,
                     dtype=get_dtype_of_same_kind(
                         self.dtype, np.dtype(np.bool_)
@@ -1062,7 +1091,7 @@ def _split(
         self,
         delimiter: plc.Scalar,
         maxsplit: int,
-        method: Callable[[plc.Column, plc.Scalar, int], plc.Column],
+        method: Callable[[plc.Column, plc.Scalar, int], plc.Table],
     ) -> dict[int, Self]:
         plc_table = method(
             self.to_pylibcudf(mode="read"),
@@ -1086,7 +1115,7 @@ def rsplit(self, delimiter: plc.Scalar, maxsplit: int) -> dict[int, Self]:
     def _partition(
         self,
         delimiter: plc.Scalar,
-        method: Callable[[plc.Column, plc.Scalar], plc.Column],
+        method: Callable[[plc.Column, plc.Scalar], plc.Table],
     ) -> dict[int, Self]:
         plc_table = method(
             self.to_pylibcudf(mode="read"),
@@ -1180,7 +1209,10 @@ def concatenate(
     def extract(self, pattern: str, flags: int) -> dict[int, Self]:
         plc_table = plc.strings.extract.extract(
             self.to_pylibcudf(mode="read"),
-            plc.strings.regex_program.RegexProgram.create(pattern, flags),
+            plc.strings.regex_program.RegexProgram.create(
+                pattern,
+                plc_flags_from_re_flags(flags),
+            ),
         )
         return dict(
             enumerate(
@@ -1192,7 +1224,10 @@ def extract(self, pattern: str, flags: int) -> dict[int, Self]:
     def contains_re(self, pattern: str, flags: int) -> Self:
         plc_column = plc.strings.contains.contains_re(
             self.to_pylibcudf(mode="read"),
-            plc.strings.regex_program.RegexProgram.create(pattern, flags),
+            plc.strings.regex_program.RegexProgram.create(
+                pattern,
+                plc_flags_from_re_flags(flags),
+            ),
         )
         return type(self).from_pylibcudf(plc_column)  # type: ignore[return-value]
 
@@ -1400,7 +1435,9 @@ def wrap(self, width: int) -> Self:
     def count_re(self, pattern: str, flags: int) -> NumericalColumn:
         plc_result = plc.strings.contains.count_re(
             self.to_pylibcudf(mode="read"),
-            plc.strings.regex_program.RegexProgram.create(pattern, flags),
+            plc.strings.regex_program.RegexProgram.create(
+                pattern, plc_flags_from_re_flags(flags)
+            ),
         )
         return type(self).from_pylibcudf(plc_result)  # type: ignore[return-value]
 
@@ -1415,7 +1452,9 @@ def findall(
     ) -> Self:
         plc_result = method(
             self.to_pylibcudf(mode="read"),
-            plc.strings.regex_program.RegexProgram.create(pat, flags),
+            plc.strings.regex_program.RegexProgram.create(
+                pat, plc_flags_from_re_flags(flags)
+            ),
         )
         return type(self).from_pylibcudf(plc_result)  # type: ignore[return-value]
 
@@ -1464,7 +1503,9 @@ def find(
     def matches_re(self, pattern: str, flags: int) -> Self:
         plc_result = plc.strings.contains.matches_re(
             self.to_pylibcudf(mode="read"),
-            plc.strings.regex_program.RegexProgram.create(pattern, flags),
+            plc.strings.regex_program.RegexProgram.create(
+                pattern, plc_flags_from_re_flags(flags)
+            ),
         )
         return type(self).from_pylibcudf(plc_result)  # type: ignore[return-value]
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`/*`
`2`		`- * Copyright (c) 2019-2024, NVIDIA CORPORATION.`
	`2`	`+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.`
`3`	`3`	`*`
`4`	`4`	`* Licensed under the Apache License, Version 2.0 (the "License");`
`5`	`5`	`* you may not use this file except in compliance with the License.`
`@@ -292,6 +292,7 @@ namespace binops {`
`292`	`292`	`/**`
`293`	`293`	`* @brief Returns true if the binary operator is supported for the given input types.`
`294`	`294`	`*`
	`295`	`+ * @ingroup transformation_binaryops`
`295`	`296`	`* @param out The output data type`
`296`	`297`	`* @param lhs The left-hand cudf::data_type`
`297`	`298`	`* @param rhs The right-hand cudf::data_type`
-Original file line number
+Diff line change
     avro
     csv
     json
 +    orc
     parquet
     parquet_metadata
     text