Skip to content

Commit fe45d1e

Browse files
authored
Merge branch 'branch-25.12' into fea/chunked-pq-reader-with-deletion-vector
2 parents f79e019 + 81adc6d commit fe45d1e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+465
-403
lines changed

cpp/examples/parquet_io/io_source.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -69,7 +69,7 @@ struct pinned_allocator : public std::allocator<T> {
6969
return static_cast<T*>(ptr);
7070
}
7171

72-
void deallocate(T* ptr, std::size_t n)
72+
void deallocate(T* ptr, std::size_t n) noexcept
7373
{
7474
mr.deallocate_async(ptr, n * sizeof(T), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream);
7575
}

cpp/include/cudf/detail/utilities/host_vector.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ class rmm_host_allocator {
154154
* It is the responsibility of the caller to destroy
155155
* the objects stored at \p p.
156156
*/
157-
inline void deallocate(pointer p, size_type cnt)
157+
inline void deallocate(pointer p, size_type cnt) noexcept
158158
{
159159
mr.deallocate_async(p, cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream);
160160
}

cpp/include/cudf_test/stream_checking_resource_adaptor.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res
9494
* @param bytes Size of the allocation
9595
* @param stream Stream on which to perform the deallocation
9696
*/
97-
void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) override
97+
void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override
9898
{
9999
verify_stream(stream);
100100
upstream_.deallocate_async(ptr, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream);

cpp/src/io/parquet/delta_binary.cuh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@ constexpr int max_delta_mini_block_size = 64;
4747
// batch of size `values_per_mb`. The largest value for values_per_miniblock among the
4848
// major writers seems to be 64, so 2 * 64 should be good. We save the first value separately
4949
// since it is not encoded in the first mini-block.
50-
constexpr int delta_rolling_buf_size = 2 * max_delta_mini_block_size;
50+
// The extra 1 is for the first value, from the block header. It's not stored in the buffer, but it
51+
// still impacts buffer indexing and we need to account for it to avoid race conditions.
52+
constexpr int delta_rolling_buf_size = (2 * max_delta_mini_block_size) + 1;
5153

5254
/**
5355
* @brief Read a ULEB128 varint integer

cpp/src/utilities/host_memory.cpp

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ class fixed_pinned_pool_memory_resource {
9696
void deallocate_async(void* ptr,
9797
std::size_t bytes,
9898
std::size_t alignment,
99-
cuda::stream_ref stream)
99+
cuda::stream_ref stream) noexcept
100100
{
101101
if (bytes <= pool_size_ && ptr >= pool_begin_ && ptr < pool_end_) {
102102
pool_->deallocate_async(ptr, bytes, alignment, stream);
@@ -105,14 +105,14 @@ class fixed_pinned_pool_memory_resource {
105105
}
106106
}
107107

108-
void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream)
108+
void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) noexcept
109109
{
110110
return deallocate_async(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream);
111111
}
112112

113113
void deallocate(void* ptr,
114114
std::size_t bytes,
115-
std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT)
115+
std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
116116
{
117117
deallocate_async(ptr, bytes, alignment, stream_);
118118
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
@@ -156,7 +156,7 @@ class fixed_pinned_pool_memory_resource {
156156
return this->allocate(bytes, alignment);
157157
}
158158

159-
void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment)
159+
void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment) noexcept
160160
{
161161
return this->deallocate(ptr, bytes, alignment);
162162
}
@@ -166,7 +166,10 @@ class fixed_pinned_pool_memory_resource {
166166
return this->allocate_async(bytes, alignment, stream);
167167
}
168168

169-
void deallocate(rmm::cuda_stream_view stream, void* ptr, std::size_t bytes, std::size_t alignment)
169+
void deallocate(rmm::cuda_stream_view stream,
170+
void* ptr,
171+
std::size_t bytes,
172+
std::size_t alignment) noexcept
170173
{
171174
return this->deallocate_async(ptr, bytes, alignment, stream);
172175
}
@@ -260,7 +263,7 @@ class new_delete_memory_resource {
260263

261264
void deallocate(void* ptr,
262265
std::size_t bytes,
263-
std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT)
266+
std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
264267
{
265268
rmm::detail::aligned_host_deallocate(
266269
ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); });
@@ -269,12 +272,12 @@ class new_delete_memory_resource {
269272
void deallocate_async(void* ptr,
270273
std::size_t bytes,
271274
std::size_t alignment,
272-
[[maybe_unused]] cuda::stream_ref stream)
275+
[[maybe_unused]] cuda::stream_ref stream) noexcept
273276
{
274277
deallocate(ptr, bytes, alignment);
275278
}
276279

277-
void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream)
280+
void deallocate_async(void* ptr, std::size_t bytes, cuda::stream_ref stream) noexcept
278281
{
279282
deallocate(ptr, bytes, rmm::RMM_DEFAULT_HOST_ALIGNMENT);
280283
}
@@ -294,7 +297,7 @@ class new_delete_memory_resource {
294297
return this->allocate(bytes, alignment);
295298
}
296299

297-
void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment)
300+
void deallocate_sync(void* ptr, std::size_t bytes, std::size_t alignment) noexcept
298301
{
299302
return this->deallocate(ptr, bytes, alignment);
300303
}
@@ -304,7 +307,10 @@ class new_delete_memory_resource {
304307
return this->allocate_async(bytes, alignment, stream);
305308
}
306309

307-
void deallocate(rmm::cuda_stream_view stream, void* ptr, std::size_t bytes, std::size_t alignment)
310+
void deallocate(rmm::cuda_stream_view stream,
311+
void* ptr,
312+
std::size_t bytes,
313+
std::size_t alignment) noexcept
308314
{
309315
return this->deallocate_async(ptr, bytes, alignment, stream);
310316
}

python/cudf/cudf/core/column/column.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -611,19 +611,21 @@ def from_pylibcudf(
611611
new_dtype = plc.DataType(plc.TypeId.INT8)
612612

613613
col = plc.column_factories.make_numeric_column(
614-
new_dtype, col.size(), plc.column_factories.MaskState.ALL_NULL
614+
new_dtype, col.size(), plc.types.MaskState.ALL_NULL
615615
)
616616

617617
dtype = dtype_from_pylibcudf_column(col)
618618

619+
data_view = col.data()
620+
mask_view = col.null_mask()
619621
return build_column( # type: ignore[return-value]
620-
data=as_buffer(col.data().obj, exposed=data_ptr_exposed)
621-
if col.data() is not None
622+
data=as_buffer(data_view.obj, exposed=data_ptr_exposed)
623+
if data_view is not None
622624
else None,
623625
dtype=dtype,
624626
size=col.size(),
625-
mask=as_buffer(col.null_mask().obj, exposed=data_ptr_exposed)
626-
if col.null_mask() is not None
627+
mask=as_buffer(mask_view.obj, exposed=data_ptr_exposed)
628+
if mask_view is not None
627629
else None,
628630
offset=col.offset(),
629631
null_count=col.null_count(),
@@ -981,7 +983,7 @@ def _fill(
981983
if not fill_value.is_valid() and not self.nullable:
982984
mask = as_buffer(
983985
plc.null_mask.create_null_mask(
984-
self.size, plc.null_mask.MaskState.ALL_VALID
986+
self.size, plc.types.MaskState.ALL_VALID
985987
)
986988
)
987989
self.set_base_mask(mask)
@@ -2436,7 +2438,7 @@ def column_empty(
24362438
if row_count == 0
24372439
else plc.gpumemoryview(
24382440
plc.null_mask.create_null_mask(
2439-
row_count, plc.null_mask.MaskState.ALL_NULL
2441+
row_count, plc.types.MaskState.ALL_NULL
24402442
)
24412443
)
24422444
)

python/cudf_polars/cudf_polars/containers/column.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def __init__(
8585

8686
@classmethod
8787
def deserialize(
88-
cls, header: ColumnHeader, frames: tuple[memoryview, plc.gpumemoryview]
88+
cls, header: ColumnHeader, frames: tuple[memoryview[bytes], plc.gpumemoryview]
8989
) -> Self:
9090
"""
9191
Create a Column from a serialized representation returned by `.serialize()`.
@@ -126,7 +126,7 @@ def deserialize_ctor_kwargs(
126126

127127
def serialize(
128128
self,
129-
) -> tuple[ColumnHeader, tuple[memoryview, plc.gpumemoryview]]:
129+
) -> tuple[ColumnHeader, tuple[memoryview[bytes], plc.gpumemoryview]]:
130130
"""
131131
Serialize the Column into header and frames.
132132
@@ -297,7 +297,7 @@ def astype(self, dtype: DataType) -> Column:
297297
self.obj.type()
298298
) and plc.traits.is_timestamp(plc_dtype):
299299
upcasted = plc.unary.cast(self.obj, plc.DataType(plc.TypeId.INT64))
300-
result = plc.column.Column(
300+
plc_col = plc.column.Column(
301301
plc_dtype,
302302
upcasted.size(),
303303
upcasted.data(),
@@ -306,11 +306,11 @@ def astype(self, dtype: DataType) -> Column:
306306
upcasted.offset(),
307307
upcasted.children(),
308308
)
309-
return Column(result, dtype=dtype).sorted_like(self)
309+
return Column(plc_col, dtype=dtype).sorted_like(self)
310310
elif plc.traits.is_integral_not_bool(plc_dtype) and plc.traits.is_timestamp(
311311
self.obj.type()
312312
):
313-
result = plc.column.Column(
313+
plc_col = plc.column.Column(
314314
plc.DataType(plc.TypeId.INT64),
315315
self.obj.size(),
316316
self.obj.data(),
@@ -319,7 +319,7 @@ def astype(self, dtype: DataType) -> Column:
319319
self.obj.offset(),
320320
self.obj.children(),
321321
)
322-
return Column(plc.unary.cast(result, plc_dtype), dtype=dtype).sorted_like(
322+
return Column(plc.unary.cast(plc_col, plc_dtype), dtype=dtype).sorted_like(
323323
self
324324
)
325325
else:
@@ -454,11 +454,12 @@ def mask_nans(self) -> Self:
454454
def nan_count(self) -> int:
455455
"""Return the number of NaN values in the column."""
456456
if self.size > 0 and plc.traits.is_floating_point(self.obj.type()):
457+
# See https://github.com/rapidsai/cudf/issues/20202 for why we type-ignore
457458
return plc.reduce.reduce(
458459
plc.unary.is_nan(self.obj),
459460
plc.aggregation.sum(),
460461
plc.types.SIZE_TYPE,
461-
).to_py()
462+
).to_py() # type: ignore[return-value]
462463
return 0
463464

464465
@property

python/cudf_polars/cudf_polars/containers/dataframe.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,9 @@ def from_table(
191191

192192
@classmethod
193193
def deserialize(
194-
cls, header: DataFrameHeader, frames: tuple[memoryview, plc.gpumemoryview]
194+
cls,
195+
header: DataFrameHeader,
196+
frames: tuple[memoryview[bytes], plc.gpumemoryview],
195197
) -> Self:
196198
"""
197199
Create a DataFrame from a serialized representation returned by `.serialize()`.
@@ -219,7 +221,7 @@ def deserialize(
219221

220222
def serialize(
221223
self,
222-
) -> tuple[DataFrameHeader, tuple[memoryview, plc.gpumemoryview]]:
224+
) -> tuple[DataFrameHeader, tuple[memoryview[bytes], plc.gpumemoryview]]:
223225
"""
224226
Serialize the table into header and frames.
225227

python/cudf_polars/cudf_polars/dsl/expressions/rolling.py

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ class FillNullWithStrategyOp(UnaryOp):
4848
policy: plc.replace.ReplacePolicy = plc.replace.ReplacePolicy.PRECEDING
4949

5050

51+
@dataclass(frozen=True)
52+
class CumSumOp(UnaryOp):
53+
pass
54+
55+
5156
def to_request(
5257
value: expr.Expr, orderby: Column, df: DataFrame
5358
) -> plc.rolling.RollingRequest:
@@ -241,7 +246,8 @@ def __init__(
241246
isinstance(named_expr.value, (expr.Len, expr.Agg))
242247
or (
243248
isinstance(named_expr.value, expr.UnaryFunction)
244-
and named_expr.value.name in {"rank", "fill_null_with_strategy"}
249+
and named_expr.value.name
250+
in {"rank", "fill_null_with_strategy", "cum_sum"}
245251
)
246252
)
247253
]
@@ -265,7 +271,7 @@ def __init__(
265271
if isinstance(v, expr.Agg)
266272
or (
267273
isinstance(v, expr.UnaryFunction)
268-
and v.name in {"rank", "fill_null_with_strategy"}
274+
and v.name in {"rank", "fill_null_with_strategy", "cum_sum"}
269275
)
270276
]
271277
self.by_count = len(by_expr)
@@ -393,6 +399,41 @@ def _(
393399
dtypes = [ne.value.dtype for ne in named_exprs]
394400
return names, dtypes, tables
395401

402+
@_apply_unary_op.register
403+
def _(
404+
self,
405+
op: CumSumOp,
406+
df: DataFrame,
407+
_: plc.groupby.GroupBy,
408+
) -> tuple[list[str], list[DataType], list[plc.Table]]:
409+
cum_named = op.named_exprs
410+
order_index = op.order_index
411+
412+
requests: list[plc.groupby.GroupByRequest] = []
413+
out_names: list[str] = []
414+
out_dtypes: list[DataType] = []
415+
416+
val_cols = self._gather_columns(
417+
[
418+
ne.value.children[0].evaluate(df, context=ExecutionContext.FRAME).obj
419+
for ne in cum_named
420+
],
421+
order_index,
422+
cudf_polars_column=False,
423+
)
424+
agg = plc.aggregation.sum()
425+
426+
for ne, val_col in zip(cum_named, val_cols, strict=True):
427+
requests.append(plc.groupby.GroupByRequest(val_col, [agg]))
428+
out_names.append(ne.name)
429+
out_dtypes.append(ne.value.dtype)
430+
431+
lg = op.local_grouper
432+
assert isinstance(lg, plc.groupby.GroupBy)
433+
_, tables = lg.scan(requests)
434+
435+
return out_names, out_dtypes, tables
436+
396437
def _reorder_to_input(
397438
self,
398439
row_id: plc.Column,
@@ -444,6 +485,7 @@ def _split_named_expr(
444485
unary_window_ops: dict[str, list[expr.NamedExpr]] = {
445486
"rank": [],
446487
"fill_null_with_strategy": [],
488+
"cum_sum": [],
447489
}
448490

449491
for ne in self.named_aggs:
@@ -733,6 +775,40 @@ def do_evaluate( # noqa: D102
733775
)
734776
)
735777

778+
if cum_named := unary_window_ops["cum_sum"]:
779+
order_index = self._build_window_order_index(
780+
by_cols,
781+
row_id=row_id,
782+
order_by_col=order_by_col if self._order_by_expr is not None else None,
783+
ob_desc=self.options[2] if self._order_by_expr is not None else False,
784+
ob_nulls_last=self.options[3]
785+
if self._order_by_expr is not None
786+
else False,
787+
)
788+
by_cols_for_scan = self._gather_columns(by_cols, order_index)
789+
local = self._sorted_grouper(by_cols_for_scan)
790+
names, dtypes, tables = self._apply_unary_op(
791+
CumSumOp(
792+
named_exprs=cum_named,
793+
order_index=order_index,
794+
by_cols_for_scan=by_cols_for_scan,
795+
local_grouper=local,
796+
),
797+
df,
798+
grouper,
799+
)
800+
broadcasted_cols.extend(
801+
self._reorder_to_input(
802+
row_id,
803+
by_cols,
804+
df.num_rows,
805+
tables,
806+
names,
807+
dtypes,
808+
order_index=order_index,
809+
)
810+
)
811+
736812
# Create a temporary DataFrame with the broadcasted columns named by their
737813
# placeholder names from agg decomposition, then evaluate the post-expression.
738814
df = DataFrame(broadcasted_cols)

0 commit comments

Comments
 (0)