rapidsai
diff --git a/‎.github/workflows/pr.yaml
Lines changed: 4 additions & 4 deletions b/‎.github/workflows/pr.yaml
Lines changed: 4 additions & 4 deletions
diff --git a/‎CHANGELOG.md
Lines changed: 47 additions & 0 deletions b/‎CHANGELOG.md
Lines changed: 47 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 1 addition & 1 deletion b/‎README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎cmake-format-rapids-cmake.json
Lines changed: 10 additions & 0 deletions b/‎cmake-format-rapids-cmake.json
Lines changed: 10 additions & 0 deletions
diff --git a/‎dependencies.yaml
Lines changed: 18 additions & 13 deletions b/‎dependencies.yaml
Lines changed: 18 additions & 13 deletions
diff --git a/‎docs/api.rst
Lines changed: 1 addition & 0 deletions b/‎docs/api.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/command/rapids_cuda_enable_fatbin_compression.rst
Lines changed: 1 addition & 0 deletions b/‎docs/command/rapids_cuda_enable_fatbin_compression.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎rapids-cmake/cpm/nvcomp.cmake
Lines changed: 0 additions & 9 deletions b/‎rapids-cmake/cpm/nvcomp.cmake
Lines changed: 0 additions & 9 deletions
diff --git a/‎rapids-cmake/cpm/versions.json
Lines changed: 2 additions & 2 deletions b/‎rapids-cmake/cpm/versions.json
Lines changed: 2 additions & 2 deletions
diff --git a/‎rapids-cmake/cuda/detail/detect_architectures.cmake
Lines changed: 19 additions & 12 deletions b/‎rapids-cmake/cuda/detail/detect_architectures.cmake
Lines changed: 19 additions & 12 deletions
@@ -17,7 +17,7 @@ jobs:
       - docs-build
       - telemetry-setup
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.10
+    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda13.0
   telemetry-setup:
     runs-on: ubuntu-latest
     continue-on-error: true
@@ -32,18 +32,18 @@ jobs:
   checks:
     secrets: inherit
     needs: telemetry-setup
-    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.10
+    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda13.0
     with:
       ignored_pr_jobs: telemetry-summarize
   conda-cpp-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.10
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda13.0
     with:
       build_type: pull-request
       script: ci/test_cpp.sh
   docs-build:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0
     with:
       build_type: pull-request
       node_type: "cpu4"
 
@@ -1,3 +1,50 @@
+# rapids-cmake 25.08.00 (6 Aug 2025)
+
+## 🚨 Breaking Changes
+
+- rapids_cpm_cccl: Remove support for CCCL < 2.8 ([#859](https://github.com/rapidsai/rapids-cmake/pull/859)) [@robertmaynard](https://github.com/robertmaynard)
+- Remove CUDA 11 support ([#855](https://github.com/rapidsai/rapids-cmake/pull/855)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Update to CCCL 3.0 ([#854](https://github.com/rapidsai/rapids-cmake/pull/854)) [@vyasr](https://github.com/vyasr)
+- Require cpp subdirectory for RMM ([#832](https://github.com/rapidsai/rapids-cmake/pull/832)) [@bdice](https://github.com/bdice)
+
+## 🐛 Bug Fixes
+
+- CCCL: disable PDL ([#876](https://github.com/rapidsai/rapids-cmake/pull/876)) [@bdice](https://github.com/bdice)
+- Use RMM main (new branching strategy) to fix downstream fetching issues ([#862](https://github.com/rapidsai/rapids-cmake/pull/862)) [@bdice](https://github.com/bdice)
+- rapids_cpm_cccl: Update to new location of cccl-config ([#858](https://github.com/rapidsai/rapids-cmake/pull/858)) [@robertmaynard](https://github.com/robertmaynard)
+- Remove CCCL patches that aren't used anymore ([#857](https://github.com/rapidsai/rapids-cmake/pull/857)) [@robertmaynard](https://github.com/robertmaynard)
+- Fetch the atomic fix in CCCL 3.0 ([#856](https://github.com/rapidsai/rapids-cmake/pull/856)) [@PointKernel](https://github.com/PointKernel)
+
+## 📖 Documentation
+
+- add docs on CI workflow inputs ([#868](https://github.com/rapidsai/rapids-cmake/pull/868)) [@jameslamb](https://github.com/jameslamb)
+
+## 🚀 New Features
+
+- Update CCCL version tag for PDL disable ([#879](https://github.com/rapidsai/rapids-cmake/pull/879)) [@davidwendt](https://github.com/davidwendt)
+- Use `RAPIDS_BRANCH` file to handle the new branching strategy ([#870](https://github.com/rapidsai/rapids-cmake/pull/870)) [@robertmaynard](https://github.com/robertmaynard)
+- rapids-cmake: Add support for a version suffix to mean using main ([#864](https://github.com/rapidsai/rapids-cmake/pull/864)) [@robertmaynard](https://github.com/robertmaynard)
+- rapids_cpm_cccl: Remove support for CCCL < 2.8 ([#859](https://github.com/rapidsai/rapids-cmake/pull/859)) [@robertmaynard](https://github.com/robertmaynard)
+- Update to CCCL 3.0 ([#831](https://github.com/rapidsai/rapids-cmake/pull/831)) [@bdice](https://github.com/bdice)
+
+## 🛠️ Improvements
+
+- Update to CCCL v3.0.2 ([#878](https://github.com/rapidsai/rapids-cmake/pull/878)) [@bdice](https://github.com/bdice)
+- fix(docker): use versioned `-latest` tag for all `rapidsai` images ([#871](https://github.com/rapidsai/rapids-cmake/pull/871)) [@gforsyth](https://github.com/gforsyth)
+- Revert "Use RMM main (new branching strategy)" ([#869](https://github.com/rapidsai/rapids-cmake/pull/869)) [@robertmaynard](https://github.com/robertmaynard)
+- Rename `*.hpp.in` to `*.h.in` to signify that they are C headers ([#867](https://github.com/rapidsai/rapids-cmake/pull/867)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- refactor(shellcheck): enable for all files ([#866](https://github.com/rapidsai/rapids-cmake/pull/866)) [@gforsyth](https://github.com/gforsyth)
+- Remove nvidia and dask channels ([#865](https://github.com/rapidsai/rapids-cmake/pull/865)) [@vyasr](https://github.com/vyasr)
+- Upgrade cuCollections to fetch the new storage for better runtime performance ([#861](https://github.com/rapidsai/rapids-cmake/pull/861)) [@PointKernel](https://github.com/PointKernel)
+- Remove CUDA 11 support ([#855](https://github.com/rapidsai/rapids-cmake/pull/855)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Update to CCCL 3.0 ([#854](https://github.com/rapidsai/rapids-cmake/pull/854)) [@vyasr](https://github.com/vyasr)
+- Deprecate fmt and spdlog ([#853](https://github.com/rapidsai/rapids-cmake/pull/853)) [@vyasr](https://github.com/vyasr)
+- Forward-merge branch-25.06 into branch-25.08 ([#848](https://github.com/rapidsai/rapids-cmake/pull/848)) [@gforsyth](https://github.com/gforsyth)
+- Update to NVTX 3.2.0. ([#844](https://github.com/rapidsai/rapids-cmake/pull/844)) [@bdice](https://github.com/bdice)
+- Forward-merge branch-25.06 into branch-25.08 ([#839](https://github.com/rapidsai/rapids-cmake/pull/839)) [@gforsyth](https://github.com/gforsyth)
+- Temporarily use patched CCCL ([#833](https://github.com/rapidsai/rapids-cmake/pull/833)) [@bdice](https://github.com/bdice)
+- Require cpp subdirectory for RMM ([#832](https://github.com/rapidsai/rapids-cmake/pull/832)) [@bdice](https://github.com/bdice)
+
 # rapids-cmake 25.06.00 (5 Jun 2025)
 
 ## 🚨 Breaking Changes
 
@@ -78,7 +78,7 @@ The most commonly used function are:
 
 - `rapids_cuda_init_architectures(<project_name>)` handles initialization of `CMAKE_CUDA_ARCHITECTURE`. MUST BE CALLED BEFORE `PROJECT()`
 - `rapids_cuda_init_runtime(<mode>)` handles initialization of `CMAKE_CUDA_RUNTIME_LIBRARY`.
-- `rapids_cuda_patch_toolkit()` corrects bugs in the CUDAToolkit module that are being upstreamed.
+- `rapids_cuda_enable_fatbin_compression()` selects the optimal compile flags for fatbin compression to produce smaller binaries.
 
 ### cython
 
 
@@ -231,6 +231,16 @@
           "INSTALL_EXPORT_SET": 1
         }
       },
+      "rapids_cuda_enable_fatbin_compression": {
+        "pargs": {
+          "nargs": 0
+        },
+        "kwargs": {
+          "VARIABLE": 1,
+          "TARGET": 1,
+          "TUNE_FOR": 1
+        }
+      },
       "rapids_cuda_init_architectures": {
         "pargs": {
           "nargs": 1
 
@@ -34,6 +34,7 @@ dependencies:
         packages:
           - c-compiler
           - cxx-compiler
+          - cuda-nvcc
           - make
     specific:
       - output_types: conda
@@ -43,9 +44,17 @@ dependencies:
             packages:
               - gcc<13.0.0
           - matrix:
-              cuda: "12.*"
+              cuda: "12.[456]"
             packages:
               - gcc<14.0.0
+          - matrix:
+              cuda: "12.[89]"
+            packages:
+              - gcc<15.0.0
+          - matrix:
+              cuda: "13.*"
+            packages:
+              - gcc<16.0.0
       - output_types: conda
         matrices:
           - matrix:
@@ -56,12 +65,7 @@ dependencies:
               arch: aarch64
             packages:
               - sysroot_linux-aarch64==2.28
-      - output_types: conda
-        matrices:
-          - matrix:
-              cuda: "12.*"
-            packages:
-              - cuda-nvcc
+
   cuda_version:
     specific:
       - output_types: conda
@@ -86,14 +90,15 @@ dependencies:
               cuda: "12.9"
             packages:
               - cuda-version=12.9
-  cuda:
-    specific:
-      - output_types: conda
-        matrices:
           - matrix:
-              cuda: "12.*"
+              cuda: "13.0"
             packages:
-              - cuda-cupti-dev
+              - cuda-version=13.0
+  cuda:
+    common:
+      - output_types: conda
+        packages:
+          - cuda-cupti-dev
   docs:
     common:
       - output_types: [conda]
 
@@ -99,6 +99,7 @@ require.
     rapids_cuda_init_runtime </command/rapids_cuda_init_runtime>
     rapids_cuda_set_runtime </command/rapids_cuda_set_runtime>
     rapids_cuda_set_architectures [Advanced] </command/rapids_cuda_set_architectures>
+    rapids_cuda_enable_fatbin_compression </command/rapids_cuda_enable_fatbin_compression>
 
 
 .. _`export`:
 
@@ -0,0 +1 @@
+.. cmake-module:: ../../rapids-cmake/cuda/enable_fatbin_compression.cmake
@@ -166,15 +166,6 @@ function(rapids_cpm_nvcomp)
     endif()
   endif()
 
-  include("${rapids-cmake-dir}/cpm/detail/generate_patch_command.cmake")
-  rapids_cpm_generate_patch_command(nvcomp ${version} patch_command build_patch_only)
-
-  # Apply any patch commands to the proprietary binary
-
-  if(nvcomp_proprietary_binary AND PATCH_COMMAND IN_LIST find_args)
-    execute_process(COMMAND ${patch_command} WORKING_DIRECTORY ${nvcomp_ROOT})
-  endif()
-
   include("${rapids-cmake-dir}/cpm/find.cmake")
   rapids_cpm_find(nvcomp ${version} ${find_args} GLOBAL_TARGETS nvcomp::nvcomp
                   CPM_ARGS ${cpm_find_info} OPTIONS "BUILD_STATIC ON" "BUILD_TESTS OFF"
 
@@ -50,14 +50,14 @@
       "git_tag": "4879607c7086f3ebae2f8b9655d0b920c41d22ef"
     },
     "nvcomp": {
-      "version": "4.2.0.11",
+      "version": "5.0.0.6",
       "git_shallow": false,
       "git_url": "https://github.com/NVIDIA/nvcomp.git",
       "git_tag": "a6e4e64a177e07cd2e5c8c5e07bb66ffefceae84",
       "proprietary_binary_cuda_version_mapping": {
         "11": "11",
         "12": "12",
-        "13": "12"
+        "13": "13"
       },
       "proprietary_binary": {
         "x86_64-linux": "https://developer.download.nvidia.com/compute/nvcomp/redist/nvcomp/linux-x86_64/nvcomp-linux-x86_64-${version}_cuda${cuda-toolkit-version-mapping}-archive.tar.xz",
 
@@ -36,35 +36,42 @@ function(rapids_cuda_detect_architectures possible_archs_var gpu_archs)
     file(WRITE ${eval_file}
          "
 #include <cstdio>
+#include <cuda_runtime.h>
 #include <set>
 #include <string>
-using namespace std;
-int main(int argc, char** argv) {
-  set<string> archs;
+
+int main(int argc, char** argv)
+{
+  std::set<std::string> archs;
   int nDevices;
-  if((cudaGetDeviceCount(&nDevices) == cudaSuccess) && (nDevices > 0)) {
-    for(int dev=0;dev<nDevices;++dev) {
+  if ((cudaGetDeviceCount(&nDevices) == cudaSuccess) && (nDevices > 0)) {
+    for (int dev = 0; dev < nDevices; ++dev) {
       char buff[32];
       cudaDeviceProp prop;
-      if(cudaGetDeviceProperties(&prop, dev) != cudaSuccess) continue;
-      sprintf(buff, \"%d%d\", prop.major, prop.minor);
+      if (cudaGetDeviceProperties(&prop, dev) != cudaSuccess) { continue; }
+      if (prop.major >= 9) {
+        // Enable chip specific optimizations for sm90+
+        sprintf(buff, \"%d%da-real\", prop.major, prop.minor);
+      } else {
+        sprintf(buff, \"%d%d-real\", prop.major, prop.minor);
+      }
       archs.insert(buff);
     }
   }
-  if(archs.empty()) {
+  if (archs.empty()) {
     printf(\"${__gpu_archs}\");
   } else {
     bool first = true;
-    for(const auto& arch : archs) {
-      printf(first? \"%s\" : \";%s\", arch.c_str());
+    for (const auto& arch : archs) {
+      printf(first ? \"%s\" : \";%s\", arch.c_str());
       first = false;
     }
   }
   printf(\"\\n\");
   return 0;
-  }
+}
   ")
-    execute_process(COMMAND ${CMAKE_CUDA_COMPILER} -std=c++11 -o "${eval_exe}" "${eval_file}"
+    execute_process(COMMAND ${CMAKE_CUDA_COMPILER} -std=c++17 -o "${eval_exe}" "${eval_file}"
                     ERROR_FILE "${error_file}")
   endif()
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+.. cmake-module:: ../../rapids-cmake/cuda/enable_fatbin_compression.cmake`