Skip to content

Commit 58180f3

Browse files
committed
Merge remote-tracking branch 'upstream/branch-25.10' into fetch-bitmap
2 parents ef87c32 + b7bed52 commit 58180f3

32 files changed

+581
-91
lines changed

.github/workflows/pr.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
- docs-build
1818
- telemetry-setup
1919
secrets: inherit
20-
uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.10
20+
uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda13.0
2121
telemetry-setup:
2222
runs-on: ubuntu-latest
2323
continue-on-error: true
@@ -32,18 +32,18 @@ jobs:
3232
checks:
3333
secrets: inherit
3434
needs: telemetry-setup
35-
uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.10
35+
uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda13.0
3636
with:
3737
ignored_pr_jobs: telemetry-summarize
3838
conda-cpp-tests:
3939
secrets: inherit
40-
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.10
40+
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda13.0
4141
with:
4242
build_type: pull-request
4343
script: ci/test_cpp.sh
4444
docs-build:
4545
secrets: inherit
46-
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10
46+
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0
4747
with:
4848
build_type: pull-request
4949
node_type: "cpu4"

CHANGELOG.md

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,50 @@
1+
# rapids-cmake 25.08.00 (6 Aug 2025)
2+
3+
## 🚨 Breaking Changes
4+
5+
- rapids_cpm_cccl: Remove support for CCCL < 2.8 ([#859](https://github.com/rapidsai/rapids-cmake/pull/859)) [@robertmaynard](https://github.com/robertmaynard)
6+
- Remove CUDA 11 support ([#855](https://github.com/rapidsai/rapids-cmake/pull/855)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
7+
- Update to CCCL 3.0 ([#854](https://github.com/rapidsai/rapids-cmake/pull/854)) [@vyasr](https://github.com/vyasr)
8+
- Require cpp subdirectory for RMM ([#832](https://github.com/rapidsai/rapids-cmake/pull/832)) [@bdice](https://github.com/bdice)
9+
10+
## 🐛 Bug Fixes
11+
12+
- CCCL: disable PDL ([#876](https://github.com/rapidsai/rapids-cmake/pull/876)) [@bdice](https://github.com/bdice)
13+
- Use RMM main (new branching strategy) to fix downstream fetching issues ([#862](https://github.com/rapidsai/rapids-cmake/pull/862)) [@bdice](https://github.com/bdice)
14+
- rapids_cpm_cccl: Update to new location of cccl-config ([#858](https://github.com/rapidsai/rapids-cmake/pull/858)) [@robertmaynard](https://github.com/robertmaynard)
15+
- Remove CCCL patches that aren't used anymore ([#857](https://github.com/rapidsai/rapids-cmake/pull/857)) [@robertmaynard](https://github.com/robertmaynard)
16+
- Fetch the atomic fix in CCCL 3.0 ([#856](https://github.com/rapidsai/rapids-cmake/pull/856)) [@PointKernel](https://github.com/PointKernel)
17+
18+
## 📖 Documentation
19+
20+
- add docs on CI workflow inputs ([#868](https://github.com/rapidsai/rapids-cmake/pull/868)) [@jameslamb](https://github.com/jameslamb)
21+
22+
## 🚀 New Features
23+
24+
- Update CCCL version tag for PDL disable ([#879](https://github.com/rapidsai/rapids-cmake/pull/879)) [@davidwendt](https://github.com/davidwendt)
25+
- Use `RAPIDS_BRANCH` file to handle the new branching strategy ([#870](https://github.com/rapidsai/rapids-cmake/pull/870)) [@robertmaynard](https://github.com/robertmaynard)
26+
- rapids-cmake: Add support for a version suffix to mean using main ([#864](https://github.com/rapidsai/rapids-cmake/pull/864)) [@robertmaynard](https://github.com/robertmaynard)
27+
- rapids_cpm_cccl: Remove support for CCCL < 2.8 ([#859](https://github.com/rapidsai/rapids-cmake/pull/859)) [@robertmaynard](https://github.com/robertmaynard)
28+
- Update to CCCL 3.0 ([#831](https://github.com/rapidsai/rapids-cmake/pull/831)) [@bdice](https://github.com/bdice)
29+
30+
## 🛠️ Improvements
31+
32+
- Update to CCCL v3.0.2 ([#878](https://github.com/rapidsai/rapids-cmake/pull/878)) [@bdice](https://github.com/bdice)
33+
- fix(docker): use versioned `-latest` tag for all `rapidsai` images ([#871](https://github.com/rapidsai/rapids-cmake/pull/871)) [@gforsyth](https://github.com/gforsyth)
34+
- Revert "Use RMM main (new branching strategy)" ([#869](https://github.com/rapidsai/rapids-cmake/pull/869)) [@robertmaynard](https://github.com/robertmaynard)
35+
- Rename `*.hpp.in` to `*.h.in` to signify that they are C headers ([#867](https://github.com/rapidsai/rapids-cmake/pull/867)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
36+
- refactor(shellcheck): enable for all files ([#866](https://github.com/rapidsai/rapids-cmake/pull/866)) [@gforsyth](https://github.com/gforsyth)
37+
- Remove nvidia and dask channels ([#865](https://github.com/rapidsai/rapids-cmake/pull/865)) [@vyasr](https://github.com/vyasr)
38+
- Upgrade cuCollections to fetch the new storage for better runtime performance ([#861](https://github.com/rapidsai/rapids-cmake/pull/861)) [@PointKernel](https://github.com/PointKernel)
39+
- Remove CUDA 11 support ([#855](https://github.com/rapidsai/rapids-cmake/pull/855)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
40+
- Update to CCCL 3.0 ([#854](https://github.com/rapidsai/rapids-cmake/pull/854)) [@vyasr](https://github.com/vyasr)
41+
- Deprecate fmt and spdlog ([#853](https://github.com/rapidsai/rapids-cmake/pull/853)) [@vyasr](https://github.com/vyasr)
42+
- Forward-merge branch-25.06 into branch-25.08 ([#848](https://github.com/rapidsai/rapids-cmake/pull/848)) [@gforsyth](https://github.com/gforsyth)
43+
- Update to NVTX 3.2.0. ([#844](https://github.com/rapidsai/rapids-cmake/pull/844)) [@bdice](https://github.com/bdice)
44+
- Forward-merge branch-25.06 into branch-25.08 ([#839](https://github.com/rapidsai/rapids-cmake/pull/839)) [@gforsyth](https://github.com/gforsyth)
45+
- Temporarily use patched CCCL ([#833](https://github.com/rapidsai/rapids-cmake/pull/833)) [@bdice](https://github.com/bdice)
46+
- Require cpp subdirectory for RMM ([#832](https://github.com/rapidsai/rapids-cmake/pull/832)) [@bdice](https://github.com/bdice)
47+
148
# rapids-cmake 25.06.00 (5 Jun 2025)
249

350
## 🚨 Breaking Changes

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ The most commonly used functions are:
7878

7979
- `rapids_cuda_init_architectures(<project_name>)` handles initialization of `CMAKE_CUDA_ARCHITECTURES`. MUST BE CALLED BEFORE `PROJECT()`
8080
- `rapids_cuda_init_runtime(<mode>)` handles initialization of `CMAKE_CUDA_RUNTIME_LIBRARY`.
81-
- `rapids_cuda_patch_toolkit()` corrects bugs in the CUDAToolkit module that are being upstreamed.
81+
- `rapids_cuda_enable_fatbin_compression()` selects the optimal compile flags for fatbin compression to produce smaller binaries.
8282

8383
### cython
8484

cmake-format-rapids-cmake.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,16 @@
231231
"INSTALL_EXPORT_SET": 1
232232
}
233233
},
234+
"rapids_cuda_enable_fatbin_compression": {
235+
"pargs": {
236+
"nargs": 0
237+
},
238+
"kwargs": {
239+
"VARIABLE": 1,
240+
"TARGET": 1,
241+
"TUNE_FOR": 1
242+
}
243+
},
234244
"rapids_cuda_init_architectures": {
235245
"pargs": {
236246
"nargs": 1

dependencies.yaml

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ dependencies:
3434
packages:
3535
- c-compiler
3636
- cxx-compiler
37+
- cuda-nvcc
3738
- make
3839
specific:
3940
- output_types: conda
@@ -43,9 +44,17 @@ dependencies:
4344
packages:
4445
- gcc<13.0.0
4546
- matrix:
46-
cuda: "12.*"
47+
cuda: "12.[456]"
4748
packages:
4849
- gcc<14.0.0
50+
- matrix:
51+
cuda: "12.[89]"
52+
packages:
53+
- gcc<15.0.0
54+
- matrix:
55+
cuda: "13.*"
56+
packages:
57+
- gcc<16.0.0
4958
- output_types: conda
5059
matrices:
5160
- matrix:
@@ -56,12 +65,7 @@ dependencies:
5665
arch: aarch64
5766
packages:
5867
- sysroot_linux-aarch64==2.28
59-
- output_types: conda
60-
matrices:
61-
- matrix:
62-
cuda: "12.*"
63-
packages:
64-
- cuda-nvcc
68+
6569
cuda_version:
6670
specific:
6771
- output_types: conda
@@ -86,14 +90,15 @@ dependencies:
8690
cuda: "12.9"
8791
packages:
8892
- cuda-version=12.9
89-
cuda:
90-
specific:
91-
- output_types: conda
92-
matrices:
9393
- matrix:
94-
cuda: "12.*"
94+
cuda: "13.0"
9595
packages:
96-
- cuda-cupti-dev
96+
- cuda-version=13.0
97+
cuda:
98+
common:
99+
- output_types: conda
100+
packages:
101+
- cuda-cupti-dev
97102
docs:
98103
common:
99104
- output_types: [conda]

docs/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ require.
9999
rapids_cuda_init_runtime </command/rapids_cuda_init_runtime>
100100
rapids_cuda_set_runtime </command/rapids_cuda_set_runtime>
101101
rapids_cuda_set_architectures [Advanced] </command/rapids_cuda_set_architectures>
102+
rapids_cuda_enable_fatbin_compression </command/rapids_cuda_enable_fatbin_compression>
102103

103104

104105
.. _`export`:
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.. cmake-module:: ../../rapids-cmake/cuda/enable_fatbin_compression.cmake

rapids-cmake/cpm/nvcomp.cmake

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -166,15 +166,6 @@ function(rapids_cpm_nvcomp)
166166
endif()
167167
endif()
168168

169-
include("${rapids-cmake-dir}/cpm/detail/generate_patch_command.cmake")
170-
rapids_cpm_generate_patch_command(nvcomp ${version} patch_command build_patch_only)
171-
172-
# Apply any patch commands to the proprietary binary
173-
174-
if(nvcomp_proprietary_binary AND PATCH_COMMAND IN_LIST find_args)
175-
execute_process(COMMAND ${patch_command} WORKING_DIRECTORY ${nvcomp_ROOT})
176-
endif()
177-
178169
include("${rapids-cmake-dir}/cpm/find.cmake")
179170
rapids_cpm_find(nvcomp ${version} ${find_args} GLOBAL_TARGETS nvcomp::nvcomp
180171
CPM_ARGS ${cpm_find_info} OPTIONS "BUILD_STATIC ON" "BUILD_TESTS OFF"

rapids-cmake/cpm/versions.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,14 @@
5050
"git_tag": "4879607c7086f3ebae2f8b9655d0b920c41d22ef"
5151
},
5252
"nvcomp": {
53-
"version": "4.2.0.11",
53+
"version": "5.0.0.6",
5454
"git_shallow": false,
5555
"git_url": "https://github.com/NVIDIA/nvcomp.git",
5656
"git_tag": "a6e4e64a177e07cd2e5c8c5e07bb66ffefceae84",
5757
"proprietary_binary_cuda_version_mapping": {
5858
"11": "11",
5959
"12": "12",
60-
"13": "12"
60+
"13": "13"
6161
},
6262
"proprietary_binary": {
6363
"x86_64-linux": "https://developer.download.nvidia.com/compute/nvcomp/redist/nvcomp/linux-x86_64/nvcomp-linux-x86_64-${version}_cuda${cuda-toolkit-version-mapping}-archive.tar.xz",

rapids-cmake/cuda/detail/detect_architectures.cmake

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,35 +36,42 @@ function(rapids_cuda_detect_architectures possible_archs_var gpu_archs)
3636
file(WRITE ${eval_file}
3737
"
3838
#include <cstdio>
39+
#include <cuda_runtime.h>
3940
#include <set>
4041
#include <string>
41-
using namespace std;
42-
int main(int argc, char** argv) {
43-
set<string> archs;
42+
43+
int main(int argc, char** argv)
44+
{
45+
std::set<std::string> archs;
4446
int nDevices;
45-
if((cudaGetDeviceCount(&nDevices) == cudaSuccess) && (nDevices > 0)) {
46-
for(int dev=0;dev<nDevices;++dev) {
47+
if ((cudaGetDeviceCount(&nDevices) == cudaSuccess) && (nDevices > 0)) {
48+
for (int dev = 0; dev < nDevices; ++dev) {
4749
char buff[32];
4850
cudaDeviceProp prop;
49-
if(cudaGetDeviceProperties(&prop, dev) != cudaSuccess) continue;
50-
sprintf(buff, \"%d%d\", prop.major, prop.minor);
51+
if (cudaGetDeviceProperties(&prop, dev) != cudaSuccess) { continue; }
52+
if (prop.major >= 9) {
53+
// Enable chip specific optimizations for sm90+
54+
sprintf(buff, \"%d%da-real\", prop.major, prop.minor);
55+
} else {
56+
sprintf(buff, \"%d%d-real\", prop.major, prop.minor);
57+
}
5158
archs.insert(buff);
5259
}
5360
}
54-
if(archs.empty()) {
61+
if (archs.empty()) {
5562
printf(\"${__gpu_archs}\");
5663
} else {
5764
bool first = true;
58-
for(const auto& arch : archs) {
59-
printf(first? \"%s\" : \";%s\", arch.c_str());
65+
for (const auto& arch : archs) {
66+
printf(first ? \"%s\" : \";%s\", arch.c_str());
6067
first = false;
6168
}
6269
}
6370
printf(\"\\n\");
6471
return 0;
65-
}
72+
}
6673
")
67-
execute_process(COMMAND ${CMAKE_CUDA_COMPILER} -std=c++11 -o "${eval_exe}" "${eval_file}"
74+
execute_process(COMMAND ${CMAKE_CUDA_COMPILER} -std=c++17 -o "${eval_exe}" "${eval_file}"
6875
ERROR_FILE "${error_file}")
6976
endif()
7077

0 commit comments

Comments
 (0)