From d916c5b1ab655a973f596178ef1857cd268da34f Mon Sep 17 00:00:00 2001
From: Brett Green
Date: Mon, 17 Nov 2025 11:44:50 -0500
Subject: [PATCH 1/2] Make FP precision configurable

Adds a TCNN_HALF_PRECISION option to the CMakeLists definitions and
removes the hard-coded macro from the header. Adds README instructions
for overriding the default via an environment variable when installing
via pip.
---
 CMakeLists.txt                | 16 ++++++++++++++++
 README.md                     | 14 ++++++++++++++
 bindings/torch/setup.py       |  5 +++++
 include/tiny-cuda-nn/common.h |  2 --
 4 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 22e79366..dbabb309 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -201,6 +201,22 @@ list(GET CMAKE_CUDA_ARCHITECTURES 0 MIN_GPU_ARCH)
 
 string(REPLACE "-virtual" "" MIN_GPU_ARCH "${MIN_GPU_ARCH}")
 
+if (MIN_GPU_ARCH EQUAL 61 OR MIN_GPU_ARCH LESS_EQUAL 52)
+	set(TCNN_HALF_PRECISION_DEFAULT OFF)
+else()
+	set(TCNN_HALF_PRECISION_DEFAULT ON)
+endif()
+
+option(TCNN_HALF_PRECISION "Enable half precision (FP16) arithmetic" ${TCNN_HALF_PRECISION_DEFAULT})
+
+if (TCNN_HALF_PRECISION)
+	list(APPEND TCNN_DEFINITIONS -DTCNN_HALF_PRECISION=1)
+	message(STATUS "TCNN_HALF_PRECISION: ON")
+else()
+	list(APPEND TCNN_DEFINITIONS -DTCNN_HALF_PRECISION=0)
+	message(STATUS "TCNN_HALF_PRECISION: OFF")
+endif()
+
 message(STATUS "Targeting CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
 if (TCNN_HAS_PARENT)
 	set(TCNN_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} PARENT_SCOPE)

diff --git a/README.md b/README.md
index 1fef2101..b524772f 100644
--- a/README.md
+++ b/README.md
@@ -220,6 +220,20 @@ tiny-cuda-nn$ cd bindings/torch
 tiny-cuda-nn/bindings/torch$ python setup.py install
 ```
 
+By default, the extension automatically enables half precision (FP16) on GPUs with fast FP16 support (Volta, Turing, Ampere, and newer) and disables it on older architectures or those with slow FP16 (e.g., Pascal/GTX 10-series).
+
+To override this behavior (e.g., to force FP16 on unsupported hardware or to disable it for debugging), set the `TCNN_HALF_PRECISION` environment variable before installation:
+
+- `TCNN_HALF_PRECISION=0`: disable FP16
+- `TCNN_HALF_PRECISION=1`: enable FP16
+
+Example:
+```sh
+# Linux / macOS (disable FP16)
+export TCNN_HALF_PRECISION=0
+pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
+```
+
 Upon success, you can use __tiny-cuda-nn__ models as in the following example:
 ```py
 import commentjson as json

diff --git a/bindings/torch/setup.py b/bindings/torch/setup.py
index 594b2f1f..ecea5112 100644
--- a/bindings/torch/setup.py
+++ b/bindings/torch/setup.py
@@ -146,6 +146,11 @@ def find_cl_path():
 	"-DTCNN_RTC_USE_FAST_MATH",
 ]
 
+if "TCNN_HALF_PRECISION" in os.environ:
+	force_half = os.environ["TCNN_HALF_PRECISION"].lower() in ["1", "true", "on", "yes"]
+	base_definitions.append(f"-DTCNN_HALF_PRECISION={int(force_half)}")
+	print(f"Forcing TCNN_HALF_PRECISION to {'ON' if force_half else 'OFF'}")
+
 base_source_files = [
 	"tinycudann/bindings.cpp",
 	"../../dependencies/fmt/src/format.cc",

diff --git a/include/tiny-cuda-nn/common.h b/include/tiny-cuda-nn/common.h
index abb3820c..19bef532 100644
--- a/include/tiny-cuda-nn/common.h
+++ b/include/tiny-cuda-nn/common.h
@@ -101,8 +101,6 @@ static constexpr bool PARAMS_ALIGNED = false;
 static constexpr bool PARAMS_ALIGNED = true;
 #endif
 
-#define TCNN_HALF_PRECISION (!(TCNN_MIN_GPU_ARCH == 61 || TCNN_MIN_GPU_ARCH <= 52))
-
 // TCNN has the following behavior depending on GPU arch.
 // Refer to the first row of the table at the following URL for information about
 // when to pick fp16 versus fp32 precision for maximum performance.

From 7a9d1974348df5a89d0e7744919b754b0689e2a2 Mon Sep 17 00:00:00 2001
From: Brett Green
Date: Mon, 17 Nov 2025 12:33:10 -0500
Subject: [PATCH 2/2] Add explicit printing of TCNN_HALF_PRECISION

Auto-detects the default in setup.py when the TCNN_HALF_PRECISION
environment variable is unset, prints the chosen setting in both
cases, and fails compilation with a clear error if the build system
leaves TCNN_HALF_PRECISION undefined.
---
 bindings/torch/setup.py       | 13 ++++++++++---
 include/tiny-cuda-nn/common.h |  4 ++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/bindings/torch/setup.py b/bindings/torch/setup.py
index ecea5112..041936c4 100644
--- a/bindings/torch/setup.py
+++ b/bindings/torch/setup.py
@@ -147,9 +147,16 @@ def find_cl_path():
 ]
 
 if "TCNN_HALF_PRECISION" in os.environ:
-	force_half = os.environ["TCNN_HALF_PRECISION"].lower() in ["1", "true", "on", "yes"]
-	base_definitions.append(f"-DTCNN_HALF_PRECISION={int(force_half)}")
-	print(f"Forcing TCNN_HALF_PRECISION to {'ON' if force_half else 'OFF'}")
+	enable_half = os.environ["TCNN_HALF_PRECISION"].lower() in ["1", "true", "on", "yes"]
+	print(f"Forcing TCNN_HALF_PRECISION to {'ON' if enable_half else 'OFF'}")
+else:
+	# Mirror the CMake default: FP16 off on arch 61 (slow FP16) and arch <= 52 (no FP16).
+	if min_compute_capability == 61 or min_compute_capability <= 52:
+		enable_half = False
+	else:
+		enable_half = True
+	print(f"Auto-detecting TCNN_HALF_PRECISION: {'ON' if enable_half else 'OFF'} (Arch: {min_compute_capability})")
+base_definitions.append(f"-DTCNN_HALF_PRECISION={int(enable_half)}")
 
 base_source_files = [
 	"tinycudann/bindings.cpp",

diff --git a/include/tiny-cuda-nn/common.h b/include/tiny-cuda-nn/common.h
index 19bef532..ef36d86d 100644
--- a/include/tiny-cuda-nn/common.h
+++ b/include/tiny-cuda-nn/common.h
@@ -101,6 +101,10 @@ static constexpr bool PARAMS_ALIGNED = false;
 static constexpr bool PARAMS_ALIGNED = true;
 #endif
 
+#ifndef TCNN_HALF_PRECISION
+#error "TCNN_HALF_PRECISION is undefined. The build system must define this explicitly."
+#endif
+
 // TCNN has the following behavior depending on GPU arch.
 // Refer to the first row of the table at the following URL for information about
 // when to pick fp16 versus fp32 precision for maximum performance.
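
A quick way to confirm which precision an installed build ended up with is to inspect the dtype coming out of a network. This is a minimal sketch, not part of the patches above: it assumes the standard `tinycudann` PyTorch API from the README and a CUDA-capable GPU, and the `CutlassMLP` config is an arbitrary placeholder (chosen because, unlike `FullyFusedMLP`, it does not require FP16 support).

```py
import torch
import tinycudann as tcnn

# Arbitrary placeholder config; CutlassMLP works in both precision modes.
config = {
	"otype": "CutlassMLP",
	"activation": "ReLU",
	"output_activation": "None",
	"n_neurons": 64,
	"n_hidden_layers": 2,
}

model = tcnn.Network(3, 1, config)     # 3 input dims, 1 output dim
x = torch.rand(128, 3, device="cuda")  # batch of 128 random inputs
y = model(x)

# torch.float16 => built with TCNN_HALF_PRECISION=1
# torch.float32 => built with TCNN_HALF_PRECISION=0
print(y.dtype)
```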