Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,22 @@ list(GET CMAKE_CUDA_ARCHITECTURES 0 MIN_GPU_ARCH)

string(REPLACE "-virtual" "" MIN_GPU_ARCH "${MIN_GPU_ARCH}")

# FP16 is slow or unusable on sm_61 (GTX 10-series) and on sm_52 or older,
# so default half precision OFF there and ON everywhere else.
set(TCNN_HALF_PRECISION_DEFAULT ON)
if (MIN_GPU_ARCH EQUAL 61 OR MIN_GPU_ARCH LESS_EQUAL 52)
	set(TCNN_HALF_PRECISION_DEFAULT OFF)
endif()

# User-overridable cache option; the computed default only applies on first configure.
option(TCNN_HALF_PRECISION "Enable half precision (FP16) arithmetic" ${TCNN_HALF_PRECISION_DEFAULT})

# Translate the option into an explicit 0/1 compile definition and report the choice.
if (TCNN_HALF_PRECISION)
	set(TCNN_HALF_PRECISION_VALUE 1)
	set(TCNN_HALF_PRECISION_STATE ON)
else()
	set(TCNN_HALF_PRECISION_VALUE 0)
	set(TCNN_HALF_PRECISION_STATE OFF)
endif()
list(APPEND TCNN_DEFINITIONS -DTCNN_HALF_PRECISION=${TCNN_HALF_PRECISION_VALUE})
message(STATUS "TCNN_HALF_PRECISION: ${TCNN_HALF_PRECISION_STATE}")

message(STATUS "Targeting CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
if (TCNN_HAS_PARENT)
set(TCNN_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} PARENT_SCOPE)
Expand Down
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,20 @@ tiny-cuda-nn$ cd bindings/torch
tiny-cuda-nn/bindings/torch$ python setup.py install
```

By default, the extension automatically enables half precision (FP16) on GPUs with good support (Volta, Turing, Ampere, etc.) and disables it on older architectures or those with slow FP16 (e.g., Pascal/GTX 10-series).

To override this behavior (e.g., to force FP16 on unsupported hardware, or to disable it for debugging), set the `TCNN_HALF_PRECISION` environment variable before installation:

- `TCNN_HALF_PRECISION=0` — disable FP16
- `TCNN_HALF_PRECISION=1` — enable FP16

Example:
```sh
# Linux / macOS (Disable FP16)
export TCNN_HALF_PRECISION=0
pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
```

Upon success, you can use __tiny-cuda-nn__ models as in the following example:
```py
import commentjson as json
Expand Down
12 changes: 12 additions & 0 deletions bindings/torch/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,18 @@ def find_cl_path():
"-DTCNN_RTC_USE_FAST_MATH",
]

# Decide whether to compile with FP16 (half precision) support.
# The TCNN_HALF_PRECISION environment variable, when set, overrides auto-detection.
if "TCNN_HALF_PRECISION" in os.environ:
	# Tolerate surrounding whitespace and any casing; previously " 1 " or "TRUE "
	# would silently disable FP16.
	requested = os.environ["TCNN_HALF_PRECISION"].strip().lower()
	enable_half = requested in ["1", "true", "on", "yes"]
	if not enable_half and requested not in ["0", "false", "off", "no"]:
		# Any unrecognized value maps to OFF; warn instead of failing silently.
		print(f"Warning: unrecognized TCNN_HALF_PRECISION value '{requested}'; treating as OFF")
	print(f"Forcing TCNN_HALF_PRECISION to {'ON' if enable_half else 'OFF'}")
else:
	# sm_61 (GTX 10-series) has crippled FP16 throughput and sm_52 or older lacks
	# usable FP16 arithmetic, so auto-disable half precision there.
	enable_half = not (min_compute_capability == 61 or min_compute_capability <= 52)
	print(f"Auto-detecting TCNN_HALF_PRECISION: {'ON' if enable_half else 'OFF'} (Arch: {min_compute_capability})")
# Single append instead of one per branch: keeps the definition list logic in one place.
base_definitions.append(f"-DTCNN_HALF_PRECISION={int(enable_half)}")

base_source_files = [
"tinycudann/bindings.cpp",
"../../dependencies/fmt/src/format.cc",
Expand Down
4 changes: 3 additions & 1 deletion include/tiny-cuda-nn/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ static constexpr bool PARAMS_ALIGNED = false;
static constexpr bool PARAMS_ALIGNED = true;
#endif

// NOTE(review): as written, this unconditional #define makes the #ifndef guard
// below unreachable and overrides any -DTCNN_HALF_PRECISION=... supplied by the
// build system (CMakeLists.txt / setup.py). If the build system is now the sole
// authority for this macro — which the guard's message implies — this #define
// line should be removed. TODO confirm against the full header; this may be a
// diff artifact where the line above was deleted.
#define TCNN_HALF_PRECISION (!(TCNN_MIN_GPU_ARCH == 61 || TCNN_MIN_GPU_ARCH <= 52))
// Fail fast at compile time when the build system forgot to define the macro,
// rather than miscompiling with an implicit default.
#ifndef TCNN_HALF_PRECISION
#error "TCNN_HALF_PRECISION is undefined. The build system must define this explicitly."
#endif

// TCNN has the following behavior depending on GPU arch.
// Refer to the first row of the table at the following URL for information about
Expand Down
Loading