From d916c5b1ab655a973f596178ef1857cd268da34f Mon Sep 17 00:00:00 2001
From: Brett Green
Date: Mon, 17 Nov 2025 11:44:50 -0500
Subject: [PATCH 1/2] Make FP precision configurable

Adds a TCNN_HALF_PRECISION option to the CMakeLists definitions and
removes the hard-coded macro from the header. Adds README instructions
for overriding the default via an environment variable when installing
via pip.
---
 CMakeLists.txt                | 16 ++++++++++++++++
 README.md                     | 14 ++++++++++++++
 bindings/torch/setup.py       |  5 +++++
 include/tiny-cuda-nn/common.h |  2 --
 4 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 22e79366..dbabb309 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -201,6 +201,22 @@ list(GET CMAKE_CUDA_ARCHITECTURES 0 MIN_GPU_ARCH)
 
 string(REPLACE "-virtual" "" MIN_GPU_ARCH "${MIN_GPU_ARCH}")
 
+if (MIN_GPU_ARCH EQUAL 61 OR MIN_GPU_ARCH LESS_EQUAL 52)
+	set(TCNN_HALF_PRECISION_DEFAULT OFF)
+else()
+	set(TCNN_HALF_PRECISION_DEFAULT ON)
+endif()
+
+option(TCNN_HALF_PRECISION "Enable half precision (FP16) arithmetic" ${TCNN_HALF_PRECISION_DEFAULT})
+
+if (TCNN_HALF_PRECISION)
+	list(APPEND TCNN_DEFINITIONS -DTCNN_HALF_PRECISION=1)
+	message(STATUS "TCNN_HALF_PRECISION: ON")
+else()
+	list(APPEND TCNN_DEFINITIONS -DTCNN_HALF_PRECISION=0)
+	message(STATUS "TCNN_HALF_PRECISION: OFF")
+endif()
+
 message(STATUS "Targeting CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
 if (TCNN_HAS_PARENT)
 	set(TCNN_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} PARENT_SCOPE)

diff --git a/README.md b/README.md
index 1fef2101..b524772f 100644
--- a/README.md
+++ b/README.md
@@ -220,6 +220,20 @@ tiny-cuda-nn$ cd bindings/torch
 tiny-cuda-nn/bindings/torch$ python setup.py install
 ```
 
+By default, the extension automatically enables half precision (FP16) on GPUs with fast FP16 support (Volta, Turing, Ampere, and newer) and disables it on older architectures or those with slow FP16 (e.g., Pascal/GTX 10-series).
+
+To override this behavior (e.g., to force FP16 on unsupported hardware or to disable it for debugging), set the `TCNN_HALF_PRECISION` environment variable before installation:
+
+- `TCNN_HALF_PRECISION=0`: disable FP16
+- `TCNN_HALF_PRECISION=1`: enable FP16
+
+Example:
+```sh
+# Linux / macOS (disable FP16)
+export TCNN_HALF_PRECISION=0
+pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
+```
+
 Upon success, you can use __tiny-cuda-nn__ models as in the following example:
 ```py
 import commentjson as json

diff --git a/bindings/torch/setup.py b/bindings/torch/setup.py
index 594b2f1f..ecea5112 100644
--- a/bindings/torch/setup.py
+++ b/bindings/torch/setup.py
@@ -146,6 +146,11 @@ def find_cl_path():
 	"-DTCNN_RTC_USE_FAST_MATH",
 ]
 
+if "TCNN_HALF_PRECISION" in os.environ:
+	force_half = os.environ["TCNN_HALF_PRECISION"].lower() in ["1", "true", "on", "yes"]
+	base_definitions.append(f"-DTCNN_HALF_PRECISION={int(force_half)}")
+	print(f"Forcing TCNN_HALF_PRECISION to {'ON' if force_half else 'OFF'}")
+
 base_source_files = [
 	"tinycudann/bindings.cpp",
 	"../../dependencies/fmt/src/format.cc",

diff --git a/include/tiny-cuda-nn/common.h b/include/tiny-cuda-nn/common.h
index abb3820c..19bef532 100644
--- a/include/tiny-cuda-nn/common.h
+++ b/include/tiny-cuda-nn/common.h
@@ -101,8 +101,6 @@ static constexpr bool PARAMS_ALIGNED = false;
 static constexpr bool PARAMS_ALIGNED = true;
 #endif
 
-#define TCNN_HALF_PRECISION (!(TCNN_MIN_GPU_ARCH == 61 || TCNN_MIN_GPU_ARCH <= 52))
-
 // TCNN has the following behavior depending on GPU arch.
 // Refer to the first row of the table at the following URL for information about
 // when to pick fp16 versus fp32 precision for maximum performance.

From 7a9d1974348df5a89d0e7744919b754b0689e2a2 Mon Sep 17 00:00:00 2001
From: Brett Green
Date: Mon, 17 Nov 2025 12:33:10 -0500
Subject: [PATCH 2/2] Add explicit printing of TCNN_HALF_PRECISION

Auto-detects the default in setup.py when the TCNN_HALF_PRECISION
environment variable is unset, prints the chosen setting in both
cases, and fails compilation with a clear error if the build system
leaves TCNN_HALF_PRECISION undefined.
---
 bindings/torch/setup.py       | 13 ++++++++++---
 include/tiny-cuda-nn/common.h |  4 ++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/bindings/torch/setup.py b/bindings/torch/setup.py
index ecea5112..041936c4 100644
--- a/bindings/torch/setup.py
+++ b/bindings/torch/setup.py
@@ -147,9 +147,16 @@ def find_cl_path():
 ]
 
 if "TCNN_HALF_PRECISION" in os.environ:
-	force_half = os.environ["TCNN_HALF_PRECISION"].lower() in ["1", "true", "on", "yes"]
-	base_definitions.append(f"-DTCNN_HALF_PRECISION={int(force_half)}")
-	print(f"Forcing TCNN_HALF_PRECISION to {'ON' if force_half else 'OFF'}")
+	enable_half = os.environ["TCNN_HALF_PRECISION"].lower() in ["1", "true", "on", "yes"]
+	print(f"Forcing TCNN_HALF_PRECISION to {'ON' if enable_half else 'OFF'}")
+else:
+	# Mirror the CMake default: FP16 off on arch 61 (slow FP16) and arch <= 52 (no FP16).
+	if min_compute_capability == 61 or min_compute_capability <= 52:
+		enable_half = False
+	else:
+		enable_half = True
+	print(f"Auto-detecting TCNN_HALF_PRECISION: {'ON' if enable_half else 'OFF'} (Arch: {min_compute_capability})")
+base_definitions.append(f"-DTCNN_HALF_PRECISION={int(enable_half)}")
 
 base_source_files = [
 	"tinycudann/bindings.cpp",

diff --git a/include/tiny-cuda-nn/common.h b/include/tiny-cuda-nn/common.h
index 19bef532..ef36d86d 100644
--- a/include/tiny-cuda-nn/common.h
+++ b/include/tiny-cuda-nn/common.h
@@ -101,6 +101,10 @@ static constexpr bool PARAMS_ALIGNED = false;
 static constexpr bool PARAMS_ALIGNED = true;
 #endif
 
+#ifndef TCNN_HALF_PRECISION
+#error "TCNN_HALF_PRECISION is undefined. The build system must define this explicitly."
+#endif
+
 // TCNN has the following behavior depending on GPU arch.
 // Refer to the first row of the table at the following URL for information about
 // when to pick fp16 versus fp32 precision for maximum performance.
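
A quick way to confirm which precision an installed build ended up with is to inspect the dtype coming out of a network. This is a minimal sketch, not part of the patches above: it assumes the standard `tinycudann` PyTorch API from the README and a CUDA-capable GPU, and the `CutlassMLP` config is an arbitrary placeholder (chosen because, unlike `FullyFusedMLP`, it does not require FP16 support).

```py
import torch
import tinycudann as tcnn

# Arbitrary placeholder config; CutlassMLP works in both precision modes.
config = {
	"otype": "CutlassMLP",
	"activation": "ReLU",
	"output_activation": "None",
	"n_neurons": 64,
	"n_hidden_layers": 2,
}

model = tcnn.Network(3, 1, config)     # 3 input dims, 1 output dim
x = torch.rand(128, 3, device="cuda")  # batch of 128 random inputs
y = model(x)

# torch.float16 => built with TCNN_HALF_PRECISION=1
# torch.float32 => built with TCNN_HALF_PRECISION=0
print(y.dtype)
```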