diff --git a/examples_tests b/examples_tests index 82bf36b2a0..498ffd21a0 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 82bf36b2a0cc3f0b02d19f91487352223937d22d +Subproject commit 498ffd21a06b9e9c74d20b37860421d17fe7cf49 diff --git a/include/nbl/asset/utils/IMeshPacker.h b/include/nbl/asset/utils/IMeshPacker.h index 3f09062b18..355d792782 100644 --- a/include/nbl/asset/utils/IMeshPacker.h +++ b/include/nbl/asset/utils/IMeshPacker.h @@ -6,7 +6,7 @@ #define __NBL_ASSET_I_MESH_PACKER_H_INCLUDED__ #include "nbl/asset/utils/IMeshManipulator.h" -#include "nbl/core/math/morton.h" +#include "nbl/builtin/hlsl/math/morton.hlsl" namespace nbl { diff --git a/include/nbl/builtin/hlsl/luma_meter/common.hlsl b/include/nbl/builtin/hlsl/luma_meter/common.hlsl new file mode 100644 index 0000000000..55d1713619 --- /dev/null +++ b/include/nbl/builtin/hlsl/luma_meter/common.hlsl @@ -0,0 +1,35 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_LUMA_METER_COMMON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_LUMA_METER_COMMON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace luma_meter +{ + +struct MeteringWindow +{ + using this_t = MeteringWindow; + float32_t2 meteringWindowScale; + float32_t2 meteringWindowOffset; + + static this_t create(float32_t2 scale, float32_t2 offset) { + this_t retval; + retval.meteringWindowScale = scale; + retval.meteringWindowOffset = offset; + return retval; + } +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl new file mode 100644 index 0000000000..20af804603 --- /dev/null +++ b/include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl @@ -0,0 +1,287 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_ +#define _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_ + +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/subgroup_arithmetic.hlsl" +#include "nbl/builtin/hlsl/workgroup/basic.hlsl" +#include "nbl/builtin/hlsl/workgroup/arithmetic.hlsl" +#include "nbl/builtin/hlsl/type_traits.hlsl" +#include "nbl/builtin/hlsl/math/morton.hlsl" +#include "nbl/builtin/hlsl/luma_meter/common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace luma_meter +{ + +template +struct geom_meter { + using float_t = typename SharedAccessor::type; + using float_t2 = typename conditional, float32_t2, float16_t2>::type; + using float_t3 = typename conditional, float32_t3, float16_t3>::type; + using this_t = geom_meter; + + static this_t create(float_t2 lumaMinMax, float_t sampleCount) + { + this_t retval; + retval.lumaMinMax = lumaMinMax; + retval.sampleCount = sampleCount; + return retval; + } + + float_t __reduction(float_t value, NBL_REF_ARG(SharedAccessor) sdata) + { + return workgroup::reduction < plus < float_t >, GroupSize >:: + template __call (value, sdata); + } + + float_t __computeLumaLog2( + NBL_CONST_REF_ARG(MeteringWindow) window, + NBL_REF_ARG(TexAccessor) tex, + float_t2 shiftedCoord + ) + { + float_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset; + float_t3 color = tex.get(uvPos); + float_t luma = (float_t)TexAccessor::toXYZ(color); + + luma = clamp(luma, lumaMinMax.x, lumaMinMax.y); + + return log2(luma); + } + + void __uploadFloat( + NBL_REF_ARG(ValueAccessor) val_accessor, + float_t val, + float_t minLog2, + float_t rangeLog2 + ) + { + uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); + uint32_t workgroupIndex = (workGroupCount.x * workGroupCount.y * workGroupCount.z) / 64; + uint32_t fixedPointBitsLeft = 32 - 
uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); + + uint32_t lumaSumBitPattern = uint32_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1))); + + val_accessor.atomicAdd(workgroupIndex & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern); + } + + float_t __downloadFloat( + NBL_REF_ARG(ValueAccessor) val_accessor, + uint32_t index, + float_t minLog2, + float_t rangeLog2 + ) + { + float_t luma = (float_t)val_accessor.get(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1)); + return luma / rangeLog2 + minLog2; + } + + void sampleLuma( + NBL_CONST_REF_ARG(MeteringWindow) window, + NBL_REF_ARG(ValueAccessor) val, + NBL_REF_ARG(TexAccessor) tex, + NBL_REF_ARG(SharedAccessor) sdata, + float_t2 tileOffset, + float_t2 viewportSize + ) + { + uint32_t tid = workgroup::SubgroupContiguousIndex(); + uint32_t2 coord = { + morton2d_decode_x(tid), + morton2d_decode_y(tid) + }; + + float_t luma = 0.0f; + float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize; + float_t lumaLog2 = __computeLumaLog2(window, tex, shiftedCoord); + float_t lumaLog2Sum = __reduction(lumaLog2, sdata); + + if (tid == 0) { + __uploadFloat( + val, + lumaLog2Sum, + log2(lumaMinMax.x), + log2(lumaMinMax.y / lumaMinMax.x) + ); + } + } + + float_t gatherLuma( + NBL_REF_ARG(ValueAccessor) val + ) + { + uint32_t tid = glsl::gl_SubgroupInvocationID(); + float_t luma = glsl::subgroupAdd( + __downloadFloat( + val, + tid, + log2(lumaMinMax.x), + log2(lumaMinMax.y / lumaMinMax.x) + ) + ); + + uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); + uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); + + return (luma / (1 << fixedPointBitsLeft)) / sampleCount; + } + + float_t sampleCount; + float_t2 lumaMinMax; +}; + +template +struct median_meter { + using int_t = typename SharedAccessor::type; + using float_t = 
float32_t; + using float_t2 = typename conditional, float32_t2, float16_t2>::type; + using float_t3 = typename conditional, float32_t3, float16_t3>::type; + using this_t = median_meter; + + static this_t create(float_t2 lumaMinMax) { + this_t retval; + retval.lumaMinMax = lumaMinMax; + return retval; + } + + int_t __inclusive_scan(float_t value, NBL_REF_ARG(SharedAccessor) sdata) { + return workgroup::inclusive_scan < plus < int_t >, GroupSize >:: + template __call (value, sdata); + } + + float_t __computeLuma( + NBL_CONST_REF_ARG(MeteringWindow) window, + NBL_REF_ARG(TexAccessor) tex, + float_t2 shiftedCoord + ) { + float_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset; + float_t3 color = tex.get(uvPos); + float_t luma = (float_t)TexAccessor::toXYZ(color); + + return clamp(luma, lumaMinMax.x, lumaMinMax.y); + } + + int_t __float2Int( + float_t val, + float_t minLog2, + float_t rangeLog2 + ) { + uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); + uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); + + return int_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1))); + } + + float_t __int2Float( + int_t val, + float_t minLog2, + float_t rangeLog2 + ) { + return val / rangeLog2 + minLog2; + } + + void sampleLuma( + NBL_CONST_REF_ARG(MeteringWindow) window, + NBL_REF_ARG(HistogramAccessor) histo, + NBL_REF_ARG(TexAccessor) tex, + NBL_REF_ARG(SharedAccessor) sdata, + float_t2 tileOffset, + float_t2 viewportSize + ) { + uint32_t tid = workgroup::SubgroupContiguousIndex(); + + for (uint32_t vid = tid; vid < BinCount; vid += GroupSize) { + sdata.set(vid, 0); + } + + sdata.workgroupExecutionAndMemoryBarrier(); + + uint32_t2 coord = { + morton2d_decode_x(tid), + morton2d_decode_y(tid) + }; + + float_t luma = 0.0f; + float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize; + luma = 
__computeLuma(window, tex, shiftedCoord); + + float_t binSize = (lumaMinMax.y - lumaMinMax.x) / BinCount; + uint32_t binIndex = (uint32_t)((luma - lumaMinMax.x) / binSize); + + sdata.atomicAdd(binIndex, __float2Int(luma, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)); + + sdata.workgroupExecutionAndMemoryBarrier(); + + float_t histogram_value; + sdata.get(tid, histogram_value); + + sdata.workgroupExecutionAndMemoryBarrier(); + + float_t sum = __inclusive_scan(histogram_value, sdata); + histo.atomicAdd(tid, __float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)); + + const bool is_last_wg_invocation = tid == (GroupSize - 1); + const static uint32_t RoundedBinCount = 1 + (BinCount - 1) / GroupSize; + + for (uint32_t i = 1; i < RoundedBinCount; i++) { + uint32_t keyBucketStart = GroupSize * i; + uint32_t vid = tid + keyBucketStart; + + // no if statement about the last iteration needed + if (is_last_wg_invocation) { + float_t beforeSum; + sdata.get(keyBucketStart, beforeSum); + sdata.set(keyBucketStart, beforeSum + sum); + } + + // propagate last block tail to next block head and protect against subsequent scans stepping on each other's toes + sdata.workgroupExecutionAndMemoryBarrier(); + + // no aliasing anymore + float_t atVid; + sdata.get(vid, atVid); + sum = __inclusive_scan(atVid, sdata); + if (vid < BinCount) { + histo.atomicAdd(vid, __float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)); + } + } + } + + float_t gatherLuma( + NBL_REF_ARG(HistogramAccessor) histo, + NBL_REF_ARG(SharedAccessor) sdata + ) { + uint32_t tid = workgroup::SubgroupContiguousIndex(); + + for (uint32_t vid = tid; vid < BinCount; vid += GroupSize) { + sdata.set( + vid, + histo.get(vid & (BinCount - 1)) + ); + } + + sdata.workgroupExecutionAndMemoryBarrier(); + + uint32_t percentile40, percentile60; + sdata.get((BinCount * 2) / 5, percentile40); + sdata.get((BinCount * 3) / 5, percentile60); + + return (__int2Float(percentile40, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x) + __int2Float(percentile60,
lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)) / 2; + } + + float_t2 lumaMinMax; +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl new file mode 100644 index 0000000000..c0769fc88b --- /dev/null +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -0,0 +1,160 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ +#ifdef __HLSL_VERSION +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#else +#include +#endif + +namespace nbl +{ +namespace hlsl +{ + +namespace impl +{ + +template +NBL_CONSTEXPR_FUNC T morton2d_mask(uint16_t _n) +{ + const static uint64_t mask[5] = + { + 0x5555555555555555ull, + 0x3333333333333333ull, + 0x0F0F0F0F0F0F0F0Full, + 0x00FF00FF00FF00FFull, + 0x0000FFFF0000FFFFull + }; + return nbl::hlsl::_static_cast(mask[_n]); +} + +template +NBL_CONSTEXPR_FUNC T morton3d_mask(uint16_t _n) +{ + const static uint64_t mask[5] = + { + 0x1249249249249249ull, + 0x10C30C30C30C30C3ull, + 0x100F00F00F00F00Full, + 0x001F0000FF0000FFull, + 0x001F00000000FFFFull + }; + return nbl::hlsl::_static_cast(mask[_n]); +} + template +NBL_CONSTEXPR_FUNC T morton4d_mask(uint16_t _n) +{ + const static uint64_t mask[4] = + { + 0x1111111111111111ull, + 0x0303030303030303ull, + 0x000F000F000F000Full, + 0x000000FF000000FFull + }; + return nbl::hlsl::_static_cast(mask[_n]); +} + +template +inline T morton2d_decode(T x) +{ + x = x & morton2d_mask(0); + x = (x | (x >> 1)) & morton2d_mask(1); + x = (x | (x >> 2)) & morton2d_mask(2); + if (bitDepth > 8u) + { + x = (x | (x >> 4)) & morton2d_mask(3); + } + if (bitDepth > 16u) + { + x = (x | (x >> 8)) & morton2d_mask(4); + } + if (bitDepth > 32u) + { + x = (x | (x >> 16)); + } + return x; +} + +//! 
Puts bits on even positions filling gaps with 0s +template +inline T separate_bits_2d(T x) +{ + if (bitDepth > 32u) + { + x = (x | (x << 16)) & morton2d_mask(4); + } + if (bitDepth > 16u) + { + x = (x | (x << 8)) & morton2d_mask(3); + } + if (bitDepth > 8u) + { + x = (x | (x << 4)) & morton2d_mask(2); + } + x = (x | (x << 2)) & morton2d_mask(1); + x = (x | (x << 1)) & morton2d_mask(0); + + return x; +} +template +inline T separate_bits_3d(T x) +{ + if (bitDepth > 32u) + { + x = (x | (x << 32)) & morton3d_mask(4); + } + if (bitDepth > 16u) + { + x = (x | (x << 16)) & morton3d_mask(3); + } + if (bitDepth > 8u) + { + x = (x | (x << 8)) & morton3d_mask(2); + } + x = (x | (x << 4)) & morton3d_mask(1); + x = (x | (x << 2)) & morton3d_mask(0); + + return x; +} +template +inline T separate_bits_4d(T x) +{ + if (bitDepth > 32u) + { + x = (x | (x << 24)) & morton4d_mask(3); + } + if (bitDepth > 16u) + { + x = (x | (x << 12)) & morton4d_mask(2); + } + if (bitDepth > 8u) + { + x = (x | (x << 6)) & morton4d_mask(1); + } + x = (x | (x << 3)) & morton4d_mask(0); + + return x; +} +} + +template +T morton2d_decode_x(T _morton) { return impl::morton2d_decode(_morton); } +template +T morton2d_decode_y(T _morton) { return impl::morton2d_decode(_morton >> 1); } + +template +T morton2d_encode(T x, T y) { return impl::separate_bits_2d(x) | (impl::separate_bits_2d(y) << 1); } +template +T morton3d_encode(T x, T y, T z) { return impl::separate_bits_3d(x) | (impl::separate_bits_3d(y) << 1) | (impl::separate_bits_3d(z) << 2); } +template +T morton4d_encode(T x, T y, T z, T w) { return impl::separate_bits_4d(x) | (impl::separate_bits_4d(y) << 1) | (impl::separate_bits_4d(z) << 2) | (impl::separate_bits_4d(w) << 3); } + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index 2ecb08cdb2..973a313e9c 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ 
b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -61,37 +61,45 @@ pointer_t copyObject([[vk::ext_reference]] T v); // Here's the thing with atomics, it's not only the data type that dictates whether you can do an atomic or not. // It's the storage class that has the most effect (shared vs storage vs image) and we can't check that easily template // integers operate on 2s complement so same op for signed and unsigned +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpAtomicIAdd)]] enable_if_t || is_same_v, T> atomicIAdd([[vk::ext_reference]] T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // DXC Workaround +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpAtomicIAdd)]] enable_if_t && (is_same_v || is_same_v), T> atomicIAdd(Ptr_T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // integers operate on 2s complement so same op for signed and unsigned +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_capability(spv::CapabilityInt64Atomics)]] [[vk::ext_instruction(spv::OpAtomicIAdd)]] enable_if_t || is_same_v, T> atomicIAdd([[vk::ext_reference]] T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // DXC Workaround +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_capability(spv::CapabilityInt64Atomics)]] [[vk::ext_instruction(spv::OpAtomicIAdd)]] enable_if_t && (is_same_v || is_same_v), T> atomicIAdd(Ptr_T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // integers operate on 2s complement so same op for signed and unsigned +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpAtomicISub)]] enable_if_t || is_same_v, T> atomicISub([[vk::ext_reference]] T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // DXC Workaround 
+[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpAtomicISub)]] enable_if_t && (is_same_v || is_same_v), T> atomicISub(Ptr_T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // integers operate on 2s complement so same op for signed and unsigned +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_capability(spv::CapabilityInt64Atomics)]] [[vk::ext_instruction(spv::OpAtomicISub)]] enable_if_t || is_same_v, T> atomicISub([[vk::ext_reference]] T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); template // DXC Workaround +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_capability(spv::CapabilityInt64Atomics)]] [[vk::ext_instruction(spv::OpAtomicISub)]] enable_if_t && (is_same_v || is_same_v), T> atomicISub(Ptr_T ptr, uint32_t memoryScope, uint32_t memorySemantics, T value); diff --git a/include/nbl/builtin/hlsl/tonemapper/operators.hlsl b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl new file mode 100644 index 0000000000..46d241c76c --- /dev/null +++ b/include/nbl/builtin/hlsl/tonemapper/operators.hlsl @@ -0,0 +1,106 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_TONE_MAPPER_OPERATORS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_TONE_MAPPER_OPERATORS_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/type_traits.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace tonemapper +{ + +template +struct Reinhard +{ + using float_t = enable_if_t::value, T>; + using float_t3 = vector; + using this_t = Reinhard; + + static this_t create(float_t EV, float_t key = 0.18f, float_t WhitePointRelToEV = 16.f) + { + this_t retval; + + const float_t unit = 1.0; + retval.keyAndManualLinearExposure = key * exp2(EV); + retval.rcpWhite2 = unit / (WhitePointRelToEV * WhitePointRelToEV); + + return retval; + } + + float_t3 operator()(float_t3 rawCIEXYZcolor) { + const float_t unit = 1.0; + float_t exposureFactors = keyAndManualLinearExposure; + float_t exposedLuma = rawCIEXYZcolor.y * exposureFactors; + float_t colorMultiplier = (exposureFactors * (unit + exposedLuma * rcpWhite2) / (unit + exposedLuma)); + return rawCIEXYZcolor * colorMultiplier; + } + + float_t keyAndManualLinearExposure; + float_t rcpWhite2; +}; + +template +struct ACES +{ + using float_t = enable_if_t::value, T>; + using float_t3 = vector; + using float_t3x3 = matrix; + + using this_t = ACES; + static this_t create(float_t EV, float_t key = 0.18f, float_t Contrast = 1.f) { + this_t retval; + retval.gamma = Contrast; + const float_t reinhardMatchCorrection = 0.77321666f; // middle grays get exposed to different values between tonemappers given the same key + retval.exposure = EV + log2(key * reinhardMatchCorrection); + return retval; + } + + float_t3 operator()(float_t3 rawCIEXYZcolor) { + const float_t unit = 1.0; + float_t3 tonemapped = rawCIEXYZcolor; + if (tonemapped.y > bit_cast(numeric_limits::min)) + tonemapped *= exp2(log2(tonemapped.y) * (gamma - unit) + (exposure) * gamma); + + // XYZ => RRT_SAT + // this seems to be a matrix for some 
hybrid colorspace, coefficients are somewhere inbetween BT2020 and ACEScc(t) + const float_t3x3 XYZ_RRT_Input = float_t3x3( + float_t3(1.594168310, -0.262608051, -0.231993079), + float_t3(-0.6332771780, 1.5840380200, 0.0164147373), + float_t3(0.00892840419, 0.03648501260, 0.87711471300) + ); + + // this is obviously fitted to some particular simulated sensor/film and display + float_t3 v = mul(XYZ_RRT_Input, tonemapped); + float_t3 a = v * (v + promote(0.0245786)) - promote(0.000090537); + float_t3 b = v * (v * promote(0.983729) + promote(0.4329510)) + promote(0.238081); + v = a / b; + + // ODT_SAT => XYZ + // this seems to be a matrix for some hybrid colorspace, coefficients are similar to AdobeRGB,BT2020 and ACEScc(t) + const float_t3x3 ODT_XYZ_Output = float_t3x3( + float_t3(0.624798000, 0.164064825, 0.161605373), + float_t3(0.268048108, 0.674283803, 0.057667464), + float_t3(0.0157514643, 0.0526682511, 1.0204007600) + ); + return mul(ODT_XYZ_Output, v); + } + + float_t gamma; // 1.0 + float_t exposure; // actualExposure+midGrayLog2 +}; + +// ideas for more operators https://web.archive.org/web/20191226154550/http://cs.columbia.edu/CAVE/software/softlib/dorf.php +// or get proper ACES RRT and ODTs +// https://partnerhelp.netflixstudios.com/hc/en-us/articles/360000622487-I-m-using-ACES-Which-Output-Transform-should-I-use- + +} +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 227f9780ff..fe51f17fbb 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -34,6 +34,11 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/barycentric/utils.glsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bda/__ref.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bda/__ptr.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bda/bda_accessor.hlsl") +# luma metering +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/luma_meter/common.hlsl") 
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/luma_meter/luma_meter.hlsl") +# tonemapper +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tonemapper/operators.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/bda/legacy_bda_accessor.hlsl") # bump mapping LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/bump_mapping/fragment.glsl") # TODO: rename to `frag.glsl` @@ -292,6 +297,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") #extra math LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quadrature/gauss_legendre/impl.hlsl") +#morton +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/morton.hlsl") #acceleration structures LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/acceleration_structures.hlsl") #colorspace