Skip to content

Commit 15dc4d0

Browse files
denoiser with FFT bloom works
1 parent 0e9cea6 commit 15dc4d0

File tree

1 file changed

+49
-11
lines changed
  • examples_tests/39.DenoiserTonemapper

1 file changed

+49
-11
lines changed

examples_tests/39.DenoiserTonemapper/main.cpp

Lines changed: 49 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,7 @@ int main(int argc, char* argv[])
181181
// clear the histogram to 0s
182182
driver->fillBuffer(histogramBuffer.get(),0u,HistogramBufferSize,0u);
183183

184+
constexpr uint32_t kernelSetDescCount = 4u;
184185
constexpr auto SharedDescriptorSetDescCount = 5u;
185186
core::smart_refctd_ptr<IGPUDescriptorSetLayout> kernelDescriptorSetLayout,sharedDescriptorSetLayout;
186187
core::smart_refctd_ptr<IGPUPipelineLayout> kernelPipelineLayout,sharedPipelineLayout;
@@ -195,6 +196,7 @@ int main(int argc, char* argv[])
195196
};
196197
{
197198
auto firstKernelFFTShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
199+
#version 450 core
198200
#define _NBL_GLSL_WORKGROUP_SIZE_ 256
199201
layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in;
200202
@@ -205,19 +207,33 @@ layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) i
205207
layout(set=0, binding=0) uniform sampler2D inputImage;
206208
#define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
207209
210+
#include "nbl/builtin/glsl/ext/FFT/parameters_struct.glsl"
211+
layout(push_constant) uniform PushConstants
212+
{
213+
nbl_glsl_ext_FFT_Parameters_t params;
214+
float kernelScale;
215+
} pc;
216+
#define _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_
217+
nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters()
218+
{
219+
return pc.params;
220+
}
221+
#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
222+
208223
#include <nbl/builtin/glsl/math/complex.glsl>
209224
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in ivec3 coordinate, in uint channel)
210225
{
211226
ivec2 inputImageSize = textureSize(inputImage,0);
212-
vec2 normalizedCoords = (vec2(coordinate.xy)+vec2(0.5f))/(vec2(inputImageSize)*KERNEL_SCALE);
213-
vec4 texelValue = textureLod(inputImage, normalizedCoords+vec2(0.5-0.5/KERNEL_SCALE), -log2(KERNEL_SCALE));
227+
vec2 normalizedCoords = (vec2(coordinate.xy)+vec2(0.5f))/(vec2(inputImageSize)*pc.kernelScale);
228+
vec4 texelValue = textureLod(inputImage, normalizedCoords+vec2(0.5-0.5/pc.kernelScale), -log2(pc.kernelScale));
214229
return nbl_glsl_complex(texelValue[channel], 0.0f);
215230
}
216231
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
217232
218233
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
219234
)==="));
220235
auto lastKernelFFTShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
236+
#version 450 core
221237
#define _NBL_GLSL_WORKGROUP_SIZE_ 256
222238
layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in;
223239
@@ -240,6 +256,7 @@ layout(set=0, binding=2) writeonly restrict buffer OutputBuffer
240256
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
241257
)==="));
242258
auto kernelNormalizationShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
259+
#version 450 core
243260
layout(local_size_x=16, local_size_y=16, local_size_z=1) in;
244261
245262
#include <nbl/builtin/glsl/ext/FFT/types.glsl>
@@ -260,12 +277,12 @@ layout(push_constant) uniform PushConstants
260277
261278
void main()
262279
{
263-
nbl_glsl_complex value = in_data[nbl_glsl_dot(gl_GlobalInvocationID,pc.strides.xyz)];
280+
nbl_glsl_complex value = inData[nbl_glsl_dot(gl_GlobalInvocationID,pc.strides.xyz)];
264281
265282
// imaginary component will be 0, image shall be positive
266283
vec3 avg;
267284
for (uint i=0u; i<3u; i++)
268-
avg[i] = in_data[pc.strides.z*i].x;
285+
avg[i] = inData[pc.strides.z*i].x;
269286
const float power = (nbl_glsl_scRGBtoXYZ*avg).y;
270287
271288
const uvec2 coord = bitfieldReverse(gl_GlobalInvocationID.xy)>>pc.bitreverse_shift.xy;
@@ -295,7 +312,6 @@ void main()
295312
}
296313
};
297314
auto sampler = driver->createGPUSampler(std::move(params));
298-
constexpr uint32_t kernelSetDescCount = 4u;
299315
IGPUDescriptorSetLayout::SBinding binding[kernelSetDescCount] = {
300316
{0u,EDT_COMBINED_IMAGE_SAMPLER,1u,IGPUSpecializedShader::ESS_COMPUTE,&sampler},
301317
{1u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
@@ -306,13 +322,13 @@ void main()
306322
}
307323

308324
{
309-
SPushConstantRange pcRange[1] = {IGPUSpecializedShader::ESS_COMPUTE,0u,core::max(sizeof(FFTClass::Parameters_t),sizeof(NormalizationPushConstants))};
325+
SPushConstantRange pcRange[1] = {IGPUSpecializedShader::ESS_COMPUTE,0u,core::max(sizeof(FFTClass::Parameters_t)+sizeof(float),sizeof(NormalizationPushConstants))};
310326
kernelPipelineLayout = driver->createGPUPipelineLayout(pcRange,pcRange+1u,core::smart_refctd_ptr(kernelDescriptorSetLayout));
311327
}
312328

313-
firstKernelFFTPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(sharedPipelineLayout),std::move(firstKernelFFTSpecializedShader));
314-
lastKernelFFTPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(sharedPipelineLayout),std::move(lastKernelFFTSpecializedShader));
315-
kernelNormalizationPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(sharedPipelineLayout),std::move(kernelNormalizationSpecializedShader));
329+
firstKernelFFTPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(kernelPipelineLayout),std::move(firstKernelFFTSpecializedShader));
330+
lastKernelFFTPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(kernelPipelineLayout),std::move(lastKernelFFTSpecializedShader));
331+
kernelNormalizationPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(kernelPipelineLayout),std::move(kernelNormalizationSpecializedShader));
316332

317333

318334
auto deinterleaveShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
@@ -1016,6 +1032,8 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
10161032
}
10171033
return tmp;
10181034
}();
1035+
outParam.bloomScale = bloomScale;
1036+
fftScratchSize = core::max(FFTClass::getOutputBufferSize(usingHalfFloatFFTStorage,kerDim,colorChannelsFFT)*2u,fftScratchSize);
10191037
fftScratchSize = core::max(FFTClass::getOutputBufferSize(usingHalfFloatFFTStorage,marginSrcDim,colorChannelsFFT),fftScratchSize);
10201038
{
10211039
auto* fftPushConstants = outParam.fftPushConstants;
@@ -1319,12 +1337,32 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
13191337
{
13201338
auto kernelDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr(kernelDescriptorSetLayout));
13211339
{
1340+
IGPUDescriptorSet::SDescriptorInfo infos[kernelSetDescCount+colorChannelsFFT-1u];
1341+
infos[0].desc = kerImageView;
1342+
infos[0].image.sampler = nullptr; // immutable
1343+
infos[1].desc = core::smart_refctd_ptr<IGPUBuffer>(temporaryPixelBuffer.getObject());
1344+
infos[1].buffer = {0u,fftScratchSize>>1u};
1345+
infos[2].desc = core::smart_refctd_ptr<IGPUBuffer>(temporaryPixelBuffer.getObject());
1346+
infos[2].buffer = {fftScratchSize>>1u,fftScratchSize};
1347+
for (uint32_t i=0u; i<colorChannelsFFT; i++)
1348+
{
1349+
infos[3+i].desc = kernelNormalizedSpectrums[i];
1350+
infos[3+i].image.sampler = nullptr; // storage
1351+
}
1352+
IGPUDescriptorSet::SWriteDescriptorSet writes[kernelSetDescCount] =
1353+
{
1354+
{kernelDescriptorSet.get(),0u,0u,1u,EDT_COMBINED_IMAGE_SAMPLER,infos+0u},
1355+
{kernelDescriptorSet.get(),1u,0u,1u,EDT_STORAGE_BUFFER,infos+1u},
1356+
{kernelDescriptorSet.get(),2u,0u,1u,EDT_STORAGE_BUFFER,infos+2u},
1357+
{kernelDescriptorSet.get(),3u,0u,colorChannelsFFT,EDT_STORAGE_IMAGE,infos+3u}
1358+
};
1359+
driver->updateDescriptorSets(kernelSetDescCount,writes,0u,nullptr);
13221360
}
13231361
driver->bindDescriptorSets(EPBP_COMPUTE,kernelPipelineLayout.get(),0u,1u,&kernelDescriptorSet.get(),nullptr);
13241362

13251363
// Ker Image First Axis FFT
13261364
driver->bindComputePipeline(firstKernelFFTPipeline.get());
1327-
driver->pushConstants(firstKernelFFTPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,sizeof(FFTClass::Parameters_t),sizeof(float),&param.bloomScale);
1365+
driver->pushConstants(kernelPipelineLayout.get(),ICPUSpecializedShader::ESS_COMPUTE,sizeof(FFTClass::Parameters_t),sizeof(float),&param.bloomScale);
13281366
FFTClass::dispatchHelper(driver,kernelPipelineLayout.get(),fftPushConstants[0],fftDispatchInfo[0]);
13291367

13301368
// Ker Image Last Axis FFT
@@ -1468,7 +1506,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
14681506
denoiserOutput.rowStrideInBytes = param.width * forcedOptiXFormatPixelStride;
14691507
denoiserOutput.format = forcedOptiXFormat;
14701508
denoiserOutput.pixelStrideInBytes = forcedOptiXFormatPixelStride;
1471-
#if 0 // for easy debug with renderdoc disable optix stuff
1509+
#if 1 // for easy debug with renderdoc disable optix stuff
14721510
//invoke
14731511
if (denoiser.m_denoiser->tileAndInvoke(
14741512
m_cudaStream,

0 commit comments

Comments
 (0)