Skip to content

Commit 59c5565

Browse files
Set up the convolve pipeline and shader, but something is wrong. TODOs:
- Debug why a y-axis FFT followed by an iFFT doesn't round-trip properly - Create a spectrum of the blur kernel and add it to the shared descriptor set - Fire off the convolution - Test with 8k/16k denoise and test the scaling
1 parent ae270c9 commit 59c5565

File tree

2 files changed

+140
-26
lines changed

2 files changed

+140
-26
lines changed

examples_tests/39.DenoiserTonemapper/ShaderCommon.glsl

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// For conditions of distribution and use, see copyright notice in nabla.h
44

55
#define COMPUTE_WG_SIZE 256
6+
#define _NBL_GLSL_WORKGROUP_SIZE_LOG2_ 8
67
layout(local_size_x=COMPUTE_WG_SIZE) in;
78

89
layout(constant_id = 1) const uint EII_COLOR = 0u;
@@ -17,6 +18,20 @@ layout(push_constant, row_major) uniform PushConstants{
1718
} pc;
1819
#define _NBL_GLSL_EXT_LUMA_METER_PUSH_CONSTANTS_DEFINED_
1920
#define _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_
21+
22+
23+
// Returns log2 of the FFT length shared by every shader that includes this file.
// The result is findMSB(imageWidth-1)+1, clamped so it is never smaller than
// _NBL_GLSL_WORKGROUP_SIZE_LOG2_+1.
// NOTE(review): the size is always derived from pc.data.imageWidth, including for
// shaders whose FFT direction is not the x axis - confirm this is intended.
uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
24+
{
25+
return max(findMSB(pc.data.imageWidth-1u),_NBL_GLSL_WORKGROUP_SIZE_LOG2_)+1u;
26+
}
27+
// Returns the constant 2u.
// NOTE(review): presumably the index of the last channel the FFT extension processes
// (i.e. three channels, 0..2) - confirm against the FFT extension headers.
uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel()
28+
{
29+
return 2u;
30+
}
31+
// Returns the padding mode as a raw integer (3u).
// NOTE(review): the literal is expected to equal _NBL_GLSL_EXT_FFT_PAD_MIRROR_; it is
// spelled as a number here, presumably because that define is not visible yet at this
// point of the include order - confirm and keep the two in sync.
uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
32+
{
33+
return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_;
34+
}
2035
#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
2136

2237
// kinda bad overdeclaration but oh well
@@ -62,7 +77,6 @@ struct f16vec3_packed
6277
#define _NBL_GLSL_EXT_LUMA_METER_INVOCATION_COUNT (_NBL_GLSL_EXT_LUMA_METER_DISPATCH_SIZE_X_DEFINED_*_NBL_GLSL_EXT_LUMA_METER_DISPATCH_SIZE_Y_DEFINED_)
6378
#define _NBL_GLSL_EXT_LUMA_METER_BIN_COUNT _NBL_GLSL_EXT_LUMA_METER_INVOCATION_COUNT
6479
#define _NBL_GLSL_WORKGROUP_SIZE_ _NBL_GLSL_EXT_LUMA_METER_BIN_COUNT
65-
#define _NBL_GLSL_WORKGROUP_SIZE_LOG2_ 8
6680
#define _NBL_GLSL_EXT_LUMA_METER_BIN_GLOBAL_REPLICATION 4
6781
#ifdef _NBL_GLSL_EXT_LUMA_METER_FIRST_PASS_DEFINED_
6882
#include "nbl/builtin/glsl/ext/LumaMeter/impl.glsl"

examples_tests/39.DenoiserTonemapper/main.cpp

Lines changed: 125 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ int main(int argc, char* argv[])
184184
constexpr auto SharedDescriptorSetDescCount = 4u;
185185
core::smart_refctd_ptr<IGPUDescriptorSetLayout> sharedDescriptorSetLayout;
186186
core::smart_refctd_ptr<IGPUPipelineLayout> sharedPipelineLayout;
187-
core::smart_refctd_ptr<IGPUComputePipeline> deinterleavePipeline,intensityPipeline,secondLumaMeterAndFirstFFTPipeline,interleaveAndLastFFTPipeline;
187+
core::smart_refctd_ptr<IGPUComputePipeline> deinterleavePipeline,intensityPipeline,secondLumaMeterAndFirstFFTPipeline,convolvePipeline,interleaveAndLastFFTPipeline;
188188
{
189189
auto deinterleaveShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
190190
#version 450 core
@@ -309,18 +309,6 @@ uint nbl_glsl_ext_FFT_Parameters_t_getDirection()
309309
{
310310
return 0u;
311311
}
312-
uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel()
313-
{
314-
return 2u;
315-
}
316-
uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
317-
{
318-
return max(findMSB(pc.data.imageWidth-1u),_NBL_GLSL_WORKGROUP_SIZE_LOG2_)+1u;
319-
}
320-
uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
321-
{
322-
return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_;
323-
}
324312
#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
325313
326314
@@ -398,6 +386,115 @@ void main()
398386
nbl_glsl_ext_FFT_setData(nbl_glsl_ext_FFT_getCoordinates(tid),channel,nbl_glsl_ext_FFT_impl_values[t]);
399387
}
400388
}
389+
}
390+
)==="));
391+
auto convolveShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
392+
#version 450 core
393+
#extension GL_EXT_shader_16bit_storage : require
394+
395+
// nasty and ugly but oh well
396+
#define _NBL_GLSL_SCRATCH_SHARED_DEFINED_ sharedScratch
397+
#define _NBL_GLSL_SCRATCH_SHARED_SIZE_DEFINED_ 1024
398+
shared uint _NBL_GLSL_SCRATCH_SHARED_DEFINED_[_NBL_GLSL_SCRATCH_SHARED_SIZE_DEFINED_];
399+
400+
#include "../ShaderCommon.glsl"
401+
layout(binding = 1, std430) restrict buffer SpectrumOutputBuffer
402+
{
403+
vec2 spectrum[];
404+
};
405+
#define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
406+
#define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
407+
408+
409+
410+
#include <nbl/builtin/glsl/math/complex.glsl>
411+
412+
413+
// Returns the extent of the data this shader works on:
//   x = 2^getLog2FFTSize(), y = pc.data.imageHeight, z = 1.
uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions()
414+
{
415+
return uvec3(0x1u<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize(),pc.data.imageHeight,1u);
416+
}
417+
// Placeholder implementation: converts the nonzero sentinel 0xdeadbeef to bool, which
// evaluates to true.
// NOTE(review): this shader's main() passes literal false/true to
// nbl_glsl_ext_FFT_preloaded instead of calling this function, so the value is
// presumably never meant to be consumed - confirm nothing in the included FFT code reads it.
bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse()
418+
{
419+
return bool(0xdeadbeefu);
420+
}
421+
// Returns the axis index of the FFT performed by this shader (1u).
// main() uses this value to index getDimensions(), so 1u selects the y component
// (pc.data.imageHeight) as the un-padded length.
uint nbl_glsl_ext_FFT_Parameters_t_getDirection()
422+
{
423+
return 1u;
424+
}
425+
#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
426+
427+
428+
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel);
429+
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
430+
// Stores one complex value into the `spectrum` buffer.
//   coordinate    - only .x and .y are used; .z is ignored.
//   channel       - selects a slab of 2^getLog2FFTSize() columns.
//   complex_value - value written at the computed index.
// Layout: index = ((channel * 2^log2FFTSize) + x) * imageHeight + y, i.e. y is the
// fastest-varying index and each x column is imageHeight elements long.
// NOTE(review): if coordinate.y can reach or exceed pc.data.imageHeight (for example when
// the FFT length along y is larger than the image height), the index lands in the next
// column - verify the range of coordinates produced by the FFT code.
void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value)
431+
{
432+
const uint index = ((channel<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.x)*pc.data.imageHeight+coordinate.y;
433+
spectrum[index] = complex_value;
434+
}
435+
#define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
436+
437+
#define _NBL_GLSL_EXT_FFT_MAIN_DEFINED_
438+
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
439+
440+
// Multiplies every value this invocation holds in nbl_glsl_ext_FFT_impl_values by a
// kernel spectrum value, in place.
//   item_per_thread_count - number of entries of nbl_glsl_ext_FFT_impl_values to process.
//   ch                    - channel index; selects NormalizedKernel[ch] when CONVOLVE is defined.
// When CONVOLVE is not defined the kernel value is the constant (1,0), so the pass acts
// as an identity multiply.
void convolve(in uint item_per_thread_count, in uint ch)
441+
{
442+
for(uint t=0u; t<item_per_thread_count; t++)
443+
{
444+
// NOTE(review): main() builds the same id as (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
// the two only agree if _NBL_GLSL_WORKGROUP_SIZE_ == 1<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_ - confirm.
const uint tid = _NBL_GLSL_WORKGROUP_SIZE_*t+gl_LocalInvocationIndex;
445+
446+
nbl_glsl_complex sourceSpectrum = nbl_glsl_ext_FFT_impl_values[t];
447+
448+
// Turn the element's coordinates into a texture coordinate: the 32-bit x/y values are
// bit-reversed and divided by 2^32 (4294967296).
449+
const uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
450+
vec2 uv = vec2(bitfieldReverse(coords.xy))/vec2(4294967296.f);
451+
#ifdef CONVOLVE
452+
uv += pc.params.kernel_half_pixel_size;
453+
// Sample the kernel spectrum at LOD 0; .xy is taken as the complex value.
454+
nbl_glsl_complex convSpectrum = textureLod(NormalizedKernel[ch],uv,0).xy;
455+
#else
456+
nbl_glsl_complex convSpectrum = nbl_glsl_complex(1.f,0.f);
457+
#endif
458+
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_complex_mul(sourceSpectrum,convSpectrum);
459+
}
460+
}
461+
462+
// Entry point of the convolve shader. For each of three channels it:
//   1. loads padded input into nbl_glsl_ext_FFT_impl_values,
//   2. calls nbl_glsl_ext_FFT_preloaded(false,...),
//   3. multiplies by the kernel spectrum via convolve(),
//   4. calls nbl_glsl_ext_FFT_preloaded(true,...),
//   5. writes the result back with nbl_glsl_ext_FFT_setData.
// NOTE(review): the first argument of nbl_glsl_ext_FFT_preloaded is presumably the
// "is inverse" flag (forward, then inverse transform) - confirm against default_compute_fft.comp.
void main()
463+
{
464+
// Virtual Threads Calculation
465+
const uint log2FFTSize = nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize();
466+
// Each invocation handles 2^(log2FFTSize - _NBL_GLSL_WORKGROUP_SIZE_LOG2_) values.
const uint item_per_thread_count = 0x1u<<(log2FFTSize-_NBL_GLSL_WORKGROUP_SIZE_LOG2_);
467+
// NOTE(review): the bound 3u is hard-coded; ShaderCommon.glsl exposes
// nbl_glsl_ext_FFT_Parameters_t_getMaxChannel() (returns 2u) - consider deriving the bound from it.
for(uint channel=0u; channel<3u; channel++)
468+
{
469+
// Load Values into local memory
470+
for(uint t=0u; t<item_per_thread_count; t++)
471+
{
472+
const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
473+
// Un-padded length along the FFT direction; the expression does not depend on t or channel.
const uint trueDim = nbl_glsl_ext_FFT_Parameters_t_getDimensions()[nbl_glsl_ext_FFT_Parameters_t_getDirection()];
474+
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_ext_FFT_getPaddedData(nbl_glsl_ext_FFT_getPaddedCoordinates(tid,log2FFTSize,trueDim),channel);
475+
}
476+
nbl_glsl_ext_FFT_preloaded(false,log2FFTSize);
477+
barrier();
478+
479+
convolve(item_per_thread_count,channel);
480+
481+
barrier();
482+
nbl_glsl_ext_FFT_preloaded(true,log2FFTSize);
483+
// write out to main memory
484+
for(uint t=0u; t<item_per_thread_count; t++)
485+
{
486+
const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
487+
nbl_glsl_ext_FFT_setData(nbl_glsl_ext_FFT_getCoordinates(tid),channel,nbl_glsl_ext_FFT_impl_values[t]);
488+
}
489+
}
490+
}
491+
492+
// Reads one complex value from the `spectrum` buffer for the FFT input stage.
//   coordinate - possibly out-of-range coordinate; passed to nbl_glsl_ext_FFT_wrap_coord first.
//   channel    - selects a slab of 2^getLog2FFTSize() columns.
// Returns (0,0) when nbl_glsl_ext_FFT_wrap_coord returns false; otherwise returns the
// element at ((channel * 2^log2FFTSize) + x) * imageHeight + y, the same formula that
// nbl_glsl_ext_FFT_setData writes with.
// NOTE(review): nbl_glsl_ext_FFT_wrap_coord presumably rewrites `coordinate` in place
// according to the padding type - confirm against the FFT extension source.
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel)
493+
{
494+
if (!nbl_glsl_ext_FFT_wrap_coord(coordinate))
495+
return nbl_glsl_complex(0.f,0.f);
496+
const uint index = ((channel<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.x)*pc.data.imageHeight+coordinate.y;
497+
return spectrum[index];
401498
}
402499
)==="));
403500
auto interleaveAndLastFFTShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
@@ -432,10 +529,6 @@ layout(binding = 3, std430) restrict readonly buffer IntensityBuffer
432529
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel);
433530
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
434531
435-
uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
436-
{
437-
return max(findMSB(pc.data.imageWidth-1u),_NBL_GLSL_WORKGROUP_SIZE_LOG2_)+1u;
438-
}
439532
uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions()
440533
{
441534
return uvec3(0x1u<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize(),pc.data.imageHeight,1u);
@@ -448,14 +541,6 @@ uint nbl_glsl_ext_FFT_Parameters_t_getDirection()
448541
{
449542
return 0u;
450543
}
451-
uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel()
452-
{
453-
return 2u;
454-
}
455-
uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
456-
{
457-
return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_;
458-
}
459544
#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
460545
461546
@@ -567,6 +652,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
567652
auto deinterleaveSpecializedShader = driver->createGPUSpecializedShader(deinterleaveShader.get(),specInfo);
568653
auto intensitySpecializedShader = driver->createGPUSpecializedShader(intensityShader.get(),specInfo);
569654
auto secondLumaMeterAndFirstFFTSpecializedShader = driver->createGPUSpecializedShader(secondLumaMeterAndFirstFFTShader.get(),specInfo);
655+
auto convolveSpecializedShader = driver->createGPUSpecializedShader(convolveShader.get(),specInfo);
570656
auto interleaveAndLastFFTSpecializedShader = driver->createGPUSpecializedShader(interleaveAndLastFFTShader.get(),specInfo);
571657

572658
IGPUDescriptorSetLayout::SBinding binding[SharedDescriptorSetDescCount] = {
@@ -582,6 +668,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
582668
deinterleavePipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(sharedPipelineLayout),std::move(deinterleaveSpecializedShader));
583669
intensityPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(sharedPipelineLayout),std::move(intensitySpecializedShader));
584670
secondLumaMeterAndFirstFFTPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(sharedPipelineLayout),std::move(secondLumaMeterAndFirstFFTSpecializedShader));
671+
convolvePipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(sharedPipelineLayout),std::move(convolveSpecializedShader));
585672
interleaveAndLastFFTPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(sharedPipelineLayout),std::move(interleaveAndLastFFTSpecializedShader));
586673
}
587674

@@ -1183,6 +1270,19 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
11831270
COpenGLExtensionHandler::extGlMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
11841271

11851272
// TODO: Y-axis FFT, multiply the spectra together, y-axis iFFT
1273+
driver->bindComputePipeline(convolvePipeline.get());
1274+
#if 0
1275+
{
1276+
const auto& kernelImgExtent = kernelNormalizedSpectrums[0]->getCreationParameters().image->getCreationParameters().extent;
1277+
vec2 kernel_half_pixel_size{0.5f,0.5f};
1278+
kernel_half_pixel_size.x /= kernelImgExtent.width;
1279+
kernel_half_pixel_size.y /= kernelImgExtent.height;
1280+
driver->pushConstants(convolvePipeline->getLayout(),ISpecializedShader::ESS_COMPUTE,offsetof(convolve_parameters_t,kernel_half_pixel_size),sizeof(convolve_parameters_t::kernel_half_pixel_size),&kernel_half_pixel_size);
1281+
}
1282+
#endif
1283+
// dispatch
1284+
//!driver->dispatch(param.fftDispatchInfo[1].workGroupCount[0],param.fftDispatchInfo[1].workGroupCount[1],1u);
1285+
COpenGLExtensionHandler::extGlMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
11861286

11871287
// bind intensity pipeline
11881288
driver->bindComputePipeline(intensityPipeline.get());

0 commit comments

Comments
 (0)