Skip to content

Commit 0e9cea6

Browse files
now just need to handle the push constants for the first FFT pass and the descriptor set creation
1 parent 113b141 commit 0e9cea6

File tree

3 files changed

+195
-33
lines changed

3 files changed

+195
-33
lines changed

examples_tests/39.DenoiserTonemapper/main.cpp

Lines changed: 194 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -182,10 +182,139 @@ int main(int argc, char* argv[])
182182
driver->fillBuffer(histogramBuffer.get(),0u,HistogramBufferSize,0u);
183183

184184
constexpr auto SharedDescriptorSetDescCount = 5u;
185-
core::smart_refctd_ptr<IGPUDescriptorSetLayout> sharedDescriptorSetLayout;
186-
core::smart_refctd_ptr<IGPUPipelineLayout> sharedPipelineLayout;
187-
core::smart_refctd_ptr<IGPUComputePipeline> deinterleavePipeline,intensityPipeline,secondLumaMeterAndFirstFFTPipeline,convolvePipeline,interleaveAndLastFFTPipeline;
185+
core::smart_refctd_ptr<IGPUDescriptorSetLayout> kernelDescriptorSetLayout,sharedDescriptorSetLayout;
186+
core::smart_refctd_ptr<IGPUPipelineLayout> kernelPipelineLayout,sharedPipelineLayout;
187+
core::smart_refctd_ptr<IGPUComputePipeline> firstKernelFFTPipeline,lastKernelFFTPipeline,kernelNormalizationPipeline,
188+
deinterleavePipeline,intensityPipeline,
189+
secondLumaMeterAndFirstFFTPipeline,convolvePipeline,interleaveAndLastFFTPipeline;
190+
// Normalization of FFT spectrum
191+
// Host-side push constant block for the kernel normalization compute pass.
// Field order matches the `PushConstants` block declared in the kernel
// normalization shader source (`uvec4 strides; uvec4 bitreverse_shift;`).
struct NormalizationPushConstants
188192
{
193+
// filled from the last FFT pass' `output_strides`; the shader dots `.xyz` with gl_GlobalInvocationID to index the spectrum buffer
ext::FFT::uvec4 stride;
194+
// right-shift applied after `bitfieldReverse`; `.x`/`.y` are set to `32-findMSB(padded width/height)`, the shader reads only `.xy`
ext::FFT::uvec4 bitreverse_shift;
195+
};
196+
{
197+
auto firstKernelFFTShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
198+
#define _NBL_GLSL_WORKGROUP_SIZE_ 256
199+
layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in;
200+
201+
// kinda bad overdeclaration but oh well
202+
#define _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_ 16384
203+
204+
// Input Descriptor
205+
layout(set=0, binding=0) uniform sampler2D inputImage;
206+
#define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
207+
208+
#include <nbl/builtin/glsl/math/complex.glsl>
209+
// Input fetch hook for the first kernel FFT pass: samples `inputImage` at the
// position derived from `coordinate.xy` and returns the selected `channel` as a
// complex number whose imaginary part is zero. `coordinate.z` is not used.
// NOTE(review): KERNEL_SCALE is not defined anywhere in the visible shader source —
// presumably it is meant to be supplied by a define or push constant; confirm.
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in ivec3 coordinate, in uint channel)
210+
{
211+
ivec2 inputImageSize = textureSize(inputImage,0);
212+
// texel-centre coordinate, normalized by the image size scaled up by KERNEL_SCALE
vec2 normalizedCoords = (vec2(coordinate.xy)+vec2(0.5f))/(vec2(inputImageSize)*KERNEL_SCALE);
213+
// offset the lookup by 0.5-0.5/KERNEL_SCALE and request an explicit LoD of -log2(KERNEL_SCALE)
vec4 texelValue = textureLod(inputImage, normalizedCoords+vec2(0.5-0.5/KERNEL_SCALE), -log2(KERNEL_SCALE));
214+
return nbl_glsl_complex(texelValue[channel], 0.0f);
215+
}
216+
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
217+
218+
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
219+
)==="));
220+
auto lastKernelFFTShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
221+
#define _NBL_GLSL_WORKGROUP_SIZE_ 256
222+
layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in;
223+
224+
// kinda bad overdeclaration but oh well
225+
#define _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_ 16384
226+
#include <nbl/builtin/glsl/ext/FFT/types.glsl>
227+
228+
layout(set=0, binding=1) readonly restrict buffer InputBuffer
229+
{
230+
nbl_glsl_ext_FFT_storage_t inData[];
231+
};
232+
#define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
233+
234+
layout(set=0, binding=2) writeonly restrict buffer OutputBuffer
235+
{
236+
nbl_glsl_ext_FFT_storage_t outData[];
237+
};
238+
#define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
239+
240+
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
241+
)==="));
242+
auto kernelNormalizationShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
243+
layout(local_size_x=16, local_size_y=16, local_size_z=1) in;
244+
245+
#include <nbl/builtin/glsl/ext/FFT/types.glsl>
246+
247+
layout(set=0, binding=2) readonly restrict buffer InputBuffer
248+
{
249+
nbl_glsl_ext_FFT_storage_t inData[];
250+
};
251+
layout(set=0, binding=3, rg32f) uniform image2D NormalizedKernel[3];
252+
253+
layout(push_constant) uniform PushConstants
254+
{
255+
uvec4 strides;
256+
uvec4 bitreverse_shift;
257+
} pc;
258+
259+
#include <nbl/builtin/glsl/colorspace/encodeCIEXYZ.glsl>
260+
261+
// Kernel normalization pass: reads one spectrum value per invocation, divides it
// by the luma (CIE Y) of the per-channel values stored at index `strides.z*i`,
// applies a phase factor of exp(-i*pi*(x+y)) and stores the result at the
// bit-reversed coordinate of the image selected by gl_WorkGroupID.z.
void main()
262+
{
263+
// the InputBuffer block declares its member as `inData` (there is no `in_data`)
nbl_glsl_complex value = inData[nbl_glsl_dot(gl_GlobalInvocationID,pc.strides.xyz)];
264+
265+
// imaginary component will be 0, image shall be positive
266+
vec3 avg;
267+
for (uint i=0u; i<3u; i++)
268+
avg[i] = inData[pc.strides.z*i].x;
269+
const float power = (nbl_glsl_scRGBtoXYZ*avg).y;
270+
271+
// undo the bit-reversed ordering of the spectrum: reverse all 32 bits, then shift the result down
const uvec2 coord = bitfieldReverse(gl_GlobalInvocationID.xy)>>pc.bitreverse_shift.xy;
272+
const nbl_glsl_complex shift = nbl_glsl_expImaginary(-nbl_glsl_PI*float(coord.x+coord.y));
273+
value = nbl_glsl_complex_mul(value,shift)/power;
274+
imageStore(NormalizedKernel[gl_WorkGroupID.z],ivec2(coord),vec4(value,0.0,0.0));
275+
}
276+
)==="));
277+
auto firstKernelFFTSpecializedShader = driver->createGPUSpecializedShader(firstKernelFFTShader.get(),IGPUSpecializedShader::SInfo(nullptr,nullptr,"main",ISpecializedShader::ESS_COMPUTE));
278+
auto lastKernelFFTSpecializedShader = driver->createGPUSpecializedShader(lastKernelFFTShader.get(),IGPUSpecializedShader::SInfo(nullptr,nullptr,"main",ISpecializedShader::ESS_COMPUTE));
279+
auto kernelNormalizationSpecializedShader = driver->createGPUSpecializedShader(kernelNormalizationShader.get(),IGPUSpecializedShader::SInfo(nullptr,nullptr,"main",ISpecializedShader::ESS_COMPUTE));
280+
281+
{
282+
IGPUSampler::SParams params =
283+
{
284+
{
285+
ISampler::ETC_CLAMP_TO_BORDER,
286+
ISampler::ETC_CLAMP_TO_BORDER,
287+
ISampler::ETC_CLAMP_TO_BORDER,
288+
ISampler::ETBC_FLOAT_OPAQUE_BLACK,
289+
ISampler::ETF_LINEAR,
290+
ISampler::ETF_LINEAR,
291+
ISampler::ESMM_LINEAR,
292+
0u,
293+
0u,
294+
ISampler::ECO_ALWAYS
295+
}
296+
};
297+
auto sampler = driver->createGPUSampler(std::move(params));
298+
constexpr uint32_t kernelSetDescCount = 4u;
299+
IGPUDescriptorSetLayout::SBinding binding[kernelSetDescCount] = {
300+
{0u,EDT_COMBINED_IMAGE_SAMPLER,1u,IGPUSpecializedShader::ESS_COMPUTE,&sampler},
301+
{1u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
302+
{2u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
303+
{3u,EDT_STORAGE_IMAGE,colorChannelsFFT,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
304+
};
305+
kernelDescriptorSetLayout = driver->createGPUDescriptorSetLayout(binding,binding+kernelSetDescCount);
306+
}
307+
308+
{
309+
SPushConstantRange pcRange[1] = {IGPUSpecializedShader::ESS_COMPUTE,0u,core::max(sizeof(FFTClass::Parameters_t),sizeof(NormalizationPushConstants))};
310+
kernelPipelineLayout = driver->createGPUPipelineLayout(pcRange,pcRange+1u,core::smart_refctd_ptr(kernelDescriptorSetLayout));
311+
}
312+
313+
// The kernel FFT and normalization pipelines must be created against `kernelPipelineLayout`:
// it is the layout built just above from `kernelDescriptorSetLayout`, and it is the layout used
// later when binding the kernel descriptor set and dispatching these passes.
// `sharedPipelineLayout` has not been created yet at this point, so it is still a null pointer.
firstKernelFFTPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(kernelPipelineLayout),std::move(firstKernelFFTSpecializedShader));
314+
lastKernelFFTPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(kernelPipelineLayout),std::move(lastKernelFFTSpecializedShader));
315+
kernelNormalizationPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(kernelPipelineLayout),std::move(kernelNormalizationSpecializedShader));
316+
317+
189318
auto deinterleaveShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
190319
#version 450 core
191320
#extension GL_EXT_shader_16bit_storage : require
@@ -453,7 +582,7 @@ void convolve(in uint item_per_thread_count, in uint ch)
453582
454583
uv += pc.data.kernel_half_pixel_size;
455584
//
456-
nbl_glsl_complex convSpectrum = vec2(1.0,0.0);//textureLod(NormalizedKernel[ch],uv,0).xy;
585+
nbl_glsl_complex convSpectrum = textureLod(NormalizedKernel[ch],uv,0).xy;
457586
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_complex_mul(sourceSpectrum,convSpectrum);
458587
}
459588
}
@@ -659,36 +788,41 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
659788
auto convolveSpecializedShader = driver->createGPUSpecializedShader(convolveShader.get(),specInfo);
660789
auto interleaveAndLastFFTSpecializedShader = driver->createGPUSpecializedShader(interleaveAndLastFFTShader.get(),specInfo);
661790

662-
core::smart_refctd_ptr<IGPUSampler> samplers[colorChannelsFFT];
663791
{
664-
IGPUSampler::SParams params =
792+
core::smart_refctd_ptr<IGPUSampler> samplers[colorChannelsFFT];
665793
{
794+
IGPUSampler::SParams params =
666795
{
667-
ISampler::ETC_REPEAT,
668-
ISampler::ETC_REPEAT,
669-
ISampler::ETC_REPEAT,
670-
ISampler::ETBC_FLOAT_OPAQUE_BLACK,
671-
ISampler::ETF_LINEAR, // is it needed?
672-
ISampler::ETF_LINEAR,
673-
ISampler::ESMM_NEAREST,
674-
0u,
675-
0u,
676-
ISampler::ECO_ALWAYS
677-
}
796+
{
797+
ISampler::ETC_REPEAT,
798+
ISampler::ETC_REPEAT,
799+
ISampler::ETC_REPEAT,
800+
ISampler::ETBC_FLOAT_OPAQUE_BLACK,
801+
ISampler::ETF_LINEAR, // is it needed?
802+
ISampler::ETF_LINEAR,
803+
ISampler::ESMM_NEAREST,
804+
0u,
805+
0u,
806+
ISampler::ECO_ALWAYS
807+
}
808+
};
809+
auto sampler = driver->createGPUSampler(std::move(params));
810+
std::fill_n(samplers,colorChannelsFFT,sampler);
811+
}
812+
IGPUDescriptorSetLayout::SBinding binding[SharedDescriptorSetDescCount] = {
813+
{0u,EDT_STORAGE_BUFFER,EII_COUNT,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
814+
{1u,EDT_STORAGE_BUFFER,EII_COUNT,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
815+
{2u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
816+
{3u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
817+
{4u,EDT_COMBINED_IMAGE_SAMPLER,colorChannelsFFT,IGPUSpecializedShader::ESS_COMPUTE,samplers}
678818
};
679-
auto sampler = driver->createGPUSampler(std::move(params));
680-
std::fill_n(samplers,colorChannelsFFT,sampler);
819+
sharedDescriptorSetLayout = driver->createGPUDescriptorSetLayout(binding,binding+SharedDescriptorSetDescCount);
820+
}
821+
822+
{
823+
SPushConstantRange pcRange[1] = {IGPUSpecializedShader::ESS_COMPUTE,0u,sizeof(CommonPushConstants)};
824+
sharedPipelineLayout = driver->createGPUPipelineLayout(pcRange,pcRange+sizeof(pcRange)/sizeof(SPushConstantRange),core::smart_refctd_ptr(sharedDescriptorSetLayout));
681825
}
682-
IGPUDescriptorSetLayout::SBinding binding[SharedDescriptorSetDescCount] = {
683-
{0u,EDT_STORAGE_BUFFER,EII_COUNT,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
684-
{1u,EDT_STORAGE_BUFFER,EII_COUNT,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
685-
{2u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
686-
{3u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
687-
{4u,EDT_COMBINED_IMAGE_SAMPLER,colorChannelsFFT,IGPUSpecializedShader::ESS_COMPUTE,samplers}
688-
};
689-
sharedDescriptorSetLayout = driver->createGPUDescriptorSetLayout(binding,binding+SharedDescriptorSetDescCount);
690-
SPushConstantRange pcRange[1] = {IGPUSpecializedShader::ESS_COMPUTE,0u,sizeof(CommonPushConstants)};
691-
sharedPipelineLayout = driver->createGPUPipelineLayout(pcRange,pcRange+sizeof(pcRange)/sizeof(SPushConstantRange),core::smart_refctd_ptr(sharedDescriptorSetLayout));
692826

693827
deinterleavePipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(sharedPipelineLayout),std::move(deinterleaveSpecializedShader));
694828
intensityPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(sharedPipelineLayout),std::move(intensitySpecializedShader));
@@ -1179,9 +1313,39 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
11791313
FFTClass::Parameters_t fftPushConstants[2];
11801314
FFTClass::DispatchInfo_t fftDispatchInfo[2];
11811315
const ISampler::E_TEXTURE_CLAMP fftPadding[2] = { ISampler::ETC_CLAMP_TO_BORDER,ISampler::ETC_CLAMP_TO_BORDER };
1182-
const auto passes = FFTClass::buildParameters(false,colorChannelsFFT, kerDim, fftPushConstants, fftDispatchInfo, fftPadding);
1316+
const auto passes = FFTClass::buildParameters(false,colorChannelsFFT,kerDim,fftPushConstants,fftDispatchInfo,fftPadding);
11831317

11841318
// the kernel's FFTs
1319+
{
1320+
auto kernelDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr(kernelDescriptorSetLayout));
1321+
{
1322+
}
1323+
driver->bindDescriptorSets(EPBP_COMPUTE,kernelPipelineLayout.get(),0u,1u,&kernelDescriptorSet.get(),nullptr);
1324+
1325+
// Ker Image First Axis FFT
1326+
driver->bindComputePipeline(firstKernelFFTPipeline.get());
1327+
driver->pushConstants(firstKernelFFTPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,sizeof(FFTClass::Parameters_t),sizeof(float),&param.bloomScale);
1328+
FFTClass::dispatchHelper(driver,kernelPipelineLayout.get(),fftPushConstants[0],fftDispatchInfo[0]);
1329+
1330+
// Ker Image Last Axis FFT
1331+
driver->bindComputePipeline(lastKernelFFTPipeline.get());
1332+
FFTClass::dispatchHelper(driver,kernelPipelineLayout.get(),fftPushConstants[1],fftDispatchInfo[1]);
1333+
1334+
// normalization and shuffle
1335+
driver->bindComputePipeline(kernelNormalizationPipeline.get());
1336+
{
1337+
NormalizationPushConstants normalizationPC;
1338+
normalizationPC.stride = fftPushConstants[1].output_strides;
1339+
normalizationPC.bitreverse_shift.x = 32-core::findMSB(paddedKerDim.width);
1340+
normalizationPC.bitreverse_shift.y = 32-core::findMSB(paddedKerDim.height);
1341+
normalizationPC.bitreverse_shift.z = 0;
1342+
driver->pushConstants(kernelNormalizationPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(normalizationPC),&normalizationPC);
1343+
const uint32_t dispatchSizeX = (paddedKerDim.width-1u)/16u+1u;
1344+
const uint32_t dispatchSizeY = (paddedKerDim.height-1u)/16u+1u;
1345+
driver->dispatch(dispatchSizeX,dispatchSizeY,colorChannelsFFT);
1346+
}
1347+
FFTClass::defaultBarrier();
1348+
}
11851349
}
11861350

11871351
uint32_t outImageByteOffset[EII_COUNT];

include/nbl/builtin/glsl/ext/FFT/default_compute_fft.comp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) i
2020

2121
#ifndef _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
2222
#define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
23-
24-
2523
layout(set=_NBL_GLSL_EXT_FFT_INPUT_SET_DEFINED_, binding=_NBL_GLSL_EXT_FFT_INPUT_BINDING_DEFINED_) readonly restrict buffer InputBuffer
2624
{
2725
nbl_glsl_ext_FFT_storage_t inData[];

include/nbl/ext/FFT/FFT.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ class FFT final : public core::IReferenceCounted
202202
bool issueDefaultBarrier=true)
203203
{
204204
driver->pushConstants(pipelineLayout,video::IGPUSpecializedShader::ESS_COMPUTE,0u,sizeof(Parameters_t),&params);
205-
driver->dispatch(dispatchInfo.workGroupCount[0], dispatchInfo.workGroupCount[1], dispatchInfo.workGroupCount[2]);
205+
driver->dispatch(dispatchInfo.workGroupCount[0],dispatchInfo.workGroupCount[1],dispatchInfo.workGroupCount[2]);
206206

207207
if (issueDefaultBarrier)
208208
defaultBarrier();

0 commit comments

Comments
 (0)