@@ -182,10 +182,139 @@ int main(int argc, char* argv[])
182
182
driver->fillBuffer (histogramBuffer.get (),0u ,HistogramBufferSize,0u );
183
183
184
184
constexpr auto SharedDescriptorSetDescCount = 5u ;
185
- core::smart_refctd_ptr<IGPUDescriptorSetLayout> sharedDescriptorSetLayout;
186
- core::smart_refctd_ptr<IGPUPipelineLayout> sharedPipelineLayout;
187
- core::smart_refctd_ptr<IGPUComputePipeline> deinterleavePipeline,intensityPipeline,secondLumaMeterAndFirstFFTPipeline,convolvePipeline,interleaveAndLastFFTPipeline;
185
+ core::smart_refctd_ptr<IGPUDescriptorSetLayout> kernelDescriptorSetLayout,sharedDescriptorSetLayout;
186
+ core::smart_refctd_ptr<IGPUPipelineLayout> kernelPipelineLayout,sharedPipelineLayout;
187
+ core::smart_refctd_ptr<IGPUComputePipeline> firstKernelFFTPipeline,lastKernelFFTPipeline,kernelNormalizationPipeline,
188
+ deinterleavePipeline,intensityPipeline,
189
+ secondLumaMeterAndFirstFFTPipeline,convolvePipeline,interleaveAndLastFFTPipeline;
190
+ // Push constants for the kernel-spectrum normalization pass; field order mirrors the GLSL `PushConstants` block of that shader (uvec4 strides; uvec4 bitreverse_shift)
191
+ struct NormalizationPushConstants
188
192
{
193
+ ext::FFT::uvec4 stride; // the shader dots .xyz with gl_GlobalInvocationID to index the FFT output buffer
194
+ ext::FFT::uvec4 bitreverse_shift; // right-shift applied after bitfieldReverse(); the shader only reads .xy
195
+ };
196
+ {
197
+ auto firstKernelFFTShader = driver->createGPUShader (core::make_smart_refctd_ptr<ICPUShader>(R"===(
198
+ #define _NBL_GLSL_WORKGROUP_SIZE_ 256
199
+ layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in;
200
+
201
+ // kinda bad overdeclaration but oh well
202
+ #define _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_ 16384
203
+
204
+ // Input Descriptor
205
+ layout(set=0, binding=0) uniform sampler2D inputImage;
206
+ #define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
207
+
208
+ #include <nbl/builtin/glsl/math/complex.glsl>
209
+ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in ivec3 coordinate, in uint channel)
210
+ {
211
+ ivec2 inputImageSize = textureSize(inputImage,0);
212
+ vec2 normalizedCoords = (vec2(coordinate.xy)+vec2(0.5f))/(vec2(inputImageSize)*KERNEL_SCALE);
213
+ vec4 texelValue = textureLod(inputImage, normalizedCoords+vec2(0.5-0.5/KERNEL_SCALE), -log2(KERNEL_SCALE));
214
+ return nbl_glsl_complex(texelValue[channel], 0.0f);
215
+ }
216
+ #define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
217
+
218
+ #include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
219
+ )===" )); // first-axis kernel FFT: samples the kernel image (set 0, binding 0) as the padded FFT input. NOTE(review): KERNEL_SCALE is used but not defined anywhere in this shader text; the host pushes a float right after FFTClass::Parameters_t before dispatching this pipeline - confirm how the two are meant to be connected
220
+ auto lastKernelFFTShader = driver->createGPUShader (core::make_smart_refctd_ptr<ICPUShader>(R"===(
221
+ #define _NBL_GLSL_WORKGROUP_SIZE_ 256
222
+ layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in;
223
+
224
+ // kinda bad overdeclaration but oh well
225
+ #define _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_ 16384
226
+ #include <nbl/builtin/glsl/ext/FFT/types.glsl>
227
+
228
+ layout(set=0, binding=1) readonly restrict buffer InputBuffer
229
+ {
230
+ nbl_glsl_ext_FFT_storage_t inData[];
231
+ };
232
+ #define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
233
+
234
+ layout(set=0, binding=2) writeonly restrict buffer OutputBuffer
235
+ {
236
+ nbl_glsl_ext_FFT_storage_t outData[];
237
+ };
238
+ #define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
239
+
240
+ #include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
241
+ )===" )); // last-axis kernel FFT: reads the storage buffer at binding 1 and writes the storage buffer at binding 2 (the same binding the normalization shader declares as its input)
242
+ auto kernelNormalizationShader = driver->createGPUShader (core::make_smart_refctd_ptr<ICPUShader>(R"===(
243
+ layout(local_size_x=16, local_size_y=16, local_size_z=1) in;
244
+
245
+ #include <nbl/builtin/glsl/ext/FFT/types.glsl>
246
+
247
+ layout(set=0, binding=2) readonly restrict buffer InputBuffer
248
+ {
249
+ nbl_glsl_ext_FFT_storage_t inData[];
250
+ };
251
+ layout(set=0, binding=3, rg32f) uniform image2D NormalizedKernel[3];
252
+
253
+ layout(push_constant) uniform PushConstants
254
+ {
255
+ uvec4 strides;
256
+ uvec4 bitreverse_shift;
257
+ } pc;
258
+
259
+ #include <nbl/builtin/glsl/colorspace/encodeCIEXYZ.glsl>
260
+
261
+ void main()
262
+ {
263
+ nbl_glsl_complex value = inData[nbl_glsl_dot(gl_GlobalInvocationID,pc.strides.xyz)];
264
+
265
+ // imaginary component will be 0, image shall be positive
266
+ vec3 avg;
267
+ for (uint i=0u; i<3u; i++)
268
+ avg[i] = inData[pc.strides.z*i].x;
269
+ const float power = (nbl_glsl_scRGBtoXYZ*avg).y;
270
+
271
+ const uvec2 coord = bitfieldReverse(gl_GlobalInvocationID.xy)>>pc.bitreverse_shift.xy;
272
+ const nbl_glsl_complex shift = nbl_glsl_expImaginary(-nbl_glsl_PI*float(coord.x+coord.y));
273
+ value = nbl_glsl_complex_mul(value,shift)/power;
274
+ imageStore(NormalizedKernel[gl_WorkGroupID.z],ivec2(coord),vec4(value,0.0,0.0));
275
+ }
276
+ )===" )); // normalization pass: divides each spectrum value by the Y (luma) of the three channels' first elements, multiplies by exp(-i*pi*(x+y)) and stores it at the bit-reversed coordinate of the per-channel image; the buffer member is `inData`, so every read must use that exact name
277
+ auto firstKernelFFTSpecializedShader = driver->createGPUSpecializedShader (firstKernelFFTShader.get (),IGPUSpecializedShader::SInfo (nullptr ,nullptr ,"main" ,ISpecializedShader::ESS_COMPUTE)); // entry point name must match the GLSL `void main()` exactly (no leading whitespace)
278
+ auto lastKernelFFTSpecializedShader = driver->createGPUSpecializedShader (lastKernelFFTShader.get (),IGPUSpecializedShader::SInfo (nullptr ,nullptr ,"main" ,ISpecializedShader::ESS_COMPUTE));
279
+ auto kernelNormalizationSpecializedShader = driver->createGPUSpecializedShader (kernelNormalizationShader.get (),IGPUSpecializedShader::SInfo (nullptr ,nullptr ,"main" ,ISpecializedShader::ESS_COMPUTE));
280
+
281
+ { // builds the descriptor set layout shared by the three kernel-preprocessing compute shaders
282
+ IGPUSampler::SParams params =
283
+ {
284
+ {
285
+ ISampler::ETC_CLAMP_TO_BORDER, // three texture-clamp entries, presumably one per texture coordinate axis - TODO confirm against SParams
286
+ ISampler::ETC_CLAMP_TO_BORDER,
287
+ ISampler::ETC_CLAMP_TO_BORDER,
288
+ ISampler::ETBC_FLOAT_OPAQUE_BLACK, // border colour returned when sampling outside the kernel image
289
+ ISampler::ETF_LINEAR,
290
+ ISampler::ETF_LINEAR,
291
+ ISampler::ESMM_LINEAR, // linear mip mode; the first kernel FFT shader samples with an explicit textureLod() level
292
+ 0u ,
293
+ 0u ,
294
+ ISampler::ECO_ALWAYS
295
+ }
296
+ };
297
+ auto sampler = driver->createGPUSampler (std::move (params));
298
+ constexpr uint32_t kernelSetDescCount = 4u ;
299
+ IGPUDescriptorSetLayout::SBinding binding[kernelSetDescCount] = {
300
+ {0u ,EDT_COMBINED_IMAGE_SAMPLER,1u ,IGPUSpecializedShader::ESS_COMPUTE,&sampler}, // `inputImage` of the first kernel FFT shader
301
+ {1u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr }, // `InputBuffer` of the last kernel FFT shader
302
+ {2u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr }, // `OutputBuffer` of the last kernel FFT shader, `InputBuffer` of the normalization shader
303
+ {3u ,EDT_STORAGE_IMAGE,colorChannelsFFT,IGPUSpecializedShader::ESS_COMPUTE,nullptr }, // `NormalizedKernel[3]` written by the normalization shader
304
+ };
305
+ kernelDescriptorSetLayout = driver->createGPUDescriptorSetLayout (binding,binding+kernelSetDescCount);
306
+ }
307
+
308
+ { // pipeline layout for the kernel-preprocessing passes: one compute push-constant range over the kernel descriptor set layout
309
+ SPushConstantRange pcRange[1 ] = {IGPUSpecializedShader::ESS_COMPUTE,0u ,core::max (sizeof (FFTClass::Parameters_t)+sizeof (float ),sizeof (NormalizationPushConstants))}; // the range must also cover the extra float pushed at offset sizeof(FFTClass::Parameters_t) for the first kernel FFT
310
+ kernelPipelineLayout = driver->createGPUPipelineLayout (pcRange,pcRange+1u ,core::smart_refctd_ptr (kernelDescriptorSetLayout));
311
+ }
312
+
313
+ firstKernelFFTPipeline = driver->createGPUComputePipeline (nullptr ,core::smart_refctd_ptr (kernelPipelineLayout),std::move (firstKernelFFTSpecializedShader)); // kernel passes use the kernel layout: it is the one bound and dispatched with later, and sharedPipelineLayout has not been created yet at this point
314
+ lastKernelFFTPipeline = driver->createGPUComputePipeline (nullptr ,core::smart_refctd_ptr (kernelPipelineLayout),std::move (lastKernelFFTSpecializedShader));
315
+ kernelNormalizationPipeline = driver->createGPUComputePipeline (nullptr ,core::smart_refctd_ptr (kernelPipelineLayout),std::move (kernelNormalizationSpecializedShader));
316
+
317
+
189
318
auto deinterleaveShader = driver->createGPUShader (core::make_smart_refctd_ptr<ICPUShader>(R"===(
190
319
#version 450 core
191
320
#extension GL_EXT_shader_16bit_storage : require
@@ -453,7 +582,7 @@ void convolve(in uint item_per_thread_count, in uint ch)
453
582
454
583
uv += pc.data.kernel_half_pixel_size;
455
584
//
456
- nbl_glsl_complex convSpectrum = vec2(1.0,0.0);// textureLod(NormalizedKernel[ch],uv,0).xy;
585
+ nbl_glsl_complex convSpectrum = textureLod(NormalizedKernel[ch],uv,0).xy;
457
586
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_complex_mul(sourceSpectrum,convSpectrum);
458
587
}
459
588
}
@@ -659,36 +788,41 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
659
788
auto convolveSpecializedShader = driver->createGPUSpecializedShader (convolveShader.get (),specInfo);
660
789
auto interleaveAndLastFFTSpecializedShader = driver->createGPUSpecializedShader (interleaveAndLastFFTShader.get (),specInfo);
661
790
662
- core::smart_refctd_ptr<IGPUSampler> samplers[colorChannelsFFT];
663
791
{
664
- IGPUSampler::SParams params =
792
+ core::smart_refctd_ptr<IGPUSampler> samplers[colorChannelsFFT];
665
793
{
794
+ IGPUSampler::SParams params =
666
795
{
667
- ISampler::ETC_REPEAT,
668
- ISampler::ETC_REPEAT,
669
- ISampler::ETC_REPEAT,
670
- ISampler::ETBC_FLOAT_OPAQUE_BLACK,
671
- ISampler::ETF_LINEAR, // is it needed?
672
- ISampler::ETF_LINEAR,
673
- ISampler::ESMM_NEAREST,
674
- 0u ,
675
- 0u ,
676
- ISampler::ECO_ALWAYS
677
- }
796
+ {
797
+ ISampler::ETC_REPEAT,
798
+ ISampler::ETC_REPEAT,
799
+ ISampler::ETC_REPEAT,
800
+ ISampler::ETBC_FLOAT_OPAQUE_BLACK,
801
+ ISampler::ETF_LINEAR, // is it needed?
802
+ ISampler::ETF_LINEAR,
803
+ ISampler::ESMM_NEAREST,
804
+ 0u ,
805
+ 0u ,
806
+ ISampler::ECO_ALWAYS
807
+ }
808
+ };
809
+ auto sampler = driver->createGPUSampler (std::move (params));
810
+ std::fill_n (samplers,colorChannelsFFT,sampler);
811
+ }
812
+ IGPUDescriptorSetLayout::SBinding binding[SharedDescriptorSetDescCount] = {
813
+ {0u ,EDT_STORAGE_BUFFER,EII_COUNT,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
814
+ {1u ,EDT_STORAGE_BUFFER,EII_COUNT,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
815
+ {2u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
816
+ {3u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
817
+ {4u ,EDT_COMBINED_IMAGE_SAMPLER,colorChannelsFFT,IGPUSpecializedShader::ESS_COMPUTE,samplers}
678
818
};
679
- auto sampler = driver->createGPUSampler (std::move (params));
680
- std::fill_n (samplers,colorChannelsFFT,sampler);
819
+ sharedDescriptorSetLayout = driver->createGPUDescriptorSetLayout (binding,binding+SharedDescriptorSetDescCount);
820
+ }
821
+
822
+ {
823
+ SPushConstantRange pcRange[1 ] = {IGPUSpecializedShader::ESS_COMPUTE,0u ,sizeof (CommonPushConstants)};
824
+ sharedPipelineLayout = driver->createGPUPipelineLayout (pcRange,pcRange+sizeof (pcRange)/sizeof (SPushConstantRange),core::smart_refctd_ptr (sharedDescriptorSetLayout));
681
825
}
682
- IGPUDescriptorSetLayout::SBinding binding[SharedDescriptorSetDescCount] = {
683
- {0u ,EDT_STORAGE_BUFFER,EII_COUNT,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
684
- {1u ,EDT_STORAGE_BUFFER,EII_COUNT,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
685
- {2u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
686
- {3u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
687
- {4u ,EDT_COMBINED_IMAGE_SAMPLER,colorChannelsFFT,IGPUSpecializedShader::ESS_COMPUTE,samplers}
688
- };
689
- sharedDescriptorSetLayout = driver->createGPUDescriptorSetLayout (binding,binding+SharedDescriptorSetDescCount);
690
- SPushConstantRange pcRange[1 ] = {IGPUSpecializedShader::ESS_COMPUTE,0u ,sizeof (CommonPushConstants)};
691
- sharedPipelineLayout = driver->createGPUPipelineLayout (pcRange,pcRange+sizeof (pcRange)/sizeof (SPushConstantRange),core::smart_refctd_ptr (sharedDescriptorSetLayout));
692
826
693
827
deinterleavePipeline = driver->createGPUComputePipeline (nullptr ,core::smart_refctd_ptr (sharedPipelineLayout),std::move (deinterleaveSpecializedShader));
694
828
intensityPipeline = driver->createGPUComputePipeline (nullptr ,core::smart_refctd_ptr (sharedPipelineLayout),std::move (intensitySpecializedShader));
@@ -1179,9 +1313,39 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
1179
1313
FFTClass::Parameters_t fftPushConstants[2 ];
1180
1314
FFTClass::DispatchInfo_t fftDispatchInfo[2 ];
1181
1315
const ISampler::E_TEXTURE_CLAMP fftPadding[2 ] = { ISampler::ETC_CLAMP_TO_BORDER,ISampler::ETC_CLAMP_TO_BORDER };
1182
- const auto passes = FFTClass::buildParameters (false ,colorChannelsFFT, kerDim, fftPushConstants, fftDispatchInfo, fftPadding);
1316
+ const auto passes = FFTClass::buildParameters (false ,colorChannelsFFT,kerDim,fftPushConstants,fftDispatchInfo,fftPadding);
1183
1317
1184
1318
// the kernel's FFTs
1319
+ {
1320
+ auto kernelDescriptorSet = driver->createGPUDescriptorSet (core::smart_refctd_ptr (kernelDescriptorSetLayout));
1321
+ {
1322
+ }
1323
+ driver->bindDescriptorSets (EPBP_COMPUTE,kernelPipelineLayout.get (),0u ,1u ,&kernelDescriptorSet.get (),nullptr );
1324
+
1325
+ // Kernel image, first axis FFT: bind the pipeline, push the bloom scale, then dispatch with the first pass' FFT parameters
1326
+ driver->bindComputePipeline (firstKernelFFTPipeline.get ());
1327
+ driver->pushConstants (firstKernelFFTPipeline->getLayout (),ICPUSpecializedShader::ESS_COMPUTE,sizeof (FFTClass::Parameters_t),sizeof (float ),&param.bloomScale ); // the scale is written directly after the FFT parameters in the push-constant block
1328
+ FFTClass::dispatchHelper (driver,kernelPipelineLayout.get (),fftPushConstants[0 ],fftDispatchInfo[0 ]);
1329
+
1330
+ // Ker Image Last Axis FFT
1331
+ driver->bindComputePipeline (lastKernelFFTPipeline.get ());
1332
+ FFTClass::dispatchHelper (driver,kernelPipelineLayout.get (),fftPushConstants[1 ],fftDispatchInfo[1 ]);
1333
+
1334
+ // normalization and shuffle
1335
+ driver->bindComputePipeline (kernelNormalizationPipeline.get ());
1336
+ {
1337
+ NormalizationPushConstants normalizationPC;
1338
+ normalizationPC.stride = fftPushConstants[1 ].output_strides ;
1339
+ normalizationPC.bitreverse_shift .x = 32 -core::findMSB (paddedKerDim.width );
1340
+ normalizationPC.bitreverse_shift .y = 32 -core::findMSB (paddedKerDim.height );
1341
+ normalizationPC.bitreverse_shift .z = 0 ;
1342
+ driver->pushConstants (kernelNormalizationPipeline->getLayout (),ICPUSpecializedShader::ESS_COMPUTE,0u ,sizeof (normalizationPC),&normalizationPC);
1343
+ const uint32_t dispatchSizeX = (paddedKerDim.width -1u )/16u +1u ;
1344
+ const uint32_t dispatchSizeY = (paddedKerDim.height -1u )/16u +1u ;
1345
+ driver->dispatch (dispatchSizeX,dispatchSizeY,colorChannelsFFT);
1346
+ }
1347
+ FFTClass::defaultBarrier ();
1348
+ }
1185
1349
}
1186
1350
1187
1351
uint32_t outImageByteOffset[EII_COUNT];
0 commit comments