@@ -184,7 +184,7 @@ int main(int argc, char* argv[])
184
184
constexpr auto SharedDescriptorSetDescCount = 4u ;
185
185
core::smart_refctd_ptr<IGPUDescriptorSetLayout> sharedDescriptorSetLayout;
186
186
core::smart_refctd_ptr<IGPUPipelineLayout> sharedPipelineLayout;
187
- core::smart_refctd_ptr<IGPUComputePipeline> deinterleavePipeline,intensityPipeline,secondLumaMeterAndFirstFFTPipeline,interleaveAndLastFFTPipeline;
187
+ core::smart_refctd_ptr<IGPUComputePipeline> deinterleavePipeline,intensityPipeline,secondLumaMeterAndFirstFFTPipeline,convolvePipeline, interleaveAndLastFFTPipeline;
188
188
{
189
189
auto deinterleaveShader = driver->createGPUShader (core::make_smart_refctd_ptr<ICPUShader>(R"===(
190
190
#version 450 core
@@ -309,18 +309,6 @@ uint nbl_glsl_ext_FFT_Parameters_t_getDirection()
309
309
{
310
310
return 0u;
311
311
}
312
- uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel()
313
- {
314
- return 2u;
315
- }
316
- uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
317
- {
318
- return max(findMSB(pc.data.imageWidth-1u),_NBL_GLSL_WORKGROUP_SIZE_LOG2_)+1u;
319
- }
320
- uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
321
- {
322
- return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_;
323
- }
324
312
#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
325
313
326
314
@@ -398,6 +386,115 @@ void main()
398
386
nbl_glsl_ext_FFT_setData(nbl_glsl_ext_FFT_getCoordinates(tid),channel,nbl_glsl_ext_FFT_impl_values[t]);
399
387
}
400
388
}
389
+ }
390
+ )===" ));
391
+ auto convolveShader = driver->createGPUShader (core::make_smart_refctd_ptr<ICPUShader>(R"===(
392
+ #version 450 core
393
+ #extension GL_EXT_shader_16bit_storage : require
394
+
395
+ // nasty and ugly but oh well
396
+ #define _NBL_GLSL_SCRATCH_SHARED_DEFINED_ sharedScratch
397
+ #define _NBL_GLSL_SCRATCH_SHARED_SIZE_DEFINED_ 1024
398
+ shared uint _NBL_GLSL_SCRATCH_SHARED_DEFINED_[_NBL_GLSL_SCRATCH_SHARED_SIZE_DEFINED_];
399
+
400
+ #include "../ShaderCommon.glsl"
401
+ layout(binding = 1, std430) restrict buffer SpectrumOutputBuffer
402
+ {
403
+ vec2 spectrum[];
404
+ };
405
+ #define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
406
+ #define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
407
+
408
+
409
+
410
+ #include <nbl/builtin/glsl/math/complex.glsl>
411
+
412
+
413
+ uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions() // extents reported for this pass: (2^log2FFTSize, image height, 1)
414
+ {
415
+ return uvec3(0x1u<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize(),pc.data.imageHeight,1u); // x is a power of two built from the log2 size; y is read from the push constants
416
+ }
417
+ bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse() // evaluates to true, since the constant below is nonzero
418
+ {
419
+ return bool(0xdeadbeefu); // NOTE(review): looks like a placeholder value; main() in this shader passes explicit false/true to nbl_glsl_ext_FFT_preloaded - confirm nothing depends on this getter
420
+ }
421
+ uint nbl_glsl_ext_FFT_Parameters_t_getDirection() // axis index; main() in this shader uses it to index the result of getDimensions()
422
+ {
423
+ return 1u; // component 1 of getDimensions() is pc.data.imageHeight
424
+ }
425
+ #define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
426
+
427
+
428
+ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel);
429
+ #define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
430
+ void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value) // stores one complex value into the `spectrum` buffer; coordinate.z is not used
431
+ {
432
+ const uint index = ((channel<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.x)*pc.data.imageHeight+coordinate.y; // each channel spans 2^log2FFTSize columns, each column holds imageHeight consecutive entries indexed by y
433
+ spectrum[index] = complex_value;
434
+ }
435
+ #define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
436
+
437
+ #define _NBL_GLSL_EXT_FFT_MAIN_DEFINED_
438
+ #include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
439
+
440
+ void convolve(in uint item_per_thread_count, in uint ch) // multiplies every value this invocation holds in nbl_glsl_ext_FFT_impl_values by a per-element factor; `ch` selects the kernel texture
441
+ {
442
+ for(uint t=0u; t<item_per_thread_count; t++)
443
+ {
444
+ const uint tid = _NBL_GLSL_WORKGROUP_SIZE_*t+gl_LocalInvocationIndex; // index of the t-th element owned by this invocation
445
+
446
+ nbl_glsl_complex sourceSpectrum = nbl_glsl_ext_FFT_impl_values[t];
447
+
448
+ // turn the element's coordinates into a lookup position for the kernel
449
+ const uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
450
+ vec2 uv = vec2(bitfieldReverse(coords.xy))/vec2(4294967296.f); // bit-reversed x and y divided by 2^32
451
+ #ifdef CONVOLVE
452
+ uv += pc.params.kernel_half_pixel_size;
453
+ // fetch the factor from mip level 0 of the kernel texture for this channel
454
+ nbl_glsl_complex convSpectrum = textureLod(NormalizedKernel[ch],uv,0).xy;
455
+ #else
456
+ nbl_glsl_complex convSpectrum = nbl_glsl_complex(1.f,0.f); // CONVOLVE not defined: factor is 1+0i; NOTE(review): presumably leaves the value unchanged - confirm against nbl_glsl_complex_mul
457
+ #endif
458
+ nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_complex_mul(sourceSpectrum,convSpectrum);
459
+ }
460
+ }
461
+
462
+ void main() // for each of 3 channels: load values, run the FFT routine, call convolve(), run the FFT routine again, store the results
463
+ {
464
+ // Virtual Threads Calculation
465
+ const uint log2FFTSize = nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize();
466
+ const uint item_per_thread_count = 0x1u<<(log2FFTSize-_NBL_GLSL_WORKGROUP_SIZE_LOG2_); // 2^(log2FFTSize - log2 of the workgroup size) elements per invocation
467
+ for(uint channel=0u; channel<3u; channel++)
468
+ {
469
+ // Load Values into local memory
470
+ for(uint t=0u; t<item_per_thread_count; t++)
471
+ {
472
+ const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
473
+ const uint trueDim = nbl_glsl_ext_FFT_Parameters_t_getDimensions()[nbl_glsl_ext_FFT_Parameters_t_getDirection()]; // the extent along the axis named by getDirection(); does not depend on t
474
+ nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_ext_FFT_getPaddedData(nbl_glsl_ext_FFT_getPaddedCoordinates(tid,log2FFTSize,trueDim),channel);
475
+ }
476
+ nbl_glsl_ext_FFT_preloaded(false,log2FFTSize); // NOTE(review): first argument presumably selects forward (false) vs inverse (true) - confirm against the FFT include
477
+ barrier();
478
+
479
+ convolve(item_per_thread_count,channel);
480
+
481
+ barrier();
482
+ nbl_glsl_ext_FFT_preloaded(true,log2FFTSize);
483
+ // write out to main memory
484
+ for(uint t=0u; t<item_per_thread_count; t++)
485
+ {
486
+ const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
487
+ nbl_glsl_ext_FFT_setData(nbl_glsl_ext_FFT_getCoordinates(tid),channel,nbl_glsl_ext_FFT_impl_values[t]);
488
+ }
489
+ }
490
+ }
491
+
492
+ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel) // reads one complex value from the `spectrum` buffer, or zero when the coordinate is rejected
493
+ {
494
+ if (!nbl_glsl_ext_FFT_wrap_coord(coordinate)) // NOTE(review): presumably wrap_coord may adjust `coordinate` in place - confirm against the FFT include
495
+ return nbl_glsl_complex(0.f,0.f);
496
+ const uint index = ((channel<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.x)*pc.data.imageHeight+coordinate.y; // same index expression as nbl_glsl_ext_FFT_setData in this shader
497
+ return spectrum[index];
401
498
}
402
499
)===" ));
403
500
auto interleaveAndLastFFTShader = driver->createGPUShader (core::make_smart_refctd_ptr<ICPUShader>(R"===(
@@ -432,10 +529,6 @@ layout(binding = 3, std430) restrict readonly buffer IntensityBuffer
432
529
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel);
433
530
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
434
531
435
- uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
436
- {
437
- return max(findMSB(pc.data.imageWidth-1u),_NBL_GLSL_WORKGROUP_SIZE_LOG2_)+1u;
438
- }
439
532
uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions()
440
533
{
441
534
return uvec3(0x1u<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize(),pc.data.imageHeight,1u);
@@ -448,14 +541,6 @@ uint nbl_glsl_ext_FFT_Parameters_t_getDirection()
448
541
{
449
542
return 0u;
450
543
}
451
- uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel()
452
- {
453
- return 2u;
454
- }
455
- uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
456
- {
457
- return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_;
458
- }
459
544
#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
460
545
461
546
@@ -567,6 +652,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
567
652
auto deinterleaveSpecializedShader = driver->createGPUSpecializedShader (deinterleaveShader.get (),specInfo);
568
653
auto intensitySpecializedShader = driver->createGPUSpecializedShader (intensityShader.get (),specInfo);
569
654
auto secondLumaMeterAndFirstFFTSpecializedShader = driver->createGPUSpecializedShader (secondLumaMeterAndFirstFFTShader.get (),specInfo);
655
+ auto convolveSpecializedShader = driver->createGPUSpecializedShader (convolveShader.get (),specInfo);
570
656
auto interleaveAndLastFFTSpecializedShader = driver->createGPUSpecializedShader (interleaveAndLastFFTShader.get (),specInfo);
571
657
572
658
IGPUDescriptorSetLayout::SBinding binding[SharedDescriptorSetDescCount] = {
@@ -582,6 +668,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
582
668
deinterleavePipeline = driver->createGPUComputePipeline (nullptr ,core::smart_refctd_ptr (sharedPipelineLayout),std::move (deinterleaveSpecializedShader));
583
669
intensityPipeline = driver->createGPUComputePipeline (nullptr ,core::smart_refctd_ptr (sharedPipelineLayout),std::move (intensitySpecializedShader));
584
670
secondLumaMeterAndFirstFFTPipeline = driver->createGPUComputePipeline (nullptr ,core::smart_refctd_ptr (sharedPipelineLayout),std::move (secondLumaMeterAndFirstFFTSpecializedShader));
671
+ convolvePipeline = driver->createGPUComputePipeline (nullptr ,core::smart_refctd_ptr (sharedPipelineLayout),std::move (convolveSpecializedShader));
585
672
interleaveAndLastFFTPipeline = driver->createGPUComputePipeline (nullptr ,core::smart_refctd_ptr (sharedPipelineLayout),std::move (interleaveAndLastFFTSpecializedShader));
586
673
}
587
674
@@ -1183,6 +1270,19 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
1183
1270
COpenGLExtensionHandler::extGlMemoryBarrier (GL_SHADER_STORAGE_BARRIER_BIT);
1184
1271
1185
1272
// TODO: Y-axis FFT, multiply the spectra together, y-axis iFFT
1273
+ driver->bindComputePipeline (convolvePipeline.get ());
1274
+ #if 0
1275
+ {
1276
+ const auto& kernelImgExtent = kernelNormalizedSpectrums[0]->getCreationParameters().image->getCreationParameters().extent;
1277
+ vec2 kernel_half_pixel_size{0.5f,0.5f};
1278
+ kernel_half_pixel_size.x /= kernelImgExtent.width;
1279
+ kernel_half_pixel_size.y /= kernelImgExtent.height;
1280
+ driver->pushConstants(convolvePipeline->getLayout(),ISpecializedShader::ESS_COMPUTE,offsetof(convolve_parameters_t,kernel_half_pixel_size),sizeof(convolve_parameters_t::kernel_half_pixel_size),&kernel_half_pixel_size);
1281
+ }
1282
+ #endif
1283
+ // dispatch
1284
+ // !driver->dispatch(param.fftDispatchInfo[1].workGroupCount[0],param.fftDispatchInfo[1].workGroupCount[1],1u);
1285
+ COpenGLExtensionHandler::extGlMemoryBarrier (GL_SHADER_STORAGE_BARRIER_BIT);
1186
1286
1187
1287
// bind intensity pipeline
1188
1288
driver->bindComputePipeline (intensityPipeline.get ());
0 commit comments