Skip to content

Commit 45d97fd

Browse files
Merge pull request #819 from Devsh-Graphics-Programming/ali_blur3
Prefix sum box blur
2 parents 43323a6 + b1f2827 commit 45d97fd

File tree

17 files changed

+361
-63
lines changed

17 files changed

+361
-63
lines changed

include/nbl/asset/ICPUSampler.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,16 @@ class ICPUSampler : public ISampler, public IAsset
3737
};
3838
switch (wrapModes[i])
3939
{
40-
case ISampler::ETC_REPEAT:
40+
case ISampler::E_TEXTURE_CLAMP::ETC_REPEAT:
4141
repeat();
4242
break;
43-
case ISampler::ETC_CLAMP_TO_EDGE:
44-
texelCoord[i] = core::clamp<int32_t,int32_t>(texelCoord[i],0,mipLastCoord[i]);
43+
case ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE:
44+
texelCoord[i] = core::clamp<int32_t, int32_t>(texelCoord[i], 0, mipLastCoord[i]);
4545
break;
46-
case ISampler::ETC_MIRROR_CLAMP_TO_EDGE:
47-
texelCoord[i] = core::clamp<int32_t,int32_t>(texelCoord[i],-int32_t(mipExtent[i]),mipExtent[i]+mipLastCoord[i]);
46+
case ISampler::E_TEXTURE_CLAMP::ETC_MIRROR_CLAMP_TO_EDGE:
47+
texelCoord[i] = core::clamp<int32_t, int32_t>(texelCoord[i], -int32_t(mipExtent[i]), mipExtent[i] + mipLastCoord[i]);
4848
[[fallthrough]];
49-
case ISampler::ETC_MIRROR:
49+
case ISampler::E_TEXTURE_CLAMP::ETC_MIRROR:
5050
{
5151
int32_t repeatID = (originalWasNegative+texelCoord[i])/int32_t(mipExtent[i]);
5252
repeat();

include/nbl/asset/ISampler.h

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#define __NBL_ASSET_I_SAMPLER_H_INCLUDED__
77

88
#include "nbl/asset/IDescriptor.h"
9+
#include "nbl/builtin/hlsl/enums.hlsl"
910

1011
namespace nbl
1112
{
@@ -16,23 +17,7 @@ class ISampler : public IDescriptor
1617
{
1718
public:
1819
//! Texture coord clamp mode outside [0.0, 1.0]
19-
enum E_TEXTURE_CLAMP
20-
{
21-
//! Texture repeats
22-
ETC_REPEAT = 0,
23-
//! Texture is clamped to the edge pixel
24-
ETC_CLAMP_TO_EDGE,
25-
//! Texture is clamped to the border pixel (if exists)
26-
ETC_CLAMP_TO_BORDER,
27-
//! Texture is alternatingly mirrored (0..1..0..1..0..)
28-
ETC_MIRROR,
29-
//! Texture is mirrored once and then clamped to edge
30-
ETC_MIRROR_CLAMP_TO_EDGE,
31-
//! Texture is mirrored once and then clamped to border
32-
ETC_MIRROR_CLAMP_TO_BORDER,
33-
34-
ETC_COUNT
35-
};
20+
using E_TEXTURE_CLAMP = hlsl::TextureClamp;
3621

3722
enum E_TEXTURE_BORDER_COLOR
3823
{
@@ -75,11 +60,11 @@ class ISampler : public IDescriptor
7560
{
7661
struct {
7762
//! Values taken from E_TEXTURE_CLAMP
78-
uint32_t TextureWrapU : 3 = ETC_REPEAT;
63+
uint32_t TextureWrapU : 3 = E_TEXTURE_CLAMP::ETC_REPEAT;
7964
//! Values taken from E_TEXTURE_CLAMP
80-
uint32_t TextureWrapV : 3 = ETC_REPEAT;
65+
uint32_t TextureWrapV : 3 = E_TEXTURE_CLAMP::ETC_REPEAT;
8166
//! Values taken from E_TEXTURE_CLAMP
82-
uint32_t TextureWrapW : 3 = ETC_REPEAT;
67+
uint32_t TextureWrapW : 3 = E_TEXTURE_CLAMP::ETC_REPEAT;
8368
//! Values taken from E_TEXTURE_BORDER_COLOR
8469
uint32_t BorderColor : 3 = ETBC_FLOAT_OPAQUE_BLACK;
8570
//! Values taken from E_TEXTURE_FILTER

include/nbl/asset/filters/CBlitImageFilter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class CBlitImageFilterBase : public impl::CSwizzleableAndDitherableFilterBase<Sw
3434
uint8_t* scratchMemory = nullptr;
3535
uint32_t scratchMemoryByteSize = 0u;
3636
_NBL_STATIC_INLINE_CONSTEXPR auto NumWrapAxes = 3;
37-
ISampler::E_TEXTURE_CLAMP axisWraps[NumWrapAxes] = { ISampler::ETC_REPEAT,ISampler::ETC_REPEAT,ISampler::ETC_REPEAT };
37+
ISampler::E_TEXTURE_CLAMP axisWraps[NumWrapAxes] = { ISampler::E_TEXTURE_CLAMP::ETC_REPEAT,ISampler::E_TEXTURE_CLAMP::ETC_REPEAT,ISampler::E_TEXTURE_CLAMP::ETC_REPEAT };
3838
ISampler::E_TEXTURE_BORDER_COLOR borderColor = ISampler::ETBC_FLOAT_TRANSPARENT_BLACK;
3939
IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic = IBlitUtilities::EAS_NONE_OR_PREMULTIPLIED;
4040
double alphaRefValue = 0.5; // only required to make sense if `alphaSemantic==EAS_REFERENCE_OR_COVERAGE`
@@ -56,7 +56,7 @@ class CBlitImageFilterBase : public impl::CSwizzleableAndDitherableFilterBase<Sw
5656
return false;
5757

5858
for (auto i=0; i<CStateBase::NumWrapAxes; i++)
59-
if (state->axisWraps[i]>=ISampler::ETC_COUNT)
59+
if (state->axisWraps[i]>=ISampler::E_TEXTURE_CLAMP::ETC_COUNT)
6060
return false;
6161

6262
if (state->borderColor>=ISampler::ETBC_COUNT)

include/nbl/asset/filters/CPaddedCopyImageFilter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ class CPaddedCopyImageFilter : public CImageFilter<CPaddedCopyImageFilter>, publ
3030
virtual ~CState() {}
3131

3232
_NBL_STATIC_INLINE_CONSTEXPR auto NumWrapAxes = 3;
33-
ISampler::E_TEXTURE_CLAMP axisWraps[NumWrapAxes] = {ISampler::ETC_REPEAT,ISampler::ETC_REPEAT,ISampler::ETC_REPEAT};
33+
ISampler::E_TEXTURE_CLAMP axisWraps[NumWrapAxes] = {ISampler::E_TEXTURE_CLAMP::ETC_REPEAT,ISampler::E_TEXTURE_CLAMP::ETC_REPEAT,ISampler::E_TEXTURE_CLAMP::ETC_REPEAT};
3434
ISampler::E_TEXTURE_BORDER_COLOR borderColor;
3535
VkOffset3D relativeOffset;
3636
VkExtent3D paddedExtent;
@@ -57,7 +57,7 @@ class CPaddedCopyImageFilter : public CImageFilter<CPaddedCopyImageFilter>, publ
5757
// TODO: eventually remove when we can encode blocks
5858
for (auto i=0; i<CState::NumWrapAxes; i++)
5959
{
60-
if ((isBlockCompressionFormat(inFormat)||isBlockCompressionFormat(outFormat))&&state->axisWraps[i]!=ISampler::ETC_REPEAT)
60+
if ((isBlockCompressionFormat(inFormat)||isBlockCompressionFormat(outFormat))&&state->axisWraps[i]!=ISampler::E_TEXTURE_CLAMP::ETC_REPEAT)
6161
return false;
6262
}
6363

include/nbl/builtin/hlsl/enums.hlsl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,22 @@ enum ShaderStage : uint32_t
3232
ESS_ALL = 0x7fffffff
3333
};
3434

35+
enum TextureClamp : uint16_t
36+
{
37+
//! Texture repeats
38+
ETC_REPEAT = 0,
39+
//! Texture is clamped to the edge pixel
40+
ETC_CLAMP_TO_EDGE,
41+
//! Texture is clamped to the border pixel (if exists)
42+
ETC_CLAMP_TO_BORDER,
43+
//! Texture is alternatingly mirrored (0..1..0..1..0..)
44+
ETC_MIRROR,
45+
//! Texture is mirrored once and then clamped to edge
46+
ETC_MIRROR_CLAMP_TO_EDGE,
47+
48+
ETC_COUNT
49+
};
50+
3551
enum SampleCountFlags : uint16_t
3652
{
3753
ESCF_1_BIT = 0x01,
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#include "nbl/builtin/hlsl/cpp_compat.hlsl"
2+
#include "nbl/builtin/hlsl/glsl_compat/core.hlsl"
3+
#include "nbl/builtin/hlsl/workgroup/basic.hlsl"
4+
#include "nbl/builtin/hlsl/workgroup/arithmetic.hlsl"
5+
#include "nbl/builtin/hlsl/device_capabilities_traits.hlsl"
6+
#include "nbl/builtin/hlsl/enums.hlsl"
7+
8+
#ifndef _NBL_BUILTIN_PREFIX_SUM_BLUR_INCLUDED_
9+
#define _NBL_BUILTIN_PREFIX_SUM_BLUR_INCLUDED_
10+
11+
namespace nbl
12+
{
13+
namespace hlsl
14+
{
15+
namespace prefix_sum_blur
16+
{
17+
18+
// Prefix-Sum Blur using SAT (Summed Area Table) technique.
19+
// `scanScratch` and `_sampler.prefixSumAccessor` must not alias.
20+
template<
21+
typename DataAccessor,
22+
typename ScanSharedAccessor,
23+
typename Sampler,
24+
uint16_t WorkgroupSize,
25+
class device_capabilities=void> // TODO: define concepts for the Box1D and apply constraints
26+
struct Blur1D
27+
{
28+
void operator()(
29+
NBL_REF_ARG(DataAccessor) data,
30+
NBL_REF_ARG(ScanSharedAccessor) scanScratch,
31+
NBL_REF_ARG(Sampler) _sampler,
32+
const uint16_t channel)
33+
{
34+
const uint16_t end = data.linearSize();
35+
const uint16_t localInvocationIndex = workgroup::SubgroupContiguousIndex();
36+
37+
// prefix sum
38+
// note the dynamically uniform loop condition
39+
for (uint16_t baseIx = 0; baseIx < end;)
40+
{
41+
const uint16_t ix = localInvocationIndex + baseIx;
42+
float32_t input = data.template get<float32_t>(channel, ix);
43+
// dynamically uniform condition
44+
if (baseIx != 0)
45+
{
46+
// take result of previous prefix sum and add it to first element here
47+
if (localInvocationIndex == 0)
48+
input += _sampler.prefixSumAccessor.template get<float32_t>(baseIx - 1);
49+
}
50+
const float32_t sum = workgroup::inclusive_scan<plus<float32_t>, WorkgroupSize, device_capabilities>::template __call(input, scanScratch);
51+
// loop increment
52+
baseIx += WorkgroupSize;
53+
// save prefix sum results
54+
if (ix < end)
55+
_sampler.prefixSumAccessor.template set<float32_t>(ix, sum);
56+
// previous prefix sum must have finished before we ask for results
57+
_sampler.prefixSumAccessor.workgroupExecutionAndMemoryBarrier();
58+
}
59+
60+
// TODO: split this Blur1D into two separate functors:
61+
// - multi-wg-wide prefix sum
62+
// - the SAT sampling
63+
const float32_t last = end - 1;
64+
for (float32_t ix = localInvocationIndex; ix < end; ix += WorkgroupSize)
65+
{
66+
const float32_t result = _sampler(ix, radius, borderColor[channel]);
67+
data.template set<float32_t>(channel, uint16_t(ix), result);
68+
}
69+
}
70+
71+
vector<float32_t, DataAccessor::Channels> borderColor;
72+
float32_t radius;
73+
};
74+
75+
}
76+
}
77+
}
78+
79+
#endif

0 commit comments

Comments
 (0)