Skip to content

Commit 683f622

Browse files
Merge pull request #850 from Devsh-Graphics-Programming/full_inline_spirv_bda
Full Inline SPIR-V BDA pointers
2 parents 62b4be3 + 72fcc93 commit 683f622

File tree

18 files changed

+490
-140
lines changed

18 files changed

+490
-140
lines changed

include/nbl/builtin/hlsl/bda/__ptr.hlsl

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
22
// This file is part of the "Nabla Engine".
33
// For conditions of distribution and use, see copyright notice in nabla.h
4-
54
#include "nbl/builtin/hlsl/type_traits.hlsl"
65
#include "nbl/builtin/hlsl/bda/__ref.hlsl"
76

@@ -17,32 +16,64 @@ namespace bda
1716
template<typename T>
1817
struct __ptr
1918
{
20-
using this_t = __ptr <T>;
21-
uint64_t addr;
19+
using this_t = __ptr<T>;
20+
uint32_t2 addr;
2221

23-
static this_t create(const uint64_t _addr)
22+
static this_t create(const uint32_t2 _addr)
2423
{
2524
this_t retval;
2625
retval.addr = _addr;
2726
return retval;
2827
}
2928

30-
template< uint64_t alignment=alignment_of_v<T> >
31-
__ref<T,alignment,false> deref()
29+
// in non-64bit mode we only support "small" arithmetic on pointers (just offsets, no arithmetic on pointers)
30+
__ptr operator+(uint32_t i)
31+
{
32+
i *= sizeof(T);
33+
uint32_t2 newAddr = addr;
34+
spirv::AddCarryOutput<uint32_t> lsbAddRes = spirv::addCarry<uint32_t>(addr[0],i);
35+
newAddr[0] = lsbAddRes.result;
36+
newAddr[1] += lsbAddRes.carry;
37+
return __ptr::create(newAddr);
38+
}
39+
__ptr operator-(uint32_t i)
40+
{
41+
i *= sizeof(T);
42+
uint32_t2 newAddr = addr;
43+
spirv::AddCarryOutput<uint32_t> lsbSubRes = spirv::subBorrow<uint32_t>(addr[0],i);
44+
newAddr[0] = lsbSubRes.result;
45+
newAddr[1] -= lsbSubRes.carry;
46+
return __ptr::create(newAddr);
47+
}
48+
49+
template<uint64_t alignment=alignment_of_v<T>, bool _restrict=false>
50+
__ref<T,alignment,_restrict> deref()
3251
{
3352
// TODO: assert(addr&uint64_t(alignment-1)==0);
34-
using retval_t = __ref < T, alignment, false>;
35-
retval_t retval;
36-
retval.__init(addr);
53+
__ref<T,alignment,_restrict> retval;
54+
retval.__init(spirv::bitcast<spirv::bda_pointer_t<T>,uint32_t2>(addr));
3755
return retval;
3856
}
57+
58+
template<uint64_t alignment=alignment_of_v<T> >
59+
__ref<T,alignment,true> deref_restrict() {return deref<alignment,true>();}
3960

40-
__ptr operator +(int64_t i) {
41-
return __ptr::create(addr + sizeof(T) * i);
61+
//! Don't use these, to avoid emitting shaderUint64 capability when compiling for crappy mobile GPUs
62+
static this_t create(const uint64_t _addr)
63+
{
64+
this_t retval;
65+
retval.addr = spirv::bitcast<uint32_t2>(_addr);
66+
return retval;
4267
}
43-
44-
__ptr operator-(int64_t i) {
45-
return __ptr::create(addr - sizeof(T) * i);
68+
__ptr operator+(int64_t i)
69+
{
70+
i *= sizeof(T);
71+
return __ptr::create(spirv::bitcast<uint64_t>(addr)+i);
72+
}
73+
__ptr operator-(int64_t i)
74+
{
75+
i *= sizeof(T);
76+
return __ptr::create(spirv::bitcast<uint64_t>(addr)-i);
4677
}
4778
};
4879

include/nbl/builtin/hlsl/bda/__ref.hlsl

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,39 +4,44 @@
44
#ifndef _NBL_BUILTIN_HLSL_BDA_REF_INCLUDED_
55
#define _NBL_BUILTIN_HLSL_BDA_REF_INCLUDED_
66

7+
// TODO: this shouldn't be included IMHO
78
#include "nbl/builtin/hlsl/functional.hlsl"
9+
#include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl"
810

911
namespace nbl
1012
{
1113
namespace hlsl
1214
{
1315
namespace bda
1416
{
17+
template<typename T, bool _restrict>
18+
struct __spv_ptr_t;
1519
template<typename T>
16-
using __spv_ptr_t __NBL_CAPABILITY_PhysicalStorageBufferAddresses = spirv::pointer_t<spv::StorageClassPhysicalStorageBuffer,T>;
17-
20+
struct __spv_ptr_t<T,false>
21+
{
22+
[[vk::ext_decorate(spv::DecorationAliasedPointer)]] spirv::bda_pointer_t<T> value;
23+
};
1824
template<typename T>
19-
struct __ptr;
25+
struct __spv_ptr_t<T,true>
26+
{
27+
[[vk::ext_decorate(spv::DecorationRestrictPointer)]] spirv::bda_pointer_t<T> value;
28+
};
2029

21-
// TODO: refactor this in terms of `nbl::hlsl::` when they fix the composite struct inline SPIR-V BDA issue
2230
template<typename T, uint32_t alignment, bool _restrict>
2331
struct __base_ref
2432
{
25-
// TODO:
26-
// static_assert(alignment>=alignof(T));
33+
__spv_ptr_t<T,_restrict> ptr;
2734

28-
using spv_ptr_t = uint64_t;
29-
spv_ptr_t ptr;
30-
31-
__spv_ptr_t<T> __get_spv_ptr()
35+
void __init(const spirv::bda_pointer_t<T> _ptr)
3236
{
33-
return spirv::bitcast < __spv_ptr_t<T> > (ptr);
37+
ptr.value = _ptr;
3438
}
35-
36-
// TODO: Would like to use `spv_ptr_t` or OpAccessChain result instead of `uint64_t`
37-
void __init(const spv_ptr_t _ptr)
39+
40+
spirv::bda_pointer_t<T> __get_spv_ptr()
3841
{
39-
ptr = _ptr;
42+
// BUG: https://github.com/microsoft/DirectXShaderCompiler/issues/7184
43+
// if I don't launder the pointer through this I get "IsNonPtrAccessChain(ptrInst->opcode())"
44+
return spirv::copyObject<spirv::bda_pointer_t<T> >(ptr.value);
4045
}
4146

4247
T load()
@@ -50,16 +55,13 @@ struct __base_ref
5055
}
5156
};
5257

53-
template<typename T, uint32_t alignment=alignment_of_v<T>, bool _restrict = false>
58+
// TODO: I wish HLSL had some things like C++ which would allow you to make a "stack only"/non-storable type
59+
// NOTE: I guess there's the Function/Private storage space variables?
60+
template<typename T, uint32_t alignment=alignment_of_v<T>, bool _restrict=false>
5461
struct __ref : __base_ref<T,alignment,_restrict>
5562
{
56-
using base_t = __base_ref < T, alignment, _restrict>;
57-
using this_t = __ref < T, alignment, _restrict>;
58-
59-
__spv_ptr_t<T> get_ptr()
60-
{
61-
return base_t::__get_spv_ptr();
62-
}
63+
using base_t = __base_ref< T,alignment,_restrict>;
64+
using this_t = __ref<T,alignment,_restrict>;
6365
};
6466
}
6567
}

include/nbl/builtin/hlsl/bda/bda_accessor.hlsl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,15 +66,15 @@ struct BdaAccessor : impl::BdaAccessorBase
6666
atomicAdd(const uint64_t index, const T value)
6767
{
6868
bda::__ptr<T> target = ptr + index;
69-
return glsl::atomicAdd(target.template deref().get_ptr(), value);
69+
return glsl::atomicAdd(target.template deref().ptr.value, value);
7070
}
7171

7272
template<typename S = T>
7373
enable_if_t<is_same_v<S,T> && is_integral<T>::value && (sizeof(T) == 4 || sizeof(T) == 8), T>
7474
atomicSub(const uint64_t index, const T value)
7575
{
7676
bda::__ptr<T> target = ptr + index;
77-
return glsl::atomicSub(target.template deref().get_ptr(), value);
77+
return glsl::atomicSub(target.template deref().ptr.value, value);
7878
}
7979

8080
bda::__ptr<T> ptr;
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
2+
// This file is part of the "Nabla Engine".
3+
// For conditions of distribution and use, see copyright notice in nabla.h
4+
#ifndef _NBL_BUILTIN_HLSL_BDA_STRUCT_DECLARE_INCLUDED_
5+
#define _NBL_BUILTIN_HLSL_BDA_STRUCT_DECLARE_INCLUDED_
6+
7+
#include "nbl/builtin/hlsl/type_traits.hlsl"
8+
#include "nbl/builtin/hlsl/mpl.hlsl"
9+
#ifdef __HLSL_VERSION
10+
#include "nbl/builtin/hlsl/bda/__ptr.hlsl"
11+
#endif // __HLSL_VERSION
12+
13+
14+
namespace nbl
15+
{
16+
namespace hlsl
17+
{
18+
namespace bda
19+
{
20+
// silly utility traits
21+
template<typename T>
22+
struct member_count
23+
{
24+
NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0;
25+
};
26+
template<typename T>
27+
NBL_CONSTEXPR uint32_t member_count_v = member_count<T>::value;
28+
29+
template<typename T, int32_t MemberIx>
30+
struct member_type;
31+
template<typename T, int32_t MemberIx>
32+
using member_type_t = typename member_type<T,MemberIx>::type;
33+
34+
// default alignment is the alignment of the type
35+
template<typename T, int32_t MemberIx>
36+
struct member_alignment
37+
{
38+
NBL_CONSTEXPR_STATIC_INLINE uint32_t value = alignment_of_v<member_type_t<T,MemberIx> >;
39+
};
40+
template<typename T, int32_t MemberIx>
41+
NBL_CONSTEXPR uint32_t member_alignment_v = member_alignment<T,MemberIx>::value;
42+
43+
// the default specialization of the offset assumes scalar layout
44+
template<typename T, int32_t MemberIx>
45+
struct member_offset
46+
{
47+
// TODO: assert that the custom alignment is no less than the type's natural alignment?
48+
// first byte past previous member, rounded up to our alignment
49+
NBL_CONSTEXPR_STATIC_INLINE uint64_t value = mpl::align_up_v<member_offset<T,MemberIx-1>::value+size_of_v<member_type_t<T,MemberIx-1> >,member_alignment_v<T,MemberIx> >;
50+
};
51+
template<typename T>
52+
struct member_offset<T,0>
53+
{
54+
NBL_CONSTEXPR_STATIC_INLINE uint64_t value = 0;
55+
};
56+
template<typename T, int32_t MemberIx>
57+
NBL_CONSTEXPR uint64_t member_offset_v = member_offset<T,MemberIx>::value;
58+
59+
// stuff needed to compute alignment of the struct properly
60+
namespace impl
61+
{
62+
template<typename T, uint32_t N>
63+
struct default_alignment
64+
{
65+
NBL_CONSTEXPR_STATIC_INLINE uint32_t value = mpl::max_v<uint32_t,member_alignment_v<T,N-1>,default_alignment<T,N-1>::value>;
66+
};
67+
// le invalid values
68+
template<typename T>
69+
struct default_alignment<T,0>
70+
{
71+
NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0;
72+
};
73+
template<typename T, typename MemberCount=member_count<T> >
74+
NBL_CONSTEXPR uint32_t default_alignment_v = default_alignment<T,MemberCount::value>::value;
75+
}
76+
}
77+
}
78+
}
79+
80+
//! Need to gen identical struct in HLSL and C++, right now this tool can declare non-templated structs and full explicit specialized ones
81+
82+
//implementation details
83+
#define NBL_HLSL_IMPL_DEFINE_STRUCT_GET_MEMBER_TYPE(identifier,...) __VA_ARGS__
84+
#define NBL_HLSL_IMPL_DEFINE_STRUCT_GET_MEMBER_NAME(identifier,...) identifier
85+
#define NBL_HLSL_IMPL_DEFINE_STRUCT_MEMBER_TYPE(r,IDENTIFIER,i,e) template<> \
86+
struct ::nbl::hlsl::bda::member_type<NBL_EVAL IDENTIFIER,i> \
87+
{ \
88+
using type = NBL_HLSL_IMPL_DEFINE_STRUCT_GET_MEMBER_TYPE e; \
89+
};
90+
91+
//! TODO: handle declarations for partial template specializations and non-specializations
92+
#define NBL_HLSL_IMPL_DECLARE_STRUCT_MEMBER(identifier,...) __VA_ARGS__ identifier;
93+
#ifdef __HLSL_VERSION
94+
#define NBL_HLSL_IMPL_DEFINE_STRUCT_MEMBER(r,IDENTIFIER,i,e) [[vk::ext_decorate(spv::DecorationOffset,::nbl::hlsl::bda::member_offset_v<NBL_EVAL IDENTIFIER,i>)]] NBL_HLSL_IMPL_DECLARE_STRUCT_MEMBER e
95+
#define NBL_HLSL_IMPL_DEFINE_STRUCT_MEMBER_REFERENCE(r,unused,i,e) ::nbl::hlsl::bda::__ref< \
96+
NBL_HLSL_IMPL_DEFINE_STRUCT_GET_MEMBER_TYPE e, \
97+
::nbl::hlsl::mpl::min_v<uint32_t,::nbl::hlsl::bda::member_alignment_v<__referenced_t,i>,alignment>, \
98+
_restrict> NBL_HLSL_IMPL_DEFINE_STRUCT_GET_MEMBER_NAME e;
99+
#define NBL_HLSL_IMPL_INIT_STRUCT_MEMBER_REFERENCE(r,unused,i,e) NBL_HLSL_IMPL_DEFINE_STRUCT_GET_MEMBER_NAME e .__init( \
100+
::nbl::hlsl::spirv::accessChain<NBL_HLSL_IMPL_DEFINE_STRUCT_GET_MEMBER_TYPE e>(base_t::ptr.value,i) \
101+
);
102+
#define NBL_HLSL_IMPL_DEFINE_STRUCT(IDENTIFIER,MEMBER_SEQ) NBL_EVAL IDENTIFIER \
103+
{ \
104+
BOOST_PP_SEQ_FOR_EACH_I(NBL_HLSL_IMPL_DEFINE_STRUCT_MEMBER,IDENTIFIER,MEMBER_SEQ) \
105+
}; \
106+
template<uint32_t alignment, bool _restrict> \
107+
struct ::nbl::hlsl::bda::__ref<NBL_EVAL IDENTIFIER,alignment,_restrict> : ::nbl::hlsl::bda::__base_ref<NBL_EVAL IDENTIFIER,alignment,_restrict> \
108+
{ \
109+
using __referenced_t = NBL_EVAL IDENTIFIER; \
110+
using base_t = __base_ref<__referenced_t,alignment,_restrict>; \
111+
using this_t = __ref<__referenced_t,alignment,_restrict>; \
112+
\
113+
BOOST_PP_SEQ_FOR_EACH_I(NBL_HLSL_IMPL_DEFINE_STRUCT_MEMBER_REFERENCE,dummy,MEMBER_SEQ) \
114+
\
115+
void __init(const ::nbl::hlsl::spirv::bda_pointer_t<__referenced_t> _ptr) \
116+
{ \
117+
base_t::__init(_ptr); \
118+
BOOST_PP_SEQ_FOR_EACH_I(NBL_HLSL_IMPL_INIT_STRUCT_MEMBER_REFERENCE,dummy,MEMBER_SEQ) \
119+
} \
120+
}
121+
#else
122+
#define NBL_HLSL_IMPL_DEFINE_STRUCT_MEMBER(r,IDENTIFIER,i,e) alignas(::nbl::hlsl::bda::member_alignment_v<NBL_EVAL IDENTIFIER,i>) NBL_HLSL_IMPL_DECLARE_STRUCT_MEMBER e
123+
#define NBL_HLSL_IMPL_DEFINE_STRUCT(IDENTIFIER,MEMBER_SEQ) alignas(::nbl::hlsl::alignment_of_v<NBL_EVAL IDENTIFIER >) NBL_EVAL IDENTIFIER \
124+
{ \
125+
BOOST_PP_SEQ_FOR_EACH_I(NBL_HLSL_IMPL_DEFINE_STRUCT_MEMBER,IDENTIFIER,MEMBER_SEQ) \
126+
}
127+
#endif
128+
129+
// some weird stuff to handle alignment
130+
#define NBL_HLSL_IMPL_DEFINE_STRUCT_BEGIN(IDENTIFIER,MEMBER_SEQ) template<> \
131+
struct ::nbl::hlsl::bda::member_count<NBL_EVAL IDENTIFIER > \
132+
{ \
133+
NBL_CONSTEXPR_STATIC_INLINE uint32_t value = BOOST_PP_SEQ_SIZE(MEMBER_SEQ); \
134+
}; \
135+
BOOST_PP_SEQ_FOR_EACH_I(NBL_HLSL_IMPL_DEFINE_STRUCT_MEMBER_TYPE,IDENTIFIER,MEMBER_SEQ) \
136+
template <> \
137+
struct ::nbl::hlsl::alignment_of<NBL_EVAL IDENTIFIER > \
138+
{
139+
#define NBL_HLSL_IMPL_DEFINE_STRUCT_END(IDENTIFIER,MEMBER_SEQ,...) }; \
140+
template<> \
141+
struct ::nbl::hlsl::size_of<NBL_EVAL IDENTIFIER > \
142+
{ \
143+
using type = NBL_EVAL IDENTIFIER; \
144+
NBL_CONSTEXPR_STATIC_INLINE uint32_t __last_member_ix_v = ::nbl::hlsl::bda::member_count_v<type>-1; \
145+
NBL_CONSTEXPR_STATIC_INLINE uint64_t __last_member_offset_v = ::nbl::hlsl::bda::member_offset_v<type, __last_member_ix_v>; \
146+
NBL_CONSTEXPR_STATIC_INLINE uint64_t __last_member_size_v = ::nbl::hlsl::size_of_v<::nbl::hlsl::bda::member_type_t<type, __last_member_ix_v> >; \
147+
NBL_CONSTEXPR_STATIC_INLINE uint32_t value = ::nbl::hlsl::mpl::align_up_v<__last_member_offset_v + __last_member_size_v, alignment_of_v<type > >; \
148+
\
149+
__VA_ARGS__ \
150+
\
151+
}; \
152+
struct NBL_HLSL_IMPL_DEFINE_STRUCT(IDENTIFIER,MEMBER_SEQ)
153+
154+
#include <boost/preprocessor/seq/for_each_i.hpp>
155+
#include <boost/preprocessor/seq/size.hpp>
156+
// MEMBER_SEQ is to be a sequence of variable name and type (identifier0,Type0)...(identifierN,TypeN) @see NBL_HLSL_IMPL_DEFINE_STRUCT_GET_MEMBER_TYPE
157+
// the VA_ARGS is the struct alignment for alignas, usage example
158+
// ```
159+
// NBL_HLSL_DEFINE_STRUCT((MyStruct2),
160+
// ((a, float32_t))
161+
// ((b, int32_t))
162+
// ((c, int32_t2)),
163+
//
164+
// ... block of code for the methods ...
165+
//
166+
// );
167+
// ```
168+
#define NBL_HLSL_DEFINE_STRUCT(IDENTIFIER,MEMBER_SEQ,...) NBL_HLSL_IMPL_DEFINE_STRUCT_BEGIN(IDENTIFIER,MEMBER_SEQ) \
169+
NBL_CONSTEXPR_STATIC_INLINE uint32_t value = ::nbl::hlsl::bda::impl::default_alignment_v<NBL_EVAL IDENTIFIER >; \
170+
NBL_HLSL_IMPL_DEFINE_STRUCT_END(IDENTIFIER,MEMBER_SEQ,__VA_ARGS__)
171+
// version allowing custom alignment on whole struct
172+
#define NBL_HLSL_DEFINE_ALIGNAS_STRUCT(IDENTIFIER,ALIGNMENT,MEMBER_SEQ,...) NBL_HLSL_IMPL_DEFINE_STRUCT_BEGIN(IDENTIFIER,MEMBER_SEQ) \
173+
NBL_CONSTEXPR_STATIC_INLINE uint32_t value = ALIGNMENT; \
174+
NBL_HLSL_IMPL_DEFINE_STRUCT_END(IDENTIFIER,MEMBER_SEQ,__VA_ARGS__)
175+
176+
#endif

include/nbl/builtin/hlsl/complex.hlsl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ namespace nbl
5252
namespace hlsl
5353
{
5454

55+
// TODO: make this BDA compatible (no unspecialized templates yet)
5556
template<typename Scalar>
5657
struct complex_t
5758
{

0 commit comments

Comments
 (0)