Skip to content

Commit 1d7f3e0

Browse files
committed
Reduce LDS usage
(fix low occupancy on AMD)
1 parent 082e744 commit 1d7f3e0

File tree

48 files changed

+143
-169
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+143
-169
lines changed

internal/Constants.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Global constants
33
//
44
const int MAX_STACK_SIZE = 48;
5+
const int MAX_LTREE_STACK_SIZE = 16;
56
const int MAX_BOUNCES = 128;
67

78
const float HIT_BIAS = 0.00001f;

internal/CoreRef.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3644,7 +3644,7 @@ void Ray::Ref::IntersectAreaLights(Span<const ray_data_t> rays, Span<const light
36443644

36453645
////
36463646

3647-
TraversalStack<MAX_STACK_SIZE, light_stack_entry_t> st;
3647+
TraversalStack<MAX_LTREE_STACK_SIZE, light_stack_entry_t> st;
36483648
st.push(0u /* root_index */, 0.0f /* distance */, 1.0f /* factor */);
36493649

36503650
while (!st.empty()) {
@@ -3891,7 +3891,7 @@ void Ray::Ref::IntersectAreaLights(Span<const ray_data_t> rays, Span<const light
38913891

38923892
////
38933893

3894-
TraversalStack<MAX_STACK_SIZE, light_stack_entry_t> st;
3894+
TraversalStack<MAX_LTREE_STACK_SIZE, light_stack_entry_t> st;
38953895
st.push(0u /* root_index */, 0.0f /* distance */, 1.0f /* factor */);
38963896

38973897
while (!st.empty()) {
@@ -4135,8 +4135,8 @@ void Ray::Ref::IntersectAreaLights(Span<const ray_data_t> rays, Span<const light
41354135

41364136
////
41374137

4138-
uint32_t stack[MAX_STACK_SIZE];
4139-
float stack_factors[MAX_STACK_SIZE];
4138+
uint32_t stack[MAX_LTREE_STACK_SIZE];
4139+
float stack_factors[MAX_LTREE_STACK_SIZE];
41404140
uint32_t stack_size = 0;
41414141

41424142
stack_factors[stack_size] = 1.0f;
@@ -4329,7 +4329,7 @@ float Ray::Ref::IntersectAreaLights(const shadow_ray_t &ray, Span<const light_t>
43294329

43304330
////
43314331

4332-
TraversalStack<MAX_STACK_SIZE> st;
4332+
TraversalStack<MAX_LTREE_STACK_SIZE> st;
43334333
st.push(0u /* root_index */, 0.0f);
43344334

43354335
while (!st.empty()) {
@@ -4470,7 +4470,7 @@ float Ray::Ref::IntersectAreaLights(const shadow_ray_t &ray, Span<const light_t>
44704470

44714471
////
44724472

4473-
TraversalStack<MAX_STACK_SIZE> st;
4473+
TraversalStack<MAX_LTREE_STACK_SIZE> st;
44744474
st.push(0u /* root_index */, 0.0f);
44754475

44764476
while (!st.empty()) {
@@ -4603,8 +4603,8 @@ float Ray::Ref::IntersectAreaLights(const shadow_ray_t &ray, Span<const light_t>
46034603

46044604
float Ray::Ref::EvalTriLightFactor(const fvec4 &P, const fvec4 &ro, const uint32_t tri_index,
46054605
Span<const light_t> lights, Span<const light_bvh_node_t> nodes) {
4606-
uint32_t stack[MAX_STACK_SIZE];
4607-
float stack_factors[MAX_STACK_SIZE];
4606+
uint32_t stack[MAX_LTREE_STACK_SIZE];
4607+
float stack_factors[MAX_LTREE_STACK_SIZE];
46084608
uint32_t stack_size = 0;
46094609

46104610
stack_factors[stack_size] = 1.0f;
@@ -4655,8 +4655,8 @@ float Ray::Ref::EvalTriLightFactor(const fvec4 &P, const fvec4 &ro, const uint32
46554655

46564656
float Ray::Ref::EvalTriLightFactor(const fvec4 &P, const fvec4 &ro, uint32_t tri_index, Span<const light_t> lights,
46574657
Span<const light_wbvh_node_t> nodes) {
4658-
uint32_t stack[MAX_STACK_SIZE];
4659-
float stack_factors[MAX_STACK_SIZE];
4658+
uint32_t stack[MAX_LTREE_STACK_SIZE];
4659+
float stack_factors[MAX_LTREE_STACK_SIZE];
46604660
uint32_t stack_size = 0;
46614661

46624662
stack_factors[stack_size] = 1.0f;
@@ -4701,8 +4701,8 @@ float Ray::Ref::EvalTriLightFactor(const fvec4 &P, const fvec4 &ro, uint32_t tri
47014701

47024702
float Ray::Ref::EvalTriLightFactor(const fvec4 &P, const fvec4 &ro, uint32_t tri_index, Span<const light_t> lights,
47034703
Span<const light_cwbvh_node_t> nodes) {
4704-
uint32_t stack[MAX_STACK_SIZE];
4705-
float stack_factors[MAX_STACK_SIZE];
4704+
uint32_t stack[MAX_LTREE_STACK_SIZE];
4705+
float stack_factors[MAX_LTREE_STACK_SIZE];
47064706
uint32_t stack_size = 0;
47074707

47084708
stack_factors[stack_size] = 1.0f;

internal/CoreSIMD.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6001,7 +6001,7 @@ void Ray::NS::IntersectAreaLights(const ray_data_t<S> &r, Span<const light_t> li
60016001
_inv_d[3] = {inv_d[0][ri], inv_d[1][ri], inv_d[2][ri]},
60026002
_inv_d_o[3] = {inv_d_o[0][ri], inv_d_o[1][ri], inv_d_o[2][ri]};
60036003

6004-
TraversalStateStack_Single<MAX_STACK_SIZE, light_stack_entry_t> st;
6004+
TraversalStateStack_Single<MAX_LTREE_STACK_SIZE, light_stack_entry_t> st;
60056005
st.push(0u /* root_index */, 0.0f, 1.0f);
60066006

60076007
while (!st.empty()) {
@@ -6309,7 +6309,7 @@ Ray::NS::fvec<S> Ray::NS::IntersectAreaLights(const shadow_ray_t<S> &r, Span<con
63096309
const float _inv_d[3] = {inv_d[0][ri], inv_d[1][ri], inv_d[2][ri]},
63106310
_inv_d_o[3] = {inv_d_o[0][ri], inv_d_o[1][ri], inv_d_o[2][ri]};
63116311

6312-
TraversalStateStack_Single<MAX_STACK_SIZE> st;
6312+
TraversalStateStack_Single<MAX_LTREE_STACK_SIZE> st;
63136313
st.push(0u /* root_index */, 0.0f);
63146314

63156315
while (!st.empty() && ray_masks[ri]) {
@@ -6497,8 +6497,8 @@ Ray::NS::fvec<S> Ray::NS::EvalTriLightFactor(const fvec<S> P[3], const fvec<S> r
64976497
// recombine in AoS layout
64986498
const float _p[3] = {P[0][ri], P[1][ri], P[2][ri]}, _ro[3] = {ro[0][ri], ro[1][ri], ro[2][ri]};
64996499

6500-
uint32_t stack[MAX_STACK_SIZE];
6501-
float stack_factors[MAX_STACK_SIZE];
6500+
uint32_t stack[MAX_LTREE_STACK_SIZE];
6501+
float stack_factors[MAX_LTREE_STACK_SIZE];
65026502
uint32_t stack_size = 0;
65036503

65046504
stack_factors[stack_size] = 1.0f;

internal/shaders/intersect_area_lights.comp.glsl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ bool quadratic(float a, float b, float c, out float t0, out float t1) {
4949
return true;
5050
}
5151

52-
shared uint g_stack[LOCAL_GROUP_SIZE_X * LOCAL_GROUP_SIZE_Y][MAX_STACK_SIZE];
53-
shared float g_stack_factors[LOCAL_GROUP_SIZE_X * LOCAL_GROUP_SIZE_Y][MAX_STACK_SIZE];
52+
shared uint g_stack[LOCAL_GROUP_SIZE_X * LOCAL_GROUP_SIZE_Y][MAX_LTREE_STACK_SIZE];
53+
shared float g_stack_factors[LOCAL_GROUP_SIZE_X * LOCAL_GROUP_SIZE_Y][MAX_LTREE_STACK_SIZE];
5454

5555
layout (local_size_x = LOCAL_GROUP_SIZE_X, local_size_y = LOCAL_GROUP_SIZE_Y, local_size_z = 1) in;
5656

internal/shaders/intersect_scene_shadow.comp.glsl

Lines changed: 4 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,11 @@ vec2 get_scrambled_2d_rand(const uint dim, const uint seed, const int _sample) {
9090

9191
layout (local_size_x = LOCAL_GROUP_SIZE_X, local_size_y = LOCAL_GROUP_SIZE_Y, local_size_z = 1) in;
9292

93+
#if HWRT
94+
shared uint g_stack[LOCAL_GROUP_SIZE_X * LOCAL_GROUP_SIZE_Y][MAX_LTREE_STACK_SIZE];
95+
#else
9396
shared uint g_stack[LOCAL_GROUP_SIZE_X * LOCAL_GROUP_SIZE_Y][MAX_STACK_SIZE];
97+
#endif
9498

9599
bool Traverse_BLAS_WithStack(vec3 ro, vec3 rd, vec3 inv_d, int obj_index, uint node_index, uint stack_size,
96100
inout hit_data_t inter) {
@@ -244,37 +248,6 @@ bool Traverse_TLAS_WithStack(vec3 orig_ro, vec3 orig_rd, vec3 orig_inv_rd, uint
244248
}
245249
}
246250

247-
/*while (stack_size != 0) {
248-
uint cur = g_stack[gl_LocalInvocationIndex][--stack_size];
249-
250-
bvh_node_t n = g_nodes[cur];
251-
252-
if (!bbox_test(orig_inv_rd, orig_neg_inv_do, inter.t, n.bbox_min.xyz, n.bbox_max.xyz)) {
253-
continue;
254-
}
255-
256-
if ((floatBitsToUint(n.bbox_min.w) & LEAF_NODE_BIT) == 0) {
257-
g_stack[gl_LocalInvocationIndex][stack_size++] = far_child(orig_rd, n);
258-
g_stack[gl_LocalInvocationIndex][stack_size++] = near_child(orig_rd, n);
259-
} else {
260-
const uint prim_index = (floatBitsToUint(n.bbox_min.w) & PRIM_INDEX_BITS);
261-
262-
const mesh_instance_t mi = g_mesh_instances[prim_index];
263-
if ((mi.data.w & RAY_TYPE_SHADOW_BIT) == 0) {
264-
continue;
265-
}
266-
267-
const vec3 ro = (mi.inv_xform * vec4(orig_ro, 1.0)).xyz;
268-
const vec3 rd = (mi.inv_xform * vec4(orig_rd, 0.0)).xyz;
269-
const vec3 inv_d = safe_invert(rd);
270-
271-
const bool solid_hit_found = Traverse_BLAS_WithStack(ro, rd, inv_d, int(prim_index), mi.data.y, stack_size, inter);
272-
if (solid_hit_found) {
273-
return true;
274-
}
275-
}
276-
}*/
277-
278251
return false;
279252
}
280253

internal/shaders/output/debug_rt.comp.spv.inl

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/shaders/output/intersect_area_lights.comp.cso.inl

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/shaders/output/intersect_area_lights.comp.spv.inl

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/shaders/output/intersect_scene.rchit.spv.inl

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/shaders/output/intersect_scene_shadow_hwrt_atlas.comp.cso.inl

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)