Skip to content

Commit d52102a

Browse files
committed
Light tree masks
1 parent ef3599f commit d52102a

File tree

70 files changed

+556
-267
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

70 files changed

+556
-267
lines changed

internal/Core.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,7 @@ uint32_t Ray::FlattenLightBVH_r(Span<const light_bvh_node_t> nodes, const uint32
10381038
new_node.child[0] = cur_node.prim_index;
10391039
new_node.child[1] = cur_node.prim_count;
10401040

1041+
new_node.ch_bitmask[0] = cur_node.bitmask;
10411042
new_node.flux[0] = cur_node.flux;
10421043
new_node.axis[0] = EncodeOctDir(cur_node.axis);
10431044
new_node.cos_omega_ne[0] = encode_cosines(cosf(cur_node.omega_n), fmaxf(cosf(cur_node.omega_e), 0.0f));
@@ -1167,7 +1168,7 @@ uint32_t Ray::FlattenLightBVH_r(Span<const light_bvh_node_t> nodes, const uint32
11671168
new_node.ch_bbox_min[0][i] = new_node.ch_bbox_min[1][i] = new_node.ch_bbox_min[2][i] = 0xff;
11681169
new_node.ch_bbox_max[0][i] = new_node.ch_bbox_max[1][i] = new_node.ch_bbox_max[2][i] = 0;
11691170
}
1170-
1171+
new_node.ch_bitmask[i] = child.bitmask;
11711172
new_node.flux[i] = child.flux;
11721173
new_node.axis[i] = EncodeOctDir(child.axis);
11731174
new_node.cos_omega_ne[i] = encode_cosines(cosf(child.omega_n), fmaxf(cosf(child.omega_e), 0.0f));
@@ -1176,6 +1177,7 @@ uint32_t Ray::FlattenLightBVH_r(Span<const light_bvh_node_t> nodes, const uint32
11761177
new_node.ch_bbox_min[0][i] = new_node.ch_bbox_min[1][i] = new_node.ch_bbox_min[2][i] = 0xff;
11771178
new_node.ch_bbox_max[0][i] = new_node.ch_bbox_max[1][i] = new_node.ch_bbox_max[2][i] = 0xff;
11781179
// Init as zero light
1180+
new_node.ch_bitmask[i] = 0;
11791181
new_node.flux[i] = 0.0f;
11801182
new_node.axis[i] = 0;
11811183
new_node.cos_omega_ne[i] = 0;

internal/Core.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,11 @@ static_assert(sizeof(bvh_node_t) == 32, "!");
100100
struct light_bvh_node_t : public bvh_node_t {
101101
float flux;
102102
float axis[3];
103-
float omega_n; // cone angle enclosing light normals
104-
float omega_e; // emission angle around each normal
103+
float omega_n; // cone angle enclosing light normals
104+
float omega_e; // emission angle around each normal
105+
uint32_t bitmask; // contained light types
105106
};
106-
static_assert(sizeof(light_bvh_node_t) == 56, "!");
107+
static_assert(sizeof(light_bvh_node_t) == 60, "!");
107108

108109
struct bvh2_node_t {
109110
float ch_data0[4]; // [ ch0.min.x, ch0.max.x, ch0.min.y, ch0.max.y ]
@@ -131,9 +132,8 @@ static_assert(sizeof(light_wbvh_node_t) == 320, "!");
131132

132133
struct alignas(16) cwbvh_node_t {
133134
float bbox_min[3];
134-
float _unused0;
135135
float bbox_max[3];
136-
float _unused1;
136+
uint8_t ch_bitmask[8];
137137
uint8_t ch_bbox_min[3][8];
138138
uint8_t ch_bbox_max[3][8];
139139
uint32_t child[8];

internal/CoreRef.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -391,12 +391,13 @@ force_inline long bbox_test_oct(const float o[3], const float inv_d[3], const fl
391391
}
392392

393393
force_inline long bbox_test_oct(const float o[3], const float inv_d[3], const float t, const cwbvh_node_t &node,
394-
float out_dist[8]) {
394+
const uint32_t accept_mask, float out_dist[8]) {
395395
// Unpack bounds
396396
const float ext[3] = {(node.bbox_max[0] - node.bbox_min[0]) / 255.0f,
397397
(node.bbox_max[1] - node.bbox_min[1]) / 255.0f,
398398
(node.bbox_max[2] - node.bbox_min[2]) / 255.0f};
399399
alignas(16) float bbox_min[3][8], bbox_max[3][8];
400+
alignas(16) uint8_t bitmask[16] = {};
400401
for (int i = 0; i < 8; ++i) {
401402
bbox_min[0][i] = bbox_min[1][i] = bbox_min[2][i] = -MAX_DIST;
402403
bbox_max[0][i] = bbox_max[1][i] = bbox_max[2][i] = MAX_DIST;
@@ -409,6 +410,7 @@ force_inline long bbox_test_oct(const float o[3], const float inv_d[3], const fl
409410
bbox_max[1][i] = node.bbox_min[1] + node.ch_bbox_max[1][i] * ext[1];
410411
bbox_max[2][i] = node.bbox_min[2] + node.ch_bbox_max[2][i] * ext[2];
411412
}
413+
bitmask[i] = node.ch_bitmask[i];
412414
}
413415

414416
long mask = 0;
@@ -436,6 +438,10 @@ force_inline long bbox_test_oct(const float o[3], const float inv_d[3], const fl
436438
mask |= simd_cast(fmask).movemask();
437439
tmin.store_to(&out_dist[4 * i], vector_aligned);
438440
}) // NOLINT
441+
442+
ivec4 temp = ivec4{(const int *)bitmask, vector_aligned};
443+
temp = ~cmpeq8(temp & reinterpret_cast<const int &>(accept_mask), ivec4{0});
444+
mask &= temp.movemask8();
439445
#else
440446
UNROLLED_FOR(i, 8, { // NOLINT
441447
float lo_x = inv_d[0] * (bbox_min[0][i] - o[0]);
@@ -473,9 +479,10 @@ force_inline long bbox_test_oct(const float o[3], const float inv_d[3], const fl
473479
tmax *= 1.00000024f;
474480

475481
out_dist[i] = tmin;
476-
mask |= ((tmin <= tmax && tmin <= t && tmax > 0) ? 1 : 0) << i;
482+
mask |= ((tmin <= tmax && tmin <= t && tmax > 0 && (bitmask[i] & accept_mask) != 0) ? 1 : 0) << i;
477483
}) // NOLINT
478484
#endif
485+
479486
return mask;
480487
}
481488

@@ -3645,6 +3652,11 @@ void Ray::Ref::IntersectAreaLights(Span<const ray_data_t> rays, Span<const light
36453652
TraversalStack<MAX_LTREE_STACK_SIZE, light_stack_entry_t> st;
36463653
st.push(0u /* root_index */, 0.0f /* distance */, 1.0f /* factor */);
36473654

3655+
// NOTE: triangle lights are processed separately
3656+
static uint32_t LightTypesMask = (1u << LIGHT_TYPE_SPHERE) | (1u << LIGHT_TYPE_DIR) | (1u << LIGHT_TYPE_LINE) |
3657+
(1u << LIGHT_TYPE_RECT) | (1u << LIGHT_TYPE_DISK) | (1u << LIGHT_TYPE_ENV);
3658+
static uint32_t LightTypesMask4x =
3659+
LightTypesMask | (LightTypesMask << 8) | (LightTypesMask << 16) | (LightTypesMask << 24);
36483660
while (!st.empty()) {
36493661
light_stack_entry_t cur = st.pop();
36503662

@@ -3655,7 +3667,8 @@ void Ray::Ref::IntersectAreaLights(Span<const ray_data_t> rays, Span<const light
36553667
}
36563668
if ((cur.index & LEAF_NODE_BIT) == 0) {
36573669
alignas(16) float dist[8];
3658-
long mask = bbox_test_oct(value_ptr(ro), inv_d, inout_inter.t, nodes[cur.index], dist);
3670+
long mask =
3671+
bbox_test_oct(value_ptr(ro), inv_d, inout_inter.t, nodes[cur.index], LightTypesMask4x, dist);
36593672
if (mask) {
36603673
fvec4 importance[2];
36613674
calc_lnode_importance(nodes[cur.index], ro, value_ptr(importance[0]));
@@ -4475,6 +4488,9 @@ float Ray::Ref::IntersectAreaLights(const shadow_ray_t &ray, Span<const light_t>
44754488
TraversalStack<MAX_LTREE_STACK_SIZE> st;
44764489
st.push(0u /* root_index */, 0.0f);
44774490

4491+
static uint32_t LightTypesMask = (1u << LIGHT_TYPE_RECT) | (1u << LIGHT_TYPE_DISK);
4492+
static uint32_t LightTypesMask4x =
4493+
LightTypesMask | (LightTypesMask << 8) | (LightTypesMask << 16) | (LightTypesMask << 24);
44784494
while (!st.empty()) {
44794495
stack_entry_t cur = st.pop();
44804496

@@ -4485,7 +4501,7 @@ float Ray::Ref::IntersectAreaLights(const shadow_ray_t &ray, Span<const light_t>
44854501
TRAVERSE:
44864502
if ((cur.index & LEAF_NODE_BIT) == 0) {
44874503
alignas(16) float dist[8];
4488-
long mask = bbox_test_oct(value_ptr(ro), inv_d, rdist, nodes[cur.index], dist);
4504+
long mask = bbox_test_oct(value_ptr(ro), inv_d, rdist, nodes[cur.index], LightTypesMask4x, dist);
44894505
if (mask) {
44904506
long i = GetFirstBit(mask);
44914507
mask = ClearBit(mask, i);

internal/CoreSIMD.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5991,6 +5991,12 @@ void Ray::NS::IntersectAreaLights(const ray_data_t<S> &r, Span<const light_t> li
59915991
alignas(S * 4) uint32_t ray_flags[S];
59925992
_ray_flags.store_to(ray_flags, vector_aligned);
59935993

5994+
// NOTE: triangle lights are processed separately
5995+
static uint32_t LightTypesMask = (1u << LIGHT_TYPE_SPHERE) | (1u << LIGHT_TYPE_DIR) | (1u << LIGHT_TYPE_LINE) |
5996+
(1u << LIGHT_TYPE_RECT) | (1u << LIGHT_TYPE_DISK) | (1u << LIGHT_TYPE_ENV);
5997+
static uint32_t LightTypesMask4x =
5998+
LightTypesMask | (LightTypesMask << 8) | (LightTypesMask << 16) | (LightTypesMask << 24);
5999+
59946000
for (int ri = 0; ri < S; ri++) {
59956001
if (!ray_masks[ri]) {
59966002
continue;
@@ -6019,6 +6025,7 @@ void Ray::NS::IntersectAreaLights(const ray_data_t<S> &r, Span<const light_t> li
60196025
(n.bbox_max[1] - n.bbox_min[1]) / 255.0f,
60206026
(n.bbox_max[2] - n.bbox_min[2]) / 255.0f};
60216027
alignas(32) float bbox_min[3][8], bbox_max[3][8];
6028+
alignas(32) uint8_t bitmask[4 * S] = {};
60226029
for (int i = 0; i < 8; ++i) {
60236030
bbox_min[0][i] = bbox_min[1][i] = bbox_min[2][i] = -MAX_DIST;
60246031
bbox_max[0][i] = bbox_max[1][i] = bbox_max[2][i] = MAX_DIST;
@@ -6031,10 +6038,15 @@ void Ray::NS::IntersectAreaLights(const ray_data_t<S> &r, Span<const light_t> li
60316038
bbox_max[1][i] = n.bbox_min[1] + n.ch_bbox_max[1][i] * ext[1];
60326039
bbox_max[2][i] = n.bbox_min[2] + n.ch_bbox_max[2][i] * ext[2];
60336040
}
6041+
bitmask[i] = n.ch_bitmask[i];
60346042
}
60356043

6044+
ivec<S> temp = ivec<S>{(const int *)bitmask, vector_aligned};
6045+
temp = ~cmpeq8(temp & reinterpret_cast<const int &>(LightTypesMask4x), ivec<S>{0});
6046+
60366047
alignas(32) float res_dist[8];
60376048
long mask = bbox_test_oct<SS>(_inv_d, _inv_d_o, inter_t[ri], bbox_min, bbox_max, res_dist);
6049+
mask &= temp.movemask8();
60386050
if (mask) {
60396051
fvec<SS> importance[8 / SS];
60406052
calc_lnode_importance<SS>(n, bbox_min, bbox_max, _ro, value_ptr(importance[0]));
@@ -6302,6 +6314,10 @@ Ray::NS::fvec<S> Ray::NS::IntersectAreaLights(const shadow_ray_t<S> &r, Span<con
63026314
ray_mask.store_to(ray_masks, vector_aligned);
63036315
rdist.store_to(inter_t, vector_aligned);
63046316

6317+
static uint32_t LightTypesMask = (1u << LIGHT_TYPE_RECT) | (1u << LIGHT_TYPE_DISK);
6318+
static uint32_t LightTypesMask4x =
6319+
LightTypesMask | (LightTypesMask << 8) | (LightTypesMask << 16) | (LightTypesMask << 24);
6320+
63056321
for (int ri = 0; ri < S; ri++) {
63066322
if (!ray_masks[ri]) {
63076323
continue;
@@ -6320,7 +6336,6 @@ Ray::NS::fvec<S> Ray::NS::IntersectAreaLights(const shadow_ray_t<S> &r, Span<con
63206336
if (cur.dist > inter_t[ri]) {
63216337
continue;
63226338
}
6323-
63246339
TRAVERSE:
63256340
if ((cur.index & LEAF_NODE_BIT) == 0) {
63266341
const light_cwbvh_node_t &n = nodes[cur.index];
@@ -6330,6 +6345,7 @@ Ray::NS::fvec<S> Ray::NS::IntersectAreaLights(const shadow_ray_t<S> &r, Span<con
63306345
(n.bbox_max[1] - n.bbox_min[1]) / 255.0f,
63316346
(n.bbox_max[2] - n.bbox_min[2]) / 255.0f};
63326347
alignas(32) float bbox_min[3][8], bbox_max[3][8];
6348+
alignas(32) uint8_t bitmask[4 * S] = {};
63336349
for (int i = 0; i < 8; ++i) {
63346350
bbox_min[0][i] = bbox_min[1][i] = bbox_min[2][i] = -MAX_DIST;
63356351
bbox_max[0][i] = bbox_max[1][i] = bbox_max[2][i] = MAX_DIST;
@@ -6342,10 +6358,15 @@ Ray::NS::fvec<S> Ray::NS::IntersectAreaLights(const shadow_ray_t<S> &r, Span<con
63426358
bbox_max[1][i] = n.bbox_min[1] + n.ch_bbox_max[1][i] * ext[1];
63436359
bbox_max[2][i] = n.bbox_min[2] + n.ch_bbox_max[2][i] * ext[2];
63446360
}
6361+
bitmask[i] = n.ch_bitmask[i];
63456362
}
63466363

6364+
ivec<S> temp = ivec<S>{(const int *)bitmask, vector_aligned};
6365+
temp = ~cmpeq8(temp & reinterpret_cast<const int &>(LightTypesMask4x), ivec<S>{0});
6366+
63476367
alignas(32) float res_dist[8];
63486368
long mask = bbox_test_oct<S>(_inv_d, _inv_d_o, inter_t[ri], bbox_min, bbox_max, res_dist);
6369+
mask &= temp.movemask8();
63496370
if (mask) {
63506371
long i = GetFirstBit(mask);
63516372
mask = ClearBit(mask, i);

internal/SceneCPU.cpp

Lines changed: 48 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,6 +1238,7 @@ void Ray::Cpu::Scene::RebuildLightTree_nolock() {
12381238
struct additional_data_t {
12391239
Ref::fvec4 axis;
12401240
float flux, omega_n, omega_e;
1241+
uint32_t type;
12411242
};
12421243
aligned_vector<additional_data_t> additional_data;
12431244
additional_data.reserve(lights_.size());
@@ -1375,7 +1376,7 @@ void Ray::Cpu::Scene::RebuildLightTree_nolock() {
13751376
primitives.push_back({0, 0, 0, bbox_min, bbox_max});
13761377

13771378
const float flux = lum * area;
1378-
additional_data.push_back({axis, flux, omega_n, omega_e});
1379+
additional_data.push_back({axis, flux, omega_n, omega_e, l.type});
13791380
}
13801381

13811382
light_nodes_.clear();
@@ -1406,6 +1407,7 @@ void Ray::Cpu::Scene::RebuildLightTree_nolock() {
14061407
light_nodes_[i].flux = additional_data[prim_index].flux;
14071408
light_nodes_[i].omega_n = additional_data[prim_index].omega_n;
14081409
light_nodes_[i].omega_e = additional_data[prim_index].omega_e;
1410+
light_nodes_[i].bitmask = (1u << additional_data[prim_index].type);
14091411
}
14101412
}
14111413

@@ -1434,47 +1436,59 @@ void Ray::Cpu::Scene::RebuildLightTree_nolock() {
14341436
}
14351437

14361438
// Propagate flux and cone up the hierarchy
1439+
std::vector<bool> processed(light_nodes_.size(), false);
14371440
std::vector<uint32_t> to_process;
1438-
to_process.reserve(light_nodes_.size());
14391441
to_process.insert(end(to_process), begin(leaf_indices), end(leaf_indices));
1440-
for (uint32_t i = 0; i < uint32_t(to_process.size()); ++i) {
1441-
const uint32_t n = to_process[i];
1442-
const uint32_t parent = parent_indices[n];
1443-
if (parent == 0xffffffff) {
1444-
continue;
1445-
}
1446-
1447-
light_nodes_[parent].flux += light_nodes_[n].flux;
1448-
if (light_nodes_[parent].axis[0] == 0.0f && light_nodes_[parent].axis[1] == 0.0f &&
1449-
light_nodes_[parent].axis[2] == 0.0f) {
1450-
memcpy(light_nodes_[parent].axis, light_nodes_[n].axis, 3 * sizeof(float));
1451-
light_nodes_[parent].omega_n = light_nodes_[n].omega_n;
1452-
} else {
1453-
auto axis1 = Ref::fvec4{light_nodes_[parent].axis}, axis2 = Ref::fvec4{light_nodes_[n].axis};
1454-
axis1.set<3>(0.0f);
1455-
axis2.set<3>(0.0f);
1442+
while (!to_process.empty()) {
1443+
uint32_t next_count = 0;
1444+
for (uint32_t i = 0; i < uint32_t(to_process.size()); ++i) {
1445+
const uint32_t n = to_process[i];
1446+
const uint32_t parent = parent_indices[n];
1447+
if (parent == 0xffffffff) {
1448+
continue;
1449+
}
14561450

1457-
const float angle_between = acosf(clamp(dot(axis1, axis2), -1.0f, 1.0f));
1451+
assert(!processed[n]);
1452+
processed[n] = true;
14581453

1459-
axis1 += axis2;
1460-
const float axis_length = length(axis1);
1461-
if (axis_length != 0.0f) {
1462-
axis1 /= axis_length;
1454+
light_nodes_[parent].flux += light_nodes_[n].flux;
1455+
light_nodes_[parent].bitmask |= light_nodes_[n].bitmask;
1456+
if (light_nodes_[parent].axis[0] == 0.0f && light_nodes_[parent].axis[1] == 0.0f &&
1457+
light_nodes_[parent].axis[2] == 0.0f) {
1458+
memcpy(light_nodes_[parent].axis, light_nodes_[n].axis, 3 * sizeof(float));
1459+
light_nodes_[parent].omega_n = light_nodes_[n].omega_n;
14631460
} else {
1464-
axis1 = Ref::fvec4{0.0f, 1.0f, 0.0f, 0.0f};
1465-
}
1461+
auto axis1 = Ref::fvec4{light_nodes_[parent].axis}, axis2 = Ref::fvec4{light_nodes_[n].axis};
1462+
axis1.set<3>(0.0f);
1463+
axis2.set<3>(0.0f);
1464+
1465+
const float angle_between = acosf(clamp(dot(axis1, axis2), -1.0f, 1.0f));
1466+
1467+
axis1 += axis2;
1468+
const float axis_length = length(axis1);
1469+
if (axis_length != 0.0f) {
1470+
axis1 /= axis_length;
1471+
} else {
1472+
axis1 = Ref::fvec4{0.0f, 1.0f, 0.0f, 0.0f};
1473+
}
14661474

1467-
memcpy(light_nodes_[parent].axis, value_ptr(axis1), 3 * sizeof(float));
1475+
memcpy(light_nodes_[parent].axis, value_ptr(axis1), 3 * sizeof(float));
14681476

1469-
light_nodes_[parent].omega_n =
1470-
fminf(0.5f * (light_nodes_[parent].omega_n +
1471-
fmaxf(light_nodes_[parent].omega_n, angle_between + light_nodes_[n].omega_n)),
1472-
PI);
1473-
}
1474-
light_nodes_[parent].omega_e = fmaxf(light_nodes_[parent].omega_e, light_nodes_[n].omega_e);
1475-
if ((light_nodes_[parent].left_child & LEFT_CHILD_BITS) == n) {
1476-
to_process.push_back(parent);
1477+
light_nodes_[parent].omega_n =
1478+
fminf(0.5f * (light_nodes_[parent].omega_n +
1479+
fmaxf(light_nodes_[parent].omega_n, angle_between + light_nodes_[n].omega_n)),
1480+
PI);
1481+
}
1482+
light_nodes_[parent].omega_e = fmaxf(light_nodes_[parent].omega_e, light_nodes_[n].omega_e);
1483+
1484+
const uint32_t left_child = light_nodes_[parent].left_child & LEFT_CHILD_BITS,
1485+
right_child = light_nodes_[parent].right_child & RIGHT_CHILD_BITS;
1486+
if (processed[left_child] && processed[right_child]) {
1487+
assert(next_count <= i);
1488+
to_process[next_count++] = parent;
1489+
}
14771490
}
1491+
to_process.resize(next_count);
14781492
}
14791493

14801494
// Remove indices indirection

0 commit comments

Comments
 (0)