Skip to content

Commit e3faa9b

Browse files
committed
Partially parallelize env quadtree creation
1 parent b1137ce commit e3faa9b

File tree

3 files changed

+90 -81 lines changed

3 files changed

+90 -81 lines changed

internal/SceneCPU.cpp

Lines changed: 44 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -902,7 +902,7 @@ void Ray::Cpu::Scene::Finalize(const std::function<void(int, int, ParallelForFun
902902

903903
if (env_.importance_sample && env_.env_col[0] > 0.0f && env_.env_col[1] > 0.0f && env_.env_col[2] > 0.0f) {
904904
if (env_.env_map != InvalidTextureHandle._index) {
905-
PrepareEnvMapQTree_nolock();
905+
PrepareEnvMapQTree_nolock(parallel_for);
906906
}
907907
{ // add env light source
908908
light_t l = {};
@@ -1055,7 +1055,10 @@ void Ray::Cpu::Scene::PrepareSkyEnvMap_nolock(
10551055
log_->Info("PrepareSkyEnvMap (%ix%i) done in %lldms", SkyEnvRes[0], SkyEnvRes[1], (long long)(GetTimeMs() - t1));
10561056
}
10571057

1058-
void Ray::Cpu::Scene::PrepareEnvMapQTree_nolock() {
1058+
void Ray::Cpu::Scene::PrepareEnvMapQTree_nolock(
1059+
const std::function<void(int, int, ParallelForFunction &&)> &parallel_for) {
1060+
const uint64_t t1 = Ray::GetTimeMs();
1061+
10591062
const int tex = int(env_.env_map & 0x00ffffff);
10601063
Ref::ivec2 size;
10611064
tex_storage_rgba_.GetIRes(tex, 0, value_ptr(size));
@@ -1082,46 +1085,48 @@ void Ray::Cpu::Scene::PrepareEnvMapQTree_nolock() {
10821085
{1 / 273.0f, 4 / 273.0f, 7 / 273.0f, 4 / 273.0f, 1 / 273.0f}};
10831086
static const float FilterSize = 0.5f;
10841087

1085-
for (int qy = 0; qy < cur_res; ++qy) {
1086-
for (int qx = 0; qx < cur_res; ++qx) {
1087-
for (int jj = -2; jj <= 2; ++jj) {
1088-
for (int ii = -2; ii <= 2; ++ii) {
1089-
const Ref::fvec2 q = {Ref::fract(1.0f + (float(qx) + 0.5f + ii * FilterSize) / cur_res),
1090-
Ref::fract(1.0f + (float(qy) + 0.5f + jj * FilterSize) / cur_res)};
1091-
Ref::fvec4 dir;
1092-
CanonicalToDir(value_ptr(q), 0.0f, value_ptr(dir));
1093-
1094-
const float theta = acosf(clamp(dir.get<1>(), -1.0f, 1.0f)) / PI;
1095-
float phi = atan2f(dir.get<2>(), dir.get<0>());
1096-
if (phi < 0) {
1097-
phi += 2 * PI;
1088+
parallel_for(0, cur_res / 2, [&](const int yy) {
1089+
for (int qy = 2 * yy; qy < 2 * yy + 2; ++qy) {
1090+
for (int qx = 0; qx < cur_res; ++qx) {
1091+
for (int jj = -2; jj <= 2; ++jj) {
1092+
for (int ii = -2; ii <= 2; ++ii) {
1093+
const Ref::fvec2 q = {Ref::fract(1.0f + (float(qx) + 0.5f + ii * FilterSize) / cur_res),
1094+
Ref::fract(1.0f + (float(qy) + 0.5f + jj * FilterSize) / cur_res)};
1095+
Ref::fvec4 dir;
1096+
CanonicalToDir(value_ptr(q), 0.0f, value_ptr(dir));
1097+
1098+
const float theta = acosf(clamp(dir.get<1>(), -1.0f, 1.0f)) / PI;
1099+
float phi = atan2f(dir.get<2>(), dir.get<0>());
1100+
if (phi < 0) {
1101+
phi += 2 * PI;
1102+
}
1103+
if (phi > 2 * PI) {
1104+
phi -= 2 * PI;
1105+
}
1106+
1107+
const float u = Ref::fract(0.5f * phi / PI);
1108+
1109+
const Ref::fvec2 uvs = Ref::fvec2{u, theta} * Ref::fvec2(size);
1110+
const Ref::ivec2 iuvs = clamp(Ref::ivec2(uvs), Ref::ivec2(0), size - 1);
1111+
1112+
const color_rgba8_t col_rgbe = tex_storage_rgba_.Get(tex, iuvs.get<0>(), iuvs.get<1>(), 0);
1113+
const Ref::fvec4 col_rgb = Ref::rgbe_to_rgb(col_rgbe);
1114+
const float cur_lum = (col_rgb.get<0>() + col_rgb.get<1>() + col_rgb.get<2>());
1115+
1116+
int index = 0;
1117+
index |= (qx & 1) << 0;
1118+
index |= (qy & 1) << 1;
1119+
1120+
const int _qx = (qx / 2);
1121+
const int _qy = (qy / 2);
1122+
1123+
auto &qvec = env_map_qtree_.mips[0][_qy * cur_res / 2 + _qx];
1124+
qvec.set(index, qvec[index] + cur_lum * FilterWeights[ii + 2][jj + 2]);
10981125
}
1099-
if (phi > 2 * PI) {
1100-
phi -= 2 * PI;
1101-
}
1102-
1103-
const float u = Ref::fract(0.5f * phi / PI);
1104-
1105-
const Ref::fvec2 uvs = Ref::fvec2{u, theta} * Ref::fvec2(size);
1106-
const Ref::ivec2 iuvs = clamp(Ref::ivec2(uvs), Ref::ivec2(0), size - 1);
1107-
1108-
const color_rgba8_t col_rgbe = tex_storage_rgba_.Get(tex, iuvs.get<0>(), iuvs.get<1>(), 0);
1109-
const Ref::fvec4 col_rgb = Ref::rgbe_to_rgb(col_rgbe);
1110-
const float cur_lum = (col_rgb.get<0>() + col_rgb.get<1>() + col_rgb.get<2>());
1111-
1112-
int index = 0;
1113-
index |= (qx & 1) << 0;
1114-
index |= (qy & 1) << 1;
1115-
1116-
const int _qx = (qx / 2);
1117-
const int _qy = (qy / 2);
1118-
1119-
auto &qvec = env_map_qtree_.mips[0][_qy * cur_res / 2 + _qx];
1120-
qvec.set(index, qvec[index] + cur_lum * FilterWeights[ii + 2][jj + 2]);
11211126
}
11221127
}
11231128
}
1124-
}
1129+
});
11251130

11261131
for (const Ref::fvec4 &v : env_map_qtree_.mips[0]) {
11271132
total_lum += hsum(v);
@@ -1203,7 +1208,7 @@ void Ray::Cpu::Scene::PrepareEnvMapQTree_nolock() {
12031208
env_.qtree_mips[i] = nullptr;
12041209
}
12051210

1206-
log_->Info("Env map qtree res is %i", env_map_qtree_.res);
1211+
log_->Info("PrepareEnvMapQTree (%i) done in %lldms", env_map_qtree_.res, (long long)(GetTimeMs() - t1));
12071212
}
12081213

12091214
void Ray::Cpu::Scene::RebuildLightTree_nolock() {

internal/SceneCPU.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ class Scene : public SceneCommon {
103103
void RebuildLightTree_nolock();
104104

105105
void PrepareSkyEnvMap_nolock(const std::function<void(int, int, ParallelForFunction &&)> &parallel_for);
106-
void PrepareEnvMapQTree_nolock();
106+
void PrepareEnvMapQTree_nolock(const std::function<void(int, int, ParallelForFunction &&)> &parallel_for);
107107

108108
MaterialHandle AddMaterial_nolock(const shading_node_desc_t &m);
109109
void SetMeshInstanceTransform_nolock(MeshInstanceHandle mi, const float *xform);

internal/SceneGPU.h

Lines changed: 45 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@ class Scene : public SceneCommon {
138138
std::vector<Ray::color_rgba8_t> CalcSkyEnvTexture(const atmosphere_params_t &params, const int res[2],
139139
const light_t lights[], Span<const uint32_t> dir_lights);
140140
void PrepareSkyEnvMap_nolock(const std::function<void(int, int, ParallelForFunction &&)> &parallel_for);
141-
void PrepareEnvMapQTree_nolock();
141+
void PrepareEnvMapQTree_nolock(const std::function<void(int, int, ParallelForFunction &&)> &parallel_for);
142142
void GenerateTextureMips_nolock();
143143
void PrepareBindlessTextures_nolock();
144144
std::pair<uint32_t, uint32_t> Build_HWRT_BLAS_nolock(uint32_t vert_index, uint32_t vert_count);
@@ -1516,7 +1516,7 @@ inline void Ray::NS::Scene::Finalize(const std::function<void(int, int, Parallel
15161516

15171517
if (env_.importance_sample && env_.env_col[0] > 0.0f && env_.env_col[1] > 0.0f && env_.env_col[2] > 0.0f) {
15181518
if (env_.env_map != InvalidTextureHandle._index) {
1519-
PrepareEnvMapQTree_nolock();
1519+
PrepareEnvMapQTree_nolock(parallel_for);
15201520
} else {
15211521
// Dummy
15221522
TexParams p;
@@ -1964,7 +1964,9 @@ Ray::NS::Scene::PrepareSkyEnvMap_nolock(const std::function<void(int, int, Paral
19641964
log_->Info("PrepareSkyEnvMap (%ix%i) done in %lldms", SkyEnvRes[0], SkyEnvRes[1], (long long)(GetTimeMs() - t1));
19651965
}
19661966

1967-
inline void Ray::NS::Scene::PrepareEnvMapQTree_nolock() {
1967+
inline void
1968+
Ray::NS::Scene::PrepareEnvMapQTree_nolock(const std::function<void(int, int, ParallelForFunction &&)> &parallel_for) {
1969+
const uint64_t t1 = Ray::GetTimeMs();
19681970
const int tex = int(env_.env_map & 0x00ffffff);
19691971

19701972
Buffer temp_stage_buf;
@@ -2036,47 +2038,49 @@ inline void Ray::NS::Scene::PrepareEnvMapQTree_nolock() {
20362038
{1 / 273.0f, 4 / 273.0f, 7 / 273.0f, 4 / 273.0f, 1 / 273.0f}};
20372039
static const float FilterSize = 0.5f;
20382040

2039-
for (int qy = 0; qy < cur_res; ++qy) {
2040-
for (int qx = 0; qx < cur_res; ++qx) {
2041-
for (int jj = -2; jj <= 2; ++jj) {
2042-
for (int ii = -2; ii <= 2; ++ii) {
2043-
const Ref::fvec2 q = {Ref::fract(1.0f + (float(qx) + 0.5f + ii * FilterSize) / cur_res),
2044-
Ref::fract(1.0f + (float(qy) + 0.5f + jj * FilterSize) / cur_res)};
2045-
fvec4 dir;
2046-
CanonicalToDir(value_ptr(q), 0.0f, value_ptr(dir));
2047-
2048-
const float theta = acosf(clamp(dir.get<1>(), -1.0f, 1.0f)) / PI;
2049-
float phi = atan2f(dir.get<2>(), dir.get<0>());
2050-
if (phi < 0) {
2051-
phi += 2 * PI;
2052-
}
2053-
if (phi > 2 * PI) {
2054-
phi -= 2 * PI;
2041+
parallel_for(0, cur_res / 2, [&](const int yy) {
2042+
for (int qy = 2 * yy; qy < 2 * yy + 2; ++qy) {
2043+
for (int qx = 0; qx < cur_res; ++qx) {
2044+
for (int jj = -2; jj <= 2; ++jj) {
2045+
for (int ii = -2; ii <= 2; ++ii) {
2046+
const Ref::fvec2 q = {Ref::fract(1.0f + (float(qx) + 0.5f + ii * FilterSize) / cur_res),
2047+
Ref::fract(1.0f + (float(qy) + 0.5f + jj * FilterSize) / cur_res)};
2048+
fvec4 dir;
2049+
CanonicalToDir(value_ptr(q), 0.0f, value_ptr(dir));
2050+
2051+
const float theta = acosf(clamp(dir.get<1>(), -1.0f, 1.0f)) / PI;
2052+
float phi = atan2f(dir.get<2>(), dir.get<0>());
2053+
if (phi < 0) {
2054+
phi += 2 * PI;
2055+
}
2056+
if (phi > 2 * PI) {
2057+
phi -= 2 * PI;
2058+
}
2059+
2060+
const float u = Ref::fract(0.5f * phi / PI);
2061+
2062+
const fvec2 uvs = fvec2{u, theta} * fvec2(size);
2063+
const ivec2 iuvs = clamp(ivec2(uvs), ivec2(0), size - 1);
2064+
2065+
const uint8_t *col_rgbe = &rgbe_data[4 * (iuvs.get<1>() * pitch + iuvs.get<0>())];
2066+
fvec4 col_rgb;
2067+
rgbe_to_rgb(col_rgbe, value_ptr(col_rgb));
2068+
const float cur_lum = (col_rgb.get<0>() + col_rgb.get<1>() + col_rgb.get<2>());
2069+
2070+
int index = 0;
2071+
index |= (qx & 1) << 0;
2072+
index |= (qy & 1) << 1;
2073+
2074+
const int _qx = (qx / 2);
2075+
const int _qy = (qy / 2);
2076+
2077+
auto &qvec = env_map_qtree_.mips[0][_qy * cur_res / 2 + _qx];
2078+
qvec.set(index, qvec[index] + cur_lum * FilterWeights[ii + 2][jj + 2]);
20552079
}
2056-
2057-
const float u = Ref::fract(0.5f * phi / PI);
2058-
2059-
const fvec2 uvs = fvec2{u, theta} * fvec2(size);
2060-
const ivec2 iuvs = clamp(ivec2(uvs), ivec2(0), size - 1);
2061-
2062-
const uint8_t *col_rgbe = &rgbe_data[4 * (iuvs.get<1>() * pitch + iuvs.get<0>())];
2063-
fvec4 col_rgb;
2064-
rgbe_to_rgb(col_rgbe, value_ptr(col_rgb));
2065-
const float cur_lum = (col_rgb.get<0>() + col_rgb.get<1>() + col_rgb.get<2>());
2066-
2067-
int index = 0;
2068-
index |= (qx & 1) << 0;
2069-
index |= (qy & 1) << 1;
2070-
2071-
const int _qx = (qx / 2);
2072-
const int _qy = (qy / 2);
2073-
2074-
auto &qvec = env_map_qtree_.mips[0][_qy * cur_res / 2 + _qx];
2075-
qvec.set(index, qvec[index] + cur_lum * FilterWeights[ii + 2][jj + 2]);
20762080
}
20772081
}
20782082
}
2079-
}
2083+
});
20802084

20812085
for (const fvec4 &v : env_map_qtree_.mips[0]) {
20822086
total_lum += hsum(v);
@@ -2206,7 +2210,7 @@ inline void Ray::NS::Scene::PrepareEnvMapQTree_nolock() {
22062210

22072211
temp_stage_buf.FreeImmediate();
22082212

2209-
log_->Info("Env map qtree res is %i", env_map_qtree_.res);
2213+
log_->Info("PrepareEnvMapQTree (%i) done in %lldms", env_map_qtree_.res, (long long)(GetTimeMs() - t1));
22102214
}
22112215

22122216
inline void Ray::NS::Scene::RebuildLightTree_nolock() {

0 commit comments

Comments
 (0)