Skip to content

Commit 3c6babd

Browse files
committed
Improve allocation strategy: serve tight chunks, without wasting memory, from which I can suballocate buffers with padding & alignments taken into account - but I still hit asserts when requesting the streaming buffer's max_size after finishing the render call
1 parent ec05b2e commit 3c6babd

File tree

1 file changed

+99
-97
lines changed

1 file changed

+99
-97
lines changed

src/nbl/ext/ImGui/ImGui.cpp

Lines changed: 99 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,19 @@ namespace nbl::ext::imgui
3434

3535
struct DrawItemMeta
3636
{
37+
enum SBufferIx
38+
{
39+
VERTEX,
40+
INDEX,
41+
42+
COUNT
43+
};
44+
3745
//! total left bytes to upload for X-th command list
3846
size_t totalLeftBytesToUpload;
3947

40-
//! we suballocate total of (command buffers count * 2u /*vertex & index buffers*/) buffers, we assume we have [ ... [vertex buffer, index buffer] ... ]
41-
std::vector<mdi_size_t> offsets, sizes, alignments;
48+
//! we allocate SBufferIx::COUNT chunks per command list from which we suballocate to operate on
49+
std::array<mdi_size_t, SBufferIx::COUNT> offsets = { InvalidAddress, InvalidAddress }, sizes = {}, alignments = { sizeof(ImDrawVert), sizeof(ImDrawIdx) };
4250
std::vector<bool> filled = { false, false };
4351

4452
//! those buffers will be suballocated & filled from a block of memory, each block memory request is multiplied with this factor - if a block fails to be allocated the factor decreases (divided by 2 on fail)
@@ -96,7 +104,7 @@ namespace nbl::ext::imgui
96104
//! with our allocation strategy unless we split indirect call into smaller pieces (however it doesnt make any sense if we assume all objects must be uploaded anyway imo - if all then why to bother?), also there is a very low chance this memory block will ever exceed 1KB even if you have a lot of GUI windows (< 45 draw commands, 22 bytes * limits.totalIndirectDrawCount) since its very small.
97105
struct STightStructs
98106
{
99-
// we have total COUNT of blocks to allocate first
107+
// we have total StructureIx::COUNT of blocks to allocate first before uploading command lists data
100108
enum StructureIx
101109
{
102110
INDIRECT_STRUCTURES = 0u,
@@ -117,9 +125,8 @@ namespace nbl::ext::imgui
117125
const ImDrawList* commandList = drawData->CmdLists[i];
118126

119127
limits.totalIndirectDrawCount += commandList->CmdBuffer.Size;
120-
121-
meta.offsets.emplace_back() = InvalidAddress; meta.totalLeftBytesToUpload += meta.sizes.emplace_back() = commandList->VtxBuffer.Size * sizeof(ImDrawVert); meta.alignments.emplace_back() = sizeof(ImDrawVert);
122-
meta.offsets.emplace_back() = InvalidAddress; meta.totalLeftBytesToUpload += meta.sizes.emplace_back() = commandList->IdxBuffer.Size * sizeof(ImDrawIdx); meta.alignments.emplace_back() = sizeof(ImDrawIdx);
128+
meta.totalLeftBytesToUpload += meta.sizes[DrawItemMeta::VERTEX] = commandList->VtxBuffer.Size * sizeof(ImDrawVert);
129+
meta.totalLeftBytesToUpload += meta.sizes[DrawItemMeta::INDEX] = commandList->IdxBuffer.Size * sizeof(ImDrawIdx);
123130

124131
assert([&]() -> bool // we should never hit it
125132
{
@@ -145,29 +152,30 @@ namespace nbl::ext::imgui
145152
{
146153
requiredStructsBlockInfo.allocated = true;
147154

148-
auto blockOffset = InvalidAddress;
149-
const auto blockSize = std::min(mdi->compose->max_size(), std::reduce(std::begin(requiredStructsBlockInfo.sizes), std::end(requiredStructsBlockInfo.sizes)));
155+
auto [blockOffset, blockSize] = std::make_tuple(InvalidAddress, std::min(mdi->compose->max_size(), std::reduce(std::begin(requiredStructsBlockInfo.sizes), std::end(requiredStructsBlockInfo.sizes))));
150156

151157
mdi->compose->multi_allocate(std::chrono::steady_clock::now() + std::chrono::microseconds(100u), 1u, &blockOffset, &blockSize, &MdiMaxAlignment);
152158

153159
if (blockOffset == InvalidAddress)
154160
return (requiredStructsBlockInfo.allocated = false);
155161

162+
bigChunkRequestInfo.offsets.emplace_back() = blockOffset;
163+
bigChunkRequestInfo.sizes.emplace_back() = blockSize;
164+
156165
auto* const mdiData = reinterpret_cast<uint8_t*>(mdi->compose->getBufferPointer());
157166
mdi_buffer_t::suballocator_traits_t::allocator_type fillSubAllocator(mdiData, blockOffset, 0u, MdiMaxAlignment, blockSize);
158167
mdi_buffer_t::suballocator_traits_t::multi_alloc_addr(fillSubAllocator, requiredStructsBlockInfo.offsets.size(), requiredStructsBlockInfo.offsets.data(), requiredStructsBlockInfo.sizes.data(), requiredStructsBlockInfo.alignments.data());
159168

160169
for (const auto& offset : requiredStructsBlockInfo.offsets)
161170
if (offset == InvalidAddress)
162-
requiredStructsBlockInfo.allocated = false;
171+
return (requiredStructsBlockInfo.allocated) = false;
163172

164173
return requiredStructsBlockInfo.allocated;
165174
}
166175

167176
void latchDeallocations(mdi_buffer_t* mdi, ISemaphore::SWaitInfo waitInfo)
168177
{
169-
deallocateRequiredBlock(mdi, waitInfo); // indirect & element structs
170-
deallocateLeftChunks(mdi, waitInfo); // vertex & index buffers
178+
mdi->compose->multi_deallocate(bigChunkRequestInfo.offsets.size(), bigChunkRequestInfo.offsets.data(), bigChunkRequestInfo.sizes.data(), waitInfo);
171179
}
172180

173181
inline const auto& getLimits() { return limits; }
@@ -180,16 +188,6 @@ namespace nbl::ext::imgui
180188

181189
private:
182190

183-
void deallocateRequiredBlock(mdi_buffer_t* mdi, ISemaphore::SWaitInfo& waitInfo)
184-
{
185-
mdi->compose->multi_deallocate(requiredStructsBlockInfo.offsets.size(), requiredStructsBlockInfo.offsets.data(), requiredStructsBlockInfo.sizes.data(), waitInfo);
186-
}
187-
188-
void deallocateLeftChunks(mdi_buffer_t* mdi, ISemaphore::SWaitInfo& waitInfo)
189-
{
190-
mdi->compose->multi_deallocate(bigChunkRequestInfo.offsets.size(), bigChunkRequestInfo.offsets.data(), bigChunkRequestInfo.sizes.data(), waitInfo);
191-
}
192-
193191
struct SLimits
194192
{
195193
//! sum of metaList[x].sizes - all bytes which need to be uploaded to cover all of totalIndirectDrawCount objects, note we don't count element & indirect structures there
@@ -1251,102 +1249,114 @@ namespace nbl::ext::imgui
12511249
auto* const indirectStructures = reinterpret_cast<VkDrawIndexedIndirectCommand*>(mdiData + requiredStructsBlockInfo.offsets[ImGuiCommandListRange::STightStructs::INDIRECT_STRUCTURES]);
12521250
auto* const elementStructures = reinterpret_cast<PerObjectData*>(mdiData + requiredStructsBlockInfo.offsets[ImGuiCommandListRange::STightStructs::ELEMENT_STRUCTURES]);
12531251

1254-
if (drawItem.meta.totalLeftBytesToUpload >= 0u)
1255-
{
1256-
constexpr auto StreamingAllocationCount = 1u;
1257-
1258-
// not only memoryBlockFactor divided by 2 will fail us, but even without it we will always fail suballocator with one offset becauase delta is too tight
1259-
// mdi_size_t chunkOffset = InvalidAddress, chunkSize = min(streamingBuffer->max_size(), (drawItem.meta.totalLeftBytesToUpload /* * drawItem.meta.memoryBlockFactor*/));
1260-
1261-
mdi_size_t chunkOffset = InvalidAddress, chunkSize = min(streamingBuffer->max_size(), limits.totalByteSizeRequest * drawItem.meta.memoryBlockFactor /* temporary giving MORE then required for suballocator because of padding.. */);
1262-
1263-
//! (*) note we request single tight chunk of memory with fixed max alignment - big address space from which we fill try to suballocate to fill buffers
1264-
const size_t unallocatedSize = m_mdi.compose->multi_allocate(std::chrono::steady_clock::now() + std::chrono::microseconds(100u), StreamingAllocationCount, &chunkOffset, &chunkSize, &MdiMaxAlignment);
1252+
const auto& [vertexBuffer, indexBuffer] = std::make_tuple(drawItem.cmdList->VtxBuffer, drawItem.cmdList->IdxBuffer);
1253+
const auto [vtxAllocationIx, idxAllocationIx] = std::make_tuple(DrawItemMeta::VERTEX, DrawItemMeta::INDEX);
12651254

1266-
if (chunkOffset == InvalidAddress)
1267-
{
1268-
drawItem.meta.memoryBlockFactor *= 0.5f; // the problem is if another chunk failed because of SUBALLOCATOR then its because of padding probably.. decreasing the chunk request size wont help in that case and we should rather INCREASE the delta then with left bytes to upload for the suballocator to success with alignment restrictions
1269-
return;
1270-
}
1271-
else
1255+
if (drawItem.meta.totalLeftBytesToUpload >= 0u)
1256+
{
1257+
// we have 2 buffers to fill per command list, we will try with as tight streaming memory chunks as possible to not waste block memory (too big chunks allocated but only a certain % used in reality) & shape them in a way our suballocator likes (we respect required alignments)
1258+
for (uint16_t bufferIx = 0u; bufferIx < DrawItemMeta::COUNT; ++bufferIx)
12721259
{
1273-
// chunk allocated? update the state & let suballocator do the job
1274-
chunksInfo.offsets.emplace_back() = chunkOffset;
1275-
chunksInfo.sizes.emplace_back() = chunkSize;
1276-
const auto alignOffsetNeeded = MdiMaxSize - (chunkOffset % MdiMaxSize);
1260+
if (drawItem.meta.filled[bufferIx])
1261+
continue;
12771262

1278-
//! (*) we create linear suballocator to fill the allocated chunk of memory
1279-
SMdiBuffer::suballocator_traits_t::allocator_type fillSubAllocator(mdiData, chunkOffset, alignOffsetNeeded, MdiMaxAlignment, chunkSize);
1263+
const auto& bufferSizeTotalUploadRequest = drawItem.meta.sizes[bufferIx];
1264+
auto [chunkOffset, chunkSize] = std::make_tuple(InvalidAddress, min(streamingBuffer->max_size(), bufferSizeTotalUploadRequest + MdiMaxAlignment /* (**) add extra padding to let suballocator start at nice offset */));
12801265

1281-
//! (*) we suballocate from the allocated chunk with required alignments - multi request all with single traits call per imgui command list
1282-
SMdiBuffer::suballocator_traits_t::multi_alloc_addr(fillSubAllocator, drawItem.meta.offsets.size(), drawItem.meta.offsets.data(), drawItem.meta.sizes.data(), drawItem.meta.alignments.data());
1266+
//! (*) note we request a single tight chunk of memory with fixed max alignment - big address space from which we will try to suballocate to fill buffers
1267+
const size_t unallocatedSize = m_mdi.compose->multi_allocate(std::chrono::steady_clock::now() + std::chrono::microseconds(100u), 1u, &chunkOffset, &chunkSize, &MdiMaxAlignment);
12831268

1284-
auto upload = [&]() -> size_t
1269+
if (chunkOffset == InvalidAddress)
1270+
return;
1271+
else
12851272
{
1286-
size_t uploaded = {};
1273+
// chunk allocated for a buffer? update the state & let suballocator do the job (we tried to waste minimum required memory and not leave empty space in the chunk)
1274+
chunksInfo.offsets.emplace_back() = chunkOffset;
1275+
chunksInfo.sizes.emplace_back() = chunkSize;
1276+
const auto alignOffsetNeeded = MdiMaxSize - (chunkOffset % MdiMaxSize); // read (**)
1277+
1278+
//! (*) we create linear suballocator to fill the allocated chunk of memory
1279+
SMdiBuffer::suballocator_traits_t::allocator_type fillSubAllocator(mdiData, chunkOffset, alignOffsetNeeded, MdiMaxAlignment, chunkSize);
12871280

1288-
auto updateSuballocation = [&](const uint32_t allocationIx) -> size_t
1281+
//! (*) we suballocate from the allocated chunk with required alignments
1282+
SMdiBuffer::suballocator_traits_t::multi_alloc_addr(fillSubAllocator, 1u, drawItem.meta.offsets.data() + bufferIx, drawItem.meta.sizes.data() + bufferIx, drawItem.meta.alignments.data() + bufferIx);
1283+
1284+
auto upload = [&]() -> size_t
12891285
{
1290-
const bool isFilled = drawItem.meta.filled[allocationIx];
1286+
size_t uploaded = {};
12911287

1292-
if (!isFilled)
1288+
auto updateSuballocation = [&](const uint32_t allocationIx) -> size_t
12931289
{
1294-
const auto bytesToFill = drawItem.meta.sizes[allocationIx];
1295-
uploaded += bytesToFill;
1296-
drawItem.meta.filled[allocationIx] = true;
1297-
return bytesToFill;
1298-
}
1290+
const bool isFilled = drawItem.meta.filled[allocationIx];
12991291

1300-
return 0u;
1301-
};
1292+
if (!isFilled)
1293+
{
1294+
const auto bytesToFill = drawItem.meta.sizes[allocationIx];
1295+
uploaded += bytesToFill;
1296+
drawItem.meta.filled[allocationIx] = true;
1297+
return bytesToFill;
1298+
}
13021299

1303-
const auto& [vertexBuffer, indexBuffer] = std::make_tuple(drawItem.cmdList->VtxBuffer, drawItem.cmdList->IdxBuffer);
1304-
const auto [vtxAllocationIx, idxAllocationIx] = std::make_tuple(0u, 1u); // check DrawItemMeta to see why
1300+
return 0u;
1301+
};
13051302

1306-
auto fillBuffer = [&](const auto* in, const uint32_t allocationIx)
1307-
{
1308-
auto& offset = drawItem.meta.offsets[allocationIx];
1303+
auto fillBuffer = [&](const auto* in, const uint32_t allocationIx)
1304+
{
1305+
auto& offset = drawItem.meta.offsets[allocationIx];
13091306

1310-
if (offset == InvalidAddress)
1311-
return false;
1312-
else
1307+
if (offset == InvalidAddress)
1308+
return false;
1309+
else
1310+
{
1311+
const auto bytesToFill = updateSuballocation(allocationIx);
1312+
1313+
if (bytesToFill != 0u)
1314+
::memcpy(mdiData + offset, in, bytesToFill);
1315+
}
1316+
1317+
return true;
1318+
};
1319+
1320+
auto validateObjectOffsets = [&]() -> bool
13131321
{
1314-
const auto bytesToFill = updateSuballocation(allocationIx);
1322+
const auto [vtxOffset, idxOffset] = std::make_tuple(drawItem.meta.offsets[vtxAllocationIx], drawItem.meta.offsets[idxAllocationIx]);
1323+
bool ok = true;
13151324

1316-
if (bytesToFill != 0u)
1317-
::memcpy(mdiData + offset, in, bytesToFill);
1318-
}
1325+
if (vtxOffset != InvalidAddress)
1326+
ok &= ((vtxOffset % sizeof(ImDrawVert)) == 0u);
13191327

1320-
return true;
1321-
};
1328+
if (idxOffset != InvalidAddress)
1329+
ok &= ((idxOffset % sizeof(ImDrawIdx)) == 0u);
13221330

1323-
auto validateObjectOffsets = [&]() -> bool
1324-
{
1325-
const auto [vtxOffset, idxOffset] = std::make_tuple(drawItem.meta.offsets[vtxAllocationIx], drawItem.meta.offsets[idxAllocationIx]);
1326-
bool ok = true;
1331+
_NBL_BREAK_IF(!ok);
13271332

1328-
if (vtxOffset != InvalidAddress)
1329-
ok &= ((vtxOffset % sizeof(ImDrawVert)) == 0u);
1333+
return ok; // if offsets are valid then must be aligned properly!
1334+
};
13301335

1331-
if (idxOffset != InvalidAddress)
1332-
ok &= ((idxOffset % sizeof(ImDrawIdx)) == 0u);
1336+
assert(validateObjectOffsets()); // debug check only
13331337

1334-
_NBL_BREAK_IF(!ok);
1338+
fillBuffer(vertexBuffer.Data, vtxAllocationIx);
1339+
fillBuffer(indexBuffer.Data, idxAllocationIx);
13351340

1336-
return ok; // if offsets are valid then must be aligned properly!
1341+
return uploaded;
13371342
};
13381343

1339-
assert(validateObjectOffsets()); // debug check only
1344+
const size_t uploaded = upload();
1345+
const size_t deltaLeft = drawItem.meta.totalLeftBytesToUpload - uploaded;
13401346

1341-
// we consider buffers valid for command list if we suballocated them (under the hood filled at first time then skipped to not repeat memcpy) - if buffers are valid then command list is as well
1342-
const auto buffersSuballocated = fillBuffer(vertexBuffer.Data, vtxAllocationIx) && fillBuffer(indexBuffer.Data, idxAllocationIx);
1343-
const auto [vtxGlobalObjectOffset, idxGlobalObjectOffset] = buffersSuballocated ? std::make_tuple(drawItem.meta.offsets[vtxAllocationIx] / sizeof(ImDrawVert), drawItem.meta.offsets[idxAllocationIx] / sizeof(ImDrawIdx)) : std::make_tuple((size_t)0u, (size_t)0u);
1347+
totalUploadedSize += uploaded;
1348+
drawItem.meta.totalLeftBytesToUpload = std::clamp(deltaLeft, 0ull, drawItem.meta.totalLeftBytesToUpload);
13441349

1345-
if (buffersSuballocated)
1350+
// we consider buffers valid for command list if we suballocated BOTH of them (under the hood filled at first time then skipped to not repeat memcpy) - if buffers are valid then command list is as well
1351+
const bool buffersFilled = drawItem.meta.filled[DrawItemMeta::VERTEX] && drawItem.meta.filled[DrawItemMeta::INDEX];
1352+
1353+
if (buffersFilled)
13461354
{
1355+
const auto [vtxGlobalObjectOffset, idxGlobalObjectOffset] = std::make_tuple(drawItem.meta.offsets[vtxAllocationIx] / sizeof(ImDrawVert), drawItem.meta.offsets[idxAllocationIx] / sizeof(ImDrawIdx));
1356+
13471357
for (uint32_t j = 0u; j < drawItem.cmdList->CmdBuffer.Size; j++)
13481358
{
1349-
const uint32_t drawID = drawItem.drawIdOffset + j;
1359+
const uint32_t drawID = drawItem.drawIdOffset + j;
13501360

13511361
const auto* cmd = &drawItem.cmdList->CmdBuffer[j];
13521362
auto* indirect = indirectStructures + drawID;
@@ -1382,15 +1392,7 @@ namespace nbl::ext::imgui
13821392
element->samplerIx = cmd->TextureId.samplerIx;
13831393
}
13841394
}
1385-
1386-
return uploaded;
1387-
};
1388-
1389-
const size_t uploaded = upload();
1390-
const size_t deltaLeft = drawItem.meta.totalLeftBytesToUpload - uploaded;
1391-
1392-
totalUploadedSize += uploaded;
1393-
drawItem.meta.totalLeftBytesToUpload = std::clamp(deltaLeft, 0ull, drawItem.meta.totalLeftBytesToUpload);
1395+
}
13941396
}
13951397
}
13961398
};

0 commit comments

Comments
 (0)