Skip to content

Commit 499c10d

Browse files
author
devsh
committed
make asset converter work properly in absence of transfer SIntendedSubmitInfo but when compute calls are done/needed
1 parent 89f499d commit 499c10d

File tree

1 file changed

+26
-18
lines changed

1 file changed

+26
-18
lines changed

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4037,23 +4037,26 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
40374037

40384038
// whenever transfer needs to do a submit overflow because it ran out of memory for streaming, we can already submit the recorded compute shader dispatches
40394039
auto computeCmdBuf = shouldDoSomeCompute ? params.compute->getCommandBufferForRecording():nullptr;
4040-
auto drainCompute = [&params,&computeCmdBuf](const std::span<const IQueue::SSubmitInfo::SSemaphoreInfo> extraSignal={})->auto
4040+
auto drainCompute = [&params,shouldDoSomeTransfer,&computeCmdBuf](const std::span<const IQueue::SSubmitInfo::SSemaphoreInfo> extraSignal={})->auto
40414041
{
40424042
if (!computeCmdBuf || computeCmdBuf->cmdbuf->empty())
40434043
return IQueue::RESULT::SUCCESS;
40444044
// before we overflow submit we need to inject extra wait semaphores
40454045
auto& waitSemaphoreSpan = params.compute->waitSemaphores;
40464046
std::unique_ptr<IQueue::SSubmitInfo::SSemaphoreInfo[]> patchedWaits;
40474047
// the transfer scratch semaphore value is from the last submit, not the future value we're enqueuing all the deferred memory releases with
4048-
if (waitSemaphoreSpan.empty())
4049-
waitSemaphoreSpan = {&params.transfer->scratchSemaphore,1};
4050-
else
4048+
if (shouldDoSomeTransfer)
40514049
{
4052-
const auto origCount = waitSemaphoreSpan.size();
4053-
patchedWaits.reset(new IQueue::SSubmitInfo::SSemaphoreInfo[origCount+1]);
4054-
std::copy(waitSemaphoreSpan.begin(),waitSemaphoreSpan.end(),patchedWaits.get());
4055-
patchedWaits[origCount] = params.transfer->scratchSemaphore;
4056-
waitSemaphoreSpan = {patchedWaits.get(),origCount+1};
4050+
if (waitSemaphoreSpan.empty())
4051+
waitSemaphoreSpan = {&params.transfer->scratchSemaphore,1};
4052+
else
4053+
{
4054+
const auto origCount = waitSemaphoreSpan.size();
4055+
patchedWaits.reset(new IQueue::SSubmitInfo::SSemaphoreInfo[origCount+1]);
4056+
std::copy(waitSemaphoreSpan.begin(),waitSemaphoreSpan.end(),patchedWaits.get());
4057+
patchedWaits[origCount] = params.transfer->scratchSemaphore;
4058+
waitSemaphoreSpan = {patchedWaits.get(),origCount+1};
4059+
}
40574060
}
40584061
// don't worry about resetting old `waitSemaphores` because they get cleared to an empty span after overflow submit
40594062
IQueue::RESULT res = params.compute->submit(computeCmdBuf,extraSignal);
@@ -4067,14 +4070,18 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
40674070
};
40684071

40694072
// We want to be doing Host operations while stalled for GPU, compose our overflow callback on top of what's already there, only if we need to ofc
4070-
auto origXferStallCallback = params.transfer->overflowCallback;
4071-
params.transfer->overflowCallback = [device,&hostUploadBuffers,&origXferStallCallback,&drainCompute](const ISemaphore::SWaitInfo& tillScratchResettable)->void
4073+
std::function<void(const ISemaphore::SWaitInfo&)> origXferStallCallback;
4074+
if (shouldDoSomeTransfer)
40724075
{
4073-
drainCompute();
4074-
if (origXferStallCallback)
4075-
origXferStallCallback(tillScratchResettable);
4076-
hostUploadBuffers([device,&tillScratchResettable]()->bool{return device->waitForSemaphores({&tillScratchResettable,1},false,0)==ISemaphore::WAIT_RESULT::TIMEOUT;});
4077-
};
4076+
origXferStallCallback = std::move(params.transfer->overflowCallback);
4077+
params.transfer->overflowCallback = [device,&hostUploadBuffers,&origXferStallCallback,&drainCompute](const ISemaphore::SWaitInfo& tillScratchResettable)->void
4078+
{
4079+
drainCompute();
4080+
if (origXferStallCallback)
4081+
origXferStallCallback(tillScratchResettable);
4082+
hostUploadBuffers([device,&tillScratchResettable]()->bool{return device->waitForSemaphores({&tillScratchResettable,1},false,0)==ISemaphore::WAIT_RESULT::TIMEOUT;});
4083+
};
4084+
}
40784085
// when overflowing compute resources, we need to submit the Xfer before submitting Compute
40794086
auto drainBoth = [&params,&xferCmdBuf,&drainCompute](const std::span<const IQueue::SSubmitInfo::SSemaphoreInfo> extraSignal={})->auto
40804087
{
@@ -4149,7 +4156,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
41494156
return true;
41504157
};
41514158

4152-
// because of the layout transitions
4159+
// because of the layout transitions (TODO: conditional when host_image_copy gets implemented)
41534160
params.transfer->scratchSemaphore.stageMask |= PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS;
41544161
// TODO:: Shall we rewrite? e.g. we upload everything first, extra submit for QFOT pipeline barrier & transition in overflow callback, then record compute commands, and submit them, plus their final QFOTs
41554162
// Let's analyze sync cases:
@@ -5337,7 +5344,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
53375344
retval.set({params.transfer->scratchSemaphore.semaphore,params.transfer->scratchSemaphore.value});
53385345
}
53395346
// reset original callback
5340-
params.transfer->overflowCallback = origXferStallCallback;
5347+
if (bool(origXferStallCallback))
5348+
params.transfer->overflowCallback = std::move(origXferStallCallback);
53415349

53425350
// It's too dangerous to leave an Intended Transfer Submit hanging around that needs to be submitted for Compute to make forward progress outside of this utility,
53435351
// and doing transfer-signals-after-compute-wait timeline sema tricks is not an option because:

0 commit comments

Comments
 (0)