Skip to content

Commit ffe6144

Browse files
committed
d3d12: Improve GPU resource gc
1 parent 61e0307 commit ffe6144

File tree

12 files changed

+134
-159
lines changed

12 files changed

+134
-159
lines changed

axmol/rhi/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,6 @@ elseif(AX_RENDER_API STREQUAL "d3d12")
136136
rhi/d3d12/RenderPipeline12.h
137137
rhi/d3d12/ShaderModule12.h
138138
rhi/d3d12/Texture12.h
139-
rhi/d3d12/Utils12.h
140139
rhi/d3d12/Program12.h
141140
rhi/d3d12/VertexLayout12.h
142141
rhi/d3d12/DescriptorHeapAllocator12.h
@@ -153,7 +152,6 @@ elseif(AX_RENDER_API STREQUAL "d3d12")
153152
rhi/d3d12/RenderPipeline12.cpp
154153
rhi/d3d12/ShaderModule12.cpp
155154
rhi/d3d12/Texture12.cpp
156-
rhi/d3d12/Utils12.cpp
157155
rhi/d3d12/Program12.cpp
158156
rhi/d3d12/VertexLayout12.cpp
159157
rhi/d3d12/DescriptorHeapAllocator12.cpp

axmol/rhi/DXUtils.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "axmol/rhi/DXUtils.h"
2+
#include "axmol/platform/Common.h"
23

34
namespace ax::rhi::dxutils
45
{
@@ -175,4 +176,15 @@ DXGI_FORMAT getUAVCompatibleFormat(DXGI_FORMAT format)
175176
}
176177
}
177178

179+
void fatalError(std::string_view op, HRESULT hr)
180+
{
181+
auto msg = fmt::format("{}: 0x{:08X}", op, static_cast<unsigned>(hr));
182+
#if AX_RENDER_API == AX_RENDER_API_D3D12
183+
showAlert(msg, "axmol: D3D12: Fatal Error", AlertStyle::IconError | AlertStyle::RequireSync);
184+
#else
185+
showAlert(msg, "axmol: D3D11: Fatal Error", AlertStyle::IconError | AlertStyle::RequireSync);
186+
#endif
187+
utils::killCurrentProcess(); // kill current process, don't cause crash when driver issue.
188+
}
189+
178190
} // namespace ax::rhi::dxutils

axmol/rhi/DXUtils.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,15 @@ int evalulateMaxTexSize(D3D_FEATURE_LEVEL fl);
3131

3232
DXGI_FORMAT getUAVCompatibleFormat(DXGI_FORMAT format);
3333

34+
void fatalError(std::string_view op, HRESULT hr);
35+
3436
} // namespace ax::rhi::dxutils
3537
/** @} */
38+
39+
/**
 * Evaluates `expr` (an expression yielding an HRESULT) exactly once and, when FAILED() reports a
 * failure, passes the stringified expression followed by " failed" together with the HRESULT to
 * ::ax::rhi::dxutils::fatalError().
 *
 * The message is a plain string literal, which converts implicitly to std::string_view, so the
 * macro does not require std::string_view_literals to be visible where it is expanded.
 * The do { } while (0) wrapper lets the macro be used as a single statement.
 */
#define _AXASSERT_HR(expr)                                                     \
    do                                                                         \
    {                                                                          \
        const HRESULT axAssertHrResult_ = (expr);                              \
        if (FAILED(axAssertHrResult_))                                         \
            ::ax::rhi::dxutils::fatalError(#expr " failed", axAssertHrResult_); \
    } while (0)

axmol/rhi/d3d12/Driver12.cpp

Lines changed: 41 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
#include "axmol/rhi/d3d12/DepthStencilState12.h"
3333
#include "axmol/rhi/d3d12/VertexLayout12.h"
3434
#include "axmol/rhi/d3d12/Sampler12.h"
35-
#include "axmol/rhi/d3d12/Utils12.h"
3635
#include "axmol/base/Logging.h"
3736
#include "axmol/rhi/RHIUtils.h"
3837
#include "axmol/rhi/SamplerCache.h"
@@ -416,15 +415,17 @@ void DriverImpl::initializeDevice()
416415

417416
ComPtr<IDXGIAdapter1> adapter;
418417
ComPtr<IDXGIFactory6> factory6;
419-
uint32_t adapterIndex{0};
420418
hr = _dxgiFactory->QueryInterface(IID_PPV_ARGS(&factory6));
421419
if (SUCCEEDED(hr))
422420
{
423421
// IDXGIFactory6 is not available on all versions of Windows 10. If it is available, use it
424422
// to enumerate the adapters based on the desired power preference.
425-
while ((hr = factory6->EnumAdapterByGpuPreference(
426-
adapterIndex, gpuPref, IID_PPV_ARGS(adapter.ReleaseAndGetAddressOf()))) != DXGI_ERROR_NOT_FOUND)
423+
for (uint32_t adapterIndex = 0;; ++adapterIndex)
427424
{
425+
hr = factory6->EnumAdapterByGpuPreference(adapterIndex, gpuPref,
426+
IID_PPV_ARGS(adapter.ReleaseAndGetAddressOf()));
427+
if (FAILED(hr)) // when start with Visual Studio graphics debugging, will fail
428+
break;
428429
hr = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&_device));
429430
if (SUCCEEDED(hr))
430431
{
@@ -434,10 +435,15 @@ void DriverImpl::initializeDevice()
434435
}
435436
}
436437
else
437-
{
438+
{ // fallback: win10 lower versions
438439
std::vector<std::pair<int, ComPtr<IDXGIAdapter1>>> adapters;
439-
while (_dxgiFactory->EnumAdapters1(adapterIndex++, adapter.ReleaseAndGetAddressOf()) != DXGI_ERROR_NOT_FOUND)
440+
uint32_t adapterIndex{0};
441+
for (uint32_t adapterIndex = 0;; ++adapterIndex)
440442
{
443+
hr = _dxgiFactory->EnumAdapters1(adapterIndex, adapter.ReleaseAndGetAddressOf());
444+
if (FAILED(hr))
445+
break;
446+
441447
DXGI_ADAPTER_DESC desc;
442448
adapter->GetDesc(&desc);
443449

@@ -459,30 +465,36 @@ void DriverImpl::initializeDevice()
459465
adapters.emplace_back(score, adapter);
460466
}
461467

462-
if (powerPreferrence == PowerPreference::HighPerformance)
463-
{
464-
std::stable_sort(adapters.begin(), adapters.end(),
465-
[](auto& lhs, auto& rhs) { return lhs.first > rhs.first; });
466-
}
467-
else if (powerPreferrence == PowerPreference::LowPower)
468+
if (!adapters.empty())
468469
{
469-
std::stable_sort(adapters.begin(), adapters.end(),
470-
[](auto& lhs, auto& rhs) { return lhs.first < rhs.first; });
471-
}
470+
if (powerPreferrence == PowerPreference::HighPerformance)
471+
{
472+
std::stable_sort(adapters.begin(), adapters.end(),
473+
[](auto& lhs, auto& rhs) { return lhs.first > rhs.first; });
474+
}
475+
else if (powerPreferrence == PowerPreference::LowPower)
476+
{
477+
std::stable_sort(adapters.begin(), adapters.end(),
478+
[](auto& lhs, auto& rhs) { return lhs.first < rhs.first; });
479+
}
472480

473-
for (auto& [_, adapter] : adapters)
474-
{
475-
hr = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&_device));
476-
if (SUCCEEDED(hr))
481+
for (auto& [_, adapter] : adapters)
477482
{
478-
_adapter = std::move(adapter);
479-
break;
483+
hr = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&_device));
484+
if (SUCCEEDED(hr))
485+
{
486+
_adapter = std::move(adapter);
487+
break;
488+
}
480489
}
481490
}
482491
}
483492

484493
if (!_adapter)
485-
AX_D3D_FAST_FAIL(hr);
494+
{
495+
_AXASSERT_HR(D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&_device)));
496+
_AXASSERT_HR(_dxgiFactory->EnumAdapterByLuid(_device->GetAdapterLuid(), IID_PPV_ARGS(&_adapter)));
497+
}
486498

487499
// Create graphics queue
488500
D3D12_COMMAND_QUEUE_DESC qdesc{};
@@ -803,11 +815,11 @@ void DriverImpl::processDisposalQueue(uint64_t completeFence)
803815
for (size_t i = 0; i < _disposalQueue.size();)
804816
{
805817
auto& res = _disposalQueue[i];
806-
if (res.fenceValue < completeFence)
818+
if (res.fenceValue <= completeFence)
807819
{
808820
if (res.type == DisposableResource::Type::Resource)
809821
{
810-
res.resource->Release();
822+
SafeRelease(res.resource);
811823
}
812824
else
813825
{
@@ -827,7 +839,7 @@ void DriverImpl::processDisposalQueue(uint64_t completeFence)
827839

828840
// Calls waitForGPU() and then processDisposalQueue(UINT64_MAX), i.e. with the largest possible
// fence value, so no queued entry is held back by the fenceValue comparison.
// Diff view: the line marked `-` is the pre-commit call that the line marked `+` replaces.
void DriverImpl::cleanPendingResources()
829841
{
830-
waitDeviceIdle();
842+
waitForGPU();
831843
processDisposalQueue(UINT64_MAX);
832844
}
833845

@@ -1041,9 +1053,9 @@ bool DriverImpl::generateMipmaps(ID3D12GraphicsCommandList* cmd, ID3D12Resource*
10411053
{
10421054
CD3DX12_HEAP_PROPERTIES heapProps(D3D12_HEAP_TYPE_UPLOAD);
10431055
auto bufDesc = CD3DX12_RESOURCE_DESC::Buffer(totalConstSize);
1044-
if (!CheckHR(getDevice()->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &bufDesc,
1045-
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
1046-
IID_PPV_ARGS(&constUpload)),
1056+
if (!CheckHR(_device->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &bufDesc,
1057+
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
1058+
IID_PPV_ARGS(&constUpload)),
10471059
"Create constant upload buffer"))
10481060
return false;
10491061
}
@@ -1220,7 +1232,7 @@ D3D12BlobHandle DriverImpl::compileMipmapCS(bool isArray)
12201232
return csBlob;
12211233
}
12221234

1223-
void DriverImpl::waitDeviceIdle()
1235+
void DriverImpl::waitForGPU()
12241236
{
12251237
if (_idleFence && _idleEvent && _gfxQueue)
12261238
{

axmol/rhi/d3d12/Driver12.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ class DriverImpl : public DriverBase
176176

177177
void processDisposalQueue(uint64_t completeFence);
178178

179-
void waitDeviceIdle();
179+
void waitForGPU() override;
180180

181181
protected:
182182
void queueDisposalInternal(DisposableResource&& res);

axmol/rhi/d3d12/RenderContext12.cpp

Lines changed: 40 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,18 @@ static HRESULT runOnUIThread(const ComPtr<ICoreDispatcher>& dispatcher, _Fty&& f
167167
}
168168
#endif
169169

170+
uint64_t GPUFence::wait() const
171+
{
172+
const auto completeFenceValue = this->handle->GetCompletedValue();
173+
if (completeFenceValue < this->value)
174+
{
175+
this->handle->SetEventOnCompletion(this->value, this->event);
176+
WaitForSingleObject(this->event, INFINITE);
177+
return this->value;
178+
}
179+
return completeFenceValue;
180+
}
181+
170182
RenderContextImpl::RenderContextImpl(DriverImpl* driver, void* surfaceContext) : _driver(driver)
171183
{
172184
_device = driver->getDevice();
@@ -342,17 +354,22 @@ RenderContextImpl::RenderContextImpl(DriverImpl* driver, void* surfaceContext) :
342354

343355
RenderContextImpl::~RenderContextImpl()
344356
{
345-
_driver->waitDeviceIdle();
357+
_driver->waitForGPU();
346358

347359
AX_SAFE_RELEASE_NULL(_screenRT);
348360
AX_SAFE_RELEASE_NULL(_renderPipeline);
349361

350362
for (int i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i)
351363
{
352-
if (_fenceEvents[i])
364+
if (_inflightFences[i].event)
365+
{
366+
CloseHandle(_inflightFences[i].event);
367+
_inflightFences[i].event = nullptr;
368+
}
369+
370+
if (_inflightFences[i].handle)
353371
{
354-
CloseHandle(_fenceEvents[i]);
355-
_fenceEvents[i] = nullptr;
372+
SafeRelease(_inflightFences[i].handle);
356373
}
357374

358375
if (_srvHeaps[i])
@@ -380,11 +397,10 @@ void RenderContextImpl::createCommandObjects()
380397
_commandLists[i]->Close();
381398

382399
// Fence + event
383-
hr = _device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&_fences[i]));
400+
hr = _device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&_inflightFences[i].handle));
384401
AXASSERT(SUCCEEDED(hr), "CreateFence failed");
385-
_fenceValues[i] = 0;
386-
_fenceEvents[i] = CreateEvent(nullptr, FALSE, FALSE, nullptr);
387-
AXASSERT(_fenceEvents[i] != nullptr, "CreateEvent failed");
402+
_inflightFences[i].event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
403+
AXASSERT(!!_inflightFences[i], "CreateEvent failed");
388404
}
389405
}
390406

@@ -428,32 +444,24 @@ void RenderContextImpl::setRenderPipeline(RenderPipeline* renderPipeline)
428444

429445
// Returns the cached completed fence value; beginFrame() stores the result of the current
// in-flight fence's wait() in _completedFenceValue.
// Diff view: the line marked `-` is the pre-commit return that the line marked `+` replaces.
uint64_t RenderContextImpl::getCompletedFenceValue() const
430446
{
431-
return _fenceValues[_currentFrame];
447+
return _completedFenceValue;
432448
}
433449

434450
bool RenderContextImpl::beginFrame()
435451
{
436452
// Wait fence of current frame
437-
auto fence = _fences[_currentFrame];
438-
439-
const auto completeFenceValue = fence->GetCompletedValue();
440-
const auto frameFenceValue = _fenceValues[_currentFrame];
441-
if (completeFenceValue < frameFenceValue)
442-
{
443-
fence->SetEventOnCompletion(frameFenceValue, _fenceEvents[_currentFrame]);
444-
WaitForSingleObject(_fenceEvents[_currentFrame], INFINITE);
445-
}
446-
447-
_advanceFenceValues[_currentFrame] = frameFenceValue + 1;
453+
auto& currentFence = _inflightFences[_currentFrame];
454+
_completedFenceValue = currentFence.wait();
455+
_driver->processDisposalQueue(_completedFenceValue);
448456

449-
if (!_fenceCompletionOps.empty())
457+
if (!_frameCompletionOps.empty())
450458
{
451-
for (auto& op : _fenceCompletionOps)
452-
op(completeFenceValue);
453-
_fenceCompletionOps.clear();
459+
for (auto&& op : _frameCompletionOps)
460+
op(_completedFenceValue);
461+
_frameCompletionOps.clear();
454462
}
455463

456-
_driver->processDisposalQueue(completeFenceValue);
464+
currentFence.value = ++_frameFenceValue;
457465

458466
if (_swapchainDirty)
459467
{
@@ -520,7 +528,7 @@ void RenderContextImpl::beginRenderPass(RenderTarget* renderTarget, const Render
520528
// Bind RTV/DSV and clear according to flags
521529
rtImpl->beginRenderPass(_currentCmdList, descriptor, _renderTargetWidth, _renderTargetHeight, _currentImageIndex);
522530

523-
rtImpl->setLastFenceValue(_advanceFenceValues[_currentFrame]);
531+
rtImpl->setLastFenceValue(_frameFenceValue);
524532
}
525533

526534
void RenderContextImpl::endRenderPass()
@@ -563,8 +571,8 @@ void RenderContextImpl::endFrame()
563571
#endif
564572

565573
// Signal fence for this frame
566-
_graphicsQueue->Signal(_fences[_currentFrame].Get(), _advanceFenceValues[_currentFrame]);
567-
_fenceValues[_currentFrame] = _advanceFenceValues[_currentFrame];
574+
auto& currentFence = _inflightFences[_currentFrame];
575+
_graphicsQueue->Signal(currentFence.handle, currentFence.value);
568576

569577
// Next frame index
570578
_currentFrame = (_currentFrame + 1) % MAX_FRAMES_IN_FLIGHT;
@@ -849,8 +857,7 @@ void RenderContextImpl::prepareDrawing(ID3D12GraphicsCommandList* cmd)
849857

850858
applyPendingDynamicStates();
851859

852-
const auto advanceFenceValue = _advanceFenceValues[_currentFrame];
853-
_vertexBuffer->setLastFenceValue(advanceFenceValue);
860+
_vertexBuffer->setLastFenceValue(_frameFenceValue);
854861

855862
// bind vertex buffers
856863
if (!_instanceBuffer)
@@ -863,7 +870,7 @@ void RenderContextImpl::prepareDrawing(ID3D12GraphicsCommandList* cmd)
863870
}
864871
else
865872
{
866-
_instanceBuffer->setLastFenceValue(advanceFenceValue);
873+
_instanceBuffer->setLastFenceValue(_frameFenceValue);
867874
D3D12_VERTEX_BUFFER_VIEW views[2]{};
868875
views[0].BufferLocation = _vertexBuffer->internalResource()->GetGPUVirtualAddress();
869876
views[0].SizeInBytes = static_cast<UINT>(_vertexBuffer->getSize());
@@ -919,7 +926,7 @@ void RenderContextImpl::prepareDrawing(ID3D12GraphicsCommandList* cmd)
919926
maxSlot = slot;
920927

921928
auto textureImpl = static_cast<TextureImpl*>(bindingSet.texs[i]);
922-
textureImpl->setLastFenceValue(advanceFenceValue);
929+
textureImpl->setLastFenceValue(_frameFenceValue);
923930
auto srvHandle = textureImpl->internalHandle().srv;
924931
assert(!!srvHandle);
925932

@@ -1057,7 +1064,7 @@ void RenderContextImpl::readPixels(RenderTarget* rt,
10571064
}
10581065
rt->retain();
10591066

1060-
_fenceCompletionOps.emplace_back([this, rt, preserveAxisHint, callback = std::move(callback)](uint64_t) mutable {
1067+
_frameCompletionOps.emplace_back([this, rt, preserveAxisHint, callback = std::move(callback)](uint64_t) mutable {
10611068
readPixelsInternal(rt, preserveAxisHint, callback);
10621069

10631070
rt->release();

0 commit comments

Comments
 (0)