From e41cc5b2b735346c907046e3bd9c4417629e3cb2 Mon Sep 17 00:00:00 2001
From: Michael Saunders
Date: Wed, 21 May 2025 15:46:14 -0700
Subject: [PATCH 1/2] Avoid creating staging buffers in TransferTask that exceed the Vulkan physical device property limit.

If multiple regions to transfer collectively exceed the maximum memory
allocation size, TransferTask::_transferData() creates a new staging buffer
for the overflowing region and any subsequent regions. If a single region
exceeds the maximum memory allocation size, TransferTask::_transferData()
ensures that the region is in its own staging buffer so that it is more
likely to succeed on drivers that are less strict about memory requests
exceeding the specified physical device limit.

Windows NVIDIA drivers tend to be very strict about keeping allocations at
or below the stated maximum memory allocation size, whereas Linux drivers
are more forgiving.
---
 include/vsg/app/TransferTask.h |  16 +-
 src/vsg/app/TransferTask.cpp   | 353 +++++++++++++++++++++++----------
 2 files changed, 257 insertions(+), 112 deletions(-)

diff --git a/include/vsg/app/TransferTask.h b/include/vsg/app/TransferTask.h
index 4308f9c852..4627703198 100644
--- a/include/vsg/app/TransferTask.h
+++ b/include/vsg/app/TransferTask.h
@@ -60,6 +60,9 @@ namespace vsg
         /// minimum size to use when allocating staging buffers.
         VkDeviceSize minimumStagingBufferSize = 16 * 1024 * 1024;
 
+        // allocation chunk size
+        VkDeviceSize stagingBufferSizeChunkSize = 64 * 1024;
+
         /// hook for assigning Instrumentation to enable profiling of record traversal.
         ref_ptr<Instrumentation> instrumentation;
 
@@ -78,8 +81,9 @@ namespace vsg
         {
             ref_ptr<CommandBuffer> transferCommandBuffer;
             ref_ptr<Fence> fence;
-            ref_ptr<Buffer> staging;
-            void* buffer_data = nullptr;
+            std::vector<ref_ptr<Buffer>> stagingBuffers;
+            std::vector<std::vector<VkDeviceSize>> stagingOffsets;
+            std::vector<void*> buffer_data;
             std::vector<VkBufferCopy> copyRegions;
             bool waitOnFence = false;
         };
@@ -94,8 +98,6 @@ namespace vsg
             std::vector<ref_ptr<TransferBlock>> frames;
 
             VkDeviceSize dataTotalRegions = 0;
-            VkDeviceSize dataTotalSize = 0;
-            VkDeviceSize imageTotalSize = 0;
 
             ref_ptr<Semaphore> transferCompleteSemaphore;
             ref_ptr<Semaphore> transferConsumerCompletedSemaphore;
@@ -111,10 +113,10 @@ namespace vsg
 
         TransferResult _transferData(DataToCopy& dataToCopy);
 
-        void _transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, VkDeviceSize& offset);
+        void _transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, size_t stagingBufferOffsetBegin, size_t regionOffsetBegin);
 
-        void _transferImageInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, VkDeviceSize& offset);
-        void _transferImageInfo(VkCommandBuffer vk_commandBuffer, TransferBlock& frame, VkDeviceSize& offset, ImageInfo& imageInfo);
+        void _transferImageInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, size_t stagingBufferOffsetBegin, size_t regionOffsetBegin);
+        void _transferImageInfo(VkCommandBuffer vk_commandBuffer, TransferBlock& frame, size_t stagingBufferOffset, size_t regionOffset, ImageInfo& imageInfo);
     };
     VSG_type_name(vsg::TransferTask);
 
diff --git a/src/vsg/app/TransferTask.cpp b/src/vsg/app/TransferTask.cpp
index daf471acba..5ae4f7962f 100644
--- a/src/vsg/app/TransferTask.cpp
+++ b/src/vsg/app/TransferTask.cpp
@@ -17,6 +17,10 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 #include
 #include
+#include <algorithm>
+#include <cassert>
+#include <numeric>
+
 using namespace vsg;
 
 TransferTask::TransferTask(Device* in_device, uint32_t numBuffers) :
@@ -101,29 +105,45 @@ bool TransferTask::DataToCopy::requiresCopy(uint32_t deviceID) const
     return false;
 }
 
-void TransferTask::_transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, VkDeviceSize& offset)
+namespace {
+constexpr VkDeviceSize alignOffset(VkDeviceSize offset, VkDeviceSize alignment)
+{
+    return (/*alignment == 1 ||*/ (offset % alignment) == 0) ? offset : ((offset / alignment) + 1) * alignment;
+}
+}
+
+void TransferTask::_transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, size_t stagingBufferOffsetBegin, size_t regionOffsetBegin)
 {
     CPU_INSTRUMENTATION_L1(instrumentation);
 
     auto deviceID = device->deviceID;
-    auto& staging = frame.staging;
+    auto& stagingBuffers = frame.stagingBuffers;
+    auto& stagingOffsets = frame.stagingOffsets;
     auto& copyRegions = frame.copyRegions;
     auto& buffer_data = frame.buffer_data;
 
-    VkDeviceSize alignment = 4;
-
     copyRegions.clear();
     copyRegions.resize(dataToCopy.dataTotalRegions);
     VkBufferCopy* pRegions = copyRegions.data();
 
     log(level, " TransferTask::_transferBufferInfos(..) ", this);
 
+    size_t sbOffset = stagingBufferOffsetBegin;
+
+    // copy the buffer regions and advance to next buffer
+    const auto copyBufferRegions = [&](ref_ptr<Buffer> buffer, size_t regionCount){
+        vkCmdCopyBuffer(vk_commandBuffer, stagingBuffers[sbOffset]->vk(deviceID), buffer->vk(deviceID), regionCount, pRegions);
+
+        log(level, " vkCmdCopyBuffer(", ", ", stagingBuffers[sbOffset]->vk(deviceID), ", ", buffer->vk(deviceID), ", ", regionCount, ", ", pRegions);
+
+        return pRegions + regionCount;
+    };
+
     // copy any modified BufferInfo
+    size_t regionOffset = regionOffsetBegin;
     for (auto buffer_itr = dataToCopy.dataMap.begin(); buffer_itr != dataToCopy.dataMap.end();)
     {
         auto& bufferInfos = buffer_itr->second;
 
-        uint32_t regionCount = 0;
+        size_t regionCount = 0;
 
         log(level, " copying bufferInfos.size() = ", bufferInfos.size(), "{");
 
         for (auto bufferInfo_itr = bufferInfos.begin(); bufferInfo_itr != bufferInfos.end();)
         {
@@ -137,8 +157,11 @@ void TransferTask::_transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer
             {
                 if (bufferInfo->syncModifiedCounts(deviceID))
                 {
+                    assert(sbOffset < stagingOffsets.size() && regionOffset < stagingOffsets[sbOffset].size()-1);
+                    VkDeviceSize offset = stagingOffsets[sbOffset][regionOffset];
+
                     // copy data to staging buffer memory
-                    char* ptr = reinterpret_cast<char*>(buffer_data) + offset;
+                    char* ptr = reinterpret_cast<char*>(buffer_data[sbOffset]) + offset;
                     std::memcpy(ptr, bufferInfo->data->dataPointer(), bufferInfo->range);
 
                     // record region
@@ -146,8 +169,17 @@ void TransferTask::_transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer
 
                     log(level, " copying ", bufferInfo, ", ", bufferInfo->data, " to ", static_cast<void*>(ptr));
 
-                    VkDeviceSize endOfEntry = offset + bufferInfo->range;
-                    offset = (/*alignment == 1 ||*/ (endOfEntry % alignment) == 0) ? endOfEntry : ((endOfEntry / alignment) + 1) * alignment;
+                    // advance to the next staging buffer if it has reached capacity
+                    ++regionOffset;
+                    if (regionOffset == stagingOffsets[sbOffset].size()-1)
+                    {
+                        // copy the buffer regions and advance to next buffer
+                        pRegions = copyBufferRegions(buffer_itr->first, regionCount);
+
+                        ++sbOffset;
+                        regionOffset = 0;
+                        regionCount = 0;
+                    }
                 }
                 else
                 {
@@ -161,6 +193,10 @@ void TransferTask::_transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer
             else
             {
                 log(level, " removing copied static data: ", bufferInfo, ", ", bufferInfo->data);
+                if (bufferInfo->data->properties.dataVariance == STATIC_DATA_UNREF_AFTER_TRANSFER)
+                {
+                    bufferInfo->data.reset();
+                }
                 bufferInfo_itr = bufferInfos.erase(bufferInfo_itr);
             }
         }
@@ -169,14 +205,8 @@ void TransferTask::_transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer
 
         if (regionCount > 0)
         {
-            auto& buffer = buffer_itr->first;
-
-            vkCmdCopyBuffer(vk_commandBuffer, staging->vk(deviceID), buffer->vk(deviceID), regionCount, pRegions);
-
-            log(level, " vkCmdCopyBuffer(", ", ", staging->vk(deviceID), ", ", buffer->vk(deviceID), ", ", regionCount, ", ", pRegions);
-
-            // advance to next buffer
-            pRegions += regionCount;
+            // copy the buffer regions and advance to next buffer
+            pRegions = copyBufferRegions(buffer_itr->first, regionCount);
         }
 
         if (bufferInfos.empty())
@@ -210,13 +240,16 @@ void TransferTask::assign(const ImageInfoList& imageInfoList)
     }
 }
 
-void TransferTask::_transferImageInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, VkDeviceSize& offset)
+void TransferTask::_transferImageInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, size_t stagingBufferOffsetBegin, size_t regionOffsetBegin)
 {
     CPU_INSTRUMENTATION_L1(instrumentation);
 
     auto deviceID = device->deviceID;
+    auto& stagingOffsets = frame.stagingOffsets;
 
     // transfer any modified ImageInfo
+    size_t sbOffset = stagingBufferOffsetBegin;
+    size_t regionOffset = regionOffsetBegin;
     for (auto imageInfo_itr = dataToCopy.imageInfoSet.begin(); imageInfo_itr != dataToCopy.imageInfoSet.end();)
     {
         auto& imageInfo = *imageInfo_itr;
@@ -229,7 +262,16 @@ void TransferTask::_transferImageInfos(DataToCopy& dataToCopy, VkCommandBuffer v
         {
             if (imageInfo->syncModifiedCounts(deviceID))
             {
-                _transferImageInfo(vk_commandBuffer, frame, offset, *imageInfo);
+                assert(sbOffset < stagingOffsets.size() && regionOffset < stagingOffsets[sbOffset].size()-1);
+                _transferImageInfo(vk_commandBuffer, frame, sbOffset, regionOffset, *imageInfo);
+
+                // advance to the next staging buffer if it has reached capacity
+                ++regionOffset;
+                if (regionOffset == stagingOffsets[sbOffset].size()-1)
+                {
+                    ++sbOffset;
+                    regionOffset = 0;
+                }
             }
             else
             {
@@ -249,17 +291,15 @@ void TransferTask::_transferImageInfos(DataToCopy& dataToCopy, VkCommandBuffer v
     }
 }
 
-void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, TransferBlock& frame, VkDeviceSize& offset, ImageInfo& imageInfo)
+void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, TransferBlock& frame, size_t stagingBufferOffset, size_t regionOffset, ImageInfo& imageInfo)
 {
     CPU_INSTRUMENTATION_L2(instrumentation);
 
     auto& data = imageInfo.imageView->image->data;
 
-    VkDeviceSize image_alignment = std::max(static_cast<VkDeviceSize>(data->stride()), static_cast<VkDeviceSize>(4));
-    offset = ((offset % image_alignment) == 0) ? offset : ((offset / image_alignment) + 1) * image_alignment;
-
-    auto& imageStagingBuffer = frame.staging;
-    auto& buffer_data = frame.buffer_data;
+    auto& imageStagingBuffer = frame.stagingBuffers[stagingBufferOffset];
+    const auto offset = frame.stagingOffsets[stagingBufferOffset][regionOffset];
+    auto& buffer_data = frame.buffer_data[stagingBufferOffset];
     char* ptr = reinterpret_cast<char*>(buffer_data) + offset;
 
     auto properties = data->properties;
@@ -269,8 +309,6 @@ void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, Transfer
     auto mipmapOffsets = data->computeMipmapOffsets();
     uint32_t mipLevels = vsg::computeNumMipMapLevels(data, imageInfo.sampler);
 
-    auto source_offset = offset;
-
     log(level, " TransferTask::_transferImageInfo(..) ", this, ",ImageInfo needs copying ", data, ", mipLevels = ", mipLevels);
 
     // copy data.
@@ -280,7 +318,6 @@ void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, Transfer
     {
         log(level, " sourceFormat and targetFormat compatible.");
         std::memcpy(ptr, data->dataPointer(), data->dataSize());
-        offset += data->dataSize();
     }
     else
     {
@@ -290,7 +327,6 @@ void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, Transfer
         {
             log(level, " sourceTraits.size and targetTraits.size compatible.");
             std::memcpy(ptr, data->dataPointer(), data->dataSize());
-            offset += data->dataSize();
         }
         else
         {
@@ -306,8 +342,6 @@ void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, Transfer
             uint32_t bytesFromSource = sourceTraits.size;
             uint32_t bytesToTarget = targetTraits.size;
 
-            offset += imageTotalSize;
-
             // copy data
             using value_type = uint8_t;
             const value_type* src_ptr = reinterpret_cast<const value_type*>(data->dataPointer());
@@ -333,7 +367,7 @@ void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, Transfer
     }
 
     // transfer data.
-    transferImageData(imageInfo.imageView, imageInfo.imageLayout, properties, width, height, depth, mipLevels, mipmapOffsets, imageStagingBuffer, source_offset, vk_commandBuffer, device);
+    transferImageData(imageInfo.imageView, imageInfo.imageLayout, properties, width, height, depth, mipLevels, mipmapOffsets, imageStagingBuffer, offset, vk_commandBuffer, device);
 }
 
 TransferTask::TransferResult TransferTask::transferData(TransferMask transferMask)
@@ -363,73 +397,171 @@ TransferTask::TransferResult TransferTask::_transferData(DataToCopy& dataToCopy)
     }
 
     //
-    // begin compute total data size
+    // begin gathering staging buffer offsets
+    //
+    const VkDeviceSize alignment = 4;
+
+    const auto maxMemoryAllocationSize = device->getPhysicalDevice()
+                                             ->getProperties().maxMemoryAllocationSize;
+
+    const auto frameIndex = dataToCopy.frameIndex;
+    auto& frame = *(dataToCopy.frames[frameIndex]);
+    auto& stagingOffsets = frame.stagingOffsets;
+
+    const auto createNewStagingOffsets = [&](VkDeviceSize regionSize){
+        // allocate a vector of offsets for the new staging buffer with
+        // the first offset initialized to zero for the new entry
+        stagingOffsets.push_back(std::vector<VkDeviceSize>(1,VkDeviceSize(0)));
+        // calculate the end of the first entry in the new staging buffer
+        return alignOffset(regionSize, alignment);
+    };
+
+    //
+    // Gather image offsets into one or more staging buffers of the frame. If multiple images do not
+    // fit within a single staging buffer, additional staging buffers are created to accommodate the
+    // transfer. Any single ImageInfo data must fit within the maximum allocation size for a single
+    // staging buffer, however some drivers do permit overallocation so if there is a single allocation
+    // larger than the maximum we rely on the memory allocation request to throwing if it is unable
+    // to honor the request. Ideally the client code should create smaller buffers.
     //
     VkDeviceSize offset = 0;
-    VkDeviceSize alignment = 4;
+    stagingOffsets.clear();
+    const size_t imageStagingBufferOffsetBegin = 0;
+    const size_t imageRegionOffsetBegin = 0;
+    if (!dataToCopy.imageInfoSet.empty())
+    {
+        for (const auto& imageInfo : dataToCopy.imageInfoSet)
+        {
+            // see TransferTask::_transferImageInfos
+            if (imageInfo->referenceCount() > 1 && imageInfo->requiresCopy(deviceID))
+            {
+                // if this is the first staging buffer, allocate a vector of offsets
+                if (stagingOffsets.empty())
+                    stagingOffsets.push_back({});
+
+                auto data = imageInfo->imageView->image->data;
+
+                // adjust offset to make sure it fits with the stride();
+                const VkDeviceSize image_alignment = std::max(static_cast<VkDeviceSize>(data->stride()), alignment);
+                offset = alignOffset(offset, image_alignment);
+                stagingOffsets.back().push_back(offset);
+
+                VkFormat targetFormat = imageInfo->imageView->format;
+                auto targetTraits = getFormatTraits(targetFormat);
+                VkDeviceSize imageSize = (targetTraits.size > 0) ? targetTraits.size * data->valueCount() : data->dataSize();
+                log(level, " ", data, ", data->dataSize() = ", data->dataSize(), ", imageSize = ", imageSize, " targetTraits.size = ", targetTraits.size, ", ", data->valueCount(), ", targetFormat = ", targetFormat);
+
+                offset = alignOffset(offset + imageSize, alignment);
+
+                // If adding this region would exceed the maximum allocation size create a new
+                // staging buffer, unless this is an overallocation request and it is the only
+                // allocation in the buffer, in which case permit the overallocation to reside
+                // in this staging buffer.
+                if (offset >= maxMemoryAllocationSize && stagingOffsets.back().size() > 1)
+                    offset = createNewStagingOffsets(imageSize);
+            }
+        }
+    }
 
-    for (const auto& imageInfo : dataToCopy.imageInfoSet)
+    const size_t numImageStagingBuffers = stagingOffsets.size();
+    const size_t numImageRegionsInLastBuffer = stagingOffsets.empty() ? 0 : stagingOffsets.back().size();
+
+    //
+    // Gather data offsets into one or more staging buffers of the frame. If multiple data buffers
+    // do not fit within a single staging buffer, additional staging buffers are created to
+    // accommodate the transfer. Any single data buffer must fit within the maximum allocation size
+    // for a single staging buffer, however some drivers do permit overallocation so if there is a
+    // single allocation larger than the maximum we rely on the memory allocation request to
+    // throwing if it is unable to honor the request. Ideally the client code should create smaller
+    // buffers.
+    //
+    dataToCopy.dataTotalRegions = 0;
+    if (!dataToCopy.dataMap.empty())
     {
-        auto data = imageInfo->imageView->image->data;
+        for (const auto& [_, bufferInfos] : dataToCopy.dataMap)
+        {
+            for (const auto& offset_bufferInfo : bufferInfos)
+            {
+                const auto& bufferInfo = offset_bufferInfo.second;
 
-        // adjust offset to make sure it fits with the stride();
-        VkDeviceSize image_alignment = std::max(static_cast<VkDeviceSize>(data->stride()), alignment);
-        offset = ((offset % image_alignment) == 0) ? offset : ((offset / image_alignment) + 1) * image_alignment;
+                // see TransferTask::_transferBufferInfos
+                if (bufferInfo->referenceCount() > 1 && bufferInfo->requiresCopy(deviceID))
+                {
+                    // if this is the first staging buffer, allocate a vector of offsets
+                    if (stagingOffsets.empty())
+                        stagingOffsets.push_back({});
 
-        VkFormat targetFormat = imageInfo->imageView->format;
-        auto targetTraits = getFormatTraits(targetFormat);
-        VkDeviceSize imageSize = (targetTraits.size > 0) ? targetTraits.size * data->valueCount() : data->dataSize();
-        log(level, " ", data, ", data->dataSize() = ", data->dataSize(), ", imageSize = ", imageSize, " targetTraits.size = ", targetTraits.size, ", ", data->valueCount(), ", targetFormat = ", targetFormat);
+                    stagingOffsets.back().push_back(offset);
+
+                    ++dataToCopy.dataTotalRegions;
 
-        VkDeviceSize endOfEntry = offset + imageSize;
-        offset = (/*alignment == 1 ||*/ (endOfEntry % alignment) == 0) ? endOfEntry : ((endOfEntry / alignment) + 1) * alignment;
+                    offset = alignOffset(offset + bufferInfo->range, alignment);
+
+                    // If adding this region would exceed the maximum allocation size create a new
+                    // staging buffer, unless this is an overallocation request and it is the only
+                    // allocation in the buffer, in which case permit the overallocation to reside
+                    // in this staging buffer.
+                    if (offset >= maxMemoryAllocationSize && stagingOffsets.back().size() > 1)
+                        offset = createNewStagingOffsets(bufferInfo->range);
+                }
+            }
+        }
     }
 
-    dataToCopy.imageTotalSize = offset;
-    log(level, " dataToCopy.imageTotalSize = ", dataToCopy.imageTotalSize);
+    // If any staging buffer offsets were added, record the exclusive ending offset of the previous
+    // entry for the last staging buffer. The last offset is used to get each staging buffer's size.
+    if (!stagingOffsets.empty())
+    {
+        // If adding this region would exceed the maximum allocation size create a new
+        // staging buffer, unless this is an overallocation request and it is the only
+        // allocation in the buffer, in which case permit the overallocation to reside
+        // in this staging buffer.
+        if (offset >= maxMemoryAllocationSize && stagingOffsets.back().size() > 1)
+            offset = createNewStagingOffsets(offset - stagingOffsets.back().back());
+
+        stagingOffsets.back().push_back(offset);
+    }
 
-    offset = 0;
-    dataToCopy.dataTotalRegions = 0;
-    for (const auto& entry : dataToCopy.dataMap)
+    // compute the starting staging buffer and offsets for the data regions
+    size_t dataStagingBufferOffsetBegin = 0;
+    size_t dataRegionOffsetBegin = 0;
+    if (numImageStagingBuffers > 0)
     {
-        const auto& bufferInfos = entry.second;
-        for (const auto& offset_bufferInfo : bufferInfos)
+        if (numImageRegionsInLastBuffer < stagingOffsets[numImageStagingBuffers-1].size()-1)
+        {
+            // the beginning of the data regions shares the same staging buffer as the end of the images
+            dataStagingBufferOffsetBegin = numImageStagingBuffers-1;
+            dataRegionOffsetBegin = numImageRegionsInLastBuffer;
+        }
+        else
         {
-            const auto& bufferInfo = offset_bufferInfo.second;
-            VkDeviceSize endOfEntry = offset + bufferInfo->range;
-            offset = (/*alignment == 1 ||*/ (endOfEntry % alignment) == 0) ? endOfEntry : ((endOfEntry / alignment) + 1) * alignment;
-            ++dataToCopy.dataTotalRegions;
+            // data regions were added to a new staging buffer
+            dataStagingBufferOffsetBegin = numImageStagingBuffers;
+            dataRegionOffsetBegin = 0;
         }
     }
-    dataToCopy.dataTotalSize = offset;
-    log(level, " dataToCopy.dataTotalSize = ", dataToCopy.dataTotalSize);
+
+    log(level, " dataToCopy total size = ",
+        std::accumulate(stagingOffsets.begin(), stagingOffsets.end(), VkDeviceSize(0),
+                        [](auto sum, const auto& offsets){ return sum + offsets.back(); }));
 
     //
-    // end of compute size
+    // end of gathering staging buffer offsets
     //
 
-    VkDeviceSize totalSize = dataToCopy.dataTotalSize + dataToCopy.imageTotalSize;
-    if (totalSize == 0) return TransferResult{VK_SUCCESS, {}};
-
-    log(level, " totalSize = ", totalSize);
-
-    auto& frame = *(dataToCopy.frames[dataToCopy.frameIndex]);
     auto& fence = frame.fence;
-    auto& staging = frame.staging;
+    auto& stagingBuffers = frame.stagingBuffers;
     auto& commandBuffer = frame.transferCommandBuffer;
     auto& newSignalSemaphore = dataToCopy.transferCompleteSemaphore;
-
-    const auto& copyRegions = frame.copyRegions;
     auto& buffer_data = frame.buffer_data;
 
-    log(level, " frameIndex = ", dataToCopy.frameIndex);
+    log(level, " frameIndex = ", frameIndex);
     log(level, " frame = ", &frame);
     log(level, " fence = ", fence);
     log(level, " transferQueue = ", transferQueue);
-    log(level, " staging = ", staging);
     log(level, " dataToCopy.transferConsumerCompletedSemaphore = ", dataToCopy.transferConsumerCompletedSemaphore, ", ", dataToCopy.transferConsumerCompletedSemaphore ? dataToCopy.transferConsumerCompletedSemaphore->vk() : VK_NULL_HANDLE);
     log(level, " newSignalSemaphore = ", newSignalSemaphore, ", ", newSignalSemaphore ? newSignalSemaphore->vk() : VK_NULL_HANDLE);
-    log(level, " copyRegions.size() = ", copyRegions.size());
 
     if (frame.waitOnFence && fence)
     {
@@ -463,53 +595,64 @@ TransferTask::TransferResult TransferTask::_transferData(DataToCopy& dataToCopy)
 
     VkResult result = VK_SUCCESS;
 
-    // allocate staging buffer if required
-    if (!staging || staging->size < totalSize)
+    // if no regions to copy have been found then commandBuffer will be empty so no need to submit it to queue and signal the associated semaphore
+    if (!stagingOffsets.empty())
     {
-        if (totalSize < minimumStagingBufferSize)
-        {
-            totalSize = minimumStagingBufferSize;
-            log(level, " Clamping totalSize to ", minimumStagingBufferSize);
-        }
+        // allocate/reallocate staging buffers as required
+        stagingBuffers.resize(stagingOffsets.size());
+        buffer_data.resize(stagingOffsets.size());
 
-        VkDeviceSize previousSize = staging ? staging->size : 0;
-
-        VkMemoryPropertyFlags stagingMemoryPropertiesFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-        staging = vsg::createBufferAndMemory(device, totalSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, stagingMemoryPropertiesFlags);
+        for (size_t sbOffset = 0; sbOffset < stagingBuffers.size(); ++sbOffset)
+        {
+            // The last entry in each staging offsets vector is the aligned ending offset of the
+            // final region, which is the requested staging buffer size. Compute the staging buffer
+            // size so that it is always a multiple of the allocation chunk size and no smaller than
+            // the minimum staging buffer size. For the special case of an overallocation request
+            // for a single region just pass the request on and let the driver/OS decide if it can
+            // fulfill the request.
+            const auto requestedSize = stagingOffsets[sbOffset].back();
+            // if this is an overallocation request it should be in its own staging buffer to minimize the request size
+            assert(requestedSize <= maxMemoryAllocationSize || stagingOffsets[sbOffset].size() == 2);
+            const auto stagingSize = requestedSize > maxMemoryAllocationSize
+                                         ? requestedSize // overallocation request
+                                         : std::clamp(stagingBufferSizeChunkSize * ((requestedSize - 1) / stagingBufferSizeChunkSize + 1),
+                                                      minimumStagingBufferSize, maxMemoryAllocationSize);
+
+            if (!stagingBuffers[sbOffset] || stagingBuffers[sbOffset]->size != stagingSize)
+            {
+                VkDeviceSize previousSize = stagingBuffers[sbOffset] ? stagingBuffers[sbOffset]->size : 0;
 
-        auto stagingMemory = staging->getDeviceMemory(deviceID);
-        buffer_data = nullptr;
-        result = stagingMemory->map(staging->getMemoryOffset(deviceID), staging->size, 0, &buffer_data);
+                VkMemoryPropertyFlags stagingMemoryPropertiesFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+                stagingBuffers[sbOffset] = vsg::createBufferAndMemory(device, stagingSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, stagingMemoryPropertiesFlags);
 
-        log(level, " TransferTask::transferData() frameIndex = ", dataToCopy.frameIndex, ", previousSize = ", previousSize, ", allocated staging buffer = ", staging, ", totalSize = ", totalSize, ", result = ", result);
+                auto stagingMemory = stagingBuffers[sbOffset]->getDeviceMemory(deviceID);
+                buffer_data[sbOffset] = nullptr;
+                result = stagingMemory->map(stagingBuffers[sbOffset]->getMemoryOffset(deviceID), stagingBuffers[sbOffset]->size, 0, &buffer_data[sbOffset]);
 
-        if (result != VK_SUCCESS) return TransferResult{VK_SUCCESS, {}};
-    }
+                log(level, " TransferTask::transferData() frameIndex = ", frameIndex, ", staging buffer offset = ", sbOffset, ", previousSize = ", previousSize, ", allocated staging buffer = ", stagingBuffers[sbOffset], ", stagingSize = ", stagingSize, ", result = ", result);
 
-    log(level, " totalSize = ", totalSize);
+                if (result != VK_SUCCESS) return TransferResult{VK_SUCCESS, {}};
+            }
+        }
 
-    VkCommandBufferBeginInfo beginInfo = {};
-    beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-    beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-    beginInfo.pInheritanceInfo = nullptr;
+        VkCommandBufferBeginInfo beginInfo = {};
+        beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+        beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+        beginInfo.pInheritanceInfo = nullptr;
 
-    VkCommandBuffer vk_commandBuffer = *commandBuffer;
-    vkBeginCommandBuffer(vk_commandBuffer, &beginInfo);
+        VkCommandBuffer vk_commandBuffer = *commandBuffer;
+        vkBeginCommandBuffer(vk_commandBuffer, &beginInfo);
 
-    offset = 0;
-    {
-        COMMAND_BUFFER_INSTRUMENTATION(instrumentation, *commandBuffer, "transferData", COLOR_GPU)
+        {
+            COMMAND_BUFFER_INSTRUMENTATION(instrumentation, *commandBuffer, "transferData", COLOR_GPU)
 
-        // transfer the modified BufferInfo and ImageInfo
-        _transferImageInfos(dataToCopy, vk_commandBuffer, frame, offset);
-        _transferBufferInfos(dataToCopy, vk_commandBuffer, frame, offset);
-    }
+            // transfer the modified BufferInfo and ImageInfo
+            _transferImageInfos(dataToCopy, vk_commandBuffer, frame, imageStagingBufferOffsetBegin, imageRegionOffsetBegin);
+            _transferBufferInfos(dataToCopy, vk_commandBuffer, frame, dataStagingBufferOffsetBegin, dataRegionOffsetBegin);
+        }
 
-    vkEndCommandBuffer(vk_commandBuffer);
+        vkEndCommandBuffer(vk_commandBuffer);
 
-    // if no regions to copy have been found then commandBuffer will be empty so no need to submit it to queue and signal the associated semaphore
-    if (offset > 0)
-    {
         // submit the transfer commands
         VkSubmitInfo submitInfo = {};
         submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;

From afad7df6c4ac8170a57b752229c2a08e385d1f3e Mon Sep 17 00:00:00 2001
From: Michael Saunders
Date: Wed, 21 May 2025 16:15:18 -0700
Subject: [PATCH 2/2] Lambda name and comment clarification.

---
 src/vsg/app/TransferTask.cpp | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/vsg/app/TransferTask.cpp b/src/vsg/app/TransferTask.cpp
index 5ae4f7962f..149314d647 100644
--- a/src/vsg/app/TransferTask.cpp
+++ b/src/vsg/app/TransferTask.cpp
@@ -409,11 +409,10 @@ TransferTask::TransferResult TransferTask::_transferData(DataToCopy& dataToCopy)
     auto& frame = *(dataToCopy.frames[frameIndex]);
     auto& stagingOffsets = frame.stagingOffsets;
 
-    const auto createNewStagingOffsets = [&](VkDeviceSize regionSize){
-        // allocate a vector of offsets for the new staging buffer with
-        // the first offset initialized to zero for the new entry
+    // Allocate a vector of offsets for the new staging buffer with the first offset initialized to
+    // zero for the new entry, returning the end offset of the new entry.
+    const auto addRegionToNewStagingBuffer = [&](VkDeviceSize regionSize){
         stagingOffsets.push_back(std::vector<VkDeviceSize>(1,VkDeviceSize(0)));
-        // calculate the end of the first entry in the new staging buffer
         return alignOffset(regionSize, alignment);
     };
 
@@ -422,8 +421,8 @@ TransferTask::TransferResult TransferTask::_transferData(DataToCopy& dataToCopy)
     // fit within a single staging buffer, additional staging buffers are created to accommodate the
     // transfer. Any single ImageInfo data must fit within the maximum allocation size for a single
     // staging buffer, however some drivers do permit overallocation so if there is a single allocation
-    // larger than the maximum we rely on the memory allocation request to throwing if it is unable
-    // to honor the request. Ideally the client code should create smaller buffers.
+    // larger than the maximum we rely on the memory allocation request throwing an exception if it
+    // is unable to honor the request. Ideally the client code should create smaller buffers.
     //
     VkDeviceSize offset = 0;
     stagingOffsets.clear();
@@ -459,7 +458,7 @@ TransferTask::TransferResult TransferTask::_transferData(DataToCopy& dataToCopy)
                 // allocation in the buffer, in which case permit the overallocation to reside
                 // in this staging buffer.
                 if (offset >= maxMemoryAllocationSize && stagingOffsets.back().size() > 1)
-                    offset = createNewStagingOffsets(imageSize);
+                    offset = addRegionToNewStagingBuffer(imageSize);
             }
         }
     }
@@ -472,9 +471,9 @@ TransferTask::TransferResult TransferTask::_transferData(DataToCopy& dataToCopy)
     // do not fit within a single staging buffer, additional staging buffers are created to
     // accommodate the transfer. Any single data buffer must fit within the maximum allocation size
     // for a single staging buffer, however some drivers do permit overallocation so if there is a
-    // single allocation larger than the maximum we rely on the memory allocation request to
-    // throwing if it is unable to honor the request. Ideally the client code should create smaller
-    // buffers.
+    // single allocation larger than the maximum we rely on the memory allocation request throwing
+    // an exception if it is unable to honor the request. Ideally the client code should create
+    // smaller buffers.
     //
     dataToCopy.dataTotalRegions = 0;
     if (!dataToCopy.dataMap.empty())
     {
@@ -503,7 +502,7 @@ TransferTask::TransferResult TransferTask::_transferData(DataToCopy& dataToCopy)
                     // allocation in the buffer, in which case permit the overallocation to reside
                     // in this staging buffer.
                     if (offset >= maxMemoryAllocationSize && stagingOffsets.back().size() > 1)
-                        offset = createNewStagingOffsets(bufferInfo->range);
+                        offset = addRegionToNewStagingBuffer(bufferInfo->range);
                 }
             }
         }
@@ -518,7 +517,7 @@ TransferTask::TransferResult TransferTask::_transferData(DataToCopy& dataToCopy)
         // allocation in the buffer, in which case permit the overallocation to reside
         // in this staging buffer.
         if (offset >= maxMemoryAllocationSize && stagingOffsets.back().size() > 1)
-            offset = createNewStagingOffsets(offset - stagingOffsets.back().back());
+            offset = addRegionToNewStagingBuffer(offset - stagingOffsets.back().back());
 
         stagingOffsets.back().push_back(offset);
     }
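The two patches above partition transfer regions across multiple staging buffers and size each buffer in chunks. The following standalone sketch illustrates that strategy with plain standard-library types; it is not part of the patches, the names gatherStagingOffsets, stagingBufferSize and DeviceSize as well as all the sizes in main() are invented for illustration, and it deliberately omits details handled by the real TransferTask code such as per-image stride alignment and the separate image/data passes.

// A minimal, self-contained sketch (not VSG code) of the staging-buffer partitioning
// described above. DeviceSize, the helper names and the demo sizes are illustrative only.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

using DeviceSize = uint64_t; // stand-in for VkDeviceSize

// round an offset up to the next multiple of alignment (same idea as the patch's alignOffset)
constexpr DeviceSize alignOffset(DeviceSize offset, DeviceSize alignment)
{
    return (offset % alignment == 0) ? offset : ((offset / alignment) + 1) * alignment;
}

// Partition region sizes into per-staging-buffer offset vectors. Each inner vector holds the
// start offset of every region placed in that staging buffer plus one final entry: the aligned
// end offset, which doubles as the requested size of that staging buffer.
std::vector<std::vector<DeviceSize>> gatherStagingOffsets(const std::vector<DeviceSize>& regionSizes,
                                                          DeviceSize alignment,
                                                          DeviceSize maxAllocationSize)
{
    std::vector<std::vector<DeviceSize>> stagingOffsets;
    DeviceSize offset = 0;
    for (DeviceSize size : regionSizes)
    {
        if (stagingOffsets.empty()) stagingOffsets.push_back({});

        offset = alignOffset(offset, alignment);
        stagingOffsets.back().push_back(offset);        // tentative start offset in the current buffer
        offset = alignOffset(offset + size, alignment); // running end offset

        // If the running total exceeds the limit, re-home the overflowing region at offset 0 of a
        // new staging buffer; its tentative offset above becomes the previous buffer's end offset.
        // A single oversized region that is alone in its buffer is left as an overallocation request.
        if (offset >= maxAllocationSize && stagingOffsets.back().size() > 1)
        {
            stagingOffsets.push_back({DeviceSize(0)});
            offset = alignOffset(size, alignment);
        }
    }
    if (!stagingOffsets.empty()) stagingOffsets.back().push_back(offset); // end offset of the last buffer
    return stagingOffsets;
}

// Size a staging buffer from its requested size: round up to a chunk multiple and clamp to
// [minimumSize, maxAllocationSize]; oversized single-region requests are passed straight through.
DeviceSize stagingBufferSize(DeviceSize requestedSize, DeviceSize chunkSize,
                             DeviceSize minimumSize, DeviceSize maxAllocationSize)
{
    if (requestedSize > maxAllocationSize) return requestedSize; // overallocation request
    return std::clamp(chunkSize * ((requestedSize - 1) / chunkSize + 1), minimumSize, maxAllocationSize);
}

int main()
{
    // three regions against a deliberately small 4096-byte limit: the third region forces a second staging buffer
    const std::vector<DeviceSize> regions{1000, 3000, 2000};
    const auto stagingOffsets = gatherStagingOffsets(regions, 4, 4096);
    for (size_t i = 0; i < stagingOffsets.size(); ++i)
    {
        const auto requested = stagingOffsets[i].back();
        std::cout << "staging buffer " << i << ": regions = " << stagingOffsets[i].size() - 1
                  << ", requested size = " << requested
                  << ", allocated size = " << stagingBufferSize(requested, 64, 1024, 4096) << "\n";
    }
    return 0;
}

Rounding each request up to a chunk multiple (64 KB in the patch, 64 bytes in this toy example) and clamping it between the minimum and maximum bounds keeps staging buffers reusable across frames whose transfer sizes fluctuate slightly, while a genuinely oversized single region is passed through unchanged for the driver to accept or reject.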