diff --git a/include/vsg/app/TransferTask.h b/include/vsg/app/TransferTask.h index 4308f9c852..4627703198 100644 --- a/include/vsg/app/TransferTask.h +++ b/include/vsg/app/TransferTask.h @@ -60,6 +60,9 @@ namespace vsg /// minimum size to use when allocating staging buffers. VkDeviceSize minimumStagingBufferSize = 16 * 1024 * 1024; + // allocation chunk size + VkDeviceSize stagingBufferSizeChunkSize = 64 * 1024; + /// hook for assigning Instrumentation to enable profiling of record traversal. ref_ptr instrumentation; @@ -78,8 +81,9 @@ namespace vsg { ref_ptr transferCommandBuffer; ref_ptr fence; - ref_ptr staging; - void* buffer_data = nullptr; + std::vector> stagingBuffers; + std::vector> stagingOffsets; + std::vector buffer_data; std::vector copyRegions; bool waitOnFence = false; }; @@ -94,8 +98,6 @@ namespace vsg std::vector> frames; VkDeviceSize dataTotalRegions = 0; - VkDeviceSize dataTotalSize = 0; - VkDeviceSize imageTotalSize = 0; ref_ptr transferCompleteSemaphore; ref_ptr transferConsumerCompletedSemaphore; @@ -111,10 +113,10 @@ namespace vsg TransferResult _transferData(DataToCopy& dataToCopy); - void _transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, VkDeviceSize& offset); + void _transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, size_t stagingBufferOffsetBegin, size_t regionOffsetBegin); - void _transferImageInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, VkDeviceSize& offset); - void _transferImageInfo(VkCommandBuffer vk_commandBuffer, TransferBlock& frame, VkDeviceSize& offset, ImageInfo& imageInfo); + void _transferImageInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, size_t stagingBufferOffsetBegin, size_t regionOffsetBegin); + void _transferImageInfo(VkCommandBuffer vk_commandBuffer, TransferBlock& frame, size_t stagingBufferOffset, size_t regionOffset, ImageInfo& imageInfo); }; VSG_type_name(vsg::TransferTask); diff --git a/src/vsg/app/TransferTask.cpp b/src/vsg/app/TransferTask.cpp index daf471acba..149314d647 100644 --- a/src/vsg/app/TransferTask.cpp +++ b/src/vsg/app/TransferTask.cpp @@ -17,6 +17,10 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #include #include +#include +#include +#include + using namespace vsg; TransferTask::TransferTask(Device* in_device, uint32_t numBuffers) : @@ -101,29 +105,45 @@ bool TransferTask::DataToCopy::requiresCopy(uint32_t deviceID) const return false; } -void TransferTask::_transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, VkDeviceSize& offset) +namespace { +constexpr VkDeviceSize alignOffset(VkDeviceSize offset, VkDeviceSize alignment) +{ + return (/*alignment == 1 ||*/ (offset % alignment) == 0) ? offset : ((offset / alignment) + 1) * alignment; +} +} + +void TransferTask::_transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, size_t stagingBufferOffsetBegin, size_t regionOffsetBegin) { CPU_INSTRUMENTATION_L1(instrumentation); auto deviceID = device->deviceID; - auto& staging = frame.staging; + auto& stagingBuffers = frame.stagingBuffers; + auto& stagingOffsets = frame.stagingOffsets; auto& copyRegions = frame.copyRegions; auto& buffer_data = frame.buffer_data; - VkDeviceSize alignment = 4; - copyRegions.clear(); copyRegions.resize(dataToCopy.dataTotalRegions); VkBufferCopy* pRegions = copyRegions.data(); log(level, " TransferTask::_transferBufferInfos(..) ", this); + size_t sbOffset = stagingBufferOffsetBegin; + + // copy the buffer regions and advance to next buffer + const auto copyBufferRegions = [&](ref_ptr buffer, size_t regionCount){ + vkCmdCopyBuffer(vk_commandBuffer, stagingBuffers[sbOffset]->vk(deviceID), buffer->vk(deviceID), regionCount, pRegions); + log(level, " vkCmdCopyBuffer(", ", ", stagingBuffers[sbOffset]->vk(deviceID), ", ", buffer->vk(deviceID), ", ", regionCount, ", ", pRegions); + return pRegions + regionCount; + }; + // copy any modified BufferInfo + size_t regionOffset = regionOffsetBegin; for (auto buffer_itr = dataToCopy.dataMap.begin(); buffer_itr != dataToCopy.dataMap.end();) { auto& bufferInfos = buffer_itr->second; - uint32_t regionCount = 0; + size_t regionCount = 0; log(level, " copying bufferInfos.size() = ", bufferInfos.size(), "{"); for (auto bufferInfo_itr = bufferInfos.begin(); bufferInfo_itr != bufferInfos.end();) { @@ -137,8 +157,11 @@ void TransferTask::_transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer { if (bufferInfo->syncModifiedCounts(deviceID)) { + assert(sbOffset < stagingOffsets.size() && regionOffset < stagingOffsets[sbOffset].size()-1); + VkDeviceSize offset = stagingOffsets[sbOffset][regionOffset]; + // copy data to staging buffer memory - char* ptr = reinterpret_cast(buffer_data) + offset; + char* ptr = reinterpret_cast(buffer_data[sbOffset]) + offset; std::memcpy(ptr, bufferInfo->data->dataPointer(), bufferInfo->range); // record region @@ -146,8 +169,17 @@ void TransferTask::_transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer log(level, " copying ", bufferInfo, ", ", bufferInfo->data, " to ", static_cast(ptr)); - VkDeviceSize endOfEntry = offset + bufferInfo->range; - offset = (/*alignment == 1 ||*/ (endOfEntry % alignment) == 0) ? endOfEntry : ((endOfEntry / alignment) + 1) * alignment; + // advanced to the next staging buffer if it has reached capacity + ++regionOffset; + if (regionOffset == stagingOffsets[sbOffset].size()-1) + { + // copy the buffer regions and advance to next buffer + pRegions = copyBufferRegions(buffer_itr->first, regionCount); + + ++sbOffset; + regionOffset = 0; + regionCount = 0; + } } else { @@ -161,6 +193,10 @@ void TransferTask::_transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer else { log(level, " removing copied static data: ", bufferInfo, ", ", bufferInfo->data); + if (bufferInfo->data->properties.dataVariance == STATIC_DATA_UNREF_AFTER_TRANSFER) + { + bufferInfo->data.reset(); + } bufferInfo_itr = bufferInfos.erase(bufferInfo_itr); } } @@ -169,14 +205,8 @@ void TransferTask::_transferBufferInfos(DataToCopy& dataToCopy, VkCommandBuffer if (regionCount > 0) { - auto& buffer = buffer_itr->first; - - vkCmdCopyBuffer(vk_commandBuffer, staging->vk(deviceID), buffer->vk(deviceID), regionCount, pRegions); - - log(level, " vkCmdCopyBuffer(", ", ", staging->vk(deviceID), ", ", buffer->vk(deviceID), ", ", regionCount, ", ", pRegions); - - // advance to next buffer - pRegions += regionCount; + // copy the buffer regions and advance to next buffer + pRegions = copyBufferRegions(buffer_itr->first, regionCount); } if (bufferInfos.empty()) @@ -210,13 +240,16 @@ void TransferTask::assign(const ImageInfoList& imageInfoList) } } -void TransferTask::_transferImageInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, VkDeviceSize& offset) +void TransferTask::_transferImageInfos(DataToCopy& dataToCopy, VkCommandBuffer vk_commandBuffer, TransferBlock& frame, size_t stagingBufferOffsetBegin, size_t regionOffsetBegin) { CPU_INSTRUMENTATION_L1(instrumentation); auto deviceID = device->deviceID; + auto& stagingOffsets = frame.stagingOffsets; // transfer any modified ImageInfo + size_t sbOffset = stagingBufferOffsetBegin; + size_t regionOffset = regionOffsetBegin; for (auto imageInfo_itr = dataToCopy.imageInfoSet.begin(); imageInfo_itr != dataToCopy.imageInfoSet.end();) { auto& imageInfo = *imageInfo_itr; @@ -229,7 +262,16 @@ void TransferTask::_transferImageInfos(DataToCopy& dataToCopy, VkCommandBuffer v { if (imageInfo->syncModifiedCounts(deviceID)) { - _transferImageInfo(vk_commandBuffer, frame, offset, *imageInfo); + assert(sbOffset < stagingOffsets.size() && regionOffset < stagingOffsets[sbOffset].size()-1); + _transferImageInfo(vk_commandBuffer, frame, sbOffset, regionOffset, *imageInfo); + + // advanced to the next staging buffer if it has reached capacity + ++regionOffset; + if (regionOffset == stagingOffsets[sbOffset].size()-1) + { + ++sbOffset; + regionOffset = 0; + } } else { @@ -249,17 +291,15 @@ void TransferTask::_transferImageInfos(DataToCopy& dataToCopy, VkCommandBuffer v } } -void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, TransferBlock& frame, VkDeviceSize& offset, ImageInfo& imageInfo) +void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, TransferBlock& frame, size_t stagingBufferOffset, size_t regionOffset, ImageInfo& imageInfo) { CPU_INSTRUMENTATION_L2(instrumentation); auto& data = imageInfo.imageView->image->data; - VkDeviceSize image_alignment = std::max(static_cast(data->stride()), static_cast(4)); - offset = ((offset % image_alignment) == 0) ? offset : ((offset / image_alignment) + 1) * image_alignment; - - auto& imageStagingBuffer = frame.staging; - auto& buffer_data = frame.buffer_data; + auto& imageStagingBuffer = frame.stagingBuffers[stagingBufferOffset]; + const auto offset = frame.stagingOffsets[stagingBufferOffset][regionOffset]; + auto& buffer_data = frame.buffer_data[stagingBufferOffset]; char* ptr = reinterpret_cast(buffer_data) + offset; auto properties = data->properties; @@ -269,8 +309,6 @@ void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, Transfer auto mipmapOffsets = data->computeMipmapOffsets(); uint32_t mipLevels = vsg::computeNumMipMapLevels(data, imageInfo.sampler); - auto source_offset = offset; - log(level, " TransferTask::_transferImageInfo(..) ", this, ",ImageInfo needs copying ", data, ", mipLevels = ", mipLevels); // copy data. @@ -280,7 +318,6 @@ void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, Transfer { log(level, " sourceFormat and targetFormat compatible."); std::memcpy(ptr, data->dataPointer(), data->dataSize()); - offset += data->dataSize(); } else { @@ -290,7 +327,6 @@ void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, Transfer { log(level, " sourceTraits.size and targetTraits.size compatible."); std::memcpy(ptr, data->dataPointer(), data->dataSize()); - offset += data->dataSize(); } else { @@ -306,8 +342,6 @@ void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, Transfer uint32_t bytesFromSource = sourceTraits.size; uint32_t bytesToTarget = targetTraits.size; - offset += imageTotalSize; - // copy data using value_type = uint8_t; const value_type* src_ptr = reinterpret_cast(data->dataPointer()); @@ -333,7 +367,7 @@ void TransferTask::_transferImageInfo(VkCommandBuffer vk_commandBuffer, Transfer } // transfer data. - transferImageData(imageInfo.imageView, imageInfo.imageLayout, properties, width, height, depth, mipLevels, mipmapOffsets, imageStagingBuffer, source_offset, vk_commandBuffer, device); + transferImageData(imageInfo.imageView, imageInfo.imageLayout, properties, width, height, depth, mipLevels, mipmapOffsets, imageStagingBuffer, offset, vk_commandBuffer, device); } TransferTask::TransferResult TransferTask::transferData(TransferMask transferMask) @@ -363,73 +397,170 @@ TransferTask::TransferResult TransferTask::_transferData(DataToCopy& dataToCopy) } // - // begin compute total data size + // begin gathering staging buffer offsets + // + const VkDeviceSize alignment = 4; + + const auto maxMemoryAllocationSize = device->getPhysicalDevice() + ->getProperties().maxMemoryAllocationSize; + + const auto frameIndex = dataToCopy.frameIndex; + auto& frame = *(dataToCopy.frames[frameIndex]); + auto& stagingOffsets = frame.stagingOffsets; + + // Allocate a vector of offsets for the new staging buffer with the first offset initialized to + // zero for the new entry, returning the end offset of the new entry. + const auto addRegionToNewStagingBuffer = [&](VkDeviceSize regionSize){ + stagingOffsets.push_back(std::vector(1,VkDeviceSize(0))); + return alignOffset(regionSize, alignment); + }; + + // + // Gather image offsets into one or more staging buffers of the frame. If multiple images do not + // fit within a single staging buffer, additional staging buffers are created to accommodate the + // transfer. Any single ImageInfo data must fit within the maximum allocation size for a single + // staging buffer, however some drivers do permit overallocation so if there is a single allocation + // larger that the maximum we rely on the memory allocation request throwing an exception if it + // is unable to honor the request. Ideally the client code should create smaller buffers. // VkDeviceSize offset = 0; - VkDeviceSize alignment = 4; + stagingOffsets.clear(); + const size_t imageStagingBufferOffsetBegin = 0; + const size_t imageRegionOffsetBegin = 0; + if (!dataToCopy.imageInfoSet.empty()) + { + for (const auto& imageInfo : dataToCopy.imageInfoSet) + { + // see TransferTask::_transferImageInfos + if (imageInfo->referenceCount() > 1 && imageInfo->requiresCopy(deviceID)) + { + // if this is the first staging buffer, allocate a vector of offsets + if (stagingOffsets.empty()) + stagingOffsets.push_back({}); + + auto data = imageInfo->imageView->image->data; + + // adjust offset to make sure it fits with the stride(); + const VkDeviceSize image_alignment = std::max(static_cast(data->stride()), alignment); + offset = alignOffset(offset, image_alignment); + stagingOffsets.back().push_back(offset); + + VkFormat targetFormat = imageInfo->imageView->format; + auto targetTraits = getFormatTraits(targetFormat); + VkDeviceSize imageSize = (targetTraits.size > 0) ? targetTraits.size * data->valueCount() : data->dataSize(); + log(level, " ", data, ", data->dataSize() = ", data->dataSize(), ", imageSize = ", imageSize, " targetTraits.size = ", targetTraits.size, ", ", data->valueCount(), ", targetFormat = ", targetFormat); + + offset = alignOffset(offset + imageSize, alignment); + + // If adding this region would exceed the maximum allocation size create a new + // staging buffer, unless this is an overallocation request and it is the only + // allocation in the buffer, in which case permit the overallocation to reside + // in this staging buffer. + if (offset >= maxMemoryAllocationSize && stagingOffsets.back().size() > 1) + offset = addRegionToNewStagingBuffer(imageSize); + } + } + } - for (const auto& imageInfo : dataToCopy.imageInfoSet) + const size_t numImageStagingBuffers = stagingOffsets.size(); + const size_t numImageRegionsInLastBuffer = stagingOffsets.empty() ? 0 : stagingOffsets.back().size(); + + // + // Gather data offsets into one or more staging buffers of the frame. If multiple data buffers + // do not fit within a single staging buffer, additional staging buffers are created to + // accommodate the transfer. Any single data buffer must fit within the maximum allocation size + // for a single staging buffer, however some drivers do permit overallocation so if there is a + // single allocation larger that the maximum we rely on the memory allocation request throwing + // an exception if it is unable to honor the request. Ideally the client code should create + // smaller buffers. + // + dataToCopy.dataTotalRegions = 0; + if (!dataToCopy.dataMap.empty()) { - auto data = imageInfo->imageView->image->data; + for (const auto&[_,bufferInfos] : dataToCopy.dataMap) + { + for (const auto& offset_bufferInfo : bufferInfos) + { + const auto& bufferInfo = offset_bufferInfo.second; - // adjust offset to make sure it fits with the stride(); - VkDeviceSize image_alignment = std::max(static_cast(data->stride()), alignment); - offset = ((offset % image_alignment) == 0) ? offset : ((offset / image_alignment) + 1) * image_alignment; + // see TransferTask::_transferBufferInfos + if (bufferInfo->referenceCount() > 1 && bufferInfo->requiresCopy(deviceID)) + { + // if this is the first staging buffer, allocate a vector of offsets + if (stagingOffsets.empty()) + stagingOffsets.push_back({}); - VkFormat targetFormat = imageInfo->imageView->format; - auto targetTraits = getFormatTraits(targetFormat); - VkDeviceSize imageSize = (targetTraits.size > 0) ? targetTraits.size * data->valueCount() : data->dataSize(); - log(level, " ", data, ", data->dataSize() = ", data->dataSize(), ", imageSize = ", imageSize, " targetTraits.size = ", targetTraits.size, ", ", data->valueCount(), ", targetFormat = ", targetFormat); + stagingOffsets.back().push_back(offset); + + ++dataToCopy.dataTotalRegions; - VkDeviceSize endOfEntry = offset + imageSize; - offset = (/*alignment == 1 ||*/ (endOfEntry % alignment) == 0) ? endOfEntry : ((endOfEntry / alignment) + 1) * alignment; + offset = alignOffset(offset + bufferInfo->range, alignment); + + // If adding this region would exceed the maximum allocation size create a new + // staging buffer, unless this is an overallocation request and it is the only + // allocation in the buffer, in which case permit the overallocation to reside + // in this staging buffer. + if (offset >= maxMemoryAllocationSize && stagingOffsets.back().size() > 1) + offset = addRegionToNewStagingBuffer(bufferInfo->range); + } + } + } } - dataToCopy.imageTotalSize = offset; - log(level, " dataToCopy.imageTotalSize = ", dataToCopy.imageTotalSize); + // If any staging buffer offsets were added, record the exclusive ending offset of the previous + // entry for the last staging buffer. The last offset is used to get each staging buffer's size. + if (!stagingOffsets.empty()) + { + // If adding this region would exceed the maximum allocation size create a new + // staging buffer, unless this is an overallocation request and it is the only + // allocation in the buffer, in which case permit the overallocation to reside + // in this staging buffer. + if (offset >= maxMemoryAllocationSize && stagingOffsets.back().size() > 1) + offset = addRegionToNewStagingBuffer(offset - stagingOffsets.back().back()); + + stagingOffsets.back().push_back(offset); + } - offset = 0; - dataToCopy.dataTotalRegions = 0; - for (const auto& entry : dataToCopy.dataMap) + // compute the starting staging buffer and offsets for the data regions + size_t dataStagingBufferOffsetBegin = 0; + size_t dataRegionOffsetBegin = 0; + if (numImageStagingBuffers > 0) { - const auto& bufferInfos = entry.second; - for (const auto& offset_bufferInfo : bufferInfos) + if (numImageRegionsInLastBuffer < stagingOffsets[numImageStagingBuffers-1].size()-1) + { + // the beginning of the data regions share the same staging buffer as the end of the images + dataStagingBufferOffsetBegin = numImageStagingBuffers-1; + dataRegionOffsetBegin = numImageRegionsInLastBuffer; + } + else { - const auto& bufferInfo = offset_bufferInfo.second; - VkDeviceSize endOfEntry = offset + bufferInfo->range; - offset = (/*alignment == 1 ||*/ (endOfEntry % alignment) == 0) ? endOfEntry : ((endOfEntry / alignment) + 1) * alignment; - ++dataToCopy.dataTotalRegions; + // data regions were added to a new staging buffers + dataStagingBufferOffsetBegin = numImageStagingBuffers; + dataRegionOffsetBegin = 0; } } - dataToCopy.dataTotalSize = offset; - log(level, " dataToCopy.dataTotalSize = ", dataToCopy.dataTotalSize); + + log(level, " dataToCopy total size = ", + std::accumulate(stagingOffsets.begin(), stagingOffsets.end(), VkDeviceSize(0), + [](auto sum, const auto& offsets){ return sum + offsets.back(); })); // - // end of compute size + // end of gathering staging buffer offsets // - VkDeviceSize totalSize = dataToCopy.dataTotalSize + dataToCopy.imageTotalSize; - if (totalSize == 0) return TransferResult{VK_SUCCESS, {}}; - - log(level, " totalSize = ", totalSize); - - auto& frame = *(dataToCopy.frames[dataToCopy.frameIndex]); auto& fence = frame.fence; - auto& staging = frame.staging; + auto& stagingBuffers = frame.stagingBuffers; auto& commandBuffer = frame.transferCommandBuffer; auto& newSignalSemaphore = dataToCopy.transferCompleteSemaphore; - - const auto& copyRegions = frame.copyRegions; auto& buffer_data = frame.buffer_data; - log(level, " frameIndex = ", dataToCopy.frameIndex); + log(level, " frameIndex = ", frameIndex); log(level, " frame = ", &frame); log(level, " fence = ", fence); log(level, " transferQueue = ", transferQueue); - log(level, " staging = ", staging); log(level, " dataToCopy.transferConsumerCompletedSemaphore = ", dataToCopy.transferConsumerCompletedSemaphore, ", ", dataToCopy.transferConsumerCompletedSemaphore ? dataToCopy.transferConsumerCompletedSemaphore->vk() : VK_NULL_HANDLE); log(level, " newSignalSemaphore = ", newSignalSemaphore, ", ", newSignalSemaphore ? newSignalSemaphore->vk() : VK_NULL_HANDLE); - log(level, " copyRegions.size() = ", copyRegions.size()); if (frame.waitOnFence && fence) { @@ -463,53 +594,64 @@ TransferTask::TransferResult TransferTask::_transferData(DataToCopy& dataToCopy) VkResult result = VK_SUCCESS; - // allocate staging buffer if required - if (!staging || staging->size < totalSize) + // if no regions to copy have been found then commandBuffer will be empty so no need to submit it to queue and signal the associated semaphore + if (!stagingOffsets.empty()) { - if (totalSize < minimumStagingBufferSize) - { - totalSize = minimumStagingBufferSize; - log(level, " Clamping totalSize to ", minimumStagingBufferSize); - } + // allocate/reallocate staging buffers as required + stagingBuffers.resize(stagingOffsets.size()); + buffer_data.resize(stagingOffsets.size()); - VkDeviceSize previousSize = staging ? staging->size : 0; - - VkMemoryPropertyFlags stagingMemoryPropertiesFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - staging = vsg::createBufferAndMemory(device, totalSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, stagingMemoryPropertiesFlags); + for (size_t sbOffset = 0; sbOffset < stagingBuffers.size(); ++sbOffset) + { + // The last entry in each staging offsets vector is the aligned ending offset of the + // last entry which is the requested staging buffer size. Compute the staging buffer + // size so that it is always a multiple of the allocation chunk size and no smaller than + // the minimum staging buffer size. For the special case of an overallocation request + // for a single region just pass the request on and let the driver/OS decide if it can + // fulfill the request. + const auto requestedSize = stagingOffsets[sbOffset].back(); + // if this is an overallocation request it should be in its own staging buffer to minimize the request size + assert(requestedSize <= maxMemoryAllocationSize || stagingOffsets[sbOffset].size() == 2); + const auto stagingSize = requestedSize > maxMemoryAllocationSize + ? requestedSize // overallocation request + : std::clamp(stagingBufferSizeChunkSize * ((requestedSize - 1) / stagingBufferSizeChunkSize + 1), + minimumStagingBufferSize, maxMemoryAllocationSize); + + if (!stagingBuffers[sbOffset] || stagingBuffers[sbOffset]->size != stagingSize) + { + VkDeviceSize previousSize = stagingBuffers[sbOffset] ? stagingBuffers[sbOffset]->size : 0; - auto stagingMemory = staging->getDeviceMemory(deviceID); - buffer_data = nullptr; - result = stagingMemory->map(staging->getMemoryOffset(deviceID), staging->size, 0, &buffer_data); + VkMemoryPropertyFlags stagingMemoryPropertiesFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + stagingBuffers[sbOffset] = vsg::createBufferAndMemory(device, stagingSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, stagingMemoryPropertiesFlags); - log(level, " TransferTask::transferData() frameIndex = ", dataToCopy.frameIndex, ", previousSize = ", previousSize, ", allocated staging buffer = ", staging, ", totalSize = ", totalSize, ", result = ", result); + auto stagingMemory = stagingBuffers[sbOffset]->getDeviceMemory(deviceID); + buffer_data[sbOffset] = nullptr; + result = stagingMemory->map(stagingBuffers[sbOffset]->getMemoryOffset(deviceID), stagingBuffers[sbOffset]->size, 0, &buffer_data[sbOffset]); - if (result != VK_SUCCESS) return TransferResult{VK_SUCCESS, {}}; - } + log(level, " TransferTask::transferData() frameIndex = ", frameIndex, ", staging buffer offset = ", sbOffset, "previousSize = ", previousSize, ", allocated staging buffer = ", stagingBuffers[sbOffset], ", stagingSize = ", stagingSize, ", result = ", result); - log(level, " totalSize = ", totalSize); + if (result != VK_SUCCESS) return TransferResult{VK_SUCCESS, {}}; + } + } - VkCommandBufferBeginInfo beginInfo = {}; - beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - beginInfo.pInheritanceInfo = nullptr; + VkCommandBufferBeginInfo beginInfo = {}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + beginInfo.pInheritanceInfo = nullptr; - VkCommandBuffer vk_commandBuffer = *commandBuffer; - vkBeginCommandBuffer(vk_commandBuffer, &beginInfo); + VkCommandBuffer vk_commandBuffer = *commandBuffer; + vkBeginCommandBuffer(vk_commandBuffer, &beginInfo); - offset = 0; - { - COMMAND_BUFFER_INSTRUMENTATION(instrumentation, *commandBuffer, "transferData", COLOR_GPU) + { + COMMAND_BUFFER_INSTRUMENTATION(instrumentation, *commandBuffer, "transferData", COLOR_GPU) - // transfer the modified BufferInfo and ImageInfo - _transferImageInfos(dataToCopy, vk_commandBuffer, frame, offset); - _transferBufferInfos(dataToCopy, vk_commandBuffer, frame, offset); - } + // transfer the modified BufferInfo and ImageInfo + _transferImageInfos(dataToCopy, vk_commandBuffer, frame, imageStagingBufferOffsetBegin, imageRegionOffsetBegin); + _transferBufferInfos(dataToCopy, vk_commandBuffer, frame, dataStagingBufferOffsetBegin, dataRegionOffsetBegin); + } - vkEndCommandBuffer(vk_commandBuffer); + vkEndCommandBuffer(vk_commandBuffer); - // if no regions to copy have been found then commandBuffer will be empty so no need to submit it to queue and signal the associated semaphore - if (offset > 0) - { // submit the transfer commands VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;