From 79f3acde64b5ce9f359228f5ac67cd2ed728d032 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20R=C3=B3=C5=BCa=C5=84ski?= Date: Tue, 11 Apr 2023 10:25:33 +0200 Subject: [PATCH 1/3] Fix lookup buffer --- src/vulkan/driver-vulkan.cpp | 4 ++-- src/vulkan/gen/scrypt-chacha.comp | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/vulkan/driver-vulkan.cpp b/src/vulkan/driver-vulkan.cpp index 56ab469..fb8442f 100644 --- a/src/vulkan/driver-vulkan.cpp +++ b/src/vulkan/driver-vulkan.cpp @@ -136,10 +136,10 @@ static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameS cgpu->work_size = 64; - applog(LOG_NOTICE, "GPU %d: selecting lookup gap of 4", cgpu->driver_id); cgpu->lookup_gap = 4; + applog(LOG_NOTICE, "GPU %d: selecting lookup gap of %d", cgpu->driver_id, cgpu->lookup_gap); - unsigned int bsize = 1024; + unsigned int bsize = 8192; size_t ipt = (bsize / cgpu->lookup_gap + (bsize % cgpu->lookup_gap > 0)); if (!cgpu->buffer_size) { diff --git a/src/vulkan/gen/scrypt-chacha.comp b/src/vulkan/gen/scrypt-chacha.comp index e1537c3..2ebbaf0 100644 --- a/src/vulkan/gen/scrypt-chacha.comp +++ b/src/vulkan/gen/scrypt-chacha.comp @@ -438,8 +438,9 @@ main() Nfactor++; } - const uint effective_concurrency = (concurrent_threads << 9) >> Nfactor; - + // const uint effective_concurrency = (concurrent_threads << 12) >> Nfactor; + const uint effective_concurrency = concurrent_threads; + password[0] = buffer0[0]; password[1] = buffer0[1]; password[2] = buffer0[2]; From 1a456292a0abf3131bb559f66ad5d3452a9c8402 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20R=C3=B3=C5=BCa=C5=84ski?= Date: Tue, 11 Apr 2023 10:29:25 +0200 Subject: [PATCH 2/3] Improve error handling --- src/vulkan/driver-vulkan.cpp | 16 ++++++++-------- src/vulkan/vulkan-helpers.c | 5 ++++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/vulkan/driver-vulkan.cpp b/src/vulkan/driver-vulkan.cpp index fb8442f..9f7cec3 100644 --- a/src/vulkan/driver-vulkan.cpp +++ b/src/vulkan/driver-vulkan.cpp @@ -443,7 +443,7 @@ static int vulkan_scrypt_positions( // transfer input to GPU char *ptr = NULL; uint64_t tfxOrigin = state->memParamsSize + state->memConstantSize; - CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, tfxOrigin, state->memInputSize, 0, (void **)&ptr), "vkMapMemory", 0); + CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, tfxOrigin, state->memInputSize, 0, (void **)&ptr), "vkMapMemory", SPACEMESH_API_ERROR); memcpy(ptr, (const void*)pdata, PREIMAGE_SIZE); gVulkan.vkUnmapMemory(state->vkDevice, state->gpuSharedMemory); @@ -451,8 +451,6 @@ static int vulkan_scrypt_positions( params.hash_len_bits = hash_len_bits; params.concurrent_threads = cgpu->thread_concurrency; - const uint64_t delay = 5ULL * 1000ULL * 1000ULL * 1000ULL; - tfxOrigin = state->memParamsSize + state->memConstantSize + state->memInputSize; do { @@ -470,15 +468,16 @@ static int vulkan_scrypt_positions( CHECK_RESULT(gVulkan.vkQueueSubmit(state->queue, 1, &submitInfo, VK_NULL_HANDLE), "vkQueueSubmit", 0); CHECK_RESULT(gVulkan.vkQueueWaitIdle(state->queue), "vkQueueWaitIdle", 0); #else - CHECK_RESULT(gVulkan.vkResetFences(state->vkDevice, 1, &state->fence), "vkResetFences", 0); - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, 0, 0, 0, 0, 1, &state->commandBuffer, 0, 0 }; - CHECK_RESULT(gVulkan.vkQueueSubmit(state->queue, 1, &submitInfo, state->fence), "vkQueueSubmit", 0); + CHECK_RESULT(gVulkan.vkResetFences(state->vkDevice, 1, &state->fence), "vkResetFences", SPACEMESH_API_ERROR); + VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO, 0, 0, 0, 0, 1, &state->commandBuffer, 0, 0}; + CHECK_RESULT(gVulkan.vkQueueSubmit(state->queue, 1, &submitInfo, state->fence), "vkQueueSubmit", SPACEMESH_API_ERROR); VkResult res; do { uint64_t delay = 5ULL * 1000ULL * 1000ULL * 1000ULL; res = gVulkan.vkWaitForFences(state->vkDevice, 1, &state->fence, VK_TRUE, delay); } while (res == VK_TIMEOUT); - gVulkan.vkResetFences(state->vkDevice, 1, &state->fence); + CHECK_RESULT(res, "vkWaitForFences", SPACEMESH_API_ERROR); + CHECK_RESULT(gVulkan.vkResetFences(state->vkDevice, 1, &state->fence), "vkResetFences", SPACEMESH_API_ERROR); #endif if (computePow) { @@ -499,7 +498,8 @@ static int vulkan_scrypt_positions( if (computeLeafs) { uint32_t length = (uint32_t)min(chunkSize, outLength); - CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, tfxOrigin, state->memOutputSize, 0, (void **)&ptr), "vkMapMemory", 0); + CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, tfxOrigin, state->memOutputSize, 0, (void **)&ptr), "vkMapMemory", SPACEMESH_API_ERROR); + memcpy(out, ptr, length); gVulkan.vkUnmapMemory(state->vkDevice, state->gpuSharedMemory); out += length; diff --git a/src/vulkan/vulkan-helpers.c b/src/vulkan/vulkan-helpers.c index 6e2153f..16e1906 100644 --- a/src/vulkan/vulkan-helpers.c +++ b/src/vulkan/vulkan-helpers.c @@ -230,7 +230,10 @@ VkDeviceMemory allocateGPUMemory(int index, VkDevice vkDevice, const VkDeviceSi VkBuffer createBuffer(VkDevice vkDevice, uint32_t computeQueueFamilyIndex, VkDeviceMemory memory, VkDeviceSize bufferSize, VkDeviceSize offset) { // 4Gb limit on AMD and Nvidia - if (bufferSize >= 0x100000000) bufferSize = 0xffffffff; + if (bufferSize >= 0x100000000) { + applog(LOG_ERR, "Buffer size too big, setting to 4Gb\n"); + bufferSize = 0xffffffff; + } const VkBufferCreateInfo bufferCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, From b14d22b22be00ef4b21f5980c8f3a98b647acf0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20R=C3=B3=C5=BCa=C5=84ski?= Date: Tue, 11 Apr 2023 10:47:09 +0200 Subject: [PATCH 3/3] Add error handling for creating buffers --- src/vulkan/driver-vulkan.cpp | 42 +++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/vulkan/driver-vulkan.cpp b/src/vulkan/driver-vulkan.cpp index 9f7cec3..0a8a112 100644 --- a/src/vulkan/driver-vulkan.cpp +++ b/src/vulkan/driver-vulkan.cpp @@ -168,26 +168,66 @@ static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameS state->sharedMemorySize = state->memConstantSize + state->memParamsSize + state->memInputSize + 2 * state->memOutputSize; state->gpuLocalMemory = allocateGPUMemory(state->deviceId, state->vkDevice, state->bufSize, true, true); + if (state->gpuLocalMemory == NULL) { + applog(LOG_ERR, "GPU %d: Failed to allocate local memory", cgpu->driver_id); + return NULL; + } state->gpuSharedMemory = allocateGPUMemory(state->deviceId, state->vkDevice, state->sharedMemorySize, false, true); + if (state->gpuSharedMemory == NULL) { + applog(LOG_ERR, "GPU %d: Failed to allocate shared memory", cgpu->driver_id); + return NULL; + } state->padbuffer8 = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuLocalMemory, state->bufSize, 0); + if (state->padbuffer8 == NULL) { + applog(LOG_ERR, "GPU %d: Failed to create padbuffer8", cgpu->driver_id); + return NULL; + } uint64_t o = 0; state->gpu_constants = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memConstantSize, o); + if (state->gpu_constants == NULL) { + applog(LOG_ERR, "GPU %d: Failed to create gpu constants buffer", cgpu->driver_id); + return NULL; + } + o += state->memConstantSize; state->gpu_params = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memParamsSize, o); + if (state->gpu_params == NULL) { + applog(LOG_ERR, "GPU %d: Failed to create gpu params buffer", cgpu->driver_id); + return NULL; + } + o += state->memParamsSize; state->CLbuffer0 = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memInputSize, o); + if (state->CLbuffer0 == NULL) { + applog(LOG_ERR, "GPU %d: Failed to create CLbuffer0 buffer", cgpu->driver_id); + return NULL; + } + o += state->memInputSize; state->outputBuffer[0] = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memOutputSize, o); + if (state->outputBuffer[0] == NULL) { + applog(LOG_ERR, "GPU %d: Failed to create output[0] buffer", cgpu->driver_id); + return NULL; + } + o += state->memOutputSize; state->outputBuffer[1] = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memOutputSize, o); - + if (state->outputBuffer[1] == NULL) { + applog(LOG_ERR, "GPU %d: Failed to create output[1] buffer", cgpu->driver_id); + return NULL; + } + gVulkan.vkGetDeviceQueue(state->vkDevice, computeQueueFamilyIndex, 0, &state->queue); state->pipelineLayout = bindBuffers(state->vkDevice, &state->descriptorSet, &state->descriptorPool, &state->descriptorSetLayout, state->padbuffer8, state->gpu_constants, state->gpu_params, state->CLbuffer0, state->outputBuffer[0], state->outputBuffer[1] ); + if (state->pipelineLayout == NULL) { + applog(LOG_ERR, "GPU %d: Failed to bind buffers and create pipeline layout", cgpu->driver_id); + return NULL; + } void *ptr = NULL; CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, 0, state->memConstantSize, 0, (void **)&ptr), "vkMapMemory", NULL);