GPU: Refcount Vulkan allocations to fix transfer corruption on defrag by thatcosmonaut · Pull Request #15127 · libsdl-org/SDL

thatcosmonaut · 2026-02-27T19:30:47Z

Attempting to resolve #15123

The problem is that defrag can execute on allocations that currently have pending transfer operations. Furthermore, in a multi-threaded situation, it's possible to queue up a transfer on a resource whose block is currently being defragmented. The fix is to refcount those allocations on transfer operations, and then only defrag blocks that don't have any references. Additionally, we take mutex locks to prevent transfers on blocks that are currently being restructured for defrag.

This should probably be stress tested a bit on some real apps because the defrag function is very sensitive.

Running this test program demonstrates that the patch resolves the issue in the single-threaded case.

#include <SDL3/SDL.h>

#define BUFFER_SIZE 4096
#define INIT_PATTERN 0x55
#define TEST_PATTERN 0xAA

/*
 * Create a GPU buffer of `size` bytes with vertex-buffer usage.
 * The result of SDL_CreateGPUBuffer is returned unchanged.
 */
static SDL_GPUBuffer* create_gpu_buffer(SDL_GPUDevice *device, Uint32 size) {
    const SDL_GPUBufferCreateInfo create_info = {
        .usage = SDL_GPU_BUFFERUSAGE_VERTEX,
        .size = size,
    };
    SDL_GPUBuffer *buffer = SDL_CreateGPUBuffer(device, &create_info);

    return buffer;
}

/*
 * Create a transfer buffer of `size` bytes with upload usage.
 * The result of SDL_CreateGPUTransferBuffer is returned unchanged.
 */
static SDL_GPUTransferBuffer* create_upload_buffer(SDL_GPUDevice *device, Uint32 size) {
    const SDL_GPUTransferBufferCreateInfo create_info = {
        .usage = SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD,
        .size = size,
    };
    SDL_GPUTransferBuffer *staging = SDL_CreateGPUTransferBuffer(device, &create_info);

    return staging;
}

/*
 * Create a transfer buffer of `size` bytes with download usage.
 * The result of SDL_CreateGPUTransferBuffer is returned unchanged.
 */
static SDL_GPUTransferBuffer* create_download_buffer(SDL_GPUDevice *device, Uint32 size) {
    const SDL_GPUTransferBufferCreateInfo create_info = {
        .usage = SDL_GPU_TRANSFERBUFFERUSAGE_DOWNLOAD,
        .size = size,
    };
    SDL_GPUTransferBuffer *readback = SDL_CreateGPUTransferBuffer(device, &create_info);

    return readback;
}

/*
 * Acquire a command buffer and record a single copy pass that uploads the
 * first `size` bytes of `src` (from offset 0) into `dst` (at offset 0).
 *
 * The command buffer is returned WITHOUT being submitted, so the caller
 * decides when the upload is actually handed to the GPU.
 */
static SDL_GPUCommandBuffer* record_upload(SDL_GPUDevice *device,
                                           SDL_GPUTransferBuffer *src,
                                           SDL_GPUBuffer *dst,
                                           Uint32 size) {
    const SDL_GPUTransferBufferLocation source = {
        .transfer_buffer = src,
        .offset = 0,
    };
    const SDL_GPUBufferRegion destination = {
        .buffer = dst,
        .offset = 0,
        .size = size,
    };

    SDL_GPUCommandBuffer *cmd = SDL_AcquireGPUCommandBuffer(device);
    SDL_GPUCopyPass *copy = SDL_BeginGPUCopyPass(cmd);

    SDL_UploadToGPUBuffer(copy, &source, &destination, false);
    SDL_EndGPUCopyPass(copy);

    return cmd;
}

/*
 * Acquire a command buffer and record a single copy pass that downloads the
 * first `size` bytes of `src` (from offset 0) into `dst` (at offset 0).
 *
 * The command buffer is returned WITHOUT being submitted.
 */
static SDL_GPUCommandBuffer* record_download(SDL_GPUDevice *device,
                                             SDL_GPUBuffer *src,
                                             SDL_GPUTransferBuffer *dst,
                                             Uint32 size) {
    const SDL_GPUBufferRegion source = {
        .buffer = src,
        .offset = 0,
        .size = size,
    };
    const SDL_GPUTransferBufferLocation destination = {
        .transfer_buffer = dst,
        .offset = 0,
    };

    SDL_GPUCommandBuffer *cmd = SDL_AcquireGPUCommandBuffer(device);
    SDL_GPUCopyPass *copy = SDL_BeginGPUCopyPass(cmd);

    SDL_DownloadFromGPUBuffer(copy, &source, &destination);
    SDL_EndGPUCopyPass(copy);

    return cmd;
}

/*
 * Submit `cmd` and then block until the device reports idle.
 *
 * Failures of either call are logged with SDL_GetError() instead of being
 * silently dropped. The wait is still performed after a failed submit so the
 * call sequence is the same as before; the function remains best-effort and
 * returns nothing.
 */
static void submit_and_wait(SDL_GPUDevice *device, SDL_GPUCommandBuffer *cmd) {
    if (!SDL_SubmitGPUCommandBuffer(cmd)) {
        SDL_Log("SDL_SubmitGPUCommandBuffer failed: %s\n", SDL_GetError());
    }
    if (!SDL_WaitForGPUIdle(device)) {
        SDL_Log("SDL_WaitForGPUIdle failed: %s\n", SDL_GetError());
    }
}

/*
 * Map `buffer` (without cycling), set its first `size` bytes to `pattern`,
 * and unmap it again.
 *
 * If mapping fails, the error is logged and the buffer is left untouched
 * rather than writing through a NULL pointer.
 */
static void fill_transfer_buffer(SDL_GPUDevice *device, SDL_GPUTransferBuffer *buffer,
                                 unsigned char pattern, Uint32 size) {
    void *data = SDL_MapGPUTransferBuffer(device, buffer, false);
    if (data == NULL) {
        SDL_Log("SDL_MapGPUTransferBuffer failed: %s\n", SDL_GetError());
        return;
    }

    SDL_memset(data, pattern, size);
    SDL_UnmapGPUTransferBuffer(device, buffer);
}

/*
 * Download the first `size` bytes of `gpuBuffer` into `downloadBuffer`
 * (submitting the copy and waiting for the device to go idle), then log a hex
 * preview of at most the first 16 bytes, prefixed with `label`.
 *
 * The preview is assembled into one string and logged with a single SDL_Log
 * call, because each SDL_Log call is emitted as its own log message; logging
 * byte-by-byte scatters the preview over many lines. A failed map is logged
 * and the function returns without dereferencing the NULL pointer.
 */
static void read_and_print_gpu_buffer(SDL_GPUDevice *device,
                                      SDL_GPUBuffer *gpuBuffer,
                                      SDL_GPUTransferBuffer *downloadBuffer,
                                      Uint32 size,
                                      const char *label) {
    enum { PREVIEW_BYTES = 16 };
    static const char hex_digits[] = "0123456789ABCDEF";
    char preview[PREVIEW_BYTES * 3 + 1]; /* "XX " per byte plus terminator */
    size_t out = 0;

    submit_and_wait(device, record_download(device, gpuBuffer, downloadBuffer, size));

    const unsigned char *data =
        (const unsigned char *)SDL_MapGPUTransferBuffer(device, downloadBuffer, false);
    if (data == NULL) {
        SDL_Log("%s: <SDL_MapGPUTransferBuffer failed: %s>\n", label, SDL_GetError());
        return;
    }

    const Uint32 count = (size < (Uint32)PREVIEW_BYTES) ? size : (Uint32)PREVIEW_BYTES;
    for (Uint32 i = 0; i < count; i++) {
        preview[out++] = hex_digits[data[i] >> 4];
        preview[out++] = hex_digits[data[i] & 0x0F];
        preview[out++] = ' ';
    }
    preview[out] = '\0';

    SDL_Log("%s: [%s...]\n", label, preview);
    SDL_UnmapGPUTransferBuffer(device, downloadBuffer);
}

/* Number of filler buffers: fills ~14MB of the 16MB page. */
#define NUM_FRAG_BUFFERS 14

/* Size of each filler buffer: 1 MB. */
static const size_t FRAG_BUFFER_SIZE = (size_t)1 << 20;

/* Size of the buffer allocated after fragmentation: 2 MB. */
static const size_t TRIGGER_BUFFER_SIZE = (size_t)2 << 20;

/* Handles of the filler buffers; released slots are set to NULL. */
static SDL_GPUBuffer* TEMP_BUFFERS[NUM_FRAG_BUFFERS];

/*
 * Drive the allocator into a fragmented state and give it a chance to defrag:
 *
 *   1. allocate NUM_FRAG_BUFFERS filler buffers of FRAG_BUFFER_SIZE each;
 *   2. release every even-indexed filler, leaving alternating holes;
 *   3. submit an empty command buffer and wait for idle;
 *   4. allocate one TRIGGER_BUFFER_SIZE buffer (larger than any single hole);
 *   5. submit another empty command buffer and wait, which is where the
 *      defrag is expected to run;
 *   6. release the remaining fillers and the trigger buffer.
 */
static void trigger_defragmentation(SDL_GPUDevice *device) {
    int slot;

    /* 1. Fill the page with equally sized buffers. */
    SDL_Log("Allocating %d x %zu KB buffers...\n", NUM_FRAG_BUFFERS, FRAG_BUFFER_SIZE / 1024);
    for (slot = 0; slot < NUM_FRAG_BUFFERS; ++slot) {
        TEMP_BUFFERS[slot] = create_gpu_buffer(device, FRAG_BUFFER_SIZE);
    }
    SDL_Log("Created %d buffers\n", NUM_FRAG_BUFFERS);

    /* 2. Punch holes: drop every other buffer and forget its handle. */
    SDL_Log("Releasing alternating buffers to create fragmentation...\n");
    for (slot = 0; slot < NUM_FRAG_BUFFERS; slot += 2) {
        SDL_ReleaseGPUBuffer(device, TEMP_BUFFERS[slot]);
        TEMP_BUFFERS[slot] = NULL;
    }

    /* 3. Empty submission followed by a wait for idle. */
    SDL_GPUCommandBuffer *flush_cmd = SDL_AcquireGPUCommandBuffer(device);
    submit_and_wait(device, flush_cmd);

    /* 4. Request a buffer that is bigger than any individual hole. */
    SDL_Log("Allocating %zu KB trigger buffer (won't fit in 1MB holes)...\n", TRIGGER_BUFFER_SIZE / 1024);
    SDL_GPUBuffer *trigger = create_gpu_buffer(device, TRIGGER_BUFFER_SIZE);

    /* 5. A second empty submission gives the defrag a chance to execute. */
    SDL_Log("Submitting empty command buffer to execute defrag...\n");
    SDL_GPUCommandBuffer *defrag_cmd = SDL_AcquireGPUCommandBuffer(device);
    submit_and_wait(device, defrag_cmd);
    SDL_Log("Empty command buffer completed (defrag should have run)\n");

    /* 6. Release the fillers that are still held, then the trigger buffer. */
    for (slot = 0; slot < NUM_FRAG_BUFFERS; ++slot) {
        if (TEMP_BUFFERS[slot] == NULL) {
            continue;
        }
        SDL_ReleaseGPUBuffer(device, TEMP_BUFFERS[slot]);
    }
    SDL_ReleaseGPUBuffer(device, trigger);
}

/*
 * Reproduction driver for the "deferred transfer vs. defrag" corruption.
 *
 * Sequence:
 *   1. upload INIT_PATTERN into the target buffer and read it back;
 *   2. record (but do not submit) an upload of TEST_PATTERN;
 *   3. trigger defragmentation while that upload is still pending;
 *   4. submit the deferred upload and read the buffer back.
 *
 * Exit status:
 *   0 - the buffer's first byte is TEST_PATTERN (deferred write landed);
 *   1 - the buffer's first byte is INIT_PATTERN (bug reproduced) or some
 *       other unexpected value;
 *   2 - setup or readback failed before a verdict could be reached.
 */
int main(void) {
    SDL_GPUDevice *device = NULL;
    SDL_GPUBuffer *targetBuffer = NULL;
    SDL_GPUTransferBuffer *uploadBuffer = NULL;
    SDL_GPUTransferBuffer *downloadBuffer = NULL;
    SDL_GPUCommandBuffer *cmdDeferredWrite = NULL;
    unsigned char *bytes = NULL;
    unsigned char first_byte = 0;
    int exit_code = 2;

    SDL_SetHint(SDL_HINT_VIDEO_DRIVER, "offscreen");
    if (!SDL_Init(SDL_INIT_VIDEO)) {
        SDL_Log("SDL_Init failed: %s\n", SDL_GetError());
        return exit_code;
    }

    device = SDL_CreateGPUDevice(
        SDL_GPU_SHADERFORMAT_SPIRV,
        true,
        NULL
    );
    if (device == NULL) {
        SDL_Log("SDL_CreateGPUDevice failed: %s\n", SDL_GetError());
        goto cleanup;
    }

    SDL_Log("GPU device created successfully\n");

    targetBuffer = create_gpu_buffer(device, BUFFER_SIZE);
    uploadBuffer = create_upload_buffer(device, BUFFER_SIZE);
    downloadBuffer = create_download_buffer(device, BUFFER_SIZE);
    if (targetBuffer == NULL || uploadBuffer == NULL || downloadBuffer == NULL) {
        SDL_Log("Buffer creation failed: %s\n", SDL_GetError());
        goto cleanup;
    }

    SDL_Log("Buffers created\n");

    SDL_Log("\n[Step 1] Initialize buffer with 0x%02X\n", INIT_PATTERN);
    fill_transfer_buffer(device, uploadBuffer, INIT_PATTERN, BUFFER_SIZE);
    submit_and_wait(device, record_upload(device, uploadBuffer, targetBuffer, BUFFER_SIZE));
    read_and_print_gpu_buffer(device, targetBuffer, downloadBuffer, BUFFER_SIZE,
                              "  Buffer contents");

    SDL_Log("\n[Step 2] Record cmdDeferredWrite to write 0x%02X (but don't submit yet)\n", TEST_PATTERN);
    fill_transfer_buffer(device, uploadBuffer, TEST_PATTERN, BUFFER_SIZE);
    cmdDeferredWrite = record_upload(device, uploadBuffer, targetBuffer, BUFFER_SIZE);
    read_and_print_gpu_buffer(device, targetBuffer, downloadBuffer, BUFFER_SIZE,
                              "  Buffer contents (cmdDeferredWrite not yet submitted)");

    SDL_Log("\n[Step 3] Trigger defragmentation (cmdDeferredWrite still pending)\n");
    trigger_defragmentation(device);
    read_and_print_gpu_buffer(device, targetBuffer, downloadBuffer, BUFFER_SIZE,
                              "  Buffer contents (after defrag)");

    SDL_Log("\n[Step 4] Submit cmdDeferredWrite (AFTER defrag has run)\n");
    submit_and_wait(device, cmdDeferredWrite);
    read_and_print_gpu_buffer(device, targetBuffer, downloadBuffer, BUFFER_SIZE,
                              "  Buffer contents (after cmdDeferredWrite)");

    /* Final readback used for the verdict. */
    submit_and_wait(device, record_download(device, targetBuffer, downloadBuffer, BUFFER_SIZE));
    bytes = (unsigned char *)SDL_MapGPUTransferBuffer(device, downloadBuffer, false);
    if (bytes == NULL) {
        SDL_Log("SDL_MapGPUTransferBuffer failed: %s\n", SDL_GetError());
        goto cleanup;
    }
    first_byte = bytes[0];
    SDL_UnmapGPUTransferBuffer(device, downloadBuffer);

    SDL_Log("\n=== RESULT ===\n");
    if (first_byte == TEST_PATTERN) {
        SDL_Log("TEST PASSED: Buffer contains 0x%02X as expected.\n", TEST_PATTERN);
        exit_code = 0;
    } else if (first_byte == INIT_PATTERN) {
        SDL_Log("BUG DETECTED: Buffer contains 0x%02X, expected 0x%02X\n", INIT_PATTERN, TEST_PATTERN);
        SDL_Log("The deferred write went to the OLD buffer location (before defrag moved it).\n");
        exit_code = 1;
    } else {
        /* Neither pattern: do not claim a pass just because it is not INIT_PATTERN. */
        SDL_Log("UNEXPECTED RESULT: Buffer contains 0x%02X, expected 0x%02X\n", first_byte, TEST_PATTERN);
        exit_code = 1;
    }

cleanup:
    if (targetBuffer != NULL) {
        SDL_ReleaseGPUBuffer(device, targetBuffer);
    }
    if (uploadBuffer != NULL) {
        SDL_ReleaseGPUTransferBuffer(device, uploadBuffer);
    }
    if (downloadBuffer != NULL) {
        SDL_ReleaseGPUTransferBuffer(device, downloadBuffer);
    }
    if (device != NULL) {
        SDL_DestroyGPUDevice(device);
    }
    SDL_Quit();

    return exit_code;
}