Description
Is there an existing issue for this problem?
- I have searched the existing issues
Operating system
Linux
GPU vendor
AMD (ROCm)
GPU model
RX 7800 XT
GPU VRAM
16 GB
Version number
5.10.1
Browser
Firefox 138.0 (64-bit)
Python dependencies
{
  "version": "5.10.1",
  "dependencies": {
    "accelerate": "1.6.0",
    "compel": "2.0.2",
    "cuda": null,
    "diffusers": "0.33.0",
    "numpy": "1.26.3",
    "opencv": "4.9.0.80",
    "onnx": "1.16.1",
    "pillow": "11.0.0",
    "python": "3.12.9",
    "torch": "2.6.0+rocm6.2.4",
    "torchvision": "0.21.0+rocm6.2.4",
    "transformers": "4.51.3",
    "xformers": null
  },
  "config": {
    "schema_version": "4.0.2",
    "legacy_models_yaml_path": null,
    "host": "127.0.0.1",
    "port": 9090,
    "allow_origins": ["localhost"],
    "allow_credentials": true,
    "allow_methods": [""],
    "allow_headers": [""],
    "ssl_certfile": null,
    "ssl_keyfile": null,
    "log_tokenization": false,
    "patchmatch": false,
    "models_dir": "/usr/share/ai-models/nsfw",
    "convert_cache_dir": "models/.convert_cache",
    "download_cache_dir": "models/.download_cache",
    "legacy_conf_dir": "configs",
    "db_dir": "nsfw/databases",
    "outputs_dir": "nsfw/outputs",
    "custom_nodes_dir": "nodes",
    "style_presets_dir": "style_presets",
    "workflow_thumbnails_dir": "workflow_thumbnails",
    "log_handlers": ["console"],
    "log_format": "color",
    "log_level": "info",
    "log_sql": false,
    "log_level_network": "warning",
    "use_memory_db": false,
    "dev_reload": false,
    "profile_graphs": false,
    "profile_prefix": null,
    "profiles_dir": "profiles",
    "max_cache_ram_gb": null,
    "max_cache_vram_gb": null,
    "log_memory_usage": false,
    "device_working_mem_gb": 3,
    "enable_partial_loading": false,
    "keep_ram_copy_of_weights": true,
    "ram": null,
    "vram": null,
    "lazy_offload": true,
    "pytorch_cuda_alloc_conf": null,
    "device": "auto",
    "precision": "float16",
    "sequential_guidance": true,
    "attention_type": "auto",
    "attention_slice_size": "auto",
    "force_tiled_decode": false,
    "pil_compress_level": 1,
    "max_queue_size": 100,
    "clear_queue_on_startup": true,
    "allow_nodes": null,
    "deny_nodes": null,
    "node_cache_size": 512,
    "hashing_algorithm": "blake3_single",
    "remote_api_tokens": null,
    "scan_models_on_startup": false
  },
  "set_config_fields": [
    "precision", "allow_origins", "db_dir", "outputs_dir",
    "max_queue_size", "models_dir", "clear_queue_on_startup", "legacy_models_yaml_path",
    "sequential_guidance", "patchmatch"
  ]
}
What happened
When generating a 1024x1024 SDXL 1.0 image for the first time, the latents-to-image step (l2i) takes three and a half minutes. In subsequent runs with only the seed changed, l2i takes roughly half a second. Changing the resolution results in one slow generation, with subsequent ones being quick again.
The workflow editor as well as the queue item JSON show that the l2i node does not use the cache, even though its behavior indicates that something is cached.
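In case it helps to narrow this down, the decode step could be timed outside of Invoke with something like the sketch below. This is only an illustration: the VAE repo id and latent shape are assumptions, not necessarily what Invoke loads.

# Standalone sketch (not Invoke code): time an SDXL VAE decode at a fresh
# resolution on the same torch/ROCm stack.
import time

import torch
from diffusers import AutoencoderKL

device = torch.device("cuda")  # ROCm builds of torch expose the GPU via the cuda API
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16  # assumed VAE, any SDXL VAE should do
).to(device)

# A 1024x1024 image corresponds to a 128x128 latent (the SDXL VAE downsamples by 8).
latents = torch.randn(1, 4, 128, 128, dtype=torch.float16, device=device)

for i in range(3):
    torch.cuda.synchronize()
    start = time.perf_counter()
    with torch.no_grad():
        vae.decode(latents / vae.config.scaling_factor)
    torch.cuda.synchronize()
    print(f"decode pass {i}: {time.perf_counter() - start:.2f}s")

If the first pass at a new latent shape is dramatically slower than the following ones, the slowdown would be reproducible outside of Invoke.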
While the l2i node is running, CPU and GPU utilization are at 100%, RAM is at 11 GB / 32 GB, and VRAM is at 9.5 GB / 16 GB.
Canceling the generation while l2i is running only succeeds visually: on the canvas, the little icon showing the active and pending count decrements and disappears when it hits 0, and the queue item list shows the item as canceled. However, CPU and GPU utilization remain high. If the server is left running, utilization eventually drops, the graph stats are printed to the console, and the image appears on the canvas or in the gallery. This might be an unrelated issue that is only noticeable because l2i takes this long.
While l2i is running, sending SIGINT to the server by pressing Ctrl+C in the terminal prints the usual stack trace:
^C[2025-04-30 14:04:11,359]::[ModelInstallService]::INFO --> Installer thread 134159527483072 exiting
Traceback (most recent call last):
File "/opt/InvokeAI/.venv/bin/invokeai-web", line 12, in <module>
sys.exit(run_app())
^^^^^^^^^
File "/opt/InvokeAI/.venv/lib/python3.12/site-packages/invokeai/app/run_app.py", line 92, in run_app
loop.run_until_complete(server.serve())
File "/home/mmaag/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/asyncio/base_events.py", line 678, in run_until_complete
self.run_forever()
File "/home/mmaag/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/asyncio/base_events.py", line 645, in run_forever
self._run_once()
File "/home/mmaag/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/asyncio/base_events.py", line 1999, in _run_once
handle._run()
File "/home/mmaag/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/opt/InvokeAI/.venv/lib/python3.12/site-packages/uvicorn/server.py", line 69, in serve
with self.capture_signals():
^^^^^^^^^^^^^^^^^^^^^^
File "/home/mmaag/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 144, in __exit__
next(self.gen)
File "/opt/InvokeAI/.venv/lib/python3.12/site-packages/uvicorn/server.py", line 330, in capture_signals
signal.raise_signal(captured_signal)
KeyboardInterrupt
However, CPU and GPU utilization remain high and the process does not exit. Sending SIGINT again finally terminates the process with the message:
^CException ignored in: <module 'threading' from '/home/mmaag/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/threading.py'>
Traceback (most recent call last):
File "/home/mmaag/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/threading.py", line 1624, in _shutdown
lock.acquire()
KeyboardInterrupt:
/home/mmaag/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/tempfile.py:936: ResourceWarning: Implicitly cleaning up <TemporaryDirectory '/tmp/tmppvbrln3n'>
_warnings.warn(warn_message, ResourceWarning)
This too might be an unrelated issue.
What you expected to happen
l2i should not take three and a half minutes. In v5.9.1 this step took around 20 seconds for a new resolution.
How to reproduce the problem
- Open the canvas.
- Enter settings to generate any image you like.
- Select a resolution that you have not used before.
Alternatively, install a fresh copy of Invoke if you want to use one of the standard resolutions.
Additional context
The issue does not exist in v5.9.1. It is present in v5.10.0 and v5.10.1. I initially noticed the issue after upgrading an existing installation, but it is also present with a fresh installation.
I made sure Invoke is detecting my GPU: it correctly prints AMD Radeon Graphics as the Torch device in use, GPU utilization is high when invoking, and denoising doesn't take forever.
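A quick way to double-check this outside the UI is something like the following sketch (assuming it is run with the python from Invoke's virtualenv):

# torch's ROCm builds report the GPU through the cuda API
import torch

print(torch.__version__)              # 2.6.0+rocm6.2.4
print(torch.cuda.is_available())      # True
print(torch.cuda.get_device_name(0))  # "AMD Radeon Graphics"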
I made sure Invoke is actually running the same node graph. The issue also happens when using the workflow editor. In addition, diffing the queue item JSONs only shows different node IDs and seeds, but no difference in structure.
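As a sketch of that comparison: strip the fields expected to differ between runs and compare what remains. The ignored key names and file names below are assumptions, and node IDs that appear as dictionary keys would need extra normalization.

# Sketch: compare two exported queue item JSONs after removing fields that are
# expected to differ between runs (key names are assumptions).
import json

IGNORED_KEYS = {"id", "node_id", "seed", "item_id", "session_id", "batch_id"}

def scrub(value):
    # Recursively drop ignored keys and sort dict keys for a stable comparison.
    if isinstance(value, dict):
        return {k: scrub(v) for k, v in sorted(value.items()) if k not in IGNORED_KEYS}
    if isinstance(value, list):
        return [scrub(v) for v in value]
    return value

# Placeholder file names for the two exported queue items.
with open("quick_run.json") as a, open("slow_run.json") as b:
    print("structurally identical:", scrub(json.load(a)) == scrub(json.load(b)))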
Stats for a quick run:
Node                Calls  Seconds   VRAM Used
sdxl_model_loader   1      0.000s    0.325G
sdxl_compel_prompt  2      0.005s    0.325G
collect             2      0.000s    0.325G
noise               1      0.003s    0.325G
denoise_latents     1      31.860s   5.366G
core_metadata       1      0.000s    5.209G
l2i                 1      0.630s    8.217G
Stats for a slow run:
Node                Calls  Seconds   VRAM Used
sdxl_model_loader   1      0.006s    0.000G
sdxl_compel_prompt  2      2.196s    1.569G
collect             2      0.001s    1.561G
noise               1      0.003s    1.561G
denoise_latents     1      33.674s   6.598G
core_metadata       1      0.000s    6.441G
l2i                 1      212.695s  12.107G
Stats for a 'slow' run in v5.9.1:
Node                Calls  Seconds   VRAM Used
sdxl_model_loader   1      0.000s    6.567G
sdxl_compel_prompt  2      0.008s    6.567G
collect             2      0.001s    6.567G
noise               1      0.002s    6.567G
denoise_latents     1      18.406s   7.291G
core_metadata       1      0.000s    6.567G
l2i                 1      20.318s   12.457G
Discord username
No response