File tree Expand file tree Collapse file tree 1 file changed +5
-5
lines changed
vllm/model_executor/models Expand file tree Collapse file tree 1 file changed +5
-5
lines changed Original file line number Diff line number Diff line change @@ -538,20 +538,20 @@ def _prepare_current_run_mamba_cache(
538
538
539
539
def copy_inputs_before_cuda_graphs (self , input_buffers , ** kwargs ):
540
540
"""
541
- Copy the relevant Mamba cache into the CUDA graph input buffer
542
- that was provided during the capture runs
543
- (MambaForCausalLM.mamba_gc_cache_buffer).
541
+ Copy the relevant Mamba cache into the CUDA graph input buffer
542
+ that was provided during the capture runs
543
+ (MambaForCausalLM.mamba_gc_cache_buffer).
544
544
"""
545
545
assert all (
546
546
key in kwargs
547
547
for key in ["request_ids_to_seq_ids" , "finished_requests_ids" ])
548
548
request_ids_to_seq_ids = kwargs ["request_ids_to_seq_ids" ]
549
- batch_size = len ( request_ids_to_seq_ids )
549
+ cg_batch_size = input_buffers [ 'input_ids' ]. shape [ 0 ]
550
550
(
551
551
current_mamba_cache ,
552
552
indices ,
553
553
) = self ._prepare_current_run_mamba_cache (request_ids_to_seq_ids ,
554
- batch_size )
554
+ cg_batch_size )
555
555
self .current_indices = indices
556
556
finished_requests_ids = kwargs ["finished_requests_ids" ]
557
557
self ._release_mamba_cache (finished_requests_ids )
You can’t perform that action at this time.
0 commit comments