
Commit b733a84

format
1 parent adb6713 commit b733a84

3 files changed: +7 -5 lines changed


vllm/attention/backends/placeholder_attn.py

Lines changed: 4 additions & 2 deletions
@@ -1,8 +1,10 @@
 from dataclasses import dataclass
-from typing import (List, Optional, Tuple, Type)
+from typing import List, Optional, Tuple, Type
+
+import torch
+
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionMetadata)
-import torch
 
 # Placeholder attention backend for models like Mamba that don't have attention.
 # Mainly exists to sidestep get_attn_backend.
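
The two comments above capture the intent of this file: it provides a stand-in backend so that attention-free models such as Mamba do not have to go through get_attn_backend. As a rough illustration only (not code from this commit), a runner could choose between the two along the following lines; pick_attn_backend and the is_attention_free flag are hypothetical names introduced for this sketch.

# Illustrative sketch, not part of this commit.
from vllm.attention import get_attn_backend
from vllm.attention.backends.placeholder_attn import (
    PlaceholderAttentionBackend)


def pick_attn_backend(is_attention_free: bool, **attn_backend_kwargs):
    # Hypothetical helper: return an attention backend class.
    if is_attention_free:
        # Attention-free models (e.g. Mamba) have no attention kernels or
        # KV cache to manage, so the placeholder backend stands in.
        return PlaceholderAttentionBackend
    # Otherwise defer to vLLM's normal backend selection.
    return get_attn_backend(**attn_backend_kwargs)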

vllm/engine/llm_engine.py

Lines changed: 1 addition & 1 deletion
@@ -261,7 +261,7 @@ def __init__(
 
         if not self.model_config.embedding_mode:
             # For all decoders including attention-free models like mamba,
-            # this must call _initialize_kv_caches, as this is where model
+            # this must call _initialize_kv_caches, as this is where model
             # warmup and CUDA graphs creation happens.
             self._initialize_kv_caches()
 
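The comment touched by this hunk carries the key reasoning: even attention-free decoders must go through _initialize_kv_caches, because model warmup and CUDA graph capture happen on that path. Below is a minimal sketch of that control flow, assuming an executor exposing determine_num_available_blocks and initialize_cache; the class and method names are assumptions for illustration, not the actual vLLM source.

# Minimal sketch of the flow the comment describes; not the vLLM source.
class EngineSketch:

    def __init__(self, model_config, model_executor):
        self.model_config = model_config
        self.model_executor = model_executor
        if not self.model_config.embedding_mode:
            # Runs for every decoder, including attention-free models like
            # Mamba: warmup and CUDA graph capture happen inside.
            self._initialize_kv_caches()

    def _initialize_kv_caches(self):
        # Profile memory to size the KV cache, then allocate it; warmup and
        # CUDA graph capture are assumed to be triggered by these calls.
        num_gpu_blocks, num_cpu_blocks = (
            self.model_executor.determine_num_available_blocks())
        self.model_executor.initialize_cache(num_gpu_blocks, num_cpu_blocks)
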
vllm/worker/model_runner.py

Lines changed: 2 additions & 2 deletions
@@ -23,8 +23,8 @@
 FLASHINFER_WORKSPACE_BUFFER_SIZE = 0
 
 from vllm.attention import AttentionMetadata, get_attn_backend
-from vllm.attention.backends.placeholder_attn import PlaceholderAttentionBackend
-
+from vllm.attention.backends.placeholder_attn import (
+    PlaceholderAttentionBackend)
 from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
                          ModelConfig, MultiModalConfig, ParallelConfig,
                          PromptAdapterConfig, SchedulerConfig)
