
Commit f6d7aad

[Feat] Handle case where the tokenizer is not initialised

Signed-off-by: Michele Gazzetti <michele.gazzetti1@ibm.com>
1 parent ea01dbd

5 files changed (+38, -11 lines)

vllm/entrypoints/openai/protocol.py

Lines changed: 1 addition & 1 deletion

@@ -1109,7 +1109,7 @@ class EmbeddingChatRequest(OpenAIBaseModel):
     model: Optional[str] = None
     messages: list[ChatCompletionMessageParam]
 
-    encoding_format: Literal["float", "base64"] = "float"
+    encoding_format: Literal["float", "base64", "tensors"] = "float"
     dimensions: Optional[int] = None
     user: Optional[str] = None
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=-1)]] = None

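For context, the new "tensors" literal lets a client ask for tensor-encoded pooling output instead of a float list or raw base64 bytes. A minimal sketch of such a request, assuming a locally running OpenAI-compatible vLLM server; the endpoint path, model name, and message content are placeholders:

import requests

# Hypothetical request body; only "encoding_format" is the field touched by
# this commit, the rest follows the usual chat-style embedding schema.
payload = {
    "model": "my-pooling-model",  # placeholder model id
    "messages": [{"role": "user", "content": "encode this"}],
    "encoding_format": "tensors",  # new literal value added above
}

response = requests.post("http://localhost:8000/pooling", json=payload)
print(response.json())
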
vllm/entrypoints/openai/serving_engine.py

Lines changed: 13 additions & 1 deletion

@@ -807,6 +807,8 @@ async def _preprocess_chat(
                 messages=messages,
                 **_chat_template_kwargs,
             )
+        elif tokenizer is None:
+            request_prompt = "placeholder"
         else:
             request_prompt = apply_hf_chat_template(
                 tokenizer=tokenizer,
@@ -831,7 +833,17 @@ async def _preprocess_chat(
             request = tool_parser(tokenizer).adjust_request(  # type: ignore
                 request=request)
 
-        if isinstance(request_prompt, str):
+        if tokenizer is None:
+            prompt_inputs = {}
+            if "prompt_token_ids" not in request.additional_data:
+                raise Exception("Request must contain "
+                                "additional_data['prompt_token_ids'] "
+                                "when the tokenizer is not initialised")
+
+            prompt_inputs["prompt_token_ids"] = request.additional_data[
+                "prompt_token_ids"]
+
+        elif isinstance(request_prompt, str):
            prompt_inputs = await self._tokenize_prompt_input_async(
                 request,
                 tokenizer,
vllm/entrypoints/openai/serving_pooling.py

Lines changed: 11 additions & 3 deletions

@@ -25,6 +25,7 @@
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.entrypoints.utils import _validate_truncation_size
 from vllm.logger import init_logger
+from vllm.multimodal.image import ImageEmbeddingMediaIO
 from vllm.outputs import PoolingOutput, PoolingRequestOutput
 from vllm.utils import merge_async_iterators
 
@@ -33,7 +34,7 @@
 
 def _get_data(
     output: PoolingOutput,
-    encoding_format: Literal["float", "base64"],
+    encoding_format: Literal["float", "base64", "tensors"],
 ) -> Union[list[float], str]:
     if encoding_format == "float":
         return output.data.tolist()
@@ -43,6 +44,9 @@ def _get_data(
         pt_float32 = output.data.to(dtype=torch.float32)
         pooling_bytes = np.array(pt_float32, dtype="float32").tobytes()
         return base64.b64encode(pooling_bytes).decode("utf-8")
+    elif encoding_format == "tensors":
+        tensor_encoding_io = ImageEmbeddingMediaIO()
+        tensor_encoding_io.encode_base64(output.data)
 
     assert_never(encoding_format)
 
@@ -99,7 +103,11 @@ async def create_pooling(
                 prompt_adapter_request,
             ) = self._maybe_get_adapters(request)
 
-            tokenizer = await self.engine_client.get_tokenizer(lora_request)
+            if not self.model_config.skip_tokenizer_init:
+                tokenizer = await self.engine_client.get_tokenizer(lora_request
+                                                                   )
+            else:
+                tokenizer = None
 
             if prompt_adapter_request is not None:
                 raise NotImplementedError("Prompt adapter is not supported "
@@ -205,7 +213,7 @@ def request_output_to_pooling_response(
         request_id: str,
         created_time: int,
         model_name: str,
-        encoding_format: Literal["float", "base64"],
+        encoding_format: Literal["float", "base64", "tensors"],
     ) -> PoolingResponse:
         items: list[PoolingResponseData] = []
         num_prompt_tokens = 0

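For reference, here is a standalone sketch of the three-way dispatch that _get_data now performs. The torch tensor stands in for PoolingOutput.data, and the "tensors" branch approximates ImageEmbeddingMediaIO.encode_base64 by base64-encoding a serialized tensor; treat it as an illustration, not the exact helper implementation.

import base64
import io
from typing import Literal, Union

import numpy as np
import torch


def encode_pooling_output(
    data: torch.Tensor,
    encoding_format: Literal["float", "base64", "tensors"],
) -> Union[list[float], str]:
    if encoding_format == "float":
        # Plain Python list of floats.
        return data.tolist()
    elif encoding_format == "base64":
        # Raw float32 bytes, base64-encoded (shape and dtype not preserved).
        pooling_bytes = np.array(data.to(dtype=torch.float32),
                                 dtype="float32").tobytes()
        return base64.b64encode(pooling_bytes).decode("utf-8")
    elif encoding_format == "tensors":
        # Approximation of the ImageEmbeddingMediaIO path: serialize the
        # tensor itself so dtype and shape survive the round trip.
        buffer = io.BytesIO()
        torch.save(data, buffer)
        return base64.b64encode(buffer.getvalue()).decode("utf-8")
    raise ValueError(f"Unsupported encoding_format: {encoding_format}")
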
vllm/model_executor/models/prithvi_geospatial_mae.py

Lines changed: 6 additions & 2 deletions

@@ -45,7 +45,7 @@
 class PrithviGeoSpatialMAEProcessingInfo(BaseProcessingInfo):
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
-        return {"image": None}
+        return {"image": None,"tensors":2}
 
 
 class PrithviGeoSpatialMAEInputBuilder(
@@ -101,7 +101,11 @@ def apply(
         mm_kwargs = {}
 
         for k, v in mm_data.items():
-            mm_kwargs[k] = v
+            if isinstance(v,dict) and k == "tensors":
+                for tensor_name,tensor in v.items():
+                    mm_kwargs[tensor_name] = tensor
+            else:
+                mm_kwargs[k] = v
         mm_place_holders = {"image": [PlaceholderRange(offset=0, length=0)]}
 
         multimodal_kwargs_items = [

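To illustrate the new flattening step in apply, a minimal sketch of how a "tensors" entry in mm_data is expanded into individual multimodal kwargs; the tensor names and shapes are made up for the example:

import torch

# Hypothetical multimodal input carrying named tensors under the "tensors" key.
mm_data = {
    "tensors": {
        "pixel_values": torch.zeros(1, 6, 64, 64),  # placeholder shape
        "location_coords": torch.zeros(1, 2),       # placeholder shape
    },
}

mm_kwargs = {}
for k, v in mm_data.items():
    if isinstance(v, dict) and k == "tensors":
        # Flatten the named tensors into top-level kwargs.
        for tensor_name, tensor in v.items():
            mm_kwargs[tensor_name] = tensor
    else:
        mm_kwargs[k] = v

# mm_kwargs == {"pixel_values": <tensor>, "location_coords": <tensor>}
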
vllm/v1/engine/async_llm.py

Lines changed: 7 additions & 4 deletions

@@ -103,10 +103,13 @@ def __init__(
         )
 
         # Tokenizer (+ ensure liveness if running in another process).
-        self.tokenizer = init_tokenizer_from_configs(
-            model_config=vllm_config.model_config,
-            scheduler_config=vllm_config.scheduler_config,
-            lora_config=vllm_config.lora_config)
+        if not self.vllm_config.model_config.skip_tokenizer_init:
+            self.tokenizer = init_tokenizer_from_configs(
+                model_config=vllm_config.model_config,
+                scheduler_config=vllm_config.scheduler_config,
+                lora_config=vllm_config.lora_config)
+        else:
+            self.tokenizer = None
 
         # Processor (converts Inputs --> EngineCoreRequests).
         self.processor = Processor(

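For context, skip_tokenizer_init is an existing engine argument; when it is set, the branch above leaves self.tokenizer as None. A minimal sketch of enabling it, with a placeholder model id:

from vllm.engine.arg_utils import AsyncEngineArgs

# With skip_tokenizer_init=True, the v1 AsyncLLM constructor shown above
# does not build a tokenizer and sets self.tokenizer = None.
engine_args = AsyncEngineArgs(
    model="ibm-nasa-geospatial/Prithvi-EO-2.0-300M",  # placeholder model id
    skip_tokenizer_init=True,
)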