1 parent ebc12f1 commit ac60374
examples/offline_inference.py
@@ -1,5 +1,4 @@
 from vllm import LLM, SamplingParams
-import torch
 
 # Sample prompts.
 prompts = [
@@ -9,12 +8,10 @@
     "The future of AI is",
 ]
 # Create a sampling params object.
-sampling_params = SamplingParams(temperature=0.0, top_p=0.95)
+sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
 
 # Create an LLM.
-#llm = LLM(model="state-spaces/mamba-370m-hf", dtype=torch.float32)
-llm = LLM(model="state-spaces/mamba2-130m", dtype=torch.float32)
-
+llm = LLM(model="facebook/opt-125m")
 # Generate texts from the prompts. The output is a list of RequestOutput objects
 # that contain the prompt, generated text, and other information.
 outputs = llm.generate(prompts, sampling_params)
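
For context, the comment in the diff notes that llm.generate() returns a list of RequestOutput objects containing the prompt, the generated text, and other information. A minimal sketch of how those outputs are typically printed in vLLM examples (this loop is illustrative and not part of this commit):

# Illustrative only; assumes the standard vLLM RequestOutput API,
# where each result carries the original prompt and a list of completions.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")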