Skip to content

Commit 29f34ec

Browse files
authored
Merge branch 'main' into iree-turbine
2 parents 6e8527e + 81d6e05 commit 29f34ec

File tree

3 files changed

+19
-8
lines changed

3 files changed

+19
-8
lines changed

apps/shark_studio/api/llm.py

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
from transformers import AutoTokenizer, AutoModelForCausalLM
1414

1515
llm_model_map = {
16-
"llama2_7b": {
16+
"meta-llama/Llama-2-7b-chat-hf": {
1717
"initializer": stateless_llama.export_transformer_model,
1818
"hf_model_name": "meta-llama/Llama-2-7b-chat-hf",
1919
"compile_flags": ["--iree-opt-const-expr-hoisting=False"],
@@ -258,7 +258,8 @@ def format_out(results):
258258

259259
history.append(format_out(token))
260260
while (
261-
format_out(token) != llm_model_map["llama2_7b"]["stop_token"]
261+
format_out(token)
262+
!= llm_model_map["meta-llama/Llama-2-7b-chat-hf"]["stop_token"]
262263
and len(history) < self.max_tokens
263264
):
264265
dec_time = time.time()
@@ -272,7 +273,10 @@ def format_out(results):
272273

273274
self.prev_token_len = token_len + len(history)
274275

275-
if format_out(token) == llm_model_map["llama2_7b"]["stop_token"]:
276+
if (
277+
format_out(token)
278+
== llm_model_map["meta-llama/Llama-2-7b-chat-hf"]["stop_token"]
279+
):
276280
break
277281

278282
for i in range(len(history)):
@@ -306,7 +310,7 @@ def chat_hf(self, prompt):
306310
self.first_input = False
307311

308312
history.append(int(token))
309-
while token != llm_model_map["llama2_7b"]["stop_token"]:
313+
while token != llm_model_map["meta-llama/Llama-2-7b-chat-hf"]["stop_token"]:
310314
dec_time = time.time()
311315
result = self.hf_mod(token.reshape([1, 1]), past_key_values=pkv)
312316
history.append(int(token))
@@ -317,7 +321,7 @@ def chat_hf(self, prompt):
317321

318322
self.prev_token_len = token_len + len(history)
319323

320-
if token == llm_model_map["llama2_7b"]["stop_token"]:
324+
if token == llm_model_map["meta-llama/Llama-2-7b-chat-hf"]["stop_token"]:
321325
break
322326
for i in range(len(history)):
323327
if type(history[i]) != int:
@@ -347,7 +351,11 @@ def llm_chat_api(InputData: dict):
347351
else:
348352
print(f"prompt : {InputData['prompt']}")
349353

350-
model_name = InputData["model"] if "model" in InputData.keys() else "llama2_7b"
354+
model_name = (
355+
InputData["model"]
356+
if "model" in InputData.keys()
357+
else "meta-llama/Llama-2-7b-chat-hf"
358+
)
351359
model_path = llm_model_map[model_name]
352360
device = InputData["device"] if "device" in InputData.keys() else "cpu"
353361
precision = "fp16"

apps/shark_studio/web/ui/chat.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
llm_model_map,
1010
LanguageModel,
1111
)
12+
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
1213
import apps.shark_studio.web.utils.globals as global_obj
1314

1415
B_SYS, E_SYS = "<s>", "</s>"
@@ -64,6 +65,7 @@ def chat_fn(
6465
external_weights="safetensors",
6566
use_system_prompt=prompt_prefix,
6667
streaming_llm=streaming_llm,
68+
hf_auth_token=cmd_opts.hf_auth_token,
6769
)
6870
history[-1][-1] = "Getting the model ready... Done"
6971
yield history, ""

requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
setuptools
66
wheel
77

8-
torch==2.3.0.dev20240305
9-
shark-turbine @ git+https://github.com/nod-ai/SHARK-Turbine.git@main#subdirectory=core
8+
torch==2.3.0
9+
shark-turbine @ git+https://github.com/iree-org/iree-turbine.git@main
1010
turbine-models @ git+https://github.com/nod-ai/SHARK-Turbine.git@main#subdirectory=models
1111

1212
# SHARK Runner
@@ -35,6 +35,7 @@ safetensors==0.3.1
3535
py-cpuinfo
3636
pydantic==2.4.1 # pin until pyinstaller-hooks-contrib works with beta versions
3737
mpmath==1.3.0
38+
optimum
3839

3940
# Keep PyInstaller at the end. Sometimes Windows Defender flags it but most folks can continue even if it errors
4041
pefile

0 commit comments

Comments (0)