Commit 9c931d0: update gpt-oss app (#2128)

1 parent: 227d200

3 files changed: 15 additions, 16 deletions

examples/transformers/inference/gpt-oss/app_multiprocess.py (11 additions, 11 deletions)

@@ -22,10 +22,8 @@
     torch_dtype="auto",
 )

-system_prompt = "You are a helpful and friendly chatbot"
-
 def build_input_from_chat_history(chat_history, msg: str):
-    messages = [{'role': 'system', 'content': system_prompt}]
+    messages = [{"role": "system", "content": "You are a helpful and friendly chatbot"}]
     for user_msg, ai_msg in chat_history:
         messages.append({'role': 'user', 'content': user_msg})
         messages.append({'role': 'assistant', 'content': ai_msg})
@@ -37,20 +35,22 @@ def predict(message, history):
     dist.barrier()
     # Formatting the input for the model.
     messages = build_input_from_chat_history(history, message)
-    input_ids = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        return_tensors="pt",
-        tokenize=True
-    )
-    input_len = core.tensor(input_ids.shape[1])
+    inputs = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt",
+        return_dict=True,
+    ).to(model.device)
+
+    input_len = core.tensor(inputs['input_ids'].shape[1])
     dist.broadcast(input_len, 0)
     dist.barrier()
     streamer = TextIteratorStreamer(tokenizer, timeout=1200, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
-        input_ids=input_ids.to('npu'),
+        **inputs,
         streamer=streamer,
         max_new_tokens=1024,
+        temperature=0.7
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()  # Starting the generation in a separate thread.
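With return_dict=True, apply_chat_template returns a BatchEncoding holding both input_ids and attention_mask, so **inputs forwards the mask to generate() instead of input_ids alone, and .to(model.device) replaces the hard-coded .to('npu'). A minimal sketch of the same pattern with a stock Hugging Face tokenizer/model pair (the checkpoint name, prompt, and do_sample flag are illustrative, not from this commit):

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")
model = AutoModelForCausalLM.from_pretrained(
    "openai/gpt-oss-20b", torch_dtype="auto", device_map="auto"
)

messages = [
    {"role": "system", "content": "You are a helpful and friendly chatbot"},
    {"role": "user", "content": "Hello!"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,   # returns input_ids AND attention_mask
).to(model.device)

# do_sample=True so that temperature actually takes effect.
outputs = model.generate(**inputs, max_new_tokens=64, do_sample=True, temperature=0.7)
# Decode only the newly generated tokens, skipping the prompt.
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))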

mindnlp/__init__.py (3 additions, 4 deletions)

@@ -35,17 +35,16 @@
     from mindspore._c_expression import disable_multi_thread
 except:
     disable_multi_thread = None
-# for different ascend devices
-context.set_context(device_target='CPU')

+# for different ascend devices
 if platform.system().lower() == 'linux':
     SOC = MSContext.get_instance().get_ascend_soc_version()
     if ('910b' not in SOC and '310' not in SOC) or version.parse(mindspore.__version__) < version.parse('2.4.0'):
         os.environ["MS_ALLOC_CONF"] = 'enable_vmm:True,vmm_align_size:2MB'

     if SOC in ('ascend910', 'ascend310b'):
-        context.set_context(ascend_config={"precision_mode": "allow_mix_precision"})
-
+        # context.set_context(ascend_config={"precision_mode": "allow_mix_precision"})
+        mindspore.device_context.ascend.op_precision.precision_mode('allow_mix_precision')
     if SOC == 'ascend310b' and disable_multi_thread is not None:
         disable_multi_thread()
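Here the always-on context.set_context(device_target='CPU') is dropped, and the Ascend mixed-precision switch moves from the legacy ascend_config dict to the mindspore.device_context.ascend.op_precision.precision_mode API used in this commit. A hedged sketch of a version-guarded call (the 2.4.0 cutoff is an assumption mirroring the version check already present in this file, not something the commit states):

import mindspore
from packaging import version

# Assumption: the device_context API is only available on newer MindSpore
# builds, so fall back to the legacy ascend_config form otherwise.
if version.parse(mindspore.__version__) >= version.parse('2.4.0'):
    mindspore.device_context.ascend.op_precision.precision_mode('allow_mix_precision')
else:
    mindspore.set_context(ascend_config={"precision_mode": "allow_mix_precision"})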

mindnlp/transformers/tokenization_utils.py (1 addition, 1 deletion)

@@ -1,6 +1,6 @@
 def apply_chat_template_wrapper(fn):
     def wrapper(*args, **kwargs):
-        return_tensors = kwargs.pop('return_tensors', None)
+        return_tensors = kwargs.get('return_tensors', None)
         if return_tensors is not None and return_tensors == 'ms':
             kwargs['return_tensors'] = 'pt'
         return fn(*args, **kwargs)
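This one-line fix matters because pop() removes 'return_tensors' from kwargs entirely: unless the value was the MindSpore alias 'ms' (which re-inserts it as 'pt'), an explicit return_tensors='pt' was silently dropped before reaching the wrapped function. get() only reads the value and leaves the key in place. A toy repro of the difference (standalone, not project code):

def echo_kwargs(**kwargs):
    return kwargs

kwargs = {'return_tensors': 'pt'}
rt = kwargs.pop('return_tensors', None)   # old behavior: key removed
print(echo_kwargs(**kwargs))              # {} -- 'pt' never reaches the callee

kwargs = {'return_tensors': 'pt'}
rt = kwargs.get('return_tensors', None)   # new behavior: key preserved
print(echo_kwargs(**kwargs))              # {'return_tensors': 'pt'}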
