2222 torch_dtype = "auto" ,
2323)
2424
25- system_prompt = "You are a helpful and friendly chatbot"
26-
2725def build_input_from_chat_history (chat_history , msg : str ):
28- messages = [{' role' : ' system' , ' content' : system_prompt }]
26+ messages = [{" role" : " system" , " content" : "You are a helpful and friendly chatbot" }]
2927 for user_msg , ai_msg in chat_history :
3028 messages .append ({'role' : 'user' , 'content' : user_msg })
3129 messages .append ({'role' : 'assistant' , 'content' : ai_msg })
@@ -37,20 +35,22 @@ def predict(message, history):
3735 dist .barrier ()
3836 # Formatting the input for the model.
3937 messages = build_input_from_chat_history (history , message )
40- input_ids = tokenizer .apply_chat_template (
41- messages ,
42- add_generation_prompt = True ,
43- return_tensors = "pt" ,
44- tokenize = True
45- )
46- input_len = core .tensor (input_ids .shape [1 ])
38+ inputs = tokenizer .apply_chat_template (
39+ messages ,
40+ add_generation_prompt = True ,
41+ return_tensors = "pt" ,
42+ return_dict = True ,
43+ ).to (model .device )
44+
45+ input_len = core .tensor (inputs ['input_ids' ].shape [1 ])
4746 dist .broadcast (input_len , 0 )
4847 dist .barrier ()
4948 streamer = TextIteratorStreamer (tokenizer , timeout = 1200 , skip_prompt = True , skip_special_tokens = True )
5049 generate_kwargs = dict (
51- input_ids = input_ids . to ( 'npu' ) ,
50+ ** inputs ,
5251 streamer = streamer ,
5352 max_new_tokens = 1024 ,
53+ temperature = 0.7
5454 )
5555 t = Thread (target = model .generate , kwargs = generate_kwargs )
5656 t .start () # Starting the generation in a separate thread.
0 commit comments