Description
(venv) (base) wmx@wmx-ubuntu:/media/wmx/soft1/AI-model/openvino_notebooks$ pip list | grep optimum
optimum 1.25.3
optimum-intel 1.23.0
(venv) (base) wmx@wmx-ubuntu:/media/wmx/soft1/AI-model/openvino_notebooks$ pip list | grep transf
transformers 4.51.3
(venv) (base) wmx@wmx-ubuntu:/media/wmx/soft1/AI-model/openvino_notebooks$ pip list | grep open
opencv-python-headless 4.11.0.86
openvino 2025.1.0 18503
openvino-telemetry 2025.1.0
openvino-tokenizers 2025.1.0.0
/media/wmx/soft1/AI-model/openvino_notebooks/notebooks/qwen2-audio/qwen2-audio.ipynb
`conversation = [
# Chat-template input: one system prompt plus a user turn whose content is a
# single audio item, referenced by URL for templating purposes.
{"role": "system", "content": "You are a helpful assistant."},
{
"role": "user",
"content": [
{"type": "audio", "audio_url": audio_chat_url},
],
},
]
# Render the conversation into a prompt string (tokenize=False: text only,
# with the generation prompt appended).
text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
# Load the raw waveform, resampled to the rate the feature extractor expects;
# librosa.load returns (samples, sr), so [0] keeps just the samples.
audios = [librosa.load(audio_chat_file, sr=processor.feature_extractor.sampling_rate)[0]]
# Pack text + audio into model-ready PyTorch tensors.
inputs = processor(text=text, audios=audios, return_tensors="pt", padding=True)
display(ipd.Audio(audio_chat_file))
print("Answer:")
# NOTE(review): this generate() call is where the AttributeError below is raised.
# The notebook helper's _update_model_kwargs_for_generation calls
# self._extract_past_from_model_output, a private GenerationMixin method that
# transformers 4.51.3 no longer provides — presumably the helper needs updating
# (or transformers pinned to an older version); verify against the notebook repo.
generate_ids = ov_model.generate(**inputs, max_new_tokens=50, streamer=TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True))
Output error:
AttributeError Traceback (most recent call last)
Cell In[17], line 18
15 display(ipd.Audio(audio_chat_file))
16 print("Answer:")
---> 18 generate_ids = ov_model.generate(**inputs, max_new_tokens=50, streamer=TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True))
File /media/wmx/soft1/AI-model/openvino_notebooks/venv/lib/python3.12/site-packages/torch/utils/_contextlib.py:116, in context_decorator..decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File /media/wmx/soft1/AI-model/openvino_notebooks/venv/lib/python3.12/site-packages/transformers/generation/utils.py:2465, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, use_model_defaults, **kwargs)
2457 input_ids, model_kwargs = self._expand_inputs_for_generation(
2458 input_ids=input_ids,
2459 expand_size=generation_config.num_return_sequences,
2460 is_encoder_decoder=self.config.is_encoder_decoder,
2461 **model_kwargs,
2462 )
2464 # 12. run sample (it degenerates to greedy search when generation_config.do_sample=False
)
-> 2465 result = self._sample(
2466 input_ids,
2467 logits_processor=prepared_logits_processor,
2468 stopping_criteria=prepared_stopping_criteria,
2469 generation_config=generation_config,
2470 synced_gpus=synced_gpus,
2471 streamer=streamer,
2472 **model_kwargs,
2473 )
2475 elif generation_mode in (GenerationMode.BEAM_SAMPLE, GenerationMode.BEAM_SEARCH):
2476 # 11. interleave input_ids with num_beams
additional sequences per batch
2477 input_ids, model_kwargs = self._expand_inputs_for_generation(
2478 input_ids=input_ids,
2479 expand_size=generation_config.num_beams,
2480 is_encoder_decoder=self.config.is_encoder_decoder,
2481 **model_kwargs,
2482 )
File /media/wmx/soft1/AI-model/openvino_notebooks/venv/lib/python3.12/site-packages/transformers/generation/utils.py:3437, in GenerationMixin._sample(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)
3434 outputs = model_forward(**model_inputs, return_dict=True)
3436 # synced_gpus: don't waste resources running the code we don't need; kwargs must be updated before skipping
-> 3437 model_kwargs = self._update_model_kwargs_for_generation(
3438 outputs,
3439 model_kwargs,
3440 is_encoder_decoder=self.config.is_encoder_decoder,
3441 )
3442 if synced_gpus and this_peer_finished:
3443 continue
File /media/wmx/soft1/AI-model/openvino_notebooks/notebooks/qwen2-audio/ov_qwen2_audio_helper.py:820, in OVQwen2AudioForConditionalGeneration._update_model_kwargs_for_generation(self, outputs, model_kwargs, is_encoder_decoder, num_new_tokens)
812 def _update_model_kwargs_for_generation(
813 self,
814 outputs: ModelOutput,
(...) 818 ) -> dict[str, Any]:
819 # update past_key_values keeping its naming used in model code
--> 820 cache_name, cache = self._extract_past_from_model_output(outputs)
821 model_kwargs[cache_name] = cache
822 if getattr(outputs, "state", None) is not None:
AttributeError: 'OVQwen2AudioForConditionalGeneration' object has no attribute '_extract_past_from_model_output'
`
AttributeError: 'OVQwen2AudioForConditionalGeneration' object has no attribute '_extract_past_from_model_output'
class OvModelForCausalLMWithEmb(GenerationMixin):