4
4
from __future__ import annotations
5
5
import io
6
6
import math
7
- import re
8
7
import struct
9
8
import wave
10
9
import logging
10
+ from asyncio import CancelledError
11
11
12
12
from homeassistant .components .tts import TextToSpeechEntity
13
13
from homeassistant .config_entries import ConfigEntry
20
20
21
21
_LOGGER = logging .getLogger (__name__ )
22
22
23
- # --- Helper Functions - Chime & silence synthesis --
23
+ # --- Helper Functions - Chime & Silence Synthesis ---
24
24
25
25
def synthesize_chime (sample_rate : int = 44100 , channels : int = 1 , sampwidth : int = 2 , duration : float = 1.0 ) -> bytes :
26
- _LOGGER .debug ("Synthesizing chime: sample_rate=%d, channels=%d, sampwidth=%d, duration=%.2f" , sample_rate , channels , sampwidth , duration )
27
- frequency1 = 440.0 # Note A
26
+ _LOGGER .debug (
27
+ "Synthesizing chime: sample_rate=%d, channels=%d, sampwidth=%d, duration=%.2f" ,
28
+ sample_rate ,
29
+ channels ,
30
+ sampwidth ,
31
+ duration ,
32
+ )
33
+ frequency1 = 440.0 # Note A
28
34
frequency2 = 587.33 # Note D
29
35
amplitude = 0.8
30
36
num_samples = int (sample_rate * duration )
31
37
output = io .BytesIO ()
32
- with wave .open (output , 'wb' ) as wf :
38
+ with wave .open (output , "wb" ) as wf :
33
39
wf .setnchannels (channels )
34
40
wf .setsampwidth (sampwidth )
35
41
wf .setframerate (sample_rate )
@@ -40,33 +46,43 @@ def synthesize_chime(sample_rate: int = 44100, channels: int = 1, sampwidth: int
40
46
sample2 = math .sin (2 * math .pi * frequency2 * t )
41
47
sample = amplitude * fade * ((sample1 + sample2 ) / 2 )
42
48
int_sample = int (sample * 32767 )
43
- wf .writeframes (struct .pack ('<h' , int_sample ))
49
+ wf .writeframes (struct .pack ("<h" , int_sample ))
44
50
chime_data = output .getvalue ()
45
51
_LOGGER .debug ("Chime synthesized, length: %d bytes" , len (chime_data ))
46
52
return chime_data
47
53
48
54
def synthesize_silence(sample_rate: int, channels: int, sampwidth: int, duration: float = 0.3) -> bytes:
    """Return an in-memory WAV file holding ``duration`` seconds of silence.

    Args:
        sample_rate: Frame rate written to the WAV header (frames per second).
        channels: Number of interleaved channels in each frame.
        sampwidth: Width of one sample in bytes.
        duration: Length of the silence in seconds; a non-positive value
            yields a valid WAV file with zero frames.

    Returns:
        The complete WAV file (header plus silent frames) as bytes.
    """
    _LOGGER.debug(
        "Synthesizing silence: sample_rate=%d, channels=%d, sampwidth=%d, duration=%.2f",
        sample_rate,
        channels,
        sampwidth,
        duration,
    )
    num_frames = int(sample_rate * duration)
    # 8-bit PCM WAV samples are unsigned, so their zero level is 0x80;
    # wider samples are signed and silent at 0x00.
    silent_byte = b"\x80" if sampwidth == 1 else b"\x00"
    output = io.BytesIO()
    with wave.open(output, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sampwidth)
        wf.setframerate(sample_rate)
        # A frame occupies channels * sampwidth bytes. Sizing the payload from
        # those values (rather than a fixed 2 bytes per frame) keeps the
        # duration correct for stereo or non-16-bit audio, and a single
        # writeframes call replaces the per-sample loop. A negative count
        # multiplies out to b"", which still produces a valid header.
        wf.writeframes(silent_byte * (num_frames * channels * sampwidth))
    silence_data = output.getvalue()
    _LOGGER.debug("Silence synthesized, length: %d bytes", len(silence_data))
    return silence_data
61
73
62
74
def combine_wav_files (chime_bytes : bytes , pause_bytes : bytes , tts_bytes : bytes ) -> bytes :
63
- _LOGGER .debug ("Combining WAV files: chime (%d bytes), pause (%d bytes), TTS (%d bytes)" ,
64
- len (chime_bytes ), len (pause_bytes ), len (tts_bytes ))
75
+ _LOGGER .debug (
76
+ "Combining WAV files: chime (%d bytes), pause (%d bytes), TTS (%d bytes)" ,
77
+ len (chime_bytes ),
78
+ len (pause_bytes ),
79
+ len (tts_bytes ),
80
+ )
65
81
chime_io = io .BytesIO (chime_bytes )
66
82
pause_io = io .BytesIO (pause_bytes )
67
83
tts_io = io .BytesIO (tts_bytes )
68
-
69
- with wave .open (chime_io , 'rb' ) as w1 , wave .open (pause_io , 'rb' ) as w2 , wave .open (tts_io , 'rb' ) as w3 :
84
+
85
+ with wave .open (chime_io , "rb" ) as w1 , wave .open (pause_io , "rb" ) as w2 , wave .open (tts_io , "rb" ) as w3 :
70
86
params1 = w1 .getparams ()
71
87
params2 = w2 .getparams ()
72
88
params3 = w3 .getparams ()
@@ -75,9 +91,9 @@ def combine_wav_files(chime_bytes: bytes, pause_bytes: bytes, tts_bytes: bytes)
75
91
frames_chime = w1 .readframes (w1 .getnframes ())
76
92
frames_pause = w2 .readframes (w2 .getnframes ())
77
93
frames_tts = w3 .readframes (w3 .getnframes ())
78
-
94
+
79
95
output = io .BytesIO ()
80
- with wave .open (output , 'wb' ) as wout :
96
+ with wave .open (output , "wb" ) as wout :
81
97
wout .setparams (params1 )
82
98
wout .writeframes (frames_chime )
83
99
wout .writeframes (frames_pause )
@@ -110,7 +126,7 @@ async def async_setup_entry(
110
126
config_entry .data [CONF_VOICE ],
111
127
config_entry .data [CONF_MODEL ],
112
128
config_entry .data .get (CONF_SPEED , 1.0 ),
113
- config_entry .data [CONF_URL ]
129
+ config_entry .data [CONF_URL ],
114
130
)
115
131
async_add_entities ([OpenAITTSEntity (hass , config_entry , engine )])
116
132
@@ -142,14 +158,16 @@ def device_info(self) -> dict:
142
158
return {
143
159
"identifiers" : {(DOMAIN , self ._attr_unique_id )},
144
160
"model" : self ._config .data .get (CONF_MODEL ),
145
- "manufacturer" : "OpenAI"
161
+ "manufacturer" : "OpenAI" ,
146
162
}
147
163
148
164
@property
def name(self) -> str:
    """Return the entity name.

    The name is the result of ``_map_model`` applied to the configured
    model (an empty string when no model is configured), upper-cased.
    """
    configured_model = self._config.data.get(CONF_MODEL, "")
    mapped_model = _map_model(configured_model)
    return mapped_model.upper()
151
167
152
- def get_tts_audio (self , message : str , language : str , options : dict | None = None ) -> tuple [str , bytes ] | tuple [None , None ]:
168
+ def get_tts_audio (
169
+ self , message : str , language : str , options : dict | None = None
170
+ ) -> tuple [str , bytes ] | tuple [None , None ]:
153
171
try :
154
172
if len (message ) > 4096 :
155
173
raise MaxLengthExceeded ("Message exceeds maximum allowed length" )
@@ -167,27 +185,35 @@ def get_tts_audio(self, message: str, language: str, options: dict | None = None
167
185
if chime_enabled :
168
186
_LOGGER .debug ("Chime option enabled; synthesizing chime and pause." )
169
187
tts_io = io .BytesIO (audio_content )
170
- with wave .open (tts_io , 'rb' ) as tts_wave :
188
+ with wave .open (tts_io , "rb" ) as tts_wave :
171
189
sample_rate = tts_wave .getframerate ()
172
190
channels = tts_wave .getnchannels ()
173
191
sampwidth = tts_wave .getsampwidth ()
174
192
tts_frames = tts_wave .getnframes ()
175
- _LOGGER .debug ("TTS parameters: sample_rate=%d, channels=%d, sampwidth=%d, frames=%d" ,
176
- sample_rate , channels , sampwidth , tts_frames )
193
+ _LOGGER .debug (
194
+ "TTS parameters: sample_rate=%d, channels=%d, sampwidth=%d, frames=%d" ,
195
+ sample_rate ,
196
+ channels ,
197
+ sampwidth ,
198
+ tts_frames ,
199
+ )
177
200
chime_audio = synthesize_chime (sample_rate = sample_rate , channels = channels , sampwidth = sampwidth , duration = 1.0 )
178
201
pause_audio = synthesize_silence (sample_rate = sample_rate , channels = channels , sampwidth = sampwidth , duration = 0.3 )
179
202
try :
180
203
combined_audio = combine_wav_files (chime_audio , pause_audio , audio_content )
181
204
_LOGGER .debug ("Combined audio generated (chime -> pause -> TTS)." )
182
205
return "wav" , combined_audio
183
206
except Exception as ce :
184
- _LOGGER .error ("Error combining audio: %s" , ce )
207
+ _LOGGER .exception ("Error combining audio" )
185
208
return "wav" , audio_content
186
209
else :
187
210
_LOGGER .debug ("Chime option disabled; returning TTS audio only." )
188
211
return "wav" , audio_content
212
+ except CancelledError as ce :
213
+ _LOGGER .exception ("TTS task cancelled" )
214
+ return None , None
189
215
except MaxLengthExceeded as mle :
190
- _LOGGER .error ("Maximum message length exceeded: %s" , mle )
216
+ _LOGGER .exception ("Maximum message length exceeded" )
191
217
except Exception as e :
192
- _LOGGER .error ("Unknown error in get_tts_audio: %s" , e )
218
+ _LOGGER .exception ("Unknown error in get_tts_audio" )
193
219
return None , None
0 commit comments