Skip to content

Commit 6478068

Browse files
authored
Merge pull request #30 from janko-development/main
Add support for gpt-4o-mini-tts model
2 parents 98c03ed + aaec98e commit 6478068

File tree

7 files changed

+39
-8
lines changed

7 files changed

+39
-8
lines changed

README.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@ The OpenAI TTS component for Home Assistant makes it possible to use the OpenAI
99
- **Customizable speech model** – [Check supported voices and models](https://platform.openai.com/docs/guides/text-to-speech).
1010
- **Integration with Home Assistant** – Works seamlessly with assistants, automations, and scripts.
1111
- **Custom endpoint option** – Allows you to use your own OpenAI compatible API endpoint.
12-
- ⭐(New!) **Chime option** – Useful for announcements on speakers. *(See Devices → OpenAI TTS → CONFIGURE button)*
13-
- ⭐(New!) **User-configurable chime sounds** – Drop your own chime sound into `config/custom_components/openai_tts/chime` folder (MP3).
14-
- ⭐(New!) **Audio normalization option** – Uses more CPU but improves audio clarity on mobile phones and small speakers. *(See Devices → OpenAI TTS → CONFIGURE button)*
12+
- **Chime option** – Useful for announcements on speakers. *(See Devices → OpenAI TTS → CONFIGURE button)*
13+
- **User-configurable chime sounds** – Drop your own chime sound into `config/custom_components/openai_tts/chime` folder (MP3).
14+
- **Audio normalization option** – Uses more CPU but improves audio clarity on mobile phones and small speakers. *(See Devices → OpenAI TTS → CONFIGURE button)*
15+
- ⭐(New!) **Support for new gpt-4o-mini-tts model** – A text-to-speech model built on GPT-4o mini, a fast and powerful language model.
16+
- ⭐(New!) **Text-to-Speech Instructions option** – Instruct the text-to-speech model to speak in a specific way (only works with the newest gpt-4o-mini-tts model). See [OpenAI new generation audio models](https://openai.com/index/introducing-our-next-generation-audio-models/).
1517

1618

1719

@@ -34,6 +36,9 @@ data:
3436
cache: true
3537
media_player_entity_id: media_player.bedroom_speaker
3638
message: My speech has improved now!
39+
options:
40+
chime: true # Enable or disable the chime
41+
instructions: "Speak like a pirate" # Instructions for text-to-speech model on how to speak
3742
```
3843

3944
## HACS installation ( *preferred!* )

custom_components/openai_tts/config_flow.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212
from homeassistant import data_entry_flow
1313
from homeassistant.config_entries import ConfigFlow, OptionsFlow
1414
from homeassistant.helpers.selector import selector
15+
from homeassistant.helpers.selector import (
16+
TextSelector,
17+
TextSelectorConfig,
18+
TextSelectorType,
19+
)
1520
from homeassistant.exceptions import HomeAssistantError
1621

1722
from .const import (
@@ -27,6 +32,7 @@
2732
CONF_CHIME_ENABLE, # Use constant for chime enable toggle
2833
CONF_CHIME_SOUND,
2934
CONF_NORMALIZE_AUDIO,
35+
CONF_INSTRUCTIONS,
3036
)
3137

3238
_LOGGER = logging.getLogger(__name__)
@@ -174,6 +180,14 @@ async def async_step_init(self, user_input: dict | None = None):
174180
}
175181
}),
176182

183+
184+
vol.Optional(
185+
CONF_INSTRUCTIONS,
186+
default=self.config_entry.options.get(CONF_INSTRUCTIONS, self.config_entry.data.get(CONF_INSTRUCTIONS, None))
187+
): TextSelector(
188+
TextSelectorConfig(type=TextSelectorType.TEXT,multiline=True)
189+
),
190+
177191
# Normalization toggle using its constant; label will be picked from strings.json.
178192
vol.Optional(
179193
CONF_NORMALIZE_AUDIO,

custom_components/openai_tts/const.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
CONF_URL = "url"
1111
UNIQUE_ID = "unique_id"
1212

13-
MODELS = ["tts-1", "tts-1-hd"]
14-
VOICES = ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]
13+
MODELS = ["tts-1", "tts-1-hd", "gpt-4o-mini-tts"]
14+
VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]
1515

1616
CONF_CHIME_ENABLE = "chime"
1717
CONF_CHIME_SOUND = "chime_sound"
1818
CONF_NORMALIZE_AUDIO = "normalize_audio"
19+
CONF_INSTRUCTIONS = "instructions"

custom_components/openai_tts/openaitts_engine.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def __init__(self, api_key: str, voice: str, model: str, speed: float, url: str)
2525
self._speed = speed
2626
self._url = url
2727

28-
def get_tts(self, text: str, speed: float = None, voice: str = None) -> AudioResponse:
28+
def get_tts(self, text: str, speed: float = None, instructions: str = None, voice: str = None) -> AudioResponse:
2929
"""Synchronous TTS request using urllib.request.
3030
If the API call fails, waits for 1 second and retries once.
3131
"""
@@ -45,6 +45,8 @@ def get_tts(self, text: str, speed: float = None, voice: str = None) -> AudioRes
4545
"response_format": "mp3",
4646
"speed": speed
4747
}
48+
if instructions is not None and self._model == "gpt-4o-mini-tts":
49+
data["instructions"] = instructions
4850

4951
max_retries = 1
5052
attempt = 0

custom_components/openai_tts/strings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
"chime_sound": "Chime sound",
2727
"speed": "Speed (0.25 to 4.0)",
2828
"voice": "Voice",
29+
"instructions": "Instructions",
2930
"normalize_audio": "Enable loudness for generated audio (uses more CPU)"
3031
}
3132
}

custom_components/openai_tts/translations/en.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
"chime_sound": "Chime sound",
2727
"speed": "Speed (0.25 to 4.0)",
2828
"voice": "Voice",
29+
"instructions": "Instructions",
2930
"normalize_audio": "Enable loudness for generated audio (uses more CPU)"
3031
}
3132
}

custom_components/openai_tts/tts.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
CONF_MODEL,
2121
CONF_SPEED,
2222
CONF_VOICE,
23+
CONF_INSTRUCTIONS,
2324
CONF_URL,
2425
DOMAIN,
2526
UNIQUE_ID,
@@ -65,6 +66,10 @@ def __init__(self, hass: HomeAssistant, config: ConfigEntry, engine: OpenAITTSEn
6566
def default_language(self) -> str:
6667
return "en"
6768

69+
@property
70+
def supported_options(self) -> list:
71+
return ["instructions", "chime"]
72+
6873
@property
6974
def supported_languages(self) -> list:
7075
return self._engine.get_supported_langs()
@@ -97,18 +102,20 @@ def get_tts_audio(
97102
# Retrieve settings.
98103
current_speed = self._config.options.get(CONF_SPEED, self._config.data.get(CONF_SPEED, 1.0))
99104
effective_voice = self._config.options.get(CONF_VOICE, self._config.data.get(CONF_VOICE))
105+
instructions = options.get(CONF_INSTRUCTIONS, self._config.options.get(CONF_INSTRUCTIONS, self._config.data.get(CONF_INSTRUCTIONS)))
100106
_LOGGER.debug("Effective speed: %s", current_speed)
101107
_LOGGER.debug("Effective voice: %s", effective_voice)
108+
_LOGGER.debug("Instructions: %s", instructions)
102109

103110
_LOGGER.debug("Creating TTS API request")
104111
api_start = time.monotonic()
105-
speech = self._engine.get_tts(message, speed=current_speed, voice=effective_voice)
112+
speech = self._engine.get_tts(message, speed=current_speed, voice=effective_voice, instructions=instructions)
106113
api_duration = (time.monotonic() - api_start) * 1000
107114
_LOGGER.debug("TTS API call completed in %.2f ms", api_duration)
108115
audio_content = speech.content
109116

110117
# Retrieve options.
111-
chime_enabled = self._config.options.get(CONF_CHIME_ENABLE, self._config.data.get(CONF_CHIME_ENABLE, False))
118+
chime_enabled = options.get(CONF_CHIME_ENABLE,self._config.options.get(CONF_CHIME_ENABLE, self._config.data.get(CONF_CHIME_ENABLE, False)))
112119
normalize_audio = self._config.options.get(CONF_NORMALIZE_AUDIO, self._config.data.get(CONF_NORMALIZE_AUDIO, False))
113120
_LOGGER.debug("Chime enabled: %s", chime_enabled)
114121
_LOGGER.debug("Normalization option: %s", normalize_audio)

0 commit comments

Comments
 (0)