release of v3

sfortis · sfortis · commit 6772e4641cb7 · 2025-03-14T16:56:21.000+02:00
* major update *

Async HTTP Requests: Switched from synchronous requests to aiohttp for better responsiveness.

Chime option: Added an option to play a chime prior to the TTS voice, useful for announcements.

Added options flow: voice, speed and chime are now configurable on the existing entries.

Unique IDs: The integration now creates unique IDs, even for the same TTS engine.
diff --git a/README.md b/README.md
@@ -11,10 +11,11 @@ The OpenAI TTS component for Home Assistant makes it possible to use the OpenAI
 
 - Text-to-Speech conversion using OpenAI's API
 - Support for multiple languages and voices
+- Chime option. Useful for announcements.
 - Customizable speech model (check https://platform.openai.com/docs/guides/text-to-speech for supported voices and models)
 - Integration with Home Assistant's assistant, automations and scripts
 
-## YouTube sample video
+## YouTube sample video (it's not a tutorial!)
 
 [![OpenAI TTS Demo](https://img.youtube.com/vi/oeeypI_X0qs/0.jpg)](https://www.youtube.com/watch?v=oeeypI_X0qs)
 
diff --git a/custom_components/openai_tts/config_flow.py b/custom_components/openai_tts/config_flow.py
@@ -1,26 +1,26 @@
-"""Config flow for OpenAI text-to-speech custom component."""
+"""
+Config flow for OpenAI TTS.
+"""
 from __future__ import annotations
 from typing import Any
 import voluptuous as vol
 import logging
 from urllib.parse import urlparse
+import uuid
 
 from homeassistant import data_entry_flow
-from homeassistant.config_entries import ConfigFlow
+from homeassistant.config_entries import ConfigFlow, OptionsFlow
 from homeassistant.helpers.selector import selector
 from homeassistant.exceptions import HomeAssistantError
 
 from .const import CONF_API_KEY, CONF_MODEL, CONF_VOICE, CONF_SPEED, CONF_URL, DOMAIN, MODELS, VOICES, UNIQUE_ID
 
 _LOGGER = logging.getLogger(__name__)
 
-def generate_unique_id(user_input: dict) -> str:
-    """Generate a unique id from user input."""
-    url = urlparse(user_input[CONF_URL])
-    return f"{url.hostname}_{user_input[CONF_MODEL]}_{user_input[CONF_VOICE]}"
+def generate_entry_id() -> str:
+    """Return a freshly generated random UUID4 as a string."""
+    return str(uuid.uuid4())
 
 async def validate_user_input(user_input: dict):
-    """Validate user input fields."""
     if user_input.get(CONF_MODEL) is None:
         raise ValueError("Model is required")
     if user_input.get(CONF_VOICE) is None:
@@ -32,7 +32,14 @@ class OpenAITTSConfigFlow(ConfigFlow, domain=DOMAIN):
     data_schema = vol.Schema({
         vol.Optional(CONF_API_KEY): str,
         vol.Optional(CONF_URL, default="https://api.openai.com/v1/audio/speech"): str,
-        vol.Optional(CONF_SPEED, default=1.0): vol.Coerce(float),
+        vol.Optional(CONF_SPEED, default=1.0): selector({
+            "number": {
+                "min": 0.25,
+                "max": 4.0,
+                "step": 0.05,
+                "mode": "slider"
+            }
+        }),
         vol.Required(CONF_MODEL, default="tts-1"): selector({
             "select": {
                 "options": MODELS,
@@ -52,17 +59,19 @@ class OpenAITTSConfigFlow(ConfigFlow, domain=DOMAIN):
     })
 
     async def async_step_user(self, user_input: dict[str, Any] | None = None):
-        """Handle the initial step."""
         errors = {}
         if user_input is not None:
             try:
                 await validate_user_input(user_input)
-                unique_id = generate_unique_id(user_input)
-                user_input[UNIQUE_ID] = unique_id
-                await self.async_set_unique_id(unique_id)
-                self._abort_if_unique_id_configured()
+                # Generate a random unique id so multiple integrations can be added.
+                entry_id = generate_entry_id()
+                user_input[UNIQUE_ID] = entry_id
+                await self.async_set_unique_id(entry_id)
                 hostname = urlparse(user_input[CONF_URL]).hostname
-                return self.async_create_entry(title=f"OpenAI TTS ({hostname}, {user_input[CONF_MODEL]}, {user_input[CONF_VOICE]})", data=user_input)
+                return self.async_create_entry(
+                    title=f"OpenAI TTS ({hostname}, {user_input[CONF_MODEL]})",
+                    data=user_input
+                )
             except data_entry_flow.AbortFlow:
                 return self.async_abort(reason="already_configured")
             except HomeAssistantError as e:
@@ -71,7 +80,51 @@ async def async_step_user(self, user_input: dict[str, Any] | None = None):
             except ValueError as e:
                 _LOGGER.exception(str(e))
                 errors["base"] = str(e)
-            except Exception as e:  # pylint: disable=broad-except
+            except Exception as e:
                 _LOGGER.exception(str(e))
                 errors["base"] = "unknown_error"
-        return self.async_show_form(step_id="user", data_schema=self.data_schema, errors=errors, description_placeholders=user_input)
+        return self.async_show_form(
+            step_id="user",
+            data_schema=self.data_schema,
+            errors=errors,
+            description_placeholders=user_input
+        )
+
+    @staticmethod
+    def async_get_options_flow(config_entry):
+        """Return a new OpenAITTSOptionsFlow; the config_entry argument is not used here."""
+        return OpenAITTSOptionsFlow()
+
+class OpenAITTSOptionsFlow(OptionsFlow):
+    """Handle options flow for OpenAI TTS."""
+    async def async_step_init(self, user_input: dict | None = None):
+        """Show the options form, or create the entry once the form is submitted.
+
+        Offers three fields: "chime", CONF_SPEED and CONF_VOICE. Each field's
+        default is looked up in config_entry.options first, then in
+        config_entry.data, and finally falls back to a literal
+        (False, 1.0 and "shimmer" respectively).
+        """
+        if user_input is not None:
+            # Submitted form: hand the values to async_create_entry unchanged.
+            return self.async_create_entry(title="", data=user_input)
+        options_schema = vol.Schema({
+            vol.Optional(
+                "chime",
+                default=self.config_entry.options.get("chime", self.config_entry.data.get("chime", False))
+            ): selector({"boolean": {}}),
+            vol.Optional(
+                CONF_SPEED,
+                default=self.config_entry.options.get(CONF_SPEED, self.config_entry.data.get(CONF_SPEED, 1.0))
+            ): selector({
+                # Same slider bounds as the speed field in the config flow's data_schema.
+                "number": {
+                    "min": 0.25,
+                    "max": 4.0,
+                    "step": 0.05,
+                    "mode": "slider"
+                }
+            }),
+            vol.Optional(
+                CONF_VOICE,
+                default=self.config_entry.options.get(CONF_VOICE, self.config_entry.data.get(CONF_VOICE, "shimmer"))
+            ): selector({
+                "select": {
+                    "options": VOICES,
+                    "mode": "dropdown",
+                    "sort": True,
+                    "custom_value": True
+                }
+            })
+        })
+        return self.async_show_form(step_id="init", data_schema=options_schema)
diff --git a/custom_components/openai_tts/const.py b/custom_components/openai_tts/const.py
@@ -8,4 +8,4 @@
 CONF_URL = 'url'
 UNIQUE_ID = 'unique_id'
 MODELS = ["tts-1", "tts-1-hd"]
-VOICES = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
+VOICES = ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]
diff --git a/custom_components/openai_tts/manifest.json b/custom_components/openai_tts/manifest.json
@@ -9,8 +9,6 @@
   "documentation": "https://github.com/sfortis/openai_tts/",
   "iot_class": "cloud_polling",
   "issue_tracker": "https://github.com/sfortis/openai_tts/issues",
-  "requirements": [
-    "requests>=2.25.1"
-  ],
-  "version": "0.2.2"
+  "requirements": [],
+  "version": "0.3.0b0"
 }
diff --git a/custom_components/openai_tts/openaitts_engine.py b/custom_components/openai_tts/openaitts_engine.py
@@ -1,27 +1,90 @@
-import requests
+"""
+TTS Engine for OpenAI TTS.
+"""
+import asyncio
+import threading
+import logging
+import aiohttp
 
-class OpenAITTSEngine:
+_LOGGER = logging.getLogger(__name__)
+
+class AudioResponse:
+    """A simple response wrapper with a 'content' attribute to hold audio bytes."""
+    # NOTE(review): presumably mirrors the `.content` attribute of the requests
+    # response that get_tts() returned before this change — confirm against callers.
+    def __init__(self, content: bytes):
+        self.content = content
 
-    def __init__(self, api_key: str, voice: str, model: str, speed: int, url: str):
+class OpenAITTSEngine:
+    def __init__(self, api_key: str, voice: str, model: str, speed: float, url: str):
         self._api_key = api_key
         self._voice = voice
         self._model = model
         self._speed = speed
         self._url = url
 
-    def get_tts(self, text: str):
-        """ Makes request to OpenAI TTS engine to convert text into audio"""
-        headers: dict = {"Authorization": f"Bearer {self._api_key}"} if self._api_key else {}
-        data: dict = {
+        # Create a dedicated event loop running in a background thread.
+        self._loop = asyncio.new_event_loop()
+        self._session = None
+        self._thread = threading.Thread(target=self._start_loop, daemon=True)
+        self._thread.start()
+        # Initialize the aiohttp session in the background event loop.
+        asyncio.run_coroutine_threadsafe(self._init_session(), self._loop).result()
+
+    def _start_loop(self):
+        """Thread target: set self._loop as this thread's event loop and run it until it is stopped."""
+        asyncio.set_event_loop(self._loop)
+        self._loop.run_forever()
+
+    async def _init_session(self):
+        """Create an aiohttp.ClientSession and store it on self._session."""
+        # Create a persistent aiohttp session for reuse.
+        self._session = aiohttp.ClientSession()
+
+    async def _async_get_tts(self, text: str, speed: float, voice: str) -> AudioResponse:
+        headers = {"Authorization": f"Bearer {self._api_key}"} if self._api_key else {}
+        data = {
             "model": self._model,
             "input": text,
-            "voice": self._voice,
+            "voice": voice,
             "response_format": "wav",
-            "speed": self._speed
+            "speed": speed,
+            "stream": True
         }
-        return requests.post(self._url, headers=headers, json=data)
+        # Use separate timeouts for connecting and reading.
+        timeout = aiohttp.ClientTimeout(total=None, sock_connect=5, sock_read=25)
+        async with self._session.post(self._url, headers=headers, json=data, timeout=timeout) as resp:
+            resp.raise_for_status()
+            audio_chunks = []
+            # Optimize the chunk size to 4096 bytes.
+            async for chunk in resp.content.iter_chunked(4096):
+                if chunk:
+                    audio_chunks.append(chunk)
+            audio_data = b"".join(audio_chunks)
+            return AudioResponse(audio_data)
+
+    def get_tts(self, text: str, speed: float = None, voice: str = None) -> AudioResponse:
+        """Synchronous wrapper that runs the asynchronous TTS request on a dedicated event loop.
+
+        Blocks the calling thread until the request submitted to self._loop finishes.
+
+        Args:
+            text: Text to convert to speech.
+            speed: Overrides the stored speed when not None.
+            voice: Overrides the stored voice when not None.
+
+        Returns:
+            The AudioResponse produced by _async_get_tts.
+
+        Raises:
+            Exception: Any error from the request is logged and re-raised unchanged.
+        """
+        try:
+            if speed is None:
+                speed = self._speed
+            if voice is None:
+                voice = self._voice
+            future = asyncio.run_coroutine_threadsafe(self._async_get_tts(text, speed, voice), self._loop)
+            return future.result()
+        except Exception as e:
+            _LOGGER.error("Error in asynchronous get_tts: %s", e)
+            # Bare raise re-raises the active exception without adding a re-raise frame.
+            raise
+
+    def close(self):
+        """Clean up the aiohttp session and event loop on shutdown.
+
+        Blocks until the session's close() coroutine has finished on the
+        background loop, then asks that loop to stop.
+        """
+        if self._session:
+            asyncio.run_coroutine_threadsafe(self._session.close(), self._loop).result()
+        # Pass the bound method itself: writing self._loop.stop() would call stop()
+        # from this thread and hand its None return value to call_soon_threadsafe.
+        self._loop.call_soon_threadsafe(self._loop.stop)
 
     @staticmethod
     def get_supported_langs() -> list:
-        """Returns list of supported languages. Note: the model determines the provides language automatically."""
-        return ["af", "ar", "hy", "az", "be", "bs", "bg", "ca", "zh", "hr", "cs", "da", "nl", "en", "et", "fi", "fr", "gl", "de", "el", "he", "hi", "hu", "is", "id", "it", "ja", "kn", "kk", "ko", "lv", "lt", "mk", "ms", "mr", "mi", "ne", "no", "fa", "pl", "pt", "ro", "ru", "sr", "sk", "sl", "es", "sw", "sv", "tl", "ta", "th", "tr", "uk", "ur", "vi", "cy"]
+        return [
+            "af", "ar", "hy", "az", "be", "bs", "bg", "ca", "zh", "hr", "cs", "da", "nl", "en",
+            "et", "fi", "fr", "gl", "de", "el", "he", "hi", "hu", "is", "id", "it", "ja", "kn",
+            "kk", "ko", "lv", "lt", "mk", "ms", "mr", "mi", "ne", "no", "fa", "pl", "pt", "ro",
+            "ru", "sr", "sk", "sl", "es", "sw", "sv", "tl", "ta", "th", "tr", "uk", "ur", "vi", "cy"
+        ]
diff --git a/custom_components/openai_tts/strings.json b/custom_components/openai_tts/strings.json
@@ -3,12 +3,12 @@
     "step": {
       "user": {
         "title": "Add text-to-speech engine",
-        "description": "Provide configuration data. See documentation for further info.",
+        "description": "See documentation for further info.",
         "data": {
-          "api_key": "Enter OpenAI API key.",
-          "speed": "Enter speed of the speech",
-          "model": "Select model to be used.",
-          "voice": "Select voice.",
+          "api_key": "Enter OpenAI API key",
+          "speed": "Speed (0.25 to 4.0, where 1.0 is default)",
+          "model": "Select model",
+          "voice": "Select voice",
           "url": "Enter the OpenAI-compatible endpoint. Optionally include a port number."
         }
       }
@@ -20,5 +20,17 @@
     "abort": {
       "already_configured": "This voice and endpoint are already configured."
     }
+  },
+  "options": {
+    "step": {
+      "init": {
+        "title": "Configure TTS options",
+        "data": {
+          "chime": "Enable chime before speech (useful for announcements)",
+          "speed": "Set speed (0.25 to 4.0)",
+          "voice": "Select voice"
+        }
+      }
+    }
   }
 }
diff --git a/custom_components/openai_tts/translations/cs.json b/custom_components/openai_tts/translations/cs.json
@@ -2,23 +2,35 @@
   "config": {
     "step": {
       "user": {
-        "title": "Přidej engine pro převod textu na řeč",
-        "description": "Vlož konfigurační data. Pro detaily se podívej na dokumentaci",
+        "title": "Přidat TTS engine",
+        "description": "Více informací naleznete v dokumentaci.",
         "data": {
-          "api_key": "Vlož OpenAI API klíč.",
-          "speed": "Vlož rychlost řeči.",
-          "model": "Vyber model k použití.",
-          "voice": "Vyber hlas.",
-          "url": "Zadejte koncový bod kompatibilní s OpenAI. Volitelně uveďte číslo portu."
+          "api_key": "Zadejte OpenAI API klíč",
+          "speed": "Rychlost (0,25 až 4,0, kde 1,0 je výchozí)",
+          "model": "Vyberte model",
+          "voice": "Vyberte hlas",
+          "url": "Zadejte OpenAI-kompatibilní endpoint (Volitelně uveďte číslo portu)"
         }
       }
     },
     "error": {
-      "wrong_api_key": "Nebyl poskytnut správný API klíč.",
-      "already_configured": "Tento hlas je již nastaven."
+      "wrong_api_key": "Neplatný API klíč. Zadejte prosím platný API klíč.",
+      "already_configured": "Tento hlas a tento endpoint jsou již nakonfigurovány."
     },
     "abort": {
-      "already_configured": "Tento hlas je již nastaven."
+      "already_configured": "Tento hlas a tento endpoint jsou již nakonfigurovány."
+    }
+  },
+  "options": {
+    "step": {
+      "init": {
+        "title": "Nastavení TTS možností",
+        "data": {
+          "chime": "Povolit zvukový signál před řečí (užitečné pro oznámení)",
+          "speed": "Nastavit rychlost (0,25 až 4,0)",
+          "voice": "Vyberte hlas"
+        }
+      }
     }
   }
 }
diff --git a/custom_components/openai_tts/translations/de.json b/custom_components/openai_tts/translations/de.json
@@ -2,23 +2,35 @@
   "config": {
     "step": {
       "user": {
-        "title": "Füge eine Text zu Sprache Engine hinzu",
-        "description": "Gib Konfigurationsdaten ein. Schau in die Dokumentation für weitere Informationen.",
+        "title": "Text-to-Speech Engine hinzufügen",
+        "description": "Weitere Informationen finden Sie in der Dokumentation.",
         "data": {
-          "api_key": "Gib den OpenAI API Schlüssel ein.",
-          "speed": "Gib die Geschwindigkeit der Sprache ein",
-          "model": "Wähle das zu verwendende Modell.",
-          "voice": "Wähle eine Stimme.",
-          "url": "Gib den OpenAI-kompatiblen Endpunkt ein. Optional kann eine Portnummer angegeben werden."
+          "api_key": "Geben Sie den OpenAI API-Schlüssel ein",
+          "speed": "Geschwindigkeit (0.25 bis 4.0, wobei 1.0 Standard ist)",
+          "model": "Wählen Sie das Modell aus",
+          "voice": "Wählen Sie die Stimme aus",
+          "url": "Geben Sie den OpenAI-kompatiblen Endpunkt ein (Optional können Sie eine Portnummer angeben)"
         }
       }
     },
     "error": {
-      "wrong_api_key": "Ungültiger API Schlüssel. Bitte gib einen gültigen API Schlüssel ein.",
-      "already_configured": "Diese Stimme und Endpunkt sind bereits konfiguriert."
+      "wrong_api_key": "Ungültiger API-Schlüssel. Bitte geben Sie einen gültigen API-Schlüssel ein.",
+      "already_configured": "Diese Stimme und dieser Endpunkt sind bereits konfiguriert."
     },
     "abort": {
-      "already_configured": "Diese Stimme und Endpunkt sind bereits konfiguriert."
+      "already_configured": "Diese Stimme und dieser Endpunkt sind bereits konfiguriert."
+    }
+  },
+  "options": {
+    "step": {
+      "init": {
+        "title": "TTS-Optionen konfigurieren",
+        "data": {
+          "chime": "Chime vor der Sprache aktivieren (nützlich für Ansagen)",
+          "speed": "Geschwindigkeit einstellen (0.25 bis 4.0)",
+          "voice": "Stimme auswählen"
+        }
+      }
     }
   }
 }
diff --git a/custom_components/openai_tts/translations/el.json b/custom_components/openai_tts/translations/el.json
diff --git a/custom_components/openai_tts/translations/en.json b/custom_components/openai_tts/translations/en.json
diff --git a/custom_components/openai_tts/tts.py b/custom_components/openai_tts/tts.py