Skip to content

Commit 729aa3f

Browse files
committed
feat: add support for MMS TTS models via Modal
1 parent e3b9171 commit 729aa3f

File tree

3 files changed

+49
-0
lines changed

3 files changed

+49
-0
lines changed

daras_ai_v2/settings.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,3 +517,5 @@
517517
if MODAL_TOKEN_ID and MODAL_TOKEN_SECRET:
518518
os.environ["MODAL_TOKEN_ID"] = MODAL_TOKEN_ID
519519
os.environ["MODAL_TOKEN_SECRET"] = MODAL_TOKEN_SECRET
520+
521+
HF_TOKEN = config("HF_TOKEN", "")

daras_ai_v2/text_to_speech_settings_widgets.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ class TextToSpeechProviders(Enum):
7474
AZURE_TTS = "Azure Text-to-Speech"
7575
OPEN_AI = "OpenAI"
7676
GHANA_NLP = "GhanaNLP Text-To-Speech"
77+
MMS_TTS = "MMS TTS (Meta)"
7778

7879

7980
# This exists only for backwards compatibility
@@ -170,6 +171,8 @@ def text_to_speech_provider_selector(page):
170171
openai_tts_selector()
171172
case TextToSpeechProviders.GHANA_NLP.name:
172173
ghana_nlp_tts_selector()
174+
case TextToSpeechProviders.MMS_TTS.name:
175+
mms_tts_selector()
173176
return tts_provider
174177

175178

@@ -198,6 +201,29 @@ def ghana_nlp_tts_selector():
198201
)
199202

200203

204+
def mms_tts_selector():
    """Render the language dropdown for the MMS TTS provider.

    The choices come from ``mms_tts_language_options()``: the mapping's keys
    are offered as the selectable values and each one is shown using its
    mapped display name. The selection is bound to the ``mms_tts_language``
    key.
    """
    language_names = mms_tts_language_options()

    def _display_name(code):
        # Label shown for an option: the name mapped to its language code.
        return language_names[code]

    heading = """
###### MMS TTS Language
"""
    gui.selectbox(
        label=heading,
        key="mms_tts_language",
        format_func=_display_name,
        options=language_names,
    )
214+
215+
216+
@redis_cache_decorator(ex=settings.REDIS_MODELS_CACHE_EXPIRY)
def mms_tts_language_options():
    """Return a dict mapping each supported MMS TTS language code to its display name.

    Codes are taken from ``MMS_TTS_SUPPORTED_LANGUAGES`` and names are looked up
    with ``langcodes``. Both imports are kept local to the function, as in the
    original. The result passes through ``redis_cache_decorator`` with
    ``ex=settings.REDIS_MODELS_CACHE_EXPIRY``.
    """
    import langcodes
    from daras_ai_v2.mms_tts import MMS_TTS_SUPPORTED_LANGUAGES

    return {
        code: langcodes.Language.get(code).display_name()
        for code in MMS_TTS_SUPPORTED_LANGUAGES
    }
225+
226+
201227
def openai_tts_selector():
202228
enum_selector(
203229
OpenAI_TTS_Voices,

recipes/TextToSpeech.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import gooey_gui as gui
66
import requests
7+
import modal
78
from pydantic import BaseModel, Field
89

910
from bots.models import Workflow
@@ -64,6 +65,8 @@ class TextToSpeechSettings(BaseModel):
6465
openai_tts_model: OpenAI_TTS_Models.api_choices | None = None
6566
ghana_nlp_tts_language: GHANA_NLP_TTS_LANGUAGES.api_choices | None = None
6667

68+
mms_tts_language: str = "eng"
69+
6770

6871
class TextToSpeechPage(BasePage):
6972
title = "Compare AI Voice Generators"
@@ -408,6 +411,24 @@ def run(self, state: dict):
408411
audio_url = upload_file_from_bytes("ghana_gen.wav", response.content)
409412
state["audio_url"] = audio_url
410413

414+
case TextToSpeechProviders.MMS_TTS:
415+
from daras_ai_v2.mms_tts import (
416+
MMS_TTS_SUPPORTED_LANGUAGES,
417+
app as modal_app,
418+
)
419+
420+
language = state.get("mms_tts_language", "eng")
421+
if language not in MMS_TTS_SUPPORTED_LANGUAGES:
422+
raise UserError(f"Unsupported language: {language}")
423+
424+
run_mms_tts = modal.Function.lookup(modal_app.name, "run_mms_tts")
425+
with modal.enable_output():
426+
audio = run_mms_tts.remote(language=language, text=text)
427+
428+
state["audio_url"] = upload_file_from_bytes(
429+
filename="output.wav", data=audio, content_type="audio/wav"
430+
)
431+
411432
def _get_elevenlabs_voice_model(self, state: dict[str, str]):
412433
default_voice_model = next(iter(ELEVEN_LABS_MODELS))
413434
voice_model = state.get("elevenlabs_model", default_voice_model)

0 commit comments

Comments
 (0)