Skip to content

Commit 0b437cc

Browse files
committed
fix: sunbird: use tokens for languages instead of langcodes
1 parent d9e3229 commit 0b437cc

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

daras_ai_v2/asr.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -244,10 +244,11 @@
244 244
} # fmt: skip
245 245

246 246
SUNBIRD_SUPPORTED_LANGUAGES = {
247-
"eng", "swa", "ach", "lgg", "lug", "nyn",
248-
"teo", "xog", "ttj", "kin", "myx",
247+
"ach": "<|su|>", "eng": "<|en|>", "kin": "<|as|>", "lgg": "<|jw|>", "lug": "<|ba|>", "myx": "<|mg|>",
248+
"nyn": "<|ha|>", "swa": "<|sw|>", "teo": "<|ln|>", "ttj": "<|tt|>", "xog": "<|haw|>"
249 249
} # fmt: skip
250 250

251+
251 252
# https://translation.ghananlp.org/api-details#api=ghananlp-translation-webservice-api
252 253
GHANA_NLP_SUPPORTED = {'en': 'English', 'tw': 'Twi', 'gaa': 'Ga', 'ee': 'Ewe', 'fat': 'Fante', 'dag': 'Dagbani',
253 254
'gur': 'Gurene', 'yo': 'Yoruba', 'ki': 'Kikuyu', 'luo': 'Luo', 'mer': 'Kimeru'} # fmt: skip
@@ -1301,6 +1302,8 @@ def run_asr(
1301 1302
# don't pass language or task
1302 1303
kwargs.pop("task", None)
1303 1304
kwargs["max_length"] = 448
1305+
elif selected_model == AsrModels.whisper_sunbird_large_v3:
1306+
kwargs["language"] = SUNBIRD_SUPPORTED_LANGUAGES[language.strip()]
1304 1307
elif "whisper" in selected_model.name:
1305 1308
forced_lang = forced_asr_languages.get(selected_model)
1306 1309
if forced_lang:

0 commit comments

Comments
 (0)