Skip to content

Commit f4a44b9

Browse files
committed
feat: add support for akera ASR model
1 parent 0d84f63 commit f4a44b9

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

daras_ai_v2/asr.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -283,6 +283,7 @@ class AsrModels(Enum):
283283
ghana_nlp_asr_v2 = "Ghana NLP ASR v2"
284284
lelapa = "Vulavula (Lelapa AI)"
285285
whisper_sunbird_large_v3 = "Sunbird Ugandan Whisper v3 (Sunbird AI)"
286+
whisper_akera_large_v3 = "Akera Whisper v3 (akera)"
286287
whisper_swahili_medium_v3 = "Jacaranda Health Swahili Whisper v3 (Jacaranda Health)"
287288
mbaza_ctc_large = "Mbaza Conformer LG (MbazaNLP)"
288289

@@ -336,6 +337,7 @@ def supports_input_prompt(self) -> bool:
336337

337338

338339
asr_model_ids = {
340+
AsrModels.whisper_akera_large_v3: "akera/whisper-large-v3-kik-full_v2",
339341
AsrModels.gpt_4_o_audio: "gpt-4o-transcribe",
340342
AsrModels.gpt_4_o_mini_audio: "gpt-4o-mini-transcribe",
341343
AsrModels.whisper_large_v3: "vaibhavs10/incredibly-fast-whisper:3ab86df6c8f54c11309d4d1f930ac292bad43ace52d10c80d87eb258b3c9f79c",
@@ -362,6 +364,7 @@ def supports_input_prompt(self) -> bool:
362364
AsrModels.vakyansh_bhojpuri: "bho",
363365
AsrModels.nemo_english: "en",
364366
AsrModels.nemo_hindi: "hi",
367+
AsrModels.whisper_akera_large_v3: "kik",
365368
}
366369

367370
asr_supported_languages = {
@@ -386,6 +389,7 @@ def supports_input_prompt(self) -> bool:
386389
AsrModels.lelapa: LELAPA_ASR_SUPPORTED,
387390
AsrModels.whisper_sunbird_large_v3: SUNBIRD_SUPPORTED_LANGUAGES,
388391
AsrModels.whisper_swahili_medium_v3: {"sw", "en"},
392+
AsrModels.whisper_akera_large_v3: {"kik"},
389393
AsrModels.mbaza_ctc_large: {"sw", "rw", "lg"},
390394
}
391395

@@ -1286,13 +1290,17 @@ def run_asr(
12861290
)
12871291
# call one of the self-hosted models
12881292
else:
1289-
kwargs = {}
1293+
kwargs = {"task": "translate" if speech_translation_target else "transcribe"}
12901294
if "vakyansh" in selected_model.name:
12911295
# fixes https://github.com/huggingface/transformers/issues/15275#issuecomment-1624879632
12921296
kwargs["decoder_kwargs"] = dict(skip_special_tokens=True)
12931297
kwargs["chunk_length_s"] = 60
12941298
kwargs["stride_length_s"] = (6, 0)
12951299
kwargs["batch_size"] = 32
1300+
elif "akera" in selected_model.name:
1301+
# don't pass language or task
1302+
kwargs.pop("task")
1303+
kwargs["max_length"] = 448
12961304
elif "whisper" in selected_model.name:
12971305
forced_lang = forced_asr_languages.get(selected_model)
12981306
if forced_lang:
@@ -1308,7 +1316,6 @@ def run_asr(
13081316
),
13091317
inputs=dict(
13101318
audio=audio_url,
1311-
task="translate" if speech_translation_target else "transcribe",
13121319
return_timestamps=output_format != AsrOutputFormat.text,
13131320
**kwargs,
13141321
),

0 commit comments

Comments
 (0)