Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions backend/onyx/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@
from onyx.server.query_and_chat.query_backend import basic_router as query_router
from onyx.server.settings.api import admin_router as settings_admin_router
from onyx.server.settings.api import basic_router as settings_router
from onyx.server.audio.api import router as audio_router
from onyx.server.audio.api import admin_router as audio_admin_router
from onyx.server.token_rate_limits.api import (
router as token_rate_limit_settings_router,
)
Expand Down Expand Up @@ -357,6 +359,8 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
include_router_with_global_prefix_prepended(application, long_term_logs_router)
include_router_with_global_prefix_prepended(application, api_key_router)
include_router_with_global_prefix_prepended(application, standard_oauth_router)
include_router_with_global_prefix_prepended(application, audio_router)
include_router_with_global_prefix_prepended(application, audio_admin_router)

if AUTH_TYPE == AuthType.DISABLED:
# Server logs this during auth setup verification step
Expand Down
1 change: 1 addition & 0 deletions backend/onyx/server/audio/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

253 changes: 253 additions & 0 deletions backend/onyx/server/audio/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
import tempfile
import os
from typing import Optional
from fastapi import APIRouter, Depends, File, UploadFile, HTTPException
from sqlalchemy.orm import Session

from onyx.auth.users import current_admin_user, current_user
from onyx.db.engine import get_session
from onyx.db.models import User
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.server.audio.models import (
AudioConfig,
STTConfigForm,
TranscriptionResponse,
TestEngineRequest,
TestEngineResponse,
)
from onyx.utils.logger import setup_logger

logger = setup_logger()

router = APIRouter(prefix="/audio")
admin_router = APIRouter(prefix="/admin/audio")

AUDIO_CONFIG_KEY = "audio_config"


def get_audio_config_from_store() -> AudioConfig:
    """Load the audio configuration saved in the key-value store.

    Returns:
        The ``AudioConfig`` validated from the value stored under
        ``AUDIO_CONFIG_KEY``. When that key is absent, a default
        configuration with no STT engine selected is returned instead.
    """
    try:
        stored_value = get_kv_store().load(AUDIO_CONFIG_KEY)
        return AudioConfig.model_validate(stored_value)
    except KvKeyNotFoundError:
        # Nothing has been saved yet: hand back an unconfigured STT setup
        # (empty engine, empty credentials, stock OpenAI endpoint and model).
        default_stt = STTConfigForm(
            engine="",
            openai_api_key="",
            openai_api_base_url="https://api.openai.com/v1",
            whisper_model="whisper-1",
            deepgram_api_key="",
            azure_api_key="",
            azure_region="",
        )
        return AudioConfig(stt=default_stt)


def store_audio_config(config: AudioConfig) -> None:
    """Save ``config`` in the key-value store under ``AUDIO_CONFIG_KEY``.

    Args:
        config: The audio configuration to persist; it is stored as the
            plain dict produced by ``model_dump()``.
    """
    # NOTE(review): the dumped config includes provider API keys — confirm
    # how the key-value store protects values at rest.
    store = get_kv_store()
    serialized = config.model_dump()
    store.store(AUDIO_CONFIG_KEY, serialized)


@admin_router.get("/config")
async def get_audio_config(user: User = Depends(current_admin_user)) -> AudioConfig:
    """Return the stored audio configuration.

    The ``user`` dependency is only there to gate the route through
    ``current_admin_user``; its value is not used.
    """
    current_config = get_audio_config_from_store()
    return current_config


@admin_router.post("/config")
async def update_audio_config(
    config: AudioConfig,
    user: User = Depends(current_admin_user),
) -> None:
    """Replace the stored audio configuration with ``config``.

    Args:
        config: The new configuration taken from the request body.
        user: Resolved through ``current_admin_user`` to gate the route;
            the value itself is not used.
    """
    store_audio_config(config)
    return None


@admin_router.post("/test")
async def test_audio_engine(
    request: TestEngineRequest, user: User = Depends(current_admin_user)
) -> TestEngineResponse:
    """Check that the stored settings needed by ``request.engine`` are present.

    No request is sent to the provider; this only verifies that the stored
    configuration has the credentials the named engine requires.

    Args:
        request: Names the engine to check ("openai", "deepgram", "azure"
            or "web").
        user: Resolved through ``current_admin_user`` to gate the route.

    Returns:
        A ``TestEngineResponse`` whose ``success`` flag tells whether the
        required settings are present, with an explanatory ``message``.
        Unknown engine names and unexpected errors yield ``success=False``.
    """
    stored_config = get_audio_config_from_store()

    try:
        engine_name = request.engine

        # The browser engine needs no server-side credentials.
        if engine_name == "web":
            return TestEngineResponse(success=True, message="Web Speech API available")

        stt = stored_config.stt
        # engine -> (required settings present?, success message, failure message)
        requirements = {
            "openai": (
                bool(stt.openai_api_key),
                "OpenAI configuration valid",
                "OpenAI API key is required",
            ),
            "deepgram": (
                bool(stt.deepgram_api_key),
                "Deepgram configuration valid",
                "Deepgram API key is required",
            ),
            "azure": (
                bool(stt.azure_api_key and stt.azure_region),
                "Azure configuration valid",
                "Azure API key and region are required",
            ),
        }

        if engine_name not in requirements:
            return TestEngineResponse(
                success=False, message=f"Unknown engine: {request.engine}"
            )

        is_configured, ok_message, missing_message = requirements[engine_name]
        if is_configured:
            return TestEngineResponse(success=True, message=ok_message)
        return TestEngineResponse(success=False, message=missing_message)

    except Exception as e:
        logger.error(f"Error testing audio engine {request.engine}: {e}")
        return TestEngineResponse(
            success=False, message=f"Error testing engine: {str(e)}"
        )


async def transcribe_with_openai(
    audio_file_path: str, config: STTConfigForm
) -> str:
    """Transcribe an audio file with the OpenAI transcription API.

    Args:
        audio_file_path: Path of the audio file on local disk.
        config: STT settings supplying the API key, base URL and model name.

    Returns:
        The transcript text returned by the API.

    Raises:
        HTTPException: Status 500 when the client cannot be imported or the
            request fails for any reason; the original error is chained.
    """
    try:
        import asyncio

        import openai

        def _request_transcript() -> str:
            client = openai.OpenAI(
                api_key=config.openai_api_key,
                base_url=config.openai_api_base_url,
            )
            with open(audio_file_path, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    model=config.whisper_model,
                    file=audio_file,
                )
            return transcript.text

        # ``openai.OpenAI`` is a synchronous client: calling it directly from
        # this coroutine would stall the event loop for the whole network
        # round trip, so the request runs in a worker thread instead.
        return await asyncio.to_thread(_request_transcript)
    except Exception as e:
        logger.error(f"OpenAI transcription error: {e}")
        raise HTTPException(
            status_code=500, detail=f"OpenAI transcription failed: {str(e)}"
        ) from e


async def transcribe_with_deepgram(
    audio_file_path: str, config: STTConfigForm
) -> str:
    """Transcribe an audio file with the Deepgram prerecorded API.

    Args:
        audio_file_path: Path of the audio file on local disk.
        config: STT settings supplying the Deepgram API key.

    Returns:
        The transcript of the first alternative of the first channel.

    Raises:
        HTTPException: Status 500 when the SDK cannot be imported, the
            request fails, or the response carries no transcription result;
            the original error is chained.
    """
    try:
        import asyncio

        from deepgram import DeepgramClient, PrerecordedOptions

        def _request_transcript() -> str:
            deepgram = DeepgramClient(config.deepgram_api_key)

            with open(audio_file_path, "rb") as audio_file:
                payload = {"buffer": audio_file.read()}

            options = PrerecordedOptions(
                model="nova-2",
                smart_format=True,
            )
            response = deepgram.listen.prerecorded.v("1").transcribe_file(
                payload, options
            )

            channels = response.results.channels if response.results else None
            # Guard the alternatives list too, so an empty result is reported
            # as "no results" rather than as a bare IndexError message.
            if not channels or not channels[0].alternatives:
                raise RuntimeError("No transcription results from Deepgram")
            return channels[0].alternatives[0].transcript

        # The SDK call above is synchronous; run it in a worker thread so the
        # event loop stays responsive while the request is in flight.
        return await asyncio.to_thread(_request_transcript)

    except Exception as e:
        logger.error(f"Deepgram transcription error: {e}")
        raise HTTPException(
            status_code=500, detail=f"Deepgram transcription failed: {str(e)}"
        ) from e


async def transcribe_with_azure(
    audio_file_path: str, config: STTConfigForm
) -> str:
    """Transcribe an audio file with Azure Speech Services.

    Recognition language is fixed to ``en-US``.

    Args:
        audio_file_path: Path of the audio file on local disk.
        config: STT settings supplying the Azure key and region.

    Returns:
        The recognized text.

    Raises:
        HTTPException: Status 500 when the SDK cannot be imported, no speech
            is recognized, recognition is canceled, or any other error
            occurs; the original error is chained.
    """
    try:
        import asyncio

        import azure.cognitiveservices.speech as speechsdk

        def _recognize() -> str:
            speech_config = speechsdk.SpeechConfig(
                subscription=config.azure_api_key,
                region=config.azure_region,
            )
            speech_config.speech_recognition_language = "en-US"

            audio_config = speechsdk.audio.AudioConfig(filename=audio_file_path)
            speech_recognizer = speechsdk.SpeechRecognizer(
                speech_config=speech_config,
                audio_config=audio_config,
            )

            # NOTE(review): recognize_once() appears to return after a single
            # utterance, so long recordings may come back truncated — confirm
            # against the SDK docs whether continuous recognition is needed.
            result = speech_recognizer.recognize_once()

            if result.reason == speechsdk.ResultReason.RecognizedSpeech:
                return result.text
            if result.reason == speechsdk.ResultReason.NoMatch:
                raise RuntimeError("No speech could be recognized")
            if result.reason == speechsdk.ResultReason.Canceled:
                cancellation_details = result.cancellation_details
                raise RuntimeError(
                    f"Speech recognition canceled: {cancellation_details.reason}"
                )
            raise RuntimeError("Unknown error in speech recognition")

        # recognize_once() blocks until recognition finishes; run it in a
        # worker thread so the event loop is not stalled meanwhile.
        return await asyncio.to_thread(_recognize)

    except Exception as e:
        logger.error(f"Azure transcription error: {e}")
        raise HTTPException(
            status_code=500, detail=f"Azure transcription failed: {str(e)}"
        ) from e


def _upload_suffix(filename: Optional[str]) -> str:
    """Return a safe temp-file suffix taken from the uploaded file's name."""
    extension = os.path.splitext(filename or "")[1]
    # Accept only short, purely alphanumeric extensions (the name is client
    # supplied); anything else falls back to the previous ".wav" default.
    if 1 < len(extension) <= 10 and extension[1:].isalnum():
        return extension.lower()
    return ".wav"


@router.post("/transcribe")
async def transcribe_audio(
    file: UploadFile = File(...), user: User = Depends(current_user)
) -> TranscriptionResponse:
    """Transcribe an uploaded audio file with the configured STT engine.

    The upload is written to a temporary file, handed to the engine selected
    in the stored audio configuration, and the temporary file is removed
    afterwards whether or not transcription succeeded.

    Args:
        file: The uploaded audio; its content type must start with "audio/".
        user: Resolved through ``current_user`` to gate the route.

    Returns:
        A ``TranscriptionResponse`` carrying the transcript text.

    Raises:
        HTTPException: Status 400 when no engine is configured, the upload is
            not audio, or the configured engine is unsupported; status 500
            when transcription fails.
    """
    config = get_audio_config_from_store()

    if not config.stt.engine:
        raise HTTPException(
            status_code=400,
            detail="No STT engine configured. Please configure audio settings in admin panel.",
        )

    if not file.content_type or not file.content_type.startswith("audio/"):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Please upload an audio file.",
        )

    # Resolve the engine before touching the upload so an unsupported engine
    # is rejected without reading the file or writing it to disk.
    transcribers = {
        "openai": transcribe_with_openai,
        "deepgram": transcribe_with_deepgram,
        "azure": transcribe_with_azure,
    }
    transcriber = transcribers.get(config.stt.engine)
    if transcriber is None:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported STT engine: {config.stt.engine}",
        )

    temp_path: Optional[str] = None
    try:
        # NOTE(review): the whole upload is read into memory with no size cap.
        content = await file.read()

        # Keep the upload's own extension: providers that infer the audio
        # format from the file name would misread e.g. a WebM recording
        # saved as ".wav".
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=_upload_suffix(file.filename)
        ) as temp_file:
            temp_path = temp_file.name
            temp_file.write(content)
        # The handle is closed here, so the engines can reopen the file by
        # path (and it can be unlinked) on platforms that forbid sharing an
        # open temporary file.

        transcript = await transcriber(temp_path, config.stt)
        return TranscriptionResponse(text=transcript)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Transcription error: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Transcription failed: {str(e)}",
        ) from e
    finally:
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
29 changes: 29 additions & 0 deletions backend/onyx/server/audio/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from pydantic import BaseModel
from typing import Optional


class STTConfigForm(BaseModel):
    """Speech-to-text settings: the selected engine plus per-provider options."""

    # Identifier of the selected engine; the empty default means none is set.
    engine: str = ""

    # OpenAI settings: API key, API base URL and transcription model name.
    openai_api_key: str = ""
    openai_api_base_url: str = "https://api.openai.com/v1"
    whisper_model: str = "whisper-1"

    # Deepgram settings.
    deepgram_api_key: str = ""

    # Azure Speech settings: subscription key and service region.
    azure_api_key: str = ""
    azure_region: str = ""


class AudioConfig(BaseModel):
    """Top-level audio configuration; currently holds only the STT section."""

    # Speech-to-text settings (required, no default).
    stt: STTConfigForm


class TranscriptionResponse(BaseModel):
    """Response body carrying the result of a transcription."""

    # The transcript text.
    text: str


class TestEngineRequest(BaseModel):
    """Request body naming the audio engine whose configuration to check."""

    # Engine identifier, e.g. "openai".
    engine: str


class TestEngineResponse(BaseModel):
    """Outcome of an engine configuration check."""

    # Whether the check passed.
    success: bool
    # Optional human-readable explanation of the outcome.
    message: Optional[str] = None
6 changes: 3 additions & 3 deletions deployment/docker_compose/docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ services:
- HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-}

# Enables the use of bedrock models or IAM Auth
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-}
- AWS_REGION_NAME=${AWS_REGION_NAME:-}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-dummy_key}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-dummy_secret}
- AWS_REGION_NAME=${AWS_REGION_NAME:-us-east-1}

# Enterprise Edition only
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
Expand Down
Loading
Loading