diff --git a/.gitignore b/.gitignore index 56ef947..b9ede6b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ dist venv -speakers output test models @@ -8,4 +7,4 @@ xtts_api_server/models *.pyc xtts_api_server/RealtimeTTS/engines/coqui_engine_old.py xtts_models -modules-xtts.txt \ No newline at end of file +modules-xtts.txt diff --git a/XTTSv2_AIFF.ipynb b/XTTSv2_AIFF.ipynb new file mode 100644 index 0000000..5c9fb28 --- /dev/null +++ b/XTTSv2_AIFF.ipynb @@ -0,0 +1,116 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "**Run next block, restart session when asked and run same block until everything installs.**\n", + "\n", + "Last message should be like this (yes, ignore ERROR)\n", + "\n", + "\n", + "```\n", + "ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "torchvision 0.19.0+cu121 requires torch==2.4.0, but you have torch 2.1.1+cu121 which is incompatible\n", + "Successfully installed torch-2.1.1+cu121 torchaudio-2.1.1+cu121 triton-2.1.0\n", + "\n", + "```\n", + "\n" + ], + "metadata": { + "id": "FmMMJppi2-Gh" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lT4ieCTnr_L7" + }, + "outputs": [], + "source": [ + "!apt-get update && apt-get install --no-install-recommends -y sox libsox-fmt-all curl wget gcc git git-lfs build-essential libaio-dev libsndfile1 ssh ffmpeg sshpass portaudio19-dev libportaudio2\n", + "!git clone https://github.com/abeiro/xtts-api-server\n", + "%cd xtts-api-server\n", + "!python3 -m pip install --upgrade pip wheel ninja virtualenv\n", + "!pip install setuptools==68.1.2\n", + "!pip install deepspeed\n", + "!pip install -r requirements.txt\n", + "!pip install torch==2.1.1+cu121 torchaudio==2.1.1+cu121 --index-url https://download.pytorch.org/whl/cu121" + ] + }, + { + "cell_type": "markdown", + "source": [ + "**Run this once to get model downloaded and deepspeed compiled. Once uvicorn starts, stop and proceed to next block**\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "u1Ir_s5U3ZcT" + } + }, + { + "cell_type": "code", + "source": [ + "!python3 -m xtts_api_server --listen --deepspeed" + ], + "metadata": { + "id": "Xs5dXNL43VFB" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "**Run and note cloudflare address. 
Wait for message:**\n", + "\n", + "```\n", + "INFO: Uvicorn running on http://0.0.0.0:8020 (Press CTRL+C to quit)\n", + "\n", + "```\n", + "\n", + "Use http://generated-domain-name:80 in AI-FF conf.\n", + "\n" + ], + "metadata": { + "id": "xOowr61z3nT2" + } + }, + { + "cell_type": "code", + "source": [ + "#Run\n", + "!rm nohup.out\n", + "!wget -c https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O cloudflared-linux-amd64\n", + "!chmod +x cloudflared-linux-amd64\n", + "!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:8020 &\n", + "!sleep 8\n", + "!cat nohup.out\n", + "!python3 -m xtts_api_server --listen --deepspeed" + ], + "metadata": { + "id": "xuLmVnEW2PYM" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/conf.sh b/conf.sh new file mode 100755 index 0000000..96bbcd2 --- /dev/null +++ b/conf.sh @@ -0,0 +1,67 @@ +#!/bin/bash +clear +cat << EOF +CHIM XTTS + +This will install CHIM XTTS. This is a high quality TTS service that works with Skyrim voices. +You can also generate your own voices. +However it will require around 4GB of VRAM! + +Options: +* deepseed = Uses more VRAM but it is faster. Does not work with 50xx series GPUs. +* lowvram = Uses less VRAM but it is slower. +* regular = Middle ground of both options above. RECOMMENDED! + +If you are not sure use lowvram. + +EOF + +if [ ! -d /home/dwemer/python-tts ]; then + exit "XTTSv2 not installed" +fi + +mapfile -t files < <(find /home/dwemer/xtts-api-server/ -name "start-*.sh") +# Check if any files were found + +if [ ${#files[@]} -eq 0 ]; then + echo "No files found matching the pattern." + exit 1 +fi + +# Display the files in a numbered list +echo -e "Select a an option from the list:\n\n" +for i in "${!files[@]}"; do + echo "$((i+1)). ${files[$i]}" +done + +echo "0. 
Disable service"; +echo + +# Prompt the user to make a selection +read -p "Select an option by picking the matching number: " selection + +# Validate the input + +if [ "$selection" -eq "0" ]; then + echo "Disabling service. Run this again to enable" + rm /home/dwemer/xtts-api-server/start.sh &>/dev/null + exit 0 +fi + +if ! [[ "$selection" =~ ^[0-9]+$ ]] || [ "$selection" -lt 1 ] || [ "$selection" -gt ${#files[@]} ]; then + echo "Invalid selection." + exit 1 +fi + +# Get the selected file +selected_file="${files[$((selection-1))]}" + +echo "You selected: $selected_file" + +ln -sf $selected_file /home/dwemer/xtts-api-server/start.sh + + +# Ensure all start scripts are executable +chmod +x /home/dwemer/xtts-api-server/start-*.sh 2>/dev/null || true + + diff --git a/ddistro_install.sh b/ddistro_install.sh new file mode 100755 index 0000000..b4c2377 --- /dev/null +++ b/ddistro_install.sh @@ -0,0 +1,106 @@ +#!/bin/bash + +cd /home/dwemer/xtts-api-server +python3 -m venv /home/dwemer/python-tts +source /home/dwemer/python-tts/bin/activate + +echo "This will take a while so please wait." +# Clean previous deepspeed stuff +rm /home/dwemer/.cache/torch_extensions/py311_cu130/transformer_inference/* +# Ask user about GPU +read -p "Are you using a GT10XX series GPU? 
(yes/no): " gpu_answer
+if [[ "$gpu_answer" =~ ^[Yy][Ee][Ss]$ || "$gpu_answer" =~ ^[Yy]$ ]]; then
+ cu_tag="cu118"
+ torch_url="https://download.pytorch.org/whl/${cu_tag}"
+ torch_ver="2.2.2"
+ torchaudio_ver="2.2.2"
+ python3 -m pip install --upgrade pip wheel ninja virtualenv
+ pip install setuptools==68.1.2
+ # Install app requirements without auto-pulling torch/torchaudio from deps
+ pip install --no-deps -r requirements.txt
+ # Pin to stable, CUDA-tagged PyTorch/Torchaudio that do not require TorchCodec
+ pip cache purge || true
+ pip uninstall -y torch torchaudio torchcodec torchvision || true
+ pip install --no-deps --no-cache-dir --index-url "$torch_url" "torch==${torch_ver}+${cu_tag}" "torchaudio==${torchaudio_ver}+${cu_tag}"
+ pip check || true
+ # Ensure fallback audio loader is available
+ pip install --no-cache-dir soundfile
+ sed -i 's/checkpoint = load_fsspec(model_path, map_location=torch.device("cpu"))\["model"\]/checkpoint = load_fsspec(model_path, map_location=torch.device("cpu"), weights_only=False)["model"]/' /home/dwemer/python-tts/lib/python3.11/site-packages/TTS/tts/models/xtts.py
+
+else
+ read -p "New: Use CUDA13. Needs windows updated drivers. RTX 50XX should use this. Use cuda13? 
(yes/no): " gpu5_answer + if [[ "$gpu5_answer" =~ ^[Yy][Ee][Ss]$ || "$gpu5_answer" =~ ^[Yy]$ ]]; then + cu_tag="cu130" + torch_url="https://download.pytorch.org/whl/${cu_tag}" + echo "Using torch: $torch_url" + python3 -m pip install --upgrade pip wheel ninja virtualenv + pip install setuptools==68.1.2 + # Install app requirements without auto-pulling torch/torchaudio from deps + #pip install --no-deps -r requirements5.txt --index-url=$torch_url + pip install -r requirements5.txt --extra-index-url=https://download.pytorch.org/whl/cu130 + # Pin to stable, CUDA-tagged PyTorch/Torchaudio that do not require TorchCodec + pip check || true + # Ensure fallback audio loader is available + pip install --no-cache-dir soundfile + # Fix symlinks + LIBDIR=$(python3 -c 'import site; print(site.getsitepackages()[0])')/nvidia/cu13/lib + for f in "$LIBDIR"/lib*.so.*; do + base=$(basename "$f") + link="${f%%.so.*}.so" + if [ ! -e "$link" ]; then + echo "Creating symlink: $(basename "$link") -> $base" + ln -s "$base" "$link" + fi + done + #pip install xtts-api-server #Fails + + else + cu_tag="cu128" + torch_url="https://download.pytorch.org/whl/${cu_tag}" + echo "Using torch: $torch_url" + python3 -m pip install --upgrade pip wheel ninja virtualenv + pip install setuptools==68.1.2 + # Install app requirements without auto-pulling torch/torchaudio from deps + pip install --no-deps -r requirements.txt + # Pin to stable, CUDA-tagged PyTorch/Torchaudio that do not require TorchCodec + pip cache purge || true + pip uninstall -y torch torchaudio torchcodec torchvision || true + #pip install --index-url "$torch_url" torch torchaudio torchcodec torchvision + pip install torch==2.7.0+cu128 torchaudio==2.7.0+cu128 torchvision==0.22.0+cu128 --index-url=https://download.pytorch.org/whl/ + pip check || true + # Ensure fallback audio loader is available + pip install --no-cache-dir soundfile + #pip install xtts-api-server #Fails + sed -i 's/checkpoint = load_fsspec(model_path, 
map_location=torch.device("cpu"))\["model"\]/checkpoint = load_fsspec(model_path, map_location=torch.device("cpu"), weights_only=False)["model"]/' /home/dwemer/python-tts/lib/python3.11/site-packages/TTS/tts/models/xtts.py + + fi +fi + + +cp /home/dwemer/TheNarrator.wav speakers/TheNarrator.wav + +source /home/dwemer/python-tts/bin/activate + +./conf.sh + +echo +echo "This will start CHIM XTTS to download the selected model" +echo "Wait for the message 'Uvicorn running on http://0.0.0.0:8020 (Press CTRL+C to quit)'" +echo "Then close this window. Press ENTER to continue" +read + +echo "please wait...." + +# Add CUDA to PATH if the directory exists +if [ -d "/home/dwemer/python-tts/lib/python3.11/site-packages/nvidia/cu13/lib/" ]; +then + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/dwemer/python-tts/lib/python3.11/site-packages/nvidia/cu13/lib/ + export PATH=/home/dwemer/python-tts/lib/python3.11/site-packages/nvidia/cu13/bin:$PATH + export CUDA_HOME=/home/dwemer/python-tts/lib/python3.11/site-packages/nvidia/cu13 +fi + +readlink start.sh | grep -q '/home/dwemer/xtts-api-server/start-deepspeed.sh' && export DEEPSPEED="--deepspeed" || export DEEPSPEED="" + +python -m xtts_api_server --listen $DEEPSPEED + +echo "Press Enter" diff --git a/requirements.txt b/requirements.txt index 2943cca..22e4dac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,18 +1,169 @@ +absl-py==2.1.0 +aiohappyeyeballs==2.4.4 +aiohttp==3.11.11 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyascii==0.3.2 +anyio==4.7.0 +attrs==24.3.0 +audioread==3.0.1 +babel==2.16.0 +bangla==0.0.2 +blis==0.7.11 +bnnumerizer==0.0.2 +bnunicodenormalizer==0.1.7 +catalogue==2.0.10 +certifi==2024.12.14 +cffi==1.17.1 +charset-normalizer==3.4.1 +click==8.1.8 +cloudpathlib==0.16.0 +confection==0.1.5 +contourpy==1.3.1 +coqpit==0.0.17 +coqpit-config==0.1.2 +coqui-tts==0.24.1 +coqui-tts-trainer==0.2.0 +cutlet==0.4.0 +cycler==0.12.1 +cymem==2.0.10 +Cython==3.0.11 +dateparser==1.1.8 +decorator==5.1.1 +deepspeed==0.16.2 
+docopt==0.6.2 +einops==0.8.0 +emoji==2.8.0 +encodec==0.1.1 +fastapi==0.115.6 +filelock==3.16.1 +fonttools==4.55.3 +frozenlist==1.5.0 +fsspec==2024.12.0 +fugashi==1.4.0 +g2pkk==0.1.2 +grpcio==1.68.1 +gruut==2.2.3 +gruut-ipa==0.13.0 +gruut_lang_de==2.0.1 +gruut_lang_en==2.0.1 +gruut_lang_es==2.0.1 +gruut_lang_fr==2.0.2 +h11==0.14.0 +hangul-romanize==0.1.0 +hjson==3.1.0 +huggingface-hub==0.27.0 +idna==3.10 +inflect==7.5.0 +jaconv==0.4.0 +jamo==0.4.1 +jieba==0.42.1 +Jinja2==3.1.5 +joblib==1.4.2 +jsonlines==1.2.0 +kiwisolver==1.4.8 +langcodes==3.5.0 +language_data==1.3.0 +lazy_loader==0.4 +librosa==0.10.2.post1 +llvmlite==0.43.0 +loguru==0.7.3 +marisa-trie==1.2.1 +Markdown==3.7 +MarkupSafe==3.0.2 +matplotlib==3.10.0 +mecab-python3==1.0.10 +mojimoji==0.0.13 +more-itertools==10.5.0 +mpmath==1.3.0 +msgpack==1.1.0 +multidict==6.1.0 +murmurhash==1.0.11 +networkx==2.8.8 +ninja==1.11.1.3 +nltk==3.8.1 +num2words==0.5.14 +numba==0.60.0 +numpy==1.26.4 +nvidia-cublas-cu12==12.4.5.8 +nvidia-cuda-cupti-cu12==12.4.127 +nvidia-cuda-nvrtc-cu12==12.4.127 +nvidia-cuda-runtime-cu12==12.4.127 +nvidia-cudnn-cu12==9.1.0.70 +nvidia-cufft-cu12==11.2.1.3 +nvidia-curand-cu12==10.3.5.147 +nvidia-cusolver-cu12==11.6.1.9 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-nccl-cu12==2.21.5 +nvidia-nvjitlink-cu12==12.4.127 +nvidia-nvtx-cu12==12.4.127 +packaging==24.2 +pillow==11.0.0 +pip==24.3.1 +platformdirs==4.3.6 +pooch==1.8.2 +preshed==3.0.9 +propcache==0.2.1 +protobuf==5.29.2 +psutil==6.1.1 +py-cpuinfo==9.0.0 PyAudio==0.2.14 -requests==2.31.0 +pycparser==2.22 +pydantic==2.10.4 +pydantic_core==2.27.2 +pydub==0.25.1 +pynndescent==0.5.13 +pyparsing==3.2.0 +pypinyin==0.53.0 +pysbd==0.3.4 +python-crfsuite==0.9.11 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-multipart==0.0.20 pyttsx3==2.90 +pytz==2024.2 +PyYAML==6.0.2 +regex==2024.11.6 +requests==2.31.0 +safetensors==0.4.5 +scikit-learn==1.6.0 +scipy==1.14.1 +setuptools==75.6.0 +six==1.17.0 +smart-open==6.4.0 +sniffio==1.3.1 +soundfile==0.12.1 
+soxr==0.5.0.post1 +spacy==3.7.2 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +srsly==2.5.0 +stanza==1.6.1 +starlette==0.41.3 stream2sentence==0.2.2 -fastapi>=0.104.1 -loguru -pydantic -pydub -python-dotenv -torch -torchaudio -uvicorn -cutlet -fugashi[unidic-lite] -coqui-tts[languages]==0.24.1 +SudachiDict-core==20241021 +SudachiPy==0.6.9 +sympy==1.13.1 +tensorboard==2.18.0 +tensorboard-data-server==0.7.2 +thinc==8.2.5 +threadpoolctl==3.5.0 +tokenizers==0.15.2 +torch==2.5.1 +torchaudio==2.5.1 +tqdm==4.67.1 transformers==4.36.2 -uuid -spacy==3.7.2 \ No newline at end of file +triton==3.1.0 +typeguard==4.4.1 +typer==0.9.4 +typing_extensions==4.12.2 +tzlocal==5.2 +umap-learn==0.5.7 +unidic-lite==1.0.8 +urllib3==2.3.0 +uuid==1.30 +uvicorn==0.34.0 +wasabi==1.1.3 +weasel==0.3.4 +Werkzeug==3.1.3 +yarl==1.18.3 diff --git a/requirements5.txt b/requirements5.txt new file mode 100644 index 0000000..8eb33eb --- /dev/null +++ b/requirements5.txt @@ -0,0 +1,179 @@ +absl-py==2.1.0 +aiohappyeyeballs==2.4.4 +aiohttp==3.11.11 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyascii==0.3.2 +anyio==4.7.0 +attrs==24.3.0 +audioread==3.0.1 +babel==2.16.0 +bangla==0.0.2 +blis==0.7.11 +bnnumerizer==0.0.2 +bnunicodenormalizer==0.1.7 +catalogue==2.0.10 +certifi==2024.12.14 +cffi==1.17.1 +charset-normalizer==3.4.1 +click==8.1.8 +cloudpathlib==0.16.0 +confection==0.1.5 +contourpy==1.3.1 +coqpit==0.0.17 +coqpit-config==0.1.2 +coqui-tts==0.25.3 +coqui-tts-trainer==0.2.0 +cutlet==0.4.0 +cycler==0.12.1 +cymem==2.0.10 +Cython==3.0.11 +dateparser==1.1.8 +decorator==5.1.1 +deepspeed==0.18.1 +distlib==0.4.0 +docopt==0.6.2 +einops==0.8.0 +emoji==2.8.0 +encodec==0.1.1 +fastapi==0.115.6 +filelock==3.16.1 +fonttools==4.55.3 +frozenlist==1.5.0 +fsspec==2024.12.0 +fugashi==1.4.0 +g2pkk==0.1.2 +grpcio==1.68.1 +gruut==2.4.0 +gruut-ipa==0.13.0 +gruut-lang-de==2.0.1 +gruut-lang-en==2.0.1 +gruut-lang-es==2.0.1 +gruut-lang-fr==2.0.2 +h11==0.14.0 +hangul-romanize==0.1.0 +hjson==3.1.0 +huggingface-hub==0.27.0 +idna==3.10 
+inflect==7.5.0 +jaconv==0.4.0 +jamo==0.4.1 +jieba==0.42.1 +Jinja2==3.1.5 +joblib==1.4.2 +jsonlines==1.2.0 +kiwisolver==1.4.8 +langcodes==3.5.0 +language_data==1.3.0 +lazy_loader==0.4 +librosa==0.10.2.post1 +llvmlite==0.43.0 +loguru==0.7.3 +marisa-trie==1.2.1 +Markdown==3.7 +MarkupSafe==3.0.2 +matplotlib==3.10.0 +mecab-python3==1.0.10 +mojimoji==0.0.13 +monotonic-alignment-search==0.2.1 +more-itertools==10.5.0 +mpmath==1.3.0 +msgpack==1.1.0 +multidict==6.1.0 +murmurhash==1.0.11 +networkx==2.8.8 +ninja==1.11.1.3 +nltk==3.8.1 +num2words==0.5.14 +numba==0.60.0 +numpy==1.26.4 +nvidia-cublas==13.0.0.19 +nvidia-cuda-cupti==13.0.48 +nvidia-cuda-cupti-cu13==0.0.0a0 +nvidia-cuda-nvrtc==13.0.48 +nvidia-cuda-nvrtc-cu13==0.0.0a0 +nvidia-cuda-runtime==13.0.48 +nvidia-cuda-runtime-cu13==0.0.0a0 +nvidia-cudnn-cu13==9.13.0.50 +nvidia-cufile==1.15.0.42 +nvidia-curand==10.4.0.35 +nvidia-cusparselt-cu13==0.8.0 +nvidia-nccl-cu13==2.27.7 +nvidia-nvjitlink==13.0.39 +nvidia-nvshmem-cu13==3.3.24 +nvidia-nvtx==13.0.39 +packaging==24.2 +pillow==11.0.0 +platformdirs==4.3.6 +pooch==1.8.2 +preshed==3.0.9 +propcache==0.2.1 +protobuf==5.29.2 +psutil==6.1.1 +py-cpuinfo==9.0.0 +PyAudio==0.2.14 +pycparser==2.22 +pydantic==2.10.4 +pydantic_core==2.27.2 +pydub==0.25.1 +pynndescent==0.5.13 +pyparsing==3.2.0 +pypinyin==0.53.0 +pysbd==0.3.4 +python-crfsuite==0.9.11 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-multipart==0.0.20 +pyttsx3==2.90 +pytz==2024.2 +PyYAML==6.0.2 +regex==2024.11.6 +requests==2.31.0 +safetensors==0.4.5 +scikit-learn==1.6.0 +scipy==1.14.1 +six==1.17.0 +smart-open==6.4.0 +sniffio==1.3.1 +soundfile==0.12.1 +soxr==0.5.0.post1 +spacy==3.7.2 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +srsly==2.5.0 +stanza==1.6.1 +starlette==0.41.3 +stream2sentence==0.2.2 +SudachiDict-core==20241021 +SudachiPy==0.6.9 +sympy==1.14.0 +tensorboard==2.18.0 +tensorboard-data-server==0.7.2 +thinc==8.2.5 +threadpoolctl==3.5.0 +tokenizers==0.20.3 +tqdm==4.67.1 +transformers==4.46.2 +triton==3.5.0 
+typeguard==4.4.1 +typer==0.9.4 +typing_extensions==4.12.2 +tzlocal==5.2 +umap-learn==0.5.7 +unidic-lite==1.0.8 +urllib3==2.3.0 +uuid==1.30 +uvicorn==0.34.0 +virtualenv==20.35.3 +wasabi==1.1.3 +weasel==0.3.4 +Werkzeug==3.1.3 +yarl==1.18.3 +torch==2.9.0+cu130 +torchaudio==2.9.0+cu130 +torchcodec==0.8.0 +torchvision==0.24.0+cu130 +nvidia-npp==13.0.1.2 +nvidia-cuda-nvcc +nvidia-cuda-profiler-api + diff --git a/speakers/TheNarrator.wav b/speakers/TheNarrator.wav new file mode 100644 index 0000000..70f5581 Binary files /dev/null and b/speakers/TheNarrator.wav differ diff --git a/start-deepspeed.sh b/start-deepspeed.sh new file mode 100755 index 0000000..2e475d6 --- /dev/null +++ b/start-deepspeed.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Add CUDA to PATH if the directory exists +if [ -d "/usr/local/cuda-12.8/bin" ]; then + export PATH="/usr/local/cuda-12.8/bin:$PATH" +fi + +cd /home/dwemer/xtts-api-server/ + +source /home/dwemer/python-tts/bin/activate + +# Add CUDA to PATH if the directory exists +if [ -d "/home/dwemer/python-tts/lib/python3.11/site-packages/nvidia/cu13/lib/" ]; +then + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/dwemer/python-tts/lib/python3.11/site-packages/nvidia/cu13/lib/ + export PATH=/home/dwemer/python-tts/lib/python3.11/site-packages/nvidia/cu13/bin:$PATH + export CUDA_HOME=/home/dwemer/python-tts/lib/python3.11/site-packages/nvidia/cu13 +fi + +python -m xtts_api_server --deepspeed --listen &> log.txt & + + + diff --git a/start-lowvram.sh b/start-lowvram.sh new file mode 100755 index 0000000..bc6d4c1 --- /dev/null +++ b/start-lowvram.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Add CUDA to PATH if the directory exists (matches deepspeed launcher) +if [ -d "/usr/local/cuda-12.8/bin" ]; then + export PATH="/usr/local/cuda-12.8/bin:$PATH" +fi + +cd /home/dwemer/xtts-api-server/ + +source /home/dwemer/python-tts/bin/activate + +date > log.txt + +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/dwemer/python-tts/lib/python3.11/site-packages/nvidia/cu13/lib/ + 
+python -m xtts_api_server --listen --lowvram &>> log.txt & + + + diff --git a/start-regular.sh b/start-regular.sh new file mode 100755 index 0000000..b6adf5f --- /dev/null +++ b/start-regular.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Add CUDA to PATH if the directory exists (matches deepspeed launcher) +if [ -d "/usr/local/cuda-12.8/bin" ]; then + export PATH="/usr/local/cuda-12.8/bin:$PATH" +fi + +cd /home/dwemer/xtts-api-server/ + +source /home/dwemer/python-tts/bin/activate + +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/dwemer/python-tts/lib/python3.11/site-packages/nvidia/cu13/lib/ + +date > log.txt + +python -m xtts_api_server --listen &>> log.txt & + + + diff --git a/xtts_api_server/server.py b/xtts_api_server/server.py index 6b2af02..c477f9e 100644 --- a/xtts_api_server/server.py +++ b/xtts_api_server/server.py @@ -1,5 +1,5 @@ from TTS.api import TTS -from fastapi import BackgroundTasks, FastAPI, HTTPException, Request, Query +from fastapi import BackgroundTasks, FastAPI, HTTPException, Request, Query, File, UploadFile from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse,StreamingResponse @@ -203,6 +203,17 @@ def set_speaker_folder(speaker_req: SpeakerFolderRequest): logger.error(e) raise HTTPException(status_code=400, detail=str(e)) +@app.post("/upload_sample") +async def upload_sample(wavFile: UploadFile = File(...)): + + UPLOAD_DIR = XTTS.speaker_folder + os.makedirs(UPLOAD_DIR, exist_ok=True) + file_path = os.path.join(UPLOAD_DIR, wavFile.filename) + with open(file_path, "wb") as file_object: + file_object.write(await wavFile.read()) + return {"filename": wavFile.filename} + + @app.post("/switch_model") def switch_model(modelReq: ModelNameRequest): try: