daswer123 · abeiro · Jun 11, 2024 · Sep 2, 2024 · Sep 12, 2024 · Sep 12, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,11 +1,10 @@
 dist
 venv
-speakers
 output
 test
 models
 xtts_api_server/models
 *.pyc
 xtts_api_server/RealtimeTTS/engines/coqui_engine_old.py
 xtts_models
-modules-xtts.txt
+modules-xtts.txt
diff --git a/XTTSv2_AIFF.ipynb b/XTTSv2_AIFF.ipynb
@@ -0,0 +1,116 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "**Run the next block. Restart the session when asked, then run the same block again until everything installs.**\n",
+        "\n",
+        "The last message should look like this (the ERROR is expected and can be ignored):\n",
+        "\n",
+        "\n",
+        "```\n",
+        "ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+        "torchvision 0.19.0+cu121 requires torch==2.4.0, but you have torch 2.1.1+cu121 which is incompatible\n",
+        "Successfully installed torch-2.1.1+cu121 torchaudio-2.1.1+cu121 triton-2.1.0\n",
+        "\n",
+        "```\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "FmMMJppi2-Gh"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "lT4ieCTnr_L7"
+      },
+      "outputs": [],
+      "source": [
+        "!apt-get update && apt-get install --no-install-recommends -y sox libsox-fmt-all curl wget gcc git git-lfs build-essential libaio-dev libsndfile1 ssh ffmpeg sshpass portaudio19-dev libportaudio2\n",
+        "!git clone https://github.com/abeiro/xtts-api-server\n",
+        "%cd xtts-api-server\n",
+        "!python3 -m pip install --upgrade pip wheel ninja virtualenv\n",
+        "!pip install setuptools==68.1.2\n",
+        "!pip install deepspeed\n",
+        "!pip install -r requirements.txt\n",
+        "!pip install torch==2.1.1+cu121 torchaudio==2.1.1+cu121 --index-url https://download.pytorch.org/whl/cu121"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "**Run this once to download the model and compile DeepSpeed. Once uvicorn starts, stop the cell and proceed to the next block.**\n",
+        "\n",
+        "\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "u1Ir_s5U3ZcT"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!python3 -m xtts_api_server --listen --deepspeed"
+      ],
+      "metadata": {
+        "id": "Xs5dXNL43VFB"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "**Run this block and note the Cloudflare tunnel address it prints. Wait for this message:**\n",
+        "\n",
+        "```\n",
+        "INFO:     Uvicorn running on http://0.0.0.0:8020 (Press CTRL+C to quit)\n",
+        "\n",
+        "```\n",
+        "\n",
+        "Use http://generated-domain-name:80 in the AI-FF configuration, where generated-domain-name is the Cloudflare address noted above.\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "xOowr61z3nT2"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#Run\n",
+        "!rm nohup.out\n",
+        "!wget -c https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O cloudflared-linux-amd64\n",
+        "!chmod +x cloudflared-linux-amd64\n",
+        "!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:8020 &\n",
+        "!sleep 8\n",
+        "!cat nohup.out\n",
+        "!python3 -m xtts_api_server --listen --deepspeed"
+      ],
+      "metadata": {
+        "id": "xuLmVnEW2PYM"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
diff --git a/requirements.txt b/requirements.txt
@@ -1,18 +1,169 @@
+absl-py==2.1.0
+aiohappyeyeballs==2.4.4
+aiohttp==3.11.11
+aiosignal==1.3.2
+annotated-types==0.7.0
+anyascii==0.3.2
+anyio==4.7.0
+attrs==24.3.0
+audioread==3.0.1
+babel==2.16.0
+bangla==0.0.2
+blis==0.7.11
+bnnumerizer==0.0.2
+bnunicodenormalizer==0.1.7
+catalogue==2.0.10
+certifi==2024.12.14
+cffi==1.17.1
+charset-normalizer==3.4.1
+click==8.1.8
+cloudpathlib==0.16.0
+confection==0.1.5
+contourpy==1.3.1
+coqpit==0.0.17
+coqpit-config==0.1.2
+coqui-tts==0.24.1
+coqui-tts-trainer==0.2.0
+cutlet==0.4.0
+cycler==0.12.1
+cymem==2.0.10
+Cython==3.0.11
+dateparser==1.1.8
+decorator==5.1.1
+deepspeed==0.16.2
+docopt==0.6.2
+einops==0.8.0
+emoji==2.8.0
+encodec==0.1.1
+fastapi==0.115.6
+filelock==3.16.1
+fonttools==4.55.3
+frozenlist==1.5.0
+fsspec==2024.12.0
+fugashi==1.4.0
+g2pkk==0.1.2
+grpcio==1.68.1
+gruut==2.2.3
+gruut-ipa==0.13.0
+gruut_lang_de==2.0.1
+gruut_lang_en==2.0.1
+gruut_lang_es==2.0.1
+gruut_lang_fr==2.0.2
+h11==0.14.0
+hangul-romanize==0.1.0
+hjson==3.1.0
+huggingface-hub==0.27.0
+idna==3.10
+inflect==7.5.0
+jaconv==0.4.0
+jamo==0.4.1
+jieba==0.42.1
+Jinja2==3.1.5
+joblib==1.4.2
+jsonlines==1.2.0
+kiwisolver==1.4.8
+langcodes==3.5.0
+language_data==1.3.0
+lazy_loader==0.4
+librosa==0.10.2.post1
+llvmlite==0.43.0
+loguru==0.7.3
+marisa-trie==1.2.1
+Markdown==3.7
+MarkupSafe==3.0.2
+matplotlib==3.10.0
+mecab-python3==1.0.10
+mojimoji==0.0.13
+more-itertools==10.5.0
+mpmath==1.3.0
+msgpack==1.1.0
+multidict==6.1.0
+murmurhash==1.0.11
+networkx==2.8.8
+ninja==1.11.1.3
+nltk==3.8.1
+num2words==0.5.14
+numba==0.60.0
+numpy==1.26.4
+nvidia-cublas-cu12==12.4.5.8
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+nvidia-cuda-runtime-cu12==12.4.127
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.2.1.3
+nvidia-curand-cu12==10.3.5.147
+nvidia-cusolver-cu12==11.6.1.9
+nvidia-cusparse-cu12==12.3.1.170
+nvidia-nccl-cu12==2.21.5
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvtx-cu12==12.4.127
+packaging==24.2
+pillow==11.0.0
+pip==24.3.1
+platformdirs==4.3.6
+pooch==1.8.2
+preshed==3.0.9
+propcache==0.2.1
+protobuf==5.29.2
+psutil==6.1.1
+py-cpuinfo==9.0.0
 PyAudio==0.2.14
-requests==2.31.0
+pycparser==2.22
+pydantic==2.10.4
+pydantic_core==2.27.2
+pydub==0.25.1
+pynndescent==0.5.13
+pyparsing==3.2.0
+pypinyin==0.53.0
+pysbd==0.3.4
+python-crfsuite==0.9.11
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.20
 pyttsx3==2.90
+pytz==2024.2
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.31.0
+safetensors==0.4.5
+scikit-learn==1.6.0
+scipy==1.14.1
+setuptools==75.6.0
+six==1.17.0
+smart-open==6.4.0
+sniffio==1.3.1
+soundfile==0.12.1
+soxr==0.5.0.post1
+spacy==3.7.2
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+srsly==2.5.0
+stanza==1.6.1
+starlette==0.41.3
 stream2sentence==0.2.2
-fastapi>=0.104.1
-loguru
-pydantic
-pydub
-python-dotenv
-torch
-torchaudio
-uvicorn
-cutlet
-fugashi[unidic-lite]
-coqui-tts[languages]==0.24.1
+SudachiDict-core==20241021
+SudachiPy==0.6.9
+sympy==1.13.1
+tensorboard==2.18.0
+tensorboard-data-server==0.7.2
+thinc==8.2.5
+threadpoolctl==3.5.0
+tokenizers==0.15.2
+torch==2.5.1
+torchaudio==2.5.1
+tqdm==4.67.1
 transformers==4.36.2
-uuid
-spacy==3.7.2 
+triton==3.1.0
+typeguard==4.4.1
+typer==0.9.4
+typing_extensions==4.12.2
+tzlocal==5.2
+umap-learn==0.5.7
+unidic-lite==1.0.8
+urllib3==2.3.0
+uuid==1.30
+uvicorn==0.34.0
+wasabi==1.1.3
+weasel==0.3.4
+Werkzeug==3.1.3
+yarl==1.18.3
diff --git a/speakers/TheNarrator.wav b/speakers/TheNarrator.wav
diff --git a/xtts_api_server/server.py b/xtts_api_server/server.py
@@ -1,5 +1,5 @@
 from TTS.api import TTS
-from fastapi import BackgroundTasks, FastAPI, HTTPException, Request, Query
+from fastapi import BackgroundTasks, FastAPI, HTTPException, Request, Query, File, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse,StreamingResponse
 
@@ -203,6 +203,17 @@ def set_speaker_folder(speaker_req: SpeakerFolderRequest):
         logger.error(e)
         raise HTTPException(status_code=400, detail=str(e))
 
+@app.post("/upload_sample")
+async def upload_sample(wavFile: UploadFile = File(...)):
+    """Save an uploaded speaker sample into the active speaker folder.
+
+    Args:
+        wavFile: Multipart file field holding the sample. ``File(...)``
+            declares the field as required, so FastAPI rejects a request
+            that omits it instead of calling this handler without a file.
+
+    Returns:
+        A dict with the stored file name under the ``filename`` key.
+
+    Raises:
+        HTTPException: 400 when the upload carries no usable file name.
+    """
+    # The client controls the file name, so keep only its final component
+    # (for both "/" and "\" separators). This stops names such as
+    # "../../server.py" from being written outside the speaker folder.
+    safe_name = os.path.basename((wavFile.filename or "").replace("\\", "/"))
+    if safe_name in ("", ".", ".."):
+        raise HTTPException(status_code=400, detail="Invalid file name")
+
+    upload_dir = XTTS.speaker_folder
+    os.makedirs(upload_dir, exist_ok=True)
+    file_path = os.path.join(upload_dir, safe_name)
+    with open(file_path, "wb") as file_object:
+        file_object.write(await wavFile.read())
+    return {"filename": safe_name}
+
+
 @app.post("/switch_model")
 def switch_model(modelReq: ModelNameRequest):
     try: