Merge pull request #3 from timhagel/cleanup

timhagel · web-flow · commit fd6143369b7c · 2024-06-26T21:22:01.000-07:00
Cleanup
diff --git a/Dockerfile b/Dockerfile
@@ -1,10 +1,11 @@
 FROM python:3.9
+COPY requirements.txt .
+RUN pip install -r requirements.txt
 RUN git clone https://github.com/myshell-ai/MeloTTS.git
 WORKDIR /MeloTTS
-RUN pip install -e .
+RUN pip install --no-cache-dir -e .
 RUN python -m unidic download
 WORKDIR /
 COPY . .
-RUN pip install -r requirements.txt
 EXPOSE 8080
 CMD ["python", "app.py"]
diff --git a/app.py b/app.py
@@ -8,35 +8,48 @@
 import tempfile
 
 load_dotenv()
-DEFAULT_SPEED = float(os.getenv('DEFAULT_SPEED'))
-DEFAULT_LANGUAGE = os.getenv('DEFAULT_LANGUAGE')
-DEFAULT_SPEAKER_ID = os.getenv('DEFAULT_SPEAKER_ID')
-device = 'auto' # Will automatically use GPU if available
+DEFAULT_SPEED = float(os.getenv("DEFAULT_SPEED"))
+DEFAULT_LANGUAGE = os.getenv("DEFAULT_LANGUAGE")
+DEFAULT_SPEAKER_ID = os.getenv("DEFAULT_SPEAKER_ID")
+device = "auto"  # Will automatically use GPU if available
+
 
 class TextModel(BaseModel):
     text: str
     speed: float = DEFAULT_SPEED
     language: str = DEFAULT_LANGUAGE
     speaker_id: str = DEFAULT_SPEAKER_ID
 
+
 app = FastAPI()
 
+
 def get_tts_model(body: TextModel):
     return TTS(language=body.language, device=device)
 
+
 @app.post("/convert/tts")
-async def create_upload_file(body: TextModel = Body(...), model: TTS = Depends(get_tts_model)):
+async def create_upload_file(
+    body: TextModel = Body(...), model: TTS = Depends(get_tts_model)
+):
     speaker_ids = model.hps.data.spk2id
 
-    # Create a temporary file
-    output_path = body.language + "_" + body.speaker_id + ".wav"
-    model.tts_to_file(body.text, speaker_ids[body.speaker_id], output_path, speed=body.speed)
+    print(os.path.basename(body.text))
+
+    # Use a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+        output_path = tmp.name
+        model.tts_to_file(
+            body.text, speaker_ids[body.speaker_id], output_path, speed=body.speed
+        )
 
-    print(os.path.basename(output_path))
-    # Return the audio file
-    response = FileResponse(output_path, media_type="audio/mpeg", filename=os.path.basename(output_path))
+        # Return the audio file; delete=False above keeps the temp file on disk so it still exists when the response is sent
+        response = FileResponse(
+            output_path, media_type="audio/mpeg", filename=os.path.basename(output_path)
+        )
 
     return response
 
+
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=8080)
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,168 @@
-fastapi[all] == 0.110.0
-python-dotenv == 1.0.0
+absl-py==2.1.0
+aiofiles==23.2.1
+altair==5.2.0
+annotated-types==0.6.0
+anyascii==0.3.2
+anyio==4.3.0
+attrs==23.2.0
+audioread==3.0.1
+Babel==2.14.0
+boto3==1.34.64
+botocore==1.34.64
+cached_path==1.6.2
+cachetools==5.3.3
+certifi==2024.2.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+click==8.1.7
+cn2an==0.5.22
+colorama==0.4.6
+contourpy==1.2.0
+cycler==0.12.1
+dateparser==1.1.8
+decorator==5.1.1
+Deprecated==1.2.14
+Distance==0.1.3
+dnspython==2.6.1
+docopt==0.6.2
+email_validator==2.1.1
+eng-to-ipa==0.0.2
+exceptiongroup==1.2.0
+fastapi==0.110.0
+ffmpy==0.3.2
+filelock==3.13.1
+fonttools==4.50.0
+fsspec==2024.3.0
+fugashi==1.3.0
+g2p-en==2.1.0
+g2pkk==0.1.2
+google-api-core==2.17.1
+google-auth==2.28.2
+google-cloud-core==2.4.1
+google-cloud-storage==2.15.0
+google-crc32c==1.5.0
+google-resumable-media==2.7.0
+googleapis-common-protos==1.63.0
+gradio==4.21.0
+gradio_client==0.12.0
+grpcio==1.62.1
+gruut==2.2.3
+gruut-ipa==0.13.0
+gruut-lang-de==2.0.0
+gruut-lang-en==2.0.0
+gruut-lang-es==2.0.0
+gruut-lang-fr==2.0.2
+h11==0.14.0
+httpcore==1.0.4
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.21.4
+idna==3.6
+importlib_metadata==7.0.2
+importlib_resources==6.3.1
+inflect==7.0.0
+itsdangerous==2.1.2
+jaconv==0.3.4
+jamo==0.4.1
+jieba==0.42.1
+Jinja2==3.1.3
+jmespath==1.0.1
+joblib==1.3.2
+jsonlines==1.2.0
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+langid==1.1.6
+librosa==0.9.1
+llvmlite==0.42.0
+loguru==0.7.2
+Markdown==3.6
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.8.3
+mdurl==0.1.2
+mecab-python3==1.0.5
+networkx==2.8.8
+nltk==3.8.1
+num2words==0.5.12
+numba==0.59.0
+numpy==1.26.4
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+orjson==3.9.15
+packaging==24.0
+pandas==2.2.1
+pillow==10.2.0
+pip==23.0.1
+plac==1.4.3
+platformdirs==4.2.0
+pooch==1.8.1
+proces==0.1.7
+protobuf==4.25.3
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
+pycparser==2.21
+pydantic==2.6.4
+pydantic_core==2.16.3
+pydantic-extra-types==2.6.0
+pydantic-settings==2.2.1
+pydub==0.25.1
+Pygments==2.17.2
+pykakasi==2.2.1
+pyparsing==3.1.2
+pypinyin==0.50.0
+python-crfsuite==0.9.10
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.0
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.34.0
+regex==2023.12.25
+requests==2.31.0
+resampy==0.4.3
+rich==13.7.1
+rpds-py==0.18.0
+rsa==4.9
+ruff==0.3.3
+s3transfer==0.10.1
+scikit-learn==1.4.1.post1
+scipy==1.12.0
+semantic-version==2.10.0
+setuptools==58.1.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+soundfile==0.12.1
+starlette==0.36.3
+tensorboard==2.16.2
+tensorboard-data-server==0.7.2
+threadpoolctl==3.3.0
+tokenizers==0.13.3
+tomlkit==0.12.0
+toolz==0.12.1
+torch==1.13.1
+torchaudio==0.13.1
+tqdm==4.66.2
+transformers==4.27.4
+txtsplit==1.0.0
+typer==0.9.0
+typing_extensions==4.10.0
+tzdata==2024.1
+tzlocal==5.2
+ujson==5.9.0
+Unidecode==1.3.7
+unidic==1.1.0
+unidic-lite==1.0.8
+urllib3==1.26.18
+uvicorn==0.28.0
+uvloop==0.19.0
+wasabi==0.10.1
+watchfiles==0.21.0
+websockets==11.0.3
+Werkzeug==3.0.1
+wheel==0.43.0
+wrapt==1.16.0
+zipp==3.18.1