Skip to content

Commit fd61433

Browse files
authored
Merge pull request #3 from timhagel/cleanup
Cleanup
2 parents e29ebd5 + 0628d5e commit fd61433

File tree

3 files changed

+195
-15
lines changed

3 files changed

+195
-15
lines changed

Dockerfile

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
11
FROM python:3.9
2+
COPY requirements.txt .
3+
RUN pip install -r requirements.txt
24
RUN git clone https://github.com/myshell-ai/MeloTTS.git
35
WORKDIR /MeloTTS
4-
RUN pip install -e .
6+
RUN pip install --no-cache-dir -e .
57
RUN python -m unidic download
68
WORKDIR /
79
COPY . .
8-
RUN pip install -r requirements.txt
910
EXPOSE 8080
1011
CMD ["python", "app.py"]

app.py

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -8,35 +8,48 @@
88
import tempfile
99

1010
load_dotenv()
11-
DEFAULT_SPEED = float(os.getenv('DEFAULT_SPEED'))
12-
DEFAULT_LANGUAGE = os.getenv('DEFAULT_LANGUAGE')
13-
DEFAULT_SPEAKER_ID = os.getenv('DEFAULT_SPEAKER_ID')
14-
device = 'auto' # Will automatically use GPU if available
11+
DEFAULT_SPEED = float(os.getenv("DEFAULT_SPEED"))
12+
DEFAULT_LANGUAGE = os.getenv("DEFAULT_LANGUAGE")
13+
DEFAULT_SPEAKER_ID = os.getenv("DEFAULT_SPEAKER_ID")
14+
device = "auto" # Will automatically use GPU if available
15+
1516

1617
class TextModel(BaseModel):
1718
text: str
1819
speed: float = DEFAULT_SPEED
1920
language: str = DEFAULT_LANGUAGE
2021
speaker_id: str = DEFAULT_SPEAKER_ID
2122

23+
2224
app = FastAPI()
2325

26+
2427
def get_tts_model(body: TextModel):
2528
return TTS(language=body.language, device=device)
2629

30+
2731
@app.post("/convert/tts")
28-
async def create_upload_file(body: TextModel = Body(...), model: TTS = Depends(get_tts_model)):
32+
async def create_upload_file(
33+
body: TextModel = Body(...), model: TTS = Depends(get_tts_model)
34+
):
2935
speaker_ids = model.hps.data.spk2id
3036

31-
# Create a temporary file
32-
output_path = body.language + "_" + body.speaker_id + ".wav"
33-
model.tts_to_file(body.text, speaker_ids[body.speaker_id], output_path, speed=body.speed)
37+
print(os.path.basename(body.text))
38+
39+
# Use a temporary file
40+
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
41+
output_path = tmp.name
42+
model.tts_to_file(
43+
body.text, speaker_ids[body.speaker_id], output_path, speed=body.speed
44+
)
3445

35-
print(os.path.basename(output_path))
36-
# Return the audio file
37-
response = FileResponse(output_path, media_type="audio/mpeg", filename=os.path.basename(output_path))
46+
# Return the audio file, ensure the file is not deleted until after the response is sent
47+
response = FileResponse(
48+
output_path, media_type="audio/mpeg", filename=os.path.basename(output_path)
49+
)
3850

3951
return response
4052

53+
4154
if __name__ == "__main__":
4255
uvicorn.run(app, host="0.0.0.0", port=8080)

requirements.txt

Lines changed: 168 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,168 @@
1-
fastapi[all] == 0.110.0
2-
python-dotenv == 1.0.0
1+
absl-py==2.1.0
2+
aiofiles==23.2.1
3+
altair==5.2.0
4+
annotated-types==0.6.0
5+
anyascii==0.3.2
6+
anyio==4.3.0
7+
attrs==23.2.0
8+
audioread==3.0.1
9+
Babel==2.14.0
10+
boto3==1.34.64
11+
botocore==1.34.64
12+
cached_path==1.6.2
13+
cachetools==5.3.3
14+
certifi==2024.2.2
15+
cffi==1.16.0
16+
charset-normalizer==3.3.2
17+
click==8.1.7
18+
cn2an==0.5.22
19+
colorama==0.4.6
20+
contourpy==1.2.0
21+
cycler==0.12.1
22+
dateparser==1.1.8
23+
decorator==5.1.1
24+
Deprecated==1.2.14
25+
Distance==0.1.3
26+
dnspython==2.6.1
27+
docopt==0.6.2
28+
email_validator==2.1.1
29+
eng-to-ipa==0.0.2
30+
exceptiongroup==1.2.0
31+
fastapi==0.110.0
32+
ffmpy==0.3.2
33+
filelock==3.13.1
34+
fonttools==4.50.0
35+
fsspec==2024.3.0
36+
fugashi==1.3.0
37+
g2p-en==2.1.0
38+
g2pkk==0.1.2
39+
google-api-core==2.17.1
40+
google-auth==2.28.2
41+
google-cloud-core==2.4.1
42+
google-cloud-storage==2.15.0
43+
google-crc32c==1.5.0
44+
google-resumable-media==2.7.0
45+
googleapis-common-protos==1.63.0
46+
gradio==4.21.0
47+
gradio_client==0.12.0
48+
grpcio==1.62.1
49+
gruut==2.2.3
50+
gruut-ipa==0.13.0
51+
gruut-lang-de==2.0.0
52+
gruut-lang-en==2.0.0
53+
gruut-lang-es==2.0.0
54+
gruut-lang-fr==2.0.2
55+
h11==0.14.0
56+
httpcore==1.0.4
57+
httptools==0.6.1
58+
httpx==0.27.0
59+
huggingface-hub==0.21.4
60+
idna==3.6
61+
importlib_metadata==7.0.2
62+
importlib_resources==6.3.1
63+
inflect==7.0.0
64+
itsdangerous==2.1.2
65+
jaconv==0.3.4
66+
jamo==0.4.1
67+
jieba==0.42.1
68+
Jinja2==3.1.3
69+
jmespath==1.0.1
70+
joblib==1.3.2
71+
jsonlines==1.2.0
72+
jsonschema==4.21.1
73+
jsonschema-specifications==2023.12.1
74+
kiwisolver==1.4.5
75+
langid==1.1.6
76+
librosa==0.9.1
77+
llvmlite==0.42.0
78+
loguru==0.7.2
79+
Markdown==3.6
80+
markdown-it-py==3.0.0
81+
MarkupSafe==2.1.5
82+
matplotlib==3.8.3
83+
mdurl==0.1.2
84+
mecab-python3==1.0.5
85+
networkx==2.8.8
86+
nltk==3.8.1
87+
num2words==0.5.12
88+
numba==0.59.0
89+
numpy==1.26.4
90+
nvidia-cublas-cu11==11.10.3.66
91+
nvidia-cuda-nvrtc-cu11==11.7.99
92+
nvidia-cuda-runtime-cu11==11.7.99
93+
nvidia-cudnn-cu11==8.5.0.96
94+
orjson==3.9.15
95+
packaging==24.0
96+
pandas==2.2.1
97+
pillow==10.2.0
98+
pip==23.0.1
99+
plac==1.4.3
100+
platformdirs==4.2.0
101+
pooch==1.8.1
102+
proces==0.1.7
103+
protobuf==4.25.3
104+
pyasn1==0.5.1
105+
pyasn1-modules==0.3.0
106+
pycparser==2.21
107+
pydantic==2.6.4
108+
pydantic_core==2.16.3
109+
pydantic-extra-types==2.6.0
110+
pydantic-settings==2.2.1
111+
pydub==0.25.1
112+
Pygments==2.17.2
113+
pykakasi==2.2.1
114+
pyparsing==3.1.2
115+
pypinyin==0.50.0
116+
python-crfsuite==0.9.10
117+
python-dateutil==2.9.0.post0
118+
python-dotenv==1.0.0
119+
python-multipart==0.0.9
120+
pytz==2024.1
121+
PyYAML==6.0.1
122+
referencing==0.34.0
123+
regex==2023.12.25
124+
requests==2.31.0
125+
resampy==0.4.3
126+
rich==13.7.1
127+
rpds-py==0.18.0
128+
rsa==4.9
129+
ruff==0.3.3
130+
s3transfer==0.10.1
131+
scikit-learn==1.4.1.post1
132+
scipy==1.12.0
133+
semantic-version==2.10.0
134+
setuptools==58.1.0
135+
shellingham==1.5.4
136+
six==1.16.0
137+
sniffio==1.3.1
138+
soundfile==0.12.1
139+
starlette==0.36.3
140+
tensorboard==2.16.2
141+
tensorboard-data-server==0.7.2
142+
threadpoolctl==3.3.0
143+
tokenizers==0.13.3
144+
tomlkit==0.12.0
145+
toolz==0.12.1
146+
torch==1.13.1
147+
torchaudio==0.13.1
148+
tqdm==4.66.2
149+
transformers==4.27.4
150+
txtsplit==1.0.0
151+
typer==0.9.0
152+
typing_extensions==4.10.0
153+
tzdata==2024.1
154+
tzlocal==5.2
155+
ujson==5.9.0
156+
Unidecode==1.3.7
157+
unidic==1.1.0
158+
unidic-lite==1.0.8
159+
urllib3==1.26.18
160+
uvicorn==0.28.0
161+
uvloop==0.19.0
162+
wasabi==0.10.1
163+
watchfiles==0.21.0
164+
websockets==11.0.3
165+
Werkzeug==3.0.1
166+
wheel==0.43.0
167+
wrapt==1.16.0
168+
zipp==3.18.1

0 commit comments

Comments
 (0)