Skip to content

Commit fc033f1

Browse files
committed
custom models, text file as input and cache results
1 parent 2576f3e commit fc033f1

File tree

7 files changed

+151
-35
lines changed

7 files changed

+151
-35
lines changed

README.md

+8-3
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ docker compose up # or with -d to run in background
7272
Use the `--deepspeed` flag to process the result fast ( 2-3x acceleration )
7373

7474
```
75-
usage: xtts_api_server [-h] [-hs HOST] [-p PORT] [-sf SPEAKER_FOLDER] [-o OUTPUT] [-t TUNNEL_URL] [-ms MODEL_SOURCE] [--lowvram] [--deepspeed] [--streaming-mode] [--stream-play-sync]
75+
usage: xtts_api_server [-h] [-hs HOST] [-p PORT] [-sf SPEAKER_FOLDER] [-o OUTPUT] [-t TUNNEL_URL] [-ms MODEL_SOURCE] [--use-cache] [--lowvram] [--deepspeed] [--streaming-mode] [--stream-play-sync]
7676
7777
Run XTTSv2 within a FastAPI application
7878
@@ -85,14 +85,19 @@ options:
8585
-o OUTPUT, --output Output folder
8686
-t TUNNEL_URL, --tunnel URL of tunnel used (e.g: ngrok, localtunnel)
8787
-ms MODEL_SOURCE, --model-source ["api","apiManual","local"]
88-
-v MODEL_VERSION, --version You can choose any version of the model, keep in mind that if you choose model-source api, only the latest version will be loaded
88+
-v MODEL_VERSION, --version You can download the official model or your own model, official version you can find [here](https://huggingface.co/coqui/XTTS-v2/tree/main) the model version name is the same as the branch name [v2.0.2,v2.0.3, main] etc.
89+
--use-cache Enables caching of results: generated audio is saved, and if the same request is repeated you receive the cached file instead of re-generating it
8990
--lowvram The mode in which the model will be stored in RAM and when the processing will move to VRAM, the difference in speed is small
9091
--deepspeed allows you to speed up processing by several times, automatically downloads the necessary libraries
9192
--streaming-mode Enables streaming mode, currently has certain limitations, as described below.
9293
--streaming-mode-improve Enables streaming mode, includes an improved streaming mode that consumes 2gb more VRAM and uses a better tokenizer and more context.
9394
--stream-play-sync Additional flag for streaming mode that allows you to play all audio one at a time without interruption
9495
```
9596

97+
You can pass a file path as the text input; the path will be detected and the contents of the file will be voiced
98+
99+
You can load your own model: create a folder inside `models` and place the model there with its configs. Note that the folder must contain 3 files: `config.json`, `vocab.json` and `model.pth`
100+
96101
If you want your host to listen, use -hs 0.0.0.0
97102

98103
The -t or --tunnel flag is needed so that when you get speakers via get you get the correct link to hear the preview. More info [here](https://imgur.com/a/MvpFT59)
@@ -103,7 +108,7 @@ Model-source defines in which format you want to use xtts:
103108
2. `apiManual` - loads version 2.0.2 by default, but you can specify the version via the -v flag, model saves into the models folder and uses the `tts_to_file` function from the TTS api
104109
3. `api` - will load the latest version of the model. The -v flag won't work.
105110

106-
All versions of the XTTSv2 model can be found [here](https://huggingface.co/coqui/XTTS-v2/tree/v2.0.2) in the branches
111+
All versions of the XTTSv2 model can be found [here](https://huggingface.co/coqui/XTTS-v2/tree/main) the model version name is the same as the branch name [v2.0.2,v2.0.3, main] etc.
107112

108113
The first time you run or generate, you may need to confirm that you agree to use XTTS.
109114

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "xtts-api-server"
7-
version = "0.6.8"
7+
version = "0.7.0"
88
authors = [
99
{ name="daswer123", email="daswerq123@gmail.com" },
1010
]

xtts_api_server/RealtimeTTS/engines/coqui_engine.py

+19-8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from xtts_api_server.tts_funcs import official_model_list
12
from torch.multiprocessing import Process, Pipe, Event, set_start_method
23
from .base_engine import BaseEngine
34
from typing import Union, List
@@ -92,7 +93,16 @@ def __init__(self,
9293
ModelManager().download_model(model_name)
9394
else:
9495
logging.info(f"Local XTTS Model: \"{specific_model}\" specified")
95-
self.local_model_path = self.download_model(specific_model, local_models_path)
96+
is_official_model = False
97+
for model in official_model_list:
98+
if self.specific_model == model:
99+
is_official_model = True
100+
break
101+
102+
if is_official_model:
103+
self.local_model_path = self.download_model(specific_model, local_models_path)
104+
else:
105+
self.local_model_path = os.path.join(local_models_path,specific_model)
96106

97107
self.synthesize_process = Process(target=CoquiEngine._synthesize_worker, args=(child_synthesize_pipe, model_name, cloning_reference_wav, language, self.main_synthesize_ready_event, level, self.speed, thread_count, stream_chunk_size, full_sentences, overlap_wav_len, temperature, length_penalty, repetition_penalty, top_k, top_p, enable_text_splitting, use_mps, self.local_model_path, use_deepspeed, self.voices_path))
98108
self.synthesize_process.start()
@@ -540,28 +550,29 @@ def download_file(url, destination):
540550
progress_bar.close()
541551

542552
@staticmethod
543-
def download_model(model_name = "2.0.2", local_models_path = None):
553+
def download_model(model_name = "v2.0.2", local_models_path = None):
544554

545555
# Creating a unique folder for each model version
546556
if local_models_path and len(local_models_path) > 0:
547-
model_folder = os.path.join(local_models_path, f'v{model_name}')
557+
model_folder = os.path.join(local_models_path, f'{model_name}')
548558
logging.info(f"Local models path: \"{model_folder}\"")
549559
else:
550-
model_folder = os.path.join(os.getcwd(), 'models', f'v{model_name}')
560+
model_folder = os.path.join(os.getcwd(), 'models', f'{model_name}')
551561
logging.info(f"Checking for models within application directory: \"{model_folder}\"")
552562

553563
os.makedirs(model_folder, exist_ok=True)
564+
print(model_name)
554565

555566
files = {
556-
"config.json": f"https://huggingface.co/coqui/XTTS-v2/raw/v{model_name}/config.json",
557-
"model.pth": f"https://huggingface.co/coqui/XTTS-v2/resolve/v{model_name}/model.pth?download=true",
558-
"vocab.json": f"https://huggingface.co/coqui/XTTS-v2/raw/v{model_name}/vocab.json"
567+
"config.json": f"https://huggingface.co/coqui/XTTS-v2/raw/{model_name}/config.json",
568+
"model.pth": f"https://huggingface.co/coqui/XTTS-v2/resolve/{model_name}/model.pth?download=true",
569+
"vocab.json": f"https://huggingface.co/coqui/XTTS-v2/raw/{model_name}/vocab.json"
559570
}
560571

561572
for file_name, url in files.items():
562573
file_path = os.path.join(model_folder, file_name)
563574
if not os.path.exists(file_path):
564-
logger.info(f"Downloading {file_name} for Model v{model_name}...")
575+
logger.info(f"Downloading {file_name} for Model {model_name}...")
565576
CoquiEngine.download_file(url, file_path)
566577
# r = requests.get(url, allow_redirects=True)
567578
# with open(file_path, 'wb') as f:

xtts_api_server/__main__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111
parser.add_argument("-t", "--tunnel", default="", type=str, help="URL of tunnel used (e.g: ngrok, localtunnel)")
1212
parser.add_argument("-ms", "--model-source", default="local", choices=["api","apiManual", "local"],
1313
help="Define the model source: 'api' for latest version from repository, apiManual for 2.0.2 model and api inference or 'local' for using local inference and model v2.0.2.")
14-
parser.add_argument("-v", "--version", default="2.0.2", type=str, help="You can specify which version of xtts to use,This version will be used everywhere in local, api and apiManual.")
14+
parser.add_argument("-v", "--version", default="v2.0.2", type=str, help="You can specify which version of xtts to use,This version will be used everywhere in local, api and apiManual.")
1515
parser.add_argument("--lowvram", action='store_true', help="Enable low vram mode which switches the model to RAM when not actively processing.")
1616
parser.add_argument("--deepspeed", action='store_true', help="Enables deepspeed mode, speeds up processing by several times.")
17+
parser.add_argument("--use-cache", action='store_true', help="Enables caching of results, your results will be saved and if there will be a repeated request, you will get a file instead of generation.")
1718
parser.add_argument("--streaming-mode", action='store_true', help="Enables streaming mode, currently needs a lot of work.")
1819
parser.add_argument("--streaming-mode-improve", action='store_true', help="Includes an improved streaming mode that consumes 2gb more VRAM and uses a better tokenizer, good for languages such as Chinese")
1920
parser.add_argument("--stream-play-sync", action='store_true', help="Additional flag for streaming mod that allows you to play all audio one at a time without interruption")
@@ -27,6 +28,7 @@
2728
os.environ['TUNNEL_URL'] = args.tunnel # it is necessary to correctly return correct previews in list of speakers
2829
os.environ['MODEL_SOURCE'] = args.model_source # Set environment variable for the model source
2930
os.environ["MODEL_VERSION"] = args.version # Specify version of XTTS model
31+
os.environ["USE_CACHE"] = str(args.use_cache).lower() # Set lowvram mode
3032
os.environ["DEEPSPEED"] = str(args.deepspeed).lower() # Set lowvram mode
3133
os.environ["LOWVRAM_MODE"] = str(args.lowvram).lower() # Set lowvram mode
3234
os.environ["STREAM_MODE"] = str(args.streaming_mode).lower() # Enable Streaming mode

xtts_api_server/modeldownloader.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -120,13 +120,13 @@ def check_stream2sentence_version():
120120
def download_model(this_dir,model_version):
121121
# Define paths
122122
base_path = this_dir / 'models'
123-
model_path = base_path / f'v{model_version}'
123+
model_path = base_path / f'{model_version}'
124124

125125
# Define files and their corresponding URLs
126126
files_to_download = {
127-
"config.json": f"https://huggingface.co/coqui/XTTS-v2/raw/v{model_version}/config.json",
128-
"model.pth": f"https://huggingface.co/coqui/XTTS-v2/resolve/v{model_version}/model.pth?download=true",
129-
"vocab.json": f"https://huggingface.co/coqui/XTTS-v2/raw/v{model_version}/vocab.json"
127+
"config.json": f"https://huggingface.co/coqui/XTTS-v2/raw/{model_version}/config.json",
128+
"model.pth": f"https://huggingface.co/coqui/XTTS-v2/resolve/{model_version}/model.pth?download=true",
129+
"vocab.json": f"https://huggingface.co/coqui/XTTS-v2/raw/{model_version}/vocab.json"
130130
}
131131

132132
# Check and create directories

xtts_api_server/server.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@
2424
SPEAKER_FOLDER = os.getenv('SPEAKER', 'speakers')
2525
BASE_URL = os.getenv('BASE_URL', '127.0.0.1:8020')
2626
MODEL_SOURCE = os.getenv("MODEL_SOURCE", "local")
27-
MODEL_VERSION = os.getenv("MODEL_VERSION","2.0.2")
27+
MODEL_VERSION = os.getenv("MODEL_VERSION","v2.0.2")
2828
LOWVRAM_MODE = os.getenv("LOWVRAM_MODE") == 'true'
2929
DEEPSPEED = os.getenv("DEEPSPEED") == 'true'
30+
USE_CACHE = os.getenv("USE_CACHE") == 'true'
31+
3032
# STREAMING VARS
3133
STREAM_MODE = os.getenv("STREAM_MODE") == 'true'
3234
STREAM_MODE_IMPROVE = os.getenv("STREAM_MODE_IMPROVE") == 'true'
@@ -41,17 +43,14 @@
4143

4244
# Create an instance of the TTSWrapper class and server
4345
app = FastAPI()
44-
XTTS = TTSWrapper(OUTPUT_FOLDER,SPEAKER_FOLDER,LOWVRAM_MODE,MODEL_SOURCE,MODEL_VERSION,DEVICE,DEEPSPEED)
46+
XTTS = TTSWrapper(OUTPUT_FOLDER,SPEAKER_FOLDER,LOWVRAM_MODE,MODEL_SOURCE,MODEL_VERSION,DEVICE,DEEPSPEED,USE_CACHE)
4547

4648
# Create version string
4749
version_string = ""
48-
if MODEL_SOURCE == "api":
50+
if MODEL_SOURCE == "api" or MODEL_VERSION == "main":
4951
version_string = "lastest"
5052
else:
51-
version_string = "v"+MODEL_VERSION
52-
53-
if MODEL_SOURCE == "api" and MODEL_SOURCE != "2.0.2":
54-
logger.warning("Attention you have specified flag -v but you have selected --model-source api, please change --model-souce to apiManual or local to use the specified version, otherwise the latest version of the model will be loaded.")
53+
version_string = MODEL_VERSION
5554

5655
# Load model
5756
# logger.info(f"The model {version_string} starts to load,wait until it loads")
@@ -74,6 +73,9 @@
7473
else:
7574
XTTS.load_model()
7675

76+
if USE_CACHE:
77+
logger.info("You have enabled caching, this option enables caching of results, your results will be saved and if there is a repeat request, you will get a file instead of generation")
78+
7779
# Add CORS middleware
7880
origins = ["*"]
7981
app.add_middleware(

0 commit comments

Comments
 (0)