0.2.5

daswer123 · daswer123 · commit c088b208de51 · 2023-11-21T13:37:44.000+03:00
Fix model loading
diff --git a/README.md b/README.md
@@ -1,20 +1,23 @@
-#  A simple FastAPI Server to run XTTSv2
+# A simple FastAPI Server to run XTTSv2
 
 This project is inspired by the [silero-api-server](https://github.com/ouoertheo/silero-api-server)
 repo and uses [XTTSv2](https://github.com/coqui-ai/TTS).
 
-TODO: This is will be to serve the TTS extension in [SillyTavern](https://github.com/Cohee1207/SillyTavern) soon. The TTS module or server can be used any way you wish.
-UPD: There's already a result
+Created a PR for SillyTavern: [here](https://github.com/SillyTavern/SillyTavern/pull/1383)
+
+The TTS module or server can be used any way you wish.
 
 ## Installation
+
 `pip install xtts-api-server`
 
-I strongly recommend installing pytorch with CUDA so that the entire process is on the video card 
+I strongly recommend installing PyTorch with CUDA so that the entire process runs on the GPU.
 
 `pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118`
 
 ## Starting Server
-`python -m xtts-api-server` will run on default ip and port (localhost:8020)
+
+`python -m xtts_api_server` will run on the default IP and port (0.0.0.0:8020)
 
 ```
 usage: xtts-api-server [-h] [-o HOST] [-p PORT] [-sf SPEAKER_FOLDER] [-o OUTPUT]
@@ -33,13 +36,17 @@ The first time you run or generate, you may need to confirm that you agree to us
 The model is loaded into memory when the server starts, which can take about a minute.
 
 # API Docs
+
 API Docs can be accessed from [http://localhost:8020/docs](http://localhost:8020/docs)
 
 # Voice Samples
+
 You can find a sample in this repository. By default, generated samples are saved to `/output/output.wav`; you can change this location — see the API documentation for details.
 
 # Selecting Folder
+
 You can change the folders for speakers and the folder for output via the API.
 
 # Get Speakers
+
 Once you have at least one file in your speakers folder, you can get its name via the API; after that, you only need to specify the file name.
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "xtts-api-server"
-version = "0.2"
+version = "0.2.5"
 authors = [
   { name="daswer123", email="daswerq123@gmail.com" },
 ]
diff --git a/xtts_api_server/server.py b/xtts_api_server/server.py
@@ -23,6 +23,10 @@
 app = FastAPI()
 XTTS = TTSWrapper(OUTPUT_FOLDER,SPEAKER_FOLDER)
 
+# Load model
+logger.info("The model starts to load, wait about a minute")
+XTTS.load_model() 
+
 # Add CORS middleware 
 origins = ["*"]
 app.add_middleware(
diff --git a/xtts_api_server/tts_funcs.py b/xtts_api_server/tts_funcs.py
@@ -35,6 +35,7 @@ def __init__(self,output_folder = "./output", speaker_folder="./speakers"):
         self.output_folder = output_folder
 
         self.create_directories()
+        # self.load_model()
     
     def load_model(self):
         self.model = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(self.device)
@@ -101,9 +102,6 @@ def list_languages(self):
 
     def process_tts_to_file(self, text, speaker_name_or_path, language, file_name_or_path="out.wav"):
         try:
-            # Load the model if it's not already loaded
-            if not hasattr(self, "model"):
-                self.load_model()
             # Check if the speaker path is a .wav file or just the name
             if speaker_name_or_path.endswith('.wav'):
                 if os.path.isabs(speaker_name_or_path):

Original file line number	Diff line number	Diff line change
`@@ -4,7 +4,7 @@ build-backend = "hatchling.build"`
`4`	`4`
`5`	`5`	`[project]`
`6`	`6`	`name = "xtts-api-server"`
`7`		`-version = "0.2"`
	`7`	`+version = "0.2.5"`
`8`	`8`	`authors = [`
`9`	`9`	`{ name="daswer123", email="daswerq123@gmail.com" },`
`10`	`10`	`]`