added realtime synthesis for speakers with multiple speaker_wavs

lendot · lendot · commit 631427bd72c4 · 2023-12-19T08:08:28.000-05:00
diff --git a/xtts_api_server/RealtimeTTS/engines/coqui_engine.py b/xtts_api_server/RealtimeTTS/engines/coqui_engine.py
@@ -401,7 +401,7 @@ def send_command(self, command, data):
         message = {'command': command, 'data': data}
         self.parent_synthesize_pipe.send(message)            
             
-    def set_cloning_reference(self, cloning_reference_wav: str):
+    def set_cloning_reference(self, cloning_reference_wav: Union[str, List[str]]):
         """
         Send an 'update_reference' command and wait for a response.
         """
@@ -594,7 +594,7 @@ def get_voices(self):
 
         return voice_file_names 
     
-    def set_voice(self, voice: str):
+    def set_voice(self, voice: Union[str, List[str]]):
         """
         Sets the voice to be used for speech synthesis.
         """
@@ -637,4 +637,4 @@ def shutdown(self):
 
         # Wait for the process to terminate
         self.synthesize_process.join()
-        logging.info('Worker process has been terminated')
+        logging.info('Worker process has been terminated')
diff --git a/xtts_api_server/tts_funcs.py b/xtts_api_server/tts_funcs.py
@@ -286,7 +286,7 @@ def get_speaker_wav(self, speaker_name_or_path):
             wav_file = f"{full_path}.wav"
             if os.path.isdir(full_path):
                 # multi-sample speaker
-                speaker_wav = self.get_wav_files(full_path)
+                speaker_wav = [ os.path.join(full_path,wav) for wav in self.get_wav_files(full_path) ]
                 if len(speaker_wav) == 0:
                     raise ValueError(f"no wav files found in {full_path}")
             elif os.path.isfile(wav_file):