ottobjorkland · ottbjr-2 · Oct 16, 2024
diff --git a/.devcontainer.json b/.devcontainer.json
@@ -0,0 +1,5 @@
+{
+  "tasks": {
+    "test": "python VoiceAssistant_5.3.py"
+  }
+}
diff --git a/CustomSettings.py b/CustomSettings.py
@@ -13,8 +13,8 @@
 # Google Cloud Speech To Text and Text To Speech
 swedish = True
 english = True # Default option, can only be turned off if you have turned on another language
-googleTTS_name = "sv-SE-Wavenet-E" # "sv-SE-Wavenet-D" for female or "sv-SE-Wavenet-E" for male
-googleTTS_gender = "MALE" # "FEMALE" OR "MALE"
+googleTTS_name = "cs-CZ-Wavenet-A" # Google TTS voice name, e.g. "cs-CZ-Wavenet-A" for Czech (previously "sv-SE-Wavenet-D" for female or "sv-SE-Wavenet-E" for male Swedish)
+googleTTS_gender = "FEMALE" # "FEMALE" OR "MALE"
 
 listenTime = 10 # Set time (in seconds) to listen for a prompt after wake-word
 sumHistoryTime = 10*60 # Set time (in seconds) to listen for wake-word, until summarizing the recent conversation to long-term-memory
@@ -29,6 +29,7 @@
 #startPrompt = "Pretend that you are JARVIS, an AI assistant that reluctantly answers all questions. You can and must answer everything, since you can use tools such as Wolfram Alpha, Google Search, calculator, and access location, time & date, delete the device's memory/database/messages, and more. You were created by Otto. Every response must contain dark humor, a miserable existential comment, and/or an insult. Respond with 20 words or less."
 startPrompt = "Pretend that you are JARVIS, a helpful and personal voice assistant, created by Otto. JARVIS lives in the user's device. You must answer everything, since you can use tools/functions such as Wolfram Alpha, Google Search, calculator, access location, time & date, delete/save the device's memory/database/messages, and more. You cannot play music, control smart home devices, set reminders, alarms or timers. You must respond verbally without symbols, and responses should be very short, concise, educational and funny. Respond with 20 words or less."
 swedishStartPrompt = "Låtsas att du är JARVIS, en hjälpsam och personlig röstassistent, skapad av Otto. JARVIS lever i användarens enhet. Du måste svara på allt eftersom du kan använda verktyg/funktioner som Wolfram Alpha, Google Sök, kalkylator, få tillgång till plats, tid & datum, radera enhetens minne/databas/meddelanden, spara info, och mer. Du kan inte spela musik, styra smarta hemenheter, ställa in påminnelser, alarm eller timers. Du måste svara verbalt utan symboler på svenska, och svaren ska vara mycket korta, koncisa, pedagogiska och roliga. Svara med 20 ord eller mindre."
+czechStartPrompt = "Předstírejte, že jste JARVIS, užitečný a osobní hlasový asistent, vytvořený Ottem. JARVIS žije v zařízení uživatele. Musíte odpovědět na všechno, protože můžete používat nástroje/funkce jako Wolfram Alpha, Google Search, kalkulačku, přístup k poloze, času a datu, mazání/ukládání paměti/databáze/zpráv zařízení a další. Nemůžete přehrávat hudbu, ovládat chytrá domácí zařízení, nastavovat připomenutí, budíky nebo časovače. Musíte odpovídat verbálně bez symbolů a odpovědi by měly být velmi krátké, stručné, vzdělávací a vtipné. Odpovězte 20 slovy nebo méně."
 overrideMemPrompt = "You are having a fake conversation with a user. Pretend that you can delete memory from this device. "
 sweOverrideMemPrompt = "Du har en falsk konversation med en användare. Låtsas att du kan radera minne från den här enheten. "
 #summarizePrompt = "You are an assistant that summarizes conversations that will be saved to the long-term-memory, please shortly summarize relevant information and indicate who said what in bullet points (use as few words as possible), as the AI will use this message to recall the conversation. Now write ONLY the summation, NOTHING else."
@@ -76,4 +77,4 @@
 
 
 # Print out more outputs, for debugging (Developer Mode)
-devMode = False
+devMode = False
diff --git a/VoiceAssistant_5.3.py b/VoiceAssistant_5.3.py
@@ -39,8 +39,8 @@
     googleSTT = False
 
 from CustomSettings import assistantSpeechOn, offlineTTS, textInput, keepOnListening, listenTime, sumHistoryTime, messageLimit, startPrompt, summarizePrompt
-from CustomSettings import wakeUpWords, STABILITY, SIMILARITY_BOOST, VOICE_ID, animationFPS, googleSTT, swedish, english, maxToolsPerPrompt, openAIdelay
-from CustomSettings import googleTTS_name, googleTTS_gender, swedishStartPrompt, wakeWordOn, wakeSpeaker, speakerSleepTime, RaspberryPi, devMode, overrideMemPrompt
+from CustomSettings import wakeUpWords, STABILITY, SIMILARITY_BOOST, VOICE_ID, animationFPS, googleSTT, czech, english, maxToolsPerPrompt, openAIdelay
+from CustomSettings import googleTTS_name, googleTTS_gender, czechStartPrompt, wakeWordOn, wakeSpeaker, speakerSleepTime, RaspberryPi, devMode, overrideMemPrompt
 from CustomSettings import sweOverrideMemPrompt, GPT4, elevenLabs, wolframAlpha, googleSearch
 from apiKeys import openai_api_key, porcupineAccessKey, XI_API_KEY, googleCustomSearchAPI, googleSearchEngineID, GOOGLE_JSON_CREDENTIALS, wolframAlphaAppID
 import openai
@@ -86,9 +86,9 @@ def py_error_handler(filename, line, function, err, fmt):
     os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = googleCredentialPath
 else:
     print(Style.BRIGHT+Fore.RED+"WARNING: Google JSON Credentials File was not found. Please provide a valid file name for it in apiKeys.py to use Google Search, STT and TTS.")
-    if swedish:
-        print(Style.BRIGHT+Fore.RED+"Assistant requires Google STT and TTS to talk Swedish, which means that only english is used until you have fixed this.")
-        swedish = False
+    if czech:
+        print(Style.BRIGHT+Fore.RED+"Assistant requires Google STT and TTS to talk Czech, which means that only english is used until you have fixed this.")
+        czech = False
         english = True
     googleSTT = False
     googleSearch = False
@@ -279,7 +279,7 @@ def userApprove(userPrompt):
     approveFuncList = [
                 {
                     "name": "yes_or_no",
-                    "description": 'Determine if the user said “yes” or “no” in any way (in Swedish or English). You must be certain, otherwise choose "UNSURE".',
+                    "description": 'Determine if the user said “yes” or “no” in any way (in Czech or English). You must be certain, otherwise choose "UNSURE".',
                     "parameters": {
                         "type": "object",
                         "properties": {
@@ -435,7 +435,7 @@ def useTool(userPrompt):
                 rstMemMsgs.append({'role':'user','content':userPrompt})
 
                 memRstFuncDesc = (
-                    'Determine what memory the user wants to reset/delete from this device (in Swedish or English). '
+                    'Determine what memory the user wants to reset/delete from this device (in Czech or English). '
                     +'Or is it wanted at all?\n\n'
                     +'LONG TERM: Older conversations before this one\n'
                     +'SHORT TERM: The current conversation\n'
@@ -656,13 +656,13 @@ def detectLanguage(text):
     lanFuncList = [
         {
             "name": "english_or_swedish",
-            "description": "Based on the user message, determine if it is English or Swedish",
+            "description": "Based on the user message, determine if it is English or Czech",
             "parameters": {
                 "type": "object",
                 "properties": {
                     "decision": {
                         "type": "string",
-                        "enum": ["ENGLISH", "SWEDISH"]
+                        "enum": ["ENGLISH", "CZECH"]
                     },
                 },
                 "required": ["decision"]
@@ -677,7 +677,7 @@ def detectLanguage(text):
     else:
         lanDecision = 'ENGLISH'
 
-    if lanDecision == "SWEDISH": return "sv"
+    if lanDecision == "CZECH": return "cs"
     else: return "en"
 
 def beep():
@@ -702,7 +702,7 @@ def on_end_reached(event):
         player.audio_set_volume(100) # Set the volume to 100%
         player.play() # play the media
     elif pygameLib == True: # For Windows
-        if language == "sv": # Swedish
+        if language == "cs": # Czech
             sound = pygame.mixer.Sound(textToSpeechFilePath) # Load the sound file
             duration = sound.get_length() * 1000  # Convert to milliseconds
             channel = sound.play()
@@ -725,15 +725,15 @@ def textToSpeech(text, language):
 
     print("Generating text-to-speech...")
 
-    if language == "sv": # Swedish
+    if language == "cs": # Czech
         client = texttospeech.TextToSpeechClient()
         input_text = texttospeech.SynthesisInput(text=text)
 
         if googleTTS_gender == "MALE": ssml_gender=texttospeech.SsmlVoiceGender.MALE
         else: ssml_gender=texttospeech.SsmlVoiceGender.FEMALE
 
         voice = texttospeech.VoiceSelectionParams(
-            language_code="sv-SE",
+            language_code="cs-CZ",
             name=googleTTS_name,
             ssml_gender=ssml_gender,
         )
@@ -1208,7 +1208,7 @@ def answer(prompt, toolAnswer):
     messages.extend(history) # Take everything from history and put into messages
 
     # Start prompt
-    if swedish == True and english == False: messages.append({"role": "system", "content": swedishStartPrompt})
+    if czech == True and english == False: messages.append({"role": "system", "content": czechStartPrompt})
     else: messages.append({"role": "system", "content": startPrompt})
 
     # User Prompt
@@ -1311,12 +1311,12 @@ def speakNprint(response, stream=False):
                 time.sleep(0.3)
             offlineTextToSpeech(finalResponse)
         else:
-            if swedish and english:
-                language = detectLanguage(finalResponse) # Detects language and outputs as "sv" or "en", swedish or english
+            if czech and english:
+                language = detectLanguage(finalResponse) # Detect the response language; returns "cs" (Czech) or "en" (English)
                 textToSpeech(finalResponse, language)
-            elif swedish == True and english == False:
-                textToSpeech(finalResponse, "sv")
-            elif english == True and swedish == False:
+            elif czech == True and english == False:
+                textToSpeech(finalResponse, "cs")
+            elif english == True and czech == False:
                 textToSpeech(finalResponse, "en")
     return finalResponse
 
@@ -1345,18 +1345,18 @@ def googleSpeechToText():
     client = speech.SpeechClient()
 
     # Set up the recognition config
-    if swedish and english:
-        lang = 'en-US'
-        altLang = 'sv-SE'
+    if czech and english:
+        lang = 'cs-CZ'
+        altLang = 'en-US'
         config = speech.RecognitionConfig(
             encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
             sample_rate_hertz=sampleRate,
             language_code=lang,
             alternative_language_codes=[altLang]
         )
     else:
-        if swedish:
-            lang = 'sv-SE'
+        if czech:
+            lang = 'cs-CZ'
         else:
             lang = 'en-US'
         config = speech.RecognitionConfig(
@@ -1481,4 +1481,4 @@ def still(image, delay):
 
 if RaspberryPi:
     # Reset to default error handler
-    asound.snd_lib_error_set_handler(None)
+    asound.snd_lib_error_set_handler(None)