Add query optimization back in, but don't run translation or paraphrasing for chat queries

suzinyou · suzinyou · commit 40187b86fdd6 · 2025-04-17T10:48:34.000+07:00
diff --git a/core_backend/app/llm_call/llm_prompts.py b/core_backend/app/llm_call/llm_prompts.py
@@ -268,7 +268,9 @@ class ChatHistory:
 
             {{
                 "message_type": "The type of the user's LATEST MESSAGE. List of valid
-                options are: {valid_message_types}"
+                options are: {valid_message_types}",
+                "query": "The vector database query that you have constructed based on
+                the user's LATEST MESSAGE and the conversation history."
             }}
 
             Do NOT attempt to answer the user's question/concern. Only output the JSON
@@ -283,6 +285,7 @@ class ChatHistoryConstructSearchQuery(BaseModel):
         """Pydantic model for the output of the construct search query chat history."""
 
         message_type: Literal["FOLLOW-UP", "NEW"]
+        query: str
 
     @staticmethod
     def parse_json(*, chat_type: Literal["search"], json_str: str) -> dict[str, str]:
diff --git a/core_backend/app/llm_call/process_input.py b/core_backend/app/llm_call/process_input.py
@@ -114,7 +114,8 @@ async def _identify_language(
         litellm_model=LITELLM_MODEL_LANGUAGE_DETECT,
         metadata=metadata,
         system_message=LANGUAGE_ID_PROMPT,
-        user_message=query_refined.query_text,
+        # Always use the original query text for language and script detection
+        user_message=query_refined.query_text_original,
     )
 
     cleaned_json_str = remove_json_markdown(text=json_str)
@@ -256,9 +257,10 @@ async def wrapper(
             The appropriate response object.
         """
 
-        query_refined, response = await _translate_question(
-            query_refined=query_refined, response=response
-        )
+        if not query_refined.chat_query_params:
+            query_refined, response = await _translate_question(
+                query_refined=query_refined, response=response
+            )
         response = await func(query_refined, response, *args, **kwargs)
 
         return response
@@ -492,9 +494,11 @@ async def wrapper(
             The appropriate response object.
         """
 
-        query_refined, response = await _paraphrase_question(
-            query_refined=query_refined, response=response
-        )
+        if not query_refined.chat_query_params:
+            query_refined, response = await _paraphrase_question(
+                query_refined=query_refined, response=response
+            )
+
         response = await func(query_refined, response, *args, **kwargs)
 
         return response
diff --git a/core_backend/app/question_answer/routers.py b/core_backend/app/question_answer/routers.py
@@ -844,6 +844,13 @@ async def get_user_query_and_response(
         workspace_id=workspace_id,
     )
 
+    # In case of a chat query, use the optimized query as the base query_text.
+    # Note that for language identification, we use query_text_original.
+    if user_query_refined.chat_query_params:
+        user_query_refined.query_text = user_query_refined.chat_query_params.pop(
+            "search_query"
+        )
+
     # Prepare the placeholder response object.
     response_template = QueryResponse(
         debug_info={},
@@ -1072,6 +1079,7 @@ async def init_user_query_and_chat_histories(
         "chat_history": user_assistant_chat_history,
         "chat_params": chat_params,
         "message_type": search_query_json_response["message_type"],
+        "search_query": search_query_json_response["query"],
         "redis_client": redis_client,
         "session_id": session_id,
     }
diff --git a/core_backend/app/question_answer/utils.py b/core_backend/app/question_answer/utils.py
@@ -23,6 +23,8 @@ def get_context_string_from_search_results(
     for key, result in search_results.items():
         if not isinstance(result, QuerySearchResult):
             result = QuerySearchResult(**result)
-        context_list.append(f"{key}. {result.title}\n{result.text}")
+        context_list.append(
+            f"<document id={key}> \n**{result.title}**\n\n{result.text}\n</document>"
+        )
     context_string = "\n\n".join(context_list)
     return context_string
diff --git a/core_backend/tests/api/test_chat.py b/core_backend/tests/api/test_chat.py
@@ -85,6 +85,7 @@ async def test_init_user_query_and_chat_histories(redis_client: aioredis.Redis)
             chat_query_params["chat_cache_key"] == f"chatCache:{user_query.session_id}"
         )
         assert chat_query_params["message_type"] == "NEW"
+        assert chat_query_params["search_query"] == "stomachache and possible remedies"
 
 
 async def test__ask_llm_async() -> None:

Original file line number	Diff line number	Diff line change
`@@ -85,6 +85,7 @@ async def test_init_user_query_and_chat_histories(redis_client: aioredis.Redis)`
`85`	`85`	`chat_query_params["chat_cache_key"] == f"chatCache:{user_query.session_id}"`
`86`	`86`	`)`
`87`	`87`	`assert chat_query_params["message_type"] == "NEW"`
	`88`	`+ assert chat_query_params["search_query"] == "stomachache and possible remedies"`
`88`	`89`
`89`	`90`
`90`	`91`	`async def test__ask_llm_async() -> None:`