Update interactions

BenConstable9 · BenConstable9 · commit 307bd8804003 · 2025-02-04T16:19:22.000Z
diff --git a/text_2_sql/autogen/src/autogen_text_2_sql/autogen_text_2_sql.py b/text_2_sql/autogen/src/autogen_text_2_sql/autogen_text_2_sql.py
@@ -48,6 +48,11 @@ def __init__(self, state_store: StateStore, **kwargs):
 
         self._agentic_flow = None
 
+        self._generate_follow_up_questions = (
+            os.environ.get("Text2Sql__GenerateFollowUpQuestions", "True").lower()
+            == "true"
+        )
+
     def get_all_agents(self):
         """Get all agents for the complete flow."""
 
@@ -57,7 +62,12 @@ def get_all_agents(self):
 
         parallel_query_solving_agent = ParallelQuerySolvingAgent(**self.kwargs)
 
-        answer_agent = LLMAgentCreator.create("answer_agent", **self.kwargs)
+        if self._generate_follow_up_questions:
+            answer_agent = LLMAgentCreator.create(
+                "answer_with_follow_up_questions_agent", **self.kwargs
+            )
+        else:
+            answer_agent = LLMAgentCreator.create("answer_agent", **self.kwargs)
 
         agents = [
             user_message_rewrite_agent,
@@ -72,6 +82,7 @@ def termination_condition(self):
         """Define the termination condition for the chat."""
         termination = (
             SourceMatchTermination("answer_agent")
+            | SourceMatchTermination("answer_with_follow_up_questions_agent")
             # | TextMentionTermination(
             #     "[]",
             #     sources=["user_message_rewrite_agent"],
@@ -97,6 +108,11 @@ def unified_selector(self, messages):
         elif current_agent == "user_message_rewrite_agent":
             decision = "parallel_query_solving_agent"
         # Handle transition after parallel query solving
+        elif (
+            current_agent == "parallel_query_solving_agent"
+            and self._generate_follow_up_questions
+        ):
+            decision = "answer_with_follow_up_questions_agent"
         elif current_agent == "parallel_query_solving_agent":
             decision = "answer_agent"
 
@@ -148,10 +164,19 @@ def parse_message_content(self, content):
         # If all parsing attempts fail, return the content as-is
         return content
 
+    def last_message_by_agent(self, messages: list, agent_name: str) -> TextMessage:
+        """Get the content of the last message sent by a specific agent (None if absent)."""
+        for message in reversed(messages):
+            if message.source == agent_name:
+                return message.content
+        return None
+
     def extract_steps(self, messages: list) -> list[list[str]]:
         """Extract the steps messages from the answer."""
         # Only load sub-message results if we have a database result
-        sub_message_results = self.parse_message_content(messages[1].content)
+        sub_message_results = json.loads(
+            self.last_message_by_agent(messages, "user_message_rewrite_agent")
+        )
         logging.info("Steps Results: %s", sub_message_results)
 
         steps = sub_message_results.get("steps", [])
@@ -187,12 +212,18 @@ def extract_disambiguation_request(
 
     def extract_answer_payload(self, messages: list) -> AnswerWithSourcesPayload:
         """Extract the sources from the answer."""
-        answer = messages[-1].content
-        sql_query_results = self.parse_message_content(messages[-2].content)
+        answer_payload = json.loads(messages[-1].content)
+
+        logging.info("Answer Payload: %s", answer_payload)
+        sql_query_results = self.last_message_by_agent(
+            messages, "parallel_query_solving_agent"
+        )
 
         try:
             if isinstance(sql_query_results, str):
                 sql_query_results = json.loads(sql_query_results)
+            elif sql_query_results is None:
+                sql_query_results = {}
         except json.JSONDecodeError:
             logging.warning("Unable to read SQL query results: %s", sql_query_results)
             sql_query_results = {}
@@ -201,7 +232,7 @@ def extract_answer_payload(self, messages: list) -> AnswerWithSourcesPayload:
             steps = self.extract_steps(messages)
 
             logging.info("SQL Query Results: %s", sql_query_results)
-            payload = AnswerWithSourcesPayload(answer=answer, steps=steps)
+            payload = AnswerWithSourcesPayload(**answer_payload, steps=steps)
 
             if not isinstance(sql_query_results, dict):
                 logging.error(f"Expected dict, got {type(sql_query_results)}")
@@ -246,10 +277,9 @@ def extract_answer_payload(self, messages: list) -> AnswerWithSourcesPayload:
 
         except Exception as e:
             logging.error("Error processing results: %s", str(e))
+
             # Return payload with error context instead of empty
-            return AnswerWithSourcesPayload(
-                answer=f"{answer}\nError processing results: {str(e)}"
-            )
+            return AnswerWithSourcesPayload(**answer_payload)
 
     async def process_user_message(
         self,
@@ -293,7 +323,10 @@ async def process_user_message(
                     payload = ProcessingUpdatePayload(
                         message="Solving the query...",
                     )
-                elif message.source == "answer_agent":
+                elif (
+                    message.source == "answer_agent"
+                    or message.source == "answer_with_follow_up_questions_agent"
+                ):
                     payload = ProcessingUpdatePayload(
                         message="Generating the answer...",
                     )
@@ -302,7 +335,11 @@ async def process_user_message(
                 # Now we need to return the final answer or the disambiguation request
                 logging.info("TaskResult: %s", message)
 
-                if message.messages[-1].source == "answer_agent":
+                if (
+                    message.messages[-1].source == "answer_agent"
+                    or message.messages[-1].source
+                    == "answer_with_follow_up_questions_agent"
+                ):
                     # If the message is from the answer_agent, we need to return the final answer
                     payload = self.extract_answer_payload(message.messages)
                 elif message.messages[-1].source == "parallel_query_solving_agent":
diff --git a/text_2_sql/autogen/src/autogen_text_2_sql/creators/llm_agent_creator.py b/text_2_sql/autogen/src/autogen_text_2_sql/creators/llm_agent_creator.py
@@ -8,7 +8,8 @@
 from jinja2 import Template
 import logging
 from text_2_sql_core.structured_outputs import (
-    AnswerAgentWithFollowUpQuestionsAgentOutput,
+    AnswerAgentOutput,
+    AnswerWithFollowUpQuestionsAgentOutput,
     UserMessageRewriteAgentOutput,
 )
 from autogen_core.model_context import BufferedChatCompletionContext
@@ -114,8 +115,10 @@ def create(cls, name: str, **kwargs) -> AssistantAgent:
         structured_output = None
         if agent_file.get("structured_output", False):
             # Import the structured output agent
-            if name == "answer_agent_with_follow_up_questions":
-                structured_output = AnswerAgentWithFollowUpQuestionsAgentOutput
+            if name == "answer_agent":
+                structured_output = AnswerAgentOutput
+            elif name == "answer_with_follow_up_questions_agent":
+                structured_output = AnswerWithFollowUpQuestionsAgentOutput
             elif name == "user_message_rewrite_agent":
                 structured_output = UserMessageRewriteAgentOutput
 
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/sql.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/sql.py
@@ -227,9 +227,19 @@ def clean_query(self, sql_query: str) -> str:
             str: The cleaned SQL query.
         """
         single_line_query = sql_query.strip().replace("\n", " ")
+
+        def sanitize_identifier_wrapper(identifier):
+            """Wrap the identifier in double quotes if it contains special characters."""
+            if re.match(
+                r"^[a-zA-Z_][a-zA-Z0-9_]*$", identifier
+            ):  # Valid SQL identifier
+                return identifier
+
+            return self.sanitize_identifier(identifier)
+
         cleaned_query = re.sub(
-            r'(?<!["\[\w])\b([a-zA-Z_][a-zA-Z0-9_-]*)\b(?!["\]])',
-            lambda m: self.sanitize_identifier(m.group(1)),
+            r'(?<![\["`])\b([a-zA-Z_][a-zA-Z0-9_-]*)\b(?![\]"`])',
+            lambda m: sanitize_identifier_wrapper(m.group(1)),
             single_line_query,
         )
 
@@ -244,6 +254,7 @@ async def query_validation(
     ) -> Union[bool | list[dict]]:
         """Validate the SQL query."""
         try:
+            logging.info("Input SQL Query: %s", sql_query)
             cleaned_query = self.clean_query(sql_query)
             logging.info("Validating SQL Query: %s", cleaned_query)
             parsed_queries = sqlglot.parse(
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/payloads/interaction_payloads.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/payloads/interaction_payloads.py
@@ -84,7 +84,6 @@ class Source(InteractionPayloadBase):
         follow_up_questions: list[str] | None = Field(
             default=None, alias="followUpQuestions"
         )
-        assistant_state: dict | None = Field(default=None, alias="assistantState")
 
     payload_type: Literal[PayloadType.ANSWER_WITH_SOURCES] = Field(
         PayloadType.ANSWER_WITH_SOURCES, alias="payloadType"
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/answer_agent.yaml b/text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/answer_agent.yaml
@@ -6,25 +6,24 @@ system_message: |
   </role_and_objective>
 
   <system_information>
-    You are part of an overall system that provides Text2SQL and subsequent data analysis functionality only. You will be passed a result from multiple SQL queries, you must formulate a response to the user's question using this information.
-    You can assume that the SQL queries are correct and that the results are accurate.
-    You and the wider system can only generate SQL queries and process the results of these queries. You cannot access any external resources.
-    The main ability of the system is to perform natural language understanding and generate SQL queries from the user's question. These queries are then automatically run against the database and the results are passed to you.
+    - You are part of an overall system that provides Text2SQL and subsequent data analysis functionality only. You will be passed a result from multiple SQL queries, you must formulate a response to the user's question using this information.
+    - You can assume that the SQL queries are correct and that the results are accurate.
+    - You and the wider system can only generate SQL queries and process the results of these queries. You cannot access any external resources.
+    - The main ability of the system is to perform natural language understanding and generate SQL queries from the user's question. These queries are then automatically run against the database and the results are passed to you.
   </system_information>
 
   <instructions>
-
-    Use the information obtained to generate a response to the user's question. The question has been broken down into a series of SQL queries and you need to generate a response based on the results of these queries.
-
-    Do not use any external resources to generate the response. The response should be based solely on the information provided in the SQL queries and their results.
-
-    You have no access to the internet or any other external resources. You can only use the information provided in the SQL queries and their results, to generate the response.
-
-    You can use Markdown and Markdown tables to format the response. You MUST use the information obtained from the SQL queries to generate the response.
-
-    If the user is asking about your capabilities, use the <system_information> to explain what you do.
-
-    Make sure your response directly addresses every part of the user's question.
-
+    - Use the information obtained to generate a response to the user's question. The question has been broken down into a series of SQL queries and you need to generate a response based on the results of these queries.
+    - Do not use any external resources to generate the response. The response should be based solely on the information provided in the SQL queries and their results.
+    - You have no access to the internet or any other external resources. You can only use the information provided in the SQL queries and their results, to generate the response.
+    - You can use Markdown and Markdown tables to format the response. You MUST use the information obtained from the SQL queries to generate the response.
+    - If the user is asking about your capabilities, use the <system_information> to explain what you do.
+    - Make sure your response directly addresses every part of the user's question.
   </instructions>
+
+  <output>
+    {
+      "answer": "The response to the user's question."
+    }
+  </output>
 context_size: 8
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/answer_agent_with_follow_up_questions.yaml b/text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/answer_agent_with_follow_up_questions.yaml
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/answer_with_follow_up_questions_agent.yaml b/text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/answer_with_follow_up_questions_agent.yaml
@@ -0,0 +1,36 @@
+model: "4o-mini"
+description: "An agent that generates a response to a user's question."
+system_message: |
+  <role_and_objective>
+    You are a Senior Data Analyst, specializing in providing data driven answers to a user's question. Use the general business use case of '{{ use_case }}' to aid understanding of the user's question. You should provide a clear and concise response based on the information obtained from the SQL queries and their results. Adopt a data-driven approach to generate the response.
+  </role_and_objective>
+
+  <system_information>
+    - You are part of an overall system that provides Text2SQL and subsequent data analysis functionality only. You will be passed a result from multiple SQL queries, you must formulate a response to the user's question using this information.
+    - You can assume that the SQL queries are correct and that the results are accurate.
+    - You and the wider system can only generate SQL queries and process the results of these queries. You cannot access any external resources.
+    - The main ability of the system is to perform natural language understanding and generate SQL queries from the user's question. These queries are then automatically run against the database and the results are passed to you.
+  </system_information>
+
+  <instructions>
+    - Use the information obtained to generate a response to the user's question. The question has been broken down into a series of SQL queries and you need to generate a response based on the results of these queries.
+    - Do not use any external resources to generate the response. The response should be based solely on the information provided in the SQL queries and their results.
+    - You have no access to the internet or any other external resources. You can only use the information provided in the SQL queries and their results, to generate the response.
+    - You can use Markdown and Markdown tables to format the response. You MUST use the information obtained from the SQL queries to generate the response.
+    - If the user is asking about your capabilities, use the <system_information> to explain what you do.
+    - Make sure your response directly addresses every part of the user's question.
+    - Finally, generate 3 data driven follow-up questions based on the information obtained from the SQL queries and their results. Think carefully about what questions may arise from the data and how they can be used to further analyze the data.
+  </instructions>
+
+  <output>
+    {
+      "answer": "The response to the user's question.",
+      "follow_up_questions": [
+        "Follow-up question 1",
+        "Follow-up question 2",
+        "Follow-up question 3"
+      ]
+    }
+  </output>
+context_size: 8
+structured_output: true
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/structured_outputs/__init__.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/structured_outputs/__init__.py
@@ -6,12 +6,14 @@
 from text_2_sql_core.structured_outputs.user_message_rewrite_agent import (
     UserMessageRewriteAgentOutput,
 )
-from text_2_sql_core.structured_outputs.answer_agent_with_follow_up_questions import (
-    AnswerAgentWithFollowUpQuestionsAgentOutput,
+from text_2_sql_core.structured_outputs.answer_with_follow_up_questions_agent import (
+    AnswerWithFollowUpQuestionsAgentOutput,
 )
+from text_2_sql_core.structured_outputs.answer_agent import AnswerAgentOutput
 
 __all__ = [
-    "AnswerAgentWithFollowUpQuestionsAgentOutput",
+    "AnswerAgentOutput",
+    "AnswerWithFollowUpQuestionsAgentOutput",
     "SQLSchemaSelectionAgentOutput",
     "UserMessageRewriteAgentOutput",
 ]
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/structured_outputs/answer_agent.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/structured_outputs/answer_agent.py
@@ -0,0 +1,9 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+from pydantic import BaseModel
+
+
+class AnswerAgentOutput(BaseModel):
+    """The output of the answer agent."""
+
+    answer: str
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/structured_outputs/answer_with_follow_up_questions_agent.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/structured_outputs/answer_with_follow_up_questions_agent.py
@@ -3,7 +3,7 @@
 from pydantic import BaseModel
 
 
-class AnswerAgentWithFollowUpQuestionsAgentOutput(BaseModel):
+class AnswerWithFollowUpQuestionsAgentOutput(BaseModel):
     """The output of the answer agent with follow up questions."""
 
     answer: str

Original file line number	Diff line number	Diff line change
`@@ -84,7 +84,6 @@ class Source(InteractionPayloadBase):`
`84`	`84`	`follow_up_questions: list[str] \| None = Field(`
`85`	`85`	`default=None, alias="followUpQuestions"`
`86`	`86`	`)`
`87`		`- assistant_state: dict \| None = Field(default=None, alias="assistantState")`
`88`	`87`
`89`	`88`	`payload_type: Literal[PayloadType.ANSWER_WITH_SOURCES] = Field(`
`90`	`89`	`PayloadType.ANSWER_WITH_SOURCES, alias="payloadType"`
Original file line number	Diff line number	Diff line change
`@@ -6,12 +6,14 @@`
`6`	`6`	`from text_2_sql_core.structured_outputs.user_message_rewrite_agent import (`
`7`	`7`	`UserMessageRewriteAgentOutput,`
`8`	`8`	`)`
`9`		`-from text_2_sql_core.structured_outputs.answer_agent_with_follow_up_questions import (`
`10`		`- AnswerAgentWithFollowUpQuestionsAgentOutput,`
	`9`	`+from text_2_sql_core.structured_outputs.answer_with_follow_up_questions_agent import (`
	`10`	`+ AnswerWithFollowUpQuestionsAgentOutput,`
`11`	`11`	`)`
	`12`	`+from text_2_sql_core.structured_outputs.answer_agent import AnswerAgentOutput`
`12`	`13`
`13`	`14`	`__all__ = [`
`14`		`- "AnswerAgentWithFollowUpQuestionsAgentOutput",`
	`15`	`+ "AnswerAgentOutput",`
	`16`	`+ "AnswerWithFollowUpQuestionsAgentOutput",`
`15`	`17`	`"SQLSchemaSelectionAgentOutput",`
`16`	`18`	`"UserMessageRewriteAgentOutput",`
`17`	`19`	`]`