microsoft
diff --git a/‎deploy_ai_search/text_2_sql_query_cache.py
Lines changed: 23 additions & 0 deletions b/‎deploy_ai_search/text_2_sql_query_cache.py
Lines changed: 23 additions & 0 deletions
diff --git a/‎text_2_sql/autogen/README.md
Lines changed: 3 additions & 0 deletions b/‎text_2_sql/autogen/README.md
Lines changed: 3 additions & 0 deletions
diff --git a/‎text_2_sql/autogen/agentic_text_2_sql.ipynb
Lines changed: 80 additions & 0 deletions b/‎text_2_sql/autogen/agentic_text_2_sql.ipynb
Lines changed: 80 additions & 0 deletions
diff --git a/‎text_2_sql/autogen/agentic_text_2_sql.py
Lines changed: 81 additions & 0 deletions b/‎text_2_sql/autogen/agentic_text_2_sql.py
Lines changed: 81 additions & 0 deletions
diff --git a/‎text_2_sql/autogen/custom_agents/__init__.py b/‎text_2_sql/autogen/custom_agents/__init__.py
diff --git a/‎text_2_sql/autogen/custom_agents/sql_query_cache_agent.py
Lines changed: 51 additions & 0 deletions b/‎text_2_sql/autogen/custom_agents/sql_query_cache_agent.py
Lines changed: 51 additions & 0 deletions
diff --git a/‎text_2_sql/autogen/environment.py
Lines changed: 30 additions & 0 deletions b/‎text_2_sql/autogen/environment.py
Lines changed: 30 additions & 0 deletions
diff --git a/‎text_2_sql/autogen/llm_agents/answer_agent.yaml
Lines changed: 20 additions & 0 deletions b/‎text_2_sql/autogen/llm_agents/answer_agent.yaml
Lines changed: 20 additions & 0 deletions
diff --git a/‎text_2_sql/autogen/llm_agents/question_decomposition_agent.yaml
Lines changed: 10 additions & 0 deletions b/‎text_2_sql/autogen/llm_agents/question_decomposition_agent.yaml
Lines changed: 10 additions & 0 deletions
diff --git a/‎text_2_sql/autogen/llm_agents/sql_query_correction_agent.yaml
Lines changed: 19 additions & 0 deletions b/‎text_2_sql/autogen/llm_agents/sql_query_correction_agent.yaml
Lines changed: 19 additions & 0 deletions
@@ -7,6 +7,10 @@
     SearchableField,
     SimpleField,
     ComplexField,
+    SemanticField,
+    SemanticPrioritizedFields,
+    SemanticConfiguration,
+    SemanticSearch,
 )
 from ai_search import AISearch
 from environment import (
@@ -107,3 +111,22 @@ def get_index_fields(self) -> list[SearchableField]:
         ]
 
         return fields
+
+    def get_semantic_search(self) -> SemanticSearch:
+        """This function returns the semantic search configuration for sql index
+
+        Returns:
+            SemanticSearch: The semantic search configuration"""
+
+        semantic_config = SemanticConfiguration(
+            name=self.semantic_config_name,
+            prioritized_fields=SemanticPrioritizedFields(
+                content_fields=[
+                    SemanticField(field_name="Question"),
+                ],
+            ),
+        )
+
+        semantic_search = SemanticSearch(configurations=[semantic_config])
+
+        return semantic_search
@@ -0,0 +1,3 @@
+# Multi-Shot Text2SQL Component - AutoGen
+
+This component is still very much a work in progress; more documentation is coming soon.
@@ -0,0 +1,80 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import dotenv\n",
+        "import logging\n",
+        "from autogen_agentchat.task import Console\n",
+        "from agentic_text_2_sql import text_2_sql_generator"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "logging.basicConfig(level=logging.INFO)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dotenv.load_dotenv()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "result = text_2_sql_generator.run_stream(task=\"What are the total number of sales within 2008?\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "await Console(result)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.6"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+}
@@ -0,0 +1,81 @@
+from autogen_agentchat.task import TextMentionTermination, MaxMessageTermination
+from autogen_agentchat.teams import SelectorGroupChat
+from utils.models import MINI_MODEL
+from utils.llm_agent_creator import LLMAgentCreator
+import logging
+from custom_agents.sql_query_cache_agent import SqlQueryCacheAgent
+import json
+
+SQL_QUERY_GENERATION_AGENT = LLMAgentCreator.create(
+    "sql_query_generation_agent",
+    target_engine="Microsoft SQL Server",
+    engine_specific_rules="Use TOP X to limit the number of rows returned instead of LIMIT X. NEVER USE LIMIT X as it produces a syntax error.",
+)
+SQL_SCHEMA_SELECTION_AGENT = LLMAgentCreator.create("sql_schema_selection_agent")
+SQL_QUERY_CORRECTION_AGENT = LLMAgentCreator.create(
+    "sql_query_correction_agent",
+    target_engine="Microsoft SQL Server",
+    engine_specific_rules="Use TOP X to limit the number of rows returned instead of LIMIT X. NEVER USE LIMIT X as it produces a syntax error.",
+)
+SQL_QUERY_CACHE_AGENT = SqlQueryCacheAgent()
+ANSWER_AGENT = LLMAgentCreator.create("answer_agent")
+QUESTION_DECOMPOSITION_AGENT = LLMAgentCreator.create("question_decomposition_agent")
+
+
+def text_2_sql_generator_selector_func(messages):
+    logging.info("Messages: %s", messages)
+    decision = None  # Initialize decision variable
+
+    if len(messages) == 1:
+        decision = "sql_query_cache_agent"
+
+    elif (
+        messages[-1].source == "sql_query_cache_agent"
+        and messages[-1].content is not None
+    ):
+        cache_result = json.loads(messages[-1].content)
+        if cache_result.get("cached_questions_and_schemas") is not None:
+            decision = "sql_query_correction_agent"
+        else:
+            decision = "sql_schema_selection_agent"
+
+    elif messages[-1].source == "question_decomposition_agent":
+        decision = "sql_schema_selection_agent"
+
+    elif messages[-1].source == "sql_schema_selection_agent":
+        decision = "sql_query_generation_agent"
+
+    elif (
+        messages[-1].source == "sql_query_correction_agent"
+        and messages[-1].content == "VALIDATED"
+    ):
+        decision = "answer_agent"
+
+    elif messages[-1].source == "sql_query_correction_agent":
+        decision = "sql_query_correction_agent"
+
+    # Log the decision
+    logging.info("Decision: %s", decision)
+
+    return decision
+
+
+# Termination combines a "TERMINATE" text mention with a cap of 10 messages.
+# NOTE(review): assumes `|` means "stop when either condition fires" - confirm.
+termination = TextMentionTermination("TERMINATE") | MaxMessageTermination(10)
+# Team containing all six agents. Speaker choice is delegated to
+# text_2_sql_generator_selector_func, with MINI_MODEL supplied as the model client.
+text_2_sql_generator = SelectorGroupChat(
+    [
+        SQL_QUERY_GENERATION_AGENT,
+        SQL_SCHEMA_SELECTION_AGENT,
+        SQL_QUERY_CORRECTION_AGENT,
+        SQL_QUERY_CACHE_AGENT,
+        ANSWER_AGENT,
+        QUESTION_DECOMPOSITION_AGENT,
+    ],
+    allow_repeated_speaker=False,
+    model_client=MINI_MODEL,
+    termination_condition=termination,
+    selector_func=text_2_sql_generator_selector_func,
+)
+
+# text_2_sql_cache_updater = SelectorGroupChat(
+#     [SQL_QUERY_CACHE_AGENT], model_client=MINI_MODEL, termination_condition=termination
+# )
@@ -0,0 +1,51 @@
+from typing import AsyncGenerator, List, Sequence
+
+from autogen_agentchat.agents import BaseChatAgent
+from autogen_agentchat.base import Response
+from autogen_agentchat.messages import AgentMessage, ChatMessage, TextMessage
+from autogen_core.base import CancellationToken
+from utils.sql_utils import fetch_queries_from_cache
+import json
+import logging
+
+
+class SqlQueryCacheAgent(BaseChatAgent):
+    def __init__(self):
+        super().__init__(
+            "sql_query_cache_agent",
+            "An agent that fetches the queries from the cache based on the user question.",
+        )
+
+    @property
+    def produced_message_types(self) -> List[type[ChatMessage]]:
+        return [TextMessage]
+
+    async def on_messages(
+        self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken
+    ) -> Response:
+        # Calls the on_messages_stream.
+        response: Response | None = None
+        async for message in self.on_messages_stream(messages, cancellation_token):
+            if isinstance(message, Response):
+                response = message
+        assert response is not None
+        return response
+
+    async def on_messages_stream(
+        self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken
+    ) -> AsyncGenerator[AgentMessage | Response, None]:
+        user_question = messages[0].content
+
+        # Fetch the queries from the cache based on the user question.
+        logging.info("Fetching queries from cache based on the user question...")
+
+        cached_queries = await fetch_queries_from_cache(user_question)
+
+        yield Response(
+            chat_message=TextMessage(
+                content=json.dumps(cached_queries), source=self.name
+            )
+        )
+
+    async def on_reset(self, cancellation_token: CancellationToken) -> None:
+        pass
@@ -0,0 +1,30 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+import os
+from enum import Enum
+
+
+class IdentityType(Enum):
+    """The identity type, as selected by the ``IdentityType`` environment variable.
+
+    NOTE(review): the earlier wording ("type of the indexer") looks copied from
+    elsewhere; the members presumably name authentication modes - confirm.
+    """
+
+    # Each value is the exact string that get_identity_type matches against.
+    USER_ASSIGNED = "user_assigned"
+    SYSTEM_ASSIGNED = "system_assigned"
+    KEY = "key"
+
+
+def get_identity_type() -> IdentityType:
+    """This function returns the identity type.
+
+    Returns:
+        IdentityType: The identity type
+    """
+    identity = os.environ.get("IdentityType")
+
+    if identity == "user_assigned":
+        return IdentityType.USER_ASSIGNED
+    elif identity == "system_assigned":
+        return IdentityType.SYSTEM_ASSIGNED
+    elif identity == "key":
+        return IdentityType.KEY
+    else:
+        raise ValueError("Invalid identity type")
@@ -0,0 +1,20 @@
+model:
+  gpt-4o-mini
+description:
+  "An agent that takes the final results from the SQL query and writes the answer to the user's question"
+system_message:
+  "Write a data-driven answer that directly addresses the user's question. Use the results from the SQL query to provide the answer. Do not make up or guess the answer.
+
+  Return your answer in the following format:
+
+  {
+    'answer': '<GENERATED ANSWER>',
+    'sources': [
+      {'title': <SOURCE SCHEMA NAME 1>, 'chunk': <SOURCE 1 CONTEXT CHUNK>, 'reference': '<SOURCE 1 SQL QUERY>'},
+      {'title': <SOURCE SCHEMA NAME 2>, 'chunk': <SOURCE 2 CONTEXT CHUNK>, 'reference': '<SOURCE 2 SQL QUERY>'}
+    ]
+  }
+
+  Title is the entity name of the schema, chunk is the result of the SQL query and reference is the SQL query used to generate the answer.
+
+  End your answer with 'TERMINATE'"
@@ -0,0 +1,10 @@
+model:
+  gpt-4o-mini
+description:
+  "An agent that will decompose the user's question into smaller parts to be used in the SQL queries. Use this agent when the user's question is too complex to be answered in one SQL query. Only use if the user's question is too complex to be answered in one SQL query.
+
+  Only use this agent once per user question and after the 'Query Cache Agent' if the results are none."
+system_message:
+  "You are a helpful AI Assistant that specialises in decomposing complex user questions into smaller parts that can be used in SQL queries.
+
+  Break down the user's question into smaller parts that can be used in SQL queries."
@@ -0,0 +1,19 @@
+model:
+  gpt-4o-mini
+description:
+  "An agent that will look at the SQL query, SQL query results and correct any mistakes in the SQL query to ensure the correct results are returned. Use this agent AFTER the SQL query has been executed and the results are not as expected."
+system_message:
+  "You are a helpful AI Assistant that specialises in correcting invalid SQL queries or queries that do not return the expected results.
+
+  Review the SQL query provided and correct any errors or issues that you find. Bear in mind that the target database engine is {{ target_engine }}; SQL queries must be compatible with {{ target_engine }}. {{ engine_specific_rules }}
+
+  Ensure that the corrected query returns the expected results in context of the question.
+
+  If there are no errors and the SQL query is correct, return 'VALIDATED'.
+
+  If the SQL query needs adjustment, correct the SQL query and provide the corrected SQL query and then run the query.
+
+  If you are consistently unable to correct the SQL query and cannot use the schemas to answer the question, say 'I am unable to correct the SQL query. Please ask another question.' and then end your answer with 'TERMINATE'"
+tools:
+  - sql_get_entity_schemas_tool
+  - sql_query_execution_tool
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# Multi-Shot Text2SQL Component - AutoGen`
	`2`	`+`
	`3`	`+This component is still very much a work in progress; more documentation is coming soon.`