Update prompts and instruct that questions may be relevant in context #143

Merged 5 commits on Jan 21, 2025
12 changes: 6 additions & 6 deletions .pre-commit-config.yaml
@@ -45,9 +45,9 @@ repos:
args: [--fix, --ignore, UP007]
exclude: samples

- repo: https://github.com/astral-sh/uv-pre-commit
# uv version.
rev: 0.5.5
hooks:
# Update the uv lockfile
- id: uv-lock
# - repo: https://github.com/astral-sh/uv-pre-commit
# # uv version.
# rev: 0.5.5
# hooks:
# # Update the uv lockfile
# - id: uv-lock
4 changes: 3 additions & 1 deletion text_2_sql/GETTING_STARTED.md
@@ -5,7 +5,9 @@ To get started, perform the following steps:
1. Setup Azure OpenAI in your subscription with **gpt-4o-mini** & an embedding model, alongside a SQL Server sample database, AI Search and a storage account.
2. Clone this repository and deploy the AI Search text2sql indexes from `deploy_ai_search`.
3. Run `uv sync` within the text_2_sql directory to install dependencies.
- Install the optional dependencies if you need a database connector other than TSQL: `uv sync --extra <DATABASE ENGINE>`
- See the supported connectors in `text_2_sql_core/src/text_2_sql_core/connectors`.
4. Create your `.env` file based on the provided sample `.env.example`. Place it in the same directory as `.env.example`.
5. Generate a data dictionary for your target server using the instructions in the **Running** section of the `data_dictionary/README.md`.
6. Upload these data dictionaries to the relevant containers in your storage account. Wait for them to be automatically indexed with the included skillsets.
6. Upload these generated data dictionary files to the relevant containers in your storage account. Wait for them to be automatically indexed with the included skillsets.
7. Navigate to `autogen` directory to view the AutoGen implementation. Follow the steps in `Iteration 5 - Agentic Vector Based Text2SQL.ipynb` to get started.
@@ -35,11 +35,13 @@
"\n",
"### Dependencies\n",
"\n",
"To install dependencies for this demo:\n",
"To install dependencies for this demo. Navigate to the autogen directory:\n",
"\n",
"`uv sync --package autogen_text_2_sql`\n",
"`uv sync`\n",
"\n",
"`uv add --editable text_2_sql_core`"
"If you need a differnet connector to TSQL.\n",
"\n",
"`uv sync --extra <DATABASE ENGINE>`"
]
},
{
@@ -87,6 +89,13 @@
"agentic_text_2_sql = AutoGenText2Sql(use_case=\"Analysing sales data\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
@@ -100,7 +109,7 @@
"metadata": {},
"outputs": [],
"source": [
"async for message in agentic_text_2_sql.process_user_message(UserMessagePayload(user_message=\"What is the total number of sales?\")):\n",
"async for message in agentic_text_2_sql.process_user_message(UserMessagePayload(user_message=\"what are the total sales\")):\n",
" logging.info(\"Received %s Message from Text2SQL System\", message)"
]
},
@@ -128,7 +137,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
"version": "3.12.8"
}
},
"nbformat": 4,
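The notebook cells above construct the system and stream its responses. A self-contained sketch of that flow, assuming both classes are exported from the `autogen_text_2_sql` package (the notebook's import cell is not shown in this diff):

```python
# Sketch of the notebook flow shown above. The import path is an assumption;
# the diff does not include the notebook's import cell.
import asyncio
import logging

from autogen_text_2_sql import AutoGenText2Sql, UserMessagePayload

logging.basicConfig(level=logging.INFO)


async def main():
    agentic_text_2_sql = AutoGenText2Sql(use_case="Analysing sales data")
    # process_user_message streams intermediate agent messages as they arrive.
    async for message in agentic_text_2_sql.process_user_message(
        UserMessagePayload(user_message="what are the total sales")
    ):
        logging.info("Received %s Message from Text2SQL System", message)


if __name__ == "__main__":
    asyncio.run(main())
```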
8 changes: 4 additions & 4 deletions text_2_sql/autogen/pyproject.toml
@@ -6,12 +6,12 @@ readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"aiostream>=0.6.4",
"autogen-agentchat==0.4.0.dev11",
"autogen-core==0.4.0.dev11",
"autogen-ext[azure,openai]==0.4.0.dev11",
"autogen-agentchat==0.4.2",
"autogen-core==0.4.2",
"autogen-ext[azure,openai]==0.4.2",
"grpcio>=1.68.1",
"pyyaml>=6.0.2",
"text_2_sql_core[snowflake,databricks]",
"text_2_sql_core",
"sqlparse>=0.4.4",
"nltk>=3.8.1",
]
@@ -1,6 +1,6 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from autogen_core.components.tools import FunctionToolAlias
from autogen_core.tools import FunctionTool
from autogen_agentchat.agents import AssistantAgent
from text_2_sql_core.connectors.factory import ConnectorFactory
from text_2_sql_core.prompts.load import load
@@ -33,20 +33,20 @@ def get_tool(cls, sql_helper, tool_name: str):
tool_name (str): The name of the tool to retrieve.

Returns:
FunctionToolAlias: The tool."""
FunctionTool: The tool."""

if tool_name == "sql_query_execution_tool":
return FunctionToolAlias(
return FunctionTool(
sql_helper.query_execution_with_limit,
description="Runs an SQL query against the SQL Database to extract information",
)
elif tool_name == "sql_get_entity_schemas_tool":
return FunctionToolAlias(
return FunctionTool(
sql_helper.get_entity_schemas,
description="Gets the schema of a view or table in the SQL Database by selecting the most relevant entity based on the search term. Extract key terms from the user input and use these as the search term. Several entities may be returned. Only use when the provided schemas in the message history are not sufficient to answer the question.",
)
elif tool_name == "sql_get_column_values_tool":
return FunctionToolAlias(
return FunctionTool(
sql_helper.get_column_values,
description="Gets the values of a column in the SQL Database by selecting the most relevant entity based on the search term. Several entities may be returned. Use this to get the correct value to apply against a filter for a user's question.",
)
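This file's change swaps the dev-release `FunctionToolAlias` (from `autogen_core.components.tools`) for the stable `FunctionTool` in `autogen_core.tools`. A minimal sketch of the stable API, with an illustrative stand-in for the real `sql_helper` method:

```python
# Minimal sketch of the autogen-core 0.4.x stable FunctionTool API that this
# file now targets. The wrapped coroutine is a stand-in for the real
# sql_helper.query_execution_with_limit method.
from autogen_core.tools import FunctionTool


async def query_execution_with_limit(sql_query: str) -> str:
    """Stand-in for the real SQL execution helper."""
    return f"Executed: {sql_query}"


sql_query_execution_tool = FunctionTool(
    query_execution_with_limit,
    description="Runs an SQL query against the SQL Database to extract information",
)
```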
@@ -5,10 +5,10 @@
from autogen_agentchat.agents import BaseChatAgent
from autogen_agentchat.base import Response
from autogen_agentchat.messages import (
AgentMessage,
AgentEvent,
ChatMessage,
TextMessage,
ToolCallResultMessage,
ToolCallExecutionEvent,
)
from autogen_core import CancellationToken
import json
@@ -86,7 +86,7 @@ def parse_inner_message(self, message):

async def on_messages_stream(
self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken
) -> AsyncGenerator[AgentMessage | Response, None]:
) -> AsyncGenerator[AgentEvent | Response, None]:
last_response = messages[-1].content
parameter_input = messages[0].content
try:
@@ -118,7 +118,7 @@ async def consume_inner_messages_from_agentic_flow(
logging.info(f"Checking Inner Message: {inner_message}")

try:
if isinstance(inner_message, ToolCallResultMessage):
if isinstance(inner_message, ToolCallExecutionEvent):
for call_result in inner_message.content:
# Check for SQL query results
parsed_message = self.parse_inner_message(
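Here the dev-release names `AgentMessage` and `ToolCallResultMessage` become `AgentEvent` and `ToolCallExecutionEvent` in autogen-agentchat 0.4.x stable. A condensed sketch of the renamed types, mirroring the isinstance filter above:

```python
# Sketch of the renamed 0.4.x stable types used above: AgentEvent replaces
# AgentMessage and ToolCallExecutionEvent replaces ToolCallResultMessage.
# The filter mirrors consume_inner_messages_from_agentic_flow in this file.
from autogen_agentchat.messages import AgentEvent, ToolCallExecutionEvent


def is_tool_execution_event(inner_message: AgentEvent) -> bool:
    """Return True when an inner message carries tool-call execution results."""
    return isinstance(inner_message, ToolCallExecutionEvent)
```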
@@ -4,7 +4,7 @@

from autogen_agentchat.agents import BaseChatAgent
from autogen_agentchat.base import Response
from autogen_agentchat.messages import AgentMessage, ChatMessage, TextMessage
from autogen_agentchat.messages import AgentEvent, ChatMessage, TextMessage
from autogen_core import CancellationToken
from text_2_sql_core.custom_agents.sql_query_cache_agent import (
SqlQueryCacheAgentCustomAgent,
@@ -39,7 +39,7 @@ async def on_messages(

async def on_messages_stream(
self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken
) -> AsyncGenerator[AgentMessage | Response, None]:
) -> AsyncGenerator[AgentEvent | Response, None]:
# Get the decomposed messages from the user_message_rewrite_agent
try:
request_details = json.loads(messages[0].content)
@@ -4,7 +4,7 @@

from autogen_agentchat.agents import BaseChatAgent
from autogen_agentchat.base import Response
from autogen_agentchat.messages import AgentMessage, ChatMessage, TextMessage
from autogen_agentchat.messages import AgentEvent, ChatMessage, TextMessage
from autogen_core import CancellationToken
import json
import logging
@@ -39,7 +39,7 @@ async def on_messages(

async def on_messages_stream(
self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken
) -> AsyncGenerator[AgentMessage | Response, None]:
) -> AsyncGenerator[AgentEvent | Response, None]:
# Try to parse as JSON first
try:
request_details = json.loads(messages[0].content)
35 changes: 14 additions & 21 deletions text_2_sql/data_dictionary/README.md
@@ -207,10 +207,6 @@ This avoids having to index the fact tables, saving storage, and allows us to st

## Automatic Generation

> [!IMPORTANT]
>
> - The data dictionary generation scripts have been moved to `text_2_sql_core`. Documentation will be updated shortly.

Manually creating the `entities.json` is a time-consuming exercise. To speed up generation, a mixture of SQL queries and an LLM can be used to generate an initial version. Existing comments and descriptions in the database can be combined with sample values to generate the necessary descriptions. Manual input can then be used to tweak the output for the use case and make any improvements.

`./text_2_sql_core/data_dictionary/data_dictionary_creator.py` contains a utility class that handles the automatic generation and selection of schemas from the source SQL database. It must be subclassed for the appropriate engine to handle engine-specific queries and connection details.
@@ -222,28 +218,25 @@ The following Databases have pre-built scripts for them:
- **Databricks:** `./text_2_sql_core/data_dictionary/databricks_data_dictionary_creator.py`
- **Snowflake:** `./text_2_sql_core/data_dictionary/snowflake_data_dictionary_creator.py`
- **TSQL:** `./text_2_sql_core/data_dictionary/tsql_data_dictionary_creator.py`
- **PostgreSQL:** `./text_2_sql_core/data_dictionary/postgresql_data_dictionary_creator.py`

If there is no pre-built script for your database engine, take one of the above as a starting point and adjust it.

## Running

Fill out the `.env` template with connection details to your chosen database.

Package and install the `text_2_sql_core` library. See [build](https://docs.astral.sh/uv/concepts/projects/build/) if you want to build as a wheel and install on an agent. Or you can run from within a `uv` environment.

`data_dictionary <DATABASE ENGINE>`

You can pass the following command line arguments:

- `--output_directory` or `-o`: Optional directory that the script will write the output files to.
- `--single_file` or `-s`: Optional flag that writes all schemas to a single file.
- `--generate_definitions` or `-gen`: Optional flag that uses OpenAI to generate descriptions.

If you need control over the following, run the file directly:

- `entities`: A list of entities to extract. Defaults to None.
- `excluded_entities`: A list of entities to exclude.
- `excluded_schemas`: A list of schemas to exclude.
1. Create your `.env` file based on the provided sample `.env.example`. Place it in the same directory as `.env.example`.
2. Package and install the `text_2_sql_core` library. See [build](https://docs.astral.sh/uv/concepts/projects/build/) if you want to build as a wheel and install on an agent. Or you can run from within a `uv` environment and skip packaging.
- Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra <DATABASE ENGINE>`
3. Run `data_dictionary <DATABASE ENGINE>`.
   - You can pass the following command line arguments:
     - `--output_directory` or `-o`: Optional directory that the script will write the output files to.
     - `--single_file` or `-s`: Optional flag that writes all schemas to a single file.
     - `--generate_definitions` or `-gen`: Optional flag that uses OpenAI to generate descriptions.
   - If you need control over the following, run the file directly (see the sketch after this section):
     - `entities`: A list of entities to extract. Defaults to None.
     - `excluded_entities`: A list of entities to exclude.
     - `excluded_schemas`: A list of schemas to exclude.
4. Upload these generated data dictionary files to the relevant containers in your storage account. Wait for them to be automatically indexed with the included skillsets.

> [!IMPORTANT]
>
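For the "run the file directly" path in step 3, a hedged sketch of driving a creator class programmatically. Only the three parameter names come from the README; the class name and entry-point method are assumptions inferred from the script path:

```python
# Illustrative only: the README says to run the file directly for entity-level
# control. The class name and entry-point method below are assumptions based
# on the script path, not confirmed API.
import asyncio

from text_2_sql_core.data_dictionary.tsql_data_dictionary_creator import (
    TSqlDataDictionaryCreator,  # assumed class name for the TSQL script
)


async def main():
    creator = TSqlDataDictionaryCreator(
        entities=None,  # extract all entities (the documented default)
        excluded_entities=["staging_orders"],  # illustrative exclusion
        excluded_schemas=["sys", "INFORMATION_SCHEMA"],
    )
    await creator.create_data_dictionary()  # assumed entry-point method


asyncio.run(main())
```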
1 change: 1 addition & 0 deletions text_2_sql/text_2_sql_core/pyproject.toml
@@ -8,6 +8,7 @@ authors = [
]
requires-python = ">=3.12"
dependencies = [
"aiohttp>=3.11.11",
"aioodbc>=0.5.0",
"azure-identity>=1.19.0",
"azure-search>=1.0.0b2",
Expand Down
@@ -2,7 +2,7 @@ model: "4o-mini"
description: "An agent that generates a response to a user's question."
system_message: |
<role_and_objective>
You are a helpful AI Assistant specializing in answering a user's question about {{ use_case }} through SQL generation and data analysis. You should provide a clear and concise response based on the information obtained from the SQL queries and their results. Adopt a data-driven approach to generate the response.
You are a Senior Data Analyst, specializing in providing data-driven answers to a user's question. Use the general business use case of '{{ use_case }}' to aid understanding of the user's question. You should provide a clear and concise response based on the information obtained from the SQL queries and their results. Adopt a data-driven approach to generate the response.
</role_and_objective>

<system_information>
@@ -24,4 +24,6 @@ system_message: |

If the user is asking about your capabilities, use the <system_information> to explain what you do.

Make sure your response directly addresses every part of the user's question.

</instructions>
@@ -4,7 +4,8 @@ description:
"An agent that specialises in disambiguating the user's question and mapping it to database schemas for {{ use_case }}."
system_message:
"<role_and_objective>
You are a helpful AI Assistant specializing in disambiguating questions about {{ use_case }} and mapping them to the relevant columns and schemas in the database.
You are a Senior Data Engineer specializing in disambiguating questions, mapping them to the relevant columns and schemas in the database, and finally generating SQL queries.
Use the general business use case of '{{ use_case }}' to aid understanding of the user's question.
Your job is to create clear mappings between the user's intent and the available database schema.
If all mappings are clear, generate {{ target_engine }} compliant SQL query based on the mappings.
If the mappings are ambiguous or there are no possible schemas, follow the disambiguation rules to request more information from the user.
@@ -4,7 +4,7 @@
"An agent that specializes in SQL syntax correction and query execution for {{ target_engine }}. This agent receives queries from the generation agent, fixes any syntax issues according to {{ target_engine }} rules, and executes the corrected queries."
system_message:
"<role_and_objective>
You are a SQL syntax expert specializing in converting standard SQL to {{ target_engine }}-compliant SQL. Your job is to:
You are a Senior Data Engineer specializing in converting standard SQL to {{ target_engine }}-compliant SQL and fixing syntactical errors. Your job is to:
1. Take SQL queries with correct logic but potential syntax issues.
2. Review the output from the SQL query being run and fix them according to {{ target_engine }} syntax rules if needed.
3. Execute the corrected queries if needed.
@@ -2,7 +2,7 @@ model: 4o-mini
description: "An agent that can take a user's question and extract the schema of a view or table in the SQL Database by selecting the most relevant entity based on the search term."
system_message: |
<role_and_objective>
You are a helpful AI Assistant specializing in selecting relevant SQL schemas to answer questions about {{ use_case }}.
You are a Senior Data Analyst, specializing in extracting relevant search terms and finding relevant SQL schemas to answer the question. Use the general business use case of '{{ use_case }}' to aid understanding of the user's question.
</role_and_objective>

<instructions>
@@ -2,7 +2,8 @@ model: "4o-mini"
description: "An agent that preprocesses user inputs by decomposing complex queries into simpler sub-messages that can be processed independently and then combined."
system_message: |
<role_and_objective>
You are a helpful AI Assistant specializing in breaking down complex questions into simpler sub-messages that can be processed independently and then combined for the final answer. You should identify when a question can be solved through simpler sub-messages and provide clear instructions for combining their results.
You are a Senior Data Analyst specializing in breaking down complex questions into simpler sub-messages that can be processed independently and then combined for the final answer. You should identify when a question can be solved through simpler sub-messages and provide clear instructions for combining their results.
Use the general business use case of '{{ use_case }}' to aid understanding of the user's question.
</role_and_objective>

<query_complexity_patterns>
@@ -33,18 +34,18 @@ system_message: |
</query_complexity_patterns>

<instructions>
1. Question Filtering and Classification
- Use the provided list of allowed_topics list to filter out malicious or unrelated queries, such as those in the disallowed_topics list.
1. Understanding:
- Use the chat history (that is available in reverse order) to understand the context of the current question.
- If the current question is not fully formed or is unclear, rewrite it based on the general meaning of the old question and the new question. Include spelling and grammar corrections.
- If the current question is clear, output the new question as is with spelling and grammar corrections.

2. Question Filtering and Classification
- Use the provided allowed_topics list to filter out malicious or unrelated queries, such as those in the disallowed_topics list. Only consider the question in the context of the chat history. A question that is disallowed in isolation may be allowed in context e.g. 'Do it for 2023' may seem irrelevant, but in a chat history of 'What are the sales figures for 2024?' it is relevant.
- Consider whether the question is related to data analysis or possibly related to {{ use_case }}. If you are not sure whether the question is related to the use case, do not filter it out, as it may be.
- If the question is filtered out, output an empty sub-message list in the JSON format, followed by TERMINATE.
- For non-database questions like greetings (e.g., "Hello", "What can you do?", "How are you?"), set "all_non_database_query" to true.
- For questions about data (e.g., queries about records, counts, values, comparisons, or any questions that would require database access), set "all_non_database_query" to false.

2. Understanding:
- Use the chat history (that is available in reverse order) to understand the context of the current question.
- If the current question is not fully formed or is unclear, rewrite it based on the general meaning of the old question and the new question. Include spelling and grammar corrections.
- If the current question is clear, output the new question as is with spelling and grammar corrections.

3. Analyze Query Complexity:
- Identify if the query contains patterns that can be simplified
- Look for superlatives, multiple dimensions, or comparisons
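The filtering instructions above imply a JSON output carrying a sub-message list and the `all_non_database_query` flag. A hedged sketch of such an output as a Python literal; only `all_non_database_query` is confirmed by the prompt text, the other field names are assumptions:

```python
# Field names other than all_non_database_query are inferred from the prompt
# text above; the real output schema is defined elsewhere and may differ.
example_decomposition = {
    "sub_messages": [  # assumed field name for the decomposed sub-messages
        "What were the total sales in 2024?",
        "What were the total sales in 2023?",
    ],
    "combination_logic": "Compare the two yearly totals",  # assumed field
    "all_non_database_query": False,  # confirmed by the prompt instructions
}
```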