{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evaluate AutoGenText2Sql\n",
    "\n",
    "This notebook evaluates the AutoGenText2Sql class using the Spider test suite evaluation metric.\n",
    "\n",
    "The evaluation uses the official Spider evaluation approach, which requires:\n",
    "\n",
    "1. A gold file with the format `SQL query \\t database_id` (an example line is shown below)\n",
    "2. A predictions file with the generated SQL queries, one per line in the same format\n",
    "3. The Spider databases and schema information\n",
    "\n",
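    "For illustration, each line of the gold file pairs a query with its database id, separated by a tab. The line below is an illustrative example, not actual notebook output:\n",
    "\n",
    "```\n",
    "SELECT count(*) FROM singer\\tconcert_singer\n",
    "```\n",
    "\n",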
    "### Required Data Downloads\n",
    "\n",
    "Before running this notebook, you need to download and set up two required directories:\n",
    "\n",
    "1. Spider Test Suite Evaluation Scripts:\n",
    "    - Download from: https://github.com/taoyds/test-suite-sql-eval\n",
    "    - Clone the repository into the `/text_2_sql/test-suite-sql-eval/` directory:\n",
    "    ```bash\n",
    "    cd text_2_sql\n",
    "    git clone https://github.com/taoyds/test-suite-sql-eval\n",
    "    ```\n",
    "\n",
    "2. Spider Dataset:\n",
    "    - Download from: https://drive.google.com/file/d/1403EGqzIDoHMdQF4c9Bkyl7dZLZ5Wt6J/view\n",
    "    - Extract the downloaded file into the `/text_2_sql/spider_data/` directory\n",
    "    - The directory should contain:\n",
    "      - `database/` directory with all the SQLite databases\n",
    "      - `tables.json` with schema information\n",
    "      - `dev.json` with the development set queries"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Dependencies\n",
    "\n",
    "To install the dependencies for this evaluation, run:\n",
    "\n",
    "`uv sync --package autogen_text_2_sql`\n",
    "\n",
    "`uv add --editable text_2_sql_core`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "import time\n",
    "import json\n",
    "import logging\n",
    "import subprocess\n",
    "import dotenv\n",
    "from pathlib import Path\n",
    "\n",
    "# Get the notebook directory path\n",
    "notebook_dir = Path().absolute()\n",
    "# Add the src directory to the path\n",
    "sys.path.append(str(notebook_dir / \"src\"))\n",
    "\n",
    "from autogen_text_2_sql import AutoGenText2Sql, QuestionPayload\n",
    "from autogen_text_2_sql.evaluation_utils import get_final_sql_query\n",
    "\n",
    "# Configure logging\n",
    "logging.basicConfig(level=logging.DEBUG)\n",
    "logger = logging.getLogger(__name__)\n",
    "\n",
    "# Set up paths\n",
    "TEST_SUITE_DIR = Path(\"../test-suite-sql-eval\")\n",
    "SPIDER_DATA_DIR = Path(\"../spider_data\").absolute()\n",
    "DATABASE_DIR = SPIDER_DATA_DIR / \"database\"\n",
    "\n",
    "# Set SPIDER_DATA_DIR in environment so SQLiteSqlConnector can find tables.json\n",
    "os.environ[\"SPIDER_DATA_DIR\"] = str(SPIDER_DATA_DIR)\n",
    "\n",
    "# Load environment variables\n",
    "dotenv.load_dotenv()"
   ]
  },
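  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Before generating predictions, it can help to confirm that the downloaded data is where this notebook expects it. The cell below is a minimal, optional sanity check that only uses the paths defined above; it is a convenience sketch, not part of the official evaluation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sanity check: verify the required files and directories exist.\n",
    "required_paths = [\n",
    "    TEST_SUITE_DIR / \"evaluation.py\",\n",
    "    SPIDER_DATA_DIR / \"tables.json\",\n",
    "    SPIDER_DATA_DIR / \"dev.json\",\n",
    "    DATABASE_DIR,\n",
    "]\n",
    "for path in required_paths:\n",
    "    status = \"OK\" if path.exists() else \"MISSING\"\n",
    "    print(f\"{status:8} {path}\")"
   ]
  },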
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize the AutoGenText2Sql instance with SQLite-specific rules\n",
    "sqlite_rules = \"\"\"\n",
    "1. Use SQLite syntax\n",
    "2. Do not use Azure SQL specific functions\n",
    "3. Use strftime for date/time operations\n",
    "\"\"\"\n",
    "\n",
    "autogen_text2sql = AutoGenText2Sql(\n",
    "    engine_specific_rules=sqlite_rules,\n",
    "    use_case=\"Evaluating with Spider SQLite databases\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to generate SQL for a given question\n",
    "async def generate_sql(question):\n",
    "    # Capture log output for debugging\n",
    "    import io\n",
    "    log_capture = io.StringIO()\n",
    "    handler = logging.StreamHandler(log_capture)\n",
    "    logger.addHandler(handler)\n",
    "\n",
    "    logger.info(f\"Processing question: {question}\")\n",
    "    logger.info(\"Chat history: None\")\n",
    "\n",
    "    # Track all SQL queries found\n",
    "    all_queries = []\n",
    "    final_query = None\n",
    "\n",
    "    async for message in autogen_text2sql.process_question(QuestionPayload(question=question)):\n",
    "        if message.payload_type == \"answer_with_sources\":\n",
    "            # Extract from results\n",
    "            if hasattr(message.body, 'results'):\n",
    "                for q_results in message.body.results.values():\n",
    "                    for result in q_results:\n",
    "                        if isinstance(result, dict) and 'sql_query' in result:\n",
    "                            sql_query = result['sql_query'].strip()\n",
    "                            if sql_query and sql_query != \"SELECT NULL -- No query found\":\n",
    "                                all_queries.append(sql_query)\n",
    "                                logger.info(f\"Found SQL query in results: {sql_query}\")\n",
    "\n",
    "            # Extract from sources\n",
    "            if hasattr(message.body, 'sources'):\n",
    "                for source in message.body.sources:\n",
    "                    if hasattr(source, 'sql_query'):\n",
    "                        sql_query = source.sql_query.strip()\n",
    "                        if sql_query and sql_query != \"SELECT NULL -- No query found\":\n",
    "                            all_queries.append(sql_query)\n",
    "                            logger.info(f\"Found SQL query in sources: {sql_query}\")\n",
    "\n",
    "    # Get the captured log text (not used further, but handy when debugging)\n",
    "    log_text = log_capture.getvalue()\n",
    "\n",
    "    # Clean up logging\n",
    "    logger.removeHandler(handler)\n",
    "    log_capture.close()\n",
    "\n",
    "    # Log all queries found\n",
    "    if all_queries:\n",
    "        logger.info(f\"All queries found: {all_queries}\")\n",
    "        # Select the most appropriate query - prefer DISTINCT queries for questions about unique values\n",
    "        question_lower = question.lower()\n",
    "        needs_distinct = any(word in question_lower for word in ['different', 'distinct', 'unique', 'all'])\n",
    "\n",
    "        for query in reversed(all_queries):  # Look at queries in reverse order\n",
    "            if needs_distinct and 'DISTINCT' in query.upper():\n",
    "                final_query = query\n",
    "                break\n",
    "        if not final_query:  # If no DISTINCT query was found when needed, use the last query\n",
    "            final_query = all_queries[-1]\n",
    "            # Add DISTINCT if needed but not present\n",
    "            if needs_distinct and 'DISTINCT' not in final_query.upper() and final_query.upper().startswith('SELECT '):\n",
    "                final_query = final_query.replace('SELECT ', 'SELECT DISTINCT ', 1)\n",
    "\n",
    "    # Log the final query\n",
    "    logger.info(f\"Final SQL query: {final_query or 'SELECT NULL -- No query found'}\")\n",
    "\n",
    "    return final_query or \"SELECT NULL -- No query found\""
   ]
  },
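  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick smoke test, you can run `generate_sql` on a single question before processing the whole dev set. The database id and question below are illustrative picks from the Spider dev set; substitute any other entry from `dev.json`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Smoke test on one example database (illustrative; any dev.json entry works).\n",
    "example_db_id = \"concert_singer\"\n",
    "example_db_path = DATABASE_DIR / example_db_id / f\"{example_db_id}.sqlite\"\n",
    "os.environ[\"Text2Sql__DatabaseConnectionString\"] = str(example_db_path)\n",
    "os.environ[\"Text2Sql__DatabaseName\"] = example_db_id\n",
    "\n",
    "print(await generate_sql(\"How many singers do we have?\"))"
   ]
  },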
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to read the Spider dev set and generate predictions\n",
    "async def generate_predictions(num_samples=None):\n",
    "    # Read the Spider dev set\n",
    "    dev_file = SPIDER_DATA_DIR / \"dev.json\"\n",
    "    pred_file = TEST_SUITE_DIR / \"predictions.txt\"\n",
    "    gold_file = TEST_SUITE_DIR / \"gold.txt\"\n",
    "\n",
    "    print(f\"Reading dev queries from {dev_file}\")\n",
    "    with open(dev_file) as f:\n",
    "        dev_data = json.load(f)\n",
    "\n",
    "    # Limit the number of samples if specified\n",
    "    if num_samples is not None:\n",
    "        dev_data = dev_data[:num_samples]\n",
    "        print(f\"\\nGenerating predictions for {num_samples} queries...\")\n",
    "    else:\n",
    "        print(f\"\\nGenerating predictions for all {len(dev_data)} queries...\")\n",
    "\n",
    "    predictions = []\n",
    "    gold = []\n",
    "\n",
    "    for idx, item in enumerate(dev_data, 1):\n",
    "        question = item['question']\n",
    "        db_id = item['db_id']\n",
    "        gold_query = item['query']\n",
    "\n",
    "        print(f\"\\nProcessing query {idx}/{len(dev_data)} for database {db_id}\")\n",
    "        print(f\"Question: {question}\")\n",
    "\n",
    "        # Update the database connection string for the current database\n",
    "        db_path = DATABASE_DIR / db_id / f\"{db_id}.sqlite\"\n",
    "        os.environ[\"Text2Sql__DatabaseConnectionString\"] = str(db_path)\n",
    "        os.environ[\"Text2Sql__DatabaseName\"] = db_id\n",
    "\n",
    "        sql = await generate_sql(question)\n",
    "        predictions.append(f\"{sql}\\t{db_id}\")\n",
    "        gold.append(f\"{gold_query}\\t{db_id}\")\n",
    "        print(f\"Generated SQL: {sql}\")\n",
    "\n",
    "    print(f\"\\nSaving predictions to {pred_file}\")\n",
    "    with open(pred_file, 'w') as f:\n",
    "        f.write('\\n'.join(predictions))\n",
    "\n",
    "    print(f\"Saving gold queries to {gold_file}\")\n",
    "    with open(gold_file, 'w') as f:\n",
    "        f.write('\\n'.join(gold))\n",
    "\n",
    "    return pred_file, gold_file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run evaluation using the test suite evaluation script\n",
    "def run_evaluation():\n",
    "    # Use absolute paths to ensure correct file locations\n",
    "    gold_file = TEST_SUITE_DIR / \"gold.txt\"\n",
    "    pred_file = TEST_SUITE_DIR / \"predictions.txt\"\n",
    "    table_file = SPIDER_DATA_DIR / \"tables.json\"  # Use Spider's schema file\n",
    "\n",
    "    print(f\"Starting evaluation at {time.strftime('%H:%M:%S')}\")\n",
    "    start_time = time.time()\n",
    "\n",
    "    cmd = [\n",
    "        \"python\",\n",
    "        str(TEST_SUITE_DIR / \"evaluation.py\"),\n",
    "        \"--gold\", str(gold_file),\n",
    "        \"--pred\", str(pred_file),\n",
    "        \"--db\", str(DATABASE_DIR),\n",
    "        \"--table\", str(table_file),\n",
    "        \"--etype\", \"all\",  # Report both exact set match and execution accuracy\n",
    "        \"--plug_value\",  # Plug gold values into the predicted queries\n",
    "        \"--progress_bar_for_each_datapoint\"  # Show progress for each test input\n",
    "    ]\n",
    "\n",
    "    result = subprocess.run(cmd, capture_output=True, text=True)\n",
    "\n",
    "    end_time = time.time()\n",
    "    duration = end_time - start_time\n",
    "\n",
    "    print(\"\\nEvaluation Results:\")\n",
    "    print(\"==================\")\n",
    "    print(result.stdout)\n",
    "    if result.returncode != 0:\n",
    "        print(\"Evaluation script reported errors:\")\n",
    "        print(result.stderr)\n",
    "\n",
    "    print(f\"\\nEvaluation completed in {duration:.2f} seconds\")\n",
    "    print(f\"End time: {time.strftime('%H:%M:%S')}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate predictions first; num_samples limits the run\n",
    "await generate_predictions(num_samples=20)  # 20 samples take about 4 minutes"
   ]
  },
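  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Optionally, inspect the first few lines of the generated files before scoring them. This is a small convenience check, assuming the cell above has already written `predictions.txt` and `gold.txt`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Peek at the first few prediction/gold pairs (assumes the files were just written).\n",
    "for name in (\"predictions.txt\", \"gold.txt\"):\n",
    "    path = TEST_SUITE_DIR / name\n",
    "    if path.exists():\n",
    "        print(f\"--- {name} ---\")\n",
    "        print(\"\\n\".join(path.read_text().splitlines()[:3]))"
   ]
  },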
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run evaluation\n",
    "run_evaluation()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}