Fix respective config paths #4

pramitchoudhary · pramitchoudhary · commit 9c46ff18e98e · 2023-08-07T17:34:11.000-07:00
diff --git a/sidekick/configs/.env.toml b/sidekick/configs/.env.toml
@@ -16,6 +16,7 @@ LOG-LEVEL = "INFO"
 DB_TYPE = "sqlite"
 
 [TABLE_INFO]
-TABLE_INFO_PATH = "/examples/demo/table_info.jsonl"
-TABLE_SAMPLES_PATH = "/examples/demo/demo_data.csv"
+TABLE_INFO_PATH = "examples/demo/table_info.jsonl"
+SAMPLE_QNA_PATH = "examples/demo/demo_qa.csv"
+TABLE_SAMPLES_PATH = "examples/demo/demo_data.csv"
 TABLE_NAME = "demo"
diff --git a/sidekick/prompter.py b/sidekick/prompter.py
@@ -283,13 +283,13 @@ def update_context():
 @cli.command()
 @click.option("--question", "-q", help="Database name", prompt="Ask a question")
 @click.option("--table-info-path", "-t", help="Table info path", default=None)
-@click.option("--sample-queries", "-s", help="Samples path", default=None)
-def query(question: str, table_info_path: str, sample_queries: str):
+@click.option("--sample_qna_path", "-s", help="Samples path", default=None)
+def query(question: str, table_info_path: str, sample_qna_path: str):
     """Asks question and returns SQL."""
-    query_api(question=question, table_info_path=table_info_path, sample_queries=sample_queries, is_command=True)
+    query_api(question=question, table_info_path=table_info_path, sample_queries_path=sample_qna_path, is_command=True)
 
 
-def query_api(question: str, table_info_path: str, sample_queries: str, is_command: bool = False):
+def query_api(question: str, table_info_path: str, sample_queries_path: str, is_command: bool = False):
     """Asks question and returns SQL."""
     results = []
     # Book-keeping
@@ -357,7 +357,7 @@ def query_api(question: str, table_info_path: str, sample_queries: str, is_comma
         table_info_path = _get_table_info(path)
 
     sql_g = SQLGenerator(
-        db_url, api_key, job_path=base_path, data_input_path=table_info_path, samples_queries=sample_queries
+        db_url, api_key, job_path=base_path, data_input_path=table_info_path, sample_queries_path=sample_queries_path
     )
     if "h2ogpt-sql" not in model_name:
         sql_g._tasks = sql_g.generate_tasks(table_names, question)
diff --git a/sidekick/query.py b/sidekick/query.py
@@ -9,9 +9,12 @@
 import sqlglot
 import torch
 from langchain import OpenAI
-from llama_index import GPTSimpleVectorIndex, GPTSQLStructStoreIndex, LLMPredictor, ServiceContext, SQLDatabase
+from llama_index import (GPTSimpleVectorIndex, GPTSQLStructStoreIndex,
+                         LLMPredictor, ServiceContext, SQLDatabase)
 from llama_index.indices.struct_store import SQLContextContainerBuilder
-from sidekick.configs.prompt_template import DEBUGGING_PROMPT, NSQL_QUERY_PROMPT, QUERY_PROMPT, TASK_PROMPT
+from sidekick.configs.prompt_template import (DEBUGGING_PROMPT,
+                                              NSQL_QUERY_PROMPT, QUERY_PROMPT,
+                                              TASK_PROMPT)
 from sidekick.logger import logger
 from sidekick.utils import filter_samples, read_sample_pairs, remove_duplicates
 from sqlalchemy import create_engine
@@ -33,7 +36,7 @@ def __init__(
         db_url: str,
         openai_key: str = None,
         data_input_path: str = "./table_info.jsonl",
-        samples_queries: str = "./samples.csv",
+        sample_queries_path: str = "./samples.csv",
         job_path: str = "../var/lib/tmp/data",
     ):
         self.db_url = db_url
@@ -42,7 +45,7 @@ def __init__(
         self.similarity_model = None
         self.context_builder = None
         self.data_input_path = _check_file_info(data_input_path)
-        self.sample_queries_path = samples_queries
+        self.sample_queries_path = sample_queries_path
         self.path = job_path
         self._data_info = None
         self._tasks = None
@@ -78,7 +81,7 @@ def update_context_queries(self):
         # Check if seed samples were provided
         new_context_queries = []
         if self.sample_queries_path is not None and Path(self.sample_queries_path).exists():
-            logger.info(f"Using samples from path {self.sample_queries_path}")
+            logger.info(f"Using QnA samples from path {self.sample_queries_path}")
             new_context_queries = read_sample_pairs(self.sample_queries_path, "gpt")
             # cache the samples for future use
             with open(f"{self.path}/var/lib/tmp/data/queries_cache.json", "w") as f:
@@ -319,7 +322,7 @@ def generate_sql(
                     threshold=0.9,
                 )
                 if len(context_queries) > 1
-                else context_queries
+                else (context_queries, _)
             )
             logger.info(f"Number of possible contextual queries to question: {len(filtered_context)}")
             # If QnA pairs > 5, we keep top 5 for focused context
diff --git a/ui/app.py b/ui/app.py
@@ -1,11 +1,11 @@
 import logging
 from pathlib import Path
 from typing import List, Optional
-from sidekick.prompter import db_setup_api, query_api
 
 import openai
 import toml
 from h2o_wave import Q, app, data, handle_on, main, on, ui
+from sidekick.prompter import db_setup_api, query_api
 
 # Load the config file and initialize required paths
 base_path = (Path(__file__).parent / "../").resolve()
@@ -20,8 +20,10 @@
 password = db_settings["LOCAL_DB_CONFIG"]["PASSWORD"]
 db_name = db_settings["LOCAL_DB_CONFIG"]["DB_NAME"]
 port = db_settings["LOCAL_DB_CONFIG"]["PORT"]
+# Related to the selected table - currently demo
 table_info_path = f'{base_path}/{db_settings["TABLE_INFO"]["TABLE_INFO_PATH"]}'
 table_samples_path = f'{base_path}/{db_settings["TABLE_INFO"]["TABLE_SAMPLES_PATH"]}'
+sample_qna_path = f'{base_path}/{db_settings["TABLE_INFO"]["SAMPLE_QNA_PATH"]}'  # anchored to base_path, as above
 table_name = db_settings["TABLE_INFO"]["TABLE_NAME"]
 
 logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s")
@@ -74,11 +76,20 @@ async def chatbot(q: Q):
     logging.info(f"Question: {question}")
 
     if q.args.chatbot.lower() == "db setup":
-        llm_response = db_setup_api(db_name=db_name, hostname=host_name, user_name=user_name, password=password, port=port, table_info_path=table_info_path, table_samples_path=table_samples_path, table_name= table_name)
+        llm_response = db_setup_api(
+            db_name=db_name,
+            hostname=host_name,
+            user_name=user_name,
+            password=password,
+            port=port,
+            table_info_path=table_info_path,
+            table_samples_path=table_samples_path,
+            table_name=table_name,
+        )
     else:
-        llm_response = query_api(question = question,
-                                sample_queries=None,
-                                table_info_path=table_info_path)
+        llm_response = query_api(
+            question=question, sample_queries_path=sample_qna_path, table_info_path=table_info_path
+        )
         llm_response = "\n".join(llm_response)
 
     q.page["chat_card"].data += [llm_response, False]