Skip to content

Commit 011f665

Browse files
Clean up SK code (#69)
1 parent 1c8ed08 commit 011f665

File tree

10 files changed

+47
-447
lines changed

10 files changed

+47
-447
lines changed

text_2_sql/semantic_kernel/rag_with_prompt_based_text_2_sql.ipynb renamed to text_2_sql/semantic_kernel/Iteration 2 - Prompt Based Text2SQL.ipynb

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -239,15 +239,14 @@
239239
"\n",
240240
" # Create important information prompt that contains the SQL database information.\n",
241241
" engine_specific_rules = \"Use TOP X to limit the number of rows returned instead of LIMIT X. NEVER USE LIMIT X as it produces a syntax error.\"\n",
242-
" important_information_prompt = f\"\"\"\n",
242+
" sql_database_information_prompt = f\"\"\"\n",
243243
" [SQL DATABASE INFORMATION]\n",
244-
" {sql_plugin.system_prompt(engine_specific_rules=engine_specific_rules)}\n",
244+
" {sql_plugin.sql_prompt_injection(engine_specific_rules=engine_specific_rules)}\n",
245245
" [END SQL DATABASE INFORMATION]\n",
246246
" \"\"\"\n",
247247
"\n",
248248
" arguments = KernelArguments()\n",
249-
" arguments[\"chat_history\"] = chat_history\n",
250-
" arguments[\"important_information\"] = important_information_prompt\n",
249+
" arguments[\"sql_database_information\"] = sql_database_information_prompt\n",
251250
" arguments[\"user_input\"] = question\n",
252251
"\n",
253252
" logging.info(\"Question: %s\", question)\n",

text_2_sql/semantic_kernel/rag_with_vector_based_text_2_sql.ipynb renamed to text_2_sql/semantic_kernel/Iterations 3 & 4 - Vector Based Text2SQL.ipynb

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -227,15 +227,14 @@
227227
"\n",
228228
" # Create important information prompt that contains the SQL database information.\n",
229229
" engine_specific_rules = \"Use TOP X to limit the number of rows returned instead of LIMIT X. NEVER USE LIMIT X as it produces a syntax error.\"\n",
230-
" important_information_prompt = f\"\"\"\n",
230+
" sql_database_information_prompt = f\"\"\"\n",
231231
" [SQL DATABASE INFORMATION]\n",
232-
" {await sql_plugin.system_prompt(engine_specific_rules=engine_specific_rules, question=question)}\n",
232+
" {await sql_plugin.sql_prompt_injection(engine_specific_rules=engine_specific_rules, question=question)}\n",
233233
" [END SQL DATABASE INFORMATION]\n",
234234
" \"\"\"\n",
235235
"\n",
236236
" arguments = KernelArguments()\n",
237-
" arguments[\"chat_history\"] = chat_history\n",
238-
" arguments[\"important_information\"] = important_information_prompt\n",
237+
" arguments[\"sql_database_information\"] = sql_database_information_prompt\n",
239238
" arguments[\"user_input\"] = question\n",
240239
"\n",
241240
" logging.info(\"Question: %s\", question)\n",

text_2_sql/semantic_kernel/README.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,8 @@ As the query cache is shared between users (no data is stored in the cache), a n
1616

1717
## Provided Notebooks & Scripts
1818

19-
- `./rag_with_prompt_based_text_2_sql.ipynb` provides example of how to utilise the Prompt Based Text2SQL plugin to query the database.
20-
- `./rag_with_vector_based_text_2_sql.ipynb` provides example of how to utilise the Vector Based Text2SQL plugin to query the database. The query cache plugin will be enabled or disabled depending on the environmental parameters.
21-
- `./rag_with_ai_search_and_text_2_sql.ipynb` provides an example of how to use the Text2SQL and an AISearch plugin in parallel to automatically retrieve data from the most relevant source to answer the query.
22-
- This setup is useful for a production application as the SQL Database is unlikely to be able to answer all the questions a user may ask.
19+
- `./Iteration 2 - Prompt Based Text2SQL.ipynb` provides an example of how to utilise the Prompt Based Text2SQL plugin to query the database.
20+
- `./Iterations 3 & 4 - Vector Based Text2SQL.ipynb` provides an example of how to utilise the Vector Based Text2SQL plugin to query the database. The query cache plugin will be enabled or disabled depending on the environmental parameters.
2321
- `./time_comparison_script.py` provides a utility script for performing time based comparisons between the different approaches.
2422

2523
### ai-search.py

text_2_sql/semantic_kernel/plugins/ai_search_plugin/__init__.py

Whitespace-only changes.

text_2_sql/semantic_kernel/plugins/ai_search_plugin/ai_search_plugin.py

Lines changed: 0 additions & 46 deletions
This file was deleted.

text_2_sql/semantic_kernel/plugins/prompt_based_sql_plugin/prompt_based_sql_plugin.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def load_entities(self):
3737
entity_name = entity_object["EntityName"].lower()
3838
self.entities[entity_name] = entity_object
3939

40-
def system_prompt(self, engine_specific_rules: str | None = None) -> str:
40+
def sql_prompt_injection(self, engine_specific_rules: str | None = None) -> str:
4141
"""Get the schemas for the database entities and provide a system prompt for the user.
4242
4343
Returns:
@@ -60,7 +60,7 @@ def system_prompt(self, engine_specific_rules: str | None = None) -> str:
6060
engine_specific_rules = f"""\n The following {
6161
self.target_engine} Syntax rules must be adhered to.\n {engine_specific_rules}"""
6262

63-
system_prompt = f"""Use the names and descriptions of {self.target_engine} entities provided in ENTITIES LIST to decide which entities to query if you need to retrieve information from the database. Use the 'GetEntitySchema()' function to get more details of the schema of the view you want to query.
63+
sql_prompt_injection = f"""Use the names and descriptions of {self.target_engine} entities provided in ENTITIES LIST to decide which entities to query if you need to retrieve information from the database. Use the 'GetEntitySchema()' function to get more details of the schema of the view you want to query.
6464
6565
Always then use the 'RunSQLQuery()' function to run the SQL query against the database. Never just return the SQL query as the answer.
6666
@@ -86,7 +86,7 @@ def system_prompt(self, engine_specific_rules: str | None = None) -> str:
8686
8787
The source title to cite is the 'entity_name' property. The source reference is the SQL query used. The source chunk is the result of the SQL query used to answer the user query in Markdown table format. e.g. {{ 'title': "vProductAndDescription", 'chunk': '| ProductID | Name | ProductModel | Culture | Description |\\n|-----------|-------------------|--------------|---------|----------------------------------|\\n| 101 | Mountain Bike | MT-100 | en | A durable bike for mountain use. |\\n| 102 | Road Bike | RB-200 | en | Lightweight bike for road use. |\\n| 103 | Hybrid Bike | HB-300 | fr | Vélo hybride pour usage mixte. |\\n', 'reference': 'SELECT ProductID, Name, ProductModel, Culture, Description FROM vProductAndDescription WHERE Culture = \"en\";' }}"""
8888

89-
return system_prompt
89+
return sql_prompt_injection
9090

9191
@kernel_function(
9292
description="Get the detailed schema of an entity in the Database. Use the entity and the column returned to formulate a SQL query. The view name or table name must be one of the ENTITY NAMES defined in the [ENTITIES LIST]. Only use the column names obtained from GetEntitySchema() when constructing a SQL query, do not make up column names.",

text_2_sql/semantic_kernel/plugins/vector_based_sql_plugin/vector_based_sql_plugin.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ async def fetch_queries_from_cache(self, question: str) -> str:
211211

212212
return formatted_sql_cache_string
213213

214-
async def system_prompt(
214+
async def sql_prompt_injection(
215215
self, engine_specific_rules: str | None = None, question: str | None = None
216216
) -> str:
217217
"""Get the schemas for the database entities and provide a system prompt for the user.
@@ -261,7 +261,7 @@ async def system_prompt(
261261
262262
Check the above schemas carefully to see if they can be used to formulate a SQL query. If you need additional schemas, use 'GetEntitySchema()' function to search for the most relevant schemas for the data that you wish to obtain."""
263263

264-
system_prompt = f"""{query_prompt}
264+
sql_prompt_injection = f"""{query_prompt}
265265
266266
If needed, use the 'RunSQLQuery()' function to run the SQL query against the database. Never just return the SQL query as the answer.
267267
@@ -280,7 +280,7 @@ async def system_prompt(
280280
281281
The source title to cite is the 'EntityName' property. The source reference is the SQL query used. The source chunk is the result of the SQL query used to answer the user query in Markdown table format. e.g. {{ 'title': "vProductAndDescription", 'chunk': '| ProductID | Name | ProductModel | Culture | Description |\\n|-----------|-------------------|--------------|---------|----------------------------------|\\n| 101 | Mountain Bike | MT-100 | en | A durable bike for mountain use. |\\n| 102 | Road Bike | RB-200 | en | Lightweight bike for road use. |\\n| 103 | Hybrid Bike | HB-300 | fr | Vélo hybride pour usage mixte. |\\n', 'reference': 'SELECT ProductID, Name, ProductModel, Culture, Description FROM vProductAndDescription WHERE Culture = \"en\";' }}"""
282282

283-
return system_prompt
283+
return sql_prompt_injection
284284

285285
@kernel_function(
286286
description="Gets the schema of a view or table in the SQL Database by selecting the most relevant entity based on the search term. Extract key terms from the user question and use these as the search term. Several entities may be returned. Only use when the provided schemas in the system prompt are not sufficient to answer the question.",

text_2_sql/semantic_kernel/prompt.yaml

Lines changed: 27 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,47 +12,35 @@ template: |
1212
1313
The response must meet the following requirements:
1414
15-
[RESPONSE OUTPUT REQUIREMENTS]
15+
The answer MUST be in JSON format:
16+
{
17+
"answer": "<GENERATED ANSWER>",
18+
"sources": [
19+
{"title": <SOURCE 1 TITLE>, "chunk": <SOURCE 1 CONTEXT CHUNK>, "reference": "<SOURCE 1 REFERENCE>"},
20+
{"title": <SOURCE 2 TITLE>, "chunk": <SOURCE 2 CONTEXT CHUNK>, "reference": "<SOURCE 2 REFERENCE>"}
21+
]
22+
}
1623
17-
The answer MUST be in JSON format:
18-
{
19-
"answer": "<GENERATED ANSWER>",
20-
"sources": [
21-
{"title": <SOURCE 1 TITLE>, "chunk": <SOURCE 1 CONTEXT CHUNK>, "reference": "<SOURCE 1 REFERENCE>"},
22-
{"title": <SOURCE 2 TITLE>, "chunk": <SOURCE 2 CONTEXT CHUNK>, "reference": "<SOURCE 2 REFERENCE>"}
23-
]
24-
}
24+
[ANSWER PROPERTY REQUIREMENTS]
25+
- **Calculations**:
26+
Use context-provided values and explain calculations briefly.
27+
- **Structure**:
28+
Responses must be direct, easy to understand, and formatted using Markdown.
29+
Use Level 3 and 4 headings, bold sub-headings, and lists where appropriate. Keep font size consistent.
30+
- **Citations**:
31+
Factual statements must be cited using numbered references like [1]. Each citation must match a source in the 'sources' object.
2532
26-
[ANSWER PROPERTY REQUIREMENTS]
27-
- **Language & Tone**:
28-
Use British English, business-friendly language that is professional and clear.
29-
- **Content Restrictions**:
30-
Avoid profanity, offensive language, and code. Rephrase or omit inappropriate content.
31-
- **Information Sources**:
32-
Only use provided functions and important information. Prioritize SQL Database data in case of conflicts.
33-
- **Calculations**:
34-
Use context-provided values and explain calculations briefly.
35-
- **Structure**:
36-
Responses must be direct, easy to understand, and formatted using Markdown.
37-
Use Level 3 and 4 headings, bold sub-headings, and lists where appropriate. Keep font size consistent.
38-
- **Citations**:
39-
Factual statements must be cited using numbered references like [1]. Each citation must match a source in the 'sources' object.
33+
[SOURCES PROPERTY REQUIREMENTS]
34+
- **Reference Inclusion**:
35+
All cited content must have a corresponding reference in the 'sources' object.
36+
- **Source Format**:
37+
Each source must follow this format: {"title": "<SOURCE TITLE>", "chunk": "<SOURCE CONTEXT CHUNK>", "reference": "<SOURCE REFERENCE>"}
38+
- **Source Chunk**:
39+
Include a concise, unedited snippet of relevant context in the 'chunk' property.
40+
- **Mandatory Citations**:
41+
Every source listed must be cited at least once in the answer.
4042
41-
[SOURCES PROPERTY REQUIREMENTS]
42-
- **Reference Inclusion**:
43-
All cited content must have a corresponding reference in the 'sources' object.
44-
- **Source Format**:
45-
Each source must follow this format: {"title": "<SOURCE TITLE>", "chunk": "<SOURCE CONTEXT CHUNK>", "reference": "<SOURCE REFERENCE>"}
46-
- **Source Chunk**:
47-
Include a concise, unedited snippet of relevant context in the 'chunk' property.
48-
- **Mandatory Citations**:
49-
Every source listed must be cited at least once in the answer.
50-
51-
[IMPORTANT INFORMATION]
52-
53-
{{$important_information}}
54-
55-
[END]
43+
{{$sql_database_information}}
5644
</message>
5745
{{$chat_history}}
5846
<message role="user">{{$user_input}}</message>
@@ -62,7 +50,7 @@ input_variables:
6250
- name: user_input
6351
description: The user input
6452
is_required: true
65-
- name: important_information
53+
- name: sql_database_information
6654
description: Useful information for the chatbot
6755
is_required: true
6856
output_variable:

0 commit comments

Comments
 (0)