
Commit 1c8ed08

Add optional query cache indexer (#66)

* Add query cache indexer
* Remove output
* Update parameters

1 parent c0f7416 commit 1c8ed08

File tree

5 files changed: +152 −10 lines

deploy_ai_search/.env

Lines changed: 1 addition & 0 deletions

```diff
@@ -15,6 +15,7 @@ StorageAccount__FQEndpoint=<Fully qualified endpoint in form ResourceId=resource
 StorageAccount__ConnectionString=<connectionString if using non managed identity. In format: DefaultEndpointsProtocol=https;AccountName=<STG NAME>;AccountKey=<ACCOUNT KEY>;EndpointSuffix=core.windows.net>
 StorageAccount__RagDocuments__Container=<containerName>
 StorageAccount__Text2SqlSchemaStore__Container=<containerName>
+StorageAccount__Text2SqlQueryCache__Container=<containerName>
 OpenAI__ApiKey=<openAIKey if using non managed identity>
 OpenAI__Endpoint=<openAIEndpoint>
 OpenAI__EmbeddingModel=<openAIEmbeddingModelName>
```

deploy_ai_search/README.md

Lines changed: 4 additions & 2 deletions

```diff
@@ -24,17 +24,19 @@ The associated scripts in this portion of the repository contains pre-built scri
 - `index_type text_2_sql_schema_store`. This selects the `Text2SQLSchemaStoreAISearch` sub class.
 - `rebuild`. Whether to delete and rebuild the index.
 - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want to deploy a test version before overwriting the main version.
-- `single_data_dictionary`. Optional parameter that controls whether you will be uploading a single data dictionary, or a data dictionary file per entity. By default, this is set to False.
+- `single_data_dictionary_file`. Optional parameter that controls whether you will be uploading a single data dictionary, or a data dictionary file per entity. By default, this is set to False.
 
 ### Query Cache Index
 
 1. Update the `.env` file with the associated values. Not all values are required, depending on whether you are using System / User Assigned Identities or key-based authentication.
-2. Adjust `text_2_sql_query_cache.py` with any changes to the index. **There is no provided indexer or skillset for this cache; it is expected that application code will write directly to it. See the details in the Text2SQL README for different cache strategies.**
+2. Adjust `text_2_sql_query_cache.py` with any changes to the index. **An optional indexer and skillset are provided for this cache. You may instead have the application code write directly to it. See the details in the Text2SQL README for different cache strategies.**
 3. Run `deploy.py` with the following args:
 
 - `index_type text_2_sql_query_cache`. This selects the `Text2SQLQueryCacheAISearch` sub class.
 - `rebuild`. Whether to delete and rebuild the index.
 - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want to deploy a test version before overwriting the main version.
+- `enable_query_cache_indexer`. Optional parameter that will enable the query cache indexer. Defaults to False.
+- `single_query_cache_file`. Optional parameter that controls whether you will be uploading a single query cache file, or a cache file per question. By default, this is set to False.
 
 ## ai_search.py & environment.py
```
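If you choose the direct-write cache strategy instead of the optional indexer, populating the cache is a plain document upload against the index defined in `text_2_sql_query_cache.py`. A minimal sketch, assuming key-based authentication and the `azure-search-documents` SDK; the field names follow the index definition, while the index name, endpoint, and embedding value are hypothetical placeholders:

```python
# Minimal sketch of the "application writes directly to the cache" strategy.
# Field names come from text_2_sql_query_cache.py; endpoint, index name and
# key are illustrative assumptions, not values from this commit.
import base64
from datetime import datetime, timezone

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient

search_client = SearchClient(
    endpoint="https://<search-service>.search.windows.net",  # hypothetical
    index_name="text-2-sql-query-cache-index",  # hypothetical name
    credential=AzureKeyCredential("<search-admin-key>"),
)

question = "How many orders were placed last month?"
cache_entry = {
    # URL-safe base64 key; approximates the indexer's base64Encode mapping.
    "Id": base64.urlsafe_b64encode(question.encode()).decode(),
    "Question": question,
    "QuestionEmbedding": [0.0] * 1536,  # replace with a real embedding of the question
    "SqlQueryDecomposition": [
        {"SubQuestion": question, "SqlQuery": "SELECT COUNT(*) FROM Orders ..."}
    ],
    "DateLastModified": datetime.now(timezone.utc).isoformat(),
}

# Upsert so repeated questions refresh the existing cache entry.
search_client.merge_or_upload_documents(documents=[cache_entry])
```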

deploy_ai_search/deploy.py

Lines changed: 18 additions & 3 deletions

```diff
@@ -24,11 +24,14 @@ def deploy_config(arguments: argparse.Namespace):
         index_config = Text2SqlSchemaStoreAISearch(
             suffix=arguments.suffix,
             rebuild=arguments.rebuild,
-            single_data_dictionary=arguments.single_data_dictionary,
+            single_data_dictionary_file=arguments.single_data_dictionary_file,
         )
     elif arguments.index_type == "text_2_sql_query_cache":
         index_config = Text2SqlQueryCacheAISearch(
-            suffix=arguments.suffix, rebuild=arguments.rebuild
+            suffix=arguments.suffix,
+            rebuild=arguments.rebuild,
+            single_query_cache_file=arguments.single_query_cache_file,
+            enable_query_cache_indexer=arguments.enable_query_cache_indexer,
         )
     else:
         raise ValueError("Invalid Indexer Type")
@@ -60,11 +63,23 @@ def deploy_config(arguments: argparse.Namespace):
         help="Whether want to enable chunking by page in adi skill, if no value is passed considered False",
     )
     parser.add_argument(
-        "--single_data_dictionary",
+        "--single_data_dictionary_file",
         type=bool,
         required=False,
         help="Whether or not a single data dictionary file should be uploaded, or one per entity",
     )
+    parser.add_argument(
+        "--single_query_cache_file",
+        type=bool,
+        required=False,
+        help="Whether or not a single cache file should be uploaded, or one per question",
+    )
+    parser.add_argument(
+        "--enable_query_cache_indexer",
+        type=bool,
+        required=False,
+        help="Whether or not the sql query cache indexer should be enabled",
+    )
     parser.add_argument(
         "--suffix",
         type=str,
```
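Note that the boolean args above use argparse's `type=bool`, which does not parse strings: any non-empty value, including the literal `False`, is truthy, so `--enable_query_cache_indexer False` still enables the indexer. A minimal sketch of the conventional alternative (not part of this commit), using `action="store_true"`:

```python
# Sketch: a boolean flag that behaves as expected on the command line.
# argparse's type=bool calls bool("False"), which is True for any
# non-empty string; store_true avoids parsing a value at all.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--enable_query_cache_indexer",
    action="store_true",  # present => True, absent => False
    help="Whether or not the sql query cache indexer should be enabled",
)

args = parser.parse_args(["--enable_query_cache_indexer"])
assert args.enable_query_cache_indexer is True

args = parser.parse_args([])
assert args.enable_query_cache_indexer is False
```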

deploy_ai_search/text_2_sql_query_cache.py

Lines changed: 127 additions & 3 deletions

```diff
@@ -5,12 +5,20 @@
     SearchFieldDataType,
     SearchField,
     SearchableField,
-    SimpleField,
-    ComplexField,
     SemanticField,
     SemanticPrioritizedFields,
     SemanticConfiguration,
     SemanticSearch,
+    SearchIndexer,
+    FieldMapping,
+    SimpleField,
+    ComplexField,
+    IndexingParameters,
+    IndexingParametersConfiguration,
+    BlobIndexerDataToExtract,
+    IndexerExecutionEnvironment,
+    BlobIndexerParsingMode,
+    FieldMappingFunction,
 )
 from ai_search import AISearch
 from environment import (
@@ -21,16 +29,30 @@
 class Text2SqlQueryCacheAISearch(AISearch):
     """This class is used to deploy the sql index."""
 
-    def __init__(self, suffix: str | None = None, rebuild: bool | None = False):
+    def __init__(
+        self,
+        suffix: str | None = None,
+        rebuild: bool | None = False,
+        single_query_cache_file: bool | None = False,
+        enable_query_cache_indexer: bool | None = False,
+    ):
         """Initialize the Text2SqlAISearch class. This class implements the deployment of the sql index.
 
         Args:
             suffix (str, optional): The suffix for the indexer. Defaults to None. If a suffix is provided, it is assumed to be a test indexer.
             rebuild (bool, optional): Whether to rebuild the index. Defaults to False.
+            single_query_cache_file (bool, optional): Whether to use a single cache file. Defaults to False. Only applies if the cache indexer is enabled.
+            enable_query_cache_indexer (bool, optional): Whether to enable the cache indexer. Defaults to False.
         """
         self.indexer_type = IndexerType.TEXT_2_SQL_QUERY_CACHE
+        self.enable_query_cache_indexer = enable_query_cache_indexer
         super().__init__(suffix, rebuild)
 
+        if single_query_cache_file:
+            self.parsing_mode = BlobIndexerParsingMode.JSON_ARRAY
+        else:
+            self.parsing_mode = BlobIndexerParsingMode.JSON
+
     def get_index_fields(self) -> list[SearchableField]:
         """This function returns the index fields for sql index.
@@ -56,6 +78,11 @@ def get_index_fields(self) -> list[SearchableField]:
                 name="SqlQueryDecomposition",
                 collection=True,
                 fields=[
+                    SearchableField(
+                        name="SubQuestion",
+                        type=SearchFieldDataType.String,
+                        filterable=True,
+                    ),
                     SearchableField(
                         name="SqlQuery",
                         type=SearchFieldDataType.String,
@@ -130,3 +157,100 @@ def get_semantic_search(self) -> SemanticSearch:
         semantic_search = SemanticSearch(configurations=[semantic_config])
 
         return semantic_search
+
+    def get_skills(self) -> list:
+        """Get the skillset for the indexer.
+
+        Returns:
+            list: The skillsets used in the indexer"""
+
+        if self.enable_query_cache_indexer is False:
+            return []
+
+        embedding_skill = self.get_vector_skill(
+            "/document", "/document/Question", target_name="QuestionEmbedding"
+        )
+
+        skills = [embedding_skill]
+
+        return skills
+
+    def get_indexer(self) -> SearchIndexer:
+        """This function returns the indexer for sql.
+
+        Returns:
+            SearchIndexer: The indexer for sql"""
+
+        if self.enable_query_cache_indexer is False:
+            return None
+
+        # Only place on a schedule if it is not a test deployment
+        if self.test:
+            schedule = None
+            batch_size = 4
+        else:
+            schedule = {"interval": "PT24H"}
+            batch_size = 16
+
+        if self.environment.use_private_endpoint:
+            execution_environment = IndexerExecutionEnvironment.PRIVATE
+        else:
+            execution_environment = IndexerExecutionEnvironment.STANDARD
+
+        indexer_parameters = IndexingParameters(
+            batch_size=batch_size,
+            configuration=IndexingParametersConfiguration(
+                data_to_extract=BlobIndexerDataToExtract.CONTENT_AND_METADATA,
+                query_timeout=None,
+                execution_environment=execution_environment,
+                fail_on_unprocessable_document=False,
+                fail_on_unsupported_content_type=False,
+                index_storage_metadata_only_for_oversized_documents=True,
+                indexed_file_name_extensions=".json",
+                parsing_mode=self.parsing_mode,
+            ),
+            max_failed_items=5,
+        )
+
+        indexer = SearchIndexer(
+            name=self.indexer_name,
+            description="Indexer to index sql entities and generate embeddings",
+            skillset_name=self.skillset_name,
+            target_index_name=self.index_name,
+            data_source_name=self.data_source_name,
+            schedule=schedule,
+            field_mappings=[
+                FieldMapping(
+                    source_field_name="metadata_storage_last_modified",
+                    target_field_name="DateLastModified",
+                )
+            ],
+            output_field_mappings=[
+                FieldMapping(
+                    source_field_name="/document/Question",
+                    target_field_name="Id",
+                    mapping_function=FieldMappingFunction(
+                        name="base64Encode",
+                        parameters={"useHttpServerUtilityUrlTokenEncode": False},
+                    ),
+                ),
+                FieldMapping(
+                    source_field_name="/document/Question", target_field_name="Question"
+                ),
+                FieldMapping(
+                    source_field_name="/document/QuestionEmbedding",
+                    target_field_name="QuestionEmbedding",
+                ),
+                FieldMapping(
+                    source_field_name="/document/SqlQueryDecomposition",
+                    target_field_name="SqlQueryDecomposition",
+                ),
+                FieldMapping(
+                    source_field_name="/document/DateLastModified",
+                    target_field_name="DateLastModified",
+                ),
+            ],
+            parameters=indexer_parameters,
+        )
+
+        return indexer
```
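Once the cache is populated, whether by this optional indexer or by direct writes, a lookup is a vector query over `QuestionEmbedding`. A minimal sketch, assuming the `azure-search-documents` SDK; field names come from the index above, while the client setup and embedding value are illustrative assumptions:

```python
# Sketch: checking the query cache for a semantically similar question.
# Field names follow the index definition; endpoint, index name and the
# pre-computed embedding are hypothetical placeholders.
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery

search_client = SearchClient(
    endpoint="https://<search-service>.search.windows.net",  # hypothetical
    index_name="text-2-sql-query-cache-index",  # hypothetical
    credential=AzureKeyCredential("<search-query-key>"),
)

question_embedding = [0.0] * 1536  # replace with a real embedding of the user question

results = search_client.search(
    search_text=None,
    vector_queries=[
        VectorizedQuery(
            vector=question_embedding,
            k_nearest_neighbors=3,
            fields="QuestionEmbedding",
        )
    ],
    select=["Question", "SqlQueryDecomposition"],
    top=3,
)

for result in results:
    # Each hit carries the cached sub-questions and their SQL queries.
    print(result["Question"], result["SqlQueryDecomposition"])
```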

deploy_ai_search/text_2_sql_schema_store.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -43,7 +43,7 @@ def __init__(
         self,
         suffix: str | None = None,
         rebuild: bool | None = False,
-        single_data_dictionary: bool | None = False,
+        single_data_dictionary_file: bool | None = False,
     ):
         """Initialize the Text2SqlAISearch class. This class implements the deployment of the sql index.
 
@@ -57,7 +57,7 @@ def __init__(
         ]
         super().__init__(suffix, rebuild)
 
-        if single_data_dictionary:
+        if single_data_dictionary_file:
             self.parsing_mode = BlobIndexerParsingMode.JSON_ARRAY
         else:
             self.parsing_mode = BlobIndexerParsingMode.JSON
```
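Both classes use the same `parsing_mode` switch: `BlobIndexerParsingMode.JSON` indexes each blob as a single search document, while `BlobIndexerParsingMode.JSON_ARRAY` emits one document per element of a JSON array. A sketch of the two blob shapes this implies, with illustrative field values:

```python
# Sketch of the blob shapes implied by the two parsing modes.
# JSON mode: one file per entity/question, each holding a single object.
per_entity_blob = {
    "Entity": "Orders",  # illustrative data dictionary entry
    "Description": "Sales orders placed by customers",
}

# JSON_ARRAY mode (single_data_dictionary_file / single_query_cache_file = True):
# one file holding an array; the indexer emits one document per element.
single_file_blob = [
    {"Entity": "Orders", "Description": "Sales orders placed by customers"},
    {"Entity": "Customers", "Description": "Customer master records"},
]
```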
