Commit 27e5251

Update ai search deploy
1 parent 6049d7b commit 27e5251

6 files changed: +1266 −13 lines changed

deploy_ai_search/README.md

Lines changed: 4 additions & 4 deletions
@@ -2,14 +2,14 @@

 The associated scripts in this portion of the repository contain pre-built scripts to deploy the skillsets needed for both Text2SQL and Image Processing.

-## Steps for Rag Documents Index Deployment (For Image Processing)
+## Steps for Image Processing Index Deployment (For Image Processing)

 1. Update `.env` file with the associated values. Not all values are required, depending on whether you are using System / User Assigned Identities or key based authentication.
-2. Adjust `rag_documents.py` with any changes to the index / indexer. The `get_skills()` method implements the skills pipeline. Make any adjustments here in the skills needed to enrich the data source.
+2. Adjust `image_processing.py` with any changes to the index / indexer. The `get_skills()` method implements the skills pipeline. Make any adjustments here in the skills needed to enrich the data source.
 3. Run `deploy.py` with the following args:

-   - `index_type rag`. This selects the `RagDocumentsAISearch` sub class.
-   - `enable_page_chunking True`. This determines whether page wise chunking is applied in ADI, or whether the inbuilt skill is used for TextSplit. **Page wise analysis in ADI is recommended to avoid splitting tables / figures across multiple chunks, when the chunking is performed.**
+   - `index_type image_processing`. This selects the `ImageProcessingAISearch` sub class.
+   - `enable_page_chunking True`. This determines whether page wise chunking is applied in ADI, or whether the inbuilt skill is used for TextSplit. This suits documents that are inherently page-wise, e.g. pptx files.
    - `rebuild`. Whether to delete and rebuild the index.
    - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want to deploy a test version before overwriting the main version.
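For readers wiring this up locally, here is a minimal sketch of how the arguments described in the README hunk above might be parsed. The actual flag definitions live in `deploy.py` and are not part of this diff, so the flag syntax, boolean handling, and defaults shown here are assumptions rather than the repository's real parser:

```python
# Illustrative sketch only; the real parser in deploy.py may differ in flag
# names, boolean handling, and defaults.
import argparse


def parse_bool(value: str) -> bool:
    """Interpret 'True'/'False' style command line values."""
    return value.strip().lower() in ("true", "1", "yes")


parser = argparse.ArgumentParser(description="Deploy an AI Search index and indexer.")
parser.add_argument("--index_type", required=True,
                    help="e.g. 'image_processing' to select the ImageProcessingAISearch sub class")
parser.add_argument("--enable_page_chunking", type=parse_bool, default=False,
                    help="Use page-wise chunking in ADI instead of the inbuilt TextSplit skill")
parser.add_argument("--rebuild", type=parse_bool, default=False,
                    help="Delete and rebuild the index")
parser.add_argument("--suffix", default="None",
                    help="Optional suffix applied to the deployed index and indexer")
arguments = parser.parse_args()

# Assumed invocation (flag spelling is a guess):
#   python deploy.py --index_type image_processing --enable_page_chunking True --rebuild True --suffix test
```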

deploy_ai_search/src/deploy_ai_search/deploy.py

Lines changed: 3 additions & 3 deletions
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 import argparse
-from rag_documents import RagDocumentsAISearch
+from image_processing import ImageProcessingAISearch
 from text_2_sql_schema_store import Text2SqlSchemaStoreAISearch
 from text_2_sql_query_cache import Text2SqlQueryCacheAISearch
 from text_2_sql_column_value_store import Text2SqlColumnValueStoreAISearch
@@ -17,8 +17,8 @@ def deploy_config(arguments: argparse.Namespace):
         arguments (argparse.Namespace): The arguments passed to the script"""

     suffix = None if args.suffix == "None" else args.suffix
-    if arguments.index_type == "rag":
-        index_config = RagDocumentsAISearch(
+    if arguments.index_type == "image_processing":
+        index_config = ImageProcessingAISearch(
             suffix=suffix,
             rebuild=arguments.rebuild,
             enable_page_by_chunking=arguments.enable_page_chunking,
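The hunk above shows only the renamed `image_processing` branch. Judging by the imports at the top of the file, `deploy_config` presumably continues with parallel branches for the Text2SQL indexes; a hedged sketch of that dispatch shape, where the other `index_type` strings and constructor arguments are assumptions not shown in this diff:

```python
# Sketch of the dispatch shape implied by the imports in deploy.py; the branch
# strings and constructor arguments for the Text2SQL classes are assumptions.
if arguments.index_type == "image_processing":
    index_config = ImageProcessingAISearch(
        suffix=suffix,
        rebuild=arguments.rebuild,
        enable_page_by_chunking=arguments.enable_page_chunking,
    )
elif arguments.index_type == "text_2_sql_schema_store":
    index_config = Text2SqlSchemaStoreAISearch(suffix=suffix, rebuild=arguments.rebuild)
elif arguments.index_type == "text_2_sql_query_cache":
    index_config = Text2SqlQueryCacheAISearch(suffix=suffix, rebuild=arguments.rebuild)
else:
    raise ValueError(f"Unknown index_type: {arguments.index_type}")
```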

deploy_ai_search/src/deploy_ai_search/environment.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 class IndexerType(Enum):
     """The type of the indexer"""

-    RAG_DOCUMENTS = "rag-documents"
+    IMAGE_PROCESSING = "image-processing"
     TEXT_2_SQL_SCHEMA_STORE = "text-2-sql-schema-store"
     TEXT_2_SQL_QUERY_CACHE = "text-2-sql-query-cache"
     TEXT_2_SQL_COLUMN_VALUE_STORE = "text-2-sql-column-value-store"
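The enum values read like resource-name stems, and the README notes that an optional suffix can be appended to the deployed index and indexer for test deployments. A minimal sketch of that naming idea; the actual name construction lives in the `AISearch` base class, which is not part of this diff, so the scheme below is an assumption:

```python
from enum import Enum


class IndexerType(Enum):
    """The type of the indexer (after this commit's rename)."""

    IMAGE_PROCESSING = "image-processing"
    TEXT_2_SQL_SCHEMA_STORE = "text-2-sql-schema-store"
    TEXT_2_SQL_QUERY_CACHE = "text-2-sql-query-cache"
    TEXT_2_SQL_COLUMN_VALUE_STORE = "text-2-sql-column-value-store"


def index_name(indexer_type: IndexerType, suffix: str | None = None) -> str:
    # Assumption: names are derived from the enum value, with the optional
    # suffix appended for test deployments as described in the README.
    base = f"{indexer_type.value}-index"
    return f"{base}-{suffix}" if suffix else base


# index_name(IndexerType.IMAGE_PROCESSING, "test") -> "image-processing-index-test"
```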

deploy_ai_search/src/deploy_ai_search/rag_documents.py renamed to deploy_ai_search/src/deploy_ai_search/image_processing.py

Lines changed: 5 additions & 5 deletions
@@ -29,7 +29,7 @@
 )


-class RagDocumentsAISearch(AISearch):
+class ImageProcessingAISearch(AISearch):
     """This class is used to deploy the rag document index."""

     def __init__(
@@ -38,13 +38,13 @@ def __init__(
         rebuild: bool | None = False,
         enable_page_by_chunking=False,
     ):
-        """Initialize the RagDocumentsAISearch class. This class implements the deployment of the rag document index.
+        """Initialize the ImageProcessingAISearch class. This class implements the deployment of the rag document index.

        Args:
            suffix (str, optional): The suffix for the indexer. Defaults to None. If a suffix is provided, it is assumed to be a test indexer.
            rebuild (bool, optional): Whether to rebuild the index. Defaults to False.
        """
-        self.indexer_type = IndexerType.RAG_DOCUMENTS
+        self.indexer_type = IndexerType.IMAGE_PROCESSING
         super().__init__(suffix, rebuild)

         if enable_page_by_chunking is not None:
@@ -140,7 +140,7 @@ def get_index_fields(self) -> list[SearchableField]:
         if self.enable_page_by_chunking:
             fields.extend(
                 [
-                    SearchableField(
+                    SimpleField(
                         name="PageNumber",
                         type=SearchFieldDataType.Int64,
                         sortable=True,
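The switch from `SearchableField` to `SimpleField` in the hunk above matters because, in the `azure-search-documents` SDK, `SearchableField` is a convenience helper for full-text searchable string fields (it always emits an `Edm.String` or collection-of-string field), whereas `SimpleField` can carry any primitive type and supports sorting and filtering without full-text search, which is what an `Int64` page number needs. A short sketch of the distinction; the `filterable` flag is an assumption, since the rest of the field definition is truncated in this hunk:

```python
from azure.search.documents.indexes.models import (
    SearchableField,
    SearchFieldDataType,
    SimpleField,
)

# SearchableField is intended for full-text searchable strings; the helper
# always emits an Edm.String (or Collection(Edm.String)) field.
title_field = SearchableField(name="Title")

# SimpleField is the right helper for non-searchable primitives such as an
# Int64 page number that only needs to be sortable/filterable.
page_number_field = SimpleField(
    name="PageNumber",
    type=SearchFieldDataType.Int64,
    sortable=True,
    filterable=True,  # assumption: the remaining flags are not shown in this hunk
)
```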
@@ -286,7 +286,7 @@ def get_indexer(self) -> SearchIndexer:
         indexer_parameters = IndexingParameters(
             batch_size=batch_size,
             configuration=IndexingParametersConfiguration(
-                data_to_extract=BlobIndexerDataToExtract.STORAGE_METADATA,
+                data_to_extract=BlobIndexerDataToExtract.ALL_METADATA,
                 query_timeout=None,
                 execution_environment=execution_environment,
                 fail_on_unprocessable_document=False,
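For context on this last hunk: in Azure AI Search blob indexing, `BlobIndexerDataToExtract.STORAGE_METADATA` indexes only the standard blob properties and user-defined metadata, while `ALL_METADATA` additionally extracts content-type specific metadata (for example, properties of Office documents or image files). A hedged, self-contained sketch of the resulting indexer parameters; the surrounding values such as `batch_size` and `execution_environment` come from code not shown in this diff, so placeholder values are used:

```python
from azure.search.documents.indexes.models import (
    BlobIndexerDataToExtract,
    IndexingParameters,
    IndexingParametersConfiguration,
)

# ALL_METADATA pulls content-type specific metadata in addition to the standard
# blob storage metadata that STORAGE_METADATA limits extraction to.
indexer_parameters = IndexingParameters(
    batch_size=1,  # placeholder: the real batch_size comes from surrounding code
    configuration=IndexingParametersConfiguration(
        data_to_extract=BlobIndexerDataToExtract.ALL_METADATA,
        query_timeout=None,
    ),
)
```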
