Skip to content

Commit 22ae47a

Browse files
Merge branch 'main' into feature/skip-straight-2-query
2 parents 26ebd26 + 8073265 commit 22ae47a

File tree

7 files changed

+44
-21
lines changed

7 files changed

+44
-21
lines changed

adi_function_app/local.settings.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"IsEncrypted": false,
3+
"Values": {
4+
"AIService__DocumentIntelligence__Endpoint": "<documentIntelligenceEndpoint>",
5+
"AIService__DocumentIntelligence__Key": "<documentIntelligenceKey if not using identity>",
6+
"AIService__Language__Endpoint": "<languageEndpoint>",
7+
"AIService__Language__Key": "<languageKey if not using identity>",
8+
"FunctionApp__ClientId": "<clientId of the function app if using user assigned managed identity>",
9+
"IdentityType": "<identityType: system_assigned, user_assigned, or key>",
10+
"OpenAI__ApiKey": "<openAIKey if using non managed identity>",
11+
"OpenAI__ApiVersion": "<openAIApiVersion>",
12+
"OpenAI__Endpoint": "<openAIEndpoint>",
13+
"OpenAI__MultiModalDeployment": "<openAIMultiModalDeploymentId>",
14+
"StorageAccount__ConnectionString": "<connectionString if using non managed identity>",
15+
"StorageAccount__Endpoint": "<Endpoint if using identity based connections>"
16+
}
17+
}

adi_function_app/pre_embedding_cleaner.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,21 @@ def clean_text(src_text: str) -> str:
7171
try:
7272
# Define specific patterns for each tag
7373
tag_patterns = {
74-
"figurecontent": r"<!-- FigureContent=(.*?)-->",
74+
"figurecontent": r"<!--.*?FigureContent=(.*?)-->",
7575
"figure": r"<figure>(.*?)</figure>",
7676
"figures": r"\(figures/\d+\)(.*?)\(figures/\d+\)",
7777
"figcaption": r"<figcaption>(.*?)</figcaption>",
7878
}
7979
cleaned_text = remove_markdown_tags(src_text, tag_patterns)
8080

81-
# remove line breaks
82-
cleaned_text = re.sub(r"\n", "", cleaned_text)
81+
# remove html tags
82+
cleaned_text = re.sub(r"<.*?>", "", cleaned_text)
83+
84+
# Replace newline characters with spaces
85+
cleaned_text = re.sub(r"\n", " ", cleaned_text)
86+
87+
# Replace multiple whitespace characters with a single space
88+
cleaned_text = re.sub(r"\s+", " ", cleaned_text)
8389

8490
# remove stopwords
8591
tokens = word_tokenize(cleaned_text, "english")

deploy_ai_search/ai_search.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
WebApiSkill,
1515
AzureOpenAIEmbeddingSkill,
1616
AzureOpenAIVectorizer,
17-
AzureOpenAIParameters,
17+
AzureOpenAIVectorizerParameters,
1818
SearchIndexer,
1919
SearchIndexerSkillset,
2020
SearchIndexerDataContainer,
@@ -24,7 +24,7 @@
2424
InputFieldMappingEntry,
2525
SynonymMap,
2626
SplitSkill,
27-
SearchIndexerIndexProjections,
27+
SearchIndexerIndexProjection,
2828
BlobIndexerParsingMode,
2929
)
3030
from azure.core.exceptions import HttpResponseError
@@ -146,7 +146,7 @@ def get_indexer(self) -> SearchIndexer:
146146

147147
return None
148148

149-
def get_index_projections(self) -> SearchIndexerIndexProjections:
149+
def get_index_projections(self) -> SearchIndexerIndexProjection:
150150
"""Get the index projections for the indexer."""
151151

152152
return None
@@ -353,9 +353,9 @@ def get_vector_skill(
353353
name="Vector Skill",
354354
description="Skill to generate embeddings",
355355
context=context,
356-
deployment_id=self.environment.open_ai_embedding_deployment,
356+
deployment_name=self.environment.open_ai_embedding_deployment,
357357
model_name=self.environment.open_ai_embedding_model,
358-
resource_uri=self.environment.open_ai_endpoint,
358+
resource_url=self.environment.open_ai_endpoint,
359359
inputs=embedding_skill_inputs,
360360
outputs=embedding_skill_outputs,
361361
dimensions=self.environment.open_ai_embedding_dimensions,
@@ -430,10 +430,10 @@ def get_vector_search(self) -> VectorSearch:
430430
VectorSearch: The vector search configuration
431431
"""
432432

433-
open_ai_params = AzureOpenAIParameters(
434-
resource_uri=self.environment.open_ai_endpoint,
433+
open_ai_params = AzureOpenAIVectorizerParameters(
434+
resource_url=self.environment.open_ai_endpoint,
435435
model_name=self.environment.open_ai_embedding_model,
436-
deployment_id=self.environment.open_ai_embedding_deployment,
436+
deployment_name=self.environment.open_ai_embedding_deployment,
437437
)
438438

439439
if self.environment.identity_type == IdentityType.KEY:
@@ -451,13 +451,13 @@ def get_vector_search(self) -> VectorSearch:
451451
VectorSearchProfile(
452452
name=self.vector_search_profile_name,
453453
algorithm_configuration_name=self.algorithm_name,
454-
vectorizer=self.vectorizer_name,
454+
vectorizer_name=self.vectorizer_name,
455455
)
456456
],
457457
vectorizers=[
458458
AzureOpenAIVectorizer(
459-
name=self.vectorizer_name,
460-
azure_open_ai_parameters=open_ai_params,
459+
vectorizer_name=self.vectorizer_name,
460+
parameters=open_ai_params,
461461
),
462462
],
463463
)
@@ -497,7 +497,7 @@ def deploy_skillset(self):
497497
name=self.skillset_name,
498498
description="Skillset to chunk documents and generating embeddings",
499499
skills=skills,
500-
index_projections=index_projections,
500+
index_projection=index_projections,
501501
)
502502

503503
self._search_indexer_client.create_or_update_skillset(skillset)

deploy_ai_search/environment.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def ai_search_user_assigned_identity(self) -> SearchIndexerDataUserAssignedIdent
9595
SearchIndexerDataUserAssignedIdentity: The ai search user assigned identity
9696
"""
9797
user_assigned_identity = SearchIndexerDataUserAssignedIdentity(
98-
user_assigned_identity=os.environ.get(
98+
resource_id=os.environ.get(
9999
"AIService__AzureSearchOptions__Identity__FQName"
100100
)
101101
)

deploy_ai_search/rag_documents.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
FieldMapping,
1515
IndexingParameters,
1616
IndexingParametersConfiguration,
17-
SearchIndexerIndexProjections,
17+
SearchIndexerIndexProjection,
1818
SearchIndexerIndexProjectionSelector,
1919
SearchIndexerIndexProjectionsParameters,
2020
IndexProjectionMode,
@@ -199,7 +199,7 @@ def get_skills(self) -> list:
199199

200200
return skills
201201

202-
def get_index_projections(self) -> SearchIndexerIndexProjections:
202+
def get_index_projections(self) -> SearchIndexerIndexProjection:
203203
"""This function returns the index projections for rag document."""
204204
mappings = [
205205
InputFieldMappingEntry(name="Chunk", source="/document/pages/*/chunk"),
@@ -241,7 +241,7 @@ def get_index_projections(self) -> SearchIndexerIndexProjections:
241241
]
242242
)
243243

244-
index_projections = SearchIndexerIndexProjections(
244+
index_projections = SearchIndexerIndexProjection(
245245
selectors=[
246246
SearchIndexerIndexProjectionSelector(
247247
target_index_name=self.index_name,

deploy_ai_search/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
python-dotenv
2-
azure-search-documents==11.6.0b4
2+
azure-search-documents==11.6.0b5
33
azure-storage-blob
44
azure-identity
55
azure-mgmt-web

text_2_sql/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
semantic-kernel==1.9.0
22
azure-search
3-
azure-search-documents==11.6.0b4
3+
azure-search-documents==11.6.0b5
44
aioodbc
55
azure-identity
66
python-dotenv

0 commit comments

Comments
 (0)