Skip to content

Commit 75fcdff

Browse files
committed
merge changes
2 parents 3f70652 + 8073265 commit 75fcdff

File tree

7 files changed

+46
-23
lines changed

7 files changed

+46
-23
lines changed

adi_function_app/local.settings.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"IsEncrypted": false,
3+
"Values": {
4+
"AIService__DocumentIntelligence__Endpoint": "<documentIntelligenceEndpoint>",
5+
"AIService__DocumentIntelligence__Key": "<documentIntelligenceKey if not using identity>",
6+
"AIService__Language__Endpoint": "<languageEndpoint>",
7+
"AIService__Language__Key": "<languageKey if not using identity>",
8+
"FunctionApp__ClientId": "<clientId of the function app if using user assigned managed identity>",
9+
"IdentityType": "<identityType: system_assigned, user_assigned, or key>",
10+
"OpenAI__ApiKey": "<openAIKey if using non managed identity>",
11+
"OpenAI__ApiVersion": "<openAIApiVersion>",
12+
"OpenAI__Endpoint": "<openAIEndpoint>",
13+
"OpenAI__MultiModalDeployment": "<openAIMultiModalDeploymentId>",
14+
"StorageAccount__ConnectionString": "<connectionString if using non managed identity>",
15+
"StorageAccount__Endpoint": "<Endpoint if using identity based connections>"
16+
}
17+
}

adi_function_app/pre_embedding_cleaner.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,21 @@ def clean_text(src_text: str) -> str:
7171
try:
7272
# Define specific patterns for each tag
7373
tag_patterns = {
74-
"figurecontent": r"<!-- FigureContent=(.*?)-->",
74+
"figurecontent": r"<!--.*?FigureContent=(.*?)-->",
7575
"figure": r"<figure>(.*?)</figure>",
7676
"figures": r"\(figures/\d+\)(.*?)\(figures/\d+\)",
7777
"figcaption": r"<figcaption>(.*?)</figcaption>",
7878
}
7979
cleaned_text = remove_markdown_tags(src_text, tag_patterns)
8080

81-
# remove line breaks
82-
cleaned_text = re.sub(r"\n", "", cleaned_text)
81+
# remove html tags
82+
cleaned_text = re.sub(r"<.*?>", "", cleaned_text)
83+
84+
# Replace newline characters with spaces
85+
cleaned_text = re.sub(r"\n", " ", cleaned_text)
86+
87+
# Replace multiple whitespace characters with a single space
88+
cleaned_text = re.sub(r"\s+", " ", cleaned_text)
8389

8490
# remove stopwords
8591
tokens = word_tokenize(cleaned_text, "english")

deploy_ai_search/ai_search.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
WebApiSkill,
1515
AzureOpenAIEmbeddingSkill,
1616
AzureOpenAIVectorizer,
17-
AzureOpenAIParameters,
17+
AzureOpenAIVectorizerParameters,
1818
SearchIndexer,
1919
SearchIndexerSkillset,
2020
SearchIndexerDataContainer,
@@ -27,7 +27,7 @@
2727
DocumentExtractionSkill,
2828
OcrSkill,
2929
MergeSkill,
30-
SearchIndexerIndexProjections,
30+
SearchIndexerIndexProjection,
3131
BlobIndexerParsingMode,
3232
)
3333
from azure.core.exceptions import HttpResponseError
@@ -149,8 +149,8 @@ def get_indexer(self) -> SearchIndexer:
149149

150150
return None
151151

152-
def get_index_projections(self) -> SearchIndexerIndexProjections:
153-
"""Get the index projections for the indexer."""
152+
def get_index_projections(self) -> SearchIndexerIndexProjection:
153+
"""Get the index projections for the indexer."""
154154

155155
return None
156156

@@ -356,9 +356,9 @@ def get_vector_skill(
356356
name="Vector Skill",
357357
description="Skill to generate embeddings",
358358
context=context,
359-
deployment_id=self.environment.open_ai_embedding_deployment,
359+
deployment_name=self.environment.open_ai_embedding_deployment,
360360
model_name=self.environment.open_ai_embedding_model,
361-
resource_uri=self.environment.open_ai_endpoint,
361+
resource_url=self.environment.open_ai_endpoint,
362362
inputs=embedding_skill_inputs,
363363
outputs=embedding_skill_outputs,
364364
dimensions=self.environment.open_ai_embedding_dimensions,
@@ -535,10 +535,10 @@ def get_vector_search(self) -> VectorSearch:
535535
VectorSearch: The vector search configuration
536536
"""
537537

538-
open_ai_params = AzureOpenAIParameters(
539-
resource_uri=self.environment.open_ai_endpoint,
538+
open_ai_params = AzureOpenAIVectorizerParameters(
539+
resource_url=self.environment.open_ai_endpoint,
540540
model_name=self.environment.open_ai_embedding_model,
541-
deployment_id=self.environment.open_ai_embedding_deployment,
541+
deployment_name=self.environment.open_ai_embedding_deployment,
542542
)
543543

544544
if self.environment.identity_type == IdentityType.KEY:
@@ -556,13 +556,13 @@ def get_vector_search(self) -> VectorSearch:
556556
VectorSearchProfile(
557557
name=self.vector_search_profile_name,
558558
algorithm_configuration_name=self.algorithm_name,
559-
vectorizer=self.vectorizer_name,
559+
vectorizer_name=self.vectorizer_name,
560560
)
561561
],
562562
vectorizers=[
563563
AzureOpenAIVectorizer(
564-
name=self.vectorizer_name,
565-
azure_open_ai_parameters=open_ai_params,
564+
vectorizer_name=self.vectorizer_name,
565+
parameters=open_ai_params,
566566
),
567567
],
568568
)
@@ -602,7 +602,7 @@ def deploy_skillset(self):
602602
name=self.skillset_name,
603603
description="Skillset to chunk documents and generating embeddings",
604604
skills=skills,
605-
index_projections=index_projections,
605+
index_projection=index_projections,
606606
)
607607

608608
self._search_indexer_client.create_or_update_skillset(skillset)

deploy_ai_search/environment.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def ai_search_user_assigned_identity(self) -> SearchIndexerDataUserAssignedIdent
9595
SearchIndexerDataUserAssignedIdentity: The ai search user assigned identity
9696
"""
9797
user_assigned_identity = SearchIndexerDataUserAssignedIdentity(
98-
user_assigned_identity=os.environ.get(
98+
resource_id=os.environ.get(
9999
"AIService__AzureSearchOptions__Identity__FQName"
100100
)
101101
)

deploy_ai_search/rag_documents.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
FieldMapping,
1515
IndexingParameters,
1616
IndexingParametersConfiguration,
17-
SearchIndexerIndexProjections,
17+
SearchIndexerIndexProjection,
1818
SearchIndexerIndexProjectionSelector,
1919
SearchIndexerIndexProjectionsParameters,
2020
IndexProjectionMode,
@@ -200,7 +200,7 @@ def get_skills(self) -> list:
200200

201201
return skills
202202

203-
def get_index_projections(self) -> SearchIndexerIndexProjections:
203+
def get_index_projections(self) -> SearchIndexerIndexProjection:
204204
"""This function returns the index projections for rag document."""
205205
mappings = [
206206
InputFieldMappingEntry(name="Chunk", source="/document/pages/*/chunk"),
@@ -242,7 +242,7 @@ def get_index_projections(self) -> SearchIndexerIndexProjections:
242242
]
243243
)
244244

245-
index_projections = SearchIndexerIndexProjections(
245+
index_projections = SearchIndexerIndexProjection(
246246
selectors=[
247247
SearchIndexerIndexProjectionSelector(
248248
target_index_name=self.index_name,

deploy_ai_search/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
python-dotenv
2-
azure-search-documents==11.6.0b4
2+
azure-search-documents==11.6.0b5
33
azure-storage-blob
44
azure-identity
55
azure-mgmt-web

text_2_sql/requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
semantic-kernel==1.8.3
1+
semantic-kernel==1.9.0
22
azure-search
3-
azure-search-documents==11.6.0b4
3+
azure-search-documents==11.6.0b5
44
aioodbc
55
azure-identity
66
python-dotenv

0 commit comments

Comments
 (0)