@@ -188,11 +188,12 @@ def get_skills(self) -> list:
188
188
if self .enable_page_by_chunking :
189
189
embedding_skill = self .get_vector_skill (
190
190
"/document/page_wise_layout/*" ,
191
- "/document/page_wise_layout/*/cleaned_text " ,
191
+ "/document/page_wise_layout/*/final_cleaned_text " ,
192
192
)
193
193
else :
194
194
embedding_skill = self .get_vector_skill (
195
- "/document/chunk_mark_ups/*" , "/document/chunk_mark_ups/*/cleaned_text"
195
+ "/document/chunk_mark_ups/*" ,
196
+ "/document/chunk_mark_ups/*/final_cleaned_text" ,
196
197
)
197
198
198
199
if self .enable_page_by_chunking :
@@ -223,7 +224,7 @@ def get_index_projections(self) -> SearchIndexerIndexProjection:
223
224
source_context = "/document/page_wise_layout/*"
224
225
mappings = [
225
226
InputFieldMappingEntry (
226
- name = "Chunk" , source = "/document/page_wise_layout/*/mark_up "
227
+ name = "Chunk" , source = "/document/page_wise_layout/*/final_mark_up "
227
228
),
228
229
InputFieldMappingEntry (
229
230
name = "ChunkEmbedding" ,
@@ -233,24 +234,25 @@ def get_index_projections(self) -> SearchIndexerIndexProjection:
233
234
InputFieldMappingEntry (name = "SourceUri" , source = "/document/SourceUri" ),
234
235
InputFieldMappingEntry (
235
236
name = "Sections" ,
236
- source = "/document/page_wise_layout/*/sections " ,
237
+ source = "/document/page_wise_layout/*/final_sections " ,
237
238
),
238
239
InputFieldMappingEntry (
239
240
name = "ChunkFigures" ,
240
- source = "/document/page_wise_layout/*/chunk_figures /*" ,
241
+ source = "/document/page_wise_layout/*/final_chunk_figures /*" ,
241
242
),
242
243
InputFieldMappingEntry (
243
244
name = "DateLastModified" , source = "/document/DateLastModified"
244
245
),
245
246
InputFieldMappingEntry (
246
- name = "PageNumber" , source = "/document/page_wise_layout/*/page_number"
247
+ name = "PageNumber" ,
248
+ source = "/document/page_wise_layout/*/final_page_number" ,
247
249
),
248
250
]
249
251
else :
250
252
source_context = "/document/chunk_mark_ups/*"
251
253
mappings = [
252
254
InputFieldMappingEntry (
253
- name = "Chunk" , source = "/document/chunk_mark_ups/*/mark_up "
255
+ name = "Chunk" , source = "/document/chunk_mark_ups/*/final_mark_up "
254
256
),
255
257
InputFieldMappingEntry (
256
258
name = "ChunkEmbedding" ,
@@ -259,17 +261,18 @@ def get_index_projections(self) -> SearchIndexerIndexProjection:
259
261
InputFieldMappingEntry (name = "Title" , source = "/document/Title" ),
260
262
InputFieldMappingEntry (name = "SourceUri" , source = "/document/SourceUri" ),
261
263
InputFieldMappingEntry (
262
- name = "Sections" , source = "/document/chunk_mark_ups/*/sections "
264
+ name = "Sections" , source = "/document/chunk_mark_ups/*/final_sections "
263
265
),
264
266
InputFieldMappingEntry (
265
267
name = "ChunkFigures" ,
266
- source = "/document/chunk_mark_ups/*/chunk_figures /*" ,
268
+ source = "/document/chunk_mark_ups/*/final_chunk_figures /*" ,
267
269
),
268
270
InputFieldMappingEntry (
269
271
name = "DateLastModified" , source = "/document/DateLastModified"
270
272
),
271
273
InputFieldMappingEntry (
272
- name = "PageNumber" , source = "/document/chunk_mark_ups/*/page_number"
274
+ name = "PageNumber" ,
275
+ source = "/document/chunk_mark_ups/*/final_page_number" ,
273
276
),
274
277
]
275
278
0 commit comments