@@ -1,3 +1,5 @@
+from typing import cast
+
 from chonkie import SentenceChunker

 from onyx.configs.app_configs import AVERAGE_SUMMARY_EMBEDDINGS
@@ -205,7 +207,8 @@ def _extract_blurb(self, text: str) -> str:
         """
         Extract a short blurb from the text (first chunk of size `blurb_size`).
         """
-        texts = self.blurb_splitter.chunk(text)
+        # chunker is in `text` mode
+        texts = cast(list[str], self.blurb_splitter.chunk(text))
         if not texts:
             return ""
         return texts[0]
@@ -215,7 +218,8 @@ def _get_mini_chunk_texts(self, chunk_text: str) -> list[str] | None:
         For "multipass" mode: additional sub-chunks (mini-chunks) for use in certain embeddings.
         """
         if self.mini_chunk_splitter and chunk_text.strip():
-            return self.mini_chunk_splitter.chunk(chunk_text)
+            # chunker is in `text` mode
+            return cast(list[str], self.mini_chunk_splitter.chunk(chunk_text))
         return None

    # ADDED: extra param image_url to store in the chunk
@@ -335,7 +339,8 @@ def _chunk_document_with_sections(
            chunk_text = ""
            link_offsets = {}

-            split_texts = self.chunk_splitter.chunk(section_text)
+            # chunker is in `text` mode
+            split_texts = cast(list[str], self.chunk_splitter.chunk(section_text))
            for i, split_text in enumerate(split_texts):
                # If even the split_text is bigger than strict limit, further split
                if (
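For context on the repeated `cast(list[str], ...)` pattern: a minimal sketch, assuming the splitters are configured in chonkie's text-return mode as the diff comments state, so `chunk()` yields plain strings at runtime while its declared return type is wider. `typing.cast` does no runtime conversion; it only narrows the type for the type checker. The helper below is hypothetical and not part of the PR.

    from typing import cast

    # cast() performs no runtime work: it simply tells the type checker to treat
    # the value as the given type. If the chunker returns plain strings (the
    # "text" mode mentioned in the diff comments), the cast makes that
    # assumption explicit for mypy without touching the runtime value.
    def first_blurb(texts_from_chunker: object) -> str:
        texts = cast(list[str], texts_from_chunker)
        return texts[0] if texts else ""

    print(first_blurb(["A short blurb.", "Another chunk."]))  # -> "A short blurb."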