Skip to content

Commit 7fecac5

Browse files
committed
Upgrade chonkie version
1 parent b2213bb commit 7fecac5

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

backend/onyx/indexing/chunker.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
from typing import cast
2+
13
from chonkie import SentenceChunker
24

35
from onyx.configs.app_configs import AVERAGE_SUMMARY_EMBEDDINGS
@@ -205,7 +207,8 @@ def _extract_blurb(self, text: str) -> str:
205207
"""
206208
Extract a short blurb from the text (first chunk of size `blurb_size`).
207209
"""
208-
texts = self.blurb_splitter.chunk(text)
210+
# chunker is in `text` mode
211+
texts = cast(list[str], self.blurb_splitter.chunk(text))
209212
if not texts:
210213
return ""
211214
return texts[0]
@@ -215,7 +218,8 @@ def _get_mini_chunk_texts(self, chunk_text: str) -> list[str] | None:
215218
For "multipass" mode: additional sub-chunks (mini-chunks) for use in certain embeddings.
216219
"""
217220
if self.mini_chunk_splitter and chunk_text.strip():
218-
return self.mini_chunk_splitter.chunk(chunk_text)
221+
# chunker is in `text` mode
222+
return cast(list[str], self.mini_chunk_splitter.chunk(chunk_text))
219223
return None
220224

221225
# ADDED: extra param image_url to store in the chunk
@@ -335,7 +339,8 @@ def _chunk_document_with_sections(
335339
chunk_text = ""
336340
link_offsets = {}
337341

338-
split_texts = self.chunk_splitter.chunk(section_text)
342+
# chunker is in `text` mode
343+
split_texts = cast(list[str], self.chunk_splitter.chunk(section_text))
339344
for i, split_text in enumerate(split_texts):
340345
# If even the split_text is bigger than strict limit, further split
341346
if (

backend/requirements/default.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ beautifulsoup4==4.12.3
77
boto3==1.36.23
88
celery==5.5.1
99
chardet==5.2.0
10-
chonkie==1.0.9
10+
chonkie==1.0.10
1111
dask==2023.8.1
1212
ddtrace==2.6.5
1313
discord.py==2.4.0

0 commit comments

Comments
 (0)