Skip to content

Commit 89a1ae6

Browse files
committed
Extract internet search results, chunk them, and select the most relevant chunks for the LLM answer (INCOMPLETE)
1 parent cd9d2e6 commit 89a1ae6

File tree

5 files changed

+221
-70
lines changed

5 files changed

+221
-70
lines changed

backend/onyx/chat/process_message.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@
4343
from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
4444
from onyx.chat.prompt_builder.answer_prompt_builder import default_build_system_message
4545
from onyx.chat.prompt_builder.answer_prompt_builder import default_build_user_message
46+
from onyx.chat.prompt_builder.citations_prompt import (
47+
compute_max_document_tokens_for_persona,
48+
)
4649
from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
4750
from onyx.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH
4851
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
@@ -92,6 +95,7 @@
9295
from onyx.db.models import UserFile
9396
from onyx.db.persona import get_persona_by_id
9497
from onyx.db.search_settings import get_current_search_settings
98+
from onyx.db.user_documents import calculate_user_files_token_count
9599
from onyx.document_index.factory import get_default_document_index
96100
from onyx.file_store.models import ChatFileType
97101
from onyx.file_store.models import FileDescriptor
@@ -133,13 +137,13 @@
133137
INTERNET_SEARCH_RESPONSE_ID,
134138
)
135139
from onyx.tools.tool_implementations.internet_search.internet_search_tool import (
136-
internet_search_response_to_search_docs,
140+
InternetSearchTool,
137141
)
138-
from onyx.tools.tool_implementations.internet_search.internet_search_tool import (
142+
from onyx.tools.tool_implementations.internet_search.models import (
139143
InternetSearchResponse,
140144
)
141-
from onyx.tools.tool_implementations.internet_search.internet_search_tool import (
142-
InternetSearchTool,
145+
from onyx.tools.tool_implementations.internet_search.utils import (
146+
internet_search_response_to_search_docs,
143147
)
144148
from onyx.tools.tool_implementations.search.search_tool import (
145149
FINAL_CONTEXT_DOCUMENTS_ID,
@@ -279,6 +283,9 @@ def _handle_search_tool_response_summary(
279283
)
280284

281285

286+
# TODO: this currently sends the entire internet search response to the LLM, which is incorrect
287+
# TODO: internet search first yields an InternetSearchResponse to populate search results
288+
# and then yields a list of LlmDocs that should be added to context
282289
def _handle_internet_search_tool_response_summary(
283290
packet: ToolResponse,
284291
db_session: Session,
@@ -864,10 +871,6 @@ def stream_chat_message_objects(
864871
file_id_to_user_file = {file.file_id: file for file in user_files}
865872

866873
# Calculate token count for the files
867-
from onyx.db.user_documents import calculate_user_files_token_count
868-
from onyx.chat.prompt_builder.citations_prompt import (
869-
compute_max_document_tokens_for_persona,
870-
)
871874

872875
total_tokens = calculate_user_files_token_count(
873876
user_file_ids or [],
@@ -1048,6 +1051,16 @@ def create_response(
10481051
structured_response_format=new_msg_req.structured_response_format,
10491052
)
10501053

1054+
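# Temporary: build a pruning config for internet search by copying the document
# pruning config and capping max_tokens at the persona's document token budget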
1055+
available_tokens = compute_max_document_tokens_for_persona(
1056+
db_session=db_session,
1057+
persona=persona,
1058+
actual_user_input=message_text,
1059+
)
1060+
1061+
internet_pruning_config = document_pruning_config.copy()
1062+
internet_pruning_config.max_tokens = available_tokens
1063+
10511064
tool_dict = construct_tools(
10521065
persona=persona,
10531066
prompt_config=prompt_config,
@@ -1071,6 +1084,7 @@ def create_response(
10711084
),
10721085
internet_search_tool_config=InternetSearchToolConfig(
10731086
answer_style_config=answer_style_config,
1087+
document_pruning_config=internet_pruning_config,
10741088
),
10751089
image_generation_tool_config=ImageGenerationToolConfig(
10761090
additional_headers=litellm_additional_headers,

backend/onyx/tools/tool_constructor.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ class InternetSearchToolConfig(BaseModel):
122122
citation_config=CitationConfig(all_docs_useful=True)
123123
)
124124
)
125+
document_pruning_config: DocumentPruningConfig = Field(
126+
default_factory=DocumentPruningConfig
127+
)
125128

126129

127130
class ImageGenerationToolConfig(BaseModel):
@@ -222,8 +225,11 @@ def construct_tools(
222225
tool_dict[db_tool_model.id] = [
223226
InternetSearchTool(
224227
api_key=EXA_API_KEY,
228+
db_session=db_session,
229+
llm=llm,
225230
answer_style_config=internet_search_tool_config.answer_style_config,
226231
prompt_config=prompt_config,
232+
pruning_config=internet_search_tool_config.document_pruning_config,
227233
)
228234
]
229235

0 commit comments

Comments
 (0)