From e042454d05b15bd7457bc90b7a44b54e166161f2 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Thu, 18 Sep 2025 18:53:49 -0700 Subject: [PATCH 01/34] . --- backend/onyx/chat/answer_cli.py | 0 backend/onyx/chat/answer_scratchpad.py | 298 +++++++++++++++++++++ backend/onyx/chat/process_message.py | 107 ++++---- backend/onyx/evals/eval.py | 2 + backend/onyx/evals/eval_cli.py | 17 +- backend/onyx/evals/models.py | 1 + backend/onyx/evals/providers/braintrust.py | 3 + backend/onyx/llm/interfaces.py | 48 ++-- 8 files changed, 401 insertions(+), 75 deletions(-) create mode 100644 backend/onyx/chat/answer_cli.py create mode 100644 backend/onyx/chat/answer_scratchpad.py diff --git a/backend/onyx/chat/answer_cli.py b/backend/onyx/chat/answer_cli.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py new file mode 100644 index 00000000000..e1746b3dd28 --- /dev/null +++ b/backend/onyx/chat/answer_scratchpad.py @@ -0,0 +1,298 @@ +from __future__ import annotations + +import json +import time +from collections.abc import Callable +from collections.abc import Generator +from typing import Any +from typing import Dict +from typing import List + +import litellm + +from onyx.agents.agent_search.dr.sub_agents.web_search.clients.exa_client import ( + ExaClient, +) +from onyx.agents.agent_search.models import GraphConfig +from onyx.llm.interfaces import ( + LLM, +) # sync call that supports stream=True with an iterator + +# ---------- Tool registry (sync) ---------- + + +class ToolSpec: + def __init__( + self, + name: str, + description: str, + parameters: Dict[str, Any], + func: Callable[..., Any], + private: bool = False, + ): + self.name = name + self.description = description + self.parameters = parameters + self.func = func + self.private = private + + +TOOL_REGISTRY: Dict[str, ToolSpec] = {} + + +def short_tag(link: str, i: int) -> str: + # Stable, readable; index keeps it deterministic across a batch + return f"S{i+1}" + + +def register_tool(spec: ToolSpec) -> None: + if spec.name in TOOL_REGISTRY: + raise ValueError(f"Tool {spec.name} already registered") + TOOL_REGISTRY[spec.name] = spec + + +# Example tool +def web_search(query: str, outer_ctx: Dict[str, Any]) -> Dict[str, Any]: + exa_client = ExaClient() + hits = exa_client.search(query) + results = [] + for i, r in enumerate(hits): + results.append( + { + "tag": short_tag(r.link, i), # <-- add a tag + "title": r.title, + "link": r.link, + "snippet": r.snippet, + "author": r.author, + "published_date": ( + r.published_date.isoformat() if r.published_date else None + ), + } + ) + return {"results": results} + + +register_tool( + ToolSpec( + name="web_search", + description="Search the web for information.", + parameters={ + "type": "object", + "properties": {"query": {"type": "string"}}, + "required": ["query"], + }, + func=web_search, + ) +) + + +def web_fetch(urls: List[str], outer_ctx: Dict[str, Any]) -> Dict[str, Any]: + exa_client = ExaClient() + docs = exa_client.contents(urls) + out = [] + for i, d in enumerate(docs): + out.append( + { + "tag": short_tag(d.link, i), # <-- add a tag + "title": d.title, + "link": d.link, + "full_content": d.full_content, + "published_date": ( + d.published_date.isoformat() if d.published_date else None + ), + } + ) + return {"results": out} + + +register_tool( + ToolSpec( + name="web_fetch", + description="Fetch the contents of a list of URLs.", + parameters={ + "type": "object", + "properties": {"urls": {"type": 
"array", "items": {"type": "string"}}}, + "required": ["urls"], + }, + func=web_fetch, + ) +) + + +def reasoning(outer_ctx: Dict[str, Any]) -> Dict[str, Any]: + PRIVATE_SCRATCHPAD_SYS = ( + "You are writing PRIVATE scratch notes for yourself. " + "These notes will NOT be shown to the user. " + "Do NOT copy these notes verbatim into the final answer. " + "Use them to plan, compute, and create structured intermediate results." + ) + messages = outer_ctx["messages"] + llm = outer_ctx["model"] + revised_messages = [ + {"role": "system", "content": PRIVATE_SCRATCHPAD_SYS}, + ] + messages[1:] + results = litellm.completion( + model=llm.config.model_name, + temperature=llm.config.temperature, + messages=revised_messages, + ) + return {"results": results["choices"][0]["message"]["content"]} + + +register_tool( + ToolSpec( + name="reasoning", + description="Reason about the message history and the goal.", + parameters={"type": "object", "properties": {}, "required": []}, + func=reasoning, + ) +) + + +def tool_specs_for_openai() -> List[Dict[str, Any]]: + return [ + { + "type": "function", + "function": { + "name": t.name, + "description": t.description, + "parameters": t.parameters, + }, + } + for t in TOOL_REGISTRY.values() + ] + + +def run_tool_sync( + name: str, args: Dict[str, Any], outer_ctx: Dict[str, Any] +) -> Dict[str, Any]: + spec = TOOL_REGISTRY[name] + try: + result = spec.func(**args, outer_ctx=outer_ctx) + except TypeError as e: + result = {"ok": False, "error": f"Bad arguments: {e}"} + except Exception as e: + result = {"ok": False, "error": str(e)} + return {"name": name, "private": spec.private, "result": result} + + +def stream_chat_sync( + messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM +) -> Generator[Dict[str, Any], None, None]: + """ + Yields events suitable for SSE/WebSocket: + {"type":"delta","text": "..."} -> stream to user + {"type":"tool","name":..., "args":..., "private": bool} + {"type":"final"} + """ + start = time.time() + tools_decl = tool_specs_for_openai() + tool_step = 0 + + while True: + if time.time() - start > 200: + yield {"type": "delta", "text": "\n[Timed out while composing reply]"} + break + # Start a streaming completion (sync iterator of deltas) + stream_iter = litellm.completion( + model=llm.config.model_name, + temperature=llm.config.temperature, + messages=messages, + tools=tools_decl, + stream=True, # iterator of chunks + ) + + # Accumulate assistant text & tool call chunks + assistant_text_parts: List[str] = [] + tool_calls_accum: List[Dict[str, Any]] = [] # indexed by tool call index + + for chunk in stream_iter: + choice = chunk.choices[0] + delta = getattr(choice, "delta", getattr(choice, "message", None)) + + # 1) Text deltas + content_piece = getattr(delta, "content", None) + if content_piece: + assistant_text_parts.append(content_piece) + yield {"type": "delta", "text": content_piece} + + # 2) Tool call deltas (arrive chunked) + tcs = getattr(delta, "tool_calls", None) + if tcs: + for tc in tcs: + if tc.get("type") != "function": + continue + idx = tc.get("index", 0) + while len(tool_calls_accum) <= idx: + tool_calls_accum.append( + {"id": None, "fn": {"name": "", "arguments": ""}} + ) + buf = tool_calls_accum[idx] + if tc.get("id"): + buf["id"] = tc["id"] + fn = tc.get("function", {}) + if fn.get("name"): + buf["fn"]["name"] = fn["name"] + if fn.get("arguments"): + buf["fn"]["arguments"] += fn["arguments"] + + # Finalize assistant message for this turn + assistant_text = "".join(assistant_text_parts).strip() + assistant_msg: 
Dict[str, Any] = {"role": "assistant", "content": assistant_text} + if tool_calls_accum: + assistant_msg["tool_calls"] = [ + { + "id": tc["id"], + "type": "function", + "function": { + "name": tc["fn"]["name"], + "arguments": tc["fn"]["arguments"], + }, + } + for tc in tool_calls_accum + ] + messages.append(assistant_msg) + + # If we have tool calls and haven’t exceeded step cap, execute and loop again + if tool_calls_accum and tool_step < 10: + tool_step += 1 + for tc in tool_calls_accum: + name = tc["fn"]["name"] + try: + args = json.loads(tc["fn"]["arguments"] or "{}") + except json.JSONDecodeError: + args = {"raw": tc["fn"]["arguments"]} + + # Surface tool activity to UI (don’t stream private payloads) + yield { + "type": "tool", + "name": name, + "args": args, + "private": TOOL_REGISTRY[name].private, + } + + outer_ctx = { + "model": llm, + "messages": messages, + "cfg": cfg, + } + tool_result = run_tool_sync(name, args, outer_ctx) + messages.append( + { + "role": "tool", + "tool_call_id": tc["id"], + "name": name, + "content": [ + {"type": "text", "text": json.dumps(result)} + for result in tool_result["result"]["results"] + ], + } + ) + + # Loop: the model now sees tool outputs and will either answer or call more tools + continue + + # No tools (final answer) or step cap reached + break + + yield {"type": "final"} diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index 69f85a9b1eb..76ff38a35fe 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -3,17 +3,19 @@ import traceback from collections.abc import Callable from collections.abc import Iterator +from typing import Any from typing import cast +from typing import Dict from typing import Protocol from sqlalchemy.orm import Session from onyx.agents.agent_search.orchestration.nodes.call_tool import ToolCallException from onyx.chat.answer import Answer +from onyx.chat.answer_scratchpad import stream_chat_sync from onyx.chat.chat_utils import create_chat_chain from onyx.chat.chat_utils import create_temporary_persona from onyx.chat.chat_utils import process_kg_commands -from onyx.chat.models import AnswerStream from onyx.chat.models import AnswerStyleConfig from onyx.chat.models import ChatBasicResponse from onyx.chat.models import CitationConfig @@ -24,9 +26,6 @@ from onyx.chat.models import QADocsResponse from onyx.chat.models import StreamingError from onyx.chat.models import UserKnowledgeFilePacket -from onyx.chat.packet_proccessing.process_streamed_packets import ( - process_streamed_packets, -) from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder from onyx.chat.prompt_builder.answer_prompt_builder import default_build_system_message from onyx.chat.prompt_builder.answer_prompt_builder import default_build_user_message @@ -41,7 +40,6 @@ from onyx.context.search.enums import OptionalSearchSetting from onyx.context.search.models import InferenceSection from onyx.context.search.models import RetrievalDetails -from onyx.context.search.models import SavedSearchDoc from onyx.context.search.retrieval.search_runner import ( inference_sections_from_ids, ) @@ -76,11 +74,7 @@ from onyx.llm.utils import litellm_exception_to_error_msg from onyx.natural_language_processing.utils import get_tokenizer from onyx.server.query_and_chat.models import CreateChatMessageRequest -from onyx.server.query_and_chat.streaming_models import CitationDelta from onyx.server.query_and_chat.streaming_models import CitationInfo -from 
onyx.server.query_and_chat.streaming_models import MessageDelta -from onyx.server.query_and_chat.streaming_models import MessageStart -from onyx.server.query_and_chat.streaming_models import Packet from onyx.server.utils import get_json_line from onyx.tools.force import ForceUseTool from onyx.tools.models import SearchToolOverrideKwargs @@ -255,7 +249,7 @@ def stream_chat_message_objects( # messages. # NOTE: is not stored in the database at all. single_message_history: str | None = None, -) -> AnswerStream: +) -> Iterator[Dict[str, Any]]: """Streams in order: 1. [conditional] Retrieved documents if a search needs to be run 2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on @@ -671,11 +665,54 @@ def stream_chat_message_objects( use_agentic_search=new_msg_req.use_agentic_search, skip_gen_ai_answer_generation=new_msg_req.skip_gen_ai_answer_generation, ) - - # Process streamed packets using the new packet processing module - yield from process_streamed_packets( - answer_processed_output=answer.processed_streamed_output, + type_to_role = { + "human": "user", + "assistant": "assistant", + "system": "system", + "function": "function", + } + SYSTEM_PROMPT = """ + You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the \ + user's intent, ask clarifying questions when needed, think step-by-step through complex problems, \ + provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always \ + prioritize being truthful, nuanced, insightful, and efficient. + The current date is September 18, 2025. + + You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make \ + your responses more readable and engaging. + You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, \ + symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline. + For code you prefer to use Markdown and specify the language. + You can use Markdown horizontal rules (---) to separate sections of your responses. + You can use Markdown tables to format your responses for data, lists, and other structured information. + + You must cite inline using tags from tool results. + + Rules: + - Only cite sources provided by the tools (use each item’s "tag" field). + - Place the citation immediately after the claim it supports, like this: "... result [S1](https://linkforS1)" or + "... results [S1](https://linkforS1)[S3](https://linkforS3)". + - If multiple sentences in a row are supported by the same source, cite the first sentence; + then omit repeats until the source changes. + - Never invent tags. If no source supports a claim, say so. + - Do not add a separate “Sources” section unless asked. 
+ """ + system_message = [{"role": "system", "content": SYSTEM_PROMPT}] + other_messages = [ + {"role": type_to_role[message.type], "content": message.content} + for message in answer.graph_inputs.prompt_builder.build() + if message.type != "system" + ] + yield from stream_chat_sync( + messages=system_message + other_messages, + cfg=answer.graph_config, + llm=answer.graph_tooling.primary_llm, ) + # yield from streamed + # Process streamed packets using the new packet processing module + # yield from process_streamed_packets( + # answer_processed_output=answer.processed_streamed_output, + # ) except ValueError as e: logger.exception("Failed to process chat message.") @@ -747,46 +784,18 @@ def remove_answer_citations(answer: str) -> str: @log_function_time() def gather_stream( - packets: AnswerStream, + packets: Iterator[Dict[str, Any]], ) -> ChatBasicResponse: answer = "" - citations: list[CitationInfo] = [] - error_msg: str | None = None - message_id: int | None = None - top_documents: list[SavedSearchDoc] = [] - for packet in packets: - if isinstance(packet, Packet): - # Handle the different packet object types - if isinstance(packet.obj, MessageStart): - # MessageStart contains the initial content and final documents - if packet.obj.content: - answer += packet.obj.content - if packet.obj.final_documents: - top_documents = packet.obj.final_documents - elif isinstance(packet.obj, MessageDelta): - # MessageDelta contains incremental content updates - if packet.obj.content: - answer += packet.obj.content - elif isinstance(packet.obj, CitationDelta): - # CitationDelta contains citation information - if packet.obj.citations: - citations.extend(packet.obj.citations) - elif isinstance(packet, StreamingError): - error_msg = packet.error - elif isinstance(packet, MessageResponseIDInfo): - message_id = packet.reserved_assistant_message_id - - if message_id is None: - raise ValueError("Message ID is required") + if "text" in packet: + answer += packet["text"] return ChatBasicResponse( answer=answer, answer_citationless=remove_answer_citations(answer), - cited_documents={ - citation.citation_num: citation.document_id for citation in citations - }, - message_id=message_id, - error_msg=error_msg, - top_documents=top_documents, + cited_documents={}, + message_id=0, + error_msg=None, + top_documents=[], ) diff --git a/backend/onyx/evals/eval.py b/backend/onyx/evals/eval.py index c0625b3819e..03de156cd31 100644 --- a/backend/onyx/evals/eval.py +++ b/backend/onyx/evals/eval.py @@ -101,6 +101,7 @@ def run_eval( data: list[dict[str, dict[str, str]]] | None = None, remote_dataset_name: str | None = None, provider: EvalProvider = get_default_provider(), + no_send_logs: bool = False, ) -> EvalationAck: if data is not None and remote_dataset_name is not None: raise ValueError("Cannot specify both data and remote_dataset_name") @@ -113,4 +114,5 @@ def run_eval( configuration=configuration, data=data, remote_dataset_name=remote_dataset_name, + no_send_logs=no_send_logs, ) diff --git a/backend/onyx/evals/eval_cli.py b/backend/onyx/evals/eval_cli.py index 80c266b8324..1c9ad66a04a 100644 --- a/backend/onyx/evals/eval_cli.py +++ b/backend/onyx/evals/eval_cli.py @@ -54,6 +54,7 @@ def run_local( local_data_path: str | None, remote_dataset_name: str | None, search_permissions_email: str | None = None, + no_send_logs: bool = False, ) -> EvalationAck: """ Run evaluation with local configurations. 
@@ -78,7 +79,9 @@ def run_local( if remote_dataset_name: score = run_eval( - configuration=configuration, remote_dataset_name=remote_dataset_name + configuration=configuration, + remote_dataset_name=remote_dataset_name, + no_send_logs=no_send_logs, ) else: if local_data_path is None: @@ -86,7 +89,9 @@ def run_local( "local_data_path or remote_dataset_name is required for local evaluation" ) data = load_data_local(local_data_path) - score = run_eval(configuration=configuration, data=data) + score = run_eval( + configuration=configuration, data=data, no_send_logs=no_send_logs + ) return score @@ -183,6 +188,13 @@ def main() -> None: help="Email address to impersonate for the evaluation", ) + parser.add_argument( + "--no-send-logs", + action="store_true", + help="Do not send logs to the remote server", + default=False, + ) + args = parser.parse_args() if args.local_data_path: @@ -226,6 +238,7 @@ def main() -> None: local_data_path=args.local_data_path, remote_dataset_name=args.remote_dataset_name, search_permissions_email=args.search_permissions_email, + no_send_logs=args.no_send_logs, ) diff --git a/backend/onyx/evals/models.py b/backend/onyx/evals/models.py index ce33b440569..81f74623785 100644 --- a/backend/onyx/evals/models.py +++ b/backend/onyx/evals/models.py @@ -78,5 +78,6 @@ def eval( configuration: EvalConfigurationOptions, data: list[dict[str, dict[str, str]]] | None = None, remote_dataset_name: str | None = None, + no_send_logs: bool = False, ) -> EvalationAck: pass diff --git a/backend/onyx/evals/providers/braintrust.py b/backend/onyx/evals/providers/braintrust.py index 18a325e9521..a09020ce609 100644 --- a/backend/onyx/evals/providers/braintrust.py +++ b/backend/onyx/evals/providers/braintrust.py @@ -18,6 +18,7 @@ def eval( configuration: EvalConfigurationOptions, data: list[dict[str, dict[str, str]]] | None = None, remote_dataset_name: str | None = None, + no_send_logs: bool = False, ) -> EvalationAck: if data is not None and remote_dataset_name is not None: raise ValueError("Cannot specify both data and remote_dataset_name") @@ -35,6 +36,7 @@ def eval( scores=[], metadata={**configuration.model_dump()}, max_concurrency=BRAINTRUST_MAX_CONCURRENCY, + no_send_logs=no_send_logs, ) else: if data is None: @@ -51,5 +53,6 @@ def eval( scores=[], metadata={**configuration.model_dump()}, max_concurrency=BRAINTRUST_MAX_CONCURRENCY, + no_send_logs=no_send_logs, ) return EvalationAck(success=True) diff --git a/backend/onyx/llm/interfaces.py b/backend/onyx/llm/interfaces.py index 52ebec48d4f..cb26f3cc8d1 100644 --- a/backend/onyx/llm/interfaces.py +++ b/backend/onyx/llm/interfaces.py @@ -3,7 +3,6 @@ from typing import Literal from langchain.schema.language_model import LanguageModelInput -from langchain_core.messages import AIMessageChunk from langchain_core.messages import BaseMessage from langsmith.run_helpers import traceable from pydantic import BaseModel @@ -34,29 +33,30 @@ class LLMConfig(BaseModel): def log_prompt(prompt: LanguageModelInput) -> None: - if isinstance(prompt, list): - for ind, msg in enumerate(prompt): - if isinstance(msg, AIMessageChunk): - if msg.content: - log_msg = msg.content - elif msg.tool_call_chunks: - log_msg = "Tool Calls: " + str( - [ - { - key: value - for key, value in tool_call.items() - if key != "index" - } - for tool_call in msg.tool_call_chunks - ] - ) - else: - log_msg = "" - logger.debug(f"Message {ind}:\n{log_msg}") - else: - logger.debug(f"Message {ind}:\n{msg.content}") - if isinstance(prompt, str): - logger.debug(f"Prompt:\n{prompt}") + # if 
isinstance(prompt, list): + # for ind, msg in enumerate(prompt): + # if isinstance(msg, AIMessageChunk): + # if msg.content: + # log_msg = msg.content + # elif msg.tool_call_chunks: + # log_msg = "Tool Calls: " + str( + # [ + # { + # key: value + # for key, value in tool_call.items() + # if key != "index" + # } + # for tool_call in msg.tool_call_chunks + # ] + # ) + # else: + # log_msg = "" + # logger.debug(f"Message {ind}:\n{log_msg}") + # else: + # logger.debug(f"Message {ind}:\n{msg.content}") + # if isinstance(prompt, str): + # logger.debug(f"Prompt:\n{prompt}") + pass class LLM(abc.ABC): From b401bb0a9ead8d3e5aed984a16d129e7dbb5aeef Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Thu, 18 Sep 2025 18:55:42 -0700 Subject: [PATCH 02/34] . --- backend/onyx/chat/answer_scratchpad.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index e1746b3dd28..c3b263cb9d6 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -9,6 +9,7 @@ from typing import List import litellm +from braintrust import traced from onyx.agents.agent_search.dr.sub_agents.web_search.clients.exa_client import ( ExaClient, @@ -51,7 +52,7 @@ def register_tool(spec: ToolSpec) -> None: TOOL_REGISTRY[spec.name] = spec -# Example tool +@traced(name="web_search") def web_search(query: str, outer_ctx: Dict[str, Any]) -> Dict[str, Any]: exa_client = ExaClient() hits = exa_client.search(query) @@ -86,6 +87,7 @@ def web_search(query: str, outer_ctx: Dict[str, Any]) -> Dict[str, Any]: ) +@traced(name="web_fetch") def web_fetch(urls: List[str], outer_ctx: Dict[str, Any]) -> Dict[str, Any]: exa_client = ExaClient() docs = exa_client.contents(urls) @@ -119,6 +121,7 @@ def web_fetch(urls: List[str], outer_ctx: Dict[str, Any]) -> Dict[str, Any]: ) +@traced(name="reasoning") def reasoning(outer_ctx: Dict[str, Any]) -> Dict[str, Any]: PRIVATE_SCRATCHPAD_SYS = ( "You are writing PRIVATE scratch notes for yourself. " From ad1bcfa063c2da448c2681fb2c536b0a0be0eec0 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Thu, 18 Sep 2025 19:16:36 -0700 Subject: [PATCH 03/34] . --- backend/onyx/evals/providers/braintrust.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/onyx/evals/providers/braintrust.py b/backend/onyx/evals/providers/braintrust.py index a09020ce609..822c38c2a1c 100644 --- a/backend/onyx/evals/providers/braintrust.py +++ b/backend/onyx/evals/providers/braintrust.py @@ -1,5 +1,6 @@ from collections.abc import Callable +from autoevals import Factuality from braintrust import Eval from braintrust import EvalCase from braintrust import init_dataset @@ -33,7 +34,7 @@ def eval( name=BRAINTRUST_PROJECT, data=eval_data, task=task, - scores=[], + scores=[Factuality()], metadata={**configuration.model_dump()}, max_concurrency=BRAINTRUST_MAX_CONCURRENCY, no_send_logs=no_send_logs, @@ -50,7 +51,7 @@ def eval( name=BRAINTRUST_PROJECT, data=eval_cases, task=task, - scores=[], + scores=[Factuality()], metadata={**configuration.model_dump()}, max_concurrency=BRAINTRUST_MAX_CONCURRENCY, no_send_logs=no_send_logs, From 5aab51edf4c72afabf8c47aeae2f7b54a98d51c3 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Fri, 19 Sep 2025 10:59:06 -0700 Subject: [PATCH 04/34] . 
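Note on the streaming loop added in backend/onyx/chat/answer_scratchpad.py (PATCH 01/34): tool calls arrive from the model as chunked deltas, and stream_chat_sync rebuilds them by index before dispatching. Below is a minimal, self-contained sketch of that accumulation step, with hand-built chunks standing in for litellm's stream; the chunk shape mirrors what the loop above expects and is an assumption for illustration, not a guaranteed litellm contract.

import json

# Simulated streaming deltas: one function call arrives split across chunks,
# keyed by "index" so parallel tool calls could interleave.
chunks = [
    {"index": 0, "id": "call_1", "type": "function", "function": {"name": "web_search"}},
    {"index": 0, "type": "function", "function": {"arguments": '{"query": '}},
    {"index": 0, "type": "function", "function": {"arguments": '"onyx chat"}'}},
]

tool_calls_accum: list[dict] = []
for tc in chunks:
    idx = tc.get("index", 0)
    while len(tool_calls_accum) <= idx:
        tool_calls_accum.append({"id": None, "fn": {"name": "", "arguments": ""}})
    buf = tool_calls_accum[idx]
    if tc.get("id"):
        buf["id"] = tc["id"]
    fn = tc.get("function", {})
    if fn.get("name"):
        buf["fn"]["name"] = fn["name"]
    if fn.get("arguments"):
        buf["fn"]["arguments"] += fn["arguments"]

args = json.loads(tool_calls_accum[0]["fn"]["arguments"])
print(tool_calls_accum[0]["fn"]["name"], args)  # web_search {'query': 'onyx chat'}

Once the accumulated arguments string parses as JSON, the loop surfaces a tool event and appends the tool output as a role "tool" message before re-prompting the model.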
--- backend/onyx/evals/tracing.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/backend/onyx/evals/tracing.py b/backend/onyx/evals/tracing.py index 26ac25f93e9..3f53cd48a22 100644 --- a/backend/onyx/evals/tracing.py +++ b/backend/onyx/evals/tracing.py @@ -7,18 +7,24 @@ from onyx.configs.app_configs import BRAINTRUST_API_KEY from onyx.configs.app_configs import BRAINTRUST_PROJECT +MASKING_LENGTH = 20000 -def _truncate_str(s: str, head: int = 800, tail: int = 200) -> str: - if len(s) <= head + tail: - return s - return f"{s[:head]}…{s[-tail:]}[TRUNCATED {len(s)} chars to 10,000]" + +def _truncate_str(s: str) -> str: + tail = MASKING_LENGTH // 5 + head = MASKING_LENGTH - tail + return f"{s[:head]}…{s[-tail:]}[TRUNCATED {len(s)} chars to {MASKING_LENGTH}]" + + +def _should_mask(data: Any) -> bool: + return len(str(data)) > MASKING_LENGTH def _mask(data: Any) -> Any: - data_str = str(data) - if len(data_str) > 10_000: - return _truncate_str(data_str) - return data + """Mask data based on span type. Only mask generic and function spans, not root, task, score, or LLM spans.""" + if not _should_mask(data): + return data + return _truncate_str(str(data)) def setup_braintrust() -> None: From 09381a4c487373917c216b4d54ca2fb1039a0812 Mon Sep 17 00:00:00 2001 From: Richard Guan <41275416+rguan72@users.noreply.github.com> Date: Fri, 19 Sep 2025 11:01:08 -0700 Subject: [PATCH 05/34] Update backend/onyx/evals/tracing.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- backend/onyx/evals/tracing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/onyx/evals/tracing.py b/backend/onyx/evals/tracing.py index 3f53cd48a22..780ccedbd19 100644 --- a/backend/onyx/evals/tracing.py +++ b/backend/onyx/evals/tracing.py @@ -21,7 +21,7 @@ def _should_mask(data: Any) -> bool: def _mask(data: Any) -> Any: - """Mask data based on span type. Only mask generic and function spans, not root, task, score, or LLM spans.""" + """Mask data if it exceeds the maximum length threshold.""" if not _should_mask(data): return data return _truncate_str(str(data)) From 37bcf63f4db19fb75bc3c91843432f33bf8f66e4 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Fri, 19 Sep 2025 12:57:52 -0700 Subject: [PATCH 06/34] . --- backend/onyx/chat/answer_scratchpad.py | 39 +++++++++++++++---- .../one_off/create_braintrust_dataset.py | 3 +- backend/onyx/evals/providers/braintrust.py | 33 ++++++++++++++-- 3 files changed, 62 insertions(+), 13 deletions(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index c3b263cb9d6..d7ac32a356e 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -76,7 +76,9 @@ def web_search(query: str, outer_ctx: Dict[str, Any]) -> Dict[str, Any]: register_tool( ToolSpec( name="web_search", - description="Search the web for information.", + description=""" + Search the web for information. 
This tool provides urls and short snippets, + but does not fetch the full content of the urls.""", parameters={ "type": "object", "properties": {"query": {"type": "string"}}, @@ -110,7 +112,7 @@ def web_fetch(urls: List[str], outer_ctx: Dict[str, Any]) -> Dict[str, Any]: register_tool( ToolSpec( name="web_fetch", - description="Fetch the contents of a list of URLs.", + description="Fetch the fullcontents of a list of URLs.", parameters={ "type": "object", "properties": {"urls": {"type": "array", "items": {"type": "string"}}}, @@ -134,10 +136,12 @@ def reasoning(outer_ctx: Dict[str, Any]) -> Dict[str, Any]: revised_messages = [ {"role": "system", "content": PRIVATE_SCRATCHPAD_SYS}, ] + messages[1:] - results = litellm.completion( - model=llm.config.model_name, + results = llm_completion( + model_name=llm.config.model_name, temperature=llm.config.temperature, messages=revised_messages, + tools=[], + stream=False, ) return {"results": results["choices"][0]["message"]["content"]} @@ -145,13 +149,32 @@ def reasoning(outer_ctx: Dict[str, Any]) -> Dict[str, Any]: register_tool( ToolSpec( name="reasoning", - description="Reason about the message history and the goal.", + description=""" + Use this tool for reasoning. Powerful for complex questions and + tasks, or questions that require multiple steps to answer.""", parameters={"type": "object", "properties": {}, "required": []}, func=reasoning, ) ) +@traced(name="llm_completion", type="llm") +def llm_completion( + model_name: str, + temperature: float, + messages: List[Dict[str, Any]], + tools: List[Dict[str, Any]], + stream: bool = False, +) -> Dict[str, Any]: + return litellm.completion( + model=model_name, + temperature=temperature, + messages=messages, + tools=tools, + stream=stream, + ) + + def tool_specs_for_openai() -> List[Dict[str, Any]]: return [ { @@ -197,12 +220,12 @@ def stream_chat_sync( yield {"type": "delta", "text": "\n[Timed out while composing reply]"} break # Start a streaming completion (sync iterator of deltas) - stream_iter = litellm.completion( - model=llm.config.model_name, + stream_iter = llm_completion( + model_name=llm.config.model_name, temperature=llm.config.temperature, messages=messages, tools=tools_decl, - stream=True, # iterator of chunks + stream=True, ) # Accumulate assistant text & tool call chunks diff --git a/backend/onyx/evals/one_off/create_braintrust_dataset.py b/backend/onyx/evals/one_off/create_braintrust_dataset.py index 9739ee67c21..9da5f2647b4 100644 --- a/backend/onyx/evals/one_off/create_braintrust_dataset.py +++ b/backend/onyx/evals/one_off/create_braintrust_dataset.py @@ -109,8 +109,7 @@ def parse_csv_file(csv_path: str) -> List[Dict[str, Any]]: records.extend( [ { - "question": question - + ". All info is contained in the quesiton. 
DO NOT ask any clarifying questions.", + "question": question, "research_type": "DEEP", "categories": categories, "expected_depth": expected_depth, diff --git a/backend/onyx/evals/providers/braintrust.py b/backend/onyx/evals/providers/braintrust.py index 822c38c2a1c..aa88589b5fe 100644 --- a/backend/onyx/evals/providers/braintrust.py +++ b/backend/onyx/evals/providers/braintrust.py @@ -1,6 +1,6 @@ from collections.abc import Callable -from autoevals import Factuality +from autoevals.llm import LLMClassifier from braintrust import Eval from braintrust import EvalCase from braintrust import init_dataset @@ -12,6 +12,33 @@ from onyx.evals.models import EvalProvider +quality_classifier = LLMClassifier( + name="quality", + prompt_template=""" + You are a customer doing a trial of the product Onyx. Onyx provides a UI for users to chat with an LLM + and search for information, similar to ChatGPT. You think ChatGPT's answer quality is great, and + you want to rate Onyx's response relativeto ChatGPT's response.\n + [Question]: {{input}}\n + [ChatGPT Answer]: {{expected}}\n + [Onyx Answer]: {{output}}\n + + Please rate the quality of the Onyx answer relative to the ChatGPT answer on a scale of A to E: + A: The Onyx answer is great and is as good or better than the ChatGPT answer. + B: The Onyx answer is good and and comparable to the ChatGPT answer. + C: The Onyx answer is fair. + D: The Onyx answer is poor and is worse than the ChatGPT answer. + E: The Onyx answer is terrible and is much worse than the ChatGPT answer. + """, + choice_scores={ + "A": 1, + "B": 0.75, + "C": 0.5, + "D": 0.25, + "E": 0, + }, +) + + class BraintrustEvalProvider(EvalProvider): def eval( self, @@ -34,7 +61,7 @@ def eval( name=BRAINTRUST_PROJECT, data=eval_data, task=task, - scores=[Factuality()], + scores=[quality_classifier], metadata={**configuration.model_dump()}, max_concurrency=BRAINTRUST_MAX_CONCURRENCY, no_send_logs=no_send_logs, @@ -51,7 +78,7 @@ def eval( name=BRAINTRUST_PROJECT, data=eval_cases, task=task, - scores=[Factuality()], + scores=[quality_classifier], metadata={**configuration.model_dump()}, max_concurrency=BRAINTRUST_MAX_CONCURRENCY, no_send_logs=no_send_logs, From 9b600fc2a5f7ecb6fa86d744198a52739762a4d7 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Fri, 19 Sep 2025 13:49:23 -0700 Subject: [PATCH 07/34] . 
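For reference on the tracing change in PATCH 04/34: the mask keeps a fixed head and tail of any span whose string form exceeds MASKING_LENGTH before it is sent to Braintrust. The following is a small, runnable restatement of those helpers that makes the split explicit; with MASKING_LENGTH = 20000 the head is 16,000 characters and the tail 4,000.

from typing import Any

MASKING_LENGTH = 20000

def _truncate_str(s: str) -> str:
    tail = MASKING_LENGTH // 5    # keep the last 4,000 characters
    head = MASKING_LENGTH - tail  # keep the first 16,000 characters
    return f"{s[:head]}…{s[-tail:]}[TRUNCATED {len(s)} chars to {MASKING_LENGTH}]"

def _mask(data: Any) -> Any:
    # Only values whose string form exceeds the threshold are rewritten.
    text = str(data)
    return _truncate_str(text) if len(text) > MASKING_LENGTH else data

masked = _mask("x" * 50_000)
print(len(masked))  # a little over 20,000: head + ellipsis + tail + suffix

The masked value ends up slightly longer than MASKING_LENGTH itself because the ellipsis and the [TRUNCATED ...] suffix are appended on top of the kept head and tail.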
--- backend/onyx/chat/answer_scratchpad.py | 310 +++++++------------------ 1 file changed, 87 insertions(+), 223 deletions(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index d7ac32a356e..9cce9998373 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -1,14 +1,19 @@ from __future__ import annotations +import asyncio import json import time -from collections.abc import Callable from collections.abc import Generator from typing import Any from typing import Dict from typing import List import litellm +from agents import Agent +from agents import function_tool +from agents import ModelSettings +from agents import Runner +from agents.extensions.models.litellm_model import LitellmModel from braintrust import traced from onyx.agents.agent_search.dr.sub_agents.web_search.clients.exa_client import ( @@ -19,41 +24,17 @@ LLM, ) # sync call that supports stream=True with an iterator -# ---------- Tool registry (sync) ---------- - - -class ToolSpec: - def __init__( - self, - name: str, - description: str, - parameters: Dict[str, Any], - func: Callable[..., Any], - private: bool = False, - ): - self.name = name - self.description = description - self.parameters = parameters - self.func = func - self.private = private - - -TOOL_REGISTRY: Dict[str, ToolSpec] = {} - def short_tag(link: str, i: int) -> str: # Stable, readable; index keeps it deterministic across a batch return f"S{i+1}" -def register_tool(spec: ToolSpec) -> None: - if spec.name in TOOL_REGISTRY: - raise ValueError(f"Tool {spec.name} already registered") - TOOL_REGISTRY[spec.name] = spec - - +@function_tool @traced(name="web_search") -def web_search(query: str, outer_ctx: Dict[str, Any]) -> Dict[str, Any]: +def web_search(query: str) -> str: + """Search the web for information. This tool provides urls and short snippets, + but does not fetch the full content of the urls.""" exa_client = ExaClient() hits = exa_client.search(query) results = [] @@ -70,27 +51,13 @@ def web_search(query: str, outer_ctx: Dict[str, Any]) -> Dict[str, Any]: ), } ) - return {"results": results} - - -register_tool( - ToolSpec( - name="web_search", - description=""" - Search the web for information. This tool provides urls and short snippets, - but does not fetch the full content of the urls.""", - parameters={ - "type": "object", - "properties": {"query": {"type": "string"}}, - "required": ["query"], - }, - func=web_search, - ) -) + return json.dumps({"results": results}) +@function_tool @traced(name="web_fetch") -def web_fetch(urls: List[str], outer_ctx: Dict[str, Any]) -> Dict[str, Any]: +def web_fetch(urls: List[str]) -> str: + """Fetch the full contents of a list of URLs.""" exa_client = ExaClient() docs = exa_client.contents(urls) out = [] @@ -106,56 +73,19 @@ def web_fetch(urls: List[str], outer_ctx: Dict[str, Any]) -> Dict[str, Any]: ), } ) - return {"results": out} - - -register_tool( - ToolSpec( - name="web_fetch", - description="Fetch the fullcontents of a list of URLs.", - parameters={ - "type": "object", - "properties": {"urls": {"type": "array", "items": {"type": "string"}}}, - "required": ["urls"], - }, - func=web_fetch, - ) -) + return json.dumps({"results": out}) +@function_tool @traced(name="reasoning") -def reasoning(outer_ctx: Dict[str, Any]) -> Dict[str, Any]: - PRIVATE_SCRATCHPAD_SYS = ( - "You are writing PRIVATE scratch notes for yourself. " - "These notes will NOT be shown to the user. " - "Do NOT copy these notes verbatim into the final answer. 
" - "Use them to plan, compute, and create structured intermediate results." - ) - messages = outer_ctx["messages"] - llm = outer_ctx["model"] - revised_messages = [ - {"role": "system", "content": PRIVATE_SCRATCHPAD_SYS}, - ] + messages[1:] - results = llm_completion( - model_name=llm.config.model_name, - temperature=llm.config.temperature, - messages=revised_messages, - tools=[], - stream=False, - ) - return {"results": results["choices"][0]["message"]["content"]} - - -register_tool( - ToolSpec( - name="reasoning", - description=""" - Use this tool for reasoning. Powerful for complex questions and - tasks, or questions that require multiple steps to answer.""", - parameters={"type": "object", "properties": {}, "required": []}, - func=reasoning, +def reasoning() -> str: + """Use this tool for reasoning. Powerful for complex questions and + tasks, or questions that require multiple steps to answer.""" + # Note: This is a simplified version. In the full implementation, + # we would need to pass the context through the agent's context system + return ( + "Reasoning tool - this would need to be implemented with proper context access" ) -) @traced(name="llm_completion", type="llm") @@ -175,150 +105,84 @@ def llm_completion( ) -def tool_specs_for_openai() -> List[Dict[str, Any]]: - return [ - { - "type": "function", - "function": { - "name": t.name, - "description": t.description, - "parameters": t.parameters, - }, - } - for t in TOOL_REGISTRY.values() - ] - - -def run_tool_sync( - name: str, args: Dict[str, Any], outer_ctx: Dict[str, Any] -) -> Dict[str, Any]: - spec = TOOL_REGISTRY[name] - try: - result = spec.func(**args, outer_ctx=outer_ctx) - except TypeError as e: - result = {"ok": False, "error": f"Bad arguments: {e}"} - except Exception as e: - result = {"ok": False, "error": str(e)} - return {"name": name, "private": spec.private, "result": result} - - -def stream_chat_sync( +async def stream_chat_async( messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM ) -> Generator[Dict[str, Any], None, None]: """ - Yields events suitable for SSE/WebSocket: + Yields events suitable for SSE/WebSocket using OpenAI Agents framework: {"type":"delta","text": "..."} -> stream to user {"type":"tool","name":..., "args":..., "private": bool} {"type":"final"} """ - start = time.time() - tools_decl = tool_specs_for_openai() - tool_step = 0 - - while True: - if time.time() - start > 200: - yield {"type": "delta", "text": "\n[Timed out while composing reply]"} - break - # Start a streaming completion (sync iterator of deltas) - stream_iter = llm_completion( - model_name=llm.config.model_name, - temperature=llm.config.temperature, - messages=messages, - tools=tools_decl, - stream=True, - ) + time.time() - # Accumulate assistant text & tool call chunks - assistant_text_parts: List[str] = [] - tool_calls_accum: List[Dict[str, Any]] = [] # indexed by tool call index - - for chunk in stream_iter: - choice = chunk.choices[0] - delta = getattr(choice, "delta", getattr(choice, "message", None)) + # Create LiteLLM model for OpenAI Agents + litellm_model = LitellmModel( + model=llm.config.model_name, + api_key=llm.config.api_key, + ) - # 1) Text deltas - content_piece = getattr(delta, "content", None) - if content_piece: - assistant_text_parts.append(content_piece) - yield {"type": "delta", "text": content_piece} + # Create agent with tools + agent = Agent( + name="Assistant", + instructions="You are a helpful assistant that can search the web and fetch content from URLs.", + model=litellm_model, + 
tools=[web_search, web_fetch, reasoning], + model_settings=ModelSettings( + temperature=llm.config.temperature, + include_usage=True, # Track usage metrics + ), + ) - # 2) Tool call deltas (arrive chunked) - tcs = getattr(delta, "tool_calls", None) - if tcs: - for tc in tcs: - if tc.get("type") != "function": - continue - idx = tc.get("index", 0) - while len(tool_calls_accum) <= idx: - tool_calls_accum.append( - {"id": None, "fn": {"name": "", "arguments": ""}} - ) - buf = tool_calls_accum[idx] - if tc.get("id"): - buf["id"] = tc["id"] - fn = tc.get("function", {}) - if fn.get("name"): - buf["fn"]["name"] = fn["name"] - if fn.get("arguments"): - buf["fn"]["arguments"] += fn["arguments"] + # Convert messages to a single user message for the agent + user_message = "" + for msg in messages: + if msg.get("role") == "user": + user_message += msg.get("content", "") + elif msg.get("role") == "assistant": + user_message += f"\nAssistant: {msg.get('content', '')}" - # Finalize assistant message for this turn - assistant_text = "".join(assistant_text_parts).strip() - assistant_msg: Dict[str, Any] = {"role": "assistant", "content": assistant_text} - if tool_calls_accum: - assistant_msg["tool_calls"] = [ - { - "id": tc["id"], - "type": "function", - "function": { - "name": tc["fn"]["name"], - "arguments": tc["fn"]["arguments"], - }, - } - for tc in tool_calls_accum - ] - messages.append(assistant_msg) + try: + # Run the agent with timeout + result = await asyncio.wait_for(Runner.run(agent, user_message), timeout=200) - # If we have tool calls and haven’t exceeded step cap, execute and loop again - if tool_calls_accum and tool_step < 10: - tool_step += 1 - for tc in tool_calls_accum: - name = tc["fn"]["name"] - try: - args = json.loads(tc["fn"]["arguments"] or "{}") - except json.JSONDecodeError: - args = {"raw": tc["fn"]["arguments"]} + # Stream the final output + if result.final_output: + yield {"type": "delta", "text": result.final_output} - # Surface tool activity to UI (don’t stream private payloads) - yield { - "type": "tool", - "name": name, - "args": args, - "private": TOOL_REGISTRY[name].private, - } + except asyncio.TimeoutError: + yield {"type": "delta", "text": "\n[Timed out while composing reply]"} + except Exception as e: + yield {"type": "delta", "text": f"\n[Error: {str(e)}]"} - outer_ctx = { - "model": llm, - "messages": messages, - "cfg": cfg, - } - tool_result = run_tool_sync(name, args, outer_ctx) - messages.append( - { - "role": "tool", - "tool_call_id": tc["id"], - "name": name, - "content": [ - {"type": "text", "text": json.dumps(result)} - for result in tool_result["result"]["results"] - ], - } - ) + yield {"type": "final"} - # Loop: the model now sees tool outputs and will either answer or call more tools - continue - # No tools (final answer) or step cap reached - break +def stream_chat_sync( + messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM +) -> Generator[Dict[str, Any], None, None]: + """ + Synchronous wrapper for the async streaming function. 
+ Yields events suitable for SSE/WebSocket: + {"type":"delta","text": "..."} -> stream to user + {"type":"tool","name":..., "args":..., "private": bool} + {"type":"final"} + """ + # Create a new event loop for this thread + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) - yield {"type": "final"} + try: + # Run the async generator + async_gen = stream_chat_async(messages, cfg, llm) + + # Convert async generator to sync generator + while True: + try: + # Get the next item from the async generator + item = loop.run_until_complete(async_gen.__anext__()) + yield item + except StopAsyncIteration: + break + finally: + loop.close() From 1d2930264759e371a00286be1cac3917caa8f827 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Fri, 19 Sep 2025 14:35:19 -0700 Subject: [PATCH 08/34] . --- backend/onyx/chat/answer_scratchpad.py | 109 ++++++++++++++++++++----- backend/onyx/chat/process_message.py | 1 + 2 files changed, 90 insertions(+), 20 deletions(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index 9cce9998373..87d5a679c4b 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -3,8 +3,11 @@ import asyncio import json import time +from collections.abc import AsyncGenerator from collections.abc import Generator +from dataclasses import dataclass from typing import Any +from typing import cast from typing import Dict from typing import List @@ -12,6 +15,7 @@ from agents import Agent from agents import function_tool from agents import ModelSettings +from agents import RunContextWrapper from agents import Runner from agents.extensions.models.litellm_model import LitellmModel from braintrust import traced @@ -20,9 +24,23 @@ ExaClient, ) from onyx.agents.agent_search.models import GraphConfig +from onyx.db.engine.sql_engine import get_session_with_current_tenant from onyx.llm.interfaces import ( LLM, -) # sync call that supports stream=True with an iterator +) +from onyx.tools.models import SearchToolOverrideKwargs +from onyx.tools.tool_implementations.search.search_tool import ( + SEARCH_RESPONSE_SUMMARY_ID, +) +from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary +from onyx.tools.tool_implementations.search.search_tool import SearchTool + + +@dataclass +class MyContext: + """Context class to hold search tool and other dependencies""" + + search_tool: SearchTool | None = None def short_tag(link: str, i: int) -> str: @@ -95,7 +113,7 @@ def llm_completion( messages: List[Dict[str, Any]], tools: List[Dict[str, Any]], stream: bool = False, -) -> Dict[str, Any]: +) -> Any: return litellm.completion( model=model_name, temperature=temperature, @@ -105,9 +123,46 @@ def llm_completion( ) +@function_tool +@traced(name="internal_search") +def internal_search(context_wrapper: RunContextWrapper[MyContext], query: str) -> str: + """Search internal company vector database for information. 
Sources + include: + - Fireflies (internal company call transcripts) + - Google Drive (internal company documents) + - Gmail (internal company emails) + - Linear (internal company issues) + - Slack (internal company messages) + """ + search_tool = context_wrapper.context.search_tool + if search_tool is None: + raise RuntimeError("Search tool not available in context") + + with get_session_with_current_tenant() as search_db_session: + for tool_response in search_tool.run( + query=query, + override_kwargs=SearchToolOverrideKwargs( + force_no_rerank=True, + alternate_db_session=search_db_session, + skip_query_analysis=True, + original_query=query, + ), + ): + # get retrieved docs to send to the rest of the graph + if tool_response.id == SEARCH_RESPONSE_SUMMARY_ID: + response = cast(SearchResponseSummary, tool_response.response) + retrieved_docs = response.top_sections + + break + return retrieved_docs + + async def stream_chat_async( - messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM -) -> Generator[Dict[str, Any], None, None]: + messages: List[Dict[str, Any]], + cfg: GraphConfig, + llm: LLM, + search_tool: SearchTool, +) -> AsyncGenerator[Dict[str, Any], None]: """ Yields events suitable for SSE/WebSocket using OpenAI Agents framework: {"type":"delta","text": "..."} -> stream to user @@ -122,12 +177,21 @@ async def stream_chat_async( api_key=llm.config.api_key, ) + # Get the search tool from config + search_tool = cfg.tooling.search_tool + + # Create context with search tool + context = MyContext(search_tool=search_tool) + # Create agent with tools agent = Agent( name="Assistant", - instructions="You are a helpful assistant that can search the web and fetch content from URLs.", + instructions=""" + You are a helpful assistant that can search the web, fetch content from URLs, + and search internal databases. + """, model=litellm_model, - tools=[web_search, web_fetch, reasoning], + tools=[web_search, web_fetch, reasoning, internal_search], model_settings=ModelSettings( temperature=llm.config.temperature, include_usage=True, # Track usage metrics @@ -143,8 +207,10 @@ async def stream_chat_async( user_message += f"\nAssistant: {msg.get('content', '')}" try: - # Run the agent with timeout - result = await asyncio.wait_for(Runner.run(agent, user_message), timeout=200) + # Run the agent with timeout and context + result = await asyncio.wait_for( + Runner.run(agent, user_message, context=context), timeout=200 + ) # Stream the final output if result.final_output: @@ -159,7 +225,10 @@ async def stream_chat_async( def stream_chat_sync( - messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM + messages: List[Dict[str, Any]], + cfg: GraphConfig, + llm: LLM, + search_tool: SearchTool, ) -> Generator[Dict[str, Any], None, None]: """ Synchronous wrapper for the async streaming function. 
@@ -173,16 +242,16 @@ def stream_chat_sync( asyncio.set_event_loop(loop) try: - # Run the async generator - async_gen = stream_chat_async(messages, cfg, llm) - - # Convert async generator to sync generator - while True: - try: - # Get the next item from the async generator - item = loop.run_until_complete(async_gen.__anext__()) - yield item - except StopAsyncIteration: - break + # Run the async generator and collect all items + async def collect_all_items(): + items = [] + async for item in stream_chat_async(messages, cfg, llm, search_tool): + items.append(item) + return items + + # Get all items from async generator + items = loop.run_until_complete(collect_all_items()) + for item in items: + yield item finally: loop.close() diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index 76ff38a35fe..014003f3b66 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -707,6 +707,7 @@ def stream_chat_message_objects( messages=system_message + other_messages, cfg=answer.graph_config, llm=answer.graph_tooling.primary_llm, + search_tool=answer.graph_tooling.search_tool, ) # yield from streamed # Process streamed packets using the new packet processing module From 3d14fe93de3d91bd10e1634bc2a8ad8a37f96c2c Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Mon, 22 Sep 2025 15:26:21 -0700 Subject: [PATCH 09/34] . --- backend/onyx/chat/answer_scratchpad.py | 159 +++++++++++---------- backend/onyx/chat/process_message.py | 2 + backend/onyx/evals/providers/braintrust.py | 4 +- 3 files changed, 84 insertions(+), 81 deletions(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index 87d5a679c4b..d0527523c1a 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -2,10 +2,10 @@ import asyncio import json -import time -from collections.abc import AsyncGenerator +import threading from collections.abc import Generator from dataclasses import dataclass +from queue import Queue from typing import Any from typing import cast from typing import Dict @@ -18,6 +18,8 @@ from agents import RunContextWrapper from agents import Runner from agents.extensions.models.litellm_model import LitellmModel +from agents.stream_events import RawResponsesStreamEvent +from agents.stream_events import RunItemStreamEvent from braintrust import traced from onyx.agents.agent_search.dr.sub_agents.web_search.clients.exa_client import ( @@ -36,11 +38,17 @@ from onyx.tools.tool_implementations.search.search_tool import SearchTool +@dataclass +class RunDependencies: + emitter: Emitter + search_tool: SearchTool | None = None + + @dataclass class MyContext: """Context class to hold search tool and other dependencies""" - search_tool: SearchTool | None = None + run_dependencies: RunDependencies | None = None def short_tag(link: str, i: int) -> str: @@ -134,7 +142,10 @@ def internal_search(context_wrapper: RunContextWrapper[MyContext], query: str) - - Linear (internal company issues) - Slack (internal company messages) """ - search_tool = context_wrapper.context.search_tool + context_wrapper.context.run_dependencies.emitter.emit( + kind="tool-progress", data={"progress": "Searching internal database"} + ) + search_tool = context_wrapper.context.run_dependencies.search_tool if search_tool is None: raise RuntimeError("Search tool not available in context") @@ -157,33 +168,72 @@ def internal_search(context_wrapper: RunContextWrapper[MyContext], query: str) - return retrieved_docs -async 
def stream_chat_async( +# stream_bus.py +@dataclass +class StreamPacket: + kind: str # "agent" | "tool-progress" | "done" + payload: Dict[str, Any] = None + + +class Emitter: + """Use this inside tools to emit arbitrary UI progress.""" + + def __init__(self, bus: Queue): + self.bus = bus + + def emit(self, kind: str, data: Dict[str, Any]) -> None: + self.bus.put(StreamPacket(kind=kind, payload=data)) + + +# If we want durable execution in the future, we can replace this with a temporal call +def start_run_in_thread( + agent: Agent, messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM, search_tool: SearchTool, -) -> AsyncGenerator[Dict[str, Any], None]: - """ - Yields events suitable for SSE/WebSocket using OpenAI Agents framework: - {"type":"delta","text": "..."} -> stream to user - {"type":"tool","name":..., "args":..., "private": bool} - {"type":"final"} - """ - time.time() + emitter: Emitter, +) -> threading.Thread: + def worker(): + async def amain(): + ctx = MyContext( + run_dependencies=RunDependencies( + search_tool=search_tool, + emitter=emitter, + ) + ) + # 1) start the streamed run (async) + streamed = Runner.run_streamed(agent, messages, context=ctx) + + # 2) forward the agent’s async event stream + async for ev in streamed.stream_events(): + if isinstance(ev, RunItemStreamEvent): + pass + elif isinstance(ev, RawResponsesStreamEvent): + emitter.emit(kind="agent", data=ev.data.model_dump()) + + emitter.emit(kind="done", data={"ok": True}) + + # run the async main inside this thread + asyncio.run(amain()) + + t = threading.Thread(target=worker, daemon=True) + t.start() + return t - # Create LiteLLM model for OpenAI Agents + +def stream_chat_sync( + messages: List[Dict[str, Any]], + cfg: GraphConfig, + llm: LLM, + search_tool: SearchTool, +) -> Generator[Dict[str, Any], None, None]: + bus: Queue = Queue() + emitter = Emitter(bus) litellm_model = LitellmModel( model=llm.config.model_name, api_key=llm.config.api_key, ) - - # Get the search tool from config - search_tool = cfg.tooling.search_tool - - # Create context with search tool - context = MyContext(search_tool=search_tool) - - # Create agent with tools agent = Agent( name="Assistant", instructions=""" @@ -198,60 +248,11 @@ async def stream_chat_async( ), ) - # Convert messages to a single user message for the agent - user_message = "" - for msg in messages: - if msg.get("role") == "user": - user_message += msg.get("content", "") - elif msg.get("role") == "assistant": - user_message += f"\nAssistant: {msg.get('content', '')}" - - try: - # Run the agent with timeout and context - result = await asyncio.wait_for( - Runner.run(agent, user_message, context=context), timeout=200 - ) - - # Stream the final output - if result.final_output: - yield {"type": "delta", "text": result.final_output} - - except asyncio.TimeoutError: - yield {"type": "delta", "text": "\n[Timed out while composing reply]"} - except Exception as e: - yield {"type": "delta", "text": f"\n[Error: {str(e)}]"} - - yield {"type": "final"} - - -def stream_chat_sync( - messages: List[Dict[str, Any]], - cfg: GraphConfig, - llm: LLM, - search_tool: SearchTool, -) -> Generator[Dict[str, Any], None, None]: - """ - Synchronous wrapper for the async streaming function. 
- Yields events suitable for SSE/WebSocket: - {"type":"delta","text": "..."} -> stream to user - {"type":"tool","name":..., "args":..., "private": bool} - {"type":"final"} - """ - # Create a new event loop for this thread - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - # Run the async generator and collect all items - async def collect_all_items(): - items = [] - async for item in stream_chat_async(messages, cfg, llm, search_tool): - items.append(item) - return items - - # Get all items from async generator - items = loop.run_until_complete(collect_all_items()) - for item in items: - yield item - finally: - loop.close() + start_run_in_thread(agent, messages, cfg, llm, search_tool, emitter) + done = False + while not done: + pkt: Queue[StreamPacket] = bus.get() + if pkt.kind == "done": + done = True + else: + yield pkt.payload diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index 014003f3b66..0b77fb8ee64 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -789,6 +789,8 @@ def gather_stream( ) -> ChatBasicResponse: answer = "" for packet in packets: + if packet != {"type": "event"}: + print(packet) if "text" in packet: answer += packet["text"] diff --git a/backend/onyx/evals/providers/braintrust.py b/backend/onyx/evals/providers/braintrust.py index aa88589b5fe..8ae64834031 100644 --- a/backend/onyx/evals/providers/braintrust.py +++ b/backend/onyx/evals/providers/braintrust.py @@ -61,7 +61,7 @@ def eval( name=BRAINTRUST_PROJECT, data=eval_data, task=task, - scores=[quality_classifier], + scores=[], metadata={**configuration.model_dump()}, max_concurrency=BRAINTRUST_MAX_CONCURRENCY, no_send_logs=no_send_logs, @@ -78,7 +78,7 @@ def eval( name=BRAINTRUST_PROJECT, data=eval_cases, task=task, - scores=[quality_classifier], + scores=[], metadata={**configuration.model_dump()}, max_concurrency=BRAINTRUST_MAX_CONCURRENCY, no_send_logs=no_send_logs, From 68020a763f09557722a5a111619a851bdbb78ab6 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Mon, 22 Sep 2025 16:58:41 -0700 Subject: [PATCH 10/34] . 
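PATCH 09/34 above moves the agent run onto a background thread and bridges it back to the synchronous caller through a Queue: the worker emits StreamPacket items and stream_chat_sync drains them until a done packet arrives. Below is a minimal, runnable sketch of that producer/consumer bridge, with a stub worker standing in for the real Runner.run_streamed forwarding.

import threading
from dataclasses import dataclass, field
from queue import Queue
from typing import Any, Dict, Generator

@dataclass
class StreamPacket:
    kind: str                                   # "agent" | "tool-progress" | "done"
    payload: Dict[str, Any] = field(default_factory=dict)

class Emitter:
    def __init__(self, bus: "Queue[StreamPacket]") -> None:
        self.bus = bus

    def emit(self, kind: str, data: Dict[str, Any]) -> None:
        self.bus.put(StreamPacket(kind=kind, payload=data))

def worker(emitter: Emitter) -> None:
    # Stand-in for the async agent run; the real worker forwards
    # RawResponsesStreamEvent payloads instead of these stub deltas.
    for word in ("hello", "world"):
        emitter.emit("agent", {"delta": word})
    emitter.emit("done", {"ok": True})

def stream() -> Generator[Dict[str, Any], None, None]:
    bus: "Queue[StreamPacket]" = Queue()
    emitter = Emitter(bus)
    threading.Thread(target=worker, args=(emitter,), daemon=True).start()
    while True:
        pkt = bus.get()                          # blocks until the worker emits
        if pkt.kind == "done":
            break
        yield pkt.payload

print(list(stream()))  # [{'delta': 'hello'}, {'delta': 'world'}]

Because bus.get() blocks, the consumer simply waits on the worker's pace without polling, and the daemon flag keeps the helper thread from holding the process open if the caller abandons the generator.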
--- backend/onyx/chat/answer_scratchpad.py | 31 ++++++++++++++++++-------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index d0527523c1a..978f9419730 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -222,6 +222,27 @@ async def amain(): return t +def unified_event_stream( + agent: Agent, + messages: List[Dict[str, Any]], + cfg: GraphConfig, + llm: LLM, + search_tool: SearchTool, + emitter: Emitter, +) -> Generator[Dict[str, Any], None, None]: + bus: Queue = Queue() + emitter = Emitter(bus) + start_run_in_thread(agent, messages, cfg, llm, search_tool, emitter) + done = False + while not done: + pkt: Queue[StreamPacket] = emitter.bus.get() + if pkt.kind == "done": + done = True + else: + yield pkt.payload + + +# This should be close to the API def stream_chat_sync( messages: List[Dict[str, Any]], cfg: GraphConfig, @@ -247,12 +268,4 @@ def stream_chat_sync( include_usage=True, # Track usage metrics ), ) - - start_run_in_thread(agent, messages, cfg, llm, search_tool, emitter) - done = False - while not done: - pkt: Queue[StreamPacket] = bus.get() - if pkt.kind == "done": - done = True - else: - yield pkt.payload + return unified_event_stream(agent, messages, cfg, llm, search_tool, emitter) From 6f73659eee292673660e8e10944fe63d6813d510 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Mon, 22 Sep 2025 17:44:52 -0700 Subject: [PATCH 11/34] . --- backend/onyx/chat/answer_scratchpad.py | 80 +++++++++++++++++++-- backend/onyx/chat/process_message.py | 97 ++++++++++++++++++++++---- 2 files changed, 158 insertions(+), 19 deletions(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index 978f9419730..216a0f0a534 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -36,6 +36,9 @@ ) from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary from onyx.tools.tool_implementations.search.search_tool import SearchTool +from onyx.utils.logger import setup_logger + +logger = setup_logger() @dataclass @@ -168,6 +171,48 @@ def internal_search(context_wrapper: RunContextWrapper[MyContext], query: str) - return retrieved_docs +def _convert_to_packet_obj(packet: Dict[str, Any]) -> Any | None: + """Convert a packet dictionary to PacketObj when possible. 
+ + Args: + packet: Dictionary containing packet data + + Returns: + PacketObj instance if conversion is possible, None otherwise + """ + if not isinstance(packet, dict) or "type" not in packet: + return None + + packet_type = packet.get("type") + if not packet_type: + return None + + try: + # Import here to avoid circular imports + from onyx.server.query_and_chat.streaming_models import ( + MessageStart, + MessageDelta, + OverallStop, + ) + + if packet_type == "response.output_item.added": + return MessageStart( + type="message_start", + content="", + final_documents=None, + ) + elif packet_type == "response.output_text.delta": + return MessageDelta(type="message_delta", content=packet["delta"]) + elif packet_type == "response.completed": + return OverallStop(type="stop") + + except Exception as e: + # Log the error but don't fail the entire process + logger.debug(f"Failed to convert packet to PacketObj: {e}") + + return None + + # stream_bus.py @dataclass class StreamPacket: @@ -191,8 +236,8 @@ def start_run_in_thread( messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM, - search_tool: SearchTool, emitter: Emitter, + search_tool: SearchTool | None = None, ) -> threading.Thread: def worker(): async def amain(): @@ -227,19 +272,33 @@ def unified_event_stream( messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM, - search_tool: SearchTool, emitter: Emitter, + search_tool: SearchTool | None = None, ) -> Generator[Dict[str, Any], None, None]: bus: Queue = Queue() emitter = Emitter(bus) - start_run_in_thread(agent, messages, cfg, llm, search_tool, emitter) + start_run_in_thread( + agent=agent, + messages=messages, + cfg=cfg, + llm=llm, + search_tool=search_tool, + emitter=emitter, + ) done = False while not done: - pkt: Queue[StreamPacket] = emitter.bus.get() + pkt: StreamPacket = emitter.bus.get() if pkt.kind == "done": done = True else: - yield pkt.payload + # Convert packet to PacketObj when possible + packet_obj = _convert_to_packet_obj(pkt.payload) + if packet_obj: + # Convert PacketObj back to dict for compatibility + yield packet_obj.model_dump() + else: + # Fallback to original payload + yield pkt.payload # This should be close to the API @@ -247,7 +306,7 @@ def stream_chat_sync( messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM, - search_tool: SearchTool, + search_tool: SearchTool | None = None, ) -> Generator[Dict[str, Any], None, None]: bus: Queue = Queue() emitter = Emitter(bus) @@ -268,4 +327,11 @@ def stream_chat_sync( include_usage=True, # Track usage metrics ), ) - return unified_event_stream(agent, messages, cfg, llm, search_tool, emitter) + return unified_event_stream( + agent=agent, + messages=messages, + cfg=cfg, + llm=llm, + emitter=emitter, + search_tool=search_tool, + ) diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index 0b77fb8ee64..e8c50ccd61c 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -531,7 +531,7 @@ def stream_chat_message_objects( yield MessageResponseIDInfo( user_message_id=user_message.id if user_message else None, reserved_assistant_message_id=reserved_message_id, - ) + ).model_dump() prompt_override = new_msg_req.prompt_override or chat_session.prompt_override if new_msg_req.persona_override_config: @@ -619,7 +619,7 @@ def stream_chat_message_objects( ) for file in in_memory_user_files ] - ) + ).model_dump() prompt_builder = AnswerPromptBuilder( user_message=default_build_user_message( @@ -709,17 +709,12 @@ def stream_chat_message_objects( 
llm=answer.graph_tooling.primary_llm, search_tool=answer.graph_tooling.search_tool, ) - # yield from streamed - # Process streamed packets using the new packet processing module - # yield from process_streamed_packets( - # answer_processed_output=answer.processed_streamed_output, - # ) except ValueError as e: logger.exception("Failed to process chat message.") error_msg = str(e) - yield StreamingError(error=error_msg) + yield StreamingError(error=error_msg).model_dump() db_session.rollback() return @@ -733,7 +728,7 @@ def stream_chat_message_objects( stack_trace = traceback.format_exc() if isinstance(e, ToolCallException): - yield StreamingError(error=error_msg, stack_trace=stack_trace) + yield StreamingError(error=error_msg, stack_trace=stack_trace).model_dump() elif llm: client_error_msg = litellm_exception_to_error_msg(e, llm) if llm.config.api_key and len(llm.config.api_key) > 2: @@ -744,7 +739,9 @@ def stream_chat_message_objects( llm.config.api_key, "[REDACTED_API_KEY]" ) - yield StreamingError(error=client_error_msg, stack_trace=stack_trace) + yield StreamingError( + error=client_error_msg, stack_trace=stack_trace + ).model_dump() db_session.rollback() return @@ -774,7 +771,7 @@ def stream_chat_message( document_retrieval_latency = time.time() - start_time logger.debug(f"First doc time: {document_retrieval_latency}") - yield get_json_line(obj.model_dump()) + yield get_json_line(obj) def remove_answer_citations(answer: str) -> str: @@ -783,6 +780,74 @@ def remove_answer_citations(answer: str) -> str: return re.sub(pattern, "", answer) +def _convert_to_packet_obj(packet: Dict[str, Any]) -> Any | None: + """Convert a packet dictionary to PacketObj when possible. + + Args: + packet: Dictionary containing packet data + + Returns: + PacketObj instance if conversion is possible, None otherwise + """ + if not isinstance(packet, dict) or "type" not in packet: + return None + + packet_type = packet.get("type") + if not packet_type: + return None + + try: + # Import here to avoid circular imports + from onyx.server.query_and_chat.streaming_models import ( + MessageStart, + MessageDelta, + OverallStop, + SectionEnd, + SearchToolStart, + SearchToolDelta, + ImageGenerationToolStart, + ImageGenerationToolDelta, + ImageGenerationToolHeartbeat, + CustomToolStart, + CustomToolDelta, + ReasoningStart, + ReasoningDelta, + CitationStart, + CitationDelta, + ) + + # Map packet types to their corresponding classes + type_mapping = { + "message_start": MessageStart, + "message_delta": MessageDelta, + "stop": OverallStop, + "section_end": SectionEnd, + "internal_search_tool_start": SearchToolStart, + "internal_search_tool_delta": SearchToolDelta, + "image_generation_tool_start": ImageGenerationToolStart, + "image_generation_tool_delta": ImageGenerationToolDelta, + "image_generation_tool_heartbeat": ImageGenerationToolHeartbeat, + "custom_tool_start": CustomToolStart, + "custom_tool_delta": CustomToolDelta, + "reasoning_start": ReasoningStart, + "reasoning_delta": ReasoningDelta, + "citation_start": CitationStart, + "citation_delta": CitationDelta, + } + + packet_class = type_mapping.get(packet_type) + if packet_class: + # Create instance using the packet data, filtering out None values + filtered_data = {k: v for k, v in packet.items() if v is not None} + return packet_class(**filtered_data) + + except Exception as e: + # Log the error but don't fail the entire process + logger.debug(f"Failed to convert packet to PacketObj: {e}") + + return None + + @log_function_time() def gather_stream( packets: 
Iterator[Dict[str, Any]], @@ -791,7 +856,15 @@ def gather_stream( for packet in packets: if packet != {"type": "event"}: print(packet) - if "text" in packet: + + # Convert packet to PacketObj when possible + packet_obj = _convert_to_packet_obj(packet) + if packet_obj: + # Handle PacketObj types that contain text content + if hasattr(packet_obj, "content") and packet_obj.content: + answer += packet_obj.content + elif "text" in packet: + # Fallback for legacy packet format answer += packet["text"] return ChatBasicResponse( From 41accfdc3f1382ea5ad65bba23e7eb2a5608b9ee Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Mon, 22 Sep 2025 18:08:59 -0700 Subject: [PATCH 12/34] . --- backend/onyx/chat/process_message.py | 25 ++++-- .../server/query_and_chat/chat_backend.py | 83 +++++++++---------- 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index e8c50ccd61c..32e95e7d2b0 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -16,6 +16,7 @@ from onyx.chat.chat_utils import create_chat_chain from onyx.chat.chat_utils import create_temporary_persona from onyx.chat.chat_utils import process_kg_commands +from onyx.chat.models import AnswerStream from onyx.chat.models import AnswerStyleConfig from onyx.chat.models import ChatBasicResponse from onyx.chat.models import CitationConfig @@ -249,7 +250,7 @@ def stream_chat_message_objects( # messages. # NOTE: is not stored in the database at all. single_message_history: str | None = None, -) -> Iterator[Dict[str, Any]]: +) -> AnswerStream: """Streams in order: 1. [conditional] Retrieved documents if a search needs to be run 2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on @@ -531,7 +532,7 @@ def stream_chat_message_objects( yield MessageResponseIDInfo( user_message_id=user_message.id if user_message else None, reserved_assistant_message_id=reserved_message_id, - ).model_dump() + ) prompt_override = new_msg_req.prompt_override or chat_session.prompt_override if new_msg_req.persona_override_config: @@ -619,7 +620,7 @@ def stream_chat_message_objects( ) for file in in_memory_user_files ] - ).model_dump() + ) prompt_builder = AnswerPromptBuilder( user_message=default_build_user_message( @@ -714,7 +715,7 @@ def stream_chat_message_objects( logger.exception("Failed to process chat message.") error_msg = str(e) - yield StreamingError(error=error_msg).model_dump() + yield StreamingError(error=error_msg) db_session.rollback() return @@ -728,7 +729,7 @@ def stream_chat_message_objects( stack_trace = traceback.format_exc() if isinstance(e, ToolCallException): - yield StreamingError(error=error_msg, stack_trace=stack_trace).model_dump() + yield StreamingError(error=error_msg, stack_trace=stack_trace) elif llm: client_error_msg = litellm_exception_to_error_msg(e, llm) if llm.config.api_key and len(llm.config.api_key) > 2: @@ -739,9 +740,7 @@ def stream_chat_message_objects( llm.config.api_key, "[REDACTED_API_KEY]" ) - yield StreamingError( - error=client_error_msg, stack_trace=stack_trace - ).model_dump() + yield StreamingError(error=client_error_msg, stack_trace=stack_trace) db_session.rollback() return @@ -771,7 +770,15 @@ def stream_chat_message( document_retrieval_latency = time.time() - start_time logger.debug(f"First doc time: {document_retrieval_latency}") - yield get_json_line(obj) + # Convert Pydantic models to dictionaries for JSON serialization + if hasattr(obj, "model_dump"): + obj_dict = 
obj.model_dump() + elif hasattr(obj, "dict"): + obj_dict = obj.dict() + else: + obj_dict = obj + + yield get_json_line(obj_dict) def remove_answer_citations(answer: str) -> str: diff --git a/backend/onyx/server/query_and_chat/chat_backend.py b/backend/onyx/server/query_and_chat/chat_backend.py index d2c430ddd1d..4c37afaca49 100644 --- a/backend/onyx/server/query_and_chat/chat_backend.py +++ b/backend/onyx/server/query_and_chat/chat_backend.py @@ -21,7 +21,6 @@ from onyx.auth.users import current_chat_accessible_user from onyx.auth.users import current_user -from onyx.chat.chat_utils import create_chat_chain from onyx.chat.chat_utils import extract_headers from onyx.chat.process_message import stream_chat_message from onyx.chat.prompt_builder.citations_prompt import ( @@ -63,13 +62,8 @@ from onyx.file_processing.extract_file_text import docx_to_txt_filename from onyx.file_store.file_store import get_default_file_store from onyx.file_store.models import FileDescriptor -from onyx.llm.exceptions import GenAIDisabledException -from onyx.llm.factory import get_default_llms from onyx.llm.factory import get_llms_for_persona from onyx.natural_language_processing.utils import get_tokenizer -from onyx.secondary_llm_flows.chat_session_naming import ( - get_renamed_conversation_name, -) from onyx.server.documents.models import ConnectorBase from onyx.server.documents.models import CredentialBase from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type @@ -305,45 +299,44 @@ def rename_chat_session( user: User | None = Depends(current_user), db_session: Session = Depends(get_session), ) -> RenameChatSessionResponse: - name = rename_req.name - chat_session_id = rename_req.chat_session_id - user_id = user.id if user is not None else None - - if name: - update_chat_session( - db_session=db_session, - user_id=user_id, - chat_session_id=chat_session_id, - description=name, - ) - return RenameChatSessionResponse(new_name=name) - - final_msg, history_msgs = create_chat_chain( - chat_session_id=chat_session_id, db_session=db_session - ) - full_history = history_msgs + [final_msg] - - try: - llm, _ = get_default_llms( - additional_headers=extract_headers( - request.headers, LITELLM_PASS_THROUGH_HEADERS - ) - ) - except GenAIDisabledException: - # This may be longer than what the LLM tends to produce but is the most - # clear thing we can do - return RenameChatSessionResponse(new_name=full_history[0].message) - - new_name = get_renamed_conversation_name(full_history=full_history, llm=llm) - - update_chat_session( - db_session=db_session, - user_id=user_id, - chat_session_id=chat_session_id, - description=new_name, - ) - - return RenameChatSessionResponse(new_name=new_name) + # name = rename_req.name + # chat_session_id = rename_req.chat_session_id + # user_id = user.id if user is not None else None + + # if name: + # update_chat_session( + # db_session=db_session, + # user_id=user_id, + # chat_session_id=chat_session_id, + # description=name, + # ) + # return RenameChatSessionResponse(new_name=name) + + # final_msg, history_msgs = create_chat_chain( + # chat_session_id=chat_session_id, db_session=db_session + # ) + # full_history = history_msgs + [final_msg] + + # try: + # llm, _ = get_default_llms( + # additional_headers=extract_headers( + # request.headers, LITELLM_PASS_THROUGH_HEADERS + # ) + # ) + # except GenAIDisabledException: + # # This may be longer than what the LLM tends to produce but is the most + # # clear thing we can do + # return 
RenameChatSessionResponse(new_name=full_history[0].message) + + # new_name = get_renamed_conversation_name(full_history=full_history, llm=llm) + + # update_chat_session( + # db_session=db_session, + # user_id=user_id, + # chat_session_id=chat_session_id, + # description=new_name, + # ) + return RenameChatSessionResponse(new_name="hi") @router.patch("/chat-session/{session_id}") From 9d5a1b6405a3bf3e2f00b4c538e13ae1607f8722 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Tue, 23 Sep 2025 15:12:29 -0700 Subject: [PATCH 13/34] . --- backend/onyx/chat/answer_scratchpad.py | 208 ++++++++++++++++++++++--- backend/onyx/evals/demo_agent.py | 79 ++++++++++ backend/onyx/evals/tracing.py | 4 + backend/onyx/prompts/dr_prompts.py | 13 +- 4 files changed, 274 insertions(+), 30 deletions(-) create mode 100644 backend/onyx/evals/demo_agent.py diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index 216a0f0a534..0380433a96d 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -17,14 +17,23 @@ from agents import ModelSettings from agents import RunContextWrapper from agents import Runner +from agents.extensions.handoff_prompt import prompt_with_handoff_instructions from agents.extensions.models.litellm_model import LitellmModel from agents.stream_events import RawResponsesStreamEvent from agents.stream_events import RunItemStreamEvent from braintrust import traced +from pydantic import BaseModel +from onyx.agents.agent_search.dr.constants import MAX_CHAT_HISTORY_MESSAGES +from onyx.agents.agent_search.dr.dr_prompt_builder import ( + get_dr_prompt_orchestration_templates, +) +from onyx.agents.agent_search.dr.enums import ResearchType +from onyx.agents.agent_search.dr.models import DRPromptPurpose from onyx.agents.agent_search.dr.sub_agents.web_search.clients.exa_client import ( ExaClient, ) +from onyx.agents.agent_search.dr.utils import get_chat_history_string from onyx.agents.agent_search.models import GraphConfig from onyx.db.engine.sql_engine import get_session_with_current_tenant from onyx.llm.interfaces import ( @@ -122,14 +131,13 @@ def llm_completion( model_name: str, temperature: float, messages: List[Dict[str, Any]], - tools: List[Dict[str, Any]], stream: bool = False, -) -> Any: +) -> litellm.ModelResponse: return litellm.completion( model=model_name, temperature=temperature, messages=messages, - tools=tools, + tools=None, stream=stream, ) @@ -267,6 +275,74 @@ async def amain(): return t +class ResearchScratchpad(BaseModel): + notes: List[dict] = [] + + +scratchpad = ResearchScratchpad() + + +@function_tool +def add_note(note: str, source_url: str | None = None): + """Store a factual note you want to cite later.""" + scratchpad.notes.append({"note": note, "source_url": source_url}) + return {"ok": True, "count": len(scratchpad.notes)} + + +@function_tool +def finalize_report(): + """Signal you're done researching. Return a structured, citation-rich report.""" + # The model should *compose* the report as the tool *result*, using notes in scratchpad. + # Some teams have the model return the full report as this tool's return value + # so the UI can detect completion cleanly. 
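
Note for this patch: it adds the deep-research agent plus a clarification gate that runs before the research loop. The gate assumes the clarification prompt returns a JSON object with clarification_question and clarification_needed; a small sketch of just that parsing/branching step (the sample payload is made up for illustration):

import json

from pydantic import BaseModel


class ClarificationOutput(BaseModel):
    clarification_question: str
    clarification_needed: bool


def parse_clarification(raw_llm_content: str) -> ClarificationOutput:
    # Raises a validation error if the model strays from the two expected keys.
    return ClarificationOutput(**json.loads(raw_llm_content))


if __name__ == "__main__":
    sample = (
        '{"clarification_question": "Which fiscal year do you mean?", '
        '"clarification_needed": true}'
    )
    out = parse_clarification(sample)
    if out.clarification_needed:
        # In dr_turn this question is emitted to the stream and the turn ends early.
        print(out.clarification_question)
    else:
        print("proceed to deep research")
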
+ return { + "status": "ready_to_render", + "notes_index": scratchpad.notes, # the model can read these to assemble citations + } + + +def construct_deep_research_agent(llm: LLM) -> Agent: + litellm_model = LitellmModel( + # If you have access, prefer OpenAI’s deep research-capable models: + # "o3-deep-research" or "o4-mini-deep-research" + # otherwise keep your current model and lean on the prompt + tools + model=getattr(llm.config, "model_name", "o4-mini-deep-research"), + api_key=llm.config.api_key, + ) + + DR_INSTRUCTIONS = """ +You are a deep-research agent. Work in explicit iterations: +1) PLAN: Decompose the user’s query into sub-questions and a step-by-step plan. +2) SEARCH: Use web_search (or web_search_many for fanout) to explore multiple angles. +3) FETCH: Use web_fetch for any promising URLs to extract specifics and quotes. +4) NOTE: After each useful find, call add_note(note, source_url) to save key facts. +5) REVISE: If evidence contradicts earlier assumptions, update your plan and continue. +6) FINALIZE: When confident, call finalize_report(). Your final answer must include: + - Clear, structured conclusions + - A short “How I searched” summary + - Inline citations to sources (with URLs) + - A bullet list of limitations/open questions +Guidelines: +- Prefer breadth-first exploration before deep dives. +- Compare sources and dates; prioritize recency for time-sensitive topics. +- Minimize redundancy by skimming before fetching. +- Think out loud in a compact way, but keep reasoning crisp. +""" + + return Agent( + name="Researcher", + instructions=DR_INSTRUCTIONS, + model=litellm_model, + tools=[web_search, web_fetch, add_note, finalize_report, internal_search], + model_settings=ModelSettings( + temperature=llm.config.temperature, + include_usage=True, + # optional: let model choose tools freely + # tool_choice="auto", # if supported by your LitellmModel wrapper + ), + ) + + def unified_event_stream( agent: Agent, messages: List[Dict[str, Any]], @@ -277,14 +353,21 @@ def unified_event_stream( ) -> Generator[Dict[str, Any], None, None]: bus: Queue = Queue() emitter = Emitter(bus) - start_run_in_thread( - agent=agent, - messages=messages, - cfg=cfg, - llm=llm, - search_tool=search_tool, - emitter=emitter, + # start_run_in_thread( + # agent=agent, + # messages=messages, + # cfg=cfg, + # llm=llm, + # search_tool=search_tool, + # emitter=emitter, + # ) + + t = threading.Thread( + target=thread_worker_dr_turn, + args=(messages, cfg, llm, emitter, search_tool), + daemon=True, ) + t.start() done = False while not done: pkt: StreamPacket = emitter.bus.get() @@ -310,11 +393,25 @@ def stream_chat_sync( ) -> Generator[Dict[str, Any], None, None]: bus: Queue = Queue() emitter = Emitter(bus) + agent = construct_deep_research_agent(llm) + return unified_event_stream( + agent=agent, + messages=messages, + cfg=cfg, + llm=llm, + emitter=emitter, + search_tool=search_tool, + ) + + +def construct_simple_agent( + llm: LLM, +) -> Agent: litellm_model = LitellmModel( model=llm.config.model_name, api_key=llm.config.api_key, ) - agent = Agent( + return Agent( name="Assistant", instructions=""" You are a helpful assistant that can search the web, fetch content from URLs, @@ -327,11 +424,86 @@ def stream_chat_sync( include_usage=True, # Track usage metrics ), ) - return unified_event_stream( - agent=agent, - messages=messages, - cfg=cfg, - llm=llm, - emitter=emitter, - search_tool=search_tool, + + +def thread_worker_dr_turn(messages, cfg, llm, emitter, search_tool): + try: + 
asyncio.run(dr_turn(messages, cfg, llm, emitter, search_tool)) + except Exception as e: + logger.error(f"Error in dr_turn: {e}", exc_info=e, stack_info=True) + emitter.emit(kind="done", data={"ok": False}) + + +async def dr_turn( + messages: List[Dict[str, Any]], + cfg: GraphConfig, + llm: LLM, + emitter: Emitter, + search_tool: SearchTool | None = None, +) -> None: + clarification = get_clarification(messages, cfg, llm, emitter, search_tool) + output = json.loads(clarification.choices[0].message.content) + clarification_output = ClarificationOutput(**output) + if clarification_output.clarification_needed: + emitter.emit(kind="agent", data=clarification_output.clarification_question) + emitter.emit(kind="done", data={"ok": True}) + return + + agent = construct_deep_research_agent(llm) + ctx = MyContext( + run_dependencies=RunDependencies( + search_tool=search_tool, + emitter=emitter, + ) + ) + # 1) start the streamed run (async) + streamed = Runner.run_streamed(agent, messages, context=ctx, max_turns=100) + + # 2) forward the agent’s async event stream + async for ev in streamed.stream_events(): + if isinstance(ev, RunItemStreamEvent): + pass + elif isinstance(ev, RawResponsesStreamEvent): + emitter.emit(kind="agent", data=ev.data.model_dump()) + + emitter.emit(kind="done", data={"ok": True}) + + +class ClarificationOutput(BaseModel): + clarification_question: str + clarification_needed: bool + + +def get_clarification( + messages: List[Dict[str, Any]], + cfg: GraphConfig, + llm: LLM, + emitter: Emitter, + search_tool: SearchTool | None = None, +) -> litellm.ModelResponse: + chat_history_string = ( + get_chat_history_string( + cfg.inputs.prompt_builder.message_history, + MAX_CHAT_HISTORY_MESSAGES, + ) + or "(No chat history yet available)" + ) + base_clarification_prompt = get_dr_prompt_orchestration_templates( + DRPromptPurpose.CLARIFICATION, + research_type=ResearchType.DEEP, + entity_types_string=None, + relationship_types_string=None, + available_tools={}, + ) + clarification_prompt = base_clarification_prompt.build( + question=messages[-1]["content"], + chat_history_string=chat_history_string, + ) + clarifier_prompt = prompt_with_handoff_instructions(clarification_prompt) + llm_response = llm_completion( + model_name=llm.config.model_name, + temperature=llm.config.temperature, + messages=[{"role": "user", "content": clarifier_prompt}], + stream=False, ) + return llm_response diff --git a/backend/onyx/evals/demo_agent.py b/backend/onyx/evals/demo_agent.py new file mode 100644 index 00000000000..ba8220cd6bf --- /dev/null +++ b/backend/onyx/evals/demo_agent.py @@ -0,0 +1,79 @@ +import asyncio +import os + +from agents import ModelSettings +from agents import run_demo_loop +from agents.agent import Agent +from agents.extensions.handoff_prompt import prompt_with_handoff_instructions +from agents.extensions.models.litellm_model import LitellmModel +from pydantic import BaseModel + +from onyx.agents.agent_search.dr.dr_prompt_builder import ( + get_dr_prompt_orchestration_templates, +) +from onyx.agents.agent_search.dr.enums import ResearchType +from onyx.agents.agent_search.dr.models import DRPromptPurpose + + +def construct_simple_agent() -> Agent: + litellm_model = LitellmModel( + model="gpt-4.1", + api_key=os.getenv("OPENAI_API_KEY"), + ) + return Agent( + name="Assistant", + instructions=""" + You are a helpful assistant that can search the web, fetch content from URLs, + and search internal databases. 
+ """, + model=litellm_model, + tools=[], + model_settings=ModelSettings( + temperature=0.0, + include_usage=True, # Track usage metrics + ), + ) + + +class ClarificationOutput(BaseModel): + clarification_question: str + clarification_needed: bool + + +def construct_dr_agent() -> Agent: + simple_agent = construct_simple_agent() + litellm_model = LitellmModel( + model="gpt-4.1", + api_key=os.getenv("OPENAI_API_KEY"), + ) + base_clarification_prompt = get_dr_prompt_orchestration_templates( + DRPromptPurpose.CLARIFICATION, + research_type=ResearchType.DEEP, + entity_types_string=None, + relationship_types_string=None, + available_tools={}, + ) + clarification_prompt = base_clarification_prompt.build( + question="", + chat_history_string="", + ) + clarifier_prompt = prompt_with_handoff_instructions(clarification_prompt) + clarifier_agent = Agent( + name="Clarifier", + instructions=clarifier_prompt, + model=litellm_model, + tools=[], + output_type=ClarificationOutput, + handoffs=[simple_agent], + model_settings=ModelSettings(tool_choice="required"), + ) + return clarifier_agent + + +async def main() -> None: + agent = construct_dr_agent() + await run_demo_loop(agent) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/backend/onyx/evals/tracing.py b/backend/onyx/evals/tracing.py index 1df631e6a2f..8c3d288adbc 100644 --- a/backend/onyx/evals/tracing.py +++ b/backend/onyx/evals/tracing.py @@ -1,6 +1,9 @@ from typing import Any import braintrust +from agents import set_trace_processors +from braintrust import init_logger +from braintrust.wrappers.openai import BraintrustTracingProcessor from braintrust_langchain import set_global_handler from braintrust_langchain.callbacks import BraintrustCallbackHandler @@ -33,3 +36,4 @@ def setup_braintrust() -> None: braintrust.set_masking_function(_mask) handler = BraintrustCallbackHandler() set_global_handler(handler) + set_trace_processors([BraintrustTracingProcessor(init_logger("openai-agent"))]) diff --git a/backend/onyx/prompts/dr_prompts.py b/backend/onyx/prompts/dr_prompts.py index 49d547e5776..1070090cafb 100644 --- a/backend/onyx/prompts/dr_prompts.py +++ b/backend/onyx/prompts/dr_prompts.py @@ -1160,7 +1160,7 @@ GET_CLARIFICATION_PROMPT = PromptTemplate( - f"""\ + """\ You are great at asking clarifying questions in case \ a base question is not as clear enough. Your task is to ask necessary clarification \ questions to the user, before the question is sent to the deep research agent. @@ -1183,17 +1183,6 @@ The tools and the entity and relationship types in the knowledge graph are simply provided \ as context for determining whether the question requires clarification. -Here is the question the user asked: -{SEPARATOR_LINE} ----question--- -{SEPARATOR_LINE} - -Here is the previous chat history (if any), which may contain relevant information \ -to answer the question: -{SEPARATOR_LINE} ----chat_history_string--- -{SEPARATOR_LINE} - NOTES: - you have to reason over this purely based on your intrinsic knowledge. - if clarifications are required, fill in 'true' for the "feedback_needed" field and \ From d59c85a7a201b01968cfcf5212dc6b39a7496c8b Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Tue, 23 Sep 2025 15:30:10 -0700 Subject: [PATCH 14/34] . 
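
Note for this patch: it wires the scratchpad tools into Braintrust tracing and points the OpenAI Agents trace processor at the shared project logger. A minimal usage sketch of the @traced decorator as used here, assuming the braintrust SDK is installed; actually exporting spans still requires the usual Braintrust configuration (API key and project), which setup_braintrust() handles:

from braintrust import traced


@traced(name="add_note")
def add_note(note: str, source_url: str | None = None) -> dict:
    # The decorator wraps each call in a span named "add_note"; the tool body
    # is elided here since this is only a tracing illustration.
    return {"ok": True, "note": note, "source_url": source_url}


if __name__ == "__main__":
    print(add_note("Exa rate limits are per-key", "https://example.com/docs"))
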
--- backend/onyx/chat/answer_scratchpad.py | 2 ++ backend/onyx/evals/tracing.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index 0380433a96d..372c21abd8a 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -283,6 +283,7 @@ class ResearchScratchpad(BaseModel): @function_tool +@traced(name="add_note") def add_note(note: str, source_url: str | None = None): """Store a factual note you want to cite later.""" scratchpad.notes.append({"note": note, "source_url": source_url}) @@ -290,6 +291,7 @@ def add_note(note: str, source_url: str | None = None): @function_tool +@traced(name="finalize_report") def finalize_report(): """Signal you're done researching. Return a structured, citation-rich report.""" # The model should *compose* the report as the tool *result*, using notes in scratchpad. diff --git a/backend/onyx/evals/tracing.py b/backend/onyx/evals/tracing.py index 8c3d288adbc..0f48c71dd7c 100644 --- a/backend/onyx/evals/tracing.py +++ b/backend/onyx/evals/tracing.py @@ -36,4 +36,4 @@ def setup_braintrust() -> None: braintrust.set_masking_function(_mask) handler = BraintrustCallbackHandler() set_global_handler(handler) - set_trace_processors([BraintrustTracingProcessor(init_logger("openai-agent"))]) + set_trace_processors([BraintrustTracingProcessor(init_logger(BRAINTRUST_PROJECT))]) From 74ed3c146db31b5d7b2aecd7cd515fbbd44601be Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Tue, 23 Sep 2025 18:17:24 -0700 Subject: [PATCH 15/34] . --- backend/onyx/chat/answer_scratchpad.py | 120 ++++++++++++++++++------- 1 file changed, 87 insertions(+), 33 deletions(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index 372c21abd8a..e9d2979f226 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -2,14 +2,17 @@ import asyncio import json +import queue import threading from collections.abc import Generator +from collections.abc import Iterator from dataclasses import dataclass from queue import Queue from typing import Any from typing import cast from typing import Dict from typing import List +from typing import Optional import litellm from agents import Agent @@ -69,7 +72,6 @@ def short_tag(link: str, i: int) -> str: @function_tool -@traced(name="web_search") def web_search(query: str) -> str: """Search the web for information. This tool provides urls and short snippets, but does not fetch the full content of the urls.""" @@ -93,7 +95,6 @@ def web_search(query: str) -> str: @function_tool -@traced(name="web_fetch") def web_fetch(urls: List[str]) -> str: """Fetch the full contents of a list of URLs.""" exa_client = ExaClient() @@ -114,18 +115,6 @@ def web_fetch(urls: List[str]) -> str: return json.dumps({"results": out}) -@function_tool -@traced(name="reasoning") -def reasoning() -> str: - """Use this tool for reasoning. Powerful for complex questions and - tasks, or questions that require multiple steps to answer.""" - # Note: This is a simplified version. 
In the full implementation, - # we would need to pass the context through the agent's context system - return ( - "Reasoning tool - this would need to be implemented with proper context access" - ) - - @traced(name="llm_completion", type="llm") def llm_completion( model_name: str, @@ -143,7 +132,6 @@ def llm_completion( @function_tool -@traced(name="internal_search") def internal_search(context_wrapper: RunContextWrapper[MyContext], query: str) -> str: """Search internal company vector database for information. Sources include: @@ -283,7 +271,6 @@ class ResearchScratchpad(BaseModel): @function_tool -@traced(name="add_note") def add_note(note: str, source_url: str | None = None): """Store a factual note you want to cite later.""" scratchpad.notes.append({"note": note, "source_url": source_url}) @@ -291,7 +278,6 @@ def add_note(note: str, source_url: str | None = None): @function_tool -@traced(name="finalize_report") def finalize_report(): """Signal you're done researching. Return a structured, citation-rich report.""" # The model should *compose* the report as the tool *result*, using notes in scratchpad. @@ -330,7 +316,6 @@ def construct_deep_research_agent(llm: LLM) -> Agent: - Minimize redundancy by skimming before fetching. - Think out loud in a compact way, but keep reasoning crisp. """ - return Agent( name="Researcher", instructions=DR_INSTRUCTIONS, @@ -420,7 +405,7 @@ def construct_simple_agent( and search internal databases. """, model=litellm_model, - tools=[web_search, web_fetch, reasoning, internal_search], + tools=[web_search, web_fetch, internal_search], model_settings=ModelSettings( temperature=llm.config.temperature, include_usage=True, # Track usage metrics @@ -430,45 +415,114 @@ def construct_simple_agent( def thread_worker_dr_turn(messages, cfg, llm, emitter, search_tool): try: - asyncio.run(dr_turn(messages, cfg, llm, emitter, search_tool)) + dr_turn(messages, cfg, llm, emitter, search_tool) except Exception as e: logger.error(f"Error in dr_turn: {e}", exc_info=e, stack_info=True) emitter.emit(kind="done", data={"ok": False}) -async def dr_turn( +SENTINEL = object() + + +class StreamBridge: + """ + Spins up an asyncio loop in a background thread, starts Runner.run_streamed there, + consumes its async event stream, and exposes a blocking .events() iterator. 
+ """ + + def __init__(self, agent, messages, ctx, max_turns: int = 100): + self.agent = agent + self.messages = messages + self.ctx = ctx + self.max_turns = max_turns + + self._q: "queue.Queue[object]" = queue.Queue() + self._loop: Optional[asyncio.AbstractEventLoop] = None + self._thread: Optional[threading.Thread] = None + self._streamed = None + + def start(self): + def worker(): + async def run_and_consume(): + # Create the streamed run *inside* the loop thread + self._streamed = Runner.run_streamed( + self.agent, + self.messages, + context=self.ctx, + max_turns=self.max_turns, + ) + try: + async for ev in self._streamed.stream_events(): + self._q.put(ev) + finally: + self._q.put(SENTINEL) + + # Each thread needs its own loop + self._loop = asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) + try: + self._loop.run_until_complete(run_and_consume()) + finally: + self._loop.close() + + self._thread = threading.Thread(target=worker, daemon=True) + self._thread.start() + return self + + def events(self) -> Iterator[object]: + while True: + ev = self._q.get() + if ev is SENTINEL: + break + yield ev + + def cancel(self): + # Post a cancellation to the loop thread safely + if self._loop and self._streamed: + + def _do_cancel(): + try: + self._streamed.cancel() + except Exception: + pass + + self._loop.call_soon_threadsafe(_do_cancel) + + +def dr_turn( messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM, - emitter: Emitter, + turn_event_stream_emitter: Emitter, # TurnEventStream is the primary output of the turn search_tool: SearchTool | None = None, ) -> None: - clarification = get_clarification(messages, cfg, llm, emitter, search_tool) + clarification = get_clarification( + messages, cfg, llm, turn_event_stream_emitter, search_tool + ) output = json.loads(clarification.choices[0].message.content) clarification_output = ClarificationOutput(**output) if clarification_output.clarification_needed: - emitter.emit(kind="agent", data=clarification_output.clarification_question) - emitter.emit(kind="done", data={"ok": True}) + turn_event_stream_emitter.emit( + kind="agent", data=clarification_output.clarification_question + ) + turn_event_stream_emitter.emit(kind="done", data={"ok": True}) return agent = construct_deep_research_agent(llm) ctx = MyContext( run_dependencies=RunDependencies( search_tool=search_tool, - emitter=emitter, + emitter=turn_event_stream_emitter, ) ) - # 1) start the streamed run (async) - streamed = Runner.run_streamed(agent, messages, context=ctx, max_turns=100) - - # 2) forward the agent’s async event stream - async for ev in streamed.stream_events(): + bridge = StreamBridge(agent, messages, ctx, max_turns=100).start() + for ev in bridge.events(): if isinstance(ev, RunItemStreamEvent): pass elif isinstance(ev, RawResponsesStreamEvent): - emitter.emit(kind="agent", data=ev.data.model_dump()) + turn_event_stream_emitter.emit(kind="agent", data=ev.data.model_dump()) - emitter.emit(kind="done", data={"ok": True}) + turn_event_stream_emitter.emit(kind="done", data={"ok": True}) class ClarificationOutput(BaseModel): From 7cb01e57dfae9be6a3104738a98cec81a54e3b95 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Tue, 23 Sep 2025 18:31:11 -0700 Subject: [PATCH 16/34] . 
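
Note for this patch: it lets the researcher fan out web searches in parallel (parallel_tool_calls=True). The events still funnel through the StreamBridge added in the previous patch, which pumps the SDK's async event stream into a blocking iterator via a private event loop, a queue, and a sentinel. A stripped-down, self-contained version of that pattern (fake_agent_events stands in for Runner.run_streamed(...).stream_events()):

import asyncio
import queue
import threading
from collections.abc import AsyncIterator, Iterator
from typing import Any

_SENTINEL = object()


class AsyncToSyncBridge:
    """Run an async iterator on its own event loop in a worker thread and
    expose the items through a blocking iterator (same idea as StreamBridge)."""

    def __init__(self, make_aiter) -> None:
        self._make_aiter = make_aiter
        self._q: "queue.Queue[object]" = queue.Queue()

    def start(self) -> "AsyncToSyncBridge":
        def worker() -> None:
            async def pump() -> None:
                try:
                    async for item in self._make_aiter():
                        self._q.put(item)
                finally:
                    self._q.put(_SENTINEL)  # always unblock the consumer

            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(pump())
            finally:
                loop.close()

        threading.Thread(target=worker, daemon=True).start()
        return self

    def events(self) -> Iterator[Any]:
        while True:
            item = self._q.get()
            if item is _SENTINEL:
                break
            yield item


async def fake_agent_events() -> AsyncIterator[str]:
    # Stand-in for the real streamed agent run.
    for i in range(3):
        await asyncio.sleep(0)
        yield f"event-{i}"


if __name__ == "__main__":
    for ev in AsyncToSyncBridge(fake_agent_events).start().events():
        print(ev)
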
--- backend/onyx/chat/answer_scratchpad.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index e9d2979f226..e8e64baaf02 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -301,7 +301,7 @@ def construct_deep_research_agent(llm: LLM) -> Agent: DR_INSTRUCTIONS = """ You are a deep-research agent. Work in explicit iterations: 1) PLAN: Decompose the user’s query into sub-questions and a step-by-step plan. -2) SEARCH: Use web_search (or web_search_many for fanout) to explore multiple angles. +2) SEARCH: Use web_search to explore multiple angles, fanning out and searching in parallel. 3) FETCH: Use web_fetch for any promising URLs to extract specifics and quotes. 4) NOTE: After each useful find, call add_note(note, source_url) to save key facts. 5) REVISE: If evidence contradicts earlier assumptions, update your plan and continue. @@ -324,6 +324,7 @@ def construct_deep_research_agent(llm: LLM) -> Agent: model_settings=ModelSettings( temperature=llm.config.temperature, include_usage=True, + parallel_tool_calls=True, # optional: let model choose tools freely # tool_choice="auto", # if supported by your LitellmModel wrapper ), From ae4fafecbc9e7a07f0a1207aa7c555b0f947898a Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Wed, 24 Sep 2025 10:46:47 -0700 Subject: [PATCH 17/34] . --- backend/onyx/chat/answer_scratchpad.py | 43 +++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index e8e64baaf02..7ff67ca8cc2 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import contextvars import json import queue import threading @@ -350,9 +351,20 @@ def unified_event_stream( # emitter=emitter, # ) + # Capture current context for propagation to worker thread + current_context = contextvars.copy_context() + t = threading.Thread( - target=thread_worker_dr_turn, - args=(messages, cfg, llm, emitter, search_tool), + target=current_context.run, + args=( + thread_worker_dr_turn, + messages, + cfg, + llm, + emitter, + search_tool, + None, + ), # eval_context=None for now daemon=True, ) t.start() @@ -414,9 +426,20 @@ def construct_simple_agent( ) -def thread_worker_dr_turn(messages, cfg, llm, emitter, search_tool): +def thread_worker_dr_turn(messages, cfg, llm, emitter, search_tool, eval_context=None): + """ + Worker function for deep research turn that runs in a separate thread. + + Args: + messages: List of messages for the conversation + cfg: Graph configuration + llm: Language model instance + emitter: Event emitter for streaming responses + search_tool: Search tool instance (optional) + eval_context: Evaluation context to be propagated to the worker thread + """ try: - dr_turn(messages, cfg, llm, emitter, search_tool) + dr_turn(messages, cfg, llm, emitter, search_tool, eval_context) except Exception as e: logger.error(f"Error in dr_turn: {e}", exc_info=e, stack_info=True) emitter.emit(kind="done", data={"ok": False}) @@ -496,7 +519,19 @@ def dr_turn( llm: LLM, turn_event_stream_emitter: Emitter, # TurnEventStream is the primary output of the turn search_tool: SearchTool | None = None, + eval_context=None, ) -> None: + """ + Execute a deep research turn with evaluation context support. 
+ + Args: + messages: List of messages for the conversation + cfg: Graph configuration + llm: Language model instance + turn_event_stream_emitter: Event emitter for streaming responses + search_tool: Search tool instance (optional) + eval_context: Evaluation context for the turn (optional) + """ clarification = get_clarification( messages, cfg, llm, turn_event_stream_emitter, search_tool ) From a370cc2ba9fa93fa20825128ec17a09b3c7c1c92 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Wed, 24 Sep 2025 15:53:23 -0700 Subject: [PATCH 18/34] . --- backend/onyx/chat/answer_scratchpad.py | 17 ++++++++++++++++- backend/onyx/evals/tracing.py | 3 ++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index 7ff67ca8cc2..f08c70307af 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -17,6 +17,7 @@ import litellm from agents import Agent +from agents import AgentHooks from agents import function_tool from agents import ModelSettings from agents import RunContextWrapper @@ -290,12 +291,25 @@ def finalize_report(): } +class VerboseHooks(AgentHooks[Any]): + async def on_llm_start( + self, + context: RunContextWrapper[Any], + agent: Agent[Any], + system_prompt: Optional[str], + input_items: List[dict], # alias: TResponseInputItem + ) -> None: + print(f"[{agent.name}] LLM start") + print("system_prompt:", system_prompt) + print("usage so far:", context.usage.total_tokens) + + def construct_deep_research_agent(llm: LLM) -> Agent: litellm_model = LitellmModel( # If you have access, prefer OpenAI’s deep research-capable models: # "o3-deep-research" or "o4-mini-deep-research" # otherwise keep your current model and lean on the prompt + tools - model=getattr(llm.config, "model_name", "o4-mini-deep-research"), + model=llm.config.model_name, api_key=llm.config.api_key, ) @@ -329,6 +343,7 @@ def construct_deep_research_agent(llm: LLM) -> Agent: # optional: let model choose tools freely # tool_choice="auto", # if supported by your LitellmModel wrapper ), + hooks=VerboseHooks(), ) diff --git a/backend/onyx/evals/tracing.py b/backend/onyx/evals/tracing.py index 0f48c71dd7c..2383ae3f9f3 100644 --- a/backend/onyx/evals/tracing.py +++ b/backend/onyx/evals/tracing.py @@ -1,3 +1,4 @@ +import os from typing import Any import braintrust @@ -10,7 +11,7 @@ from onyx.configs.app_configs import BRAINTRUST_API_KEY from onyx.configs.app_configs import BRAINTRUST_PROJECT -MASKING_LENGTH = 20000 +MASKING_LENGTH = int(os.environ.get("BRAINTRUST_MASKING_LENGTH", "20000")) def _truncate_str(s: str) -> str: From 89e770ebf2f29bf3a6d2e31a4eee7a17d9b14075 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Wed, 24 Sep 2025 18:24:38 -0700 Subject: [PATCH 19/34] . 
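
Note for this patch: it adds a context-compaction path — an AgentHooks subclass flips needs_compaction once total token usage crosses 10k, and a handoff input filter rewrites the history before the summarizer sees it. A stand-alone paraphrase of those two pieces of logic, kept independent of the agents SDK (the threshold and message shapes mirror the code below; the real filter operates on HandoffInputData):

from typing import Any, Dict, List

COMPACTION_TOKEN_THRESHOLD = 10_000  # mirrors the check in CompactionHooks.on_llm_start


def needs_compaction(total_tokens: int) -> bool:
    # The hook only flips a flag; summarization itself happens via the handoff.
    return total_tokens > COMPACTION_TOKEN_THRESHOLD


def normalize_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    # Paraphrase of compaction_input_filter: drop the in-flight message,
    # rewrite tool results as user turns, then keep only the newest survivor.
    normalized: List[Dict[str, Any]] = []
    for msg in history[:-1]:
        if not isinstance(msg, dict) or msg.get("content") is None:
            continue
        if msg.get("role") == "tool":
            normalized.append(
                {"role": "user", "content": f"Tool response: {msg['content']}"}
            )
        else:
            normalized.append(msg)
    return normalized[-1:]  # empty list when nothing usable survives


if __name__ == "__main__":
    history = [
        {"role": "user", "content": "Research our Q3 churn drivers"},
        {"role": "tool", "content": '{"results": ["..."]}'},
        {"role": "assistant", "content": "Gathering sources."},
    ]
    print(needs_compaction(12_345))  # True
    print(normalize_history(history))
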
--- backend/onyx/chat/answer_scratchpad.py | 67 ++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 9 deletions(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index f08c70307af..f2540ed35b9 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -19,11 +19,14 @@ from agents import Agent from agents import AgentHooks from agents import function_tool +from agents import handoff from agents import ModelSettings from agents import RunContextWrapper from agents import Runner from agents.extensions.handoff_prompt import prompt_with_handoff_instructions +from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX from agents.extensions.models.litellm_model import LitellmModel +from agents.handoffs import HandoffInputData from agents.stream_events import RawResponsesStreamEvent from agents.stream_events import RunItemStreamEvent from braintrust import traced @@ -58,6 +61,7 @@ @dataclass class RunDependencies: emitter: Emitter + llm: LLM search_tool: SearchTool | None = None @@ -66,6 +70,7 @@ class MyContext: """Context class to hold search tool and other dependencies""" run_dependencies: RunDependencies | None = None + needs_compaction: bool = False def short_tag(link: str, i: int) -> str: @@ -128,7 +133,7 @@ def llm_completion( model=model_name, temperature=temperature, messages=messages, - tools=None, + tools=[], stream=stream, ) @@ -243,6 +248,7 @@ async def amain(): run_dependencies=RunDependencies( search_tool=search_tool, emitter=emitter, + llm=llm, ) ) # 1) start the streamed run (async) @@ -291,17 +297,39 @@ def finalize_report(): } -class VerboseHooks(AgentHooks[Any]): +class CompactionHooks(AgentHooks[Any]): async def on_llm_start( self, - context: RunContextWrapper[Any], + context: RunContextWrapper[MyContext], agent: Agent[Any], system_prompt: Optional[str], - input_items: List[dict], # alias: TResponseInputItem + input_items: List[dict], ) -> None: print(f"[{agent.name}] LLM start") print("system_prompt:", system_prompt) print("usage so far:", context.usage.total_tokens) + usage = context.usage.total_tokens + if usage > 10000: + context.context.needs_compaction = True + + +def compaction_input_filter(input_data: HandoffInputData): + filtered_messages = [] + for msg in input_data.input_history[:-1]: + if isinstance(msg, dict) and msg.get("content") is not None: + # Convert tool messages to user messages to avoid API errors + if msg.get("role") == "tool": + filtered_msg = { + "role": "user", + "content": f"Tool response: {msg.get('content', '')}", + } + filtered_messages.append(filtered_msg) + else: + filtered_messages.append(msg) + + # Only proceed with compaction if we have valid messages + if filtered_messages: + return [filtered_messages[-1]] def construct_deep_research_agent(llm: LLM) -> Agent: @@ -313,7 +341,8 @@ def construct_deep_research_agent(llm: LLM) -> Agent: api_key=llm.config.api_key, ) - DR_INSTRUCTIONS = """ + DR_INSTRUCTIONS = f""" + {RECOMMENDED_PROMPT_PREFIX} You are a deep-research agent. Work in explicit iterations: 1) PLAN: Decompose the user’s query into sub-questions and a step-by-step plan. 2) SEARCH: Use web_search to explore multiple angles, fanning out and searching in parallel. 
@@ -343,7 +372,7 @@ def construct_deep_research_agent(llm: LLM) -> Agent: # optional: let model choose tools freely # tool_choice="auto", # if supported by your LitellmModel wrapper ), - hooks=VerboseHooks(), + hooks=CompactionHooks(), ) @@ -558,15 +587,35 @@ def dr_turn( ) turn_event_stream_emitter.emit(kind="done", data={"ok": True}) return - - agent = construct_deep_research_agent(llm) + dr_agent = construct_deep_research_agent(llm) + compactor_agent = Agent( + name="Compactor", + instructions=f""" + {RECOMMENDED_PROMPT_PREFIX} + Summarize the full conversation so far into JSON with keys:\n + - summary: concise timeline of what happened so far\n + - facts: bullet list of stable facts (IDs, URLs, constraints)\n + - open_questions: bullet list of TODOs / follow-ups\n + Set already_compacted=true to prevent immediate re-compaction. + Then hand off to deep research agent. + """, + output_type=dict, + handoffs=[ + handoff( + agent=dr_agent, + input_filter=compaction_input_filter, + ) + ], + tool_use_behavior="stop_on_first_tool", + ) ctx = MyContext( run_dependencies=RunDependencies( search_tool=search_tool, emitter=turn_event_stream_emitter, + llm=llm, ) ) - bridge = StreamBridge(agent, messages, ctx, max_turns=100).start() + bridge = StreamBridge(compactor_agent, messages, ctx, max_turns=100).start() for ev in bridge.events(): if isinstance(ev, RunItemStreamEvent): pass From 84726d3b2274cc1f83ab8d955734c3e3d6221bc6 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Thu, 25 Sep 2025 12:18:57 -0700 Subject: [PATCH 20/34] . --- backend/onyx/chat/answer_scratchpad.py | 149 +++++++++++++++++-------- 1 file changed, 102 insertions(+), 47 deletions(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index f2540ed35b9..9cc64cfb227 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -3,6 +3,7 @@ import asyncio import contextvars import json +import os import queue import threading from collections.abc import Generator @@ -19,7 +20,6 @@ from agents import Agent from agents import AgentHooks from agents import function_tool -from agents import handoff from agents import ModelSettings from agents import RunContextWrapper from agents import Runner @@ -30,6 +30,7 @@ from agents.stream_events import RawResponsesStreamEvent from agents.stream_events import RunItemStreamEvent from braintrust import traced +from openai.types import Reasoning from pydantic import BaseModel from onyx.agents.agent_search.dr.constants import MAX_CHAT_HISTORY_MESSAGES @@ -129,12 +130,12 @@ def llm_completion( messages: List[Dict[str, Any]], stream: bool = False, ) -> litellm.ModelResponse: - return litellm.completion( + return litellm.responses( model=model_name, - temperature=temperature, - messages=messages, + input=messages, tools=[], stream=stream, + reasoning=litellm.Reasoning(effort="medium", summary="detailed"), ) @@ -359,6 +360,7 @@ def construct_deep_research_agent(llm: LLM) -> Agent: - Compare sources and dates; prioritize recency for time-sensitive topics. - Minimize redundancy by skimming before fetching. - Think out loud in a compact way, but keep reasoning crisp. +- If context exceeds 10000 tokens, handoff to the compactor agent. 
""" return Agent( name="Researcher", @@ -377,7 +379,6 @@ def construct_deep_research_agent(llm: LLM) -> Agent: def unified_event_stream( - agent: Agent, messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM, @@ -386,28 +387,17 @@ def unified_event_stream( ) -> Generator[Dict[str, Any], None, None]: bus: Queue = Queue() emitter = Emitter(bus) - # start_run_in_thread( - # agent=agent, - # messages=messages, - # cfg=cfg, - # llm=llm, - # search_tool=search_tool, - # emitter=emitter, - # ) - - # Capture current context for propagation to worker thread current_context = contextvars.copy_context() - t = threading.Thread( target=current_context.run, args=( - thread_worker_dr_turn, + # thread_worker_dr_turn, + thread_worker_simple_turn, messages, cfg, llm, emitter, search_tool, - None, ), # eval_context=None for now daemon=True, ) @@ -437,9 +427,7 @@ def stream_chat_sync( ) -> Generator[Dict[str, Any], None, None]: bus: Queue = Queue() emitter = Emitter(bus) - agent = construct_deep_research_agent(llm) return unified_event_stream( - agent=agent, messages=messages, cfg=cfg, llm=llm, @@ -452,25 +440,29 @@ def construct_simple_agent( llm: LLM, ) -> Agent: litellm_model = LitellmModel( - model=llm.config.model_name, + model="o3-mini", api_key=llm.config.api_key, ) return Agent( name="Assistant", instructions=""" You are a helpful assistant that can search the web, fetch content from URLs, - and search internal databases. + and search internal databases. Please do some reasoning and then return your answer. """, model=litellm_model, tools=[web_search, web_fetch, internal_search], model_settings=ModelSettings( - temperature=llm.config.temperature, + temperature=0.0, include_usage=True, # Track usage metrics + reasoning=Reasoning( + effort="medium", summary="detailed", generate_summary="detailed" + ), + verbose=True, ), ) -def thread_worker_dr_turn(messages, cfg, llm, emitter, search_tool, eval_context=None): +def thread_worker_dr_turn(messages, cfg, llm, emitter, search_tool): """ Worker function for deep research turn that runs in a separate thread. 
@@ -483,12 +475,26 @@ def thread_worker_dr_turn(messages, cfg, llm, emitter, search_tool, eval_context eval_context: Evaluation context to be propagated to the worker thread """ try: - dr_turn(messages, cfg, llm, emitter, search_tool, eval_context) + dr_turn(messages, cfg, llm, emitter, search_tool) except Exception as e: logger.error(f"Error in dr_turn: {e}", exc_info=e, stack_info=True) emitter.emit(kind="done", data={"ok": False}) +def thread_worker_simple_turn(messages, cfg, llm, emitter, search_tool): + try: + simple_turn( + messages=messages, + cfg=cfg, + llm=llm, + turn_event_stream_emitter=emitter, + search_tool=search_tool, + ) + except Exception as e: + logger.error(f"Error in simple_turn: {e}", exc_info=e, stack_info=True) + emitter.emit(kind="done", data={"ok": False}) + + SENTINEL = object() @@ -557,13 +563,48 @@ def _do_cancel(): self._loop.call_soon_threadsafe(_do_cancel) +def simple_turn( + messages: List[Dict[str, Any]], + cfg: GraphConfig, + llm: LLM, + turn_event_stream_emitter: Emitter, + search_tool: SearchTool | None = None, +) -> None: + llm_response = llm_completion( + model_name="gpt-5-mini", + temperature=0.0, + messages=messages, + stream=True, + ) + llm_response.json() + simple_agent = construct_simple_agent(llm) + ctx = MyContext( + run_dependencies=RunDependencies( + search_tool=search_tool, emitter=turn_event_stream_emitter, llm=llm + ) + ) + bridge = StreamBridge(simple_agent, messages, ctx, max_turns=100).start() + for ev in bridge.events(): + if isinstance(ev, RunItemStreamEvent): + print("RUN ITEM STREAM EVENT!") + if ev.name == "reasoning_item_created": + print("REASONING!") + turn_event_stream_emitter.emit( + kind="reasoning", data=ev.item.raw_item.model_dump() + ) + elif isinstance(ev, RawResponsesStreamEvent): + print("RAW RESPONSES STREAM EVENT!") + print(ev.type) + turn_event_stream_emitter.emit(kind="agent", data=ev.data.model_dump()) + turn_event_stream_emitter.emit(kind="done", data={"ok": True}) + + def dr_turn( messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM, turn_event_stream_emitter: Emitter, # TurnEventStream is the primary output of the turn search_tool: SearchTool | None = None, - eval_context=None, ) -> None: """ Execute a deep research turn with evaluation context support. @@ -588,26 +629,6 @@ def dr_turn( turn_event_stream_emitter.emit(kind="done", data={"ok": True}) return dr_agent = construct_deep_research_agent(llm) - compactor_agent = Agent( - name="Compactor", - instructions=f""" - {RECOMMENDED_PROMPT_PREFIX} - Summarize the full conversation so far into JSON with keys:\n - - summary: concise timeline of what happened so far\n - - facts: bullet list of stable facts (IDs, URLs, constraints)\n - - open_questions: bullet list of TODOs / follow-ups\n - Set already_compacted=true to prevent immediate re-compaction. - Then hand off to deep research agent. 
- """, - output_type=dict, - handoffs=[ - handoff( - agent=dr_agent, - input_filter=compaction_input_filter, - ) - ], - tool_use_behavior="stop_on_first_tool", - ) ctx = MyContext( run_dependencies=RunDependencies( search_tool=search_tool, @@ -615,7 +636,7 @@ def dr_turn( llm=llm, ) ) - bridge = StreamBridge(compactor_agent, messages, ctx, max_turns=100).start() + bridge = StreamBridge(dr_agent, messages, ctx, max_turns=100).start() for ev in bridge.events(): if isinstance(ev, RunItemStreamEvent): pass @@ -663,3 +684,37 @@ def get_clarification( stream=False, ) return llm_response + + +if __name__ == "__main__": + messages = [ + { + "role": "user", + "content": """ + Let $N$ denote the number of ordered triples of positive integers $(a, b, c)$ such that $a, b, c + \\leq 3^6$ and $a^3 + b^3 + c^3$ is a multiple of $3^7$. Find the remainder when $N$ is divided by $1000$. + """, + } + ] + # OpenAI reasoning is not supported yet due to: https://github.com/BerriAI/litellm/pull/14117 + reasoning_agent = Agent( + name="Reasoning", + instructions="You are a reasoning agent. You are given a question and you need to reason about it.", + model=LitellmModel( + model="gpt-5-mini", + api_key=os.getenv("OPENAI_API_KEY"), + ), + tools=[], + model_settings=ModelSettings( + temperature=0.0, + reasoning=Reasoning(effort="medium", summary="detailed"), + ), + ) + llm_response = llm_completion( + model_name="gpt-5-mini", + temperature=0.0, + messages=messages, + stream=False, + ) + x = llm_response.json() + print(x) From a260368bc9a484eeb612fe8ac291b75b0a5bb990 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Thu, 25 Sep 2025 14:38:59 -0700 Subject: [PATCH 21/34] . --- backend/onyx/chat/answer_scratchpad.py | 186 ++---------------- backend/onyx/chat/process_message.py | 12 +- backend/onyx/chat/turn/__init__.py | 1 + backend/onyx/chat/turn/fast_chat_turn.py | 56 ++++++ backend/onyx/chat/turn/infra/__init__.py | 1 + .../chat/turn/infra/chat_turn_event_stream.py | 150 ++++++++++++++ .../turn/infra/chat_turn_orchestration.py | 62 ++++++ .../tool_implementations_v2/web_fetch.py} | 0 .../tool_implementations_v2/web_search.py | 0 9 files changed, 291 insertions(+), 177 deletions(-) create mode 100644 backend/onyx/chat/turn/__init__.py create mode 100644 backend/onyx/chat/turn/fast_chat_turn.py create mode 100644 backend/onyx/chat/turn/infra/__init__.py create mode 100644 backend/onyx/chat/turn/infra/chat_turn_event_stream.py create mode 100644 backend/onyx/chat/turn/infra/chat_turn_orchestration.py rename backend/onyx/{chat/answer_cli.py => tools/tool_implementations_v2/web_fetch.py} (100%) create mode 100644 backend/onyx/tools/tool_implementations_v2/web_search.py diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index 9cc64cfb227..ff303e73f2a 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -1,15 +1,9 @@ from __future__ import annotations import asyncio -import contextvars import json import os -import queue import threading -from collections.abc import Generator -from collections.abc import Iterator -from dataclasses import dataclass -from queue import Queue from typing import Any from typing import cast from typing import Dict @@ -44,6 +38,11 @@ ) from onyx.agents.agent_search.dr.utils import get_chat_history_string from onyx.agents.agent_search.models import GraphConfig +from onyx.chat.turn import fast_chat_turn +from onyx.chat.turn.fast_chat_turn import MyContext +from onyx.chat.turn.infra.chat_turn_event_stream import 
Emitter +from onyx.chat.turn.infra.chat_turn_event_stream import OnyxRunner +from onyx.chat.turn.infra.chat_turn_event_stream import RunDependencies from onyx.db.engine.sql_engine import get_session_with_current_tenant from onyx.llm.interfaces import ( LLM, @@ -59,21 +58,6 @@ logger = setup_logger() -@dataclass -class RunDependencies: - emitter: Emitter - llm: LLM - search_tool: SearchTool | None = None - - -@dataclass -class MyContext: - """Context class to hold search tool and other dependencies""" - - run_dependencies: RunDependencies | None = None - needs_compaction: bool = False - - def short_tag(link: str, i: int) -> str: # Stable, readable; index keeps it deterministic across a batch return f"S{i+1}" @@ -217,23 +201,6 @@ def _convert_to_packet_obj(packet: Dict[str, Any]) -> Any | None: return None -# stream_bus.py -@dataclass -class StreamPacket: - kind: str # "agent" | "tool-progress" | "done" - payload: Dict[str, Any] = None - - -class Emitter: - """Use this inside tools to emit arbitrary UI progress.""" - - def __init__(self, bus: Queue): - self.bus = bus - - def emit(self, kind: str, data: Dict[str, Any]) -> None: - self.bus.put(StreamPacket(kind=kind, payload=data)) - - # If we want durable execution in the future, we can replace this with a temporal call def start_run_in_thread( agent: Agent, @@ -378,64 +345,6 @@ def construct_deep_research_agent(llm: LLM) -> Agent: ) -def unified_event_stream( - messages: List[Dict[str, Any]], - cfg: GraphConfig, - llm: LLM, - emitter: Emitter, - search_tool: SearchTool | None = None, -) -> Generator[Dict[str, Any], None, None]: - bus: Queue = Queue() - emitter = Emitter(bus) - current_context = contextvars.copy_context() - t = threading.Thread( - target=current_context.run, - args=( - # thread_worker_dr_turn, - thread_worker_simple_turn, - messages, - cfg, - llm, - emitter, - search_tool, - ), # eval_context=None for now - daemon=True, - ) - t.start() - done = False - while not done: - pkt: StreamPacket = emitter.bus.get() - if pkt.kind == "done": - done = True - else: - # Convert packet to PacketObj when possible - packet_obj = _convert_to_packet_obj(pkt.payload) - if packet_obj: - # Convert PacketObj back to dict for compatibility - yield packet_obj.model_dump() - else: - # Fallback to original payload - yield pkt.payload - - -# This should be close to the API -def stream_chat_sync( - messages: List[Dict[str, Any]], - cfg: GraphConfig, - llm: LLM, - search_tool: SearchTool | None = None, -) -> Generator[Dict[str, Any], None, None]: - bus: Queue = Queue() - emitter = Emitter(bus) - return unified_event_stream( - messages=messages, - cfg=cfg, - llm=llm, - emitter=emitter, - search_tool=search_tool, - ) - - def construct_simple_agent( llm: LLM, ) -> Agent: @@ -483,86 +392,19 @@ def thread_worker_dr_turn(messages, cfg, llm, emitter, search_tool): def thread_worker_simple_turn(messages, cfg, llm, emitter, search_tool): try: - simple_turn( + fast_chat_turn.fast_chat_turn( messages=messages, - cfg=cfg, - llm=llm, - turn_event_stream_emitter=emitter, - search_tool=search_tool, + dependencies=RunDependencies( + emitter=emitter, + llm=llm, + search_tool=search_tool, + ), ) except Exception as e: logger.error(f"Error in simple_turn: {e}", exc_info=e, stack_info=True) emitter.emit(kind="done", data={"ok": False}) -SENTINEL = object() - - -class StreamBridge: - """ - Spins up an asyncio loop in a background thread, starts Runner.run_streamed there, - consumes its async event stream, and exposes a blocking .events() iterator. 
- """ - - def __init__(self, agent, messages, ctx, max_turns: int = 100): - self.agent = agent - self.messages = messages - self.ctx = ctx - self.max_turns = max_turns - - self._q: "queue.Queue[object]" = queue.Queue() - self._loop: Optional[asyncio.AbstractEventLoop] = None - self._thread: Optional[threading.Thread] = None - self._streamed = None - - def start(self): - def worker(): - async def run_and_consume(): - # Create the streamed run *inside* the loop thread - self._streamed = Runner.run_streamed( - self.agent, - self.messages, - context=self.ctx, - max_turns=self.max_turns, - ) - try: - async for ev in self._streamed.stream_events(): - self._q.put(ev) - finally: - self._q.put(SENTINEL) - - # Each thread needs its own loop - self._loop = asyncio.new_event_loop() - asyncio.set_event_loop(self._loop) - try: - self._loop.run_until_complete(run_and_consume()) - finally: - self._loop.close() - - self._thread = threading.Thread(target=worker, daemon=True) - self._thread.start() - return self - - def events(self) -> Iterator[object]: - while True: - ev = self._q.get() - if ev is SENTINEL: - break - yield ev - - def cancel(self): - # Post a cancellation to the loop thread safely - if self._loop and self._streamed: - - def _do_cancel(): - try: - self._streamed.cancel() - except Exception: - pass - - self._loop.call_soon_threadsafe(_do_cancel) - - def simple_turn( messages: List[Dict[str, Any]], cfg: GraphConfig, @@ -571,7 +413,7 @@ def simple_turn( search_tool: SearchTool | None = None, ) -> None: llm_response = llm_completion( - model_name="gpt-5-mini", + model_name="gpt-4o-mini", temperature=0.0, messages=messages, stream=True, @@ -583,7 +425,7 @@ def simple_turn( search_tool=search_tool, emitter=turn_event_stream_emitter, llm=llm ) ) - bridge = StreamBridge(simple_agent, messages, ctx, max_turns=100).start() + bridge = OnyxRunner(simple_agent, messages, ctx, max_turns=100).start() for ev in bridge.events(): if isinstance(ev, RunItemStreamEvent): print("RUN ITEM STREAM EVENT!") @@ -636,7 +478,7 @@ def dr_turn( llm=llm, ) ) - bridge = StreamBridge(dr_agent, messages, ctx, max_turns=100).start() + bridge = OnyxRunner(dr_agent, messages, ctx, max_turns=100).start() for ev in bridge.events(): if isinstance(ev, RunItemStreamEvent): pass diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index 32e95e7d2b0..5f87bf304b1 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -12,7 +12,6 @@ from onyx.agents.agent_search.orchestration.nodes.call_tool import ToolCallException from onyx.chat.answer import Answer -from onyx.chat.answer_scratchpad import stream_chat_sync from onyx.chat.chat_utils import create_chat_chain from onyx.chat.chat_utils import create_temporary_persona from onyx.chat.chat_utils import process_kg_commands @@ -30,6 +29,8 @@ from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder from onyx.chat.prompt_builder.answer_prompt_builder import default_build_system_message from onyx.chat.prompt_builder.answer_prompt_builder import default_build_user_message +from onyx.chat.turn import fast_chat_turn +from onyx.chat.turn.infra.chat_turn_event_stream import RunDependencies from onyx.chat.user_files.parse_user_files import parse_user_files from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE from onyx.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH @@ -704,11 +705,12 @@ def stream_chat_message_objects( for message in answer.graph_inputs.prompt_builder.build() if 
message.type != "system" ] - yield from stream_chat_sync( + yield from fast_chat_turn.fast_chat_turn( messages=system_message + other_messages, - cfg=answer.graph_config, - llm=answer.graph_tooling.primary_llm, - search_tool=answer.graph_tooling.search_tool, + dependencies=RunDependencies( + llm=answer.graph_tooling.primary_llm, + search_tool=answer.graph_tooling.search_tool, + ), ) except ValueError as e: diff --git a/backend/onyx/chat/turn/__init__.py b/backend/onyx/chat/turn/__init__.py new file mode 100644 index 00000000000..c491083c31e --- /dev/null +++ b/backend/onyx/chat/turn/__init__.py @@ -0,0 +1 @@ +# Turn module for chat functionality diff --git a/backend/onyx/chat/turn/fast_chat_turn.py b/backend/onyx/chat/turn/fast_chat_turn.py new file mode 100644 index 00000000000..306a42b0fea --- /dev/null +++ b/backend/onyx/chat/turn/fast_chat_turn.py @@ -0,0 +1,56 @@ +from dataclasses import dataclass + +from agents import Agent +from agents import ModelSettings +from agents import RawResponsesStreamEvent +from agents import RunItemStreamEvent +from agents.extensions.models.litellm_model import LitellmModel + +from onyx.chat.turn.infra.chat_turn_event_stream import OnyxRunner +from onyx.chat.turn.infra.chat_turn_event_stream import RunDependencies +from onyx.chat.turn.infra.chat_turn_orchestration import unified_event_stream + + +@dataclass +class MyContext: + """Context class to hold search tool and other dependencies""" + + run_dependencies: RunDependencies | None = None + needs_compaction: bool = False + + +# TODO: Dependency injection? +@unified_event_stream +def fast_chat_turn(messages: list[dict], dependencies: RunDependencies) -> None: + ctx = MyContext( + run_dependencies=dependencies, + ) + agent = Agent( + name="Assistant", + instructions=""" + You are a helpful assistant that can search the web, fetch content from URLs, + and search internal databases. Please do some reasoning and then return your answer. + """, + model=LitellmModel( + model=dependencies.llm.config.model_name, + api_key=dependencies.llm.config.api_key, + ), + tools=[], + model_settings=ModelSettings( + temperature=0.0, + include_usage=True, + ), + ) + + bridge = OnyxRunner().run_streamed(agent, messages, context=ctx, max_turns=100) + try: + for ev in bridge.events(): + if isinstance(ev, RunItemStreamEvent): + pass + elif isinstance(ev, RawResponsesStreamEvent): + # TODO: use very standardized schema for the emitter that is close to + # front end schema + dependencies.emitter.emit(kind="agent", data=ev.data.model_dump()) + finally: + # TODO: Handle done signal more reliably? 
+ dependencies.emitter.emit(kind="done", data={"ok": True}) diff --git a/backend/onyx/chat/turn/infra/__init__.py b/backend/onyx/chat/turn/infra/__init__.py new file mode 100644 index 00000000000..57231df6749 --- /dev/null +++ b/backend/onyx/chat/turn/infra/__init__.py @@ -0,0 +1 @@ +# Infrastructure module for chat turn orchestration diff --git a/backend/onyx/chat/turn/infra/chat_turn_event_stream.py b/backend/onyx/chat/turn/infra/chat_turn_event_stream.py new file mode 100644 index 00000000000..f1bd97c6794 --- /dev/null +++ b/backend/onyx/chat/turn/infra/chat_turn_event_stream.py @@ -0,0 +1,150 @@ +import asyncio +import queue +import threading +from collections.abc import Iterator +from dataclasses import dataclass +from queue import Queue +from typing import Any +from typing import Dict +from typing import Optional + +from agents import Agent +from agents import Runner +from agents import TContext +from pydantic import BaseModel +from pydantic import Field + +from onyx.llm.interfaces import LLM +from onyx.tools.tool_implementations.search.search_tool import SearchTool + + +class OnyxRunner: + """ + Spins up an asyncio loop in a background thread, starts Runner.run_streamed there, + consumes its async event stream, and exposes a blocking .events() iterator. + """ + + def __init__(self): + self._q: "queue.Queue[object]" = queue.Queue() + self._loop: Optional[asyncio.AbstractEventLoop] = None + self._thread: Optional[threading.Thread] = None + self._streamed = None + self.SENTINEL = object() + + def run_streamed( + self, + agent: Agent, + messages: list[dict], + context: TContext | None = None, + max_turns: int = 100, + ): + def worker(): + async def run_and_consume(): + # Create the streamed run *inside* the loop thread + self._streamed = Runner.run_streamed( + agent, + messages, + context=context, + max_turns=max_turns, + ) + try: + async for ev in self._streamed.stream_events(): + self._q.put(ev) + finally: + self._q.put(self.SENTINEL) + + # Each thread needs its own loop + self._loop = asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) + try: + self._loop.run_until_complete(run_and_consume()) + finally: + self._loop.close() + + self._thread = threading.Thread(target=worker, daemon=True) + self._thread.start() + return self + + def events(self) -> Iterator[object]: + while True: + ev = self._q.get() + if ev is self.SENTINEL: + break + yield ev + + def cancel(self): + # Post a cancellation to the loop thread safely + if self._loop and self._streamed: + + def _do_cancel(): + try: + self._streamed.cancel() + except Exception: + pass + + self._loop.call_soon_threadsafe(_do_cancel) + + +def convert_to_packet_obj(packet: Dict[str, Any]) -> Any | None: + """Convert a packet dictionary to PacketObj when possible. 
+ + Args: + packet: Dictionary containing packet data + + Returns: + PacketObj instance if conversion is possible, None otherwise + """ + if not isinstance(packet, dict) or "type" not in packet: + return None + + packet_type = packet.get("type") + if not packet_type: + return None + + try: + # Import here to avoid circular imports + from onyx.server.query_and_chat.streaming_models import ( + MessageStart, + MessageDelta, + OverallStop, + ) + + if packet_type == "response.output_item.added": + return MessageStart( + type="message_start", + content="", + final_documents=None, + ) + elif packet_type == "response.output_text.delta": + return MessageDelta(type="message_delta", content=packet["delta"]) + elif packet_type == "response.completed": + return OverallStop(type="stop") + + except Exception: + # Log the error but don't fail the entire process + # logger.debug(f"Failed to convert packet to PacketObj: {e}") + pass + + return None + + +class StreamPacket(BaseModel): + kind: str # "agent" | "tool-progress" | "done" + payload: Dict[str, Any] = Field(default_factory=dict) + + +class Emitter: + """Use this inside tools to emit arbitrary UI progress.""" + + def __init__(self, bus: Queue): + self.bus = bus + + def emit(self, kind: str, data: Dict[str, Any]) -> None: + self.bus.put(StreamPacket(kind=kind, payload=data)) + + +@dataclass +class RunDependencies: + llm: LLM + emitter: Emitter | None = None + search_tool: SearchTool | None = None diff --git a/backend/onyx/chat/turn/infra/chat_turn_orchestration.py b/backend/onyx/chat/turn/infra/chat_turn_orchestration.py new file mode 100644 index 00000000000..53b1e75c719 --- /dev/null +++ b/backend/onyx/chat/turn/infra/chat_turn_orchestration.py @@ -0,0 +1,62 @@ +import contextvars +import threading +from collections.abc import Callable +from collections.abc import Generator +from queue import Queue +from typing import Any +from typing import Dict +from typing import List + +from onyx.chat.turn.infra.chat_turn_event_stream import convert_to_packet_obj +from onyx.chat.turn.infra.chat_turn_event_stream import Emitter +from onyx.chat.turn.infra.chat_turn_event_stream import RunDependencies +from onyx.chat.turn.infra.chat_turn_event_stream import StreamPacket + + +def unified_event_stream( + turn_func: Callable[[List[Dict[str, Any]], RunDependencies], None], +) -> Callable[ + [List[Dict[str, Any]], RunDependencies], Generator[Dict[str, Any], None, None] +]: + """ + Decorator that wraps a turn_func to provide event streaming capabilities. 
+ + Usage: + @unified_event_stream + def my_turn_func(messages, dependencies): + # Your turn logic here + pass + + # Then call it like: + # generator = my_turn_func(messages, dependencies) + """ + + def wrapper( + messages: List[Dict[str, Any]], dependencies: RunDependencies + ) -> Generator[Dict[str, Any], None, None]: + bus: Queue = Queue() + emitter = Emitter(bus) + current_context = contextvars.copy_context() + dependencies.emitter = emitter + t = threading.Thread( + target=current_context.run, + args=( + turn_func, + messages, + dependencies, + ), + daemon=True, + ) + t.start() + while True: + pkt: StreamPacket = emitter.bus.get() + if pkt.kind == "done": + break + else: + packet_obj = convert_to_packet_obj(pkt.payload) + if packet_obj: + yield packet_obj.model_dump() + else: + yield pkt.payload + + return wrapper diff --git a/backend/onyx/chat/answer_cli.py b/backend/onyx/tools/tool_implementations_v2/web_fetch.py similarity index 100% rename from backend/onyx/chat/answer_cli.py rename to backend/onyx/tools/tool_implementations_v2/web_fetch.py diff --git a/backend/onyx/tools/tool_implementations_v2/web_search.py b/backend/onyx/tools/tool_implementations_v2/web_search.py new file mode 100644 index 00000000000..e69de29bb2d From c1d19f9f458f278379a64b1d66c10e771d87cc85 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Thu, 25 Sep 2025 15:10:33 -0700 Subject: [PATCH 22/34] . --- backend/onyx/chat/process_message.py | 71 +------------------ .../chat/turn/infra/chat_turn_event_stream.py | 71 ++++++++++++++----- .../turn/infra/chat_turn_orchestration.py | 13 ++-- 3 files changed, 60 insertions(+), 95 deletions(-) diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index 5f87bf304b1..f02d17373ff 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -30,6 +30,7 @@ from onyx.chat.prompt_builder.answer_prompt_builder import default_build_system_message from onyx.chat.prompt_builder.answer_prompt_builder import default_build_user_message from onyx.chat.turn import fast_chat_turn +from onyx.chat.turn.infra.chat_turn_event_stream import convert_to_packet_obj from onyx.chat.turn.infra.chat_turn_event_stream import RunDependencies from onyx.chat.user_files.parse_user_files import parse_user_files from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE @@ -789,74 +790,6 @@ def remove_answer_citations(answer: str) -> str: return re.sub(pattern, "", answer) -def _convert_to_packet_obj(packet: Dict[str, Any]) -> Any | None: - """Convert a packet dictionary to PacketObj when possible. 
- - Args: - packet: Dictionary containing packet data - - Returns: - PacketObj instance if conversion is possible, None otherwise - """ - if not isinstance(packet, dict) or "type" not in packet: - return None - - packet_type = packet.get("type") - if not packet_type: - return None - - try: - # Import here to avoid circular imports - from onyx.server.query_and_chat.streaming_models import ( - MessageStart, - MessageDelta, - OverallStop, - SectionEnd, - SearchToolStart, - SearchToolDelta, - ImageGenerationToolStart, - ImageGenerationToolDelta, - ImageGenerationToolHeartbeat, - CustomToolStart, - CustomToolDelta, - ReasoningStart, - ReasoningDelta, - CitationStart, - CitationDelta, - ) - - # Map packet types to their corresponding classes - type_mapping = { - "message_start": MessageStart, - "message_delta": MessageDelta, - "stop": OverallStop, - "section_end": SectionEnd, - "internal_search_tool_start": SearchToolStart, - "internal_search_tool_delta": SearchToolDelta, - "image_generation_tool_start": ImageGenerationToolStart, - "image_generation_tool_delta": ImageGenerationToolDelta, - "image_generation_tool_heartbeat": ImageGenerationToolHeartbeat, - "custom_tool_start": CustomToolStart, - "custom_tool_delta": CustomToolDelta, - "reasoning_start": ReasoningStart, - "reasoning_delta": ReasoningDelta, - "citation_start": CitationStart, - "citation_delta": CitationDelta, - } - - packet_class = type_mapping.get(packet_type) - if packet_class: - # Create instance using the packet data, filtering out None values - filtered_data = {k: v for k, v in packet.items() if v is not None} - return packet_class(**filtered_data) - - except Exception as e: - # Log the error but don't fail the entire process - logger.debug(f"Failed to convert packet to PacketObj: {e}") - - return None - - @log_function_time() def gather_stream( packets: Iterator[Dict[str, Any]], @@ -867,7 +800,7 @@ def gather_stream( print(packet) # Convert packet to PacketObj when possible - packet_obj = _convert_to_packet_obj(packet) + packet_obj = convert_to_packet_obj(packet) if packet_obj: # Handle PacketObj types that contain text content if hasattr(packet_obj, "content") and packet_obj.content: diff --git a/backend/onyx/chat/turn/infra/chat_turn_event_stream.py b/backend/onyx/chat/turn/infra/chat_turn_event_stream.py index f1bd97c6794..7bf564a9d57 100644 --- a/backend/onyx/chat/turn/infra/chat_turn_event_stream.py +++ b/backend/onyx/chat/turn/infra/chat_turn_event_stream.py @@ -1,4 +1,5 @@ import asyncio +import logging import queue import threading from collections.abc import Iterator @@ -17,6 +18,8 @@ from onyx.llm.interfaces import LLM from onyx.tools.tool_implementations.search.search_tool import SearchTool +logger = logging.getLogger(__name__) + class OnyxRunner: """ @@ -85,6 +88,11 @@ def _do_cancel(): self._loop.call_soon_threadsafe(_do_cancel) +class StreamPacket(BaseModel): + kind: str # "agent" | "tool-progress" | "done" + payload: Dict[str, Any] = Field(default_factory=dict) + + def convert_to_packet_obj(packet: Dict[str, Any]) -> Any | None: """Convert a packet dictionary to PacketObj when possible. 
@@ -107,32 +115,57 @@ def convert_to_packet_obj(packet: Dict[str, Any]) -> Any | None: MessageStart, MessageDelta, OverallStop, + SectionEnd, + SearchToolStart, + SearchToolDelta, + ImageGenerationToolStart, + ImageGenerationToolDelta, + ImageGenerationToolHeartbeat, + CustomToolStart, + CustomToolDelta, + ReasoningStart, + ReasoningDelta, + CitationStart, + CitationDelta, ) - if packet_type == "response.output_item.added": - return MessageStart( - type="message_start", - content="", - final_documents=None, - ) - elif packet_type == "response.output_text.delta": - return MessageDelta(type="message_delta", content=packet["delta"]) - elif packet_type == "response.completed": - return OverallStop(type="stop") - - except Exception: + # Map packet types to their corresponding classes + type_mapping = { + "message_start": MessageStart, + "response.output_text.delta": MessageDelta, + "response.completed": OverallStop, + "section_end": SectionEnd, + "internal_search_tool_start": SearchToolStart, + "internal_search_tool_delta": SearchToolDelta, + "image_generation_tool_start": ImageGenerationToolStart, + "image_generation_tool_delta": ImageGenerationToolDelta, + "image_generation_tool_heartbeat": ImageGenerationToolHeartbeat, + "custom_tool_start": CustomToolStart, + "custom_tool_delta": CustomToolDelta, + "reasoning_start": ReasoningStart, + "reasoning_delta": ReasoningDelta, + "citation_start": CitationStart, + "citation_delta": CitationDelta, + } + + packet_class = type_mapping.get(packet_type) + if packet_class: + # Create instance using the packet data, filtering out None values + filtered_data = {k: v for k, v in packet.items() if v is not None} + if packet_type == "response.output_text.delta": + filtered_data["type"] = "message_delta" + filtered_data["content"] = filtered_data["delta"] + elif packet_type == "response.completed": + filtered_data["type"] = "stop" + return packet_class(**filtered_data) + + except Exception as e: # Log the error but don't fail the entire process - # logger.debug(f"Failed to convert packet to PacketObj: {e}") - pass + logger.debug(f"Failed to convert packet to PacketObj: {e}") return None -class StreamPacket(BaseModel): - kind: str # "agent" | "tool-progress" | "done" - payload: Dict[str, Any] = Field(default_factory=dict) - - class Emitter: """Use this inside tools to emit arbitrary UI progress.""" diff --git a/backend/onyx/chat/turn/infra/chat_turn_orchestration.py b/backend/onyx/chat/turn/infra/chat_turn_orchestration.py index 53b1e75c719..373b591de05 100644 --- a/backend/onyx/chat/turn/infra/chat_turn_orchestration.py +++ b/backend/onyx/chat/turn/infra/chat_turn_orchestration.py @@ -11,13 +11,12 @@ from onyx.chat.turn.infra.chat_turn_event_stream import Emitter from onyx.chat.turn.infra.chat_turn_event_stream import RunDependencies from onyx.chat.turn.infra.chat_turn_event_stream import StreamPacket +from onyx.server.query_and_chat.streaming_models import Packet def unified_event_stream( turn_func: Callable[[List[Dict[str, Any]], RunDependencies], None], -) -> Callable[ - [List[Dict[str, Any]], RunDependencies], Generator[Dict[str, Any], None, None] -]: +) -> Callable[[List[Dict[str, Any]], RunDependencies], Generator[Packet, None]]: """ Decorator that wraps a turn_func to provide event streaming capabilities. 
@@ -33,7 +32,7 @@ def my_turn_func(messages, dependencies): def wrapper( messages: List[Dict[str, Any]], dependencies: RunDependencies - ) -> Generator[Dict[str, Any], None, None]: + ) -> Generator[Packet, None]: bus: Queue = Queue() emitter = Emitter(bus) current_context = contextvars.copy_context() @@ -48,6 +47,7 @@ def wrapper( daemon=True, ) t.start() + ind = 0 while True: pkt: StreamPacket = emitter.bus.get() if pkt.kind == "done": @@ -55,8 +55,7 @@ def wrapper( else: packet_obj = convert_to_packet_obj(pkt.payload) if packet_obj: - yield packet_obj.model_dump() - else: - yield pkt.payload + yield Packet(ind=ind, obj=packet_obj) + ind += 1 return wrapper From 5a7c391ede79e65d9cbe9aca7376f2ac3de0bd34 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Thu, 25 Sep 2025 15:58:48 -0700 Subject: [PATCH 23/34] . --- backend/onyx/chat/process_message.py | 2 +- backend/onyx/chat/turn/fast_chat_turn.py | 2 +- .../chat/turn/infra/chat_turn_event_stream.py | 21 ++++++++----------- .../turn/infra/chat_turn_orchestration.py | 6 ++---- backend/onyx/chat/turn/models.py | 12 +++++++++++ 5 files changed, 25 insertions(+), 18 deletions(-) create mode 100644 backend/onyx/chat/turn/models.py diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index f02d17373ff..2c63e4cec12 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -31,7 +31,7 @@ from onyx.chat.prompt_builder.answer_prompt_builder import default_build_user_message from onyx.chat.turn import fast_chat_turn from onyx.chat.turn.infra.chat_turn_event_stream import convert_to_packet_obj -from onyx.chat.turn.infra.chat_turn_event_stream import RunDependencies +from onyx.chat.turn.models import RunDependencies from onyx.chat.user_files.parse_user_files import parse_user_files from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE from onyx.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH diff --git a/backend/onyx/chat/turn/fast_chat_turn.py b/backend/onyx/chat/turn/fast_chat_turn.py index 306a42b0fea..1cb6947ca1e 100644 --- a/backend/onyx/chat/turn/fast_chat_turn.py +++ b/backend/onyx/chat/turn/fast_chat_turn.py @@ -7,8 +7,8 @@ from agents.extensions.models.litellm_model import LitellmModel from onyx.chat.turn.infra.chat_turn_event_stream import OnyxRunner -from onyx.chat.turn.infra.chat_turn_event_stream import RunDependencies from onyx.chat.turn.infra.chat_turn_orchestration import unified_event_stream +from onyx.chat.turn.models import RunDependencies @dataclass diff --git a/backend/onyx/chat/turn/infra/chat_turn_event_stream.py b/backend/onyx/chat/turn/infra/chat_turn_event_stream.py index 7bf564a9d57..9235abe6d88 100644 --- a/backend/onyx/chat/turn/infra/chat_turn_event_stream.py +++ b/backend/onyx/chat/turn/infra/chat_turn_event_stream.py @@ -3,7 +3,6 @@ import queue import threading from collections.abc import Iterator -from dataclasses import dataclass from queue import Queue from typing import Any from typing import Dict @@ -15,8 +14,6 @@ from pydantic import BaseModel from pydantic import Field -from onyx.llm.interfaces import LLM -from onyx.tools.tool_implementations.search.search_tool import SearchTool logger = logging.getLogger(__name__) @@ -131,10 +128,10 @@ def convert_to_packet_obj(packet: Dict[str, Any]) -> Any | None: # Map packet types to their corresponding classes type_mapping = { - "message_start": MessageStart, + "response.created": MessageStart, "response.output_text.delta": MessageDelta, "response.completed": OverallStop, - 
"section_end": SectionEnd, + "response.output_item.done": SectionEnd, "internal_search_tool_start": SearchToolStart, "internal_search_tool_delta": SearchToolDelta, "image_generation_tool_start": ImageGenerationToolStart, @@ -157,6 +154,13 @@ def convert_to_packet_obj(packet: Dict[str, Any]) -> Any | None: filtered_data["content"] = filtered_data["delta"] elif packet_type == "response.completed": filtered_data["type"] = "stop" + elif packet_type == "response.created": + return MessageStart( + type="message_start", content="", final_documents=None + ) + elif packet_type == "response.output_item.done": + return SectionEnd(type="section_end") + packet_class(**filtered_data) return packet_class(**filtered_data) except Exception as e: @@ -174,10 +178,3 @@ def __init__(self, bus: Queue): def emit(self, kind: str, data: Dict[str, Any]) -> None: self.bus.put(StreamPacket(kind=kind, payload=data)) - - -@dataclass -class RunDependencies: - llm: LLM - emitter: Emitter | None = None - search_tool: SearchTool | None = None diff --git a/backend/onyx/chat/turn/infra/chat_turn_orchestration.py b/backend/onyx/chat/turn/infra/chat_turn_orchestration.py index 373b591de05..ee29ad18095 100644 --- a/backend/onyx/chat/turn/infra/chat_turn_orchestration.py +++ b/backend/onyx/chat/turn/infra/chat_turn_orchestration.py @@ -9,8 +9,8 @@ from onyx.chat.turn.infra.chat_turn_event_stream import convert_to_packet_obj from onyx.chat.turn.infra.chat_turn_event_stream import Emitter -from onyx.chat.turn.infra.chat_turn_event_stream import RunDependencies from onyx.chat.turn.infra.chat_turn_event_stream import StreamPacket +from onyx.chat.turn.models import RunDependencies from onyx.server.query_and_chat.streaming_models import Packet @@ -47,7 +47,6 @@ def wrapper( daemon=True, ) t.start() - ind = 0 while True: pkt: StreamPacket = emitter.bus.get() if pkt.kind == "done": @@ -55,7 +54,6 @@ def wrapper( else: packet_obj = convert_to_packet_obj(pkt.payload) if packet_obj: - yield Packet(ind=ind, obj=packet_obj) - ind += 1 + yield Packet(ind=0, obj=packet_obj) return wrapper diff --git a/backend/onyx/chat/turn/models.py b/backend/onyx/chat/turn/models.py new file mode 100644 index 00000000000..0404d6934f5 --- /dev/null +++ b/backend/onyx/chat/turn/models.py @@ -0,0 +1,12 @@ +from dataclasses import dataclass + +from onyx.chat.turn.infra.chat_turn_event_stream import Emitter +from onyx.llm.interfaces import LLM +from onyx.tools.tool_implementations.search.search_tool import SearchTool + + +@dataclass +class RunDependencies: + llm: LLM + emitter: Emitter | None = None + search_tool: SearchTool | None = None From 77e83425c1fd3d76700a24dfc5e01da2a4cfd137 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Thu, 25 Sep 2025 16:29:58 -0700 Subject: [PATCH 24/34] . 
--- backend/onyx/chat/answer_scratchpad.py | 3 +- backend/onyx/chat/turn/fast_chat_turn.py | 15 ++--- .../chat/turn/infra/chat_turn_event_stream.py | 2 +- backend/onyx/chat/turn/models.py | 8 +++ .../onyx/tools/tool_implementations_v2/web.py | 61 +++++++++++++++++++ .../tool_implementations_v2/web_fetch.py | 0 .../tool_implementations_v2/web_search.py | 0 7 files changed, 75 insertions(+), 14 deletions(-) create mode 100644 backend/onyx/tools/tool_implementations_v2/web.py delete mode 100644 backend/onyx/tools/tool_implementations_v2/web_fetch.py delete mode 100644 backend/onyx/tools/tool_implementations_v2/web_search.py diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index ff303e73f2a..25172b13338 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -39,10 +39,10 @@ from onyx.agents.agent_search.dr.utils import get_chat_history_string from onyx.agents.agent_search.models import GraphConfig from onyx.chat.turn import fast_chat_turn -from onyx.chat.turn.fast_chat_turn import MyContext from onyx.chat.turn.infra.chat_turn_event_stream import Emitter from onyx.chat.turn.infra.chat_turn_event_stream import OnyxRunner from onyx.chat.turn.infra.chat_turn_event_stream import RunDependencies +from onyx.chat.turn.models import MyContext from onyx.db.engine.sql_engine import get_session_with_current_tenant from onyx.llm.interfaces import ( LLM, @@ -559,4 +559,3 @@ def get_clarification( stream=False, ) x = llm_response.json() - print(x) diff --git a/backend/onyx/chat/turn/fast_chat_turn.py b/backend/onyx/chat/turn/fast_chat_turn.py index 1cb6947ca1e..b861c3c6aa0 100644 --- a/backend/onyx/chat/turn/fast_chat_turn.py +++ b/backend/onyx/chat/turn/fast_chat_turn.py @@ -1,5 +1,3 @@ -from dataclasses import dataclass - from agents import Agent from agents import ModelSettings from agents import RawResponsesStreamEvent @@ -8,15 +6,10 @@ from onyx.chat.turn.infra.chat_turn_event_stream import OnyxRunner from onyx.chat.turn.infra.chat_turn_orchestration import unified_event_stream +from onyx.chat.turn.models import MyContext from onyx.chat.turn.models import RunDependencies - - -@dataclass -class MyContext: - """Context class to hold search tool and other dependencies""" - - run_dependencies: RunDependencies | None = None - needs_compaction: bool = False +from onyx.tools.tool_implementations_v2.web import web_fetch +from onyx.tools.tool_implementations_v2.web import web_search # TODO: Dependency injection? @@ -35,7 +28,7 @@ def fast_chat_turn(messages: list[dict], dependencies: RunDependencies) -> None: model=dependencies.llm.config.model_name, api_key=dependencies.llm.config.api_key, ), - tools=[], + tools=[web_search, web_fetch], model_settings=ModelSettings( temperature=0.0, include_usage=True, diff --git a/backend/onyx/chat/turn/infra/chat_turn_event_stream.py b/backend/onyx/chat/turn/infra/chat_turn_event_stream.py index 9235abe6d88..d9da41b360f 100644 --- a/backend/onyx/chat/turn/infra/chat_turn_event_stream.py +++ b/backend/onyx/chat/turn/infra/chat_turn_event_stream.py @@ -90,7 +90,7 @@ class StreamPacket(BaseModel): payload: Dict[str, Any] = Field(default_factory=dict) -def convert_to_packet_obj(packet: Dict[str, Any]) -> Any | None: +def convert_to_packet_obj(packet: StreamPacket) -> Any | None: """Convert a packet dictionary to PacketObj when possible. 
Args: diff --git a/backend/onyx/chat/turn/models.py b/backend/onyx/chat/turn/models.py index 0404d6934f5..0d124f355d2 100644 --- a/backend/onyx/chat/turn/models.py +++ b/backend/onyx/chat/turn/models.py @@ -10,3 +10,11 @@ class RunDependencies: llm: LLM emitter: Emitter | None = None search_tool: SearchTool | None = None + + +@dataclass +class MyContext: + """Context class to hold search tool and other dependencies""" + + run_dependencies: RunDependencies | None = None + needs_compaction: bool = False diff --git a/backend/onyx/tools/tool_implementations_v2/web.py b/backend/onyx/tools/tool_implementations_v2/web.py new file mode 100644 index 00000000000..55f42d1845f --- /dev/null +++ b/backend/onyx/tools/tool_implementations_v2/web.py @@ -0,0 +1,61 @@ +import json +from typing import List + +from agents import function_tool +from agents import RunContextWrapper + +from onyx.agents.agent_search.dr.sub_agents.web_search.providers import ( + get_default_provider, +) +from onyx.chat.turn.models import MyContext + + +def short_tag(link: str, i: int) -> str: + # Stable, readable; index keeps it deterministic across a batch + return f"S{i+1}" + + +@function_tool +def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: + """Search the web for information. This tool provides urls and short snippets, + but does not fetch the full content of the urls.""" + search_provider = get_default_provider() + run_context.run_dependencies.emitter.emit(kind="web-search", data={"query": query}) + hits = search_provider.search(query) + results = [] + for i, r in enumerate(hits): + results.append( + { + "tag": short_tag(r.link, i), # <-- add a tag + "title": r.title, + "link": r.link, + "snippet": r.snippet, + "author": r.author, + "published_date": ( + r.published_date.isoformat() if r.published_date else None + ), + } + ) + return json.dumps({"results": results}) + + +@function_tool +def web_fetch(run_context: RunContextWrapper[MyContext], urls: List[str]) -> str: + """Fetch the full contents of a list of URLs.""" + search_provider = get_default_provider() + run_context.run_dependencies.emitter.emit(kind="web-fetch", data={"urls": urls}) + docs = search_provider.contents(urls) + out = [] + for i, d in enumerate(docs): + out.append( + { + "tag": short_tag(d.link, i), # <-- add a tag + "title": d.title, + "link": d.link, + "full_content": d.full_content, + "published_date": ( + d.published_date.isoformat() if d.published_date else None + ), + } + ) + return json.dumps({"results": out}) diff --git a/backend/onyx/tools/tool_implementations_v2/web_fetch.py b/backend/onyx/tools/tool_implementations_v2/web_fetch.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/backend/onyx/tools/tool_implementations_v2/web_search.py b/backend/onyx/tools/tool_implementations_v2/web_search.py deleted file mode 100644 index e69de29bb2d..00000000000 From 9553ce02fd1afa142e4652c2e8a392dcde7c0b1d Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Thu, 25 Sep 2025 16:46:33 -0700 Subject: [PATCH 25/34] . 
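Sketch of how dependencies reach the new internal_search tool (the build_turn_context helper is hypothetical and only illustrates the flow; fast_chat_turn builds the same MyContext inline, and the emitter is attached later by the unified_event_stream decorator):

    from onyx.chat.turn.models import MyContext, RunDependencies
    from onyx.llm.interfaces import LLM
    from onyx.tools.tool_implementations.search.search_tool import SearchTool


    def build_turn_context(llm: LLM, search_tool: SearchTool | None) -> MyContext:
        # internal_search reads context.run_dependencies.search_tool and raises
        # a RuntimeError when it is missing (see the tool body below).
        return MyContext(
            run_dependencies=RunDependencies(llm=llm, search_tool=search_tool)
        )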
--- backend/onyx/chat/process_message.py | 1 + backend/onyx/chat/turn/fast_chat_turn.py | 7 +-- .../internal_search.py | 50 +++++++++++++++++++ .../onyx/tools/tool_implementations_v2/web.py | 23 +++++++-- 4 files changed, 73 insertions(+), 8 deletions(-) create mode 100644 backend/onyx/tools/tool_implementations_v2/internal_search.py diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index 2c63e4cec12..f8f44e39ae4 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -693,6 +693,7 @@ def stream_chat_message_objects( Rules: - Only cite sources provided by the tools (use each item’s "tag" field). + - Only perform citations for web search and fetch tools. - Place the citation immediately after the claim it supports, like this: "... result [S1](https://linkforS1)" or "... results [S1](https://linkforS1)[S3](https://linkforS3)". - If multiple sentences in a row are supported by the same source, cite the first sentence; diff --git a/backend/onyx/chat/turn/fast_chat_turn.py b/backend/onyx/chat/turn/fast_chat_turn.py index b861c3c6aa0..4f52b6e8db1 100644 --- a/backend/onyx/chat/turn/fast_chat_turn.py +++ b/backend/onyx/chat/turn/fast_chat_turn.py @@ -8,6 +8,7 @@ from onyx.chat.turn.infra.chat_turn_orchestration import unified_event_stream from onyx.chat.turn.models import MyContext from onyx.chat.turn.models import RunDependencies +from onyx.tools.tool_implementations_v2.internal_search import internal_search from onyx.tools.tool_implementations_v2.web import web_fetch from onyx.tools.tool_implementations_v2.web import web_search @@ -20,15 +21,11 @@ def fast_chat_turn(messages: list[dict], dependencies: RunDependencies) -> None: ) agent = Agent( name="Assistant", - instructions=""" - You are a helpful assistant that can search the web, fetch content from URLs, - and search internal databases. Please do some reasoning and then return your answer. - """, model=LitellmModel( model=dependencies.llm.config.model_name, api_key=dependencies.llm.config.api_key, ), - tools=[web_search, web_fetch], + tools=[web_search, web_fetch, internal_search], model_settings=ModelSettings( temperature=0.0, include_usage=True, diff --git a/backend/onyx/tools/tool_implementations_v2/internal_search.py b/backend/onyx/tools/tool_implementations_v2/internal_search.py new file mode 100644 index 00000000000..3de620a460d --- /dev/null +++ b/backend/onyx/tools/tool_implementations_v2/internal_search.py @@ -0,0 +1,50 @@ +from typing import cast + +from agents import function_tool +from agents import RunContextWrapper + +from onyx.chat.turn.models import MyContext +from onyx.db.engine.sql_engine import get_session_with_current_tenant +from onyx.tools.models import SearchToolOverrideKwargs +from onyx.tools.tool_implementations.search.search_tool import ( + SEARCH_RESPONSE_SUMMARY_ID, +) +from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary + + +@function_tool +def internal_search(context_wrapper: RunContextWrapper[MyContext], query: str) -> str: + """ + Search the internal knowledge base and documents. + + Use this tool when the answer is likely stored in private or company + documents rather than on the public web. It returns snippets, titles, + and links to relevant internal files. + + Args: + query: The natural-language search query. 
+ """ + context_wrapper.context.run_dependencies.emitter.emit( + kind="tool-progress", data={"query": query} + ) + search_tool = context_wrapper.context.run_dependencies.search_tool + if search_tool is None: + raise RuntimeError("Search tool not available in context") + + with get_session_with_current_tenant() as search_db_session: + for tool_response in search_tool.run( + query=query, + override_kwargs=SearchToolOverrideKwargs( + force_no_rerank=True, + alternate_db_session=search_db_session, + skip_query_analysis=True, + original_query=query, + ), + ): + # get retrieved docs to send to the rest of the graph + if tool_response.id == SEARCH_RESPONSE_SUMMARY_ID: + response = cast(SearchResponseSummary, tool_response.response) + retrieved_docs = response.top_sections + + break + return retrieved_docs diff --git a/backend/onyx/tools/tool_implementations_v2/web.py b/backend/onyx/tools/tool_implementations_v2/web.py index 55f42d1845f..a287bc5a78e 100644 --- a/backend/onyx/tools/tool_implementations_v2/web.py +++ b/backend/onyx/tools/tool_implementations_v2/web.py @@ -17,8 +17,16 @@ def short_tag(link: str, i: int) -> str: @function_tool def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: - """Search the web for information. This tool provides urls and short snippets, - but does not fetch the full content of the urls.""" + """ + Perform a live search on the public internet. + + Use this tool when you need fresh or external information not found + in the conversation. It returns a ranked list of web pages with titles, + snippets, and URLs. + + Args: + query: The natural-language search query. + """ search_provider = get_default_provider() run_context.run_dependencies.emitter.emit(kind="web-search", data={"query": query}) hits = search_provider.search(query) @@ -41,7 +49,16 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: @function_tool def web_fetch(run_context: RunContextWrapper[MyContext], urls: List[str]) -> str: - """Fetch the full contents of a list of URLs.""" + """ + Fetch and extract the text content from a specific web page. + + Use this tool after identifying relevant URLs (for example from + `web_search`) to read the full content. It returns the cleaned page + text and metadata. + + Args: + urls: The full URLs of the pages to retrieve. + """ search_provider = get_default_provider() run_context.run_dependencies.emitter.emit(kind="web-fetch", data={"urls": urls}) docs = search_provider.contents(urls) From 0940c8d1abc8528d33a5d13d109a5d6cf0cc4543 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Thu, 25 Sep 2025 16:57:32 -0700 Subject: [PATCH 26/34] . 
--- backend/onyx/chat/answer_scratchpad.py | 224 +------------------------ backend/onyx/chat/process_message.py | 30 +--- 2 files changed, 7 insertions(+), 247 deletions(-) diff --git a/backend/onyx/chat/answer_scratchpad.py b/backend/onyx/chat/answer_scratchpad.py index 25172b13338..f9dc3b0fd7e 100644 --- a/backend/onyx/chat/answer_scratchpad.py +++ b/backend/onyx/chat/answer_scratchpad.py @@ -1,11 +1,8 @@ from __future__ import annotations -import asyncio import json import os -import threading from typing import Any -from typing import cast from typing import Dict from typing import List from typing import Optional @@ -16,7 +13,6 @@ from agents import function_tool from agents import ModelSettings from agents import RunContextWrapper -from agents import Runner from agents.extensions.handoff_prompt import prompt_with_handoff_instructions from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX from agents.extensions.models.litellm_model import LitellmModel @@ -33,9 +29,6 @@ ) from onyx.agents.agent_search.dr.enums import ResearchType from onyx.agents.agent_search.dr.models import DRPromptPurpose -from onyx.agents.agent_search.dr.sub_agents.web_search.clients.exa_client import ( - ExaClient, -) from onyx.agents.agent_search.dr.utils import get_chat_history_string from onyx.agents.agent_search.models import GraphConfig from onyx.chat.turn import fast_chat_turn @@ -43,70 +36,18 @@ from onyx.chat.turn.infra.chat_turn_event_stream import OnyxRunner from onyx.chat.turn.infra.chat_turn_event_stream import RunDependencies from onyx.chat.turn.models import MyContext -from onyx.db.engine.sql_engine import get_session_with_current_tenant from onyx.llm.interfaces import ( LLM, ) -from onyx.tools.models import SearchToolOverrideKwargs -from onyx.tools.tool_implementations.search.search_tool import ( - SEARCH_RESPONSE_SUMMARY_ID, -) -from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary -from onyx.tools.tool_implementations.search.search_tool import SearchTool +from onyx.tools.built_in_tools import SearchTool +from onyx.tools.tool_implementations_v2.internal_search import internal_search +from onyx.tools.tool_implementations_v2.web import web_fetch +from onyx.tools.tool_implementations_v2.web import web_search from onyx.utils.logger import setup_logger logger = setup_logger() -def short_tag(link: str, i: int) -> str: - # Stable, readable; index keeps it deterministic across a batch - return f"S{i+1}" - - -@function_tool -def web_search(query: str) -> str: - """Search the web for information. 
This tool provides urls and short snippets, - but does not fetch the full content of the urls.""" - exa_client = ExaClient() - hits = exa_client.search(query) - results = [] - for i, r in enumerate(hits): - results.append( - { - "tag": short_tag(r.link, i), # <-- add a tag - "title": r.title, - "link": r.link, - "snippet": r.snippet, - "author": r.author, - "published_date": ( - r.published_date.isoformat() if r.published_date else None - ), - } - ) - return json.dumps({"results": results}) - - -@function_tool -def web_fetch(urls: List[str]) -> str: - """Fetch the full contents of a list of URLs.""" - exa_client = ExaClient() - docs = exa_client.contents(urls) - out = [] - for i, d in enumerate(docs): - out.append( - { - "tag": short_tag(d.link, i), # <-- add a tag - "title": d.title, - "link": d.link, - "full_content": d.full_content, - "published_date": ( - d.published_date.isoformat() if d.published_date else None - ), - } - ) - return json.dumps({"results": out}) - - @traced(name="llm_completion", type="llm") def llm_completion( model_name: str, @@ -123,122 +64,6 @@ def llm_completion( ) -@function_tool -def internal_search(context_wrapper: RunContextWrapper[MyContext], query: str) -> str: - """Search internal company vector database for information. Sources - include: - - Fireflies (internal company call transcripts) - - Google Drive (internal company documents) - - Gmail (internal company emails) - - Linear (internal company issues) - - Slack (internal company messages) - """ - context_wrapper.context.run_dependencies.emitter.emit( - kind="tool-progress", data={"progress": "Searching internal database"} - ) - search_tool = context_wrapper.context.run_dependencies.search_tool - if search_tool is None: - raise RuntimeError("Search tool not available in context") - - with get_session_with_current_tenant() as search_db_session: - for tool_response in search_tool.run( - query=query, - override_kwargs=SearchToolOverrideKwargs( - force_no_rerank=True, - alternate_db_session=search_db_session, - skip_query_analysis=True, - original_query=query, - ), - ): - # get retrieved docs to send to the rest of the graph - if tool_response.id == SEARCH_RESPONSE_SUMMARY_ID: - response = cast(SearchResponseSummary, tool_response.response) - retrieved_docs = response.top_sections - - break - return retrieved_docs - - -def _convert_to_packet_obj(packet: Dict[str, Any]) -> Any | None: - """Convert a packet dictionary to PacketObj when possible. 
- - Args: - packet: Dictionary containing packet data - - Returns: - PacketObj instance if conversion is possible, None otherwise - """ - if not isinstance(packet, dict) or "type" not in packet: - return None - - packet_type = packet.get("type") - if not packet_type: - return None - - try: - # Import here to avoid circular imports - from onyx.server.query_and_chat.streaming_models import ( - MessageStart, - MessageDelta, - OverallStop, - ) - - if packet_type == "response.output_item.added": - return MessageStart( - type="message_start", - content="", - final_documents=None, - ) - elif packet_type == "response.output_text.delta": - return MessageDelta(type="message_delta", content=packet["delta"]) - elif packet_type == "response.completed": - return OverallStop(type="stop") - - except Exception as e: - # Log the error but don't fail the entire process - logger.debug(f"Failed to convert packet to PacketObj: {e}") - - return None - - -# If we want durable execution in the future, we can replace this with a temporal call -def start_run_in_thread( - agent: Agent, - messages: List[Dict[str, Any]], - cfg: GraphConfig, - llm: LLM, - emitter: Emitter, - search_tool: SearchTool | None = None, -) -> threading.Thread: - def worker(): - async def amain(): - ctx = MyContext( - run_dependencies=RunDependencies( - search_tool=search_tool, - emitter=emitter, - llm=llm, - ) - ) - # 1) start the streamed run (async) - streamed = Runner.run_streamed(agent, messages, context=ctx) - - # 2) forward the agent’s async event stream - async for ev in streamed.stream_events(): - if isinstance(ev, RunItemStreamEvent): - pass - elif isinstance(ev, RawResponsesStreamEvent): - emitter.emit(kind="agent", data=ev.data.model_dump()) - - emitter.emit(kind="done", data={"ok": True}) - - # run the async main inside this thread - asyncio.run(amain()) - - t = threading.Thread(target=worker, daemon=True) - t.start() - return t - - class ResearchScratchpad(BaseModel): notes: List[dict] = [] @@ -405,48 +230,12 @@ def thread_worker_simple_turn(messages, cfg, llm, emitter, search_tool): emitter.emit(kind="done", data={"ok": False}) -def simple_turn( - messages: List[Dict[str, Any]], - cfg: GraphConfig, - llm: LLM, - turn_event_stream_emitter: Emitter, - search_tool: SearchTool | None = None, -) -> None: - llm_response = llm_completion( - model_name="gpt-4o-mini", - temperature=0.0, - messages=messages, - stream=True, - ) - llm_response.json() - simple_agent = construct_simple_agent(llm) - ctx = MyContext( - run_dependencies=RunDependencies( - search_tool=search_tool, emitter=turn_event_stream_emitter, llm=llm - ) - ) - bridge = OnyxRunner(simple_agent, messages, ctx, max_turns=100).start() - for ev in bridge.events(): - if isinstance(ev, RunItemStreamEvent): - print("RUN ITEM STREAM EVENT!") - if ev.name == "reasoning_item_created": - print("REASONING!") - turn_event_stream_emitter.emit( - kind="reasoning", data=ev.item.raw_item.model_dump() - ) - elif isinstance(ev, RawResponsesStreamEvent): - print("RAW RESPONSES STREAM EVENT!") - print(ev.type) - turn_event_stream_emitter.emit(kind="agent", data=ev.data.model_dump()) - turn_event_stream_emitter.emit(kind="done", data={"ok": True}) - - def dr_turn( messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM, turn_event_stream_emitter: Emitter, # TurnEventStream is the primary output of the turn - search_tool: SearchTool | None = None, + search_tool: SearchTool, ) -> None: """ Execute a deep research turn with evaluation context support. 
@@ -497,8 +286,7 @@ def get_clarification( messages: List[Dict[str, Any]], cfg: GraphConfig, llm: LLM, - emitter: Emitter, - search_tool: SearchTool | None = None, + search_tool: SearchTool, ) -> litellm.ModelResponse: chat_history_string = ( get_chat_history_string( diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index f8f44e39ae4..aa52d733073 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -674,41 +674,13 @@ def stream_chat_message_objects( "system": "system", "function": "function", } - SYSTEM_PROMPT = """ - You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the \ - user's intent, ask clarifying questions when needed, think step-by-step through complex problems, \ - provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always \ - prioritize being truthful, nuanced, insightful, and efficient. - The current date is September 18, 2025. - - You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make \ - your responses more readable and engaging. - You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, \ - symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline. - For code you prefer to use Markdown and specify the language. - You can use Markdown horizontal rules (---) to separate sections of your responses. - You can use Markdown tables to format your responses for data, lists, and other structured information. - - You must cite inline using tags from tool results. - - Rules: - - Only cite sources provided by the tools (use each item’s "tag" field). - - Only perform citations for web search and fetch tools. - - Place the citation immediately after the claim it supports, like this: "... result [S1](https://linkforS1)" or - "... results [S1](https://linkforS1)[S3](https://linkforS3)". - - If multiple sentences in a row are supported by the same source, cite the first sentence; - then omit repeats until the source changes. - - Never invent tags. If no source supports a claim, say so. - - Do not add a separate “Sources” section unless asked. - """ - system_message = [{"role": "system", "content": SYSTEM_PROMPT}] other_messages = [ {"role": type_to_role[message.type], "content": message.content} for message in answer.graph_inputs.prompt_builder.build() if message.type != "system" ] yield from fast_chat_turn.fast_chat_turn( - messages=system_message + other_messages, + messages=other_messages, dependencies=RunDependencies( llm=answer.graph_tooling.primary_llm, search_tool=answer.graph_tooling.search_tool, From accbd7ab6cfd199602b7b7d15d61a522bba68611 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Thu, 25 Sep 2025 18:09:22 -0700 Subject: [PATCH 27/34] . 
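With this change the Emitter carries fully typed Packet objects end to end instead of kind/payload dicts. A sketch of the stream a consumer of unified_event_stream is expected to see for one short answer (ordering and ind values are simplified; the constructors match the ones used in fast_chat_turn.py below):

    from onyx.server.query_and_chat.streaming_models import (
        MessageDelta,
        MessageStart,
        OverallStop,
        Packet,
        SectionEnd,
    )

    expected_stream = [
        Packet(ind=0, obj=MessageStart(type="message_start", content="", final_documents=None)),
        Packet(ind=0, obj=MessageDelta(type="message_delta", content="Hello")),
        Packet(ind=0, obj=MessageDelta(type="message_delta", content=" world")),
        Packet(ind=0, obj=SectionEnd(type="section_end")),
        Packet(ind=0, obj=OverallStop(type="stop")),  # the orchestration loop breaks here
    ]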
--- backend/onyx/chat/turn/fast_chat_turn.py | 38 +++++++++---- .../chat/turn/infra/chat_turn_event_stream.py | 16 ++---- .../turn/infra/chat_turn_orchestration.py | 11 ++-- backend/onyx/chat/turn/models.py | 1 + .../onyx/tools/tool_implementations_v2/web.py | 56 ++++++++++++++++++- 5 files changed, 91 insertions(+), 31 deletions(-) diff --git a/backend/onyx/chat/turn/fast_chat_turn.py b/backend/onyx/chat/turn/fast_chat_turn.py index 4f52b6e8db1..6314312547a 100644 --- a/backend/onyx/chat/turn/fast_chat_turn.py +++ b/backend/onyx/chat/turn/fast_chat_turn.py @@ -8,6 +8,11 @@ from onyx.chat.turn.infra.chat_turn_orchestration import unified_event_stream from onyx.chat.turn.models import MyContext from onyx.chat.turn.models import RunDependencies +from onyx.server.query_and_chat.streaming_models import MessageDelta +from onyx.server.query_and_chat.streaming_models import MessageStart +from onyx.server.query_and_chat.streaming_models import OverallStop +from onyx.server.query_and_chat.streaming_models import Packet +from onyx.server.query_and_chat.streaming_models import SectionEnd from onyx.tools.tool_implementations_v2.internal_search import internal_search from onyx.tools.tool_implementations_v2.web import web_fetch from onyx.tools.tool_implementations_v2.web import web_search @@ -33,14 +38,25 @@ def fast_chat_turn(messages: list[dict], dependencies: RunDependencies) -> None: ) bridge = OnyxRunner().run_streamed(agent, messages, context=ctx, max_turns=100) - try: - for ev in bridge.events(): - if isinstance(ev, RunItemStreamEvent): - pass - elif isinstance(ev, RawResponsesStreamEvent): - # TODO: use very standardized schema for the emitter that is close to - # front end schema - dependencies.emitter.emit(kind="agent", data=ev.data.model_dump()) - finally: - # TODO: Handle done signal more reliably? - dependencies.emitter.emit(kind="done", data={"ok": True}) + for ev in bridge.events(): + if isinstance(ev, RunItemStreamEvent): + pass + elif isinstance(ev, RawResponsesStreamEvent): + obj = None + if ev.data.type == "response.created": + obj = MessageStart( + type="message_start", content="", final_documents=None + ) + elif ev.data.type == "response.output_text.delta": + obj = MessageDelta(type="message_delta", content=ev.data.delta) + elif ev.data.type == "response.completed": + obj = OverallStop(type="stop") + elif ev.data.type == "response.output_item.done": + obj = SectionEnd(type="section_end") + if obj: + dependencies.emitter.emit(Packet(ind=ctx.current_run_step, obj=obj)) + # TODO: Error handling + # Should there be a timeout and some error on the queue? 
+ dependencies.emitter.emit( + Packet(ind=ctx.current_run_step, obj=OverallStop(type="stop")) + ) diff --git a/backend/onyx/chat/turn/infra/chat_turn_event_stream.py b/backend/onyx/chat/turn/infra/chat_turn_event_stream.py index d9da41b360f..6d7fabf2f0a 100644 --- a/backend/onyx/chat/turn/infra/chat_turn_event_stream.py +++ b/backend/onyx/chat/turn/infra/chat_turn_event_stream.py @@ -5,14 +5,13 @@ from collections.abc import Iterator from queue import Queue from typing import Any -from typing import Dict from typing import Optional from agents import Agent from agents import Runner from agents import TContext -from pydantic import BaseModel -from pydantic import Field + +from onyx.server.query_and_chat.streaming_models import Packet logger = logging.getLogger(__name__) @@ -85,12 +84,7 @@ def _do_cancel(): self._loop.call_soon_threadsafe(_do_cancel) -class StreamPacket(BaseModel): - kind: str # "agent" | "tool-progress" | "done" - payload: Dict[str, Any] = Field(default_factory=dict) - - -def convert_to_packet_obj(packet: StreamPacket) -> Any | None: +def convert_to_packet_obj(packet: dict[str, Any]) -> Any | None: """Convert a packet dictionary to PacketObj when possible. Args: @@ -176,5 +170,5 @@ class Emitter: def __init__(self, bus: Queue): self.bus = bus - def emit(self, kind: str, data: Dict[str, Any]) -> None: - self.bus.put(StreamPacket(kind=kind, payload=data)) + def emit(self, packet: Packet) -> None: + self.bus.put(packet) diff --git a/backend/onyx/chat/turn/infra/chat_turn_orchestration.py b/backend/onyx/chat/turn/infra/chat_turn_orchestration.py index ee29ad18095..c4732ef8c36 100644 --- a/backend/onyx/chat/turn/infra/chat_turn_orchestration.py +++ b/backend/onyx/chat/turn/infra/chat_turn_orchestration.py @@ -7,10 +7,9 @@ from typing import Dict from typing import List -from onyx.chat.turn.infra.chat_turn_event_stream import convert_to_packet_obj from onyx.chat.turn.infra.chat_turn_event_stream import Emitter -from onyx.chat.turn.infra.chat_turn_event_stream import StreamPacket from onyx.chat.turn.models import RunDependencies +from onyx.server.query_and_chat.streaming_models import OverallStop from onyx.server.query_and_chat.streaming_models import Packet @@ -48,12 +47,10 @@ def wrapper( ) t.start() while True: - pkt: StreamPacket = emitter.bus.get() - if pkt.kind == "done": + pkt: Packet = emitter.bus.get() + if pkt.obj == OverallStop(type="stop"): break else: - packet_obj = convert_to_packet_obj(pkt.payload) - if packet_obj: - yield Packet(ind=0, obj=packet_obj) + yield pkt return wrapper diff --git a/backend/onyx/chat/turn/models.py b/backend/onyx/chat/turn/models.py index 0d124f355d2..ae42a40b826 100644 --- a/backend/onyx/chat/turn/models.py +++ b/backend/onyx/chat/turn/models.py @@ -18,3 +18,4 @@ class MyContext: run_dependencies: RunDependencies | None = None needs_compaction: bool = False + current_run_step: int = 0 diff --git a/backend/onyx/tools/tool_implementations_v2/web.py b/backend/onyx/tools/tool_implementations_v2/web.py index a287bc5a78e..6b0d5693389 100644 --- a/backend/onyx/tools/tool_implementations_v2/web.py +++ b/backend/onyx/tools/tool_implementations_v2/web.py @@ -8,6 +8,11 @@ get_default_provider, ) from onyx.chat.turn.models import MyContext +from onyx.configs.constants import DocumentSource +from onyx.server.query_and_chat.streaming_models import Packet +from onyx.server.query_and_chat.streaming_models import SavedSearchDoc +from onyx.server.query_and_chat.streaming_models import SearchToolDelta +from onyx.server.query_and_chat.streaming_models import 
SearchToolStart def short_tag(link: str, i: int) -> str: @@ -28,7 +33,22 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: query: The natural-language search query. """ search_provider = get_default_provider() - run_context.run_dependencies.emitter.emit(kind="web-search", data={"query": query}) + run_context.context.run_dependencies.emitter.emit( + Packet( + ind=run_context.context.current_run_step + 1, + obj=SearchToolStart( + type="internal_search_tool_start", is_internet_search=True + ), + ) + ) + run_context.context.run_dependencies.emitter.emit( + Packet( + ind=run_context.context.current_run_step + 1, + obj=SearchToolDelta( + type="internal_search_tool_delta", queries=[query], documents=None + ), + ) + ) hits = search_provider.search(query) results = [] for i, r in enumerate(hits): @@ -44,6 +64,7 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: ), } ) + run_context.context.current_run_step += 2 return json.dumps({"results": results}) @@ -60,7 +81,38 @@ def web_fetch(run_context: RunContextWrapper[MyContext], urls: List[str]) -> str urls: The full URLs of the pages to retrieve. """ search_provider = get_default_provider() - run_context.run_dependencies.emitter.emit(kind="web-fetch", data={"urls": urls}) + saved_search_docs = [ + SavedSearchDoc( + document_id=url, + chunk_ind=0, + semantic_identifier=url, + link=url, + blurb=url, + source_type=DocumentSource.WEB, + boost=1, + hidden=False, + metadata={}, + score=0.0, + is_relevant=None, + relevance_explanation=None, + match_highlights=[], + updated_at=None, + primary_owners=None, + secondary_owners=None, + is_internet=True, + ) + for url in urls + ] + run_context.context.run_dependencies.emitter.emit( + Packet( + ind=run_context.context.current_run_step + 1, + obj=SearchToolDelta( + type="internal_search_tool_delta", + is_internet_search=True, + documents=saved_search_docs, + ), + ) + ) docs = search_provider.contents(urls) out = [] for i, d in enumerate(docs): From 9634870aac09771ac02c9efab9e84977e67235d3 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Fri, 26 Sep 2025 11:01:15 -0700 Subject: [PATCH 28/34] . 
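
Always surface the terminating packet: the orchestration loop now yields the OverallStop packet it receives before breaking (a temporary print("packet", pkt) is added alongside for debugging), and fast_chat_turn no longer translates response.completed into an OverallStop, since the explicit stop packet emitted at the end of the turn already covers it. In web_search, the SavedSearchDoc entries emitted in the SearchToolDelta are now built from the actual search hits (document_id, link and blurb taken from each hit) rather than from bare URLs in web_fetch.
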
--- backend/onyx/chat/turn/fast_chat_turn.py | 7 ++- .../turn/infra/chat_turn_orchestration.py | 2 + .../onyx/tools/tool_implementations_v2/web.py | 50 +++++++++---------- 3 files changed, 32 insertions(+), 27 deletions(-) diff --git a/backend/onyx/chat/turn/fast_chat_turn.py b/backend/onyx/chat/turn/fast_chat_turn.py index 6314312547a..74aae896758 100644 --- a/backend/onyx/chat/turn/fast_chat_turn.py +++ b/backend/onyx/chat/turn/fast_chat_turn.py @@ -42,6 +42,9 @@ def fast_chat_turn(messages: list[dict], dependencies: RunDependencies) -> None: if isinstance(ev, RunItemStreamEvent): pass elif isinstance(ev, RawResponsesStreamEvent): + # TODO: might need some variation here for different types of models + # OpenAI packet translator + # Default packet translator obj = None if ev.data.type == "response.created": obj = MessageStart( @@ -49,8 +52,8 @@ def fast_chat_turn(messages: list[dict], dependencies: RunDependencies) -> None: ) elif ev.data.type == "response.output_text.delta": obj = MessageDelta(type="message_delta", content=ev.data.delta) - elif ev.data.type == "response.completed": - obj = OverallStop(type="stop") + # elif ev.data.type == "response.completed": + # obj = OverallStop(type="stop") elif ev.data.type == "response.output_item.done": obj = SectionEnd(type="section_end") if obj: diff --git a/backend/onyx/chat/turn/infra/chat_turn_orchestration.py b/backend/onyx/chat/turn/infra/chat_turn_orchestration.py index c4732ef8c36..a2cae07c697 100644 --- a/backend/onyx/chat/turn/infra/chat_turn_orchestration.py +++ b/backend/onyx/chat/turn/infra/chat_turn_orchestration.py @@ -48,7 +48,9 @@ def wrapper( t.start() while True: pkt: Packet = emitter.bus.get() + print("packet", pkt) if pkt.obj == OverallStop(type="stop"): + yield pkt break else: yield pkt diff --git a/backend/onyx/tools/tool_implementations_v2/web.py b/backend/onyx/tools/tool_implementations_v2/web.py index 6b0d5693389..a8c0dfb0dd0 100644 --- a/backend/onyx/tools/tool_implementations_v2/web.py +++ b/backend/onyx/tools/tool_implementations_v2/web.py @@ -16,7 +16,6 @@ def short_tag(link: str, i: int) -> str: - # Stable, readable; index keeps it deterministic across a batch return f"S{i+1}" @@ -54,7 +53,7 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: for i, r in enumerate(hits): results.append( { - "tag": short_tag(r.link, i), # <-- add a tag + "tag": short_tag(r.link, i), "title": r.title, "link": r.link, "snippet": r.snippet, @@ -64,30 +63,13 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: ), } ) - run_context.context.current_run_step += 2 - return json.dumps({"results": results}) - - -@function_tool -def web_fetch(run_context: RunContextWrapper[MyContext], urls: List[str]) -> str: - """ - Fetch and extract the text content from a specific web page. - - Use this tool after identifying relevant URLs (for example from - `web_search`) to read the full content. It returns the cleaned page - text and metadata. - - Args: - urls: The full URLs of the pages to retrieve. 
- """ - search_provider = get_default_provider() saved_search_docs = [ SavedSearchDoc( - document_id=url, + document_id=hit.link, chunk_ind=0, - semantic_identifier=url, - link=url, - blurb=url, + semantic_identifier=hit.link, + link=hit.link, + blurb=hit.snippet, source_type=DocumentSource.WEB, boost=1, hidden=False, @@ -101,18 +83,36 @@ def web_fetch(run_context: RunContextWrapper[MyContext], urls: List[str]) -> str secondary_owners=None, is_internet=True, ) - for url in urls + for hit in hits ] + print(saved_search_docs) run_context.context.run_dependencies.emitter.emit( Packet( ind=run_context.context.current_run_step + 1, obj=SearchToolDelta( type="internal_search_tool_delta", - is_internet_search=True, + queries=None, documents=saved_search_docs, ), ) ) + run_context.context.current_run_step += 2 + return json.dumps({"results": results}) + + +@function_tool +def web_fetch(run_context: RunContextWrapper[MyContext], urls: List[str]) -> str: + """ + Fetch and extract the text content from a specific web page. + + Use this tool after identifying relevant URLs (for example from + `web_search`) to read the full content. It returns the cleaned page + text and metadata. + + Args: + urls: The full URLs of the pages to retrieve. + """ + search_provider = get_default_provider() docs = search_provider.contents(urls) out = [] for i, d in enumerate(docs): From 2b309b0e4248c37d6302cb97ea0e1266295fdcff Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Fri, 26 Sep 2025 11:41:15 -0700 Subject: [PATCH 29/34] . --- backend/onyx/chat/turn/fast_chat_turn.py | 1 + .../internal_search.py | 68 +++++++++++++++++-- .../onyx/tools/tool_implementations_v2/web.py | 11 ++- 3 files changed, 74 insertions(+), 6 deletions(-) diff --git a/backend/onyx/chat/turn/fast_chat_turn.py b/backend/onyx/chat/turn/fast_chat_turn.py index 74aae896758..b5e1c29563e 100644 --- a/backend/onyx/chat/turn/fast_chat_turn.py +++ b/backend/onyx/chat/turn/fast_chat_turn.py @@ -39,6 +39,7 @@ def fast_chat_turn(messages: list[dict], dependencies: RunDependencies) -> None: bridge = OnyxRunner().run_streamed(agent, messages, context=ctx, max_turns=100) for ev in bridge.events(): + ctx.current_run_step if isinstance(ev, RunItemStreamEvent): pass elif isinstance(ev, RawResponsesStreamEvent): diff --git a/backend/onyx/tools/tool_implementations_v2/internal_search.py b/backend/onyx/tools/tool_implementations_v2/internal_search.py index 3de620a460d..94e84911ab5 100644 --- a/backend/onyx/tools/tool_implementations_v2/internal_search.py +++ b/backend/onyx/tools/tool_implementations_v2/internal_search.py @@ -5,6 +5,11 @@ from onyx.chat.turn.models import MyContext from onyx.db.engine.sql_engine import get_session_with_current_tenant +from onyx.server.query_and_chat.streaming_models import Packet +from onyx.server.query_and_chat.streaming_models import SavedSearchDoc +from onyx.server.query_and_chat.streaming_models import SearchToolDelta +from onyx.server.query_and_chat.streaming_models import SearchToolStart +from onyx.server.query_and_chat.streaming_models import SectionEnd from onyx.tools.models import SearchToolOverrideKwargs from onyx.tools.tool_implementations.search.search_tool import ( SEARCH_RESPONSE_SUMMARY_ID, @@ -13,7 +18,7 @@ @function_tool -def internal_search(context_wrapper: RunContextWrapper[MyContext], query: str) -> str: +def internal_search(run_context: RunContextWrapper[MyContext], query: str) -> str: """ Search the internal knowledge base and documents. 
@@ -24,10 +29,23 @@ def internal_search(context_wrapper: RunContextWrapper[MyContext], query: str) - Args: query: The natural-language search query. """ - context_wrapper.context.run_dependencies.emitter.emit( - kind="tool-progress", data={"query": query} + run_context.context.run_dependencies.emitter.emit( + Packet( + ind=run_context.context.current_run_step + 1, + obj=SearchToolStart( + type="internal_search_tool_start", is_internet_search=False + ), + ) + ) + run_context.context.run_dependencies.emitter.emit( + Packet( + ind=run_context.context.current_run_step + 1, + obj=SearchToolDelta( + type="internal_search_tool_delta", queries=[query], documents=None + ), + ) ) - search_tool = context_wrapper.context.run_dependencies.search_tool + search_tool = run_context.context.run_dependencies.search_tool if search_tool is None: raise RuntimeError("Search tool not available in context") @@ -45,6 +63,46 @@ def internal_search(context_wrapper: RunContextWrapper[MyContext], query: str) - if tool_response.id == SEARCH_RESPONSE_SUMMARY_ID: response = cast(SearchResponseSummary, tool_response.response) retrieved_docs = response.top_sections - + run_context.context.run_dependencies.emitter.emit( + Packet( + ind=run_context.context.current_run_step + 1, + obj=SearchToolDelta( + type="internal_search_tool_delta", + queries=None, + documents=[ + SavedSearchDoc( + db_doc_id=0, + document_id=doc.center_chunk.document_id, + chunk_ind=0, + semantic_identifier=doc.center_chunk.semantic_identifier, + link=doc.center_chunk.semantic_identifier, + blurb=doc.center_chunk.blurb, + source_type=doc.center_chunk.source_type, + boost=doc.center_chunk.boost, + hidden=doc.center_chunk.hidden, + metadata=doc.center_chunk.metadata, + score=doc.center_chunk.score, + is_relevant=doc.center_chunk.is_relevant, + relevance_explanation=doc.center_chunk.relevance_explanation, + match_highlights=doc.center_chunk.match_highlights, + updated_at=doc.center_chunk.updated_at, + primary_owners=doc.center_chunk.primary_owners, + secondary_owners=doc.center_chunk.secondary_owners, + is_internet=False, + ) + for doc in retrieved_docs + ], + ), + ) + ) break + run_context.context.run_dependencies.emitter.emit( + Packet( + ind=run_context.context.current_run_step + 1, + obj=SectionEnd( + type="section_end", + ), + ) + ) + run_context.context.current_run_step += 2 return retrieved_docs diff --git a/backend/onyx/tools/tool_implementations_v2/web.py b/backend/onyx/tools/tool_implementations_v2/web.py index a8c0dfb0dd0..dd707944c5f 100644 --- a/backend/onyx/tools/tool_implementations_v2/web.py +++ b/backend/onyx/tools/tool_implementations_v2/web.py @@ -13,6 +13,7 @@ from onyx.server.query_and_chat.streaming_models import SavedSearchDoc from onyx.server.query_and_chat.streaming_models import SearchToolDelta from onyx.server.query_and_chat.streaming_models import SearchToolStart +from onyx.server.query_and_chat.streaming_models import SectionEnd def short_tag(link: str, i: int) -> str: @@ -65,6 +66,7 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: ) saved_search_docs = [ SavedSearchDoc( + db_doc_id=0, document_id=hit.link, chunk_ind=0, semantic_identifier=hit.link, @@ -85,7 +87,6 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: ) for hit in hits ] - print(saved_search_docs) run_context.context.run_dependencies.emitter.emit( Packet( ind=run_context.context.current_run_step + 1, @@ -96,6 +97,14 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: ), ) ) + 
run_context.context.run_dependencies.emitter.emit( + Packet( + ind=run_context.context.current_run_step + 1, + obj=SectionEnd( + type="section_end", + ), + ) + ) run_context.context.current_run_step += 2 return json.dumps({"results": results}) From 3b346affd05cd70b7e5601b23a886db0ad10e547 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Fri, 26 Sep 2025 14:22:29 -0700 Subject: [PATCH 30/34] . --- backend/onyx/chat/turn/fast_chat_turn.py | 28 +++---------------- .../chat/turn/infra/chat_turn_event_stream.py | 8 +++--- .../chat/turn/infra/packet_translation.py | 21 ++++++++++++++ .../onyx/tools/tool_implementations_v2/web.py | 2 ++ 4 files changed, 31 insertions(+), 28 deletions(-) create mode 100644 backend/onyx/chat/turn/infra/packet_translation.py diff --git a/backend/onyx/chat/turn/fast_chat_turn.py b/backend/onyx/chat/turn/fast_chat_turn.py index b5e1c29563e..7b0422413d1 100644 --- a/backend/onyx/chat/turn/fast_chat_turn.py +++ b/backend/onyx/chat/turn/fast_chat_turn.py @@ -1,18 +1,14 @@ from agents import Agent from agents import ModelSettings -from agents import RawResponsesStreamEvent -from agents import RunItemStreamEvent from agents.extensions.models.litellm_model import LitellmModel from onyx.chat.turn.infra.chat_turn_event_stream import OnyxRunner from onyx.chat.turn.infra.chat_turn_orchestration import unified_event_stream +from onyx.chat.turn.infra.packet_translation import default_packet_translation from onyx.chat.turn.models import MyContext from onyx.chat.turn.models import RunDependencies -from onyx.server.query_and_chat.streaming_models import MessageDelta -from onyx.server.query_and_chat.streaming_models import MessageStart from onyx.server.query_and_chat.streaming_models import OverallStop from onyx.server.query_and_chat.streaming_models import Packet -from onyx.server.query_and_chat.streaming_models import SectionEnd from onyx.tools.tool_implementations_v2.internal_search import internal_search from onyx.tools.tool_implementations_v2.web import web_fetch from onyx.tools.tool_implementations_v2.web import web_search @@ -40,25 +36,9 @@ def fast_chat_turn(messages: list[dict], dependencies: RunDependencies) -> None: bridge = OnyxRunner().run_streamed(agent, messages, context=ctx, max_turns=100) for ev in bridge.events(): ctx.current_run_step - if isinstance(ev, RunItemStreamEvent): - pass - elif isinstance(ev, RawResponsesStreamEvent): - # TODO: might need some variation here for different types of models - # OpenAI packet translator - # Default packet translator - obj = None - if ev.data.type == "response.created": - obj = MessageStart( - type="message_start", content="", final_documents=None - ) - elif ev.data.type == "response.output_text.delta": - obj = MessageDelta(type="message_delta", content=ev.data.delta) - # elif ev.data.type == "response.completed": - # obj = OverallStop(type="stop") - elif ev.data.type == "response.output_item.done": - obj = SectionEnd(type="section_end") - if obj: - dependencies.emitter.emit(Packet(ind=ctx.current_run_step, obj=obj)) + obj = default_packet_translation(ev) + if obj: + dependencies.emitter.emit(Packet(ind=ctx.current_run_step, obj=obj)) # TODO: Error handling # Should there be a timeout and some error on the queue? 
dependencies.emitter.emit( diff --git a/backend/onyx/chat/turn/infra/chat_turn_event_stream.py b/backend/onyx/chat/turn/infra/chat_turn_event_stream.py index 6d7fabf2f0a..2bd53ba423c 100644 --- a/backend/onyx/chat/turn/infra/chat_turn_event_stream.py +++ b/backend/onyx/chat/turn/infra/chat_turn_event_stream.py @@ -23,7 +23,7 @@ class OnyxRunner: consumes its async event stream, and exposes a blocking .events() iterator. """ - def __init__(self): + def __init__(self) -> None: self._q: "queue.Queue[object]" = queue.Queue() self._loop: Optional[asyncio.AbstractEventLoop] = None self._thread: Optional[threading.Thread] = None @@ -37,7 +37,7 @@ def run_streamed( context: TContext | None = None, max_turns: int = 100, ): - def worker(): + def worker() -> None: async def run_and_consume(): # Create the streamed run *inside* the loop thread self._streamed = Runner.run_streamed( @@ -71,11 +71,11 @@ def events(self) -> Iterator[object]: break yield ev - def cancel(self): + def cancel(self) -> None: # Post a cancellation to the loop thread safely if self._loop and self._streamed: - def _do_cancel(): + def _do_cancel() -> None: try: self._streamed.cancel() except Exception: diff --git a/backend/onyx/chat/turn/infra/packet_translation.py b/backend/onyx/chat/turn/infra/packet_translation.py new file mode 100644 index 00000000000..36b21f62a78 --- /dev/null +++ b/backend/onyx/chat/turn/infra/packet_translation.py @@ -0,0 +1,21 @@ +from agents import RawResponsesStreamEvent + +from onyx.server.query_and_chat.streaming_models import MessageDelta +from onyx.server.query_and_chat.streaming_models import MessageStart +from onyx.server.query_and_chat.streaming_models import PacketObj +from onyx.server.query_and_chat.streaming_models import SectionEnd + + +def default_packet_translation(ev: object) -> PacketObj | None: + if isinstance(ev, RawResponsesStreamEvent): + # TODO: might need some variation here for different types of models + # OpenAI packet translator + obj: PacketObj | None = None + if ev.data.type == "response.created": + obj = MessageStart(type="message_start", content="", final_documents=None) + elif ev.data.type == "response.output_text.delta": + obj = MessageDelta(type="message_delta", content=ev.data.delta) + elif ev.data.type == "response.output_item.done": + obj = SectionEnd(type="section_end") + return obj + return None diff --git a/backend/onyx/tools/tool_implementations_v2/web.py b/backend/onyx/tools/tool_implementations_v2/web.py index dd707944c5f..c6f3fc38040 100644 --- a/backend/onyx/tools/tool_implementations_v2/web.py +++ b/backend/onyx/tools/tool_implementations_v2/web.py @@ -87,6 +87,7 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: ) for hit in hits ] + # TODO: Remove "Results" section from internet search tool run_context.context.run_dependencies.emitter.emit( Packet( ind=run_context.context.current_run_step + 1, @@ -136,4 +137,5 @@ def web_fetch(run_context: RunContextWrapper[MyContext], urls: List[str]) -> str ), } ) + # TODO: Emit event for web search "reading" URL return json.dumps({"results": out}) From 2538738cbd7f3fc2570055989698419ad540b9c8 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Fri, 26 Sep 2025 15:38:23 -0700 Subject: [PATCH 31/34] . 
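
Wire persistence into fast chat turns. RunDependencies now carries the db_session and a DependenciesToMaybeRemove bundle (chat_session_id, message_id, research_type), which process_message passes through, and fast_chat_turn ends by calling a new save_iteration() that saves the cited search docs, links them to the chat message, records citations, updates the message (and its parent), and writes ResearchAgentIteration / ResearchAgentIterationSubStep rows from the context's iteration_instructions and global_iteration_responses. The final answer passed in is still a placeholder string at this point.

The citation format parsed by the new _extract_citation_numbers helper below, with illustrative values:

    text = "Compare [[1]] with [[2, 3]]."
    _extract_citation_numbers(text)  # -> [1, 2, 3] (unique numbers, order not guaranteed)
    # each cited number n is then mapped to search_docs[n - 1].id in save_iteration
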
--- backend/onyx/chat/process_message.py | 7 + backend/onyx/chat/turn/fast_chat_turn.py | 187 +++++++++++++++++++++++ backend/onyx/chat/turn/models.py | 18 +++ 3 files changed, 212 insertions(+) diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index 9d9c4f9b7e1..7b96c75f949 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -33,6 +33,7 @@ from onyx.chat.prompt_builder.answer_prompt_builder import default_build_user_message from onyx.chat.turn import fast_chat_turn from onyx.chat.turn.infra.chat_turn_event_stream import convert_to_packet_obj +from onyx.chat.turn.models import DependenciesToMaybeRemove from onyx.chat.turn.models import RunDependencies from onyx.chat.user_files.parse_user_files import parse_user_files from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE @@ -791,6 +792,12 @@ def stream_chat_message_objects( dependencies=RunDependencies( llm=answer.graph_tooling.primary_llm, search_tool=answer.graph_tooling.search_tool, + db_session=db_session, + dependencies_to_maybe_remove=DependenciesToMaybeRemove( + chat_session_id=chat_session_id, + message_id=reserved_message_id, + research_type=answer.graph_config.behavior.research_type, + ), ), ) diff --git a/backend/onyx/chat/turn/fast_chat_turn.py b/backend/onyx/chat/turn/fast_chat_turn.py index 7b0422413d1..c9339fa644e 100644 --- a/backend/onyx/chat/turn/fast_chat_turn.py +++ b/backend/onyx/chat/turn/fast_chat_turn.py @@ -1,12 +1,29 @@ +import re +from uuid import UUID + from agents import Agent from agents import ModelSettings from agents.extensions.models.litellm_model import LitellmModel +from sqlalchemy.orm import Session +from onyx.agents.agent_search.dr.enums import ResearchAnswerPurpose +from onyx.agents.agent_search.dr.enums import ResearchType +from onyx.agents.agent_search.dr.models import AggregatedDRContext +from onyx.agents.agent_search.dr.sub_agents.image_generation.models import ( + GeneratedImageFullResult, +) +from onyx.agents.agent_search.dr.utils import convert_inference_sections_to_search_docs from onyx.chat.turn.infra.chat_turn_event_stream import OnyxRunner from onyx.chat.turn.infra.chat_turn_orchestration import unified_event_stream from onyx.chat.turn.infra.packet_translation import default_packet_translation from onyx.chat.turn.models import MyContext from onyx.chat.turn.models import RunDependencies +from onyx.context.search.models import InferenceSection +from onyx.db.chat import create_search_doc_from_inference_section +from onyx.db.chat import update_db_session_with_messages +from onyx.db.models import ChatMessage__SearchDoc +from onyx.db.models import ResearchAgentIteration +from onyx.db.models import ResearchAgentIterationSubStep from onyx.server.query_and_chat.streaming_models import OverallStop from onyx.server.query_and_chat.streaming_models import Packet from onyx.tools.tool_implementations_v2.internal_search import internal_search @@ -19,6 +36,13 @@ def fast_chat_turn(messages: list[dict], dependencies: RunDependencies) -> None: ctx = MyContext( run_dependencies=dependencies, + aggregated_context=AggregatedDRContext( + context="context", + cited_documents=[], + is_internet_marker_dict={}, + global_iteration_responses=[], # TODO: the only field that matters for now + ), + iteration_instructions=[], ) agent = Agent( name="Assistant", @@ -39,8 +63,171 @@ def fast_chat_turn(messages: list[dict], dependencies: RunDependencies) -> None: obj = default_packet_translation(ev) if obj: 
dependencies.emitter.emit(Packet(ind=ctx.current_run_step, obj=obj)) + + save_iteration( + db_session=dependencies.db_session, + message_id=dependencies.dependencies_to_maybe_remove.message_id, + chat_session_id=dependencies.dependencies_to_maybe_remove.chat_session_id, + research_type=dependencies.dependencies_to_maybe_remove.research_type, + ctx=ctx, + final_answer="final_answer", # event should be emitted by agent for this luckily + all_cited_documents=[], + ) # TODO: Error handling # Should there be a timeout and some error on the queue? dependencies.emitter.emit( Packet(ind=ctx.current_run_step, obj=OverallStop(type="stop")) ) + + +# TODO: Figure out a way to persist information is robust to cancellation, +# modular so easily testable in unit tests and evals [likely injecting some higher +# level session manager and span sink], potentially has some robustness off the critical path, +# and promotes clean separation of concerns. +def save_iteration( + db_session: Session, + message_id: int, + chat_session_id: UUID, + research_type: ResearchType, + ctx: MyContext, + final_answer: str, + all_cited_documents: list[InferenceSection], +) -> None: + # first, insert the search_docs + is_internet_marker_dict = {} + search_docs = [ + create_search_doc_from_inference_section( + inference_section=inference_section, + is_internet=is_internet_marker_dict.get( + inference_section.center_chunk.document_id, False + ), # TODO: revisit + db_session=db_session, + commit=False, + ) + for inference_section in all_cited_documents + ] + + # then, map_search_docs to message + _insert_chat_message_search_doc_pair( + message_id, [search_doc.id for search_doc in search_docs], db_session + ) + + # lastly, insert the citations + citation_dict: dict[int, int] = {} + cited_doc_nrs = _extract_citation_numbers(final_answer) + if search_docs: + for cited_doc_nr in cited_doc_nrs: + citation_dict[cited_doc_nr] = search_docs[cited_doc_nr - 1].id + + # Update the chat message and its parent message in database + update_db_session_with_messages( + db_session=db_session, + chat_message_id=message_id, + chat_session_id=chat_session_id, + is_agentic=research_type == ResearchType.DEEP, + message=final_answer, + citations=citation_dict, + research_type=research_type, + research_plan={}, + final_documents=search_docs, + update_parent_message=True, + research_answer_purpose=ResearchAnswerPurpose.ANSWER, + token_count=0, + ) + + # research_agent_iteration_step = ResearchAgentIteration( + # primary_question_id=message_id, + # reasoning="reason", + # purpose="purpose", + # iteration_nr=0, + # ) + # db_session.add(research_agent_iteration_step) + # research_agent_iteration_sub_step = ResearchAgentIterationSubStep( + # primary_question_id=message_id, + # iteration_nr=0, + # iteration_sub_step_nr=0, + # sub_step_instructions="question", + # sub_step_tool_id=18, + # sub_answer="answer", + # reasoning="reasoning", + # claims="claims", + # cited_doc_results=[], + # generated_images=None, + # additional_data=None, + # ) + # db_session.add(research_agent_iteration_sub_step) + + for iteration_preparation in ctx.iteration_instructions: + research_agent_iteration_step = ResearchAgentIteration( + primary_question_id=message_id, + reasoning=iteration_preparation.reasoning, + purpose=iteration_preparation.purpose, + iteration_nr=iteration_preparation.iteration_nr, + ) + db_session.add(research_agent_iteration_step) + + for iteration_answer in ctx.aggregated_context.global_iteration_responses: + + retrieved_search_docs = 
convert_inference_sections_to_search_docs( + list(iteration_answer.cited_documents.values()) + ) + + # Convert SavedSearchDoc objects to JSON-serializable format + serialized_search_docs = [doc.model_dump() for doc in retrieved_search_docs] + + research_agent_iteration_sub_step = ResearchAgentIterationSubStep( + primary_question_id=message_id, + iteration_nr=iteration_answer.iteration_nr, + iteration_sub_step_nr=iteration_answer.parallelization_nr, + sub_step_instructions=iteration_answer.question, + sub_step_tool_id=iteration_answer.tool_id, + sub_answer=iteration_answer.answer, + reasoning=iteration_answer.reasoning, + claims=iteration_answer.claims, + cited_doc_results=serialized_search_docs, + generated_images=( + GeneratedImageFullResult(images=iteration_answer.generated_images) + if iteration_answer.generated_images + else None + ), + additional_data=iteration_answer.additional_data, + ) + db_session.add(research_agent_iteration_sub_step) + + db_session.commit() + + +def _insert_chat_message_search_doc_pair( + message_id: int, search_doc_ids: list[int], db_session: Session +) -> None: + """ + Insert a pair of message_id and search_doc_id into the chat_message__search_doc table. + + Args: + message_id: The ID of the chat message + search_doc_id: The ID of the search document + db_session: The database session + """ + for search_doc_id in search_doc_ids: + chat_message_search_doc = ChatMessage__SearchDoc( + chat_message_id=message_id, search_doc_id=search_doc_id + ) + db_session.add(chat_message_search_doc) + + +def _extract_citation_numbers(text: str) -> list[int]: + """ + Extract all citation numbers from text in the format [[]] or [[, , ...]]. + Returns a list of all unique citation numbers found. + """ + # Pattern to match [[number]] or [[number1, number2, ...]] + pattern = r"\[\[(\d+(?:,\s*\d+)*)\]\]" + matches = re.findall(pattern, text) + + cited_numbers = [] + for match in matches: + # Split by comma and extract all numbers + numbers = [int(num.strip()) for num in match.split(",")] + cited_numbers.extend(numbers) + + return list(set(cited_numbers)) # Return unique numbers diff --git a/backend/onyx/chat/turn/models.py b/backend/onyx/chat/turn/models.py index ae42a40b826..a4ad364f189 100644 --- a/backend/onyx/chat/turn/models.py +++ b/backend/onyx/chat/turn/models.py @@ -1,15 +1,30 @@ from dataclasses import dataclass +from uuid import UUID +from sqlalchemy.orm import Session + +from onyx.agents.agent_search.dr.enums import ResearchType +from onyx.agents.agent_search.dr.models import AggregatedDRContext +from onyx.agents.agent_search.dr.models import IterationInstructions from onyx.chat.turn.infra.chat_turn_event_stream import Emitter from onyx.llm.interfaces import LLM from onyx.tools.tool_implementations.search.search_tool import SearchTool +@dataclass +class DependenciesToMaybeRemove: + chat_session_id: UUID + message_id: int + research_type: ResearchType + + @dataclass class RunDependencies: llm: LLM + db_session: Session emitter: Emitter | None = None search_tool: SearchTool | None = None + dependencies_to_maybe_remove: DependenciesToMaybeRemove | None = None @dataclass @@ -19,3 +34,6 @@ class MyContext: run_dependencies: RunDependencies | None = None needs_compaction: bool = False current_run_step: int = 0 + # TODO: Figure out a cleaner way to persist information. 
+ aggregated_context: AggregatedDRContext | None = None + iteration_instructions: list[IterationInstructions] | None = None From 28b85cbc7270691ac6e1373aa398ba574bb6a866 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Fri, 26 Sep 2025 15:52:05 -0700 Subject: [PATCH 32/34] . --- backend/onyx/chat/turn/fast_chat_turn.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/backend/onyx/chat/turn/fast_chat_turn.py b/backend/onyx/chat/turn/fast_chat_turn.py index c9339fa644e..637c9d00a43 100644 --- a/backend/onyx/chat/turn/fast_chat_turn.py +++ b/backend/onyx/chat/turn/fast_chat_turn.py @@ -1,8 +1,10 @@ import re +from typing import cast from uuid import UUID from agents import Agent from agents import ModelSettings +from agents import RunItemStreamEvent from agents.extensions.models.litellm_model import LitellmModel from sqlalchemy.orm import Session @@ -58,19 +60,25 @@ def fast_chat_turn(messages: list[dict], dependencies: RunDependencies) -> None: ) bridge = OnyxRunner().run_streamed(agent, messages, context=ctx, max_turns=100) + final_answer = "filler final answer" for ev in bridge.events(): ctx.current_run_step obj = default_packet_translation(ev) + print(ev) + # TODO this obviously won't work for cancellation + if isinstance(ev, RunItemStreamEvent): + ev = cast(RunItemStreamEvent, ev) + if ev.name == "message_output_created": + final_answer = ev.item.raw_item.content[0].text if obj: dependencies.emitter.emit(Packet(ind=ctx.current_run_step, obj=obj)) - save_iteration( db_session=dependencies.db_session, message_id=dependencies.dependencies_to_maybe_remove.message_id, chat_session_id=dependencies.dependencies_to_maybe_remove.chat_session_id, research_type=dependencies.dependencies_to_maybe_remove.research_type, ctx=ctx, - final_answer="final_answer", # event should be emitted by agent for this luckily + final_answer=final_answer, all_cited_documents=[], ) # TODO: Error handling From 2c547c7c573215fab656c79e71a10cd0be2585b5 Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Fri, 26 Sep 2025 16:10:27 -0700 Subject: [PATCH 33/34] . 
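
Record web_search activity on the turn context so save_iteration can persist it: each call appends an IterationInstructions entry and an IterationAnswer whose cited_documents are dummy inference sections built from the search hits, and tracks its packet index through a single local index variable (current_run_step = index + 1 afterwards). The commented-out persistence stubs in fast_chat_turn are removed. The tool_id (18) and the sub-answer text are hard-coded for now.
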
--- backend/onyx/chat/turn/fast_chat_turn.py | 22 ---------- .../onyx/tools/tool_implementations_v2/web.py | 44 ++++++++++++++++--- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/backend/onyx/chat/turn/fast_chat_turn.py b/backend/onyx/chat/turn/fast_chat_turn.py index 637c9d00a43..bbd2c5a7687 100644 --- a/backend/onyx/chat/turn/fast_chat_turn.py +++ b/backend/onyx/chat/turn/fast_chat_turn.py @@ -143,28 +143,6 @@ def save_iteration( token_count=0, ) - # research_agent_iteration_step = ResearchAgentIteration( - # primary_question_id=message_id, - # reasoning="reason", - # purpose="purpose", - # iteration_nr=0, - # ) - # db_session.add(research_agent_iteration_step) - # research_agent_iteration_sub_step = ResearchAgentIterationSubStep( - # primary_question_id=message_id, - # iteration_nr=0, - # iteration_sub_step_nr=0, - # sub_step_instructions="question", - # sub_step_tool_id=18, - # sub_answer="answer", - # reasoning="reasoning", - # claims="claims", - # cited_doc_results=[], - # generated_images=None, - # additional_data=None, - # ) - # db_session.add(research_agent_iteration_sub_step) - for iteration_preparation in ctx.iteration_instructions: research_agent_iteration_step = ResearchAgentIteration( primary_question_id=message_id, diff --git a/backend/onyx/tools/tool_implementations_v2/web.py b/backend/onyx/tools/tool_implementations_v2/web.py index c6f3fc38040..4c9fc30c722 100644 --- a/backend/onyx/tools/tool_implementations_v2/web.py +++ b/backend/onyx/tools/tool_implementations_v2/web.py @@ -4,9 +4,14 @@ from agents import function_tool from agents import RunContextWrapper +from onyx.agents.agent_search.dr.models import IterationAnswer +from onyx.agents.agent_search.dr.models import IterationInstructions from onyx.agents.agent_search.dr.sub_agents.web_search.providers import ( get_default_provider, ) +from onyx.agents.agent_search.dr.sub_agents.web_search.utils import ( + dummy_inference_section_from_internet_search_result, +) from onyx.chat.turn.models import MyContext from onyx.configs.constants import DocumentSource from onyx.server.query_and_chat.streaming_models import Packet @@ -33,9 +38,12 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: query: The natural-language search query. 
""" search_provider = get_default_provider() + # TODO: Find better way to track index that isn't so implicit + # based on number of tool calls + index = run_context.context.current_run_step + 1 run_context.context.run_dependencies.emitter.emit( Packet( - ind=run_context.context.current_run_step + 1, + ind=index, obj=SearchToolStart( type="internal_search_tool_start", is_internet_search=True ), @@ -43,12 +51,20 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: ) run_context.context.run_dependencies.emitter.emit( Packet( - ind=run_context.context.current_run_step + 1, + ind=index, obj=SearchToolDelta( type="internal_search_tool_delta", queries=[query], documents=None ), ) ) + run_context.context.iteration_instructions.append( + IterationInstructions( + iteration_nr=index, + plan="plan", + purpose="Searching the web for information", + reasoning=f"I am now using Web Search to gather information on {query}", + ) + ) hits = search_provider.search(query) results = [] for i, r in enumerate(hits): @@ -90,7 +106,7 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: # TODO: Remove "Results" section from internet search tool run_context.context.run_dependencies.emitter.emit( Packet( - ind=run_context.context.current_run_step + 1, + ind=index, obj=SearchToolDelta( type="internal_search_tool_delta", queries=None, @@ -98,15 +114,33 @@ def web_search(run_context: RunContextWrapper[MyContext], query: str) -> str: ), ) ) + dummy_docs_inference_sections = [ + dummy_inference_section_from_internet_search_result(doc) for doc in hits + ] + run_context.context.aggregated_context.global_iteration_responses.append( + IterationAnswer( + tool="web_search", + tool_id=18, + iteration_nr=index, + parallelization_nr=0, + question=query, + reasoning=f"I am now using Web Search to gather information on {query}", + answer="Cool", + cited_documents={ + i: inference_section + for i, inference_section in enumerate(dummy_docs_inference_sections) + }, + ) + ) run_context.context.run_dependencies.emitter.emit( Packet( - ind=run_context.context.current_run_step + 1, + ind=index, obj=SectionEnd( type="section_end", ), ) ) - run_context.context.current_run_step += 2 + run_context.context.current_run_step = index + 1 return json.dumps({"results": results}) From 6103a562c20d92abf917b5fb6069c19185576dae Mon Sep 17 00:00:00 2001 From: Richard Guan Date: Fri, 26 Sep 2025 16:16:05 -0700 Subject: [PATCH 34/34] . --- .../internal_search.py | 43 +++++++++++++++---- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/backend/onyx/tools/tool_implementations_v2/internal_search.py b/backend/onyx/tools/tool_implementations_v2/internal_search.py index 94e84911ab5..8fbce446e9e 100644 --- a/backend/onyx/tools/tool_implementations_v2/internal_search.py +++ b/backend/onyx/tools/tool_implementations_v2/internal_search.py @@ -3,6 +3,8 @@ from agents import function_tool from agents import RunContextWrapper +from onyx.agents.agent_search.dr.models import IterationAnswer +from onyx.agents.agent_search.dr.models import IterationInstructions from onyx.chat.turn.models import MyContext from onyx.db.engine.sql_engine import get_session_with_current_tenant from onyx.server.query_and_chat.streaming_models import Packet @@ -29,9 +31,14 @@ def internal_search(run_context: RunContextWrapper[MyContext], query: str) -> st Args: query: The natural-language search query. 
""" + search_tool = run_context.context.run_dependencies.search_tool + if search_tool is None: + raise RuntimeError("Search tool not available in context") + + index = run_context.context.current_run_step + 1 run_context.context.run_dependencies.emitter.emit( Packet( - ind=run_context.context.current_run_step + 1, + ind=index, obj=SearchToolStart( type="internal_search_tool_start", is_internet_search=False ), @@ -39,15 +46,20 @@ def internal_search(run_context: RunContextWrapper[MyContext], query: str) -> st ) run_context.context.run_dependencies.emitter.emit( Packet( - ind=run_context.context.current_run_step + 1, + ind=index, obj=SearchToolDelta( type="internal_search_tool_delta", queries=[query], documents=None ), ) ) - search_tool = run_context.context.run_dependencies.search_tool - if search_tool is None: - raise RuntimeError("Search tool not available in context") + run_context.context.iteration_instructions.append( + IterationInstructions( + iteration_nr=index, + plan="plan", + purpose="Searching internally for information", + reasoning=f"I am now using Internal Search to gather information on {query}", + ) + ) with get_session_with_current_tenant() as search_db_session: for tool_response in search_tool.run( @@ -65,7 +77,7 @@ def internal_search(run_context: RunContextWrapper[MyContext], query: str) -> st retrieved_docs = response.top_sections run_context.context.run_dependencies.emitter.emit( Packet( - ind=run_context.context.current_run_step + 1, + ind=index, obj=SearchToolDelta( type="internal_search_tool_delta", queries=None, @@ -95,14 +107,29 @@ def internal_search(run_context: RunContextWrapper[MyContext], query: str) -> st ), ) ) + run_context.context.aggregated_context.global_iteration_responses.append( + IterationAnswer( + tool="internal_search", + tool_id=1, + iteration_nr=index, + parallelization_nr=0, + question=query, + reasoning=f"I am now using Internal Search to gather information on {query}", + answer="Cool", + cited_documents={ + i: inference_section + for i, inference_section in enumerate(retrieved_docs) + }, + ) + ) break run_context.context.run_dependencies.emitter.emit( Packet( - ind=run_context.context.current_run_step + 1, + ind=index, obj=SectionEnd( type="section_end", ), ) ) - run_context.context.current_run_step += 2 + run_context.context.current_run_step = index + 1 return retrieved_docs