From fca4ad0df107cfd7ed0f446d5536d78ab1289a63 Mon Sep 17 00:00:00 2001 From: joachim-danswer Date: Tue, 9 Sep 2025 09:31:13 -0700 Subject: [PATCH 01/10] timeout factor --- .../dr/nodes/dr_a0_clarification.py | 11 +++++---- .../dr/nodes/dr_a1_orchestrator.py | 23 ++++++++++--------- .../agent_search/dr/nodes/dr_a2_closer.py | 5 ++-- backend/onyx/configs/agent_configs.py | 2 ++ 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py b/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py index cfe6308352f..d9172c32ed0 100644 --- a/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py +++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py @@ -35,6 +35,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import run_with_timeout from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event from onyx.agents.agent_search.utils import create_question_prompt +from onyx.configs.agent_configs import TF_DR_TIMEOUT_MULTIPLIER from onyx.configs.constants import DocumentSource from onyx.configs.constants import DocumentSourceDescription from onyx.configs.constants import TMP_DRALPHA_PERSONA_NAME @@ -488,7 +489,7 @@ def clarifier( ) answer_tokens, _, _ = run_with_timeout( - 80, + int(80 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=create_question_prompt( @@ -501,7 +502,7 @@ def clarifier( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=60, + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), ind=current_step_nr, context_docs=None, replace_citations=True, @@ -645,7 +646,7 @@ def clarifier( assistant_system_prompt, clarification_prompt ), schema=ClarificationGenerationResponse, - timeout_override=25, + timeout_override=int(25 * TF_DR_TIMEOUT_MULTIPLIER), # max_tokens=1500, ) except Exception as e: @@ -674,7 +675,7 @@ def clarifier( ) _, _, _ = run_with_timeout( - 80, + int(80 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=repeat_prompt, @@ -683,7 +684,7 @@ def clarifier( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=60, + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), answer_piece=StreamingType.MESSAGE_DELTA.value, ind=current_step_nr, # max_tokens=None, diff --git a/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py b/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py index a4de595ae14..018adcaed1f 100644 --- a/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py +++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py @@ -30,6 +30,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import run_with_timeout from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event from onyx.agents.agent_search.utils import create_question_prompt +from onyx.configs.agent_configs import TF_DR_TIMEOUT_MULTIPLIER from onyx.kg.utils.extraction_utils import get_entity_types_str from onyx.kg.utils.extraction_utils import get_relationship_types_str from onyx.prompts.dr_prompts import DEFAULLT_DECISION_PROMPT @@ -200,7 +201,7 @@ def orchestrator( reasoning_tokens: list[str] = [""] reasoning_tokens, _, _ = run_with_timeout( - 80, + int(80 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=create_question_prompt( @@ -211,7 +212,7 @@ def orchestrator( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=60, + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), answer_piece=StreamingType.REASONING_DELTA.value, ind=current_step_nr, # max_tokens=None, @@ -297,7 +298,7 @@ def orchestrator( decision_prompt, ), schema=OrchestratorDecisonsNoPlan, - timeout_override=35, + timeout_override=int(35 * TF_DR_TIMEOUT_MULTIPLIER), # max_tokens=2500, ) next_step = orchestrator_action.next_step @@ -348,7 +349,7 @@ def orchestrator( plan_generation_prompt, ), schema=OrchestrationPlan, - timeout_override=25, + timeout_override=int(25 * TF_DR_TIMEOUT_MULTIPLIER), # max_tokens=3000, ) except Exception as e: @@ -368,7 +369,7 @@ def orchestrator( ) _, _, _ = run_with_timeout( - 80, + int(80 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=repeat_plan_prompt, @@ -377,7 +378,7 @@ def orchestrator( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=60, + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), answer_piece=StreamingType.REASONING_DELTA.value, ind=current_step_nr, ), @@ -426,7 +427,7 @@ def orchestrator( decision_prompt, ), schema=OrchestratorDecisonsNoPlan, - timeout_override=15, + timeout_override=int(15 * TF_DR_TIMEOUT_MULTIPLIER), # max_tokens=1500, ) next_step = orchestrator_action.next_step @@ -460,7 +461,7 @@ def orchestrator( ) _, _, _ = run_with_timeout( - 80, + int(80 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=repeat_reasoning_prompt, @@ -469,7 +470,7 @@ def orchestrator( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=60, + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), answer_piece=StreamingType.REASONING_DELTA.value, ind=current_step_nr, # max_tokens=None, @@ -508,7 +509,7 @@ def orchestrator( ) purpose_tokens, _, _ = run_with_timeout( - 80, + int(80 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=create_question_prompt( @@ -520,7 +521,7 @@ def orchestrator( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=60, + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), answer_piece=StreamingType.REASONING_DELTA.value, ind=current_step_nr, # max_tokens=None, diff --git a/backend/onyx/agents/agent_search/dr/nodes/dr_a2_closer.py b/backend/onyx/agents/agent_search/dr/nodes/dr_a2_closer.py index 96a7f8dc8e0..dc7e7ce96ec 100644 --- a/backend/onyx/agents/agent_search/dr/nodes/dr_a2_closer.py +++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a2_closer.py @@ -33,6 +33,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event from onyx.agents.agent_search.utils import create_question_prompt from onyx.chat.chat_utils import llm_doc_from_inference_section +from onyx.configs.agent_configs import TF_DR_TIMEOUT_MULTIPLIER from onyx.context.search.models import InferenceSection from onyx.db.chat import create_search_doc_from_inference_section from onyx.db.chat import update_db_session_with_messages @@ -353,7 +354,7 @@ def closer( try: streamed_output, _, citation_infos = run_with_timeout( - 240, + int(240 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=create_question_prompt( @@ -365,7 +366,7 @@ def closer( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=60, + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), answer_piece=StreamingType.MESSAGE_DELTA.value, ind=current_step_nr, context_docs=all_context_llmdocs, diff --git a/backend/onyx/configs/agent_configs.py b/backend/onyx/configs/agent_configs.py index 002a4cf2623..6ac6a341cab 100644 --- a/backend/onyx/configs/agent_configs.py +++ b/backend/onyx/configs/agent_configs.py @@ -379,4 +379,6 @@ or AGENT_DEFAULT_MAX_TOKENS_HISTORY_SUMMARY ) +TF_DR_TIMEOUT_MULTIPLIER = float(os.environ.get("TF_DR_TIMEOUT_MULTIPLIER") or 1.2) + GRAPH_VERSION_NAME: str = "a" From 13ede6d6b6e1c47c1dd8cb0c5bd8471f81fa70b1 Mon Sep 17 00:00:00 2001 From: joachim-danswer Date: Tue, 9 Sep 2025 09:32:12 -0700 Subject: [PATCH 02/10] timeout default factor reset --- backend/onyx/configs/agent_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/onyx/configs/agent_configs.py b/backend/onyx/configs/agent_configs.py index 6ac6a341cab..64cb377ba84 100644 --- a/backend/onyx/configs/agent_configs.py +++ b/backend/onyx/configs/agent_configs.py @@ -379,6 +379,6 @@ or AGENT_DEFAULT_MAX_TOKENS_HISTORY_SUMMARY ) -TF_DR_TIMEOUT_MULTIPLIER = float(os.environ.get("TF_DR_TIMEOUT_MULTIPLIER") or 1.2) +TF_DR_TIMEOUT_MULTIPLIER = float(os.environ.get("TF_DR_TIMEOUT_MULTIPLIER") or 1.0) GRAPH_VERSION_NAME: str = "a" From ed0f6b8edd25fbf163751ffff6799cf1fe486a77 Mon Sep 17 00:00:00 2001 From: joachim-danswer Date: Tue, 9 Sep 2025 11:47:59 -0700 Subject: [PATCH 03/10] Hidden FAST mode and prompt changes --- .../onyx/agents/agent_search/dr/constants.py | 1 + backend/onyx/agents/agent_search/dr/enums.py | 1 + .../dr/nodes/dr_a0_clarification.py | 2 +- .../dr/nodes/dr_a1_orchestrator.py | 106 +++++++++++------- backend/onyx/chat/answer.py | 13 ++- backend/onyx/configs/agent_configs.py | 4 + backend/onyx/prompts/dr_prompts.py | 18 ++- 7 files changed, 99 insertions(+), 46 deletions(-) diff --git a/backend/onyx/agents/agent_search/dr/constants.py b/backend/onyx/agents/agent_search/dr/constants.py index 02dec3efc9e..ed7c5e0692e 100644 --- a/backend/onyx/agents/agent_search/dr/constants.py +++ b/backend/onyx/agents/agent_search/dr/constants.py @@ -27,4 +27,5 @@ DR_TIME_BUDGET_BY_TYPE = { ResearchType.THOUGHTFUL: 3.0, ResearchType.DEEP: 12.0, + ResearchType.FAST: 0.5, } diff --git a/backend/onyx/agents/agent_search/dr/enums.py b/backend/onyx/agents/agent_search/dr/enums.py index 28b94092d9f..8bcaf17a626 100644 --- a/backend/onyx/agents/agent_search/dr/enums.py +++ b/backend/onyx/agents/agent_search/dr/enums.py @@ -8,6 +8,7 @@ class ResearchType(str, Enum): LEGACY_AGENTIC = "LEGACY_AGENTIC" # only used for legacy agentic search migrations THOUGHTFUL = "THOUGHTFUL" DEEP = "DEEP" + FAST = "FAST" class ResearchAnswerPurpose(str, Enum): diff --git a/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py b/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py index d9172c32ed0..2c77164da89 100644 --- a/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py +++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py @@ -613,7 +613,7 @@ def clarifier( clarification = None - if research_type != ResearchType.THOUGHTFUL: + if research_type == ResearchType.DEEP: result = _get_existing_clarification_request(graph_config) if result is not None: clarification, original_question, chat_history_string = result diff --git a/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py b/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py index 018adcaed1f..3296ca59f14 100644 --- a/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py +++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py @@ -171,11 +171,36 @@ def orchestrator( reasoning_result = "(No reasoning result provided yet.)" tool_calls_string = "(No tool calls provided yet.)" - if research_type == ResearchType.THOUGHTFUL: + if research_type in [ResearchType.THOUGHTFUL, ResearchType.FAST]: if iteration_nr == 1: - remaining_time_budget = DR_TIME_BUDGET_BY_TYPE[ResearchType.THOUGHTFUL] + remaining_time_budget = DR_TIME_BUDGET_BY_TYPE[research_type] - elif iteration_nr > 1: + elif remaining_time_budget <= 0: + return OrchestrationUpdate( + tools_used=[DRPath.CLOSER.value], + current_step_nr=current_step_nr, + query_list=[], + iteration_nr=iteration_nr, + log_messages=[ + get_langgraph_node_log_string( + graph_component="main", + node_name="orchestrator", + node_start_time=node_start_time, + ) + ], + plan_of_record=plan_of_record, + remaining_time_budget=remaining_time_budget, + iteration_instructions=[ + IterationInstructions( + iteration_nr=iteration_nr, + plan=None, + reasoning="Time to wrap up.", + purpose="", + ) + ], + ) + + elif iteration_nr > 1 and remaining_time_budget > 0: # for each iteration past the first one, we need to see whether we # have enough information to answer the question. # if we do, we can stop the iteration and return the answer. @@ -321,7 +346,7 @@ def orchestrator( reasoning_result = "Time to wrap up." next_tool_name = DRPath.CLOSER.value - else: + elif research_type == ResearchType.DEEP: if iteration_nr == 1 and not plan_of_record: # by default, we start a new iteration, but if there is a feedback request, # we start a new iteration 0 again (set a bit later) @@ -500,47 +525,52 @@ def orchestrator( purpose_tokens: list[str] = [""] - try: + if research_type in [ResearchType.THOUGHTFUL, ResearchType.DEEP]: - write_custom_event( - current_step_nr, - ReasoningStart(), - writer, - ) + try: - purpose_tokens, _, _ = run_with_timeout( - int(80 * TF_DR_TIMEOUT_MULTIPLIER), - lambda: stream_llm_answer( - llm=graph_config.tooling.primary_llm, - prompt=create_question_prompt( - decision_system_prompt, - orchestration_next_step_purpose_prompt, + write_custom_event( + current_step_nr, + ReasoningStart(), + writer, + ) + + purpose_tokens, _, _ = run_with_timeout( + int(80 * TF_DR_TIMEOUT_MULTIPLIER), + lambda: stream_llm_answer( + llm=graph_config.tooling.primary_llm, + prompt=create_question_prompt( + decision_system_prompt, + orchestration_next_step_purpose_prompt, + ), + event_name="basic_response", + writer=writer, + agent_answer_level=0, + agent_answer_question_num=0, + agent_answer_type="agent_level_answer", + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), + answer_piece=StreamingType.REASONING_DELTA.value, + ind=current_step_nr, + # max_tokens=None, ), - event_name="basic_response", - writer=writer, - agent_answer_level=0, - agent_answer_question_num=0, - agent_answer_type="agent_level_answer", - timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), - answer_piece=StreamingType.REASONING_DELTA.value, - ind=current_step_nr, - # max_tokens=None, - ), - ) + ) - write_custom_event( - current_step_nr, - SectionEnd(), - writer, - ) + write_custom_event( + current_step_nr, + SectionEnd(), + writer, + ) - current_step_nr += 1 + current_step_nr += 1 + + except Exception as e: + logger.error(f"Error in orchestration next step purpose: {e}") + raise e - except Exception as e: - logger.error(f"Error in orchestration next step purpose: {e}") - raise e + purpose = cast(str, merge_content(*purpose_tokens)) - purpose = cast(str, merge_content(*purpose_tokens)) + elif research_type == ResearchType.FAST: + purpose = f"Answering the question using the {next_tool_name}" if not next_tool_name: raise ValueError("The next step has not been defined. This should not happen.") diff --git a/backend/onyx/chat/answer.py b/backend/onyx/chat/answer.py index 331253c47b8..3dba8fb63bd 100644 --- a/backend/onyx/chat/answer.py +++ b/backend/onyx/chat/answer.py @@ -19,6 +19,7 @@ from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder from onyx.configs.agent_configs import AGENT_ALLOW_REFINEMENT from onyx.configs.agent_configs import INITIAL_SEARCH_DECOMPOSITION_ENABLED +from onyx.configs.agent_configs import TF_DR_DEFAULT_THOUGHTFUL from onyx.context.search.models import RerankingDetails from onyx.db.kg_config import get_kg_config_settings from onyx.db.models import Persona @@ -110,6 +111,14 @@ def __init__( chat_session_id=chat_session_id, message_id=current_agent_message_id, ) + + if use_agentic_search: + research_type = ResearchType.DEEP + elif TF_DR_DEFAULT_THOUGHTFUL: + research_type = ResearchType.THOUGHTFUL + else: + research_type = ResearchType.FAST + self.search_behavior_config = GraphSearchConfig( use_agentic_search=use_agentic_search, skip_gen_ai_answer_generation=skip_gen_ai_answer_generation, @@ -117,9 +126,7 @@ def __init__( allow_agent_reranking=allow_agent_reranking, perform_initial_search_decomposition=INITIAL_SEARCH_DECOMPOSITION_ENABLED, kg_config_settings=get_kg_config_settings(), - research_type=( - ResearchType.DEEP if use_agentic_search else ResearchType.THOUGHTFUL - ), + research_type=research_type, ) self.graph_config = GraphConfig( inputs=self.graph_inputs, diff --git a/backend/onyx/configs/agent_configs.py b/backend/onyx/configs/agent_configs.py index 64cb377ba84..be5d813e6e1 100644 --- a/backend/onyx/configs/agent_configs.py +++ b/backend/onyx/configs/agent_configs.py @@ -381,4 +381,8 @@ TF_DR_TIMEOUT_MULTIPLIER = float(os.environ.get("TF_DR_TIMEOUT_MULTIPLIER") or 1.0) +TF_DR_DEFAULT_THOUGHTFUL = not ( + (os.environ.get("TF_DR_DEFAULT_THOUGHTFUL") or "True").lower() == "false" +) + GRAPH_VERSION_NAME: str = "a" diff --git a/backend/onyx/prompts/dr_prompts.py b/backend/onyx/prompts/dr_prompts.py index 5aca8e559fd..9a3eb6ec4f5 100644 --- a/backend/onyx/prompts/dr_prompts.py +++ b/backend/onyx/prompts/dr_prompts.py @@ -485,10 +485,15 @@ BUILDS ON TOP of the learnings so far in order to get new targeted information that gets us to be able \ to answer the original question. -Please format your answer as a json dictionary in the following format: +Please format your answer as a json dictionary in the format below. +Note: + - in the "next_step" field below, please return a dictionary as described below. In \ +particular, make sure the keys are "tool" and "questions", and DO NOT refer to \ + tool_name" or something like that. Keys are "tool" and "questions". + {{ "reasoning": "", - "next_step": {{"tool": "<---tool_choice_options--->", + "next_step": {{"tool": " Date: Tue, 9 Sep 2025 11:57:03 -0700 Subject: [PATCH 04/10] default time-out adjustments + more factors --- .../dr/nodes/dr_a0_clarification.py | 10 +++++----- .../dr/nodes/dr_a1_orchestrator.py | 20 +++++++++---------- .../agent_search/dr/nodes/dr_a2_closer.py | 6 +++--- .../basic_search/dr_basic_search_2_act.py | 5 +++-- .../custom_tool/dr_custom_tool_2_act.py | 5 +++-- .../dr_generic_internal_tool_2_act.py | 5 +++-- .../sub_agents/web_search/dr_ws_2_search.py | 3 ++- .../web_search/dr_ws_6_summarize.py | 3 ++- 8 files changed, 31 insertions(+), 26 deletions(-) diff --git a/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py b/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py index 2c77164da89..9ce68de92a5 100644 --- a/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py +++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py @@ -489,7 +489,7 @@ def clarifier( ) answer_tokens, _, _ = run_with_timeout( - int(80 * TF_DR_TIMEOUT_MULTIPLIER), + int(120 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=create_question_prompt( @@ -502,7 +502,7 @@ def clarifier( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), + timeout_override=int(90 * TF_DR_TIMEOUT_MULTIPLIER), ind=current_step_nr, context_docs=None, replace_citations=True, @@ -646,7 +646,7 @@ def clarifier( assistant_system_prompt, clarification_prompt ), schema=ClarificationGenerationResponse, - timeout_override=int(25 * TF_DR_TIMEOUT_MULTIPLIER), + timeout_override=int(50 * TF_DR_TIMEOUT_MULTIPLIER), # max_tokens=1500, ) except Exception as e: @@ -675,7 +675,7 @@ def clarifier( ) _, _, _ = run_with_timeout( - int(80 * TF_DR_TIMEOUT_MULTIPLIER), + int(120 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=repeat_prompt, @@ -684,7 +684,7 @@ def clarifier( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), + timeout_override=int(90 * TF_DR_TIMEOUT_MULTIPLIER), answer_piece=StreamingType.MESSAGE_DELTA.value, ind=current_step_nr, # max_tokens=None, diff --git a/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py b/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py index 3296ca59f14..faf3d610469 100644 --- a/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py +++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py @@ -226,7 +226,7 @@ def orchestrator( reasoning_tokens: list[str] = [""] reasoning_tokens, _, _ = run_with_timeout( - int(80 * TF_DR_TIMEOUT_MULTIPLIER), + int(120 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=create_question_prompt( @@ -237,7 +237,7 @@ def orchestrator( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), + timeout_override=int(90 * TF_DR_TIMEOUT_MULTIPLIER), answer_piece=StreamingType.REASONING_DELTA.value, ind=current_step_nr, # max_tokens=None, @@ -323,7 +323,7 @@ def orchestrator( decision_prompt, ), schema=OrchestratorDecisonsNoPlan, - timeout_override=int(35 * TF_DR_TIMEOUT_MULTIPLIER), + timeout_override=int(50 * TF_DR_TIMEOUT_MULTIPLIER), # max_tokens=2500, ) next_step = orchestrator_action.next_step @@ -394,7 +394,7 @@ def orchestrator( ) _, _, _ = run_with_timeout( - int(80 * TF_DR_TIMEOUT_MULTIPLIER), + int(120 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=repeat_plan_prompt, @@ -403,7 +403,7 @@ def orchestrator( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), + timeout_override=int(90 * TF_DR_TIMEOUT_MULTIPLIER), answer_piece=StreamingType.REASONING_DELTA.value, ind=current_step_nr, ), @@ -452,7 +452,7 @@ def orchestrator( decision_prompt, ), schema=OrchestratorDecisonsNoPlan, - timeout_override=int(15 * TF_DR_TIMEOUT_MULTIPLIER), + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), # max_tokens=1500, ) next_step = orchestrator_action.next_step @@ -486,7 +486,7 @@ def orchestrator( ) _, _, _ = run_with_timeout( - int(80 * TF_DR_TIMEOUT_MULTIPLIER), + int(120 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=repeat_reasoning_prompt, @@ -495,7 +495,7 @@ def orchestrator( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), + timeout_override=int(90 * TF_DR_TIMEOUT_MULTIPLIER), answer_piece=StreamingType.REASONING_DELTA.value, ind=current_step_nr, # max_tokens=None, @@ -536,7 +536,7 @@ def orchestrator( ) purpose_tokens, _, _ = run_with_timeout( - int(80 * TF_DR_TIMEOUT_MULTIPLIER), + int(120 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=create_question_prompt( @@ -548,7 +548,7 @@ def orchestrator( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), + timeout_override=int(90 * TF_DR_TIMEOUT_MULTIPLIER), answer_piece=StreamingType.REASONING_DELTA.value, ind=current_step_nr, # max_tokens=None, diff --git a/backend/onyx/agents/agent_search/dr/nodes/dr_a2_closer.py b/backend/onyx/agents/agent_search/dr/nodes/dr_a2_closer.py index dc7e7ce96ec..90a42e957c4 100644 --- a/backend/onyx/agents/agent_search/dr/nodes/dr_a2_closer.py +++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a2_closer.py @@ -277,7 +277,7 @@ def closer( test_info_complete_prompt + (assistant_task_prompt or ""), ), schema=TestInfoCompleteResponse, - timeout_override=40, + timeout_override=int(80 * TF_DR_TIMEOUT_MULTIPLIER), # max_tokens=1000, ) @@ -354,7 +354,7 @@ def closer( try: streamed_output, _, citation_infos = run_with_timeout( - int(240 * TF_DR_TIMEOUT_MULTIPLIER), + int(300 * TF_DR_TIMEOUT_MULTIPLIER), lambda: stream_llm_answer( llm=graph_config.tooling.primary_llm, prompt=create_question_prompt( @@ -366,7 +366,7 @@ def closer( agent_answer_level=0, agent_answer_question_num=0, agent_answer_type="agent_level_answer", - timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), + timeout_override=int(90 * TF_DR_TIMEOUT_MULTIPLIER), answer_piece=StreamingType.MESSAGE_DELTA.value, ind=current_step_nr, context_docs=all_context_llmdocs, diff --git a/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_2_act.py b/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_2_act.py index e3d6e1d0b06..9e31399dd28 100644 --- a/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_2_act.py +++ b/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_2_act.py @@ -22,6 +22,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event from onyx.agents.agent_search.utils import create_question_prompt from onyx.chat.models import LlmDoc +from onyx.configs.agent_configs import TF_DR_TIMEOUT_MULTIPLIER from onyx.context.search.models import InferenceSection from onyx.db.connector import DocumentSource from onyx.db.engine.sql_engine import get_session_with_current_tenant @@ -94,7 +95,7 @@ def basic_search( assistant_system_prompt, base_search_processing_prompt ), schema=BaseSearchProcessingResponse, - timeout_override=15, + timeout_override=int(30 * TF_DR_TIMEOUT_MULTIPLIER), # max_tokens=100, ) except Exception as e: @@ -203,7 +204,7 @@ def basic_search( assistant_system_prompt, search_prompt + (assistant_task_prompt or "") ), schema=SearchAnswer, - timeout_override=40, + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), # max_tokens=1500, ) diff --git a/backend/onyx/agents/agent_search/dr/sub_agents/custom_tool/dr_custom_tool_2_act.py b/backend/onyx/agents/agent_search/dr/sub_agents/custom_tool/dr_custom_tool_2_act.py index afd58c1d9a9..3a23c4008e6 100644 --- a/backend/onyx/agents/agent_search/dr/sub_agents/custom_tool/dr_custom_tool_2_act.py +++ b/backend/onyx/agents/agent_search/dr/sub_agents/custom_tool/dr_custom_tool_2_act.py @@ -13,6 +13,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import ( get_langgraph_node_log_string, ) +from onyx.configs.agent_configs import TF_DR_TIMEOUT_MULTIPLIER from onyx.prompts.dr_prompts import CUSTOM_TOOL_PREP_PROMPT from onyx.prompts.dr_prompts import CUSTOM_TOOL_USE_PROMPT from onyx.tools.tool_implementations.custom.custom_tool import CUSTOM_TOOL_RESPONSE_ID @@ -68,7 +69,7 @@ def custom_tool_act( tool_use_prompt, tools=[custom_tool.tool_definition()], tool_choice="required", - timeout_override=40, + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), ) # make sure we got a tool call @@ -124,7 +125,7 @@ def custom_tool_act( ) answer_string = str( graph_config.tooling.primary_llm.invoke( - tool_summary_prompt, timeout_override=40 + tool_summary_prompt, timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER) ).content ).strip() diff --git a/backend/onyx/agents/agent_search/dr/sub_agents/generic_internal_tool/dr_generic_internal_tool_2_act.py b/backend/onyx/agents/agent_search/dr/sub_agents/generic_internal_tool/dr_generic_internal_tool_2_act.py index b9a86ff4536..14f59edd9de 100644 --- a/backend/onyx/agents/agent_search/dr/sub_agents/generic_internal_tool/dr_generic_internal_tool_2_act.py +++ b/backend/onyx/agents/agent_search/dr/sub_agents/generic_internal_tool/dr_generic_internal_tool_2_act.py @@ -13,6 +13,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import ( get_langgraph_node_log_string, ) +from onyx.configs.agent_configs import TF_DR_TIMEOUT_MULTIPLIER from onyx.prompts.dr_prompts import CUSTOM_TOOL_PREP_PROMPT from onyx.prompts.dr_prompts import CUSTOM_TOOL_USE_PROMPT from onyx.prompts.dr_prompts import OKTA_TOOL_USE_SPECIAL_PROMPT @@ -68,7 +69,7 @@ def generic_internal_tool_act( tool_use_prompt, tools=[generic_internal_tool.tool_definition()], tool_choice="required", - timeout_override=40, + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), ) # make sure we got a tool call @@ -113,7 +114,7 @@ def generic_internal_tool_act( ) answer_string = str( graph_config.tooling.primary_llm.invoke( - tool_summary_prompt, timeout_override=40 + tool_summary_prompt, timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER) ).content ).strip() diff --git a/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_2_search.py b/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_2_search.py index 2b6a73a2d8a..6dbe6cadbbb 100644 --- a/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_2_search.py +++ b/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_2_search.py @@ -25,6 +25,7 @@ ) from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event from onyx.agents.agent_search.utils import create_question_prompt +from onyx.configs.agent_configs import TF_DR_TIMEOUT_MULTIPLIER from onyx.prompts.dr_prompts import WEB_SEARCH_URL_SELECTION_PROMPT from onyx.server.query_and_chat.streaming_models import SearchToolDelta from onyx.utils.logger import setup_logger @@ -108,7 +109,7 @@ def _search(search_query: str) -> list[InternetSearchResult]: agent_decision_prompt + (assistant_task_prompt or ""), ), schema=WebSearchAnswer, - timeout_override=30, + timeout_override=int(50 * TF_DR_TIMEOUT_MULTIPLIER), ) results_to_open = [ (search_query, search_results[i]) diff --git a/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_6_summarize.py b/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_6_summarize.py index a953749c1b9..0927d91878c 100644 --- a/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_6_summarize.py +++ b/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_6_summarize.py @@ -17,6 +17,7 @@ get_langgraph_node_log_string, ) from onyx.agents.agent_search.utils import create_question_prompt +from onyx.configs.agent_configs import TF_DR_TIMEOUT_MULTIPLIER from onyx.context.search.models import InferenceSection from onyx.prompts.dr_prompts import INTERNAL_SEARCH_PROMPTS from onyx.utils.logger import setup_logger @@ -66,7 +67,7 @@ def is_summarize( assistant_system_prompt, search_prompt + (assistant_task_prompt or "") ), schema=SearchAnswer, - timeout_override=40, + timeout_override=int(60 * TF_DR_TIMEOUT_MULTIPLIER), ) answer_string = search_answer_json.answer claims = search_answer_json.claims or [] From edabc08eaf530540f59d39c876fea9dfc0b5d00e Mon Sep 17 00:00:00 2001 From: joachim-danswer Date: Tue, 9 Sep 2025 12:38:39 -0700 Subject: [PATCH 05/10] search fix --- .../dr/sub_agents/basic_search/dr_basic_search_2_act.py | 6 +++--- backend/onyx/prompts/dr_prompts.py | 9 --------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_2_act.py b/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_2_act.py index 9e31399dd28..22f79b544b1 100644 --- a/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_2_act.py +++ b/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_2_act.py @@ -225,9 +225,9 @@ def basic_search( claims, ) = extract_document_citations(answer_string, claims) - if (citation_numbers and max(citation_numbers) > len(retrieved_docs)) or min( - citation_numbers - ) < 1: + if citation_numbers and ( + (max(citation_numbers) > len(retrieved_docs)) or min(citation_numbers) < 1 + ): raise ValueError("Citation numbers are out of range for retrieved docs.") cited_documents = { diff --git a/backend/onyx/prompts/dr_prompts.py b/backend/onyx/prompts/dr_prompts.py index 9a3eb6ec4f5..f0d85427373 100644 --- a/backend/onyx/prompts/dr_prompts.py +++ b/backend/onyx/prompts/dr_prompts.py @@ -1480,15 +1480,6 @@ to them. """ - -""" -# We do not want to be too aggressive here because for example questions about other users is -# usually fine (i.e. 'what did my team work on last week?') with permissions handled within \ -# the system. But some inspection as best practice should be done. -# Also, a number of these things would not work anyway given db and other permissions, but it would be \ -# best practice to reject them so that they can also be captured/monitored. -# QUERY_EVALUATION_PROMPT = f""" - WEB_SEARCH_URL_SELECTION_PROMPT = PromptTemplate( f""" You are tasked with gathering information from the web with search query: From 22241d69098447dd4ca7ae98ad6a6cbbca485e69 Mon Sep 17 00:00:00 2001 From: joachim-danswer Date: Tue, 9 Sep 2025 14:21:17 -0700 Subject: [PATCH 06/10] cubic comments --- .../onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py | 2 +- backend/onyx/configs/agent_configs.py | 4 +++- backend/onyx/prompts/dr_prompts.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py b/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py index faf3d610469..e1af943d1ff 100644 --- a/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py +++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py @@ -564,7 +564,7 @@ def orchestrator( current_step_nr += 1 except Exception as e: - logger.error(f"Error in orchestration next step purpose: {e}") + logger.error("Error in orchestration next step purpose.") raise e purpose = cast(str, merge_content(*purpose_tokens)) diff --git a/backend/onyx/configs/agent_configs.py b/backend/onyx/configs/agent_configs.py index be5d813e6e1..a7f832db872 100644 --- a/backend/onyx/configs/agent_configs.py +++ b/backend/onyx/configs/agent_configs.py @@ -379,7 +379,9 @@ or AGENT_DEFAULT_MAX_TOKENS_HISTORY_SUMMARY ) -TF_DR_TIMEOUT_MULTIPLIER = float(os.environ.get("TF_DR_TIMEOUT_MULTIPLIER") or 1.0) +TF_DR_TIMEOUT_MULTIPLIER = max( + 1.0, float(os.environ.get("TF_DR_TIMEOUT_MULTIPLIER") or 1.0) +) TF_DR_DEFAULT_THOUGHTFUL = not ( (os.environ.get("TF_DR_DEFAULT_THOUGHTFUL") or "True").lower() == "false" diff --git a/backend/onyx/prompts/dr_prompts.py b/backend/onyx/prompts/dr_prompts.py index f0d85427373..c84ece83cea 100644 --- a/backend/onyx/prompts/dr_prompts.py +++ b/backend/onyx/prompts/dr_prompts.py @@ -685,7 +685,7 @@ {{ "reasoning": "", - "next_step": {{"tool": "