Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
92 commits
Select commit Hold shift + click to select a range
ffed8b4
orchestration base
joachim-danswer Jul 9, 2025
da3979f
is_agentic_overwrite
joachim-danswer Jul 9, 2025
4bd3b8b
nit
joachim-danswer Jul 10, 2025
d9a9818
nit
joachim-danswer Jul 10, 2025
b1488dd
update to KG Beta
joachim-danswer Jul 10, 2025
9c7638c
iteration prep
joachim-danswer Jul 11, 2025
d5c67b6
mypy + typing next_step and plan_of_records
Orbital-Web Jul 11, 2025
594bbdb
greptile + evan comments
Orbital-Web Jul 11, 2025
2ee98ba
plan of record fix
Orbital-Web Jul 11, 2025
72bbcab
improved DR
joachim-danswer Jul 13, 2025
bb1b129
improvements
joachim-danswer Jul 13, 2025
e8a593c
mypy + better typing
Orbital-Web Jul 14, 2025
bb95c46
feat: structured response
Orbital-Web Jul 14, 2025
ce1c801
final answer streaming
Orbital-Web Jul 14, 2025
b5ddf31
sligtly better planner prompt
Orbital-Web Jul 14, 2025
27cd1d4
feat: small prompt improvements
Orbital-Web Jul 14, 2025
2b69d1b
more minor prompt improvements
Orbital-Web Jul 14, 2025
0b26ed6
updates - KG search w/ citations
joachim-danswer Jul 15, 2025
83325f9
feat: previous chat context
Orbital-Web Jul 15, 2025
6359d2f
formatting
Orbital-Web Jul 15, 2025
241b8d0
adding final references
joachim-danswer Jul 15, 2025
05e5555
feat: citation improvements
Orbital-Web Jul 16, 2025
1b8d246
feat: preparation for parallel search
Orbital-Web Jul 16, 2025
31ae6f1
aggregate context improvements (no duplicates)
Orbital-Web Jul 16, 2025
89ea0f8
fix: wrong indentation
Orbital-Web Jul 16, 2025
0917d9a
nits
joachim-danswer Jul 16, 2025
6c4eb17
prompt improvements
joachim-danswer Jul 17, 2025
3d5586d
feat: make kg query part of state, rather than config
Orbital-Web Jul 18, 2025
4a63e63
rough - included clarification
joachim-danswer Jul 18, 2025
f9f64fb
cleaning up of isolating feedback generation
joachim-danswer Jul 19, 2025
6aca9ee
fix docstring + move shared vars to constants.py
Orbital-Web Jul 20, 2025
614672f
fix: chat history + question passed to closer
Orbital-Web Jul 20, 2025
fedc665
kg bugfix + fix sql on error + slightly improved dr user feedback prompt
Orbital-Web Jul 20, 2025
d3cc278
multi-search for Thoughtful
joachim-danswer Jul 21, 2025
dca39f2
nit
joachim-danswer Jul 21, 2025
c330152
nit
joachim-danswer Jul 21, 2025
de4a9e4
mypy + rename vars for clarity
Orbital-Web Jul 21, 2025
3a575a9
fix: incorrect citations
Orbital-Web Jul 21, 2025
48dc934
internet search
joachim-danswer Jul 21, 2025
c0435dd
parallelized internet search
joachim-danswer Jul 22, 2025
726211c
internet search improvements
joachim-danswer Jul 22, 2025
e972fb3
adding current time to prompts
joachim-danswer Jul 23, 2025
c253844
minor cleanups + mypy fix
Orbital-Web Jul 23, 2025
df1c40c
prompt spellings
joachim-danswer Jul 23, 2025
74418b8
consolidate user feedback
Orbital-Web Jul 24, 2025
991bd4f
separation of tools
joachim-danswer Jul 24, 2025
d48cbc2
custom tools
joachim-danswer Jul 25, 2025
8282318
fix: mypy
Orbital-Web Jul 25, 2025
a2dd1bb
cleanup
Orbital-Web Jul 25, 2025
8b09fb0
better clarification (still need prompt work) + prompt template fix
Orbital-Web Jul 27, 2025
7b3bdbd
fix: mypy
Orbital-Web Jul 27, 2025
d1dcad6
prompt improvements
Orbital-Web Jul 28, 2025
322e866
claim start
joachim-danswer Jul 28, 2025
3b8d16a
claim improvements
joachim-danswer Jul 29, 2025
3f4936a
cleanup
Orbital-Web Jul 29, 2025
abfecde
citation improvements with answer claim structure
Orbital-Web Jul 29, 2025
bf77da2
closer can suggest more research
joachim-danswer Jul 29, 2025
e4c2427
merging of new citation handling and sending back by Closer
joachim-danswer Jul 29, 2025
588023a
state updates for internal search
joachim-danswer Jul 29, 2025
a0d6d0b
cleanup
Orbital-Web Jul 30, 2025
1aad7f4
add back kg
Orbital-Web Jul 30, 2025
e5dbfc3
faster relationship sql generation
Orbital-Web Jul 30, 2025
6d3542d
fix error overwrite
Orbital-Web Jul 30, 2025
916d6cb
base search in DR refactoring
joachim-danswer Jul 30, 2025
1d7d2f0
time filter and source prediction
joachim-danswer Jul 31, 2025
c81a7e1
mypy fix
Orbital-Web Jul 31, 2025
994e7f7
active_source_description
joachim-danswer Aug 1, 2025
fabfa8d
query rejection step
joachim-danswer Aug 4, 2025
f83f062
reworked 'fast' search
joachim-danswer Aug 5, 2025
2b66144
nits
joachim-danswer Aug 6, 2025
b2fe55c
more DR updates
joachim-danswer Aug 7, 2025
c64c636
feat: kg tool proper implementation
Orbital-Web Aug 7, 2025
e66245e
properly merge inference section contents
Orbital-Web Aug 7, 2025
f13b08b
persistence
joachim-danswer Aug 7, 2025
397d30c
better prompt templating
Orbital-Web Aug 8, 2025
b0c95ec
fix: constants
Orbital-Web Aug 8, 2025
4ae5bb1
fix: iteration citation replacement
Orbital-Web Aug 8, 2025
16406f0
kg citations
Orbital-Web Aug 8, 2025
cd6577c
tool_id for custom tools
joachim-danswer Aug 8, 2025
c21fa21
initial decision using tool-calling if tool-calling LLM
joachim-danswer Aug 8, 2025
5f66a27
ResearchType vs DRTimeBudget
joachim-danswer Aug 8, 2025
62872e5
cleanup
Orbital-Web Aug 10, 2025
5ca8ca2
mypy and proper id implementation
Orbital-Web Aug 10, 2025
07768d5
feat: initial custom tool support prep
Orbital-Web Aug 10, 2025
b028b25
rename folder
Orbital-Web Aug 10, 2025
09d672f
more cleanup for tools
Orbital-Web Aug 10, 2025
7b37e72
almost working custom tools
Orbital-Web Aug 10, 2025
de82ad9
custom tools
Orbital-Web Aug 12, 2025
1d7ec49
correct db sessions
Orbital-Web Aug 12, 2025
516ae99
answer and claims
Orbital-Web Aug 12, 2025
73cecf8
addresssing a few todos
Orbital-Web Aug 12, 2025
ff0c78b
fix: aggregation + searched for xyz ui bugfixes
Orbital-Web Aug 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""add research agent database tables and chat message research fields

Revision ID: 5ae8240accb3
Revises: 62c3a055a141
Create Date: 2025-08-06 14:29:24.691388

"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "5ae8240accb3"
down_revision = "62c3a055a141"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Apply the research-agent schema changes.

    Adds the nullable ``research_type`` and ``research_plan`` columns to
    ``chat_message`` and then creates the ``research_agent_iteration`` and
    ``research_agent_iteration_sub_step`` tables.
    """
    # Nullable research fields on the existing chat_message table
    chat_message_columns = [
        sa.Column("research_type", sa.String(), nullable=True),
        sa.Column("research_plan", postgresql.JSONB(), nullable=True),
    ]
    for new_column in chat_message_columns:
        op.add_column("chat_message", new_column)

    # Iteration-level records; rows are removed together with the
    # chat_message they point to (ON DELETE CASCADE)
    iteration_columns = [
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column(
            "primary_question_id",
            sa.Integer(),
            sa.ForeignKey("chat_message.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("iteration_nr", sa.Integer(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("purpose", sa.String(), nullable=True),
        sa.Column("reasoning", sa.String(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    ]
    op.create_table("research_agent_iteration", *iteration_columns)

    # Sub-step records; parent_question_id is a self-reference to another
    # sub-step row, sub_step_tool_id points at the tool table
    sub_step_columns = [
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column(
            "primary_question_id",
            sa.Integer(),
            sa.ForeignKey("chat_message.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "parent_question_id",
            sa.Integer(),
            sa.ForeignKey(
                "research_agent_iteration_sub_step.id", ondelete="CASCADE"
            ),
            nullable=True,
        ),
        sa.Column("iteration_nr", sa.Integer(), nullable=False),
        sa.Column("iteration_sub_step_nr", sa.Integer(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("sub_step_instructions", sa.String(), nullable=True),
        sa.Column(
            "sub_step_tool_id",
            sa.Integer(),
            sa.ForeignKey("tool.id"),
            nullable=True,
        ),
        sa.Column("reasoning", sa.String(), nullable=True),
        sa.Column("sub_answer", sa.String(), nullable=True),
        sa.Column("cited_doc_results", postgresql.JSONB(), nullable=True),
        sa.Column("claims", postgresql.JSONB(), nullable=True),
        sa.Column("additional_data", postgresql.JSONB(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    ]
    op.create_table("research_agent_iteration_sub_step", *sub_step_columns)


def downgrade() -> None:
    """Revert the schema changes applied by ``upgrade``.

    Drops the two research-agent tables, then removes the research columns
    from ``chat_message``.
    """
    # Tables first: the sub-step table is dropped before the iteration table
    for table_name in (
        "research_agent_iteration_sub_step",
        "research_agent_iteration",
    ):
        op.drop_table(table_name)

    # Then the chat_message columns, in reverse order of their creation
    for column_name in ("research_plan", "research_type"):
        op.drop_column("chat_message", column_name)
12 changes: 12 additions & 0 deletions backend/onyx/agents/agent_search/basic/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from langchain_core.messages import AIMessageChunk
from pydantic import BaseModel

from onyx.chat.models import LlmDoc
from onyx.context.search.models import InferenceSection


class BasicSearchProcessedStreamResults(BaseModel):
    """Container for the results gathered while processing a basic-search
    LLM stream.

    Every field has a default, so an instance can be created with any subset
    of the fields supplied.
    """

    # Message chunk taken from the stream; defaults to an empty-content chunk
    ai_message_chunk: AIMessageChunk = AIMessageChunk(content="")
    # Full answer text; None when no answer text was set
    full_answer: str | None = None
    # Cited sections; defaults to an empty list
    cited_references: list[InferenceSection] = []
    # Retrieved documents; defaults to an empty list
    retrieved_documents: list[LlmDoc] = []
Comment on lines +8 to +12
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style: Add docstring to explain the purpose of this class and describe each field's usage

Comment on lines +11 to +12
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style: Consider adding Config class with frozen=True since these appear to be immutable result objects

7 changes: 7 additions & 0 deletions backend/onyx/agents/agent_search/basic/states.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
from onyx.chat.models import LlmDoc
from onyx.context.search.models import InferenceSection


# States contain values that change over the course of graph execution,
# Config is for values that are set at the start and never change.
Expand All @@ -18,11 +21,15 @@ class BasicInput(BaseModel):
# Langgraph needs a nonempty input, but we pass in all static
# data through a RunnableConfig.
unused: bool = True
query_override: str | None = None


## Graph Output State
class BasicOutput(TypedDict):
    """Output state of the basic graph."""

    # Message chunk produced by the graph run
    tool_call_chunk: AIMessageChunk
    # Full answer text, or None
    full_answer: str | None
    # Cited sections, or None
    cited_references: list[InferenceSection] | None
    # Retrieved documents, or None
    retrieved_documents: list[LlmDoc] | None


## Graph State
Expand Down
7 changes: 5 additions & 2 deletions backend/onyx/agents/agent_search/basic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from langchain_core.messages import BaseMessage
from langgraph.types import StreamWriter

from onyx.agents.agent_search.basic.models import BasicSearchProcessedStreamResults
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import LlmDoc
from onyx.chat.stream_processing.answer_response_handler import AnswerResponseHandler
Expand All @@ -24,7 +25,7 @@ def process_llm_stream(
writer: StreamWriter,
final_search_results: list[LlmDoc] | None = None,
displayed_search_results: list[LlmDoc] | None = None,
) -> AIMessageChunk:
) -> BasicSearchProcessedStreamResults:
tool_call_chunk = AIMessageChunk(content="")

if final_search_results and displayed_search_results:
Expand Down Expand Up @@ -61,4 +62,6 @@ def process_llm_stream(
)

logger.debug(f"Full answer: {full_answer}")
return cast(AIMessageChunk, tool_call_chunk)
return BasicSearchProcessedStreamResults(
ai_message_chunk=cast(AIMessageChunk, tool_call_chunk), full_answer=full_answer
)
49 changes: 49 additions & 0 deletions backend/onyx/agents/agent_search/dr/conditional_edges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from collections.abc import Hashable

from langgraph.graph import END
from langgraph.types import Send

from onyx.agents.agent_search.dr.enums import DRPath
from onyx.agents.agent_search.dr.states import MainState


def decision_router(state: MainState) -> list[Send | Hashable] | DRPath | str:
    """Choose the next graph destination from the most recently selected tool.

    The last entry of ``state.tools_used`` is interpreted as a ``DRPath``
    value; anything that is not a ``DRPath`` value is treated as a generic
    tool.

    Returns:
        ``END`` when the path is ``DRPath.END``, ``DRPath.CLOSER`` when a
        search-type path was chosen but ``state.query_list`` is empty, and
        the resolved path otherwise.

    Raises:
        IndexError: if ``state.tools_used`` is empty.
        ValueError: if the resolved path is ``DRPath.CLARIFIER``.
    """
    if not state.tools_used:
        raise IndexError("state.tools_used cannot be empty")

    # The latest entry is either a DRPath string or a generic tool name
    latest_tool = state.tools_used[-1]
    try:
        path = DRPath(latest_tool)
    except ValueError:
        path = DRPath.GENERIC_TOOL

    # Terminal path maps onto the graph's END marker
    if path == DRPath.END:
        return END

    # The clarifier may not be selected while iterating
    if path == DRPath.CLARIFIER:
        raise ValueError("CLARIFIER is not a valid path during iteration")

    # Search-type tools are routed to the closer when there is no query
    query_dependent_paths = (
        DRPath.INTERNAL_SEARCH,
        DRPath.INTERNET_SEARCH,
        DRPath.KNOWLEDGE_GRAPH,
    )
    if path in query_dependent_paths and len(state.query_list) == 0:
        return DRPath.CLOSER

    return path


def completeness_router(state: MainState) -> DRPath | str:
    """Route back to the orchestrator or finish the graph.

    Returns:
        ``DRPath.ORCHESTRATOR`` when the last entry of ``state.tools_used``
        equals the orchestrator's string value, ``END`` otherwise.

    Raises:
        IndexError: if ``state.tools_used`` is empty.
    """
    if not state.tools_used:
        raise IndexError("tools_used cannot be empty")

    # Only an explicit ORCHESTRATOR entry keeps the graph running
    last_entry = state.tools_used[-1]
    back_to_orchestrator = last_entry == DRPath.ORCHESTRATOR.value
    return DRPath.ORCHESTRATOR if back_to_orchestrator else END
29 changes: 29 additions & 0 deletions backend/onyx/agents/agent_search/dr/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from onyx.agents.agent_search.dr.enums import DRPath
from onyx.agents.agent_search.dr.enums import ResearchType

# Chat history limit, counted in user/assistant pairs (see note below)
MAX_CHAT_HISTORY_MESSAGES = (
    3  # note: actual count is x2 to account for user and assistant messages
)

# Upper bound on parallel searches -- presumably per research step;
# confirm at the call site
MAX_DR_PARALLEL_SEARCH = 4

# TODO: test more, generally not needed/adds unnecessary iterations
MAX_NUM_CLOSER_SUGGESTIONS = (
    0  # how many times the closer can send back to the orchestrator
)

# Marker strings -- presumably prefixed to clarification requests and
# high-level plans respectively; verify against their usages
CLARIFICATION_REQUEST_PREFIX = "PLEASE CLARIFY:"
HIGH_LEVEL_PLAN_PREFIX = "HIGH_LEVEL PLAN:"

# Average cost assigned to each path/tool
AVERAGE_TOOL_COSTS: dict[DRPath, float] = {
    DRPath.INTERNAL_SEARCH: 1.0,
    DRPath.KNOWLEDGE_GRAPH: 2.0,
    DRPath.INTERNET_SEARCH: 1.5,
    DRPath.GENERIC_TOOL: 1.5,  # TODO: see todo in OrchestratorTool
    DRPath.CLOSER: 0.0,
}

# Budget per research type -- presumably expressed in the same units as
# AVERAGE_TOOL_COSTS; TODO confirm
DR_TIME_BUDGET_BY_TYPE: dict[ResearchType, float] = {
    ResearchType.THOUGHTFUL: 3.0,
    ResearchType.DEEP: 6.0,
}
114 changes: 114 additions & 0 deletions backend/onyx/agents/agent_search/dr/dr_prompt_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from datetime import datetime

from onyx.agents.agent_search.dr.enums import DRPath
from onyx.agents.agent_search.dr.enums import ResearchType
from onyx.agents.agent_search.dr.models import DRPromptPurpose
from onyx.agents.agent_search.dr.models import OrchestratorTool
from onyx.prompts.dr_prompts import GET_CLARIFICATION_PROMPT
from onyx.prompts.dr_prompts import KG_TYPES_DESCRIPTIONS
from onyx.prompts.dr_prompts import ORCHESTRATOR_DEEP_INITIAL_PLAN_PROMPT
from onyx.prompts.dr_prompts import ORCHESTRATOR_DEEP_ITERATIVE_DECISION_PROMPT
from onyx.prompts.dr_prompts import ORCHESTRATOR_FAST_ITERATIVE_DECISION_PROMPT
from onyx.prompts.dr_prompts import ORCHESTRATOR_FAST_ITERATIVE_REASONING_PROMPT
from onyx.prompts.dr_prompts import ORCHESTRATOR_NEXT_STEP_PURPOSE_PROMPT
from onyx.prompts.dr_prompts import TOOL_DIFFERENTIATION_HINTS
from onyx.prompts.dr_prompts import TOOL_QUESTION_HINTS
from onyx.prompts.prompt_template import PromptTemplate


def get_dr_prompt_orchestration_templates(
    purpose: DRPromptPurpose,
    research_type: ResearchType,
    available_tools: dict[str, OrchestratorTool],
    entity_types_string: str | None = None,
    relationship_types_string: str | None = None,
    reasoning_result: str | None = None,
    tool_calls_string: str | None = None,
) -> PromptTemplate:
    """Select the orchestrator prompt for ``purpose`` and pre-fill its
    tool-related fields.

    Args:
        purpose: which orchestration prompt is needed.
        research_type: research mode; decides between the FAST and DEEP
            prompt variants and which purposes are allowed.
        available_tools: tools by name; their descriptions and costs are
            rendered into the prompt. A falsy value is treated as no tools.
        entity_types_string: entity types for the Knowledge Graph
            description; required when the Knowledge Graph tool is available.
        relationship_types_string: relationship types for the Knowledge
            Graph description; required when the Knowledge Graph tool is
            available.
        reasoning_result: earlier reasoning text; a placeholder is used when
            empty.
        tool_calls_string: earlier tool calls as text; a placeholder is used
            when empty.

    Returns:
        The selected template after ``partial_build`` with the shared fields.

    Raises:
        ValueError: if the Knowledge Graph tool is available without entity
            or relationship types, if ``purpose`` is not supported for
            ``research_type``, or if ``purpose`` is unknown.
    """
    tools = available_tools or {}
    tool_names = list(tools)

    # Per-tool description and cost listings
    description_lines = [
        f"- {tool_name}: {tool.description}" for tool_name, tool in tools.items()
    ]
    tool_description_str = "\n\n".join(description_lines)
    cost_lines = [f"{tool_name}: {tool.cost}" for tool_name, tool in tools.items()]
    tool_cost_str = "\n".join(cost_lines)

    # Hints for every ordered pair of available tools that has one
    # TODO: add tool delineation pairs for custom tools as well
    differentiation_hints: list[str] = [
        TOOL_DIFFERENTIATION_HINTS[(first_tool, second_tool)]
        for first_tool in tools
        for second_tool in tools
        if (first_tool, second_tool) in TOOL_DIFFERENTIATION_HINTS
    ]
    tool_differentiation_hint_string = (
        "\n".join(differentiation_hints) or "(No differentiating hints available)"
    )

    question_hints = [
        "- " + TOOL_QUESTION_HINTS[tool_name]
        for tool_name in tools
        if tool_name in TOOL_QUESTION_HINTS
    ]
    tool_question_hint_string = "\n".join(question_hints) or "(No examples available)"

    # Knowledge Graph type descriptions are only built when the tool is available
    if DRPath.KNOWLEDGE_GRAPH.value not in tools:
        kg_types_descriptions = "(The Knowledge Graph is not used.)"
    elif entity_types_string and relationship_types_string:
        kg_types_descriptions = KG_TYPES_DESCRIPTIONS.build(
            possible_entities=entity_types_string,
            possible_relationships=relationship_types_string,
        )
    else:
        raise ValueError(
            "Entity types and relationship types must be provided if the Knowledge Graph is used."
        )

    is_thoughtful = research_type == ResearchType.THOUGHTFUL

    if purpose == DRPromptPurpose.PLAN:
        if is_thoughtful:
            raise ValueError("plan generation is not supported for FAST time budget")
        base_template = ORCHESTRATOR_DEEP_INITIAL_PLAN_PROMPT

    elif purpose == DRPromptPurpose.NEXT_STEP_REASONING:
        if not is_thoughtful:
            raise ValueError(
                "reasoning is not separately required for DEEP time budget"
            )
        base_template = ORCHESTRATOR_FAST_ITERATIVE_REASONING_PROMPT

    elif purpose == DRPromptPurpose.NEXT_STEP_PURPOSE:
        base_template = ORCHESTRATOR_NEXT_STEP_PURPOSE_PROMPT

    elif purpose == DRPromptPurpose.NEXT_STEP:
        base_template = (
            ORCHESTRATOR_FAST_ITERATIVE_DECISION_PROMPT
            if is_thoughtful
            else ORCHESTRATOR_DEEP_ITERATIVE_DECISION_PROMPT
        )

    elif purpose == DRPromptPurpose.CLARIFICATION:
        if is_thoughtful:
            raise ValueError("clarification is not supported for FAST time budget")
        base_template = GET_CLARIFICATION_PROMPT

    else:
        # for mypy, clearly a mypy bug
        raise ValueError(f"Invalid purpose: {purpose}")

    return base_template.partial_build(
        num_available_tools=str(len(tool_names)),
        available_tools=", ".join(tool_names),
        tool_choice_options=" or ".join(tool_names),
        current_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        kg_types_descriptions=kg_types_descriptions,
        tool_descriptions=tool_description_str,
        tool_differentiation_hints=tool_differentiation_hint_string,
        tool_question_hints=tool_question_hint_string,
        average_tool_costs=tool_cost_str,
        reasoning_result=reasoning_result or "(No reasoning result provided.)",
        tool_calls_string=tool_calls_string or "(No tool calls provided.)",
    )
20 changes: 20 additions & 0 deletions backend/onyx/agents/agent_search/dr/enums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from enum import Enum


class ResearchType(str, Enum):
    """Research type options for agent search operations.

    Members are also ``str`` instances, so each compares equal to its
    string value.
    """

    # BASIC = "BASIC"
    THOUGHTFUL = "THOUGHTFUL"
    DEEP = "DEEP"


class DRPath(str, Enum):
    """Path identifiers for the DR flow.

    Members are also ``str`` instances, so each compares equal to its
    string value.
    """

    CLARIFIER = "CLARIFIER"
    ORCHESTRATOR = "ORCHESTRATOR"
    INTERNAL_SEARCH = "INTERNAL_SEARCH"
    # Stands in for any tool that has no dedicated path of its own
    GENERIC_TOOL = "GENERIC_TOOL"
    KNOWLEDGE_GRAPH = "KNOWLEDGE_GRAPH"
    INTERNET_SEARCH = "INTERNET_SEARCH"
    CLOSER = "CLOSER"
    END = "END"
Loading
Loading