25 changes: 0 additions & 25 deletions backend/onyx/agents/agent_search/kb_search/graph_utils.py
@@ -217,31 +217,6 @@ def stream_write_close_steps(writer: StreamWriter, level: int = 0) -> None:
     write_custom_event("stream_finished", stop_event, writer)
 
 
-def stream_write_close_main_answer(writer: StreamWriter, level: int = 0) -> None:
-    stop_event = StreamStopInfo(
-        stop_reason=StreamStopReason.FINISHED,
-        stream_type=StreamType.MAIN_ANSWER,
-        level=level,
-        level_question_num=0,
-    )
-    write_custom_event("stream_finished", stop_event, writer)
-
-
-def stream_write_main_answer_token(
-    writer: StreamWriter, token: str, level: int = 0, level_question_num: int = 0
-) -> None:
-    write_custom_event(
-        "initial_agent_answer",
-        AgentAnswerPiece(
-            answer_piece=token,  # No need to add space as tokenizer handles this
-            level=level,
-            level_question_num=level_question_num,
-            answer_type="agent_level_answer",
-        ),
-        writer,
-    )
-
-
 def get_doc_information_for_entity(entity_id_name: str) -> KGEntityDocInfo:
     """
     Get document information for an entity, including its semantic name and document details.
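Note: both deleted helpers were thin wrappers around `write_custom_event`; the token-level dispatch they provided moves into the shared `stream_llm_answer` utility used in the next file. For reference, a minimal sketch of the close-event pattern the first helper implemented; the import paths are assumptions based on the names used in `graph_utils.py` (`StreamStopInfo`, `StreamStopReason`, and `StreamType` are taken to live in `onyx.chat.models`):

```python
# Sketch only: reproduces the deleted close-event emission inline.
# Import paths are assumptions inferred from this diff, not confirmed by it.
from langgraph.types import StreamWriter
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import StreamStopInfo, StreamStopReason, StreamType


def close_main_answer_stream(writer: StreamWriter, level: int = 0) -> None:
    # Signal that the MAIN_ANSWER stream at this level is complete.
    stop_event = StreamStopInfo(
        stop_reason=StreamStopReason.FINISHED,
        stream_type=StreamType.MAIN_ANSWER,
        level=level,
        level_question_num=0,
    )
    write_custom_event("stream_finished", stop_event, writer)
```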
@@ -7,33 +7,28 @@
 
 from onyx.access.access import get_acl_for_user
 from onyx.agents.agent_search.kb_search.graph_utils import rename_entities_in_answer
-from onyx.agents.agent_search.kb_search.graph_utils import (
-    stream_write_close_main_answer,
-)
 from onyx.agents.agent_search.kb_search.graph_utils import stream_write_close_steps
-from onyx.agents.agent_search.kb_search.graph_utils import (
-    stream_write_main_answer_token,
-)
 from onyx.agents.agent_search.kb_search.ops import research
 from onyx.agents.agent_search.kb_search.states import MainOutput
 from onyx.agents.agent_search.kb_search.states import MainState
 from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.calculations import (
     get_answer_generation_documents,
 )
+from onyx.agents.agent_search.shared_graph_utils.llm import stream_llm_answer
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import relevance_from_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import ExtendedToolResponse
-from onyx.configs.kg_configs import KG_ANSWER_GENERATION_TIMEOUT
+from onyx.configs.kg_configs import KG_MAX_TOKENS_ANSWER_GENERATION
 from onyx.configs.kg_configs import KG_RESEARCH_NUM_RETRIEVED_DOCS
+from onyx.configs.kg_configs import KG_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION
+from onyx.configs.kg_configs import KG_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION
 from onyx.context.search.enums import SearchType
 from onyx.context.search.models import InferenceSection
 from onyx.db.engine import get_session_with_current_tenant
-from onyx.natural_language_processing.utils import BaseTokenizer
-from onyx.natural_language_processing.utils import get_tokenizer
 from onyx.prompts.kg_prompts import OUTPUT_FORMAT_NO_EXAMPLES_PROMPT
 from onyx.prompts.kg_prompts import OUTPUT_FORMAT_NO_OVERALL_ANSWER_PROMPT
 from onyx.tools.tool_implementations.search.search_tool import IndexFilters
@@ -45,17 +40,6 @@
 logger = setup_logger()
 
 
-def _stream_augmentations(
-    llm_tokenizer: BaseTokenizer, streaming_text: str, writer: StreamWriter
-) -> None:
-
-    # Tokenize and stream the reference results
-    tokens = llm_tokenizer.tokenize(streaming_text)
-    for token in tokens:
-
-        stream_write_main_answer_token(writer, token)
-
-
 def generate_answer(
     state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
 ) -> MainOutput:
@@ -221,70 +205,24 @@ def generate_answer(
             content=output_format_prompt,
         )
     ]
-    fast_llm = graph_config.tooling.fast_llm
 
-    dispatch_timings: list[float] = []
-    response: list[str] = []
-
-    def stream_answer() -> list[str]:
-        # Get the LLM's tokenizer
-        llm_tokenizer = get_tokenizer(
-            model_name=fast_llm.config.model_name,
-            provider_type=fast_llm.config.model_provider,
-        )
-
-        for message in fast_llm.stream(
-            prompt=msg,
-            timeout_override=30,
-            max_tokens=1000,
-        ):
-            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
-            content = message.content
-            if not isinstance(content, str):
-                raise ValueError(
-                    f"Expected content to be a string, but got {type(content)}"
-                )
-
-            # Tokenize the content using the LLM's tokenizer
-            tokens = llm_tokenizer.tokenize(content)
-            for token in tokens:
-                start_stream_token = datetime.now()
-                stream_write_main_answer_token(
-                    writer, token, level=0, level_question_num=0
-                )
-                end_stream_token = datetime.now()
-                dispatch_timings.append(
-                    (end_stream_token - start_stream_token).microseconds
-                )
-                response.append(token)
-        return response
-
     try:
-        response = run_with_timeout(
-            KG_ANSWER_GENERATION_TIMEOUT,
-            stream_answer,
+        run_with_timeout(
+            KG_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
+            lambda: stream_llm_answer(
+                llm=graph_config.tooling.fast_llm,
+                prompt=msg,
+                event_name="initial_agent_answer",
+                writer=writer,
+                agent_answer_level=0,
+                agent_answer_question_num=0,
+                agent_answer_type="agent_level_answer",
+                timeout_override=KG_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
+                max_tokens=KG_MAX_TOKENS_ANSWER_GENERATION,
+            ),
         )
 
-        # llm_tokenizer = get_tokenizer(
-        #     model_name=fast_llm.config.model_name,
-        #     provider_type=fast_llm.config.model_provider,
-        # )
-
-        # TODO: the fake streaming should happen in front-end. Revisit and then
-        # simply stream out here the full text in one.
-        # if reference_results_str:
-        #     # Get the LLM's tokenizer
-
-        #     _stream_augmentations(llm_tokenizer, reference_results_str, writer)
-
-        # if state.remarks:
-        #     _stream_augmentations(llm_tokenizer, "Comments: \n " + "\n".join(state.remarks), writer)
-
     except Exception as e:
         raise ValueError(f"Could not generate the answer. Error {e}")
 
-    stream_write_close_main_answer(writer)
-
     return MainOutput(
         log_messages=[
             get_langgraph_node_log_string(
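The new call path replaces the hand-rolled tokenize-and-dispatch loop with the shared `stream_llm_answer` helper and splits the single 30s budget into two knobs: `KG_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION` bounds the whole streaming call through `run_with_timeout`, while `KG_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION` is forwarded as `timeout_override`, presumably the per-request connect timeout. A minimal sketch of the wrapping pattern, assuming semantics for `run_with_timeout` that this diff does not show (Onyx's real implementation may differ):

```python
from concurrent.futures import ThreadPoolExecutor
from typing import Callable, TypeVar

T = TypeVar("T")


def run_with_timeout_sketch(timeout: float, fn: Callable[[], T]) -> T:
    """Run fn in a worker thread; raise TimeoutError if it overruns the budget."""
    executor = ThreadPoolExecutor(max_workers=1)
    try:
        future = executor.submit(fn)
        # Raises concurrent.futures.TimeoutError once the budget is exhausted.
        return future.result(timeout=timeout)
    finally:
        # Don't block on a straggling worker: it is abandoned, not killed,
        # which is why the wrapped call needs its own connect timeout too.
        executor.shutdown(wait=False)


# Shape of the new call site; stream_and_emit is a hypothetical stand-in for
# the lambda that wraps stream_llm_answer in the diff above.
def stream_and_emit() -> None:
    pass  # stream tokens and write events here


run_with_timeout_sketch(45.0, stream_and_emit)
```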
12 changes: 10 additions & 2 deletions backend/onyx/configs/kg_configs.py
@@ -42,8 +42,16 @@
     os.environ.get("KG_OBJECT_SOURCE_RESEARCH_TIMEOUT", "30")
 )
 
-KG_ANSWER_GENERATION_TIMEOUT: int = int(
-    os.environ.get("KG_ANSWER_GENERATION_TIMEOUT", "30")
+KG_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION: int = int(
+    os.environ.get("KG_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION", "45")
+)
+
+KG_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION: int = int(
+    os.environ.get("KG_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION", "15")
+)
+
+KG_MAX_TOKENS_ANSWER_GENERATION: int = int(
+    os.environ.get("KG_MAX_TOKENS_ANSWER_GENERATION", "1024")
 )
 
 KG_MAX_DEEP_SEARCH_RESULTS: int = int(
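Because these constants are evaluated at module import time, environment overrides must be in place before `onyx.configs.kg_configs` is first imported. Note also that `KG_ANSWER_GENERATION_TIMEOUT` no longer exists, so deployments that set it should migrate to the new variable names. A quick sketch, assuming the backend package is importable:

```python
import os

# Overrides must precede the first import of the config module,
# since each value is parsed exactly once with int(os.environ.get(...)).
os.environ["KG_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION"] = "90"
os.environ["KG_MAX_TOKENS_ANSWER_GENERATION"] = "2048"

from onyx.configs import kg_configs

assert kg_configs.KG_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION == 90
assert kg_configs.KG_MAX_TOKENS_ANSWER_GENERATION == 2048
```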