Skip to content

Commit b5c2812

Browse files
prompt/flow improvements
1 parent f8c44fa commit b5c2812

File tree

5 files changed

+31
-14
lines changed

5 files changed

+31
-14
lines changed

backend/onyx/agents/agent_search/kb_search/nodes/a2_analyze.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,10 +265,7 @@ def analyze(
265265
Format: {output_format.value}, Broken down question: {broken_down_question}"
266266

267267
extraction_detected_relationships = len(query_graph_relationships) > 0
268-
if (
269-
extraction_detected_relationships
270-
or relationship_detection == KGRelationshipDetection.RELATIONSHIPS.value
271-
):
268+
if extraction_detected_relationships:
272269
query_type = KGRelationshipDetection.RELATIONSHIPS.value
273270

274271
if extraction_detected_relationships:

backend/onyx/agents/agent_search/kb_search/nodes/a3_generate_simple_sql.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,11 @@ def generate_simple_sql(
404404
"entity_table", ent_temp_view
405405
)
406406

407+
if source_documents_sql and rel_temp_view:
408+
source_documents_sql = source_documents_sql.replace(
409+
"relationship_table", rel_temp_view
410+
)
411+
407412
logger.debug(f"A3 source_documents_sql: {source_documents_sql}")
408413

409414
scalar_result = None

backend/onyx/configs/kg_configs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@
2222
os.environ.get("KG_STRATEGY_GENERATION_TIMEOUT", "20")
2323
)
2424

25-
KG_SQL_GENERATION_TIMEOUT: int = int(os.environ.get("KG_SQL_GENERATION_TIMEOUT", "30"))
25+
KG_SQL_GENERATION_TIMEOUT: int = int(os.environ.get("KG_SQL_GENERATION_TIMEOUT", "40"))
2626

2727
KG_SQL_GENERATION_TIMEOUT_OVERRIDE: int = int(
28-
os.environ.get("KG_SQL_GENERATION_TIMEOUT_OVERRIDE", "25")
28+
os.environ.get("KG_SQL_GENERATION_TIMEOUT_OVERRIDE", "40")
2929
)
3030

3131
KG_SQL_GENERATION_MAX_TOKENS: int = int(

backend/onyx/prompts/kg_prompts.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@
4141
{{"entities": [<a list of entities of the prescribed entity types that you can reliably identify in the text, \
4242
formatted as '<ENTITY_TYPE_NAME>::<entity_name>' (please use that capitalization)>. Each entity \
4343
also should be followed by a list of comma-separated attribute filters for the entity, if referred to in the \
44-
question for that entity. Example: 'ACCOUNT::* -- [account_type: customer, status: active]' should the question be \
45-
'list all customer accounts', and ACCOUNT was an entity type with this attribute key/value allowed.] \
44+
question for that entity. CRITICAL: you can only use attributes that are mentioned above for the \
45+
entity type in question. Example: 'ACCOUNT::* -- [account_type: customer, status: active]' should the question be \
46+
'list all customer accounts', and ACCOUNT was an entity type with these attribute key/values allowed.] \
4647
"time_filter": <if needed, a SQL-like filter for a field called 'event_date'. Do not select anything here \
4748
unless you are sure that the question asks for that filter. Only apply a time_filter if the question explicitly \
4849
mentions a specific date, time period, or event that can be directly translated into a date filter. Do not assume \
@@ -267,12 +268,14 @@
267268
QUERY_ENTITY_EXTRACTION_PROMPT = f"""
268269
You are an expert in the area of knowledge extraction and using knowledge graphs. You are given a question \
269270
and asked to extract entities (with attributes if applicable) that you can reliably identify, which will then
270-
be matched with a known entity in the knowledge graph. You are also asked to extract time filters SHOULD \
271-
there be an explicit mention of a date or time frame in the QUESTION (note: last, first, etc.. DO NOT \
271+
be matched with a known entity in the knowledge graph. You are also asked to extract time constraints information \
272+
from the QUESTION. Some time constraints will be captured by entity attributes if \
273+
the entity type has a fitting attribute (example: 'created_at' could be a candidate for that), other times
274+
we will extract an explicit time filter if no attribute fits. (Note regarding 'last', 'first', etc.: DO NOT \
272275
imply the need for a time filter just because the question asks for something that is not the current date. \
273-
They will relate to ordering that we will handle separately).
276+
They will relate to ordering that we will handle separately later).
274277
275-
Today is ---today_date--- and the user asking is ---user_name---, which may or may not be relevant.
278+
In case useful, today is ---today_date--- and the user asking is ---user_name---, which may or may not be relevant.
276279
Here are the entity types that are available for extraction. Some of them may have \
277280
a description, others should be obvious. Also, notice that some may have attributes associated with them, which will \
278281
be important later.
@@ -337,6 +340,13 @@
337340
actual attribute may be implied.
338341
- don't just look at the entities that are mentioned in the question but also those that the question \
339342
may be about.
343+
- be very careful that you only extract attributes that are listed above for the entity type in question! Do \
344+
not make up attributes even if they are implied! Particularly if there is a relationship type that would \
345+
actually represent that information, you MUST not extract the information as an attribute. We \
346+
will extract the relationship type later.
347+
- For the values of attributes, look at the possible values above! For example 'open' may refer to \
348+
'backlog', 'todo', 'in progress', etc. In cases like that construct a ';'-separated list of values that you think may fit \
349+
what is implied in the question (in the example: 'open; backlog; todo; in progress').
340350
341351
Also, if you think the name or the title of an entity is given but name or title are not mentioned \
342352
explicitly as an attribute, then you should indeed extract the name/title as the entity name.
@@ -592,7 +602,11 @@
592602
1. SIMPLE: You think you can answer the question using a database that is aware of the entities, relationships \
593603
above, and is generally suitable if it is enough to either list or count entities, return dates, etc. Usually, \
594604
'SIMPLE' is chosen for questions of the form 'how many...' (always), or 'list the...' (often), 'when was...', \
595-
'what did (someone) work on...'etc.
605+
'what did (someone) work on...' etc. Often it is also used in cases like 'what did John work on since April?'. Here, \
606+
the user would expect to just see the list. So choose 'SIMPLE' here unless there are REALLY CLEAR \
607+
follow-up instructions for each item (like 'summarize...', 'analyze...', 'what are the main points of...'). If \
608+
it is a 'what did...'-type question, choose 'SIMPLE'!
609+
596610
2. DEEP: You think you really should ALSO leverage the actual text of sources to answer the question, which sits \
597611
in a vector database. Examples are 'what is discussed in...', 'summarize', 'what is the discussion about...',\
598612
'how does... relate to...', 'are there any mentions of... in..', 'what are the main points in...', \

backend/onyx/tools/tool_implementations/search/search_tool.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import copy
12
import json
23
from collections.abc import Callable
34
from collections.abc import Generator
@@ -341,7 +342,7 @@ def run(
341342
# Overwrite time-cutoff should supersede existing time-cutoff, even if defined
342343
retrieval_options.filters.time_cutoff = time_cutoff
343344

344-
retrieval_options = retrieval_options or RetrievalDetails()
345+
retrieval_options = copy.deepcopy(retrieval_options) or RetrievalDetails()
345346
retrieval_options.filters = retrieval_options.filters or BaseFilters()
346347
if kg_entities:
347348
retrieval_options.filters.kg_entities = kg_entities

0 commit comments

Comments
 (0)