Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 4 additions & 31 deletions backend/onyx/context/search/retrieval/search_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,7 @@ def doc_index_retrieval(
top_base_chunks_standard_ranking_thread: (
TimeoutThread[list[InferenceChunkUncleaned]] | None
) = None
top_base_chunks_keyword_ranking_thread: (
TimeoutThread[list[InferenceChunkUncleaned]] | None
) = None

top_semantic_chunks_thread: TimeoutThread[list[InferenceChunkUncleaned]] | None = (
None
)
Expand All @@ -190,20 +188,6 @@ def doc_index_retrieval(
query.offset,
)

# same query but with 1st vespa phase as keyword retrieval
top_base_chunks_keyword_ranking_thread = run_in_background(
document_index.hybrid_retrieval,
query.query,
query_embedding,
query.processed_keywords,
query.filters,
query.hybrid_alpha,
query.recency_bias_multiplier,
query.num_hits,
QueryExpansionType.KEYWORD,
query.offset,
)

if (
query.expanded_queries
and query.expanded_queries.keywords_expansions
Expand Down Expand Up @@ -264,21 +248,14 @@ def doc_index_retrieval(
top_base_chunks_standard_ranking = wait_on_background(
top_base_chunks_standard_ranking_thread
)
top_base_chunks_keyword_ranking = wait_on_background(
top_base_chunks_keyword_ranking_thread
)

top_keyword_chunks = wait_on_background(top_keyword_chunks_thread)

if query.search_type == SearchType.SEMANTIC:
assert top_semantic_chunks_thread is not None
top_semantic_chunks = wait_on_background(top_semantic_chunks_thread)

all_top_chunks = (
top_base_chunks_standard_ranking
+ top_base_chunks_keyword_ranking
+ top_keyword_chunks
)
all_top_chunks = top_base_chunks_standard_ranking + top_keyword_chunks

# use all three retrieval methods to retrieve top chunks

Expand All @@ -293,12 +270,8 @@ def doc_index_retrieval(
top_base_chunks_standard_ranking = wait_on_background(
top_base_chunks_standard_ranking_thread
)
top_base_chunks_keyword_ranking = wait_on_background(
top_base_chunks_keyword_ranking_thread
)
top_chunks = _dedupe_chunks(
top_base_chunks_standard_ranking + top_base_chunks_keyword_ranking
)

top_chunks = _dedupe_chunks(top_base_chunks_standard_ranking)

logger.info(f"Overall number of top initial retrieval chunks: {len(top_chunks)}")

Expand Down
Loading