1515from onyx .context .search .models import InferenceChunkUncleaned
1616from onyx .context .search .models import InferenceSection
1717from onyx .context .search .models import MAX_METRICS_CONTENT
18+ from onyx .context .search .models import RerankingDetails
1819from onyx .context .search .models import RerankMetricsContainer
1920from onyx .context .search .models import SearchQuery
2021from onyx .document_index .document_index_utils import (
@@ -77,7 +78,8 @@ def _remove_metadata_suffix(chunk: InferenceChunkUncleaned) -> str:
7778
7879@log_function_time (print_only = True )
7980def semantic_reranking (
80- query : SearchQuery ,
81+ query_str : str ,
82+ rerank_settings : RerankingDetails ,
8183 chunks : list [InferenceChunk ],
8284 model_min : int = CROSS_ENCODER_RANGE_MIN ,
8385 model_max : int = CROSS_ENCODER_RANGE_MAX ,
@@ -88,11 +90,9 @@ def semantic_reranking(
8890
8991 Note: this updates the chunks in place, it updates the chunk scores which came from retrieval
9092 """
91- rerank_settings = query .rerank_settings
92-
93- if not rerank_settings or not rerank_settings .rerank_model_name :
94- # Should never reach this part of the flow without reranking settings
95- raise RuntimeError ("Reranking flow should not be running" )
93+ assert (
94+ rerank_settings .rerank_model_name
95+ ), "Reranking flow cannot run without a specific model"
9696
9797 chunks_to_rerank = chunks [: rerank_settings .num_rerank ]
9898
@@ -107,7 +107,7 @@ def semantic_reranking(
107107 f"{ chunk .semantic_identifier or chunk .title or '' } \n { chunk .content } "
108108 for chunk in chunks_to_rerank
109109 ]
110- sim_scores_floats = cross_encoder .predict (query = query . query , passages = passages )
110+ sim_scores_floats = cross_encoder .predict (query = query_str , passages = passages )
111111
112112 # Old logic to handle multiple cross-encoders preserved but not used
113113 sim_scores = [numpy .array (sim_scores_floats )]
@@ -165,8 +165,20 @@ def semantic_reranking(
165165 return list (ranked_chunks ), list (ranked_indices )
166166
167167
def should_rerank(rerank_settings: RerankingDetails | None) -> bool:
    """Tell whether the reranking step should run for the given settings.

    Reranking only runs when every one of these holds:
    - reranking settings were supplied (the argument is truthy)
    - rerank_model_name is set to a non-empty value
    - num_rerank is greater than 0
    """
    # No settings at all means there is nothing to rerank with.
    if not rerank_settings:
        return False

    # Without a model name there is no reranker to call; num_rerank is
    # deliberately not inspected in that case.
    if not rerank_settings.rerank_model_name:
        return False

    return bool(rerank_settings.num_rerank > 0)
177+
178+
168179def rerank_sections (
169- query : SearchQuery ,
180+ query_str : str ,
181+ rerank_settings : RerankingDetails ,
170182 sections_to_rerank : list [InferenceSection ],
171183 rerank_metrics_callback : Callable [[RerankMetricsContainer ], None ] | None = None ,
172184) -> list [InferenceSection ]:
@@ -181,16 +193,13 @@ def rerank_sections(
181193 """
182194 chunks_to_rerank = [section .center_chunk for section in sections_to_rerank ]
183195
184- if not query .rerank_settings :
185- # Should never reach this part of the flow without reranking settings
186- raise RuntimeError ("Reranking settings not found" )
187-
188196 ranked_chunks , _ = semantic_reranking (
189- query = query ,
197+ query_str = query_str ,
198+ rerank_settings = rerank_settings ,
190199 chunks = chunks_to_rerank ,
191200 rerank_metrics_callback = rerank_metrics_callback ,
192201 )
193- lower_chunks = chunks_to_rerank [query . rerank_settings .num_rerank :]
202+ lower_chunks = chunks_to_rerank [rerank_settings .num_rerank :]
194203
195204 # Scores from rerank cannot be meaningfully combined with scores without rerank
196205 # However the ordering is still important
@@ -260,16 +269,13 @@ def search_postprocessing(
260269
261270 rerank_task_id = None
262271 sections_yielded = False
263- if (
264- search_query .rerank_settings
265- and search_query .rerank_settings .rerank_model_name
266- and search_query .rerank_settings .num_rerank > 0
267- ):
272+ if should_rerank (search_query .rerank_settings ):
268273 post_processing_tasks .append (
269274 FunctionCall (
270275 rerank_sections ,
271276 (
272- search_query ,
277+ search_query .query ,
278+ search_query .rerank_settings , # Cannot be None here
273279 retrieved_sections ,
274280 rerank_metrics_callback ,
275281 ),
0 commit comments