Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion backend/onyx/context/search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
from onyx.context.search.models import SavedSearchDocWithContent
from onyx.context.search.models import SearchDoc
from onyx.db.models import SearchDoc as DBSearchDoc
from onyx.utils.logger import setup_logger

logger = setup_logger()


T = TypeVar(
Expand Down Expand Up @@ -154,5 +157,6 @@ def remove_stop_words_and_punctuation(keywords: list[str]) -> list[str]:
if (word.casefold() not in stop_words and word not in string.punctuation)
]
return text_trimmed or word_tokens
except Exception:
except Exception as e:
logger.warning(f"Error removing stop words and punctuation: {e}")
return keywords
50 changes: 50 additions & 0 deletions backend/tests/regression/search_quality/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Search Quality Test Script

This Python script evaluates the search results for a list of queries.

Unlike the script in answer_quality, this script is much less customizable and runs using currently ingested documents, though it allows for quick testing of search parameters on a bunch of test queries that don't have well-defined answers.

## Usage

1. Ensure you have the required dependencies installed.
2. Set up the PYTHONPATH permanently:
Add the following line to your shell configuration file (e.g., `~/.bashrc`, `~/.zshrc`, or `~/.bash_profile`):
```
export PYTHONPATH=$PYTHONPATH:/path/to/onyx/backend
```
Replace `/path/to/onyx` with the actual path to your Onyx repository.
After adding this line, restart your terminal or run `source ~/.bashrc` (or the appropriate config file) to apply the changes.
3. Navigate to Onyx repo, search_quality folder:

```
cd path/to/onyx/backend/tests/regression/search_quality
```

4. Copy `search_queries.json.template` to `search_queries.json` and add/remove test queries in it
5. Run `generate_search_queries.py` to generate the modified queries for the search pipeline

```
python generate_search_queries.py
```

6. Copy `search_eval_config.yaml.template` to `search_eval_config.yaml` and specify the search and eval parameters
7. Run `run_search_eval.py` to evaluate the search results against the reranked results

```
python run_search_eval.py
```

8. Repeat steps 6 and 7 to test and compare different search parameters

## Metrics
- Jaccard Similarity: the ratio between the intersection and the union of the topk search and rerank results. Higher is better
- Average Rank Change: The average absolute rank difference of the topk reranked chunks vs the entire search chunks. Lower is better
- Average Missing Chunk Ratio: The number of chunks in the topk reranked chunks not in the topk search chunks, over topk. Lower is better

Note that all of these metrics are affected by very narrow search results.
E.g., if topk is 20 but there is only 1 relevant document, the other 19 documents could be ordered arbitrarily, resulting in a lower score.


To address this limitation, there are score adjusted versions of the metrics.
The score adjusted version does not use a fixed topk, but computes the optimum topk based on the rerank scores.
This generally works in determining how many documents are relevant, although note that this approach isn't perfect.
102 changes: 102 additions & 0 deletions backend/tests/regression/search_quality/generate_search_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import json

from langgraph.types import StreamWriter

from onyx.agents.agent_search.basic.utils import process_llm_stream
from onyx.chat.models import PromptConfig
from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
from onyx.chat.prompt_builder.answer_prompt_builder import default_build_system_message
from onyx.chat.prompt_builder.answer_prompt_builder import default_build_user_message
from onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
from onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import SqlEngine
from onyx.db.persona import get_persona_by_id
from onyx.llm.factory import get_llms_for_persona
from onyx.llm.interfaces import LLM
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.utils.logger import setup_logger

logger = setup_logger(__name__)


def _load_queries() -> list[str]:
with open("search_queries.json", "r") as file:
return json.load(file)


def _modify_one_query(
    query: str,
    llm: LLM,
    prompt_config: PromptConfig,
    tool_definition: dict,
    writer: StreamWriter = lambda _: None,
) -> str:
    """Run one query through the LLM tool-calling flow and return the rewritten query.

    Builds the same prompt the chat pipeline would build for ``query``, forces
    the LLM to call the search tool, and extracts the rewritten query argument
    from the resulting tool call.

    Args:
        query: The raw user query to rewrite.
        llm: The LLM to use for the rewrite.
        prompt_config: Prompt configuration for the persona being emulated.
        tool_definition: The search tool's OpenAI-style tool definition dict.
        writer: Optional stream writer; defaults to a no-op since the
            streamed answer is discarded.

    Returns:
        The query string the LLM would pass to the search tool.

    Raises:
        RuntimeError: If the LLM response contains no tool call.
    """
    prompt_builder = AnswerPromptBuilder(
        user_message=default_build_user_message(
            user_query=query,
            prompt_config=prompt_config,
            files=[],
            single_message_history=None,
        ),
        system_message=default_build_system_message(prompt_config, llm.config),
        message_history=[],
        llm_config=llm.config,
        raw_user_query=query,
        raw_user_uploaded_files=[],
        single_message_history=None,
    )
    prompt = prompt_builder.build()

    stream = llm.stream(
        prompt=prompt,
        tools=[tool_definition],
        tool_choice="required",
        structured_response_format=None,
    )
    tool_message = process_llm_stream(
        messages=stream,
        should_stream_answer=False,
        writer=writer,
    )
    # tool_choice="required" should force a tool call, but guard against a
    # model that returns none so the failure is actionable rather than an
    # opaque IndexError
    if not tool_message.tool_calls:
        raise RuntimeError(f"LLM did not emit a tool call for query: {query}")
    return tool_message.tool_calls[0]["args"]["query"]


class SearchToolOverride(SearchTool):
    """Minimal stand-in for ``SearchTool`` used only for ``tool_definition()``.

    Skips the parent constructor entirely: ``tool_definition`` does not read
    any instance state, so none of ``SearchTool.__init__``'s setup is needed.
    """

    def __init__(self) -> None:
        # deliberately do NOT call super().__init__()
        pass


def generate_search_queries() -> None:
    """Rewrite each test query through the default persona's search-tool flow.

    Reads queries from ``search_queries.json``, asks the persona's LLM for the
    query it would send to the search tool, and writes the rewritten queries to
    ``search_queries_modified.json`` in the current directory.
    """
    SqlEngine.init_engine(
        pool_size=POSTGRES_API_SERVER_POOL_SIZE,
        max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW,
    )

    queries = _load_queries()

    with get_session_with_current_tenant() as db_session:
        # use the default persona's LLM and prompt so the rewrites match what
        # the production chat pipeline would produce
        persona = get_persona_by_id(DEFAULT_PERSONA_ID, None, db_session)
        llm, _ = get_llms_for_persona(persona)
        prompt_config = PromptConfig.from_model(persona.prompts[0])
        tool_definition = SearchToolOverride().tool_definition()

        modified_queries = [
            _modify_one_query(
                query=query,
                llm=llm,
                prompt_config=prompt_config,
                tool_definition=tool_definition,
            )
            for query in queries
        ]

    # explicit encoding + ensure_ascii=False keeps non-ASCII queries readable
    with open("search_queries_modified.json", "w", encoding="utf-8") as file:
        json.dump(modified_queries, file, indent=4, ensure_ascii=False)

    logger.info("Exported modified queries to search_queries_modified.json")


if __name__ == "__main__":
    generate_search_queries()
Loading
Loading