diff --git a/evaluators/langevals/langevals_langevals/exact_match.py b/evaluators/langevals/langevals_langevals/exact_match.py
new file mode 100644
index 0000000..4cc7862
--- /dev/null
+++ b/evaluators/langevals/langevals_langevals/exact_match.py
@@ -0,0 +1,80 @@
+from typing import Optional
+from langevals_core.base_evaluator import (
+    BaseEvaluator,
+    EvaluatorEntry,
+    EvaluationResult,
+    EvaluatorSettings,
+    SingleEvaluationResult,
+)
+from pydantic import Field
+
+
+# Normalization options applied to both texts before they are compared.
+class ExactMatchSettings(EvaluatorSettings):
+    case_sensitive: bool = Field(
+        default=False,
+        description="True if the comparison should be case-sensitive, False otherwise",
+    )
+    trim_whitespace: bool = Field(
+        default=True,
+        description="True if the comparison should trim whitespace, False otherwise",
+    )
+    remove_punctuation: bool = Field(
+        default=True,
+        description="True if the comparison should remove punctuation, False otherwise",
+    )
+
+
+# Verdict of the comparison.
+class ExactMatchResult(EvaluationResult):
+    passed: Optional[bool] = Field(
+        default=True,
+        description="True if the output matched the input exactly, False otherwise",
+    )
+
+
+# The two texts to compare; a missing value is compared as an empty string.
+class ExactMatchEntry(EvaluatorEntry):
+    input: Optional[str] = None
+    output: Optional[str] = None
+
+
+class ExactMatchEvaluator(
+    BaseEvaluator[ExactMatchEntry, ExactMatchSettings, ExactMatchResult]
+):
+    """
+    A simple evaluator that checks if the output matches the input exactly, with some
+    extra bells and whistles to help with whitespace related shenanigans.
+    """
+
+    name = "Exact Match Evaluator"
+    category = "quality"
+    default_settings = ExactMatchSettings()
+    is_guardrail = False
+
+    def _normalize(self, text: Optional[str]) -> str:
+        """Return `text` normalized according to the evaluator settings."""
+        normalized = text or ""  # a missing field compares as an empty string
+
+        if self.settings.remove_punctuation:
+            # Keep only alphanumeric and whitespace characters.
+            normalized = "".join(
+                char for char in normalized if char.isalnum() or char.isspace()
+            )
+
+        # Trim after removing punctuation so that whitespace exposed at the
+        # edges (e.g. "Hello !" -> "Hello ") is stripped as well.
+        if self.settings.trim_whitespace:
+            normalized = normalized.strip()
+
+        if not self.settings.case_sensitive:
+            # casefold() is the Unicode-aware form of lower() for caseless matching.
+            normalized = normalized.casefold()
+
+        return normalized
+
+    def evaluate(self, entry: ExactMatchEntry) -> SingleEvaluationResult:
+        """Compare the normalized input and output of `entry` for equality."""
+        passed = self._normalize(entry.input) == self._normalize(entry.output)
+
+        return ExactMatchResult(passed=passed)
diff --git a/evaluators/langevals/tests/test_exact_match.py b/evaluators/langevals/tests/test_exact_match.py
new file mode 100644
index 0000000..b207608
--- /dev/null
+++ b/evaluators/langevals/tests/test_exact_match.py
@@ -0,0 +1,152 @@
+from langevals_langevals.exact_match import (
+    ExactMatchEvaluator,
+    ExactMatchEntry,
+    ExactMatchSettings,
+)
+
+
+def test_langeval_exact_match_evaluator():
+    entry = ExactMatchEntry(
+        input="What is the capital of France?",
+        output="What is the capital of France?",
+    )
+    settings = ExactMatchSettings()
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed is True
+
+
+def test_langeval_exact_match_evaluator_defaults():
+    entry = ExactMatchEntry(
+        input="What is the capital of France?",
+        output="What is the capital of the Netherlands?",
+    )
+    settings = ExactMatchSettings()
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed is False
+
+
+def test_langeval_exact_match_case_sensitive_true():
+    entry = ExactMatchEntry(
+        input="Hello World",
+        output="hello world",
+    )
+    settings = ExactMatchSettings(case_sensitive=True)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed is False
+
+
+def test_langeval_exact_match_case_sensitive_false():
+    entry = ExactMatchEntry(
+        input="Hello World",
+        output="hello world",
+    )
+    settings = ExactMatchSettings(case_sensitive=False)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed is True
+
+
+def test_langeval_exact_match_trim_whitespace_true():
+    entry = ExactMatchEntry(
+        input=" Hello World ",
+        output="Hello World",
+    )
+    settings = ExactMatchSettings(trim_whitespace=True)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed is True
+
+
+def test_langeval_exact_match_trim_whitespace_false():
+    entry = ExactMatchEntry(
+        input=" Hello World ",
+        output="Hello World",
+    )
+    settings = ExactMatchSettings(trim_whitespace=False)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed is False
+
+
+def test_langeval_exact_match_remove_punctuation_true():
+    entry = ExactMatchEntry(
+        input="Hello, World!",
+        output="Hello World",
+    )
+    settings = ExactMatchSettings(remove_punctuation=True)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed is True
+
+
+def test_langeval_exact_match_remove_punctuation_false():
+    entry = ExactMatchEntry(
+        input="Hello, World!",
+        output="Hello World",
+    )
+    settings = ExactMatchSettings(remove_punctuation=False)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed is False
+
+
+def test_langeval_exact_match_combined_settings():
+    entry = ExactMatchEntry(
+        input=" Hello, World! ",
+        output="hello world",
+    )
+    settings = ExactMatchSettings(
+        case_sensitive=False,
+        trim_whitespace=True,
+        remove_punctuation=True,
+    )
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed is True
+
+
+def test_langeval_exact_match_trims_after_removing_punctuation():
+    entry = ExactMatchEntry(
+        input="Hello World !",
+        output="Hello World",
+    )
+    settings = ExactMatchSettings(trim_whitespace=True, remove_punctuation=True)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed is True
+
+
+def test_langeval_exact_match_case_insensitive_uses_casefold():
+    entry = ExactMatchEntry(
+        input="Straße",
+        output="STRASSE",
+    )
+    settings = ExactMatchSettings(case_sensitive=False)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed is True
diff --git a/ts-integration/evaluators.generated.ts b/ts-integration/evaluators.generated.ts
index 02d45c2..14ec4be 100644
--- a/ts-integration/evaluators.generated.ts
+++ b/ts-integration/evaluators.generated.ts
@@ -165,6 +165,25 @@ export type Evaluators = {
       competitors: string[];
     };
   };
+  "langevals/exact_match": {
+    settings: {
+      /**
+       * @description True if the comparison should be case-sensitive, False otherwise
+       * @default false
+       */
+      case_sensitive: boolean;
+      /**
+       * @description True if the comparison should trim whitespace, False otherwise
+       * @default true
+       */
+      trim_whitespace: boolean;
+      /**
+       * @description True if the comparison should remove punctuation, False otherwise
+       * @default true
+       */
+      remove_punctuation: boolean;
+    };
+  };
   "langevals/llm_answer_match": {
     settings: {
       /**
@@ -1109,6 +1128,42 @@ This evaluator implements LLM-as-a-judge with a function call approach to check
       },
     },
   },
+  "langevals/exact_match": {
+    name: `Exact Match Evaluator`,
+    description: `
+A simple evaluator that checks if the output matches the input exactly, with some
+extra bells and whistles to help with whitespace related shenanigans.
+`,
+    category: "quality",
+    docsUrl: "",
+    isGuardrail: false,
+    requiredFields: [],
+    optionalFields: ["input", "output"],
+    settings: {
+      case_sensitive: {
+        description:
+          "True if the comparison should be case-sensitive, False otherwise",
+        default: false,
+      },
+      trim_whitespace: {
+        description:
+          "True if the comparison should trim whitespace, False otherwise",
+        default: true,
+      },
+      remove_punctuation: {
+        description:
+          "True if the comparison should remove punctuation, False otherwise",
+        default: true,
+      },
+    },
+    envVars: [],
+    result: {
+      passed: {
+        description:
+          "True if the output matched the input exactly, False otherwise",
+      },
+    },
+  },
   "langevals/llm_answer_match": {
     name: `LLM Answer Match`,
     description: `