From a12fd29da3acc66baee6574e5d719a3b5a341758 Mon Sep 17 00:00:00 2001 From: Alexander Forbes-Reed Date: Wed, 12 Mar 2025 17:09:46 +0100 Subject: [PATCH 1/6] added exact match evaluator --- .../langevals_langevals/exact_match.py | 50 +++++++++++++++++++ .../langevals/tests/test_exact_match.py | 35 +++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 evaluators/langevals/langevals_langevals/exact_match.py create mode 100644 evaluators/langevals/tests/test_exact_match.py diff --git a/evaluators/langevals/langevals_langevals/exact_match.py b/evaluators/langevals/langevals_langevals/exact_match.py new file mode 100644 index 0000000..0153a07 --- /dev/null +++ b/evaluators/langevals/langevals_langevals/exact_match.py @@ -0,0 +1,50 @@ +import ast +import json +from typing import Literal, Optional, Dict, Any +from langevals_core.base_evaluator import ( + BaseEvaluator, + EvaluationResultSkipped, + EvaluatorEntry, + EvaluationResult, + EvaluatorSettings, + SingleEvaluationResult, + EvaluationResultError, +) +import markdown +from pydantic import Field +import sqlglot + + +class ExactMatchSettings(EvaluatorSettings): + pass + + +class ExactMatchResult(EvaluationResult): + passed: Optional[bool] = Field( + default=True, + description="True if the output matched the input exactly, False otherwise", + ) + + +class ExactMatchEntry(EvaluatorEntry): + input: Optional[str] = None + output: Optional[str] = None + + +class ExactMatchEvaluator( + BaseEvaluator[ExactMatchEntry, ExactMatchSettings, ExactMatchResult] +): + """ + A simple evaluator that checks if the output matches the input exactly. 
+ """ + + name = "Exact Match Evaluator" + category = "quality" + default_settings = ExactMatchSettings() + is_guardrail = False + + def evaluate(self, entry: ExactMatchEntry) -> SingleEvaluationResult: + if entry.input == entry.output: + return ExactMatchResult(passed=True) + + return ExactMatchResult(passed=False) diff --git a/evaluators/langevals/tests/test_exact_match.py b/evaluators/langevals/tests/test_exact_match.py new file mode 100644 index 0000000..d313b37 --- /dev/null +++ b/evaluators/langevals/tests/test_exact_match.py @@ -0,0 +1,35 @@ +import dotenv + +dotenv.load_dotenv() + +from langevals_langevals.exact_match import ( + ExactMatchEvaluator, + ExactMatchEntry, + ExactMatchSettings, +) + + +def test_langeval_exact_match_evaluator_exact(): + entry = ExactMatchEntry( + input="What is the capital of France?", + output="What is the capital of France?", + ) + settings = ExactMatchSettings() + + evaluator = ExactMatchEvaluator(settings=settings) + result = evaluator.evaluate(entry) + + assert result == True + + +def test_langeval_exact_match_evaluator_different(): + entry = ExactMatchEntry( + input="What is the capital of France?", + output="The capital of France is London.", + ) + settings = ExactMatchSettings() + + evaluator = ExactMatchEvaluator(settings=settings) + result = evaluator.evaluate(entry) + + assert result == False From e3ecbc68a0feee6460918d3f4e72dbe22381c677 Mon Sep 17 00:00:00 2001 From: Alexander Forbes-Reed Date: Thu, 13 Mar 2025 09:54:37 +0100 Subject: [PATCH 2/6] added exact match langeval --- .../langevals_langevals/exact_match.py | 50 ++++++++++++++----- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/evaluators/langevals/langevals_langevals/exact_match.py b/evaluators/langevals/langevals_langevals/exact_match.py index 0153a07..d1749f2 100644 --- a/evaluators/langevals/langevals_langevals/exact_match.py +++ b/evaluators/langevals/langevals_langevals/exact_match.py @@ -1,22 +1,28 @@ -import ast -import json -from 
typing import Literal, Optional, Dict, Any +from typing import Optional from langevals_core.base_evaluator import ( BaseEvaluator, - EvaluationResultSkipped, EvaluatorEntry, EvaluationResult, EvaluatorSettings, SingleEvaluationResult, - EvaluationResultError, ) -import markdown from pydantic import Field -import sqlglot class ExactMatchSettings(EvaluatorSettings): - pass + case_sensitive: bool = Field( + default=False, + description="True if the comparison should be case-sensitive, False otherwise", + ) + trim_whitespace: bool = Field( + default=True, + description="True if the comparison should trim whitespace, False otherwise", + ) + remove_punctuation: bool = Field( + default=True, + description="True if the comparison should remove punctuation, False otherwise", + ) + class ExactMatchResult(EvaluationResult): @@ -35,7 +41,8 @@ class ExactMatchEvaluator( BaseEvaluator[ExactMatchEntry, ExactMatchSettings, ExactMatchResult] ): """ - A simple evaluator that checks if the output matches the input exactly. + A simple evaluator that checks if the output matches the input exactly, with some + extra bells and whistles to help with whitespace related shenanigans. 
""" name = "Exact Match Evaluator" @@ -44,7 +51,26 @@ class ExactMatchEvaluator( is_guardrail = False def evaluate(self, entry: ExactMatchEntry) -> SingleEvaluationResult: - if entry.input == entry.output: - return ExactMatchResult(passed=True) + # Get input and output + input_text = entry.input or "" + output_text = entry.output or "" + + # Apply settings + if self.settings.trim_whitespace: + input_text = input_text.strip() + output_text = output_text.strip() + + if self.settings.remove_punctuation: + input_text = ''.join(char for char in input_text if char.isalnum() or char.isspace()) + output_text = ''.join(char for char in output_text if char.isalnum() or char.isspace()) + + if not self.settings.case_sensitive: + input_text = input_text.lower() + output_text = output_text.lower() + + # Perform comparison + passed = input_text == output_text - return ExactMatchResult(passed=False) + # Return result + return ExactMatchResult(passed=passed) + \ No newline at end of file From b9abe293b5a1615dca1a9f682ccd08aba3260def Mon Sep 17 00:00:00 2001 From: Alexander Forbes-Reed Date: Thu, 13 Mar 2025 09:54:45 +0100 Subject: [PATCH 3/6] added tests for exact match --- .../langevals/tests/test_exact_match.py | 109 ++++++++++++++++-- 1 file changed, 100 insertions(+), 9 deletions(-) diff --git a/evaluators/langevals/tests/test_exact_match.py b/evaluators/langevals/tests/test_exact_match.py index d313b37..b207608 100644 --- a/evaluators/langevals/tests/test_exact_match.py +++ b/evaluators/langevals/tests/test_exact_match.py @@ -1,7 +1,3 @@ -import dotenv - -dotenv.load_dotenv() - from langevals_langevals.exact_match import ( ExactMatchEvaluator, ExactMatchEntry, @@ -9,7 +5,7 @@ ) -def test_langeval_exact_match_evaluator_exact(): +def test_langeval_exact_match_evaluator(): entry = ExactMatchEntry( input="What is the capital of France?", output="What is the capital of France?", @@ -19,17 +15,112 @@ def test_langeval_exact_match_evaluator_exact(): evaluator = 
ExactMatchEvaluator(settings=settings) result = evaluator.evaluate(entry) - assert result == True + assert result.passed == True -def test_langeval_exact_match_evaluator_different(): +def test_langeval_exact_match_evaluator_defaults(): entry = ExactMatchEntry( input="What is the capital of France?", - output="The capital of France is London.", + output="What is the capital of the Netherlands?", ) settings = ExactMatchSettings() evaluator = ExactMatchEvaluator(settings=settings) result = evaluator.evaluate(entry) - assert result == False + assert result.passed == False + + +def test_langeval_exact_match_case_sensitive_true(): + entry = ExactMatchEntry( + input="Hello World", + output="hello world", + ) + settings = ExactMatchSettings(case_sensitive=True) + + evaluator = ExactMatchEvaluator(settings=settings) + result = evaluator.evaluate(entry) + + assert result.passed == False + + +def test_langeval_exact_match_case_sensitive_false(): + entry = ExactMatchEntry( + input="Hello World", + output="hello world", + ) + settings = ExactMatchSettings(case_sensitive=False) + + evaluator = ExactMatchEvaluator(settings=settings) + result = evaluator.evaluate(entry) + + assert result.passed == True + + +def test_langeval_exact_match_trim_whitespace_true(): + entry = ExactMatchEntry( + input=" Hello World ", + output="Hello World", + ) + settings = ExactMatchSettings(trim_whitespace=True) + + evaluator = ExactMatchEvaluator(settings=settings) + result = evaluator.evaluate(entry) + + assert result.passed == True + + +def test_langeval_exact_match_trim_whitespace_false(): + entry = ExactMatchEntry( + input=" Hello World ", + output="Hello World", + ) + settings = ExactMatchSettings(trim_whitespace=False) + + evaluator = ExactMatchEvaluator(settings=settings) + result = evaluator.evaluate(entry) + + assert result.passed == False + + +def test_langeval_exact_match_remove_punctuation_true(): + entry = ExactMatchEntry( + input="Hello, World!", + output="Hello World", + ) + settings = 
ExactMatchSettings(remove_punctuation=True) + + evaluator = ExactMatchEvaluator(settings=settings) + result = evaluator.evaluate(entry) + + assert result.passed == True + + +def test_langeval_exact_match_remove_punctuation_false(): + entry = ExactMatchEntry( + input="Hello, World!", + output="Hello World", + ) + settings = ExactMatchSettings(remove_punctuation=False) + + evaluator = ExactMatchEvaluator(settings=settings) + result = evaluator.evaluate(entry) + + assert result.passed == False + + +def test_langeval_exact_match_combined_settings(): + entry = ExactMatchEntry( + input=" Hello, World! ", + output="hello world", + ) + settings = ExactMatchSettings( + case_sensitive=False, + trim_whitespace=True, + remove_punctuation=True + ) + + evaluator = ExactMatchEvaluator(settings=settings) + result = evaluator.evaluate(entry) + + assert result.passed == True From e2dc876b992d860645a82502a2b355f05f58a16e Mon Sep 17 00:00:00 2001 From: Alexander Forbes-Reed Date: Thu, 13 Mar 2025 09:55:00 +0100 Subject: [PATCH 4/6] generated exact match types --- ts-integration/evaluators.generated.ts | 55 ++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/ts-integration/evaluators.generated.ts b/ts-integration/evaluators.generated.ts index 02d45c2..14ec4be 100644 --- a/ts-integration/evaluators.generated.ts +++ b/ts-integration/evaluators.generated.ts @@ -165,6 +165,25 @@ export type Evaluators = { competitors: string[]; }; }; + "langevals/exact_match": { + settings: { + /** + * @description True if the comparison should be case-sensitive, False otherwise + * @default false + */ + case_sensitive: boolean; + /** + * @description True if the comparison should trim whitespace, False otherwise + * @default true + */ + trim_whitespace: boolean; + /** + * @description True if the comparison should remove punctuation, False otherwise + * @default true + */ + remove_punctuation: boolean; + }; + }; "langevals/llm_answer_match": { settings: { /** @@ -1109,6 +1128,42 @@ 
This evaluator implements LLM-as-a-judge with a function call approach to check }, }, }, + "langevals/exact_match": { + name: `Exact Match Evaluator`, + description: ` +A simple evaluator that checks if the output matches the input exactly, with some +extra bells and whistles to help with whitespace related shenanigans. +`, + category: "quality", + docsUrl: "", + isGuardrail: false, + requiredFields: [], + optionalFields: ["input", "output"], + settings: { + case_sensitive: { + description: + "True if the comparison should be case-sensitive, False otherwise", + default: false, + }, + trim_whitespace: { + description: + "True if the comparison should trim whitespace, False otherwise", + default: true, + }, + remove_punctuation: { + description: + "True if the comparison should remove punctuation, False otherwise", + default: true, + }, + }, + envVars: [], + result: { + passed: { + description: + "True if the output matched the input exactly, False otherwise", + }, + }, + }, "langevals/llm_answer_match": { name: `LLM Answer Match`, description: ` From d8c0c3de3f0f7af8b744818b8fca9c418ea29051 Mon Sep 17 00:00:00 2001 From: Alexander Forbes-Reed Date: Thu, 13 Mar 2025 10:35:30 +0100 Subject: [PATCH 5/6] remove trailing whitespace --- evaluators/langevals/langevals_langevals/exact_match.py | 1 - 1 file changed, 1 deletion(-) diff --git a/evaluators/langevals/langevals_langevals/exact_match.py b/evaluators/langevals/langevals_langevals/exact_match.py index d1749f2..c7965a8 100644 --- a/evaluators/langevals/langevals_langevals/exact_match.py +++ b/evaluators/langevals/langevals_langevals/exact_match.py @@ -73,4 +73,3 @@ def evaluate(self, entry: ExactMatchEntry) -> SingleEvaluationResult: # Return result return ExactMatchResult(passed=passed) - \ No newline at end of file From 3d310410acd28630298af294a1875a6a1f9a33dd Mon Sep 17 00:00:00 2001 From: Alexander Forbes-Reed Date: Thu, 13 Mar 2025 10:36:32 +0100 Subject: [PATCH 6/6] remove comments --- 
evaluators/langevals/langevals_langevals/exact_match.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/evaluators/langevals/langevals_langevals/exact_match.py b/evaluators/langevals/langevals_langevals/exact_match.py index c7965a8..4cc7862 100644 --- a/evaluators/langevals/langevals_langevals/exact_match.py +++ b/evaluators/langevals/langevals_langevals/exact_match.py @@ -51,11 +51,9 @@ class ExactMatchEvaluator( is_guardrail = False def evaluate(self, entry: ExactMatchEntry) -> SingleEvaluationResult: - # Get input and output input_text = entry.input or "" output_text = entry.output or "" - # Apply settings if self.settings.trim_whitespace: input_text = input_text.strip() output_text = output_text.strip() @@ -68,8 +66,6 @@ def evaluate(self, entry: ExactMatchEntry) -> SingleEvaluationResult: input_text = input_text.lower() output_text = output_text.lower() - # Perform comparison passed = input_text == output_text - # Return result return ExactMatchResult(passed=passed)