Skip to content

Commit 3483ed3

Browse files
authored
feature: add exact match evaluator (#13)
* added exact match evaluator * added exact match langeval * added tests for exact match * generated exact match types * remove trailing whitespace * remove comments
1 parent a27b4aa commit 3483ed3

File tree

3 files changed

+252
-0
lines changed

3 files changed

+252
-0
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
from typing import Optional
2+
from langevals_core.base_evaluator import (
3+
BaseEvaluator,
4+
EvaluatorEntry,
5+
EvaluationResult,
6+
EvaluatorSettings,
7+
SingleEvaluationResult,
8+
)
9+
from pydantic import Field
10+
11+
12+
class ExactMatchSettings(EvaluatorSettings):
    # Normalization switches for the exact-match comparison. The description
    # strings are reproduced verbatim because the same text shows up in the
    # generated TypeScript definitions.

    # Off by default: casing differences are ignored unless this is enabled.
    case_sensitive: bool = Field(
        False,
        description="True if the comparison should be case-sensitive, False otherwise",
    )
    # On by default: leading/trailing whitespace is dropped before comparing.
    trim_whitespace: bool = Field(
        True,
        description="True if the comparison should trim whitespace, False otherwise",
    )
    # On by default: punctuation is stripped from both texts before comparing.
    remove_punctuation: bool = Field(
        True,
        description="True if the comparison should remove punctuation, False otherwise",
    )
25+
26+
27+
28+
class ExactMatchResult(EvaluationResult):
    # Outcome of a single exact-match evaluation.
    # NOTE(review): the default is True, so a result built without an explicit
    # `passed` value reports success -- confirm this is intended.
    passed: Optional[bool] = Field(
        True,
        description="True if the output matched the input exactly, False otherwise",
    )
33+
34+
35+
class ExactMatchEntry(EvaluatorEntry):
    # The pair of texts to compare. Both fields are optional and default to
    # None; the evaluator treats a missing value as an empty string.
    # NOTE(review): these annotations presumably drive the generated
    # required/optional field lists -- verify before changing their form.
    input: Optional[str] = None
    output: Optional[str] = None
38+
39+
40+
class ExactMatchEvaluator(
    BaseEvaluator[ExactMatchEntry, ExactMatchSettings, ExactMatchResult]
):
    """
    A simple evaluator that checks if the output matches the input exactly, with some
    extra bells and whistles to help with whitespace related shenanigans.
    """

    # NOTE: the class docstring above is kept verbatim on purpose; the
    # generated TypeScript metadata carries the same text as the evaluator
    # description.
    name = "Exact Match Evaluator"
    category = "quality"
    default_settings = ExactMatchSettings()
    is_guardrail = False

    def _normalize(self, text: Optional[str]) -> str:
        """Return ``text`` normalized according to the evaluator settings.

        A missing (``None``) text is treated as the empty string.
        """
        normalized = text or ""

        if self.settings.remove_punctuation:
            # Keep only alphanumeric and whitespace characters.
            normalized = "".join(
                char for char in normalized if char.isalnum() or char.isspace()
            )

        if self.settings.trim_whitespace:
            # Trim *after* removing punctuation: stripping first would leave
            # stray edge whitespace behind (e.g. "Hello !" -> "Hello "), which
            # then fails to match "Hello".
            normalized = normalized.strip()

        if not self.settings.case_sensitive:
            # casefold() is the caseless-matching form of lower() and also
            # handles non-ASCII cases such as "ß" vs "SS".
            normalized = normalized.casefold()

        return normalized

    def evaluate(self, entry: ExactMatchEntry) -> SingleEvaluationResult:
        """Compare the entry's input and output after normalization.

        Both texts go through the same normalization (punctuation removal,
        whitespace trimming and case folding, each controlled by the
        settings) and are then compared for equality.

        Returns:
            An ``ExactMatchResult`` whose ``passed`` flag is True when the
            normalized texts are equal, False otherwise.
        """
        passed = self._normalize(entry.input) == self._normalize(entry.output)

        return ExactMatchResult(passed=passed)
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
from langevals_langevals.exact_match import (
2+
ExactMatchEvaluator,
3+
ExactMatchEntry,
4+
ExactMatchSettings,
5+
)
6+
7+
8+
def test_langeval_exact_match_evaluator():
    """Identical input and output pass under the default settings."""
    entry = ExactMatchEntry(
        input="What is the capital of France?",
        output="What is the capital of France?",
    )
    evaluator = ExactMatchEvaluator(settings=ExactMatchSettings())

    result = evaluator.evaluate(entry)

    # Identity check instead of ``== True`` so only a real bool True passes.
    assert result.passed is True
19+
20+
21+
def test_langeval_exact_match_evaluator_defaults():
    """Different input and output fail under the default settings."""
    entry = ExactMatchEntry(
        input="What is the capital of France?",
        output="What is the capital of the Netherlands?",
    )
    evaluator = ExactMatchEvaluator(settings=ExactMatchSettings())

    result = evaluator.evaluate(entry)

    # Identity check instead of ``== False`` so a None result cannot slip by.
    assert result.passed is False
32+
33+
34+
def test_langeval_exact_match_case_sensitive_true():
    """Texts that differ only in casing fail when case_sensitive is on."""
    entry = ExactMatchEntry(
        input="Hello World",
        output="hello world",
    )
    evaluator = ExactMatchEvaluator(
        settings=ExactMatchSettings(case_sensitive=True)
    )

    result = evaluator.evaluate(entry)

    # Identity check instead of ``== False`` so a None result cannot slip by.
    assert result.passed is False
45+
46+
47+
def test_langeval_exact_match_case_sensitive_false():
    """Texts that differ only in casing pass when case_sensitive is off."""
    entry = ExactMatchEntry(
        input="Hello World",
        output="hello world",
    )
    evaluator = ExactMatchEvaluator(
        settings=ExactMatchSettings(case_sensitive=False)
    )

    result = evaluator.evaluate(entry)

    # Identity check instead of ``== True`` so only a real bool True passes.
    assert result.passed is True
58+
59+
60+
def test_langeval_exact_match_trim_whitespace_true():
    """Surrounding whitespace is ignored when trim_whitespace is on."""
    entry = ExactMatchEntry(
        input=" Hello World ",
        output="Hello World",
    )
    evaluator = ExactMatchEvaluator(
        settings=ExactMatchSettings(trim_whitespace=True)
    )

    result = evaluator.evaluate(entry)

    # Identity check instead of ``== True`` so only a real bool True passes.
    assert result.passed is True
71+
72+
73+
def test_langeval_exact_match_trim_whitespace_false():
    """Surrounding whitespace causes a mismatch when trim_whitespace is off."""
    entry = ExactMatchEntry(
        input=" Hello World ",
        output="Hello World",
    )
    evaluator = ExactMatchEvaluator(
        settings=ExactMatchSettings(trim_whitespace=False)
    )

    result = evaluator.evaluate(entry)

    # Identity check instead of ``== False`` so a None result cannot slip by.
    assert result.passed is False
84+
85+
86+
def test_langeval_exact_match_remove_punctuation_true():
    """Punctuation differences are ignored when remove_punctuation is on."""
    entry = ExactMatchEntry(
        input="Hello, World!",
        output="Hello World",
    )
    evaluator = ExactMatchEvaluator(
        settings=ExactMatchSettings(remove_punctuation=True)
    )

    result = evaluator.evaluate(entry)

    # Identity check instead of ``== True`` so only a real bool True passes.
    assert result.passed is True
97+
98+
99+
def test_langeval_exact_match_remove_punctuation_false():
    """Punctuation differences cause a mismatch when remove_punctuation is off."""
    entry = ExactMatchEntry(
        input="Hello, World!",
        output="Hello World",
    )
    evaluator = ExactMatchEvaluator(
        settings=ExactMatchSettings(remove_punctuation=False)
    )

    result = evaluator.evaluate(entry)

    # Identity check instead of ``== False`` so a None result cannot slip by.
    assert result.passed is False
110+
111+
112+
def test_langeval_exact_match_combined_settings():
    """Casing, edge whitespace and punctuation are all normalized together."""
    entry = ExactMatchEntry(
        input=" Hello, World! ",
        output="hello world",
    )
    settings = ExactMatchSettings(
        case_sensitive=False,
        trim_whitespace=True,
        remove_punctuation=True,
    )
    evaluator = ExactMatchEvaluator(settings=settings)

    result = evaluator.evaluate(entry)

    # Identity check instead of ``== True`` so only a real bool True passes.
    assert result.passed is True

ts-integration/evaluators.generated.ts

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,25 @@ export type Evaluators = {
165165
competitors: string[];
166166
};
167167
};
168+
"langevals/exact_match": {
169+
settings: {
170+
/**
171+
* @description True if the comparison should be case-sensitive, False otherwise
172+
* @default false
173+
*/
174+
case_sensitive: boolean;
175+
/**
176+
* @description True if the comparison should trim whitespace, False otherwise
177+
* @default true
178+
*/
179+
trim_whitespace: boolean;
180+
/**
181+
* @description True if the comparison should remove punctuation, False otherwise
182+
* @default true
183+
*/
184+
remove_punctuation: boolean;
185+
};
186+
};
168187
"langevals/llm_answer_match": {
169188
settings: {
170189
/**
@@ -1109,6 +1128,42 @@ This evaluator implements LLM-as-a-judge with a function call approach to check
11091128
},
11101129
},
11111130
},
1131+
"langevals/exact_match": {
1132+
name: `Exact Match Evaluator`,
1133+
description: `
1134+
A simple evaluator that checks if the output matches the input exactly, with some
1135+
extra bells and whistles to help with whitespace related shenanigans.
1136+
`,
1137+
category: "quality",
1138+
docsUrl: "",
1139+
isGuardrail: false,
1140+
requiredFields: [],
1141+
optionalFields: ["input", "output"],
1142+
settings: {
1143+
case_sensitive: {
1144+
description:
1145+
"True if the comparison should be case-sensitive, False otherwise",
1146+
default: false,
1147+
},
1148+
trim_whitespace: {
1149+
description:
1150+
"True if the comparison should trim whitespace, False otherwise",
1151+
default: true,
1152+
},
1153+
remove_punctuation: {
1154+
description:
1155+
"True if the comparison should remove punctuation, False otherwise",
1156+
default: true,
1157+
},
1158+
},
1159+
envVars: [],
1160+
result: {
1161+
passed: {
1162+
description:
1163+
"True if the output matched the input exactly, False otherwise",
1164+
},
1165+
},
1166+
},
11121167
"langevals/llm_answer_match": {
11131168
name: `LLM Answer Match`,
11141169
description: `

0 commit comments

Comments
 (0)