Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions evaluators/langevals/langevals_langevals/exact_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from typing import Optional
from langevals_core.base_evaluator import (
BaseEvaluator,
EvaluatorEntry,
EvaluationResult,
EvaluatorSettings,
SingleEvaluationResult,
)
from pydantic import Field


class ExactMatchSettings(EvaluatorSettings):
    # Switches controlling how both texts are normalised before comparison.
    # (Kept as comments rather than a class docstring so the generated schema
    # description is not affected.)

    # When False (the default), differences in letter case are ignored.
    case_sensitive: bool = Field(
        description="True if the comparison should be case-sensitive, False otherwise",
        default=False,
    )

    # When True (the default), leading and trailing whitespace is stripped.
    trim_whitespace: bool = Field(
        description="True if the comparison should trim whitespace, False otherwise",
        default=True,
    )

    # When True (the default), characters that are neither alphanumeric nor
    # whitespace are dropped from both texts.
    remove_punctuation: bool = Field(
        description="True if the comparison should remove punctuation, False otherwise",
        default=True,
    )



class ExactMatchResult(EvaluationResult):
    # Outcome of one exact-match evaluation.

    # NOTE(review): the default is True, so a result built without an explicit
    # `passed` value reports a pass -- confirm this is intended.
    passed: Optional[bool] = Field(
        description="True if the output matched the input exactly, False otherwise",
        default=True,
    )


class ExactMatchEntry(EvaluatorEntry):
    # The pair of texts to compare. Both fields are optional and default to
    # None; the evaluator treats a missing value as the empty string.
    input: Optional[str] = None
    output: Optional[str] = None


class ExactMatchEvaluator(
    BaseEvaluator[ExactMatchEntry, ExactMatchSettings, ExactMatchResult]
):
    """
    A simple evaluator that checks if the output matches the input exactly, with some
    extra bells and whistles to help with whitespace related shenanigans.
    """

    # NOTE: the docstring above is reproduced as this evaluator's description in
    # ts-integration/evaluators.generated.ts, so its wording is left untouched.
    # Besides whitespace, the settings also control case sensitivity and
    # punctuation removal.

    name = "Exact Match Evaluator"
    category = "quality"
    default_settings = ExactMatchSettings()
    is_guardrail = False

    def evaluate(self, entry: ExactMatchEntry) -> SingleEvaluationResult:
        """Compare ``entry.input`` with ``entry.output`` after normalisation.

        Both texts go through the same normalisation, driven by
        ``self.settings``:

        1. ``remove_punctuation``: drop every character that is neither
           alphanumeric nor whitespace.
        2. ``trim_whitespace``: strip leading and trailing whitespace.
        3. ``case_sensitive`` is False: case-fold the text.

        A missing (``None``) input or output is treated as the empty string.

        Args:
            entry: The entry holding the ``input`` and ``output`` texts.

        Returns:
            An ``ExactMatchResult`` whose ``passed`` flag is True when the two
            normalised texts are equal.
        """
        settings = self.settings

        def normalize(text: Optional[str]) -> str:
            """Apply the configured normalisation steps to one text."""
            text = text or ""

            if settings.remove_punctuation:
                text = "".join(
                    char for char in text if char.isalnum() or char.isspace()
                )

            # Trim only after punctuation has been removed: stripping first
            # leaves behind whitespace that sat next to leading or trailing
            # punctuation (e.g. "Hello World !" would become "Hello World ").
            if settings.trim_whitespace:
                text = text.strip()

            # casefold() is the caseless-matching form; unlike lower() it also
            # equates pairs such as "ß" and "SS".
            if not settings.case_sensitive:
                text = text.casefold()

            return text

        passed = normalize(entry.input) == normalize(entry.output)

        return ExactMatchResult(passed=passed)
126 changes: 126 additions & 0 deletions evaluators/langevals/tests/test_exact_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
from langevals_langevals.exact_match import (
ExactMatchEvaluator,
ExactMatchEntry,
ExactMatchSettings,
)


def test_langeval_exact_match_evaluator():
    """Identical input and output pass with the default settings."""
    entry = ExactMatchEntry(
        input="What is the capital of France?",
        output="What is the capital of France?",
    )
    settings = ExactMatchSettings()

    evaluator = ExactMatchEvaluator(settings=settings)
    result = evaluator.evaluate(entry)

    # Identity check instead of `== True`: also rejects truthy non-bool values.
    assert result.passed is True


def test_langeval_exact_match_evaluator_defaults():
    """Different input and output fail with the default settings."""
    entry = ExactMatchEntry(
        input="What is the capital of France?",
        output="What is the capital of the Netherlands?",
    )
    settings = ExactMatchSettings()

    evaluator = ExactMatchEvaluator(settings=settings)
    result = evaluator.evaluate(entry)

    # Identity check instead of `== False`: a None result must not count as a
    # clean failure.
    assert result.passed is False


def test_langeval_exact_match_case_sensitive_true():
    """Texts differing only in case do not match when case_sensitive=True."""
    entry = ExactMatchEntry(
        input="Hello World",
        output="hello world",
    )
    settings = ExactMatchSettings(case_sensitive=True)

    evaluator = ExactMatchEvaluator(settings=settings)
    result = evaluator.evaluate(entry)

    assert result.passed is False


def test_langeval_exact_match_case_sensitive_false():
    """Texts differing only in case match when case_sensitive=False."""
    entry = ExactMatchEntry(
        input="Hello World",
        output="hello world",
    )
    settings = ExactMatchSettings(case_sensitive=False)

    evaluator = ExactMatchEvaluator(settings=settings)
    result = evaluator.evaluate(entry)

    assert result.passed is True


def test_langeval_exact_match_trim_whitespace_true():
    """Surrounding whitespace is ignored when trim_whitespace=True."""
    entry = ExactMatchEntry(
        input=" Hello World ",
        output="Hello World",
    )
    settings = ExactMatchSettings(trim_whitespace=True)

    evaluator = ExactMatchEvaluator(settings=settings)
    result = evaluator.evaluate(entry)

    assert result.passed is True


def test_langeval_exact_match_trim_whitespace_false():
    """Surrounding whitespace causes a mismatch when trim_whitespace=False."""
    entry = ExactMatchEntry(
        input=" Hello World ",
        output="Hello World",
    )
    settings = ExactMatchSettings(trim_whitespace=False)

    evaluator = ExactMatchEvaluator(settings=settings)
    result = evaluator.evaluate(entry)

    assert result.passed is False


def test_langeval_exact_match_remove_punctuation_true():
    """Punctuation is ignored when remove_punctuation=True."""
    entry = ExactMatchEntry(
        input="Hello, World!",
        output="Hello World",
    )
    settings = ExactMatchSettings(remove_punctuation=True)

    evaluator = ExactMatchEvaluator(settings=settings)
    result = evaluator.evaluate(entry)

    assert result.passed is True


def test_langeval_exact_match_remove_punctuation_false():
    """Punctuation causes a mismatch when remove_punctuation=False."""
    entry = ExactMatchEntry(
        input="Hello, World!",
        output="Hello World",
    )
    settings = ExactMatchSettings(remove_punctuation=False)

    evaluator = ExactMatchEvaluator(settings=settings)
    result = evaluator.evaluate(entry)

    assert result.passed is False


def test_langeval_exact_match_combined_settings():
    """Case, whitespace and punctuation differences are all ignored together."""
    entry = ExactMatchEntry(
        input=" Hello, World! ",
        output="hello world",
    )
    settings = ExactMatchSettings(
        case_sensitive=False,
        trim_whitespace=True,
        remove_punctuation=True,
    )

    evaluator = ExactMatchEvaluator(settings=settings)
    result = evaluator.evaluate(entry)

    assert result.passed is True
55 changes: 55 additions & 0 deletions ts-integration/evaluators.generated.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,25 @@ export type Evaluators = {
competitors: string[];
};
};
"langevals/exact_match": {
settings: {
/**
* @description True if the comparison should be case-sensitive, False otherwise
* @default false
*/
case_sensitive: boolean;
/**
* @description True if the comparison should trim whitespace, False otherwise
* @default true
*/
trim_whitespace: boolean;
/**
* @description True if the comparison should remove punctuation, False otherwise
* @default true
*/
remove_punctuation: boolean;
};
};
"langevals/llm_answer_match": {
settings: {
/**
Expand Down Expand Up @@ -1109,6 +1128,42 @@ This evaluator implements LLM-as-a-judge with a function call approach to check
},
},
},
"langevals/exact_match": {
name: `Exact Match Evaluator`,
description: `
A simple evaluator that checks if the output matches the input exactly, with some
extra bells and whistles to help with whitespace related shenanigans.
`,
category: "quality",
docsUrl: "",
isGuardrail: false,
requiredFields: [],
optionalFields: ["input", "output"],
settings: {
case_sensitive: {
description:
"True if the comparison should be case-sensitive, False otherwise",
default: false,
},
trim_whitespace: {
description:
"True if the comparison should trim whitespace, False otherwise",
default: true,
},
remove_punctuation: {
description:
"True if the comparison should remove punctuation, False otherwise",
default: true,
},
},
envVars: [],
result: {
passed: {
description:
"True if the output matched the input exactly, False otherwise",
},
},
},
"langevals/llm_answer_match": {
name: `LLM Answer Match`,
description: `
Expand Down