feature: add exact match evaluator (#13)

0xdeafcafe · web-flow · commit 3483ed36242e · 2025-03-26T16:04:47.000+01:00
* added exact match evaluator

* added exact match langeval

* added tests for exact match

* generated exact match types

* remove trailing whitespace

* remove comments
diff --git a/evaluators/langevals/langevals_langevals/exact_match.py b/evaluators/langevals/langevals_langevals/exact_match.py
@@ -0,0 +1,71 @@
+from typing import Optional
+from langevals_core.base_evaluator import (
+    BaseEvaluator,
+    EvaluatorEntry,
+    EvaluationResult,
+    EvaluatorSettings,
+    SingleEvaluationResult,
+)
+from pydantic import Field
+
+
+class ExactMatchSettings(EvaluatorSettings):  # normalization options read by ExactMatchEvaluator.evaluate
+    case_sensitive: bool = Field(  # when False, both texts are lowercased before comparing
+        default=False,
+        description="True if the comparison should be case-sensitive, False otherwise",
+    )
+    trim_whitespace: bool = Field(  # when True, leading/trailing whitespace is stripped from both texts
+        default=True,
+        description="True if the comparison should trim whitespace, False otherwise",
+    )
+    remove_punctuation: bool = Field(  # when True, only alphanumeric and whitespace characters are kept
+        default=True,
+        description="True if the comparison should remove punctuation, False otherwise",
+    )
+
+
+
+class ExactMatchResult(EvaluationResult):  # result type built by ExactMatchEvaluator.evaluate
+    passed: Optional[bool] = Field(  # evaluate() always sets this explicitly from the comparison
+        default=True,  # NOTE(review): a result built without `passed` reports a pass - confirm this is intended
+        description="True if the output matched the input exactly, False otherwise",
+    )
+
+
+class ExactMatchEntry(EvaluatorEntry):  # the two texts that ExactMatchEvaluator compares
+    input: Optional[str] = None  # evaluate() treats None the same as ""
+    output: Optional[str] = None  # evaluate() treats None the same as ""
+
+
+class ExactMatchEvaluator(
+    BaseEvaluator[ExactMatchEntry, ExactMatchSettings, ExactMatchResult]
+):
+    """
+    A simple evaluator that checks if the output matches the input exactly, with some 
+    extra bells and whistles to help with whitespace related shenanigans.
+    """
+
+    name = "Exact Match Evaluator"
+    category = "quality"
+    default_settings = ExactMatchSettings()
+    is_guardrail = False
+
+    def evaluate(self, entry: ExactMatchEntry) -> SingleEvaluationResult:
+        input_text = entry.input or ""
+        output_text = entry.output or ""
+
+        if self.settings.trim_whitespace:
+            input_text = input_text.strip()
+            output_text = output_text.strip()
+
+        if self.settings.remove_punctuation:
+            input_text = ''.join(char for char in input_text if char.isalnum() or char.isspace())
+            output_text = ''.join(char for char in output_text if char.isalnum() or char.isspace())
+
+        if not self.settings.case_sensitive:
+            input_text = input_text.lower()
+            output_text = output_text.lower()
+
+        passed = input_text == output_text
+
+        return ExactMatchResult(passed=passed)
diff --git a/evaluators/langevals/tests/test_exact_match.py b/evaluators/langevals/tests/test_exact_match.py
@@ -0,0 +1,126 @@
+from langevals_langevals.exact_match import (
+    ExactMatchEvaluator,
+    ExactMatchEntry,
+    ExactMatchSettings,
+)
+
+
+def test_langeval_exact_match_evaluator():
+    entry = ExactMatchEntry(
+        input="What is the capital of France?",
+        output="What is the capital of France?",
+    )
+    settings = ExactMatchSettings()
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed == True
+
+
+def test_langeval_exact_match_evaluator_defaults():
+    entry = ExactMatchEntry(
+        input="What is the capital of France?",
+        output="What is the capital of the Netherlands?",
+    )
+    settings = ExactMatchSettings()
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed == False
+
+
+def test_langeval_exact_match_case_sensitive_true():
+    entry = ExactMatchEntry(
+        input="Hello World",
+        output="hello world",
+    )
+    settings = ExactMatchSettings(case_sensitive=True)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed == False
+
+
+def test_langeval_exact_match_case_sensitive_false():
+    entry = ExactMatchEntry(
+        input="Hello World",
+        output="hello world",
+    )
+    settings = ExactMatchSettings(case_sensitive=False)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed == True
+
+
+def test_langeval_exact_match_trim_whitespace_true():
+    entry = ExactMatchEntry(
+        input="  Hello World  ",
+        output="Hello World",
+    )
+    settings = ExactMatchSettings(trim_whitespace=True)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed == True
+
+
+def test_langeval_exact_match_trim_whitespace_false():
+    entry = ExactMatchEntry(
+        input="  Hello World  ",
+        output="Hello World",
+    )
+    settings = ExactMatchSettings(trim_whitespace=False)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed == False
+
+
+def test_langeval_exact_match_remove_punctuation_true():
+    entry = ExactMatchEntry(
+        input="Hello, World!",
+        output="Hello World",
+    )
+    settings = ExactMatchSettings(remove_punctuation=True)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed == True
+
+
+def test_langeval_exact_match_remove_punctuation_false():
+    entry = ExactMatchEntry(
+        input="Hello, World!",
+        output="Hello World",
+    )
+    settings = ExactMatchSettings(remove_punctuation=False)
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed == False
+
+
+def test_langeval_exact_match_combined_settings():
+    entry = ExactMatchEntry(
+        input="  Hello, World!  ",
+        output="hello world",
+    )
+    settings = ExactMatchSettings(
+        case_sensitive=False,
+        trim_whitespace=True,
+        remove_punctuation=True
+    )
+
+    evaluator = ExactMatchEvaluator(settings=settings)
+    result = evaluator.evaluate(entry)
+
+    assert result.passed == True
diff --git a/ts-integration/evaluators.generated.ts b/ts-integration/evaluators.generated.ts
@@ -165,6 +165,25 @@ export type Evaluators = {
       competitors: string[];
     };
   };
+  "langevals/exact_match": {
+    settings: {
+      /**
+       * @description True if the comparison should be case-sensitive, False otherwise
+       * @default false
+       */
+      case_sensitive: boolean;
+      /**
+       * @description True if the comparison should trim whitespace, False otherwise
+       * @default true
+       */
+      trim_whitespace: boolean;
+      /**
+       * @description True if the comparison should remove punctuation, False otherwise
+       * @default true
+       */
+      remove_punctuation: boolean;
+    };
+  };
   "langevals/llm_answer_match": {
     settings: {
       /**
@@ -1109,6 +1128,42 @@ This evaluator implements LLM-as-a-judge with a function call approach to check
       },
     },
   },
+  "langevals/exact_match": {
+    name: `Exact Match Evaluator`,
+    description: `
+A simple evaluator that checks if the output matches the input exactly, with some 
+extra bells and whistles to help with whitespace related shenanigans.
+`,
+    category: "quality",
+    docsUrl: "",
+    isGuardrail: false,
+    requiredFields: [],
+    optionalFields: ["input", "output"],
+    settings: {
+      case_sensitive: {
+        description:
+          "True if the comparison should be case-sensitive, False otherwise",
+        default: false,
+      },
+      trim_whitespace: {
+        description:
+          "True if the comparison should trim whitespace, False otherwise",
+        default: true,
+      },
+      remove_punctuation: {
+        description:
+          "True if the comparison should remove punctuation, False otherwise",
+        default: true,
+      },
+    },
+    envVars: [],
+    result: {
+      passed: {
+        description:
+          "True if the output matched the input exactly, False otherwise",
+      },
+    },
+  },
   "langevals/llm_answer_match": {
     name: `LLM Answer Match`,
     description: `