Skip to content

Commit 9eaee7c

Browse files
committed
fix: return score 1 or 0 for evaluators that were only returning passed true/false
1 parent: 3a934d1 · commit: 9eaee7c

File tree

2 files changed: 25 additions (+25) and 10 deletions (-10)

2 files changed

+25
-10
lines changed

evaluators/langevals/langevals_langevals/exact_match.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@ class ExactMatchSettings(EvaluatorSettings):
2424
)
2525

2626

27-
2827
class ExactMatchResult(EvaluationResult):
2928
passed: Optional[bool] = Field(
3029
default=True,
@@ -58,13 +57,19 @@ def evaluate(self, entry: ExactMatchEntry) -> SingleEvaluationResult:
5857
expected_output_text = expected_output_text.strip()
5958

6059
if self.settings.remove_punctuation:
61-
output_text = ''.join(char for char in output_text if char.isalnum() or char.isspace())
62-
expected_output_text = ''.join(char for char in expected_output_text if char.isalnum() or char.isspace())
60+
output_text = "".join(
61+
char for char in output_text if char.isalnum() or char.isspace()
62+
)
63+
expected_output_text = "".join(
64+
char
65+
for char in expected_output_text
66+
if char.isalnum() or char.isspace()
67+
)
6368

6469
if not self.settings.case_sensitive:
6570
output_text = output_text.lower()
6671
expected_output_text = expected_output_text.lower()
6772

6873
passed = output_text == expected_output_text
6974

70-
return ExactMatchResult(passed=passed)
75+
return ExactMatchResult(score=1 if passed else 0, passed=passed)

evaluators/langevals/langevals_langevals/valid_format.py

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -71,10 +71,14 @@ def evaluate(self, entry: ValidFormatEntry) -> SingleEvaluationResult:
7171
)
7272
except ValidationError as e:
7373
return ValidFormatResult(
74-
passed=False, details=f"JSON Schema validation failed: {e}"
74+
score=0,
75+
passed=False,
76+
details=f"JSON Schema validation failed: {e}",
7577
)
7678
except json.JSONDecodeError as e:
77-
return ValidFormatResult(passed=False, details=f"Invalid JSON: {e}")
79+
return ValidFormatResult(
80+
score=0, passed=False, details=f"Invalid JSON: {e}"
81+
)
7882
elif self.settings.format == "markdown":
7983
try:
8084
html_result = markdown.markdown(entry.output)
@@ -93,12 +97,16 @@ def evaluate(self, entry: ValidFormatEntry) -> SingleEvaluationResult:
9397
details="No markdown elements found. Text should contain markdown formatting like headers (#), bold (**), lists, etc.",
9498
)
9599
except Exception as e:
96-
return ValidFormatResult(passed=False, details=f"Invalid Markdown: {e}")
100+
return ValidFormatResult(
101+
score=0, passed=False, details=f"Invalid Markdown: {e}"
102+
)
97103
elif self.settings.format == "python":
98104
try:
99105
ast.parse(entry.output)
100106
except Exception as e:
101-
return ValidFormatResult(passed=False, details=f"Invalid Python: {e}")
107+
return ValidFormatResult(
108+
score=0, passed=False, details=f"Invalid Python: {e}"
109+
)
102110
elif self.settings.format == "sql":
103111
try:
104112
try:
@@ -110,6 +118,8 @@ def evaluate(self, entry: ValidFormatEntry) -> SingleEvaluationResult:
110118
except Exception:
111119
sqlglot.parse(entry.output)
112120
except Exception as e:
113-
return ValidFormatResult(passed=False, details=f"Invalid SQL: {e}")
121+
return ValidFormatResult(
122+
score=0, passed=False, details=f"Invalid SQL: {e}"
123+
)
114124

115-
return ValidFormatResult(passed=True)
125+
return ValidFormatResult(score=1, passed=True)

0 commit comments

Comments
 (0)