Skip to content

Commit f8d5c7d

Browse files
committed
Fix factual correctness returning only true/false because score variable was overshadowed
1 parent 8fed473 commit f8d5c7d

File tree

2 files changed

+8
-30
lines changed

2 files changed

+8
-30
lines changed

evaluators/ragas/langevals_ragas/factual_correctness.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,7 @@ class RagasFactualCorrectnessEvaluator(
6161
category = "quality"
6262
env_vars = env_vars
6363
default_settings = RagasFactualCorrectnessSettings()
64-
docs_url = (
65-
"https://docs.ragas.io/en/stable/concepts/metrics/available_metrics/factual_correctness/"
66-
)
64+
docs_url = "https://docs.ragas.io/en/stable/concepts/metrics/available_metrics/factual_correctness/"
6765
is_guardrail = False
6866

6967
def evaluate(self, entry: RagasFactualCorrectnessEntry) -> SingleEvaluationResult:
@@ -121,16 +119,16 @@ async def verify_claims(premise: str, hypothesis_list: list[str], callbacks):
121119
details += (
122120
f"# Precision\nPremise: {breakdown['premise']}\nHypothesis list:\n"
123121
)
124-
for i, score in enumerate(breakdown["scores"]):
125-
details += f"- \"{breakdown['hypothesis_list'][i]}\": {score}\n"
122+
for i, score_ in enumerate(breakdown["scores"]):
123+
details += f"- \"{breakdown['hypothesis_list'][i]}\": {score_}\n"
126124

127125
if len(breakdowns) > 1:
128126
breakdown = breakdowns[1]
129127
details += (
130128
f"\n# Recall\nPremise: {breakdown['premise']}\nHypothesis list:\n"
131129
)
132-
for i, score in enumerate(breakdown["scores"]):
133-
details += f"- \"{breakdown['hypothesis_list'][i]}\": {score}\n"
130+
for i, score_ in enumerate(breakdown["scores"]):
131+
details += f"- \"{breakdown['hypothesis_list'][i]}\": {score_}\n"
134132

135133
return RagasResult(
136134
score=score,

evaluators/ragas/tests/test_ragas.py

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -72,28 +72,10 @@ def test_faithfulness():
7272
result = evaluator.evaluate(
7373
RagasFaithfulnessEntry(
7474
output="The capital of France is Paris.",
75-
contexts=["France is a country in Europe.", "Paris is a city in France."],
76-
)
77-
)
78-
79-
assert result.status == "processed"
80-
assert result.score and result.score > 0.9
81-
assert result.cost and result.cost.amount > 0.0
82-
assert result.details
83-
84-
85-
@pytest.mark.flaky(max_runs=3)
86-
def test_faithfulness_hhem():
87-
evaluator = RagasFaithfulnessEvaluator(
88-
settings=RagasFaithfulnessSettings(use_hhem=True, model="openai/gpt-3.5-turbo")
89-
)
90-
91-
result = evaluator.evaluate(
92-
RagasFaithfulnessEntry(
93-
input="When was the first super bowl?",
94-
output="The first superbowl was held on Jan 15, 1967",
9575
contexts=[
96-
"The First AFL–NFL World Championship Game was an American football game played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles."
76+
"France is a country in Europe.",
77+
"Paris is a city in France.",
78+
"Paris is the capital of France.",
9779
],
9880
)
9981
)
@@ -373,8 +355,6 @@ def test_rubrics_based_scoring_without_reference():
373355
)
374356
)
375357

376-
print("\n\nresult", result, "\n\n")
377-
378358
assert result.status == "processed"
379359
assert result.score and result.score == 5
380360
assert result.cost and result.cost.amount > 0.0

0 commit comments

Comments
 (0)