1
+ from typing import Sequence
1
2
from langevals_core .base_evaluator import (
2
3
BaseEvaluator ,
3
4
EvaluationResult ,
16
17
)
17
18
from pydantic import Field
18
19
from ragas .metrics import ResponseRelevancy
20
+ from ragas .metrics ._answer_relevance import ResponseRelevanceOutput
19
21
20
22
21
23
class RagasResponseRelevancyEntry (EvaluatorEntry ):
@@ -56,7 +58,7 @@ class RagasResponseRelevancyEvaluator(
56
58
is_guardrail = False
57
59
58
60
def evaluate (self , entry : RagasResponseRelevancyEntry ) -> SingleEvaluationResult :
59
- llm , embeddings = prepare_llm (self , self .settings )
61
+ llm , embeddings = prepare_llm (self , self .settings , temperature = 0.7 )
60
62
61
63
skip = check_max_tokens (
62
64
input = entry .input ,
@@ -69,17 +71,23 @@ def evaluate(self, entry: RagasResponseRelevancyEntry) -> SingleEvaluationResult
69
71
scorer = ResponseRelevancy (llm = llm , embeddings = embeddings )
70
72
71
73
_original_calculate_similarity = scorer .calculate_similarity
74
+ _original_calculate_score = scorer ._calculate_score
72
75
73
- breakdown = {"similarity" : 0 , "generated_questions " : []}
76
+ breakdown = {"similarity" : 0 , "answers " : []}
74
77
75
78
def calculate_similarity(question: str, generated_questions):
    """Wrap the scorer's original similarity routine and keep a running total.

    Forwards both arguments unchanged to the saved
    ``_original_calculate_similarity`` callable, adds whatever it returns
    to ``breakdown["similarity"]``, and hands the same value back to the
    caller.
    """
    # ``breakdown`` is only mutated through item assignment, never rebound,
    # so the closure can reach it without a ``nonlocal`` declaration.
    result = _original_calculate_similarity(question, generated_questions)
    breakdown["similarity"] += result
    return result
81
83
84
def _calculate_score(answers: Sequence[ResponseRelevanceOutput], row: dict):
    """Record the answers passed to the scorer, then run the original scoring.

    Every item in ``answers`` is appended to ``breakdown["answers"]`` so the
    enclosing code can inspect them after scoring; the return value is
    whatever the saved ``_original_calculate_score`` produces for the same
    ``answers`` and ``row``.
    """
    # In-place extension of the shared list; the name ``breakdown`` itself
    # is never rebound, so no ``nonlocal`` declaration is required.
    breakdown["answers"].extend(answers)
    return _original_calculate_score(answers, row)
88
+
82
89
scorer .calculate_similarity = calculate_similarity
90
+ scorer ._calculate_score = _calculate_score
83
91
84
92
with capture_cost (llm ) as cost :
85
93
score = scorer .single_turn_score (
@@ -89,15 +97,19 @@ def calculate_similarity(question: str, generated_questions):
89
97
)
90
98
)
91
99
92
- generated_questions = "\n - " .join (breakdown ["generated_questions" ])
100
+ generated_questions = "\n " .join (
101
+ [f"- { answer .question } " for answer in breakdown ["answers" ]]
102
+ )
93
103
94
- if len (breakdown ["generated_questions" ]) == 0 :
104
+ if len ([ answer for answer in breakdown ["answers" ] if answer . question ]) == 0 :
95
105
return EvaluationResultSkipped (
96
106
details = "No questions could be generated from output." ,
97
107
)
98
108
109
+ any_noncommittal = any ([answer .noncommittal for answer in breakdown ["answers" ]])
110
+
99
111
return RagasResult (
100
112
score = score ,
101
113
cost = cost ,
102
- details = f"Questions generated from output:\n { generated_questions } \n Similarity to original question: { breakdown ['similarity' ]} " ,
114
+ details = f"Questions generated from output:\n \n { generated_questions } \n \ n Similarity to original question: { breakdown ['similarity' ]} \n Evasive answer: { any_noncommittal } " ,
103
115
)
0 commit comments