2
2
3
3
dotenv .load_dotenv ()
4
4
5
+ from langevals_core .base_evaluator import ConversationEntry
6
+
5
7
from langevals_langevals .query_resolution import (
6
- QueryResolutionConversationEntry ,
7
8
QueryResolutionEntry ,
8
9
QueryResolutionSettings ,
9
10
QueryResolutionResult ,
12
13
13
14
14
15
def test_query_resolution_conversation_evaluator_pass_for_simple_greetings ():
15
- response1 = QueryResolutionConversationEntry (
16
+ response1 = ConversationEntry (
16
17
input = "Hey, how are you?" ,
17
18
output = "Hello, I am an assistant and I don't have feelings" ,
18
19
)
19
20
conversation = QueryResolutionEntry (conversation = [response1 ])
20
- settings = QueryResolutionSettings (
21
- model = "openai/gpt-4o-mini" , max_tokens = 10000
22
- )
21
+ settings = QueryResolutionSettings (model = "openai/gpt-4o-mini" , max_tokens = 10000 )
23
22
evaluator = QueryResolutionEvaluator (settings = settings )
24
23
result = evaluator .evaluate (conversation )
25
24
@@ -30,18 +29,16 @@ def test_query_resolution_conversation_evaluator_pass_for_simple_greetings():
30
29
31
30
32
31
def test_query_resolution_conversation_evaluator_pass ():
33
- response1 = QueryResolutionConversationEntry (
32
+ response1 = ConversationEntry (
34
33
input = "Hey, how are you?" ,
35
34
output = "Hello, I am an assistant and I don't have feelings" ,
36
35
)
37
- response2 = QueryResolutionConversationEntry (
36
+ response2 = ConversationEntry (
38
37
input = "Okay, is there a president in the Netherlands? Also, tell me what is the system of government in the Netherlands?" ,
39
38
output = "There is no president in the Netherlands. The system of government is constitutional monarchy." ,
40
39
)
41
40
conversation = QueryResolutionEntry (conversation = [response1 , response2 ])
42
- settings = QueryResolutionSettings (
43
- model = "openai/gpt-4o-mini" , max_tokens = 10000
44
- )
41
+ settings = QueryResolutionSettings (model = "openai/gpt-4o-mini" , max_tokens = 10000 )
45
42
evaluator = QueryResolutionEvaluator (settings = settings )
46
43
result = evaluator .evaluate (conversation )
47
44
@@ -52,18 +49,16 @@ def test_query_resolution_conversation_evaluator_pass():
52
49
53
50
54
51
def test_query_resolution_conversation_evaluator_fail ():
55
- response1 = QueryResolutionConversationEntry (
52
+ response1 = ConversationEntry (
56
53
input = "Hey, how are you?" ,
57
54
output = "Hello, I am an assistant and I don't have feelings" ,
58
55
)
59
- response2 = QueryResolutionConversationEntry (
56
+ response2 = ConversationEntry (
60
57
input = "Okay, is there a president in the Netherlands? Also, what equals 2 + 2? How many paws does a standard dog have?" ,
61
58
output = "There is no president in the Netherlands." ,
62
59
)
63
60
conversation = QueryResolutionEntry (conversation = [response1 , response2 ])
64
- settings = QueryResolutionSettings (
65
- model = "openai/gpt-4o-mini" , max_tokens = 10000
66
- )
61
+ settings = QueryResolutionSettings (model = "openai/gpt-4o-mini" , max_tokens = 10000 )
67
62
evaluator = QueryResolutionEvaluator (settings = settings )
68
63
result = evaluator .evaluate (conversation )
69
64
@@ -74,14 +69,12 @@ def test_query_resolution_conversation_evaluator_fail():
74
69
75
70
76
71
def test_query_resolution_conversation_evaluator_fails_with_i_dont_know ():
77
- response1 = QueryResolutionConversationEntry (
72
+ response1 = ConversationEntry (
78
73
input = "What time is it?" ,
79
74
output = "Sorry, I don't have any information about the current time" ,
80
75
)
81
76
conversation = QueryResolutionEntry (conversation = [response1 ])
82
- settings = QueryResolutionSettings (
83
- model = "openai/gpt-4o-mini" , max_tokens = 10000
84
- )
77
+ settings = QueryResolutionSettings (model = "openai/gpt-4o-mini" , max_tokens = 10000 )
85
78
evaluator = QueryResolutionEvaluator (settings = settings )
86
79
result = evaluator .evaluate (conversation )
87
80
@@ -92,12 +85,10 @@ def test_query_resolution_conversation_evaluator_fails_with_i_dont_know():
92
85
93
86
94
87
def test_product_sentiment_polarity_evaluator_skipped_for_non_product_related_outputs ():
95
- response1 = QueryResolutionConversationEntry (input = "" , output = "" )
96
- response2 = QueryResolutionConversationEntry (input = "" , output = "" )
88
+ response1 = ConversationEntry (input = "" , output = "" )
89
+ response2 = ConversationEntry (input = "" , output = "" )
97
90
conversation = QueryResolutionEntry (conversation = [response1 , response2 ])
98
- settings = QueryResolutionSettings (
99
- model = "openai/gpt-4o-mini" , max_tokens = 10000
100
- )
91
+ settings = QueryResolutionSettings (model = "openai/gpt-4o-mini" , max_tokens = 10000 )
101
92
evaluator = QueryResolutionEvaluator (settings = settings )
102
93
result = evaluator .evaluate (conversation )
103
94
0 commit comments