@@ -16,6 +16,7 @@
 from litellm import Choices, Message
 from litellm.files.main import ModelResponse
 from litellm.cost_calculator import completion_cost
+import dspy
 
 
 class CustomLLMBooleanEntry(EvaluatorEntry):
@@ -33,7 +34,7 @@ class CustomLLMBooleanSettings(LLMEvaluatorSettings):
 
 
 class CustomLLMBooleanResult(EvaluationResult):
-    score: float
+    score: float = Field(default=0.0)
     passed: Optional[bool] = Field(
         description="The verdict given by the LLM", default=True
     )
@@ -74,7 +75,7 @@ def evaluate(self, entry: CustomLLMBooleanEntry) -> SingleEvaluationResult:
         content += f"# Task\n{self.settings.prompt}"
 
         total_tokens = len(
-            litellm.encode(
+            litellm.encode(  # type: ignore
                 model=self.settings.model, text=f"{self.settings.prompt} {content}"
             )
         )
@@ -86,55 +87,67 @@ def evaluate(self, entry: CustomLLMBooleanEntry) -> SingleEvaluationResult:
 
         cost = None
 
-        response = litellm.completion(
-            model=self.settings.model,
-            messages=[
-                {
-                    "role": "system",
-                    "content": self.settings.prompt
-                    + ". Always output a valid json for the function call",
-                },
-                {
-                    "role": "user",
-                    "content": content,
-                },
-            ],
-            tools=[
-                {
-                    "type": "function",
-                    "function": {
-                        "name": "evaluation",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {
-                                "scratchpad": {
-                                    "type": "string",
-                                    "description": "use this field to ponder and write a short reasoning behind the decision written before a result is actually given",
-                                },
-                                "passed": {
-                                    "type": "boolean",
-                                    "description": "your final verdict, reply true or false if the content passes the test or not",
+        if "atla-selene" in self.settings.model:
+
+            class LLMJudge(dspy.Signature):
+                content: str = dspy.InputField()
+                reasoning: str = dspy.OutputField()
+                passed: bool = dspy.OutputField()
+
+            judge = dspy.Predict(LLMJudge.with_instructions(self.settings.prompt))
+            judge.set_lm(lm=dspy.LM(model=self.settings.model))
+            arguments = judge(content=content)
+
+        else:
+            response = litellm.completion(
+                model=self.settings.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": self.settings.prompt
+                        + ". Always output a valid json for the function call",
+                    },
+                    {
+                        "role": "user",
+                        "content": content,
+                    },
+                ],
+                tools=[
+                    {
+                        "type": "function",
+                        "function": {
+                            "name": "evaluation",
+                            "parameters": {
+                                "type": "object",
+                                "properties": {
+                                    "reasoning": {
+                                        "type": "string",
+                                        "description": "use this field to ponder and write a short reasoning behind the decision written before a result is actually given",
+                                    },
+                                    "passed": {
+                                        "type": "boolean",
+                                        "description": "your final verdict, reply true or false if the content passes the test or not",
+                                    },
                                 },
+                                "required": ["reasoning", "passed"],
                             },
-                            "required": ["scratchpad", "passed"],
+                            "description": "use this function to write your thoughts on the reasoning, then decide if it passed or not with this json structure",
                         },
-                        "description": "use this function to write your thoughts on the scratchpad, then decide if it passed or not with this json structure",
                     },
-                },
-            ],
-            tool_choice={"type": "function", "function": {"name": "evaluation"}},  # type: ignore
-        )
+                ],
+                tool_choice={"type": "function", "function": {"name": "evaluation"}},  # type: ignore
+            )
 
-        response = cast(ModelResponse, response)
-        choice = cast(Choices, response.choices[0])
-        arguments = json.loads(
-            cast(Message, choice.message).tool_calls[0].function.arguments
-        )
-        cost = completion_cost(completion_response=response)
+            response = cast(ModelResponse, response)
+            choice = cast(Choices, response.choices[0])
+            arguments = json.loads(
+                cast(Message, choice.message).tool_calls[0].function.arguments  # type: ignore
+            )
+            cost = completion_cost(completion_response=response)
 
         return CustomLLMBooleanResult(
             score=1 if arguments["passed"] else 0,
             passed=arguments["passed"],
-            details=arguments["scratchpad"],
+            details=arguments["reasoning"],
             cost=Money(amount=cost, currency="USD") if cost else None,
         )
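For context on the new branch: models matched by the "atla-selene" check are judged through a typed DSPy predictor instead of the hand-rolled litellm tool call, and the prediction's reasoning and passed fields feed the same CustomLLMBooleanResult as before (cost stays None on this path, since completion_cost is only computed in the litellm branch). The snippet below is a minimal standalone sketch of that flow, not code from this PR: the model id, prompt, and sample content are placeholders, and it assumes dspy is installed and the provider's API key is set in the environment.

import dspy


class LLMJudge(dspy.Signature):
    """Boolean LLM-as-judge: free-form reasoning plus a pass/fail verdict."""

    content: str = dspy.InputField()
    reasoning: str = dspy.OutputField()
    passed: bool = dspy.OutputField()


# The evaluator's settings.prompt becomes the signature instructions;
# the prompt and model id here are placeholders for illustration only.
judge = dspy.Predict(LLMJudge.with_instructions("Is the answer polite and helpful?"))
judge.set_lm(lm=dspy.LM(model="openai/gpt-4o-mini"))

prediction = judge(content="# Output\nSure, happy to help with that!")

# These are the fields the evaluator reads when building CustomLLMBooleanResult:
# score and passed come from `passed`, details from `reasoning`.
print(prediction.passed)
print(prediction.reasoning)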