
Commit 1e79468

Centralized the choice of the model in LLMEvaluatorSettings

1 parent f4f006e · commit 1e79468

12 files changed, +59 -43 lines changed

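
The theme of the commit is moving the choice of model into a shared LLMEvaluatorSettings base class, so each LLM-backed evaluator no longer has to declare its own model field. A minimal sketch of the idea, assuming a pydantic-based settings hierarchy; the real definition lives in langevals_core and is not part of this diff, so the field names and defaults below are illustrative only:

    from pydantic import BaseModel, Field

    class EvaluatorSettings(BaseModel):
        # Base settings shared by every evaluator (assumed shape).
        pass

    class LLMEvaluatorSettings(EvaluatorSettings):
        # Centralized choice of the LLM used by all LLM-backed evaluators.
        model: str = Field(
            default="openai/gpt-3.5-turbo",  # illustrative default, not from this commit
            description="The model to use for evaluation.",
        )

    class CustomLLMBooleanSettings(LLMEvaluatorSettings):
        # Evaluator-specific settings now only add their own fields;
        # `model` is inherited from LLMEvaluatorSettings.
        prompt: str = Field(default="You are an LLM evaluator...")

With a shared base class like this, switching the evaluation model for every LLM-based evaluator becomes a single change in one place instead of one edit per evaluator.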

evaluators/haystack/langevals_haystack/faithfulness.py

Lines changed: 2 additions & 1 deletion
@@ -12,6 +12,7 @@
     EvaluatorSettings,
     SingleEvaluationResult,
     Money,
+    LLMEvaluatorSettings
 )
 from pydantic import BaseModel, Field
 from haystack.components.evaluators import FaithfulnessEvaluator
@@ -28,7 +29,7 @@ class HaystackFaithfulnessEntry(EvaluatorEntry):
     contexts: list[str]
 
 
-class HaystackFaithfulnessSettings(EvaluatorSettings):
+class HaystackFaithfulnessSettings(LLMEvaluatorSettings):
     model: str = Field(
         default="azure/gpt-35-turbo-1106",
         description="The model to use for evaluation.",

evaluators/huggingface/langevals_huggingface/llama_guard.py

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@
     EvaluatorEntry,
     EvaluationResult,
     EvaluatorSettings,
+    LLMEvaluatorSettings,
     SingleEvaluationResult,
     Money,
 )

evaluators/langevals/langevals_langevals/basic.py

Lines changed: 2 additions & 1 deletion
@@ -5,6 +5,7 @@
     EvaluatorEntry,
     EvaluationResult,
     EvaluatorSettings,
+    LLMEvaluatorSettings,
     SingleEvaluationResult,
 )
 from pydantic import BaseModel, Field
@@ -26,7 +27,7 @@ class CustomBasicRule(BaseModel):
     value: str
 
 
-class CustomBasicSettings(EvaluatorSettings):
+class CustomBasicSettings(LLMEvaluatorSettings):
     rules: list[CustomBasicRule] = Field(default=[
         CustomBasicRule(field="output", rule="not_contains", value="artificial intelligence"),
     ], description="List of rules to check, the message must pass all of them")

evaluators/langevals/langevals_langevals/competitor_llm.py

Lines changed: 2 additions & 1 deletion
@@ -15,6 +15,7 @@
     EvaluatorEntry,
     EvaluationResult,
     EvaluatorSettings,
+    LLMEvaluatorSettings,
     SingleEvaluationResult,
     EvaluationResultSkipped,
     Money,
@@ -26,7 +27,7 @@ class CompetitorLLMEntry(EvaluatorEntry):
     input: Optional[str] = None
 
 
-class CompetitorLLMSettings(EvaluatorSettings):
+class CompetitorLLMSettings(LLMEvaluatorSettings):
     name: str = Field(default="LangWatch", description="The name of your company")
     description: str = Field(
         default="We are providing an LLM observability and evaluation platform",

evaluators/langevals/langevals_langevals/competitor_llm_function_call.py

Lines changed: 3 additions & 2 deletions
@@ -12,7 +12,7 @@
     BaseEvaluator,
     EvaluatorEntry,
     EvaluationResult,
-    EvaluatorSettings,
+    LLMEvaluatorSettings,
     SingleEvaluationResult,
     EvaluationResultSkipped,
     Money,
@@ -24,7 +24,7 @@ class CompetitorLLMFunctionCallEntry(EvaluatorEntry):
     input: Optional[str] = None
 
 
-class CompetitorLLMFunctionCallSettings(EvaluatorSettings):
+class CompetitorLLMFunctionCallSettings(LLMEvaluatorSettings):
     name: str = Field(default="LangWatch", description="The name of your company")
     description: str = Field(
         default="We are providing an LLM observability and evaluation platform",
@@ -34,6 +34,7 @@ class CompetitorLLMFunctionCallSettings(EvaluatorSettings):
         default=["OpenAI", "Google", "Microsoft"],
         description="The competitors that must not be mentioned.",
     )
+
 
 
 class CompetitorLLMFunctionCallResult(EvaluationResult):
evaluators/langevals/langevals_langevals/llm_boolean.py

Lines changed: 5 additions & 2 deletions
@@ -6,13 +6,16 @@
     EvaluatorEntry,
     EvaluationResult,
     EvaluatorSettings,
+    LLMEvaluatorSettings,
     SingleEvaluationResult,
     EvaluationResultSkipped,
     Money,
 )
 from pydantic import BaseModel, Field
 import litellm
-from litellm import ModelResponse, Choices, Message, completion_cost
+from litellm import Choices, Message
+from litellm.files.main import ModelResponse
+from litellm.cost_calculator import completion_cost
 
 
 class CustomLLMBooleanEntry(EvaluatorEntry):
@@ -21,7 +24,7 @@ class CustomLLMBooleanEntry(EvaluatorEntry):
     contexts: Optional[list[str]] = None
 
 
-class CustomLLMBooleanSettings(EvaluatorSettings):
+class CustomLLMBooleanSettings(LLMEvaluatorSettings):
     prompt: str = Field(
         default="You are an LLM evaluator. We need the guarantee that the output answers what is being asked on the input, please evaluate as False if it doesn't",
         description="The system prompt to use for the LLM to run the evaluation",

evaluators/langevals/langevals_langevals/llm_score.py

Lines changed: 5 additions & 2 deletions
@@ -5,14 +5,17 @@
     BaseEvaluator,
     EvaluatorEntry,
     EvaluationResult,
+    LLMEvaluatorSettings,
     SingleEvaluationResult,
     EvaluationResultSkipped,
     Money,
     EvaluatorSettings
 )
 from pydantic import Field
 import litellm
-from litellm import ModelResponse, Choices, Message, completion_cost
+from litellm import Choices, Message
+from litellm.files.main import ModelResponse
+from litellm.cost_calculator import completion_cost
 
 
 
@@ -22,7 +25,7 @@ class CustomLLMScoreEntry(EvaluatorEntry):
     contexts: Optional[list[str]] = None
 
 
-class CustomLLMScoreSettings(EvaluatorSettings):
+class CustomLLMScoreSettings(LLMEvaluatorSettings):
     prompt: str = Field(
         default="You are an LLM evaluator. Please score from 0.0 to 1.0 how likely the user is to be satisfied with this answer, from 0.0 being not satisfied at all to 1.0 being completely satisfied",
         description="The system prompt to use for the LLM to run the evaluation",

evaluators/langevals/langevals_langevals/off_topic.py

Lines changed: 6 additions & 4 deletions
@@ -1,7 +1,8 @@
 import litellm
-from litellm import get_max_tokens, completion_cost
-from litellm import ModelResponse, Choices, Message
-from litellm.utils import trim_messages
+from litellm import Choices, Message
+from litellm.files.main import ModelResponse
+from litellm.cost_calculator import completion_cost
+from litellm.utils import trim_messages, get_max_tokens
 
 from pydantic import BaseModel, Field
 from typing import Optional, List, Literal, cast
@@ -12,6 +13,7 @@
     BaseEvaluator,
     EvaluatorEntry,
     EvaluationResult,
+    LLMEvaluatorSettings,
     SingleEvaluationResult,
     EvaluationResultSkipped,
     Money,
@@ -28,7 +30,7 @@ class AllowedTopic(BaseModel):
     description: str
 
 
-class OffTopicSettings(EvaluatorSettings):
+class OffTopicSettings(LLMEvaluatorSettings):
     allowed_topics: List[AllowedTopic] = Field(
         default=[
             AllowedTopic(topic="simple_chat", description="Smalltalk with the user"),
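
off_topic.py also pulls get_max_tokens into the same litellm.utils import as trim_messages. The usual pattern is to clamp the conversation to what the model can accept before calling it; a rough sketch under the same assumptions as above (placeholder model name and messages, not code from this commit):

    from litellm.utils import get_max_tokens, trim_messages

    model = "gpt-3.5-turbo"  # placeholder; the evaluator reads this from settings.model
    max_tokens = get_max_tokens(model)  # token limit litellm knows for this model

    messages = [
        {"role": "system", "content": "Classify whether the user message fits one of the allowed topics."},
        {"role": "user", "content": "a potentially very long user message ..."},
    ]

    # Trim the message list so the request stays within the model's token budget.
    trimmed_messages = trim_messages(messages, model, max_tokens=max_tokens)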

evaluators/langevals/langevals_langevals/query_resolution.py

Lines changed: 2 additions & 1 deletion
@@ -14,6 +14,7 @@
     EvaluatorEntry,
     EvaluationResult,
     EvaluatorSettings,
+    LLMEvaluatorSettings,
     SingleEvaluationResult,
     EvaluationResultSkipped,
     Money,
@@ -29,7 +30,7 @@ class QueryResolutionEntry(EvaluatorEntry):
     conversation: List[QueryResolutionConversationEntry]
 
 
-class QueryResolutionSettings(EvaluatorSettings):
+class QueryResolutionSettings(LLMEvaluatorSettings):
     pass # maybe specify after how many turns we should run this evaluator?
 
 
evaluators/langevals/langevals_langevals/similarity.py

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@
     EvaluatorEntry,
     EvaluationResult,
     EvaluatorSettings,
+    LLMEvaluatorSettings,
     SingleEvaluationResult,
     EvaluationResultSkipped,
 )
