
Commit 67cfebb

Add env vars to ts file
1 parent d6c76e3 commit 67cfebb

7 files changed: +39 -5 lines changed

evaluators/langevals/langevals_langevals/competitor_llm.py

Lines changed: 0 additions & 1 deletion
@@ -53,7 +53,6 @@ class CompetitorLLMEvaluator(
     category = "policy"
     env_vars = []
     default_settings = CompetitorLLMSettings()
-    env_vars = ["OPENAI_API_KEY", "AZURE_API_KEY", "AZURE_API_BASE"]
     is_guardrail = True

     def evaluate(self, entry: CompetitorLLMEntry) -> SingleEvaluationResult:

evaluators/langevals/langevals_langevals/competitor_llm_function_call.py

Lines changed: 0 additions & 1 deletion
@@ -55,7 +55,6 @@ class CompetitorLLMFunctionCallEvaluator(
     category = "policy"
     env_vars = []
     default_settings = CompetitorLLMFunctionCallSettings()
-    env_vars = ["OPENAI_API_KEY", "AZURE_API_KEY", "AZURE_API_BASE"]
     is_guardrail = True

     def evaluate(self, entry: CompetitorLLMFunctionCallEntry) -> SingleEvaluationResult:

evaluators/langevals/langevals_langevals/off_topic.py

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ class OffTopicEvaluator(BaseEvaluator[OffTopicEntry, OffTopicSettings, OffTopicR

     name = "Off Topic Evaluator"
     category = "policy"
-    env_vars = ["OPENAI_API_KEY", "AZURE_API_KEY", "AZURE_API_BASE"]
+    env_vars = []
     is_guardrail = True # If the evaluator is a guardrail or not, a guardrail evaluator must return a boolean result on the `passed` result field in addition to the score

     def evaluate(self, entry: OffTopicEntry) -> SingleEvaluationResult:

evaluators/langevals/langevals_langevals/product_sentiment_polarity.py

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ class ProductSentimentPolarityEvaluator(

     name = "Product Sentiment Polarity"
     category = "policy"
-    env_vars = ["OPENAI_API_KEY", "AZURE_API_KEY", "AZURE_API_BASE"]
+    env_vars = []
     default_settings = ProductSentimentPolaritySettings()
     is_guardrail = True

evaluators/langevals/langevals_langevals/similarity.py

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ class CustomSimilarityEvaluator(

     name = "Semantic Similarity Evaluator"
     category = "custom"
-    env_vars = ["OPENAI_API_KEY", "AZURE_API_KEY", "AZURE_API_BASE"]
+    env_vars = []
     default_settings = CustomSimilaritySettings()
     is_guardrail = True

scripts/generate_evaluators_ts.py

Lines changed: 3 additions & 0 deletions
@@ -57,6 +57,7 @@ def extract_evaluator_info(definitions: EvaluatorDefinitions) -> Dict[str, Any]:
         "category": definitions.category,
         "docsUrl": definitions.docs_url,
         "isGuardrail": definitions.is_guardrail,
+        "envVars": definitions.env_vars,
         "settingsTypes": {},
         "settingsDescriptions": {},
         "result": {},
@@ -191,6 +192,7 @@ def generate_typescript_definitions(evaluators_info: Dict[str, Dict[str, Any]])
         f'      default: Evaluators[T]["settings"][K];\n'
         f"    }};\n"
         f"  }};\n"
+        f"  envVars: string[];\n"
         f"  result: {{\n"
         f"    score?: {{\n"
         f"      description: string;\n"
@@ -258,6 +260,7 @@ def generate_typescript_definitions(evaluators_info: Dict[str, Dict[str, Any]])
             f'    optionalFields: {json.dumps(evaluator_info["optionalFields"])},\n'
         )
         ts_definitions += f'    settings: {json.dumps(evaluator_info["settingsDescriptions"], indent=6).replace(": null", ": undefined")},\n'
+        ts_definitions += f'    envVars: {json.dumps(evaluator_info["envVars"])},\n'
         ts_definitions += (
             f'    result: {json.dumps(evaluator_info["result"], indent=6)}\n'
         )

ts-integration/evaluators.generated.ts

Lines changed: 33 additions & 0 deletions
@@ -31,6 +31,7 @@ export type EvaluatorDefinition<T extends EvaluatorTypes> = {
       default: Evaluators[T]["settings"][K];
     };
   };
+  envVars: string[];
   result: {
     score?: {
       description: string;
@@ -1059,6 +1060,7 @@ or if it's in a specific expected language.
         default: 0.25,
       },
     },
+    envVars: [],
     result: {
       passed: {
         description:
@@ -1139,6 +1141,10 @@ social security numbers. It allows customization of the detection threshold and
         default: "eu-central-1",
       },
     },
+    envVars: [
+      "AWS_COMPREHEND_ACCESS_KEY_ID",
+      "AWS_COMPREHEND_SECRET_ACCESS_KEY",
+    ],
     result: {
       score: {
         description: "Amount of PII detected, 0 means no PII detected",
@@ -1179,6 +1185,7 @@ It can work both as a safety evaluator and as policy enforcement.
         default: "cloudflare/thebloke/llamaguard-7b-awq",
       },
     },
+    envVars: ["CLOUDFLARE_ACCOUNT_ID", "CLOUDFLARE_API_KEY"],
    result: {
       score: {
         description: "How many violations were found in the content",
@@ -1220,6 +1227,7 @@ social security numbers. It allows customization of the detection threshold and
         default: "POSSIBLE",
       },
     },
+    envVars: ["GOOGLE_APPLICATION_CREDENTIALS"],
     result: {
       score: {
         description: "Amount of PII detected, 0 means no PII detected",
@@ -1278,6 +1286,7 @@ social security numbers. It allows customization of the detection threshold and
         default: 0.5,
       },
     },
+    envVars: [],
     result: {
       score: {
         description: "Amount of PII detected, 0 means no PII detected",
@@ -1314,6 +1323,7 @@ Computes with an LLM a weighted combination of factual as well as semantic simil
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1347,6 +1357,7 @@ Evaluates how pertinent the generated answer is to the given prompt. Higher scor
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1380,6 +1391,7 @@ This metric evaluates whether all of the ground-truth relevant items present in
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1413,6 +1425,7 @@ This evaluator measures the extent to which the retrieved context aligns with th
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1446,6 +1459,7 @@ This metric gauges the relevancy of the retrieved context, calculated based on b
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1479,6 +1493,7 @@ This metric evaluates whether all of the output relevant items present in the co
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1512,6 +1527,7 @@ This evaluator assesses the extent to which the generated answer is consistent w
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1542,6 +1558,7 @@ Allows you to check for simple text matches or regex evaluation.
         ],
       },
     },
+    envVars: [],
     result: {
       passed: {
         description: "True if all rules pass, False if any rule fails",
@@ -1564,6 +1581,7 @@ This evaluator checks if any of the specified competitors was mentioned
         default: ["OpenAI", "Google", "Microsoft"],
       },
     },
+    envVars: [],
     result: {
       score: {
         description: "Number of competitors mentioned in the input and output",
@@ -1602,6 +1620,7 @@ This evaluator use an LLM-as-judge to check if the conversation is related to co
           "We are providing an LLM observability and evaluation platform",
       },
     },
+    envVars: [],
     result: {
       score: {
         description: "Confidence that the message is competitor free",
@@ -1644,6 +1663,7 @@ This evaluator implements LLM-as-a-judge with a function call approach to check
         default: ["OpenAI", "Google", "Microsoft"],
       },
     },
+    envVars: [],
     result: {
       score: {
         description: "Number of unique competitors mentioned",
@@ -1679,6 +1699,7 @@ Use an LLM as a judge with a custom prompt to do a true/false boolean evaluation
           "You are an LLM evaluator. We need the guarantee that the output answers what is being asked on the input, please evaluate as False if it doesn't",
       },
     },
+    envVars: [],
     result: {
       passed: {
         description: "The veredict given by the LLM",
@@ -1724,6 +1745,7 @@ Use an LLM as a judge with a custom prompt to classify the message into custom d
         ],
       },
     },
+    envVars: [],
     result: {
       label: {
         description: "The detected category of the message",
@@ -1756,6 +1778,7 @@ Use an LLM as a judge with custom prompt to do a numeric score evaluation of the
           "You are an LLM evaluator. Please score from 0.0 to 1.0 how likely the user is to be satisfied with this answer, from 0.0 being not satisfied at all to 1.0 being completely satisfied",
       },
     },
+    envVars: [],
     result: {
       score: {
         description: "The score given by the LLM, according to the prompt",
@@ -1796,6 +1819,7 @@ This evaluator checks if the user message is concerning one of the allowed topic
         ],
       },
     },
+    envVars: [],
     result: {
       score: {
         description: "Confidence level of the intent prediction",
@@ -1820,6 +1844,7 @@ For messages about products, this evaluator checks for the nuanced sentiment dir
     requiredFields: ["output"],
     optionalFields: [],
     settings: {},
+    envVars: [],
     result: {
       score: {
         description:
@@ -1854,6 +1879,7 @@ This evaluator checks if all the user queries in the conversation were resolved.
         default: 8192,
       },
     },
+    envVars: [],
     result: {},
   },
   "langevals/similarity": {
@@ -1890,6 +1916,7 @@ match on the exact text.
         default: "openai/text-embedding-3-small",
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1935,6 +1962,7 @@ threshold and the specific categories to check.
         default: "FourSeverityLevels",
       },
     },
+    envVars: ["AZURE_CONTENT_SAFETY_ENDPOINT", "AZURE_CONTENT_SAFETY_KEY"],
     result: {
       score: {
         description:
@@ -1953,6 +1981,7 @@ This evaluator checks for jailbreak-attempt in the input using Azure's Content S
     requiredFields: ["input"],
     optionalFields: [],
     settings: {},
+    envVars: ["AZURE_CONTENT_SAFETY_ENDPOINT", "AZURE_CONTENT_SAFETY_KEY"],
     result: {
       passed: {
         description:
@@ -1972,6 +2001,7 @@ This evaluator checks for prompt injection attempt in the input and the contexts
     requiredFields: ["input"],
     optionalFields: ["contexts"],
     settings: {},
+    envVars: ["AZURE_CONTENT_SAFETY_ENDPOINT", "AZURE_CONTENT_SAFETY_KEY"],
     result: {
       passed: {
         description:
@@ -2013,6 +2043,7 @@ including harassment, hate speech, self-harm, sexual content, and violence.
         },
       },
     },
+    envVars: ["OPENAI_API_KEY"],
     result: {
       score: {
         description:
@@ -2034,6 +2065,7 @@ This evaluator serves as a boilerplate for creating new evaluators.
     requiredFields: ["output"],
     optionalFields: [],
     settings: {},
+    envVars: ["NECESSARY_ENV_VAR"],
     result: {
       score: {
         description: "How many words are there in the output, split by space",
@@ -2060,6 +2092,7 @@ This evaluator assesses the extent to which the generated answer is consistent w
         default: 8192,
       },
     },
+    envVars: [],
     result: {},
   },
 };
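With this change each generated evaluator definition declares the environment variables it depends on. A minimal TypeScript sketch of how the new envVars field might be consumed on the ts-integration side; the exported map name AVAILABLE_EVALUATORS and the missingEnvVars helper are illustrative assumptions, not part of this commit:

import { AVAILABLE_EVALUATORS, type EvaluatorTypes } from "./evaluators.generated";

// Hypothetical helper: report which of an evaluator's declared env vars
// are not set in the current environment before running it.
function missingEnvVars(evaluator: EvaluatorTypes): string[] {
  return AVAILABLE_EVALUATORS[evaluator].envVars.filter(
    (name) => !process.env[name]
  );
}

const missing = missingEnvVars("langevals/similarity");
if (missing.length > 0) {
  console.warn(`Missing env vars: ${missing.join(", ")}`);
}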

0 commit comments
