 is_guardrail=True  # Whether the evaluator is a guardrail or not; a guardrail evaluator must return a boolean result in the `passed` result field in addition to the score
         description: "How many violations were found in the content",
@@ -1220,6 +1227,7 @@ social security numbers. It allows customization of the detection threshold and
         default: "POSSIBLE",
       },
     },
+    envVars: ["GOOGLE_APPLICATION_CREDENTIALS"],
     result: {
       score: {
         description: "Amount of PII detected, 0 means no PII detected",
@@ -1278,6 +1286,7 @@ social security numbers. It allows customization of the detection threshold and
         default: 0.5,
       },
     },
+    envVars: [],
     result: {
       score: {
         description: "Amount of PII detected, 0 means no PII detected",
@@ -1314,6 +1323,7 @@ Computes with an LLM a weighted combination of factual as well as semantic simil
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1347,6 +1357,7 @@ Evaluates how pertinent the generated answer is to the given prompt. Higher scor
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1380,6 +1391,7 @@ This metric evaluates whether all of the ground-truth relevant items present in
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1413,6 +1425,7 @@ This evaluator measures the extent to which the retrieved context aligns with th
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1446,6 +1459,7 @@ This metric gauges the relevancy of the retrieved context, calculated based on b
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1479,6 +1493,7 @@ This metric evaluates whether all of the output relevant items present in the co
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1512,6 +1527,7 @@ This evaluator assesses the extent to which the generated answer is consistent w
         default: 2048,
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
@@ -1542,6 +1558,7 @@ Allows you to check for simple text matches or regex evaluation.
         ],
       },
     },
+    envVars: [],
     result: {
       passed: {
         description: "True if all rules pass, False if any rule fails",
@@ -1564,6 +1581,7 @@ This evaluator checks if any of the specified competitors was mentioned
         default: ["OpenAI","Google","Microsoft"],
       },
     },
+    envVars: [],
     result: {
       score: {
         description: "Number of competitors mentioned in the input and output",
@@ -1602,6 +1620,7 @@ This evaluator use an LLM-as-judge to check if the conversation is related to co
           "We are providing an LLM observability and evaluation platform",
       },
     },
+    envVars: [],
     result: {
       score: {
         description: "Confidence that the message is competitor free",
@@ -1644,6 +1663,7 @@ This evaluator implements LLM-as-a-judge with a function call approach to check
         default: ["OpenAI","Google","Microsoft"],
       },
     },
+    envVars: [],
     result: {
       score: {
         description: "Number of unique competitors mentioned",
@@ -1679,6 +1699,7 @@ Use an LLM as a judge with a custom prompt to do a true/false boolean evaluation
           "You are an LLM evaluator. We need the guarantee that the output answers what is being asked on the input, please evaluate as False if it doesn't",
       },
     },
+    envVars: [],
     result: {
       passed: {
         description: "The verdict given by the LLM",
@@ -1724,6 +1745,7 @@ Use an LLM as a judge with a custom prompt to classify the message into custom d
         ],
       },
     },
+    envVars: [],
     result: {
       label: {
         description: "The detected category of the message",
@@ -1756,6 +1778,7 @@ Use an LLM as a judge with custom prompt to do a numeric score evaluation of the
           "You are an LLM evaluator. Please score from 0.0 to 1.0 how likely the user is to be satisfied with this answer, from 0.0 being not satisfied at all to 1.0 being completely satisfied",
       },
     },
+    envVars: [],
     result: {
       score: {
         description: "The score given by the LLM, according to the prompt",
@@ -1796,6 +1819,7 @@ This evaluator checks if the user message is concerning one of the allowed topic
         ],
       },
     },
+    envVars: [],
     result: {
       score: {
         description: "Confidence level of the intent prediction",
@@ -1820,6 +1844,7 @@ For messages about products, this evaluator checks for the nuanced sentiment dir
     requiredFields: ["output"],
     optionalFields: [],
     settings: {},
+    envVars: [],
     result: {
       score: {
         description:
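Beyond `settings`, each definition carries `requiredFields` and `optionalFields` metadata, visible in the hunk above. That metadata lends itself to a pre-flight check before dispatching an entry to an evaluator; below is a minimal sketch under the assumed definition shape. The helper name is hypothetical, and the field-name union is only partly grounded in the diff ("output" appears above; the other members are assumptions).

```typescript
type EvaluatorField = "input" | "output" | "contexts" | "expected_output";

// Assumed slice of a definition, mirroring the fields visible in the diff.
interface EvaluatorDefinition {
  requiredFields: EvaluatorField[];
  optionalFields: EvaluatorField[];
  envVars: string[];
}

// Hypothetical helper: which required fields are absent from an entry?
function missingRequiredFields(
  definition: EvaluatorDefinition,
  entry: Record<string, unknown>
): EvaluatorField[] {
  return definition.requiredFields.filter((field) => entry[field] == null);
}
```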
@@ -1854,6 +1879,7 @@ This evaluator checks if all the user queries in the conversation were resolved.
         default: 8192,
       },
     },
+    envVars: [],
     result: {},
   },
   "langevals/similarity": {
@@ -1890,6 +1916,7 @@ match on the exact text.
         default: "openai/text-embedding-3-small",
       },
     },
+    envVars: [],
     result: {
       score: {
         description:
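The `envVars` list that this change threads through every definition makes each evaluator's credential requirements machine-checkable: a caller can verify that the variables are set before invoking the evaluator. A minimal sketch, assuming a Node.js environment and the definition shape above; `missingEnvVars` is a hypothetical helper, not part of the library.

```typescript
// Hypothetical pre-flight check (assumes Node.js's process.env):
// which of an evaluator's declared environment variables are unset?
function missingEnvVars(definition: { envVars: string[] }): string[] {
  return definition.envVars.filter((name) => !process.env[name]);
}

// Example with the DLP entry above, which declares GOOGLE_APPLICATION_CREDENTIALS.
const missing = missingEnvVars({ envVars: ["GOOGLE_APPLICATION_CREDENTIALS"] });
if (missing.length > 0) {
  throw new Error(`Missing environment variables: ${missing.join(", ")}`);
}
```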
@@ -1935,6 +1962,7 @@ threshold and the specific categories to check.