langwatch
diff --git a/evaluators/aws/langevals_aws/comprehend_pii_detection.py b/evaluators/aws/langevals_aws/comprehend_pii_detection.py
Lines changed: 3 additions & 2 deletions
diff --git a/evaluators/aws/poetry.lock b/evaluators/aws/poetry.lock
Lines changed: 1 addition & 1 deletion
diff --git a/evaluators/azure/langevals_azure/content_safety.py b/evaluators/azure/langevals_azure/content_safety.py
Lines changed: 2 additions & 1 deletion
diff --git a/evaluators/azure/langevals_azure/jailbreak.py b/evaluators/azure/langevals_azure/jailbreak.py
Lines changed: 3 additions & 2 deletions
diff --git a/evaluators/azure/langevals_azure/prompt_injection.py b/evaluators/azure/langevals_azure/prompt_injection.py
Lines changed: 3 additions & 2 deletions
diff --git a/evaluators/azure/poetry.lock b/evaluators/azure/poetry.lock
Lines changed: 1 addition & 1 deletion
diff --git a/evaluators/example/langevals_example/word_count.py b/evaluators/example/langevals_example/word_count.py
Lines changed: 3 additions & 2 deletions
diff --git a/evaluators/example/poetry.lock b/evaluators/example/poetry.lock
Lines changed: 1 addition & 1 deletion
diff --git a/evaluators/google_cloud/langevals_google_cloud/dlp_pii_detection.py b/evaluators/google_cloud/langevals_google_cloud/dlp_pii_detection.py
Lines changed: 3 additions & 2 deletions
diff --git a/evaluators/google_cloud/poetry.lock b/evaluators/google_cloud/poetry.lock
Lines changed: 1 addition & 1 deletion
@@ -4,6 +4,7 @@
 from langevals_core.base_evaluator import (
     BaseEvaluator,
     EvaluatorEntry,
+    EvaluatorSettings,
     SingleEvaluationResult,
     EvaluationResult,
     EvaluationResultSkipped,
@@ -56,7 +57,7 @@ class AWSComprehendEntityTypes(BaseModel):
     IN_VOTER_NUMBER: bool = True
 
 
-class AWSComprehendPIIDetectionSettings(BaseModel):
+class AWSComprehendPIIDetectionSettings(EvaluatorSettings):
     entity_types: AWSComprehendEntityTypes = Field(
         default=AWSComprehendEntityTypes(),
         description="The types of PII to check for in the input.",
@@ -148,7 +149,7 @@ class AWSPIIEntityResults(TypedDict):
 class AWSComprehendPIIDetectionResult(EvaluationResult):
     score: float = Field(description="Amount of PII detected, 0 means no PII detected")
     passed: Optional[bool] = Field(
-        description="If true then no PII was detected, if false then at least one PII was detected"
+        description="If true then no PII was detected, if false then at least one PII was detected", default=None
     )
     raw_response: AWSPIIEntityResults
 
 
@@ -6,6 +6,7 @@
 from langevals_core.base_evaluator import (
     BaseEvaluator,
     EvaluationResult,
+    EvaluatorSettings,
     SingleEvaluationResult,
     EvaluatorEntry,
     EvaluationResultSkipped,
@@ -25,7 +26,7 @@ class AzureContentSafetyCategories(BaseModel):
     Violence: bool = True
 
 
-class AzureContentSafetySettings(BaseModel):
+class AzureContentSafetySettings(EvaluatorSettings):
     severity_threshold: Literal[1, 2, 3, 4, 5, 6, 7] = Field(
         default=1,
         description="The minimum severity level to consider content as unsafe, from 1 to 7.",
 
@@ -3,18 +3,19 @@
 from langevals_core.base_evaluator import (
     BaseEvaluator,
     EvaluationResult,
+    EvaluatorSettings,
     SingleEvaluationResult,
     EvaluatorEntry,
     EvaluationResultSkipped,
 )
-from pydantic import BaseModel, Field
+from pydantic import Field
 
 
 class AzureJailbreakEntry(EvaluatorEntry):
     input: str
 
 
-class AzureJailbreakSettings(BaseModel):
+class AzureJailbreakSettings(EvaluatorSettings):
     pass
 
 
 
@@ -3,11 +3,12 @@
 from langevals_core.base_evaluator import (
     BaseEvaluator,
     EvaluationResult,
+    EvaluatorSettings,
     SingleEvaluationResult,
     EvaluatorEntry,
     EvaluationResultSkipped,
 )
-from pydantic import BaseModel, Field
+from pydantic import Field
 import math
 
 
@@ -16,7 +17,7 @@ class AzurePromptShieldEntry(EvaluatorEntry):
     contexts: Optional[List[str]] = None
 
 
-class AzurePromptShieldSettings(BaseModel):
+class AzurePromptShieldSettings(EvaluatorSettings):
     pass
 
 
 
@@ -2,9 +2,10 @@
     BaseEvaluator,
     EvaluatorEntry,
     EvaluationResult,
+    EvaluatorSettings,
     SingleEvaluationResult,
 )
-from pydantic import BaseModel, Field
+from pydantic import Field
 
 
 # Type definition of what keys are necessary for each entry to have for the evaluator to process it, in this example
@@ -14,7 +15,7 @@ class ExampleWordCountEntry(EvaluatorEntry):
 
 
 # Generic settings for the evaluator, in this example we don't need any settings, but any fields can be added here
-class ExampleWordCountSettings(BaseModel):
+class ExampleWordCountSettings(EvaluatorSettings):
     pass
 
 
 
@@ -3,6 +3,7 @@
 from langevals_core.base_evaluator import (
     BaseEvaluator,
     EvaluatorEntry,
+    EvaluatorSettings,
     SingleEvaluationResult,
     EvaluationResult,
     EvaluationResultSkipped,
@@ -29,7 +30,7 @@ class GoogleCloudDLPInfoTypes(BaseModel):
     medical_record_number: bool = True
 
 
-class GoogleCloudDLPPIIDetectionSettings(BaseModel):
+class GoogleCloudDLPPIIDetectionSettings(EvaluatorSettings):
     info_types: GoogleCloudDLPInfoTypes = Field(
         default=GoogleCloudDLPInfoTypes(),
         description="The types of PII to check for in the input.",
@@ -45,7 +46,7 @@ class GoogleCloudDLPPIIDetectionSettings(BaseModel):
 class GoogleCloudDLPPIIDetectionResult(EvaluationResult):
     score: float = Field(description="Amount of PII detected, 0 means no PII detected")
     passed: Optional[bool] = Field(
-        description="If true then no PII was detected, if false then at least one PII was detected"
+        description="If true then no PII was detected, if false then at least one PII was detected", default=None
     )
     raw_response: dict[str, Any]