Skip to content

Commit 1512eb4

Browse files
committed
Update moderation for input and output separately
1 parent f97decb commit 1512eb4

File tree

1 file changed

+53
-46
lines changed

1 file changed

+53
-46
lines changed

evaluators/openai/langevals_openai/moderation.py

Lines changed: 53 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -68,63 +68,70 @@ class OpenAIModerationEvaluator(
6868
is_guardrail = True
6969

7070
def evaluate_batch(
    self, data: list[OpenAIModerationEntry], index=0
) -> BatchEvaluationResult:
    """Run the OpenAI moderation endpoint over a batch of entries.

    Input and output texts are moderated separately (two API calls) and the
    per-category scores are combined by taking the maximum of the two, so a
    violation in either side is surfaced.

    Args:
        data: entries whose ``input`` and ``output`` texts are checked;
            missing texts are treated as empty strings.
        index: kept for backward compatibility with existing callers; no
            longer used now that the tqdm progress bar has been removed.

    Returns:
        One result per entry, in order: ``EvaluationResultSkipped`` when both
        input and output are empty, otherwise an ``OpenAIModerationResult``
        whose ``score`` is the highest combined category score and whose
        ``passed`` flag is False when any enabled category exceeds the
        detection threshold.
    """
    client = OpenAI(api_key=self.get_env("OPENAI_API_KEY"))

    results: list[SingleEvaluationResult] = []

    contents_input = [entry.input or "" for entry in data]
    contents_output = [entry.output or "" for entry in data]
    response_input = client.moderations.create(input=contents_input)
    response_output = client.moderations.create(input=contents_output)

    # Hoisted: the enabled-categories mapping is invariant across the whole
    # loop, so dump it once instead of once per entry per category.
    settings_categories = self.settings.categories.model_dump()

    for i, (input_result, output_result) in enumerate(
        zip(response_input.results, response_output.results)
    ):
        if not contents_input[i] and not contents_output[i]:
            results.append(
                EvaluationResultSkipped(details="Input and output are both empty")
            )
            continue

        # Hoisted: dump each moderation result once per entry instead of
        # once per category inside the comprehension below.
        input_scores = input_result.category_scores.model_dump()
        output_scores = output_result.category_scores.model_dump()

        # Combine results by taking the maximum scores for each category
        combined_category_scores = {
            category: max(
                input_scores.get(category, 0),
                output_scores.get(category, 0),
            )
            for category in settings_categories.keys()
        }

        detected_categories = {
            category: score > 0.5  # You may want to adjust this threshold
            for category, score in combined_category_scores.items()
            if settings_categories.get(category, False)
        }

        highest_categories = sorted(
            combined_category_scores.items(),
            key=lambda x: x[1],
            reverse=True,
        )
        # NOTE(review): score is the max over ALL configured categories,
        # including ones disabled in settings — confirm this is intended
        # (the previous implementation only considered detected categories).
        score = (
            max(combined_category_scores.values()) if combined_category_scores else 0
        )

        passed = not any(detected_categories.values())

        details = (
            (
                "Detected "
                + ", ".join(
                    [
                        f"{category} ({score * 100:.2f}% confidence)"
                        for category, score in highest_categories
                        if detected_categories.get(category, False)
                    ]
                )
            )
            if not passed
            else None
        )

        results.append(
            OpenAIModerationResult(score=score, passed=passed, details=details)
        )

    return results

0 commit comments

Comments (0)