Skip to content

Commit 5fb265c

Browse files
committed
Update script
1 parent 096089d commit 5fb265c

File tree

1 file changed

+168
-21
lines changed

1 file changed

+168
-21
lines changed

scripts/generate_openapi.py

Lines changed: 168 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -19,12 +19,38 @@ def parse_typescript_evaluators(file_path: str) -> Dict[str, Any]:
1919

2020
evaluators = {}
2121

22+
# Find the AVAILABLE_EVALUATORS object using a more robust approach
23+
# First find the start of the object
24+
start_pattern = r"export const AVAILABLE_EVALUATORS:\s*{[^}]*}\s*=\s*{"
25+
start_match = re.search(start_pattern, content)
26+
27+
if not start_match:
28+
print("Could not find AVAILABLE_EVALUATORS object")
29+
return evaluators
30+
31+
start_pos = start_match.end() - 1 # Position of the opening brace
32+
33+
# Find the matching closing brace
34+
brace_count = 0
35+
end_pos = start_pos
36+
37+
for j, char in enumerate(content[start_pos:], start_pos):
38+
if char == "{":
39+
brace_count += 1
40+
elif char == "}":
41+
brace_count -= 1
42+
if brace_count == 0:
43+
end_pos = j + 1
44+
break
45+
46+
evaluators_content = content[start_pos:end_pos]
47+
2248
# Find all evaluator definitions using regex
2349
# Look for patterns like "evaluator/id": { ... }
2450
evaluator_pattern = r'"([^"]+)":\s*{'
2551

2652
# Find all matches
27-
matches = list(re.finditer(evaluator_pattern, content))
53+
matches = list(re.finditer(evaluator_pattern, evaluators_content))
2854
print(f"Found {len(matches)} evaluator matches")
2955

3056
for i, match in enumerate(matches):
@@ -35,7 +61,7 @@ def parse_typescript_evaluators(file_path: str) -> Dict[str, Any]:
3561
brace_count = 0
3662
end_pos = start_pos
3763

38-
for j, char in enumerate(content[start_pos:], start_pos):
64+
for j, char in enumerate(evaluators_content[start_pos:], start_pos):
3965
if char == "{":
4066
brace_count += 1
4167
elif char == "}":
@@ -44,14 +70,18 @@ def parse_typescript_evaluators(file_path: str) -> Dict[str, Any]:
4470
end_pos = j + 1
4571
break
4672

47-
evaluator_content = content[start_pos:end_pos]
73+
evaluator_content = evaluators_content[start_pos:end_pos]
4874

4975
# Parse the evaluator content
5076
evaluator = parse_evaluator_content(evaluator_id, evaluator_content)
5177
evaluators[evaluator_id] = evaluator
5278

5379
print(f"Processing evaluator: {evaluator_id}")
5480

81+
# Debug: Print content for competitor_blocklist
82+
if evaluator_id == "langevals/competitor_blocklist":
83+
print(f"DEBUG: Content for {evaluator_id}: {evaluator_content[:500]}")
84+
5585
return evaluators
5686

5787

@@ -104,13 +134,31 @@ def parse_evaluator_content(evaluator_id: str, content: str) -> Dict[str, Any]:
104134
field.strip().strip('"') for field in re.findall(r'"([^"]+)"', opt_content)
105135
]
106136

107-
# Extract settings
108-
settings_match = re.search(
109-
r"settings:\s*{([^}]+(?:{[^}]*}[^}]*)*)}", content, re.DOTALL
110-
)
111-
if settings_match:
112-
settings_content = settings_match.group(1)
113-
evaluator["settings"] = parse_settings(settings_content)
137+
# Extract settings using a more robust approach
138+
settings_start = content.find("settings:")
139+
if settings_start != -1:
140+
# Find the opening brace after settings:
141+
brace_start = content.find("{", settings_start)
142+
if brace_start != -1:
143+
# Find the matching closing brace
144+
brace_count = 0
145+
settings_end = brace_start
146+
147+
for i, char in enumerate(content[brace_start:], brace_start):
148+
if char == "{":
149+
brace_count += 1
150+
elif char == "}":
151+
brace_count -= 1
152+
if brace_count == 0:
153+
settings_end = i + 1
154+
break
155+
156+
settings_content = content[brace_start + 1 : settings_end - 1]
157+
evaluator["settings"] = parse_settings(settings_content)
158+
else:
159+
print(f"DEBUG: No opening brace found for settings in {evaluator_id}")
160+
else:
161+
print(f"DEBUG: No settings found for {evaluator_id}")
114162

115163
return evaluator
116164

@@ -119,12 +167,28 @@ def parse_settings(settings_content: str) -> Dict[str, Any]:
119167
"""Parse settings object."""
120168
settings = {}
121169

122-
# Find all setting definitions
123-
setting_pattern = r"(\w+):\s*{([^}]+(?:{[^}]*}[^}]*)*)}"
170+
# Find all setting definitions using a more robust approach
171+
# Look for patterns like "setting_name: { ... }" with proper brace matching
172+
setting_pattern = r"(\w+):\s*{"
124173

125174
for setting_match in re.finditer(setting_pattern, settings_content, re.DOTALL):
126175
setting_name = setting_match.group(1)
127-
setting_content = setting_match.group(2)
176+
start_pos = setting_match.end() - 1 # Position of the opening brace
177+
178+
# Find the matching closing brace
179+
brace_count = 0
180+
end_pos = start_pos
181+
182+
for i, char in enumerate(settings_content[start_pos:], start_pos):
183+
if char == "{":
184+
brace_count += 1
185+
elif char == "}":
186+
brace_count -= 1
187+
if brace_count == 0:
188+
end_pos = i + 1
189+
break
190+
191+
setting_content = settings_content[start_pos + 1 : end_pos - 1]
128192

129193
# Extract description
130194
desc_match = re.search(r'description:\s*"([^"]+)"', setting_content)
@@ -133,6 +197,15 @@ def parse_settings(settings_content: str) -> Dict[str, Any]:
133197
# Extract default value
134198
default_value = extract_default_value(setting_content)
135199

200+
# Debug output for competitor_blocklist
201+
if setting_name == "competitors":
202+
print(f"DEBUG: Setting content for competitors: {setting_content}")
203+
print(f"DEBUG: Extracted default value: {default_value}")
204+
print(f"DEBUG: Type of default value: {type(default_value)}")
205+
print(
206+
f"DEBUG: Content contains 'competitors': {'competitors' in setting_content}"
207+
)
208+
136209
settings[setting_name] = {"description": description, "default": default_value}
137210

138211
return settings
@@ -141,13 +214,58 @@ def parse_settings(settings_content: str) -> Dict[str, Any]:
141214
def extract_default_value(content: str) -> Any:
142215
"""Extract default value from setting content."""
143216

217+
# Debug output (only for competitors field)
218+
if "competitors" in content:
219+
print(f"DEBUG: extract_default_value called with content: {content}")
220+
144221
# Look for default: followed by various value types
222+
# Use a more robust pattern that handles arrays and objects
145223
default_match = re.search(r"default:\s*(.+?)(?:,|$)", content, re.DOTALL)
146224
if not default_match:
147225
return None
148226

149227
value_str = default_match.group(1).strip()
150228

229+
# Debug output (only for competitors field)
230+
if "competitors" in content:
231+
print(f"DEBUG: Raw regex match: {default_match.group(0)}")
232+
print(f"DEBUG: Captured value: {value_str}")
233+
234+
# If it's an array or object, try to find the complete value
235+
if value_str.startswith("[") or value_str.startswith("{"):
236+
# Find the complete array/object by looking in the original content
237+
# Find the position of the opening bracket in the original content
238+
default_start = content.find("default:")
239+
if default_start != -1:
240+
# Find the opening bracket after "default:"
241+
bracket_start = content.find("[", default_start)
242+
if bracket_start != -1:
243+
# Find the matching closing bracket
244+
bracket_count = 0
245+
complete_value = ""
246+
247+
for i, char in enumerate(content[bracket_start:], bracket_start):
248+
complete_value += char
249+
if char == "[":
250+
bracket_count += 1
251+
elif char == "]":
252+
bracket_count -= 1
253+
if bracket_count == 0:
254+
break
255+
256+
value_str = complete_value
257+
258+
# Debug output (only for competitors field)
259+
if "competitors" in content:
260+
print(f"DEBUG: Original value_str: {default_match.group(1).strip()}")
261+
print(f"DEBUG: Complete value_str: {value_str}")
262+
else:
263+
# Debug output (only for competitors field)
264+
if "competitors" in content:
265+
print(f"DEBUG: Not an array/object, value_str: {value_str}")
266+
print(f"DEBUG: value_str starts with '[': {value_str.startswith('[')}")
267+
print(f"DEBUG: value_str starts with '{{': {value_str.startswith('{')}")
268+
151269
# Handle different value types
152270
if value_str.startswith('"') and value_str.endswith('"'):
153271
# String value
@@ -164,11 +282,17 @@ def extract_default_value(content: str) -> Any:
164282
elif value_str.replace(".", "").isdigit():
165283
return float(value_str)
166284
elif value_str.startswith("[") and value_str.endswith("]"):
167-
# Array value - simplified parsing
168-
return []
285+
# Array value - try to parse as JSON
286+
try:
287+
return json.loads(value_str)
288+
except:
289+
return []
169290
elif value_str.startswith("{") and value_str.endswith("}"):
170-
# Object value - simplified parsing
171-
return {}
291+
# Object value - try to parse as JSON
292+
try:
293+
return json.loads(value_str)
294+
except:
295+
return {}
172296
else:
173297
return value_str
174298

@@ -413,12 +537,24 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]:
413537
request_schema_name = (
414538
f"{evaluator_id.replace('.', '_').replace('/', '_')}Request"
415539
)
416-
schema["components"]["schemas"][request_schema_name] = {
540+
request_schema = {
417541
"type": "object",
418542
"properties": {"data": data_schema},
419543
"required": ["data"],
420544
}
421545

546+
# Add settings property to request schema if evaluator has settings
547+
if evaluator.get("settings"):
548+
print(f"Adding settings to {evaluator_id}")
549+
request_schema["properties"]["settings"] = {
550+
"type": "object",
551+
"description": "Evaluator settings",
552+
}
553+
else:
554+
print(f"No settings found for {evaluator_id}")
555+
556+
schema["components"]["schemas"][request_schema_name] = request_schema
557+
422558
# Create settings schema for this evaluator
423559
settings_schema = {"type": "object", "properties": {}}
424560

@@ -441,10 +577,21 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]:
441577
property_def["default"] = default_value
442578
elif isinstance(default_value, list):
443579
property_def["type"] = "array"
444-
if default_value and isinstance(default_value[0], str):
445-
property_def["items"] = {"type": "string"}
580+
# Infer item type for arrays
581+
if default_value:
582+
first_item = default_value[0]
583+
if isinstance(first_item, str):
584+
property_def["items"] = {"type": "string"}
585+
elif isinstance(first_item, bool):
586+
property_def["items"] = {"type": "boolean"}
587+
elif isinstance(first_item, (int, float)):
588+
property_def["items"] = {"type": "number"}
589+
elif isinstance(first_item, dict):
590+
property_def["items"] = {"type": "object"}
591+
else:
592+
property_def["items"] = {"type": "string"}
446593
else:
447-
property_def["items"] = {"type": "object"}
594+
property_def["items"] = {"type": "string"}
448595
property_def["default"] = default_value
449596
elif isinstance(default_value, dict):
450597
property_def["type"] = "object"

0 commit comments

Comments
 (0)