Skip to content

Commit 1af9b30

Browse files
Add gpt-5-* numbers
1 parent 3b8fc00 commit 1af9b30

File tree

1 file changed

+132
-0
lines changed

1 file changed

+132
-0
lines changed

data/leaderboards.json

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,28 @@
862862
],
863863
"warning": null
864864
},
865+
{
866+
"name": "mini-SWE-agent + GPT-5 (2025-08-07)",
867+
"logo": [
868+
"https://mini-swe-agent.com/latest/assets/mini_square.svg"
869+
],
870+
"site": "https://mini-swe-agent.com/latest/",
871+
"folder": "20250807_mini-v1.7.0_gpt-5-2025-08-07",
872+
"resolved": 65.0,
873+
"date": "2025-08-07",
874+
"logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-2025-08-07/logs",
875+
"trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-2025-08-07/trajs",
876+
"os_model": false,
877+
"os_system": true,
878+
"checked": true,
879+
"tags": [
880+
"Org: OpenAI",
881+
"Model: gpt-5-2025-08-07",
882+
"System: Attempts - 1",
883+
"Org: SWE-agent"
884+
],
885+
"warning": null
886+
},
865887
{
866888
"name": "mini-SWE-agent + Claude 4 Sonnet (20250514)",
867889
"logo": [
@@ -1082,6 +1104,28 @@
10821104
],
10831105
"warning": null
10841106
},
1107+
{
1108+
"name": "mini-SWE-agent + GPT-5 mini (2025-08-07)",
1109+
"logo": [
1110+
"https://mini-swe-agent.com/latest/assets/mini_square.svg"
1111+
],
1112+
"site": "https://mini-swe-agent.com/latest/",
1113+
"folder": "20250807_mini-v1.7.0_gpt-5-mini-2025-08-07",
1114+
"resolved": 59.8,
1115+
"date": "2025-08-07",
1116+
"logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-mini-2025-08-07/logs",
1117+
"trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-mini-2025-08-07/trajs",
1118+
"os_model": false,
1119+
"os_system": true,
1120+
"checked": true,
1121+
"tags": [
1122+
"Org: OpenAI",
1123+
"Model: gpt-5-mini-2025-08-07",
1124+
"System: Attempts - 1",
1125+
"Org: SWE-agent"
1126+
],
1127+
"warning": null
1128+
},
10851129
{
10861130
"name": "DeepSWE-Preview",
10871131
"logo": [
@@ -2174,6 +2218,28 @@
21742218
],
21752219
"warning": null
21762220
},
2221+
{
2222+
"name": "mini-SWE-agent + GPT-5 nano (2025-08-07)",
2223+
"logo": [
2224+
"https://mini-swe-agent.com/latest/assets/mini_square.svg"
2225+
],
2226+
"site": "https://mini-swe-agent.com/latest/",
2227+
"folder": "20250807_mini-v1.7.0_gpt-5-nano-2025-08-07",
2228+
"resolved": 34.8,
2229+
"date": "2025-08-07",
2230+
"logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-nano-2025-08-07/logs",
2231+
"trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-nano-2025-08-07/trajs",
2232+
"os_model": false,
2233+
"os_system": true,
2234+
"checked": true,
2235+
"tags": [
2236+
"Org: OpenAI",
2237+
"Model: gpt-5-nano-2025-08-07",
2238+
"System: Attempts - 1",
2239+
"Org: SWE-agent"
2240+
],
2241+
"warning": null
2242+
},
21772243
{
21782244
"name": "SWE-agent + Claude 3.5 Sonnet",
21792245
"logo": [
@@ -4807,6 +4873,28 @@
48074873
"warning": null,
48084874
"mini-swe-agent_version": "1.0.0"
48094875
},
4876+
{
4877+
"name": "GPT-5 (2025-08-07)",
4878+
"logo": [
4879+
"https://upload.wikimedia.org/wikipedia/commons/6/66/OpenAI_logo_2025_%28symbol%29.svg"
4880+
],
4881+
"site": "https://platform.openai.com/docs/models/gpt-5",
4882+
"folder": "20250807_mini-v1.7.0_gpt-5-2025-08-07",
4883+
"resolved": 65.0,
4884+
"date": "2025-08-07",
4885+
"logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-2025-08-07/logs",
4886+
"trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-2025-08-07/trajs",
4887+
"os_model": false,
4888+
"os_system": true,
4889+
"checked": true,
4890+
"tags": [
4891+
"Org: OpenAI",
4892+
"Model: gpt-5-2025-08-07",
4893+
"System: Attempts - 1"
4894+
],
4895+
"warning": null,
4896+
"mini-swe-agent_version": "1.7.0"
4897+
},
48104898
{
48114899
"name": "Claude 4 Sonnet (20250514)",
48124900
"logo": [
@@ -4829,6 +4917,28 @@
48294917
"warning": null,
48304918
"mini-swe-agent_version": "1.0.0"
48314919
},
4920+
{
4921+
"name": "GPT-5 mini (2025-08-07)",
4922+
"logo": [
4923+
"https://upload.wikimedia.org/wikipedia/commons/6/66/OpenAI_logo_2025_%28symbol%29.svg"
4924+
],
4925+
"site": "https://platform.openai.com/docs/models/gpt-5-mini",
4926+
"folder": "20250807_mini-v1.7.0_gpt-5-mini-2025-08-07",
4927+
"resolved": 59.8,
4928+
"date": "2025-08-07",
4929+
"logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-mini-2025-08-07/logs",
4930+
"trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-mini-2025-08-07/trajs",
4931+
"os_model": false,
4932+
"os_system": true,
4933+
"checked": true,
4934+
"tags": [
4935+
"Org: OpenAI",
4936+
"Model: gpt-5-mini-2025-08-07",
4937+
"System: Attempts - 1"
4938+
],
4939+
"warning": null,
4940+
"mini-swe-agent_version": "1.7.0"
4941+
},
48324942
{
48334943
"name": "o3 (2025-04-16)",
48344944
"logo": [
@@ -4961,6 +5071,28 @@
49615071
"warning": null,
49625072
"mini-swe-agent_version": "1.0.0"
49635073
},
5074+
{
5075+
"name": "GPT-5 nano (2025-08-07)",
5076+
"logo": [
5077+
"https://upload.wikimedia.org/wikipedia/commons/6/66/OpenAI_logo_2025_%28symbol%29.svg"
5078+
],
5079+
"site": "https://platform.openai.com/docs/models/gpt-5-nano",
5080+
"folder": "20250807_mini-v1.7.0_gpt-5-nano-2025-08-07",
5081+
"resolved": 34.8,
5082+
"date": "2025-08-07",
5083+
"logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-nano-2025-08-07/logs",
5084+
"trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-nano-2025-08-07/trajs",
5085+
"os_model": false,
5086+
"os_system": true,
5087+
"checked": true,
5088+
"tags": [
5089+
"Org: OpenAI",
5090+
"Model: gpt-5-nano-2025-08-07",
5091+
"System: Attempts - 1"
5092+
],
5093+
"warning": null,
5094+
"mini-swe-agent_version": "1.7.0"
5095+
},
49645096
{
49655097
"name": "Gemini 2.5 Flash (2025-04-17)",
49665098
"logo": [

0 commit comments

Comments
 (0)