|
862 | 862 | ],
|
863 | 863 | "warning": null
|
864 | 864 | },
|
| 865 | + { |
| 866 | + "name": "mini-SWE-agent + GPT-5 (2025-08-07)", |
| 867 | + "logo": [ |
| 868 | + "https://mini-swe-agent.com/latest/assets/mini_square.svg" |
| 869 | + ], |
| 870 | + "site": "https://mini-swe-agent.com/latest/", |
| 871 | + "folder": "20250807_mini-v1.7.0_gpt-5-2025-08-07", |
| 872 | + "resolved": 65.0, |
| 873 | + "date": "2025-08-07", |
| 874 | + "logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-2025-08-07/logs", |
| 875 | + "trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-2025-08-07/trajs", |
| 876 | + "os_model": false, |
| 877 | + "os_system": true, |
| 878 | + "checked": true, |
| 879 | + "tags": [ |
| 880 | + "Org: OpenAI", |
| 881 | + "Model: gpt-5-2025-08-07", |
| 882 | + "System: Attempts - 1", |
| 883 | + "Org: SWE-agent" |
| 884 | + ], |
| 885 | + "warning": null |
| 886 | + }, |
865 | 887 | {
|
866 | 888 | "name": "mini-SWE-agent + Claude 4 Sonnet (20250514)",
|
867 | 889 | "logo": [
|
|
1082 | 1104 | ],
|
1083 | 1105 | "warning": null
|
1084 | 1106 | },
|
| 1107 | + { |
| 1108 | + "name": "mini-SWE-agent + GPT-5 mini (2025-08-07)", |
| 1109 | + "logo": [ |
| 1110 | + "https://mini-swe-agent.com/latest/assets/mini_square.svg" |
| 1111 | + ], |
| 1112 | + "site": "https://mini-swe-agent.com/latest/", |
| 1113 | + "folder": "20250807_mini-v1.7.0_gpt-5-mini-2025-08-07", |
| 1114 | + "resolved": 59.8, |
| 1115 | + "date": "2025-08-07", |
| 1116 | + "logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-mini-2025-08-07/logs", |
| 1117 | + "trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-mini-2025-08-07/trajs", |
| 1118 | + "os_model": false, |
| 1119 | + "os_system": true, |
| 1120 | + "checked": true, |
| 1121 | + "tags": [ |
| 1122 | + "Org: OpenAI", |
| 1123 | + "Model: gpt-5-mini-2025-08-07", |
| 1124 | + "System: Attempts - 1", |
| 1125 | + "Org: SWE-agent" |
| 1126 | + ], |
| 1127 | + "warning": null |
| 1128 | + }, |
1085 | 1129 | {
|
1086 | 1130 | "name": "DeepSWE-Preview",
|
1087 | 1131 | "logo": [
|
|
2174 | 2218 | ],
|
2175 | 2219 | "warning": null
|
2176 | 2220 | },
|
| 2221 | + { |
| 2222 | + "name": "mini-SWE-agent + GPT-5 nano (2025-08-07)", |
| 2223 | + "logo": [ |
| 2224 | + "https://mini-swe-agent.com/latest/assets/mini_square.svg" |
| 2225 | + ], |
| 2226 | + "site": "https://mini-swe-agent.com/latest/", |
| 2227 | + "folder": "20250807_mini-v1.7.0_gpt-5-nano-2025-08-07", |
| 2228 | + "resolved": 34.8, |
| 2229 | + "date": "2025-08-07", |
| 2230 | + "logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-nano-2025-08-07/logs", |
| 2231 | + "trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-nano-2025-08-07/trajs", |
| 2232 | + "os_model": false, |
| 2233 | + "os_system": true, |
| 2234 | + "checked": true, |
| 2235 | + "tags": [ |
| 2236 | + "Org: OpenAI", |
| 2237 | + "Model: gpt-5-nano-2025-08-07", |
| 2238 | + "System: Attempts - 1", |
| 2239 | + "Org: SWE-agent" |
| 2240 | + ], |
| 2241 | + "warning": null |
| 2242 | + }, |
2177 | 2243 | {
|
2178 | 2244 | "name": "SWE-agent + Claude 3.5 Sonnet",
|
2179 | 2245 | "logo": [
|
|
4807 | 4873 | "warning": null,
|
4808 | 4874 | "mini-swe-agent_version": "1.0.0"
|
4809 | 4875 | },
|
| 4876 | + { |
| 4877 | + "name": "GPT-5 (2025-08-07)", |
| 4878 | + "logo": [ |
| 4879 | + "https://upload.wikimedia.org/wikipedia/commons/6/66/OpenAI_logo_2025_%28symbol%29.svg" |
| 4880 | + ], |
| 4881 | + "site": "https://platform.openai.com/docs/models/gpt-5", |
| 4882 | + "folder": "20250807_mini-v1.7.0_gpt-5-2025-08-07", |
| 4883 | + "resolved": 65.0, |
| 4884 | + "date": "2025-08-07", |
| 4885 | + "logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-2025-08-07/logs", |
| 4886 | + "trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-2025-08-07/trajs", |
| 4887 | + "os_model": false, |
| 4888 | + "os_system": true, |
| 4889 | + "checked": true, |
| 4890 | + "tags": [ |
| 4891 | + "Org: OpenAI", |
| 4892 | + "Model: gpt-5-2025-08-07", |
| 4893 | + "System: Attempts - 1" |
| 4894 | + ], |
| 4895 | + "warning": null, |
| 4896 | + "mini-swe-agent_version": "1.7.0" |
| 4897 | + }, |
4810 | 4898 | {
|
4811 | 4899 | "name": "Claude 4 Sonnet (20250514)",
|
4812 | 4900 | "logo": [
|
|
4829 | 4917 | "warning": null,
|
4830 | 4918 | "mini-swe-agent_version": "1.0.0"
|
4831 | 4919 | },
|
| 4920 | + { |
| 4921 | + "name": "GPT-5 mini (2025-08-07)", |
| 4922 | + "logo": [ |
| 4923 | + "https://upload.wikimedia.org/wikipedia/commons/6/66/OpenAI_logo_2025_%28symbol%29.svg" |
| 4924 | + ], |
| 4925 | + "site": "https://platform.openai.com/docs/models/gpt-5-mini", |
| 4926 | + "folder": "20250807_mini-v1.7.0_gpt-5-mini-2025-08-07", |
| 4927 | + "resolved": 59.8, |
| 4928 | + "date": "2025-08-07", |
| 4929 | + "logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-mini-2025-08-07/logs", |
| 4930 | + "trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-mini-2025-08-07/trajs", |
| 4931 | + "os_model": false, |
| 4932 | + "os_system": true, |
| 4933 | + "checked": true, |
| 4934 | + "tags": [ |
| 4935 | + "Org: OpenAI", |
| 4936 | + "Model: gpt-5-mini-2025-08-07", |
| 4937 | + "System: Attempts - 1" |
| 4938 | + ], |
| 4939 | + "warning": null, |
| 4940 | + "mini-swe-agent_version": "1.7.0" |
| 4941 | + }, |
4832 | 4942 | {
|
4833 | 4943 | "name": "o3 (2025-04-16)",
|
4834 | 4944 | "logo": [
|
|
4961 | 5071 | "warning": null,
|
4962 | 5072 | "mini-swe-agent_version": "1.0.0"
|
4963 | 5073 | },
|
| 5074 | + { |
| 5075 | + "name": "GPT-5 nano (2025-08-07)", |
| 5076 | + "logo": [ |
| 5077 | + "https://upload.wikimedia.org/wikipedia/commons/6/66/OpenAI_logo_2025_%28symbol%29.svg" |
| 5078 | + ], |
| 5079 | + "site": "https://platform.openai.com/docs/models/gpt-5-nano", |
| 5080 | + "folder": "20250807_mini-v1.7.0_gpt-5-nano-2025-08-07", |
| 5081 | + "resolved": 34.8, |
| 5082 | + "date": "2025-08-07", |
| 5083 | + "logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-nano-2025-08-07/logs", |
| 5084 | + "trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-nano-2025-08-07/trajs", |
| 5085 | + "os_model": false, |
| 5086 | + "os_system": true, |
| 5087 | + "checked": true, |
| 5088 | + "tags": [ |
| 5089 | + "Org: OpenAI", |
| 5090 | + "Model: gpt-5-nano-2025-08-07", |
| 5091 | + "System: Attempts - 1" |
| 5092 | + ], |
| 5093 | + "warning": null, |
| 5094 | + "mini-swe-agent_version": "1.7.0" |
| 5095 | + }, |
4964 | 5096 | {
|
4965 | 5097 | "name": "Gemini 2.5 Flash (2025-04-17)",
|
4966 | 5098 | "logo": [
|
|
0 commit comments