@@ -115,7 +115,51 @@ <h1>SWE-bench Analysis</h1>
115
115
116
116
< label for ="model-select "> Model:</ label >
117
117
118
- < select id ="model-select-test " >
118
+ < select id ="model-select-bash-only " >
119
+
120
+ < option value ="20250802_mini-v1.0.0_claude-4-opus-20250514 "> (2025-08-02) Claude 4 Opus (20250514)</ option >
121
+
122
+ < option value ="20250807_mini-v1.7.0_gpt-5-2025-08-07 "> (2025-08-07) GPT-5 (2025-08-07) (medium reasoning)</ option >
123
+
124
+ < option value ="20250726_mini-v1.0.0_claude-sonnet-4-20250514 "> (2025-07-26) Claude 4 Sonnet (20250514)</ option >
125
+
126
+ < option value ="20250807_mini-v1.7.0_gpt-5-mini-2025-08-07 "> (2025-08-07) GPT-5 mini (2025-08-07) (medium reasoning)</ option >
127
+
128
+ < option value ="20250726_mini-v1.0.0_o3-2025-04-16 "> (2025-07-26) o3 (2025-04-16)</ option >
129
+
130
+ < option value ="20250802_mini-v1.0.0_Qwen3-Coder-480B-A35B-Instruct "> (2025-08-02) Qwen3-Coder 480B/A35B Instruct</ option >
131
+
132
+ < option value ="20250726_mini-v1.0.0_gemini-2.5-pro "> (2025-07-26) Gemini 2.5 Pro (2025-05-06)</ option >
133
+
134
+ < option value ="20250720_mini-v0.0.0-claude-3-7-sonnet-20250219 "> (2025-07-20) Claude 3.7 Sonnet (20250219)</ option >
135
+
136
+ < option value ="20250726_mini-v1.0.0_o4-mini-2025-04-16 "> (2025-07-26) o4-mini (2025-04-16)</ option >
137
+
138
+ < option value ="20250807_mini-v1.7.0_Kimi-K2-Instruct "> (2025-08-07) Kimi K2 Instruct</ option >
139
+
140
+ < option value ="20250726_mini-v1.0.0_gpt-4.1-2025-04-14 "> (2025-07-26) GPT-4.1 (2025-04-14)</ option >
141
+
142
+ < option value ="20250807_mini-v1.7.0_gpt-5-nano-2025-08-07 "> (2025-08-07) GPT-5 nano (2025-08-07) (medium reasoning)</ option >
143
+
144
+ < option value ="20250726_mini-v1.0.0_gemini-2.5-flash "> (2025-07-26) Gemini 2.5 Flash (2025-04-17)</ option >
145
+
146
+ < option value ="20250807_mini-v1.7.0_gpt-oss-120b "> (2025-08-07) gpt-oss-120b</ option >
147
+
148
+ < option value ="20250720_mini-v0.0.0_gpt-4.1-mini-2025-04-14 "> (2025-07-20) GPT-4.1-mini (2025-04-14)</ option >
149
+
150
+ < option value ="20250720_mini-v0.0.0-gpt-4o-2024-11-20 "> (2025-07-20) GPT-4o (2024-11-20)</ option >
151
+
152
+ < option value ="20250720_mini-v0.0.0-Llama-4-Maverick-17B-Instruct "> (2025-07-20) Llama 4 Maverick Instruct</ option >
153
+
154
+ < option value ="20250726_mini-v1.0.0_gemini-2.0-flash "> (2025-07-26) Gemini 2.0 flash</ option >
155
+
156
+ < option value ="20250720_mini-v0.0.0-Llama-4-Scout-17B-Instruct "> (2025-07-20) Llama 4 Scout Instruct</ option >
157
+
158
+ < option value ="20250803_mini-v1.0.0_Qwen2.5-Coder-32B-Instruct "> (2025-08-03) Qwen2.5-Coder 32B Instruct</ option >
159
+
160
+ </ select >
161
+
162
+ < select id ="model-select-test " hidden >
119
163
120
164
< option value ="20250605_atlassian-rovo-dev "> (2025-06-05) Atlassian Rovo Dev (2025-06-05)</ option >
121
165
@@ -201,7 +245,7 @@ <h1>SWE-bench Analysis</h1>
201
245
202
246
< option value ="20250430_zencoder_ai "> (2025-04-30) Zencoder (2025-04-30)</ option >
203
247
204
- < option value ="20250805-openhands -Qwen3-Coder-480B-A35B-Instruct "> (2025-08-05) OpenHands + Qwen3-Coder-480B-A35B-Instruct</ option >
248
+ < option value ="20250805_openhands -Qwen3-Coder-480B-A35B-Instruct "> (2025-08-05) OpenHands + Qwen3-Coder-480B-A35B-Instruct</ option >
205
249
206
250
< option value ="20250516_cortexa_o3 "> (2025-05-16) Nemotron-CORTEXA</ option >
207
251
@@ -221,7 +265,7 @@ <h1>SWE-bench Analysis</h1>
221
265
222
266
< option value ="20250807_mini-v1.7.0_gpt-5-2025-08-07 "> (2025-08-07) mini-SWE-agent + GPT-5 (2025-08-07) (medium reasoning)</ option >
223
267
224
- < option value ="20250726_mini-v1.0.0_claude-sonnet-4-20250514 "> (2025-05-21 ) mini-SWE-agent + Claude 4 Sonnet (20250514)</ option >
268
+ < option value ="20250726_mini-v1.0.0_claude-sonnet-4-20250514 "> (2025-07-26 ) mini-SWE-agent + Claude 4 Sonnet (20250514)</ option >
225
269
226
270
< option value ="20250117_wandb_programmer_o1_crosscheck5 "> (2025-01-17) W&B Programmer O1 crosscheck5</ option >
227
271
@@ -247,9 +291,9 @@ <h1>SWE-bench Analysis</h1>
247
291
248
292
< option value ="20250807_mini-v1.7.0_gpt-5-mini-2025-08-07 "> (2025-08-07) mini-SWE-agent + GPT-5 mini (2025-08-07) (medium reasoning)</ option >
249
293
250
- < option value ="20250629_deepswerl_r2eagent_tts "> (2025-06-29) DeepSWE-Preview</ option >
294
+ < option value ="20250629_deepswerl_r2eagent_tts "> (2025-06-29) DeepSWE-Preview + TTS(Bo16) </ option >
251
295
252
- < option value ="20250726_mini-v1.0.0_o3-2025-04-16 "> (2025-05-21 ) mini-SWE-agent + o3 (2025-04-16)</ option >
296
+ < option value ="20250726_mini-v1.0.0_o3-2025-04-16 "> (2025-07-26 ) mini-SWE-agent + o3 (2025-04-16)</ option >
253
297
254
298
< option value ="20250410_cortexa "> (2025-04-10) Nemotron-CORTEXA</ option >
255
299
@@ -269,19 +313,19 @@ <h1>SWE-bench Analysis</h1>
269
313
270
314
< option value ="20241108_devlo "> (2024-11-08) devlo</ option >
271
315
272
- < option value ="20250726_mini-v1.0.0_gemini-2.5-pro "> (2025-05-21 ) mini-SWE-agent + Gemini 2.5 Pro (2025-05-06)</ option >
316
+ < option value ="20250726_mini-v1.0.0_gemini-2.5-pro "> (2025-07-26 ) mini-SWE-agent + Gemini 2.5 Pro (2025-05-06)</ option >
273
317
274
318
< option value ="20250120_Bracket "> (2025-01-20) Bracket.sh</ option >
275
319
276
320
< option value ="20241029_OpenHands-CodeAct-2.1-sonnet-20241022 "> (2024-10-29) OpenHands + CodeAct v2.1 (claude-3-5-sonnet-20241022)</ option >
277
321
278
- < option value ="20250720_mini-v0.0.0-claude-3-7-sonnet-20250219 "> (2025-05-21 ) mini-SWE-agent + Claude 3.7 Sonnet (20250219)</ option >
322
+ < option value ="20250720_mini-v0.0.0-claude-3-7-sonnet-20250219 "> (2025-07-20 ) mini-SWE-agent + Claude 3.7 Sonnet (20250219)</ option >
279
323
280
324
< option value ="20241212_google_jules_gemini_2.0_flash_experimental "> (2024-12-12) Google Jules + Gemini 2.0 Flash (v20241212-experimental)</ option >
281
325
282
326
< option value ="20241125_enginelabs "> (2024-11-25) Engine Labs (2024-11-25)</ option >
283
327
284
- < option value ="20250805-openhands -Qwen3-Coder-30B-A3B-Instruct "> (2025-08-05) OpenHands + Qwen3-Coder-30B-A3B-Instruct</ option >
328
+ < option value ="20250805_openhands -Qwen3-Coder-30B-A3B-Instruct "> (2025-08-05) OpenHands + Qwen3-Coder-30B-A3B-Instruct</ option >
285
329
286
330
< option value ="20250122_autocoderover-v2.1-claude-3-5-sonnet-20241022 "> (2025-01-22) AutoCodeRover-v2.1 (Claude-3.5-Sonnet-20241022)</ option >
287
331
@@ -313,7 +357,7 @@ <h1>SWE-bench Analysis</h1>
313
357
314
358
< option value ="20240824_gru "> (2024-08-24) Gru(2024-08-24)</ option >
315
359
316
- < option value ="20250726_mini-v1.0.0_o4-mini-2025-04-16 "> (2025-05-21 ) mini-SWE-agent + o4-mini (2025-04-16)</ option >
360
+ < option value ="20250726_mini-v1.0.0_o4-mini-2025-04-16 "> (2025-07-26 ) mini-SWE-agent + o4-mini (2025-04-16)</ option >
317
361
318
362
< option value ="20250118_codeshellagent_gemini_2.0_flash_experimental "> (2025-01-18) CodeShellAgent + Gemini 2.0 Flash (Experimental)</ option >
319
363
@@ -347,7 +391,7 @@ <h1>SWE-bench Analysis</h1>
347
391
348
392
< option value ="20241029_epam-ai-run-claude-3-5-sonnet "> (2024-10-29) EPAM AI/Run Developer Agent v20241029 + Anthopic Claude 3.5 Sonnet</ option >
349
393
350
- < option value ="20250726_mini-v1.0.0_gpt-4.1-2025-04-14 "> (2025-05-21 ) mini-SWE-agent + GPT-4.1 (2025-04-14)</ option >
394
+ < option value ="20250726_mini-v1.0.0_gpt-4.1-2025-04-14 "> (2025-07-26 ) mini-SWE-agent + GPT-4.1 (2025-04-14)</ option >
351
395
352
396
< option value ="20240721_amazon-q-developer-agent-20240719-dev "> (2024-07-21) Amazon Q Developer Agent (v20240719-dev)</ option >
353
397
@@ -377,7 +421,7 @@ <h1>SWE-bench Analysis</h1>
377
421
378
422
< option value ="20241002_lingma-agent_lingma-swe-gpt-72b "> (2024-10-02) Lingma Agent + Lingma SWE-GPT 72b (v0925)</ option >
379
423
380
- < option value ="20250726_mini-v1.0.0_gemini-2.5-flash "> (2025-05-21 ) mini-SWE-agent + Gemini 2.5 Flash (2025-04-17)</ option >
424
+ < option value ="20250726_mini-v1.0.0_gemini-2.5-flash "> (2025-07-26 ) mini-SWE-agent + Gemini 2.5 Flash (2025-04-17)</ option >
381
425
382
426
< option value ="20241016_epam-ai-run-gpt-4o "> (2024-10-16) EPAM AI/Run Developer Agent + GPT4o</ option >
383
427
@@ -393,27 +437,27 @@ <h1>SWE-bench Analysis</h1>
393
437
394
438
< option value ="20240820_epam-ai-run-gpt-4o "> (2024-08-20) EPAM AI/Run Developer Agent + GPT4o</ option >
395
439
396
- < option value ="20250720_mini-v0.0.0_gpt-4.1-mini-2025-04-14 "> (2025-05-21 ) mini-SWE-agent + GPT-4.1-mini (2025-04-14)</ option >
440
+ < option value ="20250720_mini-v0.0.0_gpt-4.1-mini-2025-04-14 "> (2025-07-20 ) mini-SWE-agent + GPT-4.1-mini (2025-04-14)</ option >
397
441
398
442
< option value ="20250627_agentless_MCTS-Refine-7B "> (2025-06-27) MCTS-Refine-7B</ option >
399
443
400
444
< option value ="20240728_sweagent_gpt4o "> (2024-07-28) SWE-agent + GPT 4o (2024-05-13)</ option >
401
445
402
446
< option value ="20240402_sweagent_gpt4 "> (2024-04-02) SWE-agent + GPT 4 (1106)</ option >
403
447
404
- < option value ="20250720_mini-v0.0.0-gpt-4o-2024-11-20 "> (2025-05-21 ) mini-SWE-agent + GPT-4o (2024-11-20)</ option >
448
+ < option value ="20250720_mini-v0.0.0-gpt-4o-2024-11-20 "> (2025-07-20 ) mini-SWE-agent + GPT-4o (2024-11-20)</ option >
405
449
406
- < option value ="20250720_mini-v0.0.0-Llama-4-Maverick-17B-Instruct "> (2025-05-21 ) mini-SWE-agent + Llama 4 Maverick Instruct</ option >
450
+ < option value ="20250720_mini-v0.0.0-Llama-4-Maverick-17B-Instruct "> (2025-07-20 ) mini-SWE-agent + Llama 4 Maverick Instruct</ option >
407
451
408
452
< option value ="20240402_sweagent_claude3opus "> (2024-04-02) SWE-agent + Claude 3 Opus</ option >
409
453
410
454
< option value ="20241002_lingma-agent_lingma-swe-gpt-7b "> (2024-10-02) Lingma Agent + Lingma SWE-GPT 7b (v0925)</ option >
411
455
412
- < option value ="20250726_mini-v1.0.0_gemini-2.0-flash "> (2025-05-21 ) mini-SWE-agent + Gemini 2.0 flash</ option >
456
+ < option value ="20250726_mini-v1.0.0_gemini-2.0-flash "> (2025-07-26 ) mini-SWE-agent + Gemini 2.0 flash</ option >
413
457
414
458
< option value ="20240918_lingma-agent_lingma-swe-gpt-7b "> (2024-09-18) Lingma Agent + Lingma SWE-GPT 7b (v0918)</ option >
415
459
416
- < option value ="20250720_mini-v0.0.0-Llama-4-Scout-17B-Instruct "> (2025-05-21 ) mini-SWE-agent + Llama 4 Scout Instruct</ option >
460
+ < option value ="20250720_mini-v0.0.0-Llama-4-Scout-17B-Instruct "> (2025-07-20 ) mini-SWE-agent + Llama 4 Scout Instruct</ option >
417
461
418
462
< option value ="20250803_mini-v1.0.0_Qwen2.5-Coder-32B-Instruct "> (2025-08-03) mini-SWE-agent + Qwen2.5-Coder 32B Instruct</ option >
419
463
@@ -641,50 +685,6 @@ <h1>SWE-bench Analysis</h1>
641
685
642
686
</ select >
643
687
644
- < select id ="model-select-bash-only " hidden >
645
-
646
- < option value ="20250802_mini-v1.0.0_claude-4-opus-20250514 "> (2025-08-02) Claude 4 Opus (20250514)</ option >
647
-
648
- < option value ="20250807_mini-v1.7.0_gpt-5-2025-08-07 "> (2025-08-07) GPT-5 (2025-08-07) (medium reasoning)</ option >
649
-
650
- < option value ="20250726_mini-v1.0.0_claude-sonnet-4-20250514 "> (2025-05-21) Claude 4 Sonnet (20250514)</ option >
651
-
652
- < option value ="20250807_mini-v1.7.0_gpt-5-mini-2025-08-07 "> (2025-08-07) GPT-5 mini (2025-08-07) (medium reasoning)</ option >
653
-
654
- < option value ="20250726_mini-v1.0.0_o3-2025-04-16 "> (2025-05-21) o3 (2025-04-16)</ option >
655
-
656
- < option value ="20250802_mini-v1.0.0_Qwen3-Coder-480B-A35B-Instruct "> (2025-08-02) Qwen3-Coder 480B/A35B Instruct</ option >
657
-
658
- < option value ="20250726_mini-v1.0.0_gemini-2.5-pro "> (2025-05-21) Gemini 2.5 Pro (2025-05-06)</ option >
659
-
660
- < option value ="20250720_mini-v0.0.0-claude-3-7-sonnet-20250219 "> (2025-05-21) Claude 3.7 Sonnet (20250219)</ option >
661
-
662
- < option value ="20250726_mini-v1.0.0_o4-mini-2025-04-16 "> (2025-05-21) o4-mini (2025-04-16)</ option >
663
-
664
- < option value ="20250807_mini-v1.7.0_Kimi-K2-Instruct "> (2025-08-07) Kimi K2 Instruct</ option >
665
-
666
- < option value ="20250726_mini-v1.0.0_gpt-4.1-2025-04-14 "> (2025-05-21) GPT-4.1 (2025-04-14)</ option >
667
-
668
- < option value ="20250807_mini-v1.7.0_gpt-5-nano-2025-08-07 "> (2025-08-07) GPT-5 nano (2025-08-07) (medium reasoning)</ option >
669
-
670
- < option value ="20250726_mini-v1.0.0_gemini-2.5-flash "> (2025-05-21) Gemini 2.5 Flash (2025-04-17)</ option >
671
-
672
- < option value ="20250807_mini-v1.7.0_gpt-oss-120b "> (2025-08-07) gpt-oss-120b</ option >
673
-
674
- < option value ="20250720_mini-v0.0.0_gpt-4.1-mini-2025-04-14 "> (2025-05-21) GPT-4.1-mini (2025-04-14)</ option >
675
-
676
- < option value ="20250720_mini-v0.0.0-gpt-4o-2024-11-20 "> (2025-05-21) GPT-4o (2024-11-20)</ option >
677
-
678
- < option value ="20250720_mini-v0.0.0-Llama-4-Maverick-17B-Instruct "> (2025-05-21) Llama 4 Maverick Instruct</ option >
679
-
680
- < option value ="20250726_mini-v1.0.0_gemini-2.0-flash "> (2025-05-21) Gemini 2.0 flash</ option >
681
-
682
- < option value ="20250720_mini-v0.0.0-Llama-4-Scout-17B-Instruct "> (2025-05-21) Llama 4 Scout Instruct</ option >
683
-
684
- < option value ="20250803_mini-v1.0.0_Qwen2.5-Coder-32B-Instruct "> (2025-08-03) Qwen2.5-Coder 32B Instruct</ option >
685
-
686
- </ select >
687
-
688
688
689
689
< button id ="loadData "> Load Data</ button >
690
690
</ div >
0 commit comments