Skip to content

Commit ead09e5

Browse files
committed
Deploying to gh-pages from @ 80fced6 🚀
1 parent 646b15f commit ead09e5

File tree

3 files changed

+62
-62
lines changed

3 files changed

+62
-62
lines changed

bash-only.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

index.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

viewer.html

Lines changed: 60 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,51 @@ <h1>SWE-bench Analysis</h1>
115115
&nbsp;
116116
<label for="model-select">Model:</label>
117117

118-
<select id="model-select-test" >
118+
<select id="model-select-bash-only" >
119+
120+
<option value="20250802_mini-v1.0.0_claude-4-opus-20250514">(2025-08-02) Claude 4 Opus (20250514)</option>
121+
122+
<option value="20250807_mini-v1.7.0_gpt-5-2025-08-07">(2025-08-07) GPT-5 (2025-08-07) (medium reasoning)</option>
123+
124+
<option value="20250726_mini-v1.0.0_claude-sonnet-4-20250514">(2025-07-26) Claude 4 Sonnet (20250514)</option>
125+
126+
<option value="20250807_mini-v1.7.0_gpt-5-mini-2025-08-07">(2025-08-07) GPT-5 mini (2025-08-07) (medium reasoning)</option>
127+
128+
<option value="20250726_mini-v1.0.0_o3-2025-04-16">(2025-07-26) o3 (2025-04-16)</option>
129+
130+
<option value="20250802_mini-v1.0.0_Qwen3-Coder-480B-A35B-Instruct">(2025-08-02) Qwen3-Coder 480B/A35B Instruct</option>
131+
132+
<option value="20250726_mini-v1.0.0_gemini-2.5-pro">(2025-07-26) Gemini 2.5 Pro (2025-05-06)</option>
133+
134+
<option value="20250720_mini-v0.0.0-claude-3-7-sonnet-20250219">(2025-07-20) Claude 3.7 Sonnet (20250219)</option>
135+
136+
<option value="20250726_mini-v1.0.0_o4-mini-2025-04-16">(2025-07-26) o4-mini (2025-04-16)</option>
137+
138+
<option value="20250807_mini-v1.7.0_Kimi-K2-Instruct">(2025-08-07) Kimi K2 Instruct</option>
139+
140+
<option value="20250726_mini-v1.0.0_gpt-4.1-2025-04-14">(2025-07-26) GPT-4.1 (2025-04-14)</option>
141+
142+
<option value="20250807_mini-v1.7.0_gpt-5-nano-2025-08-07">(2025-08-07) GPT-5 nano (2025-08-07) (medium reasoning)</option>
143+
144+
<option value="20250726_mini-v1.0.0_gemini-2.5-flash">(2025-07-26) Gemini 2.5 Flash (2025-04-17)</option>
145+
146+
<option value="20250807_mini-v1.7.0_gpt-oss-120b">(2025-08-07) gpt-oss-120b</option>
147+
148+
<option value="20250720_mini-v0.0.0_gpt-4.1-mini-2025-04-14">(2025-07-20) GPT-4.1-mini (2025-04-14)</option>
149+
150+
<option value="20250720_mini-v0.0.0-gpt-4o-2024-11-20">(2025-07-20) GPT-4o (2024-11-20)</option>
151+
152+
<option value="20250720_mini-v0.0.0-Llama-4-Maverick-17B-Instruct">(2025-07-20) Llama 4 Maverick Instruct</option>
153+
154+
<option value="20250726_mini-v1.0.0_gemini-2.0-flash">(2025-07-26) Gemini 2.0 flash</option>
155+
156+
<option value="20250720_mini-v0.0.0-Llama-4-Scout-17B-Instruct">(2025-07-20) Llama 4 Scout Instruct</option>
157+
158+
<option value="20250803_mini-v1.0.0_Qwen2.5-Coder-32B-Instruct">(2025-08-03) Qwen2.5-Coder 32B Instruct</option>
159+
160+
</select>
161+
162+
<select id="model-select-test" hidden>
119163

120164
<option value="20250605_atlassian-rovo-dev">(2025-06-05) Atlassian Rovo Dev (2025-06-05)</option>
121165

@@ -201,7 +245,7 @@ <h1>SWE-bench Analysis</h1>
201245

202246
<option value="20250430_zencoder_ai">(2025-04-30) Zencoder (2025-04-30)</option>
203247

204-
<option value="20250805-openhands-Qwen3-Coder-480B-A35B-Instruct">(2025-08-05) OpenHands + Qwen3-Coder-480B-A35B-Instruct</option>
248+
<option value="20250805_openhands-Qwen3-Coder-480B-A35B-Instruct">(2025-08-05) OpenHands + Qwen3-Coder-480B-A35B-Instruct</option>
205249

206250
<option value="20250516_cortexa_o3">(2025-05-16) Nemotron-CORTEXA</option>
207251

@@ -221,7 +265,7 @@ <h1>SWE-bench Analysis</h1>
221265

222266
<option value="20250807_mini-v1.7.0_gpt-5-2025-08-07">(2025-08-07) mini-SWE-agent + GPT-5 (2025-08-07) (medium reasoning)</option>
223267

224-
<option value="20250726_mini-v1.0.0_claude-sonnet-4-20250514">(2025-05-21) mini-SWE-agent + Claude 4 Sonnet (20250514)</option>
268+
<option value="20250726_mini-v1.0.0_claude-sonnet-4-20250514">(2025-07-26) mini-SWE-agent + Claude 4 Sonnet (20250514)</option>
225269

226270
<option value="20250117_wandb_programmer_o1_crosscheck5">(2025-01-17) W&amp;B Programmer O1 crosscheck5</option>
227271

@@ -247,9 +291,9 @@ <h1>SWE-bench Analysis</h1>
247291

248292
<option value="20250807_mini-v1.7.0_gpt-5-mini-2025-08-07">(2025-08-07) mini-SWE-agent + GPT-5 mini (2025-08-07) (medium reasoning)</option>
249293

250-
<option value="20250629_deepswerl_r2eagent_tts">(2025-06-29) DeepSWE-Preview</option>
294+
<option value="20250629_deepswerl_r2eagent_tts">(2025-06-29) DeepSWE-Preview + TTS(Bo16)</option>
251295

252-
<option value="20250726_mini-v1.0.0_o3-2025-04-16">(2025-05-21) mini-SWE-agent + o3 (2025-04-16)</option>
296+
<option value="20250726_mini-v1.0.0_o3-2025-04-16">(2025-07-26) mini-SWE-agent + o3 (2025-04-16)</option>
253297

254298
<option value="20250410_cortexa">(2025-04-10) Nemotron-CORTEXA</option>
255299

@@ -269,19 +313,19 @@ <h1>SWE-bench Analysis</h1>
269313

270314
<option value="20241108_devlo">(2024-11-08) devlo</option>
271315

272-
<option value="20250726_mini-v1.0.0_gemini-2.5-pro">(2025-05-21) mini-SWE-agent + Gemini 2.5 Pro (2025-05-06)</option>
316+
<option value="20250726_mini-v1.0.0_gemini-2.5-pro">(2025-07-26) mini-SWE-agent + Gemini 2.5 Pro (2025-05-06)</option>
273317

274318
<option value="20250120_Bracket">(2025-01-20) Bracket.sh</option>
275319

276320
<option value="20241029_OpenHands-CodeAct-2.1-sonnet-20241022">(2024-10-29) OpenHands + CodeAct v2.1 (claude-3-5-sonnet-20241022)</option>
277321

278-
<option value="20250720_mini-v0.0.0-claude-3-7-sonnet-20250219">(2025-05-21) mini-SWE-agent + Claude 3.7 Sonnet (20250219)</option>
322+
<option value="20250720_mini-v0.0.0-claude-3-7-sonnet-20250219">(2025-07-20) mini-SWE-agent + Claude 3.7 Sonnet (20250219)</option>
279323

280324
<option value="20241212_google_jules_gemini_2.0_flash_experimental">(2024-12-12) Google Jules + Gemini 2.0 Flash (v20241212-experimental)</option>
281325

282326
<option value="20241125_enginelabs">(2024-11-25) Engine Labs (2024-11-25)</option>
283327

284-
<option value="20250805-openhands-Qwen3-Coder-30B-A3B-Instruct">(2025-08-05) OpenHands + Qwen3-Coder-30B-A3B-Instruct</option>
328+
<option value="20250805_openhands-Qwen3-Coder-30B-A3B-Instruct">(2025-08-05) OpenHands + Qwen3-Coder-30B-A3B-Instruct</option>
285329

286330
<option value="20250122_autocoderover-v2.1-claude-3-5-sonnet-20241022">(2025-01-22) AutoCodeRover-v2.1 (Claude-3.5-Sonnet-20241022)</option>
287331

@@ -313,7 +357,7 @@ <h1>SWE-bench Analysis</h1>
313357

314358
<option value="20240824_gru">(2024-08-24) Gru(2024-08-24)</option>
315359

316-
<option value="20250726_mini-v1.0.0_o4-mini-2025-04-16">(2025-05-21) mini-SWE-agent + o4-mini (2025-04-16)</option>
360+
<option value="20250726_mini-v1.0.0_o4-mini-2025-04-16">(2025-07-26) mini-SWE-agent + o4-mini (2025-04-16)</option>
317361

318362
<option value="20250118_codeshellagent_gemini_2.0_flash_experimental">(2025-01-18) CodeShellAgent + Gemini 2.0 Flash (Experimental)</option>
319363

@@ -347,7 +391,7 @@ <h1>SWE-bench Analysis</h1>
347391

348392
<option value="20241029_epam-ai-run-claude-3-5-sonnet">(2024-10-29) EPAM AI/Run Developer Agent v20241029 + Anthopic Claude 3.5 Sonnet</option>
349393

350-
<option value="20250726_mini-v1.0.0_gpt-4.1-2025-04-14">(2025-05-21) mini-SWE-agent + GPT-4.1 (2025-04-14)</option>
394+
<option value="20250726_mini-v1.0.0_gpt-4.1-2025-04-14">(2025-07-26) mini-SWE-agent + GPT-4.1 (2025-04-14)</option>
351395

352396
<option value="20240721_amazon-q-developer-agent-20240719-dev">(2024-07-21) Amazon Q Developer Agent (v20240719-dev)</option>
353397

@@ -377,7 +421,7 @@ <h1>SWE-bench Analysis</h1>
377421

378422
<option value="20241002_lingma-agent_lingma-swe-gpt-72b">(2024-10-02) Lingma Agent + Lingma SWE-GPT 72b (v0925)</option>
379423

380-
<option value="20250726_mini-v1.0.0_gemini-2.5-flash">(2025-05-21) mini-SWE-agent + Gemini 2.5 Flash (2025-04-17)</option>
424+
<option value="20250726_mini-v1.0.0_gemini-2.5-flash">(2025-07-26) mini-SWE-agent + Gemini 2.5 Flash (2025-04-17)</option>
381425

382426
<option value="20241016_epam-ai-run-gpt-4o">(2024-10-16) EPAM AI/Run Developer Agent + GPT4o</option>
383427

@@ -393,27 +437,27 @@ <h1>SWE-bench Analysis</h1>
393437

394438
<option value="20240820_epam-ai-run-gpt-4o">(2024-08-20) EPAM AI/Run Developer Agent + GPT4o</option>
395439

396-
<option value="20250720_mini-v0.0.0_gpt-4.1-mini-2025-04-14">(2025-05-21) mini-SWE-agent + GPT-4.1-mini (2025-04-14)</option>
440+
<option value="20250720_mini-v0.0.0_gpt-4.1-mini-2025-04-14">(2025-07-20) mini-SWE-agent + GPT-4.1-mini (2025-04-14)</option>
397441

398442
<option value="20250627_agentless_MCTS-Refine-7B">(2025-06-27) MCTS-Refine-7B</option>
399443

400444
<option value="20240728_sweagent_gpt4o">(2024-07-28) SWE-agent + GPT 4o (2024-05-13)</option>
401445

402446
<option value="20240402_sweagent_gpt4">(2024-04-02) SWE-agent + GPT 4 (1106)</option>
403447

404-
<option value="20250720_mini-v0.0.0-gpt-4o-2024-11-20">(2025-05-21) mini-SWE-agent + GPT-4o (2024-11-20)</option>
448+
<option value="20250720_mini-v0.0.0-gpt-4o-2024-11-20">(2025-07-20) mini-SWE-agent + GPT-4o (2024-11-20)</option>
405449

406-
<option value="20250720_mini-v0.0.0-Llama-4-Maverick-17B-Instruct">(2025-05-21) mini-SWE-agent + Llama 4 Maverick Instruct</option>
450+
<option value="20250720_mini-v0.0.0-Llama-4-Maverick-17B-Instruct">(2025-07-20) mini-SWE-agent + Llama 4 Maverick Instruct</option>
407451

408452
<option value="20240402_sweagent_claude3opus">(2024-04-02) SWE-agent + Claude 3 Opus</option>
409453

410454
<option value="20241002_lingma-agent_lingma-swe-gpt-7b">(2024-10-02) Lingma Agent + Lingma SWE-GPT 7b (v0925)</option>
411455

412-
<option value="20250726_mini-v1.0.0_gemini-2.0-flash">(2025-05-21) mini-SWE-agent + Gemini 2.0 flash</option>
456+
<option value="20250726_mini-v1.0.0_gemini-2.0-flash">(2025-07-26) mini-SWE-agent + Gemini 2.0 flash</option>
413457

414458
<option value="20240918_lingma-agent_lingma-swe-gpt-7b">(2024-09-18) Lingma Agent + Lingma SWE-GPT 7b (v0918)</option>
415459

416-
<option value="20250720_mini-v0.0.0-Llama-4-Scout-17B-Instruct">(2025-05-21) mini-SWE-agent + Llama 4 Scout Instruct</option>
460+
<option value="20250720_mini-v0.0.0-Llama-4-Scout-17B-Instruct">(2025-07-20) mini-SWE-agent + Llama 4 Scout Instruct</option>
417461

418462
<option value="20250803_mini-v1.0.0_Qwen2.5-Coder-32B-Instruct">(2025-08-03) mini-SWE-agent + Qwen2.5-Coder 32B Instruct</option>
419463

@@ -641,50 +685,6 @@ <h1>SWE-bench Analysis</h1>
641685

642686
</select>
643687

644-
<select id="model-select-bash-only" hidden>
645-
646-
<option value="20250802_mini-v1.0.0_claude-4-opus-20250514">(2025-08-02) Claude 4 Opus (20250514)</option>
647-
648-
<option value="20250807_mini-v1.7.0_gpt-5-2025-08-07">(2025-08-07) GPT-5 (2025-08-07) (medium reasoning)</option>
649-
650-
<option value="20250726_mini-v1.0.0_claude-sonnet-4-20250514">(2025-05-21) Claude 4 Sonnet (20250514)</option>
651-
652-
<option value="20250807_mini-v1.7.0_gpt-5-mini-2025-08-07">(2025-08-07) GPT-5 mini (2025-08-07) (medium reasoning)</option>
653-
654-
<option value="20250726_mini-v1.0.0_o3-2025-04-16">(2025-05-21) o3 (2025-04-16)</option>
655-
656-
<option value="20250802_mini-v1.0.0_Qwen3-Coder-480B-A35B-Instruct">(2025-08-02) Qwen3-Coder 480B/A35B Instruct</option>
657-
658-
<option value="20250726_mini-v1.0.0_gemini-2.5-pro">(2025-05-21) Gemini 2.5 Pro (2025-05-06)</option>
659-
660-
<option value="20250720_mini-v0.0.0-claude-3-7-sonnet-20250219">(2025-05-21) Claude 3.7 Sonnet (20250219)</option>
661-
662-
<option value="20250726_mini-v1.0.0_o4-mini-2025-04-16">(2025-05-21) o4-mini (2025-04-16)</option>
663-
664-
<option value="20250807_mini-v1.7.0_Kimi-K2-Instruct">(2025-08-07) Kimi K2 Instruct</option>
665-
666-
<option value="20250726_mini-v1.0.0_gpt-4.1-2025-04-14">(2025-05-21) GPT-4.1 (2025-04-14)</option>
667-
668-
<option value="20250807_mini-v1.7.0_gpt-5-nano-2025-08-07">(2025-08-07) GPT-5 nano (2025-08-07) (medium reasoning)</option>
669-
670-
<option value="20250726_mini-v1.0.0_gemini-2.5-flash">(2025-05-21) Gemini 2.5 Flash (2025-04-17)</option>
671-
672-
<option value="20250807_mini-v1.7.0_gpt-oss-120b">(2025-08-07) gpt-oss-120b</option>
673-
674-
<option value="20250720_mini-v0.0.0_gpt-4.1-mini-2025-04-14">(2025-05-21) GPT-4.1-mini (2025-04-14)</option>
675-
676-
<option value="20250720_mini-v0.0.0-gpt-4o-2024-11-20">(2025-05-21) GPT-4o (2024-11-20)</option>
677-
678-
<option value="20250720_mini-v0.0.0-Llama-4-Maverick-17B-Instruct">(2025-05-21) Llama 4 Maverick Instruct</option>
679-
680-
<option value="20250726_mini-v1.0.0_gemini-2.0-flash">(2025-05-21) Gemini 2.0 flash</option>
681-
682-
<option value="20250720_mini-v0.0.0-Llama-4-Scout-17B-Instruct">(2025-05-21) Llama 4 Scout Instruct</option>
683-
684-
<option value="20250803_mini-v1.0.0_Qwen2.5-Coder-32B-Instruct">(2025-08-03) Qwen2.5-Coder 32B Instruct</option>
685-
686-
</select>
687-
688688
&nbsp;
689689
<button id="loadData">Load Data</button>
690690
</div>

0 commit comments

Comments
 (0)