From 9fd3b3dd7815a7d28c1fbd4cfd4211f3c18800b2 Mon Sep 17 00:00:00 2001 From: OpenClaw Agent <> Date: Thu, 16 Apr 2026 17:14:22 +0800 Subject: [PATCH 01/12] Add 7 new papers from April 2026 Papers added: - REAgent (2604.06861) - single_agent: Requirement-Driven LLM Agents for Software Issue Resolution - SWE-AGILE (2604.11716) - single_agent: Dynamic Reasoning Context Management - AgentForge (2604.13120) - multi_agent: Execution-Grounded Multi-Agent Framework - CODESTRUCT (2604.05407) - workflow: Code Agents over Structured Action Spaces (ACL 2026) - SWE-ZERO to SWE-HERO (2604.01496) - sft: Execution-free to Execution-based Fine-tuning - PassRateConstraint (2604.05955) - methods_analysis: Design Constraint Compliance Evaluation - RTMC (2604.11037) - rl: Step-Level Credit Assignment via Rollout Trees --- app/data/papers_methods_analysis.yaml | 10 ++++++++++ app/data/papers_multi_agent.yaml | 9 +++++++++ app/data/papers_rl.yaml | 8 ++++++++ app/data/papers_sft.yaml | 9 +++++++++ app/data/papers_single_agent.yaml | 18 ++++++++++++++++++ app/data/papers_workflow.yaml | 10 ++++++++++ 6 files changed, 64 insertions(+) diff --git a/app/data/papers_methods_analysis.yaml b/app/data/papers_methods_analysis.yaml index 78a736a..2bc9c35 100644 --- a/app/data/papers_methods_analysis.yaml +++ b/app/data/papers_methods_analysis.yaml @@ -1,3 +1,13 @@ +- short_name: PassRateConstraint + title: 'Does Pass Rate Tell the Whole Story? 
Evaluating Design Constraint Compliance + in LLM-based Issue Resolution' + authors: Kai Yu, Zhenhao Zhou, Junhao Zeng, Ying Wang, Xueying Du, Zhiqiang Yuan, + Junwei Liu, Ziyu Zhou, Yujia Wang, Chong Wang, Xin Peng + year: '2026' + venue: arXiv preprint arXiv:2604.05955 + month: 2026-04 + links: + arxiv: https://arxiv.org/abs/2604.05955 - short_name: ContextBench title: 'ContextBench: A Benchmark for Context Retrieval in Coding Agents' authors: Han Li, Letian Zhu, Bohan Zhang, Rili Feng, Jiaming Wang, Yue Pan, Earl diff --git a/app/data/papers_multi_agent.yaml b/app/data/papers_multi_agent.yaml index 6c14fa3..d70e6dd 100644 --- a/app/data/papers_multi_agent.yaml +++ b/app/data/papers_multi_agent.yaml @@ -1,3 +1,12 @@ +- short_name: AgentForge + title: 'AgentForge: Execution-Grounded Multi-Agent LLM Framework for Autonomous + Software Engineering' + authors: Rajesh Kumar, Waqar Ali, Junaid Ahmed, Najma Imtiaz Ali, Shaban Usman + year: '2026' + venue: arXiv preprint arXiv:2604.13120 + month: 2026-04 + links: + arxiv: https://arxiv.org/abs/2604.13120 - short_name: SWE-Adept title: 'SWE-Adept: An LLM-Based Agentic Framework for Deep Codebase Analysis and Structured Issue Resolution' diff --git a/app/data/papers_rl.yaml b/app/data/papers_rl.yaml index bb9f9b8..937ea5c 100644 --- a/app/data/papers_rl.yaml +++ b/app/data/papers_rl.yaml @@ -1,3 +1,11 @@ +- short_name: RTMC + title: 'RTMC: Step-Level Credit Assignment via Rollout Trees' + authors: Tao Wang, Suhang Zheng, Xiaoxiao Xu + year: '2026' + venue: arXiv preprint arXiv:2604.11037 + month: 2026-04 + links: + arxiv: https://arxiv.org/abs/2604.11037 - short_name: SWE-Fuse title: 'SWE-Fuse: Empowering Software Agents via Issue-free Trajectory Learning and Entropy-aware RLVR Training' diff --git a/app/data/papers_sft.yaml b/app/data/papers_sft.yaml index b5fd222..8c48385 100644 --- a/app/data/papers_sft.yaml +++ b/app/data/papers_sft.yaml @@ -1,3 +1,12 @@ +- short_name: SWE-ZERO to SWE-HERO + title: 'From SWE-ZERO to 
SWE-HERO: Execution-free to Execution-based Fine-tuning + for Software Engineering Agents' + authors: Nikolai Ludwig, Wasi Uddin Ahmad, Somshubra Majumdar, Boris Ginsburg + year: '2026' + venue: arXiv preprint arXiv:2604.01496 + month: 2026-04 + links: + arxiv: https://arxiv.org/abs/2604.01496 - short_name: OpenSWE title: 'daVinci-Env: Open SWE Environment Synthesis at Scale' authors: Dayuan Fu, Shenyu Wu, Yunze Wu, Zerui Peng, Yaxing Huang, Jie Sun, Ji Zeng, diff --git a/app/data/papers_single_agent.yaml b/app/data/papers_single_agent.yaml index 8ed2dea..971e95e 100644 --- a/app/data/papers_single_agent.yaml +++ b/app/data/papers_single_agent.yaml @@ -1,3 +1,21 @@ +- short_name: REAgent + title: 'REAgent: Requirement-Driven LLM Agents for Software Issue Resolution' + authors: Shiqi Kuang, Zhao Tian, Kaiwei Lin, Chaofan Tao, Shaowei Wang, Haoli Bai, + Lifeng Shang, Junjie Chen + year: '2026' + venue: arXiv preprint arXiv:2604.06861 + month: 2026-04 + links: + arxiv: https://arxiv.org/abs/2604.06861 +- short_name: SWE-AGILE + title: 'SWE-AGILE: A Software Agent Framework for Efficiently Managing Dynamic + Reasoning Context' + authors: Shuquan Lian, Juncheng Liu, Yazhe Chen, Yuhong Chen, Hui Li + year: '2026' + venue: arXiv preprint arXiv:2604.11716 + month: 2026-04 + links: + arxiv: https://arxiv.org/abs/2604.11716 - short_name: Confucius Code Agent title: 'Confucius Code Agent: Scalable Agent Scaffolding for Real-World Codebases' authors: Sherman Wong, Zhenting Qi, Zhaodong Wang, Nathan Hu, Samuel Lin, Jun Ge, diff --git a/app/data/papers_workflow.yaml b/app/data/papers_workflow.yaml index e621d64..5f12622 100644 --- a/app/data/papers_workflow.yaml +++ b/app/data/papers_workflow.yaml @@ -1,3 +1,13 @@ +- short_name: CODESTRUCT + title: 'CODESTRUCT: Code Agents over Structured Action Spaces' + authors: Myeongsoo Kim, Joe Hsu, Dingmin Wang, Shweta Garg, Varun Kumar, Murali + Krishna Ramanathan + year: '2026' + venue: Proceedings of the 64th Annual Meeting of the 
Association for Computational + Linguistics (ACL 2026) + month: 2026-04 + links: + arxiv: https://arxiv.org/abs/2604.05407 - short_name: SynFix title: 'SynFix: Dependency-Aware Program Repair via RelationGraph Analysis' authors: Tang, Xunzhu, Gao, Jiechao, Xu, Jin, Sun, Tiezhu, Song, Yewei, Ezzini, From 77d8556ac1c9b702666802c06087585742c01f8f Mon Sep 17 00:00:00 2001 From: OpenClaw Agent <> Date: Mon, 20 Apr 2026 23:48:23 +0800 Subject: [PATCH 02/12] feat: add non-interactive CLI for adding/searching papers Usage: # List recent papers python scripts/cli_add_paper.py --list # Search python scripts/cli_add_paper.py --search SWE-bench # Add paper to DB python scripts/cli_add_paper.py --short-name TestPaper --title "Test Paper Title" --authors "Author Name" --venue "arXiv 2024" --year 2024 --category single_agent --arxiv https://arxiv.org/abs/... # Add and sync to YAML python scripts/cli_add_paper.py --short-name Test --title Test ... --sync --- scripts/cli_add_paper.py | 416 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 416 insertions(+) create mode 100644 scripts/cli_add_paper.py diff --git a/scripts/cli_add_paper.py b/scripts/cli_add_paper.py new file mode 100644 index 0000000..8e7bc6c --- /dev/null +++ b/scripts/cli_add_paper.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python3 +""" +Non-interactive CLI for adding papers to the survey database. +Supports adding to SQLite DB (which syncs to YAML on --sync). + +Usage: + python scripts/cli_add_paper.py --short-name SWE-bench --title "SWE-bench: ..." \\ + --authors "Carlos Deng et al." --venue "ICLR 2024" --year 2024 \\ + --category single_agent --arxiv https://arxiv.org/abs/... \\ + --github https://github.com/... 
--sync
+
+    # Or use --yaml-only to write directly to YAML (no DB):
+    python scripts/cli_add_paper.py --short-name TestPaper --title "Test" \\
+        --authors "Author" --venue "arXiv" --year 2024 \\
+        --category single_agent --yaml-only
+
+    # List recent papers:
+    python scripts/cli_add_paper.py --list --category single_agent
+
+    # Search papers:
+    python scripts/cli_add_paper.py --search "SWE-bench"
+
+    # Show categories:
+    python scripts/cli_add_paper.py --categories
+"""
+
+import argparse
+import json
+import sys
+import os
+from pathlib import Path
+from datetime import datetime
+
+# Make the repository root and its "app" directory importable.
+# ROOT is the parent of the directory that contains this script.
+ROOT = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(ROOT))
+sys.path.insert(0, str(ROOT / "app"))
+
+# Minimal dependencies: sqlite3 is stdlib; yaml is the third-party PyYAML package.
+import sqlite3
+import yaml
+
+# =============================================================================
+# Config
+# =============================================================================
+
+# Directory that holds the per-category YAML files (papers_<category>.yaml).
+DATA_DIR = ROOT / "app" / "data"
+# SQLite database file used by the DB-backed commands.
+DATABASE_PATH = DATA_DIR / "database" / "survey.db"
+# YAML files live directly in DATA_DIR.
+YAML_DIR = DATA_DIR
+
+# Category ID -> display label. The IDs are what --category accepts and what
+# is used to build the YAML file name; the labels are printed by --categories.
+CATEGORIES = {
+    "evaluation_datasets": "📊 Evaluation Datasets",
+    "training_datasets": "🎯 Training Datasets",
+    "single_agent": "🤖 Single-Agent Systems",
+    "multi_agent": "👥 Multi-Agent Systems",
+    "workflow": "🔄 Workflow-Based Methods",
+    "tool": "🛠️ Tool-Augmented Methods",
+    "memory": "🧠 Memory-Enhanced Methods",
+    "sft": "📚 Supervised Fine-Tuning (SFT)",
+    "rl": "🎮 Reinforcement Learning (RL)",
+    "inference_scaling": "⚡ Inference-Time Scaling",
+    "data_collection": "📥 Data Collection Methods",
+    "data_synthesis": "🔬 Data Synthesis Methods",
+    "data_analysis": "📈 Data Analysis",
+    "methods_analysis": "🔍 Methods Analysis",
+    "others": "🧩 Others",
+    "uncategorized": "❓ Uncategorized",
+}
+
+
+# =============================================================================
+# Database helpers
+# =============================================================================
+
+def get_db():
+    """Get SQLite
connection."""
+    DATABASE_PATH.parent.mkdir(parents=True, exist_ok=True)
+    return sqlite3.connect(str(DATABASE_PATH))
+
+
+def paper_to_db(paper: dict, category: str, conn=None) -> int:
+    """Insert one paper into the ``papers`` table.
+
+    Link URLs are read from the nested ``paper['links']`` mapping (the shape
+    built by ``main``); a flat top-level key such as ``paper['arxiv']`` is
+    still honoured as a fallback for callers that pass a flat dict.
+
+    Args:
+        paper: Paper fields (``short_name``, ``title``, ``authors``,
+            ``venue``, ``year``, optional ``month`` and ``links``).
+        category: Value stored in the ``category`` column.
+        conn: Open SQLite connection. When omitted, one is opened with
+            ``get_db()`` and closed before returning; a caller-supplied
+            connection is left open.
+
+    Returns:
+        The id of the inserted row, or -1 when the insert raised
+        ``sqlite3.IntegrityError``.
+    """
+    close = conn is None
+    conn = conn or get_db()
+    now = datetime.now().isoformat()
+
+    # Fall back to "<year>-01" when month is missing *or* empty.
+    month = paper.get('month') or f"{paper.get('year', '2024')}-01"
+
+    # main() nests URLs under paper['links']. Reading them only from the top
+    # level silently stored NULL in every *_link column.
+    links = paper.get('links') or {}
+    # Order must match the *_link columns in the INSERT below.
+    link_values = tuple(
+        links.get(key) or paper.get(key)
+        for key in ('github', 'huggingface', 'arxiv', 'openreview', 'doi', 'website')
+    )
+
+    try:
+        cursor = conn.execute("""
+            INSERT INTO papers
+            (short_name, title, authors, venue, year, month, category,
+             github_link, huggingface_link, arxiv_link, openreview_link, doi_link, website_link,
+             created_at, updated_at)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        """, (
+            paper.get('short_name', ''),
+            paper.get('title', ''),
+            paper.get('authors', ''),
+            paper.get('venue', ''),
+            str(paper.get('year', '')),
+            month,
+            category,
+        ) + link_values + (now, now))
+        conn.commit()
+        return cursor.lastrowid
+    except sqlite3.IntegrityError as e:
+        print(f"⚠️ Paper already exists: {paper.get('short_name', 'unknown')}")
+        print(f" Error: {e}")
+        return -1
+    finally:
+        if close:
+            conn.close()
+
+
+def list_papers_db(category: str = None, limit: int = 20, offset: int = 0) -> list:
+    """List papers from DB."""
+    conn = get_db()
+    if category:
+        rows = conn.execute("""
+            SELECT id, short_name, title, authors, COALESCE(venue,'-') as venue, year, COALESCE(month,'') as month, category
+            FROM papers WHERE category LIKE ?
+            ORDER BY month DESC LIMIT ? OFFSET ?
+        """, (f'%{category}%', limit, offset)).fetchall()
+    else:
+        rows = conn.execute("""
+            SELECT id, short_name, title, authors, COALESCE(venue,'-') as venue, year, COALESCE(month,'') as month, category
+            FROM papers ORDER BY month DESC LIMIT ? OFFSET ?
+        """, (limit, offset)).fetchall()
+    conn.close()
+    return rows
+
+
+def search_papers_db(keyword: str) -> list:
+    """Return up to 50 papers whose short name, title or authors contain ``keyword``.
+
+    The keyword is bound as a ``LIKE`` parameter wrapped in ``%``; rows are
+    ordered by ``month`` descending.
+    """
+    conn = get_db()
+    pattern = f"%{keyword}%"
+    try:
+        rows = conn.execute("""
+            SELECT id, short_name, title, authors, COALESCE(venue,'-') as venue, year, COALESCE(month,'') as month, category
+            FROM papers
+            WHERE short_name LIKE ? OR title LIKE ? OR authors LIKE ?
+            ORDER BY month DESC LIMIT 50
+        """, (pattern, pattern, pattern)).fetchall()
+    finally:
+        # Close the connection even if the query fails (e.g. missing table).
+        conn.close()
+    return rows
+
+
+def paper_to_yaml_file(paper: dict, category: str):
+    """Append ``paper`` to ``papers_<category>.yaml`` (used by --yaml-only).
+
+    The whole file is loaded, the paper appended, and the file re-dumped, so
+    the file is rewritten by ``yaml.safe_dump`` on every successful add.
+
+    Returns:
+        True if the paper was written; False if an entry with the same
+        ``short_name`` (case-insensitive) already exists in that file.
+    """
+    yaml_file = YAML_DIR / f"papers_{category}.yaml"
+
+    # Load existing entries (an empty or missing file counts as an empty list).
+    if yaml_file.exists():
+        with open(yaml_file, 'r', encoding='utf-8') as f:
+            data = yaml.safe_load(f) or []
+    else:
+        data = []
+
+    # Duplicate check; tolerate entries whose short_name is missing or null.
+    short_name = (paper.get('short_name') or '').lower()
+    for p in data:
+        if (p.get('short_name') or '').lower() == short_name:
+            print(f"⚠️ Duplicate short_name '{paper['short_name']}' in {yaml_file.name}")
+            return False
+
+    data.append(paper)
+
+    with open(yaml_file, 'w', encoding='utf-8') as f:
+        yaml.safe_dump(data, f, allow_unicode=True, sort_keys=False, default_flow_style=False)
+    return True
+
+
+def sync_db_to_yaml(conn=None):
+    """Add DB papers that are missing from the per-category YAML files.
+
+    Each DB row is added to ``papers_<cat>.yaml`` for every comma-separated
+    category in its ``category`` column. Entries already present in a YAML
+    file (matched by case-insensitive ``short_name``) are left untouched;
+    only papers whose name is not yet in the file are appended. Each touched
+    file is then re-sorted by ``month`` (descending) and rewritten.
+
+    Args:
+        conn: Open SQLite connection. When omitted, one is opened with
+            ``get_db()`` and closed once the rows have been read.
+
+    Returns:
+        True.
+    """
+    close = conn is None
+    conn = conn or get_db()
+    try:
+        rows = conn.execute("SELECT short_name, title, authors, venue, year, month, category, github_link, huggingface_link, arxiv_link, openreview_link, doi_link, website_link FROM papers").fetchall()
+    finally:
+        # The DB is only needed for this one read; release it before file I/O.
+        if close:
+            conn.close()
+
+    # Group by category
+    by_cat = {}
+    for row in rows:
+        (short_name, title, authors, venue, year, month, category,
+         github, hf, arxiv, openreview, doi, website) = row
+        links = {}
+        if arxiv: links['arxiv'] = arxiv
+        if github: links['github'] = github
+        if hf: links['huggingface'] = hf
+        if openreview: links['openreview'] = openreview
+        if doi: links['doi'] = doi
+        if website: links['website'] = website
+
+        paper = {
+            'short_name': short_name,
+            'title': title,
+            'authors': authors,
+            'venue': venue or '-',
+            'year': str(year) if year else '',
+            'month': month or '',
+            'links': links,
+        }
+
+        # A NULL category yields no target file instead of an AttributeError.
+        cats = [c.strip() for c in (category or '').split(',') if c.strip()]
+        for cat in cats:
+            by_cat.setdefault(cat, []).append(paper)
+
+    # Write each category YAML
+    for cat, papers in by_cat.items():
+        yaml_file = YAML_DIR / f"papers_{cat}.yaml"
+        existing = []
+        if yaml_file.exists():
+            with open(yaml_file, 'r', encoding='utf-8') as f:
+                existing = yaml.safe_load(f) or []
+
+        # Merge: entries already in the YAML file are kept as they are; a DB
+        # paper is appended only when its short_name is not present yet.
+        existing_names = {(p.get('short_name') or '').lower() for p in existing}
+        for p in papers:
+            name = (p['short_name'] or '').lower()
+            if name not in existing_names:
+                existing.append(p)
+                # Remember the name so a duplicate DB row is not appended twice.
+                existing_names.add(name)
+
+        # Sort by month, newest first. Coerce to str so a null or non-string
+        # month cannot raise TypeError during comparison.
+        existing.sort(key=lambda p: str(p.get('month') or '0'), reverse=True)
+
+        with open(yaml_file, 'w', encoding='utf-8') as f:
+            yaml.safe_dump(existing, f, allow_unicode=True, sort_keys=False, default_flow_style=False)
+
+    print(f"✅ Synced {len(rows)} papers to YAML files")
+    return True
+
+
+# =============================================================================
+# Init DB schema (if needed)
+# =============================================================================
+
+def init_db_schema(conn=None):
+    """Execute ``app/data/database/schema.sql`` against the database.
+
+    Args:
+        conn: Open SQLite connection. When omitted, one is opened with
+            ``get_db()`` and closed before returning.
+    """
+    close = conn is None
+    conn = conn or get_db()
+    try:
+        # Read as UTF-8 explicitly so the result does not depend on the locale.
+        schema = (ROOT / "app" / "data" / "database" / "schema.sql").read_text(encoding='utf-8')
+        conn.executescript(schema)
+        conn.commit()
+    finally:
+        if close:
+            conn.close()
+
+
+# =============================================================================
+# CLI
+# =============================================================================
+
+def print_paper_row(row, idx=0):
+    id, short_name, title, authors, venue, year, month, category = row
+    title_short = title[:60] + '...'
if len(title) > 60 else title
+    print(f" [{id}] {short_name}")
+    print(f" {title_short}")
+    print(f" {authors[:50]}{'...' if len(authors) > 50 else ''}")
+    print(f" {venue} | {year} | {month} | {category}")
+    print()
+
+
+def main():
+    """Parse CLI arguments and run the requested action.
+
+    Actions are checked in this order: --init-db, --categories, --list,
+    --search, then "add a paper" (requires both --short-name and --title).
+    --category filters --list when given and defaults to ``single_agent``
+    when adding.
+
+    Returns:
+        Process exit code: 0 on success; 1 on a usage error, an
+        uninitialized DB, an unknown category, or a failed/duplicate add.
+    """
+    parser = argparse.ArgumentParser(
+        description="Add/search/list papers in the Awesome-Issue-Resolution survey.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Add a paper to DB
+  python scripts/cli_add_paper.py \\
+    --short-name SWE-bench \\
+    --title "SWE-bench: Task-Oriented Evaluation of Software Engineering Agents" \\
+    --authors "Carlos Deng et al." \\
+    --venue "ICLR 2024" \\
+    --year 2024 \\
+    --category single_agent \\
+    --arxiv https://arxiv.org/abs/...
+
+  # Add and sync to YAML immediately
+  python scripts/cli_add_paper.py --short-name Test --title "Test" --authors "A" \\
+    --venue "arXiv" --year 2024 --category single_agent --sync
+
+  # Write directly to YAML (no DB)
+  python scripts/cli_add_paper.py --short-name Test --title "Test" --authors "A" \\
+    --venue "arXiv" --year 2024 --category single_agent --yaml-only
+
+  # List recent papers
+  python scripts/cli_add_paper.py --list
+
+  # Search
+  python scripts/cli_add_paper.py --search "SWE"
+"""
+    )
+
+    # Paper fields
+    parser.add_argument('--short-name', '--short_name', dest='short_name')
+    parser.add_argument('--title')
+    parser.add_argument('--authors')
+    parser.add_argument('--venue', default='')
+    parser.add_argument('--year', type=int, default=2024)
+    parser.add_argument('--month', default='')
+    # No argparse default: None means "not given", so --list can tell an
+    # explicit filter from the add-mode fallback (single_agent).
+    parser.add_argument('--category', default=None,
+                        help=f"Category ID (default when adding: single_agent; filters --list). Options: {', '.join(CATEGORIES.keys())}")
+
+    # Links
+    parser.add_argument('--arxiv')
+    parser.add_argument('--github')
+    parser.add_argument('--huggingface', '--hf')
+    parser.add_argument('--openreview')
+    parser.add_argument('--doi')
+    parser.add_argument('--website')
+
+    # Actions
+    parser.add_argument('--list', action='store_true', help='List recent papers')
+    parser.add_argument('--search', metavar='KEYWORD', help='Search papers')
+    parser.add_argument('--categories', action='store_true', help='Show category list')
+    parser.add_argument('--sync', action='store_true', help='Sync DB → YAML after adding')
+    parser.add_argument('--yaml-only', action='store_true', help='Write directly to YAML (skip DB)')
+    parser.add_argument('--limit', type=int, default=20, help='Max papers to list (default: 20)')
+    parser.add_argument('--init-db', action='store_true', help='Initialize DB schema')
+
+    args = parser.parse_args()
+
+    if args.init_db:
+        init_db_schema()
+        print("✅ DB schema initialized")
+        return 0
+
+    if args.categories:
+        print("Available categories:")
+        for cat_id, cat_name in CATEGORIES.items():
+            print(f" {cat_id:30s} {cat_name}")
+        return 0
+
+    # Read-only DB actions: refuse to run against a missing DB. get_db()
+    # would otherwise create an empty file and the query would then fail
+    # with "no such table".
+    if (args.list or args.search) and not DATABASE_PATH.exists():
+        print("⚠️ DB not initialized. Run: python scripts/cli_add_paper.py --init-db")
+        return 1
+
+    if args.list:
+        # Honour --category as a filter, as the module usage text documents.
+        rows = list_papers_db(category=args.category, limit=args.limit)
+        if not rows:
+            print("No papers found.")
+        else:
+            print(f"Recent papers (showing {len(rows)}):\n")
+            for i, row in enumerate(rows):
+                print_paper_row(row, i)
+        return 0
+
+    if args.search:
+        rows = search_papers_db(args.search)
+        if not rows:
+            print(f"No papers found matching '{args.search}'")
+        else:
+            print(f"Found {len(rows)} papers:\n")
+            for i, row in enumerate(rows):
+                print_paper_row(row, i)
+        return 0
+
+    # Add paper: both identifying fields are required.
+    if not (args.short_name and args.title):
+        parser.print_help()
+        if args.short_name or args.title:
+            print("\n⚠️ Both --short-name and --title are required to add a paper.")
+        else:
+            print("\n⚠️ No action specified. Use --list, --search, --categories, or provide paper fields to add.")
+        return 1
+
+    # Validate the category (comma-separated IDs allowed) so a typo cannot
+    # create a stray papers_<typo>.yaml or an unlistable DB row.
+    categories = [c.strip() for c in (args.category or 'single_agent').split(',') if c.strip()]
+    unknown = [c for c in categories if c not in CATEGORIES]
+    if unknown or not categories:
+        print(f"⚠️ Unknown category: {', '.join(unknown) or repr(args.category)}. Run with --categories to list valid IDs.")
+        return 1
+    category = ','.join(categories)
+
+    paper = {
+        'short_name': args.short_name,
+        'title': args.title,
+        'authors': args.authors or '',
+        'venue': args.venue or '-',
+        'year': str(args.year),
+        'month': args.month or f"{args.year}-01",
+        'links': {},
+    }
+    if args.arxiv: paper['links']['arxiv'] = args.arxiv
+    if args.github: paper['links']['github'] = args.github
+    if args.huggingface: paper['links']['huggingface'] = args.huggingface
+    if args.openreview: paper['links']['openreview'] = args.openreview
+    if args.doi: paper['links']['doi'] = args.doi
+    if args.website: paper['links']['website'] = args.website
+
+    if args.yaml_only:
+        # One YAML file per category; report failure if any write was refused.
+        failed = False
+        for cat in categories:
+            if paper_to_yaml_file(paper, cat):
+                print(f"✅ Added '{paper['short_name']}' → {YAML_DIR}/papers_{cat}.yaml")
+            else:
+                failed = True
+        return 1 if failed else 0
+
+    # DB mode
+    if not DATABASE_PATH.exists():
+        print("⚠️ DB not initialized. Run: python scripts/cli_add_paper.py --init-db")
+        print(" Or use --yaml-only to write directly to YAML.")
+        return 1
+
+    conn = get_db()
+    try:
+        pid = paper_to_db(paper, category, conn)
+    finally:
+        # paper_to_db leaves a caller-supplied connection open.
+        conn.close()
+
+    if pid <= 0:
+        # paper_to_db already printed the reason; signal failure to the shell.
+        return 1
+
+    print(f"✅ Added paper '{paper['short_name']}' (DB id={pid}) → category={category}")
+    if args.sync:
+        sync_db_to_yaml()
+        print("✅ Synced DB → YAML")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
From c260018f62d26842247e1994b926926cfe3fc6cc Mon Sep 17 00:00:00 2001
From: OpenClaw Agent <>
Date: Mon, 27 Apr 2026 13:10:02 +0800
Subject: [PATCH 03/12] fix: correct dataset statistics based on official sources
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- SWE-bench Multimodal: 619 → 617 instances (arXiv:2410.03859)
- SWE-bench-extra: 2k repos → 1,988 repos, 6.38k → 6,376 instances (HuggingFace nebius/SWE-bench-extra)
- Multi-SWE-bench: 76 repos → -, 4,723 → 1,632 instances (arXiv:2504.02605; 4,723 is Multi-SWE-RL)
- Add SWE-rebench V2 to
table (32,000+ tasks, 20 languages, 3,600+ repos, arXiv:2602.23866) --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 73abfbc..b21d922 100644 --- a/README.md +++ b/README.md @@ -408,12 +408,13 @@ _A comprehensive survey and statistical overview of issue resolution datasets. W | SWE-smith | Python | No | 128 | 50k | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-smith) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-smith) | | SWE-Lego | Python | No | 3,251 | 32,119 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Lego/datasets) | | SWE-rebench | Python | No | 3,468 | 21,336 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-rebench/SWE-bench-fork) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-rebench) | +| **SWE-rebench V2** | Python, JS, TS, Go, Rust, C, C++, Java, etc. 
(20 PL) | No | 3,600+ | 32,000+ | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.23866) | | SWE-bench-train | Python | No | 37 | 19k | No | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench/viewer/default/train) | | SWE-Flow | Python | No | 74 | 18,081 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/Hambaobao/SWE-Flow) | | Skywork-SWE | Python | No | 2,531 | 10,169 | Yes | - | | R2E-Gym | Python | No | 10 | 8,135 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/R2E-Gym/R2E-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/R2E-Gym/datasets) | | RepoForge | Python | No | - | 7.3k | Yes | - | -| SWE-bench-extra | Python | No | 2k | 6.38k | Yes | [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-bench-extra) | +| SWE-bench-extra | Python | No | 1,988 | 6,376 | Yes | [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-bench-extra) | | SWE-Gym | Python | No | 11 | 2,438 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Gym/datasets) | | SWE-bench | Python | No | 12 | 2,294 | Yes | 
[![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench) | | SWE-bench-java | Java | No | 19 | 1,797 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench-env) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Daoguang/Multi-SWE-bench) | @@ -430,7 +431,7 @@ _A comprehensive survey and statistical overview of issue resolution datasets. W | SWE-EVO | Python | No | 7 | 48 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/bdqnghi/SWE-EVO) | | **Multi-PL Datasets** | | | | | | | | SWE-Mirror | Python, Rust, Go | No | 40 | 60k | Yes | - | -| Multi-SWE-bench | Java, JS, TS, Go, Rust, C, C++ | No | 76 | 4,723 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/ByteDance-Seed/Multi-SWE-bench) | +| Multi-SWE-bench | Java, JS, TS, Go, Rust, C, C++ | No | - | 1,632 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/ByteDance-Seed/Multi-SWE-bench) | | Swing-Bench | Python, Go, C++, Rust | No | 400 | 2300 | Yes | - | | SWE-PolyBench | Python, Java, JS, TS | No | 21 | 2,110 | Yes | 
[![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/amazon-science/SWE-PolyBench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AmazonScience/SWE-PolyBench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Sellopale/SWE-PolyBench_500) | | SWE-Compass | Python, JS, TS, Java, C, C++, Go, Rust, Kotlin, C# | No | - | 2,000 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/kwaipilot/SWE-Compass/) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Kwaipilot/SWE-Compass) | @@ -438,7 +439,7 @@ _A comprehensive survey and statistical overview of issue resolution datasets. W | SWE-bench++ | Python, Go, TS, JS, Ruby, PHP, Java, Rust, C++, C#, C | No | 3,971 | 1,782 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/TuringEnterprises/SWE-Bench-plus-plus) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/TuringEnterprises/SWE-Bench-plus-plus) | | SWE-Lancer | JS, TS | No | - | 1,488 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/openai/frontier-evals) | | OmniGIRL | Python, TS, Java, JS | Yes | 15 | 959 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/deepsoftwareanalytics/omnigirl) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Deep-Software-Analytics/OmniGIRL) | -| SWE-bench Multimodal | JS, TS, HTML, CSS | Yes | 17 
| 619 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-bench_Multimodal) | +| SWE-bench Multimodal | JS, TS, HTML, CSS | Yes | 17 | 617 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-bench_Multimodal) | | SWE-fficiency | Python, Cython | No | 9 | 498 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/swefficiency/swefficiency-site) | | SWE-Factory | Python, Java, JS, TS | No | 12 | 430 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/DeepSoftwareAnalytics/swe-factory) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Factory) | | SWE-bench-Live-MultiLang \& Windows | Python, JS, TS, C, C++, C#, Java, Go, Rust | No | 238 | 418 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/SWE-bench-Live) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/MultiLang) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/Windows) | From ada0cb48e9a121c1f5ccd6fab4c34f48bff5fe0b Mon Sep 17 00:00:00 2001 From: OpenClaw Agent <> Date: Mon, 27 Apr 2026 13:24:15 +0800 Subject: [PATCH 04/12] feat: add missing datasets from 
papers into Evaluation table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added from paper sections (previously missing from table): - OpenSWE: 45,320 envs, 12,800 repos (arXiv:2603.13023) - SWE-Universe: 807,693 envs (arXiv:2602.02361) - Scale-SWE: 100k instances, 5,200 repos (arXiv:2602.09892) - daVinci-Dev: envs available, details on HuggingFace (arXiv:2601.18418) - SWE-Next: 2,308 instances, 3,971 repos (arXiv:2603.20691) - Multi-Docker-Eval: 40 repos, benchmark only (arXiv:2512.06915) Also corrected: - SWE-bench Multimodal: 619 → 617 instances - SWE-bench-extra: 2k → 1,988 repos, 6.38k → 6,376 instances - Multi-SWE-bench: 76 → - repos, 4,723 → 1,632 (RL subset removed) --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index b21d922..4ed5b49 100644 --- a/README.md +++ b/README.md @@ -406,9 +406,15 @@ _A comprehensive survey and statistical overview of issue resolution datasets. W | **Single-PL Datasets** | | | | | | | | SWE-Fixer | Python | No | 856 | 115,406 | No | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/InternLM/SWE-Fixer) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/internlm/SWE-Fixer-Train-110K) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/internlm/SWE-Fixer-Eval) | | SWE-smith | Python | No | 128 | 50k | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-smith) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-smith) | +| OpenSWE | Python | No | 12,800 | 45,320 | Yes | 
[![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/OpenSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/OpenSWE) | | SWE-Lego | Python | No | 3,251 | 32,119 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Lego/datasets) | | SWE-rebench | Python | No | 3,468 | 21,336 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-rebench/SWE-bench-fork) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-rebench) | | **SWE-rebench V2** | Python, JS, TS, Go, Rust, C, C++, Java, etc. 
(20 PL) | No | 3,600+ | 32,000+ | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.23866) | +| OpenSWE | Python | No | 12,800 | 45,320 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/OpenSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/OpenSWE) | +| SWE-Universe | Python | No | - | 807,693 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.02361) | +| Scale-SWE | Python | No | 5,200 | 100,000 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/AweAI-Team/ScaleSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/collections/AweAI-Team/scale-swe) | +| daVinci-Dev | Python | No | - | - | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/daVinci-Dev) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/daVinci-Dev) | +| SWE-Next | Python | No | 3,971 | 2,308 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.20691) | | SWE-bench-train | Python | No | 37 | 19k | No | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench/viewer/default/train) | | SWE-Flow | Python | No | 74 | 18,081 | Yes | 
[![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/Hambaobao/SWE-Flow) | | Skywork-SWE | Python | No | 2,531 | 10,169 | Yes | - | @@ -433,6 +439,7 @@ _A comprehensive survey and statistical overview of issue resolution datasets. W | SWE-Mirror | Python, Rust, Go | No | 40 | 60k | Yes | - | | Multi-SWE-bench | Java, JS, TS, Go, Rust, C, C++ | No | - | 1,632 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/ByteDance-Seed/Multi-SWE-bench) | | Swing-Bench | Python, Go, C++, Rust | No | 400 | 2300 | Yes | - | +| Multi-Docker-Eval | Python, JS, TS, Go, Java, C, C++, Ruby, Kotlin, HTML | No | 40 | - | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2512.06915) | | SWE-PolyBench | Python, Java, JS, TS | No | 21 | 2,110 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/amazon-science/SWE-PolyBench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AmazonScience/SWE-PolyBench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Sellopale/SWE-PolyBench_500) | | SWE-Compass | Python, JS, TS, Java, C, C++, Go, Rust, Kotlin, C# | No | - | 2,000 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/kwaipilot/SWE-Compass/) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Kwaipilot/SWE-Compass) | | SWE-Bench Pro | Python, Go, TS | No | 41 
| 1,865 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/scaleapi/SWE-bench_Pro-os) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/ScaleAI/SWE-bench_Pro) | From bf3f84566db9f395d66610f713f141d91b8311b7 Mon Sep 17 00:00:00 2001 From: OpenClaw Agent <> Date: Mon, 27 Apr 2026 13:39:14 +0800 Subject: [PATCH 05/12] feat: fix duplicate entries and add missing datasets in tables --- README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4ed5b49..4b3747e 100644 --- a/README.md +++ b/README.md @@ -405,12 +405,9 @@ _A comprehensive survey and statistical overview of issue resolution datasets. W |---|---|---|---|---|---|---| | **Single-PL Datasets** | | | | | | | | SWE-Fixer | Python | No | 856 | 115,406 | No | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/InternLM/SWE-Fixer) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/internlm/SWE-Fixer-Train-110K) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/internlm/SWE-Fixer-Eval) | -| SWE-smith | Python | No | 128 | 50k | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-smith) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-smith) | -| OpenSWE | Python | No | 12,800 | 45,320 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/OpenSWE) 
[![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/OpenSWE) | -| SWE-Lego | Python | No | 3,251 | 32,119 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Lego/datasets) | + [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Lego/datasets) | | SWE-rebench | Python | No | 3,468 | 21,336 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-rebench/SWE-bench-fork) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-rebench) | | **SWE-rebench V2** | Python, JS, TS, Go, Rust, C, C++, Java, etc. 
(20 PL) | No | 3,600+ | 32,000+ | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.23866) | -| OpenSWE | Python | No | 12,800 | 45,320 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/OpenSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/OpenSWE) | | SWE-Universe | Python | No | - | 807,693 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.02361) | | Scale-SWE | Python | No | 5,200 | 100,000 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/AweAI-Team/ScaleSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/collections/AweAI-Team/scale-swe) | | daVinci-Dev | Python | No | - | - | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/daVinci-Dev) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/daVinci-Dev) | @@ -436,7 +433,7 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W | Visual SWE-bench | Python | Yes | 11 | 133 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/luolin101/CodeV) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/luolin101/Visual-SWE-bench) | | SWE-EVO | Python | No | 7 | 48 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/bdqnghi/SWE-EVO) | | **Multi-PL Datasets** | | | | | | | -| SWE-Mirror | Python, Rust, Go | No | 40 | 60k | Yes | - | +| SWE-Mirror | Python, Rust, Go | No | 40 | 60,671 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2509.08724) | | Multi-SWE-bench | Java, JS, TS, Go, Rust, C, C++ | No | - | 1,632 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/ByteDance-Seed/Multi-SWE-bench) | | Swing-Bench | Python, Go, C++, Rust | No | 400 | 2300 | Yes | - | | Multi-Docker-Eval | Python, JS, TS, Go, Java, C, C++, Ruby, Kotlin, HTML | No | 40 | - | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2512.06915) | From 0fd5b7736dde195e40c09bec7401ed18985757b1 Mon Sep 17 00:00:00 2001 From: OpenClaw Agent <> Date: Mon, 27 Apr 2026 14:11:25 +0800 Subject: [PATCH 06/12] feat: add 7 missing datasets from papers to evaluation table and trajectory table --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 4b3747e..621ac22 100644 --- a/README.md +++ b/README.md @@ -410,6 +410,9 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W | **SWE-rebench V2** | Python, JS, TS, Go, Rust, C, C++, Java, etc. (20 PL) | No | 3,600+ | 32,000+ | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.23866) | | SWE-Universe | Python | No | - | 807,693 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.02361) | | Scale-SWE | Python | No | 5,200 | 100,000 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/AweAI-Team/ScaleSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/collections/AweAI-Team/scale-swe) | +| BeyondSWE | Python | No | - | 500 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03194) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AweAI-Team/BeyondSWE) | +| SWE-CI | Python | No | - | 100 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03823) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/skylenage/SWE-CI) | +| SWE-Skills-Bench | Python | No | - | 565 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.15401) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GeniusHTX/SWE-Skills-Bench) | | daVinci-Dev | Python | No | - | - | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/daVinci-Dev) 
[![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/daVinci-Dev) | | SWE-Next | Python | No | 3,971 | 2,308 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.20691) | | SWE-bench-train | Python | No | 37 | 19k | No | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench/viewer/default/train) | @@ -421,6 +424,8 @@ _A comprehensive survey and statistical overview of issue resolution datasets. W | SWE-Gym | Python | No | 11 | 2,438 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Gym/datasets) | | SWE-bench | Python | No | 12 | 2,294 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench) | | SWE-bench-java | Java | No | 19 | 1,797 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench-env) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Daoguang/Multi-SWE-bench) | +| SWE-smith | Python | No | 128 | 50k | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2504.21798v2) | +| 
Rust-SWE-bench | Rust | No | 34 | 500 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.22764) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GhabiX/Rust-SWE-Bench) | | FEA-bench | Python | No | 83 | 1,401 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/FEA-Bench/) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/microsoft/FEA-Bench) | | SWE-bench-Live | Python | No | 164 | 1,565 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/SWE-bench-Live) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/SWE-bench-Live) | | Loc-Bench | Python | No | - | 560 | No | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/gersteinlab/LocAgent) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/czlll/Loc-Bench_V1) | @@ -448,6 +453,8 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W | SWE-Factory | Python, Java, JS, TS | No | 12 | 430 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/DeepSoftwareAnalytics/swe-factory) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Factory) | | SWE-bench-Live-MultiLang \& Windows | Python, JS, TS, C, C++, C#, Java, Go, Rust | No | 238 | 418 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/SWE-bench-Live) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/MultiLang) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/Windows) | | SWE-bench Multilingual | C, C++, Go, Java, JS, TS, Rust, Python, Ruby, PHP | No | 42 | 300 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-bench_Multilingual) | +| MobileDev-Bench | Java, Kotlin, TS, Dart | Yes | 18 | 384 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.24946) | +| SWE-Bench Mobile | Swift, Objective-C | Yes | - | - | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.09540) | | SWE-InfraBench | Python, TS | No | - | 100 | Yes | - | --- @@ -465,6 +472,7 @@ _A survey of trajectory datasets used for agent training or analysis. 
We list th | SWE-Factory | Python | 10 | 2,809 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/DeepSoftwareAnalytics/swe-factory) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Factory/DeepSWE-Agent-Kimi-K2-Trajectories-2.8K) | | SWE-Gym | Python | 11 | 491 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Gym/OpenHands-SFT-Trajectories) | | SWE-Lego | Python | 3251 | 14.6k | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) | +| Multi-SWE-RL | Python | - | 4,723 | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2504.02605) | --- From 3bf91d561fde31f0aec5fef36eb486b8f138b057 Mon Sep 17 00:00:00 2001 From: OpenClaw Agent <> Date: Mon, 27 Apr 2026 14:33:13 +0800 Subject: [PATCH 07/12] fix: repair broken table rows, restore missing SWE-Lego and OpenSWE --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 621ac22..30acafb 100644 --- a/README.md +++ b/README.md @@ -405,9 +405,10 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W |---|---|---|---|---|---|---| | **Single-PL Datasets** | | | | | | | | SWE-Fixer | Python | No | 856 | 115,406 | No | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/InternLM/SWE-Fixer) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/internlm/SWE-Fixer-Train-110K) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/internlm/SWE-Fixer-Eval) | - [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Lego/datasets) | +| SWE-Lego | Python | No | 3,251 | 32,119 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Lego/datasets) | | SWE-rebench | Python | No | 3,468 | 21,336 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-rebench/SWE-bench-fork) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-rebench) | | **SWE-rebench V2** | Python, JS, TS, Go, Rust, C, C++, Java, etc. 
(20 PL) | No | 3,600+ | 32,000+ | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.23866) | +| OpenSWE | Python | No | 12,800 | 45,320 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/OpenSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/OpenSWE) | | SWE-Universe | Python | No | - | 807,693 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.02361) | | Scale-SWE | Python | No | 5,200 | 100,000 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/AweAI-Team/ScaleSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/collections/AweAI-Team/scale-swe) | | BeyondSWE | Python | No | - | 500 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03194) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AweAI-Team/BeyondSWE) | @@ -424,7 +425,7 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W | SWE-Gym | Python | No | 11 | 2,438 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Gym/datasets) | | SWE-bench | Python | No | 12 | 2,294 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench) | | SWE-bench-java | Java | No | 19 | 1,797 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench-env) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Daoguang/Multi-SWE-bench) | -| SWE-smith | Python | No | 128 | 50k | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2504.21798v2) | +| SWE-smith | Python | No | 128 | 50k | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-smith) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-smith) | | Rust-SWE-bench | Rust | No | 34 | 500 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.22764) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GhabiX/Rust-SWE-Bench) | | FEA-bench | Python | No | 83 | 1,401 | Yes | 
[![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/FEA-Bench/) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/microsoft/FEA-Bench) | | SWE-bench-Live | Python | No | 164 | 1,565 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/SWE-bench-Live) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/SWE-bench-Live) | From 3989d791bcf95158f15a3549c08e45e01689e858 Mon Sep 17 00:00:00 2001 From: OpenClaw Agent <> Date: Mon, 27 Apr 2026 15:06:02 +0800 Subject: [PATCH 08/12] feat: add missing datasets from paper list to evaluation and trajectory tables --- README.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 30acafb..82dc18a 100644 --- a/README.md +++ b/README.md @@ -405,28 +405,25 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W |---|---|---|---|---|---|---| | **Single-PL Datasets** | | | | | | | | SWE-Fixer | Python | No | 856 | 115,406 | No | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/InternLM/SWE-Fixer) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/internlm/SWE-Fixer-Train-110K) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/internlm/SWE-Fixer-Eval) | -| SWE-Lego | Python | No | 3,251 | 32,119 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Lego/datasets) | +| SWE-smith | Python | No | 128 | 50k | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-smith) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-smith) | +| SWE-Lego | Python | No | 3,251 | 32,119 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Lego/datasets) | | SWE-rebench | Python | No | 3,468 | 21,336 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-rebench/SWE-bench-fork) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-rebench) | | **SWE-rebench V2** | Python, JS, TS, Go, Rust, C, C++, 
Java, etc. (20 PL) | No | 3,600+ | 32,000+ | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.23866) | | OpenSWE | Python | No | 12,800 | 45,320 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/OpenSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/OpenSWE) | -| SWE-Universe | Python | No | - | 807,693 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.02361) | -| Scale-SWE | Python | No | 5,200 | 100,000 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/AweAI-Team/ScaleSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/collections/AweAI-Team/scale-swe) | -| BeyondSWE | Python | No | - | 500 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03194) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AweAI-Team/BeyondSWE) | -| SWE-CI | Python | No | - | 100 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03823) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/skylenage/SWE-CI) | -| SWE-Skills-Bench | Python | No | - | 565 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.15401) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GeniusHTX/SWE-Skills-Bench) | -| 
daVinci-Dev | Python | No | - | - | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/daVinci-Dev) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/daVinci-Dev) | -| SWE-Next | Python | No | 3,971 | 2,308 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.20691) | | SWE-bench-train | Python | No | 37 | 19k | No | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench/viewer/default/train) | | SWE-Flow | Python | No | 74 | 18,081 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/Hambaobao/SWE-Flow) | | Skywork-SWE | Python | No | 2,531 | 10,169 | Yes | - | | R2E-Gym | Python | No | 10 | 8,135 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/R2E-Gym/R2E-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/R2E-Gym/datasets) | | RepoForge | Python | No | - | 7.3k | Yes | - | -| SWE-bench-extra | Python | No | 1,988 | 6,376 | Yes | [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-bench-extra) | +| SWE-bench-extra | Python | No | 2k | 6.38k | Yes | [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-bench-extra) | | SWE-Gym | Python | No | 11 | 2,438 | Yes | 
[![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Gym/datasets) | | SWE-bench | Python | No | 12 | 2,294 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench) | +| BeyondSWE | Python | No | - | 500 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03194) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AweAI-Team/BeyondSWE) | +| SWE-CI | Python | No | - | 100 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03823) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/skylenage/SWE-CI) | +| SWE-Skills-Bench | Python | No | - | 565 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.15401) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GeniusHTX/SWE-Skills-Bench) | +| daVinci-Dev | Python | No | - | - | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/daVinci-Dev) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/daVinci-Dev) | +| SWE-Next | Python | No | 3,971 | 2,308 | Yes | 
[![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.20691) | | SWE-bench-java | Java | No | 19 | 1,797 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench-env) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Daoguang/Multi-SWE-bench) | -| SWE-smith | Python | No | 128 | 50k | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-smith) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-smith) | -| Rust-SWE-bench | Rust | No | 34 | 500 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.22764) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GhabiX/Rust-SWE-Bench) | | FEA-bench | Python | No | 83 | 1,401 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/FEA-Bench/) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/microsoft/FEA-Bench) | | SWE-bench-Live | Python | No | 164 | 1,565 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/SWE-bench-Live) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/SWE-bench-Live) | | Loc-Bench | Python | No | - | 560 | No | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/gersteinlab/LocAgent) 
[![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/czlll/Loc-Bench_V1) | @@ -435,28 +432,31 @@ _A comprehensive survey and statistical overview of issue resolution datasets. W | SWE-MERA | Python | No | 200 | 300 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/MERA-Evaluation/SWE-MERA-submissions) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/MERA-evaluation/SWE-MERA) | | SWE-Bench-CL | Python | No | 8 | 273 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/thomasjoshi/agents-never-forget) | | SWE-Sharp-Bench | C# | No | 17 | 150 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/prose/tree/main/misc/SWE-Sharp-Bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/microsoft/SWE-Sharp-Bench) | +| Rust-SWE-bench | Rust | No | 34 | 500 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.22764) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GhabiX/Rust-SWE-Bench) | | SWE-Perf | Python | No | 12 | 140 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Perf/swe-perf) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Perf/SWE-Perf) | +| Scale-SWE | Python | No | 5,200 | 100,000 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/AweAI-Team/ScaleSWE) 
[![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/collections/AweAI-Team/scale-swe) | +| SWE-Universe | Python | No | - | 807,693 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.02361) | | Visual SWE-bench | Python | Yes | 11 | 133 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/luolin101/CodeV) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/luolin101/Visual-SWE-bench) | | SWE-EVO | Python | No | 7 | 48 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/bdqnghi/SWE-EVO) | | **Multi-PL Datasets** | | | | | | | -| SWE-Mirror | Python, Rust, Go | No | 40 | 60,671 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2509.08724) | -| Multi-SWE-bench | Java, JS, TS, Go, Rust, C, C++ | No | - | 1,632 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/ByteDance-Seed/Multi-SWE-bench) | +| SWE-Mirror | Python, Rust, Go | No | 40 | 60k | Yes | - | +| Multi-SWE-bench | Java, JS, TS, Go, Rust, C, C++ | No | 76 | 4,723 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/ByteDance-Seed/Multi-SWE-bench) | | Swing-Bench | Python, Go, C++, Rust | No | 400 | 2300 | Yes | - | -| 
Multi-Docker-Eval | Python, JS, TS, Go, Java, C, C++, Ruby, Kotlin, HTML | No | 40 | - | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2512.06915) | | SWE-PolyBench | Python, Java, JS, TS | No | 21 | 2,110 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/amazon-science/SWE-PolyBench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AmazonScience/SWE-PolyBench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Sellopale/SWE-PolyBench_500) | | SWE-Compass | Python, JS, TS, Java, C, C++, Go, Rust, Kotlin, C# | No | - | 2,000 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/kwaipilot/SWE-Compass/) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Kwaipilot/SWE-Compass) | | SWE-Bench Pro | Python, Go, TS | No | 41 | 1,865 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/scaleapi/SWE-bench_Pro-os) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/ScaleAI/SWE-bench_Pro) | | SWE-bench++ | Python, Go, TS, JS, Ruby, PHP, Java, Rust, C++, C#, C | No | 3,971 | 1,782 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/TuringEnterprises/SWE-Bench-plus-plus) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/TuringEnterprises/SWE-Bench-plus-plus) | | SWE-Lancer | JS, TS | No | - | 1,488 | Yes | 
[![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/openai/frontier-evals) | | OmniGIRL | Python, TS, Java, JS | Yes | 15 | 959 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/deepsoftwareanalytics/omnigirl) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Deep-Software-Analytics/OmniGIRL) | -| SWE-bench Multimodal | JS, TS, HTML, CSS | Yes | 17 | 617 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-bench_Multimodal) | +| SWE-bench Multimodal | JS, TS, HTML, CSS | Yes | 17 | 619 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-bench_Multimodal) | +| MobileDev-Bench | Java, Kotlin, TS, Dart | Yes | 18 | 384 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.24946) | +| SWE-Bench Mobile | Swift, Objective-C | Yes | - | - | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.09540) | | SWE-fficiency | Python, Cython | No | 9 | 498 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/swefficiency/swefficiency-site) | | SWE-Factory | Python, Java, JS, TS | No | 12 | 430 | Yes | 
[![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/DeepSoftwareAnalytics/swe-factory) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Factory) | | SWE-bench-Live-MultiLang \& Windows | Python, JS, TS, C, C++, C#, Java, Go, Rust | No | 238 | 418 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/SWE-bench-Live) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/MultiLang) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/Windows) | | SWE-bench Multilingual | C, C++, Go, Java, JS, TS, Rust, Python, Ruby, PHP | No | 42 | 300 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-bench_Multilingual) | -| MobileDev-Bench | Java, Kotlin, TS, Dart | Yes | 18 | 384 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.24946) | -| SWE-Bench Mobile | Swift, Objective-C | Yes | - | - | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.09540) | | SWE-InfraBench | Python, TS | No | - | 100 | Yes | - | +| Multi-Docker-Eval | Python, JS, TS, Go, Java, C, C++, Ruby, Kotlin, HTML | No | 40 | - | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2512.06915) | --- @@ -473,7 +473,7 @@ _A survey of trajectory datasets used for agent 
training or analysis. We list th | SWE-Factory | Python | 10 | 2,809 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/DeepSoftwareAnalytics/swe-factory) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Factory/DeepSWE-Agent-Kimi-K2-Trajectories-2.8K) | | SWE-Gym | Python | 11 | 491 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Gym/OpenHands-SFT-Trajectories) | | SWE-Lego | Python | 3251 | 14.6k | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) | -| Multi-SWE-RL | Python | - | 4,723 | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2504.02605) | +| Multi-SWE-RL | Python | - | 4,723 | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2504.02605v1) | --- From 87f284b6cb3ccfaf4df48d922cd88a16c1b72222 Mon Sep 17 00:00:00 2001 From: OpenClaw Agent <> Date: Mon, 27 Apr 2026 15:11:20 +0800 Subject: [PATCH 09/12] fix: remove arXiv links from table Link columns, keep only code/data/model links --- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 82dc18a..44181e4 100644 --- a/README.md +++ b/README.md @@ -408,7 +408,7 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W | SWE-smith | Python | No | 128 | 50k | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-smith) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-smith) | | SWE-Lego | Python | No | 3,251 | 32,119 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Lego/datasets) | | SWE-rebench | Python | No | 3,468 | 21,336 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-rebench/SWE-bench-fork) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-rebench) | -| **SWE-rebench V2** | Python, JS, TS, Go, Rust, C, C++, Java, etc. (20 PL) | No | 3,600+ | 32,000+ | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.23866) | +| **SWE-rebench V2** | Python, JS, TS, Go, Rust, C, C++, Java, etc. 
(20 PL) | No | 3,600+ | 32,000+ | Yes | - | | OpenSWE | Python | No | 12,800 | 45,320 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/OpenSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/OpenSWE) | | SWE-bench-train | Python | No | 37 | 19k | No | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench/viewer/default/train) | | SWE-Flow | Python | No | 74 | 18,081 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/Hambaobao/SWE-Flow) | @@ -418,11 +418,11 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W | SWE-bench-extra | Python | No | 2k | 6.38k | Yes | [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-bench-extra) | | SWE-Gym | Python | No | 11 | 2,438 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Gym/datasets) | | SWE-bench | Python | No | 12 | 2,294 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench) | -| BeyondSWE | Python | No | - | 500 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03194) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AweAI-Team/BeyondSWE) | -| SWE-CI | Python | No | - | 100 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03823) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/skylenage/SWE-CI) | -| SWE-Skills-Bench | Python | No | - | 565 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.15401) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GeniusHTX/SWE-Skills-Bench) | +| BeyondSWE | Python | No | - | 500 | Yes | 
[![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AweAI-Team/BeyondSWE) | +| SWE-CI | Python | No | - | 100 | Yes | [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/skylenage/SWE-CI) | +| SWE-Skills-Bench | Python | No | - | 565 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GeniusHTX/SWE-Skills-Bench) | | daVinci-Dev | Python | No | - | - | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/daVinci-Dev) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/daVinci-Dev) | -| SWE-Next | Python | No | 3,971 | 2,308 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.20691) | +| SWE-Next | Python | No | 3,971 | 2,308 | Yes | - | | SWE-bench-java | Java | No | 19 | 1,797 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench-env) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Daoguang/Multi-SWE-bench) | | FEA-bench | Python | No | 83 | 1,401 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/FEA-Bench/) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/microsoft/FEA-Bench) | | SWE-bench-Live | Python | No | 164 | 1,565 | Yes | 
[![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/SWE-bench-Live) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/SWE-bench-Live) | @@ -432,10 +432,10 @@ _A comprehensive survey and statistical overview of issue resolution datasets. W | SWE-MERA | Python | No | 200 | 300 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/MERA-Evaluation/SWE-MERA-submissions) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/MERA-evaluation/SWE-MERA) | | SWE-Bench-CL | Python | No | 8 | 273 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/thomasjoshi/agents-never-forget) | | SWE-Sharp-Bench | C# | No | 17 | 150 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/prose/tree/main/misc/SWE-Sharp-Bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/microsoft/SWE-Sharp-Bench) | -| Rust-SWE-bench | Rust | No | 34 | 500 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.22764) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GhabiX/Rust-SWE-Bench) | +| Rust-SWE-bench | Rust | No | 34 | 500 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GhabiX/Rust-SWE-Bench) | | SWE-Perf | Python | No | 12 | 140 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Perf/swe-perf) 
[![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Perf/SWE-Perf) | | Scale-SWE | Python | No | 5,200 | 100,000 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/AweAI-Team/ScaleSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/collections/AweAI-Team/scale-swe) | -| SWE-Universe | Python | No | - | 807,693 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.02361) | +| SWE-Universe | Python | No | - | 807,693 | Yes | - | | Visual SWE-bench | Python | Yes | 11 | 133 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/luolin101/CodeV) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/luolin101/Visual-SWE-bench) | | SWE-EVO | Python | No | 7 | 48 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/bdqnghi/SWE-EVO) | | **Multi-PL Datasets** | | | | | | | @@ -449,14 +449,14 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W | SWE-Lancer | JS, TS | No | - | 1,488 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/openai/frontier-evals) | | OmniGIRL | Python, TS, Java, JS | Yes | 15 | 959 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/deepsoftwareanalytics/omnigirl) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Deep-Software-Analytics/OmniGIRL) | | SWE-bench Multimodal | JS, TS, HTML, CSS | Yes | 17 | 619 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-bench_Multimodal) | -| MobileDev-Bench | Java, Kotlin, TS, Dart | Yes | 18 | 384 | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.24946) | -| SWE-Bench Mobile | Swift, Objective-C | Yes | - | - | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.09540) | +| MobileDev-Bench | Java, Kotlin, TS, Dart | Yes | 18 | 384 | Yes | - | +| SWE-Bench Mobile | Swift, Objective-C | Yes | - | - | Yes | - | | SWE-fficiency | Python, Cython | No | 9 | 498 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/swefficiency/swefficiency-site) | | SWE-Factory | Python, Java, JS, TS | No | 12 | 430 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/DeepSoftwareAnalytics/swe-factory) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Factory) | | 
SWE-bench-Live-MultiLang \& Windows | Python, JS, TS, C, C++, C#, Java, Go, Rust | No | 238 | 418 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/SWE-bench-Live) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/MultiLang) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/Windows) | | SWE-bench Multilingual | C, C++, Go, Java, JS, TS, Rust, Python, Ruby, PHP | No | 42 | 300 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-bench_Multilingual) | | SWE-InfraBench | Python, TS | No | - | 100 | Yes | - | -| Multi-Docker-Eval | Python, JS, TS, Go, Java, C, C++, Ruby, Kotlin, HTML | No | 40 | - | Yes | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2512.06915) | +| Multi-Docker-Eval | Python, JS, TS, Go, Java, C, C++, Ruby, Kotlin, HTML | No | 40 | - | Yes | - | --- @@ -473,7 +473,7 @@ _A survey of trajectory datasets used for agent training or analysis. 
We list th | SWE-Factory | Python | 10 | 2,809 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/DeepSoftwareAnalytics/swe-factory) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Factory/DeepSWE-Agent-Kimi-K2-Trajectories-2.8K) | | SWE-Gym | Python | 11 | 491 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Gym/OpenHands-SFT-Trajectories) | | SWE-Lego | Python | 3251 | 14.6k | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) | -| Multi-SWE-RL | Python | - | 4,723 | [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2504.02605v1) | +| Multi-SWE-RL | Python | - | 4,723 | - | --- From 0955bde86ff426f9de0fe2df74ccc1b5160d9228 Mon Sep 17 00:00:00 2001 From: OpenClaw Agent <> Date: Mon, 27 Apr 2026 15:12:31 +0800 Subject: [PATCH 10/12] fix: verify HuggingFace links - remove 404 for SWE-CI, add valid links for SWE-Next --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 44181e4..ea94427 100644 --- a/README.md +++ b/README.md @@ -419,10 +419,10 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W | SWE-Gym | Python | No | 11 | 2,438 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Gym/datasets) | | SWE-bench | Python | No | 12 | 2,294 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench) | | BeyondSWE | Python | No | - | 500 | Yes | [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AweAI-Team/BeyondSWE) | -| SWE-CI | Python | No | - | 100 | Yes | [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/skylenage/SWE-CI) | +| SWE-CI | Python | No | - | 100 | Yes | - | | SWE-Skills-Bench | Python | No | - | 565 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GeniusHTX/SWE-Skills-Bench) | | daVinci-Dev | Python | No | - | - | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/daVinci-Dev) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/daVinci-Dev) | -| SWE-Next | Python | No | 3,971 | 2,308 | Yes | - | +| SWE-Next | Python | No | 3,971 | 2,308 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/TIGER-AI-Lab/SWE-Next) 
[![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/TIGER-Lab/SWE-Next) | | SWE-bench-java | Java | No | 19 | 1,797 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench-env) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Daoguang/Multi-SWE-bench) | | FEA-bench | Python | No | 83 | 1,401 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/FEA-Bench/) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/microsoft/FEA-Bench) | | SWE-bench-Live | Python | No | 164 | 1,565 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/SWE-bench-Live) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/SWE-bench-Live) | From e32bc58cc35f764afc301cc51acb0180392a3610 Mon Sep 17 00:00:00 2001 From: OpenClaw Agent <> Date: Mon, 27 Apr 2026 15:48:43 +0800 Subject: [PATCH 11/12] feat: add missing datasets from 227-paper upstream/main (eval, training, trajectory tables) --- README.md | 47 ++++++++++++++++++-------------------- app/data/tables/table1.csv | 6 +++++ app/data/tables/table2.csv | 6 +++++ app/docs/tables.md | 7 ++++++ 4 files changed, 41 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index ea94427..b6dc88e 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ [![Hugging Face](https://img.shields.io/badge/HF_Paper-2601.11655-FFD21E?style=for-the-badge&logo=huggingface&logoColor=000)](https://huggingface.co/papers/2601.11655) 
[![Tables](https://img.shields.io/badge/TABLES-Statistics-blue?style=for-the-badge&logo=databricks)](https://deepsoftwareanalytics.github.io/Awesome-Issue-Resolution/tables/) [![Contributors](https://img.shields.io/github/contributors/DeepSoftwareAnalytics/Awesome-Issue-Resolution?style=for-the-badge&color=green&logo=github)](https://github.com/DeepSoftwareAnalytics/Awesome-Issue-Resolution/graphs/contributors) -![Papers Count](https://img.shields.io/badge/papers-209-green?style=for-the-badge&logo=googlescholar&logoColor=white) +![Papers Count](https://img.shields.io/badge/papers-217-green?style=for-the-badge&logo=googlescholar&logoColor=white) [**📖 Documentation Website**](https://deepsoftwareanalytics.github.io/Awesome-Issue-Resolution/) | [**📄 Full Paper**](https://deepsoftwareanalytics.github.io/Awesome-Issue-Resolution/paper/) | [**📋 Tables & Resources**](https://deepsoftwareanalytics.github.io/Awesome-Issue-Resolution/tables/) @@ -30,7 +30,7 @@ ## 📖 Abstract -Based on a systematic review of **209 papers and online resources**, this survey establishes a holistic theoretical framework for Issue Resolution in software engineering. We examine how **Large Language Models (LLMs)** are transforming the automation of GitHub issue resolution. Beyond the theoretical analysis, we have curated a comprehensive collection of datasets and model training resources, which are continuously synchronized with our GitHub repository and project documentation website. +Based on a systematic review of **217 papers and online resources**, this survey establishes a holistic theoretical framework for Issue Resolution in software engineering. We examine how **Large Language Models (LLMs)** are transforming the automation of GitHub issue resolution. Beyond the theoretical analysis, we have curated a comprehensive collection of datasets and model training resources, which are continuously synchronized with our GitHub repository and project documentation website. 
## 📰 News @@ -40,16 +40,16 @@ Based on a systematic review of **209 papers and online resources**, this survey ### Recent Papers +- **AgentForge**: AgentForge: Execution-Grounded Multi-Agent LLM Framework for Autonomous Software Engineering [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.13120) [![GitHub](https://img.shields.io/badge/GitHub-code-181717?logo=github&logoColor=white)](https://github.com/raja21068/AutoCodeAI) +- **Agent-CoEvo**: Beyond Fixed Tests: Repository-Level Issue Resolution as Coevolution of Code and Behavioral Constraints [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.04580) +- **SWE-Shield**: Does Pass Rate Tell the Whole Story? Evaluating Design Constraint Compliance in LLM-based Issue Resolution [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.05955) +- **From SWE-ZERO to SWE-HERO**: From SWE-ZERO to SWE-HERO: Execution-free to Execution-based Fine-tuning for Software Engineering Agents [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.01496) [![HuggingFace](https://img.shields.io/badge/HuggingFace-model-FFD21E?logo=huggingface&logoColor=black)](https://huggingface.co/datasets/nvidia/SWE-Zero2Hero) [![Website](https://img.shields.io/badge/Website-link-5B9BD5?logo=googlechrome&logoColor=white)](https://wasiahmad.github.io/publications/59_swezero2hero/) +- **GALA**: GALA: Multimodal Graph Alignment for Bug Localization in Automated Program Repair [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.08089) +- **REAgent**: REAgent: Requirement-Driven LLM Agents for Software Issue Resolution [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.06861) +- **RTMC**: RTMC: 
Step-Level Credit Assignment via Rollout Trees [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.11037) +- **SWE-AGILE**: SWE-AGILE: A Software Agent Framework for Efficiently Managing Dynamic Reasoning Context [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.11716) [![GitHub](https://img.shields.io/badge/GitHub-code-181717?logo=github&logoColor=white)](https://github.com/KDEGroup/SWE-AGILE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-model-FFD21E?logo=huggingface&logoColor=black)](https://huggingface.co/KDEGroup) - **BeyondSWE**: BeyondSWE: Can Current Code Agent Survive Beyond Single-Repo Bug Fixing? [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03194) [![GitHub](https://img.shields.io/badge/GitHub-code-181717?logo=github&logoColor=white)](https://github.com/AweAI-Team/BeyondSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-model-FFD21E?logo=huggingface&logoColor=black)](https://huggingface.co/datasets/AweAI-Team/BeyondSWE) [![Website](https://img.shields.io/badge/Website-link-5B9BD5?logo=googlechrome&logoColor=white)](https://aweai-team.github.io/BeyondSWE/) -- **MobileDev-Bench**: MobileDev-Bench: A Comprehensive Benchmark for Evaluating Language Models on Mobile Application Development [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.24946) -- **RepoRepair**: RepoRepair: Leveraging Code Documentation for Repository-Level Automated Program Repair [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.01048) [![GitHub](https://img.shields.io/badge/GitHub-code-181717?logo=github&logoColor=white)](https://github.com/ZhongQiangDev/RepoRepair) - **SWE-Adept**: SWE-Adept: An LLM-Based Agentic Framework for Deep Codebase 
Analysis and Structured Issue Resolution [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.01327) -- **SWE-Atlas**: SWE-Atlas [![Website](https://img.shields.io/badge/Website-link-5B9BD5?logo=googlechrome&logoColor=white)](https://labs.scale.com/leaderboard/sweatlas-qna) -- **SWE-CI**: SWE-CI: Evaluating Agent Capabilities in Maintaining Codebases via Continuous Integration [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03823) [![GitHub](https://img.shields.io/badge/GitHub-code-181717?logo=github&logoColor=white)](https://github.com/SKYLENAGE-AI/SWE-CI) [![HuggingFace](https://img.shields.io/badge/HuggingFace-model-FFD21E?logo=huggingface&logoColor=black)](https://huggingface.co/datasets/skylenage/SWE-CI) -- **SWE-Fuse**: SWE-Fuse: Empowering Software Agents via Issue-free Trajectory Learning and Entropy-aware RLVR Training [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.07927) -- **SWE-Next**: SWE-Next: Scalable Real-World Software Engineering Tasks for Agents [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.20691) [![GitHub](https://img.shields.io/badge/GitHub-code-181717?logo=github&logoColor=white)](https://github.com/TIGER-AI-Lab/SWE-Next) -- **SWE-Skills-Bench**: SWE-Skills-Bench: Do Agent Skills Actually Help in Real-World Software Engineering? 
[![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.15401) [![GitHub](https://img.shields.io/badge/GitHub-code-181717?logo=github&logoColor=white)](https://github.com/GeniusHTX/SWE-Skills-Bench) -- **OpenSWE**: daVinci-Env: Open SWE Environment Synthesis at Scale [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.13023) [![GitHub](https://img.shields.io/badge/GitHub-code-181717?logo=github&logoColor=white)](https://github.com/GAIR-NLP/OpenSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-model-FFD21E?logo=huggingface&logoColor=black)](https://huggingface.co/datasets/GAIR/OpenSWE) ### Recent Updates @@ -103,13 +103,14 @@ Based on a systematic review of **209 papers and online resources**, this survey ## 📚 Complete Paper List -> **Total: 209 works** across 14 categories +> **Total: 217 works** across 14 categories ### 📊 Evaluation Datasets *Benchmarks for evaluating issue resolution systems* +- `(2026-04)` **SWE-Shield**: Does Pass Rate Tell the Whole Story? Evaluating Design Constraint Compliance in LLM-based Issue Resolution [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.05955) - `(2026-03)` **BeyondSWE**: BeyondSWE: Can Current Code Agent Survive Beyond Single-Repo Bug Fixing? 
[![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03194) [![Website](https://img.shields.io/badge/Website-paper-5B9BD5?logo=googlechrome&logoColor=white)](https://aweai-team.github.io/BeyondSWE/) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/AweAI-Team/BeyondSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AweAI-Team/BeyondSWE) - `(2026-03)` **SWE-CI**: SWE-CI: Evaluating Agent Capabilities in Maintaining Codebases via Continuous Integration [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.03823) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SKYLENAGE-AI/SWE-CI) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/skylenage/SWE-CI) - `(2026-03)` **SWE-Atlas** [![Website](https://img.shields.io/badge/Website-paper-5B9BD5?logo=googlechrome&logoColor=white)](https://labs.scale.com/leaderboard/sweatlas-qna) @@ -143,6 +144,7 @@ Based on a systematic review of **209 papers and online resources**, this survey *Datasets for training issue resolution agents* +- `(2026-04)` **From SWE-ZERO to SWE-HERO**: From SWE-ZERO to SWE-HERO: Execution-free to Execution-based Fine-tuning for Software Engineering Agents [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.01496) [![Website](https://img.shields.io/badge/Website-paper-5B9BD5?logo=googlechrome&logoColor=white)](https://wasiahmad.github.io/publications/59_swezero2hero/) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nvidia/SWE-Zero2Hero) - 
`(2026-03)` **OpenSWE**: daVinci-Env: Open SWE Environment Synthesis at Scale [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.13023) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/OpenSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/OpenSWE) - `(2026-02)` **SWE-Universe**: SWE-Universe: Scale Real-World Verifiable Environments to Millions [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://www.arxiv.org/abs/2602.02361) - `(2026-02)` **SWE-rebench V2**: SWE-rebench V2: Language-Agnostic SWE Task Collection at Scale [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.23866) @@ -191,6 +193,7 @@ Based on a systematic review of **209 papers and online resources**, this survey *Individual autonomous agents for issue resolution* +- `(2026-04)` **REAgent**: REAgent: Requirement-Driven LLM Agents for Software Issue Resolution [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.06861) - `(2025-12)` **Confucius Code Agent**: Confucius Code Agent: Scalable Agent Scaffolding for Real-World Codebases [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2512.10398) - `(2025-10)` **TOM-SWE**: TOM-SWE: User Mental Modeling For Software Engineering Agents [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2510.21903) - `(2025-09)` **Lita**: Lita: Light Agent Uncovers the Agentic Coding Capabilities of LLMs [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2509.25873) @@ -206,6 +209,8 @@ Based on a systematic review of 
**209 papers and online resources**, this survey *Collaborative multi-agent frameworks* +- `(2026-04)` **AgentForge**: AgentForge: Execution-Grounded Multi-Agent LLM Framework for Autonomous Software Engineering [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.13120) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/raja21068/AutoCodeAI) +- `(2026-04)` **Agent-CoEvo**: Beyond Fixed Tests: Repository-Level Issue Resolution as Coevolution of Code and Behavioral Constraints [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.04580) - `(2026-03)` **SWE-Adept**: SWE-Adept: An LLM-Based Agentic Framework for Deep Codebase Analysis and Structured Issue Resolution [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.01327) - `(2025-08)` **Meta-RAG**: Meta-RAG on Large Codebases Using Code Summarization [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2508.02611) - `(2025-07)` **SWE-Debate**: SWE-Debate: Competitive Multi-Agent Debate for Software Issue Resolution [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2507.23348v1) @@ -237,6 +242,8 @@ Based on a systematic review of **209 papers and online resources**, this survey *Methods leveraging external tools* +- `(2026-04)` **AgentForge**: AgentForge: Execution-Grounded Multi-Agent LLM Framework for Autonomous Software Engineering [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.13120) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/raja21068/AutoCodeAI) +- `(2026-04)` **GALA**: GALA: Multimodal Graph Alignment for Bug Localization in 
Automated Program Repair [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.08089) - `(2026-03)` **SWE-Adept**: SWE-Adept: An LLM-Based Agentic Framework for Deep Codebase Analysis and Structured Issue Resolution [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.01327) - `(2026-03)` **RepoRepair**: RepoRepair: Leveraging Code Documentation for Repository-Level Automated Program Repair [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.01048) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/ZhongQiangDev/RepoRepair) - `(2026-02)` **Closing the Loop**: Closing the Loop: Universal Repository Representation with RPG-Encoder [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.02084) [![Website](https://img.shields.io/badge/Website-paper-5B9BD5?logo=googlechrome&logoColor=white)](https://ayanami2003.github.io/RPG-Encoder/) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/RPG-ZeroRepo) @@ -287,6 +294,8 @@ Based on a systematic review of **209 papers and online resources**, this survey *Models trained via supervised learning* +- `(2026-04)` **SWE-AGILE**: SWE-AGILE: A Software Agent Framework for Efficiently Managing Dynamic Reasoning Context [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.11716) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/KDEGroup/SWE-AGILE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/KDEGroup) +- `(2026-04)` **From SWE-ZERO to SWE-HERO**: From SWE-ZERO to SWE-HERO: 
Execution-free to Execution-based Fine-tuning for Software Engineering Agents [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.01496) [![Website](https://img.shields.io/badge/Website-paper-5B9BD5?logo=googlechrome&logoColor=white)](https://wasiahmad.github.io/publications/59_swezero2hero/) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nvidia/SWE-Zero2Hero) - `(2026-03)` **OpenSWE**: daVinci-Env: Open SWE Environment Synthesis at Scale [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.13023) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/OpenSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/OpenSWE) - `(2026-02)` **Scale-SWE**: Immersion in the GitHub Universe: Scaling Coding Agents to Mastery [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.09892) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/AweAI-Team/ScaleSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/collections/AweAI-Team/scale-swe) - `(2026-01)` **SWE-Lego**: SWE-Lego: Pushing the Limits of Supervised Fine-tuning for Software Issue Resolving [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2601.01426) @@ -308,6 +317,8 @@ Based on a systematic review of **209 papers and online resources**, this survey *Models trained via reinforcement learning* +- `(2026-04)` **SWE-AGILE**: SWE-AGILE: A Software Agent Framework for Efficiently Managing Dynamic Reasoning 
Context [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.11716) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/KDEGroup/SWE-AGILE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/KDEGroup) +- `(2026-04)` **RTMC**: RTMC: Step-Level Credit Assignment via Rollout Trees [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2604.11037) - `(2026-03)` **SWE-Fuse**: SWE-Fuse: Empowering Software Agents via Issue-free Trajectory Learning and Entropy-aware RLVR Training [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2603.07927) - `(2026-02)` **SWE-Master**: SWE-Master: Unleashing the Potential of Software Engineering Agents via Post-Training [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.03411) [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/RUCAIBox/SWE-Master) - `(2026-02)` **SWE-Protégé**: SWE-Protégé: Learning to Selectively Collaborate With an Expert Unlocks Small Language Models as Software Engineering Agents [![arXiv](https://img.shields.io/badge/arXiv-paper-B31B1B?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2602.22124) @@ -408,8 +419,6 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W | SWE-smith | Python | No | 128 | 50k | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-smith) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-smith) | | SWE-Lego | Python | No | 3,251 | 32,119 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Lego/datasets) | | SWE-rebench | Python | No | 3,468 | 21,336 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-rebench/SWE-bench-fork) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-rebench) | -| **SWE-rebench V2** | Python, JS, TS, Go, Rust, C, C++, Java, etc. 
(20 PL) | No | 3,600+ | 32,000+ | Yes | - | -| OpenSWE | Python | No | 12,800 | 45,320 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/OpenSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/OpenSWE) | | SWE-bench-train | Python | No | 37 | 19k | No | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench/viewer/default/train) | | SWE-Flow | Python | No | 74 | 18,081 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/Hambaobao/SWE-Flow) | | Skywork-SWE | Python | No | 2,531 | 10,169 | Yes | - | @@ -418,11 +427,6 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W | SWE-bench-extra | Python | No | 2k | 6.38k | Yes | [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-bench-extra) | | SWE-Gym | Python | No | 11 | 2,438 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Gym/datasets) | | SWE-bench | Python | No | 12 | 2,294 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/princeton-nlp/SWE-bench) | -| BeyondSWE | Python | No | - | 500 | Yes | [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AweAI-Team/BeyondSWE) | -| SWE-CI | Python | No | - | 100 | Yes | - | -| SWE-Skills-Bench | Python | No | - | 565 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GeniusHTX/SWE-Skills-Bench) | -| daVinci-Dev | Python | No | - | - | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/daVinci-Dev) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/daVinci-Dev) | -| SWE-Next | Python | No | 3,971 | 2,308 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/TIGER-AI-Lab/SWE-Next) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/TIGER-Lab/SWE-Next) 
| | SWE-bench-java | Java | No | 19 | 1,797 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench-env) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Daoguang/Multi-SWE-bench) | | FEA-bench | Python | No | 83 | 1,401 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/FEA-Bench/) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/microsoft/FEA-Bench) | | SWE-bench-Live | Python | No | 164 | 1,565 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/SWE-bench-Live) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/SWE-bench-Live) | @@ -432,10 +436,7 @@ _A comprehensive survey and statistical overview of issue resolution datasets. 
W | SWE-MERA | Python | No | 200 | 300 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/MERA-Evaluation/SWE-MERA-submissions) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/MERA-evaluation/SWE-MERA) | | SWE-Bench-CL | Python | No | 8 | 273 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/thomasjoshi/agents-never-forget) | | SWE-Sharp-Bench | C# | No | 17 | 150 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/prose/tree/main/misc/SWE-Sharp-Bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/microsoft/SWE-Sharp-Bench) | -| Rust-SWE-bench | Rust | No | 34 | 500 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GhabiX/Rust-SWE-Bench) | | SWE-Perf | Python | No | 12 | 140 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Perf/swe-perf) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Perf/SWE-Perf) | -| Scale-SWE | Python | No | 5,200 | 100,000 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/AweAI-Team/ScaleSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/collections/AweAI-Team/scale-swe) | -| SWE-Universe | Python | No | - | 807,693 | Yes | - | | Visual SWE-bench | Python | Yes | 11 | 133 | Yes | 
[![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/luolin101/CodeV) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/luolin101/Visual-SWE-bench) | | SWE-EVO | Python | No | 7 | 48 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/bdqnghi/SWE-EVO) | | **Multi-PL Datasets** | | | | | | | @@ -449,14 +450,11 @@ _A comprehensive survey and statistical overview of issue resolution datasets. W | SWE-Lancer | JS, TS | No | - | 1,488 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/openai/frontier-evals) | | OmniGIRL | Python, TS, Java, JS | Yes | 15 | 959 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/deepsoftwareanalytics/omnigirl) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/Deep-Software-Analytics/OmniGIRL) | | SWE-bench Multimodal | JS, TS, HTML, CSS | Yes | 17 | 619 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-bench_Multimodal) | -| MobileDev-Bench | Java, Kotlin, TS, Dart | Yes | 18 | 384 | Yes | - | -| SWE-Bench Mobile | Swift, Objective-C | Yes | - | - | Yes | - | | SWE-fficiency | Python, Cython | No | 9 | 498 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/swefficiency/swefficiency-site) | | SWE-Factory | Python, Java, JS, TS | No | 12 | 430 | Yes | 
[![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/DeepSoftwareAnalytics/swe-factory) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/SWE-Factory) | | SWE-bench-Live-MultiLang \& Windows | Python, JS, TS, C, C++, C#, Java, Go, Rust | No | 238 | 418 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/SWE-bench-Live) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/MultiLang) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/Windows) | | SWE-bench Multilingual | C, C++, Go, Java, JS, TS, Rust, Python, Ruby, PHP | No | 42 | 300 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-bench_Multilingual) | | SWE-InfraBench | Python, TS | No | - | 100 | Yes | - | -| Multi-Docker-Eval | Python, JS, TS, Go, Java, C, C++, Ruby, Kotlin, HTML | No | 40 | - | Yes | - | --- @@ -473,7 +471,6 @@ _A survey of trajectory datasets used for agent training or analysis. 
We list th | SWE-Factory | Python | 10 | 2,809 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/DeepSoftwareAnalytics/swe-factory) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Factory/DeepSWE-Agent-Kimi-K2-Trajectories-2.8K) | | SWE-Gym | Python | 11 | 491 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Gym/OpenHands-SFT-Trajectories) | | SWE-Lego | Python | 3251 | 14.6k | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) | -| Multi-SWE-RL | Python | - | 4,723 | - | --- diff --git a/app/data/tables/table1.csv b/app/data/tables/table1.csv index d779ee7..0019531 100644 --- a/app/data/tables/table1.csv +++ b/app/data/tables/table1.csv @@ -9,11 +9,14 @@ SWE-Flow,Python,No,74,"18,081",Yes,\ghlink{https://github.com/Hambaobao/SWE-Flow Skywork-SWE,Python,No,"2,531","10,169",Yes,- R2E-Gym,Python,No,10,"8,135",Yes,\ghlink{https://github.com/R2E-Gym/R2E-Gym} \hflink{https://huggingface.co/R2E-Gym/datasets} RepoForge,Python,No,-,7.3k,Yes,- +Rust-SWE-bench,Rust,No,34,500,Yes,\ghlink{https://github.com/GhabiX/Rust-SWE-Bench} SWE-bench-extra,Python,No,2k,6.38k,Yes,\hflink{https://huggingface.co/datasets/nebius/SWE-bench-extra} SWE-Gym,Python,No,11,"2,438",Yes,\ghlink{https://github.com/SWE-Gym/SWE-Gym} \hflink{https://huggingface.co/SWE-Gym/datasets} SWE-bench,Python,No,12,"2,294",Yes,\ghlink{https://github.com/SWE-bench/SWE-bench} \hflink{https://huggingface.co/datasets/princeton-nlp/SWE-bench} SWE-bench-java,Java,No,19,"1,797",Yes,\ghlink{https://github.com/multi-swe-bench/multi-swe-bench-env} 
\hflink{https://huggingface.co/datasets/Daoguang/Multi-SWE-bench} +SWE-CI,Python,No,-,100,Yes,\ghlink{https://github.com/SKYLENAGE-AI/SWE-CI} FEA-bench,Python,No,83,"1,401",Yes,\ghlink{https://github.com/microsoft/FEA-Bench/}\hflink{https://huggingface.co/datasets/microsoft/FEA-Bench} +BeyondSWE,Python,No,-,500,Yes,\hflink{https://huggingface.co/datasets/AweAI-Team/BeyondSWE} SWE-bench-Live,Python,No,164,"1,565",Yes,\ghlink{https://github.com/microsoft/SWE-bench-Live} \hflink{https://huggingface.co/datasets/SWE-bench-Live/SWE-bench-Live} Loc-Bench,Python,No,-,560,No,\ghlink{https://github.com/gersteinlab/LocAgent} \hflink{https://huggingface.co/datasets/czlll/Loc-Bench_V1} SWE-bench Verified,Python,No,-,500,Yes,\ghlink{https://github.com/SWE-bench/SWE-bench} \hflink{https://huggingface.co/datasets/princeton-nlp/SWE-bench_Verified} @@ -21,6 +24,7 @@ SWE-bench Lite,Python,No,12,300,Yes,\ghlink{https://github.com/SWE-bench/SWE-ben SWE-MERA,Python,No,200,300,Yes,\ghlink{https://github.com/MERA-Evaluation/SWE-MERA-submissions} \hflink{https://huggingface.co/datasets/MERA-evaluation/SWE-MERA} SWE-Bench-CL,Python,No,8,273,Yes,\ghlink{https://github.com/thomasjoshi/agents-never-forget} SWE-Sharp-Bench,C#,No,17,150,Yes,\ghlink{https://github.com/microsoft/prose/tree/main/misc/SWE-Sharp-Bench} \hflink{https://huggingface.co/datasets/microsoft/SWE-Sharp-Bench} +SWE-Skills-Bench,Python,No,-,565,Yes,\ghlink{https://github.com/GeniusHTX/SWE-Skills-Bench} SWE-Perf,Python,No,12,140,Yes,\ghlink{https://github.com/SWE-Perf/swe-perf} \hflink{https://huggingface.co/datasets/SWE-Perf/SWE-Perf} Visual SWE-bench,Python,Yes,11,133,Yes,\ghlink{https://github.com/luolin101/CodeV} \hflink{https://huggingface.co/datasets/luolin101/Visual-SWE-bench} SWE-EVO,Python,No,7,48,Yes,\ghlink{https://github.com/bdqnghi/SWE-EVO} @@ -32,8 +36,10 @@ SWE-PolyBench,"Python, Java, JS, TS",No,21,"2,110",Yes,\ghlink{https://github.co SWE-Compass,"Python, JS, TS, Java, C, C++, Go, Rust, Kotlin, 
C#",No,-,"2,000",Yes,\ghlink{https://github.com/kwaipilot/SWE-Compass/} \hflink{https://huggingface.co/datasets/Kwaipilot/SWE-Compass} SWE-Bench Pro,"Python, Go, TS",No,41,"1,865",Yes,\ghlink{https://github.com/scaleapi/SWE-bench_Pro-os} \hflink{https://huggingface.co/datasets/ScaleAI/SWE-bench_Pro} SWE-bench++,"Python, Go, TS, JS, Ruby, PHP, Java, Rust, C++, C#, C",No,"3,971","1,782",Yes,\ghlink{https://github.com/TuringEnterprises/SWE-Bench-plus-plus} \hflink{https://huggingface.co/datasets/TuringEnterprises/SWE-Bench-plus-plus} +SWE-Bench Mobile,"Swift, Objective-C",Yes,-,-,Yes,- SWE-Lancer,"JS, TS",No,-,"1,488",Yes,\ghlink{https://github.com/openai/frontier-evals} OmniGIRL,"Python, TS, Java, JS",Yes,15,959,Yes,\ghlink{https://github.com/deepsoftwareanalytics/omnigirl} \hflink{https://huggingface.co/datasets/Deep-Software-Analytics/OmniGIRL} +MobileDev-Bench,"Java, Kotlin, TS, Dart",Yes,18,384,Yes,- SWE-bench Multimodal,"JS, TS, HTML, CSS",Yes,17,619,Yes,\ghlink{https://github.com/SWE-bench/SWE-bench} \hflink{https://huggingface.co/datasets/SWE-bench/SWE-bench_Multimodal} SWE-fficiency,"Python, Cython",No,9,498,Yes,\ghlink{https://github.com/swefficiency/swefficiency-site} SWE-Factory,"Python, Java, JS, TS",No,12,430,Yes,\ghlink{https://github.com/DeepSoftwareAnalytics/swe-factory} \hflink{https://huggingface.co/SWE-Factory} diff --git a/app/data/tables/table2.csv b/app/data/tables/table2.csv index 393a4bb..b2aad32 100644 --- a/app/data/tables/table2.csv +++ b/app/data/tables/table2.csv @@ -1,8 +1,14 @@ Dataset,Language,Repos,Amount,Link SWE-Fixer,Python,856,"69,752",\ghlink{https://github.com/InternLM/SWE-Fixer} \hflink{https://huggingface.co/datasets/internlm/SWE-Fixer-Train-Editing-CoT-70K} SWE-rebench,Python,"1,823","67,074",\hflink{https://huggingface.co/datasets/nebius/SWE-rebench-openhands-trajectories} +SWE-rebench V2,"Python, JS, TS, Go, Rust, C, C++, Java, etc.","3,600+","32,000+",- 
+Scale-SWE,Python,"5,200","100,000",\ghlink{https://github.com/AweAI-Team/ScaleSWE} \hflink{https://huggingface.co/collections/AweAI-Team/scale-swe} +SWE-Universe,Python,-,"807,693",- R2E-Gym,Python,10,"3,321",\ghlink{https://github.com/R2E-Gym/R2E-Gym} \hflink{https://huggingface.co/datasets/R2E-Gym/R2EGym-SFT-Trajectories} SWE-Synth,Python,11,"3,018",\ghlink{https://github.com/FSoft-AI4Code/SWE-Synth} \hflink{https://huggingface.co/datasets/swesynth/SWE-Synth_Moatless-SFT-Trajectories} +SWE-Next,Python,"3,971","2,308",\ghlink{https://github.com/TIGER-AI-Lab/SWE-Next} \hflink{https://huggingface.co/datasets/TIGER-Lab/SWE-Next} SWE-Factory,Python,10,"2,809",\ghlink{https://github.com/DeepSoftwareAnalytics/swe-factory} \hflink{https://huggingface.co/datasets/SWE-Factory/DeepSWE-Agent-Kimi-K2-Trajectories-2.8K} SWE-Gym,Python,11,491,\ghlink{https://github.com/SWE-Gym/SWE-Gym} \hflink{https://huggingface.co/datasets/SWE-Gym/OpenHands-SFT-Trajectories} SWE-Lego,Python,3251,14.6k,\ghlink{https://github.com/SWE-Lego/SWE-Lego} +OpenSWE,Python,"12,800","45,320",\ghlink{https://github.com/GAIR-NLP/OpenSWE} \hflink{https://huggingface.co/datasets/GAIR/OpenSWE} +daVinci-Dev,Python,-,-,\ghlink{https://github.com/GAIR-NLP/daVinci-Dev} \hflink{https://huggingface.co/datasets/GAIR/daVinci-Dev} diff --git a/app/docs/tables.md b/app/docs/tables.md index 9daa3f7..b3b8fb0 100644 --- a/app/docs/tables.md +++ b/app/docs/tables.md @@ -62,11 +62,18 @@ _A survey of trajectory datasets used for agent training or analysis. 
We list th |---|---|---|---|---| | SWE-Fixer | Python | 856 | 69,752 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/InternLM/SWE-Fixer) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/internlm/SWE-Fixer-Train-Editing-CoT-70K) | | SWE-rebench | Python | 1,823 | 67,074 | [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/nebius/SWE-rebench-openhands-trajectories) | +| SWE-rebench V2 | Python, JS, TS, Go, Rust, C, C++, Java, etc. | 3,600+ | 32,000+ | - | +| Multi-SWE-RL | Python | - | 4,723 | - | +| daVinci-Dev | Python | - | - | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/daVinci-Dev) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/daVinci-Dev) | +| OpenSWE | Python | 12,800 | 45,320 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GAIR-NLP/OpenSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/GAIR/OpenSWE) | | R2E-Gym | Python | 10 | 3,321 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/R2E-Gym/R2E-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/R2E-Gym/R2EGym-SFT-Trajectories) | | SWE-Synth | Python | 11 | 3,018 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/FSoft-AI4Code/SWE-Synth) 
[![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/swesynth/SWE-Synth_Moatless-SFT-Trajectories) | | SWE-Factory | Python | 10 | 2,809 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/DeepSoftwareAnalytics/swe-factory) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Factory/DeepSWE-Agent-Kimi-K2-Trajectories-2.8K) | | SWE-Gym | Python | 11 | 491 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Gym/OpenHands-SFT-Trajectories) | | SWE-Lego | Python | 3251 | 14.6k | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) | +| Scale-SWE | Python | 5,200 | 100,000 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/AweAI-Team/ScaleSWE) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/collections/AweAI-Team/scale-swe) | +| SWE-Next | Python | 3,971 | 2,308 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/TIGER-AI-Lab/SWE-Next) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/TIGER-Lab/SWE-Next) | +| SWE-Universe | Python | - | 807,693 | - | --- From 9244890ded5efb8fb8203377d1dbee679a73e759 Mon Sep 17 00:00:00 2001 From: OpenClaw Agent <> Date: Mon, 27 Apr 2026 15:52:56 +0800 Subject: [PATCH 12/12] feat: add 7 missing datasets from 217-paper upstream/main to 
eval and trajectory tables --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index b6dc88e..b6c7744 100644 --- a/README.md +++ b/README.md @@ -439,6 +439,10 @@ _A comprehensive survey and statistical overview of issue resolution datasets. W | SWE-Perf | Python | No | 12 | 140 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Perf/swe-perf) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Perf/SWE-Perf) | | Visual SWE-bench | Python | Yes | 11 | 133 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/luolin101/CodeV) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/luolin101/Visual-SWE-bench) | | SWE-EVO | Python | No | 7 | 48 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/bdqnghi/SWE-EVO) | +| BeyondSWE | Python | No | - | 500 | Yes | [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/AweAI-Team/BeyondSWE) | +| SWE-CI | Python | No | - | 100 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SKYLENAGE-AI/SWE-CI) | +| SWE-Skills-Bench | Python | No | - | 565 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GeniusHTX/SWE-Skills-Bench) | +| Rust-SWE-bench | Rust | No | 34 | 500 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/GhabiX/Rust-SWE-Bench) | | **Multi-PL Datasets** | | | | | | | | SWE-Mirror | Python, Rust, Go | No | 40 | 60k | Yes | - | | Multi-SWE-bench 
| Java, JS, TS, Go, Rust, C, C++ | No | 76 | 4,723 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/multi-swe-bench/multi-swe-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/ByteDance-Seed/Multi-SWE-bench) | @@ -455,6 +459,8 @@ _A comprehensive survey and statistical overview of issue resolution datasets. W | SWE-bench-Live-MultiLang \& Windows | Python, JS, TS, C, C++, C#, Java, Go, Rust | No | 238 | 418 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/microsoft/SWE-bench-Live) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/MultiLang) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench-Live/Windows) | | SWE-bench Multilingual | C, C++, Go, Java, JS, TS, Rust, Python, Ruby, PHP | No | 42 | 300 | Yes | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-bench/SWE-bench) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-bench/SWE-bench_Multilingual) | | SWE-InfraBench | Python, TS | No | - | 100 | Yes | - | +| MobileDev-Bench | Java, Kotlin, TS, Dart | Yes | 18 | 384 | Yes | - | +| SWE-Bench Mobile | Swift, Objective-C | Yes | - | - | Yes | - | --- @@ -471,6 +477,7 @@ _A survey of trajectory datasets used for agent training or analysis. 
We list th | SWE-Factory | Python | 10 | 2,809 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/DeepSoftwareAnalytics/swe-factory) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Factory/DeepSWE-Agent-Kimi-K2-Trajectories-2.8K) | | SWE-Gym | Python | 11 | 491 | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Gym/SWE-Gym) [![HuggingFace](https://img.shields.io/badge/HuggingFace-dataset-ff7e21?logo=huggingface&logoColor=white)](https://huggingface.co/datasets/SWE-Gym/OpenHands-SFT-Trajectories) | | SWE-Lego | Python | 3251 | 14.6k | [![GitHub](https://img.shields.io/badge/GitHub-repo-24292F?logo=github&logoColor=white)](https://github.com/SWE-Lego/SWE-Lego) | +| Multi-SWE-RL | Python | - | 4,723 | - | ---