@inproceedings{kaesberg-etal-2025-sparc, title = "{SP}a{RC}: A Spatial Pathfinding Reasoning Challenge", author = "Kaesberg, Lars Benedikt and Wahle, Jan Philip and Ruas, Terry and Gipp, Bela", editor = "Christodoulopoulos, Christos and Chakraborty, Tanmoy and Rose, Carolyn and Peng, Violet", booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing", month = nov, year = "2025", address = "Suzhou, China", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/2025.emnlp-main.526/", doi = "10.18653/v1/2025.emnlp-main.526", pages = "10370--10401", ISBN = "979-8-89176-332-6", abstract = "Existing reasoning datasets saturate and fail to test abstract, multi-step problems, especially pathfinding and complex rule constraint satisfaction. We introduce SPaRC (Spatial Pathfinding Reasoning Challenge), a dataset of 1,000 2D grid pathfinding puzzles to evaluate spatial and rule-based reasoning, requiring step-by-step planning with arithmetic and geometric rules. Humans achieve near-perfect accuracy (98.0{\%}; 94.5{\%} on hard puzzles), while the best reasoning models, such as o4-mini, struggle (15.8{\%}; 1.1{\%} on hard puzzles). Models often generate invalid paths ({\ensuremath{>}}50{\%} of puzzles for o4-mini), and reasoning tokens reveal they make errors in navigation and spatial logic. Unlike humans, who take longer on hard puzzles, models fail to scale test-time compute with difficulty. Allowing models to make multiple solution attempts improves accuracy, suggesting potential for better spatial reasoning with improved training and efficient test-time scaling methods. SPaRC can be used as a window into models' spatial reasoning limitations and drive research toward new methods that excel in abstract, multi-step problem-solving." }