% The Common Pile v0.1 -- arXiv preprint.
% The arXiv identifier is stored in eprint/archiveprefix (not in a journal
% field) so that eprint-aware styles can format and link it; url is the
% fallback for styles that only understand url.
% Names use the "Last, First" form so compound surnames (Ben Allal) are
% parsed correctly. Only case-sensitive title words are brace-protected.
@misc{kandpal2025commonpile,
  author        = {Kandpal, Nikhil
                   and Lester, Brian
                   and Raffel, Colin
                   and Majstorovic, Sebastian
                   and Biderman, Stella
                   and Abbasi, Baber
                   and Soldaini, Luca
                   and Shippole, Enrico
                   and Cooper, A. Feder
                   and Skowron, Aviya
                   and Kirchenbauer, John
                   and Longpre, Shayne
                   and Sutawika, Lintang
                   and Albalak, Alon
                   and Xu, Zhenlin
                   and Penedo, Guilherme
                   and Ben Allal, Loubna
                   and Bakouch, Elie
                   and Pressman, John David
                   and Fan, Honglu
                   and Stander, Dashiell
                   and Song, Guangyu
                   and Gokaslan, Aaron
                   and Goldstein, Tom
                   and Bartoldson, Brian R.
                   and Kailkhura, Bhavya
                   and Murray, Tyler},
  title         = {The {Common Pile} v0.1: An {8TB} Dataset of Public Domain and Openly Licensed Text},
  year          = {2025},
  eprint        = {2506.05209},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2506.05209},
}