# Python dependencies, one requirement specifier per line
# (pip requirements file format). Entries without a version bound are
# intentionally left unconstrained, as in the original list.
datasets>=4.5.0
datatrove[io]>=0.9.0
duckdb>=1.4.4
httpx
jinja2>=3.1.6
openai>=1.0
markdownify
pyarrow>=23.0.1
pydantic>=2.12.5
markdown-it-py>=4.0.0
rich>=14.3.3
loguru
python-dotenv
huggingface-hub
pandas
requests
markupsafe
fsspec
transformers>=4.40
tiktoken>=0.12.0
tenacity>=9.1.4
pillow>=12.2.0
nh3>=0.2