--extra-index-url https://download.pytorch.org/whl/cu124  # grab a CUDA 12.4 Torch wheel

torch==2.5.1+cu124  # keep before flash-attn

# FlashAttention pre-built wheel that matches: Torch 2.5 • CUDA 12 • cp310
https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.0.post2/flash_attn-2.8.0.post2+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl  # <- 240 MB wheel

transformers>=4.52.0
accelerate>=0.30.2
bitsandbytes==0.43.3
peft==0.15.2
gradio>=4.44.0
sentencepiece
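
# Install sketch (assumes a Linux x86_64 host with Python 3.10 and a CUDA 12.4-capable
# driver, matching the cp310 / cu124 wheels pinned above):
#   pip install -r requirements.txt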