--extra-index-url https://download.pytorch.org/whl/cu124  # grab a CUDA 12.4 Torch wheel

torch==2.5.1+cu124  # keep before flash-attn

# FlashAttention pre-built wheel that matches: Torch 2.5 • CUDA 12 • cp310
https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.0.post2/flash_attn-2.8.0.post2+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl  # <- 240 MB wheel

transformers>=4.52.0
accelerate>=0.30.2
bitsandbytes==0.43.3
peft==0.15.2
gradio>=4.44.0
sentencepiece
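
# Install sketch (assumes a Linux x86_64 host with Python 3.10 and a CUDA 12.4-capable
# driver, matching the cp310 / cu124 wheels pinned above):
#   pip install -r requirements.txt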