
somosnlp-hackathon-2025/mistral-7b-gastronomia-hispana-qlora-GGUF
Updated
•
34
torchao Int8WeightOnlyConfig is already working flawlessly in our tests.
import spaces
from diffusers import FluxPipeline
from torchao.quantization.quant_api import Int8WeightOnlyConfig, quantize_
# Build the Flux pipeline, then move it to the CUDA device. The `...` argument
# is kept exactly as written in this snippet (presumably a stand-in for the
# real from_pretrained arguments -- confirm before running).
pipeline = FluxPipeline.from_pretrained(...)
pipeline = pipeline.to('cuda')

# Quantize the pipeline's transformer with torchao's int8 weight-only config.
quantize_(
    pipeline.transformer,  # Or any other component(s)
    Int8WeightOnlyConfig(),
)
@spaces.GPU
def generate(prompt: str):
    """Run the module-level ``pipeline`` on ``prompt`` and return one image.

    The function is wrapped with ``spaces.GPU`` (per the Hugging Face
    ZeroGPU docs this marks it as GPU-dependent; see the ``spaces``
    package documentation for the exact semantics).

    Args:
        prompt: Text prompt passed directly to ``pipeline``.

    Returns:
        The first element of the ``images`` attribute of the pipeline output.
    """
    # Only the first entry of the output's image list is returned.
    return pipeline(prompt).images[0]