# Entry-point script: builds a Gradio interface from a hosted model reference
# and starts serving it as soon as the file is executed.

import gradio as gr

# The "models/<owner>/<name>" string is passed verbatim to Gradio's loader.
# NOTE(review): newer Gradio releases appear to expose this loader as
# `gr.load(...)` and deprecate `gr.Interface.load(...)` — confirm against the
# Gradio version pinned for this project before switching.
gr.Interface.load("models/tsumeone/llama-30b-supercot-4bit-cuda").launch()