import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer
model_name = "taide/Llama3-TAIDE-LX-8B-Chat-Alpha1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)

# Define the inference function
def generate_text(prompt):
    # Move inputs to whatever device device_map="auto" placed the model on,
    # rather than hard-coding "cuda"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Without max_new_tokens, generate() falls back to a very short default
    # length and truncates the reply
    outputs = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Create the Gradio interface
interface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="Llama3 Chatbot",
    description="Chat with Llama3-TAIDE-LX-8B-Chat-Alpha1",
)

# Launch the Gradio app
interface.launch()
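
# Optional: Llama3-style chat models are usually trained on a specific chat
# template, so feeding raw prompts to generate_text() can produce off-format
# replies. Below is a minimal sketch of a template-aware variant, assuming the
# tokenizer ships a chat template (true for most Llama3 chat checkpoints);
# the function name and the 256-token cap are illustrative choices.
def generate_chat_reply(user_message):
    messages = [{"role": "user", "content": user_message}]
    # apply_chat_template wraps the message in the model's expected format
    # and appends the assistant-turn prompt tokens
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    outputs = model.generate(input_ids, max_new_tokens=256)
    # Decode only the newly generated tokens, skipping the echoed prompt
    return tokenizer.decode(
        outputs[0][input_ids.shape[-1]:], skip_special_tokens=True
    )
# To use it, swap fn=generate_text for fn=generate_chat_reply in gr.Interface.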