Spaces:

HelloSun
/

Qwen2.5-0.5B-Instruct-openvino

Sleeping

File size: 1,154 Bytes

ee73d54
 
6d1bf41
a68f59e
6d1bf41
3b22ee7
6d1bf41
3406aa1
6d1bf41
ee73d54
3b22ee7
 
ee73d54
222e75c
 
b984cf2
bb0ec8e
3b22ee7
899ec6d
d2a3b3c
 
 
222e75c
 
3188082
222e75c
3b22ee7
b984cf2
ee73d54
 
222e75c

import gradio as gr
from huggingface_hub import InferenceClient
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Load the model and tokenizer from the Hugging Face Hub repository below.
model_id = "HelloSun/Qwen2.5-0.5B-Instruct-openvino"
# NOTE(review): device_map="auto" is forwarded to from_pretrained unchanged;
# confirm OVModelForCausalLM actually honors it for device selection.
model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Build the text-generation pipeline used by the chat handler.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def respond(message, history):
    """Generate the model's reply to a single chat message.

    Args:
        message: The text the user just submitted.
        history: Earlier turns passed in by the chat interface. Accepted so
            the signature matches what the interface calls, but not used:
            the prompt is built from ``message`` alone.

    Returns:
        The text the model generated in response to ``message``.
    """
    # The prompt is the current message only; history is not folded in.
    # (Concatenating history[-1]["content"] here had no effect on the prompt
    # and could raise when that content was not a plain string.)
    response = pipe(
        message,
        max_length=500,
        truncation=True,
        num_return_sequences=1,
        # Return only the newly generated text. By default the pipeline
        # prepends the prompt, which would echo the user's message back
        # inside the assistant's reply.
        return_full_text=False,
    )
    reply = response[0]['generated_text']

    # Log the exchange to stdout for debugging.
    print(f"Message: {message}")
    print(f"Reply: {reply}")
    return reply
    
# Configure the Gradio chat interface around the respond() handler.
demo = gr.ChatInterface(
    fn=respond,
    title="Chat with Qwen(通義千問) 2.5-0.5B",
    description="與 Qwen2.5-0.5B-Instruct-openvino 聊天！",
    type='messages',
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()