import gradio as gr
import requests
import time

# Local Ollama generate endpoint; Ollama listens on port 11434 by default.
OLLAMA_API_URL = "http://127.0.0.1:11434/api/generate"
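
# Quick reachability check before launching the UI (illustrative; a GET on
# the Ollama root URL returns a short status string):
#   curl http://127.0.0.1:11434/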


def generate_text(prompt):
    """Send a prompt to the Ollama generate API and return the reply text."""
    data = {
        "model": "llama3-zh",
        "prompt": prompt,
        "stream": False  # ask for the complete reply in a single JSON response
    }
    try:
        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = requests.post(
                    OLLAMA_API_URL,
                    json=data,
                    timeout=120,  # CPU inference is slow; allow up to two minutes
                    proxies={'http': None, 'https': None}  # do not route localhost through a proxy
                )
                print(f"Attempt {attempt + 1}: Status {response.status_code}")

                if response.status_code == 200:
                    return response.json()["response"]
                elif response.status_code == 404:
                    # "Error: model not found; wait for the model to finish loading, then retry"
                    return "错误:模型未找到,请等待模型加载完成后重试"
                else:
                    if attempt == max_retries - 1:
                        return f"错误:{response.status_code} - {response.text}"  # "Error: <status> - <body>"
            except requests.exceptions.ConnectionError:
                if attempt == max_retries - 1:
                    # "Error: cannot connect to the Ollama service"
                    return "错误:无法连接到 Ollama 服务"
                # "Connection failed, retrying <attempt>/<max_retries>"
                print(f"连接失败,尝试重试 {attempt + 1}/{max_retries}")
                time.sleep(2)  # brief back-off before the next attempt
    except Exception as e:
        return f"错误:{str(e)}"  # "Error: <exception message>"


def chat_interface(prompt):
    # Reject empty or whitespace-only input before calling the model.
    if not prompt.strip():
        return "请输入有效的问题"  # "Please enter a valid question"
    return generate_text(prompt)


iface = gr.Interface(
    fn=chat_interface,
    inputs=gr.Textbox(
        lines=3,
        placeholder="请输入您的问题...",  # "Enter your question..."
        label="输入"  # "Input"
    ),
    outputs=gr.Textbox(
        lines=5,
        label="回答"  # "Answer"
    ),
    title="Llama3.1-8B-Chinese-Chat (CPU)",
    description="与 Llama3.1-8B-Chinese-Chat 模型对话(CPU 模式)",  # "Chat with the Llama3.1-8B-Chinese-Chat model (CPU mode)"
    examples=[
        ["你好,请做个自我介绍"],  # "Hello, please introduce yourself"
        ["解释一下量子计算的基本原理"],  # "Explain the basic principles of quantum computing"
        ["写一首关于春天的诗"]  # "Write a poem about spring"
    ]
)


if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",  # listen on all interfaces so other hosts can connect
        server_port=7860,
        max_threads=1  # single worker: CPU inference handles one request at a time
    )
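
# To try it out: make sure Ollama is running (e.g. `ollama serve`) with the
# llama3-zh model available, start this script, then open
# http://localhost:7860 in a browser.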