# app.py — Gradio front-end for a local Ollama server (commit 181ba1b)
import os
import time

import gradio as gr
import requests
# Ollama API address - uses the internal (loopback) address
OLLAMA_API_URL = "http://127.0.0.1:11434/api/generate"
def generate_text(prompt):
    """Send *prompt* to the local Ollama API and return the generated text.

    Retries up to three times on connection failures (with a short back-off)
    before giving up.  Never raises: every failure path returns a
    human-readable (Chinese) error string so the Gradio callback stays alive.

    Args:
        prompt: The user's question, forwarded verbatim to the model.

    Returns:
        The model's "response" field on success, otherwise an error message.
    """
    data = {
        "model": "llama3-zh",
        "prompt": prompt,
        "stream": False,  # request one complete JSON body, not a stream
    }
    max_retries = 3
    try:
        for attempt in range(max_retries):
            try:
                response = requests.post(
                    OLLAMA_API_URL,
                    json=data,
                    timeout=120,
                    proxies={'http': None, 'https': None}  # bypass any env-configured proxy
                )
                print(f"Attempt {attempt + 1}: Status {response.status_code}")
                if response.status_code == 200:
                    return response.json()["response"]
                elif response.status_code == 404:
                    # Model not pulled/loaded yet on the Ollama side.
                    return "错误:模型未找到,请等待模型加载完成后重试"
                else:
                    # Other HTTP errors: retry silently; report only on the last try.
                    if attempt == max_retries - 1:
                        return f"错误:{response.status_code} - {response.text}"
            except requests.exceptions.ConnectionError:
                if attempt == max_retries - 1:
                    return "错误:无法连接到 Ollama 服务"
                print(f"连接失败,尝试重试 {attempt + 1}/{max_retries}")
                # BUGFIX: `time` was used here without being imported, so the
                # retry path crashed with NameError (swallowed by the outer
                # except).  `import time` is now at the top of the file.
                time.sleep(2)  # brief back-off before the next attempt
        # Defensive fallback: the loop should always return above, but make
        # the "ran out of retries" outcome explicit instead of returning None.
        return "错误:无法连接到 Ollama 服务"
    except Exception as e:
        # Last-resort guard (e.g. timeouts, bad JSON) — report, don't crash.
        return f"错误:{str(e)}"
# Gradio callback: validate the prompt before handing it to the model.
def chat_interface(prompt):
    """Return the model's answer, or a validation message for blank input."""
    cleaned = prompt.strip()
    if cleaned:
        # Forward the original (untrimmed) prompt, as the model received it before.
        return generate_text(prompt)
    return "请输入有效的问题"
# Build the Gradio app: a single text-in / text-out interface.
prompt_box = gr.Textbox(
    lines=3,
    placeholder="请输入您的问题...",
    label="输入",
)
answer_box = gr.Textbox(
    lines=5,
    label="回答",
)
iface = gr.Interface(
    fn=chat_interface,
    inputs=prompt_box,
    outputs=answer_box,
    title="Llama3.1-8B-Chinese-Chat (CPU)",
    description="与 Llama3.1-8B-Chinese-Chat 模型对话(CPU 模式)",
    examples=[
        ["你好,请做个自我介绍"],
        ["解释一下量子计算的基本原理"],
        ["写一首关于春天的诗"],
    ],
)
# Script entry point: start the web server.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all interfaces
        "server_port": 7860,       # standard Hugging Face Spaces port
        "max_threads": 1,          # presumably to serialize requests on CPU — confirm
    }
    iface.launch(**launch_options)