import asyncio
import httpx
from fastapi import APIRouter, Header, HTTPException, Body
from .apiModel import Payload
from huggingface_hub import InferenceClient
router = APIRouter()
API_URL = "https://api-inference.huggingface.co/models/cardiffnlp/meta-llama/Llama-3.1-8B-Instruct"
# 使用httpx异步请求
async def fetch_model_response(payload: dict, headers: dict, max_retries: int = 5) -> dict:
    """POST *payload* to the Hugging Face Inference API and return the JSON body.

    While the model is still loading upstream (HTTP 503), waits 20 seconds and
    retries, up to *max_retries* extra attempts. This replaces the previous
    unbounded recursion, which also opened a new AsyncClient per retry.

    Args:
        payload: JSON-serializable request body forwarded to the API.
        headers: HTTP headers (must include the Authorization header).
        max_retries: Maximum number of 503 retries before the final attempt's
            status is surfaced as an error. Defaults to 5 for compatibility
            with existing callers.

    Raises:
        HTTPException: 500 on a transport-level error, or the upstream HTTP
            status code on any non-503 error response (and on a 503 that
            persists past the retry budget).
    """
    async with httpx.AsyncClient() as client:
        for attempt in range(max_retries + 1):
            try:
                response = await client.post(API_URL, headers=headers, json=payload)
                if response.status_code == 503 and attempt < max_retries:
                    # Model is still being loaded on the inference backend:
                    # wait and try again instead of failing immediately.
                    print("模型加载中,等待中...")
                    await asyncio.sleep(20)
                    continue
                response.raise_for_status()  # raise for any remaining error status
                return response.json()
            except httpx.RequestError as e:
                raise HTTPException(status_code=500, detail=f"请求错误: {e}")
            except httpx.HTTPStatusError as e:
                # Use the status attached to the exception's response object.
                raise HTTPException(status_code=e.response.status_code, detail=f"HTTP 错误: {e}")
@router.post("/chat-completion-academic/")
async def chat_completion(token: str = Body(...), messages: list = Body(...)):
    """Chat-completion endpoint backed by the Qwen/QwQ-32B-Preview model.

    Args:
        token: Hugging Face API token used to authenticate the inference call.
        messages: Chat history as a list of {"role": ..., "content": ...}
            dicts; a multilingual system prompt is appended before the call.

    Returns:
        dict: ``{"message": <model reply message>}``.

    Raises:
        HTTPException: 500 if the inference request fails for any reason.
    """
    try:
        client = InferenceClient(api_key=token)
        messages.append({
            "role": "system",
            "content": "You are a multilingual chatbot capable of understanding questions in various languages and "
                       "providing accurate responses in the appropriate language."
        })
        # InferenceClient's chat API is synchronous; run it in a worker thread
        # so the blocking network call does not stall the event loop of this
        # async endpoint.
        completion = await asyncio.to_thread(
            client.chat.completions.create,
            model="Qwen/QwQ-32B-Preview",
            messages=messages,
            max_tokens=500,
        )
        return {"message": completion.choices[0].message}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")
@router.post("/chat-completion/")
async def chat_completion(token: str = Body(...), messages: list = Body(...)):
    """Chat-completion endpoint backed by the google/gemma-2-2b-it model.

    NOTE(review): this function shares its name with the handler for
    /chat-completion-academic/ above; FastAPI registers both routes at
    decoration time, but the second definition shadows the first at module
    level — consider distinct names.

    Args:
        token: Hugging Face API token used to authenticate the inference call.
        messages: Chat history as a list of {"role": ..., "content": ...}
            dicts; a multilingual system prompt is appended before the call.

    Returns:
        dict: ``{"message": <model reply message>}``.

    Raises:
        HTTPException: 500 if the inference request fails for any reason.
    """
    try:
        client = InferenceClient(api_key=token)
        messages.append({
            "role": "system",
            "content": "You are a multilingual chatbot capable of understanding questions in various languages and "
                       "providing accurate responses in the appropriate language."
        })
        # InferenceClient's chat API is synchronous; run it in a worker thread
        # so the blocking network call does not stall the event loop of this
        # async endpoint.
        completion = await asyncio.to_thread(
            client.chat.completions.create,
            model="google/gemma-2-2b-it",
            messages=messages,
            max_tokens=500,
        )
        return {"message": completion.choices[0].message}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")
@router.post("/api-inference/")
async def api_inference(
        authorization: str = Header(...),
        item: Payload = None):
    """Proxy an inference request to the Hugging Face Inference API.

    Args:
        authorization: Raw ``Authorization`` header value, forwarded upstream
            unchanged.
        item: Request payload; required in practice even though the signature
            defaults it to None for backward compatibility.

    Returns:
        The upstream API's JSON response.

    Raises:
        HTTPException: 400 when the request body is missing (previously this
            crashed with AttributeError on ``item.dict()`` and surfaced as an
            opaque 500), plus whatever ``fetch_model_response`` raises on
            upstream failure.
    """
    if item is None:
        # Fail fast with a clear client error instead of AttributeError.
        raise HTTPException(status_code=400, detail="Request body is required")
    print("请求:", item)
    # Forward the caller's Authorization header to the inference API.
    headers = {"Authorization": authorization}
    # Delegate the actual HTTP call (with 503 retry handling) to the helper.
    response_data = await fetch_model_response(item.dict(), headers)
    return response_data