File size: 4,676 Bytes
5b6514f 4060252 69d5d0e 73400df 69d5d0e 838d93c caaf84d 838d93c a1df442 838d93c 4060252 3ae1a20 69d5d0e 5b6514f bb1d3e2 69d5d0e 9cad8c3 69d5d0e 5b6514f 69d5d0e 5b6514f 69d5d0e 5b6514f 3ae1a20 7f611b8 3ae1a20 9a0053e 9fd5bac b30a245 3ae1a20 7f611b8 3ae1a20 9a0053e 3ae1a20 1e5f011 adf9187 1e5f011 df22e8a 1e5f011 caaf84d 89ea1b2 caaf84d 4060252 89ea1b2 4060252 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import asyncio
import httpx
from fastapi import APIRouter, Header, HTTPException, Body
from .apiModel import Payload
from huggingface_hub import InferenceClient
router = APIRouter()

# Hugging Face serverless inference endpoint used by /api-inference/.
# NOTE: fixed the model path -- it previously read
# "cardiffnlp/meta-llama/Llama-3.1-8B-Instruct", but HF model ids are
# exactly "owner/name"; the "cardiffnlp/" prefix was a leftover from a
# different (sentiment) model and made the URL point at a non-existent repo.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct"
# 使用httpx异步请求
async def fetch_model_response(payload: dict, headers: dict, max_retries: int = 5) -> dict:
    """POST *payload* to the Hugging Face inference API and return the JSON reply.

    While the upstream model is still loading (HTTP 503) the request is
    retried after a 20-second wait, up to *max_retries* extra attempts.

    Args:
        payload: JSON-serializable body forwarded to the model endpoint.
        headers: HTTP headers; expected to carry the Authorization token.
        max_retries: Maximum number of 503-triggered retries before giving up.

    Raises:
        HTTPException: 500 for transport errors, the upstream status code for
            HTTP errors, and 503 if the model never finished loading.
    """
    async with httpx.AsyncClient() as client:
        # Bounded retry loop: the original implementation recursed into itself
        # on every 503 with no limit, risking unbounded recursion/hangs.
        for _ in range(max_retries + 1):
            try:
                response = await client.post(API_URL, headers=headers, json=payload)
                if response.status_code == 503:
                    # Model is still loading upstream -- wait, then retry.
                    print("模型加载中,等待中...")
                    await asyncio.sleep(20)
                    continue
                response.raise_for_status()  # raise on any non-2xx status
                return response.json()
            except httpx.HTTPStatusError as e:
                # Propagate the upstream status code; e.response is always the
                # response that failed, unlike the surrounding local variable.
                raise HTTPException(status_code=e.response.status_code, detail=f"HTTP 错误: {e}")
            except httpx.RequestError as e:
                # Transport-level failure (DNS, connection, timeout, ...).
                raise HTTPException(status_code=500, detail=f"请求错误: {e}")
        # Exhausted all retries while the model kept returning 503.
        raise HTTPException(status_code=503, detail="模型加载超时,请稍后重试")
@router.post("/chat-completion-academic/")
async def chat_completion_academic(token: str = Body(...), messages: list = Body(...)):
    """Chat endpoint that answers in whatever language the user asked in.

    Renamed from ``chat_completion``: three handlers in this module shared
    that name, each definition silently shadowing the previous one at module
    level (the routes still registered, but the names were unusable).

    Args:
        token: Hugging Face API token, supplied in the request body.
        messages: Chat history as a list of ``{"role", "content"}`` dicts;
            a system prompt is appended before the model call.

    Raises:
        HTTPException: 500 on any failure while generating the completion.
    """
    try:
        # Build a per-request client authenticated with the caller's token.
        client = InferenceClient(api_key=token)
        messages.append({
            "role": "system",
            "content": "You are a multilingual chatbot capable of understanding questions in various languages and "
                       "providing accurate responses in the appropriate language."
        })
        # InferenceClient is synchronous; run it in a worker thread so the
        # blocking HTTP call does not stall the event loop.
        completion = await asyncio.to_thread(
            client.chat.completions.create,
            model="Qwen/QwQ-32B-Preview",
            messages=messages,
            max_tokens=500,
        )
        # Return only the assistant message from the first choice.
        return {"message": completion.choices[0].message}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")
@router.post("/chat-completion/")
async def chat_completion_chinese(token: str = Body(...), messages: list = Body(...)):
    """Chat endpoint that always answers in Chinese.

    Renamed from ``chat_completion``: three handlers in this module shared
    that name, each definition silently shadowing the previous one at module
    level (the routes still registered, but the names were unusable).

    Args:
        token: Hugging Face API token, supplied in the request body.
        messages: Chat history as a list of ``{"role", "content"}`` dicts;
            a system prompt is appended before the model call.

    Raises:
        HTTPException: 500 on any failure while generating the completion.
    """
    try:
        # Build a per-request client authenticated with the caller's token.
        client = InferenceClient(api_key=token)
        print("问题:", messages)
        messages.append({
            "role": "system",
            "content": "You are a multilingual chatbot capable of understanding questions in various languages and "
                       "providing accurate responses in the appropriate language. However, all questions must be "
                       "answered in Chinese."
        })
        # InferenceClient is synchronous; run it in a worker thread so the
        # blocking HTTP call does not stall the event loop.
        # (Alternative model noted by the original author: google/gemma-2-2b-it)
        completion = await asyncio.to_thread(
            client.chat.completions.create,
            model="google/gemma-2-2b-it",
            messages=messages,
            max_tokens=500,
        )
        # Return only the assistant message from the first choice.
        return {"message": completion.choices[0].message}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")
@router.post("/format-prompt/")
async def format_prompt(token: str = Body(...), messages: list = Body(...)):
    """Rewrite a user's image-generation prompt into a richer English prompt.

    Renamed from ``chat_completion``: three handlers in this module shared
    that name, each definition silently shadowing the previous one at module
    level (the routes still registered, but the names were unusable).

    Args:
        token: Hugging Face API token, supplied in the request body.
        messages: Chat history as a list of ``{"role", "content"}`` dicts;
            the optimizer system prompt is appended before the model call.

    Raises:
        HTTPException: 500 on any failure while generating the completion.
    """
    try:
        # Build a per-request client authenticated with the caller's token.
        client = InferenceClient(api_key=token)
        print("问题:", messages)
        messages.append({
            "role": "system",
            "content": "You are a highly intelligent image generation text optimizer. Your role is to enhance the "
                       "user's input prompt, ensuring it is precise, vivid, and detailed, while maintaining its "
                       "original intent. Always provide the enhanced version in English. Only reply with the "
                       "optimized prompt."
        })
        # InferenceClient is synchronous; run it in a worker thread so the
        # blocking HTTP call does not stall the event loop.
        completion = await asyncio.to_thread(
            client.chat.completions.create,
            model="google/gemma-2-2b-it",
            messages=messages,
            max_tokens=500,
        )
        # Return only the assistant message from the first choice.
        return {"message": completion.choices[0].message}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")
@router.post("/api-inference/")
async def api_inference(
        authorization: str = Header(...),
        item: Payload = None):
    """Forward the request body to the Hugging Face inference API.

    Args:
        authorization: Raw ``Authorization`` header value, passed through to
            the upstream API unchanged.
        item: Request payload; optional in the signature, but required in
            practice (see the explicit guard below).

    Raises:
        HTTPException: 422 if the body is missing; whatever
            ``fetch_model_response`` raises for upstream failures.
    """
    print("请求:", item)
    # The default of None means a missing body previously crashed with an
    # AttributeError on item.dict() and surfaced as an opaque 500.
    # Reject it explicitly with the standard validation status instead.
    if item is None:
        raise HTTPException(status_code=422, detail="Request body is required")
    # Pass the caller's Authorization header straight through.
    headers = {"Authorization": authorization}
    # Delegate to the shared async fetch helper (handles 503 retries).
    response_data = await fetch_model_response(item.dict(), headers)
    return response_data
|