import asyncio

import httpx
from fastapi import APIRouter, Body, Header, HTTPException
from huggingface_hub import InferenceClient

from .apiModel import Payload

router = APIRouter()

# NOTE(review): this URL concatenates two different HF org names
# ("cardiffnlp" and "meta-llama") -- looks like a copy/paste error;
# confirm the intended model repository before relying on this endpoint.
API_URL = "https://api-inference.huggingface.co/models/cardiffnlp/meta-llama/Llama-3.1-8B-Instruct"

# Maximum number of 503 "model loading" retries before giving up.  The
# original code recursed without a bound, which could retry forever.
_MAX_RETRIES = 5


async def fetch_model_response(payload: dict, headers: dict) -> dict:
    """POST *payload* to the Hugging Face Inference API and return its JSON.

    Retries up to ``_MAX_RETRIES`` times, 20 seconds apart, while the API
    answers 503 (Hugging Face's signal that the model is still loading).

    Args:
        payload: JSON-serializable request body for the inference API.
        headers: HTTP headers, including the caller's Authorization token.

    Raises:
        HTTPException: 500 on transport errors, the upstream status code on
            a non-2xx response, or 503 once all loading retries are spent.
    """
    async with httpx.AsyncClient() as client:
        for _ in range(_MAX_RETRIES):
            try:
                response = await client.post(API_URL, headers=headers, json=payload)
                if response.status_code == 503:
                    # Model still loading: wait and retry instead of failing.
                    print("模型加载中,等待中...")
                    await asyncio.sleep(20)
                    continue
                response.raise_for_status()
                return response.json()
            except httpx.RequestError as e:
                raise HTTPException(status_code=500, detail=f"请求错误: {e}")
            except httpx.HTTPStatusError as e:
                # `response` is always bound here: raise_for_status() runs
                # only after a successful post().
                raise HTTPException(status_code=response.status_code, detail=f"HTTP 错误: {e}")
    # Every attempt came back 503 -- surface that to the caller.
    raise HTTPException(status_code=503, detail="Model is still loading; retries exhausted")


# The three chat handlers below were all named `chat_completion`, which
# shadows at module level and gives FastAPI duplicate OpenAPI operation IDs.
# They now have distinct names; the route paths are unchanged.

@router.post("/chat-completion-academic/")
async def chat_completion_academic(token: str = Body(...), messages: list = Body(...)):
    """Chat completion via Qwen/QwQ-32B-Preview for the academic endpoint.

    Args:
        token: Hugging Face API token supplied in the request body.
        messages: Chat history as a list of role/content dicts.

    Returns:
        ``{"message": ...}`` containing the model's first choice message.

    Raises:
        HTTPException: 500 wrapping any error from the inference client.
    """
    try:
        client = InferenceClient(api_key=token)
        # NOTE(review): the system prompt is appended *after* the user turns;
        # most chat APIs expect it first -- confirm this ordering is intended.
        messages.append({
            "role": "system",
            "content": "You are a multilingual chatbot capable of understanding questions in various languages and "
                       "providing accurate responses in the appropriate language."
        })
        completion = client.chat.completions.create(
            model="Qwen/QwQ-32B-Preview",
            messages=messages,
            max_tokens=500,
        )
        return {"message": completion.choices[0].message}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")


@router.post("/chat-completion/")
async def chat_completion(token: str = Body(...), messages: list = Body(...)):
    """General chat completion; answers are forced into Chinese.

    Args:
        token: Hugging Face API token supplied in the request body.
        messages: Chat history as a list of role/content dicts.

    Returns:
        ``{"message": ...}`` containing the model's first choice message.

    Raises:
        HTTPException: 500 wrapping any error from the inference client.
    """
    try:
        client = InferenceClient(api_key=token)
        print("问题:", messages)
        # NOTE(review): system prompt appended after user turns -- see above.
        messages.append({
            "role": "system",
            "content": "You are a multilingual chatbot capable of understanding questions in various languages and "
                       "providing accurate responses in the appropriate language. However, all questions must be "
                       "answered in Chinese."
        })
        # Alternative model considered: google/gemma-2-2b-it (currently used).
        completion = client.chat.completions.create(
            model="google/gemma-2-2b-it",
            messages=messages,
            max_tokens=500,
        )
        return {"message": completion.choices[0].message}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")


@router.post("/format-prompt/")
async def format_prompt(token: str = Body(...), messages: list = Body(...)):
    """Rewrite the user's prompt into an optimized image-generation prompt.

    Args:
        token: Hugging Face API token supplied in the request body.
        messages: Chat history as a list of role/content dicts.

    Returns:
        ``{"message": ...}`` containing the optimized prompt message.

    Raises:
        HTTPException: 500 wrapping any error from the inference client.
    """
    try:
        client = InferenceClient(api_key=token)
        print("问题:", messages)
        # NOTE(review): system prompt appended after user turns -- see above.
        messages.append({
            "role": "system",
            "content": "You are a highly intelligent image generation text optimizer. Your role is to enhance the "
                       "user's input prompt, ensuring it is precise, vivid, and detailed, while maintaining its "
                       "original intent. Always provide the enhanced version in English. Only reply with the "
                       "optimized prompt."
        })
        completion = client.chat.completions.create(
            model="google/gemma-2-2b-it",
            messages=messages,
            max_tokens=500,
        )
        return {"message": completion.choices[0].message}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")


@router.post("/api-inference/")
async def api_inference(authorization: str = Header(...), item: Payload = None):
    """Forward *item* to the HF Inference API with the caller's auth header.

    Args:
        authorization: Value for the upstream ``Authorization`` header.
        item: Request payload; required in practice (see guard below).

    Returns:
        The inference API's JSON response.

    Raises:
        HTTPException: 400 when the body is missing; otherwise whatever
            ``fetch_model_response`` raises.
    """
    if item is None:
        # Previously this fell through to `item.dict()` and crashed with
        # an AttributeError (500); fail fast with a clear client error.
        raise HTTPException(status_code=400, detail="Request body is required")
    print("请求:", item)
    headers = {"Authorization": authorization}
    # .dict() is the pydantic-v1 serializer; switch to .model_dump() on v2.
    response_data = await fetch_model_response(item.dict(), headers)
    return response_data