|
import asyncio |
|
import httpx |
|
from fastapi import APIRouter, Header, HTTPException, Body |
|
from .apiModel import Payload |
|
from huggingface_hub import InferenceClient |
|
|
|
# Shared FastAPI router; the application mounts this to expose the endpoints below.
router = APIRouter()
|
|
|
# Hugging Face Inference API endpoint used by fetch_model_response().
# Fix: the model path previously read "cardiffnlp/meta-llama/Llama-3.1-8B-Instruct" —
# a stale "cardiffnlp/" org prefix pasted in front of the real repo id — which is
# not a valid model repository and would always fail upstream.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct"
|
|
|
|
|
|
|
async def fetch_model_response(payload: dict, headers: dict, max_retries: int = 10) -> dict:
    """POST *payload* to the Hugging Face inference API and return the JSON reply.

    While the model is still loading, the API answers 503; we wait 20 seconds and
    retry, up to *max_retries* additional attempts. (The original implementation
    recursed on every 503 with no bound, so a model that never finished loading
    caused unbounded recursion; the new parameter defaults keep callers working.)

    Args:
        payload: JSON-serializable request body forwarded to the API.
        headers: HTTP headers, typically including the Authorization token.
        max_retries: How many extra attempts to make after a 503 response.

    Raises:
        HTTPException: 500 on transport errors, 503 if the model never loads
            within the retry budget, or the upstream status code on any other
            HTTP error.
    """
    async with httpx.AsyncClient() as client:
        for _ in range(max_retries + 1):
            try:
                response = await client.post(API_URL, headers=headers, json=payload)
                if response.status_code == 503:
                    # Model is still loading upstream; wait, then retry.
                    print("模型加载中,等待中...")
                    await asyncio.sleep(20)
                    continue
                response.raise_for_status()
                return response.json()
            except httpx.RequestError as e:
                raise HTTPException(status_code=500, detail=f"请求错误: {e}")
            except httpx.HTTPStatusError as e:
                raise HTTPException(status_code=response.status_code, detail=f"HTTP 错误: {e}")
    # Retry budget exhausted — surface the upstream unavailability explicitly.
    raise HTTPException(status_code=503, detail="模型加载超时")
|
|
|
|
|
@router.post("/chat-completion-academic/")
async def chat_completion(token: str = Body(...), messages: list = Body(...)):
    """Run a chat completion against Qwen/QwQ-32B-Preview via the HF Inference API.

    A multilingual system prompt is appended to *messages* before the call; the
    first choice's message is returned. Any failure surfaces as a 500.
    """
    system_prompt = {
        "role": "system",
        "content": "You are a multilingual chatbot capable of understanding questions in various languages and "
                   "providing accurate responses in the appropriate language."
    }
    try:
        hf_client = InferenceClient(api_key=token)
        messages.append(system_prompt)

        result = hf_client.chat.completions.create(
            model="Qwen/QwQ-32B-Preview",
            messages=messages,
            max_tokens=500,
        )

        return {"message": result.choices[0].message}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(exc)}")
|
|
|
|
|
@router.post("/chat-completion/")
async def chat_completion(token: str = Body(...), messages: list = Body(...)):
    """Chat completion via google/gemma-2-2b-it, with answers forced to Chinese.

    Appends a system prompt that requires Chinese responses, then returns the
    first choice's message. Any failure surfaces as a 500.
    """
    system_prompt = {
        "role": "system",
        "content": "You are a multilingual chatbot capable of understanding questions in various languages and "
                   "providing accurate responses in the appropriate language. However, all questions must be "
                   "answered in Chinese."
    }
    try:
        hf_client = InferenceClient(api_key=token)
        print("问题:", messages)
        messages.append(system_prompt)

        result = hf_client.chat.completions.create(
            model="google/gemma-2-2b-it",
            messages=messages,
            max_tokens=500,
        )

        return {"message": result.choices[0].message}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(exc)}")
|
|
|
|
|
@router.post("/format-prompt/")
async def chat_completion(token: str = Body(...), messages: list = Body(...)):
    """Rewrite an image-generation prompt via google/gemma-2-2b-it.

    Appends a system prompt instructing the model to act as a prompt optimizer
    (English output only), then returns the first choice's message. Any failure
    surfaces as a 500.
    """
    system_prompt = {
        "role": "system",
        "content": "You are a highly intelligent image generation text optimizer. Your role is to enhance the "
                   "user's input prompt, ensuring it is precise, vivid, and detailed, while maintaining its "
                   "original intent. Always provide the enhanced version in English. Only reply with the "
                   "optimized prompt."
    }
    try:
        hf_client = InferenceClient(api_key=token)
        print("问题:", messages)
        messages.append(system_prompt)

        result = hf_client.chat.completions.create(
            model="google/gemma-2-2b-it",
            messages=messages,
            max_tokens=500,
        )

        return {"message": result.choices[0].message}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(exc)}")
|
|
|
|
|
@router.post("/api-inference/")
async def api_inference(
        authorization: str = Header(...),
        item: Payload = None):
    """Proxy a raw inference request to the Hugging Face API.

    Forwards the caller's Authorization header verbatim and the request body
    (a Payload) as JSON, returning the upstream JSON response unchanged.

    Raises:
        HTTPException: 422 when the request body is missing; otherwise whatever
            fetch_model_response raises for transport/HTTP errors.
    """
    print("请求:", item)

    # `item` defaults to None, so a request without a body used to crash on
    # item.dict() with an AttributeError (surfaced as an opaque 500). Reject it
    # explicitly with the validation status FastAPI would normally use.
    if item is None:
        raise HTTPException(status_code=422, detail="Request body is required")

    headers = {"Authorization": authorization}

    # NOTE(review): item.dict() is the Pydantic v1 API; under Pydantic v2 this
    # is deprecated in favor of model_dump() — confirm the project's version.
    response_data = await fetch_model_response(item.dict(), headers)

    return response_data
|
|