import asyncio

import httpx
from fastapi import APIRouter, Body, Header, HTTPException
from huggingface_hub import InferenceClient

from .apiModel import Payload

router = APIRouter()

# NOTE(review): this URL concatenates two different HF org names
# ("cardiffnlp" and "meta-llama") -- looks like a copy/paste error;
# confirm the intended model repository before relying on this endpoint.
API_URL = "https://api-inference.huggingface.co/models/cardiffnlp/meta-llama/Llama-3.1-8B-Instruct"

# Maximum number of 503 "model loading" retries before giving up.  The
# original code recursed without a bound, which could retry forever.
_MAX_RETRIES = 5


async def fetch_model_response(payload: dict, headers: dict) -> dict:
    """POST *payload* to the Hugging Face Inference API and return its JSON.

    Retries up to ``_MAX_RETRIES`` times, 20 seconds apart, while the API
    answers 503 (Hugging Face's signal that the model is still loading).

    Args:
        payload: JSON-serializable request body for the inference API.
        headers: HTTP headers, including the caller's Authorization token.

    Raises:
        HTTPException: 500 on transport errors, the upstream status code on
            a non-2xx response, or 503 once all loading retries are spent.
    """
    async with httpx.AsyncClient() as client:
        for _ in range(_MAX_RETRIES):
            try:
                response = await client.post(API_URL, headers=headers, json=payload)
                if response.status_code == 503:
                    # Model still loading: wait and retry instead of failing.
                    print("模型加载中,等待中...")
                    await asyncio.sleep(20)
                    continue
                response.raise_for_status()
                return response.json()
            except httpx.RequestError as e:
                raise HTTPException(status_code=500, detail=f"请求错误: {e}")
            except httpx.HTTPStatusError as e:
                # `response` is always bound here: raise_for_status() runs
                # only after a successful post().
                raise HTTPException(status_code=response.status_code, detail=f"HTTP 错误: {e}")
    # Every attempt came back 503 -- surface that to the caller.
    raise HTTPException(status_code=503, detail="Model is still loading; retries exhausted")


# The three chat handlers below were all named `chat_completion`, which
# shadows at module level and gives FastAPI duplicate OpenAPI operation IDs.
# They now have distinct names; the route paths are unchanged.

@router.post("/chat-completion-academic/")
async def chat_completion_academic(token: str = Body(...), messages: list = Body(...)):
    """Chat completion via Qwen/QwQ-32B-Preview for the academic endpoint.

    Args:
        token: Hugging Face API token supplied in the request body.
        messages: Chat history as a list of role/content dicts.

    Returns:
        ``{"message": ...}`` containing the model's first choice message.

    Raises:
        HTTPException: 500 wrapping any error from the inference client.
    """
    try:
        client = InferenceClient(api_key=token)
        # NOTE(review): the system prompt is appended *after* the user turns;
        # most chat APIs expect it first -- confirm this ordering is intended.
        messages.append({
            "role": "system",
            "content": "You are a multilingual chatbot capable of understanding questions in various languages and "
                       "providing accurate responses in the appropriate language."
        })
        completion = client.chat.completions.create(
            model="Qwen/QwQ-32B-Preview",
            messages=messages,
            max_tokens=500,
        )
        return {"message": completion.choices[0].message}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")


@router.post("/chat-completion/")
async def chat_completion(token: str = Body(...), messages: list = Body(...)):
    """General chat completion; answers are forced into Chinese.

    Args:
        token: Hugging Face API token supplied in the request body.
        messages: Chat history as a list of role/content dicts.

    Returns:
        ``{"message": ...}`` containing the model's first choice message.

    Raises:
        HTTPException: 500 wrapping any error from the inference client.
    """
    try:
        client = InferenceClient(api_key=token)
        print("问题:", messages)
        # NOTE(review): system prompt appended after user turns -- see above.
        messages.append({
            "role": "system",
            "content": "You are a multilingual chatbot capable of understanding questions in various languages and "
                       "providing accurate responses in the appropriate language. However, all questions must be "
                       "answered in Chinese."
        })
        # Alternative model considered: google/gemma-2-2b-it (currently used).
        completion = client.chat.completions.create(
            model="google/gemma-2-2b-it",
            messages=messages,
            max_tokens=500,
        )
        return {"message": completion.choices[0].message}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")


@router.post("/format-prompt/")
async def format_prompt(token: str = Body(...), messages: list = Body(...)):
    """Rewrite the user's prompt into an optimized image-generation prompt.

    Args:
        token: Hugging Face API token supplied in the request body.
        messages: Chat history as a list of role/content dicts.

    Returns:
        ``{"message": ...}`` containing the optimized prompt message.

    Raises:
        HTTPException: 500 wrapping any error from the inference client.
    """
    try:
        client = InferenceClient(api_key=token)
        print("问题:", messages)
        # NOTE(review): system prompt appended after user turns -- see above.
        messages.append({
            "role": "system",
            "content": "You are a highly intelligent image generation text optimizer. Your role is to enhance the "
                       "user's input prompt, ensuring it is precise, vivid, and detailed, while maintaining its "
                       "original intent. Always provide the enhanced version in English. Only reply with the "
                       "optimized prompt."
        })
        completion = client.chat.completions.create(
            model="google/gemma-2-2b-it",
            messages=messages,
            max_tokens=500,
        )
        return {"message": completion.choices[0].message}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")


@router.post("/api-inference/")
async def api_inference(authorization: str = Header(...), item: Payload = None):
    """Forward *item* to the HF Inference API with the caller's auth header.

    Args:
        authorization: Value for the upstream ``Authorization`` header.
        item: Request payload; required in practice (see guard below).

    Returns:
        The inference API's JSON response.

    Raises:
        HTTPException: 400 when the body is missing; otherwise whatever
            ``fetch_model_response`` raises.
    """
    if item is None:
        # Previously this fell through to `item.dict()` and crashed with
        # an AttributeError (500); fail fast with a clear client error.
        raise HTTPException(status_code=400, detail="Request body is required")
    print("请求:", item)
    headers = {"Authorization": authorization}
    # .dict() is the pydantic-v1 serializer; switch to .model_dump() on v2.
    response_data = await fetch_model_response(item.dict(), headers)
    return response_data