File size: 4,676 Bytes
5b6514f
4060252
69d5d0e
73400df
69d5d0e
838d93c
caaf84d
838d93c
a1df442
838d93c
 
4060252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ae1a20
69d5d0e
 
 
 
5b6514f
bb1d3e2
 
 
 
 
 
69d5d0e
 
9cad8c3
69d5d0e
 
 
5b6514f
69d5d0e
 
5b6514f
69d5d0e
 
5b6514f
 
3ae1a20
 
 
 
 
7f611b8
3ae1a20
9a0053e
9fd5bac
 
 
b30a245
3ae1a20
7f611b8
3ae1a20
9a0053e
3ae1a20
 
 
 
 
 
 
 
 
 
 
1e5f011
 
 
 
 
 
 
 
adf9187
 
 
 
1e5f011
 
 
 
df22e8a
1e5f011
 
 
 
 
 
 
 
 
 
 
caaf84d
89ea1b2
 
caaf84d
 
4060252
 
89ea1b2
4060252
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import asyncio
import httpx
from fastapi import APIRouter, Header, HTTPException, Body
from .apiModel import Payload
from huggingface_hub import InferenceClient

router = APIRouter()

# Hugging Face serverless Inference API endpoint used by /api-inference/.
# NOTE(review): the model path has three segments ("cardiffnlp/meta-llama/Llama-3.1-8B-Instruct");
# HF model IDs are normally "org/model", so the leading "cardiffnlp/" looks like a leftover
# from an earlier model — confirm the URL actually resolves.
API_URL = "https://api-inference.huggingface.co/models/cardiffnlp/meta-llama/Llama-3.1-8B-Instruct"


# Asynchronous request to the HF inference endpoint via httpx.
async def fetch_model_response(payload: dict, headers: dict, max_retries: int = 10):
    """POST *payload* to ``API_URL`` and return the parsed JSON response.

    A 503 from the endpoint means the model is still loading; we wait 20
    seconds and try again, up to *max_retries* attempts. (The original code
    retried via unbounded recursion, so a model that never finished loading
    made the request hang forever and risked a RecursionError.)

    Args:
        payload: JSON-serializable request body forwarded to the model.
        headers: HTTP headers, including the caller's Authorization token.
        max_retries: Maximum number of attempts before giving up (default 10).

    Raises:
        HTTPException: 500 on transport errors, the upstream status code on
            HTTP errors, or 503 when all retries are exhausted.
    """
    async with httpx.AsyncClient() as client:
        for _ in range(max_retries):
            try:
                response = await client.post(API_URL, headers=headers, json=payload)
                if response.status_code == 503:
                    # Model is still loading upstream — wait, then retry.
                    print("模型加载中,等待中...")
                    await asyncio.sleep(20)  # 等待20秒
                    continue
                response.raise_for_status()  # raise on any 4xx/5xx
                return response.json()
            except httpx.RequestError as e:
                raise HTTPException(status_code=500, detail=f"请求错误: {e}")
            except httpx.HTTPStatusError as e:
                raise HTTPException(status_code=response.status_code, detail=f"HTTP 错误: {e}")
    # Retries exhausted: surface the load timeout instead of looping forever.
    raise HTTPException(status_code=503, detail="模型加载超时,请稍后重试")


@router.post("/chat-completion-academic/")
async def chat_completion(token: str = Body(...), messages: list = Body(...)):
    try:
        # 创建 InferenceClient
        client = InferenceClient(api_key=token)

        messages.append({
            "role": "system",
            "content": "You are a multilingual chatbot capable of understanding questions in various languages and "
                       "providing accurate responses in the appropriate language."
        })

        # 使用 chat API 请求生成模型的回答
        completion = client.chat.completions.create(
            model="Qwen/QwQ-32B-Preview",
            messages=messages,
            max_tokens=500
        )

        # 返回对话信息
        return {"message": completion.choices[0].message}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")


@router.post("/chat-completion/")
async def chat_completion(token: str = Body(...), messages: list = Body(...)):
    try:
        # 创建 InferenceClient
        client = InferenceClient(api_key=token)
        print("问题:", messages)
        messages.append({
            "role": "system",
            "content": "You are a multilingual chatbot capable of understanding questions in various languages and "
                       "providing accurate responses in the appropriate language. However, all questions must be "
                       "answered in Chinese."
        })

        # 使用 chat API 请求生成模型的回答  备选模型  google/gemma-2-2b-it
        completion = client.chat.completions.create(
            model="google/gemma-2-2b-it",
            messages=messages,
            max_tokens=500
        )

        # 返回对话信息
        return {"message": completion.choices[0].message}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")


@router.post("/format-prompt/")
async def chat_completion(token: str = Body(...), messages: list = Body(...)):
    try:
        # 创建 InferenceClient
        client = InferenceClient(api_key=token)
        print("问题:", messages)
        messages.append({
            "role": "system",
            "content": "You are a highly intelligent image generation text optimizer. Your role is to enhance the "
                       "user's input prompt, ensuring it is precise, vivid, and detailed, while maintaining its "
                       "original intent. Always provide the enhanced version in English. Only reply with the "
                       "optimized prompt."
        })

        # 使用 chat API 请求生成模型的回答  备选模型  google/gemma-2-2b-it
        completion = client.chat.completions.create(
            model="google/gemma-2-2b-it",
            messages=messages,
            max_tokens=500
        )

        # 返回对话信息
        return {"message": completion.choices[0].message}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating chat completion: {str(e)}")


@router.post("/api-inference/")
async def api_inference(
        authorization: str = Header(...),
        item: Payload = None):
    print("请求:", item)

    # 设置请求头
    headers = {"Authorization": authorization}

    # 使用异步请求
    response_data = await fetch_model_response(item.dict(), headers)

    return response_data