xulh
committed on
Commit
·
69d5d0e
1
Parent(s):
01ff4cd
代码初始化
Browse files- inference/inference.py +17 -26
inference/inference.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
import asyncio
|
2 |
import httpx
|
3 |
-
from fastapi import APIRouter, Header, HTTPException
|
4 |
from .apiModel import Payload
|
5 |
-
import
|
6 |
-
import torch
|
7 |
|
8 |
router = APIRouter()
|
9 |
|
@@ -28,32 +27,24 @@ async def fetch_model_response(payload: dict, headers: dict):
|
|
28 |
raise HTTPException(status_code=response.status_code, detail=f"HTTP 错误: {e}")
|
29 |
|
30 |
|
31 |
-
@router.post("/
|
32 |
-
async def
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
37 |
-
# 设置请求头
|
38 |
-
pipeline = transformers.pipeline(
|
39 |
-
"text-generation",
|
40 |
-
model=model_id,
|
41 |
-
model_kwargs={"torch_dtype": torch.bfloat16},
|
42 |
-
device_map="auto",
|
43 |
-
)
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
max_new_tokens=256,
|
53 |
-
)
|
54 |
|
55 |
-
|
56 |
-
|
57 |
|
58 |
|
59 |
@router.post("/api-inference/")
|
|
|
1 |
import asyncio
|
2 |
import httpx
|
3 |
+
from fastapi import APIRouter, Header, HTTPException, Body
|
4 |
from .apiModel import Payload
|
5 |
+
from huggingface_hub import InferenceClient
|
|
|
6 |
|
7 |
router = APIRouter()
|
8 |
|
|
|
27 |
raise HTTPException(status_code=response.status_code, detail=f"HTTP 错误: {e}")
|
28 |
|
29 |
|
30 |
+
@router.post("/chat-completion/")
async def chat_completion(
    token: str = Body(...),
    messages: list = Body(...),
    model: str = Body("meta-llama/Llama-3.1-8B-Instruct"),
    max_tokens: int = Body(500),
):
    """Generate a chat completion via the Hugging Face Inference API.

    Args:
        token: Hugging Face API token used to authenticate the client.
        messages: Chat history; presumably a list of {"role", "content"}
            dicts as expected by the chat completions API — TODO confirm
            against the caller.
        model: Model repo id to query. Defaults to the previously
            hard-coded Llama 3.1 8B Instruct model, so existing callers
            are unaffected.
        max_tokens: Upper bound on generated tokens (previously fixed at 500).

    Returns:
        dict: the assistant's reply under the "message" key
        (first choice of the completion).

    Raises:
        HTTPException: status 500 carrying the underlying error message
            if the inference call fails for any reason.
    """
    try:
        # A fresh client per request: the API token arrives in the request
        # body, so the client cannot be shared at module level.
        client = InferenceClient(api_key=token)

        # Request a chat completion from the hosted model.
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
        )

        # Return only the first choice's message to the caller.
        return {"message": completion.choices[0].message}
    except Exception as e:
        # Boundary handler: surface any inference failure as an HTTP 500.
        # Chain the original exception so the real cause is preserved
        # in logs/tracebacks.
        raise HTTPException(
            status_code=500,
            detail=f"Error generating chat completion: {str(e)}",
        ) from e
|
48 |
|
49 |
|
50 |
@router.post("/api-inference/")
|