xulh committed on
Commit
5b6514f
·
1 Parent(s): a1df442

代码初始化

Browse files
Files changed (2) hide show
  1. inference/inference.py +31 -1
  2. requirements.txt +2 -0
inference/inference.py CHANGED
@@ -1,7 +1,9 @@
 
1
  import httpx
2
  from fastapi import APIRouter, Header, HTTPException
3
  from .apiModel import Payload
4
- import time
 
5
 
6
  router = APIRouter()
7
 
@@ -26,6 +28,34 @@ async def fetch_model_response(payload: dict, headers: dict):
26
  raise HTTPException(status_code=response.status_code, detail=f"HTTP 错误: {e}")
27
 
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  @router.post("/api-inference/")
30
  async def api_inference(
31
  authorization: str = Header(...),
 
1
+ import asyncio
2
  import httpx
3
  from fastapi import APIRouter, Header, HTTPException
4
  from .apiModel import Payload
5
+ import transformers
6
+ import torch
7
 
8
  router = APIRouter()
9
 
 
28
  raise HTTPException(status_code=response.status_code, detail=f"HTTP 错误: {e}")
29
 
30
 
31
@router.post("/api-llama/")
async def api_inference(
        authorization: str = Header(...),
        item: Payload = None):
    """Generate a chat completion locally with Meta-Llama-3.1-8B-Instruct.

    NOTE(review): this handler shares the function name ``api_inference``
    with the ``/api-inference/`` handler defined later in this module; both
    routes still register with FastAPI, but renaming one of them would
    avoid the module-level shadowing.

    Args:
        authorization: token supplied via the ``Authorization`` header
            (currently unused by this handler).
        item: request payload; currently only logged — the prompt below is
            hard-coded. TODO: build ``messages`` from ``item``.

    Returns:
        The raw output list produced by the transformers text-generation
        pipeline.
    """
    print("请求:", item)

    # Loading an 8B model is extremely expensive (minutes of wall time and
    # many GB of memory); reuse a single cached pipeline instead of
    # rebuilding it on every request as the original code did.
    pipeline = _get_llama_pipeline()

    messages = [
        {"role": "system", "content": "你是一个万能聊天机器人,能准确回答每一个提出的问题"},
        {"role": "user", "content": "你是谁?"},
    ]

    # Generation is synchronous and CPU/GPU-bound; run it in a worker
    # thread so the event loop stays responsive to other requests
    # (``asyncio`` is already imported at the top of this file).
    outputs = await asyncio.to_thread(
        pipeline,
        messages,
        max_new_tokens=256,
    )
    return outputs


def _get_llama_pipeline():
    """Build the text-generation pipeline once and cache it on the function."""
    if not hasattr(_get_llama_pipeline, "_pipe"):
        _get_llama_pipeline._pipe = transformers.pipeline(
            "text-generation",
            model="meta-llama/Meta-Llama-3.1-8B-Instruct",
            model_kwargs={"torch_dtype": torch.bfloat16},
            device_map="auto",
        )
    return _get_llama_pipeline._pipe
59
  @router.post("/api-inference/")
60
  async def api_inference(
61
  authorization: str = Header(...),
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  fastapi
2
  httpx
3
  requests
 
 
4
  uvicorn[standard]
 
1
  fastapi
2
  httpx
3
  requests
4
+ transformers
5
+ torch
6
  uvicorn[standard]