Spaces:

scott12355
/

DocketTest

Running

File size: 1,449 Bytes

761e190
838117b
8dcf124
07d17d6
 
8dcf124
 
 
 
 
 
 
6f8a9f6
9ffdd96
c8ad2f6
10f7bb7
e0f84f3
8dcf124
 
07d17d6
 
 
8dcf124
c8ad2f6
9ffdd96
 
6f8a9f6
838117b
0298a9e
838117b
 
 
0298a9e
838117b
 
2e8f59e
838117b
8dcf124
 
 
 
 
b86525b
07d17d6

from fastapi import FastAPI
from transformers import pipeline
import torch
from pydantic import BaseModel
from typing import List, Dict
# Choose the compute backend once at import time: Apple MPS is preferred,
# then CUDA, and the CPU is the fallback when neither accelerator is available.
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)
print(device)

# ASGI application object that the route decorators below register on.
app = FastAPI()

# Model id handed to the text-generation pipeline.
modelName = "Qwen/Qwen2.5-1.5B-Instruct"

# Both pipelines are created at import time and placed on the selected device.
pipe = pipeline(
    task="text-generation",
    model=modelName,
    device=device,
    batch_size=8,
)
# No model is specified here, so the library picks its default for the task.
# NOTE(review): not referenced by any route visible in this file.
sentiment_model = pipeline(task="sentiment-analysis", device=device)

class ChatRequest(BaseModel):
    """Request body accepted by the POST /chat endpoint."""

    # Conversation so far; chat() forwards it unchanged to the text-generation
    # pipeline. The type only enforces a list of str -> str mappings.
    # Presumably each entry is a {"role": ..., "content": ...} message like the
    # one built in generate() - TODO confirm against the client.
    conversationHistory: List[Dict[str, str]]



@app.get("/")
async def root():
    """Return a fixed greeting payload for the root path."""
    greeting = {"message": "Hello World"}
    return greeting

# NOTE - FastAPI's docs_url can be configured to serve the interactive docs at the
# root path, so the docs act as the landing page for the app on Spaces. That option
# is not enabled here (app is created with plain FastAPI() and "/" returns a greeting):
# app = FastAPI(docs_url="/")

@app.get("/generate")
def generate(text: str):
    """
    Answer a single user prompt with the text-generation pipeline.

    The prompt is wrapped as a one-message chat with role "user", the pipeline
    is called for one sequence with max_new_tokens=250, and the "content" of
    the last entry in the first result's "generated_text" is returned under
    the "output" key.
    """
    messages = [{"role": "user", "content": text}]
    output = pipe(messages, max_new_tokens=250, num_return_sequences=1)

    # Dump the raw pipeline result to stdout.
    print(output)

    generated = output[0]["generated_text"]
    reply = generated[-1]
    return {"output": reply["content"]}

@app.post("/chat")
def chat(request: ChatRequest):
    """
    Generate a response from the NLP model for a whole conversation.

    The conversation history from the request body is handed to the
    text-generation pipeline as-is (one sequence, max_new_tokens=250) and the
    pipeline's result is returned without post-processing.
    """
    history = request.conversationHistory
    # NOTE(review): unlike /generate, this returns the pipeline's full result
    # rather than only the last message's content - confirm clients expect that.
    result = pipe(history, max_new_tokens=250, num_return_sequences=1)
    return result