from fastapi import FastAPI, Query
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

app = FastAPI()

# Create the offload folder if it doesn't exist
os.makedirs("./offload", exist_ok=True)

# Load the tokenizer and model; offload_folder is required so that
# device_map="auto" can spill weights to disk instead of raising an error
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-llm-7b-base")
model = AutoModelForCausalLM.from_pretrained(
    "deepseek-ai/deepseek-llm-7b-base",
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="./offload"
)
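
# Note (based on how accelerate handles device_map="auto"): layers are placed
# across available GPUs and CPU RAM first, and anything that still doesn't fit
# is memory-mapped from ./offload on disk, so the 7B model can load even on
# machines with limited VRAM, at the cost of slower inference.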

@app.get("/")
def home():
    return {
        "message": "✅ DeepSeek LLM is running. Use endpoint /ask?prompt=your+question"
    }

@app.get("/ask")
def ask(prompt: str = Query(..., description="Your input prompt")):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100, temperature=0.7)
    reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"response": reply}