Vaibhav-Singh committed
Commit 495ee6e · 1 Parent(s): 6702e56

Files changed (1):
  1. app.py +49 -80
app.py CHANGED
@@ -1,94 +1,63 @@
- # from fastapi import FastAPI, HTTPException
- # from pydantic import BaseModel
- # from transformers import AutoModelForCausalLM, AutoTokenizer
- # from typing import List
- # import torch
-
- # app = FastAPI(title="Language Model API")
-
- # # Model configuration
- # CHECKPOINT = "HuggingFaceTB/SmolLM2-135M-Instruct"
- # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-
- # # Initialize model and tokenizer
- # try:
- #     tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
- #     model = AutoModelForCausalLM.from_pretrained(CHECKPOINT).to(DEVICE)
- # except Exception as e:
- #     raise RuntimeError(f"Failed to load model: {str(e)}")
-
- # class ChatMessage(BaseModel):
- #     role: str
- #     content: str
-
- # class ChatRequest(BaseModel):
- #     messages: List[ChatMessage]
- #     max_new_tokens: int = 50
- #     temperature: float = 0.2
- #     top_p: float = 0.9
-
- # @app.post("/generate")
- # async def generate_response(request: ChatRequest):
- #     try:
- #         # Convert messages to the format expected by the model
- #         messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
-
- #         # Prepare input
- #         input_text = tokenizer.apply_chat_template(messages, tokenize=False)
- #         inputs = tokenizer.encode(input_text, return_tensors="pt").to(DEVICE)
-
- #         # Generate response
- #         outputs = model.generate(
- #             inputs,
- #             max_new_tokens=request.max_new_tokens,
- #             temperature=request.temperature,
- #             top_p=request.top_p,
- #             do_sample=True
- #         )
-
- #         # Decode and return response
- #         response_text = tokenizer.decode(outputs[0])
-
- #         return {
- #             "generated_text": response_text
- #         }
-
- #     except Exception as e:
- #         raise HTTPException(status_code=500, detail=str(e))
-
- # if __name__ == "__main__":
- #     import uvicorn
- #     uvicorn.run(app, host="0.0.0.0", port=7860)
-
-
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
- from transformers import GPT2Tokenizer, GPT2LMHeadModel
- import torch
-
- # Initialize FastAPI app
- app = FastAPI()
-
- # Load GPT-2 model and tokenizer
- tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
- model = GPT2LMHeadModel.from_pretrained('gpt2')
-
- # Define input schema
- class TextInput(BaseModel):
-     text: str
-
- @app.post("/generate")
- async def process_text(input: TextInput):
-     try:
-         encoded_input = tokenizer(input.text, return_tensors='pt')
-         with torch.no_grad():
-             output = model.generate(**encoded_input, max_length=50, num_return_sequences=1)
-         generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
-         return {"generated_text": generated_text}
      except Exception as e:
          raise HTTPException(status_code=500, detail=str(e))

- # Example root endpoint
- @app.get("/")
- async def root():
-     return {"message": "Welcome to the GPT-2 processing API!"}
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from typing import List
+ import torch
+
+ app = FastAPI(title="Language Model API")
+
+ # Model configuration
+ CHECKPOINT = "HuggingFaceTB/SmolLM2-135M-Instruct"
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Initialize model and tokenizer
+ try:
+     tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
+     model = AutoModelForCausalLM.from_pretrained(CHECKPOINT).to(DEVICE)
+ except Exception as e:
+     raise RuntimeError(f"Failed to load model: {str(e)}")
+
+ class ChatMessage(BaseModel):
+     role: str
+     content: str
+
+ class ChatRequest(BaseModel):
+     messages: List[ChatMessage]
+     max_new_tokens: int = 50
+     temperature: float = 0.2
+     top_p: float = 0.9
+
+ @app.post("/generate")
+ async def generate_response(request: ChatRequest):
+     try:
+         # Convert messages to the format expected by the model
+         messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
+
+         # Prepare input
+         input_text = tokenizer.apply_chat_template(messages, tokenize=False)
+         inputs = tokenizer.encode(input_text, return_tensors="pt").to(DEVICE)
+
+         # Generate response
+         outputs = model.generate(
+             inputs,
+             max_new_tokens=request.max_new_tokens,
+             temperature=request.temperature,
+             top_p=request.top_p,
+             do_sample=True
+         )
+
+         # Decode and return response
+         response_text = tokenizer.decode(outputs[0])
+
+         return {
+             "generated_text": response_text
+         }
      except Exception as e:
          raise HTTPException(status_code=500, detail=str(e))

+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
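
For reference, a minimal sketch of exercising the new /generate endpoint after this change. It is not part of the commit: it assumes the app has been started locally (for example via `python app.py`, which serves on port 7860 as configured above) and that the `requests` package is installed; the prompt text is only an illustration.

# Hypothetical client-side smoke test for the /generate endpoint (not in the commit).
import requests

# Request body matches the ChatRequest schema introduced in app.py.
payload = {
    "messages": [{"role": "user", "content": "What is the capital of France?"}],
    "max_new_tokens": 50,
    "temperature": 0.2,
    "top_p": 0.9,
}

# Assumes the server is running locally on port 7860.
resp = requests.post("http://localhost:7860/generate", json=payload, timeout=120)
resp.raise_for_status()

# The endpoint returns {"generated_text": ...}.
print(resp.json()["generated_text"])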