abhayesian commited on
Commit
02a50eb
·
1 Parent(s): 9e47b54

Added handler

Browse files
Files changed (1) hide show
  1. handler.py +54 -0
handler.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from typing import Any, Dict, List

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
6
class EndpointHandler:
    """Hugging Face Inference Endpoints handler for a LoRA-adapted Llama model.

    Loads the Llama-3.3-70B-Instruct base model in float16, applies a PEFT
    (LoRA) adapter on top of it, and exposes text generation via ``__call__``.
    """

    def __init__(self, path: str = ""):
        """Load tokenizer, base model, adapter, and build the generation pipeline.

        Args:
            path: Model directory supplied by the Inference Endpoints runtime.
                  The model ids below are fixed, so it is accepted but unused.
        """
        # Separate names for the hub ids vs. the loaded objects — the original
        # reused one variable for both, which is easy to misread.
        base_model_id = "meta-llama/Llama-3.3-70B-Instruct"
        adapter_model_id = "abhayesian/llama-3.3-70b-af-synthetic-finetuned"

        # Load tokenizer for the base model.
        self.tokenizer = AutoTokenizer.from_pretrained(
            base_model_id,
            trust_remote_code=True,
        )

        # Load base model with float16 weights, sharded across available devices.
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_id,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.float16,
        )

        # Apply the LoRA adapter on top of the base weights.
        self.model = PeftModel.from_pretrained(
            base_model,
            adapter_model_id,
            device_map="auto",
        )

        # Reusable text-generation pipeline built once at startup.
        self.generator = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate text for an inference request.

        Args:
            data: Request payload. The prompt is read from ``data["inputs"]``.
                  Generation options (``max_new_tokens``, ``temperature``,
                  ``top_p``) may appear either at the top level (as the
                  original handler expected) or nested under
                  ``data["parameters"]`` — the standard Inference Endpoints
                  payload shape. Nested values take precedence.

        Returns:
            The pipeline output: a list of dicts, each with a
            ``"generated_text"`` key (the annotation was previously
            ``Dict[str, Any]``, which did not match the pipeline's return).
        """
        prompt = data.get("inputs", "")

        # Merge nested "parameters" (standard HF payload) over top-level keys,
        # keeping backward compatibility with the original flat payload.
        params = data.get("parameters") or {}
        max_new_tokens = params.get("max_new_tokens", data.get("max_new_tokens", 128))
        temperature = params.get("temperature", data.get("temperature", 0.7))
        top_p = params.get("top_p", data.get("top_p", 0.9))

        outputs = self.generator(
            prompt,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            # Return only the completion, not the prompt echoed back.
            return_full_text=False,
        )

        return outputs