gauravprasadgp committed on
Commit
243586b
·
verified ·
1 Parent(s): 0fcfb06

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -0
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ from peft import PeftModel
4
+
# Model path
# Hub identifiers: the base causal LM and the LoRA/PEFT adapter fine-tuned
# on top of it (downloads from the Hugging Face Hub on first run).
model_name = "Qwen/Qwen3-0.6B"
peft_model_path = "gauravprasadgp/Qwen3-0.6B_demeaner_hinglish"

# Load tokenizer and model
# NOTE(review): trust_remote_code executes model-repo code — acceptable here
# because the repo is a known official Qwen checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# device_map="auto" places weights on GPU when available, else CPU.
base_model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map="auto")
# Wrap the base model with the fine-tuned PEFT adapter weights.
model = PeftModel.from_pretrained(base_model, peft_model_path)
def generate_response(prompt, max_tokens, temperature, top_p):
    """Generate a completion for *prompt* and return only the new text.

    Args:
        prompt: Input text from the user.
        max_tokens: Maximum number of new tokens to sample.
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling probability mass.

    Returns:
        The generated continuation, stripped of the echoed prompt and of
        surrounding whitespace.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )
    # Trim the prompt at the *token* level instead of `response[len(prompt):]`:
    # decoding can normalize whitespace/special tokens, so the decoded prompt's
    # character length need not equal len(prompt), which made the old slice cut
    # in the wrong place for some inputs.
    prompt_token_count = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][prompt_token_count:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# UI: one prompt textbox plus three sampling controls, wired to the
# generation function above.
_prompt_input = gr.Textbox(
    label="Did Gaurav asked you to try me ???",
    lines=4,
    placeholder="Type something...",
)
_max_tokens_slider = gr.Slider(20, 512, value=128, step=8, label="Max Tokens")
_temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
_top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top-p (nucleus sampling)")

iface = gr.Interface(
    fn=generate_response,
    inputs=[_prompt_input, _max_tokens_slider, _temperature_slider, _top_p_slider],
    outputs=gr.Textbox(label="Model Response"),
    title="Hinglish Demeaner Model (Qwen3-0.6B)",
    description="This is a fine-tuned Qwen model to generate Hinglish responses with stylistic modifications.",
    theme="default",
)
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()