amos1088 committed
Commit 3a70ea8 · 1 Parent(s): 421c124
Files changed (1)
  1. app.py +12 -16
app.py CHANGED
@@ -2,30 +2,23 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import torch
 
-# Load model and tokenizer with trust_remote_code=True
 model_id = "PowerInfer/SmallThinker-21BA3B-Instruct"
 
-tokenizer = AutoTokenizer.from_pretrained(
-    model_id,
-    trust_remote_code=True  # Required for models with custom code
-)
-
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    device_map="cpu",          # Run on CPU
-    torch_dtype=torch.float32, # Use float32 on CPU
-    trust_remote_code=True     # Allow custom code execution
+    device_map="auto",          # Let HF decide best device
+    torch_dtype=torch.float16,  # Use float16 for speed if GPU available
+    trust_remote_code=True
 )
 
-# Create text generation pipeline
 generator = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-    device=-1  # CPU
+    device=0 if torch.cuda.is_available() else -1
 )
 
-# Define the chat function
 def chat(prompt, max_new_tokens=256, temperature=0.7):
     output = generator(
         prompt,
@@ -36,8 +29,8 @@ def chat(prompt, max_new_tokens=256, temperature=0.7):
     )
     return output[0]["generated_text"]
 
-# Launch the Gradio interface
-gr.Interface(
+# Define the interface but do NOT launch manually
+demo = gr.Interface(
     fn=chat,
     inputs=[
         gr.Textbox(label="Prompt", lines=4, placeholder="Ask anything..."),
@@ -46,5 +39,8 @@ gr.Interface(
     ],
     outputs=gr.Textbox(label="Response"),
     title="💬 SmallThinker-21BA3B-Instruct",
-    description="Run PowerInfer/SmallThinker-21BA3B-Instruct locally on CPU using Hugging Face + Gradio"
-).launch()
+    description="Run PowerInfer/SmallThinker-21BA3B-Instruct"
+)
+
+if __name__ == "__main__":
+    demo.launch()
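
One caveat worth noting alongside this commit: the model is now loaded with device_map="auto" (which hands placement to accelerate), but the pipeline is still given an explicit device argument. In recent transformers releases those two options conflict: a model dispatched via device_map cannot be moved again by the pipeline, and constructing it with device set raises a ValueError asking you to drop that argument. A minimal sketch of the non-conflicting pattern (an illustration, not part of this commit):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "PowerInfer/SmallThinker-21BA3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # accelerate picks the device(s); requires `accelerate` installed
    # float16 only pays off on GPU; fall back to float32 on CPU
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    trust_remote_code=True,
)

# The model already has a placement from accelerate, so do not pass `device`
# here; the pipeline reuses the placement that was chosen at load time.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)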
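The diff leaves the body of chat untouched: the raw prompt string goes straight into the pipeline. For an instruct-tuned checkpoint like this one, results are usually better when the prompt is first wrapped in the model's chat template. A hypothetical variant of chat, assuming the repo ships a chat template (as *-Instruct models generally do); the generation kwargs shown are illustrative, not the ones in the unchanged lines of this file:

def chat(prompt, max_new_tokens=256, temperature=0.7):
    # Hypothetical variant (not what this commit does): format the raw prompt
    # with the model's chat template before generation.
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    output = generator(
        text,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,  # illustrative sampling setting
    )
    return output[0]["generated_text"]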
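The final hunk follows the standard Gradio/Spaces convention: the Space runtime imports app.py and serves the module-level demo object itself, so guarding demo.launch() with if __name__ == "__main__": keeps the import side-effect-free while python app.py still works locally. For local runs outside Spaces, launch() also accepts explicit binding options (both are standard gradio parameters):

if __name__ == "__main__":
    # Bind explicitly when running outside Spaces; the Space runtime
    # handles host/port itself and skips this block on import.
    demo.launch(server_name="0.0.0.0", server_port=7860)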