Boning c committed on
Commit f738fa6 · verified · 1 Parent(s): 6d34d27

Update app.py

Files changed (1)
  1. app.py +75 -97
app.py CHANGED
@@ -1,128 +1,106 @@
  import gradio as gr
  import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
- import requests

- # Verified existing SmilyAI models on Huggingface
- MODEL_CANDIDATES = [
      "Smilyai-labs/Sam-reason-S1",
      "Smilyai-labs/Sam-reason-S1.5",
-     "Smilyai-labs/Sam-reason-S2.1",
      "Smilyai-labs/Sam-reason-v1",
      "Smilyai-labs/Sam-reason-v2",
-     "Smilyai-labs/Sam-large-v1",
-     "Smilyai-labs/Sam-flash-mini-v1",
-     "Smilyai-labs/Sam-reason-A1"
  ]

- def model_exists(repo_id):
-     url = f"https://huggingface.co/api/models/{repo_id}"
-     try:
-         response = requests.get(url)
-         return response.status_code == 200
-     except Exception:
-         return False

- # Filter models that actually exist
- AVAILABLE_MODELS = [m for m in MODEL_CANDIDATES if model_exists(m)]

- if not AVAILABLE_MODELS:
-     raise RuntimeError("No verified SmilyAI models are available from Huggingface!")

- device = "cuda" if torch.cuda.is_available() else "cpu"

- # Globals to hold current model and tokenizer
- model = None
- tokenizer = None
- generator = None
-
- def load_model(model_name):
-     global model, tokenizer, generator
-     try:
-         tokenizer = AutoTokenizer.from_pretrained(model_name)
-         model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
-         model.eval()
-         # Use pipeline for generation with streaming support
-         generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if device=="cuda" else -1)
-         return f"✅ Loaded model: {model_name} on {device}"
-     except Exception as e:
-         return f"❌ Failed to load model: {model_name}\n{str(e)}"
-
- def chat_stream(user_message, history, model_name):
-     global model, tokenizer, generator
-
-     if model is None or tokenizer is None or generator is None:
-         load_status = load_model(model_name)
-         if load_status.startswith("❌"):
-             yield history, load_status
-             return

-     if history is None:
-         history = []

-     # Append user input to history
-     history.append((user_message, ""))

-     # Prepare prompt with conversation history for multi-turn chat
-     prompt = ""
-     for user, bot in history[:-1]:
-         prompt += f"User: {user}\nSam: {bot}\n"
-     prompt += f"User: {user_message}\nSam:"

-     # Streaming token generation
-     response_text = ""
-     try:
-         # Set parameters to generate text token by token
-         # Use generator with `stream=True` if supported (Huggingface pipeline streaming)
-         # Note: some transformers versions or models may not support streaming in pipeline.
-         # We'll simulate streaming here by chunking output.

-         # Generate full text first (fallback)
-         output = generator(prompt, max_new_tokens=128, do_sample=True, top_p=0.9, temperature=0.8)[0]['generated_text']

-         # Extract the new bot output (everything after prompt)
-         bot_reply = output[len(prompt):].strip()

-         # Stream output token-by-token to Gradio
-         for i in range(1, len(bot_reply)+1):
-             partial = bot_reply[:i]
-             history[-1] = (user_message, partial)
-             yield history, ""
-     except Exception as e:
-         history[-1] = (user_message, f"Error during generation: {str(e)}")
-         yield history, ""

- def reset_chat():
-     return [], ""

- with gr.Blocks() as demo:
-     gr.Markdown("# SmilyAI Sam Chatbot")

-     with gr.Row():
-         model_dropdown = gr.Dropdown(choices=AVAILABLE_MODELS, value=AVAILABLE_MODELS[0], label="Select Sam Model")

-     chatbot = gr.Chatbot()
-     state = gr.State([]) # conversation history
-     status = gr.Textbox(value="", interactive=False, visible=True, label="Status")

-     with gr.Row():
-         user_input = gr.Textbox(show_label=False, placeholder="Type your message and hit Enter", lines=2)
-         submit_btn = gr.Button("Send")

-     def on_submit(user_message, history, model_name):
-         return chat_stream(user_message, history, model_name)

-     submit_btn.click(on_submit, inputs=[user_input, state, model_dropdown], outputs=[chatbot, status], queue=True)
-     user_input.submit(on_submit, inputs=[user_input, state, model_dropdown], outputs=[chatbot, status], queue=True)

-     def on_model_change(new_model):
-         # Reload model on change, reset chat
-         status_message = load_model(new_model)
-         return [], status_message

-     model_dropdown.change(on_model_change, inputs=model_dropdown, outputs=[chatbot, status])

-     reset_btn = gr.Button("Reset Chat")
-     reset_btn.click(reset_chat, outputs=[chatbot, status])

- demo.launch()
 
  import gradio as gr
  import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM

+ # List of available SmilyAI Sam models (adjust as needed)
+ MODELS = [
+     "Smilyai-labs/Sam-reason-A1",
      "Smilyai-labs/Sam-reason-S1",
      "Smilyai-labs/Sam-reason-S1.5",
+     "Smilyai-labs/Sam-reason-S2",
+     "Smilyai-labs/Sam-reason-S3",
      "Smilyai-labs/Sam-reason-v1",
      "Smilyai-labs/Sam-reason-v2",
+     "Smilyai-labs/Sam-flash-mini-v1"
  ]

+ device = "cuda" if torch.cuda.is_available() else "cpu"

+ # Global vars to hold model and tokenizer
+ model = None
+ tokenizer = None

+ def load_model(model_name):
+     global model, tokenizer
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
+     model.eval()
+     return f"Loaded model: {model_name}"

+ def generate_stream(prompt, max_length=100, temperature=0.7, top_p=0.9):
+     global model, tokenizer
+     if model is None or tokenizer is None:
+         yield "Model not loaded. Please select a model first."
+         return

+     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

+     generated_ids = input_ids
+     output_text = tokenizer.decode(input_ids[0])

+     # Generate tokens one by one
+     for _ in range(max_length):
+         outputs = model(generated_ids)
+         logits = outputs.logits

+         # Get logits for last token
+         next_token_logits = logits[:, -1, :] / temperature

+         # Apply top_p filtering for nucleus sampling
+         sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
+         cumulative_probs = torch.softmax(sorted_logits, dim=-1).cumsum(dim=-1)

+         # Remove tokens with cumulative prob above top_p
+         sorted_indices_to_remove = cumulative_probs > top_p
+         # Shift mask right to keep at least one token
+         sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
+         sorted_indices_to_remove[..., 0] = 0

+         filtered_logits = next_token_logits.clone()
+         filtered_logits[:, sorted_indices[sorted_indices_to_remove]] = -float('Inf')

+         # Sample from filtered distribution
+         probabilities = torch.softmax(filtered_logits, dim=-1)
+         next_token = torch.multinomial(probabilities, num_samples=1)

+         generated_ids = torch.cat([generated_ids, next_token], dim=-1)

+         new_token_text = tokenizer.decode(next_token[0])
+         output_text += new_token_text

+         yield output_text

+         # Stop if EOS token generated
+         if next_token.item() == tokenizer.eos_token_id:
+             break

+ def on_model_change(model_name):
+     status = load_model(model_name)
+     return status

+ with gr.Blocks() as demo:
+     gr.Markdown("# SmilyAI Sam Models — Manual Token Streaming Generator")

+     with gr.Row():
+         model_selector = gr.Dropdown(choices=MODELS, value=MODELS[0], label="Select Model")
+         status = gr.Textbox(label="Status", interactive=False)

+     prompt_input = gr.Textbox(lines=3, placeholder="Enter your prompt here...", label="Prompt")
+     output_box = gr.Textbox(label="Generated Text", lines=15, interactive=False)

+     generate_btn = gr.Button("Generate")
+
+     # Load default model
+     status.value = load_model(MODELS[0])
+
+     model_selector.change(on_model_change, inputs=model_selector, outputs=status)
+
+     def generate_func(prompt):
+         if not prompt.strip():
+             yield "Please enter a prompt."
+             return
+         yield from generate_stream(prompt)

+     generate_btn.click(generate_func, inputs=prompt_input, outputs=output_box)

+ demo.launch()