Spaces: gemma-3-270m: Allow users to set model parameters

The default parameter values are taken from Unsloth:
https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/blob/main/params
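For reference, the slider defaults introduced in this commit mirror that params file (temperature 1.0, top_k 64, top_p 0.95, min_p 0.001, repeat_penalty 1.0). Below is a minimal standalone sketch of passing those values to the same model through the Ollama Python client, assuming a local Ollama server with gemma3:270m already pulled; the prompt string is illustrative, and the dict-style response access is copied from the app's own streaming loop.

import asyncio
from ollama import AsyncClient

async def main():
    client = AsyncClient()
    async for part in await client.chat(
        model="gemma3:270m",
        messages=[{"role": "user", "content": "Hello!"}],  # illustrative prompt
        # Slider defaults from the diff below, mirroring the Unsloth params file.
        options={
            "num_ctx": 512,
            "temperature": 1.0,
            "repeat_penalty": 1.0,
            "min_p": 0.001,
            "top_k": 64,
            "top_p": 0.95
        },
        stream=True
    ):
        # Dict-style access copied from the app's streaming loop.
        print(part.get("message", {}).get("content", ""), end="", flush=True)

asyncio.run(main())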
app.py (CHANGED)
@@ -7,7 +7,16 @@ import os
 from ollama import AsyncClient
 import gradio as gr
 
-async def playground(message, history):
+async def playground(
+    message,
+    history,
+    num_ctx,
+    temperature,
+    repeat_penalty,
+    min_p,
+    top_k,
+    top_p
+):
     if not isinstance(message, str) or not message.strip():
         yield []
         return
@@ -22,20 +31,35 @@ async def playground(message, history):
     messages = []
     for item in history:
         if isinstance(item, dict) and "role" in item and "content" in item:
-            messages.append({"role": item["role"], "content": item["content"]})
+            messages.append({
+                "role": item["role"],
+                "content": item["content"]
+            })
     messages.append({"role": "user", "content": message})
 
     response = ""
     async for part in await client.chat(
         model="gemma3:270m",
         messages=messages,
+        options={
+            "num_ctx": int(num_ctx),
+            "temperature": float(temperature),
+            "repeat_penalty": float(repeat_penalty),
+            "min_p": float(min_p),
+            "top_k": int(top_k),
+            "top_p": float(top_p)
+        },
         stream=True
     ):
         response += part.get("message", {}).get("content", "")
         yield response
 
-with gr.Blocks(fill_height=True, fill_width=True) as app:
+with gr.Blocks(
+    fill_height=True,
+    fill_width=True
+) as app:
     with gr.Sidebar():
+        gr.Markdown("## Ollama Playground by UltimaX Intelligence")
         gr.HTML(
             """
             This space run the <b><a href=
@@ -68,8 +92,71 @@ with gr.Blocks(fill_height=True, fill_width=True) as app:
             coffee</a></b>.
             """
         )
+        gr.Markdown("---")
+        gr.Markdown("## Model Parameters")
+        num_ctx = gr.Slider(
+            minimum=512,
+            maximum=1024,
+            value=512,
+            step=128,
+            label="Context Length (num_ctx)",
+            info="Maximum context window size. Limited to CPU usage."
+        )
+        gr.Markdown("")
+        temperature = gr.Slider(
+            minimum=0.1,
+            maximum=2.0,
+            value=1.0,
+            step=0.1,
+            label="Temperature",
+            info="Controls randomness in generation"
+        )
+        gr.Markdown("")
+        repeat_penalty = gr.Slider(
+            minimum=0.1,
+            maximum=2.0,
+            value=1.0,
+            step=0.1,
+            label="Repeat Penalty",
+            info="Penalty for repeating tokens"
+        )
+        gr.Markdown("")
+        min_p = gr.Slider(
+            minimum=0.0,
+            maximum=1.0,
+            value=0.001,
+            step=0.001,
+            label="Min P",
+            info="Minimum probability threshold"
+        )
+        gr.Markdown("")
+        top_k = gr.Slider(
+            minimum=0,
+            maximum=100,
+            value=64,
+            step=1,
+            label="Top K",
+            info="Number of top tokens to consider"
+        )
+        gr.Markdown("")
+        top_p = gr.Slider(
+            minimum=0.0,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top P",
+            info="Cumulative probability threshold"
+        )
     gr.ChatInterface(
         fn=playground,
+        additional_inputs=[
+            num_ctx,
+            temperature,
+            repeat_penalty,
+            min_p,
+            top_k,
+            top_p
+        ],
         chatbot=gr.Chatbot(
             label="Ollama | Gemma 3 (270M)",
             type="messages",
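A note on the wiring: gr.ChatInterface passes each component listed in additional_inputs to the handler positionally, after (message, history), so the list order in the diff must match the extra parameters in the new playground signature. A self-contained sketch of that pattern with a single slider and a hypothetical echo handler (not the Space's actual function):

import gradio as gr

# Hypothetical echo handler, not the Space's function: Gradio appends each
# additional_inputs value after (message, history), in list order.
def echo(message, history, temperature):
    return f"(temperature={temperature}) {message}"

with gr.Blocks() as demo:
    t = gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Temperature")
    gr.ChatInterface(
        fn=echo,
        additional_inputs=[t],
        chatbot=gr.Chatbot(type="messages")
    )

demo.launch()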