Spaces: gemma-3-270m: Allow users to set model parameters

The default parameter values are taken from Unsloth:
https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/blob/main/params
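For reference, the slider defaults introduced in this commit mirror that params file (temperature 1.0, top_k 64, top_p 0.95, min_p 0.001, repeat_penalty 1.0). Below is a minimal standalone sketch of passing those values to the same model through the Ollama Python client, assuming a local Ollama server with gemma3:270m already pulled; the prompt string is illustrative, and the dict-style response access is copied from the app's own streaming loop.

import asyncio
from ollama import AsyncClient

async def main():
    client = AsyncClient()
    async for part in await client.chat(
        model="gemma3:270m",
        messages=[{"role": "user", "content": "Hello!"}],  # illustrative prompt
        # Slider defaults from the diff below, mirroring the Unsloth params file.
        options={
            "num_ctx": 512,
            "temperature": 1.0,
            "repeat_penalty": 1.0,
            "min_p": 0.001,
            "top_k": 64,
            "top_p": 0.95
        },
        stream=True
    ):
        # Dict-style access copied from the app's streaming loop.
        print(part.get("message", {}).get("content", ""), end="", flush=True)

asyncio.run(main())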
app.py (CHANGED)
@@ -7,7 +7,16 @@ import os
 from ollama import AsyncClient
 import gradio as gr
 
-async def playground(message, history):
+async def playground(
+    message,
+    history,
+    num_ctx,
+    temperature,
+    repeat_penalty,
+    min_p,
+    top_k,
+    top_p
+):
     if not isinstance(message, str) or not message.strip():
         yield []
         return
@@ -22,20 +31,35 @@ async def playground(message, history):
     messages = []
     for item in history:
         if isinstance(item, dict) and "role" in item and "content" in item:
-            messages.append({"role": item["role"], "content": item["content"]})
+            messages.append({
+                "role": item["role"],
+                "content": item["content"]
+            })
     messages.append({"role": "user", "content": message})
 
     response = ""
     async for part in await client.chat(
         model="gemma3:270m",
         messages=messages,
+        options={
+            "num_ctx": int(num_ctx),
+            "temperature": float(temperature),
+            "repeat_penalty": float(repeat_penalty),
+            "min_p": float(min_p),
+            "top_k": int(top_k),
+            "top_p": float(top_p)
+        },
         stream=True
     ):
         response += part.get("message", {}).get("content", "")
         yield response
 
-with gr.Blocks(fill_height=True, fill_width=True) as app:
+with gr.Blocks(
+    fill_height=True,
+    fill_width=True
+) as app:
     with gr.Sidebar():
+        gr.Markdown("## Ollama Playground by UltimaX Intelligence")
         gr.HTML(
             """
             This space run the <b><a href=
@@ -68,8 +92,71 @@ with gr.Blocks(fill_height=True, fill_width=True) as app:
             coffee</a></b>.
             """
         )
+        gr.Markdown("---")
+        gr.Markdown("## Model Parameters")
+        num_ctx = gr.Slider(
+            minimum=512,
+            maximum=1024,
+            value=512,
+            step=128,
+            label="Context Length (num_ctx)",
+            info="Maximum context window size. Limited to CPU usage."
+        )
+        gr.Markdown("")
+        temperature = gr.Slider(
+            minimum=0.1,
+            maximum=2.0,
+            value=1.0,
+            step=0.1,
+            label="Temperature",
+            info="Controls randomness in generation"
+        )
+        gr.Markdown("")
+        repeat_penalty = gr.Slider(
+            minimum=0.1,
+            maximum=2.0,
+            value=1.0,
+            step=0.1,
+            label="Repeat Penalty",
+            info="Penalty for repeating tokens"
+        )
+        gr.Markdown("")
+        min_p = gr.Slider(
+            minimum=0.0,
+            maximum=1.0,
+            value=0.001,
+            step=0.001,
+            label="Min P",
+            info="Minimum probability threshold"
+        )
+        gr.Markdown("")
+        top_k = gr.Slider(
+            minimum=0,
+            maximum=100,
+            value=64,
+            step=1,
+            label="Top K",
+            info="Number of top tokens to consider"
+        )
+        gr.Markdown("")
+        top_p = gr.Slider(
+            minimum=0.0,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top P",
+            info="Cumulative probability threshold"
+        )
     gr.ChatInterface(
         fn=playground,
+        additional_inputs=[
+            num_ctx,
+            temperature,
+            repeat_penalty,
+            min_p,
+            top_k,
+            top_p
+        ],
         chatbot=gr.Chatbot(
             label="Ollama | Gemma 3 (270M)",
             type="messages",
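A note on the wiring: gr.ChatInterface passes each component listed in additional_inputs to the handler positionally, after (message, history), so the list order in the diff must match the extra parameters in the new playground signature. A self-contained sketch of that pattern with a single slider and a hypothetical echo handler (not the Space's actual function):

import gradio as gr

# Hypothetical echo handler, not the Space's function: Gradio appends each
# additional_inputs value after (message, history), in list order.
def echo(message, history, temperature):
    return f"(temperature={temperature}) {message}"

with gr.Blocks() as demo:
    t = gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Temperature")
    gr.ChatInterface(
        fn=echo,
        additional_inputs=[t],
        chatbot=gr.Chatbot(type="messages")
    )

demo.launch()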