ievnsk committed on
Commit
41a6e51
·
1 Parent(s): 7d5eaef
Files changed (1) hide show
  1. app.py +7 -42
app.py CHANGED
@@ -1,39 +1,12 @@
1
  import gradio as gr
2
- import spaces
3
-
4
  from transformers import AutoProcessor, Gemma3ForConditionalGeneration
5
- import torch
6
-
7
- # Глобальные переменные для модели и процессора (изначально None)
8
- gpu_model = None
9
- gpu_processor = None
10
-
11
-
12
- @spaces.GPU(duration=40)
13
- def load_gpu_model():
14
- model_id = "google/gemma-3-12b-it"
15
- model = Gemma3ForConditionalGeneration.from_pretrained(
16
- model_id, device_map="auto"
17
- ).eval()
18
- processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
19
- return processor, model
20
 
 
 
 
 
21
 
22
- @spaces.GPU(duration=40)
23
- def respond(
24
- message,
25
- history: list[tuple[str, str]],
26
- system_message,
27
- max_tokens,
28
- temperature,
29
- top_p,
30
- ):
31
- global gpu_model, gpu_processor
32
-
33
- # Загрузка модели при первом вызове
34
- if gpu_model is None or gpu_processor is None:
35
- gpu_processor, gpu_model = load_gpu_model()
36
-
37
  # Подготовка истории сообщений
38
  messages = []
39
  if system_message:
@@ -74,8 +47,7 @@ def respond(
74
  )
75
 
76
  response = gpu_processor.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:])[0]
77
- yield response
78
-
79
 
80
  def run():
81
  demo = gr.ChatInterface(
@@ -84,17 +56,10 @@ def run():
84
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
85
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
86
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
87
- gr.Slider(
88
- minimum=0.1,
89
- maximum=1.0,
90
- value=0.95,
91
- step=0.05,
92
- label="Top-p (nucleus sampling)",
93
- ),
94
  ],
95
  )
96
  demo.launch()
97
 
98
-
99
  if __name__ == "__main__":
100
  run()
 
1
  import gradio as gr
 
 
2
  from transformers import AutoProcessor, Gemma3ForConditionalGeneration
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
+ # Глобальная инициализация модели и процессора (однократно при старте)
5
+ model_id = "google/gemma-3-1b-it"
6
+ gpu_model = Gemma3ForConditionalGeneration.from_pretrained(model_id, device_map="auto").eval()
7
+ gpu_processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
8
 
9
+ def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  # Подготовка истории сообщений
11
  messages = []
12
  if system_message:
 
47
  )
48
 
49
  response = gpu_processor.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:])[0]
50
+ return response
 
51
 
52
  def run():
53
  demo = gr.ChatInterface(
 
56
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
57
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
58
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
59
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
 
 
 
 
 
 
60
  ],
61
  )
62
  demo.launch()
63
 
 
64
  if __name__ == "__main__":
65
  run()