ievnsk committed on
Commit
41a6e51
·
1 Parent(s): 7d5eaef
Files changed (1) hide show
  1. app.py +7 -42
app.py CHANGED
@@ -1,39 +1,12 @@
1
  import gradio as gr
2
- import spaces
3
-
4
  from transformers import AutoProcessor, Gemma3ForConditionalGeneration
5
- import torch
6
-
7
- # Глобальные переменные для модели и процессора (изначально None)
8
- gpu_model = None
9
- gpu_processor = None
10
-
11
-
12
- @spaces.GPU(duration=40)
13
- def load_gpu_model():
14
- model_id = "google/gemma-3-12b-it"
15
- model = Gemma3ForConditionalGeneration.from_pretrained(
16
- model_id, device_map="auto"
17
- ).eval()
18
- processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
19
- return processor, model
20
 
 
 
 
 
21
 
22
- @spaces.GPU(duration=40)
23
- def respond(
24
- message,
25
- history: list[tuple[str, str]],
26
- system_message,
27
- max_tokens,
28
- temperature,
29
- top_p,
30
- ):
31
- global gpu_model, gpu_processor
32
-
33
- # Загрузка модели при первом вызове
34
- if gpu_model is None or gpu_processor is None:
35
- gpu_processor, gpu_model = load_gpu_model()
36
-
37
  # Подготовка истории сообщений
38
  messages = []
39
  if system_message:
@@ -74,8 +47,7 @@ def respond(
74
  )
75
 
76
  response = gpu_processor.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:])[0]
77
- yield response
78
-
79
 
80
  def run():
81
  demo = gr.ChatInterface(
@@ -84,17 +56,10 @@ def run():
84
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
85
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
86
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
87
- gr.Slider(
88
- minimum=0.1,
89
- maximum=1.0,
90
- value=0.95,
91
- step=0.05,
92
- label="Top-p (nucleus sampling)",
93
- ),
94
  ],
95
  )
96
  demo.launch()
97
 
98
-
99
  if __name__ == "__main__":
100
  run()
 
1
  import gradio as gr
 
 
2
  from transformers import AutoProcessor, Gemma3ForConditionalGeneration
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
+ # Глобальная инициализация модели и процессора (однократно при старте)
5
+ model_id = "google/gemma-3-1b-it"
6
+ gpu_model = Gemma3ForConditionalGeneration.from_pretrained(model_id, device_map="auto").eval()
7
+ gpu_processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
8
 
9
+ def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  # Подготовка истории сообщений
11
  messages = []
12
  if system_message:
 
47
  )
48
 
49
  response = gpu_processor.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:])[0]
50
+ return response
 
51
 
52
  def run():
53
  demo = gr.ChatInterface(
 
56
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
57
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
58
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
59
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
 
 
 
 
 
 
60
  ],
61
  )
62
  demo.launch()
63
 
 
64
  if __name__ == "__main__":
65
  run()