ginipick committed
Commit 2a6f5e7 · verified · 1 Parent(s): 86c8d9d

Update app.py

Files changed (1)
  1. app.py +28 -106
app.py CHANGED
@@ -1,6 +1,7 @@
 import subprocess
 subprocess.run('pip install flash-attn==2.7.0.post2 --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

+import spaces
 import os
 import re
 import logging
@@ -11,23 +12,21 @@ import torch
 import gradio as gr
 from transformers import AutoModelForCausalLM, TextIteratorStreamer

-# load model and tokenizers
 model_name = 'AIDC-AI/Ovis2-8B'
 use_thread = False

-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.bfloat16,
-    multimodal_max_length=8192,
-    trust_remote_code=True
-).to(device='cuda')
-
+# load model
+model = AutoModelForCausalLM.from_pretrained(model_name,
+                                             torch_dtype=torch.bfloat16,
+                                             multimodal_max_length=8192,
+                                             trust_remote_code=True).to(device='cuda')
 text_tokenizer = model.get_text_tokenizer()
 visual_tokenizer = model.get_visual_tokenizer()
 streamer = TextIteratorStreamer(text_tokenizer, skip_prompt=True, skip_special_tokens=True)
 image_placeholder = '<image>'
 cur_dir = os.path.dirname(os.path.abspath(__file__))

+logging.getLogger("httpx").setLevel(logging.WARNING)
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

@@ -47,10 +46,9 @@ def initialize_gen_kwargs():
 def submit_chat(chatbot, text_input):
     response = ''
     chatbot.append((text_input, response))
-    return chatbot, ''
+    return chatbot ,''

-@gradio.routes.no_temp_folder()
-@gradio.gpu()
+@spaces.GPU
 def ovis_chat(chatbot: List[List[str]], image_input: Any):
     conversations, model_inputs = prepare_inputs(chatbot, image_input)
     gen_kwargs = initialize_gen_kwargs()
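
Note on the hunk above: gradio.routes.no_temp_folder() and gradio.gpu() are not part of the public Gradio API, so the old decorators would fail at import time. The new code uses @spaces.GPU from the spaces package instead, which requests ZeroGPU hardware on Hugging Face Spaces for the duration of the decorated call. Below is a minimal sketch of that pattern, with a hypothetical run_model function standing in for ovis_chat.

# Minimal sketch of the spaces.GPU pattern (assumes the `spaces` package used on
# ZeroGPU Spaces; outside of a Space the decorator is effectively a no-op).
import spaces
import torch

@spaces.GPU  # a GPU is attached only while this function runs
def run_model(prompt: str) -> str:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # real inference would go here; this stand-in just reports the device
    return f"[{device}] {prompt}"

# For long generations, @spaces.GPU(duration=120) extends the default time window.
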
@@ -75,8 +73,14 @@ def ovis_chat(chatbot: List[List[str]], image_input: Any):

     log_conversation(chatbot)

+
 def prepare_inputs(chatbot: List[List[str]], image_input: Any):
-    conversations = []
+    # conversations = [{
+    #     "from": "system",
+    #     "value": "You are a helpful assistant, and your task is to provide reliable and structured responses to users."
+    # }]
+    conversations= []
+
     for query, response in chatbot[:-1]:
         conversations.extend([
             {"from": "human", "value": query},
@@ -87,7 +91,6 @@ def prepare_inputs(chatbot: List[List[str]], image_input: Any):
     conversations.append({"from": "human", "value": last_query})

     if image_input is not None:
-        # if an image is attached, prepend the image tag to the first human message
         for conv in conversations:
             if conv["from"] == "human":
                 conv["value"] = f'{image_placeholder}\n{conv["value"]}'
@@ -114,7 +117,6 @@ def log_conversation(chatbot):
 def clear_chat():
     return [], None, ""

-# load the logo SVG and adjust its style
 with open(f"{cur_dir}/resource/logo.svg", "r", encoding="utf-8") as svg_file:
     svg_content = svg_file.read()
 font_size = "2.5em"
@@ -124,14 +126,7 @@ html = f"""
     <span style="display: inline-block; vertical-align: middle;">{svg_content}</span>
     <span style="display: inline-block; vertical-align: middle;">{model_name.split('/')[-1]}</span>
 </p>
-<center>
-    <font size=3>
-        <b>Ovis</b> has been open-sourced on
-        <a href='https://huggingface.co/{model_name}'>😊 Huggingface</a> and
-        <a href='https://github.com/AIDC-AI/Ovis'>🌟 GitHub</a>.
-        If you find Ovis useful, a like❤️ or a star🌟 would be appreciated.
-    </font>
-</center>
+<center><font size=3><b>Ovis</b> has been open-sourced on <a href='https://huggingface.co/{model_name}'>😊 Huggingface</a> and <a href='https://github.com/AIDC-AI/Ovis'>🌟 GitHub</a>. If you find Ovis useful, a like❤️ or a star🌟 would be appreciated.</font></center>
 """

 latex_delimiters_set = [{
@@ -164,75 +159,18 @@ latex_delimiters_set = [{
     "display": True
 }]

-text_input = gr.Textbox(label="Prompt", placeholder="Enter your text here...", lines=1, container=False)
-
-# custom CSS (background gradient, translucent container, button animations, etc.)
-custom_css = """
-body {
-    background: linear-gradient(135deg, #667eea, #764ba2);
-    font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
-    color: #333;
-    margin: 0;
-    padding: 0;
-}
-.gradio-container {
-    background: rgba(255, 255, 255, 0.95);
-    border-radius: 15px;
-    padding: 30px 40px;
-    box-shadow: 0 8px 30px rgba(0, 0, 0, 0.3);
-    margin: 40px auto;
-    max-width: 1200px;
-}
-.gradio-container h1 {
-    color: #333;
-    text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.2);
-}
-.fillable {
-    width: 95% !important;
-    max-width: unset !important;
-}
-#examples_container {
-    margin: auto;
-    width: 90%;
-}
-#examples_row {
-    justify-content: center;
-}
-.sidebar {
-    background: rgba(255, 255, 255, 0.98);
-    border-radius: 10px;
-    padding: 20px;
-    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
-}
-button, .btn {
-    background: linear-gradient(90deg, #ff8a00, #e52e71);
-    border: none;
-    color: #fff;
-    padding: 12px 24px;
-    text-transform: uppercase;
-    font-weight: bold;
-    letter-spacing: 1px;
-    border-radius: 5px;
-    cursor: pointer;
-    transition: transform 0.2s ease-in-out;
-}
-button:hover, .btn:hover {
-    transform: scale(1.05);
-}
-"""
-
-with gr.Blocks(css=custom_css, title=model_name.split('/')[-1]) as demo:
+text_input = gr.Textbox(label="prompt", placeholder="Enter your text here...", lines=1, container=False)
+with gr.Blocks(title=model_name.split('/')[-1], theme=gr.themes.Ocean()) as demo:
     gr.HTML(html)
     with gr.Row():
         with gr.Column(scale=3):
-            image_input = gr.Image(label="Image", height=350, type="pil")
+            image_input = gr.Image(label="image", height=350, type="pil")
             gr.Examples(
                 examples=[
-                    [f"{cur_dir}/examples/ovis2_math0.jpg", "Each face of the polyhedron shown is either a triangle or a square. Each square borders 4 triangles, and each triangle borders 3 squares. The polyhedron has 6 squares. How many triangles does it have?\n\nProvide a step-by-step solution to the problem, and conclude with 'the answer is' followed by the final solution."],
-                    [f"{cur_dir}/examples/ovis2_math1.jpg", "A large square touches another two squares, as shown in the picture. The numbers inside the smaller squares indicate their areas. What is the area of the largest square?\n\nProvide a step-by-step solution to the problem, and conclude with 'the answer is' followed by the final solution."],
-                    [f"{cur_dir}/examples/ovis2_figure0.png", "Explain this model."],
-                    [f"{cur_dir}/examples/ovis2_figure1.png", "Organize the notes about GRPO in the figure."],
-                    [f"{cur_dir}/examples/ovis2_multi0.jpg", "Posso avere un frappuccino e un caffè americano di taglia M? Quanto costa in totale?"],
+                    [f"{cur_dir}/examples/ovis2_math2.png", "Find the area of the shaded region."],
+                    [f"{cur_dir}/examples/ovis2_figure2.png", "What is net profit margin as a percentage of total revenue?"],
+                    [f"{cur_dir}/examples/ovis2_table0.png", "Convert the table to markdown."],
+                    [f"{cur_dir}/examples/ovis2_ocr0.jpeg", "OCR:"],
                 ],
                 inputs=[image_input, text_input]
             )
@@ -240,27 +178,11 @@ with gr.Blocks(css=custom_css, title=model_name.split('/')[-1]) as demo:
             chatbot = gr.Chatbot(label="Ovis", layout="panel", height=600, show_copy_button=True, latex_delimiters=latex_delimiters_set)
             text_input.render()
             with gr.Row():
-                send_btn = gr.Button("Send")
-                clear_btn = gr.Button("Clear")
+                send_btn = gr.Button("Send", variant="primary")
+                clear_btn = gr.Button("Clear", variant="secondary")

-    send_click_event = send_btn.click(
-        submit_chat,
-        inputs=[chatbot, text_input],
-        outputs=[chatbot, text_input]
-    ).then(
-        ovis_chat,
-        inputs=[chatbot, image_input],
-        outputs=chatbot
-    )
-    submit_event = text_input.submit(
-        submit_chat,
-        inputs=[chatbot, text_input],
-        outputs=[chatbot, text_input]
-    ).then(
-        ovis_chat,
-        inputs=[chatbot, image_input],
-        outputs=chatbot
-    )
+    send_click_event = send_btn.click(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(ovis_chat,[chatbot, image_input],chatbot)
+    submit_event = text_input.submit(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(ovis_chat,[chatbot, image_input],chatbot)
     clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input])

 demo.launch()
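
Note on the final hunk: the verbose keyword-argument wiring is collapsed into one-liners, but the behavior is unchanged. Both the Send click and the textbox submit first run submit_chat (append the user turn and clear the textbox), then chain into ovis_chat via .then() so the reply streams into the same Chatbot. Below is a self-contained sketch of the same pattern, using a hypothetical fake_chat generator in place of ovis_chat.

# Stand-alone sketch of the click/submit -> .then() chaining used above.
# fake_chat is a placeholder generator; the real app streams tokens from the model.
import gradio as gr

def submit_chat(chatbot, text_input):
    # append the user turn with an empty reply, then clear the textbox
    chatbot.append((text_input, ""))
    return chatbot, ""

def fake_chat(chatbot, image_input):
    # fill in the last turn's reply; yielding lets the UI update incrementally
    chatbot[-1] = (chatbot[-1][0], f"echo: {chatbot[-1][0]}")
    yield chatbot

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    image_input = gr.Image(type="pil")
    text_input = gr.Textbox()
    send_btn = gr.Button("Send", variant="primary")

    send_btn.click(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(
        fake_chat, [chatbot, image_input], chatbot
    )
    text_input.submit(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(
        fake_chat, [chatbot, image_input], chatbot
    )

if __name__ == "__main__":
    demo.launch()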
 
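
One further note on how the reply text itself is produced: app.py builds a TextIteratorStreamer from the model's text tokenizer, and the use_thread flag suggests generation can run in a background thread while the UI consumes partial text. The Ovis-specific preprocessing is not part of this diff, so the sketch below only illustrates that streamer-plus-thread pattern with a small stand-in model (gpt2 here is a placeholder, not Ovis2-8B).

# Minimal sketch of streaming generation with TextIteratorStreamer (stand-in model).
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")   # placeholder, not Ovis2-8B
model = AutoModelForCausalLM.from_pretrained("gpt2")

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
inputs = tokenizer("Describe the image placeholder:", return_tensors="pt")

# generate() blocks, so it runs in a worker thread; the main thread consumes the
# streamer and could yield each partial response into a Gradio Chatbot.
thread = Thread(target=model.generate,
                kwargs=dict(**inputs, max_new_tokens=32, streamer=streamer))
thread.start()
response = ""
for new_text in streamer:
    response += new_text
thread.join()
print(response)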