Didier committed on
Commit
abe7a5d
·
verified ·
1 Parent(s): 118f3ef

Create app.py

Browse files

Initial commit

Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ File: app.py
3
+ Description: Chat with the vision language model Gemma3.
4
+ Author: Didier Guillevic
5
+ Date: 2025-03-16
6
+ """
7
+
8
+ import gradio as gr
9
+ from transformers import AutoProcessor, Gemma3ForConditionalGeneration
10
+ from transformers import TextIteratorStreamer
11
+ from threading import Thread
12
+ import torch
13
+
14
def _history_to_messages(history):
    """Convert (user_turn, bot_turn) history pairs into chat-template messages.

    A tuple ``user_turn`` is treated as a collection of image URLs/paths and a
    string ``user_turn`` as text. Consecutive user turns without a bot reply
    are accumulated and emitted as one user message once a reply is found.
    """
    messages = []
    pending_user_content = []  # user parts waiting for the matching bot reply
    for user_turn, bot_turn in history:
        if isinstance(user_turn, tuple):  # Image input
            pending_user_content.extend(
                {"type": "image", "url": image_url} for image_url in user_turn
            )
        elif isinstance(user_turn, str):  # Text input
            pending_user_content.append({"type": "text", "text": user_turn})

        if pending_user_content and bot_turn:
            messages.append({'role': 'user', 'content': pending_user_content})
            messages.append(
                {'role': 'assistant', 'content': [{"type": "text", "text": bot_turn}]}
            )
            pending_user_content = []  # start a fresh user message
    return messages


def _message_to_content(message):
    """Build the chat-template content list (text first, then images)."""
    if isinstance(message, str):
        # Tolerate a plain-text message in addition to the multimodal dict.
        user_text, user_images = message, []
    else:
        user_text = message.get("text", "")
        user_images = message.get("files") or []  # also covers files=None

    content = []
    if user_text:
        content.append({"type": "text", "text": user_text})
    content.extend({"type": "image", "url": image} for image in user_images)
    return content


def process(message, history):
    """Generate the model response in streaming mode given message and history.

    Args:
        message: The current user input; a dict read via its ``"text"`` and
            ``"files"`` keys (a plain string is also accepted as text only).
        history: Iterable of ``(user_turn, bot_turn)`` pairs from earlier
            turns of the conversation.

    Yields:
        The response accumulated so far, growing with each streamed chunk.

    NOTE(review): ``processor`` and ``model`` are read as module-level globals
    but no definition is visible in this file -- confirm they are created
    before the first request, otherwise this raises ``NameError``.
    """
    print(f"{history=}")

    # Build the message list: prior turns followed by the new user message.
    messages = _history_to_messages(history)
    messages.append({'role': 'user', 'content': _message_to_content(message)})

    # Tokenize the conversation and move the tensors to the model's device.
    inputs = processor.apply_chat_template(
        messages, add_generation_prompt=True, tokenize=True,
        return_dict=True, return_tensors="pt"
    ).to(model.device, dtype=torch.bfloat16)

    streamer = TextIteratorStreamer(
        processor, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=1_024,
        do_sample=False
    )

    # Run generation in a worker thread so the streamer can be consumed here.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_message = ""
    for new_text in streamer:
        partial_message += new_text
        yield partial_message

    # The stream is exhausted, so generation is finishing: reap the worker.
    thread.join()
67
+
68
+
69
+ #
70
+ # User interface
71
+ #
72
# Example prompts offered in the UI: plain text entries and one text+image entry.
_EXAMPLE_PROMPTS = [
    "How can we rationalize quantum entanglement?",
    {'files': ['./sample_ID.jpeg'], 'text': 'Describe this image in a few words.'},
    "Peux-tu expliquer le 'quantum spin'?",
]

# Assemble the page: a single multimodal chat widget backed by `process`.
with gr.Blocks() as demo:
    chat_interface = gr.ChatInterface(
        fn=process,
        multimodal=True,
        title="Multimedia Chat",
        description="Chat with text or text+image.",
        examples=_EXAMPLE_PROMPTS,
    )

# Start the web server for the app.
demo.launch()