Spaces: Running on Zero
Update vlm.py
vlm.py CHANGED
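This commit adds `import spaces` and places the `@spaces.GPU` decorator on the two inference entry points, `stream_response` and `get_response`, which is what lets the Space run on ZeroGPU hardware: the decorator requests a GPU when the function is called and releases it when the call returns.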
```diff
@@ -5,6 +5,7 @@ Author: Didier Guillevic
 Date: 2025-03-16
 """
 
+import spaces
 from transformers import AutoProcessor, Gemma3ForConditionalGeneration
 from transformers import TextIteratorStreamer
 from threading import Thread
@@ -67,6 +68,7 @@ def build_messages(message: dict, history: list[tuple]):
 #
 # Streaming response
 #
+@spaces.GPU
 @torch.inference_mode()
 def stream_response(messages: list[dict]):
     """Stream the model's response to the chat interface.
@@ -101,6 +103,7 @@ def stream_response(messages: list[dict]):
 #
 # Response (non-streaming)
 #
+@spaces.GPU
 @torch.inference_mode()
 def get_response(messages: list[dict]):
     """Get the model's response.
```
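For context, here is a minimal sketch of how the decorated streaming function fits together after this change. The `google/gemma-3-4b-it` checkpoint, the generation parameters, and the Gradio-style accumulated yield are assumptions for illustration; only the imports, decorators, and function signature come from the diff itself.

```python
import spaces
import torch
from threading import Thread
from transformers import AutoProcessor, Gemma3ForConditionalGeneration
from transformers import TextIteratorStreamer

# Assumption: the diff does not show which Gemma 3 checkpoint the Space loads.
model_id = "google/gemma-3-4b-it"
processor = AutoProcessor.from_pretrained(model_id)
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

@spaces.GPU            # ZeroGPU: a GPU is attached only while this call runs
@torch.inference_mode()
def stream_response(messages: list[dict]):
    """Stream the model's response to the chat interface."""
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    streamer = TextIteratorStreamer(
        processor.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=1024)

    # model.generate() blocks, so run it in a thread and yield text as it arrives.
    Thread(target=model.generate, kwargs=generation_kwargs).start()

    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial
```

As in the diff, `@spaces.GPU` sits above `@torch.inference_mode()`, so the GPU is acquired before the inference-mode body runs. For longer generations the decorator also accepts a duration hint, e.g. `@spaces.GPU(duration=120)`.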