Spaces:

AC2513
/

gemma-demo

Running on Zero

App Files Files Community

AC2513 commited on Jul 10

Commit

b87bea7

1 Parent(s): fd3c6d5

added multimodel loading

Browse files

Files changed (1) hide show

app.py +116 -47

app.py CHANGED Viewed

@@ -17,20 +17,44 @@ from loguru import logger
 from PIL import Image
 dotenv_path = find_dotenv()
 load_dotenv(dotenv_path)
-model_id = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
-input_processor = Gemma3Processor.from_pretrained(model_id)
-model = Gemma3ForConditionalGeneration.from_pretrained(
-    model_id,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-    attn_implementation="eager",
-)
 def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, float]]:
     frames: list[tuple[Image.Image, float]] = []
@@ -123,10 +147,25 @@ def process_history(history: list[dict]) -> list[dict]:
     return messages
 @spaces.GPU(duration=120)
 def run(
     message: dict,
     history: list[dict],
     system_prompt: str,
     max_new_tokens: int,
     max_images: int,
@@ -135,12 +174,25 @@ def run(
     top_k: int,
     repetition_penalty: float,
 ) -> Iterator[str]:
     logger.debug(
-        f"\n message: {message} \n history: {history} \n system_prompt: {system_prompt} \n "
-        f"max_new_tokens: {max_new_tokens} \n max_images: {max_images}"
     )
     messages = []
     if system_prompt:
         messages.append(
@@ -151,16 +203,16 @@ def run(
         {"role": "user", "content": process_user_input(message, max_images)}
     )
-    inputs = input_processor.apply_chat_template(
         messages,
         add_generation_prompt=True,
         tokenize=True,
         return_dict=True,
         return_tensors="pt",
-    ).to(device=model.device, dtype=torch.bfloat16)
     streamer = TextIteratorStreamer(
-        input_processor, timeout=60.0, skip_prompt=True, skip_special_tokens=True
     )
     generate_kwargs = dict(
         inputs,
@@ -172,7 +224,7 @@ def run(
         repetition_penalty=repetition_penalty,
         do_sample=True,
     )
-    t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     output = ""
@@ -180,36 +232,53 @@ def run(
         output += delta
         yield output
-demo = gr.ChatInterface(
-    fn=run,
-    type="messages",
-    chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
-    textbox=gr.MultimodalTextbox(
-        file_types=[".mp4", ".jpg", ".png"], file_count="multiple", autofocus=True
-    ),
-    multimodal=True,
-    additional_inputs=[
-        gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
-        gr.Slider(
-            label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700
-        ),
-        gr.Slider(label="Max Images", minimum=1, maximum=4, step=1, value=2),
-        gr.Slider(
-            label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=0.7
-        ),
-        gr.Slider(
-            label="Top P", minimum=0.1, maximum=1.0, step=0.05, value=0.9
-        ),
-        gr.Slider(
-            label="Top K", minimum=1, maximum=100, step=1, value=50
         ),
-        gr.Slider(
-            label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1
-        )
-    ],
-    stop_btn=False,
-)
 if __name__ == "__main__":
     demo.launch()

 from PIL import Image
 dotenv_path = find_dotenv()
 load_dotenv(dotenv_path)
+MODEL_CONFIGS = {
+    "Gemma 3 4B IT": {
+        "id": os.getenv("MODEL_ID_27", "google/gemma-3-4b-it"),
+        "supports_video": True,
+        "supports_pdf": False
+    },
+    "Gemma 3 1B IT": {
+        "id": os.getenv("MODEL_ID_12", "google/gemma-3-1b-it"),
+        "supports_video": True,
+        "supports_pdf": False
+    },
+    "Gemma 3N E4B IT": {
+        "id": os.getenv("MODEL_ID_3N", "google/gemma-3n-E4B-it"),
+        "supports_video": False,
+        "supports_pdf": False
+    }
+}
+# Load all models and processors
+models = {}
+processor = Gemma3Processor.from_pretrained("google/gemma-3-4b-it")
+for model_name, config in MODEL_CONFIGS.items():
+    logger.info(f"Loading {model_name}...")
+    models[model_name] = Gemma3ForConditionalGeneration.from_pretrained(
+        config["id"],
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
+        attn_implementation="eager",
+    )
+    logger.info(f"✓ {model_name} loaded successfully")
+# Current model selection (default)
+current_model = "Gemma 3 27B IT"
 def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, float]]:
     frames: list[tuple[Image.Image, float]] = []
     return messages
+def get_supported_file_types(model_name: str) -> list[str]:
+    """Get supported file types for the selected model."""
+    config = MODEL_CONFIGS[model_name]
+    base_types = [".jpg", ".png", ".jpeg", ".gif", ".bmp", ".webp"]
+    if config["supports_video"]:
+        base_types.extend([".mp4", ".mov", ".avi"])
+    if config["supports_pdf"]:
+        base_types.append(".pdf")
+    return base_types
 @spaces.GPU(duration=120)
 def run(
     message: dict,
     history: list[dict],
+    model_name: str,
     system_prompt: str,
     max_new_tokens: int,
     max_images: int,
     top_k: int,
     repetition_penalty: float,
 ) -> Iterator[str]:
+    global current_model
+    if model_name != current_model:
+        current_model = model_name
+        logger.info(f"Switched to model: {model_name}")
     logger.debug(
+        f"\n message: {message} \n history: {history} \n model: {model_name} \n "
+        f"system_prompt: {system_prompt} \n max_new_tokens: {max_new_tokens} \n max_images: {max_images}"
     )
+    config = MODEL_CONFIGS[model_name]
+    if not config["supports_video"] and message.get("files"):
+        for file_path in message["files"]:
+            if file_path.endswith((".mp4", ".mov", ".avi")):
+                yield "Error: Selected model does not support video files. Please choose a video-capable model."
+                return
     messages = []
     if system_prompt:
         messages.append(
         {"role": "user", "content": process_user_input(message, max_images)}
     )
+    inputs = processor.apply_chat_template(
         messages,
         add_generation_prompt=True,
         tokenize=True,
         return_dict=True,
         return_tensors="pt",
+    ).to(device=models[current_model].device, dtype=torch.bfloat16)
     streamer = TextIteratorStreamer(
+        processor, timeout=60.0, skip_prompt=True, skip_special_tokens=True
     )
     generate_kwargs = dict(
         inputs,
         repetition_penalty=repetition_penalty,
         do_sample=True,
     )
+    t = Thread(target=models[current_model].generate, kwargs=generate_kwargs)
     t.start()
     output = ""
         output += delta
         yield output
+def create_interface():
+    """Create interface with model selector."""
+    initial_file_types = get_supported_file_types(current_model)
+    demo = gr.ChatInterface(
+        fn=run,
+        type="messages",
+        chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
+        textbox=gr.MultimodalTextbox(
+            file_types=initial_file_types,
+            file_count="multiple",
+            autofocus=True
         ),
+        multimodal=True,
+        additional_inputs=[
+            gr.Dropdown(
+                label="Model",
+                choices=list(MODEL_CONFIGS.keys()),
+                value=current_model,
+                info="Select which model to use for generation"
+            ),
+            gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
+            gr.Slider(
+                label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700
+            ),
+            gr.Slider(label="Max Images", minimum=1, maximum=8, step=1, value=2),
+            gr.Slider(
+                label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=0.7
+            ),
+            gr.Slider(
+                label="Top P", minimum=0.1, maximum=1.0, step=0.05, value=0.9
+            ),
+            gr.Slider(
+                label="Top K", minimum=1, maximum=100, step=1, value=50
+            ),
+            gr.Slider(
+                label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1
+            ),
+        ],
+        stop_btn=False,
+        title="Multi-Model Gemma Chat"
+    )
+    return demo
+demo = create_interface()
 if __name__ == "__main__":
     demo.launch()