Spaces:

AC2513
/

gemma-demo

Running on Zero

App Files Files

xet

Community

AC2513 commited on Jul 10

Commit

c0fc237

1 Parent(s): b87bea7

revert change

Browse files

Files changed (1) hide show

app.py +47 -116

app.py CHANGED Viewed

@@ -17,44 +17,20 @@ from loguru import logger
 from PIL import Image
 dotenv_path = find_dotenv()
 load_dotenv(dotenv_path)
-MODEL_CONFIGS = {
-    "Gemma 3 4B IT": {
-        "id": os.getenv("MODEL_ID_27", "google/gemma-3-4b-it"),
-        "supports_video": True,
-        "supports_pdf": False
-    },
-    "Gemma 3 1B IT": {
-        "id": os.getenv("MODEL_ID_12", "google/gemma-3-1b-it"),
-        "supports_video": True,
-        "supports_pdf": False
-    },
-    "Gemma 3N E4B IT": {
-        "id": os.getenv("MODEL_ID_3N", "google/gemma-3n-E4B-it"),
-        "supports_video": False,
-        "supports_pdf": False
-    }
-}
-# Load all models and processors
-models = {}
-processor = Gemma3Processor.from_pretrained("google/gemma-3-4b-it")
-for model_name, config in MODEL_CONFIGS.items():
-    logger.info(f"Loading {model_name}...")
-    models[model_name] = Gemma3ForConditionalGeneration.from_pretrained(
-        config["id"],
-        torch_dtype=torch.bfloat16,
-        device_map="auto",
-        attn_implementation="eager",
-    )
-    logger.info(f"✓ {model_name} loaded successfully")
-# Current model selection (default)
-current_model = "Gemma 3 27B IT"
 def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, float]]:
     frames: list[tuple[Image.Image, float]] = []
@@ -147,25 +123,10 @@ def process_history(history: list[dict]) -> list[dict]:
     return messages
-def get_supported_file_types(model_name: str) -> list[str]:
-    """Get supported file types for the selected model."""
-    config = MODEL_CONFIGS[model_name]
-    base_types = [".jpg", ".png", ".jpeg", ".gif", ".bmp", ".webp"]
-    if config["supports_video"]:
-        base_types.extend([".mp4", ".mov", ".avi"])
-    if config["supports_pdf"]:
-        base_types.append(".pdf")
-    return base_types
 @spaces.GPU(duration=120)
 def run(
     message: dict,
     history: list[dict],
-    model_name: str,
     system_prompt: str,
     max_new_tokens: int,
     max_images: int,
@@ -174,25 +135,12 @@ def run(
     top_k: int,
     repetition_penalty: float,
 ) -> Iterator[str]:
-    global current_model
-    if model_name != current_model:
-        current_model = model_name
-        logger.info(f"Switched to model: {model_name}")
     logger.debug(
-        f"\n message: {message} \n history: {history} \n model: {model_name} \n "
-        f"system_prompt: {system_prompt} \n max_new_tokens: {max_new_tokens} \n max_images: {max_images}"
     )
-    config = MODEL_CONFIGS[model_name]
-    if not config["supports_video"] and message.get("files"):
-        for file_path in message["files"]:
-            if file_path.endswith((".mp4", ".mov", ".avi")):
-                yield "Error: Selected model does not support video files. Please choose a video-capable model."
-                return
     messages = []
     if system_prompt:
         messages.append(
@@ -203,16 +151,16 @@ def run(
         {"role": "user", "content": process_user_input(message, max_images)}
     )
-    inputs = processor.apply_chat_template(
         messages,
         add_generation_prompt=True,
         tokenize=True,
         return_dict=True,
         return_tensors="pt",
-    ).to(device=models[current_model].device, dtype=torch.bfloat16)
     streamer = TextIteratorStreamer(
-        processor, timeout=60.0, skip_prompt=True, skip_special_tokens=True
     )
     generate_kwargs = dict(
         inputs,
@@ -224,7 +172,7 @@ def run(
         repetition_penalty=repetition_penalty,
         do_sample=True,
     )
-    t = Thread(target=models[current_model].generate, kwargs=generate_kwargs)
     t.start()
     output = ""
@@ -232,53 +180,36 @@ def run(
         output += delta
         yield output
-def create_interface():
-    """Create interface with model selector."""
-    initial_file_types = get_supported_file_types(current_model)
-    demo = gr.ChatInterface(
-        fn=run,
-        type="messages",
-        chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
-        textbox=gr.MultimodalTextbox(
-            file_types=initial_file_types,
-            file_count="multiple",
-            autofocus=True
-        ),
-        multimodal=True,
-        additional_inputs=[
-            gr.Dropdown(
-                label="Model",
-                choices=list(MODEL_CONFIGS.keys()),
-                value=current_model,
-                info="Select which model to use for generation"
-            ),
-            gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
-            gr.Slider(
-                label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700
-            ),
-            gr.Slider(label="Max Images", minimum=1, maximum=8, step=1, value=2),
-            gr.Slider(
-                label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=0.7
-            ),
-            gr.Slider(
-                label="Top P", minimum=0.1, maximum=1.0, step=0.05, value=0.9
-            ),
-            gr.Slider(
-                label="Top K", minimum=1, maximum=100, step=1, value=50
-            ),
-            gr.Slider(
-                label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1
-            ),
-        ],
-        stop_btn=False,
-        title="Multi-Model Gemma Chat"
-    )
-    return demo
-demo = create_interface()
 if __name__ == "__main__":
     demo.launch()

 from PIL import Image
 dotenv_path = find_dotenv()
 load_dotenv(dotenv_path)
+model_id = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
+input_processor = Gemma3Processor.from_pretrained(model_id)
+model = Gemma3ForConditionalGeneration.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+    attn_implementation="eager",
+)
 def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, float]]:
     frames: list[tuple[Image.Image, float]] = []
     return messages
 @spaces.GPU(duration=120)
 def run(
     message: dict,
     history: list[dict],
     system_prompt: str,
     max_new_tokens: int,
     max_images: int,
     top_k: int,
     repetition_penalty: float,
 ) -> Iterator[str]:
     logger.debug(
+        f"\n message: {message} \n history: {history} \n system_prompt: {system_prompt} \n "
+        f"max_new_tokens: {max_new_tokens} \n max_images: {max_images}"
     )
     messages = []
     if system_prompt:
         messages.append(
         {"role": "user", "content": process_user_input(message, max_images)}
     )
+    inputs = input_processor.apply_chat_template(
         messages,
         add_generation_prompt=True,
         tokenize=True,
         return_dict=True,
         return_tensors="pt",
+    ).to(device=model.device, dtype=torch.bfloat16)
     streamer = TextIteratorStreamer(
+        input_processor, timeout=60.0, skip_prompt=True, skip_special_tokens=True
     )
     generate_kwargs = dict(
         inputs,
         repetition_penalty=repetition_penalty,
         do_sample=True,
     )
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     output = ""
         output += delta
         yield output
+demo = gr.ChatInterface(
+    fn=run,
+    type="messages",
+    chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
+    textbox=gr.MultimodalTextbox(
+        file_types=[".mp4", ".jpg", ".png"], file_count="multiple", autofocus=True
+    ),
+    multimodal=True,
+    additional_inputs=[
+        gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
+        gr.Slider(
+            label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700
+        ),
+        gr.Slider(label="Max Images", minimum=1, maximum=4, step=1, value=2),
+        gr.Slider(
+            label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=0.7
+        ),
+        gr.Slider(
+            label="Top P", minimum=0.1, maximum=1.0, step=0.05, value=0.9
+        ),
+        gr.Slider(
+            label="Top K", minimum=1, maximum=100, step=1, value=50
+        ),
+        gr.Slider(
+            label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1
+        )
+    ],
+    stop_btn=False,
+)
 if __name__ == "__main__":
     demo.launch()