Spaces:

suayptalha
/

QwQ-32B-Preview-Vision

Running

App Files Files Community

suayptalha committed on Dec 19, 2024

Commit

b11f420

verified ·

1 Parent(s): 62d3eaa

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -63

app.py CHANGED Viewed

@@ -1,15 +1,10 @@
 import gradio as gr
 from gradio_client import Client, handle_file
 from huggingface_hub import InferenceClient
-from PIL import Image
-from threading import Thread
-import time
-# Initialize clients for Moondream and QwQ
 moondream_client = Client("vikhyatk/moondream2")
 qwq_client = InferenceClient("Qwen/QwQ-32B-Preview")
-# Function to describe the image using Moondream API
 def describe_image(image, user_message):
     result = moondream_client.predict(
         img=handle_file(image),
@@ -18,9 +13,9 @@ def describe_image(image, user_message):
     )
     description = result
     user_message = description + "\n" + user_message
-    # Using QwQ model for conversation after description
     qwq_result = qwq_client.chat_completion(
         messages=[{"role": "user", "content": user_message}],
         max_tokens=512,
@@ -30,61 +25,18 @@ def describe_image(image, user_message):
     return qwq_result['choices'][0]['message']['content']
-# Function to handle chat or image-based conversation
-def chat_or_image(message, history, max_new_tokens=250):
-    txt = message["text"]
-    ext_buffer = f"{txt}"
-    messages = []
-    images = []
-    # Process the conversation history
-    for i, msg in enumerate(history):
-        if isinstance(msg[0], tuple):
-            messages.append({"role": "user", "content": [{"type": "text", "text": history[i+1][0]}, {"type": "image"}]})
-            messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i+1][1]}]})
-            images.append(Image.open(msg[0][0]).convert("RGB"))
-        elif isinstance(msg[0], str) and isinstance(history[i-1][0], str): # text only turn
-            messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
-            messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
-    # Add current message
-    if len(message["files"]) == 1:
-        if isinstance(message["files"][0], str):  # Example images
-            image = Image.open(message["files"][0]).convert("RGB")
-        else:  # Regular image input
-            image = Image.open(message["files"][0]["path"]).convert("RGB")
-        images.append(image)
-        messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image"}]})
     else:
-        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})
-    # Processing the conversation to send to the model
-    texts = moondream_client.apply_chat_template(messages, add_generation_prompt=True)
-    if images == []:
-        inputs = moondream_client(text=texts, return_tensors="pt").to("cuda")
-    else:
-        inputs = moondream_client(text=texts, images=images, return_tensors="pt").to("cuda")
-    streamer = TextIteratorStreamer(moondream_client, skip_special_tokens=True, skip_prompt=True)
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
-    generated_text = ""
-    # Generating the response with threading to avoid blocking
-    thread = Thread(target=qwq_client.chat_completion, kwargs=generation_kwargs)
-    thread.start()
-    buffer = ""
-    # Stream the generated text
-    for new_text in streamer:
-        buffer += new_text
-        generated_text_without_prompt = buffer
-        time.sleep(0.01)
-        yield buffer
-# Gradio Interface setup
 demo = gr.Interface(
     fn=chat_or_image,
     inputs=[
@@ -92,10 +44,7 @@ demo = gr.Interface(
         gr.Textbox(label="Ask anything", placeholder="Ask...", lines=2)
     ],
     outputs="text",
-    title="Multimodal Llama Chatbot",
-    description="Interact with the Llama chatbot. Upload an image, ask a question, or both!",
-    live=True
 )
 if __name__ == "__main__":
-    demo.launch(show_error=True)

 import gradio as gr
 from gradio_client import Client, handle_file
 from huggingface_hub import InferenceClient
 moondream_client = Client("vikhyatk/moondream2")
 qwq_client = InferenceClient("Qwen/QwQ-32B-Preview")
 def describe_image(image, user_message):
     result = moondream_client.predict(
         img=handle_file(image),
     )
     description = result
     user_message = description + "\n" + user_message
     qwq_result = qwq_client.chat_completion(
         messages=[{"role": "user", "content": user_message}],
         max_tokens=512,
     return qwq_result['choices'][0]['message']['content']
+def chat_or_image(image, user_message):  # Handler passed to gr.Interface as fn: returns a text reply for user_message, using the image when one is supplied.
+    if image:  # Truthy image: delegate to describe_image, which prepends the Moondream result to the user's text before querying QwQ.
+        return describe_image(image, user_message)
     else:
+        qwq_result = qwq_client.chat_completion(  # No image: send the text directly to the QwQ inference client.
+            messages=[{"role": "user", "content": user_message}],  # Single user turn; no conversation history is passed.
+            max_tokens=512,
+            temperature=0.7,
+            top_p=0.95
+        )
+        return qwq_result['choices'][0]['message']['content']  # Content of the first choice in the response.
 demo = gr.Interface(
     fn=chat_or_image,
     inputs=[
         gr.Textbox(label="Ask anything", placeholder="Ask...", lines=2)
     ],
     outputs="text",
 )
 if __name__ == "__main__":
+    demo.launch(show_error=True)