Spaces:

prithivMLmods
/

FLUX-LoRA-DLC2

Running on Zero

App Files Files Community

prithivMLmods committed on Mar 4

Commit

b93dc2d

verified ·

1 Parent(s): a898aad

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -47

app.py CHANGED Viewed

@@ -20,7 +20,6 @@ from transformers import (
     TextIteratorStreamer,
     Qwen2VLForConditionalGeneration,
     AutoProcessor,
-    AutoModelForImageTextToText,  # <-- New import for aya-vision
 )
 from transformers.image_utils import load_image
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
@@ -52,16 +51,6 @@ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to("cuda").eval()
-# --- New feature: aya-vision ---
-AYA_MODEL_ID = "CohereForAI/aya-vision-8b"
-aya_processor = AutoProcessor.from_pretrained(AYA_MODEL_ID, trust_remote_code=True)
-aya_model = AutoModelForImageTextToText.from_pretrained(
-    AYA_MODEL_ID,
-    trust_remote_code=True,
-    torch_dtype=torch.float16
-).to("cuda").eval()
-# --------------------------------
 async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(output_file)
@@ -199,38 +188,6 @@ def generate(
     files = input_dict.get("files", [])
     lower_text = text.lower().strip()
-    # --- New branch for @aya-vision feature ---
-    if lower_text.startswith("@aya-vision"):
-        prompt_clean = re.sub(r"@aya-vision", "", text, flags=re.IGNORECASE).strip().strip('"')
-        if not files:
-            yield "Please provide an image for @aya-vision command."
-            return
-        image = load_image(files[0])
-        messages = [{
-            "role": "user",
-            "content": [
-                {"type": "image", "image": image},
-                {"type": "text", "text": prompt_clean},
-            ]
-        }]
-        prompt_aya = aya_processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        inputs = aya_processor(text=[prompt_aya], images=[image], return_tensors="pt", padding=True).to("cuda")
-        streamer = TextIteratorStreamer(aya_processor, skip_prompt=True, skip_special_tokens=True)
-        generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
-        thread = Thread(target=aya_model.generate, kwargs=generation_kwargs)
-        thread.start()
-        buffer = ""
-        yield "💭 Processing @aya-vision..."
-        for new_text in streamer:
-            buffer += new_text
-            buffer = buffer.replace("<|im_end|>", "")
-            time.sleep(0.01)
-            yield buffer
-        return
-    # ------------------------------------------------
     # Check if the prompt is an image generation command using model flags.
     if (lower_text.startswith("@lightningv5") or
         lower_text.startswith("@lightningv4") or
@@ -382,22 +339,19 @@ demo = gr.ChatInterface(
         gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
     ],
     examples=[
-        [{"text": "@aya-vision Extract JSON from the image", "files": ["examples/document.jpg"]}],
-        [{"text": "@aya-vision Summarize the letter", "files": ["examples/1.png"]}],
         ["Python Program for Array Rotation"],
         ["@tts1 Who is Nikola Tesla, and why did he die?"],
         ['@lightningv5 Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic'],
         ['@lightningv4 A serene landscape with mountains'],
         ['@turbov3 Abstract art, colorful and vibrant'],
         ["@tts2 What causes rainbows to form?"],
-        [" Describe the content of this image"],
     ],
     cache_examples=False,
     type="messages",
     description=DESCRIPTION,
     css=css,
     fill_height=True,
-    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple", placeholder="@aya-vision for img-txt-txt / use the tags @lightningv5 @lightningv4 @turbov3 or @aya-vision for image-based commands!"),
     stop_btn="Stop Generation",
     multimodal=True,

     TextIteratorStreamer,
     Qwen2VLForConditionalGeneration,
     AutoProcessor,
 )
 from transformers.image_utils import load_image
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
     torch_dtype=torch.float16
 ).to("cuda").eval()
 async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(output_file)
     files = input_dict.get("files", [])
     lower_text = text.lower().strip()
     # Check if the prompt is an image generation command using model flags.
     if (lower_text.startswith("@lightningv5") or
         lower_text.startswith("@lightningv4") or
         gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
     ],
     examples=[
         ["Python Program for Array Rotation"],
         ["@tts1 Who is Nikola Tesla, and why did he die?"],
         ['@lightningv5 Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic'],
         ['@lightningv4 A serene landscape with mountains'],
         ['@turbov3 Abstract art, colorful and vibrant'],
         ["@tts2 What causes rainbows to form?"],
     ],
     cache_examples=False,
     type="messages",
     description=DESCRIPTION,
     css=css,
     fill_height=True,
+    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple", placeholder="use the tags @lightningv5 @lightningv4 @turbov3 for image gen !"),
     stop_btn="Stop Generation",
     multimodal=True,