import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
import spaces
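
# The `spaces` package supplies the @spaces.GPU decorator used below, which
# requests a GPU per call on Hugging Face ZeroGPU Spaces; outside of Spaces
# the decorator has no effect.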
# Model configuration
MODEL_ID = "numind/NuMarkdown-8B-reasoning"
# Load processor
processor = AutoProcessor.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    min_pixels=100 * 28 * 28,
    max_pixels=5000 * 28 * 28,
)
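# min_pixels / max_pixels bound the resolution the Qwen2.5-VL processor feeds
# the vision tower, expressed in 28x28-pixel patches: roughly 100 to 5000
# patches per image here. A larger budget preserves small text at the cost of
# more vision tokens.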
# Load model
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
    trust_remote_code=True,
)
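# Note: "flash_attention_2" requires the optional flash-attn package and a
# compatible GPU. If it is unavailable in your environment, one fallback (an
# assumption, not part of this Space's setup) is PyTorch's built-in attention:
#
#     model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
#         MODEL_ID,
#         torch_dtype=torch.bfloat16,
#         attn_implementation="sdpa",
#         device_map="auto",
#         trust_remote_code=True,
#     )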
@spaces.GPU
def process_image(image):
    """
    Process an image using the NuMarkdown-8B-reasoning model.

    Args:
        image: PIL Image object, or None if nothing has been uploaded.

    Returns:
        tuple: (reasoning, answer) extracted from the model output.
    """
    if image is None:
        return "Please upload an image.", ""
    try:
        # Convert image to RGB if needed
        img = image.convert("RGB")
        # Prepare messages for the model
        messages = [{
            "role": "user",
            "content": [
                {"type": "image"},
            ],
        }]
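        # NuMarkdown is prompted with the image alone, no text instruction;
        # the chat template below wraps it in the format the model expects.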
        # Apply chat template
        prompt = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        # Process inputs
        model_input = processor(
            text=prompt,
            images=[img],
            return_tensors="pt",
        ).to(model.device)
        # Generate output (do_sample=True so the temperature setting takes
        # effect; transformers ignores temperature under greedy decoding)
        with torch.no_grad():
            model_output = model.generate(
                **model_input,
                do_sample=True,
                temperature=0.7,
                max_new_tokens=5000,
            )
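        # generate() returns the prompt tokens followed by the new tokens, so
        # the decoded string contains the whole conversation; the model's own
        # <think>/<answer> tags are used below to pull out the generated parts.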
        # Decode result
        result = processor.decode(model_output[0])
        # Extract reasoning and answer
        try:
            reasoning = result.split("<think>")[1].split("</think>")[0]
        except IndexError:
            reasoning = "No reasoning found in output."
        try:
            answer = result.split("<answer>")[1].split("</answer>")[0]
        except IndexError:
            answer = "No answer found in output."
        return reasoning.strip(), answer.strip()
    except Exception as e:
        error_msg = f"Error processing image: {str(e)}"
        return error_msg, error_msg
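
# Example (hypothetical, for quick local testing without the UI; "page.png"
# is a placeholder path):
#
#     from PIL import Image
#     reasoning, markdown = process_image(Image.open("page.png"))
#     print(markdown)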

def create_interface():
    """Create and configure the Gradio interface."""
    with gr.Blocks(
        title="NuMarkdown-8B Reasoning Demo",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .image-container, .output-container {
            height: 600px !important;
        }
        """,
    ) as demo:
        gr.Markdown(
            """
            # 🤖 NuMarkdown-8B Reasoning Demo

            Upload a document image and let the NuMarkdown-8B model convert it to
            Markdown with detailed reasoning. The model will show both its thinking
            process and its final answer.
            """
        )
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                gr.Markdown("### 📸 Upload Your Image")
                image_input = gr.Image(
                    type="pil",
                    label="Input Image",
                    height=600,
                    container=True,
                )
                process_btn = gr.Button(
                    "🔍 Analyze Image",
                    variant="primary",
                    size="lg",
                )
            with gr.Column(scale=1):
                gr.Markdown("### 🧠 Model Reasoning")
                reasoning_output = gr.Textbox(
                    label="Thinking Process",
                    lines=15,
                    max_lines=20,
                    placeholder="The model's reasoning will appear here...",
                    container=True,
                    show_copy_button=True,
                )
                gr.Markdown("### 💡 Final Answer")
                answer_output = gr.Textbox(
                    label="Answer",
                    lines=10,
                    max_lines=15,
                    placeholder="The model's answer will appear here...",
                    container=True,
                    show_copy_button=True,
                )
        # Event handlers
        process_btn.click(
            fn=process_image,
            inputs=[image_input],
            outputs=[reasoning_output, answer_output],
            show_progress=True,
        )
        # Also trigger on image upload
        image_input.change(
            fn=process_image,
            inputs=[image_input],
            outputs=[reasoning_output, answer_output],
            show_progress=True,
        )
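        # The change event also fires when the image is cleared; process_image
        # handles that case by returning a "Please upload an image." prompt.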
        gr.Markdown(
            """
            ---
            ### 📋 How to Use:
            1. **Upload an image** using the file uploader on the left
            2. **Click "Analyze Image"** or wait for automatic processing
            3. **View the results** on the right:
               - **Reasoning**: how the model thinks through the document
               - **Answer**: the final Markdown conversion

            ### 🔧 Model Details:
            - **Model**: numind/NuMarkdown-8B-reasoning
            - **Type**: Vision-Language Model with reasoning capabilities
            - **Features**: detailed thinking process + final answer

            *This demo runs on Hugging Face ZeroGPU Spaces for fast inference.*
            """
        )
    return demo

if __name__ == "__main__":
    demo = create_interface()
    # share=True only matters for local runs; on Spaces, Gradio serves the app
    # at the Space's own URL and ignores the flag.
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )