import spaces
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
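
# NuMarkdown-8B-reasoning is a Qwen2.5-VL fine-tune that converts document
# images to Markdown, emitting its chain of thought in <think>...</think>
# and the final Markdown in <answer>...</answer> (parsed in process() below).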

# Load model
model_id = "numind/NuMarkdown-8B-reasoning"

# min_pixels / max_pixels bound how many 28x28 vision patches each image is
# resized to, trading transcription fidelity against GPU memory
processor = AutoProcessor.from_pretrained(
    model_id,
    trust_remote_code=True,
    min_pixels=100 * 28 * 28,
    max_pixels=5000 * 28 * 28,
)

# bfloat16 weights with SDPA attention; device_map="auto" places the model
# on the available GPU
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    attn_implementation="sdpa",
    device_map="auto",
    trust_remote_code=True,
)

# Request a ZeroGPU worker for up to 300 s per call (no-op outside Spaces)
@spaces.GPU(duration=300)
def process(image):
    if image is None:
        return "Please upload an image."
    
    # Convert to RGB
    img = image.convert("RGB")
    
    # Chat message with an image placeholder; the processor pairs it with
    # the PIL image passed via images= below
    messages = [{
        "role": "user",
        "content": [{"type": "image"}],
    }]
    
    # Apply chat template
    prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
    # Process
    model_input = processor(text=prompt, images=[img], return_tensors="pt").to(model.device)
    
    # Generate (do_sample=True is required for temperature to take effect)
    with torch.no_grad():
        model_output = model.generate(**model_input, do_sample=True, temperature=0.7, max_new_tokens=10000)
    
    # Decode only the newly generated tokens, skipping the echoed prompt
    new_tokens = model_output[0][model_input["input_ids"].shape[1]:]
    result = processor.decode(new_tokens)
    
    # Extract reasoning and answer from the <think>/<answer> tags
    try:
        reasoning = result.split("<think>")[1].split("</think>")[0]
        answer = result.split("<answer>")[1].split("</answer>")[0]
        return f"**Reasoning:**\n{reasoning}\n\n**Answer:**\n{answer}"
    except IndexError:
        # Tags missing (e.g. generation hit the token limit): return raw output
        return result

# Create simple interface
demo = gr.Interface(
    fn=process,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="Result", lines=20),
    title="NuMarkdown-8B Reasoning Demo",
    description="Upload a document image; the model shows its reasoning, then the extracted Markdown."
)

if __name__ == "__main__":
    demo.launch()  # share links are unnecessary on Spaces
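
# Note: running this outside Spaces is untested here; assuming a CUDA GPU with
# enough memory for the bf16 weights (roughly 16 GB for 8B parameters) and
# `pip install spaces gradio torch transformers pillow`, `python app.py`
# should serve the same demo locally.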