William Mattingly committed on
Commit
5411741
·
1 Parent(s): bc7ccb8
Files changed (1) hide show
  1. app.py +93 -90
app.py CHANGED
@@ -7,7 +7,7 @@ from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
7
  # Model configuration
8
  MODEL_ID = "numind/NuMarkdown-8B-reasoning"
9
 
10
- # Load processor
11
  processor = AutoProcessor.from_pretrained(
12
  MODEL_ID,
13
  trust_remote_code=True,
@@ -15,7 +15,6 @@ processor = AutoProcessor.from_pretrained(
15
  max_pixels=5000*28*28
16
  )
17
 
18
- # Load model
19
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
20
  MODEL_ID,
21
  torch_dtype=torch.bfloat16,
@@ -25,21 +24,19 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
25
  )
26
 
27
  @spaces.GPU
28
- def process_image(image):
29
- """Process an image using the NuMarkdown-8B-reasoning model."""
30
  if image is None:
31
  return "Please upload an image.", ""
32
 
33
  try:
34
- # Convert image to RGB if needed
35
  img = image.convert("RGB")
36
 
37
- # Prepare messages for the model
38
  messages = [{
39
  "role": "user",
40
- "content": [
41
- {"type": "image"},
42
- ],
43
  }]
44
 
45
  # Apply chat template
@@ -56,7 +53,7 @@ def process_image(image):
56
  return_tensors="pt"
57
  ).to(model.device)
58
 
59
- # Generate output
60
  with torch.no_grad():
61
  model_output = model.generate(
62
  **model_input,
@@ -68,95 +65,101 @@ def process_image(image):
68
  result = processor.decode(model_output[0])
69
 
70
  # Extract reasoning and answer
71
- try:
72
- reasoning = result.split("<think>")[1].split("</think>")[0]
73
- except IndexError:
74
- reasoning = "No reasoning found in output."
75
 
76
  try:
77
- answer = result.split("<answer>")[1].split("</answer>")[0]
78
- except IndexError:
79
- answer = "No answer found in output."
 
 
 
 
 
 
 
80
 
81
- return reasoning.strip(), answer.strip()
 
 
 
 
82
 
83
  except Exception as e:
84
- error_msg = f"Error processing image: {str(e)}"
85
  return error_msg, error_msg
86
 
87
- def create_gradio_interface():
88
- """Create the Gradio interface"""
 
 
 
 
 
 
 
 
 
 
89
 
90
- with gr.Blocks(title="NuMarkdown-8B Reasoning Demo") as demo:
91
-
92
- gr.HTML("""
93
- <div style="text-align: center">
94
- <h1>🤖 NuMarkdown-8B Reasoning Demo</h1>
95
- <p>Upload an image and let the model analyze it with detailed reasoning.</p>
96
- </div>
97
- """)
98
-
99
- with gr.Row():
100
- with gr.Column():
101
- gr.Markdown("### 📸 Upload Your Image")
102
- image_input = gr.Image(
103
- type="pil",
104
- label="Input Image",
105
- height=400
106
- )
107
- process_btn = gr.Button(
108
- "🔍 Analyze Image",
109
- variant="primary",
110
- size="lg"
111
- )
112
 
113
- with gr.Column():
114
- gr.Markdown("### 🧠 Model Reasoning")
115
- reasoning_output = gr.Textbox(
116
- label="Thinking Process",
117
- lines=12,
118
- max_lines=15,
119
- placeholder="The model's reasoning will appear here...",
120
- show_copy_button=True
121
- )
122
-
123
- gr.Markdown("### 💡 Final Answer")
124
- answer_output = gr.Textbox(
125
- label="Answer",
126
- lines=8,
127
- max_lines=12,
128
- placeholder="The model's answer will appear here...",
129
- show_copy_button=True
130
- )
131
-
132
- # Event handlers
133
- process_btn.click(
134
- fn=process_image,
135
- inputs=[image_input],
136
- outputs=[reasoning_output, answer_output]
137
- )
138
-
139
- image_input.upload(
140
- fn=process_image,
141
- inputs=[image_input],
142
- outputs=[reasoning_output, answer_output]
143
- )
144
-
145
- gr.HTML("""
146
- <div style="text-align: center; margin-top: 20px; color: #666;">
147
- <p><strong>Model:</strong> numind/NuMarkdown-8B-reasoning</p>
148
- <p>This demo runs on HuggingFace Zero GPU Spaces for fast inference.</p>
149
- </div>
150
- """)
151
 
152
- return demo
 
 
 
 
153
 
154
  if __name__ == "__main__":
155
- demo = create_gradio_interface()
156
- demo.queue(max_size=10).launch(
157
- server_name="0.0.0.0",
158
- server_port=7860,
159
- share=False,
160
- debug=True,
161
- show_error=True
162
- )
 
7
  # Model configuration
8
  MODEL_ID = "numind/NuMarkdown-8B-reasoning"
9
 
10
+ # Load processor and model
11
  processor = AutoProcessor.from_pretrained(
12
  MODEL_ID,
13
  trust_remote_code=True,
 
15
  max_pixels=5000*28*28
16
  )
17
 
 
18
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
19
  MODEL_ID,
20
  torch_dtype=torch.bfloat16,
 
24
  )
25
 
26
  @spaces.GPU
27
+ def analyze_image(image):
28
+ """Analyze an image using the NuMarkdown-8B-reasoning model."""
29
  if image is None:
30
  return "Please upload an image.", ""
31
 
32
  try:
33
+ # Convert image to RGB
34
  img = image.convert("RGB")
35
 
36
+ # Prepare messages
37
  messages = [{
38
  "role": "user",
39
+ "content": [{"type": "image"}],
 
 
40
  }]
41
 
42
  # Apply chat template
 
53
  return_tensors="pt"
54
  ).to(model.device)
55
 
56
+ # Generate
57
  with torch.no_grad():
58
  model_output = model.generate(
59
  **model_input,
 
65
  result = processor.decode(model_output[0])
66
 
67
  # Extract reasoning and answer
68
+ reasoning = "No reasoning found."
69
+ answer = "No answer found."
 
 
70
 
71
  try:
72
+ if "<think>" in result and "</think>" in result:
73
+ reasoning = result.split("<think>")[1].split("</think>")[0].strip()
74
+ except:
75
+ pass
76
+
77
+ try:
78
+ if "<answer>" in result and "</answer>" in result:
79
+ answer = result.split("<answer>")[1].split("</answer>")[0].strip()
80
+ except:
81
+ pass
82
 
83
+ # If no structured output, return the raw result
84
+ if reasoning == "No reasoning found." and answer == "No answer found.":
85
+ return result[:2000] + "..." if len(result) > 2000 else result, result
86
+
87
+ return reasoning, answer
88
 
89
  except Exception as e:
90
+ error_msg = f"Error: {str(e)}"
91
  return error_msg, error_msg
92
 
93
+ # Create custom CSS
94
+ css = """
95
+ .gradio-container {
96
+ max-width: 1200px !important;
97
+ }
98
+ .output-text {
99
+ height: 400px !important;
100
+ }
101
+ """
102
+
103
+ # Create the interface using gr.Blocks
104
+ with gr.Blocks(css=css, title="NuMarkdown-8B Reasoning Demo") as demo:
105
 
106
+ gr.HTML("""
107
+ <div style="text-align: center; margin-bottom: 20px;">
108
+ <h1>🤖 NuMarkdown-8B Reasoning Demo</h1>
109
+ <p style="color: #666;">Upload an image and see the model's detailed reasoning process and final answer.</p>
110
+ </div>
111
+ """)
112
+
113
+ with gr.Row():
114
+ with gr.Column(scale=1):
115
+ image_input = gr.Image(
116
+ label="📸 Upload Your Image",
117
+ type="pil",
118
+ height=500
119
+ )
120
+ analyze_btn = gr.Button(
121
+ "🔍 Analyze Image",
122
+ variant="primary",
123
+ size="lg"
124
+ )
 
 
 
125
 
126
+ with gr.Column(scale=1):
127
+ reasoning_output = gr.Textbox(
128
+ label="🧠 Model Reasoning",
129
+ placeholder="The model's step-by-step thinking will appear here...",
130
+ lines=15,
131
+ max_lines=20,
132
+ elem_classes=["output-text"]
133
+ )
134
+
135
+ answer_output = gr.Textbox(
136
+ label="💡 Final Answer",
137
+ placeholder="The model's final conclusion will appear here...",
138
+ lines=10,
139
+ max_lines=15,
140
+ elem_classes=["output-text"]
141
+ )
142
+
143
+ gr.HTML("""
144
+ <div style="text-align: center; margin-top: 20px; padding: 15px; background-color: #f8f9fa; border-radius: 8px;">
145
+ <p><strong>Model:</strong> numind/NuMarkdown-8B-reasoning</p>
146
+ <p><strong>Features:</strong> Vision-Language Model with detailed reasoning capabilities</p>
147
+ <p style="color: #666; font-size: 0.9em;">Powered by HuggingFace Zero GPU Spaces</p>
148
+ </div>
149
+ """)
150
+
151
+ # Event handlers
152
+ analyze_btn.click(
153
+ fn=analyze_image,
154
+ inputs=image_input,
155
+ outputs=[reasoning_output, answer_output]
156
+ )
 
 
 
 
 
 
 
157
 
158
+ image_input.upload(
159
+ fn=analyze_image,
160
+ inputs=image_input,
161
+ outputs=[reasoning_output, answer_output]
162
+ )
163
 
164
  if __name__ == "__main__":
165
+ demo.launch(share=True)