import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
import spaces
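
# The `spaces` package supplies the @spaces.GPU decorator used below, which
# requests a GPU per call on Hugging Face ZeroGPU Spaces; outside of Spaces
# the decorator has no effect.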
# Model configuration
MODEL_ID = "numind/NuMarkdown-8B-reasoning"
# Load processor
processor = AutoProcessor.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    min_pixels=100 * 28 * 28,
    max_pixels=5000 * 28 * 28,
)
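# min_pixels / max_pixels bound the resolution the Qwen2.5-VL processor feeds
# the vision tower, expressed in 28x28-pixel patches: roughly 100 to 5000
# patches per image here. A larger budget preserves small text at the cost of
# more vision tokens.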
# Load model
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
    trust_remote_code=True,
)
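# Note: "flash_attention_2" requires the optional flash-attn package and a
# compatible GPU. If it is unavailable in your environment, one fallback (an
# assumption, not part of this Space's setup) is PyTorch's built-in attention:
#
#     model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
#         MODEL_ID,
#         torch_dtype=torch.bfloat16,
#         attn_implementation="sdpa",
#         device_map="auto",
#         trust_remote_code=True,
#     )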
@spaces.GPU
def process_image(image):
    """
    Process an image using the NuMarkdown-8B-reasoning model.

    Args:
        image: PIL Image object, or None if nothing has been uploaded.

    Returns:
        tuple: (reasoning, answer) extracted from the model output.
    """
    if image is None:
        return "Please upload an image.", ""
    try:
        # Convert image to RGB if needed
        img = image.convert("RGB")
        # Prepare messages for the model
        messages = [{
            "role": "user",
            "content": [
                {"type": "image"},
            ],
        }]
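        # NuMarkdown is prompted with the image alone, no text instruction;
        # the chat template below wraps it in the format the model expects.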
        # Apply chat template
        prompt = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        # Process inputs
        model_input = processor(
            text=prompt,
            images=[img],
            return_tensors="pt",
        ).to(model.device)
        # Generate output (do_sample=True so the temperature setting takes
        # effect; transformers ignores temperature under greedy decoding)
        with torch.no_grad():
            model_output = model.generate(
                **model_input,
                do_sample=True,
                temperature=0.7,
                max_new_tokens=5000,
            )
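        # generate() returns the prompt tokens followed by the new tokens, so
        # the decoded string contains the whole conversation; the model's own
        # <think>/<answer> tags are used below to pull out the generated parts.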
        # Decode result
        result = processor.decode(model_output[0])
        # Extract reasoning and answer
        try:
            reasoning = result.split("<think>")[1].split("</think>")[0]
        except IndexError:
            reasoning = "No reasoning found in output."
        try:
            answer = result.split("<answer>")[1].split("</answer>")[0]
        except IndexError:
            answer = "No answer found in output."
        return reasoning.strip(), answer.strip()
    except Exception as e:
        error_msg = f"Error processing image: {str(e)}"
        return error_msg, error_msg
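
# Example (hypothetical, for quick local testing without the UI; "page.png"
# is a placeholder path):
#
#     from PIL import Image
#     reasoning, markdown = process_image(Image.open("page.png"))
#     print(markdown)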

def create_interface():
    """Create and configure the Gradio interface."""
    with gr.Blocks(
        title="NuMarkdown-8B Reasoning Demo",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .image-container, .output-container {
            height: 600px !important;
        }
        """,
    ) as demo:
        gr.Markdown(
            """
            # 🤖 NuMarkdown-8B Reasoning Demo

            Upload a document image and let the NuMarkdown-8B model convert it to
            Markdown with detailed reasoning. The model will show both its thinking
            process and its final answer.
            """
        )
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                gr.Markdown("### 📸 Upload Your Image")
                image_input = gr.Image(
                    type="pil",
                    label="Input Image",
                    height=600,
                    container=True,
                )
                process_btn = gr.Button(
                    "🔍 Analyze Image",
                    variant="primary",
                    size="lg",
                )
            with gr.Column(scale=1):
                gr.Markdown("### 🧠 Model Reasoning")
                reasoning_output = gr.Textbox(
                    label="Thinking Process",
                    lines=15,
                    max_lines=20,
                    placeholder="The model's reasoning will appear here...",
                    container=True,
                    show_copy_button=True,
                )
                gr.Markdown("### 💡 Final Answer")
                answer_output = gr.Textbox(
                    label="Answer",
                    lines=10,
                    max_lines=15,
                    placeholder="The model's answer will appear here...",
                    container=True,
                    show_copy_button=True,
                )
        # Event handlers
        process_btn.click(
            fn=process_image,
            inputs=[image_input],
            outputs=[reasoning_output, answer_output],
            show_progress=True,
        )
        # Also trigger on image upload
        image_input.change(
            fn=process_image,
            inputs=[image_input],
            outputs=[reasoning_output, answer_output],
            show_progress=True,
        )
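        # The change event also fires when the image is cleared; process_image
        # handles that case by returning a "Please upload an image." prompt.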
        gr.Markdown(
            """
            ---
            ### 📋 How to Use:
            1. **Upload an image** using the file uploader on the left
            2. **Click "Analyze Image"** or wait for automatic processing
            3. **View the results** on the right:
               - **Reasoning**: how the model thinks through the document
               - **Answer**: the final Markdown conversion

            ### 🔧 Model Details:
            - **Model**: numind/NuMarkdown-8B-reasoning
            - **Type**: Vision-Language Model with reasoning capabilities
            - **Features**: detailed thinking process + final answer

            *This demo runs on Hugging Face ZeroGPU Spaces for fast inference.*
            """
        )
    return demo

if __name__ == "__main__":
    demo = create_interface()
    # share=True only matters for local runs; on Spaces, Gradio serves the app
    # at the Space's own URL and ignores the flag.
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )