# math-llm-demo / app.py
# Loads models on demand with better memory management.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from monitoring import PerformanceMonitor, measure_time
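
# NOTE: `monitoring` is a local helper module whose implementation is not
# shown here. The tuple unpacking in solve_problem below implies that
# measure_time returns (result, elapsed_seconds). A minimal sketch of a
# decorator with that contract (an assumption, not the actual module):
#
#   import functools, time
#
#   def measure_time(fn):
#       @functools.wraps(fn)
#       def wrapper(*args, **kwargs):
#           start = time.perf_counter()
#           result = fn(*args, **kwargs)
#           return result, time.perf_counter() - start
#       return wrapper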
# Model configurations
MODEL_OPTIONS = {
    "Base Model": {
        "id": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
        "is_base": True
    },
    "Fine-tuned Model": {
        "id": "Joash2024/Math-SmolLM2-1.7B",
        "is_base": False
    }
}
# Initialize performance monitor
monitor = PerformanceMonitor()
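
# The calls below assume PerformanceMonitor exposes roughly this interface
# (a sketch inferred from usage; the real class lives in `monitoring`):
#
#   class PerformanceMonitor:
#       def record_problem_type(self, problem_type: str) -> None: ...
#       def record_response_time(self, model: str, seconds: float) -> None: ...
#       def record_success(self, model: str, success: bool) -> None: ...
#       def get_statistics(self) -> dict: ...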
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
tokenizer.pad_token = tokenizer.eos_token
def format_prompt(problem: str, problem_type: str) -> str:
    """Format the input prompt for the model"""
    if problem_type == "Derivative":
        return f"""Given a mathematical function, find its derivative.
Function: {problem}
The derivative of this function is:"""
    elif problem_type == "Addition":
        return f"""Solve this addition problem.
Problem: {problem}
The solution is:"""
    else:  # Root Finding and Custom currently reuse the derivative prompt
        return f"""Find the derivative of this function.
Function: {problem}
The derivative is:"""
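
# For example, format_prompt("x^2 + 3x", "Derivative") yields:
#   Given a mathematical function, find its derivative.
#   Function: x^2 + 3x
#   The derivative of this function is: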
@measure_time
def get_model_response(problem: str, problem_type: str, model_info) -> str:
    """Get a response from a specific model, loading it on demand"""
    try:
        # Load model
        if model_info["is_base"]:
            print(f"Loading {model_info['id']}...")
            model = AutoModelForCausalLM.from_pretrained(
                model_info["id"],
                device_map="auto",
                torch_dtype=torch.float16
            )
        else:
            # The fine-tuned model is a PEFT adapter on top of the base model
            print("Loading base model for fine-tuned...")
            base = AutoModelForCausalLM.from_pretrained(
                "HuggingFaceTB/SmolLM2-1.7B-Instruct",
                device_map="auto",
                torch_dtype=torch.float16
            )
            print(f"Loading {model_info['id']}...")
            model = PeftModel.from_pretrained(base, model_info["id"])
        model.eval()

        # Format prompt and generate
        prompt = format_prompt(problem, problem_type)
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,  # max_length would also count prompt tokens
                num_return_sequences=1,
                temperature=0.1,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode and strip the echoed prompt
        generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
        response = generated[len(prompt):].strip()

        # Clean up so the next request starts with a free GPU
        del model
        if not model_info["is_base"]:
            del base
        torch.cuda.empty_cache()

        return response
    except Exception as e:
        return f"Error: {str(e)}"
def solve_problem(problem: str, problem_type: str, model_type: str) -> tuple:
    """Solve a math problem using the selected model"""
    if not problem:
        return "Please enter a problem", None

    # Record problem type
    monitor.record_problem_type(problem_type)

    # Get response from the selected model
    model_info = MODEL_OPTIONS[model_type]
    response, time_taken = get_model_response(problem, problem_type, model_info)

    # Format the response; the step-by-step framing only fits the
    # derivative-style prompts, so skip it for Addition
    if problem_type == "Addition":
        output = f"Solution: {response}"
    else:
        output = f"""Solution: {response}
Let's verify this step by step:
1. Starting with f(x) = {problem}
2. Applying differentiation rules
3. We get f'(x) = {response}"""
    # Record metrics
    monitor.record_response_time(model_type, time_taken)
    monitor.record_success(model_type, not response.startswith("Error"))

    # Get updated statistics
    stats = monitor.get_statistics()

    # Format statistics for display
    stats_display = f"""
### Performance Metrics

#### Response Times (seconds)
- {model_type}: {stats.get(f'{model_type}_avg_response_time', 0):.2f} avg

#### Success Rates
- {model_type}: {stats.get(f'{model_type}_success_rate', 0):.1f}%

#### Problem Types Used
"""
    for ptype, percentage in stats.get('problem_type_distribution', {}).items():
        stats_display += f"- {ptype}: {percentage:.1f}%\n"

    return output, stats_display
# Create Gradio interface
with gr.Blocks(title="Mathematics Problem Solver") as demo:
    gr.Markdown("# Mathematics Problem Solver")
    gr.Markdown("Test our models on mathematical problems")

    with gr.Row():
        with gr.Column():
            problem_type = gr.Dropdown(
                choices=["Addition", "Root Finding", "Derivative", "Custom"],
                value="Derivative",
                label="Problem Type"
            )
            model_type = gr.Dropdown(
                choices=list(MODEL_OPTIONS.keys()),
                value="Fine-tuned Model",
                label="Model to Use"
            )
            problem_input = gr.Textbox(
                label="Enter your math problem",
                placeholder="Example: x^2 + 3x"
            )
            solve_btn = gr.Button("Solve", variant="primary")
    with gr.Row():
        solution_output = gr.Textbox(label="Solution", lines=5)

    # Performance metrics display
    with gr.Row():
        metrics_display = gr.Markdown("### Performance Metrics\n*Solve a problem to see metrics*")

    # Example problems (cache_examples=True pre-computes these at startup,
    # which loads a model once per example)
    gr.Examples(
        examples=[
            ["x^2 + 3x", "Derivative", "Fine-tuned Model"],
            ["144", "Root Finding", "Fine-tuned Model"],
            ["235 + 567", "Addition", "Fine-tuned Model"],
            ["\\sin{\\left(x\\right)}", "Derivative", "Fine-tuned Model"],
            ["e^x", "Derivative", "Fine-tuned Model"],
            ["\\frac{1}{x}", "Derivative", "Fine-tuned Model"],
            ["x^3 + 2x", "Derivative", "Fine-tuned Model"],
            ["\\cos{\\left(x^2\\right)}", "Derivative", "Fine-tuned Model"]
        ],
        inputs=[problem_input, problem_type, model_type],
        outputs=[solution_output, metrics_display],
        fn=solve_problem,
        cache_examples=True,
    )
    # Connect the interface
    solve_btn.click(
        fn=solve_problem,
        inputs=[problem_input, problem_type, model_type],
        outputs=[solution_output, metrics_display]
    )

if __name__ == "__main__":
    demo.launch()