# math-llm-demo / app.py
# Loads models on demand with better memory management.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from monitoring import PerformanceMonitor, measure_time
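
# NOTE: `monitoring` is a local helper module whose implementation is not
# shown here. The tuple unpacking in solve_problem below implies that
# measure_time returns (result, elapsed_seconds). A minimal sketch of a
# decorator with that contract (an assumption, not the actual module):
#
#   import functools, time
#
#   def measure_time(fn):
#       @functools.wraps(fn)
#       def wrapper(*args, **kwargs):
#           start = time.perf_counter()
#           result = fn(*args, **kwargs)
#           return result, time.perf_counter() - start
#       return wrapper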
# Model configurations
MODEL_OPTIONS = {
    "Base Model": {
        "id": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
        "is_base": True
    },
    "Fine-tuned Model": {
        "id": "Joash2024/Math-SmolLM2-1.7B",
        "is_base": False
    }
}
# Initialize performance monitor
monitor = PerformanceMonitor()
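
# The calls below assume PerformanceMonitor exposes roughly this interface
# (a sketch inferred from usage; the real class lives in `monitoring`):
#
#   class PerformanceMonitor:
#       def record_problem_type(self, problem_type: str) -> None: ...
#       def record_response_time(self, model: str, seconds: float) -> None: ...
#       def record_success(self, model: str, success: bool) -> None: ...
#       def get_statistics(self) -> dict: ...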
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
tokenizer.pad_token = tokenizer.eos_token
def format_prompt(problem: str, problem_type: str) -> str:
    """Format the input prompt for the model"""
    if problem_type == "Derivative":
        return f"""Given a mathematical function, find its derivative.
Function: {problem}
The derivative of this function is:"""
    elif problem_type == "Addition":
        return f"""Solve this addition problem.
Problem: {problem}
The solution is:"""
    else:  # Root Finding and Custom currently reuse the derivative prompt
        return f"""Find the derivative of this function.
Function: {problem}
The derivative is:"""
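
# For example, format_prompt("x^2 + 3x", "Derivative") yields:
#   Given a mathematical function, find its derivative.
#   Function: x^2 + 3x
#   The derivative of this function is: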
@measure_time
def get_model_response(problem: str, problem_type: str, model_info) -> str:
    """Get a response from a specific model, loading it on demand"""
    try:
        # Load model
        if model_info["is_base"]:
            print(f"Loading {model_info['id']}...")
            model = AutoModelForCausalLM.from_pretrained(
                model_info["id"],
                device_map="auto",
                torch_dtype=torch.float16
            )
        else:
            # The fine-tuned model is a PEFT adapter on top of the base model
            print("Loading base model for fine-tuned...")
            base = AutoModelForCausalLM.from_pretrained(
                "HuggingFaceTB/SmolLM2-1.7B-Instruct",
                device_map="auto",
                torch_dtype=torch.float16
            )
            print(f"Loading {model_info['id']}...")
            model = PeftModel.from_pretrained(base, model_info["id"])
        model.eval()

        # Format prompt and generate
        prompt = format_prompt(problem, problem_type)
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,  # max_length would also count prompt tokens
                num_return_sequences=1,
                temperature=0.1,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode and strip the echoed prompt
        generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
        response = generated[len(prompt):].strip()

        # Clean up so the next request starts with a free GPU
        del model
        if not model_info["is_base"]:
            del base
        torch.cuda.empty_cache()

        return response
    except Exception as e:
        return f"Error: {str(e)}"
def solve_problem(problem: str, problem_type: str, model_type: str) -> tuple:
    """Solve a math problem using the selected model"""
    if not problem:
        return "Please enter a problem", None

    # Record problem type
    monitor.record_problem_type(problem_type)

    # Get response from the selected model
    model_info = MODEL_OPTIONS[model_type]
    response, time_taken = get_model_response(problem, problem_type, model_info)

    # Format the response; the step-by-step framing only fits the
    # derivative-style prompts, so skip it for Addition
    if problem_type == "Addition":
        output = f"Solution: {response}"
    else:
        output = f"""Solution: {response}
Let's verify this step by step:
1. Starting with f(x) = {problem}
2. Applying differentiation rules
3. We get f'(x) = {response}"""
    # Record metrics
    monitor.record_response_time(model_type, time_taken)
    monitor.record_success(model_type, not response.startswith("Error"))

    # Get updated statistics
    stats = monitor.get_statistics()

    # Format statistics for display
    stats_display = f"""
### Performance Metrics

#### Response Times (seconds)
- {model_type}: {stats.get(f'{model_type}_avg_response_time', 0):.2f} avg

#### Success Rates
- {model_type}: {stats.get(f'{model_type}_success_rate', 0):.1f}%

#### Problem Types Used
"""
    for ptype, percentage in stats.get('problem_type_distribution', {}).items():
        stats_display += f"- {ptype}: {percentage:.1f}%\n"

    return output, stats_display
# Create Gradio interface
with gr.Blocks(title="Mathematics Problem Solver") as demo:
    gr.Markdown("# Mathematics Problem Solver")
    gr.Markdown("Test our models on mathematical problems")

    with gr.Row():
        with gr.Column():
            problem_type = gr.Dropdown(
                choices=["Addition", "Root Finding", "Derivative", "Custom"],
                value="Derivative",
                label="Problem Type"
            )
            model_type = gr.Dropdown(
                choices=list(MODEL_OPTIONS.keys()),
                value="Fine-tuned Model",
                label="Model to Use"
            )
            problem_input = gr.Textbox(
                label="Enter your math problem",
                placeholder="Example: x^2 + 3x"
            )
            solve_btn = gr.Button("Solve", variant="primary")
    with gr.Row():
        solution_output = gr.Textbox(label="Solution", lines=5)

    # Performance metrics display
    with gr.Row():
        metrics_display = gr.Markdown("### Performance Metrics\n*Solve a problem to see metrics*")

    # Example problems (cache_examples=True pre-computes these at startup,
    # which loads a model once per example)
    gr.Examples(
        examples=[
            ["x^2 + 3x", "Derivative", "Fine-tuned Model"],
            ["144", "Root Finding", "Fine-tuned Model"],
            ["235 + 567", "Addition", "Fine-tuned Model"],
            ["\\sin{\\left(x\\right)}", "Derivative", "Fine-tuned Model"],
            ["e^x", "Derivative", "Fine-tuned Model"],
            ["\\frac{1}{x}", "Derivative", "Fine-tuned Model"],
            ["x^3 + 2x", "Derivative", "Fine-tuned Model"],
            ["\\cos{\\left(x^2\\right)}", "Derivative", "Fine-tuned Model"]
        ],
        inputs=[problem_input, problem_type, model_type],
        outputs=[solution_output, metrics_display],
        fn=solve_problem,
        cache_examples=True,
    )
    # Connect the interface
    solve_btn.click(
        fn=solve_problem,
        inputs=[problem_input, problem_type, model_type],
        outputs=[solution_output, metrics_display]
    )

if __name__ == "__main__":
    demo.launch()