Upload 11 files
Browse files- math_expert/__pycache__/config.cpython-312.pyc +0 -0
- math_expert/__pycache__/expert.cpython-312.pyc +0 -0
- math_expert/config.py +381 -0
- math_expert/data_processor.py +282 -0
- math_expert/expert.py +19 -0
- math_expert/prepare_data.py +245 -0
- math_expert/processed_data/gsm8k_processed.jsonl +0 -0
- math_expert/processed_data/proofnet_processed.jsonl +185 -0
- math_expert/requirements.txt +11 -0
- math_expert/train.py +126 -0
- math_expert/validation.py +173 -0
math_expert/__pycache__/config.cpython-312.pyc
ADDED
Binary file (9.17 kB). View file
|
|
math_expert/__pycache__/expert.cpython-312.pyc
ADDED
Binary file (1.24 kB). View file
|
|
math_expert/config.py
ADDED
@@ -0,0 +1,381 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Math Expert Configuration

# 1.1. Mathematical Domains and Specializations
#
# Every domain is registered at "expert" level, so the public mapping is
# derived from a single topics table instead of repeating the
# {"level": ..., "topics": ...} wrapper for each of the eleven domains.
_DOMAIN_TOPICS = {
    "algebra": [
        "linear algebra",
        "abstract algebra",
        "polynomial equations",
        "matrix operations",
        "group theory",
        "ring theory",
        "field theory",
        "representation theory",
        "homological algebra",
        "category theory",
        "universal algebra",
        "non-associative algebras",
        "Lie algebras",
        "quantum groups",
        "Hopf algebras",
        "K-theory",
    ],
    "calculus": [
        "single variable calculus",
        "multivariable calculus",
        "differential equations",
        "partial differential equations",
        "vector calculus",
        "complex analysis",
        "functional analysis",
        "measure theory",
        "differential geometry",
        "geometric measure theory",
        "non-standard analysis",
        "stochastic calculus",
        "calculus of variations",
        "symplectic geometry",
    ],
    "proof_writing": [
        "induction",
        "contradiction",
        "direct proof",
        "proof by cases",
        "epsilon-delta proofs",
        "existence proofs",
        "uniqueness proofs",
        "category theory proofs",
        "homotopy type theory",
        "model theory",
        "proof theory",
        "set theory",
        "constructive mathematics",
        "proof complexity",
    ],
    "probability": [
        "probability theory",
        "random variables",
        "distributions",
        "stochastic processes",
        "Bayesian inference",
        "Markov chains",
        "measure-theoretic probability",
        "stochastic calculus",
        "martingales",
        "large deviations",
        "ergodic theory",
        "random matrix theory",
        "stochastic PDEs",
    ],
    "statistics": [
        "descriptive statistics",
        "inferential statistics",
        "hypothesis testing",
        "regression analysis",
        "time series analysis",
        "bayesian statistics",
        "non-parametric methods",
        "statistical learning theory",
        "high-dimensional statistics",
        "causal inference",
        "spatial statistics",
        "robust statistics",
        "computational statistics",
    ],
    "number_theory": [
        "prime numbers",
        "modular arithmetic",
        "diophantine equations",
        "cryptography",
        "analytic number theory",
        "algebraic number theory",
        "elliptic curves",
        "automorphic forms",
        "arithmetic geometry",
        "p-adic analysis",
        "analytic continuation",
        "modular forms",
        "zeta functions",
    ],
    "geometry": [
        "euclidean geometry",
        "non-euclidean geometry",
        "differential geometry",
        "topology",
        "algebraic geometry",
        "projective geometry",
        "symplectic geometry",
        "algebraic topology",
        "geometric analysis",
        "geometric group theory",
        "Riemannian geometry",
        "Kähler geometry",
        "hyperbolic geometry",
    ],
    "combinatorics": [
        "graph theory",
        "enumerative combinatorics",
        "combinatorial optimization",
        "matroid theory",
        "combinatorial designs",
        "extremal combinatorics",
        "probabilistic combinatorics",
        "algebraic combinatorics",
        "topological combinatorics",
        "combinatorial geometry",
        "Ramsey theory",
    ],
    "logic": [
        "first-order logic",
        "model theory",
        "proof theory",
        "set theory",
        "computability theory",
        "type theory",
        "category theory",
        "modal logic",
        "temporal logic",
        "constructive logic",
        "intuitionistic logic",
        "proof complexity",
    ],
    "theoretical_cs": [
        "computational complexity",
        "algorithms",
        "cryptography",
        "quantum computing",
        "machine learning theory",
        "formal verification",
        "type systems",
        "programming language theory",
        "distributed computing",
        "parallel algorithms",
        "computational geometry",
        "randomized algorithms",
    ],
    "applied_math": [
        "numerical analysis",
        "optimization",
        "control theory",
        "mathematical physics",
        "fluid dynamics",
        "quantum mechanics",
        "relativity",
        "mathematical biology",
        "financial mathematics",
        "signal processing",
        "data assimilation",
        "inverse problems",
    ],
}

# Public mapping: domain name -> {"level": "expert", "topics": [...]}.
# Insertion order matches the topics table above.
MATH_DOMAINS = {
    domain: {"level": "expert", "topics": topics}
    for domain, topics in _DOMAIN_TOPICS.items()
}
|
204 |
+
|
205 |
+
# 1.2. Core Tasks
#
# One entry per task family the expert supports.  The dict() keyword form
# keeps the shared keys (task_type / description / example) visually aligned;
# the fourth key varies per task and enumerates its supported sub-kinds.
CORE_TASKS = [
    dict(
        task_type="problem_solving",
        description="Solve complex mathematical problems",
        example="Prove the Riemann Hypothesis",
        difficulty_levels=["basic", "intermediate", "advanced", "research_level", "open_problem"],
    ),
    dict(
        task_type="proof_writing",
        description="Prove mathematical statements with advanced techniques",
        example="Prove Fermat's Last Theorem using elliptic curves",
        proof_types=["induction", "contradiction", "direct", "cases", "category_theory", "homotopy_type", "model_theory", "proof_complexity", "constructive"],
    ),
    dict(
        task_type="calculus_computation",
        description="Perform advanced calculus operations",
        example="Solve Navier-Stokes equations for turbulence",
        operation_types=["differentiation", "integration", "limits", "functional_analysis", "measure_theory", "stochastic_calculus", "geometric_measure_theory"],
    ),
    dict(
        task_type="symbolic_computation",
        description="Manipulate complex mathematical expressions",
        example="Simplify tensor equations in general relativity",
        expression_types=["polynomial", "rational", "trigonometric", "exponential", "tensor", "operator", "Lie_algebra", "Hopf_algebra"],
    ),
    dict(
        task_type="concept_explanation",
        description="Explain advanced mathematical concepts",
        example="Explain the Langlands program",
        explanation_types=["definition", "intuition", "application", "example", "formal", "geometric", "historical", "pedagogical"],
    ),
    dict(
        task_type="statistical_analysis",
        description="Perform advanced statistical analysis",
        example="Analyze high-dimensional genomic data",
        statistical_methods=["regression", "hypothesis_testing", "confidence_intervals", "bayesian_methods", "non_parametric", "causal_inference", "computational_methods"],
    ),
    dict(
        task_type="probability_calculation",
        description="Calculate complex probabilities",
        example="Calculate phase transitions in random matrix theory",
        distributions=["binomial", "normal", "poisson", "exponential", "multivariate", "stochastic_processes", "random_matrix", "levy_processes"],
    ),
    dict(
        task_type="number_theory_problem",
        description="Solve advanced number theory problems",
        example="Prove the Birch and Swinnerton-Dyer conjecture",
        problem_types=["prime", "modular", "diophantine", "analytic", "algebraic", "elliptic_curve", "modular_form"],
    ),
    dict(
        task_type="geometric_construction",
        description="Construct and analyze complex geometric objects",
        example="Construct a Calabi-Yau manifold",
        construction_types=["euclidean", "non_euclidean", "projective", "differential", "algebraic", "symplectic", "topological"],
    ),
    dict(
        task_type="mathematical_modeling",
        description="Create advanced mathematical models",
        example="Model quantum field theory",
        model_types=["continuous", "discrete", "stochastic", "partial_differential", "non_linear", "quantum", "statistical"],
    ),
    dict(
        task_type="proof_verification",
        description="Verify complex mathematical proofs",
        example="Verify the proof of the Four Color Theorem",
        verification_methods=["formal_verification", "model_checking", "proof_assistant", "automated_reasoning", "interactive_theorem_proving"],
    ),
    dict(
        task_type="algorithm_design",
        description="Design and analyze mathematical algorithms",
        example="Design a quantum algorithm for factorization",
        algorithm_types=["numerical", "combinatorial", "geometric", "algebraic", "probabilistic", "quantum", "parallel"],
    ),
    dict(
        task_type="research_paper_analysis",
        description="Analyze and explain mathematical research papers",
        example="Explain Wiles' proof of Fermat's Last Theorem",
        analysis_types=["technical", "historical", "pedagogical", "critical", "extensional"],
    ),
    dict(
        task_type="open_problem_analysis",
        description="Analyze and make progress on open mathematical problems",
        example="Analyze the Collatz conjecture",
        problem_classes=["number_theory", "combinatorics", "analysis", "algebra", "geometry", "probability"],
    ),
    dict(
        task_type="mathematical_philosophy",
        description="Analyze philosophical aspects of mathematics",
        example="Explain the foundations of mathematics",
        philosophical_topics=["foundations", "philosophy_of_math", "logic", "set_theory", "constructivism", "intuitionism"],
    ),
    dict(
        task_type="mathematical_software_development",
        description="Develop mathematical software and algorithms",
        example="Implement a new numerical method",
        software_types=["numerical", "symbolic", "proof_assistant", "visualization", "simulation", "optimization"],
    ),
]
|
304 |
+
|
305 |
+
# Dataset Configuration
#
# Maps a short dataset key to how it is fetched from the Hugging Face Hub and
# which record fields the pipeline consumes.
# NOTE(review): several entries below ("mathlib", "arxiv_math",
# "clay_institute_problems", "open_math_problems", "math_research_papers")
# look like placeholder repo ids — confirm they resolve on the Hub before
# enabling them in the pipeline.
DATASETS = {
    "proofnet": {
        "source": "huggingface",
        # Fixed: bare "proofnet" is not a resolvable Hub repo id; use the same
        # id that prepare_data.py already uses.
        "dataset_name": "hoskinson-center/proofnet",
        "split": "train",
        "use_fields": ["problem", "solution", "proof_steps"]
    },
    "math_dataset": {
        "source": "huggingface",
        "dataset_name": "deepmind/mathematics_dataset",
        # NOTE(review): confirm "train-hard" is a real split of this dataset.
        "split": "train-hard",
        "use_fields": ["question", "answer", "steps"]
    },
    "gsm8k": {
        "source": "huggingface",
        "dataset_name": "gsm8k",
        # Fixed: gsm8k requires a config name; prepare_data.py uses "main".
        "config": "main",
        "split": "train",
        "use_fields": ["question", "answer"]
    },
    "mathlib": {
        "source": "huggingface",
        "dataset_name": "mathlib",
        "split": "train",
        "use_fields": ["theorem", "proof", "dependencies"]
    },
    "arxiv_math": {
        "source": "huggingface",
        "dataset_name": "arxiv_math",
        "split": "train",
        "use_fields": ["paper", "equations", "proofs"]
    },
    "clay_institute": {
        "source": "huggingface",
        "dataset_name": "clay_institute_problems",
        "split": "train",
        "use_fields": ["problem", "background", "current_status", "approaches"]
    },
    "open_problems": {
        "source": "huggingface",
        "dataset_name": "open_math_problems",
        "split": "train",
        "use_fields": ["problem", "category", "history", "attempts"]
    },
    "research_papers": {
        "source": "huggingface",
        "dataset_name": "math_research_papers",
        "split": "train",
        "use_fields": ["title", "abstract", "content", "proofs", "theorems"]
    }
}
|
356 |
+
|
357 |
+
# Data Processing Configuration

# Backend used to normalize each kind of mathematical content.
_NORMALIZATION_BACKENDS = {
    "equations": "sympy",
    "latex": "plaintext",
    "proof_steps": "yaml",
    "tensor_operations": "torch",
    "quantum_operations": "qiskit",
    "geometric_objects": "geometric_algebra",
    "category_theory": "category_theory",
}

# Hard limits applied when validating a processed entry.
_VALIDATION_LIMITS = {
    "min_steps": 2,
    "max_steps": 200,
    "min_length": 10,
    "max_length": 100000,
}

DATA_PROCESSING = {
    "format": "jsonl",
    "normalization": _NORMALIZATION_BACKENDS,
    "validation": _VALIDATION_LIMITS,
}
|
376 |
+
|
377 |
+
if __name__ == "__main__":
    # Quick sanity report when the config module is run directly.
    print("Math Expert Configuration Loaded")
    for label, collection in (
        ("domains", MATH_DOMAINS),
        ("tasks", CORE_TASKS),
        ("datasets", DATASETS),
    ):
        print(f"Number of {label}: {len(collection)}")
|
math_expert/data_processor.py
ADDED
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import yaml
|
3 |
+
import sympy
|
4 |
+
from sympy.parsing.latex import parse_latex
|
5 |
+
from huggingface_hub import hf_hub_download
|
6 |
+
from pathlib import Path
|
7 |
+
import jsonlines
|
8 |
+
from typing import Dict, List, Any
|
9 |
+
|
10 |
+
from config import DATASETS, DATA_PROCESSING
|
11 |
+
|
12 |
+
class MathDataProcessor:
    """Download, normalize and validate the configured math datasets.

    Equations are canonicalized with sympy, proof steps are parsed as YAML
    when possible, and every entry is checked against the limits declared in
    ``DATA_PROCESSING["validation"]`` before being kept.
    """

    def __init__(self):
        # Entries that passed validation, in processing order.
        self.processed_data = []
        # dataset name -> local directory it was downloaded to.
        self.dataset_paths = {}
        # Dispatch table used by process_math_operation().
        self.math_operations = {
            "differentiation": self._process_differentiation,
            "integration": self._process_integration,
            "limits": self._process_limits,
            "simplification": self._process_simplification,
            "matrix": self._process_matrix,
            "probability": self._process_probability,
            "statistics": self._process_statistics,
        }

    def download_dataset(self, dataset_name: str) -> Path:
        """Download a dataset from the Hugging Face Hub into ``data/<name>``.

        Raises:
            ValueError: if *dataset_name* is not declared in DATASETS.
        """
        if dataset_name not in DATASETS:
            raise ValueError(f"Dataset {dataset_name} not defined in configuration")

        dataset_config = DATASETS[dataset_name]
        dataset_path = Path(f"data/{dataset_name}")

        # NOTE(review): assumes each Hub repo ships a "<split>.jsonl" file —
        # confirm against the actual repositories.
        hf_hub_download(
            repo_id=dataset_config["dataset_name"],
            filename=f"{dataset_config['split']}.jsonl",
            local_dir=dataset_path,
        )

        self.dataset_paths[dataset_name] = dataset_path
        return dataset_path

    def normalize_equation(self, equation: str) -> str:
        """Return the sympy-canonical form of *equation*, or the input on failure."""
        try:
            # A backslash is taken as evidence that the input is LaTeX.
            if "\\" in equation:
                eq = parse_latex(equation)
            else:
                eq = sympy.sympify(equation)
            return str(eq)
        # Fixed: narrowed from a bare "except:" so SystemExit /
        # KeyboardInterrupt are no longer swallowed (same below).
        except Exception:
            return equation

    def process_proof_steps(self, steps: List[str]) -> List[Dict[str, str]]:
        """Parse each proof step as YAML when possible, else wrap it as text."""
        processed_steps = []
        for step in steps:
            try:
                structured_step = yaml.safe_load(step)
                if isinstance(structured_step, dict):
                    processed_steps.append(structured_step)
                else:
                    processed_steps.append({"step": step})
            except Exception:  # malformed YAML -> keep the raw text
                processed_steps.append({"step": step})
        return processed_steps

    def _process_differentiation(self, expression: str) -> str:
        """Differentiate *expression* w.r.t. x; return the input on failure."""
        x = sympy.Symbol('x')
        try:
            return str(sympy.diff(sympy.sympify(expression), x))
        except Exception:
            return expression

    def _process_integration(self, expression: str) -> str:
        """Integrate *expression* w.r.t. x; return the input on failure."""
        x = sympy.Symbol('x')
        try:
            return str(sympy.integrate(sympy.sympify(expression), x))
        except Exception:
            return expression

    def _process_limits(self, expression: str) -> str:
        """Take the limit of *expression* as x -> oo; return the input on failure."""
        x = sympy.Symbol('x')
        try:
            return str(sympy.limit(sympy.sympify(expression), x, sympy.oo))
        except Exception:
            return expression

    def _process_simplification(self, expression: str) -> str:
        """Simplify *expression* with sympy; return the input on failure."""
        try:
            return str(sympy.simplify(sympy.sympify(expression)))
        except Exception:
            return expression

    def _process_matrix(self, matrix_str: str) -> str:
        """Parse a ";"-separated-row / space-separated-column matrix string."""
        try:
            matrix = sympy.Matrix([[float(n) for n in row.split()]
                                   for row in matrix_str.split(';')])
            return str(matrix)
        except Exception:
            return matrix_str

    def _process_probability(self, problem: str) -> Dict:
        """Classify a probability problem and pull out its parameters."""
        try:
            if "probability" in problem.lower():
                return {
                    "type": "probability",
                    "parameters": self._extract_parameters(problem),
                    "distribution": self._identify_distribution(problem),
                }
            return {"type": "unknown"}
        except Exception:
            return {"type": "unknown"}

    def _process_statistics(self, data: str) -> Dict:
        """Compute mean / median / population std-dev for a comma-separated list."""
        try:
            if "," in data:
                numbers = [float(n) for n in data.split(',')]
                ordered = sorted(numbers)
                mid = len(ordered) // 2
                # Fixed: the median of an even-length sample is the mean of
                # the two middle values; the original always took the upper one.
                if len(ordered) % 2:
                    median = ordered[mid]
                else:
                    median = (ordered[mid - 1] + ordered[mid]) / 2
                return {
                    "mean": sum(numbers) / len(numbers),
                    "median": median,
                    "std_dev": self._calculate_std_dev(numbers),
                }
            return {"error": "Invalid data format"}
        except Exception:
            return {"error": "Processing failed"}

    def _extract_parameters(self, text: str) -> Dict:
        """Split "lhs = value" text into its two sides."""
        parameters = {}
        if "=" in text:
            # Fixed: split only on the first "=" so values that themselves
            # contain "=" (e.g. chained equalities) are preserved intact.
            lhs, rhs = text.split("=", 1)
            parameters["equation"] = lhs.strip()
            parameters["value"] = rhs.strip()
        return parameters

    def _identify_distribution(self, text: str) -> str:
        """Guess the probability distribution named in *text* by keyword match."""
        distributions = {
            "binomial": ["binomial", "bernoulli"],
            "normal": ["normal", "gaussian"],
            "poisson": ["poisson"],
            "exponential": ["exponential"],
        }
        text_lower = text.lower()
        for dist, keywords in distributions.items():
            if any(keyword in text_lower for keyword in keywords):
                return dist
        return "unknown"

    def _calculate_std_dev(self, numbers: List[float]) -> float:
        """Population standard deviation of *numbers* (divides by N, not N-1)."""
        mean = sum(numbers) / len(numbers)
        variance = sum((x - mean) ** 2 for x in numbers) / len(numbers)
        return variance ** 0.5

    def process_math_operation(self, operation_type: str, content: str) -> Any:
        """Dispatch *content* to its handler; unknown operations pass through."""
        handler = self.math_operations.get(operation_type)
        return handler(content) if handler else content

    def validate_entry(self, entry: Dict[str, Any]) -> bool:
        """Validate an entry against size limits and basic mathematical checks."""
        steps = entry.get("steps", [])
        text = entry.get("question", "") + entry.get("answer", "")
        limits = DATA_PROCESSING["validation"]

        # Size limits.  Fixed: max_steps / max_length are declared in the
        # config but were never enforced by the original.
        if not (limits["min_steps"] <= len(steps) <= limits["max_steps"]):
            return False
        if not (limits["min_length"] <= len(text) <= limits["max_length"]):
            return False

        try:
            # Any declared equation must at least parse.
            if "equation" in entry:
                sympy.sympify(entry["equation"])

            # Consecutive steps must chain (rhs of one == lhs of the next).
            for current, following in zip(steps, steps[1:]):
                if not self._check_step_continuity(current, following):
                    return False

            # Cheap textual sanity checks on proofs.
            if "proof" in entry and not self._check_proof_validity(entry["proof"]):
                return False

            return True
        except Exception:
            return False

    def _check_step_continuity(self, step1: str, step2: str) -> bool:
        """Heuristic: the rhs of *step1* should reappear as the lhs of *step2*."""
        try:
            if "=" in step1 and "=" in step2:
                s1 = step1.split("=")[1].strip()
                s2 = step2.split("=")[0].strip()
                return s1 == s2
            return True
        except Exception:
            return False

    def _check_proof_validity(self, proof: str) -> bool:
        """Cheap textual sanity checks on a proof (not a real logic check)."""
        lowered = proof.lower()
        # An assumption that never reaches a conclusion is suspicious.
        if "assume" in lowered and "therefore" not in lowered:
            return False
        # A proof by contradiction should actually reach a falsehood.
        if "contradiction" in lowered and "false" not in lowered:
            return False
        return True

    def process_dataset(self, dataset_name: str):
        """Download, normalize, validate and collect one configured dataset."""
        dataset_path = self.download_dataset(dataset_name)
        dataset_config = DATASETS[dataset_name]

        with jsonlines.open(dataset_path / f"{dataset_config['split']}.jsonl") as reader:
            for entry in reader:
                processed_entry = {}

                # Copy over the configured fields, normalizing where needed.
                for field in dataset_config["use_fields"]:
                    value = entry.get(field)
                    if value:
                        if field == "equation":
                            processed_entry[field] = self.normalize_equation(value)
                        elif field == "proof_steps":
                            processed_entry[field] = self.process_proof_steps(value)
                        else:
                            processed_entry[field] = value

                if self.validate_entry(processed_entry):
                    self.processed_data.append(processed_entry)

    def save_processed_data(self, output_path: str):
        """Write all collected entries to *output_path* as JSON Lines."""
        with jsonlines.open(output_path, mode='w') as writer:
            writer.write_all(self.processed_data)
|
272 |
+
|
273 |
+
if __name__ == "__main__":
    processor = MathDataProcessor()

    # Run every configured dataset through the pipeline, then persist the
    # surviving entries as a single JSONL file.
    for dataset_name in DATASETS:
        processor.process_dataset(dataset_name)

    processor.save_processed_data("processed_data/math_expert_data.jsonl")
|
math_expert/expert.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Math Expert Module
|
3 |
+
"""
|
4 |
+
from typing import Dict, Any, List
|
5 |
+
|
6 |
+
class MathExpert:
    """Minimal math expert stub: answers every query with a canned response."""

    def __init__(self):
        # Identifier used by the expert registry, and the domains it claims.
        self.name = "math"
        self.domains = ["mathematics", "calculus", "algebra"]

    def handle_query(self, query: str, context: Dict[str, Any]) -> Dict[str, Any]:
        """Return a stub answer for *query*; *context* is currently unused."""
        reply = f"Math expert response to: {query}"
        return {
            'response': reply,
            'confidence': 0.9,
            'metadata': {'domains': self.domains},
        }

    def get_domains(self) -> List[str]:
        """List the domains this expert handles."""
        return self.domains
|
math_expert/prepare_data.py
ADDED
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
from pathlib import Path
|
4 |
+
import sympy
|
5 |
+
from sympy.parsing.latex import parse_latex
|
6 |
+
from sympy.parsing.sympy_parser import parse_expr
|
7 |
+
from datasets import load_dataset
|
8 |
+
import jsonlines
|
9 |
+
from typing import Dict, List, Any
|
10 |
+
import sys
|
11 |
+
import psutil
|
12 |
+
|
13 |
+
class MathDataPreparer:
    """Prepares GSM8K and ProofNet data for math-expert training."""

    def __init__(self, output_dir: str = "processed_data"):
        # Ensure the destination directory exists up front.
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(exist_ok=True)
        # Hugging Face dataset descriptors: hub id, optional config, split,
        # and the record fields each processor consumes.
        gsm8k_spec = {
            "source": "gsm8k",
            "config": "main",
            "split": "train",
            "fields": ["question", "answer"],
        }
        proofnet_spec = {
            "source": "hoskinson-center/proofnet",
            "split": "validation",
            "fields": ["problem", "solution", "proof_steps"],
        }
        self.datasets = {"gsm8k": gsm8k_spec, "proofnet": proofnet_spec}
|
30 |
+
|
31 |
+
def normalize_equation(self, equation: str) -> str:
|
32 |
+
"""Normalize mathematical equations using sympy"""
|
33 |
+
try:
|
34 |
+
# Try LaTeX first
|
35 |
+
if "\\" in equation:
|
36 |
+
eq = parse_latex(equation)
|
37 |
+
# Then try markdown math
|
38 |
+
elif equation.startswith('$') and equation.endswith('$'):
|
39 |
+
eq = parse_expr(equation[1:-1])
|
40 |
+
# Then try regular expression
|
41 |
+
else:
|
42 |
+
eq = parse_expr(equation)
|
43 |
+
return str(eq)
|
44 |
+
except Exception as e:
|
45 |
+
print(f"Error normalizing equation: {equation}", file=sys.stderr)
|
46 |
+
return equation
|
47 |
+
|
48 |
+
def process_proof_steps(self, steps: List[str]) -> List[Dict[str, Any]]:
|
49 |
+
"""Process and validate proof steps"""
|
50 |
+
processed_steps = []
|
51 |
+
for step in steps:
|
52 |
+
try:
|
53 |
+
# Basic validation
|
54 |
+
if not step.strip():
|
55 |
+
continue
|
56 |
+
|
57 |
+
# Try to parse as structured data
|
58 |
+
try:
|
59 |
+
structured_step = json.loads(step)
|
60 |
+
if isinstance(structured_step, dict):
|
61 |
+
processed_steps.append(structured_step)
|
62 |
+
continue
|
63 |
+
except json.JSONDecodeError:
|
64 |
+
pass
|
65 |
+
|
66 |
+
# Process as plain text
|
67 |
+
processed_steps.append({
|
68 |
+
"text": step.strip(),
|
69 |
+
"valid": True
|
70 |
+
})
|
71 |
+
except Exception as e:
|
72 |
+
print(f"Error processing proof step: {step}", file=sys.stderr)
|
73 |
+
processed_steps.append({
|
74 |
+
"text": step,
|
75 |
+
"valid": False,
|
76 |
+
"error": str(e)
|
77 |
+
})
|
78 |
+
return processed_steps
|
79 |
+
|
80 |
+
def process_gsm8k(self, dataset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Clean GSM8K examples: strip whitespace and normalize any equations.

    An example that raises during processing is skipped with a message on
    stderr rather than aborting the whole run.
    """
    cleaned: List[Dict[str, Any]] = []
    for sample in dataset:
        try:
            record = {
                "question": sample["question"].strip(),
                "answer": sample["answer"].strip(),
            }

            # Only invoke the normalizer on text that actually looks like
            # it contains an equation.
            for key in ("question", "answer"):
                if "=" in record[key]:
                    record[key] = self.normalize_equation(record[key])

            cleaned.append(record)
        except Exception as exc:
            print(f"Error processing GSM8K example: {str(exc)}", file=sys.stderr)
    return cleaned
|
102 |
+
|
103 |
+
def process_proofnet(self, dataset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Process ProofNet examples into {problem, solution, proof_steps} records.

    The upstream ``hoskinson-center/proofnet`` split exposes
    ``nl_statement`` / ``nl_proof`` field names rather than ``problem`` /
    ``solution``; both spellings are accepted here.  (Reading only the
    latter previously produced a file of all-empty records.)  Dataset- and
    example-level diagnostics are printed to help debug schema mismatches.
    """
    processed: List[Dict[str, Any]] = []
    error_count = 0

    # Dataset-level diagnostics.
    print("\nProofNet dataset info:")
    print(f"Dataset type: {type(dataset)}")
    if hasattr(dataset, 'features'):
        print("\nDataset features:")
        for feature, dtype in dataset.features.items():
            print(f"{feature}: {dtype}")

    # First-example diagnostics.
    if len(dataset) > 0:
        first_example = dataset[0]
        print("\nFirst example keys:", list(first_example.keys()))
        print("\nFirst example preview:")
        for key, value in first_example.items():
            print(f"\n{key}:")
            print(f"Type: {type(value)}")
            if isinstance(value, str):
                print(f"Length: {len(value)}")
            elif isinstance(value, list):
                print(f"List length: {len(value)}")
                if len(value) > 0:
                    print(f"First item type: {type(value[0])}")
        print("\n")

    for idx, example in enumerate(dataset):
        try:
            # Accept both the canonical field names and ProofNet's native
            # ones.  NOTE(review): nl_statement/nl_proof mapping assumed
            # from the upstream dataset card — confirm against a sample.
            processed_example = {
                "problem": (example.get("problem") or example.get("nl_statement") or "").strip(),
                "solution": (example.get("solution") or example.get("nl_proof") or "").strip(),
                "proof_steps": []
            }

            # Proof steps may arrive as a newline-joined blob or a list.
            if "proof_steps" in example:
                steps = example["proof_steps"]
                print(f"\nExample {idx} proof steps info:")
                print(f"Type: {type(steps)}")
                if isinstance(steps, str):
                    print(f"Length: {len(steps)}")
                    steps = steps.split('\n')
                    print(f"Split into {len(steps)} steps")
                elif isinstance(steps, list):
                    print(f"List length: {len(steps)}")
                    if len(steps) > 0:
                        print(f"First item type: {type(steps[0])}")
                else:
                    print(f"Warning: Unexpected proof steps type: {type(steps)}")
                    steps = []

                processed_example["proof_steps"] = self.process_proof_steps(steps)

            # Normalize anything that looks like an equation.
            for field in ["problem", "solution"]:
                if "=" in processed_example[field]:
                    try:
                        processed_example[field] = self.normalize_equation(processed_example[field])
                    except Exception as e:
                        print(f"Error normalizing {field} in ProofNet example {idx}: {str(e)}")

            processed.append(processed_example)
        except Exception as e:
            print(f"Error processing ProofNet example {idx}: {str(e)}")
            error_count += 1

    print(f"\nProcessed {len(processed)} examples from ProofNet")
    print(f"Encountered {error_count} errors during processing")
    return processed
|
176 |
+
|
177 |
+
def save_to_jsonl(self, data: List[Dict[str, Any]], filename: str):
    """Write *data* to ``self.output_dir/filename`` as JSON Lines.

    One JSON object per line, using the standard-library ``json`` module
    (drops the third-party ``jsonlines`` dependency for this one-line
    format while producing identical output).  Returns the path written.
    """
    filepath = self.output_dir / filename
    with open(filepath, 'w', encoding='utf-8') as fh:
        for record in data:
            fh.write(json.dumps(record) + "\n")
    return filepath
|
183 |
+
|
184 |
+
def print_memory_usage(self):
    """Report this process's resident set size (RSS) in megabytes."""
    rss_bytes = psutil.Process().memory_info().rss
    print(f"Current memory usage: {rss_bytes / 1024 / 1024:.2f} MB")
|
189 |
+
|
190 |
+
def print_sample(self, data: List[Dict[str, Any]], count: int = 3):
    """Pretty-print up to *count* examples from *data*.

    ProofNet-style records (those carrying a ``proof_steps`` key) get a
    field-by-field rendering; anything else (GSM8K-style) is dumped as
    indented JSON.
    """
    print("\nSample data:")
    for i, example in enumerate(data[:count]):
        print(f"\nSample {i+1}:")
        if "proof_steps" not in example:
            # GSM8K-style record: a plain JSON dump is clearest.
            print(json.dumps(example, indent=2))
            continue
        # ProofNet-style record.
        print(f"Problem: {example['problem']}")
        print(f"Solution: {example['solution']}")
        print("\nProof Steps:")
        for step in example["proof_steps"]:
            print(f"- {step['text']}")
|
205 |
+
|
206 |
+
def main():
    """Download, process, preview, and persist the GSM8K and ProofNet datasets.

    GSM8K is treated as mandatory; a ProofNet failure is logged and the run
    continues with GSM8K only.  Outputs go to the preparer's output
    directory as JSONL files.
    """
    preparer = MathDataPreparer()

    # Load and process GSM8K (required — any failure here aborts the run).
    print("\nProcessing GSM8K dataset...")
    gsm8k_dataset = load_dataset("gsm8k", "main", split="train")
    print(f"Loaded {len(gsm8k_dataset)} samples from GSM8K")

    processed_gsm8k = preparer.process_gsm8k(gsm8k_dataset)
    print(f"Processed {len(processed_gsm8k)} samples")

    # Show a few examples so the operator can eyeball the formatting.
    preparer.print_sample(processed_gsm8k)

    # Save GSM8K
    gsm8k_path = preparer.save_to_jsonl(processed_gsm8k, "gsm8k_processed.jsonl")
    print(f"\nSaved GSM8K processed data to: {gsm8k_path}")

    # Load and process ProofNet (best-effort: failures are caught below).
    print("\nProcessing ProofNet dataset...")
    try:
        proofnet_dataset = load_dataset("hoskinson-center/proofnet", split="validation")
        print(f"Loaded {len(proofnet_dataset)} samples from ProofNet")

        processed_proofnet = preparer.process_proofnet(proofnet_dataset)
        print(f"Processed {len(processed_proofnet)} samples")

        preparer.print_sample(processed_proofnet)

        # Save ProofNet
        proofnet_path = preparer.save_to_jsonl(processed_proofnet, "proofnet_processed.jsonl")
        print(f"\nSaved ProofNet processed data to: {proofnet_path}")
    except Exception as e:
        # Broad catch is deliberate: network/schema problems with ProofNet
        # should not discard the already-saved GSM8K output.
        print(f"Error processing ProofNet dataset: {str(e)}")
        print("Continuing with GSM8K data only")

    # Print memory usage
    preparer.print_memory_usage()
|
243 |
+
|
244 |
+
# Run the data-preparation pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
|
math_expert/processed_data/gsm8k_processed.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
math_expert/processed_data/proofnet_processed.jsonl
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
2 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
3 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
4 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
5 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
6 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
7 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
8 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
9 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
10 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
11 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
12 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
13 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
14 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
15 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
16 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
17 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
18 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
19 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
20 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
21 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
22 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
23 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
24 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
25 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
26 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
27 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
28 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
29 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
30 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
31 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
32 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
33 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
34 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
35 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
36 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
37 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
38 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
39 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
40 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
41 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
42 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
43 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
44 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
45 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
46 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
47 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
48 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
49 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
50 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
51 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
52 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
53 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
54 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
55 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
56 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
57 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
58 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
59 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
60 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
61 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
62 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
63 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
64 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
65 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
66 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
67 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
68 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
69 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
70 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
71 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
72 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
73 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
74 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
75 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
76 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
77 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
78 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
79 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
80 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
81 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
82 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
83 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
84 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
85 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
86 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
87 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
88 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
89 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
90 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
91 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
92 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
93 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
94 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
95 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
96 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
97 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
98 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
99 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
100 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
101 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
102 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
103 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
104 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
105 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
106 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
107 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
108 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
109 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
110 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
111 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
112 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
113 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
114 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
115 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
116 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
117 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
118 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
119 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
120 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
121 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
122 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
123 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
124 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
125 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
126 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
127 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
128 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
129 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
130 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
131 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
132 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
133 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
134 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
135 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
136 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
137 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
138 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
139 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
140 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
141 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
142 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
143 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
144 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
145 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
146 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
147 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
148 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
149 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
150 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
151 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
152 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
153 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
154 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
155 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
156 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
157 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
158 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
159 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
160 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
161 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
162 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
163 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
164 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
165 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
166 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
167 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
168 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
169 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
170 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
171 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
172 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
173 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
174 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
175 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
176 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
177 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
178 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
179 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
180 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
181 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
182 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
183 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
184 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
185 |
+
{"problem": "", "solution": "", "proof_steps": []}
|
math_expert/requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers>=4.30.0
|
2 |
+
sympy>=1.11.1
|
3 |
+
torch>=2.0.0
|
4 |
+
numpy>=1.24.0
|
5 |
+
scipy>=1.10.0
|
6 |
+
pandas>=2.0.0
|
7 |
+
huggingface_hub>=0.16.0
|
8 |
+
jsonlines>=3.0.0
|
9 |
+
pyyaml>=5.4.1
|
10 |
+
datasets>=2.14.0
|
11 |
+
psutil>=5.9.0
|
math_expert/train.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import TrainingArguments, Trainer
|
2 |
+
from datasets import load_dataset
|
3 |
+
import jsonlines
|
4 |
+
import os
|
5 |
+
import torch
|
6 |
+
from model import Transformer, ModelArgs
|
7 |
+
from tokenizer import Tokenizer
|
8 |
+
|
9 |
+
class MathDataset(torch.utils.data.Dataset):
    """Causal-LM dataset over mixed GSM8K / ProofNet JSONL files.

    Each item is rendered to a single training string, tokenized to a fixed
    length, and returned as ``input_ids`` / ``attention_mask`` / ``labels``.
    Labels mirror the input ids, but padding positions are masked with -100
    so they are excluded from the cross-entropy loss (previously the raw
    padded ids were used as labels, training the model to emit pad tokens).
    """

    def __init__(self, tokenizer, data_paths, max_length=512):
        """Load and concatenate every JSONL file in *data_paths*.

        tokenizer: HF-style callable tokenizer with padding support.
        data_paths: iterable of JSONL file paths.
        max_length: fixed sequence length for padding/truncation.
        """
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.data = []

        for path in data_paths:
            with jsonlines.open(path) as reader:
                self.data.extend(list(reader))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        example = self.data[idx]

        # Render the example as one training string.
        if "proof_steps" in example:
            # ProofNet-style record.
            text = f"Problem: {example['problem']}\nSolution: {example['solution']}\nProof Steps:\n"
            for step in example["proof_steps"]:
                text += f"- {step['text']}\n"
        else:
            # GSM8K-style record.
            text = f"Question: {example['question']}\nAnswer: {example['answer']}"

        inputs = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt"
        )

        # Drop the batch dimension added by return_tensors="pt".
        inputs = {k: v.squeeze(0) for k, v in inputs.items()}

        # Causal-LM labels: copy the ids and mask padding with the
        # conventional ignore-index -100 so pad tokens carry no loss.
        labels = inputs["input_ids"].clone()
        labels[inputs["attention_mask"] == 0] = -100

        return {
            "input_ids": inputs["input_ids"],
            "attention_mask": inputs["attention_mask"],
            "labels": labels
        }
|
53 |
+
|
54 |
+
def main():
    """Train the custom Transformer on the processed math datasets.

    Builds the model and tokenizer, assembles the combined GSM8K/ProofNet
    dataset, runs a Hugging Face ``Trainer`` loop, and saves the weights
    and config at the end.
    """
    # Initialize the custom model.
    model_args = ModelArgs(
        dim=512,
        n_layers=8,
        n_heads=8,
        vocab_size=50000,  # NOTE(review): must match the tokenizer's actual vocab size — confirm
        max_seq_len=1024
    )
    model = Transformer(model_args)

    # Initialize the custom tokenizer.
    # NOTE(review): assumes the custom Tokenizer exposes HF-style
    # pad_token / eos_token attributes — confirm against tokenizer.py.
    tokenizer = Tokenizer()
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Training data paths (relative to this file).
    data_dir = os.path.join(os.path.dirname(__file__), "processed_data")
    data_paths = [
        os.path.join(data_dir, "gsm8k_processed.jsonl"),
        os.path.join(data_dir, "proofnet_processed.jsonl")
    ]

    dataset = MathDataset(
        tokenizer=tokenizer,
        data_paths=data_paths,
        max_length=1024  # longer sequences to fit full proofs
    )

    # Training configuration.  Evaluation is disabled: no eval dataset is
    # provided, and the previous evaluation_strategy="steps" +
    # load_best_model_at_end=True combination makes Trainer raise at
    # construction time when eval_dataset is None.
    training_args = TrainingArguments(
        output_dir="./math_expert_output",
        overwrite_output_dir=True,
        num_train_epochs=3,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        save_steps=1000,
        save_total_limit=2,
        logging_dir="./math_expert_logs",
        logging_steps=100,
        learning_rate=5e-5,
        warmup_steps=500,
        weight_decay=0.01,
        fp16=torch.cuda.is_available()  # mixed precision only on GPU
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        tokenizer=tokenizer,
    )

    print("Starting training with your custom model...")
    trainer.train()

    # Persist weights and config.
    output_dir = "./math_expert_model"
    os.makedirs(output_dir, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(output_dir, "pytorch_model.bin"))
    model_args.save(os.path.join(output_dir, "config.json"))  # assumes ModelArgs provides save() — TODO confirm
    print(f"Model saved to {output_dir}")
|
124 |
+
|
125 |
+
# Script entry point: kick off training only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|
math_expert/validation.py
ADDED
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Validation module for the Math Expert model
|
3 |
+
"""
|
4 |
+
import os
|
5 |
+
import json
|
6 |
+
from pathlib import Path
|
7 |
+
import hashlib
|
8 |
+
import datetime
|
9 |
+
from typing import Dict, Any, List, Optional
|
10 |
+
import numpy as np
|
11 |
+
from sympy import simplify, Eq
|
12 |
+
|
13 |
+
class MathValidator:
    """Validation utilities for Math Expert outputs.

    Checks equations and proof steps symbolically (via sympy) and persists
    validation checkpoints and reports as JSON files under
    ``<checkpoint_dir>/validation``.
    """

    def __init__(self, checkpoint_dir: str = "checkpoints"):
        """Create the checkpoint and validation directories if needed."""
        self.checkpoint_dir = Path(checkpoint_dir)
        self.checkpoint_dir.mkdir(exist_ok=True)
        self.validation_dir = self.checkpoint_dir / "validation"
        self.validation_dir.mkdir(exist_ok=True)

        # Rolling metric accumulators (appended to by callers).
        self.metrics = {
            "accuracy": [],
            "equation_simplification": [],
            "proof_validation": [],
            "memory_usage": []
        }

    def validate_equation(self, equation: str, expected_result: str) -> Dict[str, Any]:
        """Check whether *equation* and *expected_result* simplify equally.

        Returns ``is_correct``, both simplified forms, and a 0/1
        ``validation_score``; parse failures yield score 0.0 plus the
        error message.
        """
        try:
            lhs = simplify(equation)
            rhs = simplify(expected_result)
            # Structural equality of the simplified sympy expressions.
            is_correct = lhs == rhs
            return {
                "is_correct": is_correct,
                "simplified_lhs": str(lhs),
                "simplified_rhs": str(rhs),
                "validation_score": float(is_correct)
            }
        except Exception as e:
            return {
                "is_correct": False,
                "error": str(e),
                "validation_score": 0.0
            }

    def validate_proof(self, proof_steps: List[str], expected_theorem: str) -> Dict[str, Any]:
        """Heuristically score a sequence of proof steps.

        Steps of the form ``lhs = rhs`` are checked symbolically and each
        incorrect one multiplies the score by 0.9; non-equation steps are
        skipped.  The proof counts as valid while the score stays above
        0.5.  An empty step list is reported as invalid explicitly
        (previously it fell through to ``proof_steps[-1]`` and surfaced
        only as an opaque IndexError payload).
        """
        try:
            if not proof_steps:
                return {
                    "is_valid": False,
                    "validation_score": 0.0,
                    "matches_theorem": False,
                    "context_size": 0
                }

            seen = set()
            validation_score = 1.0

            for step in proof_steps:
                try:
                    lhs, rhs = step.split('=')
                    # Truth-testing a sympy Eq can raise TypeError for
                    # unevaluated relations, so compare the difference
                    # of both sides to zero instead.
                    if simplify(f"({lhs}) - ({rhs})") != 0:
                        validation_score *= 0.9  # penalize incorrect steps
                except Exception:
                    pass  # not every step is a single equation
                seen.add(step)

            # Does the final step state the expected theorem?
            matches_theorem = expected_theorem in proof_steps[-1]

            return {
                "is_valid": validation_score > 0.5,
                "validation_score": validation_score,
                "matches_theorem": matches_theorem,
                "context_size": len(seen)
            }
        except Exception as e:
            return {
                "is_valid": False,
                "error": str(e),
                "validation_score": 0.0
            }

    def create_checkpoint(self, data: Dict[str, Any], name: Optional[str] = None) -> str:
        """Persist *data* (plus a timestamp and content hash) as a checkpoint.

        NOTE: mutates *data* in place by adding ``timestamp`` and ``hash``.
        Returns the path of the written JSON file.
        """
        if name is None:
            name = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

        checkpoint_path = self.validation_dir / f"checkpoint_{name}.json"

        data["timestamp"] = str(datetime.datetime.now())
        data["hash"] = hashlib.sha256(str(data).encode()).hexdigest()

        with open(checkpoint_path, 'w') as f:
            json.dump(data, f, indent=2)

        return str(checkpoint_path)

    def load_checkpoint(self, name: str) -> Optional[Dict[str, Any]]:
        """Load a checkpoint by name, or return None if it does not exist."""
        checkpoint_path = self.validation_dir / f"checkpoint_{name}.json"
        if not checkpoint_path.exists():
            return None

        with open(checkpoint_path, 'r') as f:
            return json.load(f)

    def validate_dataset(self, dataset: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Validate every example, collecting per-item results and a mean score."""
        results = []
        error_count = 0

        for example in dataset:
            try:
                if "equation" in example:
                    results.append(self.validate_equation(
                        example["equation"],
                        example.get("expected_result", "")
                    ))
                if "proof_steps" in example:
                    results.append(self.validate_proof(
                        example["proof_steps"],
                        example.get("theorem", "")
                    ))
            except Exception as e:
                error_count += 1
                results.append({
                    "error": str(e),
                    "validation_score": 0.0
                })

        # Overall metric: mean of every per-item validation score.
        scores = [r["validation_score"] for r in results if "validation_score" in r]
        avg_score = np.mean(scores) if scores else 0.0

        return {
            "total_examples": len(dataset),
            "processed_examples": len(results),
            "error_count": error_count,
            "average_score": float(avg_score),
            "detailed_results": results
        }

    def save_validation_report(self, report: Dict[str, Any], name: Optional[str] = None) -> str:
        """Write *report* (plus a timestamp and summary metrics) to disk.

        Guards against division by zero when the report covers zero
        examples: the previous ``report.get("total_examples", 1)`` only
        protected against a *missing* key, not an explicit 0.
        Returns the path of the written report.
        """
        if name is None:
            name = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

        report_path = self.validation_dir / f"report_{name}.json"

        total = report.get("total_examples", 0)
        report["timestamp"] = str(datetime.datetime.now())
        report["summary"] = {
            "accuracy": report.get("average_score", 0.0),
            "error_rate": (report.get("error_count", 0) / total) if total else 0.0
        }

        with open(report_path, 'w') as f:
            json.dump(report, f, indent=2)

        return str(report_path)
|