#!/usr/bin/env python
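"""Evaluate audio-separator models against the MUSDB18-HQ dataset.

For each supported model, this script separates a prioritized set of MUSDB18
tracks, scores standard vocals/instrumental output with museval (SDR/SIR/SAR/ISR),
records separation speed as seconds per minute of audio, and maintains a combined
JSON results file with per-track and median scores. Roformer and under-evaluated
models are prioritized, and a stop-signal file allows a graceful, resumable stop.
"""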
import os
import time
import museval
import numpy as np
import soundfile as sf
from audio_separator.separator import Separator
import json
from json import JSONEncoder
import logging
import musdb
from decimal import Decimal
import tempfile
import argparse
# Setup logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
# Custom JSON Encoder to handle Decimal types
class DecimalEncoder(JSONEncoder):
    def default(self, obj):
        if isinstance(obj, Decimal):
            return float(obj)
        return super().default(obj)
MUSDB_PATH = "/Volumes/Nomad4TBOne/python-audio-separator/tests/model-metrics/datasets/musdb18hq"
RESULTS_PATH = "/Volumes/Nomad4TBOne/python-audio-separator/tests/model-metrics/results"
COMBINED_RESULTS_PATH = "/Users/andrew/Projects/python-audio-separator/audio_separator/models-scores.json"
COMBINED_MUSEVAL_RESULTS_PATH = "/Volumes/Nomad4TBOne/python-audio-separator/tests/model-metrics/results/combined-museval-results.json"
STOP_SIGNAL_PATH = "/Volumes/Nomad4TBOne/python-audio-separator/tests/model-metrics/stop-signal"
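# NOTE: the absolute paths above are machine-specific; adjust them to point at
# your own MUSDB18-HQ dataset and results locations before running.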
def load_combined_results():
    """Load the combined museval results file"""

    # Defined at function scope so the backup-loading path below can use it too
    # (previously it was defined inside the first open(), so it was undefined if
    # opening the primary file failed).
    def decimal_parser(dct):
        for k, v in dct.items():
            if isinstance(v, str) and v.replace(".", "").isdigit():
                try:
                    dct[k] = float(v)
                except (ValueError, TypeError):
                    pass
        return dct

    if os.path.exists(COMBINED_MUSEVAL_RESULTS_PATH):
        logger.info("Loading combined museval results...")
        try:
            with open(COMBINED_MUSEVAL_RESULTS_PATH, "r") as f:
                return json.load(f, object_hook=decimal_parser)
        except Exception as e:
            logger.error(f"Error loading combined results: {str(e)}")
            # Try to load a backup file if it exists
            backup_path = COMBINED_MUSEVAL_RESULTS_PATH + ".backup"
            if os.path.exists(backup_path):
                logger.info("Attempting to load backup file...")
                try:
                    with open(backup_path, "r") as f:
                        return json.load(f, object_hook=decimal_parser)
                except Exception as backup_e:
                    logger.error(f"Error loading backup file: {str(backup_e)}")
            return {}
    else:
        logger.info("No combined results file found, creating new one")
        return {}

def save_combined_results(combined_results):
    """Save the combined museval results file"""
    logger.info("Saving combined museval results...")
    try:
        # Create a backup of the existing file if it exists
        if os.path.exists(COMBINED_MUSEVAL_RESULTS_PATH):
            backup_path = COMBINED_MUSEVAL_RESULTS_PATH + ".backup"
            try:
                with open(COMBINED_MUSEVAL_RESULTS_PATH, "r") as src, open(backup_path, "w") as dst:
                    dst.write(src.read())
            except Exception as e:
                logger.error(f"Error creating backup file: {str(e)}")
        # Save the new results using the custom encoder
        with open(COMBINED_MUSEVAL_RESULTS_PATH, "w") as f:
            json.dump(combined_results, f, cls=DecimalEncoder, indent=2)
        logger.info("Combined results saved successfully")
        return True
    except Exception as e:
        logger.error(f"Error saving combined results: {str(e)}")
        return False

def update_combined_results(model_name, track_name, track_data):
    """Update the combined results file with new track data"""
    try:
        # Load existing combined results
        combined_results = load_combined_results()
        # Initialize model entry if it doesn't exist
        if model_name not in combined_results:
            combined_results[model_name] = {}
        # Add or update track data
        combined_results[model_name][track_name] = track_data
        # Write updated results back to file
        save_combined_results(combined_results)
        return True
    except Exception as e:
        logger.error(f"Error updating combined results: {str(e)}")
        return False

def check_track_evaluated(model_name, track_name):
    """Check if a track has already been evaluated for a specific model"""
    combined_results = load_combined_results()
    return model_name in combined_results and track_name in combined_results[model_name]


def get_track_results(model_name, track_name):
    """Get the evaluation results for a specific track and model"""
    combined_results = load_combined_results()
    if model_name in combined_results and track_name in combined_results[model_name]:
        return combined_results[model_name][track_name]
    return None

def get_track_duration(track_path):
    """Get the duration of a track in minutes"""
    try:
        mixture_path = os.path.join(track_path, "mixture.wav")
        info = sf.info(mixture_path)
        return info.duration / 60.0  # Convert seconds to minutes
    except Exception as e:
        logger.error(f"Error getting track duration: {str(e)}")
        return 0.0

def evaluate_track(track_name, track_path, test_model, mus_db):
    """Evaluate a single track using a specific model"""
    logger.info(f"Evaluating track: {track_name} with model: {test_model}")
    # Get track duration in minutes
    track_duration_minutes = get_track_duration(track_path)
    logger.info(f"Track duration: {track_duration_minutes:.2f} minutes")
    # Initialize variables to track processing time
    processing_time = 0
    seconds_per_minute = 0
    # Create a basic result structure that will be returned even if evaluation fails
    basic_model_results = {"track_name": track_name, "scores": {}}
    # Check if evaluation results already exist in combined file
    museval_results = load_combined_results()
    if test_model in museval_results and track_name in museval_results[test_model]:
        logger.info("Found existing evaluation results in combined file...")
        track_data = museval_results[test_model][track_name]
        scores = museval.TrackStore(track_name)
        scores.scores = track_data
        # Try to extract existing speed metrics if available
        try:
            if isinstance(track_data, dict) and "targets" in track_data:
                for target in track_data["targets"]:
                    if "metrics" in target and "seconds_per_minute_m3" in target["metrics"]:
                        basic_model_results["scores"]["seconds_per_minute_m3"] = target["metrics"]["seconds_per_minute_m3"]
                        break
        except Exception:
            pass  # Ignore errors in extracting existing speed metrics
    else:
        # Expanded stem mapping to include "no-stem" outputs and custom stem formats
        stem_mapping = {
            # Standard stems
            "Vocals": "vocals",
            "Instrumental": "instrumental",
            "Drums": "drums",
            "Bass": "bass",
            "Other": "other",
            # No-stem variants
            "No Drums": "nodrums",
            "No Bass": "nobass",
            "No Other": "noother",
            # Custom stem formats (with hyphens)
            "Drum-Bass": "drumbass",
            "No Drum-Bass": "nodrumbass",
            "Vocals-Other": "vocalsother",
            "No Vocals-Other": "novocalsother",
        }
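        # Each key is a stem name the separator may report; each value becomes the
        # output file's basename, so the checks below can look for predictable
        # filenames such as vocals.wav and instrumental.wav in the temp directory.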
        # Create a temporary directory for separation files
        with tempfile.TemporaryDirectory() as temp_dir:
            logger.info(f"Using temporary directory: {temp_dir}")
            # Measure separation time
            start_time = time.time()
            # Perform separation
            logger.info("Performing separation...")
            separator = Separator(output_dir=temp_dir)
            separator.load_model(model_filename=test_model)
            separator.separate(os.path.join(track_path, "mixture.wav"), custom_output_names=stem_mapping)
            # Calculate processing time
            processing_time = time.time() - start_time
            seconds_per_minute = processing_time / track_duration_minutes if track_duration_minutes > 0 else 0
            logger.info(f"Separation completed in {processing_time:.2f} seconds")
            logger.info(f"Processing speed: {seconds_per_minute:.2f} seconds per minute of audio")
            # Always add the speed metric to our basic results
            basic_model_results["scores"]["seconds_per_minute_m3"] = round(seconds_per_minute, 1)
            # Check which stems were actually created
            wav_files = [f for f in os.listdir(temp_dir) if f.endswith(".wav")]
            logger.info(f"Found WAV files: {wav_files}")
            # Determine if this is a standard vocal/instrumental model that can be evaluated with museval
            standard_model = False
            if len(wav_files) == 2:
                # Check if one of the files is named vocals.wav or instrumental.wav
                if "vocals.wav" in wav_files and "instrumental.wav" in wav_files:
                    standard_model = True
                    logger.info("Detected standard vocals/instrumental model, will run museval evaluation")
            # If not a standard model, skip museval evaluation and just return speed metrics
            if not standard_model:
                logger.info(f"Non-standard stem configuration detected for model {test_model}, skipping museval evaluation")
                # Store the speed metric in the combined results
                if test_model not in museval_results:
                    museval_results[test_model] = {}
                # Create a minimal structure for the speed metric
                minimal_results = {"targets": [{"name": "speed_metrics_only", "metrics": {"seconds_per_minute_m3": round(seconds_per_minute, 1)}}]}
                museval_results[test_model][track_name] = minimal_results
                save_combined_results(museval_results)
                return None, basic_model_results
            # For standard models, proceed with museval evaluation
            available_stems = {}
            available_stems["vocals"] = os.path.join(temp_dir, "vocals.wav")
            available_stems["accompaniment"] = os.path.join(temp_dir, "instrumental.wav")
            # Get track from MUSDB
            track = next((t for t in mus_db if t.name == track_name), None)
            if track is None:
                raise ValueError(f"Track {track_name} not found in MUSDB18")
            # Load available stems
            estimates = {}
            for stem_name, stem_path in available_stems.items():
                audio, _ = sf.read(stem_path)
                if len(audio.shape) == 1:
                    audio = np.expand_dims(audio, axis=1)
                estimates[stem_name] = audio
            # Evaluate using museval
            logger.info(f"Evaluating stems: {list(estimates.keys())}")
            try:
                scores = museval.eval_mus_track(track, estimates, output_dir=temp_dir, mode="v4")
                # Add the speed metric to the scores
                if not hasattr(scores, "speed_metric_added"):
                    for target in scores.scores["targets"]:
                        if "metrics" not in target:
                            target["metrics"] = {}
                        target["metrics"]["seconds_per_minute_m3"] = round(seconds_per_minute, 1)
                    scores.speed_metric_added = True
                # Update the combined results file with the new evaluation
                if test_model not in museval_results:
                    museval_results[test_model] = {}
                museval_results[test_model][track_name] = scores.scores
                save_combined_results(museval_results)
            except Exception as e:
                logger.error(f"Error during museval evaluation: {str(e)}")
                logger.exception("Evaluation exception details:")
                # Return basic results with just the speed metric
                return None, basic_model_results
    try:
        # Only process museval results if we have them
        if "scores" in locals() and scores is not None:
            # Calculate aggregate scores for available stems
            results_store = museval.EvalStore()
            results_store.add_track(scores.df)
            methods = museval.MethodStore()
            methods.add_evalstore(results_store, name=test_model)
            agg_scores = methods.agg_frames_tracks_scores()
            # Return the aggregate scores in a structured format with 6 significant figures
            model_results = {"track_name": track_name, "scores": {}}
            for stem in ["vocals", "drums", "bass", "other", "accompaniment"]:
                try:
                    stem_scores = {metric: float(f"{agg_scores.loc[(test_model, stem, metric)]:.6g}") for metric in ["SDR", "SIR", "SAR", "ISR"]}
                    # Rename 'accompaniment' to 'instrumental' in the output
                    output_stem = "instrumental" if stem == "accompaniment" else stem
                    model_results["scores"][output_stem] = stem_scores
                except KeyError:
                    continue
            # Add the seconds_per_minute_m3 metric if it was calculated
            if processing_time > 0 and track_duration_minutes > 0:
                model_results["scores"]["seconds_per_minute_m3"] = round(seconds_per_minute, 1)
            return scores, (model_results if model_results["scores"] else basic_model_results)
        else:
            # If we don't have scores, just return the basic results with speed metrics
            return None, basic_model_results
    except Exception as e:
        logger.error(f"Error processing evaluation results: {str(e)}")
        logger.exception("Results processing exception details:")
        # Return basic results with just the speed metric
        return None, basic_model_results

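# NOTE: the helper below is currently unused within this script; it complements
# DecimalEncoder for callers that need in-place conversion rather than JSON output.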
def convert_decimal_to_float(obj):
    """Recursively converts Decimal objects to floats in a nested structure."""
    if isinstance(obj, Decimal):
        return float(obj)
    elif isinstance(obj, dict):
        return {k: convert_decimal_to_float(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [convert_decimal_to_float(x) for x in obj]
    return obj

def calculate_median_scores(track_scores):
    """Calculate median scores across all tracks for each stem and metric"""
    # Initialize containers for each stem's metrics
    stem_metrics = {
        "vocals": {"SDR": [], "SIR": [], "SAR": [], "ISR": []},
        "drums": {"SDR": [], "SIR": [], "SAR": [], "ISR": []},
        "bass": {"SDR": [], "SIR": [], "SAR": [], "ISR": []},
        "instrumental": {"SDR": [], "SIR": [], "SAR": [], "ISR": []},
        "seconds_per_minute_m3": [],
    }
    # Collect all scores for each stem and metric
    for track_score in track_scores:
        if track_score is not None and "scores" in track_score:
            # Process audio quality metrics
            for stem, metrics in track_score["scores"].items():
                if stem in stem_metrics and stem != "seconds_per_minute_m3":
                    for metric, value in metrics.items():
                        stem_metrics[stem][metric].append(value)
            # Process speed metric separately
            if "seconds_per_minute_m3" in track_score["scores"]:
                stem_metrics["seconds_per_minute_m3"].append(track_score["scores"]["seconds_per_minute_m3"])
    # Calculate medians for each stem and metric
    median_scores = {}
    for stem, metrics in stem_metrics.items():
        if stem != "seconds_per_minute_m3" and any(metrics.values()):  # Only include stems that have scores
            median_scores[stem] = {metric: float(f"{np.median(values):.6g}") for metric, values in metrics.items() if values}  # Only include metrics that have values
    # Add median speed metric if available
    if stem_metrics["seconds_per_minute_m3"]:
        median_scores["seconds_per_minute_m3"] = round(np.median(stem_metrics["seconds_per_minute_m3"]), 1)
    return median_scores

def check_disk_usage(path):
    """Check inode usage and disk space on the filesystem containing path"""
    import subprocess
    import sys

    # Check disk space first
    result = subprocess.run(["df", "-h", path], capture_output=True, text=True)
    output = result.stdout
    logger.info(f"Current disk usage:\n{output}")
    # Parse the output to get disk usage percentage
    lines = output.strip().split("\n")
    if len(lines) >= 2:
        parts = lines[1].split()
        if len(parts) >= 5:
            try:
                # Extract disk usage percentage
                disk_usage_str = parts[4].rstrip("%")
                disk_usage_pct = int(disk_usage_str)
                logger.info(f"Disk usage: {disk_usage_pct}%")
                if disk_usage_pct >= 99:
                    logger.critical("CRITICAL: Disk is almost full (>=99%)! Cannot continue processing.")
                    logger.critical("Please free up disk space before continuing.")
                    sys.exit(1)
                elif disk_usage_pct > 95:
                    logger.warning(f"WARNING: High disk usage ({disk_usage_pct}%)!")
            except (ValueError, IndexError) as e:
                logger.error(f"Error parsing disk usage: {str(e)}")
    # Now check inode usage
    result = subprocess.run(["df", "-i", path], capture_output=True, text=True)
    output = result.stdout
    logger.info(f"Current inode usage:\n{output}")
    # Parse the output to get inode usage percentage
    lines = output.strip().split("\n")
    if len(lines) >= 2:
        # The second line contains the actual data
        parts = lines[1].split()
        if len(parts) >= 8:  # macOS df -i format has 8 columns
            try:
                # On macOS, inode usage is in the 8th column as a percentage
                inode_usage_str = parts[7].rstrip("%")
                inode_usage_pct = int(inode_usage_str)
                # Also extract the actual inode numbers for better reporting
                iused = int(parts[5])
                ifree = int(parts[6])
                total_inodes = iused + ifree
                # Skip inode check for exFAT or similar filesystems
                if total_inodes <= 1:
                    logger.info("Filesystem appears to be exFAT or similar (no real inode tracking). Skipping inode check.")
                    return None
                logger.info(f"Inode usage: {iused:,}/{total_inodes:,} ({inode_usage_pct}%)")
                if inode_usage_pct >= 100:
                    logger.critical("CRITICAL: Inode usage is at 100%! Cannot continue processing.")
                    logger.critical("Please free up inodes before continuing.")
                    sys.exit(1)
                elif inode_usage_pct > 90:
                    logger.warning(f"WARNING: High inode usage ({inode_usage_pct}%)!")
                return inode_usage_pct
            except (ValueError, IndexError) as e:
                logger.error(f"Error parsing inode usage: {str(e)}")
    return None

def get_evaluated_track_count(model_name, museval_results):
    """Get the number of tracks evaluated for a specific model"""
    if model_name in museval_results:
        return len(museval_results[model_name])
    return 0

def get_most_evaluated_tracks(museval_results, min_count=10):
    """Get tracks that have been evaluated for the most models"""
    track_counts = {}
    # Count how many models have evaluated each track
    for model_name, tracks in museval_results.items():
        for track_name in tracks:
            if track_name not in track_counts:
                track_counts[track_name] = 0
            track_counts[track_name] += 1
    # Sort tracks by evaluation count (descending)
    sorted_tracks = sorted(track_counts.items(), key=lambda x: x[1], reverse=True)
    # Return tracks that have been evaluated at least min_count times
    return [track for track, count in sorted_tracks if count >= min_count]

def generate_summary_statistics(
    start_time, models_processed, tracks_processed, models_with_new_data, tracks_evaluated, total_processing_time, fastest_model=None, slowest_model=None, combined_results_path=None, is_dry_run=False
):
    """Generate a summary of the script's execution"""
    end_time = time.time()
    total_runtime = end_time - start_time
    # Format the runtime
    hours, remainder = divmod(total_runtime, 3600)
    minutes, seconds = divmod(remainder, 60)
    runtime_str = f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}"
    # Build the summary
    summary = [
        "=" * 80,
        "DRY RUN SUMMARY - PREVIEW ONLY" if is_dry_run else "EXECUTION SUMMARY",
        "=" * 80,
        f"Total runtime: {runtime_str}",
        f"Models {'that would be ' if is_dry_run else ''}processed: {models_processed}",
        f"Models {'that would receive' if is_dry_run else 'with'} new data: {len(models_with_new_data)}",
        f"Total tracks {'that would be ' if is_dry_run else ''}evaluated: {tracks_evaluated}",
        f"Average tracks per model: {tracks_evaluated / len(models_with_new_data) if models_with_new_data else 0:.2f}",
    ]
    if fastest_model:
        summary.append(f"Fastest model: {fastest_model['name']} ({fastest_model['speed']:.2f} seconds per minute)")
    if slowest_model:
        summary.append(f"Slowest model: {slowest_model['name']} ({slowest_model['speed']:.2f} seconds per minute)")
    if total_processing_time > 0:
        summary.append(f"Total audio processing time: {total_processing_time:.2f} seconds")
    if combined_results_path and os.path.exists(combined_results_path):
        file_size = os.path.getsize(combined_results_path) / (1024 * 1024)  # Size in MB
        summary.append(f"Results file size: {file_size:.2f} MB")
    # Add models with new data
    if models_with_new_data:
        summary.append(f"\nModels {'that would receive' if is_dry_run else 'with'} new evaluation data:")
        for model_name in models_with_new_data:
            summary.append(f"- {model_name}")
    # Add dry run disclaimer if needed
    if is_dry_run:
        summary.append("\nNOTE: This is a dry run summary. No actual changes were made.")
        summary.append("Run without --dry-run to perform actual evaluations.")
    summary.append("=" * 80)
    return "\n".join(summary)

def check_stop_signal():
    """Check if the stop signal file exists"""
    if os.path.exists(STOP_SIGNAL_PATH):
        logger.info("Stop signal detected at: " + STOP_SIGNAL_PATH)
        return True
    return False

def main():
    # Add command line argument parsing for dry run mode
    parser = argparse.ArgumentParser(description="Run model evaluation on MUSDB18 dataset")
    parser.add_argument("--dry-run", action="store_true", help="Run in dry-run mode (no writes)")
    parser.add_argument("--max-tracks", type=int, default=10, help="Maximum number of tracks to evaluate per model")
    parser.add_argument("--max-models", type=int, default=None, help="Maximum number of models to evaluate")
    args = parser.parse_args()
    # Remove any existing stop signal file at start
    if os.path.exists(STOP_SIGNAL_PATH):
        os.remove(STOP_SIGNAL_PATH)
        logger.info("Removed existing stop signal file")
    # Track start time for progress reporting
    start_time = time.time()
    # Statistics tracking
    models_processed = 0
    tracks_processed = 0
    models_with_new_data = set()
    total_processing_time = 0
    fastest_model = {"name": "", "speed": float("inf")}  # Initialize with infinity for comparison
    slowest_model = {"name": "", "speed": 0}  # Initialize with zero for comparison

    # Create a results cache manager
    class ResultsCache:
        def __init__(self):
            self.results = load_combined_results()
            self.last_update_time = time.time()

        def get_results(self, force=False):
            current_time = time.time()
            # Only reload from disk every 5 minutes unless forced
            if force or (current_time - self.last_update_time) > 300:
                self.results = load_combined_results()
                self.last_update_time = current_time
            return self.results

    results_cache = ResultsCache()

    # Helper function for logging with elapsed time
    def log_with_time(message, level=logging.INFO):
        elapsed = time.time() - start_time
        hours, remainder = divmod(elapsed, 3600)
        minutes, seconds = divmod(remainder, 60)
        time_str = f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}"
        logger.log(level, f"[{time_str}] {message}")

    if args.dry_run:
        log_with_time("*** RUNNING IN DRY-RUN MODE - NO DATA WILL BE MODIFIED ***")
    log_with_time("Starting model evaluation script...")
    os.makedirs(RESULTS_PATH, exist_ok=True)
    # Check disk space and inode usage at start
    check_disk_usage(RESULTS_PATH)
    # Load existing results if available
    combined_results = {}
    if os.path.exists(COMBINED_RESULTS_PATH):
        log_with_time("Loading existing combined results...")
        with open(COMBINED_RESULTS_PATH) as f:
            combined_results = json.load(f)
    # Get initial museval results
    museval_results = results_cache.get_results()
    log_with_time(f"Loaded combined museval results with {len(museval_results)} models")
    # Get the most commonly evaluated tracks
    common_tracks = get_most_evaluated_tracks(museval_results)
    log_with_time(f"Found {len(common_tracks)} commonly evaluated tracks")
    # Initialize MUSDB
    log_with_time("Initializing MUSDB database...")
    mus = musdb.DB(root=MUSDB_PATH, is_wav=True)
    # Create a prioritized list of tracks
    all_tracks = []
    for track in mus.tracks:
        # Check if this is a commonly evaluated track
        is_common = track.name in common_tracks
        all_tracks.append({"name": track.name, "path": os.path.dirname(track.path), "is_common": is_common})
    # Sort tracks by whether they're commonly evaluated
    all_tracks.sort(key=lambda t: 0 if t["is_common"] else 1)
    # Get list of all available models
    log_with_time("Getting list of available models...")
    separator = Separator()
    models_by_type = separator.list_supported_model_files()
    # Flatten the models list and prioritize them
    all_models = []
    for model_type, models in models_by_type.items():
        for model_name, model_info in models.items():
            filename = model_info.get("filename")
            if filename:
                # Count how many tracks have been evaluated for this model
                evaluated_count = get_evaluated_track_count(filename, museval_results)
                # Determine if this is a roformer model
                is_roformer = "roformer" in model_name.lower()
                # Add to the list with priority information
                all_models.append({"name": model_name, "filename": filename, "type": model_type, "info": model_info, "evaluated_count": evaluated_count, "is_roformer": is_roformer})
    # Sort models by priority:
    # 1. Roformer models with fewer than max_tracks evaluations
    # 2. Other models with fewer than max_tracks evaluations
    # 3. Roformer models with more evaluations
    # 4. Other models with more evaluations
    all_models.sort(
        key=lambda m: (
            0 if m["is_roformer"] and m["evaluated_count"] < args.max_tracks else 1 if not m["is_roformer"] and m["evaluated_count"] < args.max_tracks else 2 if m["is_roformer"] else 3,
            m["evaluated_count"],  # Secondary sort by number of evaluations (ascending)
        )
    )
    # Log the prioritized models
    log_with_time(f"Prioritized {len(all_models)} models for evaluation:")
    for i, model in enumerate(all_models[:10]):  # Show top 10
        log_with_time(f"{i+1}. {model['name']} ({model['filename']}) - {model['evaluated_count']} tracks evaluated, roformer: {model['is_roformer']}")
    if len(all_models) > 10:
        log_with_time(f"... and {len(all_models) - 10} more models")
    # Limit the number of models if specified
    if args.max_models:
        all_models = all_models[: args.max_models]
        log_with_time(f"Limited to {args.max_models} models for this run")
    # Process models according to priority
    model_idx = 0
    stop_requested = False
    while model_idx < len(all_models):
        # Check for stop signal before processing each model
        if check_stop_signal():
            log_with_time("Stop signal detected. Will finish current model's tracks and then exit.")
            stop_requested = True
        model = all_models[model_idx]
        model_name = model["name"]
        model_filename = model["filename"]
        model_type = model["type"]
        progress_pct = (model_idx + 1) / len(all_models) * 100
        log_with_time(f"\n=== Processing model {model_idx+1}/{len(all_models)} ({progress_pct:.1f}%): {model_name} ({model_filename}) ===")
        # Initialize model entry if it doesn't exist
        if model_filename not in combined_results:
            log_with_time(f"Initializing new entry for {model_filename}")
            combined_results[model_filename] = {"model_name": model_name, "track_scores": [], "median_scores": {}, "stems": [], "target_stem": None}
        # Try to load the model to get stem information
        try:
            separator.load_model(model_filename=model_filename)
            model_data = separator.model_instance.model_data
            # Extract stem information (similar to your existing code)
            # ... (keep your existing stem extraction logic here)
        except Exception as e:
            log_with_time(f"Error loading model {model_filename}: {str(e)}", logging.ERROR)
            logger.exception("Full exception details:")
            model_idx += 1
            continue
        # Count how many tracks have been evaluated for this model
        # Use the cached results
        evaluated_count = get_evaluated_track_count(model_filename, results_cache.get_results())
        # Determine how many more tracks to evaluate
        tracks_to_evaluate = max(0, args.max_tracks - evaluated_count)
        if tracks_to_evaluate == 0:
            log_with_time(f"Model {model_name} already has {evaluated_count} tracks evaluated (>= {args.max_tracks}). Skipping.")
            model_idx += 1
            continue
        log_with_time(f"Will evaluate up to {tracks_to_evaluate} tracks for model {model_name}")
        # Process tracks for this model (per-model counter, kept separate from the
        # run-wide tracks_processed statistic so the final summary stays accurate)
        model_tracks_processed = 0
        for track in all_tracks:
            # Check for stop signal before each track if we haven't already detected it
            if not stop_requested and check_stop_signal():
                log_with_time("Stop signal detected. Will finish current track and then exit.")
                stop_requested = True
            # Skip if we've processed enough tracks for this model
            if model_tracks_processed >= tracks_to_evaluate:
                break
            track_name = track["name"]
            track_path = track["path"]
            # Skip if track already evaluated for this model
            # Use the cached results
            if model_filename in results_cache.get_results() and track_name in results_cache.get_results()[model_filename]:
                log_with_time(f"Skipping already evaluated track {track_name} for model: {model_filename}")
                continue
            log_with_time(f"Processing track: {track_name} for model: {model_filename}")
            if args.dry_run:
                log_with_time(f"[DRY RUN] Would evaluate track {track_name} with model {model_filename}")
                model_tracks_processed += 1
                tracks_processed += 1
                models_with_new_data.add(model_filename)
                # Estimate processing time based on model type for dry run
                # This is a rough estimate - roformer models are typically slower
                estimated_speed = 30.0  # Default estimate: 30 seconds per minute
                if "roformer" in model_name.lower():
                    estimated_speed = 45.0  # Roformer models are typically slower
                elif "umx" in model_name.lower():
                    estimated_speed = 20.0  # UMX models are typically faster
                # Update statistics with estimated values
                total_processing_time += estimated_speed
                # Track fastest and slowest models based on estimates
                if estimated_speed < fastest_model["speed"]:
                    fastest_model = {"name": model_name, "speed": estimated_speed}
                if estimated_speed > slowest_model["speed"]:
                    slowest_model = {"name": model_name, "speed": estimated_speed}
                continue
            try:
                result = evaluate_track(track_name, track_path, model_filename, mus)
                # Unpack the result safely
                if result and isinstance(result, tuple) and len(result) == 2:
                    _, model_results = result
                else:
                    model_results = None
                # Process the results if they exist and are valid
                if model_results is not None and isinstance(model_results, dict):
                    combined_results[model_filename]["track_scores"].append(model_results)
                    model_tracks_processed += 1
                    tracks_processed += 1
                    models_with_new_data.add(model_filename)
                    # Track processing time statistics - safely access nested dictionaries
                    scores = model_results.get("scores", {})
                    if isinstance(scores, dict):
                        speed = scores.get("seconds_per_minute_m3")
                        if speed is not None:
                            total_processing_time += speed  # Accumulate total processing time
                            # Track fastest and slowest models
                            if speed < fastest_model["speed"]:
                                fastest_model = {"name": model_name, "speed": speed}
                            if speed > slowest_model["speed"]:
                                slowest_model = {"name": model_name, "speed": speed}
                else:
                    log_with_time(f"Skipping model {model_filename} for track {track_name} due to no evaluatable stems or invalid results")
            except Exception as e:
                log_with_time(f"Error evaluating model {model_filename} with track {track_name}: {str(e)}", logging.ERROR)
                logger.exception("Exception details:")
                continue
            # Update and save results
            if combined_results[model_filename]["track_scores"]:
                median_scores = calculate_median_scores(combined_results[model_filename]["track_scores"])
                combined_results[model_filename]["median_scores"] = median_scores
                # Save results after each track
                if not args.dry_run:
                    os.makedirs(os.path.dirname(COMBINED_RESULTS_PATH), exist_ok=True)
                    with open(COMBINED_RESULTS_PATH, "w", encoding="utf-8") as f:
                        json.dump(combined_results, f, indent=2)
                    log_with_time(f"Updated combined results file with {model_filename} - {track_name}")
                    # Force update the cache after saving
                    results_cache.get_results(force=True)
                else:
                    log_with_time(f"[DRY RUN] Would have updated combined results for {model_filename} - {track_name}")
            # Check disk space periodically
            check_disk_usage(RESULTS_PATH)
        log_with_time(f"Completed processing {model_tracks_processed} tracks for model {model_name}")
        # If stop was requested, exit after completing the current model
        if stop_requested:
            log_with_time("Stop signal processed. Generating final summary before exit.")
            break
        # If we're processing a non-roformer model, check if there are roformer models that need evaluation
        if not model["is_roformer"]:
            # Find roformer models that still need more evaluations
            # Use the cached results
            roformer_models_needing_eval = []
            for i, m in enumerate(all_models[model_idx + 1 :], start=model_idx + 1):
                if m["is_roformer"]:
                    eval_count = get_evaluated_track_count(m["filename"], results_cache.get_results())
                    if eval_count < args.max_tracks:
                        roformer_models_needing_eval.append((i, m))
            if roformer_models_needing_eval:
                log_with_time(f"Found {len(roformer_models_needing_eval)} roformer models that still need evaluation. Reprioritizing...")
                # Move these models to the front of the remaining queue
                for offset, (i, m) in enumerate(roformer_models_needing_eval):
                    # Adjust index for models we've already moved
                    adjusted_idx = i - offset
                    # Move this model right after the current one
                    all_models.insert(model_idx + 1, all_models.pop(adjusted_idx))
                log_with_time("Reprioritization complete. Continuing with highest priority model.")
        # Move to the next model
        model_idx += 1
        models_processed += 1
    log_with_time("Evaluation complete")
    # Final disk space check
    check_disk_usage(RESULTS_PATH)
    # Generate and display summary statistics
    # Reset fastest/slowest models if they weren't updated
    if fastest_model["speed"] == float("inf"):
        fastest_model = None
    if slowest_model["speed"] == 0:
        slowest_model = None
    summary = generate_summary_statistics(
        start_time=start_time,
        models_processed=models_processed,
        tracks_processed=tracks_processed,
        models_with_new_data=models_with_new_data,
        tracks_evaluated=tracks_processed,
        total_processing_time=total_processing_time,
        fastest_model=fastest_model,
        slowest_model=slowest_model,
        combined_results_path=COMBINED_RESULTS_PATH,
        is_dry_run=args.dry_run,
    )
    log_with_time("\n" + summary)
    # Also write summary to a log file
    summary_filename = "dry_run_summary.log" if args.dry_run else "evaluation_summary.log"
    if stop_requested:
        summary_filename = "stopped_" + summary_filename
    summary_log_path = os.path.join(os.path.dirname(COMBINED_RESULTS_PATH), summary_filename)
    with open(summary_log_path, "w") as f:
        f.write(f"{'Dry run' if args.dry_run else 'Evaluation'} {'(stopped early)' if stop_requested else ''} completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(summary)
    log_with_time(f"Summary written to {summary_log_path}")
    # Clean up stop signal file if it exists
    if os.path.exists(STOP_SIGNAL_PATH):
        os.remove(STOP_SIGNAL_PATH)
        log_with_time("Removed stop signal file")
    return 0 if not stop_requested else 2  # Return different exit code if stopped early

if __name__ == "__main__":
    exit(main())
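# Example invocations (the dataset/results paths in the constants above must exist):
#   python <this script> --dry-run --max-tracks 5    # preview work without writing results
#   python <this script> --max-models 3              # evaluate only the 3 highest-priority models
# To request a graceful stop mid-run, create a file at STOP_SIGNAL_PATH; the script
# finishes the current model's tracks, writes a summary, and exits with code 2.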