#!/usr/bin/env python
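"""Evaluate audio-separator models against the MUSDB18-HQ dataset.

For each supported model, this script separates a prioritized set of MUSDB18
tracks, scores standard vocals/instrumental output with museval (SDR/SIR/SAR/ISR),
records separation speed as seconds per minute of audio, and maintains a combined
JSON results file with per-track and median scores. Roformer and under-evaluated
models are prioritized, and a stop-signal file allows a graceful, resumable stop.
"""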
import os
import time
import museval
import numpy as np
import soundfile as sf
from audio_separator.separator import Separator
import json
from json import JSONEncoder
import logging
import musdb
from decimal import Decimal
import tempfile
import argparse
# Setup logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
# Custom JSON Encoder to handle Decimal types
class DecimalEncoder(JSONEncoder):
    def default(self, obj):
        if isinstance(obj, Decimal):
            return float(obj)
        return super().default(obj)
MUSDB_PATH = "/Volumes/Nomad4TBOne/python-audio-separator/tests/model-metrics/datasets/musdb18hq"
RESULTS_PATH = "/Volumes/Nomad4TBOne/python-audio-separator/tests/model-metrics/results"
COMBINED_RESULTS_PATH = "/Users/andrew/Projects/python-audio-separator/audio_separator/models-scores.json"
COMBINED_MUSEVAL_RESULTS_PATH = "/Volumes/Nomad4TBOne/python-audio-separator/tests/model-metrics/results/combined-museval-results.json"
STOP_SIGNAL_PATH = "/Volumes/Nomad4TBOne/python-audio-separator/tests/model-metrics/stop-signal"
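# NOTE: the absolute paths above are machine-specific; adjust them to point at
# your own MUSDB18-HQ dataset and results locations before running.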
def load_combined_results():
    """Load the combined museval results file"""

    # Defined at function scope so the backup-loading path below can use it too
    # (previously it was defined inside the first open(), so it was undefined if
    # opening the primary file failed).
    def decimal_parser(dct):
        for k, v in dct.items():
            if isinstance(v, str) and v.replace(".", "").isdigit():
                try:
                    dct[k] = float(v)
                except (ValueError, TypeError):
                    pass
        return dct

    if os.path.exists(COMBINED_MUSEVAL_RESULTS_PATH):
        logger.info("Loading combined museval results...")
        try:
            with open(COMBINED_MUSEVAL_RESULTS_PATH, "r") as f:
                return json.load(f, object_hook=decimal_parser)
        except Exception as e:
            logger.error(f"Error loading combined results: {str(e)}")
            # Try to load a backup file if it exists
            backup_path = COMBINED_MUSEVAL_RESULTS_PATH + ".backup"
            if os.path.exists(backup_path):
                logger.info("Attempting to load backup file...")
                try:
                    with open(backup_path, "r") as f:
                        return json.load(f, object_hook=decimal_parser)
                except Exception as backup_e:
                    logger.error(f"Error loading backup file: {str(backup_e)}")
            return {}
    else:
        logger.info("No combined results file found, creating new one")
        return {}

def save_combined_results(combined_results):
    """Save the combined museval results file"""
    logger.info("Saving combined museval results...")
    try:
        # Create a backup of the existing file if it exists
        if os.path.exists(COMBINED_MUSEVAL_RESULTS_PATH):
            backup_path = COMBINED_MUSEVAL_RESULTS_PATH + ".backup"
            try:
                with open(COMBINED_MUSEVAL_RESULTS_PATH, "r") as src, open(backup_path, "w") as dst:
                    dst.write(src.read())
            except Exception as e:
                logger.error(f"Error creating backup file: {str(e)}")
        # Save the new results using the custom encoder
        with open(COMBINED_MUSEVAL_RESULTS_PATH, "w") as f:
            json.dump(combined_results, f, cls=DecimalEncoder, indent=2)
        logger.info("Combined results saved successfully")
        return True
    except Exception as e:
        logger.error(f"Error saving combined results: {str(e)}")
        return False

def update_combined_results(model_name, track_name, track_data):
    """Update the combined results file with new track data"""
    try:
        # Load existing combined results
        combined_results = load_combined_results()
        # Initialize model entry if it doesn't exist
        if model_name not in combined_results:
            combined_results[model_name] = {}
        # Add or update track data
        combined_results[model_name][track_name] = track_data
        # Write updated results back to file
        save_combined_results(combined_results)
        return True
    except Exception as e:
        logger.error(f"Error updating combined results: {str(e)}")
        return False

def check_track_evaluated(model_name, track_name):
    """Check if a track has already been evaluated for a specific model"""
    combined_results = load_combined_results()
    return model_name in combined_results and track_name in combined_results[model_name]


def get_track_results(model_name, track_name):
    """Get the evaluation results for a specific track and model"""
    combined_results = load_combined_results()
    if model_name in combined_results and track_name in combined_results[model_name]:
        return combined_results[model_name][track_name]
    return None

def get_track_duration(track_path):
    """Get the duration of a track in minutes"""
    try:
        mixture_path = os.path.join(track_path, "mixture.wav")
        info = sf.info(mixture_path)
        return info.duration / 60.0  # Convert seconds to minutes
    except Exception as e:
        logger.error(f"Error getting track duration: {str(e)}")
        return 0.0

def evaluate_track(track_name, track_path, test_model, mus_db):
    """Evaluate a single track using a specific model"""
    logger.info(f"Evaluating track: {track_name} with model: {test_model}")
    # Get track duration in minutes
    track_duration_minutes = get_track_duration(track_path)
    logger.info(f"Track duration: {track_duration_minutes:.2f} minutes")
    # Initialize variables to track processing time
    processing_time = 0
    seconds_per_minute = 0
    # Create a basic result structure that will be returned even if evaluation fails
    basic_model_results = {"track_name": track_name, "scores": {}}
    # Check if evaluation results already exist in combined file
    museval_results = load_combined_results()
    if test_model in museval_results and track_name in museval_results[test_model]:
        logger.info("Found existing evaluation results in combined file...")
        track_data = museval_results[test_model][track_name]
        scores = museval.TrackStore(track_name)
        scores.scores = track_data
        # Try to extract existing speed metrics if available
        try:
            if isinstance(track_data, dict) and "targets" in track_data:
                for target in track_data["targets"]:
                    if "metrics" in target and "seconds_per_minute_m3" in target["metrics"]:
                        basic_model_results["scores"]["seconds_per_minute_m3"] = target["metrics"]["seconds_per_minute_m3"]
                        break
        except Exception:
            pass  # Ignore errors in extracting existing speed metrics
    else:
        # Expanded stem mapping to include "no-stem" outputs and custom stem formats
        stem_mapping = {
            # Standard stems
            "Vocals": "vocals",
            "Instrumental": "instrumental",
            "Drums": "drums",
            "Bass": "bass",
            "Other": "other",
            # No-stem variants
            "No Drums": "nodrums",
            "No Bass": "nobass",
            "No Other": "noother",
            # Custom stem formats (with hyphens)
            "Drum-Bass": "drumbass",
            "No Drum-Bass": "nodrumbass",
            "Vocals-Other": "vocalsother",
            "No Vocals-Other": "novocalsother",
        }
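        # Each key is a stem name the separator may report; each value becomes the
        # output file's basename, so the checks below can look for predictable
        # filenames such as vocals.wav and instrumental.wav in the temp directory.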
        # Create a temporary directory for separation files
        with tempfile.TemporaryDirectory() as temp_dir:
            logger.info(f"Using temporary directory: {temp_dir}")
            # Measure separation time
            start_time = time.time()
            # Perform separation
            logger.info("Performing separation...")
            separator = Separator(output_dir=temp_dir)
            separator.load_model(model_filename=test_model)
            separator.separate(os.path.join(track_path, "mixture.wav"), custom_output_names=stem_mapping)
            # Calculate processing time
            processing_time = time.time() - start_time
            seconds_per_minute = processing_time / track_duration_minutes if track_duration_minutes > 0 else 0
            logger.info(f"Separation completed in {processing_time:.2f} seconds")
            logger.info(f"Processing speed: {seconds_per_minute:.2f} seconds per minute of audio")
            # Always add the speed metric to our basic results
            basic_model_results["scores"]["seconds_per_minute_m3"] = round(seconds_per_minute, 1)
            # Check which stems were actually created
            wav_files = [f for f in os.listdir(temp_dir) if f.endswith(".wav")]
            logger.info(f"Found WAV files: {wav_files}")
            # Determine if this is a standard vocal/instrumental model that can be evaluated with museval
            standard_model = False
            if len(wav_files) == 2:
                # Check if one of the files is named vocals.wav or instrumental.wav
                if "vocals.wav" in wav_files and "instrumental.wav" in wav_files:
                    standard_model = True
                    logger.info("Detected standard vocals/instrumental model, will run museval evaluation")
            # If not a standard model, skip museval evaluation and just return speed metrics
            if not standard_model:
                logger.info(f"Non-standard stem configuration detected for model {test_model}, skipping museval evaluation")
                # Store the speed metric in the combined results
                if test_model not in museval_results:
                    museval_results[test_model] = {}
                # Create a minimal structure for the speed metric
                minimal_results = {"targets": [{"name": "speed_metrics_only", "metrics": {"seconds_per_minute_m3": round(seconds_per_minute, 1)}}]}
                museval_results[test_model][track_name] = minimal_results
                save_combined_results(museval_results)
                return None, basic_model_results
            # For standard models, proceed with museval evaluation
            available_stems = {}
            available_stems["vocals"] = os.path.join(temp_dir, "vocals.wav")
            available_stems["accompaniment"] = os.path.join(temp_dir, "instrumental.wav")
            # Get track from MUSDB
            track = next((t for t in mus_db if t.name == track_name), None)
            if track is None:
                raise ValueError(f"Track {track_name} not found in MUSDB18")
            # Load available stems
            estimates = {}
            for stem_name, stem_path in available_stems.items():
                audio, _ = sf.read(stem_path)
                if len(audio.shape) == 1:
                    audio = np.expand_dims(audio, axis=1)
                estimates[stem_name] = audio
            # Evaluate using museval
            logger.info(f"Evaluating stems: {list(estimates.keys())}")
            try:
                scores = museval.eval_mus_track(track, estimates, output_dir=temp_dir, mode="v4")
                # Add the speed metric to the scores
                if not hasattr(scores, "speed_metric_added"):
                    for target in scores.scores["targets"]:
                        if "metrics" not in target:
                            target["metrics"] = {}
                        target["metrics"]["seconds_per_minute_m3"] = round(seconds_per_minute, 1)
                    scores.speed_metric_added = True
                # Update the combined results file with the new evaluation
                if test_model not in museval_results:
                    museval_results[test_model] = {}
                museval_results[test_model][track_name] = scores.scores
                save_combined_results(museval_results)
            except Exception as e:
                logger.error(f"Error during museval evaluation: {str(e)}")
                logger.exception("Evaluation exception details:")
                # Return basic results with just the speed metric
                return None, basic_model_results
    try:
        # Only process museval results if we have them
        if "scores" in locals() and scores is not None:
            # Calculate aggregate scores for available stems
            results_store = museval.EvalStore()
            results_store.add_track(scores.df)
            methods = museval.MethodStore()
            methods.add_evalstore(results_store, name=test_model)
            agg_scores = methods.agg_frames_tracks_scores()
            # Return the aggregate scores in a structured format with 6 significant figures
            model_results = {"track_name": track_name, "scores": {}}
            for stem in ["vocals", "drums", "bass", "other", "accompaniment"]:
                try:
                    stem_scores = {metric: float(f"{agg_scores.loc[(test_model, stem, metric)]:.6g}") for metric in ["SDR", "SIR", "SAR", "ISR"]}
                    # Rename 'accompaniment' to 'instrumental' in the output
                    output_stem = "instrumental" if stem == "accompaniment" else stem
                    model_results["scores"][output_stem] = stem_scores
                except KeyError:
                    continue
            # Add the seconds_per_minute_m3 metric if it was calculated
            if processing_time > 0 and track_duration_minutes > 0:
                model_results["scores"]["seconds_per_minute_m3"] = round(seconds_per_minute, 1)
            return scores, (model_results if model_results["scores"] else basic_model_results)
        else:
            # If we don't have scores, just return the basic results with speed metrics
            return None, basic_model_results
    except Exception as e:
        logger.error(f"Error processing evaluation results: {str(e)}")
        logger.exception("Results processing exception details:")
        # Return basic results with just the speed metric
        return None, basic_model_results

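# NOTE: the helper below is currently unused within this script; it complements
# DecimalEncoder for callers that need in-place conversion rather than JSON output.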
def convert_decimal_to_float(obj):
    """Recursively converts Decimal objects to floats in a nested structure."""
    if isinstance(obj, Decimal):
        return float(obj)
    elif isinstance(obj, dict):
        return {k: convert_decimal_to_float(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [convert_decimal_to_float(x) for x in obj]
    return obj

def calculate_median_scores(track_scores):
    """Calculate median scores across all tracks for each stem and metric"""
    # Initialize containers for each stem's metrics
    stem_metrics = {
        "vocals": {"SDR": [], "SIR": [], "SAR": [], "ISR": []},
        "drums": {"SDR": [], "SIR": [], "SAR": [], "ISR": []},
        "bass": {"SDR": [], "SIR": [], "SAR": [], "ISR": []},
        "instrumental": {"SDR": [], "SIR": [], "SAR": [], "ISR": []},
        "seconds_per_minute_m3": [],
    }
    # Collect all scores for each stem and metric
    for track_score in track_scores:
        if track_score is not None and "scores" in track_score:
            # Process audio quality metrics
            for stem, metrics in track_score["scores"].items():
                if stem in stem_metrics and stem != "seconds_per_minute_m3":
                    for metric, value in metrics.items():
                        stem_metrics[stem][metric].append(value)
            # Process speed metric separately
            if "seconds_per_minute_m3" in track_score["scores"]:
                stem_metrics["seconds_per_minute_m3"].append(track_score["scores"]["seconds_per_minute_m3"])
    # Calculate medians for each stem and metric
    median_scores = {}
    for stem, metrics in stem_metrics.items():
        if stem != "seconds_per_minute_m3" and any(metrics.values()):  # Only include stems that have scores
            median_scores[stem] = {metric: float(f"{np.median(values):.6g}") for metric, values in metrics.items() if values}  # Only include metrics that have values
    # Add median speed metric if available
    if stem_metrics["seconds_per_minute_m3"]:
        median_scores["seconds_per_minute_m3"] = round(np.median(stem_metrics["seconds_per_minute_m3"]), 1)
    return median_scores

def check_disk_usage(path):
    """Check inode usage and disk space on the filesystem containing path"""
    import subprocess
    import sys

    # Check disk space first
    result = subprocess.run(["df", "-h", path], capture_output=True, text=True)
    output = result.stdout
    logger.info(f"Current disk usage:\n{output}")
    # Parse the output to get disk usage percentage
    lines = output.strip().split("\n")
    if len(lines) >= 2:
        parts = lines[1].split()
        if len(parts) >= 5:
            try:
                # Extract disk usage percentage
                disk_usage_str = parts[4].rstrip("%")
                disk_usage_pct = int(disk_usage_str)
                logger.info(f"Disk usage: {disk_usage_pct}%")
                if disk_usage_pct >= 99:
                    logger.critical("CRITICAL: Disk is almost full (>=99%)! Cannot continue processing.")
                    logger.critical("Please free up disk space before continuing.")
                    sys.exit(1)
                elif disk_usage_pct > 95:
                    logger.warning(f"WARNING: High disk usage ({disk_usage_pct}%)!")
            except (ValueError, IndexError) as e:
                logger.error(f"Error parsing disk usage: {str(e)}")
    # Now check inode usage
    result = subprocess.run(["df", "-i", path], capture_output=True, text=True)
    output = result.stdout
    logger.info(f"Current inode usage:\n{output}")
    # Parse the output to get inode usage percentage
    lines = output.strip().split("\n")
    if len(lines) >= 2:
        # The second line contains the actual data
        parts = lines[1].split()
        if len(parts) >= 8:  # macOS df -i format has 8 columns
            try:
                # On macOS, inode usage is in the 8th column as a percentage
                inode_usage_str = parts[7].rstrip("%")
                inode_usage_pct = int(inode_usage_str)
                # Also extract the actual inode numbers for better reporting
                iused = int(parts[5])
                ifree = int(parts[6])
                total_inodes = iused + ifree
                # Skip inode check for exFAT or similar filesystems
                if total_inodes <= 1:
                    logger.info("Filesystem appears to be exFAT or similar (no real inode tracking). Skipping inode check.")
                    return None
                logger.info(f"Inode usage: {iused:,}/{total_inodes:,} ({inode_usage_pct}%)")
                if inode_usage_pct >= 100:
                    logger.critical("CRITICAL: Inode usage is at 100%! Cannot continue processing.")
                    logger.critical("Please free up inodes before continuing.")
                    sys.exit(1)
                elif inode_usage_pct > 90:
                    logger.warning(f"WARNING: High inode usage ({inode_usage_pct}%)!")
                return inode_usage_pct
            except (ValueError, IndexError) as e:
                logger.error(f"Error parsing inode usage: {str(e)}")
    return None

def get_evaluated_track_count(model_name, museval_results):
    """Get the number of tracks evaluated for a specific model"""
    if model_name in museval_results:
        return len(museval_results[model_name])
    return 0

def get_most_evaluated_tracks(museval_results, min_count=10):
    """Get tracks that have been evaluated for the most models"""
    track_counts = {}
    # Count how many models have evaluated each track
    for model_name, tracks in museval_results.items():
        for track_name in tracks:
            if track_name not in track_counts:
                track_counts[track_name] = 0
            track_counts[track_name] += 1
    # Sort tracks by evaluation count (descending)
    sorted_tracks = sorted(track_counts.items(), key=lambda x: x[1], reverse=True)
    # Return tracks that have been evaluated at least min_count times
    return [track for track, count in sorted_tracks if count >= min_count]

def generate_summary_statistics(
    start_time, models_processed, tracks_processed, models_with_new_data, tracks_evaluated, total_processing_time, fastest_model=None, slowest_model=None, combined_results_path=None, is_dry_run=False
):
    """Generate a summary of the script's execution"""
    end_time = time.time()
    total_runtime = end_time - start_time
    # Format the runtime
    hours, remainder = divmod(total_runtime, 3600)
    minutes, seconds = divmod(remainder, 60)
    runtime_str = f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}"
    # Build the summary
    summary = [
        "=" * 80,
        "DRY RUN SUMMARY - PREVIEW ONLY" if is_dry_run else "EXECUTION SUMMARY",
        "=" * 80,
        f"Total runtime: {runtime_str}",
        f"Models {'that would be ' if is_dry_run else ''}processed: {models_processed}",
        f"Models {'that would receive' if is_dry_run else 'with'} new data: {len(models_with_new_data)}",
        f"Total tracks {'that would be ' if is_dry_run else ''}evaluated: {tracks_evaluated}",
        f"Average tracks per model: {tracks_evaluated / len(models_with_new_data) if models_with_new_data else 0:.2f}",
    ]
    if fastest_model:
        summary.append(f"Fastest model: {fastest_model['name']} ({fastest_model['speed']:.2f} seconds per minute)")
    if slowest_model:
        summary.append(f"Slowest model: {slowest_model['name']} ({slowest_model['speed']:.2f} seconds per minute)")
    if total_processing_time > 0:
        summary.append(f"Total audio processing time: {total_processing_time:.2f} seconds")
    if combined_results_path and os.path.exists(combined_results_path):
        file_size = os.path.getsize(combined_results_path) / (1024 * 1024)  # Size in MB
        summary.append(f"Results file size: {file_size:.2f} MB")
    # Add models with new data
    if models_with_new_data:
        summary.append(f"\nModels {'that would receive' if is_dry_run else 'with'} new evaluation data:")
        for model_name in models_with_new_data:
            summary.append(f"- {model_name}")
    # Add dry run disclaimer if needed
    if is_dry_run:
        summary.append("\nNOTE: This is a dry run summary. No actual changes were made.")
        summary.append("Run without --dry-run to perform actual evaluations.")
    summary.append("=" * 80)
    return "\n".join(summary)

def check_stop_signal():
    """Check if the stop signal file exists"""
    if os.path.exists(STOP_SIGNAL_PATH):
        logger.info("Stop signal detected at: " + STOP_SIGNAL_PATH)
        return True
    return False

def main():
    # Add command line argument parsing for dry run mode
    parser = argparse.ArgumentParser(description="Run model evaluation on MUSDB18 dataset")
    parser.add_argument("--dry-run", action="store_true", help="Run in dry-run mode (no writes)")
    parser.add_argument("--max-tracks", type=int, default=10, help="Maximum number of tracks to evaluate per model")
    parser.add_argument("--max-models", type=int, default=None, help="Maximum number of models to evaluate")
    args = parser.parse_args()
    # Remove any existing stop signal file at start
    if os.path.exists(STOP_SIGNAL_PATH):
        os.remove(STOP_SIGNAL_PATH)
        logger.info("Removed existing stop signal file")
    # Track start time for progress reporting
    start_time = time.time()
    # Statistics tracking
    models_processed = 0
    tracks_processed = 0
    models_with_new_data = set()
    total_processing_time = 0
    fastest_model = {"name": "", "speed": float("inf")}  # Initialize with infinity for comparison
    slowest_model = {"name": "", "speed": 0}  # Initialize with zero for comparison

    # Create a results cache manager
    class ResultsCache:
        def __init__(self):
            self.results = load_combined_results()
            self.last_update_time = time.time()

        def get_results(self, force=False):
            current_time = time.time()
            # Only reload from disk every 5 minutes unless forced
            if force or (current_time - self.last_update_time) > 300:
                self.results = load_combined_results()
                self.last_update_time = current_time
            return self.results

    results_cache = ResultsCache()

    # Helper function for logging with elapsed time
    def log_with_time(message, level=logging.INFO):
        elapsed = time.time() - start_time
        hours, remainder = divmod(elapsed, 3600)
        minutes, seconds = divmod(remainder, 60)
        time_str = f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}"
        logger.log(level, f"[{time_str}] {message}")

    if args.dry_run:
        log_with_time("*** RUNNING IN DRY-RUN MODE - NO DATA WILL BE MODIFIED ***")
    log_with_time("Starting model evaluation script...")
    os.makedirs(RESULTS_PATH, exist_ok=True)
    # Check disk space and inode usage at start
    check_disk_usage(RESULTS_PATH)
    # Load existing results if available
    combined_results = {}
    if os.path.exists(COMBINED_RESULTS_PATH):
        log_with_time("Loading existing combined results...")
        with open(COMBINED_RESULTS_PATH) as f:
            combined_results = json.load(f)
    # Get initial museval results
    museval_results = results_cache.get_results()
    log_with_time(f"Loaded combined museval results with {len(museval_results)} models")
    # Get the most commonly evaluated tracks
    common_tracks = get_most_evaluated_tracks(museval_results)
    log_with_time(f"Found {len(common_tracks)} commonly evaluated tracks")
    # Initialize MUSDB
    log_with_time("Initializing MUSDB database...")
    mus = musdb.DB(root=MUSDB_PATH, is_wav=True)
    # Create a prioritized list of tracks
    all_tracks = []
    for track in mus.tracks:
        # Check if this is a commonly evaluated track
        is_common = track.name in common_tracks
        all_tracks.append({"name": track.name, "path": os.path.dirname(track.path), "is_common": is_common})
    # Sort tracks by whether they're commonly evaluated
    all_tracks.sort(key=lambda t: 0 if t["is_common"] else 1)
    # Get list of all available models
    log_with_time("Getting list of available models...")
    separator = Separator()
    models_by_type = separator.list_supported_model_files()
    # Flatten the models list and prioritize them
    all_models = []
    for model_type, models in models_by_type.items():
        for model_name, model_info in models.items():
            filename = model_info.get("filename")
            if filename:
                # Count how many tracks have been evaluated for this model
                evaluated_count = get_evaluated_track_count(filename, museval_results)
                # Determine if this is a roformer model
                is_roformer = "roformer" in model_name.lower()
                # Add to the list with priority information
                all_models.append({"name": model_name, "filename": filename, "type": model_type, "info": model_info, "evaluated_count": evaluated_count, "is_roformer": is_roformer})
    # Sort models by priority:
    # 1. Roformer models with fewer than max_tracks evaluations
    # 2. Other models with fewer than max_tracks evaluations
    # 3. Roformer models with more evaluations
    # 4. Other models with more evaluations
    all_models.sort(
        key=lambda m: (
            0 if m["is_roformer"] and m["evaluated_count"] < args.max_tracks else 1 if not m["is_roformer"] and m["evaluated_count"] < args.max_tracks else 2 if m["is_roformer"] else 3,
            m["evaluated_count"],  # Secondary sort by number of evaluations (ascending)
        )
    )
    # Log the prioritized models
    log_with_time(f"Prioritized {len(all_models)} models for evaluation:")
    for i, model in enumerate(all_models[:10]):  # Show top 10
        log_with_time(f"{i+1}. {model['name']} ({model['filename']}) - {model['evaluated_count']} tracks evaluated, roformer: {model['is_roformer']}")
    if len(all_models) > 10:
        log_with_time(f"... and {len(all_models) - 10} more models")
    # Limit the number of models if specified
    if args.max_models:
        all_models = all_models[: args.max_models]
        log_with_time(f"Limited to {args.max_models} models for this run")
    # Process models according to priority
    model_idx = 0
    stop_requested = False
    while model_idx < len(all_models):
        # Check for stop signal before processing each model
        if check_stop_signal():
            log_with_time("Stop signal detected. Will finish current model's tracks and then exit.")
            stop_requested = True
        model = all_models[model_idx]
        model_name = model["name"]
        model_filename = model["filename"]
        model_type = model["type"]
        progress_pct = (model_idx + 1) / len(all_models) * 100
        log_with_time(f"\n=== Processing model {model_idx+1}/{len(all_models)} ({progress_pct:.1f}%): {model_name} ({model_filename}) ===")
        # Initialize model entry if it doesn't exist
        if model_filename not in combined_results:
            log_with_time(f"Initializing new entry for {model_filename}")
            combined_results[model_filename] = {"model_name": model_name, "track_scores": [], "median_scores": {}, "stems": [], "target_stem": None}
        # Try to load the model to get stem information
        try:
            separator.load_model(model_filename=model_filename)
            model_data = separator.model_instance.model_data
            # Extract stem information (similar to your existing code)
            # ... (keep your existing stem extraction logic here)
        except Exception as e:
            log_with_time(f"Error loading model {model_filename}: {str(e)}", logging.ERROR)
            logger.exception("Full exception details:")
            model_idx += 1
            continue
        # Count how many tracks have been evaluated for this model
        # Use the cached results
        evaluated_count = get_evaluated_track_count(model_filename, results_cache.get_results())
        # Determine how many more tracks to evaluate
        tracks_to_evaluate = max(0, args.max_tracks - evaluated_count)
        if tracks_to_evaluate == 0:
            log_with_time(f"Model {model_name} already has {evaluated_count} tracks evaluated (>= {args.max_tracks}). Skipping.")
            model_idx += 1
            continue
        log_with_time(f"Will evaluate up to {tracks_to_evaluate} tracks for model {model_name}")
        # Process tracks for this model (per-model counter, kept separate from the
        # run-wide tracks_processed statistic so the final summary stays accurate)
        model_tracks_processed = 0
        for track in all_tracks:
            # Check for stop signal before each track if we haven't already detected it
            if not stop_requested and check_stop_signal():
                log_with_time("Stop signal detected. Will finish current track and then exit.")
                stop_requested = True
            # Skip if we've processed enough tracks for this model
            if model_tracks_processed >= tracks_to_evaluate:
                break
            track_name = track["name"]
            track_path = track["path"]
            # Skip if track already evaluated for this model
            # Use the cached results
            if model_filename in results_cache.get_results() and track_name in results_cache.get_results()[model_filename]:
                log_with_time(f"Skipping already evaluated track {track_name} for model: {model_filename}")
                continue
            log_with_time(f"Processing track: {track_name} for model: {model_filename}")
            if args.dry_run:
                log_with_time(f"[DRY RUN] Would evaluate track {track_name} with model {model_filename}")
                model_tracks_processed += 1
                tracks_processed += 1
                models_with_new_data.add(model_filename)
                # Estimate processing time based on model type for dry run
                # This is a rough estimate - roformer models are typically slower
                estimated_speed = 30.0  # Default estimate: 30 seconds per minute
                if "roformer" in model_name.lower():
                    estimated_speed = 45.0  # Roformer models are typically slower
                elif "umx" in model_name.lower():
                    estimated_speed = 20.0  # UMX models are typically faster
                # Update statistics with estimated values
                total_processing_time += estimated_speed
                # Track fastest and slowest models based on estimates
                if estimated_speed < fastest_model["speed"]:
                    fastest_model = {"name": model_name, "speed": estimated_speed}
                if estimated_speed > slowest_model["speed"]:
                    slowest_model = {"name": model_name, "speed": estimated_speed}
                continue
            try:
                result = evaluate_track(track_name, track_path, model_filename, mus)
                # Unpack the result safely
                if result and isinstance(result, tuple) and len(result) == 2:
                    _, model_results = result
                else:
                    model_results = None
                # Process the results if they exist and are valid
                if model_results is not None and isinstance(model_results, dict):
                    combined_results[model_filename]["track_scores"].append(model_results)
                    model_tracks_processed += 1
                    tracks_processed += 1
                    models_with_new_data.add(model_filename)
                    # Track processing time statistics - safely access nested dictionaries
                    scores = model_results.get("scores", {})
                    if isinstance(scores, dict):
                        speed = scores.get("seconds_per_minute_m3")
                        if speed is not None:
                            total_processing_time += speed  # Accumulate total processing time
                            # Track fastest and slowest models
                            if speed < fastest_model["speed"]:
                                fastest_model = {"name": model_name, "speed": speed}
                            if speed > slowest_model["speed"]:
                                slowest_model = {"name": model_name, "speed": speed}
                else:
                    log_with_time(f"Skipping model {model_filename} for track {track_name} due to no evaluatable stems or invalid results")
            except Exception as e:
                log_with_time(f"Error evaluating model {model_filename} with track {track_name}: {str(e)}", logging.ERROR)
                logger.exception("Exception details:")
                continue
            # Update and save results
            if combined_results[model_filename]["track_scores"]:
                median_scores = calculate_median_scores(combined_results[model_filename]["track_scores"])
                combined_results[model_filename]["median_scores"] = median_scores
                # Save results after each track
                if not args.dry_run:
                    os.makedirs(os.path.dirname(COMBINED_RESULTS_PATH), exist_ok=True)
                    with open(COMBINED_RESULTS_PATH, "w", encoding="utf-8") as f:
                        json.dump(combined_results, f, indent=2)
                    log_with_time(f"Updated combined results file with {model_filename} - {track_name}")
                    # Force update the cache after saving
                    results_cache.get_results(force=True)
                else:
                    log_with_time(f"[DRY RUN] Would have updated combined results for {model_filename} - {track_name}")
            # Check disk space periodically
            check_disk_usage(RESULTS_PATH)
        log_with_time(f"Completed processing {model_tracks_processed} tracks for model {model_name}")
        # If stop was requested, exit after completing the current model
        if stop_requested:
            log_with_time("Stop signal processed. Generating final summary before exit.")
            break
        # If we're processing a non-roformer model, check if there are roformer models that need evaluation
        if not model["is_roformer"]:
            # Find roformer models that still need more evaluations
            # Use the cached results
            roformer_models_needing_eval = []
            for i, m in enumerate(all_models[model_idx + 1 :], start=model_idx + 1):
                if m["is_roformer"]:
                    eval_count = get_evaluated_track_count(m["filename"], results_cache.get_results())
                    if eval_count < args.max_tracks:
                        roformer_models_needing_eval.append((i, m))
            if roformer_models_needing_eval:
                log_with_time(f"Found {len(roformer_models_needing_eval)} roformer models that still need evaluation. Reprioritizing...")
                # Move these models to the front of the remaining queue
                for offset, (i, m) in enumerate(roformer_models_needing_eval):
                    # Adjust index for models we've already moved
                    adjusted_idx = i - offset
                    # Move this model right after the current one
                    all_models.insert(model_idx + 1, all_models.pop(adjusted_idx))
                log_with_time("Reprioritization complete. Continuing with highest priority model.")
        # Move to the next model
        model_idx += 1
        models_processed += 1
    log_with_time("Evaluation complete")
    # Final disk space check
    check_disk_usage(RESULTS_PATH)
    # Generate and display summary statistics
    # Reset fastest/slowest models if they weren't updated
    if fastest_model["speed"] == float("inf"):
        fastest_model = None
    if slowest_model["speed"] == 0:
        slowest_model = None
    summary = generate_summary_statistics(
        start_time=start_time,
        models_processed=models_processed,
        tracks_processed=tracks_processed,
        models_with_new_data=models_with_new_data,
        tracks_evaluated=tracks_processed,
        total_processing_time=total_processing_time,
        fastest_model=fastest_model,
        slowest_model=slowest_model,
        combined_results_path=COMBINED_RESULTS_PATH,
        is_dry_run=args.dry_run,
    )
    log_with_time("\n" + summary)
    # Also write summary to a log file
    summary_filename = "dry_run_summary.log" if args.dry_run else "evaluation_summary.log"
    if stop_requested:
        summary_filename = "stopped_" + summary_filename
    summary_log_path = os.path.join(os.path.dirname(COMBINED_RESULTS_PATH), summary_filename)
    with open(summary_log_path, "w") as f:
        f.write(f"{'Dry run' if args.dry_run else 'Evaluation'} {'(stopped early)' if stop_requested else ''} completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(summary)
    log_with_time(f"Summary written to {summary_log_path}")
    # Clean up stop signal file if it exists
    if os.path.exists(STOP_SIGNAL_PATH):
        os.remove(STOP_SIGNAL_PATH)
        log_with_time("Removed stop signal file")
    return 0 if not stop_requested else 2  # Return different exit code if stopped early

if __name__ == "__main__":
    exit(main())
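# Example invocations (the dataset/results paths in the constants above must exist):
#   python <this script> --dry-run --max-tracks 5    # preview work without writing results
#   python <this script> --max-models 3              # evaluate only the 3 highest-priority models
# To request a graceful stop mid-run, create a file at STOP_SIGNAL_PATH; the script
# finishes the current model's tracks, writes a summary, and exits with code 2.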