refactor: Migrate MIDI correction tools to use PrettyMIDI library
Reworks the entire MIDI correction suite to operate on `PrettyMIDI` objects instead of the internal `escore` format.
app.py
CHANGED
@@ -79,7 +79,7 @@ from basic_pitch import ICASSP_2022_MODEL_PATH
|
|
79 |
# --- Imports for 8-bit Synthesizer & MIDI Merging ---
|
80 |
import pretty_midi
|
81 |
import numpy as np
|
82 |
-
from scipy import signal
|
83 |
|
84 |
# =================================================================================================
|
85 |
# === Hugging Face SoundFont Downloader ===
|
@@ -153,18 +153,19 @@ class AppParameters:
|
|
153 |
render_remove_drums: bool = False
|
154 |
|
155 |
# EXPERIMENTAL: MIDI Post-Processing & Correction Tools
|
156 |
-
enable_midi_corrections: bool = False
|
157 |
-
correction_filter_spurious_notes: bool = True
|
158 |
-
correction_spurious_duration_ms: int = 50
|
159 |
-
correction_spurious_velocity: int = 20
|
160 |
-
correction_remove_abnormal_rhythm: bool = False
|
161 |
-
correction_rhythm_stab_by_segment: bool = False
|
162 |
-
correction_rhythm_stab_segment_silence_s: float = 1.0
|
163 |
-
correction_quantize_level: str = "None"
|
164 |
-
correction_velocity_mode: str = "None"
|
165 |
-
correction_velocity_smooth_factor: float = 0.5
|
166 |
-
correction_velocity_compress_min: int = 30
|
167 |
-
correction_velocity_compress_max: int = 100
|
|
|
168 |
|
169 |
# 8-bit Synthesizer Settings
|
170 |
s8bit_waveform_type: str = 'Square'
|
@@ -230,277 +231,494 @@ class AppParameters:
|
|
230 |
s8bit_delay_lowpass_cutoff_hz: int = 5000 # Lowpass filter frequency for delay echoes (removes harsh high frequencies from echoes)
|
231 |
s8bit_delay_treble_pitch_shift: int = 0 # Pitch shift (in semitones) applied to high notes in delay echoes
|
232 |
|
233 |
-
# =================================================================================================
|
234 |
-
# === Helper Functions ===
|
235 |
-
# =================================================================================================
|
236 |
|
|
|
|
|
|
|
237 |
|
238 |
-
def
|
239 |
-
"""
|
240 |
-
|
|
|
|
|
|
|
|
|
|
|
241 |
|
242 |
-
Args:
|
243 |
-
escore (list): The list of events.
|
244 |
-
bpm (float): The Beats Per Minute of the track.
|
245 |
-
quantize_level_str (str): The quantization level, e.g., "1/8", "1/16", "1/32".
|
246 |
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
"1/12": 3.0, # 3 notes per beat
|
256 |
-
"1/16": 4.0,
|
257 |
-
"1/24": 6.0, # 6 notes per beat
|
258 |
-
"1/32": 8.0,
|
259 |
-
"1/64": 16.0
|
260 |
-
}
|
261 |
-
division = level_map.get(quantize_level_str)
|
262 |
-
if not division:
|
263 |
-
print(" - Invalid quantization level. Skipping.")
|
264 |
-
return escore
|
265 |
-
|
266 |
-
# Calculate the duration of a single grid step in milliseconds
|
267 |
-
grid_ms = (60000.0 / bpm) / division
|
268 |
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
original_start_time = event[0]
|
275 |
-
# The core quantization logic: find the nearest grid point
|
276 |
-
quantized_start_time = round(original_start_time / grid_ms) * grid_ms
|
277 |
-
event[0] = int(quantized_start_time)
|
278 |
-
notes_quantized += 1
|
279 |
-
quantized_escore.append(event)
|
280 |
-
|
281 |
-
print(f" - Quantized {notes_quantized} notes.")
|
282 |
-
return quantized_escore
|
283 |
|
284 |
-
|
285 |
-
def filter_spurious_notes_escore(escore, max_dur_ms=50, max_vel=20):
|
286 |
"""
|
287 |
-
|
288 |
-
|
289 |
-
Args:
|
290 |
-
escore (list): The list of events.
|
291 |
-
max_dur_ms (int): Notes with duration shorter than this will be considered.
|
292 |
-
max_vel (int): Notes with velocity lower than this will be considered.
|
293 |
-
|
294 |
-
Returns:
|
295 |
-
list: The cleaned escore.
|
296 |
"""
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
cleaned_notes = [
|
304 |
-
note for note in note_events
|
305 |
-
if not (note[1] < max_dur_ms and note[3] < max_vel)
|
306 |
-
]
|
307 |
|
308 |
-
|
309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
310 |
|
311 |
-
#
|
312 |
-
|
313 |
-
|
314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
|
316 |
-
|
317 |
-
|
318 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
319 |
|
320 |
-
|
321 |
-
escore (list): The list of events.
|
322 |
-
mode (str): "Smooth", "Compress", or "None".
|
323 |
-
smooth_factor (float): How much to blend with neighbors (0=none, 1=full average).
|
324 |
-
compress_min (int): The target minimum velocity for compression.
|
325 |
-
compress_max (int): The target maximum velocity for compression.
|
326 |
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
return escore
|
332 |
-
|
333 |
-
print(f" - Processing velocities with mode: {mode}...")
|
334 |
-
|
335 |
-
note_events = [note for note in escore if isinstance(note[0], (int, float))]
|
336 |
-
metadata_events = [meta for meta in escore if not isinstance(meta[0], (int, float))]
|
337 |
|
338 |
-
|
339 |
-
return escore
|
340 |
-
|
341 |
-
velocities = [note[3] for note in note_events]
|
342 |
-
|
343 |
-
if mode == "Smooth":
|
344 |
-
new_velocities = list(velocities) # Start with a copy
|
345 |
-
# Iterate from the second to the second-to-last note
|
346 |
-
for i in range(1, len(velocities) - 1):
|
347 |
-
prev_vel = velocities[i-1]
|
348 |
-
current_vel = velocities[i]
|
349 |
-
next_vel = velocities[i+1]
|
350 |
-
neighbor_avg = (prev_vel + next_vel) / 2.0
|
351 |
-
# Blend the current velocity with the average of its neighbors
|
352 |
-
smoothed_vel = (current_vel * (1 - smooth_factor)) + (neighbor_avg * smooth_factor)
|
353 |
-
new_velocities[i] = int(max(1, min(127, smoothed_vel)))
|
354 |
-
|
355 |
-
for i, note in enumerate(note_events):
|
356 |
-
note[3] = new_velocities[i]
|
357 |
-
print(f" - Smoothed {len(note_events)} velocities.")
|
358 |
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
if max_vel_orig == min_vel_orig:
|
365 |
-
return escore
|
366 |
-
|
367 |
-
for note in note_events:
|
368 |
-
# Linear mapping from original range to target range
|
369 |
-
original_vel = note[3]
|
370 |
-
new_vel = compress_min + (original_vel - min_vel_orig) * \
|
371 |
-
(compress_max - compress_min) / (max_vel_orig - min_vel_orig)
|
372 |
-
note[3] = int(max(1, min(127, new_vel)))
|
373 |
-
print(f" - Compressed {len(note_events)} velocities to range [{compress_min}, {compress_max}].")
|
374 |
-
|
375 |
-
final_escore = metadata_events + note_events
|
376 |
-
final_escore.sort(key=lambda event: event[1] if isinstance(event[0], str) else event[0])
|
377 |
-
return final_escore
|
378 |
-
|
379 |
-
|
380 |
-
def stabilize_midi_rhythm(escore,
|
381 |
-
ioi_threshold_ratio=0.30,
|
382 |
-
min_ioi_ms=30,
|
383 |
-
enable_segmentation=True,
|
384 |
-
silence_split_threshold_s=2.0):
|
385 |
-
"""
|
386 |
-
Removes or merges rhythmically unstable notes from an escore list.
|
387 |
-
This is designed to clean up MIDI generated by basic-pitch with multiple pitch bends,
|
388 |
-
which can create clusters of very short, dense notes to approximate a slide.
|
389 |
-
This version can segment the MIDI based on silence before processing, making it robust
|
390 |
-
for files containing multiple songs with different tempos (like an album).
|
391 |
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
metadata_events = [meta for meta in escore if not isinstance(meta[0], (int, float))]
|
405 |
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
|
|
|
|
410 |
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
415 |
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
print(f" - Segmentation enabled (silence > {silence_split_threshold_s}s).")
|
420 |
-
current_segment = [note_events[0]]
|
421 |
-
silence_threshold_ms = silence_split_threshold_s * 1000
|
422 |
-
|
423 |
-
for i in range(1, len(note_events)):
|
424 |
-
prev_note_end_ms = note_events[i-1][0] + note_events[i-1][1]
|
425 |
-
current_note_start_ms = note_events[i][0]
|
426 |
-
gap_ms = current_note_start_ms - prev_note_end_ms
|
427 |
|
428 |
-
|
429 |
-
|
430 |
-
|
|
|
|
|
431 |
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
436 |
else:
|
437 |
-
|
438 |
-
segments
|
|
|
439 |
|
440 |
-
|
441 |
-
|
442 |
-
total_merged_count = 0
|
443 |
|
444 |
for i, segment in enumerate(segments):
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
457 |
continue
|
458 |
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
466 |
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
notes_merged_in_segment += 1
|
479 |
-
# Merge by extending the previous note's duration to cover the current note
|
480 |
-
new_end_time = current_note[0] + current_note[1]
|
481 |
-
last_kept_note[1] = new_end_time - last_kept_note[0]
|
482 |
-
else:
|
483 |
-
# Note is rhythmically stable, so we keep it
|
484 |
-
cleaned_segment.append(copy.deepcopy(current_note))
|
485 |
-
|
486 |
-
if len(segments) > 1:
|
487 |
-
print(f" - Segment {i+1}: Median IOI {median_ioi:.2f}ms, merged {notes_merged_in_segment} notes.")
|
488 |
|
489 |
-
|
490 |
-
total_merged_count += notes_merged_in_segment
|
491 |
|
492 |
-
if total_merged_count > 0:
|
493 |
-
print(f" - Rhythm stabilization complete. Total merged notes: {total_merged_count}.")
|
494 |
|
495 |
-
# 4. Recombine metadata with the globally cleaned notes and re-sort
|
496 |
-
final_escore = metadata_events + all_cleaned_notes
|
497 |
-
|
498 |
-
# Re-sort the entire list by time to ensure correct MIDI event order.
|
499 |
-
# The sort key must handle both event types: metadata time is at index 1, note time is at index 0.
|
500 |
-
final_escore.sort(key=lambda event: event[1] if isinstance(event[0], str) else event[0])
|
501 |
-
|
502 |
-
return final_escore
|
503 |
|
|
|
|
|
|
|
504 |
|
505 |
def analyze_audio_for_adaptive_params(audio_data: np.ndarray, sample_rate: int):
|
506 |
"""
|
@@ -1991,9 +2209,85 @@ def Render_MIDI(*, input_midi_path: str, params: AppParameters, progress: gr.Pro
|
|
1991 |
print(f"Render type: {params.render_type}")
|
1992 |
print(f"Soundfont bank: {params.soundfont_bank}")
|
1993 |
print(f"Audio render sample rate: {params.render_sample_rate}")
|
1994 |
-
# ... (add other print statements for settings if needed)
|
1995 |
print('=' * 70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1996 |
|
|
|
|
|
1997 |
# --- MIDI Processing using TMIDIX ---
|
1998 |
print('Processing MIDI... Please wait...')
|
1999 |
raw_score = MIDI.midi2single_track_ms_score(fdata)
|
@@ -2140,53 +2434,6 @@ def Render_MIDI(*, input_midi_path: str, params: AppParameters, progress: gr.Pro
|
|
2140 |
o[1] *= 200
|
2141 |
o[2] *= 200
|
2142 |
|
2143 |
-
# --- MIDI Post-Processing & Correction Block ---
|
2144 |
-
if getattr(params, 'enable_midi_corrections', False):
|
2145 |
-
print("Applying MIDI Post-Processing & Corrections...")
|
2146 |
-
|
2147 |
-
# Filter spurious notes first to clean the data for other processes
|
2148 |
-
if getattr(params, 'correction_filter_spurious_notes', False):
|
2149 |
-
output_score = filter_spurious_notes_escore(
|
2150 |
-
output_score,
|
2151 |
-
max_dur_ms=getattr(params, 'correction_spurious_duration_ms', 50),
|
2152 |
-
max_vel=getattr(params, 'correction_spurious_velocity', 20)
|
2153 |
-
)
|
2154 |
-
|
2155 |
-
# Then, stabilize rhythm on the cleaned notes
|
2156 |
-
if getattr(params, 'correction_remove_abnormal_rhythm', False):
|
2157 |
-
output_score = stabilize_midi_rhythm(
|
2158 |
-
output_score,
|
2159 |
-
enable_segmentation=getattr(params, 'correction_rhythm_stab_by_segment', False),
|
2160 |
-
silence_split_threshold_s=getattr(params, 'correction_rhythm_stab_segment_silence_s', 1.0)
|
2161 |
-
)
|
2162 |
-
|
2163 |
-
# Then, quantize the stabilized rhythm
|
2164 |
-
quantize_level = getattr(params, 'correction_quantize_level', "None")
|
2165 |
-
if quantize_level != "None":
|
2166 |
-
try:
|
2167 |
-
# We need to get the BPM for quantization. We do this once here.
|
2168 |
-
midi_obj_for_bpm = pretty_midi.PrettyMIDI(input_midi_path)
|
2169 |
-
estimated_bpm = midi_obj_for_bpm.estimate_tempo()
|
2170 |
-
output_score = quantize_escore(output_score, estimated_bpm, quantize_level)
|
2171 |
-
except Exception as e:
|
2172 |
-
print(f" - Could not estimate BPM for quantization. Skipping. Error: {e}")
|
2173 |
-
|
2174 |
-
# Finally, process velocity as it doesn't affect timing or notes
|
2175 |
-
velocity_mode = getattr(params, 'correction_velocity_mode', "None")
|
2176 |
-
if velocity_mode != "None":
|
2177 |
-
output_score = process_velocity_escore(
|
2178 |
-
output_score,
|
2179 |
-
mode=velocity_mode,
|
2180 |
-
smooth_factor=getattr(params, 'correction_velocity_smooth_factor', 0.5),
|
2181 |
-
compress_min=getattr(params, 'correction_velocity_compress_min', 30),
|
2182 |
-
compress_max=getattr(params, 'correction_velocity_compress_max', 100)
|
2183 |
-
)
|
2184 |
-
print("Corrections finished.")
|
2185 |
-
print('=' * 70)
|
2186 |
-
|
2187 |
-
print('Final adjustments complete.')
|
2188 |
-
print('=' * 70)
|
2189 |
-
|
2190 |
# --- Saving Processed MIDI File ---
|
2191 |
# Save the transformed MIDI data
|
2192 |
SONG, patches, _ = TMIDIX.patch_enhanced_score_notes(output_score)
|
@@ -4091,18 +4338,26 @@ if __name__ == "__main__":
|
|
4091 |
correction_remove_abnormal_rhythm = gr.Checkbox(label="Stabilize Rhythm (for Pitch Bend)", value=False,
|
4092 |
info="Attempts to merge overly dense, rhythmically unstable notes often created when 'Allow Multiple Pitch Bends' is used. This can clean up the rhythm but may lose some pitch slide nuance.")
|
4093 |
with gr.Group(visible=False) as rhythm_stab_options: # This group is initially hidden
|
4094 |
-
correction_rhythm_stab_by_segment = gr.Checkbox(label="Enable Segmentation by Silence", value=
|
4095 |
info="Highly recommended for albums or long files. Splits the MIDI by silent parts before stabilizing rhythm, ensuring accuracy for songs with different tempos.")
|
4096 |
correction_rhythm_stab_segment_silence_s = gr.Slider(minimum=0.5, maximum=10.0, value=1.0, step=0.5,
|
4097 |
label="Silence Threshold for Segmentation (seconds)",
|
4098 |
info="The amount of silence required to start a new segment. 1-3 seconds is usually enough to separate songs on an album.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4099 |
# --- Quantization Group ---
|
4100 |
with gr.Group():
|
4101 |
correction_quantize_level = gr.Dropdown(
|
4102 |
-
["None", "1/64", "1/32", "1/16", "1/8", "1/4", "1/24", "1/12"],
|
4103 |
value="None",
|
4104 |
label="Quantize Rhythm",
|
4105 |
-
info="Quantizes notes to the nearest rhythmic grid line. '1/16' is recommended for most pop and rock music. For expressive genres like classical or jazz, use with caution as it may reduce natural timing nuances. Straight divisions (1/8, 1/16, etc.) suit most modern music, while swing divisions (1/12, 1/24) are ideal for jazz, blues, or shuffle styles."
|
4106 |
)
|
4107 |
# --- Velocity Processing Group ---
|
4108 |
with gr.Group():
|
|
|
79 |
# --- Imports for 8-bit Synthesizer & MIDI Merging ---
|
80 |
import pretty_midi
|
81 |
import numpy as np
|
82 |
+
from scipy import signal, stats
|
83 |
|
84 |
# =================================================================================================
|
85 |
# === Hugging Face SoundFont Downloader ===
|
|
|
153 |
render_remove_drums: bool = False
|
154 |
|
155 |
# EXPERIMENTAL: MIDI Post-Processing & Correction Tools
|
156 |
+
enable_midi_corrections: bool = False # Master switch for enabling MIDI correction tools
|
157 |
+
correction_filter_spurious_notes: bool = True # Enable filtering of spurious (noise) notes
|
158 |
+
correction_spurious_duration_ms: int = 50 # Maximum duration (ms) for a note to be considered spurious
|
159 |
+
correction_spurious_velocity: int = 20 # Maximum velocity for a note to be considered spurious
|
160 |
+
correction_remove_abnormal_rhythm: bool = False # Enable rhythm stabilization for abnormal rhythm
|
161 |
+
correction_rhythm_stab_by_segment: bool = False # Enable segmentation by silence before rhythm stabilization
|
162 |
+
correction_rhythm_stab_segment_silence_s: float = 1.0 # Silence threshold (seconds) for segmenting MIDI
|
163 |
+
correction_quantize_level: str = "None" # Quantization level for note timing (e.g., "1/16", "None")
|
164 |
+
correction_velocity_mode: str = "None" # Velocity processing mode ("None", "Smooth", "Compress")
|
165 |
+
correction_velocity_smooth_factor: float = 0.5 # Smoothing factor for velocity processing
|
166 |
+
correction_velocity_compress_min: int = 30 # Minimum velocity after compression
|
167 |
+
correction_velocity_compress_max: int = 100 # Maximum velocity after compression
|
168 |
+
correction_rhythmic_simplification_level: str = "None" # rhythmic simplification
|
169 |
|
170 |
# 8-bit Synthesizer Settings
|
171 |
s8bit_waveform_type: str = 'Square'
|
|
|
231 |
s8bit_delay_lowpass_cutoff_hz: int = 5000 # Lowpass filter frequency for delay echoes (removes harsh high frequencies from echoes)
|
232 |
s8bit_delay_treble_pitch_shift: int = 0 # Pitch shift (in semitones) applied to high notes in delay echoes
|
233 |
|
|
|
|
|
|
|
234 |
|
235 |
+
# ===============================================================================
|
236 |
+
# === MIDI CORRECTION SUITE (Operating on pretty_midi objects for robustness) ===
|
237 |
+
# ===============================================================================
|
238 |
|
239 |
+
def _get_all_notes(midi_obj: pretty_midi.PrettyMIDI, include_drums=False):
    """Return every note from every instrument as one list, sorted by onset time.

    Args:
        midi_obj: Source MIDI object whose instruments are scanned.
        include_drums: When False (default), notes on drum tracks are excluded.

    Returns:
        list[pretty_midi.Note]: All collected notes ordered by their start time.
    """
    collected = [
        note
        for track in midi_obj.instruments
        if include_drums or not track.is_drum
        for note in track.notes
    ]
    return sorted(collected, key=lambda n: n.start)
|
247 |
|
|
|
|
|
|
|
|
|
248 |
|
249 |
+
def _normalize_instrument_times(instrument: pretty_midi.Instrument):
    """Return a deep-copied instrument whose timestamps are shifted to start at 0.

    The original instrument is left untouched; the copy has every note's start
    and end reduced by the earliest note onset. An instrument with no notes is
    returned as-is (no copy is made).
    """
    if not instrument.notes:
        return instrument

    # The earliest onset across all notes becomes the new time origin.
    offset = min(note.start for note in instrument.notes)

    shifted = copy.deepcopy(instrument)
    for note in shifted.notes:
        note.start -= offset
        note.end -= offset
    return shifted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
|
264 |
+
def _segment_midi_by_silence(midi_obj: pretty_midi.PrettyMIDI, silence_threshold_s=1.0):
    """
    Splits a PrettyMIDI object into a list of PrettyMIDI objects, each representing a segment.
    This is the core of per-song processing for albums.

    A segment boundary is declared wherever the gap between the end of one note and
    the start of the next (across all instruments, drums included) exceeds
    ``silence_threshold_s`` seconds.

    Args:
        midi_obj: The MIDI object to split.
        silence_threshold_s (float): Minimum silent gap (seconds) that separates segments.

    Returns:
        list[pretty_midi.PrettyMIDI]: One object per detected segment; each segment
        contains only the instruments that actually had notes in it. Empty list if
        the input has no notes.
    """
    all_notes = _get_all_notes(midi_obj, include_drums=True)
    if not all_notes:
        return []

    # Map each note (by identity) back to its owning instrument index once,
    # so assigning a note to a segment is O(1) instead of scanning every
    # instrument's note list per note (the previous approach was O(n^2)).
    note_owner = {}
    for inst_idx, inst in enumerate(midi_obj.instruments):
        for note in inst.notes:
            note_owner[id(note)] = inst_idx

    segments = []
    current_segment_notes = {idx: [] for idx in range(len(midi_obj.instruments))}

    def _flush_current_segment():
        """Build a PrettyMIDI object from the accumulated notes (if any) and append it."""
        segment_midi = pretty_midi.PrettyMIDI()
        for inst_idx, inst_notes in current_segment_notes.items():
            if inst_notes:
                source_inst = midi_obj.instruments[inst_idx]
                new_inst = pretty_midi.Instrument(program=source_inst.program, is_drum=source_inst.is_drum)
                new_inst.notes.extend(inst_notes)
                segment_midi.instruments.append(new_inst)
        if segment_midi.instruments:
            segments.append(segment_midi)

    # The very first note always opens the first segment.
    current_segment_notes[note_owner[id(all_notes[0])]].append(all_notes[0])

    for i in range(1, len(all_notes)):
        gap = all_notes[i].start - all_notes[i - 1].end
        if gap > silence_threshold_s:
            # End of a segment: flush what we have and start a fresh one.
            _flush_current_segment()
            current_segment_notes = {idx: [] for idx in range(len(midi_obj.instruments))}
        current_segment_notes[note_owner[id(all_notes[i])]].append(all_notes[i])

    # Don't forget the trailing segment.
    _flush_current_segment()

    return segments
|
318 |
+
|
319 |
+
def _recombine_segments(segments):
    """Merges a list of segmented PrettyMIDI objects back into one.

    Segments produced by ``_segment_midi_by_silence`` contain only the subset of
    instruments that had notes within that segment, so the instrument lists of
    different segments can differ in length and order. Tracks are therefore
    matched across segments by their (program, is_drum) pair rather than by
    positional index — the previous positional approach could raise IndexError
    or append notes to the wrong track when segments had differing layouts.

    Args:
        segments (list): PrettyMIDI objects to merge, typically from
            ``_segment_midi_by_silence``.

    Returns:
        pretty_midi.PrettyMIDI: A single object containing all notes from all segments.

    NOTE(review): two distinct source tracks sharing the same program and drum
    flag are merged into one output track — confirm this is acceptable for the
    expected inputs.
    """
    recombined_midi = pretty_midi.PrettyMIDI()
    # Output tracks already created, keyed by their identifying pair.
    track_by_key = {}

    for segment in segments:
        for inst in segment.instruments:
            key = (inst.program, inst.is_drum)
            target = track_by_key.get(key)
            if target is None:
                target = pretty_midi.Instrument(program=inst.program, is_drum=inst.is_drum)
                track_by_key[key] = target
                recombined_midi.instruments.append(target)
            target.notes.extend(inst.notes)

    return recombined_midi
|
335 |
+
|
336 |
+
def _analyze_best_quantize_level(notes, bpm, error_threshold_ratio=0.25):
|
337 |
+
"""Analyzes a list of notes to determine the most likely quantization grid."""
|
338 |
+
if not notes: return "None"
|
339 |
+
grids_to_test = ["1/8", "1/12", "1/16", "1/24", "1/32"]
|
340 |
+
level_map = {"1/8": 2.0, "1/12": 3.0, "1/16": 4.0, "1/24": 6.0, "1/32": 8.0}
|
341 |
+
start_times = [n.start for n in notes]
|
342 |
+
results = []
|
343 |
+
for grid_name in grids_to_test:
|
344 |
+
division = level_map[grid_name]
|
345 |
+
grid_s = (60.0 / bpm) / division
|
346 |
+
if grid_s < 0.001: continue
|
347 |
+
total_error = sum(min(t % grid_s, grid_s - (t % grid_s)) for t in start_times)
|
348 |
+
avg_error = total_error / len(start_times)
|
349 |
+
results.append({"grid": grid_name, "avg_error": avg_error, "grid_s": grid_s})
|
350 |
+
if not results: return "None"
|
351 |
+
best_fit = min(results, key=lambda x: x['avg_error'])
|
352 |
+
if best_fit['avg_error'] > best_fit['grid_s'] * error_threshold_ratio:
|
353 |
+
return "None"
|
354 |
+
return best_fit['grid']
|
355 |
+
|
356 |
+
def filter_spurious_notes_pm(midi_obj: pretty_midi.PrettyMIDI, max_dur_s=0.05, max_vel=20):
    """Remove notes that are both very short AND very quiet (typical transcription noise).

    Mutates the instruments of ``midi_obj`` in place and returns the same object.

    Args:
        midi_obj: The MIDI object to clean.
        max_dur_s (float): Duration ceiling (seconds) for a note to count as spurious.
        max_vel (int): Velocity ceiling for a note to count as spurious.

    Returns:
        pretty_midi.PrettyMIDI: The same object, with spurious notes dropped.
    """
    print(f" - Filtering spurious notes (duration < {max_dur_s*1000:.0f}ms AND velocity < {max_vel})...")
    removed_total = 0
    for track in midi_obj.instruments:
        kept = []
        for note in track.notes:
            # A note is spurious only when it fails BOTH thresholds.
            is_spurious = (note.end - note.start < max_dur_s) and (note.velocity < max_vel)
            if not is_spurious:
                kept.append(note)
        removed_total += len(track.notes) - len(kept)
        track.notes = kept

    print(f" - Removed {removed_total} spurious notes.")
    return midi_obj
|
370 |
+
|
371 |
+
def stabilize_rhythm_pm(
    midi_obj: pretty_midi.PrettyMIDI,
    ioi_threshold_ratio=0.30,
    min_ioi_s=0.03,
    enable_segmentation=True,
    silence_threshold_s=1.0,
    merge_mode="extend",  # "extend" (lengthen previous note) or "drop" (just discard)
    consider_velocity=True,  # treat quieter-than-neighbor notes as decorations eligible for merging
    skip_chords=True,  # never merge a note whose onset coincides with other notes (a chord)
    use_mode_ioi=False  # use the statistical mode of the IOIs instead of the median
):
    """Enhances rhythm stability by merging rhythmically unstable notes, with advanced options.

    Notes that start very close to their predecessor (relative to the typical
    inter-onset interval of the track), are near in pitch, and (optionally)
    quieter, are treated as artifacts — e.g. the dense note clusters basic-pitch
    emits to approximate pitch slides — and are merged into the preceding note.

    Args:
        midi_obj: The MIDI object to stabilize (instruments mutated in place).
        ioi_threshold_ratio (float): Fraction of the median IOI below which a gap is "too close".
        min_ioi_s (float): Absolute floor (seconds) for the closeness threshold.
        enable_segmentation (bool): Split by silence first so each song gets its own IOI statistics.
        silence_threshold_s (float): Silence gap (seconds) used for segmentation.
        merge_mode (str): "extend" lengthens the previous note to cover the merged one;
            "drop" discards the unstable note without lengthening.
        consider_velocity (bool): Require the merged note to be < 80% of the previous note's velocity.
        skip_chords (bool): Keep notes that share an onset with other notes untouched.
        use_mode_ioi (bool): Use the IOI mode instead of the median as the reference interval.

    Returns:
        pretty_midi.PrettyMIDI: The recombined (or single) processed MIDI object.
    """
    print(" - Stabilizing rhythm...")
    if not enable_segmentation:
        segments = [midi_obj]
    else:
        segments = _segment_midi_by_silence(midi_obj, silence_threshold_s)
        if len(segments) > 1:
            print(f" - Split into {len(segments)} segments for stabilization.")

    processed_segments = []

    for segment in segments:
        for instrument in segment.instruments:
            # Drums and sparse tracks don't have enough material for IOI statistics.
            if instrument.is_drum or len(instrument.notes) < 20:
                continue

            notes = sorted(instrument.notes, key=lambda n: n.start)

            # Inter-onset intervals (IOIs) between consecutive note starts.
            iois = [notes[i].start - notes[i-1].start for i in range(1, len(notes))]
            positive_iois = [ioi for ioi in iois if ioi > 0.001]
            if not positive_iois:
                continue

            # Reference interval: mode (if requested) with median fallback, else median.
            if use_mode_ioi:
                try:
                    # NOTE(review): on SciPy >= 1.11 `stats.mode(...).mode` is a scalar,
                    # so the `[0]` indexing raises and this silently falls back to the
                    # median below — confirm mode support on the deployed SciPy version.
                    median_ioi = float(stats.mode(positive_iois).mode[0])
                except Exception:
                    median_ioi = np.median(positive_iois)
            else:
                median_ioi = np.median(positive_iois)
            threshold_s = max(median_ioi * ioi_threshold_ratio, min_ioi_s)

            cleaned_notes = [notes[0]]
            for i in range(1, len(notes)):
                prev_note = cleaned_notes[-1]
                curr_note = notes[i]

                # Chords are kept verbatim when skip_chords is set.
                # NOTE(review): this scan is O(n) per note (O(n^2) overall) — fine for
                # typical track sizes, but worth precomputing for very dense files.
                if skip_chords:
                    notes_at_same_time = [n for n in notes if abs(n.start - curr_note.start) < 0.001]
                    if len(notes_at_same_time) > 1:
                        cleaned_notes.append(curr_note)
                        continue

                # A note counts as an unstable "decoration" only if it is close in
                # time AND pitch (and, optionally, noticeably quieter).
                pitch_close = abs(curr_note.pitch - prev_note.pitch) <= 3  # within a minor third
                velocity_ok = True
                if consider_velocity:
                    velocity_ok = curr_note.velocity < prev_note.velocity * 0.8

                start_close = (curr_note.start - prev_note.start) < threshold_s

                if start_close and pitch_close and velocity_ok:
                    # Both branches omit curr_note from the output; they differ only
                    # in whether the previous note is lengthened to cover it.
                    if merge_mode == "extend":
                        prev_note.end = max(prev_note.end, curr_note.end)
                    elif merge_mode == "drop":
                        continue
                else:
                    # Rhythmically stable note: keep it.
                    cleaned_notes.append(curr_note)

            instrument.notes = cleaned_notes
        processed_segments.append(segment)

    # Without segmentation there is exactly one (in-place processed) segment.
    return _recombine_segments(processed_segments) if enable_segmentation else processed_segments[0]
|
450 |
+
|
451 |
+
|
452 |
+
def simplify_rhythm_pm(
    midi_obj: pretty_midi.PrettyMIDI,
    simplification_level_str="None",
    enable_segmentation=True,
    silence_threshold_s=1.0,
    keep_chords=True,
    max_notes_per_grid=3
):
    """Simplifies rhythm while preserving music length, with optional chord and sustain handling.

    Snaps the music onto a coarse rhythmic grid: within each grid slot only a
    few representative notes are kept (chord root/top/loudest, or just the
    loudest), each re-timed to start exactly on the grid line.

    Args:
        midi_obj: The MIDI object to simplify.
        simplification_level_str (str): Target grid (e.g. "1/8"); "None" is a no-op.
        enable_segmentation (bool): Split by silence first so each song gets its own tempo estimate.
        silence_threshold_s (float): Silence gap (seconds) used for segmentation.
        keep_chords (bool): Keep root, top, and loudest note per slot (capped) instead of just the loudest.
        max_notes_per_grid (int): Maximum notes retained per grid slot when keep_chords is on.

    Returns:
        pretty_midi.PrettyMIDI: The simplified MIDI (the input object if the level
        is "None"/unknown).
    """
    if simplification_level_str == "None":
        return midi_obj
    print(f" - Simplifying rhythm to {simplification_level_str} grid...")

    # Split into segments if enabled, so tempo is estimated per song.
    if not enable_segmentation:
        segments = [midi_obj]
    else:
        segments = _segment_midi_by_silence(midi_obj, silence_threshold_s)
        if len(segments) > 1:
            print(f" - Split into {len(segments)} segments for simplification.")

    processed_segments = []
    # Notes-per-beat for each supported grid label.
    level_map = {"1/4": 1.0, "1/8": 2.0, "1/12": 3.0, "1/16": 4.0, "1/24": 6.0, "1/32": 8.0, "1/64": 16.0}
    division = level_map.get(simplification_level_str)
    if not division:
        # Unknown level string: leave the MIDI untouched.
        return midi_obj

    for segment in segments:
        new_segment_midi = pretty_midi.PrettyMIDI()
        for instrument in segment.instruments:
            # Drums and empty tracks pass through unchanged.
            if instrument.is_drum or not instrument.notes:
                new_segment_midi.instruments.append(instrument)
                continue

            try:
                # Prefer an explicit tempo event; otherwise estimate from a
                # time-normalized copy of this instrument. Clamp to a sane range.
                if segment.get_tempo_changes()[1].size > 0:
                    bpm = float(segment.get_tempo_changes()[1][0])
                else:
                    temp_norm_inst = _normalize_instrument_times(instrument)
                    temp_midi = pretty_midi.PrettyMIDI(); temp_midi.instruments.append(temp_norm_inst)
                    bpm = temp_midi.estimate_tempo()
                bpm = max(40.0, min(bpm, 240.0))
            except Exception:
                # Tempo estimation failed: keep the track as-is rather than guessing.
                new_segment_midi.instruments.append(instrument)
                continue

            grid_s = (60.0 / bpm) / division
            if grid_s <= 0.001:
                # Degenerate grid: keep the track unchanged.
                new_segment_midi.instruments.append(instrument)
                continue

            simplified_instrument = pretty_midi.Instrument(program=instrument.program, name=instrument.name)
            notes = sorted(instrument.notes, key=lambda x: x.start)
            end_time = segment.get_end_time()

            # Collect sustain-pedal (CC64) on/off events as (time, is_down) pairs.
            sustain_times = []
            for cc in instrument.control_changes:
                if cc.number == 64:  # sustain pedal
                    sustain_times.append((cc.time, cc.value >= 64))

            # Walk the grid from (roughly) the first onset to the end of the segment.
            current_grid_time = round(notes[0].start / grid_s) * grid_s
            while current_grid_time < end_time:
                notes_in_slot = [n for n in notes if current_grid_time <= n.start < current_grid_time + grid_s]
                if notes_in_slot:
                    chosen_notes = []
                    if keep_chords:
                        # Keep root (lowest) and top (highest) pitch of the slot...
                        root_note = min(notes_in_slot, key=lambda n: n.pitch)
                        top_note = max(notes_in_slot, key=lambda n: n.pitch)
                        chosen_notes.extend([root_note, top_note])
                        # ...plus the loudest note, if distinct.
                        strong_note = max(notes_in_slot, key=lambda n: n.velocity)
                        if strong_note not in chosen_notes:
                            chosen_notes.append(strong_note)
                        # Cap chord density per slot.
                        chosen_notes = sorted(set(chosen_notes), key=lambda n: n.pitch)[:max_notes_per_grid]
                    else:
                        chosen_notes = [max(notes_in_slot, key=lambda n: n.velocity)]

                    for note in chosen_notes:
                        # End at the original note end or the slot boundary, whichever is earlier.
                        note_end = min(note.end, current_grid_time + grid_s)
                        # NOTE(review): any pedal-down event at/after the note's start
                        # extends the note by one extra slot; pedal-release times are
                        # never consulted — confirm this is the intended sustain model.
                        for t, active in sustain_times:
                            if t >= note.start and active:
                                note_end = max(note_end, current_grid_time + grid_s * 2)
                        simplified_instrument.notes.append(pretty_midi.Note(
                            velocity=note.velocity,
                            pitch=note.pitch,
                            start=current_grid_time,
                            end=note_end
                        ))
                current_grid_time += grid_s

            if simplified_instrument.notes:
                new_segment_midi.instruments.append(simplified_instrument)
        processed_segments.append(new_segment_midi)

    return _recombine_segments(processed_segments) if enable_segmentation else processed_segments[0]
|
554 |
+
|
555 |
+
|
556 |
+
def quantize_pm(
    midi_obj: pretty_midi.PrettyMIDI,
    quantize_level_str="None",
    enable_segmentation=True,
    silence_threshold_s=1.0,
    quantize_end=True,
    preserve_duration=True
):
    """Quantizes notes in a PrettyMIDI object to a rhythmic grid.

    Each segment's grid size is derived from its tempo (first tempo change if
    present, otherwise an estimate clamped to 40-240 BPM). Drum tracks and
    empty instruments are passed through untouched. When segmentation is
    enabled, the MIDI is first split at long silences so multi-song files
    (e.g. whole albums) get a per-segment tempo/grid.

    Args:
        midi_obj: The PrettyMIDI object to process.
        quantize_level_str: Grid label ("1/4" ... "1/64"), "Auto-Analyze Rhythm"
            to pick a grid per instrument, or "None" to skip processing.
        enable_segmentation: Split the MIDI at silences before quantizing.
        silence_threshold_s: Silence (seconds) required to start a new segment.
        quantize_end: Also snap note ends to the grid (ignored when
            preserve_duration is True).
        preserve_duration: Keep each note's original length; only move its start.

    Returns:
        A new PrettyMIDI object with quantized notes, or the input object
        unchanged when quantize_level_str is "None".
    """
    if quantize_level_str == "None":
        return midi_obj
    print(f" - Quantizing notes (Mode: {quantize_level_str})...")

    # Split into segments if enabled.
    if not enable_segmentation:
        segments = [midi_obj]
    else:
        segments = _segment_midi_by_silence(midi_obj, silence_threshold_s)
        if len(segments) > 1:
            print(f" - Split into {len(segments)} segments for quantization.")

    processed_segments = []
    # Beat divisions per quarter note for each supported grid label.
    level_map = {"1/4": 1.0, "1/8": 2.0, "1/12": 3.0, "1/16": 4.0, "1/24": 6.0, "1/32": 8.0, "1/64": 16.0}

    for i, segment in enumerate(segments):
        new_segment_midi = pretty_midi.PrettyMIDI()
        for instrument in segment.instruments:
            # Drums and empty tracks are copied through unmodified.
            if instrument.is_drum or not instrument.notes:
                new_segment_midi.instruments.append(instrument)
                continue
            try:
                # Estimate BPM or use the first tempo change.
                # FIX: call get_tempo_changes() once instead of twice.
                _tempo_times, tempi = segment.get_tempo_changes()
                if tempi.size > 0:
                    bpm = float(tempi[0])
                else:
                    temp_norm_inst = _normalize_instrument_times(instrument)
                    temp_midi = pretty_midi.PrettyMIDI(); temp_midi.instruments.append(temp_norm_inst)
                    bpm = temp_midi.estimate_tempo()
                bpm = max(40.0, min(bpm, 240.0))
            except Exception:
                # Tempo could not be determined; pass the instrument through.
                new_segment_midi.instruments.append(instrument)
                continue

            # Determine quantization grid size.
            final_quantize_level = quantize_level_str
            if quantize_level_str == "Auto-Analyze Rhythm":
                final_quantize_level = _analyze_best_quantize_level(instrument.notes, bpm)
                if len(segments) > 1:
                    print(f" - Segment {i+1}, Inst '{instrument.name}': Auto-analyzed grid is '{final_quantize_level}'. BPM: {bpm:.2f}")

            division = level_map.get(final_quantize_level)
            if not division:
                # Unknown grid label — leave the instrument unquantized.
                new_segment_midi.instruments.append(instrument)
                continue
            grid_s = (60.0 / bpm) / division

            # Collect sustain pedal (CC64) on/off events.
            sustain_times = [(cc.time, cc.value >= 64)
                             for cc in instrument.control_changes
                             if cc.number == 64]

            # Quantize notes.
            quantized_instrument = pretty_midi.Instrument(program=instrument.program, name=instrument.name)
            # FIX: carry over controller and pitch-bend data — the original
            # rebuilt the instrument with notes only, silently dropping
            # sustain-pedal and pitch-bend automation from the output.
            quantized_instrument.control_changes = list(instrument.control_changes)
            quantized_instrument.pitch_bends = list(instrument.pitch_bends)

            for note in instrument.notes:
                original_duration = note.end - note.start
                # Quantize start to the nearest grid line.
                new_start = round(note.start / grid_s) * grid_s
                if preserve_duration:
                    new_end = new_start + original_duration
                elif quantize_end:
                    new_end = round(note.end / grid_s) * grid_s
                else:
                    new_end = note.end

                # Sustain pedal extension.
                # FIX: only pedal-down events that occur while the note is
                # sounding should extend it; the original condition
                # (t >= note.start) matched any later pedal-down in the track.
                for t, active in sustain_times:
                    if active and note.start <= t < note.end:
                        new_end = max(new_end, new_start + grid_s * 2)

                # Safety check: never emit a zero- or negative-length note.
                if new_end <= new_start:
                    new_end = new_start + grid_s * 0.5

                quantized_instrument.notes.append(pretty_midi.Note(
                    velocity=note.velocity,
                    pitch=note.pitch,
                    start=new_start,
                    end=new_end
                ))

            new_segment_midi.instruments.append(quantized_instrument)
        processed_segments.append(new_segment_midi)

    return _recombine_segments(processed_segments) if enable_segmentation else processed_segments[0]
|
651 |
+
|
652 |
+
|
653 |
+
def process_velocity_pm(
    midi_obj: pretty_midi.PrettyMIDI,
    mode=("None",),          # mode(s) to apply: "Smooth" and/or "Compress"; immutable default (was a mutable list)
    smooth_factor=0.5,       # 0..1 weight of the neighbor average when smoothing
    compress_min=30,         # lower bound of the compressed velocity range
    compress_max=100,        # upper bound of the compressed velocity range
    compress_type="linear",  # "linear" or "perceptual" (gamma curve)
    inplace=True             # if False, operate on and return a deep copy
):
    """Applies velocity processing to a PrettyMIDI object.

    "Smooth" blends each note's velocity toward the average of its immediate
    neighbors; "Compress" rescales each instrument's velocity range into
    [compress_min, compress_max], either linearly or with a gamma-style
    perceptual curve. Modes may be combined; smoothing runs first. Drum
    tracks and empty instruments are skipped.

    Args:
        midi_obj: The PrettyMIDI object to process.
        mode: A mode name or iterable of mode names; "None" disables processing.
        smooth_factor: Smoothing weight (0 = no change, 1 = pure neighbor average).
        compress_min: Target minimum velocity after compression.
        compress_max: Target maximum velocity after compression.
        compress_type: "linear" or "perceptual".
        inplace: Modify midi_obj directly when True; otherwise return a copy.

    Returns:
        The processed PrettyMIDI object (midi_obj itself when inplace=True).
    """
    if not inplace:
        import copy
        midi_obj = copy.deepcopy(midi_obj)

    if isinstance(mode, str):
        mode = [mode]
    if "None" in mode or not mode:
        return midi_obj

    print(f" - Processing velocities (Mode: {mode})...")

    for instrument in midi_obj.instruments:
        if instrument.is_drum or not instrument.notes:
            continue

        velocities = [n.velocity for n in instrument.notes]

        # Smooth velocity.
        # FIX: guard against single-note instruments — the original read
        # velocities[i+1] for i == 0 and raised IndexError when only one
        # note was present. With one note there are no neighbors to blend.
        if "Smooth" in mode and len(velocities) > 1:
            n_notes = len(velocities)
            new_velocities = list(velocities)
            for i in range(n_notes):
                if i == 0:
                    neighbor_avg = velocities[i + 1]
                elif i == n_notes - 1:
                    neighbor_avg = velocities[i - 1]
                else:
                    neighbor_avg = (velocities[i - 1] + velocities[i + 1]) / 2.0
                smoothed_vel = velocities[i] * (1 - smooth_factor) + neighbor_avg * smooth_factor
                new_velocities[i] = int(max(1, min(127, smoothed_vel)))
            for i, note in enumerate(instrument.notes):
                note.velocity = new_velocities[i]

        # Compress velocity.
        if "Compress" in mode:
            velocities = [n.velocity for n in instrument.notes]  # refresh if smoothed first
            min_vel, max_vel = min(velocities), max(velocities)
            if max_vel == min_vel:
                # A flat velocity range cannot be rescaled; skip this instrument.
                continue

            for note in instrument.notes:
                if compress_type == "linear":
                    new_vel = compress_min + (note.velocity - min_vel) * (compress_max - compress_min) / (max_vel - min_vel)
                elif compress_type == "perceptual":
                    # Gamma-style perceptual compression.
                    norm = (note.velocity - min_vel) / (max_vel - min_vel)
                    gamma = 0.6  # perceptual curve exponent
                    new_vel = compress_min + ((norm ** gamma) * (compress_max - compress_min))
                else:
                    # Unknown compress_type: leave the velocity unchanged.
                    new_vel = note.velocity
                note.velocity = int(max(1, min(127, new_vel)))

    return midi_obj
|
|
|
716 |
|
|
|
|
|
717 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
718 |
|
719 |
+
# =================================================================================================
|
720 |
+
# === Helper Functions ===
|
721 |
+
# =================================================================================================
|
722 |
|
723 |
def analyze_audio_for_adaptive_params(audio_data: np.ndarray, sample_rate: int):
|
724 |
"""
|
|
|
2209 |
print(f"Render type: {params.render_type}")
|
2210 |
print(f"Soundfont bank: {params.soundfont_bank}")
|
2211 |
print(f"Audio render sample rate: {params.render_sample_rate}")
|
|
|
2212 |
print('=' * 70)
|
2213 |
+
|
2214 |
+
##################################
|
2215 |
+
|
2216 |
+
# --- FLOW STEP 1: Apply MIDI Post-Processing & Correction Suite ---
|
2217 |
+
if getattr(params, 'enable_midi_corrections', False):
|
2218 |
+
print("Applying MIDI Post-Processing & Corrections (on pretty_midi object)...")
|
2219 |
+
|
2220 |
+
# --- FLOW STEP 2: Load into pretty_midi for corrections ---
|
2221 |
+
try:
|
2222 |
+
midi_obj = pretty_midi.PrettyMIDI(io.BytesIO(fdata))
|
2223 |
+
print("Successfully loaded MIDI into pretty_midi for corrections.")
|
2224 |
+
except Exception as e:
|
2225 |
+
print(f"Fatal Error: Could not load the input MIDI with pretty_midi. Cannot proceed. Error: {e}")
|
2226 |
+
return ("N/A", fn1, f"MIDI file is corrupted or in an unsupported format. Error: {e}", None, None, None, "MIDI Load Error")
|
2227 |
+
|
2228 |
+
# Get common segmentation parameters
|
2229 |
+
enable_segmentation = getattr(params, 'correction_rhythm_stab_by_segment', True)
|
2230 |
+
silence_threshold_s = getattr(params, 'correction_rhythm_stab_segment_silence_s', 1.0)
|
2231 |
+
|
2232 |
+
# Correction Order: Filter -> Stabilize -> Simplify -> Quantize -> Velocity
|
2233 |
+
|
2234 |
+
# 1. Filter spurious notes (does not need segmentation)
|
2235 |
+
if getattr(params, 'correction_filter_spurious_notes', False):
|
2236 |
+
midi_obj = filter_spurious_notes_pm(
|
2237 |
+
midi_obj,
|
2238 |
+
max_dur_s=getattr(params, 'correction_spurious_duration_ms', 50) / 1000.0,
|
2239 |
+
max_vel=getattr(params, 'correction_spurious_velocity', 20)
|
2240 |
+
)
|
2241 |
+
|
2242 |
+
# 2. Stabilize rhythm
|
2243 |
+
if getattr(params, 'correction_remove_abnormal_rhythm', False):
|
2244 |
+
midi_obj = stabilize_rhythm_pm(
|
2245 |
+
midi_obj,
|
2246 |
+
enable_segmentation=enable_segmentation,
|
2247 |
+
silence_threshold_s=silence_threshold_s
|
2248 |
+
)
|
2249 |
+
|
2250 |
+
# 3. Simplify rhythm
|
2251 |
+
simplification_level = getattr(params, 'correction_rhythmic_simplification_level', "None")
|
2252 |
+
if simplification_level != "None":
|
2253 |
+
midi_obj = simplify_rhythm_pm(
|
2254 |
+
midi_obj,
|
2255 |
+
simplification_level_str=simplification_level,
|
2256 |
+
enable_segmentation=enable_segmentation,
|
2257 |
+
silence_threshold_s=silence_threshold_s
|
2258 |
+
)
|
2259 |
+
|
2260 |
+
# 4. Quantize rhythm
|
2261 |
+
quantize_level = getattr(params, 'correction_quantize_level', "None")
|
2262 |
+
if quantize_level != "None":
|
2263 |
+
midi_obj = quantize_pm(
|
2264 |
+
midi_obj,
|
2265 |
+
quantize_level_str=quantize_level,
|
2266 |
+
enable_segmentation=enable_segmentation,
|
2267 |
+
silence_threshold_s=silence_threshold_s
|
2268 |
+
)
|
2269 |
+
|
2270 |
+
# 5. Process velocity (does not need segmentation)
|
2271 |
+
velocity_mode = getattr(params, 'correction_velocity_mode', "None")
|
2272 |
+
if velocity_mode != "None":
|
2273 |
+
midi_obj = process_velocity_pm(
|
2274 |
+
midi_obj,
|
2275 |
+
mode=[velocity_mode],
|
2276 |
+
smooth_factor=getattr(params, 'correction_velocity_smooth_factor', 0.5),
|
2277 |
+
compress_min=getattr(params, 'correction_velocity_compress_min', 30),
|
2278 |
+
compress_max=getattr(params, 'correction_velocity_compress_max', 100)
|
2279 |
+
)
|
2280 |
+
|
2281 |
+
# --- FLOW STEP 3: Convert the corrected pretty_midi object back to binary data ---
|
2282 |
+
corrected_midi_io = io.BytesIO()
|
2283 |
+
midi_obj.write(corrected_midi_io)
|
2284 |
+
fdata = corrected_midi_io.getvalue()
|
2285 |
+
|
2286 |
+
print("Corrections finished.")
|
2287 |
+
print('=' * 70)
|
2288 |
|
2289 |
+
##################################
|
2290 |
+
|
2291 |
# --- MIDI Processing using TMIDIX ---
|
2292 |
print('Processing MIDI... Please wait...')
|
2293 |
raw_score = MIDI.midi2single_track_ms_score(fdata)
|
|
|
2434 |
o[1] *= 200
|
2435 |
o[2] *= 200
|
2436 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2437 |
# --- Saving Processed MIDI File ---
|
2438 |
# Save the transformed MIDI data
|
2439 |
SONG, patches, _ = TMIDIX.patch_enhanced_score_notes(output_score)
|
|
|
4338 |
correction_remove_abnormal_rhythm = gr.Checkbox(label="Stabilize Rhythm (for Pitch Bend)", value=False,
|
4339 |
info="Attempts to merge overly dense, rhythmically unstable notes often created when 'Allow Multiple Pitch Bends' is used. This can clean up the rhythm but may lose some pitch slide nuance.")
|
4340 |
with gr.Group(visible=False) as rhythm_stab_options: # This group is initially hidden
|
4341 |
+
correction_rhythm_stab_by_segment = gr.Checkbox(label="Enable Segmentation by Silence", value=True,
|
4342 |
info="Highly recommended for albums or long files. Splits the MIDI by silent parts before stabilizing rhythm, ensuring accuracy for songs with different tempos.")
|
4343 |
correction_rhythm_stab_segment_silence_s = gr.Slider(minimum=0.5, maximum=10.0, value=1.0, step=0.5,
|
4344 |
label="Silence Threshold for Segmentation (seconds)",
|
4345 |
info="The amount of silence required to start a new segment. 1-3 seconds is usually enough to separate songs on an album.")
|
4346 |
+
# --- Rhythmic Simplification Group ---
|
4347 |
+
with gr.Group():
|
4348 |
+
correction_rhythmic_simplification_level = gr.Dropdown(
|
4349 |
+
["None", "1/16", "1/12", "1/8", "1/4"],
|
4350 |
+
value="None",
|
4351 |
+
label="Simplify Rhythm (Experimental)",
|
4352 |
+
info="WARNING: This is a destructive process that removes notes to slow down the rhythm. Select a target grid; only the most important note within each grid cell will be kept and its duration extended."
|
4353 |
+
)
|
4354 |
# --- Quantization Group ---
|
4355 |
with gr.Group():
|
4356 |
correction_quantize_level = gr.Dropdown(
|
4357 |
+
["None", "Auto-Analyze Rhythm", "1/64", "1/32", "1/16", "1/8", "1/4", "1/24", "1/12"],
|
4358 |
value="None",
|
4359 |
label="Quantize Rhythm",
|
4360 |
+
info="Quantizes notes to the nearest rhythmic grid line. '1/16' is recommended for most pop and rock music. For expressive genres like classical or jazz, use with caution as it may reduce natural timing nuances. Straight divisions (1/8, 1/16, etc.) suit most modern music, while swing divisions (1/12, 1/24) are ideal for jazz, blues, or shuffle styles. 'Auto-Analyze' is highly recommended for albums or files with mixed tempos, as it will automatically determine the best grid (straight or swing) for each song segment."
|
4361 |
)
|
4362 |
# --- Velocity Processing Group ---
|
4363 |
with gr.Group():
|