refactor: Migrate MIDI correction tools to use PrettyMIDI library
Reworks the entire MIDI correction suite to operate on `PrettyMIDI` objects instead of the internal `escore` format.
app.py
CHANGED
@@ -79,7 +79,7 @@ from basic_pitch import ICASSP_2022_MODEL_PATH
|
|
79 |
# --- Imports for 8-bit Synthesizer & MIDI Merging ---
|
80 |
import pretty_midi
|
81 |
import numpy as np
|
82 |
-
from scipy import signal
|
83 |
|
84 |
# =================================================================================================
|
85 |
# === Hugging Face SoundFont Downloader ===
|
@@ -153,18 +153,19 @@ class AppParameters:
|
|
153 |
render_remove_drums: bool = False
|
154 |
|
155 |
# EXPERIMENTAL: MIDI Post-Processing & Correction Tools
|
156 |
-
enable_midi_corrections: bool = False
|
157 |
-
correction_filter_spurious_notes: bool = True
|
158 |
-
correction_spurious_duration_ms: int = 50
|
159 |
-
correction_spurious_velocity: int = 20
|
160 |
-
correction_remove_abnormal_rhythm: bool = False
|
161 |
-
correction_rhythm_stab_by_segment: bool = False
|
162 |
-
correction_rhythm_stab_segment_silence_s: float = 1.0
|
163 |
-
correction_quantize_level: str = "None"
|
164 |
-
correction_velocity_mode: str = "None"
|
165 |
-
correction_velocity_smooth_factor: float = 0.5
|
166 |
-
correction_velocity_compress_min: int = 30
|
167 |
-
correction_velocity_compress_max: int = 100
|
|
|
168 |
|
169 |
# 8-bit Synthesizer Settings
|
170 |
s8bit_waveform_type: str = 'Square'
|
@@ -230,277 +231,494 @@ class AppParameters:
|
|
230 |
s8bit_delay_lowpass_cutoff_hz: int = 5000 # Lowpass filter frequency for delay echoes (removes harsh high frequencies from echoes)
|
231 |
s8bit_delay_treble_pitch_shift: int = 0 # Pitch shift (in semitones) applied to high notes in delay echoes
|
232 |
|
233 |
-
# =================================================================================================
|
234 |
-
# === Helper Functions ===
|
235 |
-
# =================================================================================================
|
236 |
|
|
|
|
|
|
|
237 |
|
238 |
-
def
|
239 |
-
"""
|
240 |
-
|
|
|
|
|
|
|
|
|
|
|
241 |
|
242 |
-
Args:
|
243 |
-
escore (list): The list of events.
|
244 |
-
bpm (float): The Beats Per Minute of the track.
|
245 |
-
quantize_level_str (str): The quantization level, e.g., "1/8", "1/16", "1/32".
|
246 |
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
"1/12": 3.0, # 3 notes per beat
|
256 |
-
"1/16": 4.0,
|
257 |
-
"1/24": 6.0, # 6 notes per beat
|
258 |
-
"1/32": 8.0,
|
259 |
-
"1/64": 16.0
|
260 |
-
}
|
261 |
-
division = level_map.get(quantize_level_str)
|
262 |
-
if not division:
|
263 |
-
print(" - Invalid quantization level. Skipping.")
|
264 |
-
return escore
|
265 |
-
|
266 |
-
# Calculate the duration of a single grid step in milliseconds
|
267 |
-
grid_ms = (60000.0 / bpm) / division
|
268 |
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
original_start_time = event[0]
|
275 |
-
# The core quantization logic: find the nearest grid point
|
276 |
-
quantized_start_time = round(original_start_time / grid_ms) * grid_ms
|
277 |
-
event[0] = int(quantized_start_time)
|
278 |
-
notes_quantized += 1
|
279 |
-
quantized_escore.append(event)
|
280 |
-
|
281 |
-
print(f" - Quantized {notes_quantized} notes.")
|
282 |
-
return quantized_escore
|
283 |
|
284 |
-
|
285 |
-
def filter_spurious_notes_escore(escore, max_dur_ms=50, max_vel=20):
|
286 |
"""
|
287 |
-
|
288 |
-
|
289 |
-
Args:
|
290 |
-
escore (list): The list of events.
|
291 |
-
max_dur_ms (int): Notes with duration shorter than this will be considered.
|
292 |
-
max_vel (int): Notes with velocity lower than this will be considered.
|
293 |
-
|
294 |
-
Returns:
|
295 |
-
list: The cleaned escore.
|
296 |
"""
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
cleaned_notes = [
|
304 |
-
note for note in note_events
|
305 |
-
if not (note[1] < max_dur_ms and note[3] < max_vel)
|
306 |
-
]
|
307 |
|
308 |
-
|
309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
310 |
|
311 |
-
#
|
312 |
-
|
313 |
-
|
314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
|
316 |
-
|
317 |
-
|
318 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
319 |
|
320 |
-
|
321 |
-
escore (list): The list of events.
|
322 |
-
mode (str): "Smooth", "Compress", or "None".
|
323 |
-
smooth_factor (float): How much to blend with neighbors (0=none, 1=full average).
|
324 |
-
compress_min (int): The target minimum velocity for compression.
|
325 |
-
compress_max (int): The target maximum velocity for compression.
|
326 |
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
return escore
|
332 |
-
|
333 |
-
print(f" - Processing velocities with mode: {mode}...")
|
334 |
-
|
335 |
-
note_events = [note for note in escore if isinstance(note[0], (int, float))]
|
336 |
-
metadata_events = [meta for meta in escore if not isinstance(meta[0], (int, float))]
|
337 |
|
338 |
-
|
339 |
-
return escore
|
340 |
-
|
341 |
-
velocities = [note[3] for note in note_events]
|
342 |
-
|
343 |
-
if mode == "Smooth":
|
344 |
-
new_velocities = list(velocities) # Start with a copy
|
345 |
-
# Iterate from the second to the second-to-last note
|
346 |
-
for i in range(1, len(velocities) - 1):
|
347 |
-
prev_vel = velocities[i-1]
|
348 |
-
current_vel = velocities[i]
|
349 |
-
next_vel = velocities[i+1]
|
350 |
-
neighbor_avg = (prev_vel + next_vel) / 2.0
|
351 |
-
# Blend the current velocity with the average of its neighbors
|
352 |
-
smoothed_vel = (current_vel * (1 - smooth_factor)) + (neighbor_avg * smooth_factor)
|
353 |
-
new_velocities[i] = int(max(1, min(127, smoothed_vel)))
|
354 |
-
|
355 |
-
for i, note in enumerate(note_events):
|
356 |
-
note[3] = new_velocities[i]
|
357 |
-
print(f" - Smoothed {len(note_events)} velocities.")
|
358 |
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
if max_vel_orig == min_vel_orig:
|
365 |
-
return escore
|
366 |
-
|
367 |
-
for note in note_events:
|
368 |
-
# Linear mapping from original range to target range
|
369 |
-
original_vel = note[3]
|
370 |
-
new_vel = compress_min + (original_vel - min_vel_orig) * \
|
371 |
-
(compress_max - compress_min) / (max_vel_orig - min_vel_orig)
|
372 |
-
note[3] = int(max(1, min(127, new_vel)))
|
373 |
-
print(f" - Compressed {len(note_events)} velocities to range [{compress_min}, {compress_max}].")
|
374 |
-
|
375 |
-
final_escore = metadata_events + note_events
|
376 |
-
final_escore.sort(key=lambda event: event[1] if isinstance(event[0], str) else event[0])
|
377 |
-
return final_escore
|
378 |
-
|
379 |
-
|
380 |
-
def stabilize_midi_rhythm(escore,
|
381 |
-
ioi_threshold_ratio=0.30,
|
382 |
-
min_ioi_ms=30,
|
383 |
-
enable_segmentation=True,
|
384 |
-
silence_split_threshold_s=2.0):
|
385 |
-
"""
|
386 |
-
Removes or merges rhythmically unstable notes from an escore list.
|
387 |
-
This is designed to clean up MIDI generated by basic-pitch with multiple pitch bends,
|
388 |
-
which can create clusters of very short, dense notes to approximate a slide.
|
389 |
-
This version can segment the MIDI based on silence before processing, making it robust
|
390 |
-
for files containing multiple songs with different tempos (like an album).
|
391 |
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
metadata_events = [meta for meta in escore if not isinstance(meta[0], (int, float))]
|
405 |
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
|
|
|
|
410 |
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
415 |
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
print(f" - Segmentation enabled (silence > {silence_split_threshold_s}s).")
|
420 |
-
current_segment = [note_events[0]]
|
421 |
-
silence_threshold_ms = silence_split_threshold_s * 1000
|
422 |
-
|
423 |
-
for i in range(1, len(note_events)):
|
424 |
-
prev_note_end_ms = note_events[i-1][0] + note_events[i-1][1]
|
425 |
-
current_note_start_ms = note_events[i][0]
|
426 |
-
gap_ms = current_note_start_ms - prev_note_end_ms
|
427 |
|
428 |
-
|
429 |
-
|
430 |
-
|
|
|
|
|
431 |
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
436 |
else:
|
437 |
-
|
438 |
-
segments
|
|
|
439 |
|
440 |
-
|
441 |
-
|
442 |
-
total_merged_count = 0
|
443 |
|
444 |
for i, segment in enumerate(segments):
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
457 |
continue
|
458 |
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
466 |
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
notes_merged_in_segment += 1
|
479 |
-
# Merge by extending the previous note's duration to cover the current note
|
480 |
-
new_end_time = current_note[0] + current_note[1]
|
481 |
-
last_kept_note[1] = new_end_time - last_kept_note[0]
|
482 |
-
else:
|
483 |
-
# Note is rhythmically stable, so we keep it
|
484 |
-
cleaned_segment.append(copy.deepcopy(current_note))
|
485 |
-
|
486 |
-
if len(segments) > 1:
|
487 |
-
print(f" - Segment {i+1}: Median IOI {median_ioi:.2f}ms, merged {notes_merged_in_segment} notes.")
|
488 |
|
489 |
-
|
490 |
-
total_merged_count += notes_merged_in_segment
|
491 |
|
492 |
-
if total_merged_count > 0:
|
493 |
-
print(f" - Rhythm stabilization complete. Total merged notes: {total_merged_count}.")
|
494 |
|
495 |
-
# 4. Recombine metadata with the globally cleaned notes and re-sort
|
496 |
-
final_escore = metadata_events + all_cleaned_notes
|
497 |
-
|
498 |
-
# Re-sort the entire list by time to ensure correct MIDI event order.
|
499 |
-
# The sort key must handle both event types: metadata time is at index 1, note time is at index 0.
|
500 |
-
final_escore.sort(key=lambda event: event[1] if isinstance(event[0], str) else event[0])
|
501 |
-
|
502 |
-
return final_escore
|
503 |
|
|
|
|
|
|
|
504 |
|
505 |
def analyze_audio_for_adaptive_params(audio_data: np.ndarray, sample_rate: int):
|
506 |
"""
|
@@ -1991,9 +2209,85 @@ def Render_MIDI(*, input_midi_path: str, params: AppParameters, progress: gr.Pro
|
|
1991 |
print(f"Render type: {params.render_type}")
|
1992 |
print(f"Soundfont bank: {params.soundfont_bank}")
|
1993 |
print(f"Audio render sample rate: {params.render_sample_rate}")
|
1994 |
-
# ... (add other print statements for settings if needed)
|
1995 |
print('=' * 70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1996 |
|
|
|
|
|
1997 |
# --- MIDI Processing using TMIDIX ---
|
1998 |
print('Processing MIDI... Please wait...')
|
1999 |
raw_score = MIDI.midi2single_track_ms_score(fdata)
|
@@ -2140,53 +2434,6 @@ def Render_MIDI(*, input_midi_path: str, params: AppParameters, progress: gr.Pro
|
|
2140 |
o[1] *= 200
|
2141 |
o[2] *= 200
|
2142 |
|
2143 |
-
# --- MIDI Post-Processing & Correction Block ---
|
2144 |
-
if getattr(params, 'enable_midi_corrections', False):
|
2145 |
-
print("Applying MIDI Post-Processing & Corrections...")
|
2146 |
-
|
2147 |
-
# Filter spurious notes first to clean the data for other processes
|
2148 |
-
if getattr(params, 'correction_filter_spurious_notes', False):
|
2149 |
-
output_score = filter_spurious_notes_escore(
|
2150 |
-
output_score,
|
2151 |
-
max_dur_ms=getattr(params, 'correction_spurious_duration_ms', 50),
|
2152 |
-
max_vel=getattr(params, 'correction_spurious_velocity', 20)
|
2153 |
-
)
|
2154 |
-
|
2155 |
-
# Then, stabilize rhythm on the cleaned notes
|
2156 |
-
if getattr(params, 'correction_remove_abnormal_rhythm', False):
|
2157 |
-
output_score = stabilize_midi_rhythm(
|
2158 |
-
output_score,
|
2159 |
-
enable_segmentation=getattr(params, 'correction_rhythm_stab_by_segment', False),
|
2160 |
-
silence_split_threshold_s=getattr(params, 'correction_rhythm_stab_segment_silence_s', 1.0)
|
2161 |
-
)
|
2162 |
-
|
2163 |
-
# Then, quantize the stabilized rhythm
|
2164 |
-
quantize_level = getattr(params, 'correction_quantize_level', "None")
|
2165 |
-
if quantize_level != "None":
|
2166 |
-
try:
|
2167 |
-
# We need to get the BPM for quantization. We do this once here.
|
2168 |
-
midi_obj_for_bpm = pretty_midi.PrettyMIDI(input_midi_path)
|
2169 |
-
estimated_bpm = midi_obj_for_bpm.estimate_tempo()
|
2170 |
-
output_score = quantize_escore(output_score, estimated_bpm, quantize_level)
|
2171 |
-
except Exception as e:
|
2172 |
-
print(f" - Could not estimate BPM for quantization. Skipping. Error: {e}")
|
2173 |
-
|
2174 |
-
# Finally, process velocity as it doesn't affect timing or notes
|
2175 |
-
velocity_mode = getattr(params, 'correction_velocity_mode', "None")
|
2176 |
-
if velocity_mode != "None":
|
2177 |
-
output_score = process_velocity_escore(
|
2178 |
-
output_score,
|
2179 |
-
mode=velocity_mode,
|
2180 |
-
smooth_factor=getattr(params, 'correction_velocity_smooth_factor', 0.5),
|
2181 |
-
compress_min=getattr(params, 'correction_velocity_compress_min', 30),
|
2182 |
-
compress_max=getattr(params, 'correction_velocity_compress_max', 100)
|
2183 |
-
)
|
2184 |
-
print("Corrections finished.")
|
2185 |
-
print('=' * 70)
|
2186 |
-
|
2187 |
-
print('Final adjustments complete.')
|
2188 |
-
print('=' * 70)
|
2189 |
-
|
2190 |
# --- Saving Processed MIDI File ---
|
2191 |
# Save the transformed MIDI data
|
2192 |
SONG, patches, _ = TMIDIX.patch_enhanced_score_notes(output_score)
|
@@ -4091,18 +4338,26 @@ if __name__ == "__main__":
|
|
4091 |
correction_remove_abnormal_rhythm = gr.Checkbox(label="Stabilize Rhythm (for Pitch Bend)", value=False,
|
4092 |
info="Attempts to merge overly dense, rhythmically unstable notes often created when 'Allow Multiple Pitch Bends' is used. This can clean up the rhythm but may lose some pitch slide nuance.")
|
4093 |
with gr.Group(visible=False) as rhythm_stab_options: # This group is initially hidden
|
4094 |
-
correction_rhythm_stab_by_segment = gr.Checkbox(label="Enable Segmentation by Silence", value=
|
4095 |
info="Highly recommended for albums or long files. Splits the MIDI by silent parts before stabilizing rhythm, ensuring accuracy for songs with different tempos.")
|
4096 |
correction_rhythm_stab_segment_silence_s = gr.Slider(minimum=0.5, maximum=10.0, value=1.0, step=0.5,
|
4097 |
label="Silence Threshold for Segmentation (seconds)",
|
4098 |
info="The amount of silence required to start a new segment. 1-3 seconds is usually enough to separate songs on an album.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4099 |
# --- Quantization Group ---
|
4100 |
with gr.Group():
|
4101 |
correction_quantize_level = gr.Dropdown(
|
4102 |
-
["None", "1/64", "1/32", "1/16", "1/8", "1/4", "1/24", "1/12"],
|
4103 |
value="None",
|
4104 |
label="Quantize Rhythm",
|
4105 |
-
info="Quantizes notes to the nearest rhythmic grid line. '1/16' is recommended for most pop and rock music. For expressive genres like classical or jazz, use with caution as it may reduce natural timing nuances. Straight divisions (1/8, 1/16, etc.) suit most modern music, while swing divisions (1/12, 1/24) are ideal for jazz, blues, or shuffle styles."
|
4106 |
)
|
4107 |
# --- Velocity Processing Group ---
|
4108 |
with gr.Group():
|
|
|
79 |
# --- Imports for 8-bit Synthesizer & MIDI Merging ---
|
80 |
import pretty_midi
|
81 |
import numpy as np
|
82 |
+
from scipy import signal, stats
|
83 |
|
84 |
# =================================================================================================
|
85 |
# === Hugging Face SoundFont Downloader ===
|
|
|
153 |
render_remove_drums: bool = False
|
154 |
|
155 |
# EXPERIMENTAL: MIDI Post-Processing & Correction Tools
|
156 |
+
enable_midi_corrections: bool = False # Master switch for enabling MIDI correction tools
|
157 |
+
correction_filter_spurious_notes: bool = True # Enable filtering of spurious (noise) notes
|
158 |
+
correction_spurious_duration_ms: int = 50 # Maximum duration (ms) for a note to be considered spurious
|
159 |
+
correction_spurious_velocity: int = 20 # Maximum velocity for a note to be considered spurious
|
160 |
+
correction_remove_abnormal_rhythm: bool = False # Enable rhythm stabilization for abnormal rhythm
|
161 |
+
correction_rhythm_stab_by_segment: bool = False # Enable segmentation by silence before rhythm stabilization
|
162 |
+
correction_rhythm_stab_segment_silence_s: float = 1.0 # Silence threshold (seconds) for segmenting MIDI
|
163 |
+
correction_quantize_level: str = "None" # Quantization level for note timing (e.g., "1/16", "None")
|
164 |
+
correction_velocity_mode: str = "None" # Velocity processing mode ("None", "Smooth", "Compress")
|
165 |
+
correction_velocity_smooth_factor: float = 0.5 # Smoothing factor for velocity processing
|
166 |
+
correction_velocity_compress_min: int = 30 # Minimum velocity after compression
|
167 |
+
correction_velocity_compress_max: int = 100 # Maximum velocity after compression
|
168 |
+
correction_rhythmic_simplification_level: str = "None" # rhythmic simplification
|
169 |
|
170 |
# 8-bit Synthesizer Settings
|
171 |
s8bit_waveform_type: str = 'Square'
|
|
|
231 |
s8bit_delay_lowpass_cutoff_hz: int = 5000 # Lowpass filter frequency for delay echoes (removes harsh high frequencies from echoes)
|
232 |
s8bit_delay_treble_pitch_shift: int = 0 # Pitch shift (in semitones) applied to high notes in delay echoes
|
233 |
|
|
|
|
|
|
|
234 |
|
235 |
+
# ===============================================================================
|
236 |
+
# === MIDI CORRECTION SUITE (Operating on pretty_midi objects for robustness) ===
|
237 |
+
# ===============================================================================
|
238 |
|
239 |
+
def _get_all_notes(midi_obj: pretty_midi.PrettyMIDI, include_drums=False):
    """Return every note from every instrument as one list, sorted by onset time.

    Args:
        midi_obj: Source MIDI object whose instruments are scanned.
        include_drums: When False (default), notes on drum tracks are excluded.

    Returns:
        list[pretty_midi.Note]: All collected notes ordered by their start time.
    """
    collected = [
        note
        for track in midi_obj.instruments
        if include_drums or not track.is_drum
        for note in track.notes
    ]
    return sorted(collected, key=lambda n: n.start)
|
247 |
|
|
|
|
|
|
|
|
|
248 |
|
249 |
+
def _normalize_instrument_times(instrument: pretty_midi.Instrument):
    """Return a deep-copied instrument whose timestamps are shifted to start at 0.

    The original instrument is left untouched; the copy has every note's start
    and end reduced by the earliest note onset. An instrument with no notes is
    returned as-is (no copy is made).
    """
    if not instrument.notes:
        return instrument

    # The earliest onset across all notes becomes the new time origin.
    offset = min(note.start for note in instrument.notes)

    shifted = copy.deepcopy(instrument)
    for note in shifted.notes:
        note.start -= offset
        note.end -= offset
    return shifted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
|
264 |
+
def _segment_midi_by_silence(midi_obj: pretty_midi.PrettyMIDI, silence_threshold_s=1.0):
    """
    Splits a PrettyMIDI object into a list of PrettyMIDI objects, each representing a segment.
    This is the core of per-song processing for albums.

    A segment boundary is declared wherever the gap between the end of one note and
    the start of the next (across all instruments, drums included) exceeds
    ``silence_threshold_s`` seconds.

    Args:
        midi_obj: The MIDI object to split.
        silence_threshold_s (float): Minimum silent gap (seconds) that separates segments.

    Returns:
        list[pretty_midi.PrettyMIDI]: One object per detected segment; each segment
        contains only the instruments that actually had notes in it. Empty list if
        the input has no notes.
    """
    all_notes = _get_all_notes(midi_obj, include_drums=True)
    if not all_notes:
        return []

    # Map each note (by identity) back to its owning instrument index once,
    # so assigning a note to a segment is O(1) instead of scanning every
    # instrument's note list per note (the previous approach was O(n^2)).
    note_owner = {}
    for inst_idx, inst in enumerate(midi_obj.instruments):
        for note in inst.notes:
            note_owner[id(note)] = inst_idx

    segments = []
    current_segment_notes = {idx: [] for idx in range(len(midi_obj.instruments))}

    def _flush_current_segment():
        """Build a PrettyMIDI object from the accumulated notes (if any) and append it."""
        segment_midi = pretty_midi.PrettyMIDI()
        for inst_idx, inst_notes in current_segment_notes.items():
            if inst_notes:
                source_inst = midi_obj.instruments[inst_idx]
                new_inst = pretty_midi.Instrument(program=source_inst.program, is_drum=source_inst.is_drum)
                new_inst.notes.extend(inst_notes)
                segment_midi.instruments.append(new_inst)
        if segment_midi.instruments:
            segments.append(segment_midi)

    # The very first note always opens the first segment.
    current_segment_notes[note_owner[id(all_notes[0])]].append(all_notes[0])

    for i in range(1, len(all_notes)):
        gap = all_notes[i].start - all_notes[i - 1].end
        if gap > silence_threshold_s:
            # End of a segment: flush what we have and start a fresh one.
            _flush_current_segment()
            current_segment_notes = {idx: [] for idx in range(len(midi_obj.instruments))}
        current_segment_notes[note_owner[id(all_notes[i])]].append(all_notes[i])

    # Don't forget the trailing segment.
    _flush_current_segment()

    return segments
|
318 |
+
|
319 |
+
def _recombine_segments(segments):
    """Merges a list of segmented PrettyMIDI objects back into one.

    Segments produced by ``_segment_midi_by_silence`` contain only the subset of
    instruments that had notes within that segment, so the instrument lists of
    different segments can differ in length and order. Tracks are therefore
    matched across segments by their (program, is_drum) pair rather than by
    positional index — the previous positional approach could raise IndexError
    or append notes to the wrong track when segments had differing layouts.

    Args:
        segments (list): PrettyMIDI objects to merge, typically from
            ``_segment_midi_by_silence``.

    Returns:
        pretty_midi.PrettyMIDI: A single object containing all notes from all segments.

    NOTE(review): two distinct source tracks sharing the same program and drum
    flag are merged into one output track — confirm this is acceptable for the
    expected inputs.
    """
    recombined_midi = pretty_midi.PrettyMIDI()
    # Output tracks already created, keyed by their identifying pair.
    track_by_key = {}

    for segment in segments:
        for inst in segment.instruments:
            key = (inst.program, inst.is_drum)
            target = track_by_key.get(key)
            if target is None:
                target = pretty_midi.Instrument(program=inst.program, is_drum=inst.is_drum)
                track_by_key[key] = target
                recombined_midi.instruments.append(target)
            target.notes.extend(inst.notes)

    return recombined_midi
|
335 |
+
|
336 |
+
def _analyze_best_quantize_level(notes, bpm, error_threshold_ratio=0.25):
|
337 |
+
"""Analyzes a list of notes to determine the most likely quantization grid."""
|
338 |
+
if not notes: return "None"
|
339 |
+
grids_to_test = ["1/8", "1/12", "1/16", "1/24", "1/32"]
|
340 |
+
level_map = {"1/8": 2.0, "1/12": 3.0, "1/16": 4.0, "1/24": 6.0, "1/32": 8.0}
|
341 |
+
start_times = [n.start for n in notes]
|
342 |
+
results = []
|
343 |
+
for grid_name in grids_to_test:
|
344 |
+
division = level_map[grid_name]
|
345 |
+
grid_s = (60.0 / bpm) / division
|
346 |
+
if grid_s < 0.001: continue
|
347 |
+
total_error = sum(min(t % grid_s, grid_s - (t % grid_s)) for t in start_times)
|
348 |
+
avg_error = total_error / len(start_times)
|
349 |
+
results.append({"grid": grid_name, "avg_error": avg_error, "grid_s": grid_s})
|
350 |
+
if not results: return "None"
|
351 |
+
best_fit = min(results, key=lambda x: x['avg_error'])
|
352 |
+
if best_fit['avg_error'] > best_fit['grid_s'] * error_threshold_ratio:
|
353 |
+
return "None"
|
354 |
+
return best_fit['grid']
|
355 |
+
|
356 |
+
def filter_spurious_notes_pm(midi_obj: pretty_midi.PrettyMIDI, max_dur_s=0.05, max_vel=20):
    """Remove notes that are both very short AND very quiet (typical transcription noise).

    Mutates the instruments of ``midi_obj`` in place and returns the same object.

    Args:
        midi_obj: The MIDI object to clean.
        max_dur_s (float): Duration ceiling (seconds) for a note to count as spurious.
        max_vel (int): Velocity ceiling for a note to count as spurious.

    Returns:
        pretty_midi.PrettyMIDI: The same object, with spurious notes dropped.
    """
    print(f" - Filtering spurious notes (duration < {max_dur_s*1000:.0f}ms AND velocity < {max_vel})...")
    removed_total = 0
    for track in midi_obj.instruments:
        kept = []
        for note in track.notes:
            # A note is spurious only when it fails BOTH thresholds.
            is_spurious = (note.end - note.start < max_dur_s) and (note.velocity < max_vel)
            if not is_spurious:
                kept.append(note)
        removed_total += len(track.notes) - len(kept)
        track.notes = kept

    print(f" - Removed {removed_total} spurious notes.")
    return midi_obj
|
370 |
+
|
371 |
+
def stabilize_rhythm_pm(
    midi_obj: pretty_midi.PrettyMIDI,
    ioi_threshold_ratio=0.30,
    min_ioi_s=0.03,
    enable_segmentation=True,
    silence_threshold_s=1.0,
    merge_mode="extend",  # "extend" (lengthen previous note) or "drop" (just discard)
    consider_velocity=True,  # treat quieter-than-neighbor notes as decorations eligible for merging
    skip_chords=True,  # never merge a note whose onset coincides with other notes (a chord)
    use_mode_ioi=False  # use the statistical mode of the IOIs instead of the median
):
    """Enhances rhythm stability by merging rhythmically unstable notes, with advanced options.

    Notes that start very close to their predecessor (relative to the typical
    inter-onset interval of the track), are near in pitch, and (optionally)
    quieter, are treated as artifacts — e.g. the dense note clusters basic-pitch
    emits to approximate pitch slides — and are merged into the preceding note.

    Args:
        midi_obj: The MIDI object to stabilize (instruments mutated in place).
        ioi_threshold_ratio (float): Fraction of the median IOI below which a gap is "too close".
        min_ioi_s (float): Absolute floor (seconds) for the closeness threshold.
        enable_segmentation (bool): Split by silence first so each song gets its own IOI statistics.
        silence_threshold_s (float): Silence gap (seconds) used for segmentation.
        merge_mode (str): "extend" lengthens the previous note to cover the merged one;
            "drop" discards the unstable note without lengthening.
        consider_velocity (bool): Require the merged note to be < 80% of the previous note's velocity.
        skip_chords (bool): Keep notes that share an onset with other notes untouched.
        use_mode_ioi (bool): Use the IOI mode instead of the median as the reference interval.

    Returns:
        pretty_midi.PrettyMIDI: The recombined (or single) processed MIDI object.
    """
    print(" - Stabilizing rhythm...")
    if not enable_segmentation:
        segments = [midi_obj]
    else:
        segments = _segment_midi_by_silence(midi_obj, silence_threshold_s)
        if len(segments) > 1:
            print(f" - Split into {len(segments)} segments for stabilization.")

    processed_segments = []

    for segment in segments:
        for instrument in segment.instruments:
            # Drums and sparse tracks don't have enough material for IOI statistics.
            if instrument.is_drum or len(instrument.notes) < 20:
                continue

            notes = sorted(instrument.notes, key=lambda n: n.start)

            # Inter-onset intervals (IOIs) between consecutive note starts.
            iois = [notes[i].start - notes[i-1].start for i in range(1, len(notes))]
            positive_iois = [ioi for ioi in iois if ioi > 0.001]
            if not positive_iois:
                continue

            # Reference interval: mode (if requested) with median fallback, else median.
            if use_mode_ioi:
                try:
                    # NOTE(review): on SciPy >= 1.11 `stats.mode(...).mode` is a scalar,
                    # so the `[0]` indexing raises and this silently falls back to the
                    # median below — confirm mode support on the deployed SciPy version.
                    median_ioi = float(stats.mode(positive_iois).mode[0])
                except Exception:
                    median_ioi = np.median(positive_iois)
            else:
                median_ioi = np.median(positive_iois)
            threshold_s = max(median_ioi * ioi_threshold_ratio, min_ioi_s)

            cleaned_notes = [notes[0]]
            for i in range(1, len(notes)):
                prev_note = cleaned_notes[-1]
                curr_note = notes[i]

                # Chords are kept verbatim when skip_chords is set.
                # NOTE(review): this scan is O(n) per note (O(n^2) overall) — fine for
                # typical track sizes, but worth precomputing for very dense files.
                if skip_chords:
                    notes_at_same_time = [n for n in notes if abs(n.start - curr_note.start) < 0.001]
                    if len(notes_at_same_time) > 1:
                        cleaned_notes.append(curr_note)
                        continue

                # A note counts as an unstable "decoration" only if it is close in
                # time AND pitch (and, optionally, noticeably quieter).
                pitch_close = abs(curr_note.pitch - prev_note.pitch) <= 3  # within a minor third
                velocity_ok = True
                if consider_velocity:
                    velocity_ok = curr_note.velocity < prev_note.velocity * 0.8

                start_close = (curr_note.start - prev_note.start) < threshold_s

                if start_close and pitch_close and velocity_ok:
                    # Both branches omit curr_note from the output; they differ only
                    # in whether the previous note is lengthened to cover it.
                    if merge_mode == "extend":
                        prev_note.end = max(prev_note.end, curr_note.end)
                    elif merge_mode == "drop":
                        continue
                else:
                    # Rhythmically stable note: keep it.
                    cleaned_notes.append(curr_note)

            instrument.notes = cleaned_notes
        processed_segments.append(segment)

    # Without segmentation there is exactly one (in-place processed) segment.
    return _recombine_segments(processed_segments) if enable_segmentation else processed_segments[0]
|
450 |
+
|
451 |
+
|
452 |
+
def simplify_rhythm_pm(
    midi_obj: pretty_midi.PrettyMIDI,
    simplification_level_str="None",
    enable_segmentation=True,
    silence_threshold_s=1.0,
    keep_chords=True,
    max_notes_per_grid=3
):
    """Simplifies rhythm while preserving music length, with optional chord and sustain handling.

    Snaps the music onto a coarse rhythmic grid: within each grid slot only a
    few representative notes are kept (chord root/top/loudest, or just the
    loudest), each re-timed to start exactly on the grid line.

    Args:
        midi_obj: The MIDI object to simplify.
        simplification_level_str (str): Target grid (e.g. "1/8"); "None" is a no-op.
        enable_segmentation (bool): Split by silence first so each song gets its own tempo estimate.
        silence_threshold_s (float): Silence gap (seconds) used for segmentation.
        keep_chords (bool): Keep root, top, and loudest note per slot (capped) instead of just the loudest.
        max_notes_per_grid (int): Maximum notes retained per grid slot when keep_chords is on.

    Returns:
        pretty_midi.PrettyMIDI: The simplified MIDI (the input object if the level
        is "None"/unknown).
    """
    if simplification_level_str == "None":
        return midi_obj
    print(f" - Simplifying rhythm to {simplification_level_str} grid...")

    # Split into segments if enabled, so tempo is estimated per song.
    if not enable_segmentation:
        segments = [midi_obj]
    else:
        segments = _segment_midi_by_silence(midi_obj, silence_threshold_s)
        if len(segments) > 1:
            print(f" - Split into {len(segments)} segments for simplification.")

    processed_segments = []
    # Notes-per-beat for each supported grid label.
    level_map = {"1/4": 1.0, "1/8": 2.0, "1/12": 3.0, "1/16": 4.0, "1/24": 6.0, "1/32": 8.0, "1/64": 16.0}
    division = level_map.get(simplification_level_str)
    if not division:
        # Unknown level string: leave the MIDI untouched.
        return midi_obj

    for segment in segments:
        new_segment_midi = pretty_midi.PrettyMIDI()
        for instrument in segment.instruments:
            # Drums and empty tracks pass through unchanged.
            if instrument.is_drum or not instrument.notes:
                new_segment_midi.instruments.append(instrument)
                continue

            try:
                # Prefer an explicit tempo event; otherwise estimate from a
                # time-normalized copy of this instrument. Clamp to a sane range.
                if segment.get_tempo_changes()[1].size > 0:
                    bpm = float(segment.get_tempo_changes()[1][0])
                else:
                    temp_norm_inst = _normalize_instrument_times(instrument)
                    temp_midi = pretty_midi.PrettyMIDI(); temp_midi.instruments.append(temp_norm_inst)
                    bpm = temp_midi.estimate_tempo()
                bpm = max(40.0, min(bpm, 240.0))
            except Exception:
                # Tempo estimation failed: keep the track as-is rather than guessing.
                new_segment_midi.instruments.append(instrument)
                continue

            grid_s = (60.0 / bpm) / division
            if grid_s <= 0.001:
                # Degenerate grid: keep the track unchanged.
                new_segment_midi.instruments.append(instrument)
                continue

            simplified_instrument = pretty_midi.Instrument(program=instrument.program, name=instrument.name)
            notes = sorted(instrument.notes, key=lambda x: x.start)
            end_time = segment.get_end_time()

            # Collect sustain-pedal (CC64) on/off events as (time, is_down) pairs.
            sustain_times = []
            for cc in instrument.control_changes:
                if cc.number == 64:  # sustain pedal
                    sustain_times.append((cc.time, cc.value >= 64))

            # Walk the grid from (roughly) the first onset to the end of the segment.
            current_grid_time = round(notes[0].start / grid_s) * grid_s
            while current_grid_time < end_time:
                notes_in_slot = [n for n in notes if current_grid_time <= n.start < current_grid_time + grid_s]
                if notes_in_slot:
                    chosen_notes = []
                    if keep_chords:
                        # Keep root (lowest) and top (highest) pitch of the slot...
                        root_note = min(notes_in_slot, key=lambda n: n.pitch)
                        top_note = max(notes_in_slot, key=lambda n: n.pitch)
                        chosen_notes.extend([root_note, top_note])
                        # ...plus the loudest note, if distinct.
                        strong_note = max(notes_in_slot, key=lambda n: n.velocity)
                        if strong_note not in chosen_notes:
                            chosen_notes.append(strong_note)
                        # Cap chord density per slot.
                        chosen_notes = sorted(set(chosen_notes), key=lambda n: n.pitch)[:max_notes_per_grid]
                    else:
                        chosen_notes = [max(notes_in_slot, key=lambda n: n.velocity)]

                    for note in chosen_notes:
                        # End at the original note end or the slot boundary, whichever is earlier.
                        note_end = min(note.end, current_grid_time + grid_s)
                        # NOTE(review): any pedal-down event at/after the note's start
                        # extends the note by one extra slot; pedal-release times are
                        # never consulted — confirm this is the intended sustain model.
                        for t, active in sustain_times:
                            if t >= note.start and active:
                                note_end = max(note_end, current_grid_time + grid_s * 2)
                        simplified_instrument.notes.append(pretty_midi.Note(
                            velocity=note.velocity,
                            pitch=note.pitch,
                            start=current_grid_time,
                            end=note_end
                        ))
                current_grid_time += grid_s

            if simplified_instrument.notes:
                new_segment_midi.instruments.append(simplified_instrument)
        processed_segments.append(new_segment_midi)

    return _recombine_segments(processed_segments) if enable_segmentation else processed_segments[0]
|
554 |
+
|
555 |
+
|
556 |
+
def quantize_pm(
    midi_obj: pretty_midi.PrettyMIDI,
    quantize_level_str="None",
    enable_segmentation=True,
    silence_threshold_s=1.0,
    quantize_end=True,
    preserve_duration=True
):
    """Quantizes notes in a PrettyMIDI object to a rhythmic grid.

    Each segment's grid size is derived from its tempo (first tempo change if
    present, otherwise an estimate clamped to 40-240 BPM). Drum tracks and
    empty instruments are passed through untouched. When segmentation is
    enabled, the MIDI is first split at long silences so multi-song files
    (e.g. whole albums) get a per-segment tempo/grid.

    Args:
        midi_obj: The PrettyMIDI object to process.
        quantize_level_str: Grid label ("1/4" ... "1/64"), "Auto-Analyze Rhythm"
            to pick a grid per instrument, or "None" to skip processing.
        enable_segmentation: Split the MIDI at silences before quantizing.
        silence_threshold_s: Silence (seconds) required to start a new segment.
        quantize_end: Also snap note ends to the grid (ignored when
            preserve_duration is True).
        preserve_duration: Keep each note's original length; only move its start.

    Returns:
        A new PrettyMIDI object with quantized notes, or the input object
        unchanged when quantize_level_str is "None".
    """
    if quantize_level_str == "None":
        return midi_obj
    print(f" - Quantizing notes (Mode: {quantize_level_str})...")

    # Split into segments if enabled.
    if not enable_segmentation:
        segments = [midi_obj]
    else:
        segments = _segment_midi_by_silence(midi_obj, silence_threshold_s)
        if len(segments) > 1:
            print(f" - Split into {len(segments)} segments for quantization.")

    processed_segments = []
    # Beat divisions per quarter note for each supported grid label.
    level_map = {"1/4": 1.0, "1/8": 2.0, "1/12": 3.0, "1/16": 4.0, "1/24": 6.0, "1/32": 8.0, "1/64": 16.0}

    for i, segment in enumerate(segments):
        new_segment_midi = pretty_midi.PrettyMIDI()
        for instrument in segment.instruments:
            # Drums and empty tracks are copied through unmodified.
            if instrument.is_drum or not instrument.notes:
                new_segment_midi.instruments.append(instrument)
                continue
            try:
                # Estimate BPM or use the first tempo change.
                # FIX: call get_tempo_changes() once instead of twice.
                _tempo_times, tempi = segment.get_tempo_changes()
                if tempi.size > 0:
                    bpm = float(tempi[0])
                else:
                    temp_norm_inst = _normalize_instrument_times(instrument)
                    temp_midi = pretty_midi.PrettyMIDI(); temp_midi.instruments.append(temp_norm_inst)
                    bpm = temp_midi.estimate_tempo()
                bpm = max(40.0, min(bpm, 240.0))
            except Exception:
                # Tempo could not be determined; pass the instrument through.
                new_segment_midi.instruments.append(instrument)
                continue

            # Determine quantization grid size.
            final_quantize_level = quantize_level_str
            if quantize_level_str == "Auto-Analyze Rhythm":
                final_quantize_level = _analyze_best_quantize_level(instrument.notes, bpm)
                if len(segments) > 1:
                    print(f" - Segment {i+1}, Inst '{instrument.name}': Auto-analyzed grid is '{final_quantize_level}'. BPM: {bpm:.2f}")

            division = level_map.get(final_quantize_level)
            if not division:
                # Unknown grid label — leave the instrument unquantized.
                new_segment_midi.instruments.append(instrument)
                continue
            grid_s = (60.0 / bpm) / division

            # Collect sustain pedal (CC64) on/off events.
            sustain_times = [(cc.time, cc.value >= 64)
                             for cc in instrument.control_changes
                             if cc.number == 64]

            # Quantize notes.
            quantized_instrument = pretty_midi.Instrument(program=instrument.program, name=instrument.name)
            # FIX: carry over controller and pitch-bend data — the original
            # rebuilt the instrument with notes only, silently dropping
            # sustain-pedal and pitch-bend automation from the output.
            quantized_instrument.control_changes = list(instrument.control_changes)
            quantized_instrument.pitch_bends = list(instrument.pitch_bends)

            for note in instrument.notes:
                original_duration = note.end - note.start
                # Quantize start to the nearest grid line.
                new_start = round(note.start / grid_s) * grid_s
                if preserve_duration:
                    new_end = new_start + original_duration
                elif quantize_end:
                    new_end = round(note.end / grid_s) * grid_s
                else:
                    new_end = note.end

                # Sustain pedal extension.
                # FIX: only pedal-down events that occur while the note is
                # sounding should extend it; the original condition
                # (t >= note.start) matched any later pedal-down in the track.
                for t, active in sustain_times:
                    if active and note.start <= t < note.end:
                        new_end = max(new_end, new_start + grid_s * 2)

                # Safety check: never emit a zero- or negative-length note.
                if new_end <= new_start:
                    new_end = new_start + grid_s * 0.5

                quantized_instrument.notes.append(pretty_midi.Note(
                    velocity=note.velocity,
                    pitch=note.pitch,
                    start=new_start,
                    end=new_end
                ))

            new_segment_midi.instruments.append(quantized_instrument)
        processed_segments.append(new_segment_midi)

    return _recombine_segments(processed_segments) if enable_segmentation else processed_segments[0]
|
651 |
+
|
652 |
+
|
653 |
+
def process_velocity_pm(
    midi_obj: pretty_midi.PrettyMIDI,
    mode=("None",),          # mode(s) to apply: "Smooth" and/or "Compress"; immutable default (was a mutable list)
    smooth_factor=0.5,       # 0..1 weight of the neighbor average when smoothing
    compress_min=30,         # lower bound of the compressed velocity range
    compress_max=100,        # upper bound of the compressed velocity range
    compress_type="linear",  # "linear" or "perceptual" (gamma curve)
    inplace=True             # if False, operate on and return a deep copy
):
    """Applies velocity processing to a PrettyMIDI object.

    "Smooth" blends each note's velocity toward the average of its immediate
    neighbors; "Compress" rescales each instrument's velocity range into
    [compress_min, compress_max], either linearly or with a gamma-style
    perceptual curve. Modes may be combined; smoothing runs first. Drum
    tracks and empty instruments are skipped.

    Args:
        midi_obj: The PrettyMIDI object to process.
        mode: A mode name or iterable of mode names; "None" disables processing.
        smooth_factor: Smoothing weight (0 = no change, 1 = pure neighbor average).
        compress_min: Target minimum velocity after compression.
        compress_max: Target maximum velocity after compression.
        compress_type: "linear" or "perceptual".
        inplace: Modify midi_obj directly when True; otherwise return a copy.

    Returns:
        The processed PrettyMIDI object (midi_obj itself when inplace=True).
    """
    if not inplace:
        import copy
        midi_obj = copy.deepcopy(midi_obj)

    if isinstance(mode, str):
        mode = [mode]
    if "None" in mode or not mode:
        return midi_obj

    print(f" - Processing velocities (Mode: {mode})...")

    for instrument in midi_obj.instruments:
        if instrument.is_drum or not instrument.notes:
            continue

        velocities = [n.velocity for n in instrument.notes]

        # Smooth velocity.
        # FIX: guard against single-note instruments — the original read
        # velocities[i+1] for i == 0 and raised IndexError when only one
        # note was present. With one note there are no neighbors to blend.
        if "Smooth" in mode and len(velocities) > 1:
            n_notes = len(velocities)
            new_velocities = list(velocities)
            for i in range(n_notes):
                if i == 0:
                    neighbor_avg = velocities[i + 1]
                elif i == n_notes - 1:
                    neighbor_avg = velocities[i - 1]
                else:
                    neighbor_avg = (velocities[i - 1] + velocities[i + 1]) / 2.0
                smoothed_vel = velocities[i] * (1 - smooth_factor) + neighbor_avg * smooth_factor
                new_velocities[i] = int(max(1, min(127, smoothed_vel)))
            for i, note in enumerate(instrument.notes):
                note.velocity = new_velocities[i]

        # Compress velocity.
        if "Compress" in mode:
            velocities = [n.velocity for n in instrument.notes]  # refresh if smoothed first
            min_vel, max_vel = min(velocities), max(velocities)
            if max_vel == min_vel:
                # A flat velocity range cannot be rescaled; skip this instrument.
                continue

            for note in instrument.notes:
                if compress_type == "linear":
                    new_vel = compress_min + (note.velocity - min_vel) * (compress_max - compress_min) / (max_vel - min_vel)
                elif compress_type == "perceptual":
                    # Gamma-style perceptual compression.
                    norm = (note.velocity - min_vel) / (max_vel - min_vel)
                    gamma = 0.6  # perceptual curve exponent
                    new_vel = compress_min + ((norm ** gamma) * (compress_max - compress_min))
                else:
                    # Unknown compress_type: leave the velocity unchanged.
                    new_vel = note.velocity
                note.velocity = int(max(1, min(127, new_vel)))

    return midi_obj
|
|
|
716 |
|
|
|
|
|
717 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
718 |
|
719 |
+
# =================================================================================================
|
720 |
+
# === Helper Functions ===
|
721 |
+
# =================================================================================================
|
722 |
|
723 |
def analyze_audio_for_adaptive_params(audio_data: np.ndarray, sample_rate: int):
|
724 |
"""
|
|
|
2209 |
print(f"Render type: {params.render_type}")
|
2210 |
print(f"Soundfont bank: {params.soundfont_bank}")
|
2211 |
print(f"Audio render sample rate: {params.render_sample_rate}")
|
|
|
2212 |
print('=' * 70)
|
2213 |
+
|
2214 |
+
##################################
|
2215 |
+
|
2216 |
+
# --- FLOW STEP 1: Apply MIDI Post-Processing & Correction Suite ---
|
2217 |
+
if getattr(params, 'enable_midi_corrections', False):
|
2218 |
+
print("Applying MIDI Post-Processing & Corrections (on pretty_midi object)...")
|
2219 |
+
|
2220 |
+
# --- FLOW STEP 2: Load into pretty_midi for corrections ---
|
2221 |
+
try:
|
2222 |
+
midi_obj = pretty_midi.PrettyMIDI(io.BytesIO(fdata))
|
2223 |
+
print("Successfully loaded MIDI into pretty_midi for corrections.")
|
2224 |
+
except Exception as e:
|
2225 |
+
print(f"Fatal Error: Could not load the input MIDI with pretty_midi. Cannot proceed. Error: {e}")
|
2226 |
+
return ("N/A", fn1, f"MIDI file is corrupted or in an unsupported format. Error: {e}", None, None, None, "MIDI Load Error")
|
2227 |
+
|
2228 |
+
# Get common segmentation parameters
|
2229 |
+
enable_segmentation = getattr(params, 'correction_rhythm_stab_by_segment', True)
|
2230 |
+
silence_threshold_s = getattr(params, 'correction_rhythm_stab_segment_silence_s', 1.0)
|
2231 |
+
|
2232 |
+
# Correction Order: Filter -> Stabilize -> Simplify -> Quantize -> Velocity
|
2233 |
+
|
2234 |
+
# 1. Filter spurious notes (does not need segmentation)
|
2235 |
+
if getattr(params, 'correction_filter_spurious_notes', False):
|
2236 |
+
midi_obj = filter_spurious_notes_pm(
|
2237 |
+
midi_obj,
|
2238 |
+
max_dur_s=getattr(params, 'correction_spurious_duration_ms', 50) / 1000.0,
|
2239 |
+
max_vel=getattr(params, 'correction_spurious_velocity', 20)
|
2240 |
+
)
|
2241 |
+
|
2242 |
+
# 2. Stabilize rhythm
|
2243 |
+
if getattr(params, 'correction_remove_abnormal_rhythm', False):
|
2244 |
+
midi_obj = stabilize_rhythm_pm(
|
2245 |
+
midi_obj,
|
2246 |
+
enable_segmentation=enable_segmentation,
|
2247 |
+
silence_threshold_s=silence_threshold_s
|
2248 |
+
)
|
2249 |
+
|
2250 |
+
# 3. Simplify rhythm
|
2251 |
+
simplification_level = getattr(params, 'correction_rhythmic_simplification_level', "None")
|
2252 |
+
if simplification_level != "None":
|
2253 |
+
midi_obj = simplify_rhythm_pm(
|
2254 |
+
midi_obj,
|
2255 |
+
simplification_level_str=simplification_level,
|
2256 |
+
enable_segmentation=enable_segmentation,
|
2257 |
+
silence_threshold_s=silence_threshold_s
|
2258 |
+
)
|
2259 |
+
|
2260 |
+
# 4. Quantize rhythm
|
2261 |
+
quantize_level = getattr(params, 'correction_quantize_level', "None")
|
2262 |
+
if quantize_level != "None":
|
2263 |
+
midi_obj = quantize_pm(
|
2264 |
+
midi_obj,
|
2265 |
+
quantize_level_str=quantize_level,
|
2266 |
+
enable_segmentation=enable_segmentation,
|
2267 |
+
silence_threshold_s=silence_threshold_s
|
2268 |
+
)
|
2269 |
+
|
2270 |
+
# 5. Process velocity (does not need segmentation)
|
2271 |
+
velocity_mode = getattr(params, 'correction_velocity_mode', "None")
|
2272 |
+
if velocity_mode != "None":
|
2273 |
+
midi_obj = process_velocity_pm(
|
2274 |
+
midi_obj,
|
2275 |
+
mode=[velocity_mode],
|
2276 |
+
smooth_factor=getattr(params, 'correction_velocity_smooth_factor', 0.5),
|
2277 |
+
compress_min=getattr(params, 'correction_velocity_compress_min', 30),
|
2278 |
+
compress_max=getattr(params, 'correction_velocity_compress_max', 100)
|
2279 |
+
)
|
2280 |
+
|
2281 |
+
# --- FLOW STEP 3: Convert the corrected pretty_midi object back to binary data ---
|
2282 |
+
corrected_midi_io = io.BytesIO()
|
2283 |
+
midi_obj.write(corrected_midi_io)
|
2284 |
+
fdata = corrected_midi_io.getvalue()
|
2285 |
+
|
2286 |
+
print("Corrections finished.")
|
2287 |
+
print('=' * 70)
|
2288 |
|
2289 |
+
##################################
|
2290 |
+
|
2291 |
# --- MIDI Processing using TMIDIX ---
|
2292 |
print('Processing MIDI... Please wait...')
|
2293 |
raw_score = MIDI.midi2single_track_ms_score(fdata)
|
|
|
2434 |
o[1] *= 200
|
2435 |
o[2] *= 200
|
2436 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2437 |
# --- Saving Processed MIDI File ---
|
2438 |
# Save the transformed MIDI data
|
2439 |
SONG, patches, _ = TMIDIX.patch_enhanced_score_notes(output_score)
|
|
|
4338 |
correction_remove_abnormal_rhythm = gr.Checkbox(label="Stabilize Rhythm (for Pitch Bend)", value=False,
|
4339 |
info="Attempts to merge overly dense, rhythmically unstable notes often created when 'Allow Multiple Pitch Bends' is used. This can clean up the rhythm but may lose some pitch slide nuance.")
|
4340 |
with gr.Group(visible=False) as rhythm_stab_options: # This group is initially hidden
|
4341 |
+
correction_rhythm_stab_by_segment = gr.Checkbox(label="Enable Segmentation by Silence", value=True,
|
4342 |
info="Highly recommended for albums or long files. Splits the MIDI by silent parts before stabilizing rhythm, ensuring accuracy for songs with different tempos.")
|
4343 |
correction_rhythm_stab_segment_silence_s = gr.Slider(minimum=0.5, maximum=10.0, value=1.0, step=0.5,
|
4344 |
label="Silence Threshold for Segmentation (seconds)",
|
4345 |
info="The amount of silence required to start a new segment. 1-3 seconds is usually enough to separate songs on an album.")
|
4346 |
+
# --- Rhythmic Simplification Group ---
|
4347 |
+
with gr.Group():
|
4348 |
+
correction_rhythmic_simplification_level = gr.Dropdown(
|
4349 |
+
["None", "1/16", "1/12", "1/8", "1/4"],
|
4350 |
+
value="None",
|
4351 |
+
label="Simplify Rhythm (Experimental)",
|
4352 |
+
info="WARNING: This is a destructive process that removes notes to slow down the rhythm. Select a target grid; only the most important note within each grid cell will be kept and its duration extended."
|
4353 |
+
)
|
4354 |
# --- Quantization Group ---
|
4355 |
with gr.Group():
|
4356 |
correction_quantize_level = gr.Dropdown(
|
4357 |
+
["None", "Auto-Analyze Rhythm", "1/64", "1/32", "1/16", "1/8", "1/4", "1/24", "1/12"],
|
4358 |
value="None",
|
4359 |
label="Quantize Rhythm",
|
4360 |
+
info="Quantizes notes to the nearest rhythmic grid line. '1/16' is recommended for most pop and rock music. For expressive genres like classical or jazz, use with caution as it may reduce natural timing nuances. Straight divisions (1/8, 1/16, etc.) suit most modern music, while swing divisions (1/12, 1/24) are ideal for jazz, blues, or shuffle styles. 'Auto-Analyze' is highly recommended for albums or files with mixed tempos, as it will automatically determine the best grid (straight or swing) for each song segment."
|
4361 |
)
|
4362 |
# --- Velocity Processing Group ---
|
4363 |
with gr.Group():
|