avans06 committed
Commit 7339550 · 1 Parent(s): d80a2c7

feat: Add detailed progress bars for single and batch jobs

Files changed (1):
  1. app.py (+51 -9)
app.py CHANGED
```diff
@@ -1158,17 +1158,28 @@ def _transcribe_stem(audio_path: str, base_name: str, temp_dir: str, params: App
 
 
 # --- The core processing engine for a single file ---
-def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppParameters):
+def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppParameters, progress: gr.Progress = None):
     """
     This is the main processing engine. It takes a file path and a dictionary of all settings,
     and performs the full pipeline: load, separate, transcribe, render, re-merge.
     It is UI-agnostic and returns file paths and data, not Gradio updates.
+    It now accepts a Gradio Progress object to report granular progress.
     """
+    # Helper function to safely update progress
+    def update_progress(fraction, desc):
+        if progress:
+            progress(fraction, desc=desc)
+
     # --- Start timer for this specific file ---
     file_start_time = reqtime.time()
 
     filename = os.path.basename(input_file_path)
     base_name = os.path.splitext(filename)[0]
+
+    # --- Determine file type to select the correct progress timeline ---
+    is_midi_input = filename.lower().endswith(('.mid', '.midi', '.kar'))
+
+    update_progress(0, f"Starting: {filename}")
     print(f"\n{'='*20} Starting Pipeline for: {filename} {'='*20}")
 
     # --- Use the provided timestamp for unique filenames ---
```
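Note the new `progress: gr.Progress = None` parameter and the `update_progress` guard: every report goes through the guard, so the engine stays UI-agnostic. A caller can pass a real `gr.Progress`, any plain callable with the same shape (as the batch wrapper further down does), or nothing at all. A minimal, self-contained sketch of the pattern; `do_work` and `console_progress` are hypothetical stand-ins, not code from app.py:

```python
# Guarded progress-callback pattern: the worker never assumes a UI exists.
def do_work(progress=None):
    def update_progress(fraction, desc):
        # A gr.Progress instance and a plain function are both callable,
        # and None simply skips reporting.
        if progress:
            progress(fraction, desc=desc)

    update_progress(0.0, "starting")
    # ... real work would happen here ...
    update_progress(1.0, "done")

def console_progress(fraction, desc=""):
    print(f"[{fraction:>4.0%}] {desc}")

do_work(console_progress)  # prints two progress lines
do_work()                  # runs silently, no UI required
```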
```diff
@@ -1179,7 +1190,9 @@ def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppPa
     other_part_sr = None
 
     # --- Step 1: Check file type and transcribe if necessary ---
-    if filename.lower().endswith(('.mid', '.midi', '.kar')):
+    if is_midi_input:
+        # For MIDI files, we start at 0% and directly proceed to the rendering steps.
+        update_progress(0, "MIDI file detected, skipping transcription...")
         print("MIDI file detected. Skipping transcription. Proceeding directly to rendering.")
         midi_path_for_rendering = input_file_path
     else:
@@ -1187,6 +1200,7 @@ def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppPa
         os.makedirs(temp_dir, exist_ok=True)
 
         # --- Audio Loading ---
+        update_progress(0.1, "Audio file detected, loading...")
         print("Audio file detected. Starting pre-processing...")
         # --- Robust audio loading with ffmpeg fallback ---
         try:
@@ -1196,6 +1210,7 @@ def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppPa
             audio_tensor, native_sample_rate = torchaudio.load(input_file_path)
             print("Torchaudio loading successful.")
         except Exception as e:
+            update_progress(0.15, "Torchaudio failed, trying ffmpeg...")
             print(f"Torchaudio failed: {e}. Attempting fallback with ffmpeg...")
             try:
                 # Define a path for the temporary converted file
@@ -1224,9 +1239,12 @@ def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppPa
             # --- Standard Workflow: Transcribe the original full audio ---
             audio_to_transcribe_path = os.path.join(temp_dir, f"{timestamped_base_name}_original.flac")
             torchaudio.save(audio_to_transcribe_path, audio_tensor, native_sample_rate)
+
+            update_progress(0.2, "Transcribing audio to MIDI...")
             midi_path_for_rendering = _transcribe_stem(audio_to_transcribe_path, f"{timestamped_base_name}_original", temp_dir, params)
         else:
             # --- Vocal Separation Workflow ---
+            update_progress(0.2, "Separating vocals with Demucs...")
             # Convert to a common format (stereo, float32) that demucs expects
             audio_tensor = convert_audio(audio_tensor, native_sample_rate, demucs_model.samplerate, demucs_model.audio_channels)
 
@@ -1282,18 +1300,22 @@ def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppPa
             # --- Main Branching Logic: Transcribe one or both stems ---
             if not params.transcribe_both_stems:
                 print(f"Transcribing primary target only: {os.path.basename(primary_target_path)}")
+                update_progress(0.4, f"Transcribing primary target: {os.path.basename(primary_target_path)}")
                 midi_path_for_rendering = _transcribe_stem(primary_target_path, os.path.splitext(os.path.basename(primary_target_path))[0], temp_dir, params)
             else:
                 print("Transcribing BOTH stems and merging the MIDI results.")
 
                 # Transcribe the primary target
+                update_progress(0.4, "Transcribing primary stem...")
                 midi_path_primary = _transcribe_stem(primary_target_path, os.path.splitext(os.path.basename(primary_target_path))[0], temp_dir, params)
 
                 # Transcribe the other part
+                update_progress(0.5, "Transcribing second stem...")
                 midi_path_other = _transcribe_stem(other_part_path, os.path.splitext(os.path.basename(other_part_path))[0], temp_dir, params)
 
                 # Merge the two resulting MIDI files
                 if midi_path_primary and midi_path_other:
+                    update_progress(0.55, "Merging transcribed MIDIs...")
                     final_merged_midi_path = os.path.join(temp_dir, f"{base_name}_full_transcription.mid")
                     print(f"Merging transcribed MIDI files into {os.path.basename(final_merged_midi_path)}")
 
@@ -1319,10 +1341,13 @@ def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppPa
                return None
 
     # --- Step 2: Render the FINAL MIDI file with selected options ---
+    # The progress values are now conditional based on the input file type.
+    update_progress(0.1 if is_midi_input else 0.6, "Applying MIDI transformations...")
 
     # --- Auto-Recommendation Logic ---
     # If the user selected the auto-recommend option, override the parameters
     if params.s8bit_preset_selector == "Auto-Recommend (Analyze MIDI)":
+        update_progress(0.15 if is_midi_input else 0.65, "Auto-recommending 8-bit parameters...")
         print("Auto-Recommendation is enabled. Analyzing MIDI features...")
         try:
             midi_to_analyze = pretty_midi.PrettyMIDI(midi_path_for_rendering)
@@ -1337,13 +1362,16 @@ def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppPa
         except Exception as e:
             print(f"Could not auto-recommend parameters for {filename}: {e}.")
 
+    update_progress(0.2 if is_midi_input else 0.7, "Rendering MIDI to audio...")
     print(f"Proceeding to render MIDI file: {os.path.basename(midi_path_for_rendering)}")
 
     # Call the rendering function, Pass dictionaries directly to Render_MIDI
     results_tuple = Render_MIDI(input_midi_path=midi_path_for_rendering, params=params)
 
     # --- Vocal Re-merging Logic ---
+    # Vocal Re-merging only happens for audio files, so its progress value doesn't need to be conditional.
    if params.separate_vocals and params.remerge_vocals and not params.transcribe_both_stems and other_part_tensor is not None:
+        update_progress(0.8, "Re-merging rendered audio with vocals...")
         print(f"Re-merging the non-transcribed part with newly rendered music...")
 
         # 1. Unpack the original rendered audio from the results
```
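Because the `update_progress` fractions are scattered across several hunks, here are the two timelines they imply, collected in one place. This is a reading of the diff above, not code from app.py:

```python
# Progress milestones implied by the update_progress calls in this commit.
# MIDI inputs skip loading/separation/transcription, so the render stages
# sit at small fractions; audio inputs spend 0.0-0.6 getting to a MIDI file.
TIMELINES = {
    "midi input": {
        0.0: "start / skip transcription", 0.1: "MIDI transformations",
        0.15: "auto-recommend (optional)", 0.2: "render",
        0.9: "save", 1.0: "done",
    },
    "audio input": {
        0.0: "start", 0.1: "load audio",
        0.15: "ffmpeg fallback (only on torchaudio failure)",
        0.2: "transcribe, or separate vocals with Demucs",
        0.4: "transcribe primary stem", 0.5: "transcribe second stem (optional)",
        0.55: "merge MIDIs (optional)", 0.6: "MIDI transformations",
        0.65: "auto-recommend (optional)", 0.7: "render",
        0.8: "re-merge vocals (optional)", 0.9: "save", 1.0: "done",
    },
}
```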
```diff
@@ -1387,6 +1415,7 @@ def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppPa
         print("Re-merging complete.")
 
     # --- Save final audio and return path ---
+    update_progress(0.9, "Saving final files...")
     final_srate, final_audio_data = results_tuple[4]
     final_midi_path_from_render = results_tuple[3] # Get the path of the processed MIDI
 
@@ -1421,6 +1450,7 @@ def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppPa
         "plot": results_tuple[5],
         "description": results_tuple[6]
     }
+    update_progress(1.0, "Done!")
     # Return both the results and the final state of the parameters object
     return results, params
 
@@ -1430,10 +1460,10 @@ def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppPa
 # =================================================================================================
 
 # --- Thin wrapper for batch processing ---
-def batch_process_files(input_files, progress=gr.Progress(), *args):
+def batch_process_files(input_files, progress=gr.Progress(track_tqdm=True), *args):
     """
-    Gradio wrapper for batch processing. It packs all UI values into an AppParameters object.
-    It iterates through files, calls the core pipeline, and collects the output file paths.
+    Gradio wrapper for batch processing. It iterates through files, calls the core pipeline,
+    and collects the output file paths. It now provides detailed, nested progress updates.
     """
 
     if not input_files:
@@ -1458,10 +1488,21 @@ def batch_process_files(input_files, progress=gr.Progress(), *args):
     for i, file_obj in enumerate(input_files):
         # The input from gr.File is a tempfile object, we need its path
         input_path = file_obj.name
+        filename = os.path.basename(input_path)
+
+        # --- Nested Progress Logic ---
+        # Define a local function to scale the sub-progress of the pipeline
+        # into the correct slot of the main batch progress bar.
+        def batch_progress_updater(local_fraction, desc):
+            # Calculate the overall progress based on which file we are on (i)
+            # and the progress within that file (local_fraction).
+            progress_per_file = 1 / total_files
+            overall_fraction = (i / total_files) + (local_fraction * progress_per_file)
+            progress(overall_fraction, desc=f"({i+1}/{total_files}) {filename}: {desc}")
         progress(i / total_files, desc=f"Processing {os.path.basename(input_path)} ({i+1}/{total_files})")
 
         # --- Pass the batch_timestamp to the pipeline ---
-        results, _ = run_single_file_pipeline(input_path, batch_timestamp, params)
+        results, _ = run_single_file_pipeline(input_path, batch_timestamp, params, progress=batch_progress_updater)
 
         if results:
             if results.get("final_audio_path"):
```
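The arithmetic in `batch_progress_updater` gives each file an equal slot of width `1 / total_files` on the overall bar and maps the pipeline's local 0-1 fraction into it: overall = i/total_files + local_fraction/total_files. A quick standalone check with made-up values:

```python
# Sanity check of the nested-progress arithmetic (no Gradio needed).
def overall(i, total_files, local_fraction):
    progress_per_file = 1 / total_files
    return (i / total_files) + (local_fraction * progress_per_file)

assert overall(0, 4, 0.0) == 0.0    # first file just started
assert overall(2, 4, 0.5) == 0.625  # halfway through the third of four files
assert overall(3, 4, 1.0) == 1.0    # last file finished
```

One design note: the closure reads `i` and `filename` at call time, which is only safe because `run_single_file_pipeline` is invoked synchronously in the same loop iteration; if the calls were deferred, every updater would see the last file's values.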
```diff
@@ -1482,12 +1523,13 @@ def batch_process_files(input_files, progress=gr.Progress(), *args):
 
 
 # --- The original function is now a thin wrapper for the single file UI ---
-def process_and_render_file(input_file, *args):
+def process_and_render_file(input_file, *args, progress=gr.Progress()):
     """
     Gradio wrapper for the single file processing UI. Packs UI values into an AppParameters object.
     Calls the core pipeline and formats the output for all UI components.
     Main function to handle file processing. It determines the file type and calls the
     appropriate functions for transcription and/or rendering based on user selections.
+    Now includes a progress bar.
     """
     if input_file is None:
         # Return a list of updates to clear all output fields and UI controls
@@ -1503,8 +1545,8 @@ def process_and_render_file(input_file, *args):
     # The first value in *args is s8bit_preset_selector, the rest match the keys
     params = AppParameters(input_file=input_file, **dict(zip(ALL_PARAM_KEYS, args)))
 
-    # Run the core pipeline, Pass the timestamp to the pipeline
-    results, final_params = run_single_file_pipeline(input_file, single_file_timestamp, params)
+    # Run the core pipeline, passing the timestamp and progress to the pipeline
+    results, final_params = run_single_file_pipeline(input_file, single_file_timestamp, params, progress=progress)
 
     if results is None:
         raise gr.Error("File processing failed. Check console for details.")
```
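The commit does not touch the Gradio event wiring, and it does not need to: a handler parameter whose default is `gr.Progress()` is detected and injected by Gradio when the event fires, and `gr.Progress(track_tqdm=True)` (now used by `batch_process_files`) additionally mirrors any tqdm bars raised inside the call onto the UI bar. A self-contained toy sketch of that injection mechanism; the names here are hypothetical and unrelated to app.py:

```python
# Hypothetical sketch -- Gradio fills in `progress` at call time because
# its default value is a gr.Progress() sentinel; calling it moves the bar.
import time
import gradio as gr

def slow_task(text, progress=gr.Progress()):
    steps = 5
    for i in range(steps):
        progress(i / steps, desc=f"Step {i + 1}/{steps}")
        time.sleep(0.2)  # stand-in for real work
    progress(1.0, desc="Done")
    return text.upper()

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    gr.Button("Run").click(slow_task, inputs=inp, outputs=out)

if __name__ == "__main__":
    demo.launch()
```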