Spaces:

alibabasglab
/

ClearVoice

Running on Zero

App Files Files Community

alibabasglab committed on Jan 16

Commit

50e06fb

verified ·

1 Parent(s): 14cdf70

Update dataloader/misc.py

Browse files

Files changed (1) hide show

dataloader/misc.py +34 -9

dataloader/misc.py CHANGED Viewed

@@ -1,4 +1,3 @@
 #!/usr/bin/env python -u
 # -*- coding: utf-8 -*-
@@ -11,7 +10,32 @@ import numpy as np
 import os
 import sys
 import librosa
 def read_and_config_file(args, input_path, decode=0):
     """
     Reads and processes the input file or directory to extract audio file paths or configuration data.
@@ -27,7 +51,10 @@ def read_and_config_file(args, input_path, decode=0):
                            and optional condition audio paths.
     """
     processed_list = []  # Initialize list to hold processed file paths or configurations
     if decode:
         if args.task == 'target_speaker_extraction':
             if args.network_reference.cue== 'lip':
@@ -38,6 +65,7 @@ def read_and_config_file(args, input_path, decode=0):
                     processed_list += librosa.util.find_files(input_path, ext="avi")
                     processed_list += librosa.util.find_files(input_path, ext="mov")
                     processed_list += librosa.util.find_files(input_path, ext="MOV")
                 else:
                     # If it's a single file and it's a .wav or .flac, add to processed list
                     if input_path.lower().endswith(".mp4") or input_path.lower().endswith(".avi") or input_path.lower().endswith(".mov") or input_path.lower().endswith(".webm"):
@@ -53,13 +81,11 @@ def read_and_config_file(args, input_path, decode=0):
         # If decode is True, find audio files in a directory or single file
         if os.path.isdir(input_path):
             # Find all .wav files in the input directory
-            processed_list = librosa.util.find_files(input_path, ext="wav")
-            if len(processed_list) == 0:
-                # If no .wav files, look for .flac files
-                processed_list = librosa.util.find_files(input_path, ext="flac")
         else:
             # If it's a single file and it's a .wav or .flac, add to processed list
-            if input_path.lower().endswith(".wav") or input_path.lower().endswith(".flac"):
                 processed_list.append(input_path)
             else:
                 # Read file paths from the input text file (one path per line)
@@ -80,5 +106,4 @@ def read_and_config_file(args, input_path, decode=0):
                 # If only one path per line, treat it as 'inputs'
                 sample = {'inputs': tmp_paths[0]}
             processed_list.append(sample)  # Append processed sample to list
-    return processed_list

 #!/usr/bin/env python -u
 # -*- coding: utf-8 -*-
 import os
 import sys
 import librosa
+import mimetypes
def get_file_extension(file_path):
    """
    Return the extension of file_path as produced by os.path.splitext.

    The result keeps its leading dot (e.g. ".wav") and its original letter
    case; it is an empty string when the path has no extension.
    """
    # splitext yields (root, ext); only the ext component is needed here.
    return os.path.splitext(file_path)[1]
def is_audio_file(file_path):
    """
    Check whether file_path looks like an audio file, judging by its name only.

    The path is accepted when its extension (compared case-insensitively) is
    one of the known audio extensions listed below, or, failing that, when
    mimetypes.guess_type maps the path to an "audio/..." MIME type. The file
    itself is never opened.

    Args:
        file_path: Path or file name to classify.

    Returns:
        True if the path is recognised as audio, otherwise False.
    """
    audio_extensions = {
        "wav", "aac", "ac3", "aiff", "flac", "m4a",
        "mp3", "ogg", "opus", "wma", "webm",
    }
    # Lower-case the suffix so "SONG.WAV" or "x.Opus" match the list; relying
    # on the MIME table alone misses suffixes it does not know on a platform.
    suffix = os.path.splitext(file_path)[1]
    if suffix[1:].lower() in audio_extensions:
        return True
    # Fallback for audio formats that are not in the explicit list.
    mime_type, _ = mimetypes.guess_type(file_path)
    return bool(mime_type) and mime_type.startswith('audio')
 def read_and_config_file(args, input_path, decode=0):
     """
     Reads and processes the input file or directory to extract audio file paths or configuration data.
                            and optional condition audio paths.
     """
     processed_list = []  # Initialize list to hold processed file paths or configurations
+    #The supported audio types are listed below (tested), but not limited to.
+    file_ext = ["wav", "aac", "ac3", "aiff", "flac", "m4a", "mp3", "ogg", "opus", "wma", "webm"]
     if decode:
         if args.task == 'target_speaker_extraction':
             if args.network_reference.cue== 'lip':
                     processed_list += librosa.util.find_files(input_path, ext="avi")
                     processed_list += librosa.util.find_files(input_path, ext="mov")
                     processed_list += librosa.util.find_files(input_path, ext="MOV")
+                    processed_list += librosa.util.find_files(input_path, ext="webm")
                 else:
                     # If it's a single file and it's a .wav or .flac, add to processed list
                     if input_path.lower().endswith(".mp4") or input_path.lower().endswith(".avi") or input_path.lower().endswith(".mov") or input_path.lower().endswith(".webm"):
         # If decode is True, find audio files in a directory or single file
         if os.path.isdir(input_path):
             # Find all files with a supported audio extension in the input directory
+            processed_list = librosa.util.find_files(input_path, ext=file_ext)
         else:
             # If it's a single file recognised as audio, add it to the processed list
+            #if input_path.lower().endswith(".wav") or input_path.lower().endswith(".flac"):
+            if is_audio_file(input_path):
                 processed_list.append(input_path)
             else:
                 # Read file paths from the input text file (one path per line)
                 # If only one path per line, treat it as 'inputs'
                 sample = {'inputs': tmp_paths[0]}
             processed_list.append(sample)  # Append processed sample to list
+    return processed_list