alibabasglab committed on
Commit
50e06fb
·
verified ·
1 Parent(s): 14cdf70

Update dataloader/misc.py

Browse files
Files changed (1) hide show
  1. dataloader/misc.py +34 -9
dataloader/misc.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  #!/usr/bin/env python -u
3
  # -*- coding: utf-8 -*-
4
 
@@ -11,7 +10,32 @@ import numpy as np
11
  import os
12
  import sys
13
  import librosa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
 
 
 
 
 
15
  def read_and_config_file(args, input_path, decode=0):
16
  """
17
  Reads and processes the input file or directory to extract audio file paths or configuration data.
@@ -27,7 +51,10 @@ def read_and_config_file(args, input_path, decode=0):
27
  and optional condition audio paths.
28
  """
29
  processed_list = [] # Initialize list to hold processed file paths or configurations
30
-
 
 
 
31
  if decode:
32
  if args.task == 'target_speaker_extraction':
33
  if args.network_reference.cue== 'lip':
@@ -38,6 +65,7 @@ def read_and_config_file(args, input_path, decode=0):
38
  processed_list += librosa.util.find_files(input_path, ext="avi")
39
  processed_list += librosa.util.find_files(input_path, ext="mov")
40
  processed_list += librosa.util.find_files(input_path, ext="MOV")
 
41
  else:
42
  # If it's a single file and it's a .wav or .flac, add to processed list
43
  if input_path.lower().endswith(".mp4") or input_path.lower().endswith(".avi") or input_path.lower().endswith(".mov") or input_path.lower().endswith(".webm"):
@@ -53,13 +81,11 @@ def read_and_config_file(args, input_path, decode=0):
53
  # If decode is True, find audio files in a directory or single file
54
  if os.path.isdir(input_path):
55
  # Find all .wav files in the input directory
56
- processed_list = librosa.util.find_files(input_path, ext="wav")
57
- if len(processed_list) == 0:
58
- # If no .wav files, look for .flac files
59
- processed_list = librosa.util.find_files(input_path, ext="flac")
60
  else:
61
  # If it's a single file and it's a .wav or .flac, add to processed list
62
- if input_path.lower().endswith(".wav") or input_path.lower().endswith(".flac"):
 
63
  processed_list.append(input_path)
64
  else:
65
  # Read file paths from the input text file (one path per line)
@@ -80,5 +106,4 @@ def read_and_config_file(args, input_path, decode=0):
80
  # If only one path per line, treat it as 'inputs'
81
  sample = {'inputs': tmp_paths[0]}
82
  processed_list.append(sample) # Append processed sample to list
83
- return processed_list
84
-
 
 
1
  #!/usr/bin/env python -u
2
  # -*- coding: utf-8 -*-
3
 
 
10
  import os
11
  import sys
12
  import librosa
13
+ import mimetypes
14
+
15
def get_file_extension(file_path):
    """
    Return the extension of file_path, including its leading dot.

    The result is the second element of os.path.splitext(file_path), so a
    path with no extension yields an empty string.
    """
    return os.path.splitext(file_path)[1]
22
+
23
def is_audio_file(file_path):
    """
    Check if the given file_path is an audio file, judging by its name only.

    The file is never opened. The path is accepted when its extension,
    compared case-insensitively, is one of the known audio extensions, or
    when mimetypes guesses an 'audio/...' MIME type for it.

    Args:
        file_path: Path (or bare file name) to classify.

    Returns:
        True if the path looks like an audio file, otherwise False.
    """
    # Extensions accepted outright, regardless of what mimetypes reports.
    known_exts = {"wav", "aac", "ac3", "aiff", "flac", "m4a", "mp3", "ogg", "opus", "wma", "webm"}

    # Lower-case the extension so that e.g. "CLIP.WEBM" is treated like "clip.webm".
    _, ext = os.path.splitext(file_path)
    if ext.lstrip('.').lower() in known_exts:
        return True

    # Fall back to the MIME type guessed from the file name.
    mime_type, _ = mimetypes.guess_type(file_path)
    return bool(mime_type and mime_type.startswith('audio'))
38
+
39
  def read_and_config_file(args, input_path, decode=0):
40
  """
41
  Reads and processes the input file or directory to extract audio file paths or configuration data.
 
51
  and optional condition audio paths.
52
  """
53
  processed_list = [] # Initialize list to hold processed file paths or configurations
54
+
55
+ #The supported audio types are listed below (tested), but not limited to.
56
+ file_ext = ["wav", "aac", "ac3", "aiff", "flac", "m4a", "mp3", "ogg", "opus", "wma", "webm"]
57
+
58
  if decode:
59
  if args.task == 'target_speaker_extraction':
60
  if args.network_reference.cue== 'lip':
 
65
  processed_list += librosa.util.find_files(input_path, ext="avi")
66
  processed_list += librosa.util.find_files(input_path, ext="mov")
67
  processed_list += librosa.util.find_files(input_path, ext="MOV")
68
+ processed_list += librosa.util.find_files(input_path, ext="webm")
69
  else:
70
  # If it's a single video file (.mp4, .avi, .mov or .webm), add to processed list
71
  if input_path.lower().endswith(".mp4") or input_path.lower().endswith(".avi") or input_path.lower().endswith(".mov") or input_path.lower().endswith(".webm"):
 
81
  # If decode is True, find audio files in a directory or single file
82
  if os.path.isdir(input_path):
83
  # Find all files with a supported audio extension (see file_ext) in the input directory
84
+ processed_list = librosa.util.find_files(input_path, ext=file_ext)
 
 
 
85
  else:
86
  # If it's a single file and it is recognized as an audio file, add to processed list
87
+ #if input_path.lower().endswith(".wav") or input_path.lower().endswith(".flac"):
88
+ if is_audio_file(input_path):
89
  processed_list.append(input_path)
90
  else:
91
  # Read file paths from the input text file (one path per line)
 
106
  # If only one path per line, treat it as 'inputs'
107
  sample = {'inputs': tmp_paths[0]}
108
  processed_list.append(sample) # Append processed sample to list
109
+ return processed_list