alibabasglab committed on
Commit
d94221b
·
verified ·
1 Parent(s): 8bcecef

Delete dataloader/misc.py

Browse files
Files changed (1) hide show
  1. dataloader/misc.py +0 -109
dataloader/misc.py DELETED
@@ -1,109 +0,0 @@
1
- #!/usr/bin/env python -u
2
- # -*- coding: utf-8 -*-
3
-
4
- from __future__ import absolute_import
5
- from __future__ import division
6
- from __future__ import print_function
7
- import torch
8
- import torch.nn as nn
9
- import numpy as np
10
- import os
11
- import sys
12
- import librosa
13
- import mimetypes
14
-
15
def get_file_extension(file_path):
    """
    Return the extension of a file path.

    Parameters:
        file_path (str): Path or file name to inspect.

    Returns:
        str: The extension including its leading dot (e.g. ".wav"), or an
             empty string when the last path component has no extension.
    """
    # os.path.splitext splits on the last dot of the final path component.
    _, ext = os.path.splitext(file_path)
    return ext
22
-
23
def is_audio_file(file_path):
    """
    Check if the given file_path is an audio file.

    The extension is first compared, case-insensitively, against a list of
    known audio extensions. If that does not match, the MIME type guessed
    from the file name by ``mimetypes.guess_type`` is used as a fallback.
    The file itself is never opened.

    Parameters:
        file_path (str): Path or file name to inspect.

    Returns:
        bool: True if it is an audio file, otherwise False.
    """
    # The supported audio types are listed below (tested), but not limited to.
    file_ext = ("wav", "aac", "ac3", "aiff", "flac", "m4a", "mp3", "ogg", "opus", "wma", "webm")

    # splitext yields ".ext" (or ""); drop the dot and lowercase so that
    # upper-case names such as "TRACK.WMA" are recognised as well.
    ext = os.path.splitext(file_path)[1][1:].lower()
    if ext in file_ext:
        return True

    # Fallback for audio extensions that are not in the list above.
    mime_type, _ = mimetypes.guess_type(file_path)
    return bool(mime_type and mime_type.startswith('audio'))
38
-
39
def _read_first_column(list_path):
    """Return the first whitespace-separated field of every non-blank line in list_path."""
    first_fields = []
    with open(list_path) as fid:
        for line in fid:
            fields = line.split()
            if fields:  # skip blank lines instead of failing on fields[0]
                first_fields.append(fields[0])
    return first_fields


def read_and_config_file(args, input_path, decode=0):
    """
    Reads and processes the input file or directory to extract audio file paths or configuration data.

    Parameters:
        args: The args. Only read when decode is true: ``args.task`` and, for the
              'target_speaker_extraction' task, ``args.network_reference.cue``.
        input_path (str): Path to a file or directory containing audio data or file paths.
        decode (bool): If True (decode=1) for decoding, input_path is either a directory
                       that is searched for media files, a single media file, or a
                       list file whose first field on each line is a path.
                       If False (decode=0) for training, input_path is a list file with
                       one or two whitespace-separated paths per line.

    Returns:
        processed_list (list): For decoding, a list of file paths. For training, a list of
                               dictionaries with key 'inputs' and, when a second path is
                               given on the line, 'condition_audio'.

    Raises:
        ValueError: In training mode, if a line holds more than two fields.
    """
    processed_list = []  # Initialize list to hold processed file paths or configurations

    # The supported audio types are listed below (tested), but not limited to.
    file_ext = ["wav", "aac", "ac3", "aiff", "flac", "m4a", "mp3", "ogg", "opus", "wma", "webm"]
    # Video containers accepted when the extraction cue is the speaker's lips.
    video_ext = ["mp4", "avi", "mov", "webm"]

    if decode:
        use_video = (args.task == 'target_speaker_extraction'
                     and args.network_reference.cue == 'lip')

        if os.path.isdir(input_path):
            if use_video:
                # find_files matches extensions case-insensitively by default, so
                # "mov" already covers "MOV"; a second query would list those
                # files twice.
                for ext in video_ext:
                    processed_list += librosa.util.find_files(input_path, ext=ext)
            else:
                # Find all supported audio files in the input directory
                processed_list = librosa.util.find_files(input_path, ext=file_ext)
            return processed_list

        if use_video:
            # A single file counts as media when it has a known video extension
            is_media = input_path.lower().endswith(tuple("." + ext for ext in video_ext))
        else:
            is_media = is_audio_file(input_path)

        if is_media:
            processed_list.append(input_path)
        else:
            # Otherwise treat the input as a list file (one path per line; only
            # the first field of each line is used)
            processed_list = _read_first_column(input_path)
        return processed_list

    # If decode is False, treat the input file as a configuration file
    with open(input_path) as fid:
        for line_no, line in enumerate(fid, start=1):
            tmp_paths = line.split()  # Split paths (whitespace-separated)
            if not tmp_paths:
                continue  # blank line: nothing to add
            if len(tmp_paths) == 1:
                # If only one path per line, treat it as 'inputs'
                sample = {'inputs': tmp_paths[0]}
            elif len(tmp_paths) == 2:
                # If two paths per line, treat the second as 'condition_audio'
                sample = {'inputs': tmp_paths[0], 'condition_audio': tmp_paths[1]}
            else:
                # Previously such a line re-appended the preceding sample (or
                # failed with an unbound variable on the first line).
                raise ValueError(
                    "{}:{}: expected 1 or 2 paths per line, got {}".format(
                        input_path, line_no, len(tmp_paths)))
            processed_list.append(sample)  # Append processed sample to list
    return processed_list