# Spaces status: Runtime error
import numpy as np | |
import librosa | |
from sklearn.cluster import DBSCAN | |
from pydub import AudioSegment | |
def extract_voice_features(audio_path, segment_duration=1000):
    """Return one mean-MFCC feature vector per fixed-length audio segment.

    The audio is loaded with librosa's default settings, 13 MFCCs are
    computed, and the MFCC frames falling inside each consecutive
    ``segment_duration``-millisecond window are averaged.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.
        segment_duration: Segment length in milliseconds.

    Returns:
        Array of shape ``(num_segments, 13)``. Trailing audio shorter than a
        full segment is dropped; if the audio is shorter than one segment the
        result has shape ``(0, 13)``.

    Raises:
        ValueError: If ``segment_duration`` is too small to span one sample.
    """
    n_mfcc = 13
    # Passed explicitly so the sample -> frame conversion below cannot drift
    # from the hop actually used by the MFCC computation.
    hop_length = 512

    # Load the audio file
    y, sr = librosa.load(audio_path)

    # Extract MFCC features: one column per analysis frame, NOT per sample.
    mfccs = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length
    )
    n_frames = mfccs.shape[1]

    samples_per_segment = int(segment_duration * sr / 1000)
    if samples_per_segment <= 0:
        raise ValueError(
            "segment_duration must be long enough to span at least one "
            f"audio sample, got {segment_duration!r} ms"
        )
    num_segments = len(y) // samples_per_segment

    segments = []
    for i in range(num_segments):
        # Segment boundaries are known in samples; MFCC columns are spaced
        # hop_length samples apart, so convert before slicing.
        start_frame = (i * samples_per_segment) // hop_length
        end_frame = ((i + 1) * samples_per_segment) // hop_length
        # Always keep at least one frame (segments shorter than one hop) and
        # never run past the last frame, so the mean is never taken over an
        # empty slice (which would yield NaN features).
        end_frame = min(n_frames, max(end_frame, start_frame + 1))
        segments.append(np.mean(mfccs[:, start_frame:end_frame], axis=1))

    if not segments:
        # Keep the result two-dimensional even when there are no segments.
        return np.empty((0, n_mfcc))
    return np.array(segments)
def cluster_voices(features):
    """Assign a DBSCAN cluster label to every feature vector.

    Args:
        features: Sequence/array of feature vectors, one per row.

    Returns:
        Integer label array with one entry per row of ``features``. When
        there are fewer than two rows, or DBSCAN marks every row as noise
        (-1), all rows are placed in a single cluster labelled 0.
    """
    n_points = len(features)

    def _single_cluster():
        # Fallback labelling: everything belongs to cluster 0.
        return np.zeros(n_points, dtype=int)

    if n_points < 2:
        print("Not enough voice segments for clustering. Assigning all to one cluster.")
        return _single_cluster()

    labels = DBSCAN(eps=0.5, min_samples=5, metric='euclidean').fit_predict(features)

    # At least one point landed in a real cluster: keep DBSCAN's labelling.
    if (labels != -1).any():
        return labels

    print("DBSCAN assigned all to noise. Considering as one cluster.")
    return _single_cluster()
def get_most_frequent_voice(features, clusters):
    """Return the feature rows that belong to the most populated cluster.

    The label -1 (DBSCAN's noise marker) is not treated as a cluster unless
    every row carries it, in which case the noise rows are returned.
    Ties are resolved in favour of the smallest label.

    Args:
        features: Array-like of feature vectors, one per row.
        clusters: Array-like of integer labels, one per row of ``features``.

    Returns:
        The rows of ``features`` whose label is the most frequent one; an
        empty selection when ``clusters`` is empty.
    """
    features = np.asarray(features)
    clusters = np.asarray(clusters)

    if clusters.size == 0:
        # Nothing to rank; return an empty selection instead of failing.
        return features[:0]

    # Noise points are not a voice, so they must not win the vote.
    candidates = clusters[clusters != -1]
    if candidates.size == 0:
        candidates = clusters

    # Count every label in one pass rather than re-counting per label.
    labels, counts = np.unique(candidates, return_counts=True)
    largest_cluster = labels[np.argmax(counts)]
    return features[clusters == largest_cluster]
def process_audio(audio_path, segment_duration=1000):
    """Run the full pipeline: feature extraction, clustering, voice selection.

    Args:
        audio_path: Path of the audio file to analyse.
        segment_duration: Segment length in milliseconds, forwarded to
            ``extract_voice_features``.

    Returns:
        Tuple ``(most_frequent_voice, features, clusters)``: the feature rows
        of the largest cluster, all segment features, and the label assigned
        to each segment.
    """
    segment_features = extract_voice_features(audio_path, segment_duration)
    labels = cluster_voices(segment_features)
    dominant = get_most_frequent_voice(segment_features, labels)
    return dominant, segment_features, labels