# Source: Vaani-Audio2Img-LDM / Vaani / _1.1_Audio.py
# Uploaded by alpha31476 -- commit 87ef7b5 (verified): "LDM-train-pass, checking results"
# !pip install huggingface_hub
# !pip install datasets
import os
import shutil
import subprocess
from collections import Counter

import numpy as np
import pandas as pd
from datasets import get_dataset_config_names
from datasets import load_dataset
from scipy.io.wavfile import write
from tqdm import tqdm, trange
def array_to_wav(audio_array: np.ndarray, sample_rate: int, file_name: str):
    """Write an array of audio samples to a WAV file.

    The samples are written unchanged: no peak normalisation and no
    conversion to 16-bit PCM is applied, so the sample format of the file
    follows the dtype of the array (see ``scipy.io.wavfile.write``).

    Args:
        audio_array: Audio samples. Array-likes such as plain lists are
            accepted and converted with ``np.asarray``; an ``ndarray`` is
            passed through without copying.
        sample_rate: Sampling rate written to the WAV header.
        file_name: Destination path of the WAV file.
    """
    # scipy's writer reads ``.dtype`` on the data, so coerce array-likes first.
    samples = np.asarray(audio_array)
    write(file_name, sample_rate, samples)
# Export the English-language recordings of selected Vaani districts to WAV
# files and collect their metadata into a CSV.

# Access token for the Hugging Face Hub, read from the environment so that no
# secret is stored in this file. When the variable is unset, None is passed to
# load_dataset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Hub download cache that is wiped after every district.
HF_HUB_CACHE = "/home/23m1521/.cache/huggingface/hub"


def _remove_path(path):
    """Delete *path* recursively if it exists (in-process ``rm -rf``)."""
    if os.path.islink(path) or os.path.isfile(path):
        os.remove(path)
    elif os.path.isdir(path):
        shutil.rmtree(path)


configs = get_dataset_config_names("ARTPARK-IISc/Vaani")
english_district = [
    'Bellary', 'Bijapur', 'Churu', 'Etah', 'Guntur', 'JyotibaPhuleNagar',
    'Karimnagar', 'Krishna', 'NorthSouthGoa', 'Rajnandgaon', 'Sahebganj'
]
# Keep the configs whose last "_"-separated component is a selected district
# (e.g. "AndhraPradesh_Krishna").
eng_dis_ds = [
    name for name in configs if name.split("_")[-1] in english_district
]
print(len(eng_dis_ds), eng_dis_ds, "\n\n")

# Column-oriented accumulator; one entry per exported recording.
df_dict = {"audio_path":[], 'referenceImage':[], 'gender':[], 'state':[], 'district':[]}

for idx, district in enumerate(eng_dis_ds, start=1):
    print("-"*2, idx, "-"*10, district, "-"*10)

    cache_dir = f'/scratch/23m1521/Vaani/Audio-Cache/English/{district}'
    os.makedirs(cache_dir, exist_ok=True)

    ds = load_dataset("ARTPARK-IISc/Vaani", district, split="train", num_proc=10,
                      token=HF_TOKEN, cache_dir=cache_dir, streaming=False)

    # Hand only the "language" column to the predicate instead of whole rows,
    # so the filter does not have to touch the audio column.
    english_rows = ds.filter(
        lambda language: language == "English",
        input_columns=["language"],
    )

    save_dir = f'/scratch/23m1521/Vaani/Audio/English/{district}'
    os.makedirs(save_dir, exist_ok=True)

    for row in tqdm(english_rows, dynamic_ncols=True, colour='blue'):
        audio = row['audio']
        # NOTE(review): assumes audio['path'] is a relative file name; an
        # absolute path would make os.path.join ignore save_dir -- confirm.
        array_to_wav(
            audio['array'],
            audio['sampling_rate'],
            os.path.join(save_dir, audio['path'])
        )

        df_dict['audio_path'].append(audio['path'])
        df_dict['referenceImage'].append(row['referenceImage'])
        df_dict['gender'].append(row['gender'])
        df_dict['state'].append(row['state'])
        df_dict['district'].append(row['district'])

    # Drop the per-district cache and the hub cache before the next district
    # (presumably to keep disk usage bounded).
    _remove_path(cache_dir)
    _remove_path(HF_HUB_CACHE)

pd.DataFrame(df_dict).to_csv("Vaani-Audio-Image-English.csv", index=False)