Spaces:
Configuration error
Configuration error
"""
This is an example using CLAP to perform zero-shot
classification on ESC50 (https://github.com/karolpiczak/ESC-50).

The script builds one text prompt per ESC50 class, embeds the prompts once,
then embeds every audio clip, picks the class whose prompt is most similar
to the clip, and prints the resulting accuracy.
"""
from CLAPWrapper import CLAPWrapper
from esc50_dataset import ESC50
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score

# Load dataset
dataset = ESC50(root="data_path", download=False)
prompt = 'this is a sound of '
# One prompt per class, in the same order as dataset.classes, so that the
# index of the best-matching prompt can be compared with the target index.
y = [prompt + x for x in dataset.classes]

# Load and initialize CLAP
weights_path = "weights_path"
clap_model = CLAPWrapper(weights_path, use_cuda=False)

# Computing text embeddings (done once; reused for every audio clip)
text_embeddings = clap_model.get_text_embeddings(y)

# Computing audio embeddings
y_preds, y_labels = [], []
for i in tqdm(range(len(dataset))):
    # The second element of each dataset item is not needed here.
    x, _, one_hot_target = dataset[i]
    audio_embeddings = clap_model.get_audio_embeddings([x], resample=True)
    similarity = clap_model.compute_similarity(audio_embeddings, text_embeddings)
    # Softmax over dim=1 turns the similarities into a distribution; it does
    # not change which entry is the largest.
    # NOTE(review): presumably similarity is (1, num_classes) -- confirm
    # against CLAPWrapper.compute_similarity.
    y_pred = F.softmax(similarity.detach().cpu(), dim=1).numpy()
    y_preds.append(y_pred)
    y_labels.append(one_hot_target.detach().cpu().numpy())

# Stack the per-clip results along axis 0, then compare the arg-max class
# index of the targets with the arg-max class index of the predictions.
y_labels, y_preds = np.concatenate(y_labels, axis=0), np.concatenate(y_preds, axis=0)
acc = accuracy_score(np.argmax(y_labels, axis=1), np.argmax(y_preds, axis=1))
print('ESC50 Accuracy {}'.format(acc))

"""
The output:

The reported result for this example is an ESC50 accuracy of 82.6%.
Note that the print call above emits the raw value returned by
accuracy_score (a fraction, i.e. roughly 0.826), in the form
"ESC50 Accuracy <value>", not a formatted percentage.
"""