Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import numpy as np | |
| import pandas as pd | |
| import pyarrow | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import os | |
| import requests | |
# Download the lyrics/embeddings parquet file into the working directory and
# load it into the module-level DataFrame ``df`` used by the rest of the app.
url = 'https://huggingface.co/datasets/sheacon/song_lyrics/resolve/main/v2ga_w_embeddings_half.parquet'
filename = os.path.join(os.getcwd(), url.split('/')[-1])

# The context manager releases the streamed connection even if writing fails;
# the timeout keeps a stalled connection from hanging startup forever.
with requests.get(url, stream=True, timeout=60) as response:
    # Fail fast on HTTP errors instead of saving an error page as ".parquet".
    response.raise_for_status()
    with open(filename, 'wb') as file:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:  # skip empty keep-alive chunks
                file.write(chunk)
print(f"File '{filename}' has been downloaded to the present working directory.")

# Diagnostic listing of the working directory (useful in the hosted logs).
pwd = os.getcwd()
print("Present Working Directory:", pwd)
contents = os.listdir(pwd)
print("Contents of the Directory:")
for item in contents:
    print(item)

# Read the file that was actually downloaded.  The previous hard-coded name
# ('v2ga_w_embeddings.parquet') did not match the downloaded
# 'v2ga_w_embeddings_half.parquet'.
df = pd.read_parquet(filename)
def get_most_similar_songs(artist, title, df):
    """Find the songs most similar to one selected song, per embedding type.

    For each embedding column, every row of ``df`` is ranked by cosine
    similarity to the selected song's embedding and the four best matches
    are kept.  Rows with the same artist and title as the selection are
    never returned as recommendations.

    Args:
        artist: Value matched exactly against ``df['artist']``.
        title: Value matched exactly against ``df['title']``.
        df: DataFrame with ``artist``, ``title`` and ``lyrics`` columns plus
            the embedding columns ``embedding_glove``, ``embedding_minilm``,
            ``embedding_roberta`` and ``embedding_gpt``.  Assumes each
            embedding cell holds a numeric sequence whose length is constant
            within a column -- TODO confirm against the parquet schema.

    Returns:
        A dict mapping each embedding column name to a list of up to four
        ``{'title', 'artist', 'lyrics'}`` records ordered from most to least
        similar, or an error-message string when no row matches.
    """
    embedding_columns = ['embedding_glove', 'embedding_minilm', 'embedding_roberta', 'embedding_gpt']
    top_n = 4

    # Locate the selection once; it does not depend on the embedding column.
    is_selected = ((df['artist'] == artist) & (df['title'] == title)).to_numpy()
    selected_positions = np.flatnonzero(is_selected)
    if selected_positions.size == 0:
        return "Song not found. Please ensure the artist and title are correct."
    # If the (artist, title) pair is duplicated, query with the first row.
    query_position = selected_positions[0]

    results = {}
    for column in embedding_columns:
        # Stack the per-row embeddings into one (n_songs, dim) float matrix.
        vectors = np.asarray(df[column].tolist(), dtype=float)
        # Take the query from the stacked matrix so it is a real (1, dim)
        # float array rather than an object array wrapping an array.
        query = vectors[query_position].reshape(1, -1)
        scores = cosine_similarity(vectors, query).ravel()

        # Rank best-first, then drop the selection explicitly instead of
        # assuming it always occupies the top slot (ties/duplicates break
        # that assumption).
        ranked = np.argsort(scores)[::-1]
        ranked = ranked[~is_selected[ranked]][:top_n]
        results[column] = df.iloc[ranked][['title', 'artist', 'lyrics']].to_dict(orient='records')
    return results
def update_titles_dropdown(artist):
    """Return the sorted, de-duplicated titles of ``artist`` in the module-level ``df``."""
    artist_titles = df.loc[df['artist'] == artist, 'title']
    return sorted(artist_titles.unique())
# Build and launch the Gradio UI.
#
# The previous version used the removed ``gr.inputs`` / ``gr.outputs``
# namespaces and the non-existent ``updatable=`` / ``update=`` arguments, and
# passed a three-argument function to an interface with two inputs.  A Blocks
# layout is used instead so the title choices can follow the chosen artist.
artists = sorted(df['artist'].dropna().unique())


def _find_similar_songs(artist, title):
    """Adapt the two UI inputs to ``get_most_similar_songs`` using the loaded ``df``."""
    return get_most_similar_songs(artist, title, df)


def _refresh_title_choices(artist):
    """Return a dropdown update listing the titles of the chosen artist."""
    return gr.update(choices=update_titles_dropdown(artist))


with gr.Blocks(title="Semantic Song Search: Most Similar Song") as iface:
    gr.Markdown("# Semantic Song Search: Most Similar Song")
    gr.Markdown("Find the 4 most similar songs to the selected song based on different embeddings (GloVe, MiniLM, RoBERTa, GPT).")

    artist_dropdown = gr.Dropdown(choices=artists, label="Artist")
    # Choices start empty and are filled on artist change; custom values are
    # allowed so example rows and freshly loaded titles are accepted.
    title_dropdown = gr.Dropdown(choices=[], label="Title", allow_custom_value=True)
    search_button = gr.Button("Submit")
    output_interface = gr.JSON(label="Similar Songs")

    gr.Examples(
        examples=[["The Beatles", "Let It Be"], ["Eminem", "Lose Yourself"]],
        inputs=[artist_dropdown, title_dropdown],
    )

    # Keep the title list in sync with the selected artist.
    artist_dropdown.change(
        fn=_refresh_title_choices,
        inputs=artist_dropdown,
        outputs=title_dropdown,
    )
    search_button.click(
        fn=_find_similar_songs,
        inputs=[artist_dropdown, title_dropdown],
        outputs=output_interface,
    )

iface.launch()