Spaces:
Runtime error
Runtime error
Shea
committed on
Commit
·
3881571
1
Parent(s):
e5cef20
update
Browse files
app.py
CHANGED
|
@@ -25,3 +25,49 @@ contents = os.listdir(pwd)
|
|
| 25 |
print("Contents of the Directory:")
|
| 26 |
for item in contents:
|
| 27 |
print(item)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
print("Contents of the Directory:")
|
| 26 |
for item in contents:
|
| 27 |
print(item)
|
| 28 |
+
|
| 29 |
+
# The dataset is stored as Parquet (binary, columnar); read_csv cannot parse
# it, so it has to be loaded with the Parquet reader.
df = pd.read_parquet('v2ga_w_embeddings.parquet')
|
| 30 |
+
|
| 31 |
+
def get_most_similar_songs(artist, title, df=None, top_k=4):
    """Find the songs whose embeddings are closest to the chosen song.

    For each of the four embedding columns, every row of ``df`` is scored by
    cosine similarity against the chosen song and the best ``top_k`` *other*
    songs are reported.

    Args:
        artist: Artist name, matched exactly against the ``artist`` column.
        title: Song title, matched exactly against the ``title`` column.
        df: DataFrame with ``artist``, ``title``, ``lyrics`` and the four
            ``embedding_*`` columns; each embedding cell must hold one
            numeric vector, all of the same length within a column. When
            omitted, the module-level ``df`` is used so the function can be
            called with only the two UI inputs.
        top_k: Maximum number of neighbours to return per embedding.

    Returns:
        A dict mapping each embedding column name to a list of
        ``{'title', 'artist', 'lyrics'}`` records ordered from most to least
        similar, or a plain error string when no row matches
        ``artist``/``title``.
    """
    if df is None:
        # The parameter shadows the module-level frame, so fetch it explicitly.
        df = globals()["df"]

    # Locate the query once; the match does not depend on the embedding.
    is_query = ((df['artist'] == artist) & (df['title'] == title)).to_numpy()
    query_rows = np.flatnonzero(is_query)
    if query_rows.size == 0:
        return "Song not found. Please ensure the artist and title are correct."
    query_row = query_rows[0]
    limit = max(int(top_k), 0)

    def find_most_similar(embedding_column):
        # Stack the per-row vectors into one (n_songs, dim) float matrix.
        vectors = np.asarray(df[embedding_column].tolist(), dtype=float)
        norms = np.linalg.norm(vectors, axis=1)
        norms[norms == 0] = 1.0  # all-zero vectors score 0 instead of NaN
        unit = vectors / norms[:, np.newaxis]
        scores = unit @ unit[query_row]

        # Mask the query itself (and duplicate rows of it) explicitly rather
        # than assuming it is the single highest score; ties would otherwise
        # return the query or drop a genuine match.
        scores[is_query] = -np.inf
        ranked = np.argsort(-scores, kind="stable")[:limit]
        ranked = ranked[np.isfinite(scores[ranked])]
        return df.iloc[ranked][['title', 'artist', 'lyrics']].to_dict(orient='records')

    return {
        embedding: find_most_similar(embedding)
        for embedding in ['embedding_glove', 'embedding_minilm', 'embedding_roberta', 'embedding_gpt']
    }
|
| 51 |
+
|
| 52 |
+
def update_titles_dropdown(artist):
    """Return the sorted, de-duplicated titles listed for ``artist``.

    Reads the module-level ``df``; the result is empty when no row has that
    artist.
    """
    titles_by_artist = df.loc[df['artist'] == artist, 'title']
    return sorted(titles_by_artist.unique())
|
| 55 |
+
|
| 56 |
+
artists = sorted(df['artist'].unique())


def _titles_for(artist):
    """Restrict the title dropdown to the selected artist's songs."""
    return gr.update(choices=list(update_titles_dropdown(artist)))


def _search_similar(artist, title):
    """Run the similarity search for the two dropdown values."""
    outcome = get_most_similar_songs(artist, title, df)
    # gr.JSON treats str values as JSON text to parse, so the plain-text
    # "not found" message is wrapped instead of being returned bare.
    if isinstance(outcome, str):
        return {"error": outcome}
    return outcome


# gr.Interface has no hook for refreshing one input from another (the
# `updatable=` / `update=` keywords are not part of its API), so the page is
# assembled with Blocks and an explicit change listener on the artist dropdown.
with gr.Blocks(title="Semantic Song Search: Most Similar Song") as iface:
    gr.Markdown("# Semantic Song Search: Most Similar Song")
    gr.Markdown("Find the 4 most similar songs to the selected song based on different embeddings (GloVe, MiniLM, RoBERTa, GPT).")

    artist_dropdown = gr.Dropdown(choices=artists, label="Artist")
    title_dropdown = gr.Dropdown(choices=[], label="Title")
    search_button = gr.Button("Find similar songs")
    output_interface = gr.JSON(label="Similar Songs")

    # Repopulate the title choices whenever a different artist is picked.
    artist_dropdown.change(fn=_titles_for, inputs=artist_dropdown, outputs=title_dropdown)
    search_button.click(
        fn=_search_similar,
        inputs=[artist_dropdown, title_dropdown],
        outputs=output_interface,
    )
    gr.Examples(
        examples=[["The Beatles", "Let It Be"], ["Eminem", "Lose Yourself"]],
        inputs=[artist_dropdown, title_dropdown],
    )

iface.launch()
|