diivien committed on
Commit
fa9c04b
·
1 Parent(s): 2717876
.gitattributes ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ *.gif filter=lfs diff=lfs merge=lfs -text
2
+ images/*.gif filter=lfs diff=lfs merge=lfs -text
3
+ *.pkl filter=lfs diff=lfs merge=lfs -text
.github/workflows/blank.yml CHANGED
@@ -17,4 +17,4 @@ jobs:
17
  - name: Push to hub
18
  env:
19
  HF_TOKEN: ${{ secrets.HG }}
20
- run: git push https://diivien:$HF_TOKEN@huggingface.co/spaces/diivien/Music-Popularity-Prediction main
 
17
  - name: Push to hub
18
  env:
19
  HF_TOKEN: ${{ secrets.HG }}
20
+ run: git push --force https://diivien:$HF_TOKEN@huggingface.co/spaces/diivien/Music-Popularity-Prediction main
.github/workflows/main.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Check file size
2
+ on: # or directly `on: [push]` to run the action on every push on any branch
3
+ pull_request:
4
+ branches: [main]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - name: Check large files
14
+ uses: ActionsDesk/lfs-warning@v2.0
15
+ with:
16
+ filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces
.gitignore CHANGED
@@ -2,4 +2,5 @@
2
  .venv/
3
  .ipynb_checkpoints/
4
  catboost_info/
5
- my_study.db
 
 
2
  .venv/
3
  .ipynb_checkpoints/
4
  catboost_info/
5
+ my_study.db
6
+ flagged/
Model Building.ipynb CHANGED
@@ -1138,6 +1138,7 @@
1138
  {
1139
  "cell_type": "code",
1140
  "execution_count": 12,
 
1141
  "metadata": {},
1142
  "outputs": [
1143
  {
@@ -1390,7 +1391,7 @@
1390
  },
1391
  {
1392
  "cell_type": "code",
1393
- "execution_count": 14,
1394
  "id": "5bcee11c",
1395
  "metadata": {},
1396
  "outputs": [
@@ -1398,7 +1399,7 @@
1398
  "name": "stderr",
1399
  "output_type": "stream",
1400
  "text": [
1401
- "c:\\Users\\zheng\\Documents\\ds music project\\.venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
1402
  " from .autonotebook import tqdm as notebook_tqdm\n"
1403
  ]
1404
  }
@@ -1411,7 +1412,7 @@
1411
  },
1412
  {
1413
  "cell_type": "code",
1414
- "execution_count": 15,
1415
  "id": "99edd49f",
1416
  "metadata": {},
1417
  "outputs": [],
@@ -1433,7 +1434,7 @@
1433
  },
1434
  {
1435
  "cell_type": "code",
1436
- "execution_count": 74,
1437
  "id": "7305f2d1",
1438
  "metadata": {},
1439
  "outputs": [],
@@ -5211,7 +5212,7 @@
5211
  },
5212
  {
5213
  "cell_type": "code",
5214
- "execution_count": 86,
5215
  "id": "a2e122ed",
5216
  "metadata": {},
5217
  "outputs": [
@@ -6315,7 +6316,7 @@
6315
  },
6316
  {
6317
  "cell_type": "code",
6318
- "execution_count": 80,
6319
  "id": "addeaf5e",
6320
  "metadata": {},
6321
  "outputs": [
@@ -6355,6 +6356,8 @@
6355
  }
6356
  ],
6357
  "source": [
 
 
6358
  "pipe_CatBoost = imbpipeline(\n",
6359
  " [\n",
6360
  "\n",
@@ -6684,6 +6687,49 @@
6684
  "source": [
6685
  "plot_feature_importance(pipe_CatBoost,X,y)"
6686
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6687
  }
6688
  ],
6689
  "metadata": {
 
1138
  {
1139
  "cell_type": "code",
1140
  "execution_count": 12,
1141
+ "id": "077ca8bd",
1142
  "metadata": {},
1143
  "outputs": [
1144
  {
 
1391
  },
1392
  {
1393
  "cell_type": "code",
1394
+ "execution_count": 12,
1395
  "id": "5bcee11c",
1396
  "metadata": {},
1397
  "outputs": [
 
1399
  "name": "stderr",
1400
  "output_type": "stream",
1401
  "text": [
1402
+ "c:\\Users\\zheng\\Documents\\MMU SHIT\\YEAR2\\SEM2\\DATA SCIENCE FUNDAMENTALS\\Music-Popularity-Prediction\\.venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
1403
  " from .autonotebook import tqdm as notebook_tqdm\n"
1404
  ]
1405
  }
 
1412
  },
1413
  {
1414
  "cell_type": "code",
1415
+ "execution_count": 13,
1416
  "id": "99edd49f",
1417
  "metadata": {},
1418
  "outputs": [],
 
1434
  },
1435
  {
1436
  "cell_type": "code",
1437
+ "execution_count": 14,
1438
  "id": "7305f2d1",
1439
  "metadata": {},
1440
  "outputs": [],
 
5212
  },
5213
  {
5214
  "cell_type": "code",
5215
+ "execution_count": 15,
5216
  "id": "a2e122ed",
5217
  "metadata": {},
5218
  "outputs": [
 
6316
  },
6317
  {
6318
  "cell_type": "code",
6319
+ "execution_count": 17,
6320
  "id": "addeaf5e",
6321
  "metadata": {},
6322
  "outputs": [
 
6356
  }
6357
  ],
6358
  "source": [
6359
+ "from catboost import CatBoostClassifier\n",
6360
+ "\n",
6361
  "pipe_CatBoost = imbpipeline(\n",
6362
  " [\n",
6363
  "\n",
 
6687
  "source": [
6688
  "plot_feature_importance(pipe_CatBoost,X,y)"
6689
  ]
6690
+ },
6691
+ {
6692
+ "attachments": {},
6693
+ "cell_type": "markdown",
6694
+ "id": "e2780c6e",
6695
+ "metadata": {},
6696
+ "source": [
6697
+ "### Saving the best model"
6698
+ ]
6699
+ },
6700
+ {
6701
+ "cell_type": "code",
6702
+ "execution_count": 18,
6703
+ "id": "b88f2d45",
6704
+ "metadata": {},
6705
+ "outputs": [
6706
+ {
6707
+ "data": {
6708
+ "text/plain": [
6709
+ "['final_model.pkl']"
6710
+ ]
6711
+ },
6712
+ "execution_count": 18,
6713
+ "metadata": {},
6714
+ "output_type": "execute_result"
6715
+ }
6716
+ ],
6717
+ "source": [
6718
+ "import joblib\n",
6719
+ "pipe_final_CatBoost = imbpipeline(\n",
6720
+ " [\n",
6721
+ "\n",
6722
+ " ('preprocessor',preprocessor),\n",
6723
+ " ('over',smoteNC2),\n",
6724
+ "\n",
6725
+ " ('classifier',CatBoostClassifier(thread_count=-1,silent=True,task_type=\"GPU\"))]\n",
6726
+ " )\n",
6727
+ "pipe_final_CatBoost.set_params(**cat_best_params)\n",
6728
+ "\n",
6729
+ "pipe_final_CatBoost.fit(X,y)\n",
6730
+ "\n",
6731
+ "joblib.dump(pipe_final_CatBoost,'final_model.pkl')\n"
6732
+ ]
6733
  }
6734
  ],
6735
  "metadata": {
README.md CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Music Popularity Prediction
2
 
3
  This repository contains a data science project that aims to predict the popularity of music using machine learning techniques.
 
1
+ ---
2
+ title: Music Popularity Prediction
3
+ emoji: 🚀
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.28.0
8
+ app_file: app.py
9
+ pinned: false
10
+ python_version: 3.10.6
11
+ ---
12
+
13
  # Music Popularity Prediction
14
 
15
  This repository contains a data science project that aims to predict the popularity of music using machine learning techniques.
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import joblib
4
+ import os
5
+ import spotipy
6
+ from spotipy.oauth2 import SpotifyClientCredentials
7
+ from Levenshtein import distance
8
+
9
# Load the pipeline that was fitted and dumped to 'final_model.pkl'.
# NOTE(review): joblib.load unpickles the file, so it must come from a
# trusted source (here: the repository itself).
final_model = joblib.load('final_model.pkl')
print(final_model)

# Authenticate against the Spotify Web API; both credentials are read from
# the environment and a missing variable raises KeyError at import time.
_spotify_credentials = SpotifyClientCredentials(
    client_id=os.environ['SPOT_API'],
    client_secret=os.environ['SPOT_SECRET'],
)
sp = spotipy.Spotify(auth_manager=_spotify_credentials)
13
+
14
# Genre labels offered in the "track_genre" dropdown and used as the target
# vocabulary when mapping Spotify artist genres. The order is significant:
# it is the dropdown order and the tie-break order in
# find_most_similar_genre, so it must not be re-sorted.
genre_list = [
    'acoustic', 'afrobeat', 'alt-rock', 'alternative', 'ambient',
    'anime', 'black-metal', 'bluegrass', 'blues', 'brazil',
    'breakbeat', 'british', 'cantopop', 'chicago-house', 'children',
    'chill', 'classical', 'club', 'comedy', 'country',
    'dance', 'dancehall', 'death-metal', 'deep-house', 'detroit-techno',
    'disco', 'disney', 'drum-and-bass', 'dub', 'dubstep',
    'edm', 'electro', 'electronic', 'emo', 'folk',
    'forro', 'french', 'funk', 'garage', 'german',
    'gospel', 'goth', 'grindcore', 'groove', 'grunge',
    'guitar', 'happy', 'hard-rock', 'hardcore', 'hardstyle',
    'heavy-metal', 'hip-hop', 'honky-tonk', 'house', 'idm',
    'indian', 'indie-pop', 'indie', 'industrial', 'iranian',
    'j-dance', 'j-idol', 'j-pop', 'j-rock', 'jazz',
    'k-pop', 'kids', 'latin', 'latino', 'malay',
    'mandopop', 'metal', 'metalcore', 'minimal-techno', 'mpb',
    'new-age', 'opera', 'pagode', 'party', 'piano',
    'pop-film', 'pop', 'power-pop', 'progressive-house', 'psych-rock',
    'punk-rock', 'punk', 'r-n-b', 'reggae', 'reggaeton',
    'rock-n-roll', 'rock', 'rockabilly', 'romance', 'sad',
    'salsa', 'samba', 'sertanejo', 'show-tunes', 'singer-songwriter',
    'ska', 'sleep', 'soul', 'spanish', 'study',
    'swedish', 'synth-pop', 'tango', 'techno', 'trance',
    'trip-hop', 'turkish', 'world-music',
]
34
+
35
def get_track_genre(track_id):
    """Return the genres Spotify lists for a track's first credited artist.

    The track is fetched first, then its first artist is looked up via that
    artist's Spotify URL, and the artist's ``'genres'`` value is returned.
    """
    first_artist = sp.track(track_id)['artists'][0]
    artist_url = first_artist['external_urls']['spotify']
    return sp.artist(artist_url)['genres']
40
+
41
def find_most_similar_genre(my_genres, artist_genres):
    """Pick the entry of ``my_genres`` closest to any of ``artist_genres``.

    Closeness is the edit distance computed by ``distance``. Every
    (my_genre, artist_genre) pair is considered, iterating ``my_genres`` in
    the outer position; on ties the first pair encountered wins. Returns
    ``None`` when either collection is empty.
    """
    candidate_pairs = (
        (own, theirs)
        for own in my_genres
        for theirs in artist_genres
    )
    # min() keeps the first minimal element, matching a strict "<" scan.
    closest = min(
        candidate_pairs,
        key=lambda pair: distance(pair[0], pair[1]),
        default=None,
    )
    return None if closest is None else closest[0]
51
+
52
def match_genres_to_list(track_id):
    """Map a track onto the ``genre_list`` entry nearest its artist's genres.

    Returns whatever ``find_most_similar_genre`` yields for ``genre_list``
    and the genres obtained from ``get_track_genre(track_id)``.
    """
    return find_most_similar_genre(genre_list, get_track_genre(track_id))
55
+
56
def search_songs(query):
    """Search Spotify for tracks matching ``query``.

    Returns a ``(songs, track_ids)`` pair of parallel lists: ``songs`` holds
    display labels of the form ``"<index>. <name> by <first artist>"`` and
    ``track_ids`` holds the matching Spotify track ids, so the index at the
    start of a label addresses its id.
    """
    items = sp.search(q=query, type="track")["tracks"]["items"]

    songs = []
    track_ids = []
    for position, item in enumerate(items):
        songs.append(f"{position}. {item['name']} by {item['artists'][0]['name']}")
        track_ids.append(item["id"])
    return songs, track_ids
62
+
63
+
64
def get_song_features(song, track_ids):
    """Fetch the model input features for a song picked in the dropdown.

    Args:
        song: Dropdown label of the form ``"<index>. <name> by <artist>"``;
            only the leading index is used.
        track_ids: Spotify track ids, indexed by the number at the start of
            ``song``.

    Returns:
        A 15-item list ordered as: duration_ms, explicit, danceability,
        energy, key (note name, or ``None`` when Spotify reports no key),
        loudness, mode ("Major"/"Minor"), speechiness, acousticness,
        instrumentalness, liveness, valence, tempo, time_signature (as a
        string) and the matched genre.

    Raises:
        gr.Error: If Spotify returns no audio features for the track.
    """
    index = int(song.split(".")[0])
    track_id = track_ids[index]
    track_info = sp.track(track_id)

    # The audio-features lookup can come back empty or contain None for a
    # track; fail with a readable message instead of a TypeError.
    feature_batch = sp.audio_features([track_id])
    features = feature_batch[0] if feature_batch else None
    if features is None:
        raise gr.Error("Spotify has no audio features for this track.")

    genre = match_genres_to_list(track_id)

    # Pitch-class index -> note name. A value outside 0..11 (Spotify uses -1
    # when no key was detected) maps to None so the key field is left unset
    # rather than raising.
    key_names = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    pitch_class = features['key']
    key = key_names[pitch_class] if 0 <= pitch_class < len(key_names) else None

    mode_map = {1: "Major", 0: "Minor"}
    mode = mode_map[features['mode']]

    explicit_real = track_info['explicit']
    features_list = [
        features['duration_ms'],
        explicit_real,
        features['danceability'],
        features['energy'],
        key,
        features['loudness'],
        mode,
        features['speechiness'],
        features['acousticness'],
        features['instrumentalness'],
        features['liveness'],
        features['valence'],
        features['tempo'],
        str(features['time_signature']),
        genre,
    ]

    return features_list
96
+
97
theme = gr.themes.Monochrome(
    # text_size="text_lg",
    font=[gr.themes.GoogleFont('Neucha'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
)


def _status_html(gif_name, alt_text, message):
    """Build the GIF-plus-headline snippet shown in the result panel."""
    return (
        "<div style='display: flex; align-items: center;'>"
        f"<img src='file=images/{gif_name}' alt='{alt_text}' width='200' height='200'>"
        "<div><h1 style='font-size: 30px; line-height: 24px; margin-left: 50px;'>"
        f"{message}</h1></div></div>"
    )


with gr.Blocks(theme=theme) as demo:
    # Page header: animated GIF next to the title and author line.
    with gr.Row():
        image = gr.HTML("<div style='display: flex; align-items: center;'><img src='file=images/cat-jam.gif' alt='My gif' width='200' height='200'>" +
                        "<div><h1 style='font-size: 60px; line-height: 24px; margin-left: 50px;'>Music Popularity Prediction</h1>" +
                        "<h2 style='font-size: 24px; line-height: 18px; margin-left: 50px; margin-top: 50px'>by Keh Zheng Xian</h2></div></div>")

    with gr.Row():
        with gr.Column():
            search_box = gr.Textbox(label="Search for songs")
            song_dropdown = gr.Dropdown(label="Select a song", choices=[])
            # One component per model feature. The order must match both the
            # list returned by get_song_features and the parameter order of
            # predict_popularity.
            inputs = [
                gr.Number(label="duration_ms", interactive=True),
                gr.Checkbox(label="explicit", interactive=True),
                gr.Slider(0.0, 1.0, label="danceability", interactive=True),
                gr.Slider(0.0, 1.0, label="energy", interactive=True),
                gr.Dropdown(label="key", choices=["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"], interactive=True),
                gr.Number(label="loudness", interactive=True),
                gr.Radio(label="mode", choices=["Major", "Minor"], interactive=True),
                gr.Slider(0.0, 1.0, label="speechiness", interactive=True),
                gr.Slider(0.0, 1.0, label="acousticness", interactive=True),
                gr.Slider(0.0, 1.0, label="instrumentalness", interactive=True),
                gr.Slider(0.0, 1.0, label="liveness", interactive=True),
                gr.Slider(0.0, 1.0, label="valence", interactive=True),
                gr.Number(label="tempo", interactive=True),
                gr.Dropdown(label="time_signature", choices=[3, 4, 5, 6, 7], interactive=True),
                gr.Dropdown(label="track_genre", choices=genre_list, interactive=True),
            ]
            # The button caption is set through ``value``; ``label`` is not a
            # Button parameter and would leave the default caption in place.
            predict_button = gr.Button(value="Predict popularity")

        with gr.Column():
            popularity_box = gr.HTML(
                _status_html('pepe-waiting.gif', 'My gif 2', 'Waiting for your song...')
            )

    # Per-session storage for the track ids behind the current dropdown choices.
    track_ids_var = gr.State()

    def update_dropdown(query, track_ids):
        """Refresh the song dropdown and stored track ids for a new query.

        ``track_ids`` is the previous state value; it is received because the
        state component is wired as an input, but it is always replaced.
        """
        # Clearing the search box also fires the change event; skip the
        # Spotify request for a blank query and just empty the choices.
        if not query or not query.strip():
            return {song_dropdown: gr.update(choices=[]), track_ids_var: []}
        songs, track_ids = search_songs(query)
        return {song_dropdown: gr.update(choices=songs), track_ids_var: track_ids}

    search_box.change(fn=update_dropdown, inputs=[search_box, track_ids_var], outputs=[song_dropdown, track_ids_var])

    def update_features(song, track_ids):
        """Fill every feature component from the selected song."""
        # Nothing selected (or no search results stored yet): leave all
        # feature components as they are instead of failing on the label.
        if not song or not track_ids:
            return [gr.update() for _ in inputs]
        return get_song_features(song, track_ids)

    def predict_popularity(duration_ms, explicit, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, time_signature, track_genre):
        """Run the model on one row of features and return the result HTML."""
        if key is None or mode is None or time_signature is None or track_genre is None:
            raise gr.Error("Please choose a key, mode, time signature and genre before predicting.")

        # Convert the key input from a note name to its integer index (the
        # model receives it as a string).
        key_map = {"C": 0, "C#": 1, "D": 2, "D#": 3, "E": 4, "F": 5, "F#": 6, "G": 7, "G#": 8, "A": 9, "A#": 10, "B": 11}
        key_real = str(key_map[key])

        explicit_real = int(explicit)
        # Convert the mode input from a string to an integer value.
        mode_map = {"Major": 1, "Minor": 0}
        mode_real = mode_map[mode]

        data = {
            "duration_ms": [duration_ms],
            "explicit": [explicit_real],
            "danceability": [danceability],
            "energy": [energy],
            "key": [key_real],
            "loudness": [loudness],
            "mode": [mode_real],
            "speechiness": [speechiness],
            "acousticness": [acousticness],
            "instrumentalness": [instrumentalness],
            "liveness": [liveness],
            "valence": [valence],
            "tempo": [tempo],
            "time_signature": [str(time_signature)],
            "track_genre": [track_genre],
        }

        df = pd.DataFrame(data)

        # A prediction of 1 is reported as popular; anything else is not.
        if final_model.predict(df)[0] == 1:
            return _status_html('pepe-jam.gif', 'My gif 3', 'Your song issa boppp')
        return _status_html('pepo-sad-pepe.gif', 'My gif 4', 'Not a bop....')

    song_dropdown.change(fn=update_features, inputs=[song_dropdown, track_ids_var], outputs=inputs)
    predict_button.click(fn=predict_popularity, inputs=inputs, outputs=popularity_box, scroll_to_output=True)

demo.launch()
final_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1643cf4b328c5addca850ae397d9a5d40ed094394bb5e3e16dfa502c7fb96f18
3
+ size 8825585
images/cat-jam.gif ADDED

Git LFS Details

  • SHA256: c252de863e689d78ed21a30c4dd39030c45c3179f55531821f7546cdae8cbb55
  • Pointer size: 133 Bytes
  • Size of remote file: 14.9 MB
images/pepe-jam.gif ADDED

Git LFS Details

  • SHA256: 7417a90587a45f03510a3708a3f59ea9a809f9ddf6a0a62a2b6f7b9285fe2d5d
  • Pointer size: 130 Bytes
  • Size of remote file: 54.1 kB
images/pepe-waiting.gif ADDED

Git LFS Details

  • SHA256: 1144b126b679cf8a2775fbd477f99193e6061fd68bb9372b766705ce37a5931a
  • Pointer size: 131 Bytes
  • Size of remote file: 370 kB
images/pepo-sad-pepe.gif ADDED

Git LFS Details

  • SHA256: 0512ecaf257669338eb14c7423cc7cd3ee0d0021847f7c2c933cfda458771b68
  • Pointer size: 131 Bytes
  • Size of remote file: 855 kB