xinah3131 committed on
Commit
add8b88
·
1 Parent(s): c39a1ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -61
app.py CHANGED
@@ -10,7 +10,7 @@ import numpy as np
10
  import seaborn as sns
11
  # Load the model
12
 
13
- model = joblib.load('85pct(new).pkl')
14
 
15
  # Define the categories
16
  categories = {
@@ -96,54 +96,59 @@ def main():
96
  # st.write("Enter the video details below:")
97
 
98
  # Define a boolean flag variable to track prediction status
99
- prediction_done = False
100
- tab1, tab2, tab3 = st.tabs(["Predict", "Trending","Visualize"])
 
 
 
101
  # Input fields
102
- with tab1:
103
  with st.container():
104
  col1, col2, col3 = st.columns(3)
105
  getTitle, getDuration, getCategory = "", 0.00, 1
106
  getThumbnailUrl = ""
107
  with col1:
108
- url = st.text_input("URL",placeholder="Enter a video url")
109
  if url:
110
  metadata = get_metadata(url)
111
  if not metadata.empty:
112
-
113
  getTitle = metadata['title'].iloc[0]
114
  getDuration = metadata['duration'].iloc[0]
115
  category_id = metadata['category_id'].iloc[0]
116
  getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
117
  getCategory = int(category_id)
118
-
119
  if getThumbnailUrl is not None:
120
  picture = get_picture_from_url(getThumbnailUrl)
121
  if picture:
122
- st.image(picture, caption='Thumbnail captured',width = 400, channels="BGR")
123
  with col2:
124
- title = st.text_input("Title", placeholder="Enter a video title",value=getTitle)
125
  duration = st.number_input("Duration (in seconds)", min_value=0.0, value=getDuration)
126
- category = st.selectbox("Category", list(categories.keys()), index=list(categories.values()).index(getCategory))
127
-
 
 
128
  with col3:
129
  picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
130
  if picture is not None:
131
- st.picture(picture,caption='Thumbnail Uploaded',width = 400, channels="BGR")
132
- # Convert category to category ID
 
133
  categoryId = categories[category]
134
 
135
  if st.button("Predict"):
136
  # Perform prediction
137
  if title is None or title.strip() == "" and duration == 0:
138
  st.warning("Please enter a title and duration.")
139
-
140
  else:
141
  if title is None or title.strip() == "":
142
  st.warning("Please enter a title")
143
 
144
  if duration == 0:
145
  st.warning("Please enter a duration.")
146
-
147
  else:
148
  prediction = predict_trend(title, duration, categoryId)
149
  if prediction[0] == 1:
@@ -152,50 +157,72 @@ def main():
152
  else:
153
  st.info("This video is predicted not to be a trend.")
154
  st.markdown("![Alt Text](https://media.tenor.com/VYKtkKnHaUcAAAAj/quby-cute.gif)")
155
-
156
 
157
- with tab2:
158
- country_code = st.selectbox("Select Country Code", ['US', 'CA', 'GB','DE', 'FR', 'RU', 'BR','IN','MY','SG','JP','KR'])
 
159
  with st.container():
160
- st.write("Top 10 Trending Video")
161
- df = get_trending_videos(country_code)
162
- st.dataframe(df)
163
- if df is not None:
164
- # Display video titles
165
- selected_video_title = st.selectbox("Select a Video", df['title'])
166
- selected_video = df[df['title'] == selected_video_title].iloc[0]
167
-
168
- col4,col5 = st.columns(2)
 
 
 
 
169
  with col4:
170
  if selected_video is not None:
171
  image = get_picture_from_url(selected_video['thumbnail_link'])
172
  if image:
173
- st.image(image, caption='Thumbnail captured',width = 400, channels="BGR")
174
  with col5:
175
  st.write("Title:", selected_video['title'])
176
- category_name = next((key for key, value in categories.items() if value == selected_video['category_id']), 'Unknown Category')
 
 
177
  st.write("Category:", category_name)
178
  st.write("Duration:", selected_video['duration'])
179
- else:
180
- st.error('Failed to retrieve trending videos.')
181
-
182
- with tab3:
183
- with st.container():
184
- col6,col7 = st.columns(2)
185
 
 
 
 
 
 
186
  with col6:
187
  show_top_category()
188
-
189
  with col7:
 
 
 
 
 
190
  show_top_duration()
191
 
192
- with st.container():
193
- col8,col9 = st.columns(2)
194
- with col8:
 
 
 
195
  show_top_title()
 
 
196
 
197
- with col9:
 
 
198
  show_top_titleLength()
 
 
 
 
199
 
200
  def get_picture_from_url(url):
201
  try:
@@ -205,8 +232,8 @@ def get_picture_from_url(url):
205
  except:
206
  return None
207
 
208
- def show_top_category():
209
- topCategory = pd.read_csv('topCategory.csv')
210
  # Sort the DataFrame in ascending order based on predicted_prob column
211
  topCategory_sorted = topCategory.sort_values('predicted_prob')
212
 
@@ -214,6 +241,10 @@ def show_top_category():
214
  topCategory_sorted['rank'] = range(1, len(topCategory_sorted) + 1)
215
  # Map category_id to category name using the categories dictionary
216
  topCategory_sorted['category_name'] = topCategory_sorted['category_id'].map(lambda x: next((key for key, value in categories.items() if value == x), 'Unknown Category'))
 
 
 
 
217
 
218
  # Set a color palette for the plot
219
  color_palette = sns.color_palette('Set2', len(topCategory_sorted['category_id'].unique()))
@@ -228,28 +259,53 @@ def show_top_category():
228
  # Display the legend and the plot in Streamlit
229
  st.pyplot(fig)
230
 
231
def show_top_duration():
    """Draw a scatter plot of video duration against predicted probability.

    Reads ``topDuration.csv`` from the working directory, orders it by
    ``predicted_prob`` (highest first) and renders the ``duration`` /
    ``predicted_prob`` pairs in the current Streamlit container.
    """
    topDuration = pd.read_csv('topDuration.csv')
    topDuration_sorted = topDuration.sort_values('predicted_prob', ascending=False)

    # Duration goes on the x-axis, predicted probability on the y-axis.
    x = topDuration_sorted['duration']
    y = topDuration_sorted['predicted_prob']

    # An explicit figure is used instead of the implicit pyplot one: Streamlit
    # reruns the script per session and global pyplot state is shared.
    fig, ax = plt.subplots(figsize=(8, 5))  # (width, height) in inches
    # A palette only takes effect together with a hue variable; colouring by
    # the probability itself is what makes the coolwarm palette visible.
    sns.scatterplot(x=x, y=y, hue=y, palette='coolwarm', legend=False, ax=ax)
    ax.set_xlabel('Duration')
    ax.set_ylabel('Predicted Probability')
    ax.set_title('Top Durations')

    # Display the plot in Streamlit, then release the figure so repeated
    # reruns do not pile up open figures.
    st.pyplot(fig)
    plt.close(fig)
248
 
249
- def show_top_title():
250
- topTitle = pd.read_csv('topTitle.csv')
 
 
 
 
 
 
 
 
 
 
251
  # Sort the DataFrame in ascending order based on predicted_prob column
252
- topTitle_sorted = topTitle.sort_values('Importance Score')
 
 
 
 
253
  sns.set(style="whitegrid")
254
  plt.figure(figsize=(8, 6))
255
  sns.barplot(x='Importance Score', y='Feature', data=topTitle_sorted, palette="rocket")
@@ -259,22 +315,44 @@ def show_top_title():
259
  plt.tight_layout()
260
  st.pyplot(plt)
261
 
 
 
 
 
 
 
 
 
 
262
 
263
  def round_interval(interval_str):
264
  start, end = map(float, interval_str.strip('()[]').split(','))
265
  return f"({int(start)}, {int(end)})"
266
 
267
- def show_top_titleLength():
268
- topTitleLength = pd.read_csv('topTitleLength.csv')
269
-
270
  title_length_ranges = topTitleLength['titleLength']
271
  predicted_probs = topTitleLength['predicted_prob']
272
  rounded_ranges = [round_interval(range_val) for range_val in title_length_ranges]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  # Set the style of the plot
274
  sns.set(style='whitegrid')
275
  # Plot the graph using Seaborn
276
  plt.figure(figsize=(10, 6))
277
- sns.barplot(x=rounded_ranges, y=predicted_probs)
278
  plt.xlabel('Title Length Range')
279
  plt.ylabel('Predicted Probability')
280
  plt.title('Top 5 Ranges for Title Length vs. Predicted Probability')
@@ -282,6 +360,15 @@ def show_top_titleLength():
282
  plt.show()
283
  st.pyplot(plt)
284
 
 
 
 
 
 
 
 
 
 
285
  # Function to make predictions
286
  def predict_trend(title, duration, category_id):
287
  duration = str(duration)
 
10
  import seaborn as sns
11
  # Load the model
12
 
13
+ model = joblib.load('85pct(2).pkl')
14
 
15
  # Define the categories
16
  categories = {
 
96
  # st.write("Enter the video details below:")
97
 
98
  # Define a boolean flag variable to track prediction status
99
+
100
+ # Sidebar menu options
101
+ menu_options = ["Predict", "Trending", "Visualize"]
102
+ selected_option = st.sidebar.selectbox("Menu", menu_options)
103
+
104
  # Input fields
105
+ if selected_option == "Predict":
106
  with st.container():
107
  col1, col2, col3 = st.columns(3)
108
  getTitle, getDuration, getCategory = "", 0.00, 1
109
  getThumbnailUrl = ""
110
  with col1:
111
+ url = st.text_input("URL", placeholder="Enter a video URL")
112
  if url:
113
  metadata = get_metadata(url)
114
  if not metadata.empty:
 
115
  getTitle = metadata['title'].iloc[0]
116
  getDuration = metadata['duration'].iloc[0]
117
  category_id = metadata['category_id'].iloc[0]
118
  getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
119
  getCategory = int(category_id)
120
+
121
  if getThumbnailUrl is not None:
122
  picture = get_picture_from_url(getThumbnailUrl)
123
  if picture:
124
+ st.image(picture, caption='Thumbnail captured', width=320, channels="BGR")
125
  with col2:
126
+ title = st.text_input("Title", placeholder="Enter a video title", value=getTitle)
127
  duration = st.number_input("Duration (in seconds)", min_value=0.0, value=getDuration)
128
+ category = st.selectbox(
129
+ "Category", list(categories.keys()), index=list(categories.values()).index(getCategory)
130
+ )
131
+
132
  with col3:
133
  picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
134
  if picture is not None:
135
+ st.picture(picture, caption='Thumbnail Uploaded', width=400, channels="BGR")
136
+
137
+ # Convert category to category ID
138
  categoryId = categories[category]
139
 
140
  if st.button("Predict"):
141
  # Perform prediction
142
  if title is None or title.strip() == "" and duration == 0:
143
  st.warning("Please enter a title and duration.")
144
+
145
  else:
146
  if title is None or title.strip() == "":
147
  st.warning("Please enter a title")
148
 
149
  if duration == 0:
150
  st.warning("Please enter a duration.")
151
+
152
  else:
153
  prediction = predict_trend(title, duration, categoryId)
154
  if prediction[0] == 1:
 
157
  else:
158
  st.info("This video is predicted not to be a trend.")
159
  st.markdown("![Alt Text](https://media.tenor.com/VYKtkKnHaUcAAAAj/quby-cute.gif)")
 
160
 
161
+ elif selected_option == "Trending":
162
+ tab1, tab2 = st.tabs(["Trending Board", "Video Info"])
163
+ country_code = st.sidebar.selectbox("Select Country Code", ['US', 'CA', 'GB', 'DE', 'FR', 'RU', 'BR', 'IN', 'MY', 'SG', 'JP', 'KR'])
164
  with st.container():
165
+ with tab1:
166
+ st.write("Top 10 Trending Videos")
167
+ df = get_trending_videos(country_code)
168
+ st.dataframe(df)
169
+
170
+ with tab2:
171
+ if df is not None:
172
+ # Display video titles
173
+ selected_video_title = st.selectbox("Select a Video", df['title'])
174
+ selected_video = df[df['title'] == selected_video_title].iloc[0]
175
+ else:
176
+ st.error('Failed to retrieve trending videos.')
177
+ col4, col5 = st.columns(2)
178
  with col4:
179
  if selected_video is not None:
180
  image = get_picture_from_url(selected_video['thumbnail_link'])
181
  if image:
182
+ st.image(image, caption='Thumbnail captured', width=400, channels="BGR")
183
  with col5:
184
  st.write("Title:", selected_video['title'])
185
+ category_name = next(
186
+ (key for key, value in categories.items() if value == selected_video['category_id']), 'Unknown Category'
187
+ )
188
  st.write("Category:", category_name)
189
  st.write("Duration:", selected_video['duration'])
190
+
 
 
 
 
 
191
 
192
+ elif selected_option == "Visualize":
193
+ with st.container():
194
+ tab3, tab4, tab5, tab6 = st.tabs(["Best Category", "Best Duration","Best Title","Best Title Length"])
195
+ with tab3:
196
+ col6, col7 = st.columns(2)
197
  with col6:
198
  show_top_category()
199
+
200
  with col7:
201
+ show_best_category()
202
+
203
+ with tab4:
204
+ col8, col9 = st.columns(2)
205
+ with col8:
206
  show_top_duration()
207
 
208
+ with col9:
209
+ show_best_duration()
210
+
211
+ with tab5:
212
+ col10, col11 = st.columns(2)
213
+ with col10:
214
  show_top_title()
215
+ with col11:
216
+ show_best_title()
217
 
218
+ with tab6:
219
+ col12, col13 = st.columns(2)
220
+ with col12:
221
  show_top_titleLength()
222
+ with col13:
223
+ show_best_titleLength()
224
+
225
+
226
 
227
  def get_picture_from_url(url):
228
  try:
 
232
  except:
233
  return None
234
 
235
+ def get_top_category():
236
+ topCategory = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topCategory.csv')
237
  # Sort the DataFrame in ascending order based on predicted_prob column
238
  topCategory_sorted = topCategory.sort_values('predicted_prob')
239
 
 
241
  topCategory_sorted['rank'] = range(1, len(topCategory_sorted) + 1)
242
  # Map category_id to category name using the categories dictionary
243
  topCategory_sorted['category_name'] = topCategory_sorted['category_id'].map(lambda x: next((key for key, value in categories.items() if value == x), 'Unknown Category'))
244
+ return topCategory_sorted
245
+
246
+ def show_top_category():
247
+ topCategory_sorted = get_top_category()
248
 
249
  # Set a color palette for the plot
250
  color_palette = sns.color_palette('Set2', len(topCategory_sorted['category_id'].unique()))
 
259
  # Display the legend and the plot in Streamlit
260
  st.pyplot(fig)
261
 
262
def show_best_category():
    """Render the "Top 3 Categories" panel in Streamlit.

    Orders the table returned by ``get_top_category()`` by ``predicted_prob``
    and writes the first three ``category_name`` values as bold HTML spans,
    coloured green, yellow and orange by position.
    """
    ranked = get_top_category().sort_values('predicted_prob', ascending=True)
    # NOTE(review): ascending=True selects the three LOWEST predicted_prob
    # rows, while the duration / keyword / title-length panels take the
    # highest. Left unchanged because the meaning of predicted_prob in
    # topCategory.csv is not visible here - confirm which end is "top".
    top_3_categories = ranked['category_name'].head(3)

    st.header("Top 3 Categories")
    # Colour is assigned by position rather than by comparing values, so a
    # repeated name can never pick up the wrong place's colour.
    place_colors = ('#339933', '#ffcc33', '#ff9900')
    for color, category_name in zip(place_colors, top_3_categories):
        st.write(f"<span style='color:{color};font-weight:bold;'>{category_name}</span>", unsafe_allow_html=True)
271
 
272
def get_top_duration():
    """Load the duration table ordered by predicted probability.

    Returns:
        The contents of ``topDuration.csv`` as a DataFrame sorted by
        ``predicted_prob``, highest first.
    """
    # Relative path (resolved against the working directory, like the model
    # file): an absolute path into one developer's Desktop does not exist on
    # any other machine or on the deployed host.
    topDurationsorted = pd.read_csv('topDuration.csv')
    return topDurationsorted.sort_values('predicted_prob', ascending=False)
276
+
277
def show_top_duration():
    """Render a bar chart of predicted probability per duration range.

    Uses the table from ``get_top_duration()`` (columns ``duration_range``
    and ``predicted_prob``) and draws it in the current Streamlit container.
    """
    topDuration_sorted = get_top_duration()

    # Set the style of the plot
    sns.set(style='whitegrid')

    # Draw on an explicit figure: Streamlit reruns the script per session and
    # the implicit global pyplot figure is shared state. plt.show() is dropped
    # because there is no interactive window in a server process.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x='duration_range', y='predicted_prob', data=topDuration_sorted, ax=ax)
    ax.set_xlabel('Duration')
    ax.set_ylabel('Predicted Probability')
    ax.set_title('Top Durations')
    ax.tick_params(axis='x', labelrotation=45)

    st.pyplot(fig)
    # Release the figure so repeated reruns do not accumulate open figures.
    plt.close(fig)
290
 
291
+
292
def show_best_duration():
    """Render the "Top 3 Duration Range" panel in Streamlit.

    Takes the three rows with the highest ``predicted_prob`` from
    ``get_top_duration()`` and writes their ``duration_range`` values as bold
    HTML spans, coloured green, yellow and orange by position.
    """
    ranked = get_top_duration().sort_values('predicted_prob', ascending=False)
    top_3_range = ranked['duration_range'].head(3)

    st.header("Top 3 Duration Range")
    # Colour follows position, not value equality, and the loop variable no
    # longer shadows the builtin ``range``.
    place_colors = ('#339933', '#ffcc33', '#ff9900')
    for color, duration_range in zip(place_colors, top_3_range):
        st.write(f"<span style='color:{color};font-weight:bold;'>{duration_range}</span>", unsafe_allow_html=True)
300
+
301
def get_top_title():
    """Load the title-keyword importance table, most important first.

    Returns:
        The contents of ``topTitle.csv`` as a DataFrame sorted by
        ``Importance Score`` in descending order.
    """
    # Relative path (resolved against the working directory, like the model
    # file): an absolute path into one developer's Desktop does not exist on
    # any other machine or on the deployed host.
    topTitle = pd.read_csv('topTitle.csv')
    # Highest importance score first.
    return topTitle.sort_values('Importance Score', ascending=False)
306
+
307
+ def show_top_title():
308
+ topTitle_sorted = get_top_title()
309
  sns.set(style="whitegrid")
310
  plt.figure(figsize=(8, 6))
311
  sns.barplot(x='Importance Score', y='Feature', data=topTitle_sorted, palette="rocket")
 
315
  plt.tight_layout()
316
  st.pyplot(plt)
317
 
318
def show_best_title():
    """Render the "Top 3 Keyword" panel in Streamlit.

    Writes the first three ``Feature`` values of the table returned by
    ``get_top_title()`` as bold HTML spans, coloured green, yellow and orange
    by position.
    """
    # get_top_title() already returns rows ordered by 'Importance Score'
    # (highest first); the extra sort whose result was immediately
    # overwritten has been removed.
    top_3_keyword = get_top_title()['Feature'].head(3)

    st.header("Top 3 Keyword")
    # Colour follows position rather than value equality.
    place_colors = ('#339933', '#ffcc33', '#ff9900')
    for color, feature in zip(place_colors, top_3_keyword):
        st.write(f"<span style='color:{color};font-weight:bold;'>{feature}</span>", unsafe_allow_html=True)
326
+
327
 
328
  def round_interval(interval_str):
329
  start, end = map(float, interval_str.strip('()[]').split(','))
330
  return f"({int(start)}, {int(end)})"
331
 
332
def get_top_titleLength():
    """Load the title-length table with integer-bounded range labels.

    Reads ``topTitleLength.csv``, rewrites each ``titleLength`` interval
    string through ``round_interval`` and pairs it with its
    ``predicted_prob``.

    Returns:
        A DataFrame with columns ``rounded_ranges`` and ``predicted_probs``,
        sorted by ``predicted_probs`` in descending order.
    """
    # Relative path (resolved against the working directory, like the model
    # file): an absolute path into one developer's Desktop does not exist on
    # any other machine or on the deployed host.
    topTitleLength = pd.read_csv('topTitleLength.csv')

    rounded_ranges = [round_interval(range_val) for range_val in topTitleLength['titleLength']]
    table = pd.DataFrame({
        'rounded_ranges': rounded_ranges,
        'predicted_probs': topTitleLength['predicted_prob'],
    })

    # Sort the DataFrame by predicted_probs in descending order
    return table.sort_values(by='predicted_probs', ascending=False)
347
+
348
+ def show_top_titleLength():
349
+ topTitleLength = get_top_titleLength()
350
+
351
  # Set the style of the plot
352
  sns.set(style='whitegrid')
353
  # Plot the graph using Seaborn
354
  plt.figure(figsize=(10, 6))
355
+ sns.barplot(x='rounded_ranges', y='predicted_probs',data=topTitleLength)
356
  plt.xlabel('Title Length Range')
357
  plt.ylabel('Predicted Probability')
358
  plt.title('Top 5 Ranges for Title Length vs. Predicted Probability')
 
360
  plt.show()
361
  st.pyplot(plt)
362
 
363
def show_best_titleLength():
    """Render the "Top 3 Title Length Range" panel in Streamlit.

    Takes the three rows with the highest ``predicted_probs`` from
    ``get_top_titleLength()`` and writes their ``rounded_ranges`` labels as
    bold HTML spans, coloured green, yellow and orange by position.
    """
    ranked = get_top_titleLength().sort_values('predicted_probs', ascending=False)
    top_3_range = ranked['rounded_ranges'].head(3)

    st.header("Top 3 Title Length Range")
    # Colour follows position: the labels are integer-truncated, so two
    # different ranges can share a label and must not share a colour by
    # accident. The loop variable also no longer shadows the builtin
    # ``range``.
    place_colors = ('#339933', '#ffcc33', '#ff9900')
    for color, length_range in zip(place_colors, top_3_range):
        st.write(f"<span style='color:{color};font-weight:bold;'>{length_range}</span>", unsafe_allow_html=True)
371
+
372
  # Function to make predictions
373
  def predict_trend(title, duration, category_id):
374
  duration = str(duration)