xinah3131 committed on
Commit
fc7ea67
·
1 Parent(s): d0a28e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -30
app.py CHANGED
@@ -11,7 +11,7 @@ import seaborn as sns
11
  # Load the model
12
  def read_model(region):
13
  if(region == "United States"):
14
- model = joblib.load('85pct(2).pkl')
15
  return model
16
 
17
  # Define the categories
@@ -21,10 +21,8 @@ categories = {
21
  'Music': 10,
22
  'Pets & Animals': 15,
23
  'Sports' : 17,
24
- 'Short Movies' : 18,
25
  'Travel & Events' : 19,
26
  'Gaming' : 20,
27
- 'Videoblogging' : 21,
28
  'People & Blogs' : 22,
29
  'Comedy' : 23,
30
  'Entertainment' : 24,
@@ -103,8 +101,7 @@ def main():
103
  )
104
  st.markdown("<body><img style = 'max-width: 20%;max-height: 20%;text-align: center;' src=\"https://media.tenor.com/U7OFq772kIEAAAAj/sweet-dreams.gif\"></body>",unsafe_allow_html=True)
105
  st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
106
- #https://www.freepnglogos.com/uploads/youtube-play-red-logo-png-transparent-background-6.png
107
- # st.write("Enter the video details below:")
108
 
109
  @st.cache_data
110
  def convert_df(df):
@@ -132,6 +129,7 @@ def main():
132
  category_id = metadata['category_id'].iloc[0]
133
  getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
134
  getCategory = int(category_id)
 
135
 
136
  if getThumbnailUrl is not None:
137
  picture = get_picture_from_url(getThumbnailUrl)
@@ -144,6 +142,7 @@ def main():
144
  "Category", list(categories.keys()), index=list(categories.values()).index(getCategory)
145
  )
146
 
 
147
  with col3:
148
  picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
149
  if picture is not None:
@@ -193,7 +192,6 @@ def main():
193
 
194
  with tab2:
195
  if df is not None:
196
- # Display video titles
197
  selected_video_title = st.selectbox("Select a Video", df['title'])
198
  selected_video = df[df['title'] == selected_video_title].iloc[0]
199
  else:
@@ -255,52 +253,41 @@ def get_picture_from_url(url):
255
  return None
256
 
257
  def get_top_category():
258
- topCategory = pd.read_csv('topCategory.csv')
259
- # Sort the DataFrame in ascending order based on predicted_prob column
260
  topCategory_sorted = topCategory.sort_values('predicted_prob')
261
-
262
- # Add a 'rank' column representing the ascending order of predicted_prob
263
  topCategory_sorted['rank'] = range(1, len(topCategory_sorted) + 1)
264
- # Map category_id to category name using the categories dictionary
265
  topCategory_sorted['category_name'] = topCategory_sorted['category_id'].map(lambda x: next((key for key, value in categories.items() if value == x), 'Unknown Category'))
266
  return topCategory_sorted
267
 
268
  def show_top_category():
269
  topCategory_sorted = get_top_category()
270
 
271
- # Set a color palette for the plot
272
  color_palette = sns.color_palette('Set2', len(topCategory_sorted['category_id'].unique()))
273
 
274
- # Create a bar plot based on rank and predicted_prob columns with different colors for each category_name
275
  fig, ax = plt.subplots(figsize=(8, 5))
276
  sns.barplot(data=topCategory_sorted, x='rank', y='predicted_prob', hue='category_name', palette=color_palette)
277
  plt.xlabel('Rank')
278
  plt.ylabel('Predicted Probability')
279
  plt.title('Top Categories')
280
-
281
- # Display the legend and the plot in Streamlit
282
  st.pyplot(fig)
283
 
284
  def show_best_category():
285
  topCategory_sorted = get_top_category()
286
- top_3_categories = topCategory_sorted.sort_values('predicted_prob', ascending=True).head(3)
287
  top_3_categories = top_3_categories['category_name'].head(3)
288
  st.header("Top 3 Categories")
289
- # Display the top 3 category IDs with colorful formatting in Streamlit
290
  for category_id in top_3_categories:
291
  color = '#339933' if category_id == top_3_categories.iloc[0] else '#ffcc33' if category_id == top_3_categories.iloc[1] else '#ff9900'
292
  st.write(f"<span style='color:{color};font-weight:bold;'>{category_id}</span>", unsafe_allow_html=True)
293
 
294
  def get_top_duration():
295
- topDurationsorted = pd.read_csv('topDuration.csv')
296
  topDurationsorted = topDurationsorted.sort_values('predicted_prob', ascending=False)
297
  return topDurationsorted
298
 
299
  def show_top_duration():
300
  topDuration_sorted = get_top_duration()
301
- # Set the style of the plot
302
  sns.set(style='whitegrid')
303
- # Plot the graph using Seaborn
304
  plt.figure(figsize=(10, 6))
305
  sns.barplot(x='duration_range', y='predicted_prob',data=topDuration_sorted)
306
  plt.xlabel('Duration')
@@ -321,8 +308,7 @@ def show_best_duration():
321
  st.write(f"<span style='color:{color};font-weight:bold;'>{range}</span>", unsafe_allow_html=True)
322
 
323
  def get_top_title():
324
- topTitle = pd.read_csv('topTitle.csv')
325
- # Sort the DataFrame in ascending order based on predicted_prob column
326
  topTitle_sorted = topTitle.sort_values('Importance Score', ascending=False)
327
  return topTitle_sorted
328
 
@@ -352,7 +338,7 @@ def round_interval(interval_str):
352
  return f"({int(start)}, {int(end)})"
353
 
354
  def get_top_titleLength():
355
- topTitleLength = pd.read_csv('topTitleLength.csv')
356
  title_length_ranges = topTitleLength['titleLength']
357
  predicted_probs = topTitleLength['predicted_prob']
358
  rounded_ranges = [round_interval(range_val) for range_val in title_length_ranges]
@@ -362,17 +348,13 @@ def get_top_titleLength():
362
  }
363
 
364
  topTitleLength = pd.DataFrame(data)
365
-
366
- # Sort the DataFrame by predicted_probs in descending order
367
  sorted_titleLength = topTitleLength.sort_values(by='predicted_probs', ascending=False)
368
  return sorted_titleLength
369
 
370
  def show_top_titleLength():
371
  topTitleLength = get_top_titleLength()
372
-
373
- # Set the style of the plot
374
  sns.set(style='whitegrid')
375
- # Plot the graph using Seaborn
376
  plt.figure(figsize=(10, 6))
377
  sns.barplot(x='rounded_ranges', y='predicted_probs',data=topTitleLength)
378
  plt.xlabel('Title Length Range')
@@ -396,9 +378,7 @@ def predict_trend(model,title, duration, category_id):
396
  duration = str(duration)
397
  category_id = int(category_id)
398
  clean_new_title = preprocess(title)
399
- # Join the preprocessed words back into a string
400
  clean_new_title_str = ' '.join(clean_new_title)
401
- # Prepare the input data
402
  data = {
403
  'cleanTitle': [clean_new_title_str],
404
  'titleLength' : [len(title)],
 
11
  # Load the model
12
  def read_model(region):
13
  if(region == "United States"):
14
+ model = joblib.load(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\85pct.pkl')
15
  return model
16
 
17
  # Define the categories
 
21
  'Music': 10,
22
  'Pets & Animals': 15,
23
  'Sports' : 17,
 
24
  'Travel & Events' : 19,
25
  'Gaming' : 20,
 
26
  'People & Blogs' : 22,
27
  'Comedy' : 23,
28
  'Entertainment' : 24,
 
101
  )
102
  st.markdown("<body><img style = 'max-width: 20%;max-height: 20%;text-align: center;' src=\"https://media.tenor.com/U7OFq772kIEAAAAj/sweet-dreams.gif\"></body>",unsafe_allow_html=True)
103
  st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
104
+
 
105
 
106
  @st.cache_data
107
  def convert_df(df):
 
129
  category_id = metadata['category_id'].iloc[0]
130
  getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
131
  getCategory = int(category_id)
132
+ getDescription = metadata['description'].iloc[0]
133
 
134
  if getThumbnailUrl is not None:
135
  picture = get_picture_from_url(getThumbnailUrl)
 
142
  "Category", list(categories.keys()), index=list(categories.values()).index(getCategory)
143
  )
144
 
145
+
146
  with col3:
147
  picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
148
  if picture is not None:
 
192
 
193
  with tab2:
194
  if df is not None:
 
195
  selected_video_title = st.selectbox("Select a Video", df['title'])
196
  selected_video = df[df['title'] == selected_video_title].iloc[0]
197
  else:
 
253
  return None
254
 
255
  def get_top_category():
256
+ topCategory = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topCategory.csv')
 
257
  topCategory_sorted = topCategory.sort_values('predicted_prob')
 
 
258
  topCategory_sorted['rank'] = range(1, len(topCategory_sorted) + 1)
 
259
  topCategory_sorted['category_name'] = topCategory_sorted['category_id'].map(lambda x: next((key for key, value in categories.items() if value == x), 'Unknown Category'))
260
  return topCategory_sorted
261
 
262
  def show_top_category():
263
  topCategory_sorted = get_top_category()
264
 
 
265
  color_palette = sns.color_palette('Set2', len(topCategory_sorted['category_id'].unique()))
266
 
 
267
  fig, ax = plt.subplots(figsize=(8, 5))
268
  sns.barplot(data=topCategory_sorted, x='rank', y='predicted_prob', hue='category_name', palette=color_palette)
269
  plt.xlabel('Rank')
270
  plt.ylabel('Predicted Probability')
271
  plt.title('Top Categories')
 
 
272
  st.pyplot(fig)
273
 
274
  def show_best_category():
275
  topCategory_sorted = get_top_category()
276
+ top_3_categories = topCategory_sorted.sort_values('predicted_prob', ascending=False).head(3)
277
  top_3_categories = top_3_categories['category_name'].head(3)
278
  st.header("Top 3 Categories")
 
279
  for category_id in top_3_categories:
280
  color = '#339933' if category_id == top_3_categories.iloc[0] else '#ffcc33' if category_id == top_3_categories.iloc[1] else '#ff9900'
281
  st.write(f"<span style='color:{color};font-weight:bold;'>{category_id}</span>", unsafe_allow_html=True)
282
 
283
  def get_top_duration():
284
+ topDurationsorted = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topDuration.csv')
285
  topDurationsorted = topDurationsorted.sort_values('predicted_prob', ascending=False)
286
  return topDurationsorted
287
 
288
  def show_top_duration():
289
  topDuration_sorted = get_top_duration()
 
290
  sns.set(style='whitegrid')
 
291
  plt.figure(figsize=(10, 6))
292
  sns.barplot(x='duration_range', y='predicted_prob',data=topDuration_sorted)
293
  plt.xlabel('Duration')
 
308
  st.write(f"<span style='color:{color};font-weight:bold;'>{range}</span>", unsafe_allow_html=True)
309
 
310
  def get_top_title():
311
+ topTitle = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topTitle.csv')
 
312
  topTitle_sorted = topTitle.sort_values('Importance Score', ascending=False)
313
  return topTitle_sorted
314
 
 
338
  return f"({int(start)}, {int(end)})"
339
 
340
  def get_top_titleLength():
341
+ topTitleLength = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topTitleLength.csv')
342
  title_length_ranges = topTitleLength['titleLength']
343
  predicted_probs = topTitleLength['predicted_prob']
344
  rounded_ranges = [round_interval(range_val) for range_val in title_length_ranges]
 
348
  }
349
 
350
  topTitleLength = pd.DataFrame(data)
 
 
351
  sorted_titleLength = topTitleLength.sort_values(by='predicted_probs', ascending=False)
352
  return sorted_titleLength
353
 
354
  def show_top_titleLength():
355
  topTitleLength = get_top_titleLength()
356
+
 
357
  sns.set(style='whitegrid')
 
358
  plt.figure(figsize=(10, 6))
359
  sns.barplot(x='rounded_ranges', y='predicted_probs',data=topTitleLength)
360
  plt.xlabel('Title Length Range')
 
378
  duration = str(duration)
379
  category_id = int(category_id)
380
  clean_new_title = preprocess(title)
 
381
  clean_new_title_str = ' '.join(clean_new_title)
 
382
  data = {
383
  'cleanTitle': [clean_new_title_str],
384
  'titleLength' : [len(title)],