Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ import seaborn as sns
|
|
11 |
# Load the model
|
12 |
def read_model(region):
|
13 |
if(region == "United States"):
|
14 |
-
model = joblib.load('85pct
|
15 |
return model
|
16 |
|
17 |
# Define the categories
|
@@ -21,10 +21,8 @@ categories = {
|
|
21 |
'Music': 10,
|
22 |
'Pets & Animals': 15,
|
23 |
'Sports' : 17,
|
24 |
-
'Short Movies' : 18,
|
25 |
'Travel & Events' : 19,
|
26 |
'Gaming' : 20,
|
27 |
-
'Videoblogging' : 21,
|
28 |
'People & Blogs' : 22,
|
29 |
'Comedy' : 23,
|
30 |
'Entertainment' : 24,
|
@@ -103,8 +101,7 @@ def main():
|
|
103 |
)
|
104 |
st.markdown("<body><img style = 'max-width: 20%;max-height: 20%;text-align: center;' src=\"https://media.tenor.com/U7OFq772kIEAAAAj/sweet-dreams.gif\"></body>",unsafe_allow_html=True)
|
105 |
st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
|
106 |
-
|
107 |
-
# st.write("Enter the video details below:")
|
108 |
|
109 |
@st.cache_data
|
110 |
def convert_df(df):
|
@@ -132,6 +129,7 @@ def main():
|
|
132 |
category_id = metadata['category_id'].iloc[0]
|
133 |
getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
|
134 |
getCategory = int(category_id)
|
|
|
135 |
|
136 |
if getThumbnailUrl is not None:
|
137 |
picture = get_picture_from_url(getThumbnailUrl)
|
@@ -144,6 +142,7 @@ def main():
|
|
144 |
"Category", list(categories.keys()), index=list(categories.values()).index(getCategory)
|
145 |
)
|
146 |
|
|
|
147 |
with col3:
|
148 |
picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
|
149 |
if picture is not None:
|
@@ -193,7 +192,6 @@ def main():
|
|
193 |
|
194 |
with tab2:
|
195 |
if df is not None:
|
196 |
-
# Display video titles
|
197 |
selected_video_title = st.selectbox("Select a Video", df['title'])
|
198 |
selected_video = df[df['title'] == selected_video_title].iloc[0]
|
199 |
else:
|
@@ -255,52 +253,41 @@ def get_picture_from_url(url):
|
|
255 |
return None
|
256 |
|
257 |
def get_top_category():
|
258 |
-
topCategory = pd.read_csv('topCategory.csv')
|
259 |
-
# Sort the DataFrame in ascending order based on predicted_prob column
|
260 |
topCategory_sorted = topCategory.sort_values('predicted_prob')
|
261 |
-
|
262 |
-
# Add a 'rank' column representing the ascending order of predicted_prob
|
263 |
topCategory_sorted['rank'] = range(1, len(topCategory_sorted) + 1)
|
264 |
-
# Map category_id to category name using the categories dictionary
|
265 |
topCategory_sorted['category_name'] = topCategory_sorted['category_id'].map(lambda x: next((key for key, value in categories.items() if value == x), 'Unknown Category'))
|
266 |
return topCategory_sorted
|
267 |
|
268 |
def show_top_category():
|
269 |
topCategory_sorted = get_top_category()
|
270 |
|
271 |
-
# Set a color palette for the plot
|
272 |
color_palette = sns.color_palette('Set2', len(topCategory_sorted['category_id'].unique()))
|
273 |
|
274 |
-
# Create a bar plot based on rank and predicted_prob columns with different colors for each category_name
|
275 |
fig, ax = plt.subplots(figsize=(8, 5))
|
276 |
sns.barplot(data=topCategory_sorted, x='rank', y='predicted_prob', hue='category_name', palette=color_palette)
|
277 |
plt.xlabel('Rank')
|
278 |
plt.ylabel('Predicted Probability')
|
279 |
plt.title('Top Categories')
|
280 |
-
|
281 |
-
# Display the legend and the plot in Streamlit
|
282 |
st.pyplot(fig)
|
283 |
|
284 |
def show_best_category():
|
285 |
topCategory_sorted = get_top_category()
|
286 |
-
top_3_categories = topCategory_sorted.sort_values('predicted_prob', ascending=
|
287 |
top_3_categories = top_3_categories['category_name'].head(3)
|
288 |
st.header("Top 3 Categories")
|
289 |
-
# Display the top 3 category IDs with colorful formatting in Streamlit
|
290 |
for category_id in top_3_categories:
|
291 |
color = '#339933' if category_id == top_3_categories.iloc[0] else '#ffcc33' if category_id == top_3_categories.iloc[1] else '#ff9900'
|
292 |
st.write(f"<span style='color:{color};font-weight:bold;'>{category_id}</span>", unsafe_allow_html=True)
|
293 |
|
294 |
def get_top_duration():
|
295 |
-
topDurationsorted = pd.read_csv('topDuration.csv')
|
296 |
topDurationsorted = topDurationsorted.sort_values('predicted_prob', ascending=False)
|
297 |
return topDurationsorted
|
298 |
|
299 |
def show_top_duration():
|
300 |
topDuration_sorted = get_top_duration()
|
301 |
-
# Set the style of the plot
|
302 |
sns.set(style='whitegrid')
|
303 |
-
# Plot the graph using Seaborn
|
304 |
plt.figure(figsize=(10, 6))
|
305 |
sns.barplot(x='duration_range', y='predicted_prob',data=topDuration_sorted)
|
306 |
plt.xlabel('Duration')
|
@@ -321,8 +308,7 @@ def show_best_duration():
|
|
321 |
st.write(f"<span style='color:{color};font-weight:bold;'>{range}</span>", unsafe_allow_html=True)
|
322 |
|
323 |
def get_top_title():
|
324 |
-
topTitle = pd.read_csv('topTitle.csv')
|
325 |
-
# Sort the DataFrame in ascending order based on predicted_prob column
|
326 |
topTitle_sorted = topTitle.sort_values('Importance Score', ascending=False)
|
327 |
return topTitle_sorted
|
328 |
|
@@ -352,7 +338,7 @@ def round_interval(interval_str):
|
|
352 |
return f"({int(start)}, {int(end)})"
|
353 |
|
354 |
def get_top_titleLength():
|
355 |
-
topTitleLength = pd.read_csv('topTitleLength.csv')
|
356 |
title_length_ranges = topTitleLength['titleLength']
|
357 |
predicted_probs = topTitleLength['predicted_prob']
|
358 |
rounded_ranges = [round_interval(range_val) for range_val in title_length_ranges]
|
@@ -362,17 +348,13 @@ def get_top_titleLength():
|
|
362 |
}
|
363 |
|
364 |
topTitleLength = pd.DataFrame(data)
|
365 |
-
|
366 |
-
# Sort the DataFrame by predicted_probs in descending order
|
367 |
sorted_titleLength = topTitleLength.sort_values(by='predicted_probs', ascending=False)
|
368 |
return sorted_titleLength
|
369 |
|
370 |
def show_top_titleLength():
|
371 |
topTitleLength = get_top_titleLength()
|
372 |
-
|
373 |
-
# Set the style of the plot
|
374 |
sns.set(style='whitegrid')
|
375 |
-
# Plot the graph using Seaborn
|
376 |
plt.figure(figsize=(10, 6))
|
377 |
sns.barplot(x='rounded_ranges', y='predicted_probs',data=topTitleLength)
|
378 |
plt.xlabel('Title Length Range')
|
@@ -396,9 +378,7 @@ def predict_trend(model,title, duration, category_id):
|
|
396 |
duration = str(duration)
|
397 |
category_id = int(category_id)
|
398 |
clean_new_title = preprocess(title)
|
399 |
-
# Join the preprocessed words back into a string
|
400 |
clean_new_title_str = ' '.join(clean_new_title)
|
401 |
-
# Prepare the input data
|
402 |
data = {
|
403 |
'cleanTitle': [clean_new_title_str],
|
404 |
'titleLength' : [len(title)],
|
|
|
11 |
# Load the model
|
12 |
def read_model(region):
|
13 |
if(region == "United States"):
|
14 |
+
model = joblib.load(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\85pct.pkl')
|
15 |
return model
|
16 |
|
17 |
# Define the categories
|
|
|
21 |
'Music': 10,
|
22 |
'Pets & Animals': 15,
|
23 |
'Sports' : 17,
|
|
|
24 |
'Travel & Events' : 19,
|
25 |
'Gaming' : 20,
|
|
|
26 |
'People & Blogs' : 22,
|
27 |
'Comedy' : 23,
|
28 |
'Entertainment' : 24,
|
|
|
101 |
)
|
102 |
st.markdown("<body><img style = 'max-width: 20%;max-height: 20%;text-align: center;' src=\"https://media.tenor.com/U7OFq772kIEAAAAj/sweet-dreams.gif\"></body>",unsafe_allow_html=True)
|
103 |
st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
|
104 |
+
|
|
|
105 |
|
106 |
@st.cache_data
|
107 |
def convert_df(df):
|
|
|
129 |
category_id = metadata['category_id'].iloc[0]
|
130 |
getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
|
131 |
getCategory = int(category_id)
|
132 |
+
getDescription = metadata['description'].iloc[0]
|
133 |
|
134 |
if getThumbnailUrl is not None:
|
135 |
picture = get_picture_from_url(getThumbnailUrl)
|
|
|
142 |
"Category", list(categories.keys()), index=list(categories.values()).index(getCategory)
|
143 |
)
|
144 |
|
145 |
+
|
146 |
with col3:
|
147 |
picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
|
148 |
if picture is not None:
|
|
|
192 |
|
193 |
with tab2:
|
194 |
if df is not None:
|
|
|
195 |
selected_video_title = st.selectbox("Select a Video", df['title'])
|
196 |
selected_video = df[df['title'] == selected_video_title].iloc[0]
|
197 |
else:
|
|
|
253 |
return None
|
254 |
|
255 |
def get_top_category():
|
256 |
+
topCategory = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topCategory.csv')
|
|
|
257 |
topCategory_sorted = topCategory.sort_values('predicted_prob')
|
|
|
|
|
258 |
topCategory_sorted['rank'] = range(1, len(topCategory_sorted) + 1)
|
|
|
259 |
topCategory_sorted['category_name'] = topCategory_sorted['category_id'].map(lambda x: next((key for key, value in categories.items() if value == x), 'Unknown Category'))
|
260 |
return topCategory_sorted
|
261 |
|
262 |
def show_top_category():
|
263 |
topCategory_sorted = get_top_category()
|
264 |
|
|
|
265 |
color_palette = sns.color_palette('Set2', len(topCategory_sorted['category_id'].unique()))
|
266 |
|
|
|
267 |
fig, ax = plt.subplots(figsize=(8, 5))
|
268 |
sns.barplot(data=topCategory_sorted, x='rank', y='predicted_prob', hue='category_name', palette=color_palette)
|
269 |
plt.xlabel('Rank')
|
270 |
plt.ylabel('Predicted Probability')
|
271 |
plt.title('Top Categories')
|
|
|
|
|
272 |
st.pyplot(fig)
|
273 |
|
274 |
def show_best_category():
|
275 |
topCategory_sorted = get_top_category()
|
276 |
+
top_3_categories = topCategory_sorted.sort_values('predicted_prob', ascending=False).head(3)
|
277 |
top_3_categories = top_3_categories['category_name'].head(3)
|
278 |
st.header("Top 3 Categories")
|
|
|
279 |
for category_id in top_3_categories:
|
280 |
color = '#339933' if category_id == top_3_categories.iloc[0] else '#ffcc33' if category_id == top_3_categories.iloc[1] else '#ff9900'
|
281 |
st.write(f"<span style='color:{color};font-weight:bold;'>{category_id}</span>", unsafe_allow_html=True)
|
282 |
|
283 |
def get_top_duration():
|
284 |
+
topDurationsorted = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topDuration.csv')
|
285 |
topDurationsorted = topDurationsorted.sort_values('predicted_prob', ascending=False)
|
286 |
return topDurationsorted
|
287 |
|
288 |
def show_top_duration():
|
289 |
topDuration_sorted = get_top_duration()
|
|
|
290 |
sns.set(style='whitegrid')
|
|
|
291 |
plt.figure(figsize=(10, 6))
|
292 |
sns.barplot(x='duration_range', y='predicted_prob',data=topDuration_sorted)
|
293 |
plt.xlabel('Duration')
|
|
|
308 |
st.write(f"<span style='color:{color};font-weight:bold;'>{range}</span>", unsafe_allow_html=True)
|
309 |
|
310 |
def get_top_title():
|
311 |
+
topTitle = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topTitle.csv')
|
|
|
312 |
topTitle_sorted = topTitle.sort_values('Importance Score', ascending=False)
|
313 |
return topTitle_sorted
|
314 |
|
|
|
338 |
return f"({int(start)}, {int(end)})"
|
339 |
|
340 |
def get_top_titleLength():
|
341 |
+
topTitleLength = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topTitleLength.csv')
|
342 |
title_length_ranges = topTitleLength['titleLength']
|
343 |
predicted_probs = topTitleLength['predicted_prob']
|
344 |
rounded_ranges = [round_interval(range_val) for range_val in title_length_ranges]
|
|
|
348 |
}
|
349 |
|
350 |
topTitleLength = pd.DataFrame(data)
|
|
|
|
|
351 |
sorted_titleLength = topTitleLength.sort_values(by='predicted_probs', ascending=False)
|
352 |
return sorted_titleLength
|
353 |
|
354 |
def show_top_titleLength():
|
355 |
topTitleLength = get_top_titleLength()
|
356 |
+
|
|
|
357 |
sns.set(style='whitegrid')
|
|
|
358 |
plt.figure(figsize=(10, 6))
|
359 |
sns.barplot(x='rounded_ranges', y='predicted_probs',data=topTitleLength)
|
360 |
plt.xlabel('Title Length Range')
|
|
|
378 |
duration = str(duration)
|
379 |
category_id = int(category_id)
|
380 |
clean_new_title = preprocess(title)
|
|
|
381 |
clean_new_title_str = ' '.join(clean_new_title)
|
|
|
382 |
data = {
|
383 |
'cleanTitle': [clean_new_title_str],
|
384 |
'titleLength' : [len(title)],
|