Spaces:

xinah3131
/

youtube-trend-prediction

Sleeping

App Files Files Community

xinah3131 committed on Jun 25, 2023

Commit

fc7ea67

1 Parent(s): d0a28e7

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -30

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ import seaborn as sns
 # Load the model
 def read_model(region):
     if(region == "United States"):
-        model = joblib.load('85pct(2).pkl')
     return model
 # Define the categories
@@ -21,10 +21,8 @@ categories = {
     'Music': 10,
     'Pets & Animals': 15,
     'Sports' : 17,
-    'Short Movies' : 18,
     'Travel & Events' : 19,
     'Gaming' : 20,
-    'Videoblogging' : 21,
     'People & Blogs' : 22,
     'Comedy' : 23,
     'Entertainment' : 24,
@@ -103,8 +101,7 @@ def main():
     )
     st.markdown("<body><img style = 'max-width: 20%;max-height: 20%;text-align: center;' src=\"https://media.tenor.com/U7OFq772kIEAAAAj/sweet-dreams.gif\"></body>",unsafe_allow_html=True)
     st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
-    #https://www.freepnglogos.com/uploads/youtube-play-red-logo-png-transparent-background-6.png
-    # st.write("Enter the video details below:")
     @st.cache_data
     def convert_df(df):
@@ -132,6 +129,7 @@ def main():
                         category_id = metadata['category_id'].iloc[0]
                         getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
                         getCategory = int(category_id)
                         if getThumbnailUrl is not None:
                             picture = get_picture_from_url(getThumbnailUrl)
@@ -144,6 +142,7 @@ def main():
                     "Category", list(categories.keys()), index=list(categories.values()).index(getCategory)
                 )
             with col3:
                 picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
                 if picture is not None:
@@ -193,7 +192,6 @@ def main():
             with tab2:
                 if df is not None:
-                    # Display video titles
                     selected_video_title = st.selectbox("Select a Video", df['title'])
                     selected_video = df[df['title'] == selected_video_title].iloc[0]
                 else:
@@ -255,52 +253,41 @@ def get_picture_from_url(url):
         return None
 def get_top_category():
-    topCategory = pd.read_csv('topCategory.csv')
-    # Sort the DataFrame in ascending order based on predicted_prob column
     topCategory_sorted = topCategory.sort_values('predicted_prob')
-    # Add a 'rank' column representing the ascending order of predicted_prob
     topCategory_sorted['rank'] = range(1, len(topCategory_sorted) + 1)
-    # Map category_id to category name using the categories dictionary
     topCategory_sorted['category_name'] = topCategory_sorted['category_id'].map(lambda x: next((key for key, value in categories.items() if value == x), 'Unknown Category'))
     return topCategory_sorted
 def show_top_category():
     topCategory_sorted = get_top_category()
-    # Set a color palette for the plot
     color_palette = sns.color_palette('Set2', len(topCategory_sorted['category_id'].unique()))
-    # Create a bar plot based on rank and predicted_prob columns with different colors for each category_name
     fig, ax = plt.subplots(figsize=(8, 5))
     sns.barplot(data=topCategory_sorted, x='rank', y='predicted_prob', hue='category_name', palette=color_palette)
     plt.xlabel('Rank')
     plt.ylabel('Predicted Probability')
     plt.title('Top Categories')
-    # Display the legend and the plot in Streamlit
     st.pyplot(fig)
 def show_best_category():
     topCategory_sorted = get_top_category()
-    top_3_categories = topCategory_sorted.sort_values('predicted_prob', ascending=True).head(3)
     top_3_categories = top_3_categories['category_name'].head(3)
     st.header("Top 3 Categories")
-    # Display the top 3 category IDs with colorful formatting in Streamlit
     for category_id in top_3_categories:
         color = '#339933' if category_id == top_3_categories.iloc[0] else '#ffcc33' if category_id == top_3_categories.iloc[1] else '#ff9900'
         st.write(f"<span style='color:{color};font-weight:bold;'>{category_id}</span>", unsafe_allow_html=True)
 def get_top_duration():
-    topDurationsorted = pd.read_csv('topDuration.csv')
     topDurationsorted = topDurationsorted.sort_values('predicted_prob', ascending=False)
     return topDurationsorted
 def show_top_duration():
     topDuration_sorted = get_top_duration()
-    # Set the style of the plot
     sns.set(style='whitegrid')
-    # Plot the graph using Seaborn
     plt.figure(figsize=(10, 6))
     sns.barplot(x='duration_range', y='predicted_prob',data=topDuration_sorted)
     plt.xlabel('Duration')
@@ -321,8 +308,7 @@ def show_best_duration():
         st.write(f"<span style='color:{color};font-weight:bold;'>{range}</span>", unsafe_allow_html=True)
 def get_top_title():
-    topTitle = pd.read_csv('topTitle.csv')
-    # Sort the DataFrame in ascending order based on predicted_prob column
     topTitle_sorted = topTitle.sort_values('Importance Score', ascending=False)
     return topTitle_sorted
@@ -352,7 +338,7 @@ def round_interval(interval_str):
     return f"({int(start)}, {int(end)})"
 def get_top_titleLength():
-    topTitleLength = pd.read_csv('topTitleLength.csv')
     title_length_ranges = topTitleLength['titleLength']
     predicted_probs = topTitleLength['predicted_prob']
     rounded_ranges = [round_interval(range_val) for range_val in title_length_ranges]
@@ -362,17 +348,13 @@ def get_top_titleLength():
     }
     topTitleLength = pd.DataFrame(data)
-    # Sort the DataFrame by predicted_probs in descending order
     sorted_titleLength = topTitleLength.sort_values(by='predicted_probs', ascending=False)
     return sorted_titleLength
 def show_top_titleLength():
     topTitleLength = get_top_titleLength()
-    # Set the style of the plot
     sns.set(style='whitegrid')
-    # Plot the graph using Seaborn
     plt.figure(figsize=(10, 6))
     sns.barplot(x='rounded_ranges', y='predicted_probs',data=topTitleLength)
     plt.xlabel('Title Length Range')
@@ -396,9 +378,7 @@ def predict_trend(model,title, duration, category_id):
     duration = str(duration)
     category_id = int(category_id)
     clean_new_title = preprocess(title)
-    # Join the preprocessed words back into a string
     clean_new_title_str = ' '.join(clean_new_title)
-    # Prepare the input data
     data = {
         'cleanTitle': [clean_new_title_str],
         'titleLength' : [len(title)],

 # Load the model
 def read_model(region):
     if(region == "United States"):
+        model = joblib.load(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\85pct.pkl')
     return model
 # Define the categories
     'Music': 10,
     'Pets & Animals': 15,
     'Sports' : 17,
     'Travel & Events' : 19,
     'Gaming' : 20,
     'People & Blogs' : 22,
     'Comedy' : 23,
     'Entertainment' : 24,
     )
     st.markdown("<body><img style = 'max-width: 20%;max-height: 20%;text-align: center;' src=\"https://media.tenor.com/U7OFq772kIEAAAAj/sweet-dreams.gif\"></body>",unsafe_allow_html=True)
     st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
     @st.cache_data
     def convert_df(df):
                         category_id = metadata['category_id'].iloc[0]
                         getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
                         getCategory = int(category_id)
+                        getDescription = metadata['description'].iloc[0]
                         if getThumbnailUrl is not None:
                             picture = get_picture_from_url(getThumbnailUrl)
                     "Category", list(categories.keys()), index=list(categories.values()).index(getCategory)
                 )
             with col3:
                 picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
                 if picture is not None:
             with tab2:
                 if df is not None:
                     selected_video_title = st.selectbox("Select a Video", df['title'])
                     selected_video = df[df['title'] == selected_video_title].iloc[0]
                 else:
         return None
 def get_top_category():
+    topCategory = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topCategory.csv')
     topCategory_sorted = topCategory.sort_values('predicted_prob')
     topCategory_sorted['rank'] = range(1, len(topCategory_sorted) + 1)
     topCategory_sorted['category_name'] = topCategory_sorted['category_id'].map(lambda x: next((key for key, value in categories.items() if value == x), 'Unknown Category'))
     return topCategory_sorted
 def show_top_category():
     topCategory_sorted = get_top_category()
     color_palette = sns.color_palette('Set2', len(topCategory_sorted['category_id'].unique()))
     fig, ax = plt.subplots(figsize=(8, 5))
     sns.barplot(data=topCategory_sorted, x='rank', y='predicted_prob', hue='category_name', palette=color_palette)
     plt.xlabel('Rank')
     plt.ylabel('Predicted Probability')
     plt.title('Top Categories')
     st.pyplot(fig)
 def show_best_category():
     topCategory_sorted = get_top_category()
+    top_3_categories = topCategory_sorted.sort_values('predicted_prob', ascending=False).head(3)
     top_3_categories = top_3_categories['category_name'].head(3)
     st.header("Top 3 Categories")
     for category_id in top_3_categories:
         color = '#339933' if category_id == top_3_categories.iloc[0] else '#ffcc33' if category_id == top_3_categories.iloc[1] else '#ff9900'
         st.write(f"<span style='color:{color};font-weight:bold;'>{category_id}</span>", unsafe_allow_html=True)
 def get_top_duration():
+    topDurationsorted = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topDuration.csv')
     topDurationsorted = topDurationsorted.sort_values('predicted_prob', ascending=False)
     return topDurationsorted
 def show_top_duration():
     topDuration_sorted = get_top_duration()
     sns.set(style='whitegrid')
     plt.figure(figsize=(10, 6))
     sns.barplot(x='duration_range', y='predicted_prob',data=topDuration_sorted)
     plt.xlabel('Duration')
         st.write(f"<span style='color:{color};font-weight:bold;'>{range}</span>", unsafe_allow_html=True)
 def get_top_title():
+    topTitle = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topTitle.csv')
     topTitle_sorted = topTitle.sort_values('Importance Score', ascending=False)
     return topTitle_sorted
     return f"({int(start)}, {int(end)})"
 def get_top_titleLength():
+    topTitleLength = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topTitleLength.csv')
     title_length_ranges = topTitleLength['titleLength']
     predicted_probs = topTitleLength['predicted_prob']
     rounded_ranges = [round_interval(range_val) for range_val in title_length_ranges]
     }
     topTitleLength = pd.DataFrame(data)
     sorted_titleLength = topTitleLength.sort_values(by='predicted_probs', ascending=False)
     return sorted_titleLength
 def show_top_titleLength():
     topTitleLength = get_top_titleLength()
     sns.set(style='whitegrid')
     plt.figure(figsize=(10, 6))
     sns.barplot(x='rounded_ranges', y='predicted_probs',data=topTitleLength)
     plt.xlabel('Title Length Range')
     duration = str(duration)
     category_id = int(category_id)
     clean_new_title = preprocess(title)
     clean_new_title_str = ' '.join(clean_new_title)
     data = {
         'cleanTitle': [clean_new_title_str],
         'titleLength' : [len(title)],