Spaces:

xinah3131
/

youtube-trend-prediction

Sleeping

App Files Files Community

xinah3131 committed on Jun 7, 2023

Commit

add8b88

1 Parent(s): c39a1ac

Update app.py

Browse files

Files changed (1) hide show

app.py +148 -61

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ import numpy as np
 import seaborn as sns
 # Load the model
-model = joblib.load('85pct(new).pkl')
 # Define the categories
 categories = {
@@ -96,54 +96,59 @@ def main():
     # st.write("Enter the video details below:")
     # Define a boolean flag variable to track prediction status
-    prediction_done = False
-    tab1, tab2, tab3 = st.tabs(["Predict", "Trending","Visualize"])
     # Input fields
-    with tab1:
         with st.container():
             col1, col2, col3 = st.columns(3)
             getTitle, getDuration, getCategory = "", 0.00, 1
             getThumbnailUrl = ""
             with col1:
-                url = st.text_input("URL",placeholder="Enter a video url")
                 if url:
                     metadata = get_metadata(url)
                     if not metadata.empty:
                         getTitle = metadata['title'].iloc[0]
                         getDuration = metadata['duration'].iloc[0]
                         category_id = metadata['category_id'].iloc[0]
                         getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
                         getCategory = int(category_id)
                         if getThumbnailUrl is not None:
                             picture = get_picture_from_url(getThumbnailUrl)
                             if picture:
-                                st.image(picture, caption='Thumbnail captured',width = 400, channels="BGR")
             with col2:
-                title = st.text_input("Title", placeholder="Enter a video title",value=getTitle)
                 duration = st.number_input("Duration (in seconds)", min_value=0.0, value=getDuration)
-                category = st.selectbox("Category", list(categories.keys()), index=list(categories.values()).index(getCategory))
             with col3:
                 picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
                 if picture is not None:
-                    st.picture(picture,caption='Thumbnail Uploaded',width = 400, channels="BGR")
-    # Convert category to category ID
         categoryId = categories[category]
         if st.button("Predict"):
             # Perform prediction
             if title is None or title.strip() == "" and duration == 0:
                 st.warning("Please enter a title and duration.")
             else:
                 if title is None or title.strip() == "":
                     st.warning("Please enter a title")
                 if duration == 0:
                     st.warning("Please enter a duration.")
                 else:
                     prediction = predict_trend(title, duration, categoryId)
                     if prediction[0] == 1:
@@ -152,50 +157,72 @@ def main():
                     else:
                         st.info("This video is predicted not to be a trend.")
                         st.markdown("![Alt Text](https://media.tenor.com/VYKtkKnHaUcAAAAj/quby-cute.gif)")
-    with tab2:
-        country_code = st.selectbox("Select Country Code", ['US', 'CA', 'GB','DE', 'FR', 'RU', 'BR','IN','MY','SG','JP','KR'])
         with st.container():
-            st.write("Top 10 Trending Video")
-            df = get_trending_videos(country_code)
-            st.dataframe(df)
-            if df is not None:
-                # Display video titles
-                selected_video_title = st.selectbox("Select a Video", df['title'])
-                selected_video = df[df['title'] == selected_video_title].iloc[0]
-                col4,col5 = st.columns(2)
                 with col4:
                     if selected_video is not None:
                         image = get_picture_from_url(selected_video['thumbnail_link'])
                         if image:
-                            st.image(image, caption='Thumbnail captured',width = 400, channels="BGR")
                 with col5:
                     st.write("Title:", selected_video['title'])
-                    category_name = next((key for key, value in categories.items() if value == selected_video['category_id']), 'Unknown Category')
                     st.write("Category:", category_name)
                     st.write("Duration:", selected_video['duration'])
-            else:
-                st.error('Failed to retrieve trending videos.')
-        with tab3:
-            with st.container():
-                col6,col7 = st.columns(2)
                 with col6:
                     show_top_category()
                 with col7:
                     show_top_duration()
-            with st.container():
-                col8,col9 = st.columns(2)
-                with col8:
                     show_top_title()
-                with col9:
                     show_top_titleLength()
 def get_picture_from_url(url):
     try:
@@ -205,8 +232,8 @@ def get_picture_from_url(url):
     except:
         return None
-def show_top_category():
-    topCategory = pd.read_csv('topCategory.csv')
     # Sort the DataFrame in ascending order based on predicted_prob column
     topCategory_sorted = topCategory.sort_values('predicted_prob')
@@ -214,6 +241,10 @@ def show_top_category():
     topCategory_sorted['rank'] = range(1, len(topCategory_sorted) + 1)
     # Map category_id to category name using the categories dictionary
     topCategory_sorted['category_name'] = topCategory_sorted['category_id'].map(lambda x: next((key for key, value in categories.items() if value == x), 'Unknown Category'))
     # Set a color palette for the plot
     color_palette = sns.color_palette('Set2', len(topCategory_sorted['category_id'].unique()))
@@ -228,28 +259,53 @@ def show_top_category():
     # Display the legend and the plot in Streamlit
     st.pyplot(fig)
-def show_top_duration():
-    topDuration = pd.read_csv('topDuration.csv')
-    topDuration_sorted = topDuration.sort_values('predicted_prob', ascending=False)
-    # Set the duration as the x-axis and predicted_prob as the y-axis
-    x = topDuration_sorted['duration']
-    y = topDuration_sorted['predicted_prob']
-    # Create a scatter plot of duration vs predicted_prob using seaborn
-    plt.figure(figsize=(8, 5))  # Adjust the figure size here (width, height)
-    sns.scatterplot(x=x, y=y, palette='coolwarm')  # Use coolwarm palette for colorful plot
     plt.xlabel('Duration')
     plt.ylabel('Predicted Probability')
     plt.title('Top Durations')
-    # Display the plot in Streamlit
     st.pyplot(plt)
-def show_top_title():
-    topTitle = pd.read_csv('topTitle.csv')
     # Sort the DataFrame in ascending order based on predicted_prob column
-    topTitle_sorted = topTitle.sort_values('Importance Score')
     sns.set(style="whitegrid")
     plt.figure(figsize=(8, 6))
     sns.barplot(x='Importance Score', y='Feature', data=topTitle_sorted, palette="rocket")
@@ -259,22 +315,44 @@ def show_top_title():
     plt.tight_layout()
     st.pyplot(plt)
 def round_interval(interval_str):
     start, end = map(float, interval_str.strip('()[]').split(','))
     return f"({int(start)}, {int(end)})"
-def show_top_titleLength():
-    topTitleLength = pd.read_csv('topTitleLength.csv')
     title_length_ranges = topTitleLength['titleLength']
     predicted_probs = topTitleLength['predicted_prob']
     rounded_ranges = [round_interval(range_val) for range_val in title_length_ranges]
     # Set the style of the plot
     sns.set(style='whitegrid')
     # Plot the graph using Seaborn
     plt.figure(figsize=(10, 6))
-    sns.barplot(x=rounded_ranges, y=predicted_probs)
     plt.xlabel('Title Length Range')
     plt.ylabel('Predicted Probability')
     plt.title('Top 5 Ranges for Title Length vs. Predicted Probability')
@@ -282,6 +360,15 @@ def show_top_titleLength():
     plt.show()
     st.pyplot(plt)
 # Function to make predictions
 def predict_trend(title, duration, category_id):
     duration = str(duration)

 import seaborn as sns
 # Load the model
+model = joblib.load('85pct(2).pkl')
 # Define the categories
 categories = {
     # st.write("Enter the video details below:")
     # Define a boolean flag variable to track prediction status
+    # Sidebar menu options
+    menu_options = ["Predict", "Trending", "Visualize"]
+    selected_option = st.sidebar.selectbox("Menu", menu_options)
     # Input fields
+    if selected_option == "Predict":
         with st.container():
             col1, col2, col3 = st.columns(3)
             getTitle, getDuration, getCategory = "", 0.00, 1
             getThumbnailUrl = ""
             with col1:
+                url = st.text_input("URL", placeholder="Enter a video URL")
                 if url:
                     metadata = get_metadata(url)
                     if not metadata.empty:
                         getTitle = metadata['title'].iloc[0]
                         getDuration = metadata['duration'].iloc[0]
                         category_id = metadata['category_id'].iloc[0]
                         getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
                         getCategory = int(category_id)
                         if getThumbnailUrl is not None:
                             picture = get_picture_from_url(getThumbnailUrl)
                             if picture:
+                                st.image(picture, caption='Thumbnail captured', width=320, channels="BGR")
             with col2:
+                title = st.text_input("Title", placeholder="Enter a video title", value=getTitle)
                 duration = st.number_input("Duration (in seconds)", min_value=0.0, value=getDuration)
+                category = st.selectbox(
+                    "Category", list(categories.keys()), index=list(categories.values()).index(getCategory)
+                )
             with col3:
                 picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
                 if picture is not None:
+                    st.picture(picture, caption='Thumbnail Uploaded', width=400, channels="BGR")
+        # Convert category to category ID
         categoryId = categories[category]
         if st.button("Predict"):
             # Perform prediction
             if title is None or title.strip() == "" and duration == 0:
                 st.warning("Please enter a title and duration.")
             else:
                 if title is None or title.strip() == "":
                     st.warning("Please enter a title")
                 if duration == 0:
                     st.warning("Please enter a duration.")
                 else:
                     prediction = predict_trend(title, duration, categoryId)
                     if prediction[0] == 1:
                     else:
                         st.info("This video is predicted not to be a trend.")
                         st.markdown("![Alt Text](https://media.tenor.com/VYKtkKnHaUcAAAAj/quby-cute.gif)")
+    elif selected_option == "Trending":
+        tab1, tab2 = st.tabs(["Trending Board", "Video Info"])
+        country_code = st.sidebar.selectbox("Select Country Code", ['US', 'CA', 'GB', 'DE', 'FR', 'RU', 'BR', 'IN', 'MY', 'SG', 'JP', 'KR'])
         with st.container():
+            with tab1:
+                st.write("Top 10 Trending Videos")
+                df = get_trending_videos(country_code)
+                st.dataframe(df)
+            with tab2:
+                if df is not None:
+                    # Display video titles
+                    selected_video_title = st.selectbox("Select a Video", df['title'])
+                    selected_video = df[df['title'] == selected_video_title].iloc[0]
+                else:
+                    st.error('Failed to retrieve trending videos.')
+                col4, col5 = st.columns(2)
                 with col4:
                     if selected_video is not None:
                         image = get_picture_from_url(selected_video['thumbnail_link'])
                         if image:
+                            st.image(image, caption='Thumbnail captured', width=400, channels="BGR")
                 with col5:
                     st.write("Title:", selected_video['title'])
+                    category_name = next(
+                        (key for key, value in categories.items() if value == selected_video['category_id']), 'Unknown Category'
+                    )
                     st.write("Category:", category_name)
                     st.write("Duration:", selected_video['duration'])
+    elif selected_option == "Visualize":
+        with st.container():
+            tab3, tab4, tab5, tab6 = st.tabs(["Best Category", "Best Duration","Best Title","Best Title Length"])
+            with tab3:
+                col6, col7 = st.columns(2)
                 with col6:
                     show_top_category()
                 with col7:
+                    show_best_category()
+            with tab4:
+                col8, col9 = st.columns(2)
+                with col8:
                     show_top_duration()
+                with col9:
+                    show_best_duration()
+            with tab5:
+                col10, col11 = st.columns(2)
+                with col10:
                     show_top_title()
+                with col11:
+                    show_best_title()
+            with tab6:
+                col12, col13 = st.columns(2)
+                with col12:
                     show_top_titleLength()
+                with col13:
+                    show_best_titleLength()
 def get_picture_from_url(url):
     try:
     except:
         return None
+def get_top_category():
+    topCategory = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topCategory.csv')
     # Sort the DataFrame in ascending order based on predicted_prob column
     topCategory_sorted = topCategory.sort_values('predicted_prob')
     topCategory_sorted['rank'] = range(1, len(topCategory_sorted) + 1)
     # Map category_id to category name using the categories dictionary
     topCategory_sorted['category_name'] = topCategory_sorted['category_id'].map(lambda x: next((key for key, value in categories.items() if value == x), 'Unknown Category'))
+    return topCategory_sorted
+def show_top_category():
+    topCategory_sorted = get_top_category()
     # Set a color palette for the plot
     color_palette = sns.color_palette('Set2', len(topCategory_sorted['category_id'].unique()))
     # Display the legend and the plot in Streamlit
     st.pyplot(fig)
+def show_best_category():
+    topCategory_sorted = get_top_category()
+    top_3_categories = topCategory_sorted.sort_values('predicted_prob', ascending=True).head(3)
+    top_3_categories = top_3_categories['category_name'].head(3)
+    st.header("Top 3 Categories")
+    # Display the top 3 category IDs with colorful formatting in Streamlit
+    for category_id in top_3_categories:
+        color = '#339933' if category_id == top_3_categories.iloc[0] else '#ffcc33' if category_id == top_3_categories.iloc[1] else '#ff9900'
+        st.write(f"<span style='color:{color};font-weight:bold;'>{category_id}</span>", unsafe_allow_html=True)
+def get_top_duration():
+    topDurationsorted = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topDuration.csv')
+    topDurationsorted = topDurationsorted.sort_values('predicted_prob', ascending=False)
+    return topDurationsorted
+def show_top_duration():
+    topDuration_sorted = get_top_duration()
+    # Set the style of the plot
+    sns.set(style='whitegrid')
+    # Plot the graph using Seaborn
+    plt.figure(figsize=(10, 6))
+    sns.barplot(x='duration_range', y='predicted_prob',data=topDuration_sorted)
     plt.xlabel('Duration')
     plt.ylabel('Predicted Probability')
     plt.title('Top Durations')
+    plt.xticks(rotation=45)
+    plt.show()
     st.pyplot(plt)
+def show_best_duration():
+    topDurationRange = get_top_duration()
+    top_3_durationRange = topDurationRange.sort_values('predicted_prob', ascending=False).head(3)
+    top_3_range = top_3_durationRange['duration_range'].head(3)
+    st.header("Top 3 Duration Range")
+    for range in top_3_range:
+        color = '#339933' if range == top_3_range.iloc[0] else '#ffcc33' if range == top_3_range.iloc[1] else '#ff9900'
+        st.write(f"<span style='color:{color};font-weight:bold;'>{range}</span>", unsafe_allow_html=True)
+def get_top_title():
+    topTitle = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topTitle.csv')
     # Sort the DataFrame in ascending order based on predicted_prob column
+    topTitle_sorted = topTitle.sort_values('Importance Score', ascending=False)
+    return topTitle_sorted
+def show_top_title():
+    topTitle_sorted = get_top_title()
     sns.set(style="whitegrid")
     plt.figure(figsize=(8, 6))
     sns.barplot(x='Importance Score', y='Feature', data=topTitle_sorted, palette="rocket")
     plt.tight_layout()
     st.pyplot(plt)
+def show_best_title():
+    topTitle_sorted = get_top_title()
+    top_3_keyword = topTitle_sorted.sort_values('Importance Score', ascending=False).head(3)
+    top_3_keyword = topTitle_sorted['Feature'].head(3)
+    st.header("Top 3 Keyword")
+    for feature in top_3_keyword:
+        color = '#339933' if feature == top_3_keyword.iloc[0] else '#ffcc33' if feature == top_3_keyword.iloc[1] else '#ff9900'
+        st.write(f"<span style='color:{color};font-weight:bold;'>{feature}</span>", unsafe_allow_html=True)
 def round_interval(interval_str):
     """Return the interval's two bounds truncated to integers, as ``"(start, end)"``.

     Bracket characters (``(``, ``)``, ``[``, ``]``) are stripped from both
     ends of *interval_str*, the rest is split on the comma into two floats,
     and each float is truncated with ``int`` (not rounded).
     """
     low_text, high_text = interval_str.strip('()[]').split(',')
     low, high = float(low_text), float(high_text)
     return "({}, {})".format(int(low), int(high))
+def get_top_titleLength():
+    topTitleLength = pd.read_csv(r'C:\Users\LEGION\Desktop\MMU\Data Science Fundamental\Project\Prediction of Video\topTitleLength.csv')
     title_length_ranges = topTitleLength['titleLength']
     predicted_probs = topTitleLength['predicted_prob']
     rounded_ranges = [round_interval(range_val) for range_val in title_length_ranges]
+    data = {
+        'rounded_ranges': rounded_ranges,
+        'predicted_probs': predicted_probs
+    }
+    topTitleLength = pd.DataFrame(data)
+    # Sort the DataFrame by predicted_probs in descending order
+    sorted_titleLength = topTitleLength.sort_values(by='predicted_probs', ascending=False)
+    return sorted_titleLength
+def show_top_titleLength():
+    topTitleLength = get_top_titleLength()
     # Set the style of the plot
     sns.set(style='whitegrid')
     # Plot the graph using Seaborn
     plt.figure(figsize=(10, 6))
+    sns.barplot(x='rounded_ranges', y='predicted_probs',data=topTitleLength)
     plt.xlabel('Title Length Range')
     plt.ylabel('Predicted Probability')
     plt.title('Top 5 Ranges for Title Length vs. Predicted Probability')
     plt.show()
     st.pyplot(plt)
+def show_best_titleLength():
+    topTitleLength = get_top_titleLength()
+    top_3_titleLength = topTitleLength.sort_values('predicted_probs', ascending=False).head(3)
+    top_3_range = top_3_titleLength['rounded_ranges'].head(3)
+    st.header("Top 3 Title Length Range")
+    for range in top_3_range:
+        color = '#339933' if range == top_3_range.iloc[0] else '#ffcc33' if range == top_3_range.iloc[1] else '#ff9900'
+        st.write(f"<span style='color:{color};font-weight:bold;'>{range}</span>", unsafe_allow_html=True)
 # Function to make predictions
 def predict_trend(title, duration, category_id):
     duration = str(duration)