# Spaces:
# Sleeping
# Sleeping
import streamlit as st | |
import pandas as pd | |
import joblib | |
from preprocessText import preprocess | |
from apiSearch import get_metadata,get_trending_videos | |
import base64 | |
import requests | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import seaborn as sns | |
# Load the model
# The serialized estimator is read once, at import time, and is used by
# predict_trend() through the module-level name `model`.
# NOTE(review): joblib deserialization is pickle-based, so this file must come
# from a trusted source.
_MODEL_FILENAME = '85pct(new).pkl'
model = joblib.load(_MODEL_FILENAME)
# Define the categories
# Maps the category display name shown in the UI to its numeric category id.
# Insertion order matters: the "Category" select box lists the keys in this
# order and its default index is computed from the position of the id.
categories = {
    'Film & Animation': 1,
    'Autos & Vehicles': 2,
    'Music': 10,
    'Pets & Animals': 15,
    'Sports': 17,
    'Short Movies': 18,
    'Travel & Events': 19,
    'Gaming': 20,
    'Videoblogging': 21,
    'People & Blogs': 22,
    'Comedy': 23,
    'Entertainment': 24,
    'News & Politics': 25,
    'Howto & Style': 26,
    'Education': 27,
    'Science & Technology': 28,
    'Nonprofits & Activism': 29,
}
# Create the Streamlit web application

# Page-wide CSS injected at the top of every run (fonts, colours, button
# styling). It is a plain string because it contains no placeholders, so the
# CSS braces need no f-string escaping.
# NOTE(review): `text-weight` is not a standard CSS property (probably meant
# `font-weight`); it is left unchanged so the current rendering is preserved.
_PAGE_STYLE = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
@import url('https://fonts.googleapis.com/css2?family=YouTube+Sans&display=swap');
html, body, [class*="css"] {
font-family: 'Roboto', sans-serif;
}
[data-testid="stAppViewContainer"] > .main {
background-color : white;
}
p{
font-family: 'Roboto', sans-serif;
text-weight: bold;
font-size: 25px;
}
body{
display: flex;
justify-content: center;
align-items: center;
text-align: center;
}
h1{
text-align: center;
color: #d72324;
}
img{
max-width: 100%;
max-height: 100%;
}
.stButton > button {
background-color: #d72324;
color:white;
font-weight: bold;
width: 500px;
height: 50px;
}
.my-container {
border: 2px solid #d72324;
padding: 10px;
}
.stButton > button:hover {
background-color: white;
color:#d72324;
}
</style>
"""

_UNKNOWN_CATEGORY = 'Unknown Category'


def _category_index(category_id):
    """Return the position of *category_id* among ``categories`` values.

    Falls back to 0 (the first category) when the id cannot be converted to
    an int or is not one of the known ids, so the select box always gets a
    valid default index.
    """
    try:
        return list(categories.values()).index(int(category_id))
    except (TypeError, ValueError):
        return 0


def _category_name(category_id):
    """Return the display name for *category_id*, or 'Unknown Category'."""
    try:
        wanted = int(category_id)
    except (TypeError, ValueError):
        return _UNKNOWN_CATEGORY
    return next(
        (name for name, known_id in categories.items() if known_id == wanted),
        _UNKNOWN_CATEGORY,
    )


def _missing_input_warning(title, duration):
    """Return the warning to show for incomplete inputs, or None if complete."""
    missing_title = title is None or not title.strip()
    missing_duration = duration == 0
    if missing_title and missing_duration:
        return "Please enter a title and duration."
    if missing_title:
        return "Please enter a title"
    if missing_duration:
        return "Please enter a duration."
    return None


def _render_predict_tab():
    """Render the "Predict" tab: input widgets plus the prediction result."""
    with st.container():
        url_col, details_col, upload_col = st.columns(3)
        # Defaults used when no URL was entered or no metadata was found.
        fetched_title, fetched_duration, fetched_category = "", 0.0, 1

        with url_col:
            url = st.text_input("URL", placeholder="Enter a video url")
            if url:
                metadata = get_metadata(url)
                if not metadata.empty:
                    fetched_title = metadata['title'].iloc[0]
                    fetched_duration = metadata['duration'].iloc[0]
                    fetched_category = metadata['category_id'].iloc[0]
                    thumbnail_url = metadata['thumbnail_link'].iloc[0]
                    # Only attempt a download for a real, non-empty URL string.
                    if isinstance(thumbnail_url, str) and thumbnail_url:
                        thumbnail = get_picture_from_url(thumbnail_url)
                        if thumbnail:
                            st.image(thumbnail, caption='Thumbnail captured', width=400, channels="BGR")

        with details_col:
            title = st.text_input("Title", placeholder="Enter a video title", value=fetched_title)
            # number_input requires value and min_value to share one numeric
            # type, so the fetched duration is coerced to float.
            duration = st.number_input(
                "Duration (in seconds)", min_value=0.0, value=float(fetched_duration)
            )
            category = st.selectbox(
                "Category", list(categories.keys()), index=_category_index(fetched_category)
            )

        with upload_col:
            uploaded_picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
            if uploaded_picture is not None:
                # Streamlit has no st.picture; st.image renders the upload.
                st.image(uploaded_picture, caption='Thumbnail Uploaded', width=400, channels="BGR")

        # Convert category to category ID
        category_id = categories[category]

        if st.button("Predict"):
            warning = _missing_input_warning(title, duration)
            if warning is not None:
                # Never call the model with an empty title or a zero duration.
                st.warning(warning)
            else:
                prediction = predict_trend(title, duration, category_id)
                if prediction[0] == 1:
                    st.success("This video is predicted to be a trend!")
                else:
                    st.info("This video is predicted not to be a trend.")
                st.markdown("")


def _render_trending_tab():
    """Render the "Trending" tab: trending list and details of one video."""
    country_code = st.selectbox(
        "Select Country Code",
        ['US', 'CA', 'GB', 'DE', 'FR', 'RU', 'BR', 'IN', 'MY', 'SG', 'JP', 'KR'],
    )
    with st.container():
        st.write("Top 10 Trending Video")
        trending = get_trending_videos(country_code)
        # Check before rendering: an empty frame has no row to select below.
        if trending is None or trending.empty:
            st.error('Failed to retrieve trending videos.')
            return

        st.dataframe(trending)
        # Display video titles
        selected_title = st.selectbox("Select a Video", trending['title'])
        selected_video = trending[trending['title'] == selected_title].iloc[0]

        thumbnail_col, info_col = st.columns(2)
        with thumbnail_col:
            image = get_picture_from_url(selected_video['thumbnail_link'])
            if image:
                st.image(image, caption='Thumbnail captured', width=400, channels="BGR")
        with info_col:
            st.write("Title:", selected_video['title'])
            st.write("Category:", _category_name(selected_video['category_id']))
            st.write("Duration:", selected_video['duration'])


def _render_visualize_tab():
    """Render the "Visualize" tab: a 2x2 grid of the precomputed charts."""
    with st.container():
        category_col, duration_col = st.columns(2)
        with category_col:
            show_top_category()
        with duration_col:
            show_top_duration()
    with st.container():
        title_col, title_length_col = st.columns(2)
        with title_col:
            show_top_title()
        with title_length_col:
            show_top_titleLength()


def main():
    """Build the page: global styling, header, and the three application tabs."""
    st.set_page_config(layout="wide")
    st.markdown(_PAGE_STYLE, unsafe_allow_html=True)
    st.markdown("<body><img style = 'max-width: 20%;max-height: 20%;text-align: center;' src=\"https://media.tenor.com/U7OFq772kIEAAAAj/sweet-dreams.gif\"></body>", unsafe_allow_html=True)
    st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
    # Alternative static logo:
    # https://www.freepnglogos.com/uploads/youtube-play-red-logo-png-transparent-background-6.png

    predict_tab, trending_tab, visualize_tab = st.tabs(["Predict", "Trending", "Visualize"])
    with predict_tab:
        _render_predict_tab()
    with trending_tab:
        _render_trending_tab()
    with visualize_tab:
        _render_visualize_tab()
def get_picture_from_url(url, timeout=10):
    """Download the resource at *url* and return its raw bytes.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up, so a stalled
            host cannot hang the page.

    Returns:
        The response body as bytes, or None when *url* is empty or not a
        string, the request fails, or the server answers with an HTTP error
        status.
    """
    if not isinstance(url, str) or not url:
        return None
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx as failure rather than handing an error page to the
        # caller as if it were image data.
        response.raise_for_status()
    except requests.RequestException:
        return None
    return response.content
def show_top_category():
    """Render a bar chart of predicted probability per category.

    Reads 'topCategory.csv' (columns used: 'predicted_prob', 'category_id'),
    ranks the rows by ascending 'predicted_prob', and draws one bar per rank,
    coloured by category name.
    """
    ranked = pd.read_csv('topCategory.csv').sort_values('predicted_prob')
    # Rank 1 is the lowest predicted probability (ascending sort).
    ranked['rank'] = range(1, len(ranked) + 1)

    # Invert the name -> id mapping once instead of scanning it for every row.
    id_to_name = {category_id: name for name, category_id in categories.items()}
    ranked['category_name'] = ranked['category_id'].map(
        lambda category_id: id_to_name.get(category_id, 'Unknown Category')
    )

    palette = sns.color_palette('Set2', len(ranked['category_id'].unique()))

    # Draw on an explicit figure/axes rather than pyplot's global state.
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.barplot(
        data=ranked, x='rank', y='predicted_prob',
        hue='category_name', palette=palette, ax=ax,
    )
    ax.set_xlabel('Rank')
    ax.set_ylabel('Predicted Probability')
    ax.set_title('Top Categories')

    st.pyplot(fig)
    # The script reruns on every interaction; close the figure so pyplot does
    # not keep one more figure alive per rerun.
    plt.close(fig)
def show_top_duration():
    """Render a scatter plot of video duration against predicted probability.

    Reads 'topDuration.csv' (columns used: 'duration', 'predicted_prob').
    """
    top_duration = pd.read_csv('topDuration.csv').sort_values(
        'predicted_prob', ascending=False
    )

    # Use an explicit figure and hand that to Streamlit; passing the pyplot
    # module relies on the process-wide "current figure".
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.scatter(top_duration['duration'], top_duration['predicted_prob'])
    ax.set_xlabel('Duration')
    ax.set_ylabel('Predicted Probability')
    ax.set_title('Top Durations')

    st.pyplot(fig)
    # Close the figure so reruns do not accumulate open figures.
    plt.close(fig)
def show_top_title():
    """Render a horizontal bar chart of title-feature importance scores.

    Reads 'topTitle.csv' (columns used: 'Feature', 'Importance Score') and
    plots the features sorted by ascending score.
    """
    top_title = pd.read_csv('topTitle.csv').sort_values('Importance Score')

    # Keep a handle on the figure so it can be passed to Streamlit explicitly
    # instead of going through the pyplot module's current figure.
    fig, ax = plt.subplots(figsize=(5, 5))
    ax.barh(top_title['Feature'], top_title['Importance Score'])
    ax.set_xlabel('Importance Score')
    ax.set_ylabel('Feature')
    ax.set_title('Top Title Features')

    st.pyplot(fig)
    # Close the figure so reruns do not accumulate open figures.
    plt.close(fig)
def round_interval(interval_str):
    """Format an interval label such as "(9.5, 20.0]" as "(9, 20)".

    The surrounding brackets or parentheses are stripped, the two
    comma-separated bounds are parsed as floats, and each is converted with
    int(), which truncates toward zero rather than rounding to nearest.

    Raises:
        ValueError: if the text does not contain exactly two numeric bounds.
    """
    bounds = interval_str.strip('()[]').split(',')
    lower, upper = (float(bound) for bound in bounds)
    return "({}, {})".format(int(lower), int(upper))
def show_top_titleLength():
    """Render a bar chart of predicted probability per title-length range.

    Reads 'topTitleLength.csv' (columns used: 'titleLength', holding interval
    labels, and 'predicted_prob'). Interval labels are shortened with
    round_interval() before plotting.
    """
    top_title_length = pd.read_csv('topTitleLength.csv')
    range_labels = [round_interval(label) for label in top_title_length['titleLength']]

    # Set the style of the plot
    sns.set(style='whitegrid')

    # Draw on an explicit figure; no plt.show() here, since Streamlit renders
    # the figure itself and show() is meant for interactive pyplot sessions.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=range_labels, y=top_title_length['predicted_prob'], ax=ax)
    ax.set_xlabel('Title Length Range')
    ax.set_ylabel('Predicted Probability')
    ax.set_title('Top 5 Ranges for Title Length vs. Predicted Probability')
    ax.tick_params(axis='x', labelrotation=45)

    st.pyplot(fig)
    # Close the figure so reruns do not accumulate open figures.
    plt.close(fig)
# Function to make predictions
def predict_trend(title, duration, category_id):
    """Run the loaded model on a single video description.

    Args:
        title: Raw video title; it is passed through preprocess() and its
            character length is used as a feature.
        duration: Video duration; converted to text and then to float64.
        category_id: Category id; converted to int and stored as a
            categorical column.

    Returns:
        Whatever ``model.predict`` returns for the one-row frame (the caller
        reads element 0). The class probabilities are printed to stdout as a
        side effect.
    """
    duration_text = str(duration)
    numeric_category = int(category_id)
    # preprocess() is expected to yield string tokens, since they are joined
    # with spaces below.
    title_tokens = preprocess(title)

    # Prepare the one-row input frame with the column names the model uses.
    features = pd.DataFrame({
        'cleanTitle': [' '.join(title_tokens)],
        'titleLength': [len(title)],
        'categoryId': [numeric_category],
        'duration': [duration_text],
    })
    for column, dtype in (('categoryId', 'category'), ('duration', 'float64')):
        features[column] = features[column].astype(dtype)

    # Make the prediction
    print(model.predict_proba(features))
    return model.predict(features)
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()