Spaces:

xinah3131
/

youtube-trend-prediction

Sleeping

App Files Files Community

youtube-trend-prediction / app.py

xinah3131

Update app.py

cdd172b almost 2 years ago

raw

history blame

11.6 kB

	import streamlit as st
	import pandas as pd
	import joblib
	from preprocessText import preprocess
	from apiSearch import get_metadata,get_trending_videos
	import base64
	import requests
	import matplotlib.pyplot as plt
	import numpy as np
	import seaborn as sns
	# Load the pre-trained classifier once, at import time; predict_trend() uses
	# it for inference.
	# NOTE(review): joblib.load unpickles the file, so '85pct(new).pkl' must come
	# from a trusted source.

	model = joblib.load('85pct(new).pkl')

	# Map each category display name to its numeric category id.
	# The key order is the order shown in the "Category" select box, and the ids
	# are the values compared against the 'category_id' column of fetched videos.
	categories = {
	'Film & Animation': 1,
	'Autos & Vehicles': 2,
	'Music': 10,
	'Pets & Animals': 15,
	'Sports' : 17,
	'Short Movies' : 18,
	'Travel & Events' : 19,
	'Gaming' : 20,
	'Videoblogging' : 21,
	'People & Blogs' : 22,
	'Comedy' : 23,
	'Entertainment' : 24,
	'News & Politics' : 25,
	'Howto & Style' : 26,
	'Education' : 27,
	'Science & Technology' : 28,
	'Nonprofits & Activism' : 29
	}


	# Page-wide CSS injected through st.markdown(..., unsafe_allow_html=True).
	# NOTE(review): `text-weight` is not a CSS property (probably meant
	# `font-weight`); it is left unchanged so the current rendering is preserved.
	_PAGE_STYLE = """

	<style>
	@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
	@import url('https://fonts.googleapis.com/css2?family=YouTube+Sans&display=swap');
	html, body, [class*="css"] {
	font-family: 'Roboto', sans-serif;

	}
	[data-testid="stAppViewContainer"] > .main {
	background-color : white;

	}
	p{
	font-family: 'Roboto', sans-serif;
	text-weight: bold;
	font-size: 25px;
	}
	body{
	display: flex;
	justify-content: center;
	align-items: center;
	text-align: center;
	}
	h1{
	text-align: center;
	color: #d72324;
	}
	img{
	max-width: 100%;
	max-height: 100%;
	}
	.stButton > button {
	background-color: #d72324;
	color:white;
	font-weight: bold;
	width: 500px;
	height: 50px;
	}
	.my-container {
	border: 2px solid #d72324;
	padding: 10px;
	}
	.stButton > button:hover {
	background-color: white;
	color:#d72324;
	}

	</style>
	"""


	def _category_name(category_id):
	    """Return the display name for `category_id`, or 'Unknown Category'."""
	    try:
	        wanted = int(category_id)
	    except (TypeError, ValueError):
	        return 'Unknown Category'
	    for name, value in categories.items():
	        if value == wanted:
	            return name
	    return 'Unknown Category'


	def _category_index(category_id):
	    """Return the select-box position of `category_id`, or 0 if it is unknown."""
	    try:
	        return list(categories.values()).index(int(category_id))
	    except (TypeError, ValueError):
	        # Unparseable id, or an id that is not listed in `categories`.
	        return 0


	def _to_seconds(value):
	    """Coerce `value` to a non-negative float, falling back to 0.0."""
	    try:
	        seconds = float(value)
	    except (TypeError, ValueError):
	        return 0.0
	    # NaN fails the comparison and therefore also falls back to 0.0.
	    return seconds if seconds > 0 else 0.0


	def _render_predict_tab():
	    """Render the input form and, on "Predict", show the model's verdict."""
	    with st.container():
	        col1, col2, col3 = st.columns(3)
	        # Defaults used when no URL is entered or its metadata lookup is empty.
	        getTitle, getDuration, getCategory = "", 0.0, 1
	        with col1:
	            url = st.text_input("URL",placeholder="Enter a video url")
	            if url:
	                metadata = get_metadata(url)
	                if metadata is not None and not metadata.empty:
	                    getTitle = metadata['title'].iloc[0]
	                    getDuration = metadata['duration'].iloc[0]
	                    getCategory = metadata['category_id'].iloc[0]
	                    getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
	                    if getThumbnailUrl:
	                        picture = get_picture_from_url(getThumbnailUrl)
	                        if picture:
	                            st.image(picture, caption='Thumbnail captured',width = 400, channels="BGR")
	        with col2:
	            title = st.text_input("Title", placeholder="Enter a video title",value=getTitle)
	            # number_input needs `value` to be a float like `min_value`.
	            duration = st.number_input("Duration (in seconds)", min_value=0.0, value=_to_seconds(getDuration))
	            # Fall back to the first category when the fetched id is not listed.
	            category = st.selectbox("Category", list(categories.keys()), index=_category_index(getCategory))

	        with col3:
	            picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
	            if picture is not None:
	                st.image(picture,caption='Thumbnail Uploaded',width = 400, channels="BGR")

	        # Convert category to category ID
	        categoryId = categories[category]

	        if st.button("Predict"):
	            title_missing = not title or not title.strip()
	            duration_missing = duration == 0
	            # Exactly one branch runs, so a prediction is only made when both
	            # the title and the duration are present.
	            if title_missing and duration_missing:
	                st.warning("Please enter a title and duration.")
	            elif title_missing:
	                st.warning("Please enter a title")
	            elif duration_missing:
	                st.warning("Please enter a duration.")
	            else:
	                prediction = predict_trend(title, duration, categoryId)
	                if prediction[0] == 1:
	                    st.success("This video is predicted to be a trend!")
	                    st.markdown("![Alt Text](https://media.tenor.com/Cyi2zT7wcmcAAAAj/pentol-gif-eak.gif)")
	                else:
	                    st.info("This video is predicted not to be a trend.")
	                    st.markdown("![Alt Text](https://media.tenor.com/VYKtkKnHaUcAAAAj/quby-cute.gif)")


	def _render_trending_tab():
	    """Render the trending-videos table and the details of one selected video."""
	    country_code = st.selectbox("Select Country Code", ['US', 'CA', 'GB','DE', 'FR', 'RU', 'BR','IN','MY','SG','JP','KR'])
	    with st.container():
	        st.write("Top 10 Trending Video")
	        df = get_trending_videos(country_code)
	        # Check before rendering or indexing: a missing or empty result would
	        # otherwise fail on `.iloc[0]` below.
	        if df is None or df.empty:
	            st.error('Failed to retrieve trending videos.')
	            return

	        st.dataframe(df)
	        # Display video titles
	        selected_video_title = st.selectbox("Select a Video", df['title'])
	        selected_video = df[df['title'] == selected_video_title].iloc[0]

	        col4,col5 = st.columns(2)
	        with col4:
	            image = get_picture_from_url(selected_video['thumbnail_link'])
	            if image:
	                st.image(image, caption='Thumbnail captured',width = 400, channels="BGR")
	        with col5:
	            st.write("Title:", selected_video['title'])
	            st.write("Category:", _category_name(selected_video['category_id']))
	            st.write("Duration:", selected_video['duration'])


	def _render_visualize_tab():
	    """Render the four summary charts in a 2x2 grid."""
	    with st.container():
	        col6,col7 = st.columns(2)
	        with col6:
	            show_top_category()
	        with col7:
	            show_top_duration()

	    with st.container():
	        col8,col9 = st.columns(2)
	        with col8:
	            show_top_title()
	        with col9:
	            show_top_titleLength()


	# Create the Streamlit web application
	def main():
	    """Build the page: global styling, header, and the three application tabs."""
	    st.set_page_config(layout="wide")
	    st.markdown(_PAGE_STYLE, unsafe_allow_html=True)
	    st.markdown("<body><img style = 'max-width: 20%;max-height: 20%;text-align: center;' src=\"https://media.tenor.com/U7OFq772kIEAAAAj/sweet-dreams.gif\"></body>",unsafe_allow_html=True)
	    st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)

	    tab1, tab2, tab3 = st.tabs(["Predict", "Trending","Visualize"])
	    with tab1:
	        _render_predict_tab()

	    with tab2:
	        _render_trending_tab()

	    with tab3:
	        _render_visualize_tab()

	def get_picture_from_url(url):
	    """Download the resource at `url` and return its raw bytes.

	    Returns None when `url` is empty or None, when the request fails or
	    times out, or when the server answers with an HTTP error status.
	    """
	    if not url:
	        return None
	    try:
	        # Bound the wait so a stalled server cannot hang the page render.
	        response = requests.get(url, timeout=10)
	        # Treat 4xx/5xx as failure instead of returning the error page body.
	        response.raise_for_status()
	    except requests.RequestException:
	        return None
	    return response.content

	def show_top_category():
	    """Draw a bar chart of predicted probability per category.

	    Reads 'topCategory.csv' (columns 'category_id' and 'predicted_prob'),
	    ranks the rows by ascending 'predicted_prob' (rank 1 is the lowest),
	    and renders the chart in the current Streamlit container.
	    """
	    topCategory = pd.read_csv('topCategory.csv')
	    # Sort the DataFrame in ascending order based on predicted_prob column
	    ranked = topCategory.sort_values('predicted_prob')

	    # Add a 'rank' column representing the ascending order of predicted_prob
	    ranked['rank'] = range(1, len(ranked) + 1)

	    # Invert the name -> id mapping once instead of scanning it for every row.
	    id_to_name = {cat_id: name for name, cat_id in categories.items()}
	    ranked['category_name'] = ranked['category_id'].map(id_to_name).fillna('Unknown Category')

	    # Set a color palette for the plot
	    color_palette = sns.color_palette('Set2', len(ranked['category_id'].unique()))

	    # Draw on an explicit Axes so nothing depends on pyplot's "current figure".
	    fig, ax = plt.subplots(figsize=(8, 5))
	    sns.barplot(data=ranked, x='rank', y='predicted_prob', hue='category_name', palette=color_palette, ax=ax)
	    ax.set_xlabel('Rank')
	    ax.set_ylabel('Predicted Probability')
	    ax.set_title('Top Categories')

	    # Display the legend and the plot in Streamlit
	    st.pyplot(fig)
	    # Release the figure; otherwise one more stays open after every rerun.
	    plt.close(fig)

	def show_top_duration():
	    """Draw a scatter plot of 'duration' against 'predicted_prob'.

	    Reads 'topDuration.csv' and renders the chart in the current Streamlit
	    container.
	    """
	    topDuration = pd.read_csv('topDuration.csv')
	    topDuration_sorted = topDuration.sort_values('predicted_prob', ascending=False)

	    # Use an explicit Figure/Axes rather than the global pyplot state, and
	    # hand Streamlit the Figure itself instead of the `plt` module.
	    fig, ax = plt.subplots(figsize=(8, 5))
	    ax.scatter(topDuration_sorted['duration'], topDuration_sorted['predicted_prob'])
	    ax.set_xlabel('Duration')
	    ax.set_ylabel('Predicted Probability')
	    ax.set_title('Top Durations')

	    # Display the plot in Streamlit
	    st.pyplot(fig)
	    # Release the figure; otherwise one more stays open after every rerun.
	    plt.close(fig)

	def show_top_title():
	    """Draw a horizontal bar chart of title-feature importance scores.

	    Reads 'topTitle.csv' (columns 'Feature' and 'Importance Score') and
	    renders the chart in the current Streamlit container.
	    """
	    topTitle = pd.read_csv('topTitle.csv')
	    # Sort ascending by 'Importance Score'; barh draws rows bottom-up, so the
	    # highest score ends up at the top of the chart.
	    topTitle_sorted = topTitle.sort_values('Importance Score')

	    # Keep the Figure so it can be passed to Streamlit and closed afterwards.
	    fig, ax = plt.subplots(figsize=(5, 5))
	    ax.barh(topTitle_sorted['Feature'], topTitle_sorted['Importance Score'])
	    ax.set_xlabel('Importance Score')
	    ax.set_ylabel('Feature')
	    ax.set_title('Top Title Features')
	    st.pyplot(fig)
	    # Release the figure; otherwise one more stays open after every rerun.
	    plt.close(fig)


	def round_interval(interval_str):
	    """Rewrite an interval string such as "(10.5, 20.7]" as "(10, 20)".

	    Surrounding brackets/parentheses are stripped, both bounds are parsed as
	    floats and converted with int() (truncation toward zero, not rounding),
	    and the result is always formatted with parentheses.
	    """
	    bounds = interval_str.strip('()[]').split(',')
	    lower, upper = (int(float(bound)) for bound in bounds)
	    return "({}, {})".format(lower, upper)

	def show_top_titleLength():
	    """Draw a bar chart of predicted probability per title-length range.

	    Reads 'topTitleLength.csv' (columns 'titleLength' and 'predicted_prob'),
	    shortens each range label with round_interval(), and renders the chart
	    in the current Streamlit container.
	    """
	    topTitleLength = pd.read_csv('topTitleLength.csv')

	    predicted_probs = topTitleLength['predicted_prob']
	    rounded_ranges = [round_interval(range_val) for range_val in topTitleLength['titleLength']]

	    # Set the style of the plot (this changes seaborn's global theme).
	    sns.set(style='whitegrid')

	    # Draw on an explicit Figure/Axes so the Figure itself can be handed to
	    # Streamlit; plt.show() is dropped because Streamlit does the rendering.
	    fig, ax = plt.subplots(figsize=(10, 6))
	    sns.barplot(x=rounded_ranges, y=predicted_probs, ax=ax)
	    ax.set_xlabel('Title Length Range')
	    ax.set_ylabel('Predicted Probability')
	    ax.set_title('Top 5 Ranges for Title Length vs. Predicted Probability')
	    ax.tick_params(axis='x', labelrotation=45)
	    st.pyplot(fig)
	    # Release the figure; otherwise one more stays open after every rerun.
	    plt.close(fig)

	# Function to make predictions
	def predict_trend(title, duration, category_id):
	    """Run the loaded model on a single video and return its prediction.

	    Args:
	        title: Raw video title. It is passed through preprocess() and its
	            character count is used as the 'titleLength' feature.
	        duration: Video duration; converted to float.
	        category_id: Category id; converted to int.

	    Returns:
	        Whatever model.predict() returns for the one-row input; callers read
	        element 0, where 1 means "predicted to trend".
	    """
	    # preprocess() is presumably returning an iterable of tokens, which are
	    # joined back into one string -- TODO confirm against preprocessText.
	    clean_new_title_str = ' '.join(preprocess(title))

	    # Prepare the input data; the column order matches the original frame.
	    data = pd.DataFrame({
	        'cleanTitle': [clean_new_title_str],
	        'titleLength': [len(title)],
	        'categoryId': [int(category_id)],
	        'duration': [float(duration)],
	    })
	    data = data.astype({'categoryId': 'category', 'duration': 'float64'})

	    # Single inference pass: the former debug print of predict_proba() ran
	    # the model a second time on every call.
	    return model.predict(data)

	# Build the page only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	main()