A2D / app.py
Abu1998's picture
Update app.py
963a193 verified
raw
history blame
3.19 kB
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from dataset_import import load_data
from data_cleaning import clean_data
from YTCommentScraper import get_transcript, summarize_with_langchain_and_openai
def _summarize_video():
    """Prompt for an OpenAI key and a YouTube video ID, then render a summary.

    Nothing happens until the button is pressed and both fields are filled.
    Any failure while fetching the transcript or summarizing it is shown to
    the user as an error message instead of crashing the app.
    """
    openai_api_key = st.text_input("Enter your OpenAI API Key:", type='password')
    video_id = st.text_input("Enter the YouTube Video ID:")

    if not st.button('Extract and Summarize Comments'):
        return
    if not (openai_api_key and video_id):
        st.write("Please enter both your OpenAI API Key and YouTube Video ID.")
        return

    # NOTE(review): the UI talks about "comments" but the helper called here
    # is get_transcript -- confirm which one is actually being summarized.
    try:
        transcript, language_code = get_transcript(video_id)
        summary = summarize_with_langchain_and_openai(
            transcript, language_code, openai_api_key
        )
    except Exception as e:  # UI boundary: report the failure, keep the app alive
        st.error(str(e))
        return

    st.write("Summary of YouTube Comments:")
    st.markdown(summary)


def _show_correlation(data):
    """Render the correlation table and heatmap for the numeric columns."""
    st.write("Correlation Matrix of Cleaned Data:")

    # Recent pandas raises when corr() meets non-numeric columns, so restrict
    # the computation to numeric ones explicitly.
    corr = data.corr(numeric_only=True)
    if corr.empty:
        # sns.heatmap cannot draw an empty matrix.
        st.write("No numeric columns available for a correlation matrix.")
        return

    st.write(corr)
    # Draw on an explicit figure: calling st.pyplot() without one depends on
    # matplotlib's global state, which Streamlit deprecates.
    fig, ax = plt.subplots()
    sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
    st.pyplot(fig)
    plt.close(fig)  # the script reruns on every interaction; don't pile up figures


def _show_chart(data):
    """Let the user pick a chart type and columns, then draw the chart."""
    st.write("Data Visualization on Cleaned Data:")
    chart_type = st.selectbox("Select Chart Type", ['Line Chart', 'Bar Chart', 'Histogram'])

    if chart_type == 'Histogram':
        selected_column = st.selectbox("Select Column for Histogram", data.columns)
        bins = st.slider("Number of Bins", min_value=10, max_value=100, value=30)
        title = st.text_input("Enter Chart Title", "Histogram")

        # NaN values break the bin-range computation, so drop them first.
        values = data[selected_column].dropna()
        if values.empty:
            st.write("The selected column has no values to plot.")
            return

        fig, ax = plt.subplots()
        ax.hist(values, bins=bins)
        ax.set_title(title)
        ax.set_xlabel(selected_column)
        ax.set_ylabel('Frequency')
        st.pyplot(fig)
        plt.close(fig)
        return

    # Line and bar charts share the same controls; only the drawing call differs.
    draw = st.line_chart if chart_type == 'Line Chart' else st.bar_chart
    x_axis = st.selectbox("Select X-axis Column", data.columns)
    # Default Y to the second column (when there is one) so the first render
    # does not pair a column with itself.
    y_axis = st.selectbox(
        "Select Y-axis Column", data.columns, index=min(1, len(data.columns) - 1)
    )
    title = st.text_input("Enter Chart Title", chart_type)

    if x_axis == y_axis:
        # Same column on both axes: plot it against the row index rather than
        # building a frame with duplicate column labels.
        draw(data[[y_axis]])
    else:
        # Actually use the chosen X column as the x-axis instead of plotting
        # both columns as separate series over the row index.
        draw(data, x=x_axis, y=y_axis)
    st.write(f"{chart_type}: {title}")


st.title("AnalyzeYT Dataset Analysis")

# Option to extract YouTube comments
extract_comments = st.radio("Do you want to extract comments from a YouTube video?", ("No", "Yes"))

if extract_comments == "Yes":
    _summarize_video()
else:
    # Proceed with normal data analysis
    data = load_data()
    if data is not None:
        st.write("Loaded Data Preview:")
        st.write(data.head())

        # Clean data
        data = clean_data(data)
        st.write("Cleaned Data Preview:")
        st.write(data.head())

        # Show data description of cleaned data
        st.write("Cleaned Data Description:")
        st.write(data.describe())

        _show_correlation(data)
        _show_chart(data)