File size: 3,194 Bytes
2dbfb56
7ce127e
 
 
2dbfb56
39b58b2
963a193
2dbfb56
 
 
963a193
 
2dbfb56
963a193
 
 
2dbfb56
963a193
 
 
 
 
 
 
 
 
 
 
 
 
 
2dbfb56
963a193
 
 
 
 
 
 
 
 
 
 
 
 
7ce127e
963a193
 
 
 
 
7ce127e
14cdd18
963a193
 
 
14cdd18
963a193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from dataset_import import load_data
from data_cleaning import clean_data
from YTCommentScraper import get_transcript, summarize_with_langchain_and_openai

def _render_video_summary():
    """Ask for an OpenAI key and a YouTube video ID, then display a summary.

    On button press, ``get_transcript`` is called with the video ID and its
    result (a ``(transcript, language_code)`` pair) is forwarded, together
    with the API key, to ``summarize_with_langchain_and_openai``. The returned
    summary is rendered as Markdown.

    NOTE(review): the UI wording says "comments" while the helper invoked is
    ``get_transcript`` -- confirm which of the two is actually intended.
    """
    openai_api_key = st.text_input("Enter your OpenAI API Key:", type='password')
    video_id = st.text_input("Enter the YouTube Video ID:")

    if not st.button('Extract and Summarize Comments'):
        return

    # Pasted keys/IDs frequently carry stray whitespace; treat blank as missing.
    openai_api_key = openai_api_key.strip()
    video_id = video_id.strip()
    if not (openai_api_key and video_id):
        st.warning("Please enter both your OpenAI API Key and YouTube Video ID.")
        return

    try:
        transcript, language_code = get_transcript(video_id)
        summary = summarize_with_langchain_and_openai(transcript, language_code, openai_api_key)
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing
        # the whole app with a traceback.
        st.error(str(e))
        return

    st.write("Summary of YouTube Comments:")
    st.markdown(summary)


def _render_correlation(data):
    """Show the correlation table and heatmap for the numeric columns of *data*.

    Only numeric/bool columns are correlated: calling ``DataFrame.corr()`` on
    a frame that still holds text columns raises on pandas >= 2.0.
    """
    st.write("Correlation Matrix of Cleaned Data:")
    numeric = data.select_dtypes(include=["number", "bool"])
    if numeric.shape[1] == 0:
        st.info("No numeric columns available for a correlation matrix.")
        return

    corr = numeric.corr()
    st.write(corr)

    # Draw on an explicit figure: st.pyplot() without a figure relies on
    # matplotlib's global state, which is deprecated and not thread-safe.
    fig, ax = plt.subplots()
    sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
    st.pyplot(fig)
    # Streamlit re-runs the script on every interaction; close the figure so
    # pyplot does not keep accumulating open figures.
    plt.close(fig)


def _render_xy_chart(data, label, draw_chart):
    """Let the user pick X/Y columns and plot Y against X.

    Args:
        data: DataFrame holding the cleaned data.
        label: Chart kind shown to the user ("Line Chart" or "Bar Chart");
            also used as the default chart title.
        draw_chart: Streamlit chart function to call (``st.line_chart`` or
            ``st.bar_chart``).
    """
    columns = data.columns
    x_axis = st.selectbox("Select X-axis Column", columns)
    # Default Y to the second column so the initial selection is not X == Y.
    y_axis = st.selectbox(
        "Select Y-axis Column", columns, index=1 if len(columns) > 1 else 0
    )
    title = st.text_input("Enter Chart Title", label)

    if x_axis == y_axis:
        # Selecting the same column twice would yield duplicate column labels.
        st.warning("Select two different columns for the X and Y axes.")
        return

    # Use the X column as the index so it really becomes the horizontal axis;
    # passing both columns as data would plot each of them against the row
    # number instead.
    draw_chart(data.set_index(x_axis)[[y_axis]])
    st.write(f"{label}: {title}")


def _render_histogram(data):
    """Let the user pick a column and bin count, then draw its histogram."""
    selected_column = st.selectbox("Select Column for Histogram", data.columns)
    bins = st.slider("Number of Bins", min_value=10, max_value=100, value=30)
    title = st.text_input("Enter Chart Title", "Histogram")

    fig, ax = plt.subplots()
    # Missing values break the automatic range detection of the histogram.
    ax.hist(data[selected_column].dropna(), bins=bins)
    ax.set_title(title)
    ax.set_xlabel(selected_column)
    ax.set_ylabel('Frequency')
    st.pyplot(fig)
    plt.close(fig)


def _render_dataset_analysis():
    """Load, clean, describe and visualize the dataset.

    Nothing is rendered when ``load_data()`` returns ``None``.
    """
    data = load_data()
    if data is None:
        return

    st.write("Loaded Data Preview:")
    st.write(data.head())

    # Clean data
    data = clean_data(data)

    st.write("Cleaned Data Preview:")
    st.write(data.head())

    # Show data description of cleaned data
    st.write("Cleaned Data Description:")
    st.write(data.describe())

    # Add correlation matrix for cleaned data
    _render_correlation(data)

    # Data visualization options for cleaned data
    st.write("Data Visualization on Cleaned Data:")
    chart_type = st.selectbox("Select Chart Type", ['Line Chart', 'Bar Chart', 'Histogram'])

    if chart_type == 'Line Chart':
        _render_xy_chart(data, chart_type, st.line_chart)
    elif chart_type == 'Bar Chart':
        _render_xy_chart(data, chart_type, st.bar_chart)
    elif chart_type == 'Histogram':
        _render_histogram(data)


st.title("AnalyzeYT Dataset Analysis")

# Option to extract YouTube comments
extract_comments = st.radio("Do you want to extract comments from a YouTube video?", ("No", "Yes"))

if extract_comments == "Yes":
    _render_video_summary()
else:
    # Proceed with normal data analysis
    _render_dataset_analysis()