Abu1998 committed on
Commit
963a193
·
verified ·
1 Parent(s): b6788e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -59
app.py CHANGED
@@ -4,72 +4,77 @@ import seaborn as sns
4
  import matplotlib.pyplot as plt
5
  from dataset_import import load_data
6
  from data_cleaning import clean_data
7
- from data_labelling import label_dataset
8
 
9
  st.title("AnalyzeYT Dataset Analysis")
10
 
11
- # Load and clean dataset
12
- data = load_data()
13
 
14
- if data is not None:
15
- st.write("Loaded Data Preview:")
16
- st.write(data.head())
17
-
18
- # Clean data
19
- data = clean_data(data)
20
-
21
- st.write("Cleaned Data Preview:")
22
- st.write(data.head())
23
-
24
- # Show data description
25
- st.write("Data Description:")
26
- st.write(data.describe())
27
 
28
- # Data visualization options
29
- st.write("Data Visualization:")
30
- chart_type = st.selectbox("Select Chart Type", ['Line Chart', 'Bar Chart', 'Histogram'])
 
 
 
 
 
 
 
 
 
 
 
31
 
32
- if chart_type == 'Line Chart':
33
- x_axis = st.selectbox("Select X-axis Column", data.columns)
34
- y_axis = st.selectbox("Select Y-axis Column", data.columns)
35
- title = st.text_input("Enter Chart Title", "Line Chart")
36
- st.line_chart(data[[x_axis, y_axis]])
37
- st.write(f"Line Chart: {title}")
38
-
39
- elif chart_type == 'Bar Chart':
40
- x_axis = st.selectbox("Select X-axis Column", data.columns)
41
- y_axis = st.selectbox("Select Y-axis Column", data.columns)
42
- title = st.text_input("Enter Chart Title", "Bar Chart")
43
- st.bar_chart(data[[x_axis, y_axis]])
44
- st.write(f"Bar Chart: {title}")
45
 
46
- elif chart_type == 'Histogram':
47
- selected_column = st.selectbox("Select Column for Histogram", data.columns)
48
- bins = st.slider("Number of Bins", min_value=10, max_value=100, value=30)
49
- title = st.text_input("Enter Chart Title", "Histogram")
50
- plt.hist(data[selected_column], bins=bins)
51
- plt.title(title)
52
- plt.xlabel(selected_column)
53
- plt.ylabel('Frequency')
54
  st.pyplot()
55
 
56
- # Add option for labeling the dataset
57
- st.write("Labeling Options:")
58
- label_option = st.radio("Do you want to label your dataset?", ('No', 'Yes'))
59
 
60
- if label_option == 'Yes':
61
- # Select columns for labeling
62
- columns_to_label = st.multiselect("Select Columns to Label", data.columns)
63
-
64
- # Input new column names for labeled data
65
- new_column_names = []
66
- for col in columns_to_label:
67
- new_name = st.text_input(f"Enter new name for labeled column '{col}':", f"{col}_labeled")
68
- new_column_names.append(new_name)
69
-
70
- output_name = st.text_input("Enter Output File Name", "labeled_dataset.csv")
71
- if st.button("Run Labeling"):
72
- labeled_data = label_dataset(data, columns_to_label, new_column_names)
73
- labeled_data.to_csv(output_name, index=False)
74
- st.write("Labeling Completed. Download your file below:")
75
- st.download_button(label="Download Labeled Dataset", data=labeled_data.to_csv(index=False), file_name=output_name, mime='text/csv')
 
 
 
 
 
 
 
 
4
  import matplotlib.pyplot as plt
5
  from dataset_import import load_data
6
  from data_cleaning import clean_data
7
+ from YTCommentScraper import get_transcript, summarize_with_langchain_and_openai
8
 
9
  st.title("AnalyzeYT Dataset Analysis")
10
 
11
+ # Option to extract YouTube comments
12
+ extract_comments = st.radio("Do you want to extract comments from a YouTube video?", ("No", "Yes"))
13
 
14
+ if extract_comments == "Yes":
15
+ openai_api_key = st.text_input("Enter your OpenAI API Key:", type='password')
16
+ video_id = st.text_input("Enter the YouTube Video ID:")
 
 
 
 
 
 
 
 
 
 
17
 
18
+ if st.button('Extract and Summarize Comments'):
19
+ if openai_api_key and video_id:
20
+ try:
21
+ transcript, language_code = get_transcript(video_id)
22
+ summary = summarize_with_langchain_and_openai(transcript, language_code, openai_api_key)
23
+ st.write("Summary of YouTube Comments:")
24
+ st.markdown(summary)
25
+ except Exception as e:
26
+ st.write(str(e))
27
+ else:
28
+ st.write("Please enter both your OpenAI API Key and YouTube Video ID.")
29
+ else:
30
+ # Proceed with normal data analysis
31
+ data = load_data()
32
 
33
+ if data is not None:
34
+ st.write("Loaded Data Preview:")
35
+ st.write(data.head())
36
+
37
+ # Clean data
38
+ data = clean_data(data)
39
+
40
+ st.write("Cleaned Data Preview:")
41
+ st.write(data.head())
42
+
43
+ # Show data description of cleaned data
44
+ st.write("Cleaned Data Description:")
45
+ st.write(data.describe())
46
 
47
+ # Add correlation matrix for cleaned data
48
+ st.write("Correlation Matrix of Cleaned Data:")
49
+ corr = data.corr()
50
+ st.write(corr)
51
+ sns.heatmap(corr, annot=True, cmap='coolwarm')
 
 
 
52
  st.pyplot()
53
 
54
+ # Data visualization options for cleaned data
55
+ st.write("Data Visualization on Cleaned Data:")
56
+ chart_type = st.selectbox("Select Chart Type", ['Line Chart', 'Bar Chart', 'Histogram'])
57
 
58
+ if chart_type == 'Line Chart':
59
+ x_axis = st.selectbox("Select X-axis Column", data.columns)
60
+ y_axis = st.selectbox("Select Y-axis Column", data.columns)
61
+ title = st.text_input("Enter Chart Title", "Line Chart")
62
+ st.line_chart(data[[x_axis, y_axis]])
63
+ st.write(f"Line Chart: {title}")
64
+
65
+ elif chart_type == 'Bar Chart':
66
+ x_axis = st.selectbox("Select X-axis Column", data.columns)
67
+ y_axis = st.selectbox("Select Y-axis Column", data.columns)
68
+ title = st.text_input("Enter Chart Title", "Bar Chart")
69
+ st.bar_chart(data[[x_axis, y_axis]])
70
+ st.write(f"Bar Chart: {title}")
71
+
72
+ elif chart_type == 'Histogram':
73
+ selected_column = st.selectbox("Select Column for Histogram", data.columns)
74
+ bins = st.slider("Number of Bins", min_value=10, max_value=100, value=30)
75
+ title = st.text_input("Enter Chart Title", "Histogram")
76
+ plt.hist(data[selected_column], bins=bins)
77
+ plt.title(title)
78
+ plt.xlabel(selected_column)
79
+ plt.ylabel('Frequency')
80
+ st.pyplot()