Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,10 +4,11 @@ import seaborn as sns
|
|
4 |
import matplotlib.pyplot as plt
|
5 |
from dataset_import import load_data
|
6 |
from data_cleaning import clean_data
|
|
|
7 |
|
8 |
st.title("AnalyzeYT Dataset Analysis")
|
9 |
|
10 |
-
# Load dataset
|
11 |
data = load_data()
|
12 |
|
13 |
if data is not None:
|
@@ -20,19 +21,12 @@ if data is not None:
|
|
20 |
st.write("Cleaned Data Preview:")
|
21 |
st.write(data.head())
|
22 |
|
23 |
-
# Show data description
|
24 |
-
st.write("…")  # [removed line; the string's text was lost when the diff was extracted]
|
25 |
st.write(data.describe())
|
26 |
|
27 |
-
# [comment text lost in extraction; it headed the removed correlation-matrix code below]
|
28 |
-
st.write("…")  # [removed line; string text lost in extraction — it preceded the correlation matrix output]
|
29 |
-
corr = data.corr()
|
30 |
-
st.write(corr)
|
31 |
-
sns.heatmap(corr, annot=True, cmap='coolwarm')
|
32 |
-
st.pyplot()
|
33 |
-
|
34 |
-
# Data visualization options for cleaned data
|
35 |
-
st.write("Data Visualization on Cleaned Data:")
|
36 |
chart_type = st.selectbox("Select Chart Type", ['Line Chart', 'Bar Chart', 'Histogram'])
|
37 |
|
38 |
if chart_type == 'Line Chart':
|
@@ -58,3 +52,24 @@ if data is not None:
|
|
58 |
plt.xlabel(selected_column)
|
59 |
plt.ylabel('Frequency')
|
60 |
st.pyplot()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import matplotlib.pyplot as plt
|
5 |
from dataset_import import load_data
|
6 |
from data_cleaning import clean_data
|
7 |
+
from data_labelling import label_dataset
|
8 |
|
9 |
st.title("AnalyzeYT Dataset Analysis")
|
10 |
|
11 |
+
# Load and clean dataset
|
12 |
data = load_data()
|
13 |
|
14 |
if data is not None:
|
|
|
21 |
st.write("Cleaned Data Preview:")
|
22 |
st.write(data.head())
|
23 |
|
24 |
+
# Show data description
|
25 |
+
st.write("Data Description:")
|
26 |
st.write(data.describe())
|
27 |
|
28 |
+
# Data visualization options
|
29 |
+
st.write("Data Visualization:")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
chart_type = st.selectbox("Select Chart Type", ['Line Chart', 'Bar Chart', 'Histogram'])
|
31 |
|
32 |
if chart_type == 'Line Chart':
|
|
|
52 |
plt.xlabel(selected_column)
|
53 |
plt.ylabel('Frequency')
|
54 |
st.pyplot()
|
55 |
+
|
56 |
+
# Add option for labeling the dataset
|
57 |
+
st.write("Labeling Options:")
|
58 |
+
label_option = st.radio("Do you want to label your dataset?", ('No', 'Yes'))
|
59 |
+
|
60 |
+
if label_option == 'Yes':
|
61 |
+
# Select columns for labeling
|
62 |
+
columns_to_label = st.multiselect("Select Columns to Label", data.columns)
|
63 |
+
|
64 |
+
# Input new column names for labeled data
|
65 |
+
new_column_names = []
|
66 |
+
for col in columns_to_label:
|
67 |
+
new_name = st.text_input(f"Enter new name for labeled column '{col}':", f"{col}_labeled")
|
68 |
+
new_column_names.append(new_name)
|
69 |
+
|
70 |
+
output_name = st.text_input("Enter Output File Name", "labeled_dataset.csv")
|
71 |
+
if st.button("Run Labeling"):
|
72 |
+
labeled_data = label_dataset(data, columns_to_label, new_column_names)
|
73 |
+
labeled_data.to_csv(output_name, index=False)
|
74 |
+
st.write("Labeling Completed. Download your file below:")
|
75 |
+
st.download_button(label="Download Labeled Dataset", data=labeled_data.to_csv(), file_name=output_name, mime='text/csv')
|