Spaces:

Abu1998
/

A2D

Sleeping

Abu1998 committed on Aug 15, 2024

Commit

49bf9dd

verified ·

1 Parent(s): 67c445f

Update data_cleaning.py

Files changed (1) hide show

data_cleaning.py CHANGED Viewed

@@ -10,19 +10,13 @@ def clean_data(df):
     df['ToWhomTheyReplied'] = df['ToWhomTheyReplied'].fillna(df['Channel'])
     # Drop rows where 'Reply' column is missing
-    before = df.shape[0]
     df.dropna(subset=['Reply'], inplace=True)
-    after = df.shape[0]
     # Calculate comment and reply lengths
     df['comment_length'] = df['Comment'].str.len()
     df['reply_length'] = df['Reply'].str.len()
     # Remove duplicate rows
-    num_duplicates = df.duplicated().sum()
-    df_deduplicated = df.drop_duplicates()
-    # Print number of duplicates
-    print('Number of duplicate rows:', num_duplicates)
-    return df_deduplicated

     df['ToWhomTheyReplied'] = df['ToWhomTheyReplied'].fillna(df['Channel'])
     # Drop rows where 'Reply' column is missing
     df.dropna(subset=['Reply'], inplace=True)
     # Calculate comment and reply lengths
     df['comment_length'] = df['Comment'].str.len()
     df['reply_length'] = df['Reply'].str.len()
     # Remove duplicate rows
+    df = df.drop_duplicates()
+    return df