ProfessorLeVesseur committed on
Commit
7c26327
1 Parent(s): 78f318b

Update data_processor.py

Browse files
Files changed (1) hide show
  1. data_processor.py +117 -25
data_processor.py CHANGED
@@ -117,9 +117,86 @@ class DataProcessor:
117
  else:
118
  return 'Unknown'
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  def compute_student_metrics(self, df):
121
  intervention_column = self.get_intervention_column(df)
122
- intervention_df = df[df[intervention_column].str.strip().str.lower().isin(self.YES_RESPONSES)] # Modified line
123
  intervention_sessions_held = len(intervention_df)
124
  student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
125
 
@@ -129,55 +206,70 @@ class DataProcessor:
129
  student_data = intervention_df[[col]].copy()
130
  student_data[col] = student_data[col].fillna('Absent')
131
 
132
- attendance_values = student_data[col].apply(lambda x: 1 if self.classify_engagement(x) in [
133
- self.ENGAGED_STR,
134
- self.PARTIALLY_ENGAGED_STR,
135
- self.NOT_ENGAGED_STR
136
- ] else 0)
 
 
137
 
138
  sessions_attended = attendance_values.sum()
139
  attendance_pct = (sessions_attended / intervention_sessions_held * 100) if intervention_sessions_held > 0 else 0
140
  attendance_pct = round(attendance_pct)
141
 
 
142
  engagement_counts = {
143
  self.ENGAGED_STR: 0,
144
  self.PARTIALLY_ENGAGED_STR: 0,
145
- self.NOT_ENGAGED_STR: 0,
146
- 'Absent': 0
147
  }
148
 
149
- for x in student_data[col]:
150
- classified_engagement = self.classify_engagement(x)
151
- if classified_engagement in engagement_counts:
152
- engagement_counts[classified_engagement] += 1
153
- else:
154
- engagement_counts['Absent'] += 1 # Count as Absent if not engaged
155
 
156
- total_sessions = sum(engagement_counts.values())
157
-
158
- engaged_pct = (engagement_counts[self.ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
 
 
 
159
  engaged_pct = round(engaged_pct)
160
 
161
- partially_engaged_pct = (engagement_counts[self.PARTIALLY_ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
 
 
 
162
  partially_engaged_pct = round(partially_engaged_pct)
163
 
164
- not_engaged_pct = (engagement_counts[self.NOT_ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
 
 
 
165
  not_engaged_pct = round(not_engaged_pct)
166
 
167
- absent_pct = (engagement_counts['Absent'] / total_sessions * 100) if total_sessions > 0 else 0
168
- absent_pct = round(absent_pct)
169
-
170
  # Engagement percentage is based on Engaged and Partially Engaged sessions
171
- engagement_pct = ((engagement_counts[self.ENGAGED_STR] + engagement_counts[self.PARTIALLY_ENGAGED_STR]) / total_sessions * 100) if total_sessions > 0 else 0
 
 
 
172
  engagement_pct = round(engagement_pct)
173
 
 
 
 
 
 
174
  # Determine if the student attended ≥ 90% of sessions
175
  attended_90 = "Yes" if attendance_pct >= 90 else "No"
176
 
177
  # Determine if the student was engaged ≥ 80% of the time
178
  engaged_80 = "Yes" if engagement_pct >= 80 else "No"
179
 
180
- # Store metrics in the required order
181
  student_metrics[student_name] = {
182
  'Attended ≥ 90%': attended_90,
183
  'Engagement ≥ 80%': engaged_80,
@@ -193,7 +285,7 @@ class DataProcessor:
193
  student_metrics_df = pd.DataFrame.from_dict(student_metrics, orient='index').reset_index()
194
  student_metrics_df.rename(columns={'index': 'Student'}, inplace=True)
195
  return student_metrics_df
196
-
197
  def compute_average_metrics(self, student_metrics_df):
198
  # Calculate the attendance and engagement average percentages across students
199
  attendance_avg_stats = student_metrics_df['Attendance (%)'].mean() # Average attendance percentage
 
117
  else:
118
  return 'Unknown'
119
 
120
+ # def compute_student_metrics(self, df):
121
+ # intervention_column = self.get_intervention_column(df)
122
+ # intervention_df = df[df[intervention_column].str.strip().str.lower().isin(self.YES_RESPONSES)] # Modified line
123
+ # intervention_sessions_held = len(intervention_df)
124
+ # student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
125
+
126
+ # student_metrics = {}
127
+ # for col in student_columns:
128
+ # student_name = col.replace('Student Attendance [', '').replace(']', '').strip()
129
+ # student_data = intervention_df[[col]].copy()
130
+ # student_data[col] = student_data[col].fillna('Absent')
131
+
132
+ # attendance_values = student_data[col].apply(lambda x: 1 if self.classify_engagement(x) in [
133
+ # self.ENGAGED_STR,
134
+ # self.PARTIALLY_ENGAGED_STR,
135
+ # self.NOT_ENGAGED_STR
136
+ # ] else 0)
137
+
138
+ # sessions_attended = attendance_values.sum()
139
+ # attendance_pct = (sessions_attended / intervention_sessions_held * 100) if intervention_sessions_held > 0 else 0
140
+ # attendance_pct = round(attendance_pct)
141
+
142
+ # engagement_counts = {
143
+ # self.ENGAGED_STR: 0,
144
+ # self.PARTIALLY_ENGAGED_STR: 0,
145
+ # self.NOT_ENGAGED_STR: 0,
146
+ # 'Absent': 0
147
+ # }
148
+
149
+ # for x in student_data[col]:
150
+ # classified_engagement = self.classify_engagement(x)
151
+ # if classified_engagement in engagement_counts:
152
+ # engagement_counts[classified_engagement] += 1
153
+ # else:
154
+ # engagement_counts['Absent'] += 1 # Count as Absent if not engaged
155
+
156
+ # total_sessions = sum(engagement_counts.values())
157
+
158
+ # engaged_pct = (engagement_counts[self.ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
159
+ # engaged_pct = round(engaged_pct)
160
+
161
+ # partially_engaged_pct = (engagement_counts[self.PARTIALLY_ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
162
+ # partially_engaged_pct = round(partially_engaged_pct)
163
+
164
+ # not_engaged_pct = (engagement_counts[self.NOT_ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
165
+ # not_engaged_pct = round(not_engaged_pct)
166
+
167
+ # absent_pct = (engagement_counts['Absent'] / total_sessions * 100) if total_sessions > 0 else 0
168
+ # absent_pct = round(absent_pct)
169
+
170
+ # # Engagement percentage is based on Engaged and Partially Engaged sessions
171
+ # engagement_pct = ((engagement_counts[self.ENGAGED_STR] + engagement_counts[self.PARTIALLY_ENGAGED_STR]) / total_sessions * 100) if total_sessions > 0 else 0
172
+ # engagement_pct = round(engagement_pct)
173
+
174
+ # # Determine if the student attended ≥ 90% of sessions
175
+ # attended_90 = "Yes" if attendance_pct >= 90 else "No"
176
+
177
+ # # Determine if the student was engaged ≥ 80% of the time
178
+ # engaged_80 = "Yes" if engagement_pct >= 80 else "No"
179
+
180
+ # # Store metrics in the required order
181
+ # student_metrics[student_name] = {
182
+ # 'Attended ≥ 90%': attended_90,
183
+ # 'Engagement ≥ 80%': engaged_80,
184
+ # 'Attendance (%)': attendance_pct,
185
+ # 'Engagement (%)': engagement_pct,
186
+ # f'{self.ENGAGED_STR} (%)': engaged_pct,
187
+ # f'{self.PARTIALLY_ENGAGED_STR} (%)': partially_engaged_pct,
188
+ # f'{self.NOT_ENGAGED_STR} (%)': not_engaged_pct,
189
+ # 'Absent (%)': absent_pct
190
+ # }
191
+
192
+ # # Create a DataFrame from student_metrics
193
+ # student_metrics_df = pd.DataFrame.from_dict(student_metrics, orient='index').reset_index()
194
+ # student_metrics_df.rename(columns={'index': 'Student'}, inplace=True)
195
+ # return student_metrics_df
196
+
197
  def compute_student_metrics(self, df):
198
  intervention_column = self.get_intervention_column(df)
199
+ intervention_df = df[df[intervention_column].str.strip().str.lower().isin(self.YES_RESPONSES)]
200
  intervention_sessions_held = len(intervention_df)
201
  student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
202
 
 
206
  student_data = intervention_df[[col]].copy()
207
  student_data[col] = student_data[col].fillna('Absent')
208
 
209
+ # Classify each entry
210
+ student_data['Engagement'] = student_data[col].apply(self.classify_engagement)
211
+
212
+ # Calculate attendance
213
+ attendance_values = student_data['Engagement'].apply(
214
+ lambda x: 1 if x in [self.ENGAGED_STR, self.PARTIALLY_ENGAGED_STR, self.NOT_ENGAGED_STR] else 0
215
+ )
216
 
217
  sessions_attended = attendance_values.sum()
218
  attendance_pct = (sessions_attended / intervention_sessions_held * 100) if intervention_sessions_held > 0 else 0
219
  attendance_pct = round(attendance_pct)
220
 
221
+ # Engagement counts (excluding 'Absent')
222
  engagement_counts = {
223
  self.ENGAGED_STR: 0,
224
  self.PARTIALLY_ENGAGED_STR: 0,
225
+ self.NOT_ENGAGED_STR: 0
 
226
  }
227
 
228
+ # Count the engagement types, excluding 'Absent'
229
+ for x in student_data['Engagement']:
230
+ if x in engagement_counts:
231
+ engagement_counts[x] += 1
232
+ # 'Absent' is not counted in engagement_counts
 
233
 
234
+ total_present_sessions = sum(engagement_counts.values())
235
+
236
+ engaged_pct = (
237
+ (engagement_counts[self.ENGAGED_STR] / total_present_sessions * 100)
238
+ if total_present_sessions > 0 else 0
239
+ )
240
  engaged_pct = round(engaged_pct)
241
 
242
+ partially_engaged_pct = (
243
+ (engagement_counts[self.PARTIALLY_ENGAGED_STR] / total_present_sessions * 100)
244
+ if total_present_sessions > 0 else 0
245
+ )
246
  partially_engaged_pct = round(partially_engaged_pct)
247
 
248
+ not_engaged_pct = (
249
+ (engagement_counts[self.NOT_ENGAGED_STR] / total_present_sessions * 100)
250
+ if total_present_sessions > 0 else 0
251
+ )
252
  not_engaged_pct = round(not_engaged_pct)
253
 
 
 
 
254
  # Engagement percentage is based on Engaged and Partially Engaged sessions
255
+ engagement_pct = (
256
+ ((engagement_counts[self.ENGAGED_STR] + engagement_counts[self.PARTIALLY_ENGAGED_STR]) / total_present_sessions * 100)
257
+ if total_present_sessions > 0 else 0
258
+ )
259
  engagement_pct = round(engagement_pct)
260
 
261
+ # Absent percentage (for reference, not used in engagement calculation)
262
+ absent_sessions = student_data['Engagement'].value_counts().get('Absent', 0)
263
+ absent_pct = (absent_sessions / intervention_sessions_held * 100) if intervention_sessions_held > 0 else 0
264
+ absent_pct = round(absent_pct)
265
+
266
  # Determine if the student attended ≥ 90% of sessions
267
  attended_90 = "Yes" if attendance_pct >= 90 else "No"
268
 
269
  # Determine if the student was engaged ≥ 80% of the time
270
  engaged_80 = "Yes" if engagement_pct >= 80 else "No"
271
 
272
+ # Store metrics
273
  student_metrics[student_name] = {
274
  'Attended ≥ 90%': attended_90,
275
  'Engagement ≥ 80%': engaged_80,
 
285
  student_metrics_df = pd.DataFrame.from_dict(student_metrics, orient='index').reset_index()
286
  student_metrics_df.rename(columns={'index': 'Student'}, inplace=True)
287
  return student_metrics_df
288
+
289
  def compute_average_metrics(self, student_metrics_df):
290
  # Calculate the attendance and engagement average percentages across students
291
  attendance_avg_stats = student_metrics_df['Attendance (%)'].mean() # Average attendance percentage