Update data_processor.py
Browse files- data_processor.py +15 -4
data_processor.py
CHANGED
|
@@ -184,14 +184,16 @@
|
|
| 184 |
|
| 185 |
|
| 186 |
|
| 187 |
-
|
| 188 |
import re
|
| 189 |
import pandas as pd
|
| 190 |
import os
|
| 191 |
from huggingface_hub import InferenceClient
|
| 192 |
|
| 193 |
class DataProcessor:
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
| 195 |
ENGAGED_STR = 'Engaged'
|
| 196 |
PARTIALLY_ENGAGED_STR = 'Partially Engaged'
|
| 197 |
NOT_ENGAGED_STR = 'Not Engaged'
|
|
@@ -202,7 +204,7 @@ class DataProcessor:
|
|
| 202 |
raise ValueError("HF_API_KEY not set in environment variables")
|
| 203 |
self.client = InferenceClient(api_key=self.hf_api_key)
|
| 204 |
self.student_metrics_df = student_metrics_df
|
| 205 |
-
|
| 206 |
|
| 207 |
def read_excel(self, uploaded_file):
    """Load an uploaded Excel file into a pandas DataFrame.

    Args:
        uploaded_file: Anything ``pandas.read_excel`` accepts as its
            first argument (path or file-like object).

    Returns:
        The DataFrame produced by ``pandas.read_excel`` with its
        default options.
    """
    frame = pd.read_excel(uploaded_file)
    return frame
|
|
@@ -256,9 +258,17 @@ class DataProcessor:
|
|
| 256 |
df.columns = updated_columns
|
| 257 |
return df
|
| 258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
def compute_intervention_statistics(self, df):
|
|
|
|
| 260 |
total_days = len(df)
|
| 261 |
-
sessions_held = df[
|
| 262 |
intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
|
| 263 |
return pd.DataFrame({
|
| 264 |
'Intervention Dosage (%)': [round(intervention_frequency, 0)],
|
|
@@ -266,6 +276,7 @@ class DataProcessor:
|
|
| 266 |
'Intervention Sessions Not Held': [total_days - sessions_held],
|
| 267 |
'Total Number of Days Available': [total_days]
|
| 268 |
})
|
|
|
|
| 269 |
|
| 270 |
def classify_engagement(self, engagement_str):
|
| 271 |
engagement_str = engagement_str.lower()
|
|
|
|
| 184 |
|
| 185 |
|
| 186 |
|
|
|
|
| 187 |
import re
|
| 188 |
import pandas as pd
|
| 189 |
import os
|
| 190 |
from huggingface_hub import InferenceClient
|
| 191 |
|
| 192 |
class DataProcessor:
|
| 193 |
+
INTERVENTION_COLUMN_OPTIONS = [
|
| 194 |
+
'Did the intervention happen today?',
|
| 195 |
+
'Did the Intervention Take Place Today?'
|
| 196 |
+
]
|
| 197 |
ENGAGED_STR = 'Engaged'
|
| 198 |
PARTIALLY_ENGAGED_STR = 'Partially Engaged'
|
| 199 |
NOT_ENGAGED_STR = 'Not Engaged'
|
|
|
|
| 204 |
raise ValueError("HF_API_KEY not set in environment variables")
|
| 205 |
self.client = InferenceClient(api_key=self.hf_api_key)
|
| 206 |
self.student_metrics_df = student_metrics_df
|
| 207 |
+
self.intervention_column = None # Will be set when processing data
|
| 208 |
|
| 209 |
def read_excel(self, uploaded_file):
    """Load an uploaded Excel file into a pandas DataFrame.

    Args:
        uploaded_file: Anything ``pandas.read_excel`` accepts as its
            first argument (path or file-like object).

    Returns:
        The DataFrame produced by ``pandas.read_excel`` with its
        default options.
    """
    frame = pd.read_excel(uploaded_file)
    return frame
|
|
|
|
| 258 |
df.columns = updated_columns
|
| 259 |
return df
|
| 260 |
|
| 261 |
+
def find_intervention_column(self, df):
    """Locate the intervention column in ``df``.

    Each header in ``INTERVENTION_COLUMN_OPTIONS`` is tried in order,
    first as an exact match against ``df.columns``. If no header matches
    exactly, the search is repeated ignoring letter case and surrounding
    whitespace, so headers such as ``' did the intervention happen today? '``
    are still recognised.

    The matched label is stored on ``self.intervention_column`` and
    returned exactly as it appears in ``df.columns``, so it can be used
    directly to index ``df``.

    Args:
        df: DataFrame whose column labels are searched.

    Returns:
        The label of the matching column as it appears in ``df``.

    Raises:
        ValueError: If no column matches any of the known headers.
    """
    # Pass 1: exact matches keep the original behaviour and priority.
    for column in self.INTERVENTION_COLUMN_OPTIONS:
        if column in df.columns:
            self.intervention_column = column
            return column

    # Pass 2: tolerate case / whitespace differences in the headers.
    # setdefault keeps the first (left-most) column for a given key.
    normalized = {}
    for label in df.columns:
        normalized.setdefault(str(label).strip().casefold(), label)

    for column in self.INTERVENTION_COLUMN_OPTIONS:
        key = column.strip().casefold()
        if key in normalized:
            self.intervention_column = normalized[key]
            return normalized[key]

    raise ValueError("No intervention column found in the dataframe.")
|
| 267 |
+
|
| 268 |
def compute_intervention_statistics(self, df):
|
| 269 |
+
intervention_column = self.find_intervention_column(df)
|
| 270 |
total_days = len(df)
|
| 271 |
+
sessions_held = df[intervention_column].str.strip().str.lower().eq('yes').sum()
|
| 272 |
intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
|
| 273 |
return pd.DataFrame({
|
| 274 |
'Intervention Dosage (%)': [round(intervention_frequency, 0)],
|
|
|
|
| 276 |
'Intervention Sessions Not Held': [total_days - sessions_held],
|
| 277 |
'Total Number of Days Available': [total_days]
|
| 278 |
})
|
| 279 |
+
|
| 280 |
|
| 281 |
def classify_engagement(self, engagement_str):
|
| 282 |
engagement_str = engagement_str.lower()
|