Tonic committed on
Commit
084000d
·
verified ·
1 Parent(s): cdc0df1

attempts to identify callbacks bug or trainer bug

Browse files
Files changed (1) hide show
  1. trainer.py +20 -15
trainer.py CHANGED
@@ -94,22 +94,26 @@ class SmolLM3Trainer:
94
  eval_loss = metrics.get('eval_loss', 'N/A')
95
  print(f"📊 Evaluation at step {step}: eval_loss={eval_loss}")
96
 
97
- # Add simple console callback
98
- callbacks.append(SimpleConsoleCallback())
99
- logger.info("Added simple console monitoring callback")
100
 
101
- # Try to add Trackio callback if available
102
- if self.monitor and self.monitor.enable_tracking:
103
- try:
104
- trackio_callback = self.monitor.create_monitoring_callback()
105
- if trackio_callback:
106
- callbacks.append(trackio_callback)
107
- logger.info("Added Trackio monitoring callback")
108
- else:
109
- logger.warning("Failed to create Trackio callback")
110
- except Exception as e:
111
- logger.error(f"Error creating Trackio callback: {e}")
112
- logger.info("Continuing with console monitoring only")
 
 
 
 
113
 
114
  # Try standard Trainer first (more stable with callbacks)
115
  try:
@@ -181,6 +185,7 @@ class SmolLM3Trainer:
181
 
182
  # Start training
183
  try:
 
184
  train_result = self.trainer.train()
185
 
186
  # Save the final model
 
94
  eval_loss = metrics.get('eval_loss', 'N/A')
95
  print(f"📊 Evaluation at step {step}: eval_loss={eval_loss}")
96
 
97
+ # Temporarily disable callbacks to debug the issue
98
+ callbacks = []
99
+ logger.info("Callbacks disabled for debugging")
100
 
101
+ # # Add simple console callback
102
+ # callbacks.append(SimpleConsoleCallback())
103
+ # logger.info("Added simple console monitoring callback")
104
+ #
105
+ # # Try to add Trackio callback if available
106
+ # if self.monitor and self.monitor.enable_tracking:
107
+ # try:
108
+ # trackio_callback = self.monitor.create_monitoring_callback()
109
+ # if trackio_callback:
110
+ # callbacks.append(trackio_callback)
111
+ # logger.info("Added Trackio monitoring callback")
112
+ # else:
113
+ # logger.warning("Failed to create Trackio callback")
114
+ # except Exception as e:
115
+ # logger.error(f"Error creating Trackio callback: {e}")
116
+ # logger.info("Continuing with console monitoring only")
117
 
118
  # Try standard Trainer first (more stable with callbacks)
119
  try:
 
185
 
186
  # Start training
187
  try:
188
+ logger.info("About to start trainer.train()")
189
  train_result = self.trainer.train()
190
 
191
  # Save the final model