Tonic committed on
Commit
d4bee15
·
verified ·
1 Parent(s): 2248f2d

Try to resolve the issue with SFTTrainer or Trackio

Browse files
Files changed (1) hide show
  1. trainer.py +25 -21
trainer.py CHANGED
@@ -98,40 +98,44 @@ class SmolLM3Trainer:
98
  callbacks.append(SimpleConsoleCallback())
99
  logger.info("Added simple console monitoring callback")
100
 
101
- # Try to add Trackio callback if available
102
- if self.monitor and self.monitor.enable_tracking:
103
- try:
104
- trackio_callback = self.monitor.create_monitoring_callback()
105
- if trackio_callback:
106
- callbacks.append(trackio_callback)
107
- logger.info("Added Trackio monitoring callback")
108
- else:
109
- logger.warning("Failed to create Trackio callback")
110
- except Exception as e:
111
- logger.error(f"Error creating Trackio callback: {e}")
112
- logger.info("Continuing with console monitoring only")
 
113
 
114
- if self.use_sft_trainer:
115
- # Use SFTTrainer for supervised fine-tuning
116
- trainer = SFTTrainer(
117
  model=self.model.model,
 
 
118
  train_dataset=train_dataset,
119
  eval_dataset=eval_dataset,
120
- args=training_args,
121
  data_collator=data_collator,
122
  callbacks=callbacks,
123
  )
124
- else:
125
- # Use standard Trainer
126
- trainer = Trainer(
 
 
127
  model=self.model.model,
128
- tokenizer=self.model.tokenizer,
129
- args=training_args,
130
  train_dataset=train_dataset,
131
  eval_dataset=eval_dataset,
 
132
  data_collator=data_collator,
133
  callbacks=callbacks,
134
  )
 
135
 
136
  return trainer
137
 
 
98
  callbacks.append(SimpleConsoleCallback())
99
  logger.info("Added simple console monitoring callback")
100
 
101
+ # Try to add Trackio callback if available (temporarily disabled for debugging)
102
+ logger.info("Skipping Trackio callback to debug training issue")
103
+ # if self.monitor and self.monitor.enable_tracking:
104
+ # try:
105
+ # trackio_callback = self.monitor.create_monitoring_callback()
106
+ # if trackio_callback:
107
+ # callbacks.append(trackio_callback)
108
+ # logger.info("Added Trackio monitoring callback")
109
+ # else:
110
+ # logger.warning("Failed to create Trackio callback")
111
+ # except Exception as e:
112
+ # logger.error(f"Error creating Trackio callback: {e}")
113
+ # logger.info("Continuing with console monitoring only")
114
 
115
+ # Try standard Trainer first (more stable with callbacks)
116
+ try:
117
+ trainer = Trainer(
118
  model=self.model.model,
119
+ tokenizer=self.model.tokenizer,
120
+ args=training_args,
121
  train_dataset=train_dataset,
122
  eval_dataset=eval_dataset,
 
123
  data_collator=data_collator,
124
  callbacks=callbacks,
125
  )
126
+ logger.info("Using standard Hugging Face Trainer")
127
+ except Exception as e:
128
+ logger.warning(f"Standard Trainer failed: {e}")
129
+ # Fallback to SFTTrainer
130
+ trainer = SFTTrainer(
131
  model=self.model.model,
 
 
132
  train_dataset=train_dataset,
133
  eval_dataset=eval_dataset,
134
+ args=training_args,
135
  data_collator=data_collator,
136
  callbacks=callbacks,
137
  )
138
+ logger.info("Using SFTTrainer")
139
 
140
  return trainer
141