Tonic committed on
Commit
90aadc0
·
verified ·
1 Parent(s): b13876b

attempts to identify data bug

Browse files
Files changed (1) hide show
  1. trainer.py +10 -0
trainer.py CHANGED
@@ -55,11 +55,18 @@ class SmolLM3Trainer:
55
  )
56
 
57
  # Get datasets
 
58
  train_dataset = self.dataset.get_train_dataset()
 
 
 
59
  eval_dataset = self.dataset.get_eval_dataset()
 
60
 
61
  # Get data collator
 
62
  data_collator = self.dataset.get_data_collator()
 
63
 
64
  # Add monitoring callback - temporarily disabled to debug
65
  callbacks = []
@@ -116,6 +123,8 @@ class SmolLM3Trainer:
116
  # logger.info("Continuing with console monitoring only")
117
 
118
  # Try standard Trainer first (more stable with callbacks)
 
 
119
  try:
120
  trainer = Trainer(
121
  model=self.model.model,
@@ -129,6 +138,7 @@ class SmolLM3Trainer:
129
  logger.info("Using standard Hugging Face Trainer")
130
  except Exception as e:
131
  logger.warning(f"Standard Trainer failed: {e}")
 
132
  # Fallback to SFTTrainer
133
  trainer = SFTTrainer(
134
  model=self.model.model,
 
55
  )
56
 
57
  # Get datasets
58
+ logger.info("Getting train dataset...")
59
  train_dataset = self.dataset.get_train_dataset()
60
+ logger.info(f"Train dataset: {type(train_dataset)} with {len(train_dataset)} samples")
61
+
62
+ logger.info("Getting eval dataset...")
63
  eval_dataset = self.dataset.get_eval_dataset()
64
+ logger.info(f"Eval dataset: {type(eval_dataset)} with {len(eval_dataset)} samples")
65
 
66
  # Get data collator
67
+ logger.info("Getting data collator...")
68
  data_collator = self.dataset.get_data_collator()
69
+ logger.info(f"Data collator: {type(data_collator)}")
70
 
71
  # Add monitoring callback - temporarily disabled to debug
72
  callbacks = []
 
123
  # logger.info("Continuing with console monitoring only")
124
 
125
  # Try standard Trainer first (more stable with callbacks)
126
+ logger.info("Creating Trainer with training arguments...")
127
+ logger.info(f"Training args keys: {list(training_args.keys())}")
128
  try:
129
  trainer = Trainer(
130
  model=self.model.model,
 
138
  logger.info("Using standard Hugging Face Trainer")
139
  except Exception as e:
140
  logger.warning(f"Standard Trainer failed: {e}")
141
+ logger.error(f"Trainer creation error details: {type(e).__name__}: {str(e)}")
142
  # Fallback to SFTTrainer
143
  trainer = SFTTrainer(
144
  model=self.model.model,