Spaces:
Running
Running
Add debug logging to help identify a data bug
Browse files
- trainer.py +10 -0
trainer.py
CHANGED
@@ -55,11 +55,18 @@ class SmolLM3Trainer:
|
|
55 |
)
|
56 |
|
57 |
# Get datasets
|
|
|
58 |
train_dataset = self.dataset.get_train_dataset()
|
|
|
|
|
|
|
59 |
eval_dataset = self.dataset.get_eval_dataset()
|
|
|
60 |
|
61 |
# Get data collator
|
|
|
62 |
data_collator = self.dataset.get_data_collator()
|
|
|
63 |
|
64 |
# Add monitoring callback - temporarily disabled to debug
|
65 |
callbacks = []
|
@@ -116,6 +123,8 @@ class SmolLM3Trainer:
|
|
116 |
# logger.info("Continuing with console monitoring only")
|
117 |
|
118 |
# Try standard Trainer first (more stable with callbacks)
|
|
|
|
|
119 |
try:
|
120 |
trainer = Trainer(
|
121 |
model=self.model.model,
|
@@ -129,6 +138,7 @@ class SmolLM3Trainer:
|
|
129 |
logger.info("Using standard Hugging Face Trainer")
|
130 |
except Exception as e:
|
131 |
logger.warning(f"Standard Trainer failed: {e}")
|
|
|
132 |
# Fallback to SFTTrainer
|
133 |
trainer = SFTTrainer(
|
134 |
model=self.model.model,
|
|
|
55 |
)
|
56 |
|
57 |
# Get datasets
|
58 |
+
logger.info("Getting train dataset...")
|
59 |
train_dataset = self.dataset.get_train_dataset()
|
60 |
+
logger.info(f"Train dataset: {type(train_dataset)} with {len(train_dataset)} samples")
|
61 |
+
|
62 |
+
logger.info("Getting eval dataset...")
|
63 |
eval_dataset = self.dataset.get_eval_dataset()
|
64 |
+
logger.info(f"Eval dataset: {type(eval_dataset)} with {len(eval_dataset)} samples")
|
65 |
|
66 |
# Get data collator
|
67 |
+
logger.info("Getting data collator...")
|
68 |
data_collator = self.dataset.get_data_collator()
|
69 |
+
logger.info(f"Data collator: {type(data_collator)}")
|
70 |
|
71 |
# Add monitoring callback - temporarily disabled to debug
|
72 |
callbacks = []
|
|
|
123 |
# logger.info("Continuing with console monitoring only")
|
124 |
|
125 |
# Try standard Trainer first (more stable with callbacks)
|
126 |
+
logger.info("Creating Trainer with training arguments...")
|
127 |
+
logger.info(f"Training args keys: {list(training_args.keys())}")
|
128 |
try:
|
129 |
trainer = Trainer(
|
130 |
model=self.model.model,
|
|
|
138 |
logger.info("Using standard Hugging Face Trainer")
|
139 |
except Exception as e:
|
140 |
logger.warning(f"Standard Trainer failed: {e}")
|
141 |
+
logger.error(f"Trainer creation error details: {type(e).__name__}: {str(e)}")
|
142 |
# Fallback to SFTTrainer
|
143 |
trainer = SFTTrainer(
|
144 |
model=self.model.model,
|