Spaces:
Running
Running
Enable the external logging boolean correctly
Browse files
- model.py +2 -12
- trainer.py +12 -13
model.py
CHANGED
@@ -151,7 +151,7 @@ class SmolLM3Model:
|
|
151 |
# Only enable DDP if multiple GPUs are available
|
152 |
"ddp_backend": self.config.ddp_backend if torch.cuda.device_count() > 1 else None,
|
153 |
"ddp_find_unused_parameters": self.config.ddp_find_unused_parameters if torch.cuda.device_count() > 1 else False,
|
154 |
-
"report_to":
|
155 |
"remove_unused_columns": False,
|
156 |
"dataloader_pin_memory": False,
|
157 |
"group_by_length": True,
|
@@ -172,17 +172,7 @@ class SmolLM3Model:
|
|
172 |
# Override with kwargs
|
173 |
training_args.update(kwargs)
|
174 |
|
175 |
-
|
176 |
-
for key, value in training_args.items():
|
177 |
-
if isinstance(value, bool):
|
178 |
-
logger.info(f"Boolean argument: {key} = {value}")
|
179 |
-
|
180 |
-
try:
|
181 |
-
return TrainingArguments(**training_args)
|
182 |
-
except Exception as e:
|
183 |
-
logger.error(f"Failed to create TrainingArguments: {e}")
|
184 |
-
logger.error(f"Training arguments: {training_args}")
|
185 |
-
raise
|
186 |
|
187 |
def save_pretrained(self, path: str):
|
188 |
"""Save model and tokenizer"""
|
|
|
151 |
# Only enable DDP if multiple GPUs are available
|
152 |
"ddp_backend": self.config.ddp_backend if torch.cuda.device_count() > 1 else None,
|
153 |
"ddp_find_unused_parameters": self.config.ddp_find_unused_parameters if torch.cuda.device_count() > 1 else False,
|
154 |
+
"report_to": None, # Enable external logging (default)
|
155 |
"remove_unused_columns": False,
|
156 |
"dataloader_pin_memory": False,
|
157 |
"group_by_length": True,
|
|
|
172 |
# Override with kwargs
|
173 |
training_args.update(kwargs)
|
174 |
|
175 |
+
return TrainingArguments(**training_args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
|
177 |
def save_pretrained(self, path: str):
|
178 |
"""Save model and tokenizer"""
|
trainer.py
CHANGED
@@ -98,19 +98,18 @@ class SmolLM3Trainer:
|
|
98 |
callbacks.append(SimpleConsoleCallback())
|
99 |
logger.info("Added simple console monitoring callback")
|
100 |
|
101 |
-
# Try to add Trackio callback if available
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
# logger.info("Continuing with console monitoring only")
|
114 |
|
115 |
# Try standard Trainer first (more stable with callbacks)
|
116 |
try:
|
|
|
98 |
callbacks.append(SimpleConsoleCallback())
|
99 |
logger.info("Added simple console monitoring callback")
|
100 |
|
101 |
+
# Try to add Trackio callback if available
|
102 |
+
if self.monitor and self.monitor.enable_tracking:
|
103 |
+
try:
|
104 |
+
trackio_callback = self.monitor.create_monitoring_callback()
|
105 |
+
if trackio_callback:
|
106 |
+
callbacks.append(trackio_callback)
|
107 |
+
logger.info("Added Trackio monitoring callback")
|
108 |
+
else:
|
109 |
+
logger.warning("Failed to create Trackio callback")
|
110 |
+
except Exception as e:
|
111 |
+
logger.error(f"Error creating Trackio callback: {e}")
|
112 |
+
logger.info("Continuing with console monitoring only")
|
|
|
113 |
|
114 |
# Try standard Trainer first (more stable with callbacks)
|
115 |
try:
|