Update train.py
train.py
CHANGED
@@ -226,7 +226,7 @@ def main():
         print(f"✅ Loaded {len(cuad)} examples")
     except Exception as e:
         print(f"❌ Dataset loading failed: {e}")
-        cuad = load_dataset("
+        cuad = load_dataset("theatticusproject/cuad-qa", split="train", trust_remote_code=True, download_mode="force_redownload")
 
     cuad = cuad.shuffle(seed=SEED)
 
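Note: the only line this hunk changes is the fallback load_dataset call; the surrounding try/except is context that the diff view truncates. A minimal sketch of the full pattern, assuming the dataset id shown above and a SEED constant defined earlier in train.py:

from datasets import load_dataset

SEED = 42  # assumption: train.py defines its own seed value

def load_cuad():
    # First attempt reuses whatever is already in the local Hugging Face cache.
    try:
        cuad = load_dataset("theatticusproject/cuad-qa", split="train", trust_remote_code=True)
        print(f"✅ Loaded {len(cuad)} examples")
    except Exception as e:
        print(f"❌ Dataset loading failed: {e}")
        # Fallback branch: force a clean re-download in case the cached copy is corrupt.
        cuad = load_dataset(
            "theatticusproject/cuad-qa",
            split="train",
            trust_remote_code=True,
            download_mode="force_redownload",
        )
    return cuad.shuffle(seed=SEED)

Everything outside the force_redownload call is inferred scaffolding, not part of the commit.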
@@ -269,21 +269,40 @@ def main():
     print(f" Training features: {len(train_feats)}")
     print(f" Validation features: {len(val_feats)}")
 
-    # ── training args
-    #
-
+    # ── training args with fixed eval/save step alignment ──────────────────
+    # Calculate proper steps that align
+    batch_size = 16
+    gradient_accumulation_steps = 2
+    effective_batch_size = batch_size * gradient_accumulation_steps
+
+    # Calculate total training steps
+    num_epochs = 6 if USE_SUBSET else 4
+    steps_per_epoch = len(train_feats) // effective_batch_size
+    total_steps = steps_per_epoch * num_epochs
+
+    # Set eval steps first
+    eval_steps = max(50, steps_per_epoch // 4)  # Evaluate 4 times per epoch
+
+    # Set save steps as a multiple of eval steps
+    save_steps = eval_steps * 2  # Save every 2 evaluations
+
+    print(f"📊 Training configuration:")
+    print(f" Steps per epoch: {steps_per_epoch}")
+    print(f" Total steps: {total_steps}")
+    print(f" Eval steps: {eval_steps}")
+    print(f" Save steps: {save_steps}")
 
     args = TrainingArguments(
         output_dir="./cuad_lora_out",
         learning_rate=5e-5,  # Slightly higher for smaller dataset
-        num_train_epochs=
-        per_device_train_batch_size=
+        num_train_epochs=num_epochs,
+        per_device_train_batch_size=batch_size,
         per_device_eval_batch_size=16,
-        gradient_accumulation_steps=
+        gradient_accumulation_steps=gradient_accumulation_steps,
         fp16=False, bf16=True,
         eval_strategy="steps",
-        eval_steps=
-        save_steps=
+        eval_steps=eval_steps,
+        save_steps=save_steps,
         save_total_limit=2,
         weight_decay=0.01,
         lr_scheduler_type="cosine",
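To see how the new step arithmetic behaves, here is the same math run with an illustrative feature count; the real len(train_feats) is printed by the script and will differ, and USE_SUBSET is set elsewhere in train.py:

# Illustrative numbers only; len(train_feats) and USE_SUBSET come from earlier in train.py.
train_feature_count = 8_000           # hypothetical stand-in for len(train_feats)
USE_SUBSET = True                     # hypothetical flag value

batch_size = 16
gradient_accumulation_steps = 2
effective_batch_size = batch_size * gradient_accumulation_steps  # 32

num_epochs = 6 if USE_SUBSET else 4                            # 6
steps_per_epoch = train_feature_count // effective_batch_size  # 250
total_steps = steps_per_epoch * num_epochs                     # 1500

eval_steps = max(50, steps_per_epoch // 4)  # 62 -> roughly four evaluations per epoch
save_steps = eval_steps * 2                 # 124 -> always a round multiple of eval_steps

With these numbers a checkpoint is written every second evaluation, and the max(50, ...) floor keeps very small subsets from evaluating every handful of steps.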
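Deriving save_steps from eval_steps keeps the two intervals aligned. That matters in particular if the script also passes load_best_model_at_end=True (not visible in this hunk), since Transformers rejects configurations where save_steps is not a round multiple of eval_steps; even without that flag, aligned intervals mean every saved checkpoint has a matching evaluation. A one-line guard would make the invariant explicit; this is a suggestion, not part of the commit:

# Optional sanity check before constructing TrainingArguments.
assert save_steps % eval_steps == 0, "save_steps must be a round multiple of eval_steps"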