AvocadoMuffin committed
Commit a93a747 · verified · 1 Parent(s): 4228969

Update train.py

Files changed (1)
  train.py  +28 -9
train.py CHANGED

@@ -226,7 +226,7 @@ def main():
         print(f"✅ Loaded {len(cuad)} examples")
     except Exception as e:
         print(f"❌ Dataset loading failed: {e}")
-        cuad = load_dataset("theatricusproject/cuad-qa", split="train", trust_remote_code=True, download_mode="force_redownload")
+        cuad = load_dataset("theatticusproject/cuad-qa", split="train", trust_remote_code=True, download_mode="force_redownload")
 
     cuad = cuad.shuffle(seed=SEED)
 
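The only functional change in this hunk is the Hub org name: theatricusproject → theatticusproject (CUAD is published by The Atticus Project, hence "theatticus"). For context, a minimal sketch of the load-with-fallback pattern the patched line sits in; the load_cuad wrapper, its default seed, and the plain-load try branch are illustrative assumptions, since only the except branch is visible in this hunk:

    from datasets import load_dataset

    DATASET_ID = "theatticusproject/cuad-qa"  # "theatticus...", not "theatricus..."

    def load_cuad(seed: int = 42):
        """Load CUAD-QA from the Hub, retrying with a forced re-download on failure."""
        try:
            cuad = load_dataset(DATASET_ID, split="train", trust_remote_code=True)
            print(f"✅ Loaded {len(cuad)} examples")
        except Exception as e:
            print(f"❌ Dataset loading failed: {e}")
            # A stale or corrupted local cache is the usual culprit; bypass it entirely.
            cuad = load_dataset(DATASET_ID, split="train", trust_remote_code=True,
                                download_mode="force_redownload")
        return cuad.shuffle(seed=seed)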
@@ -269,21 +269,40 @@ def main():
     print(f" Training features: {len(train_feats)}")
     print(f" Validation features: {len(val_feats)}")
 
-    # ── training args ──────────────────────────────────────────────────
-    # Adjusted for smaller dataset
-    total_steps = (len(train_feats) // 16 // 2) * 6  # Rough estimate
+    # ── training args with fixed eval/save step alignment ──────────────────
+    # Calculate proper steps that align
+    batch_size = 16
+    gradient_accumulation_steps = 2
+    effective_batch_size = batch_size * gradient_accumulation_steps
+
+    # Calculate total training steps
+    num_epochs = 6 if USE_SUBSET else 4
+    steps_per_epoch = len(train_feats) // effective_batch_size
+    total_steps = steps_per_epoch * num_epochs
+
+    # Set eval steps first
+    eval_steps = max(50, steps_per_epoch // 4)  # Evaluate 4 times per epoch
+
+    # Set save steps as a multiple of eval steps
+    save_steps = eval_steps * 2  # Save every 2 evaluations
+
+    print(f"📊 Training configuration:")
+    print(f" Steps per epoch: {steps_per_epoch}")
+    print(f" Total steps: {total_steps}")
+    print(f" Eval steps: {eval_steps}")
+    print(f" Save steps: {save_steps}")
 
     args = TrainingArguments(
         output_dir="./cuad_lora_out",
         learning_rate=5e-5,  # Slightly higher for smaller dataset
-        num_train_epochs=6 if USE_SUBSET else 4,  # More epochs for smaller dataset
-        per_device_train_batch_size=16,
+        num_train_epochs=num_epochs,
+        per_device_train_batch_size=batch_size,
         per_device_eval_batch_size=16,
-        gradient_accumulation_steps=2,
+        gradient_accumulation_steps=gradient_accumulation_steps,
         fp16=False, bf16=True,
         eval_strategy="steps",
-        eval_steps=max(100, total_steps // 20),  # Adaptive eval steps
-        save_steps=max(200, total_steps // 10),  # Adaptive save steps
+        eval_steps=eval_steps,
+        save_steps=save_steps,
         save_total_limit=2,
         weight_decay=0.01,
         lr_scheduler_type="cosine",
 
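Why derive save_steps from eval_steps instead of computing both independently: with step-based evaluation and saving, transformers requires save_steps to be a round multiple of eval_steps whenever load_best_model_at_end=True (TrainingArguments raises a ValueError at construction otherwise; that flag is not visible in this hunk), and the old max(...) formulas could violate that. A minimal sketch of the arithmetic, using a hypothetical feature count in place of len(train_feats):

    # Hypothetical count; the script uses len(train_feats).
    num_train_features = 4000
    batch_size = 16
    gradient_accumulation_steps = 2

    effective_batch_size = batch_size * gradient_accumulation_steps  # 32
    steps_per_epoch = num_train_features // effective_batch_size     # 125

    # steps_per_epoch // 4 = 31 falls below the floor, so the floor of 50 wins.
    eval_steps = max(50, steps_per_epoch // 4)  # 50
    save_steps = eval_steps * 2                 # 100, divisible by eval_steps by construction

    assert save_steps % eval_steps == 0  # the invariant the old formulas could break
    # e.g. old scheme with total_steps = 2050: eval=max(100, 102)=102, save=max(200, 205)=205,
    # and 205 % 102 != 0, so the alignment check would fail.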