Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	feat(train): custom start_preconditioning_step
Browse files
- tools/train/train.py +5 -1
    	
        tools/train/train.py
    CHANGED
    
    | @@ -248,6 +248,10 @@ class TrainingArguments: | |
| 248 | 
             
                    default=1024,
         | 
| 249 | 
             
                    metadata={"help": "Chunked size for large layers with Distributed Shampoo."},
         | 
| 250 | 
             
                )
         | 
|  | |
|  | |
|  | |
|  | |
| 251 | 
             
                preconditioning_compute_steps: int = field(
         | 
| 252 | 
             
                    default=10, metadata={"help": "Number of steps to update preconditioner."}
         | 
| 253 | 
             
                )
         | 
| @@ -608,7 +612,7 @@ def main(): | |
| 608 | 
             
                        beta2=training_args.beta2,
         | 
| 609 | 
             
                        diagonal_epsilon=1e-10,
         | 
| 610 | 
             
                        matrix_epsilon=1e-8,
         | 
| 611 | 
            -
                        start_preconditioning_step=training_args. | 
| 612 | 
             
                        preconditioning_compute_steps=training_args.preconditioning_compute_steps,
         | 
| 613 | 
             
                        statistics_compute_steps=1,
         | 
| 614 | 
             
                        best_effort_shape_interpretation=True,
         | 
|  | |
| 248 | 
             
                    default=1024,
         | 
| 249 | 
             
                    metadata={"help": "Chunked size for large layers with Distributed Shampoo."},
         | 
| 250 | 
             
                )
         | 
| 251 | 
            +
                start_preconditioning_step: int = field(
         | 
| 252 | 
            +
                    default=100,
         | 
| 253 | 
            +
                    metadata={"help": "Number of steps before starting to update preconditioner."},
         | 
| 254 | 
            +
                )
         | 
| 255 | 
             
                preconditioning_compute_steps: int = field(
         | 
| 256 | 
             
                    default=10, metadata={"help": "Number of steps to update preconditioner."}
         | 
| 257 | 
             
                )
         | 
|  | |
| 612 | 
             
                        beta2=training_args.beta2,
         | 
| 613 | 
             
                        diagonal_epsilon=1e-10,
         | 
| 614 | 
             
                        matrix_epsilon=1e-8,
         | 
| 615 | 
            +
                        start_preconditioning_step=training_args.start_preconditioning_step,
         | 
| 616 | 
             
                        preconditioning_compute_steps=training_args.preconditioning_compute_steps,
         | 
| 617 | 
             
                        statistics_compute_steps=1,
         | 
| 618 | 
             
                        best_effort_shape_interpretation=True,
         | 

