update outputs path so that we can mount workspace to /workspace/data (#1623)
* update outputs path so that we can mount workspace to /workspace/data
* fix ln order
This view is limited to 50 files because it contains too many changes. Every example config below receives the same one-line edit (output_dir moves under ./outputs/); docker/Dockerfile-cloud additionally creates the artifacts directory and symlinks it to /workspace/axolotl/outputs.
- docker/Dockerfile-cloud +3 -1
- examples/cerebras/btlm-ft.yml +1 -1
- examples/cerebras/qlora.yml +1 -1
- examples/code-llama/13b/lora.yml +1 -1
- examples/code-llama/13b/qlora.yml +1 -1
- examples/code-llama/34b/lora.yml +1 -1
- examples/code-llama/34b/qlora.yml +1 -1
- examples/code-llama/7b/lora.yml +1 -1
- examples/code-llama/7b/qlora.yml +1 -1
- examples/colab-notebooks/colab-axolotl-example.ipynb +1 -1
- examples/dbrx/16bit-lora.yaml +1 -1
- examples/dbrx/8bit-lora.yaml +1 -1
- examples/dbrx/fft-ds-zero3.yaml +1 -1
- examples/falcon/config-7b-lora.yml +1 -1
- examples/falcon/config-7b-qlora.yml +1 -1
- examples/falcon/config-7b.yml +1 -1
- examples/gemma/qlora.yml +1 -1
- examples/gptj/qlora.yml +1 -1
- examples/jamba/qlora.yaml +1 -1
- examples/jamba/qlora_deepspeed.yaml +1 -1
- examples/jeopardy-bot/config.yml +1 -1
- examples/llama-2/fft_optimized.yml +1 -1
- examples/llama-2/gptq-lora.yml +1 -1
- examples/llama-2/lisa.yml +1 -1
- examples/llama-2/loftq.yml +1 -1
- examples/llama-2/lora.yml +1 -1
- examples/llama-2/qlora-fsdp.yml +1 -1
- examples/llama-2/qlora.yml +1 -1
- examples/llama-2/relora.yml +1 -1
- examples/llama-3/fft-8b.yaml +1 -1
- examples/llama-3/lora-8b.yml +1 -1
- examples/llama-3/qlora-fsdp-70b.yaml +1 -1
- examples/llama-3/qlora.yml +1 -1
- examples/mamba/config.yml +1 -1
- examples/mistral/bigstral-ds-zero3.yaml +1 -1
- examples/mistral/config.yml +1 -1
- examples/mistral/lora-mps.yml +1 -1
- examples/mistral/lora.yml +1 -1
- examples/mistral/mistral-qlora-fsdp.yml +1 -1
- examples/mistral/mistral-qlora-orpo.yml +1 -1
- examples/mistral/mixtral-8x22b-qlora-fsdp.yml +1 -1
- examples/mistral/mixtral-qlora-fsdp.yml +1 -1
- examples/mistral/mixtral.yml +1 -1
- examples/mistral/mixtral_22.yml +1 -1
- examples/mistral/qlora.yml +1 -1
- examples/mpt-7b/config.yml +1 -1
- examples/openllama-3b/config.yml +1 -1
- examples/openllama-3b/lora.yml +1 -1
- examples/openllama-3b/qlora.yml +1 -1
- examples/phi/phi-ft.yml +1 -1
    	
docker/Dockerfile-cloud  CHANGED

@@ -21,7 +21,9 @@ RUN apt install --yes --no-install-recommends openssh-server tmux && \
     printf "\n[[ -z \"\$TMUX\"  ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
     printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \
     chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
-    chmod +x /root/cloud-entrypoint.sh
+    chmod +x /root/cloud-entrypoint.sh && \
+    mkdir -p /workspace/data/axolotl-artifacts && \
+    ln -sf /workspace/data/axolotl-artifacts /workspace/axolotl/outputs
 
 ENTRYPOINT ["/root/cloud-entrypoint.sh"]
 CMD ["sleep", "infinity"]
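With the symlink in place, anything the trainer writes under /workspace/axolotl/outputs actually lands in /workspace/data/axolotl-artifacts, so artifacts persist whenever a volume is mounted at /workspace/data. A minimal launch-and-verify sketch follows; the host path and image tag are illustrative assumptions, and only the container paths come from the diff above:

docker run --gpus all -v /host/axolotl-data:/workspace/data winglian/axolotl-cloud:main-latest
# then, inside the container:
ls -ld /workspace/axolotl/outputs
# -> /workspace/axolotl/outputs -> /workspace/data/axolotl-artifacts
touch /workspace/axolotl/outputs/probe    # appears on the host under /host/axolotl-data/axolotl-artifacts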
    	
examples/cerebras/btlm-ft.yml  CHANGED

@@ -38,7 +38,7 @@ wandb_watch:
 wandb_name:
 wandb_log_model:
 
-output_dir: btlm-out
+output_dir: ./outputs/btlm-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 1
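Every example below gets the same one-line treatment: output_dir keeps its run name but moves under ./outputs/, which resolves through the symlink above when training is launched from /workspace/axolotl. A user-supplied config would follow the same pattern (the run name "my-run" is a hypothetical placeholder):

# ./outputs is the symlink created in docker/Dockerfile-cloud
output_dir: ./outputs/my-run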
    	
examples/cerebras/qlora.yml  CHANGED

@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 batch_size: 4
 micro_batch_size: 4
 num_epochs: 2
    	
examples/code-llama/13b/lora.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
    	
examples/code-llama/13b/qlora.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
    	
examples/code-llama/34b/lora.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
    	
examples/code-llama/34b/qlora.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
    	
examples/code-llama/7b/lora.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
    	
examples/code-llama/7b/qlora.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
    	
examples/colab-notebooks/colab-axolotl-example.ipynb  CHANGED

@@ -84,7 +84,7 @@
     "    type: alpaca\n",
     "dataset_prepared_path:\n",
     "val_set_size: 0.05\n",
-    "output_dir: ./qlora-out\n",
+    "output_dir: ./outputs/qlora-out\n",
     "\n",
     "adapter: qlora\n",
     "lora_model_dir:\n",
    	
examples/dbrx/16bit-lora.yaml  CHANGED

@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 512
 sample_packing: false
    	
examples/dbrx/8bit-lora.yaml  CHANGED

@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 512
 sample_packing: false
    	
examples/dbrx/fft-ds-zero3.yaml  CHANGED

@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 512
 sample_packing: false
    	
examples/falcon/config-7b-lora.yml  CHANGED

@@ -28,7 +28,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./falcon-7b
+output_dir: ./outputs/falcon-7b
 batch_size: 2
 micro_batch_size: 1
 num_epochs: 4
    	
examples/falcon/config-7b-qlora.yml  CHANGED

@@ -42,7 +42,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 # QLoRA paper Table 9
 # - 16 for 7b & 13b
    	
examples/falcon/config-7b.yml  CHANGED

@@ -28,7 +28,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./falcon-7b
+output_dir: ./outputs/falcon-7b
 batch_size: 2
 micro_batch_size: 1
 num_epochs: 4
    	
examples/gemma/qlora.yml  CHANGED

@@ -12,7 +12,7 @@ datasets:
   - path: mhenrichsen/alpaca_2k_test
     type: alpaca
 val_set_size: 0.1
-output_dir: ./out
+output_dir: ./outputs/out
 
 adapter: qlora
 lora_r: 32
    	
examples/gptj/qlora.yml  CHANGED

@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 gradient_accumulation_steps: 2
 micro_batch_size: 2
 num_epochs: 2
    	
examples/jamba/qlora.yaml  CHANGED

@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: false
    	
examples/jamba/qlora_deepspeed.yaml  CHANGED

@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: false
    	
examples/jeopardy-bot/config.yml  CHANGED

@@ -21,7 +21,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./jeopardy-bot-7b
+output_dir: ./outputs/jeopardy-bot-7b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
    	
examples/llama-2/fft_optimized.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: true
    	
examples/llama-2/gptq-lora.yml  CHANGED

@@ -33,7 +33,7 @@ wandb_project:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./model-out
+output_dir: ./outputs/model-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
    	
examples/llama-2/lisa.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./lisa-out
+output_dir: ./outputs/lisa-out
 
 sequence_len: 4096
 sample_packing: true
    	
examples/llama-2/loftq.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
    	
examples/llama-2/lora.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
    	
examples/llama-2/qlora-fsdp.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
    	
examples/llama-2/qlora.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
    	
examples/llama-2/relora.yml  CHANGED

@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./relora-out
+output_dir: ./outputs/relora-out
 
 adapter: qlora
 lora_model_dir:
    	
examples/llama-3/fft-8b.yaml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 8192
 sample_packing: true
    	
examples/llama-3/lora-8b.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
    	
examples/llama-3/qlora-fsdp-70b.yaml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out/qlora-llama3-70b
+output_dir: ./outputs/out/qlora-llama3-70b
 
 adapter: qlora
 lora_model_dir:
    	
examples/llama-3/qlora.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
    	
examples/mamba/config.yml  CHANGED

@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 2048
 sample_packing: false
    	
examples/mistral/bigstral-ds-zero3.yaml  CHANGED

@@ -23,7 +23,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 2048
 sample_packing: true
    	
examples/mistral/config.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 8192
 sample_packing: true
    	
examples/mistral/lora-mps.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 eval_sample_packing: false
 
 adapter: lora
    	
examples/mistral/lora.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 adapter: lora
 lora_model_dir:
    	
examples/mistral/mistral-qlora-fsdp.yml  CHANGED

@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 model_config:
   output_router_logits: true
    	
examples/mistral/mistral-qlora-orpo.yml  CHANGED

@@ -16,7 +16,7 @@ datasets:
     type: chat_template.argilla
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./mistral-qlora-orpo-out
+output_dir: ./outputs/mistral-qlora-orpo-out
 
 adapter: qlora
 lora_model_dir:
    	
examples/mistral/mixtral-8x22b-qlora-fsdp.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 model_config:
   output_router_logits: true
    	
examples/mistral/mixtral-qlora-fsdp.yml  CHANGED

@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 model_config:
   output_router_logits: true
    	
examples/mistral/mixtral.yml  CHANGED

@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 ## You can optionally freeze the entire model and unfreeze a subset of parameters
 unfrozen_parameters:
    	
examples/mistral/mixtral_22.yml  CHANGED

@@ -21,7 +21,7 @@ model_config:
 datasets:
   - path: yahma/alpaca-cleaned
     type: alpaca
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 8000
 sample_packing: true
    	
examples/mistral/qlora.yml  CHANGED

@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
    	
examples/mpt-7b/config.yml  CHANGED

@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./mpt-alpaca-7b
+output_dir: ./outputs/mpt-alpaca-7b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
    	
examples/openllama-3b/config.yml  CHANGED

@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./openllama-out
+output_dir: ./outputs/openllama-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
    	
examples/openllama-3b/lora.yml  CHANGED

@@ -31,7 +31,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 4
    	
examples/openllama-3b/qlora.yml  CHANGED

@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 4
    	
examples/phi/phi-ft.yml  CHANGED

@@ -12,7 +12,7 @@ datasets:
 
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./phi-sft-out
+output_dir: ./outputs/phi-sft-out
 
 sequence_len: 2048
 sample_packing: true