| Parameter | Value |
|---|---|
| add_ctxemb | 'False' |
| add_vocab | 'False' |
| all_gather | 'true' |
| batch_size_training | '16' |
| checkpoint_type | StateDictType.SHARDED_STATE_DICT |
| clipping_norm | '-1.0' |
| compare | 'False' |
| cpu_np_head | 'False' |
| ctx_proj_layer | 'False' |
| ctx_use_peft | 'False' |
| dataset | llava_llama3_selfrag_single_dataset_5k |
| dist_checkpoint_folder | llama3.full.prompt_llava.context_mask.with_context.without_sp.data_5k_epoch2 |
| dist_checkpoint_root_folder | ./checkpoints |
| enable_fsdp | 'True' |
| freeze_ctx_encoder | 'False' |
| freeze_layers | 'False' |
| freeze_question_encoder | 'False' |
| from_hf | 'False' |
| fsdp_activation_checkpointing | 'True' |
| gamma | '0.85' |
| load_np_head | 'False' |
| low_cpu_fsdp | 'True' |
| lr | 2e-05 |
| memory_bank_length | '0' |
| micro_batch_size | '4' |
| mixed_precision | 'True' |
| model_name | meta-llama/Llama-3.1-8B |
| model_use_peft | 'False' |
| natural_form | 'False' |
| np_weight | '100.0' |
| num_epochs | '3' |
| num_freeze_layers | '1' |
| num_workers_dataloader | '1' |
| one_gpu | 'False' |
| optimizer | AdamW |
| output_dir | peft_checkpoint |
| peft_method | None |
| pure_bf16 | 'True' |
| quantization | 'False' |
| question_proj_layer | 'False' |
| resume_epoch | '0' |
| ret_checkpoint_folder | '' |
| ret_first | 'False' |
| retriever | '' |
| run_validation | 'True' |
| save_model | 'True' |
| save_optimizer | 'False' |
| seed | '2' |
| sharding_strategy | ShardingStrategy.FULL_SHARD |
| single | 'False' |
| target_modules | q_proj\|k_proj\|v_proj\|o_proj\|gate_proj\|up_proj\|down_proj |
| token_name | meta-llama/Llama-3.1-8B-Instruct |
| train | 'True' |
| use_fast_kernels | 'False' |
| use_fp16 | 'False' |
| val_batch_size | '1' |
| weight_decay | '0.0' |
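
For reference, below is a minimal sketch of how a run with these settings might be launched. It assumes a llama-recipes-style `finetuning.py` entrypoint that accepts config keys as `--key=value` overrides; the entrypoint name, the torchrun node/GPU counts, and the override-passing convention are assumptions for illustration, not part of the recorded configuration. The override values themselves are copied from the table above.

```python
import subprocess

# Overrides taken from the table above; any key not listed here would fall back
# to the training script's defaults.
overrides = {
    "model_name": "meta-llama/Llama-3.1-8B",
    "dataset": "llava_llama3_selfrag_single_dataset_5k",
    "enable_fsdp": True,
    "pure_bf16": True,
    "fsdp_activation_checkpointing": True,
    "batch_size_training": 16,
    "micro_batch_size": 4,
    "lr": 2e-05,
    "num_epochs": 3,
    "seed": 2,
    "dist_checkpoint_root_folder": "./checkpoints",
    "dist_checkpoint_folder": (
        "llama3.full.prompt_llava.context_mask.with_context"
        ".without_sp.data_5k_epoch2"
    ),
}

# torchrun spawns one process per GPU; the node and GPU counts below are
# assumptions, not part of the recorded configuration.
cmd = ["torchrun", "--nnodes=1", "--nproc_per_node=8", "finetuning.py"]
cmd += [f"--{key}={value}" for key, value in overrides.items()]
subprocess.run(cmd, check=True)
```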