use_wandb = 1
seed = 1
style = "dit"
d_adapter = 512
normalize_embeddings = 1
depth = 2
embs = [
    "gte",
    "gtr",
    "stella",
    "sentence-t5",
    "e5",
    "sbert",
    "clip",
    "stella-big",
    "snowflake",
    "ember",
]
n_embs_per_batch = 2
max_seq_length = 512
depth_transform = 4
lr = 5e-5
bs = 128
save_every = 400
epochs = 5.0
dataset = "nomic_unsupervised"
max_grad_norm = 10.0
gradient_accumulation_steps = 1
loss_coefficient_vsp = 0
loss_coefficient_contrastive = 1
loss_coefficient_trans = 1
loss_coefficient_cc = 0
eval_steps = 99999999999999999
cluster_size = 1024
cluster_strategy = "cluster_within_domain"
warmup_steps = 100
wandb_project = "edx-2"
wandb_name = "dit-pretrain-sm-1"
save_dir = "checkpoints/{}/"
num_params = 391411032
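A minimal sketch of how a training script might read these settings, assuming the config is saved as TOML in a file named config.toml (hypothetical path) and Python 3.11+ with the standard-library tomllib; the actual loader used for this run is not shown here.

# Minimal sketch: load the TOML config above into a simple namespace.
# "config.toml" is a hypothetical filename; adjust to the real location.
import tomllib
from types import SimpleNamespace

with open("config.toml", "rb") as f:   # tomllib requires binary mode
    cfg = SimpleNamespace(**tomllib.load(f))

# Example accesses, matching keys defined above.
print(cfg.style, cfg.lr, cfg.embs[:3])            # "dit" 5e-05 ['gte', 'gtr', 'stella']
save_path = cfg.save_dir.format(cfg.wandb_name)   # "checkpoints/dit-pretrain-sm-1/"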