File size: 3,928 Bytes
93a9349 9bce1fe 93a9349 3a28270 93a9349 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# lightning.pytorch==2.4.0
# Top-level seed value for the run (the `seed_everything` CLI option).
seed_everything: 42
# Trainer settings: DDP strategy, Weights & Biases logging, and a single
# ModelCheckpoint callback that keeps the best checkpoint by val_spearman
# (mode: max, save_top_k: 1) plus the last one (save_last: true).
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm against the original file.
trainer:
  accelerator: auto
  strategy:
    class_path: lightning.pytorch.strategies.DDPStrategy
    init_args:
      find_unused_parameters: true
  devices: auto
  num_nodes: 1
  precision: 32
  logger:
    class_path: lightning.pytorch.loggers.WandbLogger
    init_args:
      name: isoform_expression
      save_dir: logs
      project: rna_tasks
  callbacks:
    - class_path: lightning.pytorch.callbacks.ModelCheckpoint
      init_args:
        dirpath: logs/
        # Quoted: the template contains ':' and '{...}', which are YAML
        # indicators and should not be left to plain-scalar parsing.
        filename: 'best_val:{step}-{val_spearman:.3f}-{val_r2:.3f}'
        monitor: val_spearman
        save_last: true
        save_top_k: 1
        mode: max
        every_n_train_steps: 500
        every_n_epochs: null
        save_on_train_epoch_end: true
  max_epochs: 3
  # -1 leaves the step count unbounded; max_epochs above is the limit.
  max_steps: -1
  # NOTE(review): validation runs every 499 steps while the checkpoint
  # callback triggers every 500; presumably intentional so a val_spearman
  # value exists before the first checkpoint. Confirm.
  val_check_interval: 499
  check_val_every_n_epoch: null
  log_every_n_steps: 1
  accumulate_grad_batches: 1
  gradient_clip_val: 0.1
  gradient_clip_algorithm: null
  default_root_dir: logs
# Task definition: MMSequenceRegression with three backbones (enformer,
# aido_rna_1b600m_cds, esm2_150m), a fusion adapter, AdamW, and a
# cosine-with-warmup LR schedule. The two non-enformer backbones use LoRA
# (use_peft: true) on the query/value modules.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm against the original file.
model:
  class_path: modelgenerator.tasks.MMSequenceRegression
  init_args:
    backbone:
      class_path: modelgenerator.backbones.enformer
      init_args:
        # Written without the `_` digit separator: `196_608` is an integer
        # only under YAML 1.1 and a string under the YAML 1.2 core schema.
        max_length: 196608
        frozen: false
    backbone1:
      class_path: modelgenerator.backbones.aido_rna_1b600m_cds
      init_args:
        max_length: 1024
        frozen: false
        use_peft: true
        save_peft_only: true
        lora_r: 32
        lora_alpha: 64
        lora_dropout: 0.1
        lora_target_modules:
          - query
          - value
        config_overwrites:
          hidden_dropout_prob: 0
          attention_probs_dropout_prob: 0
        model_init_args: null
    backbone2:
      class_path: modelgenerator.backbones.esm2_150m
      init_args:
        max_length: 1024
        frozen: false
        use_peft: true
        save_peft_only: true
        lora_r: 32
        lora_alpha: 64
        lora_dropout: 0.1
        lora_target_modules:
          - query
          - value
        config_overwrites:
          hidden_dropout_prob: 0
          attention_probs_dropout_prob: 0
        model_init_args: null
    # NOTE(review): presumably maps backbone, backbone1, backbone2 to input
    # columns in this order. data.init_args.x_col lists the same three
    # columns in a different order (rna_seq first); verify that is harmless.
    backbone_order:
      - dna_seq
      - rna_seq
      - protein_seq
    adapter:
      class_path: modelgenerator.adapters.fusion.MMFusionTokenAdapter
      init_args:
        fusion:
          class_path: modelgenerator.adapters.fusion.ConcatFusion
          init_args:
            project_size: 1024
            # NOTE(review): `pooling` is assumed to be a ConcatFusion
            # argument; the flattened source does not show its level.
            # Confirm against the ConcatFusion signature.
            pooling: mean_pooling
        adapter:
          class_path: modelgenerator.adapters.MLPAdapter
          init_args:
            hidden_sizes:
              - 1024
            bias: true
            dropout: 0.1
            dropout_in_middle: false
    num_outputs: 30
    optimizer:
      class_path: torch.optim.AdamW
      init_args:
        # Written with a decimal point (as `eps` below already is): `1e-4`
        # is read as a string by strict YAML 1.1 resolvers.
        lr: 1.0e-4
        betas:
          - 0.9
          - 0.999
        eps: 1.0e-08
        weight_decay: 0.01
        amsgrad: false
        maximize: false
        foreach: null
        capturable: false
        differentiable: false
        fused: null
    lr_scheduler:
      class_path: modelgenerator.lr_schedulers.CosineWithWarmup
      init_args:
        warmup_ratio: 0.01
        num_warmup_steps: null
        last_epoch: -1
        verbose: deprecated
    use_legacy_adapter: false
    strict_loading: true
    reset_optimizer_states: false
# Data module: IsoformExpression, reading the dataset at `path` with
# train/validation/test files selected by the *_split_files patterns and
# three input columns (x_col).
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm against the original file.
data:
  class_path: modelgenerator.data.IsoformExpression
  init_args:
    path: genbio-ai/transcript_isoform_expression_prediction
    config_name: null
    valid_split_name: valid
    train_split_files:
      # Quoted so the glob is unambiguously a literal string.
      - 'train_*.tsv'
    valid_split_files:
      - validation.tsv
    test_split_files:
      - test.tsv
    x_col:
      - rna_seq
      - dna_seq
      - protein_seq
    normalize: true
    random_seed: 42
    batch_size: 2
    shuffle: true
    sampler: null
    num_workers: 0
    pin_memory: true
    persistent_workers: false
    cv_num_folds: 1
    cv_test_fold_id: 0
    cv_enable_val_fold: true
    cv_fold_id_col: null
# null: no checkpoint path is supplied in this config.
ckpt_path: null
|