File size: 3,928 Bytes
93a9349
 
 
 
 
 
 
 
 
 
 
 
 
 
9bce1fe
93a9349
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a28270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93a9349
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# lightning.pytorch==2.4.0
# Seed value; data.init_args.random_seed further down is set to the same 42.
seed_everything: 42
# Trainer settings: DDP strategy, Weights & Biases logging, and a single
# ModelCheckpoint callback that tracks val_spearman.
trainer:
  accelerator: auto
  strategy:
    class_path: lightning.pytorch.strategies.DDPStrategy
    init_args:
      # NOTE(review): presumably needed because some parameters receive no
      # gradient on a given step; confirm before switching it off.
      find_unused_parameters: true
  devices: auto
  num_nodes: 1
  precision: 32
  logger:
    class_path: lightning.pytorch.loggers.WandbLogger
    init_args:
      name: isoform_expression
      save_dir: logs
      project: rna_tasks
  callbacks:
  # Keeps the top-1 checkpoint by val_spearman (mode: max) plus the last one.
  - class_path: lightning.pytorch.callbacks.ModelCheckpoint
    init_args:
      dirpath: logs/
      # Quoted because the template contains ':' and '{', both YAML
      # indicators; the loaded string is unchanged.
      filename: 'best_val:{step}-{val_spearman:.3f}-{val_r2:.3f}'
      monitor: val_spearman
      save_last: true
      save_top_k: 1
      mode: max
      every_n_train_steps: 500
      every_n_epochs: null
      save_on_train_epoch_end: true
  max_epochs: 3
  max_steps: -1
  # NOTE(review): 499 is one less than every_n_train_steps (500) above,
  # presumably so val_spearman exists before the first checkpoint; the two
  # cadences drift apart afterwards. Confirm this is intended.
  val_check_interval: 499
  check_val_every_n_epoch: null
  log_every_n_steps: 1
  accumulate_grad_batches: 1
  gradient_clip_val: 0.1
  gradient_clip_algorithm: null
  default_root_dir: logs
# Task definition: MMSequenceRegression with three backbones, a ConcatFusion
# + MLPAdapter head, and num_outputs: 30.
model:
  class_path: modelgenerator.tasks.MMSequenceRegression
  init_args:
    # Enformer backbone; unfrozen, and no PEFT keys are set for it here.
    backbone:
      class_path: modelgenerator.backbones.enformer
      init_args:
        # Plain digits: the 196_608 spelling is an integer only under
        # YAML 1.1 and would load as a string under the YAML 1.2 core schema.
        max_length: 196608
        frozen: false
    # AIDO.RNA CDS backbone with LoRA (r=32, alpha=64) on query/value.
    backbone1:
      class_path: modelgenerator.backbones.aido_rna_1b600m_cds
      init_args:
        max_length: 1024
        frozen: false
        use_peft: true
        save_peft_only: true
        lora_r: 32
        lora_alpha: 64
        lora_dropout: 0.1
        lora_target_modules:
        - query
        - value
        # Both backbone dropout probabilities are overridden to 0;
        # lora_dropout above is left at 0.1.
        config_overwrites:
          hidden_dropout_prob: 0
          attention_probs_dropout_prob: 0
        model_init_args: null
    # ESM2 150M backbone; same LoRA and dropout settings as backbone1.
    backbone2:
      class_path: modelgenerator.backbones.esm2_150m
      init_args:
        max_length: 1024
        frozen: false
        use_peft: true
        save_peft_only: true
        lora_r: 32
        lora_alpha: 64
        lora_dropout: 0.1
        lora_target_modules:
        - query
        - value
        config_overwrites:
          hidden_dropout_prob: 0
          attention_probs_dropout_prob: 0
        model_init_args: null
    # NOTE(review): presumably pairs, in order, with backbone, backbone1 and
    # backbone2; the same names appear in data.init_args.x_col in a
    # different order. Confirm inputs are matched by name, not by position.
    backbone_order:
    - dna_seq
    - rna_seq
    - protein_seq
    adapter:
      class_path: modelgenerator.adapters.fusion.MMFusionTokenAdapter
      init_args:
        fusion:
          class_path: modelgenerator.adapters.fusion.ConcatFusion
          init_args:
            project_size: 1024
            pooling: mean_pooling
        adapter:
          class_path: modelgenerator.adapters.MLPAdapter
          init_args:
            hidden_sizes:
            - 1024
            bias: true
            dropout: 0.1
            dropout_in_middle: false
    num_outputs: 30
    optimizer:
      class_path: torch.optim.AdamW
      init_args:
        # Written with a mantissa dot (like eps below) so YAML 1.1 loaders
        # resolve a float; a bare 1e-4 is a string under stock PyYAML.
        lr: 1.0e-4
        betas:
        - 0.9
        - 0.999
        eps: 1.0e-08
        weight_decay: 0.01
        amsgrad: false
        maximize: false
        foreach: null
        capturable: false
        differentiable: false
        fused: null
    lr_scheduler:
      class_path: modelgenerator.lr_schedulers.CosineWithWarmup
      init_args:
        warmup_ratio: 0.01
        num_warmup_steps: null
        last_epoch: -1
        # This is the literal string "deprecated", not a boolean.
        # NOTE(review): presumably a dumped default placeholder; confirm.
        verbose: deprecated
    use_legacy_adapter: false
    strict_loading: true
    reset_optimizer_states: false
# Data module settings for modelgenerator.data.IsoformExpression.
data:
  class_path: modelgenerator.data.IsoformExpression
  init_args:
    # NOTE(review): looks like a Hugging Face dataset repo id; confirm.
    path: genbio-ai/transcript_isoform_expression_prediction
    config_name: null
    valid_split_name: valid
    # File patterns per split; the train entry is a glob over train_*.tsv.
    train_split_files:
    - train_*.tsv
    valid_split_files:
    - validation.tsv
    test_split_files:
    - test.tsv
    # Input columns. Same three names as model.init_args.backbone_order, but
    # listed in a different order (rna, dna, protein vs dna, rna, protein).
    # NOTE(review): confirm the task matches inputs by name, not position.
    x_col:
    - rna_seq
    - dna_seq
    - protein_seq
    normalize: true
    # Same value as the top-level seed_everything.
    random_seed: 42
    batch_size: 2
    shuffle: true
    sampler: null
    num_workers: 0
    pin_memory: true
    persistent_workers: false
    # Cross-validation keys; a single fold and no fold-id column are set.
    # NOTE(review): presumably this means cross-validation is unused; confirm.
    cv_num_folds: 1
    cv_test_fold_id: 0
    cv_enable_val_fold: true
    cv_fold_id_col: null
# No checkpoint path is supplied in this config.
ckpt_path: null