Caleb Ellington committed on
Commit
83e0f69
·
1 Parent(s): e1062ee

update with major refactor

Browse files
README.md CHANGED
@@ -11,11 +11,11 @@ snapshot_download(repo_id=model_name, local_dir=genbio_models_path)
11
  ```
12
  ### Load model for inference
13
  ```python
14
- from genbio_finetune.tasks import SequenceRegression
15
 
16
  ckpt_path = genbio_models_path.joinpath('fold0', 'model.ckpt')
17
  model = SequenceRegression.load_from_checkpoint(ckpt_path, strict_loading=False).eval()
18
 
19
- collated_batch = model.collate({"sequences": ["ACGT", "AGCT"]})
20
  logits = model(collated_batch)
21
  print(logits)
 
11
  ```
12
  ### Load model for inference
13
  ```python
14
+ from modelgenerator.tasks import SequenceRegression
15
 
16
  ckpt_path = genbio_models_path.joinpath('fold0', 'model.ckpt')
17
  model = SequenceRegression.load_from_checkpoint(ckpt_path, strict_loading=False).eval()
18
 
19
+ collated_batch = model.transform({"sequences": ["ACGT", "AGCT"]})
20
  logits = model(collated_batch)
21
  print(logits)
fold0/config.yaml CHANGED
@@ -140,10 +140,10 @@ trainer:
140
  reload_dataloaders_every_n_epochs: 0
141
  default_root_dir: logs
142
  model:
143
- class_path: genbio_finetune.tasks.SequenceRegression
144
  init_args:
145
  backbone:
146
- class_path: genbio_finetune.models.proteinfm_v1
147
  init_args:
148
  from_scratch: false
149
  use_peft: true
@@ -155,7 +155,7 @@ model:
155
  model_init_args: null
156
  max_length: 2048
157
  adapter:
158
- class_path: genbio_finetune.models.MLPPoolAdapter
159
  init_args:
160
  pooling: mean_pooling
161
  hidden_sizes:
@@ -179,13 +179,13 @@ model:
179
  differentiable: false
180
  fused: null
181
  lr_scheduler:
182
- class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
183
  init_args:
184
  warmup_ratio: 0.05
185
  strict_loading: true
186
  reset_optimizer_states: false
187
  data:
188
- class_path: genbio_finetune.data.DMSFitnessPrediction
189
  init_args:
190
  path: genbio-ai/ProteinGYM-DMS
191
  train_split_files:
 
140
  reload_dataloaders_every_n_epochs: 0
141
  default_root_dir: logs
142
  model:
143
+ class_path: modelgenerator.tasks.SequenceRegression
144
  init_args:
145
  backbone:
146
+ class_path: modelgenerator.backbones.proteinfm_v1
147
  init_args:
148
  from_scratch: false
149
  use_peft: true
 
155
  model_init_args: null
156
  max_length: 2048
157
  adapter:
158
+ class_path: modelgenerator.adapters.MLPPoolAdapter
159
  init_args:
160
  pooling: mean_pooling
161
  hidden_sizes:
 
179
  differentiable: false
180
  fused: null
181
  lr_scheduler:
182
+ class_path: modelgenerator.lr_schedulers.CosineWithWarmup
183
  init_args:
184
  warmup_ratio: 0.05
185
  strict_loading: true
186
  reset_optimizer_states: false
187
  data:
188
+ class_path: modelgenerator.data.DMSFitnessPrediction
189
  init_args:
190
  path: genbio-ai/ProteinGYM-DMS
191
  train_split_files:
fold1/config.yaml CHANGED
@@ -140,10 +140,10 @@ trainer:
140
  reload_dataloaders_every_n_epochs: 0
141
  default_root_dir: logs
142
  model:
143
- class_path: genbio_finetune.tasks.SequenceRegression
144
  init_args:
145
  backbone:
146
- class_path: genbio_finetune.models.proteinfm_v1
147
  init_args:
148
  from_scratch: false
149
  use_peft: true
@@ -155,7 +155,7 @@ model:
155
  model_init_args: null
156
  max_length: 2048
157
  adapter:
158
- class_path: genbio_finetune.models.MLPPoolAdapter
159
  init_args:
160
  pooling: mean_pooling
161
  hidden_sizes:
@@ -179,13 +179,13 @@ model:
179
  differentiable: false
180
  fused: null
181
  lr_scheduler:
182
- class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
183
  init_args:
184
  warmup_ratio: 0.05
185
  strict_loading: true
186
  reset_optimizer_states: false
187
  data:
188
- class_path: genbio_finetune.data.DMSFitnessPrediction
189
  init_args:
190
  path: genbio-ai/ProteinGYM-DMS
191
  train_split_files:
 
140
  reload_dataloaders_every_n_epochs: 0
141
  default_root_dir: logs
142
  model:
143
+ class_path: modelgenerator.tasks.SequenceRegression
144
  init_args:
145
  backbone:
146
+ class_path: modelgenerator.backbones.proteinfm_v1
147
  init_args:
148
  from_scratch: false
149
  use_peft: true
 
155
  model_init_args: null
156
  max_length: 2048
157
  adapter:
158
+ class_path: modelgenerator.adapters.MLPPoolAdapter
159
  init_args:
160
  pooling: mean_pooling
161
  hidden_sizes:
 
179
  differentiable: false
180
  fused: null
181
  lr_scheduler:
182
+ class_path: modelgenerator.lr_schedulers.CosineWithWarmup
183
  init_args:
184
  warmup_ratio: 0.05
185
  strict_loading: true
186
  reset_optimizer_states: false
187
  data:
188
+ class_path: modelgenerator.data.DMSFitnessPrediction
189
  init_args:
190
  path: genbio-ai/ProteinGYM-DMS
191
  train_split_files:
fold2/config.yaml CHANGED
@@ -140,10 +140,10 @@ trainer:
140
  reload_dataloaders_every_n_epochs: 0
141
  default_root_dir: logs
142
  model:
143
- class_path: genbio_finetune.tasks.SequenceRegression
144
  init_args:
145
  backbone:
146
- class_path: genbio_finetune.models.proteinfm_v1
147
  init_args:
148
  from_scratch: false
149
  use_peft: true
@@ -155,7 +155,7 @@ model:
155
  model_init_args: null
156
  max_length: 2048
157
  adapter:
158
- class_path: genbio_finetune.models.MLPPoolAdapter
159
  init_args:
160
  pooling: mean_pooling
161
  hidden_sizes:
@@ -179,13 +179,13 @@ model:
179
  differentiable: false
180
  fused: null
181
  lr_scheduler:
182
- class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
183
  init_args:
184
  warmup_ratio: 0.05
185
  strict_loading: true
186
  reset_optimizer_states: false
187
  data:
188
- class_path: genbio_finetune.data.DMSFitnessPrediction
189
  init_args:
190
  path: genbio-ai/ProteinGYM-DMS
191
  train_split_files:
 
140
  reload_dataloaders_every_n_epochs: 0
141
  default_root_dir: logs
142
  model:
143
+ class_path: modelgenerator.tasks.SequenceRegression
144
  init_args:
145
  backbone:
146
+ class_path: modelgenerator.backbones.proteinfm_v1
147
  init_args:
148
  from_scratch: false
149
  use_peft: true
 
155
  model_init_args: null
156
  max_length: 2048
157
  adapter:
158
+ class_path: modelgenerator.adapters.MLPPoolAdapter
159
  init_args:
160
  pooling: mean_pooling
161
  hidden_sizes:
 
179
  differentiable: false
180
  fused: null
181
  lr_scheduler:
182
+ class_path: modelgenerator.lr_schedulers.CosineWithWarmup
183
  init_args:
184
  warmup_ratio: 0.05
185
  strict_loading: true
186
  reset_optimizer_states: false
187
  data:
188
+ class_path: modelgenerator.data.DMSFitnessPrediction
189
  init_args:
190
  path: genbio-ai/ProteinGYM-DMS
191
  train_split_files:
fold3/config.yaml CHANGED
@@ -140,10 +140,10 @@ trainer:
140
  reload_dataloaders_every_n_epochs: 0
141
  default_root_dir: logs
142
  model:
143
- class_path: genbio_finetune.tasks.SequenceRegression
144
  init_args:
145
  backbone:
146
- class_path: genbio_finetune.models.proteinfm_v1
147
  init_args:
148
  from_scratch: false
149
  use_peft: true
@@ -155,7 +155,7 @@ model:
155
  model_init_args: null
156
  max_length: 2048
157
  adapter:
158
- class_path: genbio_finetune.models.MLPPoolAdapter
159
  init_args:
160
  pooling: mean_pooling
161
  hidden_sizes:
@@ -179,13 +179,13 @@ model:
179
  differentiable: false
180
  fused: null
181
  lr_scheduler:
182
- class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
183
  init_args:
184
  warmup_ratio: 0.05
185
  strict_loading: true
186
  reset_optimizer_states: false
187
  data:
188
- class_path: genbio_finetune.data.DMSFitnessPrediction
189
  init_args:
190
  path: genbio-ai/ProteinGYM-DMS
191
  train_split_files:
 
140
  reload_dataloaders_every_n_epochs: 0
141
  default_root_dir: logs
142
  model:
143
+ class_path: modelgenerator.tasks.SequenceRegression
144
  init_args:
145
  backbone:
146
+ class_path: modelgenerator.backbones.proteinfm_v1
147
  init_args:
148
  from_scratch: false
149
  use_peft: true
 
155
  model_init_args: null
156
  max_length: 2048
157
  adapter:
158
+ class_path: modelgenerator.adapters.MLPPoolAdapter
159
  init_args:
160
  pooling: mean_pooling
161
  hidden_sizes:
 
179
  differentiable: false
180
  fused: null
181
  lr_scheduler:
182
+ class_path: modelgenerator.lr_schedulers.CosineWithWarmup
183
  init_args:
184
  warmup_ratio: 0.05
185
  strict_loading: true
186
  reset_optimizer_states: false
187
  data:
188
+ class_path: modelgenerator.data.DMSFitnessPrediction
189
  init_args:
190
  path: genbio-ai/ProteinGYM-DMS
191
  train_split_files:
fold4/config.yaml CHANGED
@@ -140,10 +140,10 @@ trainer:
140
  reload_dataloaders_every_n_epochs: 0
141
  default_root_dir: logs
142
  model:
143
- class_path: genbio_finetune.tasks.SequenceRegression
144
  init_args:
145
  backbone:
146
- class_path: genbio_finetune.models.proteinfm_v1
147
  init_args:
148
  from_scratch: false
149
  use_peft: true
@@ -155,7 +155,7 @@ model:
155
  model_init_args: null
156
  max_length: 2048
157
  adapter:
158
- class_path: genbio_finetune.models.MLPPoolAdapter
159
  init_args:
160
  pooling: mean_pooling
161
  hidden_sizes:
@@ -179,13 +179,13 @@ model:
179
  differentiable: false
180
  fused: null
181
  lr_scheduler:
182
- class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
183
  init_args:
184
  warmup_ratio: 0.05
185
  strict_loading: true
186
  reset_optimizer_states: false
187
  data:
188
- class_path: genbio_finetune.data.DMSFitnessPrediction
189
  init_args:
190
  path: genbio-ai/ProteinGYM-DMS
191
  train_split_files:
 
140
  reload_dataloaders_every_n_epochs: 0
141
  default_root_dir: logs
142
  model:
143
+ class_path: modelgenerator.tasks.SequenceRegression
144
  init_args:
145
  backbone:
146
+ class_path: modelgenerator.backbones.proteinfm_v1
147
  init_args:
148
  from_scratch: false
149
  use_peft: true
 
155
  model_init_args: null
156
  max_length: 2048
157
  adapter:
158
+ class_path: modelgenerator.adapters.MLPPoolAdapter
159
  init_args:
160
  pooling: mean_pooling
161
  hidden_sizes:
 
179
  differentiable: false
180
  fused: null
181
  lr_scheduler:
182
+ class_path: modelgenerator.lr_schedulers.CosineWithWarmup
183
  init_args:
184
  warmup_ratio: 0.05
185
  strict_loading: true
186
  reset_optimizer_states: false
187
  data:
188
+ class_path: modelgenerator.data.DMSFitnessPrediction
189
  init_args:
190
  path: genbio-ai/ProteinGYM-DMS
191
  train_split_files: