Caleb Ellington committed on
Commit
4056450
·
1 Parent(s): 39363ed

update with major refactor

Browse files
README.md CHANGED
@@ -11,11 +11,11 @@ snapshot_download(repo_id=model_name, local_dir=genbio_models_path)
11
  ```
12
  ### Load model for inference
13
  ```python
14
- from genbio_finetune.tasks import SequenceRegression
15
 
16
  ckpt_path = genbio_models_path.joinpath('fold0', 'model.ckpt')
17
  model = SequenceRegression.load_from_checkpoint(ckpt_path, strict_loading=False).eval()
18
 
19
- collated_batch = model.collate({"sequences": ["ACGT", "AGCT"]})
20
  logits = model(collated_batch)
21
  print(logits)
 
11
  ```
12
  ### Load model for inference
13
  ```python
14
+ from modelgenerator.tasks import SequenceRegression
15
 
16
  ckpt_path = genbio_models_path.joinpath('fold0', 'model.ckpt')
17
  model = SequenceRegression.load_from_checkpoint(ckpt_path, strict_loading=False).eval()
18
 
19
+ collated_batch = model.transform({"sequences": ["ACGT", "AGCT"]})
20
  logits = model(collated_batch)
21
  print(logits)
fold0/config.yaml CHANGED
@@ -136,10 +136,10 @@ trainer:
136
  reload_dataloaders_every_n_epochs: 0
137
  default_root_dir: logs
138
  model:
139
- class_path: genbio_finetune.tasks.SequenceRegression
140
  init_args:
141
  backbone:
142
- class_path: genbio_finetune.models.proteinfm
143
  init_args:
144
  from_scratch: false
145
  use_peft: false
@@ -152,7 +152,7 @@ model:
152
  model_init_args: null
153
  max_length: 2048
154
  adapter:
155
- class_path: genbio_finetune.models.MLPPoolAdapter
156
  init_args:
157
  pooling: mean_pooling
158
  hidden_sizes:
@@ -176,13 +176,13 @@ model:
176
  differentiable: false
177
  fused: null
178
  lr_scheduler:
179
- class_path: genbio_finetune.lr_schedulers.ConstantWithWarmup
180
  init_args:
181
  warmup_ratio: 0.05
182
  strict_loading: true
183
  reset_optimizer_states: false
184
  data:
185
- class_path: genbio_finetune.data.DMSFitnessPrediction
186
  init_args:
187
  path: genbio-ai/ProteinGYM-DMS
188
  train_split_files:
 
136
  reload_dataloaders_every_n_epochs: 0
137
  default_root_dir: logs
138
  model:
139
+ class_path: modelgenerator.tasks.SequenceRegression
140
  init_args:
141
  backbone:
142
+ class_path: modelgenerator.backbones.proteinfm
143
  init_args:
144
  from_scratch: false
145
  use_peft: false
 
152
  model_init_args: null
153
  max_length: 2048
154
  adapter:
155
+ class_path: modelgenerator.adapters.MLPPoolAdapter
156
  init_args:
157
  pooling: mean_pooling
158
  hidden_sizes:
 
176
  differentiable: false
177
  fused: null
178
  lr_scheduler:
179
+ class_path: modelgenerator.lr_schedulers.ConstantWithWarmup
180
  init_args:
181
  warmup_ratio: 0.05
182
  strict_loading: true
183
  reset_optimizer_states: false
184
  data:
185
+ class_path: modelgenerator.data.DMSFitnessPrediction
186
  init_args:
187
  path: genbio-ai/ProteinGYM-DMS
188
  train_split_files:
fold1/config.yaml CHANGED
@@ -136,10 +136,10 @@ trainer:
136
  reload_dataloaders_every_n_epochs: 0
137
  default_root_dir: logs
138
  model:
139
- class_path: genbio_finetune.tasks.SequenceRegression
140
  init_args:
141
  backbone:
142
- class_path: genbio_finetune.models.proteinfm
143
  init_args:
144
  from_scratch: false
145
  use_peft: false
@@ -152,7 +152,7 @@ model:
152
  model_init_args: null
153
  max_length: 2048
154
  adapter:
155
- class_path: genbio_finetune.models.MLPPoolAdapter
156
  init_args:
157
  pooling: mean_pooling
158
  hidden_sizes:
@@ -176,13 +176,13 @@ model:
176
  differentiable: false
177
  fused: null
178
  lr_scheduler:
179
- class_path: genbio_finetune.lr_schedulers.ConstantWithWarmup
180
  init_args:
181
  warmup_ratio: 0.05
182
  strict_loading: true
183
  reset_optimizer_states: false
184
  data:
185
- class_path: genbio_finetune.data.DMSFitnessPrediction
186
  init_args:
187
  path: genbio-ai/ProteinGYM-DMS
188
  train_split_files:
 
136
  reload_dataloaders_every_n_epochs: 0
137
  default_root_dir: logs
138
  model:
139
+ class_path: modelgenerator.tasks.SequenceRegression
140
  init_args:
141
  backbone:
142
+ class_path: modelgenerator.backbones.proteinfm
143
  init_args:
144
  from_scratch: false
145
  use_peft: false
 
152
  model_init_args: null
153
  max_length: 2048
154
  adapter:
155
+ class_path: modelgenerator.adapters.MLPPoolAdapter
156
  init_args:
157
  pooling: mean_pooling
158
  hidden_sizes:
 
176
  differentiable: false
177
  fused: null
178
  lr_scheduler:
179
+ class_path: modelgenerator.lr_schedulers.ConstantWithWarmup
180
  init_args:
181
  warmup_ratio: 0.05
182
  strict_loading: true
183
  reset_optimizer_states: false
184
  data:
185
+ class_path: modelgenerator.data.DMSFitnessPrediction
186
  init_args:
187
  path: genbio-ai/ProteinGYM-DMS
188
  train_split_files:
fold2/config.yaml CHANGED
@@ -136,10 +136,10 @@ trainer:
136
  reload_dataloaders_every_n_epochs: 0
137
  default_root_dir: logs
138
  model:
139
- class_path: genbio_finetune.tasks.SequenceRegression
140
  init_args:
141
  backbone:
142
- class_path: genbio_finetune.models.proteinfm
143
  init_args:
144
  from_scratch: false
145
  use_peft: false
@@ -152,7 +152,7 @@ model:
152
  model_init_args: null
153
  max_length: 2048
154
  adapter:
155
- class_path: genbio_finetune.models.MLPPoolAdapter
156
  init_args:
157
  pooling: mean_pooling
158
  hidden_sizes:
@@ -176,13 +176,13 @@ model:
176
  differentiable: false
177
  fused: null
178
  lr_scheduler:
179
- class_path: genbio_finetune.lr_schedulers.ConstantWithWarmup
180
  init_args:
181
  warmup_ratio: 0.05
182
  strict_loading: true
183
  reset_optimizer_states: false
184
  data:
185
- class_path: genbio_finetune.data.DMSFitnessPrediction
186
  init_args:
187
  path: genbio-ai/ProteinGYM-DMS
188
  train_split_files:
 
136
  reload_dataloaders_every_n_epochs: 0
137
  default_root_dir: logs
138
  model:
139
+ class_path: modelgenerator.tasks.SequenceRegression
140
  init_args:
141
  backbone:
142
+ class_path: modelgenerator.backbones.proteinfm
143
  init_args:
144
  from_scratch: false
145
  use_peft: false
 
152
  model_init_args: null
153
  max_length: 2048
154
  adapter:
155
+ class_path: modelgenerator.adapters.MLPPoolAdapter
156
  init_args:
157
  pooling: mean_pooling
158
  hidden_sizes:
 
176
  differentiable: false
177
  fused: null
178
  lr_scheduler:
179
+ class_path: modelgenerator.lr_schedulers.ConstantWithWarmup
180
  init_args:
181
  warmup_ratio: 0.05
182
  strict_loading: true
183
  reset_optimizer_states: false
184
  data:
185
+ class_path: modelgenerator.data.DMSFitnessPrediction
186
  init_args:
187
  path: genbio-ai/ProteinGYM-DMS
188
  train_split_files:
fold3/config.yaml CHANGED
@@ -136,10 +136,10 @@ trainer:
136
  reload_dataloaders_every_n_epochs: 0
137
  default_root_dir: logs
138
  model:
139
- class_path: genbio_finetune.tasks.SequenceRegression
140
  init_args:
141
  backbone:
142
- class_path: genbio_finetune.models.proteinfm
143
  init_args:
144
  from_scratch: false
145
  use_peft: false
@@ -152,7 +152,7 @@ model:
152
  model_init_args: null
153
  max_length: 2048
154
  adapter:
155
- class_path: genbio_finetune.models.MLPPoolAdapter
156
  init_args:
157
  pooling: mean_pooling
158
  hidden_sizes:
@@ -176,13 +176,13 @@ model:
176
  differentiable: false
177
  fused: null
178
  lr_scheduler:
179
- class_path: genbio_finetune.lr_schedulers.ConstantWithWarmup
180
  init_args:
181
  warmup_ratio: 0.05
182
  strict_loading: true
183
  reset_optimizer_states: false
184
  data:
185
- class_path: genbio_finetune.data.DMSFitnessPrediction
186
  init_args:
187
  path: genbio-ai/ProteinGYM-DMS
188
  train_split_files:
 
136
  reload_dataloaders_every_n_epochs: 0
137
  default_root_dir: logs
138
  model:
139
+ class_path: modelgenerator.tasks.SequenceRegression
140
  init_args:
141
  backbone:
142
+ class_path: modelgenerator.backbones.proteinfm
143
  init_args:
144
  from_scratch: false
145
  use_peft: false
 
152
  model_init_args: null
153
  max_length: 2048
154
  adapter:
155
+ class_path: modelgenerator.adapters.MLPPoolAdapter
156
  init_args:
157
  pooling: mean_pooling
158
  hidden_sizes:
 
176
  differentiable: false
177
  fused: null
178
  lr_scheduler:
179
+ class_path: modelgenerator.lr_schedulers.ConstantWithWarmup
180
  init_args:
181
  warmup_ratio: 0.05
182
  strict_loading: true
183
  reset_optimizer_states: false
184
  data:
185
+ class_path: modelgenerator.data.DMSFitnessPrediction
186
  init_args:
187
  path: genbio-ai/ProteinGYM-DMS
188
  train_split_files:
fold4/config.yaml CHANGED
@@ -136,10 +136,10 @@ trainer:
136
  reload_dataloaders_every_n_epochs: 0
137
  default_root_dir: logs
138
  model:
139
- class_path: genbio_finetune.tasks.SequenceRegression
140
  init_args:
141
  backbone:
142
- class_path: genbio_finetune.models.proteinfm
143
  init_args:
144
  from_scratch: false
145
  use_peft: false
@@ -152,7 +152,7 @@ model:
152
  model_init_args: null
153
  max_length: 2048
154
  adapter:
155
- class_path: genbio_finetune.models.MLPPoolAdapter
156
  init_args:
157
  pooling: mean_pooling
158
  hidden_sizes:
@@ -176,13 +176,13 @@ model:
176
  differentiable: false
177
  fused: null
178
  lr_scheduler:
179
- class_path: genbio_finetune.lr_schedulers.ConstantWithWarmup
180
  init_args:
181
  warmup_ratio: 0.05
182
  strict_loading: true
183
  reset_optimizer_states: false
184
  data:
185
- class_path: genbio_finetune.data.DMSFitnessPrediction
186
  init_args:
187
  path: genbio-ai/ProteinGYM-DMS
188
  train_split_files:
 
136
  reload_dataloaders_every_n_epochs: 0
137
  default_root_dir: logs
138
  model:
139
+ class_path: modelgenerator.tasks.SequenceRegression
140
  init_args:
141
  backbone:
142
+ class_path: modelgenerator.backbones.proteinfm
143
  init_args:
144
  from_scratch: false
145
  use_peft: false
 
152
  model_init_args: null
153
  max_length: 2048
154
  adapter:
155
+ class_path: modelgenerator.adapters.MLPPoolAdapter
156
  init_args:
157
  pooling: mean_pooling
158
  hidden_sizes:
 
176
  differentiable: false
177
  fused: null
178
  lr_scheduler:
179
+ class_path: modelgenerator.lr_schedulers.ConstantWithWarmup
180
  init_args:
181
  warmup_ratio: 0.05
182
  strict_loading: true
183
  reset_optimizer_states: false
184
  data:
185
+ class_path: modelgenerator.data.DMSFitnessPrediction
186
  init_args:
187
  path: genbio-ai/ProteinGYM-DMS
188
  train_split_files: