qhuang20 ryankeivanfar committed on
Commit
be788c6
·
verified ·
1 Parent(s): 37413be

create config file (#1)

Browse files

- create config file (4f247a7e70cccf28d32bd861ae2956b3c6d87209)


Co-authored-by: Ryan Keivanfar <[email protected]>

Files changed (1) hide show
  1. configs/config.yaml +57 -0
configs/config.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ----------------------------------------------------
2
+ # Full Dataset Run Config
3
+ # ----------------------------------------------------
4
+
5
+ experiment_name: 'MorganFP_full_dataset_run'
6
+
7
+ data:
8
+ regions_csv_path: "data/Enformer_genomic_regions_TSSCenteredGenes_FixedOverlapRemoval_subset500_priorityCustom_parquetFiltered.csv" # Ensure this is correct
9
+ regions_gene_col: 'gene_name'
10
+ regions_chr_col: 'seqnames'
11
+ regions_start_col: 'starts'
12
+ regions_end_col: 'ends'
13
+
14
+ pbulk_parquet_path: "/home/ubuntu/pseudoBulk_celllineXdrug_8Cellline_27Drugs_1Dosage_includeZero.parquet" # Ensure this is correct
15
+ pbulk_gene_col: 'gene_id'
16
+ pbulk_drug_col: 'drug_id'
17
+ pbulk_dose_col: 'drug_dose'
18
+ pbulk_expr_col: 'expression'
19
+ pbulk_cell_line_col: 'cell_line'
20
+ drug_meta_csv_path: "data/drug_metadata.csv"
21
+ drug_meta_id_col: 'drug'
22
+ fasta_file_path: "/home/ubuntu/data/hg38.fa" # Ensure this is correct for your full run
23
+ enformer_input_seq_length: 49152
24
+ morgan_fp_radius: 2
25
+ morgan_fp_nbits: 2048
26
+
27
+ model:
28
+ enformer_model_name: 'EleutherAI/enformer-official-rough'
29
+ morgan_fingerprint_dim: 2048
30
+ learning_rate: 5.0e-6
31
+ weight_decay: 0.01
32
+
33
+ training:
34
+ batch_size: 16 # Increased for full run
35
+ max_epochs: 100 # Increased for full run
36
+ precision: 'bf16-mixed'
37
+ deterministic: True
38
+ seed: 42
39
+ gradient_clip_val: 0.05
40
+ accumulate_grad_batches: 1
41
+ gpus: -1 # Use all available GPUs
42
+ strategy: 'ddp_find_unused_parameters_true' # Suitable for multi-GPU
43
+
44
+ logging:
45
+ wandb_project: 'tahoeformer' # Specific project for these tests
46
+ # wandb_entity: 'your_wandb_username_or_team'
47
+ save_dir: 'outputs/morgan_full_dataset_runs' # Updated save directory
48
+ # checkpoint_monitor_metric: 'validation_pearson_epoch' # default
49
+ # checkpoint_monitor_mode: 'max' # default
50
+ # early_stopping_metric: 'validation_pearson_epoch' # default
51
+ # early_stopping_mode: 'max' # default
52
+ early_stopping_patience: 30 # Increased patience
53
+
54
+ use_wandb: True
55
+ validate_before_train: True
56
+ delete_checkpoint_after_run: False
57
+ # check_val_every_n_epoch: 1 # default