create config file (#1)

Browse files

- create config file (4f247a7e70cccf28d32bd861ae2956b3c6d87209)

Co-authored-by: Ryan Keivanfar <[email protected]>

Files changed (1) hide show

configs/config.yaml +57 -0

configs/config.yaml ADDED Viewed

	@@ -0,0 +1,57 @@

+# ----------------------------------------------------
+# Full Dataset Run Config
+# ----------------------------------------------------
+experiment_name: 'MorganFP_full_dataset_run'
+data:
+  regions_csv_path: "data/Enformer_genomic_regions_TSSCenteredGenes_FixedOverlapRemoval_subset500_priorityCustom_parquetFiltered.csv" # Verify before launch: the filename says 'subset500'; confirm this is the intended regions file for the full-dataset run
+  regions_gene_col: 'gene_name'
+  regions_chr_col: 'seqnames'
+  regions_start_col: 'starts'
+  regions_end_col: 'ends'
+  pbulk_parquet_path: "/home/ubuntu/pseudoBulk_celllineXdrug_8Cellline_27Drugs_1Dosage_includeZero.parquet" # Machine-specific absolute path; update for your environment and verify the file exists
+  pbulk_gene_col: 'gene_id'
+  pbulk_drug_col: 'drug_id'
+  pbulk_dose_col: 'drug_dose'
+  pbulk_expr_col: 'expression'
+  pbulk_cell_line_col: 'cell_line'
+  drug_meta_csv_path: "data/drug_metadata.csv"
+  drug_meta_id_col: 'drug'
+  fasta_file_path: "/home/ubuntu/data/hg38.fa" # Machine-specific absolute path; verify it points to the FASTA file for your full run
+  enformer_input_seq_length: 49152
+  morgan_fp_radius: 2
+  morgan_fp_nbits: 2048
+model:
+  enformer_model_name: 'EleutherAI/enformer-official-rough'
+  morgan_fingerprint_dim: 2048
+  learning_rate: 5.0e-6
+  weight_decay: 0.01
+training:
+  batch_size: 16             # Increased for full run
+  max_epochs: 100           # Increased for full run
+  precision: 'bf16-mixed'
+  deterministic: True
+  seed: 42
+  gradient_clip_val: 0.05
+  accumulate_grad_batches: 1
+  gpus: -1                   # Use all available GPUs
+  strategy: 'ddp_find_unused_parameters_true' # DDP with find_unused_parameters enabled; intended for multi-GPU runs
+logging:
+  wandb_project: 'tahoeformer' # W&B project name for these runs
+  # wandb_entity: 'your_wandb_username_or_team'
+  save_dir: 'outputs/morgan_full_dataset_runs' # Save directory for the full-dataset runs
+  # checkpoint_monitor_metric: 'validation_pearson_epoch' # default
+  # checkpoint_monitor_mode: 'max' # default
+  # early_stopping_metric: 'validation_pearson_epoch' # default
+  # early_stopping_mode: 'max' # default
+  early_stopping_patience: 30 # Increased patience
+use_wandb: True
+validate_before_train: True
+delete_checkpoint_after_run: False
+# check_val_every_n_epoch: 1      # default