# ----------------------------------------------------
# Full Dataset Run Config
# ----------------------------------------------------

# Identifier for this run.
# NOTE(review): presumably used to label logs/outputs — confirm in the training script.
experiment_name: 'MorganFP_full_dataset_run'

data:
  # --- Genomic regions table (CSV) and the column names read from it ---
  # Confirm this path before launching the run.
  regions_csv_path: 'data/Enformer_genomic_regions_TSSCenteredGenes_FixedOverlapRemoval_subset500_priorityCustom_parquetFiltered.csv'
  regions_gene_col: 'gene_name'
  regions_chr_col: 'seqnames'
  regions_start_col: 'starts'
  regions_end_col: 'ends'

  # --- Pseudobulk expression table (Parquet) and the column names read from it ---
  # Absolute, machine-specific path: confirm it exists on the host running the job.
  pbulk_parquet_path: '/home/ubuntu/pseudoBulk_celllineXdrug_8Cellline_27Drugs_1Dosage_includeZero.parquet'
  pbulk_gene_col: 'gene_id'
  pbulk_drug_col: 'drug_id'
  pbulk_dose_col: 'drug_dose'
  pbulk_expr_col: 'expression'
  pbulk_cell_line_col: 'cell_line'

  # --- Drug metadata table (CSV) ---
  drug_meta_csv_path: 'data/drug_metadata.csv'
  drug_meta_id_col: 'drug'

  # --- Reference sequence (FASTA) ---
  # Absolute, machine-specific path: confirm it is the right file for the full run.
  fasta_file_path: '/home/ubuntu/data/hg38.fa'

  # --- Input sizing ---
  enformer_input_seq_length: 49152
  # morgan_fp_nbits has the same value as model.morgan_fingerprint_dim (2048).
  # NOTE(review): presumably the two must stay in sync — confirm against the model code.
  morgan_fp_radius: 2
  morgan_fp_nbits: 2048

# Model and optimizer hyperparameters.
model:
  # NOTE(review): looks like a Hugging Face Hub repo id — confirm how the loader resolves it.
  enformer_model_name: 'EleutherAI/enformer-official-rough'
  # Same value as data.morgan_fp_nbits (2048).
  # NOTE(review): presumably the two must match — confirm against the model code.
  morgan_fingerprint_dim: 2048
  # Keep the decimal point and the signed exponent: YAML 1.1 loaders (e.g. PyYAML)
  # read forms such as `5e-6` as a string rather than a float.
  learning_rate: 5.0e-6
  weight_decay: 0.01

# Training-loop settings.
# NOTE(review): key names look like PyTorch Lightning Trainer arguments — confirm
# how the training script maps them.
training:
  # NOTE(review): under DDP this is presumably the per-process batch size, so the
  # effective batch grows with the number of GPUs — confirm.
  batch_size: 16  # Increased for full run
  max_epochs: 100  # Increased for full run
  precision: 'bf16-mixed'
  # Canonical lowercase boolean: parsed the same by YAML 1.1 and 1.2 loaders.
  deterministic: true
  seed: 42
  gradient_clip_val: 0.05
  accumulate_grad_batches: 1
  gpus: -1  # Use all available GPUs
  strategy: 'ddp_find_unused_parameters_true'  # Suitable for multi-GPU

# Experiment tracking, output location and early-stopping settings.
logging:
  wandb_project: 'tahoeformer' # Tracking project name (NOTE(review): presumably Weights & Biases — confirm)
  # wandb_entity: 'your_wandb_username_or_team'
  save_dir: 'outputs/morgan_full_dataset_runs' # Output directory for the full-dataset runs
  # The commented-out keys below are marked "default" by the original author;
  # uncomment one to override it.
  # NOTE(review): the actual defaults live in the consuming code — confirm the values.
  # checkpoint_monitor_metric: 'validation_pearson_epoch' # default
  # checkpoint_monitor_mode: 'max' # default
  # early_stopping_metric: 'validation_pearson_epoch' # default
  # early_stopping_mode: 'max' # default
  early_stopping_patience: 30 # Increased patience

# Run-level switches, written as canonical lowercase booleans so YAML 1.1 and
# 1.2 loaders parse them identically.
use_wandb: true
validate_before_train: true
delete_checkpoint_after_run: false
# check_val_every_n_epoch: 1  # default