# Tahoeformer / configs / config.yaml
# qhuang20's picture
# create config file (#1)
# be788c6 verified
# ====================================================
# Configuration for the full-dataset run
# ====================================================
# Name identifying this experiment.
experiment_name: 'MorganFP_full_dataset_run'
# Input files, the column names used to read them, and input-encoding sizes.
data:
  # Genomic regions table (CSV). Ensure this is correct.
  regions_csv_path: 'data/Enformer_genomic_regions_TSSCenteredGenes_FixedOverlapRemoval_subset500_priorityCustom_parquetFiltered.csv'
  regions_gene_col: 'gene_name'
  regions_chr_col: 'seqnames'
  regions_start_col: 'starts'
  regions_end_col: 'ends'
  # Pseudobulk table (parquet). Absolute, machine-specific path.
  # Ensure this is correct.
  pbulk_parquet_path: '/home/ubuntu/pseudoBulk_celllineXdrug_8Cellline_27Drugs_1Dosage_includeZero.parquet'
  pbulk_gene_col: 'gene_id'
  pbulk_drug_col: 'drug_id'
  pbulk_dose_col: 'drug_dose'
  pbulk_expr_col: 'expression'
  pbulk_cell_line_col: 'cell_line'
  # Drug metadata table (CSV) and its identifier column.
  drug_meta_csv_path: 'data/drug_metadata.csv'
  drug_meta_id_col: 'drug'
  # Reference genome FASTA. Absolute, machine-specific path.
  # Ensure this is correct for your full run.
  fasta_file_path: '/home/ubuntu/data/hg38.fa'
  # NOTE(review): presumably a length in base pairs — confirm against the
  # data-loading code.
  enformer_input_seq_length: 49152
  morgan_fp_radius: 2
  # NOTE(review): presumably has to equal model.morgan_fingerprint_dim
  # (both are 2048) — confirm.
  morgan_fp_nbits: 2048
# Model settings.
model:
  enformer_model_name: 'EleutherAI/enformer-official-rough'
  # NOTE(review): presumably has to equal data.morgan_fp_nbits
  # (both are 2048) — confirm.
  morgan_fingerprint_dim: 2048
  # Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
  # PyYAML resolve a form like 5e-6 as a string rather than a float.
  learning_rate: 5.0e-6
  weight_decay: 0.01
# Training-run settings.
training:
  batch_size: 16  # Increased for full run
  max_epochs: 100  # Increased for full run
  precision: 'bf16-mixed'
  deterministic: true
  seed: 42
  gradient_clip_val: 0.05
  accumulate_grad_batches: 1
  gpus: -1  # Use all available GPUs
  strategy: 'ddp_find_unused_parameters_true'  # Suitable for multi-GPU
# Experiment logging and output location.
logging:
  wandb_project: 'tahoeformer'  # Specific project for these tests
  # wandb_entity: 'your_wandb_username_or_team'
  save_dir: 'outputs/morgan_full_dataset_runs'  # Updated save directory
# NOTE(review): this file's indentation appears to have been lost, so it is
# unclear whether the options below belong under `logging:` or at the top
# level — confirm against the code that reads this config before re-indenting.
#
# Commented-out entries record optional keys together with the values their
# inline notes mark as "default".
# checkpoint_monitor_metric: 'validation_pearson_epoch' # default
# checkpoint_monitor_mode: 'max' # default
# early_stopping_metric: 'validation_pearson_epoch' # default
# early_stopping_mode: 'max' # default
early_stopping_patience: 30 # Increased patience
use_wandb: True
validate_before_train: True
delete_checkpoint_after_run: False
# check_val_every_n_epoch: 1 # default