---
# Name identifying this experiment run.
experiment_name: 'MorganFP_full_dataset_run'

# Input data locations and the column names used to read each table.
data:
  # Genomic regions table (CSV) and the columns holding gene, chromosome,
  # start and end.
  regions_csv_path: "data/Enformer_genomic_regions_TSSCenteredGenes_FixedOverlapRemoval_subset500_priorityCustom_parquetFiltered.csv"
  regions_gene_col: 'gene_name'
  regions_chr_col: 'seqnames'
  regions_start_col: 'starts'
  regions_end_col: 'ends'

  # Pseudobulk expression table (Parquet) and its column names.
  # NOTE(review): absolute path under /home/ubuntu is machine-specific.
  pbulk_parquet_path: "/home/ubuntu/pseudoBulk_celllineXdrug_8Cellline_27Drugs_1Dosage_includeZero.parquet"
  pbulk_gene_col: 'gene_id'
  pbulk_drug_col: 'drug_id'
  pbulk_dose_col: 'drug_dose'
  pbulk_expr_col: 'expression'
  pbulk_cell_line_col: 'cell_line'

  # Drug metadata table (CSV) and its identifier column.
  drug_meta_csv_path: "data/drug_metadata.csv"
  drug_meta_id_col: 'drug'

  # Reference genome FASTA.
  # NOTE(review): absolute path under /home/ubuntu is machine-specific.
  fasta_file_path: "/home/ubuntu/data/hg38.fa"

  # Input sequence length; presumably in base pairs - TODO confirm.
  enformer_input_seq_length: 49152

  # Morgan fingerprint radius and bit-vector length.
  # NOTE(review): morgan_fp_nbits presumably has to agree with the
  # morgan_fingerprint_dim key of the model section (both 2048) - confirm.
  morgan_fp_radius: 2
  morgan_fp_nbits: 2048

# Model selection and optimizer hyperparameters.
model:
  # Name of the pretrained Enformer checkpoint; presumably a Hugging Face
  # model id - confirm against the loader.
  enformer_model_name: 'EleutherAI/enformer-official-rough'

  # Width of the fingerprint input.
  # NOTE(review): presumably has to agree with the morgan_fp_nbits key of the
  # data section (both 2048) - confirm.
  morgan_fingerprint_dim: 2048

  # Written with a decimal point and a signed exponent so that YAML 1.1
  # loaders (e.g. PyYAML) resolve it as a float rather than a string.
  learning_rate: 5.0e-6
  weight_decay: 0.01

# Training-loop settings.
training:
  batch_size: 16
  max_epochs: 100
  precision: 'bf16-mixed'

  # Reproducibility settings.
  deterministic: true
  seed: 42

  gradient_clip_val: 0.05
  accumulate_grad_batches: 1

  # NOTE(review): -1 presumably means "use every available GPU" - confirm
  # against the trainer that consumes this value.
  gpus: -1
  strategy: 'ddp_find_unused_parameters_true'

# Experiment tracking and output location.
logging:
  wandb_project: 'tahoeformer'

  # Directory that receives run outputs (relative path).
  save_dir: 'outputs/morgan_full_dataset_runs'

  # NOTE(review): kept under `logging` because it follows save_dir with no new
  # section header in between - confirm the consumer reads it from here.
  early_stopping_patience: 30

# Run-level switches.
# NOTE(review): kept at the top level (column 0); confirm the consumer does not
# expect these keys nested under `logging`.
use_wandb: true
validate_before_train: true
delete_checkpoint_after_run: false