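# hazyresearch/cartridge-wauoq23f — config.yaml
# Uploaded by sabrieyuboglu: "Upload config.yaml with huggingface_hub"
# Commit 0e72f89 (verified), 28 days ago, 5.2 kB
# Note: the Hugging Face page navigation ("Model card", "Files and versions",
# "Community", "raw", "history", "blame", "contribute", "delete") was captured
# along with this file; it is not part of the configuration.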

	_config_type:
	_is_type: true
	_module: capsules.train
	_qualname: TrainConfig
	cache_ema_alpha: 0.9
	context:
	_config_type:
	_is_type: true
	_module: capsules.tasks.longhealth.context
	_qualname: LongHealthStructuredContextConfig
	patient_ids:
	- patient_01
	- patient_02
	- patient_03
	- patient_04
	- patient_05
	- patient_06
	- patient_07
	- patient_08
	- patient_09
	- patient_10
	dataset:
	_config_type:
	_is_type: true
	_module: capsules.datasets
	_qualname: CapsuleDatasetLatest.Config
	convo_transforms: null
	data_sources:
	- !!python/tuple
	- hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v0
	- null
	- !!python/tuple
	- hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v1
	- null
	dataset_weights: null
	is_wandb: true
	kwargs: {}
	label_type: logits
	max_sequence_length: 1024
	target:
	_is_type: true
	_module: capsules.datasets
	_qualname: CapsuleDatasetLatest
	top_k_logits: 20
	user_prompt_prefix: null
	device: cuda
	distributed_backend: gloo
	ema_cache: false
	epochs: 2
	eval_datasets:
	- _config_type:
	_is_type: true
	_module: capsules.train
	_qualname: EvalDatasetConfig
	dataloader_num_workers: 0
	dataset:
	_config_type:
	_is_type: true
	_module: capsules.tasks.longhealth
	_qualname: LongHealthEvalDataset.Config
	convo_transforms: null
	data_sources: []
	dataset_weights: null
	is_wandb: false
	kwargs: {}
	label_type: tokens
	max_questions: 256
	patient_ids:
	- patient_01
	- patient_02
	- patient_03
	- patient_04
	- patient_05
	- patient_06
	- patient_07
	- patient_08
	- patient_09
	- patient_10
	target:
	_is_type: true
	_module: capsules.tasks.longhealth
	_qualname: LongHealthEvalDataset
	top_k_logits: 20
	user_prompt_prefix: null
	local_batch_size: 16
	name_for_wandb: longhealth_mc
	only_eval_rank_0: false
	eval_every_n_steps: 256
	eval_log_table: true
	eval_max_samples: null
	generate_datasets:
	- _config_type:
	_is_type: true
	_module: capsules.train
	_qualname: GenerateDatasetConfig
	batch_size: 16
	dataloader_num_workers: 0
	dataset:
	_config_type:
	_is_type: true
	_module: capsules.tasks.longhealth
	_qualname: LongHealthMultipleChoiceGenerateDataset.Config
	cot: true
	include_diagnosis: true
	kwargs: {}
	max_questions: null
	patient_ids:
	- patient_01
	- patient_02
	- patient_03
	- patient_04
	- patient_05
	- patient_06
	- patient_07
	- patient_08
	- patient_09
	- patient_10
	target:
	_is_type: true
	_module: capsules.tasks.longhealth
	_qualname: LongHealthMultipleChoiceGenerateDataset
	name_for_wandb: longhealth_mc
	num_samples: 4
	num_samples_final: 8
	override_max_tokens: null
	temperature: 0.3
	generate_every_n_steps: 512
	generate_max_new_tokens: 512
	global_batch_size: 64
	keep_last_n_saved: 1
	kv_cache_initializer:
	_config_type:
	_is_type: true
	_module: capsules.kv_initialization.strategies.first_n_tokens
	_qualname: KVCacheInitFromFirstNTokensOfContext.Config
	context: null
	kwargs: {}
	max_tokens: 2048
	num_frozen_tokens: 1
	target:
	_is_type: true
	_module: capsules.kv_initialization.strategies.first_n_tokens
	_qualname: KVCacheInitFromFirstNTokensOfContext
	launch_id: 2025-05-10-14-56-42-train_longhealth_simple
	local_batch_size: 4
	log_logprob_viz: false
	loss_type: logits
	lr: 0.02
	lr_scheduler: null
	max_optimizer_steps: -1
	model:
	_config_type:
	_is_type: true
	_module: capsules.config
	_qualname: HFModelConfig
	attn_implementation: einsum
	checkpoint_path: null
	load_kwargs: {}
	model_cls:
	_is_type: true
	_module: capsules.models.llama
	_qualname: LlamaForCausalLM
	peft:
	_config_type:
	_is_type: true
	_module: capsules.config
	_qualname: PeftConfig
	adapter_non_linearity: relu
	adapter_reduction_factor: 16
	alpha: 16
	bias: none
	dropout: 0.0
	enabled: false
	encoder_dropout: 0.0
	encoder_hidden_size: null
	encoder_reparameterization_type: MLP
	extra_params: {}
	method: lora
	num_virtual_tokens: 20
	prefix_projection: false
	prompt_tuning_init: null
	prompt_tuning_init_text: null
	r: 8
	target_modules: null
	task_type: CAUSAL_LM
	pretrained_model_name_or_path: meta-llama/Llama-3.2-3B-Instruct
	tuning_method: custom_prefix
	name: train_longhealth_simple_p10_lr0.02_toks2048
	online_model: true
	optimizer: adam
	output_dir: /data/sabri/capsules
	pretrained_cache_path: null
	run_dir: /data/sabri/capsules/2025-05-10-14-56-42-train_longhealth_simple/68e4c064-dc5a-46c8-a726-b3c7977e9e1a
	run_id: 68e4c064-dc5a-46c8-a726-b3c7977e9e1a
	save_after_training: true
	save_every_n_steps: 512
	save_to_wandb: true
	script_id: train_longhealth_simple
	seed: 42
	tokenizer: meta-llama/Llama-3.2-1B-Instruct
	use_batch_sampler: false
	wandb:
	_config_type:
	_is_type: true
	_module: capsules.utils.wandb
	_qualname: WandBConfig
	entity: hazy-research
	group: null
	name: train_longhealth_simple_p10_lr0.02_toks2048
	notes: null
	project: capsules
	tags:
	- train
	- longhealth
	- patientsp10