from dataclasses import dataclass
from os.path import dirname, abspath

# on Windows, normalize '\' path separators to '/'
PROJECT_ROOT: str = abspath(dirname(__file__)).replace('\\', '/')

# ===================================================================================
# Inference configuration
@dataclass
class InferConfig:
    max_seq_len: int = 320         # maximum length of a generated answer
    mixed_precision: str = "bf16"  # mixed precision: 'no', 'fp16', 'bf16' or 'fp8'

    # full-parameter DPO model; the tokenizer files and model weights live in the same folder
    model_dir: str = PROJECT_ROOT + '/model_save/'

    # model file after merging the LoRA DPO adapter
    # model_file: str = PROJECT_ROOT + '/model_save/chat_small_t5.best.dpo.lora_merged.bin'

    # settings for the API demo:
    api_key: str = ""
    host: str = '127.0.0.1'
    port: int = 8812
    reload: bool = True
    workers: int = 1
    log_level: str = 'info'
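
# A minimal usage sketch for the API fields above, assuming a FastAPI app exposed
# as 'api_demo:app' (a hypothetical module path, not confirmed by this repo).
# Note that uvicorn ignores `workers` when `reload=True`, so only one of the two applies.
def _serve_api_demo() -> None:
    import uvicorn

    cfg = InferConfig()
    uvicorn.run(
        'api_demo:app',  # hypothetical import path of the FastAPI app
        host=cfg.host,
        port=cfg.port,
        reload=cfg.reload,
        workers=cfg.workers,
        log_level=cfg.log_level,
    )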

# ===================================================================================
# DPO training configuration
@dataclass
class DpoConfig:
    max_seq_len: int = 512 + 8  # 8 for eos token
    sft_model_file: str = PROJECT_ROOT + '/model_save/'
    tokenizer_dir: str = PROJECT_ROOT + '/model_save/'  # the tokenizer usually lives in the same folder as the model weights

    dpo_train_file: str = PROJECT_ROOT + '/data/my_dpo_data.json'
    dpo_eval_file: str = PROJECT_ROOT + '/data/my_dpo_eval.json'

    adapter_file: str = PROJECT_ROOT + '/data/dpo/adapter_model.safetensors'
    log_dir: str = PROJECT_ROOT + '/logs/'

    per_device_train_batch_size: int = 4
    num_train_epochs: int = 4
    gradient_accumulation_steps: int = 8
    learning_rate: float = 1e-5
    logging_first_step: bool = True
    logging_steps: int = 20
    save_steps: int = 2000
    output_dir: str = PROJECT_ROOT + '/model_save/dpo'
    warmup_steps: int = 1000
    fp16: bool = True
    seed: int = 23333
    beta: float = 0.1
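
# A minimal sketch mapping DpoConfig onto Hugging Face `transformers.TrainingArguments`.
# This mapping is illustrative, not necessarily the repo's actual training script;
# the DPO-specific `beta` is consumed by the DPO trainer itself (e.g. trl's
# DPOTrainer), not by TrainingArguments.
def _dpo_training_args():
    from transformers import TrainingArguments

    cfg = DpoConfig()
    return TrainingArguments(
        output_dir=cfg.output_dir,
        per_device_train_batch_size=cfg.per_device_train_batch_size,
        num_train_epochs=cfg.num_train_epochs,
        gradient_accumulation_steps=cfg.gradient_accumulation_steps,
        learning_rate=cfg.learning_rate,
        logging_first_step=cfg.logging_first_step,
        logging_steps=cfg.logging_steps,
        save_steps=cfg.save_steps,
        warmup_steps=cfg.warmup_steps,
        fp16=cfg.fp16,
        seed=cfg.seed,
    )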

# ===================================================================================
# SFT configuration
@dataclass
class SFTconfig:
    max_seq_len: int = 384 + 8  # 8 for eos token

    finetune_from_ckp_file: str = PROJECT_ROOT + '/model_save/'
    tokenizer_dir: str = PROJECT_ROOT + '/model_save/'  # the tokenizer usually lives in the same folder as the model weights
    sft_train_file: str = PROJECT_ROOT + '/data/sft_train.json'

    batch_size: int = 12
    num_train_epochs: int = 4
    save_steps: int = 5000
    gradient_accumulation_steps: int = 4
    learning_rate: float = 1e-5
    logging_first_step: bool = True
    logging_steps: int = 100
    output_dir: str = PROJECT_ROOT + '/model_save/sft'
    warmup_steps: int = 100
    fp16: bool = True
    seed: int = 23333

# ===================================================================================
# Pre-training configuration
@dataclass
class TrainConfig:
    epochs: int = 8
    batch_size_per_gpu: int = 16

    learn_rate: float = 0.0001  # the peak learning rate is div_factor * learn_rate
    div_factor: int = 50

    mixed_precision: str = "bf16"  # mixed precision: 'no', 'fp16', 'bf16' or 'fp8'

    # Note: each optimizer step effectively sees batch_size * gradient_accumulation_steps
    # samples; in plain terms, accumulating over n steps multiplies the batch size by n
    # (see the worked example after this class).
    gradient_accumulation_steps: int = 8  # gradient accumulation steps

    warmup_steps: int = 1024  # learning-rate warmup steps; warmup samples = warmup_steps * batch_size * gradient_accumulation_steps

    tokenizer_dir: str = PROJECT_ROOT + '/model_save/'  # the tokenizer usually lives in the same folder as the model weights
    model_file: str = PROJECT_ROOT + '/model_save/chat_small_t5.{}.bin'
    model_config_file: str = PROJECT_ROOT + '/model_save/model_config.json'
    train_file: str = PROJECT_ROOT + '/data/my_train_dataset.parquet'
    validation_file: str = PROJECT_ROOT + '/data/my_valid_dataset.parquet'
    test_file: str = PROJECT_ROOT + '/data/my_test_dataset.parquet'

    # which checkpoint to fine-tune from; only takes effect when the training function
    # is called with is_finetune = True
    # when fine-tuning, remember to freeze some layers or lower the learning rate
    finetune_from_ckp_file: str = PROJECT_ROOT + '/model_save/chat_small_t5.best.bin'

    # training-state snapshot; training can resume from here after an interruption
    train_state_dir: str = PROJECT_ROOT + '/model_save/train_latest_state'
    output_dir: str = PROJECT_ROOT + '/model_save/pretrain'

    logging_steps: int = 50
    save_steps: int = 10000

    # dataset_cache_dir: str = PROJECT_ROOT + '/data/.cache'
    # trainer_log_file: str = PROJECT_ROOT + '/logs/trainer.log'

    keep_latest_n_ckp: int = 8  # maximum number of best-scoring checkpoints to keep during training
    seed: int = 23333
    dataloader_buffer_size: int = 50000
    max_seq_len: int = 256  # maximum sentence length, default: 256
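
# Worked example for the gradient-accumulation note above: with the defaults,
# each optimizer step covers 16 * 8 = 128 samples, and warmup covers
# 1024 * 128 = 131072 samples.
def _effective_batch_stats():
    cfg = TrainConfig()
    effective_batch_size = cfg.batch_size_per_gpu * cfg.gradient_accumulation_steps  # 128
    warmup_samples = cfg.warmup_steps * effective_batch_size                         # 131072
    return effective_batch_size, warmup_samples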

# ===================================================================================
# Model configuration
@dataclass
class T5ModelConfig:
    d_ff: int = 3072              # feed-forward (dense) layer dimension
    d_model: int = 768            # embedding / hidden dimension
    num_heads: int = 12           # number of attention heads; d_model // num_heads == d_kv
    d_kv: int = 64                # d_model // num_heads
    num_decoder_layers: int = 10  # number of Transformer decoder layers
    num_layers: int = 10          # number of Transformer encoder layers
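
# A minimal sketch of feeding these hyperparameters into Hugging Face's
# `transformers.T5Config` (illustrative; vocab_size and other fields keep
# T5Config's defaults here and would come from the tokenizer in practice).
def _build_t5_config():
    from transformers import T5Config

    cfg = T5ModelConfig()
    return T5Config(
        d_ff=cfg.d_ff,
        d_model=cfg.d_model,
        num_heads=cfg.num_heads,
        d_kv=cfg.d_kv,
        num_decoder_layers=cfg.num_decoder_layers,
        num_layers=cfg.num_layers,
    )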