# train_lora.py
# Qwen-7B-Chat LoRA fine-tuning script (truly final version)
# Key point: add labels so the Trainer can compute the loss

import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForSeq2Seq,
)
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset

# --- Configuration ---
model_path = "/home/yq238/project_pi_aaa247/yq238/qwen_training/models/Qwen-7B-Chat"
data_path = "/home/yq238/project_pi_aaa247/yq238/qwen_training/data/training1.jsonl"
output_dir = "/home/yq238/project_pi_aaa247/yq238/qwen_training/training/test1"
os.makedirs(output_dir, exist_ok=True)

# --- 1. Load tokenizer ---
print("🔧 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
if tokenizer.pad_token_id is None:
    # Qwen's remote-code tokenizer ships without a pad token; reuse its
    # end-of-document id (eod_id) so batches can be padded.
    tokenizer.pad_token_id = tokenizer.eod_id

# --- 2. Load model ---
print("🔧 Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)

# ✅ Enable gradient checkpointing to save GPU memory.
# use_cache is incompatible with checkpointing, and the input embeddings must
# require grad so gradients can flow back to the LoRA layers of a frozen base.
model.config.use_cache = False
model.gradient_checkpointing_enable()
model.enable_input_require_grads()

# --- 3. LoRA configuration ---
print("🔧 Configuring LoRA...")
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=64,
    lora_alpha=128,
    # Suffix-matched against Qwen's attention and MLP output projections.
    target_modules=["attn.c_proj", "mlp.c_proj"],
    lora_dropout=0.05,
    bias="none",
)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# Keep the trainable LoRA weights in fp32: the GradScaler used by fp16=True
# cannot unscale fp16 gradients ("Attempting to unscale FP16 gradients").
for param in model.parameters():
    if param.requires_grad:
        param.data = param.data.float()

# --- 4. Data processing: add labels ---
print("🔧 Loading and processing data...")

def tokenize_fn(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    # Training text: system prompt + user turn + assistant answer
    # (kept in Chinese to match the training data).
    texts = [
        f"你是一个自动化助手。\n\n用户:{i}\n{s}\n\n助手:{o}"
        for i, s, o in zip(instructions, inputs, outputs)
    ]
    # ✅ Tokenize the full texts
    batch = tokenizer(texts, truncation=True, max_length=256, padding=False)
    # ✅ Add labels. For causal-LM training they must align token-for-token
    # with input_ids, so copy them; the loss is then computed over the whole
    # prompt+answer text. (Masking the prompt tokens with -100 would restrict
    # the loss to the answer only.)
    batch["labels"] = [ids.copy() for ids in batch["input_ids"]]
    return batch

dataset = load_dataset('json', data_files=data_path, split='train')
print(f"✅ Dataset loaded: {len(dataset)} samples")

tokenized_dataset = dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=dataset.column_names,
    num_proc=1,
)
print("✅ Data processing finished")

# --- 5. Training arguments ---
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    num_train_epochs=3,
    learning_rate=2e-4,
    logging_steps=1,
    save_steps=10,
    save_total_limit=2,
    fp16=True,
    bf16=False,
    remove_unused_columns=True,
    report_to="none",
    warmup_ratio=0.1,
    weight_decay=0.01,
    dataloader_num_workers=1,
    disable_tqdm=False,
    dataloader_pin_memory=True,
    max_grad_norm=1.0,
)

# --- 6. Trainer ---
# Pad input_ids/attention_mask to the longest sequence in each batch and pad
# labels with -100 so padded positions are ignored by the loss.
data_collator = DataCollatorForSeq2Seq(tokenizer, padding=True, label_pad_token_id=-100)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)

# --- 7. Train ---
print("🚀 Starting training...")
trainer.train()

# --- 8. Save ---
print("💾 Saving LoRA weights...")
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"✅ Training finished! LoRA weights saved to: {output_dir}")
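
# --- Usage sketch (not executed here) ---
# Illustrative only: one common way to use the adapter saved above is to
# reload the base model and attach the LoRA weights with peft.PeftModel.
# The paths are the ones defined at the top of this script.
#
#   from peft import PeftModel
#   base = AutoModelForCausalLM.from_pretrained(
#       model_path, device_map="auto", trust_remote_code=True,
#       torch_dtype=torch.float16,
#   )
#   ft_model = PeftModel.from_pretrained(base, output_dir).eval()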