ivxxdegen committed
Commit 969e90e
1 Parent(s): 37078d1

requirements added

Files changed (1)
  1. app.py +19 -13
app.py CHANGED
@@ -3,7 +3,7 @@ import shutil
 import pandas as pd
 from datasets import Dataset
 
-# Disable hf_transfer and set CUDA allocation configuration to help with fragmentation
+# Disable hf_transfer and set CUDA allocation configuration
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32"
 
@@ -14,7 +14,6 @@ from transformers.models.auto.configuration_auto import CONFIG_MAPPING
 class Phi3Config(PretrainedConfig):
     model_type = "phi3"
 
-# Register our dummy config class for "phi3"
 CONFIG_MAPPING["phi3"] = Phi3Config
 
 # --- Standard imports ---
@@ -44,7 +43,7 @@ if os.path.exists(json_cache_dir):
     shutil.rmtree(json_cache_dir)
 
 # --- Define paths ---
-dataset_path = 'datasets/finetune_dataset_ready.jsonl'  # Path to your merged JSONL file
+dataset_path = 'datasets/finetune_dataset_ready.jsonl'  # Your merged dataset file
 model_name = "microsoft/phi-4"
 HF_REPO = "ivxxdegen/mibera-v1-merged"
 
@@ -73,17 +72,24 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto",  # Automatically map layers between GPU and CPU
     max_memory=max_memory,
     offload_folder=offload_folder,
-    low_cpu_mem_usage=True
+    low_cpu_mem_usage=True,
+    offload_state_dict=True  # Offload state dict from meta
 )
 torch.cuda.empty_cache()
 model.gradient_checkpointing_enable()
 
+# --- Force materialize all parameters by re-loading state dict ---
+print("Materializing model parameters...")
+state = model.state_dict()
+model.load_state_dict(state)
+print("Model parameters are fully materialized.")
+
 # --- Integrate PEFT (LoRA) ---
-# Inspect your model's modules (run a snippet if needed) to determine the correct target modules.
-# Based on your inspection, it seems that "qkv_proj" is available. Update if necessary.
+# Inspect your model's modules to determine the right target modules.
+# Based on your previous inspection, use "qkv_proj" if that's the correct layer.
 lora_config = LoraConfig(
-    r=16,  # LoRA rank
-    lora_alpha=32,  # Scaling factor
+    r=16,
+    lora_alpha=32,
     target_modules=["qkv_proj"],  # Update this list based on your model inspection
     lora_dropout=0.1,
     bias="none"
@@ -113,18 +119,18 @@ def add_labels(batch):
 print("🛠 Adding labels to train dataset...")
 tokenized_train = tokenized_train.map(add_labels, batched=True)
 print("🛠 Adding labels to eval dataset...")
-tokenized_eval = tokenized_eval.map(add_labels, batched=True)
+tokenized_eval = eval_dataset.map(add_labels, batched=True)
 
 # --- Set training arguments with memory-saving parameters ---
 training_args = TrainingArguments(
     output_dir=output_dir,
-    evaluation_strategy="epoch",  # (Deprecated: use eval_strategy in future versions)
+    evaluation_strategy="epoch",  # (Deprecated: use eval_strategy in future)
     logging_dir="./logs",
     logging_steps=500,
     num_train_epochs=3,
-    per_device_train_batch_size=1,  # Very low batch size to reduce memory usage
-    gradient_accumulation_steps=8,  # Accumulate gradients to simulate a larger effective batch size
-    fp16=True,  # Enable mixed precision training
+    per_device_train_batch_size=1,
+    gradient_accumulation_steps=8,
+    fp16=True,
 )
 
 # --- Initialize Trainer ---
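
The from_pretrained hunk above relies on max_memory and offload_folder, which are defined earlier in app.py and not shown in this diff. A minimal sketch of what that setup typically looks like; the budget values and folder name here are assumptions, not the file's actual figures:

import os

# Assumed per-device memory budgets for device_map="auto" placement;
# keys are GPU indices or "cpu", values are human-readable size strings.
max_memory = {0: "14GiB", "cpu": "48GiB"}

# Assumed scratch directory for weights offloaded to CPU/disk.
offload_folder = "offload"
os.makedirs(offload_folder, exist_ok=True)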
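The LoRA comments in the diff ask you to inspect the model's modules before settling on target_modules. One way to list the candidate linear-layer names, assuming model is the already-loaded AutoModelForCausalLM from above:

import torch.nn as nn

# Collect the leaf names of all nn.Linear modules,
# e.g. "qkv_proj" as referenced in the LoRA config above.
linear_names = set()
for name, module in model.named_modules():
    if isinstance(module, nn.Linear):
        linear_names.add(name.split(".")[-1])
print(sorted(linear_names))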
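The diff stops at the LoraConfig fields, so the adapter wiring itself is not visible here. If app.py follows the usual PEFT pattern, it looks roughly like this (a sketch, not the file's actual code):

from peft import get_peft_model

# Wrap the offloaded base model with the LoRA adapter described by lora_config.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # sanity check: only the LoRA weights should be trainable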
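The last hunk's header references def add_labels(batch) without showing its body. For causal-LM fine-tuning this helper usually just mirrors input_ids into labels; a hedged sketch under that assumption:

def add_labels(batch):
    # For causal language modeling, the labels are the input tokens themselves;
    # the Trainer shifts them internally when computing the loss.
    batch["labels"] = [list(ids) for ids in batch["input_ids"]]
    return batch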
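The diff ends at the # --- Initialize Trainer --- marker. A sketch of the initialization that plausibly follows, reusing the variables from the hunks above (the dataset variable names are assumptions):

from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,  # needed because evaluation_strategy="epoch"
)
trainer.train()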