Upload 2 files
class definition and inference scripts
- emotional_gemma.py +143 -0
- inference.py +179 -0
emotional_gemma.py
ADDED
@@ -0,0 +1,143 @@
# emotional_gemma.py
import torch
import torch.nn as nn
from transformers import Gemma3ForCausalLM
from transformers.modeling_outputs import CausalLMOutputWithPast
from typing import Optional, Union

# Constants
MODEL_NAME = "google/gemma-3-1b-it"
EMOTION_DIMENSIONS = 8
EMOTION_DIMENSIONS_REFERENCE = [
    "SADNESS_JOY", "FEAR_COURAGE", "DISGUST_ACCEPTANCE", "ANGER_CALMNESS",
    "SURPRISE_EXPECTATION", "DISTRUST_TRUST", "BOREDOM_INTEREST", "INDIFFERENCE_EMPATHY"
]

class EmotionalLlamaModel(Gemma3ForCausalLM):
    """Gemma3 Causal Language Model with emotion modulation."""
    def __init__(self, config):
        super().__init__(config)
        self.emotion_dim = EMOTION_DIMENSIONS

        # Emotion projection layer: an MLP that projects the emotion vector
        # to the hidden size of the model.
        intermediate_size = config.hidden_size // 2
        self.emotion_proj_embed = nn.Sequential(
            nn.Linear(self.emotion_dim, intermediate_size),
            nn.LayerNorm(intermediate_size),
            nn.GELU(),
            nn.Linear(intermediate_size, config.hidden_size),
        )

        # Initialization for the MLP weights
        def init_weights(m):
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    torch.nn.init.zeros_(m.bias)
        self.emotion_proj_embed.apply(init_weights)

        # Post-initialization steps from the base class
        self.post_init()

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[list] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        emotion_vector: Optional[torch.FloatTensor] = None,
        **kwargs,
    ) -> Union[tuple, CausalLMOutputWithPast]:

        # 1. Prepare input embeddings:
        # get embeddings from input_ids or use the provided inputs_embeds.
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds")
        elif input_ids is not None:
            batch_size, seq_len = input_ids.shape
            inputs_embeds = self.model.embed_tokens(input_ids)
        elif inputs_embeds is not None:
            batch_size, seq_len = inputs_embeds.shape[:2]
        else:
            # If neither is provided, it's likely a generation step using only the cache.
            # The base model's forward handles this by looking up the single new token ID,
            # possibly receiving `inputs_embeds` via `kwargs`.
            pass  # standard generate() handles embedding lookup for subsequent tokens

        # 2. Apply emotion modulation to the embeddings:
        # if emotion_vector is provided and we have inputs_embeds, modulate the embeddings.
        if emotion_vector is not None and inputs_embeds is not None:
            if emotion_vector.shape[0] != batch_size:
                raise ValueError("Batch size mismatch between emotion_vector and input.")

            # Ensure emotion_vector shape is [batch, seq_len, emotion_dim].
            # This handles the case where a single emotion vector [batch, emotion_dim]
            # is provided for the entire sequence during inference.
            current_seq_len = inputs_embeds.shape[1]
            if emotion_vector.dim() == 2:
                emotion_vector = emotion_vector.unsqueeze(1).expand(-1, current_seq_len, -1)
            elif emotion_vector.shape[1] != current_seq_len:
                # This case can occur if the emotion vector is longer than the current
                # input chunk (e.g., during token-by-token generation after the prompt);
                # take the slice corresponding to the current input.
                emotion_vector = emotion_vector[:, :current_seq_len, :]

            # Project the emotion vector to the hidden size
            emotion_offset = self.emotion_proj_embed(emotion_vector)  # -> [batch, current_seq_len, hidden_size]

            # Add the projected emotion vector as an offset to the input embeddings.
            # The scaling factor (e.g., 3) can be adjusted during training.
            inputs_embeds = inputs_embeds + emotion_offset * 3

        # 3. Pass the (potentially modified) embeddings to the base model's core layers.
        # Crucially, pass inputs_embeds if they were created/modified, and input_ids only otherwise
        # (the base forward handles the input_ids -> inputs_embeds lookup itself).
        outputs = self.model(
            input_ids=input_ids if inputs_embeds is None else None,  # pass input_ids ONLY if inputs_embeds wasn't created/modified
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,  # always pass the potentially modified inputs_embeds
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=True,  # need the last hidden state for lm_head
            return_dict=True,
            **kwargs,
        )

        # 4. Compute logits from the final hidden state
        hidden_states = outputs.hidden_states[-1]

        # Apply the language model head to get logits
        logits = self.lm_head(hidden_states)

        # 5. Compute loss if labels are provided
        loss = None
        if labels is not None:
            # Shift tokens for autoregressive training
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(shift_logits.view(-1, self.config.vocab_size), shift_labels.view(-1))

        # Return the CausalLMOutputWithPast object
        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,  # optionally keep all hidden states
            attentions=outputs.attentions,        # optionally keep attentions
        )

# This file only contains the model definition and constants.
# Training and inference logic are handled in separate files.
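A minimal sanity check of the class interface, not part of this upload: assuming emotional_gemma.py is importable, a single forward pass with one emotion vector per batch element should yield logits of shape [batch, seq_len, vocab_size], since forward() broadcasts a 2-D emotion vector across the sequence.

# sanity_check.py -- minimal sketch, not part of this upload
import torch
from transformers import AutoTokenizer
from emotional_gemma import EmotionalLlamaModel, MODEL_NAME, EMOTION_DIMENSIONS

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Note: emotion_proj_embed is freshly initialized here; the trained weights
# are only loaded in inference.py via emotion_proj_weights.pth.
model = EmotionalLlamaModel.from_pretrained(MODEL_NAME)
model.eval()

inputs = tokenizer("Hello there", return_tensors="pt")
# One emotion vector per batch element; forward() expands it over the sequence.
emotion = torch.zeros(1, EMOTION_DIMENSIONS)
emotion[0, 0] = 0.8  # push the SADNESS_JOY axis toward the joy end

with torch.no_grad():
    out = model(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        emotion_vector=emotion,
    )

print(out.logits.shape)  # torch.Size([1, seq_len, vocab_size])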
inference.py
ADDED
@@ -0,0 +1,179 @@
# inference.py
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
import torch
from transformers import AutoTokenizer
from emotional_gemma import EmotionalLlamaModel, EMOTION_DIMENSIONS, EMOTION_DIMENSIONS_REFERENCE
from peft import PeftModel, PeftConfig

import torch.nn.functional as F


def generate_with_emotion(
    model,
    tokenizer,
    prompt: str,
    emotion_vector: list,
    max_new_tokens: int = 128,
    temperature: float = 0.7,
    top_k: int = 128,
    top_p: float = 0.95,
    do_sample: bool = True,
    device: str = "cuda" if torch.cuda.is_available() else "cpu",
    seed: int = None,
):
    """
    Generates text using the standard model.generate() method with an emotion vector.
    """
    print(f"Generation parameters: max_new_tokens={max_new_tokens}, temperature={temperature}, top_k={top_k}, top_p={top_p}, do_sample={do_sample}")
    if len(emotion_vector) != EMOTION_DIMENSIONS:
        raise ValueError(f"Emotion vector must have {EMOTION_DIMENSIONS} dimensions.")

    if seed is not None:
        torch.manual_seed(seed)
        if device == "cuda":
            torch.cuda.manual_seed_all(seed)

    current_model = model
    current_model.eval()
    current_model.to(device)

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    input_ids = inputs["input_ids"]
    # The emotion vector needs to be a tensor on the correct device
    emotion_tensor = torch.tensor([emotion_vector], dtype=torch.float).to(device)  # shape [1, EMOTION_DIMENSIONS]

    with torch.no_grad():
        # Pass the emotion vector through to the generate method
        generated_outputs = current_model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            do_sample=do_sample,
            pad_token_id=tokenizer.eos_token_id,
            emotion_vector=emotion_tensor,  # pass the [1, EMOTION_DIMENSIONS] tensor
        )

    generated_text = tokenizer.decode(generated_outputs[0], skip_special_tokens=True)
    return generated_text

# --- Main block ---
if __name__ == "__main__":
    # Directory where the adapter weights and custom layer weights were saved
    model_path = "./emotional-gemma-output-4"

    # --- Load configuration ---
    # The PEFT config should tell us the base model name
    try:
        config = PeftConfig.from_pretrained(model_path)
        model_name = config.base_model_name_or_path
        print(f"Inferred base model name from PEFT config: {model_name}")
    except Exception as e:
        print(f"Warning: Could not infer base model name from PeftConfig in {model_path}. Using default. Error: {e}")
        # Fallback if config loading fails
        model_name = "google/gemma-3-1b-it"

    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # --- Load the base model ---
    # The base model needs to be the custom EmotionalLlamaModel
    print(f"Loading base model: {model_name}")
    base_model = EmotionalLlamaModel.from_pretrained(
        model_name,
        trust_remote_code=True,
    )
    print("Base model loaded.")

    # --- Load the PEFT model (adapter weights only) ---
    print(f"Loading PEFT adapter from: {model_path}")
    # This wraps the base_model with PEFT adapters
    model = PeftModel.from_pretrained(base_model, model_path)
    print(f"PEFT adapter loaded. Model type: {type(model)}")

    # --- Explicitly load custom layer weights ---
    # Load the state_dict for the custom layer from the saved file
    custom_weights_path = os.path.join(model_path, "emotion_proj_weights.pth")
    try:
        if os.path.exists(custom_weights_path):
            print(f"Loading custom emotion_proj_embed weights from: {custom_weights_path}")
            # Load the state dict; mapping to CPU first is safer before loading into the model
            emotion_state_dict = torch.load(custom_weights_path, map_location="cpu")

            # Access the layer within the PeftModel's base_model;
            # the custom layer lives directly on the base model instance
            emotion_layer = model.base_model.emotion_proj_embed
            load_result = emotion_layer.load_state_dict(emotion_state_dict)
            print(f"Custom weights loaded successfully: {load_result}")
        else:
            print(f"WARNING: Custom weights file not found at {custom_weights_path}. Layer 'emotion_proj_embed' will keep the base model's initial weights.")

    except Exception as e:
        print(f"ERROR loading custom emotion_proj_embed weights from {custom_weights_path}: {e}")

    # Determine and move the model to the appropriate device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Moving model to device: {device}")
    model.to(device)


    # --- Model weight checks (after loading) ---
    print("\n--- Model Weight Checks (After Loading) ---")
    is_peft_model = isinstance(model, PeftModel)
    print(f"Is PeftModel: {is_peft_model}")

    print(" emotion_proj Layer Check:")
    try:
        # Access the custom layer via the base_model attribute of the PeftModel
        emotion_proj_layer = model.base_model.emotion_proj_embed
        print(f" - emotion_proj_embed Sequential found: {emotion_proj_layer}")
        # The Sequential contains a Linear layer at index 0
        linear_layer = emotion_proj_layer[0]
        print(f" - Linear layer inside Sequential: {linear_layer}")
        if hasattr(linear_layer, 'weight'):
            print(f"   Weights exist, device: {linear_layer.weight.device}, dtype: {linear_layer.weight.dtype}")
            print(f"   Weights mean abs value: {linear_layer.weight.data.abs().mean().item()}")
        else:
            print("   Weights attribute not found.")
        if hasattr(linear_layer, 'bias') and linear_layer.bias is not None:
            print(f"   Bias exists, device: {linear_layer.bias.device}, dtype: {linear_layer.bias.dtype}")
            print(f"   Bias mean abs value: {linear_layer.bias.data.abs().mean().item()}")
        else:
            print("   Bias attribute not found or is None.")
    except Exception as e:
        print(f" - Error checking layer: {e}")

    # Check the device of one of the model parameters
    print(f"Model overall device: {next(model.parameters()).device}")

    # --- Generation ---
    # Prepare the prompt using the chat template
    prompt = tokenizer.apply_chat_template([
        {"role": "user", "content": "How are you feeling today?"},
    ], tokenize=False, add_generation_prompt=True)

    print(f"\nPrompt:\n{prompt}")

    # Define emotion vectors based on the reference dimensions
    # (EMOTION_DIMENSIONS_REFERENCE is defined in emotional_gemma.py).
    # Index mapping: 0=SADNESS_JOY, 1=FEAR_COURAGE, 2=DISGUST_ACCEPTANCE, 3=ANGER_CALMNESS,
    #                4=SURPRISE_EXPECTATION, 5=DISTRUST_TRUST, 6=BOREDOM_INTEREST, 7=INDIFFERENCE_EMPATHY
    joyful_emotion = [0.8, 0, 0, -0.5, 0, 0, 0, 0]   # High Joy, some Calmness
    sad_emotion = [-0.8, 0, 0, -0.5, 0, 0, 0, 0]     # High Sadness, some Calmness
    neutral_emotion = [0] * EMOTION_DIMENSIONS       # All dimensions at zero
    my_seed = 42  # Seed for reproducibility

    # Generate text with different emotions using the recommended method
    print("Generating with joyful emotion:")
    joyful_text = generate_with_emotion(model, tokenizer, prompt, joyful_emotion, seed=my_seed)
    print(joyful_text)

    print("\nGenerating with sad emotion:")
    sad_text = generate_with_emotion(model, tokenizer, prompt, sad_emotion, seed=my_seed)
    print(sad_text)

    print("\nGenerating with neutral emotion:")
    neutral_text = generate_with_emotion(model, tokenizer, prompt, neutral_emotion, seed=my_seed)
    print(neutral_text)
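inference.py expects emotion_proj_weights.pth to sit next to the saved PEFT adapter in model_path. The training script is not part of this upload, so the sketch below is only an assumption of how that file could be produced; the save_emotion_proj helper and output_dir name are hypothetical.

# Sketch (assumption): writing emotion_proj_weights.pth on the training side.
import os
import torch

def save_emotion_proj(peft_model, output_dir):
    # The custom layer lives on the wrapped base model, mirroring the access
    # pattern used in inference.py (model.base_model.emotion_proj_embed).
    emotion_layer = peft_model.base_model.emotion_proj_embed
    os.makedirs(output_dir, exist_ok=True)
    torch.save(emotion_layer.state_dict(),
               os.path.join(output_dir, "emotion_proj_weights.pth"))

# Hypothetical usage, after peft_model.save_pretrained(output_dir):
# save_emotion_proj(peft_model, output_dir)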