# inference.py
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"  # set before importing torch so CUDA errors surface synchronously

from typing import Optional

import torch
from transformers import AutoTokenizer
from peft import PeftModel, PeftConfig

from emotional_gemma import EmotionalLlamaModel, EMOTION_DIMENSIONS, EMOTION_DIMENSIONS_REFERENCE


def generate_with_emotion(
    model,
    tokenizer,
    prompt: str,
    emotion_vector: list,
    max_new_tokens: int = 128,
    temperature: float = 0.7,
    top_k: int = 128,
    top_p: float = 0.95,
    do_sample: bool = True,
    device: str = "cuda" if torch.cuda.is_available() else "cpu",
    seed: Optional[int] = None,
):
    """Generates text via the standard model.generate() method, passing an emotion vector through."""
    print(f"Generation parameters: max_new_tokens={max_new_tokens}, temperature={temperature}, top_k={top_k}, top_p={top_p}, do_sample={do_sample}")
    if len(emotion_vector) != EMOTION_DIMENSIONS:
        raise ValueError(f"Emotion vector must have {EMOTION_DIMENSIONS} dimensions.")

    if seed is not None:
        torch.manual_seed(seed)
        if device == "cuda":
            torch.cuda.manual_seed_all(seed)

    model.eval()
    model.to(device)

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    input_ids = inputs["input_ids"]
    # Emotion vector needs to be a tensor and moved to the correct device
    emotion_tensor = torch.tensor([emotion_vector], dtype=torch.float).to(device) # Shape [1, EMOTION_DIMENSIONS]
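    # NOTE (assumption): for a batch of prompts, each row would need its own
    # vector, i.e. shape [batch_size, EMOTION_DIMENSIONS]; this script only
    # ever generates for a single prompt at a time.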

    with torch.no_grad():
        # Pass the emotion vector to the generate method
        generated_outputs = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            do_sample=do_sample,
            pad_token_id=tokenizer.eos_token_id,
            emotion_vector=emotion_tensor, # Pass the [1, EMOTION_DIMENSIONS] tensor
        )
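        # Note: this relies on the custom EmotionalLlamaModel accepting an
        # `emotion_vector` kwarg and forwarding it through generation (e.g. in
        # prepare_inputs_for_generation); a stock HF model would reject the
        # unknown model kwarg during generate()'s validation.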

    generated_text = tokenizer.decode(generated_outputs[0], skip_special_tokens=True)
    return generated_text
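
# --- Optional helper (editor's sketch; not part of the original script) ---
# Builds an emotion vector from named axes rather than hand-writing index
# positions. It ASSUMES EMOTION_DIMENSIONS_REFERENCE (imported above) is an
# ordered sequence of axis names such as "SADNESS_JOY"; adjust the lookup if
# the real structure in emotional_gemma.py differs.
def make_emotion_vector(**axes: float) -> list:
    names = [str(name).upper() for name in EMOTION_DIMENSIONS_REFERENCE]
    vector = [0.0] * EMOTION_DIMENSIONS
    for axis, value in axes.items():
        vector[names.index(axis.upper())] = value  # ValueError on unknown axis
    return vector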

# --- Main block ---
if __name__ == "__main__":
    # Directory where the adapter weights and custom layer weights were saved
    model_path = "./emotional-gemma-output-4"

    # --- Load configuration ---
    # PEFT config should tell us the base model name
    try:
        config = PeftConfig.from_pretrained(model_path)
        model_name = config.base_model_name_or_path
        print(f"Inferred base model name from PEFT config: {model_name}")
    except Exception as e:
        print(f"Warning: Could not infer base model name from PeftConfig in {model_path}. Using default. Error: {e}")
        # Fallback if config loading fails
        model_name = "google/gemma-3-1b-it"

    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
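    # (Right padding matches typical training setups; batched generation would
    # usually require left padding instead. Single-prompt generation here is
    # unaffected.)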

    # --- Load the base model ---
    # The base model needs to be the custom EmotionalLlamaModel
    print(f"Loading base model: {model_name}")
    base_model = EmotionalLlamaModel.from_pretrained(
        model_name,
        trust_remote_code=True,
    )
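    # (Optionally, torch_dtype=torch.bfloat16 or device_map="auto" could be
    # passed to from_pretrained to reduce memory use; defaults are kept here.)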
    print("Base model loaded.")

    # --- Load the PEFT model (adapter weights only) ---
    print(f"Loading PEFT adapter from: {model_path}")
    # This wraps the base_model with PEFT adapters
    model = PeftModel.from_pretrained(base_model, model_path)
    print(f"PEFT adapter loaded. Model type: {type(model)}")

    # --- Explicitly Load Custom Layer Weights ---
    # Load the state_dict for the custom layer from the saved file
    custom_weights_path = os.path.join(model_path, "emotion_proj_weights.pth")
    try:
        if os.path.exists(custom_weights_path):
            print(f"Loading custom emotion_proj_embed weights from: {custom_weights_path}")
            # Load the state dict on CPU first (safer before moving into the
            # model); weights_only=True restricts unpickling to tensors, which
            # is all a plain state_dict contains.
            emotion_state_dict = torch.load(custom_weights_path, map_location="cpu", weights_only=True)

            # Access the layer within the PeftModel's base_model
            # The custom layer is directly on the base model instance
            emotion_layer = model.base_model.emotion_proj_embed
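            # load_state_dict defaults to strict=True: it raises a RuntimeError
            # on any key mismatch, and on success returns an empty
            # missing/unexpected-keys report.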
            load_result = emotion_layer.load_state_dict(emotion_state_dict)
            print(f"Custom weights loaded successfully: {load_result}")
        else:
            print(f"WARNING: Custom weights file not found at {custom_weights_path}. Layer 'emotion_proj_embed' will have base model's initial weights.")

    except Exception as e:
        print(f"ERROR loading custom emotion_proj_embed weights from {custom_weights_path}: {e}")

    # Determine and move the model to the appropriate device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Moving model to device: {device}")
    model.to(device)


    # --- Model Weight Checks (After Loading) ---
    print("\n--- Model Weight Checks (After Loading) ---")
    is_peft_model = isinstance(model, PeftModel)
    print(f"Is PeftModel: {is_peft_model}")

    print("  emotion_proj Layer Check:")
    try:
        # Access the custom layer via the base_model attribute of the PeftModel
        emotion_proj_layer = model.base_model.emotion_proj_embed
        print(f"    - emotion_proj_embed Sequential found: {emotion_proj_layer}")
        # Assuming the Sequential contains a Linear layer at index 0
        linear_layer = emotion_proj_layer[0]
        print(f"    - Linear layer inside Sequential: {linear_layer}")
        if hasattr(linear_layer, 'weight'):
            print(f"      Weights exist, device: {linear_layer.weight.device}, dtype: {linear_layer.weight.dtype}")
            print(f"      Weights mean abs value: {linear_layer.weight.data.abs().mean().item()}")
        else:
            print("      Weights attribute not found.")
        if hasattr(linear_layer, 'bias') and linear_layer.bias is not None:
            print(f"      Bias exists, device: {linear_layer.bias.device}, dtype: {linear_layer.bias.dtype}")
            print(f"      Bias mean abs value: {linear_layer.bias.data.abs().mean().item()}")
        else:
            print("      Bias attribute not found or is None.")
    except Exception as e:
        print(f"    - Error checking layer: {e}")

    # Check the device of one of the model parameters
    print(f"Model overall device: {next(model.parameters()).device}")

    # --- Generation ---
    # Prepare the prompt using the chat template
    prompt = tokenizer.apply_chat_template([
        {"role": "user", "content": "How are you?"},
    ], tokenize=False, add_generation_prompt=True)

    print(f"\nPrompt:\n{prompt}")

    # Define emotion vectors based on the reference dimensions
    # (EMOTION_DIMENSIONS_REFERENCE is defined in emotional_gemma.py).
    # Index mapping: 0=SADNESS_JOY, 1=FEAR_COURAGE, 2=DISGUST_ACCEPTANCE, 3=ANGER_CALMNESS,
    # 4=SURPRISE_EXPECTATION, 5=DISTRUST_TRUST, 6=BOREDOM_INTEREST, 7=INDIFFERENCE_EMPATHY.
    # Convention (per the joyful/sad vectors below): positive values lean toward
    # the second-named pole, negative toward the first, so -0.5 on
    # ANGER_CALMNESS leans toward anger.
    joyful_emotion = [0.8, 0, 0, -0.5, 0, 0, 0, 0]  # strong Joy, slight Anger
    sad_emotion = [-0.8, 0, 0, -0.5, 0, 0, 0, 0]    # strong Sadness, slight Anger
    neutral_emotion = [0] * EMOTION_DIMENSIONS      # all dimensions at zero
    my_seed = 42 # Seed for reproducibility
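
    # Equivalently, the vectors could be built with the make_emotion_vector
    # sketch above (axis names are an assumption about EMOTION_DIMENSIONS_REFERENCE):
    # joyful_emotion = make_emotion_vector(sadness_joy=0.8, anger_calmness=-0.5)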

    # Generate text for the same prompt and seed under different emotion vectors
    print("Generating with joyful emotion:")
    joyful_text = generate_with_emotion(model, tokenizer, prompt, joyful_emotion, seed=my_seed)
    print(joyful_text)

    print("\nGenerating with sad emotion:")
    sad_text = generate_with_emotion(model, tokenizer, prompt, sad_emotion, seed=my_seed)
    print(sad_text)

    print("\nGenerating with neutral emotion:")
    neutral_text = generate_with_emotion(model, tokenizer, prompt, neutral_emotion, seed=my_seed)
    print(neutral_text)