import os

# Run CUDA kernels synchronously so errors are reported at the offending call.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import torch
import torch.nn.functional as F

from transformers import AutoTokenizer
from peft import PeftModel, PeftConfig

from emotional_gemma import EmotionalLlamaModel, EMOTION_DIMENSIONS, EMOTION_DIMENSIONS_REFERENCE


def generate_with_emotion(
    model,
    tokenizer,
    prompt: str,
    emotion_vector: list,
    max_new_tokens: int = 128,
    temperature: float = 0.7,
    top_k: int = 128,
    top_p: float = 0.95,
    do_sample: bool = True,
    device: str = "cuda" if torch.cuda.is_available() else "cpu",
    seed: int = None,
):
    """
    Generates text using the standard model.generate() method with an emotion vector.
    """
print(f"Generation parameters: max_new_tokens={max_new_tokens}, temperature={temperature}, top_k={top_k}, top_p={top_p}, do_sample={do_sample}")
|
|
if len(emotion_vector) != EMOTION_DIMENSIONS:
|
|
raise ValueError(f"Emotion vector must have {EMOTION_DIMENSIONS} dimensions.")
|
|
|
|
if seed is not None:
|
|
torch.manual_seed(seed)
|
|
if device == "cuda":
|
|
torch.cuda.manual_seed_all(seed)
|
|
|
|
current_model = model
|
|
current_model.eval()
|
|
current_model.to(device)
|
|
|
|
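
    # Tokenize the prompt and batch the emotion vector -> shape (1, EMOTION_DIMENSIONS).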
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    input_ids = inputs["input_ids"]

    emotion_tensor = torch.tensor([emotion_vector], dtype=torch.float).to(device)
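
    # Note: emotion_vector is not a standard generate() argument; it is assumed to be
    # consumed by EmotionalLlamaModel and injected through its emotion projection layer.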
    with torch.no_grad():
        generated_outputs = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            do_sample=do_sample,
            pad_token_id=tokenizer.eos_token_id,
            emotion_vector=emotion_tensor,
        )

    generated_text = tokenizer.decode(generated_outputs[0], skip_special_tokens=True)
    return generated_text


if __name__ == "__main__":
    model_path = "./emotional-gemma-output-4"

    # Infer the base model from the adapter's PEFT config; fall back to a default name.
    try:
        config = PeftConfig.from_pretrained(model_path)
        model_name = config.base_model_name_or_path
        print(f"Inferred base model name from PEFT config: {model_name}")
    except Exception as e:
        print(f"Warning: Could not infer base model name from PeftConfig in {model_path}. Using default. Error: {e}")
        model_name = "google/gemma-3-1b-it"
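
    # Tokenizer setup: reuse the EOS token as the pad token if none is defined.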
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

print(f"Loading base model: {model_name}")
|
|
base_model = EmotionalLlamaModel.from_pretrained(
|
|
model_name,
|
|
trust_remote_code=True,
|
|
)
|
|
print("Base model loaded.")
|
|
|
|
|
|
print(f"Loading PEFT adapter from: {model_path}")
|
|
|
|
model = PeftModel.from_pretrained(base_model, model_path)
|
|
print(f"PEFT adapter loaded. Model type: {type(model)}")
|
|
|
|
|
|
|
|
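
    # The emotion projection layer is not part of the LoRA adapter, so its weights are
    # assumed to be saved separately as emotion_proj_weights.pth and loaded by hand here.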
    custom_weights_path = os.path.join(model_path, "emotion_proj_weights.pth")
    try:
        if os.path.exists(custom_weights_path):
            print(f"Loading custom emotion_proj_embed weights from: {custom_weights_path}")
            emotion_state_dict = torch.load(custom_weights_path, map_location="cpu")
            emotion_layer = model.base_model.emotion_proj_embed
            load_result = emotion_layer.load_state_dict(emotion_state_dict)
            print(f"Custom weights loaded successfully: {load_result}")
        else:
            print(f"WARNING: Custom weights file not found at {custom_weights_path}. Layer 'emotion_proj_embed' will keep the base model's initial weights.")
    except Exception as e:
        print(f"ERROR loading custom emotion_proj_embed weights from {custom_weights_path}: {e}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Moving model to device: {device}")
    model.to(device)
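
    # Sanity checks: confirm the PEFT wrapper is active and that the emotion_proj_embed
    # weights look loaded (non-trivial magnitudes, expected device and dtype).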
print("\n--- Model Weight Checks (After Loading) ---")
|
|
is_peft_model = isinstance(model, PeftModel)
|
|
print(f"Is PeftModel: {is_peft_model}")
|
|
|
|
print(" emotion_proj Layer Check:")
|
|
    try:
        emotion_proj_layer = model.base_model.emotion_proj_embed
        print(f" - emotion_proj_embed Sequential found: {emotion_proj_layer}")

        linear_layer = emotion_proj_layer[0]
        print(f" - Linear layer inside Sequential: {linear_layer}")

        if hasattr(linear_layer, 'weight'):
            print(f"   Weights exist, device: {linear_layer.weight.device}, dtype: {linear_layer.weight.dtype}")
            print(f"   Weights mean abs value: {linear_layer.weight.data.abs().mean().item()}")
        else:
            print("   Weights attribute not found.")

        if hasattr(linear_layer, 'bias') and linear_layer.bias is not None:
            print(f"   Bias exists, device: {linear_layer.bias.device}, dtype: {linear_layer.bias.dtype}")
            print(f"   Bias mean abs value: {linear_layer.bias.data.abs().mean().item()}")
        else:
            print("   Bias attribute not found or is None.")
    except Exception as e:
        print(f" - Error checking layer: {e}")

print(f"Model overall device: {next(model.parameters()).device}")
|
|
|
|
|
|
|
|
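
    # Build a single-turn chat prompt with the tokenizer's chat template.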
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": "How are you?"}],
        tokenize=False,
        add_generation_prompt=True,
    )
    print(f"\nPrompt:\n{prompt}")
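
    # Hand-written emotion vectors with EMOTION_DIMENSIONS entries each; the meaning of
    # each slot is assumed to follow EMOTION_DIMENSIONS_REFERENCE from emotional_gemma.
    # Only the first and fourth dimensions are pushed away from neutral here.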
    joyful_emotion = [0.8, 0, 0, -0.5, 0, 0, 0, 0]
    sad_emotion = [-0.8, 0, 0, -0.5, 0, 0, 0, 0]
    neutral_emotion = [0] * EMOTION_DIMENSIONS
    my_seed = 42
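
    # Use the same prompt and seed for every run so the outputs differ only in the
    # emotion conditioning.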
print("Generating with joyful emotion:")
|
|
joyful_text = generate_with_emotion(model, tokenizer, prompt, joyful_emotion, seed=my_seed)
|
|
print(joyful_text)
|
|
|
|
print("\nGenerating with sad emotion:")
|
|
sad_text = generate_with_emotion(model, tokenizer, prompt, sad_emotion, seed=my_seed)
|
|
print(sad_text)
|
|
|
|
print("\nGenerating with neutral emotion:")
|
|
neutral_text = generate_with_emotion(model, tokenizer, prompt, neutral_emotion, seed=my_seed)
|
|
print(neutral_text) |