#!/usr/bin/env python3
"""
Example usage for deepseek-tiny-mla-o-v0.1
"""

import torch
from transformers import DeepseekV3ForCausalLM, AutoTokenizer


def main():
    # Load model and tokenizer
    print("Loading model...")
    model = DeepseekV3ForCausalLM.from_pretrained("ChrisMcCormick/deepseek-tiny-mla-o-v0.1")
    tokenizer = AutoTokenizer.from_pretrained("ChrisMcCormick/deepseek-tiny-mla-o-v0.1")

    # Apply the output subspace patch required for the MLA-o model
    print("Applying output subspace patch...")
    from patch_o_proj import patch_o_proj_implementation
    patch_o_proj_implementation(
        model=model,
        o_latent_dim=96,
        variant="sequential_norm",
    )

    # Set to evaluation mode
    model.eval()

    # Example prompts
    prompts = [
        "The future of artificial intelligence is",
        "In a world where technology advances rapidly,",
        "The most important discovery in science was",
    ]

    print("\nGenerating text...")
    for prompt in prompts:
        inputs = tokenizer(prompt, return_tensors="pt")

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=50,  # total length cap, including the prompt tokens
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Prompt: {prompt}")
        print(f"Generated: {generated_text}")
        print("-" * 50)


if __name__ == "__main__":
    main()