Spaces:

AbstractPhil
/

meet-beeper

Running on Zero

App Files Files Community

AbstractPhil committed on Aug 18

Commit

c84b8a9

verified ·

1 Parent(s): 99d979b

Create load_for_inference.py

Browse files

Files changed (1) hide show

load_for_inference.py +241 -0

load_for_inference.py ADDED Viewed

	@@ -0,0 +1,241 @@

+"""
+Rose Beeper Model - Inference Example
+Simple script showing how to load and use the model for text generation
+"""
+import torch
+from tokenizers import Tokenizer
+from huggingface_hub import hf_hub_download
+# Import the extracted components (assuming they're in a module called 'beeper_inference')
+# from beeper_inference import BeeperRoseGPT, BeeperIO, generate, get_default_config
+def load_model_for_inference(
+    checkpoint_path: str = None,
+    tokenizer_path: str = "beeper.tokenizer.json",
+    hf_repo: str = "AbstractPhil/beeper-rose-v5",
+    device: str = "cuda"
+):
+    """
+    Load the Rose Beeper model for inference.
+    Args:
+        checkpoint_path: Path to local checkpoint file (.pt or .safetensors)
+        tokenizer_path: Path to tokenizer file
+        hf_repo: HuggingFace repository to download from if no local checkpoint
+        device: Device to load model on ("cuda" or "cpu")
+    Returns:
+        Tuple of (model, tokenizer, config)
+    """
+    # Get default configuration
+    config = get_default_config()
+    # Set device
+    device = torch.device(device if torch.cuda.is_available() else "cpu")
+    # Initialize model
+    model = BeeperRoseGPT(config).to(device)
+    # Initialize pentachora banks
+    # These are the default sizes from the training configuration
+    cap_cfg = config.get("capoera", {})
+    coarse_C = 20  # Approximate number of alive datasets
+    model.ensure_pentachora(
+        coarse_C=coarse_C,
+        medium_C=int(cap_cfg.get("topic_bins", 512)),
+        fine_C=int(cap_cfg.get("mood_bins", 7)),
+        dim=config["dim"],
+        device=device
+    )
+    # Load checkpoint
+    loaded = False
+    # Try loading from local path
+    if checkpoint_path and os.path.exists(checkpoint_path):
+        print(f"Loading model from: {checkpoint_path}")
+        missing, unexpected = BeeperIO.load_into_model(
+            model, checkpoint_path, map_location="cpu", strict=False
+        )
+        print(f"Loaded | missing={len(missing)} unexpected={len(unexpected)}")
+        loaded = True
+    # Try downloading from HuggingFace
+    if not loaded and hf_repo:
+        try:
+            print(f"Downloading model from HuggingFace: {hf_repo}")
+            path = hf_hub_download(repo_id=hf_repo, filename="beeper_final.safetensors")
+            missing, unexpected = BeeperIO.load_into_model(
+                model, path, map_location="cpu", strict=False
+            )
+            print(f"Loaded | missing={len(missing)} unexpected={len(unexpected)}")
+            loaded = True
+        except Exception as e:
+            print(f"Failed to download from HuggingFace: {e}")
+    if not loaded:
+        print("WARNING: No weights loaded, using random initialization!")
+    # Load tokenizer
+    if os.path.exists(tokenizer_path):
+        tok = Tokenizer.from_file(tokenizer_path)
+        print(f"Loaded tokenizer from: {tokenizer_path}")
+    else:
+        # Try downloading tokenizer from HF
+        try:
+            tok_path = hf_hub_download(repo_id=hf_repo, filename="tokenizer.json")
+            tok = Tokenizer.from_file(tok_path)
+            print(f"Downloaded tokenizer from HuggingFace")
+        except Exception as e:
+            raise RuntimeError(f"Could not load tokenizer: {e}")
+    # Set model to eval mode
+    model.eval()
+    return model, tok, config
+def interactive_generation(model, tokenizer, config, device="cuda"):
+    """
+    Interactive text generation loop.
+    Args:
+        model: The loaded BeeperRoseGPT model
+        tokenizer: The tokenizer
+        config: Model configuration
+        device: Device to run on
+    """
+    device = torch.device(device if torch.cuda.is_available() else "cpu")
+    model = model.to(device)
+    print("\n=== Rose Beeper Interactive Generation ===")
+    print("Enter your prompt (or 'quit' to exit)")
+    print("Commands: /temp <value>, /top_k <value>, /top_p <value>, /max <tokens>")
+    print("-" * 50)
+    # Generation settings (can be modified)
+    settings = {
+        "max_new_tokens": 100,
+        "temperature": config["temperature"],
+        "top_k": config["top_k"],
+        "top_p": config["top_p"],
+        "repetition_penalty": config["repetition_penalty"],
+        "presence_penalty": config["presence_penalty"],
+        "frequency_penalty": config["frequency_penalty"],
+    }
+    while True:
+        prompt = input("\nPrompt: ").strip()
+        if prompt.lower() == 'quit':
+            break
+        # Handle commands
+        if prompt.startswith('/'):
+            parts = prompt.split()
+            cmd = parts[0].lower()
+            if cmd == '/temp' and len(parts) > 1:
+                settings["temperature"] = float(parts[1])
+                print(f"Temperature set to {settings['temperature']}")
+                continue
+            elif cmd == '/top_k' and len(parts) > 1:
+                settings["top_k"] = int(parts[1])
+                print(f"Top-k set to {settings['top_k']}")
+                continue
+            elif cmd == '/top_p' and len(parts) > 1:
+                settings["top_p"] = float(parts[1])
+                print(f"Top-p set to {settings['top_p']}")
+                continue
+            elif cmd == '/max' and len(parts) > 1:
+                settings["max_new_tokens"] = int(parts[1])
+                print(f"Max tokens set to {settings['max_new_tokens']}")
+                continue
+            else:
+                print("Unknown command")
+                continue
+        if not prompt:
+            continue
+        # Generate text
+        print("\nGenerating...")
+        output = generate(
+            model=model,
+            tok=tokenizer,
+            cfg=config,
+            prompt=prompt,
+            device=device,
+            **settings
+        )
+        print("\nOutput:", output)
+        print("-" * 50)
+def batch_generation_example(model, tokenizer, config, device="cuda"):
+    """
+    Example of batch generation with different settings.
+    """
+    device = torch.device(device if torch.cuda.is_available() else "cpu")
+    model = model.to(device)
+    prompts = [
+        "The robot went to school and",
+        "Once upon a time in a magical forest",
+        "The scientist discovered that",
+        "In the year 2050, humanity",
+        "The philosophy of mind suggests",
+    ]
+    print("\n=== Batch Generation Examples ===\n")
+    for prompt in prompts:
+        print(f"Prompt: {prompt}")
+        # Generate with different temperatures
+        for temp in [0.5, 0.9, 1.2]:
+            output = generate(
+                model=model,
+                tok=tokenizer,
+                cfg=config,
+                prompt=prompt,
+                max_new_tokens=50,
+                temperature=temp,
+                device=device
+            )
+            print(f"  Temp {temp}: {output}")
+        print("-" * 50)
+# Main execution example
+if __name__ == "__main__":
+    import os
+    # Load model
+    model, tokenizer, config = load_model_for_inference(
+        checkpoint_path=None,  # Will download from HF
+        hf_repo="AbstractPhil/beeper-rose-v5",
+        device="cuda"
+    )
+    # Example: Single generation
+    print("\n=== Single Generation Example ===")
+    output = generate(
+        model=model,
+        tok=tokenizer,
+        cfg=config,
+        prompt="The meaning of life is",
+        max_new_tokens=100,
+        temperature=0.9,
+        device="cuda"
+    )
+    print(f"Output: {output}")
+    # Example: Batch generation with different settings
+    # batch_generation_example(model, tokenizer, config)
+    # Example: Interactive generation
+    # interactive_generation(model, tokenizer, config)