from fastapi import FastAPI
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()

# Define model-related constants
MODEL_NAME = "arnir0/Tiny-LLM"

# Global variables to store the tokenizer and model
tokenizer = None
model = None


@app.get("/")
def greet_json():
    global tokenizer, model

    # Load the model and tokenizer if not already loaded
    if model is None or tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
        model.eval()  # Set model to evaluation mode (optional for inference)

    return {"Hello": "World!", "model_status": "Loaded and hibernated!"}
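
# A minimal sketch of how the lazily loaded model could actually serve text
# generation, since the app above loads the model but never runs inference.
# The /generate route, its `prompt` query parameter, and the `max_new_tokens`
# default are illustrative assumptions, not part of the original app.
@app.get("/generate")
def generate(prompt: str, max_new_tokens: int = 50):
    global tokenizer, model

    # Reuse the same lazy-loading guard so this route also works on a cold start
    if model is None or tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
        model.eval()

    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():  # no gradients needed at inference time
        output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    completion = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return {"prompt": prompt, "completion": completion}

# Run locally with, e.g.: uvicorn main:app --reload
# (the module name `main` is an assumption about this file's name)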