from fastapi import FastAPI

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()

# Define model-related constants
MODEL_NAME = "arnir0/Tiny-LLM"

# Global variables to store the tokenizer and model
tokenizer = None
model = None
# Root endpoint: lazily loads the model on the first request, then reports status
@app.get("/")
def greet_json():
    global tokenizer, model
    # Load the model and tokenizer if not already loaded
    if model is None or tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
        model.eval()  # Set model to evaluation mode (recommended for inference)
    return {"Hello": "World!", "model_status": "Loaded and hibernated!"}