# ARGO / app.py
from fastapi import FastAPI
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()

# Define model-related constants
MODEL_NAME = "arnir0/Tiny-LLM"

# Global variables to store the tokenizer and model
tokenizer = None
model = None


@app.get("/")
def greet_json():
    global tokenizer, model
    # Load the model and tokenizer if not already loaded
    if model is None or tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
        model.eval()  # Set model to evaluation mode (optional for inference)
    return {"Hello": "World!", "model_status": "Loaded and hibernated!"}
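

# --- Hypothetical extension (not in the original file) ---
# A minimal sketch of how the lazily loaded tokenizer and model could serve
# text generation. The /generate route, the GenerateRequest schema, and the
# max_new_tokens default are assumptions added for illustration only.
from pydantic import BaseModel


class GenerateRequest(BaseModel):
    prompt: str
    max_new_tokens: int = 50


@app.post("/generate")
def generate_text(request: GenerateRequest):
    global tokenizer, model
    # Reuse the same lazy-loading pattern as the root endpoint
    if model is None or tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
        model.eval()
    inputs = tokenizer(request.prompt, return_tensors="pt")
    with torch.no_grad():  # Disable gradient tracking for inference
        output_ids = model.generate(**inputs, max_new_tokens=request.max_new_tokens)
    return {"generated_text": tokenizer.decode(output_ids[0], skip_special_tokens=True)}

# Example usage (host/port assumed; 7860 is the usual Hugging Face Spaces port):
#   uvicorn app:app --host 0.0.0.0 --port 7860
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Hello", "max_new_tokens": 30}'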