print("Before Import")
import os
import spaces
import gradio as gr
from huggingface_hub import InferenceClient, login
import time
import traceback
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel, PeftConfig
import bitsandbytes
import torch
print("After Import")

@spaces.GPU  # Forces GPU allocation before execution
def force_gpu_allocation():
    pass  # Dummy function to trigger GPU setup

# Base model (LLaMA 3.1 8B) from Meta
base_model_name = "meta-llama/Llama-3.1-8B"

# Your fine-tuned LoRA adapter (uploaded to Hugging Face)
lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned"