print("Before Import") import os import spaces import gradio as gr from huggingface_hub import InferenceClient, login import time import traceback from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig from peft import PeftModel, PeftConfig import bitsandbytes import torch print("After Import") @spaces.GPU # Forces GPU allocation before execution def force_gpu_allocation(): pass # Dummy function to trigger GPU setup # Base model (LLaMA 3.1 8B) from Meta base_model_name = "meta-llama/Llama-3.1-8B" # Your fine-tuned LoRA adapter (uploaded to Hugging Face) lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned"