print("Before Import") | |
import os | |
import spaces | |
import gradio as gr | |
from huggingface_hub import InferenceClient, login | |
import time | |
import traceback | |
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
from peft import PeftModel, PeftConfig | |
import bitsandbytes | |
import torch | |
print("After Import") | |
# Forces GPU allocation before execution. On ZeroGPU Spaces it is the
# @spaces.GPU decorator that requests a GPU when the function is called,
# so the body itself can stay empty.
@spaces.GPU
def force_gpu_allocation():
    pass  # No-op body; the decorator does the work
# Base model (LLaMA 3.1 8B) from Meta
base_model_name = "meta-llama/Llama-3.1-8B"

# Your fine-tuned LoRA adapter (uploaded to Hugging Face)
lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned"
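# --- Sketch (an assumption, not part of the original file): how the names
# above would typically be wired together. Loads the base model in 4-bit via
# bitsandbytes, then attaches the LoRA adapter with PEFT. The function name
# and quantization settings are illustrative defaults, not the app's
# confirmed values.
def load_model_sketch():
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,                      # 4-bit weights so the 8B model fits on one GPU
        bnb_4bit_quant_type="nf4",              # NormalFloat4, a common pairing with LoRA
        bnb_4bit_compute_dtype=torch.bfloat16,  # compute dtype for the dequantized matmuls
    )
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        quantization_config=bnb_config,
        device_map="auto",  # let accelerate place layers on the allocated GPU
    )
    # Wrap the quantized base model with the fine-tuned LoRA weights
    model = PeftModel.from_pretrained(base_model, lora_model_name)
    return tokenizer, model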