Update app.py
app.py CHANGED

@@ -1,4 +1,4 @@
-print("
+print("Beginning import")
 import os
 import spaces
 import gradio as gr
@@ -9,7 +9,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from peft import PeftModel, PeftConfig
 import bitsandbytes
 import torch
-
+
+print("Imports completed")

 @spaces.GPU # Forces GPU allocation before execution
 def force_gpu_allocation():
@@ -23,7 +24,8 @@ lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned

 # Login because LLaMA 3.1 8B is a gated model
 login(token=os.getenv("HuggingFaceFineGrainedReadToken"))
-print("
+print("Login to Huggin Face successful")
+
 # Enable 4-bit Quantization with BitsAndBytes
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True, # ✅ Enables 4-bit quantization for memory efficiency
@@ -32,19 +34,24 @@ quantization_config = BitsAndBytesConfig(
     bnb_4bit_quant_type="nf4" # ✅ Normalized Float-4 for better accuracy
 )

+print("Loading base model")
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
     quantization_config=quantization_config,
     device_map="auto"
 )
-print("
+print("Basemodel loaded successfully")
+
 # Load tokenizer
+print("Loading tokenizer")
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-print("
+print("Tokenizer loaded successfully")
+
 # Load LoRA Adapter
-print("Loading LoRA adapter...")
+print("Loading Peft LoRA adapter...")
 model = PeftModel.from_pretrained(base_model, lora_model_name)
-print("
+print("Peft LoRA model loaded successfully")
+
 # Function to generate responses
 def chatbot_response(user_input):
     try:
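Taken together, the hunks instrument the usual sequence of loading a 4-bit quantized base model and then applying a PEFT LoRA adapter on top of it. Below is a minimal standalone sketch of that sequence; the base model id, the compute dtype, and the token environment variable are assumptions inferred from the hunks (the adapter id is copied from the hunk header, which the diff view truncates), so it is illustrative rather than a verbatim copy of app.py.

# Minimal sketch of the load sequence the new print statements trace (assumptions noted inline)
import os
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Assumption: the gated LLaMA 3.1 8B base; app.py defines base_model_name above the hunks shown
base_model_name = "meta-llama/Llama-3.1-8B"
lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned"

# Authenticate because the base model is gated
login(token=os.getenv("HuggingFaceFineGrainedReadToken"))

# 4-bit NF4 quantization keeps the 8B weights within a single Space GPU
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,  # assumption: compute dtype is not visible in the hunks
)

print("Loading base model")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quantization_config,
    device_map="auto",
)

print("Loading tokenizer")
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

print("Loading Peft LoRA adapter...")
model = PeftModel.from_pretrained(base_model, lora_model_name)
print("Peft LoRA model loaded successfully")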