Update app.py
app.py CHANGED
@@ -1,3 +1,4 @@
+print("Before Import")
 import os
 import spaces
 import gradio as gr
@@ -8,7 +9,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from peft import PeftModel, PeftConfig
 import bitsandbytes
 import torch
-
+print("After Import")
 
 @spaces.GPU # Forces GPU allocation before execution
 def force_gpu_allocation():
@@ -22,7 +23,7 @@ lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned
 
 # Login because LLaMA 3.1 8B is a gated model
 login(token=os.getenv("HuggingFaceFineGrainedReadToken"))
-
+print("After login")
 # Enable 4-bit Quantization with BitsAndBytes
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True, # ✅ Enables 4-bit quantization for memory efficiency
@@ -36,14 +37,14 @@ base_model = AutoModelForCausalLM.from_pretrained(
     quantization_config=quantization_config,
     device_map="auto"
 )
-
+print("After basemodel")
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-
+print("After Tokenizer")
 # Load LoRA Adapter
 print("Loading LoRA adapter...")
 model = PeftModel.from_pretrained(base_model, lora_model_name)
-
+print("After peft model")
 # Function to generate responses
 def chatbot_response(user_input):
     try:
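Read end to end, the change threads print() checkpoints through the startup sequence (imports, login, quantization config, base model, tokenizer, LoRA adapter), so a hang while the Space boots can be localized from the container logs: the last checkpoint printed points at the step immediately after it. Below is a minimal sketch of the resulting sequence, not the commit itself. Two details are assumptions: flush=True on each checkpoint (the commit uses bare print calls, which buffered stdout can delay) and the base model name and bnb_4bit_* settings, which sit outside the hunks shown above.

import os
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

def checkpoint(msg):
    # flush=True pushes the message to the Space logs immediately
    # (assumption; the commit itself uses bare print calls)
    print(msg, flush=True)

checkpoint("After Import")

# Login because LLaMA 3.1 8B is a gated model
login(token=os.getenv("HuggingFaceFineGrainedReadToken"))
checkpoint("After login")

base_model_name = "meta-llama/Llama-3.1-8B"  # hypothetical; the real value sits outside the hunks
lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned"  # as shown in the hunk header, which may cut it off

# Enable 4-bit quantization with BitsAndBytes
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # from the diff
    bnb_4bit_quant_type="nf4",              # assumed, common choice
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumed, common choice
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quantization_config,
    device_map="auto",
)
checkpoint("After basemodel")

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
checkpoint("After Tokenizer")

model = PeftModel.from_pretrained(base_model, lora_model_name)
checkpoint("After peft model")

In use, a Space that stalls after printing "After basemodel" but before "After Tokenizer" is known to be stuck loading the tokenizer, which is exactly the granularity these checkpoints buy.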