starnernj committed
Commit 5ebcf6b · verified · 1 parent: 217a473

Update app.py

Files changed (1): app.py (+6 −5)
app.py CHANGED
@@ -1,3 +1,4 @@
+print("Before Import")
 import os
 import spaces
 import gradio as gr
@@ -8,7 +9,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from peft import PeftModel, PeftConfig
 import bitsandbytes
 import torch
-
+print("After Import")
 
 @spaces.GPU # Forces GPU allocation before execution
 def force_gpu_allocation():
@@ -22,7 +23,7 @@ lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned
 
 # Login because LLaMA 3.1 8B is a gated model
 login(token=os.getenv("HuggingFaceFineGrainedReadToken"))
-
+print("After login")
 # Enable 4-bit Quantization with BitsAndBytes
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True, # ✅ Enables 4-bit quantization for memory efficiency
@@ -36,14 +37,14 @@ base_model = AutoModelForCausalLM.from_pretrained(
     quantization_config=quantization_config,
     device_map="auto"
 )
-
+print("After basemodel")
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-
+print("After Tokenizer")
 # Load LoRA Adapter
 print("Loading LoRA adapter...")
 model = PeftModel.from_pretrained(base_model, lora_model_name)
-
+print("After peft model")
 # Function to generate responses
 def chatbot_response(user_input):
     try:
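
Note: the added print() calls bracket each slow startup step (imports, gated-model login, 4-bit base-model load, tokenizer, LoRA adapter) so the Space's startup log shows where a hang occurs. Below is a minimal sketch of the same checkpoints using Python's stdlib logging instead of bare print(), which timestamps each step automatically; it is an illustration, not the committed code. The base_model_name value is not visible in this diff, so the checkpoint id below is an assumption, and the LoRA repo id is copied from the hunk header, which may be truncated.

# Sketch only: the startup checkpoints from this commit, with timestamped
# logging in place of bare print() calls. Assumptions are marked inline.
import logging
import os

from huggingface_hub import login
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
log = logging.getLogger(__name__)

base_model_name = "meta-llama/Llama-3.1-8B"  # assumed; exact value not shown in this diff
lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned"  # copied from the hunk header, possibly truncated

login(token=os.getenv("HuggingFaceFineGrainedReadToken"))
log.info("after login")

# 4-bit quantization, as in the committed quantization_config
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quantization_config,
    device_map="auto",
)
log.info("after base model")

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
log.info("after tokenizer")

model = PeftModel.from_pretrained(base_model, lora_model_name)
log.info("after LoRA adapter")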