starnernj committed
Commit fbb2b4a · verified · 1 Parent(s): c53f6f7

Update app.py

Files changed (1): app.py (+14 -7)
app.py CHANGED
@@ -1,4 +1,4 @@
-print("Before Import")
+print("Beginning import")
 import os
 import spaces
 import gradio as gr
@@ -9,7 +9,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from peft import PeftModel, PeftConfig
 import bitsandbytes
 import torch
-print("After Import")
+
+print("Imports completed")
 
 @spaces.GPU  # Forces GPU allocation before execution
 def force_gpu_allocation():
@@ -23,7 +24,8 @@ lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned
 
 # Login because LLaMA 3.1 8B is a gated model
 login(token=os.getenv("HuggingFaceFineGrainedReadToken"))
-print("After login")
+print("Login to Hugging Face successful")
+
 # Enable 4-bit Quantization with BitsAndBytes
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True,  # ✅ Enables 4-bit quantization for memory efficiency
@@ -32,19 +34,24 @@ quantization_config = BitsAndBytesConfig(
     bnb_4bit_quant_type="nf4"  # ✅ Normalized Float-4 for better accuracy
 )
 
+print("Loading base model")
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
     quantization_config=quantization_config,
     device_map="auto"
 )
-print("After basemodel")
+print("Base model loaded successfully")
+
 # Load tokenizer
+print("Loading tokenizer")
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-print("After Tokenizer")
+print("Tokenizer loaded successfully")
+
 # Load LoRA Adapter
-print("Loading LoRA adapter...")
+print("Loading PEFT LoRA adapter...")
 model = PeftModel.from_pretrained(base_model, lora_model_name)
-print("After peft model")
+print("PEFT LoRA model loaded successfully")
+
 # Function to generate responses
 def chatbot_response(user_input):
     try:
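
The diff's last hunk cuts off at the top of chatbot_response, so the committed body is not shown. For context, a minimal sketch of what a generation function over the model and tokenizer loaded above typically looks like; the body, the max_new_tokens value, and the error handling are assumptions, not the committed code:

# Hypothetical sketch only — the actual body of chatbot_response is outside this diff.
def chatbot_response(user_input):
    try:
        # Tokenize the prompt and move the tensors to the model's device
        inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
        # Generate a continuation; max_new_tokens is an assumed parameter choice
        outputs = model.generate(**inputs, max_new_tokens=256)
        # Decode the generated token ids back into text
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error generating response: {e}"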
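Similarly, the context lines import gradio but the UI wiring falls outside the diff. A minimal sketch of how chatbot_response would typically be exposed in a Space, assuming nothing beyond the names visible in the diff:

# Hypothetical wiring — the committed interface definition is not shown in this diff.
demo = gr.Interface(fn=chatbot_response, inputs="text", outputs="text")
demo.launch()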