starnernj committed
Commit 106ed70 · verified · 1 Parent(s): f7b9082

Update app.py

Files changed (1): app.py +8 -2
app.py CHANGED
@@ -3,7 +3,9 @@ import gradio as gr
 from huggingface_hub import InferenceClient, login
 import os
 import time
-import threading
+
+# Disable CUDA visibility at the start
+os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Prevents CUDA initialization
 
 @spaces.GPU  # Forces GPU allocation before execution
 def force_gpu_allocation():
@@ -18,6 +20,9 @@ lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned
 
 # Function to generate responses
 def chatbot_response(user_input):
+    # Re-enable CUDA inside the function for accelerate to manage
+    os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Adjust based on ZeroGPU setup
+
     import traceback
     from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
     from peft import PeftModel, PeftConfig
@@ -36,7 +41,8 @@ def chatbot_response(user_input):
 
     model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
-        device_map="cpu"
+        device_map="cpu",
+        torch_dtype=torch.float32  # Avoid any GPU-related dtype defaults
     )
 
     # Load tokenizer
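
The first two hunks form an environment-variable toggle for ZeroGPU Spaces: CUDA is hidden while the module imports, then re-exposed inside the @spaces.GPU-decorated handler. A minimal sketch of the pattern, assuming a standard ZeroGPU setup (the spaces import and the function body below are illustrative, not taken from app.py):

import os

# Hide CUDA during import so torch/transformers initialize CPU-only
# and nothing touches the GPU before ZeroGPU attaches one.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import spaces  # ZeroGPU helper available on Hugging Face Spaces

@spaces.GPU  # GPU is attached only for the duration of this call
def chatbot_response(user_input):
    # Re-expose device 0 before any CUDA work in this request.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    ...

One caveat: CUDA_VISIBLE_DEVICES is read when the CUDA runtime first initializes in the process, so the re-enable only takes effect if no torch.cuda call has run beforehand.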
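
The third hunk keeps the base-model load entirely on CPU and pins the dtype to float32. A sketch of the resulting load path, assuming torch is imported elsewhere in app.py (the diff uses torch.float32 but shows no import) and assuming the PEFT adapter attach the surrounding code performs; base_model_name is not shown in this diff, so its value below is a placeholder:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_name = "meta-llama/Llama-3.1-8B"  # placeholder; the real value is set elsewhere in app.py
lora_model_name = "starnernj/Early-Christian-Church-Fathers-LLaMA-3.1-Fine-Tuned"  # as in the hunk header

# Load the base weights on CPU in full precision so no CUDA context
# is created and no half-precision GPU defaults apply.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="cpu",
    torch_dtype=torch.float32,
)

# Attach the LoRA adapter on top of the CPU-resident base model.
model = PeftModel.from_pretrained(model, lora_model_name)

tokenizer = AutoTokenizer.from_pretrained(base_model_name)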