starnernj committed on
Commit
44e15dc
·
verified ·
1 Parent(s): 106ed70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -1,15 +1,14 @@
 
 
 
1
  import spaces
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient, login
4
  import os
5
  import time
6
 
7
- # Disable CUDA visibility at the start
8
- os.environ["CUDA_VISIBLE_DEVICES"] = "" # Prevents CUDA initialization
9
 
10
- @spaces.GPU # Forces GPU allocation before execution
11
- def force_gpu_allocation():
12
- pass # Dummy function to trigger GPU setup
13
 
14
  # Base model (LLaMA 3.1 8B) from Meta
15
  base_model_name = "meta-llama/Llama-3.1-8B"
@@ -34,6 +33,10 @@ def chatbot_response(user_input):
34
  if torch.cuda.is_initialized():
35
  print("CUDA was already initialized before Accelerator!")
36
 
 
 
 
 
37
  accelerator = Accelerator()
38
 
39
  # Login because LLaMA 3.1 8B is a gated model
 
1
+ # Disable CUDA visibility at the start
2
+ os.environ["CUDA_VISIBLE_DEVICES"] = "" # Prevents CUDA initialization
3
+
4
  import spaces
5
  import gradio as gr
6
  from huggingface_hub import InferenceClient, login
7
  import os
8
  import time
9
 
 
 
10
 
11
+
 
 
12
 
13
  # Base model (LLaMA 3.1 8B) from Meta
14
  base_model_name = "meta-llama/Llama-3.1-8B"
 
33
  if torch.cuda.is_initialized():
34
  print("CUDA was already initialized before Accelerator!")
35
 
36
+ @spaces.GPU # Forces GPU allocation before execution
37
+ def force_gpu_allocation():
38
+ pass # Dummy function to trigger GPU setup
39
+
40
  accelerator = Accelerator()
41
 
42
  # Login because LLaMA 3.1 8B is a gated model