Update app.py
app.py CHANGED
@@ -1,15 +1,14 @@
+# Disable CUDA visibility at the start
+os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Prevents CUDA initialization
+
 import spaces
 import gradio as gr
 from huggingface_hub import InferenceClient, login
 import os
 import time
 
-# Disable CUDA visibility at the start
-os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Prevents CUDA initialization
 
-
-def force_gpu_allocation():
-    pass  # Dummy function to trigger GPU setup
+
 
 # Base model (LLaMA 3.1 8B) from Meta
 base_model_name = "meta-llama/Llama-3.1-8B"
@@ -34,6 +33,10 @@ def chatbot_response(user_input):
 if torch.cuda.is_initialized():
     print("CUDA was already initialized before Accelerator!")
 
+@spaces.GPU  # Forces GPU allocation before execution
+def force_gpu_allocation():
+    pass  # Dummy function to trigger GPU setup
+
 accelerator = Accelerator()
 
 # Login because LLaMA 3.1 8B is a gated model