Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -62,14 +62,19 @@ tasks = {
|
|
62 |
}
|
63 |
|
64 |
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:30'
|
65 |
-
|
66 |
os.environ['CUDA_CACHE_DISABLE'] = '1'
|
67 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
68 |
|
69 |
-
#
|
70 |
tokenizer = AutoTokenizer.from_pretrained('nvidia/NV-Embed-v1', trust_remote_code=True)
|
71 |
model = AutoModel.from_pretrained('nvidia/NV-Embed-v1', trust_remote_code=True).to(device)
|
72 |
|
|
|
|
|
|
|
|
|
|
|
73 |
# Embedding requests and response queues
|
74 |
embedding_request_queue = queue.Queue()
|
75 |
embedding_response_queue = queue.Queue()
|
|
|
62 |
}
|
63 |
|
64 |
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:30'
|
65 |
+
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
|
66 |
os.environ['CUDA_CACHE_DISABLE'] = '1'
|
67 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
68 |
|
69 |
+
# Load the tokenizer and model
|
70 |
tokenizer = AutoTokenizer.from_pretrained('nvidia/NV-Embed-v1', trust_remote_code=True)
|
71 |
model = AutoModel.from_pretrained('nvidia/NV-Embed-v1', trust_remote_code=True).to(device)
|
72 |
|
73 |
+
# If multiple GPUs are available, enable DataParallel
|
74 |
+
if torch.cuda.device_count() > 1:
|
75 |
+
print("Using", torch.cuda.device_count(), "GPUs")
|
76 |
+
model = DataParallel(model)
|
77 |
+
|
78 |
# Embedding requests and response queues
|
79 |
embedding_request_queue = queue.Queue()
|
80 |
embedding_response_queue = queue.Queue()
|