Tonic committed
Commit 868a03e
Parent: 9f9fa10

Update app.py

Files changed (1): app.py (+7 -2)
app.py CHANGED
@@ -62,14 +62,19 @@ tasks = {
 }
 
 os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:30'
-# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
 os.environ['CUDA_CACHE_DISABLE'] = '1'
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# Define the model and tokenizer globally
+# Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained('nvidia/NV-Embed-v1', trust_remote_code=True)
 model = AutoModel.from_pretrained('nvidia/NV-Embed-v1', trust_remote_code=True).to(device)
 
+# If multiple GPUs are available, enable DataParallel
+if torch.cuda.device_count() > 1:
+    print("Using", torch.cuda.device_count(), "GPUs")
+    model = DataParallel(model)
+
 # Embedding requests and response queues
 embedding_request_queue = queue.Queue()
 embedding_response_queue = queue.Queue()
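
Two notes on this commit. Uncommenting CUDA_LAUNCH_BLOCKING=1 forces synchronous CUDA kernel launches, which makes error stack traces point at the actual failing kernel at the cost of throughput, so it is typically a debugging setting rather than a production one. Also, the added DataParallel(model) call assumes DataParallel is imported elsewhere in app.py, since the hunk does not show an import. Below is a minimal, self-contained sketch of the pattern this commit introduces, with the import made explicit; everything outside the shown hunk is an assumption, not part of the commit:

# Minimal sketch of the multi-GPU setup added in this commit (import shown explicitly).
import os
import torch
from torch.nn import DataParallel
from transformers import AutoModel, AutoTokenizer

os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:30'
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # synchronous launches; debugging aid
os.environ['CUDA_CACHE_DISABLE'] = '1'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained('nvidia/NV-Embed-v1', trust_remote_code=True)
model = AutoModel.from_pretrained('nvidia/NV-Embed-v1', trust_remote_code=True).to(device)

if torch.cuda.device_count() > 1:
    print("Using", torch.cuda.device_count(), "GPUs")
    model = DataParallel(model)  # replicates the model across all visible GPUs

# DataParallel wraps the model: attributes and custom methods defined by the
# model's remote code live on the underlying module, not on the wrapper.
base_model = model.module if isinstance(model, DataParallel) else model

One caveat worth keeping in mind with this pattern: after wrapping, only the forward pass is parallelized, and any model-specific helper methods must be reached through model.module, as in the last line of the sketch.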