alidenewade committed
Commit c3644ec · verified · 1 Parent(s): de6a098

Update app.py

Files changed (1): app.py (+55, -30)

app.py CHANGED
@@ -29,6 +29,11 @@ def get_quantization_config():
     Falls back gracefully if bitsandbytes is not available.
     """
     try:
+        # Only use quantization on CUDA
+        if not torch.cuda.is_available():
+            logger.info("CUDA not available, skipping quantization")
+            return None
+
         # 8-bit quantization configuration - good balance of speed and quality
         quantization_config = BitsAndBytesConfig(
             load_in_8bit=True,
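
The new guard matters because bitsandbytes' 8-bit kernels are CUDA-only, so requesting `load_in_8bit` on a CPU-only Space fails at model-load time instead of degrading gracefully. A minimal sketch of the guarded helper as it stands after this change (the `except` fallback is an assumption about the part of the function the diff does not show):

```python
import logging

import torch
from transformers import BitsAndBytesConfig

logger = logging.getLogger(__name__)

def get_quantization_config():
    """Return an 8-bit quantization config, or None on CPU or if bitsandbytes is unusable."""
    try:
        # Only use quantization on CUDA
        if not torch.cuda.is_available():
            logger.info("CUDA not available, skipping quantization")
            return None
        # 8-bit quantization configuration - good balance of speed and quality
        return BitsAndBytesConfig(load_in_8bit=True)
    except Exception as e:  # assumed fallback, per the docstring above
        logger.warning(f"Quantization unavailable, loading in full precision: {e}")
        return None
```
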
@@ -64,42 +69,43 @@ def load_optimized_models():
     # Model names
     model_name = "seyonec/PubChem10M_SMILES_BPE_450k"

-    # Load tokenizer (doesn't need quantization)
-    fill_mask_tokenizer = AutoTokenizer.from_pretrained(model_name)
+    try:
+        # Load tokenizer (doesn't need quantization)
+        fill_mask_tokenizer = AutoTokenizer.from_pretrained(model_name)

-    # Load model with quantization if available
-    model_kwargs = {
-        "torch_dtype": torch_dtype,
-    }
+        # Load model with quantization if available
+        model_kwargs = {
+            "torch_dtype": torch_dtype,
+        }

-    if quantization_config is not None and torch.cuda.is_available():  # Quantization typically for GPU
-        model_kwargs["quantization_config"] = quantization_config
-        # device_map="auto" is often used with bitsandbytes for automatic distribution
-        model_kwargs["device_map"] = "auto"
-    elif torch.cuda.is_available():
-        model_kwargs["device_map"] = "auto"  # For non-quantized GPU loading
-    else:
-        model_kwargs["device_map"] = None  # For CPU
+        if quantization_config is not None and torch.cuda.is_available():
+            model_kwargs["quantization_config"] = quantization_config
+            model_kwargs["device_map"] = "auto"
+        else:
+            # For CPU or non-quantized loading
+            model_kwargs["device_map"] = None

-    try:
         # Masked LM Model
         fill_mask_model = AutoModelForMaskedLM.from_pretrained(
             model_name,
             **model_kwargs
         )

+        # Move to device if not using device_map
+        if model_kwargs["device_map"] is None and torch.cuda.is_available():
+            fill_mask_model.to(device)
+
         # Set model to evaluation mode for inference
         fill_mask_model.eval()

-        # Create optimized pipeline
-        # Let pipeline infer device from model if possible, or set based on model's device
-        pipeline_device = fill_mask_model.device.index if hasattr(fill_mask_model.device, 'type') and fill_mask_model.device.type == "cuda" else -1
+        # Create pipeline with proper device handling
+        pipeline_device = 0 if torch.cuda.is_available() else -1

         fill_mask_pipeline = pipeline(
             'fill-mask',
             model=fill_mask_model,
             tokenizer=fill_mask_tokenizer,
-            device=pipeline_device,  # Use model's device
+            device=pipeline_device,
         )

         logger.info("Models loaded successfully with optimizations")
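
Two things are worth noting in this hunk. First, since `get_quantization_config()` now returns `None` off-GPU, the `torch.cuda.is_available()` re-check in the condition is redundant but harmless. Second, the hunk references names it never defines: `torch_dtype`, `quantization_config`, and (in the new `.to(device)` call) `device`. A plausible preamble for `load_optimized_models()` consistent with those references (an assumption; that part of app.py is not shown in this diff):

```python
import torch

# Assumed setup; the diff uses these names without showing their definitions.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
quantization_config = get_quantization_config()  # helper defined earlier in app.py
```
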
@@ -113,16 +119,31 @@ def load_optimized_models():

 def load_standard_models(model_name):
     """Fallback standard model loading without quantization."""
-    fill_mask_tokenizer = AutoTokenizer.from_pretrained(model_name)
-    fill_mask_model = AutoModelForMaskedLM.from_pretrained(model_name)
-    # Determine device for standard loading
-    device_idx = 0 if torch.cuda.is_available() else -1
-    fill_mask_pipeline = pipeline('fill-mask', model=fill_mask_model, tokenizer=fill_mask_tokenizer, device=device_idx)
-
-    if torch.cuda.is_available():
-        fill_mask_model.to("cuda")
+    try:
+        fill_mask_tokenizer = AutoTokenizer.from_pretrained(model_name)
+        fill_mask_model = AutoModelForMaskedLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float32
+        )
+
+        # Determine device for standard loading
+        device_idx = 0 if torch.cuda.is_available() else -1
+
+        if torch.cuda.is_available():
+            fill_mask_model.to("cuda")
+
+        fill_mask_pipeline = pipeline(
+            'fill-mask',
+            model=fill_mask_model,
+            tokenizer=fill_mask_tokenizer,
+            device=device_idx
+        )

-    return fill_mask_tokenizer, fill_mask_model, fill_mask_pipeline
+        return fill_mask_tokenizer, fill_mask_model, fill_mask_pipeline
+    except Exception as e:
+        logger.error(f"Failed to load models: {e}")
+        st.error(f"Failed to load models: {e}")
+        return None, None, None

 # --- Memory Management Utilities ---
 def clear_gpu_cache():
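
Pinning `torch_dtype=torch.float32` keeps the fallback path loadable on any hardware, and returning `(None, None, None)` turns load failures into a sentinel instead of an exception, so call sites must check before unpacking. A small usage sketch (hypothetical caller, run in app.py's context, mirroring the pattern the last hunk adds):

```python
models = load_standard_models("seyonec/PubChem10M_SMILES_BPE_450k")
if models[0] is None:
    # Loading failed; the function has already logged and surfaced the error.
    raise SystemExit("Model loading failed, see logs")
tokenizer, model, fill_mask = models
```
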
@@ -163,7 +184,7 @@ def get_image_with_highlight(mol, atomset=None, size=(300, 300)):
     if atomset:
         try:
             valid_atomset = [int(a) for a in atomset]
-        except ValueError:
+        except (ValueError, TypeError):
             logger.warning(f"Invalid atom in atomset: {atomset}. Proceeding without highlighting problematic atoms.")
             valid_atomset = [int(a) for a in atomset if str(a).isdigit()]  # Filter out non-integers

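
The broader handler is the right fix here: `int()` raises `ValueError` for malformed strings but `TypeError` for non-numeric objects such as `None`, which the old clause let propagate. A quick illustration:

```python
for bad in ["x7", None]:
    try:
        int(bad)
    except (ValueError, TypeError) as e:
        print(f"{bad!r}: {type(e).__name__}")  # 'x7': ValueError, None: TypeError
```
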
 
@@ -230,7 +251,11 @@ def predict_and_visualize_masked_smiles(smiles_mask, substructure_smarts_highlig
     """
     # Load models when needed
     try:
-        fill_mask_tokenizer, fill_mask_model, fill_mask_pipeline = load_optimized_models()
+        models = load_optimized_models()
+        if models[0] is None:  # Check if loading failed
+            st.error("Failed to load models. Please check the logs.")
+            return
+        fill_mask_tokenizer, fill_mask_model, fill_mask_pipeline = models
     except Exception as e:
         st.error(f"Error loading models: {str(e)}")
         return
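
With the sentinel check in place, the caller now covers both failure modes: the `None` triple from a graceful failure and a raised exception from an unexpected one. For completeness, a minimal end-to-end use of the loaded fill-mask pipeline (run in app.py's context; the SMILES input and `top_k` value are illustrative, not from this commit):

```python
models = load_optimized_models()
if models[0] is not None:
    tokenizer, model, fill_mask = models
    # ChemBERTa uses a RoBERTa-style vocabulary, so the mask token is <mask>.
    masked_smiles = "CC(=O)Oc1ccccc1C(=O)" + tokenizer.mask_token
    for pred in fill_mask(masked_smiles, top_k=3):
        print(pred["token_str"], round(pred["score"], 4))
```
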
 