Update gguf_loader.py
gguf_loader.py  CHANGED  +144 -0
@@ -0,0 +1,144 @@
import torch
import logging
from pathlib import Path
from typing import Optional, Union, Dict, Any

class GGUFUNetLoader:
    """
    A class for loading and managing GGUF-formatted UNet models for diffusion.
    Supports quantized models with custom patch handling.
    """
    def __init__(self):
        self.model = None
        self.patches = {}
        self.backup = {}
        self.load_device = "cuda" if torch.cuda.is_available() else "cpu"
        self.offload_device = "cpu"

    @staticmethod
    def is_quantized(weight: torch.Tensor) -> bool:
        """Check if a tensor is quantized (tagged with a `patches` attribute)."""
        # Quantized tensors in this loader carry a `patches` attribute;
        # plain tensors do not.
        return hasattr(weight, "patches")

    def patch_weight(self, key: str, weight: torch.Tensor, device_to: Optional[str] = None) -> torch.Tensor:
        """
        Apply patches to model weights with quantization support.

        Args:
            key: The parameter key to patch
            weight: The weight tensor to patch
            device_to: Target device for the patched weight

        Returns:
            Patched weight tensor
        """
        if key not in self.patches:
            return weight

        if self.is_quantized(weight):
            # Quantized weights: defer patching. Attach the patch list so
            # `calculate_weight` can apply it after dequantization.
            out_weight = weight.to(device_to if device_to else self.load_device)
            patches = self.patches[key]
            out_weight.patches = [(self.calculate_weight, patches, key)]
            return out_weight
        else:
            # Regular weights: keep an offloaded backup, then patch in float32.
            if key not in self.backup:
                self.backup[key] = weight.to(device=self.offload_device)

            # copy=True guards against `.to` returning the original tensor
            # (e.g. an fp32 weight already on the target device), which the
            # in-place `+=` below would otherwise mutate.
            temp_weight = weight.to(torch.float32, copy=True)
            if device_to:
                temp_weight = temp_weight.to(device_to)

            # Apply patches
            for patch in self.patches[key]:
                temp_weight += patch

            return temp_weight.to(weight.dtype)

    def load_model(self,
                   model_path: Union[str, Path],
                   config: Optional[Dict[str, Any]] = None) -> None:
        """
        Load a GGUF model from disk.

        Args:
            model_path: Path to the GGUF model file
            config: Optional configuration dictionary for model loading
        """
        try:
            model_path = Path(model_path)
            if not model_path.exists():
                raise FileNotFoundError(f"Model file not found: {model_path}")

            if model_path.suffix != '.gguf':
                raise ValueError(f"Not a GGUF model file: {model_path}")

            # Load the model. NOTE: `load_gguf_model` still needs to be
            # implemented, and it must live in a *different* module -- this
            # file is gguf_loader.py, so `from .gguf_loader import ...` here
            # would be a circular self-import.
            from .gguf_model import load_gguf_model  # placeholder import
            self.model = load_gguf_model(
                model_path,
                device=self.load_device,
                config=config or {}
            )

            logging.info(f"Successfully loaded GGUF model from {model_path}")

        except Exception as e:
            logging.error(f"Error loading model: {e}")
            raise

    def add_patch(self, key: str, patch: torch.Tensor) -> None:
        """
        Add a patch for a specific model parameter.

        Args:
            key: Parameter key to patch
            patch: The patch tensor to apply
        """
        self.patches.setdefault(key, []).append(patch)

    def clear_patches(self) -> None:
        """Remove all patches from the model."""
        self.patches.clear()

        # Also drop deferred patches attached to quantized parameters.
        if self.model is not None:
            for param in self.model.parameters():
                if self.is_quantized(param):
                    param.patches = []

    def to(self, device: str) -> 'GGUFUNetLoader':
        """
        Move model to specified device.

        Args:
            device: Target device ("cuda", "cpu", etc.)

        Returns:
            Self for method chaining
        """
        if self.model is not None:
            self.model.to(device)
        self.load_device = device
        return self

+
@staticmethod
|
129 |
+
def calculate_weight(patches: list, base_weight: torch.Tensor, key: str) -> torch.Tensor:
|
130 |
+
"""
|
131 |
+
Calculate final weight by applying patches.
|
132 |
+
|
133 |
+
Args:
|
134 |
+
patches: List of patches to apply
|
135 |
+
base_weight: Base weight tensor
|
136 |
+
key: Parameter key
|
137 |
+
|
138 |
+
Returns:
|
139 |
+
Patched weight tensor
|
140 |
+
"""
|
141 |
+
result = base_weight.clone()
|
142 |
+
for patch in patches:
|
143 |
+
result += patch
|
144 |
+
return result
|
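
The `load_gguf_model` import inside load_model is still a stub. A minimal sketch of what it might look like, assuming the `gguf` package's GGUFReader API and a hypothetical build_unet factory that maps GGUF tensor names onto a UNet state dict (both are assumptions, not part of this commit):

import torch
from gguf import GGUFReader  # pip install gguf

def load_gguf_model(model_path, device="cpu", config=None):
    """Sketch: read tensors out of a .gguf file into a UNet skeleton."""
    reader = GGUFReader(str(model_path))
    state_dict = {}
    for tensor in reader.tensors:
        # tensor.data is a numpy view over the memory-mapped file; copy it.
        # This assumes F16/F32 tensors -- quantized types (Q4_0, Q8_0, ...)
        # would need dequantizing before they match the module's shapes.
        state_dict[tensor.name] = torch.from_numpy(tensor.data.copy())
    model = build_unet(config or {})  # hypothetical factory, not shown here
    model.load_state_dict(state_dict, strict=False)
    return model.to(device)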
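
And a short usage sketch of the loader itself (the model path, parameter key, and shape are made up for illustration):

loader = GGUFUNetLoader()
loader.load_model("models/unet-q4_0.gguf")

# Register an additive delta (e.g. a merged LoRA) for one parameter,
# then resolve the patched weight on the load device.
delta = torch.zeros(320, 320)
loader.add_patch("down_blocks.0.attn.to_q.weight", delta)

for name, param in loader.model.named_parameters():
    patched = loader.patch_weight(name, param.data, device_to=loader.load_device)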