Please help me with GGUF coding!
Hello, I'm developing my own Streamlit app to train language models from scratch. It works well, but when I export the model as GGUF, load it in text-generation-webui, and send a message, it fails to tokenize and prints the error llama.cpp error: 'invalid unordered_map<K, T> key'
in the console. Can anyone with GGUF experience help me? I'm getting desperate.
export_backend.py
import torch
import json
import numpy as np
from typing import Dict, Any, List, Optional, Union
import struct
from pathlib import Path
import zipfile
from gguf_writer import GGUFWriter, GGMLType, GGUFValueType
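# Decide which parameters are excluded from the GGUF export.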
def should_skip_tensor(name: str) -> bool:
    skip_patterns = [
        'embedding_projection.projection.weight',
        'embedding_projection.projection.bias',
    ]
    return any(pattern in name for pattern in skip_patterns)
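# Map PyTorch/Hugging Face parameter names onto the llama.cpp GGUF naming scheme
# (token_embd, blk.N.*, output_norm, output).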
def convert_parameter_name_to_gguf(pytorch_name: str) -> str:
name_mapping = {
'embed_tokens.weight': 'token_embd.weight',
'norm.weight': 'output_norm.weight',
'lm_head.weight': 'output.weight',
}
if 'layers.' in pytorch_name:
parts = pytorch_name.split('.')
if len(parts) < 3:
return pytorch_name
layer_num = parts[1]
rest = '.'.join(parts[2:])
layer_mappings = {
'input_layernorm.weight': f'blk.{layer_num}.attn_norm.weight',
'post_attention_layernorm.weight': f'blk.{layer_num}.ffn_norm.weight',
'self_attn.q_proj.weight': f'blk.{layer_num}.attn_q.weight',
'self_attn.k_proj.weight': f'blk.{layer_num}.attn_k.weight',
'self_attn.v_proj.weight': f'blk.{layer_num}.attn_v.weight',
'self_attn.o_proj.weight': f'blk.{layer_num}.attn_output.weight',
'mlp.gate_proj.weight': f'blk.{layer_num}.ffn_gate.weight',
'mlp.up_proj.weight': f'blk.{layer_num}.ffn_up.weight',
'mlp.down_proj.weight': f'blk.{layer_num}.ffn_down.weight',
}
if rest in layer_mappings:
return layer_mappings[rest]
return name_mapping.get(pytorch_name, pytorch_name)
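# Decide whether a weight matrix should be transposed before being written to GGUF.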
def should_transpose_tensor(gguf_name: str, tensor_shape: tuple) -> bool:
transpose_patterns = {
'token_embd.weight': True,
'output.weight': True,
}
if gguf_name in transpose_patterns:
return transpose_patterns[gguf_name]
if any(pattern in gguf_name for pattern in ['.attn_q.weight', '.attn_k.weight', '.attn_v.weight',
'.attn_output.weight', '.ffn_gate.weight',
'.ffn_up.weight', '.ffn_down.weight']):
if len(tensor_shape) == 2:
return True
return False
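# Fallback tensor used when no usable lm_head / output.weight can be exported.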
def create_dummy_output_tensor(vocab_size: int, hidden_size: int) -> torch.Tensor:
print(f"Creating dummy output.weight tensor with shape [{vocab_size}, {hidden_size}]")
dummy_tensor = torch.randn(vocab_size, hidden_size, dtype=torch.float32) * 0.02
return dummy_tensor
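# Print an overview of which tensors will be exported, which will be skipped, and their shapes.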
def debug_model_tensors(model):
print("=== MODEL TENSOR DEBUG ===")
tensors_to_process = []
tensors_to_skip = []
for i, (name, param) in enumerate(model.named_parameters()):
if should_skip_tensor(name):
tensors_to_skip.append((i+1, name, param.shape, param.numel()))
else:
gguf_name = convert_parameter_name_to_gguf(name)
tensors_to_process.append((i+1, name, gguf_name, param.shape, param.numel()))
print("TENSORS TO PROCESS:")
for i, name, gguf_name, shape, numel in tensors_to_process:
print(f"{i:2d}. {name}")
print(f" -> {gguf_name}")
print(f" Shape: {shape}, Elements: {numel}")
if numel == 0:
print(f" *** EMPTY TENSOR ***")
print()
if tensors_to_skip:
print("TENSORS TO SKIP:")
for i, name, shape, numel in tensors_to_skip:
print(f"{i:2d}. {name} (SKIPPED)")
print(f" Shape: {shape}, Elements: {numel}")
print()
print(f"Total tensors in model: {len(list(model.named_parameters()))}")
print(f"Tensors to process: {len(tensors_to_process)}")
print(f"Tensors to skip: {len(tensors_to_skip)}")
print("=========================")
def convert_pytorch_to_gguf(model, tokenizer, output_path: str, quantization: str = "f32", progress_callback=None) -> bool:
try:
debug_model_tensors(model)
writer = GGUFWriter(output_path, "llama")
required_attrs = ['vocab_size', 'hidden_size', 'num_hidden_layers', 'num_attention_heads']
for attr in required_attrs:
if not hasattr(model, attr):
raise ValueError(f"Model missing required attribute: {attr}")
def get_safe_attr(obj, attr, default):
return getattr(obj, attr, default)
vocab_size = get_safe_attr(model, 'vocab_size', 32000)
hidden_size = get_safe_attr(model, 'hidden_size', 4096)
num_layers = get_safe_attr(model, 'num_hidden_layers', 32)
num_heads = get_safe_attr(model, 'num_attention_heads', 32)
num_kv_heads = get_safe_attr(model, 'num_key_value_heads', num_heads)
intermediate_size = get_safe_attr(model, 'intermediate_size', 11008)
max_position_embeddings = get_safe_attr(model, 'max_position_embeddings', 2048)
rms_norm_eps = get_safe_attr(model, 'rms_norm_eps', 1e-6)
rope_theta = get_safe_attr(model, 'rope_theta', 10000.0)
writer.add_metadata("general.architecture", "llama", GGUFValueType.STRING)
writer.add_metadata("general.name", "custom-llama-model", GGUFValueType.STRING)
writer.add_metadata("general.version", "1.0", GGUFValueType.STRING)
writer.add_metadata("general.description", "Custom trained GGUF model", GGUFValueType.STRING)
writer.add_metadata("llama.vocab_size", vocab_size, GGUFValueType.UINT32)
writer.add_metadata("llama.context_length", max_position_embeddings, GGUFValueType.UINT32)
writer.add_metadata("llama.embedding_length", hidden_size, GGUFValueType.UINT32)
writer.add_metadata("llama.block_count", num_layers, GGUFValueType.UINT32)
writer.add_metadata("llama.feed_forward_length", intermediate_size, GGUFValueType.UINT32)
writer.add_metadata("llama.attention.head_count", num_heads, GGUFValueType.UINT32)
writer.add_metadata("llama.attention.head_count_kv", num_kv_heads, GGUFValueType.UINT32)
writer.add_metadata("llama.attention.layer_norm_rms_epsilon", float(rms_norm_eps), GGUFValueType.FLOAT32)
writer.add_metadata("llama.rope.freq_base", float(rope_theta), GGUFValueType.FLOAT32)
# FIXED TOKENIZER EXPORT - Preserve token order and IDs
try:
# Get vocabulary with proper ordering
if hasattr(tokenizer, 'get_vocab'):
vocab = tokenizer.get_vocab()
else:
vocab = tokenizer.vocab
# Create ordered list of tokens by their IDs
vocab_size_actual = len(vocab)
tokens = [''] * vocab_size_actual
# Fill tokens array in correct order by ID
for token, token_id in vocab.items():
if 0 <= token_id < vocab_size_actual:
tokens[token_id] = token
else:
print(f"Warning: Token '{token}' has ID {token_id} outside vocab range [0, {vocab_size_actual})")
# Handle any missing tokens (fill with placeholder)
for i in range(vocab_size_actual):
if tokens[i] == '':
tokens[i] = f'<UNK_{i}>'
print(f"Warning: Missing token at ID {i}, using placeholder")
print(f"Tokenizer vocab size: {vocab_size_actual}")
print(f"First 10 tokens: {tokens[:10]}")
print(f"Last 10 tokens: {tokens[-10:]}")
# Verify special tokens
bos_id = getattr(tokenizer, 'bos_token_id', None)
eos_id = getattr(tokenizer, 'eos_token_id', None)
pad_id = getattr(tokenizer, 'pad_token_id', None)
unk_id = getattr(tokenizer, 'unk_token_id', None)
if bos_id is not None and 0 <= bos_id < len(tokens):
print(f"BOS token ID {bos_id}: '{tokens[bos_id]}'")
if eos_id is not None and 0 <= eos_id < len(tokens):
print(f"EOS token ID {eos_id}: '{tokens[eos_id]}'")
if pad_id is not None and 0 <= pad_id < len(tokens):
print(f"PAD token ID {pad_id}: '{tokens[pad_id]}'")
if unk_id is not None and 0 <= unk_id < len(tokens):
print(f"UNK token ID {unk_id}: '{tokens[unk_id]}'")
# Add tokenizer metadata with properly ordered tokens
writer.add_metadata("tokenizer.ggml.model", "llama", GGUFValueType.STRING)
writer.add_metadata("tokenizer.ggml.tokens", tokens, GGUFValueType.ARRAY)
# Create scores array (all zeros for now)
token_scores = [0.0] * len(tokens)
writer.add_metadata("tokenizer.ggml.scores", token_scores, GGUFValueType.ARRAY)
# Add token types (all normal tokens for now)
token_types = [1] * len(tokens) # 1 = normal token, 2 = unknown, 3 = control, etc.
writer.add_metadata("tokenizer.ggml.token_type", token_types, GGUFValueType.ARRAY)
# Add special token IDs with proper fallbacks
bos_id = bos_id if bos_id is not None else 1
eos_id = eos_id if eos_id is not None else 2
pad_id = pad_id if pad_id is not None else eos_id
unk_id = unk_id if unk_id is not None else 0
writer.add_metadata("tokenizer.ggml.bos_token_id", int(bos_id), GGUFValueType.UINT32)
writer.add_metadata("tokenizer.ggml.eos_token_id", int(eos_id), GGUFValueType.UINT32)
writer.add_metadata("tokenizer.ggml.pad_token_id", int(pad_id), GGUFValueType.UINT32)
writer.add_metadata("tokenizer.ggml.unk_token_id", int(unk_id), GGUFValueType.UINT32)
# Add additional special tokens if they exist
special_tokens = {}
if hasattr(tokenizer, 'added_tokens_encoder'):
for token, token_id in tokenizer.added_tokens_encoder.items():
special_tokens[token] = token_id
print(f"Special token '{token}': ID {token_id}")
print(f"Successfully exported tokenizer with {len(tokens)} tokens")
except Exception as e:
print(f"Warning: Error adding tokenizer metadata: {e}")
print("Falling back to basic tokenizer metadata")
writer.add_metadata("tokenizer.ggml.model", "llama", GGUFValueType.STRING)
# Create minimal tokenizer data
basic_tokens = [f"<token_{i}>" for i in range(vocab_size)]
writer.add_metadata("tokenizer.ggml.tokens", basic_tokens, GGUFValueType.ARRAY)
basic_scores = [0.0] * vocab_size
writer.add_metadata("tokenizer.ggml.scores", basic_scores, GGUFValueType.ARRAY)
tensor_type_map = {
"f16": GGMLType.F16,
"q8_0": GGMLType.Q8_0,
"q4_0": GGMLType.Q4_0,
"f32": GGMLType.F32
}
tensor_type = tensor_type_map.get(quantization, GGMLType.F32)
valid_tensors = [(name, param) for name, param in model.named_parameters() if not should_skip_tensor(name)]
total_tensors = len(valid_tensors)
print(f"Processing {total_tensors} tensors (after filtering)...")
tensors_processed = 0
output_tensor_found = False
for i, (name, param) in enumerate(valid_tensors):
try:
gguf_name = convert_parameter_name_to_gguf(name)
print(f"Processing tensor {i+1}/{total_tensors}: {name} -> {gguf_name}")
print(f" Shape: {param.shape}, Elements: {param.numel()}")
tensor_data = param.detach().cpu()
if gguf_name == 'output.weight':
output_tensor_found = True
try:
if should_transpose_tensor(gguf_name, tensor_data.shape):
print(f" Transposing output tensor: {tensor_data.shape} -> {tensor_data.T.shape}")
tensor_data = tensor_data.T
writer.add_tensor(gguf_name, tensor_data, tensor_type)
print(f" Successfully processed output.weight tensor")
except Exception as e:
print(f" Error processing output.weight from lm_head.weight: {e}")
print(f" Creating dummy output.weight tensor")
dummy_tensor = create_dummy_output_tensor(vocab_size, hidden_size)
if should_transpose_tensor(gguf_name, dummy_tensor.shape):
dummy_tensor = dummy_tensor.T
writer.add_tensor(gguf_name, dummy_tensor, tensor_type)
print(f" Successfully added dummy output.weight tensor")
else:
if should_transpose_tensor(gguf_name, tensor_data.shape):
print(f" Transposing tensor: {tensor_data.shape} -> {tensor_data.T.shape}")
tensor_data = tensor_data.T
writer.add_tensor(gguf_name, tensor_data, tensor_type)
tensors_processed += 1
if progress_callback:
progress_callback((i + 1) / total_tensors)
except Exception as e:
print(f"Error processing parameter {name}: {e}")
if gguf_name == 'output.weight':
print(f"Creating dummy output.weight tensor as fallback")
try:
dummy_tensor = create_dummy_output_tensor(vocab_size, hidden_size)
if should_transpose_tensor(gguf_name, dummy_tensor.shape):
dummy_tensor = dummy_tensor.T
writer.add_tensor(gguf_name, dummy_tensor, tensor_type)
output_tensor_found = True
tensors_processed += 1
print(f"Successfully added dummy output.weight tensor")
except Exception as dummy_e:
print(f"Failed to create dummy output.weight tensor: {dummy_e}")
raise e
else:
raise e
if not output_tensor_found:
print("output.weight tensor not found in model, creating dummy tensor")
try:
dummy_tensor = create_dummy_output_tensor(vocab_size, hidden_size)
if should_transpose_tensor('output.weight', dummy_tensor.shape):
dummy_tensor = dummy_tensor.T
writer.add_tensor('output.weight', dummy_tensor, tensor_type)
tensors_processed += 1
print("Successfully added dummy output.weight tensor")
except Exception as e:
print(f"Failed to create dummy output.weight tensor: {e}")
raise Exception("Critical: Could not create output.weight tensor")
print(f"Successfully processed {tensors_processed} tensors")
print("Writing GGUF file...")
writer.write()
print("GGUF file written successfully!")
return True
except Exception as e:
raise Exception(f"Error converting to GGUF: {e}")
def get_model_info(model) -> Dict[str, Any]:
return {
'total_parameters': sum(p.numel() for p in model.parameters()),
'vocab_size': getattr(model, 'vocab_size', 'unknown'),
'hidden_size': getattr(model, 'hidden_size', 'unknown'),
'num_layers': getattr(model, 'num_hidden_layers', 'unknown'),
'num_attention_heads': getattr(model, 'num_attention_heads', 'unknown'),
'num_key_value_heads': getattr(model, 'num_key_value_heads', 'unknown'),
'intermediate_size': getattr(model, 'intermediate_size', 'unknown'),
'max_position_embeddings': getattr(model, 'max_position_embeddings', 'unknown')
}
def create_config_file(model, quantization: str, training_stats: Optional[Dict] = None) -> Dict[str, Any]:
config_content = {
'model_config': getattr(model, 'config', {}),
'quantization': quantization,
'training_stats': training_stats
}
return config_content
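# Build the downloadable package: the GGUF file plus an optional JSON config.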
def create_model_package(model, tokenizer, model_name: str, quantization: str,
include_config: bool, temp_dir: Path,
training_stats: Optional[Dict] = None,
progress_callback=None) -> List[Path]:
files_created = []
gguf_path = temp_dir / f"{model_name}.gguf"
success = convert_pytorch_to_gguf(
model, tokenizer, str(gguf_path), quantization, progress_callback
)
if not success:
raise Exception("Failed to convert model to GGUF format")
files_created.append(gguf_path)
if include_config:
config_path = temp_dir / f"{model_name}_config.json"
config_content = create_config_file(model, quantization, training_stats)
config_path.write_text(json.dumps(config_content, indent=2, default=str), encoding='utf-8')
files_created.append(config_path)
return files_created
def create_zip_package(files: List[Path], output_path: Path) -> None:
with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
for file_path in files:
zipf.write(file_path, file_path.name)
def get_quantization_info() -> Dict[str, str]:
return {
"f32": "Full precision (largest file, best quality)",
"f16": "Half precision (good balance)",
"q8_0": "8-bit quantization (smaller, slight quality loss)",
"q4_0": "4-bit quantization (smallest, more quality loss)"
}
def test_model_generation(model, tokenizer, generate_function):
test_prompt = "<|begin_of_text|><|start_header_id|>start<|end_header_id|>\nHello Monika\n<|start_header_id|>end<|end_header_id|>\n\n<|start_header_id|>reply<|end_header_id|>\n"
generated_text = generate_function(
model, tokenizer, test_prompt, max_length=20
)
return test_prompt, generated_text
gguf_writer.py
import torch
import numpy as np
from typing import Dict, Any, List, Optional, Union
import struct
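# GGUF v3 file constants: the magic is the ASCII string "GGUF" (stored little-endian),
# and tensor data is padded to 32-byte alignment by default.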
GGUF_MAGIC = 0x46554747
GGUF_VERSION = 3
GGUF_DEFAULT_ALIGNMENT = 32
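# ggml tensor data type IDs as used by llama.cpp.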
class GGMLType:
F32 = 0
F16 = 1
Q4_0 = 2
Q4_1 = 3
Q5_0 = 6
Q5_1 = 7
Q8_0 = 8
Q8_1 = 9
Q2_K = 10
Q3_K = 11
Q4_K = 12
Q5_K = 13
Q6_K = 14
Q8_K = 15
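# GGUF metadata (key/value) type IDs from the GGUF spec.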
class GGUFValueType:
UINT8 = 0
INT8 = 1
UINT16 = 2
INT16 = 3
UINT32 = 4
INT32 = 5
FLOAT32 = 6
BOOL = 7
STRING = 8
ARRAY = 9
UINT64 = 10
INT64 = 11
FLOAT64 = 12
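# Minimal hand-rolled GGUF v3 writer: buffers metadata and tensors in memory, then
# write() serializes the header, key/value metadata, tensor infos, and aligned tensor data.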
class GGUFWriter:
def __init__(self, path: str, arch: str):
self.path = path
self.arch = arch
self.metadata = {}
self.tensors = []
self.tensor_data = {}
self.data_alignment = GGUF_DEFAULT_ALIGNMENT
def add_metadata(self, key: str, value: Any, value_type: int):
if not isinstance(key, str):
raise ValueError(f"Metadata key must be string, got {type(key)}")
self.metadata[key] = (value, value_type)
def add_tensor(self, name: str, tensor: Union[torch.Tensor, np.ndarray], tensor_type: int = GGMLType.F32):
if not isinstance(name, str):
raise ValueError(f"Tensor name must be string, got {type(name)}")
if isinstance(tensor, torch.Tensor):
tensor_np = tensor.detach().cpu().numpy()
else:
tensor_np = np.array(tensor)
if tensor_np.size == 0:
print(f"Warning: Tensor {name} is empty, creating minimal tensor")
tensor_np = np.array([0.0], dtype=np.float32)
        if tensor_type == GGMLType.F32:
            tensor_np = tensor_np.astype(np.float32)
        elif tensor_type == GGMLType.F16:
            tensor_np = tensor_np.astype(np.float16)
        else:
            # Quantized types are not implemented by this writer; fall back to F32
            # so the declared tensor type matches the bytes actually written.
            tensor_np = tensor_np.astype(np.float32)
            tensor_type = GGMLType.F32
tensor_np = np.ascontiguousarray(tensor_np)
if len(tensor_np.shape) == 0:
tensor_np = tensor_np.reshape(1)
self.tensors.append({
'name': name,
'shape': list(tensor_np.shape),
'type': tensor_type,
'offset': 0
})
self.tensor_data[name] = tensor_np
def _align_to(self, value: int, alignment: int) -> int:
remainder = value % alignment
if remainder == 0:
return value
return value + alignment - remainder
def _write_string(self, f, s: str):
if not isinstance(s, str):
s = str(s)
encoded = s.encode('utf-8')
f.write(struct.pack('<Q', len(encoded)))
f.write(encoded)
def _write_array_value(self, f, value: Any, elem_type: int):
if elem_type == GGUFValueType.STRING:
if not isinstance(value, str):
value = str(value)
self._write_string(f, value)
elif elem_type == GGUFValueType.UINT32:
f.write(struct.pack('<I', int(value)))
elif elem_type == GGUFValueType.INT32:
f.write(struct.pack('<i', int(value)))
elif elem_type == GGUFValueType.FLOAT32:
f.write(struct.pack('<f', float(value)))
elif elem_type == GGUFValueType.UINT64:
f.write(struct.pack('<Q', int(value)))
elif elem_type == GGUFValueType.INT64:
f.write(struct.pack('<q', int(value)))
elif elem_type == GGUFValueType.FLOAT64:
f.write(struct.pack('<d', float(value)))
elif elem_type == GGUFValueType.BOOL:
f.write(struct.pack('<B', 1 if value else 0))
else:
raise ValueError(f"Unsupported array element type: {elem_type}")
def _write_metadata_value(self, f, value: Any, value_type: int):
f.write(struct.pack('<I', value_type))
if value_type == GGUFValueType.STRING:
self._write_string(f, str(value))
elif value_type == GGUFValueType.UINT32:
f.write(struct.pack('<I', int(value)))
elif value_type == GGUFValueType.INT32:
f.write(struct.pack('<i', int(value)))
elif value_type == GGUFValueType.UINT64:
f.write(struct.pack('<Q', int(value)))
elif value_type == GGUFValueType.INT64:
f.write(struct.pack('<q', int(value)))
elif value_type == GGUFValueType.FLOAT32:
f.write(struct.pack('<f', float(value)))
elif value_type == GGUFValueType.FLOAT64:
f.write(struct.pack('<d', float(value)))
elif value_type == GGUFValueType.BOOL:
f.write(struct.pack('<B', 1 if value else 0))
elif value_type == GGUFValueType.ARRAY:
if not isinstance(value, (list, tuple)):
raise ValueError(f"Array value must be list or tuple, got {type(value)}")
if len(value) == 0:
f.write(struct.pack('<I', GGUFValueType.STRING))
f.write(struct.pack('<Q', 0))
else:
first_elem = value[0]
if isinstance(first_elem, str):
elem_type = GGUFValueType.STRING
elif isinstance(first_elem, bool):
elem_type = GGUFValueType.BOOL
elif isinstance(first_elem, float):
elem_type = GGUFValueType.FLOAT32
elif isinstance(first_elem, int):
if all(-2147483648 <= x <= 2147483647 for x in value if isinstance(x, int)):
elem_type = GGUFValueType.INT32
else:
elem_type = GGUFValueType.INT64
else:
elem_type = GGUFValueType.STRING
f.write(struct.pack('<I', elem_type))
f.write(struct.pack('<Q', len(value)))
for item in value:
self._write_array_value(f, item, elem_type)
else:
raise ValueError(f"Unsupported metadata value type: {value_type}")
def write(self):
try:
with open(self.path, 'wb') as f:
f.write(struct.pack('<I', GGUF_MAGIC))
f.write(struct.pack('<I', GGUF_VERSION))
f.write(struct.pack('<Q', len(self.tensors)))
f.write(struct.pack('<Q', len(self.metadata)))
for key, (value, value_type) in self.metadata.items():
self._write_string(f, key)
self._write_metadata_value(f, value, value_type)
current_offset = 0
for tensor_info in self.tensors:
tensor_name = tensor_info['name']
tensor_data = self.tensor_data[tensor_name]
tensor_size = tensor_data.nbytes
tensor_info['offset'] = current_offset
current_offset += tensor_size
current_offset = self._align_to(current_offset, self.data_alignment)
for tensor_info in self.tensors:
self._write_string(f, tensor_info['name'])
f.write(struct.pack('<I', len(tensor_info['shape'])))
for dim in tensor_info['shape']:
f.write(struct.pack('<Q', int(dim)))
f.write(struct.pack('<I', tensor_info['type']))
f.write(struct.pack('<Q', tensor_info['offset']))
current_pos = f.tell()
aligned_pos = self._align_to(current_pos, self.data_alignment)
if aligned_pos > current_pos:
f.write(b'\x00' * (aligned_pos - current_pos))
                for tensor_info in self.tensors:
                    tensor_name = tensor_info['name']
                    tensor_data = self.tensor_data[tensor_name]
                    f.write(tensor_data.tobytes())
                    # Pad every tensor to the data alignment so the bytes land at
                    # the offsets computed above.
                    current_pos = f.tell()
                    aligned_pos = self._align_to(current_pos, self.data_alignment)
                    if aligned_pos > current_pos:
                        f.write(b'\x00' * (aligned_pos - current_pos))
except Exception as e:
raise Exception(f"Error writing GGUF file: {e}")
Hello, apologies for the really late reply.
From experience, updating the gguf library usually fixes the errors I've run into, but I'm not sure it will in this case. You could also try re-exporting, in case something went wrong during the process (although the issue could be with the lib itself).
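In case it helps, below is very roughly what the export could look like if you let llama.cpp's own gguf-py package do the writing instead of a hand-rolled writer; it handles the header, offsets, alignment and tokenizer fields for you. The helpers should_skip_tensor / convert_parameter_name_to_gguf are the ones from your export_backend.py, and the model/tokenizer attributes are placeholders taken from your code, so treat this as an untested sketch rather than a drop-in replacement:

# Rough sketch only: uses llama.cpp's gguf-py writer (pip install gguf).
# should_skip_tensor / convert_parameter_name_to_gguf are your own helpers
# from export_backend.py; the model/tokenizer attributes are assumptions.
from gguf import GGUFWriter

def export_with_gguf_py(model, tokenizer, path: str) -> None:
    writer = GGUFWriter(path, arch="llama")

    # Hyperparameters (same values your code already reads from the model).
    writer.add_context_length(model.max_position_embeddings)
    writer.add_embedding_length(model.hidden_size)
    writer.add_block_count(model.num_hidden_layers)
    writer.add_feed_forward_length(model.intermediate_size)
    writer.add_head_count(model.num_attention_heads)
    writer.add_head_count_kv(getattr(model, 'num_key_value_heads', model.num_attention_heads))
    writer.add_layer_norm_rms_eps(float(model.rms_norm_eps))
    writer.add_rope_freq_base(float(getattr(model, 'rope_theta', 10000.0)))

    # Tokenizer: tokens ordered by ID, with scores/types of the same length.
    vocab = tokenizer.get_vocab()
    tokens = [tok for tok, _ in sorted(vocab.items(), key=lambda kv: kv[1])]
    writer.add_tokenizer_model("llama")
    writer.add_token_list(tokens)
    writer.add_token_scores([0.0] * len(tokens))
    writer.add_token_types([1] * len(tokens))  # 1 = normal token
    if getattr(tokenizer, 'bos_token_id', None) is not None:
        writer.add_bos_token_id(tokenizer.bos_token_id)
    if getattr(tokenizer, 'eos_token_id', None) is not None:
        writer.add_eos_token_id(tokenizer.eos_token_id)

    # Weights: the library takes care of alignment and offsets.
    for name, param in model.named_parameters():
        if should_skip_tensor(name):
            continue
        writer.add_tensor(convert_parameter_name_to_gguf(name),
                          param.detach().cpu().float().numpy())

    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file()
    writer.close()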
To be honest, it might be best to ask on the text-generation-webui or llama.cpp GitHub if you haven't already.
Again, sorry for the really late reply. Hopefully you were able to solve it or find a way around it by now.
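If you do come back to it, a quick way to see what actually ended up in the exported file is to dump its metadata with the same gguf package (the path below is a placeholder). llama.cpp builds its vocab from the tokenizer.ggml.* arrays, and if your tokenizer is BPE-based I believe it also expects tokenizer.ggml.merges, so it's worth checking that those keys are all present:

# Quick sanity check of the exported file, assuming the gguf package that
# ships with llama.cpp is installed (pip install gguf). Path is a placeholder.
from gguf import GGUFReader

reader = GGUFReader("your_model.gguf")

# Every metadata key that was written; the tokenizer.ggml.* arrays are what
# llama.cpp reads when it builds the vocab.
for key in reader.fields:
    print(key)

# Tensor names, shapes and types as llama.cpp will see them.
for tensor in reader.tensors:
    print(tensor.name, tensor.shape, tensor.tensor_type)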