huihui-ai/dots.llm1.inst

This version is for loading rednote-hilab/dots.llm1.inst locally with transformers. Only the local import issue has been fixed; nothing else has been changed.

Usage

Copy the four files into the local model directory (`./rednote-hilab/dots.llm1.inst` in the example below); you can then run the following program.

import sys
import os
import torch
from transformers import AutoTokenizer, AutoConfig, AutoModel, BitsAndBytesConfig

# Local directory that holds the model (and the copied Dots1 source files).
MODEL_ID = "./rednote-hilab/dots.llm1.inst"

# configuration_dots1 / modeling_dots1 are imported as top-level modules, so
# the model directory has to be on the import path before the imports below.
model_dir = os.path.abspath(MODEL_ID)
sys.path.append(model_dir)

from configuration_dots1 import Dots1Config
from modeling_dots1 import Dots1ForCausalLM

# Register the "dots1" model type with the Auto* classes before anything is
# loaded from MODEL_ID.
AutoConfig.register("dots1", Dots1Config)
AutoModel.register(Dots1Config, Dots1ForCausalLM)

config = AutoConfig.from_pretrained(MODEL_ID)
print(config)

# 4-bit bitsandbytes settings: bfloat16 compute, double quantization, and the
# fp32 CPU-offload flag enabled.
bnb_4bit_config = BitsAndBytesConfig(
    llm_int8_enable_fp32_cpu_offload=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    load_in_4bit=True,
)

# Load the causal-LM class directly from the local directory.
model = Dots1ForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_4bit_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)

print(model)
print(model.config)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Continue a short prompt for up to 100 new tokens and print the decoded text.
prompt = "An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is"
encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
generated = model.generate(**encoded, max_new_tokens=100)
completion = tokenizer.decode(generated[0], skip_special_tokens=True)
print(completion)

huihui-ai
/

dots.llm1.inst

huihui-ai/dots.llm1.inst

Usage

Model tree for huihui-ai/dots.llm1.inst