metadata
license: mit
license_link: https://huggingface.co/rednote-hilab/dots.llm1.inst/blob/main/LICENSE
pipeline_tag: text-generation
base_model: rednote-hilab/dots.llm1.inst
tags:
- chat
library_name: transformers
language:
- en
- zh
huihui-ai/dots.llm1.inst
This version exists solely to allow rednote-hilab/dots.llm1.inst to be loaded locally with transformers. Only the local import issue has been fixed; nothing else has been changed.
Usage
Copy the four files into the model directory, then run the following program.
import sys
import os
import torch
from transformers import AutoTokenizer, AutoConfig, AutoModel, BitsAndBytesConfig
# Local directory that holds the model weights together with the copied
# configuration_dots1 / modeling_dots1 source files.
MODEL_ID = "./rednote-hilab/dots.llm1.inst"

# Put the model directory on the import path so the two modules below can be
# imported directly from it.
model_dir = os.path.abspath(MODEL_ID)
sys.path.append(model_dir)

from configuration_dots1 import Dots1Config
from modeling_dots1 import Dots1ForCausalLM

# Register the custom classes with the Auto* factories under the "dots1"
# model type.
AutoConfig.register("dots1", Dots1Config)
AutoModel.register(Dots1Config, Dots1ForCausalLM)

config = AutoConfig.from_pretrained(MODEL_ID)
print(config)

# 4-bit bitsandbytes settings: bfloat16 compute dtype, double quantization,
# and the fp32 CPU-offload flag enabled.
bnb_4bit_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

# Load through the concrete model class, with automatic device placement.
model = Dots1ForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_4bit_config,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)
print(model)
print(model.config)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Plain text-completion prompt; the model continues the final sentence.
text = "An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is"

# Tokenize, then move the encoded tensors to the model's device before
# generating.
encoded = tokenizer(text, return_tensors="pt")
encoded = encoded.to(model.device)
generated = model.generate(**encoded, max_new_tokens=100)

# Decode the first (only) sequence, dropping special tokens.
completion = tokenizer.decode(generated[0], skip_special_tokens=True)
print(completion)