huihui-ai
/

dots.llm1.inst

Text Generation

Model card Files Files and versions Community

dots.llm1.inst / modular_dots1.py

huihui-ai's picture

Upload 4 files

5643ecf verified 4 days ago

history blame contribute delete

2.62 kB

	from ...modeling_outputs import CausalLMOutputWithPast
	from ...processing_utils import Unpack
	from ...utils import logging
	from ..deepseek_v3.modeling_deepseek_v3 import (
	DeepseekV3DecoderLayer,
	DeepseekV3MLP,
	DeepseekV3MoE,
	DeepseekV3PreTrainedModel,
	DeepseekV3TopkRouter,
	)
	from ..llama.modeling_llama import (
	KwargsForCausalLM,
	LlamaRMSNorm,
	)
	from ..qwen3.modeling_qwen3 import Qwen3Attention, Qwen3ForCausalLM, Qwen3Model, Qwen3RotaryEmbedding
	from .configuration_dots1 import Dots1Config


	logger = logging.get_logger(__name__)


	class Dots1RMSNorm(LlamaRMSNorm):
	pass


	class Dots1RotaryEmbedding(Qwen3RotaryEmbedding):
	pass


	class Dots1Attention(Qwen3Attention):
	pass


	class Dots1MLP(DeepseekV3MLP):
	pass


	class Dots1MoE(DeepseekV3MoE):
	pass


	class Dots1TopkRouter(DeepseekV3TopkRouter):
	pass


	class Dots1DecoderLayer(DeepseekV3DecoderLayer):
	def __init__(self, config: Dots1Config, layer_idx: int):
	super().__init__()
	self.attention_type = config.layer_types[layer_idx]


	class Dots1PreTrainedModel(DeepseekV3PreTrainedModel):
	pass


	class Dots1Model(Qwen3Model):
	pass


	class Dots1ForCausalLM(Qwen3ForCausalLM):
	def forward(
	self,
	**super_kwargs: Unpack[KwargsForCausalLM],
	) -> CausalLMOutputWithPast:
	r"""
	labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, optional):
	Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
	config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
	(masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

	Example:

	```python
	>>> from transformers import AutoTokenizer, Dots1ForCausalLM

	>>> model = Dots1ForCausalLM.from_pretrained("rednote-hilab/dots1.llm1.inst")
	>>> tokenizer = AutoTokenizer.from_pretrained("rednote-hilab/dots1.llm1.inst")

	>>> prompt = "Hey, are you conscious? Can you talk to me?"
	>>> inputs = tokenizer(prompt, return_tensors="pt")

	>>> # Generate
	>>> generate_ids = model.generate(inputs.input_ids, max_length=30)
	>>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
	"Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
	```"""
	return super().forward(**super_kwargs)


	__all__ = [
	"Dots1PreTrainedModel",
	"Dots1Model",
	"Dots1ForCausalLM",
	]