from transformers import PretrainedConfig


class SpeechUnitConfig(PretrainedConfig):
    """Configuration object registered under the ``"speechunit"`` model type.

    Holds the hyperparameters listed below as plain instance attributes and
    delegates every other keyword argument to ``PretrainedConfig``.

    Args:
        base_model_id: Identifier stored on the config. The default looks
            like a Hugging Face Hub model id -- NOTE(review): confirm how
            the model code consumes it.
        num_hidden_layers: Layer count stored on the config.
        output_dim: Output dimensionality stored on the config.
        num_heads: Head count stored on the config (presumably attention
            heads -- verify against the model code).
        initializer_range: Float stored on the config (presumably the
            standard deviation used for weight initialisation -- verify
            against the model code).
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "speechunit"

    def __init__(
        self,
        base_model_id: str = "meta-llama/Llama-3.2-1B",
        num_hidden_layers: int = 3,
        output_dim: int = 2048,
        num_heads: int = 8,
        initializer_range: float = 0.02,
        **kwargs,
    ):
        """Store the model-specific fields, then run the base initialiser."""
        # Model-specific fields are assigned before the base-class
        # constructor runs, so they already exist on the instance while
        # ``PretrainedConfig.__init__`` processes the remaining kwargs.
        self.base_model_id = base_model_id
        self.num_hidden_layers = num_hidden_layers
        self.output_dim = output_dim
        self.num_heads = num_heads
        self.initializer_range = initializer_range

        super().__init__(**kwargs)