{ "model_type": "xtts", "architectures": [ "XttsGPT" ], "audio_config": { "fmax": 8000, "fmin": 0, "hop_length": 256, "mel_channels": 80, "mel_norms_file": null, "n_fft": 1024, "output_sample_rate": 24000, "power": 1.0, "sample_rate": 22050, "win_length": 1024 }, "d_vector_dim": 512, "decoder_input_dim": 1024, "num_chars": 255, "duration_const": 102400, "output_hop_length": 256, "input_sample_rate": 22050, "output_sample_rate": 24000, "gpt": { "model_type": "xtts_gpt" }, "gpt_config": { "model_type": "xtts_gpt", "architectures": [ "XttsGPT" ], "vocab_size": 7544, "hidden_size": 1024, "num_hidden_layers": 30, "num_attention_heads": 16, "n_inner": 4096, "number_text_tokens": 7544, "num_audio_tokens": 1026, "max_audio_tokens": 605, "start_audio_token": 1024, "stop_audio_token": 1025, "max_text_tokens": 402, "max_prompt_tokens": 70, "activation_function": "gelu_new", "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "use_masking_gt_prompt_approach": true, "use_perceiver_resampler": true, "kv_cache": true, "enable_redaction": false, "reorder_and_upcast_attn": false, "scale_attn_by_inverse_layer_idx": false, "auto_map": { "AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig", "AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT", "AutoTokenizer": "AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast" }, "languages": [ "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "hu", "ko", "ja", "vi" ] }, "gpt_code_stride_len": 1024, "cond_d_vector_in_each_upsampling_layer": true, "auto_map": { "AutoConfig": "AstraMindAI/xtts2--xtts2_config.XTTSConfig", "AutoModelForCausalLM": "AstraMindAI/xtts2--xtts2_modeling.Xtts", "AutoTokenizer": "AstraMindAI/xtts2--tokenizer.XTTSTokenizerFast" }, "languages": [ "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "hu", "ko", "ja", "vi" ], "tokenizer_file": "", "transformers_version": "4.46.0" }