yukihamada committed on
Commit
64d4859
·
verified ·
1 Parent(s): e4e722d

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +48 -0
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "QwenForCausalLM"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_qwen.QwenConfig",
7
+ "AutoModelForCausalLM": "modeling_qwen.QwenLMHeadModel"
8
+ },
9
+ "bf16": false,
10
+ "embd_pdrop": 0.0,
11
+ "fp16": false,
12
+ "fp32": false,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 18944,
15
+ "kv_channels": 128,
16
+ "layer_norm_epsilon": 1e-06,
17
+ "max_position_embeddings": 32768,
18
+ "model_type": "qwen",
19
+ "num_attention_heads": 28,
20
+ "num_hidden_layers": 40,
21
+ "num_key_value_heads": 4,
22
+ "resid_pdrop": 0.0,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_scaling": null,
25
+ "rope_theta": 1000000.0,
26
+ "scale_attn_weights": true,
27
+ "tie_word_embeddings": false,
28
+ "torch_dtype": "bfloat16",
29
+ "transformers_version": "4.37.0",
30
+ "use_cache": true,
31
+ "use_flash_attn": "auto",
32
+ "vocab_size": 151936,
33
+ "hidden_size": 3584,
34
+ "character": "Passionate AI development instructor with deep insights",
35
+ "training_samples": 38,
36
+ "quantization": "Q8_0",
37
+ "model_size_gb": 4.3,
38
+ "quality_level": "Extremely High",
39
+ "base_model": "bartowski/Menlo_Jan-nano-GGUF",
40
+ "quality_score": 9.5,
41
+ "improvements_over_iq4xs": [
42
+ "15%+ response quality improvement",
43
+ "Better Japanese nuance understanding",
44
+ "More consistent character personality",
45
+ "Enhanced technical knowledge retention",
46
+ "Improved logical reasoning capabilities"
47
+ ]
48
+ }