yujiepan committed
Commit f746b7c · verified · 1 Parent(s): 242145c

Upload folder using huggingface_hub
Files changed (2):
  1. README.md +3 -0
  2. config.json +1 -1
README.md CHANGED
@@ -69,6 +69,7 @@ config.text_config.head_dim = 32
 config.text_config.num_attention_heads = 1
 config.text_config.num_key_value_heads = 1
 config.text_config.num_hidden_layers = 2
+config.text_config.sliding_window_pattern = 2
 config.vision_config.hidden_size = 32
 config.vision_config.num_hidden_layers = 2
 config.vision_config.num_attention_heads = 1
@@ -76,6 +77,8 @@ config.vision_config.intermediate_size = 128
 model = Gemma3ForConditionalGeneration(
     config,
 ).to(torch.bfloat16)
+for layer in model.language_model.model.layers:
+    print(layer.is_sliding)
 model.generation_config = GenerationConfig.from_pretrained(
     source_model_id, trust_remote_code=True,
 )
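In the transformers Gemma3 implementation, `sliding_window_pattern` controls which decoder layers use sliding-window attention, and the added loop prints each layer's `is_sliding` flag to verify that. Below is a minimal sketch of the expected flags, assuming the convention that every `sliding_window_pattern`-th layer (1-based) is a global-attention layer and the rest are sliding-window layers; the `expected_is_sliding` helper is hypothetical, written only to mirror that rule:

def expected_is_sliding(layer_idx: int, pattern: int) -> bool:
    # Layers whose 1-based index is a multiple of the pattern attend
    # globally; all other layers use sliding-window attention.
    # (Assumed rule, matching the (layer_idx + 1) % pattern check in
    # the transformers Gemma3 decoder layer.)
    return bool((layer_idx + 1) % pattern)

# With num_hidden_layers = 2 in this tiny model:
print([expected_is_sliding(i, pattern=6) for i in range(2)])  # [True, True]  -> every layer sliding
print([expected_is_sliding(i, pattern=2) for i in range(2)])  # [True, False] -> layer 1 is global

Under that assumption, the old pattern of 6 would leave both of the tiny model's two layers as sliding-window layers, so the global-attention path would never be exercised; a pattern of 2 gives one sliding and one global layer.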
config.json CHANGED
@@ -37,7 +37,7 @@
     },
     "rope_theta": 1000000.0,
     "sliding_window": 1024,
-    "sliding_window_pattern": 6,
+    "sliding_window_pattern": 2,
     "use_cache": true,
     "vocab_size": 262208
   },
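The config.json change mirrors the README change, keeping the shipped config consistent with the tiny model's two text layers. A minimal sketch checking the shipped file, assuming it sits in the current directory (the local path is hypothetical) and that these keys live under "text_config" as in standard Gemma3 configs:

import json

# Load the repo's config and confirm the updated sliding-window settings.
with open("config.json") as f:
    cfg = json.load(f)

text_cfg = cfg["text_config"]
assert text_cfg["sliding_window"] == 1024
assert text_cfg["sliding_window_pattern"] == 2
print("sliding_window_pattern:", text_cfg["sliding_window_pattern"])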