Eldar Kurtic committed on
Commit
5dc641c
·
1 Parent(s): cb21039

fix configs

Browse files
configuration_deepseek.py CHANGED
@@ -196,4 +196,4 @@ class DeepseekV3Config(PretrainedConfig):
196
  eos_token_id=eos_token_id,
197
  tie_word_embeddings=tie_word_embeddings,
198
  **kwargs,
199
- )
 
196
  eos_token_id=eos_token_id,
197
  tie_word_embeddings=tie_word_embeddings,
198
  **kwargs,
199
+ )
generation_config.json CHANGED
@@ -2,5 +2,8 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "eos_token_id": 1,
5
- "transformers_version": "4.52.0.dev0"
 
 
 
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "eos_token_id": 1,
5
+ "do_sample": true,
6
+ "temperature": 0.6,
7
+ "top_p": 0.95,
8
+ "transformers_version": "4.46.3"
9
  }
modeling_deepseek.py CHANGED
@@ -1846,4 +1846,3 @@ class DeepseekV3ForSequenceClassification(DeepseekV3PreTrainedModel):
1846
  hidden_states=transformer_outputs.hidden_states,
1847
  attentions=transformer_outputs.attentions,
1848
  )
1849
-
 
1846
  hidden_states=transformer_outputs.hidden_states,
1847
  attentions=transformer_outputs.attentions,
1848
  )
 
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce20a7877bec9454dd611bc4e9116b7db765594f78524bd94edbaab422eddf02
3
- size 9977280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecb6f9fc369894346f0511f4074ca75cee5cd5f3b06d02f1ba35fcd39f8e121d
3
+ size 7847602
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff