Upload folder using huggingface_hub
- 1_Pooling/config.json +10 -0
- README.md +64 -0
- checkpoint-11543/1_Pooling/config.json +10 -0
- checkpoint-11543/README.md +988 -0
- checkpoint-11543/config.json +47 -0
- checkpoint-11543/config_sentence_transformers.json +10 -0
- checkpoint-11543/model.safetensors +3 -0
- checkpoint-11543/modules.json +14 -0
- checkpoint-11543/optimizer.pt +3 -0
- checkpoint-11543/rng_state.pth +3 -0
- checkpoint-11543/scheduler.pt +3 -0
- checkpoint-11543/sentence_bert_config.json +4 -0
- checkpoint-11543/special_tokens_map.json +37 -0
- checkpoint-11543/tokenizer.json +0 -0
- checkpoint-11543/tokenizer_config.json +945 -0
- checkpoint-11543/trainer_state.json +3278 -0
- checkpoint-11543/training_args.bin +3 -0
- config.json +47 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +14 -0
- runs/Feb20_14-59-58_r-m7n-autotrain-advanced-kyhs0yea-70f9a-nry3q/events.out.tfevents.1740063604.r-m7n-autotrain-advanced-kyhs0yea-70f9a-nry3q.102.0 +2 -2
- runs/Feb20_14-59-58_r-m7n-autotrain-advanced-kyhs0yea-70f9a-nry3q/events.out.tfevents.1740065597.r-m7n-autotrain-advanced-kyhs0yea-70f9a-nry3q.102.1 +3 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +945 -0
- training_args.bin +3 -0
- training_params.json +33 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
    "word_embedding_dimension": 768,
    "pooling_mode_cls_token": true,
    "pooling_mode_mean_tokens": false,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false,
    "pooling_mode_weightedmean_tokens": false,
    "pooling_mode_lasttoken": false,
    "include_prompt": true
}
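This configuration enables CLS pooling only: the 768-dimensional sentence embedding is the encoder's hidden state at the first token. A minimal sketch of that selection, with a random tensor standing in for real transformer output:

```python
import torch

# CLS pooling as declared above: keep only the first token's hidden state.
# `token_embeddings` is a stand-in for per-token encoder output.
token_embeddings = torch.randn(2, 16, 768)  # (batch, seq_len, word_embedding_dimension)

cls_embeddings = token_embeddings[:, 0]  # hidden state at the [CLS] position
print(cls_embeddings.shape)              # torch.Size([2, 768])
```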
README.md
ADDED
@@ -0,0 +1,64 @@

---
library_name: sentence-transformers
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- autotrain
base_model: Alibaba-NLP/gte-modernbert-base
widget:
- source_sentence: 'search_query: i love autotrain'
  sentences:
  - 'search_query: huggingface auto train'
  - 'search_query: hugging face auto train'
  - 'search_query: i love autotrain'
pipeline_tag: sentence-similarity
---

# Model Trained Using AutoTrain

- Problem type: Sentence Transformers

## Validation Metrics
loss: 0.4268312156200409

cosine_accuracy: 0.9693415637860082

runtime: 40.9588

samples_per_second: 118.656

steps_per_second: 7.422

epoch: 1.0

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the Hugging Face Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    'search_query: autotrain',
    'search_query: auto train',
    'search_query: i love autotrain',
]
embeddings = model.encode(sentences)
print(embeddings.shape)

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
```
checkpoint-11543/1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
    "word_embedding_dimension": 768,
    "pooling_mode_cls_token": true,
    "pooling_mode_mean_tokens": false,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false,
    "pooling_mode_weightedmean_tokens": false,
    "pooling_mode_lasttoken": false,
    "include_prompt": true
}
checkpoint-11543/README.md
ADDED
@@ -0,0 +1,988 @@
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:92337
- loss:MultipleNegativesRankingLoss
base_model: Alibaba-NLP/gte-modernbert-base
widget:
- source_sentence: 'Philosophy Professor Running for US Senate! Richard Dien Winfield
    is a philosophy professor at UGA and is a distinguished Hegel scholar. I''m sure
    many of you are familiar with his many published books on Hegel and his audio
    lectures, available here: is currently running for US Senate for the state of
    Georgia and is just beginning his campaign. Check out his campaign launch video:
    his website: can follow him on Twitter: is advocating for a new social bill of
    rights. Here is where he is on just some of the issues: Federal Job Guarantee Fair
    Minimum Wage ($20/hr) Green New Deal Super Medicare for All Legal Care for
    All It''s great to see a philosophy professor running for office!'
  sentences:
  - Context and Context-Dependence
  - 'There are some contests about this, but: “Atheism” is typically defined in terms
    of “theism”. Theism, in turn, is best understood as a proposition—something that
    is either true or false. It is often defined as “the belief that God exists”,
    but here “belief” means “something believed”. It refers to the propositional content
    of belief, not to the attitude or psychological state of believing. This is why
    it makes sense to say that theism is true or false and to argue for or against
    theism. If, however, “atheism” is defined in terms of theism and theism is the
    proposition that God exists and not the psychological condition of believing that
    there is a God, then it follows that atheism is not the absence of the psychological
    condition of believing that God exists (more on this below). The “a-” in “atheism”
    must be understood as negation instead of absence, as “not” instead of “without”.
    Therefore, in philosophy at least, atheism should be construed as the proposition
    that God does not exist (or, more broadly, the proposition that there are no gods).
    rf: sometimes you hear the other thing - that atheism is the position wherein
    some believer "lacks the belief" that there are gods. This position is popular
    outside of philosophy. There are lots of good reasons to reject this alternative
    situation, as it tends to confuse other positions. You can read why here: https://old.reddit.com/r/askphilosophy/comments/2za4ez/vacuous_truths_and_shoe_atheism/cuyn8nm/'
  - 'If we are being honest, he has a rather slim chance to win the primary (the establishmenz
    seems to have anointed their candidate, and it''s not a bad one), and an uphill
    general election (last Senate election was R+12). And he doesn''t seem to say
    the right things in public.... Yeah. Edit: In my view, philosophers (and anyone)
    should very much think about whether running for office, in this situation, is
    the right call. Given the issues Winfield wants to push, it seems like funding
    an NGO or a think tank would be much better suited to forward the cause. Especially
    since the job guarantee is already authorized (but not mandated) by law, which
    is a fun quirk. But then again, just looking at the website, you can tell that
    this campaign isn''t run by professionals? It just sounds like a waste of time
    and resources. If you ever Plan to run for office, hire me to give you advice
    (I''ll do it for free if you''re not a Hegelian)'
- source_sentence: 'Human embryos are fully human in a biological sense, they''re
    just at a particular stage of human development (a developing human, as opposed
    to non-humans becoming human), but maybe that''s what you meant. You''re effectively
    asking whether there are any pro-life philosophers, and there are. Here are some:
    Francis J. Beckwith, Stephen D. Schwarz, Christopher Kaczor, Don Marquis, Patrick
    Lee, and Jack Mulder. Most tend to be Catholics, but none assume or employ religious
    doctrines in making the case against abortion. I myself happen to be non-religious
    but pro-life, for example. If you have access to a university library, you can
    find plenty more authors who''ve written articles effectively defending the view
    you mentioned. Most of the authors I listed above have written books and articles
    that are more accessible, though.'
  sentences:
  - 'Just to add a few more papers to this: Manninen, B.A. (2007) “Revisiting the
    Argument from Foetal Potential”. Philosophy, Ethics and Humanities in Medicine.
    Volume 2, Number 1. Available at: P. George, R.P. (2005) “The Wrong of Abortion”.
    Contemporary Debates in Applied Ethics, pp.13-26. Cohen, A.I.; Wellman, C.H. (eds.)
    Malden, MA: Blackwell Pub.'
  - I myself happen to be non-religious but pro-life I am too lol, we're quite a rare
    breed. Are any of those authors openly non religious? I'll check some of them
    out anyway.
  - That last bit sounds right up the alley of what I'm looking at. Do you happen
    to know of any papers that elaborate any of these contentions?
- source_sentence: Is there something morally wrong with cultural appropriation in
    the arts? I argue that the little philosophical work on this topic has been overly
    dismissive of moral objections to cultural appropriation. Nevertheless, I argue
    that philosophers working on epistemic injustice have developed powerful conceptual
    tools that can aid in our understanding of objections that have been levied by
    other scholars and artists. I then consider the relationship between these objections
    and the harms of cultural essentialism. I argue that focusing on (...) the systematic
    nature of appropriative harms may allow us to sidestep the problem of essentialism,
    but not without cost. (shrink)
  sentences:
  - Then, I(g, ( A Af -, y) -, (c -,+ (f -, y))) = o ensures that there is a w where
    I(w, c A AP -+ y) = 1 and I(w, a --+ (f -+ y)) = 0. This gives us x, y where Rwxy,
    I(x, a) = 1 and I(y, P -+ y) = 0, which in turn means that there are z, t where
    Ryzt, I(z, P) = 1 and I(t, y) = 0.
  - Applied Ethics, Misc
  - Defenses of Toleration
- source_sentence: Whereas it may be said that in Britain freedom is regarded, in
    a sense, as a privilege bestowed from above, from the upper orders of society,
    in the United States it is regarded as a 'natural right'; an argument not really
    vitiated by pointing out that the natural rights doctrine is nonsense, for Americans
    throughout a great part of their history have believed that they do enjoy these
    rights.
  sentences:
  - '. Art is something unique, the spirit is unique in its source. Art is symbolic,
    since it always bears within itself a symbol, i.e., that which is eternal, and
    rejects that which is transitory. Art is free, since it arises from inspiration.
    "16 Bryusov and Berdyaev protested that they could only recognize socialist ideas
    to the extent that socialism respects the basic principle of their world view:
    the unconditional independence of the artist. The opinions of the Russian symbolists
    were a reflection of French Symbolism and Parnassism. At its base is the Romantic
    theory of art, derived from Novalis and F. Schlegel, according to which the artist
    is a Brahmin. (At the same time, a trend in Romanticism stressed the social obligations
    of the artist, e.g., Shelley in Defence of Poetry.) This apolitical tendency in
    Romanticism was inherited by the representatives of the idea of "art for art''s
    sake." Baudelaire wrote, in the Hymne: "Que tu viennes du Ciel ou de l''Enfer,
    qu''importe, o Beauté." Flaubert thought: "Aimonsnous en l''art comme les mystiques
    s''aiment en Dieu" (CorresponRevolution on the Development of Russian Esthetic
    Thinking," in Uchonie zapiski L.G.U., S istoricheskikh nauk, 1956, No. 220. Vol.
    II, 1893, p. 286) . The Goncourts wrote in their Journal in 1886 that only "pure
    literature" is a matter of life and death. And we are continually coming across
    statements in their writings concerning the eternality of the "truly beautiful,"
    the independence of the artist, his superiority and disdain towards his clients
    and customers. Baudelaire wrote about Poe, and Gautier about Baudelaire, that
    the glory of the poet is his holding himself aloof from Utopians, philanthropists,
    socialists, etc.'
  - Maybe the guy in the room can not be said to understand Chinese but the room,
    itself, could. That's not to say that the room is conscious, though.
  - His next major contribution to the literature of educational reform appeared in
    1845, in a series of articles in The Scotsman, entitled 'National Education and
    the Common Schools of Massachusetts'. In I847 these came out in pamphlet form
    as Remarks on National Education4 This was largely a re-statement of the argument
    advanced in his Edinburgh Review article, prefaced by an assurance to his readers
    that they had nothing to fear from the Government's being entrusted to run the
    nation's schools. 'In every free country the state is merely the representative
    of the general power (physical, moral and intellectual) of the country. It is
    not a distant and independent being, that can exist and A Note On 'Secular5 Education
    In The Nineteenth Century In Spite The Will Of Its Members. This was addressed
    to those who saw in Governmental interference in education the spectre of despotism
    rearing its head, a not unfounded fear since it was upon the Prussian example
    that Mann had erected his Massachusetts plan. Society's claim on the individual
    was not, however, a total one, for 'the individual has a right to unbounded liberty
    of self-determination as to what he shall learn and what he shall not learn. '2
    Combe made no attempt to reconcile these two propositions.
- source_sentence: I found Kant and the Problem of Metaphysics to be a good intro
    to his style (assuming you know Kant well)
  sentences:
  - I'm going on a hunch here and guessing that your professor was referring to Rorty's
    pragmatism, in which case this would seek to eliminate, or at least redesign,
    much of metaphysics and epistemology (primarily) but not philosophy in its entirety.
  - I would second this recommendation; Polt is a wonderful reader of Heidegger. I
    think he’s right to say that his Introduction is best read alongside B T.
  - Good suggestion, I'm currently working on his rather complicated Auseinandersetzung
    with Kant. It's baffling how insightful and at the same time how rash he can be!
    ;)
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
- cosine_accuracy
model-index:
- name: SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
  results:
  - task:
      type: triplet
      name: Triplet
    dataset:
      name: Unknown
      type: unknown
    metrics:
    - type: cosine_accuracy
      value: 0.9693415637860082
      name: Cosine Accuracy
---

# SentenceTransformer based on Alibaba-NLP/gte-modernbert-base

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) <!-- at revision bc02f0a92d1b6dd82108036f6cb4b7b423fb7434 -->
- **Maximum Sequence Length:** 8192 tokens
- **Output Dimensionality:** 768 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: ModernBertModel
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```
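The same two-module stack can be assembled by hand from sentence-transformers building blocks. This is only a sketch of the architecture printed above, not how this checkpoint was produced (AutoTrain handled that), and it downloads the base model:

```python
from sentence_transformers import SentenceTransformer, models

# Rebuild the printed architecture: an 8192-token ModernBERT encoder
# followed by CLS pooling over its 768-dimensional hidden states.
word_embedding_model = models.Transformer("Alibaba-NLP/gte-modernbert-base", max_seq_length=8192)
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),  # 768
    pooling_mode="cls",
)
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
```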

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    'I found Kant and the Problem of Metaphysics to be a good intro to his style (assuming you know Kant well)',
    "Good suggestion, I'm currently working on his rather complicated Auseinandersetzung with Kant. It's baffling how insightful and at the same time how rash he can be! ;)",
    'I would second this recommendation; Polt is a wonderful reader of Heidegger. I think he’s right to say that his Introduction is best read alongside B T.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
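Beyond pairwise similarity, the embeddings can back a small semantic-search index. A sketch using `sentence_transformers.util`; the corpus and query strings are invented, and `sentence_transformers_model_id` is the same placeholder as above:

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("sentence_transformers_model_id")

# Hypothetical corpus; any list of strings works.
corpus = [
    "Derrida on the continuities and discontinuities of history",
    "Confidence intervals in Neyman-Pearson estimation",
    "CLS pooling in transformer sentence encoders",
]
query_embedding = model.encode("What did Derrida say about history?", convert_to_tensor=True)
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)

# Rank corpus entries by cosine similarity to the query.
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=2)
print(hits[0])  # e.g. [{'corpus_id': 0, 'score': ...}, {'corpus_id': 2, 'score': ...}]
```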

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### Triplet

* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)

| Metric              | Value      |
|:--------------------|:-----------|
| **cosine_accuracy** | **0.9693** |

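For context, the cosine_accuracy reported by `TripletEvaluator` is the fraction of triplets whose anchor is closer, by cosine similarity, to its positive than to its negative. A minimal sketch of that check with random stand-in embeddings:

```python
import torch
import torch.nn.functional as F

# Random stand-ins for (anchor, positive, negative) embedding batches.
anchor = torch.randn(4, 768)
positive = torch.randn(4, 768)
negative = torch.randn(4, 768)

pos_sim = F.cosine_similarity(anchor, positive)  # cos(anchor, positive) per triplet
neg_sim = F.cosine_similarity(anchor, negative)  # cos(anchor, negative) per triplet
cosine_accuracy = (pos_sim > neg_sim).float().mean()
print(cosine_accuracy.item())
```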
<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset


* Size: 92,337 training samples
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
* Approximate statistics based on the first 1000 samples:
  |         | anchor | positive | negative |
  |:--------|:-------|:---------|:---------|
  | type    | string | string   | string   |
  | details | <ul><li>min: 11 tokens</li><li>mean: 229.59 tokens</li><li>max: 1278 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 108.47 tokens</li><li>max: 1127 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 124.73 tokens</li><li>max: 1127 tokens</li></ul> |
* Samples:
  | anchor | positive | negative |
  |:-------|:---------|:---------|
  | <code>This article presents Derrida as a philosopher of history by reinterpreting his De la Grammatologie. In particular, it provides a schematic reconstruction of Part II of that book from the perspective of the problem of history. My account extends work on historicity in Derrida by privileging the themes of ‘history’ and ‘diagram’ in the Rousseau part. I thereby establish a Derridean concept of history which aims at accounting for the continuities and discontinuities of the past. This is in contrast to (...) some criticism that Derrida leaves behind, or inadequately accounts for history. Derrida describes a necessarily contorted condition of relating any historical event or development to itself or to another. This historicity informs other well-known aspects of Derrida's work, like the ‘quasi-transcendental’ terms he developed. I conclude that ‘history’ is a critical element in any understanding of deconstruction, and that deconstruction entails new kinds of history, but that some axioms...</code> | <code>Derrida and Other Philosophers</code> | <code>Derrida: Animals</code> |
  | <code>It is a struggle against the French policy of oppression, of economic impoverishment, and of social, cultural and political destruction of the Algerian people. Sartre, as a citizen of the oppressive state, chose to say Yes to the violent struggle of the oppressed, and No to the French violence. He explains that the aim of French violence is not only: the keeping of these enslaved men at arm's length; it seeks to dehumanize them. Everything will be done to wipe out their traditions, to substitute our language for theirs and to destroy their culture without giving them ours. Sheer physical fatigue will stupefy them. Starved and ill, if they have any spirit left, fear will finish the job; guns are levelled at the peasant; civilians come to take over his land and force him, by dint of flogging, to till the land for them. If he shows fight, the soldiers fire and he is no longer a man at all.2 Thus, the Algerians were forced to choose between two possibilities: slavery or freedom. They chose...</code> | <code>To avoid any political recognition of the freedom of the oppressed, and of their national independence, the oppressors will appeal to the status quo. Yet the colonialist prefers to evoke possibilities of social improvement because he knows that the demands of the natives are primarily political. And they are primarily political because the natives are aware that 'polities', in the colonies, is quite simply the installation and the regular functioning of an enormous repressive apparatus which alone permits super-exploitation. (CDR note 721) A Response To Hannah Arendt Arendt does not mention Sartre's political approach. It seems that she views the violent struggle of the national liberation movements as an antipolitical act, a destruction of the political realm, which she called the only true public realm. She repeatedly explains that the public realm is created by an exchange of opinions and dialogue between people, and by constituting laws that ensure stability and permanence. Violenc...</code> | <code>system and her [CIO] system diffel only in this respect. Thus, the "arbitrary 'bite' " [Mayo, p. 278], which Mayo finds objectionable in the [CIalt.] system is due to the satisfaction of a condition proposed by Neyman, a condition [CIO] stands in violation of.10 Lastly, on pp. 58-63 of my book, I offer a rebuttal to the objection discussed here, the objection that estimates labeled "best" by N-P standards may be deficient with respect to the legitimate concern to avoid conflicts between confidence levels and known (precise) probabilities. I base the rebuttal on a novel criterion: confidence equivalence. Perhaps others will find that defense adequate to excuse the triviality of (some) N-P "best" procedures. I do not. Nor do I find Mayo's proposals sufficient for the question at hand. 8This is Neyman's condition (ii) (Neyman 1937, p. 267). He uses it to eliminate a candidate estimation system, his #(1), pp. 269-70. 9I have recently discovered that R. von Mises observed this same difficul...</code> |
  | <code>This paper explores the managerial aspects of the relationship with stakeholders, under the assumption that transfer of knowledge is being made from relationship marketing and market orientation perspectives. These marketing tools may prove useful to manage the relationship with other stakeholders, as has been the case with customers. This study focuses on a sample of Spanish companies representing 43% of listed companies with the largest market capitalization. Given that this is the first time that corporate relationship with stakeholders is analyzed (...) in Spain, a qualitative technique (case analysis) was used. The main conclusion of the study is that most of the participant companies have a reactive position vis-à-vis stakeholders management systems. This attitude is reflected in their concern exclusively about ethical indexes managers. (shrink)</code> | <code>Business Ethics</code> | <code>Specific Freedoms, Misc</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```
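As a rough illustration (not the library's implementation): with these parameters, `MultipleNegativesRankingLoss` scores each anchor against every positive in the batch with cosine similarity, scales the scores by 20, and applies cross-entropy with the matching positive as the target; the library version additionally appends the explicit hard negatives as extra candidates.

```python
import torch
import torch.nn.functional as F

# Simplified in-batch-negatives sketch of MultipleNegativesRankingLoss
# with scale=20.0 and cosine similarity (normalized dot products).
anchors = F.normalize(torch.randn(8, 768), dim=-1)
positives = F.normalize(torch.randn(8, 768), dim=-1)

scores = 20.0 * anchors @ positives.T   # (batch, batch) scaled cosine similarities
labels = torch.arange(scores.size(0))   # the matching positive sits on the diagonal
loss = F.cross_entropy(scores, labels)
print(loss.item())
```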

### Evaluation Dataset

#### Unnamed Dataset


* Size: 4,860 evaluation samples
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
* Approximate statistics based on the first 1000 samples:
  |         | anchor | positive | negative |
  |:--------|:-------|:---------|:---------|
  | type    | string | string   | string   |
  | details | <ul><li>min: 5 tokens</li><li>mean: 234.77 tokens</li><li>max: 1438 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 123.62 tokens</li><li>max: 8192 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 121.25 tokens</li><li>max: 1090 tokens</li></ul> |
* Samples:
  | anchor | positive | negative |
  |:-------|:---------|:---------|
  | <code>He next proceeds to definitions of logically necessary propositions and of general propositions. He then examines with some care the notion of an empirical law of nature, and concludes that such a law is neither an entailment proposition nor a collective general proposition. His own account is that an empirical law is a conjunction of a logical statement to the effect that the relation between two ostensive concepts, say 'P' and 'Q,' is one of inclusion-or-overlap (or, in the case of '-Q,' of exclusion-oroverlap), and of a factual statement that as a matter of fact everything is either not a P or else a Q. In spite of the ingenuity of this view two criticisms occur at once. How could the factual statement be known? Even if known, how would it justify the BOOK REVIEW 51 counterfactual statement, "If anything were a P, it would be a Q" In discussing the use of logical propositions in deductive reasoning and definitions, K6rner admits that we commonly make use of excessive entailments, e....</code> | <code>It applies at most to only a subset of beautiful things. K6rner also states an embryonic theory of moral action. He holds that the proposition, "a person, X, states that an action b, which has naturalistic characteristics 'P,' is moral," is equivalent to the proposition " X applies 'P' to b and accepts 'P' practically." To accept a concept practically is to desire an action which satisfies the concept and to desire that everyone else desire like actions in like circumstances. His view here has the advantage of recognizing the implicit universalizability of moral statefnents, but otherwise it seems to be only a slightly modified emotive or attitudinal theory. Thus, for example, he says that a person "believes in" a purely teleological ethics if he practically accepts only teleological concepts and rules. In a final group of four chapters K6rner turns his attention to metaphysical "directives. " These are not propositions but BOOK REVIEW 53 rules. They include in each case a rule for the...</code> | <code>The soul is invisible, because it is that which does the seeing. Its externally given and appearing image is the physical, which may be studied in abstraction as if it were real in and for itself. Philosophy of science is the logical reflection on and of the meaning of existence qua scientifically knowing in correlation with the objects of the sciences. In opposition to irrationalism, for which logic falsifies immediate experience, scientific method criticizes immediate experience for being logically absurd : The sun rises in the East, wanders through the sky, and settles in the West ; this is true in immediate experience, but false in astronomy. The scientific worldview sees the world as object for a knowing subject ; perceptual data are thought in the logical form of concepts related in propositions. The principle of scientific reason assumes a partial identity between the logical forms in the mind and the same logical forms as determining objects of knowledge ; that which makes the ...</code> |
  | <code>Educating the gaze is easily understood as becoming conscious about what is 'really' happening in the world and becoming aware of the way our gaze is itself bound to a perspective and particular position. However, the paper explores a different idea. It understands educating the gaze not in the sense of 'educare' (teaching) but of 'e-ducere' as leading out, reaching out. E-ducating the gaze is not about getting at a liberated or critical view, but about liberating or displacing our view. (...) It is not about becoming conscious or aware , but about becoming attentive , about paying attention . E-ducating the gaze, then, is not depending on method, but relying on discipline; it does not require a rich methodology, but asks for a poor pedagogy, i.e. for practices which allow to expose ourselves. One example of such practice is that of walking. Consequently e-ducating the gaze could be about an invitation to go walking. This idea is explored b way of a comment on two quotations, one by Wa...</code> | <code>Applied Ethics</code> | <code>Modal and Intensional Logic, Misc</code> |
  | <code>There is still no consensus. Furthermore, it is also possible that Boltzmann held one view in his meth odology of science and another in what might be called ontology or his theory of nature (Blackmore 1972, 1982). But starting in 1990 a vast amount of previously unsuspected informa tion began to appear for the first time, which may have initially seemed to allow for the possibility that basic agreement on what Boltzmann's real' philosophy was might finally be attained. Ilse M. Fasol-Boltzmann, Boltzmann's granddaughter published a book in that year which included a great deal of philosophical material from him which she had translated from his original shorthand (Fasol-Boltzmann 1990). She also published eighteen of his lectures on natural philosophy plus Boltzmann's notes for them as well as several other philosophical fragments. We have included INTRODUCTION 3 a translation of three lectures plus an analysis of the remaining fifteen lectures in this anthology But the ideas revealed ...</code> | <code>Nevertheless, his apparent opposition to efficient causes makes one wonder just how realistic or practical his apparent or nascent world view was at that time (Fasol-Boltzmann 1990, p. 273). In conclusion, the new material and lectures clearly increase our under standing about Boltzmann the philosopher, methodologist, and mathem atician, even if a few contradictory remarks may also add to our confusion 8 JOHN BLACKMORE in some ways. But all in all the new data has added a great deal toward putting the philosophical perspective of this great man back together again. Like Humpty Dumpty who had a great fall, Boltzmann's philosophical heritage had been broken into many pieces, but all the king's scholars are rejoining enough together to gradually recreate an original and valuable outlook. Soon, many more people will appreciate that Ludwig Boltzmann was a profound if very troubled thinker and that there is a coherent system within the range of ideas which he was considering, even if he coul...</code> | <code>16-24 , and a more popular exposition de l'esprit, . 18-27.</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: epoch
- `per_device_eval_batch_size`: 16
- `learning_rate`: 3e-05
- `num_train_epochs`: 1
- `warmup_ratio`: 0.1
- `fp16`: True
- `load_best_model_at_end`: True
- `ddp_find_unused_parameters`: False

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: epoch
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 8
- `per_device_eval_batch_size`: 16
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 3e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 1
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: True
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: False
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`:
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional

</details>
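The non-default values above map roughly onto `SentenceTransformerTrainingArguments` as in the sketch below. This is an approximation, not the exact AutoTrain invocation; `output_dir` and `save_strategy` are assumptions (a `save_strategy` matching `eval_strategy` is required once `load_best_model_at_end=True`):

```python
from sentence_transformers import SentenceTransformerTrainingArguments

# Sketch: the non-default hyperparameters listed above as trainer arguments.
args = SentenceTransformerTrainingArguments(
    output_dir="output",       # assumed; not recorded in the card
    eval_strategy="epoch",
    save_strategy="epoch",     # assumed; must match eval_strategy here
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    learning_rate=3e-5,
    num_train_epochs=1,
    warmup_ratio=0.1,
    fp16=True,
    load_best_model_at_end=True,
    ddp_find_unused_parameters=False,
)
```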

### Training Logs
<details><summary>Click to expand</summary>

| Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
|:------:|:-----:|:-------------:|:---------------:|:---------------:|
| 0.0022 | 25 | 1.2002 | - | - |
| 0.0043 | 50 | 1.1623 | - | - |
| 0.0065 | 75 | 1.2012 | - | - |
| 0.0087 | 100 | 1.1853 | - | - |
| 0.0108 | 125 | 0.9767 | - | - |
| 0.0130 | 150 | 0.865 | - | - |
| 0.0152 | 175 | 0.7733 | - | - |
| 0.0173 | 200 | 0.9545 | - | - |
| 0.0195 | 225 | 0.8309 | - | - |
| 0.0217 | 250 | 0.7514 | - | - |
| 0.0238 | 275 | 0.5555 | - | - |
| 0.0260 | 300 | 0.563 | - | - |
| 0.0282 | 325 | 0.618 | - | - |
| 0.0303 | 350 | 0.6538 | - | - |
| 0.0325 | 375 | 0.5802 | - | - |
| 0.0347 | 400 | 0.6568 | - | - |
| 0.0368 | 425 | 0.4934 | - | - |
| 0.0390 | 450 | 0.597 | - | - |
| 0.0412 | 475 | 0.3812 | - | - |
| 0.0433 | 500 | 0.482 | - | - |
| 0.0455 | 525 | 0.5347 | - | - |
| 0.0476 | 550 | 0.5012 | - | - |
| 0.0498 | 575 | 0.5765 | - | - |
| 0.0520 | 600 | 0.4286 | - | - |
| 0.0541 | 625 | 0.5167 | - | - |
| 0.0563 | 650 | 0.4791 | - | - |
| 0.0585 | 675 | 0.5022 | - | - |
| 0.0606 | 700 | 0.438 | - | - |
| 0.0628 | 725 | 0.3995 | - | - |
| 0.0650 | 750 | 0.2924 | - | - |
| 0.0671 | 775 | 0.4391 | - | - |
| 0.0693 | 800 | 0.4328 | - | - |
| 0.0715 | 825 | 0.5658 | - | - |
| 0.0736 | 850 | 0.4541 | - | - |
| 0.0758 | 875 | 0.5381 | - | - |
| 0.0780 | 900 | 0.4523 | - | - |
| 0.0801 | 925 | 0.3522 | - | - |
| 0.0823 | 950 | 0.4475 | - | - |
| 0.0845 | 975 | 0.4448 | - | - |
| 0.0866 | 1000 | 0.407 | - | - |
| 0.0888 | 1025 | 0.4616 | - | - |
| 0.0910 | 1050 | 0.4213 | - | - |
| 0.0931 | 1075 | 0.465 | - | - |
| 0.0953 | 1100 | 0.2964 | - | - |
| 0.0975 | 1125 | 0.4414 | - | - |
| 0.0996 | 1150 | 0.3508 | - | - |
| 0.1018 | 1175 | 0.3362 | - | - |
| 0.1040 | 1200 | 0.4953 | - | - |
| 0.1061 | 1225 | 0.4041 | - | - |
| 0.1083 | 1250 | 0.3773 | - | - |
| 0.1105 | 1275 | 0.3574 | - | - |
| 0.1126 | 1300 | 0.642 | - | - |
| 0.1148 | 1325 | 0.3783 | - | - |
| 0.1170 | 1350 | 0.4905 | - | - |
| 0.1191 | 1375 | 0.3937 | - | - |
| 0.1213 | 1400 | 0.4245 | - | - |
| 0.1235 | 1425 | 0.4139 | - | - |
| 0.1256 | 1450 | 0.4305 | - | - |
| 0.1278 | 1475 | 0.675 | - | - |
| 0.1299 | 1500 | 0.55 | - | - |
| 0.1321 | 1525 | 0.4033 | - | - |
| 0.1343 | 1550 | 0.4167 | - | - |
| 0.1364 | 1575 | 0.3814 | - | - |
| 0.1386 | 1600 | 0.5183 | - | - |
| 0.1408 | 1625 | 0.3343 | - | - |
| 0.1429 | 1650 | 0.4212 | - | - |
| 0.1451 | 1675 | 0.4737 | - | - |
| 0.1473 | 1700 | 0.4563 | - | - |
| 0.1494 | 1725 | 0.4251 | - | - |
| 0.1516 | 1750 | 0.3497 | - | - |
| 0.1538 | 1775 | 0.3753 | - | - |
| 0.1559 | 1800 | 0.4031 | - | - |
| 0.1581 | 1825 | 0.4037 | - | - |
| 0.1603 | 1850 | 0.4114 | - | - |
| 0.1624 | 1875 | 0.3848 | - | - |
| 0.1646 | 1900 | 0.5088 | - | - |
| 0.1668 | 1925 | 0.4032 | - | - |
| 0.1689 | 1950 | 0.3354 | - | - |
| 0.1711 | 1975 | 0.4163 | - | - |
| 0.1733 | 2000 | 0.3715 | - | - |
| 0.1754 | 2025 | 0.3424 | - | - |
| 0.1776 | 2050 | 0.3311 | - | - |
| 0.1798 | 2075 | 0.4362 | - | - |
| 0.1819 | 2100 | 0.4441 | - | - |
| 0.1841 | 2125 | 0.3122 | - | - |
| 0.1863 | 2150 | 0.3717 | - | - |
| 0.1884 | 2175 | 0.3461 | - | - |
| 0.1906 | 2200 | 0.4816 | - | - |
| 0.1928 | 2225 | 0.4784 | - | - |
| 0.1949 | 2250 | 0.4334 | - | - |
| 0.1971 | 2275 | 0.3437 | - | - |
| 0.1993 | 2300 | 0.4333 | - | - |
| 0.2014 | 2325 | 0.3609 | - | - |
| 0.2036 | 2350 | 0.3437 | - | - |
| 0.2058 | 2375 | 0.4911 | - | - |
| 0.2079 | 2400 | 0.3872 | - | - |
| 0.2101 | 2425 | 0.276 | - | - |
| 0.2122 | 2450 | 0.3318 | - | - |
| 0.2144 | 2475 | 0.4833 | - | - |
| 0.2166 | 2500 | 0.4656 | - | - |
| 0.2187 | 2525 | 0.4232 | - | - |
| 0.2209 | 2550 | 0.434 | - | - |
| 0.2231 | 2575 | 0.2479 | - | - |
| 0.2252 | 2600 | 0.4656 | - | - |
| 0.2274 | 2625 | 0.3881 | - | - |
| 0.2296 | 2650 | 0.3637 | - | - |
| 0.2317 | 2675 | 0.3099 | - | - |
| 0.2339 | 2700 | 0.3933 | - | - |
| 0.2361 | 2725 | 0.3789 | - | - |
| 0.2382 | 2750 | 0.4056 | - | - |
| 0.2404 | 2775 | 0.4132 | - | - |
| 0.2426 | 2800 | 0.375 | - | - |
| 0.2447 | 2825 | 0.3026 | - | - |
| 0.2469 | 2850 | 0.5372 | - | - |
| 0.2491 | 2875 | 0.4233 | - | - |
| 0.2512 | 2900 | 0.2945 | - | - |
| 0.2534 | 2925 | 0.2916 | - | - |
| 0.2556 | 2950 | 0.3536 | - | - |
| 0.2577 | 2975 | 0.3246 | - | - |
| 0.2599 | 3000 | 0.4236 | - | - |
| 0.2621 | 3025 | 0.4088 | - | - |
| 0.2642 | 3050 | 0.4522 | - | - |
| 0.2664 | 3075 | 0.3445 | - | - |
| 0.2686 | 3100 | 0.3575 | - | - |
| 0.2707 | 3125 | 0.3809 | - | - |
| 0.2729 | 3150 | 0.3364 | - | - |
| 0.2751 | 3175 | 0.4103 | - | - |
| 0.2772 | 3200 | 0.3502 | - | - |
| 0.2794 | 3225 | 0.2632 | - | - |
| 0.2816 | 3250 | 0.406 | - | - |
| 0.2837 | 3275 | 0.4363 | - | - |
| 0.2859 | 3300 | 0.2819 | - | - |
| 0.2881 | 3325 | 0.3421 | - | - |
| 0.2902 | 3350 | 0.269 | - | - |
| 0.2924 | 3375 | 0.2902 | - | - |
| 0.2946 | 3400 | 0.3548 | - | - |
| 0.2967 | 3425 | 0.4575 | - | - |
| 0.2989 | 3450 | 0.3942 | - | - |
| 0.3010 | 3475 | 0.3537 | - | - |
| 0.3032 | 3500 | 0.3672 | - | - |
| 0.3054 | 3525 | 0.3502 | - | - |
| 0.3075 | 3550 | 0.2545 | - | - |
| 0.3097 | 3575 | 0.2544 | - | - |
| 0.3119 | 3600 | 0.3443 | - | - |
| 0.3140 | 3625 | 0.3784 | - | - |
| 0.3162 | 3650 | 0.3828 | - | - |
| 0.3184 | 3675 | 0.4032 | - | - |
| 0.3205 | 3700 | 0.2556 | - | - |
| 0.3227 | 3725 | 0.3352 | - | - |
| 0.3249 | 3750 | 0.4054 | - | - |
| 0.3270 | 3775 | 0.3049 | - | - |
| 0.3292 | 3800 | 0.2223 | - | - |
| 0.3314 | 3825 | 0.4878 | - | - |
| 0.3335 | 3850 | 0.3015 | - | - |
| 0.3357 | 3875 | 0.3816 | - | - |
| 0.3379 | 3900 | 0.3334 | - | - |
| 0.3400 | 3925 | 0.3724 | - | - |
| 0.3422 | 3950 | 0.4217 | - | - |
| 0.3444 | 3975 | 0.4339 | - | - |
| 0.3465 | 4000 | 0.3642 | - | - |
| 0.3487 | 4025 | 0.3819 | - | - |
| 0.3509 | 4050 | 0.2796 | - | - |
| 0.3530 | 4075 | 0.4277 | - | - |
| 0.3552 | 4100 | 0.3407 | - | - |
| 0.3574 | 4125 | 0.2781 | - | - |
| 0.3595 | 4150 | 0.4274 | - | - |
| 0.3617 | 4175 | 0.3609 | - | - |
| 0.3639 | 4200 | 0.3476 | - | - |
| 0.3660 | 4225 | 0.41 | - | - |
| 0.3682 | 4250 | 0.4003 | - | - |
| 0.3704 | 4275 | 0.306 | - | - |
| 0.3725 | 4300 | 0.2335 | - | - |
| 0.3747 | 4325 | 0.2733 | - | - |
| 0.3769 | 4350 | 0.3007 | - | - |
| 0.3790 | 4375 | 0.3086 | - | - |
| 0.3812 | 4400 | 0.365 | - | - |
| 0.3833 | 4425 | 0.3255 | - | - |
| 0.3855 | 4450 | 0.3765 | - | - |
| 0.3877 | 4475 | 0.2946 | - | - |
| 0.3898 | 4500 | 0.3298 | - | - |
| 0.3920 | 4525 | 0.3645 | - | - |
| 0.3942 | 4550 | 0.2403 | - | - |
| 0.3963 | 4575 | 0.28 | - | - |
| 0.3985 | 4600 | 0.3814 | - | - |
| 0.4007 | 4625 | 0.3419 | - | - |
| 0.4028 | 4650 | 0.3374 | - | - |
| 0.4050 | 4675 | 0.3511 | - | - |
| 0.4072 | 4700 | 0.4339 | - | - |
| 0.4093 | 4725 | 0.3441 | - | - |
| 0.4115 | 4750 | 0.346 | - | - |
| 0.4137 | 4775 | 0.3723 | - | - |
| 0.4158 | 4800 | 0.2075 | - | - |
| 0.4180 | 4825 | 0.2431 | - | - |
| 0.4202 | 4850 | 0.2642 | - | - |
| 0.4223 | 4875 | 0.1763 | - | - |
| 0.4245 | 4900 | 0.3862 | - | - |
| 0.4267 | 4925 | 0.3053 | - | - |
| 0.4288 | 4950 | 0.3162 | - | - |
| 0.4310 | 4975 | 0.3178 | - | - |
| 0.4332 | 5000 | 0.2789 | - | - |
| 0.4353 | 5025 | 0.1777 | - | - |
| 0.4375 | 5050 | 0.4155 | - | - |
| 0.4397 | 5075 | 0.2983 | - | - |
| 0.4418 | 5100 | 0.3687 | - | - |
| 0.4440 | 5125 | 0.2428 | - | - |
| 0.4462 | 5150 | 0.3071 | - | - |
| 0.4483 | 5175 | 0.2911 | - | - |
| 0.4505 | 5200 | 0.3152 | - | - |
| 0.4527 | 5225 | 0.2776 | - | - |
+
| 0.4548 | 5250 | 0.2674 | - | - |
|
679 |
+
| 0.4570 | 5275 | 0.3035 | - | - |
|
680 |
+
| 0.4592 | 5300 | 0.3352 | - | - |
|
681 |
+
| 0.4613 | 5325 | 0.3879 | - | - |
|
682 |
+
| 0.4635 | 5350 | 0.3828 | - | - |
|
683 |
+
| 0.4657 | 5375 | 0.2797 | - | - |
|
684 |
+
| 0.4678 | 5400 | 0.3492 | - | - |
|
685 |
+
| 0.4700 | 5425 | 0.5 | - | - |
|
686 |
+
| 0.4721 | 5450 | 0.2317 | - | - |
|
687 |
+
| 0.4743 | 5475 | 0.2411 | - | - |
|
688 |
+
| 0.4765 | 5500 | 0.277 | - | - |
|
689 |
+
| 0.4786 | 5525 | 0.4112 | - | - |
|
690 |
+
| 0.4808 | 5550 | 0.5116 | - | - |
|
691 |
+
| 0.4830 | 5575 | 0.3264 | - | - |
|
692 |
+
| 0.4851 | 5600 | 0.3688 | - | - |
|
693 |
+
| 0.4873 | 5625 | 0.3224 | - | - |
|
694 |
+
| 0.4895 | 5650 | 0.3778 | - | - |
|
695 |
+
| 0.4916 | 5675 | 0.3671 | - | - |
|
696 |
+
| 0.4938 | 5700 | 0.3331 | - | - |
|
697 |
+
| 0.4960 | 5725 | 0.3426 | - | - |
|
698 |
+
| 0.4981 | 5750 | 0.2863 | - | - |
|
699 |
+
| 0.5003 | 5775 | 0.5822 | - | - |
|
700 |
+
| 0.5025 | 5800 | 0.2687 | - | - |
|
701 |
+
| 0.5046 | 5825 | 0.3365 | - | - |
|
702 |
+
| 0.5068 | 5850 | 0.4609 | - | - |
|
703 |
+
| 0.5090 | 5875 | 0.3127 | - | - |
|
704 |
+
| 0.5111 | 5900 | 0.2705 | - | - |
|
705 |
+
| 0.5133 | 5925 | 0.3089 | - | - |
|
706 |
+
| 0.5155 | 5950 | 0.3386 | - | - |
|
707 |
+
| 0.5176 | 5975 | 0.3796 | - | - |
|
708 |
+
| 0.5198 | 6000 | 0.4231 | - | - |
|
709 |
+
| 0.5220 | 6025 | 0.3922 | - | - |
|
710 |
+
| 0.5241 | 6050 | 0.3138 | - | - |
|
711 |
+
| 0.5263 | 6075 | 0.3106 | - | - |
|
712 |
+
| 0.5285 | 6100 | 0.188 | - | - |
|
713 |
+
| 0.5306 | 6125 | 0.209 | - | - |
|
714 |
+
| 0.5328 | 6150 | 0.2617 | - | - |
|
715 |
+
| 0.5350 | 6175 | 0.3059 | - | - |
|
716 |
+
| 0.5371 | 6200 | 0.2764 | - | - |
|
717 |
+
| 0.5393 | 6225 | 0.2801 | - | - |
|
718 |
+
| 0.5415 | 6250 | 0.3744 | - | - |
|
719 |
+
| 0.5436 | 6275 | 0.3067 | - | - |
|
720 |
+
| 0.5458 | 6300 | 0.3305 | - | - |
|
721 |
+
| 0.5480 | 6325 | 0.2827 | - | - |
|
722 |
+
| 0.5501 | 6350 | 0.2712 | - | - |
|
723 |
+
| 0.5523 | 6375 | 0.2677 | - | - |
|
724 |
+
| 0.5544 | 6400 | 0.4269 | - | - |
|
725 |
+
| 0.5566 | 6425 | 0.3834 | - | - |
|
726 |
+
| 0.5588 | 6450 | 0.4177 | - | - |
|
727 |
+
| 0.5609 | 6475 | 0.2457 | - | - |
|
728 |
+
| 0.5631 | 6500 | 0.348 | - | - |
|
729 |
+
| 0.5653 | 6525 | 0.3035 | - | - |
|
730 |
+
| 0.5674 | 6550 | 0.39 | - | - |
|
731 |
+
| 0.5696 | 6575 | 0.366 | - | - |
|
732 |
+
| 0.5718 | 6600 | 0.2299 | - | - |
|
733 |
+
| 0.5739 | 6625 | 0.1737 | - | - |
|
734 |
+
| 0.5761 | 6650 | 0.3773 | - | - |
|
735 |
+
| 0.5783 | 6675 | 0.3409 | - | - |
|
736 |
+
| 0.5804 | 6700 | 0.1739 | - | - |
|
737 |
+
| 0.5826 | 6725 | 0.3462 | - | - |
|
738 |
+
| 0.5848 | 6750 | 0.2976 | - | - |
|
739 |
+
| 0.5869 | 6775 | 0.3246 | - | - |
|
740 |
+
| 0.5891 | 6800 | 0.3808 | - | - |
|
741 |
+
| 0.5913 | 6825 | 0.2926 | - | - |
|
742 |
+
| 0.5934 | 6850 | 0.2709 | - | - |
|
743 |
+
| 0.5956 | 6875 | 0.3777 | - | - |
|
744 |
+
| 0.5978 | 6900 | 0.2834 | - | - |
|
745 |
+
| 0.5999 | 6925 | 0.2965 | - | - |
|
746 |
+
| 0.6021 | 6950 | 0.2399 | - | - |
|
747 |
+
| 0.6043 | 6975 | 0.2936 | - | - |
|
748 |
+
| 0.6064 | 7000 | 0.2674 | - | - |
|
749 |
+
| 0.6086 | 7025 | 0.265 | - | - |
|
750 |
+
| 0.6108 | 7050 | 0.3257 | - | - |
|
751 |
+
| 0.6129 | 7075 | 0.3504 | - | - |
|
752 |
+
| 0.6151 | 7100 | 0.1485 | - | - |
|
753 |
+
| 0.6173 | 7125 | 0.2598 | - | - |
|
754 |
+
| 0.6194 | 7150 | 0.2838 | - | - |
|
755 |
+
| 0.6216 | 7175 | 0.3391 | - | - |
|
756 |
+
| 0.6238 | 7200 | 0.3568 | - | - |
|
757 |
+
| 0.6259 | 7225 | 0.3001 | - | - |
|
758 |
+
| 0.6281 | 7250 | 0.2613 | - | - |
|
759 |
+
| 0.6303 | 7275 | 0.3379 | - | - |
|
760 |
+
| 0.6324 | 7300 | 0.3347 | - | - |
|
761 |
+
| 0.6346 | 7325 | 0.242 | - | - |
|
762 |
+
| 0.6367 | 7350 | 0.3076 | - | - |
|
763 |
+
| 0.6389 | 7375 | 0.3055 | - | - |
|
764 |
+
| 0.6411 | 7400 | 0.4014 | - | - |
|
765 |
+
| 0.6432 | 7425 | 0.3723 | - | - |
|
766 |
+
| 0.6454 | 7450 | 0.3421 | - | - |
|
767 |
+
| 0.6476 | 7475 | 0.4306 | - | - |
|
768 |
+
| 0.6497 | 7500 | 0.2536 | - | - |
|
769 |
+
| 0.6519 | 7525 | 0.264 | - | - |
|
770 |
+
| 0.6541 | 7550 | 0.1767 | - | - |
|
771 |
+
| 0.6562 | 7575 | 0.259 | - | - |
|
772 |
+
| 0.6584 | 7600 | 0.2761 | - | - |
|
773 |
+
| 0.6606 | 7625 | 0.2934 | - | - |
|
774 |
+
| 0.6627 | 7650 | 0.3055 | - | - |
|
775 |
+
| 0.6649 | 7675 | 0.2532 | - | - |
|
776 |
+
| 0.6671 | 7700 | 0.2942 | - | - |
|
777 |
+
| 0.6692 | 7725 | 0.2048 | - | - |
|
778 |
+
| 0.6714 | 7750 | 0.2884 | - | - |
|
779 |
+
| 0.6736 | 7775 | 0.3598 | - | - |
|
780 |
+
| 0.6757 | 7800 | 0.3318 | - | - |
|
781 |
+
| 0.6779 | 7825 | 0.3058 | - | - |
|
782 |
+
| 0.6801 | 7850 | 0.3395 | - | - |
|
783 |
+
| 0.6822 | 7875 | 0.2973 | - | - |
|
784 |
+
| 0.6844 | 7900 | 0.2741 | - | - |
|
785 |
+
| 0.6866 | 7925 | 0.2493 | - | - |
|
786 |
+
| 0.6887 | 7950 | 0.2966 | - | - |
|
787 |
+
| 0.6909 | 7975 | 0.3207 | - | - |
|
788 |
+
| 0.6931 | 8000 | 0.2501 | - | - |
|
789 |
+
| 0.6952 | 8025 | 0.4028 | - | - |
|
790 |
+
| 0.6974 | 8050 | 0.3549 | - | - |
|
791 |
+
| 0.6996 | 8075 | 0.3805 | - | - |
|
792 |
+
| 0.7017 | 8100 | 0.353 | - | - |
|
793 |
+
| 0.7039 | 8125 | 0.3569 | - | - |
|
794 |
+
| 0.7061 | 8150 | 0.2588 | - | - |
|
795 |
+
| 0.7082 | 8175 | 0.2252 | - | - |
|
796 |
+
| 0.7104 | 8200 | 0.2747 | - | - |
|
797 |
+
| 0.7126 | 8225 | 0.3239 | - | - |
|
798 |
+
| 0.7147 | 8250 | 0.2954 | - | - |
|
799 |
+
| 0.7169 | 8275 | 0.3749 | - | - |
|
800 |
+
| 0.7191 | 8300 | 0.2757 | - | - |
|
801 |
+
| 0.7212 | 8325 | 0.3012 | - | - |
|
802 |
+
| 0.7234 | 8350 | 0.2985 | - | - |
|
803 |
+
| 0.7255 | 8375 | 0.2656 | - | - |
|
804 |
+
| 0.7277 | 8400 | 0.2007 | - | - |
|
805 |
+
| 0.7299 | 8425 | 0.2402 | - | - |
|
806 |
+
| 0.7320 | 8450 | 0.3434 | - | - |
|
807 |
+
| 0.7342 | 8475 | 0.2628 | - | - |
|
808 |
+
| 0.7364 | 8500 | 0.265 | - | - |
|
809 |
+
| 0.7385 | 8525 | 0.3748 | - | - |
|
810 |
+
| 0.7407 | 8550 | 0.249 | - | - |
|
811 |
+
| 0.7429 | 8575 | 0.3375 | - | - |
|
812 |
+
| 0.7450 | 8600 | 0.3336 | - | - |
|
813 |
+
| 0.7472 | 8625 | 0.3702 | - | - |
|
814 |
+
| 0.7494 | 8650 | 0.3494 | - | - |
|
815 |
+
| 0.7515 | 8675 | 0.2996 | - | - |
|
816 |
+
| 0.7537 | 8700 | 0.2433 | - | - |
|
817 |
+
| 0.7559 | 8725 | 0.3027 | - | - |
|
818 |
+
| 0.7580 | 8750 | 0.382 | - | - |
|
819 |
+
| 0.7602 | 8775 | 0.2874 | - | - |
|
820 |
+
| 0.7624 | 8800 | 0.2737 | - | - |
|
821 |
+
| 0.7645 | 8825 | 0.3212 | - | - |
|
822 |
+
| 0.7667 | 8850 | 0.3475 | - | - |
|
823 |
+
| 0.7689 | 8875 | 0.221 | - | - |
|
824 |
+
| 0.7710 | 8900 | 0.2587 | - | - |
|
825 |
+
| 0.7732 | 8925 | 0.2852 | - | - |
|
826 |
+
| 0.7754 | 8950 | 0.3837 | - | - |
|
827 |
+
| 0.7775 | 8975 | 0.2333 | - | - |
|
828 |
+
| 0.7797 | 9000 | 0.3036 | - | - |
|
829 |
+
| 0.7819 | 9025 | 0.3287 | - | - |
|
830 |
+
| 0.7840 | 9050 | 0.3248 | - | - |
|
831 |
+
| 0.7862 | 9075 | 0.2395 | - | - |
|
832 |
+
| 0.7884 | 9100 | 0.2647 | - | - |
|
833 |
+
| 0.7905 | 9125 | 0.3345 | - | - |
|
834 |
+
| 0.7927 | 9150 | 0.3421 | - | - |
|
835 |
+
| 0.7949 | 9175 | 0.3496 | - | - |
|
836 |
+
| 0.7970 | 9200 | 0.253 | - | - |
|
837 |
+
| 0.7992 | 9225 | 0.3462 | - | - |
|
838 |
+
| 0.8014 | 9250 | 0.2688 | - | - |
|
839 |
+
| 0.8035 | 9275 | 0.3301 | - | - |
|
840 |
+
| 0.8057 | 9300 | 0.3382 | - | - |
|
841 |
+
| 0.8078 | 9325 | 0.2219 | - | - |
|
842 |
+
| 0.8100 | 9350 | 0.278 | - | - |
|
843 |
+
| 0.8122 | 9375 | 0.2338 | - | - |
|
844 |
+
| 0.8143 | 9400 | 0.2732 | - | - |
|
845 |
+
| 0.8165 | 9425 | 0.2973 | - | - |
|
846 |
+
| 0.8187 | 9450 | 0.2783 | - | - |
|
847 |
+
| 0.8208 | 9475 | 0.2418 | - | - |
|
848 |
+
| 0.8230 | 9500 | 0.2603 | - | - |
|
849 |
+
| 0.8252 | 9525 | 0.1888 | - | - |
|
850 |
+
| 0.8273 | 9550 | 0.2581 | - | - |
|
851 |
+
| 0.8295 | 9575 | 0.2742 | - | - |
|
852 |
+
| 0.8317 | 9600 | 0.2156 | - | - |
|
853 |
+
| 0.8338 | 9625 | 0.3317 | - | - |
|
854 |
+
| 0.8360 | 9650 | 0.1967 | - | - |
|
855 |
+
| 0.8382 | 9675 | 0.1701 | - | - |
|
856 |
+
| 0.8403 | 9700 | 0.3064 | - | - |
|
857 |
+
| 0.8425 | 9725 | 0.3511 | - | - |
|
858 |
+
| 0.8447 | 9750 | 0.2461 | - | - |
|
859 |
+
| 0.8468 | 9775 | 0.3047 | - | - |
|
860 |
+
| 0.8490 | 9800 | 0.3234 | - | - |
|
861 |
+
| 0.8512 | 9825 | 0.2843 | - | - |
|
862 |
+
| 0.8533 | 9850 | 0.3365 | - | - |
|
863 |
+
| 0.8555 | 9875 | 0.3802 | - | - |
|
864 |
+
| 0.8577 | 9900 | 0.2587 | - | - |
|
865 |
+
| 0.8598 | 9925 | 0.2367 | - | - |
|
866 |
+
| 0.8620 | 9950 | 0.2971 | - | - |
|
867 |
+
| 0.8642 | 9975 | 0.2884 | - | - |
|
868 |
+
| 0.8663 | 10000 | 0.2296 | - | - |
|
869 |
+
| 0.8685 | 10025 | 0.3145 | - | - |
|
870 |
+
| 0.8707 | 10050 | 0.178 | - | - |
|
871 |
+
| 0.8728 | 10075 | 0.2681 | - | - |
|
872 |
+
| 0.8750 | 10100 | 0.3191 | - | - |
|
873 |
+
| 0.8772 | 10125 | 0.2544 | - | - |
|
874 |
+
| 0.8793 | 10150 | 0.2965 | - | - |
|
875 |
+
| 0.8815 | 10175 | 0.317 | - | - |
|
876 |
+
| 0.8837 | 10200 | 0.2149 | - | - |
|
877 |
+
| 0.8858 | 10225 | 0.4876 | - | - |
|
878 |
+
| 0.8880 | 10250 | 0.2984 | - | - |
|
879 |
+
| 0.8901 | 10275 | 0.3024 | - | - |
|
880 |
+
| 0.8923 | 10300 | 0.2447 | - | - |
|
881 |
+
| 0.8945 | 10325 | 0.2684 | - | - |
|
882 |
+
| 0.8966 | 10350 | 0.1714 | - | - |
|
883 |
+
| 0.8988 | 10375 | 0.2776 | - | - |
|
884 |
+
| 0.9010 | 10400 | 0.2745 | - | - |
|
885 |
+
| 0.9031 | 10425 | 0.3299 | - | - |
|
886 |
+
| 0.9053 | 10450 | 0.2629 | - | - |
|
887 |
+
| 0.9075 | 10475 | 0.3627 | - | - |
|
888 |
+
| 0.9096 | 10500 | 0.2236 | - | - |
|
889 |
+
| 0.9118 | 10525 | 0.2819 | - | - |
|
890 |
+
| 0.9140 | 10550 | 0.3129 | - | - |
|
891 |
+
| 0.9161 | 10575 | 0.3051 | - | - |
|
892 |
+
| 0.9183 | 10600 | 0.3955 | - | - |
|
893 |
+
| 0.9205 | 10625 | 0.2493 | - | - |
|
894 |
+
| 0.9226 | 10650 | 0.2543 | - | - |
|
895 |
+
| 0.9248 | 10675 | 0.2222 | - | - |
|
896 |
+
| 0.9270 | 10700 | 0.2823 | - | - |
|
897 |
+
| 0.9291 | 10725 | 0.3098 | - | - |
|
898 |
+
| 0.9313 | 10750 | 0.3009 | - | - |
|
899 |
+
| 0.9335 | 10775 | 0.2623 | - | - |
|
900 |
+
| 0.9356 | 10800 | 0.1952 | - | - |
|
901 |
+
| 0.9378 | 10825 | 0.4527 | - | - |
|
902 |
+
| 0.9400 | 10850 | 0.2323 | - | - |
|
903 |
+
| 0.9421 | 10875 | 0.3109 | - | - |
|
904 |
+
| 0.9443 | 10900 | 0.3335 | - | - |
|
905 |
+
| 0.9465 | 10925 | 0.2862 | - | - |
|
906 |
+
| 0.9486 | 10950 | 0.4005 | - | - |
|
907 |
+
| 0.9508 | 10975 | 0.2815 | - | - |
|
908 |
+
| 0.9530 | 11000 | 0.2157 | - | - |
|
909 |
+
| 0.9551 | 11025 | 0.3733 | - | - |
|
910 |
+
| 0.9573 | 11050 | 0.2843 | - | - |
|
911 |
+
| 0.9595 | 11075 | 0.1963 | - | - |
|
912 |
+
| 0.9616 | 11100 | 0.3081 | - | - |
|
913 |
+
| 0.9638 | 11125 | 0.2317 | - | - |
|
914 |
+
| 0.9660 | 11150 | 0.3027 | - | - |
|
915 |
+
| 0.9681 | 11175 | 0.3581 | - | - |
|
916 |
+
| 0.9703 | 11200 | 0.3 | - | - |
|
917 |
+
| 0.9725 | 11225 | 0.2797 | - | - |
|
918 |
+
| 0.9746 | 11250 | 0.2918 | - | - |
|
919 |
+
| 0.9768 | 11275 | 0.2519 | - | - |
|
920 |
+
| 0.9789 | 11300 | 0.2183 | - | - |
|
921 |
+
| 0.9811 | 11325 | 0.2764 | - | - |
|
922 |
+
| 0.9833 | 11350 | 0.4107 | - | - |
|
923 |
+
| 0.9854 | 11375 | 0.3135 | - | - |
|
924 |
+
| 0.9876 | 11400 | 0.2138 | - | - |
|
925 |
+
| 0.9898 | 11425 | 0.2984 | - | - |
|
926 |
+
| 0.9919 | 11450 | 0.2407 | - | - |
|
927 |
+
| 0.9941 | 11475 | 0.2449 | - | - |
|
928 |
+
| 0.9963 | 11500 | 0.2629 | - | - |
|
929 |
+
| 0.9984 | 11525 | 0.3488 | - | - |
|
930 |
+
| 1.0 | 11543 | - | 0.4268 | 0.9693 |
</details>

### Framework Versions
- Python: 3.10.16
- Sentence Transformers: 3.3.1
- Transformers: 4.48.0
- PyTorch: 2.4.0
- Accelerate: 1.2.1
- Datasets: 3.2.0
- Tokenizers: 0.21.0

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
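The README above names MultipleNegativesRankingLoss as the training loss. As a minimal sketch of how that loss is typically wired up with the Sentence Transformers v3 trainer API, assuming hypothetical (anchor, positive) pairs rather than the actual AutoTrain dataset:

```python
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses

# Hypothetical (anchor, positive) pairs. MultipleNegativesRankingLoss treats the
# other positives in each batch as in-batch negatives, so no explicit negatives
# are needed; larger batch sizes generally give a harder contrastive task.
train_dataset = Dataset.from_dict({
    "anchor": ["search_query: red shoes", "search_query: wireless mouse"],
    "positive": ["red running shoes for men", "ergonomic wireless mouse, 2.4 GHz"],
})

model = SentenceTransformer("Alibaba-NLP/gte-modernbert-base")
loss = losses.MultipleNegativesRankingLoss(model)

trainer = SentenceTransformerTrainer(model=model, train_dataset=train_dataset, loss=loss)
trainer.train()
```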
checkpoint-11543/config.json ADDED
@@ -0,0 +1,47 @@
{
  "_name_or_path": "Alibaba-NLP/gte-modernbert-base",
  "architectures": [
    "ModernBertModel"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 50281,
  "classifier_activation": "gelu",
  "classifier_bias": false,
  "classifier_dropout": 0.0,
  "classifier_pooling": "mean",
  "cls_token_id": 50281,
  "decoder_bias": true,
  "deterministic_flash_attn": false,
  "embedding_dropout": 0.0,
  "eos_token_id": 50282,
  "global_attn_every_n_layers": 3,
  "global_rope_theta": 160000.0,
  "gradient_checkpointing": false,
  "hidden_activation": "gelu",
  "hidden_size": 768,
  "initializer_cutoff_factor": 2.0,
  "initializer_range": 0.02,
  "intermediate_size": 1152,
  "layer_norm_eps": 1e-05,
  "local_attention": 128,
  "local_rope_theta": 10000.0,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "mlp_dropout": 0.0,
  "model_type": "modernbert",
  "norm_bias": false,
  "norm_eps": 1e-05,
  "num_attention_heads": 12,
  "num_hidden_layers": 22,
  "pad_token_id": 50283,
  "position_embedding_type": "absolute",
  "reference_compile": true,
  "repad_logits_with_grad": false,
  "sep_token_id": 50282,
  "sparse_pred_ignore_index": -100,
  "sparse_prediction": false,
  "torch_dtype": "float32",
  "transformers_version": "4.48.0",
  "vocab_size": 50368
}
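A quick way to sanity-check the backbone hyperparameters above from Python; this assumes transformers 4.48.0 or newer, the version recorded in this config and, to my knowledge, the first release with ModernBERT support:

```python
from transformers import AutoConfig

# Inspect the base encoder's config; values should match the JSON above.
config = AutoConfig.from_pretrained("Alibaba-NLP/gte-modernbert-base")
print(config.model_type)               # modernbert
print(config.hidden_size)              # 768
print(config.num_hidden_layers)        # 22
print(config.max_position_embeddings)  # 8192
```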
checkpoint-11543/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "3.3.1",
    "transformers": "4.48.0",
    "pytorch": "2.4.0"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
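The `"similarity_fn_name": "cosine"` setting above is what `SentenceTransformer.similarity` applies to embedding pairs. A small usage sketch, where the checkpoint path and the sentences are placeholders:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("path/to/this/checkpoint")  # placeholder path
embeddings = model.encode([
    "search_query: trail running shoes",
    "lightweight shoes for trail running",
])
# similarity() uses the cosine function configured above; returns a 2x2 matrix.
print(model.similarity(embeddings, embeddings))
```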
checkpoint-11543/model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:96616f5c428a86aaed4423cdda82e8a7d4becc0682e33fca47374d01cd7df333
size 596070136
checkpoint-11543/modules.json ADDED
@@ -0,0 +1,14 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  }
]
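modules.json tells Sentence Transformers to stack module 0 (the ModernBERT encoder, loaded from the repo root) on module 1 (the pooling layer in `1_Pooling`). Loading the checkpoint directory rebuilds this stack automatically; building it by hand would look roughly like the following, where the `"cls"` pooling mode is an assumption based on this repo's pooling configuration:

```python
from sentence_transformers import SentenceTransformer, models

# Module 0: the transformer encoder ("path": "" -> repo root).
word_embedding_model = models.Transformer(
    "Alibaba-NLP/gte-modernbert-base", max_seq_length=8192
)

# Module 1: pooling over token embeddings ("path": "1_Pooling").
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),  # 768
    pooling_mode="cls",  # assumption: CLS-token pooling, per this repo's pooling config
)

model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
```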
checkpoint-11543/optimizer.pt ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:07da211967f6be0e79f951282f9074faf49054e411505bc79bb7770f47d4dc86
size 1192227066
checkpoint-11543/rng_state.pth ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3a32083653aa879d14fe5992c263430b4fa744e9c53bba06661aaf71a629a7e0
size 14244
checkpoint-11543/scheduler.pt ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:80c3e6b9978ee7a9cc33e3c0c447fc88f0abe81f0665d86d6d894841efc0e05d
size 1064
checkpoint-11543/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 8192,
  "do_lower_case": false
}
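`max_seq_length` is the per-input token budget; longer texts are truncated before encoding. A brief sketch of checking and tightening it (the checkpoint path and the lower limit are arbitrary examples):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("path/to/this/checkpoint")  # placeholder path
print(model.max_seq_length)  # 8192, per sentence_bert_config.json

# Long-context attention is memory-hungry; capping the length is a common
# trade-off when inputs are known to be short. Inputs past the cap are truncated.
model.max_seq_length = 1024
```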
checkpoint-11543/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
{
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
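These BERT-style special tokens map onto the ids listed in config.json above (cls_token_id 50281, sep_token_id 50282, pad_token_id 50283). A quick check against the base tokenizer:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-modernbert-base")
print(tokenizer.cls_token, tokenizer.sep_token, tokenizer.pad_token)   # [CLS] [SEP] [PAD]
print(tokenizer.cls_token_id, tokenizer.sep_token_id, tokenizer.pad_token_id)  # 50281 50282 50283
```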
checkpoint-11543/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
checkpoint-11543/tokenizer_config.json ADDED
@@ -0,0 +1,945 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "|||IP_ADDRESS|||",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "1": {
      "content": "<|padding|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50254": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50255": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50256": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50257": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50258": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50259": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50260": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50261": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50262": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50263": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50264": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50265": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50266": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50267": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50268": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50269": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50270": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50271": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50272": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50273": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50274": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50275": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50276": {
      "content": " ",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50277": {
      "content": "|||EMAIL_ADDRESS|||",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50278": {
      "content": "|||PHONE_NUMBER|||",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50279": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50280": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50281": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50282": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50283": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50284": {
      "content": "[MASK]",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50285": {
      "content": "[unused0]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50286": {
      "content": "[unused1]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50287": {
      "content": "[unused2]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50288": {
      "content": "[unused3]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50289": {
      "content": "[unused4]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50290": {
      "content": "[unused5]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50291": {
      "content": "[unused6]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50292": {
      "content": "[unused7]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50293": {
      "content": "[unused8]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50294": {
      "content": "[unused9]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50295": {
      "content": "[unused10]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50296": {
      "content": "[unused11]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50297": {
      "content": "[unused12]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50298": {
      "content": "[unused13]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50299": {
      "content": "[unused14]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50300": {
      "content": "[unused15]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50301": {
      "content": "[unused16]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50302": {
      "content": "[unused17]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50303": {
      "content": "[unused18]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50304": {
      "content": "[unused19]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50305": {
      "content": "[unused20]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50306": {
      "content": "[unused21]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50307": {
      "content": "[unused22]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50308": {
      "content": "[unused23]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50309": {
      "content": "[unused24]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50310": {
      "content": "[unused25]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50311": {
      "content": "[unused26]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50312": {
      "content": "[unused27]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50313": {
      "content": "[unused28]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50314": {
      "content": "[unused29]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50315": {
      "content": "[unused30]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50316": {
      "content": "[unused31]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50317": {
      "content": "[unused32]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50318": {
      "content": "[unused33]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50319": {
      "content": "[unused34]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50320": {
      "content": "[unused35]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50321": {
      "content": "[unused36]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50322": {
      "content": "[unused37]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50323": {
      "content": "[unused38]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50324": {
      "content": "[unused39]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50325": {
      "content": "[unused40]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50326": {
      "content": "[unused41]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50327": {
      "content": "[unused42]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50328": {
      "content": "[unused43]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50329": {
      "content": "[unused44]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50330": {
      "content": "[unused45]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50331": {
      "content": "[unused46]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50332": {
      "content": "[unused47]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50333": {
      "content": "[unused48]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50334": {
      "content": "[unused49]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50335": {
      "content": "[unused50]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50336": {
      "content": "[unused51]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50337": {
      "content": "[unused52]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50338": {
      "content": "[unused53]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50339": {
      "content": "[unused54]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50340": {
      "content": "[unused55]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50341": {
      "content": "[unused56]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50342": {
      "content": "[unused57]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50343": {
      "content": "[unused58]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50344": {
      "content": "[unused59]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50345": {
      "content": "[unused60]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50346": {
      "content": "[unused61]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50347": {
      "content": "[unused62]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50348": {
      "content": "[unused63]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50349": {
      "content": "[unused64]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50350": {
      "content": "[unused65]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50351": {
      "content": "[unused66]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50352": {
      "content": "[unused67]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50353": {
      "content": "[unused68]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50354": {
      "content": "[unused69]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50355": {
      "content": "[unused70]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50356": {
      "content": "[unused71]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50357": {
      "content": "[unused72]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50358": {
      "content": "[unused73]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50359": {
      "content": "[unused74]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50360": {
      "content": "[unused75]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50361": {
      "content": "[unused76]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50362": {
      "content": "[unused77]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50363": {
      "content": "[unused78]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50364": {
      "content": "[unused79]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50365": {
      "content": "[unused80]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50366": {
      "content": "[unused81]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "50367": {
      "content": "[unused82]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_input_names": [
    "input_ids",
    "attention_mask"
  ],
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "tokenizer_class": "PreTrainedTokenizerFast",
  "unk_token": "[UNK]"
}
checkpoint-11543/trainer_state.json ADDED
@@ -0,0 +1,3278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{
  "best_metric": 0.4268312156200409,
  "best_model_checkpoint": "gte-modernbert-philosophy-v1-1-autotr/checkpoint-11543",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 11543,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0021658147795200556,
      "grad_norm": 9.93041706085205,
      "learning_rate": 6.493506493506493e-07,
      "loss": 1.2002,
      "step": 25
    },
    {
      "epoch": 0.004331629559040111,
      "grad_norm": 10.093696594238281,
      "learning_rate": 1.2987012987012986e-06,
      "loss": 1.1623,
      "step": 50
    },
    {
      "epoch": 0.006497444338560167,
      "grad_norm": 9.860174179077148,
      "learning_rate": 1.948051948051948e-06,
      "loss": 1.2012,
      "step": 75
    },
    {
      "epoch": 0.008663259118080222,
      "grad_norm": 8.943851470947266,
      "learning_rate": 2.597402597402597e-06,
      "loss": 1.1853,
      "step": 100
    },
    {
      "epoch": 0.010829073897600277,
      "grad_norm": 5.130438327789307,
      "learning_rate": 3.246753246753247e-06,
      "loss": 0.9767,
      "step": 125
    },
    {
      "epoch": 0.012994888677120333,
      "grad_norm": 8.450475692749023,
      "learning_rate": 3.896103896103896e-06,
      "loss": 0.865,
      "step": 150
    },
    {
      "epoch": 0.015160703456640388,
      "grad_norm": 3.7100517749786377,
      "learning_rate": 4.5454545454545455e-06,
      "loss": 0.7733,
      "step": 175
    },
    {
      "epoch": 0.017326518236160444,
      "grad_norm": 15.069323539733887,
      "learning_rate": 5.194805194805194e-06,
      "loss": 0.9545,
      "step": 200
    },
    {
      "epoch": 0.0194923330156805,
      "grad_norm": 9.745051383972168,
      "learning_rate": 5.8181818181818185e-06,
      "loss": 0.8309,
      "step": 225
    },
    {
      "epoch": 0.021658147795200554,
      "grad_norm": 7.115631580352783,
      "learning_rate": 6.467532467532467e-06,
      "loss": 0.7514,
      "step": 250
    },
    {
      "epoch": 0.02382396257472061,
      "grad_norm": 15.047070503234863,
      "learning_rate": 7.116883116883117e-06,
      "loss": 0.5555,
      "step": 275
    },
    {
      "epoch": 0.025989777354240667,
      "grad_norm": 14.228421211242676,
      "learning_rate": 7.766233766233767e-06,
      "loss": 0.563,
      "step": 300
    },
    {
      "epoch": 0.02815559213376072,
      "grad_norm": 8.77304458618164,
      "learning_rate": 8.415584415584416e-06,
      "loss": 0.618,
      "step": 325
    },
    {
      "epoch": 0.030321406913280776,
      "grad_norm": 4.78096866607666,
      "learning_rate": 9.064935064935066e-06,
      "loss": 0.6538,
      "step": 350
    },
    {
      "epoch": 0.032487221692800834,
      "grad_norm": 9.978753089904785,
      "learning_rate": 9.714285714285715e-06,
      "loss": 0.5802,
      "step": 375
    },
    {
      "epoch": 0.03465303647232089,
      "grad_norm": 15.071464538574219,
      "learning_rate": 1.0363636363636364e-05,
      "loss": 0.6568,
      "step": 400
    },
    {
      "epoch": 0.036818851251840944,
      "grad_norm": 12.352958679199219,
      "learning_rate": 1.1012987012987013e-05,
      "loss": 0.4934,
      "step": 425
    },
    {
      "epoch": 0.038984666031361,
      "grad_norm": 8.754806518554688,
      "learning_rate": 1.1662337662337662e-05,
      "loss": 0.597,
      "step": 450
    },
    {
      "epoch": 0.04115048081088105,
      "grad_norm": 2.432300090789795,
      "learning_rate": 1.2311688311688312e-05,
      "loss": 0.3812,
      "step": 475
    },
    {
      "epoch": 0.04331629559040111,
      "grad_norm": 7.439950466156006,
      "learning_rate": 1.2961038961038961e-05,
      "loss": 0.482,
      "step": 500
    },
    {
      "epoch": 0.04548211036992116,
      "grad_norm": 14.198251724243164,
      "learning_rate": 1.361038961038961e-05,
      "loss": 0.5347,
      "step": 525
    },
    {
      "epoch": 0.04764792514944122,
      "grad_norm": 5.91489839553833,
      "learning_rate": 1.425974025974026e-05,
      "loss": 0.5012,
      "step": 550
    },
    {
      "epoch": 0.04981373992896128,
      "grad_norm": 14.394525527954102,
      "learning_rate": 1.490909090909091e-05,
      "loss": 0.5765,
      "step": 575
    },
    {
      "epoch": 0.05197955470848133,
      "grad_norm": 16.823543548583984,
      "learning_rate": 1.555844155844156e-05,
      "loss": 0.4286,
      "step": 600
    },
    {
      "epoch": 0.05414536948800139,
      "grad_norm": 4.72399377822876,
      "learning_rate": 1.6207792207792207e-05,
      "loss": 0.5167,
      "step": 625
    },
    {
      "epoch": 0.05631118426752144,
      "grad_norm": 18.0063419342041,
      "learning_rate": 1.6857142857142858e-05,
      "loss": 0.4791,
      "step": 650
    },
    {
      "epoch": 0.0584769990470415,
      "grad_norm": 11.925456047058105,
      "learning_rate": 1.750649350649351e-05,
      "loss": 0.5022,
      "step": 675
    },
    {
      "epoch": 0.06064281382656155,
      "grad_norm": 2.7437996864318848,
      "learning_rate": 1.8155844155844156e-05,
      "loss": 0.438,
      "step": 700
    },
    {
      "epoch": 0.0628086286060816,
      "grad_norm": 1.8270901441574097,
      "learning_rate": 1.8805194805194806e-05,
      "loss": 0.3995,
      "step": 725
    },
    {
      "epoch": 0.06497444338560167,
      "grad_norm": 4.187374591827393,
      "learning_rate": 1.9454545454545453e-05,
      "loss": 0.2924,
      "step": 750
    },
    {
      "epoch": 0.06714025816512172,
      "grad_norm": 12.709814071655273,
      "learning_rate": 2.0103896103896104e-05,
      "loss": 0.4391,
      "step": 775
    },
    {
      "epoch": 0.06930607294464178,
      "grad_norm": 8.789942741394043,
      "learning_rate": 2.0753246753246755e-05,
      "loss": 0.4328,
      "step": 800
    },
    {
      "epoch": 0.07147188772416183,
      "grad_norm": 10.182008743286133,
      "learning_rate": 2.137662337662338e-05,
      "loss": 0.5658,
      "step": 825
    },
    {
      "epoch": 0.07363770250368189,
      "grad_norm": 3.5178301334381104,
      "learning_rate": 2.2025974025974026e-05,
      "loss": 0.4541,
      "step": 850
    },
    {
      "epoch": 0.07580351728320193,
      "grad_norm": 8.124090194702148,
      "learning_rate": 2.2675324675324676e-05,
      "loss": 0.5381,
      "step": 875
    },
    {
      "epoch": 0.077969332062722,
      "grad_norm": 11.69704532623291,
      "learning_rate": 2.3324675324675324e-05,
      "loss": 0.4523,
      "step": 900
    },
    {
      "epoch": 0.08013514684224206,
      "grad_norm": 19.822145462036133,
      "learning_rate": 2.3974025974025974e-05,
      "loss": 0.3522,
      "step": 925
    },
    {
      "epoch": 0.0823009616217621,
      "grad_norm": 8.31993579864502,
      "learning_rate": 2.4623376623376625e-05,
      "loss": 0.4475,
      "step": 950
    },
    {
      "epoch": 0.08446677640128217,
      "grad_norm": 5.60876989364624,
      "learning_rate": 2.5246753246753246e-05,
      "loss": 0.4448,
      "step": 975
    },
    {
      "epoch": 0.08663259118080222,
      "grad_norm": 9.872743606567383,
      "learning_rate": 2.5896103896103896e-05,
      "loss": 0.407,
      "step": 1000
    },
    {
      "epoch": 0.08879840596032228,
      "grad_norm": 7.193666458129883,
      "learning_rate": 2.6545454545454547e-05,
      "loss": 0.4616,
      "step": 1025
    },
    {
      "epoch": 0.09096422073984232,
      "grad_norm": 17.595991134643555,
      "learning_rate": 2.7194805194805194e-05,
      "loss": 0.4213,
      "step": 1050
    },
    {
      "epoch": 0.09313003551936239,
      "grad_norm": 3.281184196472168,
      "learning_rate": 2.7844155844155844e-05,
      "loss": 0.465,
      "step": 1075
    },
    {
      "epoch": 0.09529585029888243,
      "grad_norm": 7.671459197998047,
      "learning_rate": 2.849350649350649e-05,
      "loss": 0.2964,
      "step": 1100
    },
    {
      "epoch": 0.0974616650784025,
      "grad_norm": 7.963995933532715,
      "learning_rate": 2.9142857142857142e-05,
      "loss": 0.4414,
      "step": 1125
    },
    {
      "epoch": 0.09962747985792256,
      "grad_norm": 1.8723474740982056,
      "learning_rate": 2.9792207792207793e-05,
      "loss": 0.3508,
      "step": 1150
    },
    {
      "epoch": 0.1017932946374426,
      "grad_norm": 5.1907877922058105,
      "learning_rate": 2.995090489025799e-05,
      "loss": 0.3362,
      "step": 1175
    },
    {
      "epoch": 0.10395910941696267,
      "grad_norm": 5.219175815582275,
      "learning_rate": 2.9878706199460916e-05,
      "loss": 0.4953,
      "step": 1200
    },
    {
      "epoch": 0.10612492419648271,
      "grad_norm": 15.204286575317383,
      "learning_rate": 2.9806507508663843e-05,
      "loss": 0.4041,
      "step": 1225
    },
    {
      "epoch": 0.10829073897600278,
      "grad_norm": 5.872297286987305,
      "learning_rate": 2.973430881786677e-05,
      "loss": 0.3773,
      "step": 1250
    },
    {
      "epoch": 0.11045655375552282,
      "grad_norm": 7.201790809631348,
      "learning_rate": 2.9662110127069697e-05,
      "loss": 0.3574,
      "step": 1275
    },
    {
      "epoch": 0.11262236853504289,
      "grad_norm": 2.872793674468994,
      "learning_rate": 2.9589911436272623e-05,
      "loss": 0.642,
      "step": 1300
    },
    {
      "epoch": 0.11478818331456293,
      "grad_norm": 10.854488372802734,
      "learning_rate": 2.951771274547555e-05,
      "loss": 0.3783,
      "step": 1325
    },
    {
      "epoch": 0.116953998094083,
      "grad_norm": 2.162464141845703,
      "learning_rate": 2.9445514054678477e-05,
      "loss": 0.4905,
      "step": 1350
    },
    {
      "epoch": 0.11911981287360304,
      "grad_norm": 14.541825294494629,
      "learning_rate": 2.9373315363881403e-05,
      "loss": 0.3937,
      "step": 1375
    },
    {
      "epoch": 0.1212856276531231,
      "grad_norm": 1.6897481679916382,
      "learning_rate": 2.9301116673084327e-05,
      "loss": 0.4245,
      "step": 1400
    },
    {
      "epoch": 0.12345144243264317,
      "grad_norm": 9.359882354736328,
      "learning_rate": 2.9228917982287253e-05,
      "loss": 0.4139,
      "step": 1425
    },
    {
      "epoch": 0.1256172572121632,
      "grad_norm": 39.94605255126953,
      "learning_rate": 2.915671929149018e-05,
      "loss": 0.4305,
      "step": 1450
    },
    {
      "epoch": 0.12778307199168326,
      "grad_norm": 10.268132209777832,
      "learning_rate": 2.908452060069311e-05,
      "loss": 0.675,
      "step": 1475
    },
    {
      "epoch": 0.12994888677120334,
      "grad_norm": 1.7209604978561401,
      "learning_rate": 2.9012321909896037e-05,
      "loss": 0.55,
      "step": 1500
    },
    {
      "epoch": 0.13211470155072338,
      "grad_norm": 8.541482925415039,
      "learning_rate": 2.894012321909896e-05,
      "loss": 0.4033,
      "step": 1525
    },
    {
      "epoch": 0.13428051633024343,
      "grad_norm": 10.4110107421875,
      "learning_rate": 2.8867924528301887e-05,
      "loss": 0.4167,
      "step": 1550
    },
    {
      "epoch": 0.13644633110976348,
      "grad_norm": 10.823756217956543,
      "learning_rate": 2.8795725837504814e-05,
      "loss": 0.3814,
      "step": 1575
    },
    {
      "epoch": 0.13861214588928356,
      "grad_norm": 0.6896539926528931,
      "learning_rate": 2.872352714670774e-05,
      "loss": 0.5183,
      "step": 1600
    },
    {
      "epoch": 0.1407779606688036,
      "grad_norm": 4.357579231262207,
      "learning_rate": 2.8651328455910667e-05,
      "loss": 0.3343,
      "step": 1625
    },
    {
      "epoch": 0.14294377544832365,
      "grad_norm": 12.074344635009766,
      "learning_rate": 2.857912976511359e-05,
      "loss": 0.4212,
      "step": 1650
    },
    {
      "epoch": 0.14510959022784373,
      "grad_norm": 11.660531997680664,
      "learning_rate": 2.850693107431652e-05,
      "loss": 0.4737,
      "step": 1675
    },
    {
      "epoch": 0.14727540500736377,
      "grad_norm": 15.467144966125488,
      "learning_rate": 2.8434732383519447e-05,
      "loss": 0.4563,
      "step": 1700
    },
    {
      "epoch": 0.14944121978688382,
      "grad_norm": 9.277994155883789,
      "learning_rate": 2.8362533692722374e-05,
      "loss": 0.4251,
      "step": 1725
    },
    {
      "epoch": 0.15160703456640387,
      "grad_norm": 3.6043941974639893,
      "learning_rate": 2.82903350019253e-05,
      "loss": 0.3497,
      "step": 1750
    },
    {
      "epoch": 0.15377284934592395,
      "grad_norm": 3.933353900909424,
      "learning_rate": 2.8218136311128224e-05,
      "loss": 0.3753,
      "step": 1775
    },
    {
      "epoch": 0.155938664125444,
      "grad_norm": 3.8728222846984863,
      "learning_rate": 2.814593762033115e-05,
      "loss": 0.4031,
      "step": 1800
    },
    {
      "epoch": 0.15810447890496404,
      "grad_norm": 8.067976951599121,
      "learning_rate": 2.8073738929534077e-05,
      "loss": 0.4037,
      "step": 1825
    },
    {
      "epoch": 0.16027029368448412,
      "grad_norm": 9.141134262084961,
      "learning_rate": 2.8001540238737004e-05,
      "loss": 0.4114,
      "step": 1850
    },
    {
      "epoch": 0.16243610846400416,
      "grad_norm": 1.8272747993469238,
      "learning_rate": 2.7929341547939934e-05,
      "loss": 0.3848,
      "step": 1875
    },
    {
      "epoch": 0.1646019232435242,
      "grad_norm": 0.4890976846218109,
      "learning_rate": 2.7857142857142858e-05,
      "loss": 0.5088,
      "step": 1900
    },
    {
      "epoch": 0.16676773802304426,
      "grad_norm": 9.043623924255371,
      "learning_rate": 2.7784944166345784e-05,
      "loss": 0.4032,
      "step": 1925
    },
    {
      "epoch": 0.16893355280256434,
      "grad_norm": 9.092608451843262,
      "learning_rate": 2.771274547554871e-05,
      "loss": 0.3354,
      "step": 1950
    },
    {
      "epoch": 0.17109936758208438,
      "grad_norm": 6.121222972869873,
      "learning_rate": 2.7640546784751638e-05,
      "loss": 0.4163,
      "step": 1975
    },
    {
      "epoch": 0.17326518236160443,
      "grad_norm": 1.539663314819336,
      "learning_rate": 2.7568348093954564e-05,
      "loss": 0.3715,
      "step": 2000
    },
    {
      "epoch": 0.17543099714112448,
      "grad_norm": 16.089406967163086,
      "learning_rate": 2.7496149403157488e-05,
      "loss": 0.3424,
      "step": 2025
    },
    {
      "epoch": 0.17759681192064455,
      "grad_norm": 12.510934829711914,
      "learning_rate": 2.7423950712360414e-05,
      "loss": 0.3311,
      "step": 2050
    },
    {
      "epoch": 0.1797626267001646,
      "grad_norm": 2.823338508605957,
      "learning_rate": 2.7351752021563345e-05,
      "loss": 0.4362,
      "step": 2075
    },
    {
      "epoch": 0.18192844147968465,
      "grad_norm": 6.191600322723389,
      "learning_rate": 2.727955333076627e-05,
      "loss": 0.4441,
      "step": 2100
    },
    {
      "epoch": 0.18409425625920472,
      "grad_norm": 4.86907434463501,
      "learning_rate": 2.7207354639969198e-05,
      "loss": 0.3122,
      "step": 2125
    },
    {
      "epoch": 0.18626007103872477,
      "grad_norm": 7.323814868927002,
      "learning_rate": 2.713515594917212e-05,
      "loss": 0.3717,
      "step": 2150
    },
    {
      "epoch": 0.18842588581824482,
      "grad_norm": 10.09737491607666,
      "learning_rate": 2.7062957258375048e-05,
      "loss": 0.3461,
      "step": 2175
    },
    {
      "epoch": 0.19059170059776487,
      "grad_norm": 8.536800384521484,
      "learning_rate": 2.6990758567577975e-05,
      "loss": 0.4816,
      "step": 2200
    },
    {
      "epoch": 0.19275751537728494,
      "grad_norm": 5.237682819366455,
      "learning_rate": 2.69185598767809e-05,
      "loss": 0.4784,
      "step": 2225
    },
    {
      "epoch": 0.194923330156805,
      "grad_norm": 10.763497352600098,
      "learning_rate": 2.6846361185983828e-05,
      "loss": 0.4334,
      "step": 2250
    },
    {
      "epoch": 0.19708914493632504,
      "grad_norm": 0.7019050121307373,
      "learning_rate": 2.6774162495186755e-05,
      "loss": 0.3437,
      "step": 2275
    },
    {
      "epoch": 0.19925495971584511,
      "grad_norm": 8.020634651184082,
      "learning_rate": 2.670196380438968e-05,
      "loss": 0.4333,
      "step": 2300
    },
    {
      "epoch": 0.20142077449536516,
      "grad_norm": 10.549779891967773,
      "learning_rate": 2.662976511359261e-05,
      "loss": 0.3609,
      "step": 2325
    },
    {
      "epoch": 0.2035865892748852,
      "grad_norm": 5.6236677169799805,
      "learning_rate": 2.6557566422795535e-05,
      "loss": 0.3437,
      "step": 2350
    },
    {
      "epoch": 0.20575240405440526,
      "grad_norm": 1.4388600587844849,
      "learning_rate": 2.648536773199846e-05,
      "loss": 0.4911,
      "step": 2375
    },
    {
      "epoch": 0.20791821883392533,
      "grad_norm": 4.445183277130127,
      "learning_rate": 2.6413169041201385e-05,
      "loss": 0.3872,
      "step": 2400
    },
    {
      "epoch": 0.21008403361344538,
      "grad_norm": 9.076152801513672,
      "learning_rate": 2.6340970350404312e-05,
      "loss": 0.276,
      "step": 2425
    },
    {
      "epoch": 0.21224984839296543,
      "grad_norm": 5.573355197906494,
      "learning_rate": 2.6268771659607242e-05,
      "loss": 0.3318,
      "step": 2450
    },
    {
      "epoch": 0.21441566317248548,
      "grad_norm": 5.015573024749756,
      "learning_rate": 2.619657296881017e-05,
      "loss": 0.4833,
      "step": 2475
    },
    {
      "epoch": 0.21658147795200555,
      "grad_norm": 3.9038755893707275,
      "learning_rate": 2.6124374278013092e-05,
      "loss": 0.4656,
      "step": 2500
    },
    {
      "epoch": 0.2187472927315256,
      "grad_norm": 2.66627836227417,
      "learning_rate": 2.605217558721602e-05,
      "loss": 0.4232,
      "step": 2525
    },
    {
      "epoch": 0.22091310751104565,
      "grad_norm": 8.859906196594238,
      "learning_rate": 2.5979976896418945e-05,
      "loss": 0.434,
      "step": 2550
    },
    {
      "epoch": 0.22307892229056572,
      "grad_norm": 3.2811522483825684,
      "learning_rate": 2.5907778205621872e-05,
      "loss": 0.2479,
      "step": 2575
    },
    {
      "epoch": 0.22524473707008577,
      "grad_norm": 8.53447437286377,
      "learning_rate": 2.58355795148248e-05,
      "loss": 0.4656,
      "step": 2600
    },
    {
      "epoch": 0.22741055184960582,
      "grad_norm": 6.359921455383301,
      "learning_rate": 2.5763380824027722e-05,
      "loss": 0.3881,
      "step": 2625
    },
    {
      "epoch": 0.22957636662912587,
      "grad_norm": 6.196253776550293,
      "learning_rate": 2.5691182133230652e-05,
      "loss": 0.3637,
      "step": 2650
    },
    {
      "epoch": 0.23174218140864594,
      "grad_norm": 7.805304050445557,
      "learning_rate": 2.561898344243358e-05,
      "loss": 0.3099,
      "step": 2675
    },
    {
      "epoch": 0.233907996188166,
      "grad_norm": 4.51755428314209,
      "learning_rate": 2.5546784751636506e-05,
      "loss": 0.3933,
      "step": 2700
    },
    {
      "epoch": 0.23607381096768604,
      "grad_norm": 5.72914981842041,
      "learning_rate": 2.5474586060839432e-05,
      "loss": 0.3789,
      "step": 2725
    },
    {
      "epoch": 0.23823962574720609,
      "grad_norm": 2.4809954166412354,
      "learning_rate": 2.5402387370042356e-05,
      "loss": 0.4056,
      "step": 2750
    },
    {
      "epoch": 0.24040544052672616,
      "grad_norm": 1.940656065940857,
      "learning_rate": 2.5330188679245282e-05,
      "loss": 0.4132,
      "step": 2775
    },
    {
      "epoch": 0.2425712553062462,
      "grad_norm": 3.452242851257324,
      "learning_rate": 2.525798998844821e-05,
      "loss": 0.375,
      "step": 2800
    },
    {
      "epoch": 0.24473707008576626,
      "grad_norm": 9.220993041992188,
      "learning_rate": 2.5185791297651136e-05,
      "loss": 0.3026,
      "step": 2825
    },
    {
      "epoch": 0.24690288486528633,
      "grad_norm": 10.027073860168457,
      "learning_rate": 2.5113592606854066e-05,
      "loss": 0.5372,
      "step": 2850
    },
    {
      "epoch": 0.24906869964480638,
      "grad_norm": 2.228799819946289,
      "learning_rate": 2.504139391605699e-05,
      "loss": 0.4233,
      "step": 2875
    },
    {
      "epoch": 0.2512345144243264,
      "grad_norm": 7.281198978424072,
      "learning_rate": 2.4969195225259916e-05,
      "loss": 0.2945,
      "step": 2900
    },
    {
      "epoch": 0.2534003292038465,
      "grad_norm": 1.4160314798355103,
      "learning_rate": 2.4896996534462843e-05,
      "loss": 0.2916,
      "step": 2925
    },
    {
      "epoch": 0.2555661439833665,
      "grad_norm": 4.095098972320557,
      "learning_rate": 2.482479784366577e-05,
      "loss": 0.3536,
      "step": 2950
    },
    {
      "epoch": 0.25773195876288657,
      "grad_norm": 1.413552165031433,
      "learning_rate": 2.4752599152868696e-05,
      "loss": 0.3246,
      "step": 2975
    },
    {
      "epoch": 0.2598977735424067,
      "grad_norm": 3.3196184635162354,
      "learning_rate": 2.468040046207162e-05,
      "loss": 0.4236,
      "step": 3000
    },
    {
      "epoch": 0.2620635883219267,
      "grad_norm": 11.855537414550781,
      "learning_rate": 2.4608201771274546e-05,
      "loss": 0.4088,
      "step": 3025
    },
    {
      "epoch": 0.26422940310144677,
      "grad_norm": 9.322809219360352,
      "learning_rate": 2.4536003080477476e-05,
      "loss": 0.4522,
      "step": 3050
    },
    {
      "epoch": 0.2663952178809668,
      "grad_norm": 7.581571578979492,
      "learning_rate": 2.4463804389680403e-05,
      "loss": 0.3445,
      "step": 3075
    },
    {
      "epoch": 0.26856103266048686,
      "grad_norm": 2.6131093502044678,
      "learning_rate": 2.439160569888333e-05,
      "loss": 0.3575,
      "step": 3100
    },
    {
      "epoch": 0.2707268474400069,
      "grad_norm": 3.68662166595459,
      "learning_rate": 2.4319407008086253e-05,
      "loss": 0.3809,
      "step": 3125
    },
    {
      "epoch": 0.27289266221952696,
      "grad_norm": 2.3688032627105713,
      "learning_rate": 2.424720831728918e-05,
      "loss": 0.3364,
      "step": 3150
    },
    {
      "epoch": 0.27505847699904706,
      "grad_norm": 1.155315637588501,
      "learning_rate": 2.4175009626492106e-05,
      "loss": 0.4103,
      "step": 3175
    },
    {
      "epoch": 0.2772242917785671,
      "grad_norm": 35.7138671875,
      "learning_rate": 2.4102810935695033e-05,
      "loss": 0.3502,
      "step": 3200
    },
    {
      "epoch": 0.27939010655808716,
      "grad_norm": 6.429433822631836,
      "learning_rate": 2.403061224489796e-05,
      "loss": 0.2632,
      "step": 3225
    },
    {
      "epoch": 0.2815559213376072,
      "grad_norm": 9.816515922546387,
      "learning_rate": 2.3958413554100887e-05,
      "loss": 0.406,
      "step": 3250
    },
    {
      "epoch": 0.28372173611712725,
      "grad_norm": 1.9653140306472778,
      "learning_rate": 2.3886214863303813e-05,
      "loss": 0.4363,
      "step": 3275
    },
    {
      "epoch": 0.2858875508966473,
      "grad_norm": 9.559599876403809,
      "learning_rate": 2.381401617250674e-05,
      "loss": 0.2819,
      "step": 3300
    },
    {
      "epoch": 0.28805336567616735,
      "grad_norm": 10.623549461364746,
      "learning_rate": 2.3741817481709667e-05,
      "loss": 0.3421,
      "step": 3325
    },
    {
      "epoch": 0.29021918045568745,
      "grad_norm": 2.4988913536071777,
      "learning_rate": 2.366961879091259e-05,
      "loss": 0.269,
      "step": 3350
    },
    {
      "epoch": 0.2923849952352075,
      "grad_norm": 4.704137802124023,
      "learning_rate": 2.3597420100115517e-05,
      "loss": 0.2902,
      "step": 3375
    },
    {
      "epoch": 0.29455081001472755,
      "grad_norm": 9.48901653289795,
      "learning_rate": 2.3525221409318443e-05,
      "loss": 0.3548,
      "step": 3400
    },
    {
      "epoch": 0.2967166247942476,
      "grad_norm": 0.5201269388198853,
      "learning_rate": 2.3453022718521374e-05,
      "loss": 0.4575,
      "step": 3425
    },
    {
      "epoch": 0.29888243957376764,
      "grad_norm": 8.074861526489258,
      "learning_rate": 2.33808240277243e-05,
      "loss": 0.3942,
      "step": 3450
    },
    {
      "epoch": 0.3010482543532877,
      "grad_norm": 8.45334243774414,
      "learning_rate": 2.3308625336927224e-05,
      "loss": 0.3537,
      "step": 3475
    },
    {
      "epoch": 0.30321406913280774,
      "grad_norm": 2.7069313526153564,
      "learning_rate": 2.323642664613015e-05,
      "loss": 0.3672,
      "step": 3500
    },
    {
      "epoch": 0.30537988391232784,
      "grad_norm": 13.849508285522461,
      "learning_rate": 2.3164227955333077e-05,
      "loss": 0.3502,
      "step": 3525
    },
    {
      "epoch": 0.3075456986918479,
      "grad_norm": 4.5892462730407715,
      "learning_rate": 2.3092029264536004e-05,
      "loss": 0.2545,
      "step": 3550
    },
    {
      "epoch": 0.30971151347136794,
      "grad_norm": 1.035447120666504,
      "learning_rate": 2.301983057373893e-05,
      "loss": 0.2544,
      "step": 3575
    },
    {
      "epoch": 0.311877328250888,
      "grad_norm": 5.170057773590088,
      "learning_rate": 2.2947631882941854e-05,
      "loss": 0.3443,
      "step": 3600
    },
    {
      "epoch": 0.31404314303040803,
      "grad_norm": 2.908191204071045,
      "learning_rate": 2.2875433192144784e-05,
      "loss": 0.3784,
      "step": 3625
    },
    {
      "epoch": 0.3162089578099281,
      "grad_norm": 9.946891784667969,
      "learning_rate": 2.280323450134771e-05,
      "loss": 0.3828,
      "step": 3650
    },
    {
      "epoch": 0.31837477258944813,
      "grad_norm": 10.337167739868164,
      "learning_rate": 2.2731035810550637e-05,
      "loss": 0.4032,
      "step": 3675
    },
    {
      "epoch": 0.32054058736896823,
      "grad_norm": 10.093758583068848,
      "learning_rate": 2.2658837119753564e-05,
      "loss": 0.2556,
      "step": 3700
    },
    {
      "epoch": 0.3227064021484883,
      "grad_norm": 7.309471130371094,
      "learning_rate": 2.2586638428956487e-05,
      "loss": 0.3352,
      "step": 3725
    },
    {
      "epoch": 0.32487221692800833,
      "grad_norm": 10.050370216369629,
      "learning_rate": 2.2514439738159414e-05,
      "loss": 0.4054,
      "step": 3750
    },
    {
      "epoch": 0.3270380317075284,
      "grad_norm": 3.858546733856201,
      "learning_rate": 2.244224104736234e-05,
      "loss": 0.3049,
      "step": 3775
    },
    {
      "epoch": 0.3292038464870484,
      "grad_norm": 5.640537261962891,
      "learning_rate": 2.2370042356565267e-05,
      "loss": 0.2223,
      "step": 3800
    },
    {
      "epoch": 0.33136966126656847,
      "grad_norm": 5.106541633605957,
      "learning_rate": 2.2297843665768198e-05,
      "loss": 0.4878,
      "step": 3825
    },
    {
      "epoch": 0.3335354760460885,
      "grad_norm": 7.738224029541016,
      "learning_rate": 2.222564497497112e-05,
      "loss": 0.3015,
      "step": 3850
    },
    {
      "epoch": 0.33570129082560857,
      "grad_norm": 12.313666343688965,
      "learning_rate": 2.2153446284174048e-05,
      "loss": 0.3816,
      "step": 3875
    },
    {
      "epoch": 0.33786710560512867,
      "grad_norm": 0.9929437041282654,
      "learning_rate": 2.2081247593376974e-05,
      "loss": 0.3334,
      "step": 3900
    },
    {
      "epoch": 0.3400329203846487,
      "grad_norm": 5.753032207489014,
      "learning_rate": 2.20090489025799e-05,
      "loss": 0.3724,
      "step": 3925
    },
    {
      "epoch": 0.34219873516416877,
      "grad_norm": 8.37396240234375,
      "learning_rate": 2.1936850211782828e-05,
      "loss": 0.4217,
      "step": 3950
    },
    {
      "epoch": 0.3443645499436888,
      "grad_norm": 7.365005016326904,
      "learning_rate": 2.186465152098575e-05,
      "loss": 0.4339,
      "step": 3975
    },
    {
      "epoch": 0.34653036472320886,
      "grad_norm": 1.91083824634552,
      "learning_rate": 2.1792452830188678e-05,
      "loss": 0.3642,
      "step": 4000
    },
    {
      "epoch": 0.3486961795027289,
      "grad_norm": 3.0427494049072266,
      "learning_rate": 2.1720254139391608e-05,
      "loss": 0.3819,
      "step": 4025
    },
    {
      "epoch": 0.35086199428224896,
      "grad_norm": 1.176952838897705,
      "learning_rate": 2.1648055448594535e-05,
      "loss": 0.2796,
      "step": 4050
    },
    {
      "epoch": 0.35302780906176906,
      "grad_norm": 1.0579583644866943,
      "learning_rate": 2.157585675779746e-05,
      "loss": 0.4277,
      "step": 4075
    },
    {
      "epoch": 0.3551936238412891,
      "grad_norm": 11.798035621643066,
      "learning_rate": 2.1503658067000385e-05,
      "loss": 0.3407,
      "step": 4100
    },
    {
      "epoch": 0.35735943862080916,
      "grad_norm": 15.57787036895752,
      "learning_rate": 2.143145937620331e-05,
      "loss": 0.2781,
      "step": 4125
    },
    {
      "epoch": 0.3595252534003292,
      "grad_norm": 8.533368110656738,
      "learning_rate": 2.1359260685406238e-05,
      "loss": 0.4274,
      "step": 4150
    },
    {
      "epoch": 0.36169106817984925,
      "grad_norm": 8.470250129699707,
      "learning_rate": 2.1287061994609165e-05,
      "loss": 0.3609,
      "step": 4175
    },
    {
      "epoch": 0.3638568829593693,
      "grad_norm": 6.417985439300537,
      "learning_rate": 2.121486330381209e-05,
      "loss": 0.3476,
      "step": 4200
    },
    {
      "epoch": 0.36602269773888935,
      "grad_norm": 8.685192108154297,
      "learning_rate": 2.1142664613015018e-05,
      "loss": 0.41,
      "step": 4225
    },
    {
      "epoch": 0.36818851251840945,
      "grad_norm": 7.082727432250977,
      "learning_rate": 2.1070465922217945e-05,
      "loss": 0.4003,
      "step": 4250
    },
    {
      "epoch": 0.3703543272979295,
      "grad_norm": 4.621776103973389,
      "learning_rate": 2.099826723142087e-05,
      "loss": 0.306,
      "step": 4275
    },
    {
      "epoch": 0.37252014207744955,
      "grad_norm": 3.1071817874908447,
      "learning_rate": 2.09260685406238e-05,
      "loss": 0.2335,
      "step": 4300
    },
    {
      "epoch": 0.3746859568569696,
      "grad_norm": 7.23638916015625,
      "learning_rate": 2.085386984982672e-05,
      "loss": 0.2733,
      "step": 4325
    },
    {
      "epoch": 0.37685177163648964,
      "grad_norm": 6.893523693084717,
      "learning_rate": 2.078167115902965e-05,
      "loss": 0.3007,
      "step": 4350
    },
    {
      "epoch": 0.3790175864160097,
      "grad_norm": 5.9917073249816895,
      "learning_rate": 2.0709472468232575e-05,
      "loss": 0.3086,
      "step": 4375
    },
    {
      "epoch": 0.38118340119552974,
      "grad_norm": 6.596795558929443,
      "learning_rate": 2.0637273777435502e-05,
      "loss": 0.365,
      "step": 4400
    },
    {
      "epoch": 0.38334921597504984,
      "grad_norm": 9.045963287353516,
      "learning_rate": 2.0565075086638432e-05,
      "loss": 0.3255,
      "step": 4425
    },
    {
      "epoch": 0.3855150307545699,
      "grad_norm": 6.755446434020996,
      "learning_rate": 2.0492876395841355e-05,
      "loss": 0.3765,
      "step": 4450
    },
    {
      "epoch": 0.38768084553408994,
      "grad_norm": 11.626537322998047,
      "learning_rate": 2.0420677705044282e-05,
      "loss": 0.2946,
      "step": 4475
    },
    {
      "epoch": 0.38984666031361,
      "grad_norm": 4.125662326812744,
      "learning_rate": 2.034847901424721e-05,
      "loss": 0.3298,
      "step": 4500
    },
    {
      "epoch": 0.39201247509313003,
      "grad_norm": 1.2437127828598022,
      "learning_rate": 2.0276280323450135e-05,
      "loss": 0.3645,
      "step": 4525
    },
    {
      "epoch": 0.3941782898726501,
      "grad_norm": 10.272943496704102,
      "learning_rate": 2.0204081632653062e-05,
      "loss": 0.2403,
      "step": 4550
    },
    {
      "epoch": 0.3963441046521701,
      "grad_norm": 2.164606809616089,
      "learning_rate": 2.0131882941855985e-05,
      "loss": 0.28,
      "step": 4575
    },
    {
      "epoch": 0.39850991943169023,
      "grad_norm": 9.157061576843262,
      "learning_rate": 2.0059684251058916e-05,
      "loss": 0.3814,
      "step": 4600
    },
    {
      "epoch": 0.4006757342112103,
      "grad_norm": 4.034579277038574,
      "learning_rate": 1.9987485560261842e-05,
      "loss": 0.3419,
      "step": 4625
    },
    {
      "epoch": 0.4028415489907303,
      "grad_norm": 2.5503344535827637,
      "learning_rate": 1.991528686946477e-05,
      "loss": 0.3374,
      "step": 4650
    },
    {
      "epoch": 0.4050073637702504,
      "grad_norm": 4.660188674926758,
      "learning_rate": 1.9843088178667696e-05,
      "loss": 0.3511,
      "step": 4675
    },
    {
      "epoch": 0.4071731785497704,
      "grad_norm": 7.020951747894287,
      "learning_rate": 1.977088948787062e-05,
      "loss": 0.4339,
      "step": 4700
    },
    {
      "epoch": 0.40933899332929047,
      "grad_norm": 5.4507269859313965,
      "learning_rate": 1.9698690797073546e-05,
      "loss": 0.3441,
      "step": 4725
    },
    {
      "epoch": 0.4115048081088105,
      "grad_norm": 11.266243934631348,
      "learning_rate": 1.9626492106276472e-05,
      "loss": 0.346,
      "step": 4750
    },
    {
      "epoch": 0.41367062288833056,
      "grad_norm": 2.1191511154174805,
      "learning_rate": 1.95542934154794e-05,
      "loss": 0.3723,
      "step": 4775
    },
    {
      "epoch": 0.41583643766785067,
      "grad_norm": 1.9068052768707275,
      "learning_rate": 1.948209472468233e-05,
      "loss": 0.2075,
      "step": 4800
    },
    {
      "epoch": 0.4180022524473707,
      "grad_norm": 0.36394304037094116,
      "learning_rate": 1.9409896033885253e-05,
      "loss": 0.2431,
      "step": 4825
    },
    {
      "epoch": 0.42016806722689076,
      "grad_norm": 6.177628993988037,
      "learning_rate": 1.933769734308818e-05,
      "loss": 0.2642,
      "step": 4850
    },
    {
      "epoch": 0.4223338820064108,
      "grad_norm": 3.6669273376464844,
      "learning_rate": 1.9265498652291106e-05,
      "loss": 0.1763,
      "step": 4875
    },
    {
      "epoch": 0.42449969678593086,
      "grad_norm": 7.836557865142822,
      "learning_rate": 1.9193299961494033e-05,
      "loss": 0.3862,
      "step": 4900
    },
    {
      "epoch": 0.4266655115654509,
      "grad_norm": 10.140486717224121,
      "learning_rate": 1.912110127069696e-05,
      "loss": 0.3053,
      "step": 4925
    },
    {
      "epoch": 0.42883132634497095,
      "grad_norm": 2.8873586654663086,
      "learning_rate": 1.9048902579899883e-05,
      "loss": 0.3162,
      "step": 4950
    },
    {
      "epoch": 0.43099714112449106,
      "grad_norm": 1.758466362953186,
      "learning_rate": 1.897670388910281e-05,
      "loss": 0.3178,
      "step": 4975
    },
    {
      "epoch": 0.4331629559040111,
      "grad_norm": 7.523651599884033,
      "learning_rate": 1.890450519830574e-05,
      "loss": 0.2789,
      "step": 5000
    },
    {
      "epoch": 0.43532877068353115,
      "grad_norm": 5.955496311187744,
      "learning_rate": 1.8832306507508666e-05,
      "loss": 0.1777,
      "step": 5025
    },
    {
      "epoch": 0.4374945854630512,
      "grad_norm": 9.068547248840332,
      "learning_rate": 1.8760107816711593e-05,
      "loss": 0.4155,
      "step": 5050
    },
    {
      "epoch": 0.43966040024257125,
      "grad_norm": 4.900373458862305,
      "learning_rate": 1.8687909125914516e-05,
      "loss": 0.2983,
      "step": 5075
    },
    {
      "epoch": 0.4418262150220913,
      "grad_norm": 3.5501790046691895,
      "learning_rate": 1.8615710435117443e-05,
      "loss": 0.3687,
      "step": 5100
    },
    {
      "epoch": 0.44399202980161134,
      "grad_norm": 1.0216822624206543,
      "learning_rate": 1.854351174432037e-05,
      "loss": 0.2428,
      "step": 5125
    },
    {
      "epoch": 0.44615784458113145,
      "grad_norm": 7.637403964996338,
      "learning_rate": 1.8471313053523296e-05,
      "loss": 0.3071,
      "step": 5150
    },
    {
      "epoch": 0.4483236593606515,
      "grad_norm": 9.478981018066406,
      "learning_rate": 1.8399114362726223e-05,
      "loss": 0.2911,
      "step": 5175
    },
    {
      "epoch": 0.45048947414017154,
      "grad_norm": 3.875411033630371,
      "learning_rate": 1.832691567192915e-05,
      "loss": 0.3152,
      "step": 5200
    },
    {
      "epoch": 0.4526552889196916,
      "grad_norm": 1.1700037717819214,
      "learning_rate": 1.8254716981132077e-05,
      "loss": 0.2776,
      "step": 5225
    },
    {
      "epoch": 0.45482110369921164,
      "grad_norm": 4.037864685058594,
      "learning_rate": 1.8182518290335003e-05,
      "loss": 0.2674,
      "step": 5250
    },
    {
      "epoch": 0.4569869184787317,
      "grad_norm": 2.6295673847198486,
      "learning_rate": 1.811031959953793e-05,
      "loss": 0.3035,
      "step": 5275
    },
    {
      "epoch": 0.45915273325825173,
      "grad_norm": 9.654006004333496,
      "learning_rate": 1.8038120908740853e-05,
      "loss": 0.3352,
      "step": 5300
    },
    {
      "epoch": 0.46131854803777184,
      "grad_norm": 7.339272975921631,
      "learning_rate": 1.796592221794378e-05,
      "loss": 0.3879,
      "step": 5325
    },
    {
      "epoch": 0.4634843628172919,
      "grad_norm": 5.668703079223633,
      "learning_rate": 1.7893723527146707e-05,
      "loss": 0.3828,
      "step": 5350
    },
    {
      "epoch": 0.46565017759681193,
      "grad_norm": 11.843222618103027,
      "learning_rate": 1.7821524836349633e-05,
      "loss": 0.2797,
      "step": 5375
    },
    {
      "epoch": 0.467815992376332,
      "grad_norm": 3.3071844577789307,
      "learning_rate": 1.7749326145552564e-05,
      "loss": 0.3492,
      "step": 5400
    },
    {
      "epoch": 0.469981807155852,
      "grad_norm": 11.303645133972168,
      "learning_rate": 1.7677127454755487e-05,
      "loss": 0.5,
      "step": 5425
    },
    {
      "epoch": 0.4721476219353721,
      "grad_norm": 1.1275362968444824,
      "learning_rate": 1.7604928763958414e-05,
      "loss": 0.2317,
      "step": 5450
    },
    {
      "epoch": 0.4743134367148921,
      "grad_norm": 11.97022533416748,
      "learning_rate": 1.753273007316134e-05,
      "loss": 0.2411,
      "step": 5475
    },
    {
      "epoch": 0.47647925149441217,
      "grad_norm": 2.9647443294525146,
      "learning_rate": 1.7460531382364267e-05,
      "loss": 0.277,
      "step": 5500
    },
    {
      "epoch": 0.4786450662739323,
      "grad_norm": 5.046292781829834,
      "learning_rate": 1.7388332691567194e-05,
      "loss": 0.4112,
      "step": 5525
    },
    {
      "epoch": 0.4808108810534523,
      "grad_norm": 8.11351203918457,
      "learning_rate": 1.7316134000770117e-05,
      "loss": 0.5116,
      "step": 5550
    },
    {
      "epoch": 0.48297669583297237,
      "grad_norm": 1.0861672163009644,
      "learning_rate": 1.7243935309973047e-05,
      "loss": 0.3264,
      "step": 5575
    },
    {
      "epoch": 0.4851425106124924,
      "grad_norm": 2.311553955078125,
      "learning_rate": 1.7171736619175974e-05,
      "loss": 0.3688,
      "step": 5600
    },
    {
      "epoch": 0.48730832539201246,
      "grad_norm": 2.371721029281616,
      "learning_rate": 1.70995379283789e-05,
      "loss": 0.3224,
      "step": 5625
    },
    {
      "epoch": 0.4894741401715325,
      "grad_norm": 7.7612714767456055,
      "learning_rate": 1.7027339237581827e-05,
      "loss": 0.3778,
      "step": 5650
    },
    {
      "epoch": 0.49163995495105256,
      "grad_norm": 7.416019916534424,
      "learning_rate": 1.695514054678475e-05,
      "loss": 0.3671,
      "step": 5675
    },
    {
      "epoch": 0.49380576973057266,
      "grad_norm": 7.0320940017700195,
      "learning_rate": 1.6882941855987677e-05,
      "loss": 0.3331,
      "step": 5700
    },
    {
      "epoch": 0.4959715845100927,
      "grad_norm": 0.8671308159828186,
      "learning_rate": 1.6810743165190604e-05,
      "loss": 0.3426,
      "step": 5725
    },
    {
      "epoch": 0.49813739928961276,
      "grad_norm": 6.607793807983398,
      "learning_rate": 1.673854447439353e-05,
      "loss": 0.2863,
      "step": 5750
    },
    {
      "epoch": 0.5003032140691328,
      "grad_norm": 10.399803161621094,
      "learning_rate": 1.666634578359646e-05,
      "loss": 0.5822,
      "step": 5775
    },
    {
      "epoch": 0.5024690288486529,
      "grad_norm": 2.4261348247528076,
      "learning_rate": 1.6594147092799384e-05,
      "loss": 0.2687,
      "step": 5800
    },
    {
      "epoch": 0.5046348436281729,
      "grad_norm": 0.30012401938438416,
      "learning_rate": 1.652194840200231e-05,
      "loss": 0.3365,
      "step": 5825
    },
    {
      "epoch": 0.506800658407693,
      "grad_norm": 8.255668640136719,
      "learning_rate": 1.6449749711205238e-05,
      "loss": 0.4609,
      "step": 5850
    },
    {
      "epoch": 0.508966473187213,
      "grad_norm": 6.495670795440674,
      "learning_rate": 1.6377551020408164e-05,
      "loss": 0.3127,
      "step": 5875
    },
    {
      "epoch": 0.511132287966733,
      "grad_norm": 4.311783790588379,
      "learning_rate": 1.630535232961109e-05,
      "loss": 0.2705,
      "step": 5900
    },
    {
      "epoch": 0.5132981027462531,
      "grad_norm": 7.5022430419921875,
      "learning_rate": 1.6233153638814014e-05,
      "loss": 0.3089,
      "step": 5925
    },
    {
      "epoch": 0.5154639175257731,
      "grad_norm": 9.813260078430176,
      "learning_rate": 1.616095494801694e-05,
      "loss": 0.3386,
      "step": 5950
    },
    {
      "epoch": 0.5176297323052933,
      "grad_norm": 8.11892318725586,
      "learning_rate": 1.608875625721987e-05,
      "loss": 0.3796,
      "step": 5975
    },
    {
      "epoch": 0.5197955470848133,
      "grad_norm": 8.750290870666504,
      "learning_rate": 1.6016557566422798e-05,
      "loss": 0.4231,
      "step": 6000
    },
    {
      "epoch": 0.5219613618643334,
      "grad_norm": 8.316088676452637,
      "learning_rate": 1.5944358875625725e-05,
      "loss": 0.3922,
      "step": 6025
    },
    {
      "epoch": 0.5241271766438534,
      "grad_norm": 4.458547592163086,
      "learning_rate": 1.5872160184828648e-05,
      "loss": 0.3138,
      "step": 6050
    },
    {
      "epoch": 0.5262929914233735,
      "grad_norm": 4.100847244262695,
      "learning_rate": 1.5799961494031575e-05,
      "loss": 0.3106,
      "step": 6075
    },
    {
      "epoch": 0.5284588062028935,
      "grad_norm": 3.5927000045776367,
      "learning_rate": 1.57277628032345e-05,
      "loss": 0.188,
      "step": 6100
    },
    {
      "epoch": 0.5306246209824136,
      "grad_norm": 0.6444216370582581,
      "learning_rate": 1.5655564112437428e-05,
      "loss": 0.209,
      "step": 6125
    },
    {
      "epoch": 0.5327904357619336,
      "grad_norm": 6.649785041809082,
      "learning_rate": 1.5583365421640355e-05,
      "loss": 0.2617,
      "step": 6150
    },
    {
      "epoch": 0.5349562505414537,
      "grad_norm": 8.491826057434082,
      "learning_rate": 1.551116673084328e-05,
      "loss": 0.3059,
      "step": 6175
    },
    {
      "epoch": 0.5371220653209737,
      "grad_norm": 22.71511459350586,
      "learning_rate": 1.5438968040046208e-05,
      "loss": 0.2764,
      "step": 6200
    },
    {
      "epoch": 0.5392878801004938,
      "grad_norm": 6.877171516418457,
      "learning_rate": 1.5366769349249135e-05,
      "loss": 0.2801,
      "step": 6225
    },
    {
      "epoch": 0.5414536948800138,
      "grad_norm": 0.46479833126068115,
      "learning_rate": 1.529457065845206e-05,
      "loss": 0.3744,
      "step": 6250
    },
    {
      "epoch": 0.5436195096595339,
      "grad_norm": 7.200215816497803,
      "learning_rate": 1.5222371967654987e-05,
      "loss": 0.3067,
      "step": 6275
    },
    {
      "epoch": 0.5457853244390539,
      "grad_norm": 6.230359077453613,
      "learning_rate": 1.5150173276857913e-05,
      "loss": 0.3305,
      "step": 6300
    },
    {
      "epoch": 0.5479511392185741,
      "grad_norm": 3.2241950035095215,
      "learning_rate": 1.5077974586060838e-05,
      "loss": 0.2827,
      "step": 6325
    },
    {
      "epoch": 0.5501169539980941,
      "grad_norm": 10.813590049743652,
      "learning_rate": 1.5005775895263765e-05,
      "loss": 0.2712,
      "step": 6350
    },
    {
      "epoch": 0.5522827687776142,
      "grad_norm": 3.5207877159118652,
|
1792 |
+
"learning_rate": 1.4933577204466692e-05,
|
1793 |
+
"loss": 0.2677,
|
1794 |
+
"step": 6375
|
1795 |
+
},
|
1796 |
+
{
|
1797 |
+
"epoch": 0.5544485835571342,
|
1798 |
+
"grad_norm": 6.884098529815674,
|
1799 |
+
"learning_rate": 1.4861378513669619e-05,
|
1800 |
+
"loss": 0.4269,
|
1801 |
+
"step": 6400
|
1802 |
+
},
|
1803 |
+
{
|
1804 |
+
"epoch": 0.5566143983366543,
|
1805 |
+
"grad_norm": 12.490416526794434,
|
1806 |
+
"learning_rate": 1.4789179822872547e-05,
|
1807 |
+
"loss": 0.3834,
|
1808 |
+
"step": 6425
|
1809 |
+
},
|
1810 |
+
{
|
1811 |
+
"epoch": 0.5587802131161743,
|
1812 |
+
"grad_norm": 6.844019889831543,
|
1813 |
+
"learning_rate": 1.4716981132075472e-05,
|
1814 |
+
"loss": 0.4177,
|
1815 |
+
"step": 6450
|
1816 |
+
},
|
1817 |
+
{
|
1818 |
+
"epoch": 0.5609460278956944,
|
1819 |
+
"grad_norm": 2.4574711322784424,
|
1820 |
+
"learning_rate": 1.4644782441278399e-05,
|
1821 |
+
"loss": 0.2457,
|
1822 |
+
"step": 6475
|
1823 |
+
},
|
1824 |
+
{
|
1825 |
+
"epoch": 0.5631118426752144,
|
1826 |
+
"grad_norm": 4.939560413360596,
|
1827 |
+
"learning_rate": 1.4572583750481324e-05,
|
1828 |
+
"loss": 0.348,
|
1829 |
+
"step": 6500
|
1830 |
+
},
|
1831 |
+
{
|
1832 |
+
"epoch": 0.5652776574547345,
|
1833 |
+
"grad_norm": 11.443745613098145,
|
1834 |
+
"learning_rate": 1.4500385059684252e-05,
|
1835 |
+
"loss": 0.3035,
|
1836 |
+
"step": 6525
|
1837 |
+
},
|
1838 |
+
{
|
1839 |
+
"epoch": 0.5674434722342545,
|
1840 |
+
"grad_norm": 5.136826515197754,
|
1841 |
+
"learning_rate": 1.4428186368887177e-05,
|
1842 |
+
"loss": 0.39,
|
1843 |
+
"step": 6550
|
1844 |
+
},
|
1845 |
+
{
|
1846 |
+
"epoch": 0.5696092870137746,
|
1847 |
+
"grad_norm": 8.772330284118652,
|
1848 |
+
"learning_rate": 1.4355987678090104e-05,
|
1849 |
+
"loss": 0.366,
|
1850 |
+
"step": 6575
|
1851 |
+
},
|
1852 |
+
{
|
1853 |
+
"epoch": 0.5717751017932946,
|
1854 |
+
"grad_norm": 0.46080633997917175,
|
1855 |
+
"learning_rate": 1.428378898729303e-05,
|
1856 |
+
"loss": 0.2299,
|
1857 |
+
"step": 6600
|
1858 |
+
},
|
1859 |
+
{
|
1860 |
+
"epoch": 0.5739409165728147,
|
1861 |
+
"grad_norm": 5.478773593902588,
|
1862 |
+
"learning_rate": 1.4211590296495957e-05,
|
1863 |
+
"loss": 0.1737,
|
1864 |
+
"step": 6625
|
1865 |
+
},
|
1866 |
+
{
|
1867 |
+
"epoch": 0.5761067313523347,
|
1868 |
+
"grad_norm": 11.235420227050781,
|
1869 |
+
"learning_rate": 1.4139391605698884e-05,
|
1870 |
+
"loss": 0.3773,
|
1871 |
+
"step": 6650
|
1872 |
+
},
|
1873 |
+
{
|
1874 |
+
"epoch": 0.5782725461318549,
|
1875 |
+
"grad_norm": 7.810971260070801,
|
1876 |
+
"learning_rate": 1.4067192914901809e-05,
|
1877 |
+
"loss": 0.3409,
|
1878 |
+
"step": 6675
|
1879 |
+
},
|
1880 |
+
{
|
1881 |
+
"epoch": 0.5804383609113749,
|
1882 |
+
"grad_norm": 2.817094087600708,
|
1883 |
+
"learning_rate": 1.3994994224104737e-05,
|
1884 |
+
"loss": 0.1739,
|
1885 |
+
"step": 6700
|
1886 |
+
},
|
1887 |
+
{
|
1888 |
+
"epoch": 0.582604175690895,
|
1889 |
+
"grad_norm": 0.4941748082637787,
|
1890 |
+
"learning_rate": 1.3922795533307664e-05,
|
1891 |
+
"loss": 0.3462,
|
1892 |
+
"step": 6725
|
1893 |
+
},
|
1894 |
+
{
|
1895 |
+
"epoch": 0.584769990470415,
|
1896 |
+
"grad_norm": 1.5013363361358643,
|
1897 |
+
"learning_rate": 1.3850596842510589e-05,
|
1898 |
+
"loss": 0.2976,
|
1899 |
+
"step": 6750
|
1900 |
+
},
|
1901 |
+
{
|
1902 |
+
"epoch": 0.586935805249935,
|
1903 |
+
"grad_norm": 4.63820219039917,
|
1904 |
+
"learning_rate": 1.3778398151713516e-05,
|
1905 |
+
"loss": 0.3246,
|
1906 |
+
"step": 6775
|
1907 |
+
},
|
1908 |
+
{
|
1909 |
+
"epoch": 0.5891016200294551,
|
1910 |
+
"grad_norm": 0.6134036779403687,
|
1911 |
+
"learning_rate": 1.3706199460916443e-05,
|
1912 |
+
"loss": 0.3808,
|
1913 |
+
"step": 6800
|
1914 |
+
},
|
1915 |
+
{
|
1916 |
+
"epoch": 0.5912674348089751,
|
1917 |
+
"grad_norm": 9.693577766418457,
|
1918 |
+
"learning_rate": 1.363400077011937e-05,
|
1919 |
+
"loss": 0.2926,
|
1920 |
+
"step": 6825
|
1921 |
+
},
|
1922 |
+
{
|
1923 |
+
"epoch": 0.5934332495884952,
|
1924 |
+
"grad_norm": 8.138602256774902,
|
1925 |
+
"learning_rate": 1.3561802079322296e-05,
|
1926 |
+
"loss": 0.2709,
|
1927 |
+
"step": 6850
|
1928 |
+
},
|
1929 |
+
{
|
1930 |
+
"epoch": 0.5955990643680152,
|
1931 |
+
"grad_norm": 5.065515041351318,
|
1932 |
+
"learning_rate": 1.3489603388525221e-05,
|
1933 |
+
"loss": 0.3777,
|
1934 |
+
"step": 6875
|
1935 |
+
},
|
1936 |
+
{
|
1937 |
+
"epoch": 0.5977648791475353,
|
1938 |
+
"grad_norm": 6.169302463531494,
|
1939 |
+
"learning_rate": 1.341740469772815e-05,
|
1940 |
+
"loss": 0.2834,
|
1941 |
+
"step": 6900
|
1942 |
+
},
|
1943 |
+
{
|
1944 |
+
"epoch": 0.5999306939270553,
|
1945 |
+
"grad_norm": 1.4236884117126465,
|
1946 |
+
"learning_rate": 1.3345206006931074e-05,
|
1947 |
+
"loss": 0.2965,
|
1948 |
+
"step": 6925
|
1949 |
+
},
|
1950 |
+
{
|
1951 |
+
"epoch": 0.6020965087065754,
|
1952 |
+
"grad_norm": 4.954479217529297,
|
1953 |
+
"learning_rate": 1.3273007316134001e-05,
|
1954 |
+
"loss": 0.2399,
|
1955 |
+
"step": 6950
|
1956 |
+
},
|
1957 |
+
{
|
1958 |
+
"epoch": 0.6042623234860954,
|
1959 |
+
"grad_norm": 1.1738444566726685,
|
1960 |
+
"learning_rate": 1.3200808625336928e-05,
|
1961 |
+
"loss": 0.2936,
|
1962 |
+
"step": 6975
|
1963 |
+
},
|
1964 |
+
{
|
1965 |
+
"epoch": 0.6064281382656155,
|
1966 |
+
"grad_norm": 6.822793006896973,
|
1967 |
+
"learning_rate": 1.3128609934539855e-05,
|
1968 |
+
"loss": 0.2674,
|
1969 |
+
"step": 7000
|
1970 |
+
},
|
1971 |
+
{
|
1972 |
+
"epoch": 0.6085939530451355,
|
1973 |
+
"grad_norm": 9.408463478088379,
|
1974 |
+
"learning_rate": 1.3056411243742781e-05,
|
1975 |
+
"loss": 0.265,
|
1976 |
+
"step": 7025
|
1977 |
+
},
|
1978 |
+
{
|
1979 |
+
"epoch": 0.6107597678246557,
|
1980 |
+
"grad_norm": 24.97877311706543,
|
1981 |
+
"learning_rate": 1.2984212552945706e-05,
|
1982 |
+
"loss": 0.3257,
|
1983 |
+
"step": 7050
|
1984 |
+
},
|
1985 |
+
{
|
1986 |
+
"epoch": 0.6129255826041757,
|
1987 |
+
"grad_norm": 2.854039192199707,
|
1988 |
+
"learning_rate": 1.2912013862148633e-05,
|
1989 |
+
"loss": 0.3504,
|
1990 |
+
"step": 7075
|
1991 |
+
},
|
1992 |
+
{
|
1993 |
+
"epoch": 0.6150913973836958,
|
1994 |
+
"grad_norm": 0.40900859236717224,
|
1995 |
+
"learning_rate": 1.283981517135156e-05,
|
1996 |
+
"loss": 0.1485,
|
1997 |
+
"step": 7100
|
1998 |
+
},
|
1999 |
+
{
|
2000 |
+
"epoch": 0.6172572121632158,
|
2001 |
+
"grad_norm": 5.776600360870361,
|
2002 |
+
"learning_rate": 1.2767616480554486e-05,
|
2003 |
+
"loss": 0.2598,
|
2004 |
+
"step": 7125
|
2005 |
+
},
|
2006 |
+
{
|
2007 |
+
"epoch": 0.6194230269427359,
|
2008 |
+
"grad_norm": 1.7507195472717285,
|
2009 |
+
"learning_rate": 1.2695417789757413e-05,
|
2010 |
+
"loss": 0.2838,
|
2011 |
+
"step": 7150
|
2012 |
+
},
|
2013 |
+
{
|
2014 |
+
"epoch": 0.6215888417222559,
|
2015 |
+
"grad_norm": 7.723363399505615,
|
2016 |
+
"learning_rate": 1.2623219098960338e-05,
|
2017 |
+
"loss": 0.3391,
|
2018 |
+
"step": 7175
|
2019 |
+
},
|
2020 |
+
{
|
2021 |
+
"epoch": 0.623754656501776,
|
2022 |
+
"grad_norm": 6.485815048217773,
|
2023 |
+
"learning_rate": 1.2551020408163267e-05,
|
2024 |
+
"loss": 0.3568,
|
2025 |
+
"step": 7200
|
2026 |
+
},
|
2027 |
+
{
|
2028 |
+
"epoch": 0.625920471281296,
|
2029 |
+
"grad_norm": 0.392874151468277,
|
2030 |
+
"learning_rate": 1.2481709664998075e-05,
|
2031 |
+
"loss": 0.3001,
|
2032 |
+
"step": 7225
|
2033 |
+
},
|
2034 |
+
{
|
2035 |
+
"epoch": 0.6280862860608161,
|
2036 |
+
"grad_norm": 1.3930811882019043,
|
2037 |
+
"learning_rate": 1.2409510974201001e-05,
|
2038 |
+
"loss": 0.2613,
|
2039 |
+
"step": 7250
|
2040 |
+
},
|
2041 |
+
{
|
2042 |
+
"epoch": 0.6302521008403361,
|
2043 |
+
"grad_norm": 0.3461158275604248,
|
2044 |
+
"learning_rate": 1.2337312283403928e-05,
|
2045 |
+
"loss": 0.3379,
|
2046 |
+
"step": 7275
|
2047 |
+
},
|
2048 |
+
{
|
2049 |
+
"epoch": 0.6324179156198562,
|
2050 |
+
"grad_norm": 3.489888906478882,
|
2051 |
+
"learning_rate": 1.2265113592606855e-05,
|
2052 |
+
"loss": 0.3347,
|
2053 |
+
"step": 7300
|
2054 |
+
},
|
2055 |
+
{
|
2056 |
+
"epoch": 0.6345837303993762,
|
2057 |
+
"grad_norm": 2.3235511779785156,
|
2058 |
+
"learning_rate": 1.219291490180978e-05,
|
2059 |
+
"loss": 0.242,
|
2060 |
+
"step": 7325
|
2061 |
+
},
|
2062 |
+
{
|
2063 |
+
"epoch": 0.6367495451788963,
|
2064 |
+
"grad_norm": 10.576093673706055,
|
2065 |
+
"learning_rate": 1.2120716211012708e-05,
|
2066 |
+
"loss": 0.3076,
|
2067 |
+
"step": 7350
|
2068 |
+
},
|
2069 |
+
{
|
2070 |
+
"epoch": 0.6389153599584163,
|
2071 |
+
"grad_norm": 4.862971305847168,
|
2072 |
+
"learning_rate": 1.2048517520215633e-05,
|
2073 |
+
"loss": 0.3055,
|
2074 |
+
"step": 7375
|
2075 |
+
},
|
2076 |
+
{
|
2077 |
+
"epoch": 0.6410811747379365,
|
2078 |
+
"grad_norm": 4.282524108886719,
|
2079 |
+
"learning_rate": 1.197631882941856e-05,
|
2080 |
+
"loss": 0.4014,
|
2081 |
+
"step": 7400
|
2082 |
+
},
|
2083 |
+
{
|
2084 |
+
"epoch": 0.6432469895174565,
|
2085 |
+
"grad_norm": 1.2869305610656738,
|
2086 |
+
"learning_rate": 1.1904120138621487e-05,
|
2087 |
+
"loss": 0.3723,
|
2088 |
+
"step": 7425
|
2089 |
+
},
|
2090 |
+
{
|
2091 |
+
"epoch": 0.6454128042969766,
|
2092 |
+
"grad_norm": 8.37488842010498,
|
2093 |
+
"learning_rate": 1.1831921447824414e-05,
|
2094 |
+
"loss": 0.3421,
|
2095 |
+
"step": 7450
|
2096 |
+
},
|
2097 |
+
{
|
2098 |
+
"epoch": 0.6475786190764966,
|
2099 |
+
"grad_norm": 8.292667388916016,
|
2100 |
+
"learning_rate": 1.175972275702734e-05,
|
2101 |
+
"loss": 0.4306,
|
2102 |
+
"step": 7475
|
2103 |
+
},
|
2104 |
+
{
|
2105 |
+
"epoch": 0.6497444338560167,
|
2106 |
+
"grad_norm": 7.678843975067139,
|
2107 |
+
"learning_rate": 1.1687524066230265e-05,
|
2108 |
+
"loss": 0.2536,
|
2109 |
+
"step": 7500
|
2110 |
+
},
|
2111 |
+
{
|
2112 |
+
"epoch": 0.6519102486355367,
|
2113 |
+
"grad_norm": 1.5608030557632446,
|
2114 |
+
"learning_rate": 1.1615325375433192e-05,
|
2115 |
+
"loss": 0.264,
|
2116 |
+
"step": 7525
|
2117 |
+
},
|
2118 |
+
{
|
2119 |
+
"epoch": 0.6540760634150568,
|
2120 |
+
"grad_norm": 7.649046897888184,
|
2121 |
+
"learning_rate": 1.1543126684636119e-05,
|
2122 |
+
"loss": 0.1767,
|
2123 |
+
"step": 7550
|
2124 |
+
},
|
2125 |
+
{
|
2126 |
+
"epoch": 0.6562418781945768,
|
2127 |
+
"grad_norm": 4.701557636260986,
|
2128 |
+
"learning_rate": 1.1470927993839045e-05,
|
2129 |
+
"loss": 0.259,
|
2130 |
+
"step": 7575
|
2131 |
+
},
|
2132 |
+
{
|
2133 |
+
"epoch": 0.6584076929740968,
|
2134 |
+
"grad_norm": 14.77114200592041,
|
2135 |
+
"learning_rate": 1.1398729303041972e-05,
|
2136 |
+
"loss": 0.2761,
|
2137 |
+
"step": 7600
|
2138 |
+
},
|
2139 |
+
{
|
2140 |
+
"epoch": 0.6605735077536169,
|
2141 |
+
"grad_norm": 0.08189712464809418,
|
2142 |
+
"learning_rate": 1.1326530612244897e-05,
|
2143 |
+
"loss": 0.2934,
|
2144 |
+
"step": 7625
|
2145 |
+
},
|
2146 |
+
{
|
2147 |
+
"epoch": 0.6627393225331369,
|
2148 |
+
"grad_norm": 8.246410369873047,
|
2149 |
+
"learning_rate": 1.1254331921447826e-05,
|
2150 |
+
"loss": 0.3055,
|
2151 |
+
"step": 7650
|
2152 |
+
},
|
2153 |
+
{
|
2154 |
+
"epoch": 0.664905137312657,
|
2155 |
+
"grad_norm": 2.8091800212860107,
|
2156 |
+
"learning_rate": 1.118213323065075e-05,
|
2157 |
+
"loss": 0.2532,
|
2158 |
+
"step": 7675
|
2159 |
+
},
|
2160 |
+
{
|
2161 |
+
"epoch": 0.667070952092177,
|
2162 |
+
"grad_norm": 8.43855094909668,
|
2163 |
+
"learning_rate": 1.1109934539853677e-05,
|
2164 |
+
"loss": 0.2942,
|
2165 |
+
"step": 7700
|
2166 |
+
},
|
2167 |
+
{
|
2168 |
+
"epoch": 0.6692367668716971,
|
2169 |
+
"grad_norm": 2.259917974472046,
|
2170 |
+
"learning_rate": 1.1037735849056604e-05,
|
2171 |
+
"loss": 0.2048,
|
2172 |
+
"step": 7725
|
2173 |
+
},
|
2174 |
+
{
|
2175 |
+
"epoch": 0.6714025816512171,
|
2176 |
+
"grad_norm": 13.296177864074707,
|
2177 |
+
"learning_rate": 1.096553715825953e-05,
|
2178 |
+
"loss": 0.2884,
|
2179 |
+
"step": 7750
|
2180 |
+
},
|
2181 |
+
{
|
2182 |
+
"epoch": 0.6735683964307373,
|
2183 |
+
"grad_norm": 7.745298862457275,
|
2184 |
+
"learning_rate": 1.0893338467462457e-05,
|
2185 |
+
"loss": 0.3598,
|
2186 |
+
"step": 7775
|
2187 |
+
},
|
2188 |
+
{
|
2189 |
+
"epoch": 0.6757342112102573,
|
2190 |
+
"grad_norm": 1.932173490524292,
|
2191 |
+
"learning_rate": 1.0821139776665382e-05,
|
2192 |
+
"loss": 0.3318,
|
2193 |
+
"step": 7800
|
2194 |
+
},
|
2195 |
+
{
|
2196 |
+
"epoch": 0.6779000259897774,
|
2197 |
+
"grad_norm": 7.833034515380859,
|
2198 |
+
"learning_rate": 1.0748941085868309e-05,
|
2199 |
+
"loss": 0.3058,
|
2200 |
+
"step": 7825
|
2201 |
+
},
|
2202 |
+
{
|
2203 |
+
"epoch": 0.6800658407692974,
|
2204 |
+
"grad_norm": 8.620037078857422,
|
2205 |
+
"learning_rate": 1.0676742395071238e-05,
|
2206 |
+
"loss": 0.3395,
|
2207 |
+
"step": 7850
|
2208 |
+
},
|
2209 |
+
{
|
2210 |
+
"epoch": 0.6822316555488175,
|
2211 |
+
"grad_norm": 8.948209762573242,
|
2212 |
+
"learning_rate": 1.0604543704274163e-05,
|
2213 |
+
"loss": 0.2973,
|
2214 |
+
"step": 7875
|
2215 |
+
},
|
2216 |
+
{
|
2217 |
+
"epoch": 0.6843974703283375,
|
2218 |
+
"grad_norm": 5.001883506774902,
|
2219 |
+
"learning_rate": 1.053234501347709e-05,
|
2220 |
+
"loss": 0.2741,
|
2221 |
+
"step": 7900
|
2222 |
+
},
|
2223 |
+
{
|
2224 |
+
"epoch": 0.6865632851078576,
|
2225 |
+
"grad_norm": 10.376258850097656,
|
2226 |
+
"learning_rate": 1.0460146322680016e-05,
|
2227 |
+
"loss": 0.2493,
|
2228 |
+
"step": 7925
|
2229 |
+
},
|
2230 |
+
{
|
2231 |
+
"epoch": 0.6887290998873776,
|
2232 |
+
"grad_norm": 9.021862030029297,
|
2233 |
+
"learning_rate": 1.0387947631882943e-05,
|
2234 |
+
"loss": 0.2966,
|
2235 |
+
"step": 7950
|
2236 |
+
},
|
2237 |
+
{
|
2238 |
+
"epoch": 0.6908949146668977,
|
2239 |
+
"grad_norm": 12.025108337402344,
|
2240 |
+
"learning_rate": 1.0315748941085868e-05,
|
2241 |
+
"loss": 0.3207,
|
2242 |
+
"step": 7975
|
2243 |
+
},
|
2244 |
+
{
|
2245 |
+
"epoch": 0.6930607294464177,
|
2246 |
+
"grad_norm": 0.8383066058158875,
|
2247 |
+
"learning_rate": 1.0243550250288794e-05,
|
2248 |
+
"loss": 0.2501,
|
2249 |
+
"step": 8000
|
2250 |
+
},
|
2251 |
+
{
|
2252 |
+
"epoch": 0.6952265442259378,
|
2253 |
+
"grad_norm": 2.6812140941619873,
|
2254 |
+
"learning_rate": 1.0171351559491723e-05,
|
2255 |
+
"loss": 0.4028,
|
2256 |
+
"step": 8025
|
2257 |
+
},
|
2258 |
+
{
|
2259 |
+
"epoch": 0.6973923590054578,
|
2260 |
+
"grad_norm": 11.301798820495605,
|
2261 |
+
"learning_rate": 1.0099152868694648e-05,
|
2262 |
+
"loss": 0.3549,
|
2263 |
+
"step": 8050
|
2264 |
+
},
|
2265 |
+
{
|
2266 |
+
"epoch": 0.6995581737849779,
|
2267 |
+
"grad_norm": 8.55245304107666,
|
2268 |
+
"learning_rate": 1.0026954177897575e-05,
|
2269 |
+
"loss": 0.3805,
|
2270 |
+
"step": 8075
|
2271 |
+
},
|
2272 |
+
{
|
2273 |
+
"epoch": 0.7017239885644979,
|
2274 |
+
"grad_norm": 1.9036015272140503,
|
2275 |
+
"learning_rate": 9.9547554871005e-06,
|
2276 |
+
"loss": 0.353,
|
2277 |
+
"step": 8100
|
2278 |
+
},
|
2279 |
+
{
|
2280 |
+
"epoch": 0.7038898033440181,
|
2281 |
+
"grad_norm": 1.0196151733398438,
|
2282 |
+
"learning_rate": 9.882556796303428e-06,
|
2283 |
+
"loss": 0.3569,
|
2284 |
+
"step": 8125
|
2285 |
+
},
|
2286 |
+
{
|
2287 |
+
"epoch": 0.7060556181235381,
|
2288 |
+
"grad_norm": 2.688908338546753,
|
2289 |
+
"learning_rate": 9.810358105506355e-06,
|
2290 |
+
"loss": 0.2588,
|
2291 |
+
"step": 8150
|
2292 |
+
},
|
2293 |
+
{
|
2294 |
+
"epoch": 0.7082214329030582,
|
2295 |
+
"grad_norm": 0.6335782408714294,
|
2296 |
+
"learning_rate": 9.73815941470928e-06,
|
2297 |
+
"loss": 0.2252,
|
2298 |
+
"step": 8175
|
2299 |
+
},
|
2300 |
+
{
|
2301 |
+
"epoch": 0.7103872476825782,
|
2302 |
+
"grad_norm": 4.539221286773682,
|
2303 |
+
"learning_rate": 9.665960723912206e-06,
|
2304 |
+
"loss": 0.2747,
|
2305 |
+
"step": 8200
|
2306 |
+
},
|
2307 |
+
{
|
2308 |
+
"epoch": 0.7125530624620983,
|
2309 |
+
"grad_norm": 8.757186889648438,
|
2310 |
+
"learning_rate": 9.593762033115133e-06,
|
2311 |
+
"loss": 0.3239,
|
2312 |
+
"step": 8225
|
2313 |
+
},
|
2314 |
+
{
|
2315 |
+
"epoch": 0.7147188772416183,
|
2316 |
+
"grad_norm": 1.7275235652923584,
|
2317 |
+
"learning_rate": 9.52156334231806e-06,
|
2318 |
+
"loss": 0.2954,
|
2319 |
+
"step": 8250
|
2320 |
+
},
|
2321 |
+
{
|
2322 |
+
"epoch": 0.7168846920211384,
|
2323 |
+
"grad_norm": 6.338670253753662,
|
2324 |
+
"learning_rate": 9.449364651520987e-06,
|
2325 |
+
"loss": 0.3749,
|
2326 |
+
"step": 8275
|
2327 |
+
},
|
2328 |
+
{
|
2329 |
+
"epoch": 0.7190505068006584,
|
2330 |
+
"grad_norm": 1.565496563911438,
|
2331 |
+
"learning_rate": 9.377165960723912e-06,
|
2332 |
+
"loss": 0.2757,
|
2333 |
+
"step": 8300
|
2334 |
+
},
|
2335 |
+
{
|
2336 |
+
"epoch": 0.7212163215801785,
|
2337 |
+
"grad_norm": 0.0664602667093277,
|
2338 |
+
"learning_rate": 9.30496726992684e-06,
|
2339 |
+
"loss": 0.3012,
|
2340 |
+
"step": 8325
|
2341 |
+
},
|
2342 |
+
{
|
2343 |
+
"epoch": 0.7233821363596985,
|
2344 |
+
"grad_norm": 10.375814437866211,
|
2345 |
+
"learning_rate": 9.232768579129765e-06,
|
2346 |
+
"loss": 0.2985,
|
2347 |
+
"step": 8350
|
2348 |
+
},
|
2349 |
+
{
|
2350 |
+
"epoch": 0.7255479511392185,
|
2351 |
+
"grad_norm": 16.607072830200195,
|
2352 |
+
"learning_rate": 9.160569888332692e-06,
|
2353 |
+
"loss": 0.2656,
|
2354 |
+
"step": 8375
|
2355 |
+
},
|
2356 |
+
{
|
2357 |
+
"epoch": 0.7277137659187386,
|
2358 |
+
"grad_norm": 0.6724597811698914,
|
2359 |
+
"learning_rate": 9.088371197535618e-06,
|
2360 |
+
"loss": 0.2007,
|
2361 |
+
"step": 8400
|
2362 |
+
},
|
2363 |
+
{
|
2364 |
+
"epoch": 0.7298795806982586,
|
2365 |
+
"grad_norm": 2.3397414684295654,
|
2366 |
+
"learning_rate": 9.016172506738545e-06,
|
2367 |
+
"loss": 0.2402,
|
2368 |
+
"step": 8425
|
2369 |
+
},
|
2370 |
+
{
|
2371 |
+
"epoch": 0.7320453954777787,
|
2372 |
+
"grad_norm": 11.172548294067383,
|
2373 |
+
"learning_rate": 8.943973815941472e-06,
|
2374 |
+
"loss": 0.3434,
|
2375 |
+
"step": 8450
|
2376 |
+
},
|
2377 |
+
{
|
2378 |
+
"epoch": 0.7342112102572987,
|
2379 |
+
"grad_norm": 12.031539916992188,
|
2380 |
+
"learning_rate": 8.871775125144397e-06,
|
2381 |
+
"loss": 0.2628,
|
2382 |
+
"step": 8475
|
2383 |
+
},
|
2384 |
+
{
|
2385 |
+
"epoch": 0.7363770250368189,
|
2386 |
+
"grad_norm": 0.37211769819259644,
|
2387 |
+
"learning_rate": 8.799576434347324e-06,
|
2388 |
+
"loss": 0.265,
|
2389 |
+
"step": 8500
|
2390 |
+
},
|
2391 |
+
{
|
2392 |
+
"epoch": 0.738542839816339,
|
2393 |
+
"grad_norm": 6.181528568267822,
|
2394 |
+
"learning_rate": 8.72737774355025e-06,
|
2395 |
+
"loss": 0.3748,
|
2396 |
+
"step": 8525
|
2397 |
+
},
|
2398 |
+
{
|
2399 |
+
"epoch": 0.740708654595859,
|
2400 |
+
"grad_norm": 2.7227742671966553,
|
2401 |
+
"learning_rate": 8.655179052753177e-06,
|
2402 |
+
"loss": 0.249,
|
2403 |
+
"step": 8550
|
2404 |
+
},
|
2405 |
+
{
|
2406 |
+
"epoch": 0.742874469375379,
|
2407 |
+
"grad_norm": 7.977476596832275,
|
2408 |
+
"learning_rate": 8.582980361956104e-06,
|
2409 |
+
"loss": 0.3375,
|
2410 |
+
"step": 8575
|
2411 |
+
},
|
2412 |
+
{
|
2413 |
+
"epoch": 0.7450402841548991,
|
2414 |
+
"grad_norm": 11.404130935668945,
|
2415 |
+
"learning_rate": 8.510781671159029e-06,
|
2416 |
+
"loss": 0.3336,
|
2417 |
+
"step": 8600
|
2418 |
+
},
|
2419 |
+
{
|
2420 |
+
"epoch": 0.7472060989344191,
|
2421 |
+
"grad_norm": 0.4421218931674957,
|
2422 |
+
"learning_rate": 8.438582980361957e-06,
|
2423 |
+
"loss": 0.3702,
|
2424 |
+
"step": 8625
|
2425 |
+
},
|
2426 |
+
{
|
2427 |
+
"epoch": 0.7493719137139392,
|
2428 |
+
"grad_norm": 4.386607646942139,
|
2429 |
+
"learning_rate": 8.366384289564882e-06,
|
2430 |
+
"loss": 0.3494,
|
2431 |
+
"step": 8650
|
2432 |
+
},
|
2433 |
+
{
|
2434 |
+
"epoch": 0.7515377284934592,
|
2435 |
+
"grad_norm": 5.428525924682617,
|
2436 |
+
"learning_rate": 8.294185598767809e-06,
|
2437 |
+
"loss": 0.2996,
|
2438 |
+
"step": 8675
|
2439 |
+
},
|
2440 |
+
{
|
2441 |
+
"epoch": 0.7537035432729793,
|
2442 |
+
"grad_norm": 0.3034394085407257,
|
2443 |
+
"learning_rate": 8.221986907970736e-06,
|
2444 |
+
"loss": 0.2433,
|
2445 |
+
"step": 8700
|
2446 |
+
},
|
2447 |
+
{
|
2448 |
+
"epoch": 0.7558693580524993,
|
2449 |
+
"grad_norm": 3.75878643989563,
|
2450 |
+
"learning_rate": 8.149788217173662e-06,
|
2451 |
+
"loss": 0.3027,
|
2452 |
+
"step": 8725
|
2453 |
+
},
|
2454 |
+
{
|
2455 |
+
"epoch": 0.7580351728320194,
|
2456 |
+
"grad_norm": 9.965909004211426,
|
2457 |
+
"learning_rate": 8.077589526376589e-06,
|
2458 |
+
"loss": 0.382,
|
2459 |
+
"step": 8750
|
2460 |
+
},
|
2461 |
+
{
|
2462 |
+
"epoch": 0.7602009876115394,
|
2463 |
+
"grad_norm": 7.314566135406494,
|
2464 |
+
"learning_rate": 8.005390835579514e-06,
|
2465 |
+
"loss": 0.2874,
|
2466 |
+
"step": 8775
|
2467 |
+
},
|
2468 |
+
{
|
2469 |
+
"epoch": 0.7623668023910595,
|
2470 |
+
"grad_norm": 8.704547882080078,
|
2471 |
+
"learning_rate": 7.93319214478244e-06,
|
2472 |
+
"loss": 0.2737,
|
2473 |
+
"step": 8800
|
2474 |
+
},
|
2475 |
+
{
|
2476 |
+
"epoch": 0.7645326171705795,
|
2477 |
+
"grad_norm": 10.275945663452148,
|
2478 |
+
"learning_rate": 7.86099345398537e-06,
|
2479 |
+
"loss": 0.3212,
|
2480 |
+
"step": 8825
|
2481 |
+
},
|
2482 |
+
{
|
2483 |
+
"epoch": 0.7666984319500997,
|
2484 |
+
"grad_norm": 4.1912641525268555,
|
2485 |
+
"learning_rate": 7.788794763188294e-06,
|
2486 |
+
"loss": 0.3475,
|
2487 |
+
"step": 8850
|
2488 |
+
},
|
2489 |
+
{
|
2490 |
+
"epoch": 0.7688642467296197,
|
2491 |
+
"grad_norm": 10.281148910522461,
|
2492 |
+
"learning_rate": 7.716596072391221e-06,
|
2493 |
+
"loss": 0.221,
|
2494 |
+
"step": 8875
|
2495 |
+
},
|
2496 |
+
{
|
2497 |
+
"epoch": 0.7710300615091398,
|
2498 |
+
"grad_norm": 9.613810539245605,
|
2499 |
+
"learning_rate": 7.644397381594146e-06,
|
2500 |
+
"loss": 0.2587,
|
2501 |
+
"step": 8900
|
2502 |
+
},
|
2503 |
+
{
|
2504 |
+
"epoch": 0.7731958762886598,
|
2505 |
+
"grad_norm": 1.2200976610183716,
|
2506 |
+
"learning_rate": 7.572198690797074e-06,
|
2507 |
+
"loss": 0.2852,
|
2508 |
+
"step": 8925
|
2509 |
+
},
|
2510 |
+
{
|
2511 |
+
"epoch": 0.7753616910681799,
|
2512 |
+
"grad_norm": 2.445672035217285,
|
2513 |
+
"learning_rate": 7.5e-06,
|
2514 |
+
"loss": 0.3837,
|
2515 |
+
"step": 8950
|
2516 |
+
},
|
2517 |
+
{
|
2518 |
+
"epoch": 0.7775275058476999,
|
2519 |
+
"grad_norm": 13.744851112365723,
|
2520 |
+
"learning_rate": 7.427801309202927e-06,
|
2521 |
+
"loss": 0.2333,
|
2522 |
+
"step": 8975
|
2523 |
+
},
|
2524 |
+
{
|
2525 |
+
"epoch": 0.77969332062722,
|
2526 |
+
"grad_norm": 4.426064968109131,
|
2527 |
+
"learning_rate": 7.355602618405853e-06,
|
2528 |
+
"loss": 0.3036,
|
2529 |
+
"step": 9000
|
2530 |
+
},
|
2531 |
+
{
|
2532 |
+
"epoch": 0.78185913540674,
|
2533 |
+
"grad_norm": 8.329988479614258,
|
2534 |
+
"learning_rate": 7.28340392760878e-06,
|
2535 |
+
"loss": 0.3287,
|
2536 |
+
"step": 9025
|
2537 |
+
},
|
2538 |
+
{
|
2539 |
+
"epoch": 0.7840249501862601,
|
2540 |
+
"grad_norm": 4.122848987579346,
|
2541 |
+
"learning_rate": 7.211205236811706e-06,
|
2542 |
+
"loss": 0.3248,
|
2543 |
+
"step": 9050
|
2544 |
+
},
|
2545 |
+
{
|
2546 |
+
"epoch": 0.7861907649657801,
|
2547 |
+
"grad_norm": 6.127285480499268,
|
2548 |
+
"learning_rate": 7.139006546014633e-06,
|
2549 |
+
"loss": 0.2395,
|
2550 |
+
"step": 9075
|
2551 |
+
},
|
2552 |
+
{
|
2553 |
+
"epoch": 0.7883565797453002,
|
2554 |
+
"grad_norm": 1.6887600421905518,
|
2555 |
+
"learning_rate": 7.066807855217559e-06,
|
2556 |
+
"loss": 0.2647,
|
2557 |
+
"step": 9100
|
2558 |
+
},
|
2559 |
+
{
|
2560 |
+
"epoch": 0.7905223945248202,
|
2561 |
+
"grad_norm": 1.4300670623779297,
|
2562 |
+
"learning_rate": 6.9946091644204855e-06,
|
2563 |
+
"loss": 0.3345,
|
2564 |
+
"step": 9125
|
2565 |
+
},
|
2566 |
+
{
|
2567 |
+
"epoch": 0.7926882093043403,
|
2568 |
+
"grad_norm": 9.334101676940918,
|
2569 |
+
"learning_rate": 6.922410473623411e-06,
|
2570 |
+
"loss": 0.3421,
|
2571 |
+
"step": 9150
|
2572 |
+
},
|
2573 |
+
{
|
2574 |
+
"epoch": 0.7948540240838603,
|
2575 |
+
"grad_norm": 6.996714115142822,
|
2576 |
+
"learning_rate": 6.850211782826339e-06,
|
2577 |
+
"loss": 0.3496,
|
2578 |
+
"step": 9175
|
2579 |
+
},
|
2580 |
+
{
|
2581 |
+
"epoch": 0.7970198388633805,
|
2582 |
+
"grad_norm": 8.47280216217041,
|
2583 |
+
"learning_rate": 6.778013092029265e-06,
|
2584 |
+
"loss": 0.253,
|
2585 |
+
"step": 9200
|
2586 |
+
},
|
2587 |
+
{
|
2588 |
+
"epoch": 0.7991856536429005,
|
2589 |
+
"grad_norm": 3.239483118057251,
|
2590 |
+
"learning_rate": 6.708702348864075e-06,
|
2591 |
+
"loss": 0.3462,
|
2592 |
+
"step": 9225
|
2593 |
+
},
|
2594 |
+
{
|
2595 |
+
"epoch": 0.8013514684224206,
|
2596 |
+
"grad_norm": 1.6153030395507812,
|
2597 |
+
"learning_rate": 6.6365036580670006e-06,
|
2598 |
+
"loss": 0.2688,
|
2599 |
+
"step": 9250
|
2600 |
+
},
|
2601 |
+
{
|
2602 |
+
"epoch": 0.8035172832019406,
|
2603 |
+
"grad_norm": 5.316878795623779,
|
2604 |
+
"learning_rate": 6.564304967269927e-06,
|
2605 |
+
"loss": 0.3301,
|
2606 |
+
"step": 9275
|
2607 |
+
},
|
2608 |
+
{
|
2609 |
+
"epoch": 0.8056830979814606,
|
2610 |
+
"grad_norm": 8.06822395324707,
|
2611 |
+
"learning_rate": 6.492106276472853e-06,
|
2612 |
+
"loss": 0.3382,
|
2613 |
+
"step": 9300
|
2614 |
+
},
|
2615 |
+
{
|
2616 |
+
"epoch": 0.8078489127609807,
|
2617 |
+
"grad_norm": 2.8038644790649414,
|
2618 |
+
"learning_rate": 6.41990758567578e-06,
|
2619 |
+
"loss": 0.2219,
|
2620 |
+
"step": 9325
|
2621 |
+
},
|
2622 |
+
{
|
2623 |
+
"epoch": 0.8100147275405007,
|
2624 |
+
"grad_norm": 5.063823223114014,
|
2625 |
+
"learning_rate": 6.3477088948787066e-06,
|
2626 |
+
"loss": 0.278,
|
2627 |
+
"step": 9350
|
2628 |
+
},
|
2629 |
+
{
|
2630 |
+
"epoch": 0.8121805423200208,
|
2631 |
+
"grad_norm": 6.974782466888428,
|
2632 |
+
"learning_rate": 6.275510204081633e-06,
|
2633 |
+
"loss": 0.2338,
|
2634 |
+
"step": 9375
|
2635 |
+
},
|
2636 |
+
{
|
2637 |
+
"epoch": 0.8143463570995408,
|
2638 |
+
"grad_norm": 2.8085834980010986,
|
2639 |
+
"learning_rate": 6.203311513284559e-06,
|
2640 |
+
"loss": 0.2732,
|
2641 |
+
"step": 9400
|
2642 |
+
},
|
2643 |
+
{
|
2644 |
+
"epoch": 0.8165121718790609,
|
2645 |
+
"grad_norm": 12.976601600646973,
|
2646 |
+
"learning_rate": 6.131112822487486e-06,
|
2647 |
+
"loss": 0.2973,
|
2648 |
+
"step": 9425
|
2649 |
+
},
|
2650 |
+
{
|
2651 |
+
"epoch": 0.8186779866585809,
|
2652 |
+
"grad_norm": 2.7448630332946777,
|
2653 |
+
"learning_rate": 6.058914131690412e-06,
|
2654 |
+
"loss": 0.2783,
|
2655 |
+
"step": 9450
|
2656 |
+
},
|
2657 |
+
{
|
2658 |
+
"epoch": 0.820843801438101,
|
2659 |
+
"grad_norm": 2.347792387008667,
|
2660 |
+
"learning_rate": 5.986715440893339e-06,
|
2661 |
+
"loss": 0.2418,
|
2662 |
+
"step": 9475
|
2663 |
+
},
|
2664 |
+
{
|
2665 |
+
"epoch": 0.823009616217621,
|
2666 |
+
"grad_norm": 2.851559638977051,
|
2667 |
+
"learning_rate": 5.914516750096265e-06,
|
2668 |
+
"loss": 0.2603,
|
2669 |
+
"step": 9500
|
2670 |
+
},
|
2671 |
+
{
|
2672 |
+
"epoch": 0.8251754309971411,
|
2673 |
+
"grad_norm": 6.941406726837158,
|
2674 |
+
"learning_rate": 5.842318059299192e-06,
|
2675 |
+
"loss": 0.1888,
|
2676 |
+
"step": 9525
|
2677 |
+
},
|
2678 |
+
{
|
2679 |
+
"epoch": 0.8273412457766611,
|
2680 |
+
"grad_norm": 4.45375394821167,
|
2681 |
+
"learning_rate": 5.770119368502118e-06,
|
2682 |
+
"loss": 0.2581,
|
2683 |
+
"step": 9550
|
2684 |
+
},
|
2685 |
+
{
|
2686 |
+
"epoch": 0.8295070605561813,
|
2687 |
+
"grad_norm": 5.2709641456604,
|
2688 |
+
"learning_rate": 5.6979206777050444e-06,
|
2689 |
+
"loss": 0.2742,
|
2690 |
+
"step": 9575
|
2691 |
+
},
|
2692 |
+
{
|
2693 |
+
"epoch": 0.8316728753357013,
|
2694 |
+
"grad_norm": 2.6814463138580322,
|
2695 |
+
"learning_rate": 5.62572198690797e-06,
|
2696 |
+
"loss": 0.2156,
|
2697 |
+
"step": 9600
|
2698 |
+
},
|
2699 |
+
{
|
2700 |
+
"epoch": 0.8338386901152214,
|
2701 |
+
"grad_norm": 0.12416364997625351,
|
2702 |
+
"learning_rate": 5.553523296110898e-06,
|
2703 |
+
"loss": 0.3317,
|
2704 |
+
"step": 9625
|
2705 |
+
},
|
2706 |
+
{
|
2707 |
+
"epoch": 0.8360045048947414,
|
2708 |
+
"grad_norm": 5.639218807220459,
|
2709 |
+
"learning_rate": 5.481324605313824e-06,
|
2710 |
+
"loss": 0.1967,
|
2711 |
+
"step": 9650
|
2712 |
+
},
|
2713 |
+
{
|
2714 |
+
"epoch": 0.8381703196742615,
|
2715 |
+
"grad_norm": 0.8800064921379089,
|
2716 |
+
"learning_rate": 5.4091259145167504e-06,
|
2717 |
+
"loss": 0.1701,
|
2718 |
+
"step": 9675
|
2719 |
+
},
|
2720 |
+
{
|
2721 |
+
"epoch": 0.8403361344537815,
|
2722 |
+
"grad_norm": 2.7125442028045654,
|
2723 |
+
"learning_rate": 5.336927223719676e-06,
|
2724 |
+
"loss": 0.3064,
|
2725 |
+
"step": 9700
|
2726 |
+
},
|
2727 |
+
{
|
2728 |
+
"epoch": 0.8425019492333016,
|
2729 |
+
"grad_norm": 3.1365272998809814,
|
2730 |
+
"learning_rate": 5.264728532922603e-06,
|
2731 |
+
"loss": 0.3511,
|
2732 |
+
"step": 9725
|
2733 |
+
},
|
2734 |
+
{
|
2735 |
+
"epoch": 0.8446677640128216,
|
2736 |
+
"grad_norm": 10.584244728088379,
|
2737 |
+
"learning_rate": 5.19252984212553e-06,
|
2738 |
+
"loss": 0.2461,
|
2739 |
+
"step": 9750
|
2740 |
+
},
|
2741 |
+
{
|
2742 |
+
"epoch": 0.8468335787923417,
|
2743 |
+
"grad_norm": 0.7926290035247803,
|
2744 |
+
"learning_rate": 5.1203311513284565e-06,
|
2745 |
+
"loss": 0.3047,
|
2746 |
+
"step": 9775
|
2747 |
+
},
|
2748 |
+
{
|
2749 |
+
"epoch": 0.8489993935718617,
|
2750 |
+
"grad_norm": 10.744616508483887,
|
2751 |
+
"learning_rate": 5.048132460531382e-06,
|
2752 |
+
"loss": 0.3234,
|
2753 |
+
"step": 9800
|
2754 |
+
},
|
2755 |
+
{
|
2756 |
+
"epoch": 0.8511652083513818,
|
2757 |
+
"grad_norm": 3.9436535835266113,
|
2758 |
+
"learning_rate": 4.975933769734309e-06,
|
2759 |
+
"loss": 0.2843,
|
2760 |
+
"step": 9825
|
2761 |
+
},
|
2762 |
+
{
|
2763 |
+
"epoch": 0.8533310231309018,
|
2764 |
+
"grad_norm": 0.2785266637802124,
|
2765 |
+
"learning_rate": 4.903735078937235e-06,
|
2766 |
+
"loss": 0.3365,
|
2767 |
+
"step": 9850
|
2768 |
+
},
|
2769 |
+
{
|
2770 |
+
"epoch": 0.8554968379104219,
|
2771 |
+
"grad_norm": 7.446309566497803,
|
2772 |
+
"learning_rate": 4.831536388140162e-06,
|
2773 |
+
"loss": 0.3802,
|
2774 |
+
"step": 9875
|
2775 |
+
},
|
2776 |
+
{
|
2777 |
+
"epoch": 0.8576626526899419,
|
2778 |
+
"grad_norm": 9.687524795532227,
|
2779 |
+
"learning_rate": 4.759337697343088e-06,
|
2780 |
+
"loss": 0.2587,
|
2781 |
+
"step": 9900
|
2782 |
+
},
|
2783 |
+
{
|
2784 |
+
"epoch": 0.8598284674694621,
|
2785 |
+
"grad_norm": 0.4837453067302704,
|
2786 |
+
"learning_rate": 4.687139006546015e-06,
|
2787 |
+
"loss": 0.2367,
|
2788 |
+
"step": 9925
|
2789 |
+
},
|
2790 |
+
{
|
2791 |
+
"epoch": 0.8619942822489821,
|
2792 |
+
"grad_norm": 0.7170611023902893,
|
2793 |
+
"learning_rate": 4.614940315748941e-06,
|
2794 |
+
"loss": 0.2971,
|
2795 |
+
"step": 9950
|
2796 |
+
},
|
2797 |
+
{
|
2798 |
+
"epoch": 0.8641600970285022,
|
2799 |
+
"grad_norm": 16.417407989501953,
|
2800 |
+
"learning_rate": 4.542741624951868e-06,
|
2801 |
+
"loss": 0.2884,
|
2802 |
+
"step": 9975
|
2803 |
+
},
|
2804 |
+
{
|
2805 |
+
"epoch": 0.8663259118080222,
|
2806 |
+
"grad_norm": 7.771174430847168,
|
2807 |
+
"learning_rate": 4.4705429341547935e-06,
|
2808 |
+
"loss": 0.2296,
|
2809 |
+
"step": 10000
|
2810 |
+
},
|
2811 |
+
{
|
2812 |
+
"epoch": 0.8684917265875423,
|
2813 |
+
"grad_norm": 1.540907859802246,
|
2814 |
+
"learning_rate": 4.398344243357721e-06,
|
2815 |
+
"loss": 0.3145,
|
2816 |
+
"step": 10025
|
2817 |
+
},
|
2818 |
+
{
|
2819 |
+
"epoch": 0.8706575413670623,
|
2820 |
+
"grad_norm": 1.4157791137695312,
|
2821 |
+
"learning_rate": 4.326145552560647e-06,
|
2822 |
+
"loss": 0.178,
|
2823 |
+
"step": 10050
|
2824 |
+
},
|
2825 |
+
{
|
2826 |
+
"epoch": 0.8728233561465824,
|
2827 |
+
"grad_norm": 4.707205295562744,
|
2828 |
+
"learning_rate": 4.253946861763574e-06,
|
2829 |
+
"loss": 0.2681,
|
2830 |
+
"step": 10075
|
2831 |
+
},
|
2832 |
+
{
|
2833 |
+
"epoch": 0.8749891709261024,
|
2834 |
+
"grad_norm": 3.7186520099639893,
|
2835 |
+
"learning_rate": 4.1817481709664995e-06,
|
2836 |
+
"loss": 0.3191,
|
2837 |
+
"step": 10100
|
2838 |
+
},
|
2839 |
+
{
|
2840 |
+
"epoch": 0.8771549857056224,
|
2841 |
+
"grad_norm": 1.6584956645965576,
|
2842 |
+
"learning_rate": 4.109549480169426e-06,
|
2843 |
+
"loss": 0.2544,
|
2844 |
+
"step": 10125
|
2845 |
+
},
|
2846 |
+
{
|
2847 |
+
"epoch": 0.8793208004851425,
|
2848 |
+
"grad_norm": 9.22360610961914,
|
2849 |
+
"learning_rate": 4.037350789372352e-06,
|
2850 |
+
"loss": 0.2965,
|
2851 |
+
"step": 10150
|
2852 |
+
},
|
2853 |
+
{
|
2854 |
+
"epoch": 0.8814866152646625,
|
2855 |
+
"grad_norm": 3.5934746265411377,
|
2856 |
+
"learning_rate": 3.96515209857528e-06,
|
2857 |
+
"loss": 0.317,
|
2858 |
+
"step": 10175
|
2859 |
+
},
|
2860 |
+
{
|
2861 |
+
"epoch": 0.8836524300441826,
|
2862 |
+
"grad_norm": 1.5978528261184692,
|
2863 |
+
"learning_rate": 3.892953407778206e-06,
|
2864 |
+
"loss": 0.2149,
|
2865 |
+
"step": 10200
|
2866 |
+
},
|
2867 |
+
{
|
2868 |
+
"epoch": 0.8858182448237026,
|
2869 |
+
"grad_norm": 4.726417064666748,
|
2870 |
+
"learning_rate": 3.820754716981132e-06,
|
2871 |
+
"loss": 0.4876,
|
2872 |
+
"step": 10225
|
2873 |
+
},
|
2874 |
+
{
|
2875 |
+
"epoch": 0.8879840596032227,
|
2876 |
+
"grad_norm": 7.836237907409668,
|
2877 |
+
"learning_rate": 3.7485560261840585e-06,
|
2878 |
+
"loss": 0.2984,
|
2879 |
+
"step": 10250
|
2880 |
+
},
|
2881 |
+
{
|
2882 |
+
"epoch": 0.8901498743827427,
|
2883 |
+
"grad_norm": 6.5479912757873535,
|
2884 |
+
"learning_rate": 3.676357335386985e-06,
|
2885 |
+
"loss": 0.3024,
|
2886 |
+
"step": 10275
|
2887 |
+
},
|
2888 |
+
{
|
2889 |
+
"epoch": 0.8923156891622629,
|
2890 |
+
"grad_norm": 1.180179476737976,
|
2891 |
+
"learning_rate": 3.6041586445899115e-06,
|
2892 |
+
"loss": 0.2447,
|
2893 |
+
"step": 10300
|
2894 |
+
},
|
2895 |
+
{
|
2896 |
+
"epoch": 0.8944815039417829,
|
2897 |
+
"grad_norm": 5.868828773498535,
|
2898 |
+
"learning_rate": 3.5319599537928378e-06,
|
2899 |
+
"loss": 0.2684,
|
2900 |
+
"step": 10325
|
2901 |
+
},
|
2902 |
+
{
|
2903 |
+
"epoch": 0.896647318721303,
|
2904 |
+
"grad_norm": 6.2655816078186035,
|
2905 |
+
"learning_rate": 3.4597612629957645e-06,
|
2906 |
+
"loss": 0.1714,
|
2907 |
+
"step": 10350
|
2908 |
+
},
|
2909 |
+
{
|
2910 |
+
"epoch": 0.898813133500823,
|
2911 |
+
"grad_norm": 6.3384270668029785,
|
2912 |
+
"learning_rate": 3.387562572198691e-06,
|
2913 |
+
"loss": 0.2776,
|
2914 |
+
"step": 10375
|
2915 |
+
},
|
2916 |
+
{
|
2917 |
+
"epoch": 0.9009789482803431,
|
2918 |
+
"grad_norm": 6.097102165222168,
|
2919 |
+
"learning_rate": 3.315363881401617e-06,
|
2920 |
+
"loss": 0.2745,
|
2921 |
+
"step": 10400
|
2922 |
+
},
|
2923 |
+
{
|
2924 |
+
"epoch": 0.9031447630598631,
|
2925 |
+
"grad_norm": 7.250086784362793,
|
2926 |
+
"learning_rate": 3.243165190604544e-06,
|
2927 |
+
"loss": 0.3299,
|
2928 |
+
"step": 10425
|
2929 |
+
},
|
2930 |
+
{
|
2931 |
+
"epoch": 0.9053105778393832,
|
2932 |
+
"grad_norm": 9.260988235473633,
|
2933 |
+
"learning_rate": 3.17096649980747e-06,
|
2934 |
+
"loss": 0.2629,
|
2935 |
+
"step": 10450
|
2936 |
+
},
|
2937 |
+
{
|
2938 |
+
"epoch": 0.9074763926189032,
|
2939 |
+
"grad_norm": 8.009949684143066,
|
2940 |
+
"learning_rate": 3.0987678090103964e-06,
|
2941 |
+
"loss": 0.3627,
|
2942 |
+
"step": 10475
|
2943 |
+
},
|
2944 |
+
{
|
2945 |
+
"epoch": 0.9096422073984233,
|
2946 |
+
"grad_norm": 1.247878074645996,
|
2947 |
+
"learning_rate": 3.026569118213323e-06,
|
2948 |
+
"loss": 0.2236,
|
2949 |
+
"step": 10500
|
2950 |
+
},
|
2951 |
+
{
|
2952 |
+
"epoch": 0.9118080221779433,
|
2953 |
+
"grad_norm": 6.759634971618652,
|
2954 |
+
"learning_rate": 2.9543704274162494e-06,
|
2955 |
+
"loss": 0.2819,
|
2956 |
+
"step": 10525
|
2957 |
+
},
|
2958 |
+
{
|
2959 |
+
"epoch": 0.9139738369574634,
|
2960 |
+
"grad_norm": 0.09837600588798523,
|
2961 |
+
"learning_rate": 2.882171736619176e-06,
|
2962 |
+
"loss": 0.3129,
|
2963 |
+
"step": 10550
|
2964 |
+
},
|
2965 |
+
{
|
2966 |
+
"epoch": 0.9161396517369834,
|
2967 |
+
"grad_norm": 6.850848197937012,
|
2968 |
+
"learning_rate": 2.8099730458221024e-06,
|
2969 |
+
"loss": 0.3051,
|
2970 |
+
"step": 10575
|
2971 |
+
},
|
2972 |
+
{
|
2973 |
+
"epoch": 0.9183054665165035,
|
2974 |
+
"grad_norm": 8.94210147857666,
|
2975 |
+
"learning_rate": 2.7377743550250287e-06,
|
2976 |
+
"loss": 0.3955,
|
2977 |
+
"step": 10600
|
2978 |
+
},
|
2979 |
+
{
|
2980 |
+
"epoch": 0.9204712812960235,
|
2981 |
+
"grad_norm": 8.595787048339844,
|
2982 |
+
"learning_rate": 2.6655756642279554e-06,
|
2983 |
+
"loss": 0.2493,
|
2984 |
+
"step": 10625
|
2985 |
+
},
|
2986 |
+
{
|
2987 |
+
"epoch": 0.9226370960755437,
|
2988 |
+
"grad_norm": 7.062394618988037,
|
2989 |
+
"learning_rate": 2.5933769734308817e-06,
|
2990 |
+
"loss": 0.2543,
|
2991 |
+
"step": 10650
|
2992 |
+
},
|
2993 |
+
{
|
2994 |
+
"epoch": 0.9248029108550637,
|
2995 |
+
"grad_norm": 3.371393918991089,
|
2996 |
+
"learning_rate": 2.521178282633808e-06,
|
2997 |
+
"loss": 0.2222,
|
2998 |
+
"step": 10675
|
2999 |
+
},
|
3000 |
+
{
|
3001 |
+
"epoch": 0.9269687256345838,
|
3002 |
+
"grad_norm": 1.3468866348266602,
|
3003 |
+
"learning_rate": 2.4489795918367347e-06,
|
3004 |
+
"loss": 0.2823,
|
3005 |
+
"step": 10700
|
3006 |
+
},
|
3007 |
+
{
|
3008 |
+
"epoch": 0.9291345404141038,
|
3009 |
+
"grad_norm": 15.475239753723145,
|
3010 |
+
"learning_rate": 2.376780901039661e-06,
|
3011 |
+
"loss": 0.3098,
|
3012 |
+
"step": 10725
|
3013 |
+
},
|
3014 |
+
{
|
3015 |
+
"epoch": 0.9313003551936239,
|
3016 |
+
"grad_norm": 6.605096340179443,
|
3017 |
+
"learning_rate": 2.3045822102425877e-06,
|
3018 |
+
"loss": 0.3009,
|
3019 |
+
"step": 10750
|
3020 |
+
},
|
3021 |
+
{
|
3022 |
+
"epoch": 0.9334661699731439,
|
3023 |
+
"grad_norm": 3.2146847248077393,
|
3024 |
+
"learning_rate": 2.2323835194455144e-06,
|
3025 |
+
"loss": 0.2623,
|
3026 |
+
"step": 10775
|
3027 |
+
},
|
3028 |
+
{
|
3029 |
+
"epoch": 0.935631984752664,
|
3030 |
+
"grad_norm": 2.727200508117676,
|
3031 |
+
"learning_rate": 2.1601848286484407e-06,
|
3032 |
+
"loss": 0.1952,
|
3033 |
+
"step": 10800
|
3034 |
+
},
|
3035 |
+
{
|
3036 |
+
"epoch": 0.937797799532184,
|
3037 |
+
"grad_norm": 2.7418553829193115,
|
3038 |
+
"learning_rate": 2.0879861378513674e-06,
|
3039 |
+
"loss": 0.4527,
|
3040 |
+
"step": 10825
|
3041 |
+
},
|
3042 |
+
{
|
3043 |
+
"epoch": 0.939963614311704,
|
3044 |
+
"grad_norm": 8.577201843261719,
|
3045 |
+
"learning_rate": 2.0157874470542937e-06,
|
3046 |
+
"loss": 0.2323,
|
3047 |
+
"step": 10850
|
3048 |
+
},
|
3049 |
+
{
|
3050 |
+
"epoch": 0.9421294290912241,
|
3051 |
+
"grad_norm": 4.514817237854004,
|
3052 |
+
"learning_rate": 1.94358875625722e-06,
|
3053 |
+
"loss": 0.3109,
|
3054 |
+
"step": 10875
|
3055 |
+
},
|
3056 |
+
{
|
3057 |
+
"epoch": 0.9442952438707441,
|
3058 |
+
"grad_norm": 10.761394500732422,
|
3059 |
+
"learning_rate": 1.8713900654601463e-06,
|
3060 |
+
"loss": 0.3335,
|
3061 |
+
"step": 10900
|
3062 |
+
},
|
3063 |
+
{
|
3064 |
+
"epoch": 0.9464610586502642,
|
3065 |
+
"grad_norm": 8.004775047302246,
|
3066 |
+
"learning_rate": 1.7991913746630728e-06,
|
3067 |
+
"loss": 0.2862,
|
3068 |
+
"step": 10925
|
3069 |
+
},
|
3070 |
+
{
|
3071 |
+
"epoch": 0.9486268734297842,
|
3072 |
+
"grad_norm": 7.491416931152344,
|
3073 |
+
"learning_rate": 1.7269926838659993e-06,
|
3074 |
+
"loss": 0.4005,
|
3075 |
+
"step": 10950
|
3076 |
+
},
|
3077 |
+
{
|
3078 |
+
"epoch": 0.9507926882093043,
|
3079 |
+
"grad_norm": 6.168478488922119,
|
3080 |
+
"learning_rate": 1.6547939930689255e-06,
|
3081 |
+
"loss": 0.2815,
|
3082 |
+
"step": 10975
|
3083 |
+
},
|
3084 |
+
{
|
3085 |
+
"epoch": 0.9529585029888243,
|
3086 |
+
"grad_norm": 7.221772193908691,
|
3087 |
+
"learning_rate": 1.582595302271852e-06,
|
3088 |
+
"loss": 0.2157,
|
3089 |
+
"step": 11000
|
3090 |
+
},
|
3091 |
+
{
|
3092 |
+
"epoch": 0.9551243177683445,
|
3093 |
+
"grad_norm": 5.9744086265563965,
|
3094 |
+
"learning_rate": 1.5103966114747788e-06,
|
3095 |
+
"loss": 0.3733,
|
3096 |
+
"step": 11025
|
3097 |
+
},
|
3098 |
+
{
|
3099 |
+
"epoch": 0.9572901325478645,
|
3100 |
+
"grad_norm": 5.776475429534912,
|
3101 |
+
"learning_rate": 1.4381979206777053e-06,
|
3102 |
+
"loss": 0.2843,
|
3103 |
+
"step": 11050
|
3104 |
+
},
|
3105 |
+
{
|
3106 |
+
"epoch": 0.9594559473273846,
|
3107 |
+
"grad_norm": 1.3870640993118286,
|
3108 |
+
"learning_rate": 1.3659992298806316e-06,
|
3109 |
+
"loss": 0.1963,
|
3110 |
+
"step": 11075
|
3111 |
+
},
|
3112 |
+
{
|
3113 |
+
"epoch": 0.9616217621069046,
|
3114 |
+
"grad_norm": 7.3776535987854,
|
3115 |
+
"learning_rate": 1.293800539083558e-06,
|
3116 |
+
"loss": 0.3081,
|
3117 |
+
"step": 11100
|
3118 |
+
},
|
3119 |
+
{
|
3120 |
+
"epoch": 0.9637875768864247,
|
3121 |
+
"grad_norm": 11.289216995239258,
|
3122 |
+
"learning_rate": 1.2216018482864846e-06,
|
3123 |
+
"loss": 0.2317,
|
3124 |
+
"step": 11125
|
3125 |
+
},
|
3126 |
+
{
|
3127 |
+
"epoch": 0.9659533916659447,
|
3128 |
+
"grad_norm": 11.621864318847656,
|
3129 |
+
"learning_rate": 1.1494031574894108e-06,
|
3130 |
+
"loss": 0.3027,
|
3131 |
+
"step": 11150
|
3132 |
+
},
|
3133 |
+
{
|
3134 |
+
"epoch": 0.9681192064454648,
|
3135 |
+
"grad_norm": 11.617834091186523,
|
3136 |
+
"learning_rate": 1.0772044666923373e-06,
|
3137 |
+
"loss": 0.3581,
|
3138 |
+
"step": 11175
|
3139 |
+
},
|
3140 |
+
{
|
3141 |
+
"epoch": 0.9702850212249848,
|
3142 |
+
"grad_norm": 5.500637531280518,
|
3143 |
+
"learning_rate": 1.0050057758952638e-06,
|
3144 |
+
"loss": 0.3,
|
3145 |
+
"step": 11200
|
3146 |
+
},
|
3147 |
+
{
|
3148 |
+
"epoch": 0.9724508360045049,
|
3149 |
+
"grad_norm": 3.552578926086426,
|
3150 |
+
"learning_rate": 9.328070850981902e-07,
|
3151 |
+
"loss": 0.2797,
|
3152 |
+
"step": 11225
|
3153 |
+
},
|
3154 |
+
{
|
3155 |
+
"epoch": 0.9746166507840249,
|
3156 |
+
"grad_norm": 1.074208378791809,
|
3157 |
+
"learning_rate": 8.606083943011167e-07,
|
3158 |
+
"loss": 0.2918,
|
3159 |
+
"step": 11250
|
3160 |
+
},
|
3161 |
+
{
|
3162 |
+
"epoch": 0.976782465563545,
|
3163 |
+
"grad_norm": 11.449936866760254,
|
3164 |
+
"learning_rate": 7.884097035040431e-07,
|
3165 |
+
"loss": 0.2519,
|
3166 |
+
"step": 11275
|
3167 |
+
},
|
3168 |
+
{
|
3169 |
+
"epoch": 0.978948280343065,
|
3170 |
+
"grad_norm": 2.988003730773926,
|
3171 |
+
"learning_rate": 7.162110127069696e-07,
|
3172 |
+
"loss": 0.2183,
|
3173 |
+
"step": 11300
|
3174 |
+
},
|
3175 |
+
{
|
3176 |
+
"epoch": 0.9811140951225851,
|
3177 |
+
"grad_norm": 2.9280929565429688,
|
3178 |
+
"learning_rate": 6.44012321909896e-07,
|
3179 |
+
"loss": 0.2764,
|
3180 |
+
"step": 11325
|
3181 |
+
},
|
3182 |
+
{
|
3183 |
+
"epoch": 0.9832799099021051,
|
3184 |
+
"grad_norm": 3.2279105186462402,
|
3185 |
+
"learning_rate": 5.718136311128224e-07,
|
3186 |
+
"loss": 0.4107,
|
3187 |
+
"step": 11350
|
3188 |
+
},
|
3189 |
+
{
|
3190 |
+
"epoch": 0.9854457246816253,
|
3191 |
+
"grad_norm": 2.54160737991333,
|
3192 |
+
"learning_rate": 4.996149403157489e-07,
|
3193 |
+
"loss": 0.3135,
|
3194 |
+
"step": 11375
|
3195 |
+
},
|
3196 |
+
{
|
3197 |
+
"epoch": 0.9876115394611453,
|
3198 |
+
"grad_norm": 1.3068925142288208,
|
3199 |
+
"learning_rate": 4.2741624951867543e-07,
|
3200 |
+
"loss": 0.2138,
|
3201 |
+
"step": 11400
|
3202 |
+
},
|
3203 |
+
{
|
3204 |
+
"epoch": 0.9897773542406654,
|
3205 |
+
"grad_norm": 8.606940269470215,
|
3206 |
+
"learning_rate": 3.5521755872160183e-07,
|
3207 |
+
"loss": 0.2984,
|
3208 |
+
"step": 11425
|
3209 |
+
},
|
3210 |
+
{
|
3211 |
+
"epoch": 0.9919431690201854,
|
3212 |
+
"grad_norm": 1.2513303756713867,
|
3213 |
+
"learning_rate": 2.830188679245283e-07,
|
3214 |
+
"loss": 0.2407,
|
3215 |
+
"step": 11450
|
3216 |
+
},
|
3217 |
+
{
|
3218 |
+
"epoch": 0.9941089837997055,
|
3219 |
+
"grad_norm": 11.340466499328613,
|
3220 |
+
"learning_rate": 2.1082017712745478e-07,
|
3221 |
+
"loss": 0.2449,
|
3222 |
+
"step": 11475
|
3223 |
+
},
|
3224 |
+
{
|
3225 |
+
"epoch": 0.9962747985792255,
|
3226 |
+
"grad_norm": 6.166193008422852,
|
3227 |
+
"learning_rate": 1.386214863303812e-07,
|
3228 |
+
"loss": 0.2629,
|
3229 |
+
"step": 11500
|
3230 |
+
},
|
3231 |
+
{
|
3232 |
+
"epoch": 0.9984406133587456,
|
3233 |
+
"grad_norm": 4.004662990570068,
|
3234 |
+
"learning_rate": 6.642279553330766e-08,
|
3235 |
+
"loss": 0.3488,
|
3236 |
+
"step": 11525
|
3237 |
+
},
|
3238 |
+
{
|
3239 |
+
"epoch": 1.0,
|
3240 |
+
"eval_cosine_accuracy": 0.9693415637860082,
|
3241 |
+
"eval_loss": 0.4268312156200409,
|
3242 |
+
"eval_runtime": 50.4023,
|
3243 |
+
"eval_samples_per_second": 96.424,
|
3244 |
+
"eval_steps_per_second": 6.031,
|
3245 |
+
"step": 11543
|
3246 |
+
}
|
3247 |
+
],
|
3248 |
+
"logging_steps": 25,
|
3249 |
+
"max_steps": 11543,
|
3250 |
+
"num_input_tokens_seen": 0,
|
3251 |
+
"num_train_epochs": 1,
|
3252 |
+
"save_steps": 500,
|
3253 |
+
"stateful_callbacks": {
|
3254 |
+
"EarlyStoppingCallback": {
|
3255 |
+
"args": {
|
3256 |
+
"early_stopping_patience": 5,
|
3257 |
+
"early_stopping_threshold": 0.01
|
3258 |
+
},
|
3259 |
+
"attributes": {
|
3260 |
+
"early_stopping_patience_counter": 0
|
3261 |
+
}
|
3262 |
+
},
|
3263 |
+
"TrainerControl": {
|
3264 |
+
"args": {
|
3265 |
+
"should_epoch_stop": false,
|
3266 |
+
"should_evaluate": false,
|
3267 |
+
"should_log": false,
|
3268 |
+
"should_save": true,
|
3269 |
+
"should_training_stop": true
|
3270 |
+
},
|
3271 |
+
"attributes": {}
|
3272 |
+
}
|
3273 |
+
},
|
3274 |
+
"total_flos": 0.0,
|
3275 |
+
"train_batch_size": 8,
|
3276 |
+
"trial_name": null,
|
3277 |
+
"trial_params": null
|
3278 |
+
}
|
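The log above is the tail of the Trainer state for this run: one entry every 25 steps (matching `logging_steps: 25`) with the running loss, gradient norm, and linearly decaying learning rate, ending with the epoch-1 evaluation at step 11543. A minimal sketch for inspecting it offline, assuming a local clone of this repository (`log_history` is the standard key in a Hugging Face `trainer_state.json`):

```python
import json

# Minimal sketch, assuming a local clone of this repo: read the Trainer's
# log history from the checkpoint and print the training-loss curve.
with open("checkpoint-11543/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "loss" in entry:  # training rows; the final row carries eval_* metrics instead
        print(entry["step"], entry["loss"], entry["learning_rate"])
```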
checkpoint-11543/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4899e7c561beea78ce03642baf2d821bc65ed7c535dfbe836f5eb0f11ceeaa7f
+size 5624
config.json ADDED
@@ -0,0 +1,47 @@
+{
+  "_name_or_path": "Alibaba-NLP/gte-modernbert-base",
+  "architectures": [
+    "ModernBertModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 50281,
+  "classifier_activation": "gelu",
+  "classifier_bias": false,
+  "classifier_dropout": 0.0,
+  "classifier_pooling": "mean",
+  "cls_token_id": 50281,
+  "decoder_bias": true,
+  "deterministic_flash_attn": false,
+  "embedding_dropout": 0.0,
+  "eos_token_id": 50282,
+  "global_attn_every_n_layers": 3,
+  "global_rope_theta": 160000.0,
+  "gradient_checkpointing": false,
+  "hidden_activation": "gelu",
+  "hidden_size": 768,
+  "initializer_cutoff_factor": 2.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 1152,
+  "layer_norm_eps": 1e-05,
+  "local_attention": 128,
+  "local_rope_theta": 10000.0,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "mlp_dropout": 0.0,
+  "model_type": "modernbert",
+  "norm_bias": false,
+  "norm_eps": 1e-05,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 22,
+  "pad_token_id": 50283,
+  "position_embedding_type": "absolute",
+  "reference_compile": true,
+  "repad_logits_with_grad": false,
+  "sep_token_id": 50282,
+  "sparse_pred_ignore_index": -100,
+  "sparse_prediction": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.0",
+  "vocab_size": 50368
+}
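This `config.json` pins the ModernBERT backbone inherited from the base model: 22 hidden layers of width 768, alternating local/global attention (`global_attn_every_n_layers: 3`), and an 8192-token context. A quick sanity check, sketched under the assumption that the pinned `transformers` release (4.48.0 or later, with ModernBERT support) is installed:

```python
from transformers import AutoConfig

# Sketch: load the config (here via the base model id this repo was derived
# from) and confirm the key architecture values listed above.
config = AutoConfig.from_pretrained("Alibaba-NLP/gte-modernbert-base")
print(config.model_type)               # "modernbert"
print(config.hidden_size)              # 768
print(config.num_hidden_layers)        # 22
print(config.max_position_embeddings)  # 8192
```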
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+{
+  "__version__": {
+    "sentence_transformers": "3.3.1",
+    "transformers": "4.48.0",
+    "pytorch": "2.4.0"
+  },
+  "prompts": {},
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}
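`"similarity_fn_name": "cosine"` means the model's built-in scoring uses cosine similarity. A brief sketch, using a placeholder model id (`SentenceTransformer.similarity` is available in sentence-transformers 3.x, matching the 3.3.1 pinned above):

```python
from sentence_transformers import SentenceTransformer

# Sketch with a placeholder model id: encode two texts and score them with
# the model's default similarity function (cosine, per the config above).
model = SentenceTransformer("sentence_transformers_model_id")
embeddings = model.encode(["how do I fine-tune an embedding model?",
                           "guide to training embedding models"])
print(model.similarity(embeddings[0:1], embeddings[1:2]))
```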
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96616f5c428a86aaed4423cdda82e8a7d4becc0682e33fca47374d01cd7df333
+size 596070136
modules.json ADDED
@@ -0,0 +1,14 @@
+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  }
+]
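`modules.json` declares the two-stage sentence-transformers pipeline: the transformer at the repo root, followed by the pooling module in `1_Pooling/` (CLS pooling, per its config). A roughly equivalent manual assembly, as a sketch built from the base model id:

```python
from sentence_transformers import SentenceTransformer, models

# Sketch of what modules.json encodes: a Transformer module followed by a
# CLS-pooling module, assembled by hand instead of loaded from the repo.
word_embedding = models.Transformer("Alibaba-NLP/gte-modernbert-base",
                                    max_seq_length=8192)
pooling = models.Pooling(word_embedding.get_word_embedding_dimension(),
                         pooling_mode="cls")
model = SentenceTransformer(modules=[word_embedding, pooling])
```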
runs/Feb20_14-59-58_r-m7n-autotrain-advanced-kyhs0yea-70f9a-nry3q/events.out.tfevents.1740063604.r-m7n-autotrain-advanced-kyhs0yea-70f9a-nry3q.102.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:4c7d57ff79811792dffd2074b6bcadc9392b45a58ea6a637272ac33fbde4dd3d
+size 102407
runs/Feb20_14-59-58_r-m7n-autotrain-advanced-kyhs0yea-70f9a-nry3q/events.out.tfevents.1740065597.r-m7n-autotrain-advanced-kyhs0yea-70f9a-nry3q.102.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:437d6a93141b052dcb68cddaaf972261b9bc923c607fb4740d9434c4f44ee294
+size 418
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+{
+  "max_seq_length": 8192,
+  "do_lower_case": false
+}
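`sentence_bert_config.json` caps inputs at 8192 tokens and disables lowercasing. The cap surfaces on the loaded model and can be lowered when full context is not needed, as this sketch (placeholder model id) shows:

```python
from sentence_transformers import SentenceTransformer

# Sketch with a placeholder model id: the 8192-token cap from
# sentence_bert_config.json is exposed as model.max_seq_length.
model = SentenceTransformer("sentence_transformers_model_id")
print(model.max_seq_length)  # 8192
model.max_seq_length = 512   # optional: trade context length for speed
```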
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,945 @@
{
  "added_tokens_decoder": {
    "0": {"content": "|||IP_ADDRESS|||", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "1": {"content": "<|padding|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "50254": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50255": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50256": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50257": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50258": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50259": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50260": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50261": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50262": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50263": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50264": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50265": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50266": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50267": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50268": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50269": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50270": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50271": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50272": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50273": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50274": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50275": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50276": {"content": " ", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50277": {"content": "|||EMAIL_ADDRESS|||", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50278": {"content": "|||PHONE_NUMBER|||", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50279": {"content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "50280": {"content": "[UNK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "50281": {"content": "[CLS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "50282": {"content": "[SEP]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "50283": {"content": "[PAD]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "50284": {"content": "[MASK]", "lstrip": true, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "50285": {"content": "[unused0]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50286": {"content": "[unused1]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50287": {"content": "[unused2]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50288": {"content": "[unused3]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50289": {"content": "[unused4]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50290": {"content": "[unused5]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50291": {"content": "[unused6]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50292": {"content": "[unused7]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50293": {"content": "[unused8]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50294": {"content": "[unused9]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50295": {"content": "[unused10]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50296": {"content": "[unused11]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50297": {"content": "[unused12]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50298": {"content": "[unused13]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50299": {"content": "[unused14]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50300": {"content": "[unused15]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50301": {"content": "[unused16]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50302": {"content": "[unused17]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50303": {"content": "[unused18]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50304": {"content": "[unused19]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50305": {"content": "[unused20]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50306": {"content": "[unused21]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50307": {"content": "[unused22]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50308": {"content": "[unused23]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50309": {"content": "[unused24]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50310": {"content": "[unused25]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50311": {"content": "[unused26]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50312": {"content": "[unused27]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50313": {"content": "[unused28]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50314": {"content": "[unused29]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50315": {"content": "[unused30]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50316": {"content": "[unused31]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50317": {"content": "[unused32]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50318": {"content": "[unused33]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50319": {"content": "[unused34]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50320": {"content": "[unused35]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50321": {"content": "[unused36]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50322": {"content": "[unused37]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50323": {"content": "[unused38]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50324": {"content": "[unused39]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50325": {"content": "[unused40]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50326": {"content": "[unused41]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50327": {"content": "[unused42]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50328": {"content": "[unused43]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50329": {"content": "[unused44]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50330": {"content": "[unused45]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50331": {"content": "[unused46]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50332": {"content": "[unused47]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50333": {"content": "[unused48]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50334": {"content": "[unused49]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50335": {"content": "[unused50]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50336": {"content": "[unused51]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50337": {"content": "[unused52]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50338": {"content": "[unused53]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50339": {"content": "[unused54]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50340": {"content": "[unused55]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50341": {"content": "[unused56]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50342": {"content": "[unused57]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50343": {"content": "[unused58]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50344": {"content": "[unused59]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50345": {"content": "[unused60]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50346": {"content": "[unused61]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50347": {"content": "[unused62]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50348": {"content": "[unused63]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50349": {"content": "[unused64]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50350": {"content": "[unused65]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50351": {"content": "[unused66]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50352": {"content": "[unused67]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50353": {"content": "[unused68]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50354": {"content": "[unused69]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50355": {"content": "[unused70]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50356": {"content": "[unused71]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50357": {"content": "[unused72]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50358": {"content": "[unused73]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50359": {"content": "[unused74]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50360": {"content": "[unused75]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50361": {"content": "[unused76]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50362": {"content": "[unused77]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50363": {"content": "[unused78]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50364": {"content": "[unused79]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50365": {"content": "[unused80]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50366": {"content": "[unused81]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false},
    "50367": {"content": "[unused82]", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false}
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_input_names": [
    "input_ids",
    "attention_mask"
  ],
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "tokenizer_class": "PreTrainedTokenizerFast",
  "unk_token": "[UNK]"
}
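The configuration above can be sanity-checked after loading the tokenizer through transformers. A quick sketch, using the same placeholder model id as the usage example earlier in this README:

```python
# Sketch: verify the special-token setup declared in tokenizer_config.json.
# "sentence_transformers_model_id" is a placeholder, as in the usage example.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("sentence_transformers_model_id")
print(tok.cls_token, tok.sep_token, tok.pad_token, tok.mask_token, tok.unk_token)
print(tok.model_max_length)  # effectively unbounded in the config; usable length is capped elsewhere
```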
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4899e7c561beea78ce03642baf2d821bc65ed7c535dfbe836f5eb0f11ceeaa7f
size 5624
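This LFS pointer resolves to a pickled transformers TrainingArguments object rather than a weights file. A small sketch for inspecting it, assuming the file has been downloaded locally and transformers is installed:

```python
# Sketch: training_args.bin is a pickle of TrainingArguments, not a tensor file,
# so it must be loaded with weights_only=False on recent PyTorch versions.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```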
training_params.json
ADDED
@@ -0,0 +1,33 @@
{
  "data_path": "gte-modernbert-philosophy-v1-1-autotr/autotrain-data",
  "model": "Alibaba-NLP/gte-modernbert-base",
  "lr": 3e-05,
  "epochs": 1,
  "max_seq_length": 8192,
  "batch_size": 8,
  "warmup_ratio": 0.1,
  "gradient_accumulation": 1,
  "optimizer": "adamw_torch",
  "scheduler": "linear",
  "weight_decay": 0.0,
  "max_grad_norm": 1.0,
  "seed": 42,
  "train_split": "train",
  "valid_split": "validation",
  "logging_steps": -1,
  "project_name": "gte-modernbert-philosophy-v1-1-autotr",
  "auto_find_batch_size": false,
  "mixed_precision": "fp16",
  "save_total_limit": 1,
  "push_to_hub": true,
  "eval_strategy": "epoch",
  "username": "m7n",
  "log": "tensorboard",
  "early_stopping_patience": 5,
  "early_stopping_threshold": 0.01,
  "trainer": "triplet",
  "sentence1_column": "autotrain_sentence1",
  "sentence2_column": "autotrain_sentence2",
  "sentence3_column": "autotrain_sentence3",
  "target_column": "autotrain_target"
}
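`"trainer": "triplet"` indicates that AutoTrain fine-tuned the model with a triplet objective over the (sentence1, sentence2, sentence3) columns. A rough, hand-written equivalent using the Sentence Transformers trainer; the dataset file, column layout, and TripletLoss settings below are assumptions for illustration, not the exact AutoTrain internals:

```python
# Rough sketch of an equivalent triplet fine-tune. Hyperparameters mirror
# training_params.json; the dataset file and loss configuration are assumed.
from datasets import load_dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import TripletLoss

model = SentenceTransformer("Alibaba-NLP/gte-modernbert-base")
# Hypothetical file with anchor/positive/negative text columns, in that order.
train_dataset = load_dataset("csv", data_files="triplets.csv")["train"]

args = SentenceTransformerTrainingArguments(
    output_dir="gte-modernbert-philosophy-v1-1-autotr",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    learning_rate=3e-5,
    warmup_ratio=0.1,
    weight_decay=0.0,
    max_grad_norm=1.0,
    fp16=True,
    seed=42,
    save_total_limit=1,
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=TripletLoss(model),  # default margin; AutoTrain's exact loss settings may differ
)
trainer.train()
```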