pv1031 committed on
Commit
e5a18fb
·
verified ·
1 Parent(s): b55dd61

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-150/model.SRC filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-150/model.TGT filter=lfs diff=lfs merge=lfs -text
38
+ model.SRC filter=lfs diff=lfs merge=lfs -text
39
+ model.TGT filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ library_name: transformers
4
+ tags:
5
+ - autotrain
6
+ - text2text-generation
7
+ base_model: ai4bharat/indictrans2-indic-en-dist-200M
8
+ widget:
9
+ - text: "I love AutoTrain"
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Seq2Seq
15
+
16
+ ## Validation Metrics
17
+ loss: 4.0998663902282715
18
+
19
+ rouge1: 17.7166
20
+
21
+ rouge2: 12.3004
22
+
23
+ rougeL: 17.617
24
+
25
+ rougeLsum: 17.7018
26
+
27
+ gen_len: 17.55
28
+
29
+ runtime: 32.9351
30
+
31
+ samples_per_second: 3.036
32
+
33
+ steps_per_second: 1.518
34
+
35
+ epoch: 3.0
checkpoint-150/config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ai4bharat/indictrans2-indic-en-dist-200M",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "architectures": [
6
+ "IndicTransForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "attn_implementation": null,
10
+ "auto_map": {
11
+ "AutoConfig": "ai4bharat/indictrans2-indic-en-dist-200M--configuration_indictrans.IndicTransConfig",
12
+ "AutoModelForSeq2SeqLM": "ai4bharat/indictrans2-indic-en-dist-200M--modeling_indictrans.IndicTransForConditionalGeneration"
13
+ },
14
+ "bos_token_id": 0,
15
+ "decoder_attention_heads": 8,
16
+ "decoder_embed_dim": 512,
17
+ "decoder_ffn_dim": 2048,
18
+ "decoder_layerdrop": 0,
19
+ "decoder_layers": 18,
20
+ "decoder_normalize_before": true,
21
+ "decoder_start_token_id": 2,
22
+ "decoder_vocab_size": 32296,
23
+ "dropout": 0.2,
24
+ "encoder_attention_heads": 8,
25
+ "encoder_embed_dim": 512,
26
+ "encoder_ffn_dim": 2048,
27
+ "encoder_layerdrop": 0,
28
+ "encoder_layers": 18,
29
+ "encoder_normalize_before": true,
30
+ "encoder_vocab_size": 122706,
31
+ "eos_token_id": 2,
32
+ "init_std": 0.02,
33
+ "is_encoder_decoder": true,
34
+ "layernorm_embedding": true,
35
+ "max_source_positions": 256,
36
+ "max_target_positions": 256,
37
+ "model_type": "IndicTrans",
38
+ "num_hidden_layers": 18,
39
+ "pad_token_id": 1,
40
+ "scale_embedding": true,
41
+ "share_decoder_input_output_embed": true,
42
+ "tokenizer_class": "IndicTransTokenizer",
43
+ "torch_dtype": "float32",
44
+ "transformers_version": "4.48.0",
45
+ "use_cache": false,
46
+ "vocab_size": 32296
47
+ }
checkpoint-150/dict.SRC.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-150/dict.TGT.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-150/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 0,
3
+ "decoder_start_token_id": 2,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 1,
6
+ "transformers_version": "4.48.0"
7
+ }
checkpoint-150/model.SRC ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac9257c8e76b8b607705b959cc3d075656ea33032f7a974e467b8941df6e98d4
3
+ size 3256903
checkpoint-150/model.TGT ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cedc5cbcc740369b76201942a0f096fec7287fee039b55bdb956f301235b914
3
+ size 759425
checkpoint-150/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16f46186b661b9c4894a7240df315b02232c3a7e81de9f2c4d01a3f5143a1b97
3
+ size 847211256
checkpoint-150/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43f9c0c78e7a651e03dd797169da635c4f58a5d928b297aac837e6fb6f63a573
3
+ size 1694887884
checkpoint-150/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e55b6a10cfc36cc275f509c2dba8c4447653d5b657c2294a068d965ebce25a2
3
+ size 14244
checkpoint-150/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e19e92325f0c6d72344bbada6185b144f4397af0a0f2e4e91f7b4bca18fbdc61
3
+ size 1064
checkpoint-150/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
checkpoint-150/tokenizer_config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "auto_map": {
37
+ "AutoTokenizer": [
38
+ "ai4bharat/indictrans2-indic-en-dist-200M--tokenization_indictrans.IndicTransTokenizer",
39
+ null
40
+ ]
41
+ },
42
+ "bos_token": "<s>",
43
+ "clean_up_tokenization_spaces": true,
44
+ "do_lower_case": false,
45
+ "eos_token": "</s>",
46
+ "extra_special_tokens": {},
47
+ "model_max_length": 256,
48
+ "pad_token": "<pad>",
49
+ "src_vocab_file": "/root/.cache/huggingface/hub/models--ai4bharat--indictrans2-indic-en-dist-200M/snapshots/44e264ffa07dc1cae043e0fd864cab035068bf78/dict.SRC.json",
50
+ "tgt_vocab_file": "/root/.cache/huggingface/hub/models--ai4bharat--indictrans2-indic-en-dist-200M/snapshots/44e264ffa07dc1cae043e0fd864cab035068bf78/dict.SRC.json",
51
+ "tokenizer_class": "IndicTransTokenizer",
52
+ "unk_token": "<unk>"
53
+ }
checkpoint-150/trainer_state.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 4.0998663902282715,
3
+ "best_model_checkpoint": "kn-en-legal-translation-update/checkpoint-150",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 150,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.4,
13
+ "grad_norm": 14.293238639831543,
14
+ "learning_rate": 1.9703703703703704e-05,
15
+ "loss": 5.9193,
16
+ "step": 20
17
+ },
18
+ {
19
+ "epoch": 0.8,
20
+ "grad_norm": 13.594751358032227,
21
+ "learning_rate": 1.674074074074074e-05,
22
+ "loss": 4.5628,
23
+ "step": 40
24
+ },
25
+ {
26
+ "epoch": 1.0,
27
+ "eval_gen_len": 16.68,
28
+ "eval_loss": 4.330910682678223,
29
+ "eval_rouge1": 13.5399,
30
+ "eval_rouge2": 8.5306,
31
+ "eval_rougeL": 13.1416,
32
+ "eval_rougeLsum": 13.1714,
33
+ "eval_runtime": 30.4105,
34
+ "eval_samples_per_second": 3.288,
35
+ "eval_steps_per_second": 1.644,
36
+ "step": 50
37
+ },
38
+ {
39
+ "epoch": 1.2,
40
+ "grad_norm": 11.074690818786621,
41
+ "learning_rate": 1.377777777777778e-05,
42
+ "loss": 4.1048,
43
+ "step": 60
44
+ },
45
+ {
46
+ "epoch": 1.6,
47
+ "grad_norm": 9.352710723876953,
48
+ "learning_rate": 1.0814814814814816e-05,
49
+ "loss": 3.9522,
50
+ "step": 80
51
+ },
52
+ {
53
+ "epoch": 2.0,
54
+ "grad_norm": 18.36586570739746,
55
+ "learning_rate": 7.851851851851853e-06,
56
+ "loss": 3.8364,
57
+ "step": 100
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_gen_len": 18.11,
62
+ "eval_loss": 4.152203559875488,
63
+ "eval_rouge1": 16.4573,
64
+ "eval_rouge2": 10.4035,
65
+ "eval_rougeL": 16.2952,
66
+ "eval_rougeLsum": 16.3805,
67
+ "eval_runtime": 32.813,
68
+ "eval_samples_per_second": 3.048,
69
+ "eval_steps_per_second": 1.524,
70
+ "step": 100
71
+ },
72
+ {
73
+ "epoch": 2.4,
74
+ "grad_norm": 11.676989555358887,
75
+ "learning_rate": 4.888888888888889e-06,
76
+ "loss": 3.546,
77
+ "step": 120
78
+ },
79
+ {
80
+ "epoch": 2.8,
81
+ "grad_norm": 12.43088150024414,
82
+ "learning_rate": 1.925925925925926e-06,
83
+ "loss": 3.7224,
84
+ "step": 140
85
+ },
86
+ {
87
+ "epoch": 3.0,
88
+ "eval_gen_len": 17.55,
89
+ "eval_loss": 4.0998663902282715,
90
+ "eval_rouge1": 17.7166,
91
+ "eval_rouge2": 12.3004,
92
+ "eval_rougeL": 17.617,
93
+ "eval_rougeLsum": 17.7018,
94
+ "eval_runtime": 32.1665,
95
+ "eval_samples_per_second": 3.109,
96
+ "eval_steps_per_second": 1.554,
97
+ "step": 150
98
+ }
99
+ ],
100
+ "logging_steps": 20,
101
+ "max_steps": 150,
102
+ "num_input_tokens_seen": 0,
103
+ "num_train_epochs": 3,
104
+ "save_steps": 500,
105
+ "stateful_callbacks": {
106
+ "EarlyStoppingCallback": {
107
+ "args": {
108
+ "early_stopping_patience": 5,
109
+ "early_stopping_threshold": 0.01
110
+ },
111
+ "attributes": {
112
+ "early_stopping_patience_counter": 0
113
+ }
114
+ },
115
+ "TrainerControl": {
116
+ "args": {
117
+ "should_epoch_stop": false,
118
+ "should_evaluate": false,
119
+ "should_log": false,
120
+ "should_save": true,
121
+ "should_training_stop": true
122
+ },
123
+ "attributes": {}
124
+ }
125
+ },
126
+ "total_flos": 76471515242496.0,
127
+ "train_batch_size": 1,
128
+ "trial_name": null,
129
+ "trial_params": null
130
+ }
checkpoint-150/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213d8dbb5afa16fb3b7bfa77de55ea16a5330936898fdb0bb4cf446879c097fe
3
+ size 5496
config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ai4bharat/indictrans2-indic-en-dist-200M",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "architectures": [
6
+ "IndicTransForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "attn_implementation": null,
10
+ "auto_map": {
11
+ "AutoConfig": "ai4bharat/indictrans2-indic-en-dist-200M--configuration_indictrans.IndicTransConfig",
12
+ "AutoModelForSeq2SeqLM": "ai4bharat/indictrans2-indic-en-dist-200M--modeling_indictrans.IndicTransForConditionalGeneration"
13
+ },
14
+ "bos_token_id": 0,
15
+ "decoder_attention_heads": 8,
16
+ "decoder_embed_dim": 512,
17
+ "decoder_ffn_dim": 2048,
18
+ "decoder_layerdrop": 0,
19
+ "decoder_layers": 18,
20
+ "decoder_normalize_before": true,
21
+ "decoder_start_token_id": 2,
22
+ "decoder_vocab_size": 32296,
23
+ "dropout": 0.2,
24
+ "encoder_attention_heads": 8,
25
+ "encoder_embed_dim": 512,
26
+ "encoder_ffn_dim": 2048,
27
+ "encoder_layerdrop": 0,
28
+ "encoder_layers": 18,
29
+ "encoder_normalize_before": true,
30
+ "encoder_vocab_size": 122706,
31
+ "eos_token_id": 2,
32
+ "init_std": 0.02,
33
+ "is_encoder_decoder": true,
34
+ "layernorm_embedding": true,
35
+ "max_source_positions": 256,
36
+ "max_target_positions": 256,
37
+ "model_type": "IndicTrans",
38
+ "num_hidden_layers": 18,
39
+ "pad_token_id": 1,
40
+ "scale_embedding": true,
41
+ "share_decoder_input_output_embed": true,
42
+ "tokenizer_class": "IndicTransTokenizer",
43
+ "torch_dtype": "float32",
44
+ "transformers_version": "4.48.0",
45
+ "use_cache": true,
46
+ "vocab_size": 32296
47
+ }
dict.SRC.json ADDED
The diff for this file is too large to render. See raw diff
 
dict.TGT.json ADDED
The diff for this file is too large to render. See raw diff
 
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 0,
3
+ "decoder_start_token_id": 2,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 1,
6
+ "transformers_version": "4.48.0"
7
+ }
model.SRC ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac9257c8e76b8b607705b959cc3d075656ea33032f7a974e467b8941df6e98d4
3
+ size 3256903
model.TGT ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cedc5cbcc740369b76201942a0f096fec7287fee039b55bdb956f301235b914
3
+ size 759425
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16f46186b661b9c4894a7240df315b02232c3a7e81de9f2c4d01a3f5143a1b97
3
+ size 847211256
runs/Apr20_17-52-24_c76ce57be9eb/events.out.tfevents.1745171551.c76ce57be9eb.3288.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6aeef7fb2fa645e2fa2171dade6675da2ff8e3bc0da62c599c0eec5e79956e40
3
- size 4184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b5e9ce67481cf119d421c7b96b044e78b0de91cd2c1bfe3fe1c467da677f268
3
+ size 9405
runs/Apr20_17-52-24_c76ce57be9eb/events.out.tfevents.1745171992.c76ce57be9eb.3288.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd9d73a9a08c7eee001f84232851d1f202a6daee5d9cdc4f2b1f11aaf5626ba
3
+ size 613
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "auto_map": {
37
+ "AutoTokenizer": [
38
+ "ai4bharat/indictrans2-indic-en-dist-200M--tokenization_indictrans.IndicTransTokenizer",
39
+ null
40
+ ]
41
+ },
42
+ "bos_token": "<s>",
43
+ "clean_up_tokenization_spaces": true,
44
+ "do_lower_case": false,
45
+ "eos_token": "</s>",
46
+ "extra_special_tokens": {},
47
+ "model_max_length": 256,
48
+ "pad_token": "<pad>",
49
+ "src_vocab_file": "/root/.cache/huggingface/hub/models--ai4bharat--indictrans2-indic-en-dist-200M/snapshots/44e264ffa07dc1cae043e0fd864cab035068bf78/dict.SRC.json",
50
+ "tgt_vocab_file": "/root/.cache/huggingface/hub/models--ai4bharat--indictrans2-indic-en-dist-200M/snapshots/44e264ffa07dc1cae043e0fd864cab035068bf78/dict.SRC.json",
51
+ "tokenizer_class": "IndicTransTokenizer",
52
+ "unk_token": "<unk>"
53
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213d8dbb5afa16fb3b7bfa77de55ea16a5330936898fdb0bb4cf446879c097fe
3
+ size 5496
training_params.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "kn-en-legal-translation-update/autotrain-data",
3
+ "model": "ai4bharat/indictrans2-indic-en-dist-200M",
4
+ "username": "pv1031",
5
+ "seed": 42,
6
+ "train_split": "train",
7
+ "valid_split": "validation",
8
+ "project_name": "kn-en-legal-translation-update",
9
+ "push_to_hub": true,
10
+ "text_column": "autotrain_text",
11
+ "target_column": "autotrain_label",
12
+ "lr": 2e-05,
13
+ "epochs": 3,
14
+ "max_seq_length": 256,
15
+ "max_target_length": 128,
16
+ "batch_size": 1,
17
+ "warmup_ratio": 0.1,
18
+ "gradient_accumulation": 8,
19
+ "optimizer": "adamw_torch",
20
+ "scheduler": "linear",
21
+ "weight_decay": 0.0,
22
+ "max_grad_norm": 1.0,
23
+ "logging_steps": -1,
24
+ "eval_strategy": "epoch",
25
+ "auto_find_batch_size": false,
26
+ "mixed_precision": "fp16",
27
+ "save_total_limit": 1,
28
+ "peft": false,
29
+ "quantization": "int8",
30
+ "lora_r": 16,
31
+ "lora_alpha": 32,
32
+ "lora_dropout": 0.05,
33
+ "target_modules": "all-linear",
34
+ "log": "tensorboard",
35
+ "early_stopping_patience": 5,
36
+ "early_stopping_threshold": 0.01
37
+ }