jramompichel committed on
Commit
0c59668
verified
1 Parent(s): 7aafaff

Upload 2 files

Browse files
Files changed (2) hide show
  1. en-gl.yaml +110 -0
  2. nos-coda_iacobus-en-gl.pt +3 -0
en-gl.yaml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ save_data: /mnt/netapp1/Proxecto_NOS/mt/corpus/iacobus/en-gl/aut/models
2
+ ## Where the vocab(s) will be written
3
+ src_vocab: /mnt/netapp1/Proxecto_NOS/mt/corpus/iacobus/en-gl/aut/models/run/vocab/bpe.vocab.src
4
+ tgt_vocab: /mnt/netapp1/Proxecto_NOS/mt/corpus/iacobus/en-gl/aut/models/run/vocab/bpe.vocab.tgt
5
+ overwrite: True
6
+
7
+ # Corpus opts:
8
+
9
+ data:
10
+ en-gl:
11
+ path_src: /mnt/netapp1/Proxecto_NOS/mt/corpus/iacobus/en-gl/aut/train.en35k.txt
12
+ path_tgt: /mnt/netapp1/Proxecto_NOS/mt/corpus/iacobus/en-gl/aut/train.gl35k.txt
13
+ transforms: [bpe, filtertoolong]
14
+
15
+ valid:
16
+ path_src: /mnt/netapp1/Proxecto_NOS/mt/corpus/iacobus/en-gl/aut/valid.en35k.txt
17
+ path_tgt: /mnt/netapp1/Proxecto_NOS/mt/corpus/iacobus/en-gl/aut/valid.gl35k.txt
18
+ transforms: [bpe, filtertoolong]
19
+
20
+ ### Transform related opts:
21
+
22
+ #### Subword
23
+ src_subword_model: /mnt/netapp1/Proxecto_NOS/mt/corpus/iacobus/en-gl/aut/en_35k.code
24
+ tgt_subword_model: /mnt/netapp1/Proxecto_NOS/mt/corpus/iacobus/en-gl/aut/gl_35k.code
25
+ src_subword_vocab: /mnt/netapp1/Proxecto_NOS/mt/corpus/iacobus/en-gl/aut/models/run/vocab/bpe.vocab.src
26
+ tgt_subword_vocab: /mnt/netapp1/Proxecto_NOS/mt/corpus/iacobus/en-gl/aut/models/run/vocab/bpe.vocab.tgt
27
+ #src_subword_model: ../sentencepiece/en-gl/en.sp.model
28
+ #tgt_subword_model: ../sentencepiece/en-gl/gl.sp.model
29
+ src_subword_type: bpe
30
+ tgt_subword_type: bpe
31
+
32
+ src_subword_nbest: 1
33
+ src_subword_alpha: 0.0
34
+ tgt_subword_nbest: 1
35
+ tgt_subword_alpha: 0.0
36
+
37
+ ## Embeddings
38
+ src_embeddings: /mnt/netapp1/Proxecto_NOS/mt/treino_data/embeddings/en.emb.txt
39
+ tgt_embeddings: /mnt/netapp1/Proxecto_NOS/mt/treino_data/embeddings/gl.emb.txt
40
+
41
+ ## supported types: GloVe, word2vec
42
+ embeddings_type: "word2vec"
43
+
44
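+ # word_vec_size needs to match the dimensions of the pretrained embeddings (NOTE: word_vec_size is set again, to 512, in the Model section; confirm which value is intended)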
45
+ word_vec_size: 300
46
+
47
+ #### Filter
48
+ src_seq_length: 150
49
+ tgt_seq_length: 150
50
+
51
+ # silently ignore empty lines in the data
52
+ skip_empty_level: silent
53
+
54
+ # General opts
55
+ # Update the path to the most recent checkpoint available
56
+ #opt.train_from: /home/compartido/ramom/es-gl/models/_step_260000.pt
57
+ #train_from: /home/compartido/ramom/es-gl/models/_step_250000.pt
58
+ save_model: /mnt/netapp1/Proxecto_NOS/mt/corpus/iacobus/en-gl/aut/models/
59
+ keep_checkpoint: 50
60
+ save_checkpoint_steps: 10000
61
+ average_decay: 0.0005
62
+ seed: 1234
63
+ report_every: 1000
64
+ train_steps: 400000
65
+ valid_steps: 10000
66
+
67
+ # Batching
68
+ queue_size: 10000
69
+ bucket_size: 32768
70
+ world_size: 1
71
+ gpu_ranks: [0]
72
+ batch_type: "tokens"
73
+ #batch_size: 1500
74
+ batch_size: 4096
75
+ valid_batch_size: 64
76
+ batch_size_multiple: 1
77
+ max_generator_batches: 2
78
+ accum_count: [4]
79
+ accum_steps: [0]
80
+
81
+ # Optimization
82
+ model_dtype: "fp32"
83
+ optim: "adam"
84
+ learning_rate: 2
85
+ #learning_rate: 0.00005
86
+ warmup_steps: 8000
87
+ decay_method: "noam"
88
+ adam_beta2: 0.998
89
+ max_grad_norm: 0
90
+ label_smoothing: 0.1
91
+ param_init: 0
92
+ param_init_glorot: true
93
+ normalization: "tokens"
94
+
95
+ # Model
96
+ encoder_type: transformer
97
+ decoder_type: transformer
98
+ position_encoding: true
99
+ enc_layers: 6
100
+ dec_layers: 6
101
+ heads: 8
102
+ #rnn_size: 512
103
+ hidden_size: 512
104
+ word_vec_size: 512
105
+ transformer_ff: 2048
106
+ dropout_steps: [0]
107
+ dropout: [0.1]
108
+ attention_dropout: [0.1]
109
+ share_decoder_embeddings: true
110
+ share_embeddings: false
nos-coda_iacobus-en-gl.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba536ab744941a15ba8f1ea9b1d8d7ee1785bdad463def5130baadd763ccbcb9
3
+ size 864172277