Commit eea3ce3
Parent(s): 9c144a7

Upload 5 files

Files changed:
- .gitattributes (+1, -0)
- eng-por.txt (+3, -0)
- english_vocabulary.txt (+0, -0)
- keras_transformer_blocks.py (+198, -0)
- portuguese_vocabulary.txt (+0, -0)
- transformer_eng_por.h5 (+3, -0)
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+eng-por.txt filter=lfs diff=lfs merge=lfs -text
eng-por.txt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5650439f87f33bc1278d4e29e0e9ed84bec84c60dacde83ccf5526d549932fe3
+size 24609459
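The sentence pairs themselves are stored through Git LFS; the three lines above are only the LFS pointer (spec version, content hash, and size in bytes). Once the real file is fetched, it could be loaded roughly as sketched below. The tab-separated English/Portuguese pair-per-line layout is an assumption, since the data is not visible in this diff.

# Assumption: eng-por.txt holds one "english<TAB>portuguese" pair per line,
# as in common bilingual sentence-pair files. Verify against the real file
# after pulling it from Git LFS.
text_pairs = []
with open("eng-por.txt", encoding="utf-8") as f:
    for line in f:
        parts = line.rstrip("\n").split("\t")
        if len(parts) >= 2:
            english, portuguese = parts[0], parts[1]
            text_pairs.append((english, portuguese))

print(len(text_pairs), "sentence pairs")
print(text_pairs[0])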
english_vocabulary.txt
ADDED
The diff for this file is too large to render. See raw diff.
keras_transformer_blocks.py
ADDED
@@ -0,0 +1,198 @@
import tensorflow as tf
from tensorflow import keras
from keras import layers

class TransformerEncoder(layers.Layer):
    """
    The TransformerEncoder class is a custom Keras layer that implements a
    single transformer encoder block. The transformer encoder block consists
    of a multi-head self-attention layer followed by a feedforward neural
    network, with a residual connection and layer normalization applied
    after each sub-layer.

    The class takes the following arguments:

    embed_dim: an integer specifying the dimensionality of the embedding space.
    dense_dim: an integer specifying the number of units in the feedforward neural network.
    num_heads: an integer specifying the number of attention heads to use.

    The call method performs the layer's main computation. It takes an
    input tensor and an optional mask tensor indicating which positions to
    consider in the attention calculation, and returns the output tensor of
    the transformer encoder block.

    The get_config method returns a dictionary of configuration information for
    the layer, including the embed_dim, num_heads, and dense_dim parameters.
    """
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"),
             layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs, mask=None):
        if mask is not None:
            mask = mask[:, tf.newaxis, :]  # broadcast the padding mask over the query axis
        attention_output = self.attention(
            inputs, inputs, attention_mask=mask)
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)

    def get_config(self):
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        })
        return config

class TransformerDecoder(layers.Layer):
    """
    A Transformer decoder block that attends causally over the target
    sequence and then over the encoder outputs.

    Args:
        embed_dim (int): Dimension of the input embeddings.
        dense_dim (int): Dimension of the dense layer in the feedforward sublayer.
        num_heads (int): Number of attention heads in each multi-head attention layer.

    Attributes:
        attention_1 (MultiHeadAttention): First (causal self-)attention layer.
        attention_2 (MultiHeadAttention): Second (cross-)attention layer.
        dense_proj (Sequential): Feedforward sublayer consisting of two dense layers.
        layernorm_1 (LayerNormalization): Layer normalization applied
            after the first attention layer.
        layernorm_2 (LayerNormalization): Layer normalization applied
            after the second attention layer.
        layernorm_3 (LayerNormalization): Layer normalization applied
            after the feedforward sublayer.
        supports_masking (bool): Whether the layer supports masking.

    Methods:
        get_config(): Returns a dictionary with the configuration of the layer.
        get_causal_attention_mask(inputs): Returns a 3D tensor with a
            causal mask for the given input sequence.
        call(inputs, encoder_outputs, mask=None): Computes the output of
            the layer for the given inputs and encoder outputs.
    """
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim)
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"),
             layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        self.supports_masking = True

    def get_config(self):
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        })
        return config

    def get_causal_attention_mask(self, inputs):
        input_shape = tf.shape(inputs)
        batch_size, sequence_length = input_shape[0], input_shape[1]
        i = tf.range(sequence_length)[:, tf.newaxis]
        j = tf.range(sequence_length)
        mask = tf.cast(i >= j, dtype="int32")  # position i may only attend to positions j <= i
        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
        mult = tf.concat(
            [tf.expand_dims(batch_size, -1),
             tf.constant([1, 1], dtype=tf.int32)], axis=0)
        return tf.tile(mask, mult)  # replicate the mask across the batch

    def call(self, inputs, encoder_outputs, mask=None):
        causal_mask = self.get_causal_attention_mask(inputs)
        padding_mask = causal_mask  # fall back to the causal mask when no padding mask is propagated
        if mask is not None:
            padding_mask = tf.minimum(
                tf.cast(mask[:, tf.newaxis, :], dtype="int32"), causal_mask)
        attention_output_1 = self.attention_1(
            query=inputs,
            value=inputs,
            key=inputs,
            attention_mask=causal_mask)
        attention_output_1 = self.layernorm_1(inputs + attention_output_1)
        attention_output_2 = self.attention_2(
            query=attention_output_1,
            value=encoder_outputs,
            key=encoder_outputs,
            attention_mask=padding_mask,
        )
        attention_output_2 = self.layernorm_2(
            attention_output_1 + attention_output_2)
        proj_output = self.dense_proj(attention_output_2)
        return self.layernorm_3(attention_output_2 + proj_output)

class PositionalEmbedding(layers.Layer):
    """
    The PositionalEmbedding layer class is used to create an embedding layer that
    combines both token embeddings and positional embeddings for input sequences.

    The class takes the following arguments:

    sequence_length: An integer representing the maximum length of the input sequence.
    input_dim: An integer representing the size of the input vocabulary.
    output_dim: An integer representing the size of the embedding vectors.

    The call(self, inputs) method takes an input tensor as an argument and
    returns the embedded tensor obtained by adding the token embeddings and
    positional embeddings. It also computes the positions for the input sequence.

    The compute_mask(self, inputs, mask=None) method returns a mask tensor
    computed from the input tensor (non-zero entries are treated as real tokens).

    The get_config(self) method returns a dictionary containing the configuration
    of the layer.
    """
    def __init__(self, sequence_length, input_dim, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_embeddings = layers.Embedding(
            input_dim=input_dim, output_dim=output_dim)
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim)
        self.sequence_length = sequence_length
        self.input_dim = input_dim
        self.output_dim = output_dim

    def call(self, inputs):
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_positions = self.position_embeddings(positions)
        return embedded_tokens + embedded_positions

    def compute_mask(self, inputs, mask=None):
        return tf.math.not_equal(inputs, 0)

    def get_config(self):
        config = super(PositionalEmbedding, self).get_config()
        config.update({
            "output_dim": self.output_dim,
            "sequence_length": self.sequence_length,
            "input_dim": self.input_dim,
        })
        return config
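For context, here is a minimal sketch (not part of the commit) of how these three blocks are typically wired into an English-to-Portuguese sequence-to-sequence model. Every hyperparameter value below is an illustrative assumption, not something recorded in this diff.

# Illustrative only: sequence_length, vocab_size, embed_dim, dense_dim,
# num_heads, dropout rate, optimizer and loss are all assumed values.
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras_transformer_blocks import (
    PositionalEmbedding, TransformerEncoder, TransformerDecoder)

sequence_length = 20      # assumed maximum sentence length
vocab_size = 15000        # assumed vocabulary size
embed_dim, dense_dim, num_heads = 256, 2048, 8  # assumed model sizes

# Encoder: embed the source tokens and run one encoder block.
encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="english")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)
encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)

# Decoder: embed the shifted target tokens, attend over the encoder outputs,
# and predict the next Portuguese token at each position.
decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="portuguese")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs)
x = layers.Dropout(0.5)(x)
decoder_outputs = layers.Dense(vocab_size, activation="softmax")(x)

transformer = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
transformer.compile(optimizer="rmsprop",
                    loss="sparse_categorical_crossentropy",
                    metrics=["accuracy"])

The decoder receives the encoder outputs as a second positional argument, matching the call(inputs, encoder_outputs, mask=None) signature above; the PositionalEmbedding's compute_mask ensures a padding mask is propagated into both blocks.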
portuguese_vocabulary.txt
ADDED
The diff for this file is too large to render. See raw diff.
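If the two vocabulary files were exported with TextVectorization.get_vocabulary(), one token per line in index order (an assumption; their diffs are not rendered here), they can be loaded back into fresh TextVectorization layers for inference, roughly as follows.

# Assumption: one token per line, in the same index order used at training
# time (e.g. as written out from TextVectorization.get_vocabulary()).
from tensorflow.keras.layers import TextVectorization

def load_vocab(path):
    with open(path, encoding="utf-8") as f:
        return [line.rstrip("\n") for line in f]

sequence_length = 20  # assumed; must match the value used during training

eng_vectorizer = TextVectorization(
    output_mode="int", output_sequence_length=sequence_length)
eng_vectorizer.set_vocabulary(load_vocab("english_vocabulary.txt"))

# The +1 (room for the shifted target token) is another assumption borrowed
# from common seq2seq setups.
por_vectorizer = TextVectorization(
    output_mode="int", output_sequence_length=sequence_length + 1)
por_vectorizer.set_vocabulary(load_vocab("portuguese_vocabulary.txt"))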
transformer_eng_por.h5
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab8bba1f36f70f1d5bf80c722d06ee22747642c9bcd6d3ee4ab753b3108d85eb
+size 190618344
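Finally, a hedged sketch of how this checkpoint might be reloaded. It assumes transformer_eng_por.h5 stores a full Keras model built from the custom layers in keras_transformer_blocks.py; the custom classes must be passed as custom_objects so Keras can reconstruct them from their get_config() output.

# Assumption: the .h5 file is a full saved model (architecture + weights)
# that uses the three custom layers defined in keras_transformer_blocks.py.
from tensorflow import keras
from keras_transformer_blocks import (
    PositionalEmbedding, TransformerEncoder, TransformerDecoder)

transformer = keras.models.load_model(
    "transformer_eng_por.h5",
    custom_objects={
        "PositionalEmbedding": PositionalEmbedding,
        "TransformerEncoder": TransformerEncoder,
        "TransformerDecoder": TransformerDecoder,
    },
)
transformer.summary()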