Upload CubeLM
Browse files
- CubeConfig.py +4 -10
- CubeLM.py +3 -2
CubeConfig.py
CHANGED
@@ -1,8 +1,4 @@
|
|
1 |
-
#from transformers import PretrainedConfig
|
2 |
from transformers import GPT2Config
|
3 |
-
from cubeLM.tokenizer import vocab
|
4 |
-
|
5 |
-
vocab_size = len(vocab)
|
6 |
|
7 |
|
8 |
class CubeConfig(GPT2Config):
|
@@ -10,10 +6,10 @@ class CubeConfig(GPT2Config):
|
|
10 |
|
11 |
def __init__(
|
12 |
self,
|
13 |
-
vocab_size=
|
14 |
-
bos_token_id=
|
15 |
-
eos_token_id=
|
16 |
-
pad_token_id=
|
17 |
n_positions=40,
|
18 |
n_embd=512,
|
19 |
n_layer=8,
|
@@ -29,5 +25,3 @@ class CubeConfig(GPT2Config):
|
|
29 |
self.bos_token_id = bos_token_id
|
30 |
self.eos_token_id = eos_token_id
|
31 |
self.pad_token_id = pad_token_id
|
32 |
-
|
33 |
-
|
|
|
|
|
1 |
from transformers import GPT2Config
|
|
|
|
|
|
|
2 |
|
3 |
|
4 |
class CubeConfig(GPT2Config):
|
|
|
6 |
|
7 |
def __init__(
|
8 |
self,
|
9 |
+
vocab_size=16,
|
10 |
+
bos_token_id=15,
|
11 |
+
eos_token_id=15,
|
12 |
+
pad_token_id=15,
|
13 |
n_positions=40,
|
14 |
n_embd=512,
|
15 |
n_layer=8,
|
|
|
25 |
self.bos_token_id = bos_token_id
|
26 |
self.eos_token_id = eos_token_id
|
27 |
self.pad_token_id = pad_token_id
|
|
|
|
CubeLM.py
CHANGED
@@ -7,10 +7,11 @@ from transformers import (
|
|
7 |
GPT2Model,
|
8 |
GenerationMixin,
|
9 |
GPT2PreTrainedModel,
|
10 |
-
PreTrainedModel
|
11 |
)
|
12 |
from transformers.utils import ModelOutput
|
13 |
-
|
|
|
14 |
|
15 |
|
16 |
@dataclass
|
|
|
7 |
GPT2Model,
|
8 |
GenerationMixin,
|
9 |
GPT2PreTrainedModel,
|
10 |
+
PreTrainedModel,
|
11 |
)
|
12 |
from transformers.utils import ModelOutput
|
13 |
+
|
14 |
+
IGNORE_INDEX = -100
|
15 |
|
16 |
|
17 |
@dataclass
|