Upload GPTOptim
- model.safetensors +2 -2
- modeling_gpt_optimized.py +3 -2
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:86048048139b2cae7b486e2da9b4abc53112e9290d190f6d23bec864a1fdfa3b
+size 4040722640
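The updated LFS pointer pins the new weights to a specific sha256 and byte size. As a quick sanity check after downloading, the hash can be recomputed locally and compared against the pointer; a minimal sketch, where the local file path is an assumption (adjust it to wherever model.safetensors was downloaded):

import hashlib

# Recompute the sha256 of the downloaded weights and compare it against
# the hash recorded in the LFS pointer above. The path is an assumption.
EXPECTED = "86048048139b2cae7b486e2da9b4abc53112e9290d190f6d23bec864a1fdfa3b"

digest = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        digest.update(chunk)

assert digest.hexdigest() == EXPECTED, "checksum mismatch"
print("ok:", digest.hexdigest())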
modeling_gpt_optimized.py CHANGED
@@ -1,5 +1,6 @@
 import torch
 import torch.nn as nn
+import bitsandbytes
 from torch.nn import CrossEntropyLoss, functional as F
 from transformers import PreTrainedModel, GPT2PreTrainedModel
 from .configuration_gpt_optimized import GPTOptimConfig
@@ -145,8 +146,8 @@ class GPT(nn.Module):
         self.config = config

         self.transformer = nn.ModuleDict(dict(
-            wte = nn.Embedding(config.vocab_size, config.n_embd),
-            wpe = nn.Embedding(config.block_size, config.n_embd),
+            wte = bitsandbytes.nn.StableEmbedding(config.vocab_size, config.n_embd),
+            wpe = bitsandbytes.nn.StableEmbedding(config.block_size, config.n_embd),
             h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
             ln_f = nn.LayerNorm(config.n_embd),
         ))
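The substantive change swaps the token and position embeddings from plain nn.Embedding to bitsandbytes.nn.StableEmbedding, a drop-in replacement that adds a LayerNorm after the lookup and Xavier-uniform initialization, intended to keep embedding training stable when the model is trained with bitsandbytes' 8-bit optimizers. A minimal sketch of that pairing, using illustrative GPT-2-style sizes rather than this repo's GPTOptimConfig values:

import torch
import bitsandbytes as bnb

# Illustrative sizes (assumptions, not taken from this repo's config).
vocab_size, block_size, n_embd = 50257, 1024, 768

# Same call signature as nn.Embedding, but with LayerNorm on the
# output and Xavier-uniform init for training stability.
wte = bnb.nn.StableEmbedding(vocab_size, n_embd)
wpe = bnb.nn.StableEmbedding(block_size, n_embd)

ids = torch.randint(0, vocab_size, (2, 16))  # (batch, seq)
pos = torch.arange(16).unsqueeze(0)          # (1, seq)
x = wte(ids) + wpe(pos)                      # (2, 16, n_embd)

# The usual pairing: an 8-bit optimizer for the parameters. StableEmbedding
# registers itself with bitsandbytes so its own optimizer state is kept in
# 32-bit, which is the reason to prefer it over nn.Embedding here.
opt = bnb.optim.Adam8bit(list(wte.parameters()) + list(wpe.parameters()), lr=1e-3)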