Spaces:
Running
Running
feat: placeholders for more config
Browse files
src/dalle_mini/model/configuration.py
CHANGED
|
@@ -65,6 +65,8 @@ class DalleBartConfig(PretrainedFromWandbMixin, PretrainedConfig):
|
|
| 65 |
tau_init=0.05, # used only in cosine attention (Swin v2)
|
| 66 |
use_deepnet_scaling=False, # used in Deepnet
|
| 67 |
use_glu=False, # "GLU Variants Improve Transformer"
|
|
|
|
|
|
|
| 68 |
# parameters that should not be necessary but could affect results
|
| 69 |
force_ln_scale=True, # force scale in layernorm even when followed by dense layers
|
| 70 |
force_final_ln_encoder=False, # force layer normalization in encoder final layer even when followed by dense layers
|
|
@@ -88,11 +90,14 @@ class DalleBartConfig(PretrainedFromWandbMixin, PretrainedConfig):
|
|
| 88 |
], "ln_positions must be 'normformer', 'swinv2' or 'deepnet'"
|
| 89 |
if ln_positions == "deepnet":
|
| 90 |
ln_positions = "postln"
|
|
|
|
| 91 |
self.ln_positions = ln_positions
|
| 92 |
self.use_cosine_attention = use_cosine_attention
|
| 93 |
self.tau_init = tau_init
|
| 94 |
self.use_deepnet_scaling = use_deepnet_scaling
|
| 95 |
self.use_glu = use_glu
|
|
|
|
|
|
|
| 96 |
self.force_ln_scale = force_ln_scale
|
| 97 |
self.force_final_ln_encoder = force_final_ln_encoder
|
| 98 |
|
|
|
|
| 65 |
tau_init=0.05, # used only in cosine attention (Swin v2)
|
| 66 |
use_deepnet_scaling=False, # used in Deepnet
|
| 67 |
use_glu=False, # "GLU Variants Improve Transformer"
|
| 68 |
+
use_alibi=False, # from "Train Short, Test Long: Attention with Linear Biases Enables Input Length Extrapolation"
|
| 69 |
+
sink_iters=1, # used in Sinkformers
|
| 70 |
# parameters that should not be necessary but could affect results
|
| 71 |
force_ln_scale=True, # force scale in layernorm even when followed by dense layers
|
| 72 |
force_final_ln_encoder=False, # force layer normalization in encoder final layer even when followed by dense layers
|
|
|
|
| 90 |
], "ln_positions must be 'normformer', 'swinv2' or 'deepnet'"
|
| 91 |
if ln_positions == "deepnet":
|
| 92 |
ln_positions = "postln"
|
| 93 |
+
assert use_alibi is False, "use_alibi is not supported yet"
|
| 94 |
self.ln_positions = ln_positions
|
| 95 |
self.use_cosine_attention = use_cosine_attention
|
| 96 |
self.tau_init = tau_init
|
| 97 |
self.use_deepnet_scaling = use_deepnet_scaling
|
| 98 |
self.use_glu = use_glu
|
| 99 |
+
self.use_alibi = use_alibi
|
| 100 |
+
self.sink_iters = sink_iters
|
| 101 |
self.force_ln_scale = force_ln_scale
|
| 102 |
self.force_final_ln_encoder = force_final_ln_encoder
|
| 103 |
|