Spaces:

flax-community
/

dalle-mini

Running

boris committed on Mar 28, 2022

Commit

69bcbeb

1 Parent(s): a7e5050

feat: placeholders for more config

Files changed (1) hide show

src/dalle_mini/model/configuration.py CHANGED Viewed

@@ -65,6 +65,8 @@ class DalleBartConfig(PretrainedFromWandbMixin, PretrainedConfig):
         tau_init=0.05,  # used only in cosine attention (Swin v2)
         use_deepnet_scaling=False,  # used in Deepnet
         use_glu=False,  # "GLU Variants Improve Transformer"
         # parameters that should not be necessary but could affect results
         force_ln_scale=True,  # force scale in layernorm even when followed by dense layers
         force_final_ln_encoder=False,  # force layer normalization in encoder final layer even when followed by dense layers
@@ -88,11 +90,14 @@ class DalleBartConfig(PretrainedFromWandbMixin, PretrainedConfig):
         ], "ln_positions must be 'normformer', 'swinv2' or 'deepnet'"
         if ln_positions == "deepnet":
             ln_positions = "postln"
         self.ln_positions = ln_positions
         self.use_cosine_attention = use_cosine_attention
         self.tau_init = tau_init
         self.use_deepnet_scaling = use_deepnet_scaling
         self.use_glu = use_glu
         self.force_ln_scale = force_ln_scale
         self.force_final_ln_encoder = force_final_ln_encoder

         tau_init=0.05,  # used only in cosine attention (Swin v2)
         use_deepnet_scaling=False,  # used in Deepnet
         use_glu=False,  # "GLU Variants Improve Transformer"
+        use_alibi=False,  # from "Train Short, Test Long: Attention with Linear Biases Enables Input Length Extrapolation"
+        sink_iters=1,  # used in SinkFormers
         # parameters that should not be necessary but could affect results
         force_ln_scale=True,  # force scale in layernorm even when followed by dense layers
         force_final_ln_encoder=False,  # force layer normalization in encoder final layer even when followed by dense layers
         ], "ln_positions must be 'normformer', 'swinv2' or 'deepnet'"
         if ln_positions == "deepnet":
             ln_positions = "postln"
+        assert use_alibi is False, "use_alibi is not supported yet"
         self.ln_positions = ln_positions
         self.use_cosine_attention = use_cosine_attention
         self.tau_init = tau_init
         self.use_deepnet_scaling = use_deepnet_scaling
         self.use_glu = use_glu
+        self.use_alibi = use_alibi
+        self.sink_iters = sink_iters
         self.force_ln_scale = force_ln_scale
         self.force_final_ln_encoder = force_final_ln_encoder