opencampus
/

sign-whisper-german

Model card Files and versions Community

mrprimenotes committed on Jan 5

Commit

3b51d88

·

verified ·

1 Parent(s): 78e5ba3

Update model.py

Files changed (1) hide show

model.py +33 -25

model.py CHANGED Viewed

@@ -6,12 +6,21 @@ import types
 class ConvLayerConfig:
     """Configuration for a single convolutional layer"""
-    in_channels: int
-    out_channels: int
-    kernel_size: int
-    stride: int = 1
-    padding: int = 0
-    activation: Literal["gelu", "relu", "none"] = "gelu"
 class CustomWhisperConfig(WhisperConfig):
     def __init__(
@@ -28,30 +37,29 @@ class CustomWhisperConfig(WhisperConfig):
     ):
         super().__init__(**kwargs)
-        # Original custom parameters
         self.use_first_embeddings = use_first_embeddings
         self.embedding_stride = embedding_stride
         self.slide_feature_dim = slide_feature_dim
-        # New convolutional layer customization parameters
-        self.conv_preprocessing_layers = conv_preprocessing_layers or [
-            # Default Whisper conv layers configuration
-            ConvLayerConfig(
-                in_channels=self.num_mel_bins,
-                out_channels=self.d_model,
-                kernel_size=3,
-                padding=1
-            ),
-            ConvLayerConfig(
-                in_channels=self.d_model,
-                out_channels=self.d_model,
-                kernel_size=3,
-                stride=2,
-                padding=1
-            )
-        ]
-        # Additional conv layer parameters
         self.conv_dropout = conv_dropout
         self.conv_bias = conv_bias
         self.conv_activation = conv_activation

 class ConvLayerConfig:
     """Configuration for a single convolutional layer"""
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        kernel_size: int,
+        stride: int = 1,
+        padding: int = 0,
+        activation: Literal["gelu", "relu", "none"] = "gelu"
+    ):
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.padding = padding
+        self.activation = activation
 class CustomWhisperConfig(WhisperConfig):
     def __init__(
     ):
         super().__init__(**kwargs)
         self.use_first_embeddings = use_first_embeddings
         self.embedding_stride = embedding_stride
         self.slide_feature_dim = slide_feature_dim
+        if conv_preprocessing_layers is None:
+            conv_preprocessing_layers = [
+                ConvLayerConfig(
+                    in_channels=self.num_mel_bins,
+                    out_channels=self.d_model,
+                    kernel_size=3,
+                    padding=1
+                ),
+                ConvLayerConfig(
+                    in_channels=self.d_model,
+                    out_channels=self.d_model,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1
+                )
+            ]
+        self.conv_preprocessing_layers = conv_preprocessing_layers
         self.conv_dropout = conv_dropout
         self.conv_bias = conv_bias
         self.conv_activation = conv_activation