mrprimenotes committed on
Commit
86affba
·
verified ·
1 Parent(s): bb60daa

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +80 -28
README.md CHANGED
@@ -42,47 +42,103 @@ TBD
42
  ### Training data
43
  TBD
44
 
45
- ### Training process
46
- TBD
47
-
48
- ### How to use
49
  ```python
50
  import torch
51
- from transformers import WhisperForConditionalGeneration, AutoProcessor, AutoTokenizer, TextStreamer
52
- from datasets import load_dataset
53
 
54
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
55
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
56
 
57
- # Load model and processor
58
- model = WhisperForConditionalGeneration.from_pretrained(
 
 
59
  "mrprimenotes/sign-whisper-german",
60
- torch_dtype=torch_dtype,
61
- low_cpu_mem_usage=True,
62
- use_safetensors=True
63
- ).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- # Load the tokenizer for the model (for decoding)
66
  tokenizer = AutoTokenizer.from_pretrained("mrprimenotes/sign-whisper-german")
67
 
68
- # input preprocessing / feature extraction (TBD)
69
- # input_features = ...
70
- ```
 
 
71
 
72
- #### Use raw model for inference
73
- ```python
74
- output = model(input_features, labels=generated_ids)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- # e.g. output.loss
77
- # output.shape --> b x sq
 
 
 
 
 
 
78
 
79
- tokenizer.batch_decode(generated_ids, skip_special_tokens=False)
 
80
  ```
81
 
82
- ### Use model with generate (work in progress...)
83
  ```python
 
 
84
  streamer = TextStreamer(tokenizer, skip_special_tokens=False) #only needed for streaming
85
 
 
 
 
86
  # Generate
87
  generated_ids = model.generate(
88
  input_features,
@@ -92,8 +148,4 @@ generated_ids = model.generate(
92
  )
93
 
94
  tokenizer.batch_decode(generated_ids, skip_special_tokens=False)
95
- ```
96
-
97
- ### Training
98
-
99
- When changing the configuration of the preprocessing convolution layers make sure the last output has the shape b x 1280 x seq. See custom config in model.py for configuration options.
 
42
  ### Training data
43
  TBD
44
 
45
+ #### Training process
 
 
 
46
  ```python
47
  import torch
48
+ from transformers import WhisperForConditionalGeneration, AutoProcessor, AutoTokenizer, AutoConfig, Trainer, TrainingArguments
 
49
 
50
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
51
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
52
 
53
+ # When changing the configuration of the preprocessing convolution layers make sure their final output has the shape b x 1280 x seq.
54
+ # See custom config in model.py for configuration options.
55
+
56
+ config = AutoConfig.from_pretrained(
57
  "mrprimenotes/sign-whisper-german",
58
+ use_first_embeddings=True,
59
+ embedding_stride=2,
60
+ conv_dropout=0.1,
61
+ skip_connections=True,
62
+ conv_preprocessing_layers=[
63
+ {
64
+ "in_channels": 80,
65
+ "out_channels": 384,
66
+ "kernel_size": 5,
67
+ "padding": 2,
68
+ "activation": "gelu"
69
+ },
70
+ {
71
+ "in_channels": 384,
72
+ "out_channels": 384,
73
+ "kernel_size": 3,
74
+ "stride": 2,
75
+ "padding": 1,
76
+ "activation": "gelu"
77
+ }
78
+ ]
79
+ )
80
 
 
81
  tokenizer = AutoTokenizer.from_pretrained("mrprimenotes/sign-whisper-german")
82
 
83
+ # raw model outputs:
84
+ # output = model(input_features, labels=labels)
85
+ # e.g.
86
+ # output.loss
87
+ # output.logits.shape --> b x seq x vocab_size
88
 
89
+ train_dataset = YourSignDataset(...)
90
+ val_dataset = YourSignDataset(...)
91
+
92
+ # Define training arguments
93
+ training_args = TrainingArguments(
94
+ output_dir="./sign-whisper-german",
95
+ num_train_epochs=3,
96
+ per_device_train_batch_size=1024,
97
+ per_device_eval_batch_size=256,
98
+ warmup_steps=500,
99
+ weight_decay=0.01,
100
+
101
+ # Logging settings
102
+ logging_dir="./logs",
103
+ logging_steps=50,
104
+ logging_strategy="steps",
105
+
106
+ # Evaluation
107
+ evaluation_strategy="steps",
108
+ eval_steps=100,
109
+
110
+ # Saving
111
+ save_strategy="steps",
112
+ save_steps=100,
113
+ save_total_limit=5,
114
+ resume_from_checkpoint=True,
115
+
116
+ load_best_model_at_end=True,
117
+ fp16=torch.cuda.is_available(),
118
+ )
119
 
120
+ # Initialize trainer with tokenizer
121
+ trainer = Trainer(
122
+ model=model,
123
+ args=training_args,
124
+ train_dataset=train_dataset,
125
+ eval_dataset=val_dataset,
126
+ tokenizer=tokenizer,
127
+ )
128
 
129
+ # Train the model
130
+ trainer.train()
131
  ```
132
 
133
+ ### Use model for inference (with generate)
134
  ```python
135
+ from transformers import TextStreamer
136
+
137
  streamer = TextStreamer(tokenizer, skip_special_tokens=False) #only needed for streaming
138
 
139
+ # input preprocessing / feature extraction (TBD)
140
+ # input_features = ...
141
+
142
  # Generate
143
  generated_ids = model.generate(
144
  input_features,
 
148
  )
149
 
150
  tokenizer.batch_decode(generated_ids, skip_special_tokens=False)
151
+ ```