YingxuHe committed on
Commit
cec9fd5
·
verified ·
1 Parent(s): 5cd6cd7

Upload processor

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
processing_meralion.py CHANGED
@@ -29,7 +29,7 @@ class MERaLiONProcessor(ProcessorMixin):
29
 
30
  attributes = ["feature_extractor", "tokenizer"]
31
  feature_extractor_class = "WhisperFeatureExtractor"
32
- tokenizer_class = "GemmaTokenizer"
33
  valid_kwargs = [
34
  "fixed_speech_embeds_length",
35
  "speech_token_index",
 
29
 
30
  attributes = ["feature_extractor", "tokenizer"]
31
  feature_extractor_class = "WhisperFeatureExtractor"
32
+ tokenizer_class = "AutoTokenizer"
33
  valid_kwargs = [
34
  "fixed_speech_embeds_length",
35
  "speech_token_index",
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060
3
+ size 34362873
tokenizer_config.json CHANGED
@@ -1987,7 +1987,7 @@
1987
  "special": false
1988
  },
1989
  "255999": {
1990
- "content": "<SpeechHere>",
1991
  "lstrip": false,
1992
  "normalized": false,
1993
  "rstrip": false,
 
1987
  "special": false
1988
  },
1989
  "255999": {
1990
+ "content": "<unused99>",
1991
  "lstrip": false,
1992
  "normalized": false,
1993
  "rstrip": false,