Spaces:
Sleeping
Sleeping
vocab corpus increased
Browse files
app.py
CHANGED
@@ -13,7 +13,7 @@ class DecodeRequest(BaseModel):
|
|
13 |
tokens: str
|
14 |
|
15 |
# Initialize the tokenizer
|
16 |
-
tokenizer = BPEGujaratiTokenizer(corpus_path="gu_corpus.txt", max_vocab_size=5000, sample_size=
|
17 |
|
18 |
app = FastAPI()
|
19 |
|
|
|
13 |
tokens: str
|
14 |
|
15 |
# Initialize the tokenizer
|
16 |
+
tokenizer = BPEGujaratiTokenizer(corpus_path="gu_corpus.txt", max_vocab_size=5000, sample_size=275000)
|
17 |
|
18 |
app = FastAPI()
|
19 |
|