Spaces:
Sleeping
Sleeping
vocab corpus increased
Browse files
app.py
CHANGED
|
@@ -13,7 +13,7 @@ class DecodeRequest(BaseModel):
|
|
| 13 |
tokens: str
|
| 14 |
|
| 15 |
# Initialize the tokenizer
|
| 16 |
-
tokenizer = BPEGujaratiTokenizer(corpus_path="gu_corpus.txt", max_vocab_size=5000, sample_size=
|
| 17 |
|
| 18 |
app = FastAPI()
|
| 19 |
|
|
|
|
| 13 |
tokens: str
|
| 14 |
|
| 15 |
# Initialize the tokenizer
|
| 16 |
+
tokenizer = BPEGujaratiTokenizer(corpus_path="gu_corpus.txt", max_vocab_size=5000, sample_size=50000)
|
| 17 |
|
| 18 |
app = FastAPI()
|
| 19 |
|