crpatel committed on
Commit
56a0cfd
·
1 Parent(s): 4656a31

vocab corpus increased

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -13,7 +13,7 @@ class DecodeRequest(BaseModel):
13
  tokens: str
14
 
15
  # Initialize the tokenizer
16
- tokenizer = BPEGujaratiTokenizer(corpus_path="gu_corpus.txt", max_vocab_size=5000, sample_size=275000)
17
 
18
  app = FastAPI()
19
 
 
13
  tokens: str
14
 
15
  # Initialize the tokenizer
16
+ tokenizer = BPEGujaratiTokenizer(corpus_path="gu_corpus.txt", max_vocab_size=5000, sample_size=50000)
17
 
18
  app = FastAPI()
19