crpatel committed on
Commit
4656a31
·
1 Parent(s): 46ec2e5

vocab corpus increased

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -13,7 +13,7 @@ class DecodeRequest(BaseModel):
13
  tokens: str
14
 
15
  # Initialize the tokenizer
16
- tokenizer = BPEGujaratiTokenizer(corpus_path="gu_corpus.txt", max_vocab_size=5000, sample_size=300000)
17
 
18
  app = FastAPI()
19
 
 
13
  tokens: str
14
 
15
  # Initialize the tokenizer
16
+ tokenizer = BPEGujaratiTokenizer(corpus_path="gu_corpus.txt", max_vocab_size=5000, sample_size=275000)
17
 
18
  app = FastAPI()
19