Spaces:
Runtime error
Runtime error
# Export the vocabulary of a trained SentencePiece model to vocab.json,
# as a mapping from each piece (token string) to its integer id.
import json
from pathlib import Path

import sentencepiece as spm

# Directory that holds the trained tokenizer. The model is read from here and
# vocab.json is written alongside it, so the location is defined only once.
TOKENIZER_DIR = Path(
    "/Users/apple/Desktop/indictrans2/IndicTrans2/huggingface_interface"
    "/IndicTransToolkit/tokenizer_training/my_tokenizer"
)
model_path = TOKENIZER_DIR / "custom_tokenizer.model"
vocab_save_path = TOKENIZER_DIR / "vocab.json"

# Load SentencePiece model directly (Load is given a plain string path).
sp = spm.SentencePieceProcessor()
sp.Load(str(model_path))

# Extract vocab: walk every id in [0, GetPieceSize()) and map piece -> id.
vocab = {sp.IdToPiece(piece_id): piece_id for piece_id in range(sp.GetPieceSize())}
print(f"Total vocab size: {len(vocab)}")

# Save vocab as vocab.json. ensure_ascii=False writes non-ASCII pieces as
# readable UTF-8 text instead of \uXXXX escapes.
with open(vocab_save_path, "w", encoding="utf-8") as f:
    json.dump(vocab, f, ensure_ascii=False, indent=4)
print(f"Vocab file saved at: {vocab_save_path}")