Vipplav committed on
Commit
ce95a70
·
verified ·
1 Parent(s): f5731ab

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +14 -2
README.md CHANGED
@@ -38,11 +38,23 @@ A Byte-Pair Encoding (BPE) tokenizer trained on over **3.4 lakh cleaned Telugu t
38
  ```python
39
  from transformers import T5Tokenizer
40
 
 
41
  tokenizer = T5Tokenizer.from_pretrained("Vipplav/telugu-bpe-23k")
42
 
43
- text = "తెలుగు అభివృద్ధి కోసం మేం కలిసి పనిచేస్తున్నాం."
 
 
 
44
  tokens = tokenizer.tokenize(text)
45
- print(tokens)
 
 
 
 
 
 
 
 
46
  ```
47
 
48
 
 
38
  ```python
39
  from transformers import T5Tokenizer
40
 
41
+ # Load tokenizer from Hugging Face Hub
42
  tokenizer = T5Tokenizer.from_pretrained("Vipplav/telugu-bpe-23k")
43
 
44
+ # Sample Telugu input
45
+ text = "పరిశీలన తేదీ: 15-06-2025"
46
+
47
+ # Tokenize the input
48
  tokens = tokenizer.tokenize(text)
49
+
50
+ # Decode tokens back to text
51
+ decoded = tokenizer.decode(tokenizer.convert_tokens_to_ids(tokens), skip_special_tokens=True)
52
+
53
+ # Display results
54
+ print(f"\n📥 Input : {text}")
55
+ print(f"🔤 Tokens : {tokens}")
56
+ print(f"📝 Decoded : {decoded}")
57
+
58
  ```
59
 
60