File size: 5,179 Bytes
bcc12b1
 
 
 
 
 
1b7925a
 
 
 
8846920
1b7925a
 
 
 
 
 
 
 
 
bcc12b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b7925a
 
8846920
1b7925a
 
8846920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b7925a
 
 
8846920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b7925a
 
 
 
 
 
 
 
bcc12b1
1b7925a
 
bcc12b1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Gujarati BPE Tokenizer</title>
    <!-- Bootstrap CSS.
         NOTE(review): the version segment of this URL was unreadable in the reviewed copy;
         it is pinned to 5.3.3 here as an assumption. Confirm against the deployed page. -->
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
    <style>
        /* Page wrapper: cap the content width and add space above it. */
        .container {
            margin-top: 2rem;
            max-width: 1200px;
        }

        /* Light grey rounded panel used for the output area of each card. */
        .result-box {
            margin: 1rem 0;
            padding: 1rem;
            border-radius: 5px;
            background-color: #f8f9fa;
        }
    </style>
    <script>
        /**
         * Encode the text typed into #inputText.
         *
         * Echoes the text into #originalSentence, POSTs it as JSON
         * ({ text }) to /encode, and writes the `encoded_tokens` field of the
         * JSON reply into #encodedTokens. If the request fails or the server
         * answers with a non-2xx status, an error message is written into
         * #encodedTokens instead.
         *
         * @returns {Promise<void>} Settles once the page has been updated.
         */
        async function encode() {
            const text = document.getElementById("inputText").value;
            document.getElementById("originalSentence").innerText = text;

            const tokensDisplay = document.getElementById("encodedTokens");
            try {
                const response = await fetch("/encode", {
                    method: "POST",
                    headers: {
                        "Content-Type": "application/json"
                    },
                    body: JSON.stringify({ text })
                });
                // fetch() only rejects on network failure; HTTP error statuses must be checked explicitly.
                if (!response.ok) {
                    throw new Error(`Encode request failed with status ${response.status}`);
                }
                const data = await response.json();
                tokensDisplay.innerText = data.encoded_tokens;
            } catch (error) {
                tokensDisplay.innerText = `Error: ${error.message}`;
            }
        }

        /**
         * Decode the tokens typed into #inputTokens.
         *
         * Echoes the raw input string into #inputTokensDisplay, POSTs it as
         * JSON ({ tokens }) to /decode, and writes the `decoded_text` field of
         * the JSON reply into #decodedText. The input is sent exactly as typed;
         * the expected token format is defined by the server. If the request
         * fails or the server answers with a non-2xx status, an error message
         * is written into #decodedText instead.
         *
         * @returns {Promise<void>} Settles once the page has been updated.
         */
        async function decode() {
            const tokens = document.getElementById("inputTokens").value;
            document.getElementById("inputTokensDisplay").innerText = tokens;

            const decodedDisplay = document.getElementById("decodedText");
            try {
                const response = await fetch("/decode", {
                    method: "POST",
                    headers: {
                        "Content-Type": "application/json"
                    },
                    body: JSON.stringify({ tokens })
                });
                // fetch() only rejects on network failure; HTTP error statuses must be checked explicitly.
                if (!response.ok) {
                    throw new Error(`Decode request failed with status ${response.status}`);
                }
                const data = await response.json();
                decodedDisplay.innerText = data.decoded_text;
            } catch (error) {
                decodedDisplay.innerText = `Error: ${error.message}`;
            }
        }

        /**
         * Clear both input boxes and every result display on the page.
         */
        function resetFields() {
            // Form controls are emptied through `value`.
            for (const inputId of ["inputText", "inputTokens"]) {
                document.getElementById(inputId).value = '';
            }

            // Display spans are emptied through `innerText`.
            const displayIds = ["encodedTokens", "originalSentence", "decodedText", "inputTokensDisplay"];
            for (const displayId of displayIds) {
                document.getElementById(displayId).innerText = '';
            }
        }
    </script>
</head>
<body class="bg-light">
    <div class="container">
        <!-- Page title. The document is lang="en", so the Gujarati runs are tagged lang="gu". -->
        <h1 class="text-center mb-4"> <span lang="gu">સરળ ગુજરાતી બી પી ઇ ટોકનાઇઝર</span> (Basic Gujarati BPE Tokenizer) <span lang="gu">(૫૦૦૦શબ્દો)</span></h1>
        <p class="text-center text-muted mb-4">5000 Vocabulary Size</p>

        <div class="row">
            <!-- Encode Card: text input, Encode button, and the output panel that encode() writes to. -->
            <div class="col-md-6 mb-4">
                <div class="card h-100">
                    <div class="card-header">
                        <h2 class="h5 mb-0">Encode Text</h2>
                    </div>
                    <div class="card-body">
                        <div class="mb-3">
                            <!-- Explicit label: a placeholder disappears on input and is not a reliable accessible name. -->
                            <label class="form-label" for="inputText">Text to encode</label>
                            <input type="text" class="form-control mb-2" id="inputText" placeholder="Enter text to encode...">
                            <button type="button" class="btn btn-primary" onclick="encode()">Encode</button>
                        </div>
                        <!-- Live region: the spans below are filled in by script, so changes are announced politely. -->
                        <div class="result-box" aria-live="polite">
                            <p class="mb-2"><strong>Encoded Tokens:</strong></p>
                            <p class="mb-2"><span id="encodedTokens" class="text-break"></span></p>
                            <p class="mb-2"><strong>Original Sentence:</strong></p>
                            <p class="mb-0"><span id="originalSentence" class="text-break"></span></p>
                        </div>
                    </div>
                </div>
            </div>

            <!-- Decode Card: token input, Decode button, and the output panel that decode() writes to. -->
            <div class="col-md-6 mb-4">
                <div class="card h-100">
                    <div class="card-header">
                        <h2 class="h5 mb-0">Decode Tokens</h2>
                    </div>
                    <div class="card-body">
                        <div class="mb-3">
                            <!-- Explicit label: a placeholder disappears on input and is not a reliable accessible name. -->
                            <label class="form-label" for="inputTokens">Tokens to decode</label>
                            <input type="text" class="form-control mb-2" id="inputTokens" placeholder="Enter tokens to decode...">
                            <button type="button" class="btn btn-primary" onclick="decode()">Decode</button>
                        </div>
                        <!-- Live region: the spans below are filled in by script, so changes are announced politely. -->
                        <div class="result-box" aria-live="polite">
                            <p class="mb-2"><strong>Decoded Text:</strong></p>
                            <p class="mb-2"><span id="decodedText" class="text-break"></span></p>
                            <p class="mb-2"><strong>Original Tokens Inputted:</strong></p>
                            <p class="mb-0"><span id="inputTokensDisplay" class="text-break"></span></p>
                        </div>
                    </div>
                </div>
            </div>
        </div>

        <!-- Clears both inputs and all result displays via resetFields(). -->
        <div class="text-center">
            <!-- Explicit type so the button never acts as a submit button if it ends up inside a form. -->
            <button type="button" class="btn btn-secondary" onclick="resetFields()">Reset All</button>
        </div>
    </div>

    <!-- Bootstrap JS bundle.
         NOTE(review): the version segment of this URL was unreadable in the reviewed copy;
         it is pinned to 5.3.3 here as an assumption. Confirm against the deployed page. -->
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>