Spaces:
Sleeping
Sleeping
File size: 1,505 Bytes
bcc12b1 46ec2e5 bcc12b1 8846920 bcc12b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
from fastapi import FastAPI, HTTPException
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from encoder import BPEGujaratiTokenizer
from fastapi.middleware.cors import CORSMiddleware
# JSON body accepted by the POST /encode endpoint.
class EncodeRequest(BaseModel):
    # Raw text that the handler passes to tokenizer.encode().
    text: str
# JSON body accepted by the POST /decode endpoint.
class DecodeRequest(BaseModel):
    # Token ids packed into one comma-separated string (e.g. "12,7,305");
    # the /decode handler splits on "," and converts each piece with int().
    tokens: str
# One module-level tokenizer instance, created at import time and used by
# the /encode and /decode handlers.
# NOTE(review): presumably this reads gu_corpus.txt and builds the BPE
# vocabulary during construction, which would add to startup time -- confirm
# against the encoder module.
tokenizer = BPEGujaratiTokenizer(
    corpus_path="gu_corpus.txt",
    max_vocab_size=5000,
    sample_size=300000,
)

# Application object that the middleware, static mount and routes attach to.
app = FastAPI()
# Cross-origin policy: accept any origin, HTTP method and request header,
# and allow credentialed requests.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard origins/methods/headers with allow_credentials=True -- confirm
# whether credentialed cross-origin requests are actually required here.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Expose the contents of the local "static" directory (HTML, CSS, JS)
# under the /static URL prefix.
app.mount(
    "/static",
    StaticFiles(directory="static"),
    name="static",
)
# GET / -- return the front-end page (static/index.html) as an HTML response.
# Documented with comments rather than a docstring so the endpoint's
# published OpenAPI description stays as it was (none).
@app.get("/", response_class=HTMLResponse)
async def read_root():
    # Decode explicitly as UTF-8: without an encoding argument open() uses the
    # platform's locale default, which can fail on or garble non-ASCII page
    # content on systems whose default is not UTF-8.
    with open("static/index.html", encoding="utf-8") as page:
        return page.read()
# POST /encode -- tokenize request.text with the shared tokenizer and return
# the result under the "encoded_tokens" key.
@app.post("/encode")
async def encode_text(request: EncodeRequest):
    """Encodes the input text and returns the tokens."""
    text = request.text
    # Echo the incoming text to stdout for debugging.
    print("request.text: ", text)
    encoded = tokenizer.encode(text)
    return {"encoded_tokens": encoded}
# POST /decode -- turn a comma-separated string of token ids back into text.
#
# Input:   request.tokens, e.g. "12,7,305".
# Returns: {"decoded_text": <result of tokenizer.decode(ids)>}.
# Raises:  HTTPException(422) when any piece is not an integer (this includes
#          an empty string and empty pieces such as a trailing comma).
@app.post("/decode")
async def decode_tokens(request: DecodeRequest):
    """Decodes the input tokens and returns the original text."""
    # Echo the incoming payload to stdout for debugging.
    print(request.tokens)
    try:
        token_ids = [int(piece) for piece in request.tokens.split(",")]
    except ValueError as exc:
        # Malformed client input is a client error; report it as such instead
        # of letting the ValueError escape as an HTTP 500.
        raise HTTPException(
            status_code=422,
            detail="tokens must be a comma-separated list of integers",
        ) from exc
    return {"decoded_text": tokenizer.decode(token_ids)}
|