# Spaces: Sleeping  (page-status text captured along with the file; not part of the program)
from fastapi import FastAPI, HTTPException | |
from fastapi.responses import HTMLResponse | |
from fastapi.staticfiles import StaticFiles | |
from pydantic import BaseModel | |
from encoder import BPEGujaratiTokenizer | |
from fastapi.middleware.cors import CORSMiddleware | |
# Pydantic schema for the body of an encode request
class EncodeRequest(BaseModel):
    """Request payload carrying the raw text to be tokenized."""

    # Text that the encode handler forwards to ``tokenizer.encode``.
    text: str
class DecodeRequest(BaseModel):
    """Request payload carrying the token IDs to be turned back into text."""

    # Comma-separated integer token IDs in one string, e.g. "12,7,301";
    # the decode handler splits on "," and converts each piece with int().
    tokens: str
# Build the tokenizer once at import time; the request handlers below all use
# this single module-level instance.
tokenizer = BPEGujaratiTokenizer(
    corpus_path="gu_corpus.txt",
    max_vocab_size=5000,
    sample_size=300000,
)

app = FastAPI()

# CORS: the API is open to every origin, method and header.
# Credentials are deliberately disabled: a wildcard origin must not be
# combined with allow_credentials=True (the CORS spec forbids it and the
# FastAPI docs require explicit origins in that case). None of the handlers
# in this file rely on cookies or auth headers.
# NOTE(review): if credentialed cross-origin calls are ever needed, list the
# allowed origins explicitly instead of re-enabling credentials with "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve static files (HTML, CSS, JS) from ./static under the /static URL prefix
app.mount("/static", StaticFiles(directory="static"), name="static")
async def read_root() -> str:
    """Return the contents of ``static/index.html`` as a string.

    The path is relative to the process's current working directory.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file -- confirm it is registered on ``app``.
    # Decode explicitly as UTF-8: the platform default encoding is not
    # guaranteed to handle non-ASCII (e.g. Gujarati) text in the page.
    with open("static/index.html", encoding="utf-8") as f:
        return f.read()
async def encode_text(request: EncodeRequest):
    """Tokenize ``request.text`` and return the result.

    Returns:
        A dict with one key, ``"encoded_tokens"``, holding whatever
        ``tokenizer.encode`` returns for the submitted text.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file -- confirm it is registered on ``app``.
    text = request.text
    print("request.text: ", text)  # echo the incoming text to stdout
    encoded = tokenizer.encode(text)
    return {"encoded_tokens": encoded}
async def decode_tokens(request: DecodeRequest):
    """Decode a comma-separated string of token IDs back into text.

    ``request.tokens`` is split on ``","`` and every piece is converted with
    ``int()`` before being handed to ``tokenizer.decode``.

    Returns:
        A dict with one key, ``"decoded_text"``, holding whatever
        ``tokenizer.decode`` returns for the parsed IDs.

    Raises:
        HTTPException: 400 if any piece is not an integer (this includes an
            empty string or a trailing comma). Previously this surfaced as
            an unhandled ``ValueError``, i.e. a 500 response.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file -- confirm it is registered on ``app``.
    print(request.tokens)  # echo the incoming payload to stdout
    try:
        token_ids = [int(piece) for piece in request.tokens.split(",")]
    except ValueError as err:
        # Malformed client input is a client error, not a server fault.
        raise HTTPException(
            status_code=400,
            detail="tokens must be a comma-separated list of integers",
        ) from err
    decoded_text = tokenizer.decode(token_ids)
    return {"decoded_text": decoded_text}