arsath-sm's picture
Update app.py
e2b6396 verified
raw
history blame
2.3 kB
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re
# Load the model and tokenizer
model_name = 'abinayam/gpt-2-tamil'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Common error corrections
common_errors = {
'பழங்கல்': 'பழங்கள்',
# Add more common spelling errors here
}
def apply_sandhi_rules(text):
# Apply sandhi rules
text = re.sub(r'(கு|க்கு)\s+(ப|த|க|ச)', r'\1ப் \2', text)
# Add more sandhi rules as needed
return text
def preprocess_text(text):
# Apply common error corrections
for error, correction in common_errors.items():
text = text.replace(error, correction)
return text
def postprocess_text(text):
# Apply sandhi rules
text = apply_sandhi_rules(text)
return text
def correct_text(input_text):
# Preprocess the input text
preprocessed_text = preprocess_text(input_text)
# Tokenize the preprocessed text
input_ids = tokenizer.encode(preprocessed_text, return_tensors='pt')
# Generate corrected text
with torch.no_grad():
output = model.generate(input_ids, max_length=100, num_return_sequences=1, temperature=0.7)
# Decode the generated text
corrected_text = tokenizer.decode(output[0], skip_special_tokens=True)
# Postprocess the corrected text
final_text = postprocess_text(corrected_text)
return final_text
# Create the Gradio interface
iface = gr.Interface(
fn=correct_text,
inputs=gr.Textbox(lines=5, placeholder="Enter Tamil text here..."),
outputs=gr.Textbox(label="Corrected Text"),
title="Tamil Spell Corrector and Grammar Checker",
description="This app uses the 'abinayam/gpt-2-tamil' model along with custom rules to correct spelling and grammar in Tamil text.",
examples=[
["நான் நேற்று கடைக்கு போனேன். அங்கே நிறைய பழங்கல் வாங்கினேன்."],
["நான் பள்ளிகு செல்கிறேன்."],
["அவன் வீட்டுகு வந்தான்."]
]
)
# Launch the app
iface.launch()