Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
import torch | |
import re | |
# Checkpoint identifier shared by the tokenizer and the causal language model.
model_name = 'abinayam/gpt-2-tamil'

# Both objects are built once at module level and reused by correct_text().
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Lookup table of known misspellings: each key is the wrong form and its value
# is the spelling that should replace it. Extend with further entries as needed.
common_errors = {"பழங்கல்": "பழங்கள்"}
# Pulli (U+0BCD): the mark that strips the inherent vowel from a consonant.
_PULLI = "\u0bcd"

# A word ending in "கு"/"க்கு", whitespace, then a word that starts with one of
# the hard consonants ப, த, க, ச (captured so it can be doubled).
_KU_SANDHI_RE = re.compile(r"(கு|க்கு)\s+(ப|த|க|ச)")

# Keep the ending, append the captured consonant + pulli, then a single space
# and the captured consonant again (the rest of the next word follows as-is).
_KU_SANDHI_REPL = rf"\g<1>\g<2>{_PULLI} \g<2>"


def apply_sandhi_rules(text):
    """Apply Tamil sandhi (hard-consonant doubling) rules to ``text``.

    When a word ending in "கு" is followed by a word beginning with a hard
    consonant (க, ச, த, ப), that consonant is doubled onto the end of the
    first word, e.g. "கடைக்கு போனேன்" -> "கடைக்குப் போனேன்".

    The inserted consonant must be the one that starts the following word.
    The previous implementation always inserted "ப்", which produced wrong
    forms such as "வீட்டுக்குப் சென்றான்" instead of "வீட்டுக்குச் சென்றான்".

    Args:
        text: The string to transform.

    Returns:
        The text with the doubling applied. Any run of whitespace between the
        two matched words is collapsed to a single space. Text that already
        carries the doubled consonant is left unchanged, because the pattern
        requires whitespace directly after "கு".
    """
    # Add more sandhi rules here as needed.
    return _KU_SANDHI_RE.sub(_KU_SANDHI_REPL, text)
def preprocess_text(text, corrections=None):
    """Replace known misspellings in ``text`` with their corrected forms.

    Every key of the correction table is replaced, wherever it occurs as a
    substring, by its value. Entries are applied in the table's iteration
    order.

    Args:
        text: The string to clean up.
        corrections: Optional mapping of wrong form -> corrected form. When
            omitted (``None``), the module-level ``common_errors`` table is
            used, which is the original behaviour. An explicitly passed empty
            mapping means "apply no corrections".

    Returns:
        The text with all table entries applied.
    """
    # `is None` rather than truthiness so an explicit empty table is honoured.
    if corrections is None:
        corrections = common_errors
    for wrong, right in corrections.items():
        text = text.replace(wrong, right)
    return text
def postprocess_text(text):
    """Return ``text`` after passing it through ``apply_sandhi_rules``."""
    return apply_sandhi_rules(text)
def correct_text(input_text):
    """Run the full correction pipeline on ``input_text``.

    The pipeline is: dictionary corrections (``preprocess_text``), text
    generation with the module-level ``model``/``tokenizer``, decoding, and
    finally sandhi rules (``postprocess_text``).

    Args:
        input_text: Raw text submitted by the user.

    Returns:
        The processed text. An empty string is returned for ``None``, empty,
        or whitespace-only input. If the tokenized input already reaches the
        generation length limit, the model is skipped and only the rule-based
        corrections are applied.
    """
    # An empty prompt gives generate() nothing to condition on; answer directly
    # instead of surfacing a model error in the UI.
    if not input_text or not input_text.strip():
        return ""

    # Upper bound on prompt + generated tokens (same value as before).
    max_length = 100

    preprocessed_text = preprocess_text(input_text)

    # Calling the tokenizer (instead of .encode) also yields the attention
    # mask, which generate() should receive explicitly.
    encoded = tokenizer(preprocessed_text, return_tensors='pt')
    input_ids = encoded['input_ids']

    # max_length counts the prompt as well, so a prompt at or beyond the limit
    # leaves no room to generate. Fall back to the rule-based result.
    if input_ids.shape[-1] >= max_length:
        return postprocess_text(preprocessed_text)

    # Prefer the tokenizer's pad token; fall back to EOS when none is defined.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        # NOTE(review): temperature only influences the output when sampling
        # is enabled; confirm whether do_sample=True was intended here.
        output = model.generate(
            input_ids,
            attention_mask=encoded['attention_mask'],
            max_length=max_length,
            num_return_sequences=1,
            temperature=0.7,
            pad_token_id=pad_token_id,
        )

    corrected_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return postprocess_text(corrected_text)
# Build the web UI: a multi-line input box wired to correct_text() and a
# single output box for the result.
_input_box = gr.Textbox(lines=5, placeholder="Enter Tamil text here...")
_output_box = gr.Textbox(label="Corrected Text")

iface = gr.Interface(
    fn=correct_text,
    inputs=_input_box,
    outputs=_output_box,
    title="Tamil Spell Corrector and Grammar Checker",
    description="This app uses the 'abinayam/gpt-2-tamil' model along with custom rules to correct spelling and grammar in Tamil text.",
    # Sample inputs offered in the UI.
    examples=[
        ["நான் நேற்று கடைக்கு போனேன். அங்கே நிறைய பழங்கல் வாங்கினேன்."],
        ["நான் பள்ளிகு செல்கிறேன்."],
        ["அவன் வீட்டுகு வந்தான்."],
    ],
)

# Start serving the interface.
iface.launch()