"""Gradio app that corrects Tamil text with a GPT-2 model plus hand-written rules.

Pipeline for each request: dictionary-based spelling fixes, generation with the
causal language model, then rule-based sandhi (consonant doubling) fixes.
"""

import re

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer once at import time so every request reuses them.
model_name = 'abinayam/gpt-2-tamil'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Token budget for one request, counting both the prompt and the generated text.
MAX_TOTAL_TOKENS = 100

# Known misspellings mapped to their corrected spelling.
common_errors = {
    'பழங்கல்': 'பழங்கள்',
    # Add more common spelling errors here
}

# A word ending in "கு" / "க்கு", whitespace, then a word starting with one of
# the hard consonants ப, த, க, ச. Compiled once because it runs on every request.
_SANDHI_KU_PATTERN = re.compile(r'(கு|க்கு)\s+(ப|த|க|ச)')

# TAMIL SIGN VIRAMA (pulli): turns a consonant letter into a pure consonant.
_PULLI = '\u0bcd'


def _double_following_consonant(match: re.Match) -> str:
    """Rebuild a sandhi match with the next word's first consonant doubled."""
    suffix, consonant = match.group(1), match.group(2)
    # The inserted letter must be the SAME consonant that starts the next word
    # (…க்குச் செ…, …க்குத் த…), not a fixed "ப்".
    return f'{suffix}{consonant}{_PULLI} {consonant}'


def apply_sandhi_rules(text: str) -> str:
    """Apply sandhi (consonant doubling) rules to *text*.

    After a word-final "கு"/"க்கு", when the next word starts with ப, த, க or
    ச, that consonant plus a pulli is appended to the first word. The
    whitespace run between the two words is collapsed to a single space.

    Args:
        text: Tamil text to post-process.

    Returns:
        The text with the sandhi rule applied at every matching word boundary.
    """
    text = _SANDHI_KU_PATTERN.sub(_double_following_consonant, text)
    # Add more sandhi rules as needed
    return text


def preprocess_text(text: str) -> str:
    """Replace every known misspelling from ``common_errors`` in *text*.

    Args:
        text: Raw user input.

    Returns:
        The text with each dictionary key replaced by its correction.
    """
    for error, correction in common_errors.items():
        text = text.replace(error, correction)
    return text


def postprocess_text(text: str) -> str:
    """Apply the rule-based fixes that run after model generation.

    Args:
        text: Decoded model output.

    Returns:
        The text after sandhi rules have been applied.
    """
    return apply_sandhi_rules(text)


def correct_text(input_text: str) -> str:
    """Run the full correction pipeline on *input_text*.

    Args:
        input_text: Tamil text entered by the user.

    Returns:
        The post-processed model output, or an empty string when the input is
        empty or only whitespace.
    """
    # An empty prompt tokenizes to zero tokens, which generate() cannot extend.
    if not input_text or not input_text.strip():
        return ''

    preprocessed_text = preprocess_text(input_text)

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, so generate() does not have to guess it.
    encoded = tokenizer(preprocessed_text, return_tensors='pt')
    input_ids = encoded['input_ids']
    attention_mask = encoded['attention_mask']

    # Keep the original 100-token total budget for short prompts, but always
    # allow at least one new token so prompts at or over the budget do not
    # make generate() fail.
    prompt_length = input_ids.shape[1]
    max_new_tokens = max(1, MAX_TOTAL_TOKENS - prompt_length)

    # NOTE(review): temperature only influences decoding when sampling is
    # enabled (do_sample=True); confirm whether greedy decoding is intended.
    # NOTE(review): a causal LM returns the prompt followed by a continuation,
    # so the decoded text contains the input plus newly generated tokens --
    # confirm this is the desired "correction" behaviour.
    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )

    corrected_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return postprocess_text(corrected_text)


# Create the Gradio interface
iface = gr.Interface(
    fn=correct_text,
    inputs=gr.Textbox(lines=5, placeholder="Enter Tamil text here..."),
    outputs=gr.Textbox(label="Corrected Text"),
    title="Tamil Spell Corrector and Grammar Checker",
    description="This app uses the 'abinayam/gpt-2-tamil' model along with custom rules to correct spelling and grammar in Tamil text.",
    examples=[
        ["நான் நேற்று கடைக்கு போனேன். அங்கே நிறைய பழங்கல் வாங்கினேன்."],
        ["நான் பள்ளிகு செல்கிறேன்."],
        ["அவன் வீட்டுகு வந்தான்."],
    ],
)

# Launch the app
iface.launch()