# Hugging Face Space: AyaTonic — learn a new language with Aya.
| import gradio as gr | |
| from gradio_rich_textbox import RichTextbox | |
| from PIL import Image | |
| from surya.ocr import run_ocr | |
| from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor | |
| from surya.model.recognition.model import load_model as load_rec_model | |
| from surya.model.recognition.processor import load_processor as load_rec_processor | |
| # from lang_list import TEXT_SOURCE_LANGUAGE_NAMES | |
| from gradio_client import Client | |
| from dotenv import load_dotenv | |
| import requests | |
| from io import BytesIO | |
| import cohere | |
| import os | |
| import re | |
| import pandas as pd | |
# UI copy rendered at the top of the Gradio app.
title = "# Welcome to AyaTonic"
description = "Learn a New Language With Aya"
# Load environment variables
load_dotenv()
COHERE_API_KEY = os.getenv('CO_API_KEY')  # Cohere API key for text generation
SEAMLESSM4T = os.getenv('SEAMLESSM4T')  # Gradio Space id/URL for SeamlessM4T (speech<->text)
# Language choices for the UI dropdowns, loaded from the bundled CSV.
# NOTE(review): assumes lang_list.csv has a "name" column — verify the file ships with the Space.
df = pd.read_csv("lang_list.csv")
choices = df["name"].to_list()
inputlanguage = ""  # NOTE(review): module-level default; appears unused except as a placeholder
# Prompt suffix appended to the collected input text; "{target_language}" is a
# str.format placeholder that callers are expected to fill in.
producetext = "\n\nProduce a complete expositional blog post in {target_language} based on the above :"
# Prompt suffix asking the model to wrap nouns/verbs/adjectives in colored rich-text tags.
formatinputstring = "\n\nthe above text is a learning aid. you must use rich text format to rewrite the above and add 1 . a red color tags for nouns 2. a blue color tag for verbs 3. a green color tag for adjectives and adverbs:"
# Regular expression patterns for each color
patterns = {
    "red": r'<span style="color: red;">(.*?)</span>',
    "blue": r'<span style="color: blue;">(.*?)</span>',
    "green": r'<span style="color: green;">(.*?)</span>',
}
# Dictionaries to hold the matches
matches = {
    "red": [],
    "blue": [],
    "green": [],
}
class TaggedPhraseExtractor:
    """Collect regex patterns keyed by color and pull tagged phrases out of text."""

    def __init__(self, text=''):
        self.text = text
        self.patterns = {}

    def set_text(self, text):
        """Replace the text that will be searched."""
        self.text = text

    def add_pattern(self, color, pattern):
        """Register (or overwrite) the regex associated with *color*."""
        self.patterns[color] = pattern

    def extract_phrases(self):
        """Run every registered pattern against the current text.

        Returns a dict mapping each color to the list of captured phrases.
        """
        found = {}
        for color, pattern in self.patterns.items():
            found[color] = re.findall(pattern, self.text)
        return found

    def print_phrases(self):
        """Extract phrases and dump them to stdout, grouped by color."""
        for color, phrases in self.extract_phrases().items():
            print(f"Phrases with color {color}:")
            for phrase in phrases:
                print(f"- {phrase}")
            print()
# Shared API clients created once at import time.
# NOTE(review): `audio_client` is shadowed by a fresh Client built inside the
# audio helper functions below, and `client` appears unused in this file —
# confirm before removing either.
co = cohere.Client(COHERE_API_KEY)
audio_client = Client(SEAMLESSM4T)
client = Client(SEAMLESSM4T)
def process_audio_to_text(audio_path, inputlanguage="English"):
    """
    Convert recorded speech to text via the SeamlessM4T Gradio Space.

    Parameters:
        audio_path: filesystem path to the recorded audio clip.
        inputlanguage: language of the speech; it is passed as both the
            source and target language so the transcript stays in the
            speaker's language.

    Returns:
        The first element of the client's result tuple (the transcript).
    """
    # Fix: reuse the module-level `audio_client` instead of rebuilding
    # Client(SEAMLESSM4T) on every call, which re-ran the connection
    # handshake each time and shadowed the shared client.
    result = audio_client.predict(
        audio_path,
        inputlanguage,
        inputlanguage,
        api_name="/s2tt"
    )
    print("Audio Result: ", result)
    return result[0]
def process_text_to_audio(text, translatefrom="English", translateto="English"):
    """
    Synthesize speech for `text` via the SeamlessM4T Gradio Space.

    Parameters:
        text: the text to speak.
        translatefrom: language the text is currently in.
        translateto: language the audio should be produced in.

    Returns:
        The first element of the client's result tuple (the audio output).
    """
    # Fix: reuse the module-level `audio_client` instead of rebuilding
    # Client(SEAMLESSM4T) on every call.
    result = audio_client.predict(
        text,
        translatefrom,
        translateto,
        api_name="/t2st"
    )
    return result[0]
class OCRProcessor:
    """Wraps surya's detection + recognition models to OCR images and PDFs."""

    def __init__(self, langs=["en"]):  # add input language code
        # NOTE(review): mutable default argument — shared across instances if
        # ever mutated; confirm callers never append to it.
        self.langs = langs
        # Load detection and recognition models/processors once per instance.
        self.det_processor, self.det_model = load_det_processor(), load_det_model()
        self.rec_model, self.rec_processor = load_rec_model(), load_rec_processor()

    def process_image(self, image):
        """
        Process a PIL image and return the OCR text.
        """
        predictions = run_ocr([image], [self.langs], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
        return predictions[0]  # Assuming the first item in predictions contains the desired text

    def process_pdf(self, pdf_path):
        """
        Process a PDF file and return the OCR text.
        """
        # NOTE(review): this passes the PDF *path* where process_image passes a
        # PIL image — confirm run_ocr accepts a path, otherwise the PDF pages
        # need to be rendered to images first.
        predictions = run_ocr([pdf_path], [self.langs], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
        return predictions[0]  # Assuming the first item in predictions contains the desired text
def _ocr_prediction_to_text(ocr_prediction):
    """Flatten a surya OCR prediction into a string of space-prefixed words.

    NOTE(review): mirrors the original index dance — `list(ocr_prediction)[0][1]`
    is assumed to be the list of recognized line entries and `[1][1]` the text
    of each entry; confirm against surya's OCRResult structure.
    """
    line_entries = list(ocr_prediction)[0][1]
    return "".join(
        " " + list(line_entries[idx])[1][1]
        for idx in range(len(line_entries))
    )

def process_input(image=None, file=None, audio=None, text="", translateto="English", translatefrom="English"):
    """
    Run the full AyaTonic pipeline.

    Collects text from every provided input (camera image, uploaded
    image/PDF via surya OCR, microphone audio via SeamlessM4T, and the
    free-text box), asks Aya to write a blog post in `translateto`, asks a
    second model to add color tags for parts of speech, and synthesizes
    audio of the result.

    Returns:
        (processed_text, audio_output): the tagged rich text and the TTS audio.
    """
    ocr_processor = OCRProcessor()
    final_text = text
    if image is not None:
        # Copy-pasted OCR flattening consolidated into _ocr_prediction_to_text.
        final_text += _ocr_prediction_to_text(ocr_processor.process_image(image))
    if file is not None:
        if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
            pil_image = Image.open(file)
            final_text += _ocr_prediction_to_text(ocr_processor.process_image(pil_image))
        elif file.name.lower().endswith('.pdf'):
            final_text += _ocr_prediction_to_text(ocr_processor.process_pdf(file.name))
        else:
            final_text += "\nUnsupported file type."
    print("OCR Text: ", final_text)
    if audio is not None:
        # Fix: honor the caller's source language; the original always
        # transcribed as English regardless of `translatefrom`.
        audio_text = process_audio_to_text(audio, translatefrom)
        final_text += "\n" + audio_text
    # Fix: the original concatenated `producetext` verbatim, sending the
    # literal "{target_language}" placeholder to the model instead of the
    # requested language.
    final_text_with_producetext = final_text + producetext.format(target_language=translateto)
    response = co.generate(
        model='c4ai-aya',
        prompt=final_text_with_producetext,
        max_tokens=1024,
        temperature=0.5
    )
    # TODO: add graceful handling for errors (overflow)
    generated_text = response.generations[0].text
    print("Generated Text: ", generated_text)
    generated_text_with_format = generated_text + "\n" + formatinputstring
    response = co.generate(
        model='command-nightly',
        prompt=generated_text_with_format,
        max_tokens=4000,
        temperature=0.5
    )
    processed_text = response.generations[0].text
    # The generated lesson is already in the target language, so both the
    # source and target of the TTS call are `translateto`.
    audio_output = process_text_to_audio(processed_text, translateto, translateto)
    return processed_text, audio_output
def main():
    """Build the Gradio UI and launch the app."""
    with gr.Blocks() as demo:
        gr.Markdown(title)
        gr.Markdown(description)
        with gr.Row():
            input_language = gr.Dropdown(choices=choices, label="Your Native Language")
            target_language = gr.Dropdown(choices=choices, label="Language To Learn")
        with gr.Accordion("Talk To 🌟AyaTonic"):
            with gr.Tab("🤙🏻Audio & Text"):
                audio_input = gr.Audio(sources="microphone", type="filepath", label="Mic Input")
                text_input = gr.Textbox(lines=2, label="Text Input")
            with gr.Tab("📸Image & File"):
                image_input = gr.Image(type="pil", label="Camera Input")
                file_input = gr.File(label="File Upload")
        process_button = gr.Button("🌟AyaTonic")
        processed_text_output = RichTextbox(label="Processed Text")
        audio_output = gr.Audio(label="Audio Output")
        process_button.click(
            fn=process_input,
            # Fix: process_input's 5th/6th positional params are
            # (translateto, translatefrom), so the language-to-learn must be
            # passed before the native language; the original had them swapped,
            # generating lessons in the user's native language.
            inputs=[image_input, file_input, audio_input, text_input, target_language, input_language],
            outputs=[processed_text_output, audio_output]
        )
    # Fix: the original built the Blocks app but never launched it, so the
    # Space served nothing.
    demo.launch()

if __name__ == "__main__":
    main()