Spaces:

MultiTransformer
/

AyaTonic

Runtime error

App Files Community

tonic committed on Feb 25, 2024

Commit

971bee9

1 Parent(s): 5701b30

adding long audio parsing

Browse files

Files changed (2) hide show

app.py +30 -2
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -14,7 +14,9 @@ import cohere
 import os
 import re
 import pandas as pd
 title = "# Welcome to AyaTonic"
 description = "Learn a New Language With Aya"
@@ -70,6 +72,31 @@ def translate_text(text, instructions=translatetextinst):
     )
     return response.generations[0].text
 class TaggedPhraseExtractor:
     def __init__(self, text=''):
         self.text = text
@@ -184,7 +211,8 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
             final_text += "\nUnsupported file type."
     print("OCR Text: ", final_text)
     if audio is not None:
-        audio_text = process_audio_to_text(audio)
         final_text += "\n" + audio_text
     final_text_with_producetext = final_text + producetext

 import os
 import re
 import pandas as pd
+import pydub
+from pydub import AudioSegment
+from pydub.utils import make_chunks
 title = "# Welcome to AyaTonic"
 description = "Learn a New Language With Aya"
     )
     return response.generations[0].text
+class LongAudioProcessor:
+    def __init__(self, audio_client, api_key=None):
+        self.client = audio_client
+        self.api_key = api_key
+    def process_long_audio(self, audio_path, chunk_length_ms=20000):
+        """
+        Process audio files longer than 29 seconds by chunking them into smaller segments.
+        """
+        audio = AudioSegment.from_file(audio_path)
+        chunks = make_chunks(audio, chunk_length_ms)
+        full_text = ""
+        for i, chunk in enumerate(chunks):
+            chunk_name = f"chunk{i}.wav"
+            with open(chunk_name, 'wb') as file:
+                chunk.export(file, format="wav")
+            try:
+                result = self.process_audio_to_text(chunk_name)
+                full_text += " " + result.strip()
+            except Exception as e:
+                print(f"Error processing {chunk_name}: {e}")
+            finally:
+                if os.path.exists(chunk_name):
+                    os.remove(chunk_name)
+        return full_text.strip()
 class TaggedPhraseExtractor:
     def __init__(self, text=''):
         self.text = text
             final_text += "\nUnsupported file type."
     print("OCR Text: ", final_text)
     if audio is not None:
+        long_audio_processor = LongAudioProcessor(audio_client)
+        audio_text = long_audio_processor.process_long_audio(audio, inputlanguage=translatefrom, outputlanguage=translateto)
         final_text += "\n" + audio_text
     final_text_with_producetext = final_text + producetext

requirements.txt CHANGED Viewed

@@ -7,4 +7,5 @@ pillow
 torchvision
 torch
 python-dotenv
-pandas

 torchvision
 torch
 python-dotenv
+pandas
+pydub