Spaces:
Runtime error
Runtime error
tonic
committed on
Commit
·
971bee9
1
Parent(s):
5701b30
adding long audio parsing
Browse files
- app.py +30 -2
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -14,7 +14,9 @@ import cohere
|
|
| 14 |
import os
|
| 15 |
import re
|
| 16 |
import pandas as pd
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
|
| 19 |
title = "# Welcome to AyaTonic"
|
| 20 |
description = "Learn a New Language With Aya"
|
|
@@ -70,6 +72,31 @@ def translate_text(text, instructions=translatetextinst):
|
|
| 70 |
)
|
| 71 |
return response.generations[0].text
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
class TaggedPhraseExtractor:
|
| 74 |
def __init__(self, text=''):
|
| 75 |
self.text = text
|
|
@@ -184,7 +211,8 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
|
|
| 184 |
final_text += "\nUnsupported file type."
|
| 185 |
print("OCR Text: ", final_text)
|
| 186 |
if audio is not None:
|
| 187 |
-
|
|
|
|
| 188 |
final_text += "\n" + audio_text
|
| 189 |
|
| 190 |
final_text_with_producetext = final_text + producetext
|
|
|
|
| 14 |
import os
|
| 15 |
import re
|
| 16 |
import pandas as pd
|
| 17 |
+
import pydub
|
| 18 |
+
from pydub import AudioSegment
|
| 19 |
+
from pydub.utils import make_chunks
|
| 20 |
|
| 21 |
title = "# Welcome to AyaTonic"
|
| 22 |
description = "Learn a New Language With Aya"
|
|
|
|
| 72 |
)
|
| 73 |
return response.generations[0].text
|
| 74 |
|
| 75 |
+
class LongAudioProcessor:
|
| 76 |
+
def __init__(self, audio_client, api_key=None):
|
| 77 |
+
self.client = audio_client
|
| 78 |
+
self.api_key = api_key
|
| 79 |
+
|
| 80 |
+
def process_long_audio(self, audio_path, chunk_length_ms=20000):
|
| 81 |
+
"""
|
| 82 |
+
Process audio files longer than 29 seconds by chunking them into smaller segments.
|
| 83 |
+
"""
|
| 84 |
+
audio = AudioSegment.from_file(audio_path)
|
| 85 |
+
chunks = make_chunks(audio, chunk_length_ms)
|
| 86 |
+
full_text = ""
|
| 87 |
+
for i, chunk in enumerate(chunks):
|
| 88 |
+
chunk_name = f"chunk{i}.wav"
|
| 89 |
+
with open(chunk_name, 'wb') as file:
|
| 90 |
+
chunk.export(file, format="wav")
|
| 91 |
+
try:
|
| 92 |
+
result = self.process_audio_to_text(chunk_name)
|
| 93 |
+
full_text += " " + result.strip()
|
| 94 |
+
except Exception as e:
|
| 95 |
+
print(f"Error processing {chunk_name}: {e}")
|
| 96 |
+
finally:
|
| 97 |
+
if os.path.exists(chunk_name):
|
| 98 |
+
os.remove(chunk_name)
|
| 99 |
+
return full_text.strip()
|
| 100 |
class TaggedPhraseExtractor:
|
| 101 |
def __init__(self, text=''):
|
| 102 |
self.text = text
|
|
|
|
| 211 |
final_text += "\nUnsupported file type."
|
| 212 |
print("OCR Text: ", final_text)
|
| 213 |
if audio is not None:
|
| 214 |
+
long_audio_processor = LongAudioProcessor(audio_client)
|
| 215 |
+
audio_text = long_audio_processor.process_long_audio(audio, inputlanguage=translatefrom, outputlanguage=translateto)
|
| 216 |
final_text += "\n" + audio_text
|
| 217 |
|
| 218 |
final_text_with_producetext = final_text + producetext
|
requirements.txt
CHANGED
|
@@ -7,4 +7,5 @@ pillow
|
|
| 7 |
torchvision
|
| 8 |
torch
|
| 9 |
python-dotenv
|
| 10 |
-
pandas
|
|
|
|
|
|
| 7 |
torchvision
|
| 8 |
torch
|
| 9 |
python-dotenv
|
| 10 |
+
pandas
|
| 11 |
+
pydub
|