Spaces:
Build error
Build error
Vijayanand Sankarasubramanian
committed on
Commit
·
85463e8
1
Parent(s):
48d0b40
added wav2vec based transcription
Browse files- .gitignore +1 -0
- app.py +38 -24
- tools/transcribe.py +33 -34
.gitignore
CHANGED
@@ -182,3 +182,4 @@ cache
|
|
182 |
flagged
|
183 |
*.rtf
|
184 |
*.mp3
|
|
|
|
182 |
flagged
|
183 |
*.rtf
|
184 |
*.mp3
|
185 |
+
*.txt
|
app.py
CHANGED
@@ -3,7 +3,7 @@ from helpers.model_utils import GPT3, GPT4, LLAMA3, ANTHROPIC2, set_question_ans
|
|
3 |
from tools.summarize import MAPREDUCE, STUFF, summarize_podcast
|
4 |
from tools.answer_bot import answer_question
|
5 |
from tools.aspect_and_sentiment_extraction import extract_aspects_and_sentiment
|
6 |
-
from tools.transcribe import transcribe_podcast, transcribe_podcast_from_mp3
|
7 |
|
8 |
def get_answer_for(user_question, transcript_file_name, question_answer_llm_choice):
|
9 |
if transcript_file_name is None:
|
@@ -42,13 +42,13 @@ def generate_aspects_and_sentiments(transcript_file_name, sentiment_analysis_llm
|
|
42 |
|
43 |
return sentiment, transcript_file_name, sentiment_analysis_llm_choice
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
|
53 |
def setup_summarization_llm(choice, summarization_llm_choice):
|
54 |
set_summarization_llm(choice)
|
@@ -69,27 +69,31 @@ def setup_summarization_method(choice, summarization_method):
|
|
69 |
summarization_method = choice
|
70 |
return choice, summarization_method
|
71 |
|
72 |
-
def
|
|
|
|
|
|
|
|
|
73 |
if not uploaded_file:
|
74 |
-
|
75 |
else:
|
76 |
-
transcript_file_name = transcribe_podcast_from_mp3(uploaded_file.name)
|
77 |
-
|
78 |
-
return
|
79 |
|
80 |
-
def download_and_transcribe_podcast(mp3_url, transcript_file,
|
81 |
if not mp3_url:
|
82 |
-
|
83 |
else:
|
84 |
-
transcript_file = transcribe_podcast(mp3_url)
|
85 |
-
|
86 |
-
return
|
87 |
-
|
88 |
|
89 |
summarization_llm_choices = [GPT3, GPT4, ANTHROPIC2]
|
90 |
question_answer_llm_choices = [GPT3, GPT4, ANTHROPIC2]
|
91 |
sentiment_analysis_llm_choices = [GPT3, GPT4, ANTHROPIC2]
|
92 |
summarize_method_choices = [MAPREDUCE, STUFF]
|
|
|
93 |
|
94 |
with gr.Blocks() as demo:
|
95 |
transcript_file = gr.State()
|
@@ -97,18 +101,28 @@ with gr.Blocks() as demo:
|
|
97 |
question_answer_llm_choice = gr.State()
|
98 |
sentiment_analysis_llm_choice = gr.State()
|
99 |
summarization_llm_choice = gr.State()
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
with gr.Row():
|
102 |
with gr.Group("Enter Podcast mp3 URL"):
|
103 |
mp3_url = gr.Textbox(label="Podcast MP3 URL")
|
104 |
submit_button = gr.Button("Transcribe")
|
105 |
-
|
|
|
106 |
with gr.Group("Upload Podcast mp3 File"):
|
107 |
mp3_file = gr.File(label="Podcast mp3 file")
|
108 |
submit_button = gr.Button("Transcribe")
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
112 |
with gr.Group("LLM Selection"):
|
113 |
with gr.Row():
|
114 |
choice = gr.Radio(label="Summarization LLM", choices=summarization_llm_choices)
|
|
|
3 |
from tools.summarize import MAPREDUCE, STUFF, summarize_podcast
|
4 |
from tools.answer_bot import answer_question
|
5 |
from tools.aspect_and_sentiment_extraction import extract_aspects_and_sentiment
|
6 |
+
from tools.transcribe import transcribe_podcast, transcribe_podcast_from_mp3, WAV2VEC, AUTOMODELFORSPEECH
|
7 |
|
8 |
def get_answer_for(user_question, transcript_file_name, question_answer_llm_choice):
|
9 |
if transcript_file_name is None:
|
|
|
42 |
|
43 |
return sentiment, transcript_file_name, sentiment_analysis_llm_choice
|
44 |
|
45 |
+
def setup_transcript_file_handle(uploaded_file, transcript_file_name, transcription_status=None):
    """Register an already-transcribed upload as the active transcript.

    Args:
        uploaded_file: Object exposing a ``name`` attribute, or a falsy value
            when nothing was uploaded.
        transcript_file_name: Current transcript name; returned unchanged when
            no file was uploaded.
        transcription_status: Ignored on input and kept only for backward
            compatibility. It is optional because the click wiring visible in
            this file supplies just two inputs to this handler.

    Returns:
        Tuple ``(transcription_status, transcript_file_name)``.
    """
    # NOTE(review): the visible click wiring lists a single output component
    # while two values are returned here — confirm the outputs list.
    if not uploaded_file:
        return "No File Detected, Failure", transcript_file_name
    return "Upload Success", uploaded_file.name
|
52 |
|
53 |
def setup_summarization_llm(choice, summarization_llm_choice):
|
54 |
set_summarization_llm(choice)
|
|
|
69 |
summarization_method = choice
|
70 |
return choice, summarization_method
|
71 |
|
72 |
+
def setup_transcription_method(choice, transcription_method):
    """Adopt the selected transcription model.

    The incoming ``transcription_method`` value is discarded; the selection is
    returned twice, once per output wired to this handler.

    Returns:
        Tuple ``(choice, choice)``.
    """
    return choice, choice
|
75 |
+
|
76 |
+
def transcribe_audio_file(uploaded_file, transcript_file_name, transcription_method):
    """Transcribe an uploaded audio file with the chosen method.

    Args:
        uploaded_file: Object exposing a ``name`` attribute, or a falsy value
            when nothing was uploaded.
        transcript_file_name: Current transcript name; kept when no file is given.
        transcription_method: Method identifier forwarded to the transcriber.

    Returns:
        Tuple ``(transcript_file_name, transcription_method, status)``.
    """
    if uploaded_file:
        transcript_file_name = transcribe_podcast_from_mp3(
            uploaded_file.name, transcription_method
        )
        outcome = "Upload Success"
    else:
        outcome = "No File Detected, Failure"
    return transcript_file_name, transcription_method, outcome
|
83 |
|
84 |
+
def download_and_transcribe_podcast(mp3_url, transcript_file, transcription_method):
    """Transcribe the podcast at ``mp3_url`` with the chosen method.

    Args:
        mp3_url: URL of the audio; a falsy value skips transcription.
        transcript_file: Current transcript name; kept when no URL is given.
        transcription_method: Method identifier forwarded to the transcriber.

    Returns:
        Tuple ``(transcript_file, transcription_method, status)``.
    """
    if mp3_url:
        transcript_file = transcribe_podcast(mp3_url, transcription_method)
        outcome = "Upload Success"
    else:
        outcome = "No URL detected, Failure"
    return transcript_file, transcription_method, outcome
|
|
|
91 |
|
92 |
summarization_llm_choices = [GPT3, GPT4, ANTHROPIC2]
|
93 |
question_answer_llm_choices = [GPT3, GPT4, ANTHROPIC2]
|
94 |
sentiment_analysis_llm_choices = [GPT3, GPT4, ANTHROPIC2]
|
95 |
summarize_method_choices = [MAPREDUCE, STUFF]
|
96 |
+
transcription_method_choices = [WAV2VEC, AUTOMODELFORSPEECH]
|
97 |
|
98 |
with gr.Blocks() as demo:
|
99 |
transcript_file = gr.State()
|
|
|
101 |
question_answer_llm_choice = gr.State()
|
102 |
sentiment_analysis_llm_choice = gr.State()
|
103 |
summarization_llm_choice = gr.State()
|
104 |
+
transcription_method = gr.State()
|
105 |
+
|
106 |
+
with gr.Group("Trancsription Model Selection"):
|
107 |
+
with gr.Row():
|
108 |
+
choice = gr.Radio(label="Transcription Model", choices=transcription_method_choices)
|
109 |
+
output = gr.Textbox(label="")
|
110 |
+
choice.change(setup_transcription_method, inputs=[choice, transcription_method], outputs=[output, transcription_method])
|
111 |
with gr.Row():
|
112 |
with gr.Group("Enter Podcast mp3 URL"):
|
113 |
mp3_url = gr.Textbox(label="Podcast MP3 URL")
|
114 |
submit_button = gr.Button("Transcribe")
|
115 |
+
status = gr.Textbox(label="", value="Pending Trancsribe")
|
116 |
+
submit_button.click(download_and_transcribe_podcast, inputs=[mp3_url, transcript_file, transcription_method], outputs=[transcript_file, transcription_method, status])
|
117 |
with gr.Group("Upload Podcast mp3 File"):
|
118 |
mp3_file = gr.File(label="Podcast mp3 file")
|
119 |
submit_button = gr.Button("Transcribe")
|
120 |
+
status = gr.Textbox(label="", value="Pending Transcribe")
|
121 |
+
submit_button.click(transcribe_audio_file, inputs=[mp3_file, transcript_file, transcription_method], outputs=[transcript_file, transcription_method, status])
|
122 |
+
with gr.Group("Upload RTF File"):
|
123 |
+
rtf_file = gr.File(label="Transcripted RTF file")
|
124 |
+
submit_button = gr.Button("Upload RTF")
|
125 |
+
submit_button.click(setup_transcript_file_handle, inputs=[rtf_file, transcript_file], outputs=[transcript_file])
|
126 |
with gr.Group("LLM Selection"):
|
127 |
with gr.Row():
|
128 |
choice = gr.Radio(label="Summarization LLM", choices=summarization_llm_choices)
|
tools/transcribe.py
CHANGED
@@ -4,6 +4,9 @@ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
|
4 |
import requests
|
5 |
import uuid
|
6 |
|
|
|
|
|
|
|
7 |
class Audio_to_Text:
|
8 |
def __init__(self):
|
9 |
self.model_id = "openai/whisper-large-v3"
|
@@ -34,19 +37,17 @@ class Audio_to_Text:
|
|
34 |
file.write(response.content)
|
35 |
print("MP3 file downloaded and saved successfully.")
|
36 |
|
37 |
-
def convert_audio_to_text(self, audio_file):
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
print("Transcript saved successfully.")
|
47 |
-
return save_file_name
|
48 |
|
49 |
-
def convert_audio_to_text_from_url(self, url):
|
50 |
#get uuid for the audio file
|
51 |
uuid_audio = str(uuid.uuid4())
|
52 |
save_path = f"audio-{uuid_audio}.mp3"
|
@@ -56,34 +57,32 @@ class Audio_to_Text:
|
|
56 |
|
57 |
return path_text_file_of_audio
|
58 |
|
59 |
-
def
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
65 |
|
66 |
-
def transcribe_podcast(file_url):
|
67 |
-
# Example usage:
|
68 |
-
# url = "https://chrt.fm/track/138C95/prfx.byspotify.com/e/play.podtrac.com/npr-510310/traffic.megaphone.fm/NPR7010771664.mp3"
|
69 |
-
|
70 |
|
|
|
71 |
audio_to_text = Audio_to_Text()
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
-
|
74 |
# Convert the audio file to text
|
75 |
-
|
76 |
-
path_text_file_of_audio = audio_to_text.convert_audio_to_text_from_url(file_url)
|
77 |
|
78 |
# Print the result
|
79 |
print(path_text_file_of_audio)
|
80 |
-
return path_text_file_of_audio
|
81 |
-
|
82 |
-
def transcribe_audio_to_text(speech):
|
83 |
-
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
|
84 |
-
text = asr(speech)["text"]
|
85 |
-
return text
|
86 |
-
|
87 |
-
# def text_to_sentiment(text):
|
88 |
-
# classifier = pipeline("text-classification")
|
89 |
-
# return classifier(text)[0]["label"]
|
|
|
4 |
import requests
|
5 |
import uuid
|
6 |
|
7 |
+
WAV2VEC = "wav2vec"
|
8 |
+
AUTOMODELFORSPEECH = "automodelforspeech"
|
9 |
+
|
10 |
class Audio_to_Text:
|
11 |
def __init__(self):
|
12 |
self.model_id = "openai/whisper-large-v3"
|
|
|
37 |
file.write(response.content)
|
38 |
print("MP3 file downloaded and saved successfully.")
|
39 |
|
40 |
+
def convert_audio_to_text(self, audio_file, transcription_method):
    """Transcribe ``audio_file`` and return the name of the saved transcript.

    Args:
        audio_file: Audio input handed to the selected recognizer.
        transcription_method: ``WAV2VEC`` selects the wav2vec recognizer; any
            other value uses ``self.pipe``.

    Returns:
        Name of the text file the transcript was written to.
    """
    if transcription_method == WAV2VEC:
        return self.transcribe_audio_to_text_using_wav2vec(audio_file)

    transformers.logging.set_verbosity_info()
    result = self.pipe(audio_file, generate_kwargs={"language": "english"})
    print("Converted audio to text successfully.")
    # The saver calls file.write(), which needs a str. The wav2vec path in this
    # file indexes its pipeline output with ["text"], so unwrap a dict result
    # the same way instead of writing the raw object.
    # NOTE(review): presumably self.pipe is a transformers ASR pipeline —
    # confirm against __init__.
    text = result["text"] if isinstance(result, dict) else result
    # save the result to a text file
    return self.save_transcribed_text_to_file(text)
|
|
|
|
|
49 |
|
50 |
+
def convert_audio_to_text_from_url(self, url, transcription_method):
|
51 |
#get uuid for the audio file
|
52 |
uuid_audio = str(uuid.uuid4())
|
53 |
save_path = f"audio-{uuid_audio}.mp3"
|
|
|
57 |
|
58 |
return path_text_file_of_audio
|
59 |
|
60 |
+
def save_transcribed_text_to_file(self, text):
    """Write ``text`` to a new uniquely named transcript file.

    Args:
        text: Transcript contents to persist.

    Returns:
        The generated file name, ``transcript-<uuid4>.txt``, relative to the
        current working directory.
    """
    save_file_name = f"transcript-{uuid.uuid4()}.txt"
    # Explicit UTF-8 so non-ASCII transcripts do not depend on the platform's
    # default encoding.
    with open(save_file_name, "w", encoding="utf-8") as file:
        file.write(text)
    print("Transcript saved successfully.")
    return save_file_name
|
67 |
|
68 |
+
def transcribe_audio_to_text_using_wav2vec(self, mp3):
    """Transcribe ``mp3`` with the wav2vec2 recognizer and save the text.

    Builds an ``automatic-speech-recognition`` pipeline for
    ``facebook/wav2vec2-base-960h`` on every call.

    Returns:
        Name of the file the transcript was saved to.
    """
    recognizer = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
    return self.save_transcribed_text_to_file(recognizer(mp3)["text"])
|
72 |
|
|
|
|
|
|
|
|
|
73 |
|
74 |
+
def transcribe_podcast_from_mp3(mp3_file, transcription_method):
    """Transcribe a local audio file and return the saved transcript's name.

    Args:
        mp3_file: Audio input forwarded to ``Audio_to_Text.convert_audio_to_text``.
        transcription_method: Method identifier forwarded unchanged.
    """
    converter = Audio_to_Text()
    transcript_name = converter.convert_audio_to_text(mp3_file, transcription_method)
    return transcript_name
|
77 |
+
|
78 |
+
def transcribe_podcast(file_url, transcription_method):
    """Transcribe the audio at ``file_url`` and return the transcript path.

    Delegates to ``Audio_to_Text.convert_audio_to_text_from_url`` and prints
    the resulting path before returning it.

    Example URL:
        https://chrt.fm/track/138C95/prfx.byspotify.com/e/play.podtrac.com/npr-510310/traffic.megaphone.fm/NPR7010771664.mp3
    """
    converter = Audio_to_Text()
    # Convert the audio behind the URL to text.
    transcript_path = converter.convert_audio_to_text_from_url(file_url, transcription_method)
    # Echo where the transcript was written.
    print(transcript_path)
    return transcript_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|