Vijayanand Sankarasubramanian committed on
Commit
85463e8
·
1 Parent(s): 48d0b40

added wav2vec based transcription

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +38 -24
  3. tools/transcribe.py +33 -34
.gitignore CHANGED
@@ -182,3 +182,4 @@ cache
182
  flagged
183
  *.rtf
184
  *.mp3
 
 
182
  flagged
183
  *.rtf
184
  *.mp3
185
+ *.txt
app.py CHANGED
@@ -3,7 +3,7 @@ from helpers.model_utils import GPT3, GPT4, LLAMA3, ANTHROPIC2, set_question_ans
3
  from tools.summarize import MAPREDUCE, STUFF, summarize_podcast
4
  from tools.answer_bot import answer_question
5
  from tools.aspect_and_sentiment_extraction import extract_aspects_and_sentiment
6
- from tools.transcribe import transcribe_podcast, transcribe_podcast_from_mp3
7
 
8
  def get_answer_for(user_question, transcript_file_name, question_answer_llm_choice):
9
  if transcript_file_name is None:
@@ -42,13 +42,13 @@ def generate_aspects_and_sentiments(transcript_file_name, sentiment_analysis_llm
42
 
43
  return sentiment, transcript_file_name, sentiment_analysis_llm_choice
44
 
45
- # def setup_rtf_file_handle(uploaded_file, transcript_file_name):
46
- # if not uploaded_file:
47
- # status = "No File Detected, Failure"
48
- # else:
49
- # transcript_file_name = uploaded_file.name
50
- # status = "Upload Success"
51
- # return status, transcript_file_name
52
 
53
  def setup_summarization_llm(choice, summarization_llm_choice):
54
  set_summarization_llm(choice)
@@ -69,27 +69,31 @@ def setup_summarization_method(choice, summarization_method):
69
  summarization_method = choice
70
  return choice, summarization_method
71
 
72
- def transcribe_audio_file(uploaded_file, transcript_file_name, transcription_status):
 
 
 
 
73
  if not uploaded_file:
74
- transcription_status = "No File Detected, Failure"
75
  else:
76
- transcript_file_name = transcribe_podcast_from_mp3(uploaded_file.name)
77
- transcription_status = "Upload Success"
78
- return transcription_status, transcript_file_name
79
 
80
- def download_and_transcribe_podcast(mp3_url, transcript_file, transcription_status):
81
  if not mp3_url:
82
- transcription_status = "No URL detected, Failure"
83
  else:
84
- transcript_file = transcribe_podcast(mp3_url)
85
- transcription_status = "Upload Success"
86
- return transcription_status, transcript_file
87
-
88
 
89
  summarization_llm_choices = [GPT3, GPT4, ANTHROPIC2]
90
  question_answer_llm_choices = [GPT3, GPT4, ANTHROPIC2]
91
  sentiment_analysis_llm_choices = [GPT3, GPT4, ANTHROPIC2]
92
  summarize_method_choices = [MAPREDUCE, STUFF]
 
93
 
94
  with gr.Blocks() as demo:
95
  transcript_file = gr.State()
@@ -97,18 +101,28 @@ with gr.Blocks() as demo:
97
  question_answer_llm_choice = gr.State()
98
  sentiment_analysis_llm_choice = gr.State()
99
  summarization_llm_choice = gr.State()
100
- transcription_status = gr.State(value = "Pending Transcribe")
 
 
 
 
 
 
101
  with gr.Row():
102
  with gr.Group("Enter Podcast mp3 URL"):
103
  mp3_url = gr.Textbox(label="Podcast MP3 URL")
104
  submit_button = gr.Button("Transcribe")
105
- submit_button.click(download_and_transcribe_podcast, inputs=[mp3_url, transcript_file, transcription_status], outputs=[transcription_status, transcript_file])
 
106
  with gr.Group("Upload Podcast mp3 File"):
107
  mp3_file = gr.File(label="Podcast mp3 file")
108
  submit_button = gr.Button("Transcribe")
109
- submit_button.click(transcribe_audio_file, inputs=[mp3_file, transcript_file, transcription_status], outputs=[transcription_status, transcript_file])
110
- with gr.Group("Transcription Status"):
111
- transcribe_status = gr.Textbox(label="Transcribe Status", value = transcription_status.value)
 
 
 
112
  with gr.Group("LLM Selection"):
113
  with gr.Row():
114
  choice = gr.Radio(label="Summarization LLM", choices=summarization_llm_choices)
 
3
  from tools.summarize import MAPREDUCE, STUFF, summarize_podcast
4
  from tools.answer_bot import answer_question
5
  from tools.aspect_and_sentiment_extraction import extract_aspects_and_sentiment
6
+ from tools.transcribe import transcribe_podcast, transcribe_podcast_from_mp3, WAV2VEC, AUTOMODELFORSPEECH
7
 
8
  def get_answer_for(user_question, transcript_file_name, question_answer_llm_choice):
9
  if transcript_file_name is None:
 
42
 
43
  return sentiment, transcript_file_name, sentiment_analysis_llm_choice
44
 
45
+ def setup_transcript_file_handle(uploaded_file, transcript_file_name, transcription_status):
46
+ if not uploaded_file:
47
+ transcription_status = "No File Detected, Failure"
48
+ else:
49
+ transcript_file_name = uploaded_file.name
50
+ transcription_status = "Upload Success"
51
+ return transcription_status, transcript_file_name
52
 
53
  def setup_summarization_llm(choice, summarization_llm_choice):
54
  set_summarization_llm(choice)
 
69
  summarization_method = choice
70
  return choice, summarization_method
71
 
72
+ def setup_transcription_method(choice, transcription_method):
73
+ transcription_method = choice
74
+ return choice, transcription_method
75
+
76
+ def transcribe_audio_file(uploaded_file, transcript_file_name, transcription_method):
77
  if not uploaded_file:
78
+ status = "No File Detected, Failure"
79
  else:
80
+ transcript_file_name = transcribe_podcast_from_mp3(uploaded_file.name, transcription_method)
81
+ status = "Upload Success"
82
+ return transcript_file_name, transcription_method, status
83
 
84
+ def download_and_transcribe_podcast(mp3_url, transcript_file, transcription_method):
85
  if not mp3_url:
86
+ status = "No URL detected, Failure"
87
  else:
88
+ transcript_file = transcribe_podcast(mp3_url, transcription_method)
89
+ status = "Upload Success"
90
+ return transcript_file, transcription_method, status
 
91
 
92
  summarization_llm_choices = [GPT3, GPT4, ANTHROPIC2]
93
  question_answer_llm_choices = [GPT3, GPT4, ANTHROPIC2]
94
  sentiment_analysis_llm_choices = [GPT3, GPT4, ANTHROPIC2]
95
  summarize_method_choices = [MAPREDUCE, STUFF]
96
+ transcription_method_choices = [WAV2VEC, AUTOMODELFORSPEECH]
97
 
98
  with gr.Blocks() as demo:
99
  transcript_file = gr.State()
 
101
  question_answer_llm_choice = gr.State()
102
  sentiment_analysis_llm_choice = gr.State()
103
  summarization_llm_choice = gr.State()
104
+ transcription_method = gr.State()
105
+
106
+ with gr.Group("Trancsription Model Selection"):
107
+ with gr.Row():
108
+ choice = gr.Radio(label="Transcription Model", choices=transcription_method_choices)
109
+ output = gr.Textbox(label="")
110
+ choice.change(setup_transcription_method, inputs=[choice, transcription_method], outputs=[output, transcription_method])
111
  with gr.Row():
112
  with gr.Group("Enter Podcast mp3 URL"):
113
  mp3_url = gr.Textbox(label="Podcast MP3 URL")
114
  submit_button = gr.Button("Transcribe")
115
+ status = gr.Textbox(label="", value="Pending Trancsribe")
116
+ submit_button.click(download_and_transcribe_podcast, inputs=[mp3_url, transcript_file, transcription_method], outputs=[transcript_file, transcription_method, status])
117
  with gr.Group("Upload Podcast mp3 File"):
118
  mp3_file = gr.File(label="Podcast mp3 file")
119
  submit_button = gr.Button("Transcribe")
120
+ status = gr.Textbox(label="", value="Pending Transcribe")
121
+ submit_button.click(transcribe_audio_file, inputs=[mp3_file, transcript_file, transcription_method], outputs=[transcript_file, transcription_method, status])
122
+ with gr.Group("Upload RTF File"):
123
+ rtf_file = gr.File(label="Transcripted RTF file")
124
+ submit_button = gr.Button("Upload RTF")
125
+ submit_button.click(setup_transcript_file_handle, inputs=[rtf_file, transcript_file], outputs=[transcript_file])
126
  with gr.Group("LLM Selection"):
127
  with gr.Row():
128
  choice = gr.Radio(label="Summarization LLM", choices=summarization_llm_choices)
tools/transcribe.py CHANGED
@@ -4,6 +4,9 @@ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
4
  import requests
5
  import uuid
6
 
 
 
 
7
  class Audio_to_Text:
8
  def __init__(self):
9
  self.model_id = "openai/whisper-large-v3"
@@ -34,19 +37,17 @@ class Audio_to_Text:
34
  file.write(response.content)
35
  print("MP3 file downloaded and saved successfully.")
36
 
37
- def convert_audio_to_text(self, audio_file):
38
- transformers.logging.set_verbosity_info()
39
- result = self.pipe(audio_file, generate_kwargs={"language": "english"})
40
- print("Converted audio to text successfully.")
41
- # save the result to a text file
42
- uuid_text = str(uuid.uuid4())
43
- save_file_name = f"transcript-{uuid_text}.txt"
44
- with open(save_file_name, "w") as file:
45
- file.write(result)
46
- print("Transcript saved successfully.")
47
- return save_file_name
48
 
49
- def convert_audio_to_text_from_url(self, url):
50
  #get uuid for the audio file
51
  uuid_audio = str(uuid.uuid4())
52
  save_path = f"audio-{uuid_audio}.mp3"
@@ -56,34 +57,32 @@ class Audio_to_Text:
56
 
57
  return path_text_file_of_audio
58
 
59
- def transcribe_podcast_from_mp3(mp3_file):
60
- audio_to_text = Audio_to_Text()
 
 
 
 
 
61
 
62
- path_text_file_of_audio = audio_to_text.convert_audio_to_text(mp3_file)
63
- print(path_text_file_of_audio)
64
- return path_text_file_of_audio
 
65
 
66
- def transcribe_podcast(file_url):
67
- # Example usage:
68
- # url = "https://chrt.fm/track/138C95/prfx.byspotify.com/e/play.podtrac.com/npr-510310/traffic.megaphone.fm/NPR7010771664.mp3"
69
-
70
 
 
71
  audio_to_text = Audio_to_Text()
 
 
 
 
 
72
 
73
-
74
  # Convert the audio file to text
75
-
76
- path_text_file_of_audio = audio_to_text.convert_audio_to_text_from_url(file_url)
77
 
78
  # Print the result
79
  print(path_text_file_of_audio)
80
- return path_text_file_of_audio
81
-
82
- def transcribe_audio_to_text(speech):
83
- asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
84
- text = asr(speech)["text"]
85
- return text
86
-
87
- # def text_to_sentiment(text):
88
- # classifier = pipeline("text-classification")
89
- # return classifier(text)[0]["label"]
 
4
  import requests
5
  import uuid
6
 
7
+ WAV2VEC = "wav2vec"
8
+ AUTOMODELFORSPEECH = "automodelforspeech"
9
+
10
  class Audio_to_Text:
11
  def __init__(self):
12
  self.model_id = "openai/whisper-large-v3"
 
37
  file.write(response.content)
38
  print("MP3 file downloaded and saved successfully.")
39
 
40
+ def convert_audio_to_text(self, audio_file, transcription_method):
41
+ if transcription_method == WAV2VEC:
42
+ return self.transcribe_audio_to_text_using_wav2vec(audio_file)
43
+ else:
44
+ transformers.logging.set_verbosity_info()
45
+ result = self.pipe(audio_file, generate_kwargs={"language": "english"})
46
+ print("Converted audio to text successfully.")
47
+ # save the result to a text file
48
+ return self.save_transcribed_text_to_file(result)
 
 
49
 
50
+ def convert_audio_to_text_from_url(self, url, transcription_method):
51
  #get uuid for the audio file
52
  uuid_audio = str(uuid.uuid4())
53
  save_path = f"audio-{uuid_audio}.mp3"
 
57
 
58
  return path_text_file_of_audio
59
 
60
+ def save_transcribed_text_to_file(self, text):
61
+ uuid_text = str(uuid.uuid4())
62
+ save_file_name = f"transcript-{uuid_text}.txt"
63
+ with open(save_file_name, "w") as file:
64
+ file.write(text)
65
+ print("Transcript saved successfully.")
66
+ return save_file_name
67
 
68
+ def transcribe_audio_to_text_using_wav2vec(self, mp3):
69
+ asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
70
+ text = asr(mp3)["text"]
71
+ return self.save_transcribed_text_to_file(text)
72
 
 
 
 
 
73
 
74
+ def transcribe_podcast_from_mp3(mp3_file, transcription_method):
75
  audio_to_text = Audio_to_Text()
76
+ return audio_to_text.convert_audio_to_text(mp3_file, transcription_method);
77
+
78
+ def transcribe_podcast(file_url, transcription_method):
79
+ # Example usage:
80
+ # url = "https://chrt.fm/track/138C95/prfx.byspotify.com/e/play.podtrac.com/npr-510310/traffic.megaphone.fm/NPR7010771664.mp3"
81
 
82
+ audio_to_text = Audio_to_Text()
83
  # Convert the audio file to text
84
+ path_text_file_of_audio = audio_to_text.convert_audio_to_text_from_url(file_url, transcription_method)
 
85
 
86
  # Print the result
87
  print(path_text_file_of_audio)
88
+ return path_text_file_of_audio