KingNish committed on
Commit 3845c66 · verified · 1 Parent(s): 2bf1d0a

Update app.py

Files changed (1)
  1. app.py +17 -15
app.py CHANGED
@@ -6,10 +6,9 @@ import tempfile
 import os
 import uuid
 import scipy.io.wavfile
-import numpy as np

 MODEL_NAME = "ylacombe/whisper-large-v3-turbo"
-BATCH_SIZE = 8
+BATCH_SIZE = 16
 device = 0 if torch.cuda.is_available() else "cpu"

 pipe = pipeline(
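
Both hunk headers truncate at pipe = pipeline(, so the full pipeline setup is not visible in this diff. The following is a rough sketch only; the argument list is an assumption, not the committed code.

# Sketch only: the diff does not show the full call, so every argument here is assumed.
from transformers import pipeline
import torch

MODEL_NAME = "ylacombe/whisper-large-v3-turbo"
device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,   # assumed chunking window for long-form audio
    device=device,
)
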
@@ -22,31 +21,34 @@ pipe = pipeline(
 @spaces.GPU
 def transcribe(inputs, previous_transcription):
     try:
+        # Generate a unique filename Using UUID
+        filename = f"{uuid.uuid4().hex}.wav"
+        filepath = os.path.join(tempfile.gettempdir(), filename)
+
+        # Extract Sample Rate and Audio Data from the Tuple
         sample_rate, audio_data = inputs

-        # Convert audio data to a NumPy array of floats normalized between -1 and 1
-        audio_data = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
+        # Save the Audio Data to the Temporary File
+        scipy.io.wavfile.write(filepath, sample_rate, audio_data)

-        # Perform transcription
-        transcription = pipe(audio_data,
-                             batch_size=BATCH_SIZE,
-                             generate_kwargs={"task": "transcribe"},
-                             return_timestamps=True)
+        # Transcribe the Audio
+        transcription = pipe(filepath, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
+        previous_transcription += transcription

-        # Append new transcription to previous transcription
-        previous_transcription += transcription["text"]
+        # Remove the Temporary File after Transcription
+        os.remove(filepath)

         return previous_transcription
     except Exception as e:
-        print(f"Error during transcription: {e}")
+        print(f"Error during Transcription: {e}")
         return previous_transcription

 with gr.Blocks() as demo:
     with gr.Column():
-        gr.Markdown(f"# Realtime Whisper Large V3 Turbo: Transcribe Audio\n Transcribe inputs in Realtime. This Demo uses the checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers.")
+        gr.Markdown(f"# Realtime Whisper Large V3 Turbo: Transcribe Audio\n Transcribe Inputs in Realtime. This Demo uses the Checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers.")
         input_audio_microphone = gr.Audio(streaming=True)
         output = gr.Textbox(label="Transcription", value="")

         input_audio_microphone.stream(transcribe, [input_audio_microphone, output], [output], time_limit=45, stream_every=2, concurrency_limit=None)

 demo.queue().launch()
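
For comparison, the in-memory path that this commit removes could also be written without a temporary file: the Transformers ASR pipeline accepts a dict carrying the raw waveform and its sampling rate. The helper below, transcribe_in_memory, is a hypothetical sketch and not part of the commit; it reuses the app's pipe and BATCH_SIZE and assumes the streamed chunk arrives as int16 samples, as the removed code did.

import numpy as np

def transcribe_in_memory(inputs, previous_transcription):
    # Hypothetical alternative to the temp-file approach above; not in the commit.
    sample_rate, audio_data = inputs
    # Normalize int16 PCM to float32 in [-1, 1], as the removed code did.
    audio = np.asarray(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
    # The ASR pipeline also accepts {"raw": waveform, "sampling_rate": rate} directly.
    result = pipe({"raw": audio, "sampling_rate": sample_rate},
                  batch_size=BATCH_SIZE,
                  generate_kwargs={"task": "transcribe"},
                  return_timestamps=True)
    return previous_transcription + result["text"]
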
 
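
Since stream_every=2 delivers roughly a two-second chunk to transcribe on each call, the new temp-file path can be sanity-checked locally with a synthetic chunk, outside Gradio. The snippet below is an illustrative assumption, not part of the commit, and reuses the app's transcribe function.

import numpy as np

if __name__ == "__main__":
    # Two seconds of int16 silence at 16 kHz, shaped like a streamed Gradio chunk.
    sr = 16000
    chunk = np.zeros(2 * sr, dtype=np.int16)
    print(transcribe((sr, chunk), ""))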