hamza2923 committed on
Commit
8a199a7
·
verified ·
1 Parent(s): 7b109f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -25
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from flask import Flask, request, jsonify, Response # Add Response here
2
  from faster_whisper import WhisperModel
3
  import torch
4
  import io
@@ -8,14 +8,17 @@ from threading import Semaphore
8
  import os
9
  from werkzeug.utils import secure_filename
10
  import tempfile
 
11
 
12
  app = Flask(__name__)
13
 
14
  # Configuration
15
- MAX_CONCURRENT_REQUESTS = 2 # Adjust based on your server capacity
16
- MAX_AUDIO_DURATION = 60 * 30 # 30 minutes maximum audio duration (adjust as needed)
17
  TEMPORARY_FOLDER = tempfile.gettempdir()
18
- ALLOWED_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac'}
 
 
19
 
20
  # Device check for faster-whisper
21
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -23,12 +26,12 @@ compute_type = "float16" if device == "cuda" else "int8"
23
  print(f"Using device: {device} with compute_type: {compute_type}")
24
 
25
  # Faster Whisper setup with optimized parameters for long audio
26
- beamsize = 2 # Slightly larger beam size can help with long-form accuracy
27
  wmodel = WhisperModel(
28
  "guillaumekln/faster-whisper-small",
29
  device=device,
30
  compute_type=compute_type,
31
- download_root="./model_cache" # Cache model to avoid re-downloading
32
  )
33
 
34
  # Concurrency control
@@ -39,13 +42,27 @@ def allowed_file(filename):
39
  return '.' in filename and \
40
  filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
41
 
42
- def cleanup_temp_files(file_path):
43
  """Ensure temporary files are deleted after processing"""
 
 
 
 
 
 
 
 
 
44
  try:
45
- if os.path.exists(file_path):
46
- os.remove(file_path)
 
 
 
 
 
47
  except Exception as e:
48
- print(f"Error cleaning up temp file {file_path}: {str(e)}")
49
 
50
  @app.route("/health", methods=["GET"])
51
  def health_check():
@@ -56,7 +73,8 @@ def health_check():
56
  'device': device,
57
  'compute_type': compute_type,
58
  'active_requests': active_requests,
59
- 'max_duration_supported': MAX_AUDIO_DURATION
 
60
  })
61
 
62
  @app.route("/status/busy", methods=["GET"])
@@ -70,7 +88,7 @@ def server_busy():
70
  })
71
 
72
  @app.route("/whisper_transcribe", methods=["POST"])
73
- def whisper_transcribe():
74
  global active_requests
75
 
76
  if not request_semaphore.acquire(blocking=False):
@@ -79,38 +97,50 @@ def whisper_transcribe():
79
  active_requests += 1
80
  start_time = time.time()
81
  temp_file_path = None
 
82
 
83
  try:
84
- if 'audio' not in request.files:
85
  return jsonify({'error': 'No file provided'}), 400
86
 
87
- audio_file = request.files['audio']
88
- if not (audio_file and allowed_file(audio_file.filename)):
89
- return jsonify({'error': 'Invalid file format'}), 400
90
 
91
- temp_file_path = os.path.join(TEMPORARY_FOLDER, secure_filename(audio_file.filename))
92
- audio_file.save(temp_file_path)
 
 
 
 
 
 
 
 
 
 
93
 
 
94
  segments, _ = wmodel.transcribe(
95
- temp_file_path,
96
  beam_size=beamsize,
97
  vad_filter=True,
98
- without_timestamps=True, # Ensure timestamps are not included
99
  compression_ratio_threshold=2.4,
100
  word_timestamps=False
101
  )
102
 
103
  full_text = " ".join(segment.text for segment in segments)
104
- return jsonify({'transcription': full_text}), 200
105
-
106
-
 
107
 
108
  except Exception as e:
109
  return jsonify({'error': str(e)}), 500
110
 
111
  finally:
112
- if temp_file_path:
113
- cleanup_temp_files(temp_file_path)
114
  active_requests -= 1
115
  request_semaphore.release()
116
  print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")
 
1
+ from flask import Flask, request, jsonify, Response
2
  from faster_whisper import WhisperModel
3
  import torch
4
  import io
 
8
  import os
9
  from werkzeug.utils import secure_filename
10
  import tempfile
11
+ from moviepy.editor import VideoFileClip # Added for video processing
12
 
13
  app = Flask(__name__)
14
 
15
  # Configuration
16
+ MAX_CONCURRENT_REQUESTS = 2 # Adjust based on server capacity
17
+ MAX_FILE_DURATION = 60 * 30 # 30 minutes maximum duration (adjust as needed)
18
  TEMPORARY_FOLDER = tempfile.gettempdir()
19
+ ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac', 'aac', 'wma', 'opus', 'aiff'}
20
+ ALLOWED_VIDEO_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv', 'mpeg', 'mpg', '3gp'}
21
+ ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS.union(ALLOWED_VIDEO_EXTENSIONS)
22
 
23
  # Device check for faster-whisper
24
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
26
  print(f"Using device: {device} with compute_type: {compute_type}")
27
 
28
  # Faster Whisper setup with optimized parameters for long audio
29
+ beamsize = 2
30
  wmodel = WhisperModel(
31
  "guillaumekln/faster-whisper-small",
32
  device=device,
33
  compute_type=compute_type,
34
+ download_root="./model_cache"
35
  )
36
 
37
  # Concurrency control
 
42
  return '.' in filename and \
43
  filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
44
 
45
+ def cleanup_temp_files(*file_paths):
46
  """Ensure temporary files are deleted after processing"""
47
+ for file_path in file_paths:
48
+ try:
49
+ if file_path and os.path.exists(file_path):
50
+ os.remove(file_path)
51
+ except Exception as e:
52
+ print(f"Error cleaning up temp file {file_path}: {str(e)}")
53
+
54
+ def extract_audio_from_video(video_path, output_audio_path):
55
+ """Extract audio from a video file and save it as a temporary audio file"""
56
  try:
57
+ video = VideoFileClip(video_path)
58
+ if video.duration > MAX_FILE_DURATION:
59
+ video.close()
60
+ raise ValueError(f"Video duration exceeds {MAX_FILE_DURATION} seconds")
61
+ video.audio.write_audiofile(output_audio_path)
62
+ video.close()
63
+ return output_audio_path
64
  except Exception as e:
65
+ raise Exception(f"Failed to extract audio from video: {str(e)}")
66
 
67
  @app.route("/health", methods=["GET"])
68
  def health_check():
 
73
  'device': device,
74
  'compute_type': compute_type,
75
  'active_requests': active_requests,
76
+ 'max_duration_supported': MAX_FILE_DURATION,
77
+ 'supported_formats': list(ALLOWED_EXTENSIONS)
78
  })
79
 
80
  @app.route("/status/busy", methods=["GET"])
 
88
  })
89
 
90
  @app.route("/whisper_transcribe", methods=["POST"])
91
+ def transcribe():
92
  global active_requests
93
 
94
  if not request_semaphore.acquire(blocking=False):
 
97
  active_requests += 1
98
  start_time = time.time()
99
  temp_file_path = None
100
+ temp_audio_path = None
101
 
102
  try:
103
+ if 'file' not in request.files:
104
  return jsonify({'error': 'No file provided'}), 400
105
 
106
+ file = request.files['file']
107
+ if not (file and allowed_file(file.filename)):
108
+ return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400
109
 
110
+ # Save uploaded file to temporary location
111
+ temp_file_path = os.path.join(TEMPORARY_FOLDER, secure_filename(file.filename))
112
+ file.save(temp_file_path)
113
+
114
+ # Check if file is a video and extract audio if necessary
115
+ file_extension = file.filename.rsplit('.', 1)[1].lower()
116
+ if file_extension in ALLOWED_VIDEO_EXTENSIONS:
117
+ temp_audio_path = os.path.join(TEMPORARY_FOLDER, f"temp_audio_{int(time.time())}.wav")
118
+ extract_audio_from_video(temp_file_path, temp_audio_path)
119
+ transcription_file = temp_audio_path
120
+ else:
121
+ transcription_file = temp_file_path
122
 
123
+ # Transcribe the audio file
124
  segments, _ = wmodel.transcribe(
125
+ transcription_file,
126
  beam_size=beamsize,
127
  vad_filter=True,
128
+ without_timestamps=True,
129
  compression_ratio_threshold=2.4,
130
  word_timestamps=False
131
  )
132
 
133
  full_text = " ".join(segment.text for segment in segments)
134
+ return jsonify({
135
+ 'transcription': full_text,
136
+ 'file_type': 'video' if file_extension in ALLOWED_VIDEO_EXTENSIONS else 'audio'
137
+ }), 200
138
 
139
  except Exception as e:
140
  return jsonify({'error': str(e)}), 500
141
 
142
  finally:
143
+ cleanup_temp_files(temp_file_path, temp_audio_path)
 
144
  active_requests -= 1
145
  request_semaphore.release()
146
  print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")