Spaces:

hamza2923
/

faster-whisper-transcription-fcm-api8

Running

App Files Community

hamza2923 committed 20 days ago

Commit

071a691

verified ·

1 Parent(s): e56406d

Update app.py

Browse files

Files changed (1) hide show

app.py +195 -195

app.py CHANGED Viewed

@@ -1,196 +1,196 @@
-from flask import Flask, request, jsonify, Response
-from faster_whisper import WhisperModel
-import torch
-import io
-import time
-import datetime
-from threading import Semaphore
-import os
-from werkzeug.utils import secure_filename
-import tempfile
-from moviepy.editor import VideoFileClip
-import firebase_admin
-from firebase_admin import credentials, messaging  # Added for FCM
-app = Flask(__name__)
-# Configuration
-MAX_CONCURRENT_REQUESTS = 2
-MAX_FILE_DURATION = 60 * 30
-TEMPORARY_FOLDER = tempfile.gettempdir()
-ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac', 'aac', 'wma', 'opus', 'aiff'}
-ALLOWED_VIDEO_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv', 'mpeg', 'mpg', '3gp'}
-ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS.union(ALLOWED_VIDEO_EXTENSIONS)
-# Initialize Firebase Admin SDK using environment variables
-firebase_credentials = {
-    "type": "service_account",
-    "project_id": os.getenv("FIREBASE_PROJECT_ID"),
-    "private_key_id": os.getenv("FIREBASE_PRIVATE_KEY_ID"),
-    "private_key": os.getenv("FIREBASE_PRIVATE_KEY").replace("\\n", "\n"),
-    "client_email": os.getenv("FIREBASE_CLIENT_EMAIL"),
-    "client_id": os.getenv("FIREBASE_CLIENT_ID"),
-    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
-    "token_uri": "https://oauth2.googleapis.com/token",
-    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
-    "client_x509_cert_url": f"https://www.googleapis.com/robot/v1/metadata/x509/{os.getenv('FIREBASE_CLIENT_EMAIL')}"
-}
-cred = credentials.Certificate(firebase_credentials)
-firebase_admin.initialize_app(cred)
-# Device check for faster-whisper
-device = "cuda" if torch.cuda.is_available() else "cpu"
-compute_type = "float16" if device == "cuda" else "int8"
-print(f"Using device: {device} with compute_type: {compute_type}")
-# Faster Whisper setup
-beamsize = 2
-wmodel = WhisperModel(
-    "guillaumekln/faster-whisper-small",
-    device=device,
-    compute_type=compute_type,
-    download_root="./model_cache"
-)
-# Concurrency control
-request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)
-active_requests = 0
-def allowed_file(filename):
-    return '.' in filename and \
-           filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
-def cleanup_temp_files(*file_paths):
-    for file_path in file_paths:
-        try:
-            if file_path and os.path.exists(file_path):
-                os.remove(file_path)
-        except Exception as e:
-            print(f"Error cleaning up temp file {file_path}: {str(e)}")
-def extract_audio_from_video(video_path, output_audio_path):
-    try:
-        video = VideoFileClip(video_path)
-        if video.duration > MAX_FILE_DURATION:
-            video.close()
-            raise ValueError(f"Video duration exceeds {MAX_FILE_DURATION} seconds")
-        video.audio.write_audiofile(output_audio_path)
-        video.close()
-        return output_audio_path
-    except Exception as e:
-        raise Exception(f"Failed to extract audio from video: {str(e)}")
-def send_fcm_data_message(fcm_token, transcription, file_type, created_date, transcription_name):
-    """Send an FCM message with transcription details and a notification"""
-    try:
-        message = messaging.Message(
-            notification=messaging.Notification(
-                title=transcription_name,
-                body="Successfully downloaded"
-            ),
-            data={
-                'transcription': transcription,
-                'file_type': file_type,
-                'created_date': created_date,
-                'transcription_name': transcription_name
-            },
-            token=fcm_token
-        )
-        response = messaging.send(message)
-        print(f"FCM message sent: {response}")
-        return True
-    except Exception as e:
-        print(f"Error sending FCM message: {str(e)}")
-        return False
-@app.route("/health", methods=["GET"])
-def health_check():
-    return jsonify({
-        'status': 'API is running',
-        'timestamp': datetime.datetime.now().isoformat(),
-        'device': device,
-        'compute_type': compute_type,
-        'active_requests': active_requests,
-        'max_duration_supported': MAX_FILE_DURATION,
-        'supported_formats': list(ALLOWED_EXTENSIONS)
-    })
-@app.route("/status/busy", methods=["GET"])
-def server_busy():
-    is_busy = active_requests >= MAX_CONCURRENT_REQUESTS
-    return jsonify({
-        'is_busy': is_busy,
-        'active_requests': active_requests,
-        'max_capacity': MAX_CONCURRENT_REQUESTS
-    })
-@app.route("/whisper_transcribe", methods=["POST"])
-def transcribe():
-    global active_requests
-    if not request_semaphore.acquire(blocking=False):
-        return jsonify({'error': 'Server busy'}), 503
-    active_requests += 1
-    start_time = time.time()
-    temp_file_path = None
-    temp_audio_path = None
-    try:
-        if 'file' not in request.files or 'fcm_token' not in request.form:
-            return jsonify({'error': 'Missing file or FCM token'}), 400
-        file = request.files['file']
-        fcm_token = request.form['fcm_token']
-        created_date = request.form['created_date']
-        transcription_name = request.form['transcription_name']
-        if not (file and allowed_file(file.filename)):
-            return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400
-        # Save uploaded file
-        temp_file_path = os.path.join(TEMPORARY_FOLDER, secure_filename(file.filename))
-        file.save(temp_file_path)
-        # Handle video/audio
-        file_extension = file.filename.rsplit('.', 1)[1].lower()
-        if file_extension in ALLOWED_VIDEO_EXTENSIONS:
-            temp_audio_path = os.path.join(TEMPORARY_FOLDER, f"temp_audio_{int(time.time())}.wav")
-            extract_audio_from_video(temp_file_path, temp_audio_path)
-            transcription_file = temp_audio_path
-        else:
-            transcription_file = temp_file_path
-        # Transcribe
-        segments, _ = wmodel.transcribe(
-            transcription_file,
-            beam_size=beamsize,
-            vad_filter=True,
-            without_timestamps=True,
-            compression_ratio_threshold=2.4,
-            word_timestamps=False
-        )
-        full_text = " ".join(segment.text for segment in segments)
-        file_type = 'video' if file_extension in ALLOWED_VIDEO_EXTENSIONS else 'audio'
-        # Send FCM data message
-    # Send FCM data message
-        send_fcm_data_message(fcm_token, full_text, file_type, created_date, transcription_name)
-        return jsonify({}), 200
-    except Exception as e:
-        return jsonify({'error': str(e)}), 500
-    finally:
-        cleanup_temp_files(temp_file_path, temp_audio_path)
-        active_requests -= 1
-        request_semaphore.release()
-        print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")
-if __name__ == "__main__":
-    if not os.path.exists(TEMPORARY_FOLDER):
-        os.makedirs(TEMPORARY_FOLDER)
     app.run(host="0.0.0.0", port=7860, threaded=True)

+from flask import Flask, request, jsonify, Response
+from faster_whisper import WhisperModel
+import torch
+import io
+import time
+import datetime
+from threading import Semaphore
+import os
+from werkzeug.utils import secure_filename
+import tempfile
+from moviepy.editor import VideoFileClip
+import firebase_admin
+from firebase_admin import credentials, messaging  # Added for FCM
+app = Flask(__name__)
+# Configuration
+MAX_CONCURRENT_REQUESTS = 1
+MAX_FILE_DURATION = 60 * 30
+TEMPORARY_FOLDER = tempfile.gettempdir()
+ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac', 'aac', 'wma', 'opus', 'aiff'}
+ALLOWED_VIDEO_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv', 'mpeg', 'mpg', '3gp'}
+ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS.union(ALLOWED_VIDEO_EXTENSIONS)
+# Initialize Firebase Admin SDK using environment variables
+firebase_credentials = {
+    "type": "service_account",
+    "project_id": os.getenv("FIREBASE_PROJECT_ID"),
+    "private_key_id": os.getenv("FIREBASE_PRIVATE_KEY_ID"),
+    "private_key": os.getenv("FIREBASE_PRIVATE_KEY").replace("\\n", "\n"),
+    "client_email": os.getenv("FIREBASE_CLIENT_EMAIL"),
+    "client_id": os.getenv("FIREBASE_CLIENT_ID"),
+    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+    "token_uri": "https://oauth2.googleapis.com/token",
+    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+    "client_x509_cert_url": f"https://www.googleapis.com/robot/v1/metadata/x509/{os.getenv('FIREBASE_CLIENT_EMAIL')}"
+}
+cred = credentials.Certificate(firebase_credentials)
+firebase_admin.initialize_app(cred)
+# Device check for faster-whisper
+device = "cuda" if torch.cuda.is_available() else "cpu"
+compute_type = "float16" if device == "cuda" else "int8"
+print(f"Using device: {device} with compute_type: {compute_type}")
+# Faster Whisper setup
+beamsize = 2
+wmodel = WhisperModel(
+    "guillaumekln/faster-whisper-small",
+    device=device,
+    compute_type=compute_type,
+    download_root="./model_cache"
+)
+# Concurrency control
+request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)
+active_requests = 0
+def allowed_file(filename):
+    return '.' in filename and \
+           filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
+def cleanup_temp_files(*file_paths):
+    for file_path in file_paths:
+        try:
+            if file_path and os.path.exists(file_path):
+                os.remove(file_path)
+        except Exception as e:
+            print(f"Error cleaning up temp file {file_path}: {str(e)}")
+def extract_audio_from_video(video_path, output_audio_path):
+    try:
+        video = VideoFileClip(video_path)
+        if video.duration > MAX_FILE_DURATION:
+            video.close()
+            raise ValueError(f"Video duration exceeds {MAX_FILE_DURATION} seconds")
+        video.audio.write_audiofile(output_audio_path)
+        video.close()
+        return output_audio_path
+    except Exception as e:
+        raise Exception(f"Failed to extract audio from video: {str(e)}")
+def send_fcm_data_message(fcm_token, transcription, file_type, created_date, transcription_name):
+    """Send an FCM message with transcription details and a notification"""
+    try:
+        message = messaging.Message(
+            notification=messaging.Notification(
+                title=transcription_name,
+                body="Successfully downloaded"
+            ),
+            data={
+                'transcription': transcription,
+                'file_type': file_type,
+                'created_date': created_date,
+                'transcription_name': transcription_name
+            },
+            token=fcm_token
+        )
+        response = messaging.send(message)
+        print(f"FCM message sent: {response}")
+        return True
+    except Exception as e:
+        print(f"Error sending FCM message: {str(e)}")
+        return False
+@app.route("/health", methods=["GET"])
+def health_check():
+    return jsonify({
+        'status': 'API is running',
+        'timestamp': datetime.datetime.now().isoformat(),
+        'device': device,
+        'compute_type': compute_type,
+        'active_requests': active_requests,
+        'max_duration_supported': MAX_FILE_DURATION,
+        'supported_formats': list(ALLOWED_EXTENSIONS)
+    })
+@app.route("/status/busy", methods=["GET"])
+def server_busy():
+    is_busy = active_requests >= MAX_CONCURRENT_REQUESTS
+    return jsonify({
+        'is_busy': is_busy,
+        'active_requests': active_requests,
+        'max_capacity': MAX_CONCURRENT_REQUESTS
+    })
+@app.route("/whisper_transcribe", methods=["POST"])
+def transcribe():
+    global active_requests
+    if not request_semaphore.acquire(blocking=False):
+        return jsonify({'error': 'Server busy'}), 503
+    active_requests += 1
+    start_time = time.time()
+    temp_file_path = None
+    temp_audio_path = None
+    try:
+        if 'file' not in request.files or 'fcm_token' not in request.form:
+            return jsonify({'error': 'Missing file or FCM token'}), 400
+        file = request.files['file']
+        fcm_token = request.form['fcm_token']
+        created_date = request.form['created_date']
+        transcription_name = request.form['transcription_name']
+        if not (file and allowed_file(file.filename)):
+            return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400
+        # Save uploaded file
+        temp_file_path = os.path.join(TEMPORARY_FOLDER, secure_filename(file.filename))
+        file.save(temp_file_path)
+        # Handle video/audio
+        file_extension = file.filename.rsplit('.', 1)[1].lower()
+        if file_extension in ALLOWED_VIDEO_EXTENSIONS:
+            temp_audio_path = os.path.join(TEMPORARY_FOLDER, f"temp_audio_{int(time.time())}.wav")
+            extract_audio_from_video(temp_file_path, temp_audio_path)
+            transcription_file = temp_audio_path
+        else:
+            transcription_file = temp_file_path
+        # Transcribe
+        segments, _ = wmodel.transcribe(
+            transcription_file,
+            beam_size=beamsize,
+            vad_filter=True,
+            without_timestamps=True,
+            compression_ratio_threshold=2.4,
+            word_timestamps=False
+        )
+        full_text = " ".join(segment.text for segment in segments)
+        file_type = 'video' if file_extension in ALLOWED_VIDEO_EXTENSIONS else 'audio'
+        # Send FCM data message
+        # Delivery is best-effort: send_fcm_data_message logs failures and the response is 200 either way
+        send_fcm_data_message(fcm_token, full_text, file_type, created_date, transcription_name)
+        return jsonify({}), 200
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+    finally:
+        cleanup_temp_files(temp_file_path, temp_audio_path)
+        active_requests -= 1
+        request_semaphore.release()
+        print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")
+if __name__ == "__main__":
+    if not os.path.exists(TEMPORARY_FOLDER):
+        os.makedirs(TEMPORARY_FOLDER)
     app.run(host="0.0.0.0", port=7860, threaded=True)