v-e-n-o-m committed on
Commit
7ded65d
·
1 Parent(s): 7379e3f
Files changed (3) hide show
  1. Dockerfile +8 -0
  2. app.py +112 -48
  3. requirements.txt +5 -5
Dockerfile CHANGED
@@ -9,6 +9,11 @@ RUN apt-get update && apt-get install -y \
9
  # Set working directory
10
  WORKDIR /app
11
 
 
 
 
 
 
12
  # Copy requirements and install
13
  COPY requirements.txt .
14
  RUN pip install --no-cache-dir -r requirements.txt
@@ -16,6 +21,9 @@ RUN pip install --no-cache-dir -r requirements.txt
16
  # Copy application code
17
  COPY app.py .
18
 
 
 
 
19
  # Expose port
20
  EXPOSE 8000
21
 
 
9
# Set working directory
WORKDIR /app

# Create cache and logs directories writable by the non-root runtime user (uid 1000)
RUN mkdir -p /app/cache /app/logs && \
    chown -R 1000:1000 /app/cache /app/logs && \
    chmod -R 775 /app/cache /app/logs

# Copy requirements first so the pip layer is cached across code-only rebuilds
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .

# Point the Hugging Face model cache at the pre-created directory.
# TRANSFORMERS_CACHE is deprecated in transformers v4.x in favour of
# HF_HOME -- set both so the image keeps working across library upgrades.
ENV TRANSFORMERS_CACHE=/app/cache \
    HF_HOME=/app/cache

# Expose port
EXPOSE 8000
29
 
app.py CHANGED
@@ -1,72 +1,136 @@
1
- from fastapi import FastAPI, File, UploadFile
 
 
2
  from transformers import pipeline, WhisperForConditionalGeneration, WhisperProcessor
3
  import torch
4
  import soundfile as sf
5
- import io
6
  import subprocess
7
  import tempfile
8
- import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  app = FastAPI(title="Quran Transcription API")
11
 
12
  # Load model and processor
13
- model_id = "tarteel-ai/whisper-base-ar-quran"
14
- processor = WhisperProcessor.from_pretrained(model_id)
15
- model = WhisperForConditionalGeneration.from_pretrained(model_id)
16
- model.generation_config.no_timestamps_token_id = processor.tokenizer.convert_tokens_to_ids("<|notimestamps|>")
 
 
 
 
 
 
17
 
18
  # Initialize ASR pipeline
19
- asr = pipeline(
20
- "automatic-speech-recognition",
21
- model=model,
22
- tokenizer=processor.tokenizer,
23
- feature_extractor=processor.feature_extractor,
24
- device=0 if torch.cuda.is_available() else -1
25
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  @app.post("/transcribe")
28
  async def transcribe_audio(file: UploadFile = File(...)):
 
 
29
  # Validate file type
30
  if not file.filename.lower().endswith(".mp3"):
31
- return {"error": "Only MP3 files are supported"}
 
32
 
33
  # Read MP3 file
34
- mp3_data = await file.read()
 
 
 
 
 
35
 
36
- # Create temporary files for conversion
37
- with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_mp3:
38
- temp_mp3.write(mp3_data)
39
- temp_mp3_path = temp_mp3.name
 
 
40
 
41
- temp_wav_path = temp_mp3_path.replace(".mp3", ".wav")
42
-
43
- try:
44
- # Convert MP3 to 16 kHz mono WAV using ffmpeg
45
- subprocess.run(
46
- [
47
- "ffmpeg",
48
- "-i", temp_mp3_path,
49
- "-ar", "16000",
50
- "-ac", "1",
51
- "-y", # Overwrite output file if exists
52
- temp_wav_path
53
- ],
54
- check=True,
55
- capture_output=True
56
- )
 
 
 
 
 
 
57
 
58
  # Read WAV file
59
- audio, sample_rate = sf.read(temp_wav_path)
60
- if sample_rate != 16000:
61
- return {"error": "Converted audio is not 16 kHz"}
 
 
 
 
 
 
62
 
63
  # Transcribe
64
- transcription = asr(audio, return_timestamps=False)["text"]
65
- return {"transcription": transcription}
66
-
67
- finally:
68
- # Clean up temporary files
69
- if os.path.exists(temp_mp3_path):
70
- os.unlink(temp_mp3_path)
71
- if os.path.exists(temp_wav_path):
72
- os.unlink(temp_wav_path)
 
1
import logging
import os

# BUG FIX: the cache environment variables must be configured *before*
# `transformers` is imported -- the library reads them at import time, so
# setting them after the import (as the original did) has no effect.
os.makedirs("/app/cache", exist_ok=True)
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

import subprocess
import tempfile
from contextlib import contextmanager

import soundfile as sf
import torch
from fastapi import FastAPI, File, UploadFile, HTTPException
from transformers import pipeline, WhisperForConditionalGeneration, WhisperProcessor

# Configure logging: file (persistent) + stream (container stdout logs).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("/app/logs/app.log"),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

app = FastAPI(title="Quran Transcription API")

# Load model and processor.
# BUG FIX: the original raised fastapi.HTTPException here, but this code runs
# at import/startup time, outside any request context, so FastAPI can never
# turn it into an HTTP response. Fail fast with RuntimeError instead.
model_id = "tarteel-ai/whisper-base-ar-quran"
try:
    logger.info("Loading processor for model: %s", model_id)
    processor = WhisperProcessor.from_pretrained(model_id)
    logger.info("Loading model: %s", model_id)
    model = WhisperForConditionalGeneration.from_pretrained(model_id)
    # Suppress timestamp tokens so generate() emits plain text only.
    model.generation_config.no_timestamps_token_id = processor.tokenizer.convert_tokens_to_ids("<|notimestamps|>")
except Exception as e:
    logger.exception("Failed to load model")
    raise RuntimeError(f"Model loading failed: {e}") from e

# Initialize ASR pipeline (GPU device 0 when CUDA is available, else CPU).
try:
    logger.info("Initializing ASR pipeline")
    asr = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        device=0 if torch.cuda.is_available() else -1,
    )
except Exception as e:
    logger.exception("Failed to initialize ASR pipeline")
    raise RuntimeError(f"Pipeline initialization failed: {e}") from e
54
@contextmanager
def temporary_files():
    """Yield an open temporary MP3 file and a matching WAV path; delete both on exit.

    Yields:
        (temp_mp3, temp_wav_path): an open ``NamedTemporaryFile`` for the MP3
        payload, and the sibling path the converted WAV should be written to.
    """
    log = logging.getLogger(__name__)
    temp_mp3 = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    # BUG FIX: splitext only touches the final extension; the original
    # str.replace(".mp3", ".wav") would corrupt any path that happens to
    # contain ".mp3" earlier in it.
    temp_wav_path = os.path.splitext(temp_mp3.name)[0] + ".wav"
    try:
        yield temp_mp3, temp_wav_path
    finally:
        # Close the handle before unlinking -- on some platforms an open
        # file cannot be removed. close() is idempotent, so this is safe
        # even when the caller already closed it.
        try:
            temp_mp3.close()
        except Exception:
            pass
        for path in (temp_mp3.name, temp_wav_path):
            if os.path.exists(path):
                try:
                    os.unlink(path)
                    log.debug("Deleted temporary file: %s", path)
                except OSError as e:
                    log.warning("Failed to delete temporary file %s: %s", path, e)
69
 
70
@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded MP3 Quran recitation to text.

    Pipeline: validate extension -> save upload -> ffmpeg-convert to 16 kHz
    mono WAV -> run the Whisper ASR pipeline.

    Raises:
        HTTPException: 400 for invalid input (non-MP3, empty upload, bad
            sample rate), 500 for conversion/read/transcription failures.
    """
    logger.info("Received file: %s", file.filename)

    # Validate file type
    if not file.filename.lower().endswith(".mp3"):
        logger.error("Invalid file type: %s. Only MP3 is supported", file.filename)
        raise HTTPException(status_code=400, detail="Only MP3 files are supported")

    # Read MP3 file
    try:
        mp3_data = await file.read()
        logger.debug("Read %d bytes from MP3 file", len(mp3_data))
    except Exception as e:
        logger.error("Failed to read MP3 file: %s", e)
        raise HTTPException(status_code=500, detail="Failed to read audio file")

    # Robustness: reject empty uploads before invoking ffmpeg.
    if not mp3_data:
        logger.error("Uploaded file is empty: %s", file.filename)
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    # Convert MP3 to WAV
    with temporary_files() as (temp_mp3, temp_wav_path):
        try:
            temp_mp3.write(mp3_data)
            temp_mp3.close()
            logger.info("Saved MP3 to temporary file: %s", temp_mp3.name)

            # Convert to 16 kHz mono WAV using ffmpeg (argument list, no shell).
            logger.info("Converting MP3 to WAV: %s", temp_wav_path)
            result = subprocess.run(
                [
                    "ffmpeg",
                    "-i", temp_mp3.name,
                    "-ar", "16000",
                    "-ac", "1",
                    "-y",  # Overwrite output file if exists
                    temp_wav_path,
                ],
                check=True,
                capture_output=True,
                text=True,
            )
            logger.debug("ffmpeg output: %s", result.stdout)
        except subprocess.CalledProcessError as e:
            logger.error("ffmpeg conversion failed: %s", e.stderr)
            raise HTTPException(status_code=500, detail="Audio conversion failed")
        except Exception as e:
            logger.error("Unexpected error during conversion: %s", e)
            raise HTTPException(status_code=500, detail="Unexpected error during conversion")

        # Read WAV file
        try:
            audio, sample_rate = sf.read(temp_wav_path)
            logger.info("Read WAV file: %s, sample rate: %s", temp_wav_path, sample_rate)
        except Exception as e:
            logger.error("Failed to read WAV file: %s", e)
            raise HTTPException(status_code=500, detail="Failed to read converted audio")

        # BUG FIX: this validation lived inside the try above, so its 400
        # HTTPException was caught by `except Exception` and re-raised as a
        # misleading 500 "Failed to read converted audio". Validate outside.
        if sample_rate != 16000:
            logger.error("Invalid sample rate: %s. Expected 16000 Hz", sample_rate)
            raise HTTPException(status_code=400, detail="Converted audio is not 16 kHz")

        # Transcribe
        try:
            logger.info("Starting transcription")
            transcription = asr(audio, return_timestamps=False)["text"]
            logger.info("Transcription completed: %s", transcription)
            return {"transcription": transcription}
        except Exception as e:
            logger.error("Transcription failed: %s", e)
            raise HTTPException(status_code=500, detail="Transcription failed")
 
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
- fastapi==0.115.0
2
- uvicorn==0.30.6
3
- transformers==4.44.2
4
- torch==2.4.1
5
- soundfile==0.12.1
6
  python-multipart==0.0.9
 
1
+ fastapi==0.115.0
2
+ uvicorn==0.30.6
3
+ transformers==4.44.2
4
+ torch==2.4.1
5
+ soundfile==0.12.1
6
  python-multipart==0.0.9