"""Somali speech-to-text HTTP API.

Exposes a health-check route and a ``/transcribe`` route that accepts an
uploaded audio file and returns the text produced by a Wav2Vec2 CTC model.
"""

import io
import os

# Set before importing `transformers`, as the original file did, so the
# library sees the cache location when it is imported.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"  # Important for Docker

import torch
import torchaudio
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

MODEL_ID = "Mustafaa4a/ASR-Somali"
# Sampling rate passed to the processor; audio is resampled to it if needed.
TARGET_SAMPLE_RATE = 16000

app = FastAPI()

# Allow all origins (for Flutter)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the processor and model once at import time so every request reuses them.
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)


@app.get("/")
async def root():
    """Return a static message showing that the service is up."""
    return {"message": "Somali Speech-to-Text API is running."}


def _transcribe_bytes(audio_bytes: bytes) -> str:
    """Decode raw audio file bytes and return the model's transcription.

    Raises:
        HTTPException: 400 if the bytes cannot be decoded as audio or the
            decoded audio contains no samples.
    """
    try:
        waveform, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))
    except (RuntimeError, ValueError, OSError) as exc:
        # A file the decoder cannot read is a client error, not a server fault.
        raise HTTPException(
            status_code=400,
            detail="Could not decode the uploaded audio file.",
        ) from exc

    if waveform.numel() == 0:
        raise HTTPException(
            status_code=400,
            detail="Uploaded audio contains no samples.",
        )

    # torchaudio.load returns (channels, frames). Average the channels so the
    # processor receives a single mono signal; without this, a stereo upload
    # stays 2-D after squeezing and is not transcribed as one utterance.
    if waveform.dim() > 1 and waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    if sample_rate != TARGET_SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate,
            new_freq=TARGET_SAMPLE_RATE,
        )
        waveform = resampler(waveform)

    # Hand the processor a 1-D NumPy array of samples.
    samples = waveform.reshape(-1).numpy()
    inputs = processor(
        samples,
        sampling_rate=TARGET_SAMPLE_RATE,
        return_tensors="pt",
    )

    with torch.no_grad():
        logits = model(**inputs).logits

    # Greedy decoding: take the highest-scoring token at every frame.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.decode(predicted_ids[0])


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file.

    Args:
        file: The uploaded audio file (multipart form field ``file``).

    Returns:
        A JSON object ``{"transcription": <text>}``.

    Raises:
        HTTPException: 400 if the upload is empty or cannot be decoded.
    """
    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    # NOTE(review): inference runs synchronously inside this coroutine, so the
    # event loop is blocked until it finishes and requests are handled one at
    # a time. Confirm memory headroom before moving this to a thread pool.
    transcription = _transcribe_bytes(audio_bytes)
    return {"transcription": transcription}