import os
from typing import List, Optional, Tuple

from faster_whisper import WhisperModel

# Lazily-initialized module-level model. Loading Whisper weights is
# expensive, so the model is created once and reused across calls.
_model_singleton: Optional[WhisperModel] = None


def get_model() -> WhisperModel:
    """Return the shared WhisperModel, creating it on first use.

    Model size comes from the WHISPER_MODEL_SIZE env var (default "small").
    NOTE(review): not thread-safe — concurrent first calls could build the
    model twice; harmless but wasteful. Confirm callers are single-threaded.
    """
    global _model_singleton
    if _model_singleton is None:
        model_size = os.getenv("WHISPER_MODEL_SIZE", "small")
        # CPU-friendly defaults; compute_type="int8" lowers RAM usage.
        _model_singleton = WhisperModel(model_size, device="cpu", compute_type="int8")
    return _model_singleton


def transcribe_file(audio_path: str) -> Tuple[str, float]:
    """
    Transcribe an audio file to text locally using faster-whisper.

    Returns (text, avg_logprob), where avg_logprob is the mean of the
    per-segment ``avg_logprob`` values (0.0 when no segments are produced).

    Fix: the previous version read ``avg_logprob`` off the TranscriptionInfo
    object, which has no such attribute — the confidence was always 0.0.
    The metric lives on each Segment, so we accumulate it per segment.
    """
    model = get_model()
    # ``segments`` is a generator: transcription happens as we iterate,
    # so we collect text and log-probs in a single pass.
    segments, _info = model.transcribe(audio_path, beam_size=1, vad_filter=True)

    text_parts: List[str] = []
    logprobs: List[float] = []
    for seg in segments:
        part = seg.text.strip()
        if part:
            text_parts.append(part)
        logprobs.append(seg.avg_logprob)

    text = " ".join(text_parts)
    avg_logprob = sum(logprobs) / len(logprobs) if logprobs else 0.0
    return text, float(avg_logprob)