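"""Local speech-to-text helpers built on faster-whisper.

A single WhisperModel is loaded lazily (size taken from the WHISPER_MODEL_SIZE
environment variable, defaulting to "small") and reused for every call to
transcribe_file().
"""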
import os
from typing import Optional, Tuple, List

from faster_whisper import WhisperModel

# Cached model instance so the weights are loaded only once per process.
_model_singleton: Optional[WhisperModel] = None


def get_model() -> WhisperModel:
    global _model_singleton
    if _model_singleton is None:
        model_size = os.getenv("WHISPER_MODEL_SIZE", "small")
        # CPU-friendly defaults; compute_type="int8" keeps RAM usage low.
        _model_singleton = WhisperModel(model_size, device="cpu", compute_type="int8")
    return _model_singleton


def transcribe_file(audio_path: str) -> Tuple[str, float]:
    """
    Transcribe an audio file to text locally using faster-whisper.

    Returns (text, avg_logprob), where avg_logprob is the mean average
    log-probability over the decoded segments (0.0 when nothing is decoded).
    """
    model = get_model()
    # beam_size=1 trades a little accuracy for speed; vad_filter skips silence.
    segments, _info = model.transcribe(audio_path, beam_size=1, vad_filter=True)

    text_parts: List[str] = []
    logprobs: List[float] = []
    for seg in segments:
        text_parts.append(seg.text.strip())
        logprobs.append(seg.avg_logprob)

    text = " ".join(part for part in text_parts if part)
    # TranscriptionInfo has no avg_logprob field; use the per-segment values.
    avg_logprob = sum(logprobs) / len(logprobs) if logprobs else 0.0
    return text.strip(), float(avg_logprob)
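

# Hypothetical usage sketch (not part of the original module): run the module
# directly to transcribe one audio file given on the command line. The CLI
# shape here is an assumption, not an established interface.
if __name__ == "__main__":
    import sys

    if len(sys.argv) != 2:
        print(f"usage: {sys.argv[0]} <audio_file>", file=sys.stderr)
        sys.exit(1)

    transcript, confidence = transcribe_file(sys.argv[1])
    print(f"avg_logprob={confidence:.3f}", file=sys.stderr)
    print(transcript)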