Files
mknotes/src/transcriber.py
T
2025-05-21 21:03:52 +02:00

45 lines
1.7 KiB
Python

# Audio transcription logic using Faster Whisper
from faster_whisper import WhisperModel
def _load_whisper_model(model_size, turbo):
    """Create a WhisperModel, falling back through compute types until one loads.

    Args:
        model_size: Size of the Whisper model to use
        turbo: When true, "int8_float16" is tried before the regular candidates

    Returns:
        The first WhisperModel that initializes successfully.

    Raises:
        ValueError: Re-raised from WhisperModel when the error is not about the
            compute type, or when the last candidate compute type also fails.
        RuntimeError: If no candidate produced a model (defensive guard only).
    """
    # Candidates in order of preference; the last entry is the final fallback.
    if turbo:
        compute_types = ["int8_float16", "float16", "int8", "float32"]
    else:
        compute_types = ["float16", "int8", "float32"]

    final_fallback = compute_types[-1]
    for compute_type in compute_types:
        print(f"Attempting to initialize model with compute_type: {compute_type}")
        try:
            model = WhisperModel(model_size, compute_type=compute_type)
        except ValueError as e:
            # Only "compute type" errors are worth retrying, and only while
            # another candidate remains; anything else goes to the caller.
            if "compute type" not in str(e) or compute_type == final_fallback:
                raise
            print(f"Warning: {compute_type} compute type not supported by your hardware.")
            print("Trying next compute type...")
            continue
        print(f"Successfully initialized model with compute_type: {compute_type}")
        return model

    # Every iteration above either returns or raises, so this is reached only
    # if the candidate list is ever made empty.
    raise RuntimeError("Failed to initialize Whisper model with any compute type")


def transcribe_audio(input_path, output_path, model_size="medium", turbo=False) -> str:
    """
    Transcribe an audio file using Faster Whisper and save the result as a .txt file.

    Args:
        input_path: Path to the audio file
        output_path: Path to save the transcription
        model_size: Size of the Whisper model to use
        turbo: Whether to try the int8_float16 compute type first for faster inference

    Returns:
        The transcription text, one stripped segment per line, with leading and
        trailing whitespace removed. The same text is written to output_path.

    Raises:
        ValueError: If the model cannot be initialized (see _load_whisper_model).
        RuntimeError: If no compute type candidate produced a model.
    """
    model = _load_whisper_model(model_size, turbo)

    segments, _info = model.transcribe(input_path)

    # Join once instead of growing a string with += per segment; the outer
    # strip() drops blank lines left at either end by empty segments.
    transcription = "\n".join(segment.text.strip() for segment in segments).strip()

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(transcription)
    return transcription