45 lines
1.7 KiB
Python
45 lines
1.7 KiB
Python
# Audio transcription logic using Faster Whisper
|
|
|
|
from faster_whisper import WhisperModel
|
|
|
|
def transcribe_audio(input_path, output_path, model_size="medium", turbo=False):
    """
    Transcribe an audio file using Faster Whisper and save the result as a .txt file.

    The model is loaded by trying a list of compute types in order of
    preference. When a candidate is rejected with a ValueError whose message
    mentions "compute type", the next candidate is tried; any other failure
    (or a rejection of the final candidate) is re-raised to the caller.

    Args:
        input_path: Path to the audio file
        output_path: Path to save the transcription
        model_size: Size of the Whisper model to use
        turbo: Whether to try the int8_float16 compute type first for
            faster inference (the regular fallbacks still apply afterwards)

    Returns:
        The transcription text: each segment's stripped text on its own
        line, with leading/trailing whitespace of the whole text removed.
        The same text is written to output_path.

    Raises:
        ValueError: If model initialization fails for a reason other than
            an unsupported compute type, or if the last candidate compute
            type is rejected as well.
        RuntimeError: If no model was initialized after trying every
            compute type.
    """
    # Try compute types in order of preference
    compute_types = ["float16", "int8", "float32"]
    if turbo:
        compute_types.insert(0, "int8_float16")
    last_compute_type = compute_types[-1]

    model = None
    for compute_type in compute_types:
        print(f"Attempting to initialize model with compute_type: {compute_type}")
        try:
            # Keep the try body minimal: only model construction is expected
            # to raise the "compute type" ValueError handled below.
            model = WhisperModel(model_size, compute_type=compute_type)
        except ValueError as e:
            # Only fall through to the next candidate for compute-type
            # errors, and only while there is a next candidate left.
            if "compute type" not in str(e) or compute_type == last_compute_type:
                raise
            print(f"Warning: {compute_type} compute type not supported by your hardware.")
            print("Trying next compute type...")
            continue
        print(f"Successfully initialized model with compute_type: {compute_type}")
        break

    # Defensive guard: with the fallback logic above the loop either sets
    # `model` or raises, so this should not trigger in practice.
    if model is None:
        raise RuntimeError("Failed to initialize Whisper model with any compute type")

    # The second element of the returned pair is not needed here.
    segments, _info = model.transcribe(input_path)

    # Join once instead of growing a string with += per segment. The outer
    # strip() keeps the result identical to the previous implementation.
    transcription = "\n".join(segment.text.strip() for segment in segments).strip()

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(transcription)
    return transcription
|