# Audio transcription logic using Faster Whisper
from faster_whisper import WhisperModel


def _load_model(model_size, compute_types):
    """Return a WhisperModel using the first compute type that initializes.

    Args:
        model_size: Model size identifier passed through to WhisperModel.
        compute_types: Compute type names to try, in order of preference.

    Returns:
        The initialized WhisperModel.

    Raises:
        ValueError: Re-raised from WhisperModel when the error is not a
            "compute type" error, or when the last candidate also fails.
        RuntimeError: If no model was created (e.g. no candidates given).
    """
    for compute_type in compute_types:
        print(f"Attempting to initialize model with compute_type: {compute_type}")
        try:
            model = WhisperModel(model_size, compute_type=compute_type)
        except ValueError as e:
            # Only fall through to the next candidate for compute-type
            # errors, and only while there is another candidate left to try;
            # anything else is a genuine failure the caller must see.
            if "compute type" in str(e) and compute_type != compute_types[-1]:
                print(f"Warning: {compute_type} compute type not supported by your hardware.")
                print("Trying next compute type...")
            else:
                raise
        else:
            print(f"Successfully initialized model with compute_type: {compute_type}")
            return model

    # Defensive guard: reached only when the loop never produced a model.
    raise RuntimeError("Failed to initialize Whisper model with any compute type")


def transcribe_audio(input_path, output_path, model_size="medium", turbo=False):
    """
    Transcribe an audio file using Faster Whisper and save the result as a .txt file.

    Each segment's text is stripped and written on its own line; leading and
    trailing whitespace of the whole transcript is removed.

    Args:
        input_path: Path to the audio file
        output_path: Path to save the transcription (written as UTF-8)
        model_size: Size of the Whisper model to use
        turbo: If True, try the int8_float16 compute type first before
            falling back to float16, int8 and float32

    Returns:
        The transcription text that was written to output_path.

    Raises:
        ValueError: If the model cannot be initialized (see _load_model).
        RuntimeError: If no model could be initialized with any compute type.
    """
    # Try compute types in order of preference
    if turbo:
        compute_types = ["int8_float16", "float16", "int8", "float32"]
    else:
        compute_types = ["float16", "int8", "float32"]

    model = _load_model(model_size, compute_types)

    segments, _info = model.transcribe(input_path)

    # Build the transcript in one pass instead of repeated string +=.
    transcription = "\n".join(segment.text.strip() for segment in segments).strip()

    # The output file is opened only after all segments have been consumed,
    # so a failure while iterating segments does not truncate an existing file.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(transcription)

    return transcription