Initial commit

2025-05-21 21:03:52 +02:00
commit c47d3205a0
8 changed files with 328 additions and 0 deletions
@@ -0,0 +1,44 @@
+# Audio transcription logic using Faster Whisper
+
+from faster_whisper import WhisperModel
+
+def transcribe_audio(input_path, output_path, model_size="medium", turbo=False):
+    """Transcribe an audio file with Faster Whisper and save the text to a file.
+
+    Args:
+        input_path: Path to the audio file.
+        output_path: Path of the UTF-8 text file to write.
+        model_size: Size of the Whisper model to use.
+        turbo: If true, try the int8_float16 compute type before the others.
+
+    Returns:
+        The transcription that was written: one stripped segment per line.
+
+    Raises:
+        ValueError: Re-raised from model setup when no fallback applies.
+        RuntimeError: If the loop ends without a model (defensive guard).
+    """
+    # Compute types in order of preference; later entries are fallbacks.
+    compute_types = (["int8_float16"] if turbo else []) + ["float16", "int8", "float32"]
+    for compute_type in compute_types:
+        try:
+            print(f"Attempting to initialize model with compute_type: {compute_type}")
+            model = WhisperModel(model_size, compute_type=compute_type)
+        except ValueError as e:
+            # Fall back only on a compute-type error while options remain.
+            if "compute type" not in str(e) or compute_type == compute_types[-1]:
+                raise
+            print(f"Warning: {compute_type} compute type not supported by your hardware.")
+            print("Trying next compute type...")
+        else:
+            print(f"Successfully initialized model with compute_type: {compute_type}")
+            break
+    else:
+        raise RuntimeError("Failed to initialize Whisper model with any compute type")
+
+    segments, _info = model.transcribe(input_path)
+    # One join instead of growing a string with += inside a loop.
+    transcription = "\n".join(segment.text.strip() for segment in segments).strip()
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(transcription)
+    return transcription