From 3bf5181687766960dc4e39bedcf0ba70cb77ff61 Mon Sep 17 00:00:00 2001
From: Heiko J Schick <heiko.schick@huawei.com>
Date: Tue, 30 Aug 2022 09:56:27 +0200
Subject: [PATCH] Initial commit

---
 TTS.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 TTS.py
diff --git a/TTS.py b/TTS.py
new file mode 100644
index 0000000..632c6a4
--- /dev/null
+++ b/TTS.py
@@ -0,0 +1,59 @@
+# Usage: rm test.wav; python TTS.py; play test.wav
+
+# Sources:
+# — https://github.com/AI-Guru/arxiv-reader
+# — https://huggingface.co/facebook/fastspeech2-en-ljspeech
+
+from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
+from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
+import scipy
+import numpy as np
+import IPython.display as ipd
+import re
+
+
+# `import scipy` alone is not guaranteed to load the `scipy.io.wavfile` submodule.
+import scipy.io.wavfile
+
+# Load the "facebook/fastspeech2-en-ljspeech" checkpoint via the Hugging Face hub
+# loader, overriding the vocoder to HiFi-GAN and disabling fp16.
+models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
+    "facebook/fastspeech2-en-ljspeech",
+    arg_overrides={"vocoder": "hifigan", "fp16": False},
+)
+TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
+generator = task.build_generator(models, cfg)
+
+PAUSE = "<PAUSE>"  # marker for one second of silence after each sentence
+# Hyphens are spaced out; em dashes and semicolons become sentence breaks.
+PUNCTUATION = str.maketrans({"-": " - ", "—": ". ", ";": ". "})
+rate = 44100  # fallback; replaced by the model's rate once anything is synthesized
+
+# Read input file. UTF-8 is explicit because the text is searched for "—".
+with open("input.txt", "r", encoding="utf-8") as f:
+    lines = f.readlines()
+
+# Convert to sentences, each followed by a pause marker
+sentences = []
+for line in lines:
+    for piece in line.translate(PUNCTUATION).split(". "):
+        sentences.append(piece.strip())
+        sentences.append(PAUSE)
+
+# Synthesize text. Pauses are stored as None and only turned into silence after
+# the loop: their length depends on the sample rate, which is not known until
+# the model has returned its first waveform.
+chunks = []
+for text in sentences:
+    if text == PAUSE:
+        chunks.append(None)
+    elif text:
+        sample = TTSHubInterface.get_model_input(task, text)
+        wav, rate = TTSHubInterface.get_prediction(task, models[0], generator, sample)
+        chunks.append(wav.numpy())
+
+# The leading empty array keeps np.concatenate valid for an empty input file.
+silence = np.zeros(rate, dtype=np.float32)
+parts = [silence[:0]] + [silence if chunk is None else chunk for chunk in chunks]
+full_wave_file = np.concatenate(parts).astype(np.float32)
+scipy.io.wavfile.write("test.wav", rate, full_wave_file)