# Recovered from the git patch "[PATCH] Initial commit"
# (commit 3bf5181687766960dc4e39bedcf0ba70cb77ff61, Heiko J Schick,
# Tue, 30 Aug 2022 09:56:27 +0200), which creates TTS.py.
#
# Usage:
#   rm test.wav; python TTS.py; play test.wav
#
# Sources:
# — https://github.com/AI-Guru/arxiv-reader
# — https://huggingface.co/facebook/fastspeech2-en-ljspeech
"""Synthesize the text in ``input.txt`` to speech and write it to ``test.wav``.

The script loads the ``facebook/fastspeech2-en-ljspeech`` model from the
Hugging Face hub, splits every input line into sentence-like pieces,
synthesizes the pieces one by one and writes the concatenated audio as a
single float32 WAV file.
"""

from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
import scipy
# `import scipy` alone is not guaranteed to load the `scipy.io.wavfile`
# submodule, so import it explicitly before it is used below.
import scipy.io.wavfile
import numpy as np
import IPython.display as ipd
import re

INPUT_PATH = "input.txt"
OUTPUT_PATH = "test.wav"

# Sample rate used until the model reports its own; it only ends up in the
# output file when no sentence was synthesized at all.
FALLBACK_RATE = 44100

# Length of the silence inserted for every empty entry in `sentences`
# (one is appended after each input line). The original script contained a
# one-second pause branch that was unreachable behind an identical guard, so
# the default of 0.0 reproduces its actual output; set it to 1.0 to get the
# pause that branch described.
PAUSE_SECONDS = 0.0


models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
    "facebook/fastspeech2-en-ljspeech",
    arg_overrides={"vocoder": "hifigan", "fp16": False},
)

TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
generator = task.build_generator(models, cfg)

rate = FALLBACK_RATE
sentences = []
chunks = []  # one NumPy array per synthesized sentence (or pause)

# Read input file. The encoding is explicit because the "—" replacement
# below only matches when the file is decoded correctly (assumes input.txt
# is UTF-8).
with open(INPUT_PATH, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Convert to sentences: hyphens are spaced out, em dashes and semicolons are
# treated as sentence boundaries, then each line is split on ". ".
for line in lines:
    line = line.replace("-", " - ")
    line = line.replace("—", ". ")
    line = line.replace(";", ". ")
    sentences.extend(part.strip() for part in line.split(". "))
    # Empty entry marks the end of an input line (see PAUSE_SECONDS).
    sentences.append("")

# Synthesize text
for text in sentences:
    if not text:
        # Nothing to speak: optionally insert silence, never call the model.
        if PAUSE_SECONDS > 0:
            chunks.append(np.zeros(int(rate * PAUSE_SECONDS), dtype=np.float32))
        continue

    sample = TTSHubInterface.get_model_input(task, text)
    # `rate` is replaced by the sample rate returned with each prediction.
    wav, rate = TTSHubInterface.get_prediction(task, models[0], generator, sample)

    # Keep whole arrays instead of extending a Python list sample by sample;
    # assumes `wav` is a 1-D waveform tensor, as the concatenation requires.
    chunks.append(wav.numpy())

if chunks:
    full_wave_file = np.concatenate(chunks).astype(np.float32, copy=False)
else:
    # No speech and no pauses: still write a valid, empty WAV file.
    full_wave_file = np.zeros(0, dtype=np.float32)

scipy.io.wavfile.write(OUTPUT_PATH, rate, full_wave_file)