# Usage: rm test.wav; python tts.py; play test.wav
#
# Sources:
# — https://huggingface.co/facebook/fastspeech2-en-ljspeech
# — https://github.com/AI-Guru/arxiv-reader
|
import IPython.display as ipd
import numpy as np
import scipy
import scipy.io.wavfile
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
|
|
|
|
# Marker inserted after every input line; rendered as one second of silence.
_PAUSE_MARKER = "<PAUSE>"

# Sample rate used only when no sentence was synthesized, i.e. when the model
# never reported its own rate.
_FALLBACK_RATE = 44100

# Punctuation rewrites applied before splitting on ". ": dashes are spaced out,
# slashes become commas, and the remaining marks become sentence boundaries.
_PUNCTUATION_TABLE = str.maketrans(
    {
        "-": " - ",
        "/": ", ",
        "—": ". ",
        ":": ". ",
        ";": ". ",
        "(": ". ",
        ")": ". ",
    }
)


def _split_sentences(lines):
    """Return stripped sentence fragments for *lines*, with a pause marker after each line."""
    sentences = []
    for line in lines:
        normalized = line.translate(_PUNCTUATION_TABLE)
        sentences.extend(part.strip() for part in normalized.split(". "))
        sentences.append(_PAUSE_MARKER)
    return sentences


def main():
    """
    Synthesize the text in ``input.txt`` to speech and write it to ``test.wav``.

    Loads the ``facebook/fastspeech2-en-ljspeech`` model (HiFi-GAN vocoder,
    fp16 disabled), splits every input line into sentence fragments,
    synthesizes each non-empty fragment, and inserts one second of silence
    after each input line. The result is written as a float32 WAV file at
    the sample rate reported by the model.
    """
    models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
        "facebook/fastspeech2-en-ljspeech",
        arg_overrides={"vocoder": "hifigan", "fp16": False},
    )
    TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
    generator = task.build_generator(models, cfg)

    # Read input file. The text handling below deals with non-ASCII
    # punctuation ("—"), so decode explicitly instead of relying on the
    # platform default encoding.
    with open("input.txt", "r", encoding="utf-8") as f:
        sentences = _split_sentences(f)

    # Synthesize text. Waveforms are collected as arrays; a pause is recorded
    # as None and only turned into silence once the model's sample rate is
    # known, so every pause has the same duration.
    chunks = []
    rate = _FALLBACK_RATE
    for text in sentences:
        if not text:
            continue

        if text == _PAUSE_MARKER:
            chunks.append(None)
            continue

        sample = TTSHubInterface.get_model_input(task, text)
        wav, rate = TTSHubInterface.get_prediction(task, models[0], generator, sample)
        chunks.append(wav.numpy())

    # One second of silence at the output sample rate.
    silence = np.zeros(rate, dtype=np.float32)
    parts = [silence if chunk is None else chunk for chunk in chunks]

    # np.concatenate rejects an empty list, so an empty input file still
    # produces an (empty) WAV file as before.
    if parts:
        audio = np.concatenate(parts).astype(np.float32, copy=False)
    else:
        audio = np.zeros(0, dtype=np.float32)

    scipy.io.wavfile.write("test.wav", rate, audio)
|
|
|
|
# Run the synthesis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()