Initial commit
This commit is contained in:
@@ -0,0 +1,59 @@
|
|||||||
|
# Usage (reads input.txt, writes test.wav): rm test.wav; python TTS.py; play test.wav
|
||||||
|
|
||||||
|
# Sources:
|
||||||
|
# — https://github.com/AI-Guru/arxiv-reader
|
||||||
|
# — https://huggingface.co/facebook/fastspeech2-en-ljspeech
|
||||||
|
|
||||||
|
import re

import IPython.display as ipd
import numpy as np
import scipy
import scipy.io.wavfile
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
|
||||||
|
|
||||||
|
|
||||||
|
# Load the "facebook/fastspeech2-en-ljspeech" checkpoint through fairseq's
# Hugging Face hub helper, overriding the vocoder and fp16 settings.
hub_overrides = {"vocoder": "hifigan", "fp16": False}
models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
    "facebook/fastspeech2-en-ljspeech", arg_overrides=hub_overrides
)

# Merge the task's data configuration into cfg before the generator is built
# from it.
TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)

# The generator is reused for every sentence synthesised later in the script.
generator = task.build_generator(models, cfg)
|
||||||
|
|
||||||
|
# Placeholder sample rate. It is overwritten by the rate returned from the
# first synthesis call and only reaches the output file when input.txt yields
# nothing to speak.
rate = 44100
sentences = []

# Read input file.
# The text is matched against a non-ASCII em dash below, so decode explicitly
# instead of relying on the platform's default encoding.
with open("input.txt", "r", encoding="utf-8") as f:
    lines = f.readlines()

# Convert to sentences.
# Hyphens are padded with spaces; em dashes and semicolons become sentence
# breaks. A single translate() pass is equivalent to the chained replaces
# because no replacement text contains a character that is itself replaced.
_PUNCTUATION = str.maketrans({"-": " - ", "—": ". ", ";": ". "})
for line in lines:
    line = line.translate(_PUNCTUATION)
    sentences.extend(part.strip() for part in line.split(". "))
    # One pause marker per input line, after that line's sentences.
    sentences.append("<PAUSE>")

# Synthesise text.
# Each entry is either a waveform array or None for a pause. Pauses are filled
# in after the loop so they are sized from the rate the model reports rather
# than from the placeholder above, which previously mis-sized any pause that
# occurred before the first spoken sentence.
segments = []
for text in sentences:
    if text == "":
        continue

    if text == "<PAUSE>":
        segments.append(None)
        continue

    sample = TTSHubInterface.get_model_input(task, text)
    wav, rate = TTSHubInterface.get_prediction(task, models[0], generator, sample)

    # NOTE(review): assumes the returned waveform is a 1-D CPU tensor, as the
    # original per-sample list extension did -- confirm if the model is moved
    # to another device.
    segments.append(wav.numpy().astype(np.float32, copy=False))

# `rate` samples of silence, i.e. one second at the output sample rate.
silence = np.zeros(rate, dtype=np.float32)
if segments:
    # Concatenate once instead of growing a Python list sample by sample.
    full_wave_file = np.concatenate(
        [silence if segment is None else segment for segment in segments]
    )
else:
    # Empty input: still write a valid, zero-length wav file.
    full_wave_file = np.zeros(0, dtype=np.float32)

scipy.io.wavfile.write("test.wav", rate, full_wave_file)
|
||||||
Reference in New Issue
Block a user