diff --git a/Makefile b/Makefile index 5abd252..dd530ee 100644 --- a/Makefile +++ b/Makefile @@ -21,10 +21,10 @@ $(VENV_NAME)/bin/activate: ${PYTHON} -m pip install -U pip ${PYTHON} -m pip install fairseq ${PYTHON} -m pip install scipy - ${PYTHON} -m pip install IPython ${PYTHON} -m pip install huggingface_hub ${PYTHON} -m pip install tensorboardX ${PYTHON} -m pip install g2p_en + ${PYTHON} -m pip install pylint touch $(VENV_NAME)/bin/activate tts: env diff --git a/tts.py b/tts.py index 2eecbe1..4a0bdde 100644 --- a/tts.py +++ b/tts.py @@ -1,15 +1,16 @@ -# rm test.wav; python tts.py; play test.wav -# Sources: -# — https://huggingface.co/facebook/fastspeech2-en-ljspeech -# — https://github.com/AI-Guru/arxiv-reader +""" +Text-to-speech synthesis (TTS) +Sources: +— https://huggingface.co/facebook/fastspeech2-en-ljspeech +— https://github.com/AI-Guru/arxiv-reader +""" import argparse from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub from fairseq.models.text_to_speech.hub_interface import TTSHubInterface import scipy import numpy as np -import IPython.display as ipd def main(): """ @@ -35,7 +36,7 @@ def main(): sentences = [] # Read input file - with open(args['input'], "r") as f: + with open(args['input'], "r", encoding="utf-8") as f: lines = f.readlines() # Convert to sentences @@ -65,7 +66,7 @@ def main(): line = line.replace(";", ". ") line = line.replace("?", "?. ") line = line.replace("(", ". ") - # line = line.replace(")", ". ") # TODO: Check immune system article + for x in line.split(". "): sentences.append(x.strip()) sentences.append("") @@ -81,7 +82,7 @@ def main(): sample = TTSHubInterface.get_model_input(task, text) wav, rate = TTSHubInterface.get_prediction(task, models[0], generator, sample) - + wav = wav.numpy() full_wave_file.extend(wav) @@ -89,4 +90,4 @@ def main(): scipy.io.wavfile.write(args['output'], rate, full_wave_file) if __name__ == "__main__": - main() \ No newline at end of file + main()