Added main function as entry point
This commit is contained in:
@@ -10,53 +10,57 @@ import scipy
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import IPython.display as ipd
|
import IPython.display as ipd
|
||||||
|
|
||||||
|
# Marker placed in the sentence list wherever a stretch of silence is wanted.
_PAUSE = "<PAUSE>"

# One-pass equivalent of the original chained str.replace() calls.  None of
# the replacement texts contains a character that is itself a key here, so
# translating in a single pass gives the same result as replacing in sequence.
_PUNCTUATION_TABLE = str.maketrans(
    {
        "-": " - ",
        "/": ", ",
        "—": ". ",
        ":": ". ",
        ";": ". ",
        "(": ". ",
        ")": ". ",
    }
)


def _split_sentences(lines):
    """Turn raw text lines into a flat list of sentences and pause markers.

    Each line has its punctuation normalised via ``_PUNCTUATION_TABLE`` and is
    then split on ``". "``.  Every piece is stripped of surrounding whitespace
    (pieces may end up empty; the caller skips those).

    Args:
        lines: Iterable of strings, one per input line.

    Returns:
        List of strings: sentence fragments interleaved with ``_PAUSE``.
    """
    sentences = []
    for line in lines:
        normalized = line.translate(_PUNCTUATION_TABLE)
        sentences.extend(piece.strip() for piece in normalized.split(". "))
        # NOTE(review): the indentation of this append was lost in the text
        # this was recovered from; one pause per input line is assumed here.
        # Confirm against the original file.
        sentences.append(_PAUSE)
    return sentences


def main():
    """Synthesise the text in ``input.txt`` to speech and write ``test.wav``.

    Loads the ``facebook/fastspeech2-en-ljspeech`` model ensemble, splits the
    input file into sentences, runs each non-empty sentence through the model
    and joins the resulting waveforms, inserting ``rate`` samples of silence
    for every pause marker.  The result is written as 32-bit float samples.
    """
    models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
        "facebook/fastspeech2-en-ljspeech",
        arg_overrides={"vocoder": "hifigan", "fp16": False},
    )

    TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
    generator = task.build_generator(models, cfg)

    # Read input file.  Decode explicitly as UTF-8 so that the non-ASCII dash
    # in the punctuation table is matched regardless of the platform default.
    with open("input.txt", "r", encoding="utf-8") as f:
        sentences = _split_sentences(f)

    # Used only if nothing gets synthesised; otherwise replaced by the rate
    # returned from the model below.
    rate = 44100

    # Synthesise text.  Waveforms are kept as whole arrays (None stands for a
    # pause) instead of being pushed sample-by-sample into a Python list.
    segments = []
    for text in sentences:
        if text == "":
            continue

        if text == _PAUSE:
            segments.append(None)
            continue

        sample = TTSHubInterface.get_model_input(task, text)
        wav, rate = TTSHubInterface.get_prediction(
            task, models[0], generator, sample
        )
        segments.append(np.asarray(wav.numpy(), dtype=np.float32))

    # Build the silence only now, so every pause (including any that come
    # before the first synthesised sentence) is sized with the same rate the
    # file is written at.
    silence = np.zeros(rate, dtype=np.float32)
    chunks = [silence if segment is None else segment for segment in segments]

    if chunks:
        full_wave_file = np.concatenate(chunks)
    else:
        # Empty input file: np.concatenate rejects an empty list, so produce
        # the empty array directly.
        full_wave_file = np.zeros(0, dtype=np.float32)

    scipy.io.wavfile.write("test.wav", rate, full_wave_file)


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user