From 0c09e3a91e20901ebffe081e7c4e30ec5d2d3ffe Mon Sep 17 00:00:00 2001 From: Heiko J Schick Date: Wed, 31 Aug 2022 11:20:08 +0200 Subject: [PATCH] Added example handling for abbreviations and compound words. --- tts.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tts.py b/tts.py index 9be9262..2eecbe1 100644 --- a/tts.py +++ b/tts.py @@ -40,13 +40,32 @@ def main(): # Convert to sentences for line in lines: + # abbreviations (in alphabetical order) + line = line.replace("%", "per cent") + line = line.replace("5G", "5 G") + line = line.replace("CO2", "C O 2") + line = line.replace("EUR", "Euro") + line = line.replace("II", "2") + line = line.replace("IBM", "I B M") + line = line.replace("IMF", "I M F") + line = line.replace("OECD", "O E C D") + line = line.replace("UN", "U N") + line = line.replace("USB", "U S B") + line = line.replace("WHO", "W H O") + line = line.replace("WTO", "W T O") + # compound words + line = line.replace("biotechnology", "bio technology") + line = line.replace("Coronavirus", "Corona virus") + line = line.replace("immunocompetence", "immuno competence") + # punctuation marks line = line.replace("-", " - ") line = line.replace("/", ", ") line = line.replace("—", ". ") line = line.replace(":", ". ") line = line.replace(";", ". ") + line = line.replace("?", "?. ") line = line.replace("(", ". ") - line = line.replace(")", ". ") + # line = line.replace(")", ". ") # TODO: Check immune system article for x in line.split(". "): sentences.append(x.strip()) sentences.append("")