diff --git a/tts.py b/tts.py index 9be9262..2eecbe1 100644 --- a/tts.py +++ b/tts.py @@ -40,13 +40,32 @@ def main(): # Convert to sentences for line in lines: + # abbreviations (in alphabetical order) + line = line.replace("%", "per cent") + line = line.replace("5G", "5 G") + line = line.replace("CO2", "C O 2") + line = line.replace("EUR", "Euro") + line = line.replace("II", "2") + line = line.replace("IBM", "I B M") + line = line.replace("IMF", "I M F") + line = line.replace("OECD", "O E C D") + line = line.replace("UN", "U N") + line = line.replace("USB", "U S B") + line = line.replace("WHO", "W H O") + line = line.replace("WTO", "W T O") + # compound words + line = line.replace("biotechnology", "bio technology") + line = line.replace("Coronavirus", "Corona virus") + line = line.replace("immunocompetence", "immuno competence") + # punctuation marks line = line.replace("-", " - ") line = line.replace("/", ", ") line = line.replace("—", ". ") line = line.replace(":", ". ") line = line.replace(";", ". ") + line = line.replace("?", "?. ") line = line.replace("(", ". ") - line = line.replace(")", ". ") + # line = line.replace(")", ". ") # TODO: Check immune system article for x in line.split(". "): sentences.append(x.strip()) sentences.append("")