Added example handling for abbreviations and compound words.
This commit is contained in:
@@ -40,13 +40,32 @@ def main():
|
||||
|
||||
# Convert to sentences
|
||||
for line in lines:
|
||||
# abbreviations (in alphabetical order)
|
||||
line = line.replace("%", "per cent")
|
||||
line = line.replace("5G", "5 G")
|
||||
line = line.replace("CO2", "C O 2")
|
||||
line = line.replace("EUR", "Euro")
|
||||
line = line.replace("II", "2")
|
||||
line = line.replace("IBM", "I B M")
|
||||
line = line.replace("IMF", "I M F")
|
||||
line = line.replace("OECD", "O E C D")
|
||||
line = line.replace("UN", "U N")
|
||||
line = line.replace("USB", "U S B")
|
||||
line = line.replace("WHO", "W H O")
|
||||
line = line.replace("WTO", "W T O")
|
||||
# compound words
|
||||
line = line.replace("biotechnology", "bio technology")
|
||||
line = line.replace("Coronavirus", "Corona virus")
|
||||
line = line.replace("immunocompetence", "immuno competence")
|
||||
# punctuation marks
|
||||
line = line.replace("-", " - ")
|
||||
line = line.replace("/", ", ")
|
||||
line = line.replace("—", ". ")
|
||||
line = line.replace(":", ". ")
|
||||
line = line.replace(";", ". ")
|
||||
line = line.replace("?", "?. ")
|
||||
line = line.replace("(", ". ")
|
||||
line = line.replace(")", ". ")
|
||||
# line = line.replace(")", ". ") # TODO: Check immune system article
|
||||
for x in line.split(". "):
|
||||
sentences.append(x.strip())
|
||||
sentences.append("<PAUSE>")
|
||||
|
||||
Reference in New Issue
Block a user