"""Strip Greek diacritics and punctuation from a text file.

Reads ``inputFile``, folds accented / breathed Greek vowels and rho to the
bare letter, deletes a fixed set of punctuation marks, and writes the result
to ``outputFile``.
"""
import re

inputFile = 'berean_accented.csv'
outputFile = 'berean_unaccented.csv'

# Bare letter -> every decorated form that is folded back to it.
# NOTE(review): matching is by exact code point, so input that stores the
# same letters in decomposed (NFD) form would not be folded - confirm the
# data is precomposed.
_ACCENTED_FORMS = {
    'α': 'ἀἄᾄἂἆἁἅᾅἃάᾴὰᾶᾷᾳ',
    'Α': 'ἈἌἎἉἍἋ',
    'ε': 'ἐἔἑἕἓέὲ',
    'Ε': 'ἘἜἙἝἛ',
    'η': 'ἠἤᾔἢἦᾖᾐἡἥἣἧᾗᾑήῄὴῆῇῃ',
    'Η': 'ἨἬἪἮἩἭἫ',
    'ι': 'ἰἴἶἱἵἳἷίὶῖϊΐῒ',
    'Ι': 'ἸἼἹἽ',
    'ο': 'ὀὄὂὁὅὃόὸ',
    'Ο': 'ὈὌὉὍὋ',
    'ρ': 'ῥ',
    'Ρ': 'Ῥ',
    'υ': 'ὐὔὒὖὑὕὓὗύὺῦϋΰῢ',
    'Υ': 'ὙὝὟ',
    'ω': 'ὠὤὢὦᾠὡὥὧᾧώῴὼῶῷῳ',
    'Ω': 'ὨὬὪὮὩὭὯ',
}

# Single-pass translation table: decorated character -> bare letter.
_FOLD_TABLE = str.maketrans({
    decorated: bare
    for bare, group in _ACCENTED_FORMS.items()
    for decorated in group
})

# Characters that are deleted outright. Raw string so that the backslashes
# reach the regex engine untouched instead of relying on Python passing
# unknown string escapes through (which warns on modern interpreters).
# NOTE(review): ',' is in this set, so a comma-delimited input would lose its
# field separators - confirm the delimiter used by the data file.
_PUNCTUATION = re.compile(r"[\-—,;:\\?.··'‘’‹›“”«»()\[\]{}⧼⧽〈〉*‿᾽⇔¦]")


def remove_greek_accents(text: str) -> str:
    """Return *text* with Greek diacritics folded and punctuation removed.

    Characters listed in ``_ACCENTED_FORMS`` are replaced by their bare
    letter; characters matched by ``_PUNCTUATION`` are deleted. Everything
    else (including whitespace and line breaks) is returned unchanged.
    """
    # The two character sets are disjoint, so the order of the two steps
    # does not affect the result.
    return _PUNCTUATION.sub('', text.translate(_FOLD_TABLE))


def main() -> None:
    """Convert ``inputFile`` and write the result to ``outputFile``."""
    # Explicit UTF-8: the locale default cannot be trusted to represent
    # polytonic Greek (e.g. cp1252 on Windows).
    with open(inputFile, 'r', encoding='utf-8') as f:
        newData = f.read()

    newData = remove_greek_accents(newData)

    with open(outputFile, 'w', encoding='utf-8') as f:
        f.write(newData)


if __name__ == '__main__':
    main()