# Patch commentary (text ahead of the "diff --git" header is not part of the patch).
#
# Target: Script/removeGreekAccents.py, one hunk (@@ -9,31 +9,27 @@).
#
# What the hunk does:
#   * Removes the chain of separate `newData = re.sub(pattern, replacement, newData)`
#     statements (one per character class).
#   * Adds a `searchReplace` tuple of (pattern, replacement) pairs and a single
#     `for search, replace in searchReplace:` loop that calls
#     `re.sub(search, replace, newData)` for each pair, in the listed order.
#   * The pairs cover lower- and upper-case α, ε, η, ι, ο, ρ, υ, ω (accented /
#     breathing-marked forms -> the bare letter) plus one final character class of
#     punctuation marks that is replaced with the empty string.
#   * The character classes and replacement strings on the "+" side appear to be
#     the same as on the "-" side, in the same order, so the change reads as a
#     pure restructuring -- TODO confirm against the real file, since look-alike
#     code points cannot be told apart by eye.
#   * The visible context shows the result being written afterwards via
#     `f = open(outputFile,'w')` / `f.write(newData)`.
#
# NOTE(review): the punctuation pattern is a normal (non-raw) string literal that
# contains backslash escapes such as `\-`, `\,` and `\[`. Python's documentation
# describes unrecognized escape sequences in string literals as deprecated; a raw
# string (r"...") is the usual form for regex patterns. Not changed here.
#
# NOTE(review): in this copy the whole diff sits on a single line, i.e. the
# original line breaks (including blank context lines) appear to have been lost,
# so it presumably cannot be fed to `git apply` / `patch` as-is.
diff --git a/Script/removeGreekAccents.py b/Script/removeGreekAccents.py index 35bbd86..15a21bd 100644 --- a/Script/removeGreekAccents.py +++ b/Script/removeGreekAccents.py @@ -9,31 +9,27 @@ f.close() # Greek unicode characters -newData = re.sub('[ἀἄᾄἂἆἁἅᾅἃάᾴὰᾶᾷᾳ]', 'α', newData) -newData = re.sub('[ἈἌἎἉἍἋ]', 'Α', newData) - -newData = re.sub('[ἐἔἑἕἓέὲ]', 'ε', newData) -newData = re.sub('[ἘἜἙἝἛ]', 'Ε', newData) - -newData = re.sub('[ἠἤᾔἢἦᾖᾐἡἥἣἧᾗᾑήῄὴῆῇῃ]', 'η', newData) -newData = re.sub('[ἨἬἪἮἩἭἫ]', 'Η', newData) - -newData = re.sub('[ἰἴἶἱἵἳἷίὶῖϊΐῒ]', 'ι', newData) -newData = re.sub('[ἸἼἹἽ]', 'Ι', newData) - -newData = re.sub('[ὀὄὂὁὅὃόὸ]', 'ο', newData) -newData = re.sub('[ὈὌὉὍὋ]', 'Ο', newData) - -newData = re.sub('[ῥ]', 'ρ', newData) -newData = re.sub('[Ῥ]', 'Ρ', newData) - -newData = re.sub('[ὐὔὒὖὑὕὓὗύὺῦϋΰῢ]', 'υ', newData) -newData = re.sub('[ὙὝὟ]', 'Υ', newData) - -newData = re.sub('[ὠὤὢὦᾠὡὥὧᾧώῴὼῶῷῳ]', 'ω', newData) -newData = re.sub('[ὨὬὪὮὩὭὯ]', 'Ω', newData) - -newData = re.sub("[\-\—\,\;\:\\\?\.\·\·\'\‘\’\‹\›\“\”\«\»\(\)\[\]\{\}\⧼\⧽\〈\〉\*\‿\᾽\⇔\¦]", "", newData) +searchReplace = ( + ('[ἀἄᾄἂἆἁἅᾅἃάᾴὰᾶᾷᾳ]', 'α'), + ('[ἈἌἎἉἍἋ]', 'Α'), + ('[ἐἔἑἕἓέὲ]', 'ε'), + ('[ἘἜἙἝἛ]', 'Ε'), + ('[ἠἤᾔἢἦᾖᾐἡἥἣἧᾗᾑήῄὴῆῇῃ]', 'η'), + ('[ἨἬἪἮἩἭἫ]', 'Η'), + ('[ἰἴἶἱἵἳἷίὶῖϊΐῒ]', 'ι'), + ('[ἸἼἹἽ]', 'Ι'), + ('[ὀὄὂὁὅὃόὸ]', 'ο'), + ('[ὈὌὉὍὋ]', 'Ο'), + ('[ῥ]', 'ρ'), + ('[Ῥ]', 'Ρ'), + ('[ὐὔὒὖὑὕὓὗύὺῦϋΰῢ]', 'υ'), + ('[ὙὝὟ]', 'Υ'), + ('[ὠὤὢὦᾠὡὥὧᾧώῴὼῶῷῳ]', 'ω'), + ('[ὨὬὪὮὩὭὯ]', 'Ω'), + ("[\-\—\,\;\:\\\?\.\·\·\'\‘\’\‹\›\“\”\«\»\(\)\[\]\{\}\⧼\⧽\〈\〉\*\‿\᾽\⇔\¦]", ""), +) +for search, replace in searchReplace: + newData = re.sub(search, replace, newData) f = open(outputFile,'w') f.write(newData)