OpenGNT/Script/removeGreekAccents.py

36 lines
1.1 KiB
Python
Raw Permalink Normal View History

2018-08-21 23:04:03 +00:00
import re
inputFile = 'berean_accented.csv'
outputFile = 'berean_unaccented.csv'

# Greek unicode characters.
# Each entry is (regex character class, replacement). Every character listed
# in a class is replaced by the plain base letter on the right. The final
# entry deletes punctuation outright (its replacement is the empty string).
# Raw strings are used so that backslashes reach the regex engine verbatim
# instead of being treated as (invalid) Python string escapes.
searchReplace = (
    (r'[ἀἄᾄἂἆἁἅᾅἃάᾴὰᾶᾷᾳ]', 'α'),
    (r'[ἈἌἎἉἍἋ]', 'Α'),
    (r'[ἐἔἑἕἓέὲ]', 'ε'),
    (r'[ἘἜἙἝἛ]', 'Ε'),
    (r'[ἠἤᾔἢἦᾖᾐἡἥἣἧᾗᾑήῄὴῆῇῃ]', 'η'),
    (r'[ἨἬἪἮἩἭἫ]', 'Η'),
    (r'[ἰἴἶἱἵἳἷίὶῖϊΐῒ]', 'ι'),
    (r'[ἸἼἹἽ]', 'Ι'),
    (r'[ὀὄὂὁὅὃόὸ]', 'ο'),
    (r'[ὈὌὉὍὋ]', 'Ο'),
    (r'[ῥ]', 'ρ'),
    (r'[Ῥ]', 'Ρ'),
    (r'[ὐὔὒὖὑὕὓὗύὺῦϋΰῢ]', 'υ'),
    (r'[ὙὝὟ]', 'Υ'),
    (r'[ὠὤὢὦᾠὡὥὧᾧώῴὼῶῷῳ]', 'ω'),
    (r'[ὨὬὪὮὩὭὯ]', 'Ω'),
    # Punctuation to delete: - , ; : \ ? . · ' « » ( ) [ ] { } * ¦
    # A leading '-' is literal inside a character class; only the backslash
    # and the square brackets need escaping.
    # NOTE(review): ',' is stripped as well, so the input is presumably not
    # comma-delimited despite the .csv extension - confirm.
    (r"[-,;:\\?.··'«»()\[\]{}*¦]", ""),
)


def removeGreekAccents(text):
    """Return *text* with accented Greek letters reduced to base letters.

    Applies every (pattern, replacement) pair from ``searchReplace`` in
    order: characters listed in a pair's character class are replaced by
    that pair's replacement, and the punctuation matched by the last pair
    is removed. Characters not listed in any class are left untouched.

    Args:
        text: The string to process.

    Returns:
        The processed string.
    """
    for search, replace in searchReplace:
        text = re.sub(search, replace, text)
    return text


def main():
    """Read ``inputFile``, strip accents/punctuation, write ``outputFile``."""
    # The data is polytonic Greek, so decode/encode explicitly as UTF-8
    # rather than relying on the platform's default encoding.
    with open(inputFile, 'r', encoding='utf-8') as f:
        newData = f.read()

    newData = removeGreekAccents(newData)

    with open(outputFile, 'w', encoding='utf-8') as f:
        f.write(newData)


if __name__ == '__main__':
    main()