2018-08-21 23:04:03 +00:00
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
# Paths of the accented source file and of the unaccented file to produce.
inputFile = 'berean_accented.csv'
outputFile = 'berean_unaccented.csv'

# Load the whole input into memory as text. The encoding is given explicitly
# because the data contains polytonic Greek and the platform default encoding
# (e.g. cp1252 on Windows) would mis-decode it or fail.
# NOTE(review): assumes the input file is UTF-8 encoded -- confirm.
# The `with` block closes the file even if reading raises.
with open(inputFile, 'r', encoding='utf-8') as f:
    newData = f.read()
|
|
|
|
|
|
|
|
|
|
# Greek unicode characters.
#
# Ordered table of (regex character class, replacement) pairs. Each of the
# letter entries maps the listed accented / breathing-marked / iota-subscript
# variants of a Greek letter to the bare letter. The final entry maps the
# listed punctuation and symbol characters to the empty string, i.e. removes
# them.
searchReplace = (
    ('[ἀἄᾄἂἆἁἅᾅἃάᾴὰᾶᾷᾳ]', 'α'),
    ('[ἈἌἎἉἍἋ]', 'Α'),
    ('[ἐἔἑἕἓέὲ]', 'ε'),
    ('[ἘἜἙἝἛ]', 'Ε'),
    ('[ἠἤᾔἢἦᾖᾐἡἥἣἧᾗᾑήῄὴῆῇῃ]', 'η'),
    ('[ἨἬἪἮἩἭἫ]', 'Η'),
    ('[ἰἴἶἱἵἳἷίὶῖϊΐῒ]', 'ι'),
    ('[ἸἼἹἽ]', 'Ι'),
    ('[ὀὄὂὁὅὃόὸ]', 'ο'),
    ('[ὈὌὉὍὋ]', 'Ο'),
    ('[ῥ]', 'ρ'),
    ('[Ῥ]', 'Ρ'),
    ('[ὐὔὒὖὑὕὓὗύὺῦϋΰῢ]', 'υ'),
    ('[ὙὝὟ]', 'Υ'),
    ('[ὠὤὢὦᾠὡὥὧᾧώῴὼῶῷῳ]', 'ω'),
    ('[ὨὬὪὮὩὭὯ]', 'Ω'),
    # Raw string: the backslashes are meant for the regex engine, not for
    # Python. Without the r-prefix, sequences such as "\-" or "\," are invalid
    # string escapes, which newer Python versions warn about. The resulting
    # character set is the same as before.
    (r"[\-\—\,\;\:\\\?\.\·\·\'\‘\’\‹\›\“\”\«\»\(\)\[\]\{\}\⧼\⧽\〈\〉\*\‿\᾽\⇔\¦]", ""),
)
|
|
|
|
|
# Apply every (pattern, replacement) pair to the text, in table order; each
# pass works on the output of the previous one.
for search, replace in searchReplace:
    newData = re.sub(search, replace, newData)

# Write the converted text. The encoding is given explicitly so the Greek
# letters are written as UTF-8 regardless of the platform default encoding,
# and the `with` block closes (and flushes) the file even if the write raises.
with open(outputFile, 'w', encoding='utf-8') as f:
    f.write(newData)
|