"""Merge the latest OpenGNTGloss / NET2Words work into the main database.

Recovered from the patch "script for updating OpenGNTGloss"
(commit 188adf921ef6a0a3ec6b0ad323b37867644e50bc, Eliran Wong, 2018-08-09),
which adds this file as Script/updateOpenGNTGloss.py.

This file is created for merging latest work on OpenGNTGloss and NET2Words
into main database file OpenGNT.csv.

Pipeline (all paths are relative to the current working directory):

1. Read ``inputFile`` and pull a (key, gloss) pair out of every line that
   contains ``{gloss}<TAB>〈key|``.
2. Put the extracted gloss lines in front of the records of
   ``databaseFile``, sort all lines by their first whitespace-separated
   token, and fold each gloss line into the record that directly follows it.
3. Write the extracted glosses to ``outputFile`` and the merged records back
   to ``databaseFile``.

NOTE(review): the delimiters ``|``, ``{``, ``}`` and ``*`` are matched
literally, exactly as they are spelled in this script. Confirm that the data
files use these same characters (ASCII rather than full-width variants).
"""

import re

inputFile = 'OpenGNTGloss_NET2Words_updating.csv'
outputFile = 'OpenGNTGloss_NET2Words.csv'
databaseFile = 'OpenGNT.csv'

# Temporary end-of-line tag that tells gloss lines apart from database
# records while both live in the same sorted text.
_MARKER = '*'

# One input line: anything, "{gloss}", TAB, "〈key|", rest of the line.
# group(1) is the gloss, group(2) is the key.
_GLOSS_RE = re.compile(r'^.*?\{([^{}]*?)\}\t〈([^|]*?)\|.*?$', re.M)

# A marked gloss line immediately followed by a database record.
#   group(1): the gloss (text between the first TAB and the marker)
#   group(2): the record up to and including the second "|" that follows
#             the sixth "〔"
#   group(3): the next two "|"-separated fields, which the gloss replaces
# Every negated class also excludes the newline so that a gloss line can
# only ever be merged into the single record right below it.
_MERGE_RE = re.compile(
    r'^.*?\t(.*?)\*\n'
    r'((?:[^〔\n]*?〔){6}[^|\n]*?\|[^|\n]*?\|)'
    r'([^|\n]*?\|[^|\n]*?)〕',
    re.M,
)


def _split_records(text):
    """Split *text* on newlines; every returned line ends with a newline.

    A final line without a trailing newline gets one, so that re-ordering
    the lines can never glue two records together.
    """
    records = text.split('\n')
    if records[-1] == '':
        records.pop()
    return [record + '\n' for record in records]


def _sort_key(line):
    """Return the first whitespace-separated token of *line*.

    Blank lines yield an empty key (and therefore sort first) instead of
    raising IndexError.
    """
    tokens = line.split()
    return tokens[0] if tokens else ''


def extract_glosses(text):
    """Return one ``key<TAB>gloss*`` line per matching line of *text*.

    Lines that do not contain the ``{gloss}<TAB>〈key|`` pattern are dropped.
    Every returned line ends with the marker and a newline.
    """
    return ''.join(
        match.group(2) + '\t' + match.group(1) + _MARKER + '\n'
        for match in _GLOSS_RE.finditer(text)
    )


def merge_glosses(marked_glosses, database_text):
    """Fold marked gloss lines into the database records they belong to.

    *marked_glosses* is the output of :func:`extract_glosses`;
    *database_text* is the content of the database file. The gloss lines are
    placed before the database lines and everything is sorted by first
    token. The sort is stable, so a gloss line ends up directly above the
    record with the same first token. Each such pair collapses into the
    record, with its two old gloss fields replaced by the new gloss.

    NOTE(review): a gloss line that is not followed by a record of the
    expected shape is left in the result unchanged (marker included), as in
    the original script -- confirm whether such lines should be dropped.
    """
    lines = _split_records(marked_glosses) + _split_records(database_text)
    lines.sort(key=_sort_key)
    return _MERGE_RE.sub(r'\2\1〕', ''.join(lines))


def main():
    """Run the export and merge on the three configured files."""
    with open(inputFile, 'r', encoding='utf-8') as f:
        marked_glosses = extract_glosses(f.read())

    with open(databaseFile, 'r', encoding='utf-8') as f:
        database_text = f.read()

    # Compute everything before touching the disk so that a failure cannot
    # leave the database half-updated.
    merged = merge_glosses(marked_glosses, database_text)

    # The exported gloss list is stored without any marker characters.
    with open(outputFile, 'w', encoding='utf-8') as f:
        f.write(marked_glosses.replace(_MARKER, ''))

    with open(databaseFile, 'w', encoding='utf-8') as f:
        f.write(merged)


if __name__ == '__main__':
    main()