# This file is created for merging latest work on OpenGNTGloss and NET2Words
# into main database file OpenGNT.csv
#
# Provenance: Script/updateOpenGNTGloss.py, added by commit
# d3d0b6b87e50ae9ed102eac245b1d01d7928d431 (Eliran Wong, Fri, 17 Aug 2018),
# "updated to align with new database format".

import re

inputFile = 'OpenGNTGloss_NET2Words_updating.csv'
outputFile = 'OpenGNTGloss_NET2Words.csv'
databaseFile = 'OpenGNT.csv'

# The field delimiter must be matched literally: a bare "|" in a regular
# expression means alternation, not a bar character.
# NOTE(review): both the ASCII bar and the fullwidth bar (U+FF5C) are accepted
# because it is not visible from this script which one the data files use --
# confirm against OpenGNT.csv and narrow this down if desired.
_BAR = r'[|\uFF5C]'
_FIELD = r'[^\n|\uFF5C]*?'

# One working line: anything, "{gloss}", a tab, then "〈id" followed by a bar.
_GLOSS_LINE = re.compile(r'.*?\{([^{}]*?)\}\t〈([^|\uFF5C]*?)' + _BAR)

# An "id<TAB>gloss" line immediately followed by the database line that starts
# with the same "id<TAB>".  Group 3 is everything up to and including the
# first two bar-terminated fields after the seventh "〔"; group 4 is the two
# remaining fields before the closing "〕", which get replaced by the gloss.
_MERGE_PATTERN = re.compile(
    r'^(.*?\t)(.*?)\n\1'
    + r'((?:[^\n〔]*?〔){7}' + _FIELD + _BAR + _FIELD + _BAR + r')'
    + r'(' + _FIELD + _BAR + _FIELD + r')〕',
    flags=re.M,
)


def extract_glosses(text):
    """Return "id<TAB>gloss" lines for every working line that carries a gloss.

    A line qualifies when it contains "{gloss}" followed by a tab, "〈", an id
    and a bar.  All other lines are dropped.  Every emitted line ends with a
    newline, so the result can be safely concatenated with other line data.
    """
    # Matching line by line replaces the former "append '*', delete unmarked
    # lines, strip '*'" trick, whose last step used the invalid pattern '*'
    # (re.error: nothing to repeat) and whose middle step could delete a
    # converted line that happened to be followed by a blank line.
    extracted = []
    for line in text.split('\n'):
        found = _GLOSS_LINE.match(line)
        if found:
            extracted.append(found.group(2) + '\t' + found.group(1) + '\n')
    return ''.join(extracted)


def sort_key(line):
    """Return the first whitespace-separated token of *line* ('' if blank)."""
    tokens = line.split()
    return tokens[0] if tokens else ''


def sort_lines(text):
    """Return *text* with its lines sorted by their first token.

    The sort is stable, so lines that share a first token keep their relative
    order.  Blank lines sort first instead of raising IndexError, and a
    missing final newline is added so that lines cannot be glued together
    once they are reordered.
    """
    lines = text.split('\n')
    if lines[-1] == '':
        # A trailing newline yields one empty trailing item; it is not a line.
        lines.pop()
    return ''.join(sorted((line + '\n' for line in lines), key=sort_key))


def merge_glosses(text):
    """Fold each "id<TAB>gloss" line into the database line that follows it.

    The gloss replaces the last two bar-separated fields of the seventh
    "〔...〕" group of the following line when both lines start with the same
    "id<TAB>".  The gloss line itself is consumed by the replacement.
    """
    return _MERGE_PATTERN.sub(r'\1\3\2〕', text)


def main():
    """Export the latest glosses and merge them into the database file."""
    # assumes the data files are UTF-8 (the patterns contain CJK brackets)
    # -- TODO confirm; without an explicit encoding the result depends on
    # the platform default.

    # export latest glosses
    with open(inputFile, 'r', encoding='utf-8') as f:
        workingData = f.read()
    newData = extract_glosses(workingData)
    with open(outputFile, 'w', encoding='utf-8') as f:
        f.write(newData)

    # insert latest glosses into database, sort data, merge glosses
    with open(databaseFile, 'r', encoding='utf-8') as f:
        oldData = f.read()
    # New lines are placed first so that, after the stable sort, each gloss
    # line sits directly above the database line with the same id.
    mergedData = merge_glosses(sort_lines(newData + oldData))

    # The database is written exactly once, after every in-memory step has
    # succeeded, so a failure can no longer leave it truncated.
    with open(databaseFile, 'w', encoding='utf-8') as f:
        f.write(mergedData)


if __name__ == '__main__':
    main()