script for updating OpenGNTGloss

This commit is contained in:
Eliran Wong 2018-08-09 13:42:39 +01:00 committed by GitHub
parent b305b6882c
commit 188adf921e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 66 additions and 0 deletions

View File

@ -0,0 +1,66 @@
# This script merges the latest work on OpenGNTGloss and NET2Words into the main database file, OpenGNT.csv.
import re
inputFile = 'OpenGNTGloss_NET2Words_updating.csv'
outputFile = 'OpenGNTGloss_NET2Words.csv'
databaseFile = 'OpenGNT.csv'
# export latest glosses
# Load the whole working file into memory; the substitutions that follow
# operate on the full text rather than line by line.
with open(inputFile, 'r') as f:
    newData = f.read()
newData = re.sub('^.*?([^]*?)\t〈([^]*?).*?$', r'\2\t\1', newData, flags=re.M)
newData = re.sub('^.*?[^]\n', '', newData, flags=re.M)
# Save the reformatted gloss text to the output file, replacing any
# previous content.
with open(outputFile, 'w') as f:
    f.write(newData)
# insert latest glosses into database
# The new gloss lines are placed in front of the existing database content;
# the combined text is then written back over the database file.
with open(databaseFile, 'r') as f:
    oldData = f.read()
newData += oldData
with open(databaseFile, 'w') as f:
    f.write(newData)
# sort data
# Read every line and finish sorting BEFORE reopening the file for writing:
# opening with 'w' truncates the file, so a failure during the sort must not
# be able to leave the database empty.
with open(databaseFile, 'r') as f:
    lines = f.readlines()
# A final line without a newline would be glued onto whichever line ends up
# after it once the order changes, so terminate it explicitly.
if lines and not lines[-1].endswith('\n'):
    lines[-1] += '\n'
# Order by the first whitespace-delimited token of each line. Slicing with
# [:1] yields an empty key for blank lines (they sort first) instead of the
# IndexError that [0] would raise; for all other lines the ordering is the
# same as comparing the first token directly, and ties keep their input order.
sortedLines = sorted(lines, key=lambda line: line.split()[:1])
with open(databaseFile, 'w') as f:
    f.writelines(sortedLines)
# merge glosses
# Pull the sorted database back in as a single string so the substitution
# that follows can match across line breaks.
with open(databaseFile, 'r') as f:
    newData = f.read()
newData = re.sub('^.*?\t(.*?)\n([^]*?[^]*?[^]*?[^]*?[^]*?[^]*?[^]*?[^]*?)([^]*?[^]*?)', r'\2\1', newData, flags=re.M)
# Write the merged text back over the database file.
with open(databaseFile, 'w') as f:
    f.write(newData)
# clean up OpenGNTGloss_NET2Words.csv
# Re-read the exported gloss file in full so it can be post-processed.
with open(outputFile, 'r') as f:
    newData = f.read()
newData = re.sub('', '', newData)
# Store the cleaned-up text back in the exported gloss file.
with open(outputFile, 'w') as f:
    f.write(newData)