script for updating OpenGNTGloss & NET2Words

This commit is contained in:
Eliran Wong 2018-08-10 17:59:26 +01:00 committed by GitHub
parent 41c947f5bd
commit dc2c1e46ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 55 additions and 0 deletions

View File

@@ -0,0 +1,55 @@
# This script merges the latest work on OpenGNTGloss and NET2Words into the main database file, OpenGNT.csv.
import re
# Working file with the latest glosses; it is only read (by the export step).
inputFile = 'OpenGNTGloss_NET2Words_updating.csv'
# Receives the reformatted text produced from inputFile by the export step.
outputFile = 'OpenGNTGloss_NET2Words.csv'
# Main database file; it is read and rewritten in place by the insert, sort
# and merge steps below.
databaseFile = 'OpenGNT.csv'
# export latest glosses
# Read the working file, reshape it with three regex passes, and write the
# result to outputFile. The result stays bound to `newData` for the next step.
# The encoding is explicit because the patterns below match non-ASCII text;
# relying on the platform default would make the result locale-dependent.
with open(inputFile, 'r', encoding='utf-8') as f:
    newData = f.read()
# NOTE(review): as rendered here the patterns below contain `[^]` with nothing
# after the caret, and the third call has an empty pattern. This looks like
# delimiter characters were lost in transit -- verify the three patterns
# against the repository copy before running. They are reproduced unchanged.
# Pass 1: on each matching line, emit the two captured fields swapped and
# separated by a tab (second field, tab, first field).
newData = re.sub('^.*?([^]*?)\t〈([^]*?).*?$', r'\2\t\1', newData, flags=re.M)
# Pass 2: delete every line (including its newline) that matches the pattern;
# presumably these are the lines pass 1 did not rewrite -- TODO confirm.
newData = re.sub('^.*?[^]\n', '', newData, flags=re.M)
# Pass 3: remove all remaining occurrences of the pattern.
newData = re.sub('', '', newData)
with open(outputFile, 'w', encoding='utf-8') as f:
    f.write(newData)
# insert latest glosses into database
# Prepend the freshly exported lines (`newData`) to the current database
# contents and write the combined text back to databaseFile.
with open(databaseFile, 'r', encoding='utf-8') as f:
    oldData = f.read()
# Guard the seam: without a trailing newline the last new line would be
# glued onto the first existing database line.
if newData and not newData.endswith('\n'):
    newData += '\n'
newData = newData + oldData
with open(databaseFile, 'w', encoding='utf-8') as f:
    f.write(newData)
# sort data
# Reorder the database lines by their first whitespace-delimited token.
with open(databaseFile, 'r', encoding='utf-8') as f:
    lines = f.readlines()
# Only the final line can lack a terminator; add one so it is not glued onto
# the following line once sorting moves it away from the end.
if lines and not lines[-1].endswith('\n'):
    lines[-1] += '\n'
# Sort BEFORE reopening the file for writing: mode 'w' truncates the file, so
# any failure while sorting must happen while the data is still on disk.
# Blank or whitespace-only lines have no first token; they get an empty key
# (and therefore sort first) instead of raising IndexError.
# sorted() is stable, so lines with equal keys keep their existing order.
lines = sorted(lines, key=lambda line: (line.split() or [''])[0])
with open(databaseFile, 'w', encoding='utf-8') as f:
    f.writelines(lines)
# merge glosses
# Re-read the database, collapse adjacent lines that share the same leading
# field, and write the result back in place.
with open(databaseFile, 'r', encoding='utf-8') as f:
    newData = f.read()
# For two consecutive lines that begin with the same tab-terminated prefix
# (group 1), the match is replaced by: the prefix, then group 3 (taken from
# the second line), then group 2 (the remainder of the first line).
# Text matched by group 4 is consumed and not written back.
# NOTE(review): as rendered here groups 3 and 4 are bare runs of `[^\n]*?`
# with nothing between them, which looks like separator characters were lost
# in transit -- verify the pattern against the repository copy before
# running. It is reproduced unchanged.
newData = re.sub(r'^(.*?\t)(.*?)\n\1([^\n]*?[^\n]*?[^\n]*?[^\n]*?[^\n]*?[^\n]*?[^\n]*?[^\n]*?)([^\n]*?[^\n]*?)', r'\1\3\2', newData, flags=re.M)
with open(databaseFile, 'w', encoding='utf-8') as f:
    f.write(newData)