Andley_OGNTa/Scripts/3 Add-CSB-Glosses.py

33 lines
664 B
Python
Raw Normal View History

2021-10-22 01:57:19 +00:00
import re
# Default locations, relative to the current working directory.
glosses = "./CSB-Glosses"
inputFile = "./OGNTa.tsv"
outputFile = "./OGNTa-TC.tsv"

# A row needs columns 0..6: column 0 is the lookup key, column 5 is the
# fallback value, and column 6 is the one that gets overwritten.
_MIN_COLUMNS = 7


def _load_glosses(path):
    """Read the gloss file at *path* into a ``{key: gloss}`` dictionary.

    Every non-blank line must consist of exactly two whitespace-separated
    tokens: the lookup key followed by its gloss.  Blank lines are skipped.
    When a key occurs more than once, the last occurrence wins.

    Raises:
        ValueError: if a non-blank line does not have exactly two tokens.
    """
    gloss_dict = {}
    with open(path, "r", encoding="utf-8") as gloss_file:
        for line_no, line in enumerate(gloss_file, start=1):
            tokens = line.split()
            if not tokens:
                # A blank line (e.g. trailing newline at EOF) holds no entry.
                continue
            if len(tokens) != 2:
                raise ValueError(
                    "{}:{}: expected 'key gloss', got {!r}".format(
                        path, line_no, line
                    )
                )
            key, value = tokens
            gloss_dict[key] = value
    return gloss_dict


def _gloss_row(row, gloss_dict, row_no):
    """Return *row* with its 7th tab-separated column replaced by a gloss.

    The gloss is looked up by the row's first column; when the key is not in
    *gloss_dict* the row's 6th column is used instead.  Only the first seven
    columns are emitted, and the result always ends with a single newline.

    Raises:
        IndexError: if the row has fewer than seven tab-separated columns.
    """
    fields = row.split("\t")
    if len(fields) < _MIN_COLUMNS:
        raise IndexError(
            "input row {}: expected at least {} tab-separated columns, "
            "got {}".format(row_no, _MIN_COLUMNS, len(fields))
        )
    gloss = gloss_dict.get(fields[0], fields[5])
    return "\t".join(fields[:6] + [gloss]) + "\n"


def main(glosses_path=glosses, input_path=inputFile, output_path=outputFile):
    """Copy *input_path* to *output_path*, filling column 7 from the glosses.

    The whole input is read and converted before the output file is opened,
    so a malformed gloss line or input row aborts the run without truncating
    or partially writing the output (and the input may safely be the same
    path as the output).
    """
    gloss_dict = _load_glosses(glosses_path)

    with open(input_path, "r", encoding="utf-8") as source:
        rows = source.readlines()

    converted = [
        _gloss_row(row, gloss_dict, row_no)
        for row_no, row in enumerate(rows, start=1)
    ]

    # NOTE: the output is written as plain UTF-8 without a byte-order mark;
    # writing a leading '\ufeff' was tried previously and is disabled.
    with open(output_path, "w", encoding="utf-8") as target:
        target.writelines(converted)


if __name__ == "__main__":
    main()