Andley_OGNTa/obsolete scripts/Trim OGNTa.py

70 lines
2.5 KiB
Python
Raw Normal View History

2021-09-11 01:04:26 +00:00
# Trim the original OGNT down to the minimum data needed.
import re

# ══════ trim interlinear ═══════════════════════
inputFile = "./source/OpenGNT_version3_3.csv"
outputFile = "./OGNTa-LN.txt"

# Trimming OGNT to minimum dataset required for interlinear.
# Column layout (1-based; the code below uses 0-based indexes, so column 9
# "Book" is x[8], column 22 "LN-LouwNidaNumbers" is x[21], and so on):
# 1-OGNTsort 2-TANTTsort 3-FEATURESsort1- 4-LevinsohnClauseID 5-OTquotation 6-BGBsortI7-LTsortI8-STsortI 9-Book10-Chapter11-Verse 12-OGNTk13-OGNTu14-OGNTa15-lexeme16-rmac17-sn 18-BDAGentry19-EDNTentry20-MounceEntry21-GoodrickKohlenbergerNumbers22-LN-LouwNidaNumbers 23-transSBLcap24-transSBL25-modernGreek26-Fonética_Transliteración 27-TBESG28-IT29-LT30-ST31-Español 32-PMpWord33-PMfWord 34-Note35-Mvar36-Mlexeme37-Mrmac38-Msn39-MTBESG

# NOTE(review): the delimiter characters were missing from the copy of this
# script that was reviewed (the replace() calls had empty or tab-only search
# strings, and replacing '' inserts a tab between every character). They are
# reconstructed here on the assumption that the input groups columns as
# 〔a｜b｜c〕 with a tab between groups -- confirm against the data file.
# Written as escapes so they cannot be silently lost again.
_GROUP_OPEN = "\u3014"   # 〔 LEFT TORTOISE SHELL BRACKET
_GROUP_CLOSE = "\u3015"  # 〕 RIGHT TORTOISE SHELL BRACKET
_SUB_SEP = "\uff5c"      # ｜ FULLWIDTH VERTICAL LINE

# Punctuation removed from column x[27].
# NOTE(review): the original removed one more character whose literal was
# lost in the reviewed copy; add it to this string once identified.
_IT_PUNCTUATION = str.maketrans("", "", ",;.:")

# Book numbers (40-66) and the abbreviations that replace them in x[8].
_BOOK_ABBREVIATIONS = (
    ("40", "Mat"), ("41", "Mar"), ("42", "Luk"), ("43", "Joh"),
    ("44", "Act"), ("45", "Rom"), ("46", "1Co"), ("47", "2Co"),
    ("48", "Gal"), ("49", "Eph"), ("50", "Php"), ("51", "Col"),
    ("52", "1Th"), ("53", "2Th"), ("54", "1Ti"), ("55", "2Ti"),
    ("56", "Tit"), ("57", "Phm"), ("58", "Heb"), ("59", "Jas"),
    ("60", "1Pe"), ("61", "2Pe"), ("62", "1Jo"), ("63", "2Jo"),
    ("64", "3Jo"), ("65", "Jud"), ("66", "Rev"),
)


def split_columns(line):
    """Split one input line into a flat list of column strings.

    Group brackets adjacent to a tab are dropped and the in-group separator
    is turned into a tab, so that every sub-column becomes its own list
    item. The closing bracket and newline at the very end of the line are
    left on the last item, as in the original script.
    """
    line = line.replace("\t" + _GROUP_OPEN, "\t")
    line = line.replace(_GROUP_CLOSE + "\t", "\t")
    line = line.replace(_SUB_SEP, "\t")
    return re.split("\t", line)


def trim_row(line):
    """Return the columns of *line* with the interlinear clean-ups applied.

    Raises IndexError if the line has fewer columns than the indexes used
    here (8, 27, 31, 32).
    """
    x = split_columns(line)
    # Strip punctuation from x[27].
    x[27] = x[27].translate(_IT_PUNCTUATION)
    # Strip <pm> markup and double-bracket markers from x[31] and x[32].
    for marker in ("<pm>", "</pm>", "[["):
        x[31] = x[31].replace(marker, "")
    for marker in ("<pm>", "</pm>", "]]"):
        x[32] = x[32].replace(marker, "")
    # Replace the book number with its abbreviation.
    for number, abbreviation in _BOOK_ABBREVIATIONS:
        x[8] = x[8].replace(number, abbreviation)
    return x


def main(input_file=None, output_file=None):
    """Read the input file, skip its header line, and write x[21] per row.

    Both paths default to the module-level ``inputFile`` / ``outputFile``.
    The output is written as UTF-8 with a BOM (``utf_8_sig``).
    """
    if input_file is None:
        input_file = inputFile
    if output_file is None:
        output_file = outputFile

    with open(input_file, "r", encoding="utf-8") as source, \
            open(output_file, "w", encoding="utf_8_sig") as target:
        next(source, None)  # the first line is the column header
        for line in source:
            if not line.strip():
                continue  # tolerate blank lines instead of failing on x[27]
            x = trim_row(line)
            # Full interlinear row layout, currently disabled:
            # x[0]+'\t'+x[8]+' '+x[9]+':'+x[10]+'\t'+x[31]+x[13]+x[32]+'\t'+x[14]+'\t'+x[21]+'\t'+x[15]+'\t'+x[27]+'\n'
            target.write(x[21] + "\n")


if __name__ == "__main__":
    main()