# Andley_OGNTa/obsolete scripts/Trim OGNTa.py

# trim original OGNT to minimum data

import re

# ══════ trim interlinear ═══════════════════════

inputFile = "./source/OpenGNT_version3_3.csv"
outputFile = "./OGNTa-LN.txt"

# Trimming OGNT to the minimum dataset required for the interlinear.
#
# Source columns, numbered from 1.  Groups are separated by tabs; inside a
# group the fields are wrapped in 〔…〕 and separated by ｜:
#   1-OGNTsort  2-TANTTsort  3-FEATURESsort1-  4-LevinsohnClauseID  5-OTquotation
#   〔6-BGBsortI｜7-LTsortI｜8-STsortI〕
#   〔9-Book｜10-Chapter｜11-Verse〕
#   〔12-OGNTk｜13-OGNTu｜14-OGNTa｜15-lexeme｜16-rmac｜17-sn〕
#   〔18-BDAGentry｜19-EDNTentry｜20-MounceEntry｜21-GoodrickKohlenbergerNumbers｜22-LN-LouwNidaNumbers〕
#   〔23-transSBLcap｜24-transSBL｜25-modernGreek｜26-Fonética_Transliteración〕
#   〔27-TBESG｜28-IT｜29-LT｜30-ST｜31-Español〕
#   〔32-PMpWord｜33-PMfWord〕
#   〔34-Note｜35-Mvar｜36-Mlexeme｜37-Mrmac｜38-Msn｜39-MTBESG〕
#
# Once the group markers are flattened to tabs, column N above is x[N - 1]
# in the code below (e.g. 9-Book is x[8], 22-LN-LouwNidaNumbers is x[21]).

# Tokens stripped from column 28 (x[27]).  They are applied one after the
# other in this order, so a token exposed by an earlier removal is still caught.
_GLOSS_PUNCTUATION = (',', ';', '.', ' —', ':')

# Markup tags stripped from columns 32 and 33 (x[31], x[32]).
_PM_TAGS = ('<pm>', '</pm>')

# Numeric book code (column 9, x[8]) -> three-letter book abbreviation.
_BOOK_ABBREVIATIONS = {
    '40': 'Mat', '41': 'Mar', '42': 'Luk', '43': 'Joh', '44': 'Act',
    '45': 'Rom', '46': '1Co', '47': '2Co', '48': 'Gal', '49': 'Eph',
    '50': 'Php', '51': 'Col', '52': '1Th', '53': '2Th', '54': '1Ti',
    '55': '2Ti', '56': 'Tit', '57': 'Phm', '58': 'Heb', '59': 'Jas',
    '60': '1Pe', '61': '2Pe', '62': '1Jo', '63': '2Jo', '64': '3Jo',
    '65': 'Jud', '66': 'Rev',
}


def _split_fields(line):
    """Return the fields of one data row with the group markers flattened.

    The opening marker after a tab, the closing marker before a tab and the
    in-group separator are each turned into a plain tab, then the row is
    split on tabs.
    """
    flattened = line.replace('\t〔', '\t').replace('〕\t', '\t').replace('｜', '\t')
    return flattened.split('\t')


def _clean_fields(x):
    """Normalise, in place, the columns used by the interlinear line.

    Raises IndexError when the row has fewer fields than the columns touched
    here (indices 8, 27, 31 and 32).  Returns the same list for convenience.
    """
    for token in _GLOSS_PUNCTUATION:
        x[27] = x[27].replace(token, '')
    for tag in _PM_TAGS:
        x[31] = x[31].replace(tag, '')
        x[32] = x[32].replace(tag, '')
    x[31] = x[31].replace('[[', '⟦')
    x[32] = x[32].replace(']]', '⟧')
    # Exact-match lookup: an unexpected value is left untouched rather than
    # having digit pairs inside it rewritten.
    x[8] = _BOOK_ABBREVIATIONS.get(x[8], x[8])
    return x


# Read every data row up front; the first line of the CSV is the header.
with open(inputFile, 'r', encoding="utf-8") as f:
    Lines = f.readlines()[1:]

# utf_8_sig writes a byte-order mark at the start of the output file.
# The context manager closes (and flushes) the file even if a row is malformed.
with open(outputFile, 'w', encoding="utf_8_sig") as f:
    for line in Lines:
        x = _clean_fields(_split_fields(line))
        # Full interlinear line, kept for reference:
        #f.write (x[0]+'\t'+x[8]+' '+x[9]+':'+x[10]+'\t'+x[31]+x[13]+x[32]+'\t'+x[14]+'\t'+x[21]+'\t'+x[15]+'\t'+x[27]+'\n')
        # Current output: only column 22 (LN-LouwNidaNumbers), one per line.
        f.write(x[21] + '\n')
-												udapted scripts

											
										
										
											2021-09-11 01:04:26 +00:00
+								# trim original OGNT to mininum data
-												update script

											
										
										
											2021-09-08 04:01:09 +00:00
 								import re
-												files clean up

											
										
										
											2021-12-09 01:29:52 +00:00
+								# ══════ trim interlinear ═══════════════════════
-												update structure

											
										
										
											2022-04-12 09:11:37 +00:00
+								inputFile = "./source/OpenGNT_version3_3.csv"
-												revert back to original mood (P/M/E/D/N/O)

											
										
										
											2022-05-10 02:18:28 +00:00
+								outputFile = "./OGNTa-LN.txt"
-												files clean up

											
										
										
											2021-12-09 01:29:52 +00:00
 								# Trimming OGNT to minimum dataset required for interlinear
 								# 1-OGNTsort	2-TANTTsort	3-FEATURESsort1-	4-LevinsohnClauseID	5-OTquotation	〔6-BGBsortI｜7-LTsortI｜8-STsortI〕	〔9-Book｜10-Chapter｜11-Verse〕	〔12-OGNTk｜13-OGNTu｜14-OGNTa｜15-lexeme｜16-rmac｜17-sn〕	〔18-BDAGentry｜19-EDNTentry｜20-MounceEntry｜21-GoodrickKohlenbergerNumbers｜22-LN-LouwNidaNumbers〕	〔23-transSBLcap｜24-transSBL｜25-modernGreek｜26-Fonética_Transliteración〕	〔27-TBESG｜28-IT｜29-LT｜30-ST｜31-Español〕	〔32-PMpWord｜33-PMfWord〕	〔34-Note｜35-Mvar｜36-Mlexeme｜37-Mrmac｜38-Msn｜39-MTBESG〕
-												update script

											
										
										
											2021-09-08 04:01:09 +00:00
-												udapted scripts

											
										
										
											2021-09-11 01:04:26 +00:00
+								f = open(inputFile,'r',encoding="utf-8")
-												files clean up

											
										
										
											2021-12-09 01:29:52 +00:00
+								Lines = f.readlines()[1:]
-												update script

											
										
										
											2021-09-08 04:01:09 +00:00
+								f.close()
-												files clean up

											
										
										
											2021-12-09 01:29:52 +00:00
+								f = open(outputFile,'w',encoding="utf_8_sig")
 								for line in Lines:
 									#print (line)
 									line = line.replace('\t〔','\t')
 									line = line.replace('〕\t','\t')
 									line = line.replace('｜','\t')
 									x = re.split("\t", line)
 									#
 									x[27] = x[27].replace(',','')
 									x[27] = x[27].replace(';','')
 									x[27] = x[27].replace('.','')
 									x[27] = x[27].replace(' —','')
 									x[27] = x[27].replace(':','')
 									#
 									x[31] = x[31].replace('<pm>','')
-												update structure

											
										
										
											2022-04-12 09:11:37 +00:00
+									x[31] = x[31].replace('</pm>','')
-												files clean up

											
										
										
											2021-12-09 01:29:52 +00:00
+									x[31] = x[31].replace('[[','⟦')
-												update structure

											
										
										
											2022-04-12 09:11:37 +00:00
+									x[32] = x[32].replace('<pm>','')
-												files clean up

											
										
										
											2021-12-09 01:29:52 +00:00
+									x[32] = x[32].replace('</pm>','')
 									x[32] = x[32].replace(']]','⟧')
 									#
 									x[8] = x[8].replace('40', 'Mat')
 									x[8] = x[8].replace('41', 'Mar')
 									x[8] = x[8].replace('42', 'Luk')
 									x[8] = x[8].replace('43', 'Joh')
 									x[8] = x[8].replace('44', 'Act')
 									x[8] = x[8].replace('45', 'Rom')
 									x[8] = x[8].replace('46', '1Co')
 									x[8] = x[8].replace('47', '2Co')
 									x[8] = x[8].replace('48', 'Gal')
 									x[8] = x[8].replace('49', 'Eph')
 									x[8] = x[8].replace('50', 'Php')
 									x[8] = x[8].replace('51', 'Col')
 									x[8] = x[8].replace('52', '1Th')
 									x[8] = x[8].replace('53', '2Th')
 									x[8] = x[8].replace('54', '1Ti')
 									x[8] = x[8].replace('55', '2Ti')
 									x[8] = x[8].replace('56', 'Tit')
 									x[8] = x[8].replace('57', 'Phm')
 									x[8] = x[8].replace('58', 'Heb')
 									x[8] = x[8].replace('59', 'Jas')
 									x[8] = x[8].replace('60', '1Pe')
 									x[8] = x[8].replace('61', '2Pe')
 									x[8] = x[8].replace('62', '1Jo')
 									x[8] = x[8].replace('63', '2Jo')
 									x[8] = x[8].replace('64', '3Jo')
 									x[8] = x[8].replace('65', 'Jud')
 									x[8] = x[8].replace('66', 'Rev')
 									#
-												revert back to original mood (P/M/E/D/N/O)

											
										
										
											2022-05-10 02:18:28 +00:00
+									#f.write (x[0]+'\t'+x[8]+' '+x[9]+':'+x[10]+'\t'+x[31]+x[13]+x[32]+'\t'+x[14]+'\t'+x[21]+'\t'+x[15]+'\t'+x[27]+'\n')
 									f.write (x[21]+'\n')
-												files clean up

											
										
										
											2021-12-09 01:29:52 +00:00
 								f.close()