# Andley_OGNTa/Scripts/2 Update.py
#
# 244 lines
# 5.5 KiB
# Python

# Apply all morphological-code corrections to OGNT
import re
# Path of the tab-separated file this script reads.
inputFile = "./tmp/OGNT-trimmed.txt"
# Path of the corrected file this script writes.
outputFile = "./OGNTa.txt"

# Modifications to morphological codes.
#
# Each entry is (OGNTsort, rmac): OGNTsort is compared for string equality
# with the first tab-separated column of an input row, and rmac replaces that
# row's fifth column.  Keys are six-digit, zero-padded strings.
# NOTE(review): "⁞" presumably separates alternative parsings of the same
# word -- confirm against the OGNT documentation.
ChangeList = (
    ('001339', 'V-IAI⁞AAI-3S'),
    ('001446', 'N-NSF⁞VSF'),
    ('001449', 'N-NSF⁞VSF'),
    ('001456', 'N-NSF⁞VSF-L'),
    ('002311', 'D-GPN⁞GPM'),
    ('002313', 'T-GSM⁞GSN'),
    ('002314', 'A-GSM⁞GSN'),
    ('002332', 'T-DSN⁞DSM'),
    ('002729', 'T-GSM⁞GSN'),
    ('018408', 'A-NSF-L'),
    ('044656', 'A-NSM'),
    ('044681', 'A-NSM'),
    # NOTE(review): was '07490' (five digits).  Every other key has six
    # digits and the neighbouring entries ascend towards '010230', so it is
    # zero-padded here -- confirm against the source data.
    ('007490', 'V-IAI⁞AAI-3S'),
    ('010230', 'V-IAI⁞AAI-3S'),
    ('011277', 'V-IAI⁞AAI-3S'),
    ('012521', 'V-IAI⁞AAI-3S'),
    ('013408', 'V-IAI⁞AAI-3S'),
    ('014321', 'N-NPM⁞NPF'),
    ('014722', 'V-PAI⁞PAM-2P'),
    ('014870', 'V-PAI⁞PAM-2P'),
    ('015295', 'V-IAI⁞AAI-3S'),
    ('015338', 'V-IAI⁞AAI-3S'),
    ('015856', 'A-ASM⁞ASF⁞ASN'),
    ('016276', 'V-IAI⁞AAI-3S'),
    ('017996', 'V-IAI⁞AAI-3S'),
    ('023937', 'V-IAI⁞AAI-3S'),
    ('024376', 'V-IAI⁞AAI-3S'),
    ('024873', 'V-IAI⁞AAI-3S'),
    ('025028', 'V-IAI⁞AAI-3S'),
    ('026430', 'V-IAI⁞AAI-3S'),
    ('027324', 'V-PAI⁞PAM-2P'),
    ('027922', 'V-IAI⁞AAI-3S'),
    ('030929', 'V-AAI⁞IAI-3S'),
    ('061790', 'T-GSM⁞GSN'),
    ('061797', 'T-GSM⁞GSN'),
    ('111572', 'T-GSM⁞GSN'),
    ('058604', 'V-PAI⁞PAM-2P'),
    ('059844', 'V-PAI⁞PAM-2P'),
    ('059851', 'V-PAI⁞PAM-2P'),
    ('060735', 'V-PAI⁞PAM-2P'),
    ('060772', 'V-PAI⁞PAM-2P'),
    ('040274', 'V-PAI⁞PAM-2P'),
    ('046258', 'V-PAI⁞PAM-2P'),
    ('052654', 'V-PAI⁞PAM-2P'),
    ('081192', 'V-PAI⁞PAM-2P'),
    ('090732', 'V-PAI⁞PAM-2P'),
    ('094554', 'V-PAI⁞PAM-2P'),
    ('102458', 'V-PAI⁞PAM-2P'),
    ('109852', 'V-PAI⁞PAM-2P'),
    ('117367', 'V-PAI⁞PAM-2P'),
    ('120546', 'V-PAI⁞PAM-2P'),
    ('125856', 'V-PAI⁞PAM-2P'),
    ('125884', 'V-PAI⁞PAM-2P'),
    ('126387', 'V-PAI⁞PAM-2P'),
    ('071398', 'V-IAI⁞AAI-3S'),
    ('075568', 'V-IAI⁞AAI-3S'),
    ('079686', 'V-IAI⁞AAI-3S'),
    ('044497', 'ADV'),
    ('122411', 'V-PNI⁞PNM-2P'),
    ('122457', 'V-PNI⁞PNM-2P'),
    ('122784', 'V-PPI⁞PPM-2P'),
    ('044085', 'N-ASF⁞ASM'),
    ('045957', 'N-NPM⁞NPF'),
    ('049236', 'V-PNP-ASM⁞NSN'),
    ('050688', 'V-PAS⁞AAS-3S'),
    ('050951', 'A-GPN⁞GPM'),
    ('050973', 'A-GPN⁞GPM'),
    ('053457', 'V-AAS⁞FAI-1S'),
    ('105014', 'A-GPM⁞GPN'),
    ('105017', 'A-GPM⁞GPN'),
    ('105020', 'A-GPM⁞GPN'),
    ('055128', 'V-PAS⁞AAS-1S'),
    ('055267', 'T-GPN⁞GPM'),
    ('055272', 'T-GPN⁞GPM'),
    ('066321', 'D-DSN⁞DSM'),
    ('085145', 'A-GSM⁞GSN'),
    ('085139', 'A-GSM⁞GSN'),
    ('071563', 'A-GPM⁞GPN'),
    ('085394', 'A-GSN⁞GSM'),
    ('085404', 'A-GSN⁞GSM'),
    ('086768', 'A-APN⁞NPN'),
    ('125446', 'P-DSM⁞DSN'),
    ('124694', 'D-DSM⁞DSN'),
    ('124690', 'R-DSM⁞DSN'),
    ('124062', 'R-GPN⁞GPF'),
    ('122410', 'R-DSM⁞DSN'),
    ('120232', 'A-DPM⁞DPN'),
    ('120005', 'V-RAI⁞RAM-2P'),
    ('118918', 'N-NSM⁞NSN'),
    ('116068', 'A-GSM⁞GSN'),
    ('108996', 'A-DPN⁞DPM'),
    ('105687', 'A-GPN⁞GPM'),
    ('105496', 'V-RAM⁞RAI-2P'),
    ('103687', 'V-PAS⁞AAS-3S'),
    ('100496', 'V-PMI⁞PMM-2P'),
    ('099999', 'D-ASN⁞NSN'),
    ('095378', 'D-GPN⁞GPF'),
    ('095354', 'N-ASN⁞NSN'),
    ('092950', 'A-NSF⁞NSM'),
    ('091068', 'A-DPN⁞DPM'),
    ('088635', 'T-DPM⁞DPN'),
    ('088636', 'A-DPM⁞DPN'),
    ('088062', 'V-FAI⁞AAS-1S'),
    ('088057', 'V-FAI⁞AAS-1S'),
    ('106998', 'V-PEI⁞PEM-2P'),
    ('107119', 'A-ASN'),
    ('030349', 'CONJ'),
    ('053269', 'CONJ'),
    ('053447', 'CONJ'),
    ('053471', 'CONJ'),
    ('058273', 'CONJ'),
    ('059756', 'CONJ'),
    ('059762', 'CONJ'),
    ('060562', 'CONJ'),
    ('060630', 'CONJ'),
    ('060727', 'CONJ'),
    ('061545', 'CONJ'),
    ('061569', 'CONJ'),
    ('062758', 'CONJ'),
    ('075931', 'CONJ'),
    ('089866', 'CONJ'),
    ('106424', 'CONJ'),
    ('109400', 'CONJ'),
    ('112077', 'CONJ'),
    ('126106', 'CONJ'),
    ('126321', 'CONJ'),
    ('126803', 'CONJ'),
    ('126862', 'CONJ'),
    ('127342', 'CONJ'),
    ('127356', 'CONJ'),
    ('127530', 'CONJ'),
    ('016647', 'CONJ'),
    ('019328', 'CONJ'),
    ('023926', 'CONJ'),
    ('024224', 'CONJ'),
    ('025128', 'CONJ'),
    ('033721', 'CONJ'),
    ('038041', 'CONJ'),
    ('038188', 'CONJ'),
    ('040275', 'CONJ'),
    ('047331', 'CONJ'),
    ('048999', 'CONJ'),
    ('051666', 'CONJ'),
    ('056390', 'CONJ'),
    ('056465', 'CONJ'),
    ('060880', 'CONJ'),
    ('062139', 'CONJ'),
    ('063761', 'CONJ'),
    ('064700', 'CONJ'),
    ('075553', 'CONJ'),
    ('078184', 'CONJ'),
    ('078198', 'CONJ'),
    ('078239', 'CONJ'),
    ('078863', 'CONJ'),
    ('080479', 'CONJ'),
    ('080603', 'CONJ'),
    ('083101', 'CONJ'),
    ('083744', 'CONJ'),
    ('083812', 'CONJ'),
    ('085551', 'CONJ'),
    ('086230', 'CONJ'),
    ('086976', 'CONJ'),
    ('087050', 'CONJ'),
    ('087551', 'CONJ'),
    ('088251', 'CONJ'),
    ('088917', 'CONJ'),
    ('090521', 'CONJ'),
    ('090738', 'CONJ'),
    ('092764', 'CONJ'),
    ('094529', 'CONJ'),
    ('096028', 'CONJ'),
    ('096039', 'CONJ'),
    ('096042', 'CONJ'),
    ('096052', 'CONJ'),
    ('096692', 'CONJ'),
    ('098757', 'CONJ'),
    ('099629', 'CONJ'),
    ('099918', 'CONJ'),
    ('100292', 'CONJ'),
    ('100354', 'CONJ'),
    ('100649', 'CONJ'),
    ('101833', 'CONJ'),
    ('105498', 'CONJ'),
    ('106359', 'CONJ'),
    ('106596', 'CONJ'),
    ('106726', 'CONJ'),
    ('107012', 'CONJ'),
    ('107088', 'CONJ'),
    ('110591', 'CONJ'),
    ('111689', 'CONJ'),
    ('111912', 'CONJ'),
    ('112009', 'CONJ'),
    ('113652', 'CONJ'),
    ('114072', 'CONJ'),
    ('122152', 'CONJ'),
    ('124363', 'CONJ'),
    ('124797', 'CONJ'),
    ('124838', 'CONJ'),
    ('124891', 'CONJ'),
    ('125200', 'CONJ'),
    ('126568', 'CONJ'),
    ('126574', 'CONJ'),
    ('127033', 'CONJ'),
    ('127094', 'CONJ'),
    ('128743', 'CONJ'),
    ('129279', 'CONJ'),
    ('129282', 'CONJ'),
    ('129619', 'CONJ'),
)
def _apply_corrections(lines, changes):
    """Yield every line of *lines*, patching the rows listed in *changes*.

    Args:
        lines: iterable of tab-separated text rows, line terminators included.
        changes: iterable of ``(sort_key, rmac)`` pairs.  A row whose first
            column equals ``sort_key`` gets its fifth column replaced by
            ``rmac``; if a key occurs more than once the first entry wins.

    Yields:
        Each input row, in order.  Rows without a matching key are yielded
        unchanged; matching rows keep every other column (and their line
        terminator) exactly as read.

    Raises:
        ValueError: a matching row has fewer than five columns, so there is
            no fifth column to replace.
    """
    # Build the lookup once instead of scanning the whole table per row.
    # setdefault keeps the first entry for a key, like a front-to-back scan.
    corrections = {}
    for sort_key, rmac in changes:
        corrections.setdefault(sort_key, rmac)

    for number, line in enumerate(lines, start=1):
        fields = line.split("\t")
        rmac = corrections.get(fields[0])
        if rmac is None:
            yield line
            continue
        if len(fields) < 5:
            raise ValueError(
                f"line {number}: row {fields[0]!r} has {len(fields)} "
                "column(s); cannot replace column 5"
            )
        # When the fifth column is the last one it also carries the line
        # terminator, which must survive the replacement.
        if len(fields) == 5 and fields[4].endswith("\n"):
            rmac += "\n"
        fields[4] = rmac
        # Join *all* fields back so columns past the seventh are not lost.
        yield "\t".join(fields)


# utf_8_sig: a leading BOM is dropped on read and written again on output.
with open(inputFile, 'r', encoding="utf_8_sig") as f:
    Lines = f.readlines()

# Finish the corrections before touching the output file, so a malformed row
# does not leave a truncated result behind.
_corrected = list(_apply_corrections(Lines, ChangeList))

with open(outputFile, 'w', encoding='utf_8_sig') as f:
    f.writelines(_corrected)