2021-10-22 01:57:19 +00:00
|
|
|
|
# Mark emphasis on all verbals
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
####### —————————————— Processing OGNTa and OGNTa-TC ——————————————

# Punctuation characters that get wrapped in <mark class="pm">…</mark>.
_PUNCTUATION = re.compile(r'([\.,;·])')

# Tag patterns, matched against the start of the fifth column.
# Each alternative mirrors one of the tag layouts the script recognises.
_VERB = re.compile(r'V-(?:...|...⁞...|....)-\d.')
# A tag of the form 'V-..P-...⁞...' already matches 'V-..P-...', so it needs
# no alternative of its own.
_PARTICIPLE = re.compile(r'V-(?:..P-...|...P-...)')
_INFINITIVE = re.compile(r'V-(?:..N|...N)')


def _mark_line(line):
    """Return one tab-separated record with punctuation and verbals marked up.

    In the third column every ``.``, ``,``, ``;`` or ``·`` is replaced by a
    space followed by the character wrapped in ``<mark class="pm">``.  The
    whole third column is then wrapped according to the tag in the fifth
    column: ``<strong>`` for verbs, ``<em>`` for participles and infinitives,
    and nothing otherwise.

    Only the first six columns are emitted and no line terminator is added;
    the sixth column is written exactly as read.

    Raises:
        IndexError: if ``line`` has fewer than six tab-separated columns.
    """
    cols = line.split("\t")
    text = _PUNCTUATION.sub(r' <mark class="pm">\1</mark>', cols[2])
    tag = cols[4]

    # Order matters: the verb test runs first, then participle/infinitive.
    if _VERB.match(tag):
        text = "<strong>" + text + "</strong>"
    elif _PARTICIPLE.match(tag) or _INFINITIVE.match(tag):
        text = "<em>" + text + "</em>"

    return "\t".join((cols[0], cols[1], text, cols[3], tag, cols[5]))


def _mark_file(input_path, output_path):
    """Read ``input_path``, mark up every line and write ``output_path``.

    The input is read completely and closed before the output is opened, so
    a missing input file never creates or truncates the output.  Both files
    use the ``utf_8_sig`` codec.  The ``with`` blocks guarantee the handles
    are closed even when a malformed line raises.
    """
    with open(input_path, 'r', encoding="utf_8_sig") as src:
        lines = src.readlines()

    with open(output_path, 'w', encoding="utf_8_sig") as dst:
        for line in lines:
            dst.write(_mark_line(line))


# Each input is fully processed before the next one is opened.
for _input_path, _output_path in (
    ("./OGNTa.txt", "./tmp/OGNTa-marked.txt"),
    ("./OGNTa-TC.txt", "./tmp/OGNTa-TC-marked.txt"),
):
    _mark_file(_input_path, _output_path)
|