Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Build_MAST_NT_from_csv.pl

102 lines
2.3 KiB
Perl
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
binmode STDOUT, ":encoding(UTF-8)";
# ---------------------------------------------------------------------------
# Driver: load the book-code table from __DATA__, then stream the input CSV
# through Separate(), which writes one XML file per book.
#
# The lexicals and bareword handles declared here (%bk, $last_bn, $last_ch,
# $last_vs, $bklc, LOG, IN, OUT) are shared with Separate(), so their names
# must stay as they are.
# ---------------------------------------------------------------------------

# The log only receives a dump of the parsed fields, so a failure to open it
# is reported but does not stop the conversion.
open LOG, ">", "Logs/log.log"
    or warn "Cannot open Logs/log.log for writing: $!";

my %bk;                                                  # book number => three-letter code
my ($last_bn, $last_ch, $last_vs) = ("00", "00", "00");  # position of the previous row
my $bklc;                                                # lower-cased code of the current book

say "Reading data ...";
while (my $entry = <DATA>) {
    chomp $entry;
    # Entries look like "41-MAT.xml": two digits, a dash, then a three-character code.
    $bk{$1} = $2 if $entry =~ /^(\d\d)-(...)/;
}

#open IN, "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/OGNT/OpenGNT_version3_3.csv" or die "$!";
my $input_path = "/Users/Henry/Documents/WACS/OpenGNT_version3_3.csv";
open IN, "<", $input_path or die "Cannot open $input_path: $!";

say "Reading input ...";
while (<IN>) {
    chomp;
    Separate();    # Separate() works on the current line in $_
}

# Only finish the last book if Separate() actually opened an output file;
# otherwise there is nothing to terminate or close.
my $have_output = defined fileno(OUT);
say OUT " </verse>\n </chapter>\n </book>\n</xml>" if $have_output;

say "Closing input and output files ...";
if ($have_output) {
    # Buffered write errors only surface at close time, so check it.
    close OUT or die "Cannot close the last output file: $!";
}
close IN;
close LOG;
say "Done.";
# Separate: turn the current input row (in $_) into one <w> element.
#
# Input  : $_ holds one chomped, tab-separated row of the input CSV.
# Output : appends to the per-book XML file on the OUT handle, opening a new
#          file when the book number changes, and writes the parsed fields
#          to LOG.
# State  : reads %bk and reads/updates $last_bn, $last_ch, $last_vs and $bklc,
#          all declared at file level.
# Rows that do not match the expected layout are skipped silently.
sub Separate {
    # NOTE(review): the delimiter characters of this pattern were not visible
    # in the reviewed copy of the file (the three number captures were
    # directly adjacent and the character classes were unterminated).  They
    # are restored here as explicit code points, following the published
    # OpenGNT layout: U+3014 and U+3015 enclose a compound column, U+FF5C
    # separates its fields.  Confirm against the actual data file.
    my ($bn, $ch, $vs, $word, $lexeme, $gram, $sn) = m/
        (?: [^\t]* \t ){4}              # four leading columns, ignored
        . \t                            # a single-character column, ignored
        [^\t]* \t                       # one more ignored column
        \x{3014} (\d+) \x{FF5C} (\d+) \x{FF5C} (\d+) \x{3015} \t   # book, chapter, verse
        \x{3014}
            [^\x{FF5C}]* \x{FF5C}       # first field of the word column, ignored
            [^\x{FF5C}]* \x{FF5C}       # second field, ignored
            ([^\x{FF5C}]*) \x{FF5C}     # word text
            ([^\x{FF5C}]*) \x{FF5C}     # lexeme
            ([^\x{FF5C}]*) \x{FF5C}     # grammar code
            ([^\x{3015}]*)              # number written to the lemma attribute
        \x{3015}
    /x or return;

    # Log the fields exactly as captured, before any adjustment below.
    say LOG "$bn, $ch, $vs, $word, $lexeme, $gram, $sn";

    $sn =~ s/[GH]//;    # drop the first G or H (presumably a Strong's prefix)
    $bn = $bn + 1;      # keys of %bk start at 41; presumably the CSV starts at 40

    if ($bn ne $last_bn) {
        # New book: finish the previous file (if any) and start a new one.
        my $code = $bk{$bn};
        defined $code
            or die "No book code known for book number $bn (input line $.)";
        $bklc = lc $code;

        if (defined fileno(OUT)) {
            say OUT " </verse>\n </chapter>\n </book>\n</xml>";
            close OUT or die "Cannot close the previous book file: $!";
        }

        my $file = "$bn-$code.xml";
        open OUT, ">:utf8", "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/OGNT/$file"
            or die "$! $file";

        # NOTE(review): the book is opened as <div type="book"> but closed as
        # </book>; left unchanged because the same footer is also written by
        # the top-level code and the consumers of these files are unknown.
        say OUT qq{\n<xml>\n <div type="book" osisID="$bklc">\n <chapter osisID="$bklc.$ch">\n <verse osisID="$bklc.$ch.$vs">};
        ($last_bn, $last_ch, $last_vs) = ($bn, $ch, $vs);
    }
    elsif ($ch ne $last_ch) {
        # Same book, new chapter: close the open verse and chapter first.
        say OUT qq{ </verse>\n </chapter>\n <chapter osisID="$bklc.$ch">\n <verse osisID="$bklc.$ch.$vs">};
        ($last_ch, $last_vs) = ($ch, $vs);
    }
    elsif ($vs ne $last_vs) {
        # Same chapter, new verse.  $bklc is still valid: the book is unchanged.
        say OUT qq{ </verse>\n <verse osisID="$bklc.$ch.$vs">};
        $last_vs = $vs;
    }

    say OUT qq{\t\t\t\t<w lemma="$sn" morph="$gram" lexeme="$lexeme">$word</w>};
}
__DATA__
41-MAT.xml
42-MRK.xml
43-LUK.xml
44-JHN.xml
45-ACT.xml
46-ROM.xml
47-1CO.xml
48-2CO.xml
49-GAL.xml
50-EPH.xml
51-PHP.xml
52-COL.xml
53-1TH.xml
54-2TH.xml
55-1TI.xml
56-2TI.xml
57-TIT.xml
58-PHM.xml
59-HEB.xml
60-JAS.xml
61-1PE.xml
62-2PE.xml
63-1JN.xml
64-2JN.xml
65-3JN.xml
66-JUD.xml
67-REV.xml