Work on MAST PDF

This commit is contained in:
Henry Whitney 2020-07-24 16:50:00 -04:00
parent e2b88b6805
commit 265e05de6d
7 changed files with 101 additions and 955 deletions

View File

@ -1,5 +1,7 @@
# Builds easily searchable files from current OGNT and MAST-HB XML files
use 5.12.0;
# Builds easily searchable files from current OGNT and MAST-HB XML file
# Takes verse at a time from slurped file
use 5.18.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use utf8;
@ -22,6 +24,8 @@ while (<DATA>) {
foreach my $folder (@folders) {
say "$folder";
#system "cd $folder;xml val *.xml;echo 'Continue? (Control + C to quit, Enter to continue)';read name;";
my $topDir = $folder;
my @filesToRun = ();
@ -30,24 +34,24 @@ foreach my $folder (@folders) {
@filesToRun = sort @filesToRun;
foreach my $file ( @filesToRun ) {
say $file;
open (IN, $file) or die "$!";
while (<IN>) {
my ($bk, $ch, $vs, $lemma, $word);
chomp;
if (/<verse osisID="(.*)\.(\d+)\.(\d+)">/) {
($bk, $ch, $vs) = ($long{$1}, $2, $3);
$outText .= "\n$bk $ch:$vs\t"
} elsif (/<w lemma="([^"]*)" morph=".*" lexeme=".*">(.*)<\/w>/ && not /<note type="variant">/) {
($lemma, $word) = ($1, $2);
$lemma =~ s/^[^\d]*(\d{1,4})[^\d]*$/G$1/;
$outText .= "$word <$lemma> "
} elsif (/<w lemma="([^"]*)" (n="[^"]*" )?morph="[^"]*" id="[^"]*">([^<]*)<\/w>/ && not /<note type="variant">/) {
($lemma, $word) = ($1, $3);
$lemma =~ s/^[^\d]*(\d{1,4})[^\d]*$/H$1/;
$word =~ s/\///g;
$outText .= "$word <$lemma> "
}
my $fileText = read_file("$file", binmode => 'utf8');
my ($bk, $ch, $vs, $lemma, $word, $nbk, $nch, $nvs, $previous, $current, $interruption, $verse);
while ($fileText =~ /<verse osisID="(.*)\.(\d+)\.(\d+)".*?<\/verse>/spg) {
$verse = $&;
($bk, $ch, $vs) = ($long{$1}, $2, $3);
$previous = $current;
$current = "$bk $ch:$vs";
if ($verse =~ /<note>KJV:(.*)\.(.*).(.*)<\/note>/) {
($nbk, $nch, $nvs) = ($long{$1}, $2, $3);
$interruption = "$nbk $nch:$nvs";
if ($interruption ne $current) {
$current = $interruption;
$outText .= "\n$current\t"
}
$outText .= "\n$current\t"
}
}
}

View File

@ -34,7 +34,7 @@ while (<IN>) {
Separate();
}
say OUT " </verse>\n </chapter>\n </div>\n </book>\n</xml>";
say OUT " </verse>\n </chapter>\n </div>\n</xml>";
say "Closing input and output files ...";
@ -54,20 +54,20 @@ sub Separate {
my ($this_bk) = ($bk{$bn});
$bklc = lc $bk{$bn};
if (OUT-> opened()) {
say OUT " </verse>\n </chapter>\n </div>\n </book>\n</xml>";
say OUT " </verse>\n </chapter>\n </book>\n</xml>";
close OUT;
}
open OUT, ">:utf8", "OGNT_for_tagging/$bn-$bk{$bn}.xml" or die "$! $bn-$bk{$bn}.xml";
say OUT "\n<xml>\n <book>\n <div type=\"book\" osisID=\"$bklc\">\n <chapter osisID=\"$bklc.$ch\">\n <verse osisID=\"$bklc.$ch.$vs\">";
say OUT "\n<xml>\n <div type=\"book\" osisID=\"$bklc\">\n <chapter osisID=\"$bklc.$ch\">\n <verse osisID=\"$bklc.$ch.$vs\">";
($last_bn, $last_ch, $last_vs) = ($bn, $ch, $vs)
}
elsif ($ch ne $last_ch) {
say OUT " </verse>\n </chapter>\n <chapter osisID=\"$bklc.$ch\">\n <verse osisID=\"$bklc.$ch.$vs\">";
say OUT " </verse>\n </chapter>\n <chapter osisID=\"$bklc.$ch\">\n <verse osisID=\"$bklc.$ch.$vs\">";
($last_ch, $last_vs) = ($ch, $vs)
}
elsif ($vs ne $last_vs) {
my ($this_bk, $bklc) = ($bk{$bn}, lc $bk{$bn});
say OUT " </verse>\n <verse osisID=\"$bklc.$ch.$vs\">";
say OUT " </verse>\n <verse osisID=\"$bklc.$ch.$vs\">";
$last_vs = $vs;
}
say OUT "\t\t\t\t\t<w OGNTsort=\"$OGNTSort\" ULBorder=\"---\" lemma=\"G$sn\" morph=\"$gram\" lexeme=\"$lexeme\">$word</w>"

View File

@ -1,102 +0,0 @@
#!/usr/bin/perl
use warnings;
use strict;
use autodie;
use File::Copy;
# Copy the OSHB book files into the MAST_HB directory under their new
# WA MAST names, using the old-name => new-name table after __DATA__.

# Load the rename table. A usable DATA line is "old<TAB>new"; lines that
# contain no tab do not match and are ignored.
my %filenames;
while (my $line = <DATA>) {
    chomp $line;
    if ($line =~ /([^\t]*)\t([^\t]*)/) {
        $filenames{$1} = $2;
    }
}

# Capture the script name, in case we are running the script from the
# same directory we are working on.
my $this_file = (split(/\//, $0))[-1];
print "skipping file: $this_file\n";

my $oldnames = "/home/henry/Documents/WA_Repo/OSHB";
my $newnames = "/home/henry/Documents/WA_Repo/MAST_HB";

# List the source directory, dropping sub-directories and possibly this
# script. readdir returns bare names, so the directory test is made against
# the full path; a bare -d would be resolved against the current working
# directory, which is not $oldnames yet at this point.
opendir(my $dh, $oldnames);
my @files_to_rename =
    grep { !-d "$oldnames/$_" && $_ ne $this_file } readdir $dh;
closedir $dh;

# Files found in the source directory that have no entry in the table.
my @missing_new_file = ();

# Change directory, so the bare names from readdir can be used directly
# as copy sources.
chdir($oldnames);
mkdir($newnames) if !-e $newnames;

for my $file (@files_to_rename) {
    # %filenames is a plain hash (not a reference), so index it directly.
    if (exists $filenames{$file}) {
        # File::Copy::copy is not wrapped by autodie, so report failures here.
        copy($file, "$newnames/$filenames{$file}")
            or warn "Could not copy $file to $newnames/$filenames{$file}: $!\n";
    }
    else {
        push @missing_new_file, $file;
    }
}

if (@missing_new_file) {
    print "Could not map files:\n", join("\n", @missing_new_file), "\n";
}
__DATA__
Gen.xml 01-GEN.xml
Exod.xml 02-EXO.xml
Lev.xml 03-LEV.xml
Num.xml 04-NUM.xml
Deut.xml 05-DEU.xml
Josh.xml 06-JOS.xml
Judg.xml 07-JDG.xml
Ruth.xml 08-RUT.xml
1Sam.xml 09-1SA.xml
2Sam.xml 10-2SA.xml
1Kgs.xml 11-1KI.xml
2Kgs.xml 12-2KI.xml
1Chr.xml 13-1CH.xml
2Chr.xml 14-2CH.xml
Ezra.xml 15-EZR.xml
Neh.xml 16-NEH.xml
Esth.xml 17-EST.xml
Job.xml 18-JOB.xml
Ps.xml 19-PSA.xml
Prov.xml 20-PRO.xml
Eccl.xml 21-ECC.xml
Song.xml 22-SNG.xml
Isa.xml 23-ISA.xml
Jer.xml 24-JER.xml
Lam.xml 25-LAM.xml
Ezek.xml 26-EZK.xml
Dan.xml 27-DAN.xml
Hos.xml 28-HOS.xml
Joel.xml 29-JOL.xml
Amos.xml 30-AMO.xml
Obad.xml 31-OBA.xml
Jonah.xml 32-JON.xml
Mic.xml 33-MIC.xml
Nah.xml 34-NAM.xml
Hab.xml 35-HAB.xml
Zeph.xml 36-ZEP.xml
Hag.xml 37-HAG.xml
Zech.xml 38-ZEC.xml
Mal.xml 39-MAL.xml

View File

@ -1,63 +0,0 @@
# Step 1 in updating OSHB to MAST_HB
# Converts file names to WA MAST abbreviations
# After this need to run Convert_refs_in_MAST_HB.pl to get data in file to conform
# Rename the OSHB-style book files inside the MAST_HB directory to their
# WA MAST names, using the old-name => new-name table after __DATA__.

# Load the rename table. A usable DATA line is "old<TAB>new"; lines that
# contain no tab do not match and are ignored.
my %new_name;
while (my $line = <DATA>) {
    chomp $line;
    if ($line =~ /([^\t]*)\t([^\t]*)/) {
        $new_name{$1} = $2;
    }
}

# Work inside the target directory so the bare file names resolve there.
# Stop if the directory is unavailable: renaming relative to some other
# directory would touch the wrong files.
my $mast_dir = "/home/henry/Documents/WA_Repo/MAST_HB";
chdir($mast_dir) or die "Cannot chdir to $mast_dir: $!\n";

foreach my $old_name (sort keys %new_name) {
    print "$old_name to $new_name{$old_name}\n";
    # Both names are in the current directory, so the builtin rename does
    # the job without passing file names through a shell.
    rename($old_name, $new_name{$old_name})
        or warn "Could not rename $old_name to $new_name{$old_name}: $!\n";
}
print "Done.\n";
__DATA__
Gen.xml 01-GEN.xml
Exod.xml 02-EXO.xml
Lev.xml 03-LEV.xml
Num.xml 04-NUM.xml
Deut.xml 05-DEU.xml
Josh.xml 06-JOS.xml
Judg.xml 07-JDG.xml
Ruth.xml 08-RUT.xml
1Sam.xml 09-1SA.xml
2Sam.xml 10-2SA.xml
1Kgs.xml 11-1KI.xml
2Kgs.xml 12-2KI.xml
1Chr.xml 13-1CH.xml
2Chr.xml 14-2CH.xml
Ezra.xml 15-EZR.xml
Neh.xml 16-NEH.xml
Esth.xml 17-EST.xml
Job.xml 18-JOB.xml
Ps.xml 19-PSA.xml
Prov.xml 20-PRO.xml
Eccl.xml 21-ECC.xml
Song.xml 22-SNG.xml
Isa.xml 23-ISA.xml
Jer.xml 24-JER.xml
Lam.xml 25-LAM.xml
Ezek.xml 26-EZK.xml
Dan.xml 27-DAN.xml
Hos.xml 28-HOS.xml
Joel.xml 29-JOL.xml
Amos.xml 30-AMO.xml
Obad.xml 31-OBA.xml
Jonah.xml 32-JON.xml
Mic.xml 33-MIC.xml
Nah.xml 34-NAM.xml
Hab.xml 35-HAB.xml
Zeph.xml 36-ZEP.xml
Hag.xml 37-HAG.xml
Zech.xml 38-ZEC.xml
Mal.xml 39-MAL.xml

View File

@ -1,66 +0,0 @@
# Step 1 in updating OSHB to MAST_HB
# Converts file names to WA MAST abbreviations
# After this need to run Convert_refs_in_MAST_HB.pl to get data in file to conform
# Move the OSHB book files into the MAST_HB directory, renaming each one
# to its WA MAST name from the old-name => new-name table after __DATA__.

# Load the rename table. A usable DATA line is "old<TAB>new"; lines that
# contain no tab do not match and are ignored.
my %new_name;
while (my $line = <DATA>) {
    chomp $line;
    if ($line =~ /([^\t]*)\t([^\t]*)/) {
        $new_name{$1} = $2;
    }
}

my $oldnames = "/Users/virginiawhitney/Documents/Henry/WA/Repos/OSHB";
my $newnames = "/Users/virginiawhitney/Documents/Henry/WA/Repos/MAST_HB";

# Work inside the source directory so the bare old names resolve there.
# Stop if that fails: otherwise mv would run against whatever directory
# the script happened to be started from.
chdir($oldnames) or die "Cannot chdir to $oldnames: $!\n";

# Create the destination directory on first use.
if (!-e $newnames) {
    mkdir($newnames) or die "Cannot create $newnames: $!\n";
}

foreach my $old_name (sort keys %new_name) {
    my $target = "$newnames/$new_name{$old_name}";
    print "$old_name to $target\n";
    # List-form system runs mv directly, with no shell in between, so file
    # names are passed through verbatim; a non-zero status is reported
    # instead of being silently dropped.
    system('mv', $old_name, $target) == 0
        or warn "mv $old_name $target failed (wait status $?)\n";
}
print "Done.\n";
__DATA__
Gen.xml 01-GEN.xml
Exod.xml 02-EXO.xml
Lev.xml 03-LEV.xml
Num.xml 04-NUM.xml
Deut.xml 05-DEU.xml
Josh.xml 06-JOS.xml
Judg.xml 07-JDG.xml
Ruth.xml 08-RUT.xml
1Sam.xml 09-1SA.xml
2Sam.xml 10-2SA.xml
1Kgs.xml 11-1KI.xml
2Kgs.xml 12-2KI.xml
1Chr.xml 13-1CH.xml
2Chr.xml 14-2CH.xml
Ezra.xml 15-EZR.xml
Neh.xml 16-NEH.xml
Esth.xml 17-EST.xml
Job.xml 18-JOB.xml
Ps.xml 19-PSA.xml
Prov.xml 20-PRO.xml
Eccl.xml 21-ECC.xml
Song.xml 22-SNG.xml
Isa.xml 23-ISA.xml
Jer.xml 24-JER.xml
Lam.xml 25-LAM.xml
Ezek.xml 26-EZK.xml
Dan.xml 27-DAN.xml
Hos.xml 28-HOS.xml
Joel.xml 29-JOL.xml
Amos.xml 30-AMO.xml
Obad.xml 31-OBA.xml
Jonah.xml 32-JON.xml
Mic.xml 33-MIC.xml
Nah.xml 34-NAM.xml
Hab.xml 35-HAB.xml
Zeph.xml 36-ZEP.xml
Hag.xml 37-HAG.xml
Zech.xml 38-ZEC.xml
Mal.xml 39-MAL.xml

View File

@ -8,7 +8,7 @@ use utf8;
use open IO => ":utf8";
my ($oldName, $newName);
my $topDir = "/Users/virginiawhitney/Documents/Henry/WA/Repos/MAST_HB";
my $topDir = "/Users/Henry/Documents/WACS/MAST_HB";
#my $topDir = "/Users/Henry/Documents/WACS/MAST_HB";
my (%new_name);
@ -16,7 +16,7 @@ while (<DATA>) {
chomp;
if (/([^\t]*)\t([^\t]*)/) {
($oldName, $newName) = ($1, lc $2);
$new_name{$1} = lc $2
$new_name{$oldName} = $newName
}
}
@ -31,12 +31,13 @@ foreach my $file ( @filesToRun ) {
foreach my $key (sort keys %new_name) {
$fileText =~ s/(osisID=")$key(\.\d+\.\d+")/$1$new_name{$key}$2/g;
$fileText =~ s/(<note>KJV:)$key(\.\d+\.\d+<\/note>)/$1$new_name{$key}$2/g;
$fileText =~ s/(<\/w>)\n <seg/$1<seg/g;
}
open(OUT, ">:utf8", "$file") or die "$file:\n$!";
say OUT $fileText;
close OUT;
}
print "Done.\n"
print "Done.\n\nBe sure to validate XML by running\nxml val *.xml\nin both OGNT and MAST-HB directories before running\nBuild_OL_files_from_XML.pl\n"
__DATA__
Gen GEN

File diff suppressed because it is too large Load Diff