Work on MAST PDF
This commit is contained in:
parent
e2b88b6805
commit
265e05de6d
|
@ -1,5 +1,7 @@
|
|||
# Builds easily searchable files from current OGNT and MAST-HB XML files
|
||||
use 5.12.0;
|
||||
# Builds easily searchable files from current OGNT and MAST-HB XML files
|
||||
# Takes verse at a time from slurped file
|
||||
use 5.18.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use utf8;
|
||||
|
@ -22,6 +24,8 @@ while (<DATA>) {
|
|||
|
||||
foreach my $folder (@folders) {
|
||||
say "$folder";
|
||||
#system "cd $folder;xml val *.xml;echo 'Continue? (Control + C to quit, Enter to continue)';read name;";
|
||||
|
||||
my $topDir = $folder;
|
||||
|
||||
my @filesToRun = ();
|
||||
|
@ -30,24 +34,24 @@ foreach my $folder (@folders) {
|
|||
@filesToRun = sort @filesToRun;
|
||||
foreach my $file ( @filesToRun ) {
|
||||
say $file;
|
||||
open (IN, $file) or die "$!";
|
||||
while (<IN>) {
|
||||
my ($bk, $ch, $vs, $lemma, $word);
|
||||
chomp;
|
||||
if (/<verse osisID="(.*)\.(\d+)\.(\d+)">/) {
|
||||
($bk, $ch, $vs) = ($long{$1}, $2, $3);
|
||||
$outText .= "\n$bk $ch:$vs\t"
|
||||
} elsif (/<w lemma="([^"]*)" morph=".*" lexeme=".*">(.*)<\/w>/ && not /<note type="variant">/) {
|
||||
($lemma, $word) = ($1, $2);
|
||||
$lemma =~ s/^[^\d]*(\d{1,4})[^\d]*$/G$1/;
|
||||
$outText .= "$word <$lemma> "
|
||||
} elsif (/<w lemma="([^"]*)" (n="[^"]*" )?morph="[^"]*" id="[^"]*">([^<]*)<\/w>/ && not /<note type="variant">/) {
|
||||
($lemma, $word) = ($1, $3);
|
||||
$lemma =~ s/^[^\d]*(\d{1,4})[^\d]*$/H$1/;
|
||||
$word =~ s/\///g;
|
||||
$outText .= "$word <$lemma> "
|
||||
}
|
||||
my $fileText = read_file("$file", binmode => 'utf8');
|
||||
my ($bk, $ch, $vs, $lemma, $word, $nbk, $nch, $nvs, $previous, $current, $interruption, $verse);
|
||||
while ($fileText =~ /<verse osisID="(.*)\.(\d+)\.(\d+)".*?<\/verse>/spg) {
|
||||
$verse = $&;
|
||||
($bk, $ch, $vs) = ($long{$1}, $2, $3);
|
||||
$previous = $current;
|
||||
$current = "$bk $ch:$vs";
|
||||
if ($verse =~ /<note>KJV:(.*)\.(.*).(.*)<\/note>/) {
|
||||
($nbk, $nch, $nvs) = ($long{$1}, $2, $3);
|
||||
$interruption = "$nbk $nch:$nvs";
|
||||
if ($interruption ne $current) {
|
||||
$current = $interruption;
|
||||
$outText .= "\n$current\t"
|
||||
}
|
||||
$outText .= "\n$current\t"
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ while (<IN>) {
|
|||
Separate();
|
||||
}
|
||||
|
||||
say OUT " </verse>\n </chapter>\n </div>\n </book>\n</xml>";
|
||||
say OUT " </verse>\n </chapter>\n </div>\n</xml>";
|
||||
|
||||
say "Closing input and output files ...";
|
||||
|
||||
|
@ -54,20 +54,20 @@ sub Separate {
|
|||
my ($this_bk) = ($bk{$bn});
|
||||
$bklc = lc $bk{$bn};
|
||||
if (OUT-> opened()) {
|
||||
say OUT " </verse>\n </chapter>\n </div>\n </book>\n</xml>";
|
||||
say OUT " </verse>\n </chapter>\n </book>\n</xml>";
|
||||
close OUT;
|
||||
}
|
||||
open OUT, ">:utf8", "OGNT_for_tagging/$bn-$bk{$bn}.xml" or die "$! $bn-$bk{$bn}.xml";
|
||||
say OUT "\n<xml>\n <book>\n <div type=\"book\" osisID=\"$bklc\">\n <chapter osisID=\"$bklc.$ch\">\n <verse osisID=\"$bklc.$ch.$vs\">";
|
||||
say OUT "\n<xml>\n <div type=\"book\" osisID=\"$bklc\">\n <chapter osisID=\"$bklc.$ch\">\n <verse osisID=\"$bklc.$ch.$vs\">";
|
||||
($last_bn, $last_ch, $last_vs) = ($bn, $ch, $vs)
|
||||
}
|
||||
elsif ($ch ne $last_ch) {
|
||||
say OUT " </verse>\n </chapter>\n <chapter osisID=\"$bklc.$ch\">\n <verse osisID=\"$bklc.$ch.$vs\">";
|
||||
say OUT " </verse>\n </chapter>\n <chapter osisID=\"$bklc.$ch\">\n <verse osisID=\"$bklc.$ch.$vs\">";
|
||||
($last_ch, $last_vs) = ($ch, $vs)
|
||||
}
|
||||
elsif ($vs ne $last_vs) {
|
||||
my ($this_bk, $bklc) = ($bk{$bn}, lc $bk{$bn});
|
||||
say OUT " </verse>\n <verse osisID=\"$bklc.$ch.$vs\">";
|
||||
say OUT " </verse>\n <verse osisID=\"$bklc.$ch.$vs\">";
|
||||
$last_vs = $vs;
|
||||
}
|
||||
say OUT "\t\t\t\t\t<w OGNTsort=\"$OGNTSort\" ULBorder=\"---\" lemma=\"G$sn\" morph=\"$gram\" lexeme=\"$lexeme\">$word</w>"
|
||||
|
|
|
@ -1,102 +0,0 @@
|
|||
#!/usr/bin/perl
# Copies every mapped file found in $oldnames into $newnames under the new
# name given by the old-name<TAB>new-name table in the __DATA__ section.
# Files with no entry in the table are listed at the end instead of copied.
use warnings;
use strict;

use autodie;
use File::Copy;

# Old file name => new file name, one tab-separated pair per __DATA__ line.
my %filenames;

while (<DATA>) {
    chomp;
    if (/([^\t]*)\t([^\t]*)/) {
        $filenames{$1} = $2;
    }
}

# Capture the script name, in case we are running the script from the
# same directory we are working on.
my $this_file = (split(/\//, $0))[-1];
print "skipping file: $this_file\n";

my $oldnames = "/home/henry/Documents/WA_Repo/OSHB";
my $newnames = "/home/henry/Documents/WA_Repo/MAST_HB";

# Open the source directory.
opendir(my $dh, $oldnames);

# Drop directories and possibly this script. readdir returns bare names, so
# the -d test must be made against the full path: the chdir into $oldnames
# has not happened yet at this point.
my @files_to_rename =
    grep { !( -d "$oldnames/$_" ) && $_ ne $this_file } readdir $dh;
closedir $dh;

# Files present in $oldnames that have no entry in %filenames.
my @missing_new_file = ();

# Change directory, so we don't have to worry about pathing
# of files to copy.
chdir($oldnames);
mkdir($newnames) if !-e $newnames;

for my $file (@files_to_rename) {
    # Check that the current file exists in the hash;
    # if so, copy the old file to the new location with the new name.
    if ( exists $filenames{$file} ) {
        # File::Copy::copy is not covered by autodie, so check it by hand.
        copy($file, "$newnames/$filenames{$file}")
            or warn "Could not copy $file to $newnames/$filenames{$file}: $!\n";
    }
    else {
        push @missing_new_file, $file;
    }
}

if (@missing_new_file) {
    print "Could not map files:\n",
        join("\n", @missing_new_file), "\n";
}
|
||||
|
||||
|
||||
__DATA__
|
||||
Gen.xml 01-GEN.xml
|
||||
Exod.xml 02-EXO.xml
|
||||
Lev.xml 03-LEV.xml
|
||||
Num.xml 04-NUM.xml
|
||||
Deut.xml 05-DEU.xml
|
||||
Josh.xml 06-JOS.xml
|
||||
Judg.xml 07-JDG.xml
|
||||
Ruth.xml 08-RUT.xml
|
||||
1Sam.xml 09-1SA.xml
|
||||
2Sam.xml 10-2SA.xml
|
||||
1Kgs.xml 11-1KI.xml
|
||||
2Kgs.xml 12-2KI.xml
|
||||
1Chr.xml 13-1CH.xml
|
||||
2Chr.xml 14-2CH.xml
|
||||
Ezra.xml 15-EZR.xml
|
||||
Neh.xml 16-NEH.xml
|
||||
Esth.xml 17-EST.xml
|
||||
Job.xml 18-JOB.xml
|
||||
Ps.xml 19-PSA.xml
|
||||
Prov.xml 20-PRO.xml
|
||||
Eccl.xml 21-ECC.xml
|
||||
Song.xml 22-SNG.xml
|
||||
Isa.xml 23-ISA.xml
|
||||
Jer.xml 24-JER.xml
|
||||
Lam.xml 25-LAM.xml
|
||||
Ezek.xml 26-EZK.xml
|
||||
Dan.xml 27-DAN.xml
|
||||
Hos.xml 28-HOS.xml
|
||||
Joel.xml 29-JOL.xml
|
||||
Amos.xml 30-AMO.xml
|
||||
Obad.xml 31-OBA.xml
|
||||
Jonah.xml 32-JON.xml
|
||||
Mic.xml 33-MIC.xml
|
||||
Nah.xml 34-NAM.xml
|
||||
Hab.xml 35-HAB.xml
|
||||
Zeph.xml 36-ZEP.xml
|
||||
Hag.xml 37-HAG.xml
|
||||
Zech.xml 38-ZEC.xml
|
||||
Mal.xml 39-MAL.xml
|
|
@ -1,63 +0,0 @@
|
|||
# Step 1 in updating OSHB to MAST_HB
# Converts file names to WA MAST abbreviations
# After this need to run Convert_refs_in_MAST_HB.pl to get data in file to conform
use strict;
use warnings;

# Old file name => new file name, one tab-separated pair per __DATA__ line.
my %new_name;

while (<DATA>) {
    chomp;
    if (/([^\t]*)\t([^\t]*)/) {
        $new_name{$1} = $2;
    }
}

# The files are renamed in place, so work from inside the target directory.
my $work_dir = "/home/henry/Documents/WA_Repo/MAST_HB";
chdir($work_dir) or die "Cannot chdir to $work_dir: $!\n";

foreach my $old_name (sort keys %new_name) {
    print "$old_name to $new_name{$old_name}\n";
    # List form runs mv directly, with no shell to reinterpret the names.
    system('mv', $old_name, $new_name{$old_name}) == 0
        or warn "mv $old_name $new_name{$old_name} failed\n";
}

print "Done.\n";
|
||||
|
||||
__DATA__
|
||||
Gen.xml 01-GEN.xml
|
||||
Exod.xml 02-EXO.xml
|
||||
Lev.xml 03-LEV.xml
|
||||
Num.xml 04-NUM.xml
|
||||
Deut.xml 05-DEU.xml
|
||||
Josh.xml 06-JOS.xml
|
||||
Judg.xml 07-JDG.xml
|
||||
Ruth.xml 08-RUT.xml
|
||||
1Sam.xml 09-1SA.xml
|
||||
2Sam.xml 10-2SA.xml
|
||||
1Kgs.xml 11-1KI.xml
|
||||
2Kgs.xml 12-2KI.xml
|
||||
1Chr.xml 13-1CH.xml
|
||||
2Chr.xml 14-2CH.xml
|
||||
Ezra.xml 15-EZR.xml
|
||||
Neh.xml 16-NEH.xml
|
||||
Esth.xml 17-EST.xml
|
||||
Job.xml 18-JOB.xml
|
||||
Ps.xml 19-PSA.xml
|
||||
Prov.xml 20-PRO.xml
|
||||
Eccl.xml 21-ECC.xml
|
||||
Song.xml 22-SNG.xml
|
||||
Isa.xml 23-ISA.xml
|
||||
Jer.xml 24-JER.xml
|
||||
Lam.xml 25-LAM.xml
|
||||
Ezek.xml 26-EZK.xml
|
||||
Dan.xml 27-DAN.xml
|
||||
Hos.xml 28-HOS.xml
|
||||
Joel.xml 29-JOL.xml
|
||||
Amos.xml 30-AMO.xml
|
||||
Obad.xml 31-OBA.xml
|
||||
Jonah.xml 32-JON.xml
|
||||
Mic.xml 33-MIC.xml
|
||||
Nah.xml 34-NAM.xml
|
||||
Hab.xml 35-HAB.xml
|
||||
Zeph.xml 36-ZEP.xml
|
||||
Hag.xml 37-HAG.xml
|
||||
Zech.xml 38-ZEC.xml
|
||||
Mal.xml 39-MAL.xml
|
|
@ -1,66 +0,0 @@
|
|||
# Step 1 in updating OSHB to MAST_HB
# Converts file names to WA MAST abbreviations
# After this need to run Convert_refs_in_MAST_HB.pl to get data in file to conform
use strict;
use warnings;

# Old file name => new file name, one tab-separated pair per __DATA__ line.
my %new_name;

while (<DATA>) {
    chomp;
    if (/([^\t]*)\t([^\t]*)/) {
        $new_name{$1} = $2;
    }
}

my $oldnames = "/Users/virginiawhitney/Documents/Henry/WA/Repos/OSHB";
my $newnames = "/Users/virginiawhitney/Documents/Henry/WA/Repos/MAST_HB";

# Source names are used relative to $oldnames; destinations are absolute.
chdir($oldnames) or die "Cannot chdir to $oldnames: $!\n";
if ( !-e $newnames ) {
    mkdir($newnames) or die "Cannot create $newnames: $!\n";
}

foreach my $old_name (sort keys %new_name) {
    my $target = "$newnames/$new_name{$old_name}";
    print "$old_name to $target\n";
    # List form runs mv directly, with no shell to reinterpret the names.
    system('mv', $old_name, $target) == 0
        or warn "mv $old_name $target failed\n";
}

print "Done.\n";
|
||||
|
||||
__DATA__
|
||||
Gen.xml 01-GEN.xml
|
||||
Exod.xml 02-EXO.xml
|
||||
Lev.xml 03-LEV.xml
|
||||
Num.xml 04-NUM.xml
|
||||
Deut.xml 05-DEU.xml
|
||||
Josh.xml 06-JOS.xml
|
||||
Judg.xml 07-JDG.xml
|
||||
Ruth.xml 08-RUT.xml
|
||||
1Sam.xml 09-1SA.xml
|
||||
2Sam.xml 10-2SA.xml
|
||||
1Kgs.xml 11-1KI.xml
|
||||
2Kgs.xml 12-2KI.xml
|
||||
1Chr.xml 13-1CH.xml
|
||||
2Chr.xml 14-2CH.xml
|
||||
Ezra.xml 15-EZR.xml
|
||||
Neh.xml 16-NEH.xml
|
||||
Esth.xml 17-EST.xml
|
||||
Job.xml 18-JOB.xml
|
||||
Ps.xml 19-PSA.xml
|
||||
Prov.xml 20-PRO.xml
|
||||
Eccl.xml 21-ECC.xml
|
||||
Song.xml 22-SNG.xml
|
||||
Isa.xml 23-ISA.xml
|
||||
Jer.xml 24-JER.xml
|
||||
Lam.xml 25-LAM.xml
|
||||
Ezek.xml 26-EZK.xml
|
||||
Dan.xml 27-DAN.xml
|
||||
Hos.xml 28-HOS.xml
|
||||
Joel.xml 29-JOL.xml
|
||||
Amos.xml 30-AMO.xml
|
||||
Obad.xml 31-OBA.xml
|
||||
Jonah.xml 32-JON.xml
|
||||
Mic.xml 33-MIC.xml
|
||||
Nah.xml 34-NAM.xml
|
||||
Hab.xml 35-HAB.xml
|
||||
Zeph.xml 36-ZEP.xml
|
||||
Hag.xml 37-HAG.xml
|
||||
Zech.xml 38-ZEC.xml
|
||||
Mal.xml 39-MAL.xml
|
|
@ -8,7 +8,7 @@ use utf8;
|
|||
use open IO => ":utf8";
|
||||
|
||||
my ($oldName, $newName);
|
||||
my $topDir = "/Users/virginiawhitney/Documents/Henry/WA/Repos/MAST_HB";
|
||||
my $topDir = "/Users/Henry/Documents/WACS/MAST_HB";
|
||||
#my $topDir = "/Users/Henry/Documents/WACS/MAST_HB";
|
||||
my (%new_name);
|
||||
|
||||
|
@ -16,7 +16,7 @@ while (<DATA>) {
|
|||
chomp;
|
||||
if (/([^\t]*)\t([^\t]*)/) {
|
||||
($oldName, $newName) = ($1, lc $2);
|
||||
$new_name{$1} = lc $2
|
||||
$new_name{$oldName} = $newName
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -31,12 +31,13 @@ foreach my $file ( @filesToRun ) {
|
|||
foreach my $key (sort keys %new_name) {
|
||||
$fileText =~ s/(osisID=")$key(\.\d+\.\d+")/$1$new_name{$key}$2/g;
|
||||
$fileText =~ s/(<note>KJV:)$key(\.\d+\.\d+<\/note>)/$1$new_name{$key}$2/g;
|
||||
$fileText =~ s/(<\/w>)\n <seg/$1<seg/g;
|
||||
}
|
||||
open(OUT, ">:utf8", "$file") or die "$file:\n$!";
|
||||
say OUT $fileText;
|
||||
close OUT;
|
||||
}
|
||||
print "Done.\n"
|
||||
print "Done.\n\nBe sure to validate XML by running\nxml val *.xml\nin both OGNT and MAST-HB directories before running\nBuild_OL_files_from_XML.pl\n"
|
||||
|
||||
__DATA__
|
||||
Gen GEN
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue