# Source path: Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.MAST_HB.pl
#
# 681 lines
# 23 KiB
# Perl

# Produces a list of tWs for each verse by linking MAST_HB to the ULB through tWs.
# Adapted from tWs.from.UGNT.7.pl, with the changes needed because that script
# read USFM and this one reads XML.
# The output from this script is used for the interleaved PDFs used in MAST.
# This version uses an exceptions file to handle places where the MAST_HB points to
# a tW page different from the one on which the ULB term appears.
# Usage: make sure $ULBfile names the correct input file, then run the script.
# Output is written to the $output file.
# Afterwards, check the $not_handled file for needed corrections; these are
# usually lines that need to be added to the $exceptions file.
# Change from ver. 2: this uses converted MAST_HB files.
# Change from ver. 3: this uses the KJV verses in the XML file, not the remaps file.
use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use FindBin '$Bin';
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
use List::MoreUtils qw(uniq);
$|=1;
$"="\n";
# ==============================
# Main program.
# Sets up platform-dependent paths, declares the file-level state shared by
# all subs, loads the book-abbreviation table from __DATA__, then runs the
# processing steps in order. Three different log files are written in turn
# through the same LOG handle.
# ==============================
my ($pwd, $d) = ($Bin, "/");
if ($^O eq "MSWin32") {
    $d = "\\";
    $pwd =~ s/\//\\/g;
}
my ($udf) = "User_defaults.windows.txt";
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}
# Fail early if the user-defaults file is missing. GetUserDefaults() opens and
# reads the file itself, so this handle is closed again right away.
open (my $defaults, "<:utf8", "$pwd${d}User${d}$udf") or die "$pwd${d}User${d}$udf:\n$!";
close $defaults;
# $cv starts as the STRING "00000" on purpose: LinkULBtoCV increments it with
# ++, and Perl's string auto-increment keeps the zero padding, so the keys of
# %ref sort correctly with a plain string sort.
my ($cv, $ULBfile, $exceptions, $not_handled, $exc_log, $tW_log, $log, $output, $workFile) =
    ("00000",
     "$Bin${d}Temp${d}Extract.txt",
     "$Bin${d}Exceptions${d}Exceptions_tWs_from_MAST_HB.txt",
     "$Bin${d}Output${d}Entries_not_handled.txt",
     "Logs${d}Exc_log.log",
     "Logs${d}tW_pairs.log",
     "Logs${d}tWs_from_MAST.log",
     "Output${d}tWs_for_PDF.txt",
     "User${d}tW_work_OT.txt",
    );
my ($MAST_HBfile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir);
my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
    %specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
    %SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pageThisEntry, %adjust, %newRef);
my $book;
my (@MAST_HBfileList);
# ==============================
# The relative log/output/work paths above are resolved against the script
# directory from here on.
chdir("$pwd");
open LOG, ">:utf8", "$exc_log" or die "\$exc_log: $exc_log: $!";
open OUT, ">:utf8", $output or die "$output: $!";
open MISSING, ">:utf8", $not_handled or die "$not_handled: $!";
# Load the abbreviation <-> full book name table from __DATA__. The first
# whitespace-free field is the abbreviation and the rest of the line is the
# full name, so both tab- and space-separated tables are accepted.
while (my $dataLine = <DATA>) {
    chomp $dataLine;
    if ($dataLine =~ /^(\S+)\s+(\S.*?)\s*$/) {
        my ($abbr, $full) = ($1, $2);
        $bkAbr{$full} = $abbr;
        $bkFull{$abbr} = $full;
        say LOG "\$bkAbr{$full}: $bkAbr{$full}, \$bkFull{$abbr}: $bkFull{$abbr}";
    }
}
GetUserDefaults();
GetULBBooksToProcess();
ReadExceptions();
close LOG;
open LOG, ">:utf8", "$tW_log" or die "\$tW_log: $tW_log: $!";
PairtWEntriesTotWPageAndUniqSNs();
close LOG;
open LOG, ">:utf8", "$log" or die "\$log: $log: $!";
ReadLinkedSNs();
LinkULBtoCV();
LinkSNsToULBtextViaEntries();
#Output();
close MISSING;
close OUT;
close LOG;
# Only on macOS is the not-handled list opened in the configured editor.
if ($^O eq "darwin") {system ("$textEditor $not_handled")}
print "\n\tDone.\n\n";
# ==============================
# ==============================
# GetUserDefaults
# Reads the platform-specific user-defaults file (User/<$udf>, relative to the
# current directory) and fills in the file-level settings:
#   $textEditor       - from the "Text editor: ..." line ("open -a ..." on macOS)
#   $repoPath         - from the "Repository directory: ..." line
#   $topTwDir         - <repo>/en_tw/bible
#   $topSourceLangDir - <repo>/MAST_HB
# Dies if the file cannot be opened or if either setting is missing or empty.
sub GetUserDefaults {
    my $settingsPath = "User${d}$udf";
    open (my $settings, "<:utf8", $settingsPath) or die "$settingsPath:\n$!";
    while (defined(my $entry = <$settings>)) {
        chomp $entry;
        if ($entry =~ /^Text editor: (.*)$/) {
            # On macOS the editor is launched through "open -a <application>".
            $textEditor = ($^O eq "darwin") ? "open -a $1" : $1;
            next;
        }
        $repoPath = $1 if $entry =~ /^Repository directory: (.*)$/;
    }
    say LOG "\$textEditor: $textEditor\n\$repoPath: $repoPath";
    if ($textEditor eq "") {
        die "No text editor found";
    }
    if ($repoPath eq "") {
        die "No path to repo found";
    }
    $topTwDir         = "$repoPath${d}en_tw${d}bible";
    $topSourceLangDir = "$repoPath${d}MAST_HB";
    close $settings;
}
# GetULBBooksToProcess
# Reads the work file ($workFile) and, for every non-comment line made of
# exactly four tab-separated fields, appends "<field1>-<FIELD3 upper-cased>.xml"
# (under $topSourceLangDir) to @MAST_HBfileList. The file-level $MAST_HBfile
# is left holding the last path that was added.
sub GetULBBooksToProcess {
    say LOG "GetULBBooksToProcess on \$workFile: $workFile";
    open (my $workList, "<:utf8", "$workFile") or die "$workFile:\n$!";
    while (defined(my $row = <$workList>)) {
        chomp $row;
        # Rows whose first character is '#' or that do not have four fields are skipped.
        next unless $row =~ /^([^#][^\t]*)\t[^\t]*\t([^\t]*)\t[^\t]*$/;
        my $bookStem = join "-", $1, uc $2;
        $MAST_HBfile = "$topSourceLangDir${d}$bookStem.xml";
        push @MAST_HBfileList, $MAST_HBfile;
    }
    close $workList;
    say LOG "\@MAST_HBfileList: @MAST_HBfileList"
}
# ReadExceptions
# Loads the exceptions file ($exceptions). Each usable line has the form
#   <reference> TAB <old> TAB <new>
# and must not start with '#'. For every such line the "<old>\t<new>" pair is
# appended (followed by ", ") to $adjust{<reference>} and the reference is
# flagged in %specifiedText. The collected adjustments are then logged.
sub ReadExceptions {
    say "Reading exceptions";
    say LOG "ReadExceptions from \$exceptions: $exceptions";
    open (my $excFile, "<:utf8", "$exceptions") or die "$exceptions:\n$!";
    while (defined(my $record = <$excFile>)) {
        chomp $record;
        next unless $record =~ /^([^#\n][^\t\n]*)\t([^\t\n]*\t[^\t\n]*)/;
        my ($rf, $oldNew) = ($1, $2);
        say LOG "\$line: $record, \$rf: $rf, \$oldNew: $oldNew";
        $adjust{$rf} .= "$oldNew, ";
        $specifiedText{$rf} = 1;
    }
    for my $key (sort keys %adjust) {
        say LOG "\$key: $key: \$adjust{$key}: $adjust{$key}";
    }
    close $excFile;
}
# PairtWEntriesTotWPageAndUniqSNs
# Walks every *.md file below $topTwDir and records, per tW page:
#   %entriesThisPage{page}  - the comma-separated terms from the "# ..." title
#                             line (parenthesised remarks removed)
#   %pageThisEntry{term},
#   %sourcePage{term}       - the page on which a term is listed
#   %entriesThisSN{number}  - the terms listed for a Hebrew Strong's number
#   %pagesThisSN{number}    - the pages listing that number
#   %relevantSNs{number}    - set of all Hebrew Strong's numbers seen
# A page key is the path relative to $topTwDir without ".md", with the first
# directory separator replaced by a comma.
# Prints one "." per file as a progress indicator and logs the entries found
# for each Strong's number. Dies if a page cannot be opened.
sub PairtWEntriesTotWPageAndUniqSNs {
    say "Pairing tW entries with tW pages and unique Strong's numbers";
    my @filesToRun;
    # Match on the real ".md" extension. The previous pattern interpolated the
    # glob '*.md' into a regex, which also accepted names such as "xmd".
    find (sub {push @filesToRun, $File::Find::name if /\.md\z/}, $topTwDir);
    @filesToRun = sort @filesToRun;
    foreach my $file (@filesToRun) {
        print ".";
        $file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";
        my ($thisList, $shortFile) = ("", $file);
        $shortFile =~ s/^\Q$topTwDir${d}\E//;
        $shortFile =~ s/\.md$//;
        $shortFile =~ s/\Q$d\E/,/;
        # Lexical handle, three-argument open; the file-wide "use open" pragma
        # still supplies the UTF-8 layer.
        open (my $page, "<", $file) or die "$file: $!";
        while (my $line = <$page>) {
            if ($line =~ /^# ([^\n]*)$/) {
                # Title line: the list of terms this page covers.
                $thisList = $1;
                $thisList =~ s/[\r\n]*$//;
                $thisList =~ s/ \([^\)]*\)//g;
                $entriesThisPage{$shortFile} = $thisList;
                foreach my $term (split /, /, $thisList) {
                    $pageThisEntry{$term} = $shortFile;
                    $sourcePage{$term}    = $shortFile;
                }
            }
            if ($line =~ /Strong's(.*)$/) {
                my $SNs = $1;
                # Only Hebrew numbers ("H" followed by at least one digit) are
                # collected; a bare "H" no longer yields an empty number.
                while ($SNs =~ s/H(\d+)//) {
                    $relevantSNs{$1} = "$1";
                    $entriesThisSN{$1} .= "$thisList, ";
                    $pagesThisSN{$1} .= "$shortFile, ";
                }
            }
        }
        close $page;
    }
    say "";
    foreach my $thisSN (sort keys %entriesThisSN) {
        say LOG "tW entries for $thisSN: $entriesThisSN{$thisSN}"
    }
}
# LinkULBtoCV
# Reads the extracted ULB text ($ULBfile), one "<reference> TAB <verse text>"
# line per verse, and fills:
#   %text{ref}, %fullText{ref} - the verse text (two independent copies)
#   %ref{counter}              - reference for a running verse counter
#   %order{ref}                - running verse counter for a reference
# The counter $cv is a zero-padded string ("00001", "00002", ...) that is
# advanced with string auto-increment so that its values sort correctly as
# strings. Lines without a tab are ignored. Dies if the file cannot be opened.
sub LinkULBtoCV {
    say "Linking ULB to chapter and verse";
    say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";
    # Lexical handle with three-argument open instead of the shared bareword IN.
    open (my $ulb, "<", $ULBfile) or die "$ULBfile: $!";
    while (my $line = <$ulb>) {
        # Drop the line ending, including a stray CR from CRLF files, so it
        # cannot end up inside the stored verse text.
        $line =~ s/[\r\n]+\z//;
        next unless $line =~ /^([^\t]*)\t(.*)$/;
        my ($reference, $verseText) = ($1, $2);
        $cv ++;
        ($text{$reference}, $fullText{$reference}) = ($verseText, $verseText);
        $ref{$cv} = $reference;
        $order{$reference} = $cv;
    }
    close $ulb;
}
# ReadLinkedSNs
# Scans every XML file in @MAST_HBfileList and builds %SNsInCV, which maps a
# verse reference ("<full book name> <chapter>:<verse>") to the Strong's
# numbers found in that verse, each followed by "√". Only numbers present in
# %relevantSNs are kept, and each number is recorded once per verse.
# The current reference is set by a <verse osisID="Bk.C.V"> line and replaced
# by a following <note>KJV:Bk.C.V</note> line when one is present.
# Finally any reference that has an entry in %newRef is re-keyed to its new
# reference. Dies if a file cannot be opened.
sub ReadLinkedSNs {
    say "Reading linked Strong's numbers\n\@MAST_HBfileList: @MAST_HBfileList";
    foreach my $sourceFile (@MAST_HBfileList) {
        say LOG "opening \$MAST_HBfile: $sourceFile";
        open (my $xml, "<", $sourceFile) or die "$sourceFile can't be opened: $!\n\n";
        my $thisRef;
        while (my $line = <$xml>) {
            chomp $line;
            # The dots in "Bk.C.V" are matched literally.
            if ($line =~ /<verse osisID="([^\.]*)\.(\d+)\.(\d+)">/) {
                my ($bk, $ch, $vs) = ($1, $2, $3);
                $thisRef = "$bkFull{$bk} $ch:$vs";
                say LOG "##\t\$bk \$ch:\$vs: $bk $ch:$vs, \$bkFull{$bk}: $bkFull{$bk}, \$thisRef: $thisRef";
            }
            elsif ($line =~ /<note>KJV:([^\.]*)\.(\d+)\.(\d+)<\/note>/) {
                my ($bk, $ch, $vs) = ($1, $2, $3);
                $thisRef = "$bkFull{$bk} $ch:$vs";
                say LOG "###\t\$bk \$ch:\$vs: $bk $ch:$vs, \$bkFull{$bk}: $bkFull{$bk}, \$thisRef: $thisRef";
            }
            else {
                # Words seen before the first verse marker have no reference
                # to be filed under, so they are skipped.
                next unless defined $thisRef;
                # NOTE(review): only a single one-character prefix ("b/1234")
                # and a one-character suffix ("1234 a") are accepted in the
                # lemma; confirm the input never has stacked prefixes.
                while ($line =~ /<w lemma="(\w\/)?(\d+)( \w)?"/g) {
                    my $thisNum = $2;
                    say LOG "\t\$thisNum: $thisNum, \$SNsInCV{$thisRef}: $SNsInCV{$thisRef}";
                    if (exists $relevantSNs{$thisNum}) {
                        say LOG "\t\t\$relevantSNs{$thisNum}: $relevantSNs{$thisNum}";
                        $SNsInCV{$thisRef} .= "$thisNum√"
                            unless (($SNsInCV{$thisRef} // "") =~ /\b$thisNum\b/);
                    }
                }
            }
        }
        close $xml;
    }
    # Re-key verses listed in %newRef. NOTE(review): %newRef is not filled
    # anywhere in the visible code, so this looks like a no-op left over from
    # the remaps-file version -- confirm before removing.
    my %temp;
    foreach my $oldRef (sort keys %SNsInCV) {
        if (exists $newRef{$oldRef}) {
            $temp{$newRef{$oldRef}} = $SNsInCV{$oldRef};
            delete $SNsInCV{$oldRef};
        }
    }
    foreach my $changedRef (sort keys %temp) {
        $SNsInCV{$changedRef} = $temp{$changedRef};
        say LOG "\$SNsInCV{$changedRef}: $SNsInCV{$changedRef}";
    }
}
# LinkSNsToULBtextViaEntries
# For every verse loaded by LinkULBtoCV (the keys of %ref, in sorted order):
#   1. writes "<reference>:" to OUT;
#   2. cleans up the √-separated Strong's number list in %SNsInCV for the
#      verse and, when the verse has an exceptions entry, rewrites it via
#      Adjust();
#   3. for each distinct number, tries to find one of its tW entry terms in
#      the verse text (%text) and, on success, appends "[term](page)" to the
#      output string and removes the matched term from %text so it cannot be
#      matched a second time;
#   4. reports numbers with no matching term to MISSING;
#   5. passes the collected links through ProperOrderOutString() and writes
#      the result to OUT.
# Takes no arguments. Side effects: modifies %SNsInCV, %text and %listOfPages,
# empties %workEntries/%ulbOrder for each verse, and changes the global list
# separator $".
sub LinkSNsToULBtextViaEntries {
say "Linking Strong's numbers to ULB text via tW page entries";
say LOG "sub LinkSNsToULBtextViaEntries called";
foreach my $thisRef (sort keys %ref) {
say LOG "\nLinkSNsToULBtextViaEntries: \$thisRef: $thisRef\t\$ref{$thisRef}: $ref{$thisRef}, \$specifiedText{$ref{$thisRef}}: $specifiedText{$ref{$thisRef}}";
(%workEntries, %ulbOrder) = ();
my %workPage;
# $thisRef is the running verse counter; $thisCV is the readable reference.
my ($thisCV, $checkList, $tempString, $outString) = ($ref{$thisRef}, "", "", "");
say OUT "$thisCV:";
my (@allEntries);
$listOfPages{$thisCV} = "";
#say LOG "\$SNsInCV{$thisCV}: |$SNsInCV{$thisCV}|\n$text{$thisCV}";
#say LOG "\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<\n\$text{$thisCV}: $text{$thisCV}";
# Trim trailing separators and stray spaces from the number list.
$SNsInCV{$thisCV} =~ s/√+$//;
$SNsInCV{$thisCV} =~ s/^ +//;
$SNsInCV{$thisCV} =~ s/ +$//;
$SNsInCV{$thisCV} =~ s/ {2,}/ /g;
say LOG "*\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
# Verses with an exceptions-file entry (flagged in ReadExceptions) get their
# number list rewritten by Adjust(), then tidied again.
if (exists $specifiedText{$thisCV}) {
#say LOG "*\t\$SNsInCV{$thisCV}: $SNsInCV{$thisCV}";
$SNsInCV{$thisCV} = Adjust($SNsInCV{$thisCV}, $thisCV);
$SNsInCV{$thisCV} =~ s/^ +(.*)/$1/;
$SNsInCV{$thisCV} =~ s/(.*) +$/$1/;
$SNsInCV{$thisCV} =~ s/ {2,}/ /g;
$SNsInCV{$thisCV} =~ s/^√+//;
$SNsInCV{$thisCV} =~ s/√{2,}/√/g;
$SNsInCV{$thisCV} =~ s/√ /√/g;
$SNsInCV{$thisCV} =~ s/√$//;
say LOG "**\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
}
#say LOG "<>\t<>\t\$SNsInCV{$thisCV}: |$SNsInCV{$thisCV}|";
say LOG "*7*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
#while ($SNsInCV{$thisCV} =~ s/(\[[^\]]*?) ([^\]]*?\] )/$1√$2/) {};
say LOG "*8*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
# Split into one slice per number and keep only the first slice seen for each
# leading number.
my @tempArray = split /√/, $SNsInCV{$thisCV};
my %alreadyUsed;
my @regArray;
foreach my $slice (@tempArray) {
if ($slice =~ /^(\d*)/) {
my $number = "$1";
push (@regArray, $slice) unless (exists $alreadyUsed{$number});
$alreadyUsed{$number} = $number
}
}
# $" only affects how arrays are interpolated in the log messages below.
$" = "|\n";
say LOG "*9*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}\n\@regArray: >@regArray<";
foreach my $thisNum (@regArray) {
say LOG "\$thisNum: $thisNum";
my ($found, $specPage);
# Form "number[term]": the term to link is forced. Emit the link for that
# term directly, remove the term from the verse text, and move on.
if ($thisNum =~ /(\d+)\[(.*?)\]/) {
($thisNum) = ($1);
my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($2, $2, $2);
# If the term is not a known tW entry as written, fall back to its
# lower-case form; if that is unknown too, only a warning is printed.
unless (exists $pageThisEntry{$forced_entry_for_page}) {
my $try = lc $forced_entry_for_page;
if (exists $pageThisEntry{$try}) {
$forced_entry_for_page = lc $forced_entry_for_page
}
else {
say "!!!\tNo \$pageThisEntry{$try} found for $try\n\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page\n$ref{$thisRef}\t$thisNum\t$forced_entry_for_page";
#die
}
}
say LOG
"*0* \$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
#while ($forced_entry_for_display =~ s/([^ \]]*?)√([^ \]]*?)/$1 $2/) {}
$forced_entry_for_search = lc $forced_entry_for_display;
#while ($forced_entry_for_search =~ s/ ... /\.\*\?/) {}
say LOG
"*A* \$thisNum: >>$thisNum<< \$forced_entry_for_display: >$forced_entry_for_display<,
\$forced_entry_for_search: >>$forced_entry_for_search<<
\$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
$outString .= "[$forced_entry_for_display]($pageThisEntry{$forced_entry_for_page})\n";
say LOG "\t\t$outString:\n$outString";
# A forced term may consist of two or three parts joined by " ... ". The
# parts are removed from the verse text and the text between them is kept.
if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) {
my ($first, $second, $third) = ($1, $2, $3);
$text{$thisCV} =~ s/$first(.*?)$second(.*?)$third/$1 $2/i;
say LOG "\t*\t$text{$thisCV}";
} elsif ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*)/) {
my ($first, $second) = ($1, $2);
$text{$thisCV} =~ s/$first(.*?)$second/$1/i;
say LOG "\t**\t$text{$thisCV}";
} else {
#say "\$text{$thisCV}:\n$text{$thisCV}\n\$forced_entry_for_search: |$forced_entry_for_search|";
$text{$thisCV} =~ s/$forced_entry_for_search//i;
say LOG "\t***\t$text{$thisCV}";
}
next;
# Form "number(page)": only the entries of the named tW page are candidates.
# $thisNum is not reassigned in this branch, so it keeps its "(page)" suffix.
} elsif ($thisNum =~ /\d+(\(([\w\/]+)\))/) {
$specPage .= $2;
say LOG "*B*\t\$specPage: $specPage";
} else {
say LOG "*C*\t\$thisNum: $thisNum";
}
say LOG "\t\$specPage: $specPage";
# Candidate terms: those of the specified page, otherwise every term listed
# for this Strong's number.
if ($specPage) {
$workEntries{$thisNum} = $entriesThisPage{$specPage};
} else {
$workEntries{$thisNum} = $entriesThisSN{$thisNum};
}
$workEntries{$thisNum} =~ s/, $//;
say LOG "*D*\t\$thisNum: >$thisNum<\t\$workEntries{$thisNum}: >$workEntries{$thisNum}<";
my @beforeArray = split /, /, $workEntries{$thisNum};
# The comparator uses numeric <=> on the first character, then the length,
# then the whole entry, and the result is reversed.
# NOTE(review): for entries that are words rather than numbers the first and
# last comparisons presumably always tie, leaving "longest term first" as the
# effective order -- confirm that this is the intent.
my @sortedArray = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|| length($a) <=> length($b)
|| $a <=> $b }
@beforeArray;
$" = "\n\t";
say LOG "*E*\t\@sortedArray: @sortedArray\n\$outString: $outString\n\$text{$ref{$thisRef}}: $text{$ref{$thisRef}}";
# Try the candidates in order; the first one found in the verse text wins
# and control jumps to Breakout.
foreach my $entry (@sortedArray) {
my $testEntry = $entry;
#print LOG "\$entry: $entry. Becomes ";
# Turn "a ... b" into the pattern "(a)\b(.*?)\b(b)".
while ($testEntry =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
#print LOG "\$testEntry: |$testEntry| ";
# First test: a term containing an ellipsis; the match is replaced by capture
# group 3 of the combined pattern.
if ($testEntry =~ /\(\.\*\?\)/ && $text{$thisCV} =~ s/\b($testEntry)\b/$3/i) {
say LOG "\n===\n$thisNum |$testEntry| is found in first test.\n===";
$outString .= "[$entry]($pageThisEntry{$entry})\n";
say LOG $outString . "\n===" . $text{$thisCV};
$found = 1;
goto Breakout;
# Second test: the term followed by a character that is neither a word
# character nor an apostrophe; that character is removed together with it.
} elsif ($text{$thisCV} =~ s/\b($testEntry)[^\w']//i) {
say LOG "\n===\n$thisNum |$testEntry| is found in second test.\n===";
$outString .= "[$entry]($pageThisEntry{$entry})\n";
say LOG $outString . "\n" . $text{$thisCV};
$found = 1;
goto Breakout;
# Third test: the term delimited by word boundaries.
} elsif ($text{$thisCV} =~ s/\b($testEntry)\b//i) {
say LOG "\n===\n$thisNum |$testEntry| is found in third test.\n===";
$outString .= "[$entry]($pageThisEntry{$entry})\n";
say LOG $outString . "\n" . $text{$thisCV};
$found = 1;
goto Breakout;
} else {
# NOTE(review): the trailing "}" in this log message looks like a typo.
say LOG "\$testEntry «$testEntry» is not found in\n$text{$thisCV}}";
}
}
Breakout:
# No candidate matched: report the verse and number for manual handling.
unless ($found) {
say MISSING "$thisCV $thisNum";
say LOG "\$thisCV: $thisCV\t\$thisNum: $thisNum"
}
next if $found;
}
say LOG "\t\$outString:\n$outString";
# Put the collected links into the order in which the terms occur in the verse.
$outString = ProperOrderOutString($outString, $thisCV);
say LOG "<>\t\$outString: $outString";
say OUT "$outString";
#say LOG "sub LinkSNsToULBtextViaEntries finished";
}
}
# Adjust
# Applies the exceptions-file adjustments recorded for one verse to that
# verse's list of Strong's numbers.
#   $_[0] - the verse's current number list (from %SNsInCV)
#   $_[1] - the verse reference, used as the key into %adjust
# Returns "<adjusted old list>√<newly added items>".
# %adjust{$ref} holds "old<TAB>new" pairs joined by ", " (built in
# ReadExceptions); each pair is handled by the first matching case below, so
# the order of the elsif chain matters.
sub Adjust {
my ($snsOld, $ref, $snsNew, $addToSnsNew) = ($_[0], $_[1], "", "");
say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: |$snsOld|";
# NOTE(review): %tempEntries, @oldArray and $addToSnsNew are not used anywhere
# else in this sub.
my (%tempEntries);
#say LOG ">\t\$sns: |$sns|";
#say LOG ">\t\$specifiedText{$ref}: |$specifiedText{$ref}|";
# Collapse any leading run of separators/spaces into a single space.
$snsOld =~ s/^[√ ]+/ /;
my @oldArray = split / /, $snsOld;
#say LOG "\$adjust{$ref}: $adjust{$ref}";
my @preadjustments = split /, /, $adjust{$ref};
foreach my $adjustment (@preadjustments) {
say LOG "<><>\t\$adjustment: >$adjustment<";
# "X<TAB>||": drop X from the list.
if ($adjustment =~ /([^\t]*)\t\|\|$/) { # delete this from list to look for
my $found = $1;
$snsOld =~ s/\b$found\b ?//;
say LOG "*1*\t\$found: $found should be deleted from \$snsOld: $snsOld";
# "digits<TAB>digits": remove the first number (with its √) from the old list
# and append the second number to the new list.
} elsif ($adjustment =~ /^(\d+)\t(\d+)$/) { # change to specified number
my ($found1, $found2) = ($1, $2);
#$addToSnsNew .= "$1\[$2\] ";
$snsOld =~ s/$found1√//g;
$snsNew .= "$found2√" unless $snsNew =~ /\b$found2\b /;
say LOG "*2*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
# "||<TAB>something": add an item. When the remainder itself contains a tab,
# $snsNew is REPLACED (assignment, not append) by a ready-made "[text](page)"
# link; otherwise the remainder is appended followed by a space.
} elsif ($adjustment =~ /^\|\|\t(.*)/) { # add this to list to look for
my $adj = $1;
say LOG "\$adjustment: $adjustment, \$adj: $adj";
if ($adj =~ /([^\t]*)\t([^\t]*)/) {
$snsNew = "[$1]($pageThisEntry{$2})"
} else {
$snsNew .= "$adj "
}
say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
# "digits<TAB>words": tag the number with the term to use, as "number[term]",
# in the old list or, failing that, in the new list.
} elsif ($adjustment =~ /^(\d+)\t([\w\. \-\']+)$/) { # add specified word
my ($found1, $found2) = ($1, $2);
#$addToSnsNew .= "$1\[$2\] ";
if ($snsOld =~ s/\b$found1\b/$found1\[$found2\]/) {}
else {$snsNew =~ s/\b$found1\b/$found1\[$found2\]/}
$snsOld =~ s/ {2,}/ /;
say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
# "digits<TAB>page": tag the number with the tW page to use, as "number(page)".
# Values made only of word characters are caught by the cases above, so this
# case is reached only when the value contains a "/".
} elsif (($adjustment =~ /^(\d+)\t([\/\d\w]+)$/)) { # add specified page
#$addToSnsNew .= "$1\{$2\} "
my ($found1, $found2) = ($1, $2);
$snsOld =~ s/$found1/$found1\($found2\)/;
say LOG "*5*\t\$snsOld: $snsOld";
}
}
say LOG "*5A*\t\$snsNew: >$snsNew<\n\$snsOld+\$snsNew: >$snsOld< >$snsNew<";
$snsNew =~ s/ +$//;
#$snsNew = "$snsOld $snsNew";
# Join the adjusted old list and the additions with the √ separator.
$snsNew = "$snsOld√$snsNew";
# Move each "number[term]" item in front of a plain number that precedes it
# (separated by a single space), repeating until nothing changes.
while ($snsNew =~ s/(\d+) (\d+\[[^\[\]]*\])/$2 $1/) {}
say LOG "*5B*\t\$snsNew: $snsNew";
return $snsNew;
}
# Output
# For every verse in %ref: tidies the verse's entries in %SNsInCV and
# %checkPages, sorts the newline-separated page links in %listOfPages, and
# reports to MISSING (and LOG) every page named in %checkPages that is not
# among the sorted links. Sets the list separator $" to "\n".
# The call to this sub in the main program is commented out.
sub Output {
    say "Outputting";
    foreach my $counter (sort keys %ref) {
        my $thisRef = $ref{$counter};

        # Normalise spacing of the Strong's number list.
        $SNsInCV{$thisRef} =~ s/^ +//;
        $SNsInCV{$thisRef} =~ s/ +$//;
        $SNsInCV{$thisRef} =~ s/ {2,}/ /;

        # Sort the page links and store them back, one per line.
        my @pageLinks = split /\n/, $listOfPages{$thisRef};
        my @sorted =
            sort sort { lc($a) cmp lc($b) }
            @pageLinks;
        $" = "\n";
        $listOfPages{$thisRef} = join $", @sorted;
        say LOG "\$listOfPages{$thisRef}: $listOfPages{$thisRef}\n\$checkPages{$thisRef}: $checkPages{$thisRef}";

        # Normalise the list of pages that should have been produced.
        $checkPages{$thisRef} =~ s/^ +//;
        $checkPages{$thisRef} =~ s/ +$//;
        $checkPages{$thisRef} =~ s/ {2,}/ /;
        $checkPages{$thisRef} =~ s/ \|\|//;
        say LOG "\$checkPages{$thisRef}:\t|$checkPages{$thisRef}|";
        my @expectedPages = split / /, $checkPages{$thisRef};

        # The first sorted element is discarded; the rest are reduced from
        # "[term](page)" to "page" and collected as the set of pages done.
        shift @sorted;
        my %donePages = map {
            (my $pageOnly = $_) =~ s/\[.*?\]\((.*?)\)/$1/;
            ($pageOnly => $pageOnly)
        } @sorted;

        for my $page (grep { !exists $donePages{$_} } @expectedPages) {
            say LOG "\$thisRef: $thisRef\t\$slice:$page";
            say MISSING "$thisRef\t$page";
        }
    }
}
# Substitute
# For every key of %pages, applies the "old<TAB>new" substitutions stored in
# %substitutedPages (joined by ", ") to the key's entry in %SNsInCV:
#   old is "||" - append new (followed by √)
#   new is "||" - remove "old " from the list
#   otherwise   - replace old by new
# %checkPages receives a copy of the list as it stands right after each
# substitution, or the unchanged list when the key has no substitutions.
sub Substitute {
    foreach my $key (sort keys %pages) {
        say LOG "\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";
        unless (exists $substitutedPages{$key}) {
            $checkPages{$key} = $SNsInCV{$key};
        }
        else {
            $substitutedPages{$key} =~ s/, $//;
            foreach my $pair (split /, /, $substitutedPages{$key}) {
                next unless $pair =~ /([^\t]*)\t([^\t]*)/;
                my ($old, $new) = ($1, $2);
                if ($old eq "||") {
                    $SNsInCV{$key} .= "$new√";
                }
                elsif ($new eq "||") {
                    $SNsInCV{$key} =~ s/$old //;
                }
                else {
                    $SNsInCV{$key} =~ s/$old/$new/;
                }
                # The copy is taken before the " ||" markers are stripped.
                $checkPages{$key} = $SNsInCV{$key};
                $SNsInCV{$key} =~ s/ \|\|//g;
            }
        }
        say LOG "<>\t\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";
    }
}
# ProperOrderOutString
# Reorders the "[term](page)" lines of one verse by where each term occurs in
# the verse text.
#   $_[0] - newline-separated "[term](page)" lines
#   $_[1] - verse reference (key into %fullText)
# Returns the lines, each followed by "\n", sorted by the offset at which the
# term was found. Every found term is overwritten with spaces in %fullText so
# that a later identical term is located further along. Lines whose term is
# not found in the text are left out, and a line found at the same offset as
# an earlier one replaces it.
sub ProperOrderOutString {
    my ($rawLinks, $thisCV) = @_;
    say LOG "\$_[0]: $rawLinks\n\$_[1]: |$thisCV|";
    my %orderedSet;
    for my $thisSet (split /\n/, $rawLinks) {
        say LOG "\t>A>\t\$thisSet: $thisSet";
        next unless $thisSet =~ /(\[([^\]]*)\])(\([^\)]*\))/;
        my ($ulb, $fileLoc) = ($2, $3);
        # " ... " inside a term stands for any intervening text.
        $ulb =~ s/ \.\.\. /.*?/g;
        say LOG "\t>B>\t\$ulb: $ulb, \$fileLoc: $fileLoc, \$fullText{$thisCV}\n$fullText{$thisCV}";
        if ($ulb =~ /^(.*)\.\*\?(.*)$/) {
            # Two-part term: blank out both parts, keep the text in between.
            my ($found1, $found2) = ($1, $2);
            say LOG "\t>C>\tThere is an ellipsis in \$ulb: $ulb.";
            my $blank1 = " " x length $found1;
            my $blank2 = " " x length $found2;
            say LOG "Looking for\ns/^(.*?)$found1(.*?)$found2(.*)\$\nin\n$fullText{$thisCV}";
            if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*?)\b$found2\b(.*)$/$1$blank1$2$blank2$3/i) {
                say LOG "\t>C1>\t$fullText{$thisCV}";
                my $order = length $1;
                $orderedSet{$order} = $thisSet;
                say LOG "\t\t\t\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}";
            }
            next;
        }
        # Single-part term: blank it out where it first occurs.
        say LOG "\t>D>\tThere is no ellipsis in \$ulb: $ulb.";
        my $blank = " " x length $ulb;
        if ($fullText{$thisCV} =~ s/^(.*?)\b$ulb([^\w'](.*))?$/$1$blank$2/i) {
            say LOG "\t>D1>\t$fullText{$thisCV}";
            my $order = length $1;
            $orderedSet{$order} = $thisSet;
            say LOG ">D2>\t\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}";
        }
        elsif ($fullText{$thisCV} =~ s/^(.*?)\b$ulb(\b(.*))?$/$1$blank$2/i) {
            say LOG "\t>D3>\t$fullText{$thisCV}";
            my $order = length $1;
            $orderedSet{$order} = $thisSet;
            say LOG ">D4>\t\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}";
        }
    }
    my $outS = join "", map { "$orderedSet{$_}\n" } sort { $a <=> $b } keys %orderedSet;
    return $outS;
}
__DATA__
gen Genesis
exo Exodus
lev Leviticus
num Numbers
deu Deuteronomy
jos Joshua
jdg Judges
rut Ruth
1sa 1 Samuel
2sa 2 Samuel
1ki 1 Kings
2ki 2 Kings
1ch 1 Chronicles
2ch 2 Chronicles
ezr Ezra
neh Nehemiah
est Esther
job Job
psa Psalms
pro Proverbs
ecc Ecclesiastes
sng Song of Solomon
isa Isaiah
jer Jeremiah
lam Lamentations
ezk Ezekiel
dan Daniel
hos Hosea
jol Joel
amo Amos
oba Obadiah
jon Jonah
mic Micah
nam Nahum
hab Habakkuk
zep Zephaniah
hag Haggai
zec Zechariah
mal Malachi