Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.MAST_NT.pl

663 lines
22 KiB
Perl

# Produces list of tWs for each verse by linking MAST_NT to ULB through tWs.
# Taken from tWs.from.MAST_NT.2.pl.
# The output from this script is useful for the interleaved PDFs used in MAST.
# This version uses an exception file to handle places where the MAST_NT points to
# a tW page different from that on which the ULB term appears.
# Make sure $ULBfile points to the correct input file, then run the script.
# Output is written to the $output file.
# Afterwards, check the $missing file for needed corrections; these usually
# mean that lines need to be added to the $exceptions file.
use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use FindBin '$Bin';
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
use List::MoreUtils qw(uniq);
# ------------------------------------------------------------------
# Script set-up and driver.
# Works out platform-specific paths, declares the file-scoped state shared
# by all subroutines, opens the LOG/OUT/MISSING handles and then runs the
# processing steps in order.
# ------------------------------------------------------------------

# Unbuffered STDOUT so progress output appears as soon as it is printed.
$| = 1;

# $pwd: directory containing this script; $d: directory separator for this OS.
my ($pwd, $d) = ($Bin, "/");
if ($^O eq "MSWin32") {
    $d = "\\";
    $pwd =~ s/\//\\/g;
}

# Per-platform user defaults file; the Windows name is the fallback.
my ($udf) = "User_defaults.windows.txt";
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}

# Check that the defaults file can be read before any output file is
# truncated. The handle is closed again immediately (it used to be left
# open for the whole run); GetUserDefaults() reopens the file itself.
open (my $defaults, "<:utf8", "$pwd${d}User${d}$udf") or die "$pwd${d}User${d}$udf:\n$!";
close $defaults;

# $cv is a zero-padded string counter (incremented in LinkULBtoCV); the rest
# are the fixed input, exception and output locations relative to the script.
my ($cv, $ULBfile, $exceptions, $missing, $output, $workFile) =
    ("00000",
    "$Bin${d}Temp${d}Extract.txt",
    "$Bin${d}Exceptions${d}Exceptions_tWs_from_MAST_NT.txt",
    "$Bin${d}Output${d}Entries_not_handled.txt",
    "$Bin${d}Output${d}tWs_for_PDF.txt",
    "$Bin${d}User${d}tW_work_NT.txt",
    );

# File-scoped state filled in and read by the subroutines below.
my ($MAST_NTfile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir);
my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
    %specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
    %SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pageThisEntry, %adjust, %newRef);
my $book;
my (@MAST_NTfileList);
# ==============================
# All relative paths below (Logs/, User/) are resolved from the script directory.
chdir("$pwd") or die "chdir $pwd: $!";
open LOG, ">:utf8", "Logs${d}Exc_log.log" or die "\$log: Logs${d}Exc_log.log: $!";
open OUT, ">:utf8", $output or die "$output: $!";
open MISSING, ">:utf8", $missing or die "$missing: $!";

# Load the abbreviation <-> full-name table from the __DATA__ section
# (two tab-separated fields per line).
while (my $line = <DATA>) {
    chomp $line;
    if ($line =~ /([^\t]*)\t([^\t]*)/) {
        $bkAbr{$2} = $1;
        $bkFull{$1} = $2;
    }
}

# Each phase writes to its own log file through the shared LOG handle.
GetUserDefaults();
GetULBBooksToProcess();
ReadExceptions();
close LOG;
open LOG, ">:utf8", "Logs${d}tW_pairs.log" or die "Logs${d}tW_pairs.log: $!";
PairtWEntriesTotWPageAndUniqSNs();
close LOG;
open LOG, ">:utf8", "Logs${d}tWs_from_MAST.log" or die "Logs${d}tWs_from_MAST.log: $!";
ReadLinkedSNs();
LinkULBtoCV();
LinkSNsToULBtextViaEntries();
#Output();
close MISSING;
close OUT;
close LOG;

# On macOS, open the list of unhandled entries in the configured editor.
if ($^O eq "darwin") {system ("$textEditor $missing")}
print "\n\tDone.\n\n";
# ==============================
# Reads the platform's user defaults file (User/<$udf>, relative to the
# current directory) and sets the file-scoped $textEditor and $repoPath from
# its "Text editor: ..." and "Repository directory: ..." lines, then derives
# $topTwDir and $topSourceLangDir from $repoPath.
# Dies if the file cannot be opened or if either setting is missing.
sub GetUserDefaults {
    my $defaultsPath = "User${d}$udf";
    open (my $settings, "<:utf8", $defaultsPath) or die "$defaultsPath:\n$!";
    my $onMac = $^O eq "darwin";
    while (my $setting = <$settings>) {
        chomp $setting;
        if ($setting =~ /^Text editor: (.*)$/) {
            # On macOS the editor is launched by application name via "open -a".
            $textEditor = $onMac ? "open -a $1" : $1;
            next;
        }
        $repoPath = $1 if $setting =~ /^Repository directory: (.*)$/;
    }
    die "No text editor found" if $textEditor eq "";
    die "No path to repo found" if $repoPath eq "";
    $topTwDir         = "$repoPath${d}en_tw${d}bible";
    $topSourceLangDir = "$repoPath${d}MAST_NT";
    close $settings;
}
# Builds @MAST_NTfileList from the work file ($workFile).
# Every line that does not start with "#" and has exactly four tab-separated
# fields contributes one file name: <field 1>-<upper-cased field 3>.xml inside
# $topSourceLangDir. The file-scoped $MAST_NTfile is left holding the last
# name built.
sub GetULBBooksToProcess {
    say LOG "GetULBBooksToProcess on \$workFile: $workFile";
    open (my $workList, "<:utf8", "$workFile") or die "$workFile:\n$!";
    while (my $entry = <$workList>) {
        chomp $entry;
        next unless $entry =~ /^([^#][^\t]*)\t[^\t]*\t([^\t]*)\t[^\t]*$/;
        my ($firstField, $thirdField) = ($1, $2);
        my $baseName = $firstField . "-" . uc($thirdField);
        $MAST_NTfile = "$topSourceLangDir${d}$baseName.xml";
        push @MAST_NTfileList, $MAST_NTfile;
    }
    close $workList;
}
# Loads the exceptions file ($exceptions) into %adjust and %specifiedText.
# A usable line has at least three tab-separated fields and does not start
# with "#": field 1 is the key, and fields 2 and 3 (kept joined by their tab)
# are appended to $adjust{key} followed by a "√" separator, so a key may
# accumulate several adjustments. $specifiedText{key} flags that the key has
# exceptions. The accumulated table is written to LOG.
sub ReadExceptions {
    say "Reading exceptions";
    say LOG "ReadExceptions from \$exceptions: $exceptions";
    open (my $excFile, "<:utf8", "$exceptions") or die "$exceptions:\n$!";
    while (my $record = <$excFile>) {
        chomp $record;
        next unless $record =~ /^([^#\n][^\t\n]*)\t([^\t\n]*\t[^\t\n]*)/;
        my ($verseRef, $oldAndNew) = ($1, $2);
        $adjust{$verseRef} .= $oldAndNew . "√";
        $specifiedText{$verseRef} = 1;
    }
    for my $verseRef (sort keys %adjust) {
        say LOG "\$key: $verseRef: \$adjust{$verseRef}: $adjust{$verseRef}"
    }
    close $excFile;
}
# Scans every .md file below $topTwDir and records, per file ("page"):
#   %entriesThisPage{page}  - the comma-separated entry list taken from a
#                             "# ..." heading line (parenthesised parts removed)
#   %pageThisEntry{entry},
#   %sourcePage{entry}      - the page each individual entry came from
#   %entriesThisSN{number},
#   %pagesThisSN{number}    - entry lists / pages for each G<digits> number
#                             found on a line containing "Strong's"
#   %relevantSNs{number}    - set of all such numbers
# The page name is the file path relative to $topTwDir, without ".md", with
# the first directory separator replaced by a comma.
# Changes from the earlier version: the file filter now tests the ".md"
# extension directly (a glob-style '*.md' used to be interpolated into a
# regex), a bare "G" with no digits is no longer recorded as an empty number,
# %relevantSNs is filled as numbers are found rather than being rebuilt after
# every input line, and each file is read through its own lexical handle.
sub PairtWEntriesTotWPageAndUniqSNs {
    say "Pairing tW entries with tW pages and unique Strong's numbers";
    my @filesToRun;
    # Inside the callback $_ is the file's base name.
    find (sub {push @filesToRun, $File::Find::name if /\.md$/}, $topTwDir);
    @filesToRun = sort @filesToRun;
    #say LOG "\@filesToRun: @filesToRun";
    foreach my $file (@filesToRun) {
        print ".";
        # File::Find reports "/"-separated paths; switch to "\" except on linux/darwin.
        $file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";
        my ($thisList, $shortFile) = ("", $file);
        $shortFile =~ s/^\Q$topTwDir${d}\E//;
        $shortFile =~ s/\.md$//;
        $shortFile =~ s/\Q$d\E/,/;
        open (my $in, "<:utf8", $file) or die "$file: $!";
        while (my $line = <$in>) {
            if ($line =~ /^# ([^\n]*)$/) {
                $thisList = $1;
                $thisList =~ s/[\r\n]*$//;
                #say LOG "\$thisList = |$thisList|";
                $thisList =~ s/ \([^\)]*\)//g;
                $entriesThisPage{$shortFile} = $thisList;
                my @ULBEntries = split /, /, $thisList;
                foreach my $ULB_entry (@ULBEntries) {
                    $pageThisEntry{$ULB_entry} = $shortFile;
                    say LOG "\$pageThisEntry{$ULB_entry}: $pageThisEntry{$ULB_entry}"
                }
                foreach my $slice (@ULBEntries) {
                    $sourcePage{$slice} = $shortFile;
                    say LOG "$slice: \$sourcePage{$slice}: $sourcePage{$slice}";
                }
            }
            if ($line =~ /Strong's(.*)$/) {
                my $SNs = $1;
                # Consume one G<digits> reference per iteration.
                while ($SNs =~ s/G(\d+)//) {
                    $relevantSNs{$1} = "$1";
                    $entriesThisSN{$1} .= "$thisList, ";
                    $pagesThisSN{$1} .= "$shortFile, ";
                }
            }
        }
        close $in;
    }
    say "";
    foreach my $thisSN (sort keys %entriesThisSN) {
        say LOG "tW entries for $thisSN: $entriesThisSN{$thisSN}"
    }
}
# Reads $ULBfile, whose usable lines are "<reference><TAB><verse text>", and
# fills:
#   %text{reference}, %fullText{reference} - the verse text (two copies)
#   %ref{counter}                          - reference for each counter value
#   %order{reference}                      - counter value for each reference
# The counter is the file-scoped $cv, which starts as the string "00000";
# Perl's string auto-increment keeps the zero padding, so a plain
# "sort keys %ref" returns the verses in file order.
# The file is read through a lexical handle with a three-argument open
# (previously a two-argument open on the shared bareword handle IN), and the
# global $_ is no longer overwritten.
sub LinkULBtoCV {
    say "Linking ULB to chapter and verse";
    say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";
    open (my $ulb, "<:utf8", $ULBfile) or die "$ULBfile: $!";
    while (my $line = <$ulb>) {
        next unless $line =~ /^([^\t]*)\t(.*)$/;
        my ($verseRef, $verseText) = ($1, $2);
        $cv ++;    # must stay "++": a numeric "+ 1" would lose the zero padding
        ($text{$verseRef}, $fullText{$verseRef}) = ($verseText, $verseText);
        $ref{$cv} = $verseRef;
        $order{$verseRef} = $cv;
    }
    close $ulb;
}
# Reads every file in @MAST_NTfileList and collects, per verse, the lemma
# numbers that are also keys of %relevantSNs.
# A line containing <verse osisID="..."> sets the current reference, either
# the %newRef mapping of "<book> <chapter>:<verse>" or, by default,
# "<full book name> <chapter>:<verse>" using %bkFull. On all other lines each
# <w lemma="<digits>"> number is appended once to $SNsInCV{reference},
# terminated by "√".
# Finally, any %SNsInCV key that has a %newRef mapping is renamed to it.
# Changes from the earlier version: files are opened with a three-argument
# open on a lexical handle (previously a two-argument open on bareword IN),
# the open failure message now includes the system error, unused locals are
# gone, and the global $_ is no longer overwritten.
sub ReadLinkedSNs {
    say "Reading linked Strong's numbers";
    foreach my $MAST_NTfile (@MAST_NTfileList) {
        say LOG "opening \$MAST_NTfile: $MAST_NTfile";
        open (my $in, "<:utf8", $MAST_NTfile) or die "$MAST_NTfile can't be opened: $!\n\n";
        my $thisRef;
        while (my $line = <$in>) {
            chomp $line;
            # NOTE(review): the separators between book, chapter and verse are
            # matched with an unescaped "." (any character) - confirm against
            # the real osisID format before tightening this.
            if ($line =~ /<verse osisID="([^\.]*).(\d+).(\d+)">/) {
                my ($bk, $ch, $vs) = ($1, $2, $3);
                if (exists $newRef{"$bk $ch:$vs"}) {
                    $thisRef = $newRef{"$bk $ch:$vs"}
                } else {
                    ($thisRef) = ("$bkFull{$bk} $ch:$vs");
                }
            }
            else {
                while ($line =~ /<w lemma="(\d+)"/g) {
                    my ($thisNum) = ($1);
                    if (exists $relevantSNs{$thisNum}) {
                        # Record each number at most once per verse.
                        $SNsInCV{$thisRef} .= "$thisNum√" unless ($SNsInCV{$thisRef} =~ /\b$thisNum\b/);
                    }
                }
            }
        }
        close $in;
    }
    # Rename remapped references in two passes so that %SNsInCV is not
    # given new keys while its key list is being walked.
    my %temp;
    foreach my $oldRef (sort keys %SNsInCV) {
        if (exists $newRef{$oldRef}) {
            $temp{$newRef{$oldRef}} = $SNsInCV{$oldRef};
            delete $SNsInCV{$oldRef};
        }
    }
    foreach my $changedRef (sort keys %temp) {
        $SNsInCV{$changedRef} = $temp{$changedRef};
        say LOG "\$SNsInCV{$changedRef}: $SNsInCV{$changedRef}";
    }
}
# For every verse recorded in %ref (visited in sorted key order), works out
# which tW entries occur in the verse text and writes them to OUT as
# "[entry](page)" lines under a "<reference>:" heading.
# Per verse:
#   1. Tidy the "√"-separated number list in $SNsInCV{verse}; if the verse has
#      exceptions (%specifiedText), let Adjust() rewrite the list.
#   2. Split the list and keep only the first item for each leading number.
#   3. An item of the form number[text] is a forced entry: its page is looked
#      up in %pageThisEntry and the text is removed from the working copy of
#      the verse ($text{verse}).
#      An item of the form number(page) takes its candidate entries from
#      %entriesThisPage{page}; any other item takes them from
#      %entriesThisSN{number}.
#   4. Candidates are tried against the working verse text; the first one that
#      matches is consumed from the text and added to the output. Items with
#      no matching candidate are written to MISSING.
#   5. The collected lines are reordered by ProperOrderOutString() and printed.
# Side effects: modifies %SNsInCV, %text, %workEntries, %listOfPages and the
# global list separator $".
sub LinkSNsToULBtextViaEntries {
say "Linking Strong's numbers to ULB text via tW page entries";
say LOG "sub LinkSNsToULBtextViaEntries called";
foreach my $thisRef (sort keys %ref) {
say LOG "\nLinkSNsToULBtextViaEntries: \$thisRef: $thisRef\t\$ref{$thisRef}: $ref{$thisRef}, \$specifiedText{$ref{$thisRef}}: $specifiedText{$ref{$thisRef}}";
# Reset the per-verse scratch hashes.
(%workEntries, %ulbOrder) = ();
my %workPage;
# $thisRef is the %ref key (a counter); $thisCV is the verse reference itself.
my ($thisCV, $checkList, $tempString, $outString) = ($ref{$thisRef}, "", "", "");
say OUT "$thisCV:";
my (@allEntries);
$listOfPages{$thisCV} = "";
#say LOG "\$SNsInCV{$thisCV}: |$SNsInCV{$thisCV}|\n$text{$thisCV}";
say LOG "\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
# Strip trailing separators and stray spaces from the number list.
$SNsInCV{$thisCV} =~ s/√+$//;
$SNsInCV{$thisCV} =~ s/^ +//;
$SNsInCV{$thisCV} =~ s/ +$//;
$SNsInCV{$thisCV} =~ s/ {2,}/ /g;
#say LOG "*\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
# Verses listed in the exceptions file get their number list rewritten.
if (exists $specifiedText{$thisCV}) {
#say LOG "*\t\$SNsInCV{$thisCV}: $SNsInCV{$thisCV}";
$SNsInCV{$thisCV} = Adjust($SNsInCV{$thisCV}, $thisCV);
# Normalise whatever Adjust returned: trim spaces, collapse/strip separators.
$SNsInCV{$thisCV} =~ s/^ +(.*)/$1/;
$SNsInCV{$thisCV} =~ s/(.*) +$/$1/;
$SNsInCV{$thisCV} =~ s/ {2,}/ /g;
$SNsInCV{$thisCV} =~ s/^√+//;
$SNsInCV{$thisCV} =~ s/√{2,}/√/g;
$SNsInCV{$thisCV} =~ s/√ /√/g;
$SNsInCV{$thisCV} =~ s/√$//;
say LOG "**\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
}
#say LOG "<>\t<>\t\$SNsInCV{$thisCV}: |$SNsInCV{$thisCV}|";
say LOG "*7*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
# while ($SNsInCV{$thisCV} =~ s/(\[[^\]]*?) ([^\]]*?\] )/$1√$2/) {};
say LOG "*8*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
my @tempArray = split /√/, $SNsInCV{$thisCV};
# Keep only the first item seen for each leading number.
my %alreadyUsed;
my @regArray;
foreach my $slice (@tempArray) {
if ($slice =~ /^(\d*)/) {
my $number = "$1";
push (@regArray, $slice) unless (exists $alreadyUsed{$number});
$alreadyUsed{$number} = $number
}
}
# $" is the separator used when an array is interpolated into a string; it is
# changed globally here (and again below) purely to format the LOG output.
$" = "|\n";
say LOG "*9*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}\n\@regArray: >@regArray<";
foreach my $thisNum (@regArray) {
say LOG "\$thisNum: $thisNum";
my ($found, $specPage);
# Case 1: number[text] - a forced entry supplied by Adjust().
if ($thisNum =~ /(\d+)\[(.*?)\]/) {
($thisNum) = ($1);
my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($2, $2, $2);
# The page lookup is tried as written and then lower-cased; an entry that is
# unknown either way aborts the whole run.
unless (exists $pageThisEntry{$forced_entry_for_page}) {
my $try = lc $forced_entry_for_page;
if (exists $pageThisEntry{$try}) {
$forced_entry_for_page = lc $forced_entry_for_page
}
else {
say "\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page";
die
}
}
say LOG
"*0* \$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
#while ($forced_entry_for_display =~ s/([^ \]]*?)√([^ \]]*?)/$1 $2/) {}
$forced_entry_for_search = lc $forced_entry_for_display;
#while ($forced_entry_for_search =~ s/ ... /\.\*\?/) {}
say LOG
"*A* \$thisNum: >>$thisNum<< \$forced_entry_for_display: >$forced_entry_for_display<,
\$forced_entry_for_search: >>$forced_entry_for_search<<
\$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
$outString .= "[$forced_entry_for_display]($pageThisEntry{$forced_entry_for_page})\n";
say LOG "\t\t$outString:\n$outString";
# Remove the forced entry from the working verse text so later candidates
# cannot match the same words. " ... " in the entry marks a gap: only the
# parts of the entry are removed, the text in the gap(s) is kept.
if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) {
my ($first, $second, $third) = ($1, $2, $3);
$text{$thisCV} =~ s/$first(.*?)$second(.*?)$third/$1 $2/i;
say LOG "\t*\t$text{$thisCV}";
} elsif ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*)/) {
my ($first, $second) = ($1, $2);
$text{$thisCV} =~ s/$first(.*?)$second/$1/i;
say LOG "\t**\t$text{$thisCV}";
} else {
#say "\$text{$thisCV}:\n$text{$thisCV}\n\$forced_entry_for_search: |$forced_entry_for_search|";
$text{$thisCV} =~ s/$forced_entry_for_search//i;
say LOG "\t***\t$text{$thisCV}";
}
# Forced entries are complete at this point; go to the next item.
next;
# Case 2: number(page) - candidates come from one specific page.
} elsif ($thisNum =~ /\d+(\(([\w\/]+)\))/) {
$specPage .= $2;
say LOG "*B*\t\$specPage: $specPage";
# Case 3: anything else - candidates come from every entry list recorded for the item.
} else {
say LOG "*C*\t\$thisNum: $thisNum";
}
say LOG "\t\$specPage: $specPage";
if ($specPage) {
$workEntries{$thisNum} = $entriesThisPage{$specPage};
} else {
$workEntries{$thisNum} = $entriesThisSN{$thisNum};
}
$workEntries{$thisNum} =~ s/, $//;
say LOG "*D*\t\$thisNum: >$thisNum<\t\$workEntries{$thisNum}: >$workEntries{$thisNum}<";
my @beforeArray = split /, /, $workEntries{$thisNum};
# NOTE(review): all three comparisons use the numeric <=> operator, but the
# first and third operands are entry text, which is normally not numeric, so
# in practice the order is probably decided by length alone (longest first
# after the reverse) - confirm that this is the intent.
my @sortedArray = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|| length($a) <=> length($b)
|| $a <=> $b }
@beforeArray;
$" = "\n\t";
say LOG "*E*\t\@sortedArray: @sortedArray\n\$outString: $outString\n\$text{$ref{$thisRef}}: $text{$ref{$thisRef}}";
# Try each candidate in turn; the first one found in the verse text wins.
foreach my $entry (@sortedArray) {
my $testEntry = $entry;
#print LOG "\$entry: $entry. Becomes ";
# Turn "a ... b" into the pattern (a)\b(.*?)\b(b) so the gap can be captured.
while ($testEntry =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
#print LOG "\$testEntry: |$testEntry| ";
# First test: entry with a gap; the match is replaced by the gap text ($3,
# because the whole pattern is wrapped in an extra capture group).
if ($testEntry =~ /\(\.\*\?\)/ && $text{$thisCV} =~ s/\b($testEntry)\b/$3/i) {
say LOG "\n===\n$thisNum |$testEntry| is found in first test.\n===";
$outString .= "[$entry]($pageThisEntry{$entry})\n";
say LOG $outString . "\n===" . $text{$thisCV};
$found = 1;
goto Breakout;
# Second test: entry at a word start, followed by a character that is neither
# a word character nor an apostrophe; the match (including that following
# character) is deleted from the working text.
} elsif ($text{$thisCV} =~ s/\b($testEntry)[^\w']//i) {
say LOG "\n===\n$thisNum |$testEntry| is found in second test.\n===";
$outString .= "[$entry]($pageThisEntry{$entry})\n";
say LOG $outString . "\n" . $text{$thisCV};
$found = 1;
goto Breakout;
} else {
#say LOG "and is not found in\n$text{$ref{$thisRef}}";
}
}
# Jump target for the two "goto Breakout" statements above; also reached
# normally when no candidate matched.
Breakout:
unless ($found) {
# Nothing matched: report the verse and item for manual follow-up.
say MISSING "$thisCV $thisNum";
say LOG "\$thisCV: $thisCV\t\$thisNum: $thisNum"
}
next if $found;
}
say LOG "*F*\t\$outString: $outString";
# Put the collected entries into the order in which they occur in the verse.
$outString = ProperOrderOutString($outString, $thisCV);
say LOG "Final \$outString:\n\$outString: $outString";
say OUT "$outString";
#say LOG "sub LinkSNsToULBtextViaEntries finished";
}
}
# Applies the exception-file adjustments stored for one verse to that verse's
# "√"-separated number list.
#   $_[0] - the current list for the verse
#   $_[1] - the verse reference, used as the key into %adjust / %specifiedText
# Returns the adjusted list: newly added items first, then what is left of the
# original list, joined by "√".
# $adjust{verse} holds "<old><TAB><new>" pairs separated by "√"; each pair is
# handled by the first branch below whose pattern it matches.
# Side effect: a trailing "√" is removed from $adjust{verse}.
# Fix: in the "number -> word" branch the original list is now edited with
# "=~"; it used to be written ".=", which ran the substitution against $_ and
# appended that substitution's return value to the list. Unused locals and a
# no-op substitution on the still-empty result were removed.
sub Adjust {
    my ($snsOld, $ref, $snsNew) = ($_[0], $_[1], "");
    say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: |$snsOld|";
    $snsOld =~ s/^ +/ /;
    $adjust{$ref} =~ s/√$//;
    say LOG "*0*\t\$adjust{$ref}: $adjust{$ref}";
    my @preadjustments = split /√/, $adjust{$ref};
    foreach my $adjustment (@preadjustments) {
        say LOG "<><>\t\$adjustment: >$adjustment<";
        if ($adjustment =~ /([^\t]*)\t\|\|$/) { # delete this from list to look for
            my $found = $1;
            $snsOld =~ s/\b$found\b ?//;
            say LOG "*1*\t\$found: $found should be deleted from \$snsOld: $snsOld";
        } elsif ($adjustment =~ /^(\d+)\t(\d+)$/) { # change to specified number
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1√//g;
            $snsNew .= "$found2√" unless $snsNew =~ /\b$found2\b /;
            say LOG "*2*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif ($adjustment =~ /^\|\|\t(.*)/) { # add this to list to look for
            my $adj = $1;
            say LOG "*3*\t\$adjustment: $adjustment, \$adj: $adj";
            if ($adj =~ /([^\t]*)\t([^\t]*)/) {
                # NOTE(review): this overwrites (rather than appends to)
                # anything collected in $snsNew so far - confirm intended.
                $snsNew = "[$1]($pageThisEntry{$2})"
            } else {
                $snsNew .= "$adj "
            }
        } elsif ($adjustment =~ /^(\d+)\t([\w\. \-\']+)$/) { # add specified word
            my ($found1, $found2) = ($1, $2);
            if ($snsOld =~ s/\b$found1\b/$found1\[$found2\]/) {
                say LOG "*\t4a\t\$snsOld: $snsOld";
                # Move the tagged item to the front of the list.
                if ($snsOld =~ s/^(.+√)($found1\[$found2\]√)/$2$1/) {}
                elsif ($snsOld =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/) {}
            }
            else {
                # The number is not in the original list: add it, tagged with
                # the word, to the new list instead.
                $snsNew .= "${found1}√";
                $snsOld =~ s/\b$found1\b//;
                say LOG "*\t4b\t\$snsNew: $snsNew";
                $snsNew =~ s/\b$found1\b/$found1\[$found2\]/;
                $snsNew =~ s/^(.+√)($found1\[$found2\]√)/$2$1/;
                $snsNew =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/;
            }
            $snsOld =~ s/ {2,}/ /;
            $snsOld =~ s/√$//;
            say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif (($adjustment =~ /^(\d+)\t([\/\d\w]+)$/)) { # add specified page
            # Only reached when the second field contains a "/": a field made
            # of word characters alone is already taken by the branch above.
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1/$found1\($found2\)/;
            say LOG "*5*\t\t\$snsOld: $snsOld";
        }
    }
    # Combine new and remaining old items and tidy the separators.
    $snsOld =~ s/^√//;
    $snsOld =~ s/√+/√/g;
    say LOG "\$snsNew: >$snsNew<\n\$snsNew+\$snsOld: >$snsNew< >$snsOld<";
    $snsNew = "$snsNew√$snsOld";
    say LOG "*5*\t*\t\$snsNew: |$snsNew|";
    while ($snsNew =~ s/(\d+) (\d+\[[^\[\]]*\])/$2$1/) {}
    say LOG "*5*\t**\t\$snsNew: |$snsNew|";
    $snsNew =~ s/√+/√/g;
    $snsNew =~ s/^[ √]//;
    say LOG "*6*\t\t\$snsNew: |$snsNew|";
    return $snsNew;
}
# Cross-checks, for every verse in %ref, the pages listed in
# $listOfPages{verse} against the space-separated list in $checkPages{verse}
# and writes "<verse><TAB><page>" to MISSING for each checked page that was
# not listed.
# NOTE: the only call to this routine in this file is commented out, so it
# does not run at present.
# Side effects: rewrites $listOfPages{verse} and tidies $SNsInCV{verse} and
# $checkPages{verse} in place; sets the global list separator $".
sub Output {
say "Outputting";
#say LOG "Output subRoutine called";
foreach my $key (sort keys %ref) {
# Pages already accounted for in this verse.
my %donePages;
my $thisRef = $ref{$key};
#print LOG "\$key: $key\t\$thisRef: $thisRef\t";
#say LOG "\$SNsInCV{$thisRef}: |$SNsInCV{$thisRef}|";
$SNsInCV{$thisRef} =~ s/^ +//;
$SNsInCV{$thisRef} =~ s/ +$//;
$SNsInCV{$thisRef} =~ s/ {2,}/ /;
#say LOG "\$SNsInCV{$thisRef}: |$SNsInCV{$thisRef}|";
#say LOG "\$listOfPages{$thisRef}: |$listOfPages{$thisRef}|";
#$listOfPages{$thisRef} =~ s/^ +//;
#$listOfPages{$thisRef} =~ s/ +$//;
#$listOfPages{$thisRef} =~ s/ {2,}/ /;
#say LOG "\$listOfPages{$thisRef}: |$listOfPages{$thisRef}|";
my @array = split /\n/, $listOfPages{$thisRef};
#say LOG "\@array: |@array|";
# NOTE(review): "sort sort {...}" presumably re-sorts the case-insensitive
# result with the default string comparison, which would discard the
# case-insensitive order - confirm which order is wanted.
my @sorted =
sort sort { lc($a) cmp lc($b) }
@array;
#say LOG "\@sorted: |@sorted|";
# Join the sorted lines back together with newlines ($" is the separator used
# when an array is interpolated into a string).
$" = "\n";
$listOfPages{$thisRef} = "@sorted";
say LOG "\$listOfPages{$thisRef}: $listOfPages{$thisRef}\n\$checkPages{$thisRef}: $checkPages{$thisRef}";
#say OUT "$thisRef: $listOfPages{$thisRef}\n";
# Tidy the check list: trim spaces and drop the first " ||" marker.
$checkPages{$thisRef} =~ s/^ +//;
$checkPages{$thisRef} =~ s/ +$//;
$checkPages{$thisRef} =~ s/ {2,}/ /;
$checkPages{$thisRef} =~ s/ \|\|//;
say LOG "\$checkPages{$thisRef}:\t|$checkPages{$thisRef}|";
my @checkArray = split / /, $checkPages{$thisRef};
# The first sorted element is discarded before the comparison.
# NOTE(review): the reason is not visible here - confirm it is not a real entry.
shift @sorted;
#say LOG "\@checkArray: |@checkArray|";
#say LOG "\@sorted: |@sorted|";
#shift @sorted;
#say LOG "\@sorted: |@sorted|";
foreach my $slice (@sorted) {
#print LOG "\$slice: $slice\t";
# Reduce "[entry](page)" to just "page".
$slice =~ s/\[.*?\]\((.*?)\)/$1/;
#say LOG "\t\$slice: $slice";
$donePages{$slice} = $slice;
#say LOG "\t\$donePages{$slice}: $donePages{$slice}"
}
#say LOG "\@checkArray: |@checkArray|";
# Report every page that should be present but was not produced.
foreach my $slice (@checkArray) {
#say LOG "\$slice: $slice";
unless (exists $donePages{$slice}) {
#say LOG "\$thisRef: $thisRef\t\$slice:$slice";
#say MISSING "$thisRef\t$slice\t||";
say MISSING "$thisRef\t$slice";
}
}
}
}
# Applies the "<old><TAB><new>" pairs stored in %substitutedPages (separated
# by ", ") to $SNsInCV{key} for every key of %pages, and records the result
# in %checkPages:
#   old is "||"  - append new (plus a space)
#   new is "||"  - delete the first "old " from the list
#   otherwise    - replace the first old with new
# After each pair every " ||" marker is stripped from $SNsInCV{key}; the
# %checkPages copy is taken just before that stripping. Keys without
# substitutions simply get $checkPages{key} = $SNsInCV{key}.
sub Substitute {
    for my $verse (sort keys %pages) {
        say LOG "\$key: $verse\t\$SNsInCV{$verse}: $SNsInCV{$verse}";
        if (!exists $substitutedPages{$verse}) {
            $checkPages{$verse} = $SNsInCV{$verse};
        }
        else {
            $substitutedPages{$verse} =~ s/, $//;
            for my $pair (split /, /, $substitutedPages{$verse}) {
                next unless $pair =~ /([^\t]*)\t([^\t]*)/;
                my ($from, $to) = ($1, $2);
                if ($from eq "||") {
                    $SNsInCV{$verse} .= "$to ";
                }
                elsif ($to eq "||") {
                    $SNsInCV{$verse} =~ s/$from //;
                }
                else {
                    $SNsInCV{$verse} =~ s/$from/$to/;
                }
                $checkPages{$verse} = $SNsInCV{$verse};
                $SNsInCV{$verse} =~ s/ \|\|//g;
            }
        }
        say LOG "<>\t\$key: $verse\t\$SNsInCV{$verse}: $SNsInCV{$verse}";
    }
}
# Reorders the "[entry](page)" lines collected for one verse so that they
# follow the order in which the entries occur in the verse text.
#   $_[0] - newline-separated "[entry](page)" lines
#   $_[1] - verse reference (key into %fullText)
# Returns the lines sorted by the character offset at which each entry was
# found, each followed by a newline. Lines whose entry cannot be found in the
# verse text are left out of the result.
# Side effect: every match is blanked out in $fullText{verse}, so a second
# identical entry finds the next occurrence.
# Fix: a two-part entry ("first...second") is now located with both of its
# parts; the pattern previously used the first part twice, so such entries
# matched only if the first part happened to occur twice in the verse.
sub ProperOrderOutString {
    my @unordered = split /\n/, $_[0];
    my ($thisCV, $outS) = ($_[1], "");
    my (%orderedSet);    # offset in the verse text => output line
    foreach my $thisSet (@unordered) {
        say LOG "\t>\t$thisSet";
        next unless $thisSet =~ /(\[([^\]]*)\])(\([^\)]*\))/;
        my ($ulb, $fileLoc) = ($2, $3);
        # " ... " in an entry stands for any stretch of text.
        $ulb =~ s/ \.\.\. /.*?/g;
        say LOG "\t>>\t\$ulb: $ulb, \$fileLoc: $fileLoc, \$fullText{$thisCV}\n$fullText{$thisCV}";
        if ($ulb =~ /^(.*)\.\.\.(.*)$/) {
            # An ellipsis written without surrounding spaces.
            my ($found1, $found2) = ($1, $2);
            my $blank1 = " " x length($found1);
            my $blank2 = " " x length($found2);
            if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*)\b$found2\b(.*)$/$1$blank1$2$blank2$3/i) {
                my ($order) = (length $1);    # offset of the first part
                say LOG "\t>>>\t$fullText{$thisCV}";
                $orderedSet{$order} = $thisSet;
                say LOG "\t\t\t\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
            }
        } else {
            my $blank = " " x length($ulb);
            if ($fullText{$thisCV} =~ s/^(.*?)\b$ulb\b(.*)$/$1$blank$2/i) {
                my ($order) = (length $1);    # offset of the entry
                say LOG "\t>>>>\t$fullText{$thisCV}";
                $orderedSet{$order} = $thisSet;
                say LOG "\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
            }
        }
    }
    foreach my $key (sort {$a<=>$b} keys %orderedSet) {
        $outS .= "$orderedSet{$key}\n"
    }
    return $outS;
}
__DATA__
mat Matthew
mrk Mark
luk Luke
jhn John
act Acts
rom Romans
1co 1 Corinthians
2co 2 Corinthians
gal Galatians
eph Ephesians
php Philippians
col Colossians
1th 1 Thessalonians
2th 2 Thessalonians
1ti 1 Timothy
2ti 2 Timothy
tit Titus
phm Philemon
heb Hebrews
jas James
1pe 1 Peter
2pe 2 Peter
1jn 1 John
2jn 2 John
3jn 3 John
jud Jude
rev Revelation