# Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.MAST_NT.pl

# Produces list of tWs for each verse by linking MAST_NT to ULB through tWs.

# Taken from tWs.from.MAST_NT.2.pl.

# The output from this script is useful for the interleaved PDFs used in MAST.
# This version uses an exception file to handle places where the MAST_NT points to
# a tW page different from that on which the ULB term appears.

# Usage: make sure $ULBfile points to the correct input file, then run the script.
# Output is written to the $output file.
# Afterwards, check the $missing file for needed corrections; these usually
# mean that lines need to be added to the $exceptions file.

use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use FindBin '$Bin';
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
use List::MoreUtils qw(uniq);
$|=1;

# Directory containing this script, and the platform's path separator.
my ($pwd, $d) = ($Bin, "/");
if ($^O eq "MSWin32") {
	$d = "\\";
	$pwd =~ s/\//\\/g;
}

# Name of the per-platform user defaults file (the Windows name is the fallback).
my ($udf) = "User_defaults.windows.txt";
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}

# Fail fast when the defaults file is missing or unreadable. This handle is
# only an existence check -- GetUserDefaults opens its own handle to parse the
# file -- so release it immediately instead of leaking it for the whole run.
open (my $defaults, "<:utf8", "$pwd${d}User${d}$udf") or die "$pwd${d}User${d}$udf:\n$!";
close $defaults;

# $cv is a zero-padded string counter (incremented once per ULB line read);
# the remaining values are the fixed input/output paths under the script directory.
my ($cv, $ULBfile, $exceptions, $missing, $output, $workFile) =
					("00000",
					"$Bin${d}Temp${d}Extract.txt",
					"$Bin${d}Exceptions${d}Exceptions_tWs_from_MAST_NT.txt",
					"$Bin${d}Output${d}Entries_not_handled.txt",
					"$Bin${d}Output${d}tWs_for_PDF.txt",
					"$Bin${d}User${d}tW_work_NT.txt",
					);

# Filled in by GetUserDefaults / GetULBBooksToProcess.
my ($MAST_NTfile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir);

# Shared lookup tables used by the subroutines below.
my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
	 %specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
	 %SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pageThisEntry, %adjust, %newRef);

my $book;
my (@MAST_NTfileList);
# ==============================

# Every relative path used from here on (Logs/..., User/...) is resolved
# against the script directory, so a failed chdir must abort the run.
chdir($pwd) or die "Cannot chdir to $pwd: $!";
open LOG, ">:utf8", "Logs${d}Exc_log.log" or die "\$log: Logs${d}Exc_log.log: $!";
open OUT, ">:utf8", $output or die "$output: $!";
open MISSING, ">:utf8", $missing or die "$missing: $!";

# Load the abbreviation <-> full book name table from the __DATA__ section
# (one "abbr<TAB>Full name" pair per line).
while (<DATA>) {
	chomp;
	if (/([^\t]*)\t([^\t]*)/) {
		$bkAbr{$2} = $1;
		$bkFull{$1} = $2;
	}
}

# Each phase logs to its own file, so LOG is re-opened between phases.
GetUserDefaults();
GetULBBooksToProcess();
ReadExceptions();
close LOG;
open LOG, ">:utf8", "Logs${d}tW_pairs.log" or die "Logs${d}tW_pairs.log: $!";
PairtWEntriesTotWPageAndUniqSNs();
close LOG;
open LOG, ">:utf8", "Logs${d}tWs_from_MAST.log" or die "Logs${d}tWs_from_MAST.log: $!";
ReadLinkedSNs();
LinkULBtoCV();
LinkSNsToULBtextViaEntries();
#Output();

# Buffered write errors only surface at close, so report them for the two result files.
close MISSING or warn "$missing: $!";
close OUT or warn "$output: $!";
close LOG;

# Quote the path so a script directory containing spaces still opens correctly.
if ($^O eq "darwin") {system (qq{$textEditor "$missing"})}

print "\n\tDone.\n\n";

# ==============================

# Reads the platform-specific user defaults file (relative to the current
# directory) and sets $textEditor, $repoPath, $topTwDir and $topSourceLangDir.
# Dies when the file cannot be opened or when either the "Text editor:" or the
# "Repository directory:" line is missing or empty.
sub GetUserDefaults {
	my $defaultsPath = "User${d}$udf";
	open (my $fh, "<:utf8", $defaultsPath) or die "$defaultsPath:\n$!";

	while (defined(my $entry = <$fh>)) {
		chomp $entry;
		if ($entry =~ /^Repository directory: (.*)$/) {
			$repoPath = $1;
		}
		elsif ($entry =~ /^Text editor: (.*)$/) {
			# On macOS the editor is launched through "open -a".
			$textEditor = ($^O eq "darwin") ? "open -a $1" : $1;
		}
	}

	if ($textEditor eq "") { die "No text editor found" }
	if ($repoPath eq "") { die "No path to repo found" }

	$topTwDir         = "$repoPath${d}en_tw${d}bible";
	$topSourceLangDir = "$repoPath${d}MAST_NT";

	close $fh;
}

# Reads $workFile and appends one MAST_NT XML path to @MAST_NTfileList for every
# line that has exactly four tab-separated fields and does not start with '#'.
# The file name is built from field 1, a hyphen, and the upper-cased field 3.
# $MAST_NTfile is left holding the last path built.
sub GetULBBooksToProcess {
	say LOG "GetULBBooksToProcess on \$workFile: $workFile";
	open (my $work, "<:utf8", "$workFile") or die "$workFile:\n$!";

	while (defined(my $row = <$work>)) {
		chomp $row;
		next unless $row =~ /^([^#][^\t]*)\t[^\t]*\t([^\t]*)\t[^\t]*$/;
		my $bookId = join "-", $1, uc $2;
		$MAST_NTfile = "$topSourceLangDir${d}$bookId.xml";
		push @MAST_NTfileList, $MAST_NTfile;
	}

	close $work;
}

# Loads the exceptions file into %adjust and %specifiedText.
# Each usable line has at least three tab-separated fields and does not start
# with '#': field 1 is the reference, fields 2-3 (kept joined by their tab) are
# appended to $adjust{reference} followed by a "√" separator, and the reference
# is flagged in %specifiedText. The collected adjustments are then logged.
sub ReadExceptions {
	say "Reading exceptions";
	say LOG "ReadExceptions from \$exceptions: $exceptions";
	open (my $excFh, "<:utf8", "$exceptions") or die "$exceptions:\n$!";

	while (defined(my $record = <$excFh>)) {
		chomp $record;
		my ($where, $change) = $record =~ /^([^#\n][^\t\n]*)\t([^\t\n]*\t[^\t\n]*)/
			or next;
		$adjust{$where} .= "$change√";
		$specifiedText{$where} = 1;
	}

	for my $where (sort keys %adjust) {
		say LOG "\$key: $where: \$adjust{$where}: $adjust{$where}"
	}
	close $excFh;
}

# Walks every Markdown file under $topTwDir and records, per tW page:
#   %entriesThisPage / %pageThisEntry / %sourcePage - the comma-separated entries
#       from the page's "# ..." heading line (parenthesised notes removed);
#   %entriesThisSN / %pagesThisSN / %relevantSNs   - the Greek Strong's numbers
#       (G followed by digits) found on "Strong's" lines.
# A page is identified by its path relative to $topTwDir, without ".md", and
# with the first path separator replaced by a comma.
# Dies if a page file cannot be opened.
sub PairtWEntriesTotWPageAndUniqSNs {
	say "Pairing tW entries with tW pages and unique Strong's numbers";
	my @filesToRun;
	# File::Find puts the entry's base name in $_. Match a real ".md" suffix:
	# the old glob-style '*.md' was used as a regex and so accepted any name
	# ending in an arbitrary character followed by "md".
	find (sub {push @filesToRun, $File::Find::name if /\.md\z/}, $topTwDir) ;
	@filesToRun = sort @filesToRun;
	foreach my $file (@filesToRun) {
		print ".";
		$file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";
		my ($thisList, $shortFile) = ("", $file);
		$shortFile =~ s/^\Q$topTwDir${d}\E//;
		$shortFile =~ s/\.md$//;
		$shortFile =~ s/\Q$d\E/,/;
		open (my $in, "<", $file) or die "$file: $!";
		while (my $line = <$in>) {
			if ($line =~ /^# ([^\n]*)$/) {
				$thisList = $1;
				$thisList =~ s/[\r\n]*$//;
				$thisList =~ s/ \([^\)]*\)//g;
				$entriesThisPage{$shortFile} = $thisList;
				my @pageEntries = split /, /, $thisList;
				foreach my $ULB_entry (@pageEntries) {
					$pageThisEntry{$ULB_entry} = $shortFile;
					say LOG "\$pageThisEntry{$ULB_entry}: $pageThisEntry{$ULB_entry}"
				}
				foreach my $slice (@pageEntries) {
					$sourcePage{$slice} = $shortFile;
					say LOG "$slice: \$sourcePage{$slice}: $sourcePage{$slice}";
				}
			}
			if ($line =~ /Strong's(.*)$/) {
				my $SNs = $1;
				# Require at least one digit so a bare "G" (e.g. in a word)
				# does not register an empty Strong's number.
				while ($SNs =~ s/G(\d+)//) {
					my $sn = $1;
					# Record the number directly; this replaces rebuilding a
					# de-duplicated list after every input line.
					$relevantSNs{$sn} = "$sn";
					$entriesThisSN{$sn} .= "$thisList, ";
					$pagesThisSN{$sn} .= "$shortFile, ";
				}
			}
		}
		close $in;
	}
	say "";
	foreach my $thisSN (sort keys %entriesThisSN) {
		say LOG "tW entries for $thisSN: $entriesThisSN{$thisSN}"
	}
}

# Reads $ULBfile, whose lines are "reference<TAB>text", and indexes them:
#   %text and %fullText - verse text by reference (two independent copies),
#   %ref                - reference by running counter $cv,
#   %order              - running counter by reference.
# Lines without a tab are skipped and do not advance the counter.
sub LinkULBtoCV {
	say "Linking ULB to chapter and verse";
	say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";
	open IN, "$ULBfile" or die "$ULBfile: $!";
	while (<IN>) {
		next unless /^([^\t]*)\t(.*)$/;
		my ($verseRef, $verseText) = ($1, $2);
		$cv ++;
		$text{$verseRef}     = $verseText;
		$fullText{$verseRef} = $verseText;
		$ref{$cv}            = $verseRef;
		$order{$verseRef}    = $cv;
	}
	close IN;
}

# Scans every file in @MAST_NTfileList and builds %SNsInCV: for each verse
# reference, the "√"-terminated list of Strong's numbers (taken from
# <w lemma="N"> attributes) that are present in %relevantSNs, without repeats.
# A reference is "<full book name> <chapter>:<verse>" built from the verse's
# osisID via %bkFull, unless %newRef supplies a replacement for
# "<book> <chapter>:<verse>". Dies if a source file cannot be opened.
# NOTE(review): lemmas on the same line as a <verse ...> tag are not scanned
# (unchanged from the original) - confirm the source files keep them on
# separate lines.
sub ReadLinkedSNs {
	say "Reading linked Strong's numbers";
	foreach my $MAST_NTfile (@MAST_NTfileList) {
		say LOG "opening \$MAST_NTfile: $MAST_NTfile";
		open (my $in, "<", $MAST_NTfile) or die "$MAST_NTfile can't be opened: $!\n\n";
		my $thisRef;
		while (my $line = <$in>) {
			chomp $line;
			# The separators are literal dots; unescaped, any character
			# would have been accepted between book, chapter and verse.
			if ($line =~ /<verse osisID="([^.]*)\.(\d+)\.(\d+)">/) {
				my ($bk, $ch, $vs) = ($1, $2, $3);
				if (exists $newRef{"$bk $ch:$vs"}) {
					$thisRef = $newRef{"$bk $ch:$vs"}
				} else {
					$thisRef = "$bkFull{$bk} $ch:$vs";
				}
			}
			elsif (defined $thisRef) {
				# Words seen before the first verse have no reference to attach
				# to; skipping them avoids filing them under an empty key.
				while ($line =~ /<w lemma="(\d+)"/g) {
					my $thisNum = $1;
					next unless exists $relevantSNs{$thisNum};
					$SNsInCV{$thisRef} .= "$thisNum√" unless ($SNsInCV{$thisRef} =~ /\b$thisNum\b/);
				}
			}
		}
		close $in;
	}
	# Move any entries still filed under an old reference to its replacement.
	my %temp;
	foreach my $oldRef (sort keys %SNsInCV) {
		if (exists $newRef{$oldRef}) {
			$temp{$newRef{$oldRef}} = $SNsInCV{$oldRef};
			delete $SNsInCV{$oldRef};
		}
	}
	foreach my $changedRef (sort keys %temp) {
		$SNsInCV{$changedRef} = $temp{$changedRef};
		say LOG "\$SNsInCV{$changedRef}: $SNsInCV{$changedRef}";
	}
}

# For every ULB verse (in the order of the %ref counter keys), works out which
# tW entries occur in the verse text and writes them to OUT as
# "[entry](page)" lines under a "<reference>:" heading.
#
# Per verse:
#   1. Take the verse's "√"-separated Strong's numbers from %SNsInCV, applying
#      Adjust() first when the exceptions file names this verse.
#   2. For each distinct number, either use the entry forced by an exception
#      ("NUM[text]"), or try each candidate entry for that number (or for the
#      page forced by "NUM(page)") against the remaining verse text.
#   3. Matched text is removed from $text{verse} so it cannot be matched again.
#   4. Numbers for which nothing matched are written to MISSING.
#   5. The collected lines are reordered by ProperOrderOutString before output.
#
# Side effects: mutates %SNsInCV, %text, %listOfPages, %workEntries, %ulbOrder
# and the global list separator $"; dies when a forced entry has no known page.
sub LinkSNsToULBtextViaEntries  {
	say "Linking Strong's numbers to ULB text via tW page entries";
	say LOG "sub LinkSNsToULBtextViaEntries called";
	foreach my $thisRef (sort keys %ref) {
		say LOG "\nLinkSNsToULBtextViaEntries: \$thisRef: $thisRef\t\$ref{$thisRef}: $ref{$thisRef}, \$specifiedText{$ref{$thisRef}}: $specifiedText{$ref{$thisRef}}";
		(%workEntries, %ulbOrder) = ();
		my %workPage;
		# $thisRef is the counter key; $thisCV is the actual verse reference.
		my ($thisCV, $checkList, $tempString, $outString) = ($ref{$thisRef}, "", "", "");
		say OUT "$thisCV:";
		my (@allEntries);
		$listOfPages{$thisCV} = "";
		#say LOG "\$SNsInCV{$thisCV}: |$SNsInCV{$thisCV}|\n$text{$thisCV}";

		say LOG "\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
		# Normalise the number list: drop trailing separators and stray spaces.
		$SNsInCV{$thisCV} =~ s/√+$//;
		$SNsInCV{$thisCV} =~ s/^ +//;
		$SNsInCV{$thisCV} =~ s/ +$//;
		$SNsInCV{$thisCV} =~ s/ {2,}/ /g;
		#say LOG "*\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
		# Verses named in the exceptions file get their list rewritten by Adjust,
		# then normalised again.
		if (exists $specifiedText{$thisCV}) {
			#say LOG "*\t\$SNsInCV{$thisCV}: $SNsInCV{$thisCV}";

			$SNsInCV{$thisCV} = Adjust($SNsInCV{$thisCV}, $thisCV);

			$SNsInCV{$thisCV} =~ s/^ +(.*)/$1/;
			$SNsInCV{$thisCV} =~ s/(.*) +$/$1/;
			$SNsInCV{$thisCV} =~ s/ {2,}/ /g;
			$SNsInCV{$thisCV} =~ s/^√+//;
			$SNsInCV{$thisCV} =~ s/√{2,}/√/g;
			$SNsInCV{$thisCV} =~ s/√ /√/g;
			$SNsInCV{$thisCV} =~ s/√$//;
			say LOG "**\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
		}
		#say LOG "<>\t<>\t\$SNsInCV{$thisCV}: |$SNsInCV{$thisCV}|";
		say LOG "*7*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
#		while ($SNsInCV{$thisCV} =~ s/(\[[^\]]*?) ([^\]]*?\] )/$1√$2/) {};
		say LOG "*8*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
		# Keep only the first item for each leading number.
		my @tempArray = split /√/, $SNsInCV{$thisCV};
		my %alreadyUsed;
		my @regArray;
		foreach my $slice (@tempArray) {
			if ($slice =~ /^(\d*)/) {
				my $number = "$1";
				push (@regArray, $slice) unless (exists $alreadyUsed{$number});
				$alreadyUsed{$number} = $number
			}
		}
		# NOTE: $" is the global list separator; it is changed here (and again
		# below) purely to format interpolated arrays in the log, and is not restored.
		$" = "|\n";
		say LOG "*9*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}\n\@regArray: >@regArray<";
		foreach my $thisNum (@regArray) {
			say LOG "\$thisNum: $thisNum";
			my ($found, $specPage);
			# Form "NUM[text]": the entry text is forced, so no searching through
			# candidate entries is needed.
			if ($thisNum =~ /(\d+)\[(.*?)\]/) {
				($thisNum) = ($1);
				my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($2, $2, $2);
				# Look the entry up as written, then lower-cased; give up if
				# neither spelling has a known page.
				unless (exists $pageThisEntry{$forced_entry_for_page}) {
					my $try = lc $forced_entry_for_page;
					if (exists $pageThisEntry{$try}) {
						$forced_entry_for_page = lc $forced_entry_for_page
					}
					else {
						say "\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page";
						die
					}
				}
				say LOG
"*0*	\$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
				#while ($forced_entry_for_display =~ s/([^ \]]*?)√([^ \]]*?)/$1 $2/) {}
				$forced_entry_for_search = lc $forced_entry_for_display;
				#while ($forced_entry_for_search =~ s/ ... /\.\*\?/) {}
				say LOG
"*A*	\$thisNum: >>$thisNum<<	\$forced_entry_for_display: >$forced_entry_for_display<,
	\$forced_entry_for_search: >>$forced_entry_for_search<<
	\$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
				$outString .= "[$forced_entry_for_display]($pageThisEntry{$forced_entry_for_page})\n";
				say LOG "\t\t$outString:\n$outString";
				# Remove the forced text from the working verse text. Entries
				# written "a ... b" (or "a ... b ... c") are split parts: only the
				# parts are removed, the text between them is kept.
				if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) {
					my ($first, $second, $third) = ($1, $2, $3);
					$text{$thisCV} =~ s/$first(.*?)$second(.*?)$third/$1 $2/i;
					say LOG "\t*\t$text{$thisCV}";
				} elsif ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*)/) {
					my ($first, $second) = ($1, $2);
					$text{$thisCV} =~ s/$first(.*?)$second/$1/i;
					say LOG "\t**\t$text{$thisCV}";
				} else {
					#say "\$text{$thisCV}:\n$text{$thisCV}\n\$forced_entry_for_search: |$forced_entry_for_search|";
					$text{$thisCV} =~ s/$forced_entry_for_search//i;
					say LOG "\t***\t$text{$thisCV}";
				}
				next;
			# Form "NUM(page)": the tW page is forced; candidates come from that page.
			} elsif ($thisNum =~ /\d+(\(([\w\/]+)\))/) {
				$specPage .= $2;
				say LOG "*B*\t\$specPage: $specPage";
			} else {
				say LOG "*C*\t\$thisNum: $thisNum";
			}
			say LOG "\t\$specPage: $specPage";
			# Candidate entries: those of the forced page, otherwise all entries
			# of every page that lists this Strong's number.
			if ($specPage) {
				$workEntries{$thisNum} = $entriesThisPage{$specPage};
			} else {
				$workEntries{$thisNum} = $entriesThisSN{$thisNum};
			}
			$workEntries{$thisNum} =~ s/, $//;
			say LOG "*D*\t\$thisNum: >$thisNum<\t\$workEntries{$thisNum}: >$workEntries{$thisNum}<";
			my @beforeArray = split /, /, $workEntries{$thisNum};
			# NOTE(review): the comparator applies numeric <=> to entry text. For
			# entries that do not start with a digit the first and last keys compare
			# equal, leaving length as the effective key, so after "reverse" longer
			# entries are tried first - confirm this is the intent.
			my @sortedArray = reverse sort {  substr($a,0,1) <=> substr($b,0,1)
				 || length($a) <=> length($b)
				 || $a <=> $b }
				@beforeArray;
			$" = "\n\t";
			say LOG "*E*\t\@sortedArray: @sortedArray\n\$outString: $outString\n\$text{$ref{$thisRef}}: $text{$ref{$thisRef}}";
			foreach my $entry (@sortedArray) {
				my $testEntry = $entry;
				#print LOG "\$entry: $entry. Becomes ";
				# Turn "a ... b" into the pattern (a)\b(.*?)\b(b) so the two parts
				# may be separated by other words.
				while ($testEntry =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
				#print LOG "\$testEntry: |$testEntry| ";
				# First test (split entries only): on a match the whole span is
				# replaced by capture 3, i.e. the text between the two parts.
				if ($testEntry =~ /\(\.\*\?\)/ && $text{$thisCV} =~ s/\b($testEntry)\b/$3/i) {
					say LOG "\n===\n$thisNum |$testEntry| is found in first test.\n===";
					$outString .= "[$entry]($pageThisEntry{$entry})\n";
					say LOG $outString . "\n===" . $text{$thisCV};
					$found = 1;
					goto Breakout;
				# Second test: the entry followed by a character that is neither a
				# word character nor an apostrophe; both are removed from the text.
				} elsif ($text{$thisCV} =~ s/\b($testEntry)[^\w']//i) {
					say LOG "\n===\n$thisNum |$testEntry| is found in second test.\n===";
					$outString .= "[$entry]($pageThisEntry{$entry})\n";
					say LOG $outString . "\n" . $text{$thisCV};
					$found = 1;
					goto Breakout;
				} else {
					#say LOG "and is not found in\n$text{$ref{$thisRef}}";
				}
			}
			# Reached after the first successful match or when all candidates failed.
			Breakout:
			unless ($found) {
				say MISSING "$thisCV	$thisNum";
				say LOG "\$thisCV: $thisCV\t\$thisNum: $thisNum"
			}
			next if $found;
		}
		say LOG "*F*\t\$outString: $outString";
		# Put the collected links into the order their text appears in the verse.
		$outString = ProperOrderOutString($outString, $thisCV);
		say LOG "Final \$outString:\n\$outString: $outString";
		say OUT "$outString";
	#say LOG "sub LinkSNsToULBtextViaEntries finished";
	}
}

# Applies the exceptions recorded in $adjust{$ref} to a verse's list of
# Strong's numbers and returns the rewritten "√"-separated list.
#
# Parameters:
#   $_[0] - the verse's current number list ("N√N√...").
#   $_[1] - the verse reference used as the key into %adjust / %specifiedText.
#
# Each adjustment is "<old>\t<new>" and is handled by the first matching rule:
#   "X\t||"       - remove X from the list;
#   "N\tM"        - (both numeric) remove N and queue M;
#   "||\tX"       - queue X; with a second tab ("||\tTEXT\tENTRY") the queue is
#                   replaced by a ready-made "[TEXT](page of ENTRY)" link;
#   "N\twords"    - tag N as "N[words]" (forced entry text) and move it to the
#                   front; if N is not in the list it is queued as "N[words]";
#   "N\tpage"     - (page containing "/") tag N as "N(page)".
# The queued items are placed before what is left of the original list.
#
# Side effect: a trailing "√" is stripped from $adjust{$ref}.
sub Adjust {
	my ($snsOld, $ref) = @_;
	my $snsNew = "";
	say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: |$snsOld|";
	$snsOld =~ s/^ +/ /;
	$adjust{$ref} =~ s/√$//;
	say LOG "*0*\t\$adjust{$ref}: $adjust{$ref}";
	my @preadjustments = split /√/, $adjust{$ref};
	foreach my $adjustment (@preadjustments) {
		say LOG "<><>\t\$adjustment: >$adjustment<";
		if ($adjustment =~ /([^\t]*)\t\|\|$/) { # delete this from list to look for
			my $found = $1;
			$snsOld =~ s/\b$found\b ?//;
			say LOG "*1*\t\$found: $found should be deleted from \$snsOld: $snsOld";
		} elsif ($adjustment =~ /^(\d+)\t(\d+)$/) { # change to specified number
			my ($found1, $found2) = ($1, $2);
			$snsOld =~ s/$found1√//g;
			$snsNew .= "$found2√" unless $snsNew =~ /\b$found2\b /;
			say LOG "*2*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
		} elsif ($adjustment =~ /^\|\|\t(.*)/) { # add this to list to look for
			my $adj = $1;
			say LOG "*3*\t\$adjustment: $adjustment, \$adj: $adj";
			if ($adj =~ /([^\t]*)\t([^\t]*)/) {
				$snsNew = "[$1]($pageThisEntry{$2})"
			} else {
				$snsNew .= "$adj "
			}
		} elsif ($adjustment =~ /^(\d+)\t([\w\. \-\']+)$/) { # add specified word
			my ($found1, $found2) = ($1, $2);
			if ($snsOld =~ s/\b$found1\b/$found1\[$found2\]/) {
				say LOG "*\t4a\t\$snsOld: $snsOld";
				# Move the tagged number to the front of the list.
				if ($snsOld =~ s/^(.+√)($found1\[$found2\]√)/$2$1/) {}
				elsif ($snsOld =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/) {}
			}
			else {
				$snsNew .= "${found1}√";
				# The original had `$snsOld .= s/\b$found1\b//;` here: a `.=` typo
				# for `=~` that ran the substitution on $_ and appended its result
				# to $snsOld. $found1 was just shown to be absent from $snsOld, so
				# there is nothing to remove and the statement is dropped.
				say LOG "*\t4b\t\$snsNew: $snsNew";
				$snsNew =~ s/\b$found1\b/$found1\[$found2\]/;
				$snsNew =~ s/^(.+√)($found1\[$found2\]√)/$2$1/;
				$snsNew =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/;
			}
			$snsOld =~ s/ {2,}/ /;
			$snsOld =~ s/√$//;
			say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
		} elsif (($adjustment =~ /^(\d+)\t([\/\d\w]+)$/)) { # add specified page
			my ($found1, $found2) = ($1, $2);
			$snsOld =~ s/$found1/$found1\($found2\)/;
			say LOG "*5*\t\t\$snsOld: $snsOld";
		}
	}
	$snsOld =~ s/^√//;
	$snsOld =~ s/√+/√/g;
	say LOG "\$snsNew: >$snsNew<\n\$snsNew+\$snsOld: >$snsNew< >$snsOld<";
	# Queued items first, then the remainder of the original list.
	$snsNew = "$snsNew√$snsOld";
	say LOG "*5*\t*\t\$snsNew: |$snsNew|";
	while ($snsNew =~ s/(\d+) (\d+\[[^\[\]]*\])/$2$1/) {}
	say LOG "*5*\t**\t\$snsNew: |$snsNew|";
	$snsNew =~ s/√+/√/g;
	$snsNew =~ s/^[ √]//;
	say LOG "*6*\t\t\$snsNew: |$snsNew|";
	return $snsNew;
}

# Cross-checks, for every verse, the pages listed in %listOfPages against the
# pages expected in %checkPages and writes each expected page that was not
# listed to MISSING as "<reference>\t<page>".
# Side effects: normalises whitespace in %SNsInCV and %checkPages, rewrites
# %listOfPages with the sorted list, and sets the global list separator $".
# NOTE(review): the only call to this sub in the main flow is commented out.
sub Output {
	say "Outputting";
	#say LOG "Output subRoutine called";
	foreach my $key (sort keys %ref) {
		my %donePages;
		# $key is the counter key; $thisRef is the actual verse reference.
		my $thisRef = $ref{$key};
		#print LOG "\$key: $key\t\$thisRef: $thisRef\t";
		#say LOG "\$SNsInCV{$thisRef}: |$SNsInCV{$thisRef}|";
		$SNsInCV{$thisRef} =~ s/^ +//;
		$SNsInCV{$thisRef} =~ s/ +$//;
		$SNsInCV{$thisRef} =~ s/ {2,}/ /;
		#say LOG "\$SNsInCV{$thisRef}: |$SNsInCV{$thisRef}|";
		#say LOG "\$listOfPages{$thisRef}: |$listOfPages{$thisRef}|";
		#$listOfPages{$thisRef} =~ s/^ +//;
		#$listOfPages{$thisRef} =~ s/ +$//;
		#$listOfPages{$thisRef} =~ s/ {2,}/ /;
		#say LOG "\$listOfPages{$thisRef}: |$listOfPages{$thisRef}|";
		my @array = split /\n/, $listOfPages{$thisRef};
		#say LOG "\@array: |@array|";
		# NOTE(review): "sort sort { ... }" looks like a typo; presumably the
		# outer default sort is applied to the case-insensitively sorted list,
		# which would undo the case-insensitive order - verify.
		my @sorted =
			sort sort { lc($a) cmp lc($b) }
			 @array;
		#say LOG "\@sorted: |@sorted|";
		# Join the sorted entries back with newlines via the list separator.
		$" = "\n";
		$listOfPages{$thisRef} = "@sorted";
		say LOG "\$listOfPages{$thisRef}: $listOfPages{$thisRef}\n\$checkPages{$thisRef}: $checkPages{$thisRef}";
		#say OUT "$thisRef: $listOfPages{$thisRef}\n";
		$checkPages{$thisRef} =~ s/^ +//;
		$checkPages{$thisRef} =~ s/ +$//;
		$checkPages{$thisRef} =~ s/ {2,}/ /;
		$checkPages{$thisRef} =~ s/ \|\|//;
		say LOG "\$checkPages{$thisRef}:\t|$checkPages{$thisRef}|";
		my @checkArray = split / /, $checkPages{$thisRef};
		# The first sorted element is discarded before the comparison.
		shift @sorted;
		#say LOG "\@checkArray: |@checkArray|";
		#say LOG "\@sorted: |@sorted|";
		#shift @sorted;
		#say LOG "\@sorted: |@sorted|";
		# Reduce each "[entry](page)" link to its page and remember it as done.
		foreach my $slice (@sorted) {
			#print LOG "\$slice: $slice\t";
			$slice =~ s/\[.*?\]\((.*?)\)/$1/;
			  #say LOG "\t\$slice: $slice";
			  $donePages{$slice} = $slice;
			  #say LOG "\t\$donePages{$slice}: $donePages{$slice}"
		}
		#say LOG "\@checkArray: |@checkArray|";
			# Report every expected page that was not among the done pages.
			foreach my $slice (@checkArray) {
				#say LOG "\$slice: $slice";
				unless (exists $donePages{$slice}) {
					#say LOG "\$thisRef: $thisRef\t\$slice:$slice";
					#say MISSING "$thisRef\t$slice\t||";
					say MISSING "$thisRef\t$slice";
				}
			}
	}
}

# For every key of %pages, applies the "old<TAB>new" pairs stored (", "-separated)
# in $substitutedPages{key} to $SNsInCV{key}, and mirrors the result into
# %checkPages:
#   old is "||"  - append "new " to the list;
#   new is "||"  - remove the first "old " from the list;
#   otherwise    - replace the first "old" with "new".
# %checkPages receives the list as it stands right after each pair is applied,
# i.e. before any " ||" markers are stripped from %SNsInCV. Keys without
# substitutions simply copy %SNsInCV into %checkPages.
sub Substitute {
	for my $cvKey (sort keys %pages) {
		say LOG "\$key: $cvKey\t\$SNsInCV{$cvKey}: $SNsInCV{$cvKey}";
		if (!exists $substitutedPages{$cvKey}) {
			$checkPages{$cvKey} = $SNsInCV{$cvKey};
		}
		else {
			$substitutedPages{$cvKey} =~ s/, $//;
			for my $pair (split /, /, $substitutedPages{$cvKey}) {
				next unless $pair =~ /([^\t]*)\t([^\t]*)/;
				my ($from, $to) = ($1, $2);
				if ($from eq "||") {
					$SNsInCV{$cvKey} .= "$to ";
				}
				elsif ($to eq "||") {
					$SNsInCV{$cvKey} =~ s/$from //;
				}
				else {
					$SNsInCV{$cvKey} =~ s/$from/$to/;
				}
				$checkPages{$cvKey} = $SNsInCV{$cvKey};
				$SNsInCV{$cvKey} =~ s/ \|\|//g;
			}
		}
		say LOG "<>\t\$key: $cvKey\t\$SNsInCV{$cvKey}: $SNsInCV{$cvKey}";
	}
}

# Reorders "[entry](page)" lines so they follow the order in which the entry
# text appears in the verse.
#
# Parameters:
#   $_[0] - newline-separated "[entry](page)" lines.
#   $_[1] - verse reference; the verse text is read from $fullText{reference}.
# Returns the lines (each newline-terminated) sorted by the character offset
# at which the entry was found. Lines whose entry is not found in the verse
# text are omitted.
#
# Side effect: every matched entry is overwritten with spaces in
# $fullText{reference}, so a repeated entry matches its next occurrence.
sub ProperOrderOutString {
	my ($unorderedText, $thisCV) = @_;
	my $outS = "";
	my %orderedSet;
	foreach my $thisSet (split /\n/, $unorderedText) {
		say LOG "\t>\t$thisSet";
		next unless $thisSet =~ /(\[([^\]]*)\])(\([^\)]*\))/;
		my ($ulb, $fileLoc) = ($2, $3);
		# "a ... b" becomes the pattern a.*?b and is matched as one span below.
		$ulb =~ s/ \.\.\. /.*?/g;
		say LOG "\t>>\t\$ulb: $ulb, \$fileLoc: $fileLoc, \$fullText{$thisCV}\n$fullText{$thisCV}";
		if ($ulb =~ /^(.*)\.\.\.(.*)$/) {
			# Dots without surrounding spaces: two separate parts.
			my ($found1, $found2) = ($1, $2);
			my $blank1 = " " x length $found1;
			my $blank2 = " " x length $found2;
			# Match the first part followed later by the SECOND part. The
			# original repeated $found1 here, so such entries only matched
			# when the first part happened to occur twice.
			if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*)\b$found2\b(.*)$/$1$blank1$2$blank2$3/i) {
				say LOG "\t>>>\t$fullText{$thisCV}";
				my ($order) = (length $1);
				$orderedSet{$order} = $thisSet;
				say LOG "\t\t\t\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
			}
		} else {
			my $blank = " " x length $ulb;
			if ($fullText{$thisCV} =~ s/^(.*?)\b$ulb\b(.*)$/$1$blank$2/i) {
				say LOG "\t>>>>\t$fullText{$thisCV}";
				# The length of the text before the match is the sort key.
				my ($order) = (length $1);
				$orderedSet{$order} = $thisSet;
				say LOG "\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
			}
		}
	}
	foreach my $key (sort {$a<=>$b} keys %orderedSet) {
		$outS .= "$orderedSet{$key}\n"
	}
	return $outS;
}

__DATA__
mat	Matthew
mrk	Mark
luk	Luke
jhn	John
act	Acts
rom	Romans
1co	1 Corinthians
2co	2 Corinthians
gal	Galatians
eph	Ephesians
php	Philippians
col	Colossians
1th	1 Thessalonians
2th	2 Thessalonians
1ti	1 Timothy
2ti	2 Timothy
tit	Titus
phm	Philemon
heb	Hebrews
jas	James
1pe	1 Peter
2pe	2 Peter
1jn	1 John
2jn	2 John
3jn	3 John
jud	Jude
rev	Revelation