Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.MAST_HB.pl

# Produces list of tWs for each verse by linking MAST_HB to ULB through tWs.

# Taken from tWs.from.UGNT.7.pl, with changes needed because that used USFM
# and this uses XML

# The output from this script is useful for the interleaved PDFs used in MAST.
# This version uses an exception file to handle places where the MAST_HB points to
# a tW page different from that on which the ULB term appears.

# Usage: make sure $ULBfile points to the correct input file, then run the script.
# Output is written to the $output file.
# Afterwards, check the $not_handled file for needed corrections; these are
# probably lines that need to be added to the $exceptions file.
# Change from ver. 2 in that this uses converted MAST_HB files.
# Change from ver. 3 in that this uses the KJV verses in the XML file, not the remaps file

use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use FindBin '$Bin';
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
use List::MoreUtils qw(uniq);
# Turn on autoflush for the currently selected output handle.
$| = 1;
# Arrays interpolated into strings are joined with newlines.
$" = "\n";

# Directory separator and script directory, in the platform's notation.
my $d   = $^O eq "MSWin32" ? "\\" : "/";
my $pwd = $Bin;
$pwd =~ tr{/}{\\} if $^O eq "MSWin32";

# Name of the per-platform user defaults file (Windows is the fallback).
my $udf = $^O eq "linux"  ? "User_defaults.linux.txt"
        : $^O eq "darwin" ? "User_defaults.mac.txt"
        :                   "User_defaults.windows.txt";

# Opening the defaults file here makes the script stop before any log or
# output file is created when the file is missing. This handle is not read
# in this part of the file; GetUserDefaults opens the file again itself.
my $defaultsPath = "$pwd${d}User${d}$udf";
open (my $defaults, "<:utf8", $defaultsPath) or die "$defaultsPath:\n$!";

# Verse counter. It is deliberately a zero-padded string: LinkULBtoCV
# increments it with ++, which keeps the padding, so the keys of %ref sort
# correctly as strings.
my $cv          = "00000";

# Input, output and log locations.
my $ULBfile     = "$Bin${d}Temp${d}Extract.txt";
my $exceptions  = "$Bin${d}Exceptions${d}Exceptions_tWs_from_MAST_HB.txt";
my $not_handled = "$Bin${d}Output${d}Entries_not_handled.txt";
my $exc_log     = "Logs${d}Exc_log.log";
my $tW_log      = "Logs${d}tW_pairs.log";
my $log         = "Logs${d}tWs_from_MAST.log";
my $output      = "Output${d}tWs_for_PDF.txt";
my $workFile    = "User${d}tW_work_OT.txt";

# Filled in by GetUserDefaults and GetULBBooksToProcess.
my $MAST_HBfile;
my $topTwDir;
my $textEditor;
my $repoPath;
my $topSourceLangDir;

# Shared lookup tables used by the subroutines below.
my (
	%entries, %text, %ref, %order, %pages,
	%listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
	%specifiedText, %realPage, %checkPage, %workEntries, %vsn,
	%StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull,
	%relevantSNs, %SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN,
	%pageThisEntry, %adjust, %newRef,
);

my $book;
my @MAST_HBfileList;

# ==============================

# Main flow. Work from the script directory so that the relative Logs,
# Output and User paths resolve.
chdir($pwd) or die "Cannot chdir to $pwd: $!";

# LOG is reopened on a different file for each phase below; OUT receives the
# per-verse tW links and MISSING the verse/number pairs that were not matched.
open LOG, ">:utf8", $exc_log or die "\$exc_log: $exc_log: $!";
open OUT, ">:utf8", $output or die "\$output: $output: $!";
open MISSING, ">:utf8", $not_handled or die "\$not_handled: $not_handled: $!";

# Load the book table after __DATA__: abbreviation <TAB> full name.
while (<DATA>) {
	chomp;
	if (/([^\t]*)\t([^\t]*)/) {
		$bkAbr{$2} = $1;
		$bkFull{$1} = $2;
		say LOG "\$bkAbr{$2}: $bkAbr{$2}, \$bkFull{$1}: $bkFull{$1}";
	}
}
GetUserDefaults();
GetULBBooksToProcess();
ReadExceptions();
close LOG;
open LOG, ">:utf8", $tW_log or die "\$tW_log: $tW_log: $!";
PairtWEntriesTotWPageAndUniqSNs();
close LOG;
open LOG, ">:utf8", $log or die "\$log: $log: $!";
ReadLinkedSNs();
LinkULBtoCV();
LinkSNsToULBtextViaEntries();
#Output();

close MISSING;
close OUT;
close LOG;

# On macOS, show the unmatched entries in the configured editor. The file
# name is quoted so that a script directory containing spaces still works.
if ($^O eq "darwin") {system ("$textEditor \"$not_handled\"")}

print "\n\tDone.\n\n";

# ==============================

# Reads the user defaults file and sets $textEditor, $repoPath, $topTwDir and
# $topSourceLangDir. Dies when the text editor or the repository directory
# is not given in the file.
sub GetUserDefaults {
	my $defaultsFile = "User${d}$udf";
	open (my $fh, "<:utf8", $defaultsFile) or die "$defaultsFile:\n$!";

	while (my $entry = <$fh>) {
		chomp $entry;
		if (my ($editor) = $entry =~ /^Text editor: (.*)$/) {
			# On macOS the editor is started through "open -a".
			$textEditor = $^O eq "darwin" ? "open -a $editor" : $editor;
			next;
		}
		if (my ($repo) = $entry =~ /^Repository directory: (.*)$/) {
			$repoPath = $repo;
		}
	}

	say LOG "\$textEditor: $textEditor\n\$repoPath: $repoPath";
	die "No text editor found" if $textEditor eq "";
	die "No path to repo found" if $repoPath eq "";

	# Both source trees live directly below the repository directory.
	$topTwDir         = "$repoPath${d}en_tw${d}bible";
	$topSourceLangDir = "$repoPath${d}MAST_HB";

	close $fh;
}

# Reads the work file and builds @MAST_HBfileList, the list of MAST_HB XML
# files to process. Every line with four tab-separated fields that does not
# start with "#" contributes "<field 1>-<FIELD 3>.xml" below $topSourceLangDir.
sub GetULBBooksToProcess {
	say LOG "GetULBBooksToProcess on \$workFile: $workFile";
	open (my $workList, "<:utf8", "$workFile") or die "$workFile:\n$!";

	while (my $row = <$workList>) {
		chomp $row;
		next unless $row =~ /^([^#][^\t]*)\t[^\t]*\t([^\t]*)\t[^\t]*$/;
		my $bookFile = $1 . "-" . uc($2);
		$MAST_HBfile = "$topSourceLangDir${d}$bookFile.xml";
		push @MAST_HBfileList, $MAST_HBfile;
	}
	close $workList;
	say LOG "\@MAST_HBfileList: @MAST_HBfileList"
}

# Reads the exceptions file into %adjust and %specifiedText. A usable line
# has at least three tab-separated fields and does not start with "#": the
# first field is the key, and fields two and three (kept joined by their
# tab) are appended to $adjust{key} followed by ", ".
sub ReadExceptions {
	say "Reading exceptions";
	say LOG "ReadExceptions from \$exceptions: $exceptions";
	open (my $excFile, "<:utf8", "$exceptions") or die "$exceptions:\n$!";

	while (my $row = <$excFile>) {
		chomp $row;
		next unless $row =~ /^([^#\n][^\t\n]*)\t([^\t\n]*\t[^\t\n]*)/;
		my ($rf, $oldNew) = ($1, $2);
		say LOG "\$line: $row, \$rf: $rf, \$oldNew: $oldNew";
		$adjust{$rf} .= "$oldNew, ";
		# Marks the key as having exceptions; the value itself carries no data.
		$specifiedText{$rf} = 1;
	}
	say LOG "\$key: $_: \$adjust{$_}: $adjust{$_}" for sort keys %adjust;
	close $excFile;
}

# Scans every Markdown page below $topTwDir and fills the lookup tables that
# connect tW entries, tW pages and Strong's numbers:
#   %entriesThisPage  page  -> heading text (the comma-separated entries)
#   %pageThisEntry    entry -> page
#   %sourcePage       entry -> page
#   %relevantSNs      Strong's number -> itself (used as a set)
#   %entriesThisSN    Strong's number -> entries of each page naming it
#   %pagesThisSN      Strong's number -> pages naming it
# A page is identified by its path relative to $topTwDir, without ".md" and
# with the first directory separator replaced by a comma.
sub PairtWEntriesTotWPageAndUniqSNs {
	say "Pairing tW entries with tW pages and unique Strong's numbers";
	my @filesToRun;
	# Match on a real ".md" suffix. The previous glob-style pattern '*.md' was
	# interpolated as a regex and accepted any name ending in "<any char>md".
	find (sub {push @filesToRun, $File::Find::name if /\.md$/}, $topTwDir) ;
	@filesToRun = sort @filesToRun;
	foreach my $file (@filesToRun) {
		print ".";
		# File::Find reports names with "/"; switch to "\" where needed so
		# the $topTwDir prefix can be stripped.
		$file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";
		my ($thisList, $shortFile) = ("", $file);
		$shortFile =~ s/^\Q$topTwDir${d}\E//;
		$shortFile =~ s/\.md$//;
		$shortFile =~ s/\Q$d\E/,/;
		open (my $page, "<:utf8", $file) or die "$file: $!";
		while (my $line = <$page>) {
			# A level-1 heading lists the entries of this page.
			if ($line =~ /^# ([^\n]*)$/) {
				$thisList = $1;
				$thisList =~ s/[\r\n]*$//;
				# Drop parenthesised remarks from the heading.
				$thisList =~ s/ \([^\)]*\)//g;
				$entriesThisPage{$shortFile} = $thisList;
				foreach my $entry (split /, /, $thisList) {
					$pageThisEntry{$entry} = $shortFile;
					$sourcePage{$entry} = $shortFile;
				}
			}
			# A "Strong's" line names the Hebrew numbers for this page.
			if ($line =~ /Strong's(.*)$/) {
				my $SNs = $1;
				# Require at least one digit so that a bare "H" does not
				# register an empty Strong's number.
				while ($SNs =~ s/H(\d+)//) {
					# Recorded directly; rebuilding a uniq'd list after
					# every input line is not needed for a hash used as a set.
					$relevantSNs{$1} = "$1";
					$entriesThisSN{$1} .= "$thisList, ";
					$pagesThisSN{$1} .= "$shortFile, ";
				}
			}
		}
		close $page;
	}
	say "";
	foreach my $thisSN (sort keys %entriesThisSN) {
		say LOG "tW entries for $thisSN: $entriesThisSN{$thisSN}"
	}
}

# Reads $ULBfile, whose lines are "reference<TAB>verse text", and records for
# each verse:
#   %text, %fullText  reference -> verse text (two independent copies)
#   %ref              running counter -> reference
#   %order            reference -> running counter
# The counter $cv is incremented with ++ on its string value.
sub LinkULBtoCV {
	say "Linking ULB to chapter and verse";
	say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";
	# Three-argument open with a lexical handle: the file name can no longer
	# be misread as a mode, and the handle is not shared with other subs.
	open (my $ulb, "<:utf8", $ULBfile) or die "$ULBfile: $!";
	while (my $line = <$ulb>) {
		next unless $line =~ /^([^\t]*)\t(.*)$/;
		my ($verseRef, $verseText) = ($1, $2);
		$cv ++;
		$text{$verseRef}     = $verseText;
		$fullText{$verseRef} = $verseText;
		$ref{$cv}            = $verseRef;
		$order{$verseRef}    = $cv;
	}
	close $ulb;
}

# Reads every XML file in @MAST_HBfileList and collects, per verse, the
# Strong's numbers that occur in %relevantSNs. The result is stored in
# %SNsInCV as reference -> "num√num√...", each number once per verse.
#
# The current reference is set by a <verse osisID="..."> line and replaced
# by a following <note>KJV:...</note> line; <w lemma="..."> elements on all
# other lines are counted for that reference. Finally, references that have
# an entry in %newRef are re-keyed to the new reference.
sub ReadLinkedSNs {
	say "Reading linked Strong's numbers\n\@MAST_HBfileList: @MAST_HBfileList";
	foreach my $xmlFile (@MAST_HBfileList) {
		say LOG "opening \$MAST_HBfile: $xmlFile";
		open (my $xml, "<:utf8", $xmlFile) or die "$xmlFile can't be opened: $!\n\n";
		my $thisRef;
		while (my $line = <$xml>) {
			chomp $line;
			if ($line =~ /<verse osisID="([^\.]*).(\d+).(\d+)">/) {
				my ($bk, $ch, $vs) = ($1, $2, $3);
				$thisRef = "$bkFull{$bk} $ch:$vs";
				say LOG "##\t\$bk \$ch:\$vs: $bk $ch:$vs, \$bkFull{$bk}: $bkFull{$bk}, \$thisRef: $thisRef";
			}
			elsif ($line =~ /<note>KJV:([^\.]*).(\d+).(\d+)<\/note>/) {
				my ($bk, $ch, $vs) = ($1, $2, $3);
				$thisRef = "$bkFull{$bk} $ch:$vs";
				say LOG "###\t\$bk \$ch:\$vs: $bk $ch:$vs, \$bkFull{$bk}: $bkFull{$bk}, \$thisRef: $thisRef";
			}
			else {
				# A lemma is a number, optionally preceded by one or more
				# single-letter prefixes ("b/7225", "c/d/776") and followed
				# by a one-letter suffix ("1254 a"). Allowing only a single
				# prefix skipped every word that carries two of them.
				while ($line =~ /<w lemma="(?:\w\/)*(\d+)(?: \w)?"/g) {
					my $thisNum = $1;
					say LOG "\t\$thisNum: $thisNum, \$SNsInCV{$thisRef}: $SNsInCV{$thisRef}";
					if (exists $relevantSNs{$thisNum}) {
						say LOG "\t\t\$relevantSNs{$thisNum}: $relevantSNs{$thisNum}";
						$SNsInCV{$thisRef} .= "$thisNum√" unless ($SNsInCV{$thisRef} =~ /\b$thisNum\b/);
					}
				}
			}
		}
		close $xml;
	}
	# Re-key in two passes so that a new reference cannot overwrite an old
	# one that has not been moved yet.
	my %temp;
	foreach my $oldRef (sort keys %SNsInCV) {
		if (exists $newRef{$oldRef}) {
			$temp{$newRef{$oldRef}} = $SNsInCV{$oldRef};
			delete $SNsInCV{$oldRef};
		}
	}
	foreach my $changedRef (sort keys %temp) {
		$SNsInCV{$changedRef} = $temp{$changedRef};
		say LOG "\$SNsInCV{$changedRef}: $SNsInCV{$changedRef}";
	}
}

# For every verse in %ref (in sorted key order) writes "<reference>:" to OUT,
# followed by the "[entry](page)" links found for that verse.
#
# Per verse:
#   1. The √-separated number list in $SNsInCV{reference} is tidied and, when
#      the reference has exceptions (%specifiedText), passed through Adjust.
#   2. The list is split on √ and reduced to one item per leading number.
#   3. Each item is resolved to a link:
#        number[text]   -> the given text is linked to its page directly
#        number(page)   -> the entries of that page are tried
#        number         -> the entries recorded for that number are tried
#      A matched entry is removed from the working copy $text{reference}.
#   4. Items with no match are written to MISSING.
#   5. The links are ordered by ProperOrderOutString and written to OUT.
#
# Side effects: modifies %SNsInCV, %text, %workEntries, %ulbOrder,
# %listOfPages and the global list separator $".
sub LinkSNsToULBtextViaEntries  {
	say "Linking Strong's numbers to ULB text via tW page entries";
	say LOG "sub LinkSNsToULBtextViaEntries called";
	foreach my $thisRef (sort keys %ref) {
		say LOG "\nLinkSNsToULBtextViaEntries: \$thisRef: $thisRef\t\$ref{$thisRef}: $ref{$thisRef}, \$specifiedText{$ref{$thisRef}}: $specifiedText{$ref{$thisRef}}";
		(%workEntries, %ulbOrder) = ();
		my %workPage;
		# Note: $thisRef is the counter key of %ref; $thisCV is the reference text.
		my ($thisCV, $checkList, $tempString, $outString) = ($ref{$thisRef}, "", "", "");
		say OUT "$thisCV:";
		my (@allEntries);
		$listOfPages{$thisCV} = "";
		#say LOG "\$SNsInCV{$thisCV}: |$SNsInCV{$thisCV}|\n$text{$thisCV}";
		#say LOG "\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<\n\$text{$thisCV}: $text{$thisCV}";

		# Tidy the number list: no trailing separator, no stray spaces.
		$SNsInCV{$thisCV} =~ s/√+$//;
		$SNsInCV{$thisCV} =~ s/^ +//;
		$SNsInCV{$thisCV} =~ s/ +$//;
		$SNsInCV{$thisCV} =~ s/ {2,}/ /g;
		say LOG "*\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
		if (exists $specifiedText{$thisCV}) {
			#say LOG "*\t\$SNsInCV{$thisCV}: $SNsInCV{$thisCV}";

			# Apply the exceptions for this verse, then tidy the list again.
			$SNsInCV{$thisCV} = Adjust($SNsInCV{$thisCV}, $thisCV);

			$SNsInCV{$thisCV} =~ s/^ +(.*)/$1/;
			$SNsInCV{$thisCV} =~ s/(.*) +$/$1/;
			$SNsInCV{$thisCV} =~ s/ {2,}/ /g;
			$SNsInCV{$thisCV} =~ s/^√+//;
			$SNsInCV{$thisCV} =~ s/√{2,}/√/g;
			$SNsInCV{$thisCV} =~ s/√ /√/g;
			$SNsInCV{$thisCV} =~ s/√$//;
			say LOG "**\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
		}
		#say LOG "<>\t<>\t\$SNsInCV{$thisCV}: |$SNsInCV{$thisCV}|";
		say LOG "*7*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
		#while ($SNsInCV{$thisCV} =~ s/(\[[^\]]*?) ([^\]]*?\] )/$1√$2/) {};
		say LOG "*8*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
		# Keep only the first item for each leading number.
		my @tempArray = split /√/, $SNsInCV{$thisCV};
		my %alreadyUsed;
		my @regArray;
		foreach my $slice (@tempArray) {
			if ($slice =~ /^(\d*)/) {
				my $number = "$1";
				push (@regArray, $slice) unless (exists $alreadyUsed{$number});
				$alreadyUsed{$number} = $number
			}
		}
		# The list separator is changed globally here, for the log output.
		$" = "|\n";
		say LOG "*9*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}\n\@regArray: >@regArray<";
		foreach my $thisNum (@regArray) {
			say LOG "\$thisNum: $thisNum";
			my ($found, $specPage);
			if ($thisNum =~ /(\d+)\[(.*?)\]/) {
				# "number[text]": the text to link was given explicitly.
				($thisNum) = ($1);
				my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($2, $2, $2);
				# Look the text up as given, then in lower case; report on
				# STDOUT when no page is known for it.
				unless (exists $pageThisEntry{$forced_entry_for_page}) {
					my $try = lc $forced_entry_for_page;
					if (exists $pageThisEntry{$try}) {
						$forced_entry_for_page = lc $forced_entry_for_page
					}
					else {
						say "!!!\tNo \$pageThisEntry{$try} found for $try\n\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page\n$ref{$thisRef}\t$thisNum\t$forced_entry_for_page";
						#die
					}
				}
				say LOG
"*0*	\$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
				#while ($forced_entry_for_display =~ s/([^ \]]*?)√([^ \]]*?)/$1 $2/) {}
				$forced_entry_for_search = lc $forced_entry_for_display;
				#while ($forced_entry_for_search =~ s/ ... /\.\*\?/) {}
				say LOG
"*A*	\$thisNum: >>$thisNum<<	\$forced_entry_for_display: >$forced_entry_for_display<,
	\$forced_entry_for_search: >>$forced_entry_for_search<<
	\$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
				$outString .= "[$forced_entry_for_display]($pageThisEntry{$forced_entry_for_page})\n";
				say LOG "\t\t$outString:\n$outString";
				# Remove the linked words from the working text. For texts
				# with " ... " only the named parts are removed and what
				# stood between them is kept.
				# NOTE(review): the search text is interpolated into the
				# pattern unquoted, so a "." in it matches any character.
				if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) {
					my ($first, $second, $third) = ($1, $2, $3);
					$text{$thisCV} =~ s/$first(.*?)$second(.*?)$third/$1 $2/i;
					say LOG "\t*\t$text{$thisCV}";
				} elsif ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*)/) {
					my ($first, $second) = ($1, $2);
					$text{$thisCV} =~ s/$first(.*?)$second/$1/i;
					say LOG "\t**\t$text{$thisCV}";
				} else {
					#say "\$text{$thisCV}:\n$text{$thisCV}\n\$forced_entry_for_search: |$forced_entry_for_search|";
					$text{$thisCV} =~ s/$forced_entry_for_search//i;
					say LOG "\t***\t$text{$thisCV}";
				}
				next;
			} elsif ($thisNum =~ /\d+(\(([\w\/]+)\))/) {
				# "number(page)": a specific page was given for this number.
				$specPage .= $2;
				say LOG "*B*\t\$specPage: $specPage";
			} else {
				say LOG "*C*\t\$thisNum: $thisNum";
			}
			say LOG "\t\$specPage: $specPage";
			# Candidate entries: those of the given page, otherwise all
			# entries recorded for the number.
			if ($specPage) {
				$workEntries{$thisNum} = $entriesThisPage{$specPage};
			} else {
				$workEntries{$thisNum} = $entriesThisSN{$thisNum};
			}
			$workEntries{$thisNum} =~ s/, $//;
			say LOG "*D*\t\$thisNum: >$thisNum<\t\$workEntries{$thisNum}: >$workEntries{$thisNum}<";
			my @beforeArray = split /, /, $workEntries{$thisNum};
			# The comparisons are numeric, so entries that do not start with
			# a digit compare equal on the first and last keys; for those
			# the result is ordered by length, longest first after reverse.
			my @sortedArray = reverse sort {  substr($a,0,1) <=> substr($b,0,1)
				 || length($a) <=> length($b)
				 || $a <=> $b }
				@beforeArray;
			$" = "\n\t";
			say LOG "*E*\t\@sortedArray: @sortedArray\n\$outString: $outString\n\$text{$ref{$thisRef}}: $text{$ref{$thisRef}}";
			foreach my $entry (@sortedArray) {
				my $testEntry = $entry;
				#print LOG "\$entry: $entry. Becomes ";
				# Turn "a ... b" into the pattern (a)\b(.*?)\b(b).
				while ($testEntry =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
				#print LOG "\$testEntry: |$testEntry| ";
				# First test: entry with a gap; the matched span is replaced
				# by what stood in the gap ($3, because the whole pattern is
				# wrapped in one more group).
				if ($testEntry =~ /\(\.\*\?\)/ && $text{$thisCV} =~ s/\b($testEntry)\b/$3/i) {
					say LOG "\n===\n$thisNum |$testEntry| is found in first test.\n===";
					$outString .= "[$entry]($pageThisEntry{$entry})\n";
					say LOG $outString . "\n===" . $text{$thisCV};
					$found = 1;
					goto Breakout;
				# Second test: entry followed by a character that is neither
				# a word character nor an apostrophe; both are removed.
				} elsif ($text{$thisCV} =~ s/\b($testEntry)[^\w']//i) {
					say LOG "\n===\n$thisNum |$testEntry| is found in second test.\n===";
					$outString .= "[$entry]($pageThisEntry{$entry})\n";
					say LOG $outString . "\n" . $text{$thisCV};
					$found = 1;
					goto Breakout;
				# Third test: entry delimited by word boundaries only.
				} elsif ($text{$thisCV} =~ s/\b($testEntry)\b//i) {
					say LOG "\n===\n$thisNum |$testEntry| is found in third test.\n===";
					$outString .= "[$entry]($pageThisEntry{$entry})\n";
					say LOG $outString . "\n" . $text{$thisCV};
					$found = 1;
					goto Breakout;
				} else {
					say LOG "\$testEntry «$testEntry» is not found in\n$text{$thisCV}}";
				}
			}
			# Reached after the first matching entry, or when none matched.
			Breakout:
			unless ($found) {
				say MISSING "$thisCV	$thisNum";
				say LOG "\$thisCV: $thisCV\t\$thisNum: $thisNum"
			}
			next if $found;
		}
		say LOG "\t\$outString:\n$outString";
		# Put the links into the order in which they occur in the verse.
		$outString = ProperOrderOutString($outString, $thisCV);
		say LOG "<>\t\$outString: $outString";
		say OUT "$outString";
	#say LOG "sub LinkSNsToULBtextViaEntries finished";
	}
}

# Applies the exceptions recorded for one verse to its Strong's number list.
#
# Parameters:
#   $_[0]  the verse's number list, fields separated by √
#   $_[1]  the verse reference, used as key into %adjust
# Returns the adjusted list in the form "<old part>√<added part>". The caller
# is expected to collapse doubled or dangling √ separators.
#
# Each adjustment in $adjust{reference} is "old<TAB>new":
#   X      ||      remove X from the list
#   num    num     replace the first number by the second
#   ||     X       add X to the list
#   num    words   force the text to link: "num" becomes "num[words]"
#   num    page    force the page to use: "num" becomes "num(page)"
sub Adjust {
	my ($snsOld, $ref, $snsNew) = ($_[0], $_[1], "");
	say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: |$snsOld|";
	#say LOG ">\t\$sns: |$sns|";
	#say LOG ">\t\$specifiedText{$ref}: |$specifiedText{$ref}|";
	$snsOld =~ s/^[√ ]+/ /;
	#say LOG "\$adjust{$ref}: $adjust{$ref}";
	my @preadjustments = split /, /, $adjust{$ref};
	foreach my $adjustment (@preadjustments) {
		say LOG "<><>\t\$adjustment: >$adjustment<";
		if ($adjustment =~ /([^\t]*)\t\|\|$/) { # delete this from list to look for
			my $found = $1;
			$snsOld =~ s/\b$found\b ?//;
			say LOG "*1*\t\$found: $found should be deleted from \$snsOld: $snsOld";
		} elsif ($adjustment =~ /^(\d+)\t(\d+)$/) { # change to specified number
			my ($found1, $found2) = ($1, $2);
			# Remove the old number only where it is a complete field: not
			# preceded by a digit (so 430 leaves 1430 alone) and followed by
			# a separator or the end of the list (the caller strips the
			# trailing √, so the last field has none).
			$snsOld =~ s/(?<!\d)${found1}(?:√|\z)//g;
			$snsNew .= "$found2√" unless $snsNew =~ /\b$found2\b /;
			say LOG "*2*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
		} elsif ($adjustment =~ /^\|\|\t(.*)/) { # add this to list to look for
			my $adj = $1;
			say LOG "\$adjustment: $adjustment, \$adj: $adj";
			if ($adj =~ /([^\t]*)\t([^\t]*)/) {
				$snsNew = "[$1]($pageThisEntry{$2})"
			} else {
				$snsNew .= "$adj "
			}
			say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
		} elsif ($adjustment =~ /^(\d+)\t([\w\. \-\']+)$/) { # add specified word
			my ($found1, $found2) = ($1, $2);
			# Mark the number in the old list, or else in the added part.
			if ($snsOld =~ s/\b$found1\b/$found1\[$found2\]/) {}
			else {$snsNew =~ s/\b$found1\b/$found1\[$found2\]/}
			$snsOld =~ s/ {2,}/ /;
			say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
		} elsif (($adjustment =~ /^(\d+)\t([\/\d\w]+)$/)) { # add specified page
			my ($found1, $found2) = ($1, $2);
			# Match the complete number only, not digits inside a longer one.
			$snsOld =~ s/(?<!\d)${found1}(?!\d)/$found1\($found2\)/;
			say LOG "*5*\t\$snsOld: $snsOld";
		}
	}
	say LOG "*5A*\t\$snsNew: >$snsNew<\n\$snsOld+\$snsNew: >$snsOld< >$snsNew<";
	$snsNew =~ s/ +$//;
	#$snsNew = "$snsOld $snsNew";
	$snsNew = "$snsOld√$snsNew";
	# Move "number[text]" items in front of a plain number that precedes them.
	while ($snsNew =~ s/(\d+) (\d+\[[^\[\]]*\])/$2 $1/) {}
	say LOG "*5B*\t\$snsNew: $snsNew";
	return $snsNew;
}

# Reports, for every verse in %ref, the items of $checkPages{reference} that
# have no corresponding line in $listOfPages{reference}; each such item is
# written to MISSING as "reference<TAB>item".
# The only call to this sub in the main flow is commented out.
# Side effects: trims $SNsInCV{reference} and $checkPages{reference}, rewrites
# $listOfPages{reference} in sorted order and sets the list separator $".
sub Output {
	say "Outputting";
	#say LOG "Output subRoutine called";
	foreach my $key (sort keys %ref) {
		my %donePages;
		my $thisRef = $ref{$key};
		#print LOG "\$key: $key\t\$thisRef: $thisRef\t";
		#say LOG "\$SNsInCV{$thisRef}: |$SNsInCV{$thisRef}|";
		$SNsInCV{$thisRef} =~ s/^ +//;
		$SNsInCV{$thisRef} =~ s/ +$//;
		$SNsInCV{$thisRef} =~ s/ {2,}/ /;
		#say LOG "\$SNsInCV{$thisRef}: |$SNsInCV{$thisRef}|";
		#say LOG "\$listOfPages{$thisRef}: |$listOfPages{$thisRef}|";
		#$listOfPages{$thisRef} =~ s/^ +//;
		#$listOfPages{$thisRef} =~ s/ +$//;
		#$listOfPages{$thisRef} =~ s/ {2,}/ /;
		#say LOG "\$listOfPages{$thisRef}: |$listOfPages{$thisRef}|";
		my @array = split /\n/, $listOfPages{$thisRef};
		#say LOG "\@array: |@array|";
		# NOTE(review): the doubled "sort" looks unintentional; the outer,
		# default sort is applied to the case-insensitively sorted list - confirm.
		my @sorted =
			sort sort { lc($a) cmp lc($b) }
			 @array;
		#say LOG "\@sorted: |@sorted|";
		$" = "\n";
		$listOfPages{$thisRef} = "@sorted";
		say LOG "\$listOfPages{$thisRef}: $listOfPages{$thisRef}\n\$checkPages{$thisRef}: $checkPages{$thisRef}";
		#say OUT "$thisRef: $listOfPages{$thisRef}\n";
		$checkPages{$thisRef} =~ s/^ +//;
		$checkPages{$thisRef} =~ s/ +$//;
		$checkPages{$thisRef} =~ s/ {2,}/ /;
		$checkPages{$thisRef} =~ s/ \|\|//;
		say LOG "\$checkPages{$thisRef}:\t|$checkPages{$thisRef}|";
		my @checkArray = split / /, $checkPages{$thisRef};
		# Drops the first sorted line - presumably an empty one; verify.
		shift @sorted;
		#say LOG "\@checkArray: |@checkArray|";
		#say LOG "\@sorted: |@sorted|";
		#shift @sorted;
		#say LOG "\@sorted: |@sorted|";
		# Reduce each "[entry](page)" line to its page and remember it.
		# $slice aliases the elements of @sorted, which are changed in place.
		foreach my $slice (@sorted) {
			#print LOG "\$slice: $slice\t";
			$slice =~ s/\[.*?\]\((.*?)\)/$1/;
			  #say LOG "\t\$slice: $slice";
			  $donePages{$slice} = $slice;
			  #say LOG "\t\$donePages{$slice}: $donePages{$slice}"
		}
		#say LOG "\@checkArray: |@checkArray|";
			# Report every expected item that was not among the pages above.
			foreach my $slice (@checkArray) {
				#say LOG "\$slice: $slice";
				unless (exists $donePages{$slice}) {
					say LOG "\$thisRef: $thisRef\t\$slice:$slice";
					#say MISSING "$thisRef\t$slice\t||";
					say MISSING "$thisRef\t$slice";
				}
			}
	}
}

# Applies the "old<TAB>new" pairs in $substitutedPages{key} to $SNsInCV{key}
# for every key of %pages, and copies the result to $checkPages{key}:
#   old is "||"  -> new is appended, followed by √
#   new is "||"  -> the first "old " (with its trailing space) is removed
#   otherwise    -> the first occurrence of old is replaced by new
# Keys without substitutions just have their list copied to %checkPages.
sub Substitute {
	for my $cvRef (sort keys %pages) {
		say LOG "\$key: $cvRef\t\$SNsInCV{$cvRef}: $SNsInCV{$cvRef}";
		if (!exists $substitutedPages{$cvRef}) {
			$checkPages{$cvRef} = $SNsInCV{$cvRef};
		}
		else {
			$substitutedPages{$cvRef} =~ s/, $//;
			for my $pair (split /, /, $substitutedPages{$cvRef}) {
				next unless $pair =~ /([^\t]*)\t([^\t]*)/;
				my ($old, $new) = ($1, $2);
				if ($old eq "||") {
					$SNsInCV{$cvRef} .= "$new√";
				}
				elsif ($new eq "||") {
					$SNsInCV{$cvRef} =~ s/$old //;
				}
				else {
					$SNsInCV{$cvRef} =~ s/$old/$new/;
				}
				# The copy is taken before any " ||" markers are stripped.
				$checkPages{$cvRef} = $SNsInCV{$cvRef};
				$SNsInCV{$cvRef} =~ s/ \|\|//g;
			}
		}
		say LOG "<>\t\$key: $cvRef\t\$SNsInCV{$cvRef}: $SNsInCV{$cvRef}";
	}
}

# Puts the "[entry](page)" lines of one verse into the order in which the
# entries occur in the verse text.
#
# Parameters:
#   $_[0]  the links, one "[entry](page)" per line
#   $_[1]  the verse reference, used as key into %fullText
# Returns the links, one per line, sorted by the position at which each entry
# was found. Links whose entry is not found in the text are left out.
# Side effect: each entry that is found is blanked out in $fullText{reference}
# so that the same words cannot be found again.
sub ProperOrderOutString {
	say LOG "\$_[0]: $_[0]\n\$_[1]: |$_[1]|";
	my ($rawLinks, $thisCV) = @_;
	my %linkAtOffset;
	foreach my $link (split /\n/, $rawLinks) {
		say LOG "\t>A>\t\$thisSet: $link";
		next unless $link =~ /(\[([^\]]*)\])(\([^\)]*\))/;
		my ($ulb, $fileLoc) = ($2, $3);
		# " ... " in an entry stands for any text in between.
		$ulb =~ s/ \.\.\. /.*?/g;
		say LOG "\t>B>\t\$ulb: $ulb, \$fileLoc: $fileLoc, \$fullText{$thisCV}\n$fullText{$thisCV}";
		if ($ulb =~ /^(.*)\.\*\?(.*)$/) {
			my ($found1, $found2) = ($1, $2);
			say LOG "\t>C>\tThere is an ellipsis in \$ulb: $ulb.";
			# Blank fillers as long as the two parts of the entry.
			my $blank1 = " " x length($found1);
			my $blank2 = " " x length($found2);
			say LOG "Looking for\ns/^(.*?)$found1(.*?)$found2(.*)\$\nin\n$fullText{$thisCV}";
			if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*?)\b$found2\b(.*)$/$1$blank1$2$blank2$3/i) {
				# The length of the text before the match is the sort key.
				my $order = length $1;
				say LOG "\t>C1>\t$fullText{$thisCV}";
				$linkAtOffset{$order} = $link;
				say LOG "\t\t\t\$order: $order\t \$orderedSet{$order}: $linkAtOffset{$order}"
			}
			next;
		}
		say LOG "\t>D>\tThere is no ellipsis in \$ulb: $ulb.";
		my $blank = " " x length($ulb);
		# First try with a following character that is neither a word
		# character nor an apostrophe, then with a plain word boundary.
		if ($fullText{$thisCV} =~ s/^(.*?)\b$ulb([^\w'](.*))?$/$1$blank$2/i) {
			my $order = length $1;
			say LOG "\t>D1>\t$fullText{$thisCV}";
			$linkAtOffset{$order} = $link;
			say LOG ">D2>\t\$order: $order\t \$orderedSet{$order}: $linkAtOffset{$order}"
		}
		elsif ($fullText{$thisCV} =~ s/^(.*?)\b$ulb(\b(.*))?$/$1$blank$2/i) {
			my $order = length $1;
			say LOG "\t>D3>\t$fullText{$thisCV}";
			$linkAtOffset{$order} = $link;
			say LOG ">D4>\t\$order: $order\t \$orderedSet{$order}: $linkAtOffset{$order}"
		}
	}
	my $outS = "";
	$outS .= "$linkAtOffset{$_}\n" for sort {$a<=>$b} keys %linkAtOffset;
	#say LOG "\$outS: $outS";
	return $outS;
}

__DATA__
gen	Genesis
exo	Exodus
lev	Leviticus
num	Numbers
deu	Deuteronomy
jos	Joshua
jdg	Judges
rut	Ruth
1sa	1 Samuel
2sa	2 Samuel
1ki	1 Kings
2ki	2 Kings
1ch	1 Chronicles
2ch	2 Chronicles
ezr	Ezra
neh	Nehemiah
est	Esther
job	Job
psa	Psalms
pro	Proverbs
ecc	Ecclesiastes
sng	Song of Solomon
isa	Isaiah
jer	Jeremiah
lam	Lamentations
ezk	Ezekiel
dan	Daniel
hos	Hosea
jol	Joel
amo	Amos
oba	Obadiah
jon	Jonah
mic	Micah
nam	Nahum
hab	Habakkuk
zep	Zephaniah
hag	Haggai
zec	Zechariah
mal	Malachi