# Forked from hmw3/Tips_and_Hacks (663 lines, 22 KiB, Perl).
# Produces a list of tWs for each verse by linking MAST_NT to the ULB through the tWs.
# Taken from tWs.from.MAST_NT.2.pl.
# The output from this script is useful for the interleaved PDFs used in MAST.
# This version uses an exception file to handle places where the MAST_NT points to
# a tW page different from the one on which the ULB term appears.
#
# Make sure the correct input file is $ULBfile, then run the script.
# Output is written to the $output file.
# Check the $missing file for needed corrections, which are probably lines that
# need to be added to the $exceptions file.

use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use FindBin '$Bin';
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
use List::MoreUtils qw(uniq);
$|=1;    # unbuffered STDOUT so the progress dots show up immediately

# Directory of this script and the path separator of the current platform.
my ($pwd, $d) = ($Bin, "/");
if ($^O eq "MSWin32") {
    $d = "\\";
    $pwd =~ s/\//\\/g;
}

# Name of the per-platform user defaults file; the Windows file is the fallback.
my ($udf) = "User_defaults.windows.txt";
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}

# Fail early -- before any log or output file is truncated -- when the defaults
# file cannot be read.  GetUserDefaults() opens and parses the file again, so the
# probe handle is closed right away instead of being left open for the whole run.
{
    open (my $probe, "<:utf8", "$pwd${d}User${d}$udf") or die "$pwd${d}User${d}$udf:\n$!";
    close $probe;
}

# $cv is deliberately a zero-padded STRING: LinkULBtoCV increments it with ++,
# which keeps the padding, so a plain string sort of the keys is verse order.
my ($cv, $ULBfile, $exceptions, $missing, $output, $workFile) =
    ("00000",
     "$Bin${d}Temp${d}Extract.txt",
     "$Bin${d}Exceptions${d}Exceptions_tWs_from_MAST_NT.txt",
     "$Bin${d}Output${d}Entries_not_handled.txt",
     "$Bin${d}Output${d}tWs_for_PDF.txt",
     "$Bin${d}User${d}tW_work_NT.txt",
    );

# Filled in by GetUserDefaults() and GetULBBooksToProcess().
my ($MAST_NTfile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir);

# File-wide lookup tables shared by the subs below.
my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
    %specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
    %SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pageThisEntry, %adjust, %newRef);

my $book;
my (@MAST_NTfileList);

# ==============================

# All relative paths below (Logs/, User/) are resolved against the script directory.
chdir("$pwd") or die "chdir $pwd: $!";
open LOG, ">:utf8", "Logs${d}Exc_log.log" or die "\$log: Logs${d}Exc_log.log: $!";
open OUT, ">:utf8", $output or die "$output: $!";
open MISSING, ">", $missing or die "$missing: $!";

# Book table from the __DATA__ section: "<abbreviation><TAB><full name>" per line.
while (<DATA>) {
    chomp;
    if (/([^\t]*)\t([^\t]*)/) {
        $bkAbr{$2} = $1;
        $bkFull{$1} = $2;
    }
}

GetUserDefaults();
GetULBBooksToProcess();
ReadExceptions();
close LOG;

# Each phase writes to its own log file through the same LOG handle.
open LOG, ">:utf8", "Logs${d}tW_pairs.log" or die "Logs${d}tW_pairs.log: $!";
PairtWEntriesTotWPageAndUniqSNs();
close LOG;

open LOG, ">:utf8", "Logs${d}tWs_from_MAST.log" or die "Logs${d}tWs_from_MAST.log: $!";
ReadLinkedSNs();
LinkULBtoCV();
LinkSNsToULBtextViaEntries();
#Output();

close MISSING;
close OUT;
close LOG;

# On macOS $textEditor is an "open -a <application>" command line, so it is handed
# to the shell as one string.
# NOTE(review): a $missing path containing spaces would be split by the shell.
if ($^O eq "darwin") {system ("$textEditor $missing")}

print "\n\tDone.\n\n";

# ==============================

# Reads the per-platform user defaults file (User/<$udf>, relative to the current
# directory) and sets the file-level settings derived from it:
#   "Text editor: <cmd>"           -> $textEditor (prefixed with "open -a " on macOS)
#   "Repository directory: <dir>"  -> $repoPath
# $topTwDir and $topSourceLangDir are then built from $repoPath.
# Dies when the file cannot be opened or when either setting is missing or empty.
sub GetUserDefaults {
    my $settingsPath = "User${d}$udf";
    open (my $settings, "<:utf8", $settingsPath) or die "$settingsPath:\n$!";

    while (defined(my $entry = <$settings>)) {
        chomp $entry;
        if ($entry =~ /^Text editor: (.*)$/) {
            # macOS launches the editor application through "open -a".
            $textEditor = ($^O eq "darwin") ? "open -a $1" : $1;
            next;
        }
        $repoPath = $1 if $entry =~ /^Repository directory: (.*)$/;
    }

    die "No text editor found" if $textEditor eq "";
    die "No path to repo found" if $repoPath eq "";

    $topTwDir         = "$repoPath${d}en_tw${d}bible";
    $topSourceLangDir = "$repoPath${d}MAST_NT";

    close $settings;
}

# Builds @MAST_NTfileList from the work file ($workFile).
# A line is used when it does not start with '#' and consists of exactly four
# tab-separated fields; the first field and the upper-cased third field are joined
# as "<first>-<THIRD>.xml" under $topSourceLangDir.  The file-level $MAST_NTfile is
# left holding the last path that was added.
# Dies when the work file cannot be opened.
sub GetULBBooksToProcess {
    say LOG "GetULBBooksToProcess on \$workFile: $workFile";
    open (my $work, "<:utf8", "$workFile") or die "$workFile:\n$!";

    while (defined(my $row = <$work>)) {
        chomp $row;
        next unless $row =~ /^([^#][^\t]*)\t[^\t]*\t([^\t]*)\t[^\t]*$/;

        my $bookId = join "-", $1, uc $2;
        $MAST_NTfile = "$topSourceLangDir${d}$bookId.xml";
        push @MAST_NTfileList, $MAST_NTfile;
    }

    close $work;
}

# Loads the exceptions file ($exceptions).
# Every line that does not start with '#' and has at least three tab-separated
# fields contributes "<field 2><TAB><field 3>" to $adjust{<field 1>}; several
# adjustments for the same reference are chained with a trailing "√" after each
# one.  $specifiedText{<field 1>} is set to 1 for every reference seen.
# The collected adjustments are written to LOG.  Dies when the file cannot be opened.
sub ReadExceptions {
    say "Reading exceptions";
    say LOG "ReadExceptions from \$exceptions: $exceptions";
    open (my $excFile, "<:utf8", "$exceptions") or die "$exceptions:\n$!";

    while (defined(my $row = <$excFile>)) {
        chomp $row;
        next unless $row =~ /^([^#\n][^\t\n]*)\t([^\t\n]*\t[^\t\n]*)/;

        my ($verseRef, $oldNew) = ($1, $2);
        $adjust{$verseRef} .= "$oldNew√";
        $specifiedText{$verseRef} = 1;
    }

    for my $verseRef (sort keys %adjust) {
        say LOG "\$key: $verseRef: \$adjust{$verseRef}: $adjust{$verseRef}"
    }

    close $excFile;
}

# Scans every Markdown page below $topTwDir and fills the file-level tables:
#   %entriesThisPage  page id        -> heading text (the comma-separated entry list)
#   %pageThisEntry    entry          -> page id
#   %sourcePage       entry          -> page id
#   %relevantSNs      Strong's no.   -> itself (set of all numbers seen)
#   %entriesThisSN    Strong's no.   -> "entry list, " appended once per mention
#   %pagesThisSN      Strong's no.   -> "page id, " appended once per mention
# A page id is the file path relative to $topTwDir without ".md" and with its first
# path separator replaced by a comma.  The entry list comes from a line starting
# with "# ", with parenthesised remarks removed; Strong's numbers are the digits
# following a "G" on any line that contains "Strong's".
# Prints one dot per file as progress and logs the pairings to LOG.
# Dies when a page cannot be opened.
sub PairtWEntriesTotWPageAndUniqSNs {
    say "Pairing tW entries with tW pages and unique Strong's numbers";

    my @filesToRun;
    # Match the ".md" extension literally.  The former pattern interpolated the glob
    # '*.md' into a regex, which accepted any name ending in "<any char>md".
    find (sub {push @filesToRun, $File::Find::name if /\.md$/}, $topTwDir) ;
    @filesToRun = sort @filesToRun;

    foreach my $file (@filesToRun) {
        print ".";
        # File::Find reports forward slashes; use the platform separator elsewhere.
        $file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";

        my ($thisList, $shortFile) = ("", $file);
        $shortFile =~ s/^\Q$topTwDir${d}\E//;
        $shortFile =~ s/\.md$//;
        $shortFile =~ s/\Q$d\E/,/;    # only the first separator becomes a comma

        open (my $in, "<", $file) or die "$file: $!";
        while (my $line = <$in>) {
            if ($line =~ /^# ([^\n]*)$/) {
                $thisList = $1;
                $thisList =~ s/[\r\n]*$//;
                $thisList =~ s/ \([^\)]*\)//g;    # drop parenthesised remarks
                $entriesThisPage{$shortFile} = $thisList;

                my @entries = split /, /, $thisList;
                foreach my $entry (@entries) {
                    $pageThisEntry{$entry} = $shortFile;
                    say LOG "\$pageThisEntry{$entry}: $pageThisEntry{$entry}"
                }
                foreach my $entry (@entries) {
                    $sourcePage{$entry} = $shortFile;
                    say LOG "$entry: \$sourcePage{$entry}: $sourcePage{$entry}";
                }
            }

            if ($line =~ /Strong's(.*)$/) {
                my $SNs = $1;
                # \d+ (not \d*): a "G" that is not followed by digits is not a
                # Strong's number and must not create an empty key.
                while ($SNs =~ /G(\d+)/g) {
                    my $sn = $1;
                    $relevantSNs{$sn} = $sn;
                    $entriesThisSN{$sn} .= "$thisList, ";
                    $pagesThisSN{$sn} .= "$shortFile, ";
                }
            }
        }
        close $in;
    }

    say "";
    foreach my $thisSN (sort keys %entriesThisSN) {
        say LOG "tW entries for $thisSN: $entriesThisSN{$thisSN}"
    }
}

# Reads the ULB extract ($ULBfile), one "<reference><TAB><verse text>" per line,
# and fills the file-level tables:
#   %text, %fullText  reference -> verse text (two independent working copies)
#   %ref              counter   -> reference
#   %order            reference -> counter
# The counter is the file-level $cv, which is incremented with ++ on purpose: it is
# initialised as the string "00000" by the main script, and Perl's string
# auto-increment keeps the zero padding, so a plain string sort of the %ref keys
# yields file order.  Lines without a tab are ignored.
# Dies when the file cannot be opened.
sub LinkULBtoCV {
    say "Linking ULB to chapter and verse";
    say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";

    # Lexical handle and three-argument open: no shared bareword IN handle, and the
    # path can never be mistaken for a mode.
    open (my $ulb, "<", $ULBfile) or die "$ULBfile: $!";
    while (my $line = <$ulb>) {
        next unless $line =~ /^([^\t]*)\t(.*)$/;
        my ($verseRef, $verseText) = ($1, $2);

        $cv ++;    # must stay ++ (string increment); "+= 1" would lose the padding
        ($text{$verseRef}, $fullText{$verseRef}) = ($verseText, $verseText);
        $ref{$cv} = $verseRef;
        $order{$verseRef} = $cv;
    }
    close $ulb;
}

# Reads every file in @MAST_NTfileList and records, per verse, which of the
# relevant Strong's numbers occur in it:
#   $SNsInCV{<reference>} = "<number>√<number>√..." (each number once per verse)
# A line containing <verse osisID="Book.Chapter.Verse"> starts a new verse; the
# reference is $newRef{"Book Chapter:Verse"} when such an entry exists, otherwise
# "<$bkFull{Book}> Chapter:Verse".  On all other lines every <w lemma="NNN" is
# checked against %relevantSNs.  Afterwards any key of %SNsInCV that has a %newRef
# entry is moved to its new reference (logged to LOG).
# Dies when a source file cannot be opened.
sub ReadLinkedSNs {
    say "Reading linked Strong's numbers";

    foreach my $MAST_NTfile (@MAST_NTfileList) {
        say LOG "opening \$MAST_NTfile: $MAST_NTfile";
        open (my $in, "<", $MAST_NTfile) or die "$MAST_NTfile can't be opened: $!\n\n";

        my $thisRef;
        while (my $line = <$in>) {
            chomp $line;
            # The dots of the osisID are literal separators, so they are escaped.
            if ($line =~ /<verse osisID="([^\.]*)\.(\d+)\.(\d+)">/) {
                my ($bk, $ch, $vs) = ($1, $2, $3);
                if (exists $newRef{"$bk $ch:$vs"}) {
                    $thisRef = $newRef{"$bk $ch:$vs"}
                } else {
                    $thisRef = "$bkFull{$bk} $ch:$vs";
                }
            }
            else {
                # Words that appear before the first verse tag belong to no verse.
                next unless defined $thisRef;
                while ($line =~ /<w lemma="(\d+)"/g) {
                    my $thisNum = $1;
                    next unless exists $relevantSNs{$thisNum};
                    $SNsInCV{$thisRef} .= "$thisNum√" unless ($SNsInCV{$thisRef} =~ /\b$thisNum\b/);
                }
            }
        }
        close $in;
    }

    # Move entries whose reference has a replacement in %newRef.  The moves are
    # collected first so that %SNsInCV is not extended while it is being scanned.
    my %temp;
    foreach my $oldRef (sort keys %SNsInCV) {
        if (exists $newRef{$oldRef}) {
            $temp{$newRef{$oldRef}} = $SNsInCV{$oldRef};
            delete $SNsInCV{$oldRef};
        }
    }
    foreach my $changedRef (sort keys %temp) {
        $SNsInCV{$changedRef} = $temp{$changedRef};
        say LOG "\$SNsInCV{$changedRef}: $SNsInCV{$changedRef}";
    }
}

# For every verse in %ref (in key order) works out which tW entries occur in the
# ULB text of that verse and writes "<reference>:" followed by one
# "[entry](page)" line per hit to OUT.
#
# Per verse:
#   1. The "√"-separated Strong's numbers in $SNsInCV{verse} are tidied up and, when
#      the verse has exceptions (%specifiedText), rewritten by Adjust().
#   2. Items are de-duplicated on their leading number.
#   3. An item "NNN[words]" forces an entry: its page is looked up in %pageThisEntry
#      (also in lower case; the script dies when neither exists) and the words are
#      removed from the working copy of the verse in %text.
#   4. An item "NNN(page)" restricts the candidates to the entries of that page;
#      otherwise the candidates are all entries recorded for the number.
#   5. Candidates are tried longest first; the first one found in the working text
#      is removed from it and reported.  When none is found, "<reference> <item>"
#      is written to MISSING.
#   6. The collected lines are put into verse order by ProperOrderOutString().
# Everything is traced to LOG.
sub LinkSNsToULBtextViaEntries {
    say "Linking Strong's numbers to ULB text via tW page entries";
    say LOG "sub LinkSNsToULBtextViaEntries called";

    # The list separator is changed below for the log output only; keep that change
    # from leaking into the rest of the program.
    local $" = $";

    foreach my $thisRef (sort keys %ref) {
        say LOG "\nLinkSNsToULBtextViaEntries: \$thisRef: $thisRef\t\$ref{$thisRef}: $ref{$thisRef}, \$specifiedText{$ref{$thisRef}}: $specifiedText{$ref{$thisRef}}";
        (%workEntries, %ulbOrder) = ();
        my ($thisCV, $outString) = ($ref{$thisRef}, "");
        say OUT "$thisCV:";
        $listOfPages{$thisCV} = "";

        say LOG "\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
        $SNsInCV{$thisCV} =~ s/√+$//;
        $SNsInCV{$thisCV} =~ s/^ +//;
        $SNsInCV{$thisCV} =~ s/ +$//;
        $SNsInCV{$thisCV} =~ s/ {2,}/ /g;

        if (exists $specifiedText{$thisCV}) {
            $SNsInCV{$thisCV} = Adjust($SNsInCV{$thisCV}, $thisCV);

            $SNsInCV{$thisCV} =~ s/^ +(.*)/$1/;
            $SNsInCV{$thisCV} =~ s/(.*) +$/$1/;
            $SNsInCV{$thisCV} =~ s/ {2,}/ /g;
            $SNsInCV{$thisCV} =~ s/^√+//;
            $SNsInCV{$thisCV} =~ s/√{2,}/√/g;
            $SNsInCV{$thisCV} =~ s/√ /√/g;
            $SNsInCV{$thisCV} =~ s/√$//;
            say LOG "**\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
        }

        say LOG "*7*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
        # while ($SNsInCV{$thisCV} =~ s/(\[[^\]]*?) ([^\]]*?\] )/$1√$2/) {};
        say LOG "*8*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";

        # Keep only the first item for each leading Strong's number.
        my @tempArray = split /√/, $SNsInCV{$thisCV};
        my %alreadyUsed;
        my @regArray;
        foreach my $slice (@tempArray) {
            if ($slice =~ /^(\d*)/) {
                my $number = "$1";
                push (@regArray, $slice) unless (exists $alreadyUsed{$number});
                $alreadyUsed{$number} = $number
            }
        }

        $" = "|\n";
        say LOG "*9*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}\n\@regArray: >@regArray<";

        foreach my $thisNum (@regArray) {
            say LOG "\$thisNum: $thisNum";
            my ($found, $specPage);

            if ($thisNum =~ /(\d+)\[(.*?)\]/) {
                # Forced entry: "NNN[words as they appear in the ULB]".
                ($thisNum) = ($1);
                my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($2, $2, $2);

                unless (exists $pageThisEntry{$forced_entry_for_page}) {
                    my $try = lc $forced_entry_for_page;
                    if (exists $pageThisEntry{$try}) {
                        $forced_entry_for_page = lc $forced_entry_for_page
                    }
                    else {
                        say "\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page";
                        die "No tW page is known for the forced entry '$forced_entry_for_page' ($thisCV, $thisNum)\n"
                    }
                }

                say LOG
                    "*0* \$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
                #while ($forced_entry_for_display =~ s/([^ \]]*?)√([^ \]]*?)/$1 $2/) {}
                $forced_entry_for_search = lc $forced_entry_for_display;
                #while ($forced_entry_for_search =~ s/ ... /\.\*\?/) {}
                say LOG
                    "*A* \$thisNum: >>$thisNum<< \$forced_entry_for_display: >$forced_entry_for_display<,\n"
                  . "\$forced_entry_for_search: >>$forced_entry_for_search<<\n"
                  . "\$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";

                $outString .= "[$forced_entry_for_display]($pageThisEntry{$forced_entry_for_page})\n";
                say LOG "\t\t$outString:\n$outString";

                # Remove the forced words from the working text so that later
                # candidates cannot match them again.  " ... " in an entry stands
                # for intervening text, which is kept.
                if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) {
                    my ($first, $second, $third) = ($1, $2, $3);
                    $text{$thisCV} =~ s/$first(.*?)$second(.*?)$third/$1 $2/i;
                    say LOG "\t*\t$text{$thisCV}";
                } elsif ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*)/) {
                    my ($first, $second) = ($1, $2);
                    $text{$thisCV} =~ s/$first(.*?)$second/$1/i;
                    say LOG "\t**\t$text{$thisCV}";
                } else {
                    #say "\$text{$thisCV}:\n$text{$thisCV}\n\$forced_entry_for_search: |$forced_entry_for_search|";
                    $text{$thisCV} =~ s/$forced_entry_for_search//i;
                    say LOG "\t***\t$text{$thisCV}";
                }
                next;
            } elsif ($thisNum =~ /\d+(\(([\w\/]+)\))/) {
                # Forced page: "NNN(page)".
                # NOTE(review): page ids in %entriesThisPage use a comma for the first
                # path separator, which this pattern cannot capture -- confirm the
                # format used in the exceptions file.
                $specPage .= $2;
                say LOG "*B*\t\$specPage: $specPage";
            } else {
                say LOG "*C*\t\$thisNum: $thisNum";
            }

            say LOG "\t\$specPage: $specPage";
            if ($specPage) {
                $workEntries{$thisNum} = $entriesThisPage{$specPage};
            } else {
                $workEntries{$thisNum} = $entriesThisSN{$thisNum};
            }
            $workEntries{$thisNum} =~ s/, $//;
            say LOG "*D*\t\$thisNum: >$thisNum<\t\$workEntries{$thisNum}: >$workEntries{$thisNum}<";

            # The comparisons are numeric, so for ordinary words only the length
            # term distinguishes candidates: longest entries are tried first.
            my @beforeArray = split /, /, $workEntries{$thisNum};
            my @sortedArray = reverse sort { substr($a,0,1) <=> substr($b,0,1)
                                             || length($a) <=> length($b)
                                             || $a <=> $b }
                              @beforeArray;

            $" = "\n\t";
            say LOG "*E*\t\@sortedArray: @sortedArray\n\$outString: $outString\n\$text{$ref{$thisRef}}: $text{$ref{$thisRef}}";

            foreach my $entry (@sortedArray) {
                my $testEntry = $entry;
                #print LOG "\$entry: $entry. Becomes ";
                # "a ... b" becomes the pattern "(a)\b(.*?)\b(b)".
                while ($testEntry =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
                #print LOG "\$testEntry: |$testEntry| ";

                # First test: split entry; $3 is the text between its two halves,
                # which stays in the verse.  Second test: plain entry followed by a
                # character that is neither a word character nor an apostrophe.
                if ($testEntry =~ /\(\.\*\?\)/ && $text{$thisCV} =~ s/\b($testEntry)\b/$3/i) {
                    say LOG "\n===\n$thisNum |$testEntry| is found in first test.\n===";
                    $outString .= "[$entry]($pageThisEntry{$entry})\n";
                    say LOG $outString . "\n===" . $text{$thisCV};
                    $found = 1;
                    last;
                } elsif ($text{$thisCV} =~ s/\b($testEntry)[^\w']//i) {
                    say LOG "\n===\n$thisNum |$testEntry| is found in second test.\n===";
                    $outString .= "[$entry]($pageThisEntry{$entry})\n";
                    say LOG $outString . "\n" . $text{$thisCV};
                    $found = 1;
                    last;
                } else {
                    #say LOG "and is not found in\n$text{$ref{$thisRef}}";
                }
            }

            unless ($found) {
                say MISSING "$thisCV $thisNum";
                say LOG "\$thisCV: $thisCV\t\$thisNum: $thisNum"
            }
        }

        say LOG "*F*\t\$outString: $outString";
        $outString = ProperOrderOutString($outString, $thisCV);
        say LOG "Final \$outString:\n\$outString: $outString";
        say OUT "$outString";
        #say LOG "sub LinkSNsToULBtextViaEntries finished";
    }
}

# Applies the exceptions recorded for one verse to its list of Strong's numbers.
#
#   $_[0]  the verse's current "√"-separated list of Strong's numbers
#   $_[1]  the verse reference (key into %adjust)
# Returns the adjusted "√"-separated list; items added by an exception come first.
#
# $adjust{reference} holds "√"-separated adjustments "<old><TAB><new>":
#   "<x>\t||"          delete <x> from the list
#   "<num>\t<num>"     replace the first number by the second
#   "||\t<x>"          add <x>; "||\t<words>\t<entry>" adds "[words](page of entry)"
#   "<num>\t<words>"   tag the number as "<num>[<words>]" and move it to the front
#   "<num>\t<page>"    tag the number as "<num>(<page>)"
# A trailing "√" is stripped from $adjust{reference} as a side effect.  Every step
# is traced to LOG.
sub Adjust {
    my ($snsOld, $ref) = @_;
    my $snsNew = "";
    say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: |$snsOld|";

    $snsOld =~ s/^ +/ /;
    $adjust{$ref} =~ s/√$//;
    say LOG "*0*\t\$adjust{$ref}: $adjust{$ref}";

    my @preadjustments = split /√/, $adjust{$ref};
    foreach my $adjustment (@preadjustments) {
        say LOG "<><>\t\$adjustment: >$adjustment<";

        if ($adjustment =~ /([^\t]*)\t\|\|$/) { # delete this from list to look for
            my $found = $1;
            $snsOld =~ s/\b$found\b ?//;
            say LOG "*1*\t\$found: $found should be deleted from \$snsOld: $snsOld";
        } elsif ($adjustment =~ /^(\d+)\t(\d+)$/) { # change to specified number
            my ($found1, $found2) = ($1, $2);
            # NOTE(review): this only removes the old number when it is followed by
            # "√", i.e. not when it is the last item of the list -- confirm intent.
            $snsOld =~ s/$found1√//g;
            $snsNew .= "$found2√" unless $snsNew =~ /\b$found2\b /;
            say LOG "*2*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif ($adjustment =~ /^\|\|\t(.*)/) { # add this to list to look for
            my $adj = $1;
            say LOG "*3*\t\$adjustment: $adjustment, \$adj: $adj";
            if ($adj =~ /([^\t]*)\t([^\t]*)/) {
                # NOTE(review): plain assignment discards anything collected in
                # $snsNew so far -- confirm this is intended.
                $snsNew = "[$1]($pageThisEntry{$2})"
            } else {
                $snsNew .= "$adj "
            }
        } elsif ($adjustment =~ /^(\d+)\t([\w\. \-\']+)$/) { # add specified word
            my ($found1, $found2) = ($1, $2);
            if ($snsOld =~ s/\b$found1\b/$found1\[$found2\]/) {
                say LOG "*\t4a\t\$snsOld: $snsOld";
                # Move the tagged number to the front of the list.
                if ($snsOld =~ s/^(.+√)($found1\[$found2\]√)/$2$1/) {}
                elsif ($snsOld =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/) {}
            }
            else {
                # The number is not in the list yet: add it, tagged, at the front
                # of the new items.
                $snsNew .= "${found1}√";
                # Was "$snsOld .= s/...//", which ran the substitution on $_ and
                # appended its result to $snsOld instead of editing $snsOld.
                $snsOld =~ s/\b$found1\b//;
                say LOG "*\t4b\t\$snsNew: $snsNew";
                $snsNew =~ s/\b$found1\b/$found1\[$found2\]/;
                $snsNew =~ s/^(.+√)($found1\[$found2\]√)/$2$1/;
                $snsNew =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/;
            }
            $snsOld =~ s/ {2,}/ /;
            $snsOld =~ s/√$//;
            say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif (($adjustment =~ /^(\d+)\t([\/\d\w]+)$/)) { # add specified page
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1/$found1\($found2\)/;
            say LOG "*5*\t\t\$snsOld: $snsOld";
        }
    }

    $snsOld =~ s/^√//;
    $snsOld =~ s/√+/√/g;
    say LOG "\$snsNew: >$snsNew<\n\$snsNew+\$snsOld: >$snsNew< >$snsOld<";

    # New items first, then what is left of the original list.
    $snsNew = "$snsNew√$snsOld";
    say LOG "*5*\t*\t\$snsNew: |$snsNew|";
    while ($snsNew =~ s/(\d+) (\d+\[[^\[\]]*\])/$2$1/) {}
    say LOG "*5*\t**\t\$snsNew: |$snsNew|";
    $snsNew =~ s/√+/√/g;
    $snsNew =~ s/^[ √]//;
    say LOG "*6*\t\t\$snsNew: |$snsNew|";
    return $snsNew;
}

# Cross-checks, for every verse in %ref, the pages listed in %listOfPages against
# the pages expected in %checkPages and writes "<reference><TAB><page>" to MISSING
# for every expected page that was not listed.
# Side effects: tidies the blanks in $SNsInCV{reference} and $checkPages{reference},
# stores the sorted page list back into $listOfPages{reference} and sets the
# global list separator $" to a newline.  Progress is traced to LOG.
# The call to this sub in the main flow of the script is commented out.
sub Output {
    say "Outputting";

    for my $cvKey (sort keys %ref) {
        my $thisRef = $ref{$cvKey};

        $SNsInCV{$thisRef} =~ s/^ +//;
        $SNsInCV{$thisRef} =~ s/ +$//;
        $SNsInCV{$thisRef} =~ s/ {2,}/ /;

        # NOTE(review): the outer default sort re-sorts the case-insensitive result
        # of the inner one -- confirm which ordering is intended.
        my @pageLines = split /\n/, $listOfPages{$thisRef};
        my @sorted = sort sort { lc($a) cmp lc($b) } @pageLines;

        $" = "\n";
        $listOfPages{$thisRef} = join "\n", @sorted;
        say LOG "\$listOfPages{$thisRef}: $listOfPages{$thisRef}\n\$checkPages{$thisRef}: $checkPages{$thisRef}";

        $checkPages{$thisRef} =~ s/^ +//;
        $checkPages{$thisRef} =~ s/ +$//;
        $checkPages{$thisRef} =~ s/ {2,}/ /;
        $checkPages{$thisRef} =~ s/ \|\|//;
        say LOG "\$checkPages{$thisRef}:\t|$checkPages{$thisRef}|";
        my @expected = split / /, $checkPages{$thisRef};

        # The first sorted line is discarded; the rest are reduced from
        # "[entry](page)" to "page" and remembered as handled.
        shift @sorted;
        my %donePages;
        for my $listed (@sorted) {
            $listed =~ s/\[.*?\]\((.*?)\)/$1/;
            $donePages{$listed} = $listed;
        }

        for my $page (@expected) {
            next if exists $donePages{$page};
            say MISSING "$thisRef\t$page";
        }
    }
}

# Applies the page substitutions in %substitutedPages to $SNsInCV{key} for every
# key of %pages and stores the resulting list in $checkPages{key}.
# $substitutedPages{key} is a ", "-separated list of "<old><TAB><new>" pairs:
#   old is "||"  -> "<new> " is appended to the list
#   new is "||"  -> the first "<old> " is removed from the list
#   otherwise    -> the first <old> is replaced by <new>
# Keys without substitutions just copy $SNsInCV{key} to $checkPages{key}.
# A trailing ", " is stripped from $substitutedPages{key}; state before and after
# is traced to LOG.  Nothing in the visible script calls this sub.
sub Substitute {
    for my $verse (sort keys %pages) {
        say LOG "\$key: $verse\t\$SNsInCV{$verse}: $SNsInCV{$verse}";

        unless (exists $substitutedPages{$verse}) {
            $checkPages{$verse} = $SNsInCV{$verse};
        }
        else {
            $substitutedPages{$verse} =~ s/, $//;

            for my $pair (split /, /, $substitutedPages{$verse}) {
                next unless $pair =~ /([^\t]*)\t([^\t]*)/;
                my ($old, $new) = ($1, $2);

                if    ($old eq "||") { $SNsInCV{$verse} .= "$new " }
                elsif ($new eq "||") { $SNsInCV{$verse} =~ s/$old // }
                else                 { $SNsInCV{$verse} =~ s/$old/$new/ }

                # The check list is a snapshot taken before "||" markers are removed.
                $checkPages{$verse} = $SNsInCV{$verse};
                $SNsInCV{$verse} =~ s/ \|\|//g;
            }
        }

        say LOG "<>\t\$key: $verse\t\$SNsInCV{$verse}: $SNsInCV{$verse}";
    }
}

# Puts the "[entry](page)" lines collected for one verse into the order in which
# the entries occur in the verse.
#
#   $_[0]  newline-separated "[entry](page)" lines
#   $_[1]  the verse reference (key into %fullText)
# Returns the same lines, each followed by a newline, ordered by the position of
# the entry in $fullText{reference}.  Lines whose entry is not found in the text
# are left out.
#
# Every entry that is located is overwritten with blanks in $fullText{reference},
# so that a repeated word is matched at its next occurrence.  " ... " inside an
# entry stands for arbitrary intervening text.  Progress is traced to LOG.
sub ProperOrderOutString {
    my @unordered = split /\n/, $_[0];
    my ($thisCV, $outS) = ($_[1], "");
    my (%orderedSet);

    foreach my $thisSet (@unordered) {
        say LOG "\t>\t$thisSet";
        next unless $thisSet =~ /(\[([^\]]*)\])(\([^\)]*\))/;

        my ($ulb, $fileLoc) = ($2, $3);
        $ulb =~ s/ \.\.\. /.*?/g;
        say LOG "\t>>\t\$ulb: $ulb, \$fileLoc: $fileLoc, \$fullText{$thisCV}\n$fullText{$thisCV}";

        if ($ulb =~ /^(.*)\.\.\.(.*)$/) {
            # Two-part entry "first...second": blank out both parts and keep the
            # text between them.
            my ($found1, $found2) = ($1, $2);
            my $blank1 = " " x length $found1;
            my $blank2 = " " x length $found2;
            # The second part must be $found2; the pattern used to test $found1
            # twice, so two-part entries were never located.
            if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*)\b$found2\b(.*)$/$1$blank1$2$blank2$3/i) {
                my $order = length $1;    # offset of the entry in the verse
                say LOG "\t>>>\t$fullText{$thisCV}";
                $orderedSet{$order} = $thisSet;
                say LOG "\t\t\t\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
            }
        } else {
            my $blank = " " x length $ulb;
            if ($fullText{$thisCV} =~ s/^(.*?)\b$ulb\b(.*)$/$1$blank$2/i) {
                my $order = length $1;    # offset of the entry in the verse
                say LOG "\t>>>>\t$fullText{$thisCV}";
                $orderedSet{$order} = $thisSet;
                say LOG "\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
            }
        }
    }

    foreach my $key (sort {$a<=>$b} keys %orderedSet) {
        $outS .= "$orderedSet{$key}\n"
    }
    return $outS;
}

__DATA__
|
|
mat Matthew
|
|
mrk Mark
|
|
luk Luke
|
|
jhn John
|
|
act Acts
|
|
rom Romans
|
|
1co 1 Corinthians
|
|
2co 2 Corinthians
|
|
gal Galatians
|
|
eph Ephesians
|
|
php Philippians
|
|
col Colossians
|
|
1th 1 Thessalonians
|
|
2th 2 Thessalonians
|
|
1ti 1 Timothy
|
|
2ti 2 Timothy
|
|
tit Titus
|
|
phm Philemon
|
|
heb Hebrews
|
|
jas James
|
|
1pe 1 Peter
|
|
2pe 2 Peter
|
|
1jn 1 John
|
|
2jn 2 John
|
|
3jn 3 John
|
|
jud Jude
|
|
rev Revelation
|