forked from hmw3/Tips_and_Hacks
681 lines
23 KiB
Perl
681 lines
23 KiB
Perl
# Produces list of tWs for each verse by linking MAST_HB to ULB through tWs.
#
# Taken from tWs.from.UGNT.7.pl, with changes needed because that used USFM
# and this uses XML.
#
# The output from this script is useful for the interleaved PDFs used in MAST.
#
# This version uses an exception file to handle places where the MAST_HB points to
# a tW page different from that on which the ULB term appears.
#
# Make sure the correct input file is $ULBfile. Run script.
# Output is in the $output file.
# Check the $not_handled file for needed corrections, probably lines needing
# to be added to the $exceptions file.
#
# Change from ver. 2 in that this uses converted MAST_HB files.
# Change from ver. 3 in that this uses the KJV verses in the XML file, not the remaps file.
|
|
|
|
use 5.12.0;
|
|
use File::Slurp;
|
|
use File::Find ;
|
|
use Cwd ;
|
|
use FindBin '$Bin';
|
|
use utf8;
|
|
#use open IN => ":utf8", OUT => ":utf8";
|
|
use open IO => ":utf8";
|
|
use List::MoreUtils qw(uniq);
|
|
# ==============================
# Main script: global settings, shared state, and the processing pipeline.
# ==============================

$| = 1;       # autoflush STDOUT so progress output appears immediately
$" = "\n";    # list separator: arrays interpolated into strings print one element per line

# Script directory and the path separator for the current platform.
my ($pwd, $d) = ($Bin, "/");
if ($^O eq "MSWin32") {
    $d = "\\";
    $pwd =~ s/\//\\/g;
}

# Platform-specific user defaults file (parsed later by GetUserDefaults).
my ($udf) = "User_defaults.windows.txt";
if    ($^O eq "linux")  { $udf = "User_defaults.linux.txt" }
elsif ($^O eq "darwin") { $udf = "User_defaults.mac.txt" }

# Fail early -- before any log or output file is truncated -- when the defaults
# file cannot be read. The handle is closed again right away; GetUserDefaults
# opens the file itself.
{
    open(my $defaults, "<:utf8", "$pwd${d}User${d}$udf") or die "$pwd${d}User${d}$udf:\n$!";
    close $defaults;
}

# $cv is a zero-padded string counter: incrementing it keeps the padding, so the
# keys of %ref sort correctly with a plain (string) sort.
my ($cv, $ULBfile, $exceptions, $not_handled, $exc_log, $tW_log, $log, $output, $workFile) =
    ("00000",
     "$Bin${d}Temp${d}Extract.txt",
     "$Bin${d}Exceptions${d}Exceptions_tWs_from_MAST_HB.txt",
     "$Bin${d}Output${d}Entries_not_handled.txt",
     "Logs${d}Exc_log.log",
     "Logs${d}tW_pairs.log",
     "Logs${d}tWs_from_MAST.log",
     "Output${d}tWs_for_PDF.txt",
     "User${d}tW_work_OT.txt",
    );

# Settings filled in by GetUserDefaults / GetULBBooksToProcess.
my ($MAST_HBfile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir);

# Shared lookup tables used by the subroutines below.
my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
    %specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
    %SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pageThisEntry, %adjust, %newRef);

my $book;
my (@MAST_HBfileList);

# ==============================

# The log, output and work-file paths above are relative, so the working
# directory must be the script directory.
chdir($pwd) or die "chdir $pwd: $!";

open LOG, ">:utf8", $exc_log or die "\$exc_log: $exc_log: $!";
open OUT, ">:utf8", $output or die "\$output: $output: $!";
open MISSING, ">:utf8", $not_handled or die "\$not_handled: $not_handled: $!";

# Load the book abbreviation <-> full name table from the __DATA__ section.
while (my $dataLine = <DATA>) {
    chomp $dataLine;
    if ($dataLine =~ /([^\t]*)\t([^\t]*)/) {
        my ($abbr, $full) = ($1, $2);
        $bkAbr{$full}  = $abbr;
        $bkFull{$abbr} = $full;
        say LOG "\$bkAbr{$full}: $bkAbr{$full}, \$bkFull{$abbr}: $bkFull{$abbr}";
    }
}

# Phase 1 (logged to $exc_log): configuration, book list, exceptions.
GetUserDefaults();
GetULBBooksToProcess();
ReadExceptions();
close LOG;

# Phase 2 (logged to $tW_log): index the tW pages.
open LOG, ">:utf8", $tW_log or die "\$tW_log: $tW_log: $!";
PairtWEntriesTotWPageAndUniqSNs();
close LOG;

# Phase 3 (logged to $log): read the sources and write the per-verse output.
open LOG, ">:utf8", $log or die "\$log: $log: $!";
ReadLinkedSNs();
LinkULBtoCV();
LinkSNsToULBtextViaEntries();
#Output();

close MISSING;
close OUT;
close LOG;

# On macOS, open the list of unhandled entries in the configured editor.
# The file name is quoted so that a script path containing spaces still works.
if ($^O eq "darwin") { system(qq{$textEditor "$not_handled"}) }

print "\n\tDone.\n\n";
|
|
|
|
# ==============================
|
|
|
|
# Reads the platform-specific user defaults file ("User/<$udf>", relative to the
# current directory) and sets these file-level settings:
#   $textEditor        value of the "Text editor: ..." line; on macOS it is
#                      prefixed with "open -a "
#   $repoPath          value of the "Repository directory: ..." line
#   $topTwDir          "<$repoPath>/en_tw/bible"
#   $topSourceLangDir  "<$repoPath>/MAST_HB"
# If a setting occurs more than once, the last occurrence wins.
# Dies when the file cannot be opened or when either value is missing or empty.
sub GetUserDefaults {
    open(my $defaults, "<:utf8", "User${d}$udf") or die "User${d}$udf:\n$!";

    my ($editor, $repo) = ("", "");
    while (my $thisLine = <$defaults>) {
        chomp $thisLine;
        # A defaults file saved with CRLF line endings would otherwise leave a
        # carriage return at the end of the captured value.
        $thisLine =~ s/\r\z//;
        if ($thisLine =~ /^Text editor: (.*)$/) {
            $editor = $1;
        } elsif ($thisLine =~ /^Repository directory: (.*)$/) {
            $repo = $1;
        }
    }
    close $defaults;

    # Only build the macOS launcher command when an editor was actually given;
    # otherwise "open -a " would pass the emptiness check below.
    $textEditor = ($^O eq "darwin" && $editor ne "") ? "open -a $editor" : $editor;
    $repoPath   = $repo;

    say LOG "\$textEditor: $textEditor\n\$repoPath: $repoPath";
    die "No text editor found" if $textEditor eq "";
    die "No path to repo found" if $repoPath eq "";

    ($topTwDir, $topSourceLangDir) = ("$repoPath${d}en_tw${d}bible", "$repoPath${d}MAST_HB");
}
|
|
|
|
# Builds @MAST_HBfileList from the work file ($workFile).
# A line is used when it has four tab-separated fields and does not start with
# "#". Field 1 and the upper-cased field 3 are joined with "-" to form the name
# of an XML file inside $topSourceLangDir. The file-level $MAST_HBfile is left
# holding the last path that was built.
sub GetULBBooksToProcess {
    say LOG "GetULBBooksToProcess on \$workFile: $workFile";
    open(my $work, "<:utf8", "$workFile") or die "$workFile:\n$!";

    ROW:
    while (my $row = <$work>) {
        chomp $row;
        next ROW unless $row =~ /^([^#][^\t]*)\t[^\t]*\t([^\t]*)\t[^\t]*$/;

        my $bookName = join "-", $1, uc $2;
        $MAST_HBfile = "$topSourceLangDir${d}$bookName.xml";
        push @MAST_HBfileList, $MAST_HBfile;
    }

    close $work;
    say LOG "\@MAST_HBfileList: @MAST_HBfileList"
}
|
|
|
|
# Loads the exceptions file ($exceptions) into %adjust and %specifiedText.
# Each usable line has at least three tab-separated fields and does not start
# with "#": field 1 is the verse reference, fields 2 and 3 (kept together,
# still separated by a tab) describe one adjustment. All adjustments for a
# reference are accumulated in $adjust{reference} as a ", "-terminated list,
# and $specifiedText{reference} is set to 1 to mark that the verse has exceptions.
sub ReadExceptions {
    say "Reading exceptions";
    say LOG "ReadExceptions from \$exceptions: $exceptions";
    open(my $excFile, "<:utf8", "$exceptions") or die "$exceptions:\n$!";

    while (my $line = <$excFile>) {
        chomp $line;
        next unless $line =~ /^([^#\n][^\t\n]*)\t([^\t\n]*\t[^\t\n]*)/;

        my ($rf, $oldNew) = ($1, $2);
        say LOG "\$line: $line, \$rf: $rf, \$oldNew: $oldNew";
        $adjust{$rf} .= "$oldNew, ";
        $specifiedText{$rf} = 1;
    }

    # Dump everything that was collected, one reference per log line.
    for my $key (sort keys %adjust) {
        say LOG ">0>\t\$key: $key: \$adjust{$key}: $adjust{$key}";
    }

    close $excFile;
}
|
|
|
|
# Indexes every Markdown page below $topTwDir.
#
# For each page a short page name is derived from its path: the $topTwDir
# prefix and the ".md" suffix are removed and the first remaining path
# separator is replaced by ",". From the page content:
#   * a "# ..." heading line supplies the page's comma-separated entry list
#     (parenthesised remarks are removed):
#       $entriesThisPage{page}                      = the entry list
#       $pageThisEntry{entry}, $sourcePage{entry}   = page, for each entry
#   * a line containing "Strong's" supplies H-numbers; for each number N:
#       $relevantSNs{N}   = N
#       $entriesThisSN{N} .= "<entry list>, "
#       $pagesThisSN{N}   .= "<page>, "
# Finally the entries collected per Strong's number are written to LOG.
sub PairtWEntriesTotWPageAndUniqSNs {
    say "Pairing tW entries with tW pages and unique Strong's numbers";

    # Collect all *.md files. (The original interpolated the glob '*.md' into a
    # regex, which matched any name ending in <any character> + "md".)
    my @filesToRun;
    find(sub { push @filesToRun, $File::Find::name if /\.md\z/ }, $topTwDir);
    @filesToRun = sort @filesToRun;

    foreach my $file (@filesToRun) {
        print ".";    # progress indicator, one dot per page
        $file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";

        my ($thisList, $shortFile) = ("", $file);
        $shortFile =~ s/^\Q$topTwDir${d}\E//;
        $shortFile =~ s/\.md$//;
        $shortFile =~ s/\Q$d\E/,/;

        open(my $in, "<:utf8", $file) or die "$file: $!";
        while (my $line = <$in>) {
            if ($line =~ /^# ([^\n]*)$/) {
                $thisList = $1;
                $thisList =~ s/[\r\n]*$//;
                $thisList =~ s/ \([^\)]*\)//g;
                $entriesThisPage{$shortFile} = $thisList;
                foreach my $entry (split /, /, $thisList) {
                    $pageThisEntry{$entry} = $shortFile;
                    $sourcePage{$entry}    = $shortFile;
                }
            }
            if ($line =~ /Strong's(.*)$/) {
                my $SNs = $1;
                # \d+ (not \d*): an "H" that is not followed by digits is not a
                # Strong's number and must not create an empty key.
                while ($SNs =~ s/H(\d+)//) {
                    my $sn = $1;
                    $relevantSNs{$sn}   = $sn;
                    $entriesThisSN{$sn} .= "$thisList, ";
                    $pagesThisSN{$sn}   .= "$shortFile, ";
                }
            }
        }
        close $in;
    }
    say "";

    foreach my $thisSN (sort keys %entriesThisSN) {
        say LOG "tW entries for $thisSN: $entriesThisSN{$thisSN}"
    }
}
|
|
|
|
# Reads the ULB extract ($ULBfile): one verse per line, "<reference>\t<text>".
# For every such line the running counter $cv is incremented and
#   $text{reference}, $fullText{reference} = verse text (two independent copies)
#   $ref{$cv}                              = reference
#   $order{reference}                      = $cv
# Lines without a tab are ignored.
sub LinkULBtoCV {
    say "Linking ULB to chapter and verse";
    say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";

    # Three-argument open with a lexical handle: the file name can never be
    # misread as an open mode.
    open(my $in, "<:utf8", $ULBfile) or die "$ULBfile: $!";
    while (my $line = <$in>) {
        next unless $line =~ /^([^\t]*)\t(.*)$/;
        my ($reference, $verseText) = ($1, $2);

        $cv++;
        $text{$reference}     = $verseText;
        $fullText{$reference} = $verseText;
        $ref{$cv}             = $reference;
        $order{$reference}    = $cv;
    }
    close $in;
}
|
|
|
|
# Scans every XML file in @MAST_HBfileList and records, per verse, the Strong's
# numbers that have a tW page.
#
# The current verse reference ("<full book name> <chapter>:<verse>") is taken
# from a <verse osisID="Book.C.V"> line and replaced by the reference in a
# following <note>KJV:Book.C.V</note> line when one is present. On all other
# lines each <w lemma="..."> number that exists in %relevantSNs is appended to
# $SNsInCV{reference} as "<number>√", unless that number is already listed.
# Afterwards, references that have an entry in %newRef are re-keyed to the
# mapped reference.
sub ReadLinkedSNs {
    say "Reading linked Strong's numbers\n\@MAST_HBfileList: @MAST_HBfileList";

    foreach my $xmlFile (@MAST_HBfileList) {
        say LOG "opening \$MAST_HBfile: $xmlFile";
        open(my $in, "<:utf8", $xmlFile) or die "$xmlFile can't be opened: $!\n\n";

        my $thisRef;
        while (my $line = <$in>) {
            chomp $line;
            # The separators in an osisID are literal dots, so they are escaped.
            if ($line =~ /<verse osisID="([^\.]*)\.(\d+)\.(\d+)">/) {
                my ($bk, $ch, $vs) = ($1, $2, $3);
                $thisRef = "$bkFull{$bk} $ch:$vs";
                say LOG "##\t\$bk \$ch:\$vs: $bk $ch:$vs, \$bkFull{$bk}: $bkFull{$bk}, \$thisRef: $thisRef";
            }
            elsif ($line =~ /<note>KJV:([^\.]*)\.(\d+)\.(\d+)<\/note>/) {
                my ($bk, $ch, $vs) = ($1, $2, $3);
                $thisRef = "$bkFull{$bk} $ch:$vs";
                say LOG "###\t\$bk \$ch:\$vs: $bk $ch:$vs, \$bkFull{$bk}: $bkFull{$bk}, \$thisRef: $thisRef";
            }
            else {
                # NOTE(review): only lemmas with at most one "x/" prefix are
                # matched -- confirm that multi-prefix lemmas need no handling.
                while ($line =~ /<w lemma="(?:\w\/)?(\d+)(?: \w)?"/g) {
                    my $thisNum = $1;
                    say LOG "\t\$thisNum: $thisNum, \$SNsInCV{$thisRef}: $SNsInCV{$thisRef}";
                    if (exists $relevantSNs{$thisNum}) {
                        say LOG "\t\t\$relevantSNs{$thisNum}: $relevantSNs{$thisNum}";
                        $SNsInCV{$thisRef} .= "$thisNum√" unless ($SNsInCV{$thisRef} =~ /\b$thisNum\b/);
                    }
                }
            }
        }
        close $in;
    }

    # Re-key remapped references. The moves are collected first so that a new
    # key is never processed as an old key within the same pass.
    my %temp;
    foreach my $oldRef (sort keys %SNsInCV) {
        if (exists $newRef{$oldRef}) {
            $temp{$newRef{$oldRef}} = $SNsInCV{$oldRef};
            delete $SNsInCV{$oldRef};
        }
    }
    foreach my $changedRef (sort keys %temp) {
        $SNsInCV{$changedRef} = $temp{$changedRef};
        say LOG "\$SNsInCV{$changedRef}: $SNsInCV{$changedRef}";
    }
}
|
|
|
|
# For every verse in %ref (in key order) writes "<reference>:" followed by the
# verse's "[entry](page)" lines to OUT.
#
# Per verse:
#   1. $SNsInCV{verse} (a "√"-separated list) is cleaned up and, when the verse
#      has exceptions (%specifiedText), rewritten by Adjust().
#   2. The list is split on "√"; only the first item for each leading number is
#      kept.
#   3. Each item is one of
#        NNN[text]  forced entry: "[text](page of text)" is emitted and the text
#                   is removed from the working copy of the verse ($text);
#        NNN(page)  the candidate entries are those of the named page;
#        NNN        the candidate entries are those linked to that number.
#      For the last two forms the candidates are tried in sorted order; the
#      first one found in $text{verse} is removed from it and emitted. When no
#      candidate is found, "<verse> <item>" is written to MISSING.
#   4. The collected lines are put into verse order by ProperOrderOutString().
#
# Side effects: changes the global list separator $" (used for log output),
# clears %workEntries and %ulbOrder for each verse, and resets
# $listOfPages{verse} to "".
sub LinkSNsToULBtextViaEntries {
    say "Linking Strong's numbers to ULB text via tW page entries";
    say LOG "sub LinkSNsToULBtextViaEntries called";

    foreach my $thisRef (sort keys %ref) {
        say LOG "\nLinkSNsToULBtextViaEntries: \$thisRef: $thisRef\t\$ref{$thisRef}: $ref{$thisRef}, \$specifiedText{$ref{$thisRef}}: $specifiedText{$ref{$thisRef}}";
        (%workEntries, %ulbOrder) = ();
        my ($thisCV, $outString) = ($ref{$thisRef}, "");
        say OUT "$thisCV:";
        $listOfPages{$thisCV} = "";

        # --- 1. normalise the list of Strong's numbers --------------------
        $SNsInCV{$thisCV} =~ s/√+$//;
        $SNsInCV{$thisCV} =~ s/^ +//;
        $SNsInCV{$thisCV} =~ s/ +$//;
        $SNsInCV{$thisCV} =~ s/ {2,}/ /g;
        say LOG "*\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";

        if (exists $specifiedText{$thisCV}) {
            $SNsInCV{$thisCV} = Adjust($SNsInCV{$thisCV}, $thisCV);

            $SNsInCV{$thisCV} =~ s/^ +(.*)/$1/;
            $SNsInCV{$thisCV} =~ s/(.*) +$/$1/;
            $SNsInCV{$thisCV} =~ s/ {2,}/ /g;
            $SNsInCV{$thisCV} =~ s/^√+//;
            $SNsInCV{$thisCV} =~ s/√{2,}/√/g;
            $SNsInCV{$thisCV} =~ s/√ /√/g;
            $SNsInCV{$thisCV} =~ s/√$//;
            say LOG "**\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
        }
        say LOG "*7*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
        say LOG "*8*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";

        # --- 2. keep the first item for each leading number ---------------
        my @tempArray = split /√/, $SNsInCV{$thisCV};
        my %alreadyUsed;
        my @regArray;
        foreach my $slice (@tempArray) {
            if ($slice =~ /^(\d*)/) {
                my $number = "$1";
                push(@regArray, $slice) unless (exists $alreadyUsed{$number});
                $alreadyUsed{$number} = $number;
            }
        }
        $" = "|\n";
        say LOG "*9*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}\n\@regArray: >@regArray<";

        # --- 3. resolve every item to an "[entry](page)" line -------------
        foreach my $thisNum (@regArray) {
            say LOG "\$thisNum: $thisNum";
            my ($found, $specPage);

            if ($thisNum =~ /(\d+)\[(.*?)\]/) {
                # Forced entry: NNN[text].
                my ($number, $forced) = ($1, $2);
                $thisNum = $number;
                my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($forced, $forced, $forced);

                # The page index may hold the entry in lower case only.
                unless (exists $pageThisEntry{$forced_entry_for_page}) {
                    my $try = lc $forced_entry_for_page;
                    if (exists $pageThisEntry{$try}) {
                        $forced_entry_for_page = lc $forced_entry_for_page;
                    }
                    else {
                        say "!!!\tNo \$pageThisEntry{$try} found for $try\n\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page\n$ref{$thisRef}\t$thisNum\t$forced_entry_for_page";
                    }
                }
                say LOG "*0* \$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
                $forced_entry_for_search = lc $forced_entry_for_display;
                say LOG "*A* \$thisNum: >>$thisNum<< \$forced_entry_for_display: >$forced_entry_for_display<,\n"
                      . "\$forced_entry_for_search: >>$forced_entry_for_search<<\n"
                      . "\$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
                $outString .= "[$forced_entry_for_display]($pageThisEntry{$forced_entry_for_page})\n";
                say LOG "\t\t$outString:\n$outString";

                # Remove the forced text from the working copy of the verse so
                # that later entries cannot match it again. " ... " in the
                # forced text separates parts with other words in between;
                # those in-between words are kept.
                # NOTE(review): the forced text is interpolated into the
                # patterns unescaped, so regex metacharacters in it are active.
                if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) {
                    my ($first, $second, $third) = ($1, $2, $3);
                    $text{$thisCV} =~ s/$first(.*?)$second(.*?)$third/$1 $2/i;
                    say LOG "\t*\t$text{$thisCV}";
                } elsif ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*)/) {
                    my ($first, $second) = ($1, $2);
                    $text{$thisCV} =~ s/$first(.*?)$second/$1/i;
                    say LOG "\t**\t$text{$thisCV}";
                } else {
                    $text{$thisCV} =~ s/$forced_entry_for_search//i;
                    say LOG "\t***\t$text{$thisCV}";
                }
                next;
            } elsif ($thisNum =~ /\d+(\(([\w\/]+)\))/) {
                # Specified page: NNN(page).
                $specPage .= $2;
                say LOG "*B*\t\$specPage: $specPage";
            } else {
                say LOG "*C*\t\$thisNum: $thisNum";
            }
            say LOG "\t\$specPage: $specPage";

            # Candidate entries: those of the specified page, otherwise all
            # entries linked to this Strong's number.
            if ($specPage) {
                $workEntries{$thisNum} = $entriesThisPage{$specPage};
            } else {
                $workEntries{$thisNum} = $entriesThisSN{$thisNum};
            }
            $workEntries{$thisNum} =~ s/, $//;
            say LOG "*D*\t\$thisNum: >$thisNum<\t\$workEntries{$thisNum}: >$workEntries{$thisNum}<";

            my @beforeArray = split /, /, $workEntries{$thisNum};
            # NOTE(review): the first and last criteria compare the entry
            # strings numerically (<=>); for entries that are not numbers this
            # presumably leaves length as the deciding criterion, i.e. longest
            # entries first after the reverse -- confirm the intent.
            my @sortedArray = reverse sort { substr($a,0,1) <=> substr($b,0,1)
                                             || length($a) <=> length($b)
                                             || $a <=> $b }
                              @beforeArray;
            $" = "\n\t";
            say LOG "*E*\t\@sortedArray: @sortedArray\n\$outString: $outString\n\$text{$ref{$thisRef}}: $text{$ref{$thisRef}}";

            ENTRY:
            foreach my $entry (@sortedArray) {
                my $testEntry = $entry;
                # Turn "a ... b" into the pattern "(a)\b(.*?)\b(b)".
                while ($testEntry =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}

                # Three attempts, from most to least specific. Each successful
                # attempt removes the matched entry from the working text; the
                # first one keeps the words between the two parts (capture 3).
                if ($testEntry =~ /\(\.\*\?\)/ && $text{$thisCV} =~ s/\b($testEntry)\b/$3/i) {
                    say LOG "\n===\n$thisNum |$testEntry| is found in first test.\n===";
                    $outString .= "[$entry]($pageThisEntry{$entry})\n";
                    say LOG $outString . "\n===" . $text{$thisCV};
                    $found = 1;
                    last ENTRY;
                } elsif ($text{$thisCV} =~ s/\b($testEntry)[^\w']//i) {
                    say LOG "\n===\n$thisNum |$testEntry| is found in second test.\n===";
                    $outString .= "[$entry]($pageThisEntry{$entry})\n";
                    say LOG $outString . "\n" . $text{$thisCV};
                    $found = 1;
                    last ENTRY;
                } elsif ($text{$thisCV} =~ s/\b($testEntry)\b//i) {
                    say LOG "\n===\n$thisNum |$testEntry| is found in third test.\n===";
                    $outString .= "[$entry]($pageThisEntry{$entry})\n";
                    say LOG $outString . "\n" . $text{$thisCV};
                    $found = 1;
                    last ENTRY;
                } else {
                    say LOG "\$testEntry «$testEntry» is not found in\n$text{$thisCV}}";
                }
            }

            # No candidate entry occurs in the verse: report it for manual
            # correction (typically via a new line in the exceptions file).
            unless ($found) {
                say MISSING "$thisCV $thisNum";
                say LOG "\$thisCV: $thisCV\t\$thisNum: $thisNum"
            }
        }

        # --- 4. order the lines as the terms occur in the verse -----------
        say LOG "\t\$outString:\n$outString";
        $outString = ProperOrderOutString($outString, $thisCV);
        say LOG "<>\t\$outString: $outString";
        say OUT "$outString";
    }
}
|
|
|
|
# Applies the exceptions recorded for one verse to its list of Strong's numbers.
#
# Parameters:
#   $_[0]  the verse's current list of Strong's numbers ("√"-separated)
#   $_[1]  the verse reference (key into %adjust and %specifiedText)
# Returns: "<adjusted old list>√<newly added items>".
#
# $adjust{reference} holds ", "-separated adjustments of the form "old<TAB>new":
#   X     <TAB> ||          delete X from the list
#   NNN   <TAB> MMM         replace number NNN by number MMM
#   ||    <TAB> anything    add "anything" to the list
#   NNN   <TAB> text        force entry text for NNN       -> "NNN[text]"
#   NNN   <TAB> page/path   force a specific page for NNN  -> "NNN(page/path)"
sub Adjust {
    my ($snsOld, $ref) = @_;
    my $snsNew = "";
    say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: |$snsOld|";

    $snsOld =~ s/^[√ ]+/ /;
    my @preadjustments = split /, /, $adjust{$ref};

    foreach my $adjustment (@preadjustments) {
        say LOG "<><>\t\$adjustment: >$adjustment<";

        if ($adjustment =~ /([^\t]*)\t\|\|$/) {    # delete this from list to look for
            my $found = $1;
            $snsOld =~ s/\b$found\b ?//;
            say LOG "*1*\t\$found: $found should be deleted from \$snsOld: $snsOld";
        } elsif ($adjustment =~ /^(\d+)\t(\d+)$/) {    # change to specified number
            my ($found1, $found2) = ($1, $2);
            # \b: remove the whole number only; without it "43" would also
            # strip the tail of "143".
            $snsOld =~ s/\b$found1√//g;
            $snsNew .= "$found2√" unless $snsNew =~ /\b$found2\b /;
            say LOG "*2*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif ($adjustment =~ /^\|\|\t(.*)/) {    # add this to list to look for
            my $adj = $1;
            say LOG "\$adjustment: $adjustment, \$adj: $adj";
            if ($adj =~ /([^\t]*)\t([^\t]*)/) {
                # NOTE(review): this assignment replaces anything collected in
                # $snsNew so far -- confirm that appending was not intended.
                $snsNew = "[$1]($pageThisEntry{$2})"
            } else {
                $snsNew .= "$adj "
            }
            say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif ($adjustment =~ /^(\d+)\t([\w\. \-\']+)$/) {    # add specified word
            my ($found1, $found2) = ($1, $2);
            # Tag the number in the old list; if it is not there, try the new list.
            unless ($snsOld =~ s/\b$found1\b/$found1\[$found2\]/) {
                $snsNew =~ s/\b$found1\b/$found1\[$found2\]/;
            }
            $snsOld =~ s/ {2,}/ /;
            say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif ($adjustment =~ /^(\d+)\t([\/\d\w]+)$/) {    # add specified page
            my ($found1, $found2) = ($1, $2);
            # \b on both sides: tag exactly this number, not a longer number
            # that merely contains it.
            $snsOld =~ s/\b$found1\b/$found1\($found2\)/;
            say LOG "*5*\t\$snsOld: $snsOld";
        }
    }

    say LOG "*5A*\t\$snsNew: >$snsNew<\n\$snsOld+\$snsNew: >$snsOld< >$snsNew<";
    $snsNew =~ s/ +$//;
    $snsNew = "$snsOld√$snsNew";
    # Move forced entries ("NNN[text]") in front of a plain number that
    # precedes them, separated by a space.
    while ($snsNew =~ s/(\d+) (\d+\[[^\[\]]*\])/$2 $1/) {}
    say LOG "*5B*\t\$snsNew: $snsNew";
    return $snsNew;
}
|
|
|
|
# For every verse in %ref: tidies $SNsInCV{verse} and $checkPages{verse},
# rewrites $listOfPages{verse} as its sorted, newline-joined lines, and writes
# "<verse>\t<page>" to MISSING for each space-separated item of
# $checkPages{verse} that is not among the pages in $listOfPages{verse}.
# Also sets the global list separator $" to "\n".
# (The call to this sub in the main script is commented out.)
sub Output {
    say "Outputting";

    foreach my $verseKey (sort keys %ref) {
        my $thisRef = $ref{$verseKey};

        # Trim the ends and collapse the first run of spaces.
        $SNsInCV{$thisRef} =~ s/^ +//;
        $SNsInCV{$thisRef} =~ s/ +$//;
        $SNsInCV{$thisRef} =~ s/ {2,}/ /;

        my @pageLines = split /\n/, $listOfPages{$thisRef};
        # NOTE(review): the outer sort appears to re-sort the case-insensitively
        # sorted list in plain string order -- confirm which order is intended.
        my @sorted = sort(sort { lc($a) cmp lc($b) } @pageLines);
        $" = "\n";
        $listOfPages{$thisRef} = join "\n", @sorted;
        say LOG "\$listOfPages{$thisRef}: $listOfPages{$thisRef}\n\$checkPages{$thisRef}: $checkPages{$thisRef}";

        $checkPages{$thisRef} =~ s/^ +//;
        $checkPages{$thisRef} =~ s/ +$//;
        $checkPages{$thisRef} =~ s/ {2,}/ /;
        $checkPages{$thisRef} =~ s/ \|\|//;
        say LOG "\$checkPages{$thisRef}:\t|$checkPages{$thisRef}|";
        my @expectedPages = split / /, $checkPages{$thisRef};

        # The first sorted line is dropped; every remaining "[entry](page)"
        # line is reduced to its page name and remembered.
        shift @sorted;
        my %donePages = map {
            (my $page = $_) =~ s/\[.*?\]\((.*?)\)/$1/;
            ($page => $page);
        } @sorted;

        for my $expected (@expectedPages) {
            next if exists $donePages{$expected};
            say LOG "\$thisRef: $thisRef\t\$slice:$expected";
            say MISSING "$thisRef\t$expected";
        }
    }
}
|
|
|
|
# Applies the page substitutions in %substitutedPages to %SNsInCV for every key
# of %pages, and mirrors the result into %checkPages.
# $substitutedPages{key} is a ", "-separated list of "old<TAB>new" pairs:
#   old is "||"  -> append "new√" to $SNsInCV{key}
#   new is "||"  -> remove the first "old " from $SNsInCV{key}
#   otherwise    -> replace the first "old" by "new"
# Keys without substitutions just copy $SNsInCV{key} to $checkPages{key}.
sub Substitute {
    for my $key (sort keys %pages) {
        say LOG "\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";

        if (!exists $substitutedPages{$key}) {
            $checkPages{$key} = $SNsInCV{$key};
        }
        else {
            $substitutedPages{$key} =~ s/, $//;

            for my $pair (split /, /, $substitutedPages{$key}) {
                next unless $pair =~ /([^\t]*)\t([^\t]*)/;
                my ($old, $new) = ($1, $2);

                if    ($old eq "||") { $SNsInCV{$key} .= "$new√" }
                elsif ($new eq "||") { $SNsInCV{$key} =~ s/$old // }
                else                 { $SNsInCV{$key} =~ s/$old/$new/ }

                # %checkPages receives the value as it is before the " ||"
                # markers are stripped.
                $checkPages{$key} = $SNsInCV{$key};
                $SNsInCV{$key} =~ s/ \|\|//g;
            }
        }

        say LOG "<>\t\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";
    }
}
|
|
|
|
# Re-orders the "[entry](page)" lines of one verse by where each entry occurs in
# the verse text.
#
# Parameters:
#   $_[0]  newline-separated "[entry](page)" lines
#   $_[1]  verse reference (key into %fullText)
# Returns: the lines whose entry was found in $fullText{verse}, each followed by
#          "\n", ordered by the offset of the match. Lines whose entry is not
#          found are left out.
# Side effect: every matched entry is overwritten with spaces in
#          $fullText{verse}, so the same stretch of text cannot be matched twice
#          while offsets stay valid.
sub ProperOrderOutString {
    my ($unorderedLines, $thisCV) = @_;
    say LOG "\$_[0]: $unorderedLines\n\$_[1]: |$thisCV|";

    my %lineAtOffset;
    for my $thisSet (split /\n/, $unorderedLines) {
        say LOG "\t>A>\t\$thisSet: $thisSet";
        next unless $thisSet =~ /(\[([^\]]*)\])(\([^\)]*\))/;

        my ($ulb, $fileLoc) = ($2, $3);
        $ulb =~ s/ \.\.\. /.*?/g;    # an ellipsis in the entry matches any words in between
        say LOG "\t>B>\t\$ulb: $ulb, \$fileLoc: $fileLoc, \$fullText{$thisCV}\n$fullText{$thisCV}";

        if ($ulb =~ /^(.*)\.\*\?(.*)$/) {
            say LOG "\t>C>\tThere is an ellipsis in \$ulb: $ulb.";
            my ($found1, $found2) = ($1, $2);
            my $blank1 = " " x length $found1;
            my $blank2 = " " x length $found2;
            say LOG "Looking for\ns/^(.*?)$found1(.*?)$found2(.*)\$\nin\n$fullText{$thisCV}";

            if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*?)\b$found2\b(.*)$/$1$blank1$2$blank2$3/i) {
                say LOG "\t>C1>\t$fullText{$thisCV}";
                my $order = length $1;    # offset of the match = length of the text before it
                $lineAtOffset{$order} = $thisSet;
                say LOG "\t\t\t\$order: $order\t \$orderedSet{$order}: $lineAtOffset{$order}";
            }
        }
        else {
            say LOG "\t>D>\tThere is no ellipsis in \$ulb: $ulb.";
            my $blank = " " x length $ulb;

            # First try: the entry followed by a character that is neither a
            # word character nor an apostrophe (or by the end of the text);
            # second try: the entry followed by a word boundary.
            if ($fullText{$thisCV} =~ s/^(.*?)\b$ulb([^\w'](.*))?$/$1$blank$2/i) {
                say LOG "\t>D1>\t$fullText{$thisCV}";
                my $order = length $1;
                $lineAtOffset{$order} = $thisSet;
                say LOG ">D2>\t\$order: $order\t \$orderedSet{$order}: $lineAtOffset{$order}";
            }
            elsif ($fullText{$thisCV} =~ s/^(.*?)\b$ulb(\b(.*))?$/$1$blank$2/i) {
                say LOG "\t>D3>\t$fullText{$thisCV}";
                my $order = length $1;
                $lineAtOffset{$order} = $thisSet;
                say LOG ">D4>\t\$order: $order\t \$orderedSet{$order}: $lineAtOffset{$order}";
            }
        }
    }

    my $outS = "";
    $outS .= "$lineAtOffset{$_}\n" for sort { $a <=> $b } keys %lineAtOffset;
    return $outS;
}
|
|
|
|
__DATA__
|
|
gen Genesis
|
|
exo Exodus
|
|
lev Leviticus
|
|
num Numbers
|
|
deu Deuteronomy
|
|
jos Joshua
|
|
jdg Judges
|
|
rut Ruth
|
|
1sa 1 Samuel
|
|
2sa 2 Samuel
|
|
1ki 1 Kings
|
|
2ki 2 Kings
|
|
1ch 1 Chronicles
|
|
2ch 2 Chronicles
|
|
ezr Ezra
|
|
neh Nehemiah
|
|
est Esther
|
|
job Job
|
|
psa Psalms
|
|
pro Proverbs
|
|
ecc Ecclesiastes
|
|
sng Song of Solomon
|
|
isa Isaiah
|
|
jer Jeremiah
|
|
lam Lamentations
|
|
ezk Ezekiel
|
|
dan Daniel
|
|
hos Hosea
|
|
jol Joel
|
|
amo Amos
|
|
oba Obadiah
|
|
jon Jonah
|
|
mic Micah
|
|
nam Nahum
|
|
hab Habakkuk
|
|
zep Zephaniah
|
|
hag Haggai
|
|
zec Zechariah
|
|
mal Malachi
|