update for MAST PDF

This commit is contained in:
Henry Whitney 2020-07-10 17:46:36 -04:00
parent 50cdb84f2c
commit 961f0b15e4
5 changed files with 925 additions and 902 deletions

File diff suppressed because it is too large Load Diff

View File

@ -15,7 +15,7 @@ if ($^O eq "darwin" || $^O eq "linux") {$d = "/"}
my (@deletes); my (@deletes);
open LOG, ">:utf8", "$Bin${d}Logs${d}log.log" or die; open LOG, ">:utf8", "$Bin${d}Logs${d}Consistent_deletes.log" or die;
# my $fileText = read_file("Exceptions${d}Consistent_Deletes.txt", binmode => 'utf8'); # my $fileText = read_file("Exceptions${d}Consistent_Deletes.txt", binmode => 'utf8');
#say LOG $fileText; #say LOG $fileText;
@ -37,6 +37,7 @@ open LOG, ">:utf8", "$Bin${d}Logs${d}log.log" or die;
# $fileText =~ s/(\t$thisNo)(\r?\n)/$1\t||$2/g; # $fileText =~ s/(\t$thisNo)(\r?\n)/$1\t||$2/g;
#} #}
foreach my $thisNo (sort keys %obviates) { foreach my $thisNo (sort keys %obviates) {
say LOG "$obviates{$thisNo}";
$fileText =~ s/(\t$obviates{$thisNo})(\r?\n)/$1\t||$2/g; $fileText =~ s/(\t$obviates{$thisNo})(\r?\n)/$1\t||$2/g;
} }

View File

@ -85,6 +85,7 @@ close OUT3;
#==== #====
close LOG; close LOG;
print "\n\tDone.\n\n"; print "\n\tDone.\n\n";
@ -224,6 +225,10 @@ sub ParseLine {
sub FindVerseInGlossed { sub FindVerseInGlossed {
system "grep '$book_name,$ch,$vs' /Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Output/tWs_for_PDF.txt > Temp/temp.tmp; perl -i -pe 's/^(.*)\$/<p>\$1<\\/p>/' Temp/temp.tmp";
my $soFar = read_file "Temp/temp.tmp";
open OUT, ">:utf8", "$results_file" or die; open OUT, ">:utf8", "$results_file" or die;
say OUT "<!DOCTYPE html> say OUT "<!DOCTYPE html>
@ -298,6 +303,8 @@ sub FindVerseInGlossed {
} }
say OUT " say OUT "
<hr/>
$soFar
</body> </body>
</html>"; </html>";
close OUT; close OUT;

View File

@ -1 +1,3 @@
Daniel 10:19 2530 <p>Genesis,33,11,God,kt,god</p>
<p>Genesis,33,11,graciously,kt,grace</p>
<p>Genesis,33,11,urged,kt,exhort</p>

View File

@ -53,7 +53,7 @@ my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSou
my (%entries, %ULBtext, %orderRef, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages, my (%entries, %ULBtext, %orderRef, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
%specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs, %specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
%relevantSNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef, %relevantSNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef,
%specifiedEntries, %finalOutput, %TWDir, %tempRef, %dir); %specifiedEntries, %finalOutput, %TWDir, %tempRef, %dir, %rawDir);
my ($book, $testament); my ($book, $testament);
my (@fileList); my (@fileList);
@ -203,16 +203,17 @@ sub PairtWEntriesTotWPageAndUniqSNs {
print "."; print ".";
$file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin"; $file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";
my ($thisList, $shortFile) = ("", $file); my ($thisList, $shortFile) = ("", $file);
print LOG "\$file: $file\t\$shortFile: $shortFile => "; print LOG "\$file: $file\n\$shortFile: $shortFile";
$shortFile =~ s/^\Q$topTwDir${d}\E//; $shortFile =~ s/^\Q$topTwDir${d}\E//;
print LOG "$shortFile => "; print LOG " =1> $shortFile";
$shortFile =~ s/\.md$//; $shortFile =~ s/\.md$//;
print LOG "$shortFile => "; print LOG " =2> $shortFile";
$shortFile =~ s/\Q$d\E/,/; $shortFile =~ s/\Q$d\E/,/;
if ($file =~ /\/([^\/]*)\/[^\/]*\.md/) { if ($file =~ /\/([^\/]*)\/([^\/]*)\.md/) {
$dir{$shortFile} = $1 $dir{$shortFile} = $1;
$rawDir{$2} = $1;
} }
say LOG "<4>\$shortFile: $shortFile\t\$dir{$shortFile}: $dir{$shortFile}"; say LOG " =3> \$shortFile: $shortFile\t\$dir{$shortFile}: $dir{$shortFile}";
#say "|$shortFile|"; die; #say "|$shortFile|"; die;
#if ($shortFile =~ /^(kt|names)/) { #if ($shortFile =~ /^(kt|names)/) {
#my $fileText = read_file("$file", binmode => 'utf8'); #my $fileText = read_file("$file", binmode => 'utf8');
@ -434,87 +435,92 @@ sub ExecuteProcessSequence {
# if specified tW # if specified tW
say LOG "=====\n\$ref: $ref\t\$candidate: $candidate\t\$entriesThisSN{$candidate}: $entriesThisSN{$candidate}\n$tempText"; say LOG "=====\n\$ref: $ref\t\$candidate: $candidate\t\$entriesThisSN{$candidate}: $entriesThisSN{$candidate}\n$tempText";
my ($found, $sn, $ulbWord, $tWpage); my ($found, $sn, $ulbWord, $tWpage);
my( $staticCandidate) = $candidate;
if ($candidate =~ /([^≈]*)≈([^≈]*)≈([^≈]*)/) { if ($candidate =~ /([^≈]*)≈([^≈]*)≈([^≈]*)/) {
# get position in true text to array # get position in true text to array
# delete found text from temp text # delete found text from temp text
($sn, $ulbWord, $tWpage) = ($1,$2,$3); ($sn, $ulbWord, $staticCandidate, $tWpage) = ($1, $2, $2, $3);
while ($ulbWord =~ s/^(.*) \.\.\. (.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)\\b(.*?)\\b($3)/) {} while ($ulbWord =~ s/^(.*) \.\.\. (.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)\\b(.*?)\\b($3)/) {}
while ($ulbWord =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {} while ($ulbWord =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
say LOG "<A>\t\$ulbWord: $ulbWord"; say LOG "<A>\t\$ulbWord: $ulbWord \$sourcePage{$staticCandidate}: $sourcePage{$staticCandidate}\t\$rawDir{\$tWpage}: $rawDir{$tWpage} \$dir{\$tWpage}: $dir{$tWpage}";
if ($tempText =~ s/^(.*)\b$ulbWord\b(.*)$/$1$2/) { if ($tempText =~ s/^(.*)\b$ulbWord\b(.*)$/$1$2/) {
$position = length $1; $position = length $1;
$snippetSequence{$position} = "$ulbWord,$dir{$tWpage},$tWpage"; $snippetSequence{$position} = "$ulbWord,$sourcePage{$staticCandidate}";
} }
} }
# else # else
else { else {
my @possibleEntries = split /, /, $entriesThisSN{$candidate}; my @possibleEntries = split /, /, $entriesThisSN{$candidate};
@possibleEntries = reverse sort { substr($a,0,1) <=> substr($b,0,1) @possibleEntries = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|| length($a) <=> length($b) || length($a) <=> length($b)
|| $a <=> $b } || $a <=> $b }
@possibleEntries; @possibleEntries;
foreach my $thisEntry (@possibleEntries) { foreach my $thisEntry (@possibleEntries) {
# for each possible entry # for each possible entry;
print LOG "-----\n\$thisEntry: $thisEntry\t=>\t"; my $staticEntry = $thisEntry;
while ($thisEntry =~ s/^(.*) \.\.\. (.*)/$1\\b(.*?)\\b$2/) {} print LOG "-----\n\$thisEntry: $thisEntry\t=>\t";
say LOG "$thisEntry\t\$pagesThisEntry{$thisEntry}: $pagesThisEntry{$thisEntry}"; while ($thisEntry =~ s/^(.*) \.\.\. (.*)/$1\\b(.*?)\\b$2/) {}
# if tW entry matches ULB text say LOG "$thisEntry\t\$pagesThisEntry{$staticEntry}: $pagesThisEntry{$staticEntry}";
# get ULB snippet to verse match list # if tW entry matches ULB text
# get position in true text to array # get ULB snippet to verse match list
# delete found text from temp text # get position in true text to array
if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ /$thisEntry/i) { # delete found text from temp text
say LOG "<7>\t\$thisEntry |$thisEntry| is found in the first test"; if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ /$thisEntry/i) {
my ($first, $second, $third, $fourth, $fifth); say LOG "<7>\t\$thisEntry |$thisEntry| is found in the first test";
if ($tempText =~ /^(.*)\b($thisEntry)\b(.*)$/i) { my ($first, $second, $third, $fourth, $fifth);
($first, $second, $third, $fourth, $fifth) = ($!, $2, $3, $4, $5); if ($tempText =~ /^(.*)\b($thisEntry)\b(.*)$/i) {
say LOG "<7.1>\t\$first: $first \$second: $second \$third: $third \$fourth: $fourth \$fifth: $fifth "; ($first, $second, $third, $fourth, $fifth) = ($!, $2, $3, $4, $5);
if ($fifth ne "") { say LOG "<7.1>\t\$first: $first \$second: $second \$third: $third \$fourth: $fourth \$fifth: $fifth ";
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$4$5/i if ($fifth ne "") {
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$4$5/i
}
else {
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$3$4/i
}
} }
else { if ($trueText =~ /^(.*)\b($thisEntry)\b.*$/) {
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$3$4/i $position = length $1;
say LOG "<7.2>\t\$position: $position"
} }
$snippetSequence{$position} = "$thisEntry,$pagesThisEntry{$staticEntry}";
$found = 1;
goto Breakout;
} }
if ($trueText =~ /^(.*)\b($thisEntry)\b.*$/) { elsif ($tempText =~ s/\b($thisEntry)[^\w']//i || $tempText =~ s/\b($thisEntry)["']//i || $tempText =~ s/["']($thisEntry)\b//i) {
$position = length $1; say LOG "<7.3>\t\$thisEntry |$thisEntry| is found in the second test\n---
say LOG "<7.2>\t\$position: $position" ";
if ($trueText =~ /^(.*)\b$thisEntry[^\w']/i || $trueText =~ /^(.*)\b($thisEntry)["']/i || $trueText =~ /^(.*)["']($thisEntry)\b/i) {
$position = length $1;
}
$snippetSequence{$position} = "$thisEntry,$pagesThisEntry{$thisEntry}";
say LOG "<7.4>\t\$snippetSequence{$position}: $snippetSequence{$position}";
$found = 1;
goto Breakout;
}
else {
say LOG "\$thisEntry |$thisEntry| is not found in |$ref|";
} }
$snippetSequence{$position} = "$thisEntry,$dir{$pagesThisEntry{$thisEntry}},$pagesThisEntry{$thisEntry}";
$found = 1;
goto Breakout;
} }
elsif ($tempText =~ s/\b($thisEntry)[^\w']//i || $tempText =~ s/\b($thisEntry)["']//i || $tempText =~ s/["']($thisEntry)\b//i) { Breakout:
say LOG "\$thisEntry |$thisEntry| is found in the second test\n--- unless ($found) {
"; say MISSING "$ref $candidate";
if ($trueText =~ /^(.*)\b$thisEntry[^\w']/i || $trueText =~ /^(.*)\b($thisEntry)["']/i || $trueText =~ /^(.*)["']($thisEntry)\b/i) { say LOG "Breakout: \$ref: $ref\t\$candidate: $candidate"
$position = length $1;
}
$snippetSequence{$position} = "$thisEntry,$dir{$pagesThisEntry{$thisEntry}},$pagesThisEntry{$thisEntry}";
say LOG "\$snippetSequence{$position}: $snippetSequence{$position}";
$found = 1;
goto Breakout;
}
else {
say LOG "\$thisEntry |$thisEntry| is not found in |$ref|";
} }
next if $found;
} }
Breakout: $sequence = $specifiedEntries{$ref};
unless ($found) { $sequence =~ s/[GH]\d+≈([^≈]*)≈([^≈√]*)/$1\t$2/g;
say MISSING "$ref $candidate";
say LOG "Breakout: \$ref: $ref\t\$candidate: $candidate"
}
next if $found;
}
$sequence = $specifiedEntries{$ref};
$sequence =~ s/[GH]\d+≈([^≈]*)≈([^≈√]*)/$1\t$2/g;
} }
foreach my $key (sort {$a <=> $b} (keys %snippetSequence)) { foreach my $key (sort {$a <=> $b} (keys %snippetSequence)) {
$snippetSequence{$key} =~ s/\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)/$1 ... $2 ... $3/; $snippetSequence{$key} =~ s/\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)/$1 ... $2 ... $3/;
$snippetSequence{$key} =~ s/\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)/$1 ... $2/; $snippetSequence{$key} =~ s/\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)/$1 ... $2/;
say LOG "---\n\$key: $key\t\$snippetSequence{$key}: $snippetSequence{$key}\n===\n"; say LOG "---\n\$key: $key\t\$snippetSequence{$key}: $snippetSequence{$key}\n===";
$snippetSequence{$key} =~ s/(\\b\.\*\?|\.\*\?\\b)//g;
$snippetSequence{$key} =~ s/\\b\(\.\*\?\)\\b/ ... /g;
$thisCVOutString .= "$outputFormRef,$snippetSequence{$key}\n" $thisCVOutString .= "$outputFormRef,$snippetSequence{$key}\n"
} }
say LOG "<7.5>\t\$thisCVOutString\n$thisCVOutString";
return $thisCVOutString return $thisCVOutString
} }