update for MAST PDF

This commit is contained in:
Henry Whitney 2020-07-10 17:46:36 -04:00
parent 50cdb84f2c
commit 961f0b15e4
5 changed files with 925 additions and 902 deletions

File diff suppressed because it is too large Load Diff

View File

@ -15,7 +15,7 @@ if ($^O eq "darwin" || $^O eq "linux") {$d = "/"}
my (@deletes);
open LOG, ">:utf8", "$Bin${d}Logs${d}log.log" or die;
open LOG, ">:utf8", "$Bin${d}Logs${d}Consistent_deletes.log" or die;
# my $fileText = read_file("Exceptions${d}Consistent_Deletes.txt", binmode => 'utf8');
#say LOG $fileText;
@ -37,6 +37,7 @@ open LOG, ">:utf8", "$Bin${d}Logs${d}log.log" or die;
# $fileText =~ s/(\t$thisNo)(\r?\n)/$1\t||$2/g;
#}
# For each key of %obviates (visited in sorted key order), write its value to
# LOG, then tag every place in $fileText where a tab followed by that value is
# immediately followed by a line break: "\t||" is appended before the break.
# NOTE(review): the value is interpolated into the pattern unescaped, so any
# regex metacharacters in it act as pattern syntax -- confirm that is intended.
foreach my $thisNo (sort keys %obviates) {
say LOG "$obviates{$thisNo}";
$fileText =~ s/(\t$obviates{$thisNo})(\r?\n)/$1\t||$2/g;
}

View File

@ -85,6 +85,7 @@ close OUT3;
#====
close LOG;
print "\n\tDone.\n\n";
@ -224,6 +225,10 @@ sub ParseLine {
sub FindVerseInGlossed {
system "grep '$book_name,$ch,$vs' /Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Output/tWs_for_PDF.txt > Temp/temp.tmp; perl -i -pe 's/^(.*)\$/<p>\$1<\\/p>/' Temp/temp.tmp";
my $soFar = read_file "Temp/temp.tmp";
open OUT, ">:utf8", "$results_file" or die;
say OUT "<!DOCTYPE html>
@ -298,6 +303,8 @@ sub FindVerseInGlossed {
}
say OUT "
<hr/>
$soFar
</body>
</html>";
close OUT;

View File

@ -1 +1,3 @@
Daniel 10:19 2530
<p>Genesis,33,11,God,kt,god</p>
<p>Genesis,33,11,graciously,kt,grace</p>
<p>Genesis,33,11,urged,kt,exhort</p>

View File

@ -53,7 +53,7 @@ my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSou
my (%entries, %ULBtext, %orderRef, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
%specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
%relevantSNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef,
%specifiedEntries, %finalOutput, %TWDir, %tempRef, %dir);
%specifiedEntries, %finalOutput, %TWDir, %tempRef, %dir, %rawDir);
my ($book, $testament);
my (@fileList);
@ -203,16 +203,17 @@ sub PairtWEntriesTotWPageAndUniqSNs {
print ".";
$file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";
my ($thisList, $shortFile) = ("", $file);
print LOG "\$file: $file\t\$shortFile: $shortFile => ";
print LOG "\$file: $file\n\$shortFile: $shortFile";
$shortFile =~ s/^\Q$topTwDir${d}\E//;
print LOG "$shortFile => ";
print LOG " =1> $shortFile";
$shortFile =~ s/\.md$//;
print LOG "$shortFile => ";
print LOG " =2> $shortFile";
$shortFile =~ s/\Q$d\E/,/;
if ($file =~ /\/([^\/]*)\/[^\/]*\.md/) {
$dir{$shortFile} = $1
if ($file =~ /\/([^\/]*)\/([^\/]*)\.md/) {
$dir{$shortFile} = $1;
$rawDir{$2} = $1;
}
say LOG "<4>\$shortFile: $shortFile\t\$dir{$shortFile}: $dir{$shortFile}";
say LOG " =3> \$shortFile: $shortFile\t\$dir{$shortFile}: $dir{$shortFile}";
#say "|$shortFile|"; die;
#if ($shortFile =~ /^(kt|names)/) {
#my $fileText = read_file("$file", binmode => 'utf8');
@ -434,87 +435,92 @@ sub ExecuteProcessSequence {
# if specified tW
say LOG "=====\n\$ref: $ref\t\$candidate: $candidate\t\$entriesThisSN{$candidate}: $entriesThisSN{$candidate}\n$tempText";
my ($found, $sn, $ulbWord, $tWpage);
my( $staticCandidate) = $candidate;
if ($candidate =~ /([^≈]*)≈([^≈]*)≈([^≈]*)/) {
# get position in true text to array
# delete found text from temp text
($sn, $ulbWord, $tWpage) = ($1,$2,$3);
($sn, $ulbWord, $staticCandidate, $tWpage) = ($1, $2, $2, $3);
while ($ulbWord =~ s/^(.*) \.\.\. (.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)\\b(.*?)\\b($3)/) {}
while ($ulbWord =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
say LOG "<A>\t\$ulbWord: $ulbWord";
say LOG "<A>\t\$ulbWord: $ulbWord \$sourcePage{$staticCandidate}: $sourcePage{$staticCandidate}\t\$rawDir{\$tWpage}: $rawDir{$tWpage} \$dir{\$tWpage}: $dir{$tWpage}";
if ($tempText =~ s/^(.*)\b$ulbWord\b(.*)$/$1$2/) {
$position = length $1;
$snippetSequence{$position} = "$ulbWord,$dir{$tWpage},$tWpage";
$snippetSequence{$position} = "$ulbWord,$sourcePage{$staticCandidate}";
}
}
# else
else {
my @possibleEntries = split /, /, $entriesThisSN{$candidate};
@possibleEntries = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|| length($a) <=> length($b)
|| $a <=> $b }
@possibleEntries;
foreach my $thisEntry (@possibleEntries) {
# for each possible entry
print LOG "-----\n\$thisEntry: $thisEntry\t=>\t";
while ($thisEntry =~ s/^(.*) \.\.\. (.*)/$1\\b(.*?)\\b$2/) {}
say LOG "$thisEntry\t\$pagesThisEntry{$thisEntry}: $pagesThisEntry{$thisEntry}";
# if tW entry matches ULB text
# get ULB snippet to verse match list
# get position in true text to array
# delete found text from temp text
if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ /$thisEntry/i) {
say LOG "<7>\t\$thisEntry |$thisEntry| is found in the first test";
my ($first, $second, $third, $fourth, $fifth);
if ($tempText =~ /^(.*)\b($thisEntry)\b(.*)$/i) {
($first, $second, $third, $fourth, $fifth) = ($!, $2, $3, $4, $5);
say LOG "<7.1>\t\$first: $first \$second: $second \$third: $third \$fourth: $fourth \$fifth: $fifth ";
if ($fifth ne "") {
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$4$5/i
my @possibleEntries = split /, /, $entriesThisSN{$candidate};
@possibleEntries = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|| length($a) <=> length($b)
|| $a <=> $b }
@possibleEntries;
foreach my $thisEntry (@possibleEntries) {
# for each possible entry;
my $staticEntry = $thisEntry;
print LOG "-----\n\$thisEntry: $thisEntry\t=>\t";
while ($thisEntry =~ s/^(.*) \.\.\. (.*)/$1\\b(.*?)\\b$2/) {}
say LOG "$thisEntry\t\$pagesThisEntry{$staticEntry}: $pagesThisEntry{$staticEntry}";
# if tW entry matches ULB text
# get ULB snippet to verse match list
# get position in true text to array
# delete found text from temp text
if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ /$thisEntry/i) {
say LOG "<7>\t\$thisEntry |$thisEntry| is found in the first test";
# Locate $thisEntry in $tempText and cut it out, keeping surrounding text.
# Capture layout of the pattern below: $1 is the text before the entry, $2 is
# the whole matched entry, and the remaining groups are whatever capture
# groups $thisEntry itself contains, followed by the trailing text.
my ($first, $second, $third, $fourth, $fifth);
if ($tempText =~ /^(.*)\b($thisEntry)\b(.*)$/i) {
    # Fixed: this previously assigned `$!` (the OS error variable) to $first,
    # a typo for `$1`, so the <7.1> log line showed an errno string instead
    # of the text preceding the match.
    ($first, $second, $third, $fourth, $fifth) = ($1, $2, $3, $4, $5);
    say LOG "<7.1>\t\$first: $first \$second: $second \$third: $third \$fourth: $fourth \$fifth: $fifth ";
    if ($fifth ne "") {
        # A non-empty fifth capture exists: keep $1, $4 and $5.
        $tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$4$5/i
    }
    else {
        # Otherwise keep $1, $3 and $4.
        $tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$3$4/i
    }
}
else {
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$3$4/i
if ($trueText =~ /^(.*)\b($thisEntry)\b.*$/) {
$position = length $1;
say LOG "<7.2>\t\$position: $position"
}
$snippetSequence{$position} = "$thisEntry,$pagesThisEntry{$staticEntry}";
$found = 1;
goto Breakout;
}
if ($trueText =~ /^(.*)\b($thisEntry)\b.*$/) {
$position = length $1;
say LOG "<7.2>\t\$position: $position"
elsif ($tempText =~ s/\b($thisEntry)[^\w']//i || $tempText =~ s/\b($thisEntry)["']//i || $tempText =~ s/["']($thisEntry)\b//i) {
say LOG "<7.3>\t\$thisEntry |$thisEntry| is found in the second test\n---
";
if ($trueText =~ /^(.*)\b$thisEntry[^\w']/i || $trueText =~ /^(.*)\b($thisEntry)["']/i || $trueText =~ /^(.*)["']($thisEntry)\b/i) {
$position = length $1;
}
$snippetSequence{$position} = "$thisEntry,$pagesThisEntry{$thisEntry}";
say LOG "<7.4>\t\$snippetSequence{$position}: $snippetSequence{$position}";
$found = 1;
goto Breakout;
}
else {
say LOG "\$thisEntry |$thisEntry| is not found in |$ref|";
}
$snippetSequence{$position} = "$thisEntry,$dir{$pagesThisEntry{$thisEntry}},$pagesThisEntry{$thisEntry}";
$found = 1;
goto Breakout;
}
elsif ($tempText =~ s/\b($thisEntry)[^\w']//i || $tempText =~ s/\b($thisEntry)["']//i || $tempText =~ s/["']($thisEntry)\b//i) {
say LOG "\$thisEntry |$thisEntry| is found in the second test\n---
";
if ($trueText =~ /^(.*)\b$thisEntry[^\w']/i || $trueText =~ /^(.*)\b($thisEntry)["']/i || $trueText =~ /^(.*)["']($thisEntry)\b/i) {
$position = length $1;
}
$snippetSequence{$position} = "$thisEntry,$dir{$pagesThisEntry{$thisEntry}},$pagesThisEntry{$thisEntry}";
say LOG "\$snippetSequence{$position}: $snippetSequence{$position}";
$found = 1;
goto Breakout;
}
else {
say LOG "\$thisEntry |$thisEntry| is not found in |$ref|";
Breakout:
unless ($found) {
say MISSING "$ref $candidate";
say LOG "Breakout: \$ref: $ref\t\$candidate: $candidate"
}
next if $found;
}
Breakout:
unless ($found) {
say MISSING "$ref $candidate";
say LOG "Breakout: \$ref: $ref\t\$candidate: $candidate"
}
next if $found;
}
$sequence = $specifiedEntries{$ref};
$sequence =~ s/[GH]\d+≈([^≈]*)≈([^≈√]*)/$1\t$2/g;
$sequence = $specifiedEntries{$ref};
$sequence =~ s/[GH]\d+≈([^≈]*)≈([^≈√]*)/$1\t$2/g;
}
foreach my $key (sort {$a <=> $b} (keys %snippetSequence)) {
$snippetSequence{$key} =~ s/\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)/$1 ... $2 ... $3/;
$snippetSequence{$key} =~ s/\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)/$1 ... $2/;
say LOG "---\n\$key: $key\t\$snippetSequence{$key}: $snippetSequence{$key}\n===\n";
say LOG "---\n\$key: $key\t\$snippetSequence{$key}: $snippetSequence{$key}\n===";
$snippetSequence{$key} =~ s/(\\b\.\*\?|\.\*\?\\b)//g;
$snippetSequence{$key} =~ s/\\b\(\.\*\?\)\\b/ ... /g;
$thisCVOutString .= "$outputFormRef,$snippetSequence{$key}\n"
}
say LOG "<7.5>\t\$thisCVOutString\n$thisCVOutString";
return $thisCVOutString
}