update for MAST PDF
This commit is contained in:
parent
50cdb84f2c
commit
961f0b15e4
File diff suppressed because it is too large
Load Diff
|
@ -15,7 +15,7 @@ if ($^O eq "darwin" || $^O eq "linux") {$d = "/"}
|
||||||
|
|
||||||
my (@deletes);
|
my (@deletes);
|
||||||
|
|
||||||
open LOG, ">:utf8", "$Bin${d}Logs${d}log.log" or die;
|
open LOG, ">:utf8", "$Bin${d}Logs${d}Consistent_deletes.log" or die;
|
||||||
|
|
||||||
# my $fileText = read_file("Exceptions${d}Consistent_Deletes.txt", binmode => 'utf8');
|
# my $fileText = read_file("Exceptions${d}Consistent_Deletes.txt", binmode => 'utf8');
|
||||||
#say LOG $fileText;
|
#say LOG $fileText;
|
||||||
|
@ -37,6 +37,7 @@ open LOG, ">:utf8", "$Bin${d}Logs${d}log.log" or die;
|
||||||
# $fileText =~ s/(\t$thisNo)(\r?\n)/$1\t||$2/g;
|
# $fileText =~ s/(\t$thisNo)(\r?\n)/$1\t||$2/g;
|
||||||
#}
|
#}
|
||||||
foreach my $thisNo (sort keys %obviates) {
|
foreach my $thisNo (sort keys %obviates) {
|
||||||
|
say LOG "$obviates{$thisNo}";
|
||||||
$fileText =~ s/(\t$obviates{$thisNo})(\r?\n)/$1\t||$2/g;
|
$fileText =~ s/(\t$obviates{$thisNo})(\r?\n)/$1\t||$2/g;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -85,6 +85,7 @@ close OUT3;
|
||||||
|
|
||||||
#====
|
#====
|
||||||
|
|
||||||
|
|
||||||
close LOG;
|
close LOG;
|
||||||
print "\n\tDone.\n\n";
|
print "\n\tDone.\n\n";
|
||||||
|
|
||||||
|
@ -224,6 +225,10 @@ sub ParseLine {
|
||||||
|
|
||||||
|
|
||||||
sub FindVerseInGlossed {
|
sub FindVerseInGlossed {
|
||||||
|
system "grep '$book_name,$ch,$vs' /Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Output/tWs_for_PDF.txt > Temp/temp.tmp; perl -i -pe 's/^(.*)\$/<p>\$1<\\/p>/' Temp/temp.tmp";
|
||||||
|
|
||||||
|
my $soFar = read_file "Temp/temp.tmp";
|
||||||
|
|
||||||
open OUT, ">:utf8", "$results_file" or die;
|
open OUT, ">:utf8", "$results_file" or die;
|
||||||
|
|
||||||
say OUT "<!DOCTYPE html>
|
say OUT "<!DOCTYPE html>
|
||||||
|
@ -298,6 +303,8 @@ sub FindVerseInGlossed {
|
||||||
}
|
}
|
||||||
|
|
||||||
say OUT "
|
say OUT "
|
||||||
|
<hr/>
|
||||||
|
$soFar
|
||||||
</body>
|
</body>
|
||||||
</html>";
|
</html>";
|
||||||
close OUT;
|
close OUT;
|
||||||
|
|
|
@ -1 +1,3 @@
|
||||||
Daniel 10:19 2530
|
<p>Genesis,33,11,God,kt,god</p>
|
||||||
|
<p>Genesis,33,11,graciously,kt,grace</p>
|
||||||
|
<p>Genesis,33,11,urged,kt,exhort</p>
|
||||||
|
|
|
@ -53,7 +53,7 @@ my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSou
|
||||||
my (%entries, %ULBtext, %orderRef, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
|
my (%entries, %ULBtext, %orderRef, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
|
||||||
%specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
|
%specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
|
||||||
%relevantSNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef,
|
%relevantSNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef,
|
||||||
%specifiedEntries, %finalOutput, %TWDir, %tempRef, %dir);
|
%specifiedEntries, %finalOutput, %TWDir, %tempRef, %dir, %rawDir);
|
||||||
|
|
||||||
my ($book, $testament);
|
my ($book, $testament);
|
||||||
my (@fileList);
|
my (@fileList);
|
||||||
|
@ -203,16 +203,17 @@ sub PairtWEntriesTotWPageAndUniqSNs {
|
||||||
print ".";
|
print ".";
|
||||||
$file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";
|
$file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";
|
||||||
my ($thisList, $shortFile) = ("", $file);
|
my ($thisList, $shortFile) = ("", $file);
|
||||||
print LOG "\$file: $file\t\$shortFile: $shortFile => ";
|
print LOG "\$file: $file\n\$shortFile: $shortFile";
|
||||||
$shortFile =~ s/^\Q$topTwDir${d}\E//;
|
$shortFile =~ s/^\Q$topTwDir${d}\E//;
|
||||||
print LOG "$shortFile => ";
|
print LOG " =1> $shortFile";
|
||||||
$shortFile =~ s/\.md$//;
|
$shortFile =~ s/\.md$//;
|
||||||
print LOG "$shortFile => ";
|
print LOG " =2> $shortFile";
|
||||||
$shortFile =~ s/\Q$d\E/,/;
|
$shortFile =~ s/\Q$d\E/,/;
|
||||||
if ($file =~ /\/([^\/]*)\/[^\/]*\.md/) {
|
if ($file =~ /\/([^\/]*)\/([^\/]*)\.md/) {
|
||||||
$dir{$shortFile} = $1
|
$dir{$shortFile} = $1;
|
||||||
|
$rawDir{$2} = $1;
|
||||||
}
|
}
|
||||||
say LOG "<4>\$shortFile: $shortFile\t\$dir{$shortFile}: $dir{$shortFile}";
|
say LOG " =3> \$shortFile: $shortFile\t\$dir{$shortFile}: $dir{$shortFile}";
|
||||||
#say "|$shortFile|"; die;
|
#say "|$shortFile|"; die;
|
||||||
#if ($shortFile =~ /^(kt|names)/) {
|
#if ($shortFile =~ /^(kt|names)/) {
|
||||||
#my $fileText = read_file("$file", binmode => 'utf8');
|
#my $fileText = read_file("$file", binmode => 'utf8');
|
||||||
|
@ -434,87 +435,92 @@ sub ExecuteProcessSequence {
|
||||||
# if specified tW
|
# if specified tW
|
||||||
say LOG "=====\n\$ref: $ref\t\$candidate: $candidate\t\$entriesThisSN{$candidate}: $entriesThisSN{$candidate}\n$tempText";
|
say LOG "=====\n\$ref: $ref\t\$candidate: $candidate\t\$entriesThisSN{$candidate}: $entriesThisSN{$candidate}\n$tempText";
|
||||||
my ($found, $sn, $ulbWord, $tWpage);
|
my ($found, $sn, $ulbWord, $tWpage);
|
||||||
|
my( $staticCandidate) = $candidate;
|
||||||
if ($candidate =~ /([^≈]*)≈([^≈]*)≈([^≈]*)/) {
|
if ($candidate =~ /([^≈]*)≈([^≈]*)≈([^≈]*)/) {
|
||||||
# get position in true text to array
|
# get position in true text to array
|
||||||
# delete found text from temp text
|
# delete found text from temp text
|
||||||
($sn, $ulbWord, $tWpage) = ($1,$2,$3);
|
($sn, $ulbWord, $staticCandidate, $tWpage) = ($1, $2, $2, $3);
|
||||||
while ($ulbWord =~ s/^(.*) \.\.\. (.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)\\b(.*?)\\b($3)/) {}
|
while ($ulbWord =~ s/^(.*) \.\.\. (.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)\\b(.*?)\\b($3)/) {}
|
||||||
while ($ulbWord =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
|
while ($ulbWord =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
|
||||||
say LOG "<A>\t\$ulbWord: $ulbWord";
|
say LOG "<A>\t\$ulbWord: $ulbWord \$sourcePage{$staticCandidate}: $sourcePage{$staticCandidate}\t\$rawDir{\$tWpage}: $rawDir{$tWpage} \$dir{\$tWpage}: $dir{$tWpage}";
|
||||||
if ($tempText =~ s/^(.*)\b$ulbWord\b(.*)$/$1$2/) {
|
if ($tempText =~ s/^(.*)\b$ulbWord\b(.*)$/$1$2/) {
|
||||||
$position = length $1;
|
$position = length $1;
|
||||||
$snippetSequence{$position} = "$ulbWord,$dir{$tWpage},$tWpage";
|
$snippetSequence{$position} = "$ulbWord,$sourcePage{$staticCandidate}";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
# else
|
# else
|
||||||
else {
|
else {
|
||||||
my @possibleEntries = split /, /, $entriesThisSN{$candidate};
|
my @possibleEntries = split /, /, $entriesThisSN{$candidate};
|
||||||
@possibleEntries = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|
@possibleEntries = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|
||||||
|| length($a) <=> length($b)
|
|| length($a) <=> length($b)
|
||||||
|| $a <=> $b }
|
|| $a <=> $b }
|
||||||
@possibleEntries;
|
@possibleEntries;
|
||||||
foreach my $thisEntry (@possibleEntries) {
|
foreach my $thisEntry (@possibleEntries) {
|
||||||
# for each possible entry
|
# for each possible entry;
|
||||||
print LOG "-----\n\$thisEntry: $thisEntry\t=>\t";
|
my $staticEntry = $thisEntry;
|
||||||
while ($thisEntry =~ s/^(.*) \.\.\. (.*)/$1\\b(.*?)\\b$2/) {}
|
print LOG "-----\n\$thisEntry: $thisEntry\t=>\t";
|
||||||
say LOG "$thisEntry\t\$pagesThisEntry{$thisEntry}: $pagesThisEntry{$thisEntry}";
|
while ($thisEntry =~ s/^(.*) \.\.\. (.*)/$1\\b(.*?)\\b$2/) {}
|
||||||
# if tW entry matches ULB text
|
say LOG "$thisEntry\t\$pagesThisEntry{$staticEntry}: $pagesThisEntry{$staticEntry}";
|
||||||
# get ULB snippet to verse match list
|
# if tW entry matches ULB text
|
||||||
# get position in true text to array
|
# get ULB snippet to verse match list
|
||||||
# delete found text from temp text
|
# get position in true text to array
|
||||||
if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ /$thisEntry/i) {
|
# delete found text from temp text
|
||||||
say LOG "<7>\t\$thisEntry |$thisEntry| is found in the first test";
|
if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ /$thisEntry/i) {
|
||||||
my ($first, $second, $third, $fourth, $fifth);
|
say LOG "<7>\t\$thisEntry |$thisEntry| is found in the first test";
|
||||||
if ($tempText =~ /^(.*)\b($thisEntry)\b(.*)$/i) {
|
my ($first, $second, $third, $fourth, $fifth);
|
||||||
($first, $second, $third, $fourth, $fifth) = ($!, $2, $3, $4, $5);
|
if ($tempText =~ /^(.*)\b($thisEntry)\b(.*)$/i) {
|
||||||
say LOG "<7.1>\t\$first: $first \$second: $second \$third: $third \$fourth: $fourth \$fifth: $fifth ";
|
($first, $second, $third, $fourth, $fifth) = ($!, $2, $3, $4, $5);
|
||||||
if ($fifth ne "") {
|
say LOG "<7.1>\t\$first: $first \$second: $second \$third: $third \$fourth: $fourth \$fifth: $fifth ";
|
||||||
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$4$5/i
|
if ($fifth ne "") {
|
||||||
|
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$4$5/i
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$3$4/i
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else {
|
if ($trueText =~ /^(.*)\b($thisEntry)\b.*$/) {
|
||||||
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$3$4/i
|
$position = length $1;
|
||||||
|
say LOG "<7.2>\t\$position: $position"
|
||||||
}
|
}
|
||||||
|
$snippetSequence{$position} = "$thisEntry,$pagesThisEntry{$staticEntry}";
|
||||||
|
$found = 1;
|
||||||
|
goto Breakout;
|
||||||
}
|
}
|
||||||
if ($trueText =~ /^(.*)\b($thisEntry)\b.*$/) {
|
elsif ($tempText =~ s/\b($thisEntry)[^\w']//i || $tempText =~ s/\b($thisEntry)["']//i || $tempText =~ s/["']($thisEntry)\b//i) {
|
||||||
$position = length $1;
|
say LOG "<7.3>\t\$thisEntry |$thisEntry| is found in the second test\n---
|
||||||
say LOG "<7.2>\t\$position: $position"
|
";
|
||||||
|
if ($trueText =~ /^(.*)\b$thisEntry[^\w']/i || $trueText =~ /^(.*)\b($thisEntry)["']/i || $trueText =~ /^(.*)["']($thisEntry)\b/i) {
|
||||||
|
$position = length $1;
|
||||||
|
}
|
||||||
|
$snippetSequence{$position} = "$thisEntry,$pagesThisEntry{$thisEntry}";
|
||||||
|
say LOG "<7.4>\t\$snippetSequence{$position}: $snippetSequence{$position}";
|
||||||
|
$found = 1;
|
||||||
|
goto Breakout;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
say LOG "\$thisEntry |$thisEntry| is not found in |$ref|";
|
||||||
}
|
}
|
||||||
$snippetSequence{$position} = "$thisEntry,$dir{$pagesThisEntry{$thisEntry}},$pagesThisEntry{$thisEntry}";
|
|
||||||
$found = 1;
|
|
||||||
goto Breakout;
|
|
||||||
}
|
}
|
||||||
elsif ($tempText =~ s/\b($thisEntry)[^\w']//i || $tempText =~ s/\b($thisEntry)["']//i || $tempText =~ s/["']($thisEntry)\b//i) {
|
Breakout:
|
||||||
say LOG "\$thisEntry |$thisEntry| is found in the second test\n---
|
unless ($found) {
|
||||||
";
|
say MISSING "$ref $candidate";
|
||||||
if ($trueText =~ /^(.*)\b$thisEntry[^\w']/i || $trueText =~ /^(.*)\b($thisEntry)["']/i || $trueText =~ /^(.*)["']($thisEntry)\b/i) {
|
say LOG "Breakout: \$ref: $ref\t\$candidate: $candidate"
|
||||||
$position = length $1;
|
|
||||||
}
|
|
||||||
$snippetSequence{$position} = "$thisEntry,$dir{$pagesThisEntry{$thisEntry}},$pagesThisEntry{$thisEntry}";
|
|
||||||
say LOG "\$snippetSequence{$position}: $snippetSequence{$position}";
|
|
||||||
$found = 1;
|
|
||||||
goto Breakout;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
say LOG "\$thisEntry |$thisEntry| is not found in |$ref|";
|
|
||||||
}
|
}
|
||||||
|
next if $found;
|
||||||
}
|
}
|
||||||
Breakout:
|
$sequence = $specifiedEntries{$ref};
|
||||||
unless ($found) {
|
$sequence =~ s/[GH]\d+≈([^≈]*)≈([^≈√]*)/$1\t$2/g;
|
||||||
say MISSING "$ref $candidate";
|
|
||||||
say LOG "Breakout: \$ref: $ref\t\$candidate: $candidate"
|
|
||||||
}
|
|
||||||
next if $found;
|
|
||||||
}
|
|
||||||
$sequence = $specifiedEntries{$ref};
|
|
||||||
$sequence =~ s/[GH]\d+≈([^≈]*)≈([^≈√]*)/$1\t$2/g;
|
|
||||||
}
|
}
|
||||||
foreach my $key (sort {$a <=> $b} (keys %snippetSequence)) {
|
foreach my $key (sort {$a <=> $b} (keys %snippetSequence)) {
|
||||||
$snippetSequence{$key} =~ s/\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)/$1 ... $2 ... $3/;
|
$snippetSequence{$key} =~ s/\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)/$1 ... $2 ... $3/;
|
||||||
$snippetSequence{$key} =~ s/\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)/$1 ... $2/;
|
$snippetSequence{$key} =~ s/\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)/$1 ... $2/;
|
||||||
say LOG "---\n\$key: $key\t\$snippetSequence{$key}: $snippetSequence{$key}\n===\n";
|
say LOG "---\n\$key: $key\t\$snippetSequence{$key}: $snippetSequence{$key}\n===";
|
||||||
|
$snippetSequence{$key} =~ s/(\\b\.\*\?|\.\*\?\\b)//g;
|
||||||
|
$snippetSequence{$key} =~ s/\\b\(\.\*\?\)\\b/ ... /g;
|
||||||
$thisCVOutString .= "$outputFormRef,$snippetSequence{$key}\n"
|
$thisCVOutString .= "$outputFormRef,$snippetSequence{$key}\n"
|
||||||
}
|
}
|
||||||
|
say LOG "<7.5>\t\$thisCVOutString\n$thisCVOutString";
|
||||||
return $thisCVOutString
|
return $thisCVOutString
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue