update for MAST PDF
This commit is contained in:
parent
50cdb84f2c
commit
961f0b15e4
File diff suppressed because it is too large
Load Diff
|
@ -15,7 +15,7 @@ if ($^O eq "darwin" || $^O eq "linux") {$d = "/"}
|
|||
|
||||
my (@deletes);
|
||||
|
||||
open LOG, ">:utf8", "$Bin${d}Logs${d}log.log" or die;
|
||||
open LOG, ">:utf8", "$Bin${d}Logs${d}Consistent_deletes.log" or die;
|
||||
|
||||
# my $fileText = read_file("Exceptions${d}Consistent_Deletes.txt", binmode => 'utf8');
|
||||
#say LOG $fileText;
|
||||
|
@ -37,6 +37,7 @@ open LOG, ">:utf8", "$Bin${d}Logs${d}log.log" or die;
|
|||
# $fileText =~ s/(\t$thisNo)(\r?\n)/$1\t||$2/g;
|
||||
#}
|
||||
foreach my $thisNo (sort keys %obviates) {
|
||||
say LOG "$obviates{$thisNo}";
|
||||
$fileText =~ s/(\t$obviates{$thisNo})(\r?\n)/$1\t||$2/g;
|
||||
}
|
||||
|
||||
|
|
|
@ -85,6 +85,7 @@ close OUT3;
|
|||
|
||||
#====
|
||||
|
||||
|
||||
close LOG;
|
||||
print "\n\tDone.\n\n";
|
||||
|
||||
|
@ -224,6 +225,10 @@ sub ParseLine {
|
|||
|
||||
|
||||
sub FindVerseInGlossed {
|
||||
system "grep '$book_name,$ch,$vs' /Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Output/tWs_for_PDF.txt > Temp/temp.tmp; perl -i -pe 's/^(.*)\$/<p>\$1<\\/p>/' Temp/temp.tmp";
|
||||
|
||||
my $soFar = read_file "Temp/temp.tmp";
|
||||
|
||||
open OUT, ">:utf8", "$results_file" or die;
|
||||
|
||||
say OUT "<!DOCTYPE html>
|
||||
|
@ -298,6 +303,8 @@ sub FindVerseInGlossed {
|
|||
}
|
||||
|
||||
say OUT "
|
||||
<hr/>
|
||||
$soFar
|
||||
</body>
|
||||
</html>";
|
||||
close OUT;
|
||||
|
|
|
@ -1 +1,3 @@
|
|||
Daniel 10:19 2530
|
||||
<p>Genesis,33,11,God,kt,god</p>
|
||||
<p>Genesis,33,11,graciously,kt,grace</p>
|
||||
<p>Genesis,33,11,urged,kt,exhort</p>
|
||||
|
|
|
@ -53,7 +53,7 @@ my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSou
|
|||
my (%entries, %ULBtext, %orderRef, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
|
||||
%specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
|
||||
%relevantSNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef,
|
||||
%specifiedEntries, %finalOutput, %TWDir, %tempRef, %dir);
|
||||
%specifiedEntries, %finalOutput, %TWDir, %tempRef, %dir, %rawDir);
|
||||
|
||||
my ($book, $testament);
|
||||
my (@fileList);
|
||||
|
@ -203,16 +203,17 @@ sub PairtWEntriesTotWPageAndUniqSNs {
|
|||
print ".";
|
||||
$file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";
|
||||
my ($thisList, $shortFile) = ("", $file);
|
||||
print LOG "\$file: $file\t\$shortFile: $shortFile => ";
|
||||
print LOG "\$file: $file\n\$shortFile: $shortFile";
|
||||
$shortFile =~ s/^\Q$topTwDir${d}\E//;
|
||||
print LOG "$shortFile => ";
|
||||
print LOG " =1> $shortFile";
|
||||
$shortFile =~ s/\.md$//;
|
||||
print LOG "$shortFile => ";
|
||||
print LOG " =2> $shortFile";
|
||||
$shortFile =~ s/\Q$d\E/,/;
|
||||
if ($file =~ /\/([^\/]*)\/[^\/]*\.md/) {
|
||||
$dir{$shortFile} = $1
|
||||
if ($file =~ /\/([^\/]*)\/([^\/]*)\.md/) {
|
||||
$dir{$shortFile} = $1;
|
||||
$rawDir{$2} = $1;
|
||||
}
|
||||
say LOG "<4>\$shortFile: $shortFile\t\$dir{$shortFile}: $dir{$shortFile}";
|
||||
say LOG " =3> \$shortFile: $shortFile\t\$dir{$shortFile}: $dir{$shortFile}";
|
||||
#say "|$shortFile|"; die;
|
||||
#if ($shortFile =~ /^(kt|names)/) {
|
||||
#my $fileText = read_file("$file", binmode => 'utf8');
|
||||
|
@ -434,87 +435,92 @@ sub ExecuteProcessSequence {
|
|||
# if specified tW
|
||||
say LOG "=====\n\$ref: $ref\t\$candidate: $candidate\t\$entriesThisSN{$candidate}: $entriesThisSN{$candidate}\n$tempText";
|
||||
my ($found, $sn, $ulbWord, $tWpage);
|
||||
my( $staticCandidate) = $candidate;
|
||||
if ($candidate =~ /([^≈]*)≈([^≈]*)≈([^≈]*)/) {
|
||||
# get position in true text to array
|
||||
# delete found text from temp text
|
||||
($sn, $ulbWord, $tWpage) = ($1,$2,$3);
|
||||
($sn, $ulbWord, $staticCandidate, $tWpage) = ($1, $2, $2, $3);
|
||||
while ($ulbWord =~ s/^(.*) \.\.\. (.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)\\b(.*?)\\b($3)/) {}
|
||||
while ($ulbWord =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
|
||||
say LOG "<A>\t\$ulbWord: $ulbWord";
|
||||
say LOG "<A>\t\$ulbWord: $ulbWord \$sourcePage{$staticCandidate}: $sourcePage{$staticCandidate}\t\$rawDir{\$tWpage}: $rawDir{$tWpage} \$dir{\$tWpage}: $dir{$tWpage}";
|
||||
if ($tempText =~ s/^(.*)\b$ulbWord\b(.*)$/$1$2/) {
|
||||
$position = length $1;
|
||||
$snippetSequence{$position} = "$ulbWord,$dir{$tWpage},$tWpage";
|
||||
$snippetSequence{$position} = "$ulbWord,$sourcePage{$staticCandidate}";
|
||||
}
|
||||
}
|
||||
# else
|
||||
else {
|
||||
my @possibleEntries = split /, /, $entriesThisSN{$candidate};
|
||||
@possibleEntries = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|
||||
|| length($a) <=> length($b)
|
||||
|| $a <=> $b }
|
||||
@possibleEntries;
|
||||
foreach my $thisEntry (@possibleEntries) {
|
||||
# for each possible entry
|
||||
print LOG "-----\n\$thisEntry: $thisEntry\t=>\t";
|
||||
while ($thisEntry =~ s/^(.*) \.\.\. (.*)/$1\\b(.*?)\\b$2/) {}
|
||||
say LOG "$thisEntry\t\$pagesThisEntry{$thisEntry}: $pagesThisEntry{$thisEntry}";
|
||||
# if tW entry matches ULB text
|
||||
# get ULB snippet to verse match list
|
||||
# get position in true text to array
|
||||
# delete found text from temp text
|
||||
if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ /$thisEntry/i) {
|
||||
say LOG "<7>\t\$thisEntry |$thisEntry| is found in the first test";
|
||||
my ($first, $second, $third, $fourth, $fifth);
|
||||
if ($tempText =~ /^(.*)\b($thisEntry)\b(.*)$/i) {
|
||||
($first, $second, $third, $fourth, $fifth) = ($!, $2, $3, $4, $5);
|
||||
say LOG "<7.1>\t\$first: $first \$second: $second \$third: $third \$fourth: $fourth \$fifth: $fifth ";
|
||||
if ($fifth ne "") {
|
||||
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$4$5/i
|
||||
my @possibleEntries = split /, /, $entriesThisSN{$candidate};
|
||||
@possibleEntries = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|
||||
|| length($a) <=> length($b)
|
||||
|| $a <=> $b }
|
||||
@possibleEntries;
|
||||
foreach my $thisEntry (@possibleEntries) {
|
||||
# for each possible entry;
|
||||
my $staticEntry = $thisEntry;
|
||||
print LOG "-----\n\$thisEntry: $thisEntry\t=>\t";
|
||||
while ($thisEntry =~ s/^(.*) \.\.\. (.*)/$1\\b(.*?)\\b$2/) {}
|
||||
say LOG "$thisEntry\t\$pagesThisEntry{$staticEntry}: $pagesThisEntry{$staticEntry}";
|
||||
# if tW entry matches ULB text
|
||||
# get ULB snippet to verse match list
|
||||
# get position in true text to array
|
||||
# delete found text from temp text
|
||||
if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ /$thisEntry/i) {
|
||||
say LOG "<7>\t\$thisEntry |$thisEntry| is found in the first test";
|
||||
my ($first, $second, $third, $fourth, $fifth);
|
||||
if ($tempText =~ /^(.*)\b($thisEntry)\b(.*)$/i) {
|
||||
($first, $second, $third, $fourth, $fifth) = ($!, $2, $3, $4, $5);
|
||||
say LOG "<7.1>\t\$first: $first \$second: $second \$third: $third \$fourth: $fourth \$fifth: $fifth ";
|
||||
if ($fifth ne "") {
|
||||
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$4$5/i
|
||||
}
|
||||
else {
|
||||
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$3$4/i
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
$tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$3$4/i
|
||||
if ($trueText =~ /^(.*)\b($thisEntry)\b.*$/) {
|
||||
$position = length $1;
|
||||
say LOG "<7.2>\t\$position: $position"
|
||||
}
|
||||
|
||||
$snippetSequence{$position} = "$thisEntry,$pagesThisEntry{$staticEntry}";
|
||||
$found = 1;
|
||||
goto Breakout;
|
||||
}
|
||||
if ($trueText =~ /^(.*)\b($thisEntry)\b.*$/) {
|
||||
$position = length $1;
|
||||
say LOG "<7.2>\t\$position: $position"
|
||||
elsif ($tempText =~ s/\b($thisEntry)[^\w']//i || $tempText =~ s/\b($thisEntry)["']//i || $tempText =~ s/["']($thisEntry)\b//i) {
|
||||
say LOG "<7.3>\t\$thisEntry |$thisEntry| is found in the second test\n---
|
||||
";
|
||||
if ($trueText =~ /^(.*)\b$thisEntry[^\w']/i || $trueText =~ /^(.*)\b($thisEntry)["']/i || $trueText =~ /^(.*)["']($thisEntry)\b/i) {
|
||||
$position = length $1;
|
||||
}
|
||||
$snippetSequence{$position} = "$thisEntry,$pagesThisEntry{$thisEntry}";
|
||||
say LOG "<7.4>\t\$snippetSequence{$position}: $snippetSequence{$position}";
|
||||
$found = 1;
|
||||
goto Breakout;
|
||||
}
|
||||
else {
|
||||
say LOG "\$thisEntry |$thisEntry| is not found in |$ref|";
|
||||
}
|
||||
$snippetSequence{$position} = "$thisEntry,$dir{$pagesThisEntry{$thisEntry}},$pagesThisEntry{$thisEntry}";
|
||||
$found = 1;
|
||||
goto Breakout;
|
||||
}
|
||||
elsif ($tempText =~ s/\b($thisEntry)[^\w']//i || $tempText =~ s/\b($thisEntry)["']//i || $tempText =~ s/["']($thisEntry)\b//i) {
|
||||
say LOG "\$thisEntry |$thisEntry| is found in the second test\n---
|
||||
";
|
||||
if ($trueText =~ /^(.*)\b$thisEntry[^\w']/i || $trueText =~ /^(.*)\b($thisEntry)["']/i || $trueText =~ /^(.*)["']($thisEntry)\b/i) {
|
||||
$position = length $1;
|
||||
}
|
||||
$snippetSequence{$position} = "$thisEntry,$dir{$pagesThisEntry{$thisEntry}},$pagesThisEntry{$thisEntry}";
|
||||
say LOG "\$snippetSequence{$position}: $snippetSequence{$position}";
|
||||
$found = 1;
|
||||
goto Breakout;
|
||||
}
|
||||
else {
|
||||
say LOG "\$thisEntry |$thisEntry| is not found in |$ref|";
|
||||
Breakout:
|
||||
unless ($found) {
|
||||
say MISSING "$ref $candidate";
|
||||
say LOG "Breakout: \$ref: $ref\t\$candidate: $candidate"
|
||||
}
|
||||
next if $found;
|
||||
}
|
||||
Breakout:
|
||||
unless ($found) {
|
||||
say MISSING "$ref $candidate";
|
||||
say LOG "Breakout: \$ref: $ref\t\$candidate: $candidate"
|
||||
}
|
||||
next if $found;
|
||||
}
|
||||
$sequence = $specifiedEntries{$ref};
|
||||
$sequence =~ s/[GH]\d+≈([^≈]*)≈([^≈√]*)/$1\t$2/g;
|
||||
$sequence = $specifiedEntries{$ref};
|
||||
$sequence =~ s/[GH]\d+≈([^≈]*)≈([^≈√]*)/$1\t$2/g;
|
||||
}
|
||||
foreach my $key (sort {$a <=> $b} (keys %snippetSequence)) {
|
||||
$snippetSequence{$key} =~ s/\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)/$1 ... $2 ... $3/;
|
||||
$snippetSequence{$key} =~ s/\(([^\)]*)\)\\b.*?\\b\(([^\)]*)\)/$1 ... $2/;
|
||||
say LOG "---\n\$key: $key\t\$snippetSequence{$key}: $snippetSequence{$key}\n===\n";
|
||||
say LOG "---\n\$key: $key\t\$snippetSequence{$key}: $snippetSequence{$key}\n===";
|
||||
$snippetSequence{$key} =~ s/(\\b\.\*\?|\.\*\?\\b)//g;
|
||||
$snippetSequence{$key} =~ s/\\b\(\.\*\?\)\\b/ ... /g;
|
||||
$thisCVOutString .= "$outputFormRef,$snippetSequence{$key}\n"
|
||||
}
|
||||
say LOG "<7.5>\t\$thisCVOutString\n$thisCVOutString";
|
||||
return $thisCVOutString
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue