Latest work.
This commit is contained in:
parent
8acc11fac2
commit
779b351201
|
@ -51488,6 +51488,7 @@ Matthew 12:17 G2036 ||
|
|||
Matthew 12:17 G3004 ||
|
||||
Matthew 12:18 G2532 ||
|
||||
Matthew 12:18 G5087 ||
|
||||
Matthew 12:19 G2051 ||
|
||||
Matthew 12:20 G1544 ||
|
||||
Matthew 12:20 G2532 ||
|
||||
Matthew 12:21 G2532 ||
|
||||
|
|
|
@ -14,13 +14,12 @@ use FindBin '$Bin';
|
|||
use Cwd ;
|
||||
|
||||
my ($workDir, $d) = ($Bin, "\\");
|
||||
my ($language, $textEditor, $repoPath, $browser, $blbRef, $intrln_ref, $tN_page, $tW_list, $html_wrap) = ($Bin, "\\");
|
||||
my ($language, $textEditor, $repoPath, $browser, $blbRef, $intrln_ref, $tN_page, $tW_list, $html_wrap);
|
||||
my (%abbr, %bkno);
|
||||
|
||||
if ($^O eq "darwin" || $^O eq "linux") {$d = "/"}
|
||||
|
||||
# Open the run log. On failure, report the OS error followed by the full path
# that could not be opened (the path in the message previously contained the
# typo `$[d}`, which interpolated the special variable `$[` instead of `${d}`).
open LOG, ">:utf8", "$Bin${d}Logs${d}mine_log.log" or die ("$!:\n$Bin${d}Logs${d}mine_log.log");
|
||||
|
||||
#===
|
||||
|
||||
open OUT3, ">strongs.sh" or die "$!";
|
||||
|
@ -57,7 +56,7 @@ GetUserDefaults();
|
|||
my ($exceptions_file, $topDir, $ULB_and_OL, $dataFile, $results_file, $filePattern) = (
|
||||
"$Bin${d}Exceptions${d}Exceptions.txt",
|
||||
#"$repoPath${d}en_tw/bible",
|
||||
"$repoPath${d}Restructure/bible/kt",
|
||||
"$repoPath${d}Restructure/bible",
|
||||
"$Bin${d}Temp${d}ULB_OL_Strongs.txt",
|
||||
"$Bin${d}User${d}tW_work.txt",
|
||||
"$Bin${d}Temp${d}mine_results.html",
|
||||
|
@ -70,25 +69,11 @@ my @tWfiles;
|
|||
|
||||
find( sub { push @tWfiles, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ;
|
||||
|
||||
open OUT, ">:utf8", "$results_file" or die;
|
||||
|
||||
say OUT "<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset=\"UTF-8\"/>
|
||||
</head>
|
||||
<body>
|
||||
";
|
||||
|
||||
ParseLine();
|
||||
FindVerseInGlossed($ref);
|
||||
ChecktWPages($word);
|
||||
Finish();
|
||||
|
||||
say OUT "
|
||||
</body>
|
||||
</html>";
|
||||
close OUT;
|
||||
die;
|
||||
#ChecktWPages();
|
||||
#Finish();
|
||||
|
||||
say LOG "\$tW_list\$tN_page: $tW_list$tN_page, \$language: $language, \$strong: $strong";
|
||||
|
||||
|
@ -240,6 +225,15 @@ sub ParseLine {
|
|||
|
||||
|
||||
sub FindVerseInGlossed {
|
||||
open OUT, ">:utf8", "$results_file" or die;
|
||||
|
||||
say OUT "<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset=\"UTF-8\"/>
|
||||
</head>
|
||||
<body>
|
||||
";
|
||||
|
||||
say LOG "\$ULB_and_OL: $ULB_and_OL";
|
||||
|
||||
|
@ -259,83 +253,83 @@ sub FindVerseInGlossed {
|
|||
if ($fileText =~ /$ref(\t[^\n]*\n[^\n]*<)($sn)(.?>[^\n]*\n)/) {
|
||||
#my ($fore, $sn, $aft) = ($1, $2, $3);
|
||||
my $found = $&;
|
||||
say LOG "\$found: $found";
|
||||
$found =~ s/</</g;
|
||||
$found =~ s/>/>/g;
|
||||
$found =~ s/\n/<br \/> /g;
|
||||
$found =~ s/$sn/<span style=\"color:red\">$sn<\/span>/g;
|
||||
say OUT "<p>$found</p>\n";
|
||||
} elsif ($fileText =~ /$ref[^\n]*\n[^\n]*\n/) {
|
||||
}
|
||||
elsif ($fileText =~ /$ref[^\n]*\n[^\n]*\n/) {
|
||||
my $display = $&;
|
||||
$display =~ s/</</g;
|
||||
$display =~ s/>/>/g;
|
||||
$display =~ s/\n/<br \/> /g;
|
||||
say OUT "<p>The Strong's code <<<span style=\"color:red\">$sn</span>>> is not found in OGNT or MAST-HB in $ref.<br /><br />$display</p>";
|
||||
#system ("bbfind -g \"${ref}\\t[^\\n]*\\n[^\\n]*\" 'data${d}ULB_NASB_Strongs.txt'") or die "$!";
|
||||
}
|
||||
|
||||
my $dump;
|
||||
$dump = "<p></p><p>";
|
||||
foreach my $file ( @tWfiles ) {
|
||||
my $fileText = read_file("$file", binmode => 'utf8');
|
||||
# While finds entries and Strong's numbers
|
||||
while ($fileText =~ /($sn)[^\d]/g) {
|
||||
my $found = $1;
|
||||
#system `clear`;
|
||||
my $abb = $file;
|
||||
$abb =~ s/.md$//;
|
||||
$dump .= "$abb: <span style=color:red>$found</span><br />\n";
|
||||
}
|
||||
my $dump = "<p></p><p>";
|
||||
foreach my $file ( @tWfiles ) {
|
||||
my $fileText = read_file("$file", binmode => 'utf8');
|
||||
my $entriesThisFile;
|
||||
if ($fileText =~ /## Forms Found in the English ULB:\n\n([^\n]*)\n/) {
|
||||
$entriesThisFile = $1
|
||||
}
|
||||
|
||||
say $sn;
|
||||
|
||||
if ($^O eq "darwin") {system `$browser https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$sn`}
|
||||
# While finds entries and Strong's numbers
|
||||
while ($fileText =~ /($sn)[^\d]/g) {
|
||||
my $found = $1;
|
||||
#system `clear`;
|
||||
my $abb = $file;
|
||||
#$abb =~ s/.md$//;
|
||||
$dump .= "$abb: <span style=color:red>$found</span><br />$entriesThisFile<br />";
|
||||
}
|
||||
}
|
||||
|
||||
$strong =~ s/^[HG]//;
|
||||
say OUT "$dump</p><p>$sn</p>";
|
||||
say $sn;
|
||||
|
||||
if ($^O eq "darwin") {system `$browser https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$sn`}
|
||||
|
||||
#system ("find $topDir -name \"*.md\" -exec grep -H --color \"$strong\[, \\n\\r\]\" {} \\;");
|
||||
#system ("find $topDir -name \"*.md\" -exec grep -H --color \"$strong\$\" {} \\;");
|
||||
}
|
||||
$strong =~ s/^[HG]//;
|
||||
say OUT "$dump</p><p>$sn</p>";
|
||||
|
||||
#system ("find $topDir -name \"*.md\" -exec grep -H --color \"$strong\[, \\n\\r\]\" {} \\;");
|
||||
#system ("find $topDir -name \"*.md\" -exec grep -H --color \"$strong\$\" {} \\;");
|
||||
}
|
||||
if ($word eq "") {
|
||||
say "There is no \$word in $ref\n$thisULB";
|
||||
if ($^O eq "darwin") {
|
||||
system `$browser https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$sn`;
|
||||
}
|
||||
if ($^O eq "linux") {
|
||||
#system "chromium https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=H$strong &";
|
||||
#system "xdg-open https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$sn &";
|
||||
}
|
||||
if ($^O eq "MSWin32" || $^O eq "MSWin64" ) {
|
||||
system "START \"\" https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$sn";
|
||||
}
|
||||
die
|
||||
}
|
||||
return $word;
|
||||
|
||||
say OUT "
|
||||
</body>
|
||||
</html>";
|
||||
close OUT;
|
||||
|
||||
}
|
||||
|
||||
sub ChecktWPages{
|
||||
|
||||
say LOG "<<$word>>";
|
||||
if ($word eq "") {
|
||||
say "\nThere is no \$word for \$strong = $sn in $ref\n$thisULB\n";
|
||||
die
|
||||
}
|
||||
#my $topDir = "$repoPath${d}en_tw${d}bible";
|
||||
my $topDir = "$repoPath${d}Restructure${d}bible${d}kt";
|
||||
my $topDir = "$repoPath${d}Restructure${d}bible";
|
||||
say OUT "<p>";
|
||||
my $entriesThisWord;
|
||||
foreach my $file ( @tWfiles ) {
|
||||
my $fileText = read_file("$file", binmode => 'utf8');
|
||||
my $foundHere;
|
||||
# While finds entries and Strong's numbers
|
||||
while ($fileText =~ /($sn)[^\d]|^(Forms found in the English ULB:\n\n# [^\n]*\b$word\b)/g) {
|
||||
$foundHere = 1;
|
||||
#system `clear`;
|
||||
my $abb = $file;
|
||||
$abb =~ s/.md$//;
|
||||
say OUT "$abb<br />\n";
|
||||
if ($fileText =~ /$sn/) {
|
||||
if ($fileText =~ /## Forms Found in the English ULB:\n\n([^\n]*)\n/) {
|
||||
$entriesThisWord = $1;
|
||||
}
|
||||
|
||||
}
|
||||
if ($foundHere == 1) {unless (exists $tW_file{$file}) {$tW_file{$file} = $file}}
|
||||
|
||||
# my $foundHere;
|
||||
# # While finds entries and Strong's numbers
|
||||
# while ($fileText =~ /($sn)[^\d]|^(Forms found in the English ULB:\n\n# [^\n]*\b$word\b)/g) {
|
||||
# $foundHere = 1;
|
||||
# #system `clear`;
|
||||
# my $abb = $file;
|
||||
# #$abb =~ s/.md$//;
|
||||
# say OUT "$abb<br />\n";
|
||||
# }
|
||||
# if ($foundHere == 1) {unless (exists $tW_file{$file}) {$tW_file{$file} = $file}}
|
||||
}
|
||||
say OUT "</p>";
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -1,673 +0,0 @@
|
|||
# Produces list of tWs for each verse by linking MAST_NT to ULB through tWs.
|
||||
# Disambiguates entries found on more than one tW page.
|
||||
# Does both Testaments.
|
||||
|
||||
# Taken from tWs.from.MAST_NT.2.pl.
|
||||
|
||||
# The output from this script is useful for the interleaved PDFs used in MAST.
|
||||
# This version uses an exception file to handle places where the MAST_NT points to
|
||||
# a tW page different from that on which the ULB term appears.
|
||||
|
||||
# Make sure the correct input file is $ULBfile. Run script.
|
||||
# Output is in $output file.
|
||||
# Check the $missing
|
||||
# file for needed corrections, probably lines needing to be added to the
|
||||
# $exceptions file.
|
||||
|
||||
use 5.12.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use FindBin '$Bin';
|
||||
use utf8;
|
||||
#use open IN => ":utf8", OUT => ":utf8";
|
||||
use open IO => ":utf8";
|
||||
use List::MoreUtils qw(uniq);
|
||||
$|=1;
|
||||
$"="\n";
|
||||
|
||||
# ----- Platform-dependent paths ---------------------------------------------
# $pwd is the directory that holds this script; $d is the path separator used
# to build every file name below.
my ($pwd, $d) = ($Bin, "/");
if ($^O eq "MSWin32") {
    $d = "\\";
    $pwd =~ s/\//\\/g;
}

# Name of the per-platform user defaults file; the Windows name is the fallback.
my ($udf) = "User_defaults.windows.txt";
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}

# Fail early, with the full path in the message, if the defaults file cannot
# be read. GetUserDefaults() opens the file again to parse it, so this handle
# is closed immediately instead of being left open for the whole run.
open (my $defaults, "<:utf8", "$pwd${d}User${d}$udf") or die "$pwd${d}User${d}$udf:\n$!";
close $defaults;

# ----- File-level state shared by all subroutines ---------------------------
# $cv is a zero-padded string counter; LinkULBtoCV() increments it once per
# verse line and uses it as the key of %ref.
my ($cv, $ULBfile, $exceptions, $missing, $output, $workFile) =
    ("00000",
    "$Bin${d}Temp${d}Extract.txt",
    "$Bin${d}Exceptions${d}Exceptions.txt",
    "$Bin${d}Output${d}Entries_not_handled.txt",
    "$Bin${d}Output${d}tWs_for_PDF.txt",
    "$Bin${d}User${d}tW_work.txt",
    );

my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSourceLangDir, $topNTSourceLangDir, $twPath, $outString);

my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
    %specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
    %SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef, %specifiedEntries, %finalOutput, %searchSequence);

my ($book, $testament);
my (@fileList);

# ==============================

# Every path below that does not start with $Bin (Logs/, User/) is relative,
# so the run cannot continue if the script directory is unreachable.
chdir("$pwd") or die "Cannot change directory to $pwd: $!";
open LOG, ">:utf8", "Logs${d}Exc_log.log" or die "\$log: Logs${d}Exc_log.log: $!";
open OUT, ">:utf8", $output or die "$!";
# Three-argument open: the file name can no longer be parsed as a mode.
open MISSING, ">", $missing or die "$!";

# Load the book table that follows __DATA__: lines of the form
# "<abbreviation>\t<full name>" fill %bkAbr (full name -> abbreviation) and
# %bkFull (abbreviation -> full name); a two-character line is remembered in
# $testament.
while (<DATA>) {
    chomp;
    if (/([^\t]*)\t([^\t]*)/) {
        $bkAbr{$2} = $1;
        $bkFull{$1} = $2;
    } elsif (/^..$/) {
        $testament = $&;
    }
}

# ----- Main sequence; LOG is re-opened so each phase gets its own log file ---
GetUserDefaults();
GetULBBooksToProcess();
ReadExceptions();
close LOG;
open LOG, ">:utf8", "Logs${d}tW_pairs.log" or die "Logs${d}tW_pairs.log: $!";
PairtWEntriesTotWPageAndUniqSNs();
close LOG;
open LOG, ">:utf8", "Logs${d}tWs_from_MAST.log" or die "Logs${d}tWs_from_MAST.log: $!";
ReadLinkedSNs();
LinkULBtoCV();
LinkSNsToULBtextViaEntries();
#Output();

close MISSING;
close OUT;
close LOG;

# On macOS, open the list of unhandled entries in the configured text editor.
if ($^O eq "darwin") {system ("$textEditor $missing")}

print "\n\tDone.\n\n";
|
||||
|
||||
# ==============================
|
||||
|
||||
# Reads the per-platform defaults file "User/<$udf>" (relative to the current
# directory) and fills in the file-level settings:
#   "Text editor: ..."            -> $textEditor (prefixed with "open -a " on macOS)
#   "Repository directory: ..."   -> $repoPath
#   "translationWords path: ..."  -> $twPath
# It then derives $topTwDir, $topOTSourceLangDir and $topNTSourceLangDir from
# $repoPath. Takes no arguments. Dies if the file cannot be opened or if no
# text editor or repository directory was configured.
sub GetUserDefaults {
    open (my $defaults, "<:utf8", "User${d}$udf") or die "User${d}$udf:\n$!";

    while (my $thisLine = <$defaults>) {
        chomp $thisLine;

        if ($thisLine =~ /^Text editor: (.*)$/) {
            # macOS launches the editor through `open -a <application>`.
            $textEditor = ($^O eq "darwin") ? "open -a $1" : $1;
        }
        elsif ($thisLine =~ /^Repository directory: (.*)$/) {
            $repoPath = $1;
        }
        elsif ($thisLine =~ /^translationWords path: (.*)$/) {
            $twPath = $1;
        }
    }

    die "No text editor found" if $textEditor eq "";
    die "No path to repo found" if $repoPath eq "";

    # Top-level directories of the tW pages and of the two source-language texts.
    $topTwDir           = "$repoPath${d}$twPath";
    $topOTSourceLangDir = "$repoPath${d}MAST_HB";
    $topNTSourceLangDir = "$repoPath${d}OGNT";

    close $defaults;
}
|
||||
|
||||
# Reads the work file ($workFile) and queues one source-language XML file per
# listed book in @fileList.
# A line is used when it has four tab-separated fields and does not start
# with '#': field 1 is the book number, field 3 the book code. Books numbered
# above 39 are taken from $topNTSourceLangDir, all others from
# $topOTSourceLangDir; the file name is "<number>-<CODE>.xml".
# Side effects: sets the file-level $topSourceLangDir and $sourceFile to the
# values of the last book processed. Dies if the work file cannot be opened.
sub GetULBBooksToProcess {
    say LOG "GetULBBooksToProcess on \$workFile: $workFile";
    open (my $file, "<:utf8", "$workFile") or die "$workFile:\n$!";

    while (my $line = <$file>) {
        chomp $line;
        next unless $line =~ /^([^#][^\n\t]*)\t[^\n\t]*\t([^\n\t]*)\t[^\n\t]*$/;

        my ($bookNumber, $bookCode) = ($1, $2);
        my $bookFile = $bookNumber . "-" . uc($bookCode);

        $topSourceLangDir = ($bookNumber > 39) ? $topNTSourceLangDir
                                               : $topOTSourceLangDir;

        $sourceFile = "$topSourceLangDir${d}$bookFile.xml";
        push @fileList, $sourceFile;
    }

    close $file;
}
|
||||
|
||||
# Loads the exceptions file ($exceptions). Each usable line starts with a
# verse reference (not beginning with '#') followed by tab-separated fields;
# the first matching form below wins:
#   <ref> TAB <G|H number> TAB ||            -> appended to $deleteNum{<ref>}
#   <ref> TAB <digits> TAB <digits> ...      -> appended to $adjust{<ref>}
#   <ref> TAB <char+digits> TAB <snippet> TAB <page>
#                                            -> appended to $specifiedEntries{<ref>}
#                                               as "<sn>≈<snippet>≈<page>"
# Every appended record is terminated by "√", and $specifiedText{<ref>} is set
# to 1 for each reference that had a usable line.
# Takes no arguments. Dies if the exceptions file cannot be opened.
sub ReadExceptions {
    say "Reading exceptions";
    say LOG "ReadExceptions from \$exceptions: $exceptions";
    open (my $file, "<:utf8", "$exceptions") or die "$exceptions:\n$!";

    while (my $line = <$file>) {
        chomp $line;
        my $verse;

        if ($line =~ /^([^#\n][^\t\n]*)\t([GH]\d+)\t\|\|$/) {
            # Strong's number to be removed from this verse.
            my $strongNumber = $2;
            $verse = $1;
            $deleteNum{$verse} .= "$strongNumber√";
        }
        elsif ($line =~ /^([^#\n][^\t\n]*)\t(\d+\t\d+)/) {
            # Old/new number pair, kept tab-separated.
            my $oldNew = $2;
            $verse = $1;
            $adjust{$verse} .= "$oldNew√";
        }
        elsif ($line =~ /^([^#\n\t][^\t\n]*)\t(.\d+)\t([^\t\n]*)\t([^\t\n]*)$/) {
            # Explicit number / text snippet / tW page triple.
            my ($sn, $snippet, $page) = ($2, $3, $4);
            $verse = $1;
            $specifiedEntries{$verse} .= "$sn≈$snippet≈$page√";
        }
        else {
            next;
        }

        $specifiedText{$verse} = 1;
    }

    close $file;
}
|
||||
|
||||
# Scans every translationWords (tW) Markdown page under $topTwDir and fills
# the lookup tables that tie ULB entries, tW pages and Strong's numbers
# together:
#   %pagesThisEntry  ULB entry       -> list of pages listing it (each followed by ", ")
#   %sourcePage      ULB entry       -> the last page seen that lists it
#   %entriesThisSN   Strong's number -> ", "-joined entry lists of its pages
#   %pagesThisSN     Strong's number -> list of its pages (each followed by ", ")
#   %relevantSNs     every Strong's number found on any page (key == value)
# A page is identified by its path relative to $topTwDir, without ".md" and
# with the first path separator replaced by a comma.
# Takes no arguments. Prints one progress dot per page to STDOUT and writes
# details to LOG.
sub PairtWEntriesTotWPageAndUniqSNs {
    say "Pairing tW entries with tW pages and unique Strong's numbers";
    my (@filesToRun, @relevantSNs) = ();

    # Select files by their ".md" extension. The former glob-style '*.md' was
    # interpolated into a regex, where it matched any name ending in
    # "<any character>md".
    my $filePattern = qr/\.md$/;
    find (sub {push @filesToRun, $File::Find::name if $_ =~ $filePattern}, $topTwDir) ;
    @filesToRun = sort @filesToRun;

    foreach my $file (@filesToRun) {
        print ".";
        # Everything except Linux and macOS is given backslash separators.
        $file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";

        my ($thisList, $shortFile) = ("", $file);
        $shortFile =~ s/^\Q$topTwDir${d}\E//;
        $shortFile =~ s/\.md$//;
        $shortFile =~ s/\Q$d\E/,/;

        my $fileText = read_file("$file", binmode => 'utf8');
        # Make sure the text ends in a newline so the line patterns below can
        # match on the last line too.
        $fileText =~ s/$/\n/;

        # The line after the "Forms Found" heading is a ", "-separated list of
        # the ULB entries this page covers.
        if ($fileText =~ /## Forms Found in the English ULB:\n\n([^\n]*)\n/s) {
            $thisList = $1;
            $thisList =~ s/[\r\n]+$//;
            my @ULBEntries = split /, /, $thisList;
            foreach my $ULB_entry(@ULBEntries) {
                # Flag entries that were already listed on an earlier page.
                if ($pagesThisEntry{$ULB_entry} =~ /^.+$/) {
                    say LOG "*!!*"
                }

                $pagesThisEntry{$ULB_entry} .= "$shortFile, ";
                $sourcePage{$ULB_entry} = $shortFile;
                say LOG "\$pagesThisEntry{$ULB_entry}: $pagesThisEntry{$ULB_entry}, \$sourcePage{$ULB_entry}: $sourcePage{$ULB_entry}, \$shortFile: $shortFile"
            }
        }

        # Collect the Strong's numbers from the first line mentioning "Strong's".
        if ($fileText =~ /Strong's([^\n]*)\n/) {
            my $SNs = $1;
            # A Strong's number is G or H followed by at least one digit; the
            # former \d* also recorded a bare "G" or "H" taken from ordinary
            # words on that line.
            while ($SNs =~ s/([GH]\d+)//) {
                my $thisSN = $1;
                print LOG "! $shortFile ! $thisSN !";
                push @relevantSNs, $thisSN;
                $entriesThisSN{$thisSN} .= "$thisList, ";
                $pagesThisSN{$thisSN} .= "$shortFile, ";
                say LOG " \$pagesThisSN{$thisSN}: $pagesThisSN{$thisSN}"
            }
        }
    }

    @relevantSNs = uniq(@relevantSNs);
    foreach (@relevantSNs) {
        $relevantSNs{$_} = "$_";
    }
    say "";

    # Drop the trailing ", " left by the appends above.
    foreach my $thisSN (sort keys %entriesThisSN) {
        $entriesThisSN{$thisSN} =~ s/, $//;
    }
}
|
||||
|
||||
# Reads the ULB extract ($ULBfile), one "<reference>\t<verse text>" line per
# verse, and records for each verse:
#   $text{<ref>}, $fullText{<ref>}  the verse text
#   $ref{<n>}                       the reference, keyed by the running counter $cv
#   $order{<ref>}                   the counter value of that reference
# Afterwards each $text entry has trailing non-word characters removed and
# " q" appended.
# Takes no arguments. Dies if the extract cannot be opened.
sub LinkULBtoCV {
    say "Linking ULB to chapter and verse";
    say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";

    # Three-argument open on a lexical handle: the file name is never parsed
    # as a mode, and the handle cannot collide with another use of IN.
    open (my $ulbIn, "<", $ULBfile) or die "$ULBfile: $!";
    while (my $line = <$ulbIn>) {
        if ($line =~ /^([^\t]*)\t(.*)$/) {
            my ($verseRef, $verseText) = ($1, $2);
            # $cv starts as the string "00000"; ++ on such a string keeps the
            # zero padding, so the keys of %ref sort in reading order.
            $cv ++;
            ($text{$verseRef}, $fullText{$verseRef}) = ($verseText, $verseText);
            $ref{$cv} = $verseRef;
            $order{$verseRef} = $cv;
        }
    }
    close $ulbIn;

    foreach my $key (sort keys %ref) {
        $text{$ref{$key}} =~ s/[^\w]+$//;
        # NOTE(review): presumably a sentinel so that the last word of the
        # verse is still followed by a character when AssigntWPages matches
        # entries against this text - confirm.
        $text{$ref{$key}} .= " q";
    }
}
|
||||
|
||||
# Reads every source-language XML file queued in @fileList and records, per
# verse, which of the Strong's numbers known from the tW pages (%relevantSNs)
# occur in it: $SNsInCV{<ref>} becomes a list of numbers, each followed by "√".
# The prefix is "H" when the two characters before the "-" in the file name
# are numerically below 40, otherwise "G". The verse reference is built from
# the <verse osisID="book.chapter.verse"> tag using %bkFull, unless %newRef
# supplies a replacement. Finally, entries whose reference is a key of %newRef
# are moved to the new reference.
# Takes no arguments. Dies if a source file cannot be opened.
sub ReadLinkedSNs {
    say "Reading linked Strong's numbers";

    foreach my $sourceFile (@fileList) {
        my $hg = "G";
        if ($sourceFile =~ /(..)-...\.xml$/) {
            $hg = "H" if ($1 < 40);
        }
        say LOG "opening \$sourceFile: $sourceFile";

        # Three-argument open on a lexical handle: the file name is never
        # parsed as a mode, and the handle cannot collide with another use of IN.
        open (my $in, "<", $sourceFile) or die "$sourceFile can't be opened\n\n";

        my $thisRef;
        while (my $line = <$in>) {
            chomp $line;
            if ($line =~ /<verse osisID="([^\.]*).(\d+).(\d+)">/) {
                my ($bk, $ch, $vs) = ($1, $2, $3);
                if (exists $newRef{"$bk $ch:$vs"}) {
                    $thisRef = $newRef{"$bk $ch:$vs"}
                } else {
                    ($thisRef) = ("$bkFull{$bk} $ch:$vs");
                }
            }
            else {
                # NOTE(review): this pattern requires a newline after the
                # closing quote, but the line has just been chomped, so the
                # substitution looks like it can never apply - confirm whether
                # lemma values were meant to be reduced to digits here.
                $line =~ s/(lemma=").*?(\d+).*?("\n)/$1$2$3/;

                while ($line =~ /<w lemma="(\d+)"/g) {
                    my ($thisSN) = ($hg . $1);
                    if (exists $relevantSNs{$thisSN}) {
                        # Record each number only once per verse.
                        $SNsInCV{$thisRef} .= "$thisSN√" unless ($SNsInCV{$thisRef} =~ /\b$thisSN\b/);
                    }
                }
            }
        }
        close $in;
    }

    # Re-key entries whose reference has a replacement in %newRef. The moves
    # are collected first so %SNsInCV is not extended while being walked.
    my %temp;
    foreach my $oldRef (sort keys %SNsInCV) {
        if (exists $newRef{$oldRef}) {
            $temp{$newRef{$oldRef}} = $SNsInCV{$oldRef};
            delete $SNsInCV{$oldRef};
        }
    }
    foreach my $changedRef (sort keys %temp) {
        $SNsInCV{$changedRef} = $temp{$changedRef};
        say LOG "\$SNsInCV{$changedRef}: $SNsInCV{$changedRef}";
    }
}
|
||||
|
||||
# Walks all verses in reading order (the sorted keys of %ref) and, per verse:
#   1. removes Strong's numbers listed for deletion (DeleteUnneededSNs) when
#      %deleteNum has an entry for the verse;
#   2. when %specifiedEntries has an entry for the verse, stores the two
#      values returned by AssigntWPages in $searchSequence{<ref>} and
#      $finalOutput{<ref>};
#   3. assigns the result of PopulateVerse() to $finalOutput{<ref>}, which
#      replaces whatever step 2 stored there.
# Takes no arguments.
sub LinkSNsToULBtextViaEntries {
    say "Linking Strong's numbers to ULB text via tW page entries";
    say LOG "sub LinkSNsToULBtextViaEntries called";

    for my $thisRef (sort keys %ref) {
        my $tempRef = $ref{$thisRef};
        say LOG "\nLinkSNsToULBtextViaEntries: \$thisRef: $thisRef\t\$ref{$thisRef}: $ref{$thisRef}, \$tempRef: $tempRef, \$specifiedText{$ref{$thisRef}}: $specifiedText{$ref{$thisRef}}";

        DeleteUnneededSNs($tempRef) if exists $deleteNum{$tempRef};

        if (exists $specifiedEntries{$tempRef}) {
            my @assigned = AssigntWPages($tempRef, $SNsInCV{$tempRef});
            ($searchSequence{$tempRef}, $finalOutput{$tempRef}) = @assigned;
        }

        ($finalOutput{$tempRef}) = PopulateVerse();
    }
}
|
||||
|
||||
# Removes from $SNsInCV{<ref>} every Strong's number listed in
# $deleteNum{<ref>}. Both values are lists whose items are each followed by
# "√"; only the first occurrence of a number is removed.
# Argument: the verse reference. The list is logged before and after.
sub DeleteUnneededSNs {
    my ($verseRef) = @_;
    say LOG "\$ref: $verseRef, \$SNsInCV{$verseRef}: $SNsInCV{$verseRef}, \$deleteNum{$verseRef}: $deleteNum{$verseRef}";

    for my $unwanted (split /√/, $deleteNum{$verseRef}) {
        $SNsInCV{$verseRef} =~ s/$unwanted√//;
    }

    say LOG "\$ref: $verseRef, \$SNsInCV{$verseRef}: $SNsInCV{$verseRef}, \$deleteNum{$verseRef}: $deleteNum{$verseRef}";
}
|
||||
# Tries to locate, in the text of one verse, a ULB entry for each Strong's
# number of that verse.
# Arguments: $ref - verse reference; $SNs - the verse's Strong's numbers, each
# followed by "√".
# For every number, its entries (from %entriesThisSN, ordered by
# SortSearchEntriesArray) are tried in turn against a working copy of the
# verse text. The first entry that matches is removed from the copy, a
# "[entry](pages)" line is appended to the file-level $outString, and the
# search moves on to the next number. When no entry matches, the number is
# written to MISSING and the search sequence is rebuilt from
# $specifiedEntries{$ref}.
# Returns ($sequence, $results); $results is always the empty string.
sub AssigntWPages {
    my ($ref, $SNs) = (@_);
    my ($tempText, $sequence, $results) = ($text{$ref}, "", "");
    say LOG "\$ref: $ref, \$tempText: $tempText\n\$SNsInCV{$ref}: $SNsInCV{$ref}, \$specifiedEntries{$ref}: $specifiedEntries{$ref}";

    foreach my $thisSN (split /√/, $SNs) {
        my $found;
        my @entries = SortSearchEntriesArray(split /, /, $entriesThisSN{$thisSN});

        ENTRY:
        foreach my $thisEntry (@entries) {
            # Turn each "first ... second" entry into a pattern that allows
            # any words in between: (first)\b(.*?)\b(second).
            while ($thisEntry =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}

            # NOTE(review): for such two-part entries the page lookup below
            # uses the rewritten pattern as the hash key, not the original
            # entry text - confirm that this is intended.
            if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ s/\b($thisEntry)\b/$3/i) {
                # Two-part entry: only the words between the parts are kept.
                say LOG "\n===\n$thisSN |$thisEntry| is found in first test.\n===";
                $outString .= "[$thisEntry]($pagesThisEntry{$thisEntry})\n";
                say LOG $outString . "\n===" . $tempText;
                $found = 1;
                last ENTRY;
            }
            elsif ($tempText =~ s/\b($thisEntry)[^\w']//i || $tempText =~ s/\b($thisEntry)["']//i || $tempText =~ s/["']($thisEntry)\b//i) {
                # Plain entry, possibly next to a quotation mark.
                say LOG "\n===\n$thisSN |$thisEntry| is found in second test.\n===";
                $outString .= "[$thisEntry]($pagesThisEntry{$thisEntry})\n";
                say LOG $outString . "\n" . $tempText;
                $found = 1;
                last ENTRY;
            }
            else {
                say LOG "\$thisEntry $thisEntry is not found in\n|$ref|";
            }
        }

        next if $found;

        # Nothing matched for this number.
        $sequence = $specifiedEntries{$ref};
        $sequence =~ s/[GH]\d+≈([^≈]*)≈([^≈√]*)/$1\t$2/g;
        say MISSING "$ref $thisSN";
        say LOG "Breakout: \$ref: $ref\t\$thisSN: $thisSN"
    }

    return ($sequence, $results);
}
|
||||
|
||||
# Applies the adjustments recorded for one verse in $adjust{<ref>} to a list
# of Strong's numbers.
# Arguments: $_[0] - the current list of numbers; $_[1] - the verse reference.
# $adjust{<ref>} holds "√"-separated adjustments; the first matching form is used:
#   <x> TAB ||              remove <x> from the list
#   <digits> TAB <digits>   replace the first number by the second
#   || TAB <text>           add <text> (or "[a](pages of b)" for "a TAB b")
#   <digits> TAB <words>    tag the number as "<digits>[<words>]" and move it
#                           to the front of the list
#   <digits> TAB <page>     tag the number as "<digits>(<page>)"
# Returns the adjusted list as a string: new items first, then what is left
# of the old list, with items separated by "√". Every step is written to LOG.
sub Adjust {
    my ($snsOld, $ref, $snsNew) = ($_[0], $_[1], "");
    say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: |$snsOld|";

    $snsOld =~ s/^ +/ /;
    $adjust{$ref} =~ s/√$//;
    say LOG "*0*\t\$adjust{$ref}: $adjust{$ref}";

    my @preadjustments = split /√/, $adjust{$ref};
    foreach my $adjustment (@preadjustments) {
        say LOG "<><>\t\$adjustment: >$adjustment<";

        if ($adjustment =~ /([^\t]*)\t\|\|$/) { # delete this from list to look for
            my $found = $1;
            $snsOld =~ s/\b$found\b ?//;
            say LOG "*1*\t\$found: $found should be deleted from \$snsOld: $snsOld";
        }
        elsif ($adjustment =~ /^(\d+)\t(\d+)$/) { # change to specified number
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1√//g;
            $snsNew .= "$found2√" unless $snsNew =~ /\b$found2\b /;
            say LOG "*2*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        }
        elsif ($adjustment =~ /^\|\|\t(.*)/) { # add this to list to look for
            my $adj = $1;
            say LOG "*3*\t\$adjustment: $adjustment, \$adj: $adj";
            if ($adj =~ /([^\t]*)\t([^\t]*)/) {
                $snsNew = "[$1]($pagesThisEntry{$2})"
            } else {
                $snsNew .= "$adj "
            }
        }
        elsif ($adjustment =~ /^(\d+)\t([\w\. \-\']+)$/) { # add specified word
            my ($found1, $found2) = ($1, $2);
            if ($snsOld =~ s/\b$found1\b/$found1\[$found2\]/) {
                say LOG "*\t4a\t\$snsOld: $snsOld";
                # Move the tagged number to the front of the old list.
                if ($snsOld =~ s/^(.+√)($found1\[$found2\]√)/$2$1/) {}
                elsif ($snsOld =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/) {}
            }
            else {
                # The number is not in the old list, so it is added to the new
                # one. Nothing has to be removed from $snsOld: the match above
                # has just shown that the number does not occur there. (The
                # former `$snsOld .= s/\b$found1\b//;` ran the substitution on
                # $_ and appended its result to $snsOld.)
                $snsNew .= "${found1}√";
                say LOG "*\t4b\t\$snsNew: $snsNew";
                $snsNew =~ s/\b$found1\b/$found1\[$found2\]/;
                $snsNew =~ s/^(.+√)($found1\[$found2\]√)/$2$1/;
                $snsNew =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/;
            }
            $snsOld =~ s/ {2,}/ /;
            $snsOld =~ s/√$//;
            say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        }
        elsif (($adjustment =~ /^(\d+)\t([\/\d\w]+)$/)) { # add specified page
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1/$found1\($found2\)/;
            say LOG "*5*\t\t\$snsOld: $snsOld";
        }
    }

    # Tidy the separators, then put the new items in front of the old list.
    $snsOld =~ s/^√//;
    $snsOld =~ s/√+/√/g;
    say LOG "\$snsNew: >$snsNew<\n\$snsNew+\$snsOld: >$snsNew< >$snsOld<";
    $snsNew = "$snsNew√$snsOld";
    say LOG "*5*\t*\t\$snsNew: |$snsNew|";
    while ($snsNew =~ s/(\d+) (\d+\[[^\[\]]*\])/$2$1/) {}
    say LOG "*5*\t**\t\$snsNew: |$snsNew|";
    $snsNew =~ s/√+/√/g;
    $snsNew =~ s/^[ √]//;
    say LOG "*6*\t\t\$snsNew: |$snsNew|";
    return $snsNew;
}
|
||||
|
||||
# For every verse, in reading order (the sorted keys of %ref):
#   - trims $SNsInCV{<ref>} and $checkPages{<ref>};
#   - sorts the newline-separated page lines in $listOfPages{<ref>}
#     case-insensitively and stores them back;
#   - writes to MISSING every page named in $checkPages{<ref>} that does not
#     appear among the sorted page lines.
# Takes no arguments. Progress goes to STDOUT, details to LOG.
sub Output {
    say "Outputting";
    foreach my $key (sort keys %ref) {
        my %donePages;
        my $thisRef = $ref{$key};

        $SNsInCV{$thisRef} =~ s/^ +//;
        $SNsInCV{$thisRef} =~ s/ +$//;
        $SNsInCV{$thisRef} =~ s/ {2,}/ /;

        # One case-insensitive sort. The former `sort sort { ... }` sorted the
        # result a second time with the default comparison, which discarded
        # the case-insensitive order.
        my @sorted = sort { lc($a) cmp lc($b) } split /\n/, $listOfPages{$thisRef};
        $listOfPages{$thisRef} = join "\n", @sorted;
        say LOG "\$listOfPages{$thisRef}: $listOfPages{$thisRef}\n\$checkPages{$thisRef}: $checkPages{$thisRef}";

        $checkPages{$thisRef} =~ s/^ +//;
        $checkPages{$thisRef} =~ s/ +$//;
        $checkPages{$thisRef} =~ s/ {2,}/ /;
        $checkPages{$thisRef} =~ s/ \|\|//;
        say LOG "\$checkPages{$thisRef}:\t|$checkPages{$thisRef}|";
        my @checkArray = split / /, $checkPages{$thisRef};

        # NOTE(review): the first sorted line is discarded - presumably an
        # empty leading element; confirm against how %listOfPages is filled.
        shift @sorted;

        # Reduce each "[entry](page)" line to its page and remember it.
        foreach my $slice (@sorted) {
            $slice =~ s/\[.*?\]\((.*?)\)/$1/;
            $donePages{$slice} = $slice;
        }

        # Report pages that were expected but not produced.
        foreach my $slice (@checkArray) {
            unless (exists $donePages{$slice}) {
                say MISSING "$thisRef\t$slice";
            }
        }
    }
}
|
||||
|
||||
# Applies the page substitutions recorded in %substitutedPages to %SNsInCV
# and mirrors the result in %checkPages, for every key of %pages.
# $substitutedPages{<key>} is a ", "-separated list of "<old> TAB <new>" pairs:
#   old is "||"  -> "<new> " is appended
#   new is "||"  -> the first "<old> " is removed
#   otherwise    -> the first <old> is replaced by <new>
# After each pair any remaining " ||" is stripped from $SNsInCV{<key>}. Keys
# without substitutions just copy $SNsInCV{<key>} to $checkPages{<key>}.
# Takes no arguments. The value is logged before and after.
sub Substitute {
    foreach my $key (sort keys %pages) {
        say LOG "\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";

        if (!exists $substitutedPages{$key}) {
            $checkPages{$key} = $SNsInCV{$key};
        }
        else {
            $substitutedPages{$key} =~ s/, $//;

            for my $pair (split /, /, $substitutedPages{$key}) {
                next unless $pair =~ /([^\t]*)\t([^\t]*)/;
                my ($old, $new) = ($1, $2);

                if    ($old eq "||") { $SNsInCV{$key} .= "$new "; }
                elsif ($new eq "||") { $SNsInCV{$key} =~ s/$old //; }
                else                 { $SNsInCV{$key} =~ s/$old/$new/; }

                # The check list takes the value as it is before the final
                # " ||" clean-up.
                $checkPages{$key} = $SNsInCV{$key};
                $SNsInCV{$key} =~ s/ \|\|//g;
            }
        }

        say LOG "<>\t\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";
    }
}
|
||||
|
||||
# Re-orders "[entry](page)" lines so that they follow the order in which the
# entries occur in the verse text.
# Arguments: $_[0] - newline-separated "[entry](page)" lines;
#            $_[1] - the verse reference, used to look up $fullText{<ref>}.
# Each entry is searched for (case-insensitively) in $fullText{<ref>}; when
# found it is overwritten with spaces in that text, so a later line cannot
# match the same words, and the line is filed under the length of the text in
# front of the match. Lines whose entry is not found are dropped, and a line
# filed under an already used position replaces the earlier one.
# Returns the lines in position order, each followed by a newline.
# Side effect: $fullText{<ref>} is modified as described.
sub ProperOrderOutString {
    my @unordered = split /\n/, $_[0];
    my ($thisCV, $outS) = ($_[1], "");
    my (%orderedSet);

    foreach my $thisSet (@unordered) {
        say LOG "\t>\t$thisSet";
        if ($thisSet =~ /(\[([^\]]*)\])(\([^\)]*\))/) {
            my ($ulb, $fileLoc) = ($2, $3);
            # "first ... second" may have any text between its two parts.
            $ulb =~ s/ \.\.\. /.*?/g;
            say LOG "\t>>\t\$ulb: $ulb, \$fileLoc: $fileLoc, \$fullText{$thisCV}\n$fullText{$thisCV}";

            if ($ulb =~ /^(.*)\.\.\.(.*)$/) {
                # Two-part entry: blank out both parts, keep the text between.
                my ($found1, $found2) = ($1, $2);
                my $blank1 = " " x length $found1;
                my $blank2 = " " x length $found2;
                # The second part is matched with $found2; it was previously
                # matched with $found1 again, so the first part had to occur
                # twice and the second part was never looked for.
                if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*)\b$found2\b(.*)$/$1$blank1$2$blank2$3/i) {
                    say LOG "\t>>>\t$fullText{$thisCV}";
                    my ($order) = (length $1);
                    $orderedSet{$order} = $thisSet;
                    say LOG "\t\t\t\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
                }
            } else {
                my $blank = " " x length $ulb;
                if ($fullText{$thisCV} =~ s/^(.*?)\b$ulb\b(.*)$/$1$blank$2/i) {
                    say LOG "\t>>>>\t$fullText{$thisCV}";
                    my ($order) = (length $1);
                    $orderedSet{$order} = $thisSet;
                    say LOG "\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
                }
            }
        }
    }

    foreach my $key (sort {$a<=>$b} keys %orderedSet) {
        $outS .= "$orderedSet{$key}\n"
    }
    return $outS;
}
|
||||
|
||||
# Orders the search entries for matching and returns them as a list, in the
# reverse of this ascending order: numeric value of the first character, then
# length, then numeric value of the whole entry.
# Arguments: the entries to order.
# NOTE(review): the first and last comparisons are numeric (<=>). For entries
# that do not start with a digit both sides count as 0, so in practice such
# entries come back longest first - confirm whether string comparison (cmp)
# was intended.
sub SortSearchEntriesArray {
    my @entries = @_;

    my @ascending = sort {
        (substr($a, 0, 1) <=> substr($b, 0, 1))
            || (length($a) <=> length($b))
            || ($a <=> $b)
    } @entries;

    @entries = reverse @ascending;
    return @entries;
}
|
||||
# Placeholder with no implementation yet: takes no arguments and returns the
# empty list, so the caller's list assignment stores undef.
sub PopulateVerse {
    return;
}
|
||||
__DATA__
|
||||
OT
|
||||
gen Genesis
|
||||
exo Exodus
|
||||
lev Leviticus
|
||||
num Numbers
|
||||
deu Deuteronomy
|
||||
jos Joshua
|
||||
jdg Judges
|
||||
rut Ruth
|
||||
1sa 1 Samuel
|
||||
2sa 2 Samuel
|
||||
1ki 1 Kings
|
||||
2ki 2 Kings
|
||||
1ch 1 Chronicles
|
||||
2ch 2 Chronicles
|
||||
ezr Ezra
|
||||
neh Nehemiah
|
||||
est Esther
|
||||
job Job
|
||||
psa Psalms
|
||||
pro Proverbs
|
||||
ecc Ecclesiastes
|
||||
sng Song of Solomon
|
||||
isa Isaiah
|
||||
jer Jeremiah
|
||||
lam Lamentations
|
||||
ezk Ezekiel
|
||||
dan Daniel
|
||||
hos Hosea
|
||||
jol Joel
|
||||
amo Amos
|
||||
oba Obadiah
|
||||
jon Jonah
|
||||
mic Micah
|
||||
nam Nahum
|
||||
hab Habakkuk
|
||||
zep Zephaniah
|
||||
hag Haggai
|
||||
zec Zechariah
|
||||
mal Malachi
|
||||
NT
|
||||
mat Matthew
|
||||
mrk Mark
|
||||
luk Luke
|
||||
jhn John
|
||||
act Acts
|
||||
rom Romans
|
||||
1co 1 Corinthians
|
||||
2co 2 Corinthians
|
||||
gal Galatians
|
||||
eph Ephesians
|
||||
php Philippians
|
||||
col Colossians
|
||||
1th 1 Thessalonians
|
||||
2th 2 Thessalonians
|
||||
1ti 1 Timothy
|
||||
2ti 2 Timothy
|
||||
tit Titus
|
||||
phm Philemon
|
||||
heb Hebrews
|
||||
jas James
|
||||
1pe 1 Peter
|
||||
2pe 2 Peter
|
||||
1jn 1 John
|
||||
2jn 2 John
|
||||
3jn 3 John
|
||||
jud Jude
|
||||
rev Revelation
|
Loading…
Reference in New Issue