Redoing script to fit new requirements

This commit is contained in:
Henry Whitney 2020-06-08 16:19:42 -04:00
parent 3a420a2716
commit 7d1b2e6274
9 changed files with 1830 additions and 25 deletions

View File

@ -0,0 +1,130 @@
# Builds easily searchable files from current OGNT and MAST-HB XML files
use 5.12.0;
use File::Find ;
use Cwd ;
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
# ---- Script body ----------------------------------------------------------
# Walks every .xml file under the MAST-HB and OGNT folders and writes one line
# per verse to Output/Original_languages.txt:
#   "<Book> <ch>:<vs>\t<word> <Hnnnn|Gnnnn> <word> <...> "
my ($logPath, $outPath) = ("Logs/log.txt", "Output/Original_languages.txt");
open my $logFh, ">:utf8", $logPath or die "$logPath: $!";
open my $outFh, ">:utf8", $outPath or die "$outPath: $!";
my (@folders) = ("/Users/Henry/Documents/WACS/MAST_HB", "/Users/Henry/Documents/WACS/OGNT");
my (%order, %long);
my $outText = "";

# Book table from __DATA__: three tab-separated fields per line.
# %order maps field 1 -> field 3, %long maps field 2 -> field 3.
while (my $row = <DATA>) {
    chomp $row;
    if ($row =~ /^([^\t]*)\t([^\t]*)\t(.*)$/) {
        $order{$1} = $3;
        $long{$2} = $3;
    }
}

foreach my $folder (@folders) {
    say "$folder";
    my @filesToRun;
    # Match on a real ".xml" suffix; the former glob-style '*.xml' string was
    # being interpolated into a regex and only worked by accident.
    find(sub { push @filesToRun, $File::Find::name if /\.xml$/ }, $folder);
    foreach my $file (sort @filesToRun) {
        say $file;
        open my $in, "<:utf8", $file or die "$file: $!";
        while (my $line = <$in>) {
            chomp $line;
            if ($line =~ /<verse osisID="(.*)\.(\d+)\.(\d+)">/) {
                # New verse: start a new output line with its reference.
                my ($id, $ch, $vs) = ($1, $2, $3);
                my $bk = $long{$id};
                say {$logFh} "Unknown book ID \"$id\" in $file" unless defined $bk;
                $bk //= "";
                $outText .= "\n$bk $ch:$vs\t";
            }
            elsif ($line =~ /<w lemma="([^"]*)" morph=".*" lexeme=".*">(.*)<\/w>/) {
                # Word element carrying a lexeme attribute: tagged as G<number>.
                my ($lemma, $word) = ($1, $2);
                $lemma =~ s/^[^\d]*(\d{1,4})[^\d]*$/G$1/;
                $outText .= "$word <$lemma> ";
            }
            elsif ($line =~ /<w lemma="([^"]*)" (n="[^"]*" )?morph="[^"]*" id="[^"]*">([^<]*)<\/w>/) {
                # Word element carrying an id attribute: tagged as H<number>;
                # slashes inside the word are removed.
                my ($lemma, $word) = ($1, $3);
                $lemma =~ s/^[^\d]*(\d{1,4})[^\d]*$/H$1/;
                $word =~ s/\///g;
                $outText .= "$word <$lemma> ";
            }
        }
        close $in;
    }
}
say {$outFh} $outText;
close $outFh or die "$outPath: $!";
close $logFh;
print "\n\tDone.";
__DATA__
01 gen Genesis
02 exo Exodus
03 lev Leviticus
04 num Numbers
05 deu Deuteronomy
06 jos Joshua
07 jdg Judges
08 rut Ruth
09 1sa 1 Samuel
10 2sa 2 Samuel
11 1ki 1 Kings
12 2ki 2 Kings
13 1ch 1 Chronicles
14 2ch 2 Chronicles
15 ezr Ezra
16 neh Nehemiah
17 est Esther
18 job Job
19 psa Psalms
20 pro Proverbs
21 ecc Ecclesiastes
22 sng Song of Solomon
23 isa Isaiah
24 jer Jeremiah
25 lam Lamentations
26 ezk Ezekiel
27 dan Daniel
28 hos Hosea
29 jol Joel
30 amo Amos
31 oba Obadiah
32 jon Jonah
33 mic Micah
34 nam Nahum
35 hab Habakkuk
36 zep Zephaniah
37 hag Haggai
38 zec Zechariah
39 mal Malachi
41 mat Matthew
42 mrk Mark
43 luk Luke
44 jhn John
45 act Acts
46 rom Romans
47 1co 1 Corinthians
48 2co 2 Corinthians
49 gal Galatians
50 eph Ephesians
51 php Philippians
52 col Colossians
53 1th 1 Thessalonians
54 2th 2 Thessalonians
55 1ti 1 Timothy
56 2ti 2 Timothy
57 tit Titus
58 phm Philemon
59 heb Hebrews
60 jas James
61 1pe 1 Peter
62 2pe 2 Peter
63 1jn 1 John
64 2jn 2 John
65 3jn 3 John
66 jud Jude
67 rev Revelation

View File

@ -0,0 +1,54 @@
use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use FindBin '$Bin';
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
# ---- Script body ----------------------------------------------------------
# Copies from Temp/ULB_text.txt into Temp/Extract.txt only those verse lines
# whose book name is listed (second tab-separated field) in User/tW_work.txt.
my $d = ($^O eq "darwin" || $^O eq "linux") ? "/" : "\\";
my $logPath     = "$Bin${d}Logs${d}log.log";
my $workPath    = "$Bin${d}User${d}tW_work.txt";
my $extractPath = "$Bin${d}Temp${d}Extract.txt";
my $ulbPath     = "$Bin${d}Temp${d}ULB_text.txt";

# The log is still created (truncated) as before, via a 3-arg open.
open my $logFh, ">:utf8", $logPath or die "$logPath: $!";

# Collect the wanted book names; lines starting with '#' are skipped.
my %books;
open(my $file, "<:utf8", $workPath) or die "$workPath:\n$!";
while (my $line = <$file>) {
    chomp $line;
    if ($line =~ /^[^#][^\t]*\t([^\t]*)\t([^\t]*)\t([^\t]*)$/) {
        $books{$1} = $1;
    }
}
close $file;

say "Removing old Extract.txt";
unlink $extractPath;    # a missing file is not an error here
say "Building new Extract.txt";
open(my $out, ">:utf8", $extractPath) or die "$!: $extractPath";
open(my $ulb, "<:utf8", $ulbPath) or die "$ulbPath:\n$!";
while (my $thisLine = <$ulb>) {
    chomp $thisLine;
    # Everything before " <ch>:<vs>" is taken as the book name.
    if ($thisLine =~ /^([^:]*) \d+:\d.*$/ && exists $books{$1}) {
        say {$out} $thisLine;
    }
}
close $ulb;
close $out or die "$extractPath: $!";
close $logFh;
say "Done.";

View File

@ -0,0 +1,74 @@
# Combines most recent ULB with MAST-HB and OGNT with codes.
use 5.12.0;
use Cwd;
use FindBin '$Bin';
# ---- Script body ----------------------------------------------------------
# For every verse in Temp/ULB_text.txt that also has an entry in
# Data/OL_text.txt, writes "<ref>\t<ULB text>\n <OL text>" to
# Temp/ULB_OL_Strongs.txt, then (on macOS) opens the result in the editor
# named by the "Text editor:" line of the per-OS User_defaults file.
my $d = ($^O eq "darwin" || $^O eq "linux") ? "/" : "\\";
my ($ulb, $OLtext) = ("$Bin${d}Temp${d}ULB_text.txt", "$Bin${d}Data${d}OL_text.txt");
my ($logPath, $outPath) = ("$Bin${d}Logs${d}log.log", "$Bin${d}Temp${d}ULB_OL_Strongs.txt");
my ($ref, $val, $textEditor);
my (%codes);
open my $logFh, ">:utf8", $logPath or die "$logPath: $!";
open my $outFh, ">:utf8", $outPath or die "$outPath: $!";
chdir $Bin or die "$Bin: $!";

# Pick the defaults file for this OS (anything else falls back to Windows).
my ($udf) = "User_defaults.windows.txt";
if    ($^O eq "linux")  { $udf = "User_defaults.linux.txt" }
elsif ($^O eq "darwin") { $udf = "User_defaults.mac.txt" }
open(my $defaults, "<:utf8", "$Bin${d}User${d}$udf") or die "$Bin${d}User${d}$udf:\n$!";
while (my $thisLine = <$defaults>) {
    chomp $thisLine;
    if ($thisLine =~ /^Text editor: (.*)$/) {
        $textEditor = $1;
    }
}
close $defaults;
die "No text editor found" if !defined $textEditor || $textEditor eq "";

# Original-language text keyed by verse reference; the book name is adjusted
# from "Song of Solomon" to "Song of Songs" before storing.
open(my $olFh, "<:utf8", $OLtext) or die "$OLtext:\n$!";
while (my $line = <$olFh>) {
    chomp $line;
    if ($line =~ /^([^\t]*)\t(.*)$/) {
        ($ref, $val) = ($1, $2);
        $ref =~ s/^Song of Solomon/Song of Songs/;
        $codes{$ref} = $val;
    }
}
close $olFh;

# Emit each ULB verse that has a counterpart, followed by that counterpart.
open(my $ulbFh, "<:utf8", $ulb) or die "$ulb:\n$!";
while (my $line = <$ulbFh>) {
    chomp $line;
    if ($line =~ /^([^\t]*)\t(.*)$/) {
        ($ref, $val) = ($1, $2);
        if (exists $codes{$ref}) {
            say {$outFh} "$ref\t$val\n $codes{$ref}";
        }
    }
}
close $ulbFh;
close $outFh or die "$outPath: $!";

if ($^O eq "darwin") {
    # NOTE(review): the command goes through the shell as one string, so an
    # editor name or path containing spaces relies on quoting in the defaults
    # file. Left unchanged to stay compatible with existing defaults files.
    system ("open -a $textEditor $Bin${d}Temp${d}ULB_OL_Strongs.txt");
}
close $logFh;
print "\nDone\n\nOutput is in $Bin${d}Temp${d}ULB_OL_Strongs.txt\n\n";

View File

@ -0,0 +1,127 @@
use 5.12.0;
use File::Find ;
use File::Slurp;
use Cwd ;
# ---- Script body ----------------------------------------------------------
# For every occurrence of lemma $lemmaToFind in the OGNT XML files, looks the
# verse up in the combined text file and records which of the candidate
# renderings in $choices (if any) appears on that verse's line.
my $topDir = "/Users/Henry/Documents/WACS/OGNT";
my ($lemmaToFind, $word, $choices) = (5610, "hour", "((appointed )?times?|hours?|moment)");
# The text below is decoded, so both outputs get a UTF-8 layer.
open my $logFh, ">:utf8", "Logs/log.log" or die "Logs/log.log: $!";
open my $outFh, ">:utf8", "Output/output.txt" or die "Output/output.txt: $!";

# Book table from __DATA__: field 1 -> field 2 (tab separated).
my %value;
while (my $row = <DATA>) {
    chomp $row;
    if ($row =~ /([^\t]*)\t([^\t]*)/) {
        $value{$1} = $2;
    }
}
my $fileText = read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_NASB_Strongs.txt", binmode => 'utf8');

my @filesToRun;
# Match on a real ".xml" suffix instead of interpolating the glob '*.xml'.
find(sub { push @filesToRun, $File::Find::name if /\.xml$/ }, $topDir);
foreach my $file (@filesToRun) {
    say {$logFh} $file;
    open my $in, "<", $file or die "$file: $!";
    my ($bk, $ch, $vs);
    while (my $line = <$in>) {
        if ($line =~ /<verse osisID="(.*?)\.(\d+)\.(\d+)">/) {
            ($bk, $ch, $vs) = ($value{$1}, $2, $3);
        }
        elsif ($line =~ /<w lemma="$lemmaToFind" /) {
            unless (defined $bk) {
                # No usable reference: searching for " ch:vs" alone would hit
                # unrelated verses.
                say {$logFh} "Skipped: lemma $lemmaToFind without a known book in $file";
                next;
            }
            my $ref = "$bk $ch:$vs";
            # Exact-reference match: not preceded by "<digit> " (so "John 1:2"
            # does not match inside "1 John 1:2") and not followed by another
            # digit (so it does not match "John 1:20").
            my $verse = qr/(?<!\d )\Q$ref\E(?!\d)/;
            if ($fileText =~ /${verse}.*\b($choices)\b/) {
                my ($found, $chosen) = ($&, $1);
                say {$logFh} "Found: $found";
                say {$outFh} "$ref\t$lemmaToFind\t$chosen\t$word";
            }
            elsif ($fileText =~ /(${verse}[^\n]*)\n/) {
                say {$logFh} "Missed: $1";
                say {$outFh} "$ref\t$lemmaToFind\t\t$word";
            }
        }
    }
    close $in;
}
close $outFh or die "Output/output.txt: $!";
close $logFh;
print "\n\tDone.";
__DATA__
gen Genesis
exo Exodus
lev Leviticus
num Numbers
deu Deuteronomy
jos Joshua
jdg Judges
rut Ruth
1sa 1 Samuel
2sa 2 Samuel
1ki 1 Kings
2ki 2 Kings
1ch 1 Chronicles
2ch 2 Chronicles
ezr Ezra
neh Nehemiah
est Esther
job Job
psa Psalms
pro Proverbs
ecc Ecclesiastes
sng Song of Solomon
isa Isaiah
jer Jeremiah
lam Lamentations
ezk Ezekiel
dan Daniel
hos Hosea
jol Joel
amo Amos
oba Obadiah
jon Jonah
mic Micah
nam Nahum
hab Habakkuk
zep Zephaniah
hag Haggai
zec Zechariah
mal Malachi
mat Matthew
mrk Mark
luk Luke
jhn John
act Acts
rom Romans
1co 1 Corinthians
2co 2 Corinthians
gal Galatians
eph Ephesians
php Philippians
col Colossians
1th 1 Thessalonians
2th 2 Thessalonians
1ti 1 Timothy
2ti 2 Timothy
tit Titus
phm Philemon
heb Hebrews
jas James
1pe 1 Peter
2pe 2 Peter
1jn 1 John
2jn 2 John
3jn 3 John
jud Jude
rev Revelation

View File

@ -0,0 +1,10 @@
# Driver: runs the update stages one after another, echoing progress first.
echo Updating ...
# Stage 1: MakeULB.4.pl
echo Making ULB
perl "MakeULB.4.pl"
# Stage 2: CombineULBandNASBwithCodes.pl
echo Combining ULB and NASB
perl "CombineULBandNASBwithCodes.pl";
# Stage 3: Build_extract.pl
echo Building extract file
perl "Build_extract.pl";
# Stage 4: tWs.from.MAST.pl (the UGNT variant on the next line is disabled)
echo Extracting tWs from Source language
# perl "tWs.from.UGNT.8.pl"
perl "tWs.from.MAST.pl"

View File

@ -0,0 +1,10 @@
# Driver: runs the update stages one after another, echoing progress first.
echo Updating ...
# Stage 1: MakeULB.4.pl
echo Making ULB
perl "MakeULB.4.pl"
# Stage 2: CombineULBandOLwithCodes.pl
# NOTE(review): the message below still says "NASB" although the script run
# here is the OL variant -- confirm whether the wording should change.
echo Combining ULB and NASB
perl "CombineULBandOLwithCodes.pl";
# Stage 3: Build_extract.pl
echo Building extract file
perl "Build_extract.pl";
# Stage 4: tWs.from.MAST.pl (the UGNT variant on the next line is disabled)
echo Extracting tWs from Source language
# perl "tWs.from.UGNT.8.pl"
perl "tWs.from.MAST.pl"

View File

@ -0,0 +1,700 @@
# Produces list of tWs for each verse by linking MAST_NT to ULB through tWs.
# Disambiguates entries found on more than one tW page.
# Does both Testaments.
# Taken from tWs.from.MAST_NT.2.pl.
# The output from this script is useful for the interleaved PDFs used in MAST.
# This version uses an exception file to handle places where the MAST_NT points to
# a tW page different from that on which the ULB term appears.
# Make sure the correct input file is $ULBfile. Run script.
# Output is in $output file.
# Check the $missing
# file for needed corrections, probably lines needing to be added to the
# $exceptions file.
use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use FindBin '$Bin';
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
use List::MoreUtils qw(uniq);
# ---- Main section: configuration, shared state and the processing pipeline.
# Unbuffered output on the selected handle, and newline as the separator used
# when arrays are interpolated into strings.
$|=1;
$"="\n";
# $pwd is the script directory, $d the path separator; both are switched to
# backslash form on Windows.
my ($pwd, $d) = ($Bin, "/");
if ($^O eq "MSWin32") {
$d = "\\";
$pwd =~ s/\//\\/g;
}
# Per-OS user-defaults file name (Windows is the fallback).
my ($udf) = "User_defaults.windows.txt";
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
#elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}
# Dies early if the defaults file cannot be opened. This handle is not read
# in this section; GetUserDefaults opens its own lexical handle on the file.
open (my $defaults, "<:utf8", "$pwd${d}User${d}$udf") or die "$pwd${d}User${d}$udf:\n$!";
# $cv is a zero-padded string counter (incremented in LinkULBtoCV); the rest
# are the input/output file paths, all under the script directory.
my ($cv, $ULBfile, $exceptions, $missing, $output, $workFile) =
("00000",
"$Bin${d}Temp${d}Extract.txt",
"$Bin${d}Exceptions${d}Exceptions.txt",
"$Bin${d}Output${d}Entries_not_handled.txt",
"$Bin${d}Output${d}tWs_for_PDF.txt",
"$Bin${d}User${d}tW_work.txt",
);
# File-level state shared by the subs below.
my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSourceLangDir, $topNTSourceLangDir, $twPath, $outString);
my (%entries, %ULBtext, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
%specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
%SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef, %specifiedEntries, %finalOutput, %searchSequence);
my ($book, $testament);
my (@fileList);
# ==============================
# Relative paths below (Logs/..., User/...) are resolved against $pwd.
chdir("$pwd");
open LOG, ">:utf8", "Logs${d}Exc_log.log" or die "\$log: Logs${d}Exc_log.log: $!";
open OUT, ">:utf8", $output or die "$!";
open MISSING, ">$missing" or die "$!";
# Book table from __DATA__: tab-separated lines fill %bkFull (field 1 ->
# field 2) and %bkAbr (field 2 -> field 1); a two-character line sets
# $testament.
while (<DATA>) {
chomp;
if (/([^\t]*)\t([^\t]*)/) {
$bkAbr{$2} = $1;
$bkFull{$1} = $2;
} elsif (/^..$/) {
$testament = $&;
}
}
# Pipeline. The LOG handle is closed and reopened on a different file between
# phases, so each phase writes to its own log.
GetUserDefaults();
GetULBBooksToProcess();
ReadExceptions();
close LOG;
open LOG, ">:utf8", "Logs${d}tW_pairs.log" or die "Logs${d}tW_pairs.log: $!";
PairtWEntriesTotWPageAndUniqSNs();
close LOG;
open LOG, ">:utf8", "Logs${d}tWs_from_MAST.log" or die "Logs${d}tWs_from_MAST.log: $!";
GetRelevantSNsForEachVerse();
LinkULBtoCV();
ProcessEachVerse();
#Output();
close MISSING;
close OUT;
close LOG;
# On macOS, open the "missing" report using the configured editor command.
if ($^O eq "darwin") {system ("$textEditor $missing")}
print "\n\tDone.\n\n";
# ==============================
# Reads the per-OS user-defaults file and fills the file-level settings
# $textEditor, $repoPath and $twPath, then derives the three top-level
# directories from them. Dies when the editor or repository path is empty.
sub GetUserDefaults {
    open (my $settings, "<:utf8", "User${d}$udf") or die "User${d}$udf:\n$!";
    SETTING: while (my $entry = <$settings>) {
        chomp $entry;
        if ($entry =~ /^Text editor: (.*)$/) {
            # On macOS the editor is launched through "open -a".
            $textEditor = ($^O eq "darwin") ? "open -a $1" : $1;
            next SETTING;
        }
        if ($entry =~ /^Repository directory: (.*)$/) {
            $repoPath = $1;
            next SETTING;
        }
        if ($entry =~ /^translationWords path: (.*)$/) {
            $twPath = $1;
        }
    }
    die "No text editor found" if $textEditor eq "";
    die "No path to repo found" if $repoPath eq "";
    $topTwDir           = "$repoPath${d}$twPath";
    $topOTSourceLangDir = "$repoPath${d}MAST_HB";
    $topNTSourceLangDir = "$repoPath${d}OGNT";
    close $settings;
}
# Reads the work file and appends one source XML path per listed book to
# @fileList. Each usable line has four tab-separated fields; field 1 is the
# book number and field 3 the book code. Lines starting with '#' are ignored.
sub GetULBBooksToProcess {
    say LOG "GetULBBooksToProcess on \$workFile: $workFile";
    open (my $workList, "<:utf8", "$workFile") or die "$workFile:\n$!";
    while (my $row = <$workList>) {
        chomp $row;
        next unless $row =~ /^([^#][^\n\t]*)\t[^\n\t]*\t([^\n\t]*)\t[^\n\t]*$/;
        my ($bookNumber, $bookCode) = ($1, $2);
        # Book numbers above 39 come from the NT directory, the rest from OT.
        $topSourceLangDir = $bookNumber > 39
            ? $topNTSourceLangDir
            : $topOTSourceLangDir;
        $sourceFile = join $d, $topSourceLangDir, $bookNumber . "-" . uc($bookCode) . ".xml";
        push @fileList, $sourceFile;
    }
    close $workList;
}
# Loads the exceptions file into the file-level tables. Three line shapes are
# recognised, tried in this order (first match wins):
#   ref <TAB> G/H-number <TAB> ||             -> appended to %deleteNum
#   ref <TAB> digits <TAB> digits ...         -> appended to %adjust
#   ref <TAB> x-number <TAB> snippet <TAB> page -> appended to %specifiedEntries
# Every recognised line also flags its reference in %specifiedText.
sub ReadExceptions {
    say "Reading exceptions";
    say LOG "ReadExceptions from \$exceptions: $exceptions";
    open (my $rules, "<:utf8", "$exceptions") or die "$exceptions:\n$!";
    RULE: while (my $rule = <$rules>) {
        chomp $rule;
        if ($rule =~ /^([^#\n][^\t\n]*)\t([GH]\d+)\t\|\|$/) {
            my ($verse, $strongs) = ($1, $2);
            $deleteNum{$verse} .= "$strongs√";
            $specifiedText{$verse} = 1;
            next RULE;
        }
        if ($rule =~ /^([^#\n][^\t\n]*)\t(\d+\t\d+)/) {
            my ($verse, $pair) = ($1, $2);
            $adjust{$verse} .= "$pair√";
            $specifiedText{$verse} = 1;
            next RULE;
        }
        if ($rule =~ /^([^#\n\t][^\t\n]*)\t(.\d+)\t([^\t\n]*)\t([^\t\n]*)$/) {
            my ($verse, $strongs, $snippet, $page) = ($1, $2, $3, $4);
            $specifiedEntries{$verse} .= "$strongs≈$snippet≈$page√";
            $specifiedText{$verse} = 1;
        }
    }
    close $rules;
}
# Scans every .md page under $topTwDir and fills the file-level lookup tables:
#   %pagesThisEntry / %sourcePage : ULB entry    -> page(s) listing it
#   %entriesThisSN / %pagesThisSN : Strong's num -> entries / pages
#   %relevantSNs                  : set of every Strong's number seen
# A page is identified by its path relative to $topTwDir, without ".md" and
# with the first path separator replaced by a comma.
sub PairtWEntriesTotWPageAndUniqSNs {
    say "Pairing tW entries with tW pages and unique Strong's numbers";
    my (@filesToRun, @relevantSNs);
    # Match on a real ".md" suffix; the former glob-style '*.md' string was
    # interpolated into a regex and only worked by accident.
    find (sub {push @filesToRun, $File::Find::name if /\.md$/}, $topTwDir);
    @filesToRun = sort @filesToRun;
    foreach my $file (@filesToRun) {
        print ".";
        $file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";
        my ($thisList, $shortFile) = ("", $file);
        $shortFile =~ s/^\Q$topTwDir${d}\E//;
        $shortFile =~ s/\.md$//;
        $shortFile =~ s/\Q$d\E/,/;
        my $fileText = read_file("$file", binmode => 'utf8');
        # Ensure the text ends with a newline so the patterns below, which
        # all require one, can match a final line.
        $fileText =~ s/$/\n/;
        if ($fileText =~ /## Forms Found in the English ULB:\n\n([^\n]*)\n/s) {
            $thisList = $1;
            $thisList =~ s/[\r\n]+$//;
            my @ULBEntries = split /, /, $thisList;
            foreach my $ULB_entry (@ULBEntries) {
                # Flag entries that were already claimed by an earlier page.
                if ($pagesThisEntry{$ULB_entry} =~ /^.+$/) {
                    say LOG "*!!*"
                }
                $pagesThisEntry{$ULB_entry} .= "$shortFile, ";
                $sourcePage{$ULB_entry} = $shortFile;
                say LOG "\$pagesThisEntry{$ULB_entry}: $pagesThisEntry{$ULB_entry}, \$sourcePage{$ULB_entry}: $sourcePage{$ULB_entry}, \$shortFile: $shortFile"
            }
        }
        if ($fileText =~ /Strong's([^\n]*)\n/) {
            my $SNs = $1;
            # Require at least one digit so a stray capital G or H on the
            # line is not recorded as a Strong's number.
            while ($SNs =~ s/([GH]\d+)//) {
                my $thisSN = $1;
                print LOG "! $shortFile ! $thisSN !";
                push @relevantSNs, $thisSN;
                $entriesThisSN{$thisSN} .= "$thisList, ";
                $pagesThisSN{$thisSN} .= "$shortFile, ";
                say LOG " \$pagesThisSN{$thisSN}: $pagesThisSN{$thisSN}"
            }
        }
    }
    @relevantSNs = uniq(@relevantSNs);
    foreach (@relevantSNs) {
        $relevantSNs{$_} = "$_";
    }
    say "";
    # Drop the trailing ", " left by the appends above.
    foreach my $thisSN (sort keys %entriesThisSN) {
        $entriesThisSN{$thisSN} =~ s/, $//;
    }
}
# Reads the extract file ("<ref>\t<text>" per line) and records, per verse:
#   %ULBtext / %fullText : ref -> verse text
#   %ref                 : running counter -> ref
#   %order               : ref -> running counter
# The counter $cv is a zero-padded string, so its keys sort in file order.
# Afterwards each %ULBtext value loses its trailing non-word characters and
# gets " q" appended.
sub LinkULBtoCV {
    say "Linking ULB to chapter and verse";
    say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";
    # Lexical handle and loop variable: no shared bareword handle, and the
    # caller's $_ is left alone.
    open(my $ulbFh, "<:utf8", $ULBfile) or die "$ULBfile: $!";
    while (my $line = <$ulbFh>) {
        if ($line =~ /^([^\t]*)\t(.*)$/) {
            my ($verseRef, $verseText) = ($1, $2);
            $cv++;
            ($ULBtext{$verseRef}, $fullText{$verseRef}) = ($verseText, $verseText);
            $ref{$cv} = $verseRef;
            $order{$verseRef} = $cv;
        }
    }
    close $ulbFh;
    foreach my $key (sort keys %ref) {
        $ULBtext{$ref{$key}} =~ s/[^\w]+$//;
        $ULBtext{$ref{$key}} .= " q";
    }
}
# For every source XML file in @fileList, collects per verse the Strong's
# numbers that also occur in %relevantSNs, storing them in %SNsInCV as a
# "√"-terminated list keyed by "<full book name> <ch>:<vs>". Files whose
# two-character number prefix is below 40 get the "H" prefix, others "G".
# Finally, any key that has a replacement in %newRef is moved to it.
sub GetRelevantSNsForEachVerse {
    say "Getting relevant SNs for each verse";
    foreach my $sourceFile (@fileList) {
        my $hg = "G";
        if ($sourceFile =~ /(..)-...\.xml$/) {
            $hg = "H" if ($1 < 40);
        }
        open(my $xml, "<:utf8", $sourceFile) or die "$sourceFile can't be opened: $!\n\n";
        my $thisRef;
        while (my $line = <$xml>) {
            chomp $line;
            if ($line =~ /<verse osisID="([^.]*)\.(\d+)\.(\d+)">/) {
                my ($bk, $ch, $vs) = ($1, $2, $3);
                if (exists $newRef{"$bk $ch:$vs"}) {
                    $thisRef = $newRef{"$bk $ch:$vs"}
                } else {
                    $thisRef = "$bkFull{$bk} $ch:$vs";
                }
            }
            else {
                # Use the first run of digits inside each lemma attribute.
                # The earlier normalising substitution required a newline
                # after the closing quote, which can never be present once
                # the line is chomped, so lemmas with a prefix or suffix
                # around the number were silently skipped.
                while ($line =~ /<w lemma="([^"]*)"/g) {
                    my $lemma = $1;
                    my ($number) = $lemma =~ /(\d+)/;
                    next unless defined $number;
                    my $thisSN = $hg . $number;
                    if (exists $relevantSNs{$thisSN}) {
                        $SNsInCV{$thisRef} .= "$thisSN√" unless ($SNsInCV{$thisRef} =~ /\b$thisSN\b/);
                    }
                }
            }
        }
        close $xml;
    }
    # Re-key entries whose reference has a replacement in %newRef. Collected
    # first, applied afterwards, so a new key cannot be overwritten mid-pass.
    my %temp;
    foreach my $oldRef (sort keys %SNsInCV) {
        if (exists $newRef{$oldRef}) {
            $temp{$newRef{$oldRef}} = $SNsInCV{$oldRef};
            delete $SNsInCV{$oldRef};
        }
    }
    foreach my $changedRef (sort keys %temp) {
        $SNsInCV{$changedRef} = $temp{$changedRef};
        say LOG "\$SNsInCV{$changedRef}: $SNsInCV{$changedRef}";
    }
}
# Walks the verses in %ref key order. For each one it removes the Strong's
# numbers listed in %deleteNum from %SNsInCV, builds the "√"-separated
# process sequence (specified entries first, then the remaining numbers) and
# hands it to ExecuteProcessSequence together with the verse text.
sub ProcessEachVerse {
    say "Processing ULB";
    say LOG "sub ProcessEachVerse called";
    for my $position (sort keys %ref) {
        my $verse = $ref{$position};
        say LOG "$verse\t\$SNsInCV{$verse}: $SNsInCV{$verse}\t\$deleteNum{$verse}: $deleteNum{$verse}, \$specifiedEntries{$verse}: $specifiedEntries{$verse}";
        ($SNsInCV{$verse}) = DeleteObviatedSNs($SNsInCV{$verse}, $deleteNum{$verse});
        say LOG "\t\$SNsInCV{$verse}: $SNsInCV{$verse}";
        # Join the two lists, then collapse repeated separators and drop any
        # leading one.
        my $processSequence = join "√", $specifiedEntries{$verse}, $SNsInCV{$verse};
        $processSequence =~ s/√+/√/g;
        $processSequence =~ s/^√+//;
        say LOG "\t\t\$processSequence: $processSequence\n";
        ExecuteProcessSequence($verse, $processSequence, $ULBtext{$verse});
    }
}
# Removes from $SNsInCV{<ref>} the first occurrence of each "√"-terminated
# number listed in $deleteNum{<ref>}, logging the state before and after.
sub DeleteUnneededSNs {
    my ($verse) = @_;
    my $logState = sub {
        say LOG "\$ref: $verse, \$SNsInCV{$verse}: $SNsInCV{$verse}, \$deleteNum{$verse}: $deleteNum{$verse}";
    };
    $logState->();
    for my $unwanted (split /√/, $deleteNum{$verse}) {
        $SNsInCV{$verse} =~ s/$unwanted√//;
    }
    $logState->();
}
# For each Strong's number in the "√"-separated list $SNs, tries the ULB
# entries recorded for that number (%entriesThisSN) against a working copy of
# the verse text $ULBtext{$ref}. The first entry that matches is removed from
# the working copy and "[entry](pages)" is appended to the file-level
# $outString; numbers with no matching entry are written to MISSING.
# Returns ($sequence, $results). $results is never assigned after its
# initialisation, so it is always "".
sub AssigntWPages {
my ($ref, $SNs) = (@_);
my ($tempText, $sequence, $results) = ($ULBtext{$ref}, "", "");
say LOG "\$ref: $ref, \$tempText: $tempText\n\$SNsInCV{$ref}: $SNsInCV{$ref}, \$specifiedEntries{$ref}: $specifiedEntries{$ref}";
my @searchArray = split /√/, $SNs;
foreach my $thisSN (@searchArray) {
my $found;
#say LOG "\$thisSN: $thisSN\t$entriesThisSN{$thisSN}";
my @entries = split /, /, $entriesThisSN{$thisSN};
#say LOG "\t\@entries: @entries";
@entries = SortSearchEntriesArray(@entries);
#say LOG "\t\t@entries";
foreach my $thisEntry (@entries) {
# Rewrite an entry of the form "A ... B" into the pattern text
# "(A)\b(.*?)\b(B)" so the two halves may be separated by other words.
while ($thisEntry =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
# First test: split entries only. With a single " ... " in the entry, $3 is
# the text between the two halves, so only the halves are removed.
# NOTE(review): $pagesThisEntry is looked up below with the rewritten pattern
# text, not the original entry string -- confirm the lookup can succeed.
if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ s/\b($thisEntry)\b/$3/i) {
say LOG "\n===\n$thisSN |$thisEntry| is found in first test.\n===";
$outString .= "[$thisEntry]($pagesThisEntry{$thisEntry})\n";
say LOG $outString . "\n===" . $tempText;
$found = 1;
# Leave the entry loop; the label sits after the fallback code below.
goto Breakout;
# Second test: the entry followed by a non-word character other than an
# apostrophe, followed by a quote character, or preceded by one.
} elsif ($tempText =~ s/\b($thisEntry)[^\w']//i || $tempText =~ s/\b($thisEntry)["']//i || $tempText =~ s/["']($thisEntry)\b//i) {
say LOG "\n===\n$thisSN |$thisEntry| is found in second test.\n===";
$outString .= "[$thisEntry]($pagesThisEntry{$thisEntry})\n";
say LOG $outString . "\n" . $tempText;
$found = 1;
goto Breakout;
} else {
say LOG "\$thisEntry $thisEntry is not found in\n|$ref|";
}
}
# Reached only when no entry matched for this number: $sequence becomes the
# specified entries for the verse with each "SN≈snippet≈page" item reduced
# to "snippet<TAB>page".
$sequence = $specifiedEntries{$ref};
$sequence =~ s/[GH]\d+≈([^≈]*)≈([^≈√]*)/$1\t$2/g;
Breakout:
unless ($found) {
say MISSING "$ref $thisSN";
say LOG "Breakout: \$ref: $ref\t\$thisSN: $thisSN"
}
# Last statement of the loop body, so this has no further effect.
next if $found;
}
return ($sequence, $results);
}
# Applies the adjustments recorded for a verse in $adjust{$ref} (a
# "√"-separated list) to a list of Strong's numbers.
#   $_[0]  current number list ($snsOld)
#   $_[1]  verse reference
# Returns the combined list: newly added items first, then what is left of
# the old list, separated by "√".
# Adjustment shapes handled, in this order:
#   "X<TAB>||"           remove X from the old list
#   "N<TAB>M"            replace number N by number M
#   "||<TAB>..."         add an item to look for
#   "N<TAB>words"        attach the specified word(s) to number N
#   "N<TAB>page"         attach the specified page to number N
sub Adjust {
    my ($snsOld, $ref) = @_;
    my $snsNew = "";
    say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: |$snsOld|";
    $snsOld =~ s/^ +/ /;
    $snsNew =~ s/√$//;
    my @oldArray = split / /, $snsOld;
    $adjust{$ref} =~ s/√$//;
    say LOG "*0*\t\$adjust{$ref}: $adjust{$ref}";
    my @preadjustments = split /√/, $adjust{$ref};
    foreach my $adjustment (@preadjustments) {
        say LOG "<><>\t\$adjustment: >$adjustment<";
        if ($adjustment =~ /([^\t]*)\t\|\|$/) { # delete this from list to look for
            my $found = $1;
            $snsOld =~ s/\b$found\b ?//;
            say LOG "*1*\t\$found: $found should be deleted from \$snsOld: $snsOld";
        } elsif ($adjustment =~ /^(\d+)\t(\d+)$/) { # change to specified number
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1√//g;
            $snsNew .= "$found2√" unless $snsNew =~ /\b$found2\b /;
            say LOG "*2*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif ($adjustment =~ /^\|\|\t(.*)/) { # add this to list to look for
            my $adj = $1;
            say LOG "*3*\t\$adjustment: $adjustment, \$adj: $adj";
            if ($adj =~ /([^\t]*)\t([^\t]*)/) {
                $snsNew = "[$1]($pagesThisEntry{$2})"
            } else {
                $snsNew .= "$adj "
            }
        } elsif ($adjustment =~ /^(\d+)\t([\w\. \-\']+)$/) { # add specified word
            my ($found1, $found2) = ($1, $2);
            if ($snsOld =~ s/\b$found1\b/$found1\[$found2\]/) {
                say LOG "*\t4a\t\$snsOld: $snsOld";
                # Move the annotated number to the front of the old list.
                if ($snsOld =~ s/^(.+√)($found1\[$found2\]√)/$2$1/) {}
                elsif ($snsOld =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/) {}
            }
            else {
                $snsNew .= "${found1}√";
                # Was "$snsOld .= s/.../": that ran the substitution on $_
                # and appended its result to $snsOld. Bind it to $snsOld.
                $snsOld =~ s/\b$found1\b//;
                say LOG "*\t4b\t\$snsNew: $snsNew";
                $snsNew =~ s/\b$found1\b/$found1\[$found2\]/;
                $snsNew =~ s/^(.+√)($found1\[$found2\]√)/$2$1/;
                $snsNew =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/;
            }
            $snsOld =~ s/ {2,}/ /;
            $snsOld =~ s/√$//;
            say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif (($adjustment =~ /^(\d+)\t([\/\d\w]+)$/)) { # add specified page
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1/$found1\($found2\)/;
            say LOG "*5*\t\t\$snsOld: $snsOld";
        }
    }
    # Tidy separators, then put the new items in front of the old ones.
    $snsOld =~ s/^√//;
    $snsOld =~ s/√+/√/g;
    say LOG "\$snsNew: >$snsNew<\n\$snsNew+\$snsOld: >$snsNew< >$snsOld<";
    $snsNew = "$snsNew√$snsOld";
    say LOG "*5*\t*\t\$snsNew: |$snsNew|";
    while ($snsNew =~ s/(\d+) (\d+\[[^\[\]]*\])/$2$1/) {}
    say LOG "*5*\t**\t\$snsNew: |$snsNew|";
    $snsNew =~ s/√+/√/g;
    $snsNew =~ s/^[ √]//;
    say LOG "*6*\t\t\$snsNew: |$snsNew|";
    return $snsNew;
}
# Reporting pass over every verse in %ref key order: tidies the spacing of
# $SNsInCV and $checkPages for the verse, rewrites $listOfPages as a sorted
# newline-joined list, and writes to MISSING every item of $checkPages that
# is not among the pages found in $listOfPages.
# The only call to this sub in the main section is commented out.
sub Output {
say "Outputting";
#say LOG "Output subRoutine called";
foreach my $key (sort keys %ref) {
my %donePages;
my $thisRef = $ref{$key};
#print LOG "\$key: $key\t\$thisRef: $thisRef\t";
#say LOG "\$SNsInCV{$thisRef}: |$SNsInCV{$thisRef}|";
# Trim leading/trailing spaces; collapse the first run of repeated spaces.
$SNsInCV{$thisRef} =~ s/^ +//;
$SNsInCV{$thisRef} =~ s/ +$//;
$SNsInCV{$thisRef} =~ s/ {2,}/ /;
#say LOG "\$SNsInCV{$thisRef}: |$SNsInCV{$thisRef}|";
#say LOG "\$listOfPages{$thisRef}: |$listOfPages{$thisRef}|";
#$listOfPages{$thisRef} =~ s/^ +//;
#$listOfPages{$thisRef} =~ s/ +$//;
#$listOfPages{$thisRef} =~ s/ {2,}/ /;
#say LOG "\$listOfPages{$thisRef}: |$listOfPages{$thisRef}|";
my @array = split /\n/, $listOfPages{$thisRef};
#say LOG "\@array: |@array|";
# NOTE(review): as written this looks like sort(sort {...} @array), in which
# case the outer default sort would override the case-insensitive order --
# confirm which ordering is intended.
my @sorted =
sort sort { lc($a) cmp lc($b) }
@array;
#say LOG "\@sorted: |@sorted|";
$" = "\n";
$listOfPages{$thisRef} = "@sorted";
say LOG "\$listOfPages{$thisRef}: $listOfPages{$thisRef}\n\$checkPages{$thisRef}: $checkPages{$thisRef}";
#say OUT "$thisRef: $listOfPages{$thisRef}\n";
$checkPages{$thisRef} =~ s/^ +//;
$checkPages{$thisRef} =~ s/ +$//;
$checkPages{$thisRef} =~ s/ {2,}/ /;
$checkPages{$thisRef} =~ s/ \|\|//;
say LOG "\$checkPages{$thisRef}:\t|$checkPages{$thisRef}|";
my @checkArray = split / /, $checkPages{$thisRef};
# The first sorted element is discarded before the pages are collected.
shift @sorted;
#say LOG "\@checkArray: |@checkArray|";
#say LOG "\@sorted: |@sorted|";
#shift @sorted;
#say LOG "\@sorted: |@sorted|";
# Reduce each "[text](page)" item to its page part and remember it.
foreach my $slice (@sorted) {
#print LOG "\$slice: $slice\t";
$slice =~ s/\[.*?\]\((.*?)\)/$1/;
#say LOG "\t\$slice: $slice";
$donePages{$slice} = $slice;
#say LOG "\t\$donePages{$slice}: $donePages{$slice}"
}
#say LOG "\@checkArray: |@checkArray|";
# Report every expected item that was not among the pages found.
foreach my $slice (@checkArray) {
#say LOG "\$slice: $slice";
unless (exists $donePages{$slice}) {
#say LOG "\$thisRef: $thisRef\t\$slice:$slice";
#say MISSING "$thisRef\t$slice\t||";
say MISSING "$thisRef\t$slice";
}
}
}
}
# Applies the page substitutions recorded in %substitutedPages to %SNsInCV for
# every key of %pages, and mirrors the result into %checkPages. Each
# substitution is an "old<TAB>new" pair; "||" as old means "add new", "||" as
# new means "remove old", anything else replaces old by new.
sub Substitute {
    for my $verse (sort keys %pages) {
        say LOG "\$key: $verse\t\$SNsInCV{$verse}: $SNsInCV{$verse}";
        if (!exists $substitutedPages{$verse}) {
            # Nothing to substitute: the check list is the list as it stands.
            $checkPages{$verse} = $SNsInCV{$verse};
        }
        else {
            $substitutedPages{$verse} =~ s/, $//;
            for my $pair (split /, /, $substitutedPages{$verse}) {
                next unless $pair =~ /([^\t]*)\t([^\t]*)/;
                my ($from, $to) = ($1, $2);
                if ($from eq "||") {
                    $SNsInCV{$verse} .= "$to ";
                }
                elsif ($to eq "||") {
                    $SNsInCV{$verse} =~ s/$from //;
                }
                else {
                    $SNsInCV{$verse} =~ s/$from/$to/;
                }
                # The check list is captured before the "||" markers are
                # stripped from the working list.
                $checkPages{$verse} = $SNsInCV{$verse};
                $SNsInCV{$verse} =~ s/ \|\|//g;
            }
        }
        say LOG "<>\t\$key: $verse\t\$SNsInCV{$verse}: $SNsInCV{$verse}";
    }
}
# Re-orders "[ULB text](page)" lines by where the ULB text occurs in the verse.
#   $_[0]  newline-separated "[text](page)" lines
#   $_[1]  verse reference, used as the key into %fullText
# Each located text is blanked out in $fullText{<ref>} (replaced by spaces of
# equal length) so a later, identical entry finds the next occurrence.
# Returns the located lines, one per line, ordered by match offset; lines
# whose text is not found in the verse are dropped.
sub ProperOrderOutString {
    my @unordered = split /\n/, $_[0];
    my ($thisCV, $outS) = ($_[1], "");
    my (%orderedSet);
    foreach my $thisSet (@unordered) {
        say LOG "\t>\t$thisSet";
        if ($thisSet =~ /(\[([^\]]*)\])(\([^\)]*\))/) {
            my ($ulb, $fileLoc) = ($2, $3);
            # " ... " inside an entry becomes a lazy wildcard in the pattern.
            $ulb =~ s/ \.\.\. /.*?/g;
            say LOG "\t>>\t\$ulb: $ulb, \$fileLoc: $fileLoc, \$fullText{$thisCV}\n$fullText{$thisCV}";
            if ($ulb =~ /^(.*)\.\.\.(.*)$/) {
                my ($found1, $found2) = ($1, $2);
                my $blank1 = " " x length($found1);
                my $blank2 = " " x length($found2);
                # The second half must be matched with $found2; the pattern
                # previously repeated $found1 while blanking with $blank2.
                if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*)\b$found2\b(.*)$/$1$blank1$2$blank2$3/i) {
                    say LOG "\t>>>\t$fullText{$thisCV}";
                    my ($order) = (length $1);
                    $orderedSet{$order} = $thisSet;
                    say LOG "\t\t\t\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
                }
            } else {
                my $blank = " " x length($ulb);
                if ($fullText{$thisCV} =~ s/^(.*?)\b$ulb\b(.*)$/$1$blank$2/i) {
                    say LOG "\t>>>>\t$fullText{$thisCV}";
                    # Offset of the match = length of the text before it.
                    my ($order) = (length $1);
                    $orderedSet{$order} = $thisSet;
                    say LOG "\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
                }
            }
        }
    }
    foreach my $key (sort {$a<=>$b} keys %orderedSet) {
        $outS .= "$orderedSet{$key}\n"
    }
    return $outS;
}
# Returns the given entries ordered by the reverse of this ascending sort:
# numeric value of the first character, then string length, then numeric
# value of the whole entry. For entries that do not start with a digit this
# amounts to longest first.
sub SortSearchEntriesArray {
    my @ascending = sort {
        my $verdict = substr($a, 0, 1) <=> substr($b, 0, 1);
        $verdict = length($a) <=> length($b) unless $verdict;
        $verdict = $a <=> $b unless $verdict;
        $verdict;
    } @_;
    my @descending = reverse @ascending;
    return @descending;
}
# Placeholder: the body is empty, so calling this sub does nothing.
sub PopulateVerse {
}
# Removes from a "√"-separated list of Strong's numbers every number that
# appears in a second "√"-separated list.
#   $sns       list to filter (e.g. "G100√G123√")
#   $toDelete  numbers to drop (e.g. "G123√"); may be undef or empty
# Returns the filtered list. The input is returned untouched when there is
# nothing to delete; otherwise the result keeps a trailing "√" if the input
# had one. Whole numbers are compared, so deleting "G123" no longer damages
# "G1234", and no doubled separator is left behind.
sub DeleteObviatedSNs {
    my ($sns, $toDelete) = @_;
    return $sns unless defined $sns && length $sns;
    return $sns unless defined $toDelete && length $toDelete;
    my %obsolete = map { $_ => 1 } grep { length } split /√/, $toDelete;
    my @kept = grep { length && !$obsolete{$_} } split /√/, $sns;
    my $filtered = join "√", @kept;
    $filtered .= "√" if @kept && $sns =~ /√$/;
    return $filtered;
}
# Unfinished: splits the "√"-separated $sequence into candidates and, for each
# one, builds a list of possible entries that is not used afterwards. The
# remaining steps exist only as the outline in the comments below. $text is
# received but not used yet.
sub ExecuteProcessSequence {
my ($ref, $sequence, $text) = @_;
my (@sequence) = split /√/, $sequence;
foreach my $candidate (@sequence) {
# NOTE(review): %entriesThisSN is filled with Strong's numbers as keys
# elsewhere in this file, but it is indexed here with the verse reference
# $ref rather than $candidate -- confirm which key is intended.
my @possibleEntries = split /, /, $entriesThisSN{$ref}
# begin searchString with specified SNs
# append other SNs
# Duplicate verse as tempText
# for each SN in searchString
# # find and delete tempText
# sort entries longest to shortest
# for each entry
# if entry found
# foundText and tW{foundText} to workingResults
# find characterNumber in ULB verse to sequencingData
# else record in error file
}
}
__DATA__
OT
gen Genesis
exo Exodus
lev Leviticus
num Numbers
deu Deuteronomy
jos Joshua
jdg Judges
rut Ruth
1sa 1 Samuel
2sa 2 Samuel
1ki 1 Kings
2ki 2 Kings
1ch 1 Chronicles
2ch 2 Chronicles
ezr Ezra
neh Nehemiah
est Esther
job Job
psa Psalms
pro Proverbs
ecc Ecclesiastes
sng Song of Solomon
isa Isaiah
jer Jeremiah
lam Lamentations
ezk Ezekiel
dan Daniel
hos Hosea
jol Joel
amo Amos
oba Obadiah
jon Jonah
mic Micah
nam Nahum
hab Habakkuk
zep Zephaniah
hag Haggai
zec Zechariah
mal Malachi
NT
mat Matthew
mrk Mark
luk Luke
jhn John
act Acts
rom Romans
1co 1 Corinthians
2co 2 Corinthians
gal Galatians
eph Ephesians
php Philippians
col Colossians
1th 1 Thessalonians
2th 2 Thessalonians
1ti 1 Timothy
2ti 2 Timothy
tit Titus
phm Philemon
heb Hebrews
jas James
1pe 1 Peter
2pe 2 Peter
1jn 1 John
2jn 2 John
3jn 3 John
jud Jude
rev Revelation

View File

@ -0,0 +1,673 @@
# Produces list of tWs for each verse by linking MAST_NT to ULB through tWs.
# Disambiguates entries found on more than one tW page.
# Does both Testaments.
# Taken from tWs.from.MAST_NT.2.pl.
# The output from this script is useful for the interleaved PDFs used in MAST.
# This version uses an exception file to handle places where the MAST_NT points to
# a tW page different from that on which the ULB term appears.
# Make sure the correct input file is $ULBfile. Run script.
# Output is in $output file.
# Check the $missing
# file for needed corrections, probably lines needing to be added to the
# $exceptions file.
use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use FindBin '$Bin';
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
use List::MoreUtils qw(uniq);
# ---- Main section: configuration, shared state and the processing pipeline.
# Unbuffered output on the selected handle, and newline as the separator used
# when arrays are interpolated into strings.
$|=1;
$"="\n";
# $pwd is the script directory, $d the path separator; both are switched to
# backslash form on Windows.
my ($pwd, $d) = ($Bin, "/");
if ($^O eq "MSWin32") {
$d = "\\";
$pwd =~ s/\//\\/g;
}
# Per-OS user-defaults file name (Windows is the fallback).
my ($udf) = "User_defaults.windows.txt";
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
#elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}
# Dies early if the defaults file cannot be opened. This handle is not read
# in this section; GetUserDefaults opens its own lexical handle on the file.
open (my $defaults, "<:utf8", "$pwd${d}User${d}$udf") or die "$pwd${d}User${d}$udf:\n$!";
# $cv starts as a zero-padded string; the rest are the input/output file
# paths, all under the script directory.
my ($cv, $ULBfile, $exceptions, $missing, $output, $workFile) =
("00000",
"$Bin${d}Temp${d}Extract.txt",
"$Bin${d}Exceptions${d}Exceptions.txt",
"$Bin${d}Output${d}Entries_not_handled.txt",
"$Bin${d}Output${d}tWs_for_PDF.txt",
"$Bin${d}User${d}tW_work.txt",
);
# File-level state shared by the subs below.
my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSourceLangDir, $topNTSourceLangDir, $twPath, $outString);
my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
%specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
%SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef, %specifiedEntries, %finalOutput, %searchSequence);
my ($book, $testament);
my (@fileList);
# ==============================
# Relative paths below (Logs/..., User/...) are resolved against $pwd.
chdir("$pwd");
open LOG, ">:utf8", "Logs${d}Exc_log.log" or die "\$log: Logs${d}Exc_log.log: $!";
open OUT, ">:utf8", $output or die "$!";
open MISSING, ">$missing" or die "$!";
# Book table from __DATA__: tab-separated lines fill %bkFull (field 1 ->
# field 2) and %bkAbr (field 2 -> field 1); a two-character line sets
# $testament.
while (<DATA>) {
chomp;
if (/([^\t]*)\t([^\t]*)/) {
$bkAbr{$2} = $1;
$bkFull{$1} = $2;
} elsif (/^..$/) {
$testament = $&;
}
}
# Pipeline. The LOG handle is closed and reopened on a different file between
# phases, so each phase writes to its own log.
GetUserDefaults();
GetULBBooksToProcess();
ReadExceptions();
close LOG;
open LOG, ">:utf8", "Logs${d}tW_pairs.log" or die "Logs${d}tW_pairs.log: $!";
PairtWEntriesTotWPageAndUniqSNs();
close LOG;
open LOG, ">:utf8", "Logs${d}tWs_from_MAST.log" or die "Logs${d}tWs_from_MAST.log: $!";
ReadLinkedSNs();
LinkULBtoCV();
LinkSNsToULBtextViaEntries();
#Output();
close MISSING;
close OUT;
close LOG;
# On macOS, open the "missing" report using the configured editor command.
if ($^O eq "darwin") {system ("$textEditor $missing")}
print "\n\tDone.\n\n";
# ==============================
# Reads the per-OS user-defaults file and fills the file-level settings
# $textEditor, $repoPath and $twPath, then derives the three top-level
# directories from them. Dies when the editor or repository path is empty.
sub GetUserDefaults {
    open (my $settings, "<:utf8", "User${d}$udf") or die "User${d}$udf:\n$!";
    SETTING: while (my $entry = <$settings>) {
        chomp $entry;
        if ($entry =~ /^Text editor: (.*)$/) {
            # On macOS the editor is launched through "open -a".
            $textEditor = ($^O eq "darwin") ? "open -a $1" : $1;
            next SETTING;
        }
        if ($entry =~ /^Repository directory: (.*)$/) {
            $repoPath = $1;
            next SETTING;
        }
        if ($entry =~ /^translationWords path: (.*)$/) {
            $twPath = $1;
        }
    }
    die "No text editor found" if $textEditor eq "";
    die "No path to repo found" if $repoPath eq "";
    $topTwDir           = "$repoPath${d}$twPath";
    $topOTSourceLangDir = "$repoPath${d}MAST_HB";
    $topNTSourceLangDir = "$repoPath${d}OGNT";
    close $settings;
}
# GetULBBooksToProcess: read $workFile and queue one source XML path per
# usable line in @fileList.
# A usable line has four tab-separated fields and does not start with '#';
# field 1 is used as a book number and field 3 as a book code. Numbers
# above 39 select the NT source directory, all others the OT one.
# Also leaves $topSourceLangDir and $sourceFile set to the last values used.
sub GetULBBooksToProcess {
    say LOG "GetULBBooksToProcess on \$workFile: $workFile";
    open (my $file, "<:utf8", "$workFile") or die "$workFile:\n$!";
    while (my $line = <$file>) {
        chomp $line;
        next unless $line =~ /^([^#][^\n\t]*)\t[^\n\t]*\t([^\n\t]*)\t[^\n\t]*$/;
        my ($bookNumber, $bookCode) = ($1, $2);
        my $xmlName = $bookNumber . "-" . uc $bookCode;
        $topSourceLangDir = ($bookNumber > 39) ? $topNTSourceLangDir
                                               : $topOTSourceLangDir;
        $sourceFile = "$topSourceLangDir${d}$xmlName.xml";
        push @fileList, $sourceFile;
    }
    close $file;
}
# ReadExceptions: load the tab-separated exceptions file into three hashes
# keyed by the first field of each line (lines starting with '#' are skipped):
#   ref <TAB> G/H-number <TAB> ||        -> appended to %deleteNum
#   ref <TAB> digits <TAB> digits ...    -> appended to %adjust
#   ref <TAB> number <TAB> text <TAB> text -> appended to %specifiedEntries
#                                             as "number≈text≈text"
# Every appended record is terminated with "√", and %specifiedText is set
# to 1 for each reference seen. The three patterns are tried in this order;
# a line can satisfy more than one, so the order matters.
sub ReadExceptions {
    say "Reading exceptions";
    say LOG "ReadExceptions from \$exceptions: $exceptions";
    open (my $file, "<:utf8", "$exceptions") or die "$exceptions:\n$!";
    while (my $line = <$file>) {
        chomp $line;
        if ($line =~ /^([^#\n][^\t\n]*)\t([GH]\d+)\t\|\|$/) {
            my ($verse, $strongNum) = ($1, $2);
            $deleteNum{$verse} .= "$strongNum√";
            $specifiedText{$verse} = 1;
        }
        elsif ($line =~ /^([^#\n][^\t\n]*)\t(\d+\t\d+)/) {
            my ($verse, $numberPair) = ($1, $2);
            $adjust{$verse} .= "$numberPair√";
            $specifiedText{$verse} = 1;
        }
        elsif ($line =~ /^([^#\n\t][^\t\n]*)\t(.\d+)\t([^\t\n]*)\t([^\t\n]*)$/) {
            my ($verse, $sn, $snippet, $page) = ($1, $2, $3, $4);
            $specifiedEntries{$verse} .= join("≈", $sn, $snippet, $page) . "√";
            $specifiedText{$verse} = 1;
        }
    }
    close $file;
}
# PairtWEntriesTotWPageAndUniqSNs: walk $topTwDir, read every *.md page and
# record, per page,
#   - the comma-separated list that follows the heading
#     "## Forms Found in the English ULB:" -> %pagesThisEntry / %sourcePage
#   - every G/H number on the first line containing "Strong's"
#     -> %entriesThisSN / %pagesThisSN, and the de-duplicated set %relevantSNs.
# A page is identified by its path relative to $topTwDir, without ".md" and
# with the first directory separator replaced by a comma.
sub PairtWEntriesTotWPageAndUniqSNs {
    say "Pairing tW entries with tW pages and unique Strong's numbers";
    my (@filesToRun, @relevantSNs) = ();
    # Match on the real ".md" extension. The former glob-style '*.md' was
    # interpolated into a regex, where "." meant "any character", so any
    # name ending in "md" was accepted.
    my $filePattern = qr/\.md$/;
    find (sub {push @filesToRun, $File::Find::name if (m/$filePattern/)}, $topTwDir) ;
    @filesToRun = sort @filesToRun;
    foreach my $file (@filesToRun) {
        print ".";
        # File::Find reports forward slashes; convert them on non-Unix systems.
        $file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";
        my ($thisList, $shortFile) = ("", $file);
        $shortFile =~ s/^\Q$topTwDir${d}\E//;
        $shortFile =~ s/\.md$//;
        $shortFile =~ s/\Q$d\E/,/;
        my $fileText = read_file("$file", binmode => 'utf8');
        # Guarantee a newline at the end so the patterns below, which
        # require "\n" after the captured line, can match the last line.
        $fileText =~ s/$/\n/;
        if ($fileText =~ /## Forms Found in the English ULB:\n\n([^\n]*)\n/s) {
            $thisList = $1;
            $thisList =~ s/[\r\n]+$//;
            my @ULBEntries = split /, /, $thisList;
            foreach my $ULB_entry(@ULBEntries) {
                # Flag entries that were already claimed by an earlier page.
                if ($pagesThisEntry{$ULB_entry} =~ /^.+$/) {
                    say LOG "*!!*"
                }
                $pagesThisEntry{$ULB_entry} .= "$shortFile, ";
                $sourcePage{$ULB_entry} = $shortFile;
                say LOG "\$pagesThisEntry{$ULB_entry}: $pagesThisEntry{$ULB_entry}, \$sourcePage{$ULB_entry}: $sourcePage{$ULB_entry}, \$shortFile: $shortFile"
            }
        }
        if ($fileText =~ /Strong's([^\n]*)\n/) {
            my $SNs = $1;
            # Require at least one digit so a stray capital G or H in the
            # line is not recorded as a Strong's number.
            while ($SNs =~ s/([GH]\d+)//) {
                my $thisSN = $1;
                print LOG "! $shortFile ! $thisSN !";
                push @relevantSNs, $thisSN;
                $entriesThisSN{$thisSN} .= "$thisList, ";
                $pagesThisSN{$thisSN} .= "$shortFile, ";
                say LOG " \$pagesThisSN{$thisSN}: $pagesThisSN{$thisSN}"
            }
        }
    }
    @relevantSNs = uniq(@relevantSNs);
    foreach (@relevantSNs) {
        $relevantSNs{$_} = "$_";
    }
    say "";
    # Drop the trailing ", " left by the appends above.
    foreach my $thisSN (sort keys %entriesThisSN) {
        $entriesThisSN{$thisSN} =~ s/, $//;
    }
}
# LinkULBtoCV: read $ULBfile, whose lines are "<reference><TAB><verse text>",
# and index it three ways:
#   %ref{$cv}    = reference   ($cv is the zero-padded running counter)
#   %order{ref}  = $cv
#   %text{ref} / %fullText{ref} = verse text
# Afterwards every %text entry has its trailing non-word characters removed
# and the literal " q" appended.
# NOTE(review): " q" looks like an end-of-verse sentinel for the later
# pattern matching -- confirm.
sub LinkULBtoCV {
    say "Linking ULB to chapter and verse";
    say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";
    # 3-arg open with an explicit :utf8 layer, like the other input files
    # read by this script; a lexical handle and line variable keep the
    # global IN handle and $_ untouched.
    open (my $ulb, "<:utf8", $ULBfile) or die "$ULBfile: $!";
    while (my $line = <$ulb>) {
        if ($line =~ /^([^\t]*)\t(.*)$/) {
            my ($verseRef, $verseText) = ($1, $2);
            # String increment: "00000" -> "00001" keeps the zero padding.
            $cv ++;
            ($text{$verseRef}, $fullText{$verseRef}) = ($verseText, $verseText);
            $ref{$cv} = $verseRef;
            $order{$verseRef} = $cv;
        }
    }
    close $ulb;
    foreach my $key (sort keys %ref) {
        $text{$ref{$key}} =~ s/[^\w]+$//;
        $text{$ref{$key}} .= " q";
    }
}
# ReadLinkedSNs: scan every XML file in @fileList and collect, per verse,
# the Strong's numbers that also occur in %relevantSNs.
# The result is %SNsInCV{reference} = "H123√H456√..." (each number once).
# Numbers get the prefix "H" when the two characters before "-XXX.xml" in
# the file name are below 40, otherwise "G". The reference is
# "<full book name> <chapter>:<verse>" unless %newRef supplies a replacement.
sub ReadLinkedSNs {
    say "Reading linked Strong's numbers";
    foreach my $sourceFile (@fileList) {
        my $hg = "G";
        if ($sourceFile =~ /(..)-...\.xml$/) {
            $hg = "H" if ($1 < 40);
        }
        say LOG "opening \$sourceFile: $sourceFile";
        # 3-arg open on a lexical handle; the message now carries the OS error.
        open (my $in, "<", $sourceFile) or die "$sourceFile can't be opened: $!\n\n";
        my $thisRef;
        while (<$in>) {
            chomp;
            # The osisID separators are literal dots, so they are escaped.
            if (/<verse osisID="([^\.]*)\.(\d+)\.(\d+)">/) {
                my ($bk, $ch, $vs) = ($1, $2, $3);
                if (exists $newRef{"$bk $ch:$vs"}) {
                    $thisRef = $newRef{"$bk $ch:$vs"}
                } else {
                    ($thisRef) = ("$bkFull{$bk} $ch:$vs");
                }
            }
            else {
                # NOTE(review): this pattern requires a "\n" after the closing
                # quote, but the line has just been chomped, so it cannot
                # match; only lemma values that are already pure digits are
                # picked up below. Left unchanged until the intended
                # normalisation is confirmed.
                s/(lemma=").*?(\d+).*?("\n)/$1$2$3/;
                while (/<w lemma="(\d+)"/g) {
                    my ($thisSN) = ($hg . $1);
                    if (exists $relevantSNs{$thisSN}) {
                        $SNsInCV{$thisRef} .= "$thisSN√" unless ($SNsInCV{$thisRef} =~ /\b$thisSN\b/);
                    }
                }
            }
        }
        close $in;
    }
    # Move entries whose reference has a replacement in %newRef to the new
    # key; collected first so the hash is not re-keyed while being walked.
    my %temp;
    foreach my $oldRef (sort keys %SNsInCV) {
        if (exists $newRef{$oldRef}) {
            $temp{$newRef{$oldRef}} = $SNsInCV{$oldRef};
            delete $SNsInCV{$oldRef};
        }
    }
    foreach my $changedRef (sort keys %temp) {
        $SNsInCV{$changedRef} = $temp{$changedRef};
        say LOG "\$SNsInCV{$changedRef}: $SNsInCV{$changedRef}";
    }
}
# LinkSNsToULBtextViaEntries: visit every verse in %ref (in key order) and
#   1. drop Strong's numbers listed for it in %deleteNum,
#   2. when %specifiedEntries has the verse, run AssigntWPages and store its
#      two results in %searchSequence and %finalOutput,
#   3. list-assign the result of PopulateVerse to %finalOutput for the verse.
# NOTE(review): step 3 runs for every verse and therefore replaces whatever
# step 2 stored in %finalOutput -- confirm this is intended.
sub LinkSNsToULBtextViaEntries {
    say "Linking Strong's numbers to ULB text via tW page entries";
    say LOG "sub LinkSNsToULBtextViaEntries called";
    for my $thisRef (sort keys %ref) {
        my $tempRef = $ref{$thisRef};
        say LOG "\nLinkSNsToULBtextViaEntries: \$thisRef: $thisRef\t\$ref{$thisRef}: $ref{$thisRef}, \$tempRef: $tempRef, \$specifiedText{$ref{$thisRef}}: $specifiedText{$ref{$thisRef}}";
        DeleteUnneededSNs($tempRef) if exists $deleteNum{$tempRef};
        if (exists $specifiedEntries{$tempRef}) {
            my @assigned = AssigntWPages($tempRef, $SNsInCV{$tempRef});
            ($searchSequence{$tempRef}, $finalOutput{$tempRef}) = @assigned;
        }
        ($finalOutput{$tempRef}) = PopulateVerse();
    }
}
# DeleteUnneededSNs($ref): remove from $SNsInCV{$ref} the first occurrence of
# each "√"-terminated number listed in $deleteNum{$ref}. The state before
# and after is written to LOG.
sub DeleteUnneededSNs {
    my ($ref) = (@_);
    say LOG "\$ref: $ref, \$SNsInCV{$ref}: $SNsInCV{$ref}, \$deleteNum{$ref}: $deleteNum{$ref}";
    for my $unwanted (split /√/, $deleteNum{$ref}) {
        $SNsInCV{$ref} =~ s/$unwanted√//;
    }
    say LOG "\$ref: $ref, \$SNsInCV{$ref}: $SNsInCV{$ref}, \$deleteNum{$ref}: $deleteNum{$ref}";
}
# AssigntWPages($ref, $SNs): for each Strong's number in the "√"-separated
# list $SNs, try the ULB forms recorded for that number (%entriesThisSN)
# against a working copy of the verse text $text{$ref}.
# The first form that matches is removed from the working copy and
# "[form](page)\n" is appended to the script-level $outString.
# A form written "a ... b" is matched as a, any text, b; only the text
# between the two parts is kept in the working copy.
# When no form matches, "$ref $thisSN" is written to MISSING and $sequence
# is rebuilt from $specifiedEntries{$ref}.
# Returns ($sequence, $results); $results is always "" at present.
sub AssigntWPages {
    my ($ref, $SNs) = (@_);
    my ($tempText, $sequence, $results) = ($text{$ref}, "", "");
    say LOG "\$ref: $ref, \$tempText: $tempText\n\$SNsInCV{$ref}: $SNsInCV{$ref}, \$specifiedEntries{$ref}: $specifiedEntries{$ref}";
    my @searchArray = split /√/, $SNs;
    foreach my $thisSN (@searchArray) {
        my $found;
        my @entries = split /, /, $entriesThisSN{$thisSN};
        @entries = SortSearchEntriesArray(@entries);
        ENTRY: foreach my $thisEntry (@entries) {
            # Build the search pattern in a copy. $thisEntry must keep its
            # original text: it is the key into %pagesThisEntry and the
            # link text written to $outString.
            my $pattern = $thisEntry;
            while ($pattern =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
            if ($pattern =~ /\(\.\*\?\)/ && $tempText =~ s/\b($pattern)\b/$3/i) {
                say LOG "\n===\n$thisSN |$pattern| is found in first test.\n===";
                $outString .= "[$thisEntry]($pagesThisEntry{$thisEntry})\n";
                say LOG $outString . "\n===" . $tempText;
                $found = 1;
                last ENTRY;
            } elsif ($tempText =~ s/\b($pattern)[^\w']//i || $tempText =~ s/\b($pattern)["']//i || $tempText =~ s/["']($pattern)\b//i) {
                say LOG "\n===\n$thisSN |$pattern| is found in second test.\n===";
                $outString .= "[$thisEntry]($pagesThisEntry{$thisEntry})\n";
                say LOG $outString . "\n" . $tempText;
                $found = 1;
                last ENTRY;
            } else {
                say LOG "\$thisEntry $pattern is not found in\n|$ref|";
            }
        }
        unless ($found) {
            # Reduce each "SN≈snippet≈page" record to "snippet<TAB>page".
            $sequence = $specifiedEntries{$ref};
            $sequence =~ s/[GH]\d+≈([^≈]*)≈([^≈√]*)/$1\t$2/g;
            say MISSING "$ref $thisSN";
            say LOG "Breakout: \$ref: $ref\t\$thisSN: $thisSN"
        }
    }
    return ($sequence, $results);
}
# Adjust($snsOld, $ref): apply the "√"-separated adjustments stored in
# $adjust{$ref} to the number list $snsOld and return the combined list.
# Each adjustment is one of (tested in this order):
#   X <TAB> ||            remove X from $snsOld
#   digits <TAB> digits   drop the first number, add the second to the new list
#   || <TAB> text         add text (or "[a](page of b)" when text is "a<TAB>b")
#   digits <TAB> words    tag the number as number[words] and move it to the front
#   digits <TAB> path     tag the number as number(path)
# The result is "<new list>√<remaining old list>" with duplicate "√" collapsed.
# Progress is traced to LOG. $adjust{$ref} loses its trailing "√" as a side effect.
sub Adjust {
    my ($snsOld, $ref, $snsNew) = ($_[0], $_[1], "");
    say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: |$snsOld|";
    $snsOld =~ s/^ +/ /;
    $snsNew =~ s/√$//;
    $adjust{$ref} =~ s/√$//;
    say LOG "*0*\t\$adjust{$ref}: $adjust{$ref}";
    my @preadjustments = split /√/, $adjust{$ref};
    foreach my $adjustment (@preadjustments) {
        say LOG "<><>\t\$adjustment: >$adjustment<";
        if ($adjustment =~ /([^\t]*)\t\|\|$/) { # delete this from list to look for
            my $found = $1;
            $snsOld =~ s/\b$found\b ?//;
            say LOG "*1*\t\$found: $found should be deleted from \$snsOld: $snsOld";
        } elsif ($adjustment =~ /^(\d+)\t(\d+)$/) { # change to specified number
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1√//g;
            $snsNew .= "$found2√" unless $snsNew =~ /\b$found2\b /;
            say LOG "*2*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif ($adjustment =~ /^\|\|\t(.*)/) { # add this to list to look for
            my $adj = $1;
            say LOG "*3*\t\$adjustment: $adjustment, \$adj: $adj";
            if ($adj =~ /([^\t]*)\t([^\t]*)/) {
                $snsNew = "[$1]($pagesThisEntry{$2})"
            } else {
                $snsNew .= "$adj "
            }
        } elsif ($adjustment =~ /^(\d+)\t([\w\. \-\']+)$/) { # add specified word
            my ($found1, $found2) = ($1, $2);
            if ($snsOld =~ s/\b$found1\b/$found1\[$found2\]/) {
                say LOG "*\t4a\t\$snsOld: $snsOld";
                # Move the tagged number to the front of the old list.
                if ($snsOld =~ s/^(.+√)($found1\[$found2\]√)/$2$1/) {}
                elsif ($snsOld =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/) {}
            }
            else {
                $snsNew .= "${found1}√";
                # Bind the substitution to $snsOld. The former ".=" ran it
                # against $_ and appended its return value to $snsOld.
                $snsOld =~ s/\b$found1\b//;
                say LOG "*\t4b\t\$snsNew: $snsNew";
                $snsNew =~ s/\b$found1\b/$found1\[$found2\]/;
                $snsNew =~ s/^(.+√)($found1\[$found2\]√)/$2$1/;
                $snsNew =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/;
            }
            $snsOld =~ s/ {2,}/ /;
            $snsOld =~ s/√$//;
            say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif (($adjustment =~ /^(\d+)\t([\/\d\w]+)$/)) { # add specified page
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1/$found1\($found2\)/;
            say LOG "*5*\t\t\$snsOld: $snsOld";
        }
    }
    $snsOld =~ s/^√//;
    $snsOld =~ s/√+/√/g;
    say LOG "\$snsNew: >$snsNew<\n\$snsNew+\$snsOld: >$snsNew< >$snsOld<";
    $snsNew = "$snsNew√$snsOld";
    say LOG "*5*\t*\t\$snsNew: |$snsNew|";
    # Pull any "number[words]" item in front of a plain number that precedes it.
    while ($snsNew =~ s/(\d+) (\d+\[[^\[\]]*\])/$2$1/) {}
    say LOG "*5*\t**\t\$snsNew: |$snsNew|";
    $snsNew =~ s/√+/√/g;
    $snsNew =~ s/^[ √]//;
    say LOG "*6*\t\t\$snsNew: |$snsNew|";
    return $snsNew;
}
# Output: for every verse in %ref, tidy $SNsInCV, sort the newline-separated
# $listOfPages entry, and write to MISSING every item of $checkPages that
# does not appear among the sorted pages.
# Works only on script-level hashes; takes no arguments and returns nothing
# useful.
sub Output {
    say "Outputting";
    foreach my $key (sort keys %ref) {
        my %donePages;
        my $thisRef = $ref{$key};
        $SNsInCV{$thisRef} =~ s/^ +//;
        $SNsInCV{$thisRef} =~ s/ +$//;
        $SNsInCV{$thisRef} =~ s/ {2,}/ /;
        my @array = split /\n/, $listOfPages{$thisRef};
        # NOTE(review): `sort` appears twice. If the outer one is a plain
        # string sort it overrides the case-insensitive order produced by
        # the inner one -- confirm which ordering is wanted.
        my @sorted =
            sort sort { lc($a) cmp lc($b) }
            @array;
        # join() instead of assigning to the global $" so array
        # interpolation elsewhere in the script keeps its default separator.
        $listOfPages{$thisRef} = join "\n", @sorted;
        say LOG "\$listOfPages{$thisRef}: $listOfPages{$thisRef}\n\$checkPages{$thisRef}: $checkPages{$thisRef}";
        $checkPages{$thisRef} =~ s/^ +//;
        $checkPages{$thisRef} =~ s/ +$//;
        $checkPages{$thisRef} =~ s/ {2,}/ /;
        $checkPages{$thisRef} =~ s/ \|\|//;
        say LOG "\$checkPages{$thisRef}:\t|$checkPages{$thisRef}|";
        my @checkArray = split / /, $checkPages{$thisRef};
        # NOTE(review): the first sorted element is discarded here; the
        # reason is not visible in this file -- confirm.
        shift @sorted;
        foreach my $slice (@sorted) {
            # Reduce "[text](page)" to "page".
            $slice =~ s/\[.*?\]\((.*?)\)/$1/;
            $donePages{$slice} = $slice;
        }
        foreach my $slice (@checkArray) {
            unless (exists $donePages{$slice}) {
                say MISSING "$thisRef\t$slice";
            }
        }
    }
}
# Substitute: for every key of %pages, apply the ", "-separated
# "old<TAB>new" pairs stored in $substitutedPages{$key} to $SNsInCV{$key}:
#   old is "||"  -> append "new "
#   new is "||"  -> remove the first "old "
#   otherwise    -> replace the first old with new
# After each pair $checkPages{$key} receives a copy of $SNsInCV{$key}, and
# any " ||" left in $SNsInCV{$key} is then removed. Keys without
# substitutions just get $checkPages{$key} = $SNsInCV{$key}.
sub Substitute {
    for my $key (sort keys %pages) {
        say LOG "\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";
        if (!exists $substitutedPages{$key}) {
            $checkPages{$key} = $SNsInCV{$key};
        }
        else {
            $substitutedPages{$key} =~ s/, $//;
            for my $pair (split /, /, $substitutedPages{$key}) {
                next unless $pair =~ /([^\t]*)\t([^\t]*)/;
                my ($old, $new) = ($1, $2);
                if ($old eq "||") {
                    $SNsInCV{$key} .= "$new ";
                }
                elsif ($new eq "||") {
                    $SNsInCV{$key} =~ s/$old //;
                }
                else {
                    $SNsInCV{$key} =~ s/$old/$new/;
                }
                # Snapshot first, then strip the " ||" markers.
                $checkPages{$key} = $SNsInCV{$key};
                $SNsInCV{$key} =~ s/ \|\|//g;
            }
        }
        say LOG "<>\t\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";
    }
}
# ProperOrderOutString($outString, $cv): reorder the newline-separated
# "[ulb form](page)" lines of $outString by where each form occurs in
# $fullText{$cv}.
# Each matched form is overwritten with spaces in $fullText{$cv} so it
# cannot match again; the length of the text before the match is used as
# the sort key. Lines whose form is not found are dropped.
# Returns the reordered lines, each followed by "\n".
sub ProperOrderOutString {
    my @unordered = split /\n/, $_[0];
    my ($thisCV, $outS) = ($_[1], "");
    my (%orderedSet);
    foreach my $thisSet (@unordered) {
        say LOG "\t>\t$thisSet";
        if ($thisSet =~ /(\[([^\]]*)\])(\([^\)]*\))/) {
            my ($ulb, $fileLoc) = ($2, $3);
            $ulb =~ s/ \.\.\. /.*?/g;
            say LOG "\t>>\t\$ulb: $ulb, \$fileLoc: $fileLoc, \$fullText{$thisCV}\n$fullText{$thisCV}";
            if ($ulb =~ /^(.*)\.\.\.(.*)$/) {
                my ($found1, $found2) = ($1, $2);
                my $blank1 = " " x length($found1);
                my $blank2 = " " x length($found2);
                # The second half of a split form is $found2; it was
                # previously matched as $found1 a second time.
                if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*)\b$found2\b(.*)$/$1$blank1$2$blank2$3/i) {
                    say LOG "\t>>>\t$fullText{$thisCV}";
                    my ($order) = (length $1);
                    $orderedSet{$order} = $thisSet;
                    say LOG "\t\t\t\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
                }
            } else {
                my $blank = " " x length($ulb);
                if ($fullText{$thisCV} =~ s/^(.*?)\b$ulb\b(.*)$/$1$blank$2/i) {
                    say LOG "\t>>>>\t$fullText{$thisCV}";
                    my ($order) = (length $1);
                    $orderedSet{$order} = $thisSet;
                    say LOG "\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
                }
            }
        }
    }
    foreach my $key (sort {$a<=>$b} keys %orderedSet) {
        $outS .= "$orderedSet{$key}\n"
    }
    return $outS;
}
# SortSearchEntriesArray(@entries): return the entries in the reverse of
# this ascending order: numeric value of the first character, then string
# length, then numeric value of the whole entry.
# For entries that do not start with a digit the numeric comparisons are
# ties, so the effective order is longest entry first.
sub SortSearchEntriesArray {
    my @ordered = reverse sort {
           substr($a, 0, 1) <=> substr($b, 0, 1)
        || length($a)       <=> length($b)
        || $a               <=> $b
    } @_;
    return @ordered;
}
# PopulateVerse: placeholder with an empty body; it takes no arguments and
# returns nothing. LinkSNsToULBtextViaEntries list-assigns its result to
# $finalOutput{$tempRef}.
sub PopulateVerse {
}
__DATA__
OT
gen Genesis
exo Exodus
lev Leviticus
num Numbers
deu Deuteronomy
jos Joshua
jdg Judges
rut Ruth
1sa 1 Samuel
2sa 2 Samuel
1ki 1 Kings
2ki 2 Kings
1ch 1 Chronicles
2ch 2 Chronicles
ezr Ezra
neh Nehemiah
est Esther
job Job
psa Psalms
pro Proverbs
ecc Ecclesiastes
sng Song of Solomon
isa Isaiah
jer Jeremiah
lam Lamentations
ezk Ezekiel
dan Daniel
hos Hosea
jol Joel
amo Amos
oba Obadiah
jon Jonah
mic Micah
nam Nahum
hab Habakkuk
zep Zephaniah
hag Haggai
zec Zechariah
mal Malachi
NT
mat Matthew
mrk Mark
luk Luke
jhn John
act Acts
rom Romans
1co 1 Corinthians
2co 2 Corinthians
gal Galatians
eph Ephesians
php Philippians
col Colossians
1th 1 Thessalonians
2th 2 Thessalonians
1ti 1 Timothy
2ti 2 Timothy
tit Titus
phm Philemon
heb Hebrews
jas James
1pe 1 Peter
2pe 2 Peter
1jn 1 John
2jn 2 John
3jn 3 John
jud Jude
rev Revelation

View File

@ -48,11 +48,11 @@ my ($cv, $ULBfile, $exceptions, $missing, $output, $workFile) =
"$Bin${d}User${d}tW_work.txt",
);
my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSourceLangDir, $topNTSourceLangDir, $twPath);
my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSourceLangDir, $topNTSourceLangDir, $twPath, $outString);
my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
%specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
%SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef, %specifiedEntries);
%SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef, %specifiedEntries, %finalOutput, %searchSequence);
my ($book, $testament);
my (@fileList);
@ -297,12 +297,12 @@ sub ReadLinkedSNs {
s/(lemma=").*?(\d+).*?("\n)/$1$2$3/;
while (/<w lemma="(\d+)"/g) {
#say LOG $_;
my ($thisNum) = ($hg . $1);
#say LOG "\t\$thisNum: $thisNum, \$SNsInCV{$thisRef}: $SNsInCV{$thisRef}";
if (exists $relevantSNs{$thisNum}) {
$SNsInCV{$thisRef} .= "$thisNum√" unless ($SNsInCV{$thisRef} =~ /\b$thisNum\b/);
my ($thisSN) = ($hg . $1);
#say LOG "\t\$thisSN: $thisSN, \$SNsInCV{$thisRef}: $SNsInCV{$thisRef}";
if (exists $relevantSNs{$thisSN}) {
$SNsInCV{$thisRef} .= "$thisSN√" unless ($SNsInCV{$thisRef} =~ /\b$thisSN\b/);
}
#say LOG ">\t\$thisNum: $thisNum, \$SNsInCV{$thisRef}: $SNsInCV{$thisRef}";
#say LOG ">\t\$thisSN: $thisSN, \$SNsInCV{$thisRef}: $SNsInCV{$thisRef}";
}
}
}
@ -326,14 +326,14 @@ sub LinkSNsToULBtextViaEntries {
say LOG "sub LinkSNsToULBtextViaEntries called";
foreach my $thisRef (sort keys %ref) {
my $tempRef = $ref{$thisRef};
say LOG "\nLinkSNsToULBtextViaEntries: \$thisRef: $thisRef\t\$ref{$thisRef}: $ref{$thisRef}, \$specifiedText{$ref{$thisRef}}: $specifiedText{$ref{$thisRef}}";
say LOG "\nLinkSNsToULBtextViaEntries: \$thisRef: $thisRef\t\$ref{$thisRef}: $ref{$thisRef}, \$tempRef: $tempRef, \$specifiedText{$ref{$thisRef}}: $specifiedText{$ref{$thisRef}}";
if (exists $deleteNum{$tempRef}) {
DeleteUnneededSNs($tempRef);
}
if (exists $specifiedEntries{$tempRef}) {
AssigntWPages($tempRef, $SNsInCV{$tempRef});
($searchSequence{$tempRef}, $finalOutput{$tempRef}) = AssigntWPages($tempRef, $SNsInCV{$tempRef});
}
ReorderEntries($tempRef);
($finalOutput{$tempRef}) = PopulateVerse();
}
}
@ -346,25 +346,48 @@ sub DeleteUnneededSNs {
}
say LOG "\$ref: $ref, \$SNsInCV{$ref}: $SNsInCV{$ref}, \$deleteNum{$ref}: $deleteNum{$ref}";
}
sub AssigntWPages {
my ($ref, $SNs) = (@_);
my $tempText = $text{$ref};
say LOG "\$ref: $ref, \$SNsInCV{$ref}: $SNsInCV{$ref}, \$specifiedEntries{$ref}: $specifiedEntries{$ref}";
my ($tempText, $sequence, $results) = ($text{$ref}, "", "");
say LOG "\$ref: $ref, \$tempText: $tempText\n\$SNsInCV{$ref}: $SNsInCV{$ref}, \$specifiedEntries{$ref}: $specifiedEntries{$ref}";
my @searchArray = split /√/, $SNs;
foreach my $thisSN (@searchArray) {
say LOG "\$thisSN: $thisSN\t$entriesThisSN{$thisSN}";
my $found;
#say LOG "\$thisSN: $thisSN\t$entriesThisSN{$thisSN}";
my @entries = split /, /, $entriesThisSN{$thisSN};
#say LOG "\t\@entries: @entries";
@entries = SortSearchEntriesArray(@entries);
#say LOG "\t\t@entries";
foreach my $thisEntry (@entries) {
if ($tempText =~ s/$thisEntry//) {
SortArray();
}
while ($thisEntry =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ s/\b($thisEntry)\b/$3/i) {
say LOG "\n===\n$thisSN |$thisEntry| is found in first test.\n===";
$outString .= "[$thisEntry]($pagesThisEntry{$thisEntry})\n";
say LOG $outString . "\n===" . $tempText;
$found = 1;
goto Breakout;
} elsif ($tempText =~ s/\b($thisEntry)[^\w']//i || $tempText =~ s/\b($thisEntry)["']//i || $tempText =~ s/["']($thisEntry)\b//i) {
say LOG "\n===\n$thisSN |$thisEntry| is found in second test.\n===";
$outString .= "[$thisEntry]($pagesThisEntry{$thisEntry})\n";
say LOG $outString . "\n" . $tempText;
$found = 1;
goto Breakout;
} else {
say LOG "\$thisEntry $thisEntry is not found in\n|$ref|";
}
}
$sequence = $specifiedEntries{$ref};
$sequence =~ s/[GH]\d+≈([^≈]*)≈([^≈√]*)/$1\t$2/g;
Breakout:
unless ($found) {
say MISSING "$ref $thisSN";
say LOG "Breakout: \$ref: $ref\t\$thisSN: $thisSN"
}
next if $found;
}
return ($sequence, $results);
}
sub ReorderEntries {}
sub Adjust {
my ($snsOld, $ref, $snsNew, $addToSnsNew) = ($_[0], $_[1], "", "");
say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: |$snsOld|";
@ -568,12 +591,16 @@ sub ProperOrderOutString {
return $outS;
}
sub SortArray {
my @sorted = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|| length($a) <=> length($b)
|| $a <=> $b }
@array;
sub SortSearchEntriesArray {
my @entries = @_;
#say LOG "@entries";
@entries = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|| length($a) <=> length($b)
|| $a <=> $b } @entries;
return @entries;
}
sub PopulateVerse {
}
__DATA__
OT