Update after new exceptions file made

This commit is contained in:
Henry Whitney 2020-06-03 17:02:16 -04:00
parent 92c7b7ca6e
commit cdc8d56f19
11 changed files with 129239 additions and 528 deletions

View File

@ -0,0 +1,69 @@
use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use FindBin '$Bin';
# Pairs every translationWords entry with the page (file name without
# extension) whose first-line "# a, b, c" heading lists it, then appends that
# page name as an extra tab-separated column to every line of the exceptions
# file whose third column is that entry.  The result is written to
# New.exceptions.txt; a trace is written to Logs/log.txt next to the script.
my $topDir = "/Users/Henry/Documents/WACS/en_tw";
my (%page);                                     # entry => page short name
my (@filesToRun, @entryList, @workList) = ();
my $filePattern = qr/\.md$/;                    # literal ".md" suffix
open(LOG, '>', "$Bin/Logs/log.txt") or die "$Bin/Logs/log.txt: $!";
find( sub { push @filesToRun, $File::Find::name if $_ =~ $filePattern }, $topDir );
say "Pairing entries";
foreach my $file (@filesToRun) {
    my $shortFile = $file;
    my (@theseEntries);
    # Reduce the full path to the bare file name without its .md extension.
    $shortFile =~ s/^[^\n]*\/([^\n\/\\\.]*)\.md$/$1/;
    my $fileText = read_file("$file", binmode => 'utf8');
    # Only a heading on the very first line of the file is considered.
    if ($fileText =~ /^# ([^\n]*)\n/) {
        @theseEntries = split /, /, $1;
        push(@entryList, @theseEntries);
    }
    foreach my $slice (@theseEntries) {
        $page{$slice} = $shortFile;
        say LOG "$slice\t$page{$slice}";
    }
}
say LOG "<>";
# The comparator compares first characters numerically, then lengths, then
# the whole entries numerically; the sorted list is then reversed.  Entries
# that do not start with a digit compare equal on the first key.
@workList = reverse sort { substr($a,0,1) <=> substr($b,0,1)
                           || length($a) <=> length($b)
                           || $a <=> $b }
            @entryList;
say "Reading exceptions file";
my $fileText = read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Exceptions/Exceptions.txt", binmode => 'utf8');
$" = "\n";
say LOG "===Entry list===\n@entryList\n===";
foreach my $entry (@workList) {
    say $entry;
    # \Q..\E: the entry is literal text, not a pattern.
    # (?=\n): the line's terminating newline is only looked at, not consumed,
    # so it is still available as the leading "\n" of the next line; with a
    # consuming "\n" every second of two adjacent matching lines was skipped.
    $fileText =~ s/(\n[^#\t\n]*\t[^\t\n]*\t\Q$entry\E)(?=\n)/$1\t$page{$entry}/g;
}
say LOG "\n===Work list===\n@workList\n===";
my $outPath = "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Output/New.exceptions.txt";
# Report the OS error ($!) on failure; the original interpolated $1.
open(OUT, '>', $outPath) or die "$outPath: $!";
say OUT $fileText;
close OUT;
close LOG;
say "Done.";

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -34479,6 +34479,7 @@ Job 27:4 1696 ||
Job 27:4 1897 ||
Job 27:4 8193 ||
Job 27:5 5493 ||
Job 27:5 6663 ||
Job 27:6 2388 ||
Job 27:6 3117 ||
Job 27:6 7503 ||

View File

@ -1,525 +0,0 @@
|6
34
68
113
116
120
157
183
214
215
227
241
251
270
281
312
314
319
325
374
376
398
399
400
481
539
560
606
724
753
830
926
927
935
954
995
1004
1086
1101
1121
1123
1129
1156
1167
1214
1238
1245
1270
1320
1323
1363
1364
1387
1389
1396
1397
1419
1431
1438
1505
1524
1530
1540
1543
1580
1644
1692
1696
1697
1730
1777
1817
1819
1820
1826
1869
1870
1934
1935
1980
1984
1993
1995
2015
2094
2106
2114
2142
2145
2146
2152
2166
2220
2233
2236
2256
2257
2280
2308
2319
2342
2370
2372
2376
2380
2388
2428
2436
2470
2490
2491
2502
2505
2506
2534
2560
2600
2651
2654
2656
2658
2672
2691
2706
2764
2790
2803
2860
2864
2896
2975
2976
3021
3027
3028
3034
3046
3068
3069
3083
3117
3118
3148
3179
3201
3205
3206
3212
3220
3225
3243
3254
3293
3318
3330
3335
3358
3365
3366
3381
3384
3426
3427
3463
3470
3477
3493
3498
3499
3513
3533
3557
3559
3582
3611
3615
3627
3644
3667
3671
3708
3709
3730
3760
3772
3782
3789
3791
3820
3885
3899
3908
3947
3956
3966
3978
4005
4060
4101
4116
4127
4136
4150
4161
4241
4242
4279
4307
4325
4334
4339
4349
4385
4399
4406
4456
4481
4483
4486
4487
4557
4585
4591
4592
4605
4672
4725
4740
4744
4758
4800
4830
4832
4917
4941
4960
4994
5027
5035
5045
5046
5058
5062
5066
5071
5074
5104
5117
5128
5142
5183
5186
5193
5197
5221
5232
5307
5315
5324
5337
5344
5362
5375
5413
5414
5428
5437
5445
5458
5459
5462
5493
5518
5564
5590
5592
5608
5631
5640
5642
5647
5674
5675
5705
5732
5750
5769
5782
5791
5794
5800
5810
5824
5857
5869
5892
5922
5927
5945
5953
5957
5975
5977
5980
6030
6031
6083
6086
6106
6113
6118
6121
6148
6153
6213
6235
6242
6256
6285
6293
6310
6327
6381
6418
6437
6438
6440
6466
6471
6475
6504
6524
6555
6565
6567
6600
6601
6607
6679
6696
6726
6738
6743
6779
6793
6887
6901
6921
6924
6957
6963
6965
6966
6994
7019
7043
7050
7069
7110
7121
7123
7126
7138
7144
7194
7198
7200
7218
7223
7225
7227
7230
7235
7236
7264
7272
7291
7307
7308
7311
7339
7342
7368
7381
7386
7399
7423
7451
7453
7455
7462
7468
7469
7494
7522
7560
7576
7580
7588
7592
7604
7605
7606
7623
7637
7641
7646
7651
7655
7657
7665
7673
7725
7737
7751
7760
7761
7786
7814
7817
7832
7857
7896
7909
7921
7931
7934
7936
7962
7970
7971
7973
7981
7992
7993
7999
8003
8027
8040
8081
8085
8086
8104
8130
8133
8138
8145
8147
8175
8193
8217
8245
8248
8254
8354
8389
8394
8421
8426
8446
8462
8503
8548
8552
8562
8610
8651
5414
6612
7069
7225
2580
3212
3381
4390
5307
1870
7272
4116
2219
5869
1167
3671
7339
1993
5892
5042
5186
6544
14
935
7722
227
6030
4672
398
8085
6310
8104
376
1696
6213
7743
753
3254
7922
120
8172
3474
3474
6555
6443
6086
7834
3868
3027
7725
4279
3426
2790
7378
2600
1580
|
$ARGV[0]: 5475

View File

@ -0,0 +1,131 @@
use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
# File-scoped state shared with ProcessFiles() and GetBookNames().
my (%long);        # book abbreviation => full book name, filled from __DATA__
my (@topDirs) = ('/Users/Henry/Documents/WACS/MAST_HB', '/Users/Henry/Documents/WACS/OGNT');
my $outText;       # accumulated "Book c:v: lemma, lemma, ..." lines
# Both paths are relative to the current working directory.  Three-argument
# open keeps the mode separate from the file name, and each failure now
# reports which file could not be opened and why.
# LOG is created (truncated) here; nothing in this script writes to it.
open(LOG, '>', 'Logs/log.txt') or die "Logs/log.txt: $!";
open(OUT, '>', 'Output/Lemmas.txt') or die "Output/Lemmas.txt: $!";
GetBookNames();
ProcessFiles();
say OUT $outText;
# Buffered write errors surface at close time, so check it for the output.
close OUT or die "Output/Lemmas.txt: $!";
close LOG;
say "Done.";
# Walks every directory in @topDirs, reads each .xml file found there in
# sorted path order, and appends to the file-scoped $outText:
#   * for each <verse osisID="BOOK.CH.VS"> line, a new line
#     "<$long{BOOK}> CH:VS: " (BOOK is translated through %long);
#   * for each other line carrying a lemma="..." attribute, the first run of
#     1-4 digits in that attribute followed by ", ".
# Takes no arguments; progress (directory and file names) goes to STDOUT.
# Dies if a file cannot be opened.
sub ProcessFiles {
    foreach my $topDir (@topDirs) {
        say $topDir;
        my @filesToRun = ();
        # Match a literal ".xml" suffix; the original glob-style '*.xml' was
        # interpolated into a regex where '*' and '.' are metacharacters.
        find( sub { push @filesToRun, $File::Find::name if /\.xml$/ }, $topDir );
        foreach my $file ( sort @filesToRun ) {
            say $file;
            # Lexical handle + three-argument open: the file name can no
            # longer be misread as a mode, and the handle is scoped here.
            open(my $input, '<', $file) or die "$file: $!";
            while (my $line = <$input>) {
                chomp $line;
                if ($line =~ /<verse osisID="(.*)\.(\d{1,3})\.(\d{1,3})">/) {
                    my ($thisBk, $thisCh, $thisVs) = ($1, $2, $3);
                    $thisBk = $long{$thisBk};
                    $outText .= "\n$thisBk $thisCh:$thisVs: ";
                }
                elsif ($line =~ /lemma="[^\d]*(\d{1,4})[^\d]*"/) {
                    $outText .= "$1, ";
                }
            }
            close $input;
        }
    }
    # NOTE(review): without /m, '$' only matches at the end of the whole
    # string, so this removes just the final ", "; every earlier verse line
    # keeps its trailing ", ".  Left unchanged in case consumers of
    # Lemmas.txt depend on it - confirm.
    $outText =~ s/, $//g;
    # Drop the newline that precedes the very first verse.
    $outText =~ s/^\n//;
}
# Fills %long from the __DATA__ section: every line made of exactly two
# tab-separated fields becomes $long{first field} = second field.  Lines of
# any other shape are ignored.
sub GetBookNames {
    while (<DATA>) {
        chomp;
        next unless /^([^\t\n]*)\t([^\t\n]*)$/;
        $long{$1} = $2;
    }
}
__DATA__
gen Genesis
exo Exodus
lev Leviticus
num Numbers
deu Deuteronomy
jos Joshua
jdg Judges
rut Ruth
1sa 1 Samuel
2sa 2 Samuel
1ki 1 Kings
2ki 2 Kings
1ch 1 Chronicles
2ch 2 Chronicles
ezr Ezra
neh Nehemiah
est Esther
job Job
psa Psalms
pro Proverbs
ecc Ecclesiastes
sng Song of Solomon
isa Isaiah
jer Jeremiah
lam Lamentations
ezk Ezekiel
dan Daniel
hos Hosea
jol Joel
amo Amos
oba Obadiah
jon Jonah
mic Micah
nam Nahum
hab Habakkuk
zep Zephaniah
hag Haggai
zec Zechariah
mal Malachi
mat Matthew
mrk Mark
luk Luke
jhn John
act Acts
rom Romans
1co 1 Corinthians
2co 2 Corinthians
gal Galatians
eph Ephesians
php Philippians
col Colossians
1th 1 Thessalonians
2th 2 Thessalonians
1ti 1 Timothy
2ti 2 Timothy
tit Titus
phm Philemon
heb Hebrews
jas James
1pe 1 Peter
2pe 2 Peter
1jn 1 John
2jn 2 John
3jn 3 John
jud Jude
rev Revelation

View File

@ -25,7 +25,7 @@ my ($whatami, $inFile);
my ($book, $chap, $vers, $text, $outText, $newV, $newC, $newB, $outFile, $usfmText, $metathesis, $repoPath, $ulbPath);
open(LOG, ">:utf8", "$Bin${d}Logs${d}log.log") or die "$Bin${d}${d}Logs${d}log.log:\n$!";
say LOG "hi";
my ($udf) = "User_defaults.windows.txt";
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}

View File

@ -0,0 +1,62 @@
# Adds Synonyms and Related Words section and
# Forms Found in the English ULB section
# to tW pages
use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
# Driver: collect the files under $topDir whose name matches $filePattern,
# run each one's text through Process(), and hand the result to Output(),
# which writes it to the corresponding path under $topOutDir.
my $topDir      = "/Users/Henry/Documents/WACS/W_Q_Restructure/bible";
my $topOutDir   = "/Users/Henry/Documents/WACS/W_Q_Restructure_new/bible";
my $filePattern = '*.md' ;
my @filesToRun  = ();
# The log is opened (unchecked) but nothing below writes to it.
open(LOG, ">Logs/log.log");
my $collectMatches = sub {
    return unless m/^(.*)$filePattern$/;
    push @filesToRun, $File::Find::name;
};
find($collectMatches, $topDir);
for my $file (@filesToRun) {
    say $file;
    # Scalar assignment: the whole file is read into one string.
    my $pageText = read_file("$file", binmode => 'utf8');
    my $newText  = Process($pageText);
    Output($file, $newText);
}
close(LOG);
say "Done.";
# =====================
# Rebuilds one page of text.
#   $_[0]  - the full page text.
# From the page it takes:
#   * the comma-separated entry list on the first-line "# ..." heading,
#   * the key word (heading text up to the first comma or newline),
#   * everything from the first "## Facts:" / "## Definition:" to the end.
# Returns a new page: "# key word", a "Synonyms and Related Words" section
# holding the sorted entry list, the retained body, and a "Forms Found in the
# English ULB" section holding the same list, with every run of three or
# more newlines collapsed to two.
sub Process {
    my ($page) = @_;

    my ($entryLine)        = $page =~ /^# ([^\n]*)\n/;
    my (undef, $headWord)  = $page =~ /^# (([^\n,]*)(\n|,))/;
    my ($body)             = $page =~ /(## (Facts|Definition):.*)$/s;

    my @formList  = split /, /, $entryLine;
    my $formsText = join(', ', sort @formList);

    my $rebuilt = join("\n\n",
        "# $headWord",
        "## Synonyms and Related Words:",
        $formsText,
        $body,
        "## Forms Found in the English ULB",
        $formsText,
    ) . "\n\n\n\n";

    # Squeeze blank-line runs until none of three or more newlines remain.
    1 while $rebuilt =~ s/\n{3,}/\n\n/g;
    return $rebuilt;
}
# Writes $text to the counterpart of an input file in the output tree.
#   $_[0]  - path of the input file (expected to start with $topDir).
#   $_[1]  - text to write.
# The leading $topDir of the path is replaced by $topOutDir and the text is
# written there, replacing any existing file.
# Dies if the path does not start with $topDir, or if the output file cannot
# be opened or closed.
sub Output {
    my ($OutFile, $text) = @_;
    # \Q..\E: $topDir is a literal path prefix, not a pattern.  If the prefix
    # is missing the path would stay unchanged and the write below would
    # overwrite the input file, so stop instead.
    $OutFile =~ s/^\Q$topDir\E/$topOutDir/
        or die "$OutFile is not under $topDir; refusing to overwrite the input file";
    open(my $out, '>', $OutFile) or die "$OutFile: $!";
    print {$out} $text;
    # Buffered write errors are only reported when the handle is closed.
    close $out or die "$OutFile: $!";
}

View File

@ -10,13 +10,13 @@ HTML browser: /Applications/Firefox.app
Repository directory: /Users/Henry/Documents/WACS
translationNotes path: en_tn
translationWords path: bible.en_tw
translationWords path: bible.en_tw.kt
Unlocked Literal Bible path: en_ulb
# translationNotes path: gl_.*_tn
# translationWords path: gl_.*_bible.en_tw
# Unlocked Literal Bible path: gl_.*_ulb
Hebrew Bible XML directory: MAST_HB
Greek Bible XML directory: MAST_NT
Greek Bible XML directory: OGNT
===============

View File

@ -0,0 +1,719 @@
# Produces list of tWs for each verse by linking MAST_NT to ULB through tWs.
# Disambiguates entries found on more than one tW page.
# Does both Testaments.
# Taken from tWs.from.MAST_NT.2.pl.
# The output from this script is useful for the interleaved PDFs used in MAST.
# This version uses an exception file to handle places where the MAST_NT points to
# a tW page different from that on which the ULB term appears.
# Make sure the correct input file is $ULBfile. Run script.
# Output is in $output file.
# Check the $missing
# file for needed corrections, probably lines needing to be added to the
# $exceptions file.
use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use FindBin '$Bin';
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
use List::MoreUtils qw(uniq);
# ---- Global setup and driver ------------------------------------------------
# Unbuffer the currently selected output handle and make interpolated arrays
# ("@array") join their elements with newlines.
$|=1;
$"="\n";
# $pwd is the script's directory, $d the path separator for this OS.
my ($pwd, $d) = ($Bin, "/");
if ($^O eq "MSWin32") {
$d = "\\";
$pwd =~ s/\//\\/g;
}
# Pick the per-OS user-defaults file name (Windows name is the fallback).
my ($udf) = "User_defaults.windows.txt";
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
#elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}
# NOTE(review): this handle is not read or closed anywhere in the visible
# code; GetUserDefaults() opens the same file again through its own lexical.
open (my $defaults, "<:utf8", "$pwd${d}User${d}$udf") or die "$pwd${d}User${d}$udf:\n$!";
# $cv is a zero-padded string counter (incremented in LinkULBtoCV); the rest
# are the fixed input/output file locations below the script directory.
my ($cv, $ULBfile, $exceptions, $missing, $output, $workFile) =
("00000",
"$Bin${d}Temp${d}Extract.txt",
"$Bin${d}Exceptions${d}Exceptions.txt",
"$Bin${d}Output${d}Entries_not_handled.txt",
"$Bin${d}Output${d}tWs_for_PDF.txt",
"$Bin${d}User${d}tW_work.txt",
);
# File-scoped state shared by all subs below.
my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSourceLangDir, $topNTSourceLangDir);
my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
%specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
%SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pageThisEntry, %adjust, %newRef);
my ($book, $testament);
my (@fileList);
# ==============================
# All relative paths from here on are resolved against the script directory.
chdir("$pwd");
open LOG, ">:utf8", "Logs${d}Exc_log.log" or die "\$log: Logs${d}Exc_log.log: $!";
open OUT, ">:utf8", $output or die "$!";
open MISSING, ">$missing" or die "$!";
# Load the book table from __DATA__: tab-separated lines fill %bkAbr
# (second field => first) and %bkFull (first field => second); a line of
# exactly two characters is stored in $testament.
while (<DATA>) {
chomp;
if (/([^\t]*)\t([^\t]*)/) {
$bkAbr{$2} = $1;
$bkFull{$1} = $2;
} elsif (/^..$/) {
$testament = $&;
}
}
# Only these two steps are active; every later pipeline step is commented out.
GetUserDefaults();
GetULBBooksToProcess();
#ReadExceptions();
#close LOG;
#open LOG, ">:utf8", "Logs${d}tW_pairs.log" or die "Logs${d}tW_pairs.log: $!";
#PairtWEntriesTotWPageAndUniqSNs();
#close LOG;
#open LOG, ">:utf8", "Logs${d}tWs_from_MAST.log" or die "Logs${d}tWs_from_MAST.log: $!";
#ReadLinkedSNs();
#LinkULBtoCV();
#LinkSNsToULBtextViaEntries();
#Output();
close MISSING;
close OUT;
close LOG;
# On macOS, open the "missing entries" file with the configured text editor
# ($textEditor was turned into an "open -a ..." command by GetUserDefaults).
if ($^O eq "darwin") {system ("$textEditor $missing")}
print "\n\tDone.\n\n";
# ==============================
# Reads the per-OS user defaults file (User/<$udf>, relative to the current
# directory) and sets the file-scoped settings:
#   $textEditor  from the "Text editor: ..." line (prefixed with "open -a "
#                on macOS),
#   $repoPath    from the "Repository directory: ..." line,
#   $topTwDir, $topOTSourceLangDir, $topNTSourceLangDir derived from $repoPath.
# Dies if the file cannot be opened or either setting is empty.
sub GetUserDefaults {
    open (my $defaults, "<:utf8", "User${d}$udf") or die "User${d}$udf:\n$!";
    while (defined(my $thisLine = <$defaults>)) {
        chomp $thisLine;
        if ($thisLine =~ /^Text editor: (.*)$/) {
            $textEditor = ($^O eq "darwin") ? "open -a $1" : $1;
            next;
        }
        $repoPath = $1 if $thisLine =~ /^Repository directory: (.*)$/;
    }
    die "No text editor found" if $textEditor eq "";
    die "No path to repo found" if $repoPath eq "";
    $topTwDir           = "$repoPath${d}en_tw${d}bible";
    $topOTSourceLangDir = "$repoPath${d}MAST_HB";
    $topNTSourceLangDir = "$repoPath${d}OGNT";
    close $defaults;
}
# Reads the work file ($workFile) and queues one source-language XML file per
# usable line.  A usable line does not start with '#' and has four
# tab-separated fields; field 1 is used as a number and field 3 as a code.
# When field 1 is greater than 39 the file is looked up under
# $topNTSourceLangDir, otherwise under $topOTSourceLangDir.  Each resulting
# path "<dir>/<field1>-<FIELD3 upper-cased>.xml" is stored in $sourceFile and
# pushed onto @fileList; the final list is logged.
sub GetULBBooksToProcess {
    say LOG "GetULBBooksToProcess on \$workFile: $workFile";
    open (my $file, "<:utf8", "$workFile") or die "$workFile:\n$!";
    while (defined(my $line = <$file>)) {
        chomp $line;
        next unless $line =~ /^([^#][^\n\t]*)\t[^\n\t]*\t([^\n\t]*)\t[^\n\t]*$/;
        my ($bookNumber, $bookCode) = ($1, $2);
        my $bookStem = $bookNumber . "-" . uc $bookCode;
        $topSourceLangDir = ($bookNumber > 39)
            ? $topNTSourceLangDir
            : $topOTSourceLangDir;
        $sourceFile = "$topSourceLangDir${d}$bookStem.xml";
        push @fileList, $sourceFile;
    }
    close $file;
    say LOG "\@fileList:\n@fileList";
}
# Loads the exceptions file ($exceptions).  For every line that does not
# start with '#' and has at least three tab-separated fields, the first field
# is the key; fields two and three (still joined by their tab) are appended,
# followed by "√", to $adjust{key}, and $specifiedText{key} is set to 1.
# All collected adjustments are then written to LOG in sorted key order.
sub ReadExceptions {
    say "Reading exceptions";
    say LOG "ReadExceptions from \$exceptions: $exceptions";
    open (my $file, "<:utf8", "$exceptions") or die "$exceptions:\n$!";
    while (defined(my $line = <$file>)) {
        chomp $line;
        next unless $line =~ /^([^#\n][^\t\n]*)\t([^\t\n]*\t[^\t\n]*)/;
        my ($rf, $oldNew) = ($1, $2);
        $adjust{$rf} .= "$oldNew√";
        $specifiedText{$rf} = 1;
    }
    for my $key (sort keys %adjust) {
        say LOG "\$key: $key: \$adjust{$key}: $adjust{$key}";
    }
    close $file;
}
# Scans every file found under $topTwDir whose name matches $filePattern and
# builds the lookup tables used later:
#   %entriesThisPage  page  => entry list taken from the page's "# ..." line
#   %pageThisEntry    entry => page
#   %sourcePage       entry => page (filled from the same list)
#   %entriesThisSN    number => entry lists of pages citing it (", "-joined)
#   %pagesThisSN      number => pages citing it (", "-joined)
#   %relevantSNs      number => number, for every number seen
# Numbers are the digits following a "G" on any line containing "Strong's".
# Prints one "." per file to STDOUT as progress and logs the results to LOG.
sub PairtWEntriesTotWPageAndUniqSNs {
say "Pairing tW entries with tW pages and unique Strong's numbers";
my (@filesToRun, @relevantSNs) = ();
my $filePattern = '*.md' ;
find (sub {push @filesToRun, $File::Find::name if (m/^(.*)$filePattern$/)}, $topTwDir) ;
@filesToRun = sort @filesToRun;
#say LOG "\@filesToRun: @filesToRun";
foreach my $file (@filesToRun) {
print ".";
# Use backslashes in the path on anything that is not Linux or macOS.
$file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";
my ($thisList, $shortFile) = ("", $file);
# Page key: path relative to $topTwDir, without ".md", and with the FIRST
# path separator replaced by a comma (no /g on the last substitution).
$shortFile =~ s/^\Q$topTwDir${d}\E//;
$shortFile =~ s/\.md$//;
$shortFile =~ s/\Q$d\E/,/;
#say "|$shortFile|"; die;
#if ($shortFile =~ /^(kt|names)/) {
#my $fileText = read_file("$file", binmode => 'utf8');
open IN, $file or die "$!";
while (<IN>) {
# Heading line: remember its entry list in $thisList; the value stays in
# effect for the remaining lines of this file.
if (/^# ([^\n]*)$/) {
$thisList = $1;
$thisList =~ s/[\r\n]*$//;
#say LOG "\$thisList = |$thisList|";
# Drop parenthesised remarks from the entry list.
$thisList =~ s/ \([^\)]*\)//g;
$entriesThisPage{$shortFile} = $thisList;
my @ULBEntries = split /, /, $thisList;
foreach my $ULB_entry(@ULBEntries) {
$pageThisEntry{$ULB_entry} = $shortFile;
say LOG "\$pageThisEntry{$ULB_entry}: $pageThisEntry{$ULB_entry}"
}
my @tempArray = split /, /, $thisList;
foreach my $slice (@tempArray) {
$sourcePage{$slice} = $shortFile;
say LOG "$slice: \$sourcePage{$slice}: $sourcePage{$slice}";
}
}
# Strong's line: consume every "G<digits>" in turn and attach the current
# entry list and page to that number.
# NOTE(review): only the letter G is recognised here - confirm whether
# numbers with another prefix are meant to be picked up as well.
if (/Strong's(.*)$/) {
my $SNs = $1;
while ($SNs =~ s/[G](\d*)//) {
push @relevantSNs, $1;
$entriesThisSN{$1} .= "$thisList, ";
$pagesThisSN{$1} .= "$shortFile, ";
}
}
# Runs for every input line: de-duplicate the numbers collected so far and
# mirror them into the %relevantSNs lookup hash.
@relevantSNs = uniq(@relevantSNs);
foreach (@relevantSNs) {
$relevantSNs{$_} = "$_";
}
}
close IN;
}
say "";
#say LOG "====";
#say LOG "====";
foreach my $thisSN (sort keys %entriesThisSN) {
say LOG "tW entries for $thisSN: $entriesThisSN{$thisSN}"
}
}
# Reads $ULBfile, whose usable lines are "<reference><TAB><verse text>", and
# indexes the verses in reading order:
#   $text{ref}, $fullText{ref}  - the verse text (two independent copies)
#   $ref{counter}               - reference for each value of the counter $cv
#   $order{ref}                 - counter value for each reference
# Afterwards, for every stored reference, trailing non-word characters are
# stripped from the %text copy and " q" is appended to it.
sub LinkULBtoCV {
    say "Linking ULB to chapter and verse";
    say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";
    open IN, "$ULBfile" or die "$ULBfile: $!";
    while (<IN>) {
        next unless /^([^\t]*)\t(.*)$/;
        my ($verseRef, $verseText) = ($1, $2);
        $cv++;
        $text{$verseRef}     = $verseText;
        $fullText{$verseRef} = $verseText;
        $ref{$cv}            = $verseRef;
        $order{$verseRef}    = $cv;
    }
    close IN;
    foreach my $key (sort keys %ref) {
        my $verseRef = $ref{$key};
        $text{$verseRef} =~ s/[^\w]+$//;
        $text{$verseRef} .= " q";
    }
}
# Reads every XML file in @fileList and records, per verse, which relevant
# Strong's numbers occur in it.  A <verse osisID="..."> line sets the current
# reference; on other lines each <w lemma="digits" whose number is a key of
# %relevantSNs is appended (followed by "√") to $SNsInCV{reference}, unless
# that number is already present there.  Finally, entries whose key appears
# in %newRef are moved to the mapped reference.
# Dies if a source file cannot be opened.
sub ReadLinkedSNs {
say "Reading linked Strong's numbers";
my ($flag, $sourceFile) = ("","");
foreach $sourceFile (@fileList) {
say LOG "opening \$sourceFile: $sourceFile";
open IN, "$sourceFile" or die "$sourceFile can't be opened\n\n";
my ($thisBook, $thisChap, $thisVers, $thisRef);
my (@pages);
while (<IN>) {
chomp;
# NOTE(review): the two '.' separators after the book code are unescaped and
# therefore match any single character, not only a literal dot.
if (/<verse osisID="([^\.]*).(\d+).(\d+)">/) {
my ($bk, $ch, $vs) = ($1, $2, $3);
#say LOG "#>\t$bk $ch:$vs, \$bkFull{$bk}: $bkFull{$bk}";
# Use the remapped reference when %newRef has one, else "<full book> ch:vs".
if (exists $newRef{"$bk $ch:$vs"}) {
$thisRef = $newRef{"$bk $ch:$vs"}
} else {
($thisRef) = ("$bkFull{$bk} $ch:$vs");
}
#say LOG "##\t$bk $ch:$vs, $thisRef";
}
else {
# NOTE(review): this pattern needs a '"' directly followed by a newline,
# but the line was chomped above, so it presumably never matches - confirm
# whether lemma values were meant to be reduced to their digits here.
s/(lemma=").*?(\d+).*?("\n)/$1$2$3/;
while (/<w lemma="(\d+)"/g) {
#say LOG $_;
my ($thisNum) = ($1);
#say LOG "\t\$thisNum: $thisNum, \$SNsInCV{$thisRef}: $SNsInCV{$thisRef}";
if (exists $relevantSNs{$thisNum}) {
$SNsInCV{$thisRef} .= "$thisNum√" unless ($SNsInCV{$thisRef} =~ /\b$thisNum\b/);
}
#say LOG ">\t\$thisNum: $thisNum, \$SNsInCV{$thisRef}: $SNsInCV{$thisRef}";
}
}
}
close IN;
}
# Re-key: collect the moved entries in %temp first so that %SNsInCV is not
# given new keys while its key list is being walked.
my %temp;
foreach my $oldRef (sort keys %SNsInCV) {
if (exists $newRef{$oldRef}) {
$temp{$newRef{$oldRef}} = $SNsInCV{$oldRef};
delete $SNsInCV{$oldRef};
}
}
foreach my $changedRef (sort keys %temp) {
$SNsInCV{$changedRef} = $temp{$changedRef};
say LOG "\$SNsInCV{$changedRef}: $SNsInCV{$changedRef}";
}
}
# For every verse in %ref (sorted key order):
#   1. prints "<reference>:" to OUT;
#   2. tidies the verse's "√"-separated number list in %SNsInCV and, when
#      %specifiedText has the verse, rewrites the list through Adjust();
#   3. for each distinct number, looks for one of its candidate entries in
#      the working copy of the verse text ($text{reference}); a match is
#      removed from that copy and recorded as a "[entry](page)" line;
#   4. writes "<reference> <number>" to MISSING for numbers with no match;
#   5. reorders the recorded lines with ProperOrderOutString() and prints
#      them to OUT.
# List items may carry a tag: "number[entry]" forces that entry,
# "number(page)" restricts the candidates to the entries of that page.
# Side effect: changes the global list separator $" while running.
sub LinkSNsToULBtextViaEntries {
say "Linking Strong's numbers to ULB text via tW page entries";
say LOG "sub LinkSNsToULBtextViaEntries called";
foreach my $thisRef (sort keys %ref) {
say LOG "\nLinkSNsToULBtextViaEntries: \$thisRef: $thisRef\t\$ref{$thisRef}: $ref{$thisRef}, \$specifiedText{$ref{$thisRef}}: $specifiedText{$ref{$thisRef}}";
# Reset the per-verse work hashes.
(%workEntries, %ulbOrder) = ();
my %workPage;
# $thisRef is the counter key; $thisCV is the actual verse reference.
my ($thisCV, $checkList, $tempString, $outString) = ($ref{$thisRef}, "", "", "");
say OUT "$thisCV:";
my (@allEntries);
$listOfPages{$thisCV} = "";
#say LOG "\$SNsInCV{$thisCV}: |$SNsInCV{$thisCV}|\n$text{$thisCV}";
say LOG "\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
# Trim trailing separators and surplus spaces from the number list.
$SNsInCV{$thisCV} =~ s/√+$//;
$SNsInCV{$thisCV} =~ s/^ +//;
$SNsInCV{$thisCV} =~ s/ +$//;
$SNsInCV{$thisCV} =~ s/ {2,}/ /g;
#say LOG "*\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
# Verses listed in the exceptions file get their list rewritten by Adjust(),
# followed by another round of whitespace/separator clean-up.
if (exists $specifiedText{$thisCV}) {
#say LOG "*\t\$SNsInCV{$thisCV}: $SNsInCV{$thisCV}";
$SNsInCV{$thisCV} = Adjust($SNsInCV{$thisCV}, $thisCV);
$SNsInCV{$thisCV} =~ s/^ +(.*)/$1/;
$SNsInCV{$thisCV} =~ s/(.*) +$/$1/;
$SNsInCV{$thisCV} =~ s/ {2,}/ /g;
$SNsInCV{$thisCV} =~ s/^√+//;
$SNsInCV{$thisCV} =~ s/√{2,}/√/g;
$SNsInCV{$thisCV} =~ s/√ /√/g;
$SNsInCV{$thisCV} =~ s/√$//;
say LOG "**\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
}
#say LOG "<>\t<>\t\$SNsInCV{$thisCV}: |$SNsInCV{$thisCV}|";
say LOG "*7*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
# while ($SNsInCV{$thisCV} =~ s/(\[[^\]]*?) ([^\]]*?\] )/$1√$2/) {};
say LOG "*8*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
# Keep only the first list item for each leading number.
my @tempArray = split /√/, $SNsInCV{$thisCV};
my %alreadyUsed;
my @regArray;
foreach my $slice (@tempArray) {
if ($slice =~ /^(\d*)/) {
my $number = "$1";
push (@regArray, $slice) unless (exists $alreadyUsed{$number});
$alreadyUsed{$number} = $number
}
}
# Global list separator changed here (affects "@array" interpolation below).
$" = "|\n";
say LOG "*9*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}\n\@regArray: >@regArray<";
foreach my $thisNum (@regArray) {
say LOG "\$thisNum: $thisNum";
my ($found, $specPage);
# Case 1 - "number[entry]": the entry is forced; emit its link directly.
if ($thisNum =~ /(\d+)\[(.*?)\]/) {
($thisNum) = ($1);
my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($2, $2, $2);
# Fall back to the lower-cased entry when the exact spelling has no page;
# if neither exists, report on STDOUT and carry on.
unless (exists $pageThisEntry{$forced_entry_for_page}) {
my $try = lc $forced_entry_for_page;
if (exists $pageThisEntry{$try}) {
$forced_entry_for_page = lc $forced_entry_for_page
}
else {
say "!!!\tNo \$pageThisEntry{$try} found for $try\n\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page\n$ref{$thisRef}\t$thisNum\t$forced_entry_for_page";
#die
}
}
say LOG
"*0* \$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
#while ($forced_entry_for_display =~ s/([^ \]]*?)√([^ \]]*?)/$1 $2/) {}
$forced_entry_for_search = lc $forced_entry_for_display;
#while ($forced_entry_for_search =~ s/ ... /\.\*\?/) {}
say LOG
"*A* \$thisNum: >>$thisNum<< \$forced_entry_for_display: >$forced_entry_for_display<,
\$forced_entry_for_search: >>$forced_entry_for_search<<
\$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
$outString .= "[$forced_entry_for_display]($pageThisEntry{$forced_entry_for_page})\n";
say LOG "\t\t$outString:\n$outString";
# Remove the forced entry from the working text so later numbers cannot
# match the same words.  " ... " in an entry separates parts that may be
# apart in the verse; only the parts are removed, the text between is kept.
if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) {
my ($first, $second, $third) = ($1, $2, $3);
$text{$thisCV} =~ s/$first(.*?)$second(.*?)$third/$1 $2/i;
say LOG "\t*\t$text{$thisCV}";
} elsif ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*)/) {
my ($first, $second) = ($1, $2);
$text{$thisCV} =~ s/$first(.*?)$second/$1/i;
say LOG "\t**\t$text{$thisCV}";
} else {
#say "\$text{$thisCV}:\n$text{$thisCV}\n\$forced_entry_for_search: |$forced_entry_for_search|";
$text{$thisCV} =~ s/$forced_entry_for_search//i;
say LOG "\t***\t$text{$thisCV}";
}
next;
# Case 2 - "number(page)": candidates come from that page only.
} elsif ($thisNum =~ /\d+(\(([\w\/]+)\))/) {
$specPage .= $2;
say LOG "*B*\t\$specPage: $specPage";
# Case 3 - plain number: candidates are all entries linked to the number.
} else {
say LOG "*C*\t\$thisNum: $thisNum";
}
say LOG "\t\$specPage: $specPage";
if ($specPage) {
$workEntries{$thisNum} = $entriesThisPage{$specPage};
} else {
$workEntries{$thisNum} = $entriesThisSN{$thisNum};
}
$workEntries{$thisNum} =~ s/, $//;
say LOG "*D*\t\$thisNum: >$thisNum<\t\$workEntries{$thisNum}: >$workEntries{$thisNum}<";
# Order the candidates: the comparator compares first characters
# numerically, then lengths, then the whole entries numerically, and the
# sorted list is reversed.
my @beforeArray = split /, /, $workEntries{$thisNum};
my @sortedArray = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|| length($a) <=> length($b)
|| $a <=> $b }
@beforeArray;
$" = "\n\t";
say LOG "*E*\t\@sortedArray: @sortedArray\n\$outString so far:\n$outString\n\$text{$ref{$thisRef}}: $text{$ref{$thisRef}}";
# Try each candidate in turn; the first one found in the working text wins.
foreach my $entry (@sortedArray) {
my $testEntry = $entry;
print LOG "\$entry: $entry. Becomes ";
# Turn "a ... b" into the pattern "(a)\b(.*?)\b(b)".
while ($testEntry =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
print LOG "\$testEntry: |$testEntry| ";
# First test: split entry - on a match the whole span is replaced by the
# text that stood between the two parts.
if ($testEntry =~ /\(\.\*\?\)/ && $text{$thisCV} =~ s/\b($testEntry)\b/$3/i) {
say LOG "\n===\n$thisNum |$testEntry| is found in first test.\n===";
$outString .= "[$entry]($pageThisEntry{$entry})\n";
say LOG $outString . "\n===" . $text{$thisCV};
$found = 1;
goto Breakout;
# Second test: the entry followed by a non-word character or quote, or
# preceded by a quote; the match is deleted from the working text.
} elsif ($text{$thisCV} =~ s/\b($testEntry)[^\w']//i || $text{$thisCV} =~ s/\b($testEntry)["']//i || $text{$thisCV} =~ s/["']($testEntry)\b//i) {
say LOG "\n===\n$thisNum |$testEntry| is found in second test.\n===";
$outString .= "[$entry]($pageThisEntry{$entry})\n";
say LOG $outString . "\n" . $text{$thisCV};
$found = 1;
goto Breakout;
} else {
say LOG "and is not found in\n|$text{$ref{$thisRef}}|";
}
}
# Reached either by falling out of the candidate loop or via goto above.
Breakout:
unless ($found) {
say MISSING "$thisCV $thisNum";
say LOG "Breakout: \$thisCV: $thisCV\t\$thisNum: $thisNum"
}
next if $found;
}
say LOG "*F*\t\$outString: $outString";
# Put the collected links into the order in which they occur in the verse.
$outString = ProperOrderOutString($outString, $thisCV);
say LOG "Final \$outString:\n\$outString: $outString";
say OUT "$outString";
#say LOG "sub LinkSNsToULBtextViaEntries finished";
}
}
# Applies the exceptions recorded for one verse to its Strong's-number list.
#   $_[0]  - the verse's current number list.
#   $_[1]  - the verse reference (key into %adjust and %specifiedText).
# $adjust{ref} holds "√"-separated adjustments, each "old<TAB>new":
#   "X<TAB>||"         delete X from the old list
#   "digits<TAB>digits" drop the first number from the old list and add the
#                      second one to the new list
#   "||<TAB>X"         add X to the new list
#   "digits<TAB>words" tag the number with a forced entry, "number[words]",
#                      and move the tagged item to the front of its list
#   "digits<TAB>page"  tag the number with a page, "number(page)"
# Returns the new list joined to what is left of the old list with "√",
# after collapsing repeated separators.  Trace output goes to LOG.
sub Adjust {
    my ($snsOld, $ref, $snsNew) = ($_[0], $_[1], "");
    say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: |$snsOld|";
    #say LOG ">\t\$sns: |$sns|";
    #say LOG ">\t\$specifiedText{$ref}: |$specifiedText{$ref}|";
    $snsOld =~ s/^ +/ /;
    $adjust{$ref} =~ s/√$//;
    say LOG "*0*\t\$adjust{$ref}: $adjust{$ref}";
    my @preadjustments = split /√/, $adjust{$ref};
    foreach my $adjustment (@preadjustments) {
        say LOG "<><>\t\$adjustment: >$adjustment<";
        if ($adjustment =~ /([^\t]*)\t\|\|$/) { # delete this from list to look for
            my $found = $1;
            $snsOld =~ s/\b$found\b ?//;
            say LOG "*1*\t\$found: $found should be deleted from \$snsOld: $snsOld";
        } elsif ($adjustment =~ /^(\d+)\t(\d+)$/) { # change to specified number
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1√//g;
            $snsNew .= "$found2√" unless $snsNew =~ /\b$found2\b /;
            say LOG "*2*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif ($adjustment =~ /^\|\|\t(.*)/) { # add this to list to look for
            my $adj = $1;
            say LOG "*3*\t\$adjustment: $adjustment, \$adj: $adj";
            if ($adj =~ /([^\t]*)\t([^\t]*)/) {
                $snsNew = "[$1]($pageThisEntry{$2})"
            } else {
                $snsNew .= "$adj "
            }
        } elsif ($adjustment =~ /^(\d+)\t([\w\. \-\']+)$/) { # add specified word
            my ($found1, $found2) = ($1, $2);
            if ($snsOld =~ s/\b$found1\b/$found1\[$found2\]/) {
                # Number was in the old list: it is now tagged; move the
                # tagged item to the front of the old list.
                say LOG "*\t4a\t\$snsOld: $snsOld";
                if ($snsOld =~ s/^(.+√)($found1\[$found2\]√)/$2$1/) {}
                elsif ($snsOld =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/) {}
            }
            else {
                # Number was not in the old list: add it to the new list,
                # tag it there and move it to the front.
                $snsNew .= "${found1}√";
                # Bind the substitution to $snsOld.  The original used '.=',
                # which ran the substitution against $_ and appended its
                # result to $snsOld.
                $snsOld =~ s/\b$found1\b//;
                say LOG "*\t4b\t\$snsNew: $snsNew";
                $snsNew =~ s/\b$found1\b/$found1\[$found2\]/;
                $snsNew =~ s/^(.+√)($found1\[$found2\]√)/$2$1/;
                $snsNew =~ s/^(.+√)($found1\[$found2\]$)/$2√$1/;
            }
            $snsOld =~ s/ {2,}/ /;
            $snsOld =~ s/√$//;
            say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        } elsif (($adjustment =~ /^(\d+)\t([\/\d\w]+)$/)) { # add specified page
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1/$found1\($found2\)/;
            say LOG "*5*\t\t\$snsOld: $snsOld";
        }
    }
    $snsOld =~ s/^√//;
    $snsOld =~ s/√+/√/g;
    say LOG "\$snsNew: >$snsNew<\n\$snsNew+\$snsOld: >$snsNew< >$snsOld<";
    $snsNew = "$snsNew√$snsOld";
    say LOG "*5*\t*\t\$snsNew: |$snsNew|";
    # Swap "number number[entry]" pairs so the tagged item comes first.
    while ($snsNew =~ s/(\d+) (\d+\[[^\[\]]*\])/$2$1/) {}
    say LOG "*5*\t**\t\$snsNew: |$snsNew|";
    $snsNew =~ s/√+/√/g;
    $snsNew =~ s/^[ √]//;
    say LOG "*6*\t\t\$snsNew: |$snsNew|";
    return $snsNew;
}
# For every verse in %ref: tidies $SNsInCV{verse} and $checkPages{verse},
# sorts the newline-separated lines of $listOfPages{verse} and stores them
# back, then writes "<verse><TAB><item>" to MISSING for every space-separated
# item of $checkPages{verse} that is not among the sorted lines (after the
# first sorted line has been dropped and "[text](page)" lines have been
# reduced to their page).
# The only visible call to this sub is commented out in the driver.
sub Output {
say "Outputting";
#say LOG "Output subRoutine called";
foreach my $key (sort keys %ref) {
my %donePages;
my $thisRef = $ref{$key};
#print LOG "\$key: $key\t\$thisRef: $thisRef\t";
#say LOG "\$SNsInCV{$thisRef}: |$SNsInCV{$thisRef}|";
$SNsInCV{$thisRef} =~ s/^ +//;
$SNsInCV{$thisRef} =~ s/ +$//;
$SNsInCV{$thisRef} =~ s/ {2,}/ /;
#say LOG "\$SNsInCV{$thisRef}: |$SNsInCV{$thisRef}|";
#say LOG "\$listOfPages{$thisRef}: |$listOfPages{$thisRef}|";
#$listOfPages{$thisRef} =~ s/^ +//;
#$listOfPages{$thisRef} =~ s/ +$//;
#$listOfPages{$thisRef} =~ s/ {2,}/ /;
#say LOG "\$listOfPages{$thisRef}: |$listOfPages{$thisRef}|";
my @array = split /\n/, $listOfPages{$thisRef};
#say LOG "\@array: |@array|";
# NOTE(review): two 'sort' keywords in a row - presumably the result of the
# case-insensitive sort is sorted again with the default comparison; confirm
# which order is intended.
my @sorted =
sort sort { lc($a) cmp lc($b) }
@array;
#say LOG "\@sorted: |@sorted|";
# Global list separator set to newline, then used to re-join the lines.
$" = "\n";
$listOfPages{$thisRef} = "@sorted";
say LOG "\$listOfPages{$thisRef}: $listOfPages{$thisRef}\n\$checkPages{$thisRef}: $checkPages{$thisRef}";
#say OUT "$thisRef: $listOfPages{$thisRef}\n";
$checkPages{$thisRef} =~ s/^ +//;
$checkPages{$thisRef} =~ s/ +$//;
$checkPages{$thisRef} =~ s/ {2,}/ /;
$checkPages{$thisRef} =~ s/ \|\|//;
say LOG "\$checkPages{$thisRef}:\t|$checkPages{$thisRef}|";
my @checkArray = split / /, $checkPages{$thisRef};
# Discard the first sorted line before building the "done" set.
shift @sorted;
#say LOG "\@checkArray: |@checkArray|";
#say LOG "\@sorted: |@sorted|";
#shift @sorted;
#say LOG "\@sorted: |@sorted|";
# Reduce each "[text](page)" line to "page" and remember it as done.
foreach my $slice (@sorted) {
#print LOG "\$slice: $slice\t";
$slice =~ s/\[.*?\]\((.*?)\)/$1/;
#say LOG "\t\$slice: $slice";
$donePages{$slice} = $slice;
#say LOG "\t\$donePages{$slice}: $donePages{$slice}"
}
#say LOG "\@checkArray: |@checkArray|";
# Anything expected but not done is reported as missing.
foreach my $slice (@checkArray) {
#say LOG "\$slice: $slice";
unless (exists $donePages{$slice}) {
#say LOG "\$thisRef: $thisRef\t\$slice:$slice";
#say MISSING "$thisRef\t$slice\t||";
say MISSING "$thisRef\t$slice";
}
}
}
}
# For every key of %pages, applies the ", "-separated "old<TAB>new" pairs in
# $substitutedPages{key} to $SNsInCV{key}:
#   old is "||"  - append "new " to the list,
#   new is "||"  - remove the first "old " from the list,
#   otherwise    - replace the first old with new.
# After each pair $checkPages{key} is set to the list as it stands, and any
# " ||" markers are then removed from the list.  Keys without substitutions
# simply get $checkPages{key} = $SNsInCV{key}.  Before/after states are
# written to LOG.
sub Substitute {
    foreach my $key (sort keys %pages) {
        say LOG "\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";
        if (!exists $substitutedPages{$key}) {
            $checkPages{$key} = $SNsInCV{$key};
        }
        else {
            $substitutedPages{$key} =~ s/, $//;
            foreach my $pair (split /, /, $substitutedPages{$key}) {
                next unless $pair =~ /([^\t]*)\t([^\t]*)/;
                my ($old, $new) = ($1, $2);
                if    ($old eq "||") { $SNsInCV{$key} .= "$new " }
                elsif ($new eq "||") { $SNsInCV{$key} =~ s/$old // }
                else                 { $SNsInCV{$key} =~ s/$old/$new/ }
                # Snapshot first, then strip the markers from the live list.
                $checkPages{$key} = $SNsInCV{$key};
                $SNsInCV{$key} =~ s/ \|\|//g;
            }
        }
        say LOG "<>\t\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";
    }
}
# Reorders "[entry](page)" lines into the order in which the entries occur
# in the verse.
#   $_[0]  - newline-separated "[entry](page)" lines.
#   $_[1]  - verse reference (key into %fullText).
# Each entry is searched (case-insensitively, on word boundaries) in
# $fullText{verse}; the matched words are overwritten with spaces of the
# same length so a later entry cannot match the same words, and the line is
# filed under the character offset of the match.  Lines that do not have the
# "[..](..)" shape, or whose entry is not found, are left out.
# Returns the filed lines, ascending by offset, each followed by "\n".
sub ProperOrderOutString {
    my @unordered = split /\n/, $_[0];
    my ($thisCV, $outS) = ($_[1], "");
    my (%orderedSet);
    foreach my $thisSet (@unordered) {
        say LOG "\t>\t$thisSet";
        if ($thisSet =~ /(\[([^\]]*)\])(\([^\)]*\))/) {
            my ($ulb, $fileLoc) = ($2, $3);
            # " ... " in an entry stands for any text between its parts.
            $ulb =~ s/ \.\.\. /.*?/g;
            say LOG "\t>>\t\$ulb: $ulb, \$fileLoc: $fileLoc, \$fullText{$thisCV}\n$fullText{$thisCV}";
            if ($ulb =~ /^(.*)\.\.\.(.*)$/) {
                # Entry still contains a literal "...": blank out its two
                # parts separately and keep the text between them.
                my ($found1, $found2) = ($1, $2);
                my $blank1 = " " x length($found1);
                my $blank2 = " " x length($found2);
                # The second part to look for is $found2 (the original
                # searched for $found1 twice while blanking with $blank2).
                if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*)\b$found2\b(.*)$/$1$blank1$2$blank2$3/i) {
                    say LOG "\t>>>\t$fullText{$thisCV}";
                    # $1 is the text before the match; its length is the offset.
                    my ($order) = (length $1);
                    $orderedSet{$order} = $thisSet;
                    say LOG "\t\t\t\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
                }
            } else {
                my $blank = " " x length($ulb);
                if ($fullText{$thisCV} =~ s/^(.*?)\b$ulb\b(.*)$/$1$blank$2/i) {
                    say LOG "\t>>>>\t$fullText{$thisCV}";
                    my ($order) = (length $1);
                    $orderedSet{$order} = $thisSet;
                    say LOG "\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}"
                }
            }
        }
    }
    foreach my $key (sort {$a<=>$b} keys %orderedSet) {
        $outS .= "$orderedSet{$key}\n"
    }
    return $outS;
}
__DATA__
OT
gen Genesis
exo Exodus
lev Leviticus
num Numbers
deu Deuteronomy
jos Joshua
jdg Judges
rut Ruth
1sa 1 Samuel
2sa 2 Samuel
1ki 1 Kings
2ki 2 Kings
1ch 1 Chronicles
2ch 2 Chronicles
ezr Ezra
neh Nehemiah
est Esther
job Job
psa Psalms
pro Proverbs
ecc Ecclesiastes
sng Song of Solomon
isa Isaiah
jer Jeremiah
lam Lamentations
ezk Ezekiel
dan Daniel
hos Hosea
jol Joel
amo Amos
oba Obadiah
jon Jonah
mic Micah
nam Nahum
hab Habakkuk
zep Zephaniah
hag Haggai
zec Zechariah
mal Malachi
NT
mat Matthew
mrk Mark
luk Luke
jhn John
act Acts
rom Romans
1co 1 Corinthians
2co 2 Corinthians
gal Galatians
eph Ephesians
php Philippians
col Colossians
1th 1 Thessalonians
2th 2 Thessalonians
1ti 1 Timothy
2ti 2 Timothy
tit Titus
phm Philemon
heb Hebrews
jas James
1pe 1 Peter
2pe 2 Peter
1jn 1 John
2jn 2 John
3jn 3 John
jud Jude
rev Revelation