Update after new exceptions file made
This commit is contained in:
parent
92c7b7ca6e
commit
cdc8d56f19
|
@ -0,0 +1,69 @@
|
|||
use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use FindBin '$Bin';

# Appends a fourth, tab-separated column to lines of the exceptions file.
#
# 1. Every *.md file under $topDir is read; the comma-separated names on its
#    first "# " heading line are recorded in %page, keyed by name, with the
#    file's base name (no directory, no ".md") as the value.
# 2. For every recorded name, each three-column line of the exceptions file
#    whose third column is exactly that name (and whose first column contains
#    no '#') gets the page base name appended as a fourth column.
# 3. The result is written to New.exceptions.txt; a log goes to Logs/log.txt.

my $topDir = "/Users/Henry/Documents/WACS/en_tw";
my $exceptionsFile = "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Exceptions/Exceptions.txt";
my $newExceptionsFile = "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Output/New.exceptions.txt";
my $logFile = "$Bin/Logs/log.txt";

my (%page);
my (@filesToRun, @entryList, @workList) = ();

open(my $log, '>', $logFile) or die "$logFile: $!";

# Inside the callback $_ is the file's base name; only real ".md" files count.
find( sub { push @filesToRun, $File::Find::name if /\.md$/ }, $topDir);

say "Pairing entries";

foreach my $file (@filesToRun) {
    # Reduce the full path to the bare page name.
    my $shortFile = $file;
    $shortFile =~ s/^[^\n]*\/([^\n\/\\\.]*)\.md$/$1/;

    my $fileText = read_file("$file", binmode => 'utf8');

    my @theseEntries;
    if ($fileText =~ /^# ([^\n]*)\n/) {
        @theseEntries = split /, /, $1;
        push @entryList, @theseEntries;
    }

    foreach my $slice (@theseEntries) {
        # A name that occurs in several headings keeps the last page seen.
        $page{$slice} = $shortFile;
        say {$log} "$slice\t$page{$slice}";
    }
}

say {$log} "<>";

# Ordering is kept exactly as before: compare first character numerically,
# then length, then numeric value, and reverse the result.
@workList = reverse sort { substr($a,0,1) <=> substr($b,0,1)
                           || length($a) <=> length($b)
                           || $a <=> $b }
            @entryList;

say "Reading exceptions file";

my $fileText = read_file($exceptionsFile, binmode => 'utf8');

$" = "\n";
say {$log} "===Entry list===\n@entryList\n===";

foreach my $entry (@workList) {
    say $entry;
    # \Q...\E: the entry is literal text, not a pattern.
    # ^...$ with /m matches whole lines without consuming the newlines, so
    # two consecutive lines carrying the same entry are both updated (the old
    # pattern swallowed the separating "\n" and skipped every second one).
    $fileText =~ s/^([^#\t\n]*\t[^\t\n]*\t\Q$entry\E)$/$1\t$page{$entry}/mg;
}

say {$log} "\n===Work list===\n@workList\n===";

open(my $out, '>', $newExceptionsFile) or die "$newExceptionsFile: $!";
say {$out} $fileText;
close $out or die "$newExceptionsFile: $!";

close $log;

say "Done.";
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -34479,6 +34479,7 @@ Job 27:4 1696 ||
|
|||
Job 27:4 1897 ||
|
||||
Job 27:4 8193 ||
|
||||
Job 27:5 5493 ||
|
||||
Job 27:5 6663 ||
|
||||
Job 27:6 2388 ||
|
||||
Job 27:6 3117 ||
|
||||
Job 27:6 7503 ||
|
||||
|
|
|
@ -1,525 +0,0 @@
|
|||
|6
|
||||
34
|
||||
68
|
||||
113
|
||||
116
|
||||
120
|
||||
157
|
||||
183
|
||||
214
|
||||
215
|
||||
227
|
||||
241
|
||||
251
|
||||
270
|
||||
281
|
||||
312
|
||||
314
|
||||
319
|
||||
325
|
||||
374
|
||||
376
|
||||
398
|
||||
399
|
||||
400
|
||||
481
|
||||
539
|
||||
560
|
||||
606
|
||||
724
|
||||
753
|
||||
830
|
||||
926
|
||||
927
|
||||
935
|
||||
954
|
||||
995
|
||||
1004
|
||||
1086
|
||||
1101
|
||||
1121
|
||||
1123
|
||||
1129
|
||||
1156
|
||||
1167
|
||||
1214
|
||||
1238
|
||||
1245
|
||||
1270
|
||||
1320
|
||||
1323
|
||||
1363
|
||||
1364
|
||||
1387
|
||||
1389
|
||||
1396
|
||||
1397
|
||||
1419
|
||||
1431
|
||||
1438
|
||||
1505
|
||||
1524
|
||||
1530
|
||||
1540
|
||||
1543
|
||||
1580
|
||||
1644
|
||||
1692
|
||||
1696
|
||||
1697
|
||||
1730
|
||||
1777
|
||||
1817
|
||||
1819
|
||||
1820
|
||||
1826
|
||||
1869
|
||||
1870
|
||||
1934
|
||||
1935
|
||||
1980
|
||||
1984
|
||||
1993
|
||||
1995
|
||||
2015
|
||||
2094
|
||||
2106
|
||||
2114
|
||||
2142
|
||||
2145
|
||||
2146
|
||||
2152
|
||||
2166
|
||||
2220
|
||||
2233
|
||||
2236
|
||||
2256
|
||||
2257
|
||||
2280
|
||||
2308
|
||||
2319
|
||||
2342
|
||||
2370
|
||||
2372
|
||||
2376
|
||||
2380
|
||||
2388
|
||||
2428
|
||||
2436
|
||||
2470
|
||||
2490
|
||||
2491
|
||||
2502
|
||||
2505
|
||||
2506
|
||||
2534
|
||||
2560
|
||||
2600
|
||||
2651
|
||||
2654
|
||||
2656
|
||||
2658
|
||||
2672
|
||||
2691
|
||||
2706
|
||||
2764
|
||||
2790
|
||||
2803
|
||||
2860
|
||||
2864
|
||||
2896
|
||||
2975
|
||||
2976
|
||||
3021
|
||||
3027
|
||||
3028
|
||||
3034
|
||||
3046
|
||||
3068
|
||||
3069
|
||||
3083
|
||||
3117
|
||||
3118
|
||||
3148
|
||||
3179
|
||||
3201
|
||||
3205
|
||||
3206
|
||||
3212
|
||||
3220
|
||||
3225
|
||||
3243
|
||||
3254
|
||||
3293
|
||||
3318
|
||||
3330
|
||||
3335
|
||||
3358
|
||||
3365
|
||||
3366
|
||||
3381
|
||||
3384
|
||||
3426
|
||||
3427
|
||||
3463
|
||||
3470
|
||||
3477
|
||||
3493
|
||||
3498
|
||||
3499
|
||||
3513
|
||||
3533
|
||||
3557
|
||||
3559
|
||||
3582
|
||||
3611
|
||||
3615
|
||||
3627
|
||||
3644
|
||||
3667
|
||||
3671
|
||||
3708
|
||||
3709
|
||||
3730
|
||||
3760
|
||||
3772
|
||||
3782
|
||||
3789
|
||||
3791
|
||||
3820
|
||||
3885
|
||||
3899
|
||||
3908
|
||||
3947
|
||||
3956
|
||||
3966
|
||||
3978
|
||||
4005
|
||||
4060
|
||||
4101
|
||||
4116
|
||||
4127
|
||||
4136
|
||||
4150
|
||||
4161
|
||||
4241
|
||||
4242
|
||||
4279
|
||||
4307
|
||||
4325
|
||||
4334
|
||||
4339
|
||||
4349
|
||||
4385
|
||||
4399
|
||||
4406
|
||||
4456
|
||||
4481
|
||||
4483
|
||||
4486
|
||||
4487
|
||||
4557
|
||||
4585
|
||||
4591
|
||||
4592
|
||||
4605
|
||||
4672
|
||||
4725
|
||||
4740
|
||||
4744
|
||||
4758
|
||||
4800
|
||||
4830
|
||||
4832
|
||||
4917
|
||||
4941
|
||||
4960
|
||||
4994
|
||||
5027
|
||||
5035
|
||||
5045
|
||||
5046
|
||||
5058
|
||||
5062
|
||||
5066
|
||||
5071
|
||||
5074
|
||||
5104
|
||||
5117
|
||||
5128
|
||||
5142
|
||||
5183
|
||||
5186
|
||||
5193
|
||||
5197
|
||||
5221
|
||||
5232
|
||||
5307
|
||||
5315
|
||||
5324
|
||||
5337
|
||||
5344
|
||||
5362
|
||||
5375
|
||||
5413
|
||||
5414
|
||||
5428
|
||||
5437
|
||||
5445
|
||||
5458
|
||||
5459
|
||||
5462
|
||||
5493
|
||||
5518
|
||||
5564
|
||||
5590
|
||||
5592
|
||||
5608
|
||||
5631
|
||||
5640
|
||||
5642
|
||||
5647
|
||||
5674
|
||||
5675
|
||||
5705
|
||||
5732
|
||||
5750
|
||||
5769
|
||||
5782
|
||||
5791
|
||||
5794
|
||||
5800
|
||||
5810
|
||||
5824
|
||||
5857
|
||||
5869
|
||||
5892
|
||||
5922
|
||||
5927
|
||||
5945
|
||||
5953
|
||||
5957
|
||||
5975
|
||||
5977
|
||||
5980
|
||||
6030
|
||||
6031
|
||||
6083
|
||||
6086
|
||||
6106
|
||||
6113
|
||||
6118
|
||||
6121
|
||||
6148
|
||||
6153
|
||||
6213
|
||||
6235
|
||||
6242
|
||||
6256
|
||||
6285
|
||||
6293
|
||||
6310
|
||||
6327
|
||||
6381
|
||||
6418
|
||||
6437
|
||||
6438
|
||||
6440
|
||||
6466
|
||||
6471
|
||||
6475
|
||||
6504
|
||||
6524
|
||||
6555
|
||||
6565
|
||||
6567
|
||||
6600
|
||||
6601
|
||||
6607
|
||||
6679
|
||||
6696
|
||||
6726
|
||||
6738
|
||||
6743
|
||||
6779
|
||||
6793
|
||||
6887
|
||||
6901
|
||||
6921
|
||||
6924
|
||||
6957
|
||||
6963
|
||||
6965
|
||||
6966
|
||||
6994
|
||||
7019
|
||||
7043
|
||||
7050
|
||||
7069
|
||||
7110
|
||||
7121
|
||||
7123
|
||||
7126
|
||||
7138
|
||||
7144
|
||||
7194
|
||||
7198
|
||||
7200
|
||||
7218
|
||||
7223
|
||||
7225
|
||||
7227
|
||||
7230
|
||||
7235
|
||||
7236
|
||||
7264
|
||||
7272
|
||||
7291
|
||||
7307
|
||||
7308
|
||||
7311
|
||||
7339
|
||||
7342
|
||||
7368
|
||||
7381
|
||||
7386
|
||||
7399
|
||||
7423
|
||||
7451
|
||||
7453
|
||||
7455
|
||||
7462
|
||||
7468
|
||||
7469
|
||||
7494
|
||||
7522
|
||||
7560
|
||||
7576
|
||||
7580
|
||||
7588
|
||||
7592
|
||||
7604
|
||||
7605
|
||||
7606
|
||||
7623
|
||||
7637
|
||||
7641
|
||||
7646
|
||||
7651
|
||||
7655
|
||||
7657
|
||||
7665
|
||||
7673
|
||||
7725
|
||||
7737
|
||||
7751
|
||||
7760
|
||||
7761
|
||||
7786
|
||||
7814
|
||||
7817
|
||||
7832
|
||||
7857
|
||||
7896
|
||||
7909
|
||||
7921
|
||||
7931
|
||||
7934
|
||||
7936
|
||||
7962
|
||||
7970
|
||||
7971
|
||||
7973
|
||||
7981
|
||||
7992
|
||||
7993
|
||||
7999
|
||||
8003
|
||||
8027
|
||||
8040
|
||||
8081
|
||||
8085
|
||||
8086
|
||||
8104
|
||||
8130
|
||||
8133
|
||||
8138
|
||||
8145
|
||||
8147
|
||||
8175
|
||||
8193
|
||||
8217
|
||||
8245
|
||||
8248
|
||||
8254
|
||||
8354
|
||||
8389
|
||||
8394
|
||||
8421
|
||||
8426
|
||||
8446
|
||||
8462
|
||||
8503
|
||||
8548
|
||||
8552
|
||||
8562
|
||||
8610
|
||||
8651
|
||||
5414
|
||||
6612
|
||||
7069
|
||||
7225
|
||||
2580
|
||||
3212
|
||||
3381
|
||||
4390
|
||||
5307
|
||||
1870
|
||||
7272
|
||||
4116
|
||||
2219
|
||||
5869
|
||||
1167
|
||||
3671
|
||||
7339
|
||||
1993
|
||||
5892
|
||||
5042
|
||||
5186
|
||||
6544
|
||||
14
|
||||
935
|
||||
7722
|
||||
227
|
||||
6030
|
||||
4672
|
||||
398
|
||||
8085
|
||||
6310
|
||||
8104
|
||||
376
|
||||
1696
|
||||
6213
|
||||
7743
|
||||
753
|
||||
3254
|
||||
7922
|
||||
120
|
||||
8172
|
||||
3474
|
||||
3474
|
||||
6555
|
||||
6443
|
||||
6086
|
||||
7834
|
||||
3868
|
||||
3027
|
||||
7725
|
||||
4279
|
||||
3426
|
||||
2790
|
||||
7378
|
||||
2600
|
||||
1580
|
||||
|
|
||||
$ARGV[0]: 5475
|
|
@ -0,0 +1,131 @@
|
|||
use 5.12.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
|
||||
# %long maps a book abbreviation to its full name (filled by GetBookNames).
my (%long);
# Directory trees searched for XML files by ProcessFiles.
my (@topDirs) = ('/Users/Henry/Documents/WACS/MAST_HB', '/Users/Henry/Documents/WACS/OGNT');
# Accumulates the whole report; ProcessFiles appends to it.
my $outText;

# Both paths are relative to the current working directory.
open(my $log, '>', 'Logs/log.txt') or die "Logs/log.txt: $!";
open(my $out, '>', 'Output/Lemmas.txt') or die "Output/Lemmas.txt: $!";

GetBookNames();
ProcessFiles();

say {$out} $outText;

# Buffered write errors only surface when the handle is closed.
close $out or die "Output/Lemmas.txt: $!";
close $log;

say "Done.";
|
||||
|
||||
# Walks every directory in @topDirs, reads each *.xml file in sorted order and
# appends to the file-level $outText:
#   - for a <verse osisID="BOOK.CH.VS"> line: a new line "FULLNAME CH:VS: ",
#     where FULLNAME is looked up in %long;
#   - for a line carrying lemma="...": the first run of 1-4 digits inside the
#     attribute, followed by ", ".
# Afterwards the trailing ", " of every line and the leading blank line are
# removed.  Takes no arguments; dies if a file cannot be opened.
sub ProcessFiles {
    foreach my $topDir (@topDirs) {
        say $topDir;

        my @filesToRun;
        # $_ is the base name inside the callback; match a literal ".xml" suffix.
        find( sub { push @filesToRun, $File::Find::name if /\.xml\z/ }, $topDir);

        foreach my $file (sort @filesToRun) {
            say $file;
            open(my $input, '<', $file) or die "$file: $!";
            while (my $line = <$input>) {
                chomp $line;
                if ($line =~ /<verse osisID="(.*)\.(\d{1,3})\.(\d{1,3})">/) {
                    my ($thisBk, $thisCh, $thisVs) = ($1, $2, $3);
                    $thisBk = $long{$thisBk};
                    $outText .= "\n$thisBk $thisCh:$thisVs: ";
                }
                elsif ($line =~ /lemma="[^\d]*(\d{1,4})[^\d]*"/) {
                    $outText .= "$1, ";
                }
            }
            close $input;
        }
    }
    # /m lets $ match at the end of every line; without it only the very last
    # line of the report lost its trailing ", ".
    $outText =~ s/, $//mg;
    $outText =~ s/^\n//;
}
|
||||
|
||||
# Fills %long from the DATA section: every line made of exactly two
# tab-separated fields contributes $long{first field} = second field.
# Lines of any other shape are ignored.
sub GetBookNames {
    while (<DATA>) {
        chomp;
        my ($abbreviation, $fullName) = /^([^\t\n]*)\t([^\t\n]*)$/
            or next;
        $long{$abbreviation} = $fullName;
    }
}
|
||||
|
||||
|
||||
__DATA__
|
||||
gen Genesis
|
||||
exo Exodus
|
||||
lev Leviticus
|
||||
num Numbers
|
||||
deu Deuteronomy
|
||||
jos Joshua
|
||||
jdg Judges
|
||||
rut Ruth
|
||||
1sa 1 Samuel
|
||||
2sa 2 Samuel
|
||||
1ki 1 Kings
|
||||
2ki 2 Kings
|
||||
1ch 1 Chronicles
|
||||
2ch 2 Chronicles
|
||||
ezr Ezra
|
||||
neh Nehemiah
|
||||
est Esther
|
||||
job Job
|
||||
psa Psalms
|
||||
pro Proverbs
|
||||
ecc Ecclesiastes
|
||||
sng Song of Solomon
|
||||
isa Isaiah
|
||||
jer Jeremiah
|
||||
lam Lamentations
|
||||
ezk Ezekiel
|
||||
dan Daniel
|
||||
hos Hosea
|
||||
jol Joel
|
||||
amo Amos
|
||||
oba Obadiah
|
||||
jon Jonah
|
||||
mic Micah
|
||||
nam Nahum
|
||||
hab Habakkuk
|
||||
zep Zephaniah
|
||||
hag Haggai
|
||||
zec Zechariah
|
||||
mal Malachi
|
||||
mat Matthew
|
||||
mrk Mark
|
||||
luk Luke
|
||||
jhn John
|
||||
act Acts
|
||||
rom Romans
|
||||
1co 1 Corinthians
|
||||
2co 2 Corinthians
|
||||
gal Galatians
|
||||
eph Ephesians
|
||||
php Philippians
|
||||
col Colossians
|
||||
1th 1 Thessalonians
|
||||
2th 2 Thessalonians
|
||||
1ti 1 Timothy
|
||||
2ti 2 Timothy
|
||||
tit Titus
|
||||
phm Philemon
|
||||
heb Hebrews
|
||||
jas James
|
||||
1pe 1 Peter
|
||||
2pe 2 Peter
|
||||
1jn 1 John
|
||||
2jn 2 John
|
||||
3jn 3 John
|
||||
jud Jude
|
||||
rev Revelation
|
|
@ -25,7 +25,7 @@ my ($whatami, $inFile);
|
|||
my ($book, $chap, $vers, $text, $outText, $newV, $newC, $newB, $outFile, $usfmText, $metathesis, $repoPath, $ulbPath);
|
||||
|
||||
open(LOG, ">:utf8", "$Bin${d}Logs${d}log.log") or die "$Bin${d}${d}Logs${d}log.log:\n$!";
|
||||
|
||||
say LOG "hi";
|
||||
my ($udf) = "User_defaults.windows.txt";
|
||||
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
|
||||
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
# Adds Synonyms and Related Words section and
|
||||
# Forms Found in the English ULB section
|
||||
# to tW pages
|
||||
|
||||
use 5.12.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use utf8;
|
||||
#use open IN => ":utf8", OUT => ":utf8";
|
||||
use open IO => ":utf8";
|
||||
|
||||
# Nothing in this script writes to LOG, so a failure to create it is reported
# but does not stop the run.
open(LOG, '>', 'Logs/log.log') or warn "Logs/log.log: $!";

# Source tree and the parallel tree the rewritten pages are written to
# (Output() maps one onto the other).
my $topDir = "/Users/Henry/Documents/WACS/W_Q_Restructure/bible";
my $topOutDir = "/Users/Henry/Documents/WACS/W_Q_Restructure_new/bible";

my @filesToRun = ();
# $_ is the base name inside the callback; match a literal ".md" suffix
# (the former '*.md' was a shell glob being used as a regular expression).
find( sub { push @filesToRun, $File::Find::name if /\.md\z/ }, $topDir);

foreach my $file (@filesToRun) {
    say $file;
    my $fileText = read_file("$file", binmode => 'utf8');
    my $outText = Process($fileText);
    Output($file, $outText);
}

close LOG;

say "Done.";
|
||||
|
||||
# =====================
|
||||
|
||||
# Rebuilds the text of one page.
#
# Argument: the full page text.
# Returns:  a new page consisting of
#             "# <first name on the heading line>"
#             "## Synonyms and Related Words:" + the heading names, sorted
#             everything from the first "## Facts:" / "## Definition:" onward
#             "## Forms Found in the English ULB" + the same sorted names
#           with every run of three or more newlines reduced to two.
sub Process {
    my ($page) = @_;

    # Whole first-heading line, its first comma-delimited name, and the body.
    my ($heading) = $page =~ /^# ([^\n]*)\n/;
    my ($keyWord) = $page =~ /^# (?:([^\n,]*)(?:\n|,))/;
    my ($body)    = $page =~ /(## (?:Facts|Definition):.*)$/s;

    my @names = split /, /, $heading;
    my $nameList = join(', ', sort @names);

    my $rebuilt = join("\n\n",
        "# $keyWord",
        "## Synonyms and Related Words:",
        $nameList,
        $body,
        "## Forms Found in the English ULB",
        "$nameList\n\n\n\n",
    );

    # One global pass is enough: each maximal newline run is replaced whole.
    $rebuilt =~ s/\n{3,}/\n\n/g;

    return $rebuilt;
}
|
||||
|
||||
# Writes $text to the counterpart of $OutFile under $topOutDir.
#
# Arguments: the source file path, and the text to write.
# The first occurrence of $topDir in the path is replaced by $topOutDir.
# Dies if the target cannot be opened or the write cannot be completed.
sub Output {
    my ($OutFile, $text) = @_;

    # \Q...\E: the directory is literal text, not a pattern.
    $OutFile =~ s/\Q$topDir\E/$topOutDir/;

    open(my $out, '>', $OutFile) or die "$OutFile: $!";
    print {$out} $text;
    # Buffered write errors only surface when the handle is closed.
    close $out or die "$OutFile: $!";
}
|
|
@ -10,13 +10,13 @@ HTML browser: /Applications/Firefox.app
|
|||
Repository directory: /Users/Henry/Documents/WACS
|
||||
|
||||
translationNotes path: en_tn
|
||||
translationWords path: bible.en_tw
|
||||
translationWords path: bible.en_tw.kt
|
||||
Unlocked Literal Bible path: en_ulb
|
||||
# translationNotes path: gl_.*_tn
|
||||
# translationWords path: gl_.*_bible.en_tw
|
||||
# Unlocked Literal Bible path: gl_.*_ulb
|
||||
Hebrew Bible XML directory: MAST_HB
|
||||
Greek Bible XML directory: MAST_NT
|
||||
Greek Bible XML directory: OGNT
|
||||
|
||||
===============
|
||||
|
||||
|
|
|
@ -0,0 +1,719 @@
|
|||
# Produces list of tWs for each verse by linking MAST_NT to ULB through tWs.
|
||||
# Disambiguates entries found on more than one tW page.
|
||||
# Does both Testaments.
|
||||
|
||||
# Taken from tWs.from.MAST_NT.2.pl.
|
||||
|
||||
# The output from this script is useful for the interleaved PDFs used in MAST.
|
||||
# This version uses an exception file to handle places where the MAST_NT points to
|
||||
# a tW page different from that on which the ULB term appears.
|
||||
|
||||
# Make sure the correct input file is $ULBfile. Run script.
|
||||
# Output is in $output file.
|
||||
# Check the $missing
|
||||
# file for needed corrections, probably lines needing to be added to the
|
||||
# $exceptions file.
|
||||
|
||||
use 5.12.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use FindBin '$Bin';
|
||||
use utf8;
|
||||
#use open IN => ":utf8", OUT => ":utf8";
|
||||
use open IO => ":utf8";
|
||||
use List::MoreUtils qw(uniq);
|
||||
$|=1;
|
||||
$"="\n";
|
||||
|
||||
# Script directory and path separator; both switch to backslashes on Windows.
my ($pwd, $d) = ($Bin, "/");
if ($^O eq "MSWin32") {
    $d = "\\";
    $pwd =~ s/\//\\/g;
}

# Per-platform user-defaults file (Windows unless linux/darwin is detected).
my ($udf) = "User_defaults.windows.txt";
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}

# Early check that the defaults file is readable.  Nothing reads from this
# handle (GetUserDefaults opens the file itself), so release it right away.
open (my $defaults, "<:utf8", "$pwd${d}User${d}$udf") or die "$pwd${d}User${d}$udf:\n$!";
close $defaults;

my ($cv, $ULBfile, $exceptions, $missing, $output, $workFile) =
    ("00000",
     "$Bin${d}Temp${d}Extract.txt",
     "$Bin${d}Exceptions${d}Exceptions.txt",
     "$Bin${d}Output${d}Entries_not_handled.txt",
     "$Bin${d}Output${d}tWs_for_PDF.txt",
     "$Bin${d}User${d}tW_work.txt",
    );

my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSourceLangDir, $topNTSourceLangDir);

my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
    %specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
    %SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pageThisEntry, %adjust, %newRef);

my ($book, $testament);
my (@fileList);

# ==============================

# The log path below is relative, so the chdir has to succeed.
chdir($pwd) or die "$pwd:\n$!";
open LOG, ">:utf8", "Logs${d}Exc_log.log" or die "\$log: Logs${d}Exc_log.log: $!";
open OUT, ">:utf8", $output or die "$output: $!";
open MISSING, ">", $missing or die "$missing: $!";

# DATA lines with a tab fill the two book-name tables (first field <-> second
# field); a line of exactly two characters is stored in $testament.
while (<DATA>) {
    chomp;
    if (/([^\t]*)\t([^\t]*)/) {
        $bkAbr{$2} = $1;
        $bkFull{$1} = $2;
    } elsif (/^..$/) {
        $testament = $&;
    }
}

GetUserDefaults();
GetULBBooksToProcess();
#ReadExceptions();
#close LOG;
#open LOG, ">:utf8", "Logs${d}tW_pairs.log" or die "Logs${d}tW_pairs.log: $!";
#PairtWEntriesTotWPageAndUniqSNs();
#close LOG;
#open LOG, ">:utf8", "Logs${d}tWs_from_MAST.log" or die "Logs${d}tWs_from_MAST.log: $!";
#ReadLinkedSNs();
#LinkULBtoCV();
#LinkSNsToULBtextViaEntries();
#Output();

close MISSING;
close OUT;
close LOG;

# On macOS, $textEditor is an "open -a ..." command (see GetUserDefaults).
if ($^O eq "darwin") {system ("$textEditor $missing")}

print "\n\tDone.\n\n";
|
||||
|
||||
# ==============================
|
||||
|
||||
# Reads the user-defaults file "User/<$udf>" (relative to the current
# directory) and sets the file-level settings:
#   "Text editor: X"           -> $textEditor (prefixed with "open -a " on darwin)
#   "Repository directory: X"  -> $repoPath
# Then derives $topTwDir, $topOTSourceLangDir and $topNTSourceLangDir from
# $repoPath.  Dies if the file cannot be opened or either setting is empty.
sub GetUserDefaults {
    my $defaultsPath = "User${d}$udf";
    open (my $defaults, "<:utf8", $defaultsPath) or die "$defaultsPath:\n$!";

    while (defined(my $setting = <$defaults>)) {
        chomp $setting;
        if ($setting =~ /^Text editor: (.*)$/) {
            $textEditor = $^O eq "darwin" ? "open -a $1" : $1;
            next;
        }
        $repoPath = $1 if $setting =~ /^Repository directory: (.*)$/;
    }

    die "No text editor found" if $textEditor eq "";
    die "No path to repo found" if $repoPath eq "";

    $topTwDir           = "$repoPath${d}en_tw${d}bible";
    $topOTSourceLangDir = "$repoPath${d}MAST_HB";
    $topNTSourceLangDir = "$repoPath${d}OGNT";

    close $defaults;
}
|
||||
|
||||
# Reads $workFile and builds @fileList, one XML path per usable line.
#
# A usable line has four tab-separated fields and does not start with '#'.
# Field 1 (book number) and field 3 (book code, upper-cased) form the file
# name "<number>-<CODE>.xml"; a book number above 39 selects
# $topNTSourceLangDir, anything else $topOTSourceLangDir.
# Also leaves the last directory/path in $topSourceLangDir / $sourceFile.
sub GetULBBooksToProcess {
    say LOG "GetULBBooksToProcess on \$workFile: $workFile";
    open (my $work, "<:utf8", "$workFile") or die "$workFile:\n$!";

    while (defined(my $row = <$work>)) {
        chomp $row;
        my ($bookNum, $bookCode) =
            $row =~ /^([^#][^\n\t]*)\t[^\n\t]*\t([^\n\t]*)\t[^\n\t]*$/
            or next;

        $topSourceLangDir = $bookNum > 39 ? $topNTSourceLangDir
                                          : $topOTSourceLangDir;

        $sourceFile = $topSourceLangDir . $d . $bookNum . "-" . uc($bookCode) . ".xml";
        push @fileList, $sourceFile;
    }

    close $work;
    say LOG "\@fileList:\n@fileList";
}
|
||||
|
||||
# Loads the exceptions file named by $exceptions.
#
# Each line that does not start with '#' and has at least three tab-separated
# fields contributes: field 1 is the key; "field2<TAB>field3" followed by a
# "√" separator is appended to $adjust{key}; $specifiedText{key} is set to 1.
# All of %adjust is then written to LOG in key order.
sub ReadExceptions {
    say "Reading exceptions";
    say LOG "ReadExceptions from \$exceptions: $exceptions";
    open (my $excFh, "<:utf8", "$exceptions") or die "$exceptions:\n$!";

    while (defined(my $row = <$excFh>)) {
        chomp $row;
        next unless $row =~ /^([^#\n][^\t\n]*)\t([^\t\n]*\t[^\t\n]*)/;
        my ($verseRef, $oldNew) = ($1, $2);
        $adjust{$verseRef} .= "$oldNew√";
        $specifiedText{$verseRef} = 1;
    }

    for my $key (sort keys %adjust) {
        say LOG "\$key: $key: \$adjust{$key}: $adjust{$key}";
    }

    close $excFh;
}
|
||||
|
||||
# Scans every *.md page under $topTwDir and fills the file-level tables:
#   %entriesThisPage  page           -> heading entry list (parentheticals removed)
#   %pageThisEntry    entry          -> page
#   %sourcePage       entry          -> page
#   %entriesThisSN    Strong's num   -> "entry list, " appended per page
#   %pagesThisSN      Strong's num   -> "page, " appended per page
#   %relevantSNs      Strong's num   -> itself
# A page is named by its path below $topTwDir without ".md", with the first
# path separator replaced by a comma.  Strong's numbers are the digits after
# each "G" on any line containing "Strong's".
# NOTE(review): only "G" numbers are collected here - confirm whether "H"
# numbers are meant to be handled as well.
sub PairtWEntriesTotWPageAndUniqSNs {
    say "Pairing tW entries with tW pages and unique Strong's numbers";

    my @filesToRun;
    # $_ is the base name inside the callback; match a literal ".md" suffix
    # (the former '*.md' was a shell glob being used as a regular expression).
    find (sub { push @filesToRun, $File::Find::name if /\.md\z/ }, $topTwDir);
    @filesToRun = sort @filesToRun;

    foreach my $file (@filesToRun) {
        print ".";
        $file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin";

        my ($thisList, $shortFile) = ("", $file);
        $shortFile =~ s/^\Q$topTwDir${d}\E//;
        $shortFile =~ s/\.md$//;
        $shortFile =~ s/\Q$d\E/,/;

        open(my $in, '<', $file) or die "$file: $!";
        while (my $line = <$in>) {
            if ($line =~ /^# ([^\n]*)$/) {
                $thisList = $1;
                $thisList =~ s/[\r\n]*$//;
                $thisList =~ s/ \([^\)]*\)//g;    # drop " (...)" qualifiers
                $entriesThisPage{$shortFile} = $thisList;

                my @pageEntries = split /, /, $thisList;
                # Two passes so the log keeps its grouping.
                foreach my $ULB_entry (@pageEntries) {
                    $pageThisEntry{$ULB_entry} = $shortFile;
                    say LOG "\$pageThisEntry{$ULB_entry}: $pageThisEntry{$ULB_entry}"
                }
                foreach my $slice (@pageEntries) {
                    $sourcePage{$slice} = $shortFile;
                    say LOG "$slice: \$sourcePage{$slice}: $sourcePage{$slice}";
                }
            }
            if ($line =~ /Strong's(.*)$/) {
                my $SNs = $1;
                # \d+ (not \d*): a "G" with no digits after it is not a number
                # and must not create an empty key.
                while ($SNs =~ s/[G](\d+)//) {
                    my $number = $1;
                    # Recorded directly; no per-line uniq/rebuild is needed.
                    $relevantSNs{$number}   = "$number";
                    $entriesThisSN{$number} .= "$thisList, ";
                    $pagesThisSN{$number}   .= "$shortFile, ";
                }
            }
        }
        close $in;
    }
    say "";

    foreach my $thisSN (sort keys %entriesThisSN) {
        say LOG "tW entries for $thisSN: $entriesThisSN{$thisSN}"
    }
}
|
||||
|
||||
# Reads $ULBfile, whose usable lines are "reference<TAB>text".
#
# For each such line the counter $cv is incremented and the tables are filled:
#   $text{ref} and $fullText{ref} = text, $ref{$cv} = ref, $order{ref} = $cv.
# Afterwards every $text value loses its trailing non-word characters and
# gets " q" appended.
sub LinkULBtoCV {
    say "Linking ULB to chapter and verse";
    say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";

    open IN, "$ULBfile" or die "$ULBfile: $!";
    while (<IN>) {
        my ($verseRef, $verseText) = /^([^\t]*)\t(.*)$/
            or next;
        $cv++;
        $text{$verseRef} = $fullText{$verseRef} = $verseText;
        $ref{$cv}         = $verseRef;
        $order{$verseRef} = $cv;
    }
    close IN;

    for my $seq (sort keys %ref) {
        # Aliasing $_ to the hash slot edits the stored text in place.
        for ($text{$ref{$seq}}) {
            s/\W+$//;
            $_ .= " q";
        }
    }
}
|
||||
|
||||
# Reads every XML file in @fileList and records, per verse, the Strong's
# numbers that are keys of %relevantSNs.
#
# A <verse osisID="BOOK.CH.VS"> line sets the current reference to
# $newRef{"BOOK CH:VS"} when that exists, otherwise "<$bkFull{BOOK}> CH:VS".
# On any other line each <w lemma="DIGITS" adds "DIGITS√" to
# $SNsInCV{reference} unless the number is already there.
# Finally, entries whose key appears in %newRef are moved to the mapped key.
# Dies if a file cannot be opened.
sub ReadLinkedSNs {
    say "Reading linked Strong's numbers";

    foreach my $sourceFile (@fileList) {
        say LOG "opening \$sourceFile: $sourceFile";
        open(my $in, '<', $sourceFile) or die "$sourceFile can't be opened: $!\n\n";

        my $thisRef;
        while (<$in>) {
            chomp;
            # The dots of the osisID are literal separators.
            if (/<verse osisID="([^\.]*)\.(\d+)\.(\d+)">/) {
                my ($bk, $ch, $vs) = ($1, $2, $3);
                if (exists $newRef{"$bk $ch:$vs"}) {
                    $thisRef = $newRef{"$bk $ch:$vs"}
                } else {
                    ($thisRef) = ("$bkFull{$bk} $ch:$vs");
                }
            }
            else {
                # NOTE(review): after chomp the line holds no "\n", so this
                # pattern (which requires '"' followed by a newline) cannot
                # match.  Left unchanged - confirm the intended normalisation
                # before altering it.
                s/(lemma=").*?(\d+).*?("\n)/$1$2$3/;
                while (/<w lemma="(\d+)"/g) {
                    my ($thisNum) = ($1);
                    if (exists $relevantSNs{$thisNum}) {
                        $SNsInCV{$thisRef} .= "$thisNum√" unless ($SNsInCV{$thisRef} =~ /\b$thisNum\b/);
                    }
                }
            }
        }
        close $in;
    }

    # Collect the moves first so %SNsInCV is not re-keyed while being scanned.
    my %temp;
    foreach my $oldRef (sort keys %SNsInCV) {
        if (exists $newRef{$oldRef}) {
            $temp{$newRef{$oldRef}} = $SNsInCV{$oldRef};
            delete $SNsInCV{$oldRef};
        }
    }
    foreach my $changedRef (sort keys %temp) {
        $SNsInCV{$changedRef} = $temp{$changedRef};
        say LOG "\$SNsInCV{$changedRef}: $SNsInCV{$changedRef}";
    }
}
|
||||
|
||||
# For every verse in %ref (in key order) writes "<verse>:" to OUT followed by
# one "[entry](page)" line per Strong's number that could be tied to the
# verse text, after ordering by ProperOrderOutString().
#
# Per verse:
#   1. $SNsInCV{verse} ("√"-separated items) is tidied; if the verse has an
#      entry in %specifiedText it is first rewritten by Adjust().
#   2. Items are de-duplicated on their leading digits.
#   3. An item "NUM[text]" is a forced entry: "[text](page)" is emitted
#      directly and the text is removed from the working copy in $text{verse}.
#   4. An item "NUM(page)" takes its candidate entries from
#      $entriesThisPage{page}; any other item from $entriesThisSN{item}.
#   5. Candidates are tried in the sorted order below; the first one found in
#      the working text is emitted and removed from that text.  If none is
#      found, "<verse> <item>" is written to MISSING.
# Side effects: changes $" (the list separator) and the working copies in
# %text, and resets %workEntries / %ulbOrder for each verse.
sub LinkSNsToULBtextViaEntries {
    say "Linking Strong's numbers to ULB text via tW page entries";
    say LOG "sub LinkSNsToULBtextViaEntries called";

    foreach my $thisRef (sort keys %ref) {
        say LOG "\nLinkSNsToULBtextViaEntries: \$thisRef: $thisRef\t\$ref{$thisRef}: $ref{$thisRef}, \$specifiedText{$ref{$thisRef}}: $specifiedText{$ref{$thisRef}}";
        (%workEntries, %ulbOrder) = ();
        my ($thisCV, $outString) = ($ref{$thisRef}, "");
        say OUT "$thisCV:";
        $listOfPages{$thisCV} = "";

        # --- 1. tidy the list of numbers for this verse -------------------
        say LOG "\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
        $SNsInCV{$thisCV} =~ s/√+$//;
        $SNsInCV{$thisCV} =~ s/^ +//;
        $SNsInCV{$thisCV} =~ s/ +$//;
        $SNsInCV{$thisCV} =~ s/ {2,}/ /g;
        if (exists $specifiedText{$thisCV}) {
            $SNsInCV{$thisCV} = Adjust($SNsInCV{$thisCV}, $thisCV);

            $SNsInCV{$thisCV} =~ s/^ +(.*)/$1/;
            $SNsInCV{$thisCV} =~ s/(.*) +$/$1/;
            $SNsInCV{$thisCV} =~ s/ {2,}/ /g;
            $SNsInCV{$thisCV} =~ s/^√+//;
            $SNsInCV{$thisCV} =~ s/√{2,}/√/g;
            $SNsInCV{$thisCV} =~ s/√ /√/g;
            $SNsInCV{$thisCV} =~ s/√$//;
            say LOG "**\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<";
        }
        say LOG "*7*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";
        say LOG "*8*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}";

        # --- 2. keep the first item for each leading number ---------------
        my @tempArray = split /√/, $SNsInCV{$thisCV};
        my %alreadyUsed;
        my @regArray;
        foreach my $slice (@tempArray) {
            if ($slice =~ /^(\d*)/) {
                my $number = "$1";
                push (@regArray, $slice) unless (exists $alreadyUsed{$number});
                $alreadyUsed{$number} = $number
            }
        }
        $" = "|\n";
        say LOG "*9*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}\n\@regArray: >@regArray<";

        foreach my $thisNum (@regArray) {
            say LOG "\$thisNum: $thisNum";
            my ($found, $specPage);

            if ($thisNum =~ /(\d+)\[(.*?)\]/) {
                # --- 3. forced entry: NUM[text] ---------------------------
                my ($number, $forced) = ($1, $2);
                ($thisNum) = ($number);
                my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($forced, $forced, $forced);

                # Fall back to the lower-cased text when looking up the page.
                unless (exists $pageThisEntry{$forced_entry_for_page}) {
                    my $try = lc $forced_entry_for_page;
                    if (exists $pageThisEntry{$try}) {
                        $forced_entry_for_page = lc $forced_entry_for_page
                    }
                    else {
                        say "!!!\tNo \$pageThisEntry{$try} found for $try\n\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page\n$ref{$thisRef}\t$thisNum\t$forced_entry_for_page";
                    }
                }
                say LOG
                    "*0* \$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
                $forced_entry_for_search = lc $forced_entry_for_display;
                say LOG
                    "*A* \$thisNum: >>$thisNum<< \$forced_entry_for_display: >$forced_entry_for_display<,\n\$forced_entry_for_search: >>$forced_entry_for_search<<\n\$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
                $outString .= "[$forced_entry_for_display]($pageThisEntry{$forced_entry_for_page})\n";
                say LOG "\t\t$outString:\n$outString";

                # Remove the forced text from the working copy.  The pieces
                # are literal text, so they are quoted with \Q...\E; " ... "
                # stands for any intervening words, which are kept.
                if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) {
                    my ($first, $second, $third) = ($1, $2, $3);
                    $text{$thisCV} =~ s/\Q$first\E(.*?)\Q$second\E(.*?)\Q$third\E/$1 $2/i;
                    say LOG "\t*\t$text{$thisCV}";
                } elsif ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*)/) {
                    my ($first, $second) = ($1, $2);
                    $text{$thisCV} =~ s/\Q$first\E(.*?)\Q$second\E/$1/i;
                    say LOG "\t**\t$text{$thisCV}";
                } else {
                    $text{$thisCV} =~ s/\Q$forced_entry_for_search\E//i;
                    say LOG "\t***\t$text{$thisCV}";
                }
                next;
            } elsif ($thisNum =~ /\d+(\(([\w\/]+)\))/) {
                # --- 4a. NUM(page): candidates come from that page --------
                $specPage .= $2;
                say LOG "*B*\t\$specPage: $specPage";
            } else {
                say LOG "*C*\t\$thisNum: $thisNum";
            }
            say LOG "\t\$specPage: $specPage";

            # --- 4b. candidate entries for this item ----------------------
            if ($specPage) {
                $workEntries{$thisNum} = $entriesThisPage{$specPage};
            } else {
                $workEntries{$thisNum} = $entriesThisSN{$thisNum};
            }
            $workEntries{$thisNum} =~ s/, $//;
            say LOG "*D*\t\$thisNum: >$thisNum<\t\$workEntries{$thisNum}: >$workEntries{$thisNum}<";

            my @beforeArray = split /, /, $workEntries{$thisNum};
            # Comparator kept exactly as before: first character (numeric),
            # then length, then numeric value; result reversed.
            my @sortedArray = reverse sort { substr($a,0,1) <=> substr($b,0,1)
                                             || length($a) <=> length($b)
                                             || $a <=> $b }
                              @beforeArray;
            $" = "\n\t";
            say LOG "*E*\t\@sortedArray: @sortedArray\n\$outString so far:\n$outString\n\$text{$ref{$thisRef}}: $text{$ref{$thisRef}}";

            # --- 5. first candidate found in the working text wins --------
            foreach my $entry (@sortedArray) {
                my $testEntry = $entry;
                print LOG "\$entry: $entry. Becomes ";
                # "a ... b" becomes the pattern (a)\b(.*?)\b(b).
                while ($testEntry =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
                print LOG "\$testEntry: |$testEntry| ";

                if ($testEntry =~ /\(\.\*\?\)/ && $text{$thisCV} =~ s/\b($testEntry)\b/$3/i) {
                    say LOG "\n===\n$thisNum |$testEntry| is found in first test.\n===";
                    $outString .= "[$entry]($pageThisEntry{$entry})\n";
                    say LOG $outString . "\n===" . $text{$thisCV};
                    $found = 1;
                    last;
                } elsif ($text{$thisCV} =~ s/\b($testEntry)[^\w']//i || $text{$thisCV} =~ s/\b($testEntry)["']//i || $text{$thisCV} =~ s/["']($testEntry)\b//i) {
                    say LOG "\n===\n$thisNum |$testEntry| is found in second test.\n===";
                    $outString .= "[$entry]($pageThisEntry{$entry})\n";
                    say LOG $outString . "\n" . $text{$thisCV};
                    $found = 1;
                    last;
                } else {
                    say LOG "and is not found in\n|$text{$ref{$thisRef}}|";
                }
            }

            unless ($found) {
                say MISSING "$thisCV $thisNum";
                say LOG "Breakout: \$thisCV: $thisCV\t\$thisNum: $thisNum"
            }
        }

        say LOG "*F*\t\$outString: $outString";
        $outString = ProperOrderOutString($outString, $thisCV);
        say LOG "Final \$outString:\n\$outString: $outString";
        say OUT "$outString";
    }
}
|
||||
|
||||
# Adjust($snsOld, $ref)
#
# Applies the adjustment records stored in $adjust{$ref} to the list string
# $snsOld and returns the adjusted list.
#
# $adjust{$ref} holds records separated by "√"; a trailing "√" is stripped
# from it in place.  Each record is made of tab-separated fields, and "||"
# marks an empty side:
#
#   X<TAB>||       remove the first occurrence of X (plus one following
#                  space) from $snsOld
#   N<TAB>M        (both all digits) remove every "N√" from $snsOld and queue
#                  "M√" in the new list
#   ||<TAB>TEXT    add TEXT to the new list; if TEXT itself contains a tab
#                  ("label<TAB>entry") the new list is REPLACED by
#                  "[label]($pageThisEntry{entry})"
#   N<TAB>word     tag N as "N[word]" and move the tagged item to the front
#   N<TAB>page     (page contains "/") rewrite the first N in $snsOld as
#                  "N(page)"
#
# Reads:  %adjust, %specifiedText, %pageThisEntry.  Writes progress to LOG.
# Returns: the new list followed by what is left of $snsOld, joined with "√",
#          with repeated separators collapsed and one leading space or "√"
#          removed.
sub Adjust {
    my ($snsOld, $ref) = @_;
    my $snsNew = "";

    say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: |$snsOld|";

    $snsOld =~ s/^ +/ /;
    $adjust{$ref} =~ s/√$//;
    say LOG "*0*\t\$adjust{$ref}: $adjust{$ref}";

    foreach my $adjustment (split /√/, $adjust{$ref}) {
        say LOG "<><>\t\$adjustment: >$adjustment<";

        if ($adjustment =~ /([^\t]*)\t\|\|$/) {             # delete this from list to look for
            my $found = $1;
            # \Q...\E: $found is data, not a pattern.
            $snsOld =~ s/\b\Q$found\E\b ?//;
            say LOG "*1*\t\$found: $found should be deleted from \$snsOld: $snsOld";
        }
        elsif ($adjustment =~ /^(\d+)\t(\d+)$/) {           # change to specified number
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1√//g;
            $snsNew .= "${found2}√" unless $snsNew =~ /\b$found2\b /;
            say LOG "*2*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        }
        elsif ($adjustment =~ /^\|\|\t(.*)/) {              # add this to list to look for
            my $adj = $1;
            say LOG "*3*\t\$adjustment: $adjustment, \$adj: $adj";
            if ($adj =~ /([^\t]*)\t([^\t]*)/) {
                # NOTE(review): this assigns rather than appends, discarding
                # anything queued so far -- confirm that is intended.
                $snsNew = "[$1]($pageThisEntry{$2})";
            }
            else {
                $snsNew .= "$adj ";
            }
        }
        elsif ($adjustment =~ /^(\d+)\t([\w\. \-\']+)$/) {  # add specified word
            my ($found1, $found2) = ($1, $2);
            my $tagged = $found1 . '[' . $found2 . ']';

            if ($snsOld =~ s/\b$found1\b/$tagged/) {
                say LOG "*\t4a\t\$snsOld: $snsOld";
                # Move the tagged item to the front of the list.
                if    ($snsOld =~ s/^(.+√)(\Q$tagged\E√)/$2$1/)  {}
                elsif ($snsOld =~ s/^(.+√)(\Q$tagged\E)$/$2√$1/) {}
            }
            else {
                $snsNew .= "${found1}√";
                # Was "$snsOld .= s/.../" which ran the substitution on $_
                # and appended its result ("" or "1") to $snsOld.  Bound to
                # $snsOld as intended; it cannot match here because the test
                # above just failed, so $snsOld is left untouched.
                $snsOld =~ s/\b$found1\b//;
                say LOG "*\t4b\t\$snsNew: $snsNew";
                $snsNew =~ s/\b$found1\b/$tagged/;
                $snsNew =~ s/^(.+√)(\Q$tagged\E√)/$2$1/;
                $snsNew =~ s/^(.+√)(\Q$tagged\E)$/$2√$1/;
            }
            $snsOld =~ s/ {2,}/ /;
            $snsOld =~ s/√$//;
            say LOG "*4*\t\$snsOld: $snsOld\n\$snsNew: $snsNew";
        }
        elsif ($adjustment =~ /^(\d+)\t([\/\d\w]+)$/) {     # add specified page
            my ($found1, $found2) = ($1, $2);
            $snsOld =~ s/$found1/$found1\($found2\)/;
            say LOG "*5*\t\t\$snsOld: $snsOld";
        }
    }

    $snsOld =~ s/^√//;
    $snsOld =~ s/√+/√/g;
    say LOG "\$snsNew: >$snsNew<\n\$snsNew+\$snsOld: >$snsNew< >$snsOld<";

    $snsNew = "$snsNew√$snsOld";
    say LOG "*5*\t*\t\$snsNew: |$snsNew|";

    # Pull every tagged item in front of the plain number preceding it.
    1 while $snsNew =~ s/(\d+) (\d+\[[^\[\]]*\])/$2$1/;
    say LOG "*5*\t**\t\$snsNew: |$snsNew|";

    $snsNew =~ s/√+/√/g;
    # An alternation instead of the class [ √]: a class would match a single
    # byte of the multi-byte "√" when the source is not read as UTF-8.
    $snsNew =~ s/^(?: |√)//;
    say LOG "*6*\t\t\$snsNew: |$snsNew|";

    return $snsNew;
}
|
||||
|
||||
# Output()
#
# For every reference in %ref (visited in sorted key order):
#   * trims and single-spaces $SNsInCV{$thisRef} and $checkPages{$thisRef},
#     and removes " ||" markers from the latter;
#   * sorts the newline-separated lines of $listOfPages{$thisRef} and stores
#     them back, joined with newlines;
#   * reports to MISSING every space-separated item of $checkPages{$thisRef}
#     that is not the link target of one of the sorted lines (the first
#     sorted line is discarded before the comparison).
#
# Reads %ref; modifies %SNsInCV, %listOfPages, %checkPages and the global
# list separator $"; writes to STDOUT, LOG and MISSING.
sub Output {
    say "Outputting";

    foreach my $key (sort keys %ref) {
        my $thisRef = $ref{$key};
        my %donePages;

        # /g so that every run of spaces is collapsed, not only the first.
        $SNsInCV{$thisRef} =~ s/^ +//;
        $SNsInCV{$thisRef} =~ s/ +$//;
        $SNsInCV{$thisRef} =~ s/ {2,}/ /g;

        # NOTE(review): this used to be "sort sort { lc($a) cmp lc($b) }",
        # i.e. a case-insensitive sort immediately re-sorted with the default
        # comparator, so the effective order was always plain string order.
        # That order is kept here; confirm whether case-insensitive was meant.
        my @pageLines = split /\n/, $listOfPages{$thisRef};
        my @sorted    = sort @pageLines;

        # Deliberately not localised: later interpolations elsewhere in the
        # file may depend on the separator staying "\n".
        $" = "\n";
        $listOfPages{$thisRef} = "@sorted";
        say LOG "\$listOfPages{$thisRef}: $listOfPages{$thisRef}\n\$checkPages{$thisRef}: $checkPages{$thisRef}";

        # Same normalisation for the list to verify; leftover double spaces or
        # "||" markers would otherwise become bogus fields in the split below.
        $checkPages{$thisRef} =~ s/^ +//;
        $checkPages{$thisRef} =~ s/ +$//;
        $checkPages{$thisRef} =~ s/ {2,}/ /g;
        $checkPages{$thisRef} =~ s/ \|\|//g;
        say LOG "\$checkPages{$thisRef}:\t|$checkPages{$thisRef}|";

        my @checkArray = split / /, $checkPages{$thisRef};

        # NOTE(review): presumably drops an empty leading line -- verify
        # against the code that builds %listOfPages.
        shift @sorted;

        # Reduce each "[text](target)" line to its target and record it.
        foreach my $slice (@sorted) {
            $slice =~ s/\[.*?\]\((.*?)\)/$1/;
            $donePages{$slice} = $slice;
        }

        foreach my $slice (@checkArray) {
            say MISSING "$thisRef\t$slice" unless exists $donePages{$slice};
        }
    }
}
|
||||
|
||||
# Substitute()
#
# Walks the keys of %pages in sorted order and applies the substitutions
# recorded in $substitutedPages{$key} (", "-separated "old<TAB>new" pairs)
# to $SNsInCV{$key}:
#   old is "||"  -> "new " is appended
#   new is "||"  -> the first "old " is removed
#   otherwise    -> the first match of old is replaced by new
# After each pair $checkPages{$key} receives a copy of the list, and then
# every " ||" is stripped from $SNsInCV{$key}.  Keys without an entry in
# %substitutedPages just have their list copied to %checkPages.
# The list is written to LOG before and after processing each key.
sub Substitute {
    for my $key (sort keys %pages) {
        say LOG "\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";

        if (!exists $substitutedPages{$key}) {
            $checkPages{$key} = $SNsInCV{$key};
        }
        else {
            $substitutedPages{$key} =~ s/, $//;

            for my $pair (split /, /, $substitutedPages{$key}) {
                # Anything that is not "old<TAB>new" is ignored.
                next unless $pair =~ /([^\t]*)\t([^\t]*)/;
                my ($from, $to) = ($1, $2);

                if    ($from eq "||") { $SNsInCV{$key} .= "$to "; }
                elsif ($to   eq "||") { $SNsInCV{$key} =~ s/$from //; }
                else                  { $SNsInCV{$key} =~ s/$from/$to/; }

                # The copy is taken before the " ||" clean-up below.
                $checkPages{$key} = $SNsInCV{$key};
                $SNsInCV{$key} =~ s/ \|\|//g;
            }
        }

        say LOG "<>\t\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";
    }
}
|
||||
|
||||
# ProperOrderOutString($outString, $thisCV)
#
# Re-orders the "[text](location)" lines of $outString by the position at
# which each line's text is first found (case-insensitively) in
# $fullText{$thisCV}.
#
# Every hit is overwritten with spaces inside $fullText{$thisCV}, so the
# same stretch of text cannot be claimed by a later line; %fullText is
# therefore modified.  " ... " inside the text is turned into ".*?" and the
# text is used as a pattern.  A text of the form "first...second" is matched
# as two separate pieces, "first" followed later by "second".
#
# Lines that are not of the "[text](location)" form, or whose text is not
# found, are left out of the result.
#
# Returns the surviving lines in ascending position order, each terminated
# by "\n".  Writes progress to LOG.
sub ProperOrderOutString {
    my ($outString, $thisCV) = @_;
    my %orderedSet;

    foreach my $thisSet (split /\n/, $outString) {
        say LOG "\t>\t$thisSet";
        next unless $thisSet =~ /(\[([^\]]*)\])(\([^\)]*\))/;

        my ($ulb, $fileLoc) = ($2, $3);
        $ulb =~ s/ \.\.\. /.*?/g;
        say LOG "\t>>\t\$ulb: $ulb, \$fileLoc: $fileLoc, \$fullText{$thisCV}\n$fullText{$thisCV}";

        if ($ulb =~ /^(.*)\.\.\.(.*)$/) {
            my ($found1, $found2) = ($1, $2);
            my $blank1 = " " x length $found1;
            my $blank2 = " " x length $found2;

            # The second piece must be $found2; the pattern used to repeat
            # $found1, so the closing half of the text was never looked for.
            if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*)\b$found2\b(.*)$/$1$blank1$2$blank2$3/i) {
                my $order = length $1;      # offset of the first piece
                say LOG "\t>>>\t$fullText{$thisCV}";
                $orderedSet{$order} = $thisSet;
                say LOG "\t\t\t\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}";
            }
        }
        else {
            my $blank = " " x length $ulb;

            if ($fullText{$thisCV} =~ s/^(.*?)\b$ulb\b(.*)$/$1$blank$2/i) {
                my $order = length $1;      # offset of the hit
                say LOG "\t>>>>\t$fullText{$thisCV}";
                $orderedSet{$order} = $thisSet;
                say LOG "\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}";
            }
        }
    }

    my $outS = "";
    foreach my $key (sort { $a <=> $b } keys %orderedSet) {
        $outS .= "$orderedSet{$key}\n";
    }
    return $outS;
}
|
||||
|
||||
__DATA__
|
||||
OT
|
||||
gen Genesis
|
||||
exo Exodus
|
||||
lev Leviticus
|
||||
num Numbers
|
||||
deu Deuteronomy
|
||||
jos Joshua
|
||||
jdg Judges
|
||||
rut Ruth
|
||||
1sa 1 Samuel
|
||||
2sa 2 Samuel
|
||||
1ki 1 Kings
|
||||
2ki 2 Kings
|
||||
1ch 1 Chronicles
|
||||
2ch 2 Chronicles
|
||||
ezr Ezra
|
||||
neh Nehemiah
|
||||
est Esther
|
||||
job Job
|
||||
psa Psalms
|
||||
pro Proverbs
|
||||
ecc Ecclesiastes
|
||||
sng Song of Solomon
|
||||
isa Isaiah
|
||||
jer Jeremiah
|
||||
lam Lamentations
|
||||
ezk Ezekiel
|
||||
dan Daniel
|
||||
hos Hosea
|
||||
jol Joel
|
||||
amo Amos
|
||||
oba Obadiah
|
||||
jon Jonah
|
||||
mic Micah
|
||||
nam Nahum
|
||||
hab Habakkuk
|
||||
zep Zephaniah
|
||||
hag Haggai
|
||||
zec Zechariah
|
||||
mal Malachi
|
||||
NT
|
||||
mat Matthew
|
||||
mrk Mark
|
||||
luk Luke
|
||||
jhn John
|
||||
act Acts
|
||||
rom Romans
|
||||
1co 1 Corinthians
|
||||
2co 2 Corinthians
|
||||
gal Galatians
|
||||
eph Ephesians
|
||||
php Philippians
|
||||
col Colossians
|
||||
1th 1 Thessalonians
|
||||
2th 2 Thessalonians
|
||||
1ti 1 Timothy
|
||||
2ti 2 Timothy
|
||||
tit Titus
|
||||
phm Philemon
|
||||
heb Hebrews
|
||||
jas James
|
||||
1pe 1 Peter
|
||||
2pe 2 Peter
|
||||
1jn 1 John
|
||||
2jn 2 John
|
||||
3jn 3 John
|
||||
jud Jude
|
||||
rev Revelation
|
Loading…
Reference in New Issue