forked from WycliffeAssociates/en_tw
Description of procedures and files needed to update tW list in PDFs.
This commit is contained in:
parent
df985bef4f
commit
1787b41cac
|
@ -0,0 +1,39 @@
|
||||||
|
# Combines most recent ULB with NASB with codes.
|
||||||
|
|
||||||
|
use 5.12.0;
|
||||||
|
|
||||||
|
# Merge the ULB text with the Strong's-coded NASB text, verse by verse.
# Both inputs are tab-separated "reference<TAB>text" files.  For every ULB
# verse whose reference also occurs in the NASB file, two lines are written:
# the ULB line, then the NASB text indented by one space.
my ($ulb, $nasb) = ("/Users/Henry/Google Drive/WA/Test/Unlocked Bible/ULB text.txt", "/Users/Henry/Google Drive/WA/Test/data/NASB.Strongs.txt");
my $logPath = "/Users/Henry/Google Drive/WA/Test/out/log.log";
my $outPath = "/Users/Henry/Google Drive/WA/Test/data/ULB.NASB.Strongs.txt";
my (%codes);

# The log is still opened (and so truncated) as before, although this
# script does not currently write anything to it.
open(my $log, ">:utf8", $logPath) or die "$logPath:\n$!";
open(my $out, ">:utf8", $outPath) or die "$outPath:\n$!";

# Pass 1: remember the coded NASB text for every reference.
open(my $nasbFile, "<:utf8", $nasb) or die "$nasb:\n$!";
while (my $line = <$nasbFile>) {
    chomp $line;
    $codes{$1} = $2 if $line =~ /^([^\t]*)\t(.*)$/;
}
close $nasbFile;

# Pass 2: walk the ULB in file order and pair each verse with its NASB text.
open(my $ulbFile, "<:utf8", $ulb) or die "$ulb:\n$!";
while (my $line = <$ulbFile>) {
    chomp $line;
    next unless $line =~ /^([^\t]*)\t(.*)$/;
    my ($ref, $val) = ($1, $2);
    say {$out} "$ref\t$val\n $codes{$ref}" if exists $codes{$ref};
}
close $ulbFile;

# Buffered write errors only surface at close, so check it.
close $out or die "$outPath:\n$!";
close $log;

system 'open -a /Applications/BBEdit.app "/Users/Henry/Google Drive/WA/Test/data/ULB.NASB.Strongs.txt"';
print "\n\tDone\n\n\tOutput is in /Users/Henry/Google Drive/WA/Test/data/ULB.NASB.Strongs.txt\n\n";
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,19 @@
|
||||||
|
use 5.12.0;
|
||||||
|
|
||||||
|
# Collect every file path listed in bold paragraphs (<p><b>path</b></p>) of
# the mismatched-snippets report and open them all in BBEdit.
my $editor = "/Applications/BBEdit.app";
my $report = "/Users/Henry/Google Drive/WA/tN instructions/mismatched_snippets.html";
my @paths;

open(my $in, "<", $report) or die "$report:\n$!";
while (my $line = <$in>) {
    chomp $line;
    push @paths, $1 if $line =~ /<p><b>(.*)<\/b><\/p>/;
}
close $in;

# Echo the equivalent shell command, in the same format as before.
my $files = "open -a $editor " . join("", map { "$_ " } @paths);
say $files;

# List-form system bypasses the shell, so paths containing spaces or shell
# metacharacters reach `open` intact.
system("open", "-a", $editor, @paths);
|
|
@ -0,0 +1,224 @@
|
||||||
|
# Before running this program, run
|
||||||
|
# find -s . "*.md"
|
||||||
|
# on
|
||||||
|
# /Users/Henry/Documents/git.Door43/en_tn/
|
||||||
|
# and paste the output into dir.dir in that directory
|
||||||
|
|
||||||
|
use 5.12.0;
|
||||||
|
use File::Slurp;
|
||||||
|
use utf8;
|
||||||
|
#use open IN => ":utf8", OUT => ":utf8";
|
||||||
|
use open IO => ":utf8";
|
||||||
|
use Cwd;
|
||||||
|
use File::Find ;
|
||||||
|
|
||||||
|
$" = "\n";
|
||||||
|
|
||||||
|
my $pwd = cwd();
my $topDir = "/Users/Henry/Documents/git.Door43/en_tn";

my (@filesToRun, @array);
# A real regex for "ends in .md".  The previous value, the glob '*.md', was
# interpolated into a regex and matched only by accident.
my $filePattern = qr/\.md$/;
my (%abbrev, %full, %ulb);

# Gather every note file below $topDir except LICENSE/README/intro pages.
# Inside the callback $_ is the bare file name.
find( sub { push @filesToRun, $File::Find::name if ( m/$filePattern/ && !m/(?:LICENSE|README|intro)$filePattern/ ) }, $topDir) ;

# Regex => placeholder pairs applied to notes and ULB text before they are
# compared, and the reverse mapping used when results are displayed.
my %toDummy = (" \\.\\.\\. ", ".*", "\\?", "QM", "\"", "QD", "\'", "QS", "\!", "XM", "\\(", "QOXP", "\\)", "QCP");
my %fromDummy = ("\\.\\*", " ... ", "QM", "?", "QD", "\"", "QS", "'", "XM", "!", "QOXP", "(", "QCP", ")");

# LOG and OUT stay bareword handles because the subs below write to them by
# name.  The `use open` pragma still supplies the :utf8 layer.
my $logPath = "/Users/Henry/Google Drive/WA/Test/out/log.log";
open LOG, ">", $logPath or die "$logPath:\n$!";

#open output
my $reportPath = "/Users/Henry/Google Drive/WA/tN instructions/mismatched_snippets.html";
open OUT, ">", $reportPath or die "$reportPath:\n$!";

#Read in ULB

ReadData();
ReadULB();
ProcessFiles();
|
||||||
|
|
||||||
|
# Fill the book-name lookup tables from the __DATA__ section.
# Each usable row has three tab-separated fields; the second and third are
# stored as %abbrev{third} = second and %full{second} = third.
sub ReadData {
    while (<DATA>) {
        chomp;
        next unless /([^\t]*)\t([^\t]*)\t(.*)/;
        my ($code, $name) = ($2, $3);
        $abbrev{$name} = $code;
        $full{$code}   = $name;
    }
    #foreach my $key (sort keys %abbrev) {say LOG "$key\t$abbrev{$key}"}
    #foreach my $key (sort keys %full) {say LOG "$key\t$full{$key}"}
}
|
||||||
|
|
||||||
|
# Load the flattened ULB text into %ulb, keyed "abbrev/chapter/verse" with
# zero-padded numbers (three digits for Psalms, two elsewhere).
# Input lines have the form "Book C:V<TAB>text".  Footnotes are removed,
# runs of spaces collapsed, and spaces around em dashes dropped.  Text for a
# key that occurs more than once is concatenated.
sub ReadULB {
    my $ulbPath = "/Users/Henry/Google Drive/WA/Test/Unlocked Bible/ULB text.txt";
    open(my $in, "<", $ulbPath) or die "$ulbPath:\n$!";

    while (my $line = <$in>) {
        chomp $line;
        next unless $line =~ /^([^\t]*)\t(.*)$/;
        my ($tempID, $tempText) = ($1, $2);

        # Skip lines whose reference cannot be parsed.  Previously such a
        # line silently reused the book/chapter/verse of the preceding line
        # and its text was appended to the wrong verse.
        next unless $tempID =~ /^([^:]*) (\d+):(\d+)/;
        my ($thisBook, $thisChap, $thisVerse) = ($1, $2, $3);

        my $width = $thisBook eq "Psalms" ? 3 : 2;
        $thisChap  = "0$thisChap"  while length $thisChap  < $width;
        $thisVerse = "0$thisVerse" while length $thisVerse < $width;

        # Unknown book names yield an empty abbreviation, as before.
        $thisBook = $abbrev{$thisBook};
        my $id = "$thisBook/$thisChap/$thisVerse";

        $tempText =~ s/\\f \+.*?\\f\*//g;
        $tempText =~ s/ {2,}/ /g;
        $ulb{$id} .= "$tempText ";
        $ulb{$id} =~ s/— /—/g;
        $ulb{$id} =~ s/ —/—/g;
    }
    close $in;
    #foreach my $key (sort keys %ulb) {say LOG "|$key|\t<$ulb{$key}>"}
}
|
||||||
|
|
||||||
|
# assign passages as values to chunk keys
|
||||||
|
|
||||||
|
#Read in each file
|
||||||
|
# Check every translationNotes file against the ULB verse it belongs to.
# For each heading ("# snippet") left in a note file after boilerplate is
# removed, the snippet must occur in the ULB text stored under the key
# "book/chapter/verse" taken from the file's path.  Snippets are used as
# regexes: %toDummy first replaces punctuation with placeholders and turns
# " ... " into ".*".  Each miss is logged and queued in @array as an HTML
# fragment for the report.
sub ProcessFiles {
    foreach my $slice (@filesToRun) {
        my ($thisText, $tb, $ct, $vt);

        if ($slice =~ /^.*\/(([^\.]*)\/([^\.]*)\/([^\.]*)).md$/) {
            my $anchor = $1;
            ($tb, $ct, $vt) = ($2, $3, $4);
            $tb = $full{$tb};
            $ct =~ s/^0+//;
            $vt =~ s/^0+//;
            $thisText = $ulb{$anchor};
        }

        my $tN = read_file("$slice", binmode => 'utf8');

        # Put both sides into placeholder form so punctuation in a snippet
        # is not read as regex syntax.
        foreach my $key (sort keys %toDummy) {
            $tN =~ s/$key/$toDummy{$key}/g;
            $thisText =~ s/$key/$toDummy{$key}/g;
        }

        # Drop headings that are not snippets, link bullets, and the two
        # lines following each remaining heading.
        $tN =~ s/# ((General Information|Connecting Statement|translationWords):?)[^\r\n]*\r?\n//g;
        $tN =~ s/\* \[\[[^\r\n]*\r?\n//g;
        $tN =~ s/(#[^\r\n]*\r?\n)[^\r\n]*\r?\n[^\r\n]*\r?\n/$1/g;

        $thisText =~ s/ {2,}/ /g;

        while ($tN =~ /# ([^\r\n]*)\r?\n/g) {
            my $thisNote = $1;
            next if $thisText =~ /$thisNote/;

            say LOG ">A>\n$tb $ct:$vt\n$slice\n$thisNote\n$thisText\n<A<";

            # Convert COPIES back to readable punctuation for the report.
            # $thisText itself must stay in placeholder form: converting it
            # in place (as was done before) made every later snippet in the
            # same file compare against the wrong text and report false
            # mismatches.
            my ($shownNote, $shownText) = ($thisNote, $thisText);
            foreach my $key (sort keys %fromDummy) {
                $shownNote =~ s/$key/$fromDummy{$key}/g;
                $shownText =~ s/$key/$fromDummy{$key}/g;
            }
            push @array, "\n<p>$tb $ct:$vt</p>\n<p><b>$slice</b></p>\n<p><i>$shownNote</i></p>\n<p>$shownText</p>";
        }
    }
}
|
||||||
|
|
||||||
|
# Write the report: a fixed XHTML wrapper around the queued mismatch
# entries, one entry per line.
my $pageHead = qq{<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>Mismatched Snippets</title>
<meta name="generator" content="BBEdit 8.5" />
</head>
<body>
};
my $pageTail = qq{
</body>
</html>};

# $" is "\n" in this script, so interpolating @array joins entries with
# newlines.
print OUT "$pageHead@array$pageTail";

close OUT;

close LOG;

say "Done.";
|
||||||
|
|
||||||
|
__DATA__
|
||||||
|
01 gen Genesis
|
||||||
|
02 exo Exodus
|
||||||
|
03 lev Leviticus
|
||||||
|
04 num Numbers
|
||||||
|
05 deu Deuteronomy
|
||||||
|
06 jos Joshua
|
||||||
|
07 jdg Judges
|
||||||
|
08 rut Ruth
|
||||||
|
09 1sa 1 Samuel
|
||||||
|
10 2sa 2 Samuel
|
||||||
|
11 1ki 1 Kings
|
||||||
|
12 2ki 2 Kings
|
||||||
|
13 1ch 1 Chronicles
|
||||||
|
14 2ch 2 Chronicles
|
||||||
|
15 ezr Ezra
|
||||||
|
16 neh Nehemiah
|
||||||
|
17 est Esther
|
||||||
|
18 job Job
|
||||||
|
19 psa Psalms
|
||||||
|
20 pro Proverbs
|
||||||
|
21 ecc Ecclesiastes
|
||||||
|
22 sng Song of Songs
|
||||||
|
23 isa Isaiah
|
||||||
|
24 jer Jeremiah
|
||||||
|
25 lam Lamentations
|
||||||
|
26 ezk Ezekiel
|
||||||
|
27 dan Daniel
|
||||||
|
28 hos Hosea
|
||||||
|
29 jol Joel
|
||||||
|
30 amo Amos
|
||||||
|
31 oba Obadiah
|
||||||
|
32 jon Jonah
|
||||||
|
33 mic Micah
|
||||||
|
34 nam Nahum
|
||||||
|
35 hab Habakkuk
|
||||||
|
36 zep Zephaniah
|
||||||
|
37 hag Haggai
|
||||||
|
38 zec Zechariah
|
||||||
|
39 mal Malachi
|
||||||
|
41 mat Matthew
|
||||||
|
42 mrk Mark
|
||||||
|
43 luk Luke
|
||||||
|
44 jhn John
|
||||||
|
45 act Acts
|
||||||
|
46 rom Romans
|
||||||
|
47 1co 1 Corinthians
|
||||||
|
48 2co 2 Corinthians
|
||||||
|
49 gal Galatians
|
||||||
|
50 eph Ephesians
|
||||||
|
51 php Philippians
|
||||||
|
52 col Colossians
|
||||||
|
53 1th 1 Thessalonians
|
||||||
|
54 2th 2 Thessalonians
|
||||||
|
55 1ti 1 Timothy
|
||||||
|
56 2ti 2 Timothy
|
||||||
|
57 tit Titus
|
||||||
|
58 phm Philemon
|
||||||
|
59 heb Hebrews
|
||||||
|
60 jas James
|
||||||
|
61 1pe 1 Peter
|
||||||
|
62 2pe 2 Peter
|
||||||
|
63 1jn 1 John
|
||||||
|
64 2jn 2 John
|
||||||
|
65 3jn 3 John
|
||||||
|
66 jud Jude
|
||||||
|
67 rev Revelation
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
use 5.12.0;
|
||||||
|
use File::Slurp;
|
||||||
|
use File::Find ;
|
||||||
|
use Cwd ;
|
||||||
|
use utf8;
|
||||||
|
#use open IN => ":utf8", OUT => ":utf8";
|
||||||
|
use open IO => ":utf8";
|
||||||
|
|
||||||
|
# Build Extract.txt: the ULB verses of every active book listed in
# tW.work.dat, grouped in the order the books appear in that list.
# A book is active when its row (three tab-separated fields) does not start
# with '#'.
my $book;
my @bookList;

my $dataPath    = "/Users/Henry/Google Drive/WA/Test/data/tW.work.dat";
my $ulbPath     = "/Users/Henry/Google Drive/WA/Test/Unlocked Bible/ULB text.txt";
my $extractPath = "/Users/Henry/Google Drive/WA/Test/Unlocked Bible/Extract.txt";

# (The old die message began with "$/", which interpolated the input record
# separator instead of the path.)
open (my $file, "<:utf8", $dataPath) or die "$dataPath:\n$!";
while (my $line = <$file>) {
    chomp $line;
    if ($line =~ /^([^#][^\t\n]*)\t([^\t\n]*)\t[^\t\n]*$/) {
        push @bookList, $1;
    }
}
close $file;

say "Removing old Extract.txt";

# unlink replaces system `rm ...`.  That form ran rm inside the backticks
# and then handed rm's (empty) output to system as a second command.
if (-e $extractPath) {
    unlink $extractPath or die "$extractPath:\n$!";
}

say "Grepping the list of books.";

# Read the ULB once and filter in Perl, not via one shell grep per book.
open (my $in, "<:utf8", $ulbPath) or die "$ulbPath:\n$!";
my @ulbLines = <$in>;
close $in;

open (my $out, ">:utf8", $extractPath) or die "$extractPath:\n$!";
foreach $book (@bookList) {
    say $book;
    # \Q..\E keeps any regex metacharacter in a book name literal.
    print {$out} grep { /^\Q$book\E [0-9]*:[0-9]*/ } @ulbLines;
}
close $out or die "$extractPath:\n$!";

say "Done."
|
|
@ -0,0 +1,106 @@
|
||||||
|
use 5.12.0;
|
||||||
|
use utf8;
|
||||||
|
use Cwd;
|
||||||
|
use File::Slurp;
|
||||||
|
use open IO => ":utf8";
|
||||||
|
$| = 1;         # unbuffered STDOUT so progress lines appear immediately
$" = "\n";

my ($inDir, $outDir) = ("/Users/Henry/Documents/git.Door43/en_ulb", "out");
# $fileSpec is used as a regex, so the dot is escaped ("\.usfm" in double
# quotes was just ".usfm" and matched any character before "usfm").
my ($pwd, $os, $fileSpec) = (cwd(), $^O, "\\.usfm");
my (@array, @usfmLines);
my (%hash);
my ($d, $whatami, $inFile);
my ($book, $chap, $vers, $text, $outText, $newV, $newC, $newB, $outFile, $usfmText, $metathesis);
###
# Directory separator for the current platform.
if ($os eq "darwin" || $os eq "linux") {$d = "/"}
else {$d = "\\"}

#if (-e $outDir) {
#	-d _ || die "$whatami: $outDir is not a directory!\n";
#	-r _ && -w _ && -x _ || die "$whatami: $outDir is inaccessible!\n";
#	chdir $outDir;
#	my $glob = unlink glob "*.*";
#	chdir "$pwd";
#}else{
#	mkdir($outDir, 0755) || die "$whatami: Can't create $outDir!\n";
#}

# List the USFM files directly from $inDir.  The earlier chdir/opendir(".")
# round trip ignored a failed chdir and would then list the wrong directory.
opendir(my $dirHandle, $inDir) or die "$inDir:\n$!";
my @infiles = grep /$fileSpec$/i, readdir $dirHandle;
closedir $dirHandle;

# $outDir is relative, so the script must be run from the directory that
# contains "out".
open(LOG, ">:utf8", "$outDir${d}log.log") or die "$outDir${d}log.log:\n$!";
say "$outDir${d}log.log open";
ProcessFiles();
say "Done.";
close LOG;
|
||||||
|
|
||||||
|
# Flatten every USFM file in @infiles (alphabetical order) into "ULB text.txt".
# Each file is cleaned as a whole, split into lines, passed line by line
# through SearchAndReplace, re-joined, cleaned again and written out.
# The order of the substitutions matters; they are kept exactly as they were.
sub ProcessFiles {
    my $ulbPath = "/Users/Henry/Google Drive/WA/Test/Unlocked Bible/ULB text.txt";
    open(my $out, ">:utf8", $ulbPath) or die "$ulbPath:\n$!";

    @infiles = sort @infiles;
    for my $name (@infiles) {
        say "$inDir/$name";

        $usfmText = read_file("$inDir${d}$name", binmode => 'utf8') or die;

        # Whole-file cleanup before splitting into lines.
        for ($usfmText) {
            s/\r?\n([^\\ \r\n])/ $1/g;
            s/\\f \+.*?\\f\*//g;
            s/\\pi/\\p/g;
            s/\\((ide?)|(toc.)|(mt|sp)|(c \\d+))[^\r\n]*\r?\n//g;
            s/\r?\n\\(m|pi?|(q\d?)) ([^\r\n]*)\r\n/$3/g;
            s/ +\n/\n/g;
            s/(\n\\v \d+)\n/$1 \[blank\]\n/g;
            s/ —/—/g;
        }

        @usfmLines = split /\r?\n/, $usfmText;

        # Per-line marker handling; SearchAndReplace tracks book/chapter/verse.
        my $thisFile;
        for my $raw (@usfmLines) {
            my $piece = $raw;
            chomp $piece;
            $piece =~ s/^(\\q)[\t ]$/$1/;
            $thisFile .= SearchAndReplace($piece);
        }

        # Cleanup of the re-joined text.
        for ($thisFile) {
            s/\r?\n>>\t/ /g;
            s/>\t//g;
            s/(\r?\n){2,}/\n/;
            s/— /—/g;
            s/\\q\d//g;
            s/\\p//g;
            s/\\q //g;
            s/\\qs( .*)\\qs\* ?/$1/g;
            s/\n{2,}/\n/g;
            s/ {2,}/ /g;
        }

        say {$out} $thisFile;
    }
    close $out;
}
|
||||||
|
|
||||||
|
# Convert one USFM line to its plain-text form and return it.
# Book (\h), chapter (\c), verse (\v) and a pending \d title are kept in
# file-level variables.  A verse line comes back as
# "\nBook C:V<TAB>[title ]text".  Only the first matching marker is handled.
sub SearchAndReplace {
    my ($text) = @_;

    # Section break marker becomes a rule between blank lines.
    $text =~ s/\\s5.*$/\n-------\n/;

    if ($text =~ s/\\h (.+) *$//) {
        ($book, $newB) = ($1, 1);
        return $text;
    }
    if ($text =~ s/\\c (\d+)//) {
        ($chap, $newC) = ($1, 1);
        return $text;
    }
    if ($text =~ s/\\v (\d+(-\d+)?) (.*)$/$3/) {
        ($vers, $newV) = ($1, 1);
        # Any \d title seen since the previous verse is prefixed here.
        $text = "\n$book $chap:$vers\t$metathesis$text";
        $metathesis = "";
        return $text;
    }

    # Poetry and margin paragraphs: keep the text, drop the marker.
    return $text if $text =~ s/^\\q\d? (.*)$/ $1/;
    return $text if $text =~ s/^\\m (.*)$/ $1/;

    # Descriptive title: hold it for the next verse line.
    if ($text =~ s/^\\d (.*)//) {
        $metathesis = "$1 ";
        return $text;
    }

    # A bare paragraph marker on its own line is removed.
    $text =~ s/^\\[qpm]$//;
    return $text;
}
|
|
@ -0,0 +1,202 @@
|
||||||
|
# Routine to take missing.log entries and link to UGNT and ULB.NASB.Strongs
|
||||||
|
|
||||||
|
use 5.12.0;
|
||||||
|
use File::Slurp;
|
||||||
|
$| = 1;    # autoflush STDOUT; the former value "\n" numified to 0 and had no effect
|
||||||
|
use utf8;
|
||||||
|
#use open IN => ":utf8", OUT => ":utf8";
|
||||||
|
use open IO => ":utf8";
|
||||||
|
use File::Find ;
|
||||||
|
use Cwd ;
|
||||||
|
|
||||||
|
binmode(STDOUT, "encoding(UTF-8)");
|
||||||
|
|
||||||
|
# File-level state shared by the subs below.
my $outputFiles = "/Users/Henry/Documents/git.Door43/en_tw/bible/";    # prefix; file names are appended
my $topDir      = "/Users/Henry/Documents/git.Door43/en_tw/bible";
my $usfmFile    = "";
my $txtFile     = "/Users/Henry/Google Drive/WA/Test/data/ULB.NASB.Strongs.txt";
my $lbsBk       = "";
my $dataFile    = "/Users/Henry/Google Drive/WA/Test/data/tW.work.dat";
my $doFlag;

my ($missingLine, $bk, $ch, $vs, $ref, $url, $strong, $word, $flag, $putative, $tNid, $abbr);

open LOG, ">:utf8", "/Users/Henry/Google Drive/WA/Test/out/mine.log.log" or die;
#open OUT, ">:utf8", $outputFile or die;

# Each step reads and writes the file-level variables declared above.
ParseLine();
FindURL();
FindVerse($ref);
ChecktWPages($word);
Finish();

#close OUT;
close LOG;

print "\n\tDone.\n\n";
|
||||||
|
|
||||||
|
# Prompt for one line of missing.log ("Book C:V<TAB>dir/term") and set up
# the file-level state for the later steps: $ref, $bk, $ch, $vs, $url,
# $putative, $lbsBk, $abbr and $usfmFile.  Also opens the matching
# translationNotes file in BBEdit and the verse in Logos.
# Exits quietly on empty input; dies if the line cannot be parsed.
sub ParseLine {
    say "\n\n\033[0;1;31mEnter line from missing.log:\033[m\n";
    $missingLine = <STDIN>;
    $missingLine = "" unless defined $missingLine;    # end of input counts as empty
    chomp $missingLine;
    exit 0 if ($missingLine eq "");
    #$missingLine = "Mark 11:6	kt/command";

    if ($missingLine =~ /^(([^:]*) (\d+):(\d+))\t([^\/]*\/([^\t]*))/) {
        ($ref, $bk, $ch, $vs, $url, $putative) = ($1, $2, $3, $4, $5, $6);
        $outputFiles .= "${url}.md ";
        say LOG "\$bk: $bk";

        # Look the book up in the data file: full name, "NN-ABC" code, and
        # a third field stored in $lbsBk and used in the Logos reference.
        open (my $file, "<:utf8", "$dataFile") or die "$dataFile:\n$!";
        while (my $line = <$file>) {
            chomp $line;
            next unless $line =~ /^(# )?([^\t]*)\t([^\t]*)\t([^\t]*)$/;
            my ($fullBk, $numBk, $abr) = ($2, $3, $4);
            say LOG "\$fullBk: |$fullBk|, \$numBk: $numBk, \$abr: $abr, \$bk: |$bk|";
            next unless $fullBk eq $bk;

            say LOG "\$fullBk: |$fullBk|, \$bk: |$bk|";
            $lbsBk = $abr;
            if ($numBk =~ /(\d\d)-(...)/) {
                $abbr = lc $2;
                say LOG "\$abbr: $abbr";
                # Psalms paths use three-digit numbers.  The old test
                # compared against the literal string "Psalms?" and so
                # could never be true.
                my $mxl = ($bk =~ /^Psalms?$/) ? 3 : 2;
                while (length $ch < $mxl) {$ch =~ s/^/0/}
                while (length $vs < $mxl) {$vs =~ s/^/0/}
            }
            $usfmFile = "/Users/Henry/Documents/git.Door43/UGNT/${numBk}.usfm";
            say LOG "\$abbr/\$ch/\$vs: $abbr/$ch/$vs";
            # List-form system runs the command exactly once, without a
            # shell.  The earlier system `...` ran it in the backticks and
            # then executed its output.
            system("open", "-a", "/Applications/BBEdit.app", "/Users/Henry/Documents/git.Door43/en_tn/$abbr/$ch/$vs.md");
        }
        close $file;
    } else {
        die "\n\tInput unreadable.\n"
    }

    say LOG "Looking for $putative in $usfmFile";
    say LOG "\$missingLine: $missingLine, \$ref: $ref, \$bk: $bk, \$ch: $ch, \$vs: $vs, \$url: $url\n\n";
    #system `open -a /Applications/Logos.app "logosres:esv;ref=BibleESV.$lbsBk${ch}.$vs"`;
    system("open", "-a", "/Applications/Logos.app", "logos4:TextComparison;ref=BibleESV.$lbsBk${ch}.$vs;res=esv,niv2011,niv,nasb95,nrsv,gs-netbible,nlt,leb,kjv1900");
}
|
||||||
|
# Scan $usfmFile for the verse $ch:$vs and find the Strong's number on the
# word line that carries $url.  Sets $strong (leading zeros stripped) and
# stops at the first hit.  Sets $flag when the URL is attached to a phrase
# (\k-s milestone).  Dies if the sixth character of the number is not "0".
sub FindURL {
    my ($thisChap, $thisVers);

    open (my $file, "<:utf8", "$usfmFile") or die "$usfmFile:\n$!";

    while (my $line = <$file>) {
        chomp $line;
        if ($line =~ /\\mt (.*)$/) {
            # Title line: scanning is enabled only when it names our book.
            my $thisBook = $1;
            if ($thisBook eq $bk) {
                say LOG "\$thisBook: $thisBook\t\$bk: $bk";
                $doFlag = 1;
            } else {
                say LOG "\nThe wrong book is being searched.\n"
            }
        } elsif ($doFlag && $line =~ /^\\c (\d+)$/) {
            $thisChap = $1
        } elsif ($doFlag && $line =~ /^\\v (\d+)$/) {
            $thisVers = $1
        } elsif ($doFlag && $thisChap == $ch && $thisVers == $vs) {
            say LOG "$thisChap:$thisVers $line";
            # \Q..\E: the URL is literal text, not a pattern.
            if ($line =~ /strong="([GH]....)(.).*\Q$url\E/) {
                say "\n\$line:\n$line\n";
                $strong = $1;
                my $test = $2;
                if ($test ne "0") {die "\n\t\tStrong's number won't work.\n\n"}
                # $strong is always five characters here, so the former
                # "not found" checks could never fire and were removed.
                while ($strong =~ s/([GH])0/$1/) {}
                say "\033[0;1;31m$strong\033[m\n";
                last
            } elsif ($line =~ /\\k-s[^\n]*\Q$url\E/) {
                say "\n\t$url is part of a phrase\n";
                $flag = 1;
            }
        }
        # A trailing `elsif ($line =~ /\\mt (.*)$/) { $doFlag = 0 }` used to
        # follow; it repeated the first condition and was unreachable.
    }

    close $file;
}
|
||||||
|
|
||||||
|
|
||||||
|
# Show the verse $ref from the combined ULB/NASB file and highlight the ULB
# word that precedes the Strong's code $strong.  Sets and returns $word.
# When $flag is set (term is part of a phrase) the two lines for the verse
# are printed and the program exits.  If the code is not in the verse, the
# verse and the tW pages mentioning the code are listed.
# Works on file-level variables; the argument passed by the caller is unused.
sub FindVerse {
    my $fileText = read_file("$txtFile", binmode => 'utf8');

    say LOG "FindVerse |$strong|.";

    # References are anchored at line start and followed by the tab, so
    # "Mark 1:1" can no longer match inside "Mark 1:10" or "John 1:1"
    # inside "1 John 1:1".
    if ($flag && $fileText =~ /^\Q$ref\E\t[^\n]*\n[^\n]*\n/m) {
        say LOG "$ref\n$&";
        say $&;
        exit 0;
    }

    if ($fileText =~ /^\Q$ref\E(\t[^\n]*\n )([^\n]*<)$strong(.?>[^\n]*)/m) {
        my ($fore, $precon, $aft) = ($1, $2, $3);
        say LOG "\$ref: $ref\n\$fore: $fore\n\$precon:\n$precon\n\$aft: $aft";
        my $preprecon;
        # Pick the word just before the code, allowing one other <...> tag
        # between them.
        if ($precon =~ /^(.*([,>\w\'\"\- —;] |['";\.\?\!]))([\w\-]+) (<[^<>]*> )?<$/) {
            ($preprecon, $word) = ($1, $3);
            say LOG "\$&: $&\n\$preprecon: $preprecon\n\$word: $word";
        }
        say "$ref$fore$preprecon\033[0;0;32m$word\033[m <\033[0;1;31m$strong\033[m$aft\n";
    }
    else {
        say "\n\nThe Strong's code <<$strong>> is not found in $ref.\n\n";
        # system returns 0 on success.  The old `system(...) or die` died
        # exactly when bbfind succeeded, so the grep below never ran.
        system ("bbfind -g \"${ref}\\t[^\\n]*\\n[^\\n]*\" '/Users/Henry/Google Drive/WA/Test/data/ULB.NASB.Strongs.txt'") == 0
            or warn "bbfind exited with status $?\n";
        system ("find $topDir -name \"*.md\" -exec grep -H --color \"$strong\[, \\n\\r\]\" {} \\;");
        #system ("find $topDir -name \"*.md\" -exec grep -H --color \"$strong\$\" {} \\;");
    }

    return $word;
}
|
||||||
|
|
||||||
|
# List every translationWords page that mentions the Strong's number
# $strong or whose first-line heading contains $word, and queue those pages
# in $outputFiles for opening.  Dies when $word is empty.
# Works on file-level variables; the argument passed by the caller is unused.
sub ChecktWPages{
    say LOG "<<$word>>";
    die "\n\$word is empty.\n" if $word eq "";
    my $topDir = "/Users/Henry/Documents/git.Door43/en_tw/bible";

    my @filesToRun = ();
    # Real "ends in .md" test (the glob '*.md' was previously used as a regex).
    find( sub { push @filesToRun, $File::Find::name if m/\.md$/ }, $topDir) ;

    foreach my $file ( @filesToRun ) {
        my $fileText = read_file("$file", binmode => 'utf8');
        # Report each page once.  The former while(/.../g) loop printed the
        # page and appended it to $outputFiles once per match.
        next unless $fileText =~ /($strong)[^\d]|^(# [^\n]*\b$word\b)/;
        my $abb = $file;
        $abb =~ s/\.md$//;
        say "\033[0;1;31m$abb\033[m";
        $outputFiles .= "$file "
    }
}
|
||||||
|
# Final report: grep the tW pages for the Strong's number, the term from
# missing.log ($putative) and the ULB word ($word).  Then open the Blue
# Letter Bible lexicon entry in Firefox and the collected pages in BBEdit.
sub Finish {
    say "\nLooking for $strong.";
    # find $topDir -name "*.md" -exec grep -H '($strong[^\d]|$strong$)' {} +
    # (\[ after a variable stops Perl reading "$strong[" as an array element.)
    system ("find $topDir -name \"*.md\" -exec grep -H --color \"$strong\[, \\n\\r\]\" {} \\;");
    system ("find $topDir -name \"*.md\" -exec grep -H --color \"$strong\$\" {} \\;");
    #system ("find $topDir -name \"*.md\" -exec grep -H --color \"\($strong\[, \\n\\r\]\|$strong\$\)\" {} \\;");
    say "\nLooking for $putative.";
    #system ("find $topDir -name \"*.md\" -exec grep -Hi --color \"^# $putative\[^A-Za-z\]\" {} \\;");
    #system ("find $topDir -name \"*.md\" -exec grep -Hi --color \"^# .*\[^A-Za-z\]$putative\[^A-Za-z\]\" {} \\;");
    system ("find $topDir -name \"*.md\" -exec egrep -Hi --color \"^# (.*\[^A-Za-z\])?$putative\[^A-Za-z\]\" {} \\;");
    say "\nLooking for $word.";
    #system ("find $topDir -name \"*.md\" -exec grep -Hi --color \"^#$word\[^A-Za-z\]\" {} \\;");
    #system ("find $topDir -name \"*.md\" -exec grep -Hi --color \"^# .*\[^A-Za-z\]$word\[^A-Za-z\]\" {} \\;");
    system ("find $topDir -name \"*.md\" -exec egrep -Hi --color \"^# (.*\[^A-Za-z\])?$word\[^A-Za-z\]\" {} \\;");
    say "Opening .md files.";
    # List-form system runs each command once, without a shell, so the "?"
    # in the URL is never treated as a glob.  The earlier system `...` ran
    # the command in the backticks and then executed its output.
    system("open", "-a", "/Applications/Firefox.app", "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$strong");
    # $outputFiles is a space-separated list; the tW paths contain no spaces.
    system("open", "-a", "/Applications/BBEdit.app", split(' ', $outputFiles));
}
|
|
@ -0,0 +1,11 @@
|
||||||
|
# opens files with mismatched snippets
|
||||||
|
|
||||||
|
use 5.12.0;
|
||||||
|
use File::Slurp;
|
||||||
|
# Open every file named in the mismatched-snippets report in BBEdit, each
# file once, in order of first appearance.
my $openString = "";
my $fileText = read_file("/Users/Henry/Google Drive/WA/tN instructions/mismatched_snippets.html", binmode => 'utf8');

my (%seen, @paths);
while ($fileText =~ /<p><b>(.*)<\/b><\/p>/g) {
    my $path = $1;
    # Exact-match de-duplication.  The previous check used the path as a
    # regex against the accumulated string, so it also dropped any path
    # that was merely a substring of an earlier one.
    next if $seen{$path}++;
    push @paths, $path;
    $openString .= "$path ";
}

say "\n\nopen -a /Applications/BBEdit.app $openString\n";

# The old call used a single-quoted string, so Perl never interpolated
# $openString and the shell expanded it to nothing: BBEdit opened with no
# files.  List form passes the paths directly.
system("open", "-a", "/Applications/BBEdit.app", @paths);
|
|
@ -0,0 +1,33 @@
|
||||||
|
use 5.12.0;
|
||||||
|
|
||||||
|
# Convert output.dat into a CSV of translationWords links for the PDFs.
# Input alternates between reference lines ("Book C:V:") and link lines
# ("[ULB text](dir/ref)").  Each link line becomes one CSV row carrying the
# most recent reference.
my ($bk, $ch, $vs, $outputFile, $ulb, $tw, $dir, $ref) = ("", "", "", "/Users/Henry/Documents/git.Door43/en_tw/ForPDF/tWs.for.PDF.txt");
my $logPath = "/Users/Henry/Google Drive/WA/Test/out/log.log";
my $inPath  = "/Users/Henry/Google Drive/WA/Test/out/output.dat";

open(my $log, ">:utf8", $logPath) or die "$logPath:\n$!";
open(my $out, ">:utf8", $outputFile) or die "$outputFile:\n$!";
say {$out} "Book,Chapter,Verse,Term,Dir,Ref";

open (my $file, "<:utf8", $inPath) or die "$inPath:\n$!";

while (my $line = <$file>) {
    chomp $line;
    if ($line =~ /^([^:]*) (\d+):(\d+):$/) {
        ($bk, $ch, $vs) = ($1, $2, $3);
        say {$log} $line;
    } elsif ($line =~ /^\[([^\]]*)\]\(([^\)]*)\)$/) {
        ($ulb, $tw) = ($1, $2);
        # A target that is not "dir/ref" yields empty fields.  Previously
        # the dir/ref of the preceding row was silently reused.
        ($dir, $ref) = $tw =~ /^([^\/]*)\/([^\/]*)$/ ? ($1, $2) : ("", "");
        say {$out} "$bk,$ch,$vs,\"$ulb\",$dir,$ref"
    }
}

close $file;

close $out or die "$outputFile:\n$!";
close $log;

# The old call used a single-quoted string, so $outputFile was never
# interpolated and BBEdit opened without the file.
system("open", "-a", "/Applications/BBEdit.app", $outputFile);

print "\n\tDone.";
|
|
@ -4,7 +4,7 @@ These procedures work on my Mac. If a version is needed for another machine, I c
|
||||||
|
|
||||||
## Files needed
|
## Files needed
|
||||||
|
|
||||||
I have put the files I use in the Git. The directory structures will have to be changed if the work is done on another machine.
|
I have put the files I use in the Git repository. The directory structures assumed by the scripts will have to be changed if the work is done on another machine. (At some point I'll need to make this a single-directory package.)
|
||||||
|
|
||||||
### Shell scripts (Batch files in Windows)
|
### Shell scripts (Batch files in Windows)
|
||||||
|
|
||||||
|
@ -24,6 +24,10 @@ Mine.URL.Strong.Verse.pl
|
||||||
OpenMismatchedFiles.pl
|
OpenMismatchedFiles.pl
|
||||||
Output.to.csv.pl
|
Output.to.csv.pl
|
||||||
|
|
||||||
|
### Exceptions file
|
||||||
|
|
||||||
|
Exceptions.tWs.from.UGNT.txt
|
||||||
|
|
||||||
## Procedures
|
## Procedures
|
||||||
|
|
||||||
**First run do.sh**
|
**First run do.sh**
|
|
@ -0,0 +1,5 @@
|
||||||
|
sh update.sh
|
||||||
|
#perl "/Users/Henry/Google Drive/WA/Test/tWs.from.UGNT.4.pl"
|
||||||
|
#perl "/Users/Henry/Google Drive/WA/Test/tWs.from.UGNT.5.pl"
|
||||||
|
#perl "/Users/Henry/Google Drive/WA/Test/tWs.from.UGNT.6.pl"
|
||||||
|
perl "/Users/Henry/Google Drive/WA/Test/tWs.from.UGNT.7.pl"
|
|
@ -0,0 +1,2 @@
|
||||||
|
# Connects the URL, Strong's number, and ref for anomalous entries.
|
||||||
|
perl "/Users/Henry/Google Drive/WA/Test/Mine.URL.Strong.Verse.pl"
|
|
@ -0,0 +1,4 @@
|
||||||
|
perl "/Users/Henry/Google Drive/WA/Test/MakeULB.2.pl"
|
||||||
|
perl "/Users/Henry/Google Drive/WA/Test/CombineULBandNASBwithCodes.pl";
|
||||||
|
perl "/Users/Henry/Google Drive/WA/Test/GrepBook.pl";
|
||||||
|
#perl "/Users/Henry/Google Drive/WA/Test/GrepBook.0.pl";
|
|
@ -0,0 +1,21 @@
|
||||||
|
# Refresh every Door43 repository the scripts read from.
# Each pull runs in a subshell and only if its cd succeeded, so a missing
# directory can never cause `git pull` to run in the wrong place.
for repo in en_ulb en_udb en_tw en_tn en_hq
do
    (cd "/Users/Henry/Documents/git.Door43/$repo" && git pull)
done

# The Perl scripts write to the relative directory "out", so they must be
# started from the Test directory.
cd "/Users/Henry/Google Drive/WA/Test/" || exit 1
perl "/Users/Henry/Google Drive/WA/Test/MakeULB.2.pl"
perl "/Users/Henry/Google Drive/WA/Test/FindMismatchedULBSnippets.2.noChunks.pl"
# cp "/Users/Henry/Google Drive/WA/Test/out/output.dat" "/Users/Henry/Google Drive/WA/tN instructions/mismatched_snippets.html"
#perl "/Users/Henry/Google Drive/WA/Test/CombineULBandKJVwithCodes.pl"
perl "/Users/Henry/Google Drive/WA/Test/CombineULBandNASBwithCodes.pl"
open -a /Applications/Firefox.app "/Users/Henry/Google Drive/WA/tN instructions/mismatched_snippets.html"
#open -a /Applications/BBEdit.app "/Users/Henry/Google Drive/WA/tN instructions/mismatched_snippets.html"
perl "/Users/Henry/Google Drive/WA/Test/OpenMismatchedFiles.pl"
open -a /Applications/BBEdit.app "/Users/Henry/Google Drive/WA/Test/Unlocked Bible/ULB text.txt"
perl "/Users/Henry/Google Drive/WA/Test/ExtractLinksFromScratchPad.pl"
|
Loading…
Reference in New Issue