# Forked from WycliffeAssociates/en_tw (Perl, 225 lines, 5.8 KiB).
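# Before running this program, run
#     find -s . "*.md"
# on
#     /Users/Henry/Documents/git.Door43/en_tn/
# and paste the output into dir.dir in that directory.
#
# NOTE(review): nothing visible in this file reads dir.dir; the list of notes
# files is built with File::Find instead. This step may be obsolete -- confirm.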
|
|
|
|
use 5.12.0;
|
|
use File::Slurp;
|
|
use utf8;
|
|
#use open IN => ":utf8", OUT => ":utf8";
|
|
use open IO => ":utf8";
|
|
use Cwd;
|
|
use File::Find ;
|
|
|
|
$" = "\n";
|
|
|
|
my $pwd = cwd();
my $topDir = "/Users/Henry/Documents/git.Door43/en_tn";

# @filesToRun: paths of the notes files to check (filled by find() below).
# @array:      HTML fragments, one per mismatched note (filled by ProcessFiles).
my (@filesToRun, @array);
my $filePattern = '*.md' ;
# %abbrev / %full: book name <-> book code tables (filled by ReadData).
# %ulb:            ULB text keyed by "book/chapter/verse" (filled by ReadULB).
my (%abbrev, %full, %ulb);

# $filePattern is a shell-style glob, not a regex.  Interpolating it into a
# pattern as-is yields "(.*)*.md", where the dot matches any character, so
# reduce it to its literal suffix (".md") and quote that when matching.
(my $fileSuffix = $filePattern) =~ s/\A\*//;

# Collect every file under $topDir whose name ends in the suffix, skipping
# the LICENSE, README and intro files.  Inside the callback $_ is the bare
# file name and $File::Find::name is the full path.
find(
    sub {
        return unless /\Q$fileSuffix\E\z/;
        return if /(?:LICENSE|README|intro)\Q$fileSuffix\E\z/;
        push @filesToRun, $File::Find::name;
    },
    $topDir
);
|
|
|
|
# Punctuation <-> placeholder tables.
#
# Each key is used as the pattern of an s///g and each value as its
# replacement.  %toDummy swaps punctuation that would otherwise act as regex
# syntax (or clutter the comparison) for plain-letter placeholders, and turns
# a spaced ellipsis into ".*"; %fromDummy reverses the mapping for display.
my %toDummy = (
    ' \.\.\. ' => '.*',
    '\?'       => 'QM',
    '"'        => 'QD',
    "'"        => 'QS',
    '!'        => 'XM',
    '\('       => 'QOXP',
    '\)'       => 'QCP',
);

my %fromDummy = (
    '\.\*' => ' ... ',
    'QM'   => '?',
    'QD'   => '"',
    'QS'   => "'",
    'XM'   => '!',
    'QOXP' => '(',
    'QCP'  => ')',
);
|
|
|
|
|
|
# Diagnostic log.  The bareword handle LOG is kept because the subs below
# write to it by that name.  Three-argument open keeps the mode separate from
# the path, and the die message names the file and the OS error.
my $logPath = "/Users/Henry/Google Drive/WA/Test/out/log.log";
open LOG, '>', $logPath or die "Cannot open $logPath for writing: $!";

# HTML report of mismatched snippets (written at the end of the script).
my $outPath = "/Users/Henry/Google Drive/WA/tN instructions/mismatched_snippets.html";
open OUT, '>', $outPath or die "Cannot open $outPath for writing: $!";

# Load the book-name table, then the ULB text, then check every notes file.
ReadData();
ReadULB();
ProcessFiles();
|
|
|
|
# ReadData - load the book table from the DATA section.
#
# Each usable row has three tab-separated fields; the first field is ignored,
# the second is the book code and the third the full book name.  Rows that do
# not contain two tabs are skipped.  Fills two file-level hashes:
#   %abbrev  full name -> code
#   %full    code      -> full name
sub ReadData {
    while (defined(my $row = <DATA>)) {
        chomp $row;
        if (my (undef, $code, $name) = $row =~ /([^\t]*)\t([^\t]*)\t(.*)/) {
            $abbrev{$name} = $code;
            $full{$code}   = $name;
        }
    }
}
|
|
|
|
# ReadULB - load the ULB text into the file-level %ulb hash.
#
# Input rows are "<reference>\t<text>"; rows without a tab are ignored.  A
# reference of the form "Book C:V" is turned into the key "code/CC/VV", where
# code comes from %abbrev and chapter/verse are zero-padded to two digits
# (three for Psalms).  Footnotes (\f + ... \f*) are removed, runs of spaces
# are squeezed, and the text is appended to $ulb{key} followed by one space,
# so several rows with the same key are concatenated.  Spaces next to an em
# dash are removed from the accumulated text.
#
# Dies if the ULB file cannot be opened.
sub ReadULB {
    my $ulbPath = "/Users/Henry/Google Drive/WA/Test/Unlocked Bible/ULB text.txt";
    open my $in, '<', $ulbPath or die "Cannot open $ulbPath: $!";

    # Declared outside the loop: a row whose reference does not parse keeps
    # the book/chapter/verse of the previous row, so its text is appended to
    # that verse.
    # NOTE(review): presumably intended for continuation rows -- confirm.
    my ($thisBook, $thisChap, $thisVerse);

    while (my $line = <$in>) {
        chomp $line;
        next unless $line =~ /^([^\t]*)\t(.*)$/;
        my ($ref, $text) = ($1, $2);

        if ($ref =~ /^([^:]*) (\d+):(\d+)/) {
            ($thisBook, $thisChap, $thisVerse) = ($1, $2, $3);

            # Psalms uses three-digit chapter and verse numbers.
            my $width = $thisBook eq "Psalms" ? 3 : 2;
            for my $num ($thisChap, $thisVerse) {
                $num = '0' . $num while length($num) < $width;
            }

            $thisBook = $abbrev{$thisBook};
        }

        my $id = "$thisBook/$thisChap/$thisVerse";

        $text =~ s/\\f \+.*?\\f\*//g;    # strip footnotes
        $text =~ s/ {2,}/ /g;            # squeeze runs of spaces

        $ulb{$id} .= "$text ";
        $ulb{$id} =~ s/— /—/g;
        $ulb{$id} =~ s/ —/—/g;
    }

    close $in;
}
|
|
|
|
# assign passages as values to chunk keys
|
|
|
|
#Read in each file
|
|
# ProcessFiles - check every notes file against the ULB text.
#
# For each path in @filesToRun:
#   * derive the %ulb key ("book/chapter/verse") and a readable reference
#     from the last three path components;
#   * replace punctuation in both the notes and the ULB text with the
#     placeholders from %toDummy, so a note heading can be used as a pattern
#     (a spaced ellipsis becomes ".*");
#   * drop headings and lines that are not snippets, then test every
#     remaining "# heading" against the ULB text.
# Each heading that does not match is written to LOG and queued in @array as
# an HTML fragment, with the punctuation restored through %fromDummy.
sub ProcessFiles {
    foreach my $slice (@filesToRun) {
        my ($thisText, $tb, $ct, $vt);

        if ($slice =~ /^.*\/(([^\.]*)\/([^\.]*)\/([^\.]*)).md$/) {
            my $anchor = $1;
            ($tb, $ct, $vt) = ($2, $3, $4);
            $tb = $full{$tb};
            $ct =~ s/^0+//;
            $vt =~ s/^0+//;
            $thisText = $ulb{$anchor};
        }

        my $tN = read_file("$slice", binmode => 'utf8');

        foreach my $key (sort keys %toDummy) {
            $tN       =~ s/$key/$toDummy{$key}/g;
            $thisText =~ s/$key/$toDummy{$key}/g;
        }

        # Remove headings that are not ULB snippets.
        $tN =~ s/# ((General Information|Connecting Statement|translationWords):?)[^\r\n]*\r?\n//g;
        # Remove "* [[...]]" link lines.
        $tN =~ s/\* \[\[[^\r\n]*\r?\n//g;
        # Keep each heading line and drop the two lines that follow it
        # (presumably a blank line and the note body -- confirm).
        $tN =~ s/(#[^\r\n]*\r?\n)[^\r\n]*\r?\n[^\r\n]*\r?\n/$1/g;

        # The ULB text is no longer modified inside the loop, so squeeze
        # runs of spaces once up front.
        $thisText =~ s/ {2,}/ /g;

        while ($tN =~ /# ([^\r\n]*)\r?\n/g) {
            my $thisNote = $1;

            # The heading is deliberately interpolated as a pattern so that
            # ".*" can span elided words.  The eval keeps a heading that is
            # not a valid pattern from aborting the whole run; such a heading
            # is reported as a mismatch.
            my $found = eval { $thisText =~ /$thisNote/ };
            next if $found;

            say LOG ">A>\n$tb $ct:$vt\n$slice\n$thisNote\n$thisText\n<A<";

            # Restore punctuation on copies: $thisText must stay in its
            # placeholder form, because the remaining headings of this file
            # are still in placeholder form and are matched against it.
            my ($shownNote, $shownText) = ($thisNote, $thisText);
            foreach my $key (sort keys %fromDummy) {
                $shownNote =~ s/$key/$fromDummy{$key}/g;
                $shownText =~ s/$key/$fromDummy{$key}/g;
            }

            push @array, "\n<p>$tb $ct:$vt</p>\n<p><b>$slice</b></p>\n<p><i>$shownNote</i></p>\n<p>$shownText</p>";
        }
    }
}
|
|
|
|
# Write the XHTML report.  "@array" expands to the queued fragments joined
# with the list separator $" (set to a newline near the top of the script).
# The pieces are joined with newlines and nothing follows "</html>".
print OUT join("\n",
    '<?xml version="1.0" encoding="utf-8"?>',
    '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',
    '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">',
    '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">',
    '<head>',
    '<meta http-equiv="content-type" content="text/html; charset=utf-8" />',
    '<title>Mismatched Snippets</title>',
    '<meta name="generator" content="BBEdit 8.5" />',
    '</head>',
    '<body>',
    "@array",
    '</body>',
    '</html>',
) or die "Cannot write the HTML report: $!";

# Buffered write errors only surface at close, so check it on both handles.
close OUT or die "Cannot close the HTML report: $!";

close LOG or warn "Cannot close the log file: $!";

say "Done.";
|
|
|
|
__DATA__
|
|
01 gen Genesis
|
|
02 exo Exodus
|
|
03 lev Leviticus
|
|
04 num Numbers
|
|
05 deu Deuteronomy
|
|
06 jos Joshua
|
|
07 jdg Judges
|
|
08 rut Ruth
|
|
09 1sa 1 Samuel
|
|
10 2sa 2 Samuel
|
|
11 1ki 1 Kings
|
|
12 2ki 2 Kings
|
|
13 1ch 1 Chronicles
|
|
14 2ch 2 Chronicles
|
|
15 ezr Ezra
|
|
16 neh Nehemiah
|
|
17 est Esther
|
|
18 job Job
|
|
19 psa Psalms
|
|
20 pro Proverbs
|
|
21 ecc Ecclesiastes
|
|
22 sng Song of Songs
|
|
23 isa Isaiah
|
|
24 jer Jeremiah
|
|
25 lam Lamentations
|
|
26 ezk Ezekiel
|
|
27 dan Daniel
|
|
28 hos Hosea
|
|
29 jol Joel
|
|
30 amo Amos
|
|
31 oba Obadiah
|
|
32 jon Jonah
|
|
33 mic Micah
|
|
34 nam Nahum
|
|
35 hab Habakkuk
|
|
36 zep Zephaniah
|
|
37 hag Haggai
|
|
38 zec Zechariah
|
|
39 mal Malachi
|
|
41 mat Matthew
|
|
42 mrk Mark
|
|
43 luk Luke
|
|
44 jhn John
|
|
45 act Acts
|
|
46 rom Romans
|
|
47 1co 1 Corinthians
|
|
48 2co 2 Corinthians
|
|
49 gal Galatians
|
|
50 eph Ephesians
|
|
51 php Philippians
|
|
52 col Colossians
|
|
53 1th 1 Thessalonians
|
|
54 2th 2 Thessalonians
|
|
55 1ti 1 Timothy
|
|
56 2ti 2 Timothy
|
|
57 tit Titus
|
|
58 phm Philemon
|
|
59 heb Hebrews
|
|
60 jas James
|
|
61 1pe 1 Peter
|
|
62 2pe 2 Peter
|
|
63 1jn 1 John
|
|
64 2jn 2 John
|
|
65 3jn 3 John
|
|
66 jud Jude
|
|
67 rev Revelation
|
|
|