forked from hmw3/Tips_and_Hacks
340 lines
8.2 KiB
Perl
340 lines
8.2 KiB
Perl
# Before running this program, run
#     find -s . "*.md"
# on
#     /Users/Henry/Documents/WACS/en_tn/
# and paste the output into dir.dir in that directory.
|
|
|
|
use 5.12.0;
|
|
use File::Slurp;
|
|
use utf8;
|
|
#use open IN => ":utf8", OUT => ":utf8";
|
|
use open IO => ":utf8";
|
|
use Cwd;
|
|
use File::Find ;
|
|
use FindBin '$Bin';
|
|
|
|
# Join interpolated arrays with newlines, so that "@array" in the final
# report puts every collected entry on its own line.
$" = "\n";

# $d is the directory separator: backslash unless running on Linux or macOS.
my ($pwd, $d, $fileSpec, $repoPath) = (cwd(), "\\", "\.usfm");
if ($^O eq "linux" || $^O eq "darwin") {$d = "/"}

# Name of the per-platform user defaults file read by GetUserDefaults().
my ($udf) = "User_defaults.windows.txt";
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}

# GetUserDefaults() opens, reads and closes the defaults file itself and dies
# with the path and OS error if it is missing, so no handle is opened here.
GetUserDefaults();

my (@filesToRun, @array);

# Regex fragment (not a shell glob) matching the extension of note files.
my $filePattern = "\\.md";

my (%abbrev, %full, %ulb);

# Collect every *.md file under the repository, skipping files whose name
# ends in one of the listed non-note names (LICENSE, README, intro, ...).
# Inside the callback $_ is the bare file name; $File::Find::name is the path.
find(
    sub {
        return unless m/$filePattern$/;
        return if m/(?:LICENSE|README|intro|index|tR-.*|About.*|t[HN]-.*|ISSUE.*)$filePattern$/;
        push @filesToRun, $File::Find::name;
    },
    $repoPath
);

# Notes are later used as regular expressions against the verse text.
# %toDummy maps regex-source keys to replacements applied to both note and
# text before matching: " ... " becomes ".*" and punctuation that is special
# in a regex (or awkward to compare) becomes a placeholder token.
my %toDummy = (
    " \\.\\.\\. " => ".*",
    "\\?"         => "QM",
    "\""          => "QD",
    "\'"          => "QS",
    "\!"          => "XM",
    "\\("         => "QOXP",
    "\\)"         => "QCP",
);

# Inverse mapping, applied to mismatches before they are put in the report.
my %fromDummy = (
    "\\.\\*" => " ... ",
    "QM"     => "?",
    "QD"     => "\"",
    "QS"     => "'",
    "XM"     => "!",
    "QOXP"   => "(",
    "QCP"    => ")",
);

# Diagnostic log.  No explicit layer is given, so the default layers from
# "use open" still apply to these three-argument opens.
open LOG, ">", "Logs${d}log.log" or die "Cannot write Logs${d}log.log: $!";
#say LOG "\$repoPath: $repoPath\n\@filesToRun: @filesToRun";die;

# Report of note snippets that were not found in the verse text.
open OUT, ">", "Temp${d}mismatched_snippets.html"
    or die "Cannot write Temp${d}mismatched_snippets.html: $!";

# Load the book table, then the ULB text, then check every note file.
ReadData();
ReadULB();
ProcessFiles();
|
|
|
|
# Read the per-platform user defaults file ("$Bin/User/$udf") and set the
# file-level $repoPath from it.
#
# Two settings are recognised, one per line:
#   Repository directory: <dir>      base directory
#   translationNotes path: <name>    appended to the base directory
#
# On success $repoPath is "<dir>${d}<name>".  Dies if the file cannot be
# opened, or if no "Repository directory:" line supplied a non-empty value.
sub GetUserDefaults {
    say "Reading user defaults";

    my $defaultsPath = "$Bin${d}User${d}$udf";
    open (my $defaults, "<:utf8", $defaultsPath) or die "$defaultsPath:\n$!";

    my ($repoDir, $tNrepo) = ("", "");
    while (my $thisLine = <$defaults>) {
        chomp $thisLine;
        if ($thisLine =~ /^translationNotes path: (.*)$/) {
            $tNrepo = $1;
        } elsif ($thisLine =~ /^Repository directory: (.*)$/) {
            $repoDir = $1;
        }
    }
    close $defaults;

    # Validate BEFORE joining: once the separator has been appended the
    # combined path can never be the empty string, so a check made afterwards
    # would never fire.
    die "No repo path found" if $repoDir eq "";

    $repoPath = "$repoDir${d}$tNrepo";
    return;
}
|
|
|
|
|
|
# Load the book table that follows __DATA__ into the file-level lookup hashes.
#
# A usable line has three tab-separated fields.  The second and third fields
# are stored in both directions: $abbrev{<third>} = <second> and
# $full{<second>} = <third>.  Lines that do not contain two tabs are skipped.
sub ReadData {
    say "Reading Bible book names and abbreviations";
    while (<DATA>) {
        chomp;
        next unless /([^\t]*)\t([^\t]*)\t(.*)/;
        my ($code, $name) = ($2, $3);
        $abbrev{$name} = $code;
        $full{$code}   = $name;
    }
    #foreach my $key (sort keys %abbrev) {say LOG "$key\t$abbrev{$key}"}
    #foreach my $key (sort keys %full) {say LOG "$key\t$full{$key}"}
}
|
|
|
|
# Load the ULB text from "Temp/ULB_text.txt" into the file-level %ulb hash.
#
# Each usable line is "<reference><TAB><text>".  A reference of the form
# "<Book> <chapter>:<verse>" is turned into the key
# "<abbrev>${d}<chapter>${d}<verse>", where <abbrev> comes from %abbrev and
# chapter/verse are left-padded with zeros to two digits (three for Psalms).
# A line whose reference does not have that form is appended to the most
# recently seen book/chapter/verse.  USFM footnotes (\f + ... \f*) are removed
# from the text, runs of spaces are collapsed and spaces around em dashes are
# dropped.  Text for the same key is concatenated, each piece followed by a
# single space.
#
# Dies if the input file cannot be opened.
sub ReadULB {
    say "Reading ULB";

    my $ulbPath = "Temp${d}ULB_text.txt";
    # No explicit layer is given, so the default layers from "use open" apply.
    open my $in, "<", $ulbPath or die "Cannot read $ulbPath: $!";

    # These persist across lines so that a line without a parsable reference
    # reuses the previous one.
    my ($thisBook, $thisChap, $thisVerse);

    while (my $line = <$in>) {
        chomp $line;
        next unless $line =~ /^([^\t]*)\t(.*)$/;
        my ($ref, $text) = ($1, $2);

        if ($ref =~ /^([^:]*) (\d+):(\d+)/) {
            ($thisBook, $thisChap, $thisVerse) = ($1, $2, $3);

            # Psalms uses three-digit chapter and verse numbers.  Recognise it
            # by its abbreviation as well as by the English name, so that a
            # translated book table keeps working.
            my $isPsalms = $thisBook eq "Psalms"
                || ($abbrev{$thisBook} // "") eq "psa";
            my $width = $isPsalms ? 3 : 2;
            $thisChap  = "0$thisChap"  while length $thisChap  < $width;
            $thisVerse = "0$thisVerse" while length $thisVerse < $width;

            $thisBook = $abbrev{$thisBook};
        }

        my $id = "$thisBook${d}$thisChap${d}$thisVerse";

        $text =~ s/\\f \+.*?\\f\*//g;    # strip USFM footnotes
        $text =~ s/ {2,}/ /g;

        $ulb{$id} .= "$text ";
        $ulb{$id} =~ s/— /—/g;
        $ulb{$id} =~ s/ —/—/g;
    }
    close $in;
    #foreach my $key (sort keys %ulb) {say LOG "|$key|\t<$ulb{$key}>"}
    return;
}
|
|
|
|
# assign passages as values to chunk keys
|
|
|
|
#Read in each file
|
|
# Check every note file in @filesToRun against the ULB text in %ulb.
#
# For a path ending in <book>/<chapter>/<verse>.md the matching %ulb entry is
# looked up under "<book><sep><chapter><sep><verse>".  The file is read, both
# the file content and the verse text are encoded with %toDummy, boilerplate
# headings and link lines are removed, and every remaining "# ..." heading is
# taken as a snippet that must occur in the verse text.  The snippet is used
# as a regular expression (so an encoded " ... " matches any run of text).
# Each snippet that does not match is logged and appended to @array as an
# HTML fragment, with the placeholders decoded again via %fromDummy.
#
# Progress (one line per new book) goes to STDOUT, diagnostics to LOG.
sub ProcessFiles {
    say "Processing files";
    my $oldBook;

    foreach my $slice (@filesToRun) {
        # File::Find reports forward slashes; use backslashes off Linux/macOS.
        if ($^O ne "linux" && $^O ne "darwin") {$slice =~ s!\/!\\!g}
        say LOG ">>\$slice: $slice<<";

        my ($thisText, $curRef, $tb, $ct, $vt, $anchor);
        if (   $slice =~ /^.*\/(([^\.]*)\/([^\.]*)\/([^\.]*))\.md$/
            || $slice =~ /^.*\\(([^\.]*)\\([^\.]*)\\([^\.]*))\.md$/)
        {
            $curRef = $1;
            $anchor = $1;
            ($tb, $ct, $vt) = ($2, $3, $4);
            $tb = $full{$tb};
            if ($tb ne $oldBook) {
                say $tb;
                $oldBook = $tb;
            }
            # Drop the zero padding for display.
            $ct =~ s/^0+//;
            $vt =~ s/^0+//;
            say LOG ">3>$anchor > $tb $ct:$vt<3<";
            $thisText = $ulb{$anchor};
            say LOG ">5>\$anchor: $anchor; \$thisText:\n$thisText<5<";
        }

        my $tN = read_file("$slice", binmode => 'utf8');
        say LOG ">6>\$slice: $slice; \$tN:\n$tN<6<";

        # Encode note and text the same way so they stay comparable.
        foreach my $key (sort keys %toDummy) {
            $tN       =~ s/$key/$toDummy{$key}/g;
            $thisText =~ s/$key/$toDummy{$key}/g;
        }

        # Remove boilerplate heading lines and "* [[...]]" link lines, then
        # the two lines that follow each remaining heading.
        $tN =~ s/# ((General Information|Connecting Statement|translationWords|Informasi Umum):?)[^\r\n]*\r?\n//g;
        $tN =~ s/\* \[\[[^\r\n]*\r?\n//g;
        $tN =~ s/(#[^\r\n]*\r?\n)[^\r\n]*\r?\n[^\r\n]*\r?\n/$1/g;
        say LOG ">7>\n\$curRef: $curRef\n\$thisText:$thisText\n\$tN: $tN\n<7<";

        $tN =~ s/\\\*//g;
        $tN =~ s/\n#{2,} /\n/g;
        $tN =~ s/\[\[[^\]]*\]\]//g;
        $tN =~ s/(\x{A0})+\n/\n/g;

        while ($tN =~ /# ([^\r\n]*)\r?\n/g) {
            my $thisNote = $1;
            $thisNote =~ s/^ +//;
            $thisNote =~ s/ +$//;
            say LOG ">8>\t\$thisNote: $thisNote\n\$thisText: $thisText<8<";
            $thisText =~ s/ {2,}/ /g;
            say LOG ">9>\t>\t|$thisNote|\n$thisText<9<";

            # The (?:...) wrapper keeps an empty note from becoming an empty
            # pattern, which Perl would silently replace with the last
            # successfully matched pattern.
            unless ($thisText =~ /(?:$thisNote)/) {
                say LOG ">A>\n$tb $ct:$vt\n$slice\n$thisNote\n$thisText\n<A<";

                # Decode copies only: $thisText must stay encoded, because the
                # remaining notes of this file are still encoded and are
                # matched against it on later iterations.
                my ($shownNote, $shownText) = ($thisNote, $thisText);
                foreach my $key (sort keys %fromDummy) {
                    $shownNote =~ s/$key/$fromDummy{$key}/g;
                    $shownText =~ s/$key/$fromDummy{$key}/g;
                }
                push @array, "\n<p><span style=\"color:red;\">$tb $ct:$vt</span></p>\n<p><b>$slice</b></p>\n<p><i>$shownNote</i></p>\n<p>$shownText</p>";
            }
        }
    }
    return;
}
|
|
|
|
# Write the XHTML report: a fixed header, every collected mismatch from @array
# (one entry per line), and the closing tags.  The pieces are joined with
# newlines and there is no newline after the final "</html>".
print OUT join("\n",
    '<?xml version="1.0" encoding="utf-8"?>',
    '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',
    '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">',
    '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">',
    '<head>',
    '<meta http-equiv="content-type" content="text/html; charset=utf-8" />',
    '<title>Mismatched Snippets</title>',
    '<meta name="generator" content="BBEdit 8.5" />',
    '</head>',
    '<body>',
    join("\n", @array),
    '</body>',
    '</html>',
);

# Buffered write errors only surface at close, so check it: a failure here
# means the report on disk is incomplete.
close OUT or die "Cannot finish writing Temp${d}mismatched_snippets.html: $!";

# The log is diagnostic only; report a failure but do not abort.
close LOG or warn "Cannot finish writing Logs${d}log.log: $!";

say "Done.";
|
|
|
|
#
|
|
# Indonesian data
|
|
#__DATA__
|
|
#1 gen KEJADIAN
|
|
#2 exo KELUARAN
|
|
#3 lev IMAMAT
|
|
#4 num BILANGAN
|
|
#5 deu ULANGAN
|
|
#6 jos YOSUA
|
|
#7 jdg HAKIM-HAKIM
|
|
#8 rut RUT
|
|
#9 1sa 1 SAMUEL
|
|
#10 2sa 2 SAMUEL
|
|
#11 1ki 1 RAJA-RAJA
|
|
#12 2ki 2 RAJA-RAJA
|
|
#13 1ch 1 TAWARIKH
|
|
#14 2ch 2 TAWARIKH
|
|
#15 ezr EZRA
|
|
#16 neh NEHEMIA
|
|
#17 est ESTER
|
|
#18 job AYUB
|
|
#19 psa MAZMUR
|
|
#20 pro AMSAL
|
|
#21 ecc PENGKHOTBAH
|
|
#22 sng KIDUNG AGUNG
|
|
#23 isa YESAYA
|
|
#24 jer YEREMIA
|
|
#25 lam RATAPAN
|
|
#26 ezk YEHEZKIEL
|
|
#27 dan DANIEL
|
|
#28 hos HOSEA
|
|
#29 jol YOEL
|
|
#30 amo AMOS
|
|
#31 oba OBAJA
|
|
#32 jon YUNUS
|
|
#33 mic MIKHA
|
|
#34 nam NAHUM
|
|
#35 hab HABAKUK
|
|
#36 zep ZEFANYA
|
|
#37 hag HAGAI
|
|
#38 zec ZAKHARIA
|
|
#39 mal MALEAKHI
|
|
#41 mat MATIUS
|
|
#42 mrk MARKUS
|
|
#43 luk LUKAS
|
|
#44 jhn YOHANES
|
|
#45 act KISAH PARA RASUL
|
|
#46 rom ROMA
|
|
#47 1co 1 KORINTUS
|
|
#48 2co 2 KORINTUS
|
|
#49 gal GALATIA
|
|
#50 eph EFESUS
|
|
#51 php FILIPI
|
|
#52 col KOLOSE
|
|
#53 1th 1 TESALONIKA
|
|
#54 2th 2 TESALONIKA
|
|
#55 1ti 1 TIMOTIUS
|
|
#56 2ti 2 TIMOTIUS
|
|
#57 tit TITUS
|
|
#58 phm FILEMON
|
|
#59 heb IBRANI
|
|
#60 jas YAKOBUS
|
|
#61 1pe 1 PETRUS
|
|
#62 2pe 2 PETRUS
|
|
#63 1jn 1 YOHANES
|
|
#64 2jn 2 YOHANES
|
|
#65 3jn 3 YOHANES
|
|
#66 jud YUDAS
|
|
#67 rev WAHYU
|
|
# English data
|
|
__DATA__
|
|
01 gen Genesis
|
|
02 exo Exodus
|
|
03 lev Leviticus
|
|
04 num Numbers
|
|
05 deu Deuteronomy
|
|
06 jos Joshua
|
|
07 jdg Judges
|
|
08 rut Ruth
|
|
09 1sa 1 Samuel
|
|
10 2sa 2 Samuel
|
|
11 1ki 1 Kings
|
|
12 2ki 2 Kings
|
|
13 1ch 1 Chronicles
|
|
14 2ch 2 Chronicles
|
|
15 ezr Ezra
|
|
16 neh Nehemiah
|
|
17 est Esther
|
|
18 job Job
|
|
19 psa Psalms
|
|
20 pro Proverbs
|
|
21 ecc Ecclesiastes
|
|
22 sng Song of Songs
|
|
23 isa Isaiah
|
|
24 jer Jeremiah
|
|
25 lam Lamentations
|
|
26 ezk Ezekiel
|
|
27 dan Daniel
|
|
28 hos Hosea
|
|
29 jol Joel
|
|
30 amo Amos
|
|
31 oba Obadiah
|
|
32 jon Jonah
|
|
33 mic Micah
|
|
34 nam Nahum
|
|
35 hab Habakkuk
|
|
36 zep Zephaniah
|
|
37 hag Haggai
|
|
38 zec Zechariah
|
|
39 mal Malachi
|
|
41 mat Matthew
|
|
42 mrk Mark
|
|
43 luk Luke
|
|
44 jhn John
|
|
45 act Acts
|
|
46 rom Romans
|
|
47 1co 1 Corinthians
|
|
48 2co 2 Corinthians
|
|
49 gal Galatians
|
|
50 eph Ephesians
|
|
51 php Philippians
|
|
52 col Colossians
|
|
53 1th 1 Thessalonians
|
|
54 2th 2 Thessalonians
|
|
55 1ti 1 Timothy
|
|
56 2ti 2 Timothy
|
|
57 tit Titus
|
|
58 phm Philemon
|
|
59 heb Hebrews
|
|
60 jas James
|
|
61 1pe 1 Peter
|
|
62 2pe 2 Peter
|
|
63 1jn 1 John
|
|
64 2jn 2 John
|
|
65 3jn 3 John
|
|
66 jud Jude
|
|
67 rev Revelation
|