Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/FindMismatchedULBSnippets.3...

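# Legacy setup note (the script as written collects the .md files itself via
# File::Find and never reads dir.dir, so this step looks obsolete):
# Before running this program, run
#   find -s . -name "*.md"
# in
#   /Users/Henry/Documents/WACS/en_tn/
# and paste the output into dir.dir in that directory.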

use 5.12.0;
use File::Slurp;
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
use Cwd;
use File::Find ;
use FindBin '$Bin';

# List separator used whenever an array is interpolated into a string; the
# log lines and the HTML report body rely on it.
$" = "\n";

# $d is the directory separator for the current platform and $repoPath is
# filled in by GetUserDefaults(). $pwd and $fileSpec are not referenced
# anywhere else in this file; they are kept so the declarations stay intact.
my ($pwd, $d, $fileSpec, $repoPath) = (cwd(), "\\", "\.usfm");
if ($^O eq "linux" || $^O eq "darwin") {$d = "/"}

# Per-platform user defaults file, looked up under "$Bin${d}User".
my ($udf) = "User_defaults.windows.txt";
if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}

# GetUserDefaults() opens, reads and closes the defaults file itself (and dies
# with the path and OS error if that fails), so no second handle is opened here.
GetUserDefaults();

my (@filesToRun, @array);
# Regex for the file names to check: a literal ".md" extension. (The former
# glob-style "*.md" was interpolated into regexes, where "*" and "." are
# metacharacters rather than wildcards.)
my $filePattern = qr/\.md\z/;
my (%abbrev, %full, %ulb);

# Collect every regular .md file below $repoPath, skipping licence, readme,
# intro, index and similar non-note files.
find(
	sub {
		return unless -f $_;
		return unless m/$filePattern/;
		return if m/(?:LICENSE|README|intro|index|tR-.*|About.*|t[HN]-.*|ISSUE.*)$filePattern/;
		push @filesToRun, $File::Find::name;
	},
	$repoPath
);

# Substitution tables used by ProcessFiles(): keys are regex patterns, values
# are their replacements. %toDummy turns " ... " into the wildcard ".*" and
# swaps punctuation for placeholder tokens; %fromDummy reverses that so the
# report shows the original punctuation.
my %toDummy = (
	" \\.\\.\\. " => ".*",
	"\\?"         => "QM",
	"\""          => "QD",
	"\'"          => "QS",
	"\!"          => "XM",
	"\\("         => "QOXP",
	"\\)"         => "QCP",
);
my %fromDummy = (
	"\\.\\*" => " ... ",
	"QM"     => "?",
	"QD"     => "\"",
	"QS"     => "'",
	"XM"     => "!",
	"QOXP"   => "(",
	"QCP"    => ")",
);


# Debug log, written relative to the current working directory.
open LOG, ">", "Logs${d}log.log" or die "Logs${d}log.log:\n$!";
#say LOG "\$repoPath: $repoPath\n\@filesToRun: @filesToRun";die;

# HTML report of the mismatched snippets.
open OUT, ">", "Temp${d}mismatched_snippets.html" or die "Temp${d}mismatched_snippets.html:\n$!";

# Load the book-name table and the ULB text, then check every note file.
ReadData();
ReadULB();
ProcessFiles();

# Reads the per-platform user defaults file ("$Bin${d}User${d}$udf") and sets
# the file-level $repoPath to
#   <Repository directory><separator><translationNotes path>
#
# Recognised lines in the defaults file:
#   Repository directory: <path>
#   translationNotes path: <path>
# If a key occurs more than once, the last occurrence wins.
#
# Takes no arguments and returns nothing.
# Dies if the defaults file cannot be opened, or if no non-empty
# "Repository directory" line is present. That check is made BEFORE the
# separator is appended; checking afterwards could never fail, because the
# combined string always contains at least the separator.
# Warns (but continues, as before) when no "translationNotes path" is given.
sub GetUserDefaults {
	say "Reading user defaults";
	my $defaultsPath = "$Bin${d}User${d}$udf";
	my ($repoDir, $tNrepo);
	open (my $defaults, "<:utf8", $defaultsPath) or die "$defaultsPath:\n$!";

	while (my $thisLine = <$defaults>) {
		chomp $thisLine;
		if ($thisLine =~ /^translationNotes path: (.*)$/) {
			$tNrepo = $1;
		} elsif ($thisLine =~ /^Repository directory: (.*)$/) {
			$repoDir = $1;
		}
	}

	close $defaults;

	die "No repo path found" if !defined $repoDir || $repoDir eq "";

	if (!defined $tNrepo || $tNrepo eq "") {
		warn "No translationNotes path found in $defaultsPath\n";
		$tNrepo //= "";
	}

	$repoPath = "$repoDir${d}$tNrepo";
	return;
}


# Fills the file-level lookup tables from the tab-separated rows after
# __DATA__ (number, abbreviation, full book name):
#   %abbrev : full book name -> abbreviation
#   %full   : abbreviation   -> full book name
# The leading number column is not stored. Rows without two tabs are skipped.
sub ReadData {
	say "Reading Bible book names and abbreviations";
	while (my $row = <DATA>) {
		chomp $row;
		next unless $row =~ /([^\t]*)\t([^\t]*)\t(.*)/;
		my ($code, $name) = ($2, $3);
		$abbrev{$name} = $code;
		$full{$code} = $name;
	}
	#foreach my $key (sort keys %abbrev) {say LOG "$key\t$abbrev{$key}"}
	#foreach my $key (sort keys %full) {say LOG "$key\t$full{$key}"}
}

# Loads the ULB text from "Temp${d}ULB_text.txt" into the file-level %ulb hash.
#
# Only lines of the form "<reference><TAB><text>" are used. When the reference
# looks like "<Book name> <chapter>:<verse>", the book name is translated to
# its abbreviation through %abbrev, and chapter and verse are left-padded with
# zeros: three digits for the book abbreviated "psa", two digits otherwise.
# The Psalms test uses the abbreviation rather than the English name so that
# it keeps working when a different language table is placed after __DATA__.
# The hash key is "<abbrev>${d}<chapter>${d}<verse>".
#
# A line whose reference does not have that shape reuses the book, chapter and
# verse of the most recent reference that did, so its text is appended to that
# entry.
#
# Before storing, footnotes (\f + ... \f*) are removed and runs of spaces are
# collapsed; spaces next to an em dash are removed from the stored entry.
#
# Takes no arguments. Dies with the path and OS error if the file cannot be
# opened.
sub ReadULB {
	say "Reading ULB";
	my $ulbPath = "Temp${d}ULB_text.txt";
	open (my $in, "<", $ulbPath) or die "$ulbPath:\n$!";

	# These persist across lines on purpose (see the note on non-matching
	# references above).
	my ($thisBook, $thisChap, $thisVerse);

	while (my $line = <$in>) {
		chomp $line;
		next unless $line =~ /^([^\t]*)\t(.*)$/;
		my ($ref, $text) = ($1, $2);

		if ($ref =~ /^([^:]*) (\d+):(\d+)/) {
			my $bookName = $1;
			($thisChap, $thisVerse) = ($2, $3);
			$thisBook = $abbrev{$bookName};

			my $width = (defined $thisBook && $thisBook eq "psa") ? 3 : 2;
			foreach my $num ($thisChap, $thisVerse) {
				# $num aliases the variable, so the padding is applied in place.
				my $missing = $width - length($num);
				$num = ("0" x $missing) . $num if $missing > 0;
			}
		}

		my $id = "$thisBook${d}$thisChap${d}$thisVerse";
		$text =~ s/\\f \+.*?\\f\*//g;
		$text =~ s/ {2,}/ /g;
		$ulb{$id} .= "$text ";
		$ulb{$id} =~ s/— /—/g;
		$ulb{$id} =~ s/ —/—/g;
		# say LOG "\$id = $id\n\$ulb{$id} = $ulb{$id}";
	}
	close $in;
	#foreach my $key (sort keys %ulb) {say LOG "|$key|\t<$ulb{$key}>"}
}

#	assign passages as values to chunk keys

#Read in each file
# Checks every note file in @filesToRun against the ULB text and queues an
# HTML fragment in @array for each note heading that is not found in the
# corresponding ULB passage.
#
# For each file:
#   * the last three path components (<book>/<chapter>/<verse>.md) give the
#     %ulb key and, via %full, the book name that is printed once per book;
#   * the file and the ULB passage are run through %toDummy, which turns
#     " ... " into the wildcard ".*" and punctuation into placeholder tokens;
#   * boilerplate headings, link lines and the two lines following each
#     remaining heading are stripped, so only "# <snippet>" lines are left;
#   * each snippet is matched against the passage. The snippet is treated as
#     literal text, with only the ".*" produced from an ellipsis acting as a
#     wildcard, so stray regex metacharacters in a note (for example "*", "+"
#     or "[") can neither abort the run nor change what is matched.
#
# The %fromDummy restoration is done on copies made for the report, so the
# passage stays in its placeholder form for the remaining notes of the file;
# restoring it in place would make later snippets containing placeholders
# fail to match.
#
# Takes no arguments. Writes trace output to LOG and appends to @array.
sub ProcessFiles {
	say "Processing files";
	my $oldBook;
	foreach my $slice (@filesToRun) {
		# File::Find reports names with "/"; use backslashes on other platforms.
		if ($^O ne "linux" && $^O ne "darwin") {$slice =~ s!\/!\\!g}
		say LOG ">>\$slice: $slice<<";
		my ($thisText, $thisNote, $curRef, $tb, $ct, $vt, $anchor);
		if ($slice =~ /^.*\/(([^\.]*)\/([^\.]*)\/([^\.]*)).md$/ || $slice =~ /^.*\\(([^\.]*)\\([^\.]*)\\([^\.]*)).md$/) {
			$curRef = $1;
			$anchor = $1;
			($tb, $ct, $vt) = ($2, $3, $4);
			$tb = $full{$tb};
			# Progress indicator: print each book name once.
			if ($tb ne $oldBook) {
				say $tb;
				$oldBook = $tb
			}
			$ct =~ s/^0+//;
			$vt =~ s/^0+//;
			say LOG ">3>$anchor > $tb $ct:$vt<3<";
			$thisText = $ulb{$anchor};
			say LOG ">5>\$anchor: $anchor; \$thisText:\n$thisText<5<";
		}
		my $tN = read_file("$slice", binmode => 'utf8');
		say LOG ">6>\$slice: $slice; \$tN:\n$tN<6<";
		foreach my $key (sort keys %toDummy) {
			$tN =~ s/$key/$toDummy{$key}/g;
			$thisText =~ s/$key/$toDummy{$key}/g;
		}
		$tN =~ s/# ((General Information|Connecting Statement|translationWords|Informasi Umum):?)[^\r\n]*\r?\n//g;
		$tN =~ s/\* \[\[[^\r\n]*\r?\n//g;
		# Keep each heading line and drop the two lines that follow it.
		$tN =~ s/(#[^\r\n]*\r?\n)[^\r\n]*\r?\n[^\r\n]*\r?\n/$1/g;
		say LOG ">7>\n\$curRef: $curRef\n\$thisText:$thisText\n\$tN: $tN\n<7<";
		$tN =~ s/\\\*//g;
		$tN =~ s/\n#{2,} /\n/g;
		$tN =~ s/\[\[[^\]]*\]\]//g;
		$tN =~ s/(\x{A0})+\n/\n/g;
		while ($tN =~ /# ([^\r\n]*)\r?\n/g) {
			$thisNote = $1;
			$thisNote =~ s/^ +//;
			$thisNote =~ s/ +$//;
			say LOG ">8>\t\$thisNote: $thisNote\n\$thisText: $thisText<8<";
			$thisText =~ s/ {2,}/ /g;
			say LOG ">9>\t>\t|$thisNote|\n$thisText<9<";

			# Quote everything except the ".*" wildcards. The (?:...) wrapper
			# keeps the pattern non-empty, because an empty pattern would make
			# Perl reuse the last successfully matched regex.
			my $notePattern = join ".*", map { quotemeta } split /\.\*/, $thisNote, -1;
			unless ($thisText =~ /(?:$notePattern)/) {
				say LOG ">A>\n$tb $ct:$vt\n$slice\n$thisNote\n$thisText\n<A<";
				# Restore the punctuation on copies only (see header comment).
				my ($shownNote, $shownText) = ($thisNote, $thisText);
				foreach my $key (sort keys %fromDummy) {
					$shownNote =~ s/$key/$fromDummy{$key}/g;
					$shownText =~ s/$key/$fromDummy{$key}/g;
				}
				push @array, "\n<p><span style=\"color:red;\">$tb $ct:$vt</span></p>\n<p><b>$slice</b></p>\n<p><i>$shownNote</i></p>\n<p>$shownText</p>";
			}
		}
	}
}

# Write the XHTML report: a fixed page skeleton with the queued mismatch
# fragments from @array (joined by the $" separator) as the body.
{
	my $report = <<"END_REPORT";
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
	<meta http-equiv="content-type" content="text/html; charset=utf-8" />
	<title>Mismatched Snippets</title>
	<meta name="generator" content="BBEdit 8.5" />
</head>
<body>
@array
</body>
</html>
END_REPORT
	# The here-document ends with a newline; the report itself does not.
	chomp $report;
	print OUT $report;
}
close OUT;

close LOG;

say "Done.";

#
# Indonesian data
#__DATA__
#1	gen	KEJADIAN
#2	exo	KELUARAN
#3	lev	IMAMAT
#4	num	BILANGAN
#5	deu	ULANGAN
#6	jos	YOSUA
#7	jdg	HAKIM-HAKIM
#8	rut	RUT
#9	1sa	1 SAMUEL
#10	2sa	2 SAMUEL
#11	1ki	1 RAJA-RAJA
#12	2ki	2 RAJA-RAJA
#13	1ch	1 TAWARIKH
#14	2ch	2 TAWARIKH
#15	ezr	EZRA
#16	neh	NEHEMIA
#17	est	ESTER
#18	job	AYUB
#19	psa	MAZMUR
#20	pro	AMSAL
#21	ecc	PENGKHOTBAH
#22	sng	KIDUNG AGUNG
#23	isa	YESAYA
#24	jer	YEREMIA
#25	lam	RATAPAN
#26	ezk	YEHEZKIEL
#27	dan	DANIEL
#28	hos	HOSEA
#29	jol	YOEL
#30	amo	AMOS
#31	oba	OBAJA
#32	jon	YUNUS
#33	mic	MIKHA
#34	nam	NAHUM
#35	hab	HABAKUK
#36	zep	ZEFANYA
#37	hag	HAGAI
#38	zec	ZAKHARIA
#39	mal	MALEAKHI
#41	mat	MATIUS
#42	mrk	MARKUS
#43	luk	LUKAS
#44	jhn	YOHANES
#45	act	KISAH PARA RASUL
#46	rom	ROMA
#47	1co	1 KORINTUS
#48	2co	2 KORINTUS
#49	gal	GALATIA
#50	eph	EFESUS
#51	php	FILIPI
#52	col	KOLOSE
#53	1th	1 TESALONIKA
#54	2th	2 TESALONIKA
#55	1ti	1 TIMOTIUS
#56	2ti	2 TIMOTIUS
#57	tit	TITUS
#58	phm	FILEMON
#59	heb	IBRANI
#60	jas	YAKOBUS
#61	1pe	1 PETRUS
#62	2pe	2 PETRUS
#63	1jn	1 YOHANES
#64	2jn	2 YOHANES
#65	3jn	3 YOHANES
#66	jud	YUDAS
#67	rev	WAHYU
# English data
__DATA__
01	gen	Genesis
02	exo	Exodus
03	lev	Leviticus
04	num	Numbers
05	deu	Deuteronomy
06	jos	Joshua
07	jdg	Judges
08	rut	Ruth
09	1sa	1 Samuel
10	2sa	2 Samuel
11	1ki	1 Kings
12	2ki	2 Kings
13	1ch	1 Chronicles
14	2ch	2 Chronicles
15	ezr	Ezra
16	neh	Nehemiah
17	est	Esther
18	job	Job
19	psa	Psalms
20	pro	Proverbs
21	ecc	Ecclesiastes
22	sng	Song of Songs
23	isa	Isaiah
24	jer	Jeremiah
25	lam	Lamentations
26	ezk	Ezekiel
27	dan	Daniel
28	hos	Hosea
29	jol	Joel
30	amo	Amos
31	oba	Obadiah
32	jon	Jonah
33	mic	Micah
34	nam	Nahum
35	hab	Habakkuk
36	zep	Zephaniah
37	hag	Haggai
38	zec	Zechariah
39	mal	Malachi
41	mat	Matthew
42	mrk	Mark
43	luk	Luke
44	jhn	John
45	act	Acts
46	rom	Romans
47	1co	1 Corinthians
48	2co	2 Corinthians
49	gal	Galatians
50	eph	Ephesians
51	php	Philippians
52	col	Colossians
53	1th	1 Thessalonians
54	2th	2 Thessalonians
55	1ti	1 Timothy
56	2ti	2 Timothy
57	tit	Titus
58	phm	Philemon
59	heb	Hebrews
60	jas	James
61	1pe	1 Peter
62	2pe	2 Peter
63	1jn	1 John
64	2jn	2 John
65	3jn	3 John
66	jud	Jude
67	rev	Revelation