Now needed for tagging

2020-07-28 17:34:00 -04:00 · 2020-07-28 17:34:00 -04:00 · 91d789da65
parent 13b8c8c948
commit 91d789da65
8 changed files with 1315 additions and 0 deletions
--- a/Build_OL_files_from_XML.pl
+++ b/Build_OL_files_from_XML.pl
@ -0,0 +1,180 @@
 # Builds easily searchable file from current OGNT and MAST-HB XML file
 # Takes verse at a time from slurped file
 # Useful for Mine routine building MAST PDF
 use 5.18.0;
 use File::Slurp;
 use File::Find ; 
 use Cwd ; 
 use utf8;
 #use open IN => ":utf8", OUT => ":utf8";
 use open IO => ":utf8";
 # Log and output files are written as UTF-8; stop immediately if either
 # cannot be created.
 open(LOG, ">:utf8", "Logs/log.txt") || die;
 open(OUT, ">:utf8", "Output/Original_languages.txt") || die;
 # Source folders to scan, in processing order.
 my @folders = (
 	"/Users/Henry/Documents/WACS/MAST_HB",
 	"/Users/Henry/Documents/WACS/OGNT",
 );
 # %order maps the first column of the book table to the full book name,
 # %long maps the second column (abbreviation) to the full book name.
 my %order;
 my %long;
 my $outText;
 # Load the tab-separated book table that follows __DATA__.
 while (my $row = <DATA>) {
 	chomp $row;
 	next unless $row =~ /^([^\t]*)\t([^\t]*)\t(.*)$/;
 	my ($number, $abbrev, $fullName) = ($1, $2, $3);
 	$order{$number} = $fullName;
 	$long{$abbrev} = $fullName;
 }
 #foreach my $key (sort keys %long) {
 #	say LOG $key . "\t" . $long{$key};
 #}
 # Main pass: for every source folder, collect its XML files, scan each file
 # one <verse> element at a time, and build up $outText. Verses that carry a
 # <note>KJV:book.chapter.verse</note> are re-labelled or split according to
 # the reference in the note.
 foreach my $folder (@folders) {
 	say LOG "$folder";
 	#system "cd $folder;xml val *.xml;echo 'Continue? (Control + C to quit, Enter to continue)';read name;";
 	# $lang is "G" for the OGNT folder and "H" otherwise. In the code visible
 	# here it is only referenced from a commented-out line after the verse loop.
 	my ($topDir, $lang) = ($folder, "H");
 	if ($folder =~ /OGNT/) {$lang = "G"}
 	my @filesToRun = ();
 	# NOTE(review): this glob-style string is interpolated into the regex below,
 	# where '*' and '.' are regex metacharacters -- confirm that is intended.
 	my $filePattern = '*.xml' ; 
 	find( sub { push @filesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ;
 	@filesToRun = sort @filesToRun;
 	foreach  my $file ( @filesToRun  ) {
 	   say LOG $file;
 	   # Slurp the whole file so verses can be matched across line breaks.
 	   my $fileText = read_file("$file", binmode => 'utf8');
 	   my ($bk, $ch, $vs, $lemma, $word, $nbk, $nch, $nvs, $previous, $current, $interruption, $next, $verse, $thisBookText, $prevVsText, $holdText, $thisVsText, $nextVsText, $oldHold, $shortCur, $shortIntr);
 	   # $1 = full osisID, $2 = book abbreviation, $3 = chapter, $4 = verse.
 	   while ($fileText =~ /<verse osisID="((.*?)\.(\d+)\.(\d+))"(\n|.)*?<\/verse>/spg) {
 			$verse = $&;
 			say LOG "\$1: $1, \$2: $2, \$3: $3, \$4: $4";
 			# Translate the abbreviation to the full book name via %long.
 			($shortCur, $bk, $ch, $vs) = ($1, $long{$2}, $3, $4);
 			$previous = $current;
 			$current = "$bk $ch:$vs";
 			say LOG "<0>\t\$current: $current";
 			my $verseText;
   			if ($verse =~ /<note>KJV:(([^\.]*)\.([^\.]*).([^<]*))<\/note>/p) { # Occurs only in OT
 				say LOG "<1>\t$&";
 				# $interruption is the reference named inside the KJV note.
 				($shortIntr, $nbk, $nch, $nvs) = ($1, $long{$2}, $3, $4);
 				$interruption = "$nbk $nch:$nvs";
 				say LOG "<2>\t\$interruption: $interruption (of $current)";
 				# Case <3>: the note sits directly after the opening <verse> tag, so
 				# the whole verse takes the reference from the note.
 				# NOTE(review): $verseText built here is not appended to $outText in
 				# the code visible here (the append below is commented out).
 				if ($verse =~ /<verse osisID="$shortCur">\n[^<\n]*<note>KJV:$shortIntr<\/note>/) { # Complete renumber of verse
 					say LOG "<3>\t$&";
 					$current = $interruption;
 					$verseText = GetContent($verse);
 					$verseText = "$current\t$oldHold$verseText";
 					$oldHold = "";
 				}
 				# Case <4>: the note names a different verse; text before the note is
 				# emitted under $current, text after it is held in $oldHold.
 				elsif ($interruption ne $current && $verse =~ /<note>KJV:([^\.]*)\.([^\.]*).([^<]*)<\/note>/p) { # New verse begins here
 					say LOG "<4>\t$&";
 					($thisVsText, $nextVsText) = (${^PREMATCH}, ${^POSTMATCH});
 		   			$thisVsText = GetContent($thisVsText);
 					$nextVsText = GetContent($nextVsText);
 					$outText .= "$oldHold\n$current\t$thisVsText ";
 					$oldHold = "$nextVsText ";
 				}
 				# Case <5>: the note names this same verse; only the text after the
 				# note is kept under $current ($prevVsText is computed but not used
 				# further in the code visible here).
 				elsif ($interruption eq $current && $verse =~ /<note>KJV:([^\.]*)\.([^\.]*).([^<]*)<\/note>/p) { # Previous verse continues here
 					say LOG "<5>\t$&";
 					($prevVsText, $thisVsText) = (${^PREMATCH}, ${^POSTMATCH});
 					$prevVsText = GetContent($prevVsText);
 					$thisVsText = GetContent($thisVsText);
 					$verseText .= "$oldHold\n$current\t$thisVsText";
 					$oldHold = "";
 				}
 			}
 			else {
 				# Verses without a KJV note currently produce no output: the
 				# handling below is commented out.
 				# The whole verse should be processed in one piece
 				#$verseText = GetContent($verse);
 				#$verseText = "$current\$tverseText"
 			}
 			#$thisBookText .= "\n$verseText";
 			#$oldHold = $holdText
 	   }
 	   #$thisBookText =~ s/</<$lang/g;
 	   #$outText .= "$thisBookText\n";
 	} 
 }
 # Write everything accumulated above in one go, then close both handles.
 say OUT $outText;
 close OUT;
 close LOG;
 print "\n\tDone.";
 # GetContent($xmlFragment)
 # Collects every <w lemma="...">word</w> element found in $xmlFragment and
 # returns them concatenated as "word <number> " (one trailing space per word).
 # The lemma attribute is reduced by stripping the non-digit, non-quote
 # characters around its first run of digits. Returns "" when the fragment
 # contains no <w> elements.
 sub GetContent {
 	my ($text, $returnText) = ($_[0], "");
 	# /g makes each iteration resume after the previous match. Without it the
 	# match restarted at the beginning of $text every time and never ended.
 	while ($text =~ /<w lemma="([^"]*)"[^>]*>([^<]*)<\/w>/g) {
 		# Copy the captures first: the substitution below overwrites $1/$2.
 		my ($lemma, $OL) = ($1, $2);
 		$lemma =~ s/[^\d"]*(\d+)[^\d"]*/$1/;
 		$returnText .= "$OL <$lemma> ";
 	}
 	return $returnText;
 }
 __DATA__
 01	gen	Genesis
 02	exo	Exodus
 03	lev	Leviticus
 04	num	Numbers
 05	deu	Deuteronomy
 06	jos	Joshua
 07	jdg	Judges
 08	rut	Ruth
 09	1sa	1 Samuel
 10	2sa	2 Samuel
 11	1ki	1 Kings
 12	2ki	2 Kings
 13	1ch	1 Chronicles
 14	2ch	2 Chronicles
 15	ezr	Ezra
 16	neh	Nehemiah
 17	est	Esther
 18	job	Job
 19	psa	Psalms
 20	pro	Proverbs
 21	ecc	Ecclesiastes
 22	sng	Song of Solomon
 23	isa	Isaiah
 24	jer	Jeremiah
 25	lam	Lamentations
 26	ezk	Ezekiel
 27	dan	Daniel
 28	hos	Hosea
 29	jol	Joel
 30	amo	Amos
 31	oba	Obadiah
 32	jon	Jonah
 33	mic	Micah
 34	nam	Nahum
 35	hab	Habakkuk
 36	zep	Zephaniah
 37	hag	Haggai
 38	zec	Zechariah
 39	mal	Malachi
 41	mat	Matthew
 42	mrk	Mark
 43	luk	Luke
 44	jhn	John
 45	act	Acts
 46	rom	Romans
 47	1co	1 Corinthians
 48	2co	2 Corinthians
 49	gal	Galatians
 50	eph	Ephesians
 51	php	Philippians
 52	col	Colossians
 53	1th	1 Thessalonians
 54	2th	2 Thessalonians
 55	1ti	1 Timothy
 56	2ti	2 Timothy
 57	tit	Titus
 58	phm	Philemon
 59	heb	Hebrews
 60	jas	James
 61	1pe	1 Peter
 62	2pe	2 Peter
 63	1jn	1 John
 64	2jn	2 John
 65	3jn	3 John
 66	jud	Jude
 67	rev	Revelation
--- a/Build_ULB_XML_for_Tagging.pl
+++ b/Build_ULB_XML_for_Tagging.pl
@ -0,0 +1,126 @@
 # Creates workable ULB.xml file that has all USFM markers in place.
 use 5.18.0;
 use File::Slurp;
 use File::Find ; 
 use Cwd ; 
 use utf8;
 #use open IN => ":utf8", OUT => ":utf8";
 use open IO => ":utf8";
 # Driver: open the log and the combined ULB.xml output, collect every .usfm
 # file under $topDir, and let ReadFiles() append one <book> element per file
 # between the <xml> ... </xml> wrapper written here.
 open(LOG, ">Logs/Log.txt") or die "$!";
 open(OUT, ">/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml/ULB.xml") or die "$!";
 say OUT "<xml>";
 # $outDir is assigned but not referenced again in the code visible here.
 my ($topDir, $outDir) = ("/Users/Henry/Documents/WACS/en_ulb", "/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml");
 my @filesToRun = ();
 # Regex fragment matched against each file name; the commented alternative
 # restricts the run to a single book.
 my $filePattern = '\.usfm' ;
 #my $filePattern = '67-REV\.usfm' ;
 # File-scoped loop variable used by ReadFiles().
 my $file;
 find( sub { push @filesToRun, $File::Find::name  if ( m/^(.*)$filePattern$/ ) }, $topDir) ;
@filesToRun = sort @filesToRun;
 ReadFiles();
 say OUT "</xml>";
 close OUT;
 close LOG;
 say "\nDone.";
 # =====
 # ReadFiles()
 # For every path in @filesToRun: slurp the USFM file, re-flow it so that each
 # line holds one verse (or one \s5 section), wrap verses in <chapter>/<verse>
 # elements with <preText> and <text> children, tag USFM markers inside <text>
 # via TagInternalUSFM(), and print the resulting <book> element to OUT.
 # Takes no arguments; uses the file-scoped $file, @filesToRun, LOG and OUT.
 sub ReadFiles {
 	foreach  $file ( @filesToRun  ) {
 		say $file;
 		my @array;
 		my $fileText = read_file("$file", binmode => 'utf8');
 		# Drop trailing spaces and newlines at the very end of the file.
 		$fileText =~ s/[ \n]+$//;
 		say LOG "|$fileText|";
 #Delete \n
 		my ($book, $chap, $vers, $chapStart);
 		# The \h line supplies the book name used in the name="" attributes.
 		if ($fileText =~ /\\h ([^\n]*)/) {
 			$book = $1
 		}
 		#say LOG $book;
 		# Flatten to one line, then re-insert line breaks only where wanted.
 		$fileText =~ s/\n/ /g;
 		$fileText =~ s/  / /g;
 		# Start a new line before every \s5 marker.
 		$fileText =~ s/\\s5/\n$&/g;
 		# Temporarily replace \v with the single character √ so that the negated
 		# character class below can find "a verse followed by another verse on
 		# the same line"; the loop repeats until every such pair is split.
 		$fileText =~ s/\\v/√/g;
 		while ($fileText =~ s/(√[^√\n]*)(√)/$1\n$2/) {}
 		$fileText =~ s/√/\\v/g;
 		# Wrap the first line (which starts with \id) in a <heading> element.
 		$fileText =~ s/(\\id[^\n]*)\n/\t\t<heading>$1<\/heading>\n/;
 		$fileText =~ s/ +\n/\n/g;
 		# A paragraph/poetry marker left at the end of a line belongs to the
 		# following verse: move it to the start of the next line.
 		$fileText =~ s/(\\(q\d?|pi?|m|n?b))\n/\n$1 /g;
 		#say LOG $fileText;
 		@array = split /\n/, $fileText;
 		$fileText = "";
 		foreach my $line (@array) {
 			# NOTE(review): bare chomp acts on $_, not on $line; the lines come
 			# from split /\n/ and so carry no newline anyway.
 			chomp;
 			if ($line =~ /<book name="(.*?)">/) {$book = $1;}
 			# A line holding both \c and \v opens a new chapter (closing the
 			# previous one if this is not the first chapter of the book).
 			if ($line =~ /\\c (\d+).* \\v (\d+)/) {
 				($chap, $vers) = ($1, $2);
 				$line = "\t\t<chapter name=\"$book $chap\">\n\t\t\t<verse name=\"$book $chap:$vers\">$line</verse>";
 				$line = "\t\t</chapter>\n$line" if $chapStart;
 				$chapStart = 1;
 			}
 			elsif ($line =~ /\\v (\d+)/) {
 				$vers = $1;
 				$line = "\t\t\t<verse name=\"$book $chap:$vers\">$line</verse>"
 			}
 			#say LOG "===\n<AA>\n$line";
 			# Split the verse body: everything up to and including "\v N " goes
 			# into <preText>, the remainder into <text>.
 			$line =~ s/(<verse[^>]*>)(.*\\v \d+ )(.*)(<\/verse>)/$1\n\t\t\t\t<preText>$2<\/preText>\n\t\t\t\t<text>$3<\/text>\n\t\t\t$4/s;
 			#say LOG "===\n<BB>\n$line";
 			# Only the <text> element is passed through TagInternalUSFM(); the
 			# parts before and after it are re-attached unchanged.
 			if ($line =~ /<text>.*<\/text>/p) {
 				say LOG "<-0>\t$line";
 				my ($pre, $match, $post) = (${^PREMATCH}, ${^MATCH}, ${^POSTMATCH});
 				#say LOG "<-1>\t\$pre: $pre,\n\$match: $match,\n\$post: $post";
 				$match = TagInternalUSFM ($match);
 				$line = $pre . $match . $post;
 			}
 			say LOG "---\n<CC>\n$line\n===";
 			# Remove spaces immediately before any closing tag.
 			$line =~ s# +</#</#g;
 			$fileText .= $line . "\n";
 		}
 		# The last chapter of the book is closed here rather than in the loop.
 		say OUT "\t<book name=\"$book\">\n$fileText\t\t</chapter>\n\t</book>";
 	} 
 }
 # TagInternalUSFM($line)
 # Wraps the USFM markers found inside a "<text>...</text>" string in
 # <usfm>...</usfm> elements and returns the modified string.
 #
 # Three passes run in order, each swapping the LAST remaining match (the
 # leading .* is greedy) for a numbered <place/> stub and remembering the
 # original marker text:
 #   1. footnotes        \f ... \f*
 #   2. selah spans      \qs ... \qs*
 #   3. paragraph/poetry markers \b \m \p \pi \q \qN \s2 followed by a space
 # Footnotes and \qs spans go first so that their contents are hidden from
 # pass 3. Finally every stub is replaced by <usfm>original</usfm>.
 # Dies if a stub that was inserted can no longer be found.
 sub TagInternalUSFM {
 	my ($line, $placeNum) = ($_[0], 1);
 	my %places;
 	# Each loop substitutes and tests in a single s///, so the text that is
 	# tested is exactly the text that is replaced.
 	while ($line =~ s/(<text>.*)(\\f .*?\\f\*)(.*<\/text>)/$1<place number="$placeNum"\/>$3/) {
 		$places{$placeNum} = $2;
 		$placeNum ++;
 	}
 	while ($line =~ s/(<text>.*)(\\qs .*?\\qs\*)(.*<\/text>)/$1<place number="$placeNum"\/>$3/) {
 		$places{$placeNum} = $2;
 		$placeNum ++;
 	}
 	# The trailing space is required here as well as in the loop test. The
 	# previous separate substitution omitted it and could cut a longer marker
 	# such as \bd in half.
 	while ($line =~ s/(<text>.*)(\\([bm]|pi?|q\d?|s2))( .*<\/text>)/$1<place number="$placeNum"\/>$4/) {
 		$places{$placeNum} = $2;
 		$placeNum ++;
 	}
 	$line =~ s/  / /g;
 	# Put the remembered markers back, now wrapped in <usfm> elements.
 	foreach my $place (sort keys %places) {
 		unless ($line =~ s/<place number="$place"\/>/<usfm>$places{$place}<\/usfm>/) {
 			die "TagInternalUSFM: placeholder $place not found in: $line"
 		}
 	}
 	say LOG "<+9>\t$line";
 	return $line;
 }
--- a/Check_ULB.pl
+++ b/Check_ULB.pl
@ -0,0 +1,113 @@
 # Checks ULB in ULB.xml against tagged ULB
 use 5.18.0;
 use File::Slurp;
 use File::Find ; 
 use Cwd ; 
 use utf8;
 #use open IN => ":utf8", OUT => ":utf8";
 use open IO => ":utf8";
 # Driver: open the log, load the reference ULB.xml into memory, read the list
 # of books to check from __DATA__, then compare each tagged book against it.
 open(LOG, ">Logs/Log.txt") or die "$!";
 # $ULBxml is the reference file; $taggedULBDir holds one tagged XML per book.
 my ($ULBxml, $taggedULBDir) = ("/Users/Henry/Documents/WACS/Tips_and_Hacks/Tagged_OGNT/ULB_xml/ULB.xml", "/Users/Henry/Documents/WACS/Tips_and_Hacks/Tagged_OGNT/Manual_Tagging");
 # @filesToRun and %fullName are filled by GetBooksToCheck().
 my (@filesToRun) = ();
 my %fullName;
 # $filePattern and the file-scoped $file are not referenced again in the code
 # visible here (the subs declare their own $file).
 my $filePattern = "\.xml" ;
 my $file;
 my $xmlText = read_file("$ULBxml", binmode => 'utf8');
 GetBooksToCheck();
 Compare();
 # Compare()
 # Reads the tagged XML of every book listed in @filesToRun from
 # $taggedULBDir and hands its content to GetGist() for checking.
 sub Compare {
 	for my $bookFile (@filesToRun) {
 		my $taggedPath = "$taggedULBDir/$bookFile.xml";
 		say LOG "|$bookFile|, |$taggedPath|";
 		my $taggedContent = read_file($taggedPath, binmode => 'utf8');
 		GetGist($bookFile, $taggedContent);
 	}
 }
 # GetGist($fileName, $wholeTaggedText)
 # Walks every <verse> of one tagged book, reduces it to plain text with
 # Untag(), and compares that with the same verse in the reference $xmlText.
 # Differences are written to LOG as "MISMATCH" records; nothing is returned.
 #   $fileName        - key into %fullName giving the book's full name
 #   $wholeTaggedText - complete content of the tagged XML file
 sub GetGist {
 	my ($fileName, $wholeTaggedText) = @_;
 	my ($verseRef, $standard, $tagged);
 	say LOG "|$fileName|, |$fullName{$fileName}|";
 	# Captures used below: $1 = verse reference, $4 = <preText> content,
 	# $7 = the run of <w ...> lines that precede the closing </verse>.
 	while ($wholeTaggedText =~ /<verse name="($fullName{$fileName} \d+:\d+)">((.|\n)*?)<preText>(.*?)<\/preText>((.|\n)*?)\n\t+((<w ((.|\n)*?)\n)*)\t+<\/verse>/sg) {
 		# Save $4/$7 before the next match overwrites the capture variables.
 		my ($preText, $gist) = ($4, $7);
 		$verseRef = $1;
 		say LOG "\$verseRef: |$verseRef|";
 		# Look the same verse up in the reference file; verses that are not
 		# found there in exactly this layout are skipped silently.
 		if ($xmlText =~ /<verse name="$verseRef">\n\t+<preText>([^\n]*)<\/preText>\n\t+<text>([^\n]*)<\/text>\n\t+<\/verse>/s) {
 			my ($standardPT, $standardT) = ($1, $2);
 			($tagged) = Untag($preText, $gist);
 			#say LOG $tagged;
 			# Normalise the reference text the same way Untag() normalises the
 			# tagged text: drop tags, collapse spaces, trim the end.
 			$standard = $standardPT . " " . $standardT;
 			$standard =~ s/<[^<>]*>//g;
 			$standard =~ s/ {2,}/ /g;
 			$standard =~ s/ +$//;
 			if ($standard ne $tagged) {
 				say LOG "\nMISMATCH:\n\$standard\n$standard\n\$tagged\n$tagged\n"
 			}
 		}
 	}
 }
 # Untag($pre, $txt)
 # Returns "$pre $txt" as plain text: tabs and newlines in $txt become
 # spaces, √ marks and anything in angle brackets are removed, runs of spaces
 # are collapsed to one, and trailing spaces are trimmed.
 sub Untag {
 	my ($pre, $txt) = @_;
 	(my $flat = $txt) =~ s/[\t\n]/ /g;
 	$flat =~ s/(√|<[^<>]*>)//g;
 	my $joined = join(" ", $pre, $flat);
 	for ($joined) {
 		s/ {2,}/ /g;
 		s/ +$//;
 	}
 	return $joined;
 }
 # GetBooksToCheck()
 # Reads the "file<TAB>book name" table after __DATA__. Lines starting with
 # '#' are skipped; every other matching line adds the file name to
 # @filesToRun, records its book name in %fullName, and echoes the file name.
 sub GetBooksToCheck {
 	while (my $row = <DATA>) {
 		chomp $row;
 		next if $row =~ /^#/;
 		next unless $row =~ /([^\t]*)\t([^\t]*)/;
 		my ($bookFile, $bookName) = ($1, $2);
 		say "|$bookFile|";
 		push @filesToRun, $bookFile;
 		$fullName{$bookFile} = $bookName;
 	}
 }
 # Reached after the top-level GetBooksToCheck() and Compare() calls have
 # returned: close the log and report completion on STDOUT.
 close LOG;
 say "\nDone.";
 # =====
 __DATA__
 #41-MAT	Matthew
 #42-MRK	Mark
 #43-LUK	Luke
 #44-JHN	John
 #45-ACT	Acts
 #46-ROM	Romans
 #47-1CO	1 Corinthians
 #48-2CO	2 Corinthians
 #49-GAL	Galatians
 #50-EPH	Ephesians
 #51-PHP	Philippians
 #52-COL	Colossians
 #53-1TH	1 Thessalonians
 #54-2TH	2 Thessalonians
 #55-1TI	1 Timothy
 #56-2TI	2 Timothy
 57-TIT	Titus
 #58-PHM	Philemon
 #59-HEB	Hebrews
 #60-JAS	James
 #61-1PE	1 Peter
 #62-2PE	2 Peter
 #63-1JN	1 John
 #64-2JN	2 John
 #65-3JN	3 John
 #66-JUD	Jude
 #67-REV	Revelation
--- a/Construct_tagged_ULB_XML_files_from_unified_ULB_XML_and_tWs_and_OGNT.pl
+++ b/Construct_tagged_ULB_XML_files_from_unified_ULB_XML_and_tWs_and_OGNT.pl
@ -0,0 +1,410 @@
 # Takes current tW entries and populates tagged OGNT XML
 # 
 # This is the current best version
 #  Requires ULB that includes USFMs.
 use 5.12.0;
 use File::Slurp;
 use File::Find ; 
 use Cwd ; 
 use utf8;
 #use open IN => ":utf8", OUT => ":utf8";
 use open IO => ":utf8";
 # Driver. Phase 1 reads the translationWords files and logs the Strong's
 # number -> ULB forms table to Logs/tW_pairs.txt; phase 2 (logging to
 # Logs/Log.txt) loads book names and the ULB text, then rewrites each selected
 # OGNT XML file with the ULB forms attached.
 # Arrays interpolated into strings are joined with newlines from here on.
 $" = "\n";
 mkdir "Logs";
 open(LOG, ">Logs/tW_pairs.txt") or die "$!";
 my $ULBfile = "/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml/ULB.xml";
 my $topDirOGNT = "/Users/Henry/Documents/WACS/OGNT";
 #my $topDirOGNT = "/Users/Henry/Documents/WACS/en_ulb_tagged/Tag_test";
 my $topDirtW = "/Users/Henry/Documents/WACS/en_tw/bible";
 my ($outDir, $outFile) = ("/Users/Henry/Documents/WACS/en_ulb_tagged/Auto-tagged", "");
 # $language is set per file by ProcessXML() and read by the matching subs.
 my ($ULBText, $workText, $language);
 my ($file);
 # Lookup tables shared by the subs below:
 #   %ULBtextThisVerse / %ULBpreTextThisVerse - filled by Prepare_ULB_file()
 #   %entriesThisSN                           - filled by Read_tW_Files()
 #   %longName                                - filled by LongBookNames()
 my (%ULBtextThisVerse, %ULBpreTextThisVerse, %SNsThisVerse, %entriesThisSN, %longName);
 my @OGNTfilesToRun = ();
 #my $filePattern = '\.xml' ;
 # NOTE(review): the active pattern limits the run to Colossians; the
 # commented line above selects every XML file -- confirm which is wanted.
 my $filePattern = '52-COL\.xml' ;
 find( sub { push @OGNTfilesToRun, $File::Find::name  if ( m/^(.*)$filePattern$/ ) }, $topDirOGNT) ;
 say LOG "\@OGNTfilesToRun:\n@OGNTfilesToRun\n";
 my @tWfilesToRun = ();
 # NOTE(review): the dot is not escaped here, so it matches any character
 # before "md" when used in the regex below.
 $filePattern = '.md' ;
 find( sub { push @tWfilesToRun, $File::Find::name  if ( m/^(.*)$filePattern$/ ) }, $topDirtW) ;
 Read_tW_Files();
 close LOG;
 # Switch the LOG handle to the main log for the remaining phases.
 open(LOG, ">Logs/Log.txt") or die "$!";
 LongBookNames();
 Prepare_ULB_file();
 say LOG "Prepare_ULB_file done.\n\@OGNTfilesToRun:\n@OGNTfilesToRun\n";
 ProcessXML();
 #	put unused SN at end of verse
 close LOG;
 say "\nDone.";
 # =====
 # Read_tW_Files()
 # Builds %entriesThisSN: for every file in @tWfilesToRun it reads the
 # "* Strong's:" line (comma-separated Strong's numbers) and the line that
 # follows "Forms Found in the English ULB:", and appends those forms to the
 # entry of each listed Strong's number. Afterwards each entry is re-ordered
 # and stored back as a ", "-joined string, which is also written to LOG.
 # Takes no arguments and returns nothing useful.
 sub Read_tW_Files {
 	foreach  $file ( @tWfilesToRun  ) {
 		say LOG $file;
 		my (@sns);
 		my $entries;
 		my $fileText = read_file("$file", binmode => 'utf8');
 		if ($fileText =~ /\* Strong's: ([^\n]*)\n/) {
 			my $sns = $1;
 			#say LOG "\t$sns";
 			@sns = split /, /, $sns;
 		}
 		if ($fileText =~ /Forms Found in the English ULB:\n\n([^\n]*)\n/) {
 			$entries = $1;
 			# A heading with an empty forms line is treated as a fatal data error.
 			die "$fileText" if $entries eq "";			
 			#say LOG "\t\t$entries"
 		}
 		# Several tW files may list the same Strong's number; their forms are
 		# concatenated.
 		foreach my $sn (@sns) {
 			$entriesThisSN{$sn} .= $entries . ", ";
 			#say LOG "\t\t\t$sn: $entriesThisSN{$sn}"
 		}
 	}
 	foreach my $sn (sort keys %entriesThisSN) {
 		#say LOG "$sn: $entriesThisSN{$sn}";
 		my @entries = split /, /, $entriesThisSN{$sn};
 		# NOTE(review): all three comparisons are numeric (<=>). If the forms
 		# are ordinary words, the first and last comparisons see 0 on both
 		# sides and only length decides, giving longest-first after reverse --
 		# confirm that this is the intended order.
 		@entries = reverse sort {  substr($a,0,1) <=> substr($b,0,1)
 			|| length($a) <=> length($b)
 			|| $a <=> $b }
 		@entries;
 		# Re-join in the new order and drop the trailing separator.
 		$entriesThisSN{$sn} = "";
 		foreach my $slice (@entries) {
 			$entriesThisSN{$sn} .= "$slice, "
 		}
 		$entriesThisSN{$sn} =~ s/, $//;
 		say LOG "$sn: $entriesThisSN{$sn}";
 	}
 }
 # LongBookNames()
 # Fills %longName from the three-column, tab-separated table after __DATA__:
 # the second column (abbreviation) is the key, the third (full name) the value.
 sub LongBookNames {
 	while (my $row = <DATA>) {
 		chomp $row;
 		next unless $row =~ /([^\t]*)\t([^\t]*)\t([^\t]*)/;
 		my ($abbrev, $fullName) = ($2, $3);
 		$longName{$abbrev} = $fullName;
 	}
 }
 # ProcessXML()
 # Rewrites every file in @OGNTfilesToRun into $outDir, line by line:
 #   <verse osisID=...> - starts a verse; looks up its ULB text and preText
 #   <w ...>            - moves the original-language word into a text=""
 #                        attribute and sorts the line into "has tW entries"
 #                        ($linesWithRelevantSNs) or "skip" ($linesToSkip)
 #   </verse>           - matches the collected lines against the ULB text via
 #                        ProcessRelevantSNs() and writes the verse's output
 #   anything else      - copied through unchanged
 # Takes no arguments; relies on the file-scoped lookup hashes and $language.
 sub ProcessXML {
 	foreach my $file (@OGNTfilesToRun) {
 		my $greekText;
 		my $fileGist;
 		# $fileGist is the file name as matched by this pattern; its first two
 		# characters are compared numerically with 40 to choose the language
 		# prefix ("G" above 40, otherwise "H").
 		if ($file =~ /((..)....\.xml)/) {
 			($fileGist, $language) = ($1, $2);
 			if ($language > 40) {
 				$language = "G"
 			} else {$language = "H"}
 		}
 		say LOG "<0>\t$file \t$fileGist";
 		open(OUT, ">$outDir/$fileGist") or die "$outDir/$fileGist: $!";
 		my ($pre, $gist, $post, $bk, $ch, $vs, $thisVerse, $staticText, $residueText, $matchedLines, $flag, $thisVerseForOutput,
 			$linesWithRelevantSNs, $linesNotMatched, $orderedOutputLines, $linesToSkip, $thisPreText);
 		open (my $thisFile, "<:utf8", "$file") or die "$file:\n$!";
 		# Counters are incremented below and reset per verse; they are not
 		# reported anywhere in the code visible here.
 		my ($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount);
 		while (my $thisLine = <$thisFile>) {
 			chomp $thisLine;
 			if ($thisLine =~ /<verse/) {say LOG "\n=========================="}
 			if ($thisLine =~ /<\/verse>/) {
 				say LOG "<0.1>\t$thisLine";
 				say LOG "<11>\n\$linesWithRelevantSNs\n$linesWithRelevantSNs\n\$linesToSkip\n$linesToSkip\$residueText\n$residueText";
 				($matchedLines, $residueText, $linesNotMatched) = ProcessRelevantSNs($linesWithRelevantSNs, $staticText, $residueText);
 				say LOG "<14>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";
 				my %orderedLine;
 				$matchedLines =~ s/\n{2,}/\n/gs;
 				say LOG "<15\tBefore sort of \$matchedLines:\n$matchedLines\n";
 				# Each matched line ends in "◊<number>" (appended by MatchAndPlace,
 				# the character offset of the match). Index the lines by that number
 				# and emit them in ascending order; lines sharing a number overwrite
 				# one another in the hash.
 				while ($matchedLines =~ /([^◊]*)◊(\d*)\n/g) {
 					$orderedLine{$2} = $1;
 					say LOG "<5>\t\$2: $2\t\$1: $1";
 				}
 				$matchedLines = "";
 				foreach my $line (sort {$a <=> $b} keys %orderedLine) {
 					say LOG "<5.5>\t\$line: $line\t\$orderedLine{$line}: $orderedLine{$line}";
 					$matchedLines .= "$orderedLine{$line}\n"
 				}
 				chomp $matchedLines;
 				say LOG "<16>\tAfter sort of \$matchedLines:\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";					
 				# Clean up the residue (ULB text with matched words blanked out):
 				# drop <usfm> elements and the "q " / " q" sentinels added when the
 				# verse was opened, then squeeze and trim spaces.
 				$residueText =~ s/<usfm>.*?<\/usfm>//g;
 				$residueText =~ s/(^q | q$)//g;
 				$residueText =~ s/ {3,}/  /g;
 				$residueText =~ s/^ +//;
 				# NOTE(review): these patterns have no capture group, so $1 in the
 				# replacement is presumably empty and the trailing spaces are just
 				# removed -- confirm.
 				$residueText =~ s/ +$/$1/;
 				$greekText =~ s/^ +//;
 				$greekText =~ s/ +$/$1/;
 				$staticText =~ s/^ +//;
 				$staticText =~ s/ +$/$1/;
 				# Collect every <usfm> element of the ULB text, one per output line.
 				my $internalUSFM;
 				$internalUSFM .= "\t\t\t\t\t$&\n" while ($staticText =~ /<usfm>.*?<\/usfm>/g);
 				$linesNotMatched =~ s/\n+$//;
 				$linesToSkip =~ s/\n+$//;
 				$matchedLines =~ s/^\n+//;
 				$internalUSFM =~ s/\n+$//;
 				say LOG "<17>\tAfter pruning \$matchedLines:\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";					
 				# Emit the verse body, then the original </verse> line.
 				say OUT "\t\t\t\t\t<Greek>$greekText</Greek>";
 				say OUT "\t\t\t\t\t<preText>$thisPreText</preText>";
 				say OUT "\t\t\t\t\t<ULB>$staticText</ULB>";
 				say OUT "\t\t\t\t\t<residue>$residueText</residue>";
 				say OUT "$matchedLines" unless ($matchedLines eq "");
 				say OUT "$linesNotMatched" unless ($linesNotMatched eq "");
 				say OUT "$internalUSFM" unless ($internalUSFM eq "");
 				say OUT "$linesToSkip" if ($linesToSkip);
 				say OUT "$thisLine";
 				# Reset the per-verse state before the next verse.
 				($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount) = ();
 				($thisVerseForOutput, $flag, $workText, $greekText, $linesNotMatched, $linesToSkip, $residueText, $orderedOutputLines, $linesWithRelevantSNs) = ();
 				($linesToSkip) = ("");
 			}
 			elsif ($thisLine =~ /<w /) {
 				say LOG "<0.2>\t$thisLine";
 				$originalLinesCount ++;
 				# Append the element's text content to the running original-language
 				# verse text.
 				if ($thisLine =~ />([^\n<>]*)</) {
 					$greekText .= $1 . " "
 				}
 				# Move the word into a text="" attribute, leaving the element empty
 				# so a ULB form can be inserted later.
 				$thisLine =~ s/(<w .*)>([^<]*)(<\/w>)/$1 text="$2">$3/;
 				if ($thisLine =~ /lemma="(\d+)"/) {
 					my $thisLemma = $language . $1;
 					if (exists $entriesThisSN{$thisLemma}) {
 						$rsnCount ++;
 						$linesWithRelevantSNs .= $thisLine . "\n";
 						say LOG "<0.2.1>\t\$thisLemma: $thisLemma; line pushed to \$linesWithRelevantSNs";
 					}
 					else {
 						# No tW entry for this lemma: mark the empty element with √.
 						$skipCount ++;
 						$thisLine =~ s/><\/w>/>√<\/w>/;
 						$linesToSkip .= "$thisLine\n";
 						#say LOG "<0.2.2>\t\$thisLemma: $thisLemma; line pushed to \@LinesToSkip";
 					}
 				}
 			}
 			elsif ($thisLine =~ /<verse osisID="(.*?)\.(.*?)\.(.*?)">/) {
 				say LOG "<0.3>\t$thisLine";
 				($bk, $ch, $vs) = ($1,$2,$3);
 				# Verse key in the form used by the ULB lookup hashes.
 				($thisVerse, $greekText) = ("$longName{$bk} $ch:$vs", "");
 				$staticText = $ULBtextThisVerse{$thisVerse};
 				# The working copy is wrapped in "q " ... " q"; the wrappers are
 				# removed again when the verse is closed.
 				$residueText = "q $staticText q";
 				$thisPreText = $ULBpreTextThisVerse{$thisVerse};
 				say OUT "\t\t\t\t<verse name=\"$thisVerse\">";
 				($flag) = (1);
 			}
 			else {say OUT $thisLine}
 		}
 		close $thisFile;
 		close OUT;
 	}
 }
 # ProcessRelevantSNs($relevantLines, $staticText, $residueText, $linesNotMatched)
 # Runs MatchAndPlace() on every newline-separated <w> line that carries a
 # numeric lemma, threading the residue text and the not-matched lines through
 # each call. Returns ($matchedLines, $residueText, $linesNotMatched).
 sub ProcessRelevantSNs {
 	my ($relevantLines, $staticText, $residueText, $linesNotMatched) = @_;
 	my $matchedLines;
 	my $placed;
 	for my $wordLine (split /\n/, $relevantLines) {
 		next unless $wordLine =~ /lemma="(\d+)"/;
 		# Strong's key = language prefix + lemma number.
 		my $strongsKey = $language . $1;
 		say LOG "<12>\t\$line: $wordLine, \$thisSN: $strongsKey, \$entriesThisSN{$strongsKey}\n$entriesThisSN{$strongsKey}";
 		($placed, $residueText, $linesNotMatched) =
 			MatchAndPlace($wordLine, $strongsKey, $staticText, $residueText, $linesNotMatched);
 		$placed =~ s/[ \t]+$//;
 		$matchedLines .= $placed . "\n";
 		# Keep a single newline at the end even when nothing was placed.
 		$matchedLines =~ s/\n{2,}$/\n/s;
 		say LOG "<13>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched+++"
 	}
 	return ($matchedLines, $residueText, $linesNotMatched);
 }
 # MatchAndPlace($line, $sn, $staticText, $workText, $linesNotMatched)
 # Tries each ULB form listed for Strong's key $sn (in stored order) against
 # $workText and uses the first one that matches.
 #   $line            - the <w .../> line being processed
 #   $sn              - key into %entriesThisSN
 #   $staticText      - accepted for call compatibility; not used here
 #   $workText        - ULB verse text with earlier matches already blanked out
 #   $linesNotMatched - accumulated lines for which no form was found
 # On a match the form is written into the element (">FORM<"), the matched
 # text in $workText is overwritten with spaces of equal length (gaps of
 # "a ... b" forms are preserved by FixWorkText), and the line is returned
 # with "◊<offset of the match>" appended. Otherwise the element is filled
 # with "?" and the line is appended to $linesNotMatched.
 # Returns ($matchedLine, $workText, $linesNotMatched).
 sub MatchAndPlace {
 	my ($line, $sn, $staticText, $workText, $linesNotMatched) = @_;
 	#say LOG "<8>\t\$line: $line	 \$sn: $sn	 \$workText\n$workText";
 	my ($found, $matchedLines);
 	my @entries = split /, /, $entriesThisSN{$sn};
 	foreach my $entry (@entries) {
 		# Declared per entry on purpose: when these lived outside the loop, the
 		# parts of a three-part form that failed to match leaked into a later
 		# two-part form, sent FixWorkText down the three-part branch, and left
 		# the placeholder character in $workText.
 		my ($workEntry, $entryType, $first, $second, $third, $firstLen, $secondLen, $thirdLen);
 		if ($entry =~ /^(.*) \.\.\. (.*) \.\.\. (.*)$/) {
 			# Three-part form "a ... b ... c".
 			($first, $second, $third) = ($1, $2, $3);
 			($firstLen, $secondLen, $thirdLen) = (length $first, length $second, length $third);
 			$workEntry = "\\b" . $first . "\\b" . ".*?" . "\\b" . $second . "\\b" . ".*?" . "\\b" . $third;
 			say LOG "<1a>\t\$first: $first,	 \$second: $second,	 \$third: $third,	 \$firstLen: $firstLen,	 \$secondLen,: $secondLen,	 \$thirdLen: $thirdLen	\$entry: |$entry|\t\$workEntry: |$workEntry|";
 			$entryType = 1;
 		}
 		elsif ($entry =~ /^(.*) \.\.\. (.*)$/) {
 			# Two-part form "a ... b".
 			($first, $second) = ($1, $2);
 			($firstLen, $secondLen) = (length $first, length $second);
 			$workEntry = "\\b" . $first . "\\b" . ".*?" . "\\b" . $second . "\\b";
 			say LOG "<2a>\t\$first: $first,	 \$second: $second,	 \$third: $third,	 \$firstLen: $firstLen,	 \$secondLen,: $secondLen,	 \$entry: |$entry|\t\$workEntry: |$workEntry|";
 			$entryType = 2;
 		}
 		else {$workEntry = $entry;}
 		#say LOG "<8.1>\t\$entryType: $entryType\t\$entry: $entry\t\$workEntry: $workEntry";
 		# NOTE(review): the form is interpolated as a regex, so any regex
 		# metacharacters it contains are active.
 		if ($workText =~ /\b$workEntry\b/p) {
 			say LOG "<8.1>Found: \t\$entryType: $entryType\t\$entry: $entry\t\$workEntry: $workEntry";
 			# Read the match variables before any other regex runs.
 			my $foundText = ${^MATCH};
 			my $place = length ${^PREMATCH};
 			# Mark the matched span with a one-character placeholder.
 			$workText = "${^PREMATCH}ı${^POSTMATCH}";
 			my $foundTextLength = length $foundText;
 			$line =~ s/></>$entry</;
 			if ($entryType) {
 				say LOG "<8.2>\n\$workText,: $workText,	 \$matchedLines:\n$matchedLines	";
 				($workText) = FixWorkText($line, $workText, $workEntry, $foundText, $foundTextLength, $first, $firstLen, $second, $secondLen, $third, $thirdLen);
 				say LOG "<8.3>\n\$workText:\n$workText\n\$matchedLines:\n$matchedLines";
 			}
 			else {
 				# Simple form: blank the whole match, keeping character offsets.
 				my $replacementSpaces = " " x $foundTextLength;
 				$workText =~ s/ı/$replacementSpaces/;
 			}
 			$matchedLines .= "$line◊$place";
 			say LOG "<8.4>\tAfter found, new \$workText:\n$workText";
 			$found = 1;
 			last;
 		}
 	}
 	unless ($found) {
 		$line =~ s/></>?</;
 		$linesNotMatched .= "$line\n"
 	}
 	return ($matchedLines, $workText, $linesNotMatched)
 }
 # FixWorkText($thisLine, $text, $entry, $foundText, $foundTextLength,
 #             $first, $firstLen, $second, $secondLen, $third, $thirdLen)
 # Replaces the first "ı" placeholder in $text with a string as long as
 # $foundText in which the parts of a multi-part form ($first, $second and,
 # when given, $third) are blanked with spaces while the text between the
 # parts is kept. If $foundText does not contain the parts in order, $text is
 # left as it is. Returns the resulting text as a one-element list.
 sub FixWorkText {
 	my ($thisLine, $text, $entry, $foundText, $foundTextLength, $first, $firstLen, $second, $secondLen, $third, $thirdLen) = @_;
 	# One space per character of each part (empty when the length is missing).
 	my ($padFirst, $padSecond, $padThird) = map { " " x ($_ // 0) } ($firstLen, $secondLen, $thirdLen);
 	say LOG "<9>\$text:\n$text\n\t\t\$entry: $entry	\$foundText: $foundText\t \$foundTextLength: $foundTextLength\t\$first: $first\t\$second: $second\t\$third: $third\n\$firstSpace: $padFirst\t\$secondSpace: $padSecond\t\$thirdSpace: $padThird";
 	my $repText;
 	if ($third) {
 		# Three-part form: keep both gaps.
 		if ($foundText =~ /$first(.*)$second(.*)$third/) {
 			$repText = $padFirst . $1 . $padSecond . $2 . $padThird;
 			say LOG "<9.1> \$repText: $repText";
 		}
 	}
 	elsif ($foundText =~ /$first(.*)$second/) {
 		# Two-part form: keep the single gap.
 		my $gap = $1;
 		say LOG "<9.2>\t\$firstSpace: |$padFirst|\t\$firstGap: |$gap|\t\$secondSpace: |$padSecond|";
 		$repText = $padFirst . $gap . $padSecond;
 		say LOG "<9.3> \$repText: |$repText|";
 	}
 	$text =~ s/ı/$repText/ if defined $repText;
 	return ($text)
 }
 # Prepare_ULB_file()
 # Reads $ULBfile line by line and fills two lookup tables keyed by the most
 # recently seen verse name="..." value:
 #   %ULBpreTextThisVerse - content of the verse's <preText> element
 #   %ULBtextThisVerse    - content of the verse's <text> element
 # Dies if the file cannot be opened. (An earlier whole-file regex variant of
 # this routine was replaced by the line-based scan below.)
 sub Prepare_ULB_file {
 	my $currentRef;
 	open(my $ulbHandle, "<:utf8", "$ULBfile") or die "$ULBfile:\n$!";
 	while (my $row = <$ulbHandle>) {
 		chomp $row;
 		# A line is treated as exactly one of: verse opener, preText, text.
 		if ($row =~ /verse name="(.*?)"/) {
 			$currentRef = $1;
 			next;
 		}
 		if ($row =~ /<preText>(.*?)<\/preText>/) {
 			$ULBpreTextThisVerse{$currentRef} = $1;
 			next;
 		}
 		if ($row =~ /<text>(.*?)<\/text>/) {
 			$ULBtextThisVerse{$currentRef} = $1;
 		}
 	}
 	close $ulbHandle;
 }
 __DATA__
 01	gen	Genesis
 02	exo	Exodus
 03	lev	Leviticus
 04	num	Numbers
 05	deu	Deuteronomy
 06	jos	Joshua
 07	jdg	Judges
 08	rut	Ruth
 09	1sa	1 Samuel
 10	2sa	2 Samuel
 11	1ki	1 Kings
 12	2ki	2 Kings
 13	1ch	1 Chronicles
 14	2ch	2 Chronicles
 15	ezr	Ezra
 16	neh	Nehemiah
 17	est	Esther
 18	job	Job
 19	psa	Psalms
 20	pro	Proverbs
 21	ecc	Ecclesiastes
 22	sng	Song of Solomon
 23	isa	Isaiah
 24	jer	Jeremiah
 25	lam	Lamentations
 26	ezk	Ezekiel
 27	dan	Daniel
 28	hos	Hosea
 29	jol	Joel
 30	amo	Amos
 31	oba	Obadiah
 32	jon	Jonah
 33	mic	Micah
 34	nam	Nahum
 35	hab	Habakkuk
 36	zep	Zephaniah
 37	hag	Haggai
 38	zec	Zechariah
 39	mal	Malachi
 41	mat	Matthew
 42	mrk	Mark
 43	luk	Luke
 44	jhn	John
 45	act	Acts
 46	rom	Romans
 47	1co	1 Corinthians
 48	2co	2 Corinthians
 49	gal	Galatians
 50	eph	Ephesians
 51	php	Philippians
 52	col	Colossians
 53	1th	1 Thessalonians
 54	2th	2 Thessalonians
 55	1ti	1 Timothy
 56	2ti	2 Timothy
 57	tit	Titus
 58	phm	Philemon
 59	heb	Hebrews
 60	jas	James
 61	1pe	1 Peter
 62	2pe	2 Peter
 63	1jn	1 John
 64	2jn	2 John
 65	3jn	3 John
 66	jud	Jude
 67	rev	Revelation
--- a/Find_first_occurrence_of_proper_nouns.pl
+++ b/Find_first_occurrence_of_proper_nouns.pl
@ -0,0 +1,59 @@
 use 5.12.0;
 use File::Slurp;
 use File::Find ; 
 use Cwd ; 
 # Finds, for every capitalised word in the ULB text file, the reference (text
 # before the first tab) of the line where it first appears, then discards
 # words that also occur in all-lowercase form anywhere in the file.
 # %location maps capitalised word -> reference of its first occurrence.
 my %location;
 open LOG, ">log/log.log" or die;
 open(IN, "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt") or die "$!";
 say "Reading ULB";
 while (<IN>) {
 	#print LOG "$_";
 	chomp;
 	# Each pass removes the first remaining capitalised word (optionally
 	# hyphenated with a second capitalised part) from the line in $_ and loops
 	# until none is left. $1 = reference, $3 = the word just removed.
 	while (s/^([^\n\t]*)\t([^\n]*?)([A-Z][a-z]+(-[A-Z][a-z]+)?)/$1\t$2/) {
 	#	say LOG $3;
 		# Only the first occurrence is recorded.
 		unless (exists $location{$3}) {$location{$3} = $1}
 	}
 }
 close IN;
 say "Outputting hash";
 # First dump: every capitalised word. This file is opened again for writing
 # further down, which replaces this content.
 open(OUT, ">out/results.txt") or die "$!";
 foreach my $word (sort keys %location) {
 	say OUT "$word, $location{$word}";
 }
 close OUT;
 say "Deleting common words";
 my $fileText = read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt", binmode => 'utf8');
 # A word whose lowercase form appears as a whole word somewhere in the text
 # is dropped from the table.
 foreach my $word (sort keys %location) {
 	my $temp = lc $word;
 	#say LOG $word . "\t" . $temp;
 	if ($fileText =~ /\b$temp\b/) {
 		delete $location{$word}
 	}
 }
 say "Outputting final product";
 # Second dump: the filtered list, written to the same path as the first.
 open(OUT, ">out/results.txt") or die "$!";
 foreach my $word (sort keys %location) {
 	say OUT "$word, $location{$word}";
 }
 close OUT;
 close LOG;
--- a/Find_first_occurrence_of_tWs.pl
+++ b/Find_first_occurrence_of_tWs.pl
@ -0,0 +1,365 @@
 use 5.12.0;
 use File::Slurp;
 use File::Find ; 
 use Cwd ; 
 # For every name listed after __DATA__, find the first line of the ULB text
 # file that contains it and log "name, reference" (the reference is the text
 # before the first tab on that line). Names are used as regex fragments.
 open(LOG, ">log/log.log") || die;
 # %value holds each DATA line (without its newline) as both key and value,
 # which also removes duplicates.
 my %value;
 while (my $name = <DATA>) {
 	chomp $name;
 	$value{$name} = $name;
 }
 # A newline is prepended so that the first line can be matched like the rest.
 my $fileText = "\n" . read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt", binmode => 'utf8');
 for my $thisWord (sort keys %value) {
 	print "$thisWord\n";
 	next unless $fileText =~ /\n([^\n\t]*)\t[^\n]*$thisWord\b/;
 	say LOG "$thisWord, $1";
 }
 close LOG;
 __DATA__
 Aaron
 Abel
 Abiathar
 Abijah
 Abimelek
 Abner
 Abraham
 Absalom
 Adam
 Adonijah
 Ahab
 Ahaz
 Ahaziah
 Ahijah
 Ai
 Amalek
 Amaziah
 Ammon
 Amnon
 Amorite
 Amos
 Amoz
 Andrew
 Annas
 Antioch
 Apollos
 Aquila
 Arabah
 Arabia
 Aram
 Ararat
 Artaxerxes
 Asa
 Asaph
 Ashdod
 Asher
 Asherah
 Ashkelon
 Asia
 Assyria
 Athaliah
 Azariah
 Baal
 Baasha
 Babel
 Babylon
 Balaam
 Barabbas
 Barnabas
 Bartholomew
 Baruch
 Bashan
 Bathsheba
 Beelzebul
 Beersheba
 Benaiah
 Benjamin
 Berea
 Beth Shemesh
 Bethany
 Bethel
 Bethlehem
 Bethuel
 Boaz
 Caesar
 Caesarea
 Caiaphas
 Cain
 Caleb
 Cana
 Canaan
 Capernaum
 Carmel
 Chaldea
 Cilicia
 Colossae
 Corinth
 Cornelius
 Crete
 Cush
 Cyprus
 Cyrene
 Cyrus
 Damascus
 Dan
 Daniel
 Darius
 David
 Delilah
 Eden
 Edom
 Egypt
 Ekron
 Elam
 Eleazar
 Eliakim
 Elijah
 Elisha
 Elizabeth
 En Gedi
 Enoch
 Ephesus
 Ephraim
 Ephrath
 Esau
 Esther
 Ethiopia
 Euphrates River
 Eve
 Ezekiel
 Ezra
 Gabriel
 Gad
 Galatia
 Galilee
 Gath
 Gaza
 Gerar
 Geshur
 Gethsemane
 Gibeah
 Gibeon
 Gideon
 Gilead
 Gilgal
 Girgashites
 Golgotha
 Goliath
 Gomorrah
 Goshen
 Greece
 Greek
 Habakkuk
 Hagar
 Haggai
 Ham
 Hamath
 Hamor
 Hananiah
 Hannah
 Haran
 Hebron
 Hermon
 Herod
 Herodias
 Hezekiah
 Hilkiah
 Hittite
 Hivite
 Horeb
 Hosea
 Hoshea
 Iconium
 Isaac
 Isaiah
 Ishmael
 Issachar
 Jacob
 James 
 Japheth
 Jebus
 Jehoiachin
 Jehoiada
 Jehoiakim
 Jehoram
 Jehoshaphat
 Jehu
 Jephthah
 Jeremiah
 Jericho
 Jeroboam
 Jerusalem
 Jesse
 Jethro
 Jezebel
 Jezreel
 Joab
 Joash
 Job
 Joel
 John 
 John Mark
 Jonah
 Jonathan
 Joppa
 Joram
 Jordan River
 Joseph 
 Joshua
 Josiah
 Jotham
 Judah
 Judas.*Iscariot
 Judas son of James
 Judea
 Kadesh
 Kedar
 Kedesh
 Kerethites
 Kidron Valley
 Korah
 Laban
 Lamech
 Lazarus
 Leah
 Lebanon
 Levi
 Leviathan
 Lot
 Luke
 Lystra
 Maakah
 Macedonia
 Maker
 Malachi
 Manasseh
 Martha
 Mary
 Mary 
 Mary.*Magdalene
 Matthew
 Mede
 Melchizedek
 Memphis
 Meshech
 Mesopotamia
 Micah
 Michael
 Midian
 Miriam
 Mishael
 Mizpah
 Moab
 Molech
 Mordecai
 Moses
 Mount of Olives
 Naaman
 Nahor
 Nahum
 Naphtali
 Nathan
 Nazareth
 Nebuchadnezzar
 Negev
 Nehemiah
 Nile River
 Nineveh
 Noah
 Obadiah
 Omri
 Paddan Aram
 Paran
 Paul
 Peor
 Perizzite
 Persia
 Peter
 Pharaoh
 Philip
 Philippi
 Philistia
 Philistines
 Phinehas
 Phoenicia
 Pilate
 Pontus
 Potiphar
 Priscilla
 Rabbah
 Rachel
 Rahab
 Ramah
 Ramoth
 Rebekah
 Rehoboam
 Reuben
 Rimmon
 Rome
 Ruth
 Salt Sea
 Samaria
 Samson
 Samuel
 Sarah
 Saul
 Sea of Galilee
 Sea of Reeds
 Sennacherib
 Seth
 Sharon
 Sheba
 Shechem
 Shem
 Shiloh
 Shimei
 Shinar
 Sidon
 Silas
 Simeon
 Simon the Zealot
 Sinai
 Sodom
 Solomon
 Stephen
 Sukkoth
 Syria
 Tamar
 Tarshish
 Tarsus
 Terah
 Thessalonica
 Thomas
 Timothy
 Tirzah
 Titus
 Troas
 Tubal
 Tychicus
 Tyre
 Ur
 Uriah
 Uzziah
 Vashti
 Xerxes
 Zacchaeus
 Zadok
 Zebedee
 Zebulun
 Zechariah 
 Zedekiah
 Zephaniah
 Zerubbabel
 Zoar
--- a/Get_Strong_variants.pl
+++ b/Get_Strong_variants.pl
@ -0,0 +1,24 @@
 # Probes biblehub.com for lettered variants (1611a, 1611b, ...) of Greek
 # Strong's numbers and records every variant page that exists in
 # out/results.txt. For each number the suffixes are tried in order and
 # probing stops at the first one the site reports as missing.
 use strict;
 use warnings;
 my $pre = "https://biblehub.com/greek/";
 my $post = ".htm";
 my @suffixes = split(/, /, "a, b, c, d, e, f, g, h");
 #$out = system "curl --fail https://biblehub.com/greek/2.htm";
 #print "\n\n\t\$out: $out";
 # Fail loudly if the output file cannot be created; the unchecked open used
 # to discard all results silently when out/ was missing.
 my $outPath = "out/results.txt";
 open(my $outFh, ">", $outPath) or die "Cannot write $outPath: $!";
 foreach my $xx (1611 .. 1613) {
 	foreach my $suffix (@suffixes) {
 		my $string = $pre . $xx . $suffix . $post;
 		# The URL is built only from the constants above, so passing it
 		# through the shell is safe here.
 		my $out = `curl $string`;
 		$out = "" unless defined $out;
 		if ($out =~ /We're sorry, we were not able to find that passage./) {
 			last
 		} else {
 			# NOTE(review): records are written without a separator or newline.
 			print {$outFh} "\$xx: $xx. \$string: $string.";
 		}
 	}
 }
 # Buffered write errors only surface at close.
 close($outFh) or die "Cannot close $outPath: $!";
--- a/Inventory_usfm_markers.pl
+++ b/Inventory_usfm_markers.pl
@ -0,0 +1,38 @@
 use 5.18.0;
 use File::Slurp;
 use File::Find ; 
 use Cwd ; 
 # Lists every distinct USFM marker (a backslash followed by non-space
 # characters and then a space) used in the .usfm files under $topDir, one
 # marker per line, sorted, in output.txt.
 my $topDir = "/Users/Henry/Documents/WACS/en_ulb";
 my %found;
 my @filesToRun = ();
 # This string is interpolated into a regex, so it has to be a regex fragment:
 # the former glob-style '*.usfm' matched any name ending in any character
 # followed by "usfm".
 my $filePattern = '\.usfm' ;
 my $logPath = "/Users/Henry/Google Drive/WA/Scripts/out/log.log";
 my $outPath = "/Users/Henry/Google Drive/WA/Scripts/out/output.txt";
 open(LOG, ">", $logPath) or die "Cannot write $logPath: $!";
 open(OUT, ">", $outPath) or die "Cannot write $outPath: $!";
 find( sub { push @filesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ;
 foreach my $file (@filesToRun) {
 	print "$file\n" ;
 	my $fileText = read_file("$file", binmode => 'utf8');
 	# Turn newlines into spaces so that a marker at the end of a line is
 	# still followed by a space.
 	$fileText =~ s/\n/ /g;
 	while ($fileText =~ /(\\[^ ]*) /g) {
 		# Hash keys make the collection unique.
 		$found{$1} = $1;
 	}
 }
 foreach my $code (sort keys %found) {
 	say OUT $code
 }
 close(OUT) or die "Cannot close $outPath: $!";
 close LOG;
 print "\n\tDone."