Revamping NT tool for use with OGNT

2019-06-05 16:14:14 -04:00 · 2019-06-05 16:14:14 -04:00 · b8d70a29c2
parent 2f243dc5f5
commit b8d70a29c2
5 changed files with 127 additions and 259 deletions
--- a/MAST_tW_PDF_Updater/FilesForUpdates/Build_extract_NT.pl
+++ b/MAST_tW_PDF_Updater/FilesForUpdates/Build_extract_NT.pl
@ -15,11 +15,12 @@ my @bookList;

 open LOG, ">$Bin${d}Logs${d}log.log" or die "$Bin${d}Logs${d}log.log: $!";

-	open (my $file, "<:utf8", "$Bin${d}User${d}tW_work_NT.txt") or die "$Bin${d}User${d}tW.work.NT.dat:\n$!";
+	open (my $file, "<:utf8", "$Bin${d}User${d}tW_work_NT.txt") or die "$Bin${d}User${d}tW_work_NT.txt:\n$!"; # report the same path that open() was given
 	
 		while (my $line = <$file>) {
 			chomp $line;
-			if ($line =~ /^([^#][^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)$/) {
+			say LOG $line;
+			if ($line =~ /^([^#][^\n\t]*)\t([^\n\t]*)\t([^\n\t]*)$/) {

 				$book = "$1";
 				push @bookList, $book;
@ -27,21 +28,22 @@ open LOG, ">$Bin${d}Logs${d}log.log" or die "$Bin${d}Logs${d}log.log: $!";
 		}
 	close $file;

+say LOG "\@bookList: @bookList";
+say LOG "Removing old Extract.txt";

-say  LOG "Removing old Extract.txt";
-
-unlink "Temp${d}Extract.txt";
+unlink "$Bin${d}Temp${d}Extract.txt";

 say LOG "Building new Extract.txt";

-open(OUT, ">:utf8", "Temp${d}Extract.txt") or die "$!: Temp${d}Extract.txt";
+open(OUT, ">:utf8", "$Bin${d}Temp${d}Extract.txt") or die "$!: $Bin${d}Temp${d}Extract.txt";

 foreach $book (@bookList) {
-	say LOG "\$book: $book";
+	#say "\$book: $book";
 		my $fileText = read_file("$Bin${d}Temp${d}ULB_text.txt", binmode => 'utf8');
 	 # While finds entries and Strong's numbers
 		while ($fileText =~ /$book [0-9]*:[0-9]*[^\n]*\n/g) {
-			say OUT $&;
+			#say LOG $&;
+			print OUT $&;
 		}
 }

--- a/MAST_tW_PDF_Updater/FilesForUpdates/Exceptions/Exceptions_tWs_from_OSHB.txt
+++ b/MAST_tW_PDF_Updater/FilesForUpdates/Exceptions/Exceptions_tWs_from_OSHB.txt
@ -6460,6 +6460,7 @@ Joel 2:17	4910	||
 Joel 2:19	6030	||
 Joel 2:19	7646	||
 Joel 2:19	5414	||
+Joel 2:19	1471	nations
 Joel 2:20	7368	||
 Joel 2:20	6440	||
 Joel 2:20	314	||
@ -7978,6 +7979,7 @@ Zechariah 7:9	6213	||
 Zechariah 7:9	376	||
 Zechariah 7:10	376	||
 Zechariah 7:10	251	||
+Zechariah 7:10	6041	||
 Zechariah 7:11	241	||
 Zechariah 7:11	3513	||
 Zechariah 7:11	8085	||
@ -7992,6 +7994,7 @@ Zechariah 7:14	5674	||
 Zechariah 7:14	7760	||
 Zechariah 8:1	3068	||
 Zechariah 8:2	6635	||
+Zechariah 8:22	1471	nations
 Zechariah 8:3	7931	||
 Zechariah 8:3	5892	||
 Zechariah 8:4	6635	||
--- a/MAST_tW_PDF_Updater/FilesForUpdates/MakeULB.3.pl
+++ b/MAST_tW_PDF_Updater/FilesForUpdates/MakeULB.3.pl
@ -1,134 +0,0 @@
-use 5.12.0;
-use utf8;
-use Cwd;
-use File::Slurp;
-use File::Basename;
-use FindBin '$Bin';
-use open IO => ":utf8";
-$| = 1;
-$" = "\n";
-
-my ($pwd, $d, $fileSpec) = ($Bin, "\\", "\.usfm");
-if ($^O eq "linux" || $^O eq "darwin") {
-	($d) = ("/");
-	}
-
-chdir $pwd;
-
-my ($inDir, $outDir) = ("", "out");
-my (@array, @usfmLines);
-my (%hash);
-my ($whatami, $inFile);
-my ($book, $chap, $vers, $text, $outText, $newV, $newC, $newB, $outFile, $usfmText, $metathesis, $repoPath);
-
-open(LOG, ">:utf8", "Logs${d}log.log") or die "${d}Logs${d}log.log:\n$!";
-
-my ($udf) = "User_defaults.windows.txt";
-if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
-elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}
-
-open (my $defaults, "<:utf8", "User${d}$udf") or die "User${d}$udf:\n$!";
-
-GetUserDefaults();
-
-say LOG "\$inDir: $inDir";
-chdir("$repoPath");
-opendir THISDIR, "." or die "serious dainbramage: $!";
-my @infiles = grep /$fileSpec$/i, readdir *THISDIR;
-say LOG "\$repoPath: $repoPath, \$fileSpec: $fileSpec\n\@infiles:\n@infiles";
-closedir THISDIR;
-chdir("$pwd");
-	ProcessFiles();
-	say "Done.";
-close LOG;
-
-sub GetUserDefaults {
-	open (my $defaults, "<:utf8", "User${d}$udf") or die "User{d}$udf:\n$!";
-	
-		my ($ptte, $ptr);
-		while (my $thisLine = <$defaults>) {
-			chomp $thisLine;
-			if ($thisLine =~ /^Repository directory: (.*)$/) {
-				$repoPath = $1
-			}
-		}
-		die "No path to repo found" if $repoPath eq "";
-		
-		($repoPath) = ("$repoPath${d}en_ulb");
-		say LOG "\$repoPath: $repoPath";
-	
-	close $defaults;
-}
-
-
-sub ProcessFiles {
-	say "Processing files";
-	my $thisLine;
-	open(OUT, ">:utf8", "Temp${d}ULB_text.txt") or die "Temp${d}ULB_text.txt:\n$!";
-	my $finalTextForm;
-	@infiles = sort @infiles;
-	foreach $inFile (@infiles) {
-		#say LOG $inFile;
-		my $thisFile;
-		$usfmText = read_file("$repoPath${d}$inFile", binmode => 'utf8') or die;
-		$usfmText =~ s/\r?\n([^\\ \r\n])/ $1/g;
-		$usfmText =~ s/\\f \+.*?\\f\*//g;
-		$usfmText =~ s/\\pi/\\p/g;
-		$usfmText =~ s/\\((ide?)|(toc.)|(mt|sp)|(c \\d+))[^\r\n]*\r?\n//g;
-		$usfmText =~ s/\r?\n\\(m|pi?|(q\d?)) ([^\r\n]*)\r?\n/ $3/g;
-		$usfmText =~ s/([^\r\n])\\q/$1/g;
-		$usfmText =~ s/ +\r?\n/\n/g;
-		$usfmText =~ s/(\r?\n\\v \d+)\r?\n/$1 \[blank\]\n/g;
-		$usfmText =~ s/ —/—/g;
-		#say LOG $usfmText;
-		#say LOG "\$usfmText:\n$usfmText\n=====\n";
-		@usfmLines = "";
-		@usfmLines = split /\r?\n/, $usfmText;
-		#say LOG "\@usfmLines:\n@usfmLines";
-		foreach $thisLine (@usfmLines) {
-			chomp $thisLine;
-			say LOG "<3>\t$thisLine";
-			$thisLine =~ s/^(\\q)$//;
-			say LOG "<4>\t$thisLine";
-			$thisLine = SearchAndReplace($thisLine);
-			say LOG "<5>\t$thisLine";
-			$thisFile .= $thisLine;
-		}
-			say LOG "Before changes:\n$thisFile";
-			$thisFile =~ s/\r?\n>>\t/ /g;
-			$thisFile =~ s/>\t//g;
-			$thisFile =~ s/[\r\n]/÷/g;;
-			$thisFile =~ s/— /—/g;
-			$thisFile =~ s/\\q\d//g;
-			$thisFile =~ s/\\p//g;
-			$thisFile =~ s/\\q //g;
-			$thisFile =~ s/\\qs( .*)\\qs\* ?/$1/g;
-			$thisFile =~ s/\÷+/\n/g;
-			$thisFile =~ s/ {2,}/ /g;
-			#say LOG "After changes:\n$thisFile";
-			say OUT $thisFile;
-	}
-	close OUT;
-}
-
-sub SearchAndReplace {
-	my $thisxLine = shift;
-	$thisxLine =~ s/\\s5.*$/\n-------\n/;
-	say LOG "<0>\t$thisxLine";
-	if ($thisxLine =~ s/\\h (.+) *$//) {$book = $1;$newB = 1}
-	elsif ($thisxLine =~ s/\\c (\d+)//) {$chap = $1; $newC = 1}
-	elsif ($thisxLine =~ s/\\v (\d+(-\d+)?) (.*)$/$3/) {
-		#print OUT "\n$outText\n";
-		$vers = $1;
-		$newV = 1;
-		$thisxLine = "\n$book $chap:$vers\t$metathesis$thisxLine";
-		$metathesis = "";
-		say LOG "<1>\t<$book> $chap:$vers\t$thisxLine";
-	}
-	elsif ($thisxLine =~ s/^\\q\d? (.*)$/ $1/) {}
-	elsif ($thisxLine =~ s/^\\m (.*)$/ $1/) {}
-	elsif ($thisxLine =~ s/^\\d (.*)//) {$metathesis = "$1 "}
-	elsif ($thisxLine =~ s/^\\[qpm]$//) {}
-	say LOG "<2>\t|$thisxLine|";
-	return $thisxLine;
-}
--- a/MAST_tW_PDF_Updater/FilesForUpdates/Update_OGNT.pl
+++ b/MAST_tW_PDF_Updater/FilesForUpdates/Update_OGNT.pl
@ -20,7 +20,7 @@ my $all_file = "/Users/Henry/Documents/git.Door43/OpenGNT_version3_3.csv";# late
 # my $all_file = "/Users/Henry/Documents/git.Door43/Test.csv";# latest version of OGNT
 my $OGNT_directory = "/Users/Henry/Documents/git.Door43/OGNT";
 my (%book_no, %wa_book_no, %abbr, %full, %bk_abr, %book_name, %wa_book_name);
-my ($old_bk_abr, $old_ch, $old_vs);
+my ($old_bk_abr, $old_ch, $old_vs, $file_ID);
 my ($udf) = "User_defaults.windows.txt";
 if ($^O eq "linux") {$udf = "User_defaults.linux.txt"}
 elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}
@ -39,7 +39,7 @@ while (<DATA>) {
 	chomp;
 	if (/^([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)/) {
 		($book_no{$4}, $wa_book_no{$4}, $abbr{$4}, $full{$3}) = ($1, $2, $3, $4);
-		($book_name{$1}, $bk_abr{$1})= ($4, $3);
+		($book_name{$1}, $bk_abr{$1}, $file_ID) = ($4, $3, "$2-" . uc $3);
 	}
 }

@ -80,14 +80,15 @@ sub PlaceParsed {
 	if ($bkabr ne $old_bk_abr) {
 		#say LOG "\$bkabr $bkabr ne \$old_bk_abr $old_bk_abr. Should be opening $Bin${d}OGNT${d}$bkabr.xml";
 		EndFile();
-		open OUT, ">$repoPath${d}OGNT${d}$bkabr.xml" or die "$!: $Bin${d}OGNT${d}$bkabr.xml";
+		# FIXME: $file_ID is a single scalar reassigned on every matching DATA row, so it presumably holds only the last row's ID by the time this runs (confirm call order); it needs to be looked up per book, e.g. a hash keyed by $bkabr.
+		open OUT, ">$repoPath${d}OGNT${d}$file_ID.xml" or die "$!: $repoPath${d}OGNT${d}$file_ID.xml";
 		say OUT "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n\t<book osisID=\"$bkabr\">\n\t\t<chapter osisID=\"$bkabr.$ch\">\n\t\t\t<verse osisID=\"$bkabr.$ch.$vs\">"
 	} elsif ($ch ne $old_ch) {
 		say OUT "\t\t\t</verse>\n\t\t</chapter>\n\t\t<chapter osisID=\"$bkabr.$ch\">\n\t\t\t<verse osisID=\"$bkabr.$ch.$vs\">"
 	} elsif ($vs ne $old_vs) {
 		say OUT "\t\t\t</verse>\n\t\t\t<verse osisID=\"$bkabr.$ch.$vs\">"
 	}
-	say OUT "\t\t\t\t<w lemma\"$strong\" morph=\"$parse\">$word</w>";
+	say OUT "\t\t\t\t<w lemma=\"$strong\" morph=\"$parse\">$word</w>";
 	($old_bk_abr, $old_ch, $old_vs) = ($bkabr, $ch, $vs);
 	#say LOG "(\$old_bk_abr: $old_bk_abr, \$old_ch: $old_ch, \$old_vs: $old_vs) = (\$bkabr: $bkabr, \$ch: $ch, \$vs: $vs)";
 }
--- a/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.OGNT.1.pl
+++ b/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.OGNT.1.pl
@ -1,9 +1,9 @@
 # Produces list of tWs for each verse by linking OGNT to ULB through tWs.

-# Taken from tWs.from.OSHB.2.pl.
+# Taken from tWs.from.OGNT.2.pl.

 # The output from this script is useful for the interleaved PDFs used in MAST.
-# This version uses an exception file to handle places where the OSHB points to
+# This version uses an exception file to handle places where the OGNT points to
 # a tW page different from that on which the ULB term appears.

 # Make sure the correct input file is $ULBfile. Run script.
@ -37,25 +37,25 @@ elsif ($^O eq "darwin") {$udf = "User_defaults.mac.txt"}

 open (my $defaults, "<:utf8", "$pwd${d}User${d}$udf") or die "$pwd${d}User${d}$udf:\n$!";

-my ($cv, $ULBfile, $exceptions, $missing, $log, $output, $workFile, $verseMap) =
+my ($cv, $ULBfile, $exceptions, $missing, $log, $output, $workFile) =
 					("00000",
-					"Temp${d}Extract.txt",
-					"Exceptions${d}Exceptions_tWs_from_OGNT.txt",
-					"Output${d}Entries_not_handled.txt",
-					"Logs${d}log.log",
-					"Output${d}tWs_for_PDF.txt",
-					"User${d}tW_work_NT.txt",
-					"OSHB${d}VerseMap.xml",
+					"$Bin${d}Temp${d}Extract.txt",
+					"$Bin${d}Exceptions${d}Exceptions_tWs_from_OGNT.txt",
+					"$Bin${d}Output${d}Entries_not_handled.txt",
+					"$Bin${d}Logs${d}log.log",
+					"$Bin${d}Output${d}tWs_for_PDF.txt",
+					"$Bin${d}User${d}tW_work_NT.txt",
 					);

-my ($OSHBfile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir);
+
+my ($OGNTfile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir);

 my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
 	 %specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
 	 %SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pageThisEntry, %adjust, %newRef);

 my $book;
-my (@OSHBfileList);
+my (@OGNTfileList);

 # ==============================

@ -75,7 +75,7 @@ while (<DATA>) {

 GetUserDefaults();
 GetULBBooksToProcess();
-CheckForRemaps();
+#CheckForRemaps();
 ReadExceptions();
 PairtWEntriesTotWPageAndUniqSNs();
 ReadLinkedSNs();
@ -113,48 +113,48 @@ sub GetUserDefaults {
 		die "No text editor found" if $textEditor eq "";
 		die "No path to repo found" if $repoPath eq "";
 		
-		($topTwDir, $topSourceLangDir) = ("$repoPath${d}en_tw${d}bible", "$repoPath${d}OSHB");
+		($topTwDir, $topSourceLangDir) = ("$repoPath${d}en_tw${d}bible", "$repoPath${d}OGNT");
 	
 	close $defaults;
 }

 sub GetULBBooksToProcess {
-	say LOG "GetULBBooksToProcess";
+	say LOG "GetULBBooksToProcess on \$workFile: $workFile";
 	open (my $file, "<:utf8", "$workFile") or die "$workFile:\n$!";
 	
 		while (my $line = <$file>) {
 			chomp $line;
-			say LOG $line;
-			if ($line =~ /^[^#][^\t]*\t[^\t]*\t([^\t]*)\t[^\t]*$/) {
-				$OSHBfile = "$topSourceLangDir${d}$1.xml";
-				push @OSHBfileList, $OSHBfile;
+			say LOG "\t$line";
+			if ($line =~ /^[^#][^\t]*\t[^\t]*\t([^\t]*)$/) {
+				$OGNTfile = "$topSourceLangDir${d}$1.xml"; # third tab-separated field is the source file's base name
+				push @OGNTfileList, $OGNTfile;
 			}
 		}
 	
 	close $file;
-	say LOG "\@OSHBfileList: @OSHBfileList"
+	say LOG "\@OGNTfileList: @OGNTfileList";
 }

-sub CheckForRemaps {
-	say "Checking for remaps in $repoPath${d}$verseMap";
-	#/media/henry/92C6F7E3C6F7C58F/Users/henry/Documents/WA_Repo/OSHB/VerseMap.xml:
-	#/media/henry/92C6F7E3C6F7C58F/Users/henry/WA_Repo/OSHB/VerseMap.xml
-	open (my $file, "<:utf8", "$repoPath${d}$verseMap") or die "$repoPath${d}$verseMap:\n$!";
-	
-		while (my $thisLine = <$file>) {
-			chomp $thisLine;
-			#say LOG $thisLine;
-			if ($thisLine =~ /<verse wlc="([^\.]*)\.(\d+)\.(\d+)" kjv="([^\.]*)\.(\d+)\.(\d+)" type="full"\/>/) {
-				#say LOG "*\t$thisLine";
-				my ($oldB, $oldC, $oldV, $newB, $newC, $newV) =($bkFull{$1}, $2, $3, $bkFull{$4}, $5, $6);
-				my $oldie = "$oldB $oldC:$oldV";
-				$newRef{$oldie} = "$newB $newC:$newV";
-				say LOG "**\t" . "\$newRef{$oldie}: " . $newRef{"$oldB $oldC:$oldV"}
-			}
-		}
-	
-	close $file;
-}
+#sub CheckForRemaps {
+#	say "Checking for remaps in $repoPath${d}$verseMap";
+#	#/media/henry/92C6F7E3C6F7C58F/Users/henry/Documents/WA_Repo/OGNT/VerseMap.xml:
+#	#/media/henry/92C6F7E3C6F7C58F/Users/henry/WA_Repo/OGNT/VerseMap.xml
+#	open (my $file, "<:utf8", "$repoPath${d}$verseMap") or die "$repoPath${d}$verseMap:\n$!";
+#	
+#		while (my $thisLine = <$file>) {
+#			chomp $thisLine;
+#			#say LOG $thisLine;
+#			if ($thisLine =~ /<verse wlc="([^\.]*)\.(\d+)\.(\d+)" kjv="([^\.]*)\.(\d+)\.(\d+)" type="full"\/>/) {
+#				#say LOG "*\t$thisLine";
+#				my ($oldB, $oldC, $oldV, $newB, $newC, $newV) =($bkFull{$1}, $2, $3, $bkFull{$4}, $5, $6);
+#				my $oldie = "$oldB $oldC:$oldV";
+#				$newRef{$oldie} = "$newB $newC:$newV";
+#				say LOG "**\t" . "\$newRef{$oldie}: " . $newRef{"$oldB $oldC:$oldV"}
+#			}
+#		}
+#	
+#	close $file;
+#}

 sub ReadExceptions {
 	say "Reading exceptions";
@ -181,12 +181,12 @@ sub PairtWEntriesTotWPageAndUniqSNs {
 	my $filePattern = '*.md' ; 
 	find (sub {push @filesToRun, $File::Find::name if (m/^(.*)$filePattern$/)}, $topTwDir) ;
 	@filesToRun = sort @filesToRun;
-	say LOG "\@filesToRun: @filesToRun";
+	#say LOG "\@filesToRun: @filesToRun";
 	foreach my $file (@filesToRun) {
 		print ".";
 		$file =~ s/\//\\/g unless $^O eq "linux" || $^O eq "darwin"; 
 		my ($thisList, $shortFile) = ("", $file);
-		#say "|$shortFile|\t|$topTwDir|";
+		say LOG "\$file: $file, \$shortFile: $shortFile, \$topTwDir: $topTwDir";
 		$shortFile =~ s/^\Q$topTwDir${d}\E//;
 		$shortFile =~ s/\.md$//;
 		$shortFile =~ s/\Q$d\E/,/;
@ -198,7 +198,7 @@ sub PairtWEntriesTotWPageAndUniqSNs {
 				if (/^# ([^\n]*)$/) {
 					  $thisList = $1;
 					  $thisList =~ s/[\r\n]*$//;
-					  say LOG "\$thisList = |$thisList|";
+					  #say LOG "\$thisList = |$thisList|";
 					  $thisList =~ s/ \([^\)]*\)//g;
 					  $entriesThisPage{$shortFile} = $thisList;
 					  my @ULBEntries = split /, /, $thisList;
@ -213,7 +213,7 @@ sub PairtWEntriesTotWPageAndUniqSNs {
 				}
 				if (/Strong's(.*)$/) {
 					  my $SNs = $1;
-					  while ($SNs =~ s/[H](\d*)//) {
+					  while ($SNs =~ s/[G](\d*)//) {
 						  push @relevantSNs, $1;
 						  $entriesThisSN{$1} .= "$thisList, ";
 						  $pagesThisSN{$1} .= "$shortFile, ";
@ -226,41 +226,18 @@ sub PairtWEntriesTotWPageAndUniqSNs {
 				}
 			}
 		close IN;
-
-		#}
-		  #say LOG "|$shortFile|: \t |$entriesThisPage{$shortFile}|"
 	}
 	say "";
 	#say LOG "====";
 	#say LOG "====";
 }

-sub LinkULBtoCV {
-	say "Linking ULB to chapter and verse";
-	say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";
-	open IN, "$ULBfile" or die "$ULBfile: $!";
-		while (<IN>) {
-			#say LOG $_;
-			if (/^([^\t]*)\t(.*)$/) {
-				$cv ++;
-				($text{$1}, $fullText{$1}) = ($2, $2);
-				$ref{$cv} = $1;
-				$order{$1} = $cv;
-			}
-			#say LOG "First \$ref{$cv}: $ref{$cv}\n\$text{$ref{$cv}}: $text{$ref{$cv}}";
-		}
-	close IN;
-	foreach my $key (sort keys %ref) {
-		#say LOG "\$key: $key:, \$ref{$key}: $ref{$key}, \$text{$ref{$key}}: $text{$ref{$key}}"
-	}
-}
-
 sub ReadLinkedSNs {
 	say "Reading linked Strong's numbers";
-	my ($flag, $OSHBfile) = ("","");
-	foreach $OSHBfile (@OSHBfileList) {
-		#say LOG "opening \$^OHB $OSHBfile";
-		open IN, "$OSHBfile" or die "$OSHBfile can't be opened\n\n";
+	my ($flag, $OGNTfile) = ("","");
+	foreach $OGNTfile (@OGNTfileList) {
+		say LOG "opening \$OGNTfile: $OGNTfile";
+		open IN, "$OGNTfile" or die "$OGNTfile can't be opened\n\n";
 			my ($thisBook, $thisChap, $thisVers, $thisRef);
 			my (@pages);
 			while (<IN>) {
@ -276,7 +253,7 @@ sub ReadLinkedSNs {
 					say LOG "##\t$bk $ch:$vs, $thisRef";
 				}
 				else {
-					while (/<w lemma="(\w\/)?(\d+)( \w)?"/g) {
+					while (/<w lemma="(\d+)"/g) {
 						#say LOG $_;
-						my ($thisNum) = ($2);
+						my ($thisNum) = ($1); # the lemma pattern now has a single capture group, so the number is in $1
 						say LOG "\t\$thisNum: $thisNum, \$SNsInCV{$thisRef}: $SNsInCV{$thisRef}";
@ -301,6 +278,25 @@ sub ReadLinkedSNs {
 	}
 }

+sub LinkULBtoCV {
+	say "Linking ULB to chapter and verse";
+	say LOG "LinkULBtoCV, \$ULBfile: $ULBfile";
+	open IN, "$ULBfile" or die "$ULBfile: $!";
+		while (<IN>) {
+			if (/^([^\t]*)\t(.*)$/) {
+				$cv ++;
+				($text{$1}, $fullText{$1}) = ($2, $2);
+				$ref{$cv} = $1;
+				$order{$1} = $cv;
+			}
+			#say "First \$ref{$cv}: $ref{$cv}\nGetULBBooksToProcess\$text{$ref{$cv}}: $text{$ref{$cv}}";
+		}
+	close IN;
+	foreach my $key (sort keys %ref) {
+		#say LOG "\$key: $key:, \$ref{$key}: $ref{$key}, \$text{$ref{$key}}: $text{$ref{$key}}"
+	}
+}
+
 sub LinkSNsToULBtextViaEntries  {
 	say "Linking Strong's numbers to ULB text via tW page entries";
 	say LOG "sub LinkSNsToULBtextViaEntries called";
@ -387,47 +383,6 @@ sub LinkSNsToULBtextViaEntries  {
 	}
 }

-sub Adjust {
-	my ($snsOld, $ref, $snsNew, $addToSnsNew) = ($_[0], $_[1], "", "");
-	say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: $snsOld";
-	my (%tempEntries);
-	#say LOG ">\t\$sns: |$sns|";
-	#say LOG ">\t\$specifiedText{$ref}: |$specifiedText{$ref}|";
-	$snsOld =~ s/^ +LinkSNsToULBtextViaEntries//;
-	my @oldArray = split / /, $snsOld;
-	#say LOG "\$adjust{$ref}: $adjust{$ref}";
-	my @preadjustments = split /, /, $adjust{$ref};
-	foreach my $adjustment (@preadjustments) {
-		#say LOG "<><>\t\$adjustment: >$adjustment<";
-		if ($adjustment =~ /([^\t]*)\t\|\|$/) { # delete this from list to look for
-			my $found = $1;
-			$snsOld =~ s/\b$found\b ?//;
-			say LOG "\$found: $found should be deleted from \$snsOld: $snsOld";
-		} elsif ($adjustment =~ /^\|\|\t(.*)/) { # add this to list to look for
-			my $adj = $1;
-			say LOG "\$adjustment: $adjustment, \$adj: $adj";
-			if ($adj =~ /([^\t]*)\t([^\t]*)/) {
-				$snsNew = "[$1]($pageThisEntry{$2})"
-			} else {
-				$snsNew .= "$adj "
-			}
-		} elsif ($adjustment =~ /^(\d+)\t(\w+)$/) { # add specified word
-			my ($found1, $found2) = ($1, $2);
-			#$addToSnsNew .= "$1\[$2\] ";
-			$snsOld =~ s/$found1/$found1\[$found2\]/;
-			say LOG "\$snsOld: $snsOld";
-		} elsif (($adjustment =~ /^(\d+)\t([\/\d\w]+)$/)) { # add specified page
-			#$addToSnsNew .= "$1\{$2\} "
-			my ($found1, $found2) = ($1, $2);
-			$snsOld =~ s/$found1/$found1\($found2\)/;
-			say LOG "\$snsOld: $snsOld";
-		}
-	}
-	say LOG "\$snsNew: >$snsNew<\n\$snsOld+\$snsNew: >$snsOld< >$snsNew<";
-	$snsNew = "$snsOld $snsNew";
-	return $snsNew;
-}
-
 sub Output {
 	say "Outputting";
 	#say LOG "Output subRoutine called";
@ -485,6 +440,47 @@ sub Output {
 	}
 }

+sub Adjust {
+	my ($snsOld, $ref, $snsNew, $addToSnsNew) = ($_[0], $_[1], "", "");
+	say LOG "\$specifiedText{$ref}: $specifiedText{$ref}\n\$snsOld: $snsOld";
+	my (%tempEntries);
+	#say LOG ">\t\$sns: |$sns|";
+	#say LOG ">\t\$specifiedText{$ref}: |$specifiedText{$ref}|";
+	$snsOld =~ s/^ +LinkSNsToULBtextViaEntries//;
+	my @oldArray = split / /, $snsOld;
+	#say LOG "\$adjust{$ref}: $adjust{$ref}";
+	my @preadjustments = split /, /, $adjust{$ref};
+	foreach my $adjustment (@preadjustments) {
+		#say LOG "<><>\t\$adjustment: >$adjustment<";
+		if ($adjustment =~ /([^\t]*)\t\|\|$/) { # delete this from list to look for
+			my $found = $1;
+			$snsOld =~ s/\b$found\b ?//;
+			say LOG "\$found: $found should be deleted from \$snsOld: $snsOld";
+		} elsif ($adjustment =~ /^\|\|\t(.*)/) { # add this to list to look for
+			my $adj = $1;
+			say LOG "\$adjustment: $adjustment, \$adj: $adj";
+			if ($adj =~ /([^\t]*)\t([^\t]*)/) {
+				$snsNew = "[$1]($pageThisEntry{$2})"
+			} else {
+				$snsNew .= "$adj "
+			}
+		} elsif ($adjustment =~ /^(\d+)\t(\w+)$/) { # add specified word
+			my ($found1, $found2) = ($1, $2);
+			#$addToSnsNew .= "$1\[$2\] ";
+			$snsOld =~ s/$found1/$found1\[$found2\]/;
+			say LOG "\$snsOld: $snsOld";
+		} elsif (($adjustment =~ /^(\d+)\t([\/\d\w]+)$/)) { # add specified page
+			#$addToSnsNew .= "$1\{$2\} "
+			my ($found1, $found2) = ($1, $2);
+			$snsOld =~ s/$found1/$found1\($found2\)/;
+			say LOG "\$snsOld: $snsOld";
+		}
+	}
+	say LOG "\$snsNew: >$snsNew<\n\$snsOld+\$snsNew: >$snsOld< >$snsNew<";
+	$snsNew = "$snsOld $snsNew";
+	return $snsNew;
+}
+
 sub Substitute {
 	foreach my $key (sort keys %pages) {
 		say LOG "\$key: $key\t\$SNsInCV{$key}: $SNsInCV{$key}";