From e1fac9168b3b04d78a93a5b32a59da42d1044c26 Mon Sep 17 00:00:00 2001 From: stephendillard Date: Fri, 2 Apr 2021 11:06:18 -0400 Subject: [PATCH] Updated the build scripts to Stephen's local setup --- Build_ULB_XML_for_Tagging.pl | 4 ++-- Check_ULB.pl | 9 ++++----- ..._XML_files_from_unified_ULB_XML_and_tWs_and_OGNT.pl | 10 +++++----- 3 files changed, 11 insertions(+), 12 deletions(-) mode change 100644 => 100755 Build_ULB_XML_for_Tagging.pl diff --git a/Build_ULB_XML_for_Tagging.pl b/Build_ULB_XML_for_Tagging.pl old mode 100644 new mode 100755 index 5c86d5e4..6efe9045 --- a/Build_ULB_XML_for_Tagging.pl +++ b/Build_ULB_XML_for_Tagging.pl @@ -9,10 +9,10 @@ use utf8; use open IO => ":utf8"; open(LOG, ">Logs/Log.txt") or die "$!"; -open(OUT, ">/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml/ULB.xml") or die "$!"; +open(OUT, ">/Users/dillardfam/Documents/WA/WACS/fork/ULB_xml/ULB.xml") or die "$!"; say OUT ""; -my ($topDir, $outDir) = ("/Users/Henry/Documents/WACS/en_ulb", "/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml"); +my ($topDir, $outDir) = ("/Users/dillardfam/Documents/WA/WACS/en_ulb", "/Users/dillardfam/Documents/WA/WACS/fork/ULB_xml"); my @filesToRun = (); my $filePattern = '\.usfm' ; diff --git a/Check_ULB.pl b/Check_ULB.pl index 038add6d..068c3e7d 100755 --- a/Check_ULB.pl +++ b/Check_ULB.pl @@ -14,7 +14,7 @@ open(OUT2, ">Output/Tagged.txt") or die "$!"; my ($ULBxml, $taggedULBDir) = ("/Users/dillardfam/Documents/WA/WACS/fork/ULB_xml/ULB.xml", "/Users/dillardfam/Documents/WA/WACS/fork/Manual_Tagging"); my (@filesToRun) = (); my %fullName; -my $filePattern = "\.xml" ; +my $filePattern = "\55-1TI.xml" ; my $file; my $xmlText = read_file("$ULBxml", binmode => 'utf8'); @@ -37,7 +37,6 @@ sub GetGist { my ($verseRef, $standard, $tagged); say LOG "|$fileName|, |$fullName{$fileName}|"; # while ($wholeTaggedText =~ /((.|\n)*?)(.*?)<\/preText>((.|\n)*?)\n\t+((/sg) { -# while ($wholeTaggedText =~ /((.|\n)*?)(.*?)<\/preText>((.|\n)*?)\n\t+((| 
)((.|\n)*?)\n)*)\t+<\/verse>/sg) { while ($wholeTaggedText =~ /((.|\n)*?)(.*?)<\/preText>((.|\n)*?)\n\t+((<(w|usfm)(>| )((.|\n)*?)\n)*)\t+<\/verse>/sg) { my ($preText, $gist) = ($4, $7); $gist =~ s/.*?<\/comment>//sg; @@ -78,9 +77,9 @@ sub Untag { $txt =~ s/(√|<[^<>]*>)//g; $txt = $pre . " " . $txt; $txt =~ s/ {2,}/ /g; - $txt =~ s/— /—/g; $txt =~ s/^(.+[^ ])(\\)/$1 $2/g; $txt =~ s/ +$//; + $txt =~ s/— +/—/g; # say LOG "<1>\t$txt"; return $txt; } @@ -117,7 +116,7 @@ say "\nDone."; __DATA__ #41-MAT Matthew #42-MRK Mark -43-LUK Luke +#43-LUK Luke #44-JHN John #45-ACT Acts #46-ROM Romans @@ -125,7 +124,7 @@ __DATA__ #48-2CO 2 Corinthians #49-GAL Galatians #50-EPH Ephesians -51-PHP Philippians +#51-PHP Philippians #52-COL Colossians #53-1TH 1 Thessalonians #54-2TH 2 Thessalonians diff --git a/Construct_auto-tagged_ULB_XML_files_from_unified_ULB_XML_and_tWs_and_OGNT.pl b/Construct_auto-tagged_ULB_XML_files_from_unified_ULB_XML_and_tWs_and_OGNT.pl index b8ba42c8..d6020e49 100644 --- a/Construct_auto-tagged_ULB_XML_files_from_unified_ULB_XML_and_tWs_and_OGNT.pl +++ b/Construct_auto-tagged_ULB_XML_files_from_unified_ULB_XML_and_tWs_and_OGNT.pl @@ -17,11 +17,11 @@ $" = "\n"; mkdir "Logs"; open(LOG, ">Logs/tW_pairs.txt") or die "$!"; -my $ULBfile = "/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml/ULB.xml"; -my $topDirOGNT = "/Users/Henry/Documents/WACS/OGNT"; -#my $topDirOGNT = "/Users/Henry/Documents/WACS/en_ulb_tagged/Tag_test"; -my $topDirtW = "/Users/Henry/Documents/WACS/en_tw/bible"; -my ($outDir, $outFile) = ("/Users/Henry/Documents/WACS/en_ulb_tagged/Auto-tagged", ""); +my $ULBfile = "/Users/dillardfam/Documents/WACS/fork/ULB_xml/ULB.xml"; +my $topDirOGNT = "/Users/dillardfam/Documents/WACS/OGNT"; +#my $topDirOGNT = "/Users/dillardfam/Documents/WACS/fork/Tag_test"; +my $topDirtW = "/Users/dillardfam/Documents/WACS/en_tw/bible"; +my ($outDir, $outFile) = ("/Users/dillardfam/Documents/WACS/fork/Auto-tagged", ""); my ($ULBText, $workText, $language); my ($file); my 
(%ULBtextThisVerse, %ULBpreTextThisVerse, %SNsThisVerse, %entriesThisSN, %longName);