Updated the build scripts to Stephen's local setup
This commit is contained in:
parent
3c524d09ec
commit
e1fac9168b
|
@ -9,10 +9,10 @@ use utf8;
|
||||||
use open IO => ":utf8";
|
use open IO => ":utf8";
|
||||||
|
|
||||||
open(LOG, ">Logs/Log.txt") or die "$!";
|
open(LOG, ">Logs/Log.txt") or die "$!";
|
||||||
open(OUT, ">/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml/ULB.xml") or die "$!";
|
open(OUT, ">/Users/dillardfam/Documents/WA/WACS/fork/ULB_xml/ULB.xml") or die "$!";
|
||||||
say OUT "<xml>";
|
say OUT "<xml>";
|
||||||
|
|
||||||
my ($topDir, $outDir) = ("/Users/Henry/Documents/WACS/en_ulb", "/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml");
|
my ($topDir, $outDir) = ("/Users/dillardfam/Documents/WA/WACS/en_ulb", "/Users/dillardfam/Documents/WA/WACS/fork/ULB_xml");
|
||||||
|
|
||||||
my @filesToRun = ();
|
my @filesToRun = ();
|
||||||
my $filePattern = '\.usfm' ;
|
my $filePattern = '\.usfm' ;
|
||||||
|
|
|
@ -14,7 +14,7 @@ open(OUT2, ">Output/Tagged.txt") or die "$!";
|
||||||
my ($ULBxml, $taggedULBDir) = ("/Users/dillardfam/Documents/WA/WACS/fork/ULB_xml/ULB.xml", "/Users/dillardfam/Documents/WA/WACS/fork/Manual_Tagging");
|
my ($ULBxml, $taggedULBDir) = ("/Users/dillardfam/Documents/WA/WACS/fork/ULB_xml/ULB.xml", "/Users/dillardfam/Documents/WA/WACS/fork/Manual_Tagging");
|
||||||
my (@filesToRun) = ();
|
my (@filesToRun) = ();
|
||||||
my %fullName;
|
my %fullName;
|
||||||
my $filePattern = "\.xml" ;
|
my $filePattern = "\55-1TI.xml" ;
|
||||||
my $file;
|
my $file;
|
||||||
|
|
||||||
my $xmlText = read_file("$ULBxml", binmode => 'utf8');
|
my $xmlText = read_file("$ULBxml", binmode => 'utf8');
|
||||||
|
@ -37,7 +37,6 @@ sub GetGist {
|
||||||
my ($verseRef, $standard, $tagged);
|
my ($verseRef, $standard, $tagged);
|
||||||
say LOG "|$fileName|, |$fullName{$fileName}|";
|
say LOG "|$fileName|, |$fullName{$fileName}|";
|
||||||
# while ($wholeTaggedText =~ /<verse name="($fullName{$fileName} \d+:\d+)">((.|\n)*?)<preText>(.*?)<\/preText>((.|\n)*?)\n\t+((<w ((.|\n)*?)\n)*)\t+<\/verse>/sg) {
|
# while ($wholeTaggedText =~ /<verse name="($fullName{$fileName} \d+:\d+)">((.|\n)*?)<preText>(.*?)<\/preText>((.|\n)*?)\n\t+((<w ((.|\n)*?)\n)*)\t+<\/verse>/sg) {
|
||||||
# while ($wholeTaggedText =~ /<verse name="($fullName{$fileName} \d+:\d+)">((.|\n)*?)<preText>(.*?)<\/preText>((.|\n)*?)\n\t+((<w(>| )((.|\n)*?)\n)*)\t+<\/verse>/sg) {
|
|
||||||
while ($wholeTaggedText =~ /<verse name="($fullName{$fileName} \d+:\d+)">((.|\n)*?)<preText>(.*?)<\/preText>((.|\n)*?)\n\t+((<(w|usfm)(>| )((.|\n)*?)\n)*)\t+<\/verse>/sg) {
|
while ($wholeTaggedText =~ /<verse name="($fullName{$fileName} \d+:\d+)">((.|\n)*?)<preText>(.*?)<\/preText>((.|\n)*?)\n\t+((<(w|usfm)(>| )((.|\n)*?)\n)*)\t+<\/verse>/sg) {
|
||||||
my ($preText, $gist) = ($4, $7);
|
my ($preText, $gist) = ($4, $7);
|
||||||
$gist =~ s/<comment>.*?<\/comment>//sg;
|
$gist =~ s/<comment>.*?<\/comment>//sg;
|
||||||
|
@ -78,9 +77,9 @@ sub Untag {
|
||||||
$txt =~ s/(√|<[^<>]*>)//g;
|
$txt =~ s/(√|<[^<>]*>)//g;
|
||||||
$txt = $pre . " " . $txt;
|
$txt = $pre . " " . $txt;
|
||||||
$txt =~ s/ {2,}/ /g;
|
$txt =~ s/ {2,}/ /g;
|
||||||
$txt =~ s/— /—/g;
|
|
||||||
$txt =~ s/^(.+[^ ])(\\)/$1 $2/g;
|
$txt =~ s/^(.+[^ ])(\\)/$1 $2/g;
|
||||||
$txt =~ s/ +$//;
|
$txt =~ s/ +$//;
|
||||||
|
$txt =~ s/— +/—/g;
|
||||||
# say LOG "<1>\t$txt";
|
# say LOG "<1>\t$txt";
|
||||||
return $txt;
|
return $txt;
|
||||||
}
|
}
|
||||||
|
@ -117,7 +116,7 @@ say "\nDone.";
|
||||||
__DATA__
|
__DATA__
|
||||||
#41-MAT Matthew
|
#41-MAT Matthew
|
||||||
#42-MRK Mark
|
#42-MRK Mark
|
||||||
43-LUK Luke
|
#43-LUK Luke
|
||||||
#44-JHN John
|
#44-JHN John
|
||||||
#45-ACT Acts
|
#45-ACT Acts
|
||||||
#46-ROM Romans
|
#46-ROM Romans
|
||||||
|
@ -125,7 +124,7 @@ __DATA__
|
||||||
#48-2CO 2 Corinthians
|
#48-2CO 2 Corinthians
|
||||||
#49-GAL Galatians
|
#49-GAL Galatians
|
||||||
#50-EPH Ephesians
|
#50-EPH Ephesians
|
||||||
51-PHP Philippians
|
#51-PHP Philippians
|
||||||
#52-COL Colossians
|
#52-COL Colossians
|
||||||
#53-1TH 1 Thessalonians
|
#53-1TH 1 Thessalonians
|
||||||
#54-2TH 2 Thessalonians
|
#54-2TH 2 Thessalonians
|
||||||
|
|
|
@ -17,11 +17,11 @@ $" = "\n";
|
||||||
|
|
||||||
mkdir "Logs";
|
mkdir "Logs";
|
||||||
open(LOG, ">Logs/tW_pairs.txt") or die "$!";
|
open(LOG, ">Logs/tW_pairs.txt") or die "$!";
|
||||||
my $ULBfile = "/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml/ULB.xml";
|
my $ULBfile = "/Users/dillardfam/Documents/WACS/fork/ULB_xml/ULB.xml";
|
||||||
my $topDirOGNT = "/Users/Henry/Documents/WACS/OGNT";
|
my $topDirOGNT = "/Users/dillardfam/Documents/WACS/OGNT";
|
||||||
#my $topDirOGNT = "/Users/Henry/Documents/WACS/en_ulb_tagged/Tag_test";
|
#my $topDirOGNT = "/Users/dillardfam/Documents/WACS/fork/Tag_test";
|
||||||
my $topDirtW = "/Users/Henry/Documents/WACS/en_tw/bible";
|
my $topDirtW = "/Users/dillardfam/Documents/WACS/en_tw/bible";
|
||||||
my ($outDir, $outFile) = ("/Users/Henry/Documents/WACS/en_ulb_tagged/Auto-tagged", "");
|
my ($outDir, $outFile) = ("/Users/dillardfam/Documents/WACS/fork/Auto-tagged", "");
|
||||||
my ($ULBText, $workText, $language);
|
my ($ULBText, $workText, $language);
|
||||||
my ($file);
|
my ($file);
|
||||||
my (%ULBtextThisVerse, %ULBpreTextThisVerse, %SNsThisVerse, %entriesThisSN, %longName);
|
my (%ULBtextThisVerse, %ULBpreTextThisVerse, %SNsThisVerse, %entriesThisSN, %longName);
|
||||||
|
|
Loading…
Reference in New Issue