127 lines
3.7 KiB
Perl
Executable File
127 lines
3.7 KiB
Perl
Executable File
# Creates workable ULB.xml file that has all USFM markers in place.
|
|
|
|
use 5.18.0;
|
|
use File::Slurp;
|
|
use File::Find ;
|
|
use Cwd ;
|
|
use utf8;
|
|
#use open IN => ":utf8", OUT => ":utf8";
|
|
use open IO => ":utf8";
|
|
|
|
# ----- Driver -----
# Collects every *.usfm file under $topDir, converts each one via ReadFiles(),
# and writes the combined result, wrapped in <xml>...</xml>, to $outDir/ULB.xml.
# A trace of the intermediate text is written to Logs/Log.txt (path relative
# to the current working directory).

my ($topDir, $outDir) = ("/Users/dillardfam/Documents/WA/WACS/en_ulb", "/Users/dillardfam/Documents/WA/WACS/fork/ULB_xml");

my $logPath = "Logs/Log.txt";
my $outPath = "$outDir/ULB.xml";

# LOG and OUT stay as bareword (package) handles on purpose: ReadFiles and
# TagInternalUSFM write to them by name. Three-argument open keeps the mode
# separate from the path; the file-level "use open" pragma still supplies the
# :utf8 layer because no layer is given here.
open(LOG, '>', $logPath) or die "Cannot open $logPath for writing: $!";
open(OUT, '>', $outPath) or die "Cannot open $outPath for writing: $!";

say OUT "<xml>";

my @filesToRun = ();
my $filePattern = '\.usfm' ;
#my $filePattern = '67-REV\.usfm' ;

# File-scope loop variable; ReadFiles iterates with it.
my $file;

# Inside the callback $_ is the bare file name and $File::Find::name is the
# full path; only names ending in $filePattern are kept.
find( sub { push @filesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ;

# Plain string sort of the collected paths.
@filesToRun = sort @filesToRun;

ReadFiles();

say OUT "</xml>";

# Buffered write errors only surface at close, so check it on both handles.
close OUT or die "Cannot close $outPath: $!";
close LOG or die "Cannot close $logPath: $!";

say "\nDone.";
|
|
# =====
|
|
# ReadFiles()
#
# Converts every file listed in the file-scope @filesToRun from USFM to the
# XML layout used in ULB.xml and prints one <book> element per file to OUT.
# Intermediate states are traced to LOG.
#
# Takes no arguments and returns nothing useful; it works entirely through
# the file-scope @filesToRun and the OUT / LOG handles.
#
# Per file:
#   1. Slurp the text and take the book name from the \h line.
#   2. Flatten the text to one line, then re-break it so that each \s5 marker
#      and each \v marker starts its own line.
#   3. Wrap each verse line in <verse>, opening a <chapter> wherever a \c
#      marker shares the line, and split the verse into <preText> / <text>.
#   4. Let TagInternalUSFM() wrap the USFM markers inside <text> in <usfm>.
sub ReadFiles {

    foreach my $file ( @filesToRun ) {
        say $file;

        my $fileText = read_file("$file", binmode => 'utf8');
        $fileText =~ s/[ \n]+$//;
        say LOG "|$fileText|";

        my ($book, $chap, $vers, $chapStart);

        # The book name is whatever follows "\h " up to the end of that line.
        if ($fileText =~ /\\h ([^\n]*)/) {
            $book = $1;
        }
        #say LOG $book;

        # Flatten to a single line; line breaks are re-introduced below.
        $fileText =~ s/\n/ /g;
        # NOTE(review): as written this replaces one space with one space and
        # so changes nothing; presumably it was meant to collapse runs of
        # spaces -- confirm against the author's copy before altering it.
        $fileText =~ s/ / /g;

        # Every \s5 marker starts a new line.
        $fileText =~ s/\\s5/\n$&/g;

        # Put each \v on its own line. "√" temporarily stands in for "\v" so
        # a one-character negated class can mean "no other verse marker
        # earlier on this line"; the loop runs until no line holds two.
        $fileText =~ s/\\v/√/g;
        while ($fileText =~ s/(√[^√\n]*)(√)/$1\n$2/) {}
        $fileText =~ s/√/\\v/g;

        # The first \id line becomes a <heading> element.
        $fileText =~ s/(\\id[^\n]*)\n/\t\t<heading>$1<\/heading>\n/;

        # Strip spaces before line breaks, then move a paragraph/poetry marker
        # left dangling at the end of a line to the start of the next line.
        $fileText =~ s/ +\n/\n/g;
        $fileText =~ s/(\\(q\d?|pi?|m|n?b))\n/\n$1 /g;
        #say LOG $fileText;

        my @array = split /\n/, $fileText;
        $fileText = "";

        foreach my $line (@array) {
            # The original bare "chomp;" acted on $_ rather than on the line
            # being processed. split has already removed the newlines, so
            # this is only a safeguard.
            chomp $line;

            if ($line =~ /<book name="(.*?)">/) {
                $book = $1;
            }

            if ($line =~ /\\c (\d+).* \\v (\d+)/) {
                # A chapter marker shares the line with its first verse:
                # open a new <chapter>, closing the previous one if any.
                ($chap, $vers) = ($1, $2);
                $line = "\t\t<chapter name=\"$book $chap\">\n\t\t\t<verse name=\"$book $chap:$vers\">$line</verse>";
                $line = "\t\t</chapter>\n$line" if $chapStart;
                $chapStart = 1;
            }
            elsif ($line =~ /\\v (\d+)/) {
                $vers = $1;
                $line = "\t\t\t<verse name=\"$book $chap:$vers\">$line</verse>";
            }
            #say LOG "===\n<AA>\n$line";

            # Split the verse body: everything up to and including "\v N "
            # becomes <preText>, the remainder becomes <text>.
            $line =~ s/(<verse[^>]*>)(.*\\v \d+ )(.*)(<\/verse>)/$1\n\t\t\t\t<preText>$2<\/preText>\n\t\t\t\t<text>$3<\/text>\n\t\t\t$4/s;
            #say LOG "===\n<BB>\n$line";

            # Tag the USFM markers inside <text>...</text>, leaving the text
            # before and after that element untouched.
            if ($line =~ /<text>.*<\/text>/p) {
                say LOG "<-0>\t$line";
                my ($pre, $match, $post) = (${^PREMATCH}, ${^MATCH}, ${^POSTMATCH});
                #say LOG "<-1>\t\$pre: $pre,\n\$match: $match,\n\$post: $post";
                $match = TagInternalUSFM ($match);
                $line = $pre . $match . $post;
            }

            say LOG "---\n<CC>\n$line\n===";

            # Drop spaces directly in front of a closing tag.
            $line =~ s# +</#</#g;
            $fileText .= $line . "\n";
        }

        # Close the last chapter only if one was actually opened; a file
        # without any \c marker would otherwise get an unmatched </chapter>.
        my $chapterClose = $chapStart ? "\t\t</chapter>\n" : "";
        say OUT "\t<book name=\"$book\">\n$fileText$chapterClose\t</book>";
    }

}
|
|
|
|
# TagInternalUSFM($line)
#
# Wraps the USFM markers found inside a "<text>...</text>" string in
# <usfm>...</usfm> and returns the resulting string.
#
#   $line - a string containing one <text>...</text> element.
#
# Three kinds of marker are tagged, in this order:
#   * footnotes:                "\f ... \f*"   (the whole span)
#   * selah spans:              "\qs ... \qs*" (the whole span)
#   * paragraph/poetry markers: \b \m \p \pi \q \qN \s2 - only when the
#     marker is followed by a space.
#
# Each hit is first swapped for a numbered <place number="N"/> placeholder
# and restored afterwards, so a span that has already been captured is never
# scanned again by a later pattern.
#
# Dies if a placeholder cannot be found again when restoring.
# Writes the final string to the LOG handle as a trace.
sub TagInternalUSFM {
    my ($line) = @_;
    my $placeNum = 1;
    my %places;
    #say LOG "Tagging internal USFM in \$line $line.";

    # In every pattern: $1 = text before the marker, $2 = the marker (or
    # marker span), $3 = text after it. Because the leading .* is greedy,
    # the right-most remaining occurrence is taken on each pass.
    my @markerPatterns = (
        qr/(<text>.*)(\\f .*?\\f\*)(.*<\/text>)/,
        qr/(<text>.*)(\\qs .*?\\qs\*)(.*<\/text>)/,
        # The space after the marker is required, so the \b inside "\bk" or
        # the \q inside another \q-prefixed marker is never matched.
        qr/(<text>.*)(\\(?:[bm]|pi?|q\d?|s2))( .*<\/text>)/,
    );

    foreach my $pattern (@markerPatterns) {
        # The substitution is its own loop condition, so the text that is
        # tested and the text that is replaced can never differ.
        while ($line =~ s/$pattern/$1<place number="$placeNum"\/>$3/) {
            $places{$placeNum} = $2;
            $placeNum++;
        }
        #say LOG "<+2>\t$line";
    }

    # NOTE(review): as written this replaces one space with one space and so
    # changes nothing; presumably it was meant to collapse runs of spaces --
    # confirm against the author's copy before altering it.
    $line =~ s/ / /g;
    #say LOG "<+7>\t$line";

    # Put the captured markers back, each wrapped in <usfm>...</usfm>.
    foreach my $place (sort { $a <=> $b } keys %places) {
        #say LOG "<+8>\tReplacing <place number=\"$place\"\/> with <usfm>$places{$place}<\/usfm> in\n$line.";
        unless ($line =~ s/<place number="$place"\/>/<usfm>$places{$place}<\/usfm>/) {
            die "TagInternalUSFM: placeholder $place not found in: $line\n";
        }
    }

    say LOG "<+9>\t$line";
    return $line;
}
|