# -----------------------------------------------------------------------------
# Overview (review notes; the code below is unchanged)
#
# NOTE(review): this copy looks damaged and should be restored from the
# original before use:
#   * The whole script sits on two physical lines.  The first one starts with
#     `#`, so as laid out here Perl parses everything on it as a comment.
#   * Text that presumably sat between `<` and `>` seems to be missing: see
#     the empty pattern in `if ($line =~ //) {$book = $1;}`, the empty
#     `say OUT ""` calls, the `s//...<\/usfm>/` in TagInternalUSFM, and the
#     fused statement `$line =~ s# +\n$fileText\t\t\n\t";`.  Only tags written
#     with an escaped slash (`<\/heading>`, `<\/verse>`, `<\/preText>`,
#     `<\/text>`, `<\/usfm>`) are still visible.  TODO confirm against the
#     original source.
#
# Top level:
#   * Opens Logs/Log.txt (relative path) as LOG and a hard-coded ULB.xml path
#     as OUT, dying with $! on failure.
#   * Uses File::Find to collect every file under $topDir whose name ends in
#     `.usfm`, sorts the list, then calls ReadFiles().
#   * $outDir is assigned but not used anywhere in the visible code.
#
# ReadFiles (no arguments; iterates the file-level @filesToRun via $file):
#   * Prints each file name, reads the file as UTF-8 with read_file, trims
#     trailing spaces/newlines and logs the text between `|` characters.
#   * Takes the book name from the `\h` line.
#   * Replaces newlines with spaces, then re-breaks the text: a newline before
#     each `\s5`, and a newline between verse markers that share a line
#     (`\v` is temporarily swapped for the character U+221A while doing so).
#   * Moves a paragraph marker left at the end of a line (\q, \qN, \p, \pi,
#     \m, \b, \nb) to the start of the following line.
#   * For each resulting line, records chapter/verse numbers from `\c N` and
#     `\v N`, wraps the line, and passes the part ending in `<\/text>` to
#     TagInternalUSFM.
#   * `chomp;` inside the loop acts on $_, not on $line.
#
# TagInternalUSFM($line):
#   * Removes footnotes (`\f ... \f*`), `\qs ... \qs*` spans, and the markers
#     \b, \m, \p, \pi, \q, \qN, \s2 from the text, storing each one in %places
#     under an increasing counter.
#   * Then substitutes each stored item back followed by `<\/usfm>`, and dies
#     (without a message) if a substitution fails.  The placeholder that
#     presumably ties a removal to its re-insertion is not visible here.
#   * Logs the result with a `<+9>` prefix and returns the modified string.
# -----------------------------------------------------------------------------
# Creates workable ULB.xml file that has all USFM markers in place. use 5.18.0; use File::Slurp; use File::Find ; use Cwd ; use utf8; #use open IN => ":utf8", OUT => ":utf8"; use open IO => ":utf8"; open(LOG, ">Logs/Log.txt") or die "$!"; open(OUT, ">/Users/dillardfam/Documents/WA/WACS/fork/ULB_xml/ULB.xml") or die "$!"; say OUT ""; my ($topDir, $outDir) = ("/Users/dillardfam/Documents/WA/WACS/en_ulb", "/Users/dillardfam/Documents/WA/WACS/fork/ULB_xml"); my @filesToRun = (); my $filePattern = '\.usfm' ; #my $filePattern = '67-REV\.usfm' ; my $file; find( sub { push @filesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ; @filesToRun = sort @filesToRun; ReadFiles(); say OUT ""; close OUT; close LOG; say "\nDone."; # ===== sub ReadFiles { foreach $file ( @filesToRun ) { say $file; my @array; my $fileText = read_file("$file", binmode => 'utf8'); $fileText =~ s/[ \n]+$//; say LOG "|$fileText|"; #Delete \n my ($book, $chap, $vers, $chapStart); if ($fileText =~ /\\h ([^\n]*)/) { $book = $1 } #say LOG $book; $fileText =~ s/\n/ /g; $fileText =~ s/ / /g; $fileText =~ s/\\s5/\n$&/g; $fileText =~ s/\\v/√/g; while ($fileText =~ s/(√[^√\n]*)(√)/$1\n$2/) {} $fileText =~ s/√/\\v/g; $fileText =~ s/(\\id[^\n]*)\n/\t\t$1<\/heading>\n/; $fileText =~ s/ +\n/\n/g; $fileText =~ s/(\\(q\d?|pi?|m|n?b))\n/\n$1 /g; #say LOG $fileText; @array = split /\n/, $fileText; $fileText = ""; foreach my $line (@array) { chomp; if ($line =~ //) {$book = $1;} if ($line =~ /\\c (\d+).* \\v (\d+)/) { ($chap, $vers) = ($1, $2); $line = "\t\t\n\t\t\t$line"; $line = "\t\t\n$line" if $chapStart; $chapStart = 1; } elsif ($line =~ /\\v (\d+)/) { $vers = $1; $line = "\t\t\t$line" } #say LOG "===\n\n$line"; $line =~ s/(]*>)(.*\\v \d+ )(.*)(<\/verse>)/$1\n\t\t\t\t$2<\/preText>\n\t\t\t\t$3<\/text>\n\t\t\t$4/s; #say LOG "===\n\n$line"; if ($line =~ /.*<\/text>/p) { say LOG "<-0>\t$line"; my ($pre, $match, $post) = (${^PREMATCH}, ${^MATCH}, ${^POSTMATCH}); #say LOG "<-1>\t\$pre: $pre,\n\$match: 
$match,\n\$post: $post"; $match = TagInternalUSFM ($match); $line = $pre . $match . $post; } say LOG "---\n\n$line\n==="; $line =~ s# +\n$fileText\t\t\n\t"; } } sub TagInternalUSFM { my ($line, $placeNum) = ($_[0], 1); my %places; #say LOG "Tagging internal USFM in \$line $line."; while ($line =~ /(.*)(\\f .*?\\f\*)(.*<\/text>)/g) { #say LOG "<+1>\t$2"; $line =~ s/(.*)(\\f .*?\\f\*)(.*<\/text>)/$1$3/; $places{$placeNum} = $2; $placeNum ++; } #say LOG "<+2>\t$line"; while ($line =~ /(.*)(\\qs .*?\\qs\*)(.*<\/text>)/g) { #say LOG "<+3>\t$2"; $line =~ s/(.*)(\\qs .*?\\qs\*)(.*<\/text>)/$1$3/; $places{$placeNum} = $2; $placeNum ++; } #say LOG "<+4>\t$line"; while ($line =~ /(.*)(\\([bm]|pi?|q\d?|s2))( .*<\/text>)/g) { #say LOG "<+5>\t$2"; $line =~ s/(.*)(\\([bm]|pi?|q\d?|s2))(.*<\/text>)/$1$4/; $places{$placeNum} = $2; $placeNum ++; } #say LOG "<+6>\t$line"; $line =~ s/ / /g; #say LOG "<+7>\t$line"; foreach my $place (sort keys %places) { #say LOG "<+8>\tReplacing with $places{$place}<\/usfm> in\n$line."; unless ($line =~ s//$places{$place}<\/usfm>/) {die} } say LOG "<+9>\t$line"; return $line; }