Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/MakeULB.4.pl

116 lines
2.9 KiB
Perl

# Converts USFM files to a single ULB text file.
# Reads each USFM file line by line rather than slurping it whole.
use 5.12.0;
use utf8;
use Cwd;
use File::Slurp;
use File::Basename;
use FindBin '$Bin';
use open IO => ":utf8";
# ---------------------------------------------------------------------------
# Script body.
#   1. Work out platform-specific settings (directory separator, name of the
#      user-defaults file) and open the log file.
#   2. Call GetUserDefaults() to fill in $repoPath.
#   3. Collect the names of the files in $repoPath that end in $fileSpec.
#   4. Call ProcessFiles() to convert them.
# Every file-level variable declared here is visible to the subs below; the
# declarations are all kept even where this block does not use them itself.
# ---------------------------------------------------------------------------
$| = 1;       # unbuffered STDOUT
$" = "\n";    # arrays interpolated into strings are joined with newlines (see the log line below)

# $pwd      - directory containing this script; relative paths are resolved from here
# $d        - directory separator: backslash unless running on Linux or macOS
# $fileSpec - literal file-name suffix of the files to convert
my ($pwd, $d, $fileSpec) = ($Bin, "\\", ".usfm");
if ($^O eq "linux" || $^O eq "darwin") {
    $d = "/";
}
chdir $pwd or die "Cannot chdir to $pwd:\n$!";

my ($inDir, $outDir) = ("", "out");
my (@array, @usfmLines);
my (%hash);
my ($whatami, $inFile);
my ($book, $chap, $vers, $text, $outText, $newV, $newC, $newB, $outFile, $usfmText, $metathesis, $repoPath, $ulbPath);

# LOG is a bareword handle on purpose: the subs below write to it by name.
open(LOG, ">:utf8", "$Bin${d}Logs${d}log.log") or die "$Bin${d}Logs${d}log.log:\n$!";

# Pick the defaults file that matches the operating system.
my ($udf) = "User_defaults.windows.txt";
if    ($^O eq "linux")  { $udf = "User_defaults.linux.txt" }
elsif ($^O eq "darwin") { $udf = "User_defaults.mac.txt" }

GetUserDefaults();
#say LOG "\$inDir: |$inDir|";

# List the input files. The chdir is checked so that a wrong repository path
# stops the run instead of silently scanning whatever directory we were in.
chdir $repoPath or die "Cannot chdir to $repoPath:\n$!";
opendir my $repoDir, "." or die "serious dainbramage: $!";
# \Q...\E makes the "." in $fileSpec match a literal dot, not any character.
my @infiles = grep { /\Q$fileSpec\E$/i } readdir $repoDir;
say LOG "\$repoPath: $repoPath, \$fileSpec: $fileSpec\n\@infiles:\n@infiles";
closedir $repoDir;
chdir $pwd or die "Cannot chdir to $pwd:\n$!";

ProcessFiles();
say "Done.";
close LOG or warn "Could not close log file: $!";
# GetUserDefaults
#
# Reads the defaults file "User/<$udf>" (relative to the current directory,
# using the file-level separator $d) and fills in two file-level variables:
#   $ulbPath  - text after "Unlocked Literal Bible path: "
#   $repoPath - text after "Repository directory: ", with $d and $ulbPath
#               appended once the whole file has been read
# If a key occurs more than once, the last occurrence wins.
#
# Takes no arguments and returns nothing useful.
# Dies if the defaults file cannot be opened or if no non-empty repository
# directory was found.
sub GetUserDefaults {
    my $defaultsFile = "User${d}$udf";
    open(my $defaults, "<:utf8", $defaultsFile) or die "$defaultsFile:\n$!";
    while (my $thisLine = <$defaults>) {
        chomp $thisLine;
        # A defaults file saved with CRLF line endings would otherwise leave
        # a carriage return at the end of the captured path.
        $thisLine =~ s/\r$//;
        if ($thisLine =~ /^Repository directory: (.*)$/) {
            $repoPath = $1;
        } elsif ($thisLine =~ /^Unlocked Literal Bible path: (.*)/) {
            $ulbPath = $1;
        }
    }
    close $defaults;

    die "No path to repo found" unless defined $repoPath && $repoPath ne "";

    # A missing ULB path is treated as empty, so the result is "$repoPath$d".
    $repoPath = $repoPath . $d . ($ulbPath // "");
    #say LOG "\$repoPath: $repoPath";
    #say LOG "\$ulbPath: $ulbPath";
    return;
}
# ProcessFiles
#
# Converts every file named in the file-level @infiles (looked up inside
# $repoPath) and writes the combined result to Temp/ULB_text.txt under $Bin.
# @infiles is sorted in place first, so files are processed in string order.
#
# Each input file is read line by line:
#   \c N        sets the current chapter
#   \h TEXT     sets the current book name
#   \v N TEXT   starts a new output line "BOOK CHAPTER:VERSE<TAB>TEXT",
#               prefixed by any pending \d text
#   \s5         emits a "-----" separator line
#   \p \q \m    (optionally followed by one more character) append their
#               text, or a single space when they carry none, to the
#               current output line
#   \d TEXT     is held back and prepended to the next verse
# Any other line is ignored. Every line read is also echoed to LOG.
#
# Takes no arguments and returns nothing useful.
# Dies if the output file or any input file cannot be opened, or if the
# output file cannot be closed.
sub ProcessFiles {
    say "Processing files";
    my ($bk, $ch, $vs);    # current book, chapter, verse; these persist across files
    my $ulbText = "";      # converted text of all input files

    # Open the output once, up front, so an unwritable Temp directory is
    # reported before any input is processed.
    my $outPath = "$Bin${d}Temp${d}ULB_text.txt";
    open(my $out, ">:utf8", $outPath) or die "$outPath:\n$!";

    @infiles = sort @infiles;
    foreach my $usfmName (@infiles) {
        my $usfmPath = "$repoPath${d}$usfmName";
        say LOG $usfmPath;
        # Three-argument open: the file name is never parsed for a mode.
        open(my $in, "<:utf8", $usfmPath) or die "$usfmPath\n$!";
        my $metathesis = "";    # pending \d text, emitted with the next verse
        while (my $line = <$in>) {
            chomp $line;
            say LOG "<->\t$line";
            # NOTE(review): unlike the patterns below, the \c pattern is not
            # anchored to the start of the line; left as it was.
            if ($line =~ /\\c (\d*)/) {
                $ch = $1;
            } elsif ($line =~ /^\\h (.*)$/) {
                $bk = $1;
            } elsif ($line =~ /^\\v (\d*)( .*)$/) {
                # NOTE(review): a verse number that is not followed directly
                # by a space (e.g. "1-2") does not match and the line is
                # skipped - confirm whether that is intended.
                my ($verseText) = ($2);
                $vs = $1;
                $ulbText .= "\n$bk $ch:$vs\t$metathesis$verseText";
                say LOG "<0>\t$bk $ch:$vs\t$metathesis$verseText";
                $metathesis = "";
            } elsif ($line =~ /^\\s5/) {
                $ulbText .= "\n-----";
            } elsif ($line =~ /^\\[pqm].?( .*)/) {
                $ulbText .= " $1";
                say LOG "<3> $1";
            } elsif ($line =~ /^\\[pqm].?$/) {
                $ulbText .= " ";
                say LOG "<1>\t| |";
            } elsif ($line =~ /^\\d (.*)/) {
                $metathesis = "$1 ";
            }
        }
        close $in;
        $ulbText .= "\n\n";
    }

    # Strip inline markers that survived the line-level pass.
    $ulbText =~ s/\\qs\*//g;
    $ulbText =~ s/\\q\d //g;
    $ulbText =~ s/\\f \+.*?\\f\*//g;    # footnotes: from "\f +" to the closing "\f*"
    #say LOG "\$ulbText:\n$ulbText";
    $ulbText =~ s/— /—/g;               # no space after an em dash
    # NOTE(review): as written this replaces a space with a space. It was
    # presumably meant to collapse a double space or a non-breaking space -
    # confirm against the original file.
    $ulbText =~ s/ / /g;
    $ulbText =~ s/ +(\r?\n)/$1/g;       # drop spaces at the end of a line

    say {$out} $ulbText;
    # Buffered write errors only surface when the handle is closed.
    close $out or die "$outPath:\n$!";
    return;
}