# Forked from hmw3/Tips_and_Hacks.
#
# Converts USFM to a ULB text file.
# Reads the USFM files line by line rather than slurping them.

use 5.12.0;
use utf8;
use Cwd;
use File::Slurp;
use File::Basename;
use FindBin '$Bin';
use open IO => ":utf8";

$| = 1;       # autoflush the selected output handle so progress messages appear immediately
$" = "\n";    # interpolated arrays (e.g. "@infiles" in the log) print one element per line

# $pwd      - directory containing this script
# $d        - path separator: "/" on Linux and macOS, "\" everywhere else
# $fileSpec - literal file-name suffix that identifies an input file
my ($pwd, $d, $fileSpec) = ($Bin, "\\", ".usfm");
if ($^O eq "linux" || $^O eq "darwin") {
    $d = "/";
}

# The relative paths used below and in the subs (User/..., Temp/...) are
# resolved from the script directory, so failing to get there is fatal.
chdir $pwd or die "$pwd:\n$!";

# File-level variables. The subs below read and write $repoPath, $ulbPath,
# $inFile and @infiles directly; the remaining names are kept declared
# because code outside this section may still refer to them.
my ($inDir, $outDir) = ("", "out");
my (@array, @usfmLines);
my (%hash);
my ($whatami, $inFile);
my ($book, $chap, $vers, $text, $outText, $newV, $newC, $newB, $outFile, $usfmText, $metathesis, $repoPath, $ulbPath);

# LOG stays a bareword handle because the subs below write to it by name.
my $logPath = "$Bin${d}Logs${d}log.log";
open(LOG, ">:utf8", $logPath) or die "$logPath:\n$!";

# Pick the user-defaults file for this OS; anything that is not Linux or
# macOS falls back to the Windows file.
my %defaultsFileFor = (
    linux  => "User_defaults.linux.txt",
    darwin => "User_defaults.mac.txt",
);
my $udf = $defaultsFileFor{$^O} // "User_defaults.windows.txt";

# Sets $repoPath to the directory that holds the USFM files.
GetUserDefaults();

# Collect the input file names. The directory is read in place (no chdir),
# and the suffix is quoted with \Q...\E so its dot only matches a literal dot.
opendir(my $repoDir, $repoPath) or die "$repoPath:\n$!";
my @infiles = grep { /\Q$fileSpec\E$/i } readdir $repoDir;
closedir $repoDir;
say LOG "\$repoPath: $repoPath, \$fileSpec: $fileSpec\n\@infiles:\n@infiles";

ProcessFiles();
say "Done.";
close LOG;

# Reads the user-defaults file "User<sep>$udf" (relative to the current
# directory) and sets the file-level $repoPath to
#     <Repository directory><sep><Unlocked Literal Bible path>
# Recognised lines:
#     Repository directory: <path>
#     Unlocked Literal Bible path: <path>
# If a key appears more than once the last occurrence wins.
# Takes no arguments and returns nothing useful.
# Dies if the file cannot be opened or no repository directory is given.
sub GetUserDefaults {
    my $defaultsPath = "User${d}$udf";
    open(my $defaults, "<:utf8", $defaultsPath) or die "$defaultsPath:\n$!";

    while (my $thisLine = <$defaults>) {
        # Strip LF or CRLF so a defaults file saved with Windows line
        # endings does not leave a stray "\r" on the end of a path.
        $thisLine =~ s/\r?\n\z//;

        if ($thisLine =~ /^Repository directory: (.*)$/) {
            $repoPath = $1;
        }
        elsif ($thisLine =~ /^Unlocked Literal Bible path: (.*)/) {
            $ulbPath = $1;
        }
    }
    close $defaults;

    die "No path to repo found" if !defined $repoPath || $repoPath eq "";

    # A missing ULB path is treated as empty, so the result is the
    # repository directory followed by a separator.
    $ulbPath //= "";
    $repoPath = "$repoPath${d}$ulbPath";
}

# Converts every file named in the file-level @infiles (read from $repoPath)
# into one plain-text file, <script dir>/Temp/ULB_text.txt.
#
# Each input line is handled according to its leading marker:
#     \c N        remember N as the current chapter
#     \h NAME     remember NAME as the current book name
#     \v N TEXT   start a new output line "NAME CH:N<TAB>TEXT"
#     \s5         emit a "-----" separator line
#     \p \q \m    (optionally followed by one more character) append any
#                 trailing text to the current output line, or a single
#                 space when the marker stands alone
#     \d TEXT     hold TEXT and put it in front of the next verse's text
# Every other line is ignored. Each input line and each emitted fragment is
# also written to LOG.
#
# Takes no arguments and returns nothing useful. Sorts @infiles in place.
# Dies if the output file or any input file cannot be opened, or if the
# output file cannot be closed cleanly.
sub ProcessFiles {
    say "Processing files";

    my ($bk, $ch, $vs) = ("", "", "");
    my $thisFile = "";

    # Open the output before parsing so an unwritable Temp directory is
    # reported straight away rather than after all the work is done.
    my $outPath = "$Bin${d}Temp${d}ULB_text.txt";
    open(my $out, ">:utf8", $outPath) or die "$outPath:\n$!";

    @infiles = sort @infiles;
    foreach my $inFile (@infiles) {
        my $inPath = "$repoPath${d}$inFile";
        say LOG $inPath;

        # Three-argument open: the file name can never be read as a mode.
        open(my $in, "<:utf8", $inPath) or die "$inPath\n$!";

        # Text from a \d line, waiting to be prefixed to the next verse.
        my $metathesis = "";

        while (my $line = <$in>) {
            chomp $line;
            say LOG "<->\t$line";

            if ($line =~ /\\c (\d*)/) {
                $ch = $1;
            }
            elsif ($line =~ /^\\h (.*)$/) {
                $bk = $1;
            }
            elsif ($line =~ /^\\v (\d*)( .*)$/) {
                my $verseText = $2;
                $vs = $1;
                $thisFile .= "\n$bk $ch:$vs\t$metathesis$verseText";
                say LOG "<0>\t$bk $ch:$vs\t$metathesis$verseText";
                $metathesis = "";
            }
            elsif ($line =~ /^\\s5/) {
                $thisFile .= "\n-----";
            }
            elsif ($line =~ /^\\[pqm].?( .*)/) {
                # The capture already starts with a space; the doubled
                # space this creates is collapsed after the loop.
                $thisFile .= " $1";
                say LOG "<3> $1";
            }
            elsif ($line =~ /^\\[pqm].?$/) {
                $thisFile .= " ";
                say LOG "<1>\t| |";
            }
            elsif ($line =~ /^\\d (.*)/) {
                $metathesis = "$1 ";
            }
        }
        close $in;

        $thisFile .= "\n\n";
    }

    # Remove inline markup that survived the line-level pass.
    $thisFile =~ s/\\qs\*//g;
    $thisFile =~ s/\\q\d //g;
    $thisFile =~ s/\\f \+.*?\\f\*//g;    # footnotes: \f + ... \f*

    # Whitespace clean-up.
    $thisFile =~ s/— /—/g;
    # Collapse the runs of spaces created above when " $1" is appended and
    # $1 itself starts with a space. (The statement seen here replaced one
    # space with one space, which did nothing.)
    $thisFile =~ s/ {2,}/ /g;
    $thisFile =~ s/ +(\r?\n)/$1/g;       # no trailing spaces before a line break

    say {$out} $thisFile;

    # Buffered write errors only surface at close, so check it.
    close $out or die "$outPath:\n$!";
}
