parent
98cb259579
commit
b24f178123
|
@ -0,0 +1,173 @@
|
|||
# Builds easily searchable files from the current OGNT and MAST-HB XML files
|
||||
# Processes one verse at a time from each slurped file
|
||||
use 5.18.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use utf8;
|
||||
#use open IN => ":utf8", OUT => ":utf8";
|
||||
use open IO => ":utf8";
|
||||
# Open the run log and the combined output file for writing as UTF-8.
# The bareword handles LOG and OUT are used by the rest of the script, so
# their names are kept. On failure, report which file and why ($!).
open LOG, ">:utf8", "Logs/log.txt"
    or die "Cannot open Logs/log.txt for writing: $!";
open OUT, ">:utf8", "Output/Original_languages.txt"
    or die "Cannot open Output/Original_languages.txt for writing: $!";

# Folders that are searched for XML source files.
my (@folders) = ("/Users/Henry/Documents/WACS/MAST_HB", "/Users/Henry/Documents/WACS/OGNT");

# Lookup tables filled from the __DATA__ section:
#   %order : first column  => third column
#   %long  : second column => third column
my (%order, %long);

# Accumulates the text that is finally written to OUT.
my $outText;

# Each DATA line must hold three tab-separated columns; lines that do not
# match that shape are silently skipped.
while (my $line = <DATA>) {
    chomp $line;
    next unless $line =~ /^([^\t]*)\t([^\t]*)\t(.*)$/;
    $order{$1} = $3;
    $long{$2}  = $3;
}
|
||||
|
||||
|
||||
# Walk every source folder, collect the XML files below it, and step through
# each file one <verse> element at a time. Each folder and file name, and each
# verse reference with its raw XML, is written to LOG. One line per file is
# appended to $outText (currently empty, because the verse-content logic below
# is still commented out).
foreach my $folder (@folders) {
    say LOG "$folder";
    #system "cd $folder;xml val *.xml;echo 'Continue? (Control + C to quit, Enter to continue)';read name;";

    # Language letter later inserted after every "<" in the book text:
    # "G" when the folder path contains OGNT, otherwise "H".
    my ($topDir, $lang) = ($folder, "H");
    if ($folder =~ /OGNT/) { $lang = "G" }

    # Collect every entry whose name ends in ".xml". This is a real regex;
    # the previous glob-style '*.xml' was interpolated into a pattern where
    # "*" and "." did not mean what a glob means.
    my @filesToRun  = ();
    my $filePattern = qr/\.xml\z/;
    find( sub { push @filesToRun, $File::Find::name if m/$filePattern/ }, $topDir );
    @filesToRun = sort @filesToRun;

    foreach my $file (@filesToRun) {
        say LOG $file;
        my $fileText = read_file($file, binmode => ':utf8');

        # State carried from verse to verse. Several of these are only used
        # by the commented-out verse-splitting logic below.
        my ($bk, $ch, $vs, $nbk, $nch, $nvs, $previous, $current, $interruption, $verse, $prevVsText, $holdText, $thisVsText, $nextVsText, $oldHold);
        my $thisBookText = "";

        # One iteration per <verse osisID="book.chapter.verse" ...>...</verse>.
        # The book part is restricted to [^".] so it cannot run past the
        # attribute. A greedy (.*) under /s would swallow everything up to
        # the last verse in the file.
        while ($fileText =~ /<verse osisID="([^".]*)\.(\d+)\.(\d+)".*?<\/verse>/spg) {
            $verse = ${^MATCH};    # /p is set, so use ${^MATCH} rather than $&
            #say LOG $verse;
            my ($bookId, $chapterNo, $verseNo) = ($1, $2, $3);

            # Translate the book id to its long name via %long; fall back to
            # the raw id when it is not in the table.
            ($bk, $ch, $vs) = ($long{ lc $bookId } // $bookId, $chapterNo, $verseNo);
            $previous = $current;
            $current  = "$bk $ch:$vs";
            say LOG "$current:\n$verse";
            my $verseText;

            # Need to know why dies at Gen 50:24

            # if ($verse =~ /<note>KJV:(.*)\.(.*).(.*)<\/note>/p) { # Occurs only in OT
            # ($nbk, $nch, $nvs) = ($long{$1}, $2, $3);
            # $interruption = "$nbk $nch:$nvs";
            # if ($verse =~ /<verse osisID="$current">\n[^<]*<note>KJV:$interruption<\/note>/) { # Complete renumber of verse
            # $current = $interruption;
            # $verseText = GetContent($verse);
            # $verseText = "$current\t$oldHold$verseText";
            # $oldHold = "";
            # }
            # elsif ($interruption ne $current || $verse =~ /<note>KJV:(.*)\.(.*).(.*)<\/note>/p) { # New verse begins here
            # ($thisVsText, $nextVsText) = (${^PREMATCH}, ${^POSTMATCH});
            # $thisVsText = GetContent($thisVsText);
            # $nextVsText = GetContent($nextVsText);
            # $outText .= "$oldHold\n$current\t$thisVsText ";
            # $oldHold = "$nextVsText ";
            # }
            # elsif ($interruption eq $current || $verse =~ /<note>KJV:(.*)\.(.*).(.*)<\/note>/p) { # Previous verse continues here
            # ($prevVsText, $thisVsText) = (${^PREMATCH}, ${^POSTMATCH});
            # $prevVsText = GetContent($prevVsText);
            # $thisVsText = GetContent($thisVsText);
            # $verseText .= "$oldHold\n$current\t$thisVsText";
            # $oldHold = "";
            # }
            # }
            # else {
            # $verseText = GetContent($verse);
            # $verseText = "$current\$tverseText"
            # }
            # $thisBookText .= "\n$verseText";
            # $oldHold = $holdText
        }

        # Insert the language letter after every "<" in the book text, then
        # append the book to the overall output.
        $thisBookText =~ s/</<$lang/g;
        $outText .= "$thisBookText\n";
    }
}
|
||||
|
||||
# Write the accumulated text and close both handles. Buffered write errors
# only surface at close time, so the close calls are checked.
say OUT $outText;

close OUT or die "Cannot close Output/Original_languages.txt: $!";
close LOG or die "Cannot close Logs/log.txt: $!";

print "\n\tDone.";
|
||||
|
||||
# GetContent($text)
#   Scans $text for <w lemma="...">word</w> elements and returns a single
#   string made of one "word <lemma> " entry per element, in the order the
#   elements appear. Returns "" when $text contains no such element.
#
#   Before output, the non-digit characters (other than '"') on either side
#   of the first number in the lemma are removed, e.g. "H1234" -> "1234".
sub GetContent {
    my ($text) = @_;
    my $returnText = "";

    # /g makes each iteration continue after the previous match. Without it
    # the loop re-matched the first <w> element forever.
    while ($text =~ /<w lemma="([^"]*)"[^>]*>([^<]*)<\/w>/g) {
        my ($lemma, $OL) = ($1, $2);
        $lemma =~ s/[^\d"]*(\d+)[^\d"]*/$1/;
        $returnText .= "$OL <$lemma> ";
    }

    return $returnText;
}
|
||||
|
||||
__DATA__
|
||||
01 gen Genesis
|
||||
02 exo Exodus
|
||||
03 lev Leviticus
|
||||
04 num Numbers
|
||||
05 deu Deuteronomy
|
||||
06 jos Joshua
|
||||
07 jdg Judges
|
||||
08 rut Ruth
|
||||
09 1sa 1 Samuel
|
||||
10 2sa 2 Samuel
|
||||
11 1ki 1 Kings
|
||||
12 2ki 2 Kings
|
||||
13 1ch 1 Chronicles
|
||||
14 2ch 2 Chronicles
|
||||
15 ezr Ezra
|
||||
16 neh Nehemiah
|
||||
17 est Esther
|
||||
18 job Job
|
||||
19 psa Psalms
|
||||
20 pro Proverbs
|
||||
21 ecc Ecclesiastes
|
||||
22 sng Song of Solomon
|
||||
23 isa Isaiah
|
||||
24 jer Jeremiah
|
||||
25 lam Lamentations
|
||||
26 ezk Ezekiel
|
||||
27 dan Daniel
|
||||
28 hos Hosea
|
||||
29 jol Joel
|
||||
30 amo Amos
|
||||
31 oba Obadiah
|
||||
32 jon Jonah
|
||||
33 mic Micah
|
||||
34 nam Nahum
|
||||
35 hab Habakkuk
|
||||
36 zep Zephaniah
|
||||
37 hag Haggai
|
||||
38 zec Zechariah
|
||||
39 mal Malachi
|
||||
41 mat Matthew
|
||||
42 mrk Mark
|
||||
43 luk Luke
|
||||
44 jhn John
|
||||
45 act Acts
|
||||
46 rom Romans
|
||||
47 1co 1 Corinthians
|
||||
48 2co 2 Corinthians
|
||||
49 gal Galatians
|
||||
50 eph Ephesians
|
||||
51 php Philippians
|
||||
52 col Colossians
|
||||
53 1th 1 Thessalonians
|
||||
54 2th 2 Thessalonians
|
||||
55 1ti 1 Timothy
|
||||
56 2ti 2 Timothy
|
||||
57 tit Titus
|
||||
58 phm Philemon
|
||||
59 heb Hebrews
|
||||
60 jas James
|
||||
61 1pe 1 Peter
|
||||
62 2pe 2 Peter
|
||||
63 1jn 1 John
|
||||
64 2jn 2 John
|
||||
65 3jn 3 John
|
||||
66 jud Jude
|
||||
67 rev Revelation
|
|
@ -70,7 +70,7 @@ sub Separate {
|
|||
say OUT " </verse>\n <verse osisID=\"$bklc.$ch.$vs\">";
|
||||
$last_vs = $vs;
|
||||
}
|
||||
say OUT "\t\t\t\t\t<w OGNTsort=\"$OGNTSort\" lemma=\"$sn\" morph=\"$gram\" lexeme=\"$lexeme\">$word</w>"
|
||||
say OUT "\t\t\t\t\t<w OGNTsort=\"$OGNTSort\" ULBorder=\"---\" lemma=\"G$sn\" morph=\"$gram\" lexeme=\"$lexeme\">$word</w>"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue