From b24f1781230760eb15e55754a3724617d16be0c8 Mon Sep 17 00:00:00 2001 From: Henry Whitney Date: Tue, 28 Jul 2020 17:37:12 -0400 Subject: [PATCH] Revert "work on tagged OGNT" This reverts commit 98cb2595791e7afbec66fdc6f61df9adcd3a9e58. --- .../Build_OL_files_from_XML.pl | 173 ++++++++++++++++++ .../Buld_MAST_OGNT_from_csv.pl | 2 +- 2 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 MAST_tW_PDF_Updater/FilesForUpdates/Build_OL_files_from_XML.pl diff --git a/MAST_tW_PDF_Updater/FilesForUpdates/Build_OL_files_from_XML.pl b/MAST_tW_PDF_Updater/FilesForUpdates/Build_OL_files_from_XML.pl new file mode 100644 index 0000000..89f049b --- /dev/null +++ b/MAST_tW_PDF_Updater/FilesForUpdates/Build_OL_files_from_XML.pl @@ -0,0 +1,173 @@ +# Builds easily searchable files from current OGNT and MAST-HB XML file +# Takes verse at a time from slurped file +use 5.18.0; +use File::Slurp; +use File::Find ; +use Cwd ; +use utf8; +#use open IN => ":utf8", OUT => ":utf8"; +use open IO => ":utf8"; +open LOG, ">:utf8", "Logs/log.txt" or die; +open OUT, ">:utf8", "Output/Original_languages.txt" or die; + +my (@folders) = ("/Users/Henry/Documents/WACS/MAST_HB", "/Users/Henry/Documents/WACS/OGNT"); +my (%order, %long); +my $outText; + +while () { + chomp; + if (/^([^\t]*)\t([^\t]*)\t(.*)$/) { + $order{$1} = $3; + $long{$2} = $3; + } +} + + +foreach my $folder (@folders) { + say LOG "$folder"; + #system "cd $folder;xml val *.xml;echo 'Continue? (Control + C to quit, Enter to continue)';read name;"; + my ($topDir, $lang) = ($folder, "H"); + + if ($folder =~ /OGNT/) {$lang = "G"} + + my @filesToRun = (); + my $filePattern = '*.xml' ; + find( sub { push @filesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ; + @filesToRun = sort @filesToRun; + foreach my $file ( @filesToRun ) { + say LOG $file; + my $fileText = read_file("$file", binmode => 'utf8'); + my ($bk, $ch, $vs, $lemma, $word, $nbk, $nch, $nvs, $previous, $current, $interruption, $next, $verse, $thisBookText, $prevVsText, $holdText, $thisVsText, $nextVsText, $oldHold); + while ($fileText =~ //spg) { + $verse = $&; + #say LOG $verse; + ($bk, $ch, $vs) = ($long{$1}, $2, $3); + $previous = $current; + $current = "$bk $ch:$vs"; + say LOG "$current:\n$verse"; + my $verseText; + + # Need to know why dies at Gen 50:24 + +# if ($verse =~ /KJV:(.*)\.(.*).(.*)<\/note>/p) { # Occurs only in OT +# ($nbk, $nch, $nvs) = ($long{$1}, $2, $3); +# $interruption = "$nbk $nch:$nvs"; +# if ($verse =~ /\n[^<]*KJV:$interruption<\/note>/) { # Complete renumber of verse +# $current = $interruption; +# $verseText = GetContent($verse); +# $verseText = "$current\t$oldHold$verseText"; +# $oldHold = ""; +# } +# elsif ($interruption ne $current || $verse =~ /KJV:(.*)\.(.*).(.*)<\/note>/p) { # New verse begins here +# ($thisVsText, $nextVsText) = (${^PREMATCH}, ${^POSTMATCH}); +# $thisVsText = GetContent($thisVsText); +# $nextVsText = GetContent($nextVsText); +# $outText .= "$oldHold\n$current\t$thisVsText "; +# $oldHold = "$nextVsText "; +# } +# elsif ($interruption eq $current || $verse =~ /KJV:(.*)\.(.*).(.*)<\/note>/p) { # Previous verse continues here +# ($prevVsText, $thisVsText) = (${^PREMATCH}, ${^POSTMATCH}); +# $prevVsText = GetContent($prevVsText); +# $thisVsText = GetContent($thisVsText); +# $verseText .= "$oldHold\n$current\t$thisVsText"; +# $oldHold = ""; +# } +# } +# else { +# $verseText = GetContent($verse); +# $verseText = "$current\$tverseText" +# } +# $thisBookText .= "\n$verseText"; +# $oldHold = $holdText + } + $thisBookText =~ s/]*>([^<]*)<\/w>/) { + my ($lemma, $OL) = ($1, $2); + $lemma =~ s/[^\d"]*(\d+)[^\d"]*/$1/; + $returnText .= "$OL <$lemma> " + } + return $returnText +} + +__DATA__ +01 gen Genesis +02 exo Exodus +03 lev Leviticus +04 num Numbers +05 deu Deuteronomy +06 jos Joshua +07 jdg Judges +08 rut Ruth +09 1sa 1 Samuel +10 2sa 2 Samuel +11 1ki 1 Kings +12 2ki 2 Kings +13 1ch 1 Chronicles +14 2ch 2 Chronicles +15 ezr Ezra +16 neh Nehemiah +17 est Esther +18 job Job +19 psa Psalms +20 pro Proverbs +21 ecc Ecclesiastes +22 sng Song of Solomon +23 isa Isaiah +24 jer Jeremiah +25 lam Lamentations +26 ezk Ezekiel +27 dan Daniel +28 hos Hosea +29 jol Joel +30 amo Amos +31 oba Obadiah +32 jon Jonah +33 mic Micah +34 nam Nahum +35 hab Habakkuk +36 zep Zephaniah +37 hag Haggai +38 zec Zechariah +39 mal Malachi +41 mat Matthew +42 mrk Mark +43 luk Luke +44 jhn John +45 act Acts +46 rom Romans +47 1co 1 Corinthians +48 2co 2 Corinthians +49 gal Galatians +50 eph Ephesians +51 php Philippians +52 col Colossians +53 1th 1 Thessalonians +54 2th 2 Thessalonians +55 1ti 1 Timothy +56 2ti 2 Timothy +57 tit Titus +58 phm Philemon +59 heb Hebrews +60 jas James +61 1pe 1 Peter +62 2pe 2 Peter +63 1jn 1 John +64 2jn 2 John +65 3jn 3 John +66 jud Jude +67 rev Revelation diff --git a/MAST_tW_PDF_Updater/FilesForUpdates/Buld_MAST_OGNT_from_csv.pl b/MAST_tW_PDF_Updater/FilesForUpdates/Buld_MAST_OGNT_from_csv.pl index 52c2cac..a9bac7b 100755 --- a/MAST_tW_PDF_Updater/FilesForUpdates/Buld_MAST_OGNT_from_csv.pl +++ b/MAST_tW_PDF_Updater/FilesForUpdates/Buld_MAST_OGNT_from_csv.pl @@ -70,7 +70,7 @@ sub Separate { say OUT " \n "; $last_vs = $vs; } - say OUT "\t\t\t\t\t$word" + say OUT "\t\t\t\t\t$word" } }