Setup ULB XML Importer
This commit is contained in:
parent
fce5f52300
commit
2750bf115a
|
@ -0,0 +1,44 @@
|
|||
<?php
|
||||
|
||||
namespace App\Console\Commands;
|
||||
|
||||
use App\Handlers\UlbXmlImportHandler;
|
||||
use Illuminate\Console\Command;
|
||||
|
||||
class ImportUlbXmlData extends Command
{
    /**
     * Signature (name) under which the console command is registered.
     *
     * @var string
     */
    protected $signature = 'gwt:import-ulb-xml';

    /**
     * Human-readable description of the console command.
     *
     * @var string
     */
    protected $description = 'Import ULB data from XML file';

    /**
     * Build the command; nothing is needed beyond the parent constructor.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Run the command by handing the work to a fresh UlbXmlImportHandler.
     *
     * @return mixed
     */
    public function handle()
    {
        (new UlbXmlImportHandler())->run();
    }
}
|
|
@ -0,0 +1,49 @@
|
|||
<?php
|
||||
namespace App\Handlers;
|
||||
|
||||
/**
|
||||
* BookXmlFilesTrait.php
|
||||
*
|
||||
* @author: Leonard Smith <leonard@acornwebconsultants.com>
|
||||
* Date: 9/19/20
|
||||
* Time: 10:43 AM
|
||||
*/
|
||||
trait BookXmlFilesTrait
{
    /**
     * Lower-case book key => XML file name for that book.
     *
     * @var array
     */
    protected static $bookXmlFiles = [
        'matthew' => '41-MAT.xml',
        'mark' => '42-MRK.xml',
        'luke' => '43-LUK.xml',
        'john' => '44-JHN.xml',
        'acts' => '45-ACT.xml',
        'romans' => '46-ROM.xml',
        '1corinthians' => '47-1CO.xml',
        '2corinthians' => '48-2CO.xml',
        'galatians' => '49-GAL.xml',
        'ephesians' => '50-EPH.xml',
        'philippians' => '51-PHP.xml',
        'colossians' => '52-COL.xml',
        '1thessalonians' => '53-1TH.xml',
        // Was '54-1TH.xml', which breaks the NN-CODE pattern of every other entry.
        '2thessalonians' => '54-2TH.xml',
        '1timothy' => '55-1TI.xml',
        '2timothy' => '56-2TI.xml',
        'titus' => '57-TIT.xml',
        'philemon' => '58-PHM.xml',
        'hebrews' => '59-HEB.xml',
        'james' => '60-JAS.xml',
        '1peter' => '61-1PE.xml',
        '2peter' => '62-2PE.xml',
        '1john' => '63-1JN.xml',
        '2john' => '64-2JN.xml',
        '3john' => '65-3JN.xml',
        'jude' => '66-JUD.xml',
        'revelation' => '67-REV.xml',
    ];

    /**
     * Build the path of a book's XML file below storage_path().
     *
     * @param string $folder folder relative to storage_path(); leading and
     *                       trailing slashes are optional
     * @param string $book   book key from $bookXmlFiles (matched case-insensitively)
     * @return string
     * @throws \InvalidArgumentException when $book is not a known book key
     */
    public function getBookXmlFilePath($folder, $book): string
    {
        // All keys in the map are lower-case, so normalise before the lookup.
        $bookKey = strtolower((string) $book);

        if (!isset(self::$bookXmlFiles[$bookKey])) {
            // Previously this fell through to an undefined-index notice and
            // returned the bare folder path, which then failed later on load.
            throw new \InvalidArgumentException("Unknown book: {$book}");
        }

        // Normalise to exactly one leading and one trailing slash.
        $folder = '/' . trim($folder, '/') . '/';

        return storage_path() . $folder . self::$bookXmlFiles[$bookKey];
    }
}
|
|
@ -0,0 +1,130 @@
|
|||
<?php
|
||||
namespace App\Handlers;
|
||||
|
||||
use Illuminate\Support\Facades\URL;
|
||||
/**
|
||||
* LexiconHandler.php
|
||||
*
|
||||
* @author: Leonard Smith <leonard@acornwebconsultants.com>
|
||||
* Date: 9/19/20
|
||||
* Time: 8:56 AM
|
||||
*/
|
||||
class LexiconHandler
{
    use BookXmlFilesTrait;

    /**
     * Build the list of formatted lexical entries for the words of one verse.
     *
     * @param string $book
     * @param string|int $chapter
     * @param string|int $verse
     * @return array list of entries as produced by formatForJson()
     */
    public function getEntriesJson($book, $chapter, $verse)
    {
        $entries = $this->getEntriesByVerse($book, $chapter, $verse);

        $json = [];
        foreach ($entries as $id => $entry) {
            $json[] = $this->formatForJson($id, $entry);
        }

        return $json;
    }

    /**
     * Load and format a single lexical entry by its Strong's number.
     *
     * @param string $id Strong's number, e.g. "G3056"
     * @return array entry as produced by formatForJson()
     */
    public static function getEntryById($id)
    {
        // The lookup and formatting helpers are instance methods that use
        // $this, so this static entry point needs an instance to call them on.
        $handler = new static();

        return $handler->formatForJson($id, $handler->getLexicalContent($id));
    }

    /**
     * Wrap a raw lexical entry in the resource structure returned to clients.
     *
     * @param string $id    Strong's number of the entry
     * @param string $entry raw entry text
     * @return array
     */
    public function formatForJson($id, $entry)
    {
        [$lexeme, $commentary] = $this->parseEntry($entry);

        return [
            'type' => 'lexical-entries',
            'id' => $id,
            'attributes' => [
                'strongs-number' => $id,
                'lexeme' => $lexeme,
                'commentary' => $commentary,
            ],
            'links' => [
                'self' => URL::route('lexicon-entry', $id),
            ],
        ];
    }

    /**
     * Extract the lexeme and the commentary portion into separate variables.
     *
     * The first non-blank line, with leading '#' characters removed, is the
     * lexeme; every following non-blank line is appended to the commentary,
     * each terminated by "\n".
     *
     * @param string $entry
     * @return array [lexeme, commentary]
     */
    public function parseEntry($entry)
    {
        $lexeme = '';
        $commentary = '';

        $isFirstLine = true;
        foreach (preg_split('~[\r\n]+~', $entry) as $line) {
            // Skip blank / whitespace-only lines. A strict comparison is used
            // because empty() would also discard a line containing just "0".
            if ($line === '' || ctype_space($line)) {
                continue;
            }

            if ($isFirstLine) {
                $lexeme = ltrim($line, '#');
                $isFirstLine = false;
            } else {
                $commentary .= $line . "\n";
            }
        }

        return [$lexeme, $commentary];
    }

    /**
     * Collect the raw lexical content for every word of a verse.
     *
     * @param string $book
     * @param string|int $chapter
     * @param string|int $verse
     * @return array raw entry text keyed by the word's "lemma" attribute
     */
    public function getEntriesByVerse($book, $chapter, $verse)
    {
        // The trait exposes getBookXmlFilePath(); its file map is keyed by
        // lower-case book names, so normalise the key for the lookup.
        $xmlFile = $this->getBookXmlFilePath('/ulb/', strtolower($book));

        $document = new \DOMDocument;
        $document->load($xmlFile);

        $verseNodes = $document->getElementsByTagName('verse');

        // Verse nodes are matched on their "name" attribute.
        $verseName = ucfirst($book) . ' ' . $chapter . ':' . $verse;

        $entryArray = [];
        foreach ($verseNodes as $vn) {
            if ($vn->getAttribute('name') !== $verseName) {
                continue;
            }

            foreach ($vn->getElementsByTagName('w') as $entry) {
                // In this XML the "lemma" attribute is used as the Strong's number.
                $strongsNumber = $entry->getAttribute('lemma');
                $entryArray[$strongsNumber] = $this->getLexicalContent($strongsNumber);
            }
        }

        return $entryArray;
    }

    /**
     * Read the entry file for a Strong's number.
     *
     * @param string $strongsNumber
     * @return string file contents, or a "CONTENT NOT FOUND: <path>" marker
     *                when the file does not exist
     */
    public function getLexicalContent($strongsNumber)
    {
        $filepath = $this->getFilePath($strongsNumber);

        if (file_exists($filepath)) {
            $contents = file_get_contents($filepath);
        } else {
            $contents = 'CONTENT NOT FOUND: ' . $filepath;
        }

        return $contents;
    }

    /**
     * Path of the ".md" entry file for a Strong's number.
     *
     * @param string $strongsNumber
     * @return string
     */
    public function getFilePath($strongsNumber)
    {
        return $this->getFolderName($strongsNumber) . lcfirst($strongsNumber) . '.md';
    }

    /**
     * Folder holding the entry file for a Strong's number.
     *
     * Folders are named for blocks of ten numbers: g1-g10, g11-g20, ...
     *
     * @param string $strongsNumber one prefix character followed by digits
     * @return string folder path with a trailing slash
     */
    public function getFolderName($strongsNumber)
    {
        // Drop the single prefix character and read the numeric part.
        $intValue = intval(substr($strongsNumber, 1));

        // Derive the lower bound from the upper bound. Computing it with
        // floor() gave an inverted range for multiples of ten
        // (e.g. G10 => "g11-g10" instead of "g1-g10").
        $upperValue = max(1, (int) ceil($intValue / 10)) * 10;
        $lowerValue = $upperValue - 9;

        return storage_path() . '/gwt/' . 'g' . $lowerValue . '-' . 'g' . $upperValue . '/';
    }
}
|
|
@ -0,0 +1,155 @@
|
|||
<?php
|
||||
namespace App\Handlers;
|
||||
|
||||
use Illuminate\Support\Facades\URL;
|
||||
/**
|
||||
* TextMorphologyHandler.php
|
||||
*
|
||||
* @author: Leonard Smith <leonard@acornwebconsultants.com>
|
||||
* Date: 8/18/20
|
||||
* Time: 7:31 PM
|
||||
*/
|
||||
|
||||
class TextMorphologyHandler
{
    use BookXmlFilesTrait;

    /**
     * Collect the word data of a single verse.
     *
     * @param string $book
     * @param string|int $chapter
     * @param string|int $verse
     * @return array list of word attribute arrays (ulb, greek, morph, ognt-sort, lexeme)
     */
    public function getVerse($book, $chapter, $verse)
    {
        $document = $this->openBookXml($book);

        $verseNodes = $document->getElementsByTagName('verse');

        // Verse nodes are matched on their "name" attribute.
        $verseName = ucfirst($book) . ' ' . $chapter . ':' . $verse;

        $words = [];
        foreach ($verseNodes as $vn) {
            if ($vn->getAttribute('name') !== $verseName) {
                continue;
            }

            foreach ($vn->getElementsByTagName('w') as $w) {
                $words[] = [
                    'ulb' => $w->nodeValue,
                    'greek' => $w->getAttribute('text'),
                    'morph' => $w->getAttribute('morph'),
                    'ognt-sort' => $w->getAttribute('OGNTsort'),
                    'lexeme' => $w->getAttribute('lexeme'),
                ];
            }
        }

        return $words;
    }

    /**
     * JSON-encode the words of a verse as a list of "words" resources.
     *
     * @param string $book
     * @param string|int $chapter
     * @param string|int $verse
     * @return string JSON
     */
    public function getWordsJson($book, $chapter, $verse)
    {
        $words = $this->getVerse($book, $chapter, $verse);

        $json = [];
        foreach ($words as $word) {
            // Word ids have the form book-chapter-verse-ogntSort.
            $json[] = $this->getWordJson(implode('-', [$book, $chapter, $verse, $word['ognt-sort']]), $word);
        }

        return json_encode($json);
    }

    /**
     * Wrap one word's attributes in the resource structure.
     *
     * @param string $id
     * @param array $word
     * @return array
     */
    protected function getWordJson($id, $word)
    {
        return [
            'type' => 'words',
            'id' => $id,
            'attributes' => $word,
            'links' => [
                'self' => URL::route('verse-word', $id),
            ]
        ];
    }

    /**
     * Look up a single word by its id (book-chapter-verse-ogntSort).
     *
     * @param string $id
     * @return array|string the word resource, or a "NOT FOUND: ..." string
     *                      when no word of the verse has that sort value
     */
    public function getWordById($id)
    {
        $idParts = explode('-', $id);

        // A malformed id cannot identify a word; report it the same way as a
        // missing word instead of reading undefined array offsets.
        if (count($idParts) < 4) {
            return "NOT FOUND: \n";
        }

        [$book, $chapter, $verse, $ogntSort] = $idParts;

        foreach ($this->getVerse($book, $chapter, $verse) as $word) {
            // Loose comparison is kept on purpose so existing ids keep matching.
            if ($word['ognt-sort'] == $ogntSort) {
                return $this->getWordJson($id, $word);
            }
        }

        return "NOT FOUND: $ogntSort\n";
    }

    /**
     * Load the XML document of a book.
     *
     * @param string $book book name; lower-cased before the file lookup
     * @return \DOMDocument
     */
    protected function openBookXml($book)
    {
        $book = strtolower($book);

        // The trait exposes getBookXmlFilePath(); there is no getBookXmlFile().
        $xmlFile = $this->getBookXmlFilePath('/ulb', $book);

        $document = new \DOMDocument;
        $document->load($xmlFile);

        return $document;
    }

    /**
     * JSON-encode the chapter data (chapter name and verses) of a book.
     *
     * @param string $book
     * @return string JSON
     */
    public function getChaptersByBook($book)
    {
        $document = $this->openBookXml($book);

        $data = [];
        foreach ($document->getElementsByTagName('chapter') as $node) {
            $data[] = $this->getChapterDataFromNode($node);
        }

        return json_encode($data);
    }

    /**
     * Not implemented: the body is empty and nothing is returned.
     *
     * @param mixed $id
     * @return void
     */
    public function getChapterDataById($id)
    {

    }

    /**
     * Build the chapter name and verse list for a chapter node.
     *
     * @param \DOMElement $node chapter element
     * @return array ['chapter' => ..., 'verses' => [...]]
     */
    public function getChapterDataFromNode($node)
    {
        return [
            'chapter' => $this->getChapterNameFromOsisId($node->getAttribute('osisID')),
            'verses' => $this->getUlbChapterVerses($node),
        ];
    }

    /**
     * Extract the trailing digits of an osisID.
     *
     * @param string $osisId
     * @return string|null the captured digits, or null when the id does not match
     */
    public function getChapterNameFromOsisId($osisId)
    {
        preg_match("|^[A-Za-z1-3]*.([0-9]*)$|", $osisId, $matches);

        // $matches is empty when the pattern does not match.
        return $matches[1] ?? null;
    }

    /**
     * Collect number and ULB text of every verse below a node.
     *
     * @param \DOMElement $xmlNode
     * @return array list of ['number' => ..., 'text' => ...]
     */
    public function getUlbChapterVerses($xmlNode)
    {
        $verses = [];
        foreach ($xmlNode->getElementsByTagName('verse') as $vn) {
            // item(0) is null when the verse has no ULB child element.
            $ulbNode = $vn->getElementsByTagName('ULB')->item(0);

            $verses[] = [
                'number' => $this->getVerseNumber($vn->getAttribute('name')),
                'text' => $ulbNode !== null ? $ulbNode->nodeValue : null,
            ];
        }

        return $verses;
    }

    /**
     * Extract the digits after the colon of a verse name.
     *
     * @param string $string verse "name" attribute
     * @return string|null the captured digits, or null when the name does not match
     */
    protected function getVerseNumber($string)
    {
        preg_match("|^[A-Za-z1-3]*\s*[0-9]*:([0-9]*)$|", $string, $matches);

        // $matches is empty when the pattern does not match.
        return $matches[1] ?? null;
    }
}
|
|
@ -0,0 +1,173 @@
|
|||
<?php
|
||||
namespace App\Handlers;
|
||||
|
||||
use App\Book;
|
||||
use App\Chapter;
|
||||
use App\Word;
|
||||
use App\Verse;
|
||||
use DOMDocument;
|
||||
use DOMElement;
|
||||
/**
|
||||
* UlbXmlImportHandler.php
|
||||
*
|
||||
* @author: Leonard Smith <leonard@acornwebconsultants.com>
|
||||
* Date: 10/2/20
|
||||
* Time: 8:04 AM
|
||||
*/
|
||||
class UlbXmlImportHandler
{
    use BookXmlFilesTrait;

    const REPO_FOLDER = '/ulb/';

    /**
     * Book name => path of the XML files that exist on disk.
     *
     * @var array
     */
    protected $availableFiles = [];

    /**
     * Find the book XML files that exist and import each of them.
     *
     * @return void
     */
    public function run() : void
    {
        // Collect available xml files
        foreach (self::$bookXmlFiles as $name => $filename) {
            $filepath = $this->getBookXmlFilePath(self::REPO_FOLDER, $name);

            if (file_exists($filepath)) {
                $this->availableFiles[$name] = $filepath;
            }
        }

        $this->loopOverXmlFiles();
    }

    /**
     * Import every file collected in $availableFiles.
     *
     * @return void
     */
    public function loopOverXmlFiles() : void
    {
        foreach ($this->availableFiles as $name => $filepath) {
            $this->importBook($name, $filepath);
        }
    }

    /**
     * Create a Book and import all of its chapters from an XML file.
     *
     * @param string $bookTitle
     * @param string $filepath
     * @return Book
     * @throws \RuntimeException when the XML file cannot be loaded
     */
    public function importBook(string $bookTitle, string $filepath) : Book
    {
        /** @var DOMDocument $document */
        $document = $this->openBook($filepath);

        $book = Book::create([
            'name' => $bookTitle,
        ]);

        foreach ($document->getElementsByTagName('chapter') as $chapterElem) {
            $chapter = $this->importChapter($chapterElem);
            $book->chapters()->save($chapter);
        }

        return $book;
    }

    /**
     * Create a Chapter and import all of its verses.
     *
     * @param DOMElement $chapterElem
     * @return Chapter
     * @throws \UnexpectedValueException when the chapter's osisID cannot be parsed
     */
    public function importChapter(DOMElement $chapterElem) : Chapter
    {
        $chapter = Chapter::create([
            'name' => $this->parseChapterName($chapterElem),
        ]);

        foreach ($chapterElem->getElementsByTagName('verse') as $verseElem) {
            $verse = $this->importVerse($verseElem);
            $chapter->verses()->save($verse);
        }

        return $chapter;
    }

    /**
     * Create a Verse and import all of its words.
     *
     * @param DOMElement $verseElem
     * @return Verse
     * @throws \UnexpectedValueException when the verse name cannot be parsed
     */
    public function importVerse(DOMElement $verseElem) : Verse
    {
        // item(0) is null when the element is missing; store null in that
        // case rather than failing on a property read of null.
        $greekNode = $verseElem->getElementsByTagName('Greek')->item(0);
        $ulbNode = $verseElem->getElementsByTagName('ULB')->item(0);

        $verse = Verse::create([
            'name' => $this->parseVerseNumber($verseElem),
            'greek_text' => $greekNode !== null ? $greekNode->nodeValue : null,
            'ulb_text' => $ulbNode !== null ? $ulbNode->nodeValue : null,
        ]);

        foreach ($verseElem->getElementsByTagName('w') as $wordElem) {
            $word = $this->importWord($wordElem);
            $verse->words()->save($word);
        }

        return $verse;
    }

    /**
     * Create a Word from a <w> element.
     *
     * @param DOMElement $wordElem
     * @return Word
     */
    public function importWord(DOMElement $wordElem) : Word
    {
        // NOTE: We have to switch things around a bit, as the incoming XML file
        // uses "lexeme" for the lemma and "lemma" for the Strong's number.
        return Word::create([
            'ulb' => $wordElem->nodeValue,
            'greek' => $wordElem->getAttribute('text'),
            'lemma' => $wordElem->getAttribute('lexeme'),
            'morph' => $wordElem->getAttribute('morph'),
            'ognt_sort' => $wordElem->getAttribute('OGNTsort'),
            'strongs_number' => $wordElem->getAttribute('lemma'),
        ]);
    }

    /**
     * Extract the trailing digits of the chapter's osisID attribute.
     *
     * @param DOMElement $chapterElem
     * @return string
     * @throws \UnexpectedValueException when the osisID does not match the expected form
     */
    protected function parseChapterName(DOMElement $chapterElem) : string
    {
        $osisId = $chapterElem->getAttribute('osisID');

        // Without this check a non-matching id left $matches[1] undefined and
        // the string return type turned that into a bare TypeError.
        if (preg_match("|^[A-Za-z1-3]*.([0-9]*)$|", $osisId, $matches) !== 1) {
            throw new \UnexpectedValueException("Unable to parse chapter from osisID '{$osisId}'");
        }

        return $matches[1];
    }

    /**
     * Extract the digits after the colon of the verse's name attribute.
     *
     * @param DOMElement $verseElem
     * @return string
     * @throws \UnexpectedValueException when the name does not match the expected form
     */
    protected function parseVerseNumber(DOMElement $verseElem) : string
    {
        $string = $verseElem->getAttribute('name');

        // Same guard as parseChapterName(): fail with a descriptive error
        // instead of returning an undefined match.
        if (preg_match("|^[A-Za-z1-3]*\s*[0-9]*:([0-9]*)$|", $string, $matches) !== 1) {
            throw new \UnexpectedValueException("Unable to parse verse number from name '{$string}'");
        }

        return $matches[1];
    }

    /**
     * Load an XML file into a DOMDocument.
     *
     * @param $filepath
     * @return DOMDocument
     * @throws \RuntimeException when the file cannot be loaded
     */
    protected function openBook($filepath) : DOMDocument
    {
        $document = new DOMDocument;

        // load() reports failure through its return value; without this
        // check an unreadable file would be imported as an empty book.
        if ($document->load($filepath) === false) {
            throw new \RuntimeException("Unable to load XML file: {$filepath}");
        }

        return $document;
    }
}
|
Loading…
Reference in New Issue