
155 lines
4.5 KiB

// ugntHelpers.js - this is the code called by ugntParse.js to download and convert Greek resources (was BHP and now UGNT).
import path from 'path-extra';
import fs from 'fs-extra';
const outputPath = path.join(__dirname, '../resources/en/lexicons/uhl');
/**
 * @description - parses `../HebrewStrong.xml` (relative to this file) and writes the
 *   lexicon as JSON under `<outputPath>/<version>`:
 *     - `content/<number>.json` for every entry: `{brief, long}` where `brief` is the
 *       text of the first `<def>` inside `<meaning>` and `long` is the flattened
 *       "Meaning: … Usage: … Source: …" text.
 *     - `index.json`: array of `{id, name}`, one per entry, where `name` is the text
 *       of the entry's first `<w>` element.
 * @param {string} version - name of the sub-folder of the lexicon output folder to write into
 * @return {Promise<void>} resolves when all files are written (the file I/O itself is synchronous)
 * @throws {Error} if a flattened definition still contains a `<` (left-over XML markup)
 */
export async function generateStrongsFiles(version) {
  const versionPath = path.join(outputPath, version);
  const inputPath = path.join(__dirname, '../HebrewStrong.xml');
  const hebrewStrongs = fs.readFileSync(inputPath).toString();

  // The first piece is whatever precedes the first entry, so it is skipped.
  const entries = hebrewStrongs.split('<entry id="').slice(1);
  const index = [];

  for (const item of entries) {
    // Each piece starts with the id value, terminated by the attribute's closing quote.
    const strongsCode = item.split('"', 1)[0];
    // File names use the id without its first character
    // (presumably a letter prefix such as "H" - confirm against the XML).
    const strongsNum = strongsCode.slice(1);

    const word = getXmlTag(item, 'w');
    const source = getXmlTag(item, 'source');
    const meaning = getXmlTag(item, 'meaning');
    const usage = getXmlTag(item, 'usage');
    const def = getXmlTag(meaning.content, 'def');
    meaning.content = replaceTag(meaning.content, 'def', `"${def.content}"`);

    let definition = '';
    definition = addContent(meaning, definition, 'Meaning');
    definition = addContent(usage, definition, 'Usage');
    definition = addContent(source, definition, 'Source');
    console.log(`${strongsNum} definition= ${definition}`);

    if (definition.includes('<')) {
      // `assert` is not imported in this file, so fail with a plain Error instead.
      throw new Error(`should not have xml: ${definition}`);
    }

    const entry = {
      brief: def.content,
      long: definition,
    };
    const entryPath = path.join(versionPath, 'content', `${strongsNum}.json`);
    fs.outputJsonSync(entryPath, entry);

    // Record the entry so index.json is not written out empty.
    index.push({
      id: strongsCode,
      name: word.content,
    });
  }

  const indexPath = path.join(versionPath, 'index.json');
  fs.outputJsonSync(indexPath, index);
  console.log(`Finished Parsing Strongs`);
}
/**
 * Finds the first `<tag>` / `<tag attrs…>` element in `text` using plain string
 * searching (no real XML parsing, no support for nested elements of the same name).
 *
 * The opening tag must be followed by `>` or whitespace, so looking for `w` does not
 * match `<word>`. The first occurrence in document order is returned, which is the
 * same element `replaceTag` would replace.
 *
 * @param {string} text - XML fragment to search
 * @param {string} tag - element name to look for
 * @return {{content: string, attr: string}} `content` is the text between the opening
 *   and closing tag, `attr` is the raw attribute text of the opening tag. Both are ''
 *   when the element is missing or malformed (no `>` or no closing tag).
 */
function getXmlTag(text, tag) {
  const openPrefix = `<${tag}`;
  const endTag = `</${tag}>`;

  // Locate the first opening tag whose name is exactly `tag`.
  let tagPos = text.indexOf(openPrefix);
  while (tagPos >= 0) {
    const following = text.charAt(tagPos + openPrefix.length);
    if (following === '>' || /\s/.test(following)) {
      break;
    }
    tagPos = text.indexOf(openPrefix, tagPos + 1);
  }
  if (tagPos < 0) {
    return { content: '', attr: '' };
  }

  const openEnd = text.indexOf('>', tagPos);
  if (openEnd < 0) {
    // Opening tag is never closed - nothing usable to return.
    return { content: '', attr: '' };
  }

  // Attribute text starts after the name and the one separator character.
  // slice() (unlike substring()) yields '' for the bare `<tag>` form, where start > end.
  const attr = text.slice(tagPos + openPrefix.length + 1, openEnd);

  const contentStart = openEnd + 1;
  const endPos = text.indexOf(endTag, contentStart);
  if (endPos < 0) {
    // substring(start, -1) would return the text *before* the tag; treat as not found.
    return { content: '', attr: '' };
  }

  return { content: text.substring(contentStart, endPos), attr };
}
/**
 * Reads the value of one attribute from the raw attribute text of an element
 * (the `attr` field of the object returned by `getXmlTag`).
 *
 * Only double-quoted values are recognised. The attribute name must be at the start
 * of the text or preceded by whitespace, so asking for `src` does not match `xsrc`.
 *
 * @param {{attr: string}} word - element info; only `word.attr` is read
 * @param {string} attr - attribute name
 * @return {string|undefined} the attribute value, or undefined when the attribute is
 *   absent or its value has no closing quote
 */
function getAttr(word, attr) {
  const attrText = word.attr;
  const marker = `${attr}="`;

  let searchFrom = 0;
  for (;;) {
    const pos = attrText.indexOf(marker, searchFrom);
    if (pos < 0) {
      return undefined;
    }

    const isWholeName = pos === 0 || /\s/.test(attrText.charAt(pos - 1));
    if (isWholeName) {
      const valueStart = pos + marker.length;
      // Read up to the closing quote so values may contain spaces and the
      // attribute need not be the last thing in the text.
      const valueEnd = attrText.indexOf('"', valueStart);
      return valueEnd < 0 ? undefined : attrText.substring(valueStart, valueEnd);
    }

    searchFrom = pos + 1;
  }
}
/**
 * Appends one labelled section to a definition string, after flattening the XML
 * markup inside the section text:
 *   - `<w>` elements become `"<src attribute>"`, or `"<element text>"` when there is
 *     no `src` attribute
 *   - `<def>` elements become `"<element text>"`
 *   - `<note>` elements are removed
 *
 * Before appending, the existing definition is trimmed and made to end in ". "
 * (a trailing `,`, `;` or `:` is replaced by the period).
 *
 * @param {{content: string}} source - section to add; nothing is added when its content is empty
 * @param {string} definition - text accumulated so far
 * @param {string} label - section label, e.g. 'Meaning'
 * @return {string} `definition` followed by "<label>: <flattened text> ", or
 *   `definition` unchanged when the section is empty
 */
function addContent(source, definition, label) {
  if (!source.content) {
    return definition;
  }

  let content = source.content;
  content = flattenTags(content, 'w', (element) => `"${getAttr(element, 'src') || element.content}"`);
  content = flattenTags(content, 'def', (element) => `"${element.content}"`);
  content = flattenTags(content, 'note', () => '');

  let prefix = definition;
  if (prefix) {
    const trimmed = prefix.trim();
    const last = trimmed.slice(-1);
    if (last === '.') {
      prefix = `${trimmed} `;
    } else if ([',', ';', ':'].includes(last)) {
      prefix = `${trimmed.slice(0, -1)}. `;
    } else {
      prefix = `${trimmed}. `;
    }
  }

  return `${prefix}${label}: ${content} `;
}

/**
 * Replaces `tag` elements in `text` one at a time until `getXmlTag` no longer reports
 * an element with content.
 *
 * @param {string} text - XML fragment
 * @param {string} tag - element name to flatten
 * @param {function({content: string, attr: string}): string} toReplacement - builds
 *   the replacement text for one element
 * @return {string} text with the elements replaced
 */
function flattenTags(text, tag, toReplacement) {
  let current = text;
  for (;;) {
    const element = getXmlTag(current, tag);
    if (!element.content) {
      return current;
    }
    const next = replaceTag(current, tag, toReplacement(element));
    // A real replacement always shortens the text (the tags themselves disappear).
    // Stopping when that does not happen guarantees termination on malformed markup,
    // e.g. an element without a closing tag.
    if (next.length >= current.length) {
      return current;
    }
    current = next;
  }
}
/**
 * Replaces the first `<tag …>…</tag>` element in `source` (opening tag through
 * closing tag) with `replace`, using plain string searching.
 *
 * The opening tag must be followed by `>` or whitespace, so replacing `w` does not
 * touch `<word>`.
 *
 * @param {string} source - XML fragment
 * @param {string} tag - element name to replace
 * @param {string} replace - text to put in place of the element
 * @return {string} the updated text, or `source` unchanged when there is no such
 *   element or it has no closing tag
 */
function replaceTag(source, tag, replace) {
  const openPrefix = `<${tag}`;
  const endTag = `</${tag}>`;

  // Locate the first opening tag whose name is exactly `tag`.
  let startPos = source.indexOf(openPrefix);
  while (startPos >= 0) {
    const following = source.charAt(startPos + openPrefix.length);
    if (following === '>' || /\s/.test(following)) {
      break;
    }
    startPos = source.indexOf(openPrefix, startPos + 1);
  }
  if (startPos < 0) {
    return source;
  }

  const endPos = source.indexOf(endTag, startPos);
  if (endPos < 0) {
    // Without this check the -1 would be used as an offset and duplicate part of the text.
    return source;
  }

  return source.substring(0, startPos) + replace + source.substring(endPos + endTag.length);
}