2017-10-31 02:46:39 +00:00
|
|
|
#version 0.2
|
2017-08-16 16:51:31 +00:00
|
|
|
#by John Wood - for Tech Advance
|
|
|
|
# This script reads through the monolithic .md files in a DokuWiki copy of OBS and splits them out into chunks in folders
|
|
|
|
# named like the .md files. The chunks are named 01.txt to nn.txt (zero-padded to two digits) where `nn` is the last chunk. The folders and files are
|
|
|
|
# automatically named correctly. Because the script doesn't generate a manifest.json I create a project in translationStudio
|
|
|
|
# and then merge the folders created by this script into the translation folder created by translationStudio.
|
|
|
|
|
2017-10-31 02:46:39 +00:00
|
|
|
# Usage: python convert.py <path to DokuWiki OBS files>
|
|
|
|
|
|
|
|
# Import necessary python components
|
2017-08-16 16:44:17 +00:00
|
|
|
import os
|
2017-10-31 02:46:39 +00:00
|
|
|
# os is used for file system commands
|
2017-08-16 16:44:17 +00:00
|
|
|
import re
|
2017-10-31 02:46:39 +00:00
|
|
|
# re is used for regular expressions
|
|
|
|
import sys
|
|
|
|
# sys is used for command line arguments
|
|
|
|
import shutil
|
|
|
|
# shutil is used for high-level file operations
|
|
|
|
|
|
|
|
from subprocess import call
|
|
|
|
# to fork for git
|
|
|
|
|
|
|
|
# print "Starting the conversion process"
|
2017-08-16 16:51:31 +00:00
|
|
|
|
2017-10-31 02:46:39 +00:00
|
|
|
# Extra standard-library modules needed by the manifest writer below.
import json
from collections import OrderedDict

# Lines starting with an image reference or a [[https link mark the start of
# the next chunk and are not copied into the translationStudio project.
CHUNK_MARKER = re.compile(r'\!\[Image|\[\[https')

# Patterns used to pull three values out of manifest.yaml.
# NOTE(review): the leading whitespace in these patterns has to match the
# indentation actually used in manifest.yaml -- confirm against a real manifest.
TITLE_PATTERN = re.compile(r"^ title: (.+)")
DIRECTION_PATTERN = re.compile(r" direction: (\w+)")
MODIFIED_PATTERN = re.compile(r" modified: '(\d\d\d\d-\d\d-\d\d)'")


def parse_project_dir(convertdir):
    """Work out the language code and output folder for a source folder.

    The last path component must look like ``<language>_obs``. Only that
    component is inspected, so underscores in parent directories and a
    trailing slash do not cause a valid project to be rejected.

    Returns a ``(language, targetpath)`` tuple, or ``None`` when the folder
    name is not of the expected form.
    """
    normalized = os.path.normpath(convertdir)
    projectinfo = os.path.basename(normalized).split("_")
    if len(projectinfo) != 2 or projectinfo[1] != "obs":
        return None
    # munge the folder name to get the name of the resulting folder.
    return projectinfo[0], normalized + "_text_obs"


def split_story(lines):
    """Sort the lines of one story into the output files they belong to.

    Returns a dict mapping an output file name (``title.txt``,
    ``reference.txt`` or a zero-padded chunk name such as ``01.txt``) to the
    list of lines to write to it, in input order.
    """
    outputs = {}
    filenum = 0
    for line in lines:
        if CHUNK_MARKER.match(line):
            # An image/link line starts the next chunk and is dropped.
            filenum += 1
        elif line.startswith("#"):
            # Title line: keep the text but erase the hashes (#).
            outputs.setdefault("title.txt", []).append(line.replace("#", ""))
        elif line.startswith("_"):
            # Lines with underscores are the references; erase the underscores.
            outputs.setdefault("reference.txt", []).append(line.replace("_", ""))
        elif line.strip():
            # Only non-blank lines are kept. Text that appears before the
            # first image line goes into chunk 01.
            if filenum == 0:
                filenum = 1
            chunk_name = "{:0>2d}.txt".format(filenum)
            outputs.setdefault(chunk_name, []).append(line)
    return outputs


def convert_content(convertdir, targetpath):
    """Split every story under ``<convertdir>/content`` into ``targetpath``.

    The special ``front`` folder is copied file by file with ``.md`` renamed
    to ``.txt``. Each ``*.md`` file gets a folder of the same name holding its
    title, reference and chunk files. Files are written in ``"w"`` mode so
    that running the script twice does not duplicate text.
    """
    content_dir = os.path.join(convertdir, "content")
    for filename in os.listdir(content_dir):
        source = os.path.join(content_dir, filename)
        # Check for special folders first
        if filename == "front":
            front_dir = os.path.join(targetpath, "front")
            if not os.path.exists(front_dir):
                os.makedirs(front_dir)
            for front_name in os.listdir(source):
                shutil.copyfile(
                    os.path.join(source, front_name),
                    os.path.join(front_dir, front_name.replace(".md", ".txt")),
                )
        # Else, if the filename is a MarkDown file
        elif filename.endswith(".md"):
            # Make a folder/directory matching the name of the Markdown file
            story_dir = os.path.join(targetpath, filename.replace(".md", ""))
            if not os.path.exists(story_dir):
                os.makedirs(story_dir)
            with open(source) as mdfile:
                outputs = split_story(mdfile)
            for name, text_lines in outputs.items():
                with open(os.path.join(story_dir, name), "w") as newfile:
                    newfile.writelines(text_lines)


def read_manifest_fields(lines):
    """Extract the language title, text direction and modified date.

    ``lines`` is an iterable of lines from manifest.yaml. Each line sets at
    most one field (modified date is checked first, then title, then
    direction); a later matching line overrides an earlier one. Fields that
    never match are left as ``None``.
    """
    fields = {"title": None, "direction": None, "modified": None}
    for manline in lines:
        modified_match = MODIFIED_PATTERN.search(manline)
        title_match = TITLE_PATTERN.search(manline)
        direction_match = DIRECTION_PATTERN.search(manline)
        if modified_match:
            fields["modified"] = modified_match.group(1)
        elif title_match:
            fields["title"] = title_match.group(1).strip()
        elif direction_match:
            fields["direction"] = direction_match.group(1)
    return fields


def build_manifest(language, target_language_name, target_direction, modified_date):
    """Return the manifest.json text for the converted project.

    The document is serialised with ``json.dumps`` so that quotes or
    backslashes in the language name cannot produce invalid JSON.
    """
    manifest = OrderedDict([
        ("package_version", 6),
        ("format", "markdown"),
        ("generator", OrderedDict([
            ("name", "ts-desktop"),
            ("build", "132"),
        ])),
        ("target_language", OrderedDict([
            ("id", language),
            ("name", target_language_name),
            ("direction", target_direction),
        ])),
        ("project", OrderedDict([
            ("id", "obs"),
            ("name", "Open Bible Stories"),
        ])),
        ("type", OrderedDict([
            ("id", "text"),
            ("name", "Text"),
        ])),
        ("resource", OrderedDict([
            ("id", "obs"),
            ("name", "Open Bible Stories"),
        ])),
        ("source_translations", [
            OrderedDict([
                ("language_id", "en"),
                ("resource_id", "obs"),
                ("checking_level", "3"),
                ("date_modified", modified_date),
                ("version", "4"),
            ]),
        ]),
        ("parent_draft", OrderedDict()),
        ("translators", []),
        ("finished_chunks", []),
    ])
    return json.dumps(manifest, indent=2)


def main(argv):
    """Run the conversion for ``argv`` and return the process exit status."""
    arguments = argv[1:]
    if len(arguments) != 1:
        sys.stderr.write("Usage: python convert.py <path to DokuWiki OBS files>\n")
        return 1

    convertdir = arguments[0]
    project = parse_project_dir(convertdir)
    if project is None:
        print("This may not be a DokuWiki OBS project, and I'm not sure how to handle it")
        return 1
    language, targetpath = project

    convert_content(convertdir, targetpath)

    print("Generating the manifest")
    with open(os.path.join(convertdir, "manifest.yaml")) as manfile:
        fields = read_manifest_fields(manfile)
    missing = sorted(name for name, value in fields.items() if value is None)
    if missing:
        # Fail with a readable message rather than an undefined-variable crash.
        sys.stderr.write(
            "manifest.yaml has no usable value for: " + ", ".join(missing) + "\n"
        )
        return 1

    # The target folder does not exist yet when content/ held nothing to convert.
    if not os.path.exists(targetpath):
        os.makedirs(targetpath)
    # "w" rather than "a+": appending a second JSON document on a re-run would
    # leave manifest.json unparseable.
    with open(os.path.join(targetpath, "manifest.json"), "w") as newmanfile:
        newmanfile.write(
            build_manifest(
                language, fields["title"], fields["direction"], fields["modified"]
            )
        )
        newmanfile.write("\n")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
|
|
|
|
|
|
|
#gitCommand = "/usr/bin/git init "+targetpath;
|
|
|
|
#call("/usr/bin/git init", targetpath)
|