# Tips_and_Hacks/convert DokuWiki/convert.py

# 150 lines
# 6.1 KiB
# Python
# Executable File

#convert.py
#version 0.3
# by John Wood -- for Tech Advance
# This script reads through the monolithic .md files in a DokuWiki copy of OBS and splits them
# out into chunks in folders named like the .md files. The chunks are named 01.txt to nn.txt
# where `nn` is the last chunk.
# The folders and files are automatically named correctly. The script now creates a
# manifest.json file as well.
# Usage: python convert.py <path to DokuWiki OBS files>
#Import necessary python components
import json # serialising manifest.json
import os # file system commands
import re # regular expressions
import shutil # high level file operations
import sys # command line arguments
from subprocess import call # to fork for git
# Lines starting with one of these prefixes open a new numbered chunk (NN.txt).
CHUNK_MARKERS = ("![Image]", "[[https")
UNSUPPORTED_MESSAGE = "This may not be a DokuWiki OBS project and I'm not sure how to handle it"

# Patterns used to pull the three values needed for manifest.json out of
# manifest.yaml. The file is scanned line by line rather than parsed as YAML.
TITLE_PATTERN = re.compile(r"^ title: (.+)")
DIRECTION_PATTERN = re.compile(r" direction: (\w+)")
MODIFIED_PATTERN = re.compile(r" modified: '(\d\d\d\d-\d\d-\d\d)'")


def _parse_project_dir(convertdir):
    """Return ``(project_dir, language)`` for a ``<language>_obs`` directory.

    Only the last path component is inspected, so parent directories may
    contain underscores, and a trailing path separator is ignored.

    Raises:
        ValueError: if the directory is not named ``<language>_obs``.
    """
    project_dir = convertdir.rstrip("/" + os.sep) or convertdir
    projectinfo = os.path.basename(project_dir).split("_")
    # tS projects look like lll_obs_text_obs rather than lll_obs
    if len(projectinfo) != 2 or projectinfo[1] != "obs":
        raise ValueError(UNSUPPORTED_MESSAGE)
    return project_dir, projectinfo[0]


def _ensure_dir(directory):
    """Create *directory* (and its parents) unless it already exists."""
    if not os.path.exists(directory):
        os.makedirs(directory)


def _copy_front(source_dir, target_dir):
    """Copy the front-matter files, changing a ``.md`` extension to ``.txt``."""
    _ensure_dir(target_dir)
    for name in os.listdir(source_dir):
        source = os.path.join(source_dir, name)
        if not os.path.isfile(source):
            continue  # copyfile cannot copy directories
        if name.endswith(".md"):
            name = name[:-len(".md")] + ".txt"
        shutil.copyfile(source, os.path.join(target_dir, name))


def _split_story(md_path, story_dir):
    """Split one story file into chunk files inside *story_dir*.

    The file is read line by line. An image/link marker line opens the next
    numbered chunk (``01.txt``, ``02.txt``, ...), a line starting with ``_``
    switches to ``reference.txt`` (underscores removed) and a line starting
    with ``#`` switches to ``title.txt`` (hashes removed). Every other
    non-blank line is appended to whichever file was opened last.

    Output files are rewritten from scratch, so converting the same project
    twice does not duplicate text.

    Returns:
        The number of numbered chunks found.
    """
    pieces_by_file = {}  # output file name -> list of text pieces
    current = None  # no output file is selected until the first marker line
    chunk_count = 0
    with open(md_path, encoding="utf-8") as mdfile:
        for line in mdfile:
            text = line
            if line.startswith(CHUNK_MARKERS):
                # the marker line itself is not copied; it only opens the chunk
                chunk_count += 1
                current = "{:0>2d}.txt".format(chunk_count)
                text = ""
            elif line.startswith("_"):
                # lines with underscores are the references
                current = "reference.txt"
                text = line.replace("_", "")
            elif line.startswith("#"):
                # matching title
                current = "title.txt"
                text = line.replace("#", "")
            elif not line.strip():
                # blank (or whitespace-only) lines are dropped
                text = ""
            if current is None:
                if text:
                    sys.stderr.write(
                        "Skipping text before the first title/image marker in "
                        + md_path + "\n")
                continue
            pieces_by_file.setdefault(current, []).append(text)
    _ensure_dir(story_dir)
    for name, pieces in pieces_by_file.items():
        with open(os.path.join(story_dir, name), "w", encoding="utf-8") as chunk_file:
            chunk_file.write("".join(pieces))
    return chunk_count


def _read_manifest_fields(manifest_path):
    """Extract the language title, text direction and modified date.

    Returns:
        A dict with the keys ``title``, ``direction`` and ``modified``.

    Raises:
        ValueError: if any of the three values is not found in the file.
    """
    fields = {}
    with open(manifest_path, encoding="utf-8") as manfile:
        for manline in manfile:
            modified_match = MODIFIED_PATTERN.search(manline)
            title_match = TITLE_PATTERN.search(manline)
            direction_match = DIRECTION_PATTERN.search(manline)
            if modified_match:
                fields["modified"] = modified_match.group(1)
            elif title_match:
                fields["title"] = title_match.group(1)
            elif direction_match:
                fields["direction"] = direction_match.group(1)
    missing = [key for key in ("title", "direction", "modified") if key not in fields]
    if missing:
        raise ValueError(
            manifest_path + " is missing required value(s): " + ", ".join(missing))
    return fields


def _build_manifest(language, fields):
    """Return the translationStudio manifest as a JSON-serialisable dict."""
    return {
        "package_version": 6,
        "format": "markdown",
        "generator": {
            "name": "ts-desktop",
            "build": "132",
        },
        "target_language": {
            "id": language,
            "name": fields["title"],
            "direction": fields["direction"],
        },
        "project": {
            "id": "obs",
            "name": "Open Bible Stories",
        },
        "type": {
            "id": "text",
            "name": "Text",
        },
        "resource": {
            "id": "obs",
            "name": "Open Bible Stories",
        },
        "source_translations": [
            {
                "language_id": "en",
                "resource_id": "obs",
                "checking_level": "3",
                "date_modified": fields["modified"],
                "version": "4",
            },
        ],
        "parent_draft": {},
        "translators": [],
        "finished_chunks": [],
    }


def convert(convertdir):
    """Convert the DokuWiki OBS project in *convertdir* to translationStudio layout.

    Writes ``<convertdir>_text_obs`` containing one folder per story (with
    ``title.txt``, ``reference.txt`` and numbered chunk files), a copy of the
    ``front`` folder and a ``manifest.json``. Requires Python 3.

    Returns:
        The path of the directory that was written.

    Raises:
        ValueError: if the directory is not named ``<language>_obs`` or
            ``manifest.yaml`` lacks a required value.
        OSError: if an expected file or directory cannot be read or written.
    """
    project_dir, language = _parse_project_dir(convertdir)
    # Read the manifest first so a bad project fails before anything is written.
    fields = _read_manifest_fields(os.path.join(project_dir, "manifest.yaml"))
    targetpath = project_dir + "_text_obs"  # the target name in the right format
    worksite = os.path.join(project_dir, "content")  # the actual content lives here
    for filename in os.listdir(worksite):
        source = os.path.join(worksite, filename)
        if filename == "front":
            _copy_front(source, os.path.join(targetpath, "front"))
        elif filename.endswith(".md"):  # all other files we deal with are MarkDown
            # the story folder is named like the .md file, minus its extension
            _split_story(source, os.path.join(targetpath, filename[:-len(".md")]))
    _ensure_dir(targetpath)  # covers a project whose content folder is empty
    manifest_text = json.dumps(
        _build_manifest(language, fields), indent=4, ensure_ascii=False)
    with open(os.path.join(targetpath, "manifest.json"), "w", encoding="utf-8") as newmanfile:
        newmanfile.write(manifest_text + "\n")
    return targetpath


def _init_git_repo(targetpath):
    """Create a git repository in *targetpath* and commit everything in it.

    Returns True when every git command succeeded. Stops at the first failure
    and reports it instead of raising, because the converted files are already
    on disk at that point.
    """
    commands = (
        ["git", "init"],
        ["git", "add", "."],
        ["git", "commit", "-m", "Initial commit"],
    )
    for command in commands:
        try:
            # cwd= keeps the working directory of this process unchanged
            status = call(command, cwd=targetpath)
        except OSError as error:
            print("Could not run '" + " ".join(command) + "': " + str(error))
            return False
        if status != 0:
            print("'" + " ".join(command) + "' failed with exit status " + str(status))
            return False
    return True


def main(argv=None):
    """Command line entry point; returns the process exit status.

    *argv* is the argument list without the program name and defaults to
    ``sys.argv[1:]``.
    """
    arguments = sys.argv[1:] if argv is None else list(argv)
    if len(arguments) != 1:  # If there is not exactly one argument, fail with a usage remark.
        print("convert.py script to convert DokuWiki OBS to translationStudio format")
        print("Usage: python convert.py <old directory>")
        return 1
    try:
        targetpath = convert(arguments[0])
    except (ValueError, OSError) as error:
        print(error)
        return 1
    _init_git_repo(targetpath)
    print("New project written in " + targetpath)
    return 0


if __name__ == "__main__":
    sys.exit(main())