#convert.py
#version 0.3
# by John Wood -- for Tech Advance
# This script reads through the monolithic .md files in a DokuWiki copy of OBS and splits them
# out into chunks in folders named like the .md files. The chunks are named 01.txt to nn.txt
# where `nn` is the last chunk.
# The folders and files are automatically named correctly. The script now creates a
# manifest.json file as well.
# Usage: python convert.py <path/to/lll_obs>
# Requires Python 3.

#Import necessary python components
import json    # building manifest.json
import os      # file system commands
import re      # regular expressions
import sys     # command line arguments
import shutil  # high level file operations
from subprocess import call  # to fork for git

# Patterns used to pull values out of manifest.yaml, tried in this order for
# every line; the first pattern that matches a line wins.
# NOTE(review): the title pattern is anchored, so it only matches a `title:`
# key indented by exactly one space -- confirm this against the indentation
# used in real manifest.yaml files.
MANIFEST_PATTERNS = (
    ("modified", re.compile(r" modified: '(\d\d\d\d-\d\d-\d\d)'")),
    ("title", re.compile(r"^ title: (.+)")),
    ("direction", re.compile(r" direction: (\w+)")),
)


def parse_project_name(convertdir):
    """Split the project folder name into (language, book).

    Only the last path component is examined, so underscores in parent
    directories and a trailing path separator do not confuse the parsing.
    Returns None when the folder name does not consist of exactly two
    underscore-separated parts (tS projects look like lll_obs_text_obs
    rather than lll_obs).
    """
    folder = os.path.basename(os.path.normpath(convertdir))
    parts = folder.split("_")
    if len(parts) != 2:
        return None
    return parts[0], parts[1]


def split_story(lines):
    """Split the lines of one story into chunk files.

    Returns a dict mapping an output file name to the text to store in it:

    * a line starting with "![Image]" or "[[https" starts the next numbered
      chunk (01.txt, 02.txt, ...); the image line itself is not kept
    * a line starting with "_" goes to reference.txt with underscores removed
    * a line starting with "#" goes to title.txt with "#" characters removed
    * any other non-blank line is appended to the file chosen most recently
    * blank (whitespace-only) lines are dropped

    Raises ValueError when text appears before any of the markers above,
    because there is no file to put it in.
    """
    chunks = {}
    target = None
    filenum = 1
    for line in lines:
        if line.startswith(("![Image]", "[[https")):
            # line is an image: move on to the next numbered chunk
            target = "{:0>2d}.txt".format(filenum)
            filenum += 1
            text = ""
        elif line.startswith("_"):
            # lines with underscores are the references
            target = "reference.txt"
            text = line.replace("_", "")
        elif line.startswith("#"):
            # matching title
            target = "title.txt"
            text = line.replace("#", "")
        elif line.strip():
            text = line
        else:
            continue
        if target is None:
            raise ValueError("text found before the first title, image or reference line")
        # Registering the file even for empty text keeps chunks that have an
        # image but no text as empty files.
        chunks.setdefault(target, []).append(text)
    return {name: "".join(parts) for name, parts in chunks.items()}


def read_manifest_fields(lines):
    """Collect the values matched by MANIFEST_PATTERNS from manifest lines.

    Returns a dict that holds only the keys ("title", "direction",
    "modified") for which a matching line was found. When several lines match
    the same pattern the last one wins.
    """
    found = {}
    for line in lines:
        for key, pattern in MANIFEST_PATTERNS:
            match = pattern.search(line)
            if match:
                found[key] = match.group(1)
                break
    return found


def build_manifest(language, language_name, direction, modified_date):
    """Return the text of manifest.json for the converted project.

    The text is produced with the json module so that quotes or backslashes
    in the values cannot produce an invalid file.
    """
    manifest = {
        "package_version": 6,
        "format": "markdown",
        "generator": {
            "name": "ts-desktop",
            "build": "132",
        },
        "target_language": {
            "id": language,
            "name": language_name,
            "direction": direction,
        },
        "project": {
            "id": "obs",
            "name": "Open Bible Stories",
        },
        "type": {
            "id": "text",
            "name": "Text",
        },
        "resource": {
            "id": "obs",
            "name": "Open Bible Stories",
        },
        "source_translations": [
            {
                "language_id": "en",
                "resource_id": "obs",
                "checking_level": "3",
                "date_modified": modified_date,
                "version": "4",
            },
        ],
        "parent_draft": {},
        "translators": [],
        "finished_chunks": [],
    }
    return json.dumps(manifest, indent=2, ensure_ascii=False)


def convert_content(worksite, targetpath):
    """Copy the front matter and split every .md story found in worksite."""
    for entry in sorted(os.listdir(worksite)):
        source = os.path.join(worksite, entry)
        if entry == "front":
            front_target = os.path.join(targetpath, "front")
            os.makedirs(front_target, exist_ok=True)
            for name in os.listdir(source):
                # copy the file to the new location, changing its extension to .txt
                shutil.copyfile(os.path.join(source, name),
                                os.path.join(front_target, name.replace(".md", ".txt")))
        elif entry.endswith(".md"):
            # all other files we deal with are MarkDown files; each one gets a
            # folder matching the name of the md file
            story_dir = os.path.join(targetpath, entry.replace(".md", ""))
            os.makedirs(story_dir, exist_ok=True)
            with open(source, encoding="utf-8") as mdfile:
                try:
                    chunks = split_story(mdfile)
                except ValueError as err:
                    raise ValueError("{}: {}".format(source, err))
            for name, text in chunks.items():
                # "w" rather than append, so running the script again does not
                # duplicate the text of every chunk
                with open(os.path.join(story_dir, name), "w", encoding="utf-8") as chunkfile:
                    chunkfile.write(text)


def write_manifest(convertdir, targetpath, language):
    """Create manifest.json in targetpath from convertdir/manifest.yaml."""
    with open(os.path.join(convertdir, "manifest.yaml"), encoding="utf-8") as manfile:
        fields = read_manifest_fields(manfile)
    for key in ("title", "direction", "modified"):
        if key not in fields:
            print("Warning: no '{}' entry found in manifest.yaml; using a default".format(key))
    manifest_text = build_manifest(
        language,
        fields.get("title", language),
        fields.get("direction", "ltr"),
        fields.get("modified", ""),
    )
    with open(os.path.join(targetpath, "manifest.json"), "w", encoding="utf-8") as newmanfile:
        newmanfile.write(manifest_text)


def commit_project(targetpath):
    """Turn targetpath into a git repository holding one initial commit."""
    commands = (
        ["git", "init"],
        ["git", "add", "."],
        ["git", "commit", "-m", "Initial commit"],
    )
    try:
        for command in commands:
            call(command, cwd=targetpath)
    except OSError as err:
        # Raised when the git executable cannot be started at all.
        print("Could not run git ({}); the project was not committed".format(err))


def main(argv=None):
    """Convert the DokuWiki OBS project named by the single argument.

    argv defaults to the command line arguments (without the program name).
    Exits with status 1 when the argument is missing or does not name an OBS
    project folder.
    """
    arguments = sys.argv[1:] if argv is None else list(argv)
    if len(arguments) != 1:
        # If there is not exactly one argument, fail with a usage remark.
        print("convert.py script to convert DokuWiki OBS to translationStudio format")
        print("Usage: python convert.py <path/to/lll_obs>")
        sys.exit(1)

    convertdir = os.path.normpath(arguments[0])
    project = parse_project_name(convertdir)
    if project is None:
        print("This may not be a DokuWiki OBS project and I'm not sure how to handle it")
        sys.exit(1)
    language, book = project
    if book != "obs":
        print("Only OBS projects (folders named like lll_obs) can be converted")
        sys.exit(1)

    worksite = os.path.join(convertdir, "content")  # the actual content is in a subdirectory
    if not os.path.isdir(worksite):
        print("No content folder found in " + convertdir)
        sys.exit(1)

    targetpath = convertdir + "_text_obs"  # this gets the target name into the right format
    os.makedirs(targetpath, exist_ok=True)
    convert_content(worksite, targetpath)
    write_manifest(convertdir, targetpath, language)
    commit_project(targetpath)
    print("New project written in " + targetpath)


if __name__ == "__main__":
    main()