# version 0.1
# by John Wood - for Tech Advance
#
# This script reads through the monolithic .md files in a DokuWiki copy of OBS
# and splits them out into chunks in folders named like the .md files. The
# chunks are named 01.txt to n.txt (zero-padded to two digits), where `n` is
# the last chunk; any text that appears before the first image goes to 00.txt.
# Heading lines are collected in title.txt and reference lines in
# reference.txt. The folders and files are automatically named correctly.
#
# Because the script doesn't generate a manifest.json I create a project in
# translationStudio and then merge the folders created by this script into the
# translation folder created by translationStudio.
#
# Usage: run the script in the directory that holds the .md files. The output
# directory may be given as the first command-line argument; when it is
# omitted, DEFAULT_OUTPUT_ROOT below is used.

# Import necessary python components
import os
import re
import sys

# Output root used when no directory is given on the command line. This is the
# original author's machine-specific path: pass your own directory as the first
# argument (or change this) before using the script, or you're likely to have
# problems.
DEFAULT_OUTPUT_ROOT = "/Users/jdwood/Downloads/tpi_obs/content/"

# A line starting with "![OBS" (an image) opens the next numbered chunk. The
# pattern is bytes because the files are processed in binary mode.
_FRAME_MARKER = re.compile(br"!\[OBS")


def split_markdown_file(md_path, output_root=DEFAULT_OUTPUT_ROOT):
    """Split one .md file into chunk files under ``output_root``.

    The output folder is named after the file without its extension and is
    created if missing. Each input line is routed as follows:

    * starts with ``![OBS``: advances the chunk counter; the line itself is
      not written anywhere;
    * starts with ``#``: appended to ``title.txt`` with every ``#`` removed;
    * starts with ``_``: appended to ``reference.txt`` with every ``_``
      removed;
    * anything else: appended to ``NN.txt`` where ``NN`` is the current
      zero-padded chunk counter (``00`` before the first image).

    Files are read and written as bytes so the content passes through
    unchanged whatever its encoding or newline style. Output files are opened
    in append mode, so running the script twice duplicates their content.

    Args:
        md_path: Path of the .md file to split.
        output_root: Directory under which the per-file folder is created.

    Returns:
        The path of the folder the chunks were written to.

    Raises:
        IOError/OSError: If the input cannot be read or the output cannot be
            created or written.
    """
    # splitext strips only the extension; str.replace(".md", "") would also
    # eat a ".md" occurring in the middle of the name.
    stem = os.path.splitext(os.path.basename(md_path))[0]
    out_dir = os.path.join(output_root, stem)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # Group the lines per output file first so each file is opened once
    # instead of once per line.
    pieces = {}
    frame = 0
    with open(md_path, "rb") as md_file:
        for line in md_file:
            if _FRAME_MARKER.match(line):
                frame += 1
                continue
            if line.startswith(b"#"):
                target, text = "title.txt", line.replace(b"#", b"")
            elif line.startswith(b"_"):
                target, text = "reference.txt", line.replace(b"_", b"")
            else:
                target, text = "{:0>2d}.txt".format(frame), line
            pieces.setdefault(target, []).append(text)

    for target, parts in pieces.items():
        with open(os.path.join(out_dir, target), "ab") as out_file:
            out_file.write(b"".join(parts))
    return out_dir


def main(argv=None):
    """Split every .md file in the current directory.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument, when present, is the output
            root directory; otherwise DEFAULT_OUTPUT_ROOT is used.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    output_root = args[0] if args else DEFAULT_OUTPUT_ROOT

    # Run the script in the target directory. Sorting only makes the progress
    # output deterministic; each file is handled independently.
    for filename in sorted(os.listdir(".")):
        # Skip non-.md entries and directories that merely end in ".md".
        if not filename.endswith(".md") or not os.path.isfile(filename):
            continue
        print("Working with " + filename + ".\n")
        split_markdown_file(filename, output_root)


if __name__ == "__main__":
    main()