#version 0.1
#by John Wood - for Tech Advance
# This script reads through the monolithic .md files in a DokuWiki copy of OBS and splits them out into chunks in folders
# named after the .md files. The chunks are named 01.txt to nn.txt (zero-padded to two digits), where `nn` is the last
# chunk; any text before the first image line goes to 00.txt, lines starting with `#` go to title.txt and lines starting
# with `_` go to reference.txt. The folders and files are named automatically. Because the script doesn't generate a
# manifest.json, I create a project in translationStudio and then merge the folders created by this script into the
# translation folder created by translationStudio.

import os
import re
# Import necessary python components

# Default destination for the generated chunk folders. This is the path the
# script was originally hard coded to; pass a different ``output_root`` to the
# functions below (or change this constant) before using the script elsewhere,
# or you're likely to have problems.
DEFAULT_OUTPUT_ROOT = "/Users/jdwood/Downloads/tpi_obs/content/"

# Only files whose names end with this suffix are split.
MD_SUFFIX = ".md"


def split_md_file(md_path, output_root=DEFAULT_OUTPUT_ROOT):
    """Split one monolithic .md file into chunk files.

    A folder named after the .md file (minus its ``.md`` suffix) is created
    under ``output_root`` if it does not exist yet. Each input line is then
    routed by its prefix:

    * a line starting with ``![OBS`` begins the next chunk and is itself
      dropped;
    * a line starting with ``#`` is written to ``title.txt`` with every
      ``#`` character removed;
    * a line starting with ``_`` is written to ``reference.txt`` with every
      ``_`` character removed;
    * any other line is written unchanged to the current chunk file,
      ``NN.txt`` (two-digit, zero-padded; ``00.txt`` holds whatever precedes
      the first ``![OBS`` line).

    Output files are opened in append mode, so running this twice against the
    same folder appends the content a second time.

    Args:
        md_path: Path of the .md file to split.
        output_root: Directory under which the chunk folder is created.

    Returns:
        The path of the chunk folder for this file.
    """
    base_name = os.path.basename(md_path)
    # Strip only the trailing suffix; str.replace would also remove any
    # ".md" that happens to appear in the middle of the name.
    if base_name.endswith(MD_SUFFIX):
        folder_name = base_name[:-len(MD_SUFFIX)]
    else:
        folder_name = base_name

    chunk_dir = os.path.join(output_root, folder_name)
    if not os.path.isdir(chunk_dir):
        os.makedirs(chunk_dir)

    # Buffer the text per destination file so each output file is opened once
    # instead of once per input line. Per-file line order is preserved.
    pending = {}
    chunk_num = 0
    with open(md_path) as mdfile:
        for line in mdfile:
            if line.startswith("![OBS"):
                chunk_num += 1
                continue
            if line.startswith("#"):
                target, text = "title.txt", line.replace("#", "")
            elif line.startswith("_"):
                target, text = "reference.txt", line.replace("_", "")
            else:
                target, text = "{:0>2d}.txt".format(chunk_num), line
            pending.setdefault(target, []).append(text)

    for target, texts in pending.items():
        with open(os.path.join(chunk_dir, target), "a") as newfile:
            newfile.writelines(texts)

    return chunk_dir


def split_all(source_dir=".", output_root=DEFAULT_OUTPUT_ROOT):
    """Split every .md file found directly inside ``source_dir``.

    Args:
        source_dir: Directory to scan; defaults to the current directory, so
            the script can simply be run from the target directory.
        output_root: Directory under which the chunk folders are created.
    """
    for filename in sorted(os.listdir(source_dir)):
        if filename.endswith(MD_SUFFIX):
            # Parenthesised single-argument form prints identically on
            # Python 2 and Python 3.
            print("Working with " + filename + ".\n")
            split_md_file(os.path.join(source_dir, filename), output_root)


if __name__ == "__main__":
    split_all()