# -*- coding: utf-8 -*-

import string
import codecs
import xml.dom.minidom
import sys
import re
import MySQLdb
import os

# replace ASCII in Greek source files with UTF characters 
def utf_replace(t,replacements,eng):
   """Convert placeholder codes and transliterated Greek in t to Unicode text.

   t            -- text to convert; surrounding whitespace is stripped first
   replacements -- sequence of [source, target] pairs applied in order, or
                   None when the text is being formatted for the database
   eng          -- true when the text is English

   For English text, or when replacements is None, only the placeholder
   codes listed by html_code_replacements() are substituted.  Otherwise
   every pair in replacements is applied and sigmas are then normalised so
   that the final form is used only at the end of a word.

   Returns the converted text.
   """
   t = t.strip()

   # replacements is None if text_db is being used (i.e., if text is being formatted for db)
   if eng or replacements is None:
      for code in html_code_replacements():
         # code[1] is the placeholder form ("/mdash;"), code[2] the character
         t = t.replace(code[1], code[2])
      return t

   for source, target in replacements:
      t = t.replace(source, target)

   # final sigma fix: start from medial sigma everywhere, then switch to the
   # final form before whitespace, punctuation, a tag opening or end of text
   t = t.replace(u'\u03C2', u'\u03C3')
   # flags must be passed by keyword: the fourth positional argument of
   # re.sub is count, so a positional re.UNICODE (== 32) capped the number of
   # substitutions at 32 and never applied the flag
   t = re.sub(u"\u03C3([<\\s,;.:\u2014\"'])", u"\u03C2\\1", t, flags=re.UNICODE)
   t = re.sub(u"\u03C3$", u"\u03C2", t, flags=re.UNICODE)
   return t

# returns a list of replacements for Greek source files
def define_utf_replacements():
   """Build the ordered replacement table used to convert Greek source text.

   Reads ./greek-unicode-table.txt (UTF-8).  Every line that splits into
   exactly two fields on "%" contributes a [source, target] pair, the target
   being the second field without its last two characters.

   Returns a list of [source, target] lists: the placeholder pairs from
   html_code_replacements() first (in reverse table order), followed by the
   table pairs sorted by decreasing source length.
   """
   replacements = []
   # the with-block closes the table file; the handle used to be leaked
   with codecs.open("./greek-unicode-table.txt","r","utf-8") as table:
      for line in table:
         elements = line.split("%")
         if len(elements) == 2:
            # NOTE(review): the slice drops two trailing characters,
            # presumably a "\r\n" line ending -- confirm against the table file
            replacements.append([elements[0], elements[1][:-2]])

   # longer source sequences are applied before shorter ones
   replacements.sort(key = lambda pair: len(pair[0]), reverse = True)

   # placeholder codes go to the front of the table; inserting each one at
   # index 0 leaves them in reverse order
   for code in html_code_replacements():
      replacements.insert(0, [code[1], code[2]])

   return replacements

# converts a <milestone> node from the source XML into an HTML fragment
def get_milestone(node,title):
   """Return the HTML fragment that represents a <milestone> node.

   node  -- DOM node whose "ed", "unit" and "n" attributes are inspected
   title -- English title of the text being processed; "Metaphysics" and
            "Rhetoric" get special treatment

   Rules, in order:
   * "ed" and "unit" both present: only ed="P" with unit "para" or
     "Loeb chap" produces output; every other combination yields "".
   * otherwise "unit" and "n" must both be present: "section" becomes a
     milestone div (or, for Rhetoric, a numbered paragraph break when n is
     an integer below 100) and "chapter" becomes a bold chapter heading.

   Returns "" whenever no rule applies or a required attribute is missing.
   """
   paragraph_break = '<br /><br /><!-- paragraph -->\n'

   if not node.attributes:
      return ""

   def value_of(name):
      # attribute value, or None when the attribute is absent
      if node.hasAttribute(name):
         return node.getAttribute(name)
      return None

   ed = value_of("ed")
   unit = value_of("unit")
   n = value_of("n")

   if ed is not None and unit is not None:
      if ed == "P" and unit == "para":
         return paragraph_break
      if ed == "P" and unit == "Loeb chap" and n is not None:
         if title == "Metaphysics":
            return ' ' + n + ' '
         return paragraph_break + n + ' '
      # any other edition/unit pair (or a Loeb chapter without a number) is dropped
      return ""

   if unit is None or n is None:
      return ""

   milestone_div = '\n<div class="milestone">' + n + "</div>\n"

   if unit == "section":
      if title != "Rhetoric":
         return milestone_div
      try:
         sect = int(n)
      except ValueError:
         # non-numeric section labels are ordinary milestones
         return milestone_div
      if sect < 100:
         return '<br /><br />' + str(sect) + '. '
      # numeric sections of 100 and above produce no output
      return ""

   if unit == "chapter":
      return '<br /><br /><b>Chapter ' + n + '</b><br /><br />'

   return ""

# parses and re-formats an XML node from a source file
def get_text(title,text,node2,replacements,eng):
   """Append the HTML for one body-level source node to text.

   title        -- English title, passed on to get_milestone
   text         -- list of output fragments; extended in place
   node2        -- DOM node to convert; only "milestone", "sp" and "p"
                   nodes produce output
   replacements -- replacement table handed to utf_replace (may be None)
   eng          -- true when the source is English

   Always returns "" -- the output is delivered through text.
   """
   def add_paragraph(paragraph):
      # children of a <p>: milestones are converted, text nodes are wrapped in a text div
      for child in paragraph.childNodes:
         if child.nodeName == "milestone":
            text.append(get_milestone(child,title))
         if child.nodeType == node2.TEXT_NODE:
            text.append('<div class="text">' + utf_replace(child.toxml(),replacements,eng) + "</div>\n")

   tag = node2.nodeName

   if tag == "milestone":
      text.append(get_milestone(node2,title))
   elif tag == "sp":
      # a speech: speaker label, paragraphs and stray milestones
      for part in node2.childNodes:
         kind = part.nodeName
         if kind == "speaker":
            speaker = utf_replace(part.firstChild.toxml(),replacements,eng)
            text.append('<div class="speaker">' + speaker + ":</div>\n")
         elif kind == "p":
            add_paragraph(part)
         elif kind == "milestone":
            text.append(get_milestone(part,title))
   elif tag == "p":
      add_paragraph(node2)

   return ""

# converts abbreviations to titles
def get_title(abbrev,eng=0):
   """Return the English title for a source abbreviation.

   abbrev -- abbreviation or title as used in the source files
             (e.g. "Euthyph.", "Republic")
   eng    -- true when an English source is being processed

   Returns "RTT" when the abbreviation is unknown.  For Greek sources
   (eng false) the [greek title, english title] pair is also appended to the
   module-level html_index list, which feeds the front page.
   """
   titles = {

   "Euthyph.":["Εὐθύφρων","Euthyphro"],
   "Apol.":["Α᾿πολογία Σωκράτους","Apology"],
   "Crito":["Κρίτων","Crito"],
   "Phaedo":["Φαίδων","Phaedo"],
   "Crat.":["Κρατύλος","Cratylus"],
   "Theaet.":["Θεαίτητος","Theaetetus"],
   "Soph.":["Σοφιστής","Sophist"],
   "Stat.":["Πολιτικός","Statesman"],
   "Parm.":["Παρμενίδης","Parmenides"],
   "Phileb.":["Φίληβος","Philebus"],
   "Sym.":["Συμποσίον","Symposium"],
   "Phaedrus":["Φαῖδρος","Phaedrus"],
   "Alc. 1":["Ἀλκιβιάδης α","Alcibiades 1"],
   "Alc. 2":["Ἀλκιβιάδης β","Alcibiades 2"],
   "Hipparch.":["Ἱππάρχος","Hipparchus"],
   "Lovers":["Ε᾿ρασταί","Lovers"],
   "Theag.":["Θεάγης","Theages"],
   "Charm.":["Χαρμίδης","Charmides"],
   "Lach.":["Λάχης","Laches"],
   "Lysis":["Λύσις","Lysis"],
   "Hipp. Maj.":["Ἱππίας μειζών","Greater Hippias"],
   "Hipp. Min.":["Ἱππίας Ε᾿λαττών","Lesser Hippias"],
   "Ion":["Ἴων","Ion"],
   "Menex.":["Μενέξενος","Menexenus"],
   "Cleit.":["Κλειτόφων","Cleitophon"],
   "Tim.":["Τίμαιος","Timaeus"],
   "Criti.":[" Κριτίας","Critias"],
   "Minos":["Μίνως","Minos"],
   "Epin.":["Ε᾿πίνομις","Epinomis"],

   "Euthyd.":["Εὐθύδημος","Euthydemus"],
   "Prot.":["Πρωταγόρας","Protagoras"],
   "Gorg.":["Γοργίας","Gorgias"],
   "Meno":["Μένων","Meno"],

   "Laws":["Νόμοι","Laws"],
   "Republic":["Πολιτεία","Republic"],

   "Metaphysics":["Μεταφυσικά","Metaphysics"],
   "Rhetoric":["Ρητορική","Rhetoric"],
   "Politics":["Πολιτικά","Politics"],

   "Eudemian Ethics":["Ηθικά Ευδήμεια","Eudemian Ethics"],
   "Nicomachean Ethics":["Ἠθικὰ Νικομάχεια","Nicomachean Ethics"]

   }

   # explicit lookup instead of a bare except around the indexing
   full_title = titles.get(abbrev)
   if full_title is None:
      return "RTT"

   greek_title, english_title = full_title

   if not eng:
      # only Greek sources register an entry for the front page
      html_index.append([greek_title, english_title])

   return english_title

# improves layout and whitespace; called just before file is saved
def clean_text(text_string):
   """Tidy line breaks, duplicate milestones and doubled quotes in formatted text.

   text_string -- the complete formatted text of one source

   Returns the cleaned text.  The substitutions run in a fixed order; each
   one targets an artefact of the milestone/paragraph conversion.
   """
   # to fix multiple line breaks created by adjacent "P" and "Bekker" milestones at e.g. start of Metaphysics
   text_string = text_string.replace('<br /><br /><br />','<br />')

   # for e.g. Eud. Ethics
   text_string = re.sub(r'(\d+\.\d+)\s*<br /><br /><!-- paragraph -->\s*<div class="text">',r'\1\n<div class="text">',text_string)

   # remove blank line caused by paragraph line breaks before a (text) div
   text_string = re.sub(r'(<div class="milestone">[^<]*</div>)\s*<br /><br />\s*<!-- paragraph -->',r"\1",text_string)

   # for e.g. Metaphysics 980a:
   text_string = re.sub(r'(<div\s+class="milestone">[^<]*</div>\s*)<br /><br />\s*<!-- paragraph -->',r"\1",text_string)

   # keep only the first occurrence of each repeated milestone div
   milestone_div = re.compile('<div class="milestone">.*?<.div>',re.DOTALL)
   for div in milestone_div.findall(text_string):
      # str.count replaces the deprecated string.count module function
      if text_string.count(div) > 1:
         # from http://www.gossamer-threads.com/lists/python/dev/705513
         # park the first occurrence under a temporary token, delete the
         # rest, then restore the parked one
         tmp = 'TMP-XUEOSOEKE'
         text_string = text_string.replace(div,tmp,1).replace(div,'').replace(tmp,div)

   # for e.g. Prot. 315b
   text_string = text_string.replace(u'““','&ldquo;')
   text_string = text_string.replace(u'””','&rdquo;')

   # drop a second paragraph break that directly follows a numbered paragraph break
   text_string = re.sub(r'(<br /><br /><!-- paragraph -->\s*[0-9\.]+)\s*<br /><br /><!-- paragraph -->',r"\1",text_string)

   return text_string

# saves reformatted source files in UTF-8
def save_text(text,file_name):
   """Join, clean and save formatted text as UTF-8.

   text      -- list of text fragments
   file_name -- file name, appended to db_files_target_directory

   Returns the path of the file that was written.
   """
   global db_files_target_directory

   target_path = db_files_target_directory + file_name
   formatted = clean_text("".join(text))

   # the with-block closes the file; the original "f.close" was missing its
   # call parentheses and therefore never closed the handle
   with codecs.open(target_path,"w","utf-8") as f:
      f.write(formatted)

   return target_path

# returns the table of HTML entities, their placeholder forms, and their replacement characters
def html_code_replacements():
   """Return the entity table as a list of [entity, placeholder, character].

   The placeholder is the entity with its leading "&" replaced by "/"
   (e.g. "&mdash;" -> "/mdash;").
   """
   entity_table = (
      ("&rsquo;",u'\u2019'),
      ("&ldquo;",u'\u201C'),
      ("&rdquo;",u'\u201D'),
      ("&lsqb;",u'\u005B'),
      ("&fund.AnnCPB;",' '),
      ("&dagger;",' '),
      ("&mdash;",u'\u2014'),
      ("&rsqb;",u'\u005D'),
      ("&lt;",u'\u003C'),
      ("&Perseus.publish;",' '),
      ("&responsibility;",' '),
      ("&gt;",u'\u003E'),
      ("&lsquo;",u'\u2018'),
   )

   return [[entity, "/" + entity[1:], character] for entity, character in entity_table]

# load data from source file, get bibliographic info, reformat and fix source errors
def preprocess_text_file (path,eng = 0):
   """Load a source file, collect its bibliographic data and clean its markup.

   The raw file is parsed once to read title, publisher and date.  The text
   is then patched for known source errors, stripped of inline markup that
   the formatter does not use, and its entities are rewritten to the
   placeholder forms listed by html_code_replacements().

   path -- path of the XML source file
   eng  -- accepted for the callers' convenience; not used here

   Returns [file_str, biblio]: the cleaned XML text and an HTML
   "source_info" div naming the print and electronic sources.
   """
   # read the file in one call; the with-block closes the handle (the
   # original "f.close" was missing its call parentheses)
   with open(path,"r") as f:
      file_str = f.read()

   # note: "grep -c '<monogr>' *xml" shows that each group file and text file has only one monograph associated with it.

   # get biblio data
   bib = xml.dom.minidom.parseString(file_str)
   try:
      biblio = bib.getElementsByTagName("biblStruct")[0]
      monogr = biblio.getElementsByTagName("monogr")[0]
   # for Nic. Ethics:
   except IndexError:
      biblio = bib.getElementsByTagName("sourceDesc")[0]
      monogr = biblio.getElementsByTagName("bibl")[0]

   title = monogr.getElementsByTagName("title")[0].firstChild.nodeValue
   pub = monogr.getElementsByTagName("publisher")[0].firstChild.nodeValue
   date = monogr.getElementsByTagName("date")[0].firstChild.nodeValue
   biblio = '<div class = "source_info"><b>Print source:</b> <i>' + title + '</i>, ' + pub + ', ' + date + '. <br /><br /><b>Electronic source:</b> <a href = "http://www.perseus.tufts.edu/hopper/collections">Perseus Digital Library</a></div>\n'

   # replace e.g. <placeName key="perseus,Abdera">Abdera</placeName> (Protagoras 309c)
   file_str = re.sub(re.compile(r"<placeName.*?>",re.DOTALL),'',file_str)
   file_str = re.sub(r'<\/placeName>','',file_str)

   # for Ion 539b missing Greek text:
   s = '<quote type="verse"><l met="dactylic">o)/rnis ga/r sfin e)ph=lqe perhse/menai memaw=sin,'
   r = "πολλαχοῦ δὲ καὶ ἐν Ἰλιάδι, οἷον καὶ ἐπὶ τειχομαχίᾳ: λέγει γὰρ καὶ ἐνταῦθα &mdash;"
   file_str = file_str.replace(s,r+s)

   # add missing text to Lesser Hippias 365b
   if ('plat.tet789_gk.xml' in path):
      file_str = file_str.replace('tetelesme/non e)/stai.</l></quote></p>','tetelesme/non e)/stai.</l></quote></p> <p>ἐν τούτοις δηλοῖ τοῖς ἔπεσιν τὸν πρότον ἑκατέρον τοῦ ἀνδρός, ὡς ὁ μὲν Ἀχιλλεὺς εἴη ἀληθής τε καὶ ἁπλοῦς, ὁ δὲ Ὀδυσσεὺς πολύπροπός τε καὶ ψευδής: ποιεῖ γὰρ τὸν Ἀχιλλέα εἰς τὸν Ὀδυσσέα λέγοντα ταῦτα τὰ ἔπη.</p>')

   # standardize milestones in Rhetoric
   if ('aristot.rh_' in path):
      file_str = re.sub(r'<pb[^>]+n="([0-9]+[a-z]+)"[^>]*>',r'<milestone unit="section" n="\1"/>',file_str)

   # inline markup removed or rewritten, applied strictly in this order
   # (each pattern is compiled with re.DOTALL)
   tag_rules = [
      (r"<l>", ''),
      (r"<l\s+.*?>", ''),
      # e.g. <lg type="dact"> in Rhetoric
      (r"<lg\s+.*?>", ''),
      (r"<\/lg>", ''),
      (r"<\/l><\/quote>", "</quote>"),
      (r"<\/l>", ' '),
      # for e.g. Ion 539a
      (r"quote>\s*<bibl.*?>.*?<\/bibl>", "quote>"),
      (r"<quote.*?>", ' &ldquo;'),
      (r"<\/quote>", '&rdquo; '),
      (r"<cit.*?>", ''),
      (r"<\/cit>", ''),
      (r"<term.*?>", ''),
      (r"<\/term>", ''),
      # e.g. <hi rend="Italic"...qua</hi> in Metaphysics
      (r"<hi\s+.*?>", ''),
      (r"<\/hi>", ''),
      # consider: put <i> tags around titles?
      (r"<bibl>Iliad<\/bibl>", 'Iliad'),
      (r"<bibl>Odyssey<\/bibl>", 'Odyssey'),
      (r"<bibl>.*?<\/bibl>", ''),
      (r"<bibl\s+.*?>.*?<\/bibl>", ''),
      (r"<title>", ''),
      (r"<title\s+.*?>", ''),
      (r"<\/title>", ''),
   ]
   for pattern, replacement in tag_rules:
      file_str = re.sub(re.compile(pattern,re.DOTALL),replacement,file_str)

   # literal fixes for individual passages
   literal_fixes = [
      # Metaphysics 1022b etc.:
      ('&lpar;','&#40;'),
      ('&rpar;','&#41;'),
      # Laws 969d:
      (r'&lt;su\&gt;','σὺ'),
      # Crito:
      ('e)sti/n;g','e)sti/n;'),
      # Phaedo:
      ('h)/kousas;g','h)/kousas;'),
      # mainly for Plato tet1 stray gammas (e.g. Crito 43a):
      (';g<',';<'),
      # Gorgias 476b etc:
      ('Õs',"'s"),
   ]
   for old, new in literal_fixes:
      file_str = file_str.replace(old,new)

   # swap each entity for its placeholder form (e.g. "&mdash;" -> "/mdash;")
   for code in html_code_replacements():
      file_str = file_str.replace(code[0],code[1])

   return [file_str,biblio]

# load a group file (e.g. plat.tet789_gk.xml), parse XML and reformat  
def process_group_file (path, eng=0):
   """Convert every text of a group source file and save the results.

   path -- path of a group file (e.g. plat.tet789_gk.xml)
   eng  -- true when the file holds English translations

   For each <text> in the file's <group> two files are saved through
   save_text: an HTML file and a db text file.  An entry
   [title, db file path, 1 if English else 0, biblio] is appended to the
   module-level db_files list, and the module-level counter i is
   incremented once per text.
   """
   global i
   global html_index, db_files

   file_str,biblio = preprocess_text_file(path,eng)

   replacements = define_utf_replacements()
   group = xml.dom.minidom.parseString(file_str).getElementsByTagName("group")[0]

   # file name endings and the language flag stored in db_files
   if eng:
      html_suffix, db_suffix, language_flag = "-eng.html", "-eng-db.txt", 1
   else:
      html_suffix, db_suffix, language_flag = ".html", "-db.txt", 0

   for text_node in group.getElementsByTagName("text"):
      i += 1
      title = get_title(text_node.getAttribute("n"),eng)
      heading = "<h3>" + title + "</h3>\n"

      text = [heading]
      # the db version carries the heading for Greek texts only
      text_db = []
      if not eng:
         text_db.append(heading)

      body = text_node.getElementsByTagName("body")[0]
      for node2 in body.childNodes:
         # get_text extends the list itself and returns "", which is appended as well
         text.append(get_text(title,text,node2,replacements,eng))
         text_db.append(get_text(title,text_db,node2,replacements,eng))
      text_db.append("end of file")

      save_text(text,title + html_suffix)
      db_files.append([title,save_text(text_db, title + db_suffix),language_flag,biblio])

# load a text source file, parse XML and reformat  
# this is for processing single-text files such as Laws and Republic
def process_text_file (path,title,eng=0):
    """Convert a single-text source file and save the results.

    path  -- path of the source file
    title -- key understood by get_title (e.g. "Laws")
    eng   -- true when the file holds the English translation

    The children of every <div1> inside <body> are converted.  Two files are
    saved through save_text (HTML and db text), an entry
    [title, db file path, 1 if English else 0, biblio] is appended to the
    module-level db_files list, and the module-level counter i is
    incremented.
    """
    global i
    global html_index, db_files

    file_str,biblio = preprocess_text_file(path,eng)

    doc = xml.dom.minidom.parseString(file_str)
    replacements = define_utf_replacements()

    i += 1

    title = get_title(title,eng)
    # the HTML heading has no trailing newline, the db heading does
    text = ["<h3>" + title + "</h3>"]
    text_db = []
    if not eng:
       text_db.append("<h3>" + title + "</h3>\n")

    body = doc.getElementsByTagName("body")[0]
    for section in body.getElementsByTagName("div1"):
        for node2 in section.childNodes:
            # get_text extends the list itself and returns "", which is appended as well
            text.append(get_text(title,text,node2,replacements,eng))
            text_db.append(get_text(title,text_db,node2,replacements,eng))

    text_db.append("end of file")

    # file name endings and the language flag stored in db_files
    if eng:
       html_suffix, db_suffix, language_flag = "-eng.html", "-eng-db.txt", 1
    else:
       html_suffix, db_suffix, language_flag = ".html", "-db.txt", 0

    save_text(text,title + html_suffix)
    db_files.append([title,save_text(text_db, title + db_suffix),language_flag,biblio])
# generate and save HTML/CSS/JS for front page
def create_html_index(search_script_url):
    """Generate and save the front page (index.html) in target_directory.

    search_script_url -- URL of the search script; substituted for the
                         "[search_script_url]" marker in the page header

    The page lists every entry of the module-level html_index list, sorted
    in place by English title.  Texts whose English title is in the
    Aristotle list go into the second box, all others into the Plato box.
    """
    global html_index
    global target_directory

    header = """
     <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
     <head>
        <meta http-equiv="content-type" content="text/html; charset=UTF-8">
        <title>Plato and Aristotle</title>

        <style type="text/css">
           .speaker {
              color:red;
           }
           a {
              text-decoration: none;
           }
           .title_row {
              clear: both;
              width: 450px;
              margin-left:auto;
              margin-right:auto;
           }
           .greek_title {
              float: left;
              width: 180px;
           }
 
           .english_title {
              float: left;
              width: 150px;
              color: #666666;
           }
           .author {
              display: inline;
              margin-bottom: 15px;
           }
           .english_title.author {
              width: 100px;
           }
           #search {
              margin-top: 20px;
              width: 280px;
              float: left;
           }
           #search_text {
              width: 195px;
           }
           .title {
              padding: 3px;
              padding-left: 5px;
              margin-top: 10px;
              margin-left: 15px;
           }
           .subtitle {
              margin-top: 0px;
              font-size: 80%;
              padding-top: 0px;
           }
           
        </style>

       <script src="http://code.jquery.com/jquery-1.9.1.min.js"></script>
       <script src="http://code.jquery.com/ui/1.10.3/jquery-ui.js"></script>
       <link rel="stylesheet" href="http://code.jquery.com/ui/1.10.3/themes/smoothness/jquery-ui.css" />

        <script>
           $(document).ready(function(){

           $("#search_button").click(function() {
              window.location.href = "[search_script_url]?a=" + $("#search_text").val();
           });

           $("#search_text").keyup(function(e) {
              if (e.which == 13)
                 $("#search_button").trigger('click')
           });

           });
       </script>

     </head>
     <body style="margin-left: 80px; margin-right: 150px; width: 700px;">
     <div style="border: 1px solid #DDDDDD; min-width: 650px; float: left;">

     <div class="title">Plato and Aristotle</div>
     <div class="title subtitle">Selected texts from the Perseus Digital Library in Greek and English</div>
    """

    header = header.replace('[search_script_url]',search_script_url)

    search_box = """
<div id="search">
<input id="search_text" type="text" />
<input id="search_button" type="button" value = "Search" />

<div style="font-size: 90%">

<br />

Greek and English texts and English titles and section numbers are
recognized by the search function. You can search for parts of words,
words, phrases, and sentences. You can also search for a particular section or
range of sections. To restrict a search to a particular text, include
&lsquo;in:[title]&rsquo; after the search term.
<br /><br />
Search examples:
<br />
<br />

λόγος<br />
ἐμοὶ δοκεῖ<br />
story<br />
it seems to me<br />
ψυχ in:meno<br />
gorgias 447c<br />
meno 98a - 100b<br />

<br />

Please note that while effort has been made to remove textual errors, some
errors remain. Print sources such as the Loeb Classical Library and the Oxford
Classical Texts are more accurate and can be used to verify any of the passages
here.

</div>

</div>
"""

    footer = """
      </div>

   <div style="clear: both; font-size: 95%; margin-top: 32px;">
   <br />
   Greek texts and English translations are from the <a href = "http://www.perseus.tufts.edu/hopper/collections">Perseus Digital Library</a>. Perseus data and this site are licensed under a <a href="http://creativecommons.org/licenses/by-sa/3.0/us/">Creative Commons Attribution-ShareAlike 3.0 United States License</a>. The code and data for installing this site can be downloaded here: <a href="./db2013.zip">db2013.zip</a>. Installation instructions are available <a href="./instructions.txt">here</a>.
   <br />
   <br />
   This site was developed by Anthony Pasqualoni with funding provided by the <a href="http://www.ppls.ed.ac.uk/">School of Philosophy, Psychology and Language Sciences</a> of the University of Edinburgh.
   <br />
   <br />

   <div>
   Feedback is welcomed. Please send comments to A.M.Pasqualoni at <span style="display: none">yahoo.com</span> sms.ed.ac.uk.
   </div>
   <br />

   </div>

      </body>
      </html>
    """

    html_index.sort(key=lambda tup: tup[1])

    aristotle = ["Metaphysics","Rhetoric","Eudemian Ethics","Politics","Nicomachean Ethics"]

    def title_row(entry):
        # entry is [greek title, english title]; the English title also
        # names the linked page
        return ('\n<div class="title_row">\n<div class="greek_title"><a href="' + entry[1] + '.html">'
                + entry[0] + '</a></div><div class="english_title">' + entry[1] + '</div>\n</div>')

    gk_html = "".join([title_row(entry) for entry in html_index if entry[1] not in aristotle])
    aristotle_gk_html = "".join([title_row(entry) for entry in html_index if entry[1] in aristotle])

    box_open = "<div style=\"float: left; width: 320px; border: 1px solid grey; margin: 20px; padding: 0px 10px 10px 10px; background-color: #DDDDDD\">\n\n"

    gk_html = box_open + "<h3 class=\"greek_title author\">Πλάτων</h3><h3 class=\"english_title author\">Plato</h3>" + gk_html + "</div>\n\n"
    aristotle_gk_html = box_open + "<h3 class=\"greek_title\">Αριστοτέλης</h3><h3 class=\"english_title author\">Aristotle</h3>" + aristotle_gk_html + "</div>\n\n"

    # the fragments are UTF-8 byte strings; decode before writing through the codec
    page = header + gk_html.decode('utf-8') + search_box.decode('utf-8') + aristotle_gk_html.decode('utf-8') + footer.decode('utf-8')

    # the with-block closes the file; the original "f.close" was missing its
    # call parentheses and never closed the handle
    with codecs.open(target_directory + "index.html","w","utf-8") as f:
        f.write(page)
   
# connect to database and return db connection
# uses user-specified parameters / environment variables set in Main
def db_connect(db_settings):
   """Open and return a MySQLdb connection.

   db_settings -- mapping with the keys 'host', 'user', 'passwd' and 'db'
   """
   connection_args = dict((key, db_settings[key]) for key in ('host', 'user', 'passwd', 'db'))
   return MySQLdb.Connection(**connection_args)
   
# create HTML files using data stored in database 
def make_html_from_db(source_title,search_script_url,base_url,db_settings):
   """Create the parallel Greek/English HTML page for one text from the database.

   source_title      -- English title; selects the rows and names the
                        output file (target_directory + title + ".html")
   search_script_url -- substituted for "[search_script_url]" in the template
   base_url          -- substituted for "[base_url]" in the template
   db_settings       -- connection settings passed to db_connect

   The page starts with ./template.html, followed by a title block and one
   container per section that exists in both the greek and english tables.
   The Greek title is looked up in html_index and the source information in
   db_files; either is left empty when no entry matches.
   """
   global html_index
   global db_files
   global target_directory

   with codecs.open("./template.html","r","utf-8") as template:
      header = template.read()

   header = header.replace('[search_script_url]',search_script_url)
   header = header.replace('[base_url]',base_url)

   delimiter = "</div><!-- text section -->\n"

   source_title_greek = ""
   for greek_name, english_name in html_index:
      if english_name == source_title:
         source_title_greek = greek_name.decode('utf-8')

   # default to empty strings so a missing db_files entry cannot leave these
   # names unbound
   biblio_gk = ""
   biblio_eng = ""
   for title,path,eng,source in db_files:
      if title == source_title:
         if eng == 1:
            biblio_eng = source
         else:
            biblio_gk = source

   html = ''
   html += "\n" + '<div class="container">' + "\n"

   html += '<div class="greek top">' + "\n"
   html += '<div class="title">' + source_title_greek + '</div>' + biblio_gk
   html += "</div>\n"
   html += '<div class="english top">' + "\n"
   html += '<div class="title">' + source_title + '</div>' + biblio_eng
   html += "</div>\n"
   html += '</div> <!-- container -->' + "\n"
   html += '<div style="clear: both"></div>' + "\n"

   db = db_connect(db_settings)
   try:
      cur = db.cursor()
      # the title is bound by the driver instead of being pasted into the SQL text
      cur.execute("select greek.source,greek.section,greek.text,english.source,english.section,english.text from greek inner join english on greek.source = %s and english.source = %s and greek.section = english.section;", (source_title, source_title))

      for row in cur:
         html += "\n" + '<div class="container">' + "\n"
         html += '<div class="greek">' + "\n"
         greek = row[2]
         html += greek.decode('utf-8')
         html += delimiter
         html += '<div class="english">' + "\n"
         english = row[5]
         try:
            html += english
         except UnicodeDecodeError:
            # non-ASCII bytes cannot be joined to unicode implicitly
            html += english.decode('utf-8')
         html += delimiter

         html += '</div> <!-- container -->' + "\n"
         html += '<div style="clear: both"></div>' + "\n"
   finally:
      # close the db connection even when the query or decoding fails
      db.close()

   # spacer at the bottom of the page; the class attribute was missing its closing quote
   html += """
   <div class="container" style="width: 100%; height: 20px;">
   </div>

   """

   footer = """

      </body>
      </html>
   """

   with codecs.open(target_directory + source_title + ".html","w","utf-8") as f:
      f.write(header)
      f.write(html)
      f.write(footer)

# create database tables for storing texts in Greek and English
# tables already present are erased and replaced with new tables
def db_create_tables(db_settings):
   """Create the greek and english tables, replacing any that already exist.

   db_settings -- connection settings passed to db_connect

   A failing DROP TABLE (e.g. because the table does not exist yet) is
   reported and otherwise ignored; a failing CREATE TABLE propagates.
   """
   create_sql = "CREATE TABLE %s (id MEDIUMINT NOT NULL AUTO_INCREMENT, source VARCHAR(50), section VARCHAR(20), text VARCHAR(7000), PRIMARY KEY (id))"
   # table name and the message printed when it cannot be dropped
   tables = (
      ("greek", "could not drop table"),
      ("english", "could not drop table."),
   )

   db = db_connect(db_settings)
   try:
      cur = db.cursor()
      for table, drop_failed_message in tables:
         try:
            cur.execute("DROP TABLE " + table)
         except MySQLdb.Error:
            print(drop_failed_message)
         cur.execute(create_sql % table)
   finally:
      # close the db connection
      db.close()

# insert texts into database
def db_insert(title, source_file, eng, db_settings):
   """Insert the sections of one db text file into the database.

   title       -- value stored in the source column
   source_file -- path of a db text file written by save_text
   eng         -- true: insert into the english table, false: greek
   db_settings -- connection settings passed to db_connect

   The file is read line by line.  A line starting with a milestone div
   begins a new section; the text collected so far (which includes the
   milestone line that opened it) is inserted under the previous section
   label.  The line "end of file" flushes the last section.  Text that
   precedes the first milestone is not inserted.
   """
   table = "english" if eng else "greek"
   # the table name comes from the two fixed names above; the values are
   # bound by the driver, which escapes them
   insert_sql = "INSERT INTO " + table + " (source,section,text) VALUES (%s,%s,%s)"
   milestone = re.compile('<div class="milestone">(.*)</div>')

   db = db_connect(db_settings)
   try:
      cur = db.cursor()

      text = ""
      section = ""

      with open(source_file) as source:
         for row in source:
            m = milestone.match(row)
            if m or row == "end of file":
               if text != "" and section != "":
                  cur.execute(insert_sql, (title, section, text))
               if m:
                  section = m.group(1)
               text = ""
            text += row

      # make the inserts permanent on transactional storage engines
      db.commit()
   finally:
      db.close()

replacements = define_utf_replacements()

# Main
# load, format, and save data from source files to MySQL database and HTML files
# ========================================================================================

# environment variables:
target_directory = "[...]"
db_files_target_directory = "./db_files/"
search_script_url = "[e.g. http://www.../cgi-bin/search2013.py]"
base_url = "[e.g. http://www...]"
db_settings = dict(
   host = "localhost",
   user = "[username]",
   passwd = "[password]",
   db = "[db name]"
)

# filled by process_group_file / process_text_file
db_files = []

# counter incremented once per processed text
i = 0
# [greek title, english title] pairs collected by get_title
html_index = []

# (Greek source, English source, title) in processing order;
# a title of None marks a group file holding several texts
source_pairs = [
   ('./source_data/plat.tet789_gk.xml', './source_data/plat.tet789_eng.xml', None),

   ('./source_data/aristot.met_gk.xml', './source_data/aristot.met_eng.xml', "Metaphysics"),

   ('./source_data/aristot.rh_gk.xml', './source_data/aristot.rh_eng.xml', "Rhetoric"),
   ('./source_data/aristot.pol_gk.xml', './source_data/aristot.pol_eng.xml', "Politics"),
   ('./source_data/aristot.eud.eth_gk.xml', './source_data/aristot.eud.eth_eng.xml', "Eudemian Ethics"),

   # Euthydemus, Protagoras, Gorgias, Meno
   ('./source_data/plat.tet6_gk.xml', './source_data/plat.tet6_eng.xml', None),

   ('./source_data/plat.rep_gk.xml', './source_data/plat.rep_eng.xml', "Republic"),

   ('./source_data/plat.laws_gk.xml', './source_data/plat.laws_eng.xml', "Laws"),

   ('./source_data/plat.tet1_gk.xml', './source_data/plat.tet1_eng.xml', None),

   ('./source_data/plat.tet2_gk.xml', './source_data/plat.tet2_eng.xml', None),

   # Phaedrus etc.
   ('./source_data/plat.tet3_gk.xml', './source_data/plat.tet3_eng-corrected.xml', None),

   ('./source_data/plat.tet45_gk.xml', './source_data/plat.tet45_eng.xml', None),
]

# the Greek file of each pair is processed before its English counterpart
for greek_source, english_source, single_title in source_pairs:
   if single_title is None:
      process_group_file(greek_source)
      process_group_file(english_source,eng=1)
   else:
      process_text_file(greek_source,single_title)
      process_text_file(english_source,single_title,eng=1)

print("Creating database tables.")
db_create_tables(db_settings)
for db_entry in db_files:
   # db_entry[0]: title
   # db_entry[1]: path to db file
   # db_entry[2]: 1 if english, 0 if not
   # db_entry[3]: source/biblio info
   print("Inserting " + db_entry[0] + ": " + db_entry[1])
   db_insert (db_entry[0],db_entry[1],db_entry[2],db_settings)
   make_html_from_db(db_entry[0],search_script_url,base_url,db_settings)
create_html_index(search_script_url)
os.system("cp instructions.txt " + target_directory)
print("Finished.")
sys.exit()


