#!/usr/bin/python2


# author: Johannes Henkel jhenkel@jhenkel.de
# License: GNU GPL (see http://www.gnu.org/copyleft/gpl.html)

import xml.dom.minidom
import sys
import string

def _bibtexnames(names):
	"qualifies every whitespace separated name in names with the bibtex: prefix."
	return ["bibtex:" + name for name in names.split()]

# extra fields; they reach every entry type through bibtexml_common
bibtexml_user = _bibtexnames(
	"abstract affiliation contents copyright isbn issn keyword language "
	"lccn location mrnumber price size url category")

# fields appended to the field list of every entry type below
bibtexml_common = _bibtexnames("key annotate crossref") + bibtexml_user

# field lists shared by two entry types each
_thesisfields = "author title school year type address month note"
_paperfields = ("author title booktitle year editor volume number series "
	"pages address month organization publisher note")

# maps the element name of an entry type to the ordered list of field
# element names; handleNamedEntry re-appends the fields in this order.
bibtexml_dtd = {
	"bibtex:article": _bibtexnames(
		"author title journal year volume number pages month note")
		+ bibtexml_common,
	"bibtex:book": _bibtexnames(
		"author editor title publisher year volume number series "
		"address edition month note")
		+ bibtexml_common,
	"bibtex:booklet": _bibtexnames(
		"author title howpublished address month year note")
		+ bibtexml_common,
	"bibtex:manual": _bibtexnames(
		"author title organization address edition month year note")
		+ bibtexml_common,
	"bibtex:techreport": _bibtexnames(
		"author title institution year type number address month note")
		+ bibtexml_common,
	"bibtex:mastersthesis": _bibtexnames(_thesisfields) + bibtexml_common,
	"bibtex:phdthesis": _bibtexnames(_thesisfields) + bibtexml_common,
	"bibtex:inbook": _bibtexnames(
		"author editor title chapter pages publisher year volume "
		"number series type address edition month note")
		+ bibtexml_common,
	"bibtex:incollection": _bibtexnames(
		"author title booktitle publisher year editor volume number "
		"series type chapter pages address edition month note")
		+ bibtexml_common,
	"bibtex:proceedings": _bibtexnames(
		"editor title year volume number series address month "
		"organization publisher note")
		+ bibtexml_common,
	"bibtex:inproceedings": _bibtexnames(_paperfields) + bibtexml_common,
	"bibtex:conference": _bibtexnames(_paperfields) + bibtexml_common,
	"bibtex:unpublished": _bibtexnames("author title note month year")
		+ bibtexml_common,
	"bibtex:misc": _bibtexnames(
		"author title howpublished month year note")
		+ bibtexml_common,
}

def parsePersons(str):
	"""returns a list of Person objects for a bibtex name list.

	str is the content of an author/editor field: one or more names
	separated by the word "and". Each name may be written as

	    First von Last [Jr]      (no comma; trailing "Jr"/"Jr." is the junior part)
	    von Last [Jr], First     (one comma)
	    von Last, Jr, First      (two commas)

	Words starting with a lowercase letter are "von" words. Without a
	comma, the final word is the last name and the run of von words
	directly before it is the von part. With commas, the first section
	holds von and last name only: everything up to its last von word
	(never its final word) is the von part, the rest is the last name,
	so multi-word last names such as "Vallée Poussin" are kept. Von
	words at the end of the First section are put in front of the von
	part.

	Names without any word (empty input, doubled or trailing "and")
	are skipped. Raises ValueError if a name has more than two commas.
	"""
	result = []
	for nametokens in _splitTokens(namelex(str), "and"):
		sections = _splitTokens(nametokens, ",")
		if not [section for section in sections if section]:
			# only separators, no words: there is no name to report
			continue
		if len(sections) > 3:
			raise ValueError(
				"too many commas in name: " + " ".join(nametokens))

		first = von = last = junior = ""
		family = sections[0]

		if len(sections) == 1:
			# "First von Last [Jr]"; a lone "Jr" is taken as the last name
			if len(family) > 1 and family[-1] in ("Jr.", "Jr"):
				junior = family.pop()
			last = family.pop()
			vonBegin = _trailingVonStart(family)
			von = " ".join(family[vonBegin:])
			first = " ".join(family[:vonBegin])
		else:
			if len(sections) == 3:
				# "von Last, Jr, First": the middle section is the junior part
				junior = " ".join(sections[1])
			elif len(family) > 1 and family[-1] in ("Jr.", "Jr"):
				# "von Last Jr, First"
				junior = family.pop()

			# the last name starts after the last von word of the first
			# section; the final word always belongs to the last name.
			lastBegin = max(len(family) - 1, 0)
			while lastBegin > 0 and not family[lastBegin-1][:1].islower():
				lastBegin = lastBegin - 1

			given = sections[-1]
			vonBegin = _trailingVonStart(given)
			# von words trailing the first name precede those of the family part
			von = " ".join(given[vonBegin:] + family[:lastBegin])
			last = " ".join(family[lastBegin:])
			first = " ".join(given[:vonBegin])

		result.append(Person(first,von,last,junior))

	return result

def _splitTokens(tokens, separator):
	"splits a token list into sub-lists at every occurrence of separator."
	groups = [[]]
	for token in tokens:
		if token == separator:
			groups.append([])
		else:
			groups[-1].append(token)
	return groups

def _trailingVonStart(tokens):
	"returns the index at which the trailing run of lowercase-initial tokens starts."
	start = len(tokens)
	while start > 0 and tokens[start-1][:1].islower():
		start = start - 1
	return start

def namelex(str):
	"""A lexer for the parsePersons method.

	Splits str into a list of tokens:
	  - "," for every comma outside of braces,
	  - "and" for the word and in any capitalisation,
	  - "Others" for the word others in any capitalisation,
	  - every other whitespace/comma delimited word unchanged.
	Text inside {...} is kept together as part of one word; commas inside
	braces stay in the word and every whitespace character inside braces
	becomes a single blank. The escaped braces \\{ and \\} are copied
	literally and do not open or close a group.

	Raises ValueError for a closing brace that has no opening brace.
	"""
	text = str  # the parameter name shadows the builtin; use a plain local
	length = len(text)
	result = []
	pos = 0
	while pos < length:
		char = text[pos]
		if char in string.whitespace:
			pos = pos + 1
		elif char == ',':
			result.append(',')
			pos = pos + 1
		else:
			# collect one word, honouring brace nesting
			chunks = []
			brackets = 0
			while pos < length:
				char = text[pos]
				if text[pos:pos+2] in ("\\{", "\\}"):
					# escaped brace: literal text, nesting depth unchanged
					chunks.append(text[pos:pos+2])
					pos = pos + 2
				elif char == '{':
					chunks.append(char)
					brackets = brackets + 1
					pos = pos + 1
				elif char == '}':
					if brackets == 0:
						raise ValueError(
							"unbalanced '}' at position %d in %r" % (pos, text))
					chunks.append(char)
					brackets = brackets - 1
					pos = pos + 1
				elif char == ',' and brackets == 0:
					break
				elif char in string.whitespace:
					if brackets == 0:
						break
					chunks.append(" ")
					pos = pos + 1
				else:
					chunks.append(char)
					pos = pos + 1
			part = "".join(chunks)
			lowered = part.lower()
			if lowered == "and":
				result.append("and")
			elif lowered == "others":
				result.append("Others") # so this will be detected as a last name ... ;-)
			else:
				result.append(part)
	return result

def getText(nodelist):
    "returns the concatenated data of all text nodes contained directly in nodelist."
    pieces = [node.data for node in nodelist
              if node.nodeType == node.TEXT_NODE]
    return "".join(pieces)


class Person:
	"Holds the four parts of a bibtex name: first, von, last and junior."

	def __init__(self, first, von, last, junior):
		self.first, self.von = first, von
		self.last, self.junior = last, junior

	def getFirst(self):
		"returns the first name part."
		return self.first

	def getVon(self):
		"returns the von part."
		return self.von

	def getLast(self):
		"returns the last name part."
		return self.last

	def getJunior(self):
		"returns the junior part."
		return self.junior

	def __repr__(self):
		pieces = [
			"(first: ", self.first,
			", von: ", self.von,
			", last: ", self.last,
			", junior: ", self.junior,
			")",
		]
		return "".join(pieces)
		


def handleFile(bibtexFile):
	"passes every bibtex:entry element found in bibtexFile to handleEntry."
	xmlentries = bibtexFile.getElementsByTagName("bibtex:entry")
	for xmlentry in xmlentries:
		handleEntry(xmlentry)

def handleEntry(bibtexEntry):
	"""processes one bibtex:entry element.

	Every element child of bibtexEntry is handed to handleNamedEntry
	together with the id attribute of the entry. If OPT_drop is set and
	a child is not an entry type listed in bibtexml_dtd, the whole entry
	is removed from its parent node instead (with a warning on stderr
	unless OPT_silent is set) and processing of this entry stops.
	"""
	citationid = bibtexEntry.getAttribute("id")
	# iterate over a copy so that changes to the tree cannot disturb the loop
	for child in bibtexEntry.childNodes[:]:
		if child.nodeType!=child.ELEMENT_NODE:
			continue
		if OPT_drop and child.nodeName not in bibtexml_dtd:
			if not OPT_silent:
				message = "WARNING: Dropping entry \""
				message = message + citationid
				message = message + "\" - "
				message = message + child.nodeName
				message = message + " not supported by bibtexml.\n"
				sys.stderr.write(message)
			# actually drop the entry, as the warning and --help announce
			parent = bibtexEntry.parentNode
			if parent is not None:
				parent.removeChild(bibtexEntry)
			return
		handleNamedEntry(child,citationid)
			
def handleNamedEntry(entry,citationid):
	"""reorders and/or unflattens the fields of one typed entry element.

	entry      -- the entry type element (e.g. bibtex:article)
	citationid -- id of the enclosing bibtex:entry, used in warnings

	If OPT_reorder is set and the entry type is listed in bibtexml_dtd,
	all children are detached and the field elements are appended again
	in the order given by bibtexml_dtd; non-element children are not
	re-attached. Fields with the same element name are all kept, in
	their document order. Fields not listed for the entry type are
	dropped if OPT_drop is set, otherwise they are appended in their
	document order between two marker comments. Warnings go to stderr
	unless OPT_silent is set.

	If OPT_unflatten is set, every bibtex:author / bibtex:editor child
	is replaced by a bibtex:authors / bibtex:editors element filled by
	unflatten from the text of the replaced element.
	"""
	factory = entry.ownerDocument
	if entry.nodeName in bibtexml_dtd and OPT_reorder:
		fieldnames = []  # element names in order of first appearance
		mapkids = {}     # element name -> list of elements with that name
		for child in entry.childNodes[:]:
			if child.nodeType==child.ELEMENT_NODE:
				if child.nodeName not in mapkids:
					mapkids[child.nodeName] = []
					fieldnames.append(child.nodeName)
				mapkids[child.nodeName].append(child)
			entry.removeChild(child)

		# supported fields first, in DTD order
		for field in bibtexml_dtd[entry.nodeName]:
			for kid in mapkids.pop(field, []):
				entry.appendChild(factory.createTextNode("\n      "))
				entry.appendChild(kid)

		# whatever is left is not supported for this entry type
		unsupported = []
		for name in fieldnames:
			unsupported.extend(mapkids.get(name, []))

		if OPT_drop:
			if not OPT_silent:
				for kid in unsupported:
					message = "WARNING: Dropping field \""
					message = message + kid.nodeName
					message = message + "\" in entry \""
					message = message + citationid
					message = message + "\" -  not supported by bibtexml.\n"
					sys.stderr.write(message)
		elif unsupported:
			entry.appendChild(factory.createTextNode("\n      "))
			entry.appendChild(
				factory.createComment(
					"begin fields not supported by bibtexml"))
			for kid in unsupported:
				entry.appendChild(factory.createTextNode("\n        "))
				entry.appendChild(kid)
				if not OPT_silent:
					message = "WARNING: Field \""
					message = message + kid.nodeName
					message = message + "\" in entry \""
					message = message + citationid
					message = message + "\" not supported by bibtexml.\n"
					sys.stderr.write(message)
			entry.appendChild(factory.createTextNode("\n      "))
			entry.appendChild(
				factory.createComment(
					"end fields not supported by bibtexml"))
		entry.appendChild(factory.createTextNode("\n    "))
	if OPT_unflatten:
		# iterate over a copy: children are replaced inside the loop
		for child in entry.childNodes[:]:
			if child.nodeName=="bibtex:author":
				wrappername = "bibtex:authors"
			elif child.nodeName=="bibtex:editor":
				wrappername = "bibtex:editors"
			else:
				continue
			xmlpersons = factory.createElement(wrappername)
			entry.replaceChild(xmlpersons,child)
			unflatten(xmlpersons,getText(child.childNodes))

def unflatten(targetNode, strval):
	"""fills targetNode with one bibtex:person element per name in strval.

	targetNode -- the bibtex:authors / bibtex:editors element to fill
	strval     -- the flat bibtex name list, parsed with parsePersons

	For every person the non-empty parts are appended as child elements
	in this order: bibtex:first (first word of the first name),
	bibtex:middle (remaining words of the first name), bibtex:prelast
	(von part), bibtex:last and bibtex:lineage (junior part). Whitespace
	text nodes are inserted for indentation.
	"""
	factory = targetNode.ownerDocument
	for person in parsePersons(strval):
		targetNode.appendChild(factory.createTextNode("\n        "))
		xmlperson = factory.createElement("bibtex:person")
		targetNode.appendChild(xmlperson)

		givennames = person.getFirst().split()
		fields = [
			("bibtex:first", " ".join(givennames[:1])),
			("bibtex:middle", " ".join(givennames[1:])),
			("bibtex:prelast", person.getVon()),
			("bibtex:last", person.getLast()),
			("bibtex:lineage", person.getJunior()),
		]
		for tagname, text in fields:
			if not text:
				continue
			xmlperson.appendChild(factory.createTextNode("\n          "))
			xmlfield = factory.createElement(tagname)
			xmlperson.appendChild(xmlfield)
			# the text always goes into the element just created; the
			# lineage text must not end up in the bibtex:last element
			xmlfield.appendChild(factory.createTextNode(text))
		xmlperson.appendChild(factory.createTextNode("\n        "))
	targetNode.appendChild(factory.createTextNode("\n      "))

if "--help" in sys.argv or len(sys.argv) <2:
	print """
	
usage: unflatten.py [options] file
	
where the options are:
		
--unflatten
===========
This will convert <bibtex:author>Donald E. Knuth</bibtex:author>
into <bibtex:authors>
   	<bibtex:person>
	 	 <bibtex:first>Donald</bibtex:first>
		 <bibtex:middle>E.</bibtex:middle>
		 <bibtex:last>Knuth</bibtex:last>
		</bibtex:person>
	  <bibtex:authors>
Similar conversions will be applied to editor.
title and keywords will not be converted.
				  
--reorder
=========
This reorders the fields of an entry so that the format conforms
to the bibtexml DTD. If a field appears in the document that is not
supported by bibtexml, a warning message is printed.
		
--drop
======
Same as --reorder, but additionally drops all fields and entries
that are not supported by the bibtexml DTD. Displays a warning message for
each field it drops.
			
--silent
========
No warning messages for dropped or unsupported fields.

"""
else:			
	
	infilename = sys.argv.pop()
	if infilename=='-':
		infile=sys.stdin
	else:
		infile=open(infilename)
	
	OPT_unflatten=0
	OPT_reorder=0
	OPT_drop=0
	OPT_silent=0
	
	if "--unflatten" in sys.argv:
		OPT_unflatten=1
		sys.argv.remove("--unflatten")
	if "--reorder" in sys.argv:
		OPT_reorder=1
		sys.argv.remove("--reorder")
	elif "--drop" in sys.argv:
		OPT_drop=1
		OPT_reorder=1
		sys.argv.remove("--drop")
	if "--silent" in sys.argv:
		OPT_silent=1
		sys.argv.remove("--silent")
		
	if len(sys.argv)>1:
		print "Unknown or redundant arguments:"
		for arg in sys.argv[1:len(sys.argv)]:
			print "\t", arg
		print "type unflatten.py --help for more information."
	elif not OPT_unflatten and not OPT_reorder:
		print "Nothing can be done here. Read --help for more information."
	else:
		bibxmldoc = xml.dom.minidom.parse(infile)
		handleFile(bibxmldoc)
		bibxmldoc.writexml(sys.stdout)
		print ""
