#!/usr/local/bin/python2.5
# t2tconv - txt2tags version 1.7 -> 2.0 convertor
#
# This file converts txt2tags source files (.t2t) from the old
# version 1.7 format to the new 2.0 series format.
#
# Just call it passing the file(s) you want to convert and it
# will update them, saving the original contents as
# <filename>.old
#
# Examples:
#   Convert a single file : t2tconv myfile.t2t
#   Convert multiple files: t2tconv myfile.t2t other.t2t another.t2t
#
# For more info about txt2tags, please go to http://txt2tags.sf.net
#

import re, string, sys

# --- marks of the 2.0 syntax, written by the conversion ---
NEW_VERB_AREA = "```"
NEW_VERB_1LINE = "``` "
NEW_INLINE_VERB = "``"
NEW_INLINE_RAW = '""'
NEW_DEFLIST_ID = ": "

# --- command line options renamed on 2.0 (old name -> new name) ---
CHANGED_OPTS = {
	"--noheaders": "--no-headers",
	"--enumtitle": "--enum-title",
	"--maskemail": "--mask-email",
	"--toclevel": "--toc-level",
	"--toconly": "--toc-only",
}

# --- flags turned on from the command line ---
DEBUG = 0       # -d
VERBOSE = 0     # -v
DRYRUN = 0      # -n

# --- state shared by the functions below ---
FIXES = 0           # fixes counted by Verbose() for the current file
regex = {}          # compiled patterns, loaded by fix_body()
STDIN = '-'
STDOUT = '-'
ESCCHAR = '\x00'    # takes the place of the backslash while masking
listindent = []     # indent of each open list level

#-----------------------------------------------------------------------

def Error(msg):
	"""Report a fatal error and abort the program.

	The message is written to stderr and the process exits with
	status 1, so a failed run can be told apart from a successful one
	(a plain sys.exit() would report success to the calling shell).
	"""
	sys.stderr.write("ERROR: %s\n" % msg)
	sys.exit(1)
def Verbose(msg, linenr):
	"""Count one more fix and, on verbose mode, tell the user about it.

	Every call increments the global FIXES counter. The message is
	printed, prefixed by the line number, only when the global VERBOSE
	flag is on.
	"""
	global FIXES
	FIXES += 1
	if not VERBOSE:
		return
	print("+ [line %04d] %s" % (linenr, msg))
def Debug(msg, i=0, linenr=None):
	"""Print a debug message whose level `i` is not above DEBUG.

	msg    -- the text to show
	i      -- message level; the message is dropped when i > DEBUG
	linenr -- optional line number, shown before the text when given
	"""
	if i > DEBUG:
		return
	if linenr is None:
		text = "(%d) %s" % (i, msg)
	else:
		text = "(%d) %04d:%s" % (i, linenr, msg)
	print(text)
def CopyFile(orig, dest):
	"""Copy the contents of file `orig` to file `dest` (the backup).

	Both files are opened on binary mode, so the copy is byte-for-byte
	identical to the original, whatever its line breaks are. Any I/O
	failure aborts the program through Error().
	"""
	try:
		f = open(orig, 'rb')
		try:
			txt = f.read()
		finally:
			f.close()       # closed even if the read fails
	except (IOError, OSError):
		Error("Cannot read file for copying: %s"%orig)
	try:
		f = open(dest, 'wb')
		try:
			f.write(txt)
		finally:
			f.close()       # closed even if the write fails
	except (IOError, OSError):
		Error("Cannot write backup file: %s"%dest)
def Readfile(file):
	"""Read a file, or STDIN when `file` is '-', and return its lines.

	As a side effect, the global LB is set to the line break found at
	the end of the first line: '\\r\\n', '\\r' or the default '\\n'.
	The file is opened on binary mode, so the original line breaks
	reach the detection untouched on every platform (Savefile() also
	writes on binary mode).
	Any read failure aborts the program through Error().
	"""
	global LB
	if file == '-':
		# KeyboardInterrupt is listed on purpose: the original code
		# also turned a Ctrl-C here into the friendly message
		try:
			data = sys.stdin.readlines()
		except (IOError, OSError, KeyboardInterrupt):
			Error('You must feed me with data on STDIN!')
	else:
		try:
			f = open(file, 'rb')
			try:
				data = f.readlines()
			finally:
				f.close()   # closed even if the read fails
		except (IOError, OSError):
			Error("Cannot read file: %s"%file)
	# detect original file line break and save it
	LB = '\n'
	if data:
		first = data[0]
		if   first[-2:] == '\r\n': LB = '\r\n'
		elif first[-1:] == '\r'  : LB = '\r'
	Debug("line break char: %s" % repr(LB),1)
	return data
def Savefile(file, contents):
	"""Write `contents` to `file`, replacing whatever was there.

	contents -- a single string or a list of strings, written as they
	            are (binary mode, no line break is added or changed)

	A failure to open the file aborts the program through Error().
	"""
	try:
		f = open(file, 'wb')
	except (IOError, OSError):
		Error("Cannot open file for writing:\n    %s"%file)
	try:
		if isinstance(contents, list):
			f.writelines(contents)
		else:
			f.write(contents)
	finally:
		f.close()           # closed even if the write fails

def ParseConfig(text='',name='', target=''):
	"""Parse a '%!name(target): value' config line.

	text   -- the line to be parsed
	name   -- regex for the accepted config name (default: any word)
	target -- regex for the accepted target (default: any word)

	Returns a dictionary with the keys 'name', 'target' and 'value',
	or an empty dictionary when the line is not a matching config.
	Name and target come lowercased; a missing target is 'all'.
	Note that the pattern requires a value of at least two chars.
	"""
	if not text:
		return {}
	re_name = name or '[a-z]+'
	re_target = target or '[a-z]*'
	cfgregex = re.compile(r"""
	  ^%%!\s*               # leading id with opt spaces
	  (?P<name>%s)\s*       # config name
	  (\((?P<target>%s)\))? # optional target spec inside ()
	  \s*:\s*               # key:value delimiter with opt spaces
	  (?P<value>\S.+?)      # config value
	  \s*$                  # rstrip() spaces and hit EOL
	  """%(re_name,re_target), re.I | re.VERBOSE)
	match = cfgregex.match(text)
	if not match:
		return {}
	return {
		'name'  : (match.group('name') or '').lower(),
		'target': (match.group('target') or 'all').lower(),
		'value' : match.group('value'),
	}

class Proprierties:
	"""Split a source file into its head, conf and body areas.

	When a filename is given, the file is read, its areas are located
	and the old %!cmdline settings of the conf area are converted.
	The results are left on these attributes:

	buffer    -- all the file lines, with a dummy item at position 0
	             so that the line numbers match the list indexes
	areas     -- area names; a missing area has '' on its position
	arearef   -- first line number of head, conf and body (0=missing)
	conflines -- lines of the conf area (already fixed)
	bodylines -- lines of the body area
	"""
	def __init__(self, filename=''):
		self.buffer = ['']   # text start at pos 1
		self.areas = ['head','conf','body']
		self.arearef = []
		self.headers = ['','','']
		self.filename = filename
		self.conflines = []
		self.bodylines = []
		if filename:
			self.read_file(filename)
			self.find_areas()
			self.fix_cmdline()

	def read_file(self, file):
		"Append the file lines to the buffer, aborting on an empty file"
		lines = Readfile(file)
		if not lines: Error('Empty file! %s'%file)
		self.buffer.extend(lines)

	def find_areas(self):
		"Run through buffer and identify head/conf/body areas"
		buf = self.buffer ; ref = [1,4,0]       # defaults
		# a blank first line (or no line at all) means no header
		if len(buf) < 2 or not buf[1].strip():
			ref[0] = 0 ; ref[1] = 2
		for i in range(ref[1],len(buf)):        # find body init
			if buf[i].strip() and buf[i][0] != '%':
				ref[2] = i ; break      # !blank, !comment
			if ParseConfig(buf[i], 'include'):
				ref[2] = i ; break      # %!include command
		if ref[1] == ref[2]: ref[1] = 0         # no conf area
		for i in 0,1,2:                         # del !existent
			if not ref[i]: self.areas[i] = ''
		self.arearef = ref                      # save results
		Debug('Head,Conf,Body start line: %s'%ref, 1)
		# store CONF and BODY lines found
		if ref[1]:
			cfgend = ref[2] or len(buf)
			self.conflines = buf[ref[1]:cfgend]
		else:
			# No conf area: slicing from 0 would take the dummy
			# item and the header lines as if they were config,
			# and they would be written twice to the fixed file
			self.conflines = []
		if ref[2]: self.bodylines = buf[ref[2]:]

	def dump_headers(self):
		"Return the header lines, or a single blank line if no header"
		if not self.arearef: self.find_areas()
		if 'head' not in self.areas:
			return ['\n']
		return self.buffer[1:4]

	def fix_cmdline(self):
		"""Convert the old %!cmdline settings found on the conf area.

		The target (-t or --type) is moved to a new %!target line,
		the renamed options are updated and the remaining settings
		become a %!options line. Each change is reported to Verbose().
		"""
		if not self.arearef: self.find_areas()
		if 'conf' not in self.areas: return
		target_regex = r'(-t|--type)\s+(txt|sgml|html|pm6|mgp|moin|man|tex)\s*'
		linenr = self.arearef[1]-1  # for debug messages
		i = 0
		while i < len(self.conflines):
			line = self.conflines[i]
			i = i + 1
			linenr = linenr + 1
			if len(line) < 3: continue
			if line[:2] != '%!': continue
			cfg = ParseConfig(line, 'cmdline')
			if not cfg: continue   # not cmdline
			# get data
			targ, val = cfg['target'], cfg['value']
			m = re.search(target_regex, val)
			if m:
				target = m.group(2)
				# remove target setting and extra spaces
				val = re.sub(target_regex, '', val).strip()
				if targ == 'all':
					if not val:      # %!cmdline: -t target
						self.conflines[i-1] = "%%!target: %s" % target
					else:            # %!cmdline: -t target -a -b -c...
						# the new line goes before the current
						# one, so every counter moves one step
						self.conflines.insert(i-1, "%%!target: %s" % target)
						linenr = linenr + 1 ; i = i + 1
						self.arearef[-1] = self.arearef[-1]+1
					Verbose("Added %!target from old %!cmdline", linenr)
					if not val: continue
			# check for old --<option>
			for old in CHANGED_OPTS.keys():
				if old in val:
					new = CHANGED_OPTS[old]
					val = val.replace(old, new)
					Verbose("Old %-11s option changed to %s" % (old,new),
						linenr)
			# change %!cmdline to %!options
			if targ == 'all':
				self.conflines[i-1] = "%%!options: %s" % val
			else:
				self.conflines[i-1] = "%%!options(%s): %s" % (targ, val)
			Verbose("Old %!cmdline setting changed to %!options", linenr)

def getRegexes():
	"""Return a dictionary with the compiled patterns of the old marks.

	Besides the patterns, the key '_urlskel' holds the dictionary with
	the pieces used to compose the URL and e-mail patterns, and the
	key 'macro' is the very same object of the key 'date'.

	The LOCALE flag is not used: the locale is never set by this
	program, and Python 3 refuses that flag on string patterns.
	"""
	regex = {
	'title'   :re.compile(r'^\s*(?P<id>={1,5})(?P<txt>[^=].*[^=])\1\s*$'),
	'numtitle':re.compile(r'^\s*(?P<id>\+{1,5})(?P<txt>[^+].*[^+])\1\s*$'),
	'areaPreOpen'   : re.compile(r'^---$'),
	'areaPreClose'  : re.compile(r'^---$'),
	'1linePre'      : re.compile(r'^--- (?=.)'),
	'fontBold'      : re.compile(r'\*\*([^\s*].*?)\*\*'),
	'fontItalic'    : re.compile(r'(^|[^:])//([^ /].*?)//'),
	'fontUnderline' : re.compile(r'__([^_].*?)__'),
	'fontMono'      : re.compile(r'`([^`]+)`'),
	'fontBolditalic': re.compile(r'\*/([^/].*?)/\*'),
	'list'          : re.compile(r'^( *)([+-]) ([^ ])'),
	'deflist'       : re.compile(r'^( *)(=) ([^:]+):'),
	'bar'           : re.compile(r'^\s*([_=-]{20,})\s*$'),
	'blankline'     : re.compile(r'^\s*$'),
	'comment'       : re.compile(r'^%'),
	'raw'           : re.compile(r'``(.+?)``')
	}
	regex['date'] = re.compile(r'%%date\b(\((?P<fmt>.*?)\))?', re.I)
	patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
	# character classes and prefixes that compose the link patterns
	urlskel = {
	  'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
	  'guess' : r'(www[23]?|ftp)\.',
	  'login' : r'A-Za-z0-9_.-',
	  'pass'  : r'[^ @]*',
	  'chars' : r'A-Za-z0-9%._/~:,=$@-',
	  'anchor': r'A-Za-z0-9%._-',
	  'form'  : r'A-Za-z0-9/%&=+.,@*_-',
	  'punct' : r'.,;:!?'
	}
	patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
	retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
	             urlskel['proto'],patt_url_login, urlskel['guess'],
	             urlskel['chars'],urlskel['form'],urlskel['anchor'])
	retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
	             urlskel['chars'],urlskel['chars'],urlskel['anchor'])
	patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
	             urlskel['login'],urlskel['form'])
	regex['_urlskel'] = urlskel
	regex['email'] = re.compile(patt_email,re.I)
	# plain link: an URL or an e-mail address
	regex['link'] = \
		re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
	# named link: [label link], where the label may be an image
	regex['linkmark'] = \
		re.compile(r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
		   patt_img, retxt_url, patt_email, retxt_url_local),
		   re.I)
	# image alone on the line
	regex['img'] = re.compile(r'^ *%s\s*$'%patt_img, re.I)
	regex['macro'] = regex['date']
	regex['special'] = re.compile(r'^%!')
	return regex

def EscapeCharHandler(action, data):
	"""Swap the escape char (backslash) and its mask on a string.

	action -- 'mask' changes every backslash to ESCCHAR, 'unmask'
	          does the opposite; any other action is a fatal error
	data   -- the string; blank strings are returned as they are,
	          before the action is even checked
	"""
	if not data.strip():
		return data
	if action not in ('mask', 'unmask'):
		Error("EscapeCharHandler: Invalid action '%s'"%action)
	source, dest = '\\', ESCCHAR
	if action != 'mask':
		source, dest = dest, source
	return data.replace(source, dest)

def maskEscapeChar(data):
	"Mask the escape char on a string or on every item of a list"
	if type(data) is list:
		return [EscapeCharHandler('mask', item) for item in data]
	return EscapeCharHandler('mask', data)

def unmaskEscapeChar(data):
	"Restore the escape char on a string or on every item of a list"
	if type(data) is list:
		return [EscapeCharHandler('unmask', item) for item in data]
	return EscapeCharHandler('unmask', data)

def addLineBreaks(list):
	"""Return a copy of the lines, all of them ended by the LB line break.

	The line breaks at the end of each line are removed, the '\\n'
	chars left inside the line are changed to LB, and then a final LB
	is appended.
	"""
	ending = re.compile('[\n\r]+$')
	return [ending.sub('', line).replace('\n', LB) + LB for line in list]

def doCloseList(howmany=None):
	"""Remove the last `howmany` levels from the global listindent.

	With no number (or zero), all the open levels are removed.
	"""
	global listindent
	count = howmany or len(listindent)
	for _ in range(count):
		listindent.pop()

def finish_him(outlist, outfile):
	"""Save the fixed lines to `outfile` and report what was done.

	Nothing is written when no fix was counted (global FIXES) or on
	dry-run mode (global DRYRUN). Otherwise the current file contents
	are first copied to <outfile>.old and then the file is rewritten.
	"""
	if not FIXES:
		print("%s is ok - nothing to fix" % outfile)
		return
	if DRYRUN:
		print('File %s has %d fixes to be made'%(outfile, FIXES))
		return
	CopyFile(outfile, outfile+'.old')           # save backup
	Savefile(outfile, addLineBreaks(unmaskEscapeChar(outlist)))
	print('Rewritten %s (%d fixes) - backup saved as %s.old'%(
		outfile, FIXES, outfile))

def fix_me_please(infiles):
	"""Convert each one of the given files, one at a time.

	The output is composed by the headers, the conf area and the
	fixed body, in this order. The global FIXES counter is zeroed
	after each file, so the count is always per file.
	"""
	global FIXES
	for infile in infiles:
		prop = Proprierties(infile)
		outlist = []
		outlist.extend(prop.dump_headers() or [])
		outlist.extend(prop.conflines or [])
		outlist.extend(fix_body(prop.bodylines, prop.arearef[-1]))
		finish_him(outlist, infile)
		FIXES = 0

def fix_body(bodylines, firstlinenr=1):
	"""Convert the body lines from the 1.7 marks to the 2.0 marks.

	bodylines   -- list with the body lines; each changed line is
	               replaced in place (without its line break) and the
	               very same list is returned
	firstlinenr -- number of the first body line on the source file,
	               used only on the reported messages

	Every conversion is reported to Verbose(), which counts it as a
	fix. As side effects, the global `regex` is reloaded and the
	global `listindent` (indent of each open list level) is reset.

	The inline structures of a line (raw, mono, macro, link, bold,
	italic, underline) are first changed by masks, saving their
	contents on "banks", and later expanded on the reverse order.
	"""
	global regex, listindent
	regex = getRegexes()
	linkmask  = '@@_link_@@'
	monomask  = '@@_mono_@@'
	macromask = '@@_macro_@@'
	rawmask   = '@@_raw_@@'
	boldmask  = '@@_bold_@@'
	italicmask= '@@_italic_@@'
	undermask = '@@_under_@@'
	f_tt = 0                 # are we inside a verbatim area?
	listindent = []
	f_lastwasblank = 0       # was the previous line a blank one?

	# let's mark it up!
	for lineref, untouchedline in enumerate(bodylines):
		linenr = firstlinenr + lineref
		linkbank = []
		monobank = []
		macrobank = []
		rawbank = []
		boldbank = []
		italicbank = []
		underbank = []

		line = re.sub('[\n\r]+$','',untouchedline)   # del line break
		line = maskEscapeChar(line)                  # protect \ char
		Debug('LINE %04d: %s'%(linenr,repr(line)), 1)  # heavy debug

		#---------------------[ PRE formatted ]----------------------

		# we're already on a PRE area
		if f_tt:
			# closing PRE (any other line inside it is left alone)
			if regex['areaPreClose'].search(line):
				line = regex['areaPreClose'].sub(NEW_VERB_AREA, line)
				bodylines[lineref] = line
				Verbose("Old --- mark changed to ``` to Close Verbatim Area",
					linenr)
				f_tt = 0
			continue

		# detecting PRE area init
		if regex['areaPreOpen'].search(line):
			line = regex['areaPreOpen'].sub(NEW_VERB_AREA, line)
			bodylines[lineref] = line
			Verbose("Old --- mark changed to ``` to Open Verbatim Area",
				linenr)
			f_lastwasblank = 0
			f_tt = 1
			continue

		# one line PRE-formatted text
		if regex['1linePre'].search(line):
			line = regex['1linePre'].sub(NEW_VERB_1LINE,line)
			bodylines[lineref] = line
			Verbose("Old --- mark changed to ``` to define 1 Line"
				" Verbatim", linenr)
			f_lastwasblank = 0
			continue

		#---------------------[ blank lines ]-----------------------

		if regex['blankline'].search(line):

			# two blank lines in a row close all the open lists
			if f_lastwasblank and listindent: doCloseList()

			f_lastwasblank = 1
			continue

		#---------------------[ special ]------------------------

		if regex['special'].search(line):

			cfg = ParseConfig(line, 'include')
			if not cfg: continue  # plain comment
			newval = ''
			newtarget = ''
			val, targ = cfg['value'], cfg['target']
			# a value inside `` or '' gets one more pair of marks
			if   val[0] == val[-1] == '`': newval = "`%s`" % val
			elif val[0] == val[-1] == "'": newval = "'%s'" % val
			if newval:
				if targ and targ != 'all':
					newtarget = '(%s)'%targ
				line = "%%!include%s: %s"%(newtarget, newval)
				bodylines[lineref] = line
				Verbose("The %!include command has now DOUBLE"
					" marks", linenr)
			continue

		#---------------------[ comments ]-----------------------

		# just skip them (if not macro or config)
		if regex['comment'].search(line) and not \
		   regex['date'].match(line):
			continue
		f_lastwasblank = 0       # reset blank status

		#---------------------[ Title ]-----------------------

		if (regex['title'].search(line) or
		    regex['numtitle'].search(line)) and not listindent:
			continue

		#---------------------[ apply masks ]-----------------------
		### protect important structures from escaping and formatting

		# protect raw text
		while regex['raw'].search(line):
			txt = regex['raw'].search(line).group(1); rawbank.append(txt)
			line = regex['raw'].sub(rawmask,line,1)
		# protect pre-formatted font text
		while regex['fontMono'].search(line):
			txt = regex['fontMono'].search(line).group(1); monobank.append(txt)
			line = regex['fontMono'].sub(monomask,line,1)
		# protect macros
		while regex['macro'].search(line):
			txt = regex['macro'].search(line).group(); macrobank.append(txt)
			line = regex['macro'].sub(macromask,line,1)
		# protect URLs and emails
		while regex['linkmark'].search(line) or regex['link'].search(line):
			# try to match plain or named links
			match_link  = regex['link'].search(line)
			match_named = regex['linkmark'].search(line)
			# define the current match
			if match_link and match_named:
				# both types found, which is the first?
				m = match_link
				if match_named.start() < match_link.start():
					m = match_named
			else:
				# just one type found, we're fine
				m = match_link or match_named
			# extract link data and apply mask
			if m == match_link:              # plain link
				label = ''
				link  = m.group()
				line  = regex['link'].sub(linkmask,line,1)
			else:                            # named link
				label = m.group('label').rstrip()
				link  = m.group('link')
				line  = regex['linkmark'].sub(linkmask,line,1)
			# save link data to the link bank
			linkbank.append((label, link))
		# protect bold
		while regex['fontBold'].search(line):
			txt = regex['fontBold'].search(line).group(1)
			boldbank.append(txt)
			line = regex['fontBold'].sub(boldmask,line,1)
		# protect italic
		while regex['fontItalic'].search(line):
			txt = regex['fontItalic'].search(line).group(2)
			italicbank.append(txt)
			line=regex['fontItalic'].sub('\\1%s'%italicmask,line,1)
		# protect underline
		while regex['fontUnderline'].search(line):
			txt = regex['fontUnderline'].search(line).group(1)
			underbank.append(txt)
			line = regex['fontUnderline'].sub(undermask,line,1)


		#---------------------[ Horizontal Bar ]--------------------

		if regex['bar'].search(line): continue

		#---------------------[ Quote ]-----------------------
		#---------------------[ Lists ]-----------------------

		if (regex['list'].search(line) or regex['deflist'].search(line)):

			if regex['list'].search(line): rgx = regex['list']
			else: rgx = regex['deflist']
			m = rgx.search(line)
			listitemindent = m.group(1)
			listtype = m.group(2)
			if listtype == '=':
				listdefterm = m.group(3)
				rest = regex['deflist'].sub('',line).strip()
				line = listitemindent + NEW_DEFLIST_ID + listdefterm
				Verbose("Old = DEFTERM: mark changed to "
					": DEFTERM in Definition List",
					linenr)
				if rest:
					# the definition goes to its own line
					line = line + '\n' + rest
					Verbose("Oneliners on deflist are not"
						" allowed anymore", linenr)
			# new sublist
			if not listindent or len(listitemindent) > len(listindent[-1]):
				listindent.append(listitemindent)
			# closing sublists
			# The emptiness test is needed when this item is less
			# indented than the first item of the list: all the
			# levels are closed and there is no last one to check
			while listindent and \
			      len(listitemindent) < len(listindent[-1]):
				doCloseList(1)

		#---------------------[ Table ]-----------------------
		#---------------------[ Beauti ]-----------------------

		b_msg = "The %s inline marks must be GLUED with its contents"
		for txt in underbank:
			if txt.strip() != txt:
				txt = txt.strip()
				Verbose(b_msg%'under', linenr)
			line = line.replace(undermask,"__%s__"%txt,1)
		for txt in italicbank:
			if txt.strip() != txt:
				txt = txt.strip()
				Verbose(b_msg%'italic', linenr)
			line = line.replace(italicmask,"//%s//"%txt,1)
		for txt in boldbank:
			if txt.strip() != txt:
				txt = txt.strip()
				Verbose(b_msg%'bold', linenr)
			line = line.replace(boldmask,"**%s**"%txt,1)

		if regex['fontBolditalic'].search(line):
			line = regex['fontBolditalic'].sub(r'**//\1//**', line)
			Verbose("Old */bolditalic/* mark is dead, "
				"now using **//bold+italic//**", linenr)

		#---------------------[ URL & E-mail ]-----------------------

		for label,url in linkbank:
			if not label: link = url                # plain
			else: link = "[%s %s]" % (label, url)   # named
			line = line.replace(linkmask, link, 1)

		#---------------------[ Image ]-----------------------

		if regex['img'].match(line):
			line = "%s%s%s" % (' '*10, line.strip(), ' '*10)
			Verbose("The centered [image] now must be enclosed"
				" by spaces", linenr)

		#---------------------[ Expand Macros ]-----------------------

		for macro in macrobank:
			line = line.replace(macromask, macro, 1)

		#---------------------[ Expand PREs ]-----------------------

		for mono in monobank:
			mono = NEW_INLINE_VERB + mono.strip() + NEW_INLINE_VERB
			line = line.replace(monomask, mono, 1)
			Verbose("Old `verb` inline mark changed to ``verb``",
				linenr)

		#---------------------[ Expand raw ]-----------------------

		for raw in rawbank:
			raw = NEW_INLINE_RAW + raw.strip() + NEW_INLINE_RAW
			line = line.replace(rawmask, raw, 1)
			Verbose("Old ``raw`` inline mark changed to "
				'""raw""', linenr)

		#---------------------[ Final Escapes ]-----------------------

		bodylines[lineref] = line

	return bodylines


################################################################################
################################################################################


if __name__ == '__main__':

	# Each flag found is taken out of the arguments, so that only
	# the filenames are left to be converted
	if '-d' in sys.argv:
		sys.argv.remove('-d')
		DEBUG = 1
	if '-v' in sys.argv:
		sys.argv.remove('-v')
		VERBOSE = 1
	if '-n' in sys.argv:
		sys.argv.remove('-n')
		DRYRUN = 1

	Debug("system platform: %s"%sys.platform,1)

	fix_me_please(sys.argv[1:])
	sys.exit(0)

# vim: ts=4
