#!/usr/local/bin/python2.7
#
# Run doclifter against an entire manual tree.
# Sees all files in section 1 through 8 by default.

import sys, os, getopt, signal, time, re, commands, cStringIO, stat
import hotshot, hotshot.stats

# Configuration globals; several are overridden by command-line
# switches in doclifter_driver().
mandir = "/usr/share/man"               # Root of the manual hierarchy (-I)
patchdir = os.path.abspath("prepatch")  # Directory of pre-lift patches (-p)
outdir = None                           # Output tree, chosen in doclifter_driver()
patched = 0                             # Count of pages that had a prepatch
makehtml = False                        # Also generate HTML? (-h)
xslfragment = None                      # Path of generated citerefentry.xsl
processed = set([])                     # Patch stems not yet matched to a page
excluded_files = []                     # Pages to skip entirely (-X)

def manfile(section, basename=""):
    "Return a manual file or directory based on section name."
    if not basename:
        # No basename: the caller wants the section directory itself.
        return "%s/man%s/" % (mandir, section)
    if basename.startswith("/"):
        # Already an absolute path; hand it back untouched.
        return basename
    if basename.endswith((".gz", ".bz2", ".Z")):
        # A real (compressed) filename within the section directory.
        return "%s/man%s/%s" % (mandir, section, basename)
    # A bare name: add the section suffix and compression extension.
    # Only the first character of the section picks the directory,
    # so subsections like "3x" land in man3/.
    return "%s/man%s/%s.%s.gz" % (mandir, section[:1], basename, section)

def analyze_manpage(manpage):
    "Provide log annotations based on content."
    # Marker strings left behind by various man-page generators, paired
    # with the annotation to emit when the marker is found.
    exclusions = (
        ("<html>", "This page is HTML"),
        ("auto-generated by docbook2man-spec", "DocBook"),
        ("automatically generated by docbook2man",  "DocBook"),
        ("Generated by db2man.xsl", "XML DocBook"),
        ("Automatically generated by Pod::Man", "Pod::Man"),
        ("Man page generated from reStructeredText", "reStructuredText"),
        ("Man page generated from reStructuredText", "reStructuredText"),
        ("Generator: DocBook XSL Stylesheets", "DocBook stylesheets"),
        ("Generated by docutils manpage writer", "docutils"),
        ("DocBook SGML with docbook-to-man", "DocBook SGML"),
        ("Doxygen", "Doxygen"),
        )
    fp = open(manpage)
    try:
        text = fp.read()
    finally:
        fp.close()
    annotations = []
    for (pattern, generator) in exclusions:
        if pattern in text:
            annotations.append("Generated from %s\n" % generator)
    return "".join(annotations)

def fetch_page(file, localcopy, patch):
    """Grab a local copy of a man page, patching if needed.

    Returns a (status, output) pair: status is nonzero on copy
    failure, output accumulates log text (including patch chatter).
    """
    output = ""
    # Uncompress (or just copy) the page into localcopy.
    if file[-3:] == ".gz":
        cstat = os.system("gunzip <%s >%s" % (file, localcopy))
    elif file[-4:] == ".bz2":
        cstat = os.system("bunzip2 <%s >%s" % (file, localcopy))
    elif file[-2:] == ".Z":
        cstat = os.system("uncompress <%s >%s" % (file, localcopy))
    else:
        cstat = os.system("cp %s %s" % (file, localcopy))
    if os.WIFSIGNALED(cstat) or os.WEXITSTATUS(cstat):
        # BUG FIX: this used to return a 3-tuple with an unformatted
        # "%d" message; callers unpack a 2-tuple, so the error path
        # itself blew up.  Format the status into the message instead.
        return (1, output + "manlifter: copy failed, status %d" % cstat)
    if os.path.exists(patch):
        # Apply the prepatch in the output directory, then remove the
        # droppings patch(1) leaves behind.
        here = os.getcwd()
        os.chdir(outdir)
        patch = commands.getoutput("patch <%s" % (patch,))
        stem = os.path.basename(localcopy)
        os.system("rm -f %s.orig %s.rej" % (stem, stem))
        os.chdir(here)
        if patch:
            output += patch + "\n"
    return (0, output)

def getstem(file):
    "Reduce the name of a man page or generated HTML file to its stem"
    name = file
    if name.endswith(".xml"):
        name = name[:-4]
    # Drop the trailing section suffix (everything after the last dot).
    return ".".join(name.split(".")[:-1])

def make_xml(source, options):
    """Make XML from specified man page.

    Returns (status, link-target-or-None, log output).  Status 2 means
    the page was a .so inclusion wrapper for another page.
    """
    (doclifter_status, output) = commands.getstatusoutput("doclifter -I %s %s %s" % (mandir, options, source))
    if output:
        output += "\n"
    if not os.WIFEXITED(doclifter_status):
        # Should never happen
        raise ValueError
    doclifter_status = os.WEXITSTATUS(doclifter_status)
    if doclifter_status != 2:
        return (doclifter_status, None, output)
    # Inclusion wrapper: dig the .so target out of the page source so
    # the caller can symlink to the real translation.
    fp = open(source)
    try:
        contents = fp.read()
    finally:
        fp.close()
    lxmlloc = None
    target = re.compile(r"\.so\s+(.*)").search(contents)
    if target:
        lxmlloc = os.path.join(outdir, getstem(target.group(1)) + ".xml")
    return (2, lxmlloc, output)

def validate(translation):
    """Validate an XML file produced by translation.

    Returns (status, output): 0 = OK or validation skipped, 6 =
    validation failure, -1 = xmllint killed by a signal.
    """
    output = ""
    # If it has entity inclusions it won't validate, so don't try.
    # This is only a good idea because man pages that have these are
    # usually trivial wrappers like builtins.1
    try:
        fp = open(translation)
        text = fp.read()
        fp.close()
        inclusions = re.compile("<!ENTITY.*SYSTEM '(.*)'>").search(text)
        if inclusions:
            output += "Won't validate due to entity inclusion of %s\n" % inclusions.group(1)
            return (0, output)
        if "<equation" in text:
            output += "Won't validate due to MathML inclusions\n"
            return (0, output)
    except IOError:
        # Fall through and let xmllint report on the missing file.
        output += "%s is missing.\n" % translation
    # Run the validation checker
    (bstat, validate_out) = commands.getstatusoutput("xmllint --xinclude --postvalid %s >/dev/null" % translation)
    if validate_out:
        output += validate_out  + "\n"
    if os.WIFSIGNALED(bstat):
        output += "Bailing out of xmllint...\n"
        return (-1, output)
    # CLEANUP: the status was previously tested twice in a row; merged
    # into a single check.
    xmllint_error_status = os.WEXITSTATUS(bstat)
    if xmllint_error_status:
        output += "xmllint error status:%s\n" % xmllint_error_status
        return (6, output)
    return (0, output)

def format(translation, fmt, xslfragment):
    """Format an XML file to a specified format with xmlto.

    Returns (status, output): 0 = OK, 6 = xmlto error, -1 = killed by
    a signal.  If xslfragment is set it is passed via -m so
    citerefentry elements render as links.
    """
    output = ""
    here = os.getcwd()
    os.chdir(os.path.dirname(translation))
    # BUG FIX: this condition was inverted -- with a fragment present
    # it omitted -m, and with none it emitted "xmlto -m None ...".
    if xslfragment:
        command = "xmlto -m %s %s %s" % (xslfragment, fmt, os.path.basename(translation))
    else:
        command = "xmlto %s %s" % (fmt, os.path.basename(translation))
    (bstat, format_out) = commands.getstatusoutput(command)
    os.chdir(here)
    if format_out:
        output += format_out  + "\n"
    if os.WIFSIGNALED(bstat):
        output += "Bailing out of %s formatting...\n" % fmt
        return (-1, output)
    format_error_status = os.WEXITSTATUS(bstat)
    if format_error_status:
        output += "format error status:%s\n" % format_error_status
        return (6, output)
    return (0, output)

def deploy(source, target):
    "Move a file into its permanent location; (status, message) result."
    try:
        os.rename(source, target)
    except OSError as err:
        return (3, "Rename of %s to %s failed, errno = %d" % (source, target, err.errno))
    return (0, "")

def makelink(source, target):
    "Symlink target to source, silently tolerating an existing link."
    src = os.path.abspath(source)
    dst = os.path.abspath(target)
    try:
        os.symlink(src, dst)
    except OSError:
        # Most likely the link already exists; best-effort by design.
        pass

def singlerun(file, options, tmpstem="foo"+`os.getpid()`, batchmode=False):
    """Test-format a single file.

    Returns (status, foundpatch, output): the doclifter/validation
    status code, whether a prepatch exists for this page, and the
    accumulated log text.  In batchmode results are deployed under
    outdir and up-to-date conversions are skipped.
    """
    global patched
    foundpatch = False
    if not os.path.exists(file):
        return (0, False, "")
    output = ""
    # Strip any compression suffix, leaving name-with-section (e.g. ls.1).
    if file[-3:] == ".gz":
        withsect = os.path.basename(file)[:-3]
    elif file[-4:] == ".bz2":
        withsect = os.path.basename(file)[:-4]
    elif file[-2:] == ".Z":
        withsect = os.path.basename(file)[:-2]
    else:
        withsect = os.path.basename(file)
    # One-character section code; subsections like "3x" map to man3.
    dot = withsect.rindex(".")
    section = withsect[dot+1:dot+2]
    subdir = os.path.join(outdir, "man" + section)
    stem = getstem(withsect)
    xmlloc = os.path.join(subdir, stem + ".xml")
    # Count patches here so our stats won't be off
    patch = os.path.join(patchdir, withsect + ".patch")
    if os.path.exists(patch):
        patched += 1
        foundpatch = True
    try:
        global processed
        tmpstem = os.path.join(outdir, tmpstem)
        source = tmpstem + ".man"
        # Grab the actual manual page
        localcopy = os.path.join(outdir, withsect)
        (status, output) = fetch_page(file, localcopy, patch)
        if (status):
            return (status, False, output)
        # Save work by doing conversions only as needed
        analysis = analyze_manpage(localcopy)
        rebuild_xml = True
        if batchmode and os.path.exists(xmlloc):
            # Skip translation when the XML is newer than the source page.
            if os.stat(file).st_mtime < os.lstat(xmlloc).st_mtime:
                output += "XML conversion is up to date.\n"
                processed.discard(withsect)
                rebuild_xml = False
        # Pages generated from DocBook or Doxygen masters don't need
        # lifting at all; report status 7 ("NOP").
        if batchmode and "DocBook" in analysis:
            output += "Made from DocBook masters.\n"
            processed.discard(withsect)
            return (7, False, output)
        if batchmode and "Doxygen" in analysis:
            output += "Made by Doxygen.\n"
            processed.discard(withsect)
            return (7, False, output)
        htmlloc = os.path.join(subdir, stem + ".html")
        if rebuild_xml:
            # Note that the patch was used
            processed.discard(withsect)
            # Add any annotations
            output += analysis
            # Save the location of the page (read back by patchman())
            loc = tmpstem + ".loc"
            lfp = open(loc, "w")
            lfp.write(withsect)
            lfp.close()
            # Move the source file into the output directory
            os.rename(localcopy, source)
            # Run the translator
            (doclifter_status, lxmlloc, note) = make_xml(source, options)
            output += note
            if doclifter_status not in (0, 2):
                if not batchmode:
                    output +=  "doclifter error status: %s\n" % doclifter_status
                return (doclifter_status, foundpatch, output)
            translation = tmpstem + ".man.xml"
            # Warn about FIX-ME problems
            output += commands.getoutput("grep FIX-ME " + translation + " 2>/dev/null")
            # If the translation went through, cleaning up consists
            # of putting this in its permanent location.
            try:
                # This will foo up if we ever have to symlink between dirs
                if batchmode and not os.path.exists(subdir):
                    os.mkdir(subdir)
            except OSError, e:
                return(3, foundpatch, output + "Creation of %s failed, errno = %d\n"%(subdir,e.errno))
            if doclifter_status == 2:
                # .so wrapper: just symlink to the real page's XML.
                makelink(lxmlloc, xmlloc)
            if doclifter_status == 0:
                if not makehtml:
                    (status, more) = validate(translation)
                    output += more
                    if batchmode and status:
                        # Invalid XML: remove the artifacts so the next
                        # run retries from scratch.
                        os.remove(translation)
                        try:
                            os.remove(htmlloc)
                        except OSError:
                            pass
                        return (status, foundpatch, output)
                if batchmode:
                    (status, more) = deploy(translation, xmlloc)
                    translation = xmlloc
                    output += more
                    if status:
                        return (status, foundpatch, output)
        # Save work by doing HTML conversions only as needed
        rebuild_html = makehtml
        if batchmode and os.path.exists(htmlloc):
            if os.stat(xmlloc).st_mtime < os.lstat(htmlloc).st_mtime:
                output += "HTML conversion is up to date\n"
                rebuild_html = False
        if rebuild_html:
            if batchmode:
                htmlloc = os.path.join(subdir, stem + ".html")
            else:
                htmlloc = stem + ".html"
            if batchmode and stat.S_ISLNK(os.lstat(xmlloc).st_mode):
                # The XML is itself a symlink; mirror it for the HTML.
                makelink(os.readlink(xmlloc)[:-4]+".html", htmlloc)
            else:
                # NOTE(review): if rebuild_xml was False, `translation`
                # is unbound here -- looks like a latent NameError on
                # the skipped-XML + rebuild-HTML path; confirm.
                (status, more) = format(translation, "xhtml-nochunks", xslfragment)
                output += more
                if status:
                    if batchmode:
                        os.remove(xmlloc)
                        try:
                            os.remove(htmlloc)
                        except OSError:
                            pass
                    return (status, foundpatch, output)
    finally:
        # Clean up
        if batchmode:
            if os.path.exists(source):
                os.remove(source)
    return (0, foundpatch, output)

def sectionfiles(sections):
    "Generate files corresponding to a list of sections."
    files = []
    for section in sections:
        # Expand each entry of the section directory to a full path.
        for entry in os.listdir(manfile(section)):
            files.append(manfile(section, entry))
    files.sort()
    return files

# Running totals shared between massrun() and its report() closure.
total = eligible = starttime = 0

def report_elapsed(elapsed):
    "Report elapsed time in friendly format."
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    return "%02dh:%02dm:%02ds" % (hours, minutes, seconds)

def massrun(files, options, profiling):
    """Test against all files in specified sections.

    Runs singlerun() in batchmode over every file, tallying error
    classes; SIGUSR2 prints a progress report, SIGHUP/INT/TERM bail.
    """
    def bailout(signum, frame):
        # Terminate cleanly on a fatal signal.
        print "\nBailing out with signal %d..." % signum
        # NOTE(review): doclifter_test*.py droppings are presumably left
        # by the doclifter subprocess -- confirm they are still created.
        os.system("rm -f doclifter_test%s.py doclifter_test%s.py[co]" % (os.getpid(), os.getpid()))
        sys.exit(0)
    global total, eligible, starttime
    total = 0
    starttime = int(time.time())
    eligible = len(files)
    doclifter_error_count = xmllint_error_count = docbook_count = total = 0
    def report(sig, frame, out=sys.stderr):
        # Progress/summary line; also installed as the SIGUSR2 handler.
        ftotal = float(total)
        elapsed = int(time.time()) - starttime
        out.write("\n%%%d of %d files in %s, %d OK, %d preconverted, %d patched, %d doclifter errors, %d validation failures, %2.2f%% good.\n" % \
              (total, eligible, report_elapsed(elapsed),
               (total - doclifter_error_count - xmllint_error_count),
               docbook_count,
               patched,
               doclifter_error_count,
               xmllint_error_count,
               (ftotal-doclifter_error_count-xmllint_error_count-patched)*100.0/ftotal))
    def test(file, options):
        # Lift one page and log "! file=status[*] (seconds)"; the
        # trailing * flags pages that had a prepatch.
        before = time.time()
        (status, patched, output) = singlerun(file=file, options=options, batchmode=True)
        after = time.time()
        sys.stdout.write("! %s=%d%s (%2.2f)\n%s\n" % (file, status, " *"[patched], after-before, output))
        return (status, output)
    signal.signal(signal.SIGUSR2, report)
    signal.signal(signal.SIGHUP, bailout)
    signal.signal(signal.SIGINT, bailout)
    signal.signal(signal.SIGTERM, bailout)
    print "%Test started", time.ctime()
    if profiling:
        print "%Profiling enabled.\n"
    else:
        print "%Profiling not enabled.\n"
    try:
        for file in files:
            if file in excluded_files:
                continue
            (status, output) = test(file=file, options=options)
            if status == -1:
                break
            elif status in (1, 4):	# Doclifter parse or internal error.
                doclifter_error_count += 1
            elif status == 2:		# .so inclusion
                pass
            elif status in (3, 5):	# File I/O error or keyboard interrupt
                pass
            elif status == 6:		# Validation failure
                xmllint_error_count += 1
            elif status == 7:		# Already DocBook/Doxygen generated
                docbook_count += 1
            total = total + 1
    except KeyboardInterrupt:
        pass
    report(0, sys.stdout)

htmlheader = '''
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Manlifter contents page</title>
</head>
<body>
'''
htmltrailer = "</body>\n</html>\n"

def genindex(ofp):
    # Collect all section/name/description triples
    filelist = []
    section_re = re.compile("/man([^/]*)")
    extract_re = re.compile("<refpurpose>([^<]*)</refpurpose>")
    section_dict = {}
    for (root, dirs, files) in os.walk('xmlman'):
        for file in files:
            try:
                if not file.endswith(".xml"): continue
                # Extract the manual section
                m = section_re.search(root)
                if m:
                    section = m.group(1)
                else:
                    continue
                section_dict[section] = []
                # Extract the manual page name
                name = ".".join(file.split(".")[:-1])
                # Extract the description
                file = os.path.join(root, file)
                fp = open(file)
                contents = fp.read()
                fp.close()
                m = extract_re.search(contents)
                if m:
                    description = m.group(1)
                else:
                    description = "(no description)"
                # Build an index entry
                filelist.append((section, name, description))
            except IOError:
                pass
    filelist.sort()	# In case the directory was pieced together by several runs
    for (section, name, description) in filelist:
        section_dict[section].append((name, description))
    keys = section_dict.keys()
    keys.sort()

    for section in keys:
        ofp.write(htmlheader)
        ofp.write("<h1>%s:</h1>\n<dl>\n" % section)
        for (name, description) in section_dict[section]:
            ofp.write("<dt><a href='man%s/%s.html'>%s</a></dt><dd>%s</dd>\n" \
                  % (section, name, name, description))
        ofp.write("</dl>\n")
        ofp.write(htmltrailer)

def statistics():
    """Digest a manlifter batch log from stdin into summary statistics."""
    # Legends for status codes 0-7, indexed by the code itself.
    legends = (
        "OK ",	# No error
        "???",	# Unliftable (normal error status)
        ".so",	# failure due to inclusion
        "I/O",	# I/O failure, could not reach page
        "!!!",	# Internal error, doclifter blew up
        "^C ",	# Translation interrupted
        "XML",	# XML validation failure
        "NOP",	# Already in DocBook
        )
    counts = [0] * len(legends)

    # Matches the "N patched" figure emitted by massrun's report().
    patchcount = re.compile("([0-9]+) patched")
    warnings = 0
    warn_latch = False
    # NOTE(review): `patched` is only bound if a "N patched" line is
    # seen; a log without one makes the final prints raise NameError
    # (or silently use the module global) -- confirm.
    while True:
        line = sys.stdin.readline()
        if not line:
            break
        elif not line.strip():
            # Blank line ends a log stanza; count it as one warning if
            # the stanza contained any "warning -" lines.
            if warn_latch:
                warnings += 1
            continue
        m = patchcount.search(line)
        if m:
            patched = int(m.group(1))
        if "warning -" in line:
            warn_latch = True
        # Only "! file=status" leader lines feed the status counters.
        if line[0] != '!':
            continue
        warn_latch = False
        line = line[2:]
        rcolon = line.rindex("=")
        file = line[:rcolon]
        retval = line[rcolon+1:].split()[0]
        # A trailing * flags a prepatched page; strip it.
        if retval.endswith("*"):
            retval = retval[:-1]
        if file.endswith(".gz"):
            file = file[:-3]
        elif file.endswith(".bz2"):
            file = file[:-4]
        elif file.endswith(".Z"):
            file = file[:-2]
        file = os.path.basename(file)
        counts[int(retval)] += 1

    total = sum(counts)
    for (i, count) in enumerate(counts):
        print "%d = %s: %5d	%2.2f%%" % (i, legends[i], count, (count * 1.0)*100/total)
    good = counts[0]
    bad = sum(counts[1:7])
    print "Total: %d  Errors: %d  Warnings: %d" % (total, bad, warnings)
    print "Patched: %d (%2.2f%%)" % (patched, patched*100/float(total))
    print "With patches: %d (%2.2f%%)" % (good, good*100/float(total))
    print "Without patches: %d (%2.2f%%)" % (good-patched, (good-patched)*100/float(total))

def errorclean(error_only, pattern):
    """Filter a batch log on stdin down to interesting entries.

    With a pattern, emit just the page names whose error text matches
    it; otherwise emit entries (name only if error_only) whose status
    or messages look like real problems.
    """
    if pattern:
        pattern = re.compile(pattern)
    # Log leader lines look like "! pagename=status".
    pagename = re.compile(r"! (.*)=([0-9]+)")
    while 1:
        header = sys.stdin.readline()
        if not header:
            break
        # Look for a log leader
        m = pagename.search(header)
        if not m:
           continue
        subject = m.group(1)
        status = int(m.group(2))
        # Collect following error messages up to a blank line
        trailer = ''
        while 1:
            line = sys.stdin.readline()
            trailer += line
            if not line or not line.strip():
                break
        if pattern:
            # Emit by pattern
            if pattern.search(trailer):
                sys.stdout.write(subject+"\n")
        else:
            # Emit some of them by status
            def matches(s): return trailer.find(s) > -1
            # Clean successes, empty pages, .so wrappers and DocBook
            # pages are noise; skip them.
            if status == 0 and not matches("warning") and not matches("FAILED") and not matches("offset"):
                continue
            if status == 1 and (matches("page is empty") or matches("page has no text")):
                continue
            if status in (2, 7):
                continue
            # Otherwise, emit
            if error_only:
                print subject
            else:
                sys.stdout.write(header + trailer)

def patchman(stem="foobar"):
    """Make a patch against the last page lifted.

    Uses the <stem>.man/<stem>.loc files left behind by singlerun(),
    opens $EDITOR on a copy, and diffs the result into patchdir.
    """
    if not os.path.exists(stem + ".man"):
        sys.stderr.write("manlifter: no trial page waiting.\n")
        raise SystemExit, 1
    if not os.path.exists(stem + ".loc"):
        sys.stderr.write("manlifter: no saved page location.\n")
        raise SystemExit, 1
    # Retrieve the location of the last page
    lfp = open(stem + ".loc")
    withsect = lfp.read()
    lfp.close()
    # Fail if patch already exists
    patch = os.path.join(patchdir, withsect + ".patch")
    if os.path.exists(patch):
        sys.stderr.write("manlifter: patch for %s already exists.\n" % withsect)
        raise SystemExit, 1
    # Make copies for patching and do it
    trialpage = stem + ".man"
    unpatched = withsect + "-unpatched"
    try:
        os.system("cp %s %s" % (trialpage, unpatched))
        os.system("cp %s %s" % (trialpage, withsect))
        # Only write the patch if the editor exited successfully.
        if os.system(os.getenv("EDITOR") + " " + withsect) == 0:
            os.system("diff -u %s %s >%s" % (unpatched, withsect, patch))
    finally:
        # Remove the working copies and any editor backup file.
        os.system("rm -f %s %s %s" % (withsect, withsect + "~", unpatched))

# XSL stylesheet fragment, parameterized by the output directory (via
# the %s), that makes citerefentry elements render as links into the
# generated HTML tree.  Fed to xmlto with -m by format().
citereftemplate = '''
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                version="1.0">

<xsl:param name="citerefentry.link" select="1"/>

<xsl:template name="generate.citerefentry.link">
  <xsl:text>%s</xsl:text>
  <xsl:text>/man</xsl:text>
  <xsl:value-of select="manvolnum"/>
  <xsl:text>/</xsl:text>
  <xsl:value-of select="refentrytitle"/>
  <xsl:text>.html</xsl:text>
</xsl:template>

</xsl:stylesheet>
'''

def doclifter_driver(options, arguments):
    "Lift old markup to new."
    global mandir, makehtml, outdir, xslfragment, patchdir, makepatch, excluded_files
    filelist = []
    sections = []
    callopts = ""
    patchlift = False
    makehtml = False
    errorfilter = False
    quiet = False
    fval = None
    makepatch = False
    profiling = False
    excluded_files = []
    for (switch, val) in options:
        if (switch == '-d'):
            callopts += " -d " + val
        elif (switch == '-e'):
            errorfilter = True
	elif (switch == '-f'):	# Translate files in the specified list
            fval = val
        elif (switch == '-h'):
            makehtml = True
        elif (switch == '-I'):	# Specify the root of the manual hierarchy
            mandir = val
        elif (switch == '-m'):	# Make a patch from the last fetched page
            makepatch = True
        elif (switch == '-M'):	# Make a patch with specified page
            patchlift = True
	elif (switch == '-p'):	# Specify patch directory
	    patchdir = os.path.abspath(val)
        elif (switch == '-P'):
            profiling = True
	elif (switch in ("-q", '-v', '-w')):	# Set verbosity level
            quiet = True
	    callopts += " " + switch
	elif (switch == '-s'):	# Specify search list of sections
	    sections.append(val)
	elif (switch == '-S'):	# Generate statistics from log on stdin
	    statistics()
            sys.exit(0)
        elif (switch == '-X'):
            excluded_files = open(val).read().split()
    if not sections:
        sections = ["1", "2", "3", "4", "5", "6", "7", "8"]
    if not outdir:
        if not arguments:
            outdir = 'xmlman'
        else:
            outdir = '.'
    # Clean/create the output directory
    if not arguments:
        if not os.path.exists(outdir):
            os.mkdir(outdir)
            # Create XSL fragment for making refentries into links
            xslfragment = os.path.abspath(os.path.join(outdir, "citerefentry.xsl"))
            fp = open(xslfragment, "w")
            fp.write(citereftemplate % outdir)
            fp.close()
    try:
        # Process args, if present
        if arguments: 
            for file in arguments:
                for section in sections:
                    manpage = manfile(section, file)
                    print "Trying", manpage
                    if os.path.exists(manpage):
                        (status, patched, output) = singlerun(manpage, callopts, "foobar", batchmode=False)
                        print output
                        break
                if patchlift:
                    patchman()
        elif makepatch:
            patchman()
        elif errorfilter:
            errorclean(quiet, fval)
        elif fval:
            fp = open(fval)
            filelist = map(lambda x: x.rstrip(), fp.readlines())
            fp.close()
            massrun(filelist, callopts, profiling)
        else:
            global processed 
            processed = set([])
            if os.path.exists(patchdir):
                processed = set(map(lambda x: x.replace(".patch", "").replace(".correction", ""), os.listdir(patchdir)))
            massrun(sectionfiles(sections), callopts, profiling)
            if processed:
                print "%% %d patches not used:" % len(processed)
                for file in processed:
                    print file
    finally:
        pass
        #os.remove(xslfragment)
    # Now, rebuild the index page
    if makehtml:
        fp = open(os.path.join(outdir, "index.html"), "w")
        genindex(fp)
        fp.close()

if __name__ == "__main__":
    # Find a copy of doclifter; "." is searched first so a source-tree
    # copy wins over an installed one.
    for pathdir in ["."] + os.environ["PATH"].split(":"):
        where = os.path.join(pathdir, "doclifter")
        if os.path.exists(where):
            break
    else:
        # for/else: no break means doclifter was nowhere on the path.
        sys.stderr.write("manlifter: can't find doclifter!\n")
        sys.exit(1)
    # Gather options
    (options, arguments) = getopt.getopt(sys.argv[1:], "d:ef:hI:mMp:Pqs:SvwX:")
    # Do the real work
    if "-P" in sys.argv:
        # Profile the whole run with hotshot, then digest the results.
        prof = hotshot.Profile("manlifter.prof")
        prof.runcall(doclifter_driver, options, arguments)
        prof.close()
        starttime = time.time()
        print "% Digesting profile results...",
        stats = hotshot.stats.load("manlifter.prof")
        stats.sort_stats('time', 'calls')
        print "took %s." % report_elapsed(time.time() - starttime)
        stats.print_stats(30)
    else:
        doclifter_driver(options, arguments)
# End
