#!/bin/sh
# yelp-check
# Copyright (C) 2011 Shaun McCance <shaunm@gnome.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

# Installed locations of the XSLT stylesheets used by the commands below.
# mal-link.xsl is pulled in with xsl:import by the stylesheets generated in
# yelp_links_page and yelp_orphans_page; the other three are passed to
# xsltproc directly (status, validate and comments respectively).
xsl_mal_link='/usr/local/share/yelp-xsl/xslt/mallard/common/mal-link.xsl'
xsl_mal_status='/usr/local/share/yelp-tools/xslt/mal-status.xsl'
xsl_mal_rng='/usr/local/share/yelp-tools/xslt/mal-rng.xsl'
xsl_comments='/usr/local/share/yelp-tools/xslt/comments.xsl'

# urlencode STRING [/]
#
# Print STRING percent-encoded on stdout. ASCII letters, digits, '.', '_'
# and '-' pass through unchanged; every other byte becomes %XX (uppercase
# hexadecimal).
#
# We usually don't want to urlencode slashes, because we're usually
# converting file paths to URIs, so '/' is kept by default. But we do
# want to urlencode slashes for names of RNG files in yelp_validate_page:
# pass a literal "/" as the second argument to have them encoded as %2F.
urlencode () {
    if [ "x$2" = "x/" ]; then
        urlencode_keep_slash=0
    else
        urlencode_keep_slash=1
    fi
    # printf rather than echo: echo swallows operands such as "-n" and, in
    # some shells, interprets backslashes. LC_ALL rather than LANG: LANG is
    # overridden by an exported LC_ALL or LC_CTYPE, and awk must work on
    # single bytes for the chars[] lookup table to be correct.
    printf '%s\n' "$1" | LC_ALL=C awk -v keep_slash="$urlencode_keep_slash" '
BEGIN {
  # Map every non-NUL byte to its numeric value.
  for (i = 1; i <= 255; i++) chars[sprintf("%c", i)] = i;
}
{
  ret = "";
  for (i = 1; i <= length($0); i++) {
    c = substr($0, i, 1);
    if (c ~ /[a-zA-Z0-9._-]/ || (keep_slash && c == "/"))
      ret = ret c;
    else
      ret = ret sprintf("%%%X%X", int(chars[c] / 16), chars[c] % 16);
  }
  print ret;
}'
}

# Write the top-level command summary to stderr.
yelp_usage () {
    printf '%s\n' \
        "Usage: yelp-check <COMMAND> [OPTIONS] [FILES]" \
        "" \
        "Commands:" \
        "  comments      Print the editorial comments in a document" \
        "  hrefs         Find broken external links in a document" \
        "  links         Find broken xref or linkend links in a document" \
        "  orphans       Find orphaned pages in a Mallard document" \
        "  status        Report the status of Mallard pages" \
        "  validate      Validate files against a DTD or RNG" >&2
}
# Write the help text for the "hrefs" command to stderr.
yelp_usage_hrefs () {
    printf '%s\n' \
        "Usage: yelp-check hrefs <FILES>" \
        "" \
        "  Find broken href links in FILES in a Mallard document, or" \
        "  broken ulink or XLink links in FILES in a DocBook document." >&2
}
# Write the help text for the "links" command to stderr.
yelp_usage_links () {
    printf '%s\n' \
        "Usage: yelp-check links <FILES>" \
        "" \
        "  Find broken xref links in FILES in a Mallard document," \
        "  or broken linkend links in FILES in a DocBook document." \
        "" \
        "Options:" \
        "  -c CACHE      Use the existing Mallard cache CACHE" >&2
}
# Write the help text for the "orphans" command to stderr.
yelp_usage_orphans () {
    printf '%s\n' \
        "Usage: yelp-check orphans <FILES>" \
        "" \
        "  Locate orphaned pages among FILES in a Mallard document." \
        "  Orphaned pages are any pages that cannot be reached by" \
        "  topic links alone from the index page." \
        "" \
        "Options:" \
        "  -c CACHE      Use the existing Mallard cache CACHE" >&2
}
# Write the help text for the "comments" command to stderr.
yelp_usage_comments () {
    printf '%s\n' \
        "Usage: yelp-check comments <FILES>" \
        "" \
        "  Print the editorial comments in the files FILES, using the" \
        "  comment element in Mallard and the remark element in DocBook." >&2
}
# Write the help text for the "status" command, including its filtering
# options, to stderr.
yelp_usage_status () {
    printf '%s\n' \
        "Usage: yelp-check status <FILES>" \
        "" \
        "  Report the status of the Mallard page files FILES. Each" \
        "  matching page is reporting along with its status." \
        "" \
        "Options:" \
        "  --version VER       Select revisions with the version attribute VER" \
        "  --docversion VER    Select revisions with the docversion attribute VER" \
        "  --pkgversion VER    Select revisions with the pkgversion attribute VER" \
        "  --older DATE        Only show pages older than DATE" \
        "  --newer DATE        Only show pages newer than DATE" \
        "  --only STATUSES     Only show pages whose status is in STATUSES" \
        "  --except STATUSES   Exclude pages whose status is in STATUSES" \
        "  --totals            Show total counts for each status" \
        "For --only and --except, STATUSES is a space-separated list." >&2
}
# Write the help text for the "validate" command to stderr.
yelp_usage_validate () {
    printf '%s\n' \
        "Usage: yelp-check validate <FILES>" \
        "" \
        "  Validate FILES against the appropriate DTD or RNG." \
        "  For Mallard pages, perform automatic RNG merging" \
        "  based on the version attribute." \
        "" \
        "Options:" \
        "  --strict            Disallow unknown namespaces" >&2
}

# Without any arguments there is no command to run: print the top-level
# usage summary and fail.
case "$#" in
    0)
        yelp_usage
        exit 1
        ;;
esac

# yelp_hrefs_db FILE
#
# Run xsltproc (with XInclude processing) over the DocBook document FILE
# using a stylesheet generated on the fly. For every ulink/@url and every
# xlink:href attribute whose value does not start with "mailto:", the
# stylesheet prints one line: the nearest enclosing id (or xml:id), a
# space, and the link target.
#
# The xlink prefix must be bound to the real XLink namespace URI,
# http://www.w3.org/1999/xlink, and href must be selected as an attribute
# (@xlink:href); otherwise DocBook 5 links are never found.
yelp_hrefs_db () {
    xsltproc --xinclude - "$1" <<'EOF'
<xsl:stylesheet
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:db="http://docbook.org/ns/docbook"
 xmlns:xlink="http://www.w3.org/1999/xlink"
 version="1.0">
<xsl:output method="text"/>
<xsl:template match="/">
<xsl:for-each select="//ulink/@url | //*/@xlink:href">
<xsl:if test="not(starts-with(string(.), 'mailto:'))">
<xsl:value-of select="(ancestor-or-self::*/@id | ancestor-or-self::*/@xml:id)[last()]"/>
<xsl:text> </xsl:text>
<xsl:value-of select="string(.)"/>
<xsl:text>&#x000A;</xsl:text>
</xsl:if>
</xsl:for-each>
</xsl:template>
</xsl:stylesheet>
EOF
}

# yelp_hrefs_page FILE
#
# Feed a generated stylesheet to xsltproc (with XInclude processing) and
# apply it to the Mallard page FILE. For each element carrying an href
# attribute that does not start with "mailto:", the stylesheet prints the
# page id, a space, and the href value on one line.
yelp_hrefs_page () {
    xsltproc --xinclude - "$1" <<'EOF'
<xsl:stylesheet
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:mal="http://projectmallard.org/1.0/"
 version="1.0">
<xsl:output method="text"/>
<xsl:template match="/mal:page">
<xsl:for-each select="//*[@href]">
<xsl:if test="not(starts-with(@href, 'mailto:'))">
<xsl:value-of select="/mal:page/@id"/>
<xsl:text> </xsl:text>
<xsl:value-of select="@href"/>
<xsl:text>&#x000A;</xsl:text>
</xsl:if>
</xsl:for-each>
</xsl:template>
</xsl:stylesheet>
EOF
}

# yelp_hrefs FILES...
#
# Entry point for "yelp-check hrefs". Collects "ID URL" pairs from every
# argument and prints "ID: URL" for each unique pair whose URL does not
# answer an HTTP HEAD request (following redirects) with status 200.
#   - a directory contributes all of its *.page files (Mallard);
#   - a file ending in .xml or .docbook is treated as DocBook;
#   - any other file is treated as a Mallard page.
# With no arguments, or --help as the first one, prints usage and exits 1.
yelp_hrefs () {
    if [ "$#" = "0" ] || [ "x$1" = "x--help" ]; then
        yelp_usage_hrefs
        exit 1
    fi
    for xml in "$@"; do
        if [ -d "$xml" ]; then
            for page in "$xml"/*.page; do
                # An unmatched glob stays as the literal pattern; skip it.
                [ -e "$page" ] || continue
                yelp_hrefs_page "$page"
            done
        else
            # Classify each file by its own extension, not by that of the
            # first argument.
            ext=`echo "$xml" | sed -e 's/.*\.//'`
            if [ "x$ext" = "xxml" ] || [ "x$ext" = "xdocbook" ]; then
                yelp_hrefs_db "$xml"
            else
                yelp_hrefs_page "$xml"
            fi
        fi
    done | sort | uniq | while read -r id url; do
        # With -L curl prints one status line per hop; only the last one
        # counts. The version part is matched loosely because HTTP/2 and
        # later report "HTTP/2 200" with no minor version.
        (curl -s -I -L "$url" | grep '^HTTP/' | tail -n 1 | grep -q '^HTTP/[0-9.]* 200') ||
        echo "$id: $url"
    done
}

# yelp_links_db FILE
#
# Apply a generated stylesheet to the DocBook document FILE via xsltproc
# (with XInclude processing). For every element with a linkend attribute
# that matches no id or xml:id in the document, the stylesheet prints the
# nearest enclosing id, ": ", and the dangling linkend value.
yelp_links_db () {
    xsltproc --xinclude - "$1" <<'EOF'
<xsl:stylesheet
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:db="http://docbook.org/ns/docbook"
 xmlns:exsl="http://exslt.org/common"
 extension-element-prefixes="exsl"
 version="1.0">
<xsl:output method="text"/>
<xsl:key name="idkey" match="*[@id or @xml:id]" use="@id | @xml:id"/>
<xsl:template match="/">
 <xsl:for-each select="//*[@linkend]">
  <xsl:if test="not(key('idkey', @linkend))">
   <xsl:value-of select="(ancestor-or-self::*/@id | ancestor-or-self::*/@xml:id)[last()]"/>
   <xsl:text>: </xsl:text>
   <xsl:value-of select="@linkend"/>
   <xsl:text>&#x000A;</xsl:text>
  </xsl:if>
 </xsl:for-each>
</xsl:template>
</xsl:stylesheet>
EOF
}

# yelp_links_page FILE
#
# Apply a generated stylesheet to the Mallard page FILE via xsltproc (with
# XInclude processing). The stylesheet imports $xsl_mal_link and is given
# $check_cache_file as mal.cache.file, both converted to file:// URLs with
# urlencode. For each element with an xref attribute whose link id is not
# found in the cache, it prints the page id, ": ", and the xref value.
#
# The stylesheet is assembled on the left-hand side of a pipeline, so the
# helper variables set there do not reach the caller.
yelp_links_page () {
    {
        cat <<'EOF'
<xsl:stylesheet
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:mal="http://projectmallard.org/1.0/"
 xmlns:exsl="http://exslt.org/common"
 extension-element-prefixes="exsl"
 version="1.0">
EOF
        import_url='file://'$(urlencode "$xsl_mal_link")
        printf '<xsl:import href="%s"/>\n' "$import_url"
        cache_url='file://'$(urlencode "$check_cache_file")
        # The select value is an XPath string literal, hence the inner quotes.
        printf '<xsl:param name="mal.cache.file" select="%s"/>\n' "'$cache_url'"
        cat <<'EOF'
<xsl:output method="text"/>
<xsl:template match="/mal:page">
 <xsl:variable name="page" select="@id"/>
 <xsl:for-each select="//*[@xref]">
  <xsl:variable name="xref" select="@xref"/>
  <xsl:variable name="linkid">
   <xsl:call-template name="mal.link.xref.linkid"/>
  </xsl:variable>
  <xsl:for-each select="$mal.cache">
   <xsl:variable name="target" select="key('mal.cache.key', $linkid)"/>
   <xsl:if test="count($target) = 0">
    <xsl:value-of select="$page"/>
    <xsl:text>: </xsl:text>
    <xsl:value-of select="$xref"/>
    <xsl:text>&#x000A;</xsl:text>
   </xsl:if>
  </xsl:for-each>
 </xsl:for-each>
</xsl:template>
</xsl:stylesheet>
EOF
    } | xsltproc --xinclude - "$1"
}

# yelp_links [-c CACHE] FILES...
#
# Entry point for "yelp-check links". Prints every broken xref (Mallard)
# or linkend (DocBook) found in FILES and exits the script with status 1
# if any were reported, 0 otherwise.
#   - a directory contributes all of its *.page files (Mallard);
#   - a file ending in .xml or .docbook is treated as DocBook;
#   - any other file is treated as a Mallard page.
# Mallard checks need a cache: either the one given with -c, or a
# temporary one built with "yelp-build cache" the first time a Mallard
# input is met and removed again before exiting.
# With no files, or --help, prints usage and exits 1.
yelp_links () {
    if [ "$#" = "0" ] || [ "x$1" = "x--help" ]; then
        yelp_usage_links
        exit 1
    fi
    while [ "$#" != "0" ]; do
        case "$1" in
            "-c")
                shift
                check_cache_file="$1"
                # "-c" as the very last word leaves nothing to shift, and
                # shifting too far is a fatal error in some shells.
                [ "$#" = "0" ] || shift
                ;;
            *)
                break
                ;;
        esac
    done
    if [ "$#" = "0" ] || [ "x$1" = "x--help" ]; then
        yelp_usage_links
        exit 1
    fi
    if [ "x$check_cache_file" != "x" ]; then
        # Make the cache path absolute; it ends up in a file:// URL.
        # The dirname is quoted so directories with spaces survive.
        check_cache_file=$(cd "$(dirname "$check_cache_file")" && pwd)/$(basename "$check_cache_file")
    fi
    check_out_file=`mktemp`
    for xml in "$@"; do
        if [ -d "$xml" ]; then
            if [ "x$check_cache_file" = "x" ]; then
                check_cache_file_is_tmp="yes"
                check_cache_file=`mktemp`
                yelp-build cache -o "$check_cache_file" "$@"
            fi
            for page in "$xml"/*.page; do
                # An unmatched glob stays as the literal pattern; skip it.
                [ -e "$page" ] || continue
                yelp_links_page "$page"
            done
        else
            # Classify each file by its own extension, not by that of the
            # first argument.
            ext=`echo "$xml" | sed -e 's/.*\.//'`
            if [ "x$ext" = "xxml" ] || [ "x$ext" = "xdocbook" ]; then
                yelp_links_db "$xml"
            else
                if [ "x$check_cache_file" = "x" ]; then
                    check_cache_file_is_tmp="yes"
                    check_cache_file=`mktemp`
                    yelp-build cache -o "$check_cache_file" "$@"
                fi
                yelp_links_page "$xml"
            fi
        fi
    done > "$check_out_file"
    # Any output at all means at least one broken link. Test the file size
    # instead of comparing "wc -l" output, which is space-padded on some
    # systems and would then never compare equal to "0".
    if [ -s "$check_out_file" ]; then
        ret="1"
    else
        ret="0"
    fi
    cat "$check_out_file"
    if [ "x$check_cache_file_is_tmp" = "xyes" ]; then
        rm "$check_cache_file"
    fi
    rm "$check_out_file"
    exit $ret
}

# yelp_orphans_page FILE
#
# Apply a generated stylesheet to the Mallard page FILE via xsltproc (with
# XInclude processing). The stylesheet imports $xsl_mal_link and is given
# $check_cache_file as mal.cache.file, both converted to file:// URLs with
# urlencode. It prints the page id when the page is not "index" and the
# mal.link.linktrails template yields no link trails for it.
#
# The stylesheet is assembled on the left-hand side of a pipeline, so the
# helper variables set there do not reach the caller.
yelp_orphans_page () {
    {
        cat <<'EOF'
<xsl:stylesheet
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:mal="http://projectmallard.org/1.0/"
 xmlns:exsl="http://exslt.org/common"
 extension-element-prefixes="exsl"
 version="1.0">
EOF
        import_url='file://'$(urlencode "$xsl_mal_link")
        printf '<xsl:import href="%s"/>\n' "$import_url"
        cache_url='file://'$(urlencode "$check_cache_file")
        # The select value is an XPath string literal, hence the inner quotes.
        printf '<xsl:param name="mal.cache.file" select="%s"/>\n' "'$cache_url'"
        cat <<'EOF'
<xsl:output method="text"/>
<xsl:template match="/mal:page">
 <xsl:variable name="trails">
  <xsl:call-template name="mal.link.linktrails"/>
 </xsl:variable>
 <xsl:if test="@id != 'index' and count(exsl:node-set($trails)/*) = 0">
  <xsl:value-of select="@id"/>
  <xsl:text>&#x000A;</xsl:text>
 </xsl:if>
</xsl:template>
</xsl:stylesheet>
EOF
    } | xsltproc --xinclude - "$1"
}

# yelp_orphans [-c CACHE] FILES...
#
# Entry point for "yelp-check orphans". Runs yelp_orphans_page on every
# file in FILES (for a directory, on each of its *.page files), prints the
# ids it reports, and exits the script with status 1 if any were reported,
# 0 otherwise.
# Uses the cache given with -c, or else builds a temporary one with
# "yelp-build cache" and removes it again before exiting.
# With no files, or --help, prints usage and exits 1.
yelp_orphans () {
    if [ "$#" = "0" ] || [ "x$1" = "x--help" ]; then
        yelp_usage_orphans
        exit 1
    fi
    while [ "$#" != "0" ]; do
        case "$1" in
            "-c")
                shift
                check_cache_file="$1"
                # "-c" as the very last word leaves nothing to shift, and
                # shifting too far is a fatal error in some shells.
                [ "$#" = "0" ] || shift
                ;;
            *)
                break
                ;;
        esac
    done
    if [ "$#" = "0" ] || [ "x$1" = "x--help" ]; then
        yelp_usage_orphans
        exit 1
    fi
    if [ "x$check_cache_file" != "x" ]; then
        # Make the cache path absolute; it ends up in a file:// URL.
        # The dirname is quoted so directories with spaces survive.
        check_cache_file=$(cd "$(dirname "$check_cache_file")" && pwd)/$(basename "$check_cache_file")
    else
        check_cache_file_is_tmp="yes"
        check_cache_file=`mktemp`
        yelp-build cache -o "$check_cache_file" "$@"
    fi
    check_out_file=`mktemp`
    for xml in "$@"; do
        if [ -d "$xml" ]; then
            for page in "$xml"/*.page; do
                # An unmatched glob stays as the literal pattern; skip it.
                [ -e "$page" ] || continue
                yelp_orphans_page "$page"
            done
        else
            yelp_orphans_page "$xml"
        fi
    done > "$check_out_file"
    # Any output at all means at least one orphan. Test the file size
    # instead of comparing "wc -l" output, which is space-padded on some
    # systems and would then never compare equal to "0".
    if [ -s "$check_out_file" ]; then
        ret="1"
    else
        ret="0"
    fi
    cat "$check_out_file"
    if [ "x$check_cache_file_is_tmp" = "xyes" ]; then
        rm "$check_cache_file"
    fi
    rm "$check_out_file"
    exit $ret
}

# yelp_comments_page FILE
#
# Run the comments stylesheet ($xsl_comments) on FILE with xsltproc,
# passing the file name stripped of its directory and extension as the
# "basename" string parameter.
yelp_comments_page () {
    page_ext=$(echo "$1" | sed -e 's/.*\.//')
    page_base=$(basename "$1" ".$page_ext")
    xsltproc --stringparam basename "$page_base" "$xsl_comments" "$1"
}

# yelp_comments FILES...
#
# Entry point for "yelp-check comments". Calls yelp_comments_page on each
# argument; a directory argument stands for all of its *.page files.
# With no arguments, or --help as the first one, prints usage and exits 1.
yelp_comments () {
    if [ "$#" -eq 0 ] || [ "x$1" = "x--help" ]; then
        yelp_usage_comments
        exit 1
    fi
    for target in "$@"; do
        if [ ! -d "$target" ]; then
            yelp_comments_page "$target"
        else
            for page in "$target"/*.page; do
                yelp_comments_page "$page"
            done
        fi
    done
}

# yelp_status [OPTIONS] FILES...
#
# Entry point for "yelp-check status". Parses the leading filter options
# into check_* variables, builds a temporary Mallard cache for FILES with
# "yelp-build cache", runs the status stylesheet ($xsl_mal_status) on that
# cache with the filters passed as string parameters, then removes the
# cache. Always returns 0 once the report has been attempted.
# With no files, or --help, prints usage and exits 1.
yelp_status () {
    if [ "$#" -eq 0 ] || [ "x$1" = "x--help" ]; then
        yelp_usage_status
        exit 1
    fi
    # Consume options until the first word that is not one of them.
    while [ "$#" -gt 0 ]; do
        case "$1" in
            --version)    shift; check_version="$1";    shift ;;
            --docversion) shift; check_docversion="$1"; shift ;;
            --pkgversion) shift; check_pkgversion="$1"; shift ;;
            --older)      shift; check_older="$1";      shift ;;
            --newer)      shift; check_newer="$1";      shift ;;
            --only)       shift; check_only="$1";       shift ;;
            --except)     shift; check_except="$1";     shift ;;
            --totals)     check_totals="1";             shift ;;
            *)            break ;;
        esac
    done
    if [ "$#" -eq 0 ] || [ "x$1" = "x--help" ]; then
        yelp_usage_status
        exit 1
    fi
    check_cache_file=$(mktemp)
    yelp-build cache -o "$check_cache_file" "$@"
    xsltproc \
        --stringparam version "$check_version" \
        --stringparam docversion "$check_docversion" \
        --stringparam pkgversion "$check_pkgversion" \
        --stringparam newer "$check_newer" \
        --stringparam older "$check_older" \
        --stringparam only "$check_only" \
        --stringparam except "$check_except" \
        --stringparam totals "$check_totals" \
        "$xsl_mal_status" "$check_cache_file"
    rm "$check_cache_file"
    return 0
}

# yelp_validate_page FILE
#
# Validate the Mallard page FILE against a RELAX NG schema generated for
# the page's version string, and return xmllint's exit status.
#
# Reads:  check_strict (passed to the RNG stylesheet as rng.strict),
#         xsl_mal_rng, check_rng_dir.
# Writes: check_rng_dir (created on first use and kept for later calls),
#         check_rng_file, check_out_file, ret.
# Prints xmllint's messages, minus lines ending in "validates".
yelp_validate_page () {
    # Using temp files because pipes create subshells, making it really
    # hard to return the right exit status in a portable way.
    if [ "x$check_rng_dir" = "x" ]; then
        check_rng_dir=`mktemp -d`
    fi
    check_out_file=`mktemp`
    # Extract the version attribute of the root element with a small
    # generated stylesheet; "1.0" is used when the attribute is empty or
    # missing. At this point check_rng_file holds that version string.
    check_rng_file=`(
            echo '<xsl:stylesheet'
            echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
            echo ' version="1.0">'
            echo '<xsl:output method="text"/>'
            echo '<xsl:template match="/*">'
            echo '<xsl:choose>'
            echo '<xsl:when test="string(@version) != '"''"'">'
            echo '<xsl:value-of select="@version"/>'
            echo '</xsl:when>'
            echo '<xsl:otherwise>'
            echo '<xsl:text>1.0</xsl:text>'
            echo '</xsl:otherwise>'
            echo '</xsl:choose>'
            echo '</xsl:template>'
            echo '</xsl:stylesheet>'
            ) | xsltproc - "$1"`
    # Turn the version string into a file name: urlencode it, slashes
    # included, and append ".rng".
    check_rng_file=`urlencode "$check_rng_file" /`.rng
    if [ ! -f "$check_rng_dir/$check_rng_file" ]; then
        # If we've already made an RNG file for this version string, don't
        # do it again. We've urlencoded the file name + slashes, because
        # version strings often contain slashes. But xsltproc treats the
        # -o option as a URL and urldecodes, so doubly urlencode, because
        # we want the urlencoded string to be the on-disk name.
        xsltproc -o "$check_rng_dir/"`urlencode "$check_rng_file"` --param rng.strict "$check_strict" "$xsl_mal_rng" "$1"
    fi
    xmllint --noout --xinclude --noent --relaxng "$check_rng_dir/$check_rng_file" "$1" > "$check_out_file" 2>&1
    # Capture xmllint's status right away, before any other command runs.
    ret="$?"
    # Show everything xmllint said except the "... validates" success lines.
    cat "$check_out_file" | grep -v 'validates$'
    rm "$check_out_file"
    return $ret;
}

# yelp_validate [--strict] FILES...
#
# Entry point for "yelp-check validate". Validates every argument and
# exits the script with the status of the last failed validation, or 0
# when everything validated.
#   - a directory contributes all of its *.page files (Mallard RNG);
#   - a file ending in .page is validated with yelp_validate_page;
#   - any other file is validated with "xmllint --postvalid".
# --strict sets check_strict to the XPath expression true(), which
# yelp_validate_page hands to the RNG stylesheet.
# With no arguments, or --help as the first one, prints usage and exits 1.
yelp_validate () {
    if [ "$#" = "0" ] || [ "x$1" = "x--help" ]; then
        yelp_usage_validate
        exit 1
    fi
    gret="0"
    check_strict="false()"
    while [ "$#" != "0" ]; do
        case "$1" in
            "--strict")
                check_strict="true()"
                shift
                ;;
            *)
                break
                ;;
        esac
    done
    for xml in "$@"; do
        # Classify each file by its own extension, not by that of the
        # first argument.
        ext=`echo "$xml" | sed -e 's/.*\.//'`
        if [ -d "$xml" ]; then
            for page in "$xml"/*.page; do
                # An unmatched glob stays as the literal pattern; skip it.
                [ -e "$page" ] || continue
                yelp_validate_page "$page" || gret="$?"
            done
        elif [ "x$ext" = "xpage" ]; then
            yelp_validate_page "$xml" || gret="$?"
        else
            xmllint --noout --xinclude --noent --postvalid "$xml" || gret="$?"
        fi
    done
    if [ "x$check_rng_dir" != "x" ]; then
        # -f: the directory may hold no schema at all if generation failed.
        rm -f "$check_rng_dir"/*.rng
        rmdir "$check_rng_dir"
    fi
    exit "$gret"
}

# Dispatch on the command word (the first argument); the remaining
# arguments are handed to the matching yelp_* function unchanged.
cmd="$1"
shift
case "x$cmd" in
    "xcomments")
        yelp_comments "$@"
        ;;
    "xhrefs")
        yelp_hrefs "$@"
        ;;
    "xlinks")
        yelp_links "$@"
        ;;
    "xorphans")
        yelp_orphans "$@"
        ;;
    "xstatus")
        yelp_status "$@"
        ;;
    "xvalidate")
        yelp_validate "$@"
        ;;
    *)
        # Unknown command: show the summary and fail, the same way running
        # with no arguments does, so a mistyped command in a build script
        # is never mistaken for a successful check.
        yelp_usage
        exit 1
        ;;
esac
