#!/bin/sh
: ; exec klone $0 "$@"
; The above line allows not to embed the exact path of the klone executable

;;Skeleton of a typical klone script

(setq args (getopts "USAGE: %0 [options] xml-file
This prints stats on an XML document. It is not as much a useful program as an 
implementation of an XML Benchmark of the same name by Clark Cooper, see:
http://www.xml.com/xml/pub/Benchmark/parsertest.html
The result: around 0m4.294s wrt 0m2.908s for java-xp, quite good!
      "
    ("-v" () verbose "verbose operation")
    ("-debug" () enter-debugger-on-error "enter klone debugger on error")
    ("-stackdump" () stackdump-on-error "verbose stack dump on error")
))

(if enter-debugger-on-error (kdb t))
(if stackdump-on-error (stack-dump-on-error t))

;; The structures used to keep the results
(setq taglist (list))			;list of all tags (to keep order)
(setq tagstats (Hashtable ()))		;table of tags / TagInfos

(defstruct TagInfo 
  :occurences				;number
  :parents 
  :children 
  :attributes				;keywords
  :chardata				;number of chars
  :empty?
)

(defun main (&aux tree info tag
    (re-space (regcomp "^[ \t\n]*$"))
    (re-trim (regcomp "^[ \t\n]*(.*[^ \t\n])[ \t\n]*$"))
  )
  ;;parse the tree in memory
  (setq tree (sxp:load (get args 0 "<no-arguments>")))

  ;; walk the tree, collects stats in TagInfo structures
  (sxp:donode (node tree)
    (setq tag (xnl:name node))
    (setq info (get tagstats tag
	'(with (new-node (vector '# 'TagInfo 0 (list) (list) (list) 0 t))
	  (put tagstats tag new-node)
	  (lappend taglist tag)
	  new-node
    )))
    (TagInfo-occurences info (+ (TagInfo-occurences info) 1))
    (if (getn sxp:donode-stack -1) 
      (inc-count :parents (xnl:name (getn sxp:donode-stack -1)))
    )
    (dohash (attribute value (xnl:attributes node))
      (inc-count :attributes attribute)
    )
    (if (xnl:children node) (progn
	(dolist (child (xnl:children node))
	  (if (xnl:chardata? child)
	    (if (regexec re-space child)
	      ()				;ignore whitespace
	      (TagInfo-chardata info 
		(+ (TagInfo-chardata info) 
		  (length child)
	      ))
	    )
	    (inc-count :children (xnl:name child))
	))
	(TagInfo-empty? info ())
    ))
  )
  ;; prints the results
  (dolist (tag taglist)
    (setq info (getn tagstats tag))
    (TagInfo-attributes info 	;remove leading : of attribute names
      (map List (lambda (e) (if (typep e Keyword) (subseq e 1) e))
	(TagInfo-attributes info)
    ))
    (PF "\n================\n%0: %1%2\n" tag (TagInfo-occurences info)
      (if (TagInfo-empty? info) "\nAlways empty"
	(/= 0 (TagInfo-chardata info))
	(PF String "\nHad %0 bytes of character data" (TagInfo-chardata info))
	""
    ))
    (dohash (label field (list
	  "Parents" TagInfo-parents
	  "Children" TagInfo-children
	  "Attributes" TagInfo-attributes
      ))
      (if (field info) (progn
	  (PF "\n   %0:\n" label)
	  (dohash (tag count (field info))
	    (PF "      %0%1%2\n" tag 
	      (make-string (- 27 (+ (length tag) (length (String count)))))
	      count
	  ))
	  (if (= field TagInfo-children)
	    (if (> (length (field info)) 2)
	      (with (total 0)
		(dohash (tag count (field info))
		  (incf total count)
		)
		(PF "                            =====\n      %0%1\n" 
		  (make-string (- 27 (length (String total)))) total
	  ))))
      ))
    )
  )
)

(defmacrod inc-count (field subtag)
  `(put (,(intern (+ "TagInfo-" (subseq field 1))) info)
    ,subtag
    (+ (get (,(intern (+ "TagInfo-" (subseq field 1))) info) ,subtag 0) 1)
))

(main)

;;; EMACS MODES
;;; Local Variables: ***
;;; mode:lisp ***
;;; End: ***

