#!/bin/sh
: ; exec klone "$0" "$@"
; The line above is read by two interpreters: /bin/sh runs the exec, which
; replaces the shell with the klone executable found in $PATH and hands it
; this very file plus the user's arguments; for klone, everything after the
; `;' is a comment.  "$0" is quoted so that a script path containing blanks
; or glob characters reaches klone as a single, unmodified argument.

;; NOTE(review): presumably asks klone to print a stack dump when an error
;; occurs -- confirm against the Klone manual.
(stack-dump-on-error t)
;; Usage text; printed through fatal-error when no input file is given.
(setq USAGE "wrd2txt [options] files...
-? or -help for help
treat foo.WRD files from the Psion Series 3 text processor and creates foo.txt
ASCII files formatted in a user-readable form geared towards pure ASCII readers
(emacs, mails...)
Formatting functions can be added as Klone code to this script")

;; Command-line parsing. Each clause reads: option flag, name of its argument
;; (() for a plain flag), variable receiving the value, help text.
;; The result of getopts is stored in files and is later iterated as the list
;; of input files. The variables of the ":multiple t" options are iterated
;; with dolist further down, i.e. they hold lists of values.
(setq files (getopts USAGE
    ("-p" profile profiles 
      "profile holding the conversion functions for Psion WRD paragraph 
tags as functions print-par-XX, or by redefining everything with 
print-pars (see source between CUSTOM_START and CUSTOM_END)"
      :multiple t
    )
    ("-a" abbrev predefined-abbrevs 
      "defines an abbrev, for instance to define an abbrev /fb for
foobar:    -a /fb=foobar"
      :multiple t
    )
    ("-i" abbrev ignored-abbrevs 
      "ignores an abbrev, (leaves it verbatim in text)
to ignore /fb, say -i /fb or -i fb"
      :multiple t
    )
    ("-n" () nonum "do not number paragraphs")
    ;; each value is validated and split into a (P1 P2) pair right after
    ;; the option checks
    ("-par" P1P2 paragraph-refinitions1 
      "tells that a paragraph P1 must be printed as if of type P2.
Exemple to treat ZA pars as BT: -par ZABT"
      :multiple t
    )
    ("-q" () quiet "quiet, no messages")
    ("-v" () verbose "verbose operation")
    ;; kept as a string here; converted with Int in the option checks
    ("-HBcount" N print-par-HB:count-init-s "start of HB count, default 0")
)))

;; TODO:
;; -kout prints output as a klone representation of file
;;      #(tag text emphases)
;;      emphases: #(tag text begin end)
;;      (this representation can be re-read as it begins by ;;WRD KLONE)

;; Layout constants of the .WRD format (see the format notes at the end of
;; this file). Sizes of records 6 and 7 include the 4-byte record header
;; (type word + size word) in front of the 80 / 28 data bytes.
(setq wrd:records-start 40)		;file offset of record type 1 (just past the header)
(setq wrd:pardefs 0x93)			;start of paragraph styles (only used by a commented-out line)
(setq wrd:parlength 84)			;length of a paragraph style record, header included
(setq wrd:emplength 32)			;length of an emphasis style record, header included
(setq wrd:par-id 06)			;record type of a paragraph style definition
(setq wrd:emp-id 07)			;record type of an emphasis style definition
(setq wrd:text-id 08)			;record type of the text
(setq wrd:markers-id 09)		;record type of the paragraph markers section
(setq wrd:id-length 4)			;length of a record header (type + size)
(setq wrd:markerlength 6)		;length of one marker block

;; NOTE(review): print-margin:* are not read anywhere in this file; presumably
;; they configure print-margin-words -- confirm.
(setq print-margin:width 78)
(setq print-margin:width-orig 78)
;; functions of no argument, each applied by process-file before it opens a
;; file (used below to reset the heading counters)
(setq file-inits (list))

(setq accents-table	[		;accents psion->iso (plist)
  #\ #\
  #\ #\
  #\ #\
  #\ #\
  #\ #\
  #\ #\
  #\ #\
  #\ #\
  #\ #\
  #\ #\
  #\ #\
  #\ #\
  ])

;; option checks
;; At least one input file is required: otherwise print the usage and exit.
(if (not files)
  (fatal-error 1 "%0\n" USAGE))
;; -HBcount arrives as a string; the heading counter starts at its integer
;; value, or at 0 when the option was not given.
(setq print-par-HB:count-init
  (if print-par-HB:count-init-s
    (Int print-par-HB:count-init-s)
    0))

;; Build paragraph-refinitions from the -par values: a flat list of
;; (P1 P2 P1 P2 ...) uppercase tags, looked up with getn by print-pars.
;; Every -par value must be exactly four letters (P1 followed by P2).
(setq paragraph-refinitions (list))
(dolist (redef-spec paragraph-refinitions1)
  (if (not (match "^[A-Za-z][A-Za-z][A-Za-z][A-Za-z]$" redef-spec))
    (fatal-error 1
      "paragraph-refinition must be a 4 letter string, not %r0\n"
      redef-spec))
  ;; first half: tag to replace; second half: tag to print it as
  (lappend paragraph-refinitions (toupper (subseq redef-spec 0 2)))
  (lappend paragraph-refinitions (toupper (subseq redef-spec 2 4))))

;; init char. translation table: translation-string
;; translation-string maps a byte of the Psion text (used as index) to the
;; byte written in the output; process-file applies it to every text byte.

;; First fold negative entries of accents-table back into 0..255.
(dotimes (i (length accents-table))	;correct an old bug with signed chars
  (if (< #[accents-table i] 0) 
    (setq #[accents-table i] (+ 256 #[accents-table i])))
)
;; Start from the identity mapping (entry i holds i)...
(setq translation-string (make-string 256))
(dotimes (i 256) (put translation-string i i))
;; ...then override the entries listed in accents-table, which is walked as
;; (psion-char iso-char) pairs.
(dohash (psion-char iso-char accents-table)
  (put translation-string psion-char iso-char)
)

;; main loop
;; Entry point: loads the profiles given with -p, then converts every file
;; named on the command line, in order.
(defun main ()
  (do-load-profile-file)
  (dolist (wrd-file files)
    (process-file wrd-file)))

;; Loads, in the order they appear in profiles (the values of the -p
;; option), each profile file. Per the -p help text, a profile holds
;; print-par-XX definitions or a replacement for print-pars.
(defun do-load-profile-file ()
  (dolist (profile-file profiles)
    (load profile-file)		       
))

;; data structure built by parsing file

;; One paragraph of the document, built by process-file.
(defstruct Par				;a text item
  tag					;paragraph style abbrev, copied from the Marker-par of its first marker
  text					;the characters themselves (separator excluded)
  begin					;offset of the first char in the raw text buffer
  end					;offset of the null separator ending the paragraph
  emphases				;vector of Emp, one per marker whose emphasis is not "NN"
)
;; One emphasised stretch inside a paragraph, built from a marker by
;; process-file.
(defstruct Emp				;emphase in a text item
  tag					;emphase style (abbrev), copied from Marker-emp
  text					;the chars covered by the marker
  begin					;stored as Par-begin minus Marker-begin
  end					;stored as Par-end minus Marker-end
)

;; One 6-byte block of record type 9: a run of text bytes sharing the same
;; paragraph style and emphasis.
(defstruct Marker
  par					;paragraph style abbrev (bytes 2-3 of the block)
  emp					;emphase abbrev (bytes 4-5 of the block)
  length				;number of text bytes covered (bytes 0-1, low byte first)
  begin					;start offset in text (sum of the preceding lengths)
  end					;end offset in text (begin + length)
)

;; Converts one Psion .WRD file into a text file.
;;   file: path of the input file. The output path is the same path with a
;;         trailing .wrd (any letter case) replaced by .txt, or with .txt
;;         appended when there is no such suffix.
;; Steps: run the file-inits functions, check the "PSIONWPDATAFILE" magic and
;; the encryption word, walk records 1 to 5, collect the paragraph style
;; (type 6) and emphasis style (type 7) definitions, load and translate the
;; text (type 8), load the markers (type 9), cut the text into Par
;; structures at the null separators, record abbrev definitions, expand
;; abbrevs, and finally hand everything to write-text-file and report
;; unknown abbrevs.
;; Any format problem prints a message on *standard-error* and throws
;; 'FileDone, which is caught here: only the current file is abandoned.
(defun process-file (file &aux
    fd-in re-file file-out offset buffer name textlen marklen abbrev
    text				;the raw text buffer (of len textlen)
    (paragraph-styles (vector))		;abbrev, name, abbrev, name... of paragraph styles
    (emphase-styles (vector))		;abbrev, name, abbrev, name... of emphasis styles
    (pars (vector))			;list of paragraphs
    (markers (vector))			;list of markers
  )
  (catch 'FileDone
    ;; set up IO
    (if (and file (not quiet)) (print-format "wrd2txt: file %0\n" file))
    (dolist (f file-inits)		;reset per-file state
      (apply f ())
    )
    (setq fd-in (open file))
    (if (/= "PSIONWPDATAFILE" (read-chars 15 fd-in))
      (throw 'FileDone (print-format *standard-error*
	  "wrd2txt: %0 is not a psion .WRD file, ignored\n" file
    )))
    (setq re-file (regcomp "^(.*)[.][Ww][rR][dD]$"))
    (if (regexec re-file file)
      (setq file-out (regsub re-file "\\1.txt"))
      (setq file-out (+ file ".txt"))
    )
    (verbose? " - word file format version: %0" (read-word fd-in 16))

    ;; the word at offset 18 is 0 for a file that is not encrypted
    (if (/= 0 (read-word fd-in 18))
      (throw 'FileDone (print-format *standard-error*
	  "File is encrypted, cannot process!\n"
    )))
    ;; read headers 1 to 5
    (file-position fd-in wrd:records-start)
    (check-record fd-in 1 "file info")
    (check-record fd-in 2 "printer set-up")
    (check-record fd-in 3 "printer driver info")
    (check-record fd-in 4 "header text")
    (check-record fd-in 5 "footer text")
    (setq offset (file-position fd-in))

    ;; read paragraph definitions type 6
    ;;(setq offset wrd:pardefs)
    ;; The loop test reads the next fixed-size chunk and goes on while its
    ;; first byte is the expected record type.
    (while (progn
	(file-position fd-in offset)
	(setq buffer (read-chars wrd:parlength fd-in))
	(= wrd:par-id (get buffer 0))
      )
      (incf offset wrd:parlength)
      (setq abbrev (subseq buffer wrd:id-length (+ 2 wrd:id-length)))
      (lappend paragraph-styles abbrev)
      (setq name (subseq buffer (+ 2 wrd:id-length) (+ 17 wrd:id-length)))
      (setq name (match "^(.*[^ ])[ ]*$" name 1)) ;strip trailing blanks
      (lappend paragraph-styles name)
      (verbose? " - at %0, style definition %1 %2" offset abbrev name)
;;;      (PV "par def:" abbrev name)
    )
    
    ;; read emphase definitions type 7
    (while (progn
	(file-position fd-in offset)
	(setq buffer (read-chars wrd:emplength fd-in))
	(= wrd:emp-id (get buffer 0))
      )
      (incf offset wrd:emplength)
      (setq abbrev (subseq buffer wrd:id-length (+ 2 wrd:id-length)))
      ;; FIX: the abbrev of an emphasis belongs with its name in
      ;; emphase-styles; it used to be appended to paragraph-styles.
      (lappend emphase-styles abbrev)
      (setq name (subseq buffer (+ 2 wrd:id-length) (+ 17 wrd:id-length)))
      (setq name (match "^(.*[^ ])[ ]*$" name 1))
      (lappend emphase-styles name)
      (verbose? " - at %0, emphase definition %1 %2" offset abbrev name)
;;;      (PV "emp def:" abbrev name)
    )
    ;; start of text. verify validity
    ;; (buffer still holds the chunk that ended the loop above)

    (if (/= wrd:text-id (get buffer 0))
      (throw 'FileDone (print-format *standard-error*
	  "%2%0 instead of expected %1, at position %3\n" 
	  (get buffer 0) wrd:text-id 
	  "wrd2txt: at start of text: parsing error: read ID # "
	  offset
    )))
    
    ;; gobble text type 8
    (setq textlen (id-length buffer))
    (file-position fd-in (+ offset wrd:id-length))
    (setq text (read-chars textlen fd-in))
    (put text -1 0)			;insert terminating null
    ;; translate chars (iso accents)
    (dotimes (i textlen)
      (put text i #[translation-string #[text i]])
    )
    
    ;; read markers type 9
    (setq offset (+ offset wrd:id-length textlen))
    (setq buffer (read-chars wrd:id-length fd-in))
    ;; verify validity
    (if (/= wrd:markers-id (get buffer 0))
      (throw 'FileDone (print-format *standard-error*
	  "%2%0 instead of expected %1\n" 
	  (get buffer 0) wrd:markers-id 
	  "wrd2txt: at start of markers: parsing error: read ID # "
    )))
    (file-position fd-in (+ offset wrd:id-length))
    (setq marklen (id-length buffer))
    ;; each marker covers len bytes of text; begin/end offsets are obtained
    ;; by accumulating the lengths
    (with (marker () len 0 text-offset 0)
      (dotimes (i (/ marklen wrd:markerlength)) ;read markers
	(setq marker (read-chars wrd:markerlength fd-in))
	(setq len (+ #[marker 0] (* 256 #[marker 1])))
	(lappend markers (make-Marker :length len :begin text-offset 
	    :end (+ text-offset len) :par (subseq marker 2 4)
	    :emp (subseq marker 4 6)
	))
	(incf text-offset len)
;;;	(PV "marker: " (get markers -1))
    ))
    
    (init-abbrevs predefined-abbrevs ignored-abbrevs)

    ;; last phase: create all text items from markers and text
    (with (text-offset 0 marker-index 0 par () end-offset 0 mark ())
      (while (<= text-offset textlen)
	;; a paragraph runs up to the next null byte
	(setq end-offset text-offset)
	(while (/= 0 (get text end-offset))
	  (incf end-offset)
	)
	(setq par (make-Par :text (subseq text text-offset end-offset)
	    :begin text-offset :end end-offset :emphases (vector)
	))
	;; consume every marker lying inside this paragraph; the first one
	;; gives the paragraph its tag
	(if (marker-of-par (setq mark (get markers marker-index)) par) (progn
	    (Par-tag par (Marker-par mark))
	    (while (and (< marker-index (length markers))
		(marker-of-par (setq mark (get markers marker-index)) par)
	      )
	      (if (/= "NN" (Marker-emp mark))
		;; NOTE(review): begin/end are stored as paragraph offset
		;; minus marker offset; confirm this is the intended sign
		;; before relying on these fields.
		(lappend (Par-emphases par)
		  (make-Emp :tag (Marker-emp mark)
		    :text (subseq text (Marker-begin  mark) (Marker-end mark))
		    :begin (- (Par-begin par) (Marker-begin  mark))
		    :end (- (Par-end par) (Marker-end mark))
	      )))
	      (incf marker-index)
	  ))
	  (throw 'FileDone (print-format *standard-error*
	      "%3%r0 not marked!\n    marker = %1\n    Par = %r2)\n" 
	      (Par-text par) mark par "wrd2txt: CRYPTED FILE???\n    text "
	  ))
	)
	;; treat some special pars
	(if (regexec re-abbrev-def (Par-text par))
	  (define-abbrev (Par-text par)) ;an abbrev definition is not output
	  t
	  (lappend pars par)		; else store it
	)
	(expand-abbrev (Par-text par))	;modifies the paragraph text in place
;;;	(PV par)
	(setq text-offset (+ end-offset 1)) ;skip null separator
    ))
    (write-text-file file-out pars paragraph-styles emphase-styles)
    (print-undefined-abbrevs file)
))

;; Reads a 16-bit word stored low byte first from fd-in and returns it as
;; an integer. When offset is given, the stream is first positioned there;
;; otherwise reading continues at the current position.
(defun read-word (fd-in &optional offset)
  (if offset (file-position fd-in offset))
  (with (lo-byte (read-char fd-in)
         hi-byte (read-char fd-in))
    (+ lo-byte (* 256 hi-byte))))

;; Reads the record starting at the current position of fd-in and checks
;; that its type word is expected-id.
;;   description: human-readable name of the record, used in messages.
;; On a mismatch, prints an error on *standard-error* and throws 'FileDone.
;; Otherwise reads the size word, reports the record in verbose mode and
;; consumes the data bytes, which leaves the stream at the next record;
;; the data read is the return value.
(defun check-record (fd-in expected-id description)
  (with (id (read-word fd-in))
    (if (/= expected-id id)
      (throw 'FileDone
        (print-format *standard-error*
          "error: expected to find %0 section, id %1, found ID %2\n"
          description expected-id id)))
    (with (len (read-word fd-in))
      ;; the record started 4 bytes (type + size words) before this point
      (verbose? " - record #%0 %1, %2 bytes at %3"
        id description len (- (file-position fd-in) 4))
      (read-chars len fd-in))))

(setq re-par-not-empty (regcomp "[^ \t \n]"))
;; True (t) when the text of par holds no character matched by
;; re-par-not-empty, i.e. the paragraph is blank; () otherwise.
(defun par-empty? (par)
  (if (regexec re-par-not-empty (Par-text par))
    ()
    t))

;; main routine: groups the paragraphs and hands them to the printer.
;;   file-out: path of the text file to create (an existing one is
;;             superseded)
;;   pars:     paragraphs (Par) in document order
;;   paragraph-styles, emphase-styles: accepted but not used here
;; Consecutive non-empty paragraphs carrying the same tag are collected into
;; one pargroup. An empty paragraph is stored as () inside the current
;; pargroup; empty paragraphs before the first non-empty one are dropped.
;; print-pars is then applied to the output stream and the list of pargroups.
(defun write-text-file (file-out pars paragraph-styles emphase-styles &aux
    (fd-out (open file-out :direction :output :if-exists :supersede))
    par curtag (pargroups (list)) pargroup
  )
  (dolist (par pars)
    (if
      ;; empty par: a nil in the current group, ignored at start
      (par-empty? par)
      (if curtag (lappend pargroup ()))
      ;; same paragraph type as the current group: add it
      (= (Par-tag par) curtag)
      (lappend pargroup par)
      ;; new paragraph type: close the current group and start a new one
      t
      (progn
        (if curtag (lappend pargroups pargroup))
        (setq pargroup (list par))
        (setq curtag (Par-tag par)))))
  ;; close last pargroup
  (if pargroup (lappend pargroups pargroup))
  ;; now call user code on this list
  (apply print-pars (list fd-out pargroups)))

;; Computes the data length announced by a record header held in buffer:
;; element 2 is the low byte, element 3 is weighted 256 and element 1 is
;; weighted 65536.
(defun id-length (buffer)
  (with (low-part #[buffer 2]
         mid-part (* 256 #[buffer 3])
         high-part (* 65536 #[buffer 1]))
    (+ high-part mid-part low-part)))

;; Splits buffer into its words (maximal runs of characters other than
;; space, tab and newline) and returns them, in order, as a vector.
(defun buf2wordlist (buffer &aux
    (word-vec (vector))
    (re-word (regcomp "^[ \t\n]*([^ \t\n]+)([ \t\n]*)"))
    (scan-pos 0)
  )
  (while (regexec re-word buffer scan-pos)
    (lappend word-vec (regsub re-word 1))
    ;; resume after the blanks that follow the word just found
    (setq scan-pos #[#[re-word 2] 1]))
  word-vec)
  
;; Tells whether marker lies inside par: it must not start before the
;; paragraph and may end at most one byte past Par-end (that extra byte is
;; the null separator, which the last marker of a paragraph covers).
(defun marker-of-par (marker par)
  (if (>= (Marker-begin marker) (Par-begin par))
    (<= (Marker-end marker) (+ (Par-end par) 1))))

;;============================================================================
;; abbrevs
;; An abbrev name is a slash, then a letter, '-', '_' or '+', then any
;; number of letters, digits, '-', '_' or '+'.

;; a whole line of the form /name=value; group 1 is /name, group 2 the value
(setq re-abbrev-def (regcomp "^([/][-a-zA-Z_+][-0-9a-zA-Z_+]*)[=](.*)$"))
;; an abbrev name anywhere in a text; group 1 is /name
(setq re-abbrev (regcomp "([/][-a-zA-Z_+][-0-9a-zA-Z_+]*)"))
;; a -i argument: optional leading slash, group 1 is what follows it
(setq re-abbrev-ignore (regcomp "^[/]?(.*)$"))

;; Resets the global abbrev state: abbrevs becomes a fresh hash table and
;; unknown-abbrevs an empty list.
;;   predefined-abbrevs: strings of the form /name=value (the -a option);
;;                       strings not of that form are skipped
;;   ignored-abbrevs:    names with or without leading slash (the -i
;;                       option); each is entered as mapping to itself
(defun init-abbrevs (predefined-abbrevs ignored-abbrevs)
  (setq abbrevs (Hashtable ()))
  (setq unknown-abbrevs (list))
  (dolist (abbrev-spec predefined-abbrevs)
    (if (regexec re-abbrev-def abbrev-spec)
      (with (abbrev-key (regsub re-abbrev-def 1)
             abbrev-value (regsub re-abbrev-def 2))
        (put abbrevs abbrev-key abbrev-value))))
  (dolist (ignored-spec ignored-abbrevs)
    (if (regexec re-abbrev-ignore ignored-spec)
      (with (ignored-key (regsub re-abbrev-ignore "/\\1")
             ignored-value (regsub re-abbrev-ignore "/\\1"))
        (put abbrevs ignored-key ignored-value)))))

;; Records the abbrev defined by text, a string of the form /name=value,
;; in the abbrevs table. A text that is not of that form is ignored.
;; The match is performed here, on text itself, so the function no longer
;; depends on the caller having just run regexec on re-abbrev-def with the
;; same string.
(defun define-abbrev (text)
  (if (regexec re-abbrev-def text)
    (put abbrevs (regsub re-abbrev-def 1) (regsub re-abbrev-def 2))))

;; Expands, in place, every abbrev (/name) found in text.
;;  - A known abbrev is replaced by its value and scanning resumes at the
;;    start of the inserted value, so abbrevs contained in a value are
;;    expanded too.
;;  - An abbrev whose value is the abbrev itself (what the -i option
;;    installs) is left verbatim and skipped. Replacing it and rescanning
;;    from the same position, as was done before, never terminated.
;;  - An unknown abbrev is left verbatim and counted in unknown-abbrevs.
;; NOTE(review): a value that contains its own abbrev inside a longer
;; string (e.g. /fb=/fb foo) would still be re-expanded endlessly.
(defun expand-abbrev (text &aux
    (offset 0)
    abbrev
    value
    start				;offset of the abbrev in text
    end					;offset just past the abbrev
  )
  (while (regexec re-abbrev text offset)
    (setq start (get (get re-abbrev 1) 0))
    (setq end (get (get re-abbrev 1) 1))
    (setq abbrev (regsub re-abbrev 1))
    (setq value (getn abbrevs abbrev))
    (if
      ;; unknown: count it and go on after it
      (not value)
      (progn
	(put unknown-abbrevs abbrev (+ 1 (get unknown-abbrevs abbrev 0)))
	(setq offset end)
      )
      ;; ignored: keep the text as it is and go on after it
      (= value abbrev)
      (setq offset end)
      ;; known: substitute, then rescan from the inserted value
      t
      (progn
	(dotimes (i (length abbrev)) (delete text start))
	(insert text start value)
	(setq offset start)
    ))
  )
)

;; Reports on *standard-error* the abbrevs that were met in the text but
;; never defined: a header giving their number and filename, then one line
;; per abbrev, sorted with compare-nocase, with its number of occurrences.
;; Prints nothing when unknown-abbrevs is empty.
(defun print-undefined-abbrevs (filename &aux (l (list)))
  (if unknown-abbrevs
    (progn
      ;; collect the abbrev names (the counts are looked up again below)
      (dohash (a n unknown-abbrevs) (lappend l a))
      (sort l compare-nocase)
      (with (n-unknown (length l))
        (print-format *standard-error* "*** %0 UNDEFINED ABBREV%1! in %2:\n"
          n-unknown (if (> n-unknown 1) "S" "") filename))
      (dolist (a l)
        (print-format *standard-error* "    %0    \t(%1 occurences)\n"
          a (get unknown-abbrevs a 0))))))

;; Returns a copy of text in which every line is prefixed by a string of
;; length indentation (built with make-string) and ended by a newline.
(defun indent-by (indentation text &aux
    (src-stream (Stream text))
    (dst-stream (Stream ""))
    line
    (pad-str (make-string indentation))
  )
  ;; read-line is given () as second argument and ends the loop by
  ;; returning a false value once the input is exhausted
  (while (setq line (read-line src-stream ()))
    (print-format dst-stream "%n0%n1\n" pad-str line))
  (String dst-stream))

;; CUSTOM_START
;;=============================================================================
;;                    GENERATION OF TEXT (contents of ~/.wrd2txt)
;;=============================================================================
;; Once the WRD file is parsed, the pargroups and the stream to write to are
;; passed to the user-redefinable print-pars. You should normally NOT redefine
;; print-pars itself, but rather the individual paragraph printing functions
;; print-par-XX (where XX is the type of par: BT, HA, HB, etc...)
;; - pargroups is a list of pargroup-s,
;; - a pargroup is a list of pars, first one is nonempty
;; - a par is either () (empty paragraph), or a Par structure which has 2 
;;   interesting fields Par-text (raw text having no newlines) and 
;;   Par-tag (string of 2 uppercase letters giving the type of paragraph)
;; NOTE: the first par of a pargroup cannot be ()

(setq RM 78)				;right margin; print-par-HA uses it to place its box

;; Prints every pargroup on fd-out by calling the printer of its tag.
;;   fd-out:    output stream
;;   pargroups: list of pargroups, each a list of pars sharing one tag
;; The tag of a group is the tag of its first par, replaced by its
;; counterpart in paragraph-refinitions (-par option) when it has one.
;; The printer is the function named print-par-<tag>; it is applied to the
;; stream, the group without its trailing empty pars, and the number of
;; empty pars removed. A tag without printer is reported on
;; *standard-error*.
(defun print-pars (fd-out pargroups &aux
    partag previoustag tag-printer
  )
  (dolist (pargroup pargroups)
    (setq partag (Par-tag (getn pargroup 0)))
    (if (getn paragraph-refinitions partag)
      (setq partag (getn paragraph-refinitions partag))
    )
    (setq tag-printer (intern (+ "print-par-" partag)))
    (if (not (boundp tag-printer))
      ;; no printer: print-par-?? writes the pars on fd-out and returns the
      ;; same text, which is quoted (indented by 4) in the error message
      (print-format *standard-error*
	"*** UNKNOWN PAR TAG: \"%0\", define a %2 function for text:\n%1"
	partag (indent-by 4 (print-par-?? fd-out pargroup 0)) tag-printer
      )
      (with (newlines 0 non-emptys (length pargroup))
	;; drop the empty pars (nil) ending the group, counting them
	(while (not (get pargroup (- non-emptys 1))) 
	  (incf non-emptys -1) (incf newlines)
	)
	(setq pargroup (subseq pargroup 0 non-emptys))
	(if verbose (PV "Applying: " tag-printer print-par-HB:count print-par-HC:count))
	(apply tag-printer (list fd-out pargroup newlines))
      )
    )
    ;; previoustag is not read in this function
    (setq previoustag partag)
))

;; default printer (must return the text)
;; Writes each non-empty par of pargroup on fd as "<TAG> text", TAG being
;; the tag of the first par of the group, and returns the very same lines
;; as one string. newlines is not used.
(defun print-par-?? (fd pargroup newlines &aux
    (stream (Stream ""))
    (tag (Par-tag (getn pargroup 0)))
  )
  (dolist (par pargroup)
    (if par
      ;; same line twice: first into the returned string, then on fd
      (dolist (dest-stream (list stream fd))
        (print-format dest-stream "<%0> %1\n" tag (Par-text par)))))
  (String stream))

;; paragraph-specific printers

;; HA: prints the group inside a box of '#' characters. The box is as wide
;; as the longest par plus 4; each par is centred in it (the extra blank of
;; an odd padding goes to the left), an empty par gives an empty box line.
;; A blank line follows the box. newlines is not used.
;; NOTE(review): the left margin is (RM - len + 4) / 2 while the box is
;; len + 4 wide, which places the box 4 columns right of centre -- confirm
;; whether that is intended.
(defun print-par-HA (fd pargroup newlines &aux
    (len 0) lm l
  )
  ;; len: length of the longest par of the group
  (dolist (par pargroup)
    (if par (setq len (max len (length (Par-text par))))))
  (setq lm (/ (+ (- RM len) 4) 2))
  (with (box-margin (make-string lm)
         box-edge (make-string (+ 4 len) #\#))
    (print-format fd "%0%1\n" box-margin box-edge)
    (dolist (par pargroup)
      (if par
        (progn
          ;; l: number of blanks needed to pad this par up to len
          (setq l (- len (length (Par-text par))))
          (print-format fd "%0# %3%1%2 #\n" box-margin (Par-text par)
            (make-string (/ l 2))
            (make-string (- l (/ l 2)))))
        (print-format fd "%0# %1 #\n" box-margin (make-string len))))
    (print-format fd "%0%1\n\n" box-margin box-edge)))

;; Each file restarts the HB numbering at the value of the -HBcount option.
(lappend file-inits
  (lambda () (setq print-par-HB:count print-par-HB:count-init)))

;; HB: prints each non-empty par as a heading underlined with '='.
;; A leading "*" or bracketed bullet is removed from the par text (the par
;; itself is modified). Unless nonum is set, the heading is prefixed by
;; "[N] ", N being print-par-HB:count after incrementing it, and
;; print-par-HC:count is reset to 0. newlines is not used.
(defun print-par-HB (fd pargroup newlines &aux
    (re-bullet (regcomp "^[ \t]*([*]|[[][0-9+][]])[ \t]*(.*)$"))
  )
  (dolist (par pargroup)
    (if par
      (progn
        (if (regexec re-bullet (Par-text par))
          (Par-text par (regsub re-bullet 2)))
        (with (par-title (Par-text par))
          (if nonum
            (print-format fd "%0\n%1\n\n"
              par-title
              (make-string (length par-title) #\=
              ))
            (progn
              (defvar print-par-HB:count 0)
              (setq print-par-HC:count 0)
              (incf print-par-HB:count)
              ;; underline covers "[N] " (3 chars plus the digits) + title
              (print-format fd "[%0] %1\n%2\n\n" print-par-HB:count
                par-title
                (make-string
                  (+ 3 (length (String print-par-HB:count))
                    (length par-title))
                  #\=
                )))))))))

;; Each file restarts the HC numbering at 0.
(lappend file-inits (lambda () (setq print-par-HC:count 0)))

;; HC: prints each non-empty par as a heading underlined with '-'.
;; A leading "*" bullet is removed from the par text (the par itself is
;; modified). Unless nonum is set, the heading is prefixed by "(B.C) ",
;; B being print-par-HB:count and C print-par-HC:count after incrementing
;; it. newlines is not used.
(defun print-par-HC (fd pargroup newlines &aux
    (re-bullet (regcomp "^[ \t]*[*][ \t]*(.*)$"))
  )
  (dolist (par pargroup)
    (if par
      (progn
        (if (regexec re-bullet (Par-text par))
          (Par-text par (regsub re-bullet 1)))
        (with (par-title (Par-text par))
          (if nonum
            (print-format fd "%0\n%1\n\n" par-title
              (make-string (length par-title) #\-
              ))
            (progn
              (defvar print-par-HB:count 0)
              (defvar print-par-HC:count 0)
              (incf print-par-HC:count)
              ;; underline covers "(B.C) " (4 chars plus the digits) + title
              (print-format fd "(%3.%0) %1\n%2\n\n"
                print-par-HC:count
                par-title
                (make-string
                  (+ 4 (length (String print-par-HC:count))
                    (length (String print-par-HB:count))
                    (length par-title))
                  #\-
                )
                print-par-HB:count))))))))

;; BT: hands each non-empty par to print-margin-words, with
;; *standard-output* bound to fd for the duration of the call.
;; When the par starts with a bullet ("*", "-", or a one-character label
;; between brackets) followed by blanks, the second element of the list
;; passed to print-margin-words is a blank string one character longer than
;; the bullet; otherwise it is empty. Empty pars print nothing, and no
;; newline is added after the group. newlines is not used.
;; NOTE(review): the list is presumably (first-line prefix, next-lines
;; prefix, ...) -- confirm against print-margin-words.
(defun print-par-BT (fd pargroup newlines &aux
    (re-bullet (regcomp "^[ \t]*([*-]|[[<(][0-9+][]>).:-])[ \t]+(.*)$"))
    margin
  )
  (dolist (par pargroup)
    (if par
      (with (*standard-output* fd)
        (setq margin
          (if (regexec re-bullet (Par-text par))
            (make-string (+ 1 (length (regsub re-bullet 1))))
            ""))
        (print-margin-words (Par-text par) (list "" margin "\n" "" " "))))
    ;; (write-char #\newline fd)
  ))

;; TT: writes each par verbatim on its own line, an empty par giving an
;; empty line, then one more newline after the group. newlines is not used.
(defun print-par-TT (fd pargroup newlines)
  (dolist (par pargroup)
    (if (not par)
      (write-char #\newline fd)
      (write-line (Par-text par) fd)))
  (write-char #\newline fd))

;; BL: prints the group as a list, *standard-output* being bound to fd.
;;  - a par starting with "*" or "-" loses that bullet and is printed with
;;    the prefix "  * " (also used for pars without any bullet);
;;  - a par starting with a number (optionally after "[" or "(", and
;;    followed by "]", ")", "." or "-") loses it and is printed with the
;;    prefix "  [N] ", N counting the numbered pars of this group from 1.
;; The text is laid out by print-margin-words, which receives the prefix and
;; a blank string of the same length. An empty par gives a newline, and a
;; newline ends the group. newlines is not used.
;; FIX: the text of a numbered par is now extracted from re-numeroted, the
;; regexp that matched, and no longer from re-bullet, which had just failed
;; to match that text.
(defun print-par-BL (fd pargroup newlines &aux
    (re-bullet (regcomp "^[ \t]*([*-])[ \t]+(.*)$"))
    (re-numeroted (regcomp "^[ \t]*([[(]?[0-9]+[]).-])[ \t]+(.*)$"))
    (num 0)
  )
  (dolist (par pargroup) (if par (with (*standard-output* fd
	  prefix "  * "
	  text (Par-text par)
	)
	(if (regexec re-bullet text)
	  (setq text (regsub re-bullet 2))
	  (regexec re-numeroted text) (progn
	    (setq text (regsub re-numeroted 2))
	    (setq prefix (+ "  [" (String (incf num)) "] "))
	  )
	)
	(print-margin-words text (list prefix (make-string (length prefix))
	    "\n" "" " "
	))
      )
      (write-char #\newline fd)
  ))
  (write-char #\newline fd)
)

;; BK: prints the group as a more deeply indented list, *standard-output*
;; being bound to fd.
;;  - a par starting with "*" or "-" loses that bullet and is printed with
;;    the prefix "      * " (also used for pars without any bullet);
;;  - a par starting with a number (optionally after "[" or "(", and
;;    followed by "]", ")", "." or "-") loses it and is printed with the
;;    prefix "      <N> ", N counting the numbered pars of this group from 1.
;; The text is laid out by print-margin-words, which receives the prefix and
;; a blank string of the same length. An empty par gives a newline, and a
;; newline ends the group. newlines is not used.
;; FIX: the text of a numbered par is now extracted from re-numeroted, the
;; regexp that matched, and no longer from re-bullet, which had just failed
;; to match that text.
(defun print-par-BK (fd pargroup newlines &aux
    (re-bullet (regcomp "^[ \t]*([*-])[ \t]+(.*)$"))
    (re-numeroted (regcomp "^[ \t]*([[(]?[0-9]+[]).-])[ \t]+(.*)$"))
    (num 0)
  )
  (dolist (par pargroup) (if par (with (*standard-output* fd
	  prefix "      * "
	  text (Par-text par)
	)
	(if (regexec re-bullet text)
	  (setq text (regsub re-bullet 2))
	  (regexec re-numeroted text) (progn
	    (setq text (regsub re-numeroted 2))
	    (setq prefix (+ "      <" (String (incf num)) "> "))
	  )
	)
	(print-margin-words text (list prefix (make-string (length prefix))
	    "\n" "" " "
	))
      )
      (write-char #\newline fd)
  ))
  (write-char #\newline fd)
)

;; CUSTOM_END
;;============================================================================

(main)

;; here is info on the format:
; PSIONICS FILE - WORD.FMT
; ========================
; Format of Word files
; Last modified 1994-03-28
; ========================
; 
; A word file begins with a header of the following form:
;   Offset  0 (cstr): "PSIONWPDATAFILE"
;   Offset 16 (word): format version number
;   Offset 18 (word): unknown, 0 if not encrypted, 1 if encrypted
;   Offset 20 to  28: unknown, presumably encrypted magic value
;   Offset 29 to  35: copy of offset 20 to 26
;   Offset 36 (word): $EAEA if not encrypted, zero if encrypted
;   Offset 38 (word): unused
; 
; The format version number is 1 for non-passworded files and 256 for passworded
; files. @Offset 18 is probably an encryption algorithm version number. Apart
; from the header, encryption affects only the data of record type 8 (see
; below).@
; 
; The rest of the file consists of records. All records have the form:
;   Offset  0 (word): record type
;   Offset  2 (word): size of data portion (L)
;   Offset  4 to L+3: data portion
; 
; Word files have record types 1 to 9; the word processor application creates
; them in numerical order of type. Exactly one record of each type is used,
; except that there may be more than one record of types 6 and 7.
; 
; All distances and font sizes are in units of 0.05 points (i.e. a value of
; 1440 represents one inch). All font names are represented by standard code
; numbers:
;     -1 = Inherited (where permitted)
;      0 = Courier              17 = Emperor              40 = Greek
;      1 = Pica                 18 = Madeleine            41 = Kana
;      2 = Elite                19 = Zapf Humanist        42 = Hebrew
;      3 = Prestige             20 = Classic              44 = Russian
;      4 = Letter Gothic        24 = Times Roman          48 = Narrator
;      5 = Gothic               25 = Century              49 = Emphasis
;      6 = Cubic                26 = Palatino             50 = Zapf Chancery
;      7 = Lineprinter          27 = Souvenir             52 = Old English
;      8 = Helvetica            28 = Garamond             55 = Cooper Black
;      9 = Avant Garde          29 = Caledonia            56 = Symbol
;     10 = Spartan              30 = Bodoni               57 = Line Draw
;     11 = Metro                31 = University           58 = Math 7
;     12 = Presentation         32 = Script               59 = Math 8
;     13 = APL                  33 = Script PS            60 = Dingbats
;     14 = OCR A                36 = Commercial Script    61 = EAN
;     15 = OCR B                37 = Park Avenue          62 = PC Line
;     16 = Standard Roman       38 = Coronet
; 
; Record type 1 holds information about the file. It is always 10 bytes:
;   Offset  0 (word): cursor position within text record (type 8)
;   Offset  2 (byte): each set bit indicates a character type should be shown
;                     as symbols:
;     Bit 0: tabs
;     Bit 1: spaces
;     Bit 2: carriage returns
;     Bit 3: soft hyphens
;     Bit 4: forced line breaks
;   Offset  3 (byte): (Series 3a only)
;     Bits 0 to 1: status window: 0=none, 1=narrow, 2=wide
;     Bits 4 to 5: zoom state: 0=smallest, ... 3=largest
;   Offset  4 (byte): 0=style bar off, 1=style bar on
;   Offset  5 (byte): 0=file type is paragraph, 1=file type is line
;   Offset  6 (byte): outlining level
;   Offset  7 (byte): unused
;   Offset  8 (word): unused
; 
; Record type 2 holds information about printer set-up. It is
; always 58 bytes:
;   Offset  0 (word): page width
;   Offset  2 (word): page height
;     (Note: the above fields assume that the paper orientation is "portrait")
;   Offset  4 (word): left margin
;   Offset  6 (word): top margin
;   Offset  8 (word): width of printing area
;   Offset 10 (word): height of printing area
;     (Note: these four fields have only been checked for portrait)
;   Offset 12 (word): header offset (bottom of header to top of text)
;   Offset 14 (word): footer offset (bottom of footer to bottom of text)
;   Offset 16 (word): paper orientation: 0=portrait, 1=landscape
;   Offset 18 (word): unknown
;   Offset 20 (word): first page to print (1=first page)
;   Offset 22 (word): last page to print ($FFFF=end of file)
;   Offset 24 (word): header font code number
;   Offset 26 (byte): header style
;     Bit 0: underline
;     Bit 1: bold
;     Bit 2: italic
;     Bit 3: superscript
;     Bit 4: subscript
;   Offset 27 (byte): unused
;   Offset 28 (word): header font size
;   Offset 30 (byte): header alignment:
;     0 = left
;     1 = right
;     2 = centered
;     3 = justified
;     4 = two column
;     5 = three column
;   Offset 31 (byte): header on first page: 0=no, 1=yes
;   Offset 32 to  39: as 24 to 31, but apply to footer, not header
;   Offset 40 (word): page number of first page minus 1
;   Offset 42 (word): number of pages
;   Offset 44 (word): page number style: 0="1,2,3", 1="I,II,III", 2="i,ii,iii"
;   Offset 46 (word): base font code number
;   Offset 48 (byte): base style (as offset 26)
;   Offset 49 (byte): unused
;   Offset 50 (word): base font size
;   Offset 52 (byte): paper size code:
;     0 = A4        (11906 x 16838)
;     1 = Custom
;     2 = Executive (10440 x 15120)
;     3 = Legal     (12240 x 20160)
;     4 = Letter    (12240 x 15840)
;     5 = Monarch   ( 5580 x 10800)
;     6 = DL        ( 6236 x 12472)
;   Offset 53 (byte): widows/orphans allowed: 0=no, 1=yes
;   Offset 54 (long): unused
; The base font code, style, and font size are unused by Word (and should be set
; to code 0, style 0, size 240). Other applications using this record layout may
; use them and provide means to set them.
; 
; Record type 3 holds information about the printer driver:
;   Offset  0 (byte): printer driver model number
;   Offset  1 (cstr): printer driver library
; A printer driver library can support several similar printers; the model number
; specifies which is selected.
; 
; Record types 4 and 5 hold cstrs giving the header and footer text respectively.
; 
; Record types 6 and 7 have a similar layout. Record type 6 describes a style
; and uses all 80 bytes. Record type 7 describes an emphasis and uses only the
; first 28 bytes.
;   Offset  0 to   1: short code, as uppercase letters
;   Offset  2 (cstr): full name
;   Offset 18 (byte):
;     Bit 0: 0=style, 1=emphasis
;     Bit 1: set if style or emphasis undeletable
;     Bit 2: set for default style or emphasis
;   Offset 19 (byte): unused
;   Offset 20 (word): font code number (can be inherited)
;   Offset 22 (byte): style (bits inherited must be clear in this byte)
;     Bit 0: underline
;     Bit 1: bold
;     Bit 2: italic
;     Bit 3: superscript (available in emphasis only)
;     Bit 4: subscript   (available in emphasis only)
;   Offset 23 (byte): unused
;   Offset 24 (word): font size
;   Offset 26 (byte):
;     Bit 0: inherit underline setting
;     Bit 1: inherit bold setting
;     Bit 2: inherit italic setting
;     Bit 3: inherit superscript setting (available in emphasis only)
;     Bit 4: inherit subscript setting   (available in emphasis only)
;   Offset 27 (byte): unused
;   Offset 28 (word): left indent
;   Offset 30 (word): right indent
;   Offset 32 (word): first line indent
;   Offset 34 (word): alignment: 0=left, 1=right, 2=centred, 3=justified
;   Offset 36 (word): line spacing
;   Offset 38 (word): space above paragraph
;   Offset 40 (word): space below paragraph
;   Offset 42 (byte): spacing controls:
;     Bit 0: set to keep with next
;     Bit 1: set to keep together
;     Bit 2: set to start new page
;   Offset 43 (byte): unused
;   Offset 44 (word): Outline level (1 to 9)
;   Offset 46 (word): number of tab stops set
;   Offset 48 (word): position of first tab stop
;   Offset 50 (word): type of first tab stop: 0=left, 1=right, 2=centred
;   Offset 52 to  55: as offsets 48 to 51 for second tab stop
;   Offset 56 to  79: as offsets 48 to 51 for third to eighth tab stops
; 
; Record type 8 holds the actual text. The following bytes have special
; meanings:
;    0 = paragraph separator
;    7 = unbreakable hyphen
;   14 = soft hyphen (displayed only if used to break line)
;   15 = unbreakable space
; 
; Record type 9 consists of a sequence of blocks giving the style and emphasis
; for the text; each block covers some number of consecutive bytes, and the
; blocks between them cover the entire text. No block crosses a paragraph
; boundary, but the last block of the paragraph includes the zero byte separating
; it from the next paragraph. Each block is 6 bytes:
;   Offset  0 (word): number of bytes covered
;   Offset  2 to   3: shortcode of style applied
;   Offset  4 to   5: shortcode of emphasis applied
; The last block should cover an extra byte (an imaginary extra zero separator),
; so that the sum of the bytes covered is one more than the size of the type 8
; record.


;;; EMACS MODES
;;; Local Variables: ***
;;; mode:lisp ***
;;; End: ***

