#!/bin/sh
: ; exec klone $0 "$@"
; The above line allows not to embed the exact path of the klone executable

;;Skeleton of a typical klone script

(setq args (getopts "USAGE: %0 [options] [wwwboard-dir]
reads all files in a WWWboard directory (pwd by default) and outputs a text
file, if no other output option is given
"
    ("-web" dir webdir "creates a web site in dir. 
reads config file there in file .config")
    ("-url" url weburl "gives external URL for the web site")
    ("-mbox" file mboxfile "prints to file in mbox format")
    ("-anyboard" dir anyboard "prints to dir in anyboard format")
    ("-v" () verbose "verbose operation")
;; --- Hidden Options ---
    ("-debug" () enter-debugger-on-error "enter klone debugger on error"
    :hidden t)
    ("-stackdump" () stackdump-on-error "verbose stack dump on error"
    :hidden t)
))

(if enter-debugger-on-error (kdb t))
(if stackdump-on-error (stack-dump-on-error t))

;;=============================================================================
;;                    Data
;;=============================================================================
(defstruct Message
  id					;number (string)
  subject				;title
  email					;email of poster, can be ()
  from					;name of poster
  date					;date of post: numlist: Y M D H M S
  in-reply-to				;id
  followups				;list of ids
  body					;text of message
  link					;optional URL, can be ()
  linkname				;its descriptive text, can be ()
  image					;optional image URL, can be ()
)  

id2mes = (Hashtable ())

;;=============================================================================
;;                    Code
;;=============================================================================
(defun main (&aux 
    messages
  )
  messages = (read-w3b-dir (if args (0 args) "."))
  ;;(verify-html-body messages)
  (if 
    mboxfile (print-mboxfile messages mboxfile)
    anyboard (print-anyboard messages anyboard)
    webdir (fatal-error 1 "-web not yet implemented\n")
    t (print-text messages)
  )
)

(defun read-w3b-dir (dir &aux
    (re-filename (regcomp "^([0-9]+)[.]html$"))
    (messages (vector))
    mes
  )
  (dolist (file (directory dir))
    (if (re-filename file) (progn
	(verbose? "reading message file %0" file)
	(with (mes (read-w3b-file (+ dir "/" file) (re-filename 1)))
	  (put id2mes mes.id mes)
	  (lappend messages mes))
  )))
  messages
)

(dohash (v s [
      re-title "<title>([^<]*)</title>"
      re-from-truncated "^Posted by [^<]*$"
      re-from-truncated2 "^Posted by <[^>]*$"
      re-from-truncated3 "^Posted by <a[^>]*>[^<]*$"
      re-from "<b>Posted by: </b><a href=\"mailto:([^\"]*)\">([^<]*)</a> on ([^<]*)<p>"
      re-from2 "Posted by <a href=\"mailto:([^\"]*)\">([^<]*)</a> [(][0-9]+[.][0-9]+[.][0-9]+[.][0-9]+[)] on ([^<]*)<p>"
      re-from3 "<b>Posted by: </b>(.*) on ([A-Z][a-z][^<]*)<p>"
      re-from4 "Posted by (.*) [(][0-9]+[.][0-9]+[.][0-9]+[.][0-9]+[)] on ([A-Z][a-z][^<]*)<p>"
      re-reply "(<b>)?In Reply to: (</b>)?<a href=\"([0-9]+)[.]html\">"
      re-br "^<br>$"
      re-mess-end "^<br><hr size=7 width=75%><p>$"
      re-followups "^<a name="followups"><b>Follow Ups:</b></a><br>$"
      re-followup "<a href=\"([0-9]+)[.]html\">"
      re-end-followups "^<br><hr><p>$"
      re-link "^<ul><li><a href=\"([^\"]+)\">([^<]*)</a></ul>$"
      re-link2 "^<ul><li><a href=\"([^\"]+)$"
      re-link3 "^\">([^<]*)</a></ul>$"
      re-image "^<center><img src=\"([^\"]+)\"></center><p>$"
      re-date "([A-Z][a-z]+) +([0-9]+), +([0-9]+) at ([0-9][0-9]):([0-9][0-9]):([0-9][0-9])"
      re-tag-p "<p>"
      re-tag-br "<br>"
      re-empty-space-start "^([ \t\r]\n)+"
      re-empty-space-end "([ \t\r\n])+$"
    ends-with-p "<p>$"
  ])
  (set v (regcomp s))
)

(setq monthnames ["???" "January" "February" "March" "April" "May" "June"
    "July" "August" "September" "October" "November" "December"]
)

(defun read-w3b-file (filename id &aux
    (fd (open filename))
    line
    (mes (Message))
  )
  mes.id = id
  (skip-lines 2)
  (read-re re-title) mes.subject = (re-title 1)
  (skip-lines 8)
  line = (read-line fd)
  (if (re-from-truncated line) (nconc line (read-line fd)))
  (if (re-from-truncated2 line) (nconc line (read-line fd)))
  (if (re-from-truncated3 line) (nconc line (read-line fd)))

  (if (re-from line) (progn		;email given
      mes.email = (re-from 1)
      mes.from = (re-from 2)
      mes.date = (re-from 3)
    )
    (re-from2 line) (progn		;email and IP given
      mes.email = (re-from2 1)
      mes.from = (re-from2 2)
      mes.date = (re-from2 3)
    )
    (re-from3 line) (progn		;no email given
      mes.from = (re-from3 1)
      mes.date = (re-from3 2)
    )
    (re-from4 line) (progn		;no email given, IP
      mes.from = (re-from4 1)
      mes.date = (re-from4 2)
    )
    t (fatal-error 1 "Message %0: bad from line: %1\n" mes.id line)
  )
  (replace-string mes.date {regcomp "19100"} "2000" :all t :quote t) ;Y2K bug
  (if (re-date mes.date) 
    (with (month (seek monthnames (re-date 1)))
      (if (not month) 
	(error "Parse error WWWBoard file(1): %0\n    bad date: %1" mes.date)
      )
      mes.date = (list
	  (Int (re-date 3)) month (Int (re-date 2))
	  (Int (re-date 4))(Int (re-date 5))(Int (re-date 6))
    ))
    (error "Parse error WWWBoard file(2): %0\n    bad date: %1" mes.date)
  )
  line = (read-line fd)
  (if (re-reply line) (progn		;this is a reply to
      (while (not (ends-with-p line)) (nconc line (read-line fd)))
      (re-reply line)
      mes.in-reply-to = (re-reply 3)
      line = (read-line fd)
  ))
  (if (re-image line) (progn		;there is an embedded image
      mes.image = (re-image 1)
      line = (read-line fd)
  ))
  mes.body = line			;body fits on a line
  (replace-string line re-tag-br "\n" :all t :quote t) ;trim html tags
  (replace-string line re-tag-p "\n\n" :all t :quote t)
  (replace-string line re-empty-space-start "" :all t :quote t)
  (replace-string line re-empty-space-end "" :all t :quote t)
  (read-re re-br) 
  line = (read-line fd ())
  (if (re-link line) (progn		;embedded URL
      mes.link = (re-link 1)
      mes.linkname = (re-link 2)
      (read-re re-mess-end)	
    )
    (re-link2 line) (progn		;in case of \n at end of URL
      (read-re re-link3)
      mes.link = (re-link2 1)
      mes.linkname = (re-link3 1)
      (read-re re-mess-end)
    )
    ;; else we read re-mess-end, gobbled
    (re-mess-end line) ()
    t (error "Parse error WWWBoard file(3): %0\n    re: %1\n  line: %2" 
      filename re-mess-end line
    )
  )
  (read-re re-followups)
  mes.followups = (vector)
  line = (read-line fd ())
  (while (and line (not (re-end-followups line)))
    (if (re-followup line)
      (lappend mes.followups (re-followup 1))
    )
    line = (read-line fd ())
  )
  mes
)

(defun skip-lines (n &aux line) 
  (dotimes n line = (read-line fd ()))
  (if (not line) (error "Truncated WWWBoard file: %0" filename))
)

(defun read-re (re &optional ignore-err? &aux (line (read-line fd ())))
  ;; global: fd filename
  (if (not line) ; EOF
    (error "Truncated WWWBoard file: %0" filename)
    (re line)
    line
    (if ignore-err? ()
      (error "Parse error WWWBoard file(4): %0\n    re: %1\n  line: %2" 
	filename re line
    ))
  )
)

;; misc checks

(defun verify-html-body (messages &aux
    (re-html (regcomp "<(p|br)>"))
  )
   (dolist (mes messages)
     (doregexp (re "<[^>]*>" mes.body)
       (if (not (re-html (re 0)))
 	(PF "*** In message %0: html tag %1\n" mes.id (re 0))
))))

;;=============================================================================
;;                    Text output
;;=============================================================================
;; a mail-style format. ^L### separates mails
(defun print-text (messages &aux)
  (dolist (mes messages)
    (PF "%0 %1\n" (make-string 72 #\#) mes.id)
    (print-message mes)
  )
)

;; Individual messages
(defun print-message (message &optional (out  *standard-output*))
  (PF out "Subject: %0\n" mes.subject)
  (PF out "From: %0\n" mes.from)
  (if mes.email (PF out "Email: %0\n" mes.email))
  (with (d mes.date)
    (PF out "Date: %0-%1-%2 %3:%4:%5\n" d.0 (expand-num d.1 2) 
      (expand-num d.2 2) (expand-num d.3 2) (expand-num d.4 2)
      (expand-num d.5 2)
  ))
  (PF out "Id: %0\n" mes.id)
  (if mes.in-reply-to (PF out "In-Reply-To: %0\n" mes.in-reply-to))
  (when mes.followups 
    (PF out "Followups:")
    (dolist (f mes.followups) (PF out " %0" f))
    (PF out "\n")
  )
  (if mes.link 
    (PF out "Link-URL: %0\nLink-Name: %1\n" mes.link mes.linkname))
  (if mes.image (PF out "Image: %0\n" mes.image))
  (PF out "\n%0\n\n" mes.body)
)

(defun print-mboxfile (messages outname &aux
    (fd (open outname :direction :output :if-exists :supersede :error ()))
  )
  (if (not fd) (die 1 "Cannot create file %0\n" outname))
  (dolist (mes messages) (print-mboxentry mes fd))
)

print-mboxentry-re = (regcomp "\nFrom ")
print-mboxentry-re2 = (regcomp "^From ")

(defun print-mboxentry (mes out &aux
    (sender (if mes.email (PF String "%0 <%1>" mes.from mes.email)
	(PF String "%0" mes.from)
    ))
    (d (time-to-date (date-to-time mes.date)))
    sep body
  )
  (PF out "From %0 %1 %2 %3 %4:%5:%6 %7\n" mes.from
    (get '[Sun Mon Tue Wed Thu Fri Sat] d.6 "Mon") 
    (get '[Jan Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec] d.1 'Jan)
    d.2 d.3 d.4 d.5 d.0
  )
  (PF out "From: %0\n" sender)
  (PF out "Subject: %0\n" mes.subject)
  (PF out "Date: %0, %1 %2 %3 %4:%5:%6 +0000\n"
    (get '[Sun Mon Tue Wed Thu Fri Sat] d.6 "Mon") d.2 
    (get '[Jan Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec] d.1 'Jan)
    d.0 d.3 d.4 d.5
  )
  (PF out "Message-Id: <%0>\n" mes.id)
  (if mes.in-reply-to (PF out "In-Reply-To: <%0>\n" mes.in-reply-to))
;;  (when mes.followups 
;;    (PF out "X-Followups:")
;;    sep = ()
;;    (dolist (f mes.followups) 
;;      (if sep (PF out ",") sep = t)
;;      (PF out " <%0>" f)
;;    )
;;    (PF out "\n")
;;  )
  (if mes.link 
    (PF out "X-Link-URL: %0\nX-Link-Name: %1\n" mes.link mes.linkname))
  (if mes.image (PF out "X-Image: %0\n" mes.image))
  body = mes.body
  (replace-string  body print-mboxentry-re "\n From " :all t :quote t)
  (replace-string  body print-mboxentry-re2 " From " :quote t)
  (PF out "\n%0\n\n" mes.body)
)

(defun print-anyboard (messages anyboard &aux
    s fd
    (boundary "YXASASA9785432460_957438423854427")
    (re-id (regcomp "^[0-9]+"))
    desc (descs (copy ""))
  )
  *current-directory* = anyboard
  (dolist (mes messages)
    s = (copy "")
    fd = (open s :type :string :direction :output)
    desc = (anyboard-desc mes)
    (nconc descs desc "\n")
    (anyboard-items fd boundary 
      "aK" (anyboard-top-of-thread mes messages)
      "jE" (if mes.in-reply-to (String mes.in-reply-to) "0")
      "fI" mes.id
      "wW" (String mes.subject)
      "hC" (String mes.from)
      "wM" (date-to-time mes.date)
      "size" (String (length mes.body))
      "pQ" "a806c020"
      "xE" "0"
      "email" (String mes.email)
      "eZz" ""
      "to" ""
      "mood" ""
      "tP" (String mes.link)
      "rhost" ""
      "mtime" ""
      "scat" ""
      "track" "978542388.a806c020"
      "vers" "8.1.0.1 Free"
      "rlink_title" (String mes.linkname)
      "img" (String mes.image)
      "aliases" ""
      "body" mes.body
      "hack" (PF String "<!--X=%0-->" desc)
      "modifier" ""
    )
    (with (out (open (PF String "%0.dat" mes.id) :direction :output 
	  :if-exists :supersede
      ))
      (PF out "AB8DF
Content-type: multipart/form-data; boundary=%0
Content-disposition: form-data; name=message_data
Content-length: %1\n\n%2--%0--\n" boundary (length s) s
      )
      (close out)
    )
  )
  ;; create the message list (needed) above
  (with (msglist (open "/tmp/msglist" :direction :output :if-exists :supersede))
    (PF msglist "%0" descs)
    (close msglist)
  )
  (sh chmod a+rw "/tmp/msglist")
  (sh mv "/tmp/msglist" (+ anyboard "/../.msglist"))
)

(defun anyboard-desc (mes)
  (+ (PF String "%0	%1	%2	%3	%4	%5	%6	%7	%8	%9"
      (anyboard-top-of-thread mes messages)
      (if mes.in-reply-to (String mes.in-reply-to) "0")
      mes.id
      (String mes.subject)
      (String mes.from)
      (date-to-time mes.date)
      (length mes.body)
      "a806c020"
      "0"
    )
    (PF String "%0	%1	%2	%3	%4	%5	%6	%7	%8	%9	8.1.0.1 Free"
      "" ""  ""  "" 
      (String mes.link)
      "" ""  ""
      "978542388.a806c020"
  ))
)

(defun anyboard-items (fd boundary &rest args)
  (dohash (name body args)
    (anyboard-item fd boundary name body)
))

(defun anyboard-item (fd boundary name body)
  (setq body (String body))
  (PF fd "--%0\nContent-disposition: form-data; name=%1\nContent-type: text/plain\n\n%2%3"
    boundary name body (if ({regcomp "\n$"} body) "" "\n")
))
    
(defun anyboard-top-of-thread (m messages &aux
    new-m
  )
  (while (and m.in-reply-to 
      (setq new-m (message-of-id messages m.in-reply-to)))
    m = new-m
  )
  m.id
)

(defun message-of-id (messages id)
  (catch 'Found
    (dolist (m messages)
      (if (= id m.id) (throw 'Found m))
    )
    ()
  )
)

(main)

;;; EMACS MODES
;;; Local Variables: ***
;;; mode:lisp ***
;;; End: ***

