#!/bin/sh
: ; exec klone $0 "$@"
; The above line allows not to embed the exact path of the klone executable

;;Skeleton of a typical klone script
;;(kdb t)
;; TODO: verifier thread title ok
;; TODO: simpler display of --verbose, via numids
;; TODO: pouvoir matcher dates day, Month

;; Message separators are recognized by having:
;; Message-Id: <xmh-thread@separator>

(setq args (getopts "USAGE: %0 [options]
Threads the mail messages in current directory, supposed in MH format
It just reorders mails to group consecutive threads together.
Its algorithm is quite sophisiticated however, and should give good results 
Note: please dont abort (via ^C or DEL) the process while reoganising mails, or
the directory may be a mess. If this happens, rerun this script with the
-repair option. No mails can be lost anyways, some mails can just be named 
prefixed by _ "
    ("-v" () verbose "verbose operation")
    ("-n" () do-nothing "only parses things, but dont move mails")
    ("-t" () do-titles "create thread titles pseudo mails")
    ("-e" () end-marker "makes a pseudo-mail after other to mark the end")
    ("-q" () quiet "quiet mode (no warnings)")
    ("-u" () uniq "removes duplicated mails (slower)")
    ("-vv" () vverbose "very verbose operation, debug mode")
    ("-repair" () repair "repair after a previous run crashed or was aborted")
    ("-dir" dir godir "run into dir instead of current directory.
dir can be a file, goes into dir of file then")
    ("-debug" () enter-debugger-on-error "enter klone debugger on error")
    ("-stackdump" () stackdump-on-error "verbose stack dump on error")
    ("-D" () DEBUG "specific debug mode")
    ("-kdb" () enter-kdb "enter kdb at end")
))

(setq thread-span-limit 2600000)	; if more than 1 month, warn

(if enter-debugger-on-error (kdb t))
(if stackdump-on-error (stack-dump-on-error t))
(if vverbose (setq verbose t))

(setq re-num (regcomp "^[0-9]+$"))
(setq re-dir (regcomp "^(.*)[/]([^/]*)$"))

(defun main (&aux 
    (n 0) 
    (start-time (get-internal-run-time))
  )
  (if godir (progn
      (if (/= 'directory (file-type godir))
	(if (regexec re-dir godir)
	  (setq godir (regsub re-dir 1))
      ))
      (setq *current-directory* godir))
  )
  (when repair (do-repair) (exit 0))

  (verbose? "Parsing mails (one . per 100 mails)")
  (dolist (file (directory)) 
    (when (regexec repair-re file)
      (setq file (do-repair-file file t))
    )
    (when (regexec re-num file)
      (Parse-mail file)
      (when verbose (incf n) (if (= 0 (mod n 100)) (verbose? :n ".")))
    )
  )
  (if uniq (remove-duplicates))
  (if (>= n 100) (verbose? ""))
  (verbose? "%0 mails parsed, computing threads" (length mails))
  (validate-refs)
  (treat-re)
  (Parse-threads)
  (Sort-threads)
  (compute-new-names)
  ;; from here the mails structures are fully updated
  (if vverbose (print-threads))
  (if DEBUG (Debug:test-order))
  (move-mails)
  (check-thread-spans)
  (if end-marker (put-end-marker))
  (verbose? "Done in %0 seconds\n"
    (/ (- (get-internal-run-time) start-time) 1000)
  )
  (if enter-kdb (kdb))
)

(defun compute-new-names (&aux (n 1))
  ;; first set up things: new-num fields, and nums table
  (dolist (thread threads)		;compute new nums
    (when (and thread do-titles)	;create thread title fake mails
      (create-title n thread)
      (incf n)
    )
    (dolist (mail thread)
      (when (Mail-num mail)
	(Mail-new-num mail n)		;compute where it should go
	(incf n)
  )))
)
  
(defun move-mails (&aux (n 1) (N 0))
  ;; now, new-num fields are Ok
  (unless do-nothing
    (when verbose
      (with (n 0)
	(dolist (mail mails) 
	  (when (and (Mail-id mail)	;dont count title pseudo mails
	      (/= (String (Mail-num mail)) (String (Mail-new-num mail)))
	    )
	    (incf n)
	))
	(verbose? "Moving %0 mails" n)
    ))
    (with (move-mails:stack (list) move-mails:redo (list))
      (dolist (mail mails) (move-mail mail))
      (while move-mails:redo
	(with (todo move-mails:redo)
	  (setq move-mails:redo (list))
	  (dolist (mail todo) (move-mail mail))
  ))))
  (if (>= N 100) (verbose? ""))
  (verbose? "Waiting for shell to finish moves")
  (async-shell-command "rm -f .xmhcache")
  (async-shell-command :exit)
)

(setq move-mails:stack-limit 100)	; limit to avoid stack overflow
(defun move-mail (mail &aux to-displace
  )
  (when (/= (String (Mail-num mail)) (String (Mail-new-num mail)))
    (when verbose (incf N) (if (= 0 (mod N 100)) (verbose? :n ".")))
    (if (or (> (length move-mails:stack) move-mails:stack-limit)
	(seekq move-mails:stack mail)) ;stop recursion, move out of the way
      (with (temp-name (+ "_" (String (Mail-new-num mail))))
	(move-file (Mail-num mail) temp-name)
	(Mail-num mail temp-name)
	(lappend move-mails:redo mail)	;will retry next pass
      )
      (progn
	;; must move?
	(if (setq to-displace		;somebody in the way? recurse
	    (getn nums (String (Mail-new-num mail))))
	  (with (move-mails:stack (+ move-mails:stack (list mail)))
	    (move-mail to-displace)
	))
	(move-file (Mail-num mail) (Mail-new-num mail))
	(Mail-num mail (Mail-new-num mail))
    ))
))

(defstruct Mail
  num					;number of the mail (filename in MH)
                                        ; can be () for fake mails (targets)
  new-num				;where will it be moved?
  date					;date of mail, can be ()
  deldate				;delivery date of mail, can be ()
  subject				;subject line
  from					;for info only
  id					;message-id field
  ref					;in-reply-to
  refs					;references
  re					;is Re: to something (id)
  thread				;thread it belongs to
  dist					;weight computation for sorting
  cache					;plist of ids/dists
)

(defvar mails (vector))			;list of all read mails
(defvar mail-ids (Hashtable ()))	;p-list of ids/mails
(defvar threads (vector))		;list of all threads
(defvar subjects (Hashtable ()))	;p-list of re-less subjects/mails
(defvar nums (Hashtable ()))		;p-list of filenames / mails

;; create a Mail structure from the filename contents, appends it to the list

;; first re to skip non-interesting ones to speed things up
(setq Parse-mail:re-interesting (re-nocase "^[fsmird]"))
(setq Parse-mail:re-subject (re-nocase "^Subject:[ \t]*(.*)$")) ;1 contents
(setq Parse-mail:re-from (re-nocase "^From:[ \t]*(.*)$")) ;1 contents
(setq Parse-mail:re-id (re-nocase "^Message-Id:[ \t]*(.*[^ \t])[ \t]*$")) ;1 id
(setq Parse-mail:re-ref (re-nocase "^In-Reply-To:[ \t]*(<[^>]*>)")) ;1 id
(setq Parse-mail:re-refs (re-nocase "^References:[ \t]*(.*[^ \t])[ \t]*$")) ;1 id
(setq Parse-mail:re-deldate (re-nocase (+ "^Delivery-Date:"
      "[ \t]*([^ \t]+)"			;1 name of day
      "[ \t]*([^ \t]+)"			;2 name of month
      "[ \t]*([0-9]+)"			;3 day
      "[ \t]*([0-9][0-9]):([0-9][0-9]):([0-9][0-9])" ;4 hour 5 mn 6 s
      "[ \t]*(([0-9][0-9])?[0-9][0-9])" 	;7 year
      "[ \t]*$"))
)
(setq Parse-mail:re-date (re-nocase (+ "^Date:"
      "[ \t]*([^ \t]+,)?"		;1 name of day
      "[ \t]*([0-9]+)"			;2 day
      "[ \t]*([^ \t]+)"			;3 name of month
      "[ \t]*(([0-9][0-9])?[0-9][0-9])" 	;4 year
      "[ \t]*([0-9][0-9]):([0-9][0-9]):([0-9][0-9])" ;6 hour 7 mn 8 s
      "[ \t]*([^ \t]*)"			;9 timezone
      "([ \t]*| .*)$"))
)
(setq Parse-mail:re-timezone (regcomp "[+-][0-9][0-9][0-9][0-9]"))
(setq Parse-mail:re-continuation (regcomp "^[ \t]+([^ \t].*)$"))


(setq title-id "<xmh-thread@separator>")

;; we must parse mails:
;; date based on Delivery-Date:, if not on Date
;; id on Message-Id:, if not on with SMTP id 

(defun Parse-mail (filename &aux
    (mail (make-Mail :num (Int filename)))
    (fd (open filename 
	:error '(fatal-error 1 "Cannot Open file %0\n" filename)))
    curline
    m2
  )
  (catch 'HeaderEnd
    (doline (line fd)
      (if (regexec Parse-mail:re-continuation line)
	(nconc curline (regsub Parse-mail:re-continuation 1))
	(progn
	  (if curline 
	    (if (regexec Parse-mail:re-interesting curline)
	      (Parse-headerline curline mail)
	  ))
	  (setq curline line)
	  (if (= "" line) (throw 'HeaderEnd))
  ))))

  (if (not (valid-time? (Mail-date mail)))
    (Mail-date mail ())
  )
  (if (not (Mail-date mail))
    (if (valid-time? (Mail-deldate mail))
      (Mail-date mail (Mail-deldate mail))
      (fix-invalid-date mail)
    )
  )
  (if (not (Mail-subject mail)) (Mail-subject mail (copy "")))
  (if (Mail-from mail)
    (Mail-from mail (fix-from (Mail-from mail)))
    (Mail-from mail (copy ""))
  )
  
  ;; DEBUG
  (if (not (Mail-date mail)) (PFq "No date in %0\n" filename))
  (when (not (Mail-id mail))
    (PFq "No id in %0\n" filename)
    ;; generate an ID with body length & cksum: <xmh-thread:length@checksum:num>
    (with (body () fbody (open (String (Mail-num mail))) 
	fp 0 fh 0 f2 () head ())
      (while (/= "" (read-line fbody "")) (setq fh (file-position fbody)))
      (setq fp (file-position fbody))
      (while (= "" (read-line fbody ())) (setq fp (file-position fbody)))
      (file-position fbody fp)
      (setq body (String fbody))
      (Mail-id mail (PF String "<xmh-thread:%0@%1:%2>" 
	  (length body) (*:hash body) (Mail-num mail)))
      (setq f2 (open (+ (String (Mail-num mail)) ",new") 
	  :direction :output :if-exists :supersede
      ))
      (file-position fbody 0)
      (setq head (read-chars fh fbody))
      (PF f2 "%0Message-Id: %1\n\n%2" head (Mail-id mail) body)
      (close f2) (close fbody)
      (wait (system (list "mv"
	    (+ (String (Mail-num mail)) ",new") (String (Mail-num mail))
      )))
  ))
;;  (if (and (Mail-refs mail) (not (Mail-ref mail)))
;;    (PFq "Refs but not reply-to in %0\n" filename)
;;  )
  (with (m2 (getn mail-ids (Mail-id mail)))
    (if m2
      (with (
	  s1 (get (file-stats (String (Mail-num mail))) 'size 0)
	  s2 (get (file-stats (String (Mail-num mail))) 'size 0)
	)
	(if (>= s2 s1) (progn
	    (PFq "Warning: mail %0 ignored (duplicated ID with mail %1)\n"
	      (Mail-num mail) (Mail-num m2)
	    )
	    (remove-file (String (Mail-num mail)))
	  )
	  (progn
	    (PFq "Warning: mail %1 ignored (duplicated ID with mail %0)\n"
	      (Mail-num mail) (Mail-num m2)
	    )
	    (put mail-ids (Mail-id mail) mail)
	    (delete-item-eq mails m2)
	    (remove-file (String (Mail-num m2)))
	  )
      ))
      (if (= title-id (Mail-id mail)) (progn
	  (if (not do-nothing) (sh rm "-f" ,(Mail-num mail)))
	) (progn
	  (put mail-ids (Mail-id mail) mail)
	  (lappend mails mail)  
	  (put nums filename mail)
      ))
  ))
  mail
)

(setqn ffrep (regcomp "[(]([^)]+)[)]"))
(setqn ffreq (regcomp "^[\"]([^\"]+)[\"]"))
(setqn ffreb (regcomp "^(.+)[<]([^>]+)[>\]"))
(setqn ffres (regcomp "^[ \t]*(.*[^ \t])[ \t]*$"))

(defun fix-from (s)
  (if (regexec ffreb s) (setq s (regsub ffreb 1)))
  (if (regexec ffrep s) (setq s (regsub ffrep 1)))
  (if (regexec ffreq s) (setq s (regsub ffreq 1)))
  (if (regexec ffres s) (setq s (regsub ffres 1)))
  s
)

(defun remove-file (name)
  (if (not do-nothing) (async-shell-command (PF String "rm -f %0" name)))
)

(setq timezones [
  "UTC" 0 "GMT" 0 "AST" 4 "ADT" 3 "EST" 5 "EDT" 4 "CST" 6 "CDT" 5
  "MST" 7 "MDT" 6 "PST" 8 "PDT" 7 
  ]
)

;; supposes the date has matched Parse-mail:re-date
(defun Parse-headerdate ()
  (+ 
    (date-to-time (list
	(with (y (Int (regsub Parse-mail:re-date 4)))
	  (if (> y 100) y (if (> y 69) (+ 1900 y) (+ 2000 y)))
	)
	(monthnum (regsub Parse-mail:re-date 3))
	(Int (regsub Parse-mail:re-date 2))
	(Int (regsub Parse-mail:re-date 6))
	(Int (regsub Parse-mail:re-date 7))
	(Int (regsub Parse-mail:re-date 8))
    ))
    (if (regexec Parse-mail:re-timezone (regsub  Parse-mail:re-date 9))
      (- (* (Int (regsub  Parse-mail:re-date 9)) 36))
      (* 3600 (get timezones (regsub  Parse-mail:re-date 9) 0))
    )
))

(defun Parse-headerline (line mail)
  (if
    (regexec Parse-mail:re-date line)
    (if (= (Mail-id mail) title-id) ()
      (Mail-date mail (Parse-headerdate))
    )

    (regexec Parse-mail:re-deldate line)
    (Mail-deldate mail (date-to-time (list
	  (Int (regsub Parse-mail:re-deldate 7))
	  (monthnum (regsub Parse-mail:re-deldate 2))
	  (Int (regsub Parse-mail:re-deldate 3))
	  (Int (regsub Parse-mail:re-deldate 4))
	  (Int (regsub Parse-mail:re-deldate 5))
	  (Int (regsub Parse-mail:re-deldate 6))
    )))
    
    (regexec Parse-mail:re-subject line)
    (Mail-subject mail (trim-spaces (regsub Parse-mail:re-subject 1)))
    
    (regexec Parse-mail:re-from line)
    (Mail-from mail (trim-spaces (regsub Parse-mail:re-from 1)))
    
    (regexec Parse-mail:re-id line)
    (Mail-id mail (regsub Parse-mail:re-id 1))
    
    (regexec Parse-mail:re-ref line)
    (Mail-ref mail (regsub Parse-mail:re-ref 1))
    
    (regexec Parse-mail:re-refs line) 
    (Mail-refs mail (list-of-refs (regsub Parse-mail:re-refs 1)))
  )
)

(defmacrod valid-time? (date)
  `(and ,date
     (< ,date Mail-valid-date:ct) ;future date
    (> ,date 347151600) ;; 1980
))

(setq received-date-re (regcomp (+
      "^[Rr]eceived:.*((Mon|Tue|Wed|Thu|Fri|Sat|Sun),[ \t]+[0-9]+[ \t]+"
      "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[ \t]+[0-9]+[ \t]+"
      "[0-9:]+[ \t]+"
      "[-+][0-9][0-9][0-9][0-9])"
)))
(defun fix-invalid-date (mail &aux
    (file (String (Mail-num mail)))
    (fd (open file))
    curline
  )
  (catch 'Valid
    (catch 'HeaderEnd
      (while (setq line (read-line fd ()))
	(if (regexec Parse-mail:re-continuation line)
	  (nconc curline (regsub Parse-mail:re-continuation 1))
	  (progn
	    (if curline 
	      (if (regexec received-date-re curline) 
		(if (regexec Parse-mail:re-date 
		    (+ "Date: " (regsub received-date-re 1)))
		  (with (date (Parse-headerdate))
		    (if (valid-time? date)
		      (throw 'Valid (Mail-date mail date))
		)))
	    ))
	    (setq curline line)
	    (if (= "" line) (throw 'HeaderEnd))
	  )
    )))

    (with (stats (file-stats file))
      (Mail-date mail (min (get stats 'mtime) (get stats 'ctime))
    ))
  )
)

(defun create-fake-mail (id subject)
  (if vverbose (PF "creating fake mail of id %0\n" id))
  (put mail-ids id (make-Mail :subject subject :id id))
)

(defmacrod is-fake? (mail) `(not (Mail-num ,mail)))

(defun delete-fake-mails (&aux m)
  ;; remove from threads
  (dolist (thread threads)
    (dolist (mail (copy thread))
      (if (is-fake? mail) (delete-item-eq thread mail))
  ))
  ;; remove mail refs pointing to fakes
  (dolist (mail mails)
    (if (is-fake? (mailid (Mail-ref mail)))
      (Mail-ref mail ())
    )
    (if (is-fake? (mailid (Mail-re mail)))
      (Mail-re mail ())
    )
    (dolist (mail2 (copy (Mail-refs mail)))
      (if (is-fake? (mailid (Mail-re mail2)))
	(delete-item-eq (Mail-refs mail) mail2)
    ))
  )
)

(setq re-subject-re (regcomp "^[ \t]*[Rr][Ee]:[ \t]*(.*)$"))

(defun treat-re (&aux m2 s (l (list)) subject)
  ;; store no-re messages subjects
  (dolist (mail mails)
    (if (Mail-subject mail)
      (if (regexec re-subject-re (Mail-subject mail))
	(lappend l mail)
	(add-subjects (Mail-subject mail) mail)
  )))
  
  ;; store all Re subjects
  (dolist (mail l)
    (if (regexec re-subject-re (Mail-subject mail))
      (add-subjects (regsub re-subject-re 1) mail)
  ))
  ;; then mark RE: mails without refs as RE: to these subjects
  (dolist (mail l)
    (if (Mail-subject mail)
      (when (regexec re-subject-re (Mail-subject mail))
	(setq subject (trim-spaces (regsub re-subject-re 1)))
	(setq m2 (getn subjects subject))
	(if 
	  (and m2 (not (eq m2 mail)))
	  (Mail-re mail (Mail-id m2))
	  
	  (setq m2 (subpart-of-subject mail subjects subject))
	  (Mail-re mail (Mail-id m2))
	)
    ))
))

(defun subpart-of-subject (mail subjects subject &aux
    (re (regcomp (+ "^" (quote-string-for-regexp subject))))
  )
  (catch 'Found
    (dohash (sub m subjects)
      (if (and (regexec re sub) (/= m mail))
	(throw 'Found m)
    ))
    ()
  )
)

(defun add-subjects (s mail &aux m)
  (setq s (trim-spaces s))
  (setq m (getn subjects s))
  (if m
    (if (depends m mail) 
      (put subjects s mail)
    )
    (put subjects s mail)
  )
)

(setq trim-spaces:re (regcomp "^[ \t\n\r]*(.*[^ \t\n\r])[ \t\n\r]*$"))
(defun trim-spaces (s)
  (if (and (typep s String) (regexec trim-spaces:re s))
    (regsub trim-spaces:re 1)
    s
))

(defun validate-refs ()
  (dolist (mail mails)
    (when (and (Mail-ref mail)
	(not (getn mail-ids (Mail-ref mail)))
      )
      (create-fake-mail (Mail-ref mail) (Mail-subject mail))
    )
    (dolist (ref (Mail-refs mail))
      (when (not (getn mail-ids ref))
	(create-fake-mail ref (Mail-subject mail))
))))

;; compare date, ignoring dates in the future (+ 2 days) or less than 1971
(setq Mail-valid-date:ct (+ (cur-time) (* 3600 50)))

;; a thread is a list of mails
(defun Parse-threads (&aux 
    (put-in-thread:curmail (vector))	;stack of examinated mails
    (suspect-threads (list))		;list of threads to examine after
  )
  (dolist (mail mails)
    (put-in-thread mail)
  )
  (delete-fake-mails)
  (fix-empty-dates)

  (if suspect-threads (examine-suspect-threads suspect-threads))
  (dolist (thread threads)
    (sort-thread thread)
  )
)

(setq put-in-thread:attempt? ())

(defun put-in-thread (mail &aux
    (put-in-thread:curmail (+ put-in-thread:curmail (list mail)))
  )
  (if (Mail-ref mail)
    (put-in-thread-of (getn mail-ids (Mail-ref mail)) mail)
  )
  (if (not (Mail-thread mail)) 
    (dolist (m (Mail-refs mail))
      (put-in-thread-of (getn mail-ids m) mail)
  ))
  (if (not (Mail-thread mail)) 
    (if (Mail-re mail)
      (if put-in-thread:attempt?
	(put-in-thread-of (getn mail-ids (Mail-re mail)) mail)
	(with (put-in-thread:attempt? t)
	  (catch 'Attempt
	    (put-in-thread-of (getn mail-ids (Mail-re mail)) mail)
  )))))

  (if (not (Mail-thread mail)) 
    (with (new-thread (vector mail))
      (Mail-thread mail (length threads))
      (lappend threads new-thread)	;new thread
      (is-suspect? mail)
  ))
)

(defun put-in-thread-of (mt mail &aux)
  (if mt 
    (progn
      (if (not (Mail-thread mt)) 
	(if (seekq put-in-thread:curmail mt) ; we are looping
	  (if put-in-thread:attempt? ;first, try to cut the re dependency
	    (throw 'Attempt ())		;this attempt failed
	    (with (new-thread (vector mt)) ;it failed, start a new thread
	      (PFq "Warning: looping while examining mail %0\n" mt)
	      (dolist (m put-in-thread:curmail)
		(print-mail-line m (Mail-num m) t)
	      )
	      (Mail-thread mt (length threads))
	      (lappend threads new-thread)
	      (is-suspect? mail)
	  ))
	  (put-in-thread mt)
      ))
      (if (Mail-thread mail) 
	(if (/= (Mail-thread mail) (Mail-thread mt))
	  (if (and (is-thread-head-fake? mt) 
	      (not (is-thread-head-fake? mail))
	    )
	    (merge-thread (get threads (Mail-thread mt)) 
	      (get threads (Mail-thread mail))
	    )
	    (is-thread-head-fake? mail) 
	    (merge-thread (get threads (Mail-thread mail)) 
	      (get threads (Mail-thread mt))
	    )
	    (progn
	      (PFq "Warning: mail %0 belongs to 2 threads: %3 of %1 and %4 of %2\n" 
		(Mail-num-or-id mail)
		(Mail-num-or-id (0 (get threads (Mail-thread mail))))
		(Mail-num-or-id (0 (get threads (Mail-thread mt))))
		(Mail-thread mail) (Mail-thread mt)
	      )
	      (change-thread mail mt)
	  ))
	)
	(with (thread (get threads (Mail-thread mt)))
	  (Mail-thread mail (Mail-thread mt))
	  (lappend thread mail)
;;	  (PFq "mail %0 put in thread %1 of mail %2 [%3]\n"
;;	    (Mail-num mail) (Mail-thread mt)  (Mail-num mt) (length thread)
;;	  )
	)
      )
  ))  
)
   
;; creating a thread with a Re: message is suspect...
(defun is-suspect? (mail &aux s)
  (if (and (regexec re-subject-re (Mail-subject mail))
      (not (seekq suspect-threads (getn threads (Mail-thread mail))))
    )
    (lappend suspect-threads (getn threads (Mail-thread mail)))
))

;; remove the optional Re: parts
(setq re-subject-radix (regcomp "^([Rr][Ee]: *)*([^ \t].*)$"))
(defun subject-radix (s)
  (if (regexec re-subject-radix s)
    (regsub re-subject-radix 2)
    s
))

(defun examine-suspect-threads (suspect-threads &aux 
    mail possible-threads subject
  )
  ;; build hashtable of subjects, list of threadnums
  (setq all-subjects (Hashtable ()))
  (dolist (mail mails)
    (when (> (length (Mail-subject mail)) 0)
      (setq subject (subject-radix (Mail-subject mail)))
      (with (l (getn all-subjects subject))
	(if l (if (not (seek l (Mail-thread mail)))
	    (lappend l (Mail-thread mail))
	  )
	  (put all-subjects subject (list (Mail-thread mail)))
  ))))

  ;; Then for each suspect thread, look if the leader has same subject as
  ;; another thread, then choose the closer by date and merge the thread
  (dolist (thread suspect-threads)
    (when thread
      (setq mail (0 thread))
      (setq possible-threads (list))
      (when (> (length (Mail-subject mail)) 0)
	(setq subject (subject-radix (Mail-subject mail)))
	(with (tnums (getn all-subjects subject))
	  (dolist (tnum tnums)
	    (if (and (/= tnum (Mail-thread mail))
		(get threads tnum)
	      )
	      (lappend possible-threads (get threads tnum))
	  ))
	  ;; sort threads by closeness to mail date
	  (when possible-threads
	    (sort possible-threads compare-threads-by-proximity-to-mail)
	    (verbose? "Thread %0 seem to be part of %1, merging"
	      (Mail-thread mail) (seekq threads (0 possible-threads))
	    )
	    (merge-thread (get threads (Mail-thread mail)) (0 possible-threads))
))))))

(defun compare-threads-by-proximity-to-mail (t1 t2)
  (compare 
    (abs (- (Mail-date (0 t1)) (Mail-date mail)))
    (abs (- (Mail-date (0 t2)) (Mail-date mail)))
))

(defun fix-empty-dates ()
  (dolist (thread threads)
    (unless (Mail-date (0 thread))
      (catch 'Found
	(dolist (mail thread)
	  (if (Mail-date mail) (throw 'Found
	      (Mail-date (0 thread) (Mail-date mail))
	)))
  )))
)

(defun change-thread (m1 m2)
  (change-thread-into m1 m2)
  (change-thread-into m2 m1)
)

(defun change-thread-into (m1 m2 &aux
    (t1 (get threads (Mail-thread m1)))
    (t2 (get threads (Mail-thread m2)))
  )
  (if (catch 'Done
      (dolist (mail t1)
	(if (not
	    (catch 'Depend
	      (dolist (mail2 t2)
		(if (depends mail mail2) (throw 'Depend t))
	      )
	      ()
	  ))
	  (throw 'Done ())
	)
      )
      t
    ) 
    (merge-thread t1 t2)
))

;; put all mails of t1 into t2, empties t1
(defun merge-thread (t1 t2 &aux (t2num (seekq threads t2))
    (m1 (0 t1))
    (m2 (0 t2))
  )
  (when verbose
    (PF "Merging thread %2 of %0 to %3 of %1\n" (Mail-num m1) (Mail-num m2)
      (Mail-thread m1) (Mail-thread m2)
    )
    (print-thread-line t1)
    (print-thread-line t2)
  )
  (dolist (mail t1)
    (lappend t2 mail)
    (Mail-thread mail t2num)
  )
  (while t1 (delete t1 0))		;empty t1
)

(defun is-thread-head-fake? (mail)
  (is-fake? (0 (get threads (Mail-thread mail))))
)


;; add mail tracing code if vverbose
;; trace only mails in DM it DM is non-nil
(setq DM ())
;;(setq DM [1 2])
(defun TM (s m n)
  (if (or DM (seek DM (Mail-num m)))
    (PF "##%0[%2]: %r1\n" s m n))
)

(if vverbose (progn
    (setq vvn 0)
    (put put-in-thread 'body (list 
	`(with (vvn (+ 1 vvn)) 
	  (TM "PUT-IN-THREAD" mail vvn)
	  ,@(get put-in-thread 'body) 
	  (PF "==put-in-thread[%0]: mail #%1 is in thread %2\n"
	    vvn (Mail-num mail) (Mail-thread mail)
	))
    ))
    (put put-in-thread-of 'body (list 
	`(with (vvn (+ 1 vvn)) 
	  (TM "PUT-IN-THREAD-of-mt" mt vvn)
	  (TM "PUT-IN-THREAD-of-mail" mail vvn)
	  ,@(get put-in-thread-of 'body) 
	  (PF "==put-in-thread-of[%0]: mt %3 in in thread %4, mail #%1 is in thread %2\n"
	    vvn (Mail-num mail) (Mail-thread mail)
	    (Mail-num mt) (Mail-thread mt)
	))
    ))
))

;; checks if a thread do not have mails too distant in dates  
(defun check-thread-spans ()
  (dolist (thread threads) (check-thread-span thread))
)

(defun check-thread-span (thread &aux m M d)
  (dolist (mail thread)			;init things
    (if (setq d (Mail-date mail)) (progn
	(if (or (not m) (< d m))
	  (setq m d)
	)
	(if (or (not M) (> d M))
	  (setq M d)
  ))))
  (if (and m M (> (- M m) thread-span-limit)) ;; more than one month long, warn
    (PFq "Thread too long, mails: %0\n" 
      (map List (lambda (m) (Mail-new-num m)) thread)
  ))
)

(defun depends-sort (m1 m2 &aux res)
  (if (depends m1 m2)
    (if (depends m2 m1)
      (compare-dates m1 m2)
      -1
    )
    (if (depends m2 m1)
      1
      (compare-dates m1 m2)
    )
  )
)

(defun compare-dates (m1 m2 &aux res)
  (if (and (Mail-date m1) (Mail-date m2))
    (if (/= 0 (setq res (compare (Mail-date m1) (Mail-date m2))))
      res
      (compare (Mail-id m1) (Mail-id m2))
    )
    (compare (Mail-id m1) (Mail-id m2))
))

(setq Thread-sort depends-sort)

(defun depends (m1 m2 &aux (id2 (Mail-id m2)))
  (or
    (= id2 (Mail-ref m1))
    (catch 'Done
      (dolist (id (Mail-refs m1))
	(if (= id2 id) (throw 'Done t))
      )
      ()
    )
    (and (regexec re-subject-re (Mail-subject m1))
      (= (trim-spaces (regsub re-subject-re 1)) (Mail-subject m2))
    )
  )
)

(defun Sort-threads ()
  (sort threads 
    (lambda (t1 t2 &aux (d1 0) (d2 0))
      (dolist (mail t1)
	(if (and (Mail-date mail) (> (Mail-date mail) d1))
	  (setq d1 (Mail-date mail))
      ))
      (dolist (mail t2)
	(if (and (Mail-date mail) (> (Mail-date mail) d2))
	  (setq d2 (Mail-date mail))
      ))
      (compare d1 d2)
  ))
)

(defun monthnum (name &aux
    (pos (seek ["Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep"
	"Oct" "Nov" "Dec"] name 
    ))
  )
  (if pos (+ pos 1)
    (fatal-error 1 "Bad Deliverable-date month name: %0 for mail %1\n" 
      name (Mail-num mail))
))

(defun compact-date (time)
  (if time
    (with (date (time-to-date time))
      (PF String "%0-%1-%2,%3:%4" (0 date)
	(expand-num (1 date) 2)
	(expand-num (2 date) 2)
	(expand-num (3 date) 2)
	(expand-num (4 date) 2)
      )
    )
    (PF String "<INVALID DATE>")
  )
)

(setq list-of-refs:re (regcomp "<[^>]*>"))
(defun list-of-refs (line &aux (offset 0) (res (list)))
  (while (regexec list-of-refs:re line offset)
    (lappend res (regsub list-of-refs:re 0))
    (setq offset (1 (get list-of-refs:re 0)))
  )
  res
)

(defun mailnum (n) (catch 'Found (dolist (mail mails)
      (if (= n (Mail-num mail)) (throw 'Found mail))
)))

(defmacrod mailid (n) `(getn mail-ids ,n))



(setq trim-spaces:re (regcomp "^[ \t\n\r]*(.*[^ \t\n\r])[ \t\n\r]*$"))
(defun trim-spaces (s)
  (if (and (typep s String) (regexec trim-spaces:re s))
    (regsub trim-spaces:re 1)
    s
))

(defun print-mail-line (mail &optional (n (Mail-num mail)) dependencies?)
  (if do-nothing (setq n (Mail-num mail)))
  (PF "  %0 %1\n" n (if (Mail-subject mail) 
      (subseq (Mail-subject mail) 0 (- 76 (length (String n))))
      ""
  ))
  (when dependencies?
    (if (Mail-ref mail)
      (PF "    in reply to %0\n" (Mail-num (mailid (Mail-ref mail))))
    )
    (when (Mail-refs mail)
      (PF "    refers to")
      (dolist (m (Mail-refs mail)) (PF " %0" (Mail-num (mailid m))))
      (PF "\n")
    )
    (if (Mail-re mail) 
      (PF "    subjects Re: to %0\n" (Mail-num (mailid (Mail-re mail))))
    )
  )
)

(defun print-thread-line (thread)
  (PF "Thread %0:\n" (seekq threads thread))
  (dolist (mail thread)
    (if (Mail-num mail)
      (print-mail-line mail (Mail-num mail) t)
)))

(defun print-threads (&aux)
  (dolist (thread threads)
    (if thread (progn
	(PF "%3 Thread %0 %1 -> %2 %3%3%3\n" (seekq threads thread)
	  (compact-date (Mail-date (0 thread)))
	  (compact-date (Mail-date (get thread -1)))
	  (make-string 4 create-title:char)
	)
	(dolist (mail thread)
	  (when (Mail-num mail)
	    (print-mail-line mail 
	      (if do-nothing (Mail-num mail) (Mail-new-num mail))
	)))
))))

(setq create-title:char #\=)
(defun create-title (n thread &aux 
    (filename (if (getn nums (String n)) 
	(+ "_" (String n))
	(String n)
    ))
    (fd (open filename :direction :output :if-exists :supersede))
    (date (date-in-mail-format (time-to-date (Mail-date (0 thread)))))
    (w 1) sl
    title
    (num n)
  )
  (PF fd "Message-Id: %0\nDate: %1\nFrom: %2\nSubject: %3 %4 -> %5 %3\n\n \n"
    title-id date (make-string 20 create-title:char)
    (make-string 4 create-title:char)
    (compact-date (Mail-date (0 thread)))
    (compact-date (Mail-date (get thread -1)))
  )
  (dolist (mail thread)
    (if (> (length (String (Mail-num mail))) w)
      (setq w (length (String (Mail-num mail))))
  ))
  (setq sl (- 52 w))
  (dolist (mail thread)
    (when (Mail-num mail)
      (incf num)			;new-nums are not yet computed
      (with (date (time-to-date (Mail-date mail)))
	(PF fd "%0%1  %5/%6 %2 %4\n"
	  (make-string (- w (length (String (Mail-num mail))))) ;0
	  num		;1
	  (subseq (Mail-from mail) 0 17)	;2
	  ()
	  (subseq (Mail-subject mail) 0 sl) ;4
	  (expand-num (1 date) 2)	;5
	  (expand-num (2 date) 2)	;6
  ))))
  
  (close fd)
  (lappend mails (setq title (make-Mail :new-num n :num filename)))
  (put nums filename title)
)

(defun put-end-marker (&aux (n 1))
  (dolist (mail mails) 
    (if (>= (Mail-new-num mail) n)
      (setq n (+ (Mail-new-num mail) 1))
    )
  )
  (create-marker-mail n (cur-time))
)

(defun create-marker-mail (n datenum &aux
    (filename (String n))
    (fd (open filename :direction :output :if-exists :supersede))
    (date (date-in-mail-format (time-to-date datenum)))
    title
  )
  (PF fd "Message-Id: %0\nDate: %1\nFrom: %2\nSubject: %3 \n\nThreaded at %4\n"
    title-id date (make-string 20 create-title:char)
    (make-string 50 create-title:char)
    date
  )
  (close fd)
  (lappend mails (setq title (make-Mail :new-num n :num filename)))
  (put nums filename title)
)

(defun date-in-mail-format (d)
  (if (not d) (setq d (cur-date)))
  (+					;formats for html
    (get '("???" "Sun" "Mon" "Tue" "Wed" "Thu" "Fri" "Sat" ()) (6 d) "???")
    ", " (String (2 d)) " "
    (get '("???" "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" 
	"Nov" "Dec") (1 d) "???")
    " " (String (0 d)) " "
    (expand-num (get d 3 "??") 2) ":"
    (expand-num (get d 4 "??") 2) ":"
    (expand-num (get d 5 "??") 2) " +0000"
  )
)

(defun move-file (old new)
  (if (not (and old new)) (fatal-error 1 "bad command: mv %0 %1\n" old new))
  (async-shell-command (PF String "mv %0 %1" old new))
)

;; executes a shell command in background
;; (async-shell-command ()) flushes, (async-shell-command :exit) terminates
(defvar async-shell-command:pid ())
(defvar async-shell-command:in ())
(defvar async-shell-command:count 0)
(defvar async-shell-command:pack 10) ;flush every commands

(defun async-shell-command (s)
  (if 
    (= s :exit) (if async-shell-command:pid (progn
	(PF async-shell-command:in "\nexit 0\n")
	(flush async-shell-command:in)
	(wait async-shell-command:pid)
	(setq async-shell-command:pid ())
	(setq async-shell-command:in ())
    ))

    s (progn 
      (if (not async-shell-command:pid)
	(setq async-shell-command:pid (system (getenv "SHELL") :input 
	    'async-shell-command:in
      )))
      (PF async-shell-command:in "%0\n" s)
      (if (= 0 (mod async-shell-command:count async-shell-command:pack))
	(flush async-shell-command:in)
      )
      (incf async-shell-command:count)
    )

    (flush async-shell-command:in)
  )
)

(setq repair-re (regcomp "^_([0-9]+)$"))
(defun do-repair ()
  (verbose? "Recovering files...")
  (dolist (file (directory))
    (if (regexec repair-re file)
      (do-repair-file file)
  ))
  (async-shell-command :exit)
)
;; returns the new name
(defun do-repair-file (file &optional immediate)
  (with (new-name (find-free-filename (Int (regsub repair-re 1))))
    (verbose? "renaming %0 to %1" file new-name)
    (if immediate
      (wait (system (list "mv" file new-name)))
      (move-file file new-name)
    )
    new-name
))

;skip existing names until we find a free name
(defun find-free-filename (n)
  (catch 'Found
    (while t
      (if (file-stats (String n))
	(incf n)
	(throw 'Found (String n))
))))

(defun Mail-num-or-id (mail)
  (if (Mail-num mail) (Mail-num mail)
    (Mail-id mail)
))

;;============================================================================
;;Computing uniq mails. If same body, keep the one with the biggest header
(defvar mail-bodies (Hashtable ()))     ;p-list of bodies/ids-list

(defun remove-duplicates (&aux body l m (n 0)
  )
  (verbose? "= Removing duplicates")
  (dolist (mail mails)
    (setq body (read-mail-body mail))
    (if (setq l (getn mail-bodies body))
      (lappend l (Mail-id mail))
      (put mail-bodies body (list (Mail-id mail)))
  ))
  (dohash (body ids mail-bodies)
    (when (> (length ids) 1)
       (sort ids compare-id-headers)
       (lreverse ids :in-place t)
       (verbose? "Removing duplicates of: %0" ids)
       (dolist (id (subseq ids 1))
         (setq m (getn mail-ids id))
         (delete-item-eq mails m)
         (delete mail-ids id)
	 (incf n)
         (remove-file (String (Mail-num m)))
  )))
  (verbose? "= Removed duplicates: %0" n)
)

(defvar re-end-space (regcomp "([ \t\n\r]+)$")))

(defun read-mail-body (mail &aux
    (body "") (fbody (open (String (Mail-num mail)))) (fp 0) (fh 0) f2 head
  )
  (while (/= "" (read-line fbody "")) (setq fh (file-position fbody)))
  (setq fp (file-position fbody)) 
  (while (= "" (read-line fbody ())) (setq fp (file-position fbody)))
  (file-position fbody fp)
  (setq body (String fbody))
  (if (regexec re-end-space body)
    (setq body (subseq body 0 (0 (get re-end-space 1))))
  )
  body
)

(defun compare-id-headers (id1 id2 &aux 
    (m1 (getn mail-ids id1)) (m2 (getn mail-ids id2))
    f1 f2 p1 p2
  )
  (if m1
    (if m2 (progn
      )
      (if m1 1 0)
    )
    (if m2 -1 0)
  )
  (setq f1 (open (String (Mail-num m1))))
  (setq f2 (open (String (Mail-num m2))))
  (while (/= "" (read-line f1 "")) (setq p1 (file-position f1)))
  (while (/= "" (read-line f2 "")) (setq p2 (file-position f2)))
  (compare p1 p2)
)

;;============================================================================
;;for sorting inside threads, try pushing subnodes after simple nodes
(defun sort-thread (thread &aux
    (thread-num (seekq threads thread))
  )
  (dolist (mail thread)			;init things
    (Mail-cache mail (vector))
  )
  (dolist (mail thread)			;compute dists
    (dolist (m thread) (Mail-dist m 0))
    (Mail-dist mail 4)			;to stop recursion here
    (compute-dists mail)		;forward push computations
    (Mail-dist mail 0)			;reput value
    (dolist (m thread)			;stores value in cache
      (put (Mail-cache m) (Mail-id mail) (Mail-dist m))
    )    
  )
  (sort thread compare-mail-in-thread)	;sort
  (if (not DEBUG)
    (dolist (mail thread)			;clean things
      (Mail-cache mail ())
  ))
)

;; sort, using the 2D graph distance function cached previously
(defun compare-mail-in-thread (m1 m2 &aux res)
  (if
    (= 0 (setq res
	(compare (getn (Mail-cache m2) (Mail-id m1))
	  (getn (Mail-cache m1) (Mail-id m2))
    )))
    (if
      (= 0 (setq res (compare (Mail-date m1) (Mail-date m2))))
      (compare (Mail-id m1) (Mail-id m2))
      res
    )
    res
  )
)
  
;; forwars explortion of distnaces in graphs, distance is the minimum of 
;; dependence weight found on a path
(defun compute-dists (mail)
  (if (Mail-ref mail)
    (push-dist (mailid (Mail-ref mail)) (min 3 (Mail-dist mail)))
  )
  (dolist (id (Mail-refs mail))
    (push-dist (mailid id) (min 2 (Mail-dist mail)))
  )
  (if (Mail-re mail)
    (push-dist (mailid (Mail-re mail)) (min 1 (Mail-dist mail)))
  )
)

;; stays in global var thread-num
(defun push-dist (mail dist)
  (if (= thread-num (Mail-thread mail))
    (if (> dist (Mail-dist mail)) (progn
	(Mail-dist mail dist)
	(compute-dists mail)
  )))
)

;; test code: verify that our comparison function is a complete order
;; we test compare-mail-in-thread
(defun Debug:test-order (&aux res res2 res3)
  (PF "Testing comparison function\n")
  (dolist (thread threads)
    (when thread
      (dolist (m1 thread)
	(dolist (m2 thread)
	  (if (eq m1 m2)
	    (if (/= 0 (setq res (compare-mail-in-thread m1 m2)))
	      (PF "COMPARE ERROR: same mail, result not 0: %0\n" m1)
	    )
	    (if (= 0 (setq res (compare-mail-in-thread m1 m2)))
	      (PF "COMPARE ERROR: diff mails, result 0: %0 %1\n" m1 m2)
	      (and (< res 0) (>= 0 (compare-mail-in-thread m2 m1)))
	       (PF "COMPARE ERROR: not commutative: %0 %1\n" m1 m2)
	      (and (> res 0) (<= 0 (compare-mail-in-thread m2 m1)))
	       (PF "COMPARE ERROR: not commutative: %0 %1\n" m1 m2)
	    )
	  )
	  (dolist (m3 thread)
	    (setq res2 (compare-mail-in-thread m2 m3))
	    (if (and (> res 0) (> res2 0))
	      (if (<= (setq res3 (compare-mail-in-thread m1 m3)) 0)
		(PF "COMPARE ERROR: transitivity error %0 < %1 < %2, but %0 >= %2\n" 
		  (Mail-new-num m1) (Mail-new-num m2) (Mail-new-num m3)
)))))))))


	;; verify correctness of transitivity in sort results
;;	(dolist (m2 (subseq thread (+ 1 (seekq thread m1))))
;;	  (if (>= (setq res (compare-mail-in-thread m1 m2)) 0)
;;	    (PF "COMPARE ERROR: transitivity error 1 %0[%3] %1[%4] = %2\n" 
;;	      (Mail-new-num m1) (Mail-new-num m2) res
;;	      (getn (Mail-cache m1) (Mail-id m2))
;;	      (getn (Mail-cache m2) (Mail-id m1))
;;	    )
;;	  )
;;;;	  (if (<= (setq res (compare-mail-in-thread m2 m1)) 0)
;;	    (PF "COMPARE ERROR: transitivity error 2 %1[%4] %0[%3] = %2\n"
;;	      (Mail-new-num m1) (Mail-new-num m2) res
;;	      (getn (Mail-cache m1) (Mail-id m2))
;;	      (getn (Mail-cache m2) (Mail-id m1))
;;	    )
;;	  )
)))))

(defun id2num (id &aux 
    (m (if id (getn mail-ids id) ()))
    (num (Mail-num m))
  )
  (if num num
    (if id id "")
  )
)

;; pretty-print of mails
(defun PVM (mail &aux)
  (PF "/|Mail %0 [%6]: %1\n||  Id: %2\n||  Ref: %3\n||  Refs: %4\n\\|  Re: %5\n"
    (Mail-num mail)
    (Mail-subject mail)
    (Mail-id mail)
    (id2num (Mail-ref mail))
    (with (s (copy ""))
      (dolist (id (Mail-refs mail)) 
        (nconc s (String (id2num id)) " "))
      s
    )
    (id2num (Mail-re mail))
    (if (Mail-thread mail) (Mail-thread mail) "")
))

(defmacro PFq (&rest args)
  `(if (not quiet) (PF ,@args))
)

(main)

;;; EMACS MODES
;;; Local Variables: ***
;;; mode:lisp ***
;;; End: ***
