#!/usr/local/bin/klone

(stack-dump-on-error t)

(setq users (getopts
    "ftp-list-news [options] users...
lists new files in the anonymous ftp site \"site\", expanding the body of README
files, and mailing the output (if non empty) to users...
if site not given, taken in env var SITE_HOST

The oprating mode is to set for each site XXX in this dir a file:
    XXX.ftp    which will hold the parameters to ftp-list-news and list of users
then
    XXX.llr    will hold the latest full ls -lR listing
    XXX.new    will be the last daily message sent to registered users
in the XXX.ftp file you can use the following options:
WARNING: in XXX.ftp options cannot have imbedded blanks, and you must *not* 
quote them by enclosing quotes, e.g:
  -x \\\"not wanted\\\" cannot be specified
  -x \\\"not_wanted\\\" will not work
  -x not_wanted will work
"
    ("-host" internet-adress site "the host internet adress of the ftp site")
    ("-new" () site:new
      "create a new script to monitor a new site"
    )
    ("-dir" directory site:dir
      "the directory to list on the site, defaults to /"
    )
    ("-name" descriptive-name site:name
      "the name of the site to be used in mails and 
saved files. defaults to the env variable SITE_NAME or site name"
    )
    ("-ls-lR" filename site:index
      "the name of the index files on the remote host if 
present. If not, the ls -lR will be done by the script"
    )
    ("-ls-options" ls-options (ls-options "-lR")
      "the options to be used by ls on remote host. defaults to \"-lR\""
    )
    ("-subdir" subdirname site:subdirs
      "list only this subdir(s), not the main dir. do not work with -ls-lR"
      :multiple t
    )
    ("-lsp" postprocessor site:decomp
      "the unix command to apply on index file after getting it, file
given as argument, e.g.: -lsp /usr/bin/gnu/gunzip"
    )
    ("-nols" () site:nols
      "give this flag if the index is not in ls -lR format"
    )
    ("-cache" directory (dircache *current-directory*)
      "the place to store locally (cache) previous contents
of the site. Defaults to current directory"
    )
    ("-password" mail-adress site:password
      "the password to use to connect. defaults to user@"
    )
    ("-x" regexp site:excludeds
      "exclude files whose full pathname match regexp from listing
e.g.: \"^(.*[/])?ls-lt?R([.]Z|[.]gz)$\" to exclude all ls-l(t)R files"
      :multiple t
    )
    ("-X" pathname site:excludeds-exact
      "exclude files whose full pathname name is exactly equal to 
pathname from listing. e.g.: \"FILES\", \"pub/INDEX\""
      :multiple t
    )
    ("-Xf" pathname site:excludeds-exact-filename
      "exclude files whose name is equal is exactly equal to 
pathname in any directory "
      :multiple t
    )
    ("-cat" regexp readme-regexps
      "include the CONTENTS of the files whose full pathname
matches the regexp. By default, includes the regexp 
\"[rR][eE][aA][dD][mM][eE]$\""
      :multiple t
    )
    ("-readmes" file site:readmes
     "do not get list of new files, only get readmes
listed in file")
    ("-noreadmes" () site:noreadmes
     "only get list of new files, no readmes"
    )
    ("-all" () do-all "scans cache dir for all sites and query them")
    ("-v" () verbose "outputs some information, otherwise quite silent")
    ("-debug" () debug "debugging mode, very verbose")
))

(setq error-in-arguments "\n**************************************************
ftp-list-news must have the -host option at least!
type: \"ftp-list-news -?\" for help
**************************************************")

;; first, remove spurious quotes
(dolist (l (list site:excludeds site:excludeds-exact 
      site:excludeds-exact-filename readme-regexps
  ))
  (dotimes (i (length l)) 
    (with (s #[l i])
      (if (and (= #\" #[s 0]) (= #\" #[s -1])) ;quoted expr
	(put l i (read (Stream s)))
))))

(if debug (PV site:excludeds site:excludeds-exact 
      site:excludeds-exact-filename readme-regexps
))

(defun fatal-error (&rest args)
  (print-format "ftp-list-news ERROR!\n")
  (apply print-format args)
  (print-format "\n")
  (exit 1)
)

(if (not (setq username (getenv "USER")))
  (setq username (match "^([^ \t]+)[ \t]+" (String (sh:open who am i 2>/dev/null)) 1))
)
(if (or (not username) (= "" username)) (setq username "anybody"))

(if site:new 
  (with (*current-directory* dircache)
    (? "Please enter site internet address (or local directory): ")
    (setq host (read-line))
    (? "Please enter a name for this site: ")
    (setq name (read-line))
    (if (not (match "^[-a-zA-Z0-9_.]+$" name))
      (fatal-error "sorry, invalid file name: %0" name)
    )
    (if (file-stats (+ name ".ftp"))
      (fatal-error "sorry, file %0 already exists!" (+ name ".ftp"))
    )
    (? "Please enter the directory to monitor on this host [/]: ")
    (setq dir (read-line))
    (if (= "" dir) (setq dir "/"))
    (if (/= #\/ #[dir 0]) (setq dir (+ "/" dir)))

    (with (fd (open (+ name ".ftp") :direction :output))
      (print-format fd "-name %0\n-host %1\n-dir %2\n-X FILES
-x ^(.*[/])?ls-lt?R([.]Z|[.]gz)$\n\n%3\n"
	name host dir username)
    )
    (print-format 
      "\nOk, edit file %0.ftp 
to correct your mail address (%1) at the end
and tweak the options\n
You can run the program by
	./ftp-list-news `cat %0.ftp`
"       name username
    )
    (sh chmod a+rw ,(+ name ".ftp"))
    (exit 0)
))

(if (not site)
  (if do-all
    (setq site "")
    (if (not (setq site (getenv "SITE_HOST")))
      (fatal-error error-in-arguments)
)))


(if (and (not site:name) (not (setq site:name (getenv "SITE_NAME"))))
  (setq site:name site)
)
(if (and (not site:dir) (not (setq site:dir (getenv "SITE_DIR"))))
  (setq site:dir "/")
)

(if (not site:password) (setq site:password (+ username "@"))) 

;;=============================================================================
;;                    end of option parsing
;;=============================================================================

(setq localindex (+ site:name ".llr"))
(setq localindex-old (+ localindex ".old"))
(setq *current-directory* dircache)
(setq readme-prefix (+ "___" (String *current-process-id*) "___"))
(if debug (setq verbose t))
(if debug (kdb t))

(if site:excludeds-exact-filename (progn
    (if (not site:excludeds) (setq site:excludeds (copy site:excludeds)))
    (dolist (excluded-exact site:excludeds-exact-filename)
      (lappend site:excludeds (+ "^(.*[/])?"
	  (quote-string-for-regexp excluded-exact) "$")
      )
)))
(if site:excludeds-exact (progn
    (if (not site:excludeds) (setq site:excludeds (copy site:excludeds)))
    (dolist (excluded-exact site:excludeds-exact)
      (lappend site:excludeds (+ "^"
	  (quote-string-for-regexp excluded-exact) "$")
      )
)))

(dotimes (i (length site:excludeds))	;compile regexps
  (put site:excludeds i (regcomp (get site:excludeds i "^$")))
)
(setq re-line-to-name (regcomp " ([^ ]+)$"))

(defun main (&aux 
    fs
    message
    out
  )
  (if do-all (only-do-all))
  (if verbose (print-format "Listing site %0...\n" site:name))
  (setq message (+ site:name ".new"))
  (setq out (open message :direction :output :if-exists :supersede))
  (if site:readmes (only-list-readmes))
  (if (file-stats localindex) 
    (sh mv ,localindex ,localindex-old)
  )
  (if (= #\/ (getn site 0)) 
    (with (*current-directory* site) 
      (sh rm -f ,(+ dircache "/" localindex))
      (wait (system (list "ls" "-lR") :output 
	  (+ dircache "/" localindex) :error "/dev/null"))
    )
    (if site:subdirs
      (list-only-subdirs  site site:password site:dir site:subdirs ls-options
	localindex
      )
      (doftp site site:password site:dir 
	(if site:index (+ "get " site:index " " localindex)
	  (+ "ls " ls-options " " localindex)
  ))))
  (if (and site:decomp (file-stats localindex))
    (with (command (print-format String "%0 %1" site:decomp localindex))
      (wait (system command))
  ))
  (if debug (sh ls -l ,localindex))
  (if (and 
      (setq fs (file-stats localindex))
      (/= 0 (get fs 'size))
      (or (site:nols) (normalize-ls-lR localindex) t)
    )
    (look-for-news site site:password site:dir localindex localindex-old)
    (progn
      (if verbose
	(print-format out "ftp-list-news: could not get index file for %0 (%1)\n"
	  site:name site
      ))
      (if (file-stats localindex-old) 
	(sh mv ,localindex-old ,localindex)
	(sh rm -f ,localindex)
  )))
  (sh rm -f ,localindex-old)
  (mail-results out message users)
)

(defun look-for-news (site site:password site:dir localindex localindex-old 
    &aux
    (fd (open localindex :error ()))
    (fdold (open localindex-old :error ()))
    (oldlines (Hashtable ()))
    (lines (list))
    line
  )
  (if fdold
    (while (setq line (read-line fdold ()))
      (put oldlines line t)
  ))
  (if fd
    (if site:excludeds
      (while (setq line (read-line fd ()))
	(if (getn oldlines line) ()
	  (catch 'Excluded
	    (dolist (re-excluded site:excludeds)
	      (if (regexec re-excluded (match re-line-to-name line 1))
		(throw 'Excluded t)
	    ))
	    (lappend lines line)
      )))
      (while (setq line (read-line fd ()))
	(if (getn oldlines line) ()
	  (lappend lines line)		;new line
      ))
  ))
  (if debug (PV "look-for-news:" lines))
  (if lines
    (print-results site site:password site:dir lines)
  )  
)

(defun only-list-readmes  (&aux (lines (list)) (fd (open site:readmes)))
  (while (setq line (read-line fd ()))
    (lappend lines line))
  (if lines
    (print-results site site:password site:dir lines)
  )
  (exit 0)
)

(setq print-results:re-readme (regcomp "[rR][eE][aA][dD][mM][eE]$"))
(setq print-results:re-readme-list (list print-results:re-readme))
(dolist (expr readme-regexps)
  (lappend print-results:re-readme-list (regcomp expr))
)

(defun print-results (site site:password site:dir lines &aux
    filename
    (readmes (list))
    (i 0)
    (commands (list))
    fd
    s
    (re-filename (regcomp "([^ ]+)$"))
  )
  (dolist (line lines)
    (if (not site:readmes) (write-line line out))
    (dolist (re print-results:re-readme-list)
      (if (and (regexec re line)
	  (regexec re-filename line)
	)
	(lappend readmes (regsub re-filename 1))
  )))
  
  (if (and (not site:noreadmes) readmes) (progn
      (print-format out "\nREADMES:\n========\n")
      (dolist (filename readmes)
	(lappend commands (+ "get " filename " " readme-prefix (String i)))
	(incf i)
      )
      
      
      (if (= #\/ (getn site 0)) 
	(dolist	(com commands)		;local
	  (wait (system (+ "cp " site "/" (subseq com (length "get ")))))
	)
	(doftp site site:password site:dir commands)
      )
      
      (setq i 0)
      (dolist (filename readmes)
	(print-format out "\n%0 %1:\n" 
	  (make-string (- 77 (length filename)) #\*) filename
	)
	(if (setq fd (open (+ readme-prefix (String i)) :error ())) (progn
	    (while (setq l (read-line fd ()))
	      (write-line l out)
	    )
	    (setq fd ())
	  )
	  (print-format out "*** file %0 not found! ***\n" filename)
	)
	(incf i)
      )
      (sh rm -f ,(+ readme-prefix "*"))
)))))))))

(defun list-only-subdirs (site password dir subdirs ls-options localindex  &aux 
    (commands (copy ""))
    line
    (i 0)
    (fd-localindex (open localindex :direction :output :if-exists :supersede))
    ;; same as normalize-ls-lR
    (re-empty (regcomp 
	"^(([ \t]*)|(total [0-9]+)|([^ \t]+[ \t]+unreadable))$"))
    (re-dir (regcomp "^([.][/])?([^ \t]*[^:])[:]?$"))
    (re-entry (regcomp (+
	  "[dbclps-][r-][w-][xstST-][r-][w-][xstST-][r-][w-][xstST-]" ;mode
	  " +[0-9]+"			;links
	  " +[0-9a-zA-Z_-]+"		;user
	  "( +[0-9a-zA-Z_-]+)?"		;group (optional) 1
	  " +([0-9]+)"			;size 2
	  " +([^ ]+ +[^ ]+ +[^ ]+)"	;date 3
	  " +(.+)$"			;name 4
    )))
  )
  (dolist (subdir subdirs)
    (nconc commands (PF String "cd %0\nls %1 %2\ncd ..\n"
	subdir ls-options (+ localindex "-" (String i))
    ))
    (incf i)
  )
  (doftp site password dir commands)
  (if debug (system (+ "ls -l " localindex "*")))
  (setq i 0)
  (dolist (subdir subdirs)
    (with (fd (open (+ localindex "-" (String i)) :error ()))
      (when fd 
	(while (setq line (read-line fd ()))
	  (if (regexec re-empty line)
	    ()
	    
	    (= line ".:")
	    (PF fd-localindex "./%0:\n" subdir)
	    
	    (regexec re-dir line)
	    (PF fd-localindex "./%0/%1:\n" subdir (regsub re-dir 2))
	    
	    (regexec re-entry line)
	    (write-line line fd-localindex)
	))
	(close fd)
    ))
    (if debug (progn
	(flush fd-localindex)
	(PF "[%1]%0 : " subdir i) (flush *stdout*)
	(sh ls -l ,(+ localindex "-" (String i)))
	(sh mv ,(+ localindex "-" (String i)) /tmp)
      )
      (sh rm -f ,(+ localindex "-" (String i)))
    )
    (incf i)
  )
  (flush fd-localindex)
  (if debug (sh cp ,localindex /tmp))
)

(defun normalize-ls-lR (file &aux
    (fd (open file))
    (lines (list))
    (dir "")
    line
    (re-empty (regcomp 
	"^(([.]:?)|([ \t]*)|(total [0-9]+)|([^ \t]+[ \t]+unreadable))$"))
    (re-dir (regcomp "^([.][/])?([^:]*)[:]?$"))
    (re-dir2 (regcomp "^([.][/])?(.*)[:]$"))
    (re-entry (regcomp (+
	  "[dbclps-][r-][w-][xstST-][r-][w-][xstST-][r-][w-][xstST-]" ;mode
	  " +[0-9]+"			;links
	  " +[0-9a-zA-Z_-]+"		;user
	  "( +[0-9a-zA-Z_-]+)?"		;group (optional) 1
	  " +([0-9]+)"			;size 2
	  " +([^ ]+ +[^ ]+ +[^ ]+)"	;date 3
	  " +(.+)$"			;name 4
    )))
  )
  (while (setq line (read-line fd ()))
    (if (regexec re-empty line)
      ()
      
      (regexec re-entry line)
      (if (seek "-l" (0 line))		;look only to symlinks and files
	(lappend lines (+ (normalize-date (regsub re-entry 3))
	    " " (normalize-size (Int (regsub re-entry 2))) " " 
	    (+ dir (if (= "" dir) "" "/")  (regsub re-entry 4))
	))
      )

      (regexec re-dir line) (setq dir (regsub re-dir 2))
      (regexec re-dir2 line) (setq dir (regsub re-dir2 2))
      
      (print-format out "unrecognized entry: \"%0\"\n" line)
    )
  )
  (close fd)
  (setq fd (open file :direction :output :if-exists :supersede))
  (dolist (line lines)
    (write-line line fd)
  )
  (flush fd)
  (close fd)
)

(setq normalize-date:re-date (regcomp "([A-Z][a-z][a-z]) +([0-9]+) +(.*)$"))
(setq normalize-date:re-time (regcomp "([0-9]+):([0-9])+"))
(setq normalize-date:date (cur-date))

(defun normalize-date (date &aux res
    year month day
    (mnum '("Jan" 1 "Feb" 2 "Mar" 3 "Apr" 4 "May" 5 "Jun" 6 "Jul" 7 "Aug" 8
	"Sep" 9 "Oct" 10 "Nov" 11 "Dec" 12
    ))
  )
  (if (regexec normalize-date:re-date date) (progn
      (setq day (regsub normalize-date:re-date 2))
      (setq month (regsub normalize-date:re-date 1))
      (if (regexec normalize-date:re-time (regsub normalize-date:re-date 3))
	(setq year (String
	    (if (<= (get mnum month 0) (get normalize-date:date 1))
		(get normalize-date:date 0)
		(- (get normalize-date:date 0) 1)
	    )
	))
	(setq year (regsub normalize-date:re-date 3))
      )
      (setq res (+ month (make-string (- 3 (length day))) day " "
	  (subseq year 2)
      ))
    )
    (setq res "??? ?? ??")
  )
  res      
)

(defun normalize-size (size &aux res
    (field-width 4)
  )
  (if (= size 0)
    (setq res (String size))
    (< size 1024000) 
    (setq res (+ (String (/ (+ 1023 size) 1024)) "k"))
    (< size 1048576)
    (setq res "1M")
    t
    (setq res (+ (String (/ size 1048576)) "M" 
	(subseq (String (/ (* (+ 1048576 (mod size 1048576)) 1000) 1048576)) 1)
    ))
  )
  
  (if (< (length res) field-width)
    (setq res (+ (make-string (- field-width (length res))) res))
    (> (length res) field-width)
    (setq res (subseq res 0 field-width))
  )
  res  
)

(defun doftp (site site:password site:dir &rest commands &aux
    fd
    pid
    (temp-file (+ "/tmp/ftp-list-news.coms." site))
  )
  (setq pid (system (+ "ftp -inv 2>&1 > " temp-file) :input 'fd))
  (print-format fd "open %0\n\nuser anonymous %1\nbinary\ncd %2\n"
    site
    site:password
    site:dir
  )
  (flush fd)
  (if debug (print-format fd "pwd\ndir\n"))
  (wait (system (list "chmod" "a+rw" temp-file)))
  (doftp:command commands fd)
  (print-format fd "quit\n")
  (flush fd)
  (wait pid)
)


(defun doftp:command (command fd)
  (if (typep command List)
    (dolist (com  command) (doftp:command com fd))
    (progn
      (if debug (PV com))
      (print-format fd "%0\n" com)
      (flush fd)
  ))
)

;; go to cache and perform a search for ALL sites listed there
(defun only-do-all (&aux 
    (pids (list))
  )
  (dolist (file (directory))
    (if (match "[.]ftp$" file) (progn
	(if verbose (PF "executing %0 `cat %1`\n" #[*arguments* 0] file))
	(lappend pids (system (+ #[*arguments* 0]
	      (if verbose " -v" "") (if debug " -debug" "")
	      " `cat " file "`")))
  )))
  (wait pids :blocking t)
  (exit 0)
)

(defun current-date-string (&aux
    (cd (cur-date))
    (months '("???" "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" 
      "Sep" "Oct" "Nov" "Dec")
    )
  )
  (+ (String #[cd 2]) "-" #[months #[cd 1]] "-" (String #[cd 0]))
)

(defun mail-results (fd filename users &aux
    in
    body
  )
  (if users				;keep silent if there is nobody to mail
    (if (/= 0 (file-position fd)) (progn	;non-empty
	(setq body (open filename))
	(system (+ (list "/usr/lib/sendmail") users)
	  :nohup t :input 'in :output "/dev/null" :error "/dev/null")
	(print-format in "Subject: Listing of %0 ( %1:%2 ) on %3\n\n"
	  site:name site site:dir (current-date-string)
	)
	(setq s (print-format String "New files on ftp site %0 ( ftp://%1%2 ):"
	    site:name site site:dir
	))
	(print-format in "%0\n%1\n\n" s (make-string (length s) #\=))
	(close fd)
	(write-string (String body) in)
	(flush in)
	(close in)  
      )					;no news. print it but dont mail it
      (if verbose (print-format "NO NEWS for %0\n" filename))
)))


(main)

(if debug (PF "DONE\n"))

;;; EMACS MODES
;;; Local Variables: ***
;;; mode:lisp ***
;;; End: ***
