$OpenBSD: patch-lib_pyzor_client_py,v 1.1.1.1 2008/09/02 12:33:17 jasper Exp $

- Allow pyzor to scan mailboxes. From Debian.
- Handle unknown encodings. Even binary parts are handled now.
  Treat empty type as 'text'. From FreeBSD.

--- lib/pyzor/client.py.orig	Tue Sep  2 12:48:16 2008
+++ lib/pyzor/client.py	Tue Sep  2 12:52:33 2008
@@ -8,6 +8,7 @@ import cStringIO
 import getopt
 import tempfile
 import mimetools
+import multifile
 import sha
 
 import pyzor
@@ -58,11 +59,6 @@ class Client(object):
         self.send(msg, address)
         return self.read_response(msg.get_thread())
 
-    def shutdown(self, address):
-        msg = ShutdownRequest()
-        self.send(msg, address)
-        return self.read_response(msg.get_thread())
-
     def build_socket(self):
         self.socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
 
@@ -132,39 +128,50 @@ class ExecCall(object):
 
     def run(self):
         debug = 0
-        (options, args) = getopt.getopt(sys.argv[1:], 'dh:', ['homedir='])
-        if len(args) < 1:
-           self.usage()
-
         specified_homedir = None
+        options = None
+        log = None
+        
+        try:
+            (options, args) = getopt.getopt(sys.argv[1:], 'd', ['homedir=', 'log'])
+        except getopt.GetoptError:
+            self.usage()    
 
+        if len(args) < 1:
+            self.usage()
+
         for (o, v) in options:
             if o == '-d':
                 debug = 1
-            elif o == '-h':
-               self.usage()
             elif o == '--homedir':
                 specified_homedir = v
+            elif o == '--log':
+                log = 1
         
         self.output = Output(debug=debug)
-
         homedir = pyzor.get_homedir(specified_homedir)
-
+        
+        if log:
+            sys.stderr = open(homedir + "/pyzor.log", 'a')
+            sys.stderr.write("\npyzor[" + repr (os.getpid()) + "]:\n")
+        
         config = pyzor.Config(homedir)
         config.add_section('client')
 
-        defaults = {'ServersFile': 'servers',
+        defaults = {'ServersFile':        'servers',
                     'DiscoverServersURL': ServerList.inform_url,
-                    'AccountsFile' : 'accounts',
+                    'AccountsFile':       'accounts',
+                    'Timeout':            str(Client.timeout),
                     }
 
         for k, v in defaults.items():
             config.set('client', k, v)
-            
+        
         config.read(os.path.join(homedir, 'config'))
         
         servers_fn = config.get_filename('client', 'ServersFile')
-    
+        Client.timeout = config.getint('client', 'Timeout')
+        
         if not os.path.exists(homedir):
             os.mkdir(homedir)
 
@@ -197,10 +204,13 @@ class ExecCall(object):
     def usage(self, s=None):
         if s is not None:
             sys.stderr.write("%s\n" % s)
-        sys.stderr.write("""usage: %s [-d] [--homedir dir] command [cmd_opts]
+        sys.stderr.write("""
+usage: %s [-d] [--homedir dir] command [cmd_opts]
 command is one of: check, report, discover, ping, digest, predigest,
-                   genkey, shutdown
+                   genkey
+                   
 Data is read on standard input (stdin).
+
 """
                          % sys.argv[0])
         sys.exit(2)
@@ -208,9 +218,9 @@ Data is read on standard input (stdin).
 
 
     def ping(self, args):
-        getopt.getopt(args[1:], '')
-
-        if len(args) > 1:
+        try:
+            getopt.getopt(args[1:], '')
+        except getopt.GetoptError:
             self.usage("%s does not take any non-option arguments" % args[0])
 
         runner = ClientRunner(self.client.ping)
@@ -221,30 +231,23 @@ Data is read on standard input (stdin).
         return runner.all_ok
         
 
-    def shutdown(self, args):
-        (opts, args2) = getopt.getopt(args[1:], '')
-
-        if len(args2) > 1:
+    def info(self, args):
+        try:
+            (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
+        except getopt.GetoptError:
             self.usage("%s does not take any non-option arguments" % args[0])
 
-        runner = ClientRunner(self.client.shutdown)
+        do_mbox = 'msg'
 
-        for arg in args2:
-            server = Address.from_str(arg)
-            runner.run(server, (server,))
-                    
-        return runner.all_ok
+        for (o, v) in options:
+            if o == '--mbox':
+                do_mbox = 'mbox'
 
-
-    def info(self, args):
-        getopt.getopt(args[1:], '')
-        
-        if len(args) > 1:
-            self.usage("%s does not take any non-option arguments" % args[0])
-
         runner = InfoClientRunner(self.client.info)
 
-        for digest in FileDigester(sys.stdin, self.digest_spec):
+        for digest in get_input_handler(sys.stdin, self.digest_spec, do_mbox):
+            if digest is None:
+                continue
             for server in self.servers:
                 response = runner.run(server, (digest, server))
 
@@ -252,34 +255,45 @@ Data is read on standard input (stdin).
 
 
     def check(self, args):
-        getopt.getopt(args[1:], '')
-
-        if len(args) > 1:
+        try:
+            (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
+        except getopt.GetoptError:
             self.usage("%s does not take any non-option arguments" % args[0])
 
+        do_mbox = 'msg'
+
+        for (o, v) in options:
+            if o == '--mbox':
+                do_mbox = 'mbox'
+
         runner = CheckClientRunner(self.client.check)
 
-        for digest in FileDigester(sys.stdin, self.digest_spec):
+        for digest in get_input_handler(sys.stdin, self.digest_spec, do_mbox):
+            if digest is None:
+                continue
             for server in self.servers:
-                response = runner.run(server, (digest, server))
+                runner.run(server, (digest, server))
                 
         return (runner.found_hit and not runner.whitelisted)
 
 
     def report(self, args):
-        (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
-        do_mbox = False
-
-        if len(args2) > 1:
+        try:
+            (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
+        except getopt.GetoptError:
             self.usage("%s does not take any non-option arguments" % args[0])
 
+        do_mbox = 'msg'
+
         for (o, v) in options:
             if o == '--mbox':
-                do_mbox = True
+                do_mbox = 'mbox'
                 
         all_ok = True
 
-        for digest in FileDigester(sys.stdin, self.digest_spec, do_mbox):
+        for digest in get_input_handler(sys.stdin, self.digest_spec, do_mbox):
+            if digest is None:
+                continue
             if not self.send_digest(digest, self.digest_spec,
                                     self.client.report):
                 all_ok = False
@@ -302,20 +316,22 @@ Data is read on standard input (stdin).
 
 
     def whitelist(self, args):
-        (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
-
-        if len(args2) > 1:
+        try:
+            (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
+        except getopt.GetoptError:
             self.usage("%s does not take any non-option arguments" % args[0])
 
-        do_mbox = False
+        do_mbox = 'msg'
 
         for (o, v) in options:
             if o == '--mbox':
-                do_mbox = True
+                do_mbox = 'mbox'
                 
         all_ok = True
 
-        for digest in FileDigester(sys.stdin, self.digest_spec, do_mbox):
+        for digest in get_input_handler(sys.stdin, self.digest_spec, do_mbox):
+            if digest is None:
+                continue
             if not self.send_digest(digest, self.digest_spec,
                                     self.client.whitelist):
                 all_ok = False
@@ -324,28 +340,29 @@ Data is read on standard input (stdin).
 
 
     def digest(self, args):
-        (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
-
-        if len(args2) > 1:
+        try:
+            (options, args2) = getopt.getopt(args[1:], '', ['mbox'])
+        except getopt.GetoptError:
             self.usage("%s does not take any non-option arguments" % args[0])
 
+        do_mbox = 'msg'
 
-        do_mbox = False
-
         for (o, v) in options:
             if o == '--mbox':
-                do_mbox = True
+                do_mbox = 'mbox'
                 
-        for digest in FileDigester(sys.stdin, self.digest_spec, do_mbox):
+        for digest in get_input_handler(sys.stdin, self.digest_spec, do_mbox):
+            if digest is None:
+                continue
             sys.stdout.write("%s\n" % digest)
 
         return True
 
 
     def print_digested(self, args):
-        getopt.getopt(args[1:], '')
-
-        if len(args) > 1:
+        try:
+            getopt.getopt(args[1:], '')
+        except getopt.GetoptError:
             self.usage("%s does not take any non-option arguments" % args[0])
 
         def loop():
@@ -358,9 +375,9 @@ Data is read on standard input (stdin).
         return True
 
     def genkey(self, args):
-        getopt.getopt(args[1:], '')
-
-        if len(args) > 1:
+        try:
+            getopt.getopt(args[1:], '')
+        except getopt.GetoptError:
             self.usage("%s does not take any non-option arguments" % args[0])
 
         import getpass
@@ -414,7 +431,6 @@ Data is read on standard input (stdin).
                   'report':    report,
                   'ping' :     ping,
                   'genkey':    genkey,
-                  'shutdown':  shutdown,
                   'info':      info,
                   'whitelist': whitelist,
                   'digest':    digest,
@@ -466,7 +482,7 @@ class DataDigester(object):
 
         (fp, offsets) = self.get_line_offsets(fp)
         
-        # did we get an empty file?
+        # did we get an empty (parsed output) file?
         if len(offsets) == 0:
             return
 
@@ -608,32 +624,38 @@ class PrintingDataDigester(DataDigester):
 
 
 
-class FileDigester(BasicIterator):
-    __slots__ = ['digester']
-
-    def __init__(self, fp, spec, mbox=False):
-        self.digester = iter(get_file_digester(fp, spec, mbox))
-        self.output = pyzor.Output()
-
-    def next(self):
-        digest = self.digester.next()
-        self.output.debug("calculated digest: %s" % digest)
-        return digest
-
-
-
-def get_file_digester(fp, spec, mbox, seekable=False):
+def get_input_handler(fp, spec, style='msg', seekable=False):
     """Return an object that can be iterated over
     to get all the digests from fp according to spec.
     mbox is a boolean"""
-    if mbox:
+    if style == 'msg':
+        return filter(lambda x: x is not None,
+                      (DataDigester(rfc822BodyCleaner(fp),
+                                    spec, seekable).get_digest(),)
+                      )
+
+    elif style =='mbox':
         return MailboxDigester(fp, spec)
 
-    return (DataDigester(rfc822BodyCleaner(fp),
-                         spec, seekable).get_digest(),)
+    elif style == 'digests':
+        return JustDigestsIterator(fp)
 
+    raise ValueError, "unknown input style"
 
 
+class JustDigestsIterator(BasicIterator):
+    __slots__ = ['fp']
+    
+    def __init__(self, fp):
+        self.fp = fp
+
+    def next(self):
+        l = fp.readline()
+        if not l:
+            raise StopIteration
+        return l.rstrip()
+
+
 class MailboxDigester(BasicIterator):
     __slots__ = ['mbox', 'digest_spec', 'seekable']
     
@@ -645,7 +667,12 @@ class MailboxDigester(BasicIterator):
         self.seekable    = seekable
 
     def next(self):
-        next_msg = self.mbox.next()
+        try:
+            next_msg = self.mbox.next()
+        except IOError:
+            print "Error: Please feed mailbox files in on stdin, i.e."
+            print "    pyzor digest --mbox < my_mbox_file"
+            next_msg = None
         if next_msg is None:
             raise StopIteration
         return DataDigester(next_msg, self.digest_spec,
@@ -662,39 +689,70 @@ class rfc822BodyCleaner(BasicIterator):
         self.multifile = None
         self.curfile   = None
 
+	# Check if we got a mail or not. Set type to binary if there is no 'From:' header and
+	# type text/plain with encoding 7bit. 7bit is passed trough anyway so nobody cares.
+	if (not msg.has_key("From") and self.type == 'text' and msg.subtype == 'plain' and msg.getencoding() == '7bit'):
+		self.type = 'binary';
+
+	if self.type is '':
+	    self.type = 'text';
+
         if self.type == 'text':
             encoding = msg.getencoding()
-            if encoding == '7bit':
-                self.curfile = msg.fp
-            else:
-                self.curfile = tempfile.TemporaryFile()
-                mimetools.decode(msg.fp, self.curfile, encoding)
-                self.curfile.seek(0)
-                
+	    self.curfile = msg.fp
+	    if encoding != '7bit':
+		# fix bad encoding name
+		if encoding == '8bits':
+		    encoding = '8bit'
+		try:
+		    newcurfile = tempfile.TemporaryFile()
+		    mimetools.decode(msg.fp, newcurfile, encoding)
+		    newcurfile.seek(0)
+		    self.curfile = newcurfile
+		except:
+		    # ignore encoding on errors, pass msg as is
+		    pass
+
         elif self.type == 'multipart':
             import multifile
             self.multifile = multifile.MultiFile(msg.fp, seekable=False)
             self.multifile.push(msg.getparam('boundary'))
-            self.multifile.next()
-            self.curfile = self.__class__(self.multifile)
+	    try:
+	       self.multifile.next()
+	       self.curfile = self.__class__(self.multifile)
+	    except:
+	       #
+	       # Catch multipart decoding errors
+	       #
+	       fp.seek(0)
+	       self.curfile = fp
+	       self.type = 'binary'
 
 
         if self.type == 'text' or self.type == 'multipart':
             assert self.curfile is not None
+        elif self.type == 'binary':
+	    try:
+	 	fp.seek(0)
+	    except:
+	    	pass
+	    self.curfile = fp
         else:
             assert self.curfile is None
 
         
     def readline(self):
         l = ''
-        if self.type in ('text', 'multipart'):
-            l = self.curfile.readline()
-
-        if self.type == 'multipart' and not l and self.multifile.next():
-            self.curfile = self.__class__(self.multifile)
-            # recursion.  Could get messy if
-            # we get a bunch of empty multifile parts
-            l = self.readline()
+	try:
+		if self.type in ('text', 'multipart', 'binary'):
+		    l = self.curfile.readline()
+		if self.type == 'multipart' and not l and self.multifile.next():
+		    self.curfile = self.__class__(self.multifile)
+		    # recursion.  Could get messy if
+		    # we get a bunch of empty multifile parts
+		    l = self.readline()
+	except (TypeError, multifile.Error):
+		pass
         return l
 
 
