#!/usr/bin/env python
#
# reposurgeon - a repository surgeon.
#
# By ESR, October 2010.  BSD terms apply.
#
# Requires Python 2.7.2 or newer.
#
import sys, os, cmd, tempfile, subprocess, glob, hashlib, cProfile, cPickle
import re, signal, shutil, copy, shlex, collections, resource, uuid
import time, calendar, unittest
import email.message, email.parser, email.utils

# This import only works on Unixes.  The intention is to enable
# Ctrl-P, Ctrl-N, and friends in Cmd. 
try:
    import readline
except ImportError:
    pass

version="2.12"

#
# This code is intended to be hackable to support special-purpose or
# custom operations, though it's even better if you can come up with a new
# surgical primitive general enough to ship with the stock version.  For
# either case, here's a guide to the architecture.
#
# The core classes are largely about deserializing and reserializing import
# streams.  In between these two operations the repo state lives in a
# fairly simple Python object, Repository. The main part of Repository
# is just a list of events - Commits, Blobs, Tags, Resets, and Passthroughs.
# These are straightforward representations of the command types in an
# import stream, with Passthrough as a way of losslessly conveying lines
# the parser does not recognize.
#
#  +-------------+    +---------+    +-------------+
#  | Deserialize |--->| Operate |--->| Reserialize |
#  +-------------+    +---------+    +-------------+
#
# The general theory of reposurgeon is: you deserialize, you do stuff
# to the event list that preserves correctness invariants, you
# reserialize.  The "do stuff" is mostly not in the core classes, but
# there is one major exception.  The primitive to delete a commit and
# shuffle its fileops forwards or backwards is seriously intertwined
# with the core classes and actually makes up almost 50% of Repository
# by line count.
#
# The rest of the surgical code lives outside the core classes. Most
# of it lives in the RepoSurgeon class (the command interpreter) or
# the RepositoryList class (which encapsulates name access to a list
# of repositories and also hosts surgical operations involving
# multiple repositories). A few bits, like the repository reader and
# builder, have enough logic that's independent of these
# classes to be factored out of them.
#
# In designing new commands for the interpreter, try hard to keep them
# orthogonal to the selection-set code. As often as possible, commands
# should all have a similar form with a (single) selection set argument.
#
# VCS is not a core class.  The code for manipulating actual repos is bolted
# onto the ends of the pipeline, like this:
#
#  +--------+    +-------------+    +---------+    +-----------+    +--------+
#  | Import |--->| Deserialize |--->| Operate |--->| Serialize |--->| Export |
#  +--------+    +-------------+ A  +---------+    +-----------+    +--------+
#       +-----------+            |
#       | Extractor |------------+
#       +-----------+
#
# The Import and Export boxes call methods in VCS.
#
# Extractor classes build the deserialized internal representation directly.
# Each extractor class is a set of VCS-specific methods to be used by the
# RepoStreamer driver class.
#

class VCS:
    "Class representing a version-control system."
    def __init__(self, name,
                 subdirectory,
                 exporter,
                 styleflags,
                 properties,
                 initializer,
                 lister,
                 importer,
                 checkout,
                 preserve,
                 authormap,
                 ignorename,
                 project,
                 notes):
        # Identity and recognition
        self.name, self.subdirectory = name, subdirectory
        # Export side
        self.exporter, self.styleflags = exporter, styleflags
        # Import side
        self.properties, self.initializer = properties, initializer
        self.lister, self.importer, self.checkout = lister, importer, checkout
        # Housekeeping and descriptive members
        self.preserve, self.authormap = preserve, authormap
        self.ignorename, self.project, self.notes = ignorename, project, notes
    def __repr__(self):
        "Render every member as a right-aligned 'Label: value' line."
        report = [
            "         Name: " + str(self.name),
            " Subdirectory: " + str(self.subdirectory),
            "     Exporter: " + str(self.exporter),
            " Export-Style: {" + ",".join(list(self.styleflags)) + "}",
            "   Properties: " + repr(self.properties),
            "  Initializer: " + str(self.initializer),
            "       Lister: " + str(self.lister),
            "     Importer: " + str(self.importer),
            "     Checkout: " + str(self.checkout),
            "     Preserve: {" + ",".join(list(self.preserve)) + "}",
            "    Authormap: " + str(self.authormap),
            "   Ignorename: " + str(self.ignorename),
            "      Project: " + str(self.project),
            "        Notes: " + str(self.notes),
            ]
        # Every line, including the last, is newline-terminated.
        return "\n".join(report) + "\n"

# Most knowledge about specific version-control systems lives in the
# following class list. Exceptions: there's a git-specific hook in the
# repo reader; also see the extractor classes; also see the dump method
# in the Blob() class.
# The members (passed by keyword, so their order below is not significant) are:
#
# * subdirectory: Name of its characteristic subdirectory.
# * exporter: Command to export from the VCS to the interchange format
# * styleflags: Export-style flags.
#     "nl-after-commit" = inserts an extra NL after each commit
#     "nl-after-comment" = inserts an extra NL after each comment
#     "export-progress" = exporter generates its own progress messages,
#                         no need for baton prompt.
# * properties: Flag specifying whether it handles per-commit properties
#   on import
# * initializer: Command to initialize a new repo
# * importer: Command to import from the interchange format
# * checkout: Command to check out working copies of the repo files.
# * preserve: Default preserve set (e.g. config & hook files; parts can be
#   directories).
# * authormap: Likely location for an importer to drop an authormap file
# * lister: Command to list files under repository control.
#
# Each entry also carries a name, an ignorename (the name of the file
# holding ignore patterns), a project URL, and free-text notes.
#
# Note that some of the commands used here are plugins or extensions
# that are not part of the basic VCS. Thus these may fail when called;
# we need to be prepared to cope with that.
#
# %(tempfile)s in a command gets substituted with the name of a
# tempfile that the calling code will know to read or write from as
# appropriate after the command is done.  If your exporter can simply
# dump to stdout, or your importer read from stdin, leave out the
# %(tempfile)s; reposurgeon will popen(3) the command, and it will
# actually be slightly faster (especially on large repos) because it
# won't have to wait for the tempfile I/O to complete.
#
# %(basename)s is replaced with the basename of the repo directory.
#
# Table of supported version-control systems, one VCS object per system.
# A member set to None means the corresponding command or file does not
# exist for that system.
vcstypes = [
    VCS(name="git",
        subdirectory=".git",
        exporter="git fast-export -M -C --signed-tags=verbatim --tag-of-filtered-object=drop --all",
        styleflags=set(["nl-after-commit"]),
        properties=False,
        initializer="git init",
        importer="git fast-import --quiet",
        checkout="git checkout",
        lister="git ls-files",
        preserve=set(['.git/config', '.git/hooks']),
        authormap=".git/cvs-authors",
        ignorename=".gitignore",
        project="http://git-scm.com/",
        notes="The authormap is not required, but will be used if present."),
    #
    VCS(name="bzr",
        subdirectory=".bzr",
        exporter="bzr fast-export --no-plain %(basename)s",
        styleflags=set(["export-progress", "nl-after-comment"]),
        properties=True,
        initializer=None,
        lister=None,
        importer="bzr fast-import -",
        checkout="bzr checkout",
        preserve=set([]),
        authormap=None,
        project="http://bazaar.canonical.com/en/",
        ignorename=".bzrignore",
        notes="Requires the bzr-fast-import plugin."),
    # Export is tested and works; import is flaky.
    VCS(name="hg",
        subdirectory=".hg",
        exporter="hg-fast-export.py --marks /dev/null --mapping /dev/null --heads /dev/null --status /dev/null --repo .",
        styleflags=set(["nl-after-comment",
                        "nl-after-commit",
                        "export-progress"]),
        properties=False,
        initializer="hg init",
        lister="hg locate",
        importer="hg fastimport %(tempfile)s",
        checkout="hg checkout",
        preserve=set([".hg/hgrc"]),
        authormap=None,
        ignorename=".hgignore",
        project="http://mercurial.selenic.com/",
        notes="The hg export-import methods are not part of stock Mercurial."),
    # Styleflags may need tweaking for round-tripping
    VCS(name="darcs",
        subdirectory="_darcs",
        exporter="darcs fastconvert export",
        styleflags=set([]),
        properties=False,
        initializer=None,
        lister="darcs show files",
        importer="darcs fastconvert import",
        checkout=None,
        preserve=set([]),
        authormap=None,
        ignorename="_darcs/prefs/boring",
        project="http://darcs.net/",
        notes="Assumes no boringfile preference has been set."),
    # Export is experimental and doesn't round-trip
    VCS(name="svn",
        subdirectory="locks",
        exporter="svnadmin dump .",
        styleflags=set(["export-progress"]),
        properties=False,
        # Repositories are created with svnadmin; the svn client has no
        # "create" subcommand.
        initializer="svnadmin create .",
        importer="svnadmin load .",
        checkout=None,
        lister=None,
        preserve=set(["hooks"]),
        authormap=None,
        ignorename=None,
        project="http://subversion.apache.org/",
        notes="Run from the repository, not a checkout directory."),
    VCS(name="cvs",
        subdirectory="CVS",
        exporter="cvsps --fast-export",
        styleflags=set([]),
        properties=False,
        initializer=None,
        # No importer exists; this must be a real None (not the string
        # "None") so that truth tests on the member see it as absent.
        importer=None,
        checkout=None,
        lister=None,
        preserve=set([]),
        authormap=None,
        ignorename=None,
        project="http://www.catb.org/~esr/cvsps/",
        notes="Requires cvsps at version 3.3 or later."),
    VCS(name="rcs",
        subdirectory="RCS",
        exporter="rcs-fast-export",
        styleflags=set([]),
        properties=False,
        initializer=None,
        # No importer exists; see the note on the cvs entry.
        importer=None,
        checkout=None,
        lister=None,
        preserve=set([]),
        authormap=None,
        ignorename=None,
        project="http://git.oblomov.eu/rcs-fast-export",
        notes="Requires rcs-fast-export."),
    ]

# How to write extractor classes:
#
# Clone one of the existing ones and mutate.  
#
# Significant fact: None of the get_* methods for extracting information about
# a revision is called until after checkout has been called on that revision.
#
# Most methods take a native revision ID as argument. The value and type of the
# ID don't matter to any of the code that will call the extractor, except that
# IDs must be hashable so they can be dictionary keys.
#
# The 'name', 'subdirectory', and 'visible' members must be set. The
# subdirectory member is how an extractor recognizes what repositories
# it can consume.  If the visible member is false, the 'read' command
# will ignore the existence of the extractor.
#
# The strings returned by get_committer() and get_authors() should look like
#
# J. Random User <random@foobar> 2011-11-29T10:13:32Z
#
# that is, a free text name followed by an email ID followed by a date.
# The date specification can be anything Attribution() can parse; in
# particular, RFC3339 dates are good, so are RFC822 (email) dates,
# and so is git's native integer-Unix-timestamp/timezone pairs.

class GitExtractor:
    "Repository extractor for the git version-control system."
    # Regardless of what revision and branch was current at start,
    # after the git extractor runs the head revision on the master branch
    # will be checked out.
    #
    # The git extractor does not attempt to recover N ops,
    # symbolic links, gitlinks, or directory fileops.
    #
    # To be streamed, a git repo must have *local*
    # refs to all branches - in particular, local tracking branches
    # corresponding to all remotes.
    #
    # Some of these limitations could be fixed, but the git extractor
    # is not intended to replace git-fast-export; it only exists as a
    # test for the generic RepoStreamer code and a model for future
    # extractors.
    def __init__(self):
        # These must be set for every extractor class
        self.name = "git-extractor"
        self.subdirectory = ".git"
        self.visible = False
        self.properties = False
        self.ignorename = ".gitignore"
        # These are internal
        self.revlist = []    # commit hashes, in the order git log reported them
        self.parents = {}    # commit hash -> list of parent hashes
        self.header = {}
        self.meta = {}       # commit hash -> {'ci':, 'ai':, 'branch':}
        self.tags = []       # Tag objects for annotated tags
        self.refs = {}       # ref name -> commit hash
        self.baton = None
    @staticmethod
    def __parse_tag_object(lines, taghash):
        """Parse the lines of 'git cat-file -p' output for a tag.

        Returns a tuple (objecthash, tagger, comment).  objecthash is
        the target of the 'object' header, or taghash when there is no
        such header (a lightweight tag).  tagger is the text after the
        'tagger ' header, or None.  comment is everything after the
        first blank line, one stripped line per '\\n', or None when
        there is no blank line at all.
        """
        objecthash = taghash
        tagger = None
        comment = None
        for line in lines:
            line = line.strip()
            if comment is not None:
                # Past the header: everything is comment text, even
                # lines that happen to begin with a header keyword.
                comment += line + "\n"
            elif not line:
                comment = ""
            elif line.startswith("tagger "):
                tagger = line[len("tagger "):]
            elif line.startswith("object"):
                objecthash = line.split()[1]
        return (objecthash, tagger, comment)
    def analyze(self, baton):
        "Analyze a git repository for streaming."
        self.baton = baton
        # Get the topologically-ordered list of revisions and parent hashes
        with popen_or_die("git log --all --topo-order --reverse --format='%H %P'") as fp:
            for line in fp:
                fields = line.strip().split()
                self.revlist.append(fields[0])
                self.parents[fields[0]] = fields[1:]
        self.baton.twirl()
        # Next, all other per-commit data except branch IDs
        with popen_or_die("git log --all --reverse --date=raw --format='%H|%cn <%ce> %cd|%an <%ae> %ad'") as fp:
            for line in fp:
                (h, ci, ai) = line.strip().split('|')
                self.meta[h] = {'ci':ci, 'ai':ai}
        # Next, find all refs
        for root, dirs, files in os.walk(".git/refs"):
            for leaf in files:
                assert dirs is not None  # Pacify pylint
                ref = os.path.join(root, leaf)
                with open(ref) as fp:
                    # Strip the leading ".git/" to get the ref name
                    self.refs[ref[5:]] = fp.read().strip()
        self.baton.twirl()
        # Next, grab all tag objects.
        with popen_or_die("git tag -l") as fp:
            for line in fp:
                tag = line.strip()
                with popen_or_die("git rev-parse %s" % tag) as tfp:
                    taghash = tfp.read().strip()
                # Annotated tags are first-class objects with their
                # own hashes.  The hash of a lightweight tag is just
                # the commit it points to. Handle both cases.
                with popen_or_die("git cat-file -p %s" % tag) as tfp:
                    (objecthash, tagger, comment) = self.__parse_tag_object(tfp, taghash)
                if objecthash != taghash:
                    # Exactly one Tag per annotated tag, built only after
                    # the whole comment has been collected.
                    # committish isn't a mark; we'll fix that later
                    self.tags.append(Tag(None,
                                         name=tag,
                                         tagger=Attribution(tagger),
                                         comment=comment or "",
                                         committish=objecthash))
                self.refs["refs/tags/" + tag] = objecthash
        self.baton.twirl()
        # Color branches in the order the tips occur.  Emulate the
        # git-export order.
        refnames = sorted(self.refs,
                          key=lambda name: self.revlist.index(self.refs[name]))
        for ref in refnames:
            self.__branch_color(self.refs[ref], ref)
        uncolored = [revision for revision in self.revlist if 'branch' not in self.meta[revision]]
        if uncolored:
            if verbose >= 1:
                raise Fatal("missing branch attribute for: %s" % uncolored)
            else:
                raise Fatal("some branches do not have local ref names.")
        self.baton.twirl()
    def __metadata(self, rev, fmt):
        "Return 'git log' output for one revision in the given format, minus its final character."
        with popen_or_die("git log -1 --format='%s' %s" % (fmt, rev)) as fp:
            return fp.read()[:-1]
    def __branch_color(self, rev, color):
        "Assign branch name 'color' to rev and all its not-yet-colored ancestors."
        if rev.startswith("ref"):
            return
        while 'branch' not in self.meta[rev]:
            self.meta[rev]['branch'] = color
            parents = self.get_parents(rev)
            if not parents:
                break
            elif len(parents) == 1:
                # This case avoids blowing Python's stack by recursing
                # too deep on large repos.
                rev = parents[0]
            else:
                for parent in parents:
                    self.__branch_color(parent, color)
                break
    def pre_extract(self, repo):
        "Hook for any setup actions required before streaming."
        assert repo is not None  # Pacify pylint
    def post_extract(self, repo):
        "Give every commit an empty property set and check out master."
        for event in repo.commits():
            event.properties = collections.OrderedDict()
        os.system("git checkout --quiet master")
    def isclean(self):
        "Return True if repo has no unsaved changes."
        return not capture("git ls-files --modified")
    def get_revlist(self):
        "Return a list of commit ID strings in topological order, ancestors first."
        return self.revlist
    def get_taglist(self):
        "Return the list of Tag objects built for annotated tags."
        return self.tags
    def get_resetlist(self):
        "Return a list of (refname, revision) pairs for every non-tag ref."
        return [item for item in self.refs.items() if "/tags/" not in item[0]]
    def checkout(self, rev, filemap):
        "Check the directory out to a specified revision; return the file list."
        assert filemap is not None # pacify pylint
        os.system("git checkout --quiet %s" % rev)
        # One path per line; splitting on all whitespace would break up
        # paths that contain spaces.
        manifest = capture("git ls-files").splitlines()
        return manifest
    def cleanup(self, rev, issued):
        "Cleanup after checkout."
        assert rev and (issued is not None) # Pacify pylint
    def get_parents(self, rev):
        "Return the list of commit IDs of a commit's parents."
        return self.parents[rev]
    def get_branch(self, rev):
        "Return the branch (ref name) that a commit was colored with."
        return self.meta[rev]['branch']
    def get_comment(self, rev):
        "Return a commit's change comment as a string."
        return self.__metadata(rev, "%B")
    def get_committer(self, rev):
        "Return the committer's ID/date as a string."
        return self.meta[rev]['ci']
    def get_authors(self, rev):
        "Return a one-element list holding the author's ID/date string."
        return [self.meta[rev]['ai']]
    def get_properties(self, rev):
        "Return the (always empty) ordered dictionary of commit properties."
        assert rev is not None # Pacify pylint
        return collections.OrderedDict()

# More extractors go here

# One instance of each available extractor class.
extractors = [GitExtractor()]

# Global verbosity level.  debug_enable(level) reports true when this
# value is at least the given level, so each DEBUG_* constant below is
# the minimum verbosity at which that category of message is shown.
verbose         = 0
DEBUG_SVNDUMP   = 2    # Debug Subversion dumping
DEBUG_TOPOLOGY  = 2    # Debug repo-extractor logic (coarse-grained)
DEBUG_EXTRACT   = 2    # Debug repo-extractor logic (fine-grained)
DEBUG_FILEMAP   = 3    # Debug building of filemaps
DEBUG_DELETE    = 3    # Debug canonicalization after deletes
DEBUG_IGNORES   = 3    # Debug ignore generation
DEBUG_SVNPARSE  = 4    # Lower-level Subversion parsing details
DEBUG_EMAILIN   = 4    # Debug event round-tripping through mailbox_{out|in} 
DEBUG_SHUFFLE   = 4    # Debug file and directory handling
DEBUG_COMMANDS  = 5    # Show commands as they are executed
DEBUG_UNITE     = 5    # Debug mark assignments in merging
DEBUG_LEXER     = 6    # Debug selection-language parsing
# NOTE(review): presumably suppresses non-essential output; its consumers
# are outside this part of the file - confirm.
quiet = False

# NOTE(review): initially empty; presumably filled in by the command
# interpreter - confirm against its users.
global_options = {}

def screenwidth():
    "Return the current width of the terminal window."
    # The width is taken from the second whitespace-separated field
    # of the 'stty size' output.
    with popen_or_die('stty size', 'r') as tp:
        dimensions = tp.read().split()
        return int(dimensions[1])

def debug_enable(level):
    "Hook for debug filtering: true when verbosity has reached the given level."
    return level <= verbose

def nuke(directory, legend):
    """Remove a (large) directory, with a progress indicator.

    directory is the tree to delete; legend is the prompt shown by the
    progress baton, which is only enabled at DEBUG_SHUFFLE verbosity.
    """
    with Baton(legend, enable=debug_enable(DEBUG_SHUFFLE)) as baton:
        # Walk bottom-up so each directory is already empty when we
        # come to remove it.
        for root, dirs, files in os.walk(directory, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
                baton.twirl()
            for name in dirs:
                path = os.path.join(root, name)
                # os.walk() lists symlinks to directories among dirs,
                # but os.rmdir() refuses to remove a symlink; unlink
                # the link itself rather than aborting the cleanup.
                if os.path.islink(path):
                    os.remove(path)
                else:
                    os.rmdir(path)
                baton.twirl()
    try:
        os.rmdir(directory)
    except OSError:
        # Deliberately best-effort: failure to remove the top-level
        # directory itself is ignored.
        pass

def rfc3339(t):
    "Format a Unix time as an RFC3339 date-time string in Zulu (UTC) time."
    utc = time.gmtime(t)
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", utc)

def complain(msg):
    "Report an error on stderr, tagged with the program name."
    # Push out pending stdout text first so the complaint lands after it.
    sys.stdout.flush()
    report = "reposurgeon: %s\n" % msg
    sys.stderr.write(report)

def announce(msg):
    "Report an informational message on stdout, tagged with the program name."
    report = "reposurgeon: %s\n" % msg
    sys.stdout.write(report)

class Baton:
    """Ship progress indications to stdout.

    Intended for use as a context manager: the prompt is written on
    entry, and the elapsed time and end message on exit.  When not
    enabled the stream is None and every display method is a no-op.
    Spinner and counter animation is only drawn when the stream is a
    terminal.
    """
    def __init__(self, prompt, endmsg='done', enable=False):
        self.prompt = prompt
        self.endmsg = endmsg
        self.countfmt = None    # Format of the counter display, if active
        self.counter = 0        # Next value the counter will display
        if enable:
            self.stream = sys.stdout
        else:
            self.stream = None
        self.count = 0          # Number of spinner steps so far
        self.time = 0           # Time at which the context was entered
    def __enter__(self):
        "Write the prompt and start the clock."
        if self.stream:
            self.stream.write(self.prompt + "...")
            if os.isatty(self.stream.fileno()):
                self.stream.write(" \010")
            self.stream.flush()
        self.count = 0
        self.time = time.time()
        return self
    def startcounter(self, countfmt, initial=1):
        "Begin a counter display using the given %-format and first value."
        self.countfmt = countfmt
        self.counter = initial
    def bumpcounter(self):
        "Display the current counter value in place, then advance it."
        if self.stream is None:
            return
        if os.isatty(self.stream.fileno()):
            if self.countfmt:
                update = self.countfmt % self.counter
                self.stream.write(update + ("\010" * len(update)))
                self.stream.flush()
            else:
                self.twirl()
        self.counter = self.counter + 1
    def endcounter(self):
        "Blank out the counter display and deactivate the counter."
        # Nothing was ever drawn unless a counter is active and the
        # stream is a terminal, so only then is there anything to erase.
        if self.stream and self.countfmt and os.isatty(self.stream.fileno()):
            # The counter is one past the last value shown, so its
            # rendering is at least as wide as what is on the screen.
            w = len(self.countfmt % self.counter)
            self.stream.write((" " * w) + ("\010" * w))
            self.stream.flush()
        self.countfmt = None
    def twirl(self, ch=None):
        "One twirl of the baton."
        if self.stream is None:
            return
        if os.isatty(self.stream.fileno()):
            if ch:
                # An explicit character is emitted as-is and does not
                # advance the spinner.
                self.stream.write(ch)
                self.stream.flush()
                return
            else:
                update = "-/|\\"[self.count % 4]
                self.stream.write(update + ("\010" * len(update)))
                self.stream.flush()
        self.count = self.count + 1
    def __exit__(self, extype, value_unused, traceback_unused):
        "Write elapsed time and the end message; never suppress exceptions."
        if extype == KeyboardInterrupt:
            self.endmsg = "interrupted"
        if extype == Fatal:
            self.endmsg = "aborted by error"
        if self.stream:
            self.stream.write("...(%2.2f sec) %s.\n" \
                              % (time.time() - self.time, self.endmsg))
        return False

class RepoSurgeonEmail(email.message.Message):
    "Specialized email message with a distinguishing starter."
    Divider = 78 * "-"
    def __init__(self, **kwargs):
        email.message.Message.__init__(self, **kwargs)
        # The divider line stands in for the usual Unix "From " line.
        self.set_unixfrom(RepoSurgeonEmail.Divider)
    @staticmethod
    def readmsg(fp):
        "Read one divider-delimited message from fp; return its text, or None at EOF."
        opener = fp.readline()
        if not opener:
            return None
        pieces = []
        # A leading divider is dropped; any other first line is content.
        if not opener.startswith(RepoSurgeonEmail.Divider):
            pieces.append(opener)
        # Collect lines up to (and consuming) the next divider, or to EOF.
        line = fp.readline()
        while line and not line.startswith(RepoSurgeonEmail.Divider):
            pieces.append(line)
            line = fp.readline()
        return ''.join(pieces)
    def __str__(self):
        # Prefix lines that begin with "--" with a dot so they cannot be
        # taken for a divider.
        rendered = email.message.Message.__str__(self)
        return rendered.replace("\n--", "\n.--")

class Date:
    """A time/date in UTC.

    Preserves the original TZ information and uses it to convert back
    when formatting.  Instances carry three members: timestamp (integer
    seconds since the Unix epoch, UTC), tz_offset (seconds east of UTC)
    and orig_tz_string (the zone as a string, used when formatting).
    """
    def __init__(self, text):
        """Recognize date formats that exporters or email programs might emit.

        Accepts git's raw "<seconds> <offset>" form, RFC822 dates, and
        RFC3339 dates in Zulu time.  Raises Fatal for anything else.
        """
        # First, look for git's preferred format, which is a timestamp
        # in UTC followed by an offset to be used as a hint for what
        # timezone to display the date in when converting to other
        # formats
        text = text.strip()
        raw = re.match(r"([0-9]+)\s*([+-][0-9]+)$", text)
        if raw:
            # Take the fields from the match rather than splitting on
            # whitespace, which the pattern does not require.
            self.orig_tz_string = raw.group(2)
            self.tz_offset = Date.secondsFromOffsetString(self.orig_tz_string)
            self.timestamp = int(raw.group(1))
            return
        # If that didn't work, look for an RFC822 date, which git also
        # accepts. Note, there could be edge cases that Python's parser
        # handles but git doesn't.
        try:
            dt = email.utils.parsedate_tz(text)
            self.tz_offset = dt[9]
            self.timestamp = int(calendar.timegm(dt) - self.tz_offset)
            self.orig_tz_string = Date.__zone_string(text, self.tz_offset)
            return
        except TypeError:
            # Raised when parsedate_tz() returned None (unparseable
            # text) or a None zone offset (no zone given).
            pass
        # Also accept RFC3339 dates in Zulu time, just because I like them.
        try:
            # Discard subsecond precision, import-stream format can't use it.
            text = re.sub(r"\.[0-9]+Z", "Z", text)
            rfc3339date = time.strptime(text, "%Y-%m-%dT%H:%M:%SZ")
            self.timestamp = calendar.timegm(rfc3339date)
            self.orig_tz_string = "+0000"
            self.tz_offset = 0
            return
        except ValueError:
            # time.strptime() throws this
            # "time data 'xxxxxx' does not match format '%Y-%m-%dT%H:%M:%S'"
            pass
        # Date format not recognized
        raise Fatal("'%s' is not a valid timestamp" % text)
    @staticmethod
    def __zone_string(text, offset):
        "Return the numeric zone of an RFC822 date: its final field if that is numeric, else one built from offset."
        zone = text.split()[-1]
        if re.match(r"[+-][0-9]{4}$", zone):
            return zone
        # Zone was given by name or followed by a comment; synthesize
        # the +hhmm form from the parsed offset instead.
        minutes = abs(offset) // 60
        return "%s%02d%02d" % ("-" if offset < 0 else "+",
                               minutes // 60, minutes % 60)
    @staticmethod
    def secondsFromOffsetString(text):
        "Convert a [+-]hhmm offset string to seconds east of UTC; 0 (with a complaint) if malformed."
        m = re.match(r"^([-+]?)([0-9]{2})([0-9]{2})$", text)
        if m is not None:
            sign = -1 if m.group(1) == "-" else 1
            hours = int(m.group(2))
            mins = int(m.group(3))
            # Offsets in civil use run from -12:00 to +14:00; the sign
            # must be applied before range-checking the hours.
            if sign * hours < -12 or sign * hours > 14 or mins > 59:
                complain("dubious UTC offset '%s'." % text)
            return (hours * 60 + mins) * 60 * sign
        else:
            complain("invalid UTC offset '%s', assuming +0000 instead." % text)
            return 0
    def rfc3339(self):
        "Format as an RFC3339 timestamp in Zulu time."
        return rfc3339(self.timestamp)
    def rfc822(self):
        "Format as an RFC822 timestamp."
        return time.strftime("%a %d %b %Y %H:%M:%S", time.gmtime(self.timestamp + self.tz_offset)) + " " + self.orig_tz_string
    def delta(self, other):
        "Return the number of seconds from this date to the other one."
        return other.timestamp - self.timestamp
    def __str__(self):
        "Format as a git timestamp."
        return str(self.timestamp) + " " + self.orig_tz_string
    # Comparison is on the UTC timestamp only; the zone is ignored.
    def __cmp__(self, other):
        return (self.timestamp > other.timestamp) - (self.timestamp < other.timestamp)
    def __eq__(self, other):
        return hasattr(other, "timestamp") and self.timestamp == other.timestamp
    def __ne__(self, other):
        return not self.__eq__(other)
    def __lt__(self, other):
        return self.timestamp < other.timestamp
    def __le__(self, other):
        return self.timestamp <= other.timestamp
    def __gt__(self, other):
        return self.timestamp > other.timestamp
    def __ge__(self, other):
        return self.timestamp >= other.timestamp

class DateTests(unittest.TestCase):
    "Unit tests for parsing, formatting, comparison and subtraction of Dates."
    def test_conversion(self):
        "Each accepted input form must render identically in every output form."
        renderings = (('rfc3339', "2010-10-27T18:43:32Z"),
                      ('rfc822', "Wed 27 Oct 2010 18:43:32 +0000"),
                      ('__str__', "1288205012 +0000"))
        for source in ('2010-10-27T18:43:32Z',
                       '1288205012 +0000',
                       'Wed 27 Oct 2010 18:43:32 +0000'):
            parsed = Date(source)
            for (method, wanted) in renderings:
                self.assertEqual(getattr(parsed, method)(), wanted)
    def test_equality(self):
        "The same instant in three notations must compare equal, pairwise."
        instants = [Date('2010-10-27T18:43:32Z'),
                    Date('1288205012 +0000'),
                    Date('Wed 27 Oct 2010 18:43:32 +0000')]
        for (i, left) in enumerate(instants):
            for right in instants[i:]:
                self.assertEqual(left, right)
    def test_inequality(self):
        "Distinct instants must be unequal and strictly ordered."
        def ordered(earlier, later):
            self.assertTrue(earlier < later)
            self.assertTrue(later > earlier)
        base = Date('Wed 27 Oct 2010 18:43:32 +0000')
        second_later = Date('Wed 27 Oct 2010 18:43:33 +0000')
        # Same wall-clock reading one zone east is an hour earlier in UTC.
        hour_earlier = Date('Wed 27 Oct 2010 18:43:32 +0100')
        self.assertNotEqual(base, second_later)
        ordered(base, second_later)
        self.assertNotEqual(base, hour_earlier)
        ordered(hour_earlier, base)
        self.assertNotEqual(second_later, hour_earlier)
        ordered(hour_earlier, second_later)
        for (first, second) in (('2010-10-27T18:43:32Z', '2010-10-27T18:43:33Z'),
                                ('1288205012 +0000', '1288205013 +0000')):
            before = Date(first)
            after = Date(second)
            self.assertNotEqual(before, after)
            ordered(before, after)
    def test_deltas(self):
        "delta() must yield the signed number of seconds to its argument."
        d1 = Date('Wed 27 Oct 2010 18:43:32 +0000')
        d2 = Date('Wed 27 Oct 2010 18:43:33 +0000')
        d3 = Date('Wed 27 Oct 2010 18:43:32 +0100')
        for (start, end, seconds) in ((d1, d2, 1),
                                      (d2, d3, -3601),
                                      (d3, d1, 3600),
                                      (d1, d1, 0)):
            self.assertEqual(start.delta(end), seconds)

class Attribution:
    """Represents an attribution of a repo action to a person and time.

    Members are name, email and date (a Date); all three are None when
    the instance is built without a person string.
    """
    def __init__(self, person=None):
        """Parse a string of the form 'Name <email> date'.

        Raises Fatal if no email address can be found or the date
        field is missing or malformed.
        """
        self.name = self.email = self.date = None
        if person:
            # Deal with a cvs2svn artifact
            person = person.replace("(no author)", "no-author")
            # First, validity-check the email address
            (self.name, self.email) = email.utils.parseaddr(person)
            if not self.email:
                raise Fatal("can't recognize address in attribution %s" % person)
            # Attribution format is actually stricter than RFC822;
            # needs to have a following date in the right place.
            # Split into name|email|date on a copy, so that the error
            # message below can show the text as it was given.
            fields = person.replace(" <", "|").replace("> ", "|").strip().split("|")
            try:
                self.date = Date(fields[2])
            except (ValueError, IndexError):
                raise Fatal("malformed attribution date in %s" % person)
    def email_out(self, _modifiers, msg, hdr):
        "Update an RFC822 message object with a representation of this."
        msg[hdr] = self.name + " <" + self.email + ">"
        msg[hdr + "-Date"] = self.date.rfc822()
    def remap(self, authors):
        """Remap the attribution name.

        authors maps a local name to a (name, mail, timezone) triple.
        The first entry whose local name equals the email address, or
        its part before the '@', replaces name and email; a true
        timezone value also replaces the date's zone string.
        """
        for (local, (name, mail, timezone)) in authors.items():
            if self.email.startswith(local + "@") or self.email == local:
                self.name = name
                self.email = mail
                if timezone:
                    self.date.orig_tz_string = timezone
                break
    def action_stamp(self):
        "Return the RFC3339 date and the email address, joined by '!'."
        return self.date.rfc3339() + "!" + self.email
    def __eq__(self, other):
        "Compare attributions after canonicalization."
        return (self.name == other.name
                and self.email == other.email
                and self.date == other.date)
    def __ne__(self, other):
        "Inverse of __eq__, so that != agrees with == on every Python version."
        return not self.__eq__(other)
    def who(self):
        "Return the name and email address, without the date."
        return self.name + " <" + self.email + ">"
    def __str__(self):
        return self.name + " <" + self.email + "> " + str(self.date)

class Blob:
    "A detached chunk of file content, referenced through its mark."
    def __init__(self, repo=None):
        self.repo = repo
        self.mark = None
        self.path = None      # First in-repo path associated with this blob
        self.colors = []
        self.cookie = None
    def blobfile(self):
        "Name of the file, under the repo's subdirectory, holding the content."
        return "%s/blob-%r" % (self.repo.subdir(), id(self))
    def content(self):
        "Read the backing file and return everything in it as a string."
        with open(self.blobfile()) as stream:
            data = stream.read()
        return data
    def moveto(self, repo):
        "Reassociate this blob with another repo, renaming its backing file."
        source = self.blobfile()
        self.repo = repo
        destination = self.blobfile()
        if debug_enable(DEBUG_SHUFFLE):
            announce("blob rename calls os.rename(%s, %s)" % (source, destination))
        os.rename(source, destination)
        return self
    def clone(self, repo):
        "Make a shallow copy in another repo, hard-linked to the same content."
        twin = copy.copy(self)
        twin.repo = repo
        twin.colors = []
        if debug_enable(DEBUG_SHUFFLE):
            announce("blob clone for %s (%s) calls os.link()" % (self.mark, self.path))
        os.link(self.blobfile(), twin.blobfile())
        return twin
    def dump(self, vcs=None):
        "Render in import-stream format; empty string if the file is gone."
        if not os.path.exists(self.blobfile()):
            return ''
        # Default to the repo's own VCS, but only when it has an importer.
        if vcs is None and self.repo.vcs and self.repo.vcs.importer:
            vcs = self.repo.vcs
        body = self.content()
        # Ugh.  This is where we mess with ignore syntax translation
        is_ignore_file = (vcs and self.repo.vcs
                          and self.path and self.path.endswith(".gitignore"))
        if is_ignore_file and vcs.name == "hg" and self.repo.vcs.name != "hg":
            if not body.startswith("syntax: glob\n"):
                body = "syntax: glob\n" + body
        return "blob\nmark %s\ndata %d\n%s\n" % (self.mark, len(body), body)
    def __str__(self):
        return self.dump()

class Tag:
    "Represents an annotated tag."
    def __init__(self, repo=None,
                 name=None, committish=None, tagger=None, comment=None):
        self.repo = repo
        self.name = name
        self.committish = committish
        self.tagger = tagger
        self.comment = comment
        self.color = None
    def index(self):
        "Our 0-origin index in our repo."
        return self.repo.index(self)
    def id_me(self):
        "ID this tag for humans."
        return "tag@%d (%s)" % (self.index()+1, self.name)
    def email_out(self, modifiers, eventnum):
        "Enable do_mailbox_out() to report these."
        msg = RepoSurgeonEmail()
        msg["Event-Number"] = str(eventnum+1)
        msg["Tag-Name"] = self.name
        if self.tagger:
            self.tagger.email_out(modifiers, msg, "Tagger")
        msg.set_payload(self.comment)
        if not self.comment.endswith("\n"):
            complain("in tag %s, comment was not LF-terminated." % self.name)
        return str(msg)
    def email_in(self, msg):
        """Update this Tag from a parsed email message.

        Returns True if the name, tagger, tagger date or comment was
        changed.  Raises Fatal if the Tag-Name or Tagger header is
        missing, or if the Tagger header lacks a name or an address.
        """
        if "Tag-Name" not in msg or "Tagger" not in msg:
            raise Fatal("update to tag %s is malformed" % self.name)
        modified = False
        newname = msg["Tag-Name"]
        if self.name != newname:
            if debug_enable(DEBUG_EMAILIN):
                announce("in tag %d, Tag-Name is modified %s -> %s" \
                      % (int(msg["Event-Number"]), repr(self.name), repr(newname)))
            self.name = newname
            modified = True
        # Always true here, given the check at the top of the method.
        if "Tagger" in msg:
            (newname, newemail) = email.utils.parseaddr(msg["Tagger"])
            if not newname or not newemail:
                raise Fatal("can't recognize address in Tagger: %s" % msg['Tagger'])
            else:
                if self.tagger.name != newname or self.tagger.email != newemail:
                    (self.tagger.name, self.tagger.email) = (newname, newemail)
                    if debug_enable(DEBUG_EMAILIN):
                        announce("in tag %d, Tagger is modified" \
                              % (int(msg["Event-Number"])))
                    modified = True
            if "Tagger-Date" in msg:
                date = Date(msg["Tagger-Date"])
                if self.tagger.date is None or date != self.tagger.date:
                    # Yes, display this unconditionally
                    if self.repo:
                        if self.tagger.date is None:
                            # There is no old date to take a delta from.
                            announce("in %s, Tagger-Date is set to '%s'" \
                                 % (self.id_me(), date))
                        else:
                            announce("in %s, Tagger-Date is modified '%s' -> '%s' (delta %d)" \
                                 % (self.id_me(),
                                    self.tagger.date, date,
                                    self.tagger.date.delta(date)))
                    self.tagger.date = date
                    modified = True
        newcomment = msg.get_payload()
        if global_options["canonicalize"]:
            newcomment = newcomment.strip() + '\n'
        if newcomment != self.comment:
            if debug_enable(DEBUG_EMAILIN):
                announce("in tag %d, comment is modified %s -> %s" \
                      % (int(msg["Event-Number"]), repr(self.comment), repr(newcomment)))
            modified = True
            self.comment = newcomment
        return modified
    def dump(self, _vcs=None):
        "Dump this tag in import-stream format."
        st = "tag %s\nfrom %s\n" % (self.name, self.committish)
        # The tagger line is optional
        if self.tagger:
            st += "tagger %s\n" % self.tagger
        st += "data %d\n%s\n" % (len(self.comment), self.comment,)
        return st
    def __str__(self):
        return self.dump()

class Reset:
    "Represents a branch creation."
    def __init__(self):
        self.ref = None
        self.committish = None
    def dump(self, _vcs=None):
        "Render this reset as import-stream text."
        pieces = ["reset %s\n" % self.ref]
        # The from clause, with its trailing blank line, is emitted
        # only when a committish has been set.
        if self.committish:
            pieces.append("from %s\n\n" % self.committish)
        return "".join(pieces)
    def __str__(self):
        return self.dump()

class FileOp:
    "Represent a primitive operation on a file."
    modify_re = re.compile(r"(M) ([0-9]+) (\S+) (.*)")
    def __init__(self, vcs=None):
        self.vcs = vcs
        self.op = None
        self.committish = None
        self.source = None
        self.target = None
        self.mode = None
        self.path = None
        self.ref = None
        self.inline = None
        self._paths = None      # Cache for paths(); None means not computed
    def path_remap_in(self):
        "Hack the fileop's basename to map it to git conventions."
        # Ignore file names from non-git VCSes need to get
        # mapped to .gitignore, because we have to
        # have some way to recognize what they are
        # in order to remap the name properly on
        # export.
        if self.vcs is not None:
            if os.path.basename(self.path) == self.vcs.ignorename:
                self.path = os.path.join(os.path.dirname(self.path), ".gitignore")
    def path_remap_out(self, path, vcs):
        "Hack the fileop's basename to map it to a target VCS's conventions."
        if vcs is not None:
            if os.path.basename(path) == ".gitignore":
                return os.path.join(os.path.dirname(path), vcs.ignorename)
        return path
    def setOp(self, op):
        "Set the operation type, invalidating the cached path set."
        self.op = op
        self._paths = None
    # Following two functions emulate the FileOp sort used by git
    # fast_export As it says, 'Handle files below a directory first,
    # in case they are all deleted and the directory changes to a file
    # or symlink.'
    def __pathname__(self):
        "Return the path this op sorts under; Fatal if it has none."
        if self.path:
            return self.path
        elif self.source:
            return self.source
        elif self.op == 'deleteall':
            return ""
        else:
            raise Fatal("internal error while extracting pathname")
    @staticmethod
    def compare(a, b):
        "Emulates the sort that git fast-export uses."
        name_a = FileOp.__pathname__(a)
        name_b = FileOp.__pathname__(b)
        len_a = len(name_a)
        len_b = len(name_b)
        slen = min(len_a, len_b)
        # strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd'
        prefix_a = name_a[:slen]
        prefix_b = name_b[:slen]
        # Same value as cmp(prefix_a, prefix_b), without needing the builtin
        c = (prefix_a > prefix_b) - (prefix_a < prefix_b)
        if c:
            return c
        c = len_b - len_a
        if c:
            return c
        # renames go last
        return (a.op == 'R') - (b.op == 'R')
    def construct(self, *opargs):
        """Fill in this fileop from an op code followed by its arguments.

        Accepts ("M", mode, ref, path), ("D", path), ("N", ref, committish),
        ("R" or "C", source, target) and ("deleteall",).  An integer mode
        is converted to a six-digit octal string.  Raises Fatal on any
        other op code.
        """
        # The path set depends on the fields assigned below, so a
        # previously computed one must not survive reuse of this object.
        self._paths = None
        if opargs[0] == "M":
            (self.op, self.mode, self.ref, self.path) = opargs
            self.path_remap_in()
            if isinstance(self.mode, int):
                self.mode = "%06o" % self.mode
        elif opargs[0] == "D":
            (self.op, self.path) = opargs
            self.path_remap_in()
        elif opargs[0] == "N":
            (self.op, self.ref, self.committish) = opargs
        elif opargs[0] in ("R", "C"):
            (self.op, self.source, self.target) = opargs
        elif opargs[0] == "deleteall":
            self.setOp("deleteall")
        else:
            raise Fatal("unexpected fileop %s" % opargs[0])
    @staticmethod
    def _unquote(path, opline):
        "Strip one layer of enclosing double quotes; Fatal on an empty path."
        if not path:
            raise Fatal("ill-formed fileop %s" % repr(opline))
        if path[0] == '"' and path[-1] == '"':
            return path[1:-1]
        return path
    def parse(self, opline):
        """Fill in this fileop from one line of an import stream.

        Returns self.  Raises Fatal if the line is empty, is not a
        recognized fileop, or is recognized but ill-formed.
        """
        # Prefix tests use startswith() so that an empty line falls
        # through to the Fatal at the bottom instead of raising IndexError.
        if opline.startswith("M"):
            m = FileOp.modify_re.match(opline)
            if not m:
                raise Fatal("bad format of M line: %s" % repr(opline))
            (self.op, self.mode, self.ref, self.path) = m.groups()
            self.path = FileOp._unquote(self.path, opline)
            self.path_remap_in()
        elif opline.startswith("N"):
            try:
                # Keep shlex from treating apostrophes as quote characters
                opline = opline.replace("'", r"\'")
                (self.op, self.ref, self.committish) = shlex.split(opline)
            except ValueError:
                raise Fatal("ill-formed fileop %s" % repr(opline))
        elif opline.startswith("D"):
            (self.op, self.path) = ("D", opline[2:].strip())
            self.path = FileOp._unquote(self.path, opline)
            self.path_remap_in()
        elif opline.startswith(("R", "C")):
            try:
                # Keep shlex from treating apostrophes as quote characters
                opline = opline.replace("'", r"\'")
                (self.op, self.source, self.target) = shlex.split(opline)
            except ValueError:
                raise Fatal("ill-formed fileop %s" % repr(opline))
        elif opline == "deleteall":
            self.op = "deleteall"
        else:
            raise Fatal("unexpected fileop %s while parsing" % opline)
        self._paths = None
        return self
    def paths(self):
        "Return the set of all paths touched by this file op."
        if self._paths is None:
            if self.op in ("M", "D"):
                self._paths = set([self.path])
            elif self.op in ("R", "C"):
                self._paths = set([self.source, self.target])
            # Ugh...this isn't right for deleteall, but since we don't expect
            # to see that except at branch tips we'll ignore it for now.
            elif self.op in ("N", "deleteall"):
                self._paths = set([])
        return self._paths
    def relevant(self, other):
        "Do two fileops touch the same file(s)?"
        # The result is only meant to be tested for truth: it is True
        # when either op is a deleteall, otherwise the set of shared paths.
        if self.op == "deleteall" or other.op == "deleteall":
            return True
        else:
            return self.paths() & other.paths()
    def _path_out(self, vcs):
        "Return our path remapped for vcs, double-quoted if it has whitespace."
        path = self.path_remap_out(self.path, vcs)
        if len(path.split()) > 1:
            return '"' + path + '"'
        return path
    def dump(self, vcs=None):
        "Dump this fileop in import-stream format."
        if self.op == "M":
            showmode = self.mode
            if isinstance(self.mode, int):
                showmode = "%06o" % self.mode
            st = " ".join((self.op, showmode, self.ref)) + " "
            # Remap before quoting, so ignore files in directories with
            # whitespace in their names get renamed like all the others.
            st += self._path_out(vcs)
            if self.ref == 'inline':
                st += "\ndata %d\n%s" % (len(self.inline), self.inline)
        elif self.op == "N":
            st = " ".join((self.op, self.ref, self.committish)) + "\n"
            if self.ref == 'inline':
                st += "data %d\n%s" % (len(self.inline), self.inline)
        elif self.op == "D":
            st = "D " + self._path_out(vcs)
        elif self.op in ("R", "C"):
            st = '%s "%s" "%s"' %  (self.op,
                                    self.path_remap_out(self.source, vcs),
                                    self.path_remap_out(self.target, vcs))
        elif self.op == "deleteall":
            st = self.op
        else:
            raise Fatal("unexpected fileop %s while writing" % self.op)
        return st
    def __str__(self):
        return self.dump(self.vcs)
    __repr__ = __str__

class Commit:
    "Generic commit object."
    def __init__(self, repo=None):
        self.repo = repo
        self.mark = None             # Mark name of commit (may be None)
        self.authors = []            # Authors of commit
        self.committer = None        # Person responsible for committing it.
        self.comment = None          # Commit comment
        self._parent_marks = []      # marks from each of the parent nodes
        self._parent_nodes = None    # cached list of parent nodes; invalidated by changes to _parent_marks
        self.branch = None           # branch name
        self.fileops = []            # blob and file operation list
        self.properties = collections.OrderedDict()         # commit properties (extension)
        self.pushed_to = False       # Flagged for resolution after delete
        self.filemap = {}            # Cached result of manifest()
        self.color = None
        self.fossil_id = None        # Commit's ID in an alien system
        self.common = None           # Used only by the Subversion parser
        self._pathset = None         # Cached result of paths()
        self.splits = None          # split command increments this to avoid creating multiple new commits with duplicate marks
    def index(self):
        "Our 0-origin index in our repo."
        return self.repo.index(self)
    def id_me(self):
        "ID this commit for humans."
        myid = "commit@%d" % (self.index()+1)
        if self.fossil_id:
            myid += "=<%s>" % self.fossil_id
        return myid
    def when(self):
        "Imputed timestamp for sorting after unites."
        return self.committer.date.timestamp
    def moveto(self, repo):
        "Change the repo this commit is associated with."
        self.repo = repo
    def clone(self, repo=None):
        "Clone this commit, without its fileops and color."
        # Only the committer and authors are deep-copied; every other
        # attribute is shared with the original by the shallow copy.
        c = copy.copy(self)
        c.committer = copy.deepcopy(self.committer)
        c.authors = copy.deepcopy(self.authors)
        c.fileops = []
        c._pathset = None
        c.color = None
        if repo is not None:
            c.moveto(repo)
        return c
    def lister(self, _modifiers, eventnum, cols):
        "Enable do_list() to report commits."
        topline = self.comment.split("\n")[0]
        summary = "%6d %s %6s " % \
                      (eventnum+1, self.committer.date.rfc3339(), self.mark)
        if self.fossil_id:
            fossil = "<%s>" % self.fossil_id
            summary += "%6s " % fossil
        return (summary + topline)[:cols]
    def tip(self, _modifiers, eventnum, cols):
        "Enable do_tip() to report deduced branch tips."
        summary = "%6d %s %6s " % \
                      (eventnum+1, self.committer.date.rfc3339(), self.mark)
        return (summary + self.head())[:cols]
    def tags(self, _modifiers, eventnum, cols):
        "Enable do_list() to report lightweight tags."
        assert cols > -1    # pacify pylint
        # Yields the report line when the branch name contains "/tags/",
        # otherwise some false value.
        return self.branch and "/tags/" in self.branch and "%6d %s" % (eventnum+1, self.branch)
    def email_out(self, modifiers, eventnum):
        "Enable do_mailbox_out() to report these."
        msg = RepoSurgeonEmail()
        msg["Event-Number"] = str(eventnum+1)
        msg["Branch"] = self.branch
        msg["Parents"] = " ".join(self._parent_marks)
        if self.authors:
            self.authors[0].email_out(modifiers, msg, "Author")
            # Additional authors go out as Author2, Author3, ...
            for (i, coauthor) in enumerate(self.authors[1:]):
                coauthor.email_out(modifiers, msg, "Author" + repr(2+i))
        self.committer.email_out(modifiers, msg, "Committer")
        if self.fossil_id:
            msg["Fossil-ID"] = self.fossil_id
        for (name, value) in self.properties.items():
            hdr = "-".join([s.capitalize() for s in name.split("-")])
            # Escape newlines and tabs so the value stays on one line
            value = value.replace("\n", r"\n")
            value = value.replace("\t", r"\t")
            msg["Property-" + hdr] = value
        msg.set_payload(self.comment)
        if not self.comment.endswith("\n"):
            complain("in commit %s, comment was not LF-terminated." % self.mark)
        return str(msg)
    def email_in(self, msg):
        """Update this commit from a parsed email message.

        Returns True if anything was changed.  Raises Fatal if a
        Committer or Author header has no recognizable address.
        """
        modified = False
        if "Branch" in msg:
            if self.branch != msg["Branch"]:
                modified = True
            self.branch = msg["Branch"]
        if "Parents" in msg:
            if self._parent_marks != msg["Parents"].split():
                modified = True
            self.setParents(msg["Parents"].split())
        if "Committer" in msg:
            (newname, newemail) = email.utils.parseaddr(msg["Committer"])
            if not newemail:
                raise Fatal("can't recognize address in Committer: %s" % msg["Committer"])
            else:
                if self.committer.name != newname or self.committer.email != newemail:
                    (self.committer.name, self.committer.email) = (newname, newemail)
                    # Yes, display this unconditionally
                    if self.repo:
                        announce("in %s, Committer is modified" % self.id_me())
                    modified = True
        if "Committer-Date" in msg:
            date = Date(msg["Committer-Date"])
            if self.committer.date is None or date != self.committer.date:
                # Yes, display this unconditionally
                if self.repo:
                    if self.committer.date is None:
                        # There is no old date to take a delta from.
                        announce("in %s, Committer-Date is set to '%s'" \
                              % (self.id_me(), date))
                    else:
                        announce("in %s, Committer-Date is modified '%s' -> '%s' (delta %d)" \
                              % (self.id_me(),
                                 self.committer.date, date,
                                 self.committer.date.delta(date)))
                self.committer.date = date
                modified = True
        if "Author" in msg:
            author_re = re.compile("Author[0-9]*$")
            authorkeys = list(filter(author_re.match, list(msg.keys())))
            # Potential minor bug here if > 10 authors;
            # lexicographic sort order doesn't match numeric
            authorkeys.sort()
            # Make room for any authors the message adds
            for i in range(len(authorkeys) - len(self.authors)):
                self.authors.append(Attribution())
            # Another potential minor bug: permuting the set of authors
            # will look like a modification, as old and new authors are
            # compared pairwise rather than set equality being checked.
            # Possibly a feature if one thinks order is significant, but
            # I just did it this way because it was easier.
            for (i, hdr) in enumerate(authorkeys):
                (newname, newemail) = email.utils.parseaddr(msg[hdr])
                if not newemail:
                    raise Fatal("can't recognize address in %s: %s" % (hdr, msg[hdr]))
                else:
                    if self.authors[i].name != newname or self.authors[i].email != newemail:
                        (self.authors[i].name, self.authors[i].email) = (newname, newemail)
                        if debug_enable(DEBUG_EMAILIN):
                            announce("in commit %s, Author #%d is modified" \
                                  % (msg["Event-Number"], i+1))
                        modified = True
                if hdr + "-Date" in msg:
                    date = Date(msg[hdr + "-Date"])
                    if date != self.authors[i].date:
                        # Yes, display this unconditionally
                        if self.repo:
                            announce("in event %s, %s-Date #%d is modified" \
                                     % (msg["Event-Number"], hdr, i+1))
                        self.authors[i].date = date
                        modified = True
        if "Fossil-ID" in msg:
            if msg["Fossil-ID"] != self.fossil_id:
                modified = True
                # The message is the source of the update, not its target
                self.fossil_id = msg["Fossil-ID"]
        newprops = collections.OrderedDict()
        for prophdr in [s for s in list(msg.keys()) if s.startswith("Property-")]:
            # Strip the "Property-" prefix to recover the property name
            propkey = prophdr[9:].lower()
            propval = msg[prophdr]
            if propval == "True":
                propval = True
            elif propval == "False":
                propval = False
            else:
                # Undo the escaping applied by email_out()
                propval = propval.replace(r"\n", "\n")
                propval = propval.replace(r"\t", "\t")
            newprops[propkey] = propval
        modified |= (newprops != self.properties)
        self.properties = newprops
        newcomment = msg.get_payload()
        if global_options["canonicalize"]:
            newcomment = newcomment.strip() + '\n'
        if newcomment != self.comment:
            if debug_enable(DEBUG_EMAILIN):
                announce("in %s, comment is modified %s -> %s" \
                      % (self.id_me(), repr(self.comment), repr(newcomment)))
            modified = True
            self.comment = newcomment
        return modified
    def children(self):
        "Get a list of this commit's children."
        return [e for e in self.repo.commits() if self.mark in e._parent_marks]
    # Hide the parent list behind an interface, so that we can memoize
    # the computation, which is very expensive and frequently
    # performed.
    def parents(self):
        "Get a list of this commit's parents."
        if self._parent_nodes is None:
            self._parent_nodes = [e for e in self.repo.commits() if e.mark in self._parent_marks]
        return self._parent_nodes
    def parentMarks(self):
        "Return the list of parent marks (the list itself, not a copy)."
        return self._parent_marks
    def setParents(self, marks):
        "Replace the list of parent marks."
        self._parent_nodes = None
        self._parent_marks = marks
    def addParent(self, mark):
        "Append a mark to the list of parent marks."
        self._parent_nodes = None
        self._parent_marks.append(mark)
    def insertParent(self, idx, mark):
        "Insert a mark into the list of parent marks at position idx."
        self._parent_nodes = None
        self._parent_marks.insert(idx, mark)
    def removeParent(self, mark):
        "Remove a mark from the list of parent marks."
        self._parent_nodes = None
        self._parent_marks.remove(mark)
    def hasParents(self):
        "Does this commit have any parent marks?"
        return len(self._parent_marks) > 0
    def descended_from(self, other):
        "Is this commit a descendent of the specified other?"
        if not self._parent_marks or self.committer.date < other.committer.date:
            return False
        elif other.mark in self._parent_marks:
            return True
        else:
            # Generator, so the search stops at the first hit
            return any(parent.descended_from(other) \
                       for parent in self.parents())
    def cliques(self):
        "Return a dictionary mapping filenames to associated M cliques."
        cliques = {}
        for (i, fileop) in enumerate(self.fileops):
            if fileop.op == "M":
                if fileop.path not in cliques:
                    cliques[fileop.path] = []
                cliques[fileop.path].append(i)
        return cliques
    def fileop_dump(self, i):
        "Dump file ops without data or inlines; used for debugging only."
        print("commit %d, mark %s:" % (i+1, self.mark))
        for (i, op) in enumerate(self.fileops):
            if op is not None:
                print("%d: %-20s" % (i, str(op)))
    def paths(self):
        "Return the set of all paths touched by this commit."
        if self._pathset is None:
            self._pathset = set([])
            for fileop in self.fileops:
                self._pathset |= fileop.paths()
        return self._pathset
    def _raise_recursion_limit(self):
        "Allow recursion twice as deep as the event list is long."
        # Only ever raise the limit.  Setting it outright would lower
        # it for a small repository, possibly below the depth the
        # interpreter is already running at.
        wanted = len(self.repo.events) * 2
        if wanted > sys.getrecursionlimit():
            sys.setrecursionlimit(wanted)
    def manifest(self):
        "Return a map from paths to marks for files existing at this commit."
        # NOTE(review): a parentless commit returns its filemap without
        # its own fileops being applied here - confirm that the filemap
        # is filled in elsewhere for such commits.
        if self.filemap or not self._parent_marks:
            return self.filemap
        ancestors = {}
        self._raise_recursion_limit()
        for commit in self.parents():
            ancestors.update(commit.manifest())
        for fileop in self.fileops:
            if fileop.op == 'M':
                ancestors[fileop.path] = fileop.ref
            elif fileop.op == 'D':
                if fileop.path in ancestors:
                    del ancestors[fileop.path]
            # For C and R a source that is not in the map raises KeyError
            elif fileop.op == 'C':
                ancestors[fileop.target] = ancestors[fileop.source]
            elif fileop.op == 'R':
                ancestors[fileop.target] = ancestors[fileop.source]
                if fileop.source in ancestors:
                    del ancestors[fileop.source]
        self.filemap = ancestors
        return ancestors
    def alldeletes(self, killset={"D", "deleteall"}):
        "Is this an all-deletes commit?"
        # The default killset is shared between calls; it is only read.
        for fileop in self.fileops:
            if fileop.op not in killset:
                return False
        return True
    def checkout(self, directory=None):
        """Make a directory with links to files in a specified checkout.

        The directory defaults to one named after our mark under the
        repo's subdirectory.  Returns the directory name.  Raises
        Recoverable if the directory or a file in it cannot be created.
        """
        if not directory:
            directory = os.path.join(self.repo.subdir(), self.mark)
        try:
            self._raise_recursion_limit()
            os.mkdir(directory)
            for (path, mark) in self.manifest().items():
                fullpath = os.path.join(directory, path)
                fulldir = os.path.dirname(fullpath)
                if not os.path.exists(fulldir):
                    os.makedirs(fulldir)
                os.link(self.repo.objfind(mark).blobfile(), fullpath)
        except OSError:
            raise Recoverable("could not create checkout directory or files.")
        return directory
    def head(self):
        "Return the branch to which this commit belongs."
        if self.branch.startswith("refs/heads/"):
            return self.branch
        offspring = self.children()
        if not offspring:
            return self.branch
        elif len(offspring) == 1:
            return offspring[0].head()
        else:
            # Several children: follow the first one on our own branch
            for child in offspring:
                if child.branch == self.branch:
                    return child.head()
        raise Recoverable("can't deduce a branch head for %s" % self.mark)
    def dump(self, vcs=None):
        "Dump this commit in import-stream format."
        # Default to the repo's own VCS, but only when it has an importer.
        if vcs is None and self.repo.vcs and self.repo.vcs.importer:
            vcs = self.repo.vcs
        st = ""
        if self.fossil_id:
            st += "# Fossil-ID: %s\n" % self.fossil_id
        st += "commit %s\n" % self.branch
        if self.mark:
            st += "mark %s\n" % self.mark
        if self.authors:
            for author in self.authors:
                st += "author %s\n" % author
        if self.committer:
            st += "committer %s\n" % self.committer
        if self.comment is not None:
            comment = self.comment
            if global_options["fossilize"] and self.fossil_id:
                comment += "\nFossil-ID: %s\n" % self.fossil_id
            st += "data %d\n" % len(comment)
            st += comment
        if "nl-after-comment" in self.repo.export_style():
            st += "\n"
        # The first parent goes out as "from", the others as "merge"
        if self._parent_marks:
            st += "from %s\n" % self._parent_marks[0]
        for ancestor in self._parent_marks[1:]:
            st += "merge %s\n" % ancestor
        if vcs and vcs.properties:
            for (name, value) in self.properties.items():
                # A true boolean is written as a bare property name; a
                # false one is not written at all.
                if value in (True, False):
                    if value:
                        st += "property %s\n" % name
                else:
                    st += "property %s %d %s\n" % (name, len(str(value)), str(value))
        for op in self.fileops:
            st += op.dump(vcs) + "\n"
        if "nl-after-commit" in self.repo.export_style():
            st += "\n"
        return st
    def __str__(self):
        return self.dump()

class Passthrough:
    "Holds import-stream text that is carried through verbatim."
    def __init__(self, line):
        self.text = line
    def email_out(self, _modifiers, eventnum):
        "Let do_mailbox_out() report this event as a message."
        envelope = RepoSurgeonEmail()
        # Event numbers shown to the user are 1-origin
        envelope["Event-Number"] = str(1 + eventnum)
        envelope.set_payload(self.text)
        return str(envelope)
    def email_in(self, msg):
        "Take the payload of a parsed message as our new text."
        payload = msg.get_payload()
        self.text = payload
    def dump(self, _vcs=True):
        "Render in import-stream format, which is just the stored text."
        return self.text
    def __str__(self):
        return self.dump()

class Fatal(Exception):
    "Unrecoverable error."
    def __init__(self, msg):
        # Hand the message to Exception as well, so that str() of the
        # exception and any traceback of an uncaught one show it.
        Exception.__init__(self, msg)
        self.msg = msg

# Generic extractor code begins here

class signature:
    "A file signature - file path, hash value of content and permissions."
    def __init__(self, path):
        """Compute the signature of the file at path.

        For a directory, hashval and perms are both left as None.
        """
        self.path = path
        self.hashval = None
        self.perms = None
        if not os.path.isdir(path):
            # Read in binary mode so the hash covers the exact bytes on disk
            with open(path, "rb") as fp:
                self.hashval = hashlib.sha1(fp.read()).hexdigest()
            self.perms = os.stat(path).st_mode
            # Map to the restricted set of modes that are allowed in
            # the stream format.
            if self.perms & 0o100700 == 0o100700:
                self.perms = 0o100755
            elif self.perms & 0o100600 == 0o100600:
                self.perms = 0o100644
    def __eq__(self, other):
        "Signatures are equal when path, hash and permissions all agree."
        return self.__dict__ == other.__dict__
    def __ne__(self, other):
        return not signature.__eq__(self, other)
    def __str__(self):
        # Directory signatures have neither permissions nor hash, so
        # show dashes in place of the missing fields.
        if self.perms is None:
            perms = "------"
        else:
            perms = "%6o" % self.perms
        if self.hashval is None:
            digest = "----"
        else:
            digest = self.hashval[:4]
        return "<%s:%s:%s>" % (self.path, perms, digest)
    __repr__ = __str__

def capture(command):
    """Run a command through the shell and return what it wrote to stdout.

    Raises Fatal if the command cannot be executed or exits with a
    nonzero status.  When command debugging is enabled, the command is
    announced beforehand and its output echoed to stderr afterwards.
    """
    if debug_enable(DEBUG_COMMANDS):
        announce("%s: capturing %s" % (rfc3339(time.time()), command))
    try:
        output = subprocess.check_output(command, shell=True)
    except (subprocess.CalledProcessError, OSError) as failure:
        raise Fatal("execution of '%s' failed: %s" % (command, failure))
    else:
        if debug_enable(DEBUG_COMMANDS):
            sys.stderr.write(output)
        return output

class PathMap:
    """Represent the set of filenames visible in a Subversion
    revision, using copy-on-write to keep the size of the structure in
    line with the size of the Subversion repository metadata."""
    def __init__(self, elements=None):
        # elements is a dictionary mapping single-component names to
        # True (for files) or to another PathMap (for directories).
        # The dictionary may be shared with other PathMap instances if
        # shared is true.
        if elements is None:
            self.elements = {}
            self.shared = False
        else:
            self.elements = elements
            self.shared = True
    def snapshot(self):
        "Return a copy-on-write snapshot of the set."
        # From here on both objects refer to the same dictionary, so
        # this one must also copy before its next modification.
        self.shared = True
        return PathMap(self.elements)
    def copy_from(self, target_path, source_pathset, source_path):
        "Insert, at target_path, a snapshot of source_path in source_pathset."
        source_obj = source_pathset._find(self._split_path(source_path))
        if source_obj is None:
            # Nothing exists at source_path; leave this set untouched.
            return
        if isinstance(source_obj, PathMap):
            source_obj = source_obj.snapshot()
        self._insert(self._split_path(target_path), source_obj)
    def __contains__(self, path):
        "Return true if path is present in the set as a file."
        elt = self._find(self._split_path(path))
        return not isinstance(elt, PathMap) and elt is not None
    def __getitem__(self, path):
        "Return the value associated with a specified path."
        elt = self._find(self._split_path(path))
        if elt is None or isinstance(elt, PathMap):
            # Unlike ordinary dictionary indexing, a missing path (or a
            # directory) yields None instead of raising KeyError.
            return None
        return elt
    def __setitem__(self, path, value):
        "Add a filename to the set, with associated value (not None)."
        assert value is not None
        self._insert(self._split_path(path), value)
    def __delitem__(self, path):
        """Remove a filename, or all descendents of a directory name,
        from the set."""
        self._remove(self._split_path(path))
    def __nonzero__(self):
        "Return true if any filenames are present in the set."
        # Apply the same presence rule as __contains__ and __iter__: a
        # stored value counts as long as it is not None (even when it
        # is falsy), and a directory counts only if some filename
        # lives beneath it.
        elements = self.elements
        for name in elements:
            value = elements[name]
            if isinstance(value, PathMap):
                if value.__nonzero__():
                    return True
            elif value is not None:
                return True
        return False
    # Same truth test under the method name newer Pythons look up.
    __bool__ = __nonzero__
    def __iter__(self):
        "Yield every filename in the set, in sorted order at each level."
        for (name, value) in sorted(self.elements.items()):
            if isinstance(value, PathMap):
                for path in value:
                    yield os.path.join(name, path)
            elif value is not None:
                yield name
    def __str__(self):
        # Every path is followed by one blank; dropping the final
        # character removes the last blank (or, for an empty set, the
        # blank after the colon, giving "<PathMap:>").
        listing = '<PathMap: ' + ''.join(path + ' ' for path in self)
        return listing[:-1] + '>'
    __repr__ = __str__
    # Insert obj at the location given by components.  Destroys
    # components as we recurse.
    def _insert(self, components, obj):
        if not components:
            return
        self._unshare()
        first = components.pop()
        if not components:
            self.elements[first] = obj
        else:
            # Anything that is not already a directory at this name is
            # replaced by a fresh, empty directory.
            if not isinstance(self.elements.get(first), PathMap):
                self.elements[first] = PathMap()
            self.elements[first]._insert(components, obj)
    # Remove the location given by components.  Destroys components as
    # we recurse.
    def _remove(self, components):
        if not components:
            return
        self._unshare()
        first = components.pop()
        if not components:
            if first in self.elements:
                del self.elements[first]
        else:
            obj = self.elements.get(first)
            if isinstance(obj, PathMap):
                obj._remove(components)
    # Return the object at the location given by components--either
    # the associated value if it's present as a filename, or a PathMap
    # containing the descendents if it's a directory name.  Return
    # None if the location does not exist in the set.  Destroys
    # components as we recurse.
    def _find(self, components):
        if not components:
            return self
        first = components.pop()
        if first not in self.elements:
            return None
        down = self.elements[first]
        if isinstance(down, PathMap):
            return down._find(components)
        elif not components:
            return down
        else:
            # A file was found where a directory was needed.
            return None
    # If elements might be shared with another PathMap, copy it.
    def _unshare(self):
        if self.shared:
            shared_elements = self.elements
            private = {}
            for name in shared_elements:
                value = shared_elements[name]
                # Subdirectories are snapshotted rather than copied, so
                # they in turn are only copied when first modified.
                if isinstance(value, PathMap):
                    value = value.snapshot()
                private[name] = value
            self.elements = private
            self.shared = False
    # Return a list of the components in path in reverse order.
    @staticmethod
    def _split_path(path):
        components = []
        while True:
            dirname, basename = os.path.split(path)
            if basename != '':
                components.append(basename)
            # Stop at the root (split no longer shortens the path) or
            # once nothing is left in front of the last component.
            if dirname == path or dirname == '':
                break
            path = dirname
        return components

class RepoStreamer:
    "Repository factory driver class for all repo analyzers."
    def __init__(self, extractor):
        # Counter behind the ":N" marks handed out by __newmark
        self.markseq = 0
        self.tagseq = 0
        self.commits = {}
        # revision ID -> mark of the commit generated for it
        self.markmap = {}
        # revision ID -> {path: signature} for the files of that revision
        self.filemap = {}
        # content hash -> mark of the blob that holds that content
        self.hash_to_mark = {}
        self.baton = None
        self.extractor = extractor
    def __newmark(self, revision=None):
        "Allocate the next ':N' mark; remember it under revision if one is given."
        self.markseq += 1
        mark = ":" + str(self.markseq)
        # Test against None so that a falsy revision ID such as 0 is
        # still recorded.
        if revision is not None:
            self.markmap[revision] = mark
        return mark
    def extract(self, repo, progress=True):
        """Populate repo from the extractor's revisions and return it.

        Each revision is checked out through the extractor, the files
        present are compared by signature against what the revision
        inherits from its parents, and blobs plus a commit with the
        resulting file operations are appended to repo.  Resets and
        tags reported by the extractor are appended afterwards.
        Raises Recoverable if the extractor reports that the working
        directory is not clean.
        """
        if not self.extractor.isclean():
            raise Recoverable("directory %s has unsaved changes." % os.getcwd())
        repo.makedir()
        with Baton(prompt="Extracting", enable=progress) as self.baton:
            self.extractor.analyze(self.baton)
            self.extractor.pre_extract(repo)
            # Walk a private copy of the revision list.  The marks of a
            # revision's parents are looked up in markmap below, so
            # parents have to appear in the list before their children.
            for revision in copy.copy(self.extractor.get_revlist()):
                commit = Commit(repo)
                self.baton.twirl()
                present = self.extractor.checkout(revision, self.filemap)
                parents = self.extractor.get_parents(revision)
                commit.committer = Attribution(self.extractor.get_committer(revision))
                commit.authors = [Attribution(a) \
                                  for a in self.extractor.get_authors(revision)]
                commit.setParents([self.markmap[rev] for rev in parents])
                commit.branch = self.extractor.get_branch(revision)
                commit.comment = self.extractor.get_comment(revision)
                if debug_enable(DEBUG_EXTRACT):
                    msg = commit.comment
                    if msg is None:
                        msg = ""
                    announce("r%s: comment '%s'" % (revision, msg.strip()))
                # Start from the union of the parents' file maps.
                self.filemap[revision] = {}
                for rev in parents:
                    self.filemap[revision].update(self.filemap[rev])
                # NOTE(review): when present is empty, no deletions are
                # generated for files inherited from the parents -
                # confirm that this is intended.
                if present:
                    # Inherited paths that are no longer checked out
                    removed = set(self.filemap[revision].keys()) - set(present)
                    for path in present:
                        if os.path.isdir(path):
                            continue
                        if not os.path.exists(path):
                            announce("r%s: expected path %s does not exist!" % \
                                     (revision, path))
                            continue
                        newsig = signature(path)
                        if newsig.hashval in self.hash_to_mark:
                            # The file's hash corresponds to an existing
                            # blob; generate modify, copy, or rename as
                            # appropriate.
                            if path not in self.filemap[revision] \
                                   or self.filemap[revision][path]!=newsig:
                                if debug_enable(DEBUG_EXTRACT):
                                    announce("r%s: update for %s" % (revision, path))
                                # NOTE(review): signature equality also
                                # compares the path attribute, so an entry
                                # recorded under another path may never
                                # compare equal here - confirm that the
                                # rename/copy branches are reachable.
                                for (oldpath, oldsig) in self.filemap[revision].items():
                                    if oldsig == newsig:
                                        if oldpath in removed:
                                            op = FileOp()
                                            op.construct('R', oldpath, path)
                                            commit.fileops.append(op)
                                            del self.filemap[revision][oldpath]
                                        elif oldpath != path:
                                            op = FileOp()
                                            op.construct('C', oldpath, path)
                                            commit.fileops.append(op)
                                        break
                                else:
                                    # No matching old entry: plain modify
                                    # that reuses the existing blob.
                                    op = FileOp()
                                    op.construct('M',
                                                 newsig.perms,
                                                 self.hash_to_mark[newsig.hashval],
                                                 path)
                                    commit.fileops.append(op)
                        else:
                            # Content hash doesn't match any existing blobs
                            if debug_enable(DEBUG_EXTRACT):
                                announce("r%s: %s has new hash" \
                                         % (revision, path))
                            blobmark = self.__newmark()
                            self.hash_to_mark[newsig.hashval] = blobmark
                            # Actual content enters the representation
                            blob = Blob(repo)
                            blob.mark = blobmark
                            shutil.copyfile(path, blob.blobfile())
                            blob.path = path
                            repo.addEvent(blob)
                            # Its new fileop is added to the commit
                            op = FileOp()
                            op.construct('M', newsig.perms, blobmark, path)
                            commit.fileops.append(op)
                        self.filemap[revision][path] = newsig
                    for tbd in removed:
                        op = FileOp()
                        op.construct('D', tbd)
                        commit.fileops.append(op)
                        del self.filemap[revision][tbd]
                self.extractor.cleanup(revision, True)
                # A parentless commit off master is preceded by a reset
                # naming its branch.
                if not parents and commit.branch != "refs/heads/master":
                    reset = Reset()
                    reset.ref = commit.branch
                    repo.addEvent(reset)
                commit.fileops.sort(cmp=FileOp.compare)
                commit.fossil_id = revision
                commit.properties.update(self.extractor.get_properties(revision)) 
                commit.mark = self.__newmark(revision)
                if debug_enable(DEBUG_EXTRACT):
                    announce("r%s: gets mark %s (%d ops)" % (revision, commit.mark, len(commit.fileops)))
                repo.addEvent(commit)
            # Now append reset objects
            resets = self.extractor.get_resetlist()
            # Each entry is a (name, revision) pair; order by revision.
            resets.sort(key=lambda pair: pair[1])
            for (resetname, revision) in resets:
                reset = Reset()
                reset.ref = resetname
                reset.committish = self.markmap[revision]
                repo.addEvent(reset)
            # Last, append tag objects.
            tags = self.extractor.get_taglist()
            tags.sort(key=lambda t: t.tagger.date)
            for tag in tags:
                # Translate the tagged revision into its commit mark
                # (None if no commit was generated for it).
                tag.committish = self.markmap.get(tag.committish)
                repo.addEvent(tag)
            self.extractor.post_extract(repo)
        return repo

# Stream parsing
#
# The Subversion dumpfile format is documented at
#
# https://svn.apache.org/repos/asf/subversion/trunk/notes/dump-load-format.txt

# Node kinds and node actions are carried as small integers rather than
# (un-interned) strings to reduce working-set size.  The first group
# holds the node kinds, the second the node actions.
SD_NONE, SD_FILE, SD_DIR = 0, 1, 2
SD_ADD, SD_DELETE, SD_CHANGE, SD_REPLACE = 0, 1, 2, 3

class StreamParser:
    "Parse a fast-import stream or Subversion dump to populate a Repository."
    # If these don't match the constants above, havoc will ensue
    class NodeAction:
        ActionValues = ("add", "delete", "change", "replace")
        PathTypeValues = ("none", "file", "dir")
        def __init__(self):
            # These are set during parsing
            self.revision = None
            self.path = None
            self.kind = SD_NONE
            self.action = None
            self.from_rev = None
            self.from_path = None
            self.content_hash = None
            self.from_hash = None
            self.blob = None
            self.props = None
            # These are set during the analysis phase
            self.from_set = None
            self.blobmark = None
            self.generated = False
        def __str__(self):
            tell = "<NodeAction: r%s %s %s '%s' " \
                   % (self.revision,
                      StreamParser.NodeAction.ActionValues[self.action],
                      StreamParser.NodeAction.PathTypeValues[self.kind],
                      self.path)
            if self.from_rev:
                tell += "from=%s~%s " % (self.from_rev, self.from_path)
            if self.from_set:
                tell += "sources=%s " % self.from_set
            if self.generated:
                tell += "generated "
            if self.props is not None:
                # Trim off the OrderedDict wrapper
                showprops = repr(self.props)
                if showprops.startswith("OrderedDict("):
                    showprops = showprops[12:-1]
                tell += "properties=%s " % showprops
            return tell[:-1] + ">"
        __repr__ = __str__
    class RevisionRecord:
        "Hold the node list and the property set of one parsed revision."
        def __init__(self, nodes, props):
            self.nodes, self.props = nodes, props
    # Native Subversion properties that are suppressed.  The one native
    # property deliberately not listed here, and therefore not
    # suppressed, is svn:externals.
    # The reason for these suppressions is to avoid a huge volume of
    # junk file properties - cvs2svn in particular generates them like
    # mad.  We want to let through other properties that might carry
    # useful information.
    IgnoreProperties = (
        "svn:executable",  # We special-case this one elsewhere
        "svn:ignore",      # We special-case this one elsewhere
        "svn:special",     # We special-case this one elsewhere
        "svn:mergeinfo",   # We special-case this one elsewhere
        "svn:mime-type",
        "svn:keywords",
        "svn:needs-lock",
        "svn:eol-style",   # Don't want to suppress, but cvs2svn floods these.
        )
    # These are the default patterns globally ignored by Subversion,
    # one pattern per line.
    SubversionDefaultIgnores = """\
*.o
*.lo
*.la
*.al
.libs
*.so
*.so.[0-9]*
*.a
*.pyc
*.pyo
*.rej
*~
.#*
.*.swp
.DS_store
"""
    SplitSep = '.'
    def __init__(self, repo):
        self.repo = repo
        self.fp = None
        self.import_line = 0
        self.markseq = 0
        self.ccount = 0
        self.linebuffers = []
        self.warnings = []
        # Everything below here is Subversion-specific
        self.branches = {}
        self.branchlink = {}
        self.branchdeletes = set([])
        self.revisions = collections.OrderedDict()
        self.copycounts = collections.OrderedDict()
        self.hashmap = {}
        self.permissions = {}
        self.has_properties = set([])
        self.fileop_branchlinks  = set([])
        self.directory_branchlinks  = set([])
        self.active_gitignores = set([])
        self.mergeinfo = {}
    def error(self, msg):
        "Abort parsing: raise Fatal with msg tagged by the current line number."
        complaint = msg + " at line "
        raise Fatal(complaint + repr(self.import_line))
    def warn(self, msg):
        "Complain about msg, citing the current input line when one is known."
        if not self.import_line:
            # No line has been counted yet, so there is nothing to cite.
            complain(msg)
            return
        complain(msg + " at line " + repr(self.import_line))
    def gripe(self, msg):
        "Queue msg on self.warnings when verbose is exactly 1, else complain now."
        if verbose != 1:
            complain(msg)
        else:
            self.warnings.append(msg)
    def __newmark(self):
        self.markseq += 1
        mark = ":" + str(self.markseq)
        return mark
    def readline(self):
        if self.linebuffers:
            line = self.linebuffers.pop()
        else:
            line = self.fp.readline()
        self.ccount += len(line)
        self.import_line += 1
        return line
    def pushback(self, line):
        self.ccount -= len(line)
        self.import_line -= 1
        self.linebuffers.append(line)
    # Helpers for import-stream files
    def fi_read_data(self, line=None):
        "Read a fast-import data section."
        if not line:
            line = self.readline()
        if line.startswith("data <<"):
            delim = line[7:]
            while True:
                dataline = self.readline()
                if dataline == delim:
                    break
                elif not dataline:
                    raise Fatal("EOF while reading blob")
        elif line.startswith("data"):
            try:
                count = int(line[5:])
                data = self.fp.read(count)
            except ValueError:
                self.error("bad count in data")
        else:
            self.error("malformed data header %s" % repr(line))
        line = self.readline()
        if line != '\n':
            self.pushback(line) # Data commands optionally end with LF
        return data
    def fi_parse_fileop(self, fileop):
        # Read a fast-import fileop
        if fileop.ref[0] == ':':
            pass
        elif fileop.ref == 'inline':
            fileop.inline = self.fi_read_data()
        else:
            self.error("unknown content type in filemodify")
    # Helpers for Subversion dumpfiles
    @staticmethod
    def sd_body(line):
        # Parse the body from a Subversion header line
        return line.split(":")[1].strip()
    def sd_require_header(self, hdr):
        # Consume a required header line
        line = self.readline()
        self.ccount += len(line)
        if not line.startswith(hdr):
            self.error('required %s header missing')
        return StreamParser.sd_body(line)
    def sd_require_spacer(self):
        line = self.readline()
        if line.strip():
            self.error('found %s expecting blank line' % repr(line))
    def sd_read_blob(self, length):
        # Read a Subversion file-content blob.
        content = self.fp.read(length)
        if self.fp.read(1) != '\n':
            self.error("EOL not seen where expected, Content-Length incorrect")
        self.import_line += content.count('\n') + 1
        self.ccount += len(content) + 1
        return content
    def sd_read_props(self, target, checklength):
        """Parse a Subversion properties section, return as an OrderedDict.

        target only labels debugging output.  checklength is the
        declared length of the section; reading stops at PROPS-END or
        once that many characters have been consumed.  Raises Fatal
        (through self.error) on a short section, a garbled value
        header, or end of input inside the section.
        """
        props = collections.OrderedDict()
        # The character counter is restarted so that it measures this
        # section only.
        self.ccount = 0
        while self.ccount < checklength:
            line = self.readline()
            if debug_enable(DEBUG_SVNPARSE):
                announce("readprops, line %d: %s" % \
                         (self.import_line, repr(line)))
            if not line:
                # An empty string means end of input; it adds nothing to
                # ccount, so without this check a truncated dump would
                # keep this loop spinning forever.
                self.error("unexpected EOF in property section")
            elif line.startswith("PROPS-END"):
                # This test should be !=, but I get random off-by-ones from
                # real dumpfiles - I don't know why.
                if self.ccount < checklength:
                    self.error("expected %d property chars, got %d"\
                               % (checklength, self.ccount))
                break
            elif not line.strip():
                continue
            elif line[0] == "K":
                # "K <length>" announces a key, "V <length>" its value;
                # each is followed by that many characters and a newline.
                key = self.sd_read_blob(int(line.split()[1]))
                line = self.readline()
                if not line.startswith('V'):
                    self.error("property value garbled")
                value = self.sd_read_blob(int(line.split()[1]))
                props[key] = value
                if debug_enable(DEBUG_SVNPARSE):
                    announce("readprops: on %s, setting %s = %s"\
                             % (target, key, repr(value)))
        return props
    #
    # The main event
    #
    def fast_import(self, fp, progress=False):
        "Initialize the repo from a fast-import stream or Subversion dump."
        self.repo.makedir()
        try:
            self.fp = fp
            with Baton("reposurgeon: from %s" % os.path.relpath(fp.name), enable=progress) as baton:
                self.import_line = 0
                self.linebuffers = []
                # First, determine the input type
                line = self.readline()
                if line.startswith("SVN-fs-dump-format-version: "):
                    if StreamParser.sd_body(line) not in ("1", "2"):
                        raise Fatal("unsupported dump format version %s" \
                                    % version)
                    # Beginning of Subversion dump parsing
                    while True:
                        line = self.readline()
                        if not line:
                            break
                        elif not line.strip():
                            continue
                        elif line.startswith("UUID:"):
                            self.repo.uuid = StreamParser.sd_body(line)
                        elif line.startswith("Revision-number: "):
                            # Begin Revision processing
                            baton.twirl()
                            if debug_enable(DEBUG_SVNPARSE):
                                announce("revision parsing, line %d: begins" % \
                                     (self.import_line))
                            revision = StreamParser.sd_body(line)
                            plen = int(self.sd_require_header("Prop-content-length"))
                            self.sd_require_header("Content-length")
                            self.sd_require_spacer()
                            props = self.sd_read_props("commit", plen)
                            # Parsing of the revision header is done
                            node = None # pacify pylint
                            nodes = []
                            in_header = False
                            plen = tlen = -1
                            # Node list parsing begins
                            while True:
                                line = self.readline()
                                if debug_enable(DEBUG_SVNPARSE):
                                    announce("node list parsing, line %d: %s" % \
                                             (self.import_line, repr(line)))
                                if not line:
                                    break
                                elif not line.strip():
                                    if not in_header:
                                        continue
                                    else:
                                        if plen > -1:
                                            node.props = self.sd_read_props(node.path, plen)
                                        if tlen > -1:
                                            text = self.sd_read_blob(tlen)
                                            node.blob = Blob(self.repo)
                                            with open(node.blob.blobfile(), "w") as wfp:
                                                wfp.write(text)
                                        node.revision = revision
                                        nodes.append(node)
                                        in_header = False
                                elif line.startswith("Revision-number: "):
                                    self.pushback(line)
                                    break
                                # Node processing begins
                                elif line.startswith("Node-path: "):
                                    node = StreamParser.NodeAction()
                                    node.path = StreamParser.sd_body(line)
                                    plen = tlen = -1
                                    in_header = True
                                elif line.startswith("Node-kind: "):
                                    node.kind = StreamParser.sd_body(line)
                                    node.kind = StreamParser.NodeAction.PathTypeValues.index(node.kind)
                                    if node.kind is None:
                                        self.error("unknown kind %s"%node.kind)
                                elif line.startswith("Node-action: "):
                                    node.action = StreamParser.sd_body(line)
                                    node.action = StreamParser.NodeAction.ActionValues.index(node.action)
                                    
                                    if node.action is None:
                                        self.error("unknown action %s" \
                                                   % node.action)
                                elif line.startswith("Node-copyfrom-rev: "):
                                    node.from_rev = StreamParser.sd_body(line)
                                elif line.startswith("Node-copyfrom-path: "):
                                    node.from_path = StreamParser.sd_body(line)
                                elif line.startswith("Text-copy-source-md5: "):
                                    node.from_hash = StreamParser.sd_body(line)
                                elif line.startswith("Text-content-md5: "):
                                    node.content_hash = StreamParser.sd_body(line)
                                elif line.startswith("Text-content-sha1: "):
                                    continue
                                elif line.startswith("Text-content-length: "):
                                    tlen = int(StreamParser.sd_body(line))
                                elif line.startswith("Prop-content-length: "):
                                    plen = int(StreamParser.sd_body(line))
                                elif line.startswith("Content-length: "):
                                    continue
                                else:
                                    if debug_enable(DEBUG_SVNPARSE):
                                        announce("node list parsing, line %d: uninterpreted line %s" % \
                                             (self.import_line, repr(line)))
                                    continue
                                # Node processing ends
                            # Node list parsing ends
                            self.revisions[revision] = StreamParser.RevisionRecord(nodes, props)
                            if debug_enable(DEBUG_SVNPARSE):
                                announce("revision parsing, line %d: ends" % \
                                         (self.import_line))
                            # End Revision processing
                    # End of Subversion dump parsing
                    self.svn_process(baton)
                    elapsed = time.time() - baton.time
                    baton.twirl("%d revisions (%d/s)" %
                                 (int(revision), int(int(revision)/elapsed)))
                else:
                    self.pushback(line)
                    # Beginning of fast-import stream parsing
                    while True:
                        line = self.readline()
                        if not line:
                            break
                        elif not line.strip():
                            continue
                        elif line.startswith("blob"):
                            blob = Blob(self.repo)
                            line = self.readline()
                            if line.startswith("mark"):
                                blob.mark = line[5:].strip()
                                wfp = open(blob.blobfile(), "w")
                                blobcontent = self.fi_read_data()
                                # Parse CVS and Subversion $-headers
                                # There'd better not be more than one of these.
                                for m in re.finditer(r"\$Id *:[^$]*\$",
                                                     blobcontent):
                                    fields = m.group(0).split()
                                    if len(fields) < 2:
                                        self.gripe("malformed $-cookie '%s'" % m.group(0))
                                    else:
                                        # Save file basename and CVS version
                                        if fields[1].endswith(",v"):
                                            # CVS revision
                                            blob.cookie = (fields[1][:-2], fields[2])
                                        else:
                                            # Subversion revision
                                            blob.cookie = fields[1]
                                for m in re.finditer(r"\$Revision *: *([^$]*)\$",
                                                     blobcontent):
                                    rev = m.group(0).strip()
                                    if '.' not in rev:
                                        # Subversion revision
                                        blob.cookie = rev
                                wfp.write(blobcontent)
                                wfp.close()
                            else:
                                self.error("missing mark after blob")
                            self.repo.addEvent(blob)
                            baton.twirl()
                        elif line.startswith("data"):
                            self.error("unexpected data object")
                        elif line.startswith("commit"):
                            baton.twirl()
                            commitbegin = self.import_line
                            commit = Commit(self.repo)
                            commit.branch = line.split()[1]
                            while True:
                                line = self.readline()
                                if not line:
                                    break
                                elif line.startswith("mark"):
                                    commit.mark = line[5:].strip()
                                elif line.startswith("author"):
                                    try:
                                        commit.authors.append(Attribution(line[7:]))
                                    except ValueError:
                                        self.error("malformed author line")
                                elif line.startswith("committer"):
                                    try:
                                        commit.committer = Attribution(line[10:])
                                    except ValueError:
                                        self.error("malformed committer line")
                                elif line.startswith("property"):
                                    fields = line.split(" ")
                                    if len(fields) < 3:
                                        self.error("malformed property line")
                                    elif len(fields) == 3:
                                        commit.properties[fields[1]] = True
                                    else:
                                        name = fields[1]
                                        length = int(fields[2])
                                        value = " ".join(fields[3:])
                                        if len(value) < length:
                                            value += fp.read(length-len(value))
                                            if fp.read(1) != '\n':
                                                self.error("trailing junk on property value")
                                        elif len(value) == length + 1:
                                            value = value[:-1] # Trim '\n'
                                        else:
                                            self.error("garbage length field on property line")
                                        commit.properties[name] = value
                                elif line.startswith("data"):
                                    commit.comment = self.fi_read_data(line)
                                elif line.startswith("from") or line.startswith("merge"):
                                    commit.addParent(line.split()[1])
                                # Handling of file ops begins.
                                elif line[0] in ("C", "D", "R"):
                                    commit.fileops.append(FileOp(self.repo.vcs).parse(line))
                                elif line == "deleteall\n":
                                    commit.fileops.append(FileOp(self.repo.vcs).parse("deleteall"))
                                elif line[0] == "M":
                                    fileop = FileOp(self.repo.vcs).parse(line)
                                    commit.fileops.append(fileop)
                                    if fileop.mode == "160000":
                                        # This is a submodule link.  The ref
                                        # field is a SHA1 hash and the path
                                        # is an external reference name.
                                        # Don't try to collect data, just pass
                                        # it through.
                                        self.warn("submodule link")
                                    else:
                                        # 100644, 100755, 120000.
                                        self.fi_parse_fileop(fileop)
                                elif line[0] == "N":
                                    fileop = FileOp(self.repo.vcs).parse(line)
                                    commit.fileops.append(fileop)
                                    self.fi_parse_fileop(fileop)
                                # Handling of file ops ends.
                                elif line.isspace():
                                    # This handles slightly broken
                                    # exporters like the bzr-fast-export
                                    # one that may tack an extra LF onto
                                    # the end of data objects.  With it,
                                    # we don't drop out of the
                                    # commit-processing loop until we see
                                    # a *nonblank* line that doesn't match
                                    # a commit subpart.
                                    continue
                                else:
                                    # Dodgy bzr autodetection hook...
                                    if not self.repo.vcs:
                                        if "branch-nick" in commit.properties:
                                            for vcs in vcstypes:
                                                if vcs.name == "bzr":
                                                    self.repo.vcs = vcs
                                                    break
                                    self.pushback(line)
                                    break
                            if not (commit.mark and commit.committer):
                                self.import_line = commitbegin
                                self.error("missing required fields in commit")
                            if commit.mark is None:
                                self.warn("unmarked commit")
                            self.repo.addEvent(commit)
                            baton.twirl()
                        elif line.startswith("reset"):
                            reset = Reset()
                            reset.ref = line[6:].strip()
                            line = self.readline()
                            if line.startswith("from"):
                                reset.committish = line[5:].strip()
                            else:
                                self.pushback(line)
                            self.repo.addEvent(reset)
                            baton.twirl()
                        elif line.startswith("tag"):
                            tagger = None
                            tagname = line[4:].strip()
                            line = self.readline()
                            if line.startswith("from"):
                                referent = line[5:].strip()
                            else:
                                self.error("missing from after tag")
                            line = self.readline()
                            if line.startswith("tagger"):
                                try:
                                    tagger = Attribution(line[7:])
                                except ValueError:
                                    self.error("malformed tagger line")
                            else:
                                self.warn("missing tagger after from in tag")
                                self.pushback(line)
                            self.repo.addEvent(Tag(self.repo, tagname,
                                                   referent, tagger,
                                                   self.fi_read_data()))
                            baton.twirl()
                        else:
                            # Simply pass through any line we don't understand.
                            self.repo.addEvent(Passthrough(line))
                    # End of fast-import parsing
                self.import_line = 0
            if self.warnings:
                for warning in self.warnings:
                    complain(warning)
        except KeyboardInterrupt:
            nuke(self.repo.subdir(), "reposurgeon: import interrupted, removing %s" % self.repo.subdir())
            raise KeyboardInterrupt
    #
    # The rendezvous between parsing and object building for import
    # streams is pretty trivial and best done inline in the parser,
    # because reposurgeon's internal structures are designed to match
    # the entities found in an import stream. For Subversion dumpfiles,
    # on the other hand, there's a fair bit of impedance-matching
    # required.  That happens in the following functions.
    #
    @staticmethod
    def node_permissions(node):
        "Fileop permissions from node properties"
        if node.props:
            if "svn:executable" in node.props:
                return 0o100755
            elif "svn:special" in node.props:
                # Map to git symlink, which behaves the same way.
                # Blob contents is the path the link should resolve to. 
                return 0o120000
        return 0o100644
    def branchpath(self, path):
        """Return path with its leading branch prefix removed.

        The path comes back unchanged when self.branches is empty or
        when the path contains no os.sep at all.  Otherwise the first
        key of self.branches (in its iteration order) that the path
        starts with is cut off the front.

        Raises Fatal when no key of self.branches is a prefix of path.
        """
        if not self.branches or os.sep not in path:
            return path
        # First matching prefix in the mapping's own iteration order.
        prefix = next((candidate for candidate in self.branches
                       if path.startswith(candidate)), None)
        if prefix is None:
            raise Fatal("couldn't assign %s to a branch in %s" \
                        % (path, self.branches.keys()))
        return path[len(prefix):]
    def svn_process(self, baton):
        "Subversion actions to import-stream commits."
        def countfmt(count):
            return " %%%dd of %s" % (len(str(count)), count)
        # Find all copy sources and compute the set of branches
        if debug_enable(DEBUG_EXTRACT):
            announce("Pass 1")
        baton.twirl("copynodes")
        baton.startcounter(countfmt(len(self.revisions)))
        copynodes = []
        for revision in self.revisions:
            record = self.revisions[revision]
            for node in record.nodes:
                if node.from_path is not None:
                    copynodes.append(node)
                    if debug_enable(DEBUG_EXTRACT):
                        announce("copynode at %s" % node)
                if node.action == SD_ADD and node.kind == SD_DIR and not node.path+os.sep in self.branches and not global_options['svn_nobranch']:
                    for trial in global_options['svn_branchify']:
                        if '*' not in trial and trial == node.path:
                            self.branches[node.path+os.sep] = None
                        elif trial.endswith(os.sep + '*') \
                                 and os.path.dirname(trial) == os.path.dirname(node.path):
                            self.branches[node.path+os.sep] = None
                        elif trial == '*' and not node.path + os.sep + '*' in global_options['svn_branchify'] and node.path.count(os.sep) < 1:
                            self.branches[node.path+os.sep] = None
                    if node.path+os.sep in self.branches and debug_enable(DEBUG_TOPOLOGY):
                        announce("%s recognized as a branch" % node.path+os.sep)
            baton.bumpcounter()
            copynodes.sort(key=lambda n: n.from_rev)
        baton.endcounter()
        baton.twirl('+')
        # Build filemaps.
        if debug_enable(DEBUG_EXTRACT):
            announce("Pass 2")
        baton.twirl("filemaps")
        baton.startcounter(countfmt(len(self.revisions)))
        filemaps = {}
        filemap = PathMap()
        split_commits = []
        for revision in self.revisions:
            record = self.revisions[revision]
            for node in record.nodes:
                # Mutate the filemap according to copies
                if node.from_rev:
                    assert int(node.from_rev) < int(revision)
                    filemap.copy_from(node.path, filemaps[node.from_rev],
                                      node.from_path)
                    if debug_enable(DEBUG_FILEMAP):
                        announce("r%s~%s copied to %s" \
                                 % (node.from_rev, node.from_path, node.path))
                # Mutate the filemap according to adds/deletes/changes
                if node.action == SD_ADD and node.kind == SD_FILE:
                    filemap[node.path] = node
                    if debug_enable(DEBUG_FILEMAP):
                        announce("r%s~%s added" % (node.revision, node.path))
                elif node.action == SD_DELETE:
                    if node.kind == SD_NONE:
                        node.kind = SD_FILE if node.path in filemap else SD_DIR
                    # Snapshot the deleted paths before removing them.
                    node.from_set = PathMap()
                    node.from_set.copy_from(node.path, filemap, node.path)
                    del filemap[node.path]
                    if debug_enable(DEBUG_FILEMAP):
                        announce("r%s~%s deleted" \
                                 % (node.revision, node.path))
                elif node.action == SD_CHANGE and node.kind == SD_FILE:
                    filemap[node.path] = node
                    if debug_enable(DEBUG_FILEMAP):
                        announce("r%s~%s changed" % (node.revision, node.path))
            filemaps[revision] = filemap.snapshot()
            baton.bumpcounter()
        baton.endcounter()
        baton.twirl('+')
        # Blows up huge on large repos...
        #if debug_enable(DEBUG_FILEMAP):
        #    announce("filemaps %s" % filemaps)
        # Build from sets in each directory copy record.
        if debug_enable(DEBUG_EXTRACT):
            announce("Pass 3")
        baton.twirl("copysets")
        baton.startcounter(countfmt(len(copynodes)))
        for copynode in copynodes:
            if debug_enable(DEBUG_FILEMAP):
                announce("r%s copynode filemap is %s" \
                         % (copynode.from_rev, filemaps[copynode.from_rev]))
            copynode.from_set = PathMap()
            copynode.from_set.copy_from(copynode.from_path,
                                        filemaps[copynode.from_rev],
                                        copynode.from_path)
            # Sanity check: if the directory node has no from set, but
            # there are files underneath it, this means the directory
            # structure implied by the filemaps is not consistent with
            # what's in the parsed Subversion nodes.  This should never
            # happen.
            if not copynode.from_set:
                for path in filemaps[copynode.revision]:
                    if path.startswith(node.path + os.sep):
                        self.gripe("inconsistently empty from set for %s" % copynode)
                        break
            baton.bumpcounter()
        baton.endcounter()
        baton.twirl('+')
        # Build commits
        # This code can eat your processor, so we make it give up
        # its timeslice at reasonable intervals. Needed because
        # it doesn't hit the disk.
        if debug_enable(DEBUG_EXTRACT):
            announce("Pass 4")
        baton.twirl("commits")
        baton.startcounter(countfmt(len(self.revisions)))
        previous = None
        for revision in self.revisions:
            record = self.revisions[revision]
            if debug_enable(DEBUG_EXTRACT):
                announce("Revision %s:" % revision)
            for node in record.nodes:
                # if node.props is None, no property section.
                # if node.blob is None, no text section.
                try:
                    assert node.action in (SD_CHANGE, SD_ADD, SD_DELETE, SD_REPLACE)
                    assert node.blob is not None or \
                           node.props is not None or \
                           node.from_rev or \
                           node.action in (SD_ADD, SD_DELETE)
                    assert (node.from_rev is None) == (node.from_path is None)
                    assert node.kind in (SD_FILE, SD_DIR)
                    assert node.kind != SD_NONE or node.action == SD_DELETE
                    assert node.action in (SD_ADD, SD_REPLACE) or not node.from_rev
                except AssertionError:
                    raise Fatal("forbidden operation in dump stream at r%s: %s" \
                                % (revision, node))
            commit = Commit(self.repo)
            try:
                ad = record.props.pop("svn:date")
            except KeyError, key:
                self.error("missing required %s" % key)
            if "svn:author" in record.props:
                au = record.props.pop("svn:author")
            else:
                au = "no-author"
            if "svn:log" in record.props:
                commit.comment = record.props.pop("svn:log")
                if not commit.comment.endswith("\n"):
                    commit.comment += "\n"
            if global_options["svn_use_uuid"]:
                attribution = "%s <%s@%s> %s" % (au, au, self.repo.uuid, ad)
            else:
                attribution = "%s <%s> %s" % (au, au, ad)
            commit.committer = Attribution(attribution)
            commit.properties.update(record.props)
            # Zero revision is never interesting - no operations, no
            # comment, no author, it's just a start marker for a
            # non-incremental dump.
            if revision == "0": 
                continue
            expanded_nodes = []
            for (n, node) in enumerate(record.nodes):
                if debug_enable(DEBUG_EXTRACT):
                    announce("r%s:%d: %s" % (revision, n+1, node))
                elif node.kind == SD_DIR \
                         and node.action != SD_CHANGE \
                         and debug_enable(DEBUG_TOPOLOGY):
                    announce(str(node))
                # Handle per-path properties.
                if node.props is not None:
                    if "cvs2svn:cvs-rev" in node.props:
                        cvskey = "CVS:%s:%s" % (node.path,
                                                node.props["cvs2svn:cvs-rev"])
                        self.repo.fossil_map[cvskey] = commit
                        del node.props["cvs2svn:cvs-rev"]
                    latch = False
                    for (prop, val) in node.props.items():
                        if prop not in StreamParser.IgnoreProperties:
                            if not latch:
                                self.gripe("r%s~%s properties set:" \
                                           % (node.revision, node.path))
                                latch = True
                            self.gripe("\t%s = '%s'" % (prop, val))
                            self.has_properties.add(node.path)
                    if node.path in self.has_properties and not latch:
                        self.has_properties.discard(node.path)
                        announce("r%s~%s: properties cleared." \
                                 % (node.revision, node.path))
                if node.kind == SD_FILE:
                    expanded_nodes.append(node)
                elif node.kind == SD_DIR:
                    # os.sep is appended to avoid collisions with path
                    # prefixes.
                    node.path += os.sep                   
                    if node.from_path:
                        node.from_path += os.sep
                    if node.action == SD_ADD:
                        if node.path == "trunk" + os.sep and not node.props:
                            node.props = {"svn:ignore":
                                          StreamParser.SubversionDefaultIgnores}
                    elif node.action in (SD_DELETE, SD_REPLACE):
                        if node.path in self.branches:
                            self.branchdeletes.add(node.path)
                            expanded_nodes.append(node)
                        else:
                            # A delete or replace with no from set
                            # can occur if the directory is empty.
                            # We can just ignore this case.
                            if node.from_set is not None:
                                for child in node.from_set:
                                    if debug_enable(DEBUG_EXTRACT):
                                        announce("r%s: deleting %s" \
                                                 % (revision, child))
                                    newnode = StreamParser.NodeAction()
                                    newnode.path = child
                                    newnode.revision = revision
                                    newnode.action = SD_DELETE
                                    newnode.kind = SD_FILE
                                    newnode.generated = True
                                    expanded_nodes.append(newnode)
                                ignorepath = os.path.join(node.path, ".gitignore")
                                if ignorepath in self.active_gitignores:
                                    newnode = StreamParser.NodeAction()
                                    newnode.path = ignorepath
                                    newnode.revision = revision
                                    newnode.action = SD_DELETE
                                    newnode.kind = SD_FILE
                                    newnode.generated = True
                                    expanded_nodes.append(newnode)
                    # Property settings can be present on either
                    # SD_ADD or SD_CHANGE actions.
                    if node.props is not None:
                        if debug_enable(DEBUG_EXTRACT):
                            announce("r%s: setting properties %s on %s" \
                                     % (revision, node.props, node.path))
                        # svn:mergeinfo gets handled here
                        if 'svn:mergeinfo' in node.props:
                            val = node.props['svn:mergeinfo']
                            # Ignore an invalid empty value set (not
                            # no effect) by some tools.
                            if val:
                                # The general case is multiline; each line
                                # may describe multiple spans merging
                                # to this revision.  For each span, we
                                # link from the last revision.
                                for line in val.split('\n'):
                                    if line and ':' in line:
                                        (_, ranges) = line.split(":")
                                        for span in ranges.split(","):
                                            if '-' in span:
                                                (_, end) = span.split("-")
                                            else:
                                                end = span
                                            # Because mergeinfo properties will
                                            # persist like other properties,
                                            # we want to record the earliest
                                            # instance of a merge to a given
                                            # path from a given source revision
                                            # then ignore later ones.
                                            if (node.path, end) not in self.mergeinfo:
                                                self.mergeinfo[(node.path, end)] = revision
                                            if debug_enable(DEBUG_EXTRACT):
                                                announce("r%s: mergeinfo link from %s\n" \
                                                        % (revision, end))
                        # svn:ignore gets handled here,
                        if node.path == os.sep:
                            gitignore_path = ".gitignore"
                        else:
                            gitignore_path = os.path.join(node.path,
                                                          ".gitignore")
                        # There are no other directory properties that can
                        # turn into fileops.
                        if "svn:ignore" in node.props:
                            blob = Blob(self.repo)
                            with open(blob.blobfile(), "w") as wfp:
                                wfp.write(node.props["svn:ignore"])
                            newnode = StreamParser.NodeAction()
                            newnode.path = gitignore_path
                            newnode.revision = revision
                            newnode.action = SD_ADD
                            newnode.kind = SD_FILE
                            newnode.blob = blob
                            if debug_enable(DEBUG_IGNORES):
                                announce("r%s: queuing up %s generation with:\n%s." % (revision, newnode.path, node.props["svn:ignore"]))
                            # Must append rather than simply performing.
                            # Otherwise when the property is unset we
                            # won't have the right thing happen.
                            newnode.generated = True
                            expanded_nodes.append(newnode)
                            self.active_gitignores.add(gitignore_path)
                        elif gitignore_path in self.active_gitignores:
                            newnode = StreamParser.NodeAction()
                            newnode.path = gitignore_path
                            newnode.revision = revision
                            newnode.action = SD_DELETE
                            newnode.kind = SD_FILE
                            if debug_enable(DEBUG_IGNORES):
                                announce("r%s: queuing up %s deletion." % (revision, newnode.path))
                            newnode.generated = True
                            expanded_nodes.append(newnode)
                            self.active_gitignores.remove(gitignore_path)
                    # Handle directory copies.
                    # If this is a copy between branches, do nothing;
                    # no fileop should be issued until there is an
                    # actual file modification on the new branch.
                    # Exception: If the target branch has been deleted,
                    # perform a normal copy and interpret this as an
                    # ad-hoc branch merge.
                    if node.from_path:
                        branchcopy = node.from_path in self.branches \
                                         and node.path in self.branches \
                                         and node.path not in self.branchdeletes
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce("r%s: directory copy to %s from r%s~%s (branchcopy %s)" \
                                     % (revision,
                                        node.path,
                                        node.from_rev,
                                        node.from_path,
                                        branchcopy))
                        if not branchcopy:
                            self.branchdeletes.discard(node.path)
                        else:
                            continue
                        for source in node.from_set:
                            lookback = filemaps[node.from_rev][source]
                            if lookback is None:
                                raise Fatal("r%s: can't find ancestor %s" \
                                         % (revision, source))
                            subnode = StreamParser.NodeAction()
                            subnode.path = os.path.join(node.path,
                                                        source[len(node.from_path+os.sep)-1:])
                            subnode.revision = revision
                            subnode.from_path = lookback.path
                            subnode.from_rev = lookback.revision
                            subnode.from_hash = lookback.content_hash
                            subnode.action = SD_ADD
                            subnode.kind = SD_FILE
                            if debug_enable(DEBUG_TOPOLOGY):
                                announce("r%s: generated copy r%s~%s -> %s" \
                                         % (revision,
                                            subnode.from_rev,
                                            subnode.from_path,
                                            subnode.path))
                            subnode.generated = True
                            expanded_nodes.append(subnode)
            # Lift .cvsignore files, which we can assume are fossils
            # from a bygone era and happen to have syntax upward-compatible
            # with that of .gitignore
            for node in expanded_nodes:
                if node.path.endswith(".cvsignore"):
                    node.path = node.path[:-len(".cvsignore")] + ".gitignore"
            # Create actions corresponding to both
            # parsed and generated nodes.
            actions = []
            for (n, node) in enumerate(expanded_nodes):
                if node.kind == SD_FILE:
                    if node.action == SD_DELETE:
                        assert node.blob is None
                        fileop = FileOp()
                        fileop.construct("D", node.path)
                        actions.append((node, fileop))
                    elif node.action in (SD_ADD, SD_CHANGE, SD_REPLACE):
                        # Try to figure out who the ancestor of
                        # this node is.
                        if node.from_hash:
                            ancestor = self.hashmap[node.from_hash]
                            node.blobmark = ancestor.blobmark
                        elif node.from_path:
                            # A copy node is somehow missing its hash.
                            ancestor = filemaps[node.from_rev][node.from_path]
                        elif node.action != SD_ADD:
                            # Ordinary inheritance, no node copy.  For
                            # robustness, we don't assume revisions are
                            # consecutive numbers.
                            ancestor = filemaps[previous][node.path]
                        else:
                            ancestor = None
                        # Time for fileop generation
                        if node.blob is not None:
                            if node.content_hash in self.hashmap:
                                # Blob matches an existing one -
                                # node was created by a
                                # non-Subversion copy followed by
                                # add.  Get the ancestry right,
                                # otherwise parent pointers won't
                                # be computed properly.
                                ancestor = self.hashmap[node.content_hash]
                                node.from_path = ancestor.from_path
                                node.from_rev = ancestor.from_rev
                                node.blobmark = ancestor.blobmark
                            else:
                                # An entirely new blob
                                node.blobmark = node.blob.mark = self.__newmark()
                                self.repo.addEvent(node.blob)
                                # Blobs generated by reposurgeon
                                # (e.g .gitignore content) have no
                                # content hash.  Don't record
                                # them, otherwise they'll all
                                # collide :-)
                                if node.content_hash:
                                    self.hashmap[node.content_hash] = node
                        elif ancestor:
                            node.blobmark = ancestor.blobmark
                        else:
                            # No ancestor, no blob. Has to be a
                            # pure property change.  There's no
                            # way to figure out what mark to use
                            # in a fileop.
                            self.gripe("r%s~%s: permission information may be lost." \
                                          % (node.revision, node.path))
                            continue
                        assert node.blobmark
                        # Time for fileop generation
                        if ancestor:
                            perms = oldperms = self.permissions.get(ancestor.path,
                                                                    0o100644)
                        else:
                            perms = oldperms = 0o100644
                        if node.props is not None:
                            perms = self.node_permissions(node)
                        # This ugly nasty guard is critically important.
                        # We need to generate a modify if:
                        # 1. There is new content.
                        # 2. This node was generated as an
                        # expansion of a directory copy.
                        # 3. The node was produced by an explicit
                        # Subversion file copy (not a directory copy)
                        # in which case it has an MD5 hash that points
                        # back to a source.
                        # 4. The permissions for this path have changed;
                        # we need to generate a modify with an old mark
                        # but new permissions.
                        new_content = (node.blob is not None)
                        generated_file_copy = node.generated
                        subversion_file_copy = (node.from_hash is not None)
                        permissions_changed = (perms != oldperms)
                        if (new_content or
                            generated_file_copy or
                            subversion_file_copy or
                            permissions_changed):
                            assert perms
                            fileop = FileOp()
                            fileop.construct("M",
                                             perms,
                                             node.blobmark,
                                             node.path)
                            actions.append((node, fileop))
                        elif debug_enable(DEBUG_EXTRACT):
                            announce("r%s~%s: unmodified" % (node.revision, node.path))
                        self.permissions[node.path] = perms
                # These are directory actions.
                elif node.action in (SD_DELETE, SD_REPLACE):
                    if debug_enable(DEBUG_EXTRACT):
                        announce("r%s: deleteall %s" % (revision,node.path))
                    fileop = FileOp()
                    fileop.construct("deleteall", node.path[:-1])
                    actions.append((node, fileop))
            # Time to generate commits from actions and fileops.
            if debug_enable(DEBUG_EXTRACT):
                announce("r%s: %d actions" % (revision, len(actions)))
            # First, break the file operations into branch cliques
            cliques = {}
            for (node, fileop) in actions:
                for branch in self.branches:
                    if node.path.startswith(branch):
                        if branch not in cliques:
                            cliques[branch] = []
                        cliques[branch].append((node, fileop))
                        break
                else:
                    if "" not in cliques:
                        cliques[""] = []
                    cliques[""].append((node, fileop))
            # Make an operation list from the cliques.
            oplist = []
            for (branch, actions) in cliques.items():
                oplist.append((branch, [action[1] for action in actions]))
            # Figure out which branch cliques contain only branch deletes
            deletealls = set([])
            for (branch, ops) in oplist:
                if len(ops) == 1 and ops[0].op == "deleteall":
                    deletealls.add(branch)
            # The commit is truly mixed if there is more than one clique
            # not consisting entirely of deleteall operations.  Sort the
            # oplist so all non-deleteall cliques are at the front.
            mixed = len(oplist) - len(deletealls) > 1
            oplist.sort(key=lambda b: not b[0] in deletealls)
            newcommits = []
            # In the ordinary case, we can assign all non-deleteall commits
            # to the base commit.
            commit.fossil_id = revision
            if mixed:
                split_commits.append(revision)
            else:
                self.repo.fossil_map["SVN:%s" % commit.fossil_id] = commit
                if oplist:
                    (commit.common, commit.fileops) = oplist.pop(0)
                    commit._pathset = None
                else:
                    commit.common = os.path.commonprefix([node.path for node in record.nodes])
                commit.mark = self.__newmark()
                if debug_enable(DEBUG_EXTRACT):
                    announce("r%s gets mark %s" % (revision, commit.mark))
                newcommits.append(commit)
            # If the commit is mixed, or there are deletealls left over,
            # handle that.
            oplist.sort(key=lambda b: b[0])
            for (i, (branch, fileops)) in enumerate(oplist):
                split = copy.copy(commit)
                split.common = branch
                # Sequence numbers for split commits are 1-origin
                split.fossil_id += StreamParser.SplitSep + str(i + 1)
                self.repo.fossil_map["SVN:%s" % split.fossil_id] = split
                split.comment += "\n[[Split portion of a mixed commit.]]\n"
                split.mark = self.__newmark()
                split.fileops = fileops
                split._pathset = None
                newcommits.append(split)
            # Deduce links between branches on the basis of copies. This
            # is tricky because a revision can be the target of multiple
            # copies.  Humans don't abuse this because tracking multiple
            # copies is too hard to do in a slow organic brain, but tools
            # like cvs2svn can generate large sets of them. cvs2svn seems
            # to try to copy each file and directory from the commit
            # corresponding to the CVS revision where the file was last
            # changed before the copy, which may be substantially earlier
            # than the CVS revision corresponding to the
            # copy). Fortunately, we can resolve such sets by the simple
            # expedient of picking the *latest* revision in them!
            for newcommit in newcommits:
                newcommit.fileops.sort(cmp=FileOp.compare)
                if commit.mark not in self.branchlink:
                    copies = [node for node in record.nodes \
                              if node.from_rev is not None \
                              and node.path.startswith(newcommit.common)]
                    if copies and debug_enable(DEBUG_TOPOLOGY):
                        announce("r%s: copy operations %s" %
                                     (newcommit.fossil_id, copies))
                    linkback = False
                    # If the copies include one for the directory, we're good.
                    if [node for node in copies if node.kind == SD_DIR and node.from_path and node.path == newcommit.common]:
                        self.directory_branchlinks.add(newcommit.common)
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce("r%s: directory copy with %s" \
                                     % (newcommit.fossil_id, copies))
                        linkback = True
                    # User may have botched a branch creation by doing a
                    # non-Subversion directory copy followed by a bunch of
                    # Subversion adds. Blob hashes will match existing files,
                    # but from_rev and from_path won't be set at parse time.
                    # Our code detects this case and makes file
                    # backlinks, but can't deduce the directory copy.
                    # Thus, we have to treat multiple file copies as
                    # an instruction to create a gitspace branch.
                    #
                    # This guard filters out copy op sets that are
                    # *single* file copies. We're making an assumption
                    # here that multiple file copies should always
                    # trigger a branch link creation.  This assumption
                    # could be wrong, which is why we emit a warning
                    # message later on for branch links detected this
                    # way
                    #
                    # Even with this filter you'll tend to end up with lots
                    # of little merge bubbles with no commits on one side;
                    # these have to be removed by a debubbling pass later.
                    # I don't know what generates these things - cvs2svn, maybe.
                    #
                    # The second conjunct of this guard filters out the case
                    # where the user actually did do a previous Subversion file
                    # copy to start the branch, in which case we want to link
                    # through that.
                    elif len(copies) > 1 \
                             and newcommit.common not in self.directory_branchlinks:
                        self.fileop_branchlinks.add(newcommit.common)
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce("r%s: making branch link %s" %
                                     (newcommit.fossil_id, newcommit.common))
                        linkback = True
                    if linkback:
                        copies.sort(key=lambda node: int(node.from_rev))
                        latest = copies[-1]
                        threshold = False
                        commits = self.repo.commits()
                        ncommits = len(commits)
                        for i in xrange(ncommits):
                            prev = commits[ncommits - i - 1]
                            if prev.fossil_id == latest.from_rev:
                                if debug_enable(DEBUG_TOPOLOGY):
                                    announce("r%s: found %s looking for %s" \
                                             % (newcommit.fossil_id, latest, newcommit.common))
                                threshold = True
                            if threshold:
                                if debug_enable(DEBUG_TOPOLOGY):
                                    announce("r%s: looking at r%s" \
                                             % (newcommit.fossil_id, prev.fossil_id))
                                if latest.from_path.startswith(prev.common) or global_options["svn_nobranch"]:
                                    self.branchlink[newcommit.mark] = prev.mark
                                    if debug_enable(DEBUG_TOPOLOGY):
                                        announce("r%s: link %s (%s) back to %s (%s, %s)" % \
                                                 (newcommit.fossil_id,
                                                  newcommit.mark,
                                                  newcommit.common,
                                                  latest.from_rev,
                                                  prev.mark,
                                                  prev.common
                                                  ))
                                    break
                        else:
                            if debug_enable(DEBUG_TOPOLOGY):
                                complain("lookback for %s failed" % latest)
                            raise Fatal("couldn't find a branch root for the copy of %s at r%s." % (latest.path, latest.revision))
            # We're done, add all the new commits 
            self.repo.events += newcommits
            self.repo.declare_sequence_mutation()
            # Report progress, and give up our scheduler slot
            # so as not to eat the processor.
            baton.bumpcounter()
            time.sleep(0)
            previous = revision
        # Warn about dubious branch links
        self.fileop_branchlinks.discard("trunk" + os.sep)
        if self.fileop_branchlinks - self.directory_branchlinks:
            self.gripe("branch links detected by file ops only: %s" % " ".join(self.fileop_branchlinks - self.directory_branchlinks))
        baton.endcounter()
        if debug_enable(DEBUG_EXTRACT):
            announce("at post-parsing time:")
            for commit in self.repo.commits():
                msg = commit.comment
                if msg == None:
                    msg = ""
                announce("r%-4s %4s %2d %2d '%s'" % \
                         (commit.fossil_id, commit.mark,
                          len(commit.fileops),
                          len(commit.properties),
                          msg.strip()[:20]))
        baton.twirl("+")
        baton.twirl("branches")
        baton.startcounter(countfmt(len(self.repo.commits())))
        # First, turn the root commit into a tag
        initial = self.repo.commits()[0]
        if not initial.fileops:
            if len(self.repo.commits()) >= 2:
                self.repo.tagify(initial,
                                 "root",
                                 self.repo.commits()[1].mark,
                                 "[[Tag from root commit at Subversion r%s]]\n" % initial.fossil_id)
            else:
                self.gripe("could not tagify root commit.")
        # Now, branch analysis.
        if not self.branches or global_options['svn_nobranch']:
            lastmark = None
            for commit in self.repo.commits():
                commit.branch = os.path.join("refs", "heads", "master") + os.sep
                if lastmark:
                    commit.setParents([lastmark])
                lastmark = commit.mark
                baton.bumpcounter()
        else:
            # Instead, determine a branch for each commit...
            if debug_enable(DEBUG_EXTRACT):
                announce("Branches: %s" % (self.branches,))
            for commit in self.repo.commits():
                for branch in self.branches:
                    if commit.common.startswith(branch):
                        commit.branch = branch
                        for fileop in commit.fileops:
                            if fileop.op in ("M", "D"):
                                fileop.path = fileop.path[len(branch):]
                            elif fileop.op in ("R", "C"):
                                fileop.source = fileop.source[len(branch):]
                                fileop.target = fileop.target[len(branch):]
                        commit._pathset = None
                        break
                else:
                    commit.branch = "root"
                    self.branches["root"] = None
                baton.bumpcounter()
            baton.endcounter()
            baton.twirl("+")
            # ...then rebuild parent links so they follow the branches
            branchroots = []
            baton.twirl("parents")
            baton.startcounter(countfmt(len(self.repo.commits())))
            for commit in self.repo.commits():
                if self.branches[commit.branch] is None:
                    branchroots.append(commit)
                    commit.setParents([])
                else:
                    commit.setParents([self.branches[commit.branch]])
                self.branches[commit.branch] = commit.mark
                baton.bumpcounter()
            baton.endcounter()
            baton.twirl("+")
            baton.twirl("root")
            # The root branch is special. It wasn't made by a copy, so
            # we didn't get the information to connect it to trunk in the
            # last phase.
            if "root" in self.branches:
                for commit in self.repo.commits():
                    if commit.branch == "root":
                        break
                self.branchlink[commit.mark] = self.repo.commits()[0].mark
            # Add links due to Subversion copy operations
            if debug_enable(DEBUG_EXTRACT):
                announce("branch roots: %s, links %s" % ([c.mark for c in branchroots], self.branchlink))
            baton.twirl("+")
            baton.twirl("branchlinks")
            for (later, earlier) in self.branchlink.items():
                child = self.repo.objfind(later)
                if earlier not in child.parentMarks():
                    child.addParent(earlier)
            nonempty = set([c for c in self.repo.commits() if c.fileops]) 
            for root in branchroots:
                rootrev = root.fossil_id
                if commit.branch in nonempty and root.branch != ("trunk" + os.sep):
                    self.gripe("r%s: can't connect nonempty branch %s to origin" \
                                % (rootrev, root.branch))
                baton.twirl()
            baton.twirl("+")
            # Add links due to svn:mergeinfo properties
            baton.twirl("svn-mergeinfo")
            for ((_, early), late) in self.mergeinfo.items():
                if early in split_commits:
                    self.gripe("cannot resolve mergeinfo from split commit %s to %s." % (early, late))
                elif late in split_commits:
                    self.gripe("cannot resolve mergeinfo from %s to split commit %s." % (early, late))
                else:
                    late_commit = self.repo.fossil_map["SVN:%s" % late]
                    early_commit = self.repo.fossil_map["SVN:%s" % early]
                    if early_commit.mark not in late_commit.parentMarks():
                        late_commit.addParent(early_commit.mark)
            baton.twirl("+")
            if debug_enable(DEBUG_EXTRACT):
                announce("after branch analysis")
                for commit in self.repo.commits():
                    parents = commit.parents()
                    if len(parents):
                        ancestor = parents[0]
                    else:
                        ancestor = '-'
                    announce("r%-4s %4s %4s %2d %2d '%s'" % \
                             (commit.fossil_id,
                              commit.mark, ancestor,
                              len(commit.fileops),
                              len(commit.properties),
                              commit.branch))
            baton.twirl("tagifying")
            baton.startcounter(countfmt(len(self.repo.commits())))
            # Tagify normal branch-root commits, they don't carry any
            # information other than their metadata. The exceptions
            # are trunk and root (if the latter exists); neither is
            # the result of a normal copy operation.
            for commit in self.repo.commits():
                if commit in branchroots \
                       and commit.branch != ("trunk"+os.sep) \
                       and commit.branch != "root" \
                       and commit.hasParents() \
                       and not commit.fileops:
                    tagname = os.path.basename(commit.branch[:-1])
                    if "tags" not in commit.branch:
                        tagname += "-root"
                    self.repo.tagify(commit,
                                     tagname,
                                     commit.parentMarks()[0])
                baton.bumpcounter()
            baton.endcounter()
        baton.twirl("+")
        # Code controlled by svn_nobranch option ends.
        baton.twirl("tagify-empty")
        baton.startcounter(countfmt(len(self.repo.commits())))
        for commit in self.repo.commits():
            # Now we need to tagify all other commits without fileops, because
            # git is going to just discard them when we build a live repo and
            # they might possibly contain interesting metadata.  Usually they're
            # just debris from tagging, though.
            if not commit.fileops:
                if commit.hasParents():
                    legend = "[[Tag from zero-fileop commit at Subversion r%s" \
                             % commit.fossil_id
                    if self.revisions[commit.fossil_id].nodes:
                        legend += ":\n"
                        for node in self.revisions[commit.fossil_id].nodes:
                            legend += str(node) + "\n"
                    legend += "]]\n"
                    self.repo.tagify(commit,
                                     "emptycommit-%s" % commit.fossil_id,
                                     commit.parentMarks()[0],
                                     legend)
                else:
                    self.gripe("r%s: deleting parentless zero-op commit." \
                               % commit.fossil_id)
                    self.repo.quiet_delete(commit)
            # Also, tagify tip commits that consist only of deletes.
            # The fileops aren't worth saving; the comment metadata
            # just might be.
            elif commit.alldeletes(killset={"deleteall"}) \
                     and not commit.children():
                if commit.hasParents():
                    if commit.branch.endswith(os.sep):
                        commit.branch = commit.branch[:-1]
                    label = os.path.basename(commit.branch)
                    commit.fileops = []
                    self.repo.tagify(commit,
                                     "tipdelete-%s" % label,
                                     commit.parentMarks()[0])
                else:
                    self.gripe("r%s: deleting parentless tip delete of %s" \
                               % (commit.fossil_id, commit.branch))
                    self.repo.quiet_delete(commit)
            baton.bumpcounter()
        baton.endcounter()
        baton.twirl("+")
        baton.twirl("polishing")
        # Now pretty up the branch names
        baton.startcounter(countfmt(len(self.repo.commits())))
        for commit in self.repo.commits():
            if commit.branch == "root":
                commit.branch = os.path.join("refs", "heads", "root")
            elif commit.branch.startswith("tags" + os.sep):
                if commit.branch.endswith(os.sep):
                    commit.branch = commit.branch[:-1]
            elif commit.branch == "trunk" + os.sep:
                commit.branch = os.path.join("refs", "heads", "master")
            else:
                commit.branch = os.path.join("refs", "heads",
                                                 os.path.basename(commit.branch[:-1]))
            baton.bumpcounter()
        baton.endcounter()
        baton.twirl("+")
        # cvs2svn likes to crap out sequences of deletes followed by
        # filecopies on the same node when it's generating tag commits.
        # There are lots of examples of this in the nut.svn test load.
        # These show up as redundant (D, M) fileop pairs.
        baton.twirl("canonicalizing")
        baton.startcounter(countfmt(len(self.repo.commits())))
        for commit in self.repo.commits():
            if [fileop for fileop in commit.fileops if fileop is None]:
                raise Fatal("Null fileop at r%s" % commit.fossil_id)
            for i in range(len(commit.fileops)-1):
                if commit.fileops[i].op == 'D' and commit.fileops[i+1].op == 'M':
                    if commit.fileops[i].path == commit.fileops[i+1].path:
                        commit.fileops[i].op = None
            commit.fileops = [fileop for fileop in commit.fileops if fileop.op is not None]
            baton.bumpcounter()
        baton.endcounter()
        baton.twirl("+")
        # Issue resets when required
        baton.twirl("resets")
        baton.startcounter(countfmt(len(self.repo.commits())))
        save_events = self.repo.events
        self.repo.events = []
        self.repo.declare_sequence_mutation()
        issued = set([])
        for event in save_events:
            if isinstance(event, Commit) and event.branch not in issued:
                reset = Reset()
                reset.ref = event.branch
                self.repo.addEvent(reset)
                issued.add(event.branch)
            self.repo.addEvent(event)
            baton.bumpcounter()
        baton.twirl("+")
        # Remove spurious parent links caused by random cvs2svn file copies.
        baton.twirl("debubbling")
        baton.startcounter(countfmt(len(self.repo.commits())))
        for commit in self.repo.commits():
            if len(commit.parentMarks()) == 2:
                parents = commit.parents()
                if len(parents) != 2:
                    self.gripe("r%s: duplicate parent marks" % commit.fossil_id)
                    continue
                (a, b) = parents
                if a.branch != commit.branch or b.branch != commit.branch:
                    continue
                if b.committer.date < a.committer.date:
                    (a, b) = (b, a)
                if b.descended_from(a):
                    commit.removeParent(a.mark)
            baton.bumpcounter()
        baton.endcounter()
        baton.twirl("+")
        baton.twirl("renumbering")
        self.repo.renumber(baton=baton)
        baton.twirl("+")
        self.repo.write_fossils = True
        # Look for tag and branch merges that mean we may want to undo a
        # tag or branch creation
        dubious = [commit for commit in self.repo.commits() if \
                   commit.fileops and commit.fileops[0].op == 'deleteall' \
                   and commit.children()]
        for commit in dubious:
            self.gripe("mid-branch deleteall at <%s>." % commit.fossil_id)

class SubversionDumper:
    "Repository to Subversion stream dump."
    def __init__(self, repo):
        self.repo = repo
        self.pathmap = {}
        self.mark_to_revision = {}
        self.branches_created = []
        self.tag_latch = False
    class FlowState:
        "Per-path bookkeeping record kept while generating a dump."
        def __init__(self, rev, props=None):
            # Any false-valued props argument (None, empty dict) is
            # replaced by a fresh dictionary private to this record.
            if not props:
                props = {}
            # Count adjusted by the directory create/delete logic.
            self.subfiles = 0
            self.is_directory = False
            self.props = props
            # Revision most recently associated with this path.
            self.rev = rev
    @staticmethod
    def svnprops(pdict):
        keys = pdict.keys()
        keys.sort()
        flattened = ""
        for key in keys:
            val = pdict[key]
            if val:
                flattened += "K %d\n%s\nV %d\n%s\n" \
                             % (len(key), key, len(val), val)
        return flattened
    @staticmethod
    def dump_revprops(fp, revision, date, author=None, log=None, parents=None):
        "Emit a Revision-number record describing unversioned properties."
        fp.write("Revision-number: %d\n" % revision)
        revprops = ""
        revprops += SubversionDumper.svnprops({"svn:log": log})
        revprops += SubversionDumper.svnprops({"svn:author": author})
        # Ugh.  Subversion apparently insists on those decimal places
        revprops += SubversionDumper.svnprops({"svn:date": date.rfc3339()[:-1]+".000000Z"})
        # Hack merge links into mergeinfo properties.  This is a kluge
        # - the Subversion model is really like cherrypicking rather
        # than branch merging - but it's better than nothing, and
        # should at least round-trip with the logic in the Subversion
        # dump parser.
        parents = parents or []
        if parents[1:]:
            ancestral = parents[1:]
            ancestral.sort()
            ancestral = ".".join(str(x) for x in ancestral)
            revprops += SubversionDumper.svnprops({"svn:mergeinfo": ancestral})
        revprops += "PROPS-END\n"
        fp.write("Prop-content-length: %d\n" % len(revprops))
        fp.write("Content-length: %d\n\n" % len(revprops))
        fp.write(revprops + "\n")
    @staticmethod
    def dump_node(fp, path, kind, action, content="",
                  from_rev=None, from_path=None,
                  props=None):
        "Emit a Node record describing versioned properties and content."
        fp.write("Node-path: %s\n" % path)
        fp.write("Node-kind: %s\n" % kind)
        fp.write("Node-action: %s\n" % action)
        if from_rev:
            fp.write("Node-copyfrom-rev: %s\n" % from_rev)
        if from_path:
            fp.write("Node-copyfrom-path: %s\n" % from_path)
        nodeprops = SubversionDumper.svnprops(props or {}) + "PROPS-END\n"
        fp.write("Prop-content-length: %d\n" % len(nodeprops))
        if content:
            fp.write("Text-content-length: %d\n" % len(content))
            # Checksum validation in svnload works if we do sha1 but
            # not if we try md5.  It's unknown why - possibly svn load
            # is simply ignoring sha1.
            #fp.write("Text-content-md5: %s\n" % hashlib.md5(content).hexdigest())
            fp.write("Text-content-sha1: %s\n" % hashlib.sha1(content).hexdigest())
        fp.write("Content-length: %d\n\n" % (len(nodeprops) + len(content)))
        fp.write(nodeprops + "\n")            
        if content:
            fp.write(content)
        fp.write("\n")
    @staticmethod
    def svnbranch(branch):
        "The branch directory corresponding to a specified git branch."
        segments = branch.split(os.sep)
        assert segments[0] == "refs"
        if tuple(segments) == ("refs", "heads", "master"):
            return "trunk"
        if segments[1] not in ("tags", "heads") or len(segments) != 3:
            raise Recoverable("%s can't be mapped to Subversion." % branch)
        svnbase = segments[2]
        if svnbase.endswith("trunk"):
            svnbase += "-git"
        if segments[1] == "tags":
            return os.path.join("tags", svnbase)
        else:
            return os.path.join("branches", svnbase)
    @staticmethod
    def svnize(branch, path=""):
        "Return SVN path corresponding to a specified gitspace branch and path."
        return os.path.join(SubversionDumper.svnbranch(branch), path)
    def filedelete(self, fp, branch, path):
        "Write the delete records for a file and any directory it empties."
        if debug_enable(DEBUG_SVNDUMP):
            announce("filedelete%s" % repr((branch, path)))
        def emit_delete(victim):
            # One delete node, then forget the path.
            fp.write("Node-path: %s\n" % victim)
            fp.write("Node-action: delete\n\n\n")
            del self.pathmap[victim]
        target = SubversionDumper.svnize(branch, path)
        emit_delete(target)
        # Walk up through the containing directories, charging each
        # one for the loss and deleting any whose count drops to zero.
        # Stopping at a created branch directory is a spasmodic twitch
        # in the direction of respecting Subversion's notion of a
        # "flow": branch directories are never deleted, so each keeps
        # a single flow throughout the life of the repository.
        container = os.path.dirname(target)
        while container and container not in self.branches_created:
            state = self.pathmap[container]
            state.subfiles -= 1
            if state.subfiles == 0:
                emit_delete(container)
            container = os.path.dirname(container)
    def directory_create(self, fp, revision, branch, path, parents=None):
        """Emit the directory "add" nodes needed before PATH can be written.

        fp       -- writable stream receiving the node records
        revision -- revision number recorded in each new FlowState
        branch   -- gitspace branch that PATH is relative to
        path     -- branch-relative path of the file about to be written
        parents  -- parent commits; if the branch directory has not been
                    created yet and parents is non-empty, the branch is
                    created as a copy of the first parent's branch

        Every directory emitted is entered in self.pathmap; a newly
        created branch directory is appended to self.branches_created.
        """
        if debug_enable(DEBUG_SVNDUMP):
            announce("directory_create%s" % repr((revision, branch, path)))
        creations = []
        # Branch creation may be required
        svnout = SubversionDumper.svnbranch(branch)
        if svnout not in self.branches_created:
            if not svnout.startswith("tags") and "branches" not in self.branches_created:
                self.branches_created.append("branches")
                creations.append(("branches", None, None))
            self.branches_created.append(svnout)
            if parents:
                # A plain revision number, not a 1-tuple.
                from_rev = self.mark_to_revision[parents[0].mark]
                from_branch = SubversionDumper.svnbranch(parents[0].branch)
                creations.append((svnout, from_rev, from_branch))
                # The copy brings along everything under the source
                # branch, so mirror those paths under the new branch.
                # Iterate over a snapshot because the map is extended
                # inside the loop.
                prefix = from_branch + os.sep
                for key in list(self.pathmap):
                    if key.startswith(prefix):
                        counterpart = svnout + key[len(from_branch):]
                        self.pathmap[counterpart] = SubversionDumper.FlowState(revision)
            else:
                creations.append((svnout, None, None))
        # Create all directory segments required
        # to get down to the level where we can
        # create the file.
        parts = os.path.dirname(path).split(os.sep)
        if parts[0]:
            for depth in range(1, len(parts) + 1):
                fullpath = os.path.join(svnout, os.sep.join(parts[:depth]))
                # The path map is keyed by full Subversion path, so the
                # existence test has to use the full path as well;
                # otherwise directories that already exist get added
                # over again.
                if fullpath not in self.pathmap:
                    creations.append((fullpath, None, None))
        for (dirpath, from_rev, from_path) in creations:
            SubversionDumper.dump_node(fp,
                                       path=dirpath,
                                       kind="dir",
                                       action="add",
                                       from_rev=from_rev,
                                       from_path=from_path)
            state = SubversionDumper.FlowState(revision)
            state.is_directory = True
            state.subfiles += 1
            self.pathmap[dirpath] = state
    def filemodify(self, fp, revision, branch, mode, ref, path, parents):
        "Write the node record that adds a file or replaces its content."
        if debug_enable(DEBUG_SVNDUMP):
            announce("filemodify%s" % repr((revision, branch, mode, ref, path,
                                            [event.mark for event in parents])))
        # The branch and any intermediate directories have to exist first.
        # Doing this up front also lets a branch copy fill in the path map
        # before it is consulted below.
        self.directory_create(fp, revision, branch, path, parents)
        svnpath = SubversionDumper.svnize(branch, path)
        already_known = svnpath in self.pathmap
        if already_known:
            self.pathmap[svnpath].rev = revision
        else:
            self.pathmap[svnpath] = SubversionDumper.FlowState(revision)
        svnop = "change" if already_known else "add"
        if debug_enable(DEBUG_SVNDUMP):
            announce("Generating %s %s" % (svnpath, svnop))
        with open(self.repo.objfind(ref).blobfile()) as dp:
            content = dp.read()
        # A property block is only emitted when the executable bit flips;
        # the path is guaranteed to be in the map by this point.
        changeprops = None
        props = self.pathmap[svnpath].props
        if mode == '100755' and "svn:executable" not in props:
            props["svn:executable"] = "true"
            changeprops = props
        elif mode == '100644' and "svn:executable" in props:
            props["svn:executable"] = "false"
            changeprops = props
        # The actual content
        SubversionDumper.dump_node(fp,
                                   path=svnpath,
                                   kind="file",
                                   action=svnop,
                                   props=changeprops,
                                   content=content)
    def filecopy(self, fp, revision, branch, source, target):
        """Emit the node record that copies source to target on a branch.

        fp is the dump stream, revision the Subversion revision under
        construction; source and target are branch-relative paths.
        Raises Fatal if nothing is recorded in the path map for source.
        """
        if debug_enable(DEBUG_SVNDUMP):
            announce("filecopy%s" % repr((revision, branch, source, target)))
        svnsource = SubversionDumper.svnize(branch, source)
        try:
            flow = self.pathmap[svnsource]
        except KeyError:
            raise Fatal("couldn't retrieve flow information for %s" % source)
        # Capture the copy-from revision before anything else touches it.
        source_rev = flow.rev
        self.directory_create(fp, revision, branch, target)
        svntarget = SubversionDumper.svnize(branch, target)
        # The target gets its own state record; sharing the source's
        # object would let later changes to one path leak into the other.
        # The target first exists in this revision, so that is its rev.
        target_flow = copy.deepcopy(flow)
        target_flow.rev = revision
        self.pathmap[svntarget] = target_flow
        SubversionDumper.dump_node(fp,
                                   path=svntarget,
                                   kind="file",
                                   action="add",
                                   from_path=svnsource,
                                   from_rev=source_rev)
    def make_tag(self, fp, revision, branch, name, log, author):
        "Emit a revision that copies a branch directory to tags/<name>."
        if debug_enable(DEBUG_SVNDUMP):
            announce("make_tag%s" % repr((revision, branch, name, log, str(author))))
        copy_from = SubversionDumper.svnize(branch)
        copy_to = os.path.join("tags", name)
        # Only the local part of the email address is used as the author.
        SubversionDumper.dump_revprops(fp, revision,
                                       log=log,
                                       author=author.email.split("@")[0],
                                       date=author.date)
        # The tags directory itself is added once, ahead of the first tag.
        if not self.tag_latch:
            self.tag_latch = True
            SubversionDumper.dump_node(fp, path="tags", kind="dir", action="add")
        # The tag is a copy of the branch as of the preceding revision.
        SubversionDumper.dump_node(fp,
                                   path=copy_to,
                                   kind="dir",
                                   action="add",
                                   from_path=copy_from,
                                   from_rev=revision - 1)
    def dump(self, selection, fp, progress=False):
        """Export the repository as a Subversion dumpfile.

        selection is an iterable of indices into self.repo.events; only
        the Commit events among them produce revisions.  fp is the
        writable stream receiving the dump.  progress is passed to Baton
        as its enable flag.  An IOError while writing is re-raised as
        Fatal.
        """
        self.tag_latch = False
        # Annotated tags, collected once so each commit can be checked
        # against them below.
        tags = [event for event in self.repo.events if isinstance(event, Tag)]
        with Baton("reposurgeon: dumping", enable=progress) as baton:
            try:
                fp.write("SVN-fs-dump-format-version: 2\n\n")
                fp.write("UUID: %s\n\n" % (self.repo.uuid or uuid.uuid4()))
                # Revision 0 carries only a date property.
                SubversionDumper.dump_revprops(fp,
                                               revision=0,
                                               date=Date(rfc3339(time.time())))
                baton.twirl()
                revision = 0
                for i in selection:
                    event = self.repo.events[i]
                    # Passthroughs are lost; there are no equivalents
                    # in Subversion's ontology.
                    if not isinstance(event, Commit):
                        continue
                    revision += 1
                    self.mark_to_revision[event.mark] = revision
                    # We must treat the gitspace committer attribute
                    # as the author: gitspace author information is
                    # lost.  So is everything but the local part of
                    # the committer name.
                    # NOTE(review): every parent mark is looked up in
                    # mark_to_revision, so parents presumably must have
                    # been dumped earlier in this selection - confirm.
                    backlinks = [self.mark_to_revision[mark]
                                 for mark in event.parentMarks()]
                    SubversionDumper.dump_revprops(fp, revision,
                                                   log=event.comment,
                                                   author=event.committer.email.split("@")[0],
                                                   date=event.committer.date,
                                                   parents=backlinks)
                    for fileop in event.fileops:
                        if fileop.op == "D":
                            # Deleting a .gitignore becomes clearing the
                            # svn:ignore property rather than a file delete.
                            # NOTE(review): this branch keys pathmap by the
                            # .gitignore file's own path, while the "M"
                            # branch below keys it by the containing
                            # directory - confirm which is intended.
                            if fileop.path.endswith(".gitignore"):
                                svnpath = SubversionDumper.svnize(event.head(), fileop.path)
                                self.pathmap[svnpath].props["svn:ignore"] = ""
                                SubversionDumper.dump_node(fp,
                                          path=os.path.dirname(svnpath),
                                          kind="dir",
                                          action="change",
                                          props = self.pathmap[svnpath].props)
                            else:
                                self.filedelete(fp, event.head(), fileop.path)
                        elif fileop.op == "M":
                            # A .gitignore's content becomes the svn:ignore
                            # property instead of a file node.
                            # NOTE(review): svnpath is already built from the
                            # directory part of the path, yet the node path
                            # takes dirname() of it again - confirm.
                            if fileop.path.endswith(".gitignore"):
                                svnpath = SubversionDumper.svnize(event.head(),
                                                                  os.path.dirname(fileop.path))
                                blob = self.repo.objfind(fileop.ref)
                                if svnpath not in self.pathmap:
                                    self.pathmap[svnpath] = SubversionDumper.FlowState(revision)
                                self.pathmap[svnpath].props["svn:ignore"] = blob.content()
                                SubversionDumper.dump_node(fp,
                                          path=os.path.dirname(svnpath),
                                          kind="dir",
                                          action="change",
                                          props = self.pathmap[svnpath].props)
                            else:
                                self.filemodify(fp,
                                                revision,
                                                event.head(),
                                                fileop.mode,
                                                fileop.ref,
                                                fileop.path,
                                                event.parents())
                        elif fileop.op == "R":
                            # A rename is emitted as a copy followed by a
                            # delete of the source.
                            # NOTE(review): the delete is given event.branch
                            # where every other call here uses event.head().
                            self.filecopy(fp,
                                          revision,
                                          event.head(),
                                          fileop.source,
                                          fileop.target)
                            self.filedelete(fp, event.branch, fileop.source)
                        elif fileop.op == "C":
                            self.filecopy(fp,
                                          revision,
                                          event.head(),
                                          fileop.source,
                                          fileop.target)
                        elif fileop.op == "deleteall":
                            # Forget every tracked path under the branch
                            # directory, then delete the directory node.
                            branchdir = self.svnbranch(event.head())
                            for path in self.pathmap.keys():
                                if path.startswith(branchdir + os.sep):
                                    del self.pathmap[path]
                            fp.write("Node-path: %s\n" % branchdir)
                            fp.write("Node-action: delete\n\n\n")
                        else:
                            raise Fatal("unsupported fileop type %s." \
                                        % fileop.op)
                    # Turn any annotated tag pointing at this commit into
                    # a directory copy.
                    # Only the first matching tag is used: the loop breaks
                    # after it, which also skips the for-else clause below.
                    for tag in tags:
                        if tag.committish == event.mark:
                            revision += 1
                            self.make_tag(fp,
                                          revision,
                                          event.head(),
                                          name=tag.name,
                                          log=tag.comment,
                                          author=tag.tagger)
                            break
                    else:
                        # Preserve lightweight tags, too.  Ugh, O(n**2).
                        # A commit that has children, none of which is on
                        # its own branch, is tagged under the basename of
                        # its branch name.
                        children = event.children()
                        if children:
                            for child in children:
                                if child.branch == event.branch:
                                    break
                            else:
                                revision += 1
                                self.make_tag(fp,
                                              revision,
                                              event.head(),
                                              name=os.path.basename(event.branch),
                                              log="",
                                              author=event.committer)
                    fp.flush()
            except IOError as e:
                raise Fatal("export error: %s" % e)

# Generic repository-manipulation code begins here

class Repository:
    "Generic repository object."
    def __init__(self, name=None):
        self.name = name
        self.readtime = time.time()
        self.vcs = None
        self.sourcedir = None
        self.events = []    # A list of the events encountered, in order
        self._commits = None
        self.preserve_set = set([])
        self.case_coverage = set([])
        self.basedir = os.getcwd()
        self.uuid = None
        self.write_fossils = False
        self.dollar_map = {}        # From dollar cookies in files
        self.fossil_map = {}    # From anything that doesn't survive rebuild
    def cleanup(self):
        "Remove the scratch directory that holds this repo's blob files."
        scratch = self.subdir()
        nuke(scratch, "reposurgeon: cleaning up %s" % scratch)
    def subdir(self, name=None):
        if name is None:
            name = self.name
        if not name:
            return os.path.join(self.basedir, ".rs" + repr(os.getpid()))
        else:
            return os.path.join(self.basedir, ".rs" + repr(os.getpid())+ "-" + name) 
    def makedir(self):
        "Create this repo's scratch directory unless it already exists."
        try:
            if debug_enable(DEBUG_SHUFFLE):
                announce("repository fast import creates " + self.subdir())
            scratch = self.subdir()
            if os.path.exists(scratch):
                return
            os.mkdir(scratch)
        except OSError:
            raise Fatal("can't create operating directory")
    def size(self):
        "Return the size of this import stream, for statistics display."
        return sum([len(str(e)) for e in self.events])
    def branchlist(self):
        "Return a list of branchnames in this repo."
        lst = []
        for commit in self.commits():
            if commit.branch not in lst:
                lst.append(commit.branch)
        return lst
    def index(self, obj):
        "Index of the specified object."
        for (ind, event) in enumerate(self.events):
            if event == obj:
                return ind
        raise Fatal("internal error: <%s> not matched in repository %s" % (obj.fossil_id, self.name))
    def find(self, mark):
        "Find an object index by mark"
        for (ind, event) in enumerate(self.events):
            if hasattr(event, "mark") and mark == event.mark:
                return ind
        return None
    def objfind(self, mark):
        "Find an object by mark"
        for (ind, event) in enumerate(self.events):
            if hasattr(event, "mark") and mark == event.mark:
                return self.events[ind]
        return None
    def read_authormap(self, selection, fp):
        "Read an author-mapping file and apply it to the repo."
        authormap = {}
        try:
            for line in fp:
                line = line.strip()
                if not line:
                    continue
                if line.startswith('#'):
                    continue
                (local, netwide) = line.strip().split('=')
                (address, timezone) =  netwide.split(">")
                address += ">"
                timezone = timezone.strip()
                (name, mail) = email.utils.parseaddr(address.strip())
                if not mail:
                    raise Fatal("can't recognize address in '%s'" % netwide)
                authormap[local.strip()] = (name, mail, timezone)
        except IOError:
            raise Recoverable("couldn't open author-map file")
        except ValueError:
            raise Recoverable("bad author map syntax: %s" % repr(line))
        for ei in selection:
            event = self.events[ei]
            if isinstance(event, Commit):
                event.committer.remap(authormap)
                for author in event.authors:
                    author.remap(authormap)
            elif isinstance(event, Tag):
                event.tagger.remap(authormap)
    def write_authormap(self, selection, fp):
        "List the identifiers we need."
        contributors = {}
        for ei in selection:
            event = self.events[ei]
            if isinstance(event, Commit):
                contributors[event.committer.name] = event.committer.who()
                for author in event.authors:
                    contributors[author.name] = author.who()
            elif isinstance(event, Tag):
                contributors[event.tagger.name] = event.tagger.who()
        for (name, cid) in contributors.items():
            fp.write("%s = %s\n" % (name, cid))
    def read_fossilmap(self, fp):
        "Load a fossil-references dump into this repo's fossil map."
        # Bucket the commits by (committer timestamp, committer email);
        # the optional sequence number in a stamp picks one out of a bucket.
        commit_map = {}
        for event in self.commits():
            key = (event.committer.date.timestamp, event.committer.email)
            commit_map.setdefault(key, []).append(event)
        try:
            matched = unmatched = 0
            for line in fp:
                (fossil, stamp) = line.split()
                (timefield, person) = stamp.split('!')
                seq = 0
                if ':' in person:
                    # Sequence numbers in the file are 1-origin.
                    (person, seq) = person.split(':')
                    seq = int(seq) - 1
                assert fossil and timefield and person
                when_who = (Date(timefield).timestamp, person)
                if when_who not in commit_map:
                    unmatched += 1
                    continue
                target = commit_map[when_who][seq]
                self.fossil_map[fossil] = target
                if fossil.startswith("SVN:"):
                    target.fossil_id = fossil[4:]
                matched += 1
            if verbose >= 1:
                announce("%d matched, %d unmatched, %d total"\
                         % (matched, unmatched, matched+unmatched))
            del commit_map
        except ValueError:
            raise Recoverable("bad syntax in fossils file.")
    def write_fossilmap(self, fp):
        "Dump fossil references."
        cookies = list(self.fossil_map.keys())
        cookies.sort(key=lambda x: (self.fossil_map[x].committer.date.timestamp, x))
        for cookie in cookies:
            commit = self.fossil_map[cookie]
            if "SVN" in cookie and StreamParser.SplitSep in cookie:
                serial = ':' + cookie.split(StreamParser.SplitSep)[1]
            else:
                serial = ''
            if commit.fossil_id:
                fp.write("%s\t%s!%s%s\n" % (cookie,
                                           commit.committer.date.rfc3339(),
                                           commit.committer.email,
                                           serial))
    def tagify(self, commit, name, committish, legend=""):
        "Replace a commit that has no fileops with a tag carrying its comment."
        if debug_enable(DEBUG_EXTRACT):
            commit_id = commit.mark
            if commit.fossil_id:
                commit_id = commit_id + " <%s>" % commit.fossil_id
            announce("tagifying: %s -> %s" % (commit_id, name))
        if commit.fileops:
            raise Fatal("Attempting to tagify a commit with fileops.")
        # The commit comment, when there is one, goes ahead of the legend.
        pref = (commit.comment + "\n") if commit.comment else ""
        replacement = Tag(commit.repo,
                          name=name,
                          committish=committish,
                          tagger=commit.committer,
                          comment=pref + legend)
        self.addEvent(replacement)
        self.quiet_delete(commit)
    def fast_import(self, fp, progress=False):
        "Populate the repo from a stream file and record when it was read."
        parser = StreamParser(self)
        parser.fast_import(fp, progress)
        self.readtime = time.time()
    def parse_dollar_cookies(self):
        "Extract info about fossil references from CVS/SVN header cookies."
        if self.dollar_map:
            return
        # The goal here is to throw away CVS and Subversion header
        # information still fossilized into $Id$ and $Subversion$
        # headers after conversion to a later version. For each
        # cookie, all but the earliest blob containing it has it
        # as a fossil which should be removed.  Then, the earliest
        # commit referencing that blob gets a fossil property set;
        # later references will be branching artifacts.
        seen = set([])
        for event in self.events:
            if isinstance(event, Blob) and event.cookie:
                if event.cookie in seen:
                    continue
                else:
                    # The first commit immediately after this blob
                    for ei in range(self.find(event.mark), len(self.events)):
                        if isinstance(self.events[ei], Commit):
                            commit = self.events[ei]
                            break
                    seen.add(event.cookie)
                    if "fossil" in commit.properties:
                        complain("fossil property of %s overwritten" \
                                 % commit.mark)
                    if type(event.cookie) == type(""):
                        svnkey = "SVN:" + event.cookie
                        self.dollar_map[svnkey] = commit
                    else:
                        (basename, cvsref) = event.cookie
                        for fileop in commit.fileops:
                            if fileop.op == 'M' and fileop.ref == event.mark:
                                if not os.path.basename(fileop.path).endswith(basename):
                                    # Usually the harmless result of a
                                    # file move or copy that cvs2svn or
                                    # git-svn didn't pick up on.
                                    complain("mismatched CVS header path '%s' in %s vs '%s' in %s"
                                             % (fileop.path, commit.mark, basename, event.mark))
                                cvskey = "CVS:%s:%s" % (fileop.path, cvsref)
                                self.dollar_map[cvskey] = commit
    def export_style(self):
        "How should we tune the export dump format?"
        if self.vcs:
            return self.vcs.styleflags
        else:
            # Default to git style
            return ("nl-after-commit",)
    def fast_export(self, selection, fp, target=None, progress=False):
        "Write the selected events as a Subversion dump or a fast-export stream."
        if target and target.name == "svn":
            # Subversion output is produced by its own serializer.
            SubversionDumper(self).dump(selection, fp, progress)
            return
        with Baton("reposurgeon: exporting", enable=progress) as baton:
            try:
                # Nothing in this method sets the latch, so the trailing
                # notes commit below is currently never written.
                fossil_latch = False
                for ei in selection:
                    baton.twirl()
                    event = self.events[ei]
                    if debug_enable(DEBUG_UNITE) and hasattr(event, "mark"):
                        announce("writing %d %s %s" % (ei, event.mark, event.__class__.__name__))
                    fp.write(event.dump(target))
                if fossil_latch:
                    fp.write("reset fossil_id\n")
                    endcommit = Commit(self)
                    endcommit.branch = "refs/heads/master"
                    endcommit.comment = "Fossil-ID notes\n"
                    endcommit.committer = Attribution("Nowhere Man <nowhere@nobody.net> " + rfc3339(time.time()))
                    # One inline note per selected event that has a fossil ID.
                    for ei in selection:
                        event = self.events[ei]
                        if not hasattr(event, "fossil_id"):
                            continue
                        fileop = FileOp()
                        fileop.inline = "Fossil-ID: %s" % event.fossil_id
                        fileop.construct('N', 'inline', event.mark)
                        endcommit.fileops.append(fileop)
                    fp.write(str(endcommit))
            except IOError as e:
                raise Fatal("export error: %s" % e)
    def preserve(self, filename):
        "Add a path to the preserve set, to be copied back on rebuild."
        if os.path.exists(filename):
            self.preserve_set.add(filename)
        else:
            raise Recoverable("%s doesn't exist" % filename)
    def unpreserve(self, filename):
        "Remove a path from the preserve set."
        if filename in self.preserve_set:
            self.preserve_set.remove(filename)
        else:
            raise Recoverable("%s doesn't exist" % filename)
    def preservable(self):
        "Return the repo's preserve set (the set object itself, not a copy)."
        return self.preserve_set
    def rename(self, newname):
        "Rename the repo, moving its scratch directory to the matching path."
        try:
            # Can fail if the target directory exists.
            if debug_enable(DEBUG_SHUFFLE):
                announce("repository rename %s->%s calls os.rename(%s, %s)" % (self.name, newname, repr(self.subdir()), repr(self.subdir(newname))))
            old_dir = self.subdir()
            new_dir = self.subdir(newname)
            os.rename(old_dir, new_dir)
            # The name only changes once the directory move has succeeded.
            self.name = newname
        except OSError as e:
            raise Fatal("repo rename %s -> %s failed: %s"
                        % (self.subdir(), self.subdir(newname), e))
    def addEvent(self, event):
        "Append an event to the event list and invalidate the commit cache."
        self.events.append(event)
        # The cached commit list no longer reflects the event sequence.
        self.declare_sequence_mutation()
    def commits(self):
        "Return a list of the repository commit objects."
        if self._commits is None:
            self._commits = [e for e in self.events if isinstance(e, Commit)]
        return self._commits
    def declare_sequence_mutation(self):
        "Mark the repo event sequence as modified by dropping the cached commit list."
        self._commits = None
    def earliest(self):
        "Return the date of earliest commit."
        return self.commits()[0].committer.date
    #
    # Delete machinery begins here
    #
    def __ancestor_count(self, event, path):
        "Count modifications of a path in this commit and its ancestors."
        count = 0
        while True:
            for fileop in event.fileops:
                if fileop and fileop.op == "M" and fileop.path == path:
                    count += 1
                    break
            # 0, 1, and >1 are the interesting cases
            if count > 1:
                return count
            if event.parents():
                event = event.parents()[0]
            else:
                break
        return count
    def __compose(self, event, left, right):
        """Compose two relevant fileops.

        event is the commit owning both ops; left precedes right in its
        fileop list.  Returns a 5-tuple described below.  Some branches
        modify left or right in place before returning them.  Raises
        Fatal when no case applies.
        """
        # Here's what the fields in the return value mean:
        # 0: Was this a modification
        # 1: Op to replace the first with (None means delete)
        # 2: Op to replace the second with (None means delete)
        # 3: If not None, a warning to emit
        # 4: Case number, for coverage analysis
        pair = (left.op, right.op)
        #
        # First op M
        #
        if pair == ("M", "M"):
            # Leave these in place, they get handled later.
            return (False, left, right, None, 0)
        # M a + D a -> D a
        # Or, could reduce to nothing if M a was the only modify..
        # NOTE(review): `in "D"` is a substring test, so it matches "D"
        # and also the empty string; `== "D"` looks like the intent.
        elif left.op == "M" and right.op in "D":
            if self.__ancestor_count(event, left.path) == 1:
                return (True, None, None, None, 1)
            else:
                return (True, right, None, None, 2)
        elif left.op == "M" and right.op == "R":
            # M a + R a b -> R a b M b, so R falls towards start of list
            if left.path == right.source:
                if self.__ancestor_count(event, left.path) == 1:
                    # M a has no ancestors, preceding R can be dropped
                    left.path = right.target
                    return (True, left, None, None, 3)
                else:
                    # M a has ancestors, R is still needed
                    left.path = right.target
                    return (True, right, left, None, 4)
            # M b + R a b can't happen.  If you try to generate this with
            # git mv it throws an error.  An ordinary mv results in D b M a.
            elif left.path == right.target:
                return(True, right, None, "M followed by R to the M operand?", -1)
            # If neither path test matches, nothing is returned here and
            # control ends up at the Fatal at the bottom.
        # Correct reduction for this would be M a + C a b -> C a b + M a + M b,
        # that is we'd have to duplicate the modify. We'll leave it in place
        # for now.
        elif left.op == "M" and right.op == "C":
            return (False, left, right, None, 5)
        #
        # First op D or deleteall
        #
        # Delete followed by modify undoes delete, since M carries whole files.
        elif pair == ("D", "M"):
            return (True, None, right, None, 6)
        # But we have to leave deletealls in place, since they affect right ops
        elif pair == ("deleteall", "M"):
            return (False, left, right, None, 7)
        # These cases should be impossible.  But cvs2svn actually generates
        # adjacent deletes into Subversion dumpfiles which turn into (D, D).
        elif left.op == "deleteall" and right.op != "M":
            return (False, left, right,
                    "Non-M operation after deleteall?", -1)
        elif left.op == "D" and right.op == "D":
            return (True, left, None, None, -2)
        elif left.op == "D" and right.op in ("R", "C"):
            if left.path == right.source:
                return (False, left, right,
                        "R or C of %s after deletion?" % left.path, -3)
            else:
                return (False, left, right, None, 8)
        #
        # First op R
        #
        elif pair == ("R", "D"):
            if left.target == right.path:
                # Rename followed by delete of target composes to source delete
                right.path = left.source
                return (True, None, right, None, 9)
            else:
                # On rename followed by delete of source discard the delete
                # but user should be warned.
                # NOTE(review): field 0 is False here although the comment
                # says the delete is discarded, and the message reports
                # left.source as the rename destination - confirm both.
                return (False, left, None,
                        "delete of %s after renaming to %s?" % (right.path, left.source), -4)
        # Rename followed by deleteall shouldn't be possible
        elif pair == ("R", "deleteall") and left.target == right.path:
            return (False, None, right,
                    "rename before deleteall not removed?", -5)
        # Leave rename or copy followed by modify alone
        elif pair == ("R", "M") or pair == ("C", "M"):
            return (False, left, right, None, 10)
        # Compose renames where possible
        elif left.op == "R" and right.op == "R":
            if left.target == right.source:
                left.target = right.target
                return (True, left, None, None, 11)
            else:
                return (False, left, right,
                        "R %s %s is inconsistent with following operation" \
                        % (left.source, left.target), -6)
        # We could do R a b + C b c -> C a c + R a b, but why?
        # NOTE(review): this is a plain `if`, so it starts a second chain
        # that is evaluated whenever the chain above did not return.
        if left.op == "R" and right.op == "C":
            return (False, left, right, None, 12)
        #
        # First op C
        #
        elif pair == ("C", "D"):
            if left.source == right.path:
                # Copy followed by delete of the source is a rename.
                left.setOp("R")
                return (True, left, None, None, 13)
            elif left.target == right.path:
                # This delete undoes the copy
                return (True, None, None, None, 14)
        elif pair == ("C", "R"):
            if left.source == right.source:
                # No reduction
                return (False, left, right, None, 15)
            else:
                # Copy followed by a rename of the target reduces to single copy
                if left.target == right.source:
                    left.target = right.target
                    return (True, left, None, None, 16)
        elif pair == ("C", "C"):
            # No reduction
            return (False, left, right, None, 17)
        #
        # Case not covered
        #
        raise Fatal("can't compose op '%s' and '%s'" % (left, right))
    def canonicalize(self, commit):
        """Canonicalize the list of file operations in this commit.

        Rewrites commit.fileops in place and returns the set of
        composition case numbers that produced a modification.
        """
        coverage = set([])
        # Handling deleteall operations is simple
        # Everything ahead of the last deleteall is dropped; the
        # deleteall itself is kept as the first remaining op.
        lastdeleteall = None
        for (i, a) in enumerate(commit.fileops):
            if a.op == "deleteall":
                lastdeleteall = i
        if lastdeleteall is not None:
            if debug_enable(DEBUG_DELETE):
                announce("removing all before rightmost deleteall")
            commit.fileops = commit.fileops[lastdeleteall:]
            commit._pathset = None
        # Composition in the general case is trickier.
        while True:
            # Keep making passes until nothing mutates
            mutated = False
            for i in range(len(commit.fileops)):
                for j in range(i+1, len(commit.fileops)):
                    # Slots are re-read on every step because an earlier
                    # composition in this pass may have replaced them,
                    # possibly with None.
                    a = commit.fileops[i]
                    b = commit.fileops[j]
                    if a is not None and b is not None and a.relevant(b):
                        (modified, newa, newb, warn, case) = self.__compose(commit, a, b)
                        if debug_enable(DEBUG_DELETE):
                            announce("Reduction case %d fired on %s" % (case, (i,j)))
                        # Replacements, warnings and coverage are only
                        # acted on when the composition reports a change.
                        if modified:
                            mutated = True
                            commit.fileops[i] = newa
                            commit.fileops[j] = newb
                            if debug_enable(DEBUG_DELETE):
                                announce("During canonicalization:")
                                commit.fileop_dump(j)
                            if warn:
                                complain(warn)
                            coverage.add(case)
            if not mutated:
                break
            # Squeeze out the slots that compositions emptied in this pass.
            commit.fileops = [x for x in commit.fileops if x is not None]
            commit._pathset = None
        return coverage
    def delete(self, selected, policy):
        """Delete commits, handling multiple Ms on a file with specified policy.

        selected is a list of indices into self.events; it is copied, so
        the caller's list is not reordered.  policy is a collection of
        flag strings tested with "in".  The flags consulted here are:

        quiet      -- permit direct blob deletion and suppress the
                      obliterate warnings.
        obliterate -- do not hand the fileops to children and skip the
                      canonicalization pass.
        pushback   -- append the fileops to each parent rather than
                      prepending them to each child.
        tagback / tagforward -- retarget tags (and a tag-like branch
                      name) to the first parent / first child instead
                      of dropping them.
        coalesce   -- when a commit ends up with several Ms for a path,
                      remove all but the last.

        Raises Recoverable on an attempt to delete a blob without
        "quiet", and Fatal if an event of an unexpected class is found.
        """
        # Make sure we do deletions from greatest commit number to least
        # (so that popping an event never shifts an index still to be used).
        selected = copy.copy(selected)
        selected.sort(reverse=True)
        if debug_enable(DEBUG_DELETE):
            # Indices are reported 1-origin.
            announce("Deletion list is %s" % [x+1 for x in selected])
        # Sanity checks
        for ei in selected:
            event = self.events[ei]
            if isinstance(event, Blob) and not "quiet" in policy:
                raise Recoverable("attempt to directly delete blob %d" % (ei+1))
            elif  isinstance(event, Commit):
                if "obliterate" in policy and not "quiet" in policy:
                    speak = "warning: commit %s to be obliterated has " % event.mark 
                    if '/' in event.branch and not '/heads/' in event.branch:
                        complain(speak + "non-head branch attribute %s" % event.branch)
                    if not event.alldeletes():
                        announce(speak + "non-delete fileops.")
                        # NOTE(review): this ends the whole sanity-check
                        # loop, so later selections are not examined.
                        # Presumably deliberate (warn once) -- confirm.
                        break
        # Here are the deletions
        for ei in selected:
            event = self.events[ei]
            if event.__class__ in (Reset, Tag, Passthrough, Blob):
                # These event types are simply dropped.
                self.events.pop(ei)
            elif isinstance(event, Commit):
                # A commit on a tag-like branch: decide whether the
                # branch name survives on a neighbor.
                if event.branch and "/tags/" in event.branch:
                    identical = False
                    if "tagback" in policy:
                        if event.parents():
                            identical = event.parents()[0].branch == event.branch
                            if not identical:
                                event.parents()[0].branch = event.branch
                    elif "tagforward" in policy:
                        if event.children():
                            identical = event.children()[0].branch == event.branch
                            if not identical:
                                event.children()[0].branch = event.branch
                    else:
                        # No tag policy: only report whether the name
                        # is about to disappear.
                        if "pushback" in policy:
                            if event.parents():
                                identical = event.parents()[0].branch == event.branch
                        else:
                            if event.children():
                                identical = event.children()[0].branch == event.branch        
                        if not identical:
                            complain("tag %s on event %s will be lost" % (event.branch, event.mark))
                # Reparent each child
                for child in event.children():
                    child.removeParent(event.mark)
                    for parent_mark in event.parentMarks():
                        if parent_mark not in child.parentMarks():
                            child.addParent(parent_mark)
                    if "obliterate" not in policy and "pushback" not in policy:
                        # Prepend a copy of this event's file ops to
                        # each child's list and mark the child as
                        # needing resolution.
                        child.fileops = copy.copy(event.fileops) + child.fileops
                        child._pathset = None
                        child.pushed_to = True
                # We might be trying to hand the event's fileops to parents.
                if "pushback" in policy:
                    for parent in event.parents():
                        # Append a copy of this event's file ops to
                        # each parent's list and mark the parent as needing
                        # resolution.
                        for fileop in event.fileops:
                            # On a pushback (but not a push forward)
                            # we might have moved the fileop so it's
                            # now referred to before its actual
                            # definition.  This will cause a fatal
                            # error "mark not defined" on import.
                            if fileop.op == 'M':
                                swapblob = self.find(fileop.ref)
                                swapcommit = self.find(parent.mark)
                                if swapblob > swapcommit:
                                    #print "Uh oh!", swapcommit, swapblob
                                    # Rotate the blob down to just ahead
                                    # of the parent commit, shifting the
                                    # intervening events up by one.
                                    saveblob = self.events[swapblob]
                                    for i in range(swapblob-swapcommit):
                                        countdown = swapblob-i
                                        #print "Moving %d to %d" % (countdown-1, countdown)
                                        self.events[countdown] = self.events[countdown-1]
                                    #print "Moving %d to %d" % (swapblob, swapcommit)
                                    self.events[swapcommit] = saveblob
                                    self.declare_sequence_mutation()
                        parent.fileops += copy.copy(event.fileops)
                        parent._pathset = None
                        parent.pushed_to = True
                # Tags pointing at the deleted commit are either removed
                # or retargeted, depending on the tag policy.
                if "tagback" not in policy and "tagforward" not in policy:
                    self.events = [t for t in self.events if not (isinstance(t, Tag)
                                                        and t.committish == event.mark)]
                elif "tagforward" in policy:
                    # NOTE(review): the children were detached from this
                    # event in the reparenting loop above; whether
                    # children() can still be non-empty here depends on
                    # code outside this view -- confirm.
                    for t in self.events:
                        if isinstance(t, Tag) and t.committish == event.mark:
                            t.committish = event.children()[0].mark
                elif "tagback" in policy:
                    for t in self.events:
                        if isinstance(t, Tag) and t.committish == event.mark:
                            t.committish = event.parents()[0].mark
                # And remove the deleted event
                self.events.pop(ei)
            else:
                raise Fatal("unexpected object in event array")
        # Canonicalize all the commits that got ops pushed to them
        if "obliterate" not in policy:
            for (ei, event) in enumerate(self.events):
                if not isinstance(event, Commit):
                    continue
                elif event.pushed_to:
                    if debug_enable(DEBUG_DELETE):
                        announce("Before canonicalization:")
                        event.fileop_dump(ei)
                    self.case_coverage |= self.canonicalize(self.events[ei])
                    if debug_enable(DEBUG_DELETE):
                        announce("After canonicalization:")
                        event.fileop_dump(ei)
                    # Now apply policy in the multiple-M case
                    for (path, oplist) in list(event.cliques().items()):
                        if len(oplist) == 1:
                            continue
                        if ("coalesce" not in policy and "obliterate" not in policy) or debug_enable(DEBUG_DELETE):
                            complain("commit %s has multiple Ms for %s" % (event.mark, path))
                        if "coalesce" in policy:
                            # Remove all but the last M.
                            # NOTE(review): oplist is used as positions
                            # in event.fileops; each pop shifts the
                            # positions after it, so entries still in
                            # this (or another) clique may go stale --
                            # verify against cliques().
                            while len(oplist) > 1:
                                event.fileops.pop(oplist.pop(0))
                            event._pathset = None
                        if debug_enable(DEBUG_DELETE):
                            print("Commit %d, after applying policy:" % (ei +1,))
                            for op in event.fileops:
                                print(str(op))
        self.declare_sequence_mutation()
        # Clear everybody's problem flag
        for commit in self.commits():
            commit.pushed_to = False
    def quiet_delete(self, commit):
        "Delete one event using the obliterate and quiet policies."
        position = self.events.index(commit)
        self.delete([position], ["obliterate", "quiet"])
    #
    # Delete machinery ends here
    #
    def front_events(self):
        "Return the passthrough events whose text starts with option or feature."
        leaders = ("option", "feature")
        found = []
        for event in self.events:
            if not isinstance(event, Passthrough):
                continue
            if event.text.startswith(leaders):
                found.append(event)
        return found
    def renumber(self, origin=1, baton=None):
        """Renumber the marks in a repo starting from a specified origin.

        Every event that defines a mark is assigned ":N", where N counts
        up from origin in event order.  References to those marks in
        committish fields, commit parent lists and M fileops are rewritten
        to match.  If baton is given, its startcounter/bumpcounter/
        endcounter methods are called to report progress.

        Raises Fatal if a mark field is not in ":" format, or if a
        reference names a mark that no event defines.
        """
        # Map each defining mark to its ordinal among all defining marks.
        # A dictionary keeps lookups constant-time; scanning a list for
        # every reference made this quadratic in the number of marks.
        ordinal = {}
        defined = 0
        def remark(m):
            try:
                return ":" + repr(origin + ordinal[m])
            except KeyError:
                raise Fatal("unknown mark %s cannot be renumbered!" % m)
        if baton:
            count = len(self.events)
            baton.startcounter(" %%%dd of %s" % (len(str(count)), count))
        # Pass 1: collect the defining marks in event order.
        for event in self.events:
            if hasattr(event, "mark"):
                if event.mark is None:
                    continue
                elif not event.mark.startswith(":"):
                    raise Fatal("field not in mark format")
                else:
                    # setdefault keeps the first position if a mark
                    # should occur twice, while the counter still
                    # advances for every defining event.
                    ordinal.setdefault(event.mark, defined)
                    defined += 1
        # Pass 2: rewrite definitions and every kind of reference.
        for event in self.events:
            for fld in ("mark", "committish"):
                if hasattr(event, fld) and getattr(event, fld):
                    old = getattr(event, fld)
                    new = remark(old)
                    if debug_enable(DEBUG_UNITE):
                        announce("renumbering %s -> %s in %s.%s" % (old, new,
                                                                    event.__class__.__name__,
                                                                    fld))
                    setattr(event, fld, new)
            if isinstance(event, Commit):
                parent_marks = event.parentMarks()
                for (i, old) in enumerate(parent_marks):
                    new = remark(old)
                    if debug_enable(DEBUG_UNITE):
                        announce("renumbering %s -> %s in parents" % (old, new))
                    parent_marks[i] = new
                event.setParents(parent_marks)
                for fileop in event.fileops:
                    # Only mark references are rewritten; other refs are
                    # left alone.
                    if fileop.op == "M" and fileop.ref.startswith(":"):
                        new = remark(fileop.ref)
                        if debug_enable(DEBUG_UNITE):
                            announce("renumbering %s -> %s in fileop" % (fileop.ref, new))
                        fileop.ref = new
            if baton:
                baton.bumpcounter()
        if baton:
            baton.endcounter()
    def uniquify(self, color):
        """Disambiguate branches, tags, and marks using the specified label.

        Tag names receive the label as a prefix; branch names, reset
        refs, marks and mark references receive it as a suffix.  Raises
        Fatal if a mark or committish field is not in ":" format.
        """
        suffix = "-" + color
        for event in self.events:
            # Tags: the label goes in front of the name.
            if isinstance(event, Tag):
                event.name = color + "-" + event.name
            # Branches and refs: the label goes on the end.
            for (kind, field) in ((Commit, "branch"), (Reset, "ref")):
                if not isinstance(event, kind):
                    continue
                before = getattr(event, field)
                after = before + suffix
                if debug_enable(DEBUG_UNITE):
                    announce("moving %s -> %s in %s.%s"
                             % (before, after, kind.__name__, field))
                setattr(event, field, after)
            # Defining marks and committish references.
            for field in ("mark", "committish"):
                if not hasattr(event, field):
                    continue
                before = getattr(event, field)
                if before is None:
                    continue
                if not before.startswith(":"):
                    raise Fatal("field not in mark format")
                after = before + suffix
                if debug_enable(DEBUG_UNITE):
                    announce("moving %s -> %s in %s.%s"
                             % (before, after, event.__class__.__name__, field))
                setattr(event, field, after)
            if not isinstance(event, Commit):
                continue
            # Parent links of a commit.
            marks = event.parentMarks()
            for slot in range(len(marks)):
                renamed = marks[slot] + suffix
                if debug_enable(DEBUG_UNITE):
                    announce("moving %s -> %s in parents" % (marks[slot], renamed))
                marks[slot] = renamed
            event.setParents(marks)
            # Mark references inside M fileops.
            for fileop in event.fileops:
                if fileop.op != "M" or not fileop.ref.startswith(":"):
                    continue
                renamed = fileop.ref + suffix
                if debug_enable(DEBUG_UNITE):
                    announce("moving %s -> %s in fileop"
                             % (fileop.ref, renamed))
                fileop.ref = renamed
    def absorb(self, other):
        """Move all events of another repository into this one.

        Leading passthrough events of other are inserted after the
        passthroughs already present here; the remaining events are
        appended.  Events that have a moveto method are told about their
        new owner.  Afterwards other is left with no events and its
        cleanup method is called.
        """
        # Only vcstype, sourcedir, and basedir are not copied here
        self.preserve_set |= other.preserve_set
        self.case_coverage |= other.case_coverage
        # Strip feature events off the front, they have to stay in front.
        # The emptiness test matters: a repository that is empty, or that
        # holds nothing but passthroughs, must not be indexed past its end.
        while other.events and isinstance(other.events[0], Passthrough):
            front = [x for x in self.events if isinstance(x, Passthrough)]
            self.events.insert(len(front), other.events.pop(0))
        # Merge in the non-feature events and blobs
        self.events += other.events
        self.declare_sequence_mutation()
        # Transplant in fileops, blobs, and other impedimenta
        for event in other:
            if hasattr(event, "moveto"):
                event.moveto(self)
        other.events = []
        other.cleanup()
        #del other
    def graft(self, graft_repo, graft_point):
        """Graft a repo on to this one at a specified point.

        graft_point is an index into self.events and must select a
        commit; the first commit of graft_repo becomes its child.  The
        grafted repository is uniquified with its own name, absorbed,
        and the combined repository is renumbered.

        Raises Recoverable, before anything has been modified, if the
        graft point is not a commit or graft_repo has no commits.
        """
        where = self.events[graft_point]
        if not isinstance(where, Commit):
            # Not every event type carries a mark, so fall back on the
            # 1-origin event number for the message.
            label = getattr(where, "mark", None) or ("event %d" % (graft_point + 1))
            raise Recoverable("%s in %s is not a commit." % \
                              (label, self.name))
        if not graft_repo.commits():
            raise Recoverable("%s has no commits to graft." % graft_repo.name)
        # Errors aren't recoverable after this
        graft_repo.uniquify(graft_repo.name)
        graft_repo.commits()[0].addParent(where.mark)
        self.absorb(graft_repo)
        self.renumber()
    def __last_modification(self, commit, path):
        """Locate the last modification of the specified path before this commit.

        Walks the ancestry one generation at a time.  Returns a pair
        (ancestor, index) naming the M fileop found, or None when the
        ancestry runs out.  A rename onto the path switches the search
        to the rename's source.
        """
        frontier = commit.parents()
        while frontier:
            older = []
            for ancestor in frontier:
                # This is potential trouble if the file was renamed
                # down one side of a merge bubble but not the other.
                # Might cause an internal-error message, but no real
                # harm will be done.
                for (index, fileop) in enumerate(ancestor.fileops):
                    if fileop.op == 'R':
                        if fileop.target == path:
                            path = fileop.source
                    elif fileop.op == 'M' and fileop.path == path:
                        return (ancestor, index)
                # Nothing found here; queue up the next generation.
                older.extend(ancestor.parents())
            frontier = older
        return None
    def move_to_rename(self):
        """Make rename sequences from matched delete-modify pairs.

        For each D fileop in a commit, the last modification of the
        deleted path is looked up in the ancestry; if the commit also
        contains an M with the same mode and ref, the D is turned into
        an R from the deleted path to the M's path and the M is removed.
        Returns the number of renames made.

        Raises Recoverable if no earlier modification of a deleted path
        can be found.
        """
        # TODO: Actually use this somewhere...
        rename_count = 0
        for commit in self.commits():
            renames = []
            # Positions of M ops already paired with a delete; one M can
            # be the target of at most one rename.
            claimed = set()
            for (d, op) in enumerate(commit.fileops):
                if op.op != 'D':
                    continue
                previous = self.__last_modification(commit, op.path)
                if not previous:
                    raise Recoverable("internal error looking for renames of %s" % op.path)
                (ancestor, i) = previous
                for (m, op2) in enumerate(commit.fileops):
                    if m not in claimed and op2.op == 'M' and \
                       ancestor.fileops[i].mode == op2.mode and \
                       ancestor.fileops[i].ref == op2.ref:
                        renames.append((d, m))
                        claimed.add(m)
                        rename_count += 1
                        break
            if not renames:
                continue
            # Convert every D first, while all recorded positions are
            # still valid, and only then drop the absorbed M ops in a
            # single pass.  Popping them one at a time would shift the
            # positions recorded for the remaining pairs.
            for (d, m) in renames:
                commit.fileops[d].source = commit.fileops[d].path
                commit.fileops[d].target = commit.fileops[m].path
                del commit.fileops[d].path
                commit.fileops[d].op = 'R'
            commit.fileops = [fileop for (k, fileop) in enumerate(commit.fileops)
                              if k not in claimed]
            commit._pathset = None
        return rename_count
    def path_walk(self, selection, hook=lambda path: path):
        """Apply a hook to every path in the selected commits.

        selection is an iterable of indices into self.events; events
        that are not commits are ignored.  Returns a sorted list of the
        new paths that differ from the paths they replaced.
        """
        touched = set()
        def rewrite(fileop, field):
            "Run the hook over one path field, recording a change."
            before = getattr(fileop, field)
            after = hook(before)
            if after != before:
                touched.add(after)
            setattr(fileop, field, after)
        for ei in selection:
            event = self.events[ei]
            if not isinstance(event, Commit):
                continue
            for fileop in event.fileops:
                if fileop.op in ("M", "D"):
                    rewrite(fileop, "path")
                elif fileop.op in ("R", "C"):
                    rewrite(fileop, "source")
                    rewrite(fileop, "target")
            # The cached path set is discarded whether or not
            # anything actually changed.
            event._pathset = None
        return sorted(touched)
    def split_commit(self, where, splitfunc):
        """Split the commit at event index where into two commits.

        splitfunc takes the commit's fileop list and returns a pair of
        lists: the operations that stay, and the operations that move to
        a clone inserted directly after the original.  The clone gets a
        mark derived from the original's, becomes the original's only
        child, and inherits the original's former children.  Returns
        True if the split was made, False if either list came back empty
        (in which case nothing is changed).
        """
        event = self.events[where]
        # Fileop split happens here
        (kept, moved) = splitfunc(event.fileops)
        if not (kept and moved):
            return False
        self.events.insert(where+1, event.clone())
        self.declare_sequence_mutation()
        event2 = self.events[where+1]
        # The clone needs a mark of its own
        assert(event.mark == event2.mark)
        event.splits = 1 if event.splits is None else event.splits + 1
        newmark = "%s.%s" % (event.mark, event.splits)
        # Children of the original now hang off the clone
        for child in event.children():
            marks = child.parentMarks()
            marks[:] = [newmark if mark == event.mark else mark
                        for mark in marks]
            child.setParents(marks)
        event2.setParents([event.mark])
        event2.mark = newmark
        # Finally hand out the operations
        event2.fileops = moved
        event2._pathset = None
        event.fileops = kept
        event._pathset = None
        return True
    def split_commit_by_index(self, where, splitpoint):
        """Split the commit at event index where at a fileop position.

        Operations before splitpoint stay in the original commit; the
        operation at splitpoint and everything after it move to the new
        child commit.  Keeping the halves in this order preserves the
        sequence in which the operations are applied.  Returns what
        split_commit returns.
        """
        return self.split_commit(where,
                                 lambda ops: (ops[:splitpoint],
                                              ops[splitpoint:]))
    def split_commit_by_prefix(self, where, prefix):
        """Split the commit at event index where by path prefix.

        Operations whose path (or, lacking a path, whose target) starts
        with prefix move to the new child commit; every other operation,
        including those with neither a path nor a target, stays in the
        original.  Returns what split_commit returns.
        """
        def matches(op):
            "Does this operation's path or target start with the prefix?"
            # Not every operation has a usable path, so both fields are
            # fetched defensively.
            name = getattr(op, "path", None) or getattr(op, "target", None)
            return bool(name) and name.startswith(prefix)
        return self.split_commit(where,
                                 lambda ops: ([op for op in ops if not matches(op)],
                                              [op for op in ops if matches(op)]))

    # Container emulation methods
    def __len__(self):
        "Return the number of events in the event list."
        return len(self.events)
    def __getitem__(self, i):
        "Return the event at index i of the event list."
        return self.events[i]
    def __setitem__(self, i, v):
        "Replace the event at index i of the event list with v."
        self.events[i] = v

def read_repo(source, preferred):
    """Read a repository using fast-import.

    source may be '-' (read an import stream from standard input), the
    name of a file holding an import stream, or a directory containing
    a repository.  For a directory, the known VCS types and extractors
    are probed by their subdirectory; if preferred is set, only the type
    with that name is considered.  Returns the new Repository.

    Raises Recoverable if source does not exist, or if the directory
    holds no recognizable repository or more than one.
    """
    if source == '-':
        repo = Repository()
        repo.fast_import(sys.stdin, progress=(verbose==1 and not quiet))
    elif not os.path.exists(source):
        raise Recoverable("%s does not exist" % source)
    elif not os.path.isdir(source):
        repo = Repository()
        # Close the stream file when done rather than leaking the handle.
        with open(source) as sfp:
            repo.fast_import(sfp, progress=(verbose==1 and not quiet))
    else:
        if debug_enable(DEBUG_SHUFFLE):
            if preferred:
                announce("looking for a %s repo..." % preferred.name)
            else:
                announce("reposurgeon: looking for any repo at %s..." % \
                         os.path.abspath(source))
        hitcount = 0
        extractor = vcs = None
        for possible in vcstypes:
            if preferred and possible.name != preferred.name:
                continue
            subdir = os.path.join(source, possible.subdirectory)
            if os.path.exists(subdir) and os.path.isdir(subdir):
                vcs = possible
                hitcount += 1
        for possible in extractors:
            if preferred and possible.name != preferred.name:
                continue
            subdir = os.path.join(source, possible.subdirectory)
            if os.path.exists(subdir) and os.path.isdir(subdir):
                if possible.visible or preferred \
                       and possible.name == preferred.name:
                    extractor = possible
                    hitcount += 1
        if hitcount == 0:
            raise Recoverable("couldn't find a repo under %s" % os.path.relpath(source))
        elif hitcount > 1:
            raise Recoverable("too many repos under %s" % os.path.relpath(source))
        elif verbose > 0:
            announce("found %s repository" % getattr(vcs or extractor, "name"))
        repo = Repository()
        repo.sourcedir = source
        if vcs:
            repo.vcs = vcs
            repo.preserve_set = vcs.preserve
            showprogress = (verbose > 0) and not "export-progress" in repo.export_style()
            context = {"basename": os.path.basename(repo.sourcedir)}
        # Recorded before the try so the finally clause can always
        # restore it.
        here = os.getcwd()
        try:
            os.chdir(repo.sourcedir)
            # We found a matching VCS type
            if vcs:
                if "%(tempfile)s" in repo.vcs.exporter:
                    # Created outside the try so the cleanup below never
                    # sees an unbound name.
                    (tfdesc, tfname) = tempfile.mkstemp()
                    try:
                        # The exporter writes the file by name; our own
                        # descriptor is not needed and must not leak.
                        os.close(tfdesc)
                        context["tempfile"] = tfname
                        do_or_die(repo.vcs.exporter % context, "repository export")
                        with open(tfname) as tp:
                            repo.fast_import(tp, progress=showprogress)
                    finally:
                        os.remove(tfname)
                else:
                    with popen_or_die(repo.vcs.exporter % context, "repository export") as tp:
                        repo.fast_import(tp, progress=showprogress)
                if repo.vcs.authormap and os.path.exists(repo.vcs.authormap):
                    announce("reading author map.")
                    with open(repo.vcs.authormap) as fp:
                        repo.read_authormap(range(len(repo.events)),fp)
                fossils = os.path.join(vcs.subdirectory, "fossils")
                if os.path.exists(fossils):
                    with open(fossils) as rfp:
                        repo.read_fossilmap(rfp)
                if vcs.lister:
                    def fileset(exclude):
                        "All files below the current directory, pruning excluded directories."
                        allfiles = []
                        for root, dirs, files in os.walk("."):
                            # [2:] strips the leading "./"
                            allfiles += [os.path.join(root, name)[2:] for name in files]
                            for exdir in exclude:
                                if exdir in dirs:
                                    dirs.remove(exdir)
                        return set(allfiles)
                    with popen_or_die(vcs.lister) as fp:
                        repofiles = set(fp.read().split())
                    allfiles = fileset(exclude=[vcs.subdirectory]\
                                       + glob.glob(".rs*"))
                    # Whatever is on disk but not under version control
                    # is to be preserved.
                    repo.preserve_set = allfiles - repofiles
                # kluge: git-specific hook
                if repo.vcs.name == "git":
                    if os.path.exists(".git/cvs-revisions"):
                        announce("reading cvs-revisions map.")
                        pathrev_to_hash = {}
                        # Pass 1: Get git's path/revision to hash mapping
                        with open(".git/cvs-revisions") as cfp:
                            for line in cfp:
                                (path, rev, hashv) = line.split()
                                pathrev_to_hash[(path, rev)] = hashv
                        # Pass 2: get git's hash to (time,person) mapping 
                        hash_to_action = {}
                        # Remembers which hash first used each stamp so
                        # an ambiguous stamp can be withdrawn again.
                        stamp_to_hash = {}
                        with popen_or_die("git log --all --format='%H %ct %ce'", mode="r") as fp:
                            for line in fp:
                                (hashv, ctime, cperson) = line.split()
                                stamp = (int(ctime), cperson)
                                if stamp in stamp_to_hash:
                                    complain("more than one commit matches %s!%s (%s)" \
                                             % (rfc3339(int(ctime)), cperson, hashv))
                                    # Withdraw the commit recorded earlier
                                    # under this stamp; the stamp no longer
                                    # identifies a single commit.
                                    hash_to_action.pop(stamp_to_hash[stamp], None)
                                else:
                                    hash_to_action[hashv] = stamp
                                    stamp_to_hash[stamp] = hashv
                            # Pass 3: build a (time,person) to commit mapping 
                            action_to_mark = {}
                            for commit in repo.commits():
                                action_to_mark[(commit.committer.date.timestamp, commit.committer.email)] = commit
                            # Pass 4: use it to set commit properties
                            for ((path, rev), value) in pathrev_to_hash.items():
                                if value in hash_to_action:
                                    (ctime, cperson) = hash_to_action[value]
                                    action_to_mark[(ctime, cperson)].fossil_id = "CVS:%s:%s" % (path, rev)
                            del pathrev_to_hash
                            del hash_to_action
                            del stamp_to_hash
            # We found a matching custom extractor
            if extractor:
                streamer = RepoStreamer(extractor)
                streamer.extract(repo, progress=verbose>0)
        finally:
            os.chdir(here)
    return repo

class CriticalRegion:
    """Encapsulate operations to try and make us un-interruptible.

    Used as a context manager: on entry signals 1 through NSIG-1
    (other than SIGKILL and SIGSTOP) are set to be ignored, and on exit
    the handlers that were in force before are put back.  Exceptions
    raised inside the region are not suppressed.
    """
    # This number is magic. Python sets a much higher signal.NSIG
    # value, but under Linux the signal calls start to trigger
    # runtime errors at this value and above.
    NSIG = 32
    def __init__(self):
        self.handlers = None    # Pacifies pylint
    def __enter__(self):
        "Begin critical region."
        if debug_enable(DEBUG_COMMANDS):
            complain("critical region begins...")
        # Alas that we lack sigblock support
        self.handlers = [None]*(CriticalRegion.NSIG+1)
        for sig in range(1, CriticalRegion.NSIG):
            if sig in (signal.SIGKILL, signal.SIGSTOP):
                continue
            # A handler that was not installed from Python is reported
            # as None and could not be put back afterwards, so such a
            # signal is left alone.
            if signal.getsignal(sig) is None:
                continue
            self.handlers[sig] = signal.signal(sig, signal.SIG_IGN)
    def __exit__(self, extype_unused, value_unused, traceback_unused):
        "End critical region."
        for sig in range(1, CriticalRegion.NSIG):
            if sig in (signal.SIGKILL, signal.SIGSTOP):
                continue
            # None marks a signal we never touched; passing None to
            # signal.signal would raise TypeError.
            if self.handlers[sig] is not None:
                signal.signal(sig, self.handlers[sig])
        if debug_enable(DEBUG_COMMANDS):
            complain("critical region ends.")
        return False

def rebuild_repo(repo, target, preferred):
    """Rebuild a repository from the captured state.

    target is the directory to rebuild in; if empty, the repository's
    source directory is used.  preferred, if set, selects the VCS type
    to write; otherwise the type the repository was read from is used.

    If the target already exists the rebuild is done in a staging
    directory beside it; the old contents are moved to a numbered backup
    directory, the staged contents are moved in, and the files named in
    repo.preserve_set are copied back from the backup.

    Raises Recoverable if no destination or VCS type can be determined,
    if the type cannot be written, or if a directory or the fossils file
    cannot be created.
    """
    if not target and repo.sourcedir:
        target = repo.sourcedir
    if target:
        target = os.path.abspath(target)
    else:
        raise Recoverable("no default destination for rebuild")
    vcs = preferred or repo.vcs
    if not vcs:
        raise Recoverable("please prefer a repo type first")
    if not hasattr(vcs, "exporter") or vcs.importer is None:
        # Name the type actually selected; preferred may be unset when
        # the type came from the repository itself.
        raise Recoverable("%s repositories are supported for read only." \
                          % vcs.name)

    if not os.path.join("refs", "heads", "master") in repo.branchlist():
        complain("repository has no branch named master. git will have no HEAD commit after the import; consider using the branch command to rename one of your branches to master.")

    # Create a new empty directory to do the rebuild in
    if not os.path.exists(target):
        staging = target
        try:
            os.mkdir(target)
        except OSError:
            raise Recoverable("target directory creation failed")
    else:
        staging = target + "-stage" + str(os.getpid())
        assert(os.path.isabs(target) and os.path.isabs(staging))
        try:
            os.mkdir(staging)
        except OSError:
            raise Recoverable("staging directory creation failed")

    # Try the rebuild in the empty staging directory 
    here = os.getcwd()
    try:
        os.chdir(staging)
        if vcs.initializer:
            do_or_die(vcs.initializer, "repository initialization")
        parameters = {"basename": os.path.basename(target)}
        if "%(tempfile)s" in vcs.importer:
            # Created outside the try so the cleanup below never sees an
            # unbound name.
            (tfdesc, tfname) = tempfile.mkstemp()
            try:
                # The file is reopened by name; our own descriptor is
                # not needed and must not leak.
                os.close(tfdesc)
                with open(tfname, "w") as tp:
                    repo.fast_export(list(range(len(repo))), tp, progress=verbose>0, target=preferred)
                # The import command needs to know where the stream
                # was written.
                parameters["tempfile"] = tfname
                do_or_die(vcs.importer % parameters, "import")
            finally:
                os.remove(tfname)
        else:
            with popen_or_die(vcs.importer % parameters, "import", mode="w") as tp:
                repo.fast_export(list(range(len(repo))), tp,
                                 target=preferred,
                                 progress=verbose>0)
        if repo.write_fossils:
            try:
                fossilfile = os.path.join(vcs.subdirectory, "fossils")
                with open(fossilfile, "w") as wfp:
                    repo.write_fossilmap(wfp)
            except IOError:
                raise Recoverable("fossils file %s could not be written." \
                                  % fossilfile)

        do_or_die(vcs.checkout, "repository_checkout")
        if verbose:
            announce("rebuild is complete.")

        os.chdir(here)
        # Rebuild succeeded - make an empty backup directory
        backupcount = 1
        while True:
            savedir = target + (".~%d~" % backupcount)
            if os.path.exists(savedir):
                backupcount += 1
            else:
                break
        assert os.path.isabs(savedir)
        os.mkdir(savedir)

        if staging != target:
            # This is a critical region.  Ignore all signals until we're done.
            with CriticalRegion():
                # Move the unmodified repo contents in target to the
                # backup directory.  Then move the staging contents to the
                # target directory.  Finally, restore designated files
                # from backup to target.
                for sub in os.listdir(target):
                    os.rename(os.path.join(target, sub),
                              os.path.join(savedir, sub))
                if verbose:
                    announce("repo backed up to %s." % os.path.relpath(savedir))
                for sub in os.listdir(staging):
                    os.rename(os.path.join(staging, sub),
                              os.path.join(target, sub))
                if verbose:
                    announce("modified repo moved to %s." % os.path.relpath(target))
            if repo.preserve_set:
                for sub in repo.preserve_set:
                    src = os.path.join(savedir, sub)
                    dst = os.path.join(target, sub)
                    if os.path.exists(src):
                        if os.path.isdir(src):
                            shutil.copytree(src, dst)
                        else:
                            shutil.copy2(src, dst)
                if verbose:
                    announce("preserved files restored.")
            elif verbose:
                announce("no preservations.")
    finally:
        os.chdir(here)
        if staging != target:
            nuke(staging, "reposurgeon: removing staging directory")

def do_or_die(dcmd, legend=""):
    "Run a shell command, raising Fatal unless it exits with status zero."
    # A non-empty legend is appended to diagnostics after a single space.
    suffix = (" " + legend) if legend else legend
    if debug_enable(DEBUG_COMMANDS):
        announce("executing '%s'%s" % (dcmd, suffix))
    try:
        status = subprocess.call(dcmd, shell=True)
        # subprocess reports death-by-signal as a negative status.
        if status < 0:
            raise Fatal("child was terminated by signal %d." % -status)
        if status != 0:
            raise Fatal("child returned %d." % status)
    except (OSError, IOError) as oops:
        raise Fatal("execution of %s%s failed: %s" % (dcmd, suffix, oops))

class popen_or_die:
    """Read or write from a subordinate process.

    command is handed to os.popen(); legend is an optional phrase that is
    appended (after a space) to diagnostics; mode is "r" to read the
    command's output or "w" to write to its input.  Entering the context
    yields the pipe's file object.  Leaving it normally raises Fatal if
    closing the pipe reports anything other than None.
    """
    def __init__(self, command, legend="", mode="r"):
        assert mode in ("r", "w")
        self.command = command
        self.legend = legend
        self.mode = mode
        if self.legend:
            self.legend = " "  + self.legend
        self.fp = None
    def __enter__(self):
        if debug_enable(DEBUG_COMMANDS):
            if self.mode == "r":
                announce("%s: reading from '%s'%s" % (rfc3339(time.time()), self.command, self.legend))
            else:
                announce("%s: writing to '%s'%s" % (rfc3339(time.time()), self.command, self.legend))
        try:
            self.fp = os.popen(self.command, self.mode)
            return self.fp
        except (OSError, IOError) as oe:
            raise Fatal("execution of %s%s failed: %s" \
                                 % (self.command, self.legend, oe))
    def __exit__(self, extype, value, traceback):
        if extype:
            if verbose:
                complain("fatal exception in popen_or_die.")
            # Release the pipe before the exception leaves the with block;
            # otherwise it stays open until the object is collected.
            if self.fp is not None:
                try:
                    self.fp.close()
                except (OSError, IOError):
                    # The exception already in flight is the one to report.
                    pass
            # A false return makes the interpreter re-raise the original
            # exception with its original traceback, on Python 2 and 3
            # alike, so no version-specific raise syntax is needed.
            return False
        if self.fp.close() is not None:
            raise Fatal("%s%s returned error." % (self.command, self.legend))
        return False

class Recoverable(Exception):
    """Error that aborts the current command but not the interpreter.

    The text is kept in .msg (which the command loop reports) and is also
    passed to Exception so that str() and tracebacks show it.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

class RepositoryList:
    "A repository list with selection and access by name."
    def __init__(self):
        self.repo = None
        self.repolist = []
        self.cut_index = None
    def chosen(self):
        "Return the currently selected repository, or None if there is none."
        return self.repo
    def choose(self, repo):
        "Make repo the currently selected repository."
        self.repo = repo
    def unchoose(self):
        "Clear the current selection; the repo list itself is untouched."
        self.repo = None
    def reponames(self):
        "Return a list of the names of all repositories."
        return [r.name for r in self.repolist]
    def uniquify(self, name):
        "Uniquify a repo name in the repo list."
        if name.endswith(".fi"):
            name = name[:-3]
        if name not in self.reponames():
            return name
        else:
            # repo "foo" is #1
            seq = 2
            while name + str(seq) in self.reponames():
                seq += 1
            return name + str(seq)
    def repo_by_name(self, name):
        "Retrieve a repo by name."
        return self.repolist[self.reponames().index(name)]
    def remove_by_name(self, name):
        "Remove a repo by name."
        if self.repo and self.repo.name == name:
            self.unchoose()
        self.repolist.pop(self.reponames().index(name))        
    def cut_conflict(self, early, late):
        "Apply a graph-coloring algorithm to see if the repo can be split here."
        self.cut_index = late.parentMarks().index(early.mark)
        late.removeParent(early.mark)
        def do_color(commit, color):
            commit.color = color
            for fileop in commit.fileops:
                if fileop.op == "M" and fileop.ref != "inline":
                    blob = self.repo.find(fileop.ref)
                    assert isinstance(self.repo[blob], Blob)
                    self.repo[blob].colors.append(color)
        do_color(early, "early")
        do_color(late, "late")
        conflict = False
        keepgoing = True
        while keepgoing and not conflict:
            keepgoing = False
            for event in self.repo.commits():
                if event.color:
                    for neighbor in event.parents() + event.children():
                        if neighbor.color == None:
                            do_color(neighbor, event.color)
                            keepgoing = True
                            break
                        elif neighbor.color != event.color:
                            conflict = True
                            break
        return conflict
    def cut_clear(self, early, late):
        "Undo a cut operation and clear all colors."
        late.insertParent(self.cut_index, early.mark)
        for event in self.repo:
            if hasattr(event, "color"):
                event.color = None
            if hasattr(event, "colors"):
                event.colors = []
    def cut(self, early, late):
        """Attempt to topologically cut the selected repo.

        early and late are the commits on either side of the parent link
        to sever.  If the coloring pass reports a conflict, the link is
        restored, all colors are cleared, and False is returned.
        Otherwise the selected repo is replaced in the repo list by two
        new repos named <name>-early and <name>-late and True is returned.
        """
        if self.cut_conflict(early, late):
            self.cut_clear(early, late)
            return False
        # Repo can be split, so we need to color tags
        # Each tag takes the color of the commit its committish names.
        for t in self.repo.events:
            if isinstance(t, Tag):
                for c in self.repo.events:
                    if isinstance(c, Commit):
                        if c.mark == t.committish:
                            t.color = c.color
        # Front events go with early segment, they'll be copied to late one.
        for event in self.repo.front_events():
            event.color = "early"
        assert all([hasattr(x, "color") or hasattr(x, "colors") or isinstance(x, Reset) for x in self.repo])
        # Resets are tricky.  One may have both colors.
        # Blobs can have both colors too, through references in
        # commits on both sides of the cut, but we took care
        # of that earlier.
        # Record which branches have commits on each side, so that resets
        # can be routed by the branch they refer to.
        trackbranches = {"early": set([]), "late": set([])}
        for commit in self.repo.commits():
            if commit.color is None:
                complain("%s is uncolored!" % commit.mark)
            else:
                trackbranches[commit.color].add(commit.branch)
        # Now it's time to do the actual partitioning
        # From here on, early and late name the two new repositories, not
        # the commits that were passed in.
        early = Repository(self.repo.name + "-early")
        os.mkdir(early.subdir())
        late = Repository(self.repo.name + "-late")
        os.mkdir(late.subdir())
        for event in self.repo:
            if isinstance(event, Reset):
                # A reset is copied to every side that has its branch.
                if event.ref in trackbranches["early"]:
                    early.addEvent(copy.copy(event))
                if event.ref in trackbranches["late"]:
                    late.addEvent(copy.copy(event))
            elif isinstance(event, Blob):
                # A blob is cloned into every side that references it.
                if "early" in event.colors:
                    early.addEvent(event.clone(early))
                if "late" in event.colors:
                    late.addEvent(event.clone(late))
            else:
                # Everything else is moved, not copied, to its one side.
                if event.color == "early":
                    if hasattr(event, "moveto"):
                        event.moveto(early)
                    early.addEvent(event)
                elif event.color == "late":
                    if hasattr(event, "moveto"):
                        event.moveto(late)
                    late.addEvent(event)
                else:
                    # TODO: Someday, color passthroughs that aren't fronted.
                    raise Fatal("coloring algorithm failed on %s" % event)
        # Options and features may need to be copied to the late fragment.
        late.events = copy.copy(early.front_events()) + late.events
        late.declare_sequence_mutation()
        # Add the split results to the repo list.
        self.repolist.append(early)
        self.repolist.append(late)
        self.repo.cleanup()
        # Dropping the original by its own name goes through
        # remove_by_name, which may also unselect it.
        self.remove_by_name(self.repo.name)
        return True
    def unite(self, factors):
        """Unite multiple repos into a union repo.

        factors is a list of repositories; it is sorted in place by each
        repo's earliest() value.  Every factor is absorbed into a new
        repository named by joining the factor names with "+", and is
        removed from the repo list.  The root commit of each factor after
        the first is then given a parent in the union.  The union is
        appended to the repo list and becomes the selected repo.
        """
        factors.sort(key=lambda x: x.earliest())
        # Capture each factor's first commit before the factors are absorbed.
        roots = [x.commits()[0] for x in factors]
        union = Repository("+".join([r.name for r in factors]))
        os.mkdir(union.subdir())
        for (i, factor) in enumerate(factors):
            if i != 0:
                # NOTE(review): presumably this keeps names in later
                # factors from colliding with earlier ones - confirm
                # against the repository's own uniquify method.
                factor.uniquify(factor.name)
            union.absorb(factor)
            self.remove_by_name(factor.name)
        # Renumber all events
        union.renumber()
        # Sort out the root grafts. The way we used to do this involved
        # sorting the union commits by timestamp, but this fails because
        # in real-world repos timestamp order may not coincide with mark
        # order - leading to "mark not defined" errors from the importer at
        # rebuild time.  This method gives less intuitive results but at
        # least means we never need to reorder.
        commits = union.commits()
        for root in roots[1:]:
            most_recent = None
            # Scan for the first union commit strictly later than this
            # root, and graft onto the commit just before it in sequence.
            for (i, event) in enumerate(commits):
                if root.when() >= event.when():
                    continue
                # most_recent is still None whenever this test runs (the
                # loop breaks as soon as it is set), so the first clause
                # is always true here.
                elif not most_recent or event.when() < most_recent.when():
                    most_recent = commits[i-1]
                    break
            if most_recent is None:
                # Should never fire because we sorted the factors array
                # so roots[0] is the oldest commit and the first in union.
                raise Fatal("should never happen!")
            elif most_recent.mark is None:
                # This should never happen either.
                raise Fatal("can't link to commit with no mark")
            else:
                root.addParent(most_recent.mark)
        # Put the result on the load list
        self.repolist.append(union)
        self.choose(union)
    def expunge(self, selection, matchers):
        """Expunge a set of files from the commits in the selection set.

        selection is a sequence of event indexes into the selected repo.
        matchers is a list of regular-expression strings matched against
        fileop paths; it is extended in place as renames and copies of
        matching files are followed.  Matching fileops are removed from
        the selected repo and collected in a new repo named
        <name>-expunges, which is appended to the repo list.  Commits
        left without fileops, and blobs no longer referenced, are then
        deleted from the selected repo.  Raises Recoverable if the
        combined pattern does not compile.
        """
        def digest(toklist):
            # OR all the patterns together, each in a non-capturing group.
            return re.compile("|".join(["(?:" + s + ")" for s in toklist]))
        try:
            # First pass: compute fileop deletions
            # alterations gets one list of fileop indexes per selected event.
            alterations = []
            expunge = digest(matchers)
            for ei in selection:
                event = self.repo[ei]
                deletia = []
                if hasattr(event, "fileops"):
                    for (i, fileop) in enumerate(event.fileops):
                        if debug_enable(DEBUG_DELETE):
                            print(str(fileop))
                        if fileop.op in "DM":
                            if expunge.search(fileop.path):
                                deletia.append(i)
                        elif fileop.op in "RC":
                            # Remember which ends matched; the second
                            # pass consults these flags.
                            fileop.sourcedelete = expunge.search(fileop.source)
                            fileop.targetdelete = expunge.search(fileop.target)
                            if fileop.sourcedelete:
                                deletia.append(i)
                                announce("following %s of %s to %s" %
                                         (fileop.op,
                                          fileop.source,
                                          fileop.target))
                                if fileop.op == "R" and fileop.source in matchers:
                                    matchers.remove(fileop.source)
                                # NOTE(review): the target path goes into
                                # the pattern unescaped - confirm that is
                                # intended for paths with regexp metacharacters.
                                matchers.append("^" + fileop.target + "$")
                                expunge = digest(matchers)
                            elif fileop.targetdelete:
                                # Only the target matched: a rename is
                                # turned into a delete, a copy is dropped.
                                if fileop.op == "R":
                                    fileop.op = "D"
                                elif fileop.op == "C":
                                    deletia.append(i)
                                matchers.append("^" + fileop.target + "$")
                                expunge = digest(matchers)
                alterations.append(deletia)
        except re.error:
            raise Recoverable("you confused the regexp processor!")
        # Second pass: perform actual fileop expunges
        expunged = Repository(self.repo.name + "-expunges")
        expunged.makedir()
        for event in self.repo:
            event.deletehook = None
        for (ei, deletia) in zip(selection, alterations):
            event = self.repo[ei]
            keepers = []
            blobs = []
            # Pop from the highest index down so that earlier indexes
            # remain valid as fileops are removed.
            deletia.reverse()
            for i in deletia:
                fileop = event.fileops[i]
                if fileop.op == 'D':
                    keepers.append(fileop)
                    if verbose:
                        announce("at %d, expunging D %s" \
                                 % (ei+1, fileop.path))
                elif fileop.op == 'M':
                    keepers.append(fileop)
                    if fileop.ref != 'inline':
                        bi = self.repo.find(fileop.ref)
                        blob = self.repo[bi]
                        assert(isinstance(blob, Blob))
                        blobs.append(blob)
                    if verbose:
                        announce("at %d, expunging M %s" \
                                 % (ei+1, fileop.path))
                elif fileop.op in ("R", "C"):
                    assert(fileop.sourcedelete or fileop.targetdelete)
                    # A rename/copy is carried over only when both of its
                    # ends matched.
                    if fileop.sourcedelete and fileop.targetdelete:
                        keepers.append(fileop)
                event.fileops.pop(i)
                event._pathset = None
            # If there are any keeper fileops, hang them and
            # their blobs on deletehooks, cloning the commit() for them.
            if keepers:
                # Undo the reversal so the clone sees original fileop order.
                keepers.reverse()
                blobs.reverse()
                newevent = event.clone(expunged)
                newevent.fileops = keepers
                newevent._pathset = None
                for blob in blobs:
                    blob.deletehook = blob.clone(expunged)
                event.deletehook = newevent
        # Build the new repo and hook it into the load list
        expunged.events = copy.copy(self.repo.front_events())
        expunged.declare_sequence_mutation()
        expunged_branches = expunged.branchlist()
        for event in self.repo:
            if event.deletehook:
                expunged.addEvent(event.deletehook)
                event.deletehook = None
            elif isinstance(event, Reset) or isinstance(event, Tag):
                # A tag or reset follows its target: it is copied only if
                # the event it points at was itself hooked.
                target = self.repo.find(event.committish)
                if target is not None:
                    if self.repo[target].deletehook:
                        expunged.addEvent(copy.deepcopy(event))
                    continue
                if isinstance(event, Reset) and event.ref in expunged_branches:
                    expunged.addEvent(copy.copy(event))
        # The hooks were scratch state; remove the attribute everywhere.
        for event in self.repo.events + expunged.events:
            if hasattr(event, "deletehook"):
                delattr(event, "deletehook")
        # In each repo, drop parent links to marks that repo doesn't hold.
        expunged_marks = set([event.mark for event in expunged.events if hasattr(event, "mark")])
        for event in expunged.events:
            if hasattr(event, "parents"):
                event.setParents([e for e in event.parentMarks() if e in expunged_marks])
        keeper_marks = set([event.mark for event in self.repo.events if hasattr(event, "mark")])
        for event in self.repo.events:
            if hasattr(event, "parents"):
                event.setParents([e for e in event.parentMarks() if e in keeper_marks])
        # Count how many surviving M fileops still refer to each blob mark.
        backreferences = collections.Counter()
        for event in self.repo.events:
            if isinstance(event, Commit):
                for fileop in event.fileops:
                    if fileop.op == 'M':
                        backreferences[fileop.ref] += 1
        # Now remove commits that no longer have fileops, and released blobs.
        # First a keep/drop flag per event (True means keep), then the
        # indexes of the events flagged for dropping.
        deletia = [not ((isinstance(e, Commit) and len(e.fileops)==0) or (isinstance(e, Blob) and not backreferences[e.mark])) for e in self.repo.events]
        deletia = [x[0] for x in [i_e for i_e in enumerate(deletia) if not i_e[1]]]
        self.repolist.append(expunged)
        if not deletia:
            announce("deletion set is empty.")
            return
        if verbose:
            announce("deleting blobs and empty commits %s" % [x+1 for x in deletia])
        self.repo.delete(deletia, ["obliterate", "quiet"])

class RepoSurgeon(cmd.Cmd, RepositoryList):
    "Repository surgeon command interpreter."
    # Table of boolean interpreter options: (name, help text) pairs.
    # __init__ starts every option named here off as False in
    # global_options.
    OptionFlags = (
        ("svn_use_uuid", """\
    If set, use Subversion UUID when faking up email addresses, a la git-svn.
Otherwise, fake up addresses the way git cvs-import does it.
"""),
        ("svn_nobranch", """\
    If set, don't perform branch analysis when lifting a Subversion repo. Leave
it as a linear sequence of commits. This may be useful if the repo has an
unusual topology and you intend to do your own branch surgery.
"""),
        ("canonicalize", """\
    If set, mailbox_in and edit will canonicalize comments by stripping
leading and trailing whitespace and then appending a linefeed.
"""),
        ("fossilize", """\
    If set, the Fossil-ID of each commit is appended to its commit comment
at write time. This option is mainly useful for debugging conversion edge cases.
"""),
        )
    class LineParse:
        """Preparse a command line, peeling off < and > redirections.

        capabilities lists which redirections the command accepts
        ("stdin" and/or "stdout").  Inside the with block, .stdin and
        .stdout are either the process streams or files opened for the
        redirect, and .line holds the command text with the redirect
        tokens removed.  A redirect to "-" keeps the process stream.
        Files opened here are closed when the with block is left.
        Raises Recoverable for an unsupported redirect or a file that
        cannot be opened.
        """
        def __init__(self, line, capabilities=None):
            self.line = line
            self.capabilities = capabilities or []
            self.stdin = sys.stdin
            self.stdout = sys.stdout
            # Files this parse opened itself and therefore must close.
            self._owned = []
        def __enter__(self):
            # Input redirection
            m = re.search(r"<\S+", self.line)
            if m:
                if "stdin" not in self.capabilities:
                    raise Recoverable("no support for < redirection")
                infile = m.group(0)[1:]
                if infile and infile != '-':
                    try:
                        self.stdin = open(infile, "r")
                    except (IOError, OSError):
                        raise Recoverable("can't open %s for read" \
                                          % infile)
                    self._owned.append(self.stdin)
                # Cut out the token and the one character following it.
                self.line = self.line[:m.start(0)] + self.line[m.end(0)+1:]
            # Output redirection
            m = re.search(r">\S+", self.line)
            if m:
                if "stdout" not in self.capabilities:
                    # __exit__ won't run if we raise here, so let go of
                    # any input file opened above.
                    self._release()
                    raise Recoverable("no support for > redirection")
                outfile = m.group(0)[1:]
                if outfile and outfile != '-':
                    try:
                        self.stdout = open(outfile, "w")
                    except (IOError, OSError):
                        self._release()
                        raise Recoverable("can't open %s for write" \
                                          % outfile)
                    self._owned.append(self.stdout)
                self.line = self.line[:m.start(0)] + self.line[m.end(0)+1:]
            return self
        def __exit__(self, extype_unused, value_unused, traceback_unused):
            # Close (and thereby flush) redirect files; never the process
            # streams.  Returning None lets any exception propagate.
            self._release()
        def _release(self):
            "Close every file this parse opened."
            while self._owned:
                self._owned.pop().close()
        def tokens(self):
            "Return the argument token list after the parse for redirects."
            return self.line.split()
    def __init__(self):
        # Bring up both base classes before adding interpreter state.
        cmd.Cmd.__init__(self)
        RepositoryList.__init__(self)
        # Console behavior.
        self.use_rawinput = True
        self.prompt = "reposurgeon% "
        self.echo = 0
        # Selection-parser state.
        self.selection = []
        self.line = ""
        # Bookkeeping.
        self.preferred = None
        self.history = []
        self.callstack = []
        self.profile_log = None
        # Every declared boolean option starts out off.
        for (flagname, _helptext) in RepoSurgeon.OptionFlags:
            global_options[flagname] = False
        global_options['svn_branchify'] = ['trunk', 'tags/*', 'branches/*', '*']
    #
    # Housekeeping hooks.
    #
    def onecmd(self, line):
        "Run a single command; a Recoverable error is reported, not fatal."
        try:
            cmd.Cmd.onecmd(self, line)
        except Recoverable as oops:
            # The command is abandoned but the interpreter carries on.
            complain(oops.msg)
    def postcmd(self, unused, line):
        """Post-command hook: tell the command loop to stop after EOF.

        The first argument (the stop flag handed in by the command loop)
        is deliberately ignored; only the command text decides.
        """
        # The old identity test against a fresh [] literal was always
        # true and draws a SyntaxWarning on newer Pythons, so it is gone.
        return line == "EOF"
    def emptyline(self):
        "Do nothing on an empty line (cmd.Cmd would rerun the last command)."
        pass
    def precmd(self, line):
        "Record and optionally echo the raw line, then strip any # comment."
        trimmed = line.rstrip()
        self.history.append(trimmed)
        if self.echo:
            sys.stdout.write(trimmed + "\n")
        # Everything from the first '#' onward is a comment.
        hashat = line.find("#")
        if hashat == -1:
            return line
        return line[:hashat].rstrip()
    def do_shell(self, line):
        "Execute a shell command."
        # Push out our own pending output before the child writes any;
        # presumably this keeps the two from interleaving out of order.
        for stream in (sys.stdout, sys.stderr):
            stream.flush()
        status = os.system(line)
        if status != 0:
            raise Recoverable("'shell %s' returned error." % line)
    def do_EOF(self, unused):
        "Terminate reposurgeon."
        # The argument is not used; the assertion exists only to mention it.
        assert unused is not None   # pacify pylint
        # Emit a newline; presumably so the shell prompt that follows an
        # end-of-file keystroke starts on a fresh line.
        print("")
        return True
    def cleanup(self):
        "Ask every repository on the list to clean up after itself."
        tracing = debug_enable(DEBUG_SHUFFLE)
        if tracing:
            announce("interpreter cleanup called.")
        for held in self.repolist:
            held.cleanup()
    #
    # The selection-language parsing code starts here.
    #
    def set_selection_set(self, line, default=None):
        """Parse a leading selection expression off line into self.selection.

        Returns the rest of the line.  With no repo chosen the selection
        is empty and the line comes back untouched.  If parsing consumed
        nothing, the selection becomes default.
        """
        self.selection = []
        if self.chosen():
            self.line = line
            self.selection = list(self.eval_expression())
            if self.line != line:
                # TODO: We probably want to stop doing this
                self.selection.sort()
            else:
                self.selection = default
            return self.line.lstrip()
        return line
    def peek(self):
        "Look at the next unparsed character without consuming it."
        if not self.line:
            # Nothing left: hand back the empty line itself.
            return self.line
        return self.line[0]
    def pop(self):
        "Consume and return the next character, or '' when none is left."
        if not self.line:
            return ''
        head, self.line = self.line[0], self.line[1:]
        return head
    def eval_expression(self):
        """Evaluate the selection expression at the front of self.line.

        Returns a set of event indexes into the chosen repo and leaves
        the unparsed remainder in self.line.  A trailing '?' widens the
        set by one step: commits add their parents and children, blobs
        are replaced by the commits whose M fileops refer to them, and
        tags and resets add the event their committish resolves to.
        """
        if debug_enable(DEBUG_LEXER):
            announce("eval_expression(%s)" % self.line)
        self.line = self.line.lstrip()
        value = self.eval_disjunct()
        c = self.peek()
        if c == '?':
            self.pop()
            add_list = []
            remove_list = []
            def add_target(ref):
                # find() can come back with None (for instance,
                # presumably, for a reset that has no committish).  None
                # is not an event index and must stay out of the set.
                where = self.chosen().find(ref)
                if where is not None:
                    add_list.append(where)
            for ei in value:
                event = self.chosen().events[ei]
                if isinstance(event, Commit):
                    for parent in event.parents():
                        add_target(parent.mark)
                    for child in event.children():
                        add_target(child.mark)
                elif isinstance(event, Blob):
                    remove_list.append(ei) # Don't select the blob itself
                    for (i, event2) in enumerate(self.chosen().events):
                        if isinstance(event2, Commit):
                            for fileop in event2.fileops:
                                if fileop.op == 'M' and fileop.ref==event.mark:
                                    add_list.append(i)
                elif isinstance(event, Tag) or isinstance(event, Reset):
                    add_target(event.committish)
            value |= set(add_list)
            value -= set(remove_list)
        self.line = self.line.lstrip()
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_expression(), left = %s" % (value, repr(self.line)))
        return value
    def eval_disjunct(self):
        "Evaluate a run of conjuncts joined by | (the loosest-binding operator)."
        if debug_enable(DEBUG_LEXER):
            announce("eval_disjunct(%s)" % self.line)
        self.line = self.line.lstrip()
        union = set([])
        chaining = True
        while chaining:
            part = self.eval_conjunct()
            if part is None:
                break
            union |= part
            self.line = self.line.lstrip()
            # Another conjunct follows only if the next character is a bar.
            chaining = (self.peek() == '|')
            if chaining:
                self.pop()
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_disjunct(), left = %s" % (union, repr(self.line)))
        return union
    def eval_conjunct(self):
        "Evaluate a run of terms joined by & (binds tighter than |)."
        if debug_enable(DEBUG_LEXER):
            announce("eval_conjunct(%s)" % self.line)
        self.line = self.line.lstrip()
        # Begin with every event index and narrow it with each term.
        surviving = set(range(0, len(self.chosen())))
        chaining = True
        while chaining:
            factor = self.eval_term()
            if factor is None:
                break
            surviving = surviving & factor
            self.line = self.line.lstrip()
            chaining = (self.peek() == '&')
            if chaining:
                self.pop()
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_conjunct(), left = %s" % (surviving, repr(self.line)))
        return surviving
    def eval_term(self):
        "Evaluate one term: a braced subexpression or a single primitive selector."
        if debug_enable(DEBUG_LEXER):
            announce("eval_term(%s)" % self.line)
        self.line = self.line.lstrip()
        if self.peek() == '{':
            self.pop()
            term = self.eval_disjunct()
            self.line = self.line.lstrip()
            if self.peek() != '}':
                raise Recoverable("trailing junk on inner expression")
            self.pop()
        else:
            # Try each primitive parser in turn; the first to return
            # something other than None wins.  Looked up by name so that
            # a parser is only touched when its turn actually comes.
            term = None
            for parser in ("eval_visibility", "eval_polyrange",
                           "eval_textsearch", "eval_branchset",
                           "eval_pathset"):
                term = getattr(self, parser)()
                if term is not None:
                    break
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_term(), left = %s" % (term, repr(self.line)))
        return term
    def eval_visibility(self):
        """Parse a visibility spec: '=' followed by event-type letters.

        Returns None, consuming nothing but leading whitespace, when the
        line does not start with '='.  Otherwise returns the set of
        indexes of events in the chosen repo that satisfy at least one of
        the letters given (B, C, T, R, P, or H for commits without
        children).  Raises Recoverable if the letters are not followed by
        a delimiter.
        """
        if debug_enable(DEBUG_LEXER):
            announce("eval_visibility(%s)" % self.line)
        self.line = self.line.lstrip()
        if not self.peek() == "=":
            visibility = None
        else:
            typeletters = {
                "B" : lambda e: isinstance(e, Blob),
                "C" : lambda e: isinstance(e, Commit),
                "T" : lambda e: isinstance(e, Tag),
                "R" : lambda e: isinstance(e, Reset),
                "P" : lambda e: isinstance(e, Passthrough),
                "H" : lambda e: isinstance(e, Commit) and not e.children(),
                }
            visible = set([])
            self.pop()
            while self.peek() in typeletters:
                visible.add(typeletters[self.pop()])
            # We need a special check here because these expressions
            # could otherwise run onto the text part of the command.
            # The closing brace of a group and the '?' suffix are valid
            # followers too; without them "{=C}" and "=C?" were rejected.
            if self.peek() not in "(){}|&? ":
                raise Recoverable("garbled type mask at %s" % repr(self.line))
            if debug_enable(DEBUG_LEXER):
                announce("visibility set is %s with %s left" % ([x.__name__ for x in visible], repr(self.line)))
            # An event is selected if any requested predicate accepts it.
            visibility = set(i for (i, event) in enumerate(self.chosen())
                             if any(predicate(event) for predicate in visible))
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_visibility(), left = %s" % (visibility, repr(self.line)))
        return visibility
    def eval_polyrange(self):
        "Parse a polyrange specification (list of intervals)."
        if debug_enable(DEBUG_LEXER):
            announce("eval_polyrange(%s)" % self.line)
        self.line = self.line.lstrip()
        polyrange_initials = (":","0","1","2","3","4","5","6","7","8","9","$", "<")
        if not self.peek() in polyrange_initials:
            polyrange = None
        else:
            selection = []
            while self.peek() in polyrange_initials + (".", ","):
                # First, literal command numbers (1-origin)
                match = re.match("[0-9]+", self.line)
                if match:
                    number = match.group()
                    selection.append(int(number)-1)
                    self.line = self.line[len(number):]
                    continue
                # Next, mark references
                match = re.match(":[0-9]+", self.line)
                if match:
                    markref = match.group()
                    self.line = self.line[len(markref):]
                    for (i, event) in enumerate(self.chosen()):
                        if hasattr(event, "mark") and event.mark == markref:
                            selection.append(i)
                            break
                        elif hasattr(event, "committish") and event.committish == markref:
                            selection.append(i)
                            break
                    else:
                        raise Recoverable("mark %s not found." % markref)
                    continue
                elif self.peek() == ':':
                    raise Recoverable("malformed mark")
                # $ means last commit, a la ed(1).
                if self.peek() == "$":
                    selection.append(len(self.chosen())-1)
                    self.pop()
                    continue
                # Comma just delimits a location spec
                if self.peek() == ",":
                    self.pop()
                    continue
                # Following ".." means a span
                if self.line[:2] == "..":
                    if selection:
                        selection.append("..")
                        self.line = self.line[2:]
                        continue
                    else:
                        raise Recoverable("start of span is missing")
                if self.peek() == "<":
                    self.pop()
                    closer = self.line.find('>')
                    if closer == -1:
                        raise Recoverable("reference improperly terminated. '%s'" % self.line)
                    ref = self.line[:closer]
                    self.line = self.line[closer+1:]
                    matched = False
                    # First, search tags
                    for (i, event) in enumerate(self.chosen()):
                        if isinstance(event, Tag) and event.name == ref:
                            matched = True
                            selection.append(i)
                            break
                    # Next, search branches
                    if not matched:
                        branchlist = self.chosen().branchlist()
                        branchlist.sort(key=len, reverse=True) # longest name first
                        for symbol in branchlist:
                            if ref == os.path.basename(symbol):
                                loc = None
                                # Find the last commit with this branchname
                                for (i, event) in enumerate(self.chosen()):
                                    if isinstance(event, Commit):
                                        if event.branch == symbol:
                                            loc = i
                                if loc is None:
                                    raise Recoverable("branch name %s points to hyperspace" % symbol)
                                else:
                                    matched = True
                                    selection.append(loc)
                    # Next, fossil-ID references
                    if not matched:
                        for (i, event) in enumerate(self.chosen()):
                            if hasattr(event, "fossil_id") and event.fossil_id == ref:
                                selection.append(i)
                                matched = True
                                break
                    # Might be a date or action stamp
                    date = None
                    bang = ref.find('!')
                    date_end = len(ref)
                    if bang >= 0:
                        date_end = min(bang, date_end)
                    try:
                        date = Date(ref[:date_end])
                    except Fatal:
                        date = None
                    email_id = None
                    if date is not None and bang > -1:
                        email_id = ref[bang+1:]
                    matches = []
                    if date:
                        for (ei, event) in enumerate(self.chosen().events):
                            if hasattr(event, 'committer'):
                                if event.committer.date != date:
                                    continue
                                if email_id and event.committer.email != email_id:
                                    continue
                                else:
                                    matches.append(ei)
                            elif hasattr(event, 'tagger'):
                                if event.tagger.date != date:
                                    continue
                                elif email_id and event.tagger.email!=email_id:
                                    continue
                                else:
                                    matches.append(ei)
                        if len(matches) < 1:
                            raise Recoverable("no events match %s" % ref)
                        elif len(matches) > 1:
                            raise Recoverable("multiple events match %s" % ref)
                        else:
                            selection.append(matches[0])
                            matched = True
                    if not matched:
                        raise Recoverable("couldn't match a name at <%s>" % ref)
            if debug_enable(DEBUG_LEXER):
                announce("location list is %s with %s left" % (selection, repr(self.line)))
            # Resolve spans
            resolved = []
            spanning = last = 0
            for elt in selection:
                if elt == '..':
                    spanning = True
                else:
                    if spanning:
                        resolved += list(range(last+1, elt+1))
                        spanning = False
                    else:
                        resolved.append(elt)
                    last = elt
            selection = resolved
            if debug_enable(DEBUG_LEXER):
                announce("resolved list is %s with %s left" % (selection, repr(self.line)))
            # Sanity checks
            if spanning:
                raise Recoverable("incomplete range expression.")
            for elt in selection:
                if elt < 0 or elt > len(self.chosen())-1:
                    raise Recoverable("event number %s out of range" % (elt+1))
            polyrange = set(selection)
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_polyrange(), left = %s" % (polyrange, repr(self.line)))
        return polyrange
    def eval_textsearch(self):
        """Parse a /regex/ text search specification.

        Returns None when the next token is not a text search, otherwise
        the set of indices of events having at least one searchable
        attribute that matches the regular expression.  Raises Recoverable
        on a missing closing delimiter or an invalid regular expression.
        """
        if debug_enable(DEBUG_LEXER):
            announce("eval_textsearch(%s)" % self.line)
        self.line = self.line.lstrip()
        if self.peek() != '/':
            return None
        if '/' not in self.line[1:]:
            raise Recoverable("malformed text search specifier")
        # Consume the opening delimiter.  This must not live inside an
        # assert: asserts are stripped under -O, which would leave the
        # delimiter in place and corrupt the parse.
        self.pop()
        endat = self.line.index('/')
        try:
            regex = re.compile(self.line[:endat])
        except re.error:
            raise Recoverable("invalid regular expression")
        self.line = self.line[endat+1:]
        searchables = ("author", "branch", "comment",
                       "committer", "committish", "text",
                       "tagger", "name")
        matchers = []
        for (i, e) in enumerate(self.chosen()):
            # We don't do blobs because it would be too slow
            # and not very useful.
            for searchable in searchables:
                if hasattr(e, searchable) and regex.search(str(getattr(e, searchable))):
                    # One hit is enough; don't record the event twice.
                    matchers.append(i)
                    break
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_textsearch(), left = %s" % (matchers, repr(self.line)))
        return set(matchers)
    def eval_pathset(self):
        "Map a [path] token to the set of commits whose paths include it."
        if self.peek() != "[":
            return None
        if "]" not in self.line:
            raise Recoverable("malformed path wildcard")
        # Drop the opening bracket, then split at the closing one.
        self.pop()
        (path, _closer, remainder) = self.line.partition("]")
        self.line = remainder
        return {ei for (ei, event) in enumerate(self.chosen().events)
                if isinstance(event, Commit) and path in event.paths()}
    def eval_branchset(self):
        "Map a (branch) token to the set of events associated with it."
        if self.peek() != "(":
            return None
        self.pop()
        branchlist = self.chosen().branchlist()
        # Longest name first, so a short name can't shadow a longer one
        # it happens to be a prefix of.
        branchlist.sort(key=len, reverse=True)
        selection = []
        for symbol in branchlist:
            shortname = os.path.basename(symbol)
            if not self.line.startswith(shortname):
                continue
            for (i, event) in enumerate(self.chosen()):
                if isinstance(event, Reset):
                    hit = (event.ref == symbol)
                elif isinstance(event, Commit):
                    hit = (event.branch == symbol)
                elif isinstance(event, Tag):
                    # A tag belongs to the branch of the commit it points at.
                    ti = self.chosen().find(event.committish)
                    assert(ti is not None)
                    assert(isinstance(self.chosen()[ti], Commit))
                    hit = (self.chosen()[ti].branch == symbol)
                else:
                    hit = False
                if hit:
                    selection.append(i)
            self.line = self.line[len(shortname):]
            if self.pop() != ')':
                raise Recoverable("branch set improperly terminated.")
            break
        else:
            # No branch basename is a prefix of the remaining input.
            raise Recoverable("unknown branch name %s" % self.line)
        return set(selection)
    #
    # Helpers
    #
    def report_select(self, line, method, optargs=()):
        """Report on every selected event that implements a given method.

        The default selection is all events having an attribute named by
        'method'.  The method is called with the parse object, the event
        index and then optargs; any true result is written to the
        (possibly redirected) output followed by a newline.
        """
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        default = [n for (n, obj) in enumerate(self.chosen())
                   if hasattr(obj, method)]
        line = self.set_selection_set(line, default)
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            for i in self.selection:
                event = self.chosen().events[i]
                if not hasattr(event, method):
                    continue
                arguments = (parse, i,) + optargs
                summary = getattr(event, method)(*arguments)
                if summary:
                    parse.stdout.write(summary + "\n")
    @staticmethod
    def pop_token(line):
        "Grab a whitespace-delimited token from the front of the line."
        tok = ""
        line = line.lstrip()
        while True:
            if not line or line[0].isspace():
                break
            else:
                tok += line[0]
                line = line[1:]
        line = line.lstrip()
        return (tok, line)
    def edit(self, selection, line):
        """Mailboxize and edit the non-blobs in the selection.

        'line' names the editor to run; if it is blank, $EDITOR is used.
        When the current selection is a single blob, the blob file itself
        is edited in place.  Otherwise the selected events that have an
        email_out method are written to a temporary mailbox file, the
        editor is run on it, and the result is read back via mailbox_in.
        Raises Recoverable if the tempfile can't be written or the editor
        exits with a failure status.
        """
        editor = line.strip() or os.getenv("EDITOR")
        if not editor:
            complain("you have not specified an editor and $EDITOR is not set")
            return
        # Special case: user selected a single blob
        # NOTE(review): this tests self.selection rather than the
        # 'selection' argument - presumably callers pass the same list.
        if len(self.selection) == 1:
            singleton = self.chosen()[self.selection[0]]
            if isinstance(singleton, Blob):
                def find_successor(event, path):
                    # Collect marks of descendant commits that modify path.
                    here = []
                    for child in event.children():
                        for fileop in child.fileops:
                            if fileop.op == "M" and fileop.path == path:
                                here.append(child.mark)
                        here += find_successor(child, path)
                    return here
                for event in self.chosen().commits():
                    for fileop in event.fileops:
                        if fileop.op == 'M' and fileop.ref == singleton.mark:
                            if len(find_successor(event, fileop.path)) > 0:
                                complain("beware: not the last 'M %s' on its branch" % fileop.path)
                            break
                os.system(editor + " " + singleton.blobfile())
                return
            # Fall through
        (tfdesc, tfname) = tempfile.mkstemp()
        try:
            # mkstemp() hands back an open descriptor; wrapping it with
            # fdopen lets the with-statement close it instead of leaking it.
            with os.fdopen(tfdesc, "w") as tfp:
                for i in selection:
                    event = self.chosen()[i]
                    if hasattr(event, "email_out"):
                        tfp.write(event.email_out([], i))
        except IOError:
            raise Recoverable("write of editor tempfile failed")
        if os.system(editor + " " + tfname):
            raise Recoverable("%s returned a failure status" % editor)
        else:
            self.do_mailbox_in("<" + tfname)
        # No try/finally here - we want the tempfile to survive on fatal error
        # because it might have megabytes of metadata edits in it.
        os.remove(tfname)

    def help_selection(self):
        print("""
A quick example-centered reference for selection-set syntax.

First, these ways of constructing singleton sets:

123        event numbered 123 (1-origin)
:345       event with mark 345
<456>      commit with fossil-ID 456 (probably an SVN rev)
<foo>      the tag named 'foo', or failing that the tip commmit of branch foo

You can select commits and tags by date, or by date and committer:

<2011-05-25T07:30:37Z>      specifying the commit date
<2011-05-25T07:30:37Z!esr>  specifying the commit date and committer

More ways to construct event sets:

/foo/      all commits and tags containing the string 'foo' in text or metadata
(foo)      all commits on branch 'foo'.
[foo]      all commits touching the file named 'foo'.
=C         all commits
=H         all head (branch tip) commits
=T         all tags
=B         all blobs
=R         all resets
=P         all passthroughs

You can compose sets as follows:

:123,<foo>     the event marked 123 and the event referenced by 'foo'.
:123..<foo>    the range of events from mark 123 to the reference 'foo'

Sets can be composed with | (union) and & (intersection). | has lower
precedence than &, but set expressions can be grouped with { }.
""")

    def help_syntax(self):
        print("""
All commands begin with a command keyword.  Most take a selection set
immediately following it; see 'help selection' for details.  Some
commands take additional modifier arguments after the selection set.

Most report-generation commands support output redirection. When
arguments for these are parsed, any argument beginning with '>' is
extracted and interpreted as the name of a file to which command
output should be redirected.  Any remaining arguments are available to
the command logic.

Some commands support input redirection. When arguments for these are
parsed, any argument beginning with '<' is extracted and interpreted
as the name of a file from which command output should be taken.  Any
remaining arguments are available to the command logic.
""")
            
    ##
    ## Command implementation begins here
    ##
    #
    # On-line help and instrumentation
    #
    def help_help(self):
        "Print the on-line help for the help command itself."
        message = "Show help for a command. Follow with space and the command name."
        print(message)
    def help_verbose(self):
        "Print the on-line help for the verbose command."
        text = ("\n"
                "Without an argument, this command requests a report of the verbosity\n"
                "level.  'verbose 1' enables progress messages, 'verbose 0' disables\n"
                "them. Higher levels of verbosity are available but intended for\n"
                "developers only.\n")
        print(text)
    def do_verbose(self, line):
        "Set the verbosity level from the argument, or report it."
        global verbose
        if line:
            try:
                verbose = int(line)
            except ValueError:
                complain("verbosity value must be an integer")
        # Report when merely queried, or whenever verbosity is switched on.
        if verbose or not line:
            announce("verbose %d" % verbose)

    def help_quiet(self):
        "Print the on-line help for the quiet command."
        text = ("\n"
                "Without an argument, this command requests a report of the quiet\n"
                "boolean; with the argument 'on' or 'off' it is changed.  When quiet is\n"
                "on, time-varying report fields which would otherwise cause spurious\n"
                "failures in regression testing are suppressed.\n")
        print(text)
    def do_quiet(self, line):
        """Report the quiet flag, or set it from the argument 'on' or 'off'.

        Any other argument is rejected with a complaint rather than being
        silently ignored, matching how do_verbose and do_echo treat bad
        values.
        """
        global quiet
        if not line:
            announce("quiet %s" % ("on" if quiet else "off"))
        elif line == "on":
            quiet = True
        elif line == "off":
            quiet = False
        else:
            complain("quiet value must be 'on' or 'off'")

    def do_echo(self, line):
        "Set the command-echo level from an integer argument."
        try:
            level = int(line)
        except ValueError:
            announce("echo value must be an integer")
        else:
            self.echo = level
        if verbose:
            announce("echo %d" % self.echo)

    def help_resolve(self):
        "Print the on-line help for the resolve command."
        text = ("\n"
                "Does nothing but resolve a selection-set expression\n"
                "and report the resulting event-number set to standard\n"
                "output. Implemented mainly for regression testing, but may be useful\n"
                "for exploring the selection-set language.\n")
        print(text)
    def do_resolve(self, line):
        """Display the set of event numbers generated by a selection set.

        Event numbers are printed 1-origin, as the user types them, while
        the internal selection holds 0-origin indices.
        """
        self.set_selection_set(line)
        if self.selection is None:
            print("No selection")
        elif isinstance(self.selection, list):
            print([x+1 for x in self.selection])
        else:
            complain("resolve didn't expect a selection of %s" % self.selection)

    def help_names(self):
        "Print the on-line help for the names command."
        text = ("\n"
                "List all known symbolic names of branches and tags. Supports > redirection.\n")
        print(text)
    def do_names(self, line):
        """List all known symbolic names of branches and tags.

        Writes one name per line, to standard output or a > redirect.
        """
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            # Each entry needs its own newline; without it all the names
            # run together on a single output line.
            for branch in self.chosen().branchlist():
                parse.stdout.write("branch %s\n" % branch)
            for event in self.chosen():
                if isinstance(event, Tag):
                    parse.stdout.write("tag    %s\n" % event.name)

    def do_script(self, line):
        """Read and execute commands from a named file.

        The first word of the argument is the script file; it and any
        following words are available to script lines as $0, $1, ...
        A script line containing '<<' starts a here-document: following
        lines up to one equal to the text after '<<' are copied to a
        temporary file, which is handed to the command as a < redirect.
        """
        if not line or not line.strip():
            complain("script requires a file argument")
            return
        self.callstack.append(line.split())
        try:
            with open(self.callstack[-1][0]) as scriptfp:
                while True:
                    scriptline = scriptfp.readline()
                    if not scriptline:
                        break
                    # Simulate shell here-document processing
                    heredoc = None
                    if '<<' in scriptline:
                        # Split on the first '<<' only, so a second one on
                        # the line can't break the two-way unpacking.
                        (scriptline, terminator) = scriptline.split("<<", 1)
                        heredoc = tempfile.NamedTemporaryFile(mode="w",
                                                              delete=False)
                        try:
                            while True:
                                nextline = scriptfp.readline()
                                if nextline == '' or nextline == terminator:
                                    break
                                heredoc.write(nextline)
                        finally:
                            heredoc.close()
                        # Note: the command must accept < redirection!
                        scriptline += "<" + heredoc.name
                    # End of heredoc simulation
                    try:
                        arguments = self.callstack[-1]
                        # Highest index first, so that $1 doesn't clobber
                        # the front of $10 and up.
                        for i in reversed(range(len(arguments))):
                            scriptline = scriptline.replace('$' + str(i), arguments[i])
                        self.onecmd(self.precmd(scriptline))
                    finally:
                        # Don't leave the here-document behind on error.
                        if heredoc:
                            os.remove(heredoc.name)
        except IOError as e:
            complain("script failure on '%s': %s" % (line, e))
        finally:
            # Always unwind, or a failing nested script would leave its
            # arguments on top of the stack for the calling script.
            self.callstack.pop()

    def do_history(self, line):
        "Print each command recorded in this session's history."
        for entry in self.history:
            print(entry)

    def do_coverage(self, unused):
        "Dump fileops for every commit, then show the sorted coverage-case set."
        assert unused is not None   # pacify pylint
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        for (index, event) in enumerate(self.chosen().events):
            if not isinstance(event, Commit):
                continue
            event.fileop_dump(index)
        covered = sorted(self.chosen().case_coverage)
        sys.stdout.write("Case coverage: %s\n" % covered)

    def help_index(self):
        print("""
Display four columns of info on selected objects: their number, their
type, the associate mark (or '-' if no mark) and a summary field
varying by type.  For a branch or tag it's the reference; for a commit
it's the commit branch; for a blob it's the repository path of the
file in the blob.  Supports > redirection.
""")
    def do_index(self, line):
        "Write a one-line summary of each selected event; default is all non-blobs."
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        # We could do all this logic using report_select() and index() methods
        # in the objects, but that would have two disadvantages.  First, we'd
        # get a default-set computation we don't want.  Second, for this
        # function it's helpful to have the method strings close together so
        # we can maintain columnation.
        default = [n for (n, obj) in enumerate(self.chosen())
                   if not isinstance(obj, Blob)]
        line = self.set_selection_set(line, default)
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            for i in self.selection:
                event = self.chosen().events[i]
                if isinstance(event, Blob):
                    row = "%6d blob   %6s    %s\n" % (i+1, event.mark,event.path)
                elif isinstance(event, Commit):
                    row = "%6d commit %6s    %s\n" % (i+1, event.mark or '-', event.branch)
                elif isinstance(event, Tag):
                    row = "%6d tag    %6s    %4s\n" % (i+1, event.committish, repr(event.name),)
                elif isinstance(event, Reset):
                    row = "%6d branch %6s    %s\n" % (i+1, event.committish or '-', event.ref)
                else:
                    row = "?      -      %s\n" % (event,)
                parse.stdout.write(row)
    def help_profile(self):
        "Print the on-line help for the profile command."
        text = ("\n"
                "Enable profiling. Must be one of the initial command-line arguments, and\n"
                "gathers statistics only on code executed via '-'.\n")
        print(text)
    def do_profile(self, line):
        "Record the argument as the profile log name and announce profiling."
        assert line is not None # Pacify pylint
        logname = line
        self.profile_log = logname
        announce("profiling enabled.")

    #
    # Information-gathering
    #
    def help_stats(self):
        "Print the on-line help for the stats command."
        text = ("\n"
                "Report size statistics and import/export method information of the\n"
                "currently chosen repository. Supports > redirection.\n")
        print(text)
    def do_stats(self, line):
        """Report information on repositories.

        Each whitespace-separated word of the argument names a loaded
        repo; with no argument the currently chosen repo is reported.
        Raises Recoverable if a named repo does not exist.
        """
        def count(repo, otype):
            return len([x for x in repo.events if isinstance(x, otype)])
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            if not parse.line:
                # Check for a chosen repo before touching its name;
                # dereferencing first would raise AttributeError on None.
                chosen = self.chosen()
                if chosen is None or chosen.name is None:
                    complain("no repo has been chosen.")
                    return
                parse.line = chosen.name
            for name in parse.tokens():
                repo = self.repo_by_name(name)
                if repo is None:
                    raise Recoverable("no such repo as %s" % name)
                parse.stdout.write("%s: %.0fK, %d events, %d blobs, %d commits, %d tags, %d resets, %s.\n" % \
                      (repo.name, repo.size() / 1000.0, len(repo),
                       count(repo, Blob), count(repo, Commit),
                       count(repo, Tag), count(repo, Reset),
                       rfc3339(repo.readtime)))
                if repo.sourcedir:
                    parse.stdout.write("  Loaded from %s\n" % repo.sourcedir)
                if repo.vcs:
                    parse.stdout.write(repr(repo.vcs) + "\n")

    def help_list(self):
        "Print the on-line help for the list command."
        text = ("\n"
                "Display commits in a human-friendly format; the first column is raw\n"
                "event numbers, the second a timestamp in local time. If the repository\n"
                "has fossil IDs, they will be displayed in the third column. The\n"
                "leading portion of the comment follows. Supports > redirection.\n")
        print(text)
    def do_list(self, line):
        "Report on selected events via their 'lister' method, passing the screen width."
        extra = (screenwidth(),)
        self.report_select(line, "lister", extra)

    def help_tip(self):
        print("""
Display the branch tip names associated with commits in the selection
set.  These will not necessarily be the same as their branch fields
(which will often be tag names if the repo contains either annotated
or lightweight tags).

If a commit is at a branch tip, its tip is its branch name.  If it has
only one child, its tip is the child's tip.  If it has multiple children,
then if there is a child with a matching branch name its tip is the
child's tip.  Otherwise this function throws a recoverable error.

Supports > redirection.
""")
    def do_tip(self, line):
        "Report on selected events via their 'tip' method, passing the screen width."
        extra = (screenwidth(),)
        self.report_select(line, "tip", extra)

    def help_tags(self):
        "Print the on-line help for the tags command."
        text = ("\n"
                "Display lightweight tags: two fields, an event number and a tag name.\n"
                "Supports > redirection.\n")
        print(text)
    def do_tags(self, line):
        "Report on selected events via their 'tags' method, passing the screen width."
        extra = (screenwidth(),)
        self.report_select(line, "tags", extra)

    #
    # Housekeeping
    #
    def help_prefer(self):
        print("""
Report or set (with argument) the preferred type of repository. With
no arguments, describe capabilities of all supported systems. With
an argument (which must be the name of a supported system) this has
two effects:

First, if there are multiple repositories in a directory you do a read
on, reposurgeon will read the preferred one (otherwise it will
complain that it can't choose among them).

Secondly, if there is a selected repo, this will change its type.
This means that you do a write to a directory, it will build a repo of
the preferred type rather than its original type (if it had one).

If no preferred type has been explicitly selected, reading in a
repository (but not a fast-import stream) will implicitly set it
to the type of that repository.
""")
    def do_prefer(self, line):
        "List the supported repository types, or set the preferred one by name."
        if line:
            wanted = line.lower()
            for repotype in vcstypes + extractors:
                if repotype.name == wanted:
                    self.preferred = repotype
                    if self.chosen():
                        self.chosen().vcs = self.preferred
                    break
            else:
                known = [x.name for x in vcstypes] + [x.name for x in extractors if x.visible]
                complain("known types are %s." % " ".join(known))
        else:
            for vcs in vcstypes:
                print(vcs)
            if any(ext.visible for ext in extractors):
                readonly = " ".join(ext.name for ext in extractors if ext.visible)
                print("Other systems supported for read only: %s\n" % readonly)
        if verbose:
            if self.preferred:
                print("%s is the preferred type." % self.preferred.name)
            else:
                print("No preferred type has been set.")

    def help_choose(self):
        print("""
Choose a named repo on which to operate.  The name of a repo is
normally the basename of the directory or file it was loaded from, but
repos loaded from standard input are 'unnamed'. The program will add
a disambiguating suffix if there have been multiple reads from the
same source.

With no argument, lists the names of the currently stored repositories
and their load times.  The second column is '*' for the currently selected
repository, '-' for others.
""")
    def do_choose(self, line):
        "Choose a repo by name, or with no argument list the loaded repos."
        if not self.repolist:
            # Bails out only when verbose; otherwise falls through with
            # an empty list.
            if verbose > 0:
                complain("no repositories are loaded.")
                return
        self.repolist.sort(key=lambda x: x.name)
        if line:
            if line in self.reponames():
                self.choose(self.repo_by_name(line))
                if verbose:
                    self.do_stats(line)
            else:
                complain("no such repo as %s" % line)
            return
        for repo in self.repolist:
            is_current = self.chosen() and repo == self.chosen()
            status = '*' if is_current else '-'
            if not quiet:
                sys.stdout.write(rfc3339(repo.readtime) + " ")
            sys.stdout.write("%s %s\n" % (status, repo.name))

    def help_drop(self):
        "Print the on-line help for the drop command."
        text = ("\n"
                "Drop a repo named by the argument from reposurgeon's list, freeing the memory\n"
                "used for its metadata and deleting on-disk blobs. With no argument, drops the\n"
                "currently chosen repo.\n")
        print(text)
    def do_drop(self, line):
        """Drop a repo from reposurgeon's list.

        With no argument the currently chosen repo is dropped.  When
        verbose, the remaining repos are listed afterwards.
        """
        if not self.reponames():
            if verbose:
                complain("no repositories are loaded.")
                return
        chosen = self.chosen()
        if not line:
            # Nothing named and nothing chosen: complain instead of
            # dereferencing None.
            if chosen is None:
                complain("no repo has been chosen.")
                return
            line = chosen.name
        if line in self.reponames():
            # Other repos may be loaded while none is chosen, so guard
            # the comparison against None as well.
            if chosen is not None and line == chosen.name:
                self.unchoose()
            holdrepo = self.repo_by_name(line)
            holdrepo.cleanup()
            self.remove_by_name(line)
            del holdrepo
        else:
            complain("no such repo as %s" % line)
        if verbose:
            # Emit listing of remaining repos
            self.do_choose('')

    def help_rename(self):
        "Print the on-line help for the rename command."
        text = ("\n"
                "Rename the currently chosen repo; requires an argument.  Won't do it\n"
                "if there is already one by the new name.\n")
        print(text)
    def do_rename(self, line):
        """Rename the currently chosen repository.

        The new name is required; the rename is refused if a repo with
        that name is already loaded or if no repo is chosen.
        """
        if not line:
            # The help text promises an argument is required; enforce it
            # rather than renaming the repo to the empty string.
            complain("rename requires an argument.")
        elif line in self.reponames():
            complain("there is already a repo named %s." % line)
        elif not self.chosen():
            complain("no repository is currently chosen.")
        else:
            self.chosen().rename(line)

    def help_preserve(self):
        "Print the on-line help for the preserve command."
        text = ("\n"
                "Add (presumably untracked) files or directories to the repo's list of\n"
                "paths to be restored from the backup directory after a rebuild. Each\n"
                "argument, if any, is interpreted as a pathname.  The current preserve\n"
                "list is displayed afterwards.\n")
        print(text)
    def do_preserve(self, line):
        """Add files and subdirectories to the preserve set.

        Each whitespace-separated word of the argument is passed to the
        chosen repo's preserve method; the resulting preserve list is
        announced afterwards.
        """
        if self.chosen() is None:
            # Complain like the other commands instead of failing with
            # an AttributeError on None.
            complain("no repo has been chosen.")
            return
        for filename in line.split():
            self.chosen().preserve(filename)
        announce("preserving %s." % list(self.chosen().preservable()))

    def help_unpreserve(self):
        print("""
Remove (presumably untracked) files or directories to the repo's list
of paths to be restored from the backup directory after a
rebuild. Each argument, if any, is interpreted as a pathname.  The
current preserve list is displayed afterwards.
""")
    def do_unpreserve(self, line):
        """Remove files and subdirectories from the preserve set.

        Each whitespace-separated word of the argument is passed to the
        chosen repo's unpreserve method; the resulting preserve list is
        announced afterwards.
        """
        if self.chosen() is None:
            # Complain like the other commands instead of failing with
            # an AttributeError on None.
            complain("no repo has been chosen.")
            return
        for filename in line.split():
            self.chosen().unpreserve(filename)
        announce("preserving %s." % list(self.chosen().preservable()))

    #
    # Serialization and de-serialization.
    #
    def help_read(self):
        "Print the on-line help for the read command."
        # Built from explicit pieces so the whitespace-only separator
        # line after the first paragraph stays visible in the source.
        text = ("\n"
                "A read command with no arguments is treated as 'read .', operating on the\n"
                "current directory.\n"
                " \n"
                "With a directory-name argument, this command attempts to read in the\n"
                "contents of a repository in any supported version-control system under\n"
                "that directory.\n"
                "\n"
                "If the argument is the name of a plain file, it will be read in as a\n"
                "fast-import stream or Subversion dump, whichever it is.\n"
                "\n"
                "With an argument of '-', this command reads a fast-import stream or\n"
                "Subversion dump from standard input (this will be useful in filters\n"
                "constructed with command-line arguments).\n")
        print(text)
    def do_read(self, line):
        "Read a repository or stream, add it to the repo list and choose it."
        source = os.path.expanduser(line) if line else line
        # No argument, or '.', means the current directory.
        if not source or source == '.':
            source = os.getcwd()
        repo = read_repo(source, self.preferred)
        self.repolist.append(repo)
        self.choose(repo)
        current = self.chosen()
        if current:
            if current.vcs:
                self.preferred = current.vcs
            basename = os.path.basename(current.sourcedir or source or "unnamed")
            current.rename(self.uniquify(basename))
        if verbose:
            self.do_choose('')

    def help_write(self):
        "Print the on-line help for the write command."
        text = ("\n"
                "Dump a fast-import stream representing selected events to standard\n"
                "output (if second argument is empty or '-') or a file. Property\n"
                "extensions will be omitted if the importer for the selected repo cannot\n"
                "digest them. Fails if the argument exists and is a directory or\n"
                "anything other than a plain file. The default selection is all events.\n")
        print(text)
    def do_write(self, line):
        "Export the selected events (default: all) as a fast-import stream."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        everything = list(range(len(self.chosen())))
        line = self.set_selection_set(line, everything)
        # Backward-compatibility hack.  Will have to be removed if we
        # ever define modifiers for this command.
        if line and line[0] != ">":
            line = ">" + line
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            show_progress = (verbose==1 and not quiet)
            self.chosen().fast_export(self.selection, parse.stdout,
                                      progress=show_progress,
                                      target=self.preferred)

    def help_inspect(self):
        "Print the on-line help for the inspect command."
        text = ("\n"
                "Dump a fast-import stream representing selected events to standard output.\n"
                "Just like a write, except (1) the progress meter is disabled, and (2) there\n"
                "is an identifying header before each event dump.  Supports > redirection.\n")
        print(text)
    def do_inspect(self, line):
        "Dump the selected events (default: all), each under a header line."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        everything = list(range(len(self.chosen())))
        line = self.set_selection_set(line, everything)
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            for ei in self.selection:
                event = self.chosen().events[ei]
                # Header is "Event N", a space, then '=' padding such
                # that title plus padding make 72 characters.
                title = "Event %s" % repr(ei+1)
                parse.stdout.write(title + " " + ("=" * (72 - len(title))) + "\n")
                if isinstance(event, Commit):
                    body = event.dump()
                else:
                    body = str(event)
                parse.stdout.write(body)

    def help_rebuild(self):
        "Print the on-line help for the rebuild command."
        text = ("\n"
                "Rebuild a repository from the state held by reposurgeon.  The argument\n"
                "specifies the target directory in which to do the rebuild; if the\n"
                "repository read was from a repo directory (and not a git-import stream), it\n"
                "defaults to that directory.  If the target directory is nonempty\n"
                "its contents are backed up to a save directory.\n")
        print(text)
    def do_rebuild(self, line):
        "Rebuild a repository from the edited state of the chosen repo."
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        # The remainder of the command line is handed through as the
        # target argument, along with the preferred system type.
        rebuild_repo(repo, line, self.preferred)

    #
    # Editing commands
    #
    def help_mailbox_out(self):
        print("""
Emit a mailbox file of messages in RFC822 format representing the
contents of repository metadata. Takes a selection set; members of the set
other than commits, annotated tags, and passthroughs are ignored (that
is, presently, blobs and resets). Supports > redirection.
""")
    def do_mailbox_out(self, line):
        "Report the selection as a mailbox file of object metadata."
        # All the work is delegated to the generic selection reporter,
        # which is told which per-event method to use.
        reporter = "email_out"
        self.report_select(line, reporter)

    def help_mailbox_in(self):
        print("""
Accept on standard input a mailbox file of messages in RFC822 format
representing the contents of the metadata in selected commits and
annotated tags. Takes no selection set. Takes < redirection.

Users should be aware that modifying an Event-Number field will change
which event the update from that message is applied to.  This is
unlikely to have good results.

If the Event-Number field is absent, the mailbox_in logic will
attempt to match the commit or tag first by Fossil-ID, then by a unique
committer ID and timestamp pair.

If output is redirected and the modifier 'changed' appears, a minimal
set of modifications actually made is written to the output file.
""")
    def do_mailbox_in(self, line):
        """Accept a mailbox file representing object metadata and update from it.

        Messages are read from the (possibly redirected) standard input.
        Each one is matched to an event by its Event-Number header if
        present, else by Fossil-ID, else by a committer or tagger
        name/date stamp.  Nothing is modified unless every message
        matches cleanly; otherwise Recoverable is raised with the error
        count.  If output is redirected and the word 'changed' appears
        on the command line, the updates that actually altered an event
        are written to the output.
        """
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        # Everything happens inside the LineParse context because the
        # redirected output is written at the very end.
        # NOTE(review): presumably LineParse releases its redirected
        # streams on exit - confirm against its definition.
        with RepoSurgeon.LineParse(line, capabilities=["stdin","stdout"]) as parse:
            update_list = []
            while True:
                msg = RepoSurgeonEmail.readmsg(parse.stdin)
                if not msg:
                    break
                update_list.append(email.message_from_string(msg))
            # Build lookup tables for the validation pass.  The counter
            # lets us detect stamps shared by more than one event.
            attribution_map = {}
            attribution_counts = collections.Counter()
            fossil_map = {}
            for commit in repo.commits():
                stamp = commit.committer.action_stamp()
                attribution_map[stamp] = commit
                attribution_counts[stamp] += 1
                if commit.fossil_id:
                    fossil_map[commit.fossil_id] = commit
            for event in repo.events:
                if isinstance(event, Tag):
                    stamp = event.tagger.action_stamp()
                    attribution_map[stamp] = event
                    attribution_counts[stamp] += 1
            # Validation pass.  A message that fails is counted and
            # skipped at once, so no later check runs on a missing or
            # stale event; any failure aborts before updates are applied.
            events = []
            errors = 0
            for (i, message) in enumerate(update_list):
                if "Event-Number" in message:
                    try:
                        eventnum = int(message["Event-Number"]) - 1
                    except ValueError:
                        complain("event number garbled in update %d" % (i+1,))
                        errors += 1
                        continue
                    if eventnum < 0 or eventnum >= len(repo):
                        # Report the 1-origin number the user wrote.
                        complain("event number %d out of range in update %d" \
                                 % (eventnum+1, i+1))
                        errors += 1
                        continue
                    event = repo[eventnum]
                    # Only this path can land on an arbitrary event; the
                    # other paths look up commits and tags only.
                    if not hasattr(event, "email_in"):
                        complain("event %d cannot be modified"%(eventnum+1,))
                        errors += 1
                        continue
                elif "Fossil-ID" in message:
                    try:
                        event = fossil_map[message["Fossil-ID"]]
                    except KeyError:
                        complain("no commit matches fossil %s" \
                                 % message["Fossil-ID"])
                        errors += 1
                        continue
                elif "Committer" in message and "Committer-Date" in message:
                    # Parse the headers into a scratch commit so the
                    # stamp is computed the same way as for real ones.
                    blank = Commit()
                    blank.committer = Attribution()
                    blank.email_in(message)
                    stamp = blank.committer.action_stamp()
                    try:
                        event = attribution_map[stamp]
                    except KeyError:
                        complain("no commit matches stamp %s" % stamp)
                        errors += 1
                        continue
                    if attribution_counts[stamp] > 1:
                        complain("multiple events match %s" % stamp)
                        errors += 1
                        continue
                elif "Tagger" in message and "Tagger-Date" in message:
                    blank = Tag()
                    blank.tagger = Attribution()
                    blank.email_in(message)
                    stamp = blank.tagger.action_stamp()
                    try:
                        event = attribution_map[stamp]
                    except KeyError:
                        complain("no tag matches stamp %s" % stamp)
                        errors += 1
                        continue
                    if attribution_counts[stamp] > 1:
                        complain("multiple events match %s" % stamp)
                        errors += 1
                        continue
                else:
                    complain("no commit matches update %d:\n%s" % (i+1, message))
                    errors += 1
                    continue
                events.append(event)
            if errors > 0:
                raise Recoverable("%d errors in metadata updates" % errors)
            # Now apply the updates; events and update_list are aligned
            # because every message produced exactly one event.
            changers = []
            for (event, update) in zip(events, update_list):
                if event.email_in(update):
                    changers.append(update)
            if verbose:
                if not changers:
                    announce("no events modified.")
                else:
                    announce("%d events modified." % len(changers))
            if parse.stdout != sys.stdout:
                if "changed" in parse.line:
                    for update in changers:
                        parse.stdout.write(RepoSurgeonEmail.Divider + "\n" + update.as_string(unixfrom=False))

    def help_edit(self):
        print("""
Report the selection set of events to a tempfile as mailbox_out does,
call an editor on it, and update from the result as mailbox_in does.
If you do not specify an editor name as second argument, it will be
taken from the $EDITOR variable in your environment.

Normally this command ignores blobs because mailbox_out does.
However, if you specify a selection set consisting of a single
blob, your editor will be called on the blob file.

The modifier 'multiline' will trim the selection set to commits that
are multiline and not in summary/blank-line/details form.
""")
    def do_edit(self, line):
        "Edit metadata of the selected events interactively."
        if not self.chosen():
            complain("no repo is loaded")
            return
        # Default selection: every event that can be mailed out.
        editable = [index for (index, item) in enumerate(self.chosen())
                    if hasattr(item, "email_out")]
        rest = self.set_selection_set(line, editable)
        if "multiline" in rest:
            rest = rest.replace("multiline", "")
            # Matches a comment whose first line is directly followed
            # by a non-blank second line.
            second_line_re = re.compile("[^\n]*\n[^\n]")
            events = self.chosen().events
            self.selection = [ei for ei in self.selection
                              if isinstance(events[ei], Commit)
                              and second_line_re.match(events[ei].comment)]
        self.edit(self.selection, rest)

    def help_delete(self):
        print("""
Delete a selection set of commits (and their associated blobs, if
any).  The default selection set for this command is empty.  Tags
pointing at the commits are also removed.

Note that applying this command to a commit with a modify operation
will *not* necessarily remove changes made by that commit from later
versions.  It will have the effect of retracting the modifications
only when they are the final ones on the commit's branch.
""")
    def do_delete(self, line):
        """Delete events in the specified selection set.

        Whatever follows the selection set is taken as a whitespace-
        separated list of deletion modifiers.  Each is checked against
        the known set; an unknown one is reported and nothing is
        deleted.  The validated modifier list is then passed to the
        repository's delete operation along with the selection.
        """
        if not self.chosen():
            complain("no repo is loaded")
            return
        line = self.set_selection_set(line, [])
        modifiers = str(line).split()
        for token in modifiers:
            if token not in ("complain",
                             "coalesce",
                             "obliterate",
                             "pushback",
                             "tagback",
                             "tagforward",
                             "quiet"):
                complain("no such deletion modifier as " + token)
                return
        # Hand over exactly the tokens validated above, not some other
        # copy of the command line.
        self.chosen().delete(self.selection, modifiers)

    def help_coalesce(self):
        print("""
Scan the selection set for runs of commits with identical
comments close to each other in time (this is a common form of scar
tissues in repository up-conversions from older file-oriented
version-control systems).  Merge these cliques by deleting all but the
last commit, in order.

The optional second argument, if present, is a maximum time
separation in seconds; the default is 90 seconds.
""")
    def do_coalesce(self, line):
        "Coalesce runs of adjacent commits with matching branch and comment."
        if not self.chosen():
            complain("no repo is loaded")
            return
        line = self.set_selection_set(line, [])
        # Optional argument: maximum separation in seconds.
        timefuzz = 90
        if line:
            try:
                timefuzz = int(line)
            except ValueError:
                raise Recoverable("time-fuzz value must be an integer")
        # This is a crude search that ignores the repo graph structure;
        # properly speaking we should be chasing child links.  Screw
        # it; this operation only makes sense for cleaning up
        # artifacts in linear stretches of history that have been
        # lifted from file-oriented VCSes like RCS and CVS.
        # NOTE(review): the scan below walks every commit in the repo;
        # self.selection is not consulted.
        positions = [pos for (pos, item) in enumerate(self.chosen())
                     if isinstance(item, Commit)]
        eligible = []
        for (here, there) in zip(positions, positions[1:]):
            cthis = self.chosen().events[here]
            cnext = self.chosen().events[there]
            if not (isinstance(cthis, Commit) and isinstance(cnext, Commit)):
                continue
            if cthis.branch != cnext.branch or cthis.comment != cnext.comment:
                continue
            #elif cthis.committer.email != cnext.committer.email:
            #    continue
            if cthis.committer.date.delta(cnext.committer.date) < timefuzz:
                # The earlier member of the pair is the one to go.
                eligible.append(here)
        if verbose:
            announce("deletion set is %s" % [x+1 for x in eligible])
        self.chosen().delete(eligible, "coalesce")

    def help_renumber(self):
        print("""
Renumber the marks in a repository, from :1 up to <n> where <n> is the
count of the last mark. Just in case an importer ever cares about mark
ordering or gaps in the sequence.
""")
    def do_renumber(self, unused):
        """Renumber the marks in the selected repo.

        The command takes no arguments; the parameter exists only
        because the command dispatcher always supplies one.
        """
        assert unused is not None    # pacify pylint
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        # Operate on the chosen repo, the same object the guard above
        # just tested.
        self.chosen().renumber()

    def help_timeoffset(self):
        print("""
Apply a time offset to all time/date stamps in the selected set.  An offset
argument is required; it may be in the form [+-]ss, [+-]mm:ss or [+-]hh:mm:ss.
The leading sign is required to distingush it from a selection expression.

Optionally you may also specify another argument in the form [+-]hhmm, a
timeone literal to apply.  To apply a timezone without an offset, use
an offset literal of +0 or -0.
""")
    def do_timeoffset(self, line):
        """Apply a time offset to all dates in selected events.

        The first argument is a signed offset of the form [+-]ss,
        [+-]mm:ss or [+-]hh:mm:ss; the sign applies to the whole
        quantity.  An optional second argument of the form [+-]hhmm
        replaces the timezone of every adjusted date.  Malformed
        arguments are reported and nothing is changed.
        """
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        line = self.set_selection_set(line, list(range(len(self.chosen()))))
        if not line:
            complain("a signed time offset argument is required.")
            return
        elif not line[0] in ('-', '+'):
            complain("time offset argument must begin with + or -.")
            return
        args = str(line).split()
        # Peel the sign off first so that it scales the whole offset
        # rather than just the leading field.
        sign = -1 if args[0][0] == '-' else 1
        fields = args[0][1:].split(":")
        if len(fields) > 3:
            complain("too many colons")
            return
        if not all(field.isdigit() for field in fields):
            complain("expected numeric literals in date format")
            return
        # Fold hh:mm:ss left to right; each step promotes the running
        # total by a factor of 60, so hours end up weighted by 3600.
        offset = 0
        for field in fields:
            offset = offset * 60 + int(field)
        offset *= sign
        timezone = None
        if len(args) > 1:
            # Anchored so trailing junk after the four digits is refused.
            if not re.match("[+-][0-9][0-9][0-9][0-9]$", args[1]):
                complain("expected timezone literal to be [+-]hhmm")
                return
            timezone = args[1]
        for ei in self.selection:
            event = self.chosen()[ei]
            if isinstance(event, Tag):
                if event.tagger:
                    event.tagger.date.timestamp += offset
                    if timezone is not None:
                        event.tagger.date.timezone = timezone
            elif isinstance(event, Commit):
                event.committer.date.timestamp += offset
                if timezone is not None:
                    event.committer.date.timezone = timezone
                for author in event.authors:
                    author.date.timestamp += offset
                    if timezone is not None:
                        author.date.timezone = timezone

    def help_divide(self):
        print("""
Attempt to partition a repo by cutting the parent-child link
between two specified commits (they must be adjacent). Does not take a
general selection-set argument.  It is only necessary to specify the
parent commit, unless it has multiple children in which case the child
commit must follow (separate it with a comma).

If the repo was named 'foo', you will normally end up with two repos
named 'foo-early' and 'foo-late'.  But if the commit graph would
remain connected through another path after the cut, the behavior
changes.  In this case, if the parent and child were on the same
branch 'qux', the branch segments are renamed 'qux-early' and
'qux-late'.
""")
    def do_divide(self, line):
        """Attempt to topologically partition the repo.

        The selection must name one commit (the parent, which must
        then have exactly one child) or two (parent, then child).
        The link between them is cut.  If the topological cut is
        refused, the parent link is removed anyway and, when both
        commits share a branch, that branch is split into -early
        and -late segments.
        """
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        self.set_selection_set(line, [])
        if len(self.selection) == 0:
            complain("one or possibly two arguments specifying a link are required")
            return
        if len(self.selection) > 2:
            # Bail out here; there is no sensible link to cut.
            complain("too many arguments")
            return
        early = self.chosen()[self.selection[0]]
        if not isinstance(early, Commit):
            complain("link endpoints must be commits")
            return
        if len(self.selection) == 1:
            possibles = early.children()
            if len(possibles) > 1:
                complain("commit has multiple children, one must be specified")
                return
            elif len(possibles) == 0:
                complain("parent has no children")
                return
            late = possibles[0]
        else:
            late = self.chosen()[self.selection[1]]
            if not isinstance(late, Commit) \
                   or early.mark not in late.parentMarks():
                complain("not a parent-child pair")
                return
        # Try the topological cut first
        if not self.cut(early, late):
            # If that failed, cut anyway and rename the branch segments
            late.removeParent(early.mark)
            if early.branch != late.branch:
                announce("no branch renames were required")
            else:
                basename = early.branch
                announce("%s has been split into %s-early and %s-late" \
                         % (basename, basename, basename))
                # Events up to and including the parent's position go
                # to the early segment, the rest to the late one.
                for (i, event) in enumerate(self.chosen().events):
                    if hasattr(event, "branch") and event.branch == basename:
                        if i <= self.selection[0]:
                            event.branch += "-early"
                        else:
                            event.branch += "-late"
        if verbose:
            self.do_choose("")

    def help_expunge(self):
        print("""
Expunge files from the selected portion of the repo history; the
default is the entire history.  The arguments to this command may be
paths or Python regular expressions matching paths.

All filemodify (M) operations and delete (D) operations involving a
matched file in the selected set of events are disconnected from the
repo and put in a removal set.  Renames are followed as the tool walks
forward in the selection set; each triggers a warning message. If a
selected file is a copy (C) target, the copy will be deleted and a
warning message issued. If a selected file is a copy source, the copy
target will be added to the list of paths to be deleted and a warning
issued.

After file expunges have been performed, any commits with no
remaining file operations will be deleted, and any tags pointing to
them. Commits with deleted fileops pointing both in and outside the
path set are not deleted, but are cloned into the removal set.

The removal set is not discarded. It is assembled into a new
repository named after the old one with the suffix "-expunges" added.
Thus, this command can be used to carve a repository into sections by
file path matches.
""")
    def do_expunge(self, line):
        "Expunge files matching the given patterns from the chosen repo."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        # Default selection is the whole history.
        whole_history = list(range(len(self.chosen())))
        remainder = self.set_selection_set(line, whole_history)
        # Whatever follows the selection is a list of path patterns.
        patterns = str(remainder).split()
        self.expunge(self.selection, patterns)

    def help_split(self):
        print("""
Split a specified commit in two, the opposite of coalesce.

    split N at M
    split N by PREFIX

The first argument is required to be a commit location; the second is
a preposition which indicates which splitting method to use. If the
preposition is 'at', then the third argument must be an integer
1-origin index of a file operation within the commit. If it is 'in',
then the third argument must be a pathname to be matched.

The commit is copied and inserted into a new position in the
event sequence, immediately following itself; the duplicate becomes
the child of the original, and replaces it as parent of the original's
children. Commit metadata is duplicated; the mark of the new commit is
then changed, with 'bis' added as a suffix.

Finally, some file operations - starting at the one matched or indexed
by the split argument - are moved forward from the original commit
into the new one.  Legal indices are 2-n, where n is the number of
file operations in the original commit.
""")
    def do_split(self, line):
        """Split a commit in two.

        Requires a singleton selection naming a commit, followed by a
        preposition and an argument.  With 'at' the argument is a
        1-origin fileop index; with 'in' (or its synonym 'by') it is a
        path prefix to be matched.  Raises Recoverable on any malformed
        request or when no fileop matches.
        """
        if self.chosen() is None:
            raise Recoverable("no repo has been chosen.")
        line = self.set_selection_set(line, [])
        if len(self.selection) != 1:
            raise Recoverable("selection of a single commit required for this command")
        where = self.selection[0]
        event = self.chosen()[where]
        if not isinstance(event, Commit):
            raise Recoverable("fileop argument doesn't point at a commit")
        fields = str(line).split()
        if len(fields) != 2:
            raise Recoverable("split requires a preposition and an argument")
        (prep, obj) = fields
        if prep == 'at':
            # Guard only the conversion, so that a ValueError from the
            # split operation itself is not misreported as a bad index.
            try:
                splitpoint = int(obj) - 1
            except ValueError:
                raise Recoverable("expected integer fileop index (1-origin)")
            # Splitting at the first fileop would leave the original
            # commit empty, hence the lower bound of 1 (zero-origin).
            if not 1 <= splitpoint < len(event.fileops):
                raise Recoverable("fileop index out of range")
            self.chosen().split_commit_by_index(where, splitpoint)
        elif prep in ('in', 'by'):
            split = self.chosen().split_commit_by_prefix(where, obj)
            if not split:
                raise Recoverable("couldn't find '%s' in a fileop path." \
                                  % obj)
        else:
            raise Recoverable("don't know what to do for preposition %s" % prep)
        if verbose:
            # Show the original commit and the new one right after it.
            self.do_inspect(repr(where+1) + "," + repr(where+2))

    def help_unite(self):
        print("""
Unite repositories. Name any number of loaded repositories; they will
be united into one union repo and removed from the load list.  The
union repo will be selected.

Before merging, the repos will be sorted by date of first commit.  The
oldest will keep all its branch and tag names unchanged (this rule is
followed so there will always be a defined default branch).  All others
will have their branch and tag names suffixed with their load name.
Marks will be renumbered.

The name of the new repo will be the names of all parts concatenated,
separated by '+'. It will have no source directory or preferred system
type.
""")
    def do_unite(self, line):
        "Unite the named loaded repos into a single union repo."
        self.unchoose()
        # Resolve every name before doing anything; the first unknown
        # name aborts the command.
        factors = []
        for name in line.split():
            found = self.repo_by_name(name)
            if found is None:
                raise Recoverable("no such repo as %s" % name)
            factors.append(found)
        if len(factors) < 2:
            raise Recoverable("unite requires repo name arguments")
        self.unite(factors)
        if verbose:
            self.do_choose('')

    def help_graft(self):
        print("""
For when unite doesn't give you enough control.  The selection set
must be of size 1, identifying a single commit in the currently
selected repo.  A following argument must be a repository name.
Labels and branches in the named repo are prefixed with its name; then
it is grafted to the selected one. Its root becomes a child of the
specified commit.  Finally the named repo is removed from the load
list.
""")
    def do_graft(self, line):
        "Graft a named repo onto the chosen one at the selected commit."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        line = self.set_selection_set(line, [])
        if len(self.selection) != 1:
            raise Recoverable("a singleton selection set is required.")
        graft_point = self.selection[0]
        if not self.repolist:
            raise Recoverable("no repositories are loaded.")
        # What is left of the line after the selection is the repo name.
        if line not in self.reponames():
            raise Recoverable("no such repo as %s" % line)
        graft_repo = self.repo_by_name(line)
        # OK, we've got the two repos and the graft point.  Do it,
        # then drop the grafted repo from the load list.
        self.chosen().graft(graft_repo, graft_point)
        self.remove_by_name(graft_repo.name)

    def help_paths(self):
        print("""
Without a modifier, list all paths touched by fileops in
the selection set (which defaults to the entire repo). This
variant does > redirection.

With the 'sub' modifier, take a second argument that is a directory
name and prepend it to every path. With the 'sup' modifier, strip the
first directory component from every path.
""" )
    def do_paths(self, line):
        """List, prefix, or strip paths touched by the selection set.

        With no modifier (optionally followed by a > redirection) the
        sorted set of paths is written out.  'sub DIR' prepends DIR to
        every path; 'sup' strips the first directory component from
        every path.  Raises Recoverable for a missing 'sub' argument,
        for 'sup' when some path has no separator, and for an unknown
        modifier.
        """
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        rest = self.set_selection_set(line, list(range(len(self.chosen()))))
        rest = (rest or "").strip()
        # A bare redirection still means "just list", so route it into
        # the listing branch and let LineParse see the redirect.
        if not rest or rest.startswith(">"):
            with RepoSurgeon.LineParse(rest, capabilities=["stdout"]) as parse:
                allpaths = set()
                # The hook must return the path, so record it and hand
                # it back unchanged.
                self.chosen().path_walk(self.selection,
                                        lambda f: allpaths.add(f) or f)
                parse.stdout.write("\n".join(sorted(allpaths)) + "\n")
            return
        fields = rest.split()
        if fields[0] == "sub":
            if len(fields) < 2:
                raise Recoverable("sub modifier requires a directory name.")
            prefix = fields[1]
            modified = self.chosen().path_walk(self.selection,
                                               lambda f: os.path.join(prefix,f))
            print("\n".join(modified))
        elif fields[0] == "sup":
            # Check every path before touching any of them, so a
            # failure cannot leave the repo half rewritten.
            seen = set()
            self.chosen().path_walk(self.selection,
                                    lambda f: seen.add(f) or f)
            if any(os.sep not in path for path in seen):
                raise Recoverable("no / in sup path.")
            modified = self.chosen().path_walk(self.selection,
                                               lambda f: f[f.find(os.sep)+1:])
            print("\n".join(modified))
        else:
            raise Recoverable("unknown paths modifier '%s'." % fields[0])

    def help_merge(self):
        print("""
Create a merge link. Takes a selection set argument, ignoring all but
the lowest (source) and highest (target) members.  Creates a merge link
from the highest member (child) to the lowest (parent).
""" )
    def do_merge(self, line):
        "Make the lowest selected commit a parent of the highest one."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        self.set_selection_set(line)
        if len(self.selection) == 0:
            raise Recoverable("merge requires a nonempty selection set.")
        # Keep only the commits, in ascending event order.
        self.selection = [index for index in sorted(self.selection)
                          if isinstance(repo.events[index], Commit)]
        if len(self.selection) < 2:
            raise Recoverable("merge requires two commit arguments.")
        parent = repo.events[self.selection[0]]
        child = repo.events[self.selection[-1]]
        child.addParent(parent.mark)

    def help_branch(self):
        print("""
Rename or delete a branch (and any associated resets).  First argument
must be an existing branch name; second argument must one of the verbs
'rename' or 'delete'.

For a 'rename', the third argument may be any token that is a syntactically
valid branch name (but not the name of an existing branch). For a 'delete',
no third argument is required.

For either name, if it does not contain a '/' the prefix 'refs/heads'
is prepended.
""")
    def do_branch(self, line):
        """Rename a branch or delete it.

        The first token names an existing branch and the second is the
        verb 'rename' or 'delete'; 'rename' takes the new name as a
        third token.  Names without a '/' get 'refs/heads/' prepended.
        Raises Recoverable for an unknown branch, an empty or already
        used new name, or an unknown verb.
        """
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        (branchname, line) = RepoSurgeon.pop_token(line)
        if not "/" in branchname:
            branchname = 'refs/heads/' + branchname
        if branchname not in repo.branchlist():
            raise Recoverable("no such branch as %s" % branchname)
        (verb, line) = RepoSurgeon.pop_token(line)
        if verb == "rename":
            (newname, line) = RepoSurgeon.pop_token(line)
            if not newname:
                raise Recoverable("new branch name must be nonempty.")
            if not "/" in newname:
                newname = 'refs/heads/' + newname
            if newname in repo.branchlist():
                raise Recoverable("there is already a branch named '%s'." \
                                  % newname)
            # Both commits on the branch and resets of it carry the name.
            for event in repo:
                if isinstance(event, Commit):
                    if event.branch == branchname:
                        event.branch = newname
                elif isinstance(event, Reset):
                    if event.ref == branchname:
                        event.ref = newname
        elif verb == "delete":
            doomed = [i for (i, event) in enumerate(repo.events)
                      if (isinstance(event, Reset) and event.ref == branchname)
                      or (isinstance(event, Commit) and event.branch == branchname)]
            repo.delete(doomed, ["obliterate", "quiet"])
            repo.declare_sequence_mutation()
        else:
            raise Recoverable("unknown verb '%s' in branch command." % verb)

    def help_tag(self):
        print("""
Move, rename, or delete a tag.  First argument must be an
existing tag name; second argument must be one of the verbs 'move',
'rename', or 'delete'.

For a 'move', a third argument must be a singleton selection set. For
a 'rename', the third argument may be any token token that is a
syntactically valid tag name (but not the name of an existing
tag). For a 'delete', no third argument is required.
""")
    def do_tag(self, line):
        """Move a tag to point to a specified commit, or rename it, or delete it.

        The first token names an existing tag and the second is the
        verb 'move', 'rename' or 'delete'.  'move' takes a singleton
        selection naming the target commit; 'rename' takes the new
        name.  Raises Recoverable for an unknown tag, a bad move
        target, an empty or already used new name, or an unknown verb.
        """
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        (tagname, line) = RepoSurgeon.pop_token(line)
        for event in repo.events:
            if isinstance(event, Tag) and event.name == tagname:
                tag = event
                break
        else:
            raise Recoverable("no such tag as %s" % tagname)
        (verb, line) = RepoSurgeon.pop_token(line)
        if verb == "move":
            self.set_selection_set(line)
            if len(self.selection) != 1:
                raise Recoverable("tag move requires a singleton set.")
            else:
                target = self.selection.pop(0)
            if not isinstance(repo.events[target], Commit):
                raise Recoverable("move target must be a commit.")
            tag.committish = repo.events[target].mark
        elif verb == "rename":
            (newname, line) = RepoSurgeon.pop_token(line)
            if not newname:
                raise Recoverable("new tag name must be nonempty.")
            # Refuse a name already carried by some tag, mirroring the
            # collision check applied to branch renames.
            for event in repo.events:
                if isinstance(event, Tag) and event.name == newname:
                    raise Recoverable("there is already a tag named '%s'." \
                                      % newname)
            tag.name = newname
        elif verb == "delete":
            repo.events.remove(tag)
            repo.declare_sequence_mutation()
        else:
            raise Recoverable("unknown verb '%s' in tag command." % verb)

    #
    # Artifact Removal
    #
    def help_authors(self):
        print("""
Apply or dump author-map information for the specified selection
set, defaulting to all events. 

Lifts from CVS and Subversion may have only usernames local to
the repository host in committer and author IDs. DVCSes want email
addresses (net-wide identifiers) and complete names. To supply the map
from oune to the other, an authors file is expected to consist of
lines each beginning with a local user ID, followed by a '=' (possibly
surrounded by whitespace) followed by a full name and email address.

When an authors file is applied, email addresses in committer and author
metdata for which the local ID matches between &lt; and @ are replaced
according to the mapping (this handles git-svn lifts). Alternatively,
if the local ID is the entire address, this is also considered a match
(this handles what git-cvsimport and cvs2git do) 

With the 'read' modifier, or no modifier, apply author mapping data
(from standard input or a <-redirected input file).  May be useful if
you are editing a repo or dump created by cvs2git or by git-svn
invoked without -A.

With the 'write' modifier, write a mapping file that could be
interpreted by 'authors read', with entries for each unique committer,
author, and tagger (to standard output or a >-redirected file). This
may be helpful as a start on building an authors file, though each
part to the right of an equals sign will need editing.
""")
    def do_authors(self, line):
        "Apply or dump author-mapping file."
        # Selection defaults to every event in the chosen repository.
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        rest = self.set_selection_set(line, list(range(len(repo))))
        if rest.startswith("write"):
            # Dump the map for the selection to stdout or a > redirect.
            rest = rest[5:].strip()
            with RepoSurgeon.LineParse(rest, capabilities=["stdout"]) as parse:
                if parse.tokens():
                    raise Recoverable("authors write no longer takes a filename argument - use > redirection instead")
                repo.write_authormap(self.selection, parse.stdout)
            return
        # 'read' is the default action; the keyword itself is optional.
        if rest.startswith("read"):
            rest = rest[4:].strip()
        with RepoSurgeon.LineParse(rest, capabilities=["stdin"]) as parse:
            if parse.tokens():
                raise Recoverable("authors read no longer takes a filename argument - use < redirection instead")
            repo.read_authormap(self.selection, parse.stdin)

    #
    # Reference lifting
    #
    def help_fossils(self):
        print("""
Apply or list fossil-reference information. Does not take a
selection set. The 'read' variant reads from standard input or a
<-redirected filename; the 'write' variant writes to standard
output or a >-redirected filename.
""")
    def do_fossils(self, line):
        "Apply a reference-mapping file."
        # No selection set is parsed here; the whole line is verb + redirect.
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        if line.startswith("write"):
            # Dump the fossil map to stdout or a > redirect.
            remainder = line[5:].strip()
            with RepoSurgeon.LineParse(remainder, capabilities=["stdout"]) as parse:
                if parse.tokens():
                    raise Recoverable("fossils write no longer takes a filename argument - use > redirection instead")
                repo.write_fossilmap(parse.stdout)
            return
        # Reading is the default; an explicit 'read' keyword is optional.
        remainder = line
        if remainder.startswith("read"):
            remainder = remainder[4:].strip()
        with RepoSurgeon.LineParse(remainder, capabilities=["stdin"]) as parse:
            if parse.tokens():
                raise Recoverable("fossils read no longer takes a filename argument - use < redirection instead")
            repo.read_fossilmap(parse.stdin)

    def help_references(self):
        print("""
With no modifier, produces a listing of events that may have
Subversion or CVS commit references in them.  This version
of the command supports >-redirection

With the modifier 'edit', edit this set.

With the modifier 'lift', transform commit-reference cookies from CVS
and Subversion into action stamps.  This command expects cookies
consisting of the leading string '[[', followed by a VCS identifier
(currently SVN or CVS) followed by VCS-dependent information, followed
by ']]'. An action stamp pointing at the corresponding commit is
substituted when possible.  Enables writing of the fassil-reference
map when the repo is written or rebuilt.
""")
    def do_references(self, line):
        "Look for things that might be CVS or Subversion revision references."
        # With 'lift' in the arguments, rewrite [[...]] cookies in commit
        # and tag comments; otherwise list (or, with 'edit', edit) the
        # selected events whose text looks like it holds a reference.
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        repo.parse_dollar_cookies()
        rest = self.set_selection_set(line, list(range(len(self.chosen()))))
        if "lift" in rest:
            # Count only cookies that were actually replaced.  The tally
            # from re.subn() also includes matches for which substitute()
            # handed back the original text, which overstated the result.
            # A one-element list is used as a cell the nested function
            # can update.
            resolved = [0]
            def substitute(getter, matchobj):
                # Strip the enclosing '[[' and ']]' to get the lookup key.
                payload = matchobj.group(0)[2:-2]
                commit = getter(payload)
                if commit is None:
                    complain("no commit matches " + repr(payload))
                    return matchobj.group(0) # no replacement
                elif commit:
                    text = commit.committer.action_stamp()
                    resolved[0] += 1
                    return text
                else:
                    complain("cannot resolve %s" % payload)
                    return matchobj.group(0) # no replacement
            def fossil_lookup(key):
                # Try the fossil map first, then the dollar-cookie map.
                return repo.fossil_map.get(key) or repo.dollar_map.get(key)
            for (regexp, getter) in \
                    ((r"CVS:[^:\]]+:[0-9.]+", fossil_lookup),
                     (r"SVN:[0-9]+", fossil_lookup),
                     (r":[0-9]+", lambda p: repo.objfind(p)),
                     ):
                match_re = re.compile(re.escape("[[")+regexp+re.escape("]]"))
                for ei in self.selection:
                    event = repo.events[ei]
                    if isinstance(event, (Commit, Tag)):
                        event.comment = match_re.sub(
                            lambda m: substitute(getter, m),
                            event.comment)
            announce("%d references resolved." % resolved[0])
            repo.write_fossils = True
        else:
            # No modifier, just list or edit
            refstyles = (
                # Subversion references
                r"\Wr([0-9]+)\W",
                r"(?:SVN|svn|Subversion|subversion|rev|version).*\W([0-9]+)\W",
                # CVS references
                r"(?:CVS|cvs|rev|version).*\W([0-9][0-9.]+)\W",
                # Possible bare CVS references
                r"[0-9]+\.[0-9]+\.[0-9]+",
                )
            idhits = []
            for ei in self.selection:
                event = repo.events[ei]
                if hasattr(event, "comment"):
                    text = event.comment
                elif hasattr(event, "text"):
                    text = event.text
                else:
                    continue
                # Record each matching event index once, in selection order.
                if any(re.search(pattern, text) for pattern in refstyles) \
                       and ei not in idhits:
                    idhits.append(ei)
            if idhits:
                if rest.startswith("edit"):
                    self.edit(idhits, rest[4:].strip())
                else:
                    with RepoSurgeon.LineParse(rest, capabilities=["stdout"]) as parse:
                        for ei in idhits:
                            event = repo.events[ei]
                            if hasattr(event, "lister"):
                                summary = event.lister(ei, screenwidth())
                                if summary:
                                    parse.stdout.write(summary + "\n")

    #
    # Examining tree states
    #
    def help_checkout(self):
        print("""
Check out files for a specified commit into a directory.  The selection
set must resolve to a singleton commit.
""")
    def do_checkout(self, line):
        "Check out files for a specified commit into a directory."
        # The text left after the selection set names the target directory.
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        directory = self.set_selection_set(line, list(range(len(repo))))
        if not directory:
            raise Recoverable("no target directory specified.")
        if len(self.selection) != 1:
            raise Recoverable("a singleton selection set is required.")
        commit = repo.events[self.selection[0]]
        if not isinstance(commit, Commit):
            raise Recoverable("not a commit.")
        commit.checkout(directory)

    def help_diff(self):
        print("""
Display the difference between commits. Takes a selection-set argument which
must resolve to exactly two commits.
""")
    def do_diff(self, line):
        "Display a diff between versions."
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        self.set_selection_set(line, list(range(len(repo))))
        # Fetch the selected events in ascending index order.
        endpoints = [repo.events[index] for index in sorted(self.selection)]
        if len(endpoints) != 2 or \
               not (isinstance(endpoints[0], Commit) and
                    isinstance(endpoints[1], Commit)):
            raise Recoverable("a pair of commits is required.")
        (first, second) = endpoints
        firstdir = first.checkout()
        seconddir = second.checkout()
        # Hand both checkouts to the external diff program.
        os.system("diff -r --label 'commit %s' --label 'commit %s' -u %s %s" %
                  (first.mark, second.mark, firstdir, seconddir))
    #
    # Setting paths to branchify
    #
    def help_branchify(self):
        print("""
Specify the list of directories to be treated as potential branches (to
become tags if there are no modifications after the creation copies)
when analyzing a Subversion repo. This list is ignored when the
svn_nobranch option is set.  It defaults to the 'standard layout'
set of directories, plus any unrecognized directories in the
repository root.

With no arguments, displays the current branchification set.

An asterisk at the end of a path in the set means 'all immediate
subdirectories of this path, unless they are part of another (longer)
path in the branchify set'.

Note that the branchify set is a property of the reposurgeon interpreter, not
of any individual repository, and will persist across Subversion
dumpfile reads. This may lead to unexpected results if you forget
to re-set it.
""")
    def do_branchify(self, line):
        "Set the branchify path list if arguments are given, then display it."
        paths = line.strip().split()
        if paths:
            global_options['svn_branchify'] = paths
        current = global_options['svn_branchify']
        announce("branchify " + " ".join(current))

    #
    # Setting options
    #
    def help_set(self):
        print("""
Set a boolean option to control reposurgeon's behavior.   With no arguments,
displays the state of all flags and options. The following flags and
options are defined:
""")
        for (opt, expl) in RepoSurgeon.OptionFlags:
            print(opt + ":\n" + expl)
    def do_set(self, line):
        "Turn on the named option flags, or show every flag when given no arguments."
        known = dict(RepoSurgeon.OptionFlags)
        requested = line.split()
        if not requested:
            # Flags never touched so far are reported as False.
            for flagname in known:
                print("\t%s = %s" % (flagname, global_options.get(flagname, False)))
            return
        for flagname in requested:
            if flagname in known:
                global_options[flagname] = True
            else:
                complain("no such option flag as '%s'" % flagname)
    def help_clear(self):
        print("""
Clear a boolean option to control reposurgeon's behavior.   With no arguments,
displays the state of all flags. The following flags and options are defined:
""")
        for (opt, expl) in RepoSurgeon.OptionFlags:
            print(opt + ":\n" + expl)
    def do_clear(self, line):
        "Turn off the named option flags, or show every flag when given no arguments."
        known = dict(RepoSurgeon.OptionFlags)
        requested = line.split()
        if not requested:
            # Flags never touched so far are reported as False.
            for flagname in known:
                print("\t%s = %s" % (flagname, global_options.get(flagname, False)))
            return
        for flagname in requested:
            if flagname in known:
                global_options[flagname] = False
            else:
                complain("no such option flag as '%s'" % flagname)

    #
    # Version binding 
    #
    def help_version(self):
        print("""
With no argument, display the reposurgeon version and supported VCSes.
With argument, declare the major version (single digit) or full
version (major.minor) under which the enclosing script was seveloped.
The program will error out if the major version has changed (which
means the surgical language is not backwards compatible).
""")
    def do_version(self, line):
        "Report our version, or check a declared version against our major version."
        if not line:
            supported = " ".join([x.name for x in (vcstypes+extractors)])
            announce("reposurgeon " + version + " supporting " + supported)
            return
        (vmajor, _) = version.split(".")
        declared = line.strip()
        # Only 'major' and 'major.minor' forms are accepted.
        if declared.count(".") > 1:
            complain("invalid version.")
            return
        major = declared.partition(".")[0]
        if major != vmajor:
            raise Fatal("major version mismatch, aborting.")
        if verbose > 0:
            announce("version check passed.")
    #
    # Running unit tests (undocumented)
    #
    def help_runtests(self):
        print("""
Runs the unit tests and reports the results.
""")
    def do_runtests(self, line):
        "Run the named unit-test class, or every known one when no name is given."
        available = ["DateTests"]
        def runtest(name):
            # Test classes are looked up by name among the module globals.
            runner = unittest.TextTestRunner()
            suite = unittest.defaultTestLoader.loadTestsFromTestCase(globals()[name])
            outcome = runner.run(suite)
            if not outcome.wasSuccessful():
                raise Recoverable("unit tests failed")
        if line and line not in available:
            complain("no test class known as '%s'" % line)
            return
        for name in ([line] if line else available):
            runtest(name)

if __name__ == '__main__':
    # Increase max stack size from 8MB to 512MB
    # Needed to handle really large repositories.
    try:
        resource.setrlimit(resource.RLIMIT_STACK, (2**29,-1))
        # Only raise the recursion limit once the larger stack is granted.
        sys.setrecursionlimit(10**6)
    except (ValueError, resource.error):
        # May not be allowed on some systems.  Whether or not we can do it
        # isn't interesting, it only matters whether the limit is actually
        # blown.  setrlimit() can report refusal either as ValueError or
        # as resource.error, so both are treated as "not allowed".
        pass
    try:
        # interactive() must stay a module-level name: cProfile.run()
        # executes its command string in the __main__ namespace.
        def interactive():
            global verbose
            interpreter.use_rawinput = True
            if verbose == 0:
                verbose = 1
            interpreter.cmdloop()
            interpreter.use_rawinput = False
        interpreter = RepoSurgeon()
        interpreter.use_rawinput = False
        # With no arguments at all, behave as if '-' (interactive) was given.
        if not sys.argv[1:]:
            sys.argv.append("-")
        try:
            for arg in sys.argv[1:]:
                # Each argument may hold several ';'-separated commands.
                for command in arg.split(";"):
                    if command == '-':
                        if interpreter.profile_log is None:
                            interactive()
                        elif interpreter.profile_log:
                            cProfile.run('interactive()', \
                                         interpreter.profile_log)
                        else:
                            cProfile.run('interactive()')
                    else:
                        # Call the base method so RecoverableExceptions
                        # won't be caught; we want them to abort scripting.
                        cmd.Cmd.onecmd(interpreter, interpreter.precmd(command))
        finally:
            interpreter.cleanup()
    except (Recoverable, Fatal) as xe:
        complain(xe.msg)
        sys.exit(1)
    except KeyboardInterrupt:
        print("")
# end
