#!/usr/bin/env python2
#
# reposurgeon - a repository surgeon.
#
# By ESR, October 2010.  BSD terms apply.
#
# Requires Python 2.7.2 or newer.
#
from __future__ import print_function #unicode_literals

import sys, os, cmd, tempfile, subprocess, glob, hashlib, cProfile
import re, signal, shutil, copy, shlex, collections, uuid, cgi, bz2
import time, calendar, unittest, itertools, operator, functools, filecmp
import email.message, email.parser, email.utils

# This import only works on Unixes.  The intention is to enable
# Ctrl-P, Ctrl-N, and friends in Cmd. 
try:
    import readline
except ImportError:
    pass

# Version string of this reposurgeon release.
version="3.2"

#
# This code is intended to be hackable to support special-purpose or
# custom operations, though it's even better if you can come up with a new
# surgical primitive general enough to ship with the stock version.  For
# either case, here's a guide to the architecture.
#
# The core classes are largely about deserializing and reserializing import
# streams.  In between these two operations the repo state lives in a
# fairly simple Python object, Repository. The main part of Repository
# is just a list of events - Commits, Blobs, Tags, Resets, and Passthroughs.
# These are straightforward representations of the command types in an
# import stream, with Passthrough as a way of losslessly conveying lines
# the parser does not recognize.
#
#  +-------------+    +---------+    +-------------+
#  | Deserialize |--->| Operate |--->| Reserialize |
#  +-------------+    +---------+    +-------------+
#
# The general theory of reposurgeon is: you deserialize, you do stuff
# to the event list that preserves correctness invariants, you
# reserialize.  The "do stuff" is mostly not in the core classes, but
# there is one major exception.  The primitive to delete a commit and
# squash its fileops forwards or backwards is seriously intertwined
# with the core classes and actually makes up almost 50% of Repository
# by line count.
#
# The rest of the surgical code lives outside the core classes. Most
# of it lives in the RepoSurgeon class (the command interpreter) or
# the RepositoryList class (which encapsulates name access to a list
# of repositories and also hosts surgical operations involving
# multiple repositories). A few bits, like the repository reader and
# builder, have enough logic that's independent of these
# classes to be factored out of it.
#
# In designing new commands for the interpreter, try hard to keep them
# orthogonal to the selection-set code. As often as possible, commands
# should all have a similar form with a (single) selection set argument.
#
# VCS is not a core class.  The code for manipulating actual repos is bolted
# onto the ends of the pipeline, like this:
#
#  +--------+    +-------------+    +---------+    +-----------+    +--------+
#  | Import |--->| Deserialize |--->| Operate |--->| Serialize |--->| Export |
#  +--------+    +-------------+ A  +---------+    +-----------+    +--------+
#       +-----------+            |
#       | Extractor |------------+
#       +-----------+
#
# The Import and Export boxes call methods in VCS.
#
# Extractor classes build the deserialized internal representation directly.
# Each extractor class is a set of VCS-specific methods to be used by the
# RepoStreamer driver class.
#

class VCS:
    "Record describing one version-control system and the commands that drive it."
    def __init__(self, name,
                 subdirectory, exporter, styleflags, properties,
                 initializer, lister, importer, checkout,
                 preserve, authormap, ignorename, project, notes):
        # Identification
        self.name = name
        self.subdirectory = subdirectory
        # Getting history out of the VCS
        self.exporter = exporter
        self.styleflags = styleflags
        self.properties = properties
        # Creating, listing, filling and checking out repositories
        self.initializer = initializer
        self.lister = lister
        self.importer = importer
        self.checkout = checkout
        # Auxiliary files and human-readable information
        self.preserve = preserve
        self.authormap = authormap
        self.ignorename = ignorename
        self.project = project
        self.notes = notes
    def __str__(self):
        "Render every field as a 'Label: value' line, labels right-aligned."
        def braced(members):
            # Set-valued fields are shown as a brace-enclosed, comma-separated list.
            return "{" + ", ".join(members) + "}"
        rows = [
            ("Name", self.name),
            ("Subdirectory", self.subdirectory),
            ("Exporter", self.exporter),
            ("Export-Style", braced(self.styleflags)),
            ("Properties", repr(self.properties)),
            ("Initializer", self.initializer),
            ("Lister", self.lister),
            ("Importer", self.importer),
            ("Checkout", self.checkout),
            ("Preserve", braced(self.preserve)),
            ("Authormap", self.authormap),
            ("Ignorename", self.ignorename),
            ("Project", self.project),
            ("Notes", self.notes),
        ]
        # The widest label ("Subdirectory" plus one leading blank) is 13 columns.
        return "".join("{0:>13}: {1}\n".format(label, value)
                       for (label, value) in rows)

# Most knowledge about specific version-control systems lives in the
# following class list. Exceptions: there's a git-specific hook in the
# repo reader; also see the extractor classes; also see the dump method
# in the Blob() class.
# The members are, respectively:
#
# * Name of its characteristic subdirectory.
# * Command to export from the VCS to the interchange format
# * Export-style flags.
#     "no-nl-after-commit" = no extra NL after each commit
#     "nl-after-comment" = inserts an extra NL after each comment
#     "export-progress" = exporter generates its own progress messages,
#                         no need for baton prompt.
# * Flag specifying whether it handles per-commit properties on import
# * Command to initialize a new repo
# * Command to import from the interchange format
# * Command to check out working copies of the repo files.
# * Default preserve set (e.g. config & hook files; parts can be directories).
# * Likely location for an importer to drop an authormap file
# * Command to list files under repository control.
#
# Note that some of the commands used here are plugins or extensions
# that are not part of the basic VCS. Thus these may fail when called;
# we need to be prepared to cope with that.
#
# %(tempfile)s in a command gets substituted with the name of a
# tempfile that the calling code will know to read or write from as
# appropriate after the command is done.  If your exporter can simply
# dump to stdout, or your importer read from stdin, leave out the
# %(tempfile)s; reposurgeon will popen(3) the command, and it will
# actually be slightly faster (especially on large repos) because it
# won't have to wait for the tempfile I/O to complete.
#
# %(basename)s is replaced with the basename of the repo directory.
#
# Table of the version-control systems this program knows how to drive.
# Each entry is a VCS record; a command field set to None means no
# command is configured for that operation on that system.
vcstypes = [
    VCS(name="git",
        subdirectory=".git",
        exporter="git fast-export -M -C --signed-tags=verbatim --tag-of-filtered-object=drop --all",
        styleflags=set(),
        properties=False,
        initializer="git init --quiet",
        importer="git fast-import --quiet",
        checkout="git checkout",
        lister="git ls-files",
        preserve={'.git/config', '.git/hooks'},
        authormap=".git/cvs-authors",
        ignorename=".gitignore",
        project="http://git-scm.com/",
        notes="The authormap is not required, but will be used if present."),
    VCS(name="bzr",
        subdirectory=".bzr",
        exporter="bzr fast-export --no-plain %(basename)s",
        styleflags={"export-progress", "no-nl-after-commit", "nl-after-comment"},
        properties=True,
        initializer=None,
        lister=None,
        importer="bzr fast-import -",
        checkout="bzr checkout",
        preserve=set(),
        authormap=None,
        project="http://bazaar.canonical.com/en/",
        ignorename=".bzrignore",
        notes="Requires the bzr-fast-import plugin."),
    # Export is tested and works; import is flaky.
    VCS(name="hg",
        subdirectory=".hg",
        exporter="hg-fast-export.py --marks /dev/null --mapping /dev/null --heads /dev/null --status /dev/null --repo .",
        styleflags={"nl-after-comment", "export-progress"},
        properties=False,
        initializer="hg init",
        lister="hg locate",
        importer="hg fastimport %(tempfile)s",
        checkout="hg checkout",
        preserve={".hg/hgrc"},
        authormap=None,
        ignorename=".hgignore",
        project="http://mercurial.selenic.com/",
        notes="The hg export-import methods are not part of stock Mercurial."),
    # Styleflags may need tweaking for round-tripping
    VCS(name="darcs",
        subdirectory="_darcs",
        exporter="darcs fastconvert export",
        styleflags=set(),
        properties=False,
        initializer=None,
        lister="darcs show files",
        importer="darcs fastconvert import",
        checkout=None,
        preserve=set(),
        authormap=None,
        ignorename="_darcs/prefs/boring",
        project="http://darcs.net/",
        notes="Assumes no boringfile preference has been set."),
    # Export is experimental and doesn't round-trip
    VCS(name="svn",
        subdirectory="locks",
        exporter="svnadmin dump .",
        styleflags={"export-progress"},
        properties=False,
        # Repositories are created by svnadmin; the svn client has no
        # "create" subcommand, so "svn create ." could never succeed.
        initializer="svnadmin create .",
        importer="svnadmin load .",
        checkout=None,
        lister=None,
        preserve={"hooks"},
        authormap=None,
        ignorename=None,
        project="http://subversion.apache.org/",
        notes="Run from the repository, not a checkout directory."),
    VCS(name="cvs",
        subdirectory="CVS",
        exporter="find . -name '*,v' -print | cvs-fast-export -k --reposurgeon",
        styleflags={"export-progress"},
        properties=False,
        initializer=None,
        importer=None,
        checkout=None,
        lister=None,
        preserve=set(),
        authormap=None,
        ignorename=None,
        project="http://www.catb.org/~esr/cvs-fast-export",
        notes="Requires cvs-fast-export."),
    VCS(name="rcs",
        subdirectory="RCS",
        exporter="find . -name '*,v' -print | cvs-fast-export -k --reposurgeon",
        styleflags={"export-progress"},
        properties=False,
        initializer=None,
        importer=None,
        checkout=None,
        lister=None,
        preserve=set(),
        authormap=None,
        ignorename=None,
        project="http://www.catb.org/~esr/cvs-fast-export",
        notes="Requires cvs-fast-export."),
    ]

class Fatal(Exception):
    "Unrecoverable error; the explanation is available as the msg attribute."
    def __init__(self, msg):
        # Hand the message to the base class as well, so that str(exc),
        # exc.args and uncaught-exception tracebacks show it instead of
        # an empty string.
        Exception.__init__(self, msg)
        self.msg = msg

# How to write extractor classes:
#
# Clone one of the existing ones and mutate.  
#
# Significant fact: None of the get_* methods for extracting information about
# a revision is called until after checkout has been called on that revision.
#
# Most methods take a native revision ID as argument. The value and type of the
# ID don't matter to any of the code that will call the extractor, except that
# IDs must be hashable so they can be dictionary keys.
#
# The 'name', 'subdirectory', and 'visible' members must be set. The
# subdirectory member is how an extractor recognizes what repositories
# it can consume.  If the visible member is false, the 'read' command
# will ignore the existence of the extractor.
#
# The strings returned by get_committer() and get_authors() should look like
#
# J. Random User <random@foobar> 2011-11-29T10:13:32Z
#
# that is, a free text name followed by an email ID followed by a date.
# The date specification can be anything Attribution() can parse; in
# particular, RFC3339 dates are good, so are RFC822 (email) dates,
# and so is git's native integer-Unix-timestamp/timezone pairs.

class GitExtractor:
    "Repository extractor for the git version-control system."
    # Regardless of what revision and branch was current at start,
    # after the git extractor runs the head revision on the master branch
    # will be checked out.
    #
    # The git extractor does not attempt to recover N ops,
    # symbolic links, gitlinks, or directory fileops.
    #
    # To be streamed, a git repo must have <emphasis>local</emphasis>
    # refs to all branches - in particular, local tracking branches
    # corresponding to all remotes.
    #
    # Some of these limitations could be fixed, but the git extractor
    # is not intended to replace git-fast-export; it only exists as a
    # test for the generic RepoStreamer code and a model for future
    # extractors.
    def __init__(self):
        # These must be set for every extractor class
        self.name = "git-extractor"
        self.subdirectory = ".git"
        self.visible = False
        self.properties = False
        self.ignorename = ".gitignore"
        # These are internal
        self.revlist = []     # commit hashes, in the order git log reports them
        self.parents = {}     # commit hash -> list of parent hashes
        self.header = {}
        self.meta = {}        # commit hash -> {'ci':..., 'ai':..., 'branch':...}
        self.tags = []        # Tag objects built from annotated tags
        self.refs = {}        # ref name -> hash (or raw ref-file content)
        self.baton = None
    def analyze(self, baton):
        "Analyze a git repository for streaming."
        self.baton = baton
        # Get the topologically-ordered list of revisions and parent hashes
        with popen_or_die("git log --all --topo-order --reverse --format='%H %P'") as fp:
            for line in fp:
                fields = line.strip().split()
                self.revlist.append(fields[0])
                self.parents[fields[0]] = fields[1:]
        self.baton.twirl()
        # Next, all other per-commit data except branch IDs
        with popen_or_die("git log --all --reverse --date=raw --format='%H|%cn <%ce> %cd|%an <%ae> %ad'") as fp:
            for line in fp:
                (h, ci, ai) = line.strip().split('|')
                self.meta[h] = {'ci':ci, 'ai':ai}
        # Next, find all refs
        for root, dirs, files in os.walk(".git/refs"):
            for leaf in files:
                assert dirs is not None  # Pacify pylint
                ref = os.path.join(root, leaf)
                with open(ref, "rb") as fp:
                    # ref[5:] drops the leading ".git/" from the path
                    self.refs[ref[5:]] = fp.read().strip()
        self.baton.twirl()
        # Next, grab all tag objects.
        with popen_or_die("git tag -l") as fp:
            for line in fp:
                tag = line.strip()
                # Distinct names for the nested pipes and their lines keep
                # the outer loop's variables from being rebound.
                with popen_or_die("git rev-parse %s" % tag) as revfp:
                    taghash = revfp.read().strip()
                # Annotated tags are first-class objects with their
                # own hashes.  The hash of a lightweight tag is just
                # the commit it points to. Handle both cases.
                objecthash = taghash
                with popen_or_die("git cat-file -p %s" % tag) as tagfp:
                    comment = None
                    tagger = None
                    for tagline in tagfp:
                        tagline = tagline.strip()
                        if comment is not None:
                            # Past the blank separator everything is
                            # message text, even a line that happens to
                            # begin with "object" or "tagger ".
                            comment += tagline + "\n"
                        elif not tagline:
                            comment = ""
                        elif tagline.startswith("tagger "):
                            tagger = tagline[len("tagger "):]
                        elif tagline.startswith("object"):
                            objecthash = tagline.split()[1]
                    # Record the tag once, after the whole object has been
                    # read.  Doing this per comment line would emit one
                    # Tag per line, each with a truncated comment.
                    if objecthash != taghash:
                        # committish isn't a mark; we'll fix that later
                        self.tags.append(Tag(None,
                                             name=tag,
                                             tagger=Attribution(tagger),
                                             comment=comment or "",
                                             committish=objecthash))
                    self.refs["refs/tags/" + tag] = objecthash
        self.baton.twirl()
        # Color branches in the order the tips occur.  Emulate the
        # git-export order.
        for refname, refobj in sorted(self.refs.iteritems(),
                                      key=lambda ref: self.revlist.index(ref[1])):
            self.__branch_color(refobj, refname)
        uncolored = [revision for revision in self.revlist if 'branch' not in self.meta[revision]]
        if uncolored:
            if verbose >= 1:
                raise Fatal("missing branch attribute for: %s" % uncolored)
            else:
                raise Fatal("some branches do not have local ref names.")
        self.baton.twirl()
    def __metadata(self, rev, fmt):
        "Return 'git log -1' output for rev in the given format, minus the final character."
        with popen_or_die("git log -1 --format='%s' %s" % (fmt, rev)) as fp:
            return fp.read()[:-1]
    def __branch_color(self, rev, color):
        "Assign branch name 'color' to rev and to all its not-yet-colored ancestors."
        if rev.startswith("ref"):
            # Not a hash (e.g. the content of a symbolic ref); nothing to color.
            return
        while 'branch' not in self.meta[rev]:
            self.meta[rev]['branch'] = color
            parents = self.get_parents(rev)
            if not parents:
                break
            elif len(parents) == 1:
                # This case avoids blowing Python's stack by recursing
                # too deep on large repos.
                rev = parents[0]
            else:
                for parent in parents:
                    self.__branch_color(parent, color)
                break
    def pre_extract(self, repo):
        "Hook for any setup actions required before streaming."
        assert repo is not None  # Pacify pylint
    def post_extract(self, repo):
        "Hook run after streaming: reset commit properties, check out master."
        for event in repo.commits():
            event.properties = collections.OrderedDict()
        os.system("git checkout --quiet master")
    def isclean(self):
        "Return True if repo has no unsaved changes."
        return not capture("git ls-files --modified")
    def get_revlist(self):
        "Return the list of commit ID strings, in the topological order found by analyze()."
        return self.revlist
    def get_taglist(self):
        "Return the list of Tag objects built from the annotated tags."
        return self.tags
    def iter_resets(self):
        "Return an iterator yielding (reset name, revision) pairs."
        return (item for item in self.refs.iteritems() if "/tags/" not in item[0])
    def checkout(self, rev, filemap):
        "Check the directory out to a specified revision; return the file manifest."
        assert filemap is not None # pacify pylint
        os.system("git checkout --quiet %s" % rev)
        manifest = capture("git ls-files").split()
        return manifest
    def cleanup(self, rev, issued):
        "Cleanup after checkout."
        assert rev and (issued is not None) # Pacify pylint
    def get_parents(self, rev):
        "Return the list of commit IDs of a commit's parents."
        return self.parents[rev]
    def get_branch(self, rev):
        "Return the branch name assigned to a commit by branch coloring."
        return self.meta[rev]['branch']
    def get_comment(self, rev):
        "Return a commit's change comment as a string."
        return self.__metadata(rev, "%B")
    def get_committer(self, rev):
        "Return the committer's ID/date as a string."
        return self.meta[rev]['ci']
    def get_authors(self, rev):
        "Return a one-element list holding the author's ID/date string."
        return [self.meta[rev]['ai']]
    def get_properties(self, rev):
        "Return the (always empty) ordered dictionary of properties for the commit."
        assert rev is not None # Pacify pylint
        return collections.OrderedDict()

# More extractors go here

# One instance of every known extractor class.
extractors = [GitExtractor()]

# Current verbosity level.  debug_enable() compares it against the
# DEBUG_* thresholds below: a category is active when verbose is at
# least that category's value.
verbose         = 0
DEBUG_SVNDUMP   = 2    # Debug Subversion dumping
DEBUG_TOPOLOGY  = 2    # Debug repo-extractor logic (coarse-grained)
DEBUG_EXTRACT   = 2    # Debug repo-extractor logic (fine-grained)
DEBUG_FILEMAP   = 3    # Debug building of filemaps
DEBUG_DELETE    = 3    # Debug canonicalization after deletes
DEBUG_IGNORES   = 3    # Debug ignore generation
DEBUG_SVNPARSE  = 4    # Lower-level Subversion parsing details
DEBUG_EMAILIN   = 4    # Debug event round-tripping through mailbox_{out|in} 
DEBUG_SHUFFLE   = 4    # Debug file and directory handling
DEBUG_COMMANDS  = 5    # Show commands as they are executed
DEBUG_UNITE     = 5    # Debug mark assignments in merging
DEBUG_LEXER     = 6    # Debug selection-language parsing
# NOTE(review): presumably suppresses informational output when true;
# its users are outside this part of the file -- confirm.
quiet = False

# NOTE(review): starts empty; presumably filled in with option settings
# elsewhere in the file -- confirm against the code that writes to it.
global_options = {}

def screenwidth():
    "Ask stty for the terminal size and return its second field as an integer."
    with popen_or_die('stty size', 'r') as stty:
        # stty prints two numbers; the second is the width.  The lookup
        # is done inside the with-block, as before, so a failure here is
        # seen by the context manager's exit handler.
        dimensions = stty.read().split()
        columns = dimensions[1]
        return int(columns)

def debug_enable(level):
    "Tell whether the global verbosity reaches the given debug level."
    return level <= verbose

def nuke(directory, legend):
    """Remove a (large) directory, with a progress indicator.

    directory is the tree to delete; legend is the prompt handed to the
    progress Baton.  The tree is walked bottom-up so that every directory
    is already empty when its turn comes.  Failure to remove the top
    directory itself is deliberately ignored.
    """
    with Baton(legend, enable=debug_enable(DEBUG_SHUFFLE)) as baton:
        for root, dirs, files in os.walk(directory, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
                baton.twirl()
            for name in dirs:
                path = os.path.join(root, name)
                # os.walk reports symbolic links to directories among
                # the directory names, but os.rmdir refuses to remove a
                # link; unlink it instead of aborting the whole removal.
                if os.path.islink(path):
                    os.remove(path)
                else:
                    os.rmdir(path)
                baton.twirl()
    try:
        os.rmdir(directory)
    except OSError:
        # Best effort: the top directory may be missing or not removable.
        pass

def rfc3339(t):
    "Format a Unix timestamp as an RFC3339 date-time in Zulu (UTC) time."
    utc = time.gmtime(t)
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", utc)

def complain(msg):
    "Report an error on stderr, tagged with the program name."
    # Push out pending normal output first so the two streams stay in order.
    sys.stdout.flush()
    errors = sys.stderr
    errors.write("reposurgeon: %s\n" % msg)
    errors.flush()

def announce(msg):
    "Write an informational message to stdout, tagged with the program name."
    line = "reposurgeon: %s\n" % msg
    sys.stdout.write(line)

def pacify_pylint(_unused):
    "Do nothing; exists so a variable can be 'used' to silence pylint."
    return None

def memoize_iterator(iterator_f, mem_attr = None):
    """From a class method returning an iterator, create
       one which caches the iterator results and replays
       them later. Arguments:
        - iterator_f: the *unbound* class method
        - mem_attr:   the name of the attribute on the class
                      instance that stores the cache
                      (default: _mem_attr_<function name>)
       Returns the caching replacement method.
    """
    if mem_attr is None:
        mem_attr = "_mem_attr_" + iterator_f.__name__
    # Define the caching iterator
    def f(self):
        # Obtain the store or create a new one
        # The cache is
        #    - cache: a list containing all values already
        #             yielded by the iterator,
        #    - it:    the iterator, ready to yield the next
        #             uncached value, or already at its end.
        try:
            # A missing attribute reads as None, which cannot be
            # unpacked: that is the signal to create the store.
            cache, it = getattr(self, mem_attr, None)
        except (TypeError, ValueError):
            cache = []; it = iterator_f(self)
            setattr(self, mem_attr, (cache, it))
        # Yield values from the list, enlarging the latter
        # if necessary. We use an infinite loop over all
        # integers and stop when the underlying iterator
        # has no more values to enlarge the list with.
        for pos in itertools.count():
            if len(cache) <= pos:
                try:
                    cache.append(next(it))
                except StopIteration:
                    # End the generator explicitly.  Letting the
                    # StopIteration escape from a generator body is
                    # turned into RuntimeError by newer Pythons
                    # (PEP 479); a plain return is equivalent and
                    # works everywhere.
                    return
            yield cache[pos]
    # update_wrapper ensures that f gets all interesting
    # attributes of iterator_f (especially the docstring)
    try:
        return functools.update_wrapper(f, iterator_f)
    except AttributeError:
        # Cython doesn't support setting name or docstring
        return f

def memoized_iterator(mem_attr = None):
    """Decorator factory around memoize_iterator.

    memoized_iterator(A)(f) is the same as memoize_iterator(f, A).
    The extra level exists because a decorator is called with the
    function as its only argument, so the cache attribute name has
    to be bound beforehand.
    """
    decorator = functools.partial(memoize_iterator, mem_attr = mem_attr)
    return decorator

class Baton:
    """Ship progress indications to stdout.

    Meant to be used as a context manager: entering prints the prompt,
    leaving prints the elapsed time and an end message.  When created
    with enable=False there is no output stream and the display methods
    do nothing.
    """
    def __init__(self, prompt, endmsg='done', enable=False):
        self.prompt = prompt
        self.endmsg = endmsg
        self.countfmt = None    # format of the running counter, if one is active
        self.counter = 0        # next value the running counter will show
        if enable:
            self.stream = sys.stdout
        else:
            self.stream = None
        self.count = 0          # number of twirls so far
        self.time = 0           # start time, set on entry
    def __enter__(self):
        if self.stream:
            self.stream.write(self.prompt + "...")
            if os.isatty(self.stream.fileno()):
                self.stream.write(" \b")
            self.stream.flush()
        self.count = 0
        self.time = time.time()
        return self
    def startcounter(self, countfmt, initial=1):
        "Begin a running counter shown with %-format countfmt, starting at initial."
        self.countfmt = countfmt
        self.counter = initial
    def bumpcounter(self):
        "Show the counter (on a terminal) and advance it; twirl if no counter is active."
        if self.stream is None:
            return
        if os.isatty(self.stream.fileno()):
            if self.countfmt:
                update = self.countfmt % self.counter
                # Back up over what was written so the next update overwrites it.
                self.stream.write(update + ("\b" * len(update)))
                self.stream.flush()
            else:
                self.twirl()
        self.counter = self.counter + 1
    def endcounter(self):
        "Blank out the counter display and deactivate the counter."
        # Nothing to erase when no counter format is active; formatting
        # with a None format would raise TypeError.
        if self.stream and self.countfmt:
            # The text last shown by bumpcounter() was formatted from
            # counter - 1, so that is the width to blank out.  (The
            # twirl count, self.count, has nothing to do with it.)
            w = len(self.countfmt % (self.counter - 1))
            self.stream.write((" " * w) + ("\b" * w))
            self.stream.flush()
        self.countfmt = None
    def twirl(self, ch=None):
        "One twirl of the baton."
        if self.stream is None:
            return
        if os.isatty(self.stream.fileno()):
            if ch:
                # An explicit character is printed as-is and does not
                # advance the spinner.
                self.stream.write(ch)
                self.stream.flush()
                return
            else:
                update = "-/|\\"[self.count % 4]
                self.stream.write(update + ("\b" * len(update)))
                self.stream.flush()
        self.count = self.count + 1
    def __exit__(self, extype, value_unused, traceback_unused):
        if extype == KeyboardInterrupt:
            self.endmsg = "interrupted"
        if extype == Fatal:
            self.endmsg = "aborted by error"
        if self.stream:
            self.stream.write("...(%2.2f sec) %s.\n" \
                              % (time.time() - self.time, self.endmsg))
        # Never swallow the exception.
        return False

class RepoSurgeonEmail(email.message.Message, object):
    "Email message whose envelope line is a row of dashes instead of 'From '."
    Divider = 78 * "-"
    __hash__ = None
    def __init__(self, **kwargs):
        email.message.Message.__init__(self, **kwargs)
        self.set_unixfrom(RepoSurgeonEmail.Divider)
    @staticmethod
    def readmsg(fp):
        "Read the next divider-delimited message text from fp; None at end of input."
        divider = RepoSurgeonEmail.Divider
        opener = fp.readline()
        if not opener:
            return None
        collected = []
        # A leading divider only introduces the message; any other first
        # line already belongs to it.
        if not opener.startswith(divider):
            collected.append(opener)
        while True:
            line = fp.readline()
            # Stop at end of input or at the divider opening the next message.
            if not line or line.startswith(divider):
                break
            collected.append(line)
        return ''.join(collected)
    def __str__(self):
        "Serialize with the divider line first, escaping body lines that begin with '--'."
        flattened = super(RepoSurgeonEmail, self).as_string(unixfrom=True)
        return flattened.replace("\n--", "\n.--")

class Date(object):
    "A time/date in UTC. Preserves the original TZ information and uses it to convert back when formatting."
    # timestamp:      seconds since the Unix epoch, UTC
    # tz_offset:      offset from UTC in seconds, east positive
    # orig_tz_string: the timezone field exactly as it appeared in the input
    __slots__ = ("timestamp", "tz_offset", "orig_tz_string")
    __hash__ = None
    # Group 1 is the timestamp, group 2 the offset; whitespace between
    # them is optional.
    date_re = re.compile(r"([0-9]+)\s*([+-][0-9]+)$")
    subsecond_re = re.compile(r"\.[0-9]+Z")
    offset_re = re.compile(r"^([-+]?)([0-9]{2})([0-9]{2})$")
    def __init__(self, text, error=Fatal):
        """Recognize date formats that exporters or email programs might emit.

        text is the date string; error is the exception class raised
        (with a message) when no supported format matches.
        """
        # First, look for git's preferred format, which is a timestamp
        # in UTC followed by an offset to be used as a hint for what
        # timezone to display the date in when converting to other
        # formats
        text = text.strip()
        gitdate = Date.date_re.match(text)
        if gitdate:
            # Take the fields from the match rather than from split():
            # the pattern also accepts "123+0000" with no whitespace,
            # which split() cannot take apart.
            self.orig_tz_string = gitdate.group(2)
            self.tz_offset = Date.secondsFromOffsetString(self.orig_tz_string)
            self.timestamp = int(gitdate.group(1))
            return
        # If that didn't work, look for an RFC822 date, which git also
        # accepts. Note, there could be edge cases that Python's parser
        # handles but git doesn't.
        try:
            dt = email.utils.parsedate_tz(text)
            self.tz_offset = dt[9]
            self.timestamp = int(calendar.timegm(dt) - self.tz_offset)
            # The zone is the sixth field when a weekday is present.
            # The weekday is optional in RFC822, so fall back to the
            # last field instead of failing with IndexError.
            fields = text.split()
            if len(fields) > 5:
                self.orig_tz_string = fields[5]
            else:
                self.orig_tz_string = fields[-1]
            return
        except TypeError:
            # Raised when the parser returns None (subscripting None)
            # or finds no timezone (subtracting None).
            pass
        # Also accept RFC3339 dates in Zulu time, just because I like them.
        try:
            # Discard subsecond precision, import-stream format can't use it.
            text = re.sub(Date.subsecond_re, "Z", text)
            rfc3339date = time.strptime(text, "%Y-%m-%dT%H:%M:%SZ")
            self.timestamp = calendar.timegm(rfc3339date)
            self.orig_tz_string = "+0000"
            self.tz_offset = 0
            return
        except ValueError:
            # time.strptime() throws this
            # "time data 'xxxxxx' does not match format '%Y-%m-%dT%H:%M:%S'" 
            pass
        # Date format not recognized
        raise error("'%s' is not a valid timestamp" % text)
    @staticmethod
    def secondsFromOffsetString(text):
        "Convert a [+-]HHMM offset string to seconds east of UTC; complain and return 0 if malformed."
        m = re.match(Date.offset_re, text)
        if m is not None:
            sign = -1 if m.group(1) == "-" else 1
            hours = int(m.group(2))
            mins = int(m.group(3))
            # Offsets in actual use run from -1200 to +1400.  The hours
            # field is parsed without its sign, so the sign has to be
            # applied before the range is checked.
            if not -12 <= sign * hours <= 14 or mins > 59:
                complain("dubious UTC offset '%s'." % text)
            return (hours * 60 + mins) * 60 * sign
        else:
            complain("invalid UTC offset '%s', assuming +0000 instead." % text)
            return 0
    def rfc3339(self):
        "Format as an RFC3339 timestamp in Zulu time."
        return rfc3339(self.timestamp)
    def rfc822(self):
        "Format as an RFC822 timestamp."
        return time.strftime("%a %d %b %Y %H:%M:%S", time.gmtime(self.timestamp + self.tz_offset)) + " " + self.orig_tz_string
    def delta(self, other):
        "Return the number of seconds from this date to the other."
        return other.timestamp - self.timestamp
    @staticmethod
    def tzresolve(tz):
        "Hacky way to beat the Unix timezone database into resolving TZ names."
        if tz[0] in "+-":
            # Already a numeric offset.
            return tz
        oldtz = os.getenv("TZ")
        try:
            # Break the current time down in the requested zone, then
            # reinterpret the broken-down time as UTC; the difference
            # is the zone's offset.
            os.putenv("TZ", tz)
            time.tzset()
            now = int(time.time())
            tm = time.localtime(int(now))
            os.putenv("TZ", "UTC")
            time.tzset()
            seconds = int(time.mktime(tm)) - now
        finally:
            # Put the environment back exactly as found.  An empty TZ
            # is not the same as an unset one (it selects UTC), so a
            # variable that was unset has to be unset again.
            if oldtz is None:
                os.unsetenv("TZ")
            else:
                os.putenv("TZ", oldtz)
            time.tzset()
        if seconds < 0:
            sgn = "-"
            seconds = -seconds
        else:
            sgn = "+"
        # HHMM: whole hours, then the leftover expressed in minutes
        # (not in seconds).
        return sgn + ("%02d" % (seconds // 3600)) + ("%02d" % ((seconds % 3600) // 60))
    def __str__(self):
        "Format as a git timestamp."
        return str(self.timestamp) + " " + self.orig_tz_string
    # Comparisons look at the UTC timestamp only; the zone is ignored.
    def __eq__(self, other):
        return self.timestamp == other.timestamp
    def __ne__(self, other):
        return self.timestamp != other.timestamp
    def __lt__(self, other):
        return self.timestamp < other.timestamp

class DateTests(unittest.TestCase):
    "Unit tests for Date parsing, formatting, comparison and deltas."
    def test_conversion(self):
        "Every accepted input syntax renders the same in each output format."
        expected = { 'rfc3339': "2010-10-27T18:43:32Z",
                     'rfc822': "Wed 27 Oct 2010 18:43:32 +0000",
                     '__str__': "1288205012 +0000" }
        for init in ('2010-10-27T18:43:32Z',
                     '1288205012 +0000',
                     'Wed 27 Oct 2010 18:43:32 +0000'):
            date = Date(init)
            for (func, result) in expected.items():
                self.assertEqual(getattr(date, func)(), result)
    def test_equality(self):
        "The same instant compares equal whatever syntax it was parsed from."
        dates = [Date('2010-10-27T18:43:32Z'),
                 Date('1288205012 +0000'),
                 Date('Wed 27 Oct 2010 18:43:32 +0000')]
        for (i, left) in enumerate(dates):
            for right in dates[i:]:
                self.assertEqual(left, right)
        # Zone-name resolution rides along with this test.
        self.assertEqual(Date.tzresolve("EST"), "-0500")
        self.assertEqual(Date.tzresolve("-0500"), "-0500")
        self.assertIn(Date.tzresolve("Europe/Warsaw"), ("+0100", "+0200"))
    def test_inequality(self):
        "Distinct instants are unequal and order by time."
        def check_earlier(early, late):
            self.assertNotEqual(early, late)
            self.assertTrue(early < late)
            self.assertTrue(late > early)
        base = Date('Wed 27 Oct 2010 18:43:32 +0000')
        second_later = Date('Wed 27 Oct 2010 18:43:33 +0000')
        hour_east = Date('Wed 27 Oct 2010 18:43:32 +0100')
        check_earlier(base, second_later)
        check_earlier(hour_east, base)
        check_earlier(hour_east, second_later)
        check_earlier(Date('2010-10-27T18:43:32Z'),
                      Date('2010-10-27T18:43:33Z'))
        check_earlier(Date('1288205012 +0000'),
                      Date('1288205013 +0000'))
    def test_deltas(self):
        "delta() is the signed distance in seconds to the other date."
        base = Date('Wed 27 Oct 2010 18:43:32 +0000')
        second_later = Date('Wed 27 Oct 2010 18:43:33 +0000')
        hour_east = Date('Wed 27 Oct 2010 18:43:32 +0100')
        for (start, end, seconds) in ((base, second_later, 1),
                                      (second_later, hour_east, -3601),
                                      (hour_east, base, 3600),
                                      (base, base, 0)):
            self.assertEqual(start.delta(end), seconds)

class Attribution(object):
    "Represents an attribution of a repo action to a person and time."
    __slots__ = ("name", "email", "date")
    __hash__ = None
    def __init__(self, operson=None):
        """Parse an attribution string of the form 'Name <email> date'.

        With no argument all three fields are left as None.  Raises
        Fatal if the trailing date is missing or cannot be parsed.
        """
        self.name = self.email = self.date = None
        person = operson
        if person:
            # Deal with a cvs2svn artifact
            person = person.replace("(no author)", "no-author")
            # First, validity-check the email address
            try:
                (self.name, self.email) = email.utils.parseaddr(person)
            except ValueError:
                pass
            # Attribution format is actually stricter than RFC822;
            # needs to have a following date in the right place.
            if person.startswith("<"):
                person = " " + person
            person = person.replace(" <", "|").replace("> ", "|")
            tokens = person.strip().split("|")
            # Recover from nasty malformations
            if self.name is None:
                self.name = tokens[0].strip()
                self.email = tokens[1].strip()
            try:
                self.date = Date(tokens[2])
            except (ValueError, IndexError):
                # The date token may not exist at all, so don't index it
                # again while building the error message.
                baddate = tokens[2] if len(tokens) > 2 else ""
                raise Fatal("malformed attribution date '%s' in '%s'" \
                            % (baddate, operson))
    def email_out(self, _modifiers, msg, hdr):
        "Update an RFC822 message object with a representation of this."
        msg[hdr] = self.name + " <" + self.email + ">"
        msg[hdr + "-Date"] = self.date.rfc822()
    def remap(self, authors):
        """Remap the attribution name.

        authors maps a local name to a (name, mail, timezone) triple; the
        first entry matching our email's local part (or whole address)
        replaces our name and email, and our timezone string if it
        supplies one.
        """
        for (local, (name, mail, timezone)) in authors.items():
            address = self.email.lower()
            if address.startswith(local + "@") or address == local:
                self.name = name
                self.email = mail
                if timezone:
                    self.date.orig_tz_string = timezone
                break
    def action_stamp(self):
        "Return the RFC3339 date and the email address joined by '!'."
        return self.date.rfc3339() + "!" + self.email
    def __eq__(self, other):
        "Compare attributions after canonicalization."
        return (self.name == other.name
                and self.email == other.email
                and self.date == other.date)
    def __ne__(self, other):
        "Python 2 does not derive != from ==, so spell it out."
        return not self.__eq__(other)
    def who(self):
        "Return the 'Name <email>' part without the date."
        return self.name + " <" + self.email + ">"
    def __str__(self):
        return self.name + " <" + self.email + "> " + str(self.date)

class Blob(object):
    "Represent a detached blob of data referenced by a mark."
    __slots__ = ("repo", "mark", "pathlist", "colors", "cookie", "start", "size", "deletehook")
    __hash__ = None
    def __init__(self, repo=None):
        "Create an empty, unmarked blob, optionally attached to a repo."
        self.repo = repo
        self.mark = None
        self.cookie = None
        self.deletehook = None
        self.pathlist = []      # In-repo paths associated with this blob
        self.colors = []
        self.start = None       # Offset of the content in repo.seekstream
        self.size = 0           # Length of the content
    def id_me(self):
        "ID this blob for humans."
        return "blob@%s" % (self.mark,)
    def paths(self):
        "For uniformity with commits and fileops."
        return self.pathlist
    def blobfile(self, create=False):
        "File where the content lives; optionally create its directories."
        stem = repr(id(self))
        parts = ("blobs", stem[:3], stem[3:6], stem[6:])
        if create:
            # Make each missing ancestor directory, shallowest first.
            for depth in range(1, len(parts)):
                partial = os.path.join(self.repo.subdir(), *parts[:depth])
                if not os.path.exists(partial):
                    os.mkdir(partial)
        return os.path.join(self.repo.subdir(), *parts)
    def hasfile(self):
        "Does this blob have its own file?"
        # Content lives in the seek stream only when there is a stream
        # and we know our offset into it.
        return not (self.repo.seekstream and self.start is not None)
    def materialize(self):
        "Materialize this content as a separate file, if it isn't already."
        if not self.hasfile():
            # set_content() with no offset drops the stream reference.
            content = self.get_content()
            self.set_content(content)
        return self.blobfile()
    def get_content(self):
        "Get the content of the blob as a string."
        if not self.hasfile():
            self.repo.seekstream.seek(self.start)
            return self.repo.seekstream.read(self.size)
        if global_options["compressblobs"]:
            with bz2.BZ2File(self.blobfile(), "r") as rfp:
                return rfp.read()
        with open(self.blobfile(), "rb") as rfp:
            return rfp.read()
    def set_mark(self, mark):
        "Set the blob's mark."
        self.mark = mark
        # Keep the repo's mark lookup table pointing at us.
        self.repo._mark_to_object[mark] = self
        return mark
    def forget(self):
        "De-link this blob from its repo."
        self.repo = None
    def set_content(self, text, tell=None):
        "Set the content of the blob from a string."
        self.start = tell
        self.size = len(text)
        if not self.hasfile():
            return
        if global_options["compressblobs"]:
            with bz2.BZ2File(self.blobfile(create=True), "w") as wfp:
                return wfp.write(text)
        with open(self.blobfile(create=True), "wb") as wfp:
            wfp.write(text)
    def moveto(self, repo):
        "Change the repo this blob is associated with."
        if self.hasfile():
            # The file path depends on the repo, so take it before and
            # after the switch and move the file across.
            oldloc = self.blobfile()
            self.repo = repo
            newloc = self.blobfile(create=True)
            if debug_enable(DEBUG_SHUFFLE):
                announce("blob rename calls os.rename(%s, %s)" % (oldloc, newloc))
            os.rename(oldloc, newloc)
        return self
    def clone(self, repo):
        "Clone a copy of this blob, pointing at the same file."
        twin = copy.copy(self)
        twin.repo = repo
        twin.colors = []
        if self.hasfile():
            if debug_enable(DEBUG_SHUFFLE):
                announce("blob clone for %s (%s) calls os.link(): %s -> %s" % (self.mark, self.pathlist, self.blobfile(), twin.blobfile()))
            # Hard-link rather than copy the content file.
            os.link(self.blobfile(), twin.blobfile(create=True))
        return twin
    def dump(self, vcs=None, options=None, realized=None):
        "Dump this blob in import-stream format."
        pacify_pylint(realized)
        if self.hasfile() and not os.path.exists(self.blobfile()):
            return ''
        content = self.get_content()
        if vcs is None and self.repo.vcs and self.repo.vcs.importer:
            vcs = self.repo.vcs
        # Ugh.  This is where we mess with ignore syntax translation
        translating = options is None or '--noignores' not in options
        if (translating and vcs and self.repo.vcs
                and len(self.pathlist) == 1
                and self.pathlist[0].endswith(".gitignore")
                and vcs.name == "hg" and self.repo.vcs.name != "hg"
                and not content.startswith("syntax: glob\n")):
            content = "syntax: glob\n" + content
        return "blob\nmark %s\ndata %d\n%s\n" % (self.mark, len(content), content)
    def __str__(self):
        return self.dump()

class Tag(object):
    "Represents an annotated tag."
    __slots__ = ("repo", "name", "color", "committish",
                 "target", "tagger", "comment", "deletehook")
    __hash__ = None
    def __init__(self, repo=None,
                 name=None, committish=None, target=None, tagger=None, comment=None):
        "Create a tag and attach it to its repo and target commit."
        self.repo = None
        self.name = name
        self.color = None
        self.committish = None
        self.target = None
        self.remember(repo, committish=committish, target=target)
        self.tagger = tagger
        self.comment = comment
        self.deletehook = None
    def remember(self, repo, committish=None, target=None):
        "Remember an attachment to a repo and commit."
        self.repo = repo
        if target is not None:
            self.target = target
            self.committish = target.mark
        else:
            self.committish = committish
            if self.repo:
                self.target = self.repo.objfind(self.committish)
        if self.target:
            self.target.attachments.append(self)
    def forget(self):
        "Forget this tag's attachment to its commit and repo."
        if self.target:
            try:
                self.target.attachments.remove(self)
            except ValueError:
                pass
            self.target = None
        self.repo = None
    def index(self):
        "Our 0-origin index in our repo."
        return self.repo.index(self)
    def id_me(self):
        "ID this tag for humans."
        # A tag has no mark attribute of its own (see __slots__);
        # identify it by the committish it points at.
        return "tag@%s (%s)" % (self.committish, self.name)
    def tags(self, _modifiers, eventnum, _cols):
        "Enable do_tags() to report tags."
        return "%6d\ttag\t%s" % (eventnum+1, self.name)
    def email_out(self, modifiers, eventnum):
        "Enable do_mailbox_out() to report these."
        msg = RepoSurgeonEmail()
        msg["Event-Number"] = str(eventnum+1)
        msg["Tag-Name"] = self.name
        if self.tagger:
            self.tagger.email_out(modifiers, msg, "Tagger")
        msg.set_payload(self.comment)
        if self.comment and not self.comment.endswith("\n"):
            complain("in tag %s, comment was not LF-terminated." % self.name)
        return str(msg)
    def email_in(self, msg):
        """Update this Tag from a parsed email message.

        Returns True if anything changed.  Raises Fatal if the message
        has no Tag-Name header or an unrecognizable Tagger address.
        """
        if "Tag-Name" not in msg:
            raise Fatal("update to tag %s is malformed" % self.name)
        modified = False
        newname = msg["Tag-Name"]
        if self.name != newname:
            if debug_enable(DEBUG_EMAILIN):
                announce("in tag %d, Tag-Name is modified %s -> %s" \
                      % (int(msg["Event-Number"]), repr(self.name), repr(newname)))
            self.name = newname
            modified = True
        if "Tagger" in msg:
            (newname, newemail) = email.utils.parseaddr(msg["Tagger"])
            if not newname or not newemail:
                raise Fatal("can't recognize address in Tagger: %s" % msg['Tagger'])
            else:
                if self.tagger.name != newname or self.tagger.email != newemail:
                    (self.tagger.name, self.tagger.email) = (newname, newemail)
                    if debug_enable(DEBUG_EMAILIN):
                        announce("in tag %d, Tagger is modified" \
                              % (int(msg["Event-Number"])))
                    modified = True
            if "Tagger-Date" in msg:
                date = Date(msg["Tagger-Date"])
                if self.tagger.date is None or date != self.tagger.date:
                    # Yes, display this unconditionally
                    if self.repo:
                        if self.tagger.date is None:
                            # No previous date, so there is no delta
                            # to compute.
                            announce("in %s, Tagger-Date is set to '%s'" \
                                     % (self.id_me(), date))
                        else:
                            announce("in %s, Tagger-Date is modified '%s' -> '%s' (delta %d)" \
                                 % (self.id_me(),
                                    self.tagger.date, date,
                                    self.tagger.date.delta(date)))
                    self.tagger.date = date
                    modified = True
        newcomment = msg.get_payload()
        if global_options["canonicalize"]:
            newcomment = newcomment.strip().replace("\r\n", "\n") + '\n'
        if newcomment != self.comment:
            if debug_enable(DEBUG_EMAILIN):
                announce("in tag %d, comment is modified %s -> %s" \
                      % (int(msg["Event-Number"]), repr(self.comment), repr(newcomment)))
            modified = True
            self.comment = newcomment
        return modified
    @staticmethod
    def branchname(tagname):
        "Return the full branch reference corresponding to a tag."
        fulltagname = tagname
        if tagname.count("/") == 0:
            fulltagname = "tags/" + fulltagname
        if not fulltagname.startswith("refs/"):
            fulltagname = "refs/" + fulltagname
        return fulltagname
    def dump(self, vcs=None, options=None, realized=None):
        "Dump this tag in import-stream format."
        pacify_pylint(vcs)
        pacify_pylint(options)
        pacify_pylint(realized)
        parts = ["tag %s\nfrom %s\n" % (self.name, self.committish)]
        if self.tagger:
            parts.append("tagger %s\n" % self.tagger)
        parts.append("data %d\n%s\n" % (len(self.comment or ""), self.comment or ""))
        return "".join(parts)
    def __str__(self):
        return self.dump()

class Reset(object):
    "Represents a branch creation."
    __slots__ = ("repo", "ref", "committish", "target", "deletehook", "color")
    __hash__ = None
    def __init__(self, repo, ref=None, committish=None, target=None):
        "Create a reset of ref and attach it to its repo and target."
        self.repo = None
        self.ref = ref
        self.committish = None
        self.target = None
        # remember() fills in repo, committish and target.
        self.remember(repo, committish=committish, target=target)
        self.deletehook = None
        self.color = None
    def remember(self, repo, committish=None, target=None):
        "Remember an attachment to a repo and commit."
        self.repo = repo
        if target is None:
            # Only a committish was given; look its object up if we can.
            self.committish = committish
            if self.repo:
                self.target = self.repo.objfind(self.committish)
        else:
            self.target = target
            self.committish = target.mark
        if self.target:
            self.target.attachments.append(self)
    def forget(self):
        "Forget this reset's attachment to its commit and repo."
        attached_to = self.target
        if attached_to:
            try:
                attached_to.attachments.remove(self)
            except ValueError:
                # We were not on the target's attachment list.
                pass
            self.target = None
        self.repo = None
    def moveto(self, repo):
        "Change the repo this reset is associated with."
        self.repo = repo
    def tags(self, _modifiers, eventnum, _cols):
        "Enable do_tags() to report resets."
        return "%6d\treset\t%s" % (eventnum+1, self.ref)
    def dump(self, vcs=None, options=None, realized=None):
        "Dump this reset in import-stream format."
        pacify_pylint(vcs)
        pacify_pylint(options)
        if realized is not None:
            # Record the branch, minus anything from a '^' onward.
            realized[self.ref.split("^")[0]] = True
        text = "reset %s\n" % self.ref
        if self.committish:
            text += "from %s\n\n" % self.committish
        return text
    def __str__(self):
        return self.dump()

class FileOp(object):
    "Represent a primitive operation on a file."
    __slots__ = ("vcs", "op", "committish", "source", "target",
                 "mode", "path", "ref", "inline",
                 "sourcedelete", "targetdelete")
    __hash__ = None
    modify_re = re.compile(r"(M) ([0-9]+) (\S+) (.*)")
    sortkey_sentinel = chr(ord("/") + 1)
    def __init__(self, vcs=None):
        "Create an empty fileop; fill it in with construct() or parse()."
        self.vcs = vcs
        self.op = None
        self.committish = None
        self.source = None
        self.target = None
        self.mode = None
        self.path = None
        self.ref = None
        self.inline = None
        # NOTE(review): the sourcedelete and targetdelete slots are not
        # initialized here; presumably set by code elsewhere - confirm.
    @staticmethod
    def _unquote(path):
        "Strip one pair of enclosing double quotes from a path, if present."
        if path.startswith('"') and path.endswith('"'):
            return path[1:-1]
        return path
    def path_remap_in(self):
        "Hack the fileop's basename to map it to git conventions."
        # Ignore file names from non-git VCSes need to get
        # mapped to .gitignore, because we have to
        # have some way to recognize what they are
        # in order to remap the name properly on
        # export.
        if self.vcs is not None:
            if os.path.basename(self.path) == self.vcs.ignorename:
                self.path = os.path.join(os.path.dirname(self.path), ".gitignore")
    def path_remap_out(self, path, vcs):
        "Hack the fileop's basename to map it to a target VCS's conventions."
        if vcs is not None and vcs.ignorename is not None:
            if os.path.basename(path) == ".gitignore":
                return os.path.join(os.path.dirname(path), vcs.ignorename)
        return path
    def _dump_path(self, vcs):
        "Our path remapped for vcs, double-quoted if it contains whitespace."
        # Remap first so that quoted paths get the same ignore-file
        # translation as unquoted ones.
        path = self.path_remap_out(self.path, vcs)
        if len(path.split()) > 1:
            return '"' + path + '"'
        return path
    def setOp(self, op):
        "Set the operation type."
        self.op = op
    @staticmethod
    def sortkey(fileop):
        "Compute a key suited for sorting FileOps as git fast-export does."
        # As it says, 'Handle files below a directory first, in case they are
        # all deleted and the directory changes to a file or symlink.'
        # First sort the renames last, then sort lexicographically
        # We append a sentinel to make sure "a/b/c" < "a/b" < "a".
        return (fileop.op == "R",
                (fileop.path or fileop.source or "") + \
                        fileop.sortkey_sentinel)
    def construct(self, *opargs):
        """Fill in this fileop from an op code followed by its arguments.

        Raises Fatal if the op code is not one of M, D, N, R, C or
        deleteall.
        """
        if opargs[0] == "M":
            (self.op, self.mode, self.ref, self.path) = opargs
            self.path_remap_in()
            # Modes are kept as six-digit octal strings.
            if isinstance(self.mode, int):
                self.mode = "%06o" % self.mode
        elif opargs[0] == "D":
            (self.op, self.path) = opargs
            self.path_remap_in()
        elif opargs[0] == "N":
            (self.op, self.ref, self.committish) = opargs
        elif opargs[0] in ("R", "C"):
            (self.op, self.source, self.target) = opargs
        elif opargs[0] == "deleteall":
            self.setOp("deleteall")
        else:
            raise Fatal("unexpected fileop %s" % opargs[0])
    def parse(self, opline):
        """Fill in this fileop from a line of an import stream.

        Returns self.  Raises Fatal on a malformed or unrecognized line.
        """
        if opline.startswith("M"):
            m = FileOp.modify_re.match(opline)
            if not m:
                raise Fatal("bad format of M line: %s" % repr(opline))
            (self.op, self.mode, self.ref, self.path) = m.groups()
            self.path = FileOp._unquote(self.path)
            self.path_remap_in()
        elif opline.startswith("N"):
            try:
                # Escape single quotes so shlex doesn't treat them
                # as quoting.
                opline = opline.replace("'", r"\'")
                (self.op, self.ref, self.committish) = shlex.split(opline)
            except ValueError:
                raise Fatal("ill-formed fileop %s" % repr(opline))
        elif opline.startswith("D"):
            (self.op, self.path) = ("D", opline[2:].strip())
            self.path = FileOp._unquote(self.path)
            self.path_remap_in()
        elif opline.startswith(("R", "C")):
            try:
                opline = opline.replace("'", r"\'")
                (self.op, self.source, self.target) = shlex.split(opline)
            except ValueError:
                raise Fatal("ill-formed fileop %s" % repr(opline))
        elif opline == "deleteall":
            self.op = "deleteall"
        else:
            raise Fatal("unexpected fileop %s while parsing" % opline)
        return self
    def paths(self):
        "Return the set of all paths touched by this file op."
        if self.op in ("M", "D"): return {self.path}
        if self.op in ("R", "C"): return {self.source, self.target}
        # Ugh...this isn't right for deleteall, but since we don't expect
        # to see that except at branch tips we'll ignore it for now.
        if self.op in ("N", "deleteall"): return set()
        raise Fatal("unknown fileop type")
    def relevant(self, other):
        "Do two fileops touch the same file(s)?"
        if self.op == "deleteall" or other.op == "deleteall":
            return True
        else:
            return self.paths() & other.paths()
    def dump(self, vcs=None, options=None):
        """Dump this fileop in import-stream format.

        Paths are remapped to the ignore-file naming of vcs, if given.
        Raises Fatal on an unknown op code.
        """
        pacify_pylint(vcs)
        pacify_pylint(options)
        if self.op == "M":
            showmode = self.mode
            if isinstance(self.mode, int):
                showmode = "%06o" % self.mode
            parts = [" ".join((self.op, showmode, self.ref)), " ",
                     self._dump_path(vcs)]
            if self.ref == 'inline':
                parts.append("\ndata %d\n%s" % (len(self.inline), self.inline))
        elif self.op == "N":
            parts = [" ".join((self.op, self.ref, self.committish)), "\n"]
            if self.ref == 'inline':
                parts.append("data %d\n%s" % (len(self.inline), self.inline))
        elif self.op == "D":
            parts = ["D ", self._dump_path(vcs)]
        elif self.op in ("R", "C"):
            parts = ['%s "%s" "%s"' %  (self.op,
                                    self.path_remap_out(self.source, vcs),
                                    self.path_remap_out(self.target, vcs))]
        elif self.op == "deleteall":
            parts = [self.op]
        else:
            raise Fatal("unexpected fileop %s while writing" % self.op)
        return "".join(parts)
    def __str__(self):
        return self.dump(self.vcs)

class Commit(object):
    "Generic commit object."
    __slots__ = ("repo", "mark", "authors", "committer", "comment",
                 "branch", "fileops", "properties", "filemap", "color",
                 "fossil_id", "common", "splits", "deletehook", "attachments",
                 "_parent_nodes", "_child_nodes", "_pathset")
    __hash__ = None
    def __init__(self, repo=None):
        self.repo = repo
        self.mark = None             # Mark name of commit (may be None)
        self.authors = []            # Authors of commit
        self.committer = None        # Person responsible for committing it.
        self.comment = None          # Commit comment
        self.branch = None           # branch name
        self.fileops = []            # blob and file operation list
        self.properties = collections.OrderedDict()         # commit properties (extension)
        self.filemap = None
        self.color = None
        self.fossil_id = None        # Commit's ID in an alien system
        self.common = None           # Used only by the Subversion parser
        self.splits = None           # split command increments this
                                     # to avoid creating multiple new commits
                                     # with duplicate marks
        self.deletehook = None	     # Hook used during deletion operations
        self.attachments = []
        self._parent_nodes = []      # list of parent nodes
        self._child_nodes = []       # list of child nodes
        self._pathset = None
    def index(self):
        "Our 0-origin index in our repo."
        owner = self.repo
        return owner.index(self)
    def id_me(self):
        "ID this commit for humans."
        if not self.fossil_id:
            return "commit@%s" % self.mark
        # Tack on the ID from the alien system when we have one.
        return ("commit@%s" % self.mark) + ("=<%s>" % self.fossil_id)
    def when(self):
        "Imputed timestamp for sorting after unites."
        # This is the committer's date, not any author's.
        stamp = self.committer.date
        return stamp.timestamp
    def moveto(self, repo):
        "Change the repo this commit is associated with."
        # Only the back-reference is changed here.
        self.repo = repo
    def set_branch(self, branch):
        "Set this commit's branch field, optimizing for fast comparisons."
        # Store the interned copy of the name.
        interned = intern(branch)
        self.branch = interned
    def clone(self, repo=None):
        "Clone this commit, without its fileops, color and children."
        twin = copy.copy(self)
        # Attributions are copied deeply so the two commits do not
        # share them.
        twin.committer = copy.deepcopy(self.committer)
        twin.authors = copy.deepcopy(self.authors)
        # Things the clone starts without.
        twin.fileops = []
        twin.filemap = None
        twin._pathset = None
        twin.color = None
        twin._child_nodes = []
        if repo is not None:
            twin.repo = repo
        # use the encapsulation to set parents instead of relying
        # on the copy, so that Commit can do its bookkeeping.
        twin._parent_nodes = [] # avoid confusing set_parents()
        twin.set_parents(list(self.parents()))
        return twin
    def showfossil(self):
        "Show a fossil ID in the expected form for the ancestral system."
        if not self.fossil_id:
            return None
        repo = self.repo
        # Special case for Subversion
        from_svn = repo and repo.vcs and repo.vcs.name == "svn"
        if from_svn:
            return "r" + self.fossil_id
        return self.fossil_id
    def lister(self, _modifiers, eventnum, cols):
        "Enable do_list() to report commits."
        # Only the first line of the comment is shown.
        topline = self.comment.split("\n")[0]
        pieces = ["%6d %s %6s " % \
                      (eventnum+1, self.committer.date.rfc3339(), self.mark)]
        if self.fossil_id:
            pieces.append("%6s " % ("<%s>" % self.fossil_id))
        pieces.append(topline)
        # Clip the report to the requested width.
        return "".join(pieces)[:cols]
    def tip(self, _modifiers, eventnum, cols):
        "Enable do_tip() to report deduced branch tips."
        prefix = "%6d %s %6s " % \
                     (eventnum+1, self.committer.date.rfc3339(), self.mark)
        # Clip the report to the requested width.
        line = prefix + self.head()
        return line[:cols]
    def tags(self, _modifiers, eventnum, _cols):
        "Enable do_tags() to report tag tip commits."
        if not self.branch or "/tags/" not in self.branch:
            return None
        if self.has_children():
            # Branches of the children that have us as first parent.
            successor_branches = set(child.branch
                                     for child in self.children()
                                     if child.parents()[0] == self)
            # Not a tip if every such child stays on our own branch.
            if successor_branches == set([self.branch]):
                return None
        return "%6d\tcommit\t%s" % (eventnum+1, self.branch)
    def email_out(self, modifiers, eventnum):
        """Enable do_mailbox_out() to report these.

        Returns this commit rendered as an RFC822-style message string;
        eventnum is the commit's 0-origin event index.
        """
        msg = RepoSurgeonEmail()
        msg["Event-Number"] = str(eventnum+1)
        msg["Branch"] = self.branch
        msg["Parents"] = " ".join(self.parent_marks())
        if self.authors:
            self.authors[0].email_out(modifiers, msg, "Author")
            # Co-authors are numbered from Author2 upward.
            for (i, coauthor) in enumerate(self.authors[1:]):
                coauthor.email_out(modifiers, msg, "Author" + repr(2+i))
        self.committer.email_out(modifiers, msg, "Committer")
        if self.fossil_id:
            msg["Fossil-ID"] = self.fossil_id
        for (name, value) in self.properties.iteritems():
            hdr = "-".join(s.capitalize() for s in name.split("-"))
            # email_in() turns "True"/"False" into booleans; turn them
            # back into text before escaping.
            if isinstance(value, bool):
                value = str(value)
            value = value.replace("\n", r"\n")
            value = value.replace("\t", r"\t")
            msg["Property-" + hdr] = value
        msg.set_payload(self.comment)
        if self.comment is not None and not self.comment.endswith("\n"):
            complain("in commit %s, comment was not LF-terminated." % self.mark)
        return str(msg)
    def action_stamp(self):
        "Control how a commit stamp is made."
        # Prefer the primary author to the committer because it
        # doesn't get messed with when passing around and applying
        # patch sets.
        signer = self.authors[0] if self.authors else self.committer
        return signer.action_stamp()
    def email_in(self, msg):
        """Update this commit from a parsed email message.

        Returns True if any field of the commit was changed.  Raises
        Fatal when a Committer or Author header has no usable address.
        """
        modified = False
        if "Branch" in msg:
            if self.branch != msg["Branch"]:
                modified = True
            self.set_branch(msg["Branch"])
        if "Parents" in msg:
            if self.parent_marks() != msg["Parents"].split():
                modified = True
            self.set_parent_marks(msg["Parents"].split())
        if "Committer" in msg:
            (newname, newemail) = email.utils.parseaddr(msg["Committer"])

            if not newemail:
                raise Fatal("can't recognize address in Committer: %s" % msg["Committer"])
            else:
                if self.committer.name != newname or self.committer.email != newemail:
                    (self.committer.name, self.committer.email) = (newname, newemail)
                    # Yes, display this unconditionally
                    if self.repo:
                        announce("in %s, Committer is modified" % self.id_me())
                    modified = True
        if "Committer-Date" in msg:
            date = Date(msg["Committer-Date"])
            if self.committer.date is None or date != self.committer.date:
                # Yes, display this unconditionally
                if self.repo:
                    if self.committer.date is None:
                        # No previous date, so there is no delta to compute.
                        announce("in %s, Committer-Date is set to '%s'" \
                                 % (self.id_me(), date))
                    else:
                        announce("in %s, Committer-Date is modified '%s' -> '%s' (delta %d)" \
                              % (self.id_me(),
                                 self.committer.date, date,
                                 self.committer.date.delta(date)))
                self.committer.date = date
                modified = True
        if "Author" in msg:
            author_re = re.compile("Author[0-9]*$")
            # Sort on the numeric suffix so that Author10 follows
            # Author9; the bare "Author" header sorts first.
            # msg is *not* a dict so the .keys() is correct
            authorkeys = sorted((hdr for hdr in msg.keys() if author_re.match(hdr)),
                                key=lambda hdr: int(hdr[len("Author"):] or 0))
            for i in range(len(authorkeys) - len(self.authors)):
                self.authors.append(Attribution())
            # Another potential minor bug: permuting the set of authors
            # will look like a modification, as old and new authors are
            # compared pairwise rather than set equality being checked.
            # Possibly a feature if one thinks order is significant, but
            # I just did it this way because it was easier.
            for (i, hdr) in enumerate(authorkeys):
                (newname, newemail) = email.utils.parseaddr(msg[hdr])
                if not newemail:
                    raise Fatal("can't recognize address in %s: %s" % (hdr, msg[hdr]))
                else:
                    if self.authors[i].name != newname or self.authors[i].email != newemail:
                        (self.authors[i].name, self.authors[i].email) = (newname, newemail)
                        if debug_enable(DEBUG_EMAILIN):
                            announce("in commit %s, Author #%d is modified" \
                                  % (msg["Event-Number"], i+1))
                        modified = True
                if hdr + "-Date" in msg:
                    date = Date(msg[hdr + "-Date"])
                    # A freshly added author has no date to compare with.
                    if self.authors[i].date is None or date != self.authors[i].date:
                        # Yes, display this unconditionally
                        if self.repo:
                            announce("in event %s, %s-Date #%d is modified" \
                                     % (msg["Event-Number"], hdr, i+1))
                        self.authors[i].date = date
                        modified = True
        if "Fossil-ID" in msg:
            if msg["Fossil-ID"] != self.fossil_id:
                modified = True
                # Take the new ID from the message, not the reverse.
                self.fossil_id = msg["Fossil-ID"]
        newprops = collections.OrderedDict()
        for prophdr in msg.keys():
            if not prophdr.startswith("Property-"): continue
            propkey = prophdr[9:].lower()
            propval = msg[prophdr]
            if propval == "True":
                propval = True
            elif propval == "False":
                propval = False
            else:
                # Undo the escaping applied when the message was written.
                propval = propval.replace(r"\n", "\n")
                propval = propval.replace(r"\t", "\t")
            newprops[propkey] = propval
        modified |= (newprops != self.properties)
        self.properties = newprops
        newcomment = msg.get_payload()
        if global_options["canonicalize"]:
            newcomment = newcomment.strip() + '\n'
        if newcomment != self.comment:
            if debug_enable(DEBUG_EMAILIN):
                announce("in %s, comment is modified %s -> %s" \
                      % (self.id_me(), repr(self.comment), repr(newcomment)))
            modified = True
            self.comment = newcomment
        return modified
    def set_mark(self, mark):
        "Record the mark on this commit, index the commit under it, return it."
        self.mark = mark
        marks_index = self.repo._mark_to_object
        marks_index[mark] = self
        return mark
    def forget(self):
        "Drop every parent link, then detach the commit from its repository."
        no_parents = []
        self.set_parents(no_parents)
        self.repo = None
    # Hide the parent list behind an interface, so that we can memoize
    # the computation, which is very expensive and frequently
    # performed.
    def parents(self):
        "Return the list of this commit's parents (the live list, not a copy)."
        nodes = self._parent_nodes
        return nodes
    def parent_marks(self):
        "Return the marks of this commit's parents, in parent order."
        marks = []
        for parent in self._parent_nodes:
            marks.append(parent.mark)
        return marks
    def set_parent_marks(self, marks):
        "Resolve each mark through the repository and install the results as parents."
        resolved = []
        for mark in marks:
            resolved.append(self.repo.objfind(mark))
        self.set_parents(resolved)
    def set_parents(self, parents):
        "Replace the parent list, keeping the parents' child lists in step."
        # Purge every reference to this commit from the old parents'
        # child lists before switching over.
        for former in self._parent_nodes:
            survivors = []
            for kid in former._child_nodes:
                if kid is not self:
                    survivors.append(kid)
            former._child_nodes = survivors
        self._parent_nodes = parents
        assert all(self._parent_nodes)
        # Register this commit as a child of each new parent.
        for adopted in self._parent_nodes:
            adopted._child_nodes.append(self)
        self.repo.invalidate_manifests()
    def add_parent(self, mark):
        "Append a parent, given either a Commit object or a mark to look up."
        newparent = mark if isinstance(mark, Commit) else self.repo.objfind(mark)
        assert newparent
        self._parent_nodes.append(newparent)
        newparent._child_nodes.append(self)
        self.repo.invalidate_manifests()
    def insert_parent(self, idx, mark):
        "Look up mark and splice that commit into the parent list at idx."
        newcomer = self.repo.objfind(mark)
        assert newcomer
        parent_list = self._parent_nodes
        parent_list.insert(idx, newcomer)
        newcomer._child_nodes.append(self)
        self.repo.invalidate_manifests()
    def remove_parent(self, event):
        "Cut every parent/child link between this commit and event."
        # Drop *all* occurrences of event from our parent list...
        kept_parents = []
        for node in self._parent_nodes:
            if node is not event:
                kept_parents.append(node)
        self._parent_nodes = kept_parents
        # ...and all occurrences of this commit from event's child list.
        kept_children = []
        for node in event._child_nodes:
            if node is not self:
                kept_children.append(node)
        event._child_nodes = kept_children
        self.repo.invalidate_manifests()
    def replace_parent(self, e1, e2):
        "Swap the first occurrence of parent e1 for e2 and fix up child lists."
        slot = self._parent_nodes.index(e1)
        self._parent_nodes[slot] = e2
        e1._child_nodes.remove(self)
        e2._child_nodes.append(self)
        self.repo.invalidate_manifests()
    def has_parents(self):
        "Predicate - does this commit have any parents?"
        return True if self._parent_nodes else False
    def children(self):
        "Return the list of this commit's children (the live list, not a copy)."
        kids = self._child_nodes
        return kids
    def child_marks(self):
        "Return the marks of this commit's children, in child order."
        marks = []
        for child in self._child_nodes:
            marks.append(child.mark)
        return marks
    def has_children(self):
        "Predicate - is at least one commit recorded as a child of this one?"
        return True if self._child_nodes else False
    def first_child(self):
        "Get the first child of this commit, or None if not has_children()."
        # Honor the documented contract: an empty child list yields None
        # rather than an IndexError.
        if not self._child_nodes:
            return None
        return self._child_nodes[0]
    def descended_from(self, other):
        "Is this commit a descendent of the specified other?"
        # A root commit descends from nothing.
        if not self.has_parents():
            return False
        # A commit dated before other is not searched any further.
        if self.committer.date < other.committer.date:
            return False
        if other in self.parents():
            return True
        # Otherwise recurse up through every parent.
        for parent in self.parents():
            if parent.descended_from(other):
                return True
        return False
    def cliques(self):
        "Map each path to the indices of the M fileops that touch it."
        grouped = collections.defaultdict(list)
        for position, fileop in enumerate(self.fileops):
            if fileop.op != "M":
                continue
            grouped[fileop.path].append(position)
        return grouped
    def fileop_dump(self):
        "Print an indexed listing of the file ops; a debugging aid only."
        position = self.repo.find(self.mark) + 1
        print("commit %d, mark %s:" % (position, self.mark))
        for (i, op) in enumerate(self.fileops):
            # None entries are skipped but still consume an index.
            if op is None:
                continue
            print("%d: %-20s" % (i, str(op)))
    def paths(self):
        "Return the (cached) set of all paths touched by this commit's fileops."
        if self._pathset is not None:
            return self._pathset
        # First request since the cache was cleared: rebuild it.
        self._pathset = set()
        for op in self.fileops:
            self._pathset |= op.paths()
        return self._pathset
    def visible(self, path):
        """Is the specified path modified and not deleted in the ancestors?

        Walk back along the first-parent chain, starting from this commit's
        first parent.  Return the nearest ancestor that creates the path
        (an M of the path, or an R/C targeting it).  Return None if a D of
        the path is found first, or if the chain runs out.

        Fileops within a single ancestor are scanned in list order, so an
        ancestor that both deletes and re-creates the path is decided by
        whichever op comes first.
        """
        ancestor = self
        while True:
            parents = ancestor.parents()
            if not parents:
                return None
            ancestor = parents[0]
            for fileop in ancestor.fileops:
                if fileop.op == "D" and fileop.path == path:
                    # The path was deleted here, so nothing older can make
                    # it visible.  (A bare 'break' would only leave this
                    # inner loop and keep searching older ancestors.)
                    return None
                elif fileop.op == "M" and fileop.path == path:
                    return ancestor
                elif fileop.op in ("R", "C") and fileop.target == path:
                    return ancestor
    def manifest(self):
        "Return a map from paths to marks for files existing at this commit."
        self.repo._has_manifests = True
        # _manifest() recurses through first parents, so make sure the
        # recursion limit is at least twice the event count (never lower it).
        current_limit = sys.getrecursionlimit()
        wanted_limit = len(self.repo.events) * 2
        sys.setrecursionlimit(max(current_limit, wanted_limit))
        return self._manifest()
    def _manifest(self):
        "Compute, cache and return this commit's path map (recursive worker)."
        cached = self.filemap
        if cached is not None:
            return cached
        # Start from a snapshot of the first parent's manifest, or from an
        # empty map when there is no parent.
        try:
            tree = self.parents()[0]._manifest().snapshot()
        except IndexError:
            tree = PathMap()
        # Then replay this commit's own fileops on top of it.
        for fileop in self.fileops:
            kind = fileop.op
            if kind == 'M':
                tree[fileop.path] = (fileop.mode, fileop.ref)
            elif kind == 'D':
                if fileop.path in tree:
                    del tree[fileop.path]
            elif kind in ('C', 'R'):
                tree[fileop.target] = tree[fileop.source]
                # A rename additionally drops the source.
                if kind == 'R' and fileop.source in tree:
                    del tree[fileop.source]
            elif kind == 'deleteall':
                tree = PathMap()
        self.filemap = tree
        return tree
    def canonicalize(self):
        """Replace fileops by a minimal set of D and M with the same result.

        The new op list is derived by comparing the first parent's manifest
        with this commit's own manifest, restricted to the paths this
        commit's ops touch (or to the whole parent tree when a deleteall is
        present).  Does nothing when there are no fileops; when the last op
        is a deleteall, only that op is kept.
        """
        # If last fileop is a deleteall, only keep that.
        try:
            lastop = self.fileops[-1]
        except IndexError:
            return
        else:
            if lastop.op == "deleteall":
                self.fileops = [lastop]
                return
        # Fetch the tree state before us...
        try:
            parent = self.parents()[0]
        except IndexError:
            # No parent: compare against an empty tree.
            parent = PathMap()
        else:
            parent = parent.manifest()
        # ... and after our file operations have been applied.
        # Both manifests must be computed before self.fileops is replaced
        # below, since manifest() is derived from the current fileops.
        current = self.manifest()
        # Get paths touched by non-deleteall operations.
        paths = self.paths()
        # Generate needed D fileops.
        if any(op.op == "deleteall" for op in self.fileops):
            # Any file in the parent tree might disappear.
            check_delete = parent
        else:
            # Only files touched by non-deleteall ops might disappear.
            check_delete = paths
        # new_ops aliases the new self.fileops list; appends below land in both.
        self.fileops = new_ops = []
        for path in check_delete:
            if path in parent and path not in current:
                fileop = FileOp()
                fileop.construct("D", path)
                new_ops.append(fileop)
        # Generate needed M fileops.
        # Only paths touched by non-deleteall ops can be changed.
        for path in paths:
            try:
                mode, mark = current[path]
            except TypeError:
                # PathMap indexing returns None for a path that is not
                # present as a file, and None cannot be unpacked: the path
                # does not exist after this commit, so no M is needed.
                continue
            if (mode, mark) != parent[path]:
                fileop = FileOp()
                fileop.construct("M", mode, mark, path)
                new_ops.append(fileop)
        # Finishing touches:
        new_ops.sort(key=FileOp.sortkey)
        # The op list changed, so drop the cached path set used by paths().
        self._pathset = None
    def alldeletes(self, killset={"D", "deleteall"}):
        "Predicate - does every fileop have an op type in killset?"
        # Vacuously true for a commit with no fileops.
        for fileop in self.fileops:
            if fileop.op not in killset:
                return False
        return True
    def checkout(self, directory=None):
        """Make a directory with links to files in a specified checkout.

        directory defaults to a subdirectory of the repository's scratch
        area named after this commit's mark.  Every file in the commit's
        manifest is materialized beneath it: hard-linked to the blob's
        backing file when the blob has one, otherwise written out from the
        blob content.  Returns the directory path.

        Raises Recoverable if the directory or any file cannot be created.
        """
        if not directory:
            directory = os.path.join(self.repo.subdir(), self.mark)
        try:
            os.mkdir(directory)
            for (path, (_, mark)) in self.manifest().iteritems():
                fullpath = os.path.join(directory, path)
                fulldir = os.path.dirname(fullpath)
                if not os.path.exists(fulldir):
                    os.makedirs(fulldir)
                blob = self.repo.objfind(mark)
                if blob.hasfile():
                    os.link(blob.blobfile(), fullpath)
                else:
                    with open(fullpath, "wb") as wfp:
                        wfp.write(blob.get_content())
        except EnvironmentError:
            # EnvironmentError covers both OSError (mkdir/makedirs/link) and
            # IOError; under Python 2 open() and write() raise IOError,
            # which a bare 'except OSError' would let escape.
            raise Recoverable("could not create checkout directory or files.")
        return directory
    def head(self):
        "Return the branch to which this commit belongs."
        on_head_ref = self.branch.startswith("refs/heads/")
        if on_head_ref or not self.has_children():
            return self.branch
        # Follow the first child that stays on the same branch name.
        last_index = 0
        last_child = None # pacify pylint
        for last_index, last_child in enumerate(self.children()):
            if last_child.branch == self.branch:
                return last_child.head()
        # No child shares our branch; an only child is still unambiguous.
        if last_index == 0:
            return last_child.head()
        raise Recoverable("can't deduce a branch head for %s" % self.mark)
    def delete(self, policy=None):
        "Ask the owning repository to delete this commit under the given policy."
        remover = self.repo.delete
        remover([self.index()], policy)
    def dump(self, vcs=None, options=None, realized=None):
        "Serialize this commit in import-stream format and return the text."
        pacify_pylint(options)
        # With no explicit VCS, use the repository's own if it has an importer.
        if vcs is None and self.repo.vcs and self.repo.vcs.importer:
            vcs = self.repo.vcs
        chunks = []
        emit = chunks.append
        tracking = realized is not None
        # Emit a reset first when neither this branch nor the first parent's
        # branch has been recorded in the realized map yet.
        if tracking and self.has_parents() \
               and self.branch not in realized \
               and self.parents()[0].branch not in realized:
            emit("reset %s^0\n\n" % self.branch)
        if self.fossil_id:
            emit("# Fossil-ID: %s\n" % self.fossil_id)
        emit("commit %s\n" % self.branch)
        if tracking:
            realized[self.branch] = True
        if self.mark:
            emit("mark %s\n" % self.mark)
        if self.authors:
            chunks.extend("author %s\n" % who for who in self.authors)
        if self.committer:
            emit("committer %s\n" % self.committer)
        if self.comment is not None:
            body = self.comment
            if options and "--fossilize" in options and self.fossil_id:
                body += "\nFossil-ID: %s\n" % self.fossil_id
            emit("data %d\n%s" % (len(body), body))
        if "nl-after-comment" in self.repo.export_style():
            emit("\n")
        # First parent becomes 'from', any further parents become 'merge'.
        ancestry = self.parents()
        if ancestry:
            emit("from %s\n" % ancestry[0].mark)
        chunks.extend("merge %s\n" % other.mark for other in ancestry[1:])
        if vcs and vcs.properties:
            for (name, value) in self.properties.iteritems():
                if value in (True, False):
                    # Boolean properties are written bare, and only when set.
                    if value:
                        emit("property %s\n" % name)
                    continue
                text = str(value)
                emit("property %s %d %s\n" % (name, len(text), text))
        for op in self.fileops:
            emit(op.dump(vcs) + "\n")
        if "no-nl-after-commit" not in self.repo.export_style():
            emit("\n")
        return "".join(chunks)
    def __str__(self):
        "The string form of a commit is its default import-stream dump."
        rendered = self.dump()
        return rendered

class Passthrough(object):
    "An event holding one import-stream line that is carried through verbatim."
    __slots__ = ("text", "deletehook", "color")
    __hash__ = None
    def __init__(self, line):
        self.text, self.deletehook, self.color = line, None, None
    def email_out(self, _modifiers, eventnum):
        "Render as a message (1-origin event number header, text as payload)."
        envelope = RepoSurgeonEmail()
        envelope["Event-Number"] = str(eventnum+1)
        envelope.set_payload(self.text)
        return str(envelope)
    def email_in(self, msg):
        "Replace the passthrough text with the payload of the given message."
        payload = msg.get_payload()
        self.text = payload
    def dump(self, vcs=True, options=None, realized=None):
        "Return the stored text; the arguments only mirror other events' dump()."
        for unused in (vcs, options, realized):
            pacify_pylint(unused)
        return self.text
    def __str__(self):
        return self.dump()

# Generic extractor code begins here

class signature:
    """A file signature - file path, hash value of content and permissions.

    For a directory only the path is recorded; hashval and perms stay None.
    For anything else hashval is the SHA-1 hex digest of the content and
    perms is the stat mode, folded where possible onto the modes the
    stream format allows.
    """
    def __init__(self, path):
        self.path = path
        self.hashval = None
        self.perms = None
        if not os.path.isdir(path):
            with open(path, "rb") as fp:
                self.hashval = hashlib.sha1(fp.read()).hexdigest()
            self.perms = os.stat(path).st_mode
            # Map to the restricted set of modes that are allowed in
            # the stream format.
            if self.perms & 0o100700 == 0o100700:
                self.perms = 0o100755
            elif self.perms & 0o100600 == 0o100600:
                self.perms = 0o100644
    def __eq__(self, other):
        # Note that this compares every attribute, the path included.
        #if debug_enable(DEBUG_EXTRACT):
        #    announce("%s == %s -> %s" % (str(self),
        #                                 str(other),
        #                                 self.__dict__ == other.__dict__))
        return self.__dict__ == other.__dict__
    def __ne__(self, other):
        return not signature.__eq__(self, other)
    def __str__(self):
        # Directory signatures carry no mode or hash; show placeholders
        # instead of failing on the None values.
        if self.perms is None:
            perms = "-" * 6
        else:
            perms = "%6o" % self.perms
        if self.hashval is None:
            digest = "-" * 4
        else:
            digest = self.hashval[:4]
        return "<%s:%s:%s>" % (self.path, perms, digest)

def capture(command):
    "Run a shell command and return its decoded standard output."
    if debug_enable(DEBUG_COMMANDS):
        stamp = rfc3339(time.time())
        announce("%s: capturing %s" % (stamp, command))
    try:
        raw = subprocess.check_output(command, shell=True)
        content = raw.decode()
    except (subprocess.CalledProcessError, OSError) as oe:
        # A failing command or a failure to launch it is fatal.
        raise Fatal("execution of '%s' failed: %s" % (command, oe))
    if debug_enable(DEBUG_COMMANDS):
        # Echo what was captured when command debugging is on.
        sys.stderr.write(content)
    return content

class PathMap(object):
    """Represent the set of filenames visible in a Subversion
    revision, using copy-on-write to keep the size of the structure in
    line with the size of the Subversion repository metadata."""
    __slots__ = ("shared", "maxid", "snapid", "store")
    __hash__ = None
    # Sentinel used as the store key when a path is given as a non-string
    # (see _split_path); _elts_items() never yields it.
    _self_value = object()
    def __init__(self, other = None):
        """Create an empty map, or one sharing the store of another PathMap.

        When other is a PathMap the new instance shares its store and
        maxid objects and takes the next unused snapshot id; any other
        argument (including None) yields a fresh, empty map.
        """
        # The instance may be a child of several other PathMaps if |shared|
        # is True. |snapid| is an integer unique among related PathMaps,
        # and |maxid| is a list (for reference sharing) whose only value is
        # the maximum |snapid| of the collection. |store| is a dict mapping
        # single-component names to lists of values indexed by snapids. The
        # values which can be other PathMaps (for directories) or anything
        # except PathMaps and None (for files).
        if not isinstance(other, PathMap):
            self.store = {}
            self.maxid = [0]
            self.snapid = 0
        else:
            self.store = other.store
            self.maxid = other.maxid
            self.snapid = self.maxid[0] = self.maxid[0] + 1
        self.shared = False
    def snapshot(self):
        "Return a copy-on-write snapshot of the set."
        r = PathMap(self)
        if self.snapid < r.snapid - 1:
            # Late snapshot of an "old" PathMap. Restore values which may
            # have changed since. This is uncommon, don't over-optimize.
            for component in self.store: # _elts_items() would skip None
                r._elts_set(component, self._elts_get(component))
        # Directory entries are now reachable from both the original and
        # the snapshot; flag them so they get snapshotted before any write.
        for _, v in r._elts_items():
            if isinstance(v, PathMap):
                v.shared = True
        return r
    def copy_from(self, target_path, source_pathset, source_path):
        "Insert, at target_path, a snapshot of source_path in source_pathset."
        source_obj = source_pathset._find(source_path)
        if source_obj is None:
            # Nothing exists at source_path; silently do nothing.
            return
        if source_obj is source_pathset:
            # Do not share toplevel instances, only inner ones
            source_obj = source_obj.snapshot()
        elif isinstance(source_obj, PathMap):
            source_obj.shared = True
        self._insert(target_path, source_obj)
    def ls_R(self, path):
        """Iterate over the file paths below the directory at path.

        The yielded paths are relative to that directory.  The iterator
        is empty when path is not a directory in the set.
        """
        elt = self._find(path)
        if isinstance(elt, PathMap):
            return iter(elt)
        return iter(()) # empty iterator
    def __contains__(self, path):
        "Return true if path is present in the set as a file."
        elt = self._find(path)
        return not isinstance(elt, PathMap) and elt is not None
    def __getitem__(self, path):
        "Return the value associated with a specified path."
        elt = self._find(path)
        if elt is None or isinstance(elt, PathMap):
            # This is not quite like indexing, which would throw IndexError
            return None
        return elt
    def __setitem__(self, path, value):
        "Add a filename to the set, with associated value (not None)."
        assert value is not None
        self._insert(path, value)
    def __delitem__(self, path):
        """Remove a filename, or all descendents of a directory name,
        from the set."""
        basename, components = self._split_path(path)
        assert(not self.shared)
        # Walk down to the containing directory, rebinding self at each
        # level; give up silently if an intermediate directory is missing.
        for component in components:
            nxt = self._elts_get(component)
            if not isinstance(nxt, PathMap):
                return
            if nxt.shared:
                # Replace a shared directory by a private snapshot before
                # modifying anything beneath it.
                nxt = self._elts_set(component, nxt.snapshot())
            self = nxt
        # Set value to None since PathMap doesn't tell None and absence apart
        self._elts_set(basename, None)
    def __nonzero__(self):
        "Return true if any filenames are present in the set."
        return any(v for _, v in self._elts_items())
    def __len__(self):
        "Return the number of files in the set."
        return sum(len(v) if isinstance(v, PathMap) else 1
                for _, v in self._elts_items())
    def iteritems(self):
        """Generate (path, value) pairs for every file in the set.

        Entries are visited in sorted component order at each level, and
        directories are descended into recursively.
        """
        for (name, value) in sorted(self._elts_items()):
            if isinstance(value, PathMap):
                for path, v in value.iteritems():
                    yield (os.path.join(name, path), v)
            elif value is not None:
                yield (name, value)
    def __iter__(self):
        "Iterate over the file paths in the set, in iteritems() order."
        return itertools.imap(operator.itemgetter(0), self.iteritems())
    def __str__(self):
        return '<PathMap: {}>'.format(' '.join(self))
    # Return the current value associated with the component in the store
    def _elts_get(self, component):
        # A snapshot list shorter than our snapid means the entry has not
        # been written since; the last recorded value applies.
        snaplist = self.store.get(component) or [None]
        return snaplist[min(self.snapid, len(snaplist) - 1)]
    # Set the current value associated with the component in the store
    def _elts_set(self, component, value):
        snaplist = self.store.setdefault(component, [None])
        # Pad the list with its last value up to our own slot, plus one
        # more slot when a later snapshot id exists, so that the following
        # snapshot keeps seeing the old value after our slot is overwritten.
        needed = min(self.maxid[0], self.snapid + 1) + 1
        if len(snaplist) < needed:
            last = snaplist[-1]
            snaplist.extend(last for _ in range(len(snaplist), needed))
        snaplist[self.snapid] = value
        return value
    # Iterate through (component, current values) pairs
    def _elts_items(self):
        # Entries whose current value is None, and the _self_value
        # sentinel entry, are skipped.
        snapid = self.snapid
        for component, snaplist in self.store.iteritems():
            if component is self._self_value: continue
            val = snaplist[min(snapid, len(snaplist) - 1)]
            if val is not None: yield (component, val)
    # Insert obj at the location given by components.
    def _insert(self, path, obj):
        basename, components = self._split_path(path)
        if not basename:
            # An empty path names no entry; nothing to insert.
            return
        assert(not self.shared)
        # Walk down, creating missing directories and privatizing shared
        # ones, rebinding self at each level.
        for component in components:
            nxt = self._elts_get(component)
            if not isinstance(nxt, PathMap):
                nxt = self._elts_set(component, PathMap())
            elif nxt.shared:
                nxt = self._elts_set(component, nxt.snapshot())
            self = nxt
        self._elts_set(basename, obj)
    # Return the object at the location given by components--either
    # the associated value if it's present as a filename, or a PathMap
    # containing the descendents if it's a directory name.  Return
    # None if the location does not exist in the set.
    def _find(self, path):
        basename, components = self._split_path(path)
        if not basename:
            # An empty path designates this map itself.
            return self
        for component in components:
            self = self._elts_get(component)
            if not isinstance(self, PathMap):
                return None
        return self._elts_get(basename)
    # Split a path into a (basename, leading components) pair.  For a
    # string, basename is the last component (None if the path has no
    # components) and the second element lists the components before it,
    # in order.  For a non-string, basename is the _self_value sentinel
    # and the components are those of path[0].  This relies on filter()
    # returning a list, as it does under Python 2.
    @staticmethod
    def _split_path(path):
        if isinstance(path, str):
            components = filter(None, os.path.normpath(path).split(os.sep))
            return (components.pop() if components else None, components)
        else:
            return (PathMap._self_value,
                    filter(None, os.path.normpath(path[0]).split(os.sep)))

class RepoStreamer:
    "Repository factory driver class for all repo analyzers."
    def __init__(self, extractor):
        # Counters for generated marks and tags.
        self.markseq = 0
        self.tagseq = 0
        self.commits = {}
        # Maps extractor revision IDs to the Commit objects built for them.
        self.commit_map = {}
        # Maps revision IDs to {path: signature} for the files in that revision.
        self.filemap = {}
        # Maps content hashes to the marks of the blobs already created.
        self.hash_to_mark = {}
        self.baton = None
        self.extractor = extractor
    def __newmark(self):
        "Return the next unused mark."
        self.markseq += 1
        mark = ":" + str(self.markseq)
        return mark
    def extract(self, repo, progress=True):
        """Populate repo with the history reported by the extractor.

        Each revision is checked out in turn and turned into a commit
        (plus blobs for file content not seen before); resets and tags
        are appended afterwards.  Returns repo.

        Raises Recoverable if the extractor reports unsaved changes.
        """
        if not self.extractor.isclean():
            raise Recoverable("directory %s has unsaved changes." % os.getcwd())
        repo.makedir()
        with Baton(prompt="Extracting", enable=progress) as self.baton:
            self.extractor.analyze(self.baton)
            self.extractor.pre_extract(repo)
            #saved_umask = os.umask(0)
            # Walk a private copy of the revision list front to back;
            # plain iteration avoids the quadratic cost of pop(0).
            for revision in copy.copy(self.extractor.get_revlist()):
                commit = Commit(repo)
                self.baton.twirl()
                present = self.extractor.checkout(revision, self.filemap)
                parents = self.extractor.get_parents(revision)
                commit.committer = Attribution(self.extractor.get_committer(revision))
                commit.authors = [Attribution(a) \
                                  for a in self.extractor.get_authors(revision)]
                commit.set_parents([self.commit_map[rev] for rev in parents])
                commit.set_branch(self.extractor.get_branch(revision))
                commit.comment = self.extractor.get_comment(revision)
                if debug_enable(DEBUG_EXTRACT):
                    msg = commit.comment
                    if msg is None:
                        msg = ""
                    announce("r%s: comment '%s'" % (revision, msg.strip()))
                # Start from the union of the parents' file maps.
                self.filemap[revision] = {}
                for rev in parents:
                    self.filemap[revision].update(self.filemap[rev])
                if present:
                    removed = set(self.filemap[revision]) - set(present)
                    for path in present:
                        if os.path.isdir(path):
                            continue
                        if not os.path.exists(path):
                            announce("r%s: expected path %s does not exist!" % \
                                     (revision, path))
                            continue
                        newsig = signature(path)
                        if newsig.hashval in self.hash_to_mark:
                            #if debug_enable(DEBUG_EXTRACT):
                            #    announce("r%s: %s has old hash" \
                            #             % (revision, path))
                            # The file's hash corresponds to an existing
                            # blob; generate modify, copy, or rename as
                            # appropriate.
                            if path not in self.filemap[revision] \
                                   or self.filemap[revision][path]!=newsig:
                                if debug_enable(DEBUG_EXTRACT):
                                    announce("r%s: update for %s" % (revision, path))
                                # Iterating through dict items (with
                                # iteritems() or itemsview() for
                                # instance) while mutating the
                                # underlying dict is not supported by
                                # Python. The following loop thus uses
                                # items(), which returns a new
                                # independent list containing the
                                # (key,value) pairs.
                                for (oldpath, oldsig) in self.filemap[revision].items():
                                    if oldsig == newsig:
                                        if oldpath in removed:
                                            op = FileOp()
                                            op.construct('R', oldpath, path)
                                            commit.fileops.append(op)
                                            del self.filemap[revision][oldpath]
                                        elif oldpath != path:
                                            op = FileOp()
                                            op.construct('C', oldpath, path)
                                            commit.fileops.append(op)
                                        break
                                else:
                                    op = FileOp()
                                    op.construct('M',
                                                 newsig.perms,
                                                 self.hash_to_mark[newsig.hashval],
                                                 path)
                                    commit.fileops.append(op)
                        else:
                            # Content hash doesn't match any existing blobs
                            if debug_enable(DEBUG_EXTRACT):
                                announce("r%s: %s has new hash" \
                                         % (revision, path))
                            blobmark = self.__newmark()
                            self.hash_to_mark[newsig.hashval] = blobmark
                            # Actual content enters the representation
                            blob = Blob(repo)
                            blob.set_mark(blobmark)
                            shutil.copyfile(path, blob.blobfile(create=True))
                            blob.pathlist.append(path)
                            repo.addEvent(blob)
                            # Its new fileop is added to the commit
                            op = FileOp()
                            op.construct('M', newsig.perms, blobmark, path)
                            commit.fileops.append(op)
                        self.filemap[revision][path] = newsig
                    for tbd in removed:
                        op = FileOp()
                        op.construct('D', tbd)
                        commit.fileops.append(op)
                        del self.filemap[revision][tbd]
                self.extractor.cleanup(revision, True)
                # A root commit off master gets a reset for its branch first.
                if not parents and commit.branch != "refs/heads/master":
                    reset = Reset(repo)
                    reset.ref = commit.branch
                    repo.addEvent(reset)
                commit.fileops.sort(key=FileOp.sortkey)
                commit.fossil_id = revision
                commit.properties.update(self.extractor.get_properties(revision))
                commit.set_mark(self.__newmark())
                self.commit_map[revision] = commit
                if debug_enable(DEBUG_EXTRACT):
                    announce("r%s: gets mark %s (%d ops)" % (revision, commit.mark, len(commit.fileops)))
                repo.addEvent(commit)
            # Now append reset objects
            for (resetname, revision) in sorted(self.extractor.iter_resets(),
                                                key=operator.itemgetter(1)):
                # FIXME: what if revision is unknown ? keep previous behavior for now
                reset = Reset(repo, target=self.commit_map[revision])
                reset.ref = resetname
                repo.addEvent(reset)
            # Last, append tag objects.
            for tag in sorted(self.extractor.get_taglist(),
                              key=operator.attrgetter("tagger.date")):
                # Hashes produced by the GitExtractor are turned into proper
                # committish marks here.
                c = self.commit_map.get(tag.committish)
                if c is None:
                    # FIXME: we should probably error here, keep previous
                    # behavior for now
                    tag.remember(repo, committish=None)
                else:
                    tag.remember(repo, target=c)
                repo.addEvent(tag)
            self.extractor.post_extract(repo)
        return repo

# Stream parsing
#
# The Subversion dumpfile format is documented at
#
# https://svn.apache.org/repos/asf/subversion/trunk/notes/dump-load-format.txt

# Use numeric codes rather than (un-interned) strings
# to reduce working-set size.
# Node kinds:
SD_NONE, SD_FILE, SD_DIR = 0, 1, 2
# Node actions:
SD_ADD, SD_DELETE, SD_CHANGE, SD_REPLACE = 0, 1, 2, 3

class StreamParser:
    "Parse a fast-import stream or Subversion dump to populate a Repository."
    class NodeAction(object):
        "One node record read from a revision of a Subversion dump."
        __slots__ = ("revision", "path", "kind", "action",
                     "from_rev", "from_path", "content_hash",
                     "from_hash", "blob", "props",
                     "from_set", "blobmark", "generated")
        # If these don't match the constants above, havoc will ensue
        ActionValues = ("add", "delete", "change", "replace")
        PathTypeValues = ("none", "file", "dir", "ILLEGAL-TYPE")
        def __init__(self):
            # Parse-time fields, filled in while the dump is being read
            self.revision = self.path = None
            self.kind = SD_NONE
            self.action = None
            self.from_rev = self.from_path = None
            self.content_hash = self.from_hash = None
            self.blob = self.props = None
            # Analysis-time fields, filled in after parsing
            self.from_set = self.blobmark = None
            self.generated = False
        def __str__(self):
            "Render the node on one line for debugging output."
            cls = StreamParser.NodeAction
            if self.action is None:
                action_name = "ILLEGAL-ACTION"
            else:
                action_name = cls.ActionValues[self.action]
            # A falsy kind (SD_NONE as well as None) selects the last
            # slot of the table, i.e. ILLEGAL-TYPE.
            kind_name = cls.PathTypeValues[self.kind or -1]
            origin = sources = marker = properties = ""
            if self.from_rev:
                origin = " from=%s~%s" % (self.from_rev, self.from_path)
            if self.from_set:
                sources = " sources=%s" % self.from_set
            if self.generated:
                marker = " generated"
            if self.props:
                # Prefer dict's repr() to OrderedDict's verbose one
                if isinstance(self.props, dict):
                    rendered = dict.__repr__(self.props)
                else:
                    rendered = repr(self.props)
                properties = " properties=%s" % rendered
            return "<NodeAction: r{rev} {action} {kind} '{path}'" \
                   "{from_rev}{from_set}{generated}{props}>".format(
                       rev=self.revision,
                       action=action_name,
                       kind=kind_name,
                       path=self.path,
                       from_rev=origin,
                       from_set=sources,
                       generated=marker,
                       props=properties)
    class RevisionRecord(object):
        "Holds the node list and the properties parsed for one revision."
        __slots__ = ("nodes", "props")
        def __init__(self, nodes, props):
            self.nodes, self.props = nodes, props
    # Native Subversion properties that we don't suppress: svn:externals
    # The reason for these suppressions is to avoid a huge volume of
    # junk file properties - cvs2svn in particular generates them like
    # mad.  We want to let through other properties that might carry
    # useful information.
    # This is a set of property names, so membership tests are cheap.
    IgnoreProperties = {
        "svn:executable",  # We special-case this one elsewhere
        "svn:ignore",      # We special-case this one elsewhere
        "svn:special",     # We special-case this one elsewhere
        "svn:mergeinfo",   # We special-case this one elsewhere
        "svn:mime-type",
        "svn:keywords",
        "svn:needs-lock",
        "svn:eol-style",   # Don't want to suppress, but cvs2svn floods these.
        }
    # These are the default patterns globally ignored by Subversion.
    # The text is one pattern per line, led by a '#' comment line that
    # marks it as generated by reposurgeon.
    SubversionDefaultIgnores = """\
# A simulation of Subversion default ignores, generated by reposurgeon.
*.o
*.lo
*.la
*.al
.libs
*.so
*.so.[0-9]*
*.a
*.pyc
*.pyo
*.rej
*~
.#*
.*.swp
.DS_store
"""
    # Match the log messages cvs2svn writes on the commits it manufactures
    # to create a tag or a branch; group 1 captures the quoted name.
    cvs2svn_tag_re = re.compile("This commit was manufactured by cvs2svn to create tag.*'([^']*)'")
    cvs2svn_branch_re = re.compile("This commit was manufactured by cvs2svn to create branch.*'([^']*)'")
    # NOTE(review): separator character; its consumers are outside this
    # section of the file - confirm usage before changing it.
    SplitSep = '.'
    def __init__(self, repo):
        self.repo = repo
        self.fp = None
        self.import_line = 0
        self.markseq = 0
        self.ccount = 0
        self.linebuffers = []
        self.warnings = []
        # Everything below here is Subversion-specific
        self.branches = {}
        self.branchlink = {}
        self.branchdeletes = set()
        self.branchcopies = set()
        self.generated_deletes = []
        self.revisions = collections.OrderedDict()
        self.copycounts = collections.OrderedDict()
        self.hashmap = {}
        self.permissions = {}
        self.fileop_branchlinks  = set()
        self.directory_branchlinks  = set()
        self.active_gitignores = {}
    def error(self, msg):
        "Abort parsing: raise Fatal with msg tagged by the current line number."
        where = " at line " + repr(self.import_line)
        raise Fatal(msg + where)
    def warn(self, msg):
        "Report a parse warning, adding the line number when one is known."
        if not self.import_line:
            complain(msg)
            return
        complain(msg + " at line " + repr(self.import_line))
    def gripe(self, msg):
        "Complain at once when verbose is 2 or more, otherwise queue msg."
        if verbose >= 2:
            complain(msg)
        else:
            # Queued messages are kept on self.warnings for later display.
            self.warnings.append(msg)
    def __newmark(self):
        self.markseq += 1
        mark = ":" + str(self.markseq)
        return mark
    def readline(self):
        if self.linebuffers:
            line = self.linebuffers.pop()
        else:
            line = self.fp.readline()
        self.ccount += len(line)
        self.import_line += 1
        return line
    def tell(self):
        "Return the current read offset in the source stream."
        try:
            return self.fp.tell()
        except IOError:
            return None
    def pushback(self, line):
        self.ccount -= len(line)
        self.import_line -= 1
        self.linebuffers.append(line)
    # Helpers for import-stream files
    def fi_read_data(self, line=None):
        "Read a fast-import data section."
        if not line:
            line = self.readline()
        if line.startswith("data <<"):
            delim = line[7:]
            data = ""
            start = self.tell()
            while True:
                dataline = self.readline()
                if dataline == delim:
                    break
                elif not dataline:
                    raise Fatal("EOF while reading blob")
                else:
                    data += dataline
        elif line.startswith("data"):
            try:
                count = int(line[5:])
                start = self.tell()
                data = self.fp.read(count)
            except ValueError:
                self.error("bad count in data")
        elif line.startswith("property"):
            line = line[9:]			# Skip this token
            line = line[line.index(" "):]	# Skip the property name
            nextws = line.index(" ")
            count = int(line[:nextws-1])
            start = self.tell()
            data = line[nextws:] + self.fp.read(count)
        else:
            self.error("malformed data header %s" % repr(line))
        line = self.readline()
        if line != '\n':
            self.pushback(line) # Data commands optionally end with LF
        return (data, start)
    def fi_parse_fileop(self, fileop):
        # Read a fast-import fileop
        if fileop.ref[0] == ':':
            pass
        elif fileop.ref == 'inline':
            fileop.inline = self.fi_read_data()[0]
        else:
            self.error("unknown content type in filemodify")
    # Helpers for Subversion dumpfiles
    @staticmethod
    def sd_body(line):
        # Parse the body from a Subversion header line
        return line.split(":")[1].strip()
    def sd_require_header(self, hdr):
        # Consume a required header line
        line = self.readline()
        self.ccount += len(line)
        if not line.startswith(hdr):
            self.error('required %s header missing' % hdr)
        return StreamParser.sd_body(line)
    def sd_require_spacer(self):
        line = self.readline()
        if line.strip():
            self.error('found %s expecting blank line' % repr(line))
    def sd_read_blob(self, length):
        # Read a Subversion file-content blob.
        content = self.fp.read(length)
        if self.fp.read(1) != '\n':
            self.error("EOL not seen where expected, Content-Length incorrect")
        self.import_line += content.count('\n') + 1
        self.ccount += len(content) + 1
        return content
    def sd_read_props(self, target, checklength):
        """Parse a Subversion properties section, return as an OrderedDict.

        target names the thing the properties belong to and is used
        only in messages.  checklength is the declared character
        length of the section; self.ccount is reset to zero and
        reading continues until it reaches that length or a PROPS-END
        line is seen.  Each property is a K header followed by the key
        and a V header followed by the value.  Raises a parse error
        through self.error() on a short section, a missing V header,
        or end of input inside the section.
        """
        props = collections.OrderedDict()
        self.ccount = 0
        while self.ccount < checklength:
            line = self.readline()
            if debug_enable(DEBUG_SVNPARSE):
                announce("readprops, line %d: %s" % \
                         (self.import_line, repr(line)))
            if not line:
                # End of input adds nothing to ccount, so without this
                # check a truncated dump would keep the loop spinning.
                self.error("unexpected EOF in properties of %s" % target)
            elif line.startswith("PROPS-END"):
                # This test should be !=, but I get random off-by-ones from
                # real dumpfiles - I don't know why.
                if self.ccount < checklength:
                    self.error("expected %d property chars, got %d"\
                               % (checklength, self.ccount))
                break
            elif not line.strip():
                continue
            elif line[0] == "K":
                key = self.sd_read_blob(int(line.split()[1]))
                line = self.readline()
                # startswith() rather than line[0], so that end of
                # input is reported as a parse error too.
                if not line.startswith('V'):
                    self.error("property value garbled")
                value = self.sd_read_blob(int(line.split()[1]))
                props[key] = value
                if debug_enable(DEBUG_SVNPARSE):
                    announce("readprops: on %s, setting %s = %s"\
                             % (target, key, repr(value)))
        return props
    #
    # The main event
    #
    def fast_import(self, fp, options, progress=False):
        """Initialize the repo from a fast-import stream or Subversion dump.

        fp is the open input stream; it must have a name attribute,
        which is used both to detect plain files and to label the
        progress display.  options is handed on to svn_process() when
        the input proves to be a Subversion dump.  progress enables
        the Baton progress display.

        The input type is chosen from the first line: a leading
        "SVN-fs-dump-format-version: " header selects Subversion dump
        parsing (format versions 1 and 2 only); anything else is
        parsed as a fast-import stream.

        Raises Fatal on malformed input and Recoverable if no events
        were produced.  On KeyboardInterrupt, nuke() is called on
        self.repo.subdir() and the interrupt is raised again.
        """
        self.repo.makedir()
        self.repo.timings = [("start", time.time())]
        try:
            self.fp = fp
            # Optimization: if we're reading from a plain file,
            # no need to clone all the blobs.
            if os.path.isfile(self.fp.name):
                self.repo.seekstream = fp
            with Baton("reposurgeon: from %s" % os.path.relpath(fp.name), enable=progress) as baton:
                self.import_line = self.repo.fossil_count = 0
                self.linebuffers = []
                # First, determine the input type
                line = self.readline()
                if line.startswith("SVN-fs-dump-format-version: "):
                    if StreamParser.sd_body(line) not in ("1", "2"):
                        raise Fatal("unsupported dump format version %s" \
                                    % StreamParser.sd_body(line))
                    # Beginning of Subversion dump parsing
                    while True:
                        line = self.readline()
                        if not line:
                            break
                        elif not line.strip():
                            continue
                        elif line.startswith("UUID:"):
                            self.repo.uuid = StreamParser.sd_body(line)
                        elif line.startswith("Revision-number: "):
                            # Begin Revision processing
                            baton.twirl()
                            if debug_enable(DEBUG_SVNPARSE):
                                announce("revision parsing, line %d: begins" % \
                                     (self.import_line))
                            revision = StreamParser.sd_body(line)
                            plen = int(self.sd_require_header("Prop-content-length"))
                            self.sd_require_header("Content-length")
                            self.sd_require_spacer()
                            props = self.sd_read_props("commit", plen)
                            # Parsing of the revision header is done
                            node = None # pacify pylint
                            nodes = []
                            in_header = False
                            plen = tlen = -1
                            # Node list parsing begins
                            while True:
                                line = self.readline()
                                if debug_enable(DEBUG_SVNPARSE):
                                    announce("node list parsing, line %d: %s" % \
                                             (self.import_line, repr(line)))
                                if not line:
                                    break
                                elif not line.strip():
                                    if not in_header:
                                        continue
                                    else:
                                        if plen > -1:
                                            node.props = self.sd_read_props(node.path, plen)
                                        if tlen > -1:
                                            start = self.tell()
                                            # This is a crock. It is
                                            # justified only by the fact that
                                            # we get None back from self.tell()
                                            # only when the parser input is
                                            # coming from an inferior process
                                            # rather than a file. In this case
                                            # the start offset can be any random
                                            # garbage, because we'll never try
                                            # to use it for seeking blob
                                            # content.
                                            if start is None: start = 0
                                            text = self.sd_read_blob(tlen)
                                            node.blob = Blob(self.repo)
                                            # Ugh - cope with strange
                                            # undocumented Subversion format
                                            # for storing links.  Apparently the
                                            # dumper puts 'link ' in front of
                                            # the path and the loader (or at
                                            # least git-svn) removes it.
                                            if node.props and "svn:special" in node.props:
                                                if text.startswith("link "):
                                                    node.blob.set_content(
                                                        text[5:], start+5)
                                                else:
                                                    # Don't know if this will
                                                    # ever happen.  Best to fail
                                                    # loudly...
                                                    self.error("unexpected link prefix in %s" % repr(text))
                                            else:
                                                node.blob.set_content(text, start)
                                        node.revision = revision
                                        nodes.append(node)
                                        in_header = False
                                elif line.startswith("Revision-number: "):
                                    self.pushback(line)
                                    break
                                # Node processing begins
                                elif line.startswith("Node-path: "):
                                    node = StreamParser.NodeAction()
                                    node.path = StreamParser.sd_body(line)
                                    plen = tlen = -1
                                    in_header = True
                                elif line.startswith("Node-kind: "):
                                    # tuple.index() raises ValueError on
                                    # an unknown name; turn that into a
                                    # parse error carrying the line number.
                                    kindname = StreamParser.sd_body(line)
                                    try:
                                        node.kind = StreamParser.NodeAction.PathTypeValues.index(kindname)
                                    except ValueError:
                                        self.error("unknown kind %s" % kindname)
                                elif line.startswith("Node-action: "):
                                    actionname = StreamParser.sd_body(line)
                                    try:
                                        node.action = StreamParser.NodeAction.ActionValues.index(actionname)
                                    except ValueError:
                                        self.error("unknown action %s" \
                                                   % actionname)
                                elif line.startswith("Node-copyfrom-rev: "):
                                    node.from_rev = StreamParser.sd_body(line)
                                elif line.startswith("Node-copyfrom-path: "):
                                    node.from_path = StreamParser.sd_body(line)
                                elif line.startswith("Text-copy-source-md5: "):
                                    node.from_hash = StreamParser.sd_body(line)
                                elif line.startswith("Text-content-md5: "):
                                    node.content_hash = StreamParser.sd_body(line)
                                elif line.startswith("Text-content-sha1: "):
                                    continue
                                elif line.startswith("Text-content-length: "):
                                    tlen = int(StreamParser.sd_body(line))
                                elif line.startswith("Prop-content-length: "):
                                    plen = int(StreamParser.sd_body(line))
                                elif line.startswith("Content-length: "):
                                    continue
                                else:
                                    if debug_enable(DEBUG_SVNPARSE):
                                        announce("node list parsing, line %d: uninterpreted line %s" % \
                                             (self.import_line, repr(line)))
                                    continue
                                # Node processing ends
                            # Node list parsing ends
                            self.revisions[revision] = StreamParser.RevisionRecord(nodes, props)
                            self.repo.fossil_count += 1
                            if debug_enable(DEBUG_SVNPARSE):
                                announce("revision parsing, line %d: ends" % \
                                         (self.import_line))
                            # End Revision processing
                    # End of Subversion dump parsing
                    self.repo.timings.append(("parsing", time.time()))
                    self.svn_process(options, baton)
                    elapsed = time.time() - baton.time
                    baton.twirl("%d revisions (%d/s)" %
                                 (self.repo.fossil_count,
                                  int(self.repo.fossil_count/elapsed)))
                else:
                    self.pushback(line)
                    # Beginning of fast-import stream parsing
                    while True:
                        line = self.readline()
                        if not line:
                            break
                        elif not line.strip():
                            continue
                        elif line.startswith("blob"):
                            blob = Blob(self.repo)
                            line = self.readline()
                            if line.startswith("mark"):
                                blob.set_mark(line[5:].strip())
                                (blobcontent, blobstart) = self.fi_read_data()
                                # Parse CVS and Subversion $-headers
                                # There'd better not be more than one of these.
                                for m in re.finditer(r"\$Id *:[^$]+\$",
                                                     blobcontent):
                                    fields = m.group(0).split()
                                    if len(fields) < 2:
                                        self.gripe("malformed $-cookie '%s'" % m.group(0))
                                    else:
                                        # Save file basename and CVS version
                                        if fields[1].endswith(",v"):
                                            # CVS revision
                                            blob.cookie = (fields[1][:-2], fields[2])
                                        else:
                                            # Subversion revision
                                            blob.cookie = fields[1]
                                for m in re.finditer(r"\$Revision *: *([^$]*)\$",
                                                     blobcontent):
                                    # NOTE(review): group(0) is the whole
                                    # $Revision$ cookie, not just the
                                    # captured value - confirm that is what
                                    # users of blob.cookie expect.
                                    rev = m.group(0).strip()
                                    if '.' not in rev:
                                        # Subversion revision
                                        blob.cookie = rev
                                blob.set_content(blobcontent, blobstart)
                            else:
                                self.error("missing mark after blob")
                            self.repo.addEvent(blob)
                            baton.twirl()
                        elif line.startswith("data"):
                            self.error("unexpected data object")
                        elif line.startswith("commit"):
                            baton.twirl()
                            commitbegin = self.import_line
                            commit = Commit(self.repo)
                            commit.set_branch(line.split()[1])
                            while True:
                                line = self.readline()
                                if not line:
                                    break
                                elif line.startswith("mark"):
                                    commit.set_mark(line[5:].strip())
                                elif line.startswith("author"):
                                    try:
                                        commit.authors.append(Attribution(line[7:]))
                                    except ValueError:
                                        self.error("malformed author line")
                                elif line.startswith("committer"):
                                    try:
                                        commit.committer = Attribution(line[10:])
                                    except ValueError:
                                        self.error("malformed committer line")
                                elif line.startswith("property"):
                                    fields = line.split(" ")
                                    if len(fields) < 3:
                                        self.error("malformed property line")
                                    elif len(fields) == 3:
                                        commit.properties[fields[1]] = True
                                    else:
                                        name = fields[1]
                                        length = int(fields[2])
                                        value = " ".join(fields[3:])
                                        if len(value) < length:
                                            # The value continues past the
                                            # header line; fetch the rest.
                                            value += self.fp.read(length-len(value))
                                            if self.fp.read(1) != '\n':
                                                self.error("trailing junk on property value")
                                        elif len(value) == length + 1:
                                            value = value[:-1] # Trim '\n'
                                        else:
                                            # The header line holds more than
                                            # the declared length.  Reading a
                                            # negative count here would swallow
                                            # the rest of the stream, so fail
                                            # with a parse error instead.
                                            self.error("trailing junk on property value")
                                        commit.properties[name] = value
                                        # Generated by cvsps
                                        if name == "cvs-revisions":
                                            for revline in value.split('\n'):
                                                if revline:
                                                    self.repo.fossil_map["CVS:"+revline] = commit
                                elif line.startswith("data"):
                                    commit.comment = self.fi_read_data(line)[0]
                                    if global_options["canonicalize"]:
                                        commit.comment = commit.comment.strip().replace("\r\n", "\n") + '\n'
                                elif line.startswith("from") or line.startswith("merge"):
                                    commit.add_parent(line.split()[1])
                                # Handling of file ops begins.
                                elif line[0] in ("C", "D", "R"):
                                    commit.fileops.append(FileOp(self.repo.vcs).parse(line))
                                elif line == "deleteall\n":
                                    commit.fileops.append(FileOp(self.repo.vcs).parse("deleteall"))
                                elif line[0] == "M":
                                    fileop = FileOp(self.repo.vcs).parse(line)
                                    if fileop.ref != 'inline':
                                        try:
                                            self.repo.objfind(fileop.ref).pathlist.append(fileop.path)
                                        except AttributeError:
                                            # Crap out on anything but a
                                            # submodule link.
                                            if fileop.mode != "160000":
                                                self.error("ref %s could not be resolved" % fileop.ref)
                                    commit.fileops.append(fileop)
                                    if fileop.mode == "160000":
                                        # This is a submodule link.  The ref
                                        # field is a SHA1 hash and the path
                                        # is an external reference name.
                                        # Don't try to collect data, just pass
                                        # it through.
                                        self.warn("submodule link")
                                    else:
                                        # 100644, 100755, 120000.
                                        self.fi_parse_fileop(fileop)
                                elif line[0] == "N":
                                    fileop = FileOp(self.repo.vcs).parse(line)
                                    commit.fileops.append(fileop)
                                    self.fi_parse_fileop(fileop)
                                # Handling of file ops ends.
                                elif line.isspace():
                                    # This handles slightly broken
                                    # exporters like the bzr-fast-export
                                    # one that may tack an extra LF onto
                                    # the end of data objects.  With it,
                                    # we don't drop out of the
                                    # commit-processing loop until we see
                                    # a *nonblank* line that doesn't match
                                    # a commit subpart.
                                    continue
                                else:
                                    # Dodgy bzr autodetection hook...
                                    if not self.repo.vcs:
                                        if "branch-nick" in commit.properties:
                                            for vcs in vcstypes:
                                                if vcs.name == "bzr":
                                                    self.repo.vcs = vcs
                                                    break
                                    self.pushback(line)
                                    break
                            if not (commit.mark and commit.committer):
                                # Report the error against the line the
                                # commit started on.
                                self.import_line = commitbegin
                                self.error("missing required fields in commit")
                            # NOTE(review): unreachable - a missing mark
                            # is already fatal in the check just above.
                            if commit.mark is None:
                                self.warn("unmarked commit")
                            self.repo.addEvent(commit)
                            baton.twirl()
                        elif line.startswith("reset"):
                            reset = Reset(self.repo)
                            reset.ref = line[6:].strip()
                            line = self.readline()
                            if line.startswith("from"):
                                reset.remember(self.repo, committish=line[5:].strip())
                            else:
                                self.pushback(line)
                            self.repo.addEvent(reset)
                            baton.twirl()
                        elif line.startswith("tag"):
                            tagger = None
                            tagname = line[4:].strip()
                            line = self.readline()
                            if line.startswith("from"):
                                referent = line[5:].strip()
                            else:
                                self.error("missing from after tag")
                            line = self.readline()
                            if line.startswith("tagger"):
                                try:
                                    tagger = Attribution(line[7:])
                                except ValueError:
                                    self.error("malformed tagger line")
                            else:
                                self.warn("missing tagger after from in tag")
                                self.pushback(line)
                            self.repo.addEvent(Tag(repo = self.repo,
                                                   name = tagname,
                                                   committish = referent,
                                                   tagger = tagger,
                                                   comment = self.fi_read_data()[0]))
                            baton.twirl()
                        else:
                            # Simply pass through any line we don't understand.
                            self.repo.addEvent(Passthrough(line))
                    # End of fast-import parsing
                    self.repo.timings.append(("parsing", time.time()))
                self.import_line = 0
                if not self.repo.events:
                    raise Recoverable("ignoring empty repository")
            # Show the messages that gripe() queued during the parse.
            if self.warnings:
                for warning in self.warnings:
                    complain(warning)
        except KeyboardInterrupt:
            nuke(self.repo.subdir(), "reposurgeon: import interrupted, removing %s" % self.repo.subdir())
            raise KeyboardInterrupt
    #
    # The rendezvous between parsing and object building for import
    # streams is pretty trivial and best done inline in the parser
    # because reposurgeon's internal structures are designed to match
    # those entities. For Subversion dumpfiles, on the other hand,
    # there's a fair bit of impedance-matching required.  That happens
    # in the following functions.
    #
    @staticmethod
    def node_permissions(node):
        "Fileop permissions from node properties"
        if node.props:
            if "svn:executable" in node.props:
                return 0o100755
            elif "svn:special" in node.props:
                # Map to git symlink, which behaves the same way.
                # Blob contents is the path the link should resolve to. 
                return 0o120000
        return 0o100644
    def branchpath(self, path):
        "Return path with its leading branch prefix removed."
        # With no known branches, or a path with no directory
        # separator in it, the path is returned untouched.
        if not self.branches or os.sep not in path:
            return path
        # Take the first branch (in self.branches iteration order)
        # that is a prefix of the path.
        prefix = next((candidate for candidate in self.branches
                       if path.startswith(candidate)),
                      None)
        if prefix is None:
            raise Fatal("couldn't assign %s to a branch in %s" \
                        % (path, self.branches.keys()))
        return path[len(prefix):]
    def svn_process(self, options, baton):
        "Subversion actions to import-stream commits."
        # Find all copy sources and compute the set of branches
        if debug_enable(DEBUG_EXTRACT):
            announce("Pass 1")
        nobranch = '--nobranch' in options
        copynodes = []
        for (revision, record) in self.revisions.iteritems():
            for node in record.nodes:
                if node.from_path is not None:
                    copynodes.append(node)
                    if debug_enable(DEBUG_EXTRACT):
                        announce("copynode at %s" % node)
                if node.action == SD_ADD and node.kind == SD_DIR and not node.path+os.sep in self.branches and not nobranch:
                    for trial in global_options['svn_branchify']:
                        if '*' not in trial and trial == node.path:
                            self.branches[node.path+os.sep] = None
                        elif trial.endswith(os.sep + '*') \
                                 and os.path.dirname(trial) == os.path.dirname(node.path):
                            self.branches[node.path+os.sep] = None
                        elif trial == '*' and not node.path + os.sep + '*' in global_options['svn_branchify'] and node.path.count(os.sep) < 1:
                            self.branches[node.path+os.sep] = None
                    if node.path+os.sep in self.branches and debug_enable(DEBUG_TOPOLOGY):
                        announce("%s recognized as a branch" % node.path+os.sep)
            # Per-commit spinner disabled because this pass is fast
            #baton.twirl()
        copynodes.sort(key=operator.attrgetter("from_rev"))
        self.repo.timings.append(["copynodes", time.time()])
        baton.twirl()
        # Build filemaps.
        if debug_enable(DEBUG_EXTRACT):
            announce("Pass 2")
        filemaps = {}
        filemap = PathMap()
        for (revision, record) in self.revisions.iteritems():
            for node in record.nodes:
                # Mutate the filemap according to copies
                if node.from_rev:
                    assert int(node.from_rev) < int(revision)
                    filemap.copy_from(node.path, filemaps[node.from_rev],
                                      node.from_path)
                    if debug_enable(DEBUG_FILEMAP):
                        announce("r%s~%s copied to %s" \
                                 % (node.from_rev, node.from_path, node.path))
                # Mutate the filemap according to adds/deletes/changes
                if node.action == SD_ADD and node.kind == SD_FILE:
                    filemap[node.path] = node
                    if debug_enable(DEBUG_FILEMAP):
                        announce("r%s~%s added" % (node.revision, node.path))
                elif node.action == SD_DELETE:
                    if node.kind == SD_NONE:
                        node.kind = SD_FILE if node.path in filemap else SD_DIR
                    # Snapshot the deleted paths before removing them.
                    node.from_set = PathMap()
                    node.from_set.copy_from(node.path, filemap, node.path)
                    del filemap[node.path]
                    if debug_enable(DEBUG_FILEMAP):
                        announce("r%s~%s deleted" \
                                 % (node.revision, node.path))
                elif node.action in (SD_CHANGE, SD_REPLACE) and node.kind == SD_FILE:
                    filemap[node.path] = node
                    if debug_enable(DEBUG_FILEMAP):
                        announce("r%s~%s changed" % (node.revision, node.path))
            filemaps[revision] = filemap.snapshot()
            baton.twirl()
        del filemap
        self.repo.timings.append(["filemaps", time.time()]) 
        baton.twirl()
        # Blows up huge on large repos...
        #if debug_enable(DEBUG_FILEMAP):
        #    announce("filemaps %s" % filemaps)
        # Build from sets in each directory copy record.
        if debug_enable(DEBUG_EXTRACT):
            announce("Pass 3")
        for copynode in copynodes:
            if debug_enable(DEBUG_FILEMAP):
                announce("r%s copynode filemap is %s" \
                         % (copynode.from_rev, filemaps[copynode.from_rev]))
            copynode.from_set = PathMap()
            copynode.from_set.copy_from(copynode.from_path,
                                        filemaps[copynode.from_rev],
                                        copynode.from_path)
            # Sanity check: if the directory node has no from set, but
            # there are files underneath it, this means the directory
            # structure implied by the filemaps is not consistent with
            # what's in the parsed Subversion nodes.  This should never
            # happen.
            if not copynode.from_set and \
                    any(filemaps[copynode.revision].ls_R(node.path)):
                self.gripe("inconsistently empty from set for %s" % copynode)
            baton.twirl()
        self.repo.timings.append(["copysets", time.time()]) 
        baton.twirl()
        # Build commits
        # This code can eat your processor, so we make it give up
        # its timeslice at reasonable intervals. Needed because
        # it doesn't hit the disk.
        if debug_enable(DEBUG_EXTRACT):
            announce("Pass 4")
        split_commits = {}
        def last_relevant_commit(max_rev, path,
                                 getbranch = operator.attrgetter("branch")):
            # Make path look like a branch
            if path[0] == "/": path = path[1:]
            if path[-1] != os.sep: path = path + os.sep
            # If the revision is split, try from the last split commit
            try:
                max_rev = split_commits[max_rev]
            except KeyError:
                pass
            # Find the commit object...
            try:
                obj = self.repo.fossil_map["SVN:%s" % max_rev]
            except KeyError:
                return None
            # ...then iterate backwards from there...
            past_events = (self.repo.events[i] for i in
                    range(self.repo.index(obj), -1, -1))
            # ... finding branches of commits...
            commit_branch = ((e, getbranch(e))
                             for e in past_events
                             if isinstance(e, Commit))
            # ...to find one on the right branch.
            return next((e for (e, branch) in commit_branch
                         if branch and path.startswith(branch)),
                        None)
        previous = None
        for (revision, record) in self.revisions.iteritems():
            if debug_enable(DEBUG_EXTRACT):
                announce("Revision %s:" % revision)
            for node in record.nodes:
                # if node.props is None, no property section.
                # if node.blob is None, no text section.
                try:
                    assert node.action in (SD_CHANGE, SD_ADD, SD_DELETE, SD_REPLACE)
                    assert node.blob is not None or \
                           node.props is not None or \
                           node.from_rev or \
                           node.action in (SD_ADD, SD_DELETE)
                    assert (node.from_rev is None) == (node.from_path is None)
                    assert node.kind in (SD_FILE, SD_DIR)
                    assert node.kind != SD_NONE or node.action == SD_DELETE
                    assert node.action in (SD_ADD, SD_REPLACE) or not node.from_rev
                except AssertionError:
                    raise Fatal("forbidden operation in dump stream at r%s: %s" \
                                % (revision, node))
            commit = Commit(self.repo)
            try:
                ad = record.props.pop("svn:date")
            except KeyError as key:
                self.error("missing required %s" % key)
            if "svn:author" in record.props:
                au = record.props.pop("svn:author")
            else:
                au = "no-author"
            if "svn:log" in record.props:
                commit.comment = record.props.pop("svn:log")
                if not commit.comment.endswith("\n"):
                    commit.comment += "\n"
            if '--use-uuid' in options:
                attribution = "%s <%s@%s> %s" % (au, au, self.repo.uuid, ad)
            else:
                attribution = "%s <%s> %s" % (au, au, ad)
            commit.committer = Attribution(attribution)
            commit.properties.update(record.props)
            # Zero revision is never interesting - no operations, no
            # comment, no author, it's just a start marker for a
            # non-incremental dump.
            if revision == "0": 
                continue
            expanded_nodes = []
            has_properties = set()
            for (n, node) in enumerate(record.nodes):
                if debug_enable(DEBUG_EXTRACT):
                    announce("r%s:%d: %s" % (revision, n+1, node))
                elif node.kind == SD_DIR \
                         and node.action != SD_CHANGE \
                         and debug_enable(DEBUG_TOPOLOGY):
                    announce(str(node))
                # Handle per-path properties.
                if node.props is not None:
                    if "cvs2svn:cvs-rev" in node.props:
                        cvskey = "CVS:%s:%s" % (node.path,
                                                node.props["cvs2svn:cvs-rev"])
                        self.repo.fossil_map[cvskey] = commit
                        del node.props["cvs2svn:cvs-rev"]
                    if not "--ignore-properties" in options:
                        prop_items = ((prop, val) \
                                        for (prop,val) in node.props.iteritems() \
                                        if prop not in StreamParser.IgnoreProperties)
                        try:
                            first = next(prop_items)
                        except StopIteration:
                            if node.path in has_properties:
                                self.gripe("r%s~%s: properties cleared." \
                                             % (node.revision, node.path))
                                has_properties.discard(node.path)
                        else:
                            self.gripe("r%s~%s properties set:" \
                                                   % (node.revision, node.path))
                            for prop, val in itertools.chain((first,), prop_items):
                                self.gripe("\t%s = '%s'" % (prop, val))
                            has_properties.add(node.path)
                if node.kind == SD_FILE:
                    expanded_nodes.append(node)
                elif node.kind == SD_DIR:
                    # os.sep is appended to avoid collisions with path
                    # prefixes.
                    node.path += os.sep
                    if node.from_path:
                        node.from_path += os.sep
                    if node.action in (SD_ADD, SD_CHANGE):
                        if node.path in self.branches:
                            if not node.props: node.props = {}
                            if "--noignores" in options:
                                startwith = ""
                            else:
                                startwith = StreamParser.SubversionDefaultIgnores
                            try:
                                ignore = startwith + \
                                         "# The contents of the svn:ignore" \
                                         "property on the branch root.\n" + \
                                         node.props["svn:ignore"]
                            except KeyError:
                                ignore = startwith
                            node.props["svn:ignore"] = ignore
                    elif node.action in (SD_DELETE, SD_REPLACE):
                        if node.path in self.branches:
                            self.branchdeletes.add(node.path)
                            expanded_nodes.append(node)
                            # The deleteall will also delete .gitignore files
                            for ignorepath in list(gi
                                        for gi in self.active_gitignores
                                        if gi.startswith(node.path)):
                                del self.active_gitignores[ignorepath]
                        else:
                            # A delete or replace with no from set
                            # can occur if the directory is empty.
                            # We can just ignore this case.
                            if node.from_set is not None:
                                for child in node.from_set:
                                    if debug_enable(DEBUG_EXTRACT):
                                        announce("r%s: deleting %s" \
                                                 % (revision, child))
                                    newnode = StreamParser.NodeAction()
                                    newnode.path = child
                                    newnode.revision = revision
                                    newnode.action = SD_DELETE
                                    newnode.kind = SD_FILE
                                    newnode.generated = True
                                    expanded_nodes.append(newnode)
                            # Emit delete actions for the .gitignore files we
                            # have generated. Note that even with a directory
                            # with no files from SVN, we might have added
                            # .gitignore files we now must delete.
                            for ignorepath in list(gi
                                        for gi in self.active_gitignores
                                        if gi.startswith(node.path)):
                                newnode = StreamParser.NodeAction()
                                newnode.path = ignorepath
                                newnode.revision = revision
                                newnode.action = SD_DELETE
                                newnode.kind = SD_FILE
                                newnode.generated = True
                                expanded_nodes.append(newnode)
                                del self.active_gitignores[ignorepath]
                    # Handle directory copies.  If this is a copy
                    # between branches, no fileop should be issued
                    # until there is an actual file modification on
                    # the new branch. Instead, remember that the
                    # branch root inherits the tree of the source
                    # branch and should not start with a deleteall.
                    # Exception: If the target branch has been
                    # deleted, perform a normal copy and interpret
                    # this as an ad-hoc branch merge.
                    if node.from_path:
                        branchcopy = node.from_path in self.branches \
                                         and node.path in self.branches \
                                         and node.path not in self.branchdeletes
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce("r%s: directory copy to %s from " \
                                     "r%s~%s (branchcopy %s)" \
                                     % (revision,
                                        node.path,
                                        node.from_rev,
                                        node.from_path,
                                        branchcopy))
                        # Update our .gitignore list so that it includes those
                        # in the newly created copy, to ensure they correctly
                        # get deleted during a future directory deletion.
                        l = len(node.from_path)
                        for sourcegi, value in list((gi,v) for (gi,v) in
                                    self.active_gitignores.iteritems()
                                    if gi.startswith(node.from_path)):
                            destgi = node.path + sourcegi[l:]
                            self.active_gitignores[destgi] = value
                        if branchcopy:
                            self.branchcopies.add(node.path)
                        else:
                            self.branchdeletes.discard(node.path)
                            # Generate copy ops for generated .gitignore files
                            # to match the copy of svn:ignore props on the
                            # Subversion side. We use the just updated
                            # active_gitignores dict for that purpose.
                            for gipath, ignore in list(
                                        (gi,v) for (gi,v) in
                                        self.active_gitignores.iteritems()
                                        if gi.startswith(node.path)):
                                blob = Blob(self.repo)
                                blob.set_content(ignore)
                                subnode = StreamParser.NodeAction()
                                subnode.path = gipath
                                subnode.revision = revision
                                subnode.action = SD_ADD
                                subnode.kind = SD_FILE
                                subnode.blob = blob
                                subnode.content_hash = \
                                        hashlib.md5(ignore).hexdigest()
                                subnode.generated = True
                                expanded_nodes.append(subnode)
                            # Now generate copies for all files in the source
                            for source in node.from_set:
                                lookback = filemaps[node.from_rev][source]
                                if lookback is None:
                                    raise Fatal("r%s: can't find ancestor %s" \
                                             % (revision, source))
                                subnode = StreamParser.NodeAction()
                                subnode.path = node.path + \
                                        source[len(node.from_path):]
                                subnode.revision = revision
                                subnode.from_path = lookback.path
                                subnode.from_rev = lookback.revision
                                subnode.from_hash = lookback.content_hash
                                subnode.action = SD_ADD
                                subnode.kind = SD_FILE
                                if debug_enable(DEBUG_TOPOLOGY):
                                    announce("r%s: generated copy r%s~%s -> %s" \
                                             % (revision,
                                                subnode.from_rev,
                                                subnode.from_path,
                                                subnode.path))
                                subnode.generated = True
                                expanded_nodes.append(subnode)
                    # Property settings can be present on either
                    # SD_ADD or SD_CHANGE actions.
                    if node.props is not None:
                        if debug_enable(DEBUG_EXTRACT):
                            announce("r%s: setting properties %s on %s" \
                                     % (revision, node.props, node.path))
                        # svn:ignore gets handled here,
                        if node.path == os.sep:
                            gitignore_path = ".gitignore"
                        else:
                            gitignore_path = os.path.join(node.path,
                                                          ".gitignore")
                        # There are no other directory properties that can
                        # turn into fileops.
                        ignore = node.props.get("svn:ignore")
                        if ignore is not None:
                            blob = Blob(self.repo)
                            blob.set_content(ignore)
                            newnode = StreamParser.NodeAction()
                            newnode.path = gitignore_path
                            newnode.revision = revision
                            newnode.action = SD_ADD
                            newnode.kind = SD_FILE
                            newnode.blob = blob
                            newnode.content_hash = \
                                    hashlib.md5(ignore).hexdigest()
                            if debug_enable(DEBUG_IGNORES):
                                announce("r%s: queuing up %s generation with:\n%s." % (revision, newnode.path, node.props["svn:ignore"]))
                            # Must append rather than simply performing.
                            # Otherwise when the property is unset we
                            # won't have the right thing happen.
                            newnode.generated = True
                            expanded_nodes.append(newnode)
                            self.active_gitignores[gitignore_path] = ignore
                        elif gitignore_path in self.active_gitignores:
                            newnode = StreamParser.NodeAction()
                            newnode.path = gitignore_path
                            newnode.revision = revision
                            newnode.action = SD_DELETE
                            newnode.kind = SD_FILE
                            if debug_enable(DEBUG_IGNORES):
                                announce("r%s: queuing up %s deletion." % (revision, newnode.path))
                            newnode.generated = True
                            expanded_nodes.append(newnode)
                            del self.active_gitignores[gitignore_path]
            # Lift .cvsignore files, which we can assume are fossils
            # from a bygone era and happen to have syntax upward-compatible
            # with that of .gitignore
            for node in expanded_nodes:
                if node.path.endswith(".cvsignore"):
                    node.path = node.path[:-len(".cvsignore")] + ".gitignore"
            # Ugh.  Because cvs2svn is brain-dead and issues D/M pairs
            # for identical paths in generated commits, we have to remove those
            # D ops here.  Otherwise later on when we're generating ops, if
            # the M node happens to be missing its hash it will be seen as
            # unmodified and only the D will be issued.
            seen = set()
            for node in reversed(expanded_nodes):
                if node.action == SD_DELETE and node.path in seen:
                    node.action = None
                seen.add(node.path)
            # Create actions corresponding to both
            # parsed and generated nodes.
            actions = []
            ancestor_nodes = {}
            for node in expanded_nodes:
                if node.action is None: continue
                if node.kind == SD_FILE:
                    if node.action == SD_DELETE:
                        assert node.blob is None
                        fileop = FileOp()
                        fileop.construct("D", node.path)
                        actions.append((node, fileop))
                        ancestor_nodes[node.path] = None
                    elif node.action in (SD_ADD, SD_CHANGE, SD_REPLACE):
                        # Try to figure out who the ancestor of
                        # this node is.
                        if node.from_path or node.from_hash:
                            # Try first via from_path
                            ancestor = filemaps[node.from_rev][node.from_path]
                            if debug_enable(DEBUG_TOPOLOGY):
                                if ancestor:
                                    announce("r%s~%s -> %s (via filemap)" % \
                                             (node.revision, node.path, ancestor))
                                else:
                                    announce("r%s~%s has no ancestor (via filemap)" % \
                                             (node.revision, node.path))
                            # Fallback on the first blob that had this hash
                            if node.from_hash and not ancestor:
                                ancestor = self.hashmap[node.from_hash]
                                if debug_enable(DEBUG_TOPOLOGY):
                                    announce("r%s~%s -> %s (via hashmap)" % \
                                         (node.revision, node.path, ancestor))
                            if not ancestor and not node.path.endswith(".gitignore"):
                                self.gripe("r%s~%s: missing filemap node." \
                                          % (node.revision, node.path))
                        elif node.action != SD_ADD:
                            # Ordinary inheritance, no node copy.  For
                            # robustness, we don't assume revisions are
                            # consecutive numbers.
                            try:
                                ancestor = ancestor_nodes[node.path]
                            except KeyError:
                                ancestor = filemaps[previous][node.path]
                        else:
                            ancestor = None
                        # Time for fileop generation
                        if node.blob is not None:
                            if node.content_hash in self.hashmap:
                                # Blob matches an existing one -
                                # node was created by a
                                # non-Subversion copy followed by
                                # add.  Get the ancestry right,
                                # otherwise parent pointers won't
                                # be computed properly.
                                ancestor = self.hashmap[node.content_hash]
                                node.from_path = ancestor.from_path
                                node.from_rev = ancestor.from_rev
                                node.blobmark = ancestor.blobmark
                            else:
                                # An entirely new blob
                                node.blobmark = node.blob.set_mark(self.__newmark())
                                self.repo.addEvent(node.blob)
                                # Blobs generated by reposurgeon
                                # (e.g .gitignore content) have no
                                # content hash.  Don't record
                                # them, otherwise they'll all
                                # collide :-)
                                if node.content_hash:
                                    self.hashmap[node.content_hash] = node
                        elif ancestor:
                            node.blobmark = ancestor.blobmark
                        else:
                            # No ancestor, no blob. Has to be a
                            # pure property change.  There's no
                            # way to figure out what mark to use
                            # in a fileop.
                            if not node.path.endswith(".gitignore"):
                                self.gripe("r%s~%s: permission information may be lost." \
                                           % (node.revision, node.path))
                            continue
                        ancestor_nodes[node.path] = node
                        assert node.blobmark
                        # Time for fileop generation
                        if ancestor:
                            perms = oldperms = self.permissions.get(ancestor.path,
                                                                    0o100644)
                        else:
                            perms = oldperms = 0o100644
                        if node.props is not None:
                            perms = self.node_permissions(node)
                        # This ugly nasty guard is critically important.
                        # We need to generate a modify if:
                        # 1. There is new content.
                        # 2. This node was generated as an
                        # expansion of a directory copy.
                        # 3. The node was produced by an explicit
                        # Subversion file copy (not a directory copy)
                        # in which case it has an MD5 hash that points
                        # back to a source.
                        # 4. The permissions for this path have changed;
                        # we need to generate a modify with an old mark
                        # but new permissions.
                        new_content = (node.blob is not None)
                        generated_file_copy = node.generated
                        subversion_file_copy = (node.from_hash is not None)
                        permissions_changed = (perms != oldperms)
                        if (new_content or
                            generated_file_copy or
                            subversion_file_copy or
                            permissions_changed):
                            assert perms
                            fileop = FileOp()
                            fileop.construct("M",
                                             perms,
                                             node.blobmark,
                                             node.path)
                            actions.append((node, fileop))
                            self.repo.objfind(fileop.ref).pathlist.append(node.path)
                        elif debug_enable(DEBUG_EXTRACT):
                            announce("r%s~%s: unmodified" % (node.revision, node.path))
                        self.permissions[node.path] = perms
                # These are directory actions.
                elif node.action in (SD_DELETE, SD_REPLACE):
                    if debug_enable(DEBUG_EXTRACT):
                        announce("r%s: deleteall %s" % (revision,node.path))
                    fileop = FileOp()
                    fileop.construct("deleteall", node.path[:-1])
                    actions.append((node, fileop))
            # Time to generate commits from actions and fileops.
            if debug_enable(DEBUG_EXTRACT):
                announce("r%s: %d actions" % (revision, len(actions)))
            # First, break the file operations into branch cliques
            cliques = collections.defaultdict(list)
            lastbranch = None
            for (node, fileop) in actions:
                # Try last seen branch first
                if lastbranch and node.path.startswith(lastbranch):
                    cliques[lastbranch].append(fileop)
                    continue
                for branch in self.branches:
                    if node.path.startswith(branch):
                        cliques[branch].append(fileop)
                        lastbranch = branch
                        break
                else:
                    cliques[""].append(fileop)
            # Make two operation lists from the cliques, sorting cliques
            # containing only branch deletes from other cliques.
            deleteall_ops = []
            other_ops = []
            for (branch, ops) in cliques.iteritems():
                if len(ops) == 1 and ops[0].op == "deleteall":
                    deleteall_ops.append((branch, ops))
                else:
                    other_ops.append((branch, ops))
            oplist = itertools.chain(other_ops, deleteall_ops)
            # Create all commits corresponding to the revision
            newcommits = []
            commit.fossil_id = revision
            if len(other_ops) <= 1:
                # In the ordinary case, we can assign all non-deleteall fileops
                # to the base commit.
                self.repo.fossil_map["SVN:%s" % commit.fossil_id] = commit
                try:
                    commit.common, commit.fileops = next(oplist)
                    commit._pathset = None
                except StopIteration:
                    commit.common = os.path.commonprefix([node.path for node in record.nodes])
                commit.set_mark(self.__newmark())
                if debug_enable(DEBUG_EXTRACT):
                    announce("r%s gets mark %s" % (revision, commit.mark))
                newcommits.append(commit)
            # If the commit is mixed, or there are deletealls left over,
            # handle that.
            oplist = sorted(oplist, key=operator.itemgetter(0))
            for (i, (branch, fileops)) in enumerate(oplist):
                split = commit.clone()
                split.common = branch
                # Sequence numbers for split commits are 1-origin
                split.fossil_id += StreamParser.SplitSep + str(i + 1)
                self.repo.fossil_map["SVN:%s" % split.fossil_id] = split
                split.comment += "\n[[Split portion of a mixed commit.]]\n"
                split.set_mark(self.__newmark())
                split.fileops = fileops
                split._pathset = None
                newcommits.append(split)
            # The revision is truly mixed if there is more than one clique
            # not consisting entirely of deleteall operations.
            if len(other_ops) > 1:
                # Store the last used split id
                split_commits[revision] = split.fossil_id
            # Sort fileops according to git rules
            for newcommit in newcommits:
                newcommit.fileops.sort(key=FileOp.sortkey)
            # Deduce links between branches on the basis of copies. This
            # is tricky because a revision can be the target of multiple
            # copies.  Humans don't abuse this because tracking multiple
            # copies is too hard to do in a slow organic brain, but tools
            # like cvs2svn can generate large sets of them. cvs2svn seems
            # to try to copy each file and directory from the commit
            # corresponding to the CVS revision where the file was last
            # changed before the copy, which may be substantially earlier
            # than the CVS revision corresponding to the
            # copy). Fortunately, we can resolve such sets by the simple
            # expedient of picking the *latest* revision in them!
            # No code uses the result if branch analysis is turned off.
            if not nobranch:
                for newcommit in newcommits:
                    if commit.mark in self.branchlink: continue
                    copies = [node for node in record.nodes \
                              if node.from_rev is not None \
                              and node.path.startswith(newcommit.common)]
                    if copies and debug_enable(DEBUG_TOPOLOGY):
                        announce("r%s: copy operations %s" %
                                     (newcommit.fossil_id, copies))
                    # If the copies include one for the directory, use that as
                    # the first parent: most of the files in the new branch
                    # will come from that copy, and that might well be a full
                    # branch copy where doing that way is needed because the
                    # fileop for the copy didn't get generated and the commit
                    # tree would be wrong if we didn't.
                    latest = next((node for node in copies
                                    if node.kind == SD_DIR and
                                       node.from_path and
                                       node.path == newcommit.common),
                                  None)
                    if latest is not None:
                        self.directory_branchlinks.add(newcommit.common)
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce("r%s: directory copy with %s" \
                                     % (newcommit.fossil_id, copies))
                    # The user may have botched a branch creation by doing a
                    # non-Subversion directory copy followed by a bunch of
                    # Subversion adds. Blob hashes will match existing files,
                    # but from_rev and from_path won't be set at parse time.
                    # Our code detects this case and makes file
                    # backlinks, but can't deduce the directory copy.
                    # Thus, we have to treat multiple file copies as
                    # an instruction to create a gitspace branch.
                    #
                    # This guard filters out copy op sets that are
                    # *single* file copies. We're making an assumption
                    # here that multiple file copies should always
                    # trigger a branch link creation.  This assumption
                    # could be wrong, which is why we emit a warning
                    # message later on for branch links detected this
                    # way
                    #
                    # Even with this filter you'll tend to end up with lots
                    # of little merge bubbles with no commits on one side;
                    # these have to be removed by a debubbling pass later.
                    # I don't know what generates these things - cvs2svn, maybe.
                    #
                    # The second conjunct of this guard filters out the case
                    # where the user actually did do a previous Subversion file
                    # copy to start the branch, in which case we want to link
                    # through that.
                    elif len(copies) > 1 \
                             and newcommit.common not in self.directory_branchlinks:
                        self.fileop_branchlinks.add(newcommit.common)
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce("r%s: making branch link %s" %
                                     (newcommit.fossil_id, newcommit.common))
                        # Use max() on the reversed iterator since max returns
                        # the first item with the max key and we want the last
                        latest = max(reversed(copies),
                                     key=lambda node: int(node.from_rev))
                    if latest is not None:
                        prev = last_relevant_commit(
                                latest.from_rev, latest.from_path,
                                operator.attrgetter("common"))
                        if prev is not None:
                            self.branchlink[newcommit.mark] = (newcommit, prev)
                            if debug_enable(DEBUG_TOPOLOGY):
                                announce("r%s: link %s (%s) back to %s (%s, %s)" % \
                                         (newcommit.fossil_id,
                                          newcommit.mark,
                                          newcommit.common,
                                          latest.from_rev,
                                          prev.mark,
                                          prev.common
                                          ))
                        else:
                            if debug_enable(DEBUG_TOPOLOGY):
                                complain("lookback for %s failed" % latest)
                            raise Fatal("couldn't find a branch root for the copy of %s at r%s." % (latest.path, latest.revision))
            # We're done, add all the new commits 
            self.repo.events += newcommits
            self.repo.declare_sequence_mutation()
            # Report progress, and give up our scheduler slot
            # so as not to eat the processor.
            baton.twirl()
            time.sleep(0)
            previous = revision
        # Filemaps are no longer needed
        del filemaps
        # Warn about dubious branch links
        self.fileop_branchlinks.discard("trunk" + os.sep)
        if self.fileop_branchlinks - self.directory_branchlinks:
            self.gripe("branch links detected by file ops only: %s" % " ".join(self.fileop_branchlinks - self.directory_branchlinks))
        self.repo.timings.append(["commits", time.time()]) 
        if debug_enable(DEBUG_EXTRACT):
            announce("at post-parsing time:")
            for commit in self.repo.commits():
                msg = commit.comment
                if msg == None:
                    msg = ""
                announce("r%-4s %4s %2d %2d '%s'" % \
                         (commit.fossil_id, commit.mark,
                          len(commit.fileops),
                          len(commit.properties),
                          msg.strip()[:20]))
        baton.twirl()
        # First, turn the root commit into a tag
        if self.repo.events and not self.repo.earliest_commit().fileops:
            try:
                initial, second = itertools.islice(self.repo.commits(), 2)
                self.repo.tagify(initial,
                                 "root",
                                 second,
                                 "[[Tag from root commit at Subversion r%s]]\n" % initial.fossil_id)
            except ValueError: # self.repo has less than two commits
                self.gripe("could not tagify root commit.")
        # Now, branch analysis.
        branchroots = []
        if not self.branches or nobranch:
            last = None
            for commit in self.repo.commits():
                commit.set_branch(os.path.join("refs", "heads", "master") + os.sep)
                if last is not None: commit.set_parents([last])
                last = commit
        else:
            # Instead, determine a branch for each commit...
            if debug_enable(DEBUG_EXTRACT):
                announce("Branches: %s" % (self.branches,))
            lastbranch = None
            for commit in self.repo.commits():
                if lastbranch is not None \
                        and commit.common.startswith(lastbranch):
                    branch = lastbranch
                else:
                    branch = next((b for b in self.branches
                                  if commit.common.startswith(b)),
                                  None)
                if branch is not None:
                    commit.set_branch(branch)
                    for fileop in commit.fileops:
                        if fileop.op in ("M", "D"):
                            fileop.path = fileop.path[len(branch):]
                        elif fileop.op in ("R", "C"):
                            fileop.source = fileop.source[len(branch):]
                            fileop.target = fileop.target[len(branch):]
                    commit._pathset = None
                else:
                    commit.set_branch("root")
                    self.branches["root"] = None
                lastbranch = branch
                baton.twirl()
            self.repo.timings.append(["branches", time.time()]) 
            baton.twirl()
            # ...then rebuild parent links so they follow the branches
            for commit in self.repo.commits():
                if self.branches[commit.branch] is None:
                    branchroots.append(commit)
                    commit.set_parents([])
                else:
                    commit.set_parents([self.branches[commit.branch]])
                self.branches[commit.branch] = commit
                # Per-commit spinner disabled because this pass is fast
                #baton.twirl()
            self.repo.timings.append(["parents", time.time()]) 
            baton.twirl()
            # The root branch is special. It wasn't made by a copy, so
            # we didn't get the information to connect it to trunk in the
            # last phase.
            try:
                commit = next(c for c in self.repo.commits()
                              if c.branch == "root")
            except StopIteration:
                pass
            else:
                earliest = self.repo.earliest_commit()
                if commit != earliest:
                    self.branchlink[commit.mark] = (commit, earliest)
            self.repo.timings.append(["root", time.time()])
            baton.twirl()
            # Add links due to Subversion copy operations
            if debug_enable(DEBUG_EXTRACT):
                announce("branch roots: [{roots}], links {{{links}}}".format(
                    roots = ", ".join(c.mark for c in branchroots),
                    links = ", ".join("{l[0].mark}: {l[1].mark}".format(l=l)
                                      for l in self.branchlink.itervalues())))
            for (child, parent) in self.branchlink.itervalues():
                if not parent.repo is self.repo:
                    # The parent has been deleted since, don't add the link;
                    # it can only happen if parent was the now tagified root.
                    continue
                if not child.has_parents() \
                        and not child.branch in self.branchcopies:
                    # The branch wasn't created by copying another branch and
                    # is instead populated by fileops. Prepend a deleteall to
                    # ensure that it starts with a clean tree instead of
                    # inheriting that of its soon to be added first parent.
                    # The deleteall is put on the first commit of the branch
                    # which has fileops or more than one child.
                    commit = child
                    while len(commit.children()) == 1 and not commit.fileops:
                        commit = commit.first_child()
                    if commit.fileops or commit.has_children():
                        fileop = FileOp()
                        fileop.construct("deleteall")
                        commit.fileops.insert(0, fileop)
                        self.generated_deletes.append(commit)
                if parent not in child.parents():
                    child.add_parent(parent)
            for root in branchroots:
                if getattr(commit.branch, "fileops", None) \
                        and root.branch != ("trunk" + os.sep):
                    self.gripe("r%s: can't connect nonempty branch %s to origin" \
                                % (root.fossil_id, root.branch))
            self.repo.timings.append(["branchlinks", time.time()]) 
            baton.twirl()
            # Add links due to svn:mergeinfo properties
            mergeinfo = PathMap()
            mergeinfos = {}
            for (revision, record) in self.revisions.iteritems():
                for node in record.nodes:
                    if node.kind != SD_DIR: continue
                    # Mutate the mergeinfo according to copies
                    if node.from_rev:
                        assert int(node.from_rev) < int(revision)
                        mergeinfo.copy_from(
                                node.path,
                                mergeinfos.get(node.from_rev) or PathMap(),
                                node.from_path)
                        if debug_enable(DEBUG_EXTRACT):
                            announce("r%s~%s mergeinfo copied to %s" \
                                % (node.from_rev, node.from_path, node.path))
                    # Mutate the filemap according to current mergeinfo.
                    # The general case is multiline: each line may describe
                    # multiple spans merging to this revision; we only consider
                    # the end revision of each span.
                    # Because svn:mergeinfo will persist like other properties,
                    # we need to compare with the already present mergeinfo and
                    # only take new entries into account when creating merge
                    # links. Also, since merging will also inherit the
                    # mergeinfo entries of the source path, we also need to
                    # gather and ignore those.
                    existing_merges = set(mergeinfo[(node.path,)] or [])
                    own_merges = set()
                    try:
                        info = node.props['svn:mergeinfo']
                    except (AttributeError, TypeError, KeyError):
                        pass
                    else:
                        for line in info.split('\n'):
                            try:
                                from_path, ranges = line.split(":", 1)
                            except ValueError:
                                continue
                            for span in ranges.split(","):
                                # Ignore single-rev fields, they are cherry-picks.
                                # TODO: maybe we should even test if min_rev
                                # corresponds to some from_rev + 1 to ensure no
                                # commit has been skipped.
                                try:
                                    min_rev, from_rev = span.split("-", 1)
                                except ValueError:
                                    min_rev = from_rev = None
                                if (not min_rev) or (not from_rev): continue
                                # Import mergeinfo from merged branches
                                try:
                                    past_merges = mergeinfos[from_rev][(from_path,)]
                                except KeyError:
                                    pass
                                else:
                                    if past_merges:
                                        existing_merges.update(past_merges)
                                # Svn doesn't fit the merge range to commits on
                                # the source branch; we need to find the latest
                                # commit between min_rev and from_rev made on
                                # that branch.
                                from_commit = last_relevant_commit(
                                                    from_rev, from_path)
                                if from_commit is not None and \
                                        int(from_commit.fossil_id.split(".",1)[0]) \
                                            >= int(min_rev):
                                    own_merges.add(from_commit.mark)
                                else:
                                    self.gripe("cannot resolve mergeinfo "
                                               "source from revision %s for "
                                               "path %s." % (from_rev,
                                                             node.path))
                    mergeinfo[(node.path,)] = own_merges
                    new_merges = own_merges - existing_merges
                    if not new_merges: continue
                    # Find the correct commit in the split case
                    commit = last_relevant_commit(revision, node.path)
                    if commit is None or \
                            not commit.fossil_id.startswith(revision):
                        # The reverse lookup went past the target revision
                        self.gripe("cannot resolve mergeinfo destination "
                                   "to revision %s for path %s."
                                   % (revision, node.path))
                        continue
                    for mark in new_merges:
                        parent = self.repo.objfind(mark)
                        if parent not in commit.parents():
                            commit.add_parent(parent)
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce("processed new mergeinfo from r%s "
                                     "to r%s." % (parent.fossil_id,
                                                  commit.fossil_id))
                mergeinfos[revision] = mergeinfo.snapshot()
                baton.twirl()
            del mergeinfo, mergeinfos
            self.repo.timings.append(["mergeinfo", time.time()])
            baton.twirl()
            if debug_enable(DEBUG_EXTRACT):
                announce("after branch analysis")
                for commit in self.repo.commits():
                    try:
                        ancestor = commit.parents()[0]
                    except IndexError:
                        ancestor = '-'
                    announce("r%-4s %4s %4s %2d %2d '%s'" % \
                             (commit.fossil_id,
                              commit.mark, ancestor,
                              len(commit.fileops),
                              len(commit.properties),
                              commit.branch))
        baton.twirl()
        # Code controlled by --nobranch option ends.
        # Canonicalize all commits to ensure all ops actually do something.
        for commit in self.repo.commits():
            commit.canonicalize()
            baton.twirl()
        self.repo.timings.append(["canonicalize", time.time()])
        baton.twirl()
        if debug_enable(DEBUG_EXTRACT):
            announce("after canonicalization")
        # Now clean up junk commits generated by cvs2svn.
        # We need a list copy since commits are deleted in the loop
        for commit in list(self.repo.commits()):
            # Things that cvs2svn created as tag surrogates
            # get turned into actual tags.
            m = StreamParser.cvs2svn_tag_re.search(commit.comment)
            if m and not commit.has_children():
                fulltag = os.path.join("refs", "tags", m.group(1))
                self.repo.events.append(Reset(self.repo, ref=fulltag,
                                              target=commit.parents()[0]))
                commit.delete(["--tagback"])
            # Childless generated branch commits carry no information,
            # and just get removed.
            m = StreamParser.cvs2svn_branch_re.search(commit.comment)
            if m and not commit.has_children():
                commit.delete(["--tagback"])
            baton.twirl()
        self.repo.timings.append(["junk", time.time()]) 
        baton.twirl()
        if debug_enable(DEBUG_EXTRACT):
            announce("after cvs2svn artifact removal")
        # Now we need to tagify all other commits without fileops, because git
        # is going to just discard them when we build a live repo and they
        # might possibly contain interesting metadata.
        # * Commits from tag creation often have no fileops since they come
        #   from a directory copy in Subversion. The annotated tag name is the
        #   basename of the SVN tag directory.
        # * Same for branch-root commits. The tag name is the basename of the
        #   branch directory in SVN, with "-root" appended to distinguish them
        #   from SVN tags.
        # * Commits at a branch tip that consist only of deleteall are also
        #   tagified: their fileops aren't worth saving; the comment metadata
        #   just might be.
        # * All other commits without fileops get turned into an annotated tag
        #   with name "emptycommit-<revision>".
        rootmarks = {root.mark for root in branchroots} # empty if nobranch
        rootskip = {"trunk"+os.sep, "root"}
        def tagname(commit):
            # Give branch and tag roots a special name, except for "trunk" and
            # "root" which do not come from a regular branch copy.
            if commit.mark in rootmarks and commit.branch not in rootskip:
                name = os.path.basename(commit.branch[:-1])
                if commit.branch.startswith("tags"):
                    return name
                return name + "-root"
            # Fallback on standard rules.
            return None
        def taglegend(commit):
            # Tipdelete commits and branch roots don't get any legend.
            if commit.fileops or (commit.mark in rootmarks \
                    and commit.branch not in rootskip):
                return ""
            # Otherwise, generate one for inspection.
            legend = ["[[Tag from zero-fileop commit at Subversion r%s" \
                             % commit.fossil_id]
            # This guard can fail on a split commit
            if commit.fossil_id in self.revisions:
                if self.revisions[commit.fossil_id].nodes:
                    legend.append(":\n")
                    legend.extend(str(node)+"\n"
                            for node in self.revisions[commit.fossil_id].nodes)
            legend.append("]]\n")
            return "".join(legend)
        self.repo.tagify_empty(tipdeletes = True,
                               canonicalize = False,
                               name_func = tagname,
                               legend_func = taglegend,
                               gripe = self.gripe)
        self.repo.timings.append(["tagifying", time.time()])
        baton.twirl()
        if debug_enable(DEBUG_EXTRACT):
            announce("after tagification")
        # Now pretty up the branch names
        for commit in self.repo.commits():
            if commit.branch == "root":
                commit.set_branch(os.path.join("refs", "heads", "root"))
            elif commit.branch.startswith("tags" + os.sep):
                branch = commit.branch
                if branch.endswith(os.sep):
                    branch = branch[:-1]
                commit.set_branch(os.path.join("refs", "tags",
                                              os.path.basename(branch)))
            elif commit.branch == "trunk" + os.sep:
                commit.set_branch(os.path.join("refs", "heads", "master"))
            else:
                commit.set_branch(os.path.join("refs", "heads",
                                              os.path.basename(commit.branch[:-1])))
            baton.twirl()
        ##self.repo.timings.append(["polishing", time.time()]) 
        baton.twirl()
        if debug_enable(DEBUG_EXTRACT):
            announce("after branch name mapping")
        # cvs2svn likes to crap out sequences of deletes followed by
        # filecopies on the same node when it's generating tag commits.
        # There are lots of examples of this in the nut.svn test load.
        # These show up as redundant (D, M) fileop pairs.
        for commit in self.repo.commits():
            if any(fileop is None for fileop in commit.fileops):
                raise Fatal("Null fileop at r%s" % commit.fossil_id)
            for i in range(len(commit.fileops)-1):
                if commit.fileops[i].op == 'D' and commit.fileops[i+1].op == 'M':
                    if commit.fileops[i].path == commit.fileops[i+1].path:
                        commit.fileops[i].op = None
            commit.fileops = [fileop for fileop in commit.fileops if fileop.op is not None]
            baton.twirl()
        self.repo.timings.append(["canonicalizing", time.time()]) 
        baton.twirl()
        if debug_enable(DEBUG_EXTRACT):
            announce("after delete/copy canonicalization")
        # Remove spurious parent links caused by random cvs2svn file copies.
        #baton.twirl("debubbling")
        for commit in self.repo.commits():
            try:
                a, b = commit.parents()
            except ValueError:
                pass
            else:
                if a is b:
                    self.gripe("r%s: duplicate parent marks" % commit.fossil_id)
                elif a.branch == b.branch == commit.branch:
                    if b.committer.date < a.committer.date:
                        (a, b) = (b, a)
                    if b.descended_from(a):
                        commit.remove_parent(a)
            # Per-commit spinner disabled because this pass is fast
            #baton.twirl()
        self.repo.timings.append(["debubbling", time.time()]) 
        baton.twirl()
        self.repo.renumber(baton=baton)
        baton.twirl()
        self.repo.timings.append(["renumbering", time.time()]) 
        self.repo.write_fossils = True
        # Look for tag and branch merges that mean we may want to undo a
        # tag or branch creation
        ignore_deleteall = set(commit.mark
                               for commit in self.generated_deletes)
        for commit in self.repo.commits():
            if commit.fileops and commit.fileops[0].op == 'deleteall' \
                    and commit.has_children() \
                    and commit.mark not in ignore_deleteall:
                self.gripe("mid-branch deleteall on %s at <%s>." % \
                        (commit.branch, commit.fossil_id))
        self.repo.timings.append(["linting", time.time()]) 
        # Treat this in-core state as though it was read from an SVN repo
        self.repo.vcs = next(vcstype for vcstype in vcstypes if vcstype.name == "svn")

class SubversionDumper:
    """Repository to Subversion stream dump.

    Walks the commits of a Repository and writes a version-2 Subversion
    dumpfile.  While doing so it keeps a model of the output tree in
    self.pathmap so that it can decide between "add" and "change" node
    actions and knows which directories still have to be created.
    """
    def __init__(self, repo, nobranch=False):
        self.repo = repo
        self.nobranch = nobranch
        # SVN path -> FlowState for each node recorded as present in the dump.
        self.pathmap = {}
        # Commit mark -> Subversion revision number assigned by dump().
        self.mark_to_revision = {}
        # Branch-level directories ("branches", "trunk", "branches/x", ...)
        # that directory_create() has already emitted.
        self.branches_created = []
        # True once make_tag() has emitted the top-level "tags" directory.
        self.tag_latch = False
    class FlowState:
        "Per-path bookkeeping: last revision touched, properties, directory data."
        def __init__(self, rev, props=None):
            self.rev = rev
            self.props = props or {}
            self.is_directory = False
            self.subfiles = 0
    @staticmethod
    def svnprops(pdict):
        """Render a property dictionary in dumpfile K/V syntax.

        Keys are emitted in sorted order; properties with an empty or
        None value are omitted entirely.
        """
        # items() rather than iteritems(): same result under sorted(),
        # and it does not tie this helper to Python 2.
        return "".join("K %d\n%s\nV %d\n%s\n" % (len(key), key, len(val), val)
                        for key, val in sorted(pdict.items()) if val)
    @staticmethod
    def dump_revprops(fp, revision, date, author=None, log=None, parents=None):
        "Emit a Revision-number record describing unversioned properties."
        fp.write("Revision-number: %d\n" % revision)
        parts = []
        parts.append(SubversionDumper.svnprops({"svn:log": log}))
        parts.append(SubversionDumper.svnprops({"svn:author": author}))
        # Ugh.  Subversion apparently insists on those decimal places
        parts.append(SubversionDumper.svnprops({"svn:date": date.rfc3339()[:-1]+".000000Z"}))
        # Hack merge links into mergeinfo properties.  This is a kluge
        # - the Subversion model is really like cherrypicking rather
        # than branch merging - but it's better than nothing, and
        # should at least round-trip with the logic in the Subversion
        # dump parser.
        if len(parents or []) > 1:
            parents = iter(parents)
            next(parents) # ignore main parent
            ancestral = ".".join(str(parent) for parent in sorted(parents))
            parts.append(SubversionDumper.svnprops({"svn:mergeinfo": ancestral}))
        parts.append("PROPS-END\n")
        parts.append("\n")
        revprops = "".join(parts)
        # The declared lengths exclude the blank line that follows PROPS-END.
        fp.write("Prop-content-length: %d\n" % (len(revprops)-1))
        fp.write("Content-length: %d\n\n" % (len(revprops)-1))
        fp.write(revprops)
    @staticmethod
    def dump_node(fp, path, kind, action, content="",
                  from_rev=None, from_path=None,
                  props=None):
        """Emit a Node record describing versioned properties and content.

        A property section is always written (just PROPS-END when props
        is empty or None).  Text headers and body are written only when
        content is non-empty.
        """
        fp.write("Node-path: %s\n" % path)
        fp.write("Node-kind: %s\n" % kind)
        fp.write("Node-action: %s\n" % action)
        if from_rev:
            fp.write("Node-copyfrom-rev: %s\n" % from_rev)
        if from_path:
            fp.write("Node-copyfrom-path: %s\n" % from_path)
        nodeprops = SubversionDumper.svnprops(props or {}) + "PROPS-END\n"
        fp.write("Prop-content-length: %d\n" % len(nodeprops))
        if content:
            fp.write("Text-content-length: %d\n" % len(content))
            # Checksum validation in svnload works if we do sha1 but
            # not if we try md5.  It's unknown why - possibly svn load
            # is simply ignoring sha1.
            #fp.write("Text-content-md5: %s\n" % hashlib.md5(content).hexdigest())
            fp.write("Text-content-sha1: %s\n" % hashlib.sha1(content).hexdigest())
        fp.write("Content-length: %d\n\n" % (len(nodeprops) + len(content)))
        fp.write(nodeprops)
        if content:
            fp.write(content)
        fp.write("\n\n")
    @staticmethod
    def svnbranch(branch):
        """The branch directory corresponding to a specified git branch.

        refs/heads/master maps to "trunk", refs/heads/X to "branches/X"
        and refs/tags/X to "tags/X".  A name ending in "trunk" gets a
        "-git" suffix.  Raises Recoverable for any other ref shape.
        """
        segments = branch.split(os.sep)
        assert segments[0] == "refs"
        if tuple(segments) == ("refs", "heads", "master"):
            return "trunk"
        if segments[1] not in ("tags", "heads") or len(segments) != 3:
            raise Recoverable("%s can't be mapped to Subversion." % branch)
        svnbase = segments[2]
        if svnbase.endswith("trunk"):
            svnbase += "-git"
        if segments[1] == "tags":
            return os.path.join("tags", svnbase)
        else:
            return os.path.join("branches", svnbase)
    def svnize(self, branch, path=""):
        "Return SVN path corresponding to a specified gitspace branch and path."
        if self.nobranch:
            return path
        return os.path.join(SubversionDumper.svnbranch(branch), path)
    def filedelete(self, fp, branch, path):
        "Emit the dump-stream records required to delete a file."
        if debug_enable(DEBUG_SVNDUMP):
            announce("filedelete%s" % repr((branch, path)))
        svnpath = self.svnize(branch, path)
        fp.write("Node-path: %s\n" % svnpath)
        fp.write("Node-action: delete\n\n\n")
        del self.pathmap[svnpath]
        # Walk up the tree, deleting each directory whose file count
        # drops to zero.
        while True:
            svnpath = os.path.dirname(svnpath)
            # The second disjunct in this guard is a
            # spasmodic twitch in the direction of
            # respecting Subversion's notion of a "flow".
            # We refrain from deleting branch directories
            # so they'll have just one flow throughout the
            # life of the repository.
            if not svnpath or svnpath in self.branches_created:
                break
            self.pathmap[svnpath].subfiles -= 1
            if self.pathmap[svnpath].subfiles == 0:
                fp.write("Node-path: %s\n" % svnpath)
                fp.write("Node-action: delete\n\n\n")
                del self.pathmap[svnpath]
    def _missing_parent_dirs(self, svnout, path):
        """List directories that must be added before path can exist.

        Returns the full SVN paths, outermost first, of every directory
        between the branch directory svnout and the file path (relative
        to the branch) that is not yet recorded in self.pathmap.
        """
        missing = []
        parts = os.path.dirname(path).split(os.sep)
        if parts[0]:
            for depth in range(len(parts)):
                fullpath = os.path.join(svnout, os.sep.join(parts[:depth+1]))
                # pathmap is keyed by full SVN path, so that is what
                # must be tested; testing the branch-relative name never
                # matches and re-adds the directory on every file op.
                if fullpath not in self.pathmap:
                    missing.append(fullpath)
        return missing
    def directory_create(self, fp, revision, branch, path, parents=None):
        """Emit whatever directory adds are needed before path is written.

        Creates the "branches" directory and the branch directory itself
        on first use (as a copy of the first parent's branch when parents
        is non-empty), then every missing intermediate directory of path.
        """
        # NOTE(review): the branch directory is used here even when
        # self.nobranch is set, whereas svnize() drops it - confirm that
        # nobranch mode is meant to work this way.
        if debug_enable(DEBUG_SVNDUMP):
            announce("directory_create%s" % repr((revision, branch, path)))
        creations = []
        # Branch creation may be required
        svnout = SubversionDumper.svnbranch(branch)
        if svnout not in self.branches_created:
            if not svnout.startswith("tags") and "branches" not in self.branches_created:
                self.branches_created.append("branches")
                creations.append(("branches", None, None))
            self.branches_created.append(svnout)
            if parents:
                # A plain revision number; a stray trailing comma here
                # used to turn it into a one-element tuple.
                from_rev = self.mark_to_revision[parents[0].mark]
                from_branch = SubversionDumper.svnbranch(parents[0].branch)
                creations.append((svnout, from_rev, from_branch))
                # The dict is mutated inside the loop, so iterate over
                # a snapshot of its keys.
                for key in list(self.pathmap):
                    if key.startswith(from_branch + os.sep) and key != from_branch:
                        counterpart = svnout + key[len(from_branch):]
                        self.pathmap[counterpart] = SubversionDumper.FlowState(revision)
            else:
                creations.append((svnout, None, None))
        # Create all directory segments required
        # to get down to the level where we can
        # create the file.
        for fullpath in self._missing_parent_dirs(svnout, path):
            creations.append((fullpath, None, None))
        for (dirpath, from_rev, from_path) in creations:
            SubversionDumper.dump_node(fp,
                                       path=dirpath,
                                       kind="dir",
                                       action="add",
                                       from_rev=from_rev,
                                       from_path=from_path)
            self.pathmap[dirpath] = SubversionDumper.FlowState(revision)
            self.pathmap[dirpath].is_directory = True
            self.pathmap[dirpath].subfiles += 1
    def filemodify(self, fp, revision, branch, mode, ref, path, parents):
        "Emit the dump-stream records required to add or modify a file."
        if debug_enable(DEBUG_SVNDUMP):
            announce("filemodify%s" % repr((revision, branch, mode, ref, path,
                                            [event.mark for event in parents])))
        # Branch and directory creation may be required.
        # This has to be called early so copy can update the filemap.
        self.directory_create(fp, revision, branch, path, parents)
        svnpath = self.svnize(branch, path)
        if svnpath in self.pathmap:
            svnop = "change"
            self.pathmap[svnpath].rev = revision
        else:
            svnop = "add"
            self.pathmap[svnpath] = SubversionDumper.FlowState(revision)
        if debug_enable(DEBUG_SVNDUMP):
            announce("Generating %s %s" % (svnpath, svnop))
        content = self.repo.objfind(ref).get_content()
        props = self.pathmap[svnpath].props
        # Subversion treats the mere presence of svn:executable as
        # "executable", whatever its value, so the bit is cleared by
        # dropping the property rather than by storing "false".
        if mode == '100755':
            props.setdefault("svn:executable", "true")
        elif mode == '100644':
            props.pop("svn:executable", None)
        #if mode == "120000":
        #    changeprops = {"svn:special":"*"}
        #    content = "link " + content
        # The actual content.  dump_node() always writes a property
        # section, and in a version-2 dump that section replaces the
        # node's properties, so the full current set is passed each time.
        SubversionDumper.dump_node(fp,
                  path=svnpath,
                  kind="file",
                  action=svnop,
                  props=props,
                  content=content)
    def filecopy(self, fp, revision, branch, source, target):
        "Emit the dump-stream records required to copy a file within a branch."
        if debug_enable(DEBUG_SVNDUMP):
            announce("filecopy%s" % repr((revision, branch, source, target)))
        svnsource = self.svnize(branch, source)
        try:
            flow = self.pathmap[svnsource]
        except KeyError:
            raise Fatal("couldn't retrieve flow information for %s" % source)
        self.directory_create(fp, revision, branch, target)
        svntarget = self.svnize(branch, target)
        # The target gets its own flow state: it first exists in this
        # revision, and later property changes to one path must not
        # leak into the other.
        self.pathmap[svntarget] = SubversionDumper.FlowState(revision,
                                                             dict(flow.props))
        SubversionDumper.dump_node(fp,
                                   path=svntarget,
                                   kind="file",
                                   action="add",
                                   from_path=svnsource,
                                   from_rev=flow.rev,
                                   props=self.pathmap[svntarget].props)
    def make_tag(self, fp, revision, branch, name, log, author):
        "Emit a revision that copies a branch directory to tags/<name>."
        if debug_enable(DEBUG_SVNDUMP):
            announce("make_tag%s" % repr((revision, branch, name, log, str(author))))
        svnsource = self.svnize(branch)
        svntarget = os.path.join("tags", name)
        SubversionDumper.dump_revprops(fp, revision,
                                       log=log,
                                       author=author.email.split("@")[0],
                                       date=author.date)
        # The top-level tags directory is created along with the first tag.
        if not self.tag_latch:
            self.tag_latch = True
            SubversionDumper.dump_node(fp,
                                   path="tags",
                                   kind="dir",
                                   action="add")
        SubversionDumper.dump_node(fp,
                                   path=svntarget,
                                   kind="dir",
                                   action="add",
                                   from_path=svnsource,
                                   from_rev=revision-1)
    def _set_ignores(self, fp, revision, branch, path, content):
        """Emit a directory change setting svn:ignore from a .gitignore.

        path is the branch-relative path of the .gitignore file and
        content its new text; empty content removes the property,
        because svnprops() omits empty values.
        """
        # The flow entry is keyed by the containing directory, for
        # modification and deletion alike.
        svndir = self.svnize(branch, os.path.dirname(path))
        if svndir not in self.pathmap:
            self.pathmap[svndir] = SubversionDumper.FlowState(revision)
        self.pathmap[svndir].props["svn:ignore"] = content
        # The node that changes is the directory holding the file.
        SubversionDumper.dump_node(fp,
                  path=os.path.dirname(self.svnize(branch, path)),
                  kind="dir",
                  action="change",
                  props=self.pathmap[svndir].props)
    def dump(self, selection, fp, progress=False):
        "Export the repository as a Subversion dumpfile."
        self.tag_latch = False
        tags = [event for event in self.repo.events if isinstance(event, Tag)]
        with Baton("reposurgeon: dumping", enable=progress) as baton:
            try:
                fp.write("SVN-fs-dump-format-version: 2\n\n")
                fp.write("UUID: %s\n\n" % (self.repo.uuid or uuid.uuid4()))
                SubversionDumper.dump_revprops(fp,
                                               revision=0,
                                               date=Date(rfc3339(time.time())))
                baton.twirl()
                revision = 0
                for i in selection:
                    event = self.repo.events[i]
                    # Passthroughs are lost; there are no equivalents
                    # in Subversion's ontology.
                    if not isinstance(event, Commit):
                        continue
                    revision += 1
                    self.mark_to_revision[event.mark] = revision
                    # We must treat the gitspace committer attribute
                    # as the author: gitspace author information is
                    # lost.  So is everything but the local part of
                    # the committer name.
                    backlinks = [self.mark_to_revision[mark]
                                 for mark in event.parent_marks()]
                    SubversionDumper.dump_revprops(fp, revision,
                                                   log=event.comment,
                                                   author=event.committer.email.split("@")[0],
                                                   date=event.committer.date,
                                                   parents=backlinks)
                    for fileop in event.fileops:
                        if fileop.op == "D":
                            if fileop.path.endswith(".gitignore"):
                                self._set_ignores(fp, revision, event.head(),
                                                  fileop.path, "")
                            else:
                                self.filedelete(fp, event.head(), fileop.path)
                        elif fileop.op == "M":
                            if fileop.path.endswith(".gitignore"):
                                blob = self.repo.objfind(fileop.ref)
                                self._set_ignores(fp, revision, event.head(),
                                                  fileop.path,
                                                  blob.get_content())
                            else:
                                self.filemodify(fp,
                                                revision,
                                                event.head(),
                                                fileop.mode,
                                                fileop.ref,
                                                fileop.path,
                                                event.parents())
                        elif fileop.op == "R":
                            self.filecopy(fp,
                                          revision,
                                          event.head(),
                                          fileop.source,
                                          fileop.target)
                            self.filedelete(fp, event.branch, fileop.source)
                        elif fileop.op == "C":
                            self.filecopy(fp,
                                          revision,
                                          event.head(),
                                          fileop.source,
                                          fileop.target)
                        elif fileop.op == "deleteall":
                            branchdir = self.svnbranch(event.head())
                            # Here again the object is mutated, so a copy list must be used.
                            for path in list(self.pathmap):
                                if path.startswith(branchdir + os.sep):
                                    del self.pathmap[path]
                            fp.write("Node-path: %s\n" % branchdir)
                            fp.write("Node-action: delete\n\n\n")
                        else:
                            raise Fatal("unsupported fileop type %s." \
                                        % fileop.op)
                    # Turn any annotated tag pointing at this commit into
                    # a directory copy.
                    for tag in tags:
                        if tag.target is event:
                            revision += 1
                            self.make_tag(fp,
                                          revision,
                                          event.head(),
                                          name=tag.name,
                                          log=tag.comment,
                                          author=tag.tagger)
                            break
                    else:
                        # Preserve lightweight tags, too.  Ugh, O(n**2).
                        if event.has_children():
                            for child in event.children():
                                if child.branch == event.branch:
                                    break
                            else:
                                revision += 1
                                self.make_tag(fp,
                                              revision,
                                              event.head(),
                                              name=os.path.basename(event.branch),
                                              log="",
                                              author=event.committer)
                    fp.flush()
            except IOError as e:
                raise Fatal("export error: %s" % e)

# Generic repository-manipulation code begins here

class Repository:
    "Generic repository object."
    def __init__(self, name=None):
        "Set up an empty repository object, optionally named."
        # Identity and where the data came from.
        self.name = name
        self.uuid = None
        self.vcs = None
        self.sourcedir = None
        self.seekstream = None
        self.basedir = os.getcwd()
        self.readtime = time.time()
        # The events encountered, in order, and lazily built lookups.
        self.events = []
        self._commits = None
        self._mark_to_index = {}
        self._mark_to_object = {}
        self._has_manifests = False
        # Bookkeeping sets.
        self.preserve_set = set()
        self.case_coverage = set()
        # Fossil-reference tracking.
        self.write_fossils = False
        self.dollar_map = {}    # From dollar cookies in files
        self.fossil_map = {}    # From anything that doesn't survive rebuild
        self.fossil_count = None
        # Entries appended as [label, timestamp] pairs.
        self.timings = []
    def cleanup(self):
        "Remove this repo's scratch directory, where its blob files live."
        scratch = self.subdir()
        nuke(scratch, "reposurgeon: cleaning up %s" % scratch)
    def subdir(self, name=None):
        """Return the path of the per-process scratch directory.

        The directory lives under self.basedir and is called
        .rs<pid>, with -<name> appended when a name is given or the
        repository itself has one.
        """
        label = self.name if name is None else name
        leaf = ".rs" + repr(os.getpid())
        if label:
            leaf = leaf + "-" + label
        return os.path.join(self.basedir, leaf)
    def makedir(self):
        "Create the scratch directory if it is missing; Fatal on OS failure."
        target = self.subdir()
        try:
            if debug_enable(DEBUG_SHUFFLE):
                announce("repository fast import creates " + target)
            if os.path.exists(target):
                return
            os.mkdir(target)
        except OSError:
            raise Fatal("can't create operating directory")
    def size(self):
        "Total length of the string form of every event, for statistics."
        total = 0
        for event in self.events:
            total += len(str(event))
        return total
    def branchset(self):
        """Collect every branch name used in this repo.

        Names come from the ref of each Reset that has a committish
        and from the branch of each Commit.
        """
        names = set()
        for event in self.events:
            if isinstance(event, Reset) and event.committish is not None:
                names.add(event.ref)
                continue
            if isinstance(event, Commit):
                names.add(event.branch)
        return names
    def branchmap(self):
        """Map each branch name to the mark it ends up pointing at.

        Events are replayed in order: a Commit moves its branch to its
        own mark, a Reset with a committish moves its ref there, and a
        Reset without one forgets the ref.
        """
        tips = {}
        for event in self.events:
            if isinstance(event, Reset):
                if event.committish is not None:
                    tips[event.ref] = event.committish
                else:
                    tips.pop(event.ref, None)
                continue
            if isinstance(event, Commit):
                tips[event.branch] = event.mark
        return tips
    def index(self, obj):
        """Return the position of obj in the event list.

        Objects with a mark are looked up through find(); anything else
        is located by a linear equality search.  Raises Fatal when the
        object is not in this repository.
        """
        try:
            mark = obj.mark
        except AttributeError:
            # No mark to look up: fall back to scanning the event list.
            position = next((where for (where, event) in enumerate(self.events)
                             if event == obj), None)
        else:
            position = self.find(mark)
        if position is None:
            raise Fatal("internal error: <%s> not matched "
                        "in repository %s" % (obj.fossil_id, self.name))
        return position
    def find(self, mark):
        "Return the event-list index of the object with this mark, or None."
        lookup = self._mark_to_index
        if not lookup:
            # Build the cache on first use (or after it has been emptied).
            lookup.update((event.mark, position)
                          for (position, event) in enumerate(self.events)
                          if hasattr(event, "mark"))
        return lookup.get(mark)
    def objfind(self, mark):
        "Return the event object carrying this mark, or None."
        lookup = self._mark_to_object
        if not lookup:
            # Build the cache on first use (or after invalidation).
            lookup.update((event.mark, event)
                          for event in self.events
                          if hasattr(event, "mark"))
        return lookup.get(mark)
    def all(self):
        "Return a selection covering every event index in the repository."
        event_count = len(self.events)
        return range(event_count)
    def invalidate_object_map(self):
        "Drop the mark-to-object cache so objfind() rebuilds it on next use."
        self._mark_to_object = dict()
    def invalidate_manifests(self):
        "Clear the filemap of every commit, if any manifests are recorded."
        if not self._has_manifests:
            return
        for commit in self.commits():
            commit.filemap = None
        self._has_manifests = False
    def read_authormap(self, selection, fp):
        """Read an author-mapping file and apply it to the repo.

        Each non-blank line that does not start with '#' has the form

            local = Full Name <address> [timezone]

        The local name is matched case-insensitively.  The mapping is
        applied to the committer and authors of every selected Commit
        and to the tagger of every selected Tag.

        Raises Recoverable if the file cannot be read or a line is
        malformed, and Fatal if no address can be recognized in a line.
        """
        authormap = {}
        # Bound up front so the syntax-error report below cannot itself
        # fail if reading raises before the first line is assigned.
        line = None
        try:
            for line in fp:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                # Split on the first '=' only: the right-hand side (a
                # name and address) may legitimately contain '='.
                (local, netwide) = line.split('=', 1)
                (address, timezone) =  netwide.split(">")
                address += ">"
                timezone = timezone.strip()
                if timezone:
                    timezone = Date.tzresolve(timezone)
                (name, mail) = email.utils.parseaddr(address.strip())
                if not mail:
                    raise Fatal("can't recognize address in '%s'" % netwide)
                authormap[local.strip().lower()] = (name, mail, timezone)
        except IOError:
            raise Recoverable("couldn't open author-map file")
        except ValueError:
            raise Recoverable("bad author map syntax: %s" % repr(line))
        for ei in selection:
            event = self.events[ei]
            if isinstance(event, Commit):
                event.committer.remap(authormap)
                for author in event.authors:
                    author.remap(authormap)
            elif isinstance(event, Tag):
                event.tagger.remap(authormap)
    def write_authormap(self, selection, fp):
        """Write one "name = identity" line per contributor.

        Contributors are the committers and authors of selected Commits
        and the taggers of selected Tags, keyed by name.
        """
        contributors = {}
        for position in selection:
            event = self.events[position]
            if isinstance(event, Commit):
                people = [event.committer] + list(event.authors)
            elif isinstance(event, Tag):
                people = [event.tagger]
            else:
                continue
            for person in people:
                contributors[person.name] = person.who()
        for (name, cid) in contributors.items():
            fp.write("%s = %s\n" % (name, cid))
    def read_fossilmap(self, fp):
        """Read a fossil-references dump and fill in the repo's fossil map.

        Each line is "<fossil> <timestamp>!<email>[:<seq>]".  Commits
        are matched on committer timestamp and email; the optional
        1-based seq picks among commits sharing both.  Raises
        Recoverable on a malformed line.
        """
        # Group commits by (committer timestamp, committer email).
        by_stamp = {}
        for commit in self.commits():
            stamp_key = (commit.committer.date.timestamp, commit.committer.email)
            by_stamp.setdefault(stamp_key, []).append(commit)
        try:
            matched = 0
            unmatched = 0
            for line in fp:
                (fossil, stamp) = line.split()
                (timefield, person) = stamp.split('!')
                seq = 0
                if ':' in person:
                    (person, seq) = person.split(':')
                    seq = int(seq) - 1
                assert fossil and timefield and person
                candidates = by_stamp.get((Date(timefield).timestamp, person))
                if candidates is None:
                    unmatched += 1
                    continue
                self.fossil_map[fossil] = candidates[seq]
                if fossil.startswith("SVN:"):
                    candidates[seq].fossil_id = fossil[4:]
                matched += 1
            if verbose >= 1:
                announce("%d matched, %d unmatched, %d total"\
                         % (matched, unmatched, matched+unmatched))
        except ValueError:
            raise Recoverable("bad syntax in fossils file.")
    def write_fossilmap(self, fp):
        "Write the fossil map, ordered by committer timestamp, then cookie."
        def chronological(entry):
            (cookie, commit) = entry
            return (commit.committer.date.timestamp, cookie)
        for (cookie, commit) in sorted(self.fossil_map.items(),
                                       key=chronological):
            serial = ''
            if "SVN" in cookie and StreamParser.SplitSep in cookie:
                serial = ':' + cookie.split(StreamParser.SplitSep)[1]
            # The objfind test is needed in case this repo is an expunge
            # fragment with a copied fossil map.  It's a simple substitute
            # for partitioning the map at expunge time.
            if not (self.objfind(commit.mark) and commit.fossil_id):
                continue
            fields = (cookie,
                      commit.committer.date.rfc3339(),
                      commit.committer.email,
                      serial)
            fp.write("%s\t%s!%s%s\n" % fields)
    def tagify(self, commit, name, target, legend="", delete=True):
        """Turn a fileop-less commit into a tag on target.

        The tag comment is the commit comment (newline-terminated, if
        there is one) followed by legend; the commit's committer becomes
        the tagger.  Unless delete is false the commit is then deleted.
        Raises Fatal if the commit still has fileops.
        """
        if debug_enable(DEBUG_EXTRACT):
            label = commit.mark
            if commit.fossil_id:
                label = label + (" <%s>" % commit.fossil_id)
            announce("tagifying: %s -> %s" % (label, name))
        if commit.fileops:
            raise Fatal("Attempting to tagify a commit with fileops.")
        pref = (commit.comment + "\n") if commit.comment else ""
        newtag = Tag(commit.repo,
                     name=name,
                     target=target,
                     tagger=commit.committer,
                     comment=pref + legend)
        self.addEvent(newtag)
        if delete:
            commit.delete(["--tagback"])
    def tagify_empty(self, commits = None,
                           tipdeletes = False,
                           tagify_merges = False,
                           canonicalize = True,
                           name_func = lambda _: None,
                           legend_func = lambda _: "",
                           gripe = complain
                          ):
        """Turn into tags commits without (meaningful) fileops.
            Arguments: * commits:       None, or an iterable of event indices
                                        tagify_empty() ignores non-commits
                       * tipdeletes:    whether tipdeletes should be tagified
                       * tagify_merges: whether commits with more than one
                                        parent should be tagified; by default
                                        they are left alone
                       * canonicalize:  whether to canonicalize fileops first
                       * name_func:     custom function for choosing the tag
                                        name; if it returns a False value like
                                        None, a default scheme is used
                       * legend_func:   custom function for choosing the legend
                                        of a tag; no fallback is provided. By
                                        default it always returns "".
                       * gripe:         function called with a message for each
                                        parentless commit that is deleted
                                        rather than tagified"""
        # Default scheme for tag names
        def default_name(commit):
            if commit.fileops:
                branch = commit.branch
                if branch[-1] == os.sep: branch = branch[:-1]
                return "tipdelete-" + os.path.basename(branch)
            if commit.fossil_id:
                return "emptycommit-" + commit.fossil_id
            elif commit.mark:
                return "emptycommit-mark" + commit.mark[1:]
            else:
                # The index has to be stringified before it can be
                # appended to the name.
                return "emptycommit-index" + str(commit.index())
        # Use a separate loop because delete() invalidates manifests.
        if canonicalize:
            for _, commit in self.iterevents(commits, types=Commit):
                commit.canonicalize()
        # Tagify commits without fileops
        usednames = {e.name for e in self.events if isinstance(e, Tag)}
        if tipdeletes:
            is_tipdelete = lambda c: c.alldeletes(killset={"deleteall"}) \
                                     and not c.has_children()
        else:
            is_tipdelete = lambda _: False
        deletia = []
        for index, commit in self.iterevents(commits, types=Commit):
            if (not commit.fileops) or is_tipdelete(commit):
                if commit.has_parents():
                    if len(commit.parents()) > 1 and not tagify_merges:
                        continue
                    name = name_func(commit) or default_name(commit)
                    # Append .1, .2, ... until the tag name is unused.
                    for i in itertools.count():
                        suffix = ".{}".format(i) if i else ""
                        if name + suffix not in usednames: break
                    usednames.add(name + suffix)
                    legend = legend_func(commit)
                    # tagify() refuses commits that still carry fileops.
                    if commit.fileops: commit.fileops = []
                    self.tagify(commit,
                                name + suffix,
                                commit.parents()[0],
                                legend,
                                delete = False)
                    deletia.append(index)
                else:
                    # Nothing to hang a tag on: report and delete.
                    msg = []
                    if commit.fossil_id:
                        msg.append("r%s:" % commit.fossil_id)
                    elif commit.mark:
                        msg.append("'%s':" % commit.mark)
                    msg.append("deleting parentless")
                    if commit.fileops:
                        msg.append("tip delete of %s." % commit.branch)
                    else:
                        msg.append("zero-op commit on %s." % commit.branch)
                    gripe(" ".join(msg))
                    deletia.append(index)
        # Deletion is deferred so that all collected indices stay valid.
        self.delete(deletia, ["--tagback"])
    def fast_import(self, fp, options, progress=False):
        "Populate the repo from a stream file, then record the read time."
        parser = StreamParser(self)
        parser.fast_import(fp, options, progress)
        self.readtime = time.time()
    def parse_dollar_cookies(self):
        """Extract info about fossil references from CVS/SVN header cookies.

        Fills self.dollar_map with "SVN:<cookie>" or
        "CVS:<path>:<revision>" keys, each mapped to the first commit
        found at or after the earliest blob carrying that cookie.
        Does nothing if the map is already populated.
        """
        if self.dollar_map:
            return
        # The goal here is to throw away CVS and Subversion header
        # information still fossilized into $Id$ and $Subversion$
        # headers after conversion to a later version. For each
        # cookie, all but the earliest blob containing it has it
        # as a fossil which should be removed.  Then, the earliest
        # commit referencing that blob gets a fossil property set;
        # later references will be branching artifacts.
        seen = set()
        for event in self.events:
            if not (isinstance(event, Blob) and event.cookie):
                continue
            if event.cookie in seen:
                continue
            # The first commit immediately after this blob.  Reset the
            # variable for every blob so that a blob with no following
            # commit can neither reuse the commit located for an earlier
            # blob nor trip over an unbound name.
            commit = None
            for ei in range(self.find(event.mark), len(self.events)):
                if isinstance(self.events[ei], Commit):
                    commit = self.events[ei]
                    break
            if commit is None:
                # Nothing follows this blob, so there is no commit to
                # associate the cookie with.
                continue
            seen.add(event.cookie)
            if "fossil" in commit.properties:
                complain("fossil property of %s overwritten" \
                         % commit.mark)
            if isinstance(event.cookie, str):
                # A plain string cookie is treated as a Subversion one.
                svnkey = "SVN:" + event.cookie
                self.dollar_map[svnkey] = commit
            else:
                # Otherwise the cookie is a (basename, revision) pair.
                (basename, cvsref) = event.cookie
                for fileop in commit.fileops:
                    if fileop.op == 'M' and fileop.ref == event.mark:
                        if not os.path.basename(fileop.path).endswith(basename):
                            # Usually the harmless result of a
                            # file move or copy that cvs2svn or
                            # git-svn didn't pick up on.
                            complain("mismatched CVS header path '%s' in %s vs '%s' in %s"
                                     % (fileop.path, commit.mark, basename, event.mark))
                        cvskey = "CVS:%s:%s" % (fileop.path, cvsref)
                        self.dollar_map[cvskey] = commit
    def export_style(self):
        "Return the style flags that tune the export dump format."
        if not self.vcs:
            # No VCS type attached: default to git style
            return ("nl-after-commit",)
        return self.vcs.styleflags
    def fast_export(self, selection, fp, options, target=None, progress=False):
        "Write the selected events to fp as a Subversion dump or a fast-export stream."
        if target and target.name == "svn":
            SubversionDumper(self).dump(selection, fp, progress)
            return
        with Baton("reposurgeon: exporting", enable=progress) as baton:
            try:
                # The latch is never set (the line that would set it is
                # disabled below), so the trailing notes commit is
                # currently never written.
                fossil_latch = False
                realized = {}
                for ei in selection:
                    baton.twirl()
                    event = self.events[ei]
                    #fossil_latch = fossil_latch or hasattr(event, "fossil_id")
                    if debug_enable(DEBUG_UNITE) and hasattr(event, "mark"):
                        announce("writing %d %s %s" % (ei, event.mark, event.__class__.__name__))
                    fp.write(event.dump(target, options=options, realized=realized))
                if fossil_latch:
                    fp.write("reset fossil_id\n")
                    endcommit = Commit(self)
                    endcommit.set_branch("refs/heads/master")
                    endcommit.comment = "Fossil-ID notes\n"
                    endcommit.committer = Attribution("Nowhere Man <nowhere@nobody.net> " + rfc3339(time.time()))
                    for ei in selection:
                        noted = self.events[ei]
                        if not hasattr(noted, "fossil_id"):
                            continue
                        note = FileOp()
                        note.inline = "Fossil-ID: %s" % noted.fossil_id
                        note.construct('N', 'inline', noted.mark)
                        endcommit.fileops.append(note)
                    fp.write(str(endcommit))
            except IOError as e:
                raise Fatal("export error: %s" % e)
    def preserve(self, filename):
        "Register an existing path in the preserve set, to be copied back on rebuild."
        if not os.path.exists(filename):
            raise Recoverable("%s doesn't exist" % filename)
        self.preserve_set.add(filename)
    def unpreserve(self, filename):
        "Drop a path from the preserve set; complain if it is not a member."
        if filename not in self.preserve_set:
            raise Recoverable("%s doesn't exist" % filename)
        self.preserve_set.remove(filename)
    def preservable(self):
        "Return the set of paths currently registered for preservation."
        preserved = self.preserve_set
        return preserved
    def rename(self, newname):
        "Rename the repo, moving its subdirectory to the one for the new name."
        try:
            if debug_enable(DEBUG_SHUFFLE):
                details = (self.name, newname,
                           repr(self.subdir()), repr(self.subdir(newname)))
                announce("repository rename %s->%s calls os.rename(%s, %s)" % details)
            # The rename can fail if the target directory exists.
            os.rename(self.subdir(), self.subdir(newname))
            self.name = newname
        except OSError as e:
            failure = (self.subdir(), self.subdir(newname), e)
            raise Fatal("repo rename %s -> %s failed: %s" % failure)
    def addEvent(self, event):
        "Append an event to the event list and flag the sequence as mutated."
        events = self.events
        events.append(event)
        self.declare_sequence_mutation()
    @memoized_iterator("_commits")
    def commits(self):
        "Iterate over the Commit objects of the repo, in event-list order."
        return (event for event in self.events if isinstance(event, Commit))
    def declare_sequence_mutation(self):
        "Mark the repo event sequence modified by resetting _mark_to_index and _commits."
        self._mark_to_index = {}
        self._commits = None
    def earliest_commit(self):
        "Return the first commit produced by the commit iterator."
        commit_iter = self.commits()
        return next(commit_iter)
    def earliest(self):
        "Return the committer date of the first commit in the repo."
        first = next(self.commits())
        return first.committer.date
    def ancestors(self, ei):
        "Return the event indices on the first-parent chain of an event, nearest first."
        lineage = []
        current = ei
        # Walk back through first parents until a parentless event is hit.
        while self.events[current].has_parents():
            current = self.find(self.events[current].parent_marks()[0])
            lineage.append(current)
        return lineage
    #
    # Delete machinery begins here
    #
    def ancestor_count(self, event, path):
        "Count commits modifying a path, from this commit back along first parents."
        hits = 0
        while True:
            # Each commit contributes at most one to the count.
            if any(op and op.op == "M" and op.path == path
                   for op in event.fileops):
                hits += 1
            # 0, 1, and >1 are the interesting cases, so stop early
            if hits > 1:
                return hits
            try:
                event = event.parents()[0]
            except IndexError:
                # Ran out of ancestors
                break
        return hits
    def __compose(self, event, left, right):
        """Compose two relevant fileops.

        event is the commit that holds both operations; it is only
        used to ask ancestor_count() how often a path was modified in
        this commit and its first-parent ancestors.  left and right
        are the two fileops being composed.  Note that left or right
        may be altered in place (path, target or op type) by some of
        the reductions.

        Returns a 5-tuple, described below.  Raises Fatal if the pair
        of operations is not covered by any case.
        """
        # Here's what the fields in the return value mean:
        # 0: Was this a modification
        # 1: Op to replace the first with (None means delete)
        # 2: Op to replace the second with (None means delete)
        # 3: If not None, a warning to emit
        # 4: Case number, for coverage analysis
        pair = (left.op, right.op)
        #
        # First op M
        #
        if pair == ("M", "M"):
            # Leave these in place, they get handled later.
            return (False, left, right, None, 0)
        # M a + D a -> D a
        # Or, could reduce to nothing if M a was the only modify..
        elif left.op == "M" and right.op == "D":
            if self.ancestor_count(event, left.path) == 1:
                return (True, None, None, None, 1)
            else:
                return (True, right, None, None, 2)
        elif left.op == "M" and right.op == "R":
            # M a + R a b -> R a b M b, so R falls towards start of list
            if left.path == right.source:
                if self.ancestor_count(event, left.path) == 1:
                    # M a has no ancestors, preceding R can be dropped
                    left.path = right.target
                    return (True, left, None, None, 3)
                else:
                    # M a has ancestors, R is still needed
                    left.path = right.target
                    return (True, right, left, None, 4)
            # M b + R a b can't happen.  If you try to generate this with
            # git mv it throws an error.  An ordinary mv results in D b M a.
            elif left.path == right.target:
                return(True, right, None, "M followed by R to the M operand?", -1)
            # If neither path matches, control falls through to the
            # "case not covered" error at the bottom.
        # Correct reduction for this would be M a + C a b -> C a b + M a + M b,
        # that is we'd have to duplicate the modify. We'll leave it in place
        # for now.
        elif left.op == "M" and right.op == "C":
            return (False, left, right, None, 5)
        #
        # First op D or deleteall
        #
        # Delete followed by modify undoes delete, since M carries whole files.
        elif pair == ("D", "M"):
            return (True, None, right, None, 6)
        # But we have to leave deletealls in place, since they affect right ops
        elif pair == ("deleteall", "M"):
            return (False, left, right, None, 7)
        # These cases should be impossible.  But cvs2svn actually generates
        # adjacent deletes into Subversion dumpfiles which turn into (D, D).
        elif left.op == "deleteall" and right.op != "M":
            return (False, left, right,
                    "Non-M operation after deleteall?", -1)
        elif left.op == "D" and right.op == "D":
            return (True, left, None, None, -2)
        elif left.op == "D" and right.op in ("R", "C"):
            if left.path == right.source:
                return (False, left, right,
                        "R or C of %s after deletion?" % left.path, -3)
            else:
                return (False, left, right, None, 8)
        #
        # First op R
        #
        elif pair == ("R", "D"):
            if left.target == right.path:
                # Rename followed by delete of target composes to source delete
                right.path = left.source
                return (True, None, right, None, 9)
            else:
                # On rename followed by delete of source discard the delete
                # but user should be warned.  The warning names the rename
                # target, not the (already deleted) source.
                return (False, left, None,
                        "delete of %s after renaming to %s?" % (right.path, left.target), -4)
        # Rename followed by deleteall shouldn't be possible
        elif pair == ("R", "deleteall") and left.target == right.path:
            return (False, None, right,
                    "rename before deleteall not removed?", -5)
        # Leave rename or copy followed by modify alone
        elif pair == ("R", "M") or pair == ("C", "M"):
            return (False, left, right, None, 10)
        # Compose renames where possible
        elif left.op == "R" and right.op == "R":
            if left.target == right.source:
                left.target = right.target
                return (True, left, None, None, 11)
            else:
                return (False, left, right,
                        "R %s %s is inconsistent with following operation" \
                        % (left.source, left.target), -6)
        # We could do R a b + C b c -> C a c + R a b, but why?
        elif left.op == "R" and right.op == "C":
            return (False, left, right, None, 12)
        #
        # First op C
        #
        elif pair == ("C", "D"):
            if left.source == right.path:
                # Copy followed by delete of the source is a rename.
                left.setOp("R")
                return (True, left, None, None, 13)
            elif left.target == right.path:
                # This delete undoes the copy
                return (True, None, None, None, 14)
        elif pair == ("C", "R"):
            if left.source == right.source:
                # No reduction
                return (False, left, right, None, 15)
            else:
                # Copy followed by a rename of the target reduces to single copy
                if left.target == right.source:
                    left.target = right.target
                    return (True, left, None, None, 16)
        elif pair == ("C", "C"):
            # No reduction
            return (False, left, right, None, 17)
        #
        # Case not covered
        #
        raise Fatal("can't compose op '%s' and '%s'" % (left, right))
    def canonicalize(self, commit):
        """Canonicalize the list of file operations in this commit.

        Returns the set of composition case numbers that resulted in
        a modification of the fileop list.
        """
        cases_hit = set()
        # Handling deleteall operations is simple: only the rightmost
        # one and whatever follows it are kept.
        deleteall_at = [pos for (pos, op) in enumerate(commit.fileops)
                        if op.op == "deleteall"]
        if deleteall_at:
            if debug_enable(DEBUG_DELETE):
                announce("removing all before rightmost deleteall")
            commit.fileops = commit.fileops[deleteall_at[-1]:]
            commit._pathset = None
        # Composition in the general case is trickier.
        # Keep making passes until nothing mutates.
        while True:
            changed = False
            opcount = len(commit.fileops)
            for i in range(opcount):
                for j in range(i+1, opcount):
                    first = commit.fileops[i]
                    second = commit.fileops[j]
                    # Slots emptied earlier in this pass hold None
                    if first is None or second is None:
                        continue
                    if not first.relevant(second):
                        continue
                    (modified, newfirst, newsecond, warn, case) = \
                               self.__compose(commit, first, second)
                    if debug_enable(DEBUG_DELETE):
                        announce("Reduction case %d fired on %s" % (case, (i,j)))
                    if not modified:
                        continue
                    changed = True
                    commit.fileops[i] = newfirst
                    commit.fileops[j] = newsecond
                    if debug_enable(DEBUG_DELETE):
                        announce("During canonicalization:")
                        commit.fileop_dump()
                    if warn:
                        complain(warn)
                    cases_hit.add(case)
            if not changed:
                break
            # Squeeze out the deleted slots before the next pass
            commit.fileops = [op for op in commit.fileops if op is not None]
            commit._pathset = None
        return cases_hit
    def squash(self, selected, policy):
        """Delete a set of events, or rearrange it forward or backwards.

        selected is a collection of indices into self.events; policy
        is a list of modifier strings:

        --delete       drop the selected commits' fileops instead of
                       pushing them, also delete selected tags, resets
                       and passthroughs, and garbage-collect blobs.
        --quiet        suppress the sanity-check warnings.
        --tagback      move attachments of a deleted commit to its
                       first parent.
        --tagforward   move them to its first child (the default
                       unless --delete or --tagback is given).
        --pushback     append a deleted commit's fileops to its first
                       parent.
        --pushforward  prepend them to each child having the commit as
                       first parent (the default unless --delete or
                       --pushback is given).
        --coalesce     keep only the last M per path in commits that
                       received fileops.
        --complain     accepted, but not consulted in this method.

        Raises Recoverable on an unknown modifier.
        """
        if debug_enable(DEBUG_DELETE):
            announce("Deletion list is %s" % [x+1 for x in selected])
        for qualifier in policy:
            if qualifier not in ["--complain",
                                 "--coalesce",
                                 "--delete",
                                 "--pushback",
                                 "--pushforward",
                                 "--tagback",
                                 "--tagforward",
                                 "--quiet"]:
                raise Recoverable("no such deletion modifier as " + qualifier)
        # Make sure we do deletions from greatest commit number to least
        selected = sorted(selected, reverse=True)
        dquiet = "--quiet" in policy
        delete = "--delete" in policy
        tagback = "--tagback" in policy
        tagforward = "--tagforward" in policy or (not delete and not tagback)
        pushback = "--pushback" in policy
        pushforward = "--pushforward" in policy or (not delete and not pushback)
        # Sanity checks
        if not dquiet:
            for ei in selected:
                event = self.events[ei]
                if isinstance(event, Commit):
                    if delete:
                        speak = "warning: commit %s to be deleted has " % event.mark
                        if '/' in event.branch and '/heads/' not in event.branch:
                            complain(speak + "non-head branch attribute %s" % event.branch)
                        if not event.alldeletes():
                            announce(speak + "non-delete fileops.")
                    else:
                        if pushback and not event.has_parents():
                            complain("warning: "
                                     "pushback of parentless commit %s" \
                                     % event.mark)
                        if pushforward and not event.has_children():
                            complain("warning: "
                                     "pushforward of childless commit %s" \
                                     % event.mark)
        # Commits that had fileops pushed onto them and need resolution
        altered = []
        # Here are the deletions.  Events are only flagged in this loop;
        # they are actually dropped from the list afterwards.
        for e in self.events:
            e.deletehook = False
        for ei in selected:
            event = self.events[ei]
            if isinstance(event, Blob):
                # Never delete a blob except as a side effect of
                # deleting a commit.
                event.deletehook = False
            elif isinstance(event, (Tag, Reset, Passthrough)):
                event.deletehook = delete
            elif isinstance(event, Commit):
                event.deletehook = True
                # Decide the new target for tags
                filter_only = True
                if tagforward and event.has_children():
                    filter_only = False
                    new_target = event.first_child()
                elif tagback and event.parents():
                    filter_only = False
                    new_target = event.parents()[0]
                # Reparent each child
                for child in list(event.children()):
                    # Insert event's parents in place of event in child's
                    # parent list. We keep existing duplicates in case they
                    # are wanted, but ensure we don't introduce new ones.
                    old_parents = list(child.parents())
                    event_pos = old_parents.index(event)
                    # Start with existing parents before us,
                    # including existing duplicates
                    new_parents = old_parents[:event_pos]
                    # Add our parents, with possible duplicates, but not if
                    # already present before.
                    to_add = [p for p in event.parents() if p not in new_parents]
                    new_parents.extend(to_add)
                    # Avoid duplicates due to event.parents() insertion.
                    new_parents.extend(
                            p
                            for p in itertools.islice(old_parents,
                                                      event_pos+1, None)
                            if p not in to_add)
                    # Prepend a copy of this event's file ops to
                    # all children with the event as their first
                    # parent, and mark each such child as needing
                    # resolution.
                    if pushforward and child.parents()[0] == event:
                        child.fileops = copy.copy(event.fileops) + child.fileops
                        child._pathset = None
                        altered.append(child)
                    # Really set the parents to the newly constructed list
                    child.set_parents(new_parents)
                    # If event was the first parent of child yet has no parents
                    # of its own, then child's first parent has changed.
                    # Prepend a deleteall to child's fileops to ensure it
                    # starts with an empty tree (as event does) instead of
                    # inheriting that of its new first parent.
                    if event_pos == 0 and not event.parents():
                        fileop = FileOp()
                        fileop.construct("deleteall")
                        child.fileops.insert(0, fileop)
                        child._pathset = None
                        altered.append(child)
                # We might be trying to hand the event's fileops to its
                # primary parent.
                if pushback and event.has_parents():
                    # Append a copy of this event's file ops to its primary
                    # parent fileop list and mark the parent as needing
                    # resolution.
                    parent = event.parents()[0]
                    parent.fileops += copy.copy(event.fileops)
                    parent._pathset = None
                    altered.append(parent)
                    # We need to ensure all fileop blobs are defined before the
                    # corresponding fileop, in other words ensure that the blobs
                    # appear before the primary parent in the stream.
                    earliest = parent.index()
                    swap_indices = set()
                    for fileop in event.fileops:
                        if fileop.op == 'M':
                            blob_index = self.find(fileop.ref)
                            if blob_index > earliest: swap_indices.add(blob_index)
                    if swap_indices:
                        last = max(swap_indices)
                        # First take the blobs, then all others in the
                        # affected span in their existing order.
                        neworder = list(swap_indices) + \
                                   [pos for pos in range(earliest, last+1)
                                    if pos not in swap_indices]
                        self.events[earliest:last+1] = \
                                   [self.events[pos] for pos in neworder]
                        self.declare_sequence_mutation()
                # Move tags and attachments
                if filter_only:
                    # Nowhere to move them to: they go away with the commit
                    for e in event.attachments:
                        e.deletehook = True
                else:
                    if event.branch and "/tags/" in event.branch \
                            and new_target.branch != event.branch:
                        # By deleting the commit, we would lose the fact that
                        # it moves its branch (to create a lightweight tag for
                        # instance): replace it by a Reset which will save this
                        # very information. The following loop will take care
                        # of moving the attachment to the new target.
                        # NOTE(review): this Reset is created after the
                        # deletehook flags were initialized above; presumably
                        # it gets a usable deletehook some other way - confirm.
                        reset = Reset(self, ref = event.branch,
                                            target = event)
                        self.events[ei] = reset
                    # use a copy of attachments since it will be mutated
                    for t in list(event.attachments):
                        t.forget()
                        t.remember(self, target=new_target)
                # And forget the deleted event
                event.forget()
        # Do the actual deletions
        self.events = [e for e in self.events if not e.deletehook]
        self.declare_sequence_mutation()
        # Canonicalize all the commits that got ops pushed to them
        if not delete:
            for event in altered:
                if event.deletehook: continue
                if debug_enable(DEBUG_DELETE):
                    announce("Before canonicalization:")
                    event.fileop_dump()
                self.case_coverage |= self.canonicalize(event)
                if debug_enable(DEBUG_DELETE):
                    announce("After canonicalization:")
                    event.fileop_dump()
                # Now apply policy in the multiple-M case
                cliques = event.cliques()
                if "--coalesce" not in policy or debug_enable(DEBUG_DELETE):
                    for (path, oplist) in cliques.items():
                        if len(oplist) > 1:
                            complain("commit %s has multiple Ms for %s"
                                    % (event.mark, path))
                if "--coalesce" in policy:
                    # Only keep last M of each clique, leaving other ops alone
                    event.fileops = \
                           [op for (i, op) in enumerate(event.fileops)
                            if (op.op != "M") or (i == cliques[op.path][-1])]
                    event._pathset = None
                if debug_enable(DEBUG_DELETE):
                    # Report the altered commit's own (1-origin) position,
                    # not the leftover index from the deletion loop.
                    announce("Commit %d, after applying policy:" % (event.index() + 1,))
                    event.fileop_dump()
        # Cleanup
        for e in self.events:
            del e.deletehook
        if delete:
            self.gc_blobs()
    def delete(self, selected, policy=None):
        "Delete a set of events quietly, passing any extra policy modifiers to squash."
        extra = policy or []
        self.squash(selected, ["--delete", "--quiet"] + extra)
    def gc_blobs(self):
        "Drop every blob whose mark no commit M fileop refers to."
        # Collect the marks still referenced by some modify operation
        live = set()
        for event in self.events:
            if not isinstance(event, Commit):
                continue
            live.update(op.ref for op in event.fileops if op.op == 'M')
        # Non-blobs always survive; blobs survive only while referenced
        self.events = [e for e in self.events
                       if not (isinstance(e, Blob) and e.mark not in live)]
        self.invalidate_manifests()	# Might not be needed
        self.declare_sequence_mutation()
    def __delitem__(self, index):
        "Delete the event at index, quietly and with the --tagback policy."
        # To make Repository a proper container (and please pylint)
        policy = ["--delete", "--quiet", "--tagback"]
        self.squash([index], policy)
    #
    # Delete machinery ends here
    #
    def front_events(self):
        "Return the passthrough events whose text starts with option or feature."
        def is_front(event):
            return isinstance(event, Passthrough) \
                   and event.text.startswith(("option", "feature"))
        return [event for event in self.events if is_front(event)]
    def renumber(self, origin=1, baton=None):
        "Renumber the marks in a repo starting from a specified origin."
        markmap = {}
        def remark(m, e):
            "Translate an old mark; e is only used to identify the culprit."
            if m not in markmap:
                raise Fatal("unknown mark %s in %s cannot be renumbered!" % \
                            (m, e.id_me()))
            return ":" + repr(markmap[m])
        if baton:
            count = len(self.events)
            baton.startcounter(" %%%dd of %s" % (len(str(count)), count))
        # First pass: hand out new numbers in event order
        assigned = 0
        for event in self.events:
            if not hasattr(event, "mark") or event.mark is None:
                continue
            if not event.mark.startswith(":"):
                raise Fatal("field not in mark format")
            markmap[event.mark] = origin + assigned
            assigned += 1
        # Second pass: rewrite defining marks and committish references
        for event in self.events:
            for fld in ("mark", "committish"):
                try:
                    old = getattr(event, fld)
                    if old is None:
                        continue
                    new = remark(old, event)
                    if debug_enable(DEBUG_UNITE):
                        announce("renumbering %s -> %s in %s.%s" % (old, new,
                                                                    event.__class__.__name__,
                                                                    fld))
                    setattr(event, fld, new)
                except AttributeError:
                    # Event type has no such field
                    pass
        # Third pass: rewrite blob references in commit fileops
        for commit in self.commits():
            for fileop in commit.fileops:
                if fileop.op != "M" or not fileop.ref.startswith(":"):
                    continue
                new = remark(fileop.ref, fileop)
                if debug_enable(DEBUG_UNITE):
                    announce("renumbering %s -> %s in fileop" % (fileop.ref, new))
                fileop.ref = new
            if baton:
                baton.bumpcounter()
        self.invalidate_object_map()
        self._mark_to_index = {}
        if baton:
            baton.endcounter()
    def uniquify(self, color, persist=None):
        """Disambiguate branches, tags, and marks using the specified label.

        color is the label string worked into names and marks.
        persist, if not None, is a dictionary mapping names to the
        color that owns them; names not yet in it are recorded as
        belonging to this color and left alone, and only names owned
        by a different color are renamed.  With persist None, every
        branch, reset ref and tag name is renamed.

        Returns persist (possibly updated in place).  Raises Fatal if
        a mark or committish field is not in mark format.
        """
        for event in self.events:
            # The attribute holding the name differs by event type.
            for (objtype, attr) in ((Commit, "branch"),
                                    (Reset, "ref"),
                                    (Tag, "name"),):
                if isinstance(event, objtype):
                    oldname = getattr(event, attr)
                    newname = None
                    if persist is None:
                        # we're not trying to preserve names
                        # (tags get the color as prefix, others as suffix)
                        if objtype == Tag:
                            newname = color + "-" + oldname
                        else:
                            newname = oldname + "-" + color
                    elif not oldname in persist:
                        # record name as belonging to this repo
                        persist[oldname] = color
                        continue
                    elif persist.get(oldname) == color:
                        # name belongs here, do nothing
                        continue
                    else:
                        # collision - oldname belongs to a different repo
                        if objtype == Tag:
                            newname = color + "-" + oldname
                        else:
                            newname = oldname + "-" + color
                    if newname:
                        setattr(event, attr, newname)
                        if debug_enable(DEBUG_UNITE):
                            announce("moving %s -> %s in %s.%s"
                                     % (oldname, newname,
                                        objtype.__name__,
                                        attr))
                        if persist is not None:
                            # The new name is now owned by this color too
                            persist[newname] = color
            # Disambiguate defining marks.
            for fld in ("mark", "committish"):
                if hasattr(event, fld):
                    old = getattr(event, fld)
                    if old is None:
                        continue
                    elif not old.startswith(":"):
                        raise Fatal("field not in mark format")
                    else:
                        new = old + "-" + color
                        if debug_enable(DEBUG_UNITE):
                            announce("moving %s -> %s in %s.%s"
                                     % (old, new,
                                        event.__class__.__name__,
                                        fld))
                        setattr(event, fld, new)
            # Done once per event, inside the loop.
            # NOTE(review): presumably the parent-mark handling below
            # depends on this - confirm before hoisting it.
            self.invalidate_object_map()
            # Now marks in parent references and fileops
            if isinstance(event, Commit):
                parent_marks = event.parent_marks()
                for (j, old) in enumerate(parent_marks):
                    # Only marks without a '-' get the color suffix
                    if '-' not in old:
                        new = old + "-" + color
                        if debug_enable(DEBUG_UNITE):
                            announce("moving %s -> %s in parents" % (old, new))
                        parent_marks[j] = new
                event.set_parent_marks(parent_marks)
                for fileop in event.fileops:
                    if fileop.op == "M" and fileop.ref.startswith(":"):
                        new = fileop.ref + "-" + color
                        if debug_enable(DEBUG_UNITE):
                            announce("moving %s -> %s in fileop"
                                     % (fileop.ref, new))
                        fileop.ref = new
        return persist
    def absorb(self, other):
        """Merge another repo's events into this one, leaving it empty.

        The other repo's preserve set and case coverage are merged
        in, its leading passthrough events are inserted among this
        repo's passthroughs, its marks are renumbered starting from
        this repo's event count, and its remaining events are
        appended.  Afterwards other has no events and has been
        cleaned up.
        """
        # Only vcstype, sourcedir, and basedir are not copied here
        self.preserve_set |= other.preserve_set
        self.case_coverage |= other.case_coverage
        # Strip feature events off the front, they have to stay in front.
        # Stop when the other repo runs out of events, so that an empty
        # repo or one made up only of passthroughs doesn't raise IndexError.
        while other.events and isinstance(other[0], Passthrough):
            lenfront = sum(1 for x in self.events if isinstance(x, Passthrough))
            self.events.insert(lenfront, other.events.pop(0))
        other.renumber(len(self.events))
        # Merge in the non-feature events and blobs
        self.events += other.events
        self.declare_sequence_mutation()
        # Transplant in fileops, blobs, and other impedimenta
        for event in other:
            if hasattr(event, "moveto"):
                event.moveto(self)
        other.events = []
        other.cleanup()
        #del other
    def graft(self, graft_repo, graft_point):
        "Graft a repo on to this one at a specified point."
        # graft_point is an index into self.events; the event there must
        # be a commit, which becomes the parent of the grafted repo's
        # earliest commit.
        where = self.events[graft_point]
        if not isinstance(where, Commit):
            # Not every event type carries a mark, so do not assume the
            # attribute exists while composing the complaint; fall back
            # on the event's position in the sequence.
            label = getattr(where, "mark", None)
            if label is None:
                label = "event %d" % graft_point
            raise Recoverable("%s in %s is not a commit." % \
                              (label, self.name))
        # Errors aren't recoverable after this
        graft_repo.uniquify(graft_repo.name)
        # Find the root before absorbing, while the grafted repo still
        # holds its own events.
        graftroot = graft_repo.earliest_commit()
        self.absorb(graft_repo)
        graftroot.add_parent(where.mark)
        self.renumber()
    def __last_modification(self, commit, path):
        "Find the nearest ancestral modify op for a path, as (commit, fileop index)."
        generation = commit.parents()
        while generation:
            older = []
            for forebear in generation:
                # Caveat: a file renamed down only one side of a merge
                # bubble can confuse this walk.  The worst outcome is
                # an internal-error message; no real harm is done.
                for (index, fileop) in enumerate(forebear.fileops):
                    if fileop.op == 'R':
                        # Follow the rename back to the earlier name.
                        if fileop.target == path:
                            path = fileop.source
                    elif fileop.op == 'M':
                        if fileop.path == path:
                            return (forebear, index)
                # Nothing found in this ancestor; queue up its parents.
                older.extend(forebear.parents())
            generation = older
        # Ran out of ancestry without finding a modification.
        return None
    def move_to_rename(self):
        "Make rename sequences from matched delete-modify pairs."
        # Returns the number of delete ops converted to renames.  Raises
        # Recoverable if no earlier modification of a deleted path can
        # be located.
        # TODO: Actually use this somewhere...
        rename_count = 0
        for commit in self.commits():
            renames = []
            # Indices of modify ops already paired with a delete; each
            # modify may be consumed by at most one rename.
            claimed = set()
            for (d, op) in enumerate(commit.fileops):
                if op.op != 'D':
                    continue
                previous = self.__last_modification(commit, op.path)
                if not previous:
                    raise Recoverable("internal error looking for renames of %s" % op.path)
                (ancestor, i) = previous
                former = ancestor.fileops[i]
                # A modify in this commit with the same mode and blob
                # reference as the deleted file's last version is taken
                # to be the same content under a new name.
                for (m, op2) in enumerate(commit.fileops):
                    if op2.op == 'M' and m not in claimed and \
                       former.mode == op2.mode and \
                       former.ref == op2.ref:
                        renames.append((d, m))
                        claimed.add(m)
                        rename_count += 1
                        break
            if not renames:
                continue
            # Convert the deletes first, while every index is still
            # valid...
            for (d, m) in renames:
                delete = commit.fileops[d]
                delete.source = delete.path
                delete.target = commit.fileops[m].path
                del delete.path
                delete.op = 'R'
            # ...and only then drop the consumed modifies, highest index
            # first, so that no removal shifts an index still to be used.
            for m in sorted(claimed, reverse=True):
                del commit.fileops[m]
            commit._pathset = None
        return rename_count
    def path_walk(self, selection, hook=lambda path: path):
        "Run a hook over all fileop paths in the selection; return the changed paths, sorted."
        changed = set()
        def rewrite(fileop, field):
            # Pass one path-valued attribute through the hook, recording
            # the new value if the hook altered it.
            before = getattr(fileop, field)
            after = hook(before)
            if after != before:
                changed.add(after)
            setattr(fileop, field, after)
        for index in selection:
            commit = self.events[index]
            if not isinstance(commit, Commit):
                continue
            for fileop in commit.fileops:
                if fileop.op in ("M", "D"):
                    rewrite(fileop, "path")
                elif fileop.op in ("R", "C"):
                    rewrite(fileop, "source")
                    rewrite(fileop, "target")
            # Paths may have changed, so drop the commit's cached path set.
            commit._pathset = None
        return sorted(changed)
    def split_commit(self, where, splitfunc):
        "Split the commit at an event index in two, dividing its fileops with splitfunc."
        # splitfunc takes the fileop list and returns a pair: the ops
        # that stay with the original commit and the ops that go to the
        # newly created child.  Returns True if a split was performed.
        original = self.events[where]
        # Fileop split happens here
        (kept, moved) = splitfunc(original.fileops)
        if not (kept and moved):
            # One side would be empty, so there is nothing to split.
            return False
        self.events.insert(where+1, original.clone())
        self.declare_sequence_mutation()
        sibling = self.events[where+1]
        # need a new mark
        assert(original.mark == sibling.mark)
        if original.splits is None:
            original.splits = 1
        else:
            original.splits = original.splits + 1
        sibling.set_mark("%s.%s" % (original.mark, original.splits))
        self.invalidate_object_map()
        # Fix up parent/child relationships: former children now hang
        # off the new commit, which in turn hangs off the original.
        for child in list(original.children()):
            child.replace_parent(original, sibling)
        sibling.set_parents([original])
        # and then finalize the ops
        sibling.fileops = moved
        sibling._pathset = None
        original.fileops = kept
        original._pathset = None
        return True
    def split_commit_by_index(self, where, splitpoint):
        "Split the commit at an event index in two at a position in its fileop list."
        def by_position(ops):
            # split_commit leaves the first element with the original
            # commit and gives the second to the new one, so here the
            # original keeps the ops from splitpoint onward.
            tail = ops[splitpoint:]
            head = ops[:splitpoint]
            return (tail, head)
        return self.split_commit(where, by_position)
    def split_commit_by_prefix(self, where, prefix):
        "Split the commit at an event index in two by a path prefix."
        def by_prefix(ops):
            # Ops whose path (or, lacking one, target) begins with the
            # prefix go to the new commit; everything else stays with
            # the original.  Both halves use the same key, so ops with
            # no path (renames, copies) neither crash the test nor land
            # in both halves or in neither.
            others = []
            matched = []
            for op in ops:
                key = getattr(op, "path", None) or getattr(op, "target", None)
                if key and key.startswith(prefix):
                    matched.append(op)
                else:
                    others.append(op)
            return (others, matched)
        return self.split_commit(where, by_prefix)

    # Sequence emulation methods
    def __len__(self):
        "Sequence protocol: report how many events the repository holds."
        count = len(self.events)
        return count
    def __getitem__(self, i):
        "Sequence protocol: fetch from the event list by index or slice."
        events = self.events
        return events[i]
    def __setitem__(self, i, v):
        "Sequence protocol: store into the event list at an index or slice."
        events = self.events
        events[i] = v
    def iterevents(self, indices=None, types=None):
        "Iterate over (index, event) pairs matching conditions."
        # indices: iterable of event indices to visit, or None for all
        #     events in sequence order.
        # types: class or tuple of classes as accepted by isinstance(),
        #     or None for no type filtering.
        # Returns a lazy iterator.  The index source is traversed exactly
        # once, so a one-shot iterator is as acceptable as a list.
        events = self.events
        if indices is None:
            pairs = enumerate(events)
        else:
            pairs = ((i, events[i]) for i in indices)
        if types is None:
            return pairs
        return (pair for pair in pairs if isinstance(pair[1], types))

def read_repo(source, options, preferred):
    "Read a repository using fast-import."
    # source: directory expected to contain exactly one repository.
    # options: passed through to Repository.fast_import().
    # preferred: if not false, only repository types with this
    #     object's name are considered.
    # Returns the populated Repository.  Raises Recoverable when no
    # repository, or more than one, is found under source.
    if debug_enable(DEBUG_SHUFFLE):
        if preferred:
            announce("looking for a %s repo..." % preferred.name)
        else:
            announce("reposurgeon: looking for any repo at %s..." % \
                     os.path.abspath(source))
    hitcount = 0
    extractor = vcs = None
    # Look for a type with an exporter first...
    for possible in vcstypes:
        if preferred and possible.name != preferred.name:
            continue
        subdir = os.path.join(source, possible.subdirectory)
        if os.path.exists(subdir) and os.path.isdir(subdir):
            vcs = possible
            hitcount += 1
    # ...then for one we can read through a custom extractor.
    for possible in extractors:
        if preferred and possible.name != preferred.name:
            continue
        subdir = os.path.join(source, possible.subdirectory)
        if os.path.exists(subdir) and os.path.isdir(subdir):
            if possible.visible or preferred \
                   and possible.name == preferred.name:
                extractor = possible
                hitcount += 1
    if hitcount == 0:
        raise Recoverable("couldn't find a repo under %s" % os.path.relpath(source))
    elif hitcount > 1:
        raise Recoverable("too many repos under %s" % os.path.relpath(source))
    elif verbose > 0:
        announce("found %s repository" % getattr(vcs or extractor, "name"))
    repo = Repository()
    repo.sourcedir = source
    if vcs:
        repo.vcs = vcs
        repo.preserve_set = vcs.preserve
        showprogress = (verbose > 0) and not "export-progress" in repo.export_style()
        context = {"basename": os.path.basename(repo.sourcedir)}
    # Note the starting directory before entering the try block, so the
    # finally clause always has somewhere to return to.
    here = os.getcwd()
    try:
        os.chdir(repo.sourcedir)
        # We found a matching VCS type
        if vcs:
            if "%(tempfile)s" in repo.vcs.exporter:
                (tfdesc, tfname) = tempfile.mkstemp()
                # Only the name is wanted; close the descriptor at once
                # so that it is not leaked.
                os.close(tfdesc)
                try:
                    context["tempfile"] = tfname
                    do_or_die(repo.vcs.exporter % context, "repository export")
                    with open(tfname, "rb") as tp:
                        repo.fast_import(tp, options, progress=showprogress)
                finally:
                    os.remove(tfname)
            else:
                with popen_or_die(repo.vcs.exporter % context, "repository export") as tp:
                    repo.fast_import(tp, options, progress=showprogress)
            if repo.vcs.authormap and os.path.exists(repo.vcs.authormap):
                announce("reading author map.")
                with open(repo.vcs.authormap, "rb") as fp:
                    repo.read_authormap(range(len(repo.events)),fp)
            fossils = os.path.join(vcs.subdirectory, "fossils")
            if os.path.exists(fossils):
                with open(fossils, "rb") as rfp:
                    repo.read_fossilmap(rfp)
            if vcs.lister:
                # Files present in the directory but unknown to the
                # VCS become the preserve set.
                def fileset(exclude):
                    allfiles = []
                    for root, dirs, files in os.walk("."):
                        # [2:] strips the leading "./" that os.walk(".")
                        # puts on every path.
                        allfiles += [os.path.join(root, name)[2:] for name in files]
                        # Prune excluded directories in place so the
                        # walk does not descend into them.
                        for exdir in exclude:
                            if exdir in dirs:
                                dirs.remove(exdir)
                    return set(allfiles)
                with popen_or_die(vcs.lister) as fp:
                    repofiles = set(fp.read().split())
                allfiles = fileset(exclude=[vcs.subdirectory]\
                                   + glob.glob(".rs*"))
                repo.preserve_set = allfiles - repofiles
            # kluge: git-specific hook
            if repo.vcs.name == "git":
                if os.path.exists(".git/cvs-revisions"):
                    announce("reading cvs-revisions map.")
                    pathrev_to_hash = {}
                    # Pass 1: Get git's path/revision to hash mapping
                    with open(".git/cvs-revisions", "rb") as revfp:
                        for line in revfp:
                            (path, rev, hashv) = line.split()
                            pathrev_to_hash[(path, rev)] = hashv
                    # Pass 2: get git's hash to (time,person) mapping
                    hash_to_action = {}
                    stamp_set = set()
                    with popen_or_die("git log --all --format='%H %ct %ce'") as fp:
                        for line in fp:
                            (hashv, ctime, cperson) = line.split()
                            stamp = (int(ctime), cperson)
                            if stamp in stamp_set:
                                complain("more than one commit matches %s!%s (%s)" \
                                         % (rfc3339(int(ctime)), cperson, hashv))
                                # NOTE(review): hash_to_action is keyed
                                # by hash, so this membership test on a
                                # stamp looks as though it can never
                                # succeed - confirm the intent before
                                # changing it.
                                if stamp in hash_to_action:
                                    del hash_to_action[hashv]
                            else:
                                hash_to_action[hashv] = stamp
                                stamp_set.add(stamp)
                        # Pass 3: build a (time,person) to commit mapping
                        action_to_mark = {}
                        for commit in repo.commits():
                            action_to_mark[(commit.committer.date.timestamp, commit.committer.email)] = commit
                        # Pass 4: use it to set commit properties
                        for ((path, rev), value) in pathrev_to_hash.iteritems():
                            if value in hash_to_action:
                                (ctime, cperson) = hash_to_action[value]
                                action_to_mark[(ctime, cperson)].fossil_id = "CVS:%s:%s" % (path, rev)
                        del pathrev_to_hash
                        del hash_to_action
                        del stamp_set
        # We found a matching custom extractor
        if extractor:
            streamer = RepoStreamer(extractor)
            streamer.extract(repo, progress=verbose>0)
    finally:
        os.chdir(here)
    return repo

class CriticalRegion:
    "Encapsulate operations to try and make us un-interruptible."
    # This number is magic. Python sets a much higher signal.NSIG
    # value, but under Linux the signal calls start to trigger
    # runtime errors at this value and above.
    NSIG = 32
    def __init__(self):
        self.handlers = None	# Pacifies pylint
    def __enter__(self):
        "Begin critical region."
        if debug_enable(DEBUG_COMMANDS):
            complain("critical region begins...")
        # Alas that we lack sigblock support
        self.handlers = [None]*(CriticalRegion.NSIG+1)
        for sig in range(1, CriticalRegion.NSIG):
            # SIGKILL and SIGSTOP cannot be caught or ignored.
            if sig in (signal.SIGKILL, signal.SIGSTOP):
                continue
            # A handler reported as None was not installed from Python
            # and could not be put back afterwards, so leave such
            # signals alone rather than clobber them for good.
            if signal.getsignal(sig) is None:
                continue
            self.handlers[sig] = signal.signal(sig, signal.SIG_IGN)
    def __exit__(self, extype_unused, value_unused, traceback_unused):
        "End critical region."
        for sig in range(1, CriticalRegion.NSIG):
            previous = self.handlers[sig]
            # None marks a signal we never touched; handing None to
            # signal.signal() would raise TypeError.
            if previous is not None:
                signal.signal(sig, previous)
        if debug_enable(DEBUG_COMMANDS):
            complain("critical region ends.")
        # Never suppress an exception raised inside the region.
        return False

def rebuild_repo(repo, target, options, preferred):
    "Rebuild a repository from the captured state."
    # repo: the Repository to write out.
    # target: destination directory; defaults to repo.sourcedir.
    # options: passed through to Repository.fast_export().
    # preferred: repository type to build; defaults to repo.vcs.
    # Raises Recoverable if no destination or type can be determined,
    # if the type cannot be written, or if a needed directory cannot
    # be created.
    if not target and repo.sourcedir:
        target = repo.sourcedir
    if target:
        target = os.path.abspath(target)
    else:
        raise Recoverable("no default destination for rebuild")
    vcs = preferred or repo.vcs
    if not vcs:
        raise Recoverable("please prefer a repo type first")
    if not hasattr(vcs, "exporter") or vcs.importer is None:
        # Name the type actually selected; 'preferred' may be None
        # when we fell back on repo.vcs.
        raise Recoverable("%s repositories are supported for read only." \
                          % vcs.name)

    if not os.path.join("refs", "heads", "master") in repo.branchset():
        complain("repository has no branch named master. git will have no HEAD commit after the import; consider using the branch command to rename one of your branches to master.")

    # Create a new empty directory to do the rebuild in
    if not os.path.exists(target):
        staging = target
        try:
            os.mkdir(target)
        except OSError:
            raise Recoverable("target directory creation failed")
    else:
        staging = target + "-stage" + str(os.getpid())
        assert(os.path.isabs(target) and os.path.isabs(staging))
        try:
            os.mkdir(staging)
        except OSError:
            raise Recoverable("staging directory creation failed")

    # Try the rebuild in the empty staging directory
    here = os.getcwd()
    try:
        os.chdir(staging)
        if vcs.initializer:
            do_or_die(vcs.initializer, "repository initialization")
        parameters = {"basename": os.path.basename(target)}
        if "%(tempfile)s" in vcs.importer:
            (tfdesc, tfname) = tempfile.mkstemp()
            # Only the name is wanted; close the descriptor at once so
            # that it is not leaked.
            os.close(tfdesc)
            try:
                # The importer command refers to the stream by name.
                parameters["tempfile"] = tfname
                with open(tfname, "wb") as tp:
                    repo.fast_export(range(len(repo)), tp, options, progress=verbose>0, target=preferred)
                do_or_die(vcs.importer % parameters, "import")
            finally:
                os.remove(tfname)
        else:
            with popen_or_die(vcs.importer % parameters, "import", mode="w") as tp:
                repo.fast_export(range(len(repo)), tp, options,
                                 target=preferred,
                                 progress=verbose>0)
        if repo.write_fossils:
            try:
                fossilfile = os.path.join(vcs.subdirectory, "fossils")
                with open(fossilfile, "wb") as wfp:
                    repo.write_fossilmap(wfp)
            except IOError:
                raise Recoverable("fossils file %s could not be written." \
                                  % fossilfile)

        do_or_die(vcs.checkout, "repository_checkout")
        if verbose:
            announce("rebuild is complete.")

        os.chdir(here)
        # Rebuild succeeded - make an empty backup directory
        backupcount = 1
        while True:
            savedir = target + (".~%d~" % backupcount)
            if os.path.exists(savedir):
                backupcount += 1
            else:
                break
        assert os.path.isabs(savedir)
        os.mkdir(savedir)

        if staging != target:
            # This is a critical region.  Ignore all signals until we're done.
            with CriticalRegion():
                # Move the unmodified repo contents in target to the
                # backup directory.  Then move the staging contents to the
                # target directory.  Finally, restore designated files
                # from backup to target.
                for sub in os.listdir(target):
                    os.rename(os.path.join(target, sub),
                              os.path.join(savedir, sub))
                if verbose:
                    announce("repo backed up to %s." % os.path.relpath(savedir))
                for sub in os.listdir(staging):
                    os.rename(os.path.join(staging, sub),
                              os.path.join(target, sub))
                if verbose:
                    announce("modified repo moved to %s." % os.path.relpath(target))
            if repo.preserve_set:
                for sub in repo.preserve_set:
                    src = os.path.join(savedir, sub)
                    dst = os.path.join(target, sub)
                    if os.path.exists(src):
                        if os.path.isdir(src):
                            shutil.copytree(src, dst)
                        else:
                            shutil.copy2(src, dst)
                if verbose:
                    announce("preserved files restored.")
            elif verbose:
                announce("no preservations.")
    finally:
        # Whatever happened, go back where we started and discard the
        # scratch directory if one was made.
        os.chdir(here)
        if staging != target:
            nuke(staging, "reposurgeon: removing staging directory")

def do_or_die(dcmd, legend=""):
    "Run a shell command, raising Fatal unless it exits with status zero."
    # A non-empty legend is appended, after a space, to diagnostics.
    suffix = (" "  + legend) if legend else legend
    if debug_enable(DEBUG_COMMANDS):
        announce("executing '%s'%s" % (dcmd, suffix))
    try:
        status = subprocess.call(dcmd, shell=True)
        # A negative status means the child was killed by that signal.
        if status < 0:
            raise Fatal("child was terminated by signal %d." % -status)
        if status != 0:
            raise Fatal("child returned %d." % status)
    except (OSError, IOError) as e:
        raise Fatal("execution of %s%s failed: %s" % (dcmd, suffix, e))

class popen_or_die:
    "Read or write from a subordinate process."
    # Used as a context manager: entering starts the command and
    # yields its pipe; a normal exit closes the pipe and raises Fatal
    # if the close reports a failure status.
    def __init__(self, command, legend="", mode="r"):
        assert mode in ("r", "w")
        self.command = command
        self.legend = legend
        self.mode = mode
        # A non-empty legend is appended, after a space, to diagnostics.
        if self.legend:
            self.legend = " "  + self.legend
        self.fp = None
    def __enter__(self):
        if debug_enable(DEBUG_COMMANDS):
            if self.mode == "r":
                announce("%s: reading from '%s'%s" % (rfc3339(time.time()), self.command, self.legend))
            else:
                announce("%s: writing to '%s'%s" % (rfc3339(time.time()), self.command, self.legend))
        try:
            self.fp = os.popen(self.command, self.mode)
            return self.fp
        except (OSError, IOError) as oe:
            raise Fatal("execution of %s%s failed: %s" \
                                 % (self.command, self.legend, oe))
    def __exit__(self, extype, value, traceback):
        if extype:
            if verbose:
                complain("fatal exception in popen_or_die.")
            # Do not leave the pipe dangling.  Its exit status is of no
            # interest now, and a failure to close must not mask the
            # exception already in flight.
            if self.fp is not None:
                try:
                    self.fp.close()
                except (OSError, IOError):
                    pass
            # A false return makes the interpreter re-raise the
            # original exception with its traceback intact.
            return False
        if self.fp.close() is not None:
            raise Fatal("%s%s returned error." % (self.command, self.legend))
        return False

class Recoverable(Exception):
    "Exception carrying a human-readable message in its msg attribute."
    def __init__(self, msg):
        # Hand the message to the base class as well, so that str(),
        # repr() and any traceback show it instead of an empty string.
        Exception.__init__(self, msg)
        self.msg = msg

class RepositoryList:
    "A repository list with selection and access by name."
    def __init__(self):
        self.repo = None
        self.repolist = []
        self.cut_index = None
    def chosen(self):
        return self.repo
    def choose(self, repo):
        self.repo = repo
    def unchoose(self):
        self.repo = None
    def reponames(self):
        "Return a list of the names of all repositories."
        return [r.name for r in self.repolist]
    def uniquify(self, name):
        "Uniquify a repo name in the repo list."
        if name.endswith(".fi"):
            name = name[:-3]
        elif name.endswith(".svn"):
            name = name[:-4]
        if name not in self.reponames():
            return name
        else:
            # repo "foo" is #1
            seq = 2
            while name + str(seq) in self.reponames():
                seq += 1
            return name + str(seq)
    def repo_by_name(self, name):
        "Retrieve a repo by name."
        return self.repolist[self.reponames().index(name)]
    def remove_by_name(self, name):
        "Remove a repo by name."
        if self.repo and self.repo.name == name:
            self.unchoose()
        self.repolist.pop(self.reponames().index(name))        
    def cut_conflict(self, early, late):
        "Apply a graph-coloring algorithm to see if the repo can be split here."
        self.cut_index = late.parent_marks().index(early.mark)
        late.remove_parent(early)
        def do_color(commit, color):
            commit.color = color
            for fileop in commit.fileops:
                if fileop.op == "M" and fileop.ref != "inline":
                    blob = self.repo.find(fileop.ref)
                    assert isinstance(self.repo[blob], Blob)
                    self.repo[blob].colors.append(color)
        do_color(early, "early")
        do_color(late, "late")
        conflict = False
        keepgoing = True
        while keepgoing and not conflict:
            keepgoing = False
            for event in self.repo.commits():
                if event.color:
                    for neighbor in itertools.chain(event.parents(), event.children()):
                        if neighbor.color == None:
                            do_color(neighbor, event.color)
                            keepgoing = True
                            break
                        elif neighbor.color != event.color:
                            conflict = True
                            break
        return conflict
    def cut_clear(self, early, late):
        "Undo a cut operation and clear all colors."
        late.insert_parent(self.cut_index, early.mark)
        for event in self.repo:
            if hasattr(event, "color"):
                event.color = None
            if hasattr(event, "colors"):
                event.colors = []
    def cut(self, early, late):
        "Attempt to topologically cut the selected repo."
        # early and late are commits joined by a parent link.  Returns
        # False, with that link restored and all colors cleared, when
        # the coloring pass reports a conflict.  Otherwise the selected
        # repo is partitioned into two new repositories, which replace
        # it on the repo list, and True is returned.
        if self.cut_conflict(early, late):
            self.cut_clear(early, late)
            return False
        # Repo can be split, so we need to color tags
        # Each tag takes the color of the commit it targets.
        for t in self.repo.events:
            if isinstance(t, Tag):
                for c in self.repo.events:
                    if isinstance(c, Commit):
                        if c is t.target:
                            t.color = c.color
        # Front events go with early segment, they'll be copied to late one.
        for event in self.repo.front_events():
            event.color = "early"
        assert all(hasattr(x, "color") or hasattr(x, "colors") or isinstance(x, Reset) for x in self.repo)
        # Resets are tricky.  One may have both colors.
        # Blobs can have both colors too, through references in
        # commits on both sides of the cut, but we took care
        # of that earlier.
        # Collect, per color, the branches that commits of that color
        # are on; resets are distributed by these sets below.
        trackbranches = {"early": set(), "late": set()}
        for commit in self.repo.commits():
            if commit.color is None:
                complain("%s is uncolored!" % commit.mark)
            else:
                trackbranches[commit.color].add(commit.branch)
        # Now it's time to do the actual partitioning
        # From here on, 'early' and 'late' name the two new
        # repositories rather than the commits passed in.
        early = Repository(self.repo.name + "-early")
        os.mkdir(early.subdir())
        late = Repository(self.repo.name + "-late")
        os.mkdir(late.subdir())
        for event in self.repo:
            if isinstance(event, Reset):
                # A reset is copied to each side whose commits use its
                # ref, possibly both.
                if event.ref in trackbranches["early"]:
                    early.addEvent(copy.copy(event))
                if event.ref in trackbranches["late"]:
                    late.addEvent(copy.copy(event))
            elif isinstance(event, Blob):
                # A blob is cloned into each side that refers to it.
                if "early" in event.colors:
                    early.addEvent(event.clone(early))
                if "late" in event.colors:
                    late.addEvent(event.clone(late))
            else:
                # Everything else carries a single color and is moved,
                # not copied.
                if event.color == "early":
                    if hasattr(event, "moveto"):
                        event.moveto(early)
                    early.addEvent(event)
                elif event.color == "late":
                    if hasattr(event, "moveto"):
                        event.moveto(late)
                    late.addEvent(event)
                else:
                    # TODO: Someday, color passthroughs that aren't fronted.
                    raise Fatal("coloring algorithm failed on %s" % event)
        # Options and features may need to be copied to the late fragment.
        # copy.copy of the list is shallow, so the late fragment gets the
        # same front event objects as the early one.
        late.events = copy.copy(early.front_events()) + late.events
        late.declare_sequence_mutation()
        # Add the split results to the repo list.
        self.repolist.append(early)
        self.repolist.append(late)
        # Retire the original, which also deselects it.
        self.repo.cleanup()
        self.remove_by_name(self.repo.name)
        return True
    def unite(self, factors, options):
        "Unite multiple repos into a union repo."
        # factors: list of repositories; it is sorted in place by
        #     earliest() and each member is removed from the repo list.
        # options: container of option strings; only "--prune" is
        #     examined here.
        # The union is appended to the repo list and becomes the chosen
        # repository.
        factors.sort(key=operator.methodcaller("earliest"))
        # Capture each factor's root commit before the events are merged.
        roots = [x.earliest_commit() for x in factors]
        union = Repository("+".join(r.name for r in factors))
        os.mkdir(union.subdir())
        # Uniquify in reverse order, threading the persist map from one
        # factor to the next, then put the list back in date order.
        factors.reverse()
        persist = {}
        for factor in factors:
            persist = factor.uniquify(factor.name, persist)
        factors.reverse()
        for factor in factors:
            union.absorb(factor)
            self.remove_by_name(factor.name)
        # Renumber all events
        union.renumber()
        # Sort out the root grafts. The way we used to do this
        # involved sorting the union commits by timestamp, but this
        # fails because in real-world repos timestamp order may not
        # coincide with mark order - leading to "mark not defined"
        # errors from the importer at rebuild time. Instead we graft
        # each root just after the last commit in the dump sequence
        # with a date prior to it.  This method gives less intuitive
        # results, but at least means we never need to reorder
        # commits.
        for root in roots[1:]:
            most_recent = None
            # NOTE(review): most_recent is still None while takewhile
            # runs below, since it is only rebound once the deque has
            # been built; the second clause therefore looks as though
            # it can never be true - confirm the intent.
            def predicate(event):
                return root.when() > event.when() \
                        or (most_recent and event.when() > most_recent.when())
            # Get last commit such that it and all before satisfy predicate()
            # Never raises IndexError since union.earliest_commit() is root[0]
            # which satisfies predicate() thanks to factors sorting.
            # A deque with maxlen=1 retains only the final element of
            # the sequence fed to it.
            most_recent = collections.deque(
                    itertools.takewhile(predicate, union.commits()),
                    maxlen = 1).pop()
            if most_recent.mark is None:
                # This should never happen either.
                raise Fatal("can't link to commit with no mark")
            root.add_parent(most_recent.mark)
            # We may not want files from the ancestral stock to persist
            # in the grafted branch unless they have modify ops in the branch
            # root.
            if "--prune" in options:
                # Append a delete op to the root for every path in the
                # new parent's manifest that the root does not touch.
                keepers = root.paths()
                for path in most_recent.manifest():
                    if not path in keepers:
                        fileop = FileOp()
                        fileop.construct("D", path)
                        root.fileops.append(fileop)
        # Put the result on the load list
        self.repolist.append(union)
        self.choose(union)
    def expunge(self, selection, matchers):
        "Expunge a set of files from the commits in the selection set."
        def digest(toklist):
            digested = []
            for s in toklist:
                if s.startswith('/') and s.endswith('/'):
                    digested.append("(?:" + s[1:-1] + ")")
                else:
                    digested.append("^" + re.escape(s) + "$")
            return re.compile("|".join(digested))
        try:
            # First pass: compute fileop deletions
            alterations = []
            expunge = digest(matchers)
            for ei in selection:
                event = self.repo[ei]
                deletia = []
                if hasattr(event, "fileops"):
                    for (i, fileop) in enumerate(event.fileops):
                        if debug_enable(DEBUG_DELETE):
                            print(str(fileop))
                        if fileop.op in "DM":
                            if expunge.search(fileop.path):
                                deletia.append(i)
                        elif fileop.op in "RC":
                            fileop.sourcedelete = expunge.search(fileop.source)
                            fileop.targetdelete = expunge.search(fileop.target)
                            if fileop.sourcedelete:
                                deletia.append(i)
                                announce("following %s of %s to %s" %
                                         (fileop.op,
                                          fileop.source,
                                          fileop.target))
                                if fileop.op == "R":
                                    try:
                                        matchers.remove("^" + fileop.source + "$")
                                    except ValueError:
                                        pass
                                matchers.append("^" + fileop.target + "$")
                                expunge = digest(matchers)
                            elif fileop.targetdelete:
                                if fileop.op == "R":
                                    fileop.op = "D"
                                elif fileop.op == "C":
                                    deletia.append(i)
                                matchers.append("^" + fileop.target + "$")
                                expunge = digest(matchers)
                alterations.append(deletia)
        except re.error:
            raise Recoverable("you confused the regexp processor!")
        # Second pass: perform actual fileop expunges
        expunged = Repository(self.repo.name + "-expunges")
        expunged.seekstream = self.repo.seekstream
        expunged.makedir()
        for event in self.repo:
            event.deletehook = None
        for (ei, deletia) in zip(selection, alterations):
            if not deletia: continue
            event = self.repo[ei]
            keepers = []
            blobs = []
            for i in deletia:
                fileop = event.fileops[i]
                if fileop.op == 'D':
                    keepers.append(fileop)
                    if verbose:
                        announce("at %d, expunging D %s" \
                                 % (ei+1, fileop.path))
                elif fileop.op == 'M':
                    keepers.append(fileop)
                    if fileop.ref != 'inline':
                        bi = self.repo.find(fileop.ref)
                        blob = self.repo[bi]
                        assert(isinstance(blob, Blob))
                        blobs.append(blob)
                    if verbose:
                        announce("at %d, expunging M %s" \
                                 % (ei+1, fileop.path))
                elif fileop.op in ("R", "C"):
                    assert(fileop.sourcedelete or fileop.targetdelete)
                    if fileop.sourcedelete and fileop.targetdelete:
                        keepers.append(fileop)
            deletia = set(deletia) # To speed up the following
            event.fileops = [op for (i, op) in enumerate(event.fileops)
                                if i not in deletia]
            event._pathset = None
            # If there are any keeper fileops, hang them and
            # their blobs on keeps, cloning the commit() for them.
            if keepers:
                newevent = event.clone(expunged)
                newevent.fileops = keepers
                newevent._pathset = None
                for blob in blobs:
                    blob.deletehook = blob.clone(expunged)
                event.deletehook = newevent
        # Build the new repo and hook it into the load list
        expunged.events = copy.copy(self.repo.front_events())
        expunged.declare_sequence_mutation()
        expunged_branches = expunged.branchset()
        for event in self.repo:
            if event.deletehook:
                expunged.addEvent(event.deletehook)
                event.deletehook = None
            elif isinstance(event, Reset):
                if event.target is not None:
                    if event.target.deletehook:
                        expunged.addEvent(copy.deepcopy(event))
                elif isinstance(event, Reset) and event.ref in expunged_branches:
                    newreset = copy.copy(event)
                    newreset.repo = expunged
                    expunged.addEvent(newreset)
            elif isinstance(event, Tag) and \
                    event.target is not None and \
                    event.target.deletehook:
                expunged.addEvent(copy.deepcopy(event))
        for event in itertools.chain(self.repo.events, expunged.events):
            if hasattr(event, "deletehook"):
                delattr(event, "deletehook")
        expunged_marks = set(event.mark for event in expunged.events if hasattr(event, "mark"))
        for event in expunged.events:
            if hasattr(event, "parents"):
                # Parents still are Commits in the non-expunged repository
                # We use set_parent_marks so that the correct parents are
                # searched in the expunged repository.
                event.set_parent_marks(m for m in event.parent_marks()
                                         if m in expunged_marks)
        keeper_marks = set(event.mark for event in self.repo.events if hasattr(event, "mark"))
        for event in self.repo.events:
            if hasattr(event, "parents"):
                event.set_parents([e for e in event.parents() if e.mark in keeper_marks])
        backreferences = collections.Counter()
        for event in self.repo.events:
            if isinstance(event, Commit):
                for fileop in event.fileops:
                    if fileop.op == 'M':
                        backreferences[fileop.ref] += 1
        # Now remove commits that no longer have fileops, and released blobs.
        # Announce events that will be deleted.
        if verbose:
            to_delete = [i+1 for i,e in enumerate(self.repo.events)
                    if (isinstance(e, Blob) and not backreferences[e.mark])
                    or (isinstance(e, Commit) and not e.fileops)]
            if not to_delete:
                announce("deletion set is empty.")
            else:
                announce("deleting blobs and empty commits %s" % to_delete)
            del to_delete
        # First delete the blobs.
        self.repo.events = [e for e in self.repo.events
                              if (not isinstance(e, Blob))
                              or backreferences[e.mark]]
        # Then tagify empty commits.
        self.repo.tagify_empty(canonicalize = False)
        # And tell we changed the manifests and the event sequence.
        self.repo.invalidate_manifests()
        self.repo.declare_sequence_mutation()
        # At last, add the expunged repository to the loaded list.
        self.repolist.append(expunged)

class RepoSurgeon(cmd.Cmd, RepositoryList):
    "Repository surgeon command interpreter."
    # Table of boolean option flags as (name, help text) pairs.  __init__
    # registers every name in global_options with an initial value of False.
    # The help strings are runtime text; their layout is deliberate.
    OptionFlags = (
        ("canonicalize", """\
    If set, import stream reads and mailbox_in and edit will canonicalize
comments by replacing CR-LF with LF, stripping leading and trailing whitespace,
and then appending a LF.
"""),
        ("compressblobs", """\
    Use compression for on-disk copies of blobs. Accepts an increase
in repository read and write time in order to reduce the amount of
disk space required while editing; this may be useful for large
repositories. No effect if the edit input was a dump stream; in that
case, reposurgeon doesn't make on-disk blob copies at all (it points
into sections of the input stream instead).

"""),
        )
    # Matches text whose first line is immediately followed by a non-newline
    # character, i.e. there is no blank line after the first line.  The "L"
    # type letter in eval_visibility applies it to commit comments.
    unclean = re.compile("[^\n]*\n[^\n]")
    class LineParse:
        """Peel shell-style redirections and --options off a command line.

        Meant to be used as a context manager.  On entry, "<file" and
        ">file" words are removed from the line and the named files opened
        (a file name of "-" leaves the standard stream in place), every
        "--option" word is moved into the options set, and a line that is
        just "-" is treated as a redirection to the standard streams.
        Files opened here are not closed on exit.
        """
        def __init__(self, line, capabilities=None):
            self.line = line
            self.capabilities = capabilities or []
            self.stdin = sys.stdin
            self.stdout = sys.stdout
            self.infile = None
            self.outfile = None
            self.redirected = False
            self.options = set([])
        def _excise(self, found):
            "Cut a regexp match and the single character after it out of the line."
            self.line = self.line[:found.start(0)] + self.line[found.end(0)+1:]
        def __enter__(self):
            # Input redirection
            source = re.search(r"<\S+", self.line)
            if source is not None:
                if "stdin" not in self.capabilities:
                    raise Recoverable("no support for < redirection")
                self.infile = source.group(0)[1:]
                if self.infile not in ("", "-"):
                    try:
                        self.stdin = open(self.infile, "rb")
                    except (IOError, OSError):
                        raise Recoverable("can't open %s for read" \
                                          % self.infile)
                self._excise(source)
                self.redirected = True
            # Output redirection
            sink = re.search(r">\S+", self.line)
            if sink is not None:
                if "stdout" not in self.capabilities:
                    raise Recoverable("no support for > redirection")
                self.outfile = sink.group(0)[1:]
                if self.outfile not in ("", "-"):
                    # Refuse to clobber anything that exists but is not
                    # a regular file.
                    if os.path.exists(self.outfile) and not os.path.isfile(self.outfile):
                        raise Recoverable("not a plain file")
                    try:
                        self.stdout = open(self.outfile, "wb")
                    except (IOError, OSError):
                        raise Recoverable("can't open %s for write" \
                                          % self.outfile)
                self._excise(sink)
                self.redirected = True
            # Options: collect every --word and drop it from the line.
            while True:
                flag = re.search(r"--\S+", self.line)
                if flag is None:
                    break
                self.options.add(flag.group(0).strip())
                self._excise(flag)
            # Dash redirection: a bare "-" stands for the standard streams.
            if not self.redirected and self.line.strip() == '-':
                if not ("stdin" in self.capabilities or "stdout" in self.capabilities):
                    raise Recoverable("no support for - redirection")
                self.line = ""
                self.redirected = True
            self.line = self.line.strip()
            return self
        def __exit__(self, extype_unused, value_unused, traceback_unused):
            pass
        def tokens(self):
            "Return the whitespace-separated words left after redirect parsing."
            return self.line.split()
    def __init__(self):
        "Initialize both base classes and the interpreter's own state."
        cmd.Cmd.__init__(self)
        RepositoryList.__init__(self)
        # Settings consumed by the cmd.Cmd machinery.
        self.use_rawinput = True
        self.prompt = "reposurgeon% "
        # Interpreter state.
        self.echo = 0
        self.preferred = None
        self.selection = []
        self.line = ""
        self.history = []
        self.callstack = []
        self.definitions = {}
        self.profile_log = None
        self.capture = None
        # Every declared option flag starts out switched off.
        for (flagname, _helptext) in RepoSurgeon.OptionFlags:
            global_options[flagname] = False
        global_options['svn_branchify'] = ['trunk', 'tags/*', 'branches/*', '*']
    #
    # Housekeeping hooks.
    #
    def onecmd(self, line):
        "Execute one command, fielding interrupts for recoverable exceptions."
        try:
            cmd.Cmd.onecmd(self, line)
        except Recoverable as e:
            complain(e.msg)
    def postcmd(self, unused, line):
        "Post-command hook: return True after an EOF command, otherwise None."
        assert unused is not []   # pacify pylint
        if line != "EOF":
            return None
        return True
    def emptyline(self):
        "Do nothing when an empty command line is entered."
        return None
    def precmd(self, line):
        "Pre-command hook."
        if self.capture is not None:
            if line.startswith("}"):
                self.capture = None
            else:
                self.capture.append(line)
            return ""
        self.history.append(line.rstrip())
        if self.echo:
            sys.stdout.write(line.rstrip()+"\n")
        self.selection = None
        if line.startswith("#"):
            return ""
        m = re.compile(r"\s+#")
        if m:
            line = m.split(line)[0]
        # This is the only place in the implementation that knows
        # whether the syntax is VSO or SVO.
        if self.chosen():
            line = self.set_selection_set(line)
        return line
    def do_shell(self, line):
        "Execute a shell command."
        # Push out our own pending output before the child writes anything.
        for stream in (sys.stdout, sys.stderr):
            stream.flush()
        status = os.system(line)
        if status:
            raise Recoverable("'shell %s' returned error." % line)
    def do_EOF(self, unused):
        "Terminate reposurgeon."
        assert unused is not None   # pacify pylint
        # Write a bare newline, then report that the session is over.
        sys.stdout.write("\n")
        return True
    def cleanup(self):
        "Invoke cleanup() on every repository in the repo list."
        if debug_enable(DEBUG_SHUFFLE):
            announce("interpreter cleanup called.")
        for held in self.repolist:
            held.cleanup()
    def selected(self, types=None):
        "Return the chosen repo's iterevents() over the current selection and given types."
        repo = self.chosen()
        return repo.iterevents(indices=self.selection, types=types)
    #
    # The selection-language parsing code starts here.
    #
    def set_selection_set(self, line):
        """Implement object-selection syntax.

        Evaluates any selection expression at the front of the line, stores
        the resulting sorted index list in self.selection (None when the
        line contained no selection), and returns the line with the
        selection removed.
        """
        self.selection = None
        if not self.chosen():
            return line
        self.line = line
        # A line that parses as a date in its entirety is treated as a
        # <date> reference.
        try:
            Date(self.line, error=Recoverable)
        except Recoverable:
            pass
        else:
            self.line = "<" + self.line + ">"
        everything = set(self.chosen().all())
        self.selection = list(self.eval_expression(everything))
        if self.line != line:
            # TODO: We probably want to stop doing this
            self.selection.sort()
        else:
            # Nothing was consumed, so there was no selection at all.
            self.selection = None
        return self.line.lstrip()
    def peek(self):
        return self.line and self.line[0]
    def pop(self):
        if not self.line:
            return ''
        else:
            c = self.line[0]
            self.line = self.line[1:]
            return c
    def eval_expression(self, preselection):
        """Evaluate a disjunction followed by any number of '?' suffixes.

        Each '?' widens the current value: commits contribute their parents
        and children, blobs are replaced by the preselected commits whose M
        fileops reference them, and tags and resets contribute their target.
        """
        if debug_enable(DEBUG_LEXER):
            announce("eval_expression(%s)" % self.line)
        self.line = self.line.lstrip()
        value = self.eval_disjunct(preselection)
        while self.peek() == '?':
            self.pop()
            additions = set()
            removals = set()
            for ei in value:
                event = self.chosen().events[ei]
                if isinstance(event, Commit):
                    additions.update(self.chosen().find(parent.mark)
                                     for parent in event.parents())
                    additions.update(self.chosen().find(child.mark)
                                     for child in event.children())
                elif isinstance(event, Blob):
                    removals.add(ei) # Don't select the blob itself
                    for i in preselection:
                        candidate = self.chosen().events[i]
                        if isinstance(candidate, Commit) and \
                               any(fileop.op == 'M' and fileop.ref==event.mark
                                   for fileop in candidate.fileops):
                            additions.add(i)
                elif isinstance(event, (Tag, Reset)):
                    if event.target:
                        additions.add(event.target.index())
            value |= additions
            value -= removals
        self.line = self.line.lstrip()
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_expression(), left = %s" % (value, repr(self.line)))
        return value
    def eval_disjunct(self, preselection):
        """Evaluate a disjunctive expression (| has lowest precedence)

        Conjuncts separated by '|' are evaluated left to right, each one
        only against the events no earlier conjunct has selected.  Returns
        the members of preselection that some conjunct selected.
        """
        if debug_enable(DEBUG_LEXER):
            announce("eval_disjunct(%s)" % self.line)
        self.line = self.line.lstrip()
        unselected = set(preselection)
        while True:
            conjunct = self.eval_conjunct(unselected)
            if conjunct is None:
                break
            unselected -= conjunct
            self.line = self.line.lstrip()
            if self.peek() != '|':
                break
            self.pop()
        disjunct = preselection - unselected
        # Trace the value actually returned (not just the last conjunct),
        # in line with the traces of the other eval_* methods.
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_disjunct(), left = %s" % (disjunct, repr(self.line)))
        return disjunct
    def eval_conjunct(self, preselection):
        """Evaluate a conjunctive expression (& has higher precedence)

        Terms separated by '&' are intersected left to right, each term
        being evaluated against what the previous ones left.  Returns the
        resulting set; with no term present that is a copy of preselection.
        """
        if debug_enable(DEBUG_LEXER):
            announce("eval_conjunct(%s)" % self.line)
        self.line = self.line.lstrip()
        remaining = set(preselection)
        while True:
            term = self.eval_term(remaining)
            if term is None:
                break
            remaining = remaining & term
            self.line = self.line.lstrip()
            if self.peek() != '&':
                break
            self.pop()
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_conjunct(), left = %s" % (remaining, repr(self.line)))
        return remaining
    def eval_term(self, preselection):
        """Evaluate one term of a selection expression.

        A term is either a parenthesized expression or the first of a
        visibility spec, polyrange, text search or path set that applies.
        Returns the selected set, or None when no kind of term is present.
        """
        if debug_enable(DEBUG_LEXER):
            announce("eval_term(%s)" % self.line)
        self.line = self.line.lstrip()
        if self.peek() == '(':
            self.pop()
            term = self.eval_expression(preselection)
            self.line = self.line.lstrip()
            if self.peek() != ')':
                raise Recoverable("trailing junk on inner expression")
            self.pop()
        else:
            # Try each kind of primitive term in turn; the first evaluator
            # that recognizes its syntax wins.
            for evaluator in (self.eval_visibility,
                              self.eval_polyrange,
                              self.eval_textsearch,
                              self.eval_pathset):
                term = evaluator(preselection)
                if term is not None:
                    break
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_term(), left = %s" % (term, repr(self.line)))
        return term
    def eval_visibility(self, preselection):
        """Parse a visibility spec.

        A spec is '=' followed by type letters.  Returns the members of
        preselection satisfying at least one of the letters' predicates, or
        None when the line does not start with '='.
        """
        if debug_enable(DEBUG_LEXER):
            announce("eval_visibility(%s)" % self.line)
        self.line = self.line.lstrip()
        if self.peek() != "=":
            visibility = None
        else:
            # One predicate per type letter.
            typeletters = {
                "B" : lambda e: isinstance(e, Blob),
                "C" : lambda e: isinstance(e, Commit),
                "T" : lambda e: isinstance(e, Tag),
                "R" : lambda e: isinstance(e, Reset),
                "P" : lambda e: isinstance(e, Passthrough),
                "H" : lambda e: isinstance(e, Commit) and not e.has_children(),
                "O" : lambda e: isinstance(e, Commit) and not e.has_parents(),
                "M" : lambda e: isinstance(e, Commit) and len(e.parents()) > 1,
                "F" : lambda e: isinstance(e, Commit) and len(e.children()) > 1,
                "L" : lambda e: isinstance(e, Commit) and RepoSurgeon.unclean.match(e.comment),
                }

            predicates = set()
            self.pop()
            while self.peek() in typeletters:
                predicates.add(typeletters[self.pop()])
            # We need a special check here because these expressions
            # could otherwise run onto the text part of the command.
            if self.peek() not in "()|& ":
                raise Recoverable("garbled type mask at %s" % repr(self.line))
            if debug_enable(DEBUG_LEXER):
                announce("visibility set is %s with %s left" % ([x.__name__ for x in predicates], repr(self.line)))
            visibility = set()
            for i in preselection:
                event = self.chosen().events[i]
                for accepts in predicates:
                    if accepts(event):
                        visibility.add(i)
                        break
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_visibility(), left = %s" % (visibility, repr(self.line)))
        return visibility
    def eval_polyrange(self, _preselection):
        """Parse a polyrange specification (list of intervals).

        Recognized elements are 1-origin event numbers, :mark references,
        "$" for the last event, "," separators, ".." spans between two
        locations, and <name> references.  A <name> is looked up as a tag
        name, a branch basename and a fossil ID, and is also tried as a
        date or action stamp (a date, optionally followed by "!email" and
        by "#n", a 1-origin ordinal picking one of several matches).

        Returns None when the line does not start with a polyrange,
        otherwise the set of 0-origin event indices selected.  Raises
        Recoverable on malformed, unresolvable or out-of-range input.
        """
        # preselection is not used since it is perfectly legal to have range
        # bounds be outside of the reduced set.
        if debug_enable(DEBUG_LEXER):
            announce("eval_polyrange(%s)" % self.line)
        self.line = self.line.lstrip()
        polyrange_initials = (":","0","1","2","3","4","5","6","7","8","9","$", "<")
        if self.peek() not in polyrange_initials:
            polyrange = None
        # Avoid having an input redirect mistaken for the start of a literal.
        # This might break if a command can ever have both input and output
        # redirects.
        elif self.peek() == "<" and ">" not in self.line:
            polyrange = None
        else:
            # Mixed list of event indices and ".." span markers.
            selection = []
            while self.peek() in polyrange_initials + (".", ","):
                # First, literal command numbers (1-origin)
                match = re.match("[0-9]+", self.line)
                if match:
                    number = match.group()
                    selection.append(int(number)-1)
                    self.line = self.line[len(number):]
                    continue
                # Next, mark references
                match = re.match(":[0-9]+", self.line)
                if match:
                    markref = match.group()
                    self.line = self.line[len(markref):]
                    for (i, event) in enumerate(self.chosen()):
                        if hasattr(event, "mark") and event.mark == markref:
                            selection.append(i)
                            break
                        elif hasattr(event, "committish") and event.committish == markref:
                            selection.append(i)
                            break
                    else:
                        raise Recoverable("mark %s not found." % markref)
                    continue
                elif self.peek() == ':':
                    raise Recoverable("malformed mark")
                # $ means the last event, a la ed(1).
                if self.peek() == "$":
                    selection.append(len(self.chosen())-1)
                    self.pop()
                    continue
                # Comma just delimits a location spec
                if self.peek() == ",":
                    self.pop()
                    continue
                # Following ".." means a span
                if self.line[:2] == "..":
                    if selection:
                        selection.append("..")
                        self.line = self.line[2:]
                        continue
                    else:
                        raise Recoverable("start of span is missing")
                # A single "." matches none of the forms here.  Nothing
                # below would consume it, so without this check the loop
                # would spin forever on input such as "1.x".
                if self.peek() == ".":
                    raise Recoverable("malformed span at %s" % repr(self.line))
                if self.peek() == "<":
                    self.pop()
                    closer = self.line.find('>')
                    if closer == -1:
                        raise Recoverable("reference improperly terminated. '%s'" % self.line)
                    ref = self.line[:closer]
                    self.line = self.line[closer+1:]
                    matched = False
                    # First, search tags
                    for (i, event) in enumerate(self.chosen()):
                        if isinstance(event, Tag) and event.name == ref:
                            matched = True
                            selection.append(i)
                            break
                    # Next, search branches
                    if not matched:
                        for symbol in sorted(self.chosen().branchset(),
                                             key=len, reverse=True): # longest name first
                            if ref == os.path.basename(symbol):
                                loc = None
                                # Find the last commit with this branchname
                                for (i, event) in enumerate(self.chosen()):
                                    if isinstance(event, Commit):
                                        if event.branch == symbol:
                                            loc = i
                                if loc is None:
                                    raise Recoverable("branch name %s points to hyperspace" % symbol)
                                else:
                                    matched = True
                                    selection.append(loc)
                    # Next, fossil-ID references
                    if not matched:
                        for (i, event) in enumerate(self.chosen()):
                            if hasattr(event, "fossil_id") and event.fossil_id == ref:
                                selection.append(i)
                                matched = True
                                break
                    # Might be a date or action stamp
                    ordinal = None
                    m = re.search("#[0-9]+$", ref)
                    if m:
                        try:
                            ordinal = int(m.group(0)[1:])
                            ref = ref[:-len(m.group(0))]
                        except ValueError:
                            raise Recoverable("ill-formed date")
                    # The date part runs up to the "!" that introduces an
                    # email address, or to the end of the reference.
                    bang = ref.find('!')
                    date_end = len(ref)
                    if bang >= 0:
                        date_end = min(bang, date_end)
                    date = ref[:date_end]
                    datematch = None
                    try:
                        # A full timestamp must match exactly ...
                        date = Date(date)
                        datematch = lambda t: t == date
                    except Fatal:
                        try:
                            # ... while a bare YYYY-MM-DD matches anything
                            # within that 24-hour period.
                            date = calendar.timegm(time.strptime(date, "%Y-%m-%d"))
                            datematch = lambda t: t.timestamp >= date and t.timestamp < date + 24*60*60
                        except ValueError:
                            datematch = None
                    email_id = None
                    if date is not None and bang > -1:
                        email_id = ref[bang+1:]
                    matches = []
                    if datematch:
                        for (ei, event) in enumerate(self.chosen().events):
                            if hasattr(event, 'committer'):
                                if not datematch(event.committer.date):
                                    continue
                                if email_id and event.committer.email != email_id:
                                    continue
                                else:
                                    matches.append(ei)
                            elif hasattr(event, 'tagger'):
                                if not datematch(event.tagger.date):
                                    continue
                                elif email_id and event.tagger.email!=email_id:
                                    continue
                                else:
                                    matches.append(ei)
                        if len(matches) < 1:
                            raise Recoverable("no events match %s" % ref)
                        elif len(matches) > 1:
                            # The ordinal is 1-origin, so every value from 1
                            # through len(matches) names exactly one match;
                            # anything else selects all of the matches.
                            if ordinal is not None and 0 < ordinal <= len(matches):
                                selection.append(matches[ordinal-1])
                            else:
                                selection += matches
                            matched = True
                        else:
                            selection.append(matches[0])
                            matched = True
                    if not matched:
                        raise Recoverable("couldn't match a name at <%s>" % ref)
            if debug_enable(DEBUG_LEXER):
                announce("location list is %s with %s left" % (selection, repr(self.line)))
            # Resolve spans: "a..b" expands to every index from a through b.
            resolved = []
            spanning = last = 0
            for elt in selection:
                if elt == '..':
                    spanning = True
                else:
                    if spanning:
                        # The span's start is already in the list.
                        resolved.extend(range(last+1, elt+1))
                        spanning = False
                    else:
                        resolved.append(elt)
                    last = elt
            selection = resolved
            if debug_enable(DEBUG_LEXER):
                announce("resolved list is %s with %s left" % (selection, repr(self.line)))
            # Sanity checks
            if spanning:
                raise Recoverable("incomplete range expression.")
            for elt in selection:
                if elt < 0 or elt > len(self.chosen())-1:
                    raise Recoverable("event number %s out of range" % (elt+1))
            polyrange = set(selection)
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_polyrange(), left = %s" % (polyrange, repr(self.line)))
        return polyrange
    def eval_textsearch(self, preselection):
        """Parse a text search specification.

        The syntax is /regexp/ optionally followed by flag letters naming
        the attributes to search ('B' searches blob content).  With no
        flags, every searchable attribute is tried.  Returns the members of
        preselection that match, or None when the line does not start with
        '/'.  Raises Recoverable on a missing closing slash or a regular
        expression that does not compile.
        """
        if debug_enable(DEBUG_LEXER):
            announce("eval_textsearch(%s)" % self.line)
        self.line = self.line.lstrip()
        if not self.peek() == '/':
            return None
        elif '/' not in self.line[1:]:
            raise Recoverable("malformed text search specifier")
        else:
            # Consume the opening slash (already verified above).  This must
            # not sit inside an assert: asserts are removed under python -O,
            # which would leave the slash in place and corrupt the parse.
            self.pop()
            endat = self.line.index('/')
            try:
                search = re.compile(self.line[:endat]).search
            except re.error:
                raise Recoverable("invalid regular expression")
            self.line = self.line[endat+1:]
            matchers = set()
            searchable_attrs = {"a":"author",       # commit
                                "b":"branch",       # commit
                                "c":"comment",      # commit or tag
                                "C":"committer",    # commit
                                "r":"committish",   # tag or reset
                                "p":"text",         # passthrough
                                "t":"tagger",       # tag
                                "n":"name"          # tag
                                }
            search_in = list(searchable_attrs.values())
            check_blobs = False
            # Any letter right after the closing slash switches to an
            # explicit attribute list built from the flag letters present.
            if self.line and self.line[0].isalpha():
                search_in = []
                while self.line and (self.line[0] in searchable_attrs or self.line[0] == 'B'):
                    if self.line[0] == 'B':
                        check_blobs = True
                    else:
                        search_in.append(searchable_attrs[self.line[0]])
                    self.line = self.line[1:]
            for i in preselection:
                e = self.chosen().events[i]
                if any(hasattr(e, searchable) and
                       search(str(getattr(e, searchable)))
                       for searchable in search_in):
                    matchers.add(i)
                elif check_blobs and isinstance(e, Blob) and search(e.get_content()):
                    matchers.add(i)
            if debug_enable(DEBUG_LEXER):
                announce("%s <- eval_textsearch(), left = %s" % (matchers, repr(self.line)))
            return matchers
    def eval_pathset(self, preselection):
        """Resolve a bracketed path matcher to the events that refer to it.

        The matcher is the text between '[' and its balancing ']'.  If it
        starts with '/' it is a regexp of the form /re/ with optional
        trailing flags: 'a' requires every path of an event to match
        rather than any, 'c' hands the search to eval_pathset_full.
        Otherwise it is a literal path that must be among an event's paths.
        Only Commit and Blob events are considered.  Returns None when the
        line does not start with '['.
        """
        chosen = self.chosen()
        if self.peek() != "[":
            return None
        self.pop()
        # Locate the bracket that balances the one just consumed.
        nesting = 1
        closer = None
        for (pos, ch) in enumerate(self.line):
            if ch == '[':
                nesting += 1
            elif ch == ']':
                nesting -= 1
                if nesting == 0:
                    closer = pos
                    break
        if closer is None:
            raise Recoverable("malformed path matcher")
        matcher = self.line[:closer]
        self.line = self.line[closer+1:]
        hits = set()
        if not matcher.startswith('/'):
            # Literal path: plain membership test.
            for i in preselection:
                event = chosen.events[i]
                if isinstance(event, (Commit, Blob)) and matcher in event.paths():
                    hits.add(i)
            return hits
        # Regexp matcher: strip the trailing flag letters first.
        flags = ''
        while matcher[-1] in ("a", "c"):
            flags += matcher[-1]
            matcher = matcher[:-1]
        if matcher[-1] != '/':
            raise Recoverable("regexp matcher missing trailing /")
        try:
            search = re.compile(matcher[1:-1]).search
        except re.error:
            raise Recoverable("invalid regular expression")
        every_path = "a" in flags
        if "c" in flags:
            return self.eval_pathset_full(search,
                                          preselection,
                                          every_path)
        quantifier = all if every_path else any
        for i in preselection:
            event = chosen.events[i]
            if isinstance(event, (Commit, Blob)) and \
                   quantifier(search(path) for path in event.paths()):
                hits.add(i)
        return hits
    def eval_pathset_full(self, match_condition,
                                preselection,
                                match_all = False):
        """Select commits by the paths tracked in their whole tree.

        match_condition is either a callable applied to each path (truthy
        on a match) or a literal path string.  Every commit of the chosen
        repository numbered no higher than the largest member of
        preselection is examined; content is inherited from the first
        parent only.  With match_all set (callable case only) a commit is
        selected when no tracked path fails the condition.  Returns a set
        of event indices; an empty preselection yields an empty set.
        """
        result = set()
        try:
            last_event = max(preselection)
        except ValueError:
            # Nothing preselected, so nothing can be selected.
            return result
        match = match_condition
        if callable(match_condition):
            # Try to match a regex in the trees. For each commit we remember
            # only the part of the tree that matches the regex. In most cases
            # it is a lot less memory and CPU hungry than running regexes on
            # the full commit manifests. In the match_all case we instead
            # select commits that nowhere match the opposite condition.
            if match_all:
                match = lambda p: not match_condition(p)
            match_trees = {}
            for (i, event) in enumerate(self.chosen().events):
                if i > last_event: break
                if not isinstance(event, Commit): continue
                try:
                    parent = event.parents()[0]
                except IndexError:
                    tree = PathMap()
                else:
                    tree = match_trees[parent.mark].snapshot()
                for fileop in event.fileops:
                    if fileop.op == 'M':
                        if match(fileop.path):
                            tree[fileop.path] = True
                    elif fileop.op == 'D':
                        if match(fileop.path):
                            del tree[fileop.path]
                    elif fileop.op in ('C', 'R'):
                        # A rename drops its source even when the target
                        # matches too; removing first keeps the entry
                        # alive if source and target are the same path.
                        if fileop.op == 'R' and match(fileop.source):
                            del tree[fileop.source]
                        if match(fileop.target):
                            tree[fileop.target] = True
                    elif fileop.op == 'deleteall':
                        tree = PathMap()
                match_trees[event.mark] = tree
                if (not tree) == match_all:
                    result.add(i)
        else:
            # Search for an absolute path in the trees. We only need to
            # remember if that path is or not in any given commit tree.
            # Note: match_all is always false here, no need to check.
            containing = set()
            for (i, event) in enumerate(self.chosen().events):
                if i > last_event: break
                if not isinstance(event, Commit): continue
                contains = event.has_parents() \
                        and event.parents()[0].mark in containing
                for fileop in event.fileops:
                    if fileop.op == 'M' and fileop.path == match:
                        contains = True
                    elif fileop.op in ('C', 'R') and fileop.target == match:
                        contains = True
                    elif fileop.op == 'D' and fileop.path == match:
                        contains = False
                    elif fileop.op == 'R' and fileop.source == match:
                        contains = False
                    elif fileop.op == 'deleteall':
                        contains = False
                if contains:
                    containing.add(event.mark)
                    result.add(i)
        return result
    #
    # Helpers
    #
    def report_select(self, line, method, optargs=()):
        """Write a report built by calling one method on each selected event.

        For every selected event that has an attribute named by method,
        that attribute is called with the parse object, the event index
        and the members of optargs; a non-empty result is written to the
        (possibly redirected) output followed by a newline.  With no
        selection set, one is parsed off the front of line if line is
        non-blank, else every event is selected.
        """
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            if self.selection is None:
                if parse.line.strip():
                    parse.line = self.set_selection_set(parse.line)
                else:
                    self.selection = self.chosen().all()
            for (index, event) in self.selected():
                if not hasattr(event, method):
                    continue
                reporter = getattr(event, method)
                summary = reporter(*((parse, index,) + optargs))
                if summary:
                    parse.stdout.write(summary + "\n")
    @staticmethod
    def pop_token(line):
        """Grab a whitespace-delimited token from the front of the line.

        Returns a (token, rest) pair.  Leading whitespace is skipped before
        the token, and rest has its leading whitespace removed; trailing
        whitespace of rest is kept unless nothing else follows the token.
        Both members are empty strings when line holds no token.
        """
        # One split does the work of the old character-at-a-time loop,
        # which rebuilt both strings on every character.
        pieces = line.split(None, 1)
        if not pieces:
            return ("", "")
        if len(pieces) == 1:
            return (pieces[0], "")
        return (pieces[0], pieces[1].lstrip())
    def edit(self, selection, line):
        """Mailboxize and edit the non-blobs in the selection.

        line names the editor command; if blank, $EDITOR is used.  A
        selection consisting of a single blob is edited directly in its
        materialized file.  Otherwise every selected event that has an
        email_out method is written to a temporary file, the editor is
        run on it, and the result is fed to do_mailbox_in.  Raises
        Recoverable if the tempfile cannot be written or the editor
        returns a failure status.
        """
        # Assumes that self.chosen() and selection are not None
        editor = line.strip() or os.getenv("EDITOR")
        if not editor:
            complain("you have not specified an editor and $EDITOR is not set")
            return
        # Special case: user selected a single blob
        if len(selection) == 1:
            singleton = self.chosen()[selection[0]]
            if isinstance(singleton, Blob):
                def find_successor(event, path):
                    # Marks of all descendants that modify the same path
                    here = []
                    for child in event.children():
                        for fileop in child.fileops:
                            if fileop.op == "M" and fileop.path == path:
                                here.append(child.mark)
                        here += find_successor(child, path)
                    return here
                for event in self.chosen().commits():
                    for fileop in event.fileops:
                        if fileop.op == 'M' and fileop.ref == singleton.mark:
                            if len(find_successor(event, fileop.path)) > 0:
                                complain("beware: not the last 'M %s' on its branch" % fileop.path)
                            break
                os.system(editor + " " + singleton.materialize())
                return
            # Fall through
        (tfdesc, tfname) = tempfile.mkstemp()
        try:
            # Wrap the descriptor mkstemp() handed back instead of opening
            # the name a second time, so the descriptor gets closed.
            with os.fdopen(tfdesc, "wb") as tfp:
                for i in selection:
                    event = self.chosen()[i]
                    if hasattr(event, "email_out"):
                        tfp.write(event.email_out([], i))
        except IOError:
            raise Recoverable("write of editor tempfile failed")
        if os.system(editor + " " + tfname):
            raise Recoverable("%s returned a failure status" % editor)
        else:
            self.do_mailbox_in("<" + tfname)
        # No try/finally here - we want the tempfile to survive on fatal error
        # because it might have megabytes of metadata edits in it.
        os.remove(tfname)

    def help_selection(self):
        print("""
A quick example-centered reference for selection-set syntax.

First, these ways of constructing singleton sets:

123        event numbered 123 (1-origin)
:345       event with mark 345
<456>      commit with fossil-ID 456 (probably a Subversion revsion)
<foo>      the tag named 'foo', or failing that the tip commmit of branch foo

You can select commits and tags by date, or by date and committer:

<2011-05-25>                  all commits and tags with this date
<2011-05-25!esr>              all with this date and committer
<2011-05-25T07:30:37Z>        all commits and tags with this date and time
<2011-05-25T07:30:37Z!esr>    all with this date and time and committer
<2011-05-25T07:30:37Z!esr#2>  event #2 (1-origin) in the above set

More ways to construct event sets:

/foo/      all commits and tags containing the string 'foo' in text or metadata
           suffix letters: a=author, b=branch, c=comment in commit or tag,
                           C=committer, r=committish, p=text, t=tagger, n=name,
                           B=blob content in blobs.
[foo]      all commits and blobs touching the file named 'foo'.
=C         all commits
=H         all head (branch tip) commits
=T         all tags
=B         all blobs
=R         all resets
=P         all passthroughs
=O         all orphan (parentless) commits
=M         all merge commits
=F         all fork (multiple-child) commits
=L         all commits with unclean multi-line comments

You can compose sets as follows:

:123,<foo>     the event marked 123 and the event referenced by 'foo'.
:123..<foo>    the range of events from mark 123 to the reference 'foo'

Sets can be composed with | (union) and & (intersection). | has lower
precedence than &, but set expressions can be grouped with { }. Postfixing
a ? to a selection expression widens it to include all immediate neighbors
of the selection; you can do this repeatedly for effect.
""")

    def help_syntax(self):
        print("""
Commands are distinguished by a command keyword.  Most take a selection set
immediately before it; see 'help selection' for details.  Some
commands take additional modifier arguments after the command keyword.

Most report-generation commands support output redirection. When
arguments for these are parsed, any argument beginning with '>' is
extracted and interpreted as the name of a file to which command
output should be redirected.  Any remaining arguments are available to
the command logic.

Some commands support input redirection. When arguments for these are
parsed, any argument beginning with '<' is extracted and interpreted
as the name of a file from which command output should be taken.  Any
remaining arguments are available to the command logic.
""")
            
    ##
    ## Command implementation begins here
    ##
    #
    # On-line help and instrumentation
    #
    def help_help(self):
        "Print the help text for the 'help' command itself."
        message = "Show help for a command. Follow with space and the command name."
        print(message)
    def help_verbose(self):
        print("""
Without an argument, this command requests a report of the verbosity
level.  'verbose 1' enables progress messages, 'verbose 0' disables
them. Higher levels of verbosity are available but intended for
developers only.
""")
    def do_verbose(self, line):
        "Report the verbosity level, or set it from an integer argument."
        global verbose
        if not line:
            # Bare command: just report the current level.
            announce("verbose %d" % verbose)
            return
        try:
            verbose = int(line)
        except ValueError:
            complain("verbosity value must be an integer")
        # After an argument the level is echoed only when it is nonzero.
        if verbose:
            announce("verbose %d" % verbose)

    def help_quiet(self):
        print("""
Without an argument, this command requests a report of the quiet
boolean; with the argument 'on' or 'off' it is changed.  When quiet is
on, time-varying report fields which would otherwise cause spurious
failures in regression testing are suppressed.
""")
    def do_quiet(self, line):
        "Report the quiet flag, or switch it with the argument 'on' or 'off'."
        global quiet
        if not line:
            announce("quiet %s" % ("on" if quiet else "off"))
        elif line == "on":
            quiet = True
        elif line == "off":
            quiet = False
        # Any other argument leaves the flag untouched.

    def do_echo(self, line):
        "Set or clear echoing commands before processing."
        try:
            level = int(line)
        except ValueError:
            # A bad value is reported and the old setting is kept.
            announce("echo value must be an integer")
        else:
            self.echo = level
        if verbose:
            announce("echo %d" % self.echo)

    def help_resolve(self):
        print("""
Does nothing but resolve a selection-set expression
and report the resulting event-number set to standard
output. Implemented mainly for regression testing, but may be useful
for exploring the selection-set language.
""")
    def do_resolve(self, _line):
        "Display the set of event numbers generated by a selection set."
        picked = self.selection
        if picked is None:
            print("No selection")
            return
        if not isinstance(picked, list):
            complain("resolve didn't expect a selection of %s" % picked)
            return
        # Indices are 0-origin internally but shown to the user 1-origin.
        print([index + 1 for index in picked])

    def help_names(self):
        print("""
List all known symbolic names of branches and tags. Supports > redirection.
""")
    def do_names(self, line):
        "List the branch and tag names of the chosen repo."
        repo = self.chosen()
        if repo is None:
            # Report the problem the way the other commands do rather
            # than failing on an attribute lookup below.
            complain("no repo has been chosen.")
            return
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            # Branches come out sorted; tags follow in event order.
            for branch in sorted(repo.branchset()):
                parse.stdout.write("branch %s\n" % branch)
            for event in repo:
                if isinstance(event, Tag):
                    parse.stdout.write("tag    %s\n" % event.name)

    def do_script(self, line):
        "Read and execute commands from a named file."
        if not line:
            complain("script requires a file argument")
            return
        # The frame holds the script name ($0) and its arguments ($1...).
        frame = line.split()
        self.callstack.append(frame)
        try:
            with open(frame[0], "rb") as scriptfp:
                while True:
                    scriptline = scriptfp.readline()
                    if not scriptline:
                        break
                    # Handle multiline commands
                    while scriptline.endswith("\\\n"):
                        scriptline = scriptline[:-2] + scriptfp.readline()
                    # Simulate shell here-document processing
                    heredoc = None
                    if '<<' in scriptline:
                        # Split at the first << only, so a later one
                        # cannot break the two-way unpacking.
                        (scriptline, terminator) = scriptline.split("<<", 1)
                        heredoc = tempfile.NamedTemporaryFile(mode="w",
                                                              delete=False)
                        while True:
                            nextline = scriptfp.readline()
                            if nextline == '' or nextline == terminator:
                                break
                            heredoc.write(nextline)
                        heredoc.close()
                        # Note: the command must accept < redirection!
                        scriptline += "<" + heredoc.name
                    # End of heredoc simulation
                    try:
                        # Substitute the highest-numbered arguments first
                        # so that $1 cannot clobber the front of $10.
                        for i in range(len(frame) - 1, -1, -1):
                            scriptline = scriptline.replace('$' + str(i), frame[i])
                        scriptline = scriptline.replace('$$', str(os.getpid()))
                        self.onecmd(self.precmd(scriptline))
                    finally:
                        # Clean up the here-document even if the command fails
                        if heredoc:
                            os.remove(heredoc.name)
        except IOError as e:
            complain("script failure on '%s': %s" % (line, e))
        finally:
            # Always unwind our frame, including when the open failed or
            # a command raised, so later scripts see the right arguments.
            self.callstack.pop()

    def do_history(self, line):
        "Dump your command list from this session so far."
        # The argument is ignored; each remembered command gets one line.
        for entry in self.history:
            print(entry)

    def do_coverage(self, unused):
        "Display the coverage-case set (developer instrumentation)."
        assert unused is not None   # pacify pylint
        repo = self.chosen()
        if not repo:
            complain("no repo has been chosen.")
            return
        # Have every commit dump its fileops, then show the sorted case set.
        for commit in repo.commits():
            commit.fileop_dump()
        cases = sorted(repo.case_coverage)
        sys.stdout.write("Case coverage: %s\n" % cases)

    def help_index(self):
        print("""
Display four columns of info on selected objects: their number, their
type, the associate mark (or '-' if no mark) and a summary field
varying by type.  For a branch or tag it's the reference; for a commit
it's the commit branch; for a blob it's the repository path of the
file in the blob.  Supports > redirection.
""")
    def do_index(self, line):
        "Generate a summary listing of objects."
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        # This is deliberately not built on report_select() and per-object
        # index() methods: that would drag in a default-set computation we
        # don't want here, and keeping the format strings side by side
        # makes it easier to keep the columns lined up.
        if self.selection is None:
            # The default selection is everything except blobs.
            default = []
            for (number, thing) in enumerate(self.chosen()):
                if not isinstance(thing, Blob):
                    default.append(number)
            self.selection = default
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            for (index, event) in self.selected():
                ordinal = index + 1
                if isinstance(event, Blob):
                    row = "%6d blob   %6s    %s\n" % (ordinal, event.mark, " ".join(event.paths()))
                elif isinstance(event, Commit):
                    row = "%6d commit %6s    %s\n" % (ordinal, event.mark or '-', event.branch)
                elif isinstance(event, Tag):
                    row = "%6d tag    %6s    %4s\n" % (ordinal, event.committish, repr(event.name),)
                elif isinstance(event, Reset):
                    row = "%6d branch %6s    %s\n" % (ordinal, event.committish or '-', event.ref)
                else:
                    row = "?      -      %s\n" % (event,)
                parse.stdout.write(row)
    def help_profile(self):
        print("""
Enable profiling. Must be one of the initial command-line arguments, and
gathers statistics only on code executed via '-'.
""")
    def do_profile(self, line):
        "Enable profiling."
        assert line is not None # Pacify pylint
        # The argument is stored as-is in profile_log.
        logname = line
        self.profile_log = logname
        announce("profiling enabled.")

    def help_timing(self):
        print("""
Report phase-timing results from repository analysis.
""")
    def do_timing(self, _line):
        """Report repo-analysis times.

        Prints the commit count, the time spent between each pair of
        consecutive timing entries with its share of the total, and the
        total with a per-second rate.  Complains and returns if there is
        no repository or no timing data to report on.
        """
        repo = self.repo
        if repo is None:
            complain("no repo has been chosen.")
            return
        timings = repo.timings
        if not timings:
            complain("no timing data has been recorded.")
            return
        # Entries are (phase, timestamp) pairs; the span runs first to last.
        total = timings[-1][1] - timings[0][-1]
        commit_count = sum(1 for _ in repo.commits())
        if repo.fossil_count is None:
            print("        commits: %d" % commit_count)
        else:
            print("        commits: %d (from %d)" % (commit_count, repo.fossil_count))
        previous = timings[0][1]
        for (phase, stamp) in timings[1:]:
            interval = stamp - previous
            previous = stamp
            # A zero total (single entry, or a coarse clock) would
            # otherwise divide by zero.
            percent = (interval * 100) / total if total else 0.0
            print("%15s: %.3f (%2.2f%%)" % (phase, interval, percent))
        rate = int((repo.fossil_count or commit_count) / total) if total else 0
        print("          total: %.3f (%d/sec)" % (total, rate))

    #
    # Information-gathering
    #
    def help_stats(self):
        print("""
Report size statistics and import/export method information of the
currently chosen repository. Supports > redirection.
""")
    def do_stats(self, line):
        """Report information on repositories.

        Each token on the line names a loaded repository; with no tokens
        the chosen repository is reported on.  Raises Recoverable for a
        name that matches no loaded repository.
        """
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            if not parse.line:
                # Check for a chosen repo before touching its name, so
                # the complaint is reachable when nothing is chosen.
                chosen = self.chosen()
                if chosen is None or chosen.name is None:
                    complain("no repo has been chosen.")
                    return
                parse.line = chosen.name
            for name in parse.tokens():
                repo = self.repo_by_name(name)
                if repo is None:
                    raise Recoverable("no such repo as %s" % name)
                def count(otype):
                    # Number of events of the given class in this repo
                    return sum(1 for x in repo.events if isinstance(x,otype))
                parse.stdout.write("%s: %.0fK, %d events, %d blobs, %d commits, %d tags, %d resets, %s.\n" % \
                      (repo.name, repo.size() / 1000.0, len(repo),
                       count(Blob), count(Commit), count(Tag), count(Reset),
                       rfc3339(repo.readtime)))
                if repo.sourcedir:
                    parse.stdout.write("  Loaded from %s\n" % repo.sourcedir)
                #if repo.vcs:
                #    parse.stdout.write(str(repo.vcs) + "\n")

    def help_list(self):
        print("""
Display commits in a human-friendly format; the first column is raw
event numbers, the second a timestamp in local time. If the repository
has fossil IDs, they will be displayed in the third column. The
leading portion of the comment follows. Supports > redirection.
""")
    def do_list(self, line):
        "Generate a human-friendly listing of objects."
        # Each event's lister method also gets the current screen width.
        extra = (screenwidth(),)
        self.report_select(line, "lister", extra)

    def help_tip(self):
        print("""
Display the branch tip names associated with commits in the selection
set.  These will not necessarily be the same as their branch fields
(which will often be tag names if the repo contains either annotated
or lightweight tags).

If a commit is at a branch tip, its tip is its branch name.  If it has
only one child, its tip is the child's tip.  If it has multiple children,
then if there is a child with a matching branch name its tip is the
child's tip.  Otherwise this function throws a recoverable error.

Supports > redirection.
""")
    def do_tip(self, line):
        "Report the result of each selected event's tip method."
        # Each event's tip method also gets the current screen width.
        extra = (screenwidth(),)
        self.report_select(line, "tip", extra)

    def help_tags(self):
        print("""
Display tags and resets: three fields, an event number and a type and a name.
Branch tip commits associated with tags are also displayed with the type
field 'commit'. Supports > redirection.
""")
    def do_tags(self, line):
        "Generate a human-friendly listing of tags and resets."
        # Each event's tags method also gets the current screen width.
        extra = (screenwidth(),)
        self.report_select(line, "tags", extra)
    def help_sizes(self):
        print("""
Print a report on data volume per branch; takes a selection set,
defaulting to all events. The numbers tally the size of uncompressed
blobs, commit and tag comments, and other metadata strings (a blob is
counted each time a commit points at it).  Not an exact measure of
storage size: intended mainly as a way to get information on how to
efficiently partition a repository that has become large enough to be
unwieldy.
""")
    def do_sizes(self, line):
        """Report branch relative sizes.

        For each selected commit the lengths of its committer and author
        strings, its comment, and the sizes of the blobs its M fileops
        refer to are charged to its branch; a tag's tagger and comment
        are charged to the branch of its target.  One line per branch
        (sorted by name) is written, followed by a totals line.
        """
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = self.chosen().all()
        sizes = {}
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            for _, event in self.selected():
                if isinstance(event, Commit):
                    if event.branch not in sizes:
                        sizes[event.branch] = 0
                    sizes[event.branch] += len(str(event.committer))
                    for author in event.authors:
                        sizes[event.branch] += len(str(author))
                    sizes[event.branch] += len(event.comment)
                    for fileop in event.fileops:
                        if fileop.op == "M":
                            sizes[event.branch] += self.repo.objfind(fileop.ref).size
                elif isinstance(event, Tag):
                    commit = event.target
                    if commit.branch not in sizes:
                        sizes[commit.branch] = 0
                    sizes[commit.branch] += len(str(event.tagger))
                    sizes[commit.branch] += len(event.comment)
            total = sum(sizes.values())
            def sz(n, s):
                # With nothing tallied (no commits or tags selected, or
                # all of them empty) the total is zero; report 0% rather
                # than dividing by it.
                percent = (n * 100.0) / total if total else 0.0
                parse.stdout.write("%9d\t%2.2f%%\t%s\n" % (n, percent, s))
            for key in sorted(sizes):
                sz(sizes[key], key)
            sz(total, "")
    def help_lint(self):
        print("""
Look for DAG and metadata configurations that may indicate a
problem. Presently checks for: (1) Mid-branch deletes, (2)
disconnected commits, (3) parentless commits, (4) the existance of
multiple roots, (5) committer and author IDs that don't look
well-formed as DVCS IDs, (6) multiple child links with identical
branch labels descending from the same commit.
""")
    def do_lint(self, line):
        "Scan the selected commits for suspicious DAG shapes and identities."
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = self.chosen().all()
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            # Matches addresses with no @ at all, or whose part after
            # the @ is this repository's uuid.
            unmapped = re.compile("[^@]*$|[^@]*@" + str(self.chosen().uuid) + "$")
            shortset = set()
            deletealls = set()
            disconnected = set()
            roots = set()
            emptyaddr = set()
            emptyname = set()
            badaddress = set()
            for _, event in self.selected(Commit):
                ops = event.fileops
                if ops and ops[0].op == 'deleteall' and event.has_children():
                    deletealls.add("on %s at %s" % (event.branch, event.id_me()))
                if not event.has_parents():
                    # Parentless: either an isolated commit or a root.
                    if event.has_children():
                        roots.add(event.id_me())
                    else:
                        disconnected.add(event.id_me())
                people = [event.committer] + event.authors
                for person in people:
                    if unmapped.match(person.email):
                        shortset.add(person.email)
                for person in people:
                    if not person.email:
                        emptyaddr.add(event.id_me())
                    elif "@" not in person.email:
                        badaddress.add(event.id_me())
                for person in people:
                    if not person.name:
                        emptyname.add(event.id_me())

            def report(template, items):
                # One output line per collected item
                for item in items:
                    parse.stdout.write(template % item)
            report("mid-branch delete: %s\n", deletealls)
            report("disconnected commit: %s\n", disconnected)
            if len(roots) > 1:
                parse.stdout.write("multiple root commits: %s\n" % roots)
            report("unknown shortname: %s\n", shortset)
            report("empty address: %s\n", emptyaddr)
            report("empty name: %s\n", emptyname)
            report("email address missing @: %s\n", badaddress)
    #
    # Housekeeping
    #
    def help_prefer(self):
        print("""
Report or set (with argument) the preferred type of repository. With
no arguments, describe capabilities of all supported systems. With
an argument (which must be the name of a supported system) this has
two effects:

First, if there are multiple repositories in a directory you do a read
on, reposurgeon will read the preferred one (otherwise it will
complain that it can't choose among them).

Secondly, if there is a selected repo, this will change its type.
This means that you do a write to a directory, it will build a repo of
the preferred type rather than its original type (if it had one).

If no preferred type has been explicitly selected, reading in a
repository (but not a fast-import stream) will implicitly set it
to the type of that repository.
""")
    def do_prefer(self, line):
        "Report or select the preferred repository type."
        if line:
            # Look the requested name up among all known system types.
            wanted = line.lower()
            found = None
            for candidate in vcstypes + extractors:
                if wanted == candidate.name:
                    found = candidate
                    break
            if found is None:
                complain("known types are %s." % " ".join([x.name for x in vcstypes] + [x.name for x in extractors if x.visible]))
            else:
                self.preferred = found
                if self.chosen():
                    self.chosen().vcs = self.preferred
        else:
            # No argument: describe every supported system.
            for vcs in vcstypes:
                print(vcs)
            readable = [ext.name for ext in extractors if ext.visible]
            if readable:
                print("Other systems supported for read only: %s\n" \
                      % " ".join(readable))
        if verbose:
            if self.preferred:
                print("%s is the preferred type." % self.preferred.name)
            else:
                print("No preferred type has been set.")

    def help_choose(self):
        print("""
Choose a named repo on which to operate.  The name of a repo is
normally the basename of the directory or file it was loaded from, but
repos loaded from standard input are 'unnamed'. The program will add
a disambiguating suffix if there have been multiple reads from the
same source.

With no argument, lists the names of the currently stored repositories
and their load times.  The second column is '*' for the currently selected
repository, '-' for others.
""")
    def do_choose(self, line):
        "Choose a named repo on which to operate."
        if self.selection is not None:
            raise Recoverable("choose does not take a selection set")
        if not self.repolist and verbose > 0:
            complain("no repositories are loaded.")
            return
        self.repolist.sort(key=operator.attrgetter("name"))
        if line:
            # An argument names the repository to switch to.
            if line not in self.reponames():
                complain("no such repo as %s" % line)
            else:
                self.choose(self.repo_by_name(line))
                if verbose:
                    self.do_stats(line)
            return
        # No argument: list the loaded repositories, flagging the chosen one.
        for repo in self.repolist:
            if self.chosen() and repo == self.chosen():
                status = '*'
            else:
                status = '-'
            if not quiet:
                sys.stdout.write(rfc3339(repo.readtime) + " ")
            sys.stdout.write("%s %s\n" % (status, repo.name))

    def help_drop(self):
        print("""
Drop a repo named by the argument from reposurgeon's list, freeing the memory
used for its metadata and deleting on-disk blobs. With no argument, drops the
currently chosen repo.
""")
    def do_drop(self, line):
        """Drop a repo from reposurgeon's list.

        line names the repository to drop; when empty, the chosen one is
        dropped.  Raises Recoverable if a selection set was given.
        """
        if not self.reponames():
            if verbose:
                complain("no repositories are loaded.")
                return
        if self.selection is not None:
            raise Recoverable("drop does not take a selection set")
        chosen = self.chosen()
        if not line:
            if chosen is None:
                # Nothing named and nothing chosen: there is no repo
                # whose name could be used as the default.
                complain("no repo has been chosen.")
                return
            line = chosen.name
        if line in self.reponames():
            # Dropping the chosen repo leaves nothing chosen.
            if chosen is not None and line == chosen.name:
                self.unchoose()
            holdrepo = self.repo_by_name(line)
            holdrepo.cleanup()
            self.remove_by_name(line)
            del holdrepo
        else:
            complain("no such repo as %s" % line)
        if verbose:
            # Emit listing of remaining repos
            self.do_choose('')

    def help_rename(self):
        print("""
Rename the currently chosen repo; requires an argument.  Won't do it
if there is already one by the new name.
""")
    def do_rename(self, line):
        """Rename a repository.

        line is the new name for the chosen repository.  Complains and
        does nothing if it is empty, if the name is already in use, or
        if no repository is chosen.  Raises Recoverable if a selection
        set was given.
        """
        if self.selection is not None:
            raise Recoverable("rename does not take a selection set")
        if not line:
            # The help text promises an argument is required; enforce
            # it instead of renaming the repo to the empty string.
            complain("rename requires an argument.")
        elif line in self.reponames():
            complain("there is already a repo named %s." % line)
        elif not self.chosen():
            complain("no repository is currently chosen.")
        else:
            self.chosen().rename(line)

    def help_preserve(self):
        print("""
Add (presumably untracked) files or directories to the repo's list of
paths to be restored from the backup directory after a rebuild. Each
argument, if any, is interpreted as a pathname.  The current preserve
list is displayed afterwards.
""")
    def do_preserve(self, line):
        """Add files and subdirectories to the preserve set.

        Each whitespace-separated word of line is passed to the chosen
        repository's preserve method; the resulting preserve list is
        then announced.  Raises Recoverable if a selection set was given.
        """
        if self.selection is not None:
            raise Recoverable("preserve does not take a selection set")
        repo = self.chosen()
        if repo is None:
            # Without a repo there is no preserve list to add to.
            complain("no repo has been chosen.")
            return
        for filename in line.split():
            repo.preserve(filename)
        announce("preserving %s." % list(repo.preservable()))

    def help_unpreserve(self):
        print("""
Remove (presumably untracked) files or directories to the repo's list
of paths to be restored from the backup directory after a
rebuild. Each argument, if any, is interpreted as a pathname.  The
current preserve list is displayed afterwards.
""")
    def do_unpreserve(self, line):
        """Remove files and subdirectories from the preserve set.

        Each whitespace-separated word of line is passed to the chosen
        repository's unpreserve() method; the resulting preserve list is
        then announced.  Raises Recoverable if a selection set was given.
        """
        if self.selection is not None:
            raise Recoverable("unpreserve does not take a selection set")
        repo = self.chosen()
        if repo is None:
            # Without a chosen repo there is no preserve list to edit.
            complain("no repo has been chosen.")
            return
        for filename in line.split():
            repo.unpreserve(filename)
        announce("preserving %s." % list(repo.preservable()))

    #
    # Serialization and de-serialization.
    #
    def help_read(self):
        print("""
A read command with no arguments is treated as 'read .', operating on the
current directory.
 
With a directory-name argument, this command attempts to read in the
contents of a repository in any supported version-control system under
that directory.

If input is redirected from a plain file, it will be read in as a
fast-import stream or Subversion dump, whichever it is.

With an argument of '-', this command reads a fast-import stream or
Subversion dump from standard input (this will be useful in filters
constructed with command-line arguments).
""")
    def do_read(self, line):
        "Load a repository from a directory or a redirected stream, and choose it."
        if self.selection is not None:
            raise Recoverable("read does not take a selection set")
        with RepoSurgeon.LineParse(line, capabilities=["stdin"]) as parse:
            if parse.redirected:
                repo = Repository()
                repo.fast_import(parse.stdin,
                                 parse.options,
                                 progress=(verbose==1 and not quiet))
            else:
                # Note the slight asymmetry with the write side, which
                # treats an empty argument list as '-' rather than '.'.
                if not parse.line or parse.line == '.':
                    where = os.getcwd()
                elif os.path.isdir(parse.line):
                    where = parse.line
                else:
                    raise Recoverable("read no longer takes a filename argument - use < redirection instead")
                repo = read_repo(where, parse.options, self.preferred)
        self.repolist.append(repo)
        self.choose(repo)
        current = self.chosen()
        if current:
            # Remember the VCS type as the preferred one for later operations.
            if current.vcs:
                self.preferred = current.vcs
            basis = current.sourcedir or parse.infile or "unnamed"
            current.rename(self.uniquify(os.path.basename(basis)))
        if verbose:
            self.do_choose('')

    def help_write(self):
        print("""
Dump a fast-import stream representing selected events to standard
output (if second argument is empty or '-') or via redirect to a file.
Alternatively, if there ia no redirect and the argument names a
directory the repository is rebuilt into that directory, with any
selection set argument being ignored; if that target directory is
nonempty its contents are backed up to a save directory.

Property extensions will be omitted if the importer for the
selected repository type cannot digest them.
""")
    def do_write(self, line):
        "Export the chosen repo as a stream, or rebuild it into a directory."
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        if line:
            line = os.path.expanduser(line)
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            # Note the slight asymmetry with the read side, which treats
            # an empty argument list as '.' rather than '-'.
            streaming = parse.redirected or not parse.line
            if streaming:
                repo.fast_export(self.selection,
                                 parse.stdout,
                                 parse.options,
                                 progress=(verbose==1 and not quiet),
                                 target=self.preferred)
            elif os.path.isdir(parse.line):
                rebuild_repo(repo, parse.line, parse.options, self.preferred)
            else:
                raise Recoverable("write no longer takes a filename argument - use > redirection instead")

    def help_inspect(self):
        print("""
Dump a fast-import stream representing selected events to standard output.
Just like a write, except (1) the progress meter is disabled, and (2) there
is an identifying header before each event dump.  Supports > redirection.
""")
    def do_inspect(self, line):
        "Write each selected event, preceded by an identifying banner."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            if self.selection is None:
                # A trailing selection expression is honored; otherwise
                # every event in the repository is dumped.
                if parse.line.strip():
                    parse.line = self.set_selection_set(parse.line)
                else:
                    self.selection = self.chosen().all()
            emit = parse.stdout.write
            for ei, event in self.selected():
                # Banner carries the 1-origin event number, padded with '='.
                banner = "Event %s" % repr(ei+1)
                emit(banner + " " + ((72 - len(banner)) * "=") + "\n")
                if isinstance(event, Commit):
                    emit(event.dump())
                else:
                    emit(str(event))

    def help_strip(self):
        print("""
Replace the blobs in the selected repository with self-identifying stubs;
and/or strip out topologically uninteresting commits.  The modifiers for
this are 'blobs' and 'reduce' respectively; the default is 'blobs'.

This is intended for producing reduced test cases from large repositories.
""")
    def do_strip(self, line):
        """Drop content to produce a reduced test case.

        The words of line select what to strip; with no argument only
        'blobs' is applied.  'blobs' replaces every blob's content with a
        stub naming its mark.  'reduce' deletes commits that are neither
        interesting themselves nor adjacent to an interesting commit.
        """
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        if not line:
            striptypes = ["blobs"]
        else:
            striptypes = line.split()
        if "blobs" in striptypes:
            for event in repo.events:
                if isinstance(event, Blob):
                    event.set_content("Blob at %s\n" % event.mark)
        if "reduce" in striptypes:
            interesting = set()
            for event in repo.events:
                if isinstance(event, Tag):
                    interesting.add(event.committish)
                elif isinstance(event, Reset):
                    interesting.add(event.ref)
                elif isinstance(event, Commit):
                    # Anything that is not a plain link in a linear chain
                    # (branch point, merge, root, tip) is kept.
                    if len(event.children()) != 1 or len(event.parents()) != 1:
                        interesting.add(event.mark)
                    else:
                        # Keep commits containing a non-modify op, or a
                        # modify of a path with no ancestor in the parent.
                        for op in event.fileops:
                            if op.op != 'M' or repo.ancestor_count(event.parents()[0], op.path) == 0:
                                interesting.add(event.mark)
                                break
            # Also keep the immediate neighbors of every interesting commit.
            neighbors = set()
            for event in repo.events:
                if isinstance(event, Commit) and event.mark in interesting:
                    neighbors |= set(event.parent_marks())
                    neighbors |= set(event.child_marks())
            interesting |= neighbors
            # Each index must be tested against its own event; testing the
            # leftover loop variable would delete everything or nothing.
            repo.delete([i for (i, event) in enumerate(repo.events)
                         if isinstance(event, Commit) and event.mark not in interesting])

    def help_graph(self):
        print("""
Dump a graph representing selected events to standard output in DOT markup
for graphviz.
""")
    def do_graph(self, line):
        "Write the selected commits and tags as a graphviz DOT digraph."
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        # Markup shared by the commit and tag node labels, and by the two
        # kinds of dotted edge (tag to commit, commit to branch).
        label = '<<table cellspacing="0" border="0" cellborder="0"><tr><td><font color="blue">%s</font></td><td>%s</td></tr></table>>'
        dotted = '\t"%s" -> "%s" [style=dotted];\n'
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            emit = parse.stdout.write
            emit("digraph {\n")
            # First pass: parent links between selected commits, plus the
            # edges and rank constraints tying tags to their commits.
            for _, event in self.selected():
                if isinstance(event, Commit):
                    for parent in event.parent_marks():
                        if repo.find(parent) in self.selection:
                            emit('\t%s -> %s;\n' % (parent[1:], event.mark[1:]))
                if isinstance(event, Tag):
                    tagged = event.committish[1:]
                    emit(dotted % (event.name, tagged))
                    emit('\t{rank=same; "%s"; "%s"}\n' % (event.name, tagged))
            # Second pass: the node declarations themselves.
            for _, event in self.selected():
                if isinstance(event, Commit):
                    firstline = event.comment.split('\n')[0]
                    summary = cgi.escape(firstline[:42])
                    if event.fossil_id:
                        cid = event.showfossil() + " &rarr; " + event.mark
                    else:
                        cid = event.mark
                    emit('\t%s [shape=box,width=5,label=%s];\n'
                         % (event.mark[1:], label % (cid, summary)))
                    # A commit none of whose children continue its branch
                    # gets an oval naming that branch.
                    if all(event.branch != child.branch for child in event.children()):
                        emit('\t"%s" [shape=oval,width=2];\n' % event.branch)
                        emit(dotted % (event.mark[1:], event.branch))
                if isinstance(event, Tag):
                    firstline = event.comment.split('\n')[0]
                    summary = cgi.escape(firstline[:32])
                    emit('\t"%s" [label=%s];\n'
                         % (event.name, label % (event.name, summary)))
            emit("}\n")

    def help_rebuild(self):
        print("""
Rebuild a repository from the state held by reposurgeon.  The argument
specifies the target directory in which to do the rebuild; if the
repository read was from a repo directory (and not a git-import stream), it
defaults to that directory.  If the target directory is nonempty
its contents are backed up to a save directory.
""")
    def do_rebuild(self, line):
        "Reconstruct a live repository from the chosen repo's edited state."
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        if self.selection is not None:
            raise Recoverable("rebuild does not take a selection set")
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            target = parse.line
            rebuild_repo(repo, target, parse.options, self.preferred)

    #
    # Editing commands
    #
    def help_mailbox_out(self):
        print("""
Emit a mailbox file of messages in RFC822 format representing the
contents of repository metadata. Takes a selection set; members of the set
other than commits, annotated tags, and passthroughs are ignored (that
is, presently, blobs and resets). Supports > redirection.
""")
    def do_mailbox_out(self, line):
        "Report the selection as mailbox messages via each event's email_out."
        reporter = "email_out"
        self.report_select(line, reporter)

    def help_mailbox_in(self):
        print("""
Accept on standard input a mailbox file of messages in RFC822 format
representing the contents of the metadata in selected commits and
annotated tags. Takes no selection set. Takes < redirection.

Users should be aware that modifying an Event-Number field will change
which event the update from that message is applied to.  This is
unlikely to have good results.

If the Event-Number field is absent, the mailbox_in logic will
attempt to match the commit or tag first by Fossil-ID, then by a unique
committer ID and timestamp pair.

If output is redirected and the modifier 'changed' appears, a minimal
set of modifications actually made is written to the output file.
""")
    def do_mailbox_in(self, line):
        """Accept a mailbox file representing object metadata and update from it.

        Messages are read from the (redirected) input.  Each one is matched
        to an event by the first of these headers it carries: Event-Number,
        Fossil-ID, Event-Mark, Committer plus Committer-Date, Tagger plus
        Tagger-Date, or Tag-Name.  All messages are validated before any
        event is touched; if any fails, Recoverable is raised with the
        error count and nothing is modified.  If output is redirected and
        'changed' appears on the command line, the updates that actually
        modified an event are written to the output.
        """
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        # NOTE(review): parse.stdout is written at the very end, so the whole
        # operation stays inside the LineParse context; presumably leaving
        # the context releases the redirected streams - confirm in LineParse.
        with RepoSurgeon.LineParse(line, capabilities=["stdin","stdout"]) as parse:
            update_list = []
            while True:
                msg = RepoSurgeonEmail.readmsg(parse.stdin)
                if not msg:
                    break
                update_list.append(email.message_from_string(msg))
            # First, a validation pass.  Build the lookup tables used to
            # match messages that carry no event number.
            attribution_map = {}
            name_map = {}
            fossil_map = {}
            attribution_counts = collections.Counter()
            for commit in repo.commits():
                stamp = commit.action_stamp()
                attribution_map[stamp] = commit
                attribution_counts[stamp] += 1
                if commit.fossil_id:
                    fossil_map[commit.fossil_id] = commit
            for event in repo.events:
                if isinstance(event, Tag):
                    if event.name:
                        name_map[event.name] = event
                    if event.tagger:
                        stamp = event.tagger.action_stamp()
                        attribution_map[stamp] = event
                        attribution_counts[stamp] += 1
            events = []
            errors = 0
            for (i, message) in enumerate(update_list):
                event = None
                if "Event-Number" in message:
                    try:
                        eventnum = int(message["Event-Number"]) - 1
                    except ValueError:
                        complain("event number garbled in update %d" % (i+1,))
                        errors += 1
                    else:
                        # Only index the repository with a number known to
                        # be valid; report it in the user's 1-origin form.
                        if eventnum < 0 or eventnum >= len(repo):
                            complain("event number %d out of range in update %d" \
                                     % (eventnum+1, i+1))
                            errors += 1
                        else:
                            event = repo[eventnum]
                elif "Fossil-ID" in message:
                    try:
                        event = fossil_map[message["Fossil-ID"]]
                    except KeyError:
                        complain("no commit matches fossil %s" \
                                 % message["Fossil-ID"])
                        errors += 1
                elif "Event-Mark" in message:
                    event = repo.objfind(message["Event-Mark"])
                    if not event:
                        complain("no commit matches mark %s" \
                                 % message["Event-Mark"])
                        errors += 1
                        event = None
                elif "Committer" in message and "Committer-Date" in message:
                    # Parse the message into a scratch commit to compute
                    # the stamp it would have.
                    blank = Commit()
                    blank.committer = Attribution()
                    blank.email_in(message)
                    stamp = blank.action_stamp()
                    try:
                        event = attribution_map[stamp]
                    except KeyError:
                        complain("no commit matches stamp %s" % stamp)
                        errors += 1
                    if attribution_counts[stamp] > 1:
                        complain("multiple events match %s" % stamp)
                        errors += 1
                elif "Tagger" in message and "Tagger-Date" in message:
                    blank = Tag()
                    blank.tagger = Attribution()
                    blank.email_in(message)
                    stamp = blank.tagger.action_stamp()
                    try:
                        event = attribution_map[stamp]
                    except KeyError:
                        complain("no tag matches stamp %s" % stamp)
                        errors += 1
                    if attribution_counts[stamp] > 1:
                        complain("multiple events match %s" % stamp)
                        errors += 1
                elif "Tag-Name" in message:
                    blank = Tag()
                    blank.tagger = Attribution()
                    blank.email_in(message)
                    try:
                        event = name_map[blank.name]
                    except KeyError:
                        complain("no tag matches name %s" % blank.name)
                        errors += 1
                else:
                    complain("no commit matches update %d:\n%s" % (i+1, message))
                    errors += 1
                if event is not None and not hasattr(event, "email_in"):
                    try:
                        complain("event %d cannot be modified"%(event.index()+1))
                    except AttributeError:
                        complain("event cannot be modified")
                    errors += 1
                # Always append, even None, to stay in sync with update_list
                events.append(event)
            if errors > 0:
                raise Recoverable("%d errors in metadata updates" % errors)
            # Now apply the updates.  Every unmatched message counted as an
            # error above, so no entry of events is None here.
            changers = []
            for (event, update) in zip(events, update_list):
                if event.email_in(update):
                    changers.append(update)
            if verbose:
                if not changers:
                    announce("no events modified.")
                else:
                    announce("%d events modified." % len(changers))
            if parse.stdout != sys.stdout and "changed" in parse.line:
                for update in changers:
                    parse.stdout.write(RepoSurgeonEmail.Divider + "\n" + update.as_string(unixfrom=False))

    def help_edit(self):
        print("""
Report the selection set of events to a tempfile as mailbox_out does,
call an editor on it, and update from the result as mailbox_in does.
If you do not specify an editor name as second argument, it will be
taken from the $EDITOR variable in your environment.

Normally this command ignores blobs because mailbox_out does.
However, if you specify a selection set consisting of a single
blob, your editor will be called on the blob file.
""")
    def do_edit(self, line):
        "Hand the selected events to an editor and apply the edited result."
        repo = self.chosen()
        if not repo:
            complain("no repo is loaded")
            return
        if self.selection is None:
            # Default to every event that knows how to render itself
            # as a mailbox message.
            self.selection = [index for (index, obj) in enumerate(repo)
                              if hasattr(obj, "email_out")]
        self.edit(self.selection, line)

    def help_filter(self):
        print("""
Run blobs, commit comments, or tag comments in the selection set
through the filter specified on the command line.

Attempting to specify a selection set including both blobs and
non-blobs (that is, commits or tags) throws an error. Inline content
in commits is filtered when the selection set contains (only) blobs
and the commit is within the range bounded by the earliest and latest
blob in the specification.

When filtering blobs, if the command line contains the magic cookie
'%PATHS%' it is replaced with a space-separated list of all paths
that reference the blob.

With --shell, the remainder of the line specifies a filter as a
shell command. Each blob or comment is presented to the filter on
standard input; the content is replaced with whatever the filter emits
to standard output.

With --regex, the remainder of the line is expected to be a Python
regular expression substitution written as /from/to/ with 'from' and
'to' being passed as arguments to the standard re.sub() function and
that applied to modify the content. Ordinarily only the first such
substitution is performed; putting 'g' after the slash replaces
globally, and a numeric literal before the g gives the maximum number
of substitutions to perform. Actually, any non-space character will
work as a delimiter in place of the /; this makes it easier to use
/ in patterns.

With --replace, the behavior is like --regexp but the expressions are
not interpreted as regular expressions. (This is slighly faster).
""")
    def do_filter(self, line):
        """Run selected blobs or comments through a filter.

        line must begin with --shell, --regex or --replace followed by
        the filter specification.  A selection set is required and must
        not mix blobs with non-blobs.  When blobs are selected, commits
        with inline content lying between the first and last selected
        event are filtered too.  Announces the number of items modified.
        Raises Recoverable on a malformed filter specification.
        """
        if not self.chosen():
            complain("no repo is loaded")
            return
        if not line:
            complain("no filter is specified")
            return
        if self.selection is None:
            complain("no selection")
            return
        repo = self.chosen()
        blobs = any(isinstance(repo.events[i], Blob)
                    for i in self.selection)
        nonblobs = any(not isinstance(repo.events[i], Blob)
                       for i in self.selection)
        # Try to prevent user from shooting self in foot
        if blobs and nonblobs:
            complain("cannot filter blobs and nonblobs in same command")
            return
        # If user is filtering blobs, filter all inlines within the range.
        if blobs:
            already = set(self.selection)
            for ei in range(self.selection[0], self.selection[-1]):
                event = repo.events[ei]
                # Only commits carry fileops; add each such commit once.
                if ei not in already and isinstance(event, Commit) \
                       and any(fileop.inline is not None
                               for fileop in event.fileops):
                    self.selection.append(ei)
            self.selection.sort()
        class FilterCommand:
            "A filter built from the command line: shell command or substitution."
            def __init__(self, repo, filtercmd):
                "Initialize the filter from the command line."
                self.repo = repo
                self.filtercmd = None
                self.sub = None
                self.regex = None
                # Must not use LineParse here as it would try to strip options
                # in shell commands.
                if filtercmd.startswith('--shell'):
                    self.filtercmd = filtercmd[7:].lstrip()
                elif filtercmd.startswith('--regex') or filtercmd.startswith('--replace'):
                    firstspace = filtercmd.find(' ')
                    if firstspace == -1:
                        raise Recoverable("missing filter specification")
                    stripped = filtercmd[firstspace:].lstrip()
                    if not stripped:
                        raise Recoverable("missing filter specification")
                    # The first character is the delimiter, so a well-formed
                    # spec splits into ['', from, to, flags].
                    parts = stripped.split(stripped[0])
                    if len(parts) != 4:
                        raise Recoverable("malformed filter specification")
                    pattern = parts[1]
                    replacement = parts[2]
                    # Flags are an optional count followed by an optional 'g';
                    # the match must be anchored or it accepts anything.
                    flagmatch = re.match(r"([0-9]*)(g?)$", parts[3].strip())
                    if flagmatch is None:
                        raise Recoverable("unrecognized filter flags")
                    if "%PATHS%" in filtercmd:
                        raise Recoverable("%PATHS% is not yet supported in regex filters")
                    if flagmatch.group(1):
                        subcount = int(flagmatch.group(1))
                    elif flagmatch.group(2):
                        subcount = 0        # re.sub() convention for "all"
                    else:
                        subcount = 1
                    if filtercmd.startswith('--regex'):
                        try:
                            self.regex = re.compile(pattern)	# optimization
                        except re.error as err:
                            raise Recoverable("invalid regular expression: %s" % err)
                        self.sub = lambda s: self.regex.sub(replacement,
                                                            s,
                                                            subcount)
                    else:
                        # str.replace() treats a count of 0 as "replace
                        # nothing", unlike re.sub(), so map "all" to -1.
                        limit = subcount if subcount > 0 else -1
                        self.sub = lambda s: s.replace(pattern,
                                                       replacement,
                                                       limit)
                else:
                    raise Recoverable("--shell, --regex, or --replace is required")
            def do(self, is_file, content_or_file, pathsubst=""):
                """Perform the filter on string content or a file.

                With is_file false, content_or_file is the content and the
                filtered content is returned.  With is_file true it is a
                file name; the file is rewritten in place if the filter
                changed it, and 1 or 0 is returned accordingly.
                """
                if self.filtercmd:
                    if pathsubst:
                        filtercmd = self.filtercmd.replace("%PATHS%", pathsubst)
                    else:
                        filtercmd = self.filtercmd
                    # One scratch file suffices: filter input for string
                    # content, filter output for file content.
                    (tmpdesc, tmpname) = tempfile.mkstemp(prefix=self.repo.subdir())
                    try:
                        if not is_file:
                            with os.fdopen(tmpdesc, "wb") as wfp:
                                wfp.write(content_or_file)
                            return capture("%s <%s" % (filtercmd, tmpname))
                        else:
                            # mkstemp hands back an open descriptor; release it.
                            os.close(tmpdesc)
                            do_or_die("%s <%s >%s" % (filtercmd, content_or_file, tmpname))
                            same = filecmp.cmp(content_or_file, tmpname, shallow=False)
                            if not same:
                                shutil.copyfile(tmpname, content_or_file)
                            return int(not same)
                    finally:
                        os.remove(tmpname)
                elif self.sub:
                    if is_file:
                        with open(content_or_file, "rb") as rfp:
                            content = rfp.read()
                        modified = self.sub(content)
                        if content != modified:
                            with open(content_or_file, "wb") as wfp:
                                wfp.write(modified)
                        return int(content != modified)
                    else:
                        return self.sub(content_or_file)
                else:
                    raise Recoverable("unknown mode in filter command")
        # Mainline of do_filter() continues:
        filterhook = FilterCommand(repo, line)
        with Baton(prompt="Filtering", enable=(verbose == 1)) as baton:
            altered = 0
            for _, event in self.selected():
                if isinstance(event, (Commit, Tag)):
                    if nonblobs:
                        oldcomment = event.comment
                        event.comment = filterhook.do(False, event.comment)
                        altered += int(oldcomment != event.comment)
                    if blobs and isinstance(event, Commit):
                        for fileop in event.fileops:
                            if fileop.inline is not None:
                                oldinline = fileop.inline
                                # The path belongs to the fileop, not the commit.
                                fileop.inline = filterhook.do(False,
                                                              fileop.inline,
                                                              fileop.path)
                                altered += int(fileop.inline != oldinline)
                elif isinstance(event, Blob):
                    if not event.hasfile():
                        event.materialize()
                    altered += filterhook.do(True,
                                             event.blobfile(),
                                             " ".join(event.paths()))
                baton.twirl()
        announce("%d items modified." % altered)

    def help_squash(self):
        print("""
Combine a selection set of events; this may mean deleting them or
pushing their content forward or back onto a target commit just
outside the selection range, depending on policy flags.

The default selection set for this command is empty.  Blobs cannot be
directly affected by this command; they move or are deleted only when
removal of fileops associated with commits requires this.

""")
    def do_squash(self, line):
        "Squash the selected events, passing command-line options as policy."
        repo = self.chosen()
        if not repo:
            complain("no repo is loaded")
            return
        # The default selection for this command is the empty set.
        if self.selection is None:
            self.selection = []
        with RepoSurgeon.LineParse(line) as parse:
            policy = parse.options
            repo.squash(self.selection, policy)
    def help_delete(self):
        print("""
Delete a selection set of events.  The default selection set for this
command is empty.  Tags, resets, and passthroughs are deleted with no
side effects.  Blobs cannot be directly deleted with this command; they
are removed only when removal of fileops associated with commits requires this.

When a commit is deleted, what becomes of tags and fileops attached to
it is controlled by policy flags.  A delete is equivalent to a
squash with the --delete flag.
""")
    def do_delete(self, line):
        "Delete the selected events: a squash with --delete forced on."
        repo = self.chosen()
        if not repo:
            complain("no repo is loaded")
            return
        # The default selection for this command is the empty set.
        if self.selection is None:
            self.selection = []
        with RepoSurgeon.LineParse(line) as parse:
            policy = set(["--delete"]) | parse.options
            repo.squash(self.selection, policy)

    def help_coalesce(self):
        print("""
Scan the selection set (defaulting to all) for runs of commits with
identical comments close to each other in time (this is a common form
of scar tissues in repository up-conversions from older file-oriented
version-control systems).  Merge these cliques by pushing their
fileops and tags up to the last commit, in order.

The optional second argument, if present, is a maximum time
separation in seconds; the default is 90 seconds.

With  the --debug option, show messages about mismatches.
""")
    def do_coalesce(self, line):
        "Merge runs of near-simultaneous selected commits with equal comments."
        if not self.chosen():
            complain("no repo is loaded")
            return
        if self.selection is None:
            self.selection = self.chosen().all()
        with RepoSurgeon.LineParse(line) as parse:
            timefuzz = 90
            if parse.line:
                try:
                    timefuzz = int(parse.line)
                except ValueError:
                    raise Recoverable("time-fuzz value must be an integer")
            chatty = verbose >= DEBUG_DELETE or '--debug' in parse.options
            def mismatch(earlier, later):
                "Return why the pair cannot be merged, or None if it can."
                if earlier.branch != later.branch:
                    return "branch mismatch"
                if earlier.comment != later.comment:
                    return "comment mismatch"
                if earlier.committer.email != later.committer.email:
                    return "committer email mismatch"
                if earlier.committer.date.delta(later.committer.date) >= timefuzz:
                    return "time fuzz exceeded"
                return None
            # This walks adjacent selected commits in event order and pays
            # no attention to the repo graph; strictly we ought to follow
            # child links.  The operation is only meaningful for tidying
            # linear history lifted from file-oriented VCSes such as RCS
            # and CVS, so the crude scan is good enough.
            commits = list(self.selected(Commit))
            eligible = []
            for (ithis, cthis), (_inext, cnext) in zip(commits, commits[1:]):
                reason = mismatch(cthis, cnext)
                if reason is None:
                    eligible.append(ithis)
                elif chatty:
                    announce("%s at %s" % (reason, cnext.id_me()))
            if verbose >= DEBUG_DELETE:
                announce("deletion set is %s" % [x+1 for x in eligible])
            self.chosen().squash(eligible, ("--coalesce",))

    def help_remove(self):
        print("""
From a specified commit, remove a specified fileop. The syntax is:

     remove OP [to COMMIT]

The selection set must be a singleton and the OP a file path - or, if
it does not match a path, a 1-origin numeric index.

If the to clause is present, the removed op is appended to the
commit specified by the following singleton selection set.

Note that this command does not attempt to scavenge blobs even if the
deleted fileop might be the only reference to them. This behavior may
change in a future release.
""")
    def do_remove(self, line):
        """Delete a fileop from a specified commit, optionally moving it.

        The selection must be a single commit.  The first token of line
        names the fileop, either by its path, source or target, or
        failing that by 1-origin index.  If it is followed by 'to' and a
        singleton selection expression, the removed op is appended to
        that commit.  Raises Recoverable for a bad 'to' target.
        """
        if not self.chosen():
            complain("no repo is loaded")
            return
        if self.selection is None:
            self.selection = []
        if len(self.selection) != 1:
            complain("from selection must be a singleton")
            return
        repo = self.chosen()
        event = repo.events[self.selection[0]]
        if not isinstance(event, Commit):
            complain("from selection must be a commit")
            return
        (opindex, line) = RepoSurgeon.pop_token(line)
        for (ind, op) in enumerate(event.fileops):
            if any(hasattr(op, attr) and getattr(op, attr) == opindex
                   for attr in ("path", "source", "target")):
                break
        else:
            # No fileop mentions that name; treat it as a 1-origin index.
            try:
                ind = int(opindex) - 1
            except (ValueError, TypeError):
                complain("invalid or missing fileop specification %s" % opindex)
                return
            # Reject zero and negatives explicitly; pop() would otherwise
            # quietly count them from the end of the list.
            if ind < 0 or ind >= len(event.fileops):
                complain("out-of-range fileop index %s" % opindex)
                return
        target = None
        if line:
            (verb, line)  = RepoSurgeon.pop_token(line)
            if verb == 'to':
                self.set_selection_set(line)
                if self.selection is None or len(self.selection) != 1:
                    raise Recoverable("remove to requires a singleton selection")
                # Validate the destination before touching the source, so
                # a bad target cannot cause the op to be lost.
                target = repo.events[self.selection[0]]
                if not isinstance(target, Commit):
                    raise Recoverable("remove to requires a commit as target")
        removed = event.fileops.pop(ind)
        if target is not None:
            target.fileops.append(removed)

    def help_renumber(self):
        print("""
Renumber the marks in a repository, from :1 up to <n> where <n> is the
count of the last mark. Just in case an importer ever cares about mark
ordering or gaps in the sequence.
""")
    def do_renumber(self, unused):
        "Renumber the marks in the selected repo."
        # The command takes no arguments; the parameter exists only
        # because the command dispatcher always passes the rest of the line.
        assert unused is not None    # pacify pylint
        if self.chosen() is not None:
            self.repo.renumber()
        else:
            complain("no repo has been chosen.")

    def help_timeoffset(self):
        print("""
Apply a time offset to all time/date stamps in the selected set.  An offset
argument is required; it may be in the form [+-]ss, [+-]mm:ss or [+-]hh:mm:ss.
The leading sign is required to distingush it from a selection expression.

Optionally you may also specify another argument in the form [+-]hhmm, a
timeone literal to apply.  To apply a timezone without an offset, use
an offset literal of +0 or -0.
""")
    def do_timeoffset(self, line):
        "Apply a time offset to all dates in selected events."
        # Arguments: a signed offset ([+-]ss, [+-]mm:ss or [+-]hh:mm:ss),
        # optionally followed by a [+-]hhmm timezone literal.  The offset
        # is added to the tagger date of selected tags and to the
        # committer and author dates of selected commits; the timezone,
        # if given, replaces theirs.  All argument errors are reported
        # through complain() before any event is modified.
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = self.chosen().all()
        if not line:
            complain("a signed time offset argument is required.")
            return
        elif not line[0] in ('-', '+'):
            complain("time offset argument must begin with + or -.")
            return
        line = str(line)   # pacify pylint by forcing string type
        args = line.split()
        # The leading sign governs the whole offset, not just its first
        # field, so peel it off and work with unsigned fields.
        sign = -1 if args[0][0] == '-' else 1
        fields = args[0][1:].split(":")
        if len(fields) > 3:
            complain("too many colons")
            return
        if not all(field.isdigit() for field in fields):
            complain("expected numeric literals in date format")
            return
        # Left-pad with zeros so we always have hours, minutes, seconds.
        (h, m, s) = [0] * (3 - len(fields)) + [int(field) for field in fields]
        offset = sign * (h * 3600 + m * 60 + s)
        zone = None
        if len(args) > 1:
            if not re.match(r"[+-][0-9]{4}$", args[1]):
                complain("expected timezone literal to be [+-]hhmm")
                return
            zone = args[1]
        for _, event in self.selected():
            if isinstance(event, Tag):
                # Tags may lack a tagger; such tags carry no date to shift.
                stamps = [event.tagger.date] if event.tagger else []
            elif isinstance(event, Commit):
                stamps = [event.committer.date]
                stamps.extend(author.date for author in event.authors)
            else:
                continue
            for stamp in stamps:
                stamp.timestamp += offset
                if zone is not None:
                    stamp.timezone = zone

    def help_divide(self):
        print("""
Attempt to partition a repo by cutting the parent-child link
between two specified commits (they must be adjacent). Does not take a
general selection-set argument.  It is only necessary to specify the
parent commit, unless it has multiple children in which case the child
commit must follow (separate it with a comma).

If the repo was named 'foo', you will normally end up with two repos
named 'foo-early' and 'foo-late'.  But if the commit graph would
remain connected through another path after the cut, the behavior
changes.  In this case, if the parent and child were on the same
branch 'qux', the branch segments are renamed 'qux-early' and
'qux-late'.
""")
    def do_divide(self, _line):
        "Attempt to topologically partition the repo."
        # The selection set names the parent commit of the link to cut,
        # optionally followed by the child.  The child may be omitted
        # when the parent has exactly one child.  If self.cut() cannot
        # separate the repo at that link, the link is removed anyway and,
        # when both commits share a branch, the branch fields on either
        # side of the cut get '-early' / '-late' suffixes.
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = []
        if len(self.selection) == 0:
            complain("one or possibly two arguments specifying a link are required")
            return
        early = self.chosen()[self.selection[0]]
        if not isinstance(early, Commit):
            complain("first element of selection is not a commit")
            return
        if len(self.selection) == 1:
            possibles = list(early.children())
            if len(possibles) > 1:
                complain("commit has multiple children, one must be specified")
                return
            elif len(possibles) == 1:
                late = possibles[0]
            else:
                complain("parent has no children")
                return
        elif len(self.selection) == 2:
            late = self.chosen()[self.selection[1]]
            if not isinstance(late, Commit):
                complain("last element of selection is not a commit")
                return
            if early.mark not in late.parent_marks():
                complain("not a parent-child pair")
                return
        else:
            complain("too many arguments")
            # Must stop here: no child commit has been determined.
            return
        # Try the topological cut first
        if not self.cut(early, late):
            # If that failed, cut anyway and rename the branch segments
            late.remove_parent(early)
            if early.branch != late.branch:
                announce("no branch renames were required")
            else:
                basename = early.branch
                announce("%s has been split into %s-early and %s-late" \
                         % (basename, basename, basename))
                for (i, event) in enumerate(self.chosen().events):
                    if hasattr(event, "branch") and event.branch == basename:
                        # Events up to and including the parent are "early".
                        if i <= self.selection[0]:
                            event.branch += "-early"
                        else:
                            event.branch += "-late"
        if verbose:
            self.do_choose("")

    def help_expunge(self):
        print("""
Expunge files from the selected portion of the repo history; the
default is the entire history.  The arguments to this command may be
paths or Python regular expressions matching paths (regexps must
be marked by being surrounded with //).

All filemodify (M) operations and delete (D) operations involving a
matched file in the selected set of events are disconnected from the
repo and put in a removal set.  Renames are followed as the tool walks
forward in the selection set; each triggers a warning message. If a
selected file is a copy (C) target, the copy will be deleted and a
warning message issued. If a selected file is a copy source, the copy
target will be added to the list of paths to be deleted and a warning
issued.

After file expunges have been performed, any commits with no
remaining file operations will be deleted, and any tags pointing to
them. Commits with deleted fileops pointing both in and outside the
path set are not deleted, but are cloned into the removal set.

The removal set is not discarded. It is assembled into a new
repository named after the old one with the suffix "-expunges" added.
Thus, this command can be used to carve a repository into sections by
file path matches.
""")
    def do_expunge(self, line):
        "Expunge files from the chosen repository."
        # The whitespace-separated arguments are handed to self.expunge()
        # together with the selection, which defaults to every event.
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        patterns = line.split()
        self.expunge(self.selection, patterns)

    def help_split(self):
        print("""
Split a specified commit in two, the opposite of join.

    split at M
    split by PREFIX

The selection set is required to be a commit location; the modifier is
a preposition which indicates which splitting method to use. If the
preposition is 'at', then the third argument must be an integer
1-origin index of a file operation within the commit. If it is 'in',
then the third argument must be a pathname to be matched.

The commit is copied and inserted into a new position in the
event sequence, immediately following itself; the duplicate becomes
the child of the original, and replaces it as parent of the original's
children. Commit metadata is duplicated; the mark of the new commit is
then changed, with 'bis' added as a suffix.

Finally, some file operations - starting at the one matched or indexed
by the split argument - are moved forward from the original commit
into the new one.  Legal indices are 2-n, where n is the number of
file operations in the original commit.
""")
    def do_split(self, line):
        "Split a commit."
        # Syntax: split at M | split in PREFIX
        #
        # The selection must be a single commit.  'at' takes a 1-origin
        # fileop index (legal values are 2..n); 'in' takes a path prefix.
        # 'by' is accepted as a synonym for 'in'.  All failures raise
        # Recoverable.
        if self.chosen() is None:
            raise Recoverable("no repo has been chosen.")
        if self.selection is None:
            self.selection = []
        if len(self.selection) != 1:
            raise Recoverable("selection of a single commit required for this command")
        where = self.selection[0]
        event = self.chosen()[where]
        if not isinstance(event, Commit):
            raise Recoverable("fileop argument doesn't point at a commit")
        line = str(line)   # pacify pylint by forcing string type
        fields = line.split()
        if len(fields) != 2:
            raise Recoverable("split requires a preposition and one argument")
        (prep, obj) = fields
        if prep == 'at':
            # Only the conversion belongs in the try block, so that a
            # ValueError from deeper code is not misreported as bad input.
            try:
                splitpoint = int(obj) - 1
            except ValueError:
                raise Recoverable("expected integer fileop index (1-origin)")
            # Index 0 would leave the original commit with no fileops.
            if not 1 <= splitpoint < len(event.fileops):
                raise Recoverable("fileop index out of range")
            self.chosen().split_commit_by_index(where, splitpoint)
        elif prep in ('in', 'by'):
            split = self.chosen().split_commit_by_prefix(where, obj)
            if not split:
                raise Recoverable("couldn't find '%s' in a fileop path." \
                                  % obj)
        else:
            raise Recoverable("don't know what to do for preposition %s" % prep)
        if verbose:
            # Show the original commit and the new one right after it.
            self.do_inspect(repr(where+1) + "," + repr(where+2))

    def help_unite(self):
        print("""
Unite repositories. Name any number of loaded repositories; they will
be united into one union repo and removed from the load list.  The
union repo will be selected.

The root of each repo (other than the oldest repo) will be grafted as
a child to the last commit in the dump with a preceding commit date.
Running last to first, duplicate names will be disambiguated using the
source repository name (thus, recent duplicates will get priority over
older ones). After all grafts, marks will be renumbered.

The name of the new repo will be the names of all parts concatenated,
separated by '+'. It will have no source directory or preferred system
type.
""")
    def do_unite(self, line):
        "Unite repos together."
        # Every argument must name a loaded repo and at least two are
        # needed; the parsed options are passed through to self.unite().
        self.unchoose()
        parts = []
        with RepoSurgeon.LineParse(line) as parse:
            for name in parse.line.split():
                found = self.repo_by_name(name)
                if found is None:
                    raise Recoverable("no such repo as %s" % name)
                parts.append(found)
            if len(parts) < 2:
                raise Recoverable("unite requires repo name arguments")
            self.unite(parts, parse.options)
        if verbose:
            self.do_choose('')

    def help_graft(self):
        print("""
For when unite doesn't give you enough control.  The selection set
must be of size 1, identifying a single commit in the currently
selected repo.  A following argument must be a repository name.
Labels and branches in the named repo are prefixed with its name; then
it is grafted to the selected one. Its root becomes a child of the
specified commit.  Finally the named repo is removed from the load
list.
""")
    def do_graft(self, line):
        "Graft a named repo onto the selected one."
        # The selection supplies the graft point (exactly one event) and
        # the argument names the repo to attach; that repo is dropped
        # from the load list once it has been grafted.
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = []
        if len(self.selection) != 1:
            raise Recoverable("a singleton selection set is required.")
        graft_point = self.selection[0]
        if not self.repolist:
            raise Recoverable("no repositories are loaded.")
        if line not in self.reponames():
            raise Recoverable("no such repo as %s" % line)
        graft_repo = self.repo_by_name(line)
        # OK, we've got the two repos and the graft point.  Do it.
        self.chosen().graft(graft_repo, graft_point)
        self.remove_by_name(graft_repo.name)

    def help_debranch(self):
        print("""
Takes one or two arguments which must be the names of source and target
branches; if the second (target) argument is omitted it defaults to 'master'.
The history of the source branch is merged into the history of the target
branch, becoming the history of a subdirectory with the name of the source
branch. Any trailing segment of a branch name is accepted as a synonym for
it; thus 'master' is the same as 'refs/heads/master'.  Any resets of the
source branch are removed.
""")
    def do_debranch(self, line):
        "Turn a branch into a subdirectory."
        # Arguments: SOURCE [TARGET]; TARGET defaults to refs/heads/master.
        # Either name may be a trailing segment of a full branch name.
        # Fileop paths on the source branch are moved under a directory
        # named after the last segment of SOURCE, the commits of both
        # branches are chained into one sequence on TARGET, and every
        # reset of SOURCE is dropped.  Errors go through complain().
        #
        # Branch names and fileop paths always use '/' as their
        # separator, whatever the host OS uses, hence posixpath.
        import posixpath
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        args = line.split()
        if not args:
            complain("debranch command requires at least one argument")
            return
        if len(args) > 2:
            complain("debranch command takes at most two arguments")
            return
        source = args[0]
        target = args[1] if len(args) == 2 else 'refs/heads/master'
        branches = repo.branchmap()

        def resolve(name):
            "Return the full branch name designated by name, or None."
            if name in branches:
                return name
            for candidate in branches:
                if candidate.endswith("/" + name):
                    return candidate
            return None

        fullsource = resolve(source)
        if fullsource is None:
            complain("no branch matches source %s" % source)
            return
        fulltarget = resolve(target)
        if fulltarget is None:
            complain("no branch matches target %s" % target)
            return
        (source, target) = (fullsource, fulltarget)
        # Now that the arguments are in proper form, implement
        stip = repo.find(branches[source])
        scommits = repo.ancestors(stip) + [stip]
        pref = posixpath.basename(source)
        for ci in scommits:
            for fileop in repo.events[ci].fileops:
                if fileop.op in ("D", "M"):
                    fileop.path = posixpath.join(pref, fileop.path)
                elif fileop.op in ("R", "C"):
                    fileop.source = posixpath.join(pref, fileop.source)
                    fileop.target = posixpath.join(pref, fileop.target)
        ttip = repo.find(branches[target])
        tcommits = repo.ancestors(ttip) + [ttip]
        # Chain the commits of both branches in event order: each one
        # gets its predecessor in the merged list as first parent and
        # keeps any parents after its first.
        merged = sorted(set(scommits + tcommits))
        last_parent = []
        for i in merged:
            event = repo.events[i]
            event.set_parent_marks(last_parent + event.parent_marks()[1:])
            event.set_branch(target)
            last_parent = [event.mark]
        # Drop every reset of the source branch.  Delete from the back
        # so that the indices still to be processed stay valid.
        doomed = [i for (i, event) in enumerate(repo.events)
                  if isinstance(event, Reset) and event.ref == source]
        for i in reversed(doomed):
            del repo.events[i]
        repo.declare_sequence_mutation()

    def help_path(self):
        print("""
Rename a path in every fileop of every selected commit.  The
default selection set is all commits. The first argument is interpreted as a 
Python regular expression to match against paths; the second may contain
back-reference syntax.
""")
    def do_path(self, line):
        "Rename paths in the history."
        # Syntax: path SOURCE rename TARGET
        #
        # SOURCE is a regular expression matched against the path,
        # source and target fields of every fileop in the selected
        # commits (default: all commits); TARGET is the replacement and
        # may use back-references.  Every proposed rename is checked
        # before any is applied, so a failure leaves the repo untouched.
        # Raises Recoverable on bad arguments or a clashing target.
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        if self.selection is None:
            self.selection = repo.all()
        (source_re, line) = RepoSurgeon.pop_token(line)
        (verb, line) = RepoSurgeon.pop_token(line)
        with RepoSurgeon.LineParse(line) as parse:
            if verb != "rename":
                raise Recoverable("unknown verb '%s' in path command." % verb)
            (target_re, _) = RepoSurgeon.pop_token(parse.line)
            if not target_re:
                raise Recoverable("no target specified in rename")
            try:
                matcher = re.compile(source_re)
            except re.error:
                raise Recoverable("invalid regular expression")
            actions = []
            # Walk the selection rather than the whole repo, so that an
            # explicit selection set restricts the rename.
            for _, commit in self.selected(Commit):
                for fileop in commit.fileops:
                    for attr in ("path", "source", "target"):
                        if not hasattr(fileop, attr):
                            continue
                        oldpath = getattr(fileop, attr)
                        if not oldpath or not matcher.search(oldpath):
                            continue
                        try:
                            newpath = matcher.sub(target_re, oldpath)
                        except re.error:
                            raise Recoverable("invalid replacement '%s'" % target_re)
                        if commit.visible(newpath):
                            raise Recoverable("rename target %s exists" % newpath)
                        actions.append((fileop, attr, newpath))
            # All checks must pass before any renames
            for (fileop, attr, newpath) in actions:
                setattr(fileop, attr, newpath)
            if actions:
                # Paths changed, so cached manifests may be stale; the
                # 'paths' command does the same after rewriting paths.
                repo.invalidate_manifests()

    def help_paths(self):
        print("""
Without a modifier, list all paths touched by fileops in
the selection set (which defaults to the entire repo). This
variant does > redirection.

With the 'sub' modifier, take a second argument that is a directory
name and prepend it to every path. With the 'sup' modifier, strip the
first directory component from every path.
""" )
    def do_paths(self, line):
        "List the paths in the selection, or add or strip a leading directory."
        # Without a modifier: write the sorted set of paths touched by
        # the selected commits (supports > redirection).
        # 'sub DIR': prepend DIR to every path in the selection.
        # 'sup': strip everything up to the first '/' from every path.
        # Both modifying forms print what path_walk() returns and then
        # invalidate the cached manifests.
        #
        # Repository paths always use '/' regardless of the host OS,
        # hence posixpath rather than os.path.
        import posixpath
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = self.chosen().all()
        if not line.startswith(("sub", "sup")):
            with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
                allpaths = set()
                for _, event in self.selected(Commit):
                    allpaths.update(event.paths())
                parse.stdout.write("\n".join(sorted(allpaths)) + "\n")
                return
        fields = line.split()
        if fields[0] == "sub":
            if len(fields) < 2:
                raise Recoverable("paths sub requires a directory argument.")
            prefix = fields[1]
            modified = self.chosen().path_walk(self.selection,
                                               lambda f: posixpath.join(prefix, f))
            print("\n".join(modified))
        elif fields[0] == "sup":
            # A path without a '/' is passed through unchanged, because
            # find() then returns -1 and the slice starts at 0.
            try:
                modified = self.chosen().path_walk(self.selection,
                                               lambda f: f[f.find("/")+1:])
                print("\n".join(modified))
            except IndexError:
                raise Recoverable("no / in sup path.")
        self.chosen().invalidate_manifests()

    def help_manifest(self):
        print("""
Print commit trees contents. Takes an optional selection set argument
defaulting to all commits, and an optional Python regular expression.
For each commit in the selection set, print the mapping of all paths in
that commit tree to the corresponding blob marks, mirroring what files
would be created in a checkout of the commit. If a regular expression
is given, only print "path -> mark" lines for paths matching it.
This command supports > redirection.
""")
    def do_manifest(self, line):
        "Print all files (matching the regex) in the selected commits trees."
        # For each selected commit, write a ruled header line, the
        # commit's identifying fields, and one "path -> mark" line per
        # manifest entry, optionally filtered by a regular expression.
        if self.chosen() is None:
            raise Recoverable("no repo has been chosen")
        if self.selection is None:
            self.selection = self.chosen().all()
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            pattern = parse.line.strip()
            wanted = None
            if pattern:
                try:
                    wanted = re.compile(pattern).search
                except re.error:
                    raise Recoverable("invalid regular expression")
            for (ei, event) in self.selected(Commit):
                banner = ("Event %s, " % repr(ei+1))[:-2]
                # Pad the banner with '=' out to a fixed width.
                rule = (72 - len(banner)) * "="
                parse.stdout.write(banner + " " + rule + "\n")
                if event.fossil_id:
                    parse.stdout.write("# Fossil-ID: %s\n" % event.fossil_id)
                parse.stdout.write("commit %s\n" % event.branch)
                if event.mark:
                    parse.stdout.write("mark %s\n" % event.mark)
                parse.stdout.write("\n")
                entries = ["%s -> %s" % (path, mark)
                           for (path, (mode, mark))
                           in event.manifest().iteritems()
                           if wanted is None or wanted(path)]
                parse.stdout.write("\n".join(entries))
                parse.stdout.write("\n")

    def help_tagify(self):
        print("""
Search for empty commits and turn them into tags. Takes an optional selection
set argument defaulting to all commits. For each commit in the selection set,
turn it into a tag with the same message and author information if it has no
fileops. By default merge commits are not considered, even if they have no
fileops (thus no tree differences with their first parent). To change that, see
the '--tagify-merges' option.

The name of the tag is 'emptycommit-<ident>', where <ident> is generated from
the fossil_id of the deleted commit, or from its mark, or from its index in the
repository, with a disambiguation suffix if needed.

tagify currently recognizes three options: first is '--canonicalize' which
makes tagify try harder to detect trivial commits by first ensuring that all
fileops of selected commits will have an actual effect when processed by
fast-import.

The second option is '--tipdeletes' which makes tagify also consider branch
tips with only deleteall fileops to be candidates for tagification. The
corresponding tags get names of the form 'tipdelete-<branchname>' rather than
the default 'emptycommit-<ident>'.

The third option is '--tagify-merges' that makes reposurgeon also tagify merge
commits that have no fileops.
""")
    def do_tagify(self, line):
        "Search for empty commits and turn them into tags."
        # Only options are accepted on the command line; any other
        # argument text is rejected.  Each recognized option becomes a
        # boolean keyword argument of repo.tagify_empty().
        repo = self.chosen()
        if repo is None:
            raise Recoverable("no repo has been chosen")
        if self.selection is None:
            self.selection = self.chosen().all()
        with RepoSurgeon.LineParse(line) as parse:
            if parse.line:
                raise Recoverable("too many arguments for tagify.")
            want_canonical = "--canonicalize" in parse.options
            want_tipdeletes = "--tipdeletes" in parse.options
            want_merges = "--tagify-merges" in parse.options
            repo.tagify_empty(commits = self.selection,
                              canonicalize = want_canonical,
                              tipdeletes = want_tipdeletes,
                              tagify_merges = want_merges)

    def help_merge(self):
        print("""
Create a merge link. Takes a selection set argument, ignoring all but
the lowest (source) and highest (target) members.  Creates a merge link
from the highest member (child) to the lowest (parent).
""" )
    def do_merge(self, _line):
        "Add the lowest selected commit as a parent of the highest one."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        try:
            # self.selected() yields (index, event) pairs, so sorting
            # puts the commits in event order.
            ordered = sorted(self.selected(Commit))
            if len(ordered) < 2:
                raise ValueError()
            # Only the two ends matter; anything in between is ignored.
            (_, earlier) = ordered[0]
            (_, later) = ordered[-1]
        except (TypeError, ValueError):
            raise Recoverable("merge requires a selection set "
                              "with at least two commits.")
        later.add_parent(earlier)

    def help_unmerge(self):
        print("""
Linearizes a commit. Takes a selection set argument, which must resolve to a
single commit, and removes all its parents except for the first. It is
equivalent to reparent {first parent},{commit} rebase, where {commit} is the
selection set given to unmerge and {first parent} is a set resolving to that
commit's first parent, but doesn't need you to find the first parent yourself.
""" )
    def do_unmerge(self, _line):
        "Drop every parent of the selected commit except the first."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        try:
            # A missing selection makes len() raise TypeError, which is
            # reported the same way as a selection of the wrong size.
            if len(self.selection) != 1:
                raise ValueError()
            picked = list(self.selected(Commit))
            if len(picked) != 1:
                raise ValueError()
            (_, commit) = picked[0]
        except (TypeError, ValueError):
            raise Recoverable("unmerge requires a single commit.")
        first_only = commit.parents()[:1]
        commit.set_parents(first_only)

    def help_reparent(self):
        print("""
Changes the parent list of a commit. Takes a selection set argument and an
optional policy argument. The selection set must resolve to exactly two
commits, the latest of which is the commit to modify, and the earliest is the
new first parent. All other parents links are cleared; if you want you can
recreate them with the 'merge' command.

By default, the manifest of the reparented commit is computed before modifying
it, and fileops are prepended so that the manifest stays unchanged even when
the first parent has been changed. Using the keyword 'rebase' as a third
argument inhibits this behavior and the tree contents of all descendents can be
modified as a result.
""")
    def do_reparent(self, line):
        "Make the earlier of two selected commits the sole parent of the later."
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        try:
            if len(self.selection) != 2:
                raise ValueError()
            # Sorting the (index, event) pairs puts the new parent first.
            pairs = sorted(self.selected(Commit))
            (_, parent), (_, child) = pairs
        except (TypeError, ValueError):
            raise Recoverable("reparent requires exactly two selected commits")
        rebasing = (line == "rebase")
        if line and not rebasing:
            raise Recoverable("unknown policy for reparent")
        if not rebasing:
            # Recreate the state of the tree: wipe everything, restore
            # each file of the current manifest, then replay the
            # commit's own fileops on top.
            wipe = FileOp()
            wipe.construct("deleteall")
            rebuilt = [wipe]
            for (path, (mode, mark)) in child.manifest().iteritems():
                restore = FileOp()
                restore.construct("M", mode, mark, path)
                rebuilt.append(restore)
            rebuilt.extend(child.fileops)
            child.fileops = rebuilt
        child.set_parents([parent])

    def help_branch(self):
        print("""
Rename or delete a branch (and any associated resets).  First argument
must be an existing branch name; second argument must one of the verbs
'rename' or 'delete'.

For a 'rename', the third argument may be any token that is a syntactically
valid branch name (but not the name of an existing branch). For a 'delete',
no third argument is required.

For either name, if it does not contain a '/' the prefix 'heads/'
is prepended. If it does not begin with 'refs/', 'refs/' is prepended.
""")
    def do_branch(self, line):
        "Rename a branch or delete it."
        # Syntax: branch NAME rename NEWNAME | branch NAME delete
        #
        # Names are expanded as the help text describes: 'heads/' is
        # prepended to a name with no '/', and 'refs/' to a name that
        # does not already start with it.  Rename rewrites the branch
        # field of matching commits and the ref of matching resets;
        # delete removes those events.  Argument errors raise Recoverable.
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()

        def fullname(name):
            "Expand a possibly abbreviated branch name to a full ref name."
            if "/" not in name:
                name = 'heads/' + name
            if not name.startswith('refs/'):
                name = 'refs/' + name
            return name

        (branchname, line) = RepoSurgeon.pop_token(line)
        branchname = fullname(branchname)
        if branchname not in repo.branchset():
            raise Recoverable("no such branch as %s" % branchname)
        (verb, line) = RepoSurgeon.pop_token(line)
        if verb == "rename":
            (newname, line) = RepoSurgeon.pop_token(line)
            if not newname:
                raise Recoverable("new branch name must be nonempty.")
            newname = fullname(newname)
            if newname in repo.branchset():
                raise Recoverable("there is already a branch named '%s'." \
                                  % newname)
            for event in repo:
                if isinstance(event, Commit):
                    if event.branch == branchname:
                        event.set_branch(newname)
                elif isinstance(event, Reset):
                    if event.ref == branchname:
                        event.ref = newname
        elif verb == "delete":
            repo.delete([i for (i, event) in enumerate(repo.events)
                         if (isinstance(event, Reset)
                             and event.ref == branchname)
                         or (isinstance(event, Commit)
                             and event.branch == branchname)])
        else:
            raise Recoverable("unknown verb '%s' in branch command." % verb)

    def help_tag(self):
        print("""
Move, rename, or delete a tag.  First argument must be an existing
name referring to a tag object, lightweight tag, or reset; second
argument must be one of the verbs 'move', 'rename', or 'delete'.

For a 'move', a third argument must be a singleton selection set. For
a 'rename', the third argument may be any token that is a
syntactically valid tag name (but not the name of an existing
tag). For a 'delete', no third argument is required.

The behavior of this command is complex because features which present
as tags may be any of three things: (1) True tag objects, (2)
lightweight tags, actually sequences of commits with a common
branchname beginning with 'refs/tags' - in this case the tag is
considered to point to the last commit in the sequence, (3) Reset
objects.  These may occur in combination; in fact, stream exporters
form sysyems with annotation tags commonly express each of these as a
true tag object (1) pointing at the tip commit of a sequence (2) in
which the basename of the common branch field is identical to the tag
name.  An exporter that generates lightweight-tagged commit sequences (2)
may or may not generate resets poinmting at their tip commits.

This command tries to handle all combinations in a natural way by
doing up to three operations on any true tag, commit sequence, and
reset matching the source name. In a rename, all are renamed together.
In a delete, any matching or reset is tag is deleted; then matching
branch fields are changed to match the branch of the unique descendent
of the tagged commit, if there is one.  When a tag is moved, no branch
fields are changed and a warning is issued.
""")
    def do_tag(self, line):
        "Move a tag to point to a specified commit, or rename it, or delete it."
        # Command syntax: TAGNAME {move SELECTION | rename NEWNAME | delete}.
        # Raises Recoverable on an unknown tag, an unknown verb, a bad
        # move target, an empty new name or a tag-name collision.
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        # A tag name can refer to one of the following things:
        # (1) A tag object, by name
        # (2) A reset object having a name in the tags/ namespace
        # (3) The tip commit of a branch with branch fields
        # These things often occur in combination. Notably, git-fast-export
        # generates for each tag object corresponding branch labels on
        # some ancestor commits - the rule for where this stops is unclear.
        (tagname, line) = RepoSurgeon.pop_token(line)
        tag = None
        resets = []
        commits = []
        fulltagname = Tag.branchname(tagname)
        # One pass over the event list collects everything the name denotes.
        for event in repo.events:
            if isinstance(event, Tag) and event.name == tagname:
                tag = event
            elif isinstance(event, Reset) and event.ref == fulltagname:
                resets.append(event)
            elif isinstance(event, Commit) and event.branch == fulltagname:
                commits.append(event)
        if not tag and not resets and not commits:
            raise Recoverable("no such tag as %s" % tagname)
        (verb, line) = RepoSurgeon.pop_token(line)
        if verb == "move":
            self.set_selection_set(line)
            try:
                if len(self.selection) != 1: raise ValueError()
                # selected() yields (index, event) pairs, as its other
                # callers in this class show; we want the event itself.
                (_, target), = self.selected(Commit)
            except (TypeError, ValueError):
                raise Recoverable("tag move requires a singleton commit set.")
            if tag:
                tag.forget()
                tag.remember(repo, target=target)
            if resets:
                if len(resets) == 1:
                    resets[0].committish = target.mark
                else:
                    complain("cannot move multiple tags.")
            if commits:
                complain("warning - tag move does not modify branch fields")
        elif verb == "rename":
            (newname, line) = RepoSurgeon.pop_token(line)
            if not newname:
                raise Recoverable("new tag name must be nonempty.")
            if tag:
                # The collision to guard against is with the *new* name;
                # comparing against the tag's current name could never
                # detect that the target name is already taken.
                for event in repo.events:
                    if isinstance(event, Tag) and event != tag and event.name == newname:
                        raise Recoverable("tag name collision, not renaming.")
                tag.name = newname
            fullnewname = Tag.branchname(newname)
            for reset in resets:
                reset.ref = fullnewname
            for event in commits:
                event.branch = fullnewname
        elif verb == "delete":
            if tag:
                tag.forget()
                repo.events.remove(tag)
                repo.declare_sequence_mutation()
            for reset in resets:
                reset.forget()
                repo.events.remove(reset)
                repo.declare_sequence_mutation()
            if commits:
                # Relabel the orphaned commit sequence with the branch of
                # the tip's first-parent children, provided they agree.
                successors = {child.branch for child in commits[-1].children() if child.parents()[0] == commits[-1]}
                if len(successors) == 1:
                    successor = successors.pop()
                    for event in commits:
                        event.branch = successor
                else:
                    complain("couldn't determine a unique successor for %s at %s" % (tagname, commits[-1].id_me()))
        else:
            raise Recoverable("unknown verb '%s' in tag command." % verb)

    def help_reset(self):
        print("""
Move, rename, or delete a reset.  First argument must match an
existing reset name; second argument must be one of the verbs 'move',
'rename', or 'delete'.

For a 'move', a third argument must be a singleton selection set. For
a 'rename', the third argument may be any token that can be interpreted
as a valid reset name (but not the name of an existing
reset). For a 'delete', no third argument is required.

An argument matches a reset's name if it is either the entire
reference (refs/heads/FOO or refs/tags/FOO for some some value of FOO)
or the basename (e.g. FOO), or a suffix of the form heads/FOO or tags/FOO.
An unqualified basename is assumed to refer to a head.

When a reset is renamed, commit branch fields matching the tag are
renamed with it to match.  When a reset is deleted, matching branch
fields are changed to match the branch of the unique descendent of the
tip commit of the associated branch, if there is one.  When a reset is
moved, no branch fields are changed.
""")
    def do_reset(self, line):
        "Move a reset to point to a specified commit, or rename it, or delete it."
        # Command syntax: RESETNAME {move SELECTION | rename NEWNAME | delete}.
        # Raises Recoverable on an unknown reset, an unknown verb, a bad
        # move target, an empty new name or a reference collision.
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        (resetname, line) = RepoSurgeon.pop_token(line)
        # Expand abbreviations: FOO -> heads/FOO, heads/FOO -> refs/heads/FOO.
        if not "/" in resetname:
            resetname = "heads/" + resetname
        if not resetname.startswith("refs/"):
            resetname = "refs/" + resetname
        resets = [e for _,e in repo.iterevents(types=Reset)
                      if e.ref == resetname]
        if not resets:
            raise Recoverable("no such reset as %s" % resetname)
        (verb, line) = RepoSurgeon.pop_token(line)
        if verb == "move":
            if len(resets) == 1:
                reset = resets[0]
            else:
                raise Recoverable("can't move multiple resets")
            self.set_selection_set(line)
            try:
                if len(self.selection) != 1: raise ValueError()
                # selected() yields (index, event) pairs, as its other
                # callers in this class show; we want the event itself.
                (_, target), = self.selected(Commit)
            except (TypeError, ValueError):
                raise Recoverable("reset move requires a singleton commit set.")
            # Detach only once the target is known to be valid, so a
            # rejected move leaves the reset attached where it was.
            reset.forget()
            reset.remember(repo, target=target)
        elif verb == "rename":
            (newname, line) = RepoSurgeon.pop_token(line)
            if not newname:
                raise Recoverable("new reset name must be nonempty.")
            if newname.count("/") == 0:
                newname = "heads/" + newname
            if not newname.startswith("refs/"):
                newname = "refs/" + newname
            if any(r.ref == newname for _,r in repo.iterevents(types=Reset)) \
                    or any(c.branch == newname
                           for _,c in repo.iterevents(types=Commit)):
                raise Recoverable("reset reference collision, not renaming.")
            for reset in resets:
                reset.ref = newname
            # iterevents() yields (index, event) pairs; unpack them.
            for _, event in repo.iterevents(types=Commit):
                if event.branch == resetname:
                    event.branch = newname
        elif verb == "delete":
            # The tip is the *last* commit carrying this branch name in
            # event order; its unique child, if any, names the successor.
            branch_commits = [c for _,c in repo.iterevents(types=Commit)
                              if c.branch == resetname]
            if branch_commits:
                children = branch_commits[-1].children()
                if len(children) == 1:
                    successor = children[0].branch
                    for event in branch_commits:
                        event.branch = successor
            for reset in resets:
                reset.forget()
                repo.events.remove(reset)
            repo.declare_sequence_mutation()
        else:
            raise Recoverable("unknown verb '%s' in reset command." % verb)

    #
    # Artifact removal
    #
    def help_authors(self):
        print("""
Apply or dump author-map information for the specified selection
set, defaulting to all events. 

Lifts from CVS and Subversion may have only usernames local to
the repository host in committer and author IDs. DVCSes want email
addresses (net-wide identifiers) and complete names. To supply the map
from one to the other, an authors file is expected to consist of
lines each beginning with a local user ID, followed by a '=' (possibly
surrounded by whitespace) followed by a full name and email address.

When an authors file is applied, email addresses in committer and author
metdata for which the local ID matches between &lt; and @ are replaced
according to the mapping (this handles git-svn lifts). Alternatively,
if the local ID is the entire address, this is also considered a match
(this handles what git-cvsimport and cvs2git do) 

With the 'read' modifier, or no modifier, apply author mapping data
(from standard input or a <-redirected input file).  May be useful if
you are editing a repo or dump created by cvs2git or by git-svn
invoked without -A.

With the 'write' modifier, write a mapping file that could be
interpreted by 'authors read', with entries for each unique committer,
author, and tagger (to standard output or a >-redirected file). This
may be helpful as a start on building an authors file, though each
part to the right of an equals sign will need editing.
""")
    def do_authors(self, line):
        "Apply an author map to the selection set, or dump one from it."
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        # No explicit selection means the whole repository.
        if self.selection is None:
            self.selection = repo.all()
        if line.startswith("write"):
            remainder = line[len("write"):].strip()
            with RepoSurgeon.LineParse(remainder, capabilities=["stdout"]) as parse:
                if parse.tokens():
                    raise Recoverable("authors write no longer takes a filename argument - use > redirection instead")
                repo.write_authormap(self.selection, parse.stdout)
            return
        # Everything else is a read; the 'read' keyword itself is optional.
        remainder = line
        if remainder.startswith("read"):
            remainder = remainder[len("read"):].strip()
        with RepoSurgeon.LineParse(remainder, capabilities=["stdin"]) as parse:
            if parse.tokens():
                raise Recoverable("authors read no longer takes a filename argument - use < redirection instead")
            repo.read_authormap(self.selection, parse.stdin)

    #
    # Reference lifting
    #
    def help_fossils(self):
        print("""
Apply or list fossil-reference information. Does not take a
selection set. The 'read' variant reads from standard input or a
<-redirected filename; the 'write' variant writes to standard
output or a >-redirected filename.
""")
    def do_fossils(self, line):
        "Read or write the fossil-reference map of the chosen repository."
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        if line.startswith("write"):
            remainder = line[len("write"):].strip()
            with RepoSurgeon.LineParse(remainder, capabilities=["stdout"]) as parse:
                if parse.tokens():
                    raise Recoverable("fossils write no longer takes a filename argument - use > redirection instead")
                repo.write_fossilmap(parse.stdout)
            return
        # Everything else is a read; the 'read' keyword itself is optional.
        remainder = line
        if remainder.startswith("read"):
            remainder = remainder[len("read"):].strip()
        with RepoSurgeon.LineParse(remainder, capabilities=["stdin"]) as parse:
            if parse.tokens():
                raise Recoverable("fossils read no longer takes a filename argument - use < redirection instead")
            repo.read_fossilmap(parse.stdin)

    def help_references(self):
        print("""
With no modifier, produces a listing of events that may have
Subversion or CVS commit references in them.  This version
of the command supports >-redirection

With the modifier 'edit', edit this set.

With the modifier 'lift', transform commit-reference cookies from CVS
and Subversion into action stamps.  This command expects cookies
consisting of the leading string '[[', followed by a VCS identifier
(currently SVN or CVS) followed by VCS-dependent information, followed
by ']]'. An action stamp pointing at the corresponding commit is
substituted when possible.  Enables writing of the fassil-reference
map when the repo is written or rebuilt.
""")
    def do_references(self, line):
        "Look for things that might be CVS or Subversion revision references."
        # With 'lift' in the line, rewrite [[...]] cookies in commit and
        # tag comments into action stamps.  Otherwise list (or, with a
        # leading 'edit', edit) the selected events whose text looks as
        # though it contains a revision reference.
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        repo.parse_dollar_cookies()
        if self.selection is None:
            self.selection = self.chosen().all()
        if "lift" in line:
            # One-element list so the nested function can bump the count
            # (this file targets Python 2, which has no 'nonlocal').
            # Only successful substitutions are counted; the number of
            # regexp matches would also include cookies left untouched.
            resolved = [0]
            def substitute(getter, matchobj):
                payload = matchobj.group(0)[2:-2]    # strip [[ and ]]
                commit = getter(payload)
                if commit is None:
                    complain("no commit matches " + repr(payload))
                    return matchobj.group(0) # no replacement
                elif commit:
                    resolved[0] += 1
                    return commit.action_stamp()
                else:
                    complain("cannot resolve %s" % payload)
                    return matchobj.group(0) # no replacement
            for (regexp, getter) in \
                    ((r"CVS:[^:\]]+:[0-9.]+",
                      lambda p: repo.fossil_map.get(p) or repo.dollar_map.get(p)),
                     ("SVN:[0-9]+",
                      lambda p: repo.fossil_map.get(p) or repo.dollar_map.get(p)),
                     (":[0-9]+",
                      lambda p: repo.objfind(p)),
                     ):
                match_re = re.compile(re.escape("[[")+regexp+re.escape("]]"))
                for _, event in self.selected():
                    if isinstance(event, (Commit, Tag)):
                        # The lambda is consumed within this iteration, so
                        # it always sees the current getter.
                        event.comment = match_re.sub(
                            lambda m: substitute(getter, m),
                            event.comment)
            announce("%d references resolved." % resolved[0])
            repo.write_fossils = True
        else:
            # No modifier, just list or edit
            refstyles = [re.compile(pattern) for pattern in (
                # Subversion references
                r"\Wr([0-9]+)\W",
                r"(?:SVN|svn|Subversion|subversion|rev|version).*\W([0-9]+)\W",
                # CVS references
                r"(?:CVS|cvs|rev|version).*\W([0-9][0-9.]+)\W",
                # Possible bare CVS references
                r"[0-9]+\.[0-9]+\.[0-9]+",
                )]
            idhits = []
            seen = set()
            for ei, event in self.selected():
                if hasattr(event, "comment"):
                    text = event.comment
                elif hasattr(event, "text"):
                    text = event.text
                else:
                    continue
                # Record each event index once, in selection order.
                if ei not in seen and any(p.search(text) for p in refstyles):
                    seen.add(ei)
                    idhits.append(ei)
            if idhits:
                if line.startswith("edit"):
                    self.edit(idhits, line[4:].strip())
                else:
                    with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
                        for ei in idhits:
                            event = repo.events[ei]
                            if hasattr(event, "lister"):
                                summary = event.lister(None, ei, screenwidth())
                                if summary:
                                    parse.stdout.write(summary + "\n")

    #
    # Examining tree states
    #
    def help_checkout(self):
        print("""
Check out files for a specified commit into a directory.  The selection
set must resolve to a singleton commit.
""")
    def do_checkout(self, line):
        "Materialize the files of one selected commit in the directory named by the argument."
        repo = self.chosen()
        if repo is None:
            complain("no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        # Guard clauses, in the same order the checks have always run.
        if not line:
            raise Recoverable("no target directory specified.")
        if len(self.selection) != 1:
            raise Recoverable("a singleton selection set is required.")
        candidate = repo.events[self.selection[0]]
        if not isinstance(candidate, Commit):
            raise Recoverable("not a commit.")
        candidate.checkout(line)

    def help_diff(self):
        print("""
Display the difference between commits. Takes a selection-set argument which
must resolve to exactly two commits.
""")
    def do_diff(self,line):
        "Display a diff between versions."
        # The selection must be exactly two commits.  Both are checked
        # out and compared with an external recursive unified diff whose
        # output goes to stdout or to a >-redirected file.
        # Raises Recoverable if the selection is not a pair of commits or
        # the diff program cannot be started.
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        if self.selection is None:
            self.selection = self.chosen().all()
        # Check the count first so a wrong-sized selection is rejected
        # without building a tuple of every selected event.
        if len(self.selection) != 2:
            raise Recoverable("a pair of commits is required.")
        bounds = tuple(repo.events[i] for i in sorted(self.selection))
        if not isinstance(bounds[0], Commit) or \
               not isinstance(bounds[1], Commit):
            raise Recoverable("a pair of commits is required.")
        dir1 = bounds[0].checkout()
        dir2 = bounds[1].checkout()
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            # An argument vector needs no shell quoting, so labels and
            # paths containing spaces or metacharacters are safe.
            command = ["diff", "-r",
                       "--label", "commit %s" % bounds[0].mark,
                       "--label", "commit %s" % bounds[1].mark,
                       "-u", dir1, dir2]
            try:
                if parse.stdout != sys.stdout:
                    # Hand the already-open redirect target to the child
                    # rather than re-opening it by name through the shell.
                    parse.stdout.flush()
                    subprocess.call(command, stdout=parse.stdout)
                else:
                    sys.stdout.flush()
                    subprocess.call(command)
            except OSError as e:
                raise Recoverable("could not run diff: %s" % e)

    #
    # Setting paths to branchify
    #
    def help_branchify(self):
        print("""
Specify the list of directories to be treated as potential branches (to
become tags if there are no modifications after the creation copies)
when analyzing a Subversion repo. This list is ignored when reading
with the --nobranch option.  It defaults to the 'standard layout'
set of directories, plus any unrecognized directories in the
repository root.

With no arguments, displays the current branchification set.

An asterisk at the end of a path in the set means 'all immediate
subdirectories of this path, unless they are part of another (longer)
path in the branchify set'.

Note that the branchify set is a property of the reposurgeon interpreter, not
of any individual repository, and will persist across Subversion
dumpfile reads. This may lead to unexpected results if you forget
to re-set it.
""")
    def do_branchify(self, line):
        "Replace the branchify path list when arguments are given, then report it."
        if self.selection is not None:
            raise Recoverable("branchify does not take a selection set")
        # split() with no argument already discards surrounding whitespace,
        # so a blank line yields an empty list and leaves the setting alone.
        paths = line.split()
        if paths:
            global_options['svn_branchify'] = paths
        current = " ".join(global_options['svn_branchify'])
        announce("branchify " + current)

    #
    # Setting options
    #
    def help_set(self):
        print("""
Set a boolean option to control reposurgeon's behavior.   With no arguments,
displays the state of all flags and options. The following flags and
options are defined:
""")
        for (opt, expl) in RepoSurgeon.OptionFlags:
            print(opt + ":\n" + expl)
    def do_set(self, line):
        "Turn on the named option flags, or show every flag when none is named."
        requested = line.split()
        if not requested:
            for (flag, _description) in RepoSurgeon.OptionFlags:
                print("\t%s = %s" % (flag, global_options.get(flag, False)))
            return
        known = dict(RepoSurgeon.OptionFlags)
        for flag in requested:
            if flag in known:
                global_options[flag] = True
            else:
                complain("no such option flag as '%s'" % flag)
    def help_clear(self):
        print("""
Clear a boolean option to control reposurgeon's behavior.   With no arguments,
displays the state of all flags. The following flags and options are defined:
""")
        for (opt, expl) in RepoSurgeon.OptionFlags:
            print(opt + ":\n" + expl)
    def do_clear(self, line):
        "Turn off the named option flags, or show every flag when none is named."
        if not line.strip():
            # Walk the option table itself rather than a dict built from
            # it, so flags are listed in declaration order - the same
            # order the 'set' command uses - instead of arbitrary order.
            for (opt, _expl) in RepoSurgeon.OptionFlags:
                print("\t%s = %s" % (opt, global_options.get(opt, False)))
        else:
            # Build the lookup table once, not once per token.
            known = dict(RepoSurgeon.OptionFlags)
            for option in line.split():
                if option not in known:
                    complain("no such option flag as '%s'" % option)
                else:
                    global_options[option] = False

    #
    # Macros
    #
    def help_define(self):
        print("""
Define a macro.  The first whitespace-separated token is the name; the
remainder of the line is the body, unless it is '{', which begins a
multi-line macro terminated by a line beginning with '}'.

A later 'do' call can invoke this macro.
""")
    def do_define(self, line):
        "Define a macro"
        # LINE is 'NAME BODY' for a one-line macro, or 'NAME {' to start
        # capturing a multi-line body into self.capture.
        # Raises Recoverable when the name or the body is missing; these
        # cases used to escape as a bare IndexError.
        parts = line.split(None, 1)
        if not parts:
            raise Recoverable("define requires a macro name.")
        name = parts[0]
        # Take the body from the split, not from a slice by len(name),
        # which is wrong whenever the line has leading whitespace.
        body = parts[1].strip() if len(parts) > 1 else ""
        if not body:
            raise Recoverable("macro body is missing.")
        if body[0] != '{':
            self.definitions[name] = [body]
        else:
            # The same list is shared, so lines appended to self.capture
            # become the macro's body.
            self.capture = self.definitions[name] = []

    def help_do(self):
        "Do a macro."
        print("""
Expand and perform a macro.  The first whitespace-separated token is
the name of the macro to be called; remaining tokens replace {0},
{1}... in the macro definition (the conventions used are those of the
Python format method). Tokens may contain whitespace if they are
string-quoted; string quotes are stripped. Macros can call macros.
If the macro expansion does not itself begin with a selection set,
whatever set was specified before the 'do' keyword is available to
the command generated by the expansion.
""")
    def do_do(self, line):
        "Expand the named macro with the given arguments and run each line of it."
        # LINE is the macro name followed by shell-quoted arguments that
        # fill {0}, {1}, ... in the macro body.
        # Raises Recoverable for a missing or unknown macro name, badly
        # quoted arguments, or a body that cannot be expanded with the
        # arguments given; these used to escape as raw Python exceptions.
        parts = line.split(None, 1)
        if not parts:
            raise Recoverable("do requires a macro name.")
        name = parts[0]
        if name not in self.definitions:
            raise Recoverable("'%s' is not a defined macro" % name)
        # Take the arguments from the split, not from a slice by
        # len(name), which is wrong when the line has leading whitespace.
        try:
            args = shlex.split(parts[1]) if len(parts) > 1 else []
        except ValueError as e:
            raise Recoverable("malformed macro arguments: %s" % e)
        do_selection = self.selection
        for template in self.definitions[name]:
            try:
                expanded = template.format(*args)
            except (IndexError, KeyError, ValueError) as e:
                raise Recoverable("cannot expand macro '%s': %s" % (name, e))
            # If a leading portion of the expansion body is a selection
            # expression, use it.  Otherwise we'll restore whatever
            # selection set came before the do keyword.
            expansion = self.precmd(expanded)
            if self.selection is None:
                self.selection = do_selection
            # NOTE(review): the original comment here said the *base*
            # onecmd is called so that Recoverable errors abort the
            # macro, but the call goes through self.onecmd.  Whether an
            # override intercepts those errors is not visible from here -
            # confirm before relying on macro abort behavior.
            self.onecmd(expansion)
    def help_undefine(self):
        print("""
Undefine the macro named in this command's first argument.
""")
    def do_undefine(self, line):
        "Forget the macro named by the first token of the argument line."
        # Raises Recoverable when no name is given (this used to escape
        # as a bare IndexError) or when the name is not a defined macro.
        tokens = line.split()
        if not tokens:
            raise Recoverable("undefine requires a macro name.")
        name = tokens[0]
        if name not in self.definitions:
            raise Recoverable("'%s' is not a defined macro" % name)
        del self.definitions[name]

    #
    # Version binding 
    #
    def help_version(self):
        print("""
With no argument, display the reposurgeon version and supported VCSes.
With argument, declare the major version (single digit) or full
version (major.minor) under which the enclosing script was developed.
The program will error out if the major version has changed (which
means the surgical language is not backwards compatible).
""")
    def do_version(self, line):
        "Report the program version, or check a script's declared version against it."
        if not line:
            supported = " ".join(x.name for x in (vcstypes+extractors))
            announce("reposurgeon " + version + " supporting " + supported)
            return
        (vmajor, _) = version.split(".")
        declared = line.strip()
        if '.' in line:
            # Only the major.minor form is accepted; anything with more
            # or fewer parts fails the two-way unpack.
            try:
                (major, _) = declared.split(".")
            except ValueError:
                complain("invalid version.")
                return
        else:
            major = declared
        if major == vmajor:
            if verbose > 0:
                announce("version check passed.")
        else:
            raise Fatal("major version mismatch, aborting.")
    #
    # Running unit tests (undocumented)
    #
    def help_runtests(self):
        print("""
Runs the unit tests and reports the results.
""")
    def do_runtests(self, line):
        "Run the named unit-test class, or every known one when no name is given."
        known = ["DateTests"]
        def run_suite(classname):
            # Test classes are looked up by name in this module's globals.
            suite = unittest.defaultTestLoader.loadTestsFromTestCase(globals()[classname])
            outcome = unittest.TextTestRunner().run(suite)
            if not outcome.wasSuccessful():
                raise Recoverable("unit tests failed")
        if not line:
            for classname in known:
                run_suite(classname)
        elif line in known:
            run_suite(line)
        else:
            complain("no test class known as '%s'" % line)

# Script entry point: treat each command-line argument as one or more
# ';'-separated reposurgeon commands and run them in order; an argument
# of '-' (also the default when no arguments are given) starts an
# interactive session.
if __name__ == '__main__':
    # Increase max stack size from 8MB to 512MB
    # Needed to handle really large repositories.
    try:
        # Raise the interpreter's recursion ceiling to one million frames.
        sys.setrecursionlimit(10**6)
        import resource
        # Soft limit 2**29 bytes; the -1 hard limit presumably means
        # "unlimited" (RLIM_INFINITY) - confirm against the resource docs.
        resource.setrlimit(resource.RLIMIT_STACK, (2**29,-1))
    except ImportError:
        # Don't fail to start if 'resource' isn't available
        pass
    except ValueError:
        # May not be allowed on some systems.  Whether or not we can do it
        # isn't interesting, it only matters whether the limit is actually
        # blown.
        pass
    try:
        # Defined at module level (not inside a function) so that the
        # cProfile.run('interactive()') calls below can find it by name.
        def interactive():
            global verbose
            interpreter.use_rawinput = True
            # An interactive session is never fully silent: bump a zero
            # verbosity level to 1 before entering the command loop.
            if verbose == 0:
                verbose = 1
            interpreter.cmdloop()
            interpreter.use_rawinput = False
        interpreter = RepoSurgeon()
        interpreter.use_rawinput = False
        # No arguments at all is the same as a lone '-'.
        if not sys.argv[1:]:
            sys.argv.append("-")
        try:
            for arg in sys.argv[1:]:
                # Note: the inner loop deliberately reuses the name 'arg'
                # for each ';'-separated command within the argument.
                for arg in arg.split(";"):
                    if arg == '-':
                        # profile_log selects the profiling mode: None
                        # runs unprofiled, a non-empty value is passed to
                        # cProfile.run as the output file, and any other
                        # value profiles without an output file.
                        if interpreter.profile_log is None:
                            interactive()
                        elif interpreter.profile_log:
                            cProfile.run('interactive()', \
                                         interpreter.profile_log)
                        else:
                            cProfile.run('interactive()')
                    else:
                        # Call the base method so RecoverableExceptions
                        # won't be caught; we want them to abort scripting.
                        cmd.Cmd.onecmd(interpreter, interpreter.precmd(arg))
        finally:
            # Runs whether the commands succeeded or raised.
            interpreter.cleanup()
    except (Recoverable, Fatal) as xe:
        # Report the error message and exit with a failure status.
        complain(xe.msg)
        sys.exit(1)
    except KeyboardInterrupt:
        # Emit a newline so the shell prompt starts on a fresh line.
        print("")

# The following sets edit modes for GNU EMACS
# Local Variables:
# mode:python
# End:
# end
