#!/usr/bin/env python
#
# reposurgeon - a repository surgeon.
#
# By ESR, October 2010.  BSD terms apply.
#
# Requires Python 2.7.2 or newer.
#
from __future__ import print_function #unicode_literals

import sys, os, cmd, tempfile, subprocess, glob, hashlib, cProfile, cPickle
import re, signal, shutil, copy, shlex, collections, uuid, cgi, bz2
import time, calendar, unittest, itertools, operator, functools, filecmp
import email.message, email.parser, email.utils

# This import only works on Unixes.  The intention is to enable
# Ctrl-P, Ctrl-N, and friends in Cmd. 
try:
    import readline
except ImportError:
    pass

version="2.37"

#
# This code is intended to be hackable to support for special-purpose or
# custom operations, though it's even better if you can come up with a new
# surgical primitive general enough to ship with the stock version.  For
# either case, here's a guide to the architecture.
#
# The core classes are largely about deserializing and reserializing import
# streams.  In between these two operations the repo state lives in a
# fairly simple Python object, Repository. The main part of Repository
# is just a list of events - Commits, Blobs, Tags, Resets, and Passthroughs.
# These are straightforward representations of the command types in an
# import stream, with Passthrough as a way of losslessly conveying lines
# the parser does not recognize.
#
#  +-------------+    +---------+    +-------------+
#  | Deserialize |--->| Operate |--->| Reserialize |
#  +-------------+    +---------+    +-------------+
#
# The general theory of reposurgeon is: you deserialize, you do stuff
# to the event list that preserves correctness invariants, you
# reserialize.  The "do stuff" is mostly not in the core classes, but
# there is one major exception.  The primitive to delete a commit and
# shuffle its fileops forwards or backwards is seriously intertwined
# with the core classes and actually makes up almost 50% of Repository
# by line count.
#
# The rest of the surgical code lives outside the core classes. Most
# of it lives in the RepoSurgeon class (the command interpreter) or
# the RepositoryList class (which encapsulated name access to a list
# of repositories and also hosts surgical operations involving
# multiple repositories). A few bits, like the repository reader and
# builder, have enough logic that's independent of these
# classes to be factored out of it.
#
# In designing new commands for the interpreter, try hard to keep them
# orthogonal to the selection-set code. As often as possible, commands
# should all have a similar form with a (single) selection set argument.
#
# VCS is not a core class.  The code for manipulating actual repos is bolted
# on the the ends of the pipeline, like this:
#
#  +--------+    +-------------+    +---------+    +-----------+    +--------+
#  | Import |--->| Deserialize |--->| Operate |--->| Serialize |--->| Export |
#  +--------+    +-------------+ A  +---------+    +-----------+    +--------+
#       +-----------+            |
#       | Extractor |------------+
#       +-----------+
#
# The Import and Export boxes call methods in VCS.
#
# Extractor classes build the deserialized internal representation directly.
# Each extractor class is a set of VCS-specific methods to be used by the
# RepoStreamer driver class.
#

class VCS:
    "Class representing a version-control system."
    def __init__(self, name,
                 subdirectory,
                 exporter,
                 styleflags,
                 properties,
                 initializer,
                 lister,
                 importer,
                 checkout,
                 preserve,
                 authormap,
                 ignorename,
                 project,
                 notes):
        self.name = name
        self.subdirectory = subdirectory
        self.exporter = exporter
        self.styleflags = styleflags
        self.properties = properties
        self.initializer = initializer
        self.lister = lister
        self.importer = importer
        self.checkout = checkout
        self.preserve = preserve
        self.authormap = authormap
        self.ignorename = ignorename
        self.project = project
        self.notes = notes
    def __str__(self):
        return "         Name: {self.name}\n" \
               " Subdirectory: {self.subdirectory}\n" \
               "     Exporter: {self.exporter}\n" \
               " Export-Style: {{{styleflags}}}\n" \
               "   Properties: {self.properties!r}\n" \
               "  Initializer: {self.initializer}\n" \
               "       Lister: {self.lister}\n" \
               "     Importer: {self.importer}\n" \
               "     Checkout: {self.checkout}\n" \
               "     Preserve: {{{preserve}}}\n" \
               "    Authormap: {self.authormap}\n" \
               "   Ignorename: {self.ignorename}\n" \
               "      Project: {self.project}\n" \
               "        Notes: {self.notes}\n".format(
                       self = self,
                       styleflags = ", ".join(self.styleflags),
                       preserve = ", ".join(self.preserve))

# Most knowledge about specific version-control systems lives in the
# following class list. Exception; there's a git-specific hook in the
# repo reader; also see the extractor classes; also see the dump method
# in the Blob() class.
# The members are, respectively:
#
# * Name of its characteristic subdirectory.
# * Command to export from the VCS to the interchange format
# * Export-style flags.
#     "no-nl-after-commit" = no extra NL after each commit
#     "nl-after-comment" = inserts an extra NL after each comment
#     "export-progress" = exporter generates its own progress messages,
#                         no need for baton prompt.
# * Flag specifying whether it handles per-commit properties on import
# * Command to initialize a new repo
# * Command to import from the interchange format
# * Command to check out working copies of the repo files.
# * Default preserve set (e.g. config & hook files; parts can be directories).
# * Likely location for an importer to drop an authormap file
# * Command to list files under repository control.
#
# Note that some of the commands used here are plugins or extensions
# that are not part of the basic VCS. Thus these may fail when called;
# we need to be prepared to cope with that.
#
# %(tempfile)s in a command gets substituted with the name of a
# tempile that the calling code will know to read or write from as
# appropriate after the command is done.  If your exporter can simply
# dump to stdout, or your importer read from stdin, leave out the
# %(tempfile)s; reposurgeon will popen(3) the command, and it will
# actually be slightly faster (especially on large repos) because it
# won't have to wait for the tempfile I/O to complete.
#
# %(basename) is replaced with the basename of the repo directory.
#
vcstypes = [
    VCS(name="git",
        subdirectory=".git",
        exporter="git fast-export -M -C --signed-tags=verbatim --tag-of-filtered-object=drop --all",
        styleflags=set(),
        properties=False,
        initializer="git init --quiet",
        importer="git fast-import --quiet",
        checkout="git checkout",
        lister="git ls-files",
        preserve={'.git/config', '.git/hooks'},
        authormap=".git/cvs-authors",
        ignorename=".gitignore",
        project="http://git-scm.com/",
        notes="The authormap is not required, but will be used if present."),
    # 
    VCS(name="bzr",
        subdirectory=".bzr",
        exporter="bzr fast-export --no-plain %(basename)s",
        styleflags={"export-progress", "no-nl-after-commit", "nl-after-comment"},
        properties=True,
        initializer=None,
        lister=None,
        importer="bzr fast-import -",
        checkout="bzr checkout",
        preserve=set(),
        authormap=None,
        project="http://bazaar.canonical.com/en/",
        ignorename=".bzrignore",
        notes="Requires the bzr-fast-import plugin."),
    # Export is tested and works; import is flaky.
    VCS(name="hg",
        subdirectory=".hg",
        exporter="hg-fast-export.py --marks /dev/null --mapping /dev/null --heads /dev/null --status /dev/null --repo .",
        styleflags={"nl-after-comment",
                        "export-progress"},
        properties=False,
        initializer="hg init",
        lister="hg locate",
        importer="hg fastimport %(tempfile)s",
        checkout="hg checkout",
        preserve={".hg/hgrc"},
        authormap=None,
        ignorename=".hgignore",
        project="http://mercurial.selenic.com/",
        notes="The hg export-import methods are not part of stock Mercurial."),
    # Styleflags may need tweaking for round-tripping 
    VCS(name="darcs",
        subdirectory="_darcs",
        exporter="darcs fastconvert export",
        styleflags=set(),
        properties=False,
        initializer=None,
        lister="darcs show files",
        importer="darcs fastconvert import",
        checkout=None,
        preserve=set(),
        authormap=None,
        ignorename="_darcs/prefs/boring",
        project="http://darcs.net/",
        notes="Assumes no boringfile preference has been set."),
    # Export is experimental and doesn't round-trip
    VCS(name="svn",
        subdirectory="locks",
        exporter="svnadmin dump .",
        styleflags={"export-progress"},
        properties=False,
        initializer="svn create .",
        importer="svnadmin load .",
        checkout=None,
        lister=None,
        preserve={"hooks"},
        authormap=None,
        ignorename=None,
        project="http://subversion.apache.org/",
        notes="Run from the repository, not a checkout directory."),
    VCS(name="cvs",
        subdirectory="CVS",
        exporter="find . -name '*,v' -print | cvs-fast-export -k --reposurgeon",
        styleflags={"export-progress"},
        properties=False,
        initializer=None,
        importer=None,
        checkout=None,
        lister=None,
        preserve=set(),
        authormap=None,
        ignorename=None,
        project="http://www.catb.org/~esr/cvs-fast-export",
        notes="Requires cvs-fast-export."),
    VCS(name="rcs",
        subdirectory="RCS",
        exporter="find . -name '*,v' -print | cvs-fast-export -k --reposurgeon",
        styleflags={"export-progress"},
        properties=False,
        initializer=None,
        importer=None,
        checkout=None,
        lister=None,
        preserve=set(),
        authormap=None,
        ignorename=None,
        project="http://www.catb.org/~esr/cvs-fast-export",
        notes="Requires cvs-fast-export."),
    ]

# How to write extractor classes:
#
# Clone one of the existing ones and mutate.  
#
# Significant fact: None of the get_* methods for extracting information about
# a revision is called until after checkout has been called on that revision.
#
# Most methods take a native revision ID as argument. The value and type of the
# ID don't matter to any of the code that will call the extractor, except that
# IDs must be hashable so they can be dictionary keys.
#
# The 'name', 'subdirectory', and 'visible' members must be set. The
# subdirectory member is how an extractor recognizes what repositories
# it can consume.  If the visible member is false, the 'read' command
# will ignore the existence of the extractor.
#
# The strings returned by get_committer() and get_authors() should look like
#
# J. Random User <random@foobar> 2011-11-29T10:13:32Z
#
# that is, a free text name followed by an email ID followed by a date.
# The date specification can be anything Attribution() can parse; in
# particular, RFC3339 dates are good, so are RFC822 (email) dates,
# and so is git's native integer-Unix-timestamp/timezone pairs.

class GitExtractor:
    "Repository extractor for the git version-control system."
    # Regardless of what revision and branch was current at start,
    # after the git extractor runs the head revision on the master branch
    # will be checked out.
    #
    # The git extractor does not attempt to recover N ops,
    # symbolic links, gitlinks, or directory fileops.
    #
    # To be streamed, a git repo must have <emphasis>local</emphasis>
    # refs to all branches - in particular, local tracking branches
    # corresponding to all remotes.
    #
    # Some of these limitations could be fixed, but the git extractor
    # is not intended to replace git-fast-export; it only exists as a
    # test for the generic RepoStreamer code and a model for future
    # extractors.
    def __init__(self):
        # These must be set for every extractor class
        self.name = "git-extractor"
        self.subdirectory = ".git"
        self.visible = False
        self.properties = False
        self.ignorename = ".gitignore"
        # These are internal
        self.revlist = []
        self.parents = {}
        self.header = {}
        self.meta = {}
        self.tags = []
        self.refs = {}
        self.baton = None
    def analyze(self, baton):
        "Analyze a git repository for streaming."
        self.baton = baton
        # Get the topologically-ordered list of revisions and parent hashes
        with popen_or_die("git log --all --topo-order --reverse --format='%H %P'") as fp:
            for line in fp:
                fields = line.strip().split()
                self.revlist.append(fields[0])
                self.parents[fields[0]] = fields[1:]
        self.baton.twirl()
        # Next, all other per-commit data except branch IDs
        with popen_or_die("git log --all --reverse --date=raw --format='%H|%cn <%ce> %cd|%an <%ae> %ad'") as fp:
            for line in fp:
                (h, ci, ai) = line.strip().split('|')
                self.meta[h] = {'ci':ci, 'ai':ai}
        # Next, find all refs
        for root, dirs, files in os.walk(".git/refs"):
            for leaf in files:
                assert dirs is not None  # Pacify pylint
                ref = os.path.join(root, leaf)
                with open(ref) as fp:
                    self.refs[ref[5:]] = fp.read().strip()
        self.baton.twirl()
        # Next, grab all tag objects.
        with popen_or_die("git tag -l") as fp:
            for line in fp:
                tag = line.strip()
                with popen_or_die("git rev-parse %s" % tag) as fp:
                    taghash = fp.read().strip()
                # Annotated tags are first-class objects with their
                # own hashes.  The hash of a lightweight tag is just
                # the commit it points to. Handle both cases.
                objecthash = taghash
                with popen_or_die("git cat-file -p %s" % tag) as fp:
                    comment = None
                    tagger = None
                    for line in fp:
                        line = line.strip()
                        if line.startswith("tagger "):
                            tagger = line[len("tagger "):]
                        elif line.startswith("object"):
                            objecthash = line.split()[1]
                        elif comment is None and not line:
                            comment = ""
                        elif isinstance(comment, str):
                            comment += line + "\n"
                            if objecthash != taghash:
                                # committish isn't a mark; we'll fix that later
                                self.tags.append(Tag(None,
                                                     name=tag,
                                                     tagger=Attribution(tagger),
                                                     comment=comment,
                                                     committish=objecthash))
                    self.refs["refs/tags/" + tag] = objecthash
        self.baton.twirl()
        # Color branches in the order the tips occur.  Emulate the
        # git-export order.
        for refname, refobj in sorted(self.refs.iteritems(),
                                      key=lambda ref: self.revlist.index(ref[1])):
            self.__branch_color(refobj, refname)
        uncolored = [revision for revision in self.revlist if 'branch' not in self.meta[revision]]
        if uncolored:
            if verbose >= 1:
                raise Fatal("missing branch attribute for: %s" % uncolored)
            else:
                raise Fatal("some branches do not have local ref names.")
        self.baton.twirl()
    def __metadata(self, rev, fmt):
        with popen_or_die("git log -1 --format='%s' %s" % (fmt, rev)) as fp:
            return fp.read()[:-1]
    def __branch_color(self, rev, color):
        if rev.startswith("ref"):
            return
        while not 'branch' in self.meta[rev]:
            self.meta[rev]['branch'] = color
            parents = self.get_parents(rev)
            if not parents:
                break
            elif len(parents) == 1:
                # This case avoids blowing Python's stack by recursing
                # too deep on large repos.
                rev = parents[0]
            else:
                for parent in parents:
                    self.__branch_color(parent, color)
                break
    def pre_extract(self, repo):
        "Hook for any setup actions required before streaming."
        assert repo is not None  # Pacify pylint
    def post_extract(self, repo):
        for event in repo.commits():
            event.properties = collections.OrderedDict()
        os.system("git checkout --quiet master")
    def isclean(self):
        "Return True if repo has no unsaved changes."
        return not capture("git ls-files --modified")
    def get_revlist(self):
        "Return a list of commit ID strings in commit timestamp order."
        return self.revlist
    def get_taglist(self):
        "Return a list of tag name strings."
        return self.tags
    def iter_resets(self):
        "Return an iterator yielding (reset name, revision) pairs."
        return (item for item in self.refs.iteritems() if "/tags/" not in item[0])
    def checkout(self, rev, filemap):
        "Check the directory out to a specified revision."
        assert filemap is not None # pacify pylint
        os.system("git checkout --quiet %s" % rev)
        manifest = capture("git ls-files").split()
        return manifest
    def cleanup(self, rev, issued):
        "Cleanup after checkout."
        assert rev and (issued is not None) # Pacify pylint
    def get_parents(self, rev):
        "Return the list of commit IDs of a commit's parents."
        return self.parents[rev]
    def get_branch(self, rev):
        return self.meta[rev]['branch']
    def get_comment(self, rev):
        "Return a commit's change comment as a string."
        return self.__metadata(rev, "%B")
    def get_committer(self, rev):
        "Return the committer's ID/date as a string."
        return self.meta[rev]['ci']
    def get_authors(self, rev):
        "Return the author's name and email address as a string."
        return [self.meta[rev]['ai']]
    def get_properties(self, rev):
        "Return a list of properties for the commit."
        assert rev is not None # Pacify pylint
        return collections.OrderedDict()

# More extractors go here

extractors = [GitExtractor()]

verbose         = 0
DEBUG_SVNDUMP   = 2    # Debug Subversion dumping
DEBUG_TOPOLOGY  = 2    # Debug repo-extractor logic (coarse-grained)
DEBUG_EXTRACT   = 2    # Debug repo-extractor logic (fine-grained)
DEBUG_FILEMAP   = 3    # Debug building of filemaps
DEBUG_DELETE    = 3    # Debug canonicalization after deletes
DEBUG_IGNORES   = 3    # Debug ignore generation
DEBUG_SVNPARSE  = 4    # Lower-level Subversion parsing details
DEBUG_EMAILIN   = 4    # Debug event round-tripping through mailbox_{out|in} 
DEBUG_SHUFFLE   = 4    # Debug file and directory handling
DEBUG_COMMANDS  = 5    # Show commands as they are executed
DEBUG_UNITE     = 5    # Debug mark assignments in merging
DEBUG_LEXER     = 6    # Debug selection-language parsing
quiet = False

global_options = {}

def screenwidth():
    "Return the current width of the terminal window."
    with popen_or_die('stty size', 'r') as tp:
        return int(tp.read().split()[1])

def debug_enable(level):
    "Hook for debug filtering."
    return verbose >= level

def nuke(directory, legend):
    "Remove a (large) directory, with a progress indicator."
    with Baton(legend, enable=debug_enable(DEBUG_SHUFFLE)) as baton:
        for root, dirs, files in os.walk(directory, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
                baton.twirl()
            for name in dirs:
                os.rmdir(os.path.join(root, name))
                baton.twirl()
    try:
        os.rmdir(directory)
    except OSError:
        pass

def rfc3339(t):
    "RFC3339 string from Unix time."
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(t))

def complain(msg):
    sys.stdout.flush()
    sys.stderr.write("reposurgeon: %s\n" % msg)
    sys.stderr.flush()

def announce(msg):
    sys.stdout.write("reposurgeon: %s\n" % msg)

def memoize_iterator(iterator_f, mem_attr = None):
    """From a class method returning an iterator, create
       one which caches the iterator results and replays
       them later. Arguments:
        - iterator_f: the *unbound* class method
        - mem_attr:   the name of the attribute on the class
                      instance that stores the cache
                      (default: _mem_attr_<function name>)
    """
    if mem_attr is None:
        mem_attr = "_mem_attr_" + iterator_f.__name__
    # Define the caching iterator
    def f(self):
        # Obtain the store or create a new one
        # The cache is
        #    - cache: a list containing all values already
        #             yielded by the iterator,
        #    - it:    the iterator, ready to yield the next
        #             uncached value, or already at its end.
        try:
            cache, it = getattr(self, mem_attr, None)
        except (TypeError, ValueError):
            cache = []; it = iterator_f(self)
            setattr(self, mem_attr, (cache, it))
        # Yield values from the list, enlarging the latter
        # if necessary. We use an infinite loop over all
        # integers; when there are no more values available
        # to enlarge the list, the call to next(it) will
        # raise StopIteration which will bubble through our
        # caller and tell him that we have reached our end.
        for pos in itertools.count():
            if len(cache) <= pos: cache.append(next(it))
            yield cache[pos]
    # update_wrapper ensures that f gets all interesting
    # attributes of iterator_f (especially the docstring)
    try:
        return functools.update_wrapper(f, iterator_f)
    except AttributeError:
        # Cython doesn't support setting name or docstring
        return f

def memoized_iterator(mem_attr = None):
    # This is curryification: the goal is that memoized_iterator(A)(f)
    # is equivalent to memoize_iterator(f, A). The reason is that
    # function decorators need to take only the function as argument.
    # In other words, memoized_iterator is a function factory.
    return functools.partial(memoize_iterator, mem_attr = mem_attr)

class Baton:
    "Ship progress indications to stdout."
    def __init__(self, prompt, endmsg='done', enable=False):
        self.prompt = prompt
        self.endmsg = endmsg
        self.countfmt = None
        self.counter = 0
        if enable:
            self.stream = sys.stdout
        else:
            self.stream = None
        self.count = 0
        self.time = 0
    def __enter__(self):
        if self.stream:
            self.stream.write(self.prompt + "...")
            if os.isatty(self.stream.fileno()):
                self.stream.write(" \010")
            self.stream.flush()
        self.count = 0
        self.time = time.time()
        return self
    def startcounter(self, countfmt, initial=1):
        self.countfmt = countfmt
        self.counter = initial
    def bumpcounter(self):
        if self.stream is None:
            return
        if os.isatty(self.stream.fileno()):
            if self.countfmt:
                update = self.countfmt % self.counter
                self.stream.write(update + ("\010" * len(update)))
                self.stream.flush()
            else:
                self.twirl()
        self.counter = self.counter + 1
    def endcounter(self):
        if self.stream:
            w = len(self.countfmt % self.count)
            self.stream.write((" " * w) + ("\010" * w))
            self.stream.flush()
        self.countfmt = None
    def twirl(self, ch=None):
        "One twirl of the baton."
        if self.stream is None:
            return
        if os.isatty(self.stream.fileno()):
            if ch:
                self.stream.write(ch)
                self.stream.flush()
                return
            else:
                update = "-/|\\"[self.count % 4]
                self.stream.write(update + ("\010" * len(update)))
                self.stream.flush()
        self.count = self.count + 1
    def __exit__(self, extype, value_unused, traceback_unused):
        if extype == KeyboardInterrupt:
            self.endmsg = "interrupted"
        if extype == Fatal:
            self.endmsg = "aborted by error"
        if self.stream:
            self.stream.write("...(%2.2f sec) %s.\n" \
                              % (time.time() - self.time, self.endmsg))
        return False

class RepoSurgeonEmail(email.message.Message, object):
    "Specialized email message with a distinguishing starter."
    Divider = 78 * "-"
    def __init__(self, **kwargs):
        email.message.Message.__init__(self, **kwargs)        
        self.set_unixfrom(RepoSurgeonEmail.Divider)
    @staticmethod
    def readmsg(fp):
        msg = ''
        firstline = fp.readline()
        if not firstline:
            return None
        elif not firstline.startswith(RepoSurgeonEmail.Divider):
            msg = firstline
        while True:
            line = fp.readline()
            if not line:
                break
            if line.startswith(RepoSurgeonEmail.Divider):
                break
            msg += line
        return msg
    def __str__(self):
        return super(RepoSurgeonEmail, self).as_string(unixfrom=True).replace("\n--", "\n.--")

class Date(object):
    "A time/date in UTC. Preserves the original TZ information and uses it to convert back when formatting."
    __slots__ = ("timestamp", "tz_offset", "orig_tz_string")
    date_re = re.compile(r"[0-9]+\s*[+-][0-9]+$")
    subsecond_re = re.compile(r"\.[0-9]+Z")
    offset_re = re.compile(r"^([-+]?)([0-9]{2})([0-9]{2})$")
    def __init__(self, text):
        "Recognize date formats that exporters or email programs might emit."
        # First, look for git's preferred format, which is a timestamp
        # in UTC followed by an offset to be used as a hint for what
        # timezone to display the date in when converting to other
        # formats
        text = text.strip()
        if Date.date_re.match(text):
            (self.timestamp, self.orig_tz_string) = text.split()
            self.tz_offset = Date.secondsFromOffsetString(self.orig_tz_string)
            self.timestamp = int(self.timestamp)
            return
        # If that didn't work, look for an RFC822 date, which git also
        # accepts. Note, there could be edge cases that Python's parser
        # handles but git doesn't.
        try:
            dt = email.utils.parsedate_tz(text)
            self.tz_offset = dt[9]
            self.timestamp = int(calendar.timegm(dt) - self.tz_offset)
            self.orig_tz_string = text.split()[5]
            return
        except TypeError:
            # time.mktime throws this when it gets None:
            # TypeError: argument must be 9-item sequence, not None
            pass
        # Also accept RFC3339 dates in Zulu time, just because I like them.
        try:
            # Discard subsecond precision, import-stream format can't use it.
            text = re.sub(Date.subsecond_re, "Z", text)
            rfc3339date = time.strptime(text, "%Y-%m-%dT%H:%M:%SZ")
            self.timestamp = calendar.timegm(rfc3339date)
            self.orig_tz_string = "+0000"
            self.tz_offset = 0
            return
        except ValueError:
            # time.strptime() throws this
            # "time data 'xxxxxx' does not match format '%Y-%m-%dT%H:%M:%S'" 
            pass
        # Date format not recognized
        raise Fatal("'%s' is not a valid timestamp" % text)
    @staticmethod
    def secondsFromOffsetString(text):
        m = re.match(Date.offset_re, text)
        if m is not None:
            sign = -1 if m.group(1) == "-" else 1
            hours = int(m.group(2))
            mins = int(m.group(3))
            if hours < -14 or hours > 13 or mins > 59:
                complain("dubious UTC offset '%s'." % text)
            return (hours * 60 + mins) * 60 * sign
        else:
            complain("invalid UTC offset '%s', assuming +0000 instead." % text)
            return 0
    def rfc3339(self):
        return rfc3339(self.timestamp)
    def rfc822(self):
        "Format as an RFC822 timestamp."
        return time.strftime("%a %d %b %Y %H:%M:%S", time.gmtime(self.timestamp + self.tz_offset)) + " " + self.orig_tz_string
    def delta(self, other):
        return other.timestamp - self.timestamp
    @staticmethod
    def tzresolve(tz):
        "Hacky way to beat the Unix timezone database into resolving TZ names."
        if tz[0] in "+-":
            return tz
        oldtz = os.getenv("TZ")
        try:
            os.putenv("TZ", tz)
            time.tzset()
            now = int(time.time())
            tm = time.localtime(int(now))
            os.putenv("TZ", "UTC")
            time.tzset()
            seconds = int(time.mktime(tm)) - now
        finally:
            os.putenv("TZ", oldtz or "")
            time.tzset()
        if str(seconds).startswith("-"):
            sgn = "-"
            seconds = -seconds
        else:
            sgn = "+"
        return sgn + ("%02d" % (seconds / 3600)) + str("%02d" % abs(seconds % 3600))
    def __str__(self):
        "Format as a git timestamp."
        return str(self.timestamp) + " " + self.orig_tz_string
    def __eq__(self, other):
        return self.timestamp == other.timestamp
    def __ne__(self, other):
        return self.timestamp != other.timestamp
    def __lt__(self, other):
        return self.timestamp < other.timestamp

class DateTests(unittest.TestCase):
    def test_conversion(self):
        def do_test(init, formats):
            date = Date(init)
            for (func, result) in formats.iteritems():
                self.assertEqual(getattr(date, func)(), result)
        data = [['2010-10-27T18:43:32Z',
                 { 'rfc3339': "2010-10-27T18:43:32Z",
                   'rfc822': "Wed 27 Oct 2010 18:43:32 +0000",
                   '__str__': "1288205012 +0000" }],
                ['1288205012 +0000',
                 { 'rfc3339': "2010-10-27T18:43:32Z",
                   'rfc822': "Wed 27 Oct 2010 18:43:32 +0000",
                   '__str__': "1288205012 +0000" }],
                ['Wed 27 Oct 2010 18:43:32 +0000',
                 { 'rfc3339': "2010-10-27T18:43:32Z",
                   'rfc822': "Wed 27 Oct 2010 18:43:32 +0000",
                   '__str__': "1288205012 +0000" }]]
        for init, formats in data:
            do_test(init, formats)
    def test_equality(self):
        d1 = Date('2010-10-27T18:43:32Z')
        d2 = Date('1288205012 +0000')
        d3 = Date('Wed 27 Oct 2010 18:43:32 +0000')
        self.assertEqual(d1, d1)
        self.assertEqual(d1, d2)
        self.assertEqual(d1, d3)
        self.assertEqual(d2, d2)
        self.assertEqual(d2, d3)
        self.assertEqual(d3, d3)
        self.assertEqual(Date.tzresolve("EST"), "-0500")
        self.assertEqual(Date.tzresolve("-0500"), "-0500")
        self.assertIn(Date.tzresolve("Europe/Warsaw"), ("+0100", "+0200"))
    def test_inequality(self):
        d1 = Date('Wed 27 Oct 2010 18:43:32 +0000')
        d2 = Date('Wed 27 Oct 2010 18:43:33 +0000')
        d3 = Date('Wed 27 Oct 2010 18:43:32 +0100')
        self.assertNotEqual(d1, d2)
        self.assertTrue(d1 < d2)
        self.assertTrue(d2 > d1)
        self.assertNotEqual(d1, d3)
        self.assertTrue(d1 > d3)
        self.assertTrue(d3 < d1)
        self.assertNotEqual(d2, d3)
        self.assertTrue(d2 > d3)
        self.assertTrue(d3 < d2)
        d1 = Date('2010-10-27T18:43:32Z')
        d2 = Date('2010-10-27T18:43:33Z')
        self.assertNotEqual(d1, d2)
        self.assertTrue(d1 < d2)
        self.assertTrue(d2 > d1)
        d1 = Date('1288205012 +0000')
        d2 = Date('1288205013 +0000')
        self.assertNotEqual(d1, d2)
        self.assertTrue(d1 < d2)
        self.assertTrue(d2 > d1)
    def test_deltas(self):
        d1 = Date('Wed 27 Oct 2010 18:43:32 +0000')
        d2 = Date('Wed 27 Oct 2010 18:43:33 +0000')
        d3 = Date('Wed 27 Oct 2010 18:43:32 +0100')
        self.assertEqual(d1.delta(d2), 1)
        self.assertEqual(d2.delta(d3), -3601)
        self.assertEqual(d3.delta(d1), 3600)
        self.assertEqual(d1.delta(d1), 0)

class Attribution(object):
    "Represents an attribution of a repo action to a person and time."
    __slots__ = ("name", "email", "date")
    def __init__(self, person=None):
        self.name = self.email = self.date = None
        if person:
            # Deal with a cvs2svn artifact
            person = person.replace("(no author)", "no-author")
            # First, validity-check the email address
            (self.name, self.email) = email.utils.parseaddr(person)
            if not self.email:
                raise Fatal("can't recognize address in attribution %s" % person)
            # Attribution format is actually stricter than RFC822;
            # needs to have a following date in the right place.
            person = person.replace(" <", "|").replace("> ", "|")
            try:
                self.date = Date(person.strip().split("|")[2])
            except (ValueError, IndexError):
                raise Fatal("malformed attribution date in %s" % person)
    def email_out(self, _modifiers, msg, hdr):
        "Update an RC822 message object with a representation of this."
        msg[hdr] = self.name + " <" + self.email + ">"
        msg[hdr + "-Date"] = self.date.rfc822()
    def remap(self, authors):
        "Remap the attribution name."
        for (local, (name, mail, timezone)) in authors.iteritems():
            if self.email.lower().startswith(local + "@") or self.email.lower() == local:
                self.name = name
                self.email = mail
                if timezone:
                    self.date.orig_tz_string = timezone
                break
    def action_stamp(self):
        return self.date.rfc3339() + "!" + self.email
    def __eq__(self, other):
        "Compare attributions after canonicalization."
        return (self.name == other.name
                and self.email == other.email
                and self.date == other.date)
    def who(self):
        return self.name + " <" + self.email + ">"
    def __str__(self):
        return self.name + " <" + self.email + "> " + str(self.date)

class Blob(object):
    "Represent a detached blob of data referenced by a mark."
    __slots__ = ("repo", "mark", "paths", "colors", "cookie", "start", "size", "deletehook")
    def __init__(self, repo=None):
        self.repo = repo
        self.mark = None
        self.paths = []      # Set of in-repo paths associated with this blob
        self.colors = []
        self.cookie = None
        self.start = None
        self.size = 0
        self.deletehook = None
    def blobfile(self, create=False):
        "File where the content lives."
        stem = repr(id(self))
        parts = ("blobs", stem[:3], stem[3:6], stem[6:]) 
        if create:
            for d in range(len(parts)-1):
                partial = os.path.join(self.repo.subdir(), *parts[:d+1])
                if not os.path.exists(partial):
                    os.mkdir(partial)
        return os.path.join(self.repo.subdir(), *parts)
    def hasfile(self):
        "Does this blob have its own file?"
        return not self.repo.seekstream or self.start is None
    def materialize(self):
        "Materialize this content as a separate file, if it isn't already."
        if not self.hasfile():
            self.set_content(self.get_content())
        return self.blobfile()
    def get_content(self):
        "Get the content of the blob as a string."
        if not self.hasfile():
            self.repo.seekstream.seek(self.start)
            return self.repo.seekstream.read(self.size)
        elif global_options["compressblobs"]:
            with bz2.BZ2File(self.blobfile(), "r") as rfp:
                return rfp.read()
        else:
            with open(self.blobfile(), "r") as rfp:
                return rfp.read()
    def set_mark(self, mark):
        "Set the blob's mark."
        self.mark = mark
        self.repo._mark_to_object[mark] = self
        return mark
    def forget(self):
        "De-link this commit from its repo."
        self.repo = None
    def set_content(self, text, tell=None):
        "Set the content of the blob from a string."
        self.start = tell
        self.size = len(text)
        if self.hasfile():
            if global_options["compressblobs"]:
                with bz2.BZ2File(self.blobfile(create=True), "w") as wfp:
                    return wfp.write(text)
            else:
                with open(self.blobfile(create=True), "w") as wfp:
                    wfp.write(text)
    def moveto(self, repo):
        "Change the repo this blob is associated with."
        if self.hasfile():
            oldloc = self.blobfile()
            self.repo = repo
            newloc = self.blobfile()
            if debug_enable(DEBUG_SHUFFLE):
                announce("blob rename calls os.rename(%s, %s)" % (oldloc, newloc))
            os.rename(oldloc, newloc)
        return self
    def clone(self, repo):
        "Clone a copy of this blob, pointing at the same file."
        c = copy.copy(self)
        c.repo = repo
        c.colors = []
        if debug_enable(DEBUG_SHUFFLE):
            announce("blob clone for %s (%s) calls os.link()" % (self.mark, self.paths))
        if self.hasfile():
            os.link(self.blobfile(), c.blobfile(create=True))
        return c
    def dump(self, vcs=None):
        if self.hasfile() and not os.path.exists(self.blobfile()):
            return ''
        else:
            content = self.get_content()
            if vcs is None and self.repo.vcs and self.repo.vcs.importer:
                vcs = self.repo.vcs
            # Ugh.  This is where we mess with ignore syntax translation
            if vcs and self.repo.vcs and len(self.paths) == 1 and self.paths[0].endswith(".gitignore"):
                if vcs.name == "hg" and self.repo.vcs.name != "hg":
                    if not content.startswith("syntax: glob\n"):
                        content = "syntax: glob\n" + content
            return "blob\nmark %s\ndata %d\n%s\n" % (self.mark, len(content), content)
    def __str__(self):
        return self.dump()

class Tag(object):
    "Represents an annotated tag."
    __slots__ = ("repo", "name", "color", "committish",
                 "target", "tagger", "comment", "deletehook")
    def __init__(self, repo=None,
                 name=None, committish=None, target=None, tagger=None, comment=None):
        self.repo = None
        self.name = name
        self.color = None
        self.committish = None
        self.target = None
        self.remember(repo, committish=committish, target=target)
        self.tagger = tagger
        self.comment = comment
        self.deletehook = None
    def remember(self, repo, committish=None, target=None):
        "Remember an attachment to a repo and commit."
        self.repo = repo
        if target is not None:
            self.target = target
            self.committish = target.mark
        else:
            self.committish = committish
            if self.repo:
                self.target = self.repo.objfind(self.committish)
        if self.target:
            self.target.attachments.add(self)
    def forget(self):
        "Forget this tag's attachment to its commit and repo."
        if self.target:
            self.target.attachments.discard(self)
            self.target = None
        self.repo = None
    def index(self):
        "Our 0-origin index in our repo."
        return self.repo.index(self)
    def id_me(self):
        "ID this tag for humans."
        return "tag@%d (%s)" % (self.index()+1, self.name)
    def email_out(self, modifiers, eventnum):
        "Enable do_mailbox_out() to report these."
        msg = RepoSurgeonEmail()
        msg["Event-Number"] = str(eventnum+1)
        msg["Tag-Name"] = self.name
        if self.tagger:
            self.tagger.email_out(modifiers, msg, "Tagger")
        msg.set_payload(self.comment)
        if self.comment and not self.comment.endswith("\n"):
            complain("in tag %s, comment was not LF-terminated." % self.name)
        return str(msg)
    def email_in(self, msg):
        "Update this Tag from a parsed email message."
        if "Tag-Name" not in msg:
            raise Fatal("update to tag %s is malformed" % self.name)
        modified = False
        newname = msg["Tag-Name"]
        if self.name != newname:
            if debug_enable(DEBUG_EMAILIN):
                announce("in tag %d, Tag-Name is modified %s -> %s" \
                      % (int(msg["Event-Number"]), repr(self.name), repr(newname)))
            self.name = newname
            modified = True
        if "Tagger" in msg:
            (newname, newemail) = email.utils.parseaddr(msg["Tagger"])
            if not newname or not newemail:
                raise Fatal("can't recognize address in Tagger: %s" % msg['Tagger'])
            else:
                if self.tagger.name != newname or self.tagger.email != newemail:
                    (self.tagger.name, self.tagger.email) = (newname, newemail)
                    if debug_enable(DEBUG_EMAILIN):
                        announce("in tag %d, Tagger is modified" \
                              % (int(msg["Event-Number"])))
                    modified = True
            if "Tagger-Date" in msg:
                date = Date(msg["Tagger-Date"])
                if self.tagger.date is None or date != self.tagger.date:
                    # Yes, display this unconditionally
                    if self.repo:
                        announce("in %s, Tagger-Date is modified '%s' -> '%s' (delta %d)" \
                             % (self.id_me(),
                                self.tagger.date, date,
                                self.tagger.date.delta(date)))
                    self.tagger.date = date
                    modified = True
        newcomment = msg.get_payload()
        if global_options["canonicalize"]:
            newcomment = newcomment.strip().replace("\r\n", "\n") + '\n'
        if newcomment != self.comment:
            if debug_enable(DEBUG_EMAILIN):
                announce("in tag %d, comment is modified %s -> %s" \
                      % (int(msg["Event-Number"]), repr(self.comment), repr(newcomment)))
            modified = True
            self.comment = newcomment
        return modified
    def dump(self, _vcs=None):
        "Dump this tag in import-stream format."
        parts = ["tag %s\nfrom %s\n" % (self.name, self.committish)]
        if self.tagger:
            parts.append("tagger %s\n" % self.tagger)
        parts.append("data %d\n%s\n" % (len(self.comment or ""), self.comment or ""))
        return "".join(parts)
    def __str__(self):
        return self.dump()

class Reset(object):
    "Represents a branch creation."
    __slots__ = ("repo", "ref", "committish", "target", "deletehook", "color")
    def __init__(self, repo, ref=None, committish=None, target=None):
        self.repo = None
        self.ref = ref
        self.committish = None
        self.target = None
        self.remember(repo, committish=committish, target=target)
        self.deletehook = None
        self.color = None
    def remember(self, repo, committish=None, target=None):
        "Remember an attachment to a repo and commit."
        self.repo = repo
        if target is not None:
            self.target = target
            self.committish = target.mark
        else:
            self.committish = committish
            if self.repo:
                self.target = self.repo.objfind(self.committish)
        if self.target:
            self.target.attachments.add(self)
    def forget(self):
        "Forget this reset's attachment to its commit and repo."
        if self.target:
            self.target.attachments.discard(self)
            self.target = None
        self.repo = None
    def moveto(self, repo):
        "Change the repo this reset is associated with."
        self.repo = repo
    def dump(self, _vcs=None):
        "Dump this reset in import-stream format."
        st = "reset %s\n" % self.ref
        if not self.committish:
            return st
        return st + "from %s\n\n" % self.committish
    def __str__(self):
        return self.dump()

class FileOp(object):
    "Represent a primitive operation on a file."
    __slots__ = ("vcs", "op", "committish", "source", "target",
                 "mode", "path", "ref", "inline",
                 "sourcedelete", "targetdelete")
    modify_re = re.compile(r"(M) ([0-9]+) (\S+) (.*)")
    sortkey_sentinel = chr(ord("/") + 1)
    def __init__(self, vcs=None):
        self.vcs = vcs
        self.op = None
        self.committish = None
        self.source = None
        self.target = None
        self.mode = None
        self.path = None
        self.ref = None
        self.inline = None
    def path_remap_in(self):
        "Hack the fileop's basename to map it to git conventions."
        # Ignore file names from non-git VCSes need to get
        # mapped to .gitignore, because we have to
        # have some way to recognize what they are
        # in order to remap the name properly on
        # export.
        if self.vcs is not None:
            if os.path.basename(self.path) == self.vcs.ignorename:
                self.path = os.path.join(os.path.dirname(self.path), ".gitignore")
    def path_remap_out(self, path, vcs):
        "Hack the fileop's basename to map it to a target VCS's conventions."
        if vcs is not None and vcs.ignorename is not None:
            if os.path.basename(path) == ".gitignore":
                return os.path.join(os.path.dirname(path), vcs.ignorename)
        return path
    def setOp(self, op):
        self.op = op
    @staticmethod
    def sortkey(fileop):
        "Compute a key suited for sorting FileOps as git fast-export does."
        # As it says, 'Handle files below a directory first, in case they are
        # all deleted and the directory changes to a file or symlink.'
        # First sort the renames last, then sort lexicographically
        # We append a sentinel to make sure "a/b/c" < "a/b" < "a".
        return (fileop.op == "R",
                (fileop.path or fileop.source or "") + \
                        fileop.sortkey_sentinel)
    def construct(self, *opargs):
        if opargs[0] == "M":
            (self.op, self.mode, self.ref, self.path) = opargs
            self.path_remap_in()
            if isinstance(self.mode, int):
                self.mode = "%06o" % self.mode
        elif opargs[0] == "D":
            (self.op, self.path) = opargs
            self.path_remap_in()
        elif opargs[0] == "N":
            (self.op, self.ref, self.committish) = opargs
        elif opargs[0] in ("R", "C"):
            (self.op, self.source, self.target) = opargs
        elif opargs[0] == "deleteall":
            self.setOp("deleteall")
        else:
            raise Fatal("unexpected fileop %s" % opargs[0])
    def parse(self, opline):
        if opline.startswith("M"):
            m = FileOp.modify_re.match(opline)
            if not m:
                raise Fatal("bad format of M line: %s" % repr(opline))
            (self.op, self.mode, self.ref, self.path) = m.groups()
            if self.path[0] == '"' and self.path[-1] == '"':
                self.path = self.path[1:-1]
            self.path_remap_in()
        elif opline[0] == "N":
            try:
                opline = opline.replace("'", r"\'")
                (self.op, self.ref, self.committish) = shlex.split(opline)
            except ValueError:
                raise Fatal("ill-formed fileop %s" % repr(opline))
        elif opline[0] == "D":
            (self.op, self.path) = ("D", opline[2:].strip())
            if self.path[0] == '"' and self.path[-1] == '"':
                self.path = self.path[1:-1]
            self.path_remap_in()
        elif opline[0] in ("R", "C"):
            try:
                opline = opline.replace("'", r"\'")
                (self.op, self.source, self.target) = shlex.split(opline)
            except ValueError:
                raise Fatal("ill-formed fileop %s" % repr(opline))
        elif opline == "deleteall":
            self.op = "deleteall"
        else:
            raise Fatal("unexpected fileop %s while parsing" % opline)
        return self
    def paths(self):
        "Return the set of all paths touched by this file op."
        if self.op in ("M", "D"): return {self.path}
        if self.op in ("R", "C"): return {self.source, self.target}
        # Ugh...this isn't right for deleteall, but since we don't expect
        # to see that except at branch tips we'll ignore it for now.
        if self.op in ("N", "deleteall"): return set()
        raise Fatal("unknown fileop type")
    def relevant(self, other):
        "Do two fileops touch the same file(s)?"
        if self.op == "deleteall" or other.op == "deleteall":
            return True
        else:
            return self.paths() & other.paths()
    def dump(self, vcs=None):
        "Dump this fileop in import-stream format."
        if self.op == "M":
            showmode = self.mode
            if isinstance(self.mode, int):
                showmode = "%06o" % self.mode
            parts = [" ".join((self.op, showmode, self.ref)), " "]
            if len(self.path.split()) > 1:
                parts.extend(('"', self.path, '"'))
            else:
                parts.append(self.path_remap_out(self.path, vcs))
            if self.ref == 'inline':
                parts.append("\ndata %d\n%s" % (len(self.inline), self.inline))
        elif self.op == "N":
            parts = [" ".join((self.op, self.ref, self.committish)), "\n"]
            if self.ref == 'inline':
                parts.append("data %d\n%s" % (len(self.inline), self.inline))
        elif self.op == "D":
            parts = ["D "]
            if len(self.path.split()) > 1:
                parts.extend(('"', self.path, '"'))
            else:
                parts.append(self.path_remap_out(self.path, vcs))
        elif self.op in ("R", "C"):
            parts = ['%s "%s" "%s"' %  (self.op,
                                    self.path_remap_out(self.source, vcs),
                                    self.path_remap_out(self.target, vcs))]
        elif self.op == "deleteall":
            parts = [self.op]
        else:
            raise Fatal("unexpected fileop %s while writing" % self.op)
        return "".join(parts)
    def __str__(self):
        return self.dump(self.vcs)

class Commit(object):
    "Generic commit object."
    __slots__ = ("repo", "mark", "authors", "committer", "comment",
                 "branch", "fileops", "properties", "filemap", "color",
                 "fossil_id", "common", "splits", "deletehook", "attachments",
                 "_parent_nodes", "_child_nodes", "_pathset")
    def __init__(self, repo=None):
        self.repo = repo
        self.mark = None             # Mark name of commit (may be None)
        self.authors = []            # Authors of commit
        self.committer = None        # Person responsible for committing it.
        self.comment = None          # Commit comment
        self.branch = None           # branch name
        self.fileops = []            # blob and file operation list
        self.properties = collections.OrderedDict()         # commit properties (extension)
        self.filemap = {}
        self.color = None
        self.fossil_id = None        # Commit's ID in an alien system
        self.common = None           # Used only by the Subversion parser
        self.splits = None           # split command increments this
                                     # to avoid creating multiple new commits
                                     # with duplicate marks
        self.deletehook = None	     # Hook used during deletion operations
        self.attachments = set()
        self._parent_nodes = []      # list of parent nodes
        self._child_nodes = []       # list of child nodes
        self._pathset = None
    def index(self):
        "Our 0-origin index in our repo."
        return self.repo.index(self)
    def id_me(self):
        "ID this commit for humans."
        myid = "commit@%d" % (self.index()+1)
        if self.fossil_id:
            myid += "=<%s>" % self.fossil_id
        return myid
    def when(self):
        "Imputed timestamp for sorting after unites."
        return self.committer.date.timestamp
    def moveto(self, repo):
        "Change the repo this commit is associated with."
        self.repo = repo
    def set_branch(self, branch):
        "Set the repo's branch field, optimizing for fast comparisons."
        self.branch = intern(branch)
    def clone(self, repo=None):
        "Clone this commit, without its fileops, color and children."
        c = copy.copy(self)
        c.committer = copy.deepcopy(self.committer)
        c.authors = copy.deepcopy(self.authors)
        c.fileops = []
        c._pathset = None
        c.color = None
        if repo is not None:
            c.repo = repo
        c._child_nodes = []
        # use the encapsulation to set parents instead of relying
        # on the copy, so that Commit can do its bookkeeping.
        c._parent_nodes = [] # avoid confusing set_parents()
        c.set_parents(list(self.parents()))
        return c
    def showfossil(self):
        "Show a fossil ID in the expected form for the ancestral system."
        if not self.fossil_id:
            return None
        # Special case for Subversion
        if self.repo and self.repo.vcs and self.repo.vcs.name == "svn":
            return "r" + self.fossil_id
        else:
            return self.fossil_id
    def lister(self, _modifiers, eventnum, cols):
        "Enable do_list() to report commits."
        topline = self.comment.split("\n")[0]
        summary = "%6d %s %6s " % \
                      (eventnum+1, self.committer.date.rfc3339(), self.mark)
        if self.fossil_id:
            fossil = "<%s>" % self.fossil_id
            summary += "%6s " % fossil
        return (summary + topline)[:cols]
    def tip(self, _modifiers, eventnum, cols):
        "Enable do_tip() to report deduced branch tips."
        summary = "%6d %s %6s " % \
                      (eventnum+1, self.committer.date.rfc3339(), self.mark)
        return (summary + self.head())[:cols]
    def tags(self, _modifiers, eventnum, cols):
        "Enable do_list() to report lightweight tags."
        assert cols > -1    # pacify pylint
        return self.branch and "/tags/" in self.branch and "%6d %s" % (eventnum+1, self.branch) 
    def email_out(self, modifiers, eventnum):
        "Enable do_mailbox_out() to report these."
        msg = RepoSurgeonEmail()
        msg["Event-Number"] = str(eventnum+1)
        msg["Branch"] = self.branch
        msg["Parents"] = " ".join(self.parent_marks())
        if self.authors:
            self.authors[0].email_out(modifiers, msg, "Author")
            for (i, coauthor) in enumerate(self.authors[1:]):
                coauthor.email_out(msg, "Author" + repr(2+i))
        self.committer.email_out(modifiers, msg, "Committer")
        if self.fossil_id:
            msg["Fossil-ID"] = self.fossil_id
        for (name, value) in self.properties.iteritems():
            hdr = "-".join(s.capitalize() for s in name.split("-"))
            value = value.replace("\n", r"\n")
            value = value.replace("\t", r"\t")
            msg["Property-" + hdr] = value
        msg.set_payload(self.comment)
        if not self.comment.endswith("\n"):
            complain("in commit %s, comment was not LF-terminated." % self.mark)
        return str(msg)
    def action_stamp(self):
        "Control how a commit stamp is made."
        # Prefer the primary author to the committer because it
        # doesn't get messed with when passing around and applying
        # patch sets.
        if self.authors:
            return self.authors[0].action_stamp()
        else:
            return self.committer.action_stamp()
    def email_in(self, msg):
        "Update this commit from a parsed email message."
        modified = False
        if "Branch" in msg:
            if self.branch != msg["Branch"]:
                modified = True
            self.set_branch(msg["Branch"])
        if "Parents" in msg:
            if self.parent_marks() != msg["Parents"].split():
                modified = True
            self.set_parent_marks(msg["Parents"].split())
        if "Committer" in msg:
            (newname, newemail) = email.utils.parseaddr(msg["Committer"])

            if not newemail:
                raise Fatal("can't recognize address in Committer: %s" % msg["Committer"])
            else:
                if self.committer.name != newname or self.committer.email != newemail:
                    (self.committer.name, self.committer.email) = (newname, newemail)
                    # Yes, display this unconditionally
                    if self.repo:
                        announce("in %s, Committer is modified" % self.id_me())
                    modified = True
        if "Committer-Date" in msg:
            date = Date(msg["Committer-Date"])
            if self.committer.date is None or date != self.committer.date:
                # Yes, display this unconditionally
                if self.repo:
                    announce("in %s, Committer-Date is modified '%s' -> '%s' (delta %d)" \
                          % (self.id_me(),
                             self.committer.date, date,
                             self.committer.date.delta(date)))
                self.committer.date = date
                modified = True
        if "Author" in msg:
            author_re = re.compile("Author[0-9]*$")
            # Potential minor bug here if > 10 authors;
            # lexicographic sort order doesn't match numeric
            # msg is *not* a dict so the .keys() is correct
            authorkeys = sorted(filter(author_re.match, msg.keys()))
            for i in range(len(authorkeys) - len(self.authors)):
                self.authors.append(Attribution())
            # Another potential minor bug: permuting the set of authors
            # will look like a modification, as old and new authors are
            # compaired pairwise rather than set equality being checked.
            # Possibly a feature if one thinks order is significant, but
            # I just did it this way because it was easier.
            for (i, hdr) in enumerate(authorkeys):
                (newname, newemail) = email.utils.parseaddr(msg[hdr])
                if not newemail:
                    raise Fatal("can't recognize address in %s: %s" % (hdr, msg[hdr]))
                else:
                    if self.authors[i].name != newname or self.authors[i].email != newemail:
                        (self.authors[i].name, self.authors[i].email) = (newname, newemail)
                        if debug_enable(DEBUG_EMAILIN):
                            announce("in commit %s, Author #%d is modified" \
                                  % (msg["Event-Number"], i+1))
                        modified = True
                if hdr + "-Date" in msg:
                    date = Date(msg[hdr + "-Date"])
                    if date != self.authors[i].date:
                        # Yes, display this unconditionally
                        if self.repo:
                            announce("in event %s, %s-Date #%d is modified" \
                                     % (msg["Event-Number"], hdr, i+1))
                        self.authors[i].date = date
                        modified = True
        if "Fossil-ID" in msg:
            if msg["Fossil-ID"] != self.fossil_id:
                modified = True
                msg["Fossil-ID"] = self.fossil_id
        newprops = collections.OrderedDict()
        for prophdr in msg.keys():
            if not prophdr.startswith("Property-"): continue
            propkey = prophdr[9:].lower()
            propval = msg[prophdr]
            if propval == "True":
                propval = True
            elif propval == "False":
                propval = False
            else:
                propval = propval.replace(r"\n", "\n")
                propval = propval.replace(r"\t", "\t")
            newprops[propkey] = propval
        modified |= (newprops != self.properties)
        self.properties = newprops
        newcomment = msg.get_payload()
        if global_options["canonicalize"]:
            newcomment = newcomment.strip() + '\n'
        if newcomment != self.comment:
            if debug_enable(DEBUG_EMAILIN):
                announce("in %s, comment is modified %s -> %s" \
                      % (self.id_me(), repr(self.comment), repr(newcomment)))
            modified = True
            self.comment = newcomment
        return modified
    def set_mark(self, mark):
        "Set the commit's mark."
        self.mark = mark
        self.repo._mark_to_object[mark] = self
        return mark
    def forget(self):
        "De-link this commit from its parents."
        self.set_parents([])
        self.repo = None
    # Hide the parent list behind an interface, so that we can memoize
    # the computation, which is very expensive and frequently
    # performed.
    def parents(self):
        "Get a list of this commit's parents."
        return self._parent_nodes
    def parent_marks(self):
        return [x.mark for x in self._parent_nodes]
    def set_parent_marks(self, marks):
        self.set_parents([self.repo.objfind(x) for x in marks])
    def set_parents(self, parents):
        for parent in self._parent_nodes:
            # remove all occurences of self in old parent's children cache
            parent._child_nodes = [n for n in parent._child_nodes if n is not self]
        self._parent_nodes = parents
        assert all(self._parent_nodes)
        for parent in self._parent_nodes:
            parent._child_nodes.append(self)
    def add_parent(self, mark):
        if isinstance(mark, Commit):
            newparent = mark
        else:
            newparent = self.repo.objfind(mark)
        assert(newparent)
        self._parent_nodes.append(newparent)
        newparent._child_nodes.append(self)
    def insert_parent(self, idx, mark):
        newparent = self.repo.objfind(mark)
        assert(newparent)
        self._parent_nodes.insert(idx, newparent)
        newparent._child_nodes.append(self)
    def remove_parent(self, event):
        # remove *all* occurences of event in parents
        self._parent_nodes = [n for n in self._parent_nodes if n is not event]
        # and all occurences of self in events children
        event._child_nodes = [n for n in event._child_nodes if n is not self]
    def replace_parent(self, e1, e2):
        self._parent_nodes[self._parent_nodes.index(e1)] = e2
        e1._child_nodes.remove(self)
        e2._child_nodes.append(self)
    def has_parents(self):
        return bool(self._parent_nodes)
    def children(self):
        "Get a list of this commit's children."
        return self._child_nodes
    def has_children(self):
        "Predicate - does this commit have children?"
        return bool(self._child_nodes)
    def first_child(self):
        "Get the first child of this commit, or None if not has_children()."
        return self._child_nodes[0]
    def descended_from(self, other):
        "Is this commit a descendent of the specified other?"
        if not self.has_parents() or self.committer.date < other.committer.date:
            return False
        elif other in self.parents():
            return True
        else:
            return any(parent.descended_from(other) \
                        for parent in self.parents())
    def cliques(self):
        "Return a dictionary mapping filenames to associated M cliques."
        cliques = collections.defaultdict(list)
        for (i, fileop) in enumerate(self.fileops):
            if fileop.op == "M": cliques[fileop.path].append(i)
        return cliques
    def fileop_dump(self):
        "Dump file ops without data or inlines; used for debugging only."
        print("commit %d, mark %s:" % (self.repo.find(self.mark)+1, self.mark))
        for (i, op) in enumerate(self.fileops):
            if op is not None:
                print("%d: %-20s" % (i, str(op)))
    def paths(self):
        "Return the set of all paths touched by this commit."
        if self._pathset is None:
            self._pathset = set()
            for fileop in self.fileops:
                self._pathset |= fileop.paths()
        return self._pathset
    def manifest(self):
        "Return a map from paths to marks for files existing at this commit."
        sys.setrecursionlimit(len(self.repo.events) * 2)
        return self._manifest()
    def _manifest(self):
        if self.filemap:
            return self.filemap
        # Get the first parent manifest, or an empty one.
        try:
            ancestors = self.parents()[0]._manifest().snapshot()
        except IndexError:
            ancestors = PathMap()
        # Take own fileops into account.
        for fileop in self.fileops:
            if fileop.op == 'M':
                ancestors[fileop.path] = fileop.ref
            elif fileop.op == 'D':
                if fileop.path in ancestors:
                    del ancestors[fileop.path]
            elif fileop.op == 'C':
                ancestors[fileop.target] = ancestors[fileop.source]
            elif fileop.op == 'R':
                ancestors[fileop.target] = ancestors[fileop.source]
                if fileop.source in ancestors:
                    del ancestors[fileop.source]
            elif fileop.op == 'deleteall':
                ancestors = PathMap()
        self.filemap = ancestors
        return ancestors
    def alldeletes(self, killset={"D", "deleteall"}):
        "Is this an all-deletes commit?"
        return all(fileop.op in killset for fileop in self.fileops)
    def checkout(self, directory=None):
        "Make a directory with links to files in a specified checkout."
        if not directory:
            directory = os.path.join(self.repo.subdir(), self.mark)
        try:
            os.mkdir(directory)
            for (path, mark) in self.manifest().iteritems():
                fullpath = os.path.join(directory, path)
                fulldir = os.path.dirname(fullpath)
                if not os.path.exists(fulldir):
                    os.makedirs(fulldir)
                blob = self.repo.objfind(mark)
                if blob.hasfile():
                    os.link(blob.blobfile(), fullpath)
                else:
                    with open(fullpath, "w") as wfp:
                        wfp.write(blob.get_content())
        except OSError:
            raise Recoverable("could not create checkout directory or files.")
        return directory
    def head(self):
        "Return the branch to which this commit belongs."
        if self.branch.startswith("refs/heads/") or not self.has_children():
            return self.branch
        rank = 0; child = None # pacify pylint
        for rank, child in enumerate(self.children()):
            if child.branch == self.branch:
                return child.head()
        if rank == 0:
            return child.head() # there was only one child
        raise Recoverable("can't deduce a branch head for %s" % self.mark)
    def dump(self, vcs=None):
        "Dump this commit in import-stream format."
        if vcs is None and self.repo.vcs and self.repo.vcs.importer:
            vcs = self.repo.vcs
        parts = []
        if self.fossil_id:
            parts.append("# Fossil-ID: %s\n" % self.fossil_id)
        parts.append("commit %s\n" % self.branch)
        if self.mark:
            parts.append("mark %s\n" % self.mark)
        if self.authors:
            for author in self.authors:
                parts.append("author %s\n" % author)
        if self.committer:
            parts.append("committer %s\n" % self.committer)
        if self.comment is not None:
            comment = self.comment
            if global_options["fossilize"] and self.fossil_id:
                comment += "\nFossil-ID: %s\n" % self.fossil_id
            parts.append("data %d\n%s" % (len(comment), comment))
        if "nl-after-comment" in self.repo.export_style():
            parts.append("\n")
        parents = self.parents()
        if parents:
            parts.append("from %s\n" % parents[0].mark)
        for ancestor in parents[1:]:
            parts.append("merge %s\n" % ancestor.mark)
        if vcs and vcs.properties:
            for (name, value) in self.properties.iteritems():
                if value in (True, False):
                    if value:
                        parts.append("property %s\n" % name)
                else:
                    parts.append("property %s %d %s\n" % (name, len(str(value)), str(value)))
        parts.extend(op.dump(vcs) + "\n" for op in self.fileops)
        if not "no-nl-after-commit" in self.repo.export_style():
            parts.append("\n")
        return "".join(parts)
    def __str__(self):
        return self.dump()

class Passthrough(object):
    "Represents a passthrough line."
    __slots__ = ("text", "deletehook", "color")
    def __init__(self, line):
        self.text = line
        self.deletehook = None
        self.color = None
    def email_out(self, _modifiers, eventnum):
        "Enable do_mailbox_out() to report these."
        msg = RepoSurgeonEmail()
        msg["Event-Number"] = str(eventnum+1)
        msg.set_payload(self.text)
        return str(msg)
    def email_in(self, msg):
        self.text = msg.get_payload()
    def dump(self, _vcs=True):
        "Dump this passthrough in import-stream format."
        return self.text
    def __str__(self):
        return self.dump()

class Fatal(Exception):
    "Unrecoverable error."
    def __init__(self, msg):
        Exception.__init__(self)
        self.msg = msg

# Generic extractor code begins here

class signature:
    "A file signature - file path, hash value of content and permissions."
    def __init__(self, path):
        self.path = path
        self.hashval = None
        self.perms = None
        if not os.path.isdir(path):
            with open(path, "rb") as fp:
                self.hashval = hashlib.sha1(fp.read()).hexdigest()
            self.perms = os.stat(path).st_mode
            # Map to the restricted set of modes that are allowed in
            # the stream format.
            if self.perms & 0o100700 == 0o100700:
                self.perms = 0o100755
            elif self.perms & 0o100600 == 0o100600:
                self.perms = 0o100644
    def __eq__(self, other):
        #if debug_enable(DEBUG_EXTRACT):
        #    announce("%s == %s -> %s" % (str(self),
        #                                 str(other),
        #                                 self.__dict__ == other.__dict__))
        return self.__dict__ == other.__dict__
    def __ne__(self, other):
        return not signature.__eq__(self, other)
    def __str__(self):
        return "<%s:%s:%s>" % (self.path, "%6o" % self.perms, self.hashval[:4])

def capture(command):
    "Run a specified command, capturing the output."
    if debug_enable(DEBUG_COMMANDS):
        announce("%s: capturing %s" % (rfc3339(time.time()), command))
    try:
        content = subprocess.check_output(command, shell=True).decode()
    except (subprocess.CalledProcessError, OSError) as oe:
        raise Fatal("execution of '%s' failed: %s" % (command, oe))
    if debug_enable(DEBUG_COMMANDS):
        sys.stderr.write(content)
    return content

class PathMap(object):
    """Represent the set of filenames visible in a Subversion
    revision, using copy-on-write to keep the size of the structure in
    line with the size of the Subversion repository metadata."""
    __slots__ = ("shared", "maxid", "snapid", "store")
    def __init__(self, other = None):
        # The instance may be a child of several other PathMaps if |shared|
        # is True. |snapid| is an integer unique among related PathMaps,
        # and |maxid| is a list (for reference sharing) whose only value is
        # the maximum |snapid| of the collection. |store| is a dict mapping
        # single-component names to lists of values indexed by snapids. The
        # values which can be other PathMaps (for directories) or anything
        # except PathMaps and None (for files).
        if not isinstance(other, PathMap):
            self.store = {}
            self.maxid = [0]
            self.snapid = 0
        else:
            self.store = other.store
            self.maxid = other.maxid
            self.snapid = self.maxid[0] = self.maxid[0] + 1
        self.shared = False
    def snapshot(self):
        "Return a copy-on-write snapshot of the set."
        r = PathMap(self)
        if self.snapid < r.snapid - 1:
            # Late snapshot of an "old" PathMap. Restore values which may
            # have changed since. This is uncommon, don't over-optimize.
            for component in self.store: # _elt_items() would skip None
                r._elts_set(component, self._elts_get(component))
        for _, v in r._elts_items():
            if isinstance(v, PathMap):
                v.shared = True
        return r
    def update(self, other):
        "Insert all files of other into the PathMap, overwriting if needed."
        for n, v in other._elts_items():
            self._elts_set(n, v)
            if isinstance(v, PathMap):
                v.shared = True
    def copy_from(self, target_path, source_pathset, source_path):
        "Insert, at target_path, a snapshot of source_path in source_pathset."
        source_obj = source_pathset._find(source_path)
        if source_obj is None:
            return
        if source_obj is source_pathset:
            # Do not share toplevel instances, only inner ones
            source_obj = source_obj.snapshot()
        elif isinstance(source_obj, PathMap):
            source_obj.shared = True
        self._insert(target_path, source_obj)
    def ls_R(self, path):
        elt = self._find(path)
        if isinstance(elt, PathMap):
            return iter(elt)
        return iter(()) # empty iterator
    def __contains__(self, path):
        "Return true if path is present in the set as a file."
        elt = self._find(path)
        return not isinstance(elt, PathMap) and elt is not None
    def __getitem__(self, path):
        "Return the value associated with a specified path."
        elt = self._find(path)
        if elt is None or isinstance(elt, PathMap):
            # This is not quite like indexing, which would throw IndexError
            return None
        return elt
    def __setitem__(self, path, value):
        "Add a filename to the set, with associated value (not None)."
        assert value is not None
        self._insert(path, value)
    def __delitem__(self, path):
        """Remove a filename, or all descendents of a directory name,
        from the set."""
        basename, components = self._split_path(path)
        assert(not self.shared)
        for component in components:
            nxt = self._elts_get(component)
            if not isinstance(nxt, PathMap):
                return
            if nxt.shared:
                nxt = self._elts_set(component, nxt.snapshot())
            self = nxt
        # Set value to None since PathMap doesn't tell None and absence apart
        self._elts_set(basename, None)
    def __nonzero__(self):
        "Return true if any filenames are present in the set."
        return any(v for _, v in self._elts_items())
    def __len__(self):
        "Return the number of files in the set."
        return sum(len(v) if isinstance(v, PathMap) else 1
                for _, v in self._elts_items())
    def iteritems(self):
        for (name, value) in sorted(self._elts_items()):
            if isinstance(value, PathMap):
                for path, v in value.iteritems():
                    yield (os.path.join(name, path), v)
            elif value is not None:
                yield (name, value)
    def __iter__(self):
        return itertools.imap(operator.itemgetter(0), self.iteritems())
    def __str__(self):
        return '<PathMap: {}>'.format(' '.join(self))
    # Return the current value associated with the component in the store
    def _elts_get(self, component):
        snaplist = self.store.get(component) or [None]
        return snaplist[min(self.snapid, len(snaplist) - 1)]
    # Set the current value associated with the component in the store
    def _elts_set(self, component, value):
        snaplist = self.store.setdefault(component, [None])
        needed = min(self.maxid[0], self.snapid + 1) + 1
        if len(snaplist) < needed:
            last = snaplist[-1]
            snaplist.extend(last for _ in xrange(len(snaplist), needed))
        snaplist[self.snapid] = value
        return value
    # Iterate through (component, current values) pairs
    def _elts_items(self):
        snapid = self.snapid
        for component, snaplist in self.store.iteritems():
            val = snaplist[min(snapid, len(snaplist) - 1)]
            if val is not None: yield (component, val)
    # Insert obj at the location given by components.
    def _insert(self, path, obj):
        basename, components = self._split_path(path)
        if not basename:
            return
        assert(not self.shared)
        for component in components:
            nxt = self._elts_get(component)
            if not isinstance(nxt, PathMap):
                nxt = self._elts_set(component, PathMap())
            elif nxt.shared:
                nxt = self._elts_set(component, nxt.snapshot())
            self = nxt
        self._elts_set(basename, obj)
    # Return the object at the location given by components--either
    # the associated value if it's present as a filename, or a PathMap
    # containing the descendents if it's a directory name.  Return
    # None if the location does not exist in the set.
    def _find(self, path):
        basename, components = self._split_path(path)
        if not basename:
            return self
        for component in components:
            self = self._elts_get(component)
            if not isinstance(self, PathMap):
                return None
        return self._elts_get(basename)
    # Return a list of the components in path in reverse order.
    @staticmethod
    def _split_path(path):
        components = filter(None, os.path.normpath(path).split(os.sep))
        return (components.pop() if components else None, components)

class RepoStreamer:
    "Repository factory driver class for all repo analyzers."
    def __init__(self, extractor):
        self.markseq = 0
        self.tagseq = 0
        self.commits = {}
        self.commit_map = {}
        self.filemap = {}
        self.hash_to_mark = {}
        self.baton = None
        self.extractor = extractor
    def __newmark(self):
        self.markseq += 1
        mark = ":" + str(self.markseq)
        return mark
    def extract(self, repo, progress=True):
        if not self.extractor.isclean():
            raise Recoverable("directory %s has unsaved changes." % os.getcwd())
        repo.makedir()
        with Baton(prompt="Extracting", enable=progress) as self.baton:
            self.extractor.analyze(self.baton)
            self.extractor.pre_extract(repo)
            #saved_umask = os.umask(0)
            consume = copy.copy(self.extractor.get_revlist())
            while consume:
                revision = consume.pop(0)
                commit = Commit(repo)
                self.baton.twirl()
                present = self.extractor.checkout(revision, self.filemap)
                parents = self.extractor.get_parents(revision)
                commit.committer = Attribution(self.extractor.get_committer(revision))
                commit.authors = [Attribution(a) \
                                  for a in self.extractor.get_authors(revision)]
                commit.set_parents([self.commit_map[rev] for rev in parents])
                commit.set_branch(self.extractor.get_branch(revision))
                commit.comment = self.extractor.get_comment(revision)
                if debug_enable(DEBUG_EXTRACT):
                    msg = commit.comment
                    if msg == None:
                        msg = ""
                    announce("r%s: comment '%s'" % (revision, msg.strip()))
                self.filemap[revision] = {}
                for rev in parents:
                    self.filemap[revision].update(self.filemap[rev])
                if present:
                    removed = set(self.filemap[revision]) - set(present)
                    for path in present:
                        if os.path.isdir(path):
                            continue
                        if not os.path.exists(path):
                            announce("r%s: expected path %s does not exist!" % \
                                     (revision, path))
                            continue
                        newsig = signature(path)
                        if newsig.hashval in self.hash_to_mark:
                            #if debug_enable(DEBUG_EXTRACT):
                            #    announce("r%s: %s has old hash" \
                            #             % (revision, path))
                            # The file's hash corresponds to an existing
                            # blob; generate modify, copy, or rename as
                            # appropriate.
                            if path not in self.filemap[revision] \
                                   or self.filemap[revision][path]!=newsig:
                                if debug_enable(DEBUG_EXTRACT):
                                    announce("r%s: update for %s" % (revision, path))
                                # Iterating through dict items (with
                                # iteritems() or itemsview() for
                                # instance) while mutating the
                                # underlying dict is not supported by
                                # Python. The following loop thus uses
                                # items(), which returns a new
                                # independent list containing the
                                # (key,value) pairs.
                                for (oldpath, oldsig) in self.filemap[revision].items():
                                    if oldsig == newsig:
                                        if oldpath in removed:
                                            op = FileOp()
                                            op.construct('R', oldpath, path)
                                            commit.fileops.append(op)
                                            del self.filemap[revision][oldpath]
                                        elif oldpath != path:
                                            op = FileOp()
                                            op.construct('C', oldpath, path)
                                            commit.fileops.append(op)
                                        break
                                else:
                                    op = FileOp()
                                    op.construct('M',
                                                 newsig.perms,
                                                 self.hash_to_mark[newsig.hashval],
                                                 path)
                                    commit.fileops.append(op)
                        else:
                            # Content hash doesn't match any existing blobs
                            if debug_enable(DEBUG_EXTRACT):
                                announce("r%s: %s has new hash" \
                                         % (revision, path))
                            blobmark = self.__newmark()
                            self.hash_to_mark[newsig.hashval] = blobmark
                            # Actual content enters the representation
                            blob = Blob(repo)
                            blob.set_mark(blobmark)
                            shutil.copyfile(path, blob.blobfile(create=True))
                            blob.paths.append(path)
                            repo.addEvent(blob)
                            # Its new fileop is added to the commit
                            op = FileOp()
                            op.construct('M', newsig.perms, blobmark, path)
                            commit.fileops.append(op)
                        self.filemap[revision][path] = newsig
                    for tbd in removed:
                        op = FileOp()
                        op.construct('D', tbd)
                        commit.fileops.append(op)
                        del self.filemap[revision][tbd]
                self.extractor.cleanup(revision, True)
                if not parents and commit.branch != "refs/heads/master":
                    reset = Reset(repo)
                    reset.ref = commit.branch
                    repo.addEvent(reset)
                commit.fileops.sort(key=FileOp.sortkey)
                commit.fossil_id = revision
                commit.properties.update(self.extractor.get_properties(revision)) 
                commit.set_mark(self.__newmark())
                self.commit_map[revision] = commit
                if debug_enable(DEBUG_EXTRACT):
                    announce("r%s: gets mark %s (%d ops)" % (revision, commit.mark, len(commit.fileops)))
                repo.addEvent(commit)
            # Now append reset objects
            for (resetname, revision) in sorted(self.extractor.iter_resets(),
                                                key=operator.itemgetter(1)):
                # FIXME: what if revision is unknown ? keep previous behavior for now
                reset = Reset(repo, target=self.commit_map[revision])
                reset.ref = resetname
                repo.addEvent(reset)
            # Last, append tag objects.
            for tag in sorted(self.extractor.get_taglist(),
                              key=operator.attrgetter("tagger.date")):
                # Hashes produced by the GitExtractor are turned into proper
                # committish marks here.
                c = self.commit_map.get(tag.committish)
                if c is None:
                    # FIXME: we should probably error here, keep previous
                    # behavior for now
                    tag.remember(repo, committish=None)
                else:
                    tag.remember(repo, target=c)
                repo.addEvent(tag)
            self.extractor.post_extract(repo)
        return repo

# Stream parsing
#
# The Subversion dumpfile format is documented at
#
# https://svn.apache.org/repos/asf/subversion/trunk/notes/dump-load-format.txt

# Use numeric codes rather than (un-interned) strings
# to reduce working-set size.
SD_NONE = 0
SD_FILE = 1
SD_DIR = 2
SD_ADD = 0
SD_DELETE = 1
SD_CHANGE = 2
SD_REPLACE = 3

class StreamParser:
    "Parse a fast-import stream or Subversion dump to populate a Repository."
    class NodeAction(object):
        __slots__ = ("revision", "path", "kind", "action",
                     "from_rev", "from_path", "content_hash",
                     "from_hash", "blob", "props",
                     "from_set", "blobmark", "generated")
        # If these don't match the constants above, havoc will ensue
        ActionValues = ("add", "delete", "change", "replace")
        PathTypeValues = ("none", "file", "dir", "ILLEGAL-TYPE")
        def __init__(self):
            # These are set during parsing
            self.revision = None
            self.path = None
            self.kind = SD_NONE
            self.action = None
            self.from_rev = None
            self.from_path = None
            self.content_hash = None
            self.from_hash = None
            self.blob = None
            self.props = None
            # These are set during the analysis phase
            self.from_set = None
            self.blobmark = None
            self.generated = False
        def __str__(self):
            # Prefer dict's repr() to OrderedDict's verbose one
            fmt = dict.__repr__ if isinstance(self.props, dict) else repr
            return "<NodeAction: r{rev} {action} {kind} '{path}'" \
                    "{from_rev}{from_set}{generated}{props}>".format(
                    rev = self.revision,
                    action = "ILLEGAL-ACTION" if self.action is None else StreamParser.NodeAction.ActionValues[self.action],
                    kind = StreamParser.NodeAction.PathTypeValues[self.kind or -1],
                    path = self.path,
                    from_rev = " from=%s~%s" % (self.from_rev, self.from_path)
                                    if self.from_rev else "",
                    from_set = " sources=%s" % self.from_set
                                    if self.from_set else "",
                    generated = " generated" if self.generated else "",
                    props = " properties=%s" % fmt(self.props)
                                    if self.props else "")
    class RevisionRecord(object):
        __slots__ = ("nodes", "props")
        def __init__(self, nodes, props):
            self.nodes = nodes
            self.props = props
    # Native Subversion properties that we don't suppress: svn:externals
    # The reason for these suppressions is to avoid a huge volume of
    # junk file properties - cvs2svn in particular generates them like
    # mad.  We want to let through other properties that might carry
    # useful information.
    IgnoreProperties = {
        "svn:executable",  # We special-case this one elsewhere
        "svn:ignore",      # We special-case this one elsewhere
        "svn:special",     # We special-case this one elsewhere
        "svn:mergeinfo",   # We special-case this one elsewhere
        "svn:mime-type",
        "svn:keywords",
        "svn:needs-lock",
        "svn:eol-style",   # Don't want to suppress, but cvs2svn floods these.
        }
    # These are the default patterns globally ignored by Subversion.
    SubversionDefaultIgnores = """\
# A simulation of Subversion default ignores, generated by reposurgeon.
*.o
*.lo
*.la
*.al
.libs
*.so
*.so.[0-9]*
*.a
*.pyc
*.pyo
*.rej
*~
.#*
.*.swp
.DS_store
"""
    cvs2svn_tag_re = re.compile("This commit was manufactured by cvs2svn to create tag.*'([^']*)'")
    cvs2svn_branch_re = re.compile("This commit was manufactured by cvs2svn to create branch.*'([^']*)'")
    SplitSep = '.'
    def __init__(self, repo):
        self.repo = repo
        self.fp = None
        self.import_line = 0
        self.markseq = 0
        self.ccount = 0
        self.linebuffers = []
        self.warnings = []
        # Everything below here is Subversion-specific
        self.branches = {}
        self.branchlink = {}
        self.branchdeletes = set()
        self.branchcopies = set()
        self.generated_deletes = []
        self.revisions = collections.OrderedDict()
        self.copycounts = collections.OrderedDict()
        self.hashmap = {}
        self.permissions = {}
        self.fileop_branchlinks  = set()
        self.directory_branchlinks  = set()
        self.active_gitignores = set()
        self.mergeinfo = {}
    def error(self, msg):
        "Throw fatal error during parsing."
        raise Fatal(msg + " at line " + repr(self.import_line))
    def warn(self, msg):
        "Display a parse warning associated with a line."
        if self.import_line:
            complain(msg + " at line " + repr(self.import_line))
        else:
            complain(msg)
    def gripe(self, msg):
        "Display or queue up an error message."
        if verbose < 2:
            self.warnings.append(msg)
        else:
            complain(msg)
    def __newmark(self):
        self.markseq += 1
        mark = ":" + str(self.markseq)
        return mark
    def readline(self):
        if self.linebuffers:
            line = self.linebuffers.pop()
        else:
            line = self.fp.readline()
        self.ccount += len(line)
        self.import_line += 1
        return line
    def tell(self):
        "Return the current read offset in the source stream."
        try:
            return self.fp.tell()
        except IOError:
            return None
    def pushback(self, line):
        self.ccount -= len(line)
        self.import_line -= 1
        self.linebuffers.append(line)
    # Helpers for import-stream files
    def fi_read_data(self, line=None):
        "Read a fast-import data section."
        if not line:
            line = self.readline()
        if line.startswith("data <<"):
            delim = line[7:]
            data = ""
            start = self.tell()
            while True:
                dataline = self.readline()
                if dataline == delim:
                    break
                elif not dataline:
                    raise Fatal("EOF while reading blob")
                else:
                    data += dataline
        elif line.startswith("data"):
            try:
                count = int(line[5:])
                start = self.tell()
                data = self.fp.read(count)
            except ValueError:
                self.error("bad count in data")
        elif line.startswith("property"):
            line = line[9:]			# Skip this token
            line = line[line.index(" "):]	# Skip the property name
            nextws = line.index(" ")
            count = int(line[:nextws-1])
            start = self.tell()
            data = line[nextws:] + self.fp.read(count)
        else:
            self.error("malformed data header %s" % repr(line))
        line = self.readline()
        if line != '\n':
            self.pushback(line) # Data commands optionally end with LF
        return (data, start)
    def fi_parse_fileop(self, fileop):
        # Read a fast-import fileop
        if fileop.ref[0] == ':':
            pass
        elif fileop.ref == 'inline':
            fileop.inline = self.fi_read_data()[0]
        else:
            self.error("unknown content type in filemodify")
    # Helpers for Subversion dumpfiles
    @staticmethod
    def sd_body(line):
        # Parse the body from a Subversion header line
        return line.split(":")[1].strip()
    def sd_require_header(self, hdr):
        # Consume a required header line
        line = self.readline()
        self.ccount += len(line)
        if not line.startswith(hdr):
            self.error('required %s header missing' % hdr)
        return StreamParser.sd_body(line)
    def sd_require_spacer(self):
        line = self.readline()
        if line.strip():
            self.error('found %s expecting blank line' % repr(line))
    def sd_read_blob(self, length):
        # Read a Subversion file-content blob.
        content = self.fp.read(length)
        if self.fp.read(1) != '\n':
            self.error("EOL not seen where expected, Content-Length incorrect")
        self.import_line += content.count('\n') + 1
        self.ccount += len(content) + 1
        return content
    def sd_read_props(self, target, checklength):
        # Parse a Subversion properties section, return as an OrderedDict.
        props = collections.OrderedDict()
        self.ccount = 0
        while self.ccount < checklength:
            line = self.readline()
            if debug_enable(DEBUG_SVNPARSE):
                announce("readprops, line %d: %s" % \
                         (self.import_line, repr(line)))
            if line.startswith("PROPS-END"):
                # This test should be !=, but I get random off-by-ones from
                # real dumpfiles - I don't know why.
                if self.ccount < checklength:
                    self.error("expected %d property chars, got %d"\
                               % (checklength, self.ccount))
                break
            elif not line.strip():
                continue
            elif line[0] == "K":
                key = self.sd_read_blob(int(line.split()[1]))
                line = self.readline()
                if line[0] != 'V':
                    raise self.error("property value garbled")
                value = self.sd_read_blob(int(line.split()[1]))
                props[key] = value
                if debug_enable(DEBUG_SVNPARSE):
                    announce("readprops: on %s, setting %s = %s"\
                             % (target, key, repr(value)))
        return props
    #
    # The main event
    #
    def fast_import(self, fp, progress=False):
        "Initialize the repo from a fast-import stream or Subversion dump."
        self.repo.makedir()
        self.repo.timings = [("start", time.time())]
        try:
            self.fp = fp
            # Optimization: if we're reading from a plain file,
            # no need to clone all the blobs. 
            if os.path.isfile(self.fp.name):
                self.repo.seekstream = fp
            with Baton("reposurgeon: from %s" % os.path.relpath(fp.name), enable=progress) as baton:
                self.import_line = self.repo.fossil_count = 0
                self.linebuffers = []
                # First, determine the input type
                line = self.readline()
                if line.startswith("SVN-fs-dump-format-version: "):
                    if StreamParser.sd_body(line) not in ("1", "2"):
                        raise Fatal("unsupported dump format version %s" \
                                    % StreamParser.sd_body(line))
                    # Beginning of Subversion dump parsing
                    while True:
                        line = self.readline()
                        if not line:
                            break
                        elif not line.strip():
                            continue
                        elif line.startswith("UUID:"):
                            self.repo.uuid = StreamParser.sd_body(line)
                        elif line.startswith("Revision-number: "):
                            # Begin Revision processing
                            baton.twirl()
                            if debug_enable(DEBUG_SVNPARSE):
                                announce("revision parsing, line %d: begins" % \
                                     (self.import_line))
                            revision = StreamParser.sd_body(line)
                            plen = int(self.sd_require_header("Prop-content-length"))
                            self.sd_require_header("Content-length")
                            self.sd_require_spacer()
                            props = self.sd_read_props("commit", plen)
                            # Parsing of the revision header is done
                            node = None # pacify pylint
                            nodes = []
                            in_header = False
                            plen = tlen = -1
                            # Node list parsing begins
                            while True:
                                line = self.readline()
                                if debug_enable(DEBUG_SVNPARSE):
                                    announce("node list parsing, line %d: %s" % \
                                             (self.import_line, repr(line)))
                                if not line:
                                    break
                                elif not line.strip():
                                    if not in_header:
                                        continue
                                    else:
                                        if plen > -1:
                                            node.props = self.sd_read_props(node.path, plen)
                                        if tlen > -1:
                                            start = self.tell()
                                            text = self.sd_read_blob(tlen)
                                            node.blob = Blob(self.repo)
                                            # Ugh - cope with strange
                                            # undocumented Subversion format
                                            # for storing links.  Apparently the
                                            # dumper puts 'link ' in front of
                                            # the path and the loader (or at
                                            # least git-svn) removes it.
                                            if node.props and "svn:special" in node.props:
                                                if text.startswith("link "):
                                                    node.blob.set_content(
                                                        text[5:], start+5)
                                                else:
                                                    # Don't know if this will
                                                    # ever happen.  Best to fail
                                                    # loudly...
                                                    self.error("unexpected link prefix in %s" % repr(text))
                                            else:
                                                node.blob.set_content(text, start)
                                        node.revision = revision
                                        nodes.append(node)
                                        in_header = False
                                elif line.startswith("Revision-number: "):
                                    self.pushback(line)
                                    break
                                # Node processing begins
                                elif line.startswith("Node-path: "):
                                    node = StreamParser.NodeAction()
                                    node.path = StreamParser.sd_body(line)
                                    plen = tlen = -1
                                    in_header = True
                                elif line.startswith("Node-kind: "):
                                    node.kind = StreamParser.sd_body(line)
                                    node.kind = StreamParser.NodeAction.PathTypeValues.index(node.kind)
                                    if node.kind is None:
                                        self.error("unknown kind %s"%node.kind)
                                elif line.startswith("Node-action: "):
                                    node.action = StreamParser.sd_body(line)
                                    node.action = StreamParser.NodeAction.ActionValues.index(node.action)
                                    
                                    if node.action is None:
                                        self.error("unknown action %s" \
                                                   % node.action)
                                elif line.startswith("Node-copyfrom-rev: "):
                                    node.from_rev = StreamParser.sd_body(line)
                                elif line.startswith("Node-copyfrom-path: "):
                                    node.from_path = StreamParser.sd_body(line)
                                elif line.startswith("Text-copy-source-md5: "):
                                    node.from_hash = StreamParser.sd_body(line)
                                elif line.startswith("Text-content-md5: "):
                                    node.content_hash = StreamParser.sd_body(line)
                                elif line.startswith("Text-content-sha1: "):
                                    continue
                                elif line.startswith("Text-content-length: "):
                                    tlen = int(StreamParser.sd_body(line))
                                elif line.startswith("Prop-content-length: "):
                                    plen = int(StreamParser.sd_body(line))
                                elif line.startswith("Content-length: "):
                                    continue
                                else:
                                    if debug_enable(DEBUG_SVNPARSE):
                                        announce("node list parsing, line %d: uninterpreted line %s" % \
                                             (self.import_line, repr(line)))
                                    continue
                                # Node processing ends
                            # Node list parsing ends
                            self.revisions[revision] = StreamParser.RevisionRecord(nodes, props)
                            self.repo.fossil_count += 1
                            if debug_enable(DEBUG_SVNPARSE):
                                announce("revision parsing, line %d: ends" % \
                                         (self.import_line))
                            # End Revision processing
                    # End of Subversion dump parsing
                    self.repo.timings.append(("parsing", time.time()))
                    self.svn_process(baton)
                    elapsed = time.time() - baton.time
                    baton.twirl("%d revisions (%d/s)" %
                                 (self.repo.fossil_count,
                                  int(self.repo.fossil_count/elapsed)))
                else:
                    self.pushback(line)
                    # Beginning of fast-import stream parsing
                    while True:
                        line = self.readline()
                        if not line:
                            break
                        elif not line.strip():
                            continue
                        elif line.startswith("blob"):
                            blob = Blob(self.repo)
                            line = self.readline()
                            if line.startswith("mark"):
                                blob.set_mark(line[5:].strip())
                                (blobcontent, blobstart) = self.fi_read_data()
                                # Parse CVS and Subversion $-headers
                                # There'd better not be more than one of these.
                                for m in re.finditer(r"\$Id *:[^$]+\$",
                                                     blobcontent):
                                    fields = m.group(0).split()
                                    if len(fields) < 2:
                                        self.gripe("malformed $-cookie '%s'" % m.group(0))
                                    else:
                                        # Save file basename and CVS version
                                        if fields[1].endswith(",v"):
                                            # CVS revision
                                            blob.cookie = (fields[1][:-2], fields[2])
                                        else:
                                            # Subversion revision
                                            blob.cookie = fields[1]
                                for m in re.finditer(r"\$Revision *: *([^$]*)\$",
                                                     blobcontent):
                                    rev = m.group(0).strip()
                                    if '.' not in rev:
                                        # Subversion revision
                                        blob.cookie = rev
                                blob.set_content(blobcontent, blobstart)
                            else:
                                self.error("missing mark after blob")
                            self.repo.addEvent(blob)
                            baton.twirl()
                        elif line.startswith("data"):
                            self.error("unexpected data object")
                        elif line.startswith("commit"):
                            baton.twirl()
                            commitbegin = self.import_line
                            commit = Commit(self.repo)
                            commit.set_branch(line.split()[1])
                            while True:
                                line = self.readline()
                                if not line:
                                    break
                                elif line.startswith("mark"):
                                    commit.set_mark(line[5:].strip())
                                elif line.startswith("author"):
                                    try:
                                        commit.authors.append(Attribution(line[7:]))
                                    except ValueError:
                                        self.error("malformed author line")
                                elif line.startswith("committer"):
                                    try:
                                        commit.committer = Attribution(line[10:])
                                    except ValueError:
                                        self.error("malformed committer line")
                                elif line.startswith("property"):
                                    fields = line.split(" ")
                                    if len(fields) < 3:
                                        self.error("malformed property line")
                                    elif len(fields) == 3:
                                        commit.properties[fields[1]] = True
                                    else:
                                        name = fields[1]
                                        length = int(fields[2])
                                        value = " ".join(fields[3:])
                                        if len(value) < length:
                                            value += fp.read(length-len(value))
                                            if fp.read(1) != '\n':
                                                self.error("trailing junk on property value")
                                        elif len(value) == length + 1:
                                            value = value[:-1] # Trim '\n'
                                        else:
                                            value += self.fp.read(length - len(value))
                                            assert self.fp.read(1) == '\n'
                                        commit.properties[name] = value
                                        # Generated by cvsps
                                        if name == "cvs-revisions":
                                            for line in value.split('\n'):
                                                if line:
                                                    self.repo.fossil_map["CVS:"+line] = commit
                                elif line.startswith("data"):
                                    commit.comment = self.fi_read_data(line)[0]
                                elif line.startswith("from") or line.startswith("merge"):
                                    commit.add_parent(line.split()[1])
                                # Handling of file ops begins.
                                elif line[0] in ("C", "D", "R"):
                                    commit.fileops.append(FileOp(self.repo.vcs).parse(line))
                                elif line == "deleteall\n":
                                    commit.fileops.append(FileOp(self.repo.vcs).parse("deleteall"))
                                elif line[0] == "M":
                                    fileop = FileOp(self.repo.vcs).parse(line)
                                    if fileop.ref != 'inline':
                                        try:
                                            self.repo.objfind(fileop.ref).paths.append(fileop.path)
                                        except AttributeError:
                                            self.error("ref could not be resolved")
                                    commit.fileops.append(fileop)
                                    if fileop.mode == "160000":
                                        # This is a submodule link.  The ref
                                        # field is a SHA1 hash and the path
                                        # is an external reference name.
                                        # Don't try to collect data, just pass
                                        # it through.
                                        self.warn("submodule link")
                                    else:
                                        # 100644, 100755, 120000.
                                        self.fi_parse_fileop(fileop)
                                elif line[0] == "N":
                                    fileop = FileOp(self.repo.vcs).parse(line)
                                    commit.fileops.append(fileop)
                                    self.fi_parse_fileop(fileop)
                                # Handling of file ops ends.
                                elif line.isspace():
                                    # This handles slightly broken
                                    # exporters like the bzr-fast-export
                                    # one that may tack an extra LF onto
                                    # the end of data objects.  With it,
                                    # we don't drop out of the
                                    # commit-processing loop until we see
                                    # a *nonblank* line that doesn't match
                                    # a commit subpart.
                                    continue
                                else:
                                    # Dodgy bzr autodetection hook...
                                    if not self.repo.vcs:
                                        if "branch-nick" in commit.properties:
                                            for vcs in vcstypes:
                                                if vcs.name == "bzr":
                                                    self.repo.vcs = vcs
                                                    break
                                    self.pushback(line)
                                    break
                            if not (commit.mark and commit.committer):
                                self.import_line = commitbegin
                                self.error("missing required fields in commit")
                            if commit.mark is None:
                                self.warn("unmarked commit")
                            self.repo.addEvent(commit)
                            baton.twirl()
                        elif line.startswith("reset"):
                            reset = Reset(self.repo)
                            reset.ref = line[6:].strip()
                            line = self.readline()
                            if line.startswith("from"):
                                reset.remember(self.repo, committish=line[5:].strip())
                            else:
                                self.pushback(line)
                            self.repo.addEvent(reset)
                            baton.twirl()
                        elif line.startswith("tag"):
                            tagger = None
                            tagname = line[4:].strip()
                            line = self.readline()
                            if line.startswith("from"):
                                referent = line[5:].strip()
                            else:
                                self.error("missing from after tag")
                            line = self.readline()
                            if line.startswith("tagger"):
                                try:
                                    tagger = Attribution(line[7:])
                                except ValueError:
                                    self.error("malformed tagger line")
                            else:
                                self.warn("missing tagger after from in tag")
                                self.pushback(line)
                            self.repo.addEvent(Tag(repo = self.repo,
                                                   name = tagname,
                                                   committish = referent,
                                                   tagger = tagger,
                                                   comment = self.fi_read_data()[0]))
                            baton.twirl()
                        else:
                            # Simply pass through any line we don't understand.
                            self.repo.addEvent(Passthrough(line))
                    # End of fast-import parsing
                    self.repo.timings.append(("parsing", time.time()))
                self.import_line = 0
            if self.warnings:
                for warning in self.warnings:
                    complain(warning)
        except KeyboardInterrupt:
            nuke(self.repo.subdir(), "reposurgeon: import interrupted, removing %s" % self.repo.subdir())
            raise KeyboardInterrupt
    #
    # The rendezvous between parsing and object building for import
    # streams is pretty trivial and best done inline in the parser
    # because reposurgeon's internal structures are designed to match
    # those entities. For Subversion dumpfiles, on the other hand,
    # there's a fair bit of impedance-matching required.  That happens
    # in the following functions.
    #
    @staticmethod
    def node_permissions(node):
        "Fileop permissions from node properties"
        if node.props:
            if "svn:executable" in node.props:
                return 0o100755
            elif "svn:special" in node.props:
                # Map to git symlink, which behaves the same way.
                # Blob contents is the path the link should resolve to. 
                return 0o120000
        return 0o100644
    def branchpath(self, path):
        "Strip the branch prefix from a path."
        if not self.branches or path.count(os.sep) == 0:
            return path
        for branch in self.branches:
            if path.startswith(branch):
                return path[len(branch):]
        raise Fatal("couldn't assign %s to a branch in %s" \
                    % (path, self.branches.keys()))
    def svn_process(self, baton):
        "Subversion actions to import-stream commits."
        # Find all copy sources and compute the set of branches
        if debug_enable(DEBUG_EXTRACT):
            announce("Pass 1")
        copynodes = []
        for revision in self.revisions:
            record = self.revisions[revision]
            for node in record.nodes:
                if node.from_path is not None:
                    copynodes.append(node)
                    if debug_enable(DEBUG_EXTRACT):
                        announce("copynode at %s" % node)
                if node.action == SD_ADD and node.kind == SD_DIR and not node.path+os.sep in self.branches and not global_options['svn_nobranch']:
                    for trial in global_options['svn_branchify']:
                        if '*' not in trial and trial == node.path:
                            self.branches[node.path+os.sep] = None
                        elif trial.endswith(os.sep + '*') \
                                 and os.path.dirname(trial) == os.path.dirname(node.path):
                            self.branches[node.path+os.sep] = None
                        elif trial == '*' and not node.path + os.sep + '*' in global_options['svn_branchify'] and node.path.count(os.sep) < 1:
                            self.branches[node.path+os.sep] = None
                    if node.path+os.sep in self.branches and debug_enable(DEBUG_TOPOLOGY):
                        announce("%s recognized as a branch" % node.path+os.sep)
            # Per-commit spinner disabled because this pass is fast
            #baton.twirl()
        copynodes.sort(key=operator.attrgetter("from_rev"))
        self.repo.timings.append(["copynodes", time.time()])
        baton.twirl()
        # Build filemaps.
        if debug_enable(DEBUG_EXTRACT):
            announce("Pass 2")
        filemaps = {}
        filemap = PathMap()
        split_commits = []
        for (revision, record) in self.revisions.items():
            for node in record.nodes:
                # Mutate the filemap according to copies
                if node.from_rev:
                    assert int(node.from_rev) < int(revision)
                    filemap.copy_from(node.path, filemaps[node.from_rev],
                                      node.from_path)
                    if debug_enable(DEBUG_FILEMAP):
                        announce("r%s~%s copied to %s" \
                                 % (node.from_rev, node.from_path, node.path))
                # Mutate the filemap according to adds/deletes/changes
                if node.action == SD_ADD and node.kind == SD_FILE:
                    filemap[node.path] = node
                    if debug_enable(DEBUG_FILEMAP):
                        announce("r%s~%s added" % (node.revision, node.path))
                elif node.action == SD_DELETE:
                    if node.kind == SD_NONE:
                        node.kind = SD_FILE if node.path in filemap else SD_DIR
                    # Snapshot the deleted paths before removing them.
                    node.from_set = PathMap()
                    node.from_set.copy_from(node.path, filemap, node.path)
                    del filemap[node.path]
                    if debug_enable(DEBUG_FILEMAP):
                        announce("r%s~%s deleted" \
                                 % (node.revision, node.path))
                elif node.action in (SD_CHANGE, SD_REPLACE) and node.kind == SD_FILE:
                    filemap[node.path] = node
                    if debug_enable(DEBUG_FILEMAP):
                        announce("r%s~%s changed" % (node.revision, node.path))
            filemaps[revision] = filemap.snapshot()
            baton.twirl()
        self.repo.timings.append(["filemaps", time.time()]) 
        baton.twirl()
        # Blows up huge on large repos...
        #if debug_enable(DEBUG_FILEMAP):
        #    announce("filemaps %s" % filemaps)
        # Build from sets in each directory copy record.
        if debug_enable(DEBUG_EXTRACT):
            announce("Pass 3")
        for copynode in copynodes:
            if debug_enable(DEBUG_FILEMAP):
                announce("r%s copynode filemap is %s" \
                         % (copynode.from_rev, filemaps[copynode.from_rev]))
            copynode.from_set = PathMap()
            copynode.from_set.copy_from(copynode.from_path,
                                        filemaps[copynode.from_rev],
                                        copynode.from_path)
            # Sanity check: if the directory node has no from set, but
            # there are files underneath it, this means the directory
            # structure implied by the filemaps is not consistent with
            # what's in the parsed Subversion nodes.  This should never
            # happen.
            if not copynode.from_set and \
                    any(filemaps[copynode.revision].ls_R(node.path)):
                self.gripe("inconsistently empty from set for %s" % copynode)
            baton.twirl()
        self.repo.timings.append(["copysets", time.time()]) 
        baton.twirl()
        # Build commits
        # This code can eat your processor, so we make it give up
        # its timeslice at reasonable intervals. Needed because
        # it doesn't hit the disk.
        if debug_enable(DEBUG_EXTRACT):
            announce("Pass 4")
        previous = None
        for (revision, record) in self.revisions.items():
            if debug_enable(DEBUG_EXTRACT):
                announce("Revision %s:" % revision)
            for node in record.nodes:
                # if node.props is None, no property section.
                # if node.blob is None, no text section.
                try:
                    assert node.action in (SD_CHANGE, SD_ADD, SD_DELETE, SD_REPLACE)
                    assert node.blob is not None or \
                           node.props is not None or \
                           node.from_rev or \
                           node.action in (SD_ADD, SD_DELETE)
                    assert (node.from_rev is None) == (node.from_path is None)
                    assert node.kind in (SD_FILE, SD_DIR)
                    assert node.kind != SD_NONE or node.action == SD_DELETE
                    assert node.action in (SD_ADD, SD_REPLACE) or not node.from_rev
                except AssertionError:
                    raise Fatal("forbidden operation in dump stream at r%s: %s" \
                                % (revision, node))
            commit = Commit(self.repo)
            try:
                ad = record.props.pop("svn:date")
            except KeyError as key:
                self.error("missing required %s" % key)
            if "svn:author" in record.props:
                au = record.props.pop("svn:author")
            else:
                au = "no-author"
            if "svn:log" in record.props:
                commit.comment = record.props.pop("svn:log")
                if not commit.comment.endswith("\n"):
                    commit.comment += "\n"
            if global_options["svn_use_uuid"]:
                attribution = "%s <%s@%s> %s" % (au, au, self.repo.uuid, ad)
            else:
                attribution = "%s <%s> %s" % (au, au, ad)
            commit.committer = Attribution(attribution)
            commit.properties.update(record.props)
            # Zero revision is never interesting - no operations, no
            # comment, no author, it's just a start marker for a
            # non-incremental dump.
            if revision == "0": 
                continue
            expanded_nodes = []
            has_properties = set()
            for (n, node) in enumerate(record.nodes):
                if debug_enable(DEBUG_EXTRACT):
                    announce("r%s:%d: %s" % (revision, n+1, node))
                elif node.kind == SD_DIR \
                         and node.action != SD_CHANGE \
                         and debug_enable(DEBUG_TOPOLOGY):
                    announce(str(node))
                # Handle per-path properties.
                if node.props is not None:
                    if "cvs2svn:cvs-rev" in node.props:
                        cvskey = "CVS:%s:%s" % (node.path,
                                                node.props["cvs2svn:cvs-rev"])
                        self.repo.fossil_map[cvskey] = commit
                        del node.props["cvs2svn:cvs-rev"]
                    if not global_options["svn_ignore_properties"]:
                        prop_items = ((prop, val) \
                                        for (prop,val) in node.props.iteritems() \
                                        if prop not in StreamParser.IgnoreProperties)
                        try:
                            first = next(prop_items)
                        except StopIteration:
                            if node.path in has_properties:
                                self.gripe("r%s~%s: properties cleared." \
                                             % (node.revision, node.path))
                                has_properties.discard(node.path)
                        else:
                            self.gripe("r%s~%s properties set:" \
                                                   % (node.revision, node.path))
                            for prop, val in itertools.chain((first,), prop_items):
                                self.gripe("\t%s = '%s'" % (prop, val))
                            has_properties.add(node.path)
                if node.kind == SD_FILE:
                    expanded_nodes.append(node)
                elif node.kind == SD_DIR:
                    # os.sep is appended to avoid collisions with path
                    # prefixes.
                    node.path += os.sep                   
                    if node.from_path:
                        node.from_path += os.sep
                    if node.action == SD_ADD:
                        if node.path == "trunk" + os.sep and not node.props:
                            node.props = {"svn:ignore":
                                          StreamParser.SubversionDefaultIgnores}
                    elif node.action in (SD_DELETE, SD_REPLACE):
                        if node.path in self.branches:
                            self.branchdeletes.add(node.path)
                            expanded_nodes.append(node)
                        else:
                            # A delete or replace with no from set
                            # can occur if the directory is empty.
                            # We can just ignore this case.
                            if node.from_set is not None:
                                for child in node.from_set:
                                    if debug_enable(DEBUG_EXTRACT):
                                        announce("r%s: deleting %s" \
                                                 % (revision, child))
                                    newnode = StreamParser.NodeAction()
                                    newnode.path = child
                                    newnode.revision = revision
                                    newnode.action = SD_DELETE
                                    newnode.kind = SD_FILE
                                    newnode.generated = True
                                    expanded_nodes.append(newnode)
                                for ignorepath in (gi for gi in self.active_gitignores if gi.startswith(node.path)):
                                    newnode = StreamParser.NodeAction()
                                    newnode.path = ignorepath
                                    newnode.revision = revision
                                    newnode.action = SD_DELETE
                                    newnode.kind = SD_FILE
                                    newnode.generated = True
                                    expanded_nodes.append(newnode)
                        # Property settings can be present on either
                    # SD_ADD or SD_CHANGE actions.
                    if node.props is not None:
                        if debug_enable(DEBUG_EXTRACT):
                            announce("r%s: setting properties %s on %s" \
                                     % (revision, node.props, node.path))
                        # svn:mergeinfo gets handled here
                        if 'svn:mergeinfo' in node.props:
                            val = node.props['svn:mergeinfo']
                            # Ignore an invalid empty value set (not
                            # no effect) by some tools.
                            if val:
                                # The general case is multiline; each line
                                # may describe multiple spans merging
                                # to this revision.  For each span, we
                                # link from the last revision.
                                for line in val.split('\n'):
                                    if line and ':' in line:
                                        (_, ranges) = line.split(":")
                                        for span in ranges.split(","):
                                            if '-' in span:
                                                (_, end) = span.split("-")
                                            else:
                                                end = span
                                            # Because mergeinfo properties will
                                            # persist like other properties,
                                            # we want to record the earliest
                                            # instance of a merge to a given
                                            # path from a given source revision
                                            # then ignore later ones.
                                            if (node.path, end) not in self.mergeinfo:
                                                self.mergeinfo[(node.path, end)] = revision
                                            if debug_enable(DEBUG_EXTRACT):
                                                announce("r%s: mergeinfo link from %s\n" \
                                                        % (revision, end))
                        # svn:ignore gets handled here,
                        if node.path == os.sep:
                            gitignore_path = ".gitignore"
                        else:
                            gitignore_path = os.path.join(node.path,
                                                          ".gitignore")
                        # There are no other directory properties that can
                        # turn into fileops.
                        if "svn:ignore" in node.props:
                            blob = Blob(self.repo)
                            blob.set_content(node.props["svn:ignore"])
                            newnode = StreamParser.NodeAction()
                            newnode.path = gitignore_path
                            newnode.revision = revision
                            newnode.action = SD_ADD
                            newnode.kind = SD_FILE
                            newnode.blob = blob
                            if debug_enable(DEBUG_IGNORES):
                                announce("r%s: queuing up %s generation with:\n%s." % (revision, newnode.path, node.props["svn:ignore"]))
                            # Must append rather than simply performing.
                            # Otherwise when the property is unset we
                            # won't have the right thing happen.
                            newnode.generated = True
                            expanded_nodes.append(newnode)
                            self.active_gitignores.add(gitignore_path)
                        elif gitignore_path in self.active_gitignores:
                            newnode = StreamParser.NodeAction()
                            newnode.path = gitignore_path
                            newnode.revision = revision
                            newnode.action = SD_DELETE
                            newnode.kind = SD_FILE
                            if debug_enable(DEBUG_IGNORES):
                                announce("r%s: queuing up %s deletion." % (revision, newnode.path))
                            newnode.generated = True
                            expanded_nodes.append(newnode)
                            self.active_gitignores.remove(gitignore_path)
                    # Handle directory copies.
                    # If this is a copy between branches, no fileop should be
                    # issued until there is an actual file modification on the
                    # new branch. Instead, remember that the branch root
                    # inherits the tree of the source branch and should not
                    # start with a deleteall.
                    # Exception: If the target branch has been deleted, perform
                    # a normal copy and interpret this as an ad-hoc branch merge.
                    if node.from_path:
                        branchcopy = node.from_path in self.branches \
                                         and node.path in self.branches \
                                         and node.path not in self.branchdeletes
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce("r%s: directory copy to %s from r%s~%s (branchcopy %s)" \
                                     % (revision,
                                        node.path,
                                        node.from_rev,
                                        node.from_path,
                                        branchcopy))
                        if not branchcopy:
                            self.branchdeletes.discard(node.path)
                        else:
                            self.branchcopies.add(node.path)
                            continue
                        for source in node.from_set:
                            lookback = filemaps[node.from_rev][source]
                            if lookback is None:
                                raise Fatal("r%s: can't find ancestor %s" \
                                         % (revision, source))
                            subnode = StreamParser.NodeAction()
                            subnode.path = os.path.join(node.path,
                                                        source[len(node.from_path+os.sep)-1:])
                            subnode.revision = revision
                            subnode.from_path = lookback.path
                            subnode.from_rev = lookback.revision
                            subnode.from_hash = lookback.content_hash
                            subnode.action = SD_ADD
                            subnode.kind = SD_FILE
                            if debug_enable(DEBUG_TOPOLOGY):
                                announce("r%s: generated copy r%s~%s -> %s" \
                                         % (revision,
                                            subnode.from_rev,
                                            subnode.from_path,
                                            subnode.path))
                            subnode.generated = True
                            expanded_nodes.append(subnode)
            # Lift .cvsignore files, which we can assume are fossils
            # from a bygone era and happen to have syntax upward-compatible
            # with that of .gitignore
            for node in expanded_nodes:
                if node.path.endswith(".cvsignore"):
                    node.path = node.path[:-len(".cvsignore")] + ".gitignore"
            # Ugh.  Because cvs2svn is brain-dead and issues D/M pairs
            # for identical paths in generated commits, we have to remove those
            # D ops here.  Otherwise later on when we're generating ops, if
            # the M node happens to be missing its hash it will be seen as
            # unmodified and only the D will be issued.
            seen = set()
            for node in reversed(expanded_nodes):
                if node.action == SD_DELETE and node.path in seen:
                    node.action = None
                seen.add(node.path)
            # Create actions corresponding to both
            # parsed and generated nodes.
            actions = []
            ancestor_nodes = {}
            for node in expanded_nodes:
                if node.action is None: continue
                if node.kind == SD_FILE:
                    if node.action == SD_DELETE:
                        assert node.blob is None
                        fileop = FileOp()
                        fileop.construct("D", node.path)
                        actions.append((node, fileop))
                        ancestor_nodes[node.path] = None
                    elif node.action in (SD_ADD, SD_CHANGE, SD_REPLACE):
                        # Try to figure out who the ancestor of
                        # this node is.
                        if node.from_hash:
                            ancestor = self.hashmap[node.from_hash]
                            node.blobmark = ancestor.blobmark
                            if debug_enable(DEBUG_TOPOLOGY):
                                announce("r%s~%s -> %s (via hashmap)" % \
                                         (node.revision, node.path, ancestor))
                            # FIXME: This check generates spurious warnings
                            # on the references.svn test load - er, or are
                            # they real?  Investigate...
                            #if ancestor != filemaps[node.from_rev][node.from_path]:
                            #    announce("r%s~%s: hash map ancestry doesn't match filemap ancestry." \
                            #             % (node.revision, node.path))
                        elif node.from_path:
                            # A copy node is somehow missing its hash.
                            ancestor = filemaps[node.from_rev][node.from_path]
                            if debug_enable(DEBUG_TOPOLOGY):
                                if ancestor:
                                    announce("r%s~%s -> %s (via filemap)" % \
                                             (node.revision, node.path, ancestor))
                                else:
                                    announce("r%s~%s has no ancestor (via filemap)" % \
                                             (node.revision, node.path))
                            if not ancestor and not node.path.endswith(".gitignore"):
                                self.gripe("r%s~%s: missing filemap node." \
                                          % (node.revision, node.path))
                        elif node.action != SD_ADD:
                            # Ordinary inheritance, no node copy.  For
                            # robustness, we don't assume revisions are
                            # consecutive numbers.
                            try:
                                ancestor = ancestor_nodes[node.path]
                            except KeyError:
                                ancestor = filemaps[previous][node.path]
                        else:
                            ancestor = None
                        # Time for fileop generation
                        if node.blob is not None:
                            if node.content_hash in self.hashmap:
                                # Blob matches an existing one -
                                # node was created by a
                                # non-Subversion copy followed by
                                # add.  Get the ancestry right,
                                # otherwise parent pointers won't
                                # be computed properly.
                                ancestor = self.hashmap[node.content_hash]
                                node.from_path = ancestor.from_path
                                node.from_rev = ancestor.from_rev
                                node.blobmark = ancestor.blobmark
                            else:
                                # An entirely new blob
                                node.blobmark = node.blob.set_mark(self.__newmark())
                                self.repo.addEvent(node.blob)
                                # Blobs generated by reposurgeon
                                # (e.g .gitignore content) have no
                                # content hash.  Don't record
                                # them, otherwise they'll all
                                # collide :-)
                                if node.content_hash:
                                    self.hashmap[node.content_hash] = node
                        elif ancestor:
                            node.blobmark = ancestor.blobmark
                        else:
                            # No ancestor, no blob. Has to be a
                            # pure property change.  There's no
                            # way to figure out what mark to use
                            # in a fileop.
                            if not node.path.endswith(".gitignore"):
                                self.gripe("r%s~%s: permission information may be lost." \
                                           % (node.revision, node.path))
                            continue
                        ancestor_nodes[node.path] = node
                        assert node.blobmark
                        # Time for fileop generation
                        if ancestor:
                            perms = oldperms = self.permissions.get(ancestor.path,
                                                                    0o100644)
                        else:
                            perms = oldperms = 0o100644
                        if node.props is not None:
                            perms = self.node_permissions(node)
                        # This ugly nasty guard is critically important.
                        # We need to generate a modify if:
                        # 1. There is new content.
                        # 2. This node was generated as an
                        # expansion of a directory copy.
                        # 3. The node was produced by an explicit
                        # Subversion file copy (not a directory copy)
                        # in which case it has an MD5 hash that points
                        # back to a source.
                        # 4. The permissions for this path have changed;
                        # we need to generate a modify with an old mark
                        # but new permissions.
                        new_content = (node.blob is not None)
                        generated_file_copy = node.generated
                        subversion_file_copy = (node.from_hash is not None)
                        permissions_changed = (perms != oldperms)
                        if (new_content or
                            generated_file_copy or
                            subversion_file_copy or
                            permissions_changed):
                            assert perms
                            fileop = FileOp()
                            fileop.construct("M",
                                             perms,
                                             node.blobmark,
                                             node.path)
                            actions.append((node, fileop))
                            self.repo.objfind(fileop.ref).paths.append(node.path)
                        elif debug_enable(DEBUG_EXTRACT):
                            announce("r%s~%s: unmodified" % (node.revision, node.path))
                        self.permissions[node.path] = perms
                # These are directory actions.
                elif node.action in (SD_DELETE, SD_REPLACE):
                    if debug_enable(DEBUG_EXTRACT):
                        announce("r%s: deleteall %s" % (revision,node.path))
                    fileop = FileOp()
                    fileop.construct("deleteall", node.path[:-1])
                    actions.append((node, fileop))
            # Time to generate commits from actions and fileops.
            if debug_enable(DEBUG_EXTRACT):
                announce("r%s: %d actions" % (revision, len(actions)))
            # First, break the file operations into branch cliques
            cliques = collections.defaultdict(list)
            lastbranch = None
            for (node, fileop) in actions:
                # Try last seen branch first
                if lastbranch and node.path.startswith(lastbranch):
                    cliques[lastbranch].append(fileop)
                    continue
                for branch in self.branches:
                    if node.path.startswith(branch):
                        cliques[branch].append(fileop)
                        lastbranch = branch
                        break
                else:
                    cliques[""].append(fileop)
            # Make two operation lists from the cliques, sorting cliques
            # containing only branch deletes from other cliques.
            deleteall_ops = []
            other_ops = []
            for (branch, ops) in cliques.iteritems():
                if len(ops) == 1 and ops[0].op == "deleteall":
                    deleteall_ops.append((branch, ops))
                else:
                    other_ops.append((branch, ops))
            oplist = itertools.chain(other_ops, deleteall_ops)
            # Create all commits corresponding to the revision
            newcommits = []
            commit.fossil_id = revision
            # The revision is truly mixed if there is more than one clique
            # not consisting entirely of deleteall operations.
            if len(other_ops) > 1:
                split_commits.append(revision)
            else:
                # In the ordinary case, we can assign all non-deleteall fileops
                # to the base commit.
                self.repo.fossil_map["SVN:%s" % commit.fossil_id] = commit
                try:
                    commit.common, commit.fileops = next(oplist)
                    commit._pathset = None
                except StopIteration:
                    commit.common = os.path.commonprefix([node.path for node in record.nodes])
                commit.set_mark(self.__newmark())
                if debug_enable(DEBUG_EXTRACT):
                    announce("r%s gets mark %s" % (revision, commit.mark))
                newcommits.append(commit)
            # If the commit is mixed, or there are deletealls left over,
            # handle that.
            oplist = sorted(oplist, key=operator.itemgetter(0))
            for (i, (branch, fileops)) in enumerate(oplist):
                split = commit.clone()
                split.common = branch
                # Sequence numbers for split commits are 1-origin
                split.fossil_id += StreamParser.SplitSep + str(i + 1)
                self.repo.fossil_map["SVN:%s" % split.fossil_id] = split
                split.comment += "\n[[Split portion of a mixed commit.]]\n"
                split.set_mark(self.__newmark())
                split.fileops = fileops
                split._pathset = None
                newcommits.append(split)
            # Deduce links between branches on the basis of copies. This
            # is tricky because a revision can be the target of multiple
            # copies.  Humans don't abuse this because tracking multiple
            # copies is too hard to do in a slow organic brain, but tools
            # like cvs2svn can generate large sets of them. cvs2svn seems
            # to try to copy each file and directory from the commit
            # corresponding to the CVS revision where the file was last
            # changed before the copy, which may be substantially earlier
            # than the CVS revision corresponding to the
            # copy). Fortunately, we can resolve such sets by the simple
            # expedient of picking the *latest* revision in them!
            for newcommit in newcommits:
                newcommit.fileops.sort(key=FileOp.sortkey)
                if commit.mark not in self.branchlink:
                    copies = [node for node in record.nodes \
                              if node.from_rev is not None \
                              and node.path.startswith(newcommit.common)]
                    if copies and debug_enable(DEBUG_TOPOLOGY):
                        announce("r%s: copy operations %s" %
                                     (newcommit.fossil_id, copies))
                    linkback = False
                    # If the copies include one for the directory, we're good.
                    if any(node.kind == SD_DIR and node.from_path
                                    and node.path == newcommit.common
                                for node in copies):
                        self.directory_branchlinks.add(newcommit.common)
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce("r%s: directory copy with %s" \
                                     % (newcommit.fossil_id, copies))
                        linkback = True
                    # Use may have botched a branch creation by doing a
                    # non-Subversion directory copy followed by a bunch of
                    # Subversion adds. Blob hashes will match existing files,
                    # but from_rev and from_path won't be set at parse time.
                    # Our code detects this case and makes file
                    # backlinks, but can't deduce the directory copy.
                    # Thus, we have to treat multiple file copies as
                    # an instruction to create a gitspace branch.
                    #
                    # This guard filters out copy op sets that are
                    # *single* file copies. We're making an assumption
                    # here that multiple file copies should always
                    # trigger a branch link creation.  This assumption
                    # could be wrong, which is why we emit a warning
                    # message later on for branch links detected this
                    # way
                    #
                    # Even with this filter you'll tend to end up with lots
                    # of little merge bubbles with no commits on one side;
                    # these have to be removed by a debubbling pass later.
                    # I don't know what generates these things - cvs2svn, maybe.
                    #
                    # The second conjunct of this guard filters out the case
                    # where the user actually did do a previous Subversion file
                    # copy to start the branch, in which case we want to link
                    # through that.
                    elif len(copies) > 1 \
                             and newcommit.common not in self.directory_branchlinks:
                        self.fileop_branchlinks.add(newcommit.common)
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce("r%s: making branch link %s" %
                                     (newcommit.fossil_id, newcommit.common))
                        linkback = True
                    if linkback:
                        # Use max() on the reversed iterator since max returns
                        # the first item with the max key and we want the last
                        latest = max(reversed(copies),
                                     key=lambda node: int(node.from_rev))
                        threshold = False
                        # We do not use the memoized commits() iterator since:
                        #  - for reverse iteration it first needs to be
                        #    exhausted and fully cached
                        #  - this is the only need of it in this phase
                        #  - the cache is clobbered each revision, that is
                        #    between any two uses of this iterator
                        for prev in reversed(self.repo):
                            if not isinstance(prev, Commit): continue
                            if prev.fossil_id == latest.from_rev:
                                if debug_enable(DEBUG_TOPOLOGY):
                                    announce("r%s: found %s looking for %s" \
                                             % (newcommit.fossil_id, latest, newcommit.common))
                                threshold = True
                            if threshold:
                                if debug_enable(DEBUG_TOPOLOGY):
                                    announce("r%s: looking at r%s" \
                                             % (newcommit.fossil_id, prev.fossil_id))
                                if latest.from_path.startswith(prev.common) or global_options["svn_nobranch"]:
                                    self.branchlink[newcommit.mark] = (newcommit, prev)
                                    if debug_enable(DEBUG_TOPOLOGY):
                                        announce("r%s: link %s (%s) back to %s (%s, %s)" % \
                                                 (newcommit.fossil_id,
                                                  newcommit.mark,
                                                  newcommit.common,
                                                  latest.from_rev,
                                                  prev.mark,
                                                  prev.common
                                                  ))
                                    break
                        else:
                            if debug_enable(DEBUG_TOPOLOGY):
                                complain("lookback for %s failed" % latest)
                            raise Fatal("couldn't find a branch root for the copy of %s at r%s." % (latest.path, latest.revision))
            # We're done, add all the new commits 
            self.repo.events += newcommits
            self.repo.declare_sequence_mutation()
            # Report progress, and give up our scheduler slot
            # so as not to eat the processor.
            baton.twirl()
            time.sleep(0)
            previous = revision
        # Warn about dubious branch links
        self.fileop_branchlinks.discard("trunk" + os.sep)
        if self.fileop_branchlinks - self.directory_branchlinks:
            self.gripe("branch links detected by file ops only: %s" % " ".join(self.fileop_branchlinks - self.directory_branchlinks))
        self.repo.timings.append(["commits", time.time()]) 
        if debug_enable(DEBUG_EXTRACT):
            announce("at post-parsing time:")
            for commit in self.repo.commits():
                msg = commit.comment
                if msg == None:
                    msg = ""
                announce("r%-4s %4s %2d %2d '%s'" % \
                         (commit.fossil_id, commit.mark,
                          len(commit.fileops),
                          len(commit.properties),
                          msg.strip()[:20]))
        baton.twirl()
        # First, turn the root commit into a tag
        if not self.repo.earliest_commit().fileops:
            try:
                initial, second = itertools.islice(self.repo.commits(), 2)
                self.repo.tagify(initial,
                                 "root",
                                 second,
                                 "[[Tag from root commit at Subversion r%s]]\n" % initial.fossil_id)
            except ValueError: # self.repo has less than two commits
                self.gripe("could not tagify root commit.")
        # Now, branch analysis.
        if not self.branches or global_options['svn_nobranch']:
            last = None
            for commit in self.repo.commits():
                commit.set_branch(os.path.join("refs", "heads", "master") + os.sep)
                if last is not None: commit.set_parents([last])
                last = commit
        else:
            # Instead, determine a branch for each commit...
            if debug_enable(DEBUG_EXTRACT):
                announce("Branches: %s" % (self.branches,))
            lastbranch = None
            for commit in self.repo.commits():
                if lastbranch is not None \
                        and commit.common.startswith(lastbranch):
                    branch = lastbranch
                else:
                    branch = next((b for b in self.branches
                                  if commit.common.startswith(b)),
                                  None)
                if branch is not None:
                    commit.set_branch(branch)
                    for fileop in commit.fileops:
                        if fileop.op in ("M", "D"):
                            fileop.path = fileop.path[len(branch):]
                        elif fileop.op in ("R", "C"):
                            fileop.source = fileop.source[len(branch):]
                            fileop.target = fileop.target[len(branch):]
                    commit._pathset = None
                else:
                    commit.set_branch("root")
                    self.branches["root"] = None
                lastbranch = branch
                baton.twirl()
            self.repo.timings.append(["branches", time.time()]) 
            baton.twirl()
            # ...then rebuild parent links so they follow the branches
            branchroots = []
            for commit in self.repo.commits():
                if self.branches[commit.branch] is None:
                    branchroots.append(commit)
                    commit.set_parents([])
                else:
                    commit.set_parents([self.branches[commit.branch]])
                self.branches[commit.branch] = commit
                # Per-commit spinner disabled because this pass is fast
                #baton.twirl()
            self.repo.timings.append(["parents", time.time()]) 
            baton.twirl()
            # The root branch is special. It wasn't made by a copy, so
            # we didn't get the information to connect it to trunk in the
            # last phase.
            try:
                commit = next(c for c in self.repo.commits()
                              if c.branch == "root")
            except StopIteration:
                pass
            else:
                self.branchlink[commit.mark] = (commit, self.repo.earliest_commit())
            self.repo.timings.append(["root", time.time()])
            baton.twirl()
            # Add links due to Subversion copy operations
            if debug_enable(DEBUG_EXTRACT):
                announce("branch roots: [{roots}], links {{{links}}}".format(
                    roots = ", ".join(c.mark for c in branchroots),
                    links = ", ".join("{l[0].mark}: {l[1].mark}".format(l=l)
                                      for l in self.branchlink.itervalues())))
            for (child, parent) in self.branchlink.itervalues():
                if not child.has_parents() \
                        and not child.branch in self.branchcopies:
                    # The branch wasn't created by copying another branch and
                    # is instead populated by fileops. Prepend a deleteall to
                    # ensure that it starts with a clean tree instead of
                    # inheriting that of its soon to be added first parent.
                    # The deleteall is put on the first commit of the branch
                    # which has fileops or more than one child.
                    commit = child
                    while len(commit.children()) == 1 and not commit.fileops:
                        commit = commit.first_child()
                    if commit.fileops or commit.has_children():
                        fileop = FileOp()
                        fileop.construct("deleteall")
                        commit.fileops.insert(0, fileop)
                        self.generated_deletes.append(commit)
                if parent not in child.parents():
                    child.add_parent(parent)
            for root in branchroots:
                if getattr(commit.branch, "fileops", None) \
                        and root.branch != ("trunk" + os.sep):
                    self.gripe("r%s: can't connect nonempty branch %s to origin" \
                                % (root.fossil_id, root.branch))
            self.repo.timings.append(["branchlinks", time.time()]) 
            baton.twirl()
            # Add links due to svn:mergeinfo properties
            for ((_, early), late) in self.mergeinfo.iteritems():
                if early in split_commits:
                    self.gripe("cannot resolve mergeinfo from split commit %s to %s." % (early, late))
                elif late in split_commits:
                    self.gripe("cannot resolve mergeinfo from %s to split commit %s." % (early, late))
                else:
                    try:
                        late_commit = self.repo.fossil_map["SVN:%s" % late]
                        early_commit = self.repo.fossil_map["SVN:%s" % early]
                    except KeyError:
                        continue
                    if early_commit not in late_commit.parents():
                        late_commit.add_parent(early_commit)
            self.repo.timings.append(["mergeinfo", time.time()]) 
            baton.twirl()
            if debug_enable(DEBUG_EXTRACT):
                announce("after branch analysis")
                for commit in self.repo.commits():
                    try:
                        ancestor = commit.parents()[0]
                    except IndexError:
                        ancestor = '-'
                    announce("r%-4s %4s %4s %2d %2d '%s'" % \
                             (commit.fossil_id,
                              commit.mark, ancestor,
                              len(commit.fileops),
                              len(commit.properties),
                              commit.branch))
            # Tagify normal branch-root commits, they don't carry any
            # information other than their metadata. The exceptions
            # are trunk and root (if the later exists); neither is
            # the result of a normal copy operation.
            for commit in branchroots:
                if commit.branch != ("trunk"+os.sep) \
                       and commit.branch != "root" \
                       and commit.has_parents() \
                       and not commit.fileops:
                    tagname = os.path.basename(commit.branch[:-1])
                    if not commit.branch.startswith("tags"):
                        tagname += "-root"
                    self.repo.tagify(commit,
                                     tagname,
                                     commit.parents()[0])
                baton.twirl()
            self.repo.timings.append(["tagifying", time.time()]) 
        baton.twirl()
        # Code controlled by svn_nobranch option ends.
        # Now clean up junk commits generated by cvs2svn.
        # We need a list copy since commits are deleted in the loop
        for commit in list(self.repo.commits()):
            # Things that cvs2svn created as tag surrogates
            # get turned into actual tags.
            m = StreamParser.cvs2svn_tag_re.search(commit.comment)
            if m and not commit.has_children():
                fulltag = os.path.join("refs", "tags", m.group(1))
                self.repo.events.append(Reset(self.repo, ref=fulltag,
                                              target=commit.parents()[0]))
                self.repo.quiet_delete(commit)
            # Childless generated branch commits carry no informationn,
            # and just get removed.
            m = StreamParser.cvs2svn_branch_re.search(commit.comment)
            if m and not commit.has_children():
                self.repo.quiet_delete(commit)
            baton.twirl()
        self.repo.timings.append(["junk", time.time()]) 
        baton.twirl()
        # We need a list copy since commits are deleted in the loop
        for commit in list(self.repo.commits()):
            # Now we need to tagify all other commits without fileops, because
            # git is going to just discard them when we build a live repo and
            # they might possibly contain interesting metadata.  Usually they're
            # just debris from tagging, though.
            if not commit.fileops:
                if commit.has_parents():
                    legend = "[[Tag from zero-fileop commit at Subversion r%s" \
                             % commit.fossil_id
                    if self.revisions[commit.fossil_id].nodes:
                        legend += ":\n"
                        for node in self.revisions[commit.fossil_id].nodes:
                            legend += str(node) + "\n"
                    legend += "]]\n"
                    self.repo.tagify(commit,
                                     "emptycommit-%s" % commit.fossil_id,
                                     commit.parents()[0],
                                     legend)
                else:
                    self.gripe("r%s: deleting parentless zero-op commit." \
                               % commit.fossil_id)
                    self.repo.quiet_delete(commit)
            # Also, tagify tip commits that consist only of deletes.
            # The fileops aren't worth saving; the comment metadata
            # just might be.
            elif commit.alldeletes(killset={"deleteall"}) \
                     and not commit.has_children():
                if commit.has_parents():
                    if commit.branch.endswith(os.sep):
                        commit.set_branch(commit.branch[:-1])
                    label = os.path.basename(commit.branch)
                    commit.fileops = []
                    self.repo.tagify(commit,
                                     "tipdelete-%s" % label,
                                     commit.parents()[0])
                else:
                    self.gripe("r%s: deleting parentless tip delete of %s" \
                               % (commit.fossil_id, commit.branch))
                    self.repo.quiet_delete(commit)
            baton.twirl()
        self.repo.timings.append(["tagify-empty", time.time()]) 
        baton.twirl()
        # Now pretty up the branch names
        for commit in self.repo.commits():
            if commit.branch == "root":
                commit.set_branch(os.path.join("refs", "heads", "root"))
            elif commit.branch.startswith("tags" + os.sep):
                branch = commit.branch
                if branch.endswith(os.sep):
                    branch = branch[:-1]
                commit.set_branch(os.path.join("refs", "tags",
                                              os.path.basename(branch)))
            elif commit.branch == "trunk" + os.sep:
                commit.set_branch(os.path.join("refs", "heads", "master"))
            else:
                commit.set_branch(os.path.join("refs", "heads",
                                              os.path.basename(commit.branch[:-1])))
            baton.twirl()
        ##self.repo.timings.append(["polishing", time.time()]) 
        baton.twirl()
        # cvs2svn likes to crap out sequences of deletes followed by
        # filecopies on the same node when it's generating tag commits.
        # These are lots of examples of this in the nut.svn test load.
        # These show up as redundant (D, M) fileop pairs.
        for commit in self.repo.commits():
            if any(fileop is None for fileop in commit.fileops):
                raise Fatal("Null fileop at r%s" % commit.fossil_id)
            for i in range(len(commit.fileops)-1):
                if commit.fileops[i].op == 'D' and commit.fileops[i+1].op == 'M':
                    if commit.fileops[i].path == commit.fileops[i+1].path:
                        commit.fileops[i].op = None
            commit.fileops = [fileop for fileop in commit.fileops if fileop.op is not None]
            baton.twirl()
        self.repo.timings.append(["canonicalizing", time.time()]) 
        baton.twirl()
        # Issue resets when required
        save_events = self.repo.events
        self.repo.events = []
        self.repo.declare_sequence_mutation()
        issued = set()
        for event in save_events:
            if isinstance(event, Commit) and event.branch not in issued:
                reset = Reset(self.repo)
                reset.ref = event.branch
                self.repo.addEvent(reset)
                issued.add(event.branch)
            self.repo.addEvent(event)
            baton.twirl()
        self.repo.timings.append(["resets", time.time()]) 
        baton.twirl()
        # Remove spurious parent links caused by random cvs2svn file copies.
        #baton.twirl("debubbling")
        for commit in self.repo.commits():
            try:
                a, b = commit.parents()
            except ValueError:
                pass
            else:
                if a is b:
                    self.gripe("r%s: duplicate parent marks" % commit.fossil_id)
                elif a.branch == b.branch == commit.branch:
                    if b.committer.date < a.committer.date:
                        (a, b) = (b, a)
                    if b.descended_from(a):
                        commit.remove_parent(a)
            # Per-commit spinner disabled because this pass is fast
            #baton.twirl()
        self.repo.timings.append(["debubbling", time.time()]) 
        baton.twirl()
        self.repo.renumber(baton=baton)
        baton.twirl()
        self.repo.timings.append(["renumbering", time.time()]) 
        self.repo.write_fossils = True
        # Look for tag and branch merges that mean we may want to undo a
        # tag or branch creation
        ignore_deleteall = set(commit.mark
                               for commit in self.generated_deletes)
        for commit in self.repo.commits():
            if commit.fileops and commit.fileops[0].op == 'deleteall' \
                    and commit.has_children() \
                    and commit.mark not in ignore_deleteall:
                self.gripe("mid-branch deleteall on %s at <%s>." % \
                        (commit.branch, commit.fossil_id))
        self.repo.timings.append(["linting", time.time()]) 
        # Treat this in-core state is though it was read from an SVN repo 
        self.repo.vcs = next(vcstype for vcstype in vcstypes if vcstype.name == "svn")

class SubversionDumper:
    "Respository to Subversion stream dump."
    def __init__(self, repo):
        self.repo = repo
        self.pathmap = {}
        self.mark_to_revision = {}
        self.branches_created = []
        self.tag_latch = False
    class FlowState:
        def __init__(self, rev, props=None):
            self.rev = rev
            self.props = props or {}
            self.is_directory = False
            self.subfiles = 0
    @staticmethod
    def svnprops(pdict):
        return "".join("K %d\n%s\nV %d\n%s\n" % (len(key), key, len(val), val)
                        for key, val in sorted(pdict.iteritems()) if val)
    @staticmethod
    def dump_revprops(fp, revision, date, author=None, log=None, parents=None):
        "Emit a Revision-number record describing unversioned properties."
        fp.write("Revision-number: %d\n" % revision)
        parts = []
        parts.append(SubversionDumper.svnprops({"svn:log": log}))
        parts.append(SubversionDumper.svnprops({"svn:author": author}))
        # Ugh.  Subversion apparently insists on those decimal places
        parts.append(SubversionDumper.svnprops({"svn:date": date.rfc3339()[:-1]+".000000Z"}))
        # Hack merge links into mergeinfo properties.  This is a kluge
        # - the Subversion model is really like cherrypicking rather
        # than branch merging - but it's better than nothing, and
        # should at least round-trip with the logic in the Subversion
        # dump parser.
        if len(parents or []) > 1:
            parents = iter(parents)
            next(parents) # ignore main parent
            ancestral = ".".join(itertools.imap(str, sorted(parents)))
            parts.append(SubversionDumper.svnprops({"svn:mergeinfo": ancestral}))
        parts.append("PROPS-END\n")
        parts.append("\n")
        revprops = "".join(parts)
        fp.write("Prop-content-length: %d\n" % (len(revprops)-1))
        fp.write("Content-length: %d\n\n" % (len(revprops)-1))
        fp.write(revprops)
    @staticmethod
    def dump_node(fp, path, kind, action, content="",
                  from_rev=None, from_path=None,
                  props=None):
        "Emit a Node record describing versioned properties and content."
        fp.write("Node-path: %s\n" % path)
        fp.write("Node-kind: %s\n" % kind)
        fp.write("Node-action: %s\n" % action)
        if from_rev:
            fp.write("Node-copyfrom-rev: %s\n" % from_rev)
        if from_path:
            fp.write("Node-copyfrom-path: %s\n" % from_path)
        nodeprops = SubversionDumper.svnprops(props or {}) + "PROPS-END\n"
        fp.write("Prop-content-length: %d\n" % len(nodeprops))
        if content:
            fp.write("Text-content-length: %d\n" % len(content))
            # Checksum validation in svnload works if we do sha1 but
            # not if we try md5.  It's unknown why - possibly svn load
            # is simply ignoring sha1.
            #fp.write("Text-content-md5: %s\n" % hashlib.md5(content).hexdigest())
            fp.write("Text-content-sha1: %s\n" % hashlib.sha1(content.encode()).hexdigest())
        fp.write("Content-length: %d\n\n" % (len(nodeprops) + len(content)))
        fp.write(nodeprops + "\n")            
        if content:
            fp.write(content)
        fp.write("\n")
    @staticmethod
    def svnbranch(branch):
        "The branch directory corresponding to a specified git branch."
        segments = branch.split(os.sep)
        assert segments[0] == "refs"
        if tuple(segments) == ("refs", "heads", "master"):
            return "trunk"
        if segments[1] not in ("tags", "heads") or len(segments) != 3:
            raise Recoverable("%s can't be mapped to Subversion." % branch)
        svnbase = segments[2]
        if svnbase.endswith("trunk"):
            svnbase += "-git"
        if segments[1] == "tags":
            return os.path.join("tags", svnbase)
        else:
            return os.path.join("branches", svnbase)
    @staticmethod
    def svnize(branch, path=""):
        "Return SVN path corresponding to a specified gitspace branch and path."
        return os.path.join(SubversionDumper.svnbranch(branch), path)
    def filedelete(self, fp, branch, path):
        "Emit the dump-stream records required to delete a file."
        if debug_enable(DEBUG_SVNDUMP):
            announce("filedelete%s" % repr((branch, path)))
        svnpath = SubversionDumper.svnize(branch, path)
        fp.write("Node-path: %s\n" % svnpath)
        fp.write("Node-action: delete\n\n\n")
        del self.pathmap[svnpath]
        while True:
            svnpath = os.path.dirname(svnpath)
            # The second disjunct in this guard is a
            # spasmodic twitch in the direction of
            # respecting Subversion's notion of a "flow".
            # We refrain from deleting branch directories
            # so they'll have just one flow throughout the
            # life of the repository.
            if not svnpath or svnpath in self.branches_created:
                break
            self.pathmap[svnpath].subfiles -= 1
            if self.pathmap[svnpath].subfiles == 0:
                fp.write("Node-path: %s\n" % svnpath)
                fp.write("Node-action: delete\n\n\n")
                del self.pathmap[svnpath]
    def directory_create(self, fp, revision, branch, path, parents=None):
        if debug_enable(DEBUG_SVNDUMP):
            announce("directory_create%s" % repr((revision, branch, path)))
        creations = []
        # Branch creation may be required
        svnout = SubversionDumper.svnbranch(branch)
        if svnout not in self.branches_created:
            if not svnout.startswith("tags") and "branches" not in self.branches_created:
                self.branches_created.append("branches")
                creations.append(("branches", None, None))
            self.branches_created.append(svnout)
            if parents:
                from_rev = self.mark_to_revision[parents[0].mark],
                from_branch = SubversionDumper.svnbranch(parents[0].branch)
                creations.append((svnout, from_rev, from_branch))
                # Iterating through dict keys while mutating the dict
                # is not supported by Python. The following thus uses
                # keys() which returns a new list of the dict keys to
                # loop over.
                for key in self.pathmap.keys():
                    if key.startswith(from_branch + os.sep) and key != from_branch:
                        counterpart = svnout + key[len(from_branch):]
                        self.pathmap[counterpart] = SubversionDumper.FlowState(revision)
            else:
                creations.append((svnout, None, None))
        # Create all directory segments required
        # to get down to the level where we can
        # create the file.
        parts = os.path.dirname(path).split(os.sep)
        if parts[0]:
            parents = [os.sep.join(parts[:i+1])
                                   for i in range(len(parts))]
            for parentdir in parents:
                if parentdir not in self.pathmap:
                    fullpath = os.path.join(svnout, parentdir)
                    creations.append((fullpath, None, None))
        for (path, from_rev, from_path) in creations:
            SubversionDumper.dump_node(fp,
                                       path=path,
                                       kind="dir",
                                       action="add",
                                       from_rev=from_rev,
                                       from_path=from_path)
            self.pathmap[path] = SubversionDumper.FlowState(revision)
            self.pathmap[path].is_directory = True
            self.pathmap[path].subfiles += 1
    def filemodify(self, fp, revision, branch, mode, ref, path, parents):
        "Emit the dump-stream records required to add or modify a file."
        if debug_enable(DEBUG_SVNDUMP):
            announce("filemodify%s" % repr((revision, branch, mode, ref, path,
                                            [event.mark for event in parents])))
        # Branch and directory creation may be required.
        # This has to be called early so copy can update the filemap.
        self.directory_create(fp, revision, branch, path, parents)
        svnpath = SubversionDumper.svnize(branch, path)
        if svnpath in self.pathmap:
            svnop = "change"
            self.pathmap[svnpath].rev = revision
        else:
            svnop = "add"
            self.pathmap[svnpath] = SubversionDumper.FlowState(revision)
        if debug_enable(DEBUG_SVNDUMP):
            announce("Generating %s %s" % (svnpath, svnop))
        content = self.repo.objfind(ref).get_content()
        changeprops = None
        if svnpath in self.pathmap:
            if mode == '100755':
                if "svn:executable" not in self.pathmap[svnpath].props:
                    self.pathmap[svnpath].props["svn:executable"] = "true"
                    changeprops = self.pathmap[svnpath].props
            elif mode == '100644':
                if "svn:executable" in self.pathmap[svnpath].props:
                    self.pathmap[svnpath].props["svn:executable"] = "false"
                    changeprops = self.pathmap[svnpath].props
        # The actual content
        SubversionDumper.dump_node(fp,
                  path=svnpath,
                  kind="file",
                  action=svnop,
                  props=changeprops,
                  content=content)
    def filecopy(self, fp, revision, branch, source, target):
        if debug_enable(DEBUG_SVNDUMP):
            announce("filecopy%s" % repr((revision, branch, source, target)))
        svnsource = SubversionDumper.svnize(branch, source)
        try:
            flow = self.pathmap[svnsource]
        except:
            raise Fatal("couldn't retrieve flow information for %s" % source)
        self.directory_create(fp, revision, branch, target)
        svntarget = SubversionDumper.svnize(branch, target)
        self.pathmap[svntarget] = self.pathmap[svnsource]
        SubversionDumper.dump_node(fp,
                                   path=svntarget,
                                   kind="file",
                                   action="add",
                                   from_path=svnsource,
                                   from_rev=flow.rev)
    def make_tag(self, fp, revision, branch, name, log, author):
        if debug_enable(DEBUG_SVNDUMP):
            announce("make_tag%s" % repr((revision, branch, name, log, str(author))))
        svnsource = SubversionDumper.svnize(branch)
        svntarget = os.path.join("tags", name)
        SubversionDumper.dump_revprops(fp, revision,
                                       log=log,
                                       author=author.email.split("@")[0],
                                       date=author.date)
        if not self.tag_latch:
            self.tag_latch = True
            SubversionDumper.dump_node(fp,
                                   path="tags",
                                   kind="dir",
                                   action="add")
        SubversionDumper.dump_node(fp,
                                   path=svntarget,
                                   kind="dir",
                                   action="add",
                                   from_path=svnsource,
                                   from_rev=revision-1)
    def dump(self, selection, fp, progress=False):
        "Export the repository as a Subversion dumpfile."
        self.tag_latch = False
        tags = [event for event in self.repo.events if isinstance(event, Tag)]
        with Baton("reposurgeon: dumping", enable=progress) as baton:
            try:
                fp.write("SVN-fs-dump-format-version: 2\n\n")
                fp.write("UUID: %s\n\n" % (self.repo.uuid or uuid.uuid4()))
                SubversionDumper.dump_revprops(fp,
                                               revision=0,
                                               date=Date(rfc3339(time.time()))) 
                baton.twirl()
                revision = 0
                for i in selection:
                    event = self.repo.events[i]
                    # Passthroughs are lost; there are no equivalents
                    # in Subversion's ontology.
                    if not isinstance(event, Commit):
                        continue
                    revision += 1
                    self.mark_to_revision[event.mark] = revision
                    # We must treat the gitspace committer attribute
                    # as the author: gitspace author information is
                    # lost.  So is everything but the local part of
                    # the committer name.
                    backlinks = [self.mark_to_revision[mark]
                                 for mark in event.parent_marks()]
                    SubversionDumper.dump_revprops(fp, revision,
                                                   log=event.comment,
                                                   author=event.committer.email.split("@")[0],
                                                   date=event.committer.date,
                                                   parents=backlinks)
                    for fileop in event.fileops:
                        if fileop.op == "D":
                            if fileop.path.endswith(".gitignore"):
                                svnpath = SubversionDumper.svnize(event.head(), fileop.path)
                                self.pathmap[svnpath].props["svn:ignore"] = ""
                                SubversionDumper.dump_node(fp,
                                          path=os.path.dirname(svnpath),
                                          kind="dir",
                                          action="change",
                                          props = self.pathmap[svnpath].props)
                            else:
                                self.filedelete(fp, event.head(), fileop.path)
                        elif fileop.op == "M":
                            if fileop.path.endswith(".gitignore"):
                                svnpath = SubversionDumper.svnize(event.head(),
                                                                  os.path.dirname(fileop.path))
                                blob = self.repo.objfind(fileop.ref)
                                if svnpath not in self.pathmap:
                                    self.pathmap[svnpath] = SubversionDumper.FlowState(revision)
                                self.pathmap[svnpath].props["svn:ignore"] = blob.get_content()
                                SubversionDumper.dump_node(fp,
                                          path=os.path.dirname(svnpath),
                                          kind="dir",
                                          action="change",
                                          props = self.pathmap[svnpath].props)
                            else:
                                self.filemodify(fp,
                                                revision,
                                                event.head(),
                                                fileop.mode,
                                                fileop.ref,
                                                fileop.path,
                                                event.parents())
                        elif fileop.op == "R":
                            self.filecopy(fp,
                                          revision,
                                          event.head(),
                                          fileop.source,
                                          fileop.target)
                            self.filedelete(fp, event.branch, fileop.source)
                        elif fileop.op == "C":
                            self.filecopy(fp,
                                          revision,
                                          event.head(),
                                          fileop.source,
                                          fileop.target)
                        elif fileop.op == "deleteall":
                            branchdir = self.svnbranch(event.head())
                            # Here again the object is mutated, so a copy list must be used.
                            for path in self.pathmap.keys():
                                if path.startswith(branchdir + os.sep):
                                    del self.pathmap[path]
                            fp.write("Node-path: %s\n" % branchdir)
                            fp.write("Node-action: delete\n\n\n")
                        else:
                            raise Fatal("unsupported fileop type %s." \
                                        % fileop.op)
                    # Turn any annotated tag pointing at this commit into
                    # a directory copy.
                    for tag in tags:
                        if tag.target is event:
                            revision += 1
                            self.make_tag(fp,
                                          revision,
                                          event.head(),
                                          name=tag.name,
                                          log=tag.comment,
                                          author=tag.tagger)
                            break
                    else:
                        # Preserve lightweight tags, too.  Ugh, O(n**2).
                        if event.has_children():
                            for child in event.children():
                                if child.branch == event.branch: 
                                    break
                            else:
                                revision += 1
                                self.make_tag(fp,
                                              revision,
                                              event.head(),
                                              name=os.path.basename(event.branch),
                                              log="",
                                              author=event.committer)
                    fp.flush()
            except IOError as e:
                raise Fatal("export error: %s" % e)

# Generic repository-manipulation code begins here

class Repository:
    "Generic repository object."
    def __init__(self, name=None):
        self.name = name
        self.readtime = time.time()
        self.vcs = None
        self.sourcedir = None
        self.seekstream = None
        self.events = []    # A list of the events encountered, in order
        self._commits = None
        self._mark_to_index = {}
        self._mark_to_object = {}
        self.preserve_set = set()
        self.case_coverage = set()
        self.basedir = os.getcwd()
        self.uuid = None
        self.write_fossils = False
        self.dollar_map = {}        # From dollar cookies in files
        self.fossil_map = {}    # From anything that doesn't survive rebuild
        self.fossil_count = None
        self.timings = []
    def cleanup(self):
        "Release blob files associated with this repo."
        nuke(self.subdir(), "reposurgeon: cleaning up %s" % self.subdir())
    def subdir(self, name=None):
        if name is None:
            name = self.name
        if not name:
            return os.path.join(self.basedir, ".rs" + repr(os.getpid()))
        else:
            return os.path.join(self.basedir, ".rs" + repr(os.getpid())+ "-" + name) 
    def makedir(self):
        try:
            if debug_enable(DEBUG_SHUFFLE):
                announce("repository fast import creates " + self.subdir())
            target = self.subdir()
            if not os.path.exists(target):
                os.mkdir(target)
        except OSError:
            raise Fatal("can't create operating directory")
    def size(self):
        "Return the size of this import stream, for statistics display."
        return sum(len(str(e)) for e in self.events)
    def branchset(self):
        "Return a set of branchnames in this repo."
        return set(commit.branch for commit in self.commits())
    def branchmap(self):
        "Return a map of branchnames to terminal marks in this repo."
        return dict((commit.branch, commit.mark) for commit in self.commits())
    def index(self, obj):
        "Index of the specified object."
        for (ind, event) in enumerate(self.events):
            if event == obj:
                return ind
        raise Fatal("internal error: <%s> not matched in repository %s" % (obj.fossil_id, self.name))
    def find(self, mark):
        "Find an object index by mark"
        if not self._mark_to_index:
            for (ind, event) in enumerate(self.events):
                if hasattr(event, "mark"):
                    self._mark_to_index[event.mark] = ind
        return self._mark_to_index.get(mark)
    def objfind(self, mark):
        "Find an object by mark"
        if not self._mark_to_object:
            for event in self.events:
                if hasattr(event, "mark"):
                    self._mark_to_object[event.mark] = event
        return self._mark_to_object.get(mark)
    def invalidate_object_map(self):
        "Force an object-map rebuild on the next lookup."
        self._mark_to_object = {}
    def read_authormap(self, selection, fp):
        "Read an author-mapping file and apply it to the repo."
        authormap = {}
        try:
            for line in fp:
                line = line.strip()
                if not line:
                    continue
                if line.startswith('#'):
                    continue
                (local, netwide) = line.strip().split('=')
                (address, timezone) =  netwide.split(">")
                address += ">"
                timezone = timezone.strip()
                if timezone:
                    timezone = Date.tzresolve(timezone)
                (name, mail) = email.utils.parseaddr(address.strip())
                if not mail:
                    raise Fatal("can't recognize address in '%s'" % netwide)
                authormap[local.strip().lower()] = (name, mail, timezone)
        except IOError:
            raise Recoverable("couldn't open author-map file")
        except ValueError:
            raise Recoverable("bad author map syntax: %s" % repr(line))
        for ei in selection:
            event = self.events[ei]
            if isinstance(event, Commit):
                event.committer.remap(authormap)
                for author in event.authors:
                    author.remap(authormap)
            elif isinstance(event, Tag):
                event.tagger.remap(authormap)
    def write_authormap(self, selection, fp):
        "List the identifiers we need."
        contributors = {}
        for ei in selection:
            event = self.events[ei]
            if isinstance(event, Commit):
                contributors[event.committer.name] = event.committer.who()
                for author in event.authors:
                    contributors[author.name] = author.who()
            elif isinstance(event, Tag):
                contributors[event.tagger.name] = event.tagger.who()
        for (name, cid) in contributors.iteritems():
            fp.write("%s = %s\n" % (name, cid))
    def read_fossilmap(self, fp):
        "Read a fossil-references dump and initialize the repo's fossil map."
        commit_map = {}
        for event in self.commits():
            key = (event.committer.date.timestamp, event.committer.email)
            if key not in commit_map:
                commit_map[key] = []
            commit_map[key].append(event)
        try:
            matched = unmatched = 0
            for line in fp:
                (fossil, stamp) = line.split()
                (timefield, person) = stamp.split('!')
                if ':' in person:
                    (person, seq) = person.split(':')
                    seq = int(seq) - 1
                else:
                    seq = 0
                assert fossil and timefield and person
                when_who = (Date(timefield).timestamp, person)
                if when_who in commit_map:
                    self.fossil_map[fossil] = commit_map[when_who][seq]
                    if fossil.startswith("SVN:"):
                        commit_map[when_who][seq].fossil_id = fossil[4:]
                    matched += 1
                else:
                    unmatched += 1
            if verbose >= 1:
                announce("%d matched, %d unmatched, %d total"\
                         % (matched, unmatched, matched+unmatched))
            del commit_map
        except ValueError:
            raise Recoverable("bad syntax in fossils file.")
    def write_fossilmap(self, fp):
        "Dump fossil references."
        for cookie, commit in sorted(
                self.fossil_map.iteritems(),
                key=lambda f: (f[1].committer.date.timestamp, f[0])):
            if "SVN" in cookie and StreamParser.SplitSep in cookie:
                serial = ':' + cookie.split(StreamParser.SplitSep)[1]
            else:
                serial = ''
            # The objfind test is needed in case this repo is an expunge
            # fragment with a copied fossil map.  It's a simple substitute
            # for partitioning the map at expunge time.
            if self.objfind(commit.mark) and commit.fossil_id:
                fp.write("%s\t%s!%s%s\n" % (cookie,
                                           commit.committer.date.rfc3339(),
                                           commit.committer.email,
                                           serial))
    def tagify(self, commit, name, target, legend=""):
        "Turn a commit into a tag."
        if debug_enable(DEBUG_EXTRACT):
            commit_id = commit.mark
            if commit.fossil_id:
                commit_id += " <%s>" % commit.fossil_id
            announce("tagifying: %s -> %s" % (commit_id, name))
        if commit.fileops:
            raise Fatal("Attempting to tagify a commit with fileops.")
        if not commit.comment:
            pref = ""
        else:
            pref = commit.comment + "\n"
        self.addEvent(Tag(commit.repo,
                          name=name,
                          target=target,
                          tagger=commit.committer,
                          comment=pref + legend))
        self.quiet_delete(commit)
    def fast_import(self, fp, progress=False):
        "Read a stream file and use it to populate the repo."
        StreamParser(self).fast_import(fp, progress)
        self.readtime = time.time()
    def parse_dollar_cookies(self):
        "Extract info about fossil references from CVS/SVN header cookies."
        if self.dollar_map:
            return
        # The goal here is to throw away CVS and Subversion header
        # information still fossilized into $Id$ and $Subversion$
        # headers after conversion to a later version. For each
        # cookie, all but the earliest blob containing it has it
        # as a fossil which should be removed.  Then, the earliest
        # commit referencing that blob gets a fossil property set;
        # later references will be branching artifacts.
        seen = set()
        for event in self.events:
            if isinstance(event, Blob) and event.cookie:
                if event.cookie in seen:
                    continue
                else:
                    # The first commit immediately after this blob
                    for ei in range(self.find(event.mark), len(self.events)):
                        if isinstance(self.events[ei], Commit):
                            commit = self.events[ei]
                            break
                    seen.add(event.cookie)
                    if "fossil" in commit.properties:
                        complain("fossil property of %s overwritten" \
                                 % commit.mark)
                    if isinstance(event.cookie, str):
                        svnkey = "SVN:" + event.cookie
                        self.dollar_map[svnkey] = commit
                    else:
                        (basename, cvsref) = event.cookie
                        for fileop in commit.fileops:
                            if fileop.op == 'M' and fileop.ref == event.mark:
                                if not os.path.basename(fileop.path).endswith(basename):
                                    # Usually the harmless result of a
                                    # file move or copy that cvs2svn or
                                    # git-svn didn't pick up on.
                                    complain("mismatched CVS header path '%s' in %s vs '%s' in %s"
                                             % (fileop.path, commit.mark, basename, event.mark))
                                cvskey = "CVS:%s:%s" % (fileop.path, cvsref)
                                self.dollar_map[cvskey] = commit
    def export_style(self):
        "How should we tune the export dump format?"
        if self.vcs:
            return self.vcs.styleflags
        else:
            # Default to git style
            return ("nl-after-commit",)
    def fast_export(self, selection, fp, target=None, progress=False):
        "Dump the repo object in Subversion dump or fast-export format."
        if target and target.name == "svn":
            SubversionDumper(self).dump(selection, fp, progress)
            return
        with Baton("reposurgeon: exporting", enable=progress) as baton:
            try:
                fossil_latch = False
                for ei in selection:
                    baton.twirl()
                    event = self.events[ei]
                    #fossil_latch = fossil_latch or hasattr(event, "fossil_id")
                    if debug_enable(DEBUG_UNITE):
                        if hasattr(event, "mark"):
                            announce("writing %d %s %s" % (ei, event.mark, event.__class__.__name__))
                    fp.write(event.dump(target))
                if fossil_latch:
                    fp.write("reset fossil_id\n")
                    endcommit = Commit(self)
                    endcommit.set_branch("refs/heads/master")
                    endcommit.comment = "Fossil-ID notes\n"
                    endcommit.committer = Attribution("Nowhere Man <nowhere@nobody.net> " + rfc3339(time.time()))
                    for ei in selection:
                        event = self.events[ei]
                        if hasattr(event, "fossil_id"):
                            fileop = FileOp()
                            fileop.inline = "Fossil-ID: %s" % event.fossil_id
                            fileop.construct('N', 'inline', event.mark)
                            endcommit.fileops.append(fileop)
                    fp.write(str(endcommit))
            except IOError as e:
                raise Fatal("export error: %s" % e)
    def preserve(self, filename):
        "Add a path to the preserve set, to be copied back on rebuild."
        if os.path.exists(filename):
            self.preserve_set.add(filename)
        else:
            raise Recoverable("%s doesn't exist" % filename)
    def unpreserve(self, filename):
        "Remove a path from the preserve set."
        if filename in self.preserve_set:
            self.preserve_set.remove(filename)
        else:
            raise Recoverable("%s doesn't exist" % filename)
    def preservable(self):
        "Return the repo's preserve set."
        return self.preserve_set
    def rename(self, newname):
        "Rename the repo."
        try:
            # Can fail if the target directory exists.
            if debug_enable(DEBUG_SHUFFLE):
                announce("repository rename %s->%s calls os.rename(%s, %s)" % (self.name, newname, repr(self.subdir()), repr(self.subdir(newname))))
            os.rename(self.subdir(), self.subdir(newname))
            self.name = newname
        except OSError as e:
            raise Fatal("repo rename %s -> %s failed: %s"
                                       % (self.subdir(), self.subdir(newname), e))
    def addEvent(self, event):
        self.events.append(event)
        self.declare_sequence_mutation()
    @memoized_iterator("_commits")
    def commits(self):
        "Iterate through the repository commit objects."
        return (e for e in self.events if isinstance(e, Commit))
    def declare_sequence_mutation(self):
        "Mark the repo event sequence sequence modified."
        self._commits = None
        self._mark_to_index = {}
    def earliest_commit(self):
        "Return the earliest commit."
        return next(self.commits())
    def earliest(self):
        "Return the date of earliest commit."
        return next(self.commits()).committer.date
    def ancestors(self, ei):
        "Return ancestors of an event, in reverse order."
        trail = []
        while True:
            if not self.events[ei].has_parents():
                break
            else:
                efrom = self.find(self.events[ei].parent_marks()[0])
                trail.append(efrom)
                ei = efrom
        return trail
    #
    # Delete machinery begins here
    #
    def __ancestor_count(self, event, path):
        "Count modifications of a path in this commit and its ancestors."
        count = 0
        while True:
            for fileop in event.fileops:
                if fileop and fileop.op == "M" and fileop.path == path:
                    count += 1
                    break
            # 0, 1, and >1 are the interesting cases
            if count > 1:
                return count
            try:
                event = event.parents()[0]
            except IndexError:
                break
        return count
    def __compose(self, event, left, right):
        "Compose two relevant fileops."
        # Here's what the fields in the return value mean:
        # 0: Was this a modification
        # 1: Op to replace the first with (None means delete)
        # 2: Op to replace the second with (None means delete)
        # 3: If not None, a warning to emit
        # 4: Case number, for coverage analysis
        pair = (left.op, right.op)
        #
        # First op M
        #
        if pair == ("M", "M"):
            # Leave these in place, they get handled later.
            return (False, left, right, None, 0)
        # M a + D a -> D a
        # Or, could reduce to nothing if M a was the only modify..
        elif left.op == "M" and right.op in "D":
            if self.__ancestor_count(event, left.path) == 1:
                return (True, None, None, None, 1)
            else:
                return (True, right, None, None, 2)
        elif left.op == "M" and right.op == "R":
            # M a + R a b -> R a b M b, so R falls towards start of list
            if left.path == right.source:
                if self.__ancestor_count(event, left.path) == 1:
                    # M a has no ancestors, preceding R can be dropped
                    left.path = right.target
                    return (True, left, None, None, 3)
                else:
                    # M a has ancestors, R is still needed
                    left.path = right.target
                    return (True, right, left, None, 4)
            # M b + R a b can't happen.  If you try to generate this with
            # git mv it throws an error.  An ordinary mv results in D b M a.
            elif left.path == right.target:
                return(True, right, None, "M followed by R to the M operand?", -1)
        # Correct reduction for this would be M a + C a b -> C a b + M a + M b,
        # that is we'd have to duplicate the modify. We'll leave it in place
        # for now.
        elif left.op == "M" and right.op == "C":
            return (False, left, right, None, 5)
        #
        # First op D or deleteall
        #
        # Delete followed by modify undoes delete, since M carries whole files. 
        elif pair == ("D", "M"):
            return (True, None, right, None, 6)
        # But we have to leave deletealls in place, since they affect right ops
        elif pair == ("deleteall", "M"):
            return (False, left, right, None, 7)
        # These cases should be impossible.  But cvs2svn actually generates
        # adjacent deletes into Subversion dumpfiles which turn into (D, D).
        elif left.op == "deleteall" and right.op != "M":
            return (False, left, right,
                    "Non-M operation after deleteall?", -1)
        elif left.op == "D" and right.op == "D":
            return (True, left, None, None, -2)
        elif left.op == "D" and right.op in ("R", "C"):
            if left.path == right.source:
                return (False, left, right,
                        "R or C of %s after deletion?" % left.path, -3)
            else:
                return (False, left, right, None, 8)
        #
        # First op R
        #
        elif pair == ("R", "D"):
            if left.target == right.path:
                # Rename followed by delete of target composes to source delete
                right.path = left.source
                return (True, None, right, None, 9)
            else:
                # On rename followed by delete of source discard the delete
                # but user should be warned. 
                return (False, left, None,
                        "delete of %s after renaming to %s?" % (right.path, left.source), -4)
        # Rename followed by deleteall shouldn't be possible
        elif pair == ("R", "deleteall") and left.target == right.path:
            return (False, None, right,
                    "rename before deleteall not removed?", -5)
        # Leave rename or copy followed by modify alone
        elif pair == ("R", "M") or pair == ("C", "M"):
            return (False, left, right, None, 10)
        # Compose renames where possible
        elif left.op == "R" and right.op == "R":
            if left.target == right.source:
                left.target = right.target
                return (True, left, None, None, 11)
            else:
                return (False, left, right,
                        "R %s %s is inconsistent with following operation" \
                        % (left.source, left.target), -6)
        # We could do R a b + C b c -> C a c + R a b, but why?
        if left.op == "R" and right.op == "C":
            return (False, left, right, None, 12)
        #
        # First op C
        #
        elif pair == ("C", "D"):
            if left.source == right.path:
                # Copy followed by delete of the source is a rename.
                left.setOp("R")
                return (True, left, None, None, 13)
            elif left.target == right.path:
                # This delete undoes the copy
                return (True, None, None, None, 14)
        elif pair == ("C", "R"):
            if left.source == right.source:
                # No reduction
                return (False, left, right, None, 15)
            else:
                # Copy followed by a rename of the target reduces to single copy
                if left.target == right.source:
                    left.target = right.target
                    return (True, left, None, None, 16)
        elif pair == ("C", "C"):
            # No reduction
            return (False, left, right, None, 17)
        #
        # Case not covered
        #
        raise Fatal("can't compose op '%s' and '%s'" % (left, right))
    def canonicalize(self, commit):
        "Canonicalize the list of file operations in this commit."
        coverage = set()
        # Handling deleteall operations is simple
        lastdeleteall = None
        for (i, a) in enumerate(commit.fileops):
            if a.op == "deleteall":
                lastdeleteall = i
        if lastdeleteall is not None:
            if debug_enable(DEBUG_DELETE):
                announce("removing all before rightmost deleteall")
            commit.fileops = commit.fileops[lastdeleteall:]
            commit._pathset = None
        # Composition in the general case is trickier.
        while True:
            # Keep making passes until nothing mutates
            mutated = False
            for i in range(len(commit.fileops)):
                for j in range(i+1, len(commit.fileops)):
                    a = commit.fileops[i]
                    b = commit.fileops[j]
                    if a is not None and b is not None and a.relevant(b):
                        (modified, newa, newb, warn, case) = self.__compose(commit, a, b)
                        if debug_enable(DEBUG_DELETE):
                            announce("Reduction case %d fired on %s" % (case, (i,j)))
                        if modified:
                            mutated = True
                            commit.fileops[i] = newa
                            commit.fileops[j] = newb
                            if debug_enable(DEBUG_DELETE):
                                announce("During canonicalization:")
                                commit.fileop_dump()
                            if warn:
                                complain(warn)
                            coverage.add(case)
            if not mutated:
                break
            commit.fileops = [x for x in commit.fileops if x is not None]
            commit._pathset = None
        return coverage
    def delete(self, selected, policy):
        "Delete commits, handling multiple Ms on a file with specified policy"
        if debug_enable(DEBUG_DELETE):
            announce("Deletion list is %s" % [x+1 for x in selected])
        # Make sure we do deletions from greatest commit number to least
        selected = sorted(selected, reverse=True)
        dquiet = "quiet" in policy
        obliterate = "obliterate" in policy
        tagback = "tagback" in policy
        tagforward = "tagforward" in policy
        pushback = "pushback" in policy
        # Sanity checks
        if not dquiet:
            for ei in selected:
                event = self.events[ei]
                if isinstance(event, Blob):
                    raise Recoverable("attempt to directly delete blob %d" % (ei+1))
                elif  isinstance(event, Commit):
                    if obliterate:
                        speak = "warning: commit %s to be obliterated has " % event.mark 
                        if '/' in event.branch and not '/heads/' in event.branch:
                            complain(speak + "non-head branch attribute %s" % event.branch)
                        if not event.alldeletes():
                            announce(speak + "non-delete fileops.")
                            break
        altered = []
        # Here are the deletions
        to_delete = set()
        for ei in selected:
            event = self.events[ei]
            to_delete.add(event)
            # Nothing special to do for Reset, Tag, Passthrough, Blob
            if isinstance(event, Commit):
                if event.branch and "/tags/" in event.branch:
                    identical = False
                    if tagback:
                        if event.has_parents():
                            identical = event.parents()[0].branch == event.branch
                            if not identical:
                                event.parents()[0].set_branch(event.branch)
                    elif tagforward:
                        if event.has_children():
                            identical = event.first_child().branch == event.branch
                            if not identical:
                                event.first_child().setbranch(event.branch)
                            if event.has_children():
                                identical = event.first_child().branch == event.branch
                        if not identical:
                            complain("tag %s on event %s will be lost" % (event.branch, event.mark))
                # Decide the new target for tags
                filter_only = True
                if tagforward and event.has_children():
                    filter_only = False
                    new_target = event.first_child()
                elif tagback and event.parents():
                    filter_only = False
                    new_target = event.parents()[0]
                # Reparent each child
                for child in list(event.children()):
                    parents = iter(event.parents())
                    try:
                        first_parent = next(parents)
                    except StopIteration:
                        child.remove_parent(event)
                    else:
                        child.replace_parent(event, first_parent)
                        for parent in parents:
                            if parent not in child.parents():
                                child.add_parent(parent)
                    if not obliterate and not pushback:
                        # Prepend a copy of this event's file ops to
                        # each child's list and mark the child as
                        # needing resolution.
                        child.fileops = copy.copy(event.fileops) + child.fileops
                        child._pathset = None
                        altered.append(child)
                # We might be trying to hand the event's fileops to parents.
                if pushback:
                    # Append a copy of this event's file ops to each parent's
                    # list and mark the parent as needing resolution.
                    for parent in event.parents():
                        parent.fileops += copy.copy(event.fileops)
                        parent._pathset = None
                        altered.append(parent)
                    # We need to ensure all fileop blobs are defined before the
                    # corresponding fileop, in other words ensure that the blobs
                    # are before the parent with lowest index.
                    earliest = min(self.find(p.mark) for p in event.parents())
                    swap_indices = set()
                    for fileop in event.fileops:
                        if fileop.op == 'M':
                            blob_index = self.find(fileop.ref)
                            if blob_index > earliest: swap_indices.add(blob_index)
                    if swap_indices:
                        last = max(swap_indices)
                        neworder = itertools.chain(
                                swap_indices, # first take the blobs
                                # then all others
                                itertools.ifilterfalse(swap_indices.__contains__,
                                         xrange(earliest, last+1)) )
                        self.events[earliest:last+1] = map(
                                self.events.__getitem__, neworder)
                        self.declare_sequence_mutation()
                if filter_only:
                    to_delete.update(event.attachments)
                else:
                    # use a copy of attachments since it will be mutated
                    for t in list(event.attachments):
                        t.forget()
                        t.remember(self, target=new_target)
                # And forget the deleted event
                event.forget()
        # Do the actual deletions
        self.events = [e for e in self.events if e not in to_delete]
        self.declare_sequence_mutation()
        # Canonicalize all the commits that got ops pushed to them
        if not obliterate:
            for event in altered:
                if event in to_delete: continue
                if debug_enable(DEBUG_DELETE):
                    announce("Before canonicalization:")
                    event.fileop_dump()
                self.case_coverage |= self.canonicalize(event)
                if debug_enable(DEBUG_DELETE):
                    announce("After canonicalization:")
                    event.fileop_dump()
                # Now apply policy in the mutiple-M case
                cliques = event.cliques()
                if ("coalesce" not in policy and not obliterate) \
                        or debug_enable(DEBUG_DELETE):
                    for (path, oplist) in cliques.iteritems():
                        if len(oplist) > 1:
                            complain("commit %s has multiple Ms for %s"
                                    % (event.mark, path))
                if "coalesce" in policy:
                    # Only keep last M of each clique, leaving other ops alone
                    event.fileops = \
                           [op for (i, op) in enumerate(event.fileops)
                            if (op.op != "M") or (i == cliques[op.path][-1])]
                    event._pathset = None
                if debug_enable(DEBUG_DELETE):
                    announce("Commit %d, after applying policy:" % (ei + 1,))
                    event.fileop_dump()
    def quiet_delete(self, commit):
        self.delete([commit.index()], ["obliterate", "quiet", "tagback"])
    def __delitem__(self, index):
        # To make Repository a proper container (and please pylint)
        self.delete([index], ["obliterate", "quiet", "tagback"])
    #
    # Delete machinery ends here
    #
    def front_events(self):
        "Return options, features."
        return [e for e in self.events \
                if isinstance(e, Passthrough) \
                and (e.text.startswith("option") or e.text.startswith("feature"))]
    def renumber(self, origin=1, baton=None):
        "Renumber the marks in a repo starting from a specified origin."
        markmap = {}
        def remark(m, e):
            try:
                return ":" + repr(markmap[m])
            except KeyError:
                raise Fatal("unknown mark %s in %s cannot be renumbered!" % \
                            (m, e.__class__.__name__.lower()))
        if baton:
            count = len(self.events)
            baton.startcounter(" %%%dd of %s" % (len(str(count)), count))
        newcount = 0
        for event in self.events:
            if hasattr(event, "mark"):
                if event.mark is None:
                    continue
                elif not event.mark.startswith(":"):
                    raise Fatal("field not in mark format")
                else:
                    markmap[event.mark] = origin + newcount
                    newcount += 1
        for event in self.events:
            for fld in ("mark", "committish"):
                try:
                    old = getattr(event, fld)
                    if old is not None:    
                        new = remark(old, event)
                        if debug_enable(DEBUG_UNITE):
                            announce("renumbering %s -> %s in %s.%s" % (old, new,
                                                                        event.__class__.__name__,
                                                                        fld))
                        setattr(event, fld, new)
                except AttributeError:
                    pass
        for commit in self.commits():
            for fileop in commit.fileops:
                if fileop.op == "M" and fileop.ref.startswith(":"):
                    new = remark(fileop.ref, fileop)
                    if debug_enable(DEBUG_UNITE):
                        announce("renumbering %s -> %s in fileop" % (fileop.ref, new))
                    fileop.ref = new
            if baton:
                baton.bumpcounter()
        self.invalidate_object_map()
        if baton:
            baton.endcounter()
    def uniquify(self, color):
        "Disambiguate branches, tags, and marks using the specified label."
        for event in self.events:
            # Disambiguate all tags.
            for (objtype, attr) in ((Tag, "name"),):
                if isinstance(event, objtype):
                    setattr(event, attr, color + "-" + getattr(event, attr))
            # Disambiguate all branches and refs.
            for (objtype, attr) in ((Commit, "branch"),
                                 (Reset, "ref")):
                if isinstance(event, objtype):
                    old = getattr(event, attr)
                    new = old + "-" + color
                    if debug_enable(DEBUG_UNITE):
                        announce("moving %s -> %s in %s.%s"
                                 % (old, new,
                                    objtype.__name__,
                                    attr))
                    setattr(event, attr, new)
            # Disambiguate defining marks.
            for fld in ("mark", "committish"):
                if hasattr(event, fld):
                    old = getattr(event, fld)
                    if old is None:
                        continue
                    elif not old.startswith(":"):
                        raise Fatal("field not in mark format")
                    else:
                        new = old + "-" + color
                        if debug_enable(DEBUG_UNITE):
                            announce("moving %s -> %s in %s.%s"
                                     % (old, new,
                                        event.__class__.__name__,
                                        fld))
                        setattr(event, fld, new)
            self.invalidate_object_map()
            # Now marks in fileops
            if isinstance(event, Commit):
                parent_marks = event.parent_marks()
                for (j, old) in enumerate(parent_marks):
                    if '-' not in old:
                        new = old + "-" + color
                        if debug_enable(DEBUG_UNITE):
                            announce("moving %s -> %s in parents" % (old, new))
                        parent_marks[j] = new
                event.set_parent_marks(parent_marks)
                for fileop in event.fileops:
                    if fileop.op == "M" and fileop.ref.startswith(":"):
                        new = fileop.ref + "-" + color
                        if debug_enable(DEBUG_UNITE):
                            announce("moving %s -> %s in fileop"
                                     % (fileop.ref, new))
                        fileop.ref = new
        return
    def absorb(self, other):
        # Only vcstype, sourcedir, and basedir are not copied here
        self.preserve_set |= other.preserve_set
        self.case_coverage |= other.case_coverage
        # Strip feature events off the front, they have to stay in front.
        while isinstance(other[0], Passthrough):
            lenfront = sum(1 for x in self.events if isinstance(x, Passthrough))
            self.events.insert(lenfront, other.events.pop(0))
        other.renumber(len(self.events))
        # Merge in the non-feature events and blobs
        self.events += other.events
        self.declare_sequence_mutation()
        # Transplant in fileops, blobs, and other impedimenta
        for event in other:
            if hasattr(event, "moveto"):
                event.moveto(self)
        other.events = []
        other.cleanup()
        #del other
    def graft(self, graft_repo, graft_point):
        "Graft a repo on to this one at a specified point."
        where = self.events[graft_point]
        if not isinstance(where, Commit):
            raise Recoverable("%s in %s is not a commit." % \
                              (where.mark, self.name))
        # Errors aren't recoverable after this
        graft_repo.uniquify(graft_repo.name)
        graftroot = graft_repo.earliest_commit()
        self.absorb(graft_repo)
        graftroot.add_parent(where.mark)
        self.renumber()
    def __last_modification(self, commit, path):
        "Locate the last modification of the specified path before this commit."
        ancestors = commit.parents()
        while ancestors:
            backto = []
            for ancestor in ancestors:
                # This is potential trouble if the file was renamed
                # down one side of a merge bubble but not the other.
                # Might cause an internal-error message, but no real
                # harm will be done.
                for (i, fileop) in enumerate(ancestor.fileops):
                    if fileop.op == 'R' and fileop.target == path:
                        path = fileop.source
                    elif fileop.op == 'M' and fileop.path == path:
                        return (ancestor, i)
                else:
                    backto += ancestor.parents()
            ancestors = backto
        return None
    def move_to_rename(self):
        "Make rename sequences from matched delete-modify pairs."
        # TODO: Actually use this somewhere...
        rename_count = 0
        for commit in self.commits():
            renames = []
            for (d, op) in enumerate(commit.fileops):
                if op.op == 'D':
                    previous = self.__last_modification(commit, op.path)
                    if not previous:
                        raise Recoverable("internal error looking for renames of %s" % op.path)
                    else:
                        (ancestor, i) = previous
                        for (m, op2) in enumerate(commit.fileops):
                            if op2.op == 'M' and \
                               ancestor.fileops[i].mode == op2.mode and \
                               ancestor.fileops[i].ref == op2.ref:
                                renames.append((d, m))
                                rename_count += 1
                                break
            for (d, m) in renames:
                commit.fileops[d].source = commit.fileops[d].path
                commit.fileops[d].target = commit.fileops[m].path
                del commit.fileops[d].path
                commit.fileops[d].op = 'R'
                commit.fileops.pop(m)
                commit._pathset = None
        return rename_count
    def path_walk(self, selection, hook=lambda path: path):
        "Apply a hook to all paths, returning the set of modified paths."
        modified = set()
        for ei in selection:
            event = self.events[ei]
            if isinstance(event, Commit):
                for fileop in event.fileops:
                    if fileop.op in ("M", "D"):
                        newpath = hook(fileop.path)
                        if newpath != fileop.path:
                            modified.add(newpath)
                        fileop.path = newpath
                    elif fileop.op in ("R", "C"):
                        newpath = hook(fileop.source)
                        if newpath != fileop.source:
                            modified.add(newpath)
                        fileop.source = newpath
                        newpath = hook(fileop.target)
                        if newpath != fileop.target:
                            modified.add(newpath)
                        fileop.target = newpath
                event._pathset = None
        return sorted(modified)
    def split_commit(self, where, splitfunc):
        event = self.events[where]
        # Fileop split happens here
        (fileops, fileops2) = splitfunc(event.fileops)
        if fileops and fileops2:
            self.events.insert(where+1, event.clone())
            self.declare_sequence_mutation()
            event2 = self.events[where+1]
            # need a new mark
            assert(event.mark == event2.mark)
            if event.splits is None:
                event.splits = 1
            else:
                event.splits += 1
            event2.set_mark("%s.%s" % (event.mark, event.splits))
            self.invalidate_object_map()
            # Fix up parent/child relationships
            for child in list(event.children()):
                child.replace_parent(event, event2)
            event2.set_parents([event])
            # and then finalize the ops
            event2.fileops = fileops2
            event2._pathset = None
            event.fileops = fileops
            event._pathset = None
            return True
        return False
    def split_commit_by_index(self, where, splitpoint):
        return self.split_commit(where,
                                 lambda ops: (ops[splitpoint:],
                                              ops[:splitpoint]))
    def split_commit_by_prefix(self, where, prefix):
        return self.split_commit(where,
                                 lambda ops: ([op for op in ops if not op.path.startswith(prefix)],
                                              [op for op in ops if (op.path or op.target) and
                                                                   (op.path or op.target).startswith(prefix)]))

    # Sequence emulation methods
    def __len__(self):
        return len(self.events)
    def __getitem__(self, i):
        return self.events[i]
    def __setitem__(self, i, v):
        self.events[i] = v

def read_repo(source, preferred):
    "Read a repository using fast-import."
    if debug_enable(DEBUG_SHUFFLE):
        if preferred:
            announce("looking for a %s repo..." % preferred.name)
        else:
            announce("reposurgeon: looking for any repo at %s..." % \
                     os.path.abspath(source))
    hitcount = 0
    extractor = vcs = None
    for possible in vcstypes:
        if preferred and possible.name != preferred.name:
            continue
        subdir = os.path.join(source, possible.subdirectory)
        if os.path.exists(subdir) and os.path.isdir(subdir):
            vcs = possible
            hitcount += 1
    for possible in extractors:
        if preferred and possible.name != preferred.name:
            continue
        subdir = os.path.join(source, possible.subdirectory)
        if os.path.exists(subdir) and os.path.isdir(subdir):
            if possible.visible or preferred \
                   and possible.name == preferred.name:
                extractor = possible
                hitcount += 1
    if hitcount == 0:
        raise Recoverable("couldn't find a repo under %s" % os.path.relpath(source))
    elif hitcount > 1:
        raise Recoverable("too many repos under %s" % os.path.relpath(source))
    elif verbose > 0:
        announce("found %s repository" % getattr(vcs or extractor, "name"))
    repo = Repository()
    repo.sourcedir = source
    if vcs:
        repo.vcs = vcs
        repo.preserve_set = vcs.preserve
        showprogress = (verbose > 0) and not "export-progress" in repo.export_style()
        context = {"basename": os.path.basename(repo.sourcedir)}
    try:
        here = os.getcwd()
        os.chdir(repo.sourcedir)
        # We found a matching VCS type
        if vcs:
            if "%(tempfile)s" in repo.vcs.exporter:
                try:
                    (tfdesc, tfname) = tempfile.mkstemp()
                    assert tfdesc > -1    # pacify pylint
                    context["tempfile"] = tfname
                    do_or_die(repo.vcs.exporter % context, "repository export")
                    with open(tfname) as tp:
                        repo.fast_import(tp, progress=showprogress)
                finally:
                    os.remove(tfname)
            else:
                with popen_or_die(repo.vcs.exporter % context, "repository export") as tp:
                    repo.fast_import(tp, progress=showprogress)
            if repo.vcs.authormap and os.path.exists(repo.vcs.authormap):
                announce("reading author map.")
                with open(repo.vcs.authormap) as fp:
                    repo.read_authormap(range(len(repo.events)),fp)
            fossils = os.path.join(vcs.subdirectory, "fossils")
            if os.path.exists(fossils):
                with open(fossils) as rfp:
                    repo.read_fossilmap(rfp)
            if vcs.lister:
                def fileset(exclude):
                    allfiles = []
                    for root, dirs, files in os.walk("."):
                        allfiles += [os.path.join(root, name)[2:] for name in files]
                        for exdir in exclude:
                            if exdir in dirs:
                                dirs.remove(exdir)
                    return set(allfiles)
                with popen_or_die(vcs.lister) as fp:
                    repofiles = set(fp.read().split())
                allfiles = fileset(exclude=[vcs.subdirectory]\
                                   + glob.glob(".rs*"))
                repo.preserve_set = allfiles - repofiles
            # kluge: git-specific hook
            if repo.vcs.name == "git":
                if os.path.exists(".git/cvs-revisions"):
                    announce("reading cvs-revisions map.")
                    pathrev_to_hash = {}
                    # Pass 1: Get git's path/revision to hash mapping
                    for line in open(".git/cvs-revisions"):
                        (path, rev, hashv) = line.split()
                        pathrev_to_hash[(path, rev)] = hashv
                    # Pass 2: get git's hash to (time,person) mapping 
                    hash_to_action = {}
                    stamp_set = set({})
                    with popen_or_die("git log --all --format='%H %ct %ce'", "r") as fp:
                        for line in fp:
                            (hashv, ctime, cperson) = line.split()
                            stamp = (int(ctime), cperson)
                            if stamp in stamp_set:
                                complain("more than one commit matches %s!%s (%s)" \
                                         % (rfc3339(int(ctime)), cperson, hashv))
                                if stamp in hash_to_action:
                                    del hash_to_action[hashv]
                            else:
                                hash_to_action[hashv] = stamp
                                stamp_set.add(stamp)
                        # Pass 3: build a (time,person) to commit mapping 
                        action_to_mark = {}
                        for commit in repo.commits():
                            action_to_mark[(commit.committer.date.timestamp, commit.committer.email)] = commit
                        # Pass 4: use it to set commit properties
                        for ((path, rev), value) in pathrev_to_hash.iteritems():
                            if value in hash_to_action:
                                (ctime, cperson) = hash_to_action[value]
                                action_to_mark[(ctime, cperson)].fossil_id = "CVS:%s:%s" % (path, rev)
                        del pathrev_to_hash
                        del hash_to_action
                        del stamp_set
        # We found a matching custom extractor
        if extractor:
            streamer = RepoStreamer(extractor)
            streamer.extract(repo, progress=verbose>0)
    finally:
        os.chdir(here)
    return repo

class CriticalRegion:
    "Encapsulate operations to try and make us un-interruptible."
    # This number is magic. Python sets a much higher signal.NSIG
    # value, but under Linux the signal calls start to trigger
    # runtime errors at this value and above.
    NSIG = 32
    def __init__(self):
        self.handlers = None	# Pacifies pylint
    def __enter__(self):
        "Begin critical region."
        if debug_enable(DEBUG_COMMANDS):
            complain("critical region begins...")
        # Alas that we lack sigblock support
        self.handlers = [None]*(CriticalRegion.NSIG+1)
        for sig in range(1, CriticalRegion.NSIG):
            if not sig in (signal.SIGKILL, signal.SIGSTOP):
                self.handlers[sig] = signal.signal(sig, signal.SIG_IGN)
    def __exit__(self, extype_unused, value_unused, traceback_unused):
        "End critical region."
        for sig in range(1, CriticalRegion.NSIG):
            if not sig in (signal.SIGKILL, signal.SIGSTOP):
                signal.signal(sig, self.handlers[sig])
        if debug_enable(DEBUG_COMMANDS):
            complain("critical region ends.")
        return False

def rebuild_repo(repo, target, preferred):
    "Rebuild a repository from the captured state."
    if not target and repo.sourcedir:
        target = repo.sourcedir
    if target:
        target = os.path.abspath(target)
    else:
        raise Recoverable("no default destination for rebuild")
    vcs = preferred or repo.vcs
    if not vcs:
        raise Recoverable("please prefer a repo type first")
    if not hasattr(vcs, "exporter") or vcs.importer is None:
        raise Recoverable("%s repositories are supported for read only." \
                          % preferred.name)

    if not os.path.join("refs", "heads", "master") in repo.branchset():
        complain("repository has no branch named master. git will have no HEAD commit after the import; consider using the branch command to rename one of your branches to master.")

    # Create a new empty directory to do the rebuild in
    if not os.path.exists(target):
        staging = target
        try:
            os.mkdir(target)
        except OSError:
            raise Recoverable("target directory creation failed")
    else:
        staging = target + "-stage" + str(os.getpid())
        assert(os.path.isabs(target) and os.path.isabs(staging))
        try:
            os.mkdir(staging)
        except OSError:
            raise Recoverable("staging directory creation failed")

    # Try the rebuild in the empty staging directory 
    here = os.getcwd()
    try:
        os.chdir(staging)
        if vcs.initializer:
            do_or_die(vcs.initializer, "repository initialization")
        parameters = {"basename": os.path.basename(target)}
        if "%(tempfile)s" in vcs.importer:
            try:
                (tfdesc, tfname) = tempfile.mkstemp()
                assert tfdesc > -1    # pacify pylint
                with open(tfname, "w") as tp:
                    repo.fast_export(range(len(repo)), tp, progress=verbose>0, target=preferred)
                do_or_die(vcs.exporter % parameters, "import")
            finally:
                os.remove(tfname)
        else:
            with popen_or_die(vcs.importer % parameters, "import", mode="w") as tp:
                repo.fast_export(range(len(repo)), tp,
                                 target=preferred,
                                 progress=verbose>0)
        if repo.write_fossils:
            try:
                fossilfile = os.path.join(vcs.subdirectory, "fossils")
                with open(fossilfile, "w") as wfp:
                    repo.write_fossilmap(wfp)
            except IOError:
                raise Recoverable("fossils file %s could not be written." \
                                  % fossilfile)

        do_or_die(vcs.checkout, "repository_checkout")
        if verbose:
            announce("rebuild is complete.")

        os.chdir(here)
        # Rebuild succeeded - make an empty backup directory
        backupcount = 1
        while True:
            savedir = target + (".~%d~" % backupcount)
            if os.path.exists(savedir):
                backupcount += 1
            else:
                break
        assert(os.path.abspath(savedir))
        os.mkdir(savedir)

        if staging != target:
            # This is a critical region.  Ignore all signals until we're done.
            with CriticalRegion():
                # Move the unmodified repo contents in target to the
                # backup directory.  Then move the staging contents to the
                # target directory.  Finally, restore designated files
                # from backup to target.
                for sub in os.listdir(target):
                    os.rename(os.path.join(target, sub),
                              os.path.join(savedir, sub))
                if verbose:
                    announce("repo backed up to %s." % os.path.relpath(savedir))
                for sub in os.listdir(staging):
                    os.rename(os.path.join(staging, sub),
                              os.path.join(target, sub))
                if verbose:
                    announce("modified repo moved to %s." % os.path.relpath(target))
            if repo.preserve_set:
                for sub in repo.preserve_set:
                    src = os.path.join(savedir, sub)
                    dst = os.path.join(target, sub)
                    if os.path.exists(src):
                        if os.path.isdir(src):
                            shutil.copytree(src, dst)
                        else:
                            shutil.copy2(src, dst)
                if verbose:
                    announce("preserved files restored.")
            elif verbose:
                announce("no preservations.")
    finally:
        os.chdir(here)
        if staging != target:
            nuke(staging, "reposurgeon: removing staging directory")

def do_or_die(dcmd, legend=""):
    "Either execute a command or raise a fatal exception."
    if legend:
        legend = " "  + legend
    if debug_enable(DEBUG_COMMANDS):
        announce("executing '%s'%s" % (dcmd, legend))
    try:
        retcode = subprocess.call(dcmd, shell=True)
        if retcode < 0:
            raise Fatal("child was terminated by signal %d." % -retcode)
        elif retcode != 0:
            raise Fatal("child returned %d." % retcode)
    except (OSError, IOError) as e:
        raise Fatal("execution of %s%s failed: %s" % (dcmd, legend, e))

class popen_or_die:
    "Read or write from a subordinate process."
    def __init__(self, command, legend="", mode="r"):
        assert mode in ("r", "w")
        self.command = command
        self.legend = legend
        self.mode = mode
        if self.legend:
            self.legend = " "  + self.legend
        self.fp = None
    def __enter__(self):
        if debug_enable(DEBUG_COMMANDS):
            if self.mode == "r":
                announce("%s: reading from '%s'%s" % (rfc3339(time.time()), self.command, self.legend))
            else:
                announce("%s: writing to '%s'%s" % (rfc3339(time.time()), self.command, self.legend))
        try:
            self.fp = os.popen(self.command, self.mode)
            return self.fp
        except (OSError, IOError) as oe:
            raise Fatal("execution of %s%s failed: %s" \
                                 % (self.command, self.legend, oe))
    def __exit__(self, extype, value, traceback):
        if extype:
            if verbose:
                complain("fatal exception in popen_or_die.")
            raise extype, value, traceback
        if self.fp.close() is not None:
            raise Fatal("%s%s returned error." % (self.command, self.legend))
        return False

class Recoverable(Exception):
    def __init__(self, msg):
        Exception.__init__(self)
        self.msg = msg

class RepositoryList:
    "A repository list with selection and access by name."
    def __init__(self):
        self.repo = None
        self.repolist = []
        self.cut_index = None
    def chosen(self):
        return self.repo
    def choose(self, repo):
        self.repo = repo
    def unchoose(self):
        self.repo = None
    def reponames(self):
        "Return a list of the names of all repositories."
        return [r.name for r in self.repolist]
    def uniquify(self, name):
        "Uniquify a repo name in the repo list."
        if name.endswith(".fi"):
            name = name[:-3]
        elif name.endswith(".svn"):
            name = name[:-4]
        if name not in self.reponames():
            return name
        else:
            # repo "foo" is #1
            seq = 2
            while name + str(seq) in self.reponames():
                seq += 1
            return name + str(seq)
    def repo_by_name(self, name):
        "Retrieve a repo by name."
        return self.repolist[self.reponames().index(name)]
    def remove_by_name(self, name):
        "Remove a repo by name."
        if self.repo and self.repo.name == name:
            self.unchoose()
        self.repolist.pop(self.reponames().index(name))        
    def cut_conflict(self, early, late):
        "Apply a graph-coloring algorithm to see if the repo can be split here."
        self.cut_index = late.parent_marks().index(early.mark)
        late.remove_parent(early)
        def do_color(commit, color):
            commit.color = color
            for fileop in commit.fileops:
                if fileop.op == "M" and fileop.ref != "inline":
                    blob = self.repo.find(fileop.ref)
                    assert isinstance(self.repo[blob], Blob)
                    self.repo[blob].colors.append(color)
        do_color(early, "early")
        do_color(late, "late")
        conflict = False
        keepgoing = True
        while keepgoing and not conflict:
            keepgoing = False
            for event in self.repo.commits():
                if event.color:
                    for neighbor in itertools.chain(event.parents(), event.children()):
                        if neighbor.color == None:
                            do_color(neighbor, event.color)
                            keepgoing = True
                            break
                        elif neighbor.color != event.color:
                            conflict = True
                            break
        return conflict
    def cut_clear(self, early, late):
        "Undo a cut operation and clear all colors."
        late.insert_parent(self.cut_index, early.mark)
        for event in self.repo:
            if hasattr(event, "color"):
                event.color = None
            if hasattr(event, "colors"):
                event.colors = []
    def cut(self, early, late):
        "Attempt to topologically cut the selected repo."
        if self.cut_conflict(early, late):
            self.cut_clear(early, late)
            return False
        # Repo can be split, so we need to color tags
        for t in self.repo.events:
            if isinstance(t, Tag):
                for c in self.repo.events:
                    if isinstance(c, Commit):
                        if c is t.target:
                            t.color = c.color
        # Front events go with early segment, they'll be copied to late one. 
        for event in self.repo.front_events():
            event.color = "early"        
        assert all(hasattr(x, "color") or hasattr(x, "colors") or isinstance(x, Reset) for x in self.repo)
        # Resets are tricky.  One may have both colors.
        # Blobs can have both colors too, through references in
        # commits on both sides of the cut, but we took care
        # of that earlier.
        trackbranches = {"early": set(), "late": set()}
        for commit in self.repo.commits():
            if commit.color is None:
                complain("%s is uncolored!" % commit.mark)
            else:
                trackbranches[commit.color].add(commit.branch)
        # Now it's time to do the actual partitioning
        early = Repository(self.repo.name + "-early")
        os.mkdir(early.subdir())
        late = Repository(self.repo.name + "-late")
        os.mkdir(late.subdir())
        for event in self.repo:
            if isinstance(event, Reset):
                if event.ref in trackbranches["early"]:
                    early.addEvent(copy.copy(event))
                if event.ref in trackbranches["late"]:
                    late.addEvent(copy.copy(event))
            elif isinstance(event, Blob):
                if "early" in event.colors:
                    early.addEvent(event.clone(early))
                if "late" in event.colors:
                    late.addEvent(event.clone(late))
            else:
                if event.color == "early":
                    if hasattr(event, "moveto"):
                        event.moveto(early)
                    early.addEvent(event)
                elif event.color == "late":
                    if hasattr(event, "moveto"):
                        event.moveto(late)
                    late.addEvent(event)
                else:
                    # TODO: Someday, color passthroughs that aren't fronted.
                    raise Fatal("coloring algorithm failed on %s" % event)
        # Options and features may need to be copied to the late fragment.
        late.events = copy.copy(early.front_events()) + late.events
        late.declare_sequence_mutation()
        # Add the split results to the repo list. 
        self.repolist.append(early)
        self.repolist.append(late)
        self.repo.cleanup()
        self.remove_by_name(self.repo.name)
        return True
    def unite(self, factors):
        "Unite multiple repos into a union repo."
        factors.sort(key=operator.methodcaller("earliest"))
        roots = [x.earliest_commit() for x in factors]
        union = Repository("+".join(r.name for r in factors))
        os.mkdir(union.subdir())
        for (i, factor) in enumerate(factors):
            if i != 0:
                factor.uniquify(factor.name)
            union.absorb(factor)
            self.remove_by_name(factor.name)
        # Renumber all events
        union.renumber()
        # Sort out the root grafts. The way we used to do this involved
        # sorting the union commits by timestamp, but this fails because
        # in real-world repos timestamp order may not coincide with mark
        # order - leading to "mark not defined" errors from the importer at
        # rebuild time.  This method gives less intuitive results but at
        # least means we never need to reorder.
        for root in roots[1:]:
            most_recent = None
            def predicate(event):
                return root.when() >= event.when() \
                        or (most_recent and event.when() >= most_recent.when())
            # Get last commit such that it and all before satisfy predicate()
            # Never raises IndexError since union.earliest_commit() is root[0]
            # which satisfies predicate() thanks to factors sorting.
            most_recent = collections.deque(
                    itertools.takewhile(predicate, union.commits()),
                    maxlen = 1).pop()
            if most_recent.mark is None:
                # This should never happen either.
                raise Fatal("can't link to commit with no mark")
            root.add_parent(most_recent.mark)
        # Put the result on the load list
        self.repolist.append(union)
        self.choose(union)
    def expunge(self, selection, matchers):
        "Expunge a set of files from the commits in the selection set."
        def digest(toklist):
            return re.compile("|".join("(?:" + s + ")" for s in toklist))
        try:
            # First pass: compute fileop deletions
            alterations = []
            expunge = digest(matchers)
            for ei in selection:
                event = self.repo[ei]
                deletia = []
                if hasattr(event, "fileops"):
                    for (i, fileop) in enumerate(event.fileops):
                        if debug_enable(DEBUG_DELETE):
                            print(str(fileop))
                        if fileop.op in "DM":
                            if expunge.search(fileop.path):
                                deletia.append(i)
                        elif fileop.op in "RC":
                            fileop.sourcedelete = expunge.search(fileop.source)
                            fileop.targetdelete = expunge.search(fileop.target)
                            if fileop.sourcedelete:
                                deletia.append(i)
                                announce("following %s of %s to %s" %
                                         (fileop.op,
                                          fileop.source,
                                          fileop.target))
                                if fileop.op == "R" and fileop.source in matchers:
                                    matchers.remove(fileop.source)
                                matchers.append("^" + fileop.target + "$")
                                expunge = digest(matchers)
                            elif fileop.targetdelete:
                                if fileop.op == "R":
                                    fileop.op = "D"
                                elif fileop.op == "C":
                                    deletia.append(i)
                                matchers.append("^" + fileop.target + "$")
                                expunge = digest(matchers)
                alterations.append(deletia)
        except re.error:
            raise Recoverable("you confused the regexp processor!")
        # Second pass: perform actual fileop expunges
        expunged = Repository(self.repo.name + "-expunges")
        expunged.seekstream = self.repo.seekstream
        expunged.makedir()
        for event in self.repo:
            event.deletehook = None
        for (ei, deletia) in zip(selection, alterations):
            event = self.repo[ei]
            keepers = []
            blobs = []
            deletia.reverse()
            for i in deletia:
                fileop = event.fileops[i]
                if fileop.op == 'D':
                    keepers.append(fileop)
                    if verbose:
                        announce("at %d, expunging D %s" \
                                 % (ei+1, fileop.path))
                elif fileop.op == 'M':
                    keepers.append(fileop)
                    if fileop.ref != 'inline':
                        bi = self.repo.find(fileop.ref)
                        blob = self.repo[bi]
                        assert(isinstance(blob, Blob))
                        blobs.append(blob)
                    if verbose:
                        announce("at %d, expunging M %s" \
                                 % (ei+1, fileop.path))
                elif fileop.op in ("R", "C"):
                    assert(fileop.sourcedelete or fileop.targetdelete)
                    if fileop.sourcedelete and fileop.targetdelete:
                        keepers.append(fileop)
                event.fileops.pop(i)
                event._pathset = None
            # If there are any keeper fileops, hang them them and
            # their blobs on deletehooks, cloning the commit() for them.
            if keepers:
                keepers.reverse()
                blobs.reverse()
                newevent = event.clone(expunged)
                newevent.fileops = keepers
                newevent._pathset = None
                for blob in blobs:
                    blob.deletehook = blob.clone(expunged)
                event.deletehook = newevent
        # Build the new repo and hook it into the load list
        expunged.events = copy.copy(self.repo.front_events())
        expunged.declare_sequence_mutation()
        expunged_branches = expunged.branchset()
        for event in self.repo:
            if event.deletehook:
                expunged.addEvent(event.deletehook)
                event.deletehook = None
            elif isinstance(event, Reset):
                if event.target is not None:
                    if event.target.deletehook:
                        expunged.addEvent(copy.deepcopy(event))
                elif isinstance(event, Reset) and event.ref in expunged_branches:
                    newreset = copy.copy(event)
                    newreset.repo = expunged
                    expunged.addEvent(newreset)
            elif isinstance(event, Tag) and \
                    event.target is not None and \
                    event.target.deletehook:
                expunged.addEvent(copy.deepcopy(event))
        for event in itertools.chain(self.repo.events, expunged.events):
            if hasattr(event, "deletehook"):
                delattr(event, "deletehook")
        expunged_marks = set(event.mark for event in expunged.events if hasattr(event, "mark"))
        for event in expunged.events:
            if hasattr(event, "parents"):
                event.set_parents([e for e in event.parents() if e.mark in expunged_marks])
        keeper_marks = set(event.mark for event in self.repo.events if hasattr(event, "mark"))
        for event in self.repo.events:
            if hasattr(event, "parents"):
                event.set_parents([e for e in event.parents() if e.mark in keeper_marks])
        backreferences = collections.Counter()
        for event in self.repo.events:
            if isinstance(event, Commit):
                for fileop in event.fileops:
                    if fileop.op == 'M':
                        backreferences[fileop.ref] += 1
        # Now remove commits that no longer have fileops, and released blobs.
        deletia = (not ((isinstance(e, Commit) and len(e.fileops)==0) or (isinstance(e, Blob) and not backreferences[e.mark])) for e in self.repo.events)
        deletia = [i for i,e in enumerate(deletia) if not e]
        self.repolist.append(expunged)
        if not deletia:
            announce("deletion set is empty.")
            return
        if verbose:
            announce("deleting blobs and empty commits %s" % [x+1 for x in deletia])
        self.repo.delete(deletia, ["obliterate", "quiet"])

class RepoSurgeon(cmd.Cmd, RepositoryList):
    "Repository surgeon command interpreter."
    OptionFlags = (
        ("svn_use_uuid", """\
    If set, use Subversion UUID when faking up email addresses, a la git-svn.
Otherwise, fake up addresses the way git cvs-import does it.
"""),
        ("svn_nobranch", """\
    If set, don't perform branch analysis when lifting a Subversion repo. Leave
it as a linear sequence of commits. This may be useful if the repo has an
unusual topology and you intend to do your own branch surgery.
"""),
        ("svn_ignore_properties", """\
    Suppress read-time warnings about discarded property settings.
"""),
        ("canonicalize", """\
    If set, mailbox_in and edit will canonicalize comments by replacing CR-LF
with LF, stripping leading and trailing whitespace and then appending a LF.
"""),
        ("fossilize", """\
    If set, the Fossil-ID of each commit is appended to its commit comment
at write time. This option is mainly useful for debugging conversion edge cases.
"""),
        ("compressblobs", """\
Use compression for on-disk copies of blobs. Accepts an increase in repository
read and write time in order to reduce the anount of disk space required while
editing; this may be useful for large repositories. No effect if the edit
input was a dump stream; in that case, reposurgeon doesn't make on-disk blob
copies at all (it points into sections of the input stream instead).
"""),
        )
    class LineParse:
        "Preparse a command line."
        def __init__(self, line, capabilities=None):
            self.line = line
            self.capabilities = capabilities or []
            self.stdin = sys.stdin
            self.stdout = sys.stdout
        def __enter__(self):
            # Input redirection
            m = re.search(r"<\S+", self.line)
            if m:
                if "stdin" not in self.capabilities:
                    raise Recoverable("no support for < redirection")
                infile = m.group(0)[1:]
                if infile and infile != '-':
                    try:
                        self.stdin = open(infile, "r")
                    except (IOError, OSError):
                        raise Recoverable("can't open %s for read" \
                                          % infile)
                self.line = self.line[:m.start(0)] + self.line[m.end(0)+1:]
            # Output redirection
            m = re.search(r">\S+", self.line)
            if m:
                if "stdout" not in self.capabilities:
                    raise Recoverable("no support for > redirection")
                outfile = m.group(0)[1:]
                if outfile and outfile != '-':
                    try:
                        self.stdout = open(outfile, "w")
                    except (IOError, OSError):
                        raise Recoverable("can't open %s for write" \
                                          % outfile)
                self.line = self.line[:m.start(0)] + self.line[m.end(0)+1:]
            return self
        def __exit__(self, extype_unused, value_unused, traceback_unused):
            pass
        def tokens(self):
            "Return the argument token list after the parse for redirects."
            return self.line.split()
    def __init__(self):
        cmd.Cmd.__init__(self)
        RepositoryList.__init__(self)
        self.use_rawinput = True
        self.echo = 0
        self.prompt = "reposurgeon% "
        self.preferred = None
        self.selection = []
        self.line = ""
        self.history = []
        self.callstack = []
        self.profile_log = None
        for option in dict(RepoSurgeon.OptionFlags):
            global_options[option] = False
        global_options['svn_branchify'] = ['trunk', 'tags/*', 'branches/*', '*']
    #
    # Housekeeping hooks.
    #
    def onecmd(self, line):
        "Execute one command, fielding interrupts for recoverable exceptions."
        try:
            cmd.Cmd.onecmd(self, line)
        except Recoverable as e:
            complain(e.msg)
    def postcmd(self, unused, line):
        assert unused is not []   # pacify pylint
        if line == "EOF":
            return True
    def emptyline(self):
        pass
    def precmd(self, line):
        "Pre-command hook."
        self.history.append(line.rstrip())
        if self.echo:
            sys.stdout.write(line.rstrip()+"\n")
        if "#" in line:
            line = line[:line.index("#")].rstrip()
        return line
    def do_shell(self, line):
        "Execute a shell command."
        sys.stdout.flush()
        sys.stderr.flush()
        if os.system(line):
            raise Recoverable("'shell %s' returned error." % line)
    def do_EOF(self, unused):
        "Terminate reposurgeon."
        assert unused is not None   # pacify pylint
        print("")
        return True
    def cleanup(self):
        "Tell all the repos we're holding to clean up."
        if debug_enable(DEBUG_SHUFFLE):
            announce("interpreter cleanup called.")
        for repo in self.repolist:
            repo.cleanup()
    #
    # The selection-language parsing code starts here.
    #
    def set_selection_set(self, line, default=None):
        "Implement object-selection syntax."
        # Returns the line with the selection removed
        self.selection = []
        if not self.chosen():
            return line
        self.line = line
        self.selection = list(self.eval_expression())
        if self.line == line:
            self.selection = default
        else:
            # TODO: We probably want to stop doing this
            self.selection.sort()
        return self.line.lstrip()
    def peek(self):
        return self.line and self.line[0]
    def pop(self):
        if not self.line:
            return ''
        else:
            c = self.line[0]
            self.line = self.line[1:]
            return c
    def eval_expression(self):
        if debug_enable(DEBUG_LEXER):
            announce("eval_expression(%s)" % self.line)
        self.line = self.line.lstrip()
        value = self.eval_disjunct()
        c = self.peek()
        while True:
            c = self.peek()
            if c != '?':
                break
            self.pop()
            add_list = []
            remove_list = []
            for ei in value:
                event = self.chosen().events[ei]
                if isinstance(event, Commit):
                    for parent in event.parents():
                        add_list.append(self.chosen().find(parent.mark))
                    for child in event.children():
                        add_list.append(self.chosen().find(child.mark))
                elif isinstance(event, Blob):
                    remove_list.append(ei) # Don't select the blob itself
                    for (i, event2) in enumerate(self.chosen().events):
                        if isinstance(event2, Commit):
                            for fileop in event2.fileops:
                                if fileop.op == 'M' and fileop.ref==event.mark:
                                    add_list.append(i)
                elif isinstance(event, (Tag, Reset)):
                    if event.target:
                        add_list.append(event.target.index())
            value |= set(add_list)
            value -= set(remove_list)
        self.line = self.line.lstrip()
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_expression(), left = %s" % (value, repr(self.line)))
        return value
    def eval_disjunct(self):
        "Evaluate a disjunctive expression (| has lowest precedence)" 
        if debug_enable(DEBUG_LEXER):
            announce("eval_disjunct(%s)" % self.line)
        self.line = self.line.lstrip()
        disjunct = set()
        while True:
            conjunct = self.eval_conjunct()
            if conjunct is None:
                break
            else:
                disjunct |= conjunct
            self.line = self.line.lstrip()
            if self.peek() == '|':
                self.pop()
            else:
                break
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_disjunct(), left = %s" % (disjunct, repr(self.line)))
        return disjunct
    def eval_conjunct(self):
        "Evaluate a conjunctive expression (& has higher precedence)" 
        if debug_enable(DEBUG_LEXER):
            announce("eval_conjunct(%s)" % self.line)
        self.line = self.line.lstrip()
        conjunct = set(range(0, len(self.chosen())))
        while True:
            term = self.eval_term()
            if term is None:
                break
            else:
                conjunct = conjunct & term
            self.line = self.line.lstrip()
            if self.peek() == '&':
                self.pop()
            else:
                break
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_conjunct(), left = %s" % (conjunct, repr(self.line)))
        return conjunct
    def eval_term(self):
        if debug_enable(DEBUG_LEXER):
            announce("eval_term(%s)" % self.line)
        self.line = self.line.lstrip()
        if self.peek() == '{':
            self.pop()
            term = self.eval_disjunct()
            self.line = self.line.lstrip()
            if self.peek() != '}':
                raise Recoverable("trailing junk on inner expression")
            else:
                self.pop()
        else:
            term = self.eval_visibility()
            if term is None:
                term = self.eval_polyrange()
                if term is None:
                    term = self.eval_textsearch()
                    if term == None:
                        term = self.eval_branchset()
                        if term == None:
                            term = self.eval_pathset()
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_term(), left = %s" % (term, repr(self.line)))
        return term
    def eval_visibility(self):
        "Parse a visibility spec."
        if debug_enable(DEBUG_LEXER):
            announce("eval_visibility(%s)" % self.line)
        self.line = self.line.lstrip()
        if not self.peek() == "=":
            visibility = None
        else:
            typeletters = {
                "B" : lambda e: isinstance(e, Blob),
                "C" : lambda e: isinstance(e, Commit),
                "T" : lambda e: isinstance(e, Tag),
                "R" : lambda e: isinstance(e, Reset),
                "P" : lambda e: isinstance(e, Passthrough),
                "H" : lambda e: isinstance(e, Commit) and not e.has_children(),
                }
            visible = set()
            self.pop()
            while self.peek() in typeletters:
                c = self.pop()
                if c in typeletters:
                    visible.add(typeletters[c])
            # We need a special check here because these expressions
            # could otherwise run onto the text part of the command.
            if self.peek() not in "()|& ":
                raise Recoverable("garbled type mask at %s" % repr(self.line))
            if debug_enable(DEBUG_LEXER):
                announce("visibility set is %s with %s left" % ([x.__name__ for x in visible], repr(self.line)))
            selected = []
            for (i, event) in enumerate(self.chosen()):
                for predicate in visible:
                    if predicate(event):
                        selected.append(i)
                        break
            visibility = set(selected)
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_visibility(), left = %s" % (visibility, repr(self.line)))
        return visibility
    def eval_polyrange(self):
        "Parse a polyrange specification (list of intervals)."
        if debug_enable(DEBUG_LEXER):
            announce("eval_polyrange(%s)" % self.line)
        self.line = self.line.lstrip()
        polyrange_initials = (":","0","1","2","3","4","5","6","7","8","9","$", "<")
        if not self.peek() in polyrange_initials:
            polyrange = None
        else:
            selection = []
            while self.peek() in polyrange_initials + (".", ","):
                # First, literal command numbers (1-origin)
                match = re.match("[0-9]+", self.line)
                if match:
                    number = match.group()
                    selection.append(int(number)-1)
                    self.line = self.line[len(number):]
                    continue
                # Next, mark references
                match = re.match(":[0-9]+", self.line)
                if match:
                    markref = match.group()
                    self.line = self.line[len(markref):]
                    for (i, event) in enumerate(self.chosen()):
                        if hasattr(event, "mark") and event.mark == markref:
                            selection.append(i)
                            break
                        elif hasattr(event, "committish") and event.committish == markref:
                            selection.append(i)
                            break
                    else:
                        raise Recoverable("mark %s not found." % markref)
                    continue
                elif self.peek() == ':':
                    raise Recoverable("malformed mark")
                # $ means last commit, a la ed(1).
                if self.peek() == "$":
                    selection.append(len(self.chosen())-1)
                    self.pop()
                    continue
                # Comma just delimits a location spec
                if self.peek() == ",":
                    self.pop()
                    continue
                # Following ".." means a span
                if self.line[:2] == "..":
                    if selection:
                        selection.append("..")
                        self.line = self.line[2:]
                        continue
                    else:
                        raise Recoverable("start of span is missing")
                if self.peek() == "<":
                    self.pop()
                    closer = self.line.find('>')
                    if closer == -1:
                        raise Recoverable("reference improperly terminated. '%s'" % self.line)
                    ref = self.line[:closer]
                    self.line = self.line[closer+1:]
                    matched = False
                    # First, search tags
                    for (i, event) in enumerate(self.chosen()):
                        if isinstance(event, Tag) and event.name == ref:
                            matched = True
                            selection.append(i)
                            break
                    # Next, search branches
                    if not matched:
                        for symbol in sorted(self.chosen().branchset(),
                                             key=len, reverse=True): # longest name first
                            if ref == os.path.basename(symbol):
                                loc = None
                                # Find the last commit with this branchname
                                for (i, event) in enumerate(self.chosen()):
                                    if isinstance(event, Commit):
                                        if event.branch == symbol:
                                            loc = i
                                if loc is None:
                                    raise Recoverable("branch name %s points to hyperspace" % symbol)
                                else:
                                    matched = True
                                    selection.append(loc)
                    # Next, fossil-ID references
                    if not matched:
                        for (i, event) in enumerate(self.chosen()):
                            if hasattr(event, "fossil_id") and event.fossil_id == ref:
                                selection.append(i)
                                matched = True
                                break
                    # Might be a date or action stamp
                    date = None
                    bang = ref.find('!')
                    date_end = len(ref)
                    if bang >= 0:
                        date_end = min(bang, date_end)
                    try:
                        date = Date(ref[:date_end])
                    except Fatal:
                        date = None
                    email_id = None
                    if date is not None and bang > -1:
                        email_id = ref[bang+1:]
                    matches = []
                    if date:
                        for (ei, event) in enumerate(self.chosen().events):
                            if hasattr(event, 'committer'):
                                if event.committer.date != date:
                                    continue
                                if email_id and event.committer.email != email_id:
                                    continue
                                else:
                                    matches.append(ei)
                            elif hasattr(event, 'tagger'):
                                if event.tagger.date != date:
                                    continue
                                elif email_id and event.tagger.email!=email_id:
                                    continue
                                else:
                                    matches.append(ei)
                        if len(matches) < 1:
                            raise Recoverable("no events match %s" % ref)
                        elif len(matches) > 1:
                            raise Recoverable("multiple events match %s" % ref)
                        else:
                            selection.append(matches[0])
                            matched = True
                    if not matched:
                        raise Recoverable("couldn't match a name at <%s>" % ref)
            if debug_enable(DEBUG_LEXER):
                announce("location list is %s with %s left" % (selection, repr(self.line)))
            # Resolve spans
            resolved = []
            spanning = last = 0
            for elt in selection:
                if elt == '..':
                    spanning = True
                else:
                    if spanning:
                        resolved.extend(xrange(last+1, elt+1))
                        spanning = False
                    else:
                        resolved.append(elt)
                    last = elt
            selection = resolved
            if debug_enable(DEBUG_LEXER):
                announce("resolved list is %s with %s left" % (selection, repr(self.line)))
            # Sanity checks
            if spanning:
                raise Recoverable("incomplete range expression.")
            for elt in selection:
                if elt < 0 or elt > len(self.chosen())-1:
                    raise Recoverable("event number %s out of range" % (elt+1))
            polyrange = set(selection)
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_polyrange(), left = %s" % (polyrange, repr(self.line)))
        return polyrange
    def eval_textsearch(self):
        "Parse a text search specification."
        if debug_enable(DEBUG_LEXER):
            announce("eval_textsearch(%s)" % self.line)
        self.line = self.line.lstrip()
        if not self.peek() == '/':
            return None
        elif '/' not in self.line[1:]:
            raise Recoverable("malformed text search specifier")
        else:
            assert(self.pop() == '/')
            endat = self.line.index('/')
            try:
                search = re.compile(self.line[:endat]).search
            except re.error:
                raise Recoverable("invalid regular expression")
            self.line = self.line[endat+1:]
            matchers = set()
            searchable_attrs = ("author", "branch", "comment",
                                "committer", "committish", "text",
                                "tagger", "name")
            for (i, e) in enumerate(self.chosen()):
                if any(hasattr(e, searchable) and
                       search(str(getattr(e, searchable)))
                       for searchable in searchable_attrs):
                    matchers.add(i)
                # We don't do blobs because it would be too slow
                # and not very useful.
            if debug_enable(DEBUG_LEXER):
                announce("%s <- eval_textsearch(), left = %s" % (matchers, repr(self.line)))
            return matchers
    def eval_pathset(self):
        "Resolve a path name to the set of commits that refer to it."
        if self.peek() != "[":
            return None
        self.pop()
        end = "]"
        flags = ""
        while self.peek() in ("*", "@"):
            flags += self.pop()
        is_regex = (self.peek() == "/")
        if self.peek() in ("'", '"', "/"):
            end = self.pop() + end
        try:
            end_index = self.line.index(end)
        except ValueError:
            raise Recoverable("malformed path wildcard")
        path = self.line[:end_index]
        self.line = self.line[end_index+len(end):]
        if is_regex:
            try:
                search = re.compile(path).search
            except re.error:
                raise Recoverable("invalid regular expression")
            if "*" in flags:
                return self.eval_pathset_full(search, "@" in flags)
            if "@" in flags:
                return {i for (i, event) in enumerate(self.chosen().events)
                          if isinstance(event, Commit) and
                                all(search(path) for path in event.paths())}
            return {i for (i, event) in enumerate(self.chosen().events)
                      if isinstance(event, Commit) and
                            any(search(path) for path in event.paths())}
        else:
            # The "@" flag is ignored here since it would only make sense
            # for trees with a single file.
            if "*" in flags:
                return self.eval_pathset_full(path)
            return {i for (i, event) in enumerate(self.chosen().events)
                      if isinstance(event, Commit) and
                            path in event.paths()}
    def eval_pathset_full(self, match_condition, match_all = False):
        result = set()
        match = match_condition
        if isinstance(match_condition, collections.Callable):
            # Try to match a regex in the trees. For each commit we remember
            # only the part of the tree that matches the regex. In most cases
            # it is a lot less memory and CPU hungry than running regexes on
            # the full commit manifests. In the match_all case we instead
            # select commits that nowhere match the opposite condition.
            if match_all:
                match = lambda p: not match_condition(p)
            match_trees = {}
            for (i, event) in enumerate(self.chosen().events):
                if not isinstance(event, Commit): continue
                try:
                    parent = event.parents()[0]
                except IndexError:
                    tree = PathMap()
                else:
                    tree = match_trees[parent.mark].snapshot()
                for fileop in event.fileops:
                    if fileop.op == 'M' and match(fileop.path):
                        tree[fileop.path] = True
                    elif fileop.op in ('C', 'R') and match(fileop.target):
                        tree[fileop.target] = True
                    elif fileop.op == 'D' and match(fileop.path):
                        del tree[fileop.path]
                    elif fileop.op == 'R' and match(fileop.source):
                        del tree[fileop.source]
                    elif fileop.op == 'deleteall':
                        tree = PathMap()
                match_trees[event.mark] = tree
                if (not tree) == match_all:
                    result.add(i)
        else:
            # Search for an absolute path in the trees. We only need to
            # remember if that path is or not in any given commit tree.
            # Note: match_all is always false here, no need to check.
            containing = set()
            for (i, event) in enumerate(self.chosen().events):
                if not isinstance(event, Commit): continue
                contains = event.has_parents() \
                        and event.parents()[0].mark in containing
                for fileop in event.fileops:
                    if fileop.op == 'M' and fileop.path == match:
                        contains = True
                    elif fileop.op in ('C', 'R') and fileop.target == match:
                        contains = True
                    elif fileop.op == 'D' and fileop.path == match:
                        contains = False
                    elif fileop.op == 'R' and fileop.source == match:
                        contains = False
                    elif fileop.op == 'deleteall':
                        contains = False
                if contains:
                    containing.add(event.mark)
                    result.add(i)
        return result
    def eval_branchset(self):
        "Resolve a branch name to its set of associated events."
        if self.peek() != "(":
            return None
        else:
            self.pop()
            selection = []
            for symbol in sorted(self.chosen().branchset(),
                                 key=len, reverse=True): # longest name first
                if self.line.startswith(os.path.basename(symbol)):
                    for (i, event) in enumerate(self.chosen()):
                        if isinstance(event, Reset):
                            if event.ref == symbol:
                                selection.append(i)
                        elif isinstance(event, Commit):
                            if event.branch == symbol:
                                selection.append(i)
                        elif isinstance(event, Tag):
                            assert(event.target is not None)
                            assert(isinstance(event.target, Commit))
                            if event.target.branch == symbol:
                                selection.append(i)
                    self.line = self.line[len(os.path.basename(symbol)):]
                    if self.pop() != ')':
                        raise Recoverable("branch set improperly terminated.")
                    break
            else:
                raise Recoverable("unknown branch name %s" % self.line)
            return set(selection)
    #
    # Helpers
    #
    def report_select(self, line, method, optargs=()):
        "Generate a repository report on all objects with a specified method."
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        default = [n for n,o in enumerate(self.chosen()) if hasattr(o, method)]
        line = self.set_selection_set(line, default)
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            for i in self.selection:
                event = self.chosen().events[i]
                if hasattr(event, method):
                    summary = getattr(event, method)(*((parse, i,)+optargs))
                    if summary:
                        parse.stdout.write(summary + "\n")
    @staticmethod
    def pop_token(line):
        "Grab a whitespace-delimited token from the front of the line."
        tok = ""
        line = line.lstrip()
        while True:
            if not line or line[0].isspace():
                break
            else:
                tok += line[0]
                line = line[1:]
        line = line.lstrip()
        return (tok, line)
    def edit(self, selection, line):
        # Mailboxize and edit the non-blobs in the selection
        editor = line.strip() or os.getenv("EDITOR")
        if not editor:
            complain("you have not specified an editor and $EDITOR is not set")
            return
        # Special case: user selected a single blob
        if len(self.selection) == 1:
            singleton = self.chosen()[self.selection[0]]
            if isinstance(singleton, Blob):
                def find_successor(event, path):
                    here = []
                    for child in event.children():
                        for fileop in child.fileops:
                            if fileop.op == "M" and fileop.path == path:
                                here.append(child.mark)
                        here += find_successor(child, path)
                    return here 
                for event in self.chosen().commits():
                    for fileop in event.fileops:
                        if fileop.op == 'M' and fileop.ref == singleton.mark:
                            if len(find_successor(event, fileop.path)) > 0:
                                complain("beware: not the last 'M %s' on its branch" % fileop.path)
                            break
                os.system(editor + " " + singleton.materialize())
                return
            # Fall through
        (tfdesc, tfname) = tempfile.mkstemp()
        assert tfdesc > -1    # pacify pylint
        try:
            with open(tfname, "w") as tfp:
                for i in selection:
                    event = self.chosen()[i]
                    if hasattr(event, "email_out"):
                        tfp.write(event.email_out([], i))
        except IOError:
            raise Recoverable("write of editor tempfile failed")
        if os.system(editor + " " + tfname):
            raise Recoverable("%s returned a failure status" % editor)
        else:
            self.do_mailbox_in("<" + tfname)
        # No try/finally here - we want the tempfile to survive on fatal error
        # because it might have megabytes of metadata edits in it.
        os.remove(tfname)

    def help_selection(self):
        print("""
A quick example-centered reference for selection-set syntax.

First, these ways of constructing singleton sets:

123        event numbered 123 (1-origin)
:345       event with mark 345
<456>      commit with fossil-ID 456 (probably an SVN rev)
<foo>      the tag named 'foo', or failing that the tip commmit of branch foo

You can select commits and tags by date, or by date and committer:

<2011-05-25T07:30:37Z>      specifying the commit date
<2011-05-25T07:30:37Z!esr>  specifying the commit date and committer

More ways to construct event sets:

/foo/      all commits and tags containing the string 'foo' in text or metadata
(foo)      all commits on branch 'foo'.
[foo]      all commits touching the file named 'foo'.
=C         all commits
=H         all head (branch tip) commits
=T         all tags
=B         all blobs
=R         all resets
=P         all passthroughs

You can compose sets as follows:

:123,<foo>     the event marked 123 and the event referenced by 'foo'.
:123..<foo>    the range of events from mark 123 to the reference 'foo'

Sets can be composed with | (union) and & (intersection). | has lower
precedence than &, but set expressions can be grouped with { }. Postfixing
a ? to a selection expression widens it to include all immediate neighbors
of the selection; you can do this repeatedly for effect.
""")

    def help_syntax(self):
        print("""
All commands begin with a command keyword.  Most take a selection set
immediately following it; see 'help selection' for details.  Some
commands take additional modifier arguments after the selection set.

Most report-generation commands support output redirection. When
arguments for these are parsed, any argument beginning with '>' is
extracted and interpreted as the name of a file to which command
output should be redirected.  Any remaining arguments are available to
the command logic.

Some commands support input redirection. When arguments for these are
parsed, any argument beginning with '<' is extracted and interpreted
as the name of a file from which command output should be taken.  Any
remaining arguments are available to the command logic.
""")
            
    ##
    ## Command implementation begins here
    ##
    #
    # On-line help and instrumentation
    #
    def help_help(self):
        print("Show help for a command. Follow with space and the command name.")
    def help_verbose(self):
        print("""
Without an argument, this command requests a report of the verbosity
level.  'verbose 1' enables progress messages, 'verbose 0' disables
them. Higher levels of verbosity are available but intended for
developers only.
""")
    def do_verbose(self, line):
        global verbose
        if line:
            try:
                verbose = int(line)
            except ValueError:
                complain("verbosity value must be an integer")
        if not line or verbose:
            announce("verbose %d" % verbose)

    def help_quiet(self):
        print("""
Without an argument, this command requests a report of the quiet
boolean; with the argument 'on' or 'off' it is changed.  When quiet is
on, time-varying report fields which would otherwise cause spurious
failures in regression testing are suppressed.
""")
    def do_quiet(self, line):
        global quiet
        if line:
            if line == "on":
                quiet = True
            elif line == "off":
                quiet = False
        if not line:
            announce("quiet %s" % ("on" if quiet else "off"))

    def do_echo(self, line):
        "Set or clear echoing commands before processing."
        try:
            self.echo = int(line)
        except ValueError:
            announce("echo value must be an integer")
        if verbose:
            announce("echo %d" % self.echo)

    def help_resolve(self):
        print("""
Does nothing but resolve a selection-set expression
and report the resulting event-number set to standard
output. Implemented mainly for regression testing, but may be useful
for exploring the selection-set language.
""")
    def do_resolve(self, line):
        "Display the set of event numbers generated by a selection set."
        self.set_selection_set(line)
        if self.selection is None:
            print("No selection")
        elif isinstance(self.selection, list):
            print([x+1 for x in self.selection])
        else:
            complain("resolve didn't expect a selection of %s" % self.selection)

    def help_names(self):
        print("""
List all known symbolic names of branches and tags. Supports > redirection.
""")
    def do_names(self, line):
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            branches = list(self.chosen().branchset())
            branches.sort()
            for branch in branches:
                parse.stdout.write("branch %s\n" % branch)
            for event in self.chosen():
                if isinstance(event, Tag):
                    parse.stdout.write("tag    %s\n" % event.name)

    def do_script(self, line):
        "Read and execute commands from a named file."
        if not line:
            complain("script requires a file argument")
            return
        line = self.set_selection_set(line)
        try:
            self.callstack.append(line.split())
            with open(self.callstack[-1][0]) as scriptfp:
                while True:
                    scriptline = scriptfp.readline()
                    if not scriptline:
                        break
                    # Handle multiline commands
                    while scriptline.endswith("\\\n"):
                        scriptline = scriptline[:-2] + scriptfp.readline()
                    # Simulate shell here-document processing
                    if '<<' not in scriptline:
                        heredoc = None
                    else:
                        (scriptline, terminator) = scriptline.split("<<")
                        heredoc = tempfile.NamedTemporaryFile(mode="w",
                                                              delete=False)
                        while True:
                            nextline = scriptfp.readline()
                            if nextline == '':
                                break
                            elif nextline == terminator:
                                break
                            else:
                                heredoc.write(nextline)
                        heredoc.close()
                        # Note: the command must accept < redirection!
                        scriptline += "<" + heredoc.name
                    # End of heredoc simulation
                    for i in range(len(self.callstack[-1])):
                        scriptline = scriptline.replace('$' + str(i), self.callstack[-1][i])
                    scriptline =  scriptline.replace('$$', str(os.getpid()))
                    self.onecmd(self.precmd(scriptline))
                    if heredoc:
                        os.remove(heredoc.name)
            self.callstack.pop()
        except IOError as e:
            complain("script failure on '%s': %s" % (line, e))

    def do_history(self, line):
        "Dump your command list from this session so far."
        for line in self.history:
            print(line)

    def do_coverage(self, unused):
        "Display the coverage-case set (developer instrumentation)."
        assert unused is not None   # pacify pylint
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        for e in self.chosen().commits():
            e.fileop_dump()
        sys.stdout.write("Case coverage: %s\n" % sorted(self.chosen().case_coverage))

    def help_index(self):
        print("""
Display four columns of info on selected objects: their number, their
type, the associate mark (or '-' if no mark) and a summary field
varying by type.  For a branch or tag it's the reference; for a commit
it's the commit branch; for a blob it's the repository path of the
file in the blob.  Supports > redirection.
""")
    def do_index(self, line):
        "Generate a summary listing of objects."
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        # We could do all this logic using report_select() and index() methods
        # in the objects, but that would have two disadvantages.  First, we'd
        # get a default-set computation we don't want.  Second, for this
        # function it's helpful to have the method strings close together so
        # we can maintain columnation.
        default = [n for n, o1 in enumerate(self.chosen()) if not isinstance(o1, Blob)]
        line = self.set_selection_set(line, default)
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            for i in self.selection:
                event = self.chosen().events[i]
                if isinstance(event, Blob):
                    parse.stdout.write("%6d blob   %6s    %s\n" % (i+1, event.mark,event.path))
                    continue
                if isinstance(event, Commit):
                    parse.stdout.write("%6d commit %6s    %s\n" % (i+1, event.mark or '-', event.branch)) 
                    continue
                if isinstance(event, Tag):
                    parse.stdout.write("%6d tag    %6s    %4s\n" % (i+1, event.committish, repr(event.name),)) 
                    continue
                if isinstance(event, Reset):
                    parse.stdout.write("%6d branch %6s    %s\n" % (i+1, event.committish or '-', event.ref)) 
                    continue
                else:
                    parse.stdout.write("?      -      %s\n" % (event,)) 
    def help_profile(self):
        print("""
Enable profiling. Must be one of the initial command-line arguments, and
gathers statistics only on code executed via '-'.
""")
    def do_profile(self, line):
        "Enable profiling."
        assert line is not None # Pacify pylint
        self.profile_log = line
        announce("profiling enabled.")

    def help_timing(self):
        print("""
Report phase-timing results from repository analysis.
""")
    def do_timing(self, _line):
        "Report repo-analysis times."
        total = self.repo.timings[-1][1] - self.repo.timings[0][-1]
        commit_count = sum(1 for _ in self.repo.commits())
        if self.repo.fossil_count is None:
            print("        commits: %d" % commit_count)
        else:
            print("        commits: %d (from %d)" % (commit_count, self.repo.fossil_count))
        for (i, (phase, _interval)) in enumerate(self.repo.timings):
            if i > 0:
                interval = self.repo.timings[i][1] - self.repo.timings[i-1][1]
                print("%15s: %.3f (%2.2f%%)" % (phase,
                                              interval,
                                              (interval * 100)/total))
        print("          total: %.3f (%d/sec)" % (total, int((self.repo.fossil_count or commit_count))/total))

    #
    # Information-gathering
    #
    def help_stats(self):
        print("""
Report size statistics and import/export method information of the
currently chosen repository. Supports > redirection.
""")
    def do_stats(self, line):
        "Report information on repositories."
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            if not parse.line:
                parse.line = self.chosen().name
                if parse.line is None:
                    complain("no repo has been chosen.")
                    return
            for name in parse.tokens():
                repo = self.repo_by_name(name)
                if repo is None:
                    raise Recoverable("no such repo as %s" % name)
                else:
                    def count(otype):
                        return sum(1 for x in repo.events if isinstance(x,otype))
                    parse.stdout.write("%s: %.0fK, %d events, %d blobs, %d commits, %d tags, %d resets, %s.\n" % \
                          (repo.name, repo.size() / 1000.0, len(repo),
                           count(Blob), count(Commit), count(Tag), count(Reset),
                           rfc3339(repo.readtime)))
                    if repo.sourcedir:
                        parse.stdout.write("  Loaded from %s\n" % repo.sourcedir)
                    if repo.vcs:
                        parse.stdout.write(repr(repo.vcs) + "\n")

    def help_list(self):
        print("""
Display commits in a human-friendly format; the first column is raw
event numbers, the second a timestamp in local time. If the repository
has fossil IDs, they will be displayed in the third column. The
leading portion of the comment follows. Supports > redirection.
""")
    def do_list(self, line):
        "Generate a human-friendly listing of objects."
        self.report_select(line, "lister", (screenwidth(),))

    def help_tip(self):
        print("""
Display the branch tip names associated with commits in the selection
set.  These will not necessarily be the same as their branch fields
(which will often be tag names if the repo contains either annotated
or lightweight tags).

If a commit is at a branch tip, its tip is its branch name.  If it has
only one child, its tip is the child's tip.  If it has multiple children,
then if there is a child with a matching branch name its tip is the
child's tip.  Otherwise this function throws a recoverable error.

Supports > redirection.
""")
    def do_tip(self, line):
        "Generate a human-friendly listing of objects."
        self.report_select(line, "tip", (screenwidth(),))

    def help_tags(self):
        print("""
Display lightweight tags: two fields, an event number and a tag name.
Supports > redirection.
""")
    def do_tags(self, line):
        "Generate a human-friendly listing of lightweight tags."
        self.report_select(line, "tags", (screenwidth(),))
    def help_sizes(self):
        print("""
Print a report on data volume per branch; takes a selection set,
defaulting to all events. The numbers tally the size of uncompressed
blobs, commit and tag comments, and other metadata strings (a blob is
counted each time a commit points at it).  Not an exact measure of
storage size: intended mainly as a way to get information on how to
efficiently partition a repository that has become large enough to be
unwieldy.
""")
    def do_sizes(self, line):
        "Report branch relative sizes."
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        line = self.set_selection_set(line, range(len(self.chosen())))
        sizes = {}
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            for i in self.selection:
                event = self.chosen().events[i]
                if isinstance(event, Commit):
                    if event.branch not in sizes:
                        sizes[event.branch] = 0
                    sizes[event.branch] += len(str(event.committer))
                    for author in event.authors:
                        sizes[event.branch] += len(str(author))
                    sizes[event.branch] += len(event.comment)
                    for fileop in event.fileops:
                        if fileop.op == "M":
                            sizes[event.branch] += self.repo.objfind(fileop.ref).size
                elif isinstance(event, Tag):
                    commit = event.target
                    if commit.branch not in sizes:
                        sizes[commit.branch] = 0
                    sizes[commit.branch] += len(str(event.tagger))
                    sizes[commit.branch] += len(event.comment)
            total = sum(sizes.itervalues())
            def sz(n, s):
                parse.stdout.write("%9d\t%2.2f%%\t%s\n" \
                                   % (n, (n * 100.0) / total, s))
            for key in sorted(sizes.iterkeys()):
                sz(sizes[key], key)
            sz(total, "")
    def help_lint(self):
        print("""
Look for metadata configurations that may indicate a problem. Presently
checks for: (1) Mid-branch deletes, (2) committer and author IDs that
don't look well-formed as DVCS IDs.  These reports can be individually
selected with the modifiers 'structure' and 'names'. 
""")
    def do_lint(self, line):
        "Look for lint in a repo."
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        line = self.set_selection_set(line, range(len(self.chosen())))
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            unmapped = re.compile("[^@]*$|[^@]*@" + str(self.chosen().uuid) + "$")
            shortset = set()
            deletealls = set()
            disconnected = set()
            for i in self.selection:
                event = self.chosen().events[i]
                if isinstance(event, Commit):
                    if not line or "structure" in line:
                        if event.fileops and event.fileops[0].op == 'deleteall' and event.has_children():
                            deletealls.add("on %s at %s" % (event.branch, event.id_me()))
                        if not event.has_parents() and not event.has_children():
                            disconnected.add(event.id_me())
                    if unmapped and (not line or "names" in line):
                        for person in [event.committer] + event.authors:
                            if unmapped.match(person.email):
                                shortset.add(person.email)
            if not line or "structure" in line:
                for item in deletealls:
                    parse.stdout.write("mid-branch delete: %s\n" % item)
                for item in disconnected:
                    parse.stdout.write("disconnected commit: %s\n" % item)
            if "names" in line:
                parse.stdout.write("\n".join(list(shortset)) + "\n")
            elif not line:
                for item in shortset:
                    parse.stdout.write("unknown shortname: %s\n" % item)
    #
    # Housekeeping
    #
    def help_prefer(self):
        print("""
Report or set (with argument) the preferred type of repository. With
no arguments, describe capabilities of all supported systems. With
an argument (which must be the name of a supported system) this has
two effects:

First, if there are multiple repositories in a directory you do a read
on, reposurgeon will read the preferred one (otherwise it will
complain that it can't choose among them).

Secondly, if there is a selected repo, this will change its type.
This means that you do a write to a directory, it will build a repo of
the preferred type rather than its original type (if it had one).

If no preferred type has been explicitly selected, reading in a
repository (but not a fast-import stream) will implicitly set it
to the type of that repository.
""")
    def do_prefer(self, line):
        "Report or select the preferred repository type."
        if not line:
            for vcs in vcstypes:
                print(vcs)
            if any(ext.visible for ext in extractors):
                print("Other systems supported for read only: %s\n" \
                      % " ".join(ext.name for ext in extractors if ext.visible))
        else:
            for repotype in vcstypes + extractors:
                if line.lower() == repotype.name:
                    self.preferred = repotype
                    if self.chosen():
                        self.chosen().vcs = self.preferred
                    break
            else:
                complain("known types are %s." % " ".join([x.name for x in vcstypes] + [x.name for x in extractors if x.visible]))
        if verbose:
            if not self.preferred:
                print("No preferred type has been set.")
            else:
                print("%s is the preferred type." % self.preferred.name)

    def help_choose(self):
        print("""
Choose a named repo on which to operate.  The name of a repo is
normally the basename of the directory or file it was loaded from, but
repos loaded from standard input are 'unnamed'. The program will add
a disambiguating suffix if there have been multiple reads from the
same source.

With no argument, lists the names of the currently stored repositories
and their load times.  The second column is '*' for the currently selected
repository, '-' for others.
""")
    def do_choose(self, line):
        "Choose a named repo on which to operate."
        if not self.repolist:
            if verbose > 0:
                complain("no repositories are loaded.")
                return
        self.repolist.sort(key=operator.attrgetter("name"))
        if not line:
            for repo in self.repolist:
                status =  '-'
                if self.chosen() and repo == self.chosen():
                    status = '*'
                if not quiet:
                    sys.stdout.write(rfc3339(repo.readtime) + " ")
                sys.stdout.write("%s %s\n" % (status, repo.name))
        else:
            if line in self.reponames():
                self.choose(self.repo_by_name(line))
                if verbose:
                    self.do_stats(line)
            else:
                complain("no such repo as %s" % line)

    def help_drop(self):
        print("""
Drop a repo named by the argument from reposurgeon's list, freeing the memory
used for its metadata and deleting on-disk blobs. With no argument, drops the
currently chosen repo.
""")
    def do_drop(self, line):
        "Drop a repo from reposurgeon's list."
        if not self.reponames():
            if verbose:
                complain("no repositories are loaded.")
                return
        if not line:
            line = self.chosen().name
        if line in self.reponames():
            if line == self.chosen().name:
                self.unchoose()
            holdrepo = self.repo_by_name(line)
            holdrepo.cleanup()
            self.remove_by_name(line)
            del holdrepo
        else:
            complain("no such repo as %s" % line)
        if verbose:
            # Emit listing of remaining repos
            self.do_choose('')

    def help_rename(self):
        print("""
Rename the currently chosen repo; requires an argument.  Won't do it
if there is already one by the new name.
""")
    def do_rename(self, line):
        "Rename a repository."
        if line in self.reponames():
            complain("there is already a repo named %s." % line)
        elif not self.chosen():
            complain("no repository is currently chosen.")
        else:
            self.chosen().rename(line)

    def help_preserve(self):
        print("""
Add (presumably untracked) files or directories to the repo's list of
paths to be restored from the backup directory after a rebuild. Each
argument, if any, is interpreted as a pathname.  The current preserve
list is displayed afterwards.
""")
    def do_preserve(self, line):
        "Add files and subdirectories to the preserve set."
        for filename in line.split():
            self.chosen().preserve(filename)
        announce("preserving %s." % list(self.chosen().preservable()))

    def help_unpreserve(self):
        print("""
Remove (presumably untracked) files or directories to the repo's list
of paths to be restored from the backup directory after a
rebuild. Each argument, if any, is interpreted as a pathname.  The
current preserve list is displayed afterwards.
""")
    def do_unpreserve(self, line):
        "Remove files and subdirectories from the preserve set."
        for filename in line.split():
            self.chosen().unpreserve(filename)
        announce("preserving %s." % list(self.chosen().preservable()))

    #
    # Serialization and de-serialization.
    #
    def help_read(self):
        print("""
A read command with no arguments is treated as 'read .', operating on the
current directory.
 
With a directory-name argument, this command attempts to read in the
contents of a repository in any supported version-control system under
that directory.

If the argument is the name of a plain file, it will be read in as a
fast-import stream or Subversion dump, whichever it is.

With an argument of '-', this command reads a fast-import stream or
Subversion dump from standard input (this will be useful in filters
constructed with command-line arguments).
""")
    def do_read(self, line):
        "Read in a repository for surgery."
        if line:
            line = os.path.expanduser(line)
        if not line or line == '.':
            line = os.getcwd()
        name = line
        if os.path.isdir(line):
            repo = read_repo(line, self.preferred)
        else:
            repo = Repository()
            # Backward-compatibility hack.  Will have to be removed if we
            # ever define modifiers for this command.
            if line and not line.startswith("<"):
                line =  "<" + line
            with RepoSurgeon.LineParse(line, capabilities=["stdin"]) as parse:
                repo.fast_import(parse.stdin, progress=(verbose==1 and not quiet))
        self.repolist.append(repo)
        self.choose(repo)
        if self.chosen():
            if self.chosen().vcs:
                self.preferred = self.chosen().vcs
            name = self.uniquify(os.path.basename(self.chosen().sourcedir or name or "unnamed"))
            self.chosen().rename(name)
        if verbose:
            self.do_choose('')

    def help_write(self):
        print("""
Dump a fast-import stream representing selected events to standard
output (if second argument is empty or '-') or a file. Property
extensions will be omitted if the importer for the selected repo cannot
digest them. Fails if the argument exists and is a directory or
anything other than a plain file. The default selection is all events.
""")
    def do_write(self, line):
        "Stream out the results of repo surgery."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        line = self.set_selection_set(line, range(len(self.chosen())))
        # Backward-compatibility hack.  Will have to be removed if we
        # ever define modifiers for this command.
        if line and not line.startswith(">"):
            line =  ">" + line
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            self.chosen().fast_export(self.selection, parse.stdout, progress=(verbose==1 and not quiet), target=self.preferred)

    def help_inspect(self):
        print("""
Dump a fast-import stream representing selected events to standard output.
Just like a write, except (1) the progress meter is disabled, and (2) there
is an identifying header before each event dump.  Supports > redirection.
""")
    def do_inspect(self, line):
        "Dump raw events."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        line = self.set_selection_set(line, range(len(self.chosen())))
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            for ei in self.selection:
                event = self.chosen().events[ei]
                header = "Event %s, " % repr(ei+1)
                header = header[:-2]
                header += " " + ((72 - len(header)) * "=") + "\n"
                parse.stdout.write(header)
                if isinstance(event, Commit):
                    parse.stdout.write(event.dump())
                else:
                    parse.stdout.write(str(event))

    def help_graph(self):
        print("""
Dump a graph representing selected events to standard output in DOT markup
for graphviz.
""")
    def do_graph(self, line):
        "Dump a commit graph."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        line = self.set_selection_set(line, range(len(self.chosen())))
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            parse.stdout.write("digraph {\n")
            for ei in self.selection:
                event = self.chosen().events[ei]
                if isinstance(event, Commit):
                    for parent in event.parent_marks():
                        if self.chosen().find(parent) in self.selection:
                            parse.stdout.write('\t%s -> %s;\n' \
                                               % (parent[1:], event.mark[1:]))
                if isinstance(event, Tag):
                    parse.stdout.write('\t"%s" -> "%s" [style=dotted];\n' \
                                       % (event.name, event.committish[1:], ))
                    parse.stdout.write('\t{rank=same; "%s"; "%s"}\n' \
                                       % (event.name, event.committish[1:], ))
            for ei in self.selection:
                event = self.chosen().events[ei]
                if isinstance(event, Commit):
                    summary = cgi.escape(event.comment.split('\n')[0][:42])
                    cid = event.mark
                    if event.fossil_id:
                        cid = event.showfossil() + " &rarr; " + cid
                    parse.stdout.write('\t%s [shape=box,width=5,label=<<table cellspacing="0" border="0" cellborder="0"><tr><td><font color="blue">%s</font></td><td>%s</td></tr></table>>];\n' \
                                       % (event.mark[1:], cid, summary))
                    if all(event.branch != child.branch for child in event.children()):
                        parse.stdout.write('\t"%s" [shape=oval,width=2];\n' % event.branch)
                        parse.stdout.write('\t"%s" -> "%s" [style=dotted];\n' % (event.mark[1:], event.branch))
                if isinstance(event, Tag):
                    summary = cgi.escape(event.comment.split('\n')[0][:32])
                    parse.stdout.write('\t"%s" [label=<<table cellspacing="0" border="0" cellborder="0"><tr><td><font color="blue">%s</font></td><td>%s</td></tr></table>>];\n' \
                                       % (event.name, event.name, summary))
            parse.stdout.write("}\n")

    def help_rebuild(self):
        print("""
Rebuild a repository from the state held by reposurgeon.  The argument
specifies the target directory in which to do the rebuild; if the
repository read was from a repo directory (and not a git-import stream), it
defaults to that directory.  If the target directory is nonempty
its contents are backed up to a save directory.
""")
    def do_rebuild(self, line):
        "Rebuild a repository from the edited state."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        rebuild_repo(self.chosen(), line, self.preferred)

    #
    # Editing commands
    #
    def help_mailbox_out(self):
        print("""
Emit a mailbox file of messages in RFC822 format representing the
contents of repository metadata. Takes a selection set; members of the set
other than commits, annotated tags, and passthroughs are ignored (that
is, presently, blobs and resets). Supports > redirection.
""")
    def do_mailbox_out(self, line):
        "Generate a mailbox file representing object metadata."
        self.report_select(line, "email_out")

    def help_mailbox_in(self):
        print("""
Accept on standard input a mailbox file of messages in RFC822 format
representing the contents of the metadata in selected commits and
annotated tags. Takes no selection set. Takes < redirection.

Users should be aware that modifying an Event-Number field will change
which event the update from that message is applied to.  This is
unlikely to have good results.

If the Event-Number field is absent, the mailbox_in logic will
attempt to match the commit or tag first by Fossil-ID, then by a unique
committer ID and timestamp pair.

If output is redirected and the modifier 'changed' appears, a minimal
set of modifications actually made is written to the output file.
""")
    def do_mailbox_in(self, line):
        "Accept a mailbox file representing object metadata and update from it."
        with RepoSurgeon.LineParse(line, capabilities=["stdin","stdout"]) as parse:
            update_list = []
            while True:
                msg = RepoSurgeonEmail.readmsg(parse.stdin)
                if not msg:
                    break
                update_list.append(email.message_from_string(msg))
        # First, a validation pass
        attribution_map = {}
        name_map = {}
        attribution_counts = collections.Counter()
        for commit in self.chosen().commits():
            stamp = commit.action_stamp()
            attribution_map[stamp] = commit
            attribution_counts[stamp] += 1
        for event in self.chosen().events:
            if isinstance(event, Tag):
                if event.name:
                    name_map[event.name] = event
                if event.tagger:
                    stamp = event.tagger.action_stamp()
                    attribution_map[stamp] = event
                    attribution_counts[stamp] += 1
        fossil_map = {}
        for commit in self.chosen().commits():
            if commit.fossil_id:
                fossil_map[commit.fossil_id] = commit
        events = []
        errors = 0
        for (i, message) in enumerate(update_list):
            event = None
            if "Event-Number" in message:
                try:
                    eventnum = int(message["Event-Number"]) - 1
                except ValueError:
                    complain("event number garbled in update %d" % (i+1,))
                    errors += 1
                if eventnum < 0 or eventnum >= len(self.chosen()):
                    complain("event number %d out of range in update %d" \
                                      % (eventnum, i+1))
                    errors += 1
                event = self.chosen()[eventnum]
            elif "Fossil-ID" in message:
                try:
                    event = fossil_map[message["Fossil-ID"]]
                except KeyError:
                    complain("no commit matches fossil %s" \
                                      % message["Fossil-ID"])
                    errors += 1
            elif "Event-Mark" in message:
                event = self.chosen().objfind(message["Event-Mark"])
                if not event:
                    complain("no commit matches mark %s" \
                             % message["Event-Mark"])
                    errors += 1
            elif "Committer" in message and "Committer-Date" in message:
                blank = Commit()
                blank.committer = Attribution()
                blank.email_in(message)
                stamp = blank.action_stamp()
                try:
                    event = attribution_map[stamp]
                except KeyError:
                    complain("no commit matches stamp %s" % stamp)
                    errors += 1
                if attribution_counts[stamp] > 1:
                    complain("multiple events match %s" % stamp)
                    errors += 1
            elif "Tagger" in message and "Tagger-Date" in message:
                blank = Tag()
                blank.tagger = Attribution()
                blank.email_in(message)
                stamp = blank.tagger.action_stamp()
                try:
                    event = attribution_map[stamp]
                except KeyError:
                    complain("no tag matches stamp %s" % stamp)
                    errors += 1
                if attribution_counts[stamp] > 1:
                    complain("multiple events match %s" % stamp)
                    errors += 1
            elif "Tag-Name" in message:
                blank = Tag()
                blank.tagger = Attribution()
                blank.email_in(message)
                try:
                    event = name_map[blank.name]
                except KeyError:
                    complain("no tag matches name %s" % blank.name)
                    errors += 1
            else:
                complain("no commit matches update %d:\n%s" % (i+1, message))
                errors += 1
            if event is not None and not hasattr(event, "email_in"):
                try:
                    complain("event %d cannot be modified"%(event.index()+1))
                except AttributeError:
                    complain("event cannot be modified")
                errors += 1
            # Always append, even None, to stay in sync with update_list
            events.append(event)
        if errors > 0:
            raise Recoverable("%d errors in metadata updates" % errors)
        # Now apply the updates
        changers = []
        for (event, update) in zip(events, update_list):
            if event.email_in(update):
                changers.append(update)
        if verbose:
            if not changers:
                announce("no events modified.")
            else:
                announce("%d events modified." % len(changers))
        if parse.stdout != sys.stdout:
            if "changed" in parse.line:
                for update in changers:
                    parse.stdout.write(RepoSurgeonEmail.Divider + "\n" + update.as_string(unixfrom=False))

    def help_edit(self):
        print("""
Report the selection set of events to a tempfile as mailbox_out does,
call an editor on it, and update from the result as mailbox_in does.
If you do not specify an editor name as second argument, it will be
taken from the $EDITOR variable in your environment.

Normally this command ignores blobs because mailbox_out does.
However, if you specify a selection set consisting of a single
blob, your editor will be called on the blob file.

The modifier 'multiline' will trim the selection set to commits that
are multiline and not in summary/blank-line/details form.
""")
    def do_edit(self, line):
        "Edit metadata interactively."
        if not self.chosen():
            complain("no repo is loaded")
            return
        default = [n for n, o2 in enumerate(self.chosen()) if hasattr(o2, "email_out")]
        rest = self.set_selection_set(line, default)
        if "multiline" in rest:
            rest = rest.replace("multiline", "")
            mr = re.compile("[^\n]*\n[^\n]")
            filtered = []
            for ei in self.selection:
                event = self.chosen().events[ei]
                if isinstance(event, Commit) and mr.match(event.comment):
                    filtered.append(ei)
            self.selection = filtered
        self.edit(self.selection, rest)

    def help_filter(self):
        print("""
Run all blobs, filter comments, and tag comments in the selection
set (defaulting to all) through the filter specified on the command line.
Each blob and comment is presented to the filter on standard input; the
content is replaced with whatever the filter emits to standard output.

If the command line contains the magic cookie '%PATHS%', it is replaced
with a space-separated list of all paths that reference the blob.
""")
    def do_filter(self, line):
        if not self.chosen():
            complain("no repo is loaded")
            return
        line = self.set_selection_set(line, range(len(self.chosen())))
        if not line:
            complain("no filter is specified")
            return
        altered = 0
        (indesc, intmp) = tempfile.mkstemp(prefix=self.chosen().subdir())
        (outdesc, outtmp) = tempfile.mkstemp(prefix=self.chosen().subdir())
        assert indesc > -1 and outdesc > -1    # pacify pylint
        with Baton(prompt="Filtering", enable=(verbose == 1)) as baton:
            for ei in self.selection:
                event = self.chosen().events[ei]            
                if isinstance(event, (Commit, Tag)):
                    filtercmd = line.replace("%PATHS%", "")
                    with open(intmp, "w") as wfp:
                        wfp.write(event.comment)
                    newcomment = capture("%s <%s" % (filtercmd, intmp))
                    altered += (newcomment != event.comment)
                    event.comment = newcomment
                    if isinstance(event, Commit):
                        for fileop in event.fileops:
                            if fileop.inline is not None:
                                with open(intmp, "w") as wfp:
                                    wfp.write(fileop.inline)
                                # It's unclear whether it would be the right thing
                                # to increment altered here.
                                filtercmd = line.replace("%PATHS%", event.path)
                                fileop.inline = capture("%s <%s" % (filtercmd, intmp))
                elif isinstance(event, Blob):
                    if "%PATHS%" in line:
                        filtercmd = line.replace("%PATHS%", " ".join(event.paths))
                    else:
                        filtercmd = line
                    if event.hasfile():
                        do_or_die("%s <%s >%s" % (filtercmd, event.blobfile(), outtmp))
                        same = filecmp.cmp(event.blobfile(), outtmp, shallow=False)
                    else:
                        with open(intmp, "w") as wfp:
                            wfp.write(event.get_content())
                        do_or_die("%s <%s >%s" % (filtercmd, intmp, outtmp))
                        same = filecmp.cmp(intmp, outtmp, shallow=False)
                    if not same:
                        altered += 1
                        event.materialize()
                        shutil.copyfile(outtmp, event.blobfile())
                baton.twirl()
        os.remove(intmp)
        os.remove(outtmp)
        announce("%d items modified." % altered)

    def help_delete(self):
        print("""
Delete a selection set of commits (and their associated blobs, if
any).  The default selection set for this command is empty.  Tags
pointing at the commits are also removed.

Note that applying this command to a commit with a modify operation
will *not* necessarily remove changes made by that commit from later
versions.  It will have the effect of retracting the modifications
only when they are the final ones on the commit's branch.
""")
    def do_delete(self, line):
        "Delete events in the specified selection set."
        if not self.chosen():
            complain("no repo is loaded")
            return
        line = self.set_selection_set(line, [])
        line = str(line)   # pacify pylint by forcing string type
        if line:
            for token in line.split():
                if token not in ["complain",
                                 "coalesce",
                                 "obliterate",
                                 "pushback",
                                 "tagback",
                                 "tagforward",
                                 "quiet"]:
                    complain("no such deletion modifier as " + token)
                    return
        self.chosen().delete(self.selection, self.line)

    def help_coalesce(self):
        print("""
Scan the selection set for runs of commits with identical
comments close to each other in time (this is a common form of scar
tissues in repository up-conversions from older file-oriented
version-control systems).  Merge these cliques by deleting all but the
last commit, in order.

The optional second argument, if present, is a maximum time
separation in seconds; the default is 90 seconds.
""")
    def do_coalesce(self, line):
        "Coalesce events in the specified selection set."
        if not self.chosen():
            complain("no repo is loaded")
            return
        line = self.set_selection_set(line, [])
        if not line:
            timefuzz = 90
        else:
            try:
                timefuzz = int(line)
            except ValueError:
                raise Recoverable("time-fuzz value must be an integer")
        eligible = []
        # This is a crude search that ignores the repo graph structure;
        # properly speaking we should be chasing child links.  Screw
        # it; this operation only make sense for cleaning up
        # artifacts in linear stretches of history that have been
        # lifted from file-oriented VCSes like RCS and CVS.
        icthis, icnext = itertools.tee(
                ((i, c) for i, c in enumerate(self.chosen()) if isinstance(c, Commit)))
        next(icnext, None)
        for (ithis, cthis), (_inext, cnext) in itertools.izip(icthis, icnext):
            if cthis.branch != cnext.branch or cthis.comment != cnext.comment:
                continue
            #elif cthis.committer.email != cnext.committer.email:
            #    continue
            if cthis.committer.date.delta(cnext.committer.date) < timefuzz:
                eligible.append(ithis)
        if verbose:
            announce("deletion set is %s" % [x+1 for x in eligible])
        self.chosen().delete(eligible, "coalesce")

    def help_from(self):
        print("""
From a specified commit, remove a specified fileop. The syntax is:

     from SELECTION remove OP

The selection set must be a singleton and the OP a 1-origin numeric index.
Note that this command does not attempt to scavenge blobs even if the
deleted fileop might be the only reference to them. This behavior may
change in a future release.
""")
    def do_from(self, line):
        "Delete a fileop from a specified commit."
        if not self.chosen():
            complain("no repo is loaded")
            return
        line = self.set_selection_set(line, [])
        if len(self.selection) != 1:
            complain("from selection must be a singleton")
            return
        event = self.chosen().events[list(self.selection)[0]]
        if not isinstance(event, Commit):
            complain("from selection must be a commit")
        fields = line.split()
        if fields[0] == "remove":
            try:
                ind = int(fields[1]) - 1
            except (ValueError, IndexError):
                complain("ill-formed or missing fileop index")
                return
            try:
                event.fileops.pop(ind)
            except IndexError:
                complain("out-of-range fileop index")
                return
        else:
            complain("unknown verb after from")
            return

    def help_renumber(self):
        print("""
Renumber the marks in a repository, from :1 up to <n> where <n> is the
count of the last mark. Just in case an importer ever cares about mark
ordering or gaps in the sequence.
""")
    def do_renumber(self, unused):
        "Renumber the marks in the selected repo."
        assert unused is not None    # pacify pylint
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        self.repo.renumber()

    def help_timeoffset(self):
        print("""
Apply a time offset to all time/date stamps in the selected set.  An offset
argument is required; it may be in the form [+-]ss, [+-]mm:ss or [+-]hh:mm:ss.
The leading sign is required to distingush it from a selection expression.

Optionally you may also specify another argument in the form [+-]hhmm, a
timeone literal to apply.  To apply a timezone without an offset, use
an offset literal of +0 or -0.
""")
    def do_timeoffset(self, line):
        "Apply a time offset to all dates in selected events."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        line = self.set_selection_set(line, range(len(self.chosen())))
        if not line:
            complain("a signed time offset argument is required.")
            return
        elif not line[0] in ('-', '+'):
            complain("time offset argument must begin with + or -.")
            return
        line = str(line)   # pacify pylint by forcing string type
        args = line.split()
        h = m = "0"
        if args[0].count(":") == 0:
            s = args[0]
        elif args[0].count(":") == 1:
            (m, s) = args[0].split(":")
        elif args[0].count(":") == 2:
            (h, m, s) = args[0].split(":")
        else:
            complain("too many colons")
            return
        try:
            offset = int(h)*360 + int(m)*60 + int(s)
        except ValueError:
            complain("expected numeric literals in date format")
            return
        if len(args) > 1:
            if not re.match("[+-][0-9][0-9][0-9][0-9]", args[1]):
                complain("expected timezone literal to be [+-]hhmm")
        for ei in self.selection:
            event = self.chosen()[ei]
            if isinstance(event, Tag):
                if event.tagger:
                    event.tagger.date.timestamp += offset
                    if len(args) > 1:
                        event.tagger.date.timezone = args[1]
            elif isinstance(event, Commit):
                event.committer.date.timestamp += offset
                if len(args) > 1:
                    event.committer.date.timezone = args[1]
                for author in event.authors:
                    author.date.timestamp += offset
                    if len(args) > 1:
                        author.date.timezone = args[1]

    def help_divide(self):
        print("""
Attempt to partition a repo by cutting the parent-child link
between two specified commits (they must be adjacent). Does not take a
general selection-set argument.  It is only necessary to specify the
parent commit, unless it has multiple children in which case the child
commit must follow (separate it with a comma).

If the repo was named 'foo', you will normally end up with two repos
named 'foo-early' and 'foo-late'.  But if the commit graph would
remain connected through another path after the cut, the behavior
changes.  In this case, if the parent and child were on the same
branch 'qux', the branch segments are renamed 'qux-early' and
'qux-late'.
""")
    def do_divide(self, line):
        "Attempt to topologically partition the repo."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        line = self.set_selection_set(line, [])
        if len(self.selection) == 0:
            complain("one or possibly two arguments specifying a link are required")
            return
        early = self.chosen()[self.selection[0]]
        possibles = list(early.children())
        if len(self.selection) == 1:    
            if len(possibles) > 1:
                complain("commit has multiple children, one must be specified")
                return
            elif len(possibles) == 1:
                late = possibles[0]
            else:
                complain("parent has no children")
                return
        elif len(self.selection) == 2:
            late = self.chosen()[self.selection[1]]
            if early.mark not in late.parent_marks():
                complain("not a parent-child pair")
                return
        elif len(self.selection) > 2:
            complain("too many arguments")
        assert(early and late)
        # Try the topological cut first
        if not self.cut(early, late):
            # If that failed, cut anyway and rename the branch segments
            late.remove_parent(early)
            if early.branch != late.branch:
                announce("no branch renames were required")
            else:
                basename = early.branch
                announce("%s has been split into %s-early and %s-late" \
                         % (basename, basename, basename))
                for (i, event) in enumerate(self.chosen().events):
                    if hasattr(event, "branch") and event.branch == basename:
                        if i <= self.selection[0]:
                            event.branch += "-early"
                        else:
                            event.branch += "-late"
        if verbose:
            self.do_choose("")

    def help_expunge(self):
        print("""
Expunge files from the selected portion of the repo history; the
default is the entire history.  The arguments to this command may be
paths or Python regular expressions matching paths.

All filemodify (M) operations and delete (D) operations involving a
matched file in the selected set of events are disconnected from the
repo and put in a removal set.  Renames are followed as the tool walks
forward in the selection set; each triggers a warning message. If a
selected file is a copy (C) target, the copy will be deleted and a
warning message issued. If a selected file is a copy source, the copy
target will be added to the list of paths to be deleted and a warning
issued.

After file expunges have been performed, any commits with no
remaining file operations will be deleted, and any tags pointing to
them. Commits with deleted fileops pointing both in and outside the
path set are not deleted, but are cloned into the removal set.

The removal set is not discarded. It is assembled into a new
repository named after the old one with the suffix "-expunges" added.
Thus, this command can be used to carve a repository into sections by
file path matches.
""")
    def do_expunge(self, line):
        "Expunge files from the chosen repository."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        line = self.set_selection_set(line, range(len(self.chosen())))
        line = str(line)   # pacify pylint by forcing string type
        self.expunge(self.selection, line.split())

    def help_split(self):
        print("""
Split a specified commit in two, the opposite of coalesce.

    split N at M
    split N by PREFIX

The first argument is required to be a commit location; the second is
a preposition which indicates which splitting method to use. If the
preposition is 'at', then the third argument must be an integer
1-origin index of a file operation within the commit. If it is 'in',
then the third argument must be a pathname to be matched.

The commit is copied and inserted into a new position in the
event sequence, immediately following itself; the duplicate becomes
the child of the original, and replaces it as parent of the original's
children. Commit metadata is duplicated; the mark of the new commit is
then changed, with 'bis' added as a suffix.

Finally, some file operations - starting at the one matched or indexed
by the split argument - are moved forward from the original commit
into the new one.  Legal indices are 2-n, where n is the number of
file operations in the original commit.
""")
    def do_split(self, line):
        "Split a commit."
        if self.chosen() is None:
            raise Recoverable("no repo has been chosen.")
        line = self.set_selection_set(line, [])
        if len(self.selection) != 1:
            raise Recoverable("selection of a single commit required for this command")
        where = self.selection[0]
        event = self.chosen()[where]
        if not isinstance(event, Commit):
            raise Recoverable("fileop argument doesn't point at a commit")
        line = str(line)   # pacify pylint by forcing string type
        (prep, obj) = line.split()
        if prep == 'at':
            try:
                splitpoint = int(obj) - 1
                if splitpoint not in xrange(1, len(event.fileops)):
                    raise Recoverable("fileop index out of range")
                self.chosen().split_commit_by_index(where, splitpoint)
            except ValueError:
                raise Recoverable("expected integer fileop index (1-origin)")
        elif prep == 'in':
            split = self.chosen().split_commit_by_prefix(where, obj)
            if not split:
                raise Recoverable("couldn't find '%s' in a fileop path." \
                                  % obj)
        else:
            raise Recoverable("don't know what to do for preposition %s" % prep)
        if verbose:
            self.do_inspect(repr(where+1) + "," + repr(where+2))

    def help_unite(self):
        print("""
Unite repositories. Name any number of loaded repositories; they will
be united into one union repo and removed from the load list.  The
union repo will be selected.

Before merging, the repos will be sorted by date of first commit.  The
oldest will keep all its branch and tag names unchanged (this rule is
followed so there will always be a defined default branch).  All others
will have their branch and tag names suffixed with their load name.
Marks will be renumbered.

The name of the new repo will be the names of all parts concatenated,
separated by '+'. It will have no source directory or preferred system
type.
""")
    def do_unite(self, line):
        "Unite repos together."
        self.unchoose()
        factors = []
        for name in line.split():
            repo = self.repo_by_name(name)
            if repo is None:
                raise Recoverable("no such repo as %s" % name)
            else:
                factors.append(repo)
        if not factors or len(factors) < 2: 
            raise Recoverable("unite requires repo name arguments")
        self.unite(factors)
        if verbose:
            self.do_choose('')

    def help_graft(self):
        print("""
For when unite doesn't give you enough control.  The selection set
must be of size 1, identifying a single commit in the currently
selected repo.  A following argument must be a repository name.
Labels and branches in the named repo are prefixed with its name; then
it is grafted to the selected one. Its root becomes a child of the
specified commit.  Finally the named repo is removed from the load
list.
""")
    def do_graft(self, line):
        "Graft a named repo onto the selected one."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        line = self.set_selection_set(line, [])
        if len(self.selection) == 1:
            graft_point = self.selection[0]
        else:
            raise Recoverable("a singleton selection set is required.")
        if not self.repolist:
            raise Recoverable("no repositories are loaded.")
        if line in self.reponames():
            graft_repo = self.repo_by_name(line)
        else:
            raise Recoverable("no such repo as %s" % line)
        # OK, we've got the two repos and the graft point.  Do it.
        self.chosen().graft(graft_repo, graft_point)
        self.remove_by_name(graft_repo.name)

    def help_debranch(self):
        print("""
Takes one or two arguments which must be the names of source and target
branches; if the second (target) argument is omitted it defaults to 'master'.
The history of the source branch is merged into the history of the target
branch, becoming the history of a subdirectory with the name of the source
branch. Any trailing segment of a branch name is accepted as a synonym for
it; thus 'master' is the same as 'refs/heads/master'.  Any resets of the
source branch are removed.
""")
    def do_debranch(self, line):
        "Turn a branch into a subdirectory."
        if self.chosen() is None:
            complain("no repo has been chosen.")
        args = line.split()
        if not args:
            complain("debranch command requires at least one argument")
        else:
            target = 'refs/heads/master'
            source = args[0]
            if len(args) == 2:
                target = args[1]
            repo = self.chosen()
            branches = repo.branchmap()
            if not source in branches.iterkeys():
                for candidate in branches.iterkeys():
                    if candidate.endswith(os.sep + source):
                        source = candidate
                        break
                else:
                    complain("no branch matches source %s" % source)
                    return
            if not target in branches.iterkeys():
                for candidate in branches.iterkeys():
                    if candidate.endswith(os.sep + target):
                        target = candidate
                        break
                else:
                    complain("no branch matches %s" % target)
                    return
            # Now that the arguments are in proper form, implement
            stip = repo.find(branches[source])
            scommits = repo.ancestors(stip) + [stip]
            pref = os.path.basename(source)
            for ci in scommits:
                for fileop in repo.events[ci].fileops:
                    if fileop.op in ("D", "M"):
                        fileop.path = os.path.join(pref, fileop.path)
                    elif fileop.op in ("R", "C"):
                        fileop.source = os.path.join(pref, fileop.source)
                        fileop.target = os.path.join(pref, fileop.target)
            ttip = repo.find(branches[target])
            tcommits = repo.ancestors(ttip) + [ttip]
            merged = sorted(set(scommits + tcommits))
            last_parent = []
            source_reset = None
            for i in merged:
                event = repo.events[i]
                if last_parent is not None:
                    event.set_parent_marks(last_parent + event.parent_marks()[1:])
                event.set_branch(target)
                last_parent = [event.mark]
            for (i, event) in enumerate(self.repo.events):
                if isinstance(event, Reset) and event.ref == source:
                    source_reset = i
            if source_reset is not None:
                del repo.events[source_reset]
            repo.declare_sequence_mutation()

    def help_paths(self):
        print("""
Without a modifier, list all paths touched by fileops in
the selection set (which defaults to the entire repo). This
variant does > redirection.

With the 'sub' modifier, take a second argument that is a directory
name and prepend it to every path. With the 'sup' modifier, strip the
first directory component from every path.
""" )
    def do_paths(self, line):
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        rest = self.set_selection_set(line, range(len(self.chosen())))
        if not rest.startswith(("sub", "sup")):
            with RepoSurgeon.LineParse(rest, capabilities=["stdout"]) as parse:
                allpaths = set()
                for event in itertools.imap(self.chosen().events.__getitem__,
                                            self.selection):
                    if isinstance(event, Commit):
                        allpaths.update(event.paths())
                parse.stdout.write("\n".join(sorted(allpaths)) + "\n")
                return
        fields = rest.split()
        if fields[0] == "sub":
            prefix = fields[1]
            modified = self.chosen().path_walk(self.selection,
                                               lambda f: os.path.join(prefix,f))
            print("\n".join(modified))
        elif fields[0] == "sup":
            try:
                modified = self.chosen().path_walk(self.selection,
                                               lambda f: f[f.find(os.sep)+1:])
                print("\n".join(modified))
            except IndexError:
                raise Recoverable("no / in sup path.")

    def help_manifest(self):
        print("""
Print commit trees contents. Takes an optional selection set argument
defaulting to all commits, and an optional Python regular expression.
For each commit in the selection set, print the mapping of all paths in
that commit tree to the corresponding blob marks, mirroring what files
would be created in a checkout of the commit. If a regular expression
is given, only print "path -> mark" lines for paths matching it.
This command supports > redirection.
""")
    def do_manifest(self, line):
        "Print all files (matching the regex) in the selected commits trees."
        if self.chosen() is None:
            raise Recoverable("no repo has been chosen")
        line = self.set_selection_set(line, range(len(self.chosen())))
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            filter_func = None
            line = parse.line.strip()
            if line:
                try:
                    filter_func = re.compile(line).search
                except re.error:
                    raise Recoverable("invalid regular expression")
            for ei, event in enumerate(self.chosen().events):
                if not (isinstance(event, Commit) and ei in self.selection):
                    continue
                header = "Event %s, " % repr(ei+1)
                header = header[:-2]
                header += " " + ((72 - len(header)) * "=") + "\n"
                parse.stdout.write(header)
                if event.fossil_id:
                    parse.stdout.write("# Fossil-ID: %s\n" % event.fossil_id)
                parse.stdout.write("commit %s\n" % event.branch)
                if event.mark:
                    parse.stdout.write("mark %s\n" % event.mark)
                parse.stdout.write("\n")
                if filter_func is None:
                    parse.stdout.write("\n".join("%s -> %s" % path_mark
                            for path_mark in event.manifest().iteritems()))
                else:
                    parse.stdout.write("\n".join("%s -> %s" % path_mark
                            for path_mark in event.manifest().iteritems()
                            if filter_func(path_mark[0])))
                parse.stdout.write("\n")

    def help_merge(self):
        print("""
Create a merge link. Takes a selection set argument, ignoring all but
the lowest (source) and highest (target) members.  Creates a merge link
from the highest member (child) to the lowest (parent).
""" )
    def do_merge(self, line):
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        self.set_selection_set(line)
        if not len(self.selection):
            raise Recoverable("merge requires a nonempty selection set.")
        self.selection = [e for e in sorted(self.selection) \
                          if isinstance(repo.events[e], Commit)]
        if not self.selection or len(self.selection) < 2:
            raise Recoverable("merge requires two commit arguments.")
        earlier = repo.events[self.selection[0]]
        later = repo.events[self.selection[-1]]
        later.add_parent(earlier)
        #earlier_id = "%s (%s)" % (earlier.mark, earlier.branch)
        #later_id = "%s (%s)" % (later.mark, later.branch)
        #announce("%s added as a parent of %s" % (earlier_id, later_id))

    def help_branch(self):
        print("""
Rename or delete a branch (and any associated resets).  First argument
must be an existing branch name; second argument must one of the verbs
'rename' or 'delete'.

For a 'rename', the third argument may be any token that is a syntactically
valid branch name (but not the name of an existing branch). For a 'delete',
no third argument is required.

For either name, if it does not contain a '/' the prefix 'refs/heads'
is prepended.
""")
    def do_branch(self, line):
        "Rename a branch or delete it."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        (branchname, line) = RepoSurgeon.pop_token(line)
        if not "/" in branchname:
            branchname = 'refs/heads/' + branchname
        if branchname not in repo.branchset():
            raise Recoverable("no such branch as %s" % branchname)
        (verb, line) = RepoSurgeon.pop_token(line)
        if verb == "rename":
            (newname, line) = RepoSurgeon.pop_token(line)
            if not newname:
                raise Recoverable("new branch name must be nonempty.")
            if not "/" in newname:
                newname = 'refs/heads/' + newname
            if newname in repo.branchset():
                raise Recoverable("there is already a branch named '%s'." \
                                  % newname)
            for event in repo:
                if isinstance(event, Commit):
                    if event.branch == branchname:
                        event.set_branch(newname)
                elif isinstance(event, Reset):
                    if event.ref == branchname:
                        event.ref = newname
        elif verb == "delete":
            repo.delete([i for i in range(len(repo.events)) if
                         (isinstance(repo.events[i], Reset) and repo.events[i].ref == branchname) \
                         or \
                         (isinstance(repo.events[i], Commit) and repo.events[i].branch == branchname)],
                        ["obliterate", "quiet"])
        else:
            raise Recoverable("unknown verb '%s' in branch command.")

    def help_tag(self):
        print("""
Move, rename, or delete a tag.  First argument must be an
existing tag name; second argument must be one of the verbs 'move',
'rename', or 'delete'.

For a 'move', a third argument must be a singleton selection set. For
a 'rename', the third argument may be any token that is a
syntactically valid tag name (but not the name of an existing
tag). For a 'delete', no third argument is required.
""")
    def do_tag(self, line):
        "Move a tag to point to a specified commit, or rename it, or delete it."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        (tagname, line) = RepoSurgeon.pop_token(line)
        for event in repo.events:
            if isinstance(event, Tag) and event.name == tagname:
                tag = event
                break
        else:
            raise Recoverable("no such tag as %s" % tagname)
        (verb, line) = RepoSurgeon.pop_token(line)
        if verb == "move":
            self.set_selection_set(line)
            if len(self.selection) != 1:
                raise Recoverable("tag move requires a singleton set.")
            else:
                target = self.selection.pop(0)
            if not isinstance(repo.events[target], Commit):
                raise Recoverable("move target must be a commit.")
            tag.forget()
            tag.remember(repo, target=repo.events[target])
        elif verb == "rename":
            (newname, line) = RepoSurgeon.pop_token(line)
            if not newname:
                raise Recoverable("new tag name must be nonempty.")
            tag.name = newname
        elif verb == "delete":
            tag.forget()
            repo.events.remove(tag)
            repo.declare_sequence_mutation()
        else:
            raise Recoverable("unknown verb '%s' in tag command.")

    def help_reset(self):
        print("""
Move, rename, or delete a reset.  First argument must match an
existing reset name; second argument must be one of the verbs 'move',
'rename', or 'delete'.

For a 'move', a third argument must be a singleton selection set. For
a 'rename', the third argument may be any token that can be interpreted
as a valid reset name (but not the name of an existing
reset). For a 'delete', no third argument is required.

An argument matches a reset's name if it is either the entire reference
(refs/heads/FOO for some some value of FOO) or the basename (e.g. FOO).
""")
    def do_reset(self, line):
        "Move a reset to point to a specified commit, or rename it, or delete it."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        (resetname, line) = RepoSurgeon.pop_token(line)
        if resetname.count("/") == 0:
            resetname = "refs/heads/" + resetname 
        for event in repo.events:
            if isinstance(event, Reset) and event.ref == resetname:
                reset = event
                break
        else:
            raise Recoverable("no such reset as %s" % resetname)
        (verb, line) = RepoSurgeon.pop_token(line)
        if verb == "move":
            self.set_selection_set(line)
            if len(self.selection) != 1:
                raise Recoverable("reset move requires a singleton set.")
            else:
                target = self.selection.pop(0)
            if not isinstance(repo.events[target], Commit):
                raise Recoverable("move target must be a commit.")
            reset.forget()
            reset.remember(repo, target=repo.events[target])
        elif verb == "rename":
            (newname, line) = RepoSurgeon.pop_token(line)
            if newname.count("/") == 0:
                newname = "refs/heads/" + newname 
            if not newname:
                raise Recoverable("new reset name must be nonempty.")
            reset.ref = newname
        elif verb == "delete":
            reset.forget()
            repo.events.remove(reset)
            repo.declare_sequence_mutation()
        else:
            raise Recoverable("unknown verb '%s' in reset command.")

    #
    # Artifact removal
    #
    def help_authors(self):
        print("""
Apply or dump author-map information for the specified selection
set, defaulting to all events. 

Lifts from CVS and Subversion may have only usernames local to
the repository host in committer and author IDs. DVCSes want email
addresses (net-wide identifiers) and complete names. To supply the map
from one to the other, an authors file is expected to consist of
lines each beginning with a local user ID, followed by a '=' (possibly
surrounded by whitespace) followed by a full name and email address.

When an authors file is applied, email addresses in committer and author
metdata for which the local ID matches between &lt; and @ are replaced
according to the mapping (this handles git-svn lifts). Alternatively,
if the local ID is the entire address, this is also considered a match
(this handles what git-cvsimport and cvs2git do) 

With the 'read' modifier, or no modifier, apply author mapping data
(from standard input or a <-redirected input file).  May be useful if
you are editing a repo or dump created by cvs2git or by git-svn
invoked without -A.

With the 'write' modifier, write a mapping file that could be
interpreted by 'authors read', with entries for each unique committer,
author, and tagger (to standard output or a >-redirected file). This
may be helpful as a start on building an authors file, though each
part to the right of an equals sign will need editing.
""")
    def do_authors(self, line):
        "Apply or dump author-mapping file."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        line = self.set_selection_set(line, range(len(self.chosen())))
        if line.startswith("write"):
            line = line[5:].strip()
            with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
                if parse.tokens():
                    raise Recoverable("authors write no longer takes a filename argument - use > redirection instead")
                self.chosen().write_authormap(self.selection, parse.stdout)
        else:
            if line.startswith("read"):
                line = line[4:].strip()
            with RepoSurgeon.LineParse(line, capabilities=["stdin"]) as parse:
                if parse.tokens():
                    raise Recoverable("authors read no longer takes a filename argument - use < redirection instead")
                self.chosen().read_authormap(self.selection, parse.stdin)

    #
    # Reference lifting
    #
    def help_fossils(self):
        print("""
Apply or list fossil-reference information. Does not take a
selection set. The 'read' variant reads from standard input or a
<-redirected filename; the 'write' variant writes to standard
output or a >-redirected filename.
""")
    def do_fossils(self, line):
        "Apply a reference-mapping file."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        if line.startswith("write"):
            line = line[5:].strip()
            with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
                if parse.tokens():
                    raise Recoverable("fossils write no longer takes a filename argument - use > redirection instead")
                self.chosen().write_fossilmap(parse.stdout)
        else:
            if line.startswith("read"):
                line = line[4:].strip()
            with RepoSurgeon.LineParse(line, capabilities=["stdin"]) as parse:
                if parse.tokens():
                    raise Recoverable("fossils read no longer takes a filename argument - use < redirection instead")
                self.chosen().read_fossilmap(parse.stdin)

    def help_references(self):
        print("""
With no modifier, produces a listing of events that may have
Subversion or CVS commit references in them.  This version
of the command supports >-redirection

With the modifier 'edit', edit this set.

With the modifier 'lift', transform commit-reference cookies from CVS
and Subversion into action stamps.  This command expects cookies
consisting of the leading string '[[', followed by a VCS identifier
(currently SVN or CVS) followed by VCS-dependent information, followed
by ']]'. An action stamp pointing at the corresponding commit is
substituted when possible.  Enables writing of the fassil-reference
map when the repo is written or rebuilt.
""")
    def do_references(self, line):
        "Look for things that might be CVS or Subversion revision references."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        repo.parse_dollar_cookies()
        rest = self.set_selection_set(line, range(len(self.chosen())))
        if "lift" in rest:
            hits = 0
            def substitute(getter, matchobj):
                payload = matchobj.group(0)[2:-2]
                commit = getter(payload)
                if commit is None:
                    complain("no commit matches " + repr(payload))
                    return matchobj.group(0) # no replacement
                elif commit:
                    text = commit.action_stamp()
                    return text
                else:
                    complain("cannot resolve %s" % payload)
                    return matchobj.group(0) # no replacement
            for (regexp, getter) in \
                    ((r"CVS:[^:\]]+:[0-9.]+",
                      lambda p: repo.fossil_map.get(p) or repo.dollar_map.get(p)),
                     ("SVN:[0-9]+",
                      lambda p: repo.fossil_map.get(p) or repo.dollar_map.get(p)),
                     (":[0-9]+",
                      lambda p: repo.objfind(p)),
                     ):
                match_re = re.compile(re.escape("[[")+regexp+re.escape("]]"))
                for ei in self.selection:
                    event = repo.events[ei]
                    if isinstance(event, (Commit, Tag)):
                        event.comment, new_hits = match_re.subn(
                            lambda m: substitute(getter, m),
                            event.comment)
                        hits += new_hits
            announce("%d references resolved." % hits)
            repo.write_fossils = True
        else:
            # No modifier, just list or edit
            refstyles = (
                # Subversion references
                r"\Wr([0-9]+)\W",
                r"(?:SVN|svn|Subversion|subversion|rev|version).*\W([0-9]+)\W",
                # CVS references
                r"(?:CVS|cvs|rev|version).*\W([0-9][0-9.]+)\W",
                # Possible bare CVS references
                r"[0-9]+\.[0-9]+\.[0-9]+",
                )
            idhits = []
            for ei in self.selection:
                event = repo.events[ei]
                if hasattr(event, "comment"):
                    text = event.comment
                elif hasattr(event, "text"):
                    text = event.text
                else:
                    continue
                for pattern in refstyles:
                    if re.search(pattern, text):
                        if ei not in idhits:
                            idhits.append(ei)
            if idhits:
                if rest.startswith("edit"):
                    self.edit(idhits, rest[4:].strip())
                else:
                    with RepoSurgeon.LineParse(rest, capabilities=["stdout"]) as parse:
                        for ei in idhits:
                            event = repo.events[ei]
                            if hasattr(event, "lister"):
                                summary = event.lister(ei, screenwidth())
                                if summary:
                                    parse.stdout.write(summary + "\n")

    #
    # Examining tree states
    #
    def help_checkout(self):
        print("""
Check out files for a specified commit into a directory.  The selection
set must resolve to a singleton commit.
""")
    def do_checkout(self, line):
        "Check out files for a specified commit into a directory."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        rest = self.set_selection_set(line, range(len(self.chosen())))
        if not rest:
            raise Recoverable("no target directory specified.")
        if len(self.selection) == 1:
            commit = repo.events[self.selection[0]]
            if not isinstance(commit, Commit):
                raise Recoverable("not a commit.")
        else:
            raise Recoverable("a singleton selection set is required.")
        commit.checkout(rest)

    def help_diff(self):
        print("""
Display the difference between commits. Takes a selection-set argument which
must resolve to exactly two commits.
""")
    def do_diff(self, line):
        "Display a diff between versions."
        if self.chosen() is None:
            complain("no repo has been chosen.")
            return
        repo = self.chosen()
        self.set_selection_set(line, range(len(self.chosen())))
        bounds = tuple(repo.events[i] for i in sorted(self.selection))
        if len(self.selection) != 2 or \
               not isinstance(bounds[0], Commit) or \
               not isinstance(bounds[1], Commit):
            raise Recoverable("a pair of commits is required.")
        dir1 = bounds[0].checkout()
        dir2 = bounds[1].checkout()
        command = "diff -r --label 'commit %s' --label 'commit %s' -u %s %s" % \
                  (bounds[0].mark, bounds[1].mark, dir1, dir2)
        os.system(command)
    #
    # Setting patch to branchify
    #
    def help_branchify(self):
        print("""
Specify the list of directories to be treated as potential branches (to
become tags if there are no modifications after the creation copies)
when analyzing a Subversion repo. This list is ignored when the
svn_nobranch option is set.  It defaults to the 'standard layout'
set of directories, plus any unrecognized directories in the
repository root.

With no arguments, displays the current branchification set.

An asterisk at the end of a path in the set means 'all immediate
subdirectories of this path, unless they are part of another (longer)
path in the branchify set'.

Note that the branchify set is a property of the reposurgeon interpreter, not
of any individual repository, and will persist across Subversion
dumpfile reads. This may lead to unexpected results if you forget
to re-set it.
""")
    def do_branchify(self, line):
        if line.strip():
            global_options['svn_branchify'] = line.strip().split()
        announce("branchify " + " ".join(global_options['svn_branchify']))

    #
    # Setting options
    #
    def help_set(self):
        print("""
Set a boolean option to control reposurgeon's behavior.   With no arguments,
displays the state of all flags and options. The following flags and
options are defined:
""")
        for (opt, expl) in RepoSurgeon.OptionFlags:
            print(opt + ":\n" + expl)
    def do_set(self, line):
        if not line.strip():
            for opt in dict(RepoSurgeon.OptionFlags):
                print("\t%s = %s" % (opt, global_options.get(opt, False)))
        else:
            for option in line.split():
                if option not in dict(RepoSurgeon.OptionFlags):
                    complain("no such option flag as '%s'" % option)
                else:
                    global_options[option] = True
    def help_clear(self):
        print("""
Clear a boolean option to control reposurgeon's behavior.   With no arguments,
displays the state of all flags. The following flags and options are defined:
""")
        for (opt, expl) in RepoSurgeon.OptionFlags:
            print(opt + ":\n" + expl)
    def do_clear(self, line):
        if not line.strip():
            for opt in dict(RepoSurgeon.OptionFlags):
                print("\t%s = %s" % (opt, global_options.get(opt, False)))
        else:
            for option in line.split():
                if option not in dict(RepoSurgeon.OptionFlags):
                    complain("no such option flag as '%s'" % option)
                else:
                    global_options[option] = False

    #
    # Version binding 
    #
    def help_version(self):
        print("""
With no argument, display the reposurgeon version and supported VCSes.
With argument, declare the major version (single digit) or full
version (major.minor) under which the enclosing script was seveloped.
The program will error out if the major version has changed (which
means the surgical language is not backwards compatible).
""")
    def do_version(self, line):
        if not line:
            announce("reposurgeon " + version + " supporting " + " ".join(x.name for x in (vcstypes+extractors)))
        else:
            (vmajor, _) = version.split(".")
            if '.' in line:
                try:
                    (major, _) = line.strip().split(".")
                except ValueError:
                    complain("invalid version.")
                    return
            else:
                major = line.strip()
            if major != vmajor:
                raise Fatal("major version mismatch, aborting.")
            elif verbose > 0:
                announce("version check passed.")
    #
    # Running unit tests (undocumented)
    #
    def help_runtests(self):
        print("""
Runs the unit tests and reports the results.
""")
    def do_runtests(self, line):
        def runtest(name):
            result = unittest.TextTestRunner().run(unittest.defaultTestLoader.loadTestsFromTestCase(globals()[name]))
            if not result.wasSuccessful():
                raise Recoverable("unit tests failed")
        available = ["DateTests"]
        if line:
            if line in available:
                runtest(line)
            else:
                complain("no test class known as '%s'" % line)
        else:
            for name in available:
                runtest(name)

if __name__ == '__main__':
    # Increase max stack size from 8MB to 512MB
    # Needed to handle really large repositories.
    try:
        sys.setrecursionlimit(10**6)
        import resource
        resource.setrlimit(resource.RLIMIT_STACK, (2**29,-1))
    except ImportError:
        # Don't fail to start if 'resource' isn't available
        pass
    except ValueError:
        # May not be allowed on some systems.  Whether or not we can do it
        # isn't interesting, it only matters whether the limit is actually
        # blown.
        pass
    try:
        def interactive():
            global verbose
            interpreter.use_rawinput = True
            if verbose == 0:
                verbose = 1
            interpreter.cmdloop()
            interpreter.use_rawinput = False
        interpreter = RepoSurgeon()
        interpreter.use_rawinput = False
        if not sys.argv[1:]:
            sys.argv.append("-")
        try:
            for arg in sys.argv[1:]:
                for arg in arg.split(";"):
                    if arg == '-':
                        if interpreter.profile_log is None:
                            interactive()
                        elif interpreter.profile_log:
                            cProfile.run('interactive()', \
                                         interpreter.profile_log)
                        else:
                            cProfile.run('interactive()')
                    else:
                        # Call the base method so RecoverableExceptions
                        # won't be caught; we want them to abort scripting.
                        cmd.Cmd.onecmd(interpreter, interpreter.precmd(arg))
        finally:
            interpreter.cleanup()
    except (Recoverable, Fatal) as xe:
        complain(xe.msg)
        sys.exit(1)
    except KeyboardInterrupt:
        print("")
# end
