# Mercurial support (work in progress)

# Based on git backend with:
# Copyright (c) 2020-2023 Andreas Gustafsson.  All rights reserved.
# Please refer to the file COPYRIGHT for detailed copyright information.

# XXXTODO This hardcodes the src module.  This doesn't handle
# the case of a code base split into multiple modules, such as
# src+xsrc, displayed on a shared timeline.  It should be changed
# to iterate over the modules and merge the data from all of them
# into the "dates" array.  We also need a new data structure to
# keep track of which commit belongs to which module.

import os
import json
import re
import subprocess
import html

import bracket
from bracket import branch_name, cno2ts, config, last_commit_cno, runv, ts2rcs, remove_email
from bracket import Commit, RepositoryUpdateFailed
from htmlgen import div
from report import link_if

# Matches a string that ends in a sign character ('-' or '+') followed
# by zero or more digits; group 1 is everything before that sign.
# read_dates() uses it to strip the trailing timezone offset from the
# "{date}" field printed by hg log.
TIMEZONE_RE = re.compile(r'(.*)[-+][0-9]*$')

# TODO: remove hardcoded module assumption
# Name of the single repository module this backend operates on; it is
# read by read_dates(), get_commits() and format_commit_html().
module = 'src'


def default_branch():
    """Return the name of this backend's default branch."""
    name = "trunk"
    return name


def setup():
    """Do nothing; this backend has no setup step."""
    return None


def _hg_dir(module):
    """Return the pathname of the hg repository for module "module".

    The path is the module name joined onto config['repo_root'].
    """
    return os.path.join(config['repo_root'], module)


def _hg_cmdv(module):
    """Return the beginning of an argv for invoking hg on module "module".

    The result is a new list, ['hg', '-R', <repository path>], to which
    the caller appends the hg subcommand and its arguments.
    """
    argv = ['hg', '-R']
    argv.append(_hg_dir(module))
    return argv


def get_commit_data(module, revision, data):
    """Store the committer and changed files of one changeset in "data".

    Runs "hg log" on the repository of "module" for the single revision
    "revision" (a revision identifier string) with a template that
    prints a two-element JSON array, and stores its elements in the
    dict "data":

      data['committer']  the changeset's {user} field
      data['files']      the changeset's {files} list

    Raises RuntimeError if hg exits with a nonzero status.
    """
    cmd = _hg_cmdv(module) + [
        'log', '--rev', revision,
        '--template', '[{user|json}, {files|json}]',
    ]
    s = subprocess.run(cmd, text=True, capture_output=True,
                       encoding='utf-8')
    if s.returncode != 0:
        # Fail here with hg's own diagnostics rather than falling
        # through to json.loads() on empty or partial output.
        raise RuntimeError(
            f'failed to extract commit information for module {module} '
            f'revision {revision}: {s.stderr.strip()}')
    commit_data_json = json.loads(s.stdout)
    data['committer'] = commit_data_json[0]
    data['files'] = commit_data_json[1]


def update_repo_module(module):
    """Create or refresh the local hg repository for module "module".

    If the repository directory already exists, run "hg pull" in it.
    Otherwise clone the module from anonhg.netbsd.org and then update
    the new clone to the null revision.

    Raises RepositoryUpdateFailed if any of the hg commands returns a
    nonzero status.
    """
    repo_path = _hg_dir(module)

    if os.path.exists(repo_path):
        # Update existing tree
        if runv(_hg_cmdv(module) + ['pull']) != 0:
            raise RepositoryUpdateFailed()
        return

    clone_cmd = ['hg', 'clone',
                 f'https://anonhg.netbsd.org/{module}/',
                 repo_path]
    if runv(clone_cmd) != 0:
        raise RepositoryUpdateFailed()

    # Match the '--mirror' mode used in the git backend by removing
    # the files in the checkout of the fresh clone.
    if runv(['hg', 'update', 'null'], cwd=repo_path) != 0:
        raise RepositoryUpdateFailed()


def index_repo():
    """Do nothing; hg needs no indexing ahead of time.

    Hg repositories are well indexed in themselves, and querying the
    repository directly in read_dates() is fast enough.
    """
    return None


def read_dates():
    """Load the commit timeline of the configured branch from hg.

    Runs "hg log" on the repository of the global "module", restricted
    to the branch named by branch_name(config), and stores the result
    in two attributes of the bracket module:

      bracket.dates   list of integer commit timestamps, in the order
                      hg printed the commits, adjusted so that each is
                      greater than the one before it
      bracket.ts2sha  dict mapping each of those timestamps to the
                      node hash of its commit

    A nonzero exit status from hg is reported as a warning; the data
    read up to that point is still stored.
    """
    print("begin read dates")
    cmd = _hg_cmdv(module) + [
        'log', '--template', '{date} {node}\n',
        '--rev', f'branch({branch_name(config)})',
    ]
    prev_commit_ts = 0
    dates = []
    ts2sha = {}
    # The context manager closes the pipe and waits for the child even
    # if parsing one of the lines raises an exception.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE,
                          text=True, encoding='ASCII') as child:
        for line in child.stdout:
            commit_ts_str, rev = line.rstrip().split()
            # remove timezone
            if m := TIMEZONE_RE.match(commit_ts_str):
                commit_ts_str = m.group(1)
            commit_ts = int(float(commit_ts_str))
            # Deal with non-monotonically-increasing commit timestamps
            # by adjusting each timestamp to be at least one second
            # after the previous one.  This should work well enough
            # with a repo converted from CVS, but if or when we start
            # accepting direct commits with unchecked commit
            # timestamps, we may have to add fancier heuristics that
            # basically amount to solving what is known as the
            # "Longest Increasing Subsequence" problem.
            if commit_ts <= prev_commit_ts:
                print("warning: time did not increase between commits "
                      "at %d (diff %d), fudging to %d" %
                      (commit_ts, commit_ts - prev_commit_ts,
                       prev_commit_ts + 1))
                commit_ts = prev_commit_ts + 1
            prev_commit_ts = commit_ts
            dates.append(commit_ts)
            ts2sha[commit_ts] = rev
    if child.returncode != 0:
        # Don't let a failed hg invocation pass for an empty or
        # truncated history without any indication.
        print("warning: hg log exited with status %d" % child.returncode)
    bracket.dates = dates
    bracket.ts2sha = ts2sha
    print("end read dates")


def checkout(_branch, module, ts, builddir, logfd):
    """Export the sources of the commit at timestamp "ts" for a build.

    Runs "hg archive" to write the tree of the commit that
    bracket.ts2sha maps "ts" to into the directory "builddir"/"module",
    with hg's stdout and stderr sent to "logfd".  Returns the status
    returned by runv().

    The "_branch" argument is ignored, since the timestamp identifies a
    unique commit and the commit already has a branch associated.
    """
    # Use hg archive rather than clone to avoid the needless expense
    # of creating metadata files in the source tree.
    argv = _hg_cmdv(module)
    revision = bracket.ts2sha[ts]
    destination = f'{builddir}/{module}'
    argv = argv + ['archive', '--rev', revision, destination]
    return runv(argv, stdout=logfd, stderr=logfd)


def last_safe_commit_ts():
    """Return the timestamp of the last commit.

    With hg, there are no unsafe commits, so this is simply the
    commit number from last_commit_cno() converted by cno2ts().
    """
    newest_cno = last_commit_cno()
    return cno2ts(newest_cno)


def get_commits(ts0, ts1):
    """Return the commits with timestamps in the range [ts0, ts1).

    Refreshes the timeline through bracket.read_dates(), then builds
    one Commit object per timestamp in bracket.dates that is at least
    "ts0" and less than "ts1".  Each Commit has its timestamp,
    committer, revision and files attributes filled in, the committer
    and files coming from get_commit_data() for the global "module".
    The list is in the same order as bracket.dates.
    """
    print("get_commits", ts2rcs(ts0), ts2rcs(ts1))
    bracket.read_dates()

    def make_commit(ts):
        # Build the Commit for the revision recorded at this timestamp.
        rev = bracket.ts2sha[ts]
        info = {}
        get_commit_data(module, rev, info)
        commit = Commit()
        commit.timestamp = ts
        commit.committer = info['committer']
        commit.revision = rev
        commit.files = info['files']
        return commit

    selected = [ts for ts in bracket.dates if ts0 <= ts < ts1]
    return [make_commit(ts) for ts in selected]

def format_commit_html(c):
    """Return an HTML fragment describing the commit "c".

    The fragment is a div holding the word "commit", the result of
    link_if() for the commit's page on anonhg.netbsd.org, and the
    committer with remove_email() applied, followed by one div of
    class "file" per entry in c.files, all separated by newlines.
    """
    url = f'https://anonhg.netbsd.org/{module}/rev/{c.revision}'
    header = div(' '.join([
        'commit',
        link_if(url, url, c.revision),
        html.escape(remove_email(c.committer))]))
    # File names come from the repository and may contain characters
    # that are special in HTML, so escape them just like the committer.
    # NOTE(review): assumes div() does not escape its content itself,
    # as the explicit escaping of the committer suggests -- confirm.
    file_divs = [div({'class': 'file'}, html.escape(f)) for f in c.files]
    return header + "\n" + "\n".join(file_divs)

def format_commit_email(c):
    """Return a plain-text description of the commit "c".

    The text is a four-space-indented header line giving the revision
    and the committer with remove_email() applied, followed by one
    eight-space-indented line per entry in c.files.  Every line,
    including the last, ends in a newline.
    """
    header = f"    commit {c.revision} by {remove_email(c.committer)}\n"
    file_lines = [f"        {file}\n" for file in c.files]
    return header + "".join(file_lines)
