#! /usr/bin/env bash
#
# Cleanup tasks after Bro termination:  move the node's working directory
# to a tmp dir and create a new working directory, create a crash report if
# the node crashed, wait for this node's archive-log processes to finish,
# try to archive any remaining logs (and send an email if this fails), and
# finally (if the node didn't crash) remove the tmp dir if all logs were
# successfully archived.
#
# post-terminate <type> <dir> [<crashflag>]
#
# <type> is the node's type ("manager", "worker", etc.).
# <dir> is the node's working directory.
#
# If <crashflag> is not set, then BroControl has stopped Bro normally.
# If <crashflag> is "crash", then BroControl has determined that Bro crashed
# and this script will return information about the crash on stdout which is
# suitable for mailing to the user.  If <crashflag> is "killed", then
# BroControl terminated Bro forcefully (but intentionally) by SIGKILL while
# trying to stop Bro.

# Send an email telling the user that one or more logs could not be archived.
#
# Globals read:
#   mailarchivelogfail - when set to "0", no email is sent
#   scriptdir          - directory that contains the send-mail helper
#   tmp                - directory named in the message body
#
# The send-mail helper is invoked with "archive log failure" as its only
# argument and receives the message text on stdin.  The return status is 0
# when mailing is disabled, otherwise it is the status of send-mail.
function sendfailuremail
{
    if [ "${mailarchivelogfail}" = "0" ]; then
        return 0
    fi

    # The path is quoted so that an install location containing whitespace
    # does not get split into several words.
    "${scriptdir}"/send-mail "archive log failure" <<_EOF_
Unable to archive one or more logs in directory:
${tmp}
Check the post-terminate.out file in that directory for any error messages.
_EOF_
}

# Validate the command line: <type> and <dir> are required, <crashflag> is
# optional.  Error messages go to stdout, like the rest of this script's
# user-visible output.
if [ $# -lt 2 ] || [ $# -gt 3 ]; then
    echo "post-terminate: wrong usage: $*"
    exit 1
fi

ntype=$1
dir=$2

if [ ! -d "$dir" ]; then
    echo "post-terminate: directory not found: $dir"
    exit 1
fi

# Make the working directory path absolute, because it is used again below
# after this script has changed into the tmp directory.
case "$dir" in
    /*) ;;
    *) dir=$PWD/$dir ;;
esac

# Translate the optional crash flag into two 0/1 variables.  Any other value
# (including no third argument at all) means a normal stop.
crash=0
killed=0
case "$3" in
    crash)  crash=1 ;;
    killed) killed=1 ;;
esac

# Compute both timestamps from a single "date" call so that they always
# describe the same instant (two separate calls could straddle a second
# boundary).  Neither format contains a space, so a space separates them.
bothtimestamps=$(date '+%Y-%m-%d-%H-%M-%S %y-%m-%d_%H.%M.%S')
tmpdirtimestamp=${bothtimestamps% *}
archivelogtime=${bothtimestamps#* }

# Load the broctl configuration, which is expected to define the variables
# used below (tmpdir, scriptsdir, bro, havenfs, tmpexecdir, os, ...).
scriptdir=$(dirname "$0")
. "$scriptdir/broctl-config.sh"

if [ -z "${tmpdir}" ]; then
    echo "post-terminate: broctl option tmpdir not set"
    exit 1
fi

if [ ! -d "${tmpdir}" ]; then
    mkdir "${tmpdir}"
fi

# Name of the directory that receives the old working directory.  The PID
# ($$) is included in addition to the timestamp, and crashes are marked
# with a "-crash" suffix.
tmp=${tmpdir}/post-terminate-$ntype-$tmpdirtimestamp-$$

if [ $crash -eq 1 ]; then
    tmp=$tmp-crash
fi

# Move the old working directory out of the way and create a fresh one.
if ! mv "$dir" "$tmp"; then
    exit 1
fi

mkdir "$dir"

# Everything below operates on relative paths inside the tmp directory, so
# do not continue if we could not get there.
if ! cd "$tmp"; then
    echo "post-terminate: cannot change to directory: $tmp"
    exit 1
fi

# Move the .state directory back into the new working directory.
if [ -d .state ]; then
    mv .state "$dir"
fi

if [ $crash -eq 1 ]; then
    # output the crash report
    "${scriptsdir}"/crash-diag "$tmp"

    # Keep a copy of the Bro binary next to the crash data.  With havenfs
    # set to "1" the binary is taken from tmpexecdir instead of its
    # configured location.
    mybro=${bro}
    if [ "${havenfs}" = "1" ]; then
        mybro=${tmpexecdir}/$(basename "${bro}")
    fi
    cp "$mybro" .
fi

# The .startup file is needed later to determine the start time of logs.
if [ ! -f .startup ]; then
    echo "post-terminate: file not found: .startup"
    exit 1
fi

# Execute the remaining part of this script in the background so that broctl
# doesn't need to wait for it to finish.  Stdout/stderr is redirected to a
# file to capture error messages.
(

# Wait for any archive-log processes to finish, so that we can either
# launch new ones (below) or remove this directory.
#
# The archive-log PID files are the regular files in the current directory
# that match ".archive-log.*.tmp".  They are enumerated with a shell glob
# rather than by word-splitting the output of "find", so unusual file names
# cannot be split or re-globbed.  The loop ends on the first pass that finds
# no PID file at all.
while :; do
    pending=0

    for pfile in ./.archive-log.*.tmp; do
        # Skip the unexpanded pattern (nothing matched) and anything that
        # is not a plain regular file.
        if [ ! -f "$pfile" ] || [ -L "$pfile" ]; then
            continue
        fi
        pending=1

        # If PID file is empty, then check it again later.
        if [ -s "$pfile" ]; then
            # Check if a process with given PID exists.  The substitution
            # is deliberately left unquoted so that whitespace around the
            # PID is dropped.
            # shellcheck disable=SC2046
            if ! ps -p $(cat "$pfile") > /dev/null 2>&1; then
                # No such process exists, so remove PID file
                rm -f "$pfile"
            fi
        fi
    done

    if [ $pending -eq 0 ]; then
        break
    fi

    sleep 1
done

# Select the find command used for regex matching of unrotated logs.  The
# value is a command plus options in one string, so it must be expanded
# unquoted by its users.
# NOTE(review): "find -E" is presumably meant for BSD-style find (extended
# regex) -- confirm for any other platform that ends up with this default.
find_cmd="find -E"
if [ "${os}" = "Linux" ]; then
    find_cmd=find
elif [ "${os}" = "OpenBSD" ]; then
    # OpenBSD find command doesn't have "-regex" option.
    find_cmd=gfind
fi

# The last line of .startup is used as the default start time of logs.
startuptime=$(tail -n 1 .startup)
# Extra options passed to archive-log (none by default).
flags=
# Set to 1 as soon as any archive-log invocation fails.
failed=0

# Run archive-log on every matching log file in the current directory.
#
# Arguments:
#   $1 - "rotated" to process logs whose name contains more than one period
#        (such as "conn.<timestamp>.log"); any other value processes logs
#        with exactly one period in the name (such as "conn.log").
#
# Globals read:
#   find_cmd       - find command (plus options) used for regex matching
#   startuptime    - default start time for unrotated logs
#   archivelogtime - end time passed for every log
#   flags          - extra options for archive-log (may be empty)
#   scriptsdir     - directory containing archive-log
# Globals written:
#   failed         - set to 1 if any archive-log invocation fails
#
# Note: this assumes the user is using the default ".log" file extension.
function archivelogs
{
    local mode=$1
    local -a logfiles=()
    local logpath logname base ts strt end
    local yr mon day hr min sec rest

    # Collect the file names NUL-delimited so that names containing
    # whitespace or glob characters stay intact.  They are gathered into an
    # array first so that archive-log does not inherit find's output as its
    # stdin.
    if [ "$mode" = "rotated" ]; then
        # Rotated logs always have more than one period in the filename.
        while IFS= read -r -d '' logpath; do
            logfiles+=("$logpath")
        done < <(find . -maxdepth 1 -type f -name '*.*.log' -print0)
    else
        # Unrotated logs have only one period in the filename.
        # $find_cmd is deliberately unquoted: it may hold a command plus
        # an option (e.g. "find -E").
        # shellcheck disable=SC2086
        while IFS= read -r -d '' logpath; do
            logfiles+=("$logpath")
        done < <($find_cmd . -maxdepth 1 -type f -regex '\./[^.]+\.log' -print0)
    fi

    for logpath in "${logfiles[@]}"; do
        # Strip off the leading "./"
        logname=${logpath#./}

        # Get the basename (such as "conn"): everything before the first
        # period.
        base=${logname%%.*}

        # Try to figure out a reasonable start time for the log.
        if [ "$mode" = "rotated" ]; then
            # Get the start time from log filename: drop everything up to
            # the first period, then drop the trailing extension.
            ts=${logname#*.}
            ts=${ts%.*}
            case "$ts" in
                *.*)
                    # Timestamp is already in the required format
                    strt=$ts
                    ;;
                *)
                    # Convert time from YYYY-MM-DD-HH-MM-SS to
                    # YY-MM-DD_HH.MM.SS (any fields beyond the sixth are
                    # ignored).
                    IFS=- read -r yr mon day hr min sec rest <<< "$ts"
                    strt="${yr:2:2}-${mon}-${day}_${hr}.${min}.${sec}"
                    ;;
            esac
        else
            # Note: there is a possibility that the start time computed here
            # might be later than the end time.
            strt=$startuptime
            if [ -f ".rotated.$base" ]; then
                strt=$(< ".rotated.$base")
            fi
        fi

        # Just assume the end time of the log is the time this script is run.
        end=$archivelogtime

        # Note: here we assume the log is ascii.
        # $flags is deliberately unquoted so that an empty value adds no
        # argument at all.
        # shellcheck disable=SC2086
        if ! "${scriptsdir}"/archive-log $flags "$logname" "$base" "$strt" "$end" 1 ascii; then
            failed=1
        fi
    done
}

# First pass: rotated logs.  These should only be present when an earlier
# archiving attempt did not succeed; a failure here is reported by email
# further down.
archivelogs rotated

# Second pass: unrotated logs, but only when Bro crashed or was killed.
# This also picks up stdout.log/stderr.log, which may contain useful
# information in that situation.
if [ "$crash" -eq 1 ]; then
    # After a crash archive-log is told (via -c) to keep the log file; it
    # renames the file instead to show that it was archived successfully.
    flags=-c
    archivelogs unrotated
elif [ "$killed" -eq 1 ]; then
    archivelogs unrotated
fi

# Try to notify the user by email if any log could not be archived.
if [ "$failed" -ne 0 ]; then
    sendfailuremail
fi

# After a crash the tmp directory is always kept, so there is nothing
# left to do.
if [ "$crash" -eq 1 ]; then
    exit 0
fi

# Remove the tmp directory only when every archive-log started from this
# script succeeded.  Otherwise it stays in place, and the email sent above
# points the user to it.
if [ "$failed" -eq 0 ]; then
    rm -rf "$tmp"
fi

) >post-terminate.out 2>&1 &

