#! /usr/bin/env mpibash

################################################
# Quickly tar up a set of files using MPI-Bash #
# By Scott Pakin <pakin@lanl.gov>              #
################################################

# Initialize both MPI and Libcircle.  Each library's builtins come from
# a loadable module: "enable -f <file>.so <name>" loads the builtin
# <name> from the shared object, after which it can be called like any
# other shell command.
enable -f mpibash.so mpi_init
mpi_init
# Store this process's MPI rank in $rank.  Later code uses rank 0 for
# the steps that must happen only once (printing the usage message and
# removing/padding the tarball).
mpi_comm_rank rank
enable -f circlebash.so circle_init
circle_init

# Define the maximum number of bytes to read/write at once.  This
# should be large enough to get good performance from a parallel
# filesystem but not so large that it thrashes the TLB.  The value
# below is 100 MiB (100*1024*1024).  A file larger than this is split
# into segments of this size, and each segment is copied into the
# tarball as a separate work item.
maxblocksize=104857600

# Define a character that must not appear in a filespec.  It separates
# the fields of the work-queue messages that describe tar work, so a
# file name containing it cannot be split back into fields correctly.
sep=$'\t'

# Parse the command line.
#   -c          create an archive (required; no other mode is implemented)
#   -f <file>   name of the tarball to write (required)
#   -C <dir>    change to <dir> before looking up the input files
#   -T <file>   read additional input names from <file>, one per line
#   -v          report each file fragment as it is written
# At least one input file or a -T list must be given.
usagestr="Usage: $0 -c [-C <dir>] [-T <list_file>] [-v] -f <output.tar> <input_file>..."
verbose=no
create=no
workdir=.
while getopts cC:T:vf: optname ; do
    case "$optname" in
        \?)
            # getopts has already reported the offending option.
            exit 1
            ;;
        f)
            # Make the tarball name absolute: we change directory below,
            # and the name must keep referring to the same file.
            tarfile=$(readlink -f "$OPTARG")
            tarfile=${tarfile%/}
            ;;
        C)
            workdir="$OPTARG"
            ;;
        T)
            # For the same reason, anchor a relative list-file name at
            # the invocation directory rather than at the -C directory.
            case "$OPTARG" in
                "")
                    listfile=
                    ;;
                /*)
                    listfile="$OPTARG"
                    ;;
                *)
                    listfile="$PWD/$OPTARG"
                    ;;
            esac
            ;;
        v)
            verbose=yes
            ;;
        c)
            create=yes
            ;;

    esac
done
shift $((OPTIND-1))
if [ -z "$tarfile" ] || [ "$create" = no ] || [[ $# -eq 0 && -z "$listfile" ]] ; then
    # Every rank parses the same command line; only rank 0 complains.
    if [ "$rank" -eq 0 ] ; then
        echo "$usagestr" 1>&2
    fi
    exit 1
fi
arglist=("$@")
cd "$workdir" || exit 1

# Cache of tar header sizes in bytes, keyed by file or directory name.
declare -A -x header_size_cache

# Given a file name and file size, return the expected tar header + data size.
#   $1 - name of the file or directory as it will appear in the tarball
#   $2 - size of the contents in bytes (0 to get the header size alone)
# Writes the total to standard output, rounded up to a whole number of
# 512-byte tar blocks.
function get_tar_size () {
    local fname="$1"
    local size="$2"
    local header_bytes total_size
    if [ "${#fname}" -gt 100 ] ; then
        # HACK: Assume that long filenames always consume 1536 bytes
        # of header.  This is typical, but they can consume more if
        # the filename is _very_ long.
        header_bytes=1536
    else
        header_bytes=512
    fi
    # Keep the result local so that a direct (non-subshell) call does
    # not clobber a caller's or global variable of the same name.
    total_size=$(( ((header_bytes + size + 511)/512)*512 ))
    echo "$total_size"
}

# Keep track of 1/Nth of the directory tree as a mapping from file
# name or directory name to size.  Each rank records only the entries
# that the traversal callback processed on that rank.
#   fname2size   - non-directory name -> size of its contents in bytes
#   local_fsize  - running total of tar bytes for this rank's files
#   dnamelist    - names of the directories this rank processed
#   local_dsize  - running total of tar bytes for this rank's directories
declare -A fname2size   # Excludes the tar header
local_fsize=0           # Includes the tar header
declare -a dnamelist    # Tar header N/A
local_dsize=0           # Includes the tar header

# Define a function to seed a directory traversal from both the
# command line and the argument to -T.
#   Globals read: arglist (names given on the command line),
#                 listfile (optional file with one name per line)
#   Every name is placed on the Libcircle work queue with circle_enqueue.
function seed_traversal () {
    local arg
    for arg in "${arglist[@]}" ; do
        circle_enqueue "$arg"
    done
    if [ -n "$listfile" ] ; then
        # IFS= keeps leading/trailing whitespace that is part of a name;
        # the extra -n test picks up a final line that lacks a newline.
        while IFS= read -r arg || [ -n "$arg" ] ; do
            circle_enqueue "$arg"
        done < "$listfile"
    fi
}

# Define a function to traverse a directory tree and keep track of
# the size of each file it contains.  Each call handles one name taken
# from the Libcircle work queue:
#   - a directory is recorded in dnamelist/local_dsize and each of its
#     entries is put back on the queue;
#   - anything else is recorded in fname2size/local_fsize.
# In both cases the name's tar header size goes into header_size_cache.
function traverse_tree () {
    local fname            # Name of file or directory assigned to us
    local total_size       # Size of Tar header + contents

    circle_dequeue fname

    # Skip names that do not exist (a dangling symlink still counts as
    # existing).  Recording them would reserve tarball space that never
    # receives a header.
    if [ ! -e "$fname" ] && [ ! -L "$fname" ] ; then
        printf '%s\n' "$0: $fname: No such file or directory" 1>&2
        return
    fi

    # Re-enqueue directory contents.  A symbolic link to a directory is
    # archived as a link, so it is not descended into.
    if [ -d "$fname" ] && [ ! -L "$fname" ] ; then
        local dname="$fname"
        dnamelist+=("$dname")
        total_size=$(get_tar_size "$dname" 0)
        (( local_dsize += total_size ))
        header_size_cache["$dname"]=$total_size
        # Enumerate entries with globs so that names containing
        # whitespace or glob characters stay intact.  The three patterns
        # cover ordinary names, dot-files, and names starting with "..",
        # while excluding "." and "..".  A pattern that matches nothing
        # is left as literal text, which the existence test filters out.
        local child
        for child in "$dname"/* "$dname"/.[!.]* "$dname"/..?* ; do
            if [ -e "$child" ] || [ -L "$child" ] ; then
                circle_enqueue "$child"
            fi
        done
        return
    fi

    # Locally process a non-directory.  Only regular files contribute
    # data bytes; everything else (e.g., a symlink) is header-only.
    local fsize=0
    if [ -f "$fname" ] && [ ! -L "$fname" ] ; then
        fsize=$(stat -c%s -- "$fname")
    fi
    fname2size["$fname"]=$fsize
    total_size=$(get_tar_size "$fname" "$fsize")
    (( local_fsize += total_size ))
    header_size_cache["$fname"]=$(get_tar_size "$fname" 0)
}

# Use Libcircle to traverse the directory tree.  seed_traversal puts
# the initial names on the work queue and traverse_tree is invoked for
# each queued name; directories it encounters enqueue their contents.
circle_cb_create seed_traversal
circle_cb_process traverse_tree
circle_begin

# Determine each rank's starting offset for its directory list and file list.
# The tarball is laid out as all directory headers (from every rank)
# followed by all files, which is why the file offset is shifted by the
# global directory total below.
# NOTE(review): mpi_exscan is presumably an exclusive prefix sum (the
# total over lower-numbered ranks) and mpi_allreduce a sum over all
# ranks; the offsets are pre-set to 0, presumably to cover rank 0, whose
# exclusive-scan result may not be defined -- confirm against the
# MPI-Bash documentation.
local_doffset=0
mpi_exscan $local_dsize local_doffset
mpi_allreduce $local_dsize global_dsize
local_foffset=0
mpi_exscan $local_fsize local_foffset
(( local_foffset += global_dsize ))

# Define a function that turns this rank's directories and files into
# work-queue messages, one message per segment to be written.  Each
# message consists of the following $sep-separated fields:
#   1. tar header size
#   2. file size, excluding the tar header (0 for a directory)
#   3. byte offset into the tarball at which the entry (segment 0) starts
#   4. segment number (number of $maxblocksize chunks into the file)
#   5. file/directory name
function enqueue_tar_work () {
    local entry hdr_bytes data_bytes seg_idx seg_count
    local next_offset

    # Directories are header-only, so each one is exactly one segment.
    next_offset=$local_doffset
    for entry in "${dnamelist[@]}" ; do
        hdr_bytes=${header_size_cache["$entry"]}
        circle_enqueue "${hdr_bytes}${sep}0${sep}${next_offset}${sep}0${sep}${entry}"
        (( next_offset += hdr_bytes ))
    done

    # Files need one segment per $maxblocksize bytes of data, but never
    # fewer than one: an empty file still requires its header.
    next_offset=$local_foffset
    for entry in "${!fname2size[@]}" ; do
        hdr_bytes=$(get_tar_size "$entry" 0)
        data_bytes=${fname2size["$entry"]}
        seg_count=$(( (data_bytes + maxblocksize - 1) / maxblocksize ))
        if [ "$seg_count" -lt 1 ] ; then
            seg_count=1
        fi
        for (( seg_idx = 0; seg_idx < seg_count; seg_idx++ )) ; do
            circle_enqueue "${hdr_bytes}${sep}${data_bytes}${sep}${next_offset}${sep}${seg_idx}${sep}${entry}"
        done
        # The next file starts after this one's header and padded data.
        (( next_offset += $(get_tar_size "$entry" "$data_bytes") ))
    done
}

# Define a function to write one segment of a single file or directory
# into the target tarball.  Each call takes one message from the
# Libcircle work queue; the message holds five $sep-separated fields:
# tar header size, data size, byte offset of the entry within the
# tarball, segment number, and name.  Segment 0 additionally writes
# the entry's tar header.
function inject_tar_data () {
    # Determine what we need to do.
    local msg fields
    circle_dequeue msg
    IFS="$sep" read -r -a fields <<< "$msg"
    local tsize="${fields[0]}"
    local size="${fields[1]}"
    local offset="${fields[2]}"
    local segment="${fields[3]}"
    local name="${fields[4]}"
    if [ "$verbose" = yes ] ; then
        local nsegs=$(( (size + maxblocksize - 1)/maxblocksize ))
        if [ "$nsegs" -eq 0 ] ; then
            # Directories and empty files still count as one fragment.
            nsegs=1
        fi
        printf '%s\n' "$name (fragment $((segment+1)) of $nsegs)"
    fi

    # Segment 0: Inject the tar header.  tar generates a complete
    # one-member archive, of which only the first $tsize bytes (the
    # header) are kept.  "--" stops a name that begins with "-" from
    # being parsed as a tar option, and iflag=fullblock makes dd keep
    # reading from the pipe until it has the whole header rather than
    # accepting a short read.
    if [ "$segment" -eq 0 ] ; then
        tar --no-recursion -cf - -- "$name" | \
            dd of="$tarfile" bs="$tsize" count=1 iflag=fullblock oflag=seek_bytes seek="$offset" conv=notrunc status=none
    fi

    # All segments: Inject up to one segment's worth of data.  The
    # data region starts right after the header; skip= counts input
    # blocks of $maxblocksize bytes, and conv=notrunc preserves what
    # other work items have already written to the tarball.
    if [ "$size" -gt 0 ] ; then
        local seek_bytes=$(( offset + tsize + segment*maxblocksize ))
        dd if="$name" of="$tarfile" bs="$maxblocksize" count=1 skip="$segment" iflag=fullblock oflag=seek_bytes seek="$seek_bytes" conv=notrunc status=none
    fi

}

# Use Libcircle to include all specified files and directories in the
# target tarball.  Rank 0 first removes any stale tarball, because the
# work items write into the file in place (dd conv=notrunc) instead of
# truncating it.
# NOTE(review): nothing visible here orders the rm on rank 0 before the
# first write by another rank; presumably circle_begin synchronizes the
# ranks before any work is processed -- confirm.
if [ "$rank" -eq 0 ] ; then
    rm -f "$tarfile"
fi
# enqueue_tar_work creates the work items and inject_tar_data handles
# them.  NOTE(review): create_global presumably makes Libcircle invoke
# the create callback on every rank, which is needed because each rank
# holds only its own share of the file list -- confirm.
circle_set_options create_global
circle_cb_create enqueue_tar_work
circle_cb_process inject_tar_data
circle_begin

# Pad the file to a multiple of the block size (512 bytes) and include
# two all-zero EOF blocks.  "%512" rounds the size up to a multiple of
# 512 and "+1024" then extends the file by two more blocks.
if [ "$rank" -eq 0 ] ; then
    truncate --size=%512 "$tarfile"
    truncate --size=+1024 "$tarfile"
fi

# Finalize both MPI and Libcircle.
circle_finalize
mpi_finalize
