#! /bin/sh
# Prepare up to $2 batches of size $1 in files named togo.1, togo.2, ...
# (i.e. matching togo.[0-9]).  We limit $2 to 7 to stay within awk's limits
# on file descriptors (we need a couple of other descriptors).  We
# ultimately work from togo, but if it's
# the only thing we've got to work on, we immediately shuffle it aside into
# togo.more so that we can unlock the news system.  If we've got an existing
# non-empty togo.more, we use that.  As a further optimization, if there
# is more than will fit in the numbered batches, we put the next few
# lots in togo.next, and use that thereafter until it's empty.  This
# avoids the need to paw through the whole huge list every time when
# a large backlog has built up.  We also punt to sed to trim the big
# list when we do process it, avoiding the need to run it all through awk.
#
# If the togo files do not contain file sizes, we make an arbitrary guess
# at an average size.

# Argument handling:  batchsplit [-x] size [nwanted]
#   -x		optional leading flag; sets lotsmissing to 0, the threshold
#		used by the nonexistent-file screening later in this script
#   size	batch size limit (required), in the same units as the sizes
#		recorded in the togo file
#   nwanted	number of numbered batches to build, 1-7; anything else
#		(including omitting it) means 7
# Exits with status 2 and a usage message if size is missing or malformed.

# Strip the flag first so that $1/$2 are size/nwanted from here on.
case "$1" in
-x)	lotsmissing=0
	shift
	;;
esac

# The size is pasted textually into the awk program below, so insist on
# a plain string of digits.  Checking after the shift also catches
# "batchsplit -x" with no size, which previously slipped through.
case "$1" in
''|*[!0-9]*)
	echo 'Usage: batchsplit size [nwanted]' >&2
	exit 2
	;;
esac

case "$2" in
[1-7])	nwanted="$2"	;;
*)	nwanted=7	;;
esac

# Read the news system configuration: the file named by $NEWSCONFIG if that
# variable is set, otherwise /var/lib/news/bin/config.  The rest of this
# script uses NEWSCTL, NEWSBIN, NEWSPATH and NEWSUMASK, which are presumably
# defined by that file -- verify against the local config.
# NOTE(review): the "=()<...>()=" comment below looks like a template marker
# that mirrors the line following it (for an install-time substitution
# tool); keep the two lines adjacent and in step -- confirm before editing.
# =()<. ${NEWSCONFIG-@<NEWSCONFIG>@}>()=
. ${NEWSCONFIG-/var/lib/news/bin/config}

# Search the news directories ahead of the configured default path, and
# create files with the news system's umask.
PATH=$NEWSCTL/bin:$NEWSBIN/batch:$NEWSBIN:$NEWSPATH ; export PATH
umask $NEWSUMASK

# Decide which list to work from.  Preference order: a non-empty togo.next,
# then a non-empty togo.more; failing both, move togo aside to togo.more
# while holding the news lock and leave a fresh empty togo behind.
input=togo.more
if test -s togo.next
then
	input=togo.next
elif test ! -s togo.more
then
	# Acquire the lock: write our pid to a private file and keep asking
	# newslock to turn it into the real lock, retrying every 30 seconds.
	mainlock="$NEWSCTL/LOCK"
	mylock="$NEWSCTL/L.$$"
	echo $$ >$mylock
	# Until we hold the lock, only our private file needs cleaning up.
	trap "rm -f $mylock ; exit 0" 0 1 2 15
	until newslock $mylock $mainlock
	do
		sleep 30
	done
	# Now we own the real lock too; remove both if we get interrupted.
	trap "rm -f $mylock $mainlock ; exit 0" 0 1 2 15

	# Shuffle togo aside and start a new, empty one.
	rm -f togo.more
	mv togo togo.more
	>togo

	# Release the lock: drop the cleanup traps, then remove the files.
	trap 0 1 2 15
	rm -f $mylock $mainlock
fi

# A little precaution... do there seem to be a lot of nonexistent files?
# Check first three as quick screening, check next fifty to decide whether
# a relatively-costly existence filtering is in order.
#
# Sets nextonly: 0 for a normal run, 1 to tell the main processing to build
# only a new togo.next (no numbered batches) from togo.more, so that the
# filtering can be applied to togo.next on a later run.
# lotsmissing is the threshold for the 50-line sample; it defaults to 25
# unless already set (the -x flag sets it to 0).
# batchcheck is an external helper; presumably "batchcheck -v" emits the
# entries whose files are missing and plain batchcheck emits those that
# exist -- confirm against its documentation.
nextonly=0
lotsmissing=${lotsmissing-25}
# The leading blank inside the quotes keeps test happy if wc pads its output.
if test " `sed 3q $input | batchcheck -v | wc -l`" -gt 0 && \
	test " `sed 50q $input | batchcheck -v | wc -l`" -gt $lotsmissing
then
	# need to filter togo.next, or generate one for filtering
	case "$input" in
	togo.next)	batchcheck <togo.next >togo.tmp
			mv togo.tmp togo.next
			if test ! -s togo.next
			then
				# it's really bad: nothing survived filtering
				rm -f togo.next
				if test ! -f togo.more
				then
					# No backlog file to fall back on.
					# Stop quietly rather than feeding a
					# nonexistent file to awk and rm; the
					# next run will pick up togo itself.
					exit 0
				fi
				input=togo.more
				nextonly=1
			fi
			;;
	togo.more)	nextonly=1		;;
	esac
fi

# main processing
#
# Read $input and deal its lines out into togo.1 ... togo.$nwanted, starting
# a new batch whenever adding the next entry would push the running total
# past the size limit ($1).  Each line's size is its last field plus 15, or
# a guess when the line has only one field.  When the input is togo.more,
# one further, larger lot (4 * nwanted * size) goes to togo.next.  If input
# remains after that, awk records the number of lines it consumed in
# togo.count and stops; those lines are then trimmed from $input with sed.
# If nothing remains, $input is removed.
#
# If awk or sed fails, $input is left untouched and we exit with status 1,
# so that queued entries are never discarded on the strength of a run that
# did not complete.  Numbered batches already written are left in place, so
# some entries may be batched again on the next run.
rm -f togo.overflow togo.count
awk 'BEGIN { total = 0 ; ninbatch = 0 ; bno = 1 ; limit = '"$1"'
		batch = "togo." bno ; nbatches = '"$nwanted"'
		if ('"$nextonly"' == 1) {
			# just make a new togo.next, no togo.[0-9] batches:
			# pretend the last numbered batch is already full
			bno = nbatches
			ninbatch = 1
			total = limit+1
		}
	}
	{
		if (NF == 1) {
			if ($1 ~ /^<.*>$/)	# probably ihave/sendme m-id
				size = length($0) + 1
			else
				size = 3000	# Arbitrary guess.
		} else
			size = $NF + 15		# 15 for "#! rnews nnnnn"
		# ninbatch > 0 guarantees every batch gets at least one
		# entry, even an entry bigger than the limit.
		if (total + size > limit && ninbatch > 0) {
			# Go to next batch.
			bno++
			if (bno <= nbatches) {
				batch = "togo." bno
				ninbatch = 0
			} else if (bno == nbatches+1 && FILENAME == "togo.more") {
				batch = "togo.next"
				limit = 4 * nbatches * limit
			} else {
				# Out of places to put things: note how many
				# lines were consumed and stop.
				print NR - 1 >"togo.count"
				exit
			}
			total = 0
		}
		ninbatch++
		total += size
		print >batch
	}' "$input" || {
	# awk did not finish cleanly, so togo.count cannot be trusted.
	rm -f togo.count
	case "$input" in
	togo.more)
		# togo.next was empty or absent when togo.more was chosen,
		# so anything in it now is a partial product of this run.
		rm -f togo.next
		;;
	esac
	echo "batchsplit: awk failed, leaving $input untouched" >&2
	exit 1
}

# handle the overflow case efficiently
if test -s togo.count
then
	# Only replace the input once the trimmed copy is known to be good.
	if sed "1,`cat togo.count`d" "$input" >togo.overflow
	then
		rm togo.count
		mv togo.overflow "$input"
	else
		rm -f togo.overflow togo.count
		echo "batchsplit: could not trim $input, leaving it untouched" >&2
		exit 1
	fi
else
	# Everything was consumed.
	rm "$input"
fi
