
# $Id: graph,v 1.5 1995/10/25 16:47:21 lm Exp $
# Shell bootstrap: if sh ends up running this file it re-executes it under
# perl (-S looks the script up in PATH, -s turns -option arguments into
# $option variables).  Perl itself never runs the exec because of "if 0".
# The string is single quoted so Perl does not try to interpolate the shell
# variables in it (a double-quoted $* is a compile error on Perl 5.30+),
# and ${1+"$@"} hands the arguments on with their quoting intact.
eval 'exec perl -Ss $0 ${1+"$@"}'
	if 0;

# A graphing preprocessor for GNU pic / troff package.
# Hacked into existence by Larry McVoy (lm@sun.com now lm@sgi.com).
# Copyright (c) 1994 Larry McVoy.  GPLed software.
#
# Input format is like that of Xgraph, i.e., sets of X Y pairs,
# divided up by blank lines and titled with a "title.  Like so
#
#	1 1
#	2 2
#	"straight slope
#
#	4 4
#	1 4
#	"straight down
#
# Optional "quartile" data input format.
# The drawing is ----- o  ---, with the lines being from y1..y2, y4..y5,
# and the mark at y3.
#
#	x y1 y2 y3 y4 y5
#	x y1 y2 y3 y4 y5
#	x y1 y2 y3 y4 y5
#
# Optional input (superset of Xgraph) is like so:
#
#	%T Graph title in +4 point font
#	%X X axis title and/or units in +2 point font
#	%Y Y axis title and/or units in +2 point font
#	%fakemax-X <value>	force graph to be that big
#	%fakemax-Y <value>	force graph to be that big
#	%fakemin-X <value>	force graph to be that big
#	%fakemin-Y <value>	force graph to be that big
#
# Options:
#  -rev		reverse X/Y data sense (and titles)
#  -below	put data set titles below the graph rather than to the right
#  -close	no extra space around the data
#  -qline	connect the quartile center points
#  -grid	dotted grid marks
#  -nobox	no box around whole graph
#  -big		make the graph take the whole page
#  -slide	make the graph fit in my slides
#  -small	make the graph be small so you can do a lot of them.
#  -notitle	no Title label
#  -nolabels	no X/Y/Title labels
#  -nodatal	no dataset labels
#  -nomarks	no marks on the graphs.
#  -k		print (absolute) values larger than 1000 as (value/1000)K
#  -grapheach	graph each data set separately
#  -br_title	start a new graph at each title.
#  -nospace	no .sp at top of picture
#
# Hacks :-)
# -xk		multiply X input by 1024.
# -logx		take the log base 2 of X input
# -logy		take the log base 2 of Y input
#
# Much thanks to James Clark for providing such a nice replacement for
# the Unix troff package.  Thanks to the Xgraph folks for providing
# inspiration.  Thanks to Declan Murphy for math :-)
# Thanks to no one for floating point numbers, they suck dog doo.
# There are lots of hacks in here to deal with rounding errors.
#
# TODO:
#	All of the option parsing done manually.
#	A filter option to print ranges of the data?
#	A way to do each data set in its own graph.
#	All of the other xgraph options?
#	For Adam, that butthead, an option to sort the labels such that they
#	are in the same order as the right endpoints of the data sets.
#
# STATE:
#	The log stuff is a little weird.  The tick marks are wrong for non
#	whole numbers.  It works for some input and not for others.

# Main program: read and transform the input, work out the axis ranges,
# then write the pic/troff source to stdout.
init();
autosize();
pic();
exit(0);

# init - slurp in the data and apply any transformations.
#
# Reads every input line (the files named on the command line, else stdin)
# into the global @lines and rewrites the data lines in place:
#   -xk / -xm	multiply X by 1024 / 1024*1024
#   -yk / -ym	multiply Y by 1024 / 1024*1024
#   -logx / -logy	replace the value with its log base 2
#   -rev		swap X and Y on two-column lines
# Two-column lines come out as "X Y" with no trailing newline.  Six-column
# quartile lines keep all five Y values, each given the Y transformations;
# -rev leaves quartile lines alone.  Title, comment and blank lines are not
# touched.
#
# The value of a %fakemax-X/Y or %fakemin-X/Y line gets the same
# transformations and is stored in $fakemax_X, $fakemax_Y, $fakemin_X or
# $fakemin_Y; the directive line itself is put back unchanged.
#
# Also sets up the mark table (@marks, $nmarks, $nomark), $first_title,
# the font settings ($PS, $ft, $tick, $VS) and the data line thickness $thk.
# Dies (via logbase) when asked for the log of a negative value.
sub init
{
	my ($xmult, $ymult, @f, @quart);

	# Scale factors chosen by -xk/-xm and -yk/-ym; 0 means "leave alone".
	$xmult = $xk ? 1024 : $xm ? 1024 * 1024 : 0;
	$ymult = $yk ? 1024 : $ym ? 1024 * 1024 : 0;

	@lines = <>;	# sluuuuuuuuuuuurp
	$fake = "";
	foreach (@lines) {
		if (/^"?%fake/) {
			# Turn "%fakemax-X 100" into the data line "100 100" so
			# the value is transformed exactly like real data.
			$fake = $_;
			s/"?%fakemax-//;
			s/"?%fakemin-//;
			@f = split;
			$_ = "$f[1] $f[1]";
		} else {
			next if /^\s*["%#]/;
			next if /^\s*$/;
		}

		# The fields are assigned explicitly: a bare "split;" does not
		# fill @_ on modern Perl.
		@f = split;

		# A quartile line is "x y1 y2 y3 y4 y5"; hold on to y2..y5 so
		# that they survive the rewrite below.
		@quart = (@f == 6) ? @f[2 .. 5] : ();

		$f[0] = $f[0] * $xmult if $xmult;
		$f[0] = &logbase(2, $f[0]) if $logx;

		$f[1] = $f[1] * $ymult if $ymult;
		$f[1] = &logbase(2, $f[1]) if $logy;
		foreach my $q (@quart) {
			$q = $q * $ymult if $ymult;
			$q = &logbase(2, $q) if $logy;
		}

		if (@quart) {
			$_ = join(' ', $f[0], $f[1], @quart);
		} elsif ($rev) {
			$_ = "$f[1] $f[0]";
		} else {
			$_ = "$f[0] $f[1]";
		}

		if ($fake =~ /[XY]/) {
			# XXX - reverse?  What should it do?
			if ($fake =~ /fakemax-X/) {
				$fakemax_X = $f[0];
			} elsif ($fake =~ /fakemax-Y/) {
				$fakemax_Y = $f[1];
			} elsif ($fake =~ /fakemin-X/) {
				$fakemin_X = $f[0];
			} elsif ($fake =~ /fakemin-Y/) {
				$fakemin_Y = $f[1];
			}
			# Put the directive back so the later passes skip it.
			$_ = $fake;
			$fake = "";
		}
	}

	# Diddle this to create different marks.
	#
	# I swiped these from grap and I think I liked mine better.
	@marks = (
	    '[ "\(ci" ]',
	    '[ "\(sq" ]',
	    '[ "\(*D" ]',
	    '[ "\(mu" ]',
	    '[ "\s+4\(bu\s0" ]',
	    '[ box ht .07 wid .07 fill 1 ]',
	    '[ "\s+2\(pl\s0" ]',
	    '[ "\s+4\(**\s0" ]',
	    );
	$nmarks = $#marks + 1;
	$nomark = '[ box invis ht .05 wid .05 ]';

	$first_title = 1;

	if ($small) {
		$PS = 8;
		$ft = "B";
		$tick = .1;
	} elsif ($slide) {
		$ft = "HB";
		$PS = 10;
		$tick = .15;
	} else {
		$ft = "CB";
		$PS = 12;
		$tick = .15;
	}
	$VS = $PS - 1;

	# Data line thickness, default .75.  "$thk.2" would parse as the
	# concatenation $thk . 2, which is always true, so the .2 setting is
	# looked up by name: -thk.2 sets ${"thk.2"} under perl -s, and -thk_2
	# is accepted as an alternative spelling.
	$thk = .75;
	$thk = 1 if $thk1;
	$thk = 1.5 if $thk1_5;
	$thk = 2 if $thk2;
	$thk = .2 if ${"thk.2"} || $thk_2;
}

# Calculate min/max to autosize the graph.
#
# Scans the data lines in @lines (title, comment, directive and blank lines
# are skipped) and tracks the smallest and largest X and Y in $xmin, $xmax,
# $ymin and $ymax.  A two-column line contributes its single Y; a six-column
# quartile line contributes the range of y1..y5.  Any other column count is
# a fatal "Data format error".
#
# The %fakemax/%fakemin values may then widen (never narrow) the range, and
# tick() turns each range into the axis bounds and tick spacing that end up
# in $xlower, $xupper, $xtick, $xn and $ylower, $yupper, $ytick, $yn.
sub autosize
{
	foreach my $line (@lines) {
		next if $line =~ /^\s*["#%]/;
		next if $line =~ /^\s*$/;

		# Split into a named array: a bare "split;" does not fill @_
		# on modern Perl.
		my @f = split(' ', $line);
		my ($lo, $hi);
		if (@f == 2) {
			$lo = $hi = $f[1];
		} elsif (@f == 6) {	# Quartile plot
			$lo = $hi = $f[1];
			foreach my $y (@f[2 .. 5]) {
				$hi = $y if ($hi < $y);
				$lo = $y if ($lo > $y);
			}
		} else {
			die "Data format error: $line\n";
		}

		if (!defined $xmin) {
			# First data line seeds all four extremes.
			$xmin = $f[0];
			$xmax = $f[0];
			$ymin = $lo;
			$ymax = $hi;
		} else {
			$xmin = $f[0] if ($xmin > $f[0]);
			$xmax = $f[0] if ($xmax < $f[0]);
			$ymin = $lo if ($ymin > $lo);
			$ymax = $hi if ($ymax < $hi);
		}
	}

	# Handle fake max/min: they only ever enlarge the range.
	if (defined($fakemax_X) && $fakemax_X > $xmax) {
		$xmax = $fakemax_X;
	}
	if (defined($fakemax_Y) && $fakemax_Y > $ymax) {
		$ymax = $fakemax_Y;
	}
	if (defined($fakemin_X) && $fakemin_X < $xmin) {
		$xmin = $fakemin_X;
	}
	if (defined($fakemin_Y) && $fakemin_Y < $ymin) {
		$ymin = $fakemin_Y;
	}

	($xlower, $xupper, $xtick) = &tick($xmin, $xmax, $logx ? 2 : 10);
	($ylower, $yupper, $ytick) = &tick($ymin, $ymax, $logy ? 2 : 10);

	# Number of tick intervals; the .9 rounds up unless the quotient
	# is only a hair over a whole number.
	$xn = int(.9 + ($xupper - $xlower) / $xtick);
	$yn = int(.9 + ($yupper - $ylower) / $ytick);

	# Format everything as fixed-point text so that floating point
	# noise does not leak into the pic source.
	$xlower = sprintf("%.6f", $xlower);	# really ugly cast
	$xupper = sprintf("%.6f", $xupper);	# really ugly cast
	$xtick = sprintf("%.6f", $xtick);	# really ugly cast
	$xn = sprintf("%.0f", $xn);		# really ugly cast
	$ylower = sprintf("%.6f", $ylower);	# really ugly cast
	$yupper = sprintf("%.6f", $yupper);	# really ugly cast
	$ytick = sprintf("%.6f", $ytick);	# really ugly cast
	$yn = sprintf("%.0f", $yn);		# really ugly cast
}

# expbase(base, val) - return base raised to the power val.
#
# Computed as e ** (val * ln(base)):  e ** ln(base) is base, and
# (e ** ln(base)) ** val is the same as e ** (val * ln(base)).
sub expbase
{
	my ($base, $val) = @_;
	my $ln_base = log($base);

	return exp($val * $ln_base);
}

# logbase(base, val) - return the logarithm of val in the given base.
#
# A val of zero yields 0 instead of an error.  A negative val is fatal;
# the message quotes the current $_ as the offending input.
sub logbase
{
	my ($base, $val) = @_;

	die "Input: $_: can't take log of negative value: $val\n" if $val < 0;
	return 0 if $val == 0;
	return log($val) / log($base);
}

# tick(min, max, base) - choose axis bounds and tick spacing for a range.
#
# Returns the list (lower, upper, tick): lower and upper are multiples of
# tick that enclose min..max.  Unless -close is given, a bound that lies
# within a tenth of a tick of the data is moved one tick further out.
# base is 10 for a linear axis and 2 for a log axis.
# XXX - the log stuff is not quite right.
sub tick
{
	my ($min, $max, $base) = @_;
	my $delta = $max - $min;

	# Start from a power of ten one order of magnitude below the range,
	# then widen it by 2 or 5 if that gives more than ten ticks.
	my $tick = &expbase(10, int(&logbase(10, $delta)) - 1);
	my $adj = 1;
	if ($delta / $tick > 10) {
		$adj = ($base == 10 && ($delta / (2 * $tick)) > 10) ? 5 : 2;
	}
	$tick *= $adj;

	# Go figure out the endpoints.  Step toward the minimum in strides
	# that shrink by a factor of base each round, so the search does not
	# walk tick by tick from 0.  The .00001 fudge absorbs rounding error.
	# ($i, $fudge and $bound are package globals, as they always were.)
	my $lower = 0;
	for ($i = 10e99; $i > 0; $i = int($i/$base)) {
		$fudge = $i * $tick;
		$bound = $min + $fudge * .00001;

		# Sometimes it's too big
		$lower -= $fudge while $lower > $bound;

		# Sometimes it's too small
		$lower += $fudge while ($lower + $fudge) <= $bound;
	}

	# Walk up from the lower bound until the maximum is covered.
	my $upper = $lower;
	$upper += $tick while $upper < $max - $tick * .00001;

	# If you don't like your end points on the board then do this.
	unless ($close) {
		$lower -= $tick if $min - $lower < .1 * $tick;
		$upper += $tick if $max - $upper < .1 * $tick;
	}
	return ($lower, $upper, $tick);
}

# Spit out the pic stuff.
# The idea here is to spit the variables and let pic do most of the math.
# This allows tweaking of the output by hand.
#
# Sets the output parameters that graph() interpolates ($print, $xgrid,
# $ygrid, $nobox, $xsize, $ysize), opens the first graph and then walks
# @lines:
#   blank line	ends a data set: data() draws what has been collected;
#		with -grapheach the graph is also finished and a new one
#		started (unless this was the last line)
#   %fake...	skipped, init() already dealt with it
#   %T %X %Y	graph / X axis / Y axis title; with -br_title every %T
#		after the first finishes the graph and starts a new one
#   "text	data set title, remembered in @datasets
#   other	data, queued in @data for the next data() call
# Whatever is still pending at the end of the input is drawn and the
# graph closed.
sub pic
{
	my ($idx, $line);

	# Label format used when an axis spans more than 999.
	$print = $k ? 'sprintf("%.0fK", j/1000)' : 'sprintf("%.0f", j)';
	if ($grid) {
		$ygrid =
		"line dotted thick gthk from O.sw + (0, g) to O.sw + (xsize, g)"
		;
		$xgrid =
		"line dotted thick gthk from O.sw + (g, 0) to O.sw + (g, ysize)"
		;
	} else {
		$ygrid = $xgrid = "";
	}
	# graph() drops $nobox straight into the "box" command.
	$nobox = "invis" if $nobox;

	if ($big) {
		print ".sp .5i\n.po .25i\n";
		$ysize = $below ? 8 : 9;
		$xsize = $nodatal ? 7 : 6;
	} elsif ($small) {
		$ysize = 1.75;
		$xsize = 1.75;
	} elsif ($slide) {
		print ".sp .35i\n";
		$xsize = 4.25;
		$ysize = 4.5;
	} else {
		print ".sp 1i\n";
		$ysize = 5;
		$xsize = 5;
	}
	&graph;

	# Mark the data points
	@datasets = ();
	for ($idx = 0; $idx <= $#lines; $idx++) {
		$line = $lines[$idx];
		if ($line =~ /^\s*$/) {		# end of data set
			&data($set++);
			if ($grapheach) {
				&titles;
				# After 4 and 8 graphs go back to the top of
				# the page and shift the page offset right.
				if ($set == 4) {
					print ".sp -11i\n";
					print ".po 3.5i\n";
				} elsif ($set == 8) {
					print ".sp -11i\n";
					print ".po 6i\n";
				}
				if ($idx < $#lines) {
					&graph;
				}
			}
			next;
		}
		if ($line =~ /^"?%fake/) {	# Skip this
			next;
		}
		if ($line =~ /^"?%T\s+/) {	# Title specification
			# Spit out the last graph at next title.
			if ($br_title && $graphs++ > 0) {
				&titles;
				if ($graphs == 5) {
					print ".sp -11i\n";
					print ".po 3.5i\n";
				} elsif ($graphs == 9) {
					print ".sp -11i\n";
					print ".po 6i\n";
				}
				&graph;
			}
			$line =~ s/^"?%T\s+//;
			# chomp, not chop: a final line without a newline
			# must not lose its last character.
			chomp($line);
			$Gtitle = $line;
			next;
		}
		if ($line =~ /^"?%X\s+/) {	# X axis title specification
			$line =~ s/^"?%X\s+//;
			chomp($line);
			$Xtitle = $line;
			next;
		}
		if ($line =~ /^"?%Y\s+/) {	# Y axis title specification
			$line =~ s/^"?%Y\s+//;
			chomp($line);
			$Ytitle = $line;
			next;
		}
		if ($line =~ /^"/) {		# Data set title
			$line =~ s/^"//;
			chomp($line);
			$dataset = $line;
			push(@datasets, "$dataset");
			next;
		}
		push(@data, $line);
	}

	if ($grapheach) {
		# Input that does not end with a blank line leaves its last
		# data set queued and the graph opened for it unfinished;
		# draw the data and close the graph.
		if (@data) {
			&data($set++);
			&titles;
		}
	} else {
		&data($set++);
		&titles;
	}
}

# Draw the titles and finish this graph.
#
# Prints, in this order: the X and Y axis titles (unless -nolabels), the
# graph title (small copy for -grapheach and for -br_title, large copy
# unless -nolabels or -notitle), the data set legend (unless -nodatal or
# -grapheach) and finally the lines that close the picture.
#
# On the first call only, the axis titles get the decorations that go with
# -xk, -logx, -yk, -ym and -logy and are swapped for -rev.  (-xm adds no
# decoration; that code was commented out in the original.)
# $first_title is cleared on the way out.
sub titles
{
	# Do X/Y titles, if any.
	if (!$nolabels) {
		$Xtitle = "X" unless defined($Xtitle);
		$Ytitle = "Y" unless defined($Ytitle);
		if ($first_title) {
			$Xtitle = "$Xtitle * 1K" if $xk;
			$Xtitle = "log2($Xtitle)" if $logx;
			$Ytitle = "$Ytitle * 1K" if $yk;
			$Ytitle = "$Ytitle * 1M" if $ym;
			$Ytitle = "log2 of ${Ytitle} " if $logy;
			($Xtitle, $Ytitle) = ($Ytitle, $Xtitle) if $rev;
		}

		print "\n# Xaxis title.\n";
		print "\"\\s+4$Xtitle\\s0\" rjust at O.se - (0, .6)\n";

		# The Y title is written one character per string so that
		# pic stacks it vertically beside the axis.
		print "\n# Yaxis title ($Ytitle)\n.ps +2\n";
		foreach my $ch (split(//, $Ytitle)) {
			print "\"$ch\" ";
		}
		print "\\\n    at O.w - (.75, 0)\n.ps\n";
	}

	# Do the graph title, if any.
	$Gtitle = "Pic Graph" unless defined($Gtitle);
	if ($grapheach) {
		# One graph per data set: the newest data set names the graph.
		$Gtitle = $datasets[$#datasets];
		print "\n# Graph title.\n";
		print "\"\\s+2$Gtitle\\s0\" at O.n + (0, .1)\n";
	}
	if ($br_title) {
		print "\n# Graph title.\n";
		print "\"\\s+2$Gtitle\\s0\" at O.n + (0, .1)\n";
	}
	if (!$nolabels && !$notitle) {
		my $bump = $big ? 8 : 4;	# point size increase for the title
		print "\n# Graph title.\n";
		print "\"\\s+$bump$Gtitle\\s0\" at O.n + (0, .3)\n";
	}

	# Do the dataset titles.
	if (!$nodatal) {
		print "\n# Title.\n";
		if (!$grapheach) {
			print ".ft R\n" if ($slide);
			foreach my $n (0 .. $#datasets) {
				# Same mark that data() used for this set.
				print $marks[$n % $nmarks];
				print $below
				    ? " at O.sw - (0, .75 + $n * vs)\n"
				    : " at O.ne + (.25, - $n * vs)\n";
				print
				    "\"$datasets[$n]\" ljust at last [].e + (.1, 0)\n";
			}
			print ".ft\n" if ($slide);
		}
	}

	# Finish up.
	print "]\n.ft\n.ps\n.PE\n";

	$first_title = 0;
}

# graph - open a new picture: print the troff/pic preamble, the pic
# variables describing the axes, and the pic code that draws the box, the
# tick marks, the optional grid and the axis numbers.  The picture is left
# open; titles() prints the lines that close it.
#
# Everything is interpolated from globals: $PS, $ft, $tick and $thk (set in
# init), the ranges $xlower/$xupper/$xtick/$xn and the Y equivalents (set
# in autosize), and $xsize, $ysize, $nobox, $xgrid, $ygrid and $print (set
# in pic).  -nospace suppresses the leading ".sp .25i".
sub graph
{
	my $lead = $nospace ? "" : ".sp .25i\n";

	print $lead . <<"PIC_PREAMBLE";
.PS
.ps $PS
.vs 11
.ft $ft
[
# Variables, tweak these.
	xtick = $xtick			# width of an X tick
	xlower = $xlower			# where the xtick start
	xupper = $xupper			# upper range of graph
	xn = $xn					# number of ticks to do
	ytick = $ytick			# width of an Y tick
	ylower = $ylower			# where the ytick start
	yupper = $yupper			# upper range of graph
	yn = $yn					# number of ticks to do
	xsize = $xsize				# width of the graph
	ysize = $ysize				# height of the graph
	yscale = ysize / (yupper - ylower)	# scale data to paper
	xscale = xsize / (xupper - xlower)	# scale data to paper
	tick = $tick				# distance towards numbers
	gthk = .1				# thickness of grid lines
	thk = $thk				# thickness of data lines
	qthk = 2.0				# thickness of quartile lines
	vs = .15				# works for 10 point fonts

# Draw the graph borders and tick marks
	O:	box $nobox thick 1.5 ht ysize wid xsize
	j = ylower
	t = tick * .5
	for i = 0 to yn by 1 do {
		ys = j - ylower
		g = ys * yscale
		line thick 1.5 from O.sw + (-tick, g) to O.sw + (0, g)
		$ygrid
		if (i < yn) then {
			y2 = (ys + (ytick / 2)) * yscale
			line thick .5 from O.sw + (-t, y2) to O.sw + (0, y2)
		}
		if (yupper - ylower > 999) then {
			$print rjust at O.sw + (-.2, g - .02)
		} else { if (yupper - ylower > 10) then {
			sprintf("%.0f", j) rjust at O.sw + (-.2, g - .02)
		} else { if (yupper - ylower > 1) then {
			sprintf("%.1f", j) rjust at O.sw + (-.2, g - .02)
		} else {
			sprintf("%.2f", j) rjust at O.sw + (-.2, g - .02)
		}}}
		j = j + ytick
	}
	j = xlower
	for i = 0 to xn by 1 do {
		xs = j - xlower
		g = xs * xscale
		line thick 1.5 from O.sw + (g, -tick) to O.sw + (g, 0)
		$xgrid
		if (i < xn) then {
			x2 = (xs + (xtick / 2)) * xscale
			line thick .5 from O.sw + (x2, 0) to O.sw + (x2, -t)
		}
		if (xupper - xlower > 999) then {
			$print at O.sw + (g, -.25)
		} else { if (xupper - xlower > 10) then {
			sprintf("%.0f", j) at O.sw + (g, -.25)
		} else { if (xupper - xlower > 1) then {
			sprintf("%.1f", j) at O.sw + (g, -.25)
		} else {
			sprintf("%.2f", j) at O.sw + (g, -.25)
		}}}
		j = j + xtick
	}
PIC_PREAMBLE
}

# data(n) - draw the data set queued in @data, then empty the queue.
#
# n is the running data set number; the mark used is n modulo the number
# of marks.  Title, comment and blank lines in @data are skipped.
#   Two-column line ("x y"): a mark at the point, joined to the previous
#	point by a line.  With -nomarks an invisible box is used instead,
#	except for the first point of a set when not in -grapheach mode.
#   Six-column line ("x y1 y2 y3 y4 y5"): a quartile bar, i.e. a line from
#	y1 to y2, the mark at y3 and a line from y4 to y5; -qline also joins
#	it to the previous mark.
# With -nomarks (and neither -nodatal nor -grapheach) one visible mark is
# put on the last point of the set.
sub data
{
	my $mark = int(int($_[0]) % int($nmarks));
	my ($x, $y, $at, @f);
	my $first = 1;

	print "\n# DATASET: $dataset, MARK $mark\n";
	foreach my $d (@data) {
		next if $d =~ /^\s*"/;
		next if $d =~ /^\s*#/;
		next if $d =~ /^\s*$/;

		# The fields go into a named array: split in void context
		# does not fill @_ on modern Perl.
		@f = split(/[ \t\n]+/, $d);
		$x = sprintf("%.6g", $f[0]);
		$y = sprintf("%.6g", $f[1]);
		if (@f == 2) {
			$at = " at O.sw + \\\n\t" .
			    "(xscale * ($x - xlower), " .
			    "yscale * ($y - ylower))\n";
			if ($nomarks && ($grapheach || !$first)) {
				print $nomark . $at;
			} else {
				print $marks[$mark] . $at;
			}
			# "last []" is the mark just drawn and "2nd last []"
			# the one before it.
			if (!$first) {
				print "line thick thk from " .
				    "2nd last [].c to last [].c\n";
			}
			$first = 0;
		} elsif (@f == 6) {	# Quartile graph
			# Draw the lower line
			print "x = xscale * ($f[0] - xlower)\n";
			print "    line thick qthk from \\\n\t" .
			    "O.sw + x, yscale * ($f[1] - ylower) to\\\n\t" .
			    "O.sw + x, yscale * ($f[2] - ylower)\n";
			# Draw the mark
			print "    $marks[$mark]" . " at O.sw + \\\n\t" .
			    "x, yscale * ($f[3] - ylower)\n";
			# Draw the upper line
			print "    line thick qthk from \\\n\t" .
			    "O.sw + x, yscale * ($f[4] - ylower) to\\\n\t" .
			    "O.sw + x, yscale * ($f[5] - ylower)\n";
			# Connect the lines?
			if ($qline && !$first) {
				print "line thick thk from " .
				    "2nd last [].c to last [].c\n";
			}
			$first = 0;
		}
	}

	# Put a mark on the end point
	if ($nomarks && !$nodatal && !$first && !$grapheach) {
		print $marks[$mark] . " at O.sw + \\\n\t" .
		    "(xscale * ($x - xlower), " .
		    "yscale * ($y - ylower))\n";
	}
	@data = ();
}
