#!/usr/bin/perl
# -*- perl -*-
#
# varnish_ - Munin plugin for Varnish
# Copyright (C) 2009  Redpill Linpro AS
#
# Author: Kristian Lyngstøl <kristian@redpill-linpro.com>
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

=head1 NAME

varnish_ - Munin plugin to monitor various aspects of varnish

=head1 APPLICABLE SYSTEMS

Varnish 2.0 or newer with varnishstat

=head1 CONFIGURATION

The plugin needs to be able to execute varnishstat. 

The configuration section shows the defaults
  [varnish_*]
     env.varnishstat varnishstat
     env.name 

env.varnishstat can be a full path to varnishstat if it's
not in the path already. 

env.name is blank (undefined) by default and can be used to specify a -n
name argument to varnish if multiple instances are running on the same
server.

A few aspects are not linked by default. They are marked as 
'DEBUG' => 'yes' (or any other value). They are:

data_structures, vcl_and_purges, lru, objects_per_objhead,
obj_sendfile_vs_write, losthdr, esi, hcb, shm, shm_writes, objoverflow,
allocations, session, session_herd

You can link them yourself with something like this:

  ln -s /usr/local/libexec/munin/plugins/varnish_ \
    /etc/munin/plugins/varnish_data_structures

=head1 INTERPRETATION

Each graph uses data from varnishstat.

=head1 MAGIC MARKERS

 #%# family=auto
 #%# capabilities=autoconf suggest

=head1 VERSION

 $Id$

=head1 BUGS

The hit_rate graph requires munin r2040 or newer to display
correctly.

=head1 PATCHES-TO

The varnish-tools repository is the canonical upstream for this plugin.
Please send patches to Kristian Lyngstol <kristian@redpill-linpro.com>
and/or varnish-misc@projects.linpro.no for inclusion before adding to
munin trunk.

=head1 AUTHOR

Kristian Lyngstol <kristian@redpill-linpro.com>

=head1 LICENSE

GPLv2

=cut 


use strict;

# Debug switch: when set to 1, variables that a graph defines but that
# varnishstat does not report are mentioned instead of silently skipped.
my $DEBUG = 0;

# When set to 1, 'suggest' lists every aspect, including those flagged
# with 'DEBUG'.
my $FULL_SUGGEST = 0;

# Command used to run varnishstat. env.varnishstat overrides the default;
# give a full path if the executable is not in your path.
my $varnishstatexec = "varnishstat";
if (exists $ENV{'varnishstat'}) {
	$varnishstatexec = $ENV{'varnishstat'};
}

# Instance name for setups with multiple instances; stays undefined
# unless env.name is set.
my $varnishname;
if (exists $ENV{'name'}) {
	$varnishname = $ENV{'name'};
}

# Values (%varnishstat) and descriptions (%varnishstatnames) as reported
# by varnishstat, both keyed by the varnishstat field name.
my %varnishstat;
my %varnishstatnames;

# The aspect this invocation handles, taken from the script name.
my $self;

# Parameters that may appear on the top level of a graph definition.
# Config prints each one that is set as "graph_$foo $value\n".
my @graph_parameters = qw(title total order scale vlabel args);

# Parameters that may appear on an individual value definition. They are
# passed on to config untouched, printed as "$fieldname.$param $value\n".
#
# 'label' is not in this list: it is handled separately because it falls
# back to the varnishstat description when unset.
my @field_parameters = qw(graph min max draw cdef warning colour info type);

# Data structure that defines all possible graphs (aspects) and how they
# are to be plotted. Every top-level entry is a graph/aspect. Each top-level graph
# MUST have title set and 'values'. 
# 
# The 'values' hash must have at least one value definition. The actual
# value used is either fetched from varnishstat based on the value-name, or
# if 'rpn' is defined: calculated. 'type' SHOULD be set. 
#
# Graphs with 'DEBUG' set to anything are omitted from 'suggest'.
# 
# 'rpn' on values allows easy access to graphs consisting of multiple
# values from varnishstat. (Reverse polish notation). The RPN
# implementation only accepts +-*/ and varnishstat-values.
#
# With the exception of 'label', which is filled with the
# varnishstat-description if left undefined, any value left undefined will
# be left up to Munin to define/ignore/yell about.
#
# See munin documentation or rrdgraph/rrdtool for more information.
my %ASPECTS = ( 
	'request_rate' => {
		'title' => 'Request rates',
		'order' => 'cache_hit cache_hitpass cache_miss '
			 . 'backend_conn backend_unhealthy '
			 . 'client_req client_conn' ,
		'values' => {
			'client_conn' => { 
				'type' => 'DERIVE',
				'min' => '0',
				'colour' => '444444',
				'graph' => 'ON'
			},
			'client_req' => {
				'type' => 'DERIVE',
				'colour' => '111111',
				'min' => '0'
			},
			'cache_hit' => {
				'type' => 'DERIVE',
				'draw' => 'AREA',
				'colour' => '00FF00',
				'min' => '0'
			},
			'cache_hitpass' => {
				'info' => 'Hitpass are cached passes: An '
					. 'entry in the cache instructing '
					. 'Varnish to pass. Typically '
					. 'achieved after a pass in '
					. 'vcl_fetch.',
				'type' => 'DERIVE',
				'draw' => 'STACK',
				'colour' => 'FFFF00',
				'min' => '0'
			},
			'cache_miss' => {
				'type' => 'DERIVE',
				'colour' => 'FF0000',
				'draw' => 'STACK',
				'min' => '0'
			},
			'backend_conn' => {
				'type' => 'DERIVE',
				'colour' => '995599',
				'min' => '0'
			},
			'backend_unhealthy' => {
				'type' => 'DERIVE',
				'min' => '0',
				'colour' => 'FF55FF'
			},
			's_pipe' => {
				'type' => 'DERIVE',
				'min' => '0',
				'colour' => '1d2bdf'
			},
			's_pass' => {
				'type' => 'DERIVE',
				'min' => '0',
				'colour' => '785d0d'
			}
		}
	},
	# Hits, misses and hitpasses shown relative to client requests: each
	# cdef below computes "value / client_req * 100", matching the '%'
	# vlabel.
	'hit_rate' => {
		'title' => 'Hit rates',
		'order' => 'client_req cache_hit cache_miss '
			 . 'cache_hitpass' ,
		'vlabel' => '%',
		# NOTE(review): passed through as graph_args; presumably caps
		# the graph at 100 - confirm against the rrdgraph options.
		'args' => '-u 100 --rigid',
		'scale' => 'no',
		'values' => {
			# Used as the divisor in the cdefs of the other fields.
			# NOTE(review): 'graph' => 'off' presumably keeps this
			# field from being drawn itself - confirm against the
			# Munin field attribute documentation.
			'client_req' => {
				'type' => 'DERIVE',
				'min' => '0',
				'graph' => 'off'
			},
			'cache_hit' => {
				'type' => 'DERIVE',
				'min' => '0',
				'draw' => 'AREA',
				'cdef' => 'cache_hit,client_req,/,100,*'
			},
			'cache_miss' => {
				'type' => 'DERIVE',
				'draw' => 'STACK',
				'min' => '0',
				'cdef' => 'cache_miss,client_req,/,100,*'
			},
			'cache_hitpass' => {
				'type' => 'DERIVE',
				'draw' => 'STACK',
				'min' => '0',
				'cdef' => 'cache_hitpass,client_req,/,100,*'
			},
		}
	},
	'backend_traffic' => {
		'title' => 'Backend traffic',
		'values' => {
			'backend_conn' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'backend_unhealthy' => {
				'type' => 'DERIVE',
				'min' => '0',
				'warning' => ':1'
			},
			'backend_busy' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'backend_fail' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'backend_reuse' => {
				'type' => 'DERIVE',
				'min' => 0
			},
			'backend_recycle' => {
				'type' => 'DERIVE',
				'min' => 0
			},
			'backend_unused' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'backend_req' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	# Current number of cached objects and object heads (both gauges).
	'objects' => {
		'title' => 'Number of objects',
		'values' => {
			'n_object' => {
				'type' => 'GAUGE',
				'label' => 'Number of objects'
			},
			'n_objecthead' => {
				'type' => 'GAUGE',
				'label' => 'Number of object heads',
				'info' => 'Each object head can have one '
					. 'or more objects attached, '
					. 'typically based on the Vary: header'
			}
		}
	},
	# Header and body traffic, stacked. Both cdefs multiply the value by
	# 8 to match the 'bit/s' vlabel.
	# NOTE(review): this assumes the varnishstat counters are in bytes,
	# as their names suggest - confirm.
	'transfer_rates' => {
		'title' => 'Transfer rates',
		'order' => 's_bodybytes s_hdrbytes',
		'args' => '-l 0',
		'vlabel' => 'bit/s',
		'values' => {
			's_hdrbytes' => {
				'type' => 'DERIVE',
				'label' => 'Header traffic',
				'draw' => 'STACK',
				'min' => '0',
				'info' => 'HTTP Header traffic. TCP/IP '
					. 'overhead is not included.',
				'cdef' => 's_hdrbytes,8,*'
			},
			's_bodybytes' => {
				'type' => 'DERIVE',
				'draw' => 'AREA',
				'label' => 'Body traffic',
				'min' => '0',
				'cdef' => 's_bodybytes,8,*'
			}
		}
	},
	'threads' => {
		'title' => 'Thread status',
		'values' => { 
			'n_wrk' => {
				'type' => 'GAUGE',
				'min' => '0',
				'warning' => '1:'
			},
			'n_wrk_create' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'n_wrk_failed' => {
				'type' => 'DERIVE',
				'min' => '0',
				'warning' => ':1'
			},
			'n_wrk_max' => { 
				'type' => 'DERIVE',
				'min' => '0'
			},
			'n_wrk_overflow' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'n_wrk_drop' => {
				'type' => 'DERIVE',
				'min' => '0',
				'warning' => ':1'
			}
		}
	},
	'memory_usage' => {
		'title' => 'Memory usage',
		'order' => 'sm_balloc sma_nbytes sms_nbytes',
		'total' => 'Total',
		'args' => '--base 1024',
		'values' => {
			'sm_balloc' => {
				'type' => 'GAUGE',
				'draw' => 'AREA'
			},
			'sma_nbytes' => {
				'type' => 'GAUGE',
				'draw' => 'STACK'
			},
			'sms_nbytes' => {
				'type' => 'GAUGE',
				'draw' => 'STACK'
			}
		}
	},
	# Uptime graphed in days: the cdef divides the reported value by
	# 86400 (the number of seconds in a day) to match the 'days' vlabel.
	'uptime' => {
		'title' => 'Varnish uptime',
		'vlabel' => 'days',
		'scale' => 'no',
		'values' => {
			'uptime' => {
				'type' => 'GAUGE',
				'cdef' => 'uptime,86400,/'
			}
		}
	},
	# Calculated value: 'obj_per_objhead' is not fetched from varnishstat
	# directly. Its 'rpn' list is evaluated by rpn() when values are
	# printed, giving n_object / n_objecthead.
	'objects_per_objhead' => {
		'title' => 'Objects per objecthead',
		'DEBUG' => 'yes',
		'values' => {
			'obj_per_objhead' => {
				'type' => 'GAUGE',
				'label' => 'Objects per object heads',
				'rpn' => [ 'n_object','n_objecthead','/' ]
			}
		}
	},
	'losthdr' => {
		'title' => 'HTTP Header overflows',
		'DEBUG' => 'yes',
		'values' => {
			'losthdr' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'obj_sendfile_vs_write' => {
		'title' => 'Objects delivered with sendfile() versus '
			 . 'write()',
		'DEBUG' => 'yes',
		'values' => {
			'n_objsendfile' => {
				'type' => 'DERIVE',
				'min' => '0',
			},
			'n_objwrite' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'hcb' => {
		'title' => 'Critbit data',
		'DEBUG' => 'yes',
		'values' => {
			'hcb_nolock' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'hcb_lock' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'hcb_insert' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'esi' => {
		'title' => 'ESI',
		'DEBUG' => 'yes',
		'values' => {
			'esi_parse' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'esi_errors' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'objoverflow' => {
		'title' => 'Objects overflowing workspace',
		'DEBUG' => 'yes',
		'values' => {
			'n_objoverflow' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'session' => {
		'title' => 'Sessions',
		'DEBUG' => 'yes',
		'values' => {
			'sess_closed' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'sess_pipeline' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'sess_readahead' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'sess_linger' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'session_herd' => {
		'title' => 'Session herd',
		'DEBUG' => 'yes',
		'values' => {
			'sess_herd' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'shm_writes' => {
		'title' => 'SHM writes and records',
		'DEBUG' => 'yes',
		'values' => {
			'shm_records' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'shm_writes' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'shm' => {
		'title' => 'Shared memory activity',
		'DEBUG' => 'yes',
		'values' => {
			'shm_flushes' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'shm_cont' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'shm_cycles' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'allocations' => {
		'title' => 'Memory allocation requests',
		'DEBUG' => 'yes',
		'values' => {
			'sm_nreq' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'sma_nreq' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'sms_nreq' => { 
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'vcl_and_purges' => {
		'title' => 'VCL and purges',
		'DEBUG' => 'yes',
		'values' => {
			'n_backend' => {
				'type' => 'GAUGE'
			},
			'n_vcl' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'n_vcl_avail' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'n_vcl_discard' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'n_purge' => {
				'type' => 'GAUGE'
			},
			'n_purge_add' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'n_purge_retire' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'n_purge_obj_test' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'n_purge_re_test' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'n_purge_dups' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'expunge' => {
		'title' => 'Object expunging',
		'values' => {
			'n_expired' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'n_lru_nuked' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'lru' => {
		'title' => 'LRU activity',
		'DEBUG' => 'yes',
		'values' => {
			'n_lru_saved' => {
				'type' => 'DERIVE',
				'min' => '0'
			},
			'n_lru_moved' => {
				'type' => 'DERIVE',
				'min' => '0'
			}
		}
	},
	'data_structures' => {
		'DEBUG' => 'YES',
		'title' => 'Data structure sizes',
		'values' => {
			'n_srcaddr' => {
				'type' => 'GAUGE'
			},
			'n_srcaddr_act' => {
				'type' => 'GAUGE'
			},
			'n_sess_mem' => {
				'type' => 'GAUGE'
			},
			'n_sess' => {
				'type' => 'GAUGE'
			},
			'n_smf' => {
				'type' => 'GAUGE'
			},
			'n_smf_frag' => {
				'type' => 'GAUGE'
			},
			'n_smf_large' => {
				'type' => 'GAUGE'
			},
			'n_vbe_conn' => {
				'type' => 'GAUGE'
			},
			'n_bereq' => {
				'type' => 'GAUGE'
			}
		}
	}
);

# Run varnishstat once ("-1", plus "-n <name>" when an instance name is
# configured) and record what it reports: values end up in %varnishstat
# and descriptions in %varnishstatnames, both keyed by field name.
# Output lines that do not match the expected
# "name value rate description" layout are ignored.
sub populate_stats
{
	my $options = $varnishname ? "-1 -n $varnishname" : "-1";
	my @report = `$varnishstatexec $options`;
	for my $row (@report) {
		chomp($row);
		next unless $row =~ /^([^ ]*)\s+(\d+)\s+(\d*\.\d*)\s+(.*)$/;
		my ($name, $count, $description) = ($1, $2, $4);
		$varnishstat{$name} = $count;
		$varnishstatnames{$name} = $description;
	}
}

# Bail-function: print an optional error message followed by a short
# usage summary to STDERR, then exit with status 1.
#
# Arguments: the message to show (optional). Nothing is printed for the
# message when it is missing or empty.
sub usage
{
	# defined(@array) is a fatal error on Perl 5.22 and newer. An empty
	# @_ interpolates to "", so the string comparison alone covers both
	# the "no arguments" and the "empty message" case.
	my $message = "@_";
	if ($message ne "") {
		print STDERR $message . "\n\n";
	}
	print STDERR "Known arguments: suggest, config, autoconf.\n";
	print STDERR "Run with suggest to get a list of known aspects.\n";
	exit 1;
}

# Munin autoconf: answer 'yes' when "which" can locate the varnishstat
# executable, otherwise 'no' together with a human-readable reason.
# Exits with status 0 in both cases.
sub autoconf
{
	my $located = `which $varnishstatexec 2>/dev/null`;
	if ($located ne '') {
		print "yes\n";
		exit 0;
	}
	print "no (which $varnishstatexec returns blank)\n";
	exit 0;
}

# List the aspects this plugin can graph, one per line.
# Aspects that carry a 'DEBUG' entry are left out unless $FULL_SUGGEST
# is set to 1.
sub suggest
{
	my @wanted = grep {
		$FULL_SUGGEST == 1 || !defined($ASPECTS{$_}{'DEBUG'})
	} keys %ASPECTS;
	print "$_\n" foreach @wanted;
}

# Print one entry of a two-level hash if it is defined.
#
# Arguments:
#   arg1 - reference to the hash (for instance \%ASPECTS)
#   arg2 - first-level key (graph or value name)
#   arg3 - second-level key (parameter name)
#   arg4 - optional name to print instead of the default "arg2.arg3"
#          (ie: arg4=graph_title)
#
# Prints "<name> <value>\n" and returns 1 if the entry is defined.
# Returns 0 without printing anything if it is not.
sub print_if_exist
{
	my ($table, $value, $field, $title) = @_;
	$title = "$value.$field" unless defined($title);

	# Work on the reference directly instead of copying the whole hash
	# for every lookup. Fetching the first level separately also avoids
	# creating the entry in the caller's hash when it is missing.
	my $entry = $table->{$value};
	if (ref($entry) eq 'HASH' && defined($entry->{$field})) {
		print "$title $entry->{$field}\n";
		return 1;
	}
	return 0;
}

# Print the Munin configuration for one aspect: the graph-level
# parameters first, then label and field parameters for every value of
# the aspect.
#
# Arguments: the name of the aspect (a key of %ASPECTS).
#
# Values that are neither reported by varnishstat nor calculated through
# 'rpn' are left out. Calls usage (which exits) on an unknown aspect.
sub get_config
{
	my $graph = $_[0];

	# Need to double-check since set_aspect only checks this if there
	# is no argument (suggest/autoconf doesn't require a valid aspect)
	if (!defined($ASPECTS{$graph})) {
		usage("No such aspect");
	}
	my $values = $ASPECTS{$graph}{'values'};

	print "graph_category Varnish\n";
	foreach my $field (@graph_parameters) {
		print_if_exist(\%ASPECTS,$graph,$field,"graph_$field");
	}

	foreach my $value (keys %{$values}) {
		# Need either RPN definition or a varnishstat value.
		if (!defined($varnishstat{$value}) &&
		    !defined($values->{$value}{'rpn'})) {
			# Diagnostics go to STDERR so they never end up
			# among the configuration lines on STDOUT.
			if ($DEBUG) {
				print STDERR "Error: $value not part of "
					   . "varnishstat.\n";
			}
			next;
		}

		if (!print_if_exist($values,$value,'label')) {
			# Calculated values have no varnishstat description;
			# fall back to the field name rather than printing
			# an empty label.
			my $label = defined($varnishstatnames{$value})
				  ? $varnishstatnames{$value}
				  : $value;
			print "$value.label $label\n";
		}
		foreach my $field (@field_parameters) {
			print_if_exist($values,$value,$field);
		}
	}
}

# Read and verify the aspect ($self).
#
# The aspect is whatever follows "varnish_" in the name the plugin was
# invoked as ($0). With no command-line argument an unknown aspect is an
# error (usage exits); with an argument the check is left to the code
# handling that argument.
sub set_aspect
{
	$self = $0;
	# The directory part is optional so that running the plugin by its
	# bare file name (no '/' in $0) works as well.
	$self =~ s{^(?:.*/)?varnish_}{};
	if (!defined($ASPECTS{$self}) && @ARGV == 0) {
		usage("No such aspect");
	}
}

# Dispatch on the command-line argument (config, autoconf, suggest).
#
# A leading empty argument is dropped. With exactly one argument left,
# the matching action is run and the plugin exits; an unrecognised
# argument ends in usage. Any other number of arguments makes this
# function return without doing anything.
#
# Stats are only populated for config: autoconf must work without
# running varnishstat, as that would generate a nasty error.
sub check_args
{
	shift @ARGV if (@ARGV && $ARGV[0] eq '');
	return unless @ARGV == 1;

	my $command = $ARGV[0];
	if ($command eq "config") {
		populate_stats();
		get_config($self);
		exit 0;
	}
	if ($command eq "autoconf") {
		autoconf($self);
		exit 0;
	}
	if ($command eq "suggest") {
		suggest();
		exit 0;
	}
	usage("Unknown argument");
}

# Braindead RPN: +,-,/,* will pop two items from @stack, and perform
# the relevant operation on the items. If the item in the array isn't one
# of the 4 basic math operations, a value from varnishstat is pushed on to
# the stack. IE: 'client_req','client_conn','/' will leave the value of
# "client_req/client_conn" on the stack.
#
# Arguments: reference to the array of RPN items.
#
# The single item left on the stack is printed, followed by a newline. A
# result that cannot be calculated (division by zero) is printed as 'U',
# the value Munin accepts for "unknown". An operator that finds fewer
# than two items on the stack, or more than one item left at the end, is
# reported on STDERR and the plugin exits with status 255.
sub rpn
{
	my %operation = (
		'+' => sub { $_[0] + $_[1] },
		'-' => sub { $_[0] - $_[1] },
		'*' => sub { $_[0] * $_[1] },
		# Dividing by zero would kill the plugin; yield undef
		# (unknown) instead.
		'/' => sub { $_[1] == 0 ? undef : $_[0] / $_[1] },
	);
	my @stack;
	foreach my $item (@{$_[0]}) {
		if (!exists($operation{$item})) {
			push(@stack,int($varnishstat{$item}));
			next;
		}
		if (@stack < 2) {
			print STDERR "RPN error: Too few items on the stack "
				   . "for '$item'.\n";
			exit 255;
		}
		my $right = pop(@stack);
		my $left = pop(@stack);
		# An unknown operand makes the result unknown as well.
		if (defined($left) && defined($right)) {
			push(@stack,$operation{$item}->($left,$right));
		} else {
			push(@stack,undef);
		}
	}
	my $printable = join(' ', map { defined($_) ? $_ : 'U' } @stack);
	if (@stack > 1)
	{
		print STDERR "RPN error: Stack has more than one item left.\n";
		print STDERR "$printable\n";
		exit 255;
	}
	print $printable;
	print "\n";
}

################################
# Execution starts here        #
################################

set_aspect();
check_args();
populate_stats();

# We only get here if check_args did not handle an argument and exit.

# Print "<name>.value <value>" for every value of the aspect. Values with
# an 'rpn' definition are calculated by rpn(); all others are taken
# straight from varnishstat.
#
# NOTE: Due to differences in varnish-versions, a value that varnishstat
# did not report is skipped (and mentioned on STDERR if $DEBUG is set).
foreach my $value (keys %{$ASPECTS{$self}{'values'}}) {
	my $definition = $ASPECTS{$self}{'values'}{$value};

	if (defined($definition->{'rpn'})) {
		print "$value.value ";
		rpn($definition->{'rpn'});
		next;
	}

	if (defined($varnishstat{$value})) {
		print "$value.value $varnishstat{$value}\n";
		next;
	}

	print STDERR "Error: $value not part of varnishstat.\n" if $DEBUG;
}
