#!/usr/bin/perl

# $Id: safetynet,v 1.15 2001/02/20 17:17:35 syntec Exp $

# Author  : Evan Borgstrom ($Author: syntec $) at unixpimps.org
# Created : 2000/10/13
# Purpose : Keep things running without complicated init scripts.
# Modified: $Date: 2001/02/20 17:17:35 $

##################################################################
# - safetynet -
# safetynet [--cron [--nomail]] [--config <file>] [-h|--help]
# This is backwards compatible with the old file format; there is,
# however, a new config file format.
# See the README file for more details.
##################################################################

require 5.000;
use POSIX;
use Getopt::Long;
use Fcntl;
use FileHandle;

## configuration #################################################
# Site-specific settings; adjust them to match the local system.

# Job definitions are read from this file unless --config names
# another one.
$conf_file = '/etc/safetynet.conf';

# State directory. Files in here switch checks off: see the README
# file for more information.
$state_dir = '/var/state/safetynet';

# Program the report is piped into. Change it if mail is not what
# you use to send a mail message.
$mail_prog = '/bin/mail';

# Extra options placed on the mail command line (empty for none).
$mail_opts = '';

# Recipient of the report.
$mail_to = 'root';

# Subject line written at the top of the report.
$mail_subject = 'Safetynet Report: Dead process found.';

# Command that lists all running processes. Change it if your
# system needs a different invocation.
$pscmd = 'ps -Af';

# Nothing below this line should need editing.
##################################################################

# Version shown in the banner and help text: the revision keyword
# without its leading '$' and its trailing ' $'.
$snv   = '$Revision: 1.15 $';
$snver = substr($snv, 1, length($snv) - 3);

# Usage text; shown for -h/--help and when the command line cannot
# be parsed.
$helpstr = "
Safetynet ($snver) - system process checker
 safetynet [--cron [--nomail]] [--config <file>] [-h|--help]
 --cron   : will not output general diagnostic messages
            unless there is a dead process.
 --nomail : will not send mail when run in cron mode.

 --config : will use <file> as opposed to the file defined in
            the script.

";

##################################################################
# Parse the command line.
# Getopt::Long stores each option in the matching $opt_<name>
# variable. GetOptions reports bad options itself, so on failure we
# only add the usage text ($! carries nothing useful here).
GetOptions("h!", "help!", "cron!", "nomail!", "config=s", "debug!") or die $helpstr;

$helpflag = 0;

# The flags are declared negatable ("!"): --nocron and friends leave
# the variable defined but false. Test for truth, not definedness,
# so that a negated flag does not switch the feature on.
$debug    = $opt_debug  ? 1 : 0;
$cronflag = $opt_cron   ? 1 : 0;
$nomail   = $opt_nomail ? 1 : 0;

# --config replaces the built-in config file path.
if (defined($opt_config)) { $conf_file = $opt_config; }

# -h/--help: print the usage text and stop.
if ($opt_h or $opt_help) { die $helpstr; }

################################################################
# Main pass: read the job list, then check every job and try to
# restart any that is not running.

$passed = 1;		# cleared as soon as one job is found dead
$cron_buffer = "";	# output collected by out() in --cron mode

out("Safetynet: Process checker. ($snver)\n");
out("Reading config file...");
readconfig();
out(" done! Found $cjob jobs.\n\n");

# a file named ALL in the state directory switches every check off
if (-e "$state_dir/ALL") {
	out("Admin down by ALL. Exiting.\n\n");
	exit(1);
}

$status = 0;

# readconfig() numbers the jobs from 1. Fields of a job:
#   [0] name, [1] process pattern, [2] restart command,
#   [3] command to run when the restart failed,
#   [4] command to run when the restart worked.
for ($i = 1; $i <= $cjob; $i++) {
	out("Checking: $jobs[$i][0]: ");

	$jobstatus = checkproc($jobs[$i][1]);
	if ($jobstatus == 1) {
		# it's running
		out("\tRunning!\n");
	} elsif ($jobstatus == -1) {
		# it's admin down'd
		out("\tAdmin down.\n");
	} else {
		# it's dead, we need to restart it.
		out("\tDead! Trying to recover... ");

		# run the restart command, give the process a moment
		# to come up, then look again.
		system($jobs[$i][2]);
		sleep(5);
		$jobstatus = checkproc($jobs[$i][1]);

		# The hook commands run with all of their output discarded.
		# stdout has to be redirected first: "2>&1 > /dev/null"
		# would leave stderr attached to our own stdout.
		if ($jobstatus == 1) {
			out("Recovered! ");

			# optional "restart worked" hook
			if (length($jobs[$i][4]) > 0) {
				out("[system($jobs[$i][4])]");
				system("$jobs[$i][4] > /dev/null 2>&1");
			}
		} else {
			out("Failure ");

			# optional "restart failed" hook
			if (length($jobs[$i][3]) > 0) {
				out("[system($jobs[$i][3])]");
				system("$jobs[$i][3] > /dev/null 2>&1");
			}
		}

		# a dead process was seen, recovered or not: this makes
		# the cron-mode report go out.
		$passed = 0;

		out("\n");
	}
}

out("\nCheck done.\n");

# In cron mode, mail the buffered report when at least one job was
# found dead, unless --nomail was given.
# NOTE(review): with --cron --nomail the buffered report is never
# shown anywhere; confirm that this is the intended behaviour.
if (($passed == 0) and ($cronflag == 1) and (! $nomail)) {
	# The report is piped straight into the mail program. Staging it
	# in a predictably named file under /tmp would let another local
	# user pre-create that name (for instance as a symlink) and have
	# us overwrite the target.
	$mailcmd = "$mail_prog $mail_opts $mail_to";

	# a mail program that exits early must not kill us with SIGPIPE;
	# the failure is reported by close() below instead.
	$SIG{PIPE} = 'IGNORE';

	open(MAIL, "| $mailcmd") or die "Couldn't run mail command ($mailcmd): $!\n";
	print MAIL "Return-Path: </dev/null>\nFrom: Safetynet <safety\@localhost>\nSubject: $mail_subject\n\n";
	print MAIL $cron_buffer;

	# close() on a pipe waits for the command and fails when it
	# exited non-zero or the report could not be written.
	close(MAIL) or warn "Mail command ($mailcmd) failed (status $?)\n";
}

## out(str) ####################################################
# Emit one piece of diagnostic text.
#   str : the text to emit
# Without the cron flag the text goes to STDOUT right away. With
# the cron flag set it is appended to $cron_buffer instead, so the
# caller can decide later what to do with the collected output.
sub out {
	my ($text) = @_;

	# cron mode: collect, do not print
	return ($cron_buffer .= $text) unless $cronflag == 0;

	return print(STDOUT $text);
}

## trim(str) ###################################################
# Strip leading and trailing whitespace from a scalar.
#   str : the scalar to clean up; it is modified in place (the
#         elements of @_ alias the caller's variables)
sub trim {
	$_[0] =~ s/^\s+//;
	$_[0] =~ s/\s+$//;
	return;
}

## checkproc(procname) #########################################
# See if a process is running.
#   procname : pattern that grep looks for in the output of $pscmd
# Returns:
#   -1  a file called procname exists in $state_dir (admin down)
#    1  at least one matching line was found in the process list
#    0  no matching line was found
# Dies when the process listing pipeline cannot be started.
# NOTE(review): procname is placed on a shell command line without
# quoting, so it has to come from a trusted config file and must
# not contain whitespace or shell metacharacters.
sub checkproc {
	my $proc = $_[0];
	my $pflag = 0;

	if (-e "$state_dir/$proc") { return -1 }

	# localise $_ before anything writes to it, so that the read
	# loop below cannot clobber the caller's $_.
	local $_;

	open(PROC, "$pscmd | grep $proc |") or die "Can't check proc!: $!\npscmd ($pscmd) failed\n";
	while (<PROC>) {
		# the grep that does the searching shows up in the process
		# list as well; any line mentioning grep does not count.
		if (index(lc($_),"grep") != -1) { next; }
		$pflag = 1;
	}
	close(PROC);

	return $pflag;
}

## readconfig() #######################################
# Read the job definitions from $conf_file.
#
# A definition starts on a line beginning with "job" and runs up to
# the first line containing "}". It may span several lines:
#   job <name> {<param>;<restart>;<failed>;<passed>}
# Everything from a '#' to the end of a line is a comment. Lines
# outside a job definition are ignored.
#
# Results are left in two globals:
#   $cjob : number of jobs found
#   @jobs : $jobs[1] .. $jobs[$cjob], each holding
#           [0] name, [1] param, [2] restart, [3] failed, [4] passed
# Dies when the config file cannot be opened.
sub readconfig {
	open(CONFIG, "<$conf_file") or die "Couldn't read config file: $conf_file: $!\n";

	my $workstr = "";	# text of the job define collected so far
	my $getjob = 0;		# 1 while inside an unfinished job define
	my ($wstr, $dummy, $jobstr, $jobparam);
	my ($jobps, $jobcmd, $failcmd, $passcmd);

	$jobs[0][0] = "";	# slot 0 is a placeholder; jobs start at 1
	$cjob = 0;

	while (<CONFIG>) {
		# remove comments.
		($wstr) = split(/#/, $_, 2);
		$wstr = "" unless defined($wstr);

		# trim() also takes the line terminator off. An unconditional
		# chop() here would eat a real character whenever the line
		# ends in a comment or the file has no final newline.
		trim($wstr);

		# skip empty lines.
		if (length($wstr) == 0) { next; }

		# find job defines.
		if (index($_,"job") == 0) {
			$workstr = "";
			$getjob = 1;
		}

		# nothing to do unless we are filling a job.
		if ($getjob != 1) { next; }

		$workstr .= $wstr;

		# keep collecting until the closing brace shows up.
		if (index($wstr,"}") == -1) { next; }
		$getjob = 0;

		# at this point the job define will all be on one line.
		# job <name> {<param>;<restart>;<failed>;<passed>}
		($jobstr,$jobparam) = split(/\{/,$workstr);

		# split(' ') copes with tabs and runs of blanks between
		# "job" and the name.
		($dummy, $jobstr) = split(' ',$jobstr);
		($jobparam,$dummy) = split(/\}/,$jobparam);
		($jobps,$jobcmd,$failcmd,$passcmd) = split(/;/,$jobparam);

		trim($jobstr);
		trim($jobps);
		trim($jobcmd);
		trim($failcmd);
		trim($passcmd);

		if ($debug) { out("jobstr: $jobstr - ps: $jobps, cmd: $jobcmd, fail: $failcmd, pass: $passcmd\n"); }

		# fillup a job
		$cjob++;
		$jobs[$cjob][0] = $jobstr;
		$jobs[$cjob][1] = $jobps;
		$jobs[$cjob][2] = $jobcmd;
		$jobs[$cjob][3] = $failcmd;
		$jobs[$cjob][4] = $passcmd;
	}

	close(CONFIG);
}
