/*
 * babysitter.c
 *
 * Monitor the functioning of a collection of NASD drives
 *
 * Author: David Rochberg 
 * 
 */
/*
 * Copyright (c) of Carnegie Mellon University, 1996,1997,1998,1999.
 *
 * Permission to reproduce, use, and prepare derivative works of
 * this software for internal use is granted provided the copyright
 * and "No Warranty" statements are included with all reproductions
 * and derivative works. This software may also be redistributed
 * without charge provided that the copyright and "No Warranty"
 * statements are included in all redistributions.
 *
 * NO WARRANTY. THIS SOFTWARE IS FURNISHED ON AN "AS IS" BASIS.
 * CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER
 * EXPRESSED OR IMPLIED AS TO THE MATTER INCLUDING, BUT NOT LIMITED
 * TO: WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY
 * OF RESULTS OR RESULTS OBTAINED FROM USE OF THIS SOFTWARE. CARNEGIE
 * MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT
 * TO FREEDOM FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
 */
#include <nasd/nasd_options.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <nasd/nasd_mem.h>
#include <nasd/nasd_getopt.h>
#include <nasd/nasd_pdrive.h>
#include <nasd/nasd_pdrive_client.h>
#include <nasd/nasd_pdrive_client_kpdev.h>
#include <nasd/nasd_timer.h>
#include <nasd/nasd_threadstuff.h>
#include <nasd/nasd_varargs.h>


/*
 * ASSERT_NASD_OK(rc, what)
 *
 * If rc is non-zero, print it as a fatal NASD error (error string, numeric
 * code, the caller-supplied description of the failed operation, and the
 * source location) on stdout and terminate the process with exit status 1.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement (safe in an unbraced if/else), and _rc_ is evaluated exactly
 * once.
 */
#define ASSERT_NASD_OK(_rc_,_string_) do { \
    nasd_status_t nasd_assert_ok_rc_ = (_rc_); \
    if (nasd_assert_ok_rc_) { \
      printf("Fatal NASD error %s(%d) in %s (%s:%d)\n", \
             nasd_error_string(nasd_assert_ok_rc_), nasd_assert_ok_rc_, \
             (_string_), __FILE__, __LINE__); \
      exit(1); \
    } \
  } while (0)

/* Size in bytes of the message buffers used for drive status text. */
#define ERR_BUF 1000

/*
 * Monitoring state for one drive.  main() builds one of these for each
 * drive name given on the command line.
 */
struct drive_s {
  /* nonzero while a sitter thread has claimed this drive; under sitter_mutex */
  int in_progress;
  /* binding to the drive; NULL when no binding is currently held */
  nasd_drive_handle_t h;
  /* drive name, handed to nasd_bind_to_drive() and used as the output prefix */
  char *name;
  /* zero-filled at startup; not otherwise referenced in the visible code */
  char status[ERR_BUF];
  /* not referenced in the visible code */
  int binding_type;
  /* the drive is due for another probe once this time is earlier than "now" */
  time_t check_again;
};

typedef struct drive_s drive_t;

/*
 * Per-thread record; a pointer to one of these is passed to each sitter
 * thread as its argument.
 */
struct sitter_args_s {
  /* thread object filled in by nasd_thread_create() */
  nasd_thread_t thread;
  /* set to 0 at startup; not otherwise referenced in the visible code */
  time_t last_called;
};

typedef struct sitter_args_s sitter_args_t;

/* sometimes I want to search for where global variables are defined */

/* port argument handed to nasd_bind_to_drive() */
char	       *binding_port = NASD_PDRIVE_PORT;
/* when nonzero, each sitter thread kills itself at the top of its loop */
int		quit=0;
/* seconds between main's wakeup broadcasts; also added to "now" to
   compute a drive's next check time */
int		poll_rate=1;
/* round-robin scan position used to index drives[] */
int		sitter_current; /* under sitter_mutex */
/* not referenced in the visible code */
time_t		last_start=0;
/* number of initialized entries in drives[] */
int		total_drives = 0;
/* number of sitter threads main() creates */
int		nthreads=2;
/* array of per-drive state, allocated in main() */
drive_t	       *drives;
/* array of per-thread records, allocated in main() */
sitter_args_t  *threads;
/* guards sitter_current and the drives' in_progress flags (see the
   "under sitter_mutex" annotations) */
NASD_DECLARE_MUTEX(sitter_mutex);
/* serializes writes to stdout in emit() */
NASD_DECLARE_MUTEX(output_lock);

/* sitter threads with nothing to do wait on this condition; main()
   broadcasts it every poll_rate seconds */
NASD_DECLARE_COND(start_check_cond);
NASD_DECLARE_MUTEX(start_check_mutex);

/* initialized in main(); not otherwise referenced in the visible code */
NASD_DECLARE_MUTEX(bogon_mutex);


/*
 * emit: print one status line about a drive on stdout.
 *
 * The line has the form "<drive name>: <class>: <message>", where <message>
 * is built printf-style from fmt and the trailing arguments.  The complete
 * line is limited to ERR_BUF-1 characters; anything longer is truncated.
 * The write is done under output_lock so lines from concurrent callers do
 * not interleave.
 *
 *   d                drive the message is about (only d->name is read)
 *   suppress_repeat  currently ignored; kept so existing callers compile
 *   class            short category tag placed after the drive name
 *   fmt, ...         printf-style format and arguments for the message
 */
void
emit(drive_t *d, int suppress_repeat, const char * class,const char *fmt, ...)
{
  nasd_valist_t ap;
  char buffer[ERR_BUF];
  int  prefix_len;

  (void) suppress_repeat;

  /*
   * Use snprintf's return value instead of "%n" to find where the prefix
   * ends.  Both report the untruncated length, so the value must be
   * clamped to the buffer: otherwise a long drive name would put the
   * message pointer past the end of buffer and make the remaining-size
   * argument wrap to a huge value.
   */
  prefix_len = snprintf(buffer, sizeof(buffer), "%s: %s: ", d->name, class);
  if (prefix_len < 0) {
    /* formatting failed: fall back to an empty prefix */
    buffer[0] = '\0';
    prefix_len = 0;
  } else if ((size_t) prefix_len >= sizeof(buffer)) {
    /* prefix alone filled the buffer; leave room only for the terminator */
    prefix_len = (int) sizeof(buffer) - 1;
  }
  /* add timestamp */

  NASD_VARARG_START(ap, fmt);
  vsnprintf(buffer + prefix_len, sizeof(buffer) - (size_t) prefix_len, fmt, ap);
  NASD_VARARG_END(ap);

  NASD_LOCK_MUTEX(output_lock);
  fputs(buffer,stdout);
  fputc('\n',stdout);
  fflush(stdout);
  NASD_UNLOCK_MUTEX(output_lock);
}



/*
 * create_binding: try to bind to drive d by name on binding_port.
 *
 * On success the handle is left in d->h.  On failure d->h is set to NULL
 * and a "fail" line carrying the NASD error string is emitted.
 */
void create_binding (drive_t *d)
{
  nasd_status_t bind_status;

  bind_status = nasd_bind_to_drive(d->name, binding_port, NASD_BIND_DEFAULT,
                                   NULL, 0, &d->h);
  if (!bind_status) {
    /* bound; nothing more to do */
    return;
  }

  d->h = NULL;
  emit(d,1,"fail","binding failure: %s",nasd_error_string(bind_status));
}

/*
 * lock_drive: mark drive d as claimed by the caller.
 *
 * Asserts that the drive is not already claimed, then sets in_progress.
 * This function takes no lock itself; in_progress is annotated as being
 * under sitter_mutex, so the caller is expected to hold that mutex.
 */
void lock_drive(drive_t *d) 
{
  /* NASD_ASSERT_LOCKED_CURRENT(sitter_mutex); not supported currently  */
  NASD_ASSERT(0==d->in_progress);
  d->in_progress = 1;
}

/*
 * unlock_drive: release the claim on drive d.
 *
 * Asserts that the drive is currently claimed, then clears in_progress.
 * This function takes no lock itself; in_progress is annotated as being
 * under sitter_mutex.
 */
void unlock_drive(drive_t *d)
{
  NASD_ASSERT(d->in_progress);
  d->in_progress = 0;
}

/* We have a set of threads so that we can continue if one gets hung
   up talking to a stuck drive */

void sitter_thread(nasd_threadarg_t  cast_me)
{
  int                   target,i;
  time_t		now;
  sitter_args_t         *me = (sitter_args_t *) cast_me;
  drive_t		*d,*tmp;
  nasd_status_t		nasd_status;
  nasd_rpc_status_t	rpc_status;
  nasd_error_string_t	err;
  
  while (1) {    
  top:
    if (quit) NASD_THREAD_KILL_SELF();
    now = time(NULL);
    d = NULL;
    
    NASD_LOCK_MUTEX(sitter_mutex);
    for (i=0;i<total_drives;i++) {
      target = sitter_current++ % total_drives ;
      tmp = &drives[target];
      if ((tmp->check_again< now)&&
	  (!tmp->in_progress)) {
	d = tmp;
	break;
      }
    }
    
    if (!d) {
      /* no eligible drives.  sleep. */
      NASD_UNLOCK_MUTEX(sitter_mutex);
      NASD_LOCK_MUTEX(start_check_mutex);
      NASD_WAIT_COND(start_check_cond,start_check_mutex);
      NASD_UNLOCK_MUTEX(start_check_mutex);
      goto top;
    } 
    
    lock_drive(d);
    NASD_UNLOCK_MUTEX(sitter_mutex);
    d->check_again = now + poll_rate;
    if (!d->h) {
      create_binding(d);
    }
    if (!d->h) {
      unlock_drive(d);
      goto top;
    }
    nasd_cl_p_null_dr(d->h,&nasd_status,&rpc_status);
    if (rpc_status) {
      emit(d,1,"fail","RPC error from drive_null: %s(%d)",
	   nasd_cl_error_string(d->h,rpc_status,err),rpc_status);
      nasd_unbind_drive(&d->h);
    } else if (nasd_status) {
      emit(d,1,"fail","NASD error from drive_null: %s(%d)",
	   nasd_error_string(nasd_status), nasd_status);
    } else {
      emit(d,1,"normal","is alive");
    }
    unlock_drive(d);
    d = NULL;
  }
  NASD_THREAD_KILL_SELF();
}



int main (int argc,char **argv) 
{
  nasd_status_t                 rc;
  int				i;
  drive_t			*d;
  sitter_args_t			*t;

  rc = nasd_cl_p_init();
  ASSERT_NASD_OK(rc,"client library initialization");
  nasd_mutex_init(&sitter_mutex);
  nasd_mutex_init(&output_lock);
  nasd_cond_init(&start_check_cond);
  nasd_mutex_init(&start_check_mutex);

  nasd_mutex_init(&bogon_mutex);
  
  NASD_Malloc(drives,argc * sizeof(drive_t),(drive_t *));
  NASD_ASSERT(drives);
  
  NASD_Malloc(threads,nthreads*sizeof(sitter_args_t),(sitter_args_t *));
  NASD_ASSERT(threads);
  
  d = drives;
  argv++;argc--;
  while (argc--) {
    d->in_progress = 0;
    d->name = strdup(*argv++);
    d->check_again = 0;
    memset(d->status,0,sizeof(d->status));
    total_drives++;
    d++;
  }
  
  t=threads;
  for (i=0;i<nthreads;i++,t++) {
    t->last_called = 0;
    rc = nasd_thread_create(&t->thread,sitter_thread,t);
    NASD_ASSERT(!rc);
  }
  while(1) {
    NASD_BROADCAST_COND(start_check_cond);
    sleep(poll_rate);
  }
  exit(0);
}

