/*
 * nasd_dux_timeout_kernel.c
 *
 * NASD timeout mechanism for dux kernel.
 *
 * Author: Jim Zelenka
 */
/*
 * Copyright (c) of Carnegie Mellon University, 1998,1999.
 *
 * Permission to reproduce, use, and prepare derivative works of
 * this software for internal use is granted provided the copyright
 * and "No Warranty" statements are included with all reproductions
 * and derivative works. This software may also be redistributed
 * without charge provided that the copyright and "No Warranty"
 * statements are included in all redistributions.
 *
 * NO WARRANTY. THIS SOFTWARE IS FURNISHED ON AN "AS IS" BASIS.
 * CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER
 * EXPRESSED OR IMPLIED AS TO THE MATTER INCLUDING, BUT NOT LIMITED
 * TO: WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY
 * OF RESULTS OR RESULTS OBTAINED FROM USE OF THIS SOFTWARE. CARNEGIE
 * MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT
 * TO FREEDOM FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
 */


#include <nasd/nasd_options.h>
#include <nasd/nasd_types.h>
#include <nasd/nasd_freelist.h>
#include <nasd/nasd_mem.h>
#include <nasd/nasd_common.h>
#include <nasd/nasd_shutdown.h>
#include <nasd/nasd_timeout.h>
#include <sys/types.h>
#include <sys/time.h>

/*
 * Thread group used to start the timeout thread, signal it to shut
 * down, and wait for it to exit.
 */
nasd_threadgroup_t nasd_timeout_group;
/* Thread handle filled in by nasd_thread_create() for the timeout thread. */
nasd_thread_t nasd_timeout_thread;

/*
 * Sentinel head of the circular doubly-linked (tm_next/tm_prev) list of
 * pending timeouts, kept in nondecreasing tm_time order by
 * nasd_timeout_enq_internal(). Its address also serves as the
 * sleep/wakeup event for the timeout thread.
 */
nasd_timeout_t nasd_timeouts_pending;

/* Count of outstanding nasd_timeout_suspend() calls not yet resumed. */
int nasd_timeout_suspended;
/*
 * Nonzero while the timeout thread is executing a callback with
 * nasd_timeout_mutex dropped.
 */
int nasd_timeout_running;
/*
 * Broadcast by the timeout thread after a callback returns while a
 * suspend is outstanding; nasd_timeout_suspend() waits on it.
 */
NASD_DECLARE_COND(nasd_timeout_suspend_cond)

/* Freelist from which nasd_timeout_t entries are allocated and returned. */
nasd_freelist_t *nasd_timeout_freelist;
/*
 * Freelist tunables handed to the NASD_FREELIST_* macros; their exact
 * meaning is defined by nasd_freelist.h. Presumably: maximum entries
 * retained, growth increment, and initial population -- TODO confirm.
 */
#define NASD_MAX_FREE_TIMEOUT 128
#define NASD_TIMEOUT_INC       16
#define NASD_TIMEOUT_INITIAL   32

/* Return a timeout entry to the freelist (chained through tm_next). */
#define nasd_timeout_free(_tm_) \
  NASD_FREELIST_FREE(nasd_timeout_freelist,_tm_,tm_next)

/*
 * Mutex guarding the pending list, the handle buckets, the handle
 * counter state and the suspended/running flags.
 */
NASD_DECLARE_MUTEX(nasd_timeout_mutex)
#define LOCK_TM()   NASD_LOCK_MUTEX(nasd_timeout_mutex)
#define UNLOCK_TM() NASD_UNLOCK_MUTEX(nasd_timeout_mutex)
/*
 * Unlink _tm_ from the time-ordered pending list (tm_next/tm_prev)
 * and clear its links. _tm_ is evaluated several times, so it must
 * have no side effects.
 *
 * Wrapped in do { } while(0) so the macro behaves as a single
 * statement, including as the body of an if that has an else.
 */
#define NASD_TM_DEQ(_tm_) do { \
  (_tm_)->tm_next->tm_prev = (_tm_)->tm_prev; \
  (_tm_)->tm_prev->tm_next = (_tm_)->tm_next; \
  (_tm_)->tm_next = (_tm_)->tm_prev = NULL; \
} while(0)

/*
 * Unlink _tm_ from its handle hash bucket chain (tm_bnext/tm_bprev)
 * and clear its links. Same single-statement and no-side-effects
 * rules as NASD_TM_DEQ.
 */
#define NASD_TM_DEQ_B(_tm_) do { \
  (_tm_)->tm_bnext->tm_bprev = (_tm_)->tm_bprev; \
  (_tm_)->tm_bprev->tm_bnext = (_tm_)->tm_bnext; \
  (_tm_)->tm_bnext = (_tm_)->tm_bprev = NULL; \
} while(0)

/*
 * Status-bit tests on a timeout entry. Each yields nonzero when the
 * corresponding NASD_TIMEOUT_S_* bit is set in tm_status.
 */
#define TM_CANCELLED(_tm_) ((_tm_)->tm_status&NASD_TIMEOUT_S_CANCELLED)
#define TM_RUNNING(_tm_) ((_tm_)->tm_status&NASD_TIMEOUT_S_RUNNING)
#define TM_PERIODIC(_tm_)  ((_tm_)->tm_status&NASD_TIMEOUT_S_PERIODIC)

/*
 * Handle generation. A handle is
 *   (counter << NASD_TIMEOUT_BUCKET_BITS) | bucket_index
 * The counter starts at 1 and is reset to 1 (setting
 * nasd_timeout_haswrapped) once it reaches
 * NASD_TIMEOUT_HANDLE_COUNTER_MAX; after a wrap, nasd_timeout_add()
 * checks new handles for duplicates.
 *
 * NOTE(review): NASD_TIMEOUT_BUCKETS is 1<<(BITS-1), i.e. 128 buckets,
 * so only the low 7 of the 8 reserved bits ever carry a bucket index.
 * Lookup masks with NASD_TIMEOUT_BUCKETS-1, so this is self-consistent.
 */
nasd_uint64 nasd_timeout_handle_counter;
#define NASD_TIMEOUT_BUCKET_BITS         8
#define NASD_TIMEOUT_BUCKETS (1<<(NASD_TIMEOUT_BUCKET_BITS-1))
#define NASD_TIMEOUT_HANDLE_COUNTER_MAX ((nasd_uint64)(nasd_uint64cast(1)<<(63-NASD_TIMEOUT_BUCKET_BITS)))
int nasd_timeout_haswrapped;

/*
 * Handle hash table: each element is the sentinel of a circular
 * tm_bnext/tm_bprev chain. nasd_timeout_nextbucket is the bucket the
 * next added timeout will be placed in (advanced round-robin).
 */
nasd_timeout_t nasd_timeout_buckets[NASD_TIMEOUT_BUCKETS];
int nasd_timeout_nextbucket;

/*
 * Wake the timeout thread, which sleeps on the address of
 * nasd_timeouts_pending.
 *
 * Deliberately has no trailing semicolon: callers write
 * "TM_THREAD_KICK();", and a semicolon inside the macro would produce
 * an extra empty statement that breaks if/else around the call.
 */
#define TM_THREAD_KICK() \
  thread_wakeup((vm_offset_t)&nasd_timeouts_pending)

/*
 * Kernel clock rate; used to convert a microsecond delay into the
 * tick count passed to timeout().
 */
extern int hz;

/*
 * Expiry time the timeout thread is currently sleeping toward, or
 * zero when it is not sleeping toward a specific deadline.
 * nasd_timeout_add() reads it to decide whether a kick is needed.
 */
nasd_timespec_t nasd_timeout_waiting_for;

/*
 * Insert tm into the pending list, keeping the list sorted by tm_time.
 * Entries with equal times stay in insertion order.
 *
 * The caller must hold nasd_timeout_mutex.
 *
 * Returns 1 when tm became the first entry of the list (the next
 * timeout to fire), 0 otherwise.
 *
 * Two interchangeable search strategies are kept below: a forward
 * scan from the head and a backward scan from the tail. Both give
 * the same list; which is cheaper depends on whether new timeouts
 * tend to land near the front or the back. The backward scan is
 * the one compiled in.
 */
int
nasd_timeout_enq_internal(
  nasd_timeout_t  *tm)
{
  nasd_timeout_t *pos;

#if 0
  /* forward: stop at the first entry strictly later than tm */
  pos = nasd_timeouts_pending.tm_next;
  while(pos != &nasd_timeouts_pending) {
    if (NASD_TIMESPEC_GT(pos->tm_time, tm->tm_time))
      break;
    pos = pos->tm_next;
  }

  /* link tm just before pos (pos is the sentinel when appending) */
  tm->tm_next = pos;
  tm->tm_prev = pos->tm_prev;
  pos->tm_prev->tm_next = tm;
  pos->tm_prev = tm;

  return((tm->tm_prev == &nasd_timeouts_pending) ? 1 : 0);
#else
  /* backward: stop at the last entry that is not later than tm */
  pos = nasd_timeouts_pending.tm_prev;
  while(pos != &nasd_timeouts_pending) {
    if (NASD_TIMESPEC_LE(pos->tm_time, tm->tm_time))
      break;
    pos = pos->tm_prev;
  }

  /* link tm just after pos (pos is the sentinel when tm is the new head) */
  tm->tm_prev = pos;
  tm->tm_next = pos->tm_next;
  pos->tm_next->tm_prev = tm;
  pos->tm_next = tm;

  return((pos == &nasd_timeouts_pending) ? 1 : 0);
#endif
}

/*
 * Shutdown callback: destroy the timeout freelist.
 * Registered on the shutdown list by nasd_sys_timeout_init(), which
 * also calls it directly if that registration fails.
 * The argument is not used.
 */
void
nasd_timeout_shutdown_freelist(
  void  *ignored)
{
  NASD_FREELIST_DESTROY(nasd_timeout_freelist,tm_next,(nasd_timeout_t *));
}

/*
 * Shutdown callback: destroy the timeout thread group.
 * A failure is reported with a console warning and otherwise ignored.
 * The argument is not used.
 */
void
nasd_timeout_killgroup(
  void  *ignored)
{
  nasd_status_t status;

  status = nasd_destroy_threadgroup(&nasd_timeout_group);
  if (!status)
    return;

  printf("WARNING: could not destroy timeout thread group rc=0x%x (%s)\n",
    status, nasd_error_string(status));
}

/*
 * Callout handed to timeout(): wakes whoever sleeps on the event
 * address passed in arg.
 */
void
nasd_timeout_kicker(
  caddr_t  arg)
{
  vm_offset_t event;

  event = (vm_offset_t)arg;
  thread_wakeup(event);
}

/*
 * Body of the timeout thread.
 *
 * Repeatedly: take nasd_timeout_mutex, wait out any suspension, run
 * every expired timeout at the head of the pending list (dropping the
 * mutex around each callback), then sleep on &nasd_timeouts_pending
 * until either the next deadline (via a timeout() callout to
 * nasd_timeout_kicker) or an explicit TM_THREAD_KICK().
 *
 * Exits when the thread group is flagged for shutdown. The argument
 * is not used.
 *
 * NOTE(review): callouts scheduled with timeout() are never cancelled
 * when the thread is woken early, so stale kicks can arrive later;
 * they only cause a harmless extra pass through the loop.
 */
void
nasd_timeout_proc(
  void  *ignored)
{
  nasd_timespec_t now, then;
  int found, evt_asserted;
  nasd_timeout_proc_t proc;
  nasd_timeout_handle_t h;
  unsigned long us, whz;
  nasd_timeout_t *tm;
  void *arg1, *arg2;

  NASD_THREADGROUP_RUNNING(&nasd_timeout_group);
  evt_asserted = 0;
  found = 0;

  nasd_timeout_waiting_for.ts_sec = 0;
  nasd_timeout_waiting_for.ts_nsec = 0;

  while(!NASD_THREADGROUP_SHUTDOWNP(&nasd_timeout_group)) {
    /*
     * Run expired timeouts, compute delay until next one
     */
    LOCK_TM();
    nasd_timeout_waiting_for.ts_sec = 0;
    nasd_timeout_waiting_for.ts_nsec = 0;

    if (NASD_THREADGROUP_SHUTDOWNP(&nasd_timeout_group)) {
      UNLOCK_TM();
      goto done;
    }

    /*
     * While suspended, run nothing: sleep until nasd_timeout_resume()
     * (or shutdown) kicks us. The wait is asserted before the mutex
     * is dropped so a resume, which kicks under the mutex, cannot
     * slip in unnoticed.
     */
    while(nasd_timeout_suspended) {
      NASD_ASSERT(evt_asserted == 0);
      assert_wait((vm_offset_t)&nasd_timeouts_pending, TRUE);
      evt_asserted = 1;
      UNLOCK_TM();
      thread_block();
      evt_asserted = 0;
      LOCK_TM();
      if (NASD_THREADGROUP_SHUTDOWNP(&nasd_timeout_group)) {
        UNLOCK_TM();
        goto done;
      }
    }

    do {
      tm = nasd_timeouts_pending.tm_next;
      if (tm == &nasd_timeouts_pending) {
        /* nothing pending; fall through to an untimed sleep */
        break;
      }
      nasd_gettime(&now);
      if (NASD_TIMESPEC_GE(now, tm->tm_time)) {
        found = 1;
        /*
         * Run timeout. The entry leaves the pending list but stays in
         * its handle bucket, marked RUNNING, so a concurrent cancel
         * can find it and flag it instead of freeing it under us.
         */
        NASD_TM_DEQ(tm);
        proc = tm->tm_proc;
        h = tm->tm_handle;
        arg1 = tm->tm_arg1;
        arg2 = tm->tm_arg2;
        tm->tm_status |= NASD_TIMEOUT_S_RUNNING;
        nasd_timeout_running = 1;
        UNLOCK_TM();
        (*proc)(h, arg1, arg2);
        LOCK_TM();
        nasd_timeout_running = 0;
        if (nasd_timeout_suspended) {
          /* release anyone in nasd_timeout_suspend() waiting for us */
          NASD_BROADCAST_COND(nasd_timeout_suspend_cond);
        }
        NASD_ASSERT(TM_RUNNING(tm));
        tm->tm_status &= ~NASD_TIMEOUT_S_RUNNING;
        if (TM_PERIODIC(tm) && (!TM_CANCELLED(tm))) {
          /* periodic and not cancelled, requeue */
          nasd_gettime(&tm->tm_time);
          NASD_TIMESPEC_ADD(tm->tm_time,tm->tm_interval);
          nasd_timeout_enq_internal(tm);
        }
        else {
          /* one-shot, or cancelled while running: retire the entry */
          NASD_TM_DEQ_B(tm);
          nasd_timeout_free(tm);
        }
      }
      else {
        /*
         * Head of the list is still in the future: convert the
         * remaining time to clock ticks, assert our wait, and arm a
         * callout that will wake us when it elapses.
         */
        found = 0;
        then = tm->tm_time;
        nasd_timeout_waiting_for = tm->tm_time;
        NASD_TIMESPEC_SUB(then, now);
        us = then.ts_sec * 1000000;
        us += then.ts_nsec / 1000;
        whz = (hz * us) / 1000000;
        assert_wait((vm_offset_t)&nasd_timeouts_pending, TRUE);
        evt_asserted = 1;
        timeout(nasd_timeout_kicker, (caddr_t)&nasd_timeouts_pending, whz);
      }
    } while(found && (!NASD_THREADGROUP_SHUTDOWNP(&nasd_timeout_group))
        && (nasd_timeout_suspended == 0));

    /* wait for next timeout */
    if (evt_asserted == 0) {
      /*
       * A shutdown kick that arrived while a callback was running
       * found no waiter and was lost. If shutdown is already flagged,
       * leave now rather than sleeping with nothing left to wake us.
       */
      if (NASD_THREADGROUP_SHUTDOWNP(&nasd_timeout_group)) {
        UNLOCK_TM();
        goto done;
      }
      assert_wait((vm_offset_t)&nasd_timeouts_pending, TRUE);
      evt_asserted = 1;
    }
    UNLOCK_TM();
    thread_block();
    nasd_timeout_waiting_for.ts_sec = 0;
    nasd_timeout_waiting_for.ts_nsec = 0;
    evt_asserted = 0;
  }
done:
  NASD_THREADGROUP_DONE(&nasd_timeout_group);
  NASD_THREAD_KILL_SELF();
}

/*
 * Shutdown callback: stop the timeout thread and discard every
 * timeout still pending.
 *
 * The pending list is walked without nasd_timeout_mutex; by then the
 * timeout thread has been waited for. The argument is not used.
 */
void
nasd_timeout_killthread(
  void  *ignored)
{
  nasd_timeout_t *cur, *following;

  /* ask the thread to exit and wake it so it notices */
  NASD_THREADGROUP_INDICATE_SHUTDOWN(&nasd_timeout_group);
  TM_THREAD_KICK();

  /* block until it has exited */
  NASD_THREADGROUP_WAIT_STOP(&nasd_timeout_group);

  /* unlink each leftover entry from both lists and release it */
  cur = nasd_timeouts_pending.tm_next;
  while(cur != &nasd_timeouts_pending) {
    following = cur->tm_next;
    NASD_TM_DEQ(cur);
    NASD_TM_DEQ_B(cur);
    nasd_timeout_free(cur);
    cur = following;
  }
}

/*
 * Initialize the timeout subsystem and start the timeout thread.
 *
 * Resets the global state, makes the pending list and every handle
 * bucket an empty circular list, then creates in order: the suspend
 * condition, the entry freelist, the mutex, the thread group and the
 * timeout thread. A matching cleanup is registered on sl after each
 * step.
 *
 * sl: shutdown list on which cleanup actions are registered.
 *
 * Returns NASD_SUCCESS, NASD_NO_MEM if the freelist cannot be
 * created, or the status of the first step that failed. On failure,
 * cleanups already registered on sl are left for the caller to run.
 *
 * NOTE(review): if nasd_shutdown_cond() or nasd_shutdown_mutex()
 * fails, the freshly initialized cond/mutex is not destroyed here --
 * confirm whether those routines clean up on their own failure.
 */
nasd_status_t
nasd_sys_timeout_init(
  nasd_shutdown_list_t  *sl)
{
  nasd_status_t rc;
  int i;

  nasd_timeout_suspended = 0;
  nasd_timeout_running = 0;
  nasd_timeout_handle_counter = 1;
  nasd_timeout_haswrapped = 0;
  nasd_timeout_nextbucket = 0;

  /* empty pending list: sentinel points at itself */
  bzero((char *)&nasd_timeouts_pending, sizeof(nasd_timeouts_pending));
  nasd_timeouts_pending.tm_prev = &nasd_timeouts_pending;
  nasd_timeouts_pending.tm_next = &nasd_timeouts_pending;

  /* empty handle buckets: each sentinel points at itself */
  for(i=0;i<NASD_TIMEOUT_BUCKETS;i++) {
    bzero((char *)&nasd_timeout_buckets[i], sizeof(nasd_timeout_buckets[i]));
    nasd_timeout_buckets[i].tm_bnext = &nasd_timeout_buckets[i];
    nasd_timeout_buckets[i].tm_bprev = &nasd_timeout_buckets[i];
  }

  rc = nasd_cond_init(&nasd_timeout_suspend_cond);
  if (rc)
    return(rc);
  rc = nasd_shutdown_cond(sl, &nasd_timeout_suspend_cond);
  if (rc) {
    return(rc);
  }

  NASD_FREELIST_CREATE(nasd_timeout_freelist, NASD_MAX_FREE_TIMEOUT,
    NASD_TIMEOUT_INC, sizeof(nasd_timeout_t));
  if (nasd_timeout_freelist == NULL)
    return(NASD_NO_MEM);
  /*
   * Prime with the initial population, not the list's maximum:
   * NASD_TIMEOUT_INITIAL exists for exactly this purpose.
   */
  NASD_FREELIST_PRIME(nasd_timeout_freelist, NASD_TIMEOUT_INITIAL,tm_next,
    (nasd_timeout_t *));
  rc = nasd_shutdown_proc(sl, nasd_timeout_shutdown_freelist, NULL);
  if (rc) {
    nasd_timeout_shutdown_freelist(NULL);
    return(rc);
  }

  rc = nasd_mutex_init(&nasd_timeout_mutex);
  if (rc)
    return(rc);
  rc = nasd_shutdown_mutex(sl, &nasd_timeout_mutex);
  if (rc) {
    return(rc);
  }

  rc = nasd_init_threadgroup(&nasd_timeout_group);
  if (rc)
    return(rc);
  rc = nasd_shutdown_proc(sl, nasd_timeout_killgroup, NULL);
  if (rc) {
    nasd_timeout_killgroup(NULL);
    return(rc);
  }

  /*
   * Start the thread and wait until it reports running before
   * registering (or, on failure, directly invoking) its killer.
   */
  rc = nasd_thread_create(&nasd_timeout_thread, nasd_timeout_proc, NULL);
  if (rc)
    return(rc);
  NASD_THREADGROUP_STARTED(&nasd_timeout_group);
  NASD_THREADGROUP_WAIT_START(&nasd_timeout_group);
  rc = nasd_shutdown_proc(sl, nasd_timeout_killthread, NULL);
  if (rc) {
    nasd_timeout_killthread(NULL);
    return(rc);
  }

  return(NASD_SUCCESS);
}

/*
 * Register a timeout.
 *
 * tmhp:     out; receives the handle identifying the new timeout.
 * proc:     callback, invoked as (*proc)(handle, arg1, arg2) by the
 *           timeout thread.
 * arg1/2:   opaque values passed through to proc.
 * first:    time of the first firing; added to the current time
 *           unless NASD_TIMEOUT_F_ABSOLUTE is set in flags.
 * interval: re-arm interval; only used (and required to be valid and
 *           nonzero) when NASD_TIMEOUT_F_PERIODIC is set.
 * flags:    NASD_TIMEOUT_F_* bits.
 *
 * Returns NASD_SUCCESS, NASD_BAD_TIMESPEC for an invalid first or
 * interval, or NASD_NO_MEM if no entry could be allocated or no
 * unused handle could be found in the chosen bucket.
 */
nasd_status_t
nasd_timeout_add(
  nasd_timeout_handle_t  *tmhp,
  nasd_timeout_proc_t     proc,
  void                   *arg1,
  void                   *arg2,
  nasd_timespec_t         first,
  nasd_timespec_t         interval,
  nasd_timeout_flags_t    flags)
{
  nasd_timeout_t *tm, *bucket, *tmb;
  nasd_timeout_handle_t hr, h;
  int ret, nb, dup, dokick;
  nasd_timespec_t now;

  nasd_gettime(&now);

  if (!NASD_TIMESPEC_VALID(first))
    return(NASD_BAD_TIMESPEC);
  if ((flags&NASD_TIMEOUT_F_PERIODIC) && (!NASD_TIMESPEC_VALID_NZ(interval)))
    return(NASD_BAD_TIMESPEC);
  NASD_FREELIST_GET(nasd_timeout_freelist,tm,tm_next,(nasd_timeout_t *));
  if (tm == NULL)
    return(NASD_NO_MEM);

  tm->tm_status = NASD_TIMEOUT_S_KNOWN;

  tm->tm_time = first;

  if (!(flags&NASD_TIMEOUT_F_ABSOLUTE)) {
    /* relative request: convert to an absolute expiry time */
    NASD_TIMESPEC_ADD(tm->tm_time, now);
  }

  if (flags&NASD_TIMEOUT_F_PERIODIC) {
    tm->tm_status |= NASD_TIMEOUT_S_PERIODIC;
    tm->tm_interval = interval;
  }
  else {
    tm->tm_interval.ts_sec = 0;
    tm->tm_interval.ts_nsec = 0;
  }

  LOCK_TM();

  /* buckets are handed out round-robin */
  nb = nasd_timeout_nextbucket;
  nasd_timeout_nextbucket++;
  if (nasd_timeout_nextbucket >= NASD_TIMEOUT_BUCKETS)
    nasd_timeout_nextbucket = 0;

  /*
   * If we discover that the number of timeouts is insane,
   * or the hash table is doing poorly, we could load-balance
   * here by picking nb to be an emptyish bucket.
   */

  /* handle = counter in the high bits, bucket index in the low bits */
  hr = nasd_timeout_handle_counter;
  h = hr<<NASD_TIMEOUT_BUCKET_BITS;
  h |= (nasd_uint64)nb;
  ret = 0;

  bucket = &nasd_timeout_buckets[nb];

  if (nasd_timeout_haswrapped) {
    /*
     * The counter has wrapped, so h may collide with a live handle.
     * Rescan the bucket until a full pass finds no match; on the
     * first collision restart the candidate counter at 1, afterwards
     * step it by one.
     */
    do {
      dup = 0;
      for(tmb=bucket->tm_bnext;tmb!=bucket;tmb=tmb->tm_bnext) {
        if (tmb->tm_handle == h) {
          /* duplicate! try a different h */
          dup = 1;
          if (ret == 0) {
            ret = 1;
            hr = 1;
          }
          else {
            hr++;
            if (hr >= NASD_TIMEOUT_HANDLE_COUNTER_MAX) {
              /*
               * Every counter value representable in a handle is
               * already in use in this bucket. That should never
               * happen in practice, which is why no other bucket
               * is tried. Stopping at the counter maximum also
               * keeps the shifted handle from aliasing an earlier
               * one or turning into the null handle.
               */
              nasd_timeout_free(tm);
              UNLOCK_TM();
              return(NASD_NO_MEM);
            }
          }
          h = hr<<NASD_TIMEOUT_BUCKET_BITS;
          h |= (nasd_uint64)nb;
        }
      }
    } while(dup);
  }

  nasd_timeout_handle_counter++;
  if (nasd_timeout_handle_counter >= NASD_TIMEOUT_HANDLE_COUNTER_MAX) {
    nasd_timeout_handle_counter = 1;
    nasd_timeout_haswrapped = 1;
  }

  tm->tm_handle = h;

  tm->tm_proc = proc;
  tm->tm_arg1 = arg1;
  tm->tm_arg2 = arg2;

  /* insert at the front of the handle bucket chain */
  tm->tm_bprev = bucket;
  tm->tm_bnext = bucket->tm_bnext;
  tm->tm_bprev->tm_bnext = tm;
  tm->tm_bnext->tm_bprev = tm;

  dokick = nasd_timeout_enq_internal(tm);

  /*
   * No kick is needed if the thread is already sleeping toward a
   * deadline no later than the new head, or if it is in a callback
   * right now (it rescans the list when the callback returns).
   */
  if ((NASD_TIMESPEC_GE(nasd_timeouts_pending.tm_next->tm_time,
    nasd_timeout_waiting_for) && (nasd_timeout_waiting_for.ts_sec))
    || nasd_timeout_running)
  {
    dokick = 0;
  }

  /*
   * Publish the handle before dropping the mutex. The timeout thread
   * must take the mutex to dequeue this entry, so the caller's handle
   * storage is guaranteed to be written before the callback can run.
   */
  *tmhp = h;

  UNLOCK_TM();

  if (dokick)
    TM_THREAD_KICK();

  return(NASD_SUCCESS);
}

/*
 * Look up the timeout entry that owns handle tmh.
 *
 * The caller must hold nasd_timeout_mutex.
 *
 * Returns the entry, or NULL if tmh is NASD_TIMEOUT_HANDLE_NULL or
 * no entry in the handle's bucket carries it.
 */
nasd_timeout_t *
nasd_timeout_handle_to_tm(
  nasd_timeout_handle_t  tmh)
{
  nasd_timeout_t *head, *cur;
  int idx;

  if (tmh == NASD_TIMEOUT_HANDLE_NULL)
    return(NULL);

  /* the low bits of a handle name its bucket */
  idx = tmh&(NASD_TIMEOUT_BUCKETS-1);
  head = &nasd_timeout_buckets[idx];

  /* walk the chain until we match or come back around to the sentinel */
  cur = head->tm_bnext;
  while((cur != head) && (cur->tm_handle != tmh)) {
    cur = cur->tm_bnext;
  }

  return((cur == head) ? NULL : cur);
}

/*
 * Cancel the timeout identified by tmh.
 *
 * An entry whose callback is currently executing is only flagged
 * CANCELLED; the timeout thread retires it when the callback returns.
 * Any other entry is unlinked from the pending list and its handle
 * bucket and released immediately. An unknown handle is ignored.
 *
 * Always returns NASD_SUCCESS.
 */
nasd_status_t
nasd_timeout_cancel(
  nasd_timeout_handle_t  tmh)
{
  nasd_timeout_t *victim;

  LOCK_TM();
  victim = nasd_timeout_handle_to_tm(tmh);
  if (victim != NULL) {
    if (!TM_RUNNING(victim)) {
      /* idle entry: take it off both lists and release it */
      NASD_TM_DEQ(victim);
      NASD_TM_DEQ_B(victim);
      nasd_timeout_free(victim);
    }
    else {
      /* in flight: leave it for the timeout thread to clean up */
      victim->tm_status |= NASD_TIMEOUT_S_CANCELLED;
    }
  }
  UNLOCK_TM();

  return(NASD_SUCCESS);
}

/*
 * Report the status bits of the timeout identified by tmh.
 *
 * tsp: out; receives the entry's tm_status, or 0 when no entry with
 *      that handle exists.
 *
 * Always returns NASD_SUCCESS.
 */
nasd_status_t
nasd_timeout_get_status(
  nasd_timeout_handle_t   tmh,
  nasd_timeout_status_t  *tsp)
{
  nasd_timeout_t *entry;

  LOCK_TM();
  entry = nasd_timeout_handle_to_tm(tmh);
  if (entry != NULL) {
    *tsp = entry->tm_status;
  }
  else {
    *tsp = 0;
  }
  UNLOCK_TM();

  return(NASD_SUCCESS);
}

/*
 * Wakeup callout for the delay mechanism: wakes whoever sleeps on
 * the event address passed in arg.
 *
 * It belongs to the delay code rather than the timeout code, and
 * lives here only for convenience.
 */
void
nasd_dux_delaycnt_kicker(
  caddr_t  arg)
{
  vm_offset_t event;

  event = (vm_offset_t)arg;
  thread_wakeup(event);
}

/*
 * Suspend timeout processing.
 *
 * Bumps the suspend count, then waits until no callback is in
 * progress. Suspensions nest; each call is undone by one
 * nasd_timeout_resume().
 */
void
nasd_timeout_suspend()
{
  LOCK_TM();
  nasd_timeout_suspended += 1;
  for(;;) {
    if (!nasd_timeout_running)
      break;
    /* the timeout thread broadcasts once its callback returns */
    NASD_WAIT_COND(nasd_timeout_suspend_cond,nasd_timeout_mutex);
  }
  UNLOCK_TM();
}

/*
 * Undo one nasd_timeout_suspend(). When the last outstanding
 * suspension is released, the timeout thread is woken so it resumes
 * processing the pending list.
 */
void
nasd_timeout_resume()
{
  LOCK_TM();
  nasd_timeout_suspended -= 1;
  if (!nasd_timeout_suspended) {
    TM_THREAD_KICK();
  }
  UNLOCK_TM();
}

/* Local Variables:  */
/* indent-tabs-mode: nil */
/* tab-width: 2 */
/* End: */
