/*
 * nasd_layout_reg.c
 *
 * Region-based layout for NASD embedded filesystem.
 *
 * Author: Jim Zelenka
 */
/*
 * Copyright (c) of Carnegie Mellon University, 1998,1999.
 *
 * Permission to reproduce, use, and prepare derivative works of
 * this software for internal use is granted provided the copyright
 * and "No Warranty" statements are included with all reproductions
 * and derivative works. This software may also be redistributed
 * without charge provided that the copyright and "No Warranty"
 * statements are included in all redistributions.
 *
 * NO WARRANTY. THIS SOFTWARE IS FURNISHED ON AN "AS IS" BASIS.
 * CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER
 * EXPRESSED OR IMPLIED AS TO THE MATTER INCLUDING, BUT NOT LIMITED
 * TO: WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY
 * OF RESULTS OR RESULTS OBTAINED FROM USE OF THIS SOFTWARE. CARNEGIE
 * MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT
 * TO FREEDOM FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
 */


#include <nasd/nasd_options.h>
#include <nasd/nasd_drive_options.h>
#include <nasd/nasd_general.h>
#include <nasd/nasd_types.h>
#include <nasd/nasd_freelist.h>
#include <nasd/nasd_itypes.h>
#include <nasd/nasd_mem.h>
#include <nasd/nasd_cache.h>
#include <nasd/nasd_common.h>
#include <nasd/nasd_timer.h>
#include <nasd/nasd_security.h>
#include <nasd/nasd_sys.h>
#include <nasd/nasd_layout.h>
#include <nasd/nasd_timeout.h>

#if NASD_DRIVE_LAYOUT_REGION_INCLUDE > 0

#define NASD_NL_REG_MAP_LOCK_DEBUG     0
#define NASD_NL_REG_INCLUDE_NPT        0
#define NASD_NL_REG_TIME_COBLKS_HELPER 0

/*
 * We divide the disk up into regions. Until we get
 * full, only related objects go into regions. We
 * store a "region map" which includes information
 * about how we've filled regions, and which regions
 * are related.
 *
 * We don't do anything with preallocation, yet.
 */

/*
 * Forward typedefs for the in-core relationship map entry and the
 * per-region descriptor. Region ids use the block-number type; ids
 * are 1-based, and 0 is used throughout this file to mean "no region"
 * (an unused descriptor or an empty map slot).
 */
typedef struct nasd_nl_reg_map_s     nasd_nl_reg_map_t;
typedef struct nasd_nl_reg_region_s  nasd_nl_reg_region_t;
typedef nasd_blkno_t                 nasd_nl_reg_id_t;

/*
 * Descriptor for one region. Arrays of these are read directly out of
 * the buffers of the region-map ("layout") blocks, so the size of this
 * structure determines nasd_nl_reg_ents_per_blk.
 *
 * reg_id is 0 when the region is unused. When it equals the region's
 * own id the region is a root; otherwise it names the root region
 * this region extends.
 *
 * NOTE(review): the counters are nasd_int8 while nasd_nl_reg_init()
 * asserts nasd_od_region_blocks <= 128 -- confirm the counter type can
 * represent a completely free (or completely full) region at that bound.
 */
struct nasd_nl_reg_region_s {
  nasd_nl_reg_id_t  reg_id;         /* root identity */
  nasd_int8         reg_freeblocks; /* free blocks */
  nasd_int8         reg_iblks;      /* node blocks */
  nasd_int8         reg_dblks;      /* data (or indirect) blocks */
  nasd_int8         reg_unrelated;  /* blocks unrelated to region id */
};

/*
 * Geometry of the in-core relationship map: number of hash buckets,
 * and number of related-region ids carried by a single map entry.
 */
#define NASD_NL_REG_MAP_NBUCKETS 109
#define NASD_NL_REG_MAP_BLOB       8

/* Bucket index for a root region id. */
#define NASD_NL_REG_MAP_HASH(_regid_) ((_regid_)%NASD_NL_REG_MAP_NBUCKETS)

/*
 * Block categories (node / data / unrelated), mirroring the
 * reg_iblks, reg_dblks and reg_unrelated counters.
 * Presumably these are the mark_what values accepted by
 * nasd_nl_reg_mark_extent_regions() -- TODO confirm against its body.
 */
#define NASD_NL_REG_MARK_NODE 1
#define NASD_NL_REG_MARK_DATA 2
#define NASD_NL_REG_MARK_UREL 3

/*
 * This structure is what's found in the buckets of the
 * nasd_nl_reg_map_buckets hashtable. Each bucket is a linked list.
 * If you want to know what regions are associated with region foo,
 * hash foo into nasd_nl_reg_map_buckets using NASD_NL_REG_MAP_HASH().
 * Scan the list sequentially, looking for entries with the base
 * field set to foo (there may be more than one). Scan the ent_ids
 * array of each such entry. Each slot in the ent_ids array holding
 * a nonzero id names an associated region; a slot holding 0 is empty.
 */
struct nasd_nl_reg_map_s {
  nasd_nl_reg_id_t       base;                          /* root region id */
  nasd_nl_reg_id_t       ent_ids[NASD_NL_REG_MAP_BLOB]; /* related ids, 0 = empty */
  nasd_nl_reg_map_t     *next;                          /* bucket chain / freelist link */
};

/*
 * Relationship hash table, indexed by NASD_NL_REG_MAP_HASH(root id).
 * Each bucket is a singly linked chain of nasd_nl_reg_map_t.
 */
nasd_nl_reg_map_t *nasd_nl_reg_map_buckets[NASD_NL_REG_MAP_NBUCKETS];

/*
 * Freelist supplying nasd_nl_reg_map_t entries. The three constants
 * below are the arguments nasd_nl_reg_init() hands to
 * NASD_FREELIST_CREATE (max free, increment) and NASD_FREELIST_PRIME
 * (initial count).
 */
nasd_freelist_t *nasd_nl_reg_map_freelist;
#define NASD_NL_REG_MAX_FREE_MAP 24576
#define NASD_NL_REG_MAP_INC         64
#define NASD_NL_REG_MAP_INITIAL  21504

/*
 * Mutex taken around updates of the relationship hash table. With
 * NASD_NL_REG_MAP_LOCK_DEBUG enabled, the lock macros also record the
 * file/line of the current holder (set after acquiring, cleared
 * before releasing).
 *
 * NOTE(review): the debug variants expand to a bare { } block rather
 * than do { } while (0), so "if (x) LOCK_REG_MAP(); else ..." would
 * not compile in debug builds.
 */
NASD_DECLARE_MUTEX(nasd_nl_reg_map_lock)
#if NASD_NL_REG_MAP_LOCK_DEBUG > 0
char *nasd_nl_reg_map_lock_file = NULL;
int   nasd_nl_reg_map_lock_line = 0;
#define LOCK_REG_MAP() { \
  NASD_LOCK_MUTEX(nasd_nl_reg_map_lock); \
  nasd_nl_reg_map_lock_file = __FILE__; \
  nasd_nl_reg_map_lock_line = __LINE__; \
}
#define UNLOCK_REG_MAP() { \
  nasd_nl_reg_map_lock_file = NULL; \
  nasd_nl_reg_map_lock_line = 0; \
  NASD_UNLOCK_MUTEX(nasd_nl_reg_map_lock); \
}
#else /* NASD_NL_REG_MAP_LOCK_DEBUG > 0 */
#define LOCK_REG_MAP()   NASD_LOCK_MUTEX(nasd_nl_reg_map_lock)
#define UNLOCK_REG_MAP() NASD_UNLOCK_MUTEX(nasd_nl_reg_map_lock)
#endif /* NASD_NL_REG_MAP_LOCK_DEBUG > 0 */

/*
 * Extent list whose "blocks" are the ids of regions not yet in use.
 * Filled by nasd_nl_reg_init_dynamic(); drawn from when a new,
 * unrelated region is wanted.
 */
nasd_odc_exlist_t nasd_nl_reg_unused_regions;

/*
 * Count of allocations that had to be placed as "unrelated" blocks
 * because no unused region remained. Zeroed by
 * nasd_nl_reg_compute_init(), reported at shutdown.
 */
nasd_uint64 nasd_nl_reg_added_unrelated;

#if NASD_NL_REG_SCOREBOARD > 0

/*
 * Scoreboard threshold tunable.
 * NOTE(review): not referenced in the visible part of this file;
 * its exact meaning should be confirmed where it is consumed.
 */
int nasd_nl_reg_scoreboard_thresh = 10;

/*
 * We hash chunks of regions when scoreboarding; this is how
 * many regions go together
 */
#define SCOREBOARD_CHUNK_SHIFT 5
#define SCOREBOARD_CHUNK       (1<<SCOREBOARD_CHUNK_SHIFT)

/* Region id rounded down to the first id of its chunk. */
#define SCOREBOARD_CHUNK_BASE(_regid_) ((_regid_)&~(SCOREBOARD_CHUNK-1))

/*
 * Per-region hash of chunks: the low four bits of the chunk index
 * select one of SCOREBOARD_HASH_WIDTH chains.
 */
#define SCOREBOARD_HASH_WIDTH 16
#define SCOREBOARD_HASH(_regid_) (((_regid_)>>SCOREBOARD_CHUNK_SHIFT)&0xf)

/* Freelist of chunk-hash entries and its sizing constants. */
nasd_freelist_t *nasd_nl_reg_chash_freelist;
#define NASD_NL_REG_MAX_FREE_CHASH 1536
#define NASD_NL_REG_CHASH_INC        64
#define NASD_NL_REG_CHASH_INITIAL  1536

/* Scoreboard on/off flag; starts off (0). */
int nasd_nl_reg_scoreboard_active = 0;

typedef struct nasd_nl_reg_chash_ent_s  nasd_nl_reg_chash_ent_t;
typedef struct nasd_nl_reg_info_s       nasd_nl_reg_info_t;

/* Window within which a later access counts as a "follow-up" (5s, 0ns). */
nasd_timespec_t nasd_nl_reg_scoreboard_interval = { 5, 0 };

/*
 * One chunk's worth of follow-up counters: cnt[] holds a counter for
 * each of the SCOREBOARD_CHUNK regions starting at baseid. Entries are
 * kept on a doubly linked, NULL-terminated chain hanging off
 * nasd_nl_reg_info_t.chash[].
 */
struct nasd_nl_reg_chash_ent_s {
  nasd_nl_reg_id_t          baseid;
  nasd_uint64               cnt[SCOREBOARD_CHUNK];
  nasd_nl_reg_chash_ent_t  *c_next;
  nasd_nl_reg_chash_ent_t  *c_prev;
};

/*
 * Per-region scoreboard state.
 *
 * The counters represent follow-up accesses.
 * That is, if r0->counters[r1] is set, that means
 * that r1 was accessed within nasd_nl_reg_scoreboard_interval
 * _AFTER_ r0 was accessed. This counters "array" is a
 * logical construct, represented sparsely by the chash
 * mechanism (see nasd_nl_reg_chash_ent_s).
 */
struct nasd_nl_reg_info_s {
  nasd_uint64               total;     /* total accesses */
  nasd_nl_reg_chash_ent_t  *chash[SCOREBOARD_HASH_WIDTH]; /* sparse counters */
  nasd_uint64               noaccess;  /* no subsequent access before timeout */
  nasd_uint64               accessed;  /* accessed this time around */
  nasd_timeout_handle_t     tm_handle; /* handle for removing from queue */
  nasd_nl_reg_info_t       *t_next;    /* next on the access-after queue */
  nasd_nl_reg_info_t       *t_prev;    /* prev on the access-after queue */
  nasd_nl_reg_info_t       *s_next;    /* link for sorting */
  nasd_nl_reg_info_t       *s_prev;    /* link for sorting */
};

/*
 * We break nasd_nl_reg_infos into multiple chunks to avoid trying to
 * allocate too large a contiguous chunk of memory and thus causing
 * stress for certain kernels' address mapping.
 *
 * nasd_nl_reg_compute_init() sizes the first
 * (NASD_NL_REG_INFOS_CHUNKS - 1) chunks at nasd_nl_reg_info_chunksize
 * entries each and puts the remainder in the last chunk
 * (nasd_nl_reg_info_last_chunksize).
 */
#define NASD_NL_REG_INFOS_CHUNKS 17
int nasd_nl_reg_info_chunksize;
int nasd_nl_reg_info_last_chunksize;
nasd_nl_reg_info_t *nasd_nl_reg_infos[NASD_NL_REG_INFOS_CHUNKS];
/* Sentinel head of the t_next/t_prev queue used by RI_ENQ/RI_DEQ. */
nasd_nl_reg_info_t nasd_nl_reg_current_queue;

/* Mutex for scoreboard state. */
NASD_DECLARE_MUTEX(nasd_nl_reg_scoreboard_lock)
#define LOCK_SCOREBOARD()   NASD_LOCK_MUTEX(nasd_nl_reg_scoreboard_lock)
#define UNLOCK_SCOREBOARD() NASD_UNLOCK_MUTEX(nasd_nl_reg_scoreboard_lock)

/*
 * Unlink _ri_ from the t_next/t_prev queue and clear its links.
 * Both neighbours are dereferenced unconditionally, so _ri_ must
 * currently be on the queue with non-NULL links.
 */
#define RI_DEQ(_ri_) { \
  (_ri_)->t_prev->t_next = (_ri_)->t_next; \
  (_ri_)->t_next->t_prev = (_ri_)->t_prev; \
  (_ri_)->t_next = NULL; \
  (_ri_)->t_prev = NULL; \
}

/*
 * Insert _ri_ immediately after the nasd_nl_reg_current_queue sentinel.
 * The sentinel's t_next is dereferenced, so it must be non-NULL
 * (presumably the sentinel points at itself when the queue is empty --
 * TODO confirm in the scoreboard init code).
 */
#define RI_ENQ(_ri_) { \
  (_ri_)->t_prev = &nasd_nl_reg_current_queue; \
  (_ri_)->t_next = nasd_nl_reg_current_queue.t_next; \
  (_ri_)->t_prev->t_next = (_ri_); \
  (_ri_)->t_next->t_prev = (_ri_); \
}

/*
 * Debugging consistency check of a region's chunk-hash chain.
 * Only hash slot 0 is examined (the check is skipped for any other
 * _h_). Three conditions are verified over (_ri_)->chash[_h_]:
 *   A. every entry's successor points back at it,
 *   B. every entry's predecessor points forward at it,
 *   C. no entry other than the head has a NULL c_prev.
 * On a violation the whole chain is dumped with nasd_printf() and
 * NASD_PANIC() is invoked. _pass_ is only echoed in the dump header
 * so the call site can be identified.
 */
#define CH_CHECK(_ri_,_h_,_pass_) { \
  nasd_nl_reg_chash_ent_t *_ch; \
  if ((_h_) == 0) { \
    if ((_ri_)->chash[_h_]) { \
      for(_ch=(_ri_)->chash[_h_];_ch;_ch=_ch->c_next) { \
        if (_ch->c_next) { \
          if (_ch->c_next->c_prev != _ch) { \
            nasd_printf("dump at %d   h %d  ri 0x%lx  pass %d A\n", __LINE__, _h_, _ri_, _pass_); \
            for(_ch=(_ri_)->chash[_h_];_ch;_ch=_ch->c_next) { \
              nasd_printf("  0x%lx    [ 0x%lx  0x%lx  0x%lx ]   %u\n", _ch, _ch->c_prev, _ch, _ch->c_next, _ch->baseid); \
            } \
            NASD_PANIC(); \
          } \
        } \
        if (_ch->c_prev) { \
          if (_ch->c_prev->c_next != _ch) { \
            nasd_printf("dump at %d   h %d  ri 0x%lx  pass %d B\n", __LINE__, _h_, _ri_, _pass_); \
            for(_ch=(_ri_)->chash[_h_];_ch;_ch=_ch->c_next) { \
              nasd_printf("  0x%lx    [ 0x%lx  0x%lx  0x%lx ]   %u\n", _ch, _ch->c_prev, _ch, _ch->c_next, _ch->baseid); \
            } \
            NASD_PANIC(); \
          } \
        } \
      } \
      for(_ch=(_ri_)->chash[_h_]->c_next;_ch;_ch=_ch->c_next) { \
        if (_ch->c_prev == NULL) { \
          nasd_printf("dump at %d   h %d  ri 0x%lx  pass %d\n", __LINE__, _h_, _ri_, _pass_); \
          for(_ch=(_ri_)->chash[_h_];_ch;_ch=_ch->c_next) { \
            nasd_printf("  0x%lx    [ 0x%lx  0x%lx  0x%lx ]   %u\n", _ch, _ch->c_prev, _ch, _ch->c_next, _ch->baseid); \
          } \
          NASD_PANIC(); \
        } \
      } \
    } \
  } \
}

/*
 * Remove _ch_ from the chain (_ri_)->chash[_h_] and clear its links.
 * The predecessor is saved first because c_prev is cleared before the
 * successor's back-pointer is repaired. When _ch_ has no predecessor
 * it must be the chain head (asserted), and the head is advanced.
 */
#define CH_DEQ(_ri_,_ch_,_h_) { \
  nasd_nl_reg_chash_ent_t *_prev; \
  _prev = (_ch_)->c_prev; \
  if ((_ch_)->c_prev) { \
    (_ch_)->c_prev->c_next = (_ch_)->c_next; \
    (_ch_)->c_prev = NULL; \
  } \
  else { \
    NASD_ASSERT((_ri_)->chash[_h_] == (_ch_)); \
    (_ri_)->chash[_h_] = (_ch_)->c_next; \
  } \
  if ((_ch_)->c_next) { \
    (_ch_)->c_next->c_prev = _prev; \
    (_ch_)->c_next = NULL; \
  } \
}

/*
 * Push _ch_ onto the front of the chain (_ri_)->chash[_h_].
 * _ch_ must be unlinked on entry (both links NULL, asserted).
 */
#define CH_ENQ(_ri_,_ch_,_h_) { \
  NASD_ASSERT((_ch_)->c_next == NULL); \
  NASD_ASSERT((_ch_)->c_prev == NULL); \
  (_ch_)->c_next = (_ri_)->chash[_h_]; \
  (_ri_)->chash[_h_] = (_ch_); \
  if ((_ch_)->c_next) \
    (_ch_)->c_next->c_prev = (_ch_); \
  (_ch_)->c_prev = NULL; \
}

/* Return a chunk-hash entry to its freelist (c_next is the freelist link). */
#define CH_FREE(_ch_) { \
  NASD_FREELIST_FREE(nasd_nl_reg_chash_freelist,_ch_,c_next); \
}

/*
 * Obtain a chunk-hash entry from the freelist. On success both links
 * are cleared so the entry satisfies CH_ENQ's preconditions; _ch_ may
 * be NULL afterwards, which callers must check. Only the links are
 * initialised here -- baseid and cnt[] are left as the freelist
 * returned them.
 */
#define CH_GET(_ch_) { \
  NASD_FREELIST_GET(nasd_nl_reg_chash_freelist,_ch_,c_next, \
    (nasd_nl_reg_chash_ent_t *)); \
  if (_ch_) { \
    (_ch_)->c_next = NULL; \
    (_ch_)->c_prev = NULL; \
  } \
}

#if NASD_NL_REG_SCOREBOARD_TIMERS > 0
typedef struct nasd_nl_reg_scoreboard_timers_s nasd_nl_reg_scoreboard_timers_t;

/*
 * Optional instrumentation for the scoreboard, compiled in only when
 * NASD_NL_REG_SCOREBOARD_TIMERS is enabled. The fields are not
 * updated in the visible part of this file; judging by their names
 * they accumulate time spent in the access and timeout paths and
 * count access calls / regions touched -- TODO confirm at the
 * update sites.
 */
struct nasd_nl_reg_scoreboard_timers_s {
  nasd_timespec_t  access_time;
  nasd_timespec_t  timeout_time;
  nasd_uint64      access_calls;
  nasd_uint64      access_regs;
};

nasd_nl_reg_scoreboard_timers_t nasd_nl_reg_scoreboard_timers;
#endif /* NASD_NL_REG_SCOREBOARD_TIMERS > 0 */

#endif /* NASD_NL_REG_SCOREBOARD > 0 */

/*
 * Wait until the cache entry is neither busy nor invalid.
 * This variant does no locking of its own; judging by the name and
 * by the locked variant below, the caller is expected to already
 * hold the entry's block lock -- TODO confirm.
 */
#define NASD_NL_REG_VALIDATE_NONBUSY_NOLOCK(_reg_ent_) { \
  nasd_odc_wait_not_busy_invalid(_reg_ent_); \
}

/*
 * Same wait, for callers that do not hold the block lock. The flags
 * are first tested without the lock; only when BUSY or INVALID is
 * set is the block lock taken around the wait.
 */
#define NASD_NL_REG_VALIDATE_NONBUSY(_reg_ent_) { \
  if ((_reg_ent_)->data_flags&(NASD_CD_BUSY|NASD_CD_INVALID)) { \
    NASD_ODC_LOCK_BLOCK(_reg_ent_); \
    nasd_odc_wait_not_busy_invalid(_reg_ent_); \
    NASD_ODC_UNLOCK_BLOCK(_reg_ent_); \
  } \
}

/*
 * LAYOUT
 *
 * In the data region, there is a region of region map entries,
 * followed by enough data blocks that the region of map entries
 * fully describes them, then another region of map entries, then
 * more data regions, etc. A region of map blocks followed by some
 * number of blocks is a "clump."
 */

/*
 * Block number of first block in first clump
 * (set by nasd_nl_reg_compute_init()).
 */
nasd_blkno_t nasd_nl_reg_first_clumpblk;

/*
 * Number of non-map regions on the disk.
 * Region ids run from 1 through this value inclusive.
 */
nasd_blkcnt_t nasd_nl_reg_cnt;

/*
 * Number of non-map regions in a clump
 * (nasd_od_region_blocks * nasd_nl_reg_ents_per_blk).
 */
nasd_blkcnt_t nasd_nl_reg_clump_regs;

/*
 * Number of clumps on the disk, including a trailing partial
 * clump if there is one.
 */
nasd_blkcnt_t nasd_nl_reg_clump_cnt;

/*
 * Number of region map entries in a single data block
 * (NASD_OD_BASIC_BLOCKSIZE / sizeof(nasd_nl_reg_region_t)).
 */
nasd_blkcnt_t nasd_nl_reg_ents_per_blk;

/*
 * Number of blocks in a clump: one region of map blocks plus
 * nasd_nl_reg_clump_regs regions of data.
 */
nasd_blkcnt_t nasd_nl_reg_clump_blocks;

/*
 * Number of map entry blocks on disk
 * (nasd_nl_reg_clump_cnt * nasd_od_region_blocks).
 */
nasd_blkcnt_t nasd_nl_reg_map_blks;

/*
 * ID of last region and number of blocks in said region,
 * if last region is partially covered. Both are 0 otherwise.
 */
nasd_nl_reg_id_t nasd_nl_reg_last_reg;
int nasd_nl_reg_last_reg_size;

/*
 * When in "whole region" mode, what fraction of a region must we grab for
 * our "last one" to break into one more region?
 */
int nasd_nl_reg_wholereg_onemore_frac = 3;

/*
 * How big to make hash regions in unused region list
 * (passed to nasd_odc_exlist_hashify() by nasd_nl_reg_init_dynamic()).
 */
int nasd_nl_reg_unused_region_hashchunk = 1000;

/*
 * Array of nasd_nl_reg_map_blks pointers to the statically allocated
 * cache entries for the map blocks; allocated by
 * nasd_nl_reg_init_dynamic(), NULL until then.
 */
nasd_odc_ent_t  **nasd_nl_reg_mapent_ptrs = NULL; /* ptrs to map ents */

/*
 * Nonzero once nasd_nl_reg_format() has completed in this run;
 * cleared by nasd_nl_reg_init().
 */
int nasd_nl_reg_formatted = 0;

/*
 * Prototypes for the routines in this file: id/block translation,
 * init and shutdown hooks, region-map entry access, and the
 * colocated-block allocation helpers.
 */
void nasd_nl_reg_region_to_slot(nasd_nl_reg_id_t regid,
  nasd_blkno_t *map_blkno, int *map_slot);
void nasd_nl_reg_blkno_to_regid(nasd_blkno_t blkno,
  nasd_nl_reg_id_t *regid);
void nasd_nl_reg_range(nasd_nl_reg_id_t regid, nasd_blkno_t *firstp,
  nasd_blkno_t *lastp);
void nasd_nl_reg_shutdown_map_freelist(void *ignored);
nasd_status_t nasd_nl_reg_compute_init(nasd_blkno_t first_real_data_blk);
void nasd_nl_reg_shutdown_layout_blocks(void *ignored);
nasd_status_t nasd_nl_reg_get_regent(nasd_nl_reg_id_t regid,
  nasd_odc_ent_t **reg_ent_p, int *slotp);
nasd_status_t _nasd_nl_reg_release_regent(nasd_odc_ent_t *reg_ent,
  char *file, int line);
nasd_status_t nasd_nl_reg_get_coblks_helper(int partnum,
  nasd_nl_reg_id_t regid, nasd_blkcnt_t nblocks, int node_only,
  nasd_odc_exlist_t *exlist, nasd_nl_reg_id_t *reg_rootp);
nasd_status_t nasd_nl_reg_get_coblks_regions(int partnum,
  nasd_nl_reg_id_t reg_root, nasd_nl_reg_id_t post_regid,
  nasd_blkcnt_t nblocks, int whole_regs_only,
  int node_only, nasd_nl_reg_map_t **bucketp, nasd_odc_exlist_t *exlist,
  nasd_blkno_t *la_blkp);
nasd_status_t nasd_nl_reg_get_root(nasd_nl_reg_id_t regid,
  nasd_nl_reg_id_t *reg_rootp);
nasd_status_t nasd_nl_reg_get_coblks(int partnum, nasd_blkno_t co_blk,
  nasd_blkcnt_t nblocks, int node_only, nasd_odc_exlist_ent_t **exle_p,
  nasd_blkcnt_t *blocks_allocated_p);
nasd_status_t nasd_nl_reg_reparent_reg(nasd_nl_reg_id_t regid);
nasd_status_t nasd_nl_reg_mark_extent_regions(nasd_odc_exlist_ent_t *exle,
  int mark_what, int mark_dir);
void nasd_nl_reg_dump_re(nasd_nl_reg_region_t *re);

/*
 * Release wrapper that passes the call site's file and line to
 * _nasd_nl_reg_release_regent().
 *
 * NOTE(review): the expansion ends in ';' followed by a line
 * continuation, so the macro swallows the following (blank) line and
 * "x = nasd_nl_reg_release_regent(e);" expands to two statements.
 * That breaks use as the sole body of an if/else; callers should be
 * audited before the trailing "; \" is dropped.
 */
#define nasd_nl_reg_release_regent(_ent_) \
  _nasd_nl_reg_release_regent(_ent_, __FILE__, __LINE__); \

/*
 * Optional timing accumulators for nasd_nl_reg_get_coblks_helper(),
 * compiled in only when NASD_NL_REG_TIME_COBLKS_HELPER is enabled.
 * All are zeroed by nasd_nl_reg_compute_init(), and
 * nasd_nl_reg_print_coblks_helper_time() is registered as a shutdown
 * hook by nasd_nl_reg_init().
 */
#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
nasd_timespec_t  nasd_nl_reg_get_coblks_helper_time;
nasd_timespec_t  nasd_nl_reg_get_coblks_helper_get_regent_time;
nasd_timespec_t  nasd_nl_reg_get_coblks_helper_o1_time;
nasd_timespec_t  nasd_nl_reg_get_coblks_helper_o2_time;
nasd_timespec_t  nasd_nl_reg_get_coblks_helper_getblk_time;
nasd_timespec_t  nasd_nl_reg_get_coblks_helper_bail1_time;
nasd_timespec_t  nasd_nl_reg_get_coblks_helper_relblk_time;
nasd_timespec_t  nasd_nl_reg_get_coblks_helper_dance_time;
void nasd_nl_reg_print_coblks_helper_time(void *ignored);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */


/*
 * Locate the map entry that describes a region.
 *
 * regid      1-based region id
 * map_blkno  out: block number of the map block holding the entry
 * map_slot   out: index of the entry within that map block
 *
 * The map blocks of a clump sit at the very start of the clump, so
 * the answer is the clump's first block plus however many whole map
 * blocks precede the entry.
 */
void
nasd_nl_reg_region_to_slot(
  nasd_nl_reg_id_t   regid,
  nasd_blkno_t      *map_blkno,
  int               *map_slot)
{
  nasd_blkno_t zero_based, which_clump, idx_in_clump;
  nasd_blkno_t map_blk_offset, first_map_blk;

  /* ids start at 1; switch to a 0-based index for the arithmetic */
  zero_based = regid - 1;

  which_clump = zero_based / nasd_nl_reg_clump_regs;
  idx_in_clump = zero_based % nasd_nl_reg_clump_regs;

  map_blk_offset = idx_in_clump / nasd_nl_reg_ents_per_blk;
  *map_slot = idx_in_clump % nasd_nl_reg_ents_per_blk;

  first_map_blk = nasd_nl_reg_first_clumpblk
    + which_clump * nasd_nl_reg_clump_blocks;
  *map_blkno = first_map_blk + map_blk_offset;
}

/*
 * Map a block number to the id of the region containing it.
 *
 * The obvious thing to do here is blkno/nasd_od_region_blocks.
 * This is not correct because it neglects the fact that the
 * first region of each clump isn't assigned an id, and that
 * we start numbering regions at 1. So, we map our block number
 * into clump space, then determine which region in the clump
 * it is, dodge a couple off-by-one errors, and produce a result.
 *
 * in_blkno  absolute block number
 * regid     out: 1-based region id, or 0 (with a warning printed)
 *           when the block lies in the map region of its clump
 */
void
nasd_nl_reg_blkno_to_regid(
  nasd_blkno_t       in_blkno,
  nasd_nl_reg_id_t  *regid)
{
  nasd_blkno_t blkno, clump, blk_in_clump, reg_in_clump;

  /* work relative to the start of the first clump */
  blkno = in_blkno - nasd_nl_reg_first_clumpblk;
  clump = blkno / nasd_nl_reg_clump_blocks;
  blk_in_clump = blkno % nasd_nl_reg_clump_blocks;

  /*
   * Region slot 0 of every clump holds the map blocks and has no id;
   * data regions therefore occupy slots 1..nasd_nl_reg_clump_regs,
   * which conveniently matches the 1-based id numbering.
   */
  reg_in_clump = blk_in_clump / nasd_od_region_blocks;
  if (reg_in_clump == 0) {
    nasd_printf("DRIVE WARNING: reg_in_clump 0 in "
      "nasd_nl_reg_blkno_to_regid() (blkno=%u)\n",
      blkno);
    *regid = 0;
    return;
  }
  *regid = (clump * nasd_nl_reg_clump_regs) + reg_in_clump;
  /*
   * regid is unsigned, cannot be negative. Valid ids run from 1
   * through nasd_nl_reg_cnt inclusive, so the last region must
   * pass this check.
   */
  NASD_ASSERT(*regid <= nasd_nl_reg_cnt);
}

/*
 * Compute the block range occupied by a region.
 *
 * regid   1-based region id
 * firstp  out: first block of the region
 * lastp   out: last block of the region (first + region size - 1)
 *
 * The result always spans a full nasd_od_region_blocks blocks.
 */
void
nasd_nl_reg_range(
  nasd_nl_reg_id_t   regid,
  nasd_blkno_t      *firstp,
  nasd_blkno_t      *lastp)
{
  nasd_nl_reg_id_t zero_based;
  nasd_blkno_t which_clump, slot, start;

  zero_based = regid - 1;
  which_clump = zero_based / nasd_nl_reg_clump_regs;
  slot = zero_based % nasd_nl_reg_clump_regs;

  /*
   * Skip the preceding clumps, then the map region at the head of
   * this clump (hence slot+1), then rebase onto the first clump block.
   */
  start = (which_clump * nasd_nl_reg_clump_blocks)
    + ((slot + 1) * nasd_od_region_blocks)
    + nasd_nl_reg_first_clumpblk;

  *firstp = start;
  *lastp = start + nasd_od_region_blocks - 1;
}

/*
 * Clean up region map hash table. First, move
 * all hash table entries to freelist. Next,
 * destroy the freelist and its contents.
 */
void
nasd_nl_reg_shutdown_map_freelist(
  void  *ignored)
{
  nasd_nl_reg_map_t *me;
  int i;

  nasd_printf("%" NASD_64u_FMT " unrelated adds\n",
    nasd_nl_reg_added_unrelated);

  for(i=0;i<NASD_NL_REG_MAP_NBUCKETS;i++) {
    while(nasd_nl_reg_map_buckets[i]) {
      me = nasd_nl_reg_map_buckets[i];
      nasd_nl_reg_map_buckets[i] = me->next;
      NASD_FREELIST_FREE(nasd_nl_reg_map_freelist,me,next);
    }
  }
  NASD_FREELIST_DESTROY(nasd_nl_reg_map_freelist,next,
    (nasd_nl_reg_map_t *));
}

/*
 * Compute basic parameters of regioning.
 * Used by format and dynamic init.
 *
 * Derives the clump geometry (entries per map block, regions and
 * blocks per clump, clump count, region count, map block count) from
 * nasd_od_blocks, nasd_od_region_blocks and the first usable block.
 * Space past the last full clump either becomes a trailing partial
 * clump or, when it is too small to hold a map region plus any data,
 * is trimmed off nasd_od_blocks.
 *
 * first_real_data_blk  first block available to the layout (ignored
 *                      when NASD_NL_REG_INCLUDE_NPT is enabled)
 *
 * Returns NASD_SUCCESS, or the scoreboard init failure code when the
 * scoreboard is compiled in.
 */
nasd_status_t
nasd_nl_reg_compute_init(
  nasd_blkno_t       first_real_data_blk)
{
  nasd_blkcnt_t clump_sz, covered, uncovered, extra;
  nasd_blkno_t eb;
#if NASD_NL_REG_SCOREBOARD > 0
  nasd_status_t rc;
#endif /* NASD_NL_REG_SCOREBOARD > 0 */

  nasd_nl_reg_added_unrelated = 0;

#if NASD_NL_REG_INCLUDE_NPT > 0
  /*
   * Don't use first_real_data_block here, because we
   * want to include NPT blocks.
   */
  nasd_nl_reg_first_clumpblk = 1;
#else /* NASD_NL_REG_INCLUDE_NPT > 0 */
  nasd_nl_reg_first_clumpblk = first_real_data_blk;
#endif /* NASD_NL_REG_INCLUDE_NPT > 0 */

  nasd_nl_reg_ents_per_blk = NASD_OD_BASIC_BLOCKSIZE
    / sizeof(nasd_nl_reg_region_t);
  nasd_nl_reg_clump_regs = nasd_od_region_blocks * nasd_nl_reg_ents_per_blk;

  /* a clump is its data regions plus one region of map blocks */
  clump_sz = nasd_nl_reg_clump_regs + 1;

  nasd_nl_reg_clump_blocks = clump_sz * nasd_od_region_blocks;

  nasd_nl_reg_clump_cnt = (nasd_od_blocks - nasd_nl_reg_first_clumpblk)
    / nasd_nl_reg_clump_blocks;

  covered = nasd_nl_reg_first_clumpblk +
    nasd_nl_reg_clump_cnt * nasd_nl_reg_clump_blocks;
  uncovered = nasd_od_blocks - covered;

  nasd_nl_reg_cnt = nasd_nl_reg_clump_cnt * nasd_nl_reg_clump_regs;

  nasd_nl_reg_last_reg = 0;
  nasd_nl_reg_last_reg_size = 0;

  if (uncovered > nasd_od_region_blocks) {
    /*
     * We have a partial clump at the end of the disk.
     */
    nasd_nl_reg_clump_cnt++;
    /*
     * (uncovered-1) is really
     * (uncovered - nasd_od_region_blocks + (nasd_od_region_blocks - 1))
     * to subtract out the first nasd_od_region_blocks to be the
     * chunk of map entries for the clump (somewhat suboptimal, but
     * makes life much easier elsewhere), then rounds up the number
     * of regions remaining. extra gets assigned the number of
     * additional non-map regions in this new clump (including
     * the last one, which may be fractional).
     */
    extra = (uncovered-1) / nasd_od_region_blocks;
    nasd_nl_reg_cnt += extra;
    /*
     * End of the last region in the partial clump. The clump starts
     * with one region of map blocks, followed by the extra data
     * regions, hence (extra+1). Omitting the map region would leave
     * eb strictly below nasd_od_blocks and the fractional last
     * region would never be detected.
     */
    eb = covered + ((extra+1) * nasd_od_region_blocks);
    if (eb > nasd_od_blocks) {
      nasd_nl_reg_last_reg = nasd_nl_reg_cnt;
      nasd_nl_reg_last_reg_size = nasd_od_region_blocks
        - (eb - nasd_od_blocks);
      NASD_ASSERT(nasd_nl_reg_last_reg_size > 0);
    }
  }
  else {
    /* leftover cannot hold a map region plus data; give it up */
    nasd_od_blocks -= uncovered;
  }

  nasd_nl_reg_map_blks = nasd_nl_reg_clump_cnt * nasd_od_region_blocks;

#if NASD_NL_REG_SCOREBOARD > 0
  nasd_nl_reg_info_chunksize =
    nasd_nl_reg_cnt / (NASD_NL_REG_INFOS_CHUNKS - 1);
  nasd_nl_reg_info_last_chunksize =
    nasd_nl_reg_cnt % (NASD_NL_REG_INFOS_CHUNKS - 1);

  rc = nasd_nl_reg_scoreboard_init();
  if (rc)
    return(rc);
#endif /* NASD_NL_REG_SCOREBOARD > 0 */

#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TIMESPEC_ZERO(nasd_nl_reg_get_coblks_helper_time);
#ifdef TMT
#undef TMT
#endif
#define TMT(_tm_) NASD_TIMESPEC_ZERO(_tm_)
  TMT(nasd_nl_reg_get_coblks_helper_get_regent_time);
  TMT(nasd_nl_reg_get_coblks_helper_o1_time);
  TMT(nasd_nl_reg_get_coblks_helper_o2_time);
  TMT(nasd_nl_reg_get_coblks_helper_getblk_time);
  TMT(nasd_nl_reg_get_coblks_helper_bail1_time);
  TMT(nasd_nl_reg_get_coblks_helper_relblk_time);
  TMT(nasd_nl_reg_get_coblks_helper_dance_time);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */

  return(NASD_SUCCESS);
}

/*
 * One-time, disk-independent initialisation of the region layout:
 * resets module state, creates the map lock, the unused-region
 * extent list and the map-entry freelist, and registers a shutdown
 * hook for each of them on nasd_odc_shutdown.
 *
 * config  drive configuration (not consulted here)
 *
 * Returns NASD_SUCCESS, NASD_NO_MEM if the freelist cannot be
 * created, or the status of whichever setup step failed.
 */
nasd_status_t
nasd_nl_reg_init(
  nasd_od_config_t *config)
{
  nasd_status_t rc;
  int bucket;

  NASD_ASSERT(nasd_od_region_blocks <= 128);

  nasd_odc_read_regions = 1;

  /* start from a clean slate */
  nasd_nl_reg_mapent_ptrs = NULL;
  nasd_nl_reg_formatted = 0;
  bucket = 0;
  while (bucket < NASD_NL_REG_MAP_NBUCKETS) {
    nasd_nl_reg_map_buckets[bucket] = NULL;
    bucket++;
  }

  /* map lock, torn down automatically at shutdown */
  rc = nasd_mutex_init(&nasd_nl_reg_map_lock);
  if (rc) {
    return(rc);
  }
  rc = nasd_shutdown_mutex(nasd_odc_shutdown, &nasd_nl_reg_map_lock);
  if (rc) {
    return(rc);
  }

  /* list of unused regions; undo by hand if the hook can't be added */
  rc = nasd_odc_init_exlist(&nasd_nl_reg_unused_regions);
  if (rc) {
    return(rc);
  }
  rc = nasd_shutdown_proc(nasd_odc_shutdown, nasd_odc_shutdown_exlist,
    &nasd_nl_reg_unused_regions);
  if (rc) {
    nasd_odc_shutdown_exlist(&nasd_nl_reg_unused_regions);
    return(rc);
  }

  /* freelist backing the relationship hash table */
  NASD_FREELIST_CREATE(nasd_nl_reg_map_freelist, NASD_NL_REG_MAX_FREE_MAP,
    NASD_NL_REG_MAP_INC, sizeof(nasd_nl_reg_map_t));
  if (nasd_nl_reg_map_freelist == NULL) {
    return(NASD_NO_MEM);
  }
  NASD_FREELIST_PRIME(nasd_nl_reg_map_freelist, NASD_NL_REG_MAP_INITIAL,
    next, (nasd_nl_reg_map_t *));

  rc = nasd_shutdown_proc(nasd_odc_shutdown, nasd_nl_reg_shutdown_map_freelist,
    NULL);
  if (rc) {
    nasd_nl_reg_shutdown_map_freelist(NULL);
    return(rc);
  }

#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  rc = nasd_shutdown_proc(nasd_odc_shutdown,
    nasd_nl_reg_print_coblks_helper_time, NULL);
  if (rc) {
    /* no need to do it now */
    return(rc);
  }
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */

  return(NASD_SUCCESS);
}

/*
 * Format-time setup of the on-disk region map.
 *
 * Computes the layout geometry, then for every clump zero-fills each
 * of its map blocks through the block cache (marking them dirty) and
 * takes a reference on the clump's map-block range so those blocks
 * count as in use. Sets nasd_nl_reg_formatted on success.
 *
 * config               drive configuration (not consulted here)
 * first_real_data_blk  first block available to the layout
 *
 * Returns NASD_SUCCESS or the status of the failing step. Failure to
 * obtain a map block from the cache is fatal (panic).
 */
nasd_status_t
nasd_nl_reg_format(
  nasd_od_config_t  *config,
  nasd_blkno_t       first_real_data_blk)
{
  nasd_blkno_t clump, map_first, map_end, cur;
  nasd_odc_exlist_ent_t *range_ent;
  nasd_odc_ent_t *blk;
  nasd_status_t rc;

  rc = nasd_nl_reg_compute_init(first_real_data_blk);
  if (rc) {
    return(rc);
  }

  /* single extent entry, reused to describe each clump's map range */
  rc = nasd_odc_get_extent_list(&range_ent);
  if (rc) {
    return(rc);
  }

  clump = 0;
  while (clump < nasd_nl_reg_clump_cnt) {
    /*
     * map_first = first map block of this clump
     * map_end   = one past the last map block of this clump
     * cur       = map block currently being initialised
     */
    map_first = nasd_nl_reg_first_clumpblk
      + (clump * nasd_nl_reg_clump_blocks);
    map_end = map_first + nasd_od_region_blocks;

    cur = map_first;
    while (cur < map_end) {
      rc = nasd_odc_block_get(NULL, cur,
        NASD_ODC_L_FORCE, &blk,
        NASD_ID_NULL, 0, NASD_ODC_T_LAYOUT, NULL);
      if (rc != NASD_SUCCESS) {
        nasd_printf("DRIVE ERROR: rc=0x%x (%s) getting layout block %u\n",
          rc, nasd_error_string(rc), cur);
        NASD_PANIC();
      }

      /* zero the block and mark it dirty + valid */
      NASD_ODC_WLOCK_BLOCK_DATA(blk);
      NASD_ODC_LOCK_BLOCK(blk);
      nasd_odc_wait_not_busy(blk);
      nasd_odc_dirty_ent(blk);
      bzero((char *)blk->data.buf, NASD_OD_BASIC_BLOCKSIZE);
      blk->data_flags &= ~(NASD_CD_NZ|NASD_CD_INVALID);
      NASD_ODC_UNLOCK_BLOCK(blk);
      NASD_ODC_WUNLOCK_BLOCK_DATA(blk);
      nasd_odc_block_release(blk);

      cur++;
    }

    /* reference the whole map range of this clump */
    range_ent->range.first = map_first;
    range_ent->range.last = map_first + nasd_od_region_blocks - 1;
    rc = nasd_odc_ref_ranges(-1, range_ent, 1, NULL, NASD_ODC_REF_NOFLAGS);
    if (rc) {
      nasd_odc_release_extent_list(range_ent);
      return(rc);
    }

    clump++;
  }

  nasd_printf("DRIVE: formatted with %d layout regions\n", nasd_nl_reg_cnt);
  nasd_odc_release_extent_list(range_ent);

  nasd_nl_reg_formatted = 1;

  return(NASD_SUCCESS);
}

/*
 * Shutdown hook for the static layout (region map) blocks.
 *
 * First touches every map block through the cache: each is fetched
 * with FORCE|BLOCK|LOAD, write-locked, waited on until it is neither
 * busy nor invalid, then unlocked and released. Afterwards every
 * statically allocated cache entry recorded in
 * nasd_nl_reg_mapent_ptrs is freed and its slot cleared.
 *
 * Also called directly by nasd_nl_reg_init_dynamic() to back out of
 * a partially completed setup.
 *
 * ignored  unused shutdown-hook argument
 */
void
nasd_nl_reg_shutdown_layout_blocks(
  void  *ignored)
{
  nasd_blkno_t b, clumpno;
  nasd_odc_ent_t *ent;
  nasd_status_t rc;
  int i;

  for(i=0;i<nasd_nl_reg_map_blks;i++) {
    /* i-th map block overall -> block number within its clump */
    clumpno = i / nasd_od_region_blocks;
    b = (clumpno * nasd_nl_reg_clump_blocks)
      + nasd_nl_reg_first_clumpblk + (i % nasd_od_region_blocks);
    rc = nasd_odc_block_get(NULL, b,
        NASD_ODC_L_FORCE|NASD_ODC_L_BLOCK|NASD_ODC_L_LOAD,
        &ent, NASD_ID_NULL, 0, NASD_ODC_T_LAYOUT_STATIC, NULL);
    if (rc != NASD_SUCCESS)
      NASD_PANIC();
    /* nothing is read from the block; we only wait for it to settle */
    NASD_ODC_WLOCK_BLOCK_DATA(ent);
    NASD_NL_REG_VALIDATE_NONBUSY(ent);
    NASD_ODC_WUNLOCK_BLOCK_DATA(ent);
    nasd_odc_block_release(ent);
  }

  if (nasd_nl_reg_mapent_ptrs) {
    for(i=0;i<nasd_nl_reg_map_blks;i++) {
      if (nasd_nl_reg_mapent_ptrs[i]) {
        nasd_odc_free_ent(nasd_nl_reg_mapent_ptrs[i]);
        nasd_nl_reg_mapent_ptrs[i] = NULL;
      }
    }
  }
}

/*
 * Init any diskstate-dependent state.
 * Here, we load in the region map and parse it
 * into our funky hash table.
 *
 * Steps:
 *  1. If the disk was formatted during this run, eject the ordinary
 *     layout blocks format left in the cache; otherwise compute the
 *     layout geometry now.
 *  2. Hashify the unused-region list and allocate the array of
 *     static map-block cache entries, pre-inserting one entry per
 *     map block.
 *  3. Walk every region descriptor on disk: in-use non-root regions
 *     are recorded in the relationship hash under their root id,
 *     unused regions are added to nasd_nl_reg_unused_regions.
 *
 * first_real_data_blk  first block available to the layout
 *
 * Returns NASD_SUCCESS, NASD_NO_MEM, or the status of the failing
 * step. Failure to obtain a map block from the cache is fatal.
 */
nasd_status_t
nasd_nl_reg_init_dynamic(
  nasd_blkno_t  first_real_data_blk)
{
  nasd_blkno_t b, clumpno, regbase, regtop, regblk;
  nasd_blkcnt_t reg_cnt, root_reg_cnt;
  nasd_nl_reg_region_t *regents, *re;
  nasd_nl_reg_map_t **bucketp, *me;
  nasd_nl_reg_id_t regid;
  nasd_odc_ent_t *ent;
  int sz, i, j, k, h;
  nasd_status_t rc;

  if (nasd_nl_reg_formatted) {
    /*
     * Get rid of the layout blocks we computed earlier.
     * We'll replace them with static layout blocks below.
     */
    for(clumpno=0;clumpno<nasd_nl_reg_clump_cnt;clumpno++) {
      regbase = (clumpno * nasd_nl_reg_clump_blocks)
        + nasd_nl_reg_first_clumpblk;
      regtop = regbase + nasd_od_region_blocks;
      for(regblk=regbase;regblk<regtop;regblk++) {
        nasd_odc_block_eject_by_num(regblk);
      }
    }
  }
  else {
    rc = nasd_nl_reg_compute_init(first_real_data_blk);
    if (rc)
      return(rc);
  }

  /*
   * Establish hashing for the unused region map to speed
   * things along.
   */
  rc = nasd_odc_exlist_hashify(&nasd_nl_reg_unused_regions,
    nasd_nl_reg_cnt, nasd_nl_reg_unused_region_hashchunk);
  if (rc)
    return(rc);

  sz = nasd_nl_reg_map_blks * sizeof(nasd_odc_ent_t *);
  NASD_Malloc(nasd_nl_reg_mapent_ptrs, sz, (nasd_odc_ent_t **));
  if (nasd_nl_reg_mapent_ptrs == NULL)
    return(NASD_NO_MEM);
  bzero((char *)nasd_nl_reg_mapent_ptrs, sz);
  rc = nasd_shutdown_mem(nasd_odc_shutdown, nasd_nl_reg_mapent_ptrs, sz);
  if (rc) {
    NASD_Free(nasd_nl_reg_mapent_ptrs, sz);
    nasd_nl_reg_mapent_ptrs = NULL;
    return(rc);
  }

  nasd_printf("DRIVE: %d static layout blocks\n", nasd_nl_reg_map_blks);
  for(i=0;i<nasd_nl_reg_map_blks;i++) {
    clumpno = i / nasd_od_region_blocks;
    b = (clumpno * nasd_nl_reg_clump_blocks)
      + nasd_nl_reg_first_clumpblk + (i % nasd_od_region_blocks);

    rc = nasd_odc_alloc_ent(&ent);
    if (rc) {
      nasd_nl_reg_shutdown_layout_blocks(NULL);
      return(rc);
    }
    /* record before init so the cleanup path can free it */
    nasd_nl_reg_mapent_ptrs[i] = ent;

    rc = nasd_odc_init_ent(ent, NASD_ODC_T_LAYOUT_STATIC, 1);
    if (rc) {
      nasd_nl_reg_shutdown_layout_blocks(NULL);
      return(rc);
    }

    ent->blkno = b;
    nasd_odc_preinsert(ent);
  }

  rc = nasd_shutdown_proc(nasd_odc_shutdown,
    nasd_nl_reg_shutdown_layout_blocks, NULL);
  if (rc) {
    nasd_nl_reg_shutdown_layout_blocks(NULL);
    return(rc);
  }

  /*
   * Now the cache contains our magic static blocks.
   * Re-look them up (and validate them in the process),
   * and construct our reverse-lookup hash table as well.
   */
  regid = 0;
  reg_cnt = 0;
  root_reg_cnt = 0;
  for(i=0;i<nasd_nl_reg_map_blks;i++) {
    clumpno = i / nasd_od_region_blocks;
    b = (clumpno * nasd_nl_reg_clump_blocks)
      + nasd_nl_reg_first_clumpblk + (i % nasd_od_region_blocks);
    rc = nasd_odc_block_get(NULL, b,
        NASD_ODC_L_FORCE|NASD_ODC_L_BLOCK|NASD_ODC_L_LOAD,
        &ent, NASD_ID_NULL, 0, NASD_ODC_T_LAYOUT_STATIC, NULL);
    if (rc != NASD_SUCCESS)
      NASD_PANIC();
    NASD_ODC_WLOCK_BLOCK_DATA(ent);
    NASD_NL_REG_VALIDATE_NONBUSY(ent);
    regents = (nasd_nl_reg_region_t *)ent->data.buf;
    for(j=0;j<nasd_nl_reg_ents_per_blk;j++) {
      regid++; /* first one is 1 */
      if (regid > nasd_nl_reg_cnt)
        break;
      re = &regents[j];
      if (re->reg_id) {
        if (nasd_nl_reg_formatted != 0) {
          nasd_printf("DRIVE ERROR: found region on newly-formatted disk\n");
          nasd_printf("i=%d b=%u ent=0x%lx regents=0x%lx re=0x%lx "
            "regid=%u j=%d\n",
            i, b, (unsigned long)ent, (unsigned long)regents,
            (unsigned long)re, regid, j);
        }
        NASD_ASSERT(nasd_nl_reg_formatted == 0);
        /*
         * Valid region in use. Keep track of it.
         */
        reg_cnt++;
        if (re->reg_id == regid) {
          /*
           * This region is the "base" region.
           * We don't need to do any special work
           * here.
           */
          root_reg_cnt++;
        }
        else {
          /*
           * This region is the expansion of some other
           * base region. Add a mapping that indicates
           * this fact.
           */
          LOCK_REG_MAP();
          h = NASD_NL_REG_MAP_HASH(re->reg_id);
          bucketp = &nasd_nl_reg_map_buckets[h];
          for(me=*bucketp;me;me=me->next) {
            if (me->base != re->reg_id)
              continue;
            for(k=0;k<NASD_NL_REG_MAP_BLOB;k++) {
              if (me->ent_ids[k] == 0) {
                me->ent_ids[k] = regid;
                break;
              }
            }
            /*
             * Stop at the first entry that had room. Leaving the
             * scan with me non-NULL is what tells the code below
             * that no new entry is needed; continuing would record
             * the id again in later entries and always allocate.
             */
            if (k < NASD_NL_REG_MAP_BLOB)
              break;
          }
          if (me == NULL) {
            /*
             * We were not able to append to an existing entry,
             * so add a new entry.
             */
            NASD_FREELIST_GET(nasd_nl_reg_map_freelist,me,next,
              (nasd_nl_reg_map_t *));
            if (me == NULL) {
              UNLOCK_REG_MAP();
              /* drop the data lock as well as the reference */
              NASD_ODC_WUNLOCK_BLOCK_DATA(ent);
              nasd_odc_block_release(ent);
              return(NASD_NO_MEM);
            }
            me->base = re->reg_id;
            me->ent_ids[0] = regid;
            for(k=1;k<NASD_NL_REG_MAP_BLOB;k++)
              me->ent_ids[k] = 0;
            me->next = *bucketp;
            *bucketp = me;
          }
          UNLOCK_REG_MAP();
        }
      }
      else {
        /*
         * Unused region. Track it.
         */
        rc = nasd_odc_exlist_release_oneblock(&nasd_nl_reg_unused_regions,
          regid);
        if (rc) {
          /* don't leave the map block locked and referenced */
          NASD_ODC_WUNLOCK_BLOCK_DATA(ent);
          nasd_odc_block_release(ent);
          return(rc);
        }
      }
    }
    NASD_ODC_WUNLOCK_BLOCK_DATA(ent);
    nasd_odc_block_release(ent);
  }

  nasd_printf("DRIVE: %u active regions, %u active root regions\n",
    reg_cnt, root_reg_cnt);
#if 0
  nasd_printf("UNUSED REGIONS\n");
  nasd_odc_exlist_dump(&nasd_nl_reg_unused_regions);
#endif

  return(NASD_SUCCESS);
}

/*
 * Caller holds partition write lock.
 *
 * Allocate the single block that will hold a new object's node.
 * The layout hint is converted to a placement. In order, attempt:
 *  1. colocate object in that region
 *  2. locate in a related region
 *  3. allocate a new region
 *  4. just put it anywhere
 *
 * If no valid hint is given, start with #3. When the hint is usable,
 * the whole job is handed to nasd_nl_reg_get_coblks().
 *
 * partnum          partition the new node is being created in
 * prealloc_blocks  not used (we don't do anything with preallocation)
 * layout_hint      may be NULL; lh_nid names the object to colocate with
 * exle_p           on success, the extent holding the allocated block
 * pre_exle_p       always set to NULL
 *
 * Returns NASD_SUCCESS with *exle_p set, NASD_NO_SPACE when no block
 * could be found, or the error from whichever underlying call failed.
 */
nasd_status_t
nasd_nl_reg_get_node_block(
  int                      partnum,
  nasd_blkcnt_t            prealloc_blocks,
  nasd_layout_hint_t      *layout_hint,
  nasd_odc_exlist_ent_t  **exle_p,
  nasd_odc_exlist_ent_t  **pre_exle_p)
{
  nasd_blkno_t co_lvl2_hint, co_blk, re_bid, re_first, re_last;
  nasd_nl_reg_region_t *re, *regents;
  nasd_blkcnt_t blocks_got, rel_cnt;
  nasd_odc_exlist_ent_t *exle;
  nasd_nodenum_t co_nodenum;
  nasd_generation_t co_gen;
  nasd_identifier_t co_id;
  nasd_odc_ent_t *reg_ent;
  nasd_nl_reg_id_t regid;
  nasd_status_t rc, rc2;
  int co_pn, reg_slot;

  co_nodenum = 0;
  co_blk = 0;
  *exle_p = NULL;
  *pre_exle_p = NULL;

  if (layout_hint) {
    co_id = layout_hint->lh_nid;
    /* get location of the hinted object on disk */
    rc = nasd_od_decompose_id(co_id, &co_pn, &co_nodenum, &co_lvl2_hint,
      &co_gen);
    if (rc)
      co_nodenum = 0; /* unusable hint: behave as if none was given */
  }

  if (co_nodenum) {
    /*
     * Note that the dance we do here allows us to colocate with an
     * object in another partition.
     */
    rc = nasd_odc_nodenum_to_blknum(co_pn, co_nodenum, co_lvl2_hint, &co_blk);
    if (rc)
      co_blk = 0;
  }

  if (co_blk) {
    rc = nasd_nl_reg_get_coblks(partnum, co_blk, 1, 1, exle_p, &blocks_got);
    if ((rc == NASD_SUCCESS) && (blocks_got == 0)) {
      /*
       * nasd_nl_reg_get_coblks() treats "ran out of space" as a
       * partial success and hands back however many blocks it found,
       * possibly none. We need exactly one block, so coming back
       * empty-handed is a failure for us. Report it the same way the
       * unhinted path below does instead of returning success with
       * *exle_p == NULL.
       */
      rc = NASD_NO_SPACE;
    }
    return(rc);
  }

  /*
   * We want to put this in a new region, unrelated to
   * any old ones. Find such a region.
   */
  rc = nasd_odc_exlist_get_oneblock(&nasd_nl_reg_unused_regions, 1, &re_bid);
  if (rc) {
    if (rc != NASD_NO_SPACE) {
      return(rc);
    }
    /*
     * No unused regions remain. We must add ourselves
     * to an existing region as unrelated blocks.
     */
    nasd_nl_reg_added_unrelated++;
    nasd_printf("unrelated add\n");

    rc = nasd_odc_free_get_range(1, co_blk, &exle, &blocks_got);
    if (rc) {
      return(rc);
    }
    NASD_ASSERT(blocks_got == 1);

    /* whichever region the block landed in gets charged for it */
    nasd_nl_reg_blkno_to_regid(exle->range.first, &regid);

    rc = nasd_nl_reg_get_regent(regid, &reg_ent, &reg_slot);
    if (rc) {
      /* can't account for the block, so give it back */
      rc2 = nasd_odc_free_release_blocks(exle, &rel_cnt);
      if (rc2 != NASD_SUCCESS) {
        NASD_PANIC();
      }
      NASD_ASSERT(rel_cnt == blocks_got);
      return(rc);
    }

    NASD_ODC_WLOCK_BLOCK_DATA(reg_ent);

    NASD_ODC_LOCK_BLOCK(reg_ent);
    NASD_NL_REG_VALIDATE_NONBUSY_NOLOCK(reg_ent);
    nasd_odc_dirty_ent(reg_ent);
    NASD_ODC_UNLOCK_BLOCK(reg_ent);

    regents = (nasd_nl_reg_region_t *)reg_ent->data.buf;
    re = &regents[reg_slot];
    NASD_ASSERT(re->reg_id != 0);
    re->reg_unrelated++;
    re->reg_freeblocks--;

    NASD_ODC_WUNLOCK_BLOCK_DATA(reg_ent);

    rc = nasd_nl_reg_release_regent(reg_ent);
    if (rc) {
      NASD_PANIC();
    }

    *exle_p = exle;

    return(NASD_SUCCESS);
  }

  /*
   * re_bid is a new region that we'll break in
   * (unrelated to any old regions)
   * Put our new block there.
   *
   * NOTE(review): if either of the next two steps fails, re_bid has
   * already been taken off nasd_nl_reg_unused_regions and is not put
   * back on the error return. Confirm that is intended.
   */
  regid = re_bid;
  nasd_nl_reg_range(regid, &re_first, &re_last);

  rc = nasd_odc_free_get_range_bounded_partial(1, re_first, re_last,
    &exle, &blocks_got);
  if (rc) {
    return(rc);
  }
  NASD_ASSERT(blocks_got == 1);

  rc = nasd_nl_reg_get_regent(regid, &reg_ent, &reg_slot);
  if (rc) {
    rc2 = nasd_odc_free_release_blocks(exle, &rel_cnt);
    if (rc2 != NASD_SUCCESS) {
      NASD_PANIC();
    }
    NASD_ASSERT(rel_cnt == blocks_got);
    return(rc);
  }

  NASD_ODC_WLOCK_BLOCK_DATA(reg_ent);

  NASD_ODC_LOCK_BLOCK(reg_ent);
  NASD_NL_REG_VALIDATE_NONBUSY_NOLOCK(reg_ent);
  nasd_odc_dirty_ent(reg_ent);
  NASD_ODC_UNLOCK_BLOCK(reg_ent);

  /*
   * Initialize the region's map slot. The region is its own root
   * (reg_id == regid), and our node block is its only occupant.
   */
  regents = (nasd_nl_reg_region_t *)reg_ent->data.buf;
  re = &regents[reg_slot];
  NASD_ASSERT(re->reg_id == 0);
  re->reg_id = regid;
  if (regid == nasd_nl_reg_last_reg)
    re->reg_freeblocks = nasd_nl_reg_last_reg_size - 1;
  else
    re->reg_freeblocks = nasd_od_region_blocks - 1;
  re->reg_iblks = 1;
  re->reg_dblks = 0;
  re->reg_unrelated = 0;

  NASD_ODC_WUNLOCK_BLOCK_DATA(reg_ent);

  rc = nasd_nl_reg_release_regent(reg_ent);
  if (rc) {
    NASD_PANIC();
  }

  *exle_p = exle;

  return(NASD_SUCCESS);
}

/*
 * Look up the region map block, and the slot within it, that
 * describe region regid. The slot is stored through slotp and
 * a reference to the map block through reg_ent_p.
 *
 * Waiting for the block to be usable (not busy, not invalid,
 * etc) is left to the caller.
 *
 * Region id 0 is rejected with NASD_BAD_BLKNO. If the block
 * cannot be obtained, that error is returned and *reg_ent_p
 * is not written.
 */
nasd_status_t
nasd_nl_reg_get_regent(
  nasd_nl_reg_id_t    regid,
  nasd_odc_ent_t    **reg_ent_p,
  int                *slotp)
{
  nasd_odc_ent_t *map_ent;
  nasd_blkno_t map_blkno;
  nasd_status_t status;
  int map_slot;

  NASD_ASSERT(regid != 0);
  if (regid == 0)
    return(NASD_BAD_BLKNO);

  /* which map block, and which slot in it, covers this region */
  nasd_nl_reg_region_to_slot(regid, &map_blkno, &map_slot);
  *slotp = map_slot;

  status = nasd_odc_block_get(NULL, map_blkno,
    NASD_ODC_L_FORCE|NASD_ODC_L_BLOCK|NASD_ODC_L_LOAD,
    &map_ent, NASD_ID_NULL, 0, NASD_ODC_T_LAYOUT_STATIC, NULL);
  if (!status) {
    *reg_ent_p = map_ent;
    return(NASD_SUCCESS);
  }

  return(status);
}

/*
 * Give back a region map block reference. Only the block
 * reference is released here; the callers in this file drop
 * their data lock on the block before calling.
 *
 * file and line are not used by this function (presumably
 * they identify the call site for debugging).
 */
nasd_status_t
_nasd_nl_reg_release_regent(
  nasd_odc_ent_t  *reg_ent,
  char            *file,
  int              line)
{
  nasd_status_t status;

  status = nasd_odc_block_release(reg_ent);

  return(status);
}

/*
 * Helper function for nasd_nl_reg_get_coblks().
 *
 * Examine one region (regid). If it has free blocks, take as
 * many as are still wanted (nblocks less what exlist already
 * holds), merge them into exlist, and charge them to the region
 * as node blocks (node_only) or data blocks. If reg_rootp is
 * non-NULL, the reg_id recorded in the region's map slot is
 * passed back through it.
 *
 * A region with no free blocks is not an error: exlist is left
 * untouched and success is returned.
 */
NASD_INLINE nasd_status_t
nasd_nl_reg_get_coblks_helper(
  int                 partnum,
  nasd_nl_reg_id_t    regid,
  nasd_blkcnt_t       nblocks,
  int                 node_only,
  nasd_odc_exlist_t  *exlist,
  nasd_nl_reg_id_t   *reg_rootp)
{
  nasd_odc_exlist_ent_t *grabbed;
  nasd_blkno_t range_lo, range_hi;
  nasd_nl_reg_region_t *re;
  nasd_status_t rc, rel_rc;
  nasd_odc_ent_t *map_ent;
  nasd_blkcnt_t take, got;
  int map_slot;
#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  nasd_timer_t tm, tm2;
  nasd_timespec_t ts;

  NASD_TM_START(&tm);
  NASD_TM_START(&tm2);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */

  rc = nasd_nl_reg_get_regent(regid, &map_ent, &map_slot);
#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TM_STOP(&tm2);
  NASD_TM_ELAPSED_TS(&tm2, &ts);
  NASD_ATOMIC_TIMESPEC_ADD(&nasd_nl_reg_get_coblks_helper_get_regent_time, &ts);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */
  if (rc)
    goto done_get_coblks_helper;

#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TM_START(&tm2);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */
  NASD_ODC_WLOCK_BLOCK_DATA(map_ent);

  NASD_NL_REG_VALIDATE_NONBUSY(map_ent);

  /* our region's slot within the map block */
  re = &((nasd_nl_reg_region_t *)map_ent->data.buf)[map_slot];
  NASD_ASSERT(re->reg_id != 0);
#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TM_STOP(&tm2);
  NASD_TM_ELAPSED_TS(&tm2, &ts);
  NASD_ATOMIC_TIMESPEC_ADD(&nasd_nl_reg_get_coblks_helper_o1_time, &ts);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */

  if (re->reg_freeblocks == 0) {
    /* nothing to take from here; just report the root and leave */
    goto finish_get_coblks_helper;
  }

  /* the smaller of what's still wanted and what this region has */
  take = NASD_MIN(nblocks-exlist->num, re->reg_freeblocks);
  /* block range covered by the region */
  nasd_nl_reg_range(regid, &range_lo, &range_hi);
#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TM_START(&tm2);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */
  /* pull free blocks out of that range */
  rc = nasd_odc_free_get_range_bounded_partial(take, range_lo, range_hi,
    &grabbed, &got);
#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TM_STOP(&tm2);
  NASD_TM_ELAPSED_TS(&tm2, &ts);
  NASD_ATOMIC_TIMESPEC_ADD(&nasd_nl_reg_get_coblks_helper_getblk_time, &ts);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */
  if (rc) {
    /* allocation failed: drop the map block and report the error */
#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
    NASD_TM_START(&tm2);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */
    NASD_ODC_WUNLOCK_BLOCK_DATA(map_ent);
    rel_rc = nasd_nl_reg_release_regent(map_ent);
    if (rel_rc) {
      NASD_PANIC();
    }
#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
    NASD_TM_STOP(&tm2);
    NASD_TM_ELAPSED_TS(&tm2, &ts);
    NASD_ATOMIC_TIMESPEC_ADD(&nasd_nl_reg_get_coblks_helper_bail1_time, &ts);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */
    goto done_get_coblks_helper;
  }
  NASD_ASSERT(got == take);

  /* fold the new blocks into the caller's running list */
#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TM_START(&tm2);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */
  rc = nasd_odc_free_release_blocks_nolock(exlist,
    grabbed, &got);
#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TM_STOP(&tm2);
  NASD_TM_ELAPSED_TS(&tm2, &ts);
  NASD_ATOMIC_TIMESPEC_ADD(&nasd_nl_reg_get_coblks_helper_relblk_time, &ts);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */
  if (rc) {
    /*
     * Not expected to fail: we supply the extent entries
     * ourselves, so the merge only has to rearrange pointers.
     */
    NASD_PANIC();
  }

#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TM_START(&tm2);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */
  /* the map block is about to change; mark it dirty */
  NASD_ODC_LOCK_BLOCK(map_ent);
  nasd_odc_dirty_ent(map_ent);
  NASD_ODC_UNLOCK_BLOCK(map_ent);
  re->reg_freeblocks -= take;
  if (!node_only) {
    re->reg_dblks += take;
  }
  else {
    re->reg_iblks += take;
  }
#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TM_STOP(&tm2);
  NASD_TM_ELAPSED_TS(&tm2, &ts);
  NASD_ATOMIC_TIMESPEC_ADD(&nasd_nl_reg_get_coblks_helper_dance_time, &ts);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */

finish_get_coblks_helper:

#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TM_START(&tm2);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */

  if (reg_rootp)
    *reg_rootp = re->reg_id;
  NASD_ODC_WUNLOCK_BLOCK_DATA(map_ent);

  rc = nasd_nl_reg_release_regent(map_ent);
  if (rc) {
    NASD_PANIC();
  }
#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TM_STOP(&tm2);
  NASD_TM_ELAPSED_TS(&tm2, &ts);
  NASD_ATOMIC_TIMESPEC_ADD(&nasd_nl_reg_get_coblks_helper_o2_time, &ts);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */

done_get_coblks_helper:

#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
  NASD_TM_STOP(&tm);
  NASD_TM_ELAPSED_TS(&tm, &ts);
  NASD_ATOMIC_TIMESPEC_ADD(&nasd_nl_reg_get_coblks_helper_time, &ts);
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */

  return(rc);
}

/*
 * Call with REG_MAP lock held.
 *
 * Grab new regions on behalf of nasd_nl_reg_get_coblks().
 * In a separate function so we can reuse for different
 * steps in optimized layout.
 *
 * Unused regions are claimed one at a time, associated with
 * reg_root, and their free blocks merged into exlist until exlist
 * holds nblocks blocks or no unused regions remain.
 *
 * partnum          partition being allocated for (not used here)
 * reg_root         root region the new regions become associated with
 * post_regid       what region we'd like to locate after
 * nblocks          total number of blocks wanted in exlist
 * whole_regs_only  nonzero: stop once the remaining need is too small
 *                  to justify breaking in another region
 * node_only        nonzero: blocks are charged as node blocks
 * bucketp          hash bucket of reg_root; new map entries are
 *                  pushed on the front of it
 * exlist           running list of blocks allocated so far
 * la_blkp          set to the last block of each region we break in
 *
 * Running out of unused regions is not an error: we return
 * NASD_SUCCESS with whatever exlist holds. Other failures are
 * returned to the caller.
 */
nasd_status_t
nasd_nl_reg_get_coblks_regions(
  int                  partnum,
  nasd_nl_reg_id_t     reg_root,
  nasd_nl_reg_id_t     post_regid, /* what region we'd like to locate after */
  nasd_blkcnt_t        nblocks,
  int                  whole_regs_only,
  int                  node_only,
  nasd_nl_reg_map_t  **bucketp,
  nasd_odc_exlist_t   *exlist,
  nasd_blkno_t        *la_blkp)
{
  nasd_blkcnt_t rel_cnt, blocks_got, nr, need;
  nasd_odc_exlist_ent_t *exle, *re_exle;
  nasd_nl_reg_region_t *re, *regents;
  nasd_blkno_t re_first, re_last;
  nasd_odc_ent_t *reg_ent;
  nasd_nl_reg_id_t regid;
  nasd_nl_reg_map_t *me;
  nasd_status_t rc, rc2;
  nasd_blkno_t re_bid;
  int reg_slot, i;

  re_exle = NULL;
  if (whole_regs_only) {
    /*
     * Work out how many regions the outstanding need amounts to.
     * A leftover fraction only earns an extra region when it is at
     * least 1/nasd_nl_reg_wholereg_onemore_frac of a region.
     */
    need = nblocks - exlist->num ;
    nr = need / nasd_od_region_blocks;
    if ((need % nasd_od_region_blocks)
      >= (nasd_od_region_blocks / nasd_nl_reg_wholereg_onemore_frac))
    {
      nr++;
    }
    rc = nasd_odc_exlist_get_contig(&nasd_nl_reg_unused_regions,
      post_regid, nr, &re_exle);
    if (rc)
      re_exle = NULL;
    /*
     * If that worked, re_exle is now a list of regions that we
     * should "prefer", because they're contiguous. If it did not,
     * re_exle is NULL and we pick regions one at a time below.
     */
  }

  regid = reg_root;

  while(exlist->num < nblocks) {
    if (whole_regs_only) {
      if ((nblocks - exlist->num)
        < (nasd_od_region_blocks / nasd_nl_reg_wholereg_onemore_frac))
      {
        break;
      }
    }
    if (re_exle) {
      /*
       * We got "preferred" regions above, so use them.
       */
      rc = NASD_SUCCESS;
      regid = re_exle->range.first;
      NASD_ASSERT(re_exle->range.first <= re_exle->range.last);
      if (re_exle->range.first == re_exle->range.last) {
        /* this was the last one, get rid of it */
        re_exle->next = NULL;
        nasd_odc_release_extent_list(re_exle);
        re_exle = NULL;
      }
      else {
        re_exle->range.first++;
      }
      re_bid = regid;
    }
    else {
      /*
       * The first time through here, regid is reg_root. After that,
       * it's the last region we looked at. This helps maximize
       * sequentiality.
       */
      rc = nasd_odc_exlist_get_oneblock(&nasd_nl_reg_unused_regions,
        regid, &re_bid);
    }
    if (rc) {
      if (rc != NASD_NO_SPACE) {
        NASD_ASSERT(re_exle == NULL);
        return(rc);
      }
      /*
       * No more unused regions, stack into existing
       * regions.
       */
      break;
    }

    /*
     * re_bid is a new region that we'll break in
     * Start putting blocks there.
     *
     * NOTE(review): on the error returns below, the preferred
     * regions still in re_exle go back to nasd_nl_reg_unused_regions,
     * but re_bid itself does not. Confirm that is intended.
     */
    regid = re_bid;
    nasd_nl_reg_range(regid, &re_first, &re_last);
    *la_blkp = re_last;

    /* get the hash entry first so we can't fail after marking */
    NASD_FREELIST_GET(nasd_nl_reg_map_freelist,me,next,
      (nasd_nl_reg_map_t *));
    if (me == NULL) {
      if (re_exle) {
        rc2 = nasd_odc_free_release_blocks_to_list(&nasd_nl_reg_unused_regions,
          re_exle, &rel_cnt);
        if (rc2) {
          NASD_PANIC();
        }
      }
      return(NASD_NO_MEM);
    }

    rc = nasd_odc_free_get_range_bounded_partial(nblocks-exlist->num,
      re_first, re_last, &exle, &blocks_got);
    if (rc) {
      NASD_FREELIST_FREE(nasd_nl_reg_map_freelist,me,next);
      if (re_exle) {
        rc2 = nasd_odc_free_release_blocks_to_list(&nasd_nl_reg_unused_regions,
          re_exle, &rel_cnt);
        if (rc2) {
          NASD_PANIC();
        }
      }
      return(rc);
    }

    /*
     * Get the region map block BEFORE handing exle over to exlist.
     * Once exle has been merged into exlist it is no longer ours to
     * release; doing the lookup first means that on failure we can
     * give the blocks straight back to the free block manager without
     * also leaving them on exlist (where the caller's cleanup would
     * release them a second time).
     */
    rc = nasd_nl_reg_get_regent(regid, &reg_ent, &reg_slot);
    if (rc) {
      rc2 = nasd_odc_free_release_blocks(exle, &rel_cnt);
      if (rc2 != NASD_SUCCESS) {
        NASD_PANIC();
      }
      NASD_ASSERT(rel_cnt == blocks_got);
      NASD_FREELIST_FREE(nasd_nl_reg_map_freelist,me,next);
      if (re_exle) {
        rc2 = nasd_odc_free_release_blocks_to_list(&nasd_nl_reg_unused_regions,
          re_exle, &rel_cnt);
        if (rc2) {
          NASD_PANIC();
        }
      }
      return(rc);
    }

    /*
     * Now merge this new range with what we've already got in exlist
     */
    rc = nasd_odc_free_release_blocks_nolock(exlist, exle, &rel_cnt);
    if (rc) {
      /*
       * Should not be possible for this to fail- we're providing all
       * the necessary memory, so there's nothing to do but swizzle
       * pointers.
       */
      NASD_PANIC();
    }
    NASD_ASSERT(blocks_got == rel_cnt);

    NASD_ODC_WLOCK_BLOCK_DATA(reg_ent);

    NASD_ODC_LOCK_BLOCK(reg_ent);
    NASD_NL_REG_VALIDATE_NONBUSY_NOLOCK(reg_ent);
    nasd_odc_dirty_ent(reg_ent);
    NASD_ODC_UNLOCK_BLOCK(reg_ent);

    /*
     * Mark use in region
     */
    regents = (nasd_nl_reg_region_t *)reg_ent->data.buf;
    re = &regents[reg_slot];
    NASD_ASSERT(re->reg_id == 0);
    re->reg_id = reg_root;
    if (regid == nasd_nl_reg_last_reg)
      re->reg_freeblocks = nasd_nl_reg_last_reg_size - blocks_got;
    else
      re->reg_freeblocks = nasd_od_region_blocks - blocks_got;
    if (node_only) {
      NASD_ASSERT(blocks_got == 1);
      re->reg_iblks = blocks_got;
      re->reg_dblks = 0;
    }
    else {
      re->reg_iblks = 0;
      re->reg_dblks = blocks_got;
    }
    re->reg_unrelated = 0;

    NASD_ODC_WUNLOCK_BLOCK_DATA(reg_ent);

    rc = nasd_nl_reg_release_regent(reg_ent);
    if (rc) {
      NASD_PANIC();
    }

    /*
     * Now hash into reverse lookup
     */
    me->base = reg_root;
    me->ent_ids[0] = regid;
    for(i=1;i<NASD_NL_REG_MAP_BLOB;i++)
      me->ent_ids[i] = 0;
    me->next = *bucketp;
    *bucketp = me;

    NASD_ASSERT(exlist->num <= nblocks);
  }

  NASD_ASSERT(re_exle == NULL);
  return(NASD_SUCCESS);
}

/*
 * Report, through reg_rootp, the root region recorded for
 * region regid in the region map.
 *
 * Returns the error from the map block lookup if that fails
 * (in which case *reg_rootp is untouched); otherwise returns
 * the result of releasing the map block.
 */
nasd_status_t
nasd_nl_reg_get_root(
  nasd_nl_reg_id_t   regid,
  nasd_nl_reg_id_t  *reg_rootp)
{
  nasd_nl_reg_region_t *re;
  nasd_odc_ent_t *map_ent;
  nasd_status_t status;
  int map_slot;

  status = nasd_nl_reg_get_regent(regid, &map_ent, &map_slot);
  if (status)
    return(status);

  /* read-only access: a read lock on the block data suffices */
  NASD_ODC_RLOCK_BLOCK_DATA(map_ent);

  NASD_NL_REG_VALIDATE_NONBUSY(map_ent);

  re = &((nasd_nl_reg_region_t *)map_ent->data.buf)[map_slot];
  NASD_ASSERT(re->reg_id != 0);
  *reg_rootp = re->reg_id;

  NASD_ODC_RUNLOCK_BLOCK_DATA(map_ent);

  status = nasd_nl_reg_release_regent(map_ent);

  return(status);
}

/*
 * Given a block, attempt to allocate some blocks in
 * the same region, or regions associated with the
 * region (associating new regions if necessary and
 * possible).
 *
 * partnum             partition being allocated for
 * co_blk              block we want to be near
 * nblocks             number of blocks wanted
 * node_only           nonzero: blocks are charged to regions as node
 *                     blocks, otherwise as data blocks
 * exle_p              on success, head of the list of extents
 *                     allocated (NULL if none were)
 * blocks_allocated_p  on success, number of blocks in that list
 *
 * Sources are tried in this order until nblocks have been found:
 *  - whole unused regions, if nblocks is at least a region's worth
 *  - the region containing co_blk
 *  - that region's root region
 *  - regions already associated with the root
 *  - unused regions (which become associated with the root)
 *  - anywhere at all, charged to the owning regions as "unrelated"
 *
 * Running out of space is NOT an error: we return NASD_SUCCESS with
 * however many blocks we found, which may be fewer than nblocks and
 * may be zero. On any other failure everything we grabbed is given
 * back and the error is returned; *exle_p and *blocks_allocated_p
 * are not written in that case.
 */
nasd_status_t
nasd_nl_reg_get_coblks(
  int                      partnum,
  nasd_blkno_t             co_blk,
  nasd_blkcnt_t            nblocks,
  int                      node_only,
  nasd_odc_exlist_ent_t  **exle_p,
  nasd_blkcnt_t           *blocks_allocated_p)
{
  nasd_odc_exlist_ent_t *exle, *exle_oldfirst, *exle_newfirst;
  nasd_odc_exlist_ent_t *exle_r, *exle_tmp, *exle_oldlast;
  nasd_nl_reg_map_t **bucketp, *me, *me_p, *me_next;
  nasd_nl_reg_id_t regid, reg_root, looked_last;
  nasd_blkno_t re_first, re_last, la_blk;
  nasd_blkcnt_t rel_cnt, blocks_got;
  nasd_odc_exlist_t *exlist;
  nasd_status_t rc, rc2;
  int h, i, f;

  la_blk = co_blk;
  nasd_nl_reg_blkno_to_regid(co_blk, &regid);

  /* exlist accumulates (and sorts) everything we allocate */
  rc = nasd_odc_exlist_get(&exlist);
  if (rc)
    return(rc);

  /*
   * If we want more blocks than will fit in a single
   * region, then start by grabbing whole regions.
   */
  if (nblocks >= nasd_od_region_blocks) {
    NASD_ASSERT(node_only == 0);
    rc = nasd_nl_reg_get_root(regid, &reg_root);
    if (rc)
      goto bail;

    h = NASD_NL_REG_MAP_HASH(reg_root);
    bucketp = &nasd_nl_reg_map_buckets[h];

    LOCK_REG_MAP();
    rc = nasd_nl_reg_get_coblks_regions(partnum, reg_root, regid, nblocks,
      1, 0, bucketp, exlist, &la_blk);
    UNLOCK_REG_MAP();
    if (rc)
      goto bail;
  }

  NASD_ASSERT(exlist->num <= nblocks);
  if (exlist->num == nblocks)
    goto got_enough;

  /*
   * Look in this region to see if there's
   * enough blocks to satisfy us.
   */
  looked_last = regid;
  rc = nasd_nl_reg_get_coblks_helper(partnum, regid,
    nblocks, node_only, exlist, &reg_root);
  if (rc)
    goto bail;

  NASD_ASSERT(exlist->num <= nblocks);
  if (exlist->num == nblocks)
    goto got_enough;

  /*
   * We didn't find enough blocks yet to satisfy the
   * allocation. If the region we just looked in (regid)
   * is not the "root" region for this identity set,
   * then look there.
   */
  if (regid != reg_root) {
    looked_last = reg_root;
    rc = nasd_nl_reg_get_coblks_helper(partnum, reg_root,
      nblocks, node_only, exlist, &reg_root);
    if (rc)
      goto bail;
  }

  NASD_ASSERT(exlist->num <= nblocks);
  if (exlist->num == nblocks)
    goto got_enough;

  /*
   * We still need blocks. The region we started in didn't
   * satisfy us. Nor did the root region (if that's a
   * different one). Now search the hash table for related
   * regions. We'll find them in hash table order, but the
   * extent list operations will correctly resort all the
   * blocks we find into a sequential ordering in exlist
   * when we use the release_blocks_nolock operation. This
   * is pretty psychologically abusive, but really all the
   * freelist stuff is an extent list manager, and we want
   * to manage a list of extents, so that's the code we use.
   *
   * Searching the hash table, we'll grab blocks from a region
   * anytime we come across a candidate region that has free
   * blocks. We have to actually go lookup the region map block
   * for each candidate region to get the locking protocol right
   * (consider that we might be talking about an unused map block,
   * or we might be talking about a map block that two threads are
   * reading, one is modifying, and another is writing back to the
   * disk).
   */
  LOCK_REG_MAP();

  h = NASD_NL_REG_MAP_HASH(reg_root);
  bucketp = &nasd_nl_reg_map_buckets[h];
  for(me_p=NULL,me=*bucketp;me&&(exlist->num<nblocks);me=me_next) {
    me_next = me->next;
    if (me->base != reg_root) {
      /*
       * Belongs to some other root that hashes to this bucket. It
       * stays on the chain, so it is the predecessor of whatever
       * comes next. (Without this, unlinking an empty entry below
       * would splice out every foreign entry ahead of it as well.)
       */
      me_p = me;
      continue;
    }
    /* f counts the in-use ids we see in this entry */
    for(f=i=0;(i<NASD_NL_REG_MAP_BLOB)&&(exlist->num<nblocks);i++) {
      if (me->ent_ids[i]) {
        f++;
        regid = me->ent_ids[i];
        /* Try to get some blocks from the indicated region. */
        looked_last = regid;
        rc = nasd_nl_reg_get_coblks_helper(partnum, regid,
          nblocks, node_only, exlist, NULL);
        if (rc) {
          UNLOCK_REG_MAP();
          goto bail;
        }
        NASD_ASSERT(exlist->num <= nblocks);
        if (exlist->num == nblocks)
          break;
        nasd_nl_reg_range(regid, &re_first, &re_last);
        la_blk = re_last;
      }
    }
    if (f == 0) {
      /*
       * No entries in this unit. Garbage-collect it to save
       * future work.
       */
      if (me_p)
        me_p->next = me_next;
      else
        *bucketp = me_next;
      NASD_FREELIST_FREE(nasd_nl_reg_map_freelist,me,next);
    }
    else {
      me_p = me;
    }
  }

  NASD_ASSERT(exlist->num <= nblocks);
  if (exlist->num == nblocks) {
    UNLOCK_REG_MAP();
    goto got_enough;
  }

  /*
   * No associated regions have enough space. Break into unused regions.
   */
  rc = nasd_nl_reg_get_coblks_regions(partnum, reg_root, looked_last, nblocks,
    0, node_only, bucketp, exlist, &la_blk);
  if (rc) {
    UNLOCK_REG_MAP();
    goto bail;
  }

  UNLOCK_REG_MAP();

  NASD_ASSERT(exlist->num <= nblocks);
  if (exlist->num == nblocks)
    goto got_enough;

  /*
   * Still want more blocks. No space in existing regions that are
   * part of the logical association. No empty regions. Stack 'em
   * into other regions.
   */
  rc = nasd_odc_free_get_range(nblocks-exlist->num, la_blk, &exle,
    &blocks_got);
  if (rc == NASD_SUCCESS) {
    rc = nasd_nl_reg_mark_extent_regions(exle, NASD_NL_REG_MARK_UREL, 1);
    if (rc) {
      NASD_PANIC();
    }
    rc = nasd_odc_free_release_blocks_nolock(exlist, exle, &rel_cnt);
    if (rc) {
      /*
       * Should not be possible for this to fail- we're providing all
       * the necessary memory, so there's nothing to do but swizzle
       * pointers.
       */
      NASD_PANIC();
    }
    NASD_ASSERT(blocks_got == rel_cnt);
  }
  else if (rc == NASD_NO_SPACE) {
    /*
     * No more space. This isn't a failure, we return
     * what we've got.
     */
    rc = NASD_SUCCESS;
  }
  else {
    /*
     * A real failure. Don't fall into got_enough, which would
     * turn it into a success; give back what we hold instead.
     */
    goto bail;
  }

got_enough:
  rc = NASD_SUCCESS;

bail:
  if (exlist->head.next != &exlist->head) {
    /*
     * Detach the entries from the list head so that they form
     * a plain NULL-terminated chain running exle..exle_oldlast.
     */
    exle_oldlast = exlist->head.prev;
    exlist->head.prev->next = NULL;
    exlist->head.next->prev = NULL;
    exle = exlist->head.next;
    if (rc == NASD_SUCCESS) {
      /*
       * Okay, remember how we used the exlist to sort the results?
       * Well, now we're going to go and disarrange them so that the
       * first block in the list we return is the first block in the
       * sorted list occurring at or after co_blk, and anything in
       * the list before that is appended. This will assist the I/O
       * elevator.
       */
      exle_oldfirst = exle;
      exle_newfirst = exle;
      for(exle_r=exle_oldfirst;exle_r;exle_r=exle_r->next) {
        if (exle_r->range.first >= co_blk) {
          exle_newfirst = exle_r;
          break;
        }
        if (exle_r->range.last >= co_blk) {
          /*
           * This extent straddles co_blk. Split it in two at co_blk;
           * the upper half becomes the new front of the list.
           */
          NASD_ASSERT(exle_r->range.first < co_blk);
          rc = nasd_odc_get_extent_list(&exle_tmp);
          if (rc)
            goto bail2;
          exle_tmp->range.first = co_blk;
          exle_tmp->range.last = exle_r->range.last;
          exle_r->range.last = co_blk - 1;
          /* link the upper half in after the lower, both directions */
          exle_tmp->next = exle_r->next;
          exle_tmp->prev = exle_r;
          if (exle_tmp->next)
            exle_tmp->next->prev = exle_tmp;
          else
            exle_oldlast = exle_tmp; /* we split the tail */
          exle_r->next = exle_tmp;
          exle_newfirst = exle_tmp;
          break;
        }
      }
      if (exle_newfirst != exle_oldfirst) {
        /*
         * Rotate: cut the chain just ahead of exle_newfirst and hang
         * the leading part off the old tail.
         */
        exle_newfirst->prev->next = NULL;
        exle_newfirst->prev = NULL;
        exle_oldlast->next = exle_oldfirst;
        exle_oldfirst->prev = exle_oldlast;
        /*
         * The chain now begins at exle_newfirst. Hand THAT back;
         * exle_oldfirst is in the middle, and returning it would lose
         * everything from exle_newfirst through the old tail.
         */
        exle = exle_newfirst;
      }
    }
  }
  else {
    exle = NULL;
  }
bail2:
  if (rc) {
    if (exle && (exle != &exlist->head)) {
      /*
       * Fix up bogusly updated region map entries.
       *
       * NOTE(review): blocks obtained in the "stack 'em into other
       * regions" step were marked NASD_NL_REG_MARK_UREL, but are
       * unmarked here as node/data blocks. That only matters if the
       * extent split above fails after such blocks were added.
       */
      rc2 = nasd_nl_reg_mark_extent_regions(exle,
        (node_only ? NASD_NL_REG_MARK_NODE : NASD_NL_REG_MARK_DATA), -1);
      if (rc2) {
        NASD_PANIC();
      }
      /*
       * Give whatever blocks we grabbed back to the free
       * block manager.
       */
      rc2 = nasd_odc_free_release_blocks(exle, &rel_cnt);
      if (rc2) {
        NASD_PANIC();
      }
      NASD_ASSERT(rel_cnt == exlist->num);
    }
  }
  else {
    *exle_p = exle;
    *blocks_allocated_p = exlist->num;
  }

  nasd_odc_exlist_free(exlist);

  return(rc);
}

/*
 * "Reparent" all regions associated with old_regid to an arbitrary
 * non-empty member of the grouping, rehash them all in the hash
 * table, and update the on-disk region map blocks.
 *
 * old_regid must have at least one associated region recorded in
 * the map hash table; we panic if it has none, since there would
 * be nothing to promote.
 *
 * NOTE(review): the hash chains are edited here without taking the
 * REG_MAP lock, so presumably the caller holds it -- confirm.
 *
 * NOTE(review): the region promoted to root stays listed in its
 * map entry's ent_ids[] (now under itself as base). Confirm that
 * is what the rest of the module expects.
 *
 * Returns NASD_SUCCESS; failures along the way panic.
 */
nasd_status_t
nasd_nl_reg_reparent_reg(
  nasd_nl_reg_id_t  old_regid)
{
  nasd_nl_reg_map_t **bucketp, *me, *me_p, *me_next, *me_l, *me_l_tail;
  nasd_nl_reg_region_t *re, *regents;
  nasd_nl_reg_id_t new_root, regid;
  int h, i, found, reg_slot;
  nasd_odc_ent_t *reg_ent;
  nasd_status_t rc;

  NASD_ASSERT(old_regid != 0);
  h = NASD_NL_REG_MAP_HASH(old_regid);
  bucketp = &nasd_nl_reg_map_buckets[h];
  me_l = me_l_tail = NULL;
  /*
   * Run through the list. Stack up what we find on me_l.
   */
  for(me_p=NULL,me=*bucketp;me;me=me_next) {
    me_next = me->next;
    if (me->base != old_regid) {
      /* not ours; it stays on the chain */
      me_p = me;
      continue;
    }
    /* unlink from the old bucket */
    if (me_p)
      me_p->next = me->next;
    else
      *bucketp = me->next;
    for(found=i=0;i<NASD_NL_REG_MAP_BLOB;i++) {
      if (me->ent_ids[i])
        found++;
    }
    if (found == 0) {
      /* empty entry, garbage-collect it */
      NASD_FREELIST_FREE(nasd_nl_reg_map_freelist,me,next);
      continue;
    }
    /* append to me_l; every entry kept here has at least one id */
    me->next = NULL;
    if (me_l_tail == NULL) {
      NASD_ASSERT(me_l == NULL);
      me_l = me_l_tail = me;
    }
    else {
      me_l_tail->next = me;
      me_l_tail = me;
    }
  }

  if (me_l == NULL) {
    /*
     * Nothing is associated with old_regid, so there is no region
     * to promote. The caller should not have asked.
     */
    NASD_PANIC();
  }

  /*
   * Pick one to be our new root: the first in-use id in the first
   * entry we collected. (Test and read the SAME entry.)
   */
  new_root = 0;
  for(i=0;i<NASD_NL_REG_MAP_BLOB;i++) {
    if (me_l->ent_ids[i]) {
      new_root = me_l->ent_ids[i];
      break;
    }
  }
  NASD_ASSERT(new_root != 0);

  /*
   * Load up all the map entries and remark them
   * with their new root.
   */
  for(me=me_l;me;me=me->next) {
    for(i=0;i<NASD_NL_REG_MAP_BLOB;i++) {
      regid = me->ent_ids[i];
      if (regid == 0)
        continue;
      rc = nasd_nl_reg_get_regent(regid, &reg_ent, &reg_slot);
      if (rc) {
        NASD_PANIC();
      }

      NASD_ODC_WLOCK_BLOCK_DATA(reg_ent);

      NASD_NL_REG_VALIDATE_NONBUSY(reg_ent);

      /*
       * We're changing the contents of the map block, so it has
       * to be marked dirty, as is done everywhere else region map
       * slots are modified.
       */
      NASD_ODC_LOCK_BLOCK(reg_ent);
      nasd_odc_dirty_ent(reg_ent);
      NASD_ODC_UNLOCK_BLOCK(reg_ent);

      regents = (nasd_nl_reg_region_t *)reg_ent->data.buf;
      re = &regents[reg_slot];
      NASD_ASSERT(re->reg_id == old_regid);
      NASD_ASSERT(new_root != 0);
      re->reg_id = new_root;

      NASD_ODC_WUNLOCK_BLOCK_DATA(reg_ent);

      rc = nasd_nl_reg_release_regent(reg_ent);
      if (rc) {
        NASD_PANIC();
      }
    }
  }

  /*
   * Rekey the hash entries themselves so that lookups by the new
   * root find them.
   */
  for(me=me_l;me;me=me->next)
    me->base = new_root;

  /*
   * Finally, drop all these map references back
   * into the map hash table, in new buckets.
   */
  h = NASD_NL_REG_MAP_HASH(new_root);
  bucketp = &nasd_nl_reg_map_buckets[h];
  me_l_tail->next = *bucketp;
  *bucketp = me_l;

  return(NASD_SUCCESS);
}

/*
 * Iterate through the extent list, updating region map
 * entries to indicate that blocks of the appropriate type
 * are going in/out of use.
 *
 * This is probably one of the most dense pieces of code
 * in this module. This density is largely a set of
 * optimizations.
 */
nasd_status_t
nasd_nl_reg_mark_extent_regions(
  nasd_odc_exlist_ent_t  *exle,
  int                     mark_what,
  int                     mark_dir)
{
  nasd_blkno_t re_first, re_last, cur_blk, l_blk, map_blk;
  nasd_nl_reg_map_t **bucketp, *me, *me_p, *me_next;
  int reg_slot, h, i, found, keep, j, adv_me;
  nasd_nl_reg_region_t *re, *regents;
  nasd_nl_reg_id_t regid, re_root;
  nasd_odc_exlist_ent_t *e;
  nasd_odc_ent_t *reg_ent;
  nasd_status_t rc, rc2;
  nasd_blkcnt_t mk_blk;

  reg_ent = NULL;
  regid = 0;
  rc = NASD_SUCCESS;
  regents = NULL;

  /*
   * The premise: run through the list of extents. At each
   * entry, run through it mapping it to individual regions,
   * then update that region map entry. We only re-grab
   * region map blocks when necessary.
   */
  for(e=exle;e;e=e->next) {
    cur_blk = e->range.first;
    l_blk = e->range.last;
    /*
     * Iterate through this extent in the list.
     */
    while(cur_blk <= l_blk) {
      /* figure out what region we're in */
      nasd_nl_reg_blkno_to_regid(cur_blk, &regid);
      nasd_nl_reg_region_to_slot(regid, &map_blk, &reg_slot);
      if ((reg_ent == NULL) || (reg_ent->blkno != map_blk)) {
        /*
         * Either we're not holding a region map block, or we're
         * holding the wrong one.
         */
        if (reg_ent) {
          /* wrong one- unlock and release */
          NASD_ODC_WUNLOCK_BLOCK_DATA(reg_ent);
          rc = nasd_nl_reg_release_regent(reg_ent);
          if (rc) {
            NASD_PANIC();
          }
        }
        /*
         * Get the right one
         */
        rc = nasd_odc_block_get(NULL, map_blk,
          NASD_ODC_L_FORCE|NASD_ODC_L_BLOCK|NASD_ODC_L_LOAD,
          &reg_ent, NASD_ID_NULL, 0, NASD_ODC_T_LAYOUT_STATIC, NULL);
        if (rc) {
          NASD_PANIC();
        }
        NASD_ODC_WLOCK_BLOCK_DATA(reg_ent);
        NASD_ODC_LOCK_BLOCK(reg_ent);
        NASD_NL_REG_VALIDATE_NONBUSY_NOLOCK(reg_ent);
        nasd_odc_dirty_ent(reg_ent);
        NASD_ODC_UNLOCK_BLOCK(reg_ent);
        regents = (nasd_nl_reg_region_t *)reg_ent->data.buf;
      }
      /*
       * Now we have the correct map block, and reg_slot is the
       * slot of the region we're looking at. Figure out the
       * range of blocks represented by this region, and mark
       * whatever region of blocks is covered by the range
       * cur_blk..MIN(l_blk,re_last) (l_blk is last block in
       * extent, re_last is last block in region).
       */
      re = &regents[reg_slot];
      if (re->reg_id == 0) {
        nasd_printf("unexpected reg_id 0\n");
        nasd_printf("map_blk=%u reg_ent=0x%lx regents=0x%lx reg_slot=%d\n",
          map_blk, (unsigned long)reg_ent, (unsigned long)regents, reg_slot);
        nasd_printf("cur_blk=%u regid=%u\n", cur_blk, regid);
        nasd_printf("nasd_od_blocks=%u nasd_nl_reg_clump_regs=%d "
          "nasd_nl_reg_cnt=%d\n",
          nasd_od_blocks, nasd_nl_reg_clump_regs, nasd_nl_reg_cnt);
        nasd_printf("nasd_nl_reg_clump_cnt=%d nasd_nl_reg_clump_blocks=%d\n",
          nasd_nl_reg_clump_cnt, nasd_nl_reg_clump_blocks);
        nasd_printf("nasd_nl_reg_last_reg=%u nasd_nl_reg_last_reg_size=%d\n",
          nasd_nl_reg_last_reg, nasd_nl_reg_last_reg_size);
      }
      NASD_ASSERT(re->reg_id != 0);
      nasd_nl_reg_range(regid, &re_first, &re_last);

      /*
       * Compute that MIN
       * mk_blk is # of blocks covered
       * update cur_blk
       */
      if (l_blk <= re_last) {
        mk_blk = l_blk - cur_blk + 1;
        cur_blk = l_blk + 1;
      }
      else {
        mk_blk = re_last - cur_blk + 1;
        cur_blk = re_last + 1;
      }

      if (mark_what == NASD_NL_REG_MARK_NODE) {
        if (mark_dir > 0) {
          re->reg_iblks += mk_blk;
          re->reg_freeblocks -= mk_blk;
        }
        else {
          re->reg_iblks -= mk_blk;
          re->reg_freeblocks += mk_blk;
        }
      }
      else if (mark_what == NASD_NL_REG_MARK_DATA) {
        if (mark_dir > 0) {
          re->reg_dblks += mk_blk;
          re->reg_freeblocks -= mk_blk;
        }
        else {
          re->reg_dblks -= mk_blk;
          re->reg_freeblocks += mk_blk;
        }
      }
      else if (mark_what == NASD_NL_REG_MARK_UREL) {
        if (mark_dir > 0) {
          re->reg_unrelated += mk_blk;
          re->reg_freeblocks -= mk_blk;
        }
        else {
          re->reg_unrelated -= mk_blk;
          re->reg_freeblocks += mk_blk;
        }
      }
      else {
        NASD_PANIC();
      }

      NASD_ASSERT(re->reg_freeblocks <= nasd_od_region_blocks);
      if (re->reg_freeblocks == nasd_od_region_blocks) {
        /*
         * This region is now empty.
         */

        LOCK_REG_MAP();

        NASD_ASSERT(re->reg_id != 0);
        re_root = re->reg_id;
        re->reg_id = 0;
        h = NASD_NL_REG_MAP_HASH(re_root);
        bucketp = &nasd_nl_reg_map_buckets[h];
        found = 0;
        keep = 0;
        if (re_root == regid) {
          /*
           * This is the root region of an associated region set.
           * See if we have any associated regions.
           */
          for(me_p=NULL,me=*bucketp;me;me=me_next) {
            adv_me = 1;
            me_next = me->next;
            if (me->base == re_root) {
              for(j=0;j<NASD_NL_REG_MAP_BLOB;j++) {
                if (me->ent_ids[j]) {
                  found++;
                  break;
                }
              }
              if (found)
                break;
              /* empty entry, garbage-collect it */
              if (me_p)
                me_p->next = me->next;
              else
                *bucketp = me->next;
              adv_me = 0;
              NASD_FREELIST_FREE(nasd_nl_reg_map_freelist,me,next);
            }
            if (adv_me)
              me_p = me;
          }
          if (found == 0) {
            /*
             * No associated regions. We can just go to the
             * region freelist.
             */
            found = 1;
          }
          else {
            /*
             * There are still regions for which we're the 'root.'
             *
             * "Reparent" them all to an arbitrary non-empty member
             * of the grouping, rehash them all in the hash table,
             * and update the on-disk region map blocks.
             */
            rc = nasd_nl_reg_reparent_reg(re_root);
            if (rc) {
              NASD_PANIC();
            }
          }
        }
        else {
          /*
           * This is not the root region for a set of
           * related regions. Disassociate it
           * from its root, and put it in the empty
           * regions list.
           */
          for(me=*bucketp;me;me=me->next) {
            if (me->base != re_root)
              continue;
            for(i=0;i<NASD_NL_REG_MAP_BLOB;i++) {
              if (me->ent_ids[i] == regid) {
                found = 1;
                me->ent_ids[i] = 0;
              }
            }
          }
        }

        UNLOCK_REG_MAP();

        NASD_ASSERT(found > 0);
        /*
         * There are no longer any regions associated with
         * this one (any associations have been removed).
         * Mark it free.
         */
        rc = nasd_odc_exlist_release_oneblock(&nasd_nl_reg_unused_regions,
          regid);
        if (rc)
          goto bail;
      } /* re->reg_freeblocks == nasd_od_region_blocks */
    } /* cur_blk <= l_blk */
  }

bail:
  if (reg_ent) {
    /* wrong one- unlock and release */
    NASD_ODC_WUNLOCK_BLOCK_DATA(reg_ent);
    rc2 = nasd_nl_reg_release_regent(reg_ent);
    if (rc2) {
      NASD_PANIC();
    }
  }

  return(rc);
}

/*
 * Back out a node creation that failed.
 *
 * This layout does not support preallocation, so pre_exle
 * must be NULL. The extent in exle is un-marked as node
 * blocks in the region accounting (mark direction -1) and
 * then released with nasd_odc_free_release_blocks(); exactly
 * one block is expected to come back.
 *
 * Returns NASD_SUCCESS, or the first nonzero status reported
 * by either step.
 */
nasd_status_t
nasd_nl_reg_node_fail_create(
  int                     partnum,
  nasd_odc_exlist_ent_t  *exle,
  nasd_odc_exlist_ent_t  *pre_exle)
{
  nasd_blkcnt_t released;
  nasd_status_t status;

  NASD_ASSERT(pre_exle == NULL);

  /* region accounting first; only release if that succeeded */
  status = nasd_nl_reg_mark_extent_regions(exle, NASD_NL_REG_MARK_NODE, -1);
  if (!status)
    status = nasd_odc_free_release_blocks(exle, &released);
  if (status)
    return(status);

  NASD_ASSERT(released == 1);
  return(NASD_SUCCESS);
}

/*
 * Release a single block.
 *
 * layout_handle carries the mark type to undo: a non-zero
 * value means we got here through a ref operation triggered
 * elsewhere in this module and the value is passed straight
 * to the region marking code. A zero handle is treated as
 * NASD_NL_REG_MARK_DATA.
 *
 * Returns the status of the region update if it fails,
 * otherwise the status of nasd_odc_free_release_oneblock().
 */
nasd_status_t
nasd_nl_reg_release_oneblock(
  int            partnum,
  nasd_blkno_t   blknum,
  void          *layout_handle)
{
  nasd_odc_exlist_ent_t one;
  nasd_status_t status;
  int mark_what;

  mark_what = (int)((unsigned long)layout_handle);
  if (mark_what == 0)
    mark_what = NASD_NL_REG_MARK_DATA;

  /* build a one-block extent on the stack; bzero leaves next NULL */
  bzero((char *)&one, sizeof(one));
  one.range.first = blknum;
  one.range.last = blknum;

  status = nasd_nl_reg_mark_extent_regions(&one, mark_what, -1);
  if (status)
    return(status);

  return(nasd_odc_free_release_oneblock(blknum));
}

/*
 * Release the blocks described by exle.
 *
 * layout_handle carries the mark type to undo: a non-zero
 * value means we got here through a ref operation triggered
 * elsewhere in this module and the value is passed straight
 * to the region marking code. A zero handle is treated as
 * NASD_NL_REG_MARK_DATA.
 *
 * Returns the status of the region update if it fails,
 * otherwise the status of nasd_odc_free_release_blocks(),
 * which also fills in *blocks_released_p.
 */
nasd_status_t
nasd_nl_reg_release_blocks(
  int                     partnum,
  nasd_odc_exlist_ent_t  *exle,
  nasd_blkcnt_t          *blocks_released_p,
  void                   *layout_handle)
{
  nasd_status_t status;
  int mark_what;

  mark_what = (int)((unsigned long)layout_handle);
  if (mark_what == 0)
    mark_what = NASD_NL_REG_MARK_DATA;

  status = nasd_nl_reg_mark_extent_regions(exle, mark_what, -1);
  if (status)
    return(status);

  return(nasd_odc_free_release_blocks(exle, blocks_released_p));
}

/*
 * Call with partition write lock held.
 *
 * We don't support preallocation, so there is nothing to
 * adjust: just check that the node carries no preallocation
 * extent and report success.
 */
nasd_status_t
nasd_nl_reg_adj_prealloc(
  int                              partnum,
  nasd_odc_ent_t                  *ne,
  nasd_odc_prealloc_adj_handle_t  *pah,
  int                              len_changed)
{
  nasd_od_node_t *node = ne->data.node;

  NASD_ASSERT(node->prealloc_ex.first == 0);
  NASD_ASSERT(node->prealloc_ex.last == 0);

  return(NASD_SUCCESS);
}

/*
 * Give up the range of blocks assigned as the
 * preallocation range.
 *
 * This layout never preallocates, so a node is expected to
 * have an empty prealloc_ex. If one is found anyway, a
 * warning is printed, the range is cleared from the node and
 * released through nasd_nl_reg_release_blocks(); a failure
 * of that release is fatal.
 *
 * Returns NASD_SUCCESS, or the error from
 * nasd_odc_get_extent_list() if no extent entry could be
 * obtained.
 */
nasd_status_t
nasd_nl_reg_surrender_prealloc(
  int              partnum,
  nasd_odc_ent_t  *ne)
{
  nasd_odc_exlist_ent_t *fe;
  nasd_blkcnt_t rel_cnt;
  nasd_od_node_t *np;
  nasd_status_t rc;

  np = ne->data.node;

  if (np->prealloc_ex.first) {
    /* we shouldn't ever get here */
    nasd_printf("REG WARNING: prealloc found in surrender_prealloc\n");
    NASD_ASSERT(np->prealloc_ex.last >= np->prealloc_ex.first);
    rc = nasd_odc_get_extent_list(&fe);
    if (rc)
      return(rc);
    fe->range = np->prealloc_ex;
    /*
     * Single-entry list: terminate it explicitly, as
     * nasd_nl_reg_node_deleting() does, so nothing past
     * this entry is visited when the extent is processed.
     */
    fe->next = NULL;
    np->prealloc_ex.first = np->prealloc_ex.last = 0;
    /*
     * NOTE(review): fe is not handed back with
     * nasd_odc_release_extent_list() here; confirm that the
     * release path takes ownership of the entry.
     */
    rc = nasd_nl_reg_release_blocks(partnum, fe, &rel_cnt, NULL);
    if (rc)
      NASD_PANIC();
  }
  else {
    NASD_ASSERT(np->prealloc_ex.last == 0);
  }

  return(NASD_SUCCESS);
}

/*
 * Delete the inode occupying the block held by ne.
 *
 * The cache entry is ejected and released, then a one-block
 * extent for its block number is de-referenced through
 * nasd_odc_ref_ranges() under the partition write lock, with
 * NASD_NL_REG_MARK_NODE passed as the layout handle. A
 * failure of that de-reference is fatal.
 *
 * Returns NASD_SUCCESS, or the error from
 * nasd_odc_get_extent_list() if no extent entry could be
 * obtained (in which case ne is left untouched).
 */
nasd_status_t
nasd_nl_reg_node_deleting(
  int              partnum,
  nasd_odc_ent_t  *ne)
{
  nasd_odc_exlist_ent_t *fe;
  nasd_odc_icpart_t *icp;
  nasd_status_t rc;
  nasd_blkno_t nb;

  /* capture the block number before the entry is released */
  nb = ne->blkno;

  icp = &nasd_odc_state->parts[partnum];

  rc = nasd_odc_get_extent_list(&fe);
  if (rc)
    return(rc);

  nasd_odc_block_eject(ne);
  nasd_odc_block_release(ne);

  fe->range.first = fe->range.last = nb;
  fe->next = NULL;

  NASD_ODC_ICPART_LOCK_WRITE(icp);
  rc = nasd_odc_ref_ranges(partnum, fe, -1, (void *)NASD_NL_REG_MARK_NODE,
    NASD_ODC_REF_NOFLAGS);
  NASD_ODC_ICPART_UNLOCK_WRITE(icp);

  if (rc)
    NASD_PANIC();

  nasd_odc_release_extent_list(fe);

  return(NASD_SUCCESS);
}

/*
 * Call with partition write lock held.
 * Call with object lock held.
 *
 * Allocate needblks blocks near blk_hint by delegating to
 * nasd_nl_reg_get_coblks() (fourth argument 0). The
 * resulting extent list and count are returned through
 * exle_p and blocks_allocated_p, and the helper's status is
 * passed back unchanged.
 */
nasd_status_t
nasd_nl_reg_alloc_blocks(
  int                      partnum,
  nasd_odc_ent_t          *ne,
  nasd_blkcnt_t            needblks,
  nasd_blkno_t             blk_hint,
  nasd_odc_exlist_ent_t  **exle_p,
  nasd_blkcnt_t           *blocks_allocated_p)
{
  return(nasd_nl_reg_get_coblks(partnum, blk_hint, needblks,
    0, exle_p, blocks_allocated_p));
}

/*
 * Print one region-map entry and sanity-check it.
 *
 * The entry is "good" when its free, inode, data and
 * unrelated block counts add up to nasd_od_region_blocks.
 * A bad entry is fatal unless its reg_id is 0.
 */
void
nasd_nl_reg_dump_re(
  nasd_nl_reg_region_t  *re)
{
  unsigned int nfree, ninode, ndata, nunrel;
  int consistent;

  nfree = (unsigned int)re->reg_freeblocks;
  ninode = (unsigned int)re->reg_iblks;
  ndata = (unsigned int)re->reg_dblks;
  nunrel = (unsigned int)re->reg_unrelated;

  consistent = ((nfree + ninode + ndata + nunrel) == nasd_od_region_blocks);

  nasd_printf("regid %u freeblocks %u iblks %u dblks %u unrelated %u %s\n",
    re->reg_id, nfree, ninode, ndata, nunrel,
    (consistent ? "good" : "bad"));

  /* entries with reg_id 0 are not held to the accounting check */
  if ((re->reg_id != 0) && !consistent)
    NASD_PANIC();
}

#if NASD_NL_REG_SCOREBOARD > 0

/*
 * Shutdown hook: destroy nasd_nl_reg_chash_freelist, the
 * freelist of nasd_nl_reg_chash_ent_t entries (linked through
 * c_next). Registered with nasd_shutdown_proc() by
 * nasd_nl_reg_scoreboard_init(); the argument is unused.
 */
void
nasd_nl_reg_shutdown_chash_freelist(
  void  *ignored)
{
  NASD_FREELIST_DESTROY(nasd_nl_reg_chash_freelist,c_next,
    (nasd_nl_reg_chash_ent_t *));
}

/*
 * Tear down one region's scoreboard info: cancel its
 * timeout, reset the handle, and free every hash entry
 * chained off each of its chash buckets.
 */
void
nasd_nl_reg_info_shutdown(
  nasd_nl_reg_info_t  *ri)
{
  nasd_nl_reg_chash_ent_t *cur, *following;
  int bucket;

  nasd_timeout_cancel(ri->tm_handle);
  ri->tm_handle = NASD_TIMEOUT_HANDLE_NULL;

  for(bucket=0;bucket<SCOREBOARD_HASH_WIDTH;bucket++) {
    cur = ri->chash[bucket];
    while(cur != NULL) {
      /* grab the link before the entry goes away */
      following = cur->c_next;
      CH_FREE(cur);
      cur = following;
    }
    ri->chash[bucket] = NULL;
  }
}

/*
 * Initialize one region's scoreboard info.
 *
 * The caller has already zeroed *ri, so the only field that
 * needs an explicit value is the timeout handle. Always
 * returns NASD_SUCCESS.
 */
nasd_status_t
nasd_nl_reg_info_init(
  nasd_nl_reg_info_t  *ri)
{
  /* already bzero'd */
  ri->tm_handle = NASD_TIMEOUT_HANDLE_NULL;

  return(NASD_SUCCESS);
}

/*
 * Shutdown hook for the region scoreboard.
 *
 * Clears nasd_nl_reg_scoreboard_active, then shuts down and
 * frees every allocated chunk of nasd_nl_reg_infos. All
 * chunks but the last hold nasd_nl_reg_info_chunksize
 * entries; the last holds nasd_nl_reg_info_last_chunksize.
 * With NASD_NL_REG_SCOREBOARD_TIMERS enabled, the scoreboard
 * timers are printed afterwards. The argument is unused.
 */
void
nasd_nl_reg_shutdown_scoreboard(
  void  *ignored)
{
  nasd_nl_reg_id_t slot;
  int chunk, nbytes;

  /* the other scoreboard entry points check this flag first */
  nasd_nl_reg_scoreboard_active = 0;

  /* full-sized chunks */
  nbytes = nasd_nl_reg_info_chunksize * sizeof(nasd_nl_reg_info_t);
  chunk = 0;
  while(chunk < NASD_NL_REG_INFOS_CHUNKS-1) {
    if (nasd_nl_reg_infos[chunk] != NULL) {
      for(slot=0;slot<nasd_nl_reg_info_chunksize;slot++) {
        nasd_nl_reg_info_shutdown(&nasd_nl_reg_infos[chunk][slot]);
      }
      NASD_Free(nasd_nl_reg_infos[chunk], nbytes);
      nasd_nl_reg_infos[chunk] = NULL;
    }
    chunk++;
  }
  NASD_ASSERT(chunk == (NASD_NL_REG_INFOS_CHUNKS-1));

  /* trailing (possibly shorter, possibly absent) chunk */
  nbytes = nasd_nl_reg_info_last_chunksize * sizeof(nasd_nl_reg_info_t);
  if (nasd_nl_reg_infos[chunk]) {
    for(slot=0;slot<nasd_nl_reg_info_last_chunksize;slot++) {
      nasd_nl_reg_info_shutdown(&nasd_nl_reg_infos[chunk][slot]);
    }
    NASD_Free(nasd_nl_reg_infos[chunk], nbytes);
    nasd_nl_reg_infos[chunk] = NULL;
  }

#if NASD_NL_REG_SCOREBOARD_TIMERS > 0

#define DUMP_SB_TIMER(_t_) { \
  nasd_printf("  %8s: %2d:%09d\n", NASD_STRING(_t_), \
    nasd_nl_reg_scoreboard_timers._t_##_time.ts_sec, \
    nasd_nl_reg_scoreboard_timers._t_##_time.ts_nsec); \
}

  nasd_printf("Scoreboard timers:\n");
  DUMP_SB_TIMER(access);
  DUMP_SB_TIMER(timeout);
  if (nasd_nl_reg_scoreboard_timers.access_calls == 0) {
    nasd_printf("  0 access calls for 0 region entries, average 0\n");
  }
  else {
    nasd_uint64 mean;
    mean = nasd_nl_reg_scoreboard_timers.access_regs /
      nasd_nl_reg_scoreboard_timers.access_calls;
    nasd_printf("  %" NASD_64u_FMT " access calls for %" NASD_64u_FMT
      " region entries, average %" NASD_64u_FMT "\n",
      nasd_nl_reg_scoreboard_timers.access_calls,
      nasd_nl_reg_scoreboard_timers.access_regs, mean);
  }

#endif /* NASD_NL_REG_SCOREBOARD_TIMERS > 0 */
}

/*
 * Set up the region scoreboard.
 *
 * Resets the current queue to an empty circular list, creates
 * the scoreboard lock and the chash freelist (registering a
 * shutdown action for each), allocates and initializes the
 * chunks of nasd_nl_reg_infos, marks the scoreboard active and
 * registers nasd_nl_reg_shutdown_scoreboard() as a shutdown
 * hook.
 *
 * Returns NASD_SUCCESS, NASD_NO_MEM if an allocation fails, or
 * the error from the step that failed. Because the scoreboard
 * shutdown hook is only registered at the very end, any
 * failure after info chunks have been allocated invokes
 * nasd_nl_reg_shutdown_scoreboard() directly so those chunks
 * are not leaked.
 */
nasd_status_t
nasd_nl_reg_scoreboard_init()
{
  nasd_nl_reg_id_t i;
  nasd_status_t rc;
  int k, sz;

  /* empty circular queue: the head points at itself */
  bzero((char *)&nasd_nl_reg_current_queue, sizeof(nasd_nl_reg_current_queue));
  nasd_nl_reg_current_queue.t_next = &nasd_nl_reg_current_queue;
  nasd_nl_reg_current_queue.t_prev = &nasd_nl_reg_current_queue;

#if NASD_NL_REG_SCOREBOARD_TIMERS > 0
  bzero((char *)&nasd_nl_reg_scoreboard_timers,
    sizeof(nasd_nl_reg_scoreboard_timers));
#endif /* NASD_NL_REG_SCOREBOARD_TIMERS > 0 */

  rc = nasd_mutex_init(&nasd_nl_reg_scoreboard_lock);
  if (rc)
    return(rc);
  rc = nasd_shutdown_mutex(nasd_odc_shutdown, &nasd_nl_reg_scoreboard_lock);
  if (rc) {
    return(rc);
  }

  NASD_FREELIST_CREATE(nasd_nl_reg_chash_freelist, NASD_NL_REG_MAX_FREE_CHASH,
    NASD_NL_REG_CHASH_INC, sizeof(nasd_nl_reg_chash_ent_t));
  if (nasd_nl_reg_chash_freelist == NULL)
    return(NASD_NO_MEM);
  NASD_FREELIST_PRIME(nasd_nl_reg_chash_freelist, NASD_NL_REG_CHASH_INITIAL,
    c_next, (nasd_nl_reg_chash_ent_t *));

  rc = nasd_shutdown_proc(nasd_odc_shutdown,
    nasd_nl_reg_shutdown_chash_freelist, NULL);
  if (rc) {
    nasd_nl_reg_shutdown_chash_freelist(NULL);
    return(rc);
  }

  /*
   * NOTE(review): the direct shutdown calls below (like the
   * pre-existing ones) rely on not-yet-assigned slots of
   * nasd_nl_reg_infos being NULL on entry.
   */
  sz = nasd_nl_reg_info_chunksize * sizeof(nasd_nl_reg_info_t);
  for(k=0;k<NASD_NL_REG_INFOS_CHUNKS-1;k++) {
    NASD_Malloc(nasd_nl_reg_infos[k], sz, (nasd_nl_reg_info_t *));
    if (nasd_nl_reg_infos[k] == NULL) {
      /* release the chunks allocated so far */
      nasd_nl_reg_shutdown_scoreboard(NULL);
      return(NASD_NO_MEM);
    }
    /* nasd_nl_reg_infos[k] is freed by nasd_nl_reg_shutdown_scoreboard */
    bzero((char *)nasd_nl_reg_infos[k], sz);
    for(i=0;i<nasd_nl_reg_info_chunksize;i++) {
      rc = nasd_nl_reg_info_init(&nasd_nl_reg_infos[k][i]);
      if (rc) {
        nasd_nl_reg_shutdown_scoreboard(NULL);
        return(rc);
      }
    }
  }
  NASD_ASSERT(k == (NASD_NL_REG_INFOS_CHUNKS-1));
  if (nasd_nl_reg_info_last_chunksize) {
    sz = nasd_nl_reg_info_last_chunksize * sizeof(nasd_nl_reg_info_t);
    NASD_Malloc(nasd_nl_reg_infos[k], sz, (nasd_nl_reg_info_t *));
    if (nasd_nl_reg_infos[k] == NULL) {
      /* release the full-sized chunks allocated above */
      nasd_nl_reg_shutdown_scoreboard(NULL);
      return(NASD_NO_MEM);
    }
    /* nasd_nl_reg_infos[k] is freed by nasd_nl_reg_shutdown_scoreboard */
    bzero((char *)nasd_nl_reg_infos[k], sz);
    for(i=0;i<nasd_nl_reg_info_last_chunksize;i++) {
      rc = nasd_nl_reg_info_init(&nasd_nl_reg_infos[k][i]);
      if (rc) {
        nasd_nl_reg_shutdown_scoreboard(NULL);
        return(rc);
      }
    }
  }
  else {
    nasd_nl_reg_infos[k] = NULL;
  }

  nasd_nl_reg_scoreboard_active = 1;
  rc = nasd_shutdown_proc(nasd_odc_shutdown, nasd_nl_reg_shutdown_scoreboard,
    NULL);
  if (rc) {
    nasd_nl_reg_shutdown_scoreboard(NULL);
    return(rc);
  }

  return(NASD_SUCCESS);
}

/*
 * Fill in the region fields of a cache entry.
 *
 * With the scoreboard inactive only reg_id is cleared. Otherwise
 * the entry's type decides whether it is tracked: tracked entries
 * get the region id of their block and NASD_ODC_R_VALID, all
 * others get reg_id 0 and no flags. When NPT blocks are not
 * included, ANON entries are tracked only if their block number
 * is at or past nasd_nl_reg_first_clumpblk.
 */
void
nasd_nl_reg_init_ent(
  nasd_odc_ent_t  *ent)
{
  nasd_nl_reg_id_t reg_id;
  int tracked;

  if (nasd_nl_reg_scoreboard_active == 0) {
    ent->reg_id = 0;
    return;
  }

  /* first classify the entry... */
  switch(ent->type) {
#if NASD_NL_REG_INCLUDE_NPT > 0
    case NASD_ODC_T_NPT:
    case NASD_ODC_T_ANON:
#endif /* NASD_NL_REG_INCLUDE_NPT > 0 */
    case NASD_ODC_T_NODE:
    case NASD_ODC_T_IND:
    case NASD_ODC_T_FREE:
    case NASD_ODC_T_DATA:
      tracked = 1;
      break;
#if NASD_NL_REG_INCLUDE_NPT == 0
    case NASD_ODC_T_ANON:
      tracked = (ent->blkno >= nasd_nl_reg_first_clumpblk);
      break;
#endif /* NASD_NL_REG_INCLUDE_NPT == 0 */
    default:
      tracked = 0;
      break;
  }

  /* ...then fill in the region fields in one place */
  if (tracked) {
    nasd_nl_reg_blkno_to_regid(ent->blkno, &reg_id);
    ent->reg_id = reg_id;
    ent->reg_flags = NASD_ODC_R_VALID;
  }
  else {
    ent->reg_id = 0;
    ent->reg_flags = 0;
  }
}

/*
 * Timeout callback installed by nasd_nl_reg_access_ent() for a
 * region's scoreboard info.
 *
 * tmh  - handle of the timeout that fired
 * arg1 - the nasd_nl_reg_info_t the timeout was added for
 * arg2 - must be NULL
 *
 * Does nothing if the scoreboard is inactive. Otherwise, under
 * the scoreboard lock, it looks up the timeout's status; if the
 * status is unavailable or the timeout was cancelled it bails
 * out, else it dequeues the region info (RI_DEQ), bumps noaccess
 * when accessed is still 0, and resets accessed. With
 * NASD_NL_REG_SCOREBOARD_TIMERS enabled the elapsed time is added
 * to the scoreboard's timeout_time.
 */
void
nasd_nl_reg_ri_timeout(
  nasd_timeout_handle_t   tmh,
  void                   *arg1,
  void                   *arg2)
{
  nasd_timeout_status_t tmstat;
  nasd_nl_reg_info_t *ri;
  nasd_status_t rc;
#if NASD_NL_REG_SCOREBOARD_TIMERS > 0
  nasd_timespec_t ts;
  nasd_timer_t tm;
  nasd_uint64 cca;
#endif /* NASD_NL_REG_SCOREBOARD_TIMERS > 0 */

  if (nasd_nl_reg_scoreboard_active == 0) {
    /*
     * We're shutting down. Don't touch anything.
     */
    return;
  }

#if NASD_NL_REG_SCOREBOARD_TIMERS > 0
  NASD_TM_START(&tm);
#endif /* NASD_NL_REG_SCOREBOARD_TIMERS > 0 */

  ri = (nasd_nl_reg_info_t *)arg1;
  NASD_ASSERT(arg2 == NULL);

  LOCK_SCOREBOARD();

  /* status is read under the scoreboard lock */
  rc = nasd_timeout_get_status(tmh, &tmstat);
  if (rc) {
    nasd_printf("DRIVE: could not get status for timeout in"
      " scoreboard timeout handler, error 0x%x (%s)\n",
      rc, nasd_error_string(rc));
    goto done;
  }

  NASD_ASSERT(tmstat&NASD_TIMEOUT_S_RUNNING);
  NASD_ASSERT(tmstat&NASD_TIMEOUT_S_KNOWN);
  if (tmstat&NASD_TIMEOUT_S_CANCELLED) {
    /* timeout cancelled */
    goto done;
  }

  /* the region info is expected to still be linked on a queue */
  NASD_ASSERT(ri->t_next != NULL);
  NASD_ASSERT(ri->t_prev != NULL);
  RI_DEQ(ri);

  /* count the case where nothing was recorded in accessed */
  if (ri->accessed == 0)
    ri->noaccess++;
  ri->accessed = 0;

done:
  UNLOCK_SCOREBOARD();

#if NASD_NL_REG_SCOREBOARD_TIMERS > 0
  NASD_TM_STOP(&tm);
  NASD_TM_ELAPSED_TS(&tm, &ts);
  NASD_ATOMIC_TIMESPEC_ADD(&nasd_nl_reg_scoreboard_timers.timeout_time, &ts);
#endif /* NASD_NL_REG_SCOREBOARD_TIMERS > 0 */
}

/*
 * Record an access to the region of a cache entry in the
 * region scoreboard.
 *
 * Entries without NASD_ODC_R_VALID, or with reg_id 0, are
 * ignored, as is every call while the scoreboard is inactive.
 * ent->reg_id is one more than the index used into
 * nasd_nl_reg_infos.
 *
 * Under the scoreboard lock this:
 *  - takes the region's info off the current queue if it is
 *    linked, bumps its total, and replaces its timeout;
 *  - for up to nasd_nl_reg_scoreboard_thresh infos at the front
 *    of the current queue, bumps accessed and the per-region
 *    hit counter for this region in that info's chash;
 *  - if the threshold was reached, expires the remaining
 *    queued infos (dequeue, cancel timeout, update noaccess);
 *  - puts this region's info back on the queue, unless its
 *    timeout could not be added.
 */
void
nasd_nl_reg_access_ent(
  nasd_odc_ent_t  *ent)
{
  int h, readd, n, reg_chunk, reg_ind;
  nasd_nl_reg_id_t reg_id, chunk_base;
  nasd_nl_reg_info_t *ri, *r, *next;
  nasd_nl_reg_chash_ent_t *ch;
  nasd_status_t rc;
#if NASD_NL_REG_SCOREBOARD_TIMERS > 0
  nasd_timespec_t ts;
  nasd_timer_t tm;
  nasd_uint64 cca;
#endif /* NASD_NL_REG_SCOREBOARD_TIMERS > 0 */

  if (nasd_nl_reg_scoreboard_active == 0)
    return;

  if ((ent->reg_flags&NASD_ODC_R_VALID) == 0)
    return;

#if NASD_NL_REG_SCOREBOARD_TIMERS > 0
  NASD_TM_START(&tm);
#endif /* NASD_NL_REG_SCOREBOARD_TIMERS > 0 */

  if (ent->reg_id == 0)
    goto done2;

  /* entry ids are offset by one from the info index */
  reg_id = ent->reg_id - 1;

  LOCK_SCOREBOARD();

#if NASD_NL_REG_SCOREBOARD_TIMERS > 0
  nasd_nl_reg_scoreboard_timers.access_calls++;
#endif /* NASD_NL_REG_SCOREBOARD_TIMERS > 0 */

  /*
   * Must recheck after blocking for scoreboard lock
   */
  if (nasd_nl_reg_scoreboard_active == 0)
    goto done;

  NASD_ASSERT(reg_id >= 0);
  NASD_ASSERT(reg_id < nasd_nl_reg_cnt);

  /* locate this region's info within the chunked info array */
  reg_chunk = reg_id/nasd_nl_reg_info_chunksize;
  reg_ind = reg_id%nasd_nl_reg_info_chunksize;
  NASD_ASSERT(reg_chunk >= 0);
  NASD_ASSERT(reg_chunk < NASD_NL_REG_INFOS_CHUNKS);
  NASD_ASSERT(reg_ind >= 0);
  NASD_ASSERT(reg_ind < nasd_nl_reg_info_chunksize);
  if (reg_chunk == (NASD_NL_REG_INFOS_CHUNKS-1)) {
    NASD_ASSERT(reg_ind < nasd_nl_reg_info_last_chunksize);
  }
  ri = &nasd_nl_reg_infos[reg_chunk][reg_ind];

  readd = 1;
  if (ri->t_next) {
    RI_DEQ(ri);
  }

  ri->total++;

  /* replace any existing timeout with a fresh one */
  nasd_timeout_cancel(ri->tm_handle);
  ri->tm_handle = NASD_TIMEOUT_HANDLE_NULL;
  rc = nasd_timeout_add(&ri->tm_handle, nasd_nl_reg_ri_timeout,
    ri, NULL, nasd_nl_reg_scoreboard_interval,
    nasd_nl_reg_scoreboard_interval, 0);
  if (rc) {
    nasd_printf("DRIVE: region scoreboard got 0x%x (%s) adding timeout\n",
      rc, nasd_error_string(rc));
    /*
     * Since timeout will never remove us, just bear in
     * mind that we should not requeue later on.
     */
    readd = 0;
  }

  h = SCOREBOARD_HASH(reg_id);
  chunk_base = SCOREBOARD_CHUNK_BASE(reg_id);
  NASD_ASSERT(reg_id >= chunk_base);

  /*
   * Do hits on first nasd_nl_reg_scoreboard_thresh regions only.
   */
  for(n=0,r=nasd_nl_reg_current_queue.t_next;
    (r!=&nasd_nl_reg_current_queue)&&(n<nasd_nl_reg_scoreboard_thresh);
    r=r->t_next,n++)
  {
#if NASD_NL_REG_SCOREBOARD_TIMERS > 0
    nasd_nl_reg_scoreboard_timers.access_regs++;
#endif /* NASD_NL_REG_SCOREBOARD_TIMERS > 0 */
    r->accessed++;
    /* look for an existing hash entry covering this region's chunk */
    for(ch=r->chash[h];ch;ch=ch->c_next) {
      if (ch->baseid == chunk_base) {
        ch->cnt[reg_id-chunk_base]++;
        CH_DEQ(r,ch,h);
        break;
      }
    }
    if (ch == NULL) {
      /* none yet: try to get a new one (may fail and leave ch NULL) */
      CH_GET(ch);
      if (ch) {
        ch->baseid = chunk_base;
        ch->cnt[reg_id-chunk_base] = 1;
      }
    }
    if (ch) {
      CH_ENQ(r, ch, h);
    }
  }

  if (r && (n >= nasd_nl_reg_scoreboard_thresh)) {
    /*
     * "Expire" extra entries off queue
     */
    for(;r!=&nasd_nl_reg_current_queue;r=next) {
      next = r->t_next;
      NASD_ASSERT(r->tm_handle != NASD_TIMEOUT_HANDLE_NULL);
      RI_DEQ(r);
      nasd_timeout_cancel(r->tm_handle);
      r->tm_handle = NASD_TIMEOUT_HANDLE_NULL;
      if (r->accessed == 0)
        r->noaccess++;
      r->accessed = 0;
    }
  }

  NASD_ASSERT(ri->t_next == NULL);
  NASD_ASSERT(ri->t_prev == NULL);
  if (readd) {
    /*
     * Only requeue when a timeout is armed for this info.
     * Without one, nothing would ever take it back off the
     * queue, and the expire loop above asserts that every
     * queued info has a live timeout handle.
     */
    RI_ENQ(ri);
  }

done:
  UNLOCK_SCOREBOARD();

done2:
#if NASD_NL_REG_SCOREBOARD_TIMERS > 0
  NASD_TM_STOP(&tm);
  NASD_TM_ELAPSED_TS(&tm, &ts);
  NASD_ATOMIC_TIMESPEC_ADD(&nasd_nl_reg_scoreboard_timers.access_time, &ts);
#endif /* NASD_NL_REG_SCOREBOARD_TIMERS > 0 */
  return;
}

#endif /* NASD_NL_REG_SCOREBOARD > 0 */

#if NASD_NL_REG_TIME_COBLKS_HELPER > 0
/*
 * Print the timing values kept for nasd_nl_reg_get_coblks_helper():
 * the overall time first, then each per-phase timespec via the
 * local TMT() macro. Only compiled when
 * NASD_NL_REG_TIME_COBLKS_HELPER is enabled. The argument is
 * unused (the signature looks like a shutdown hook's - confirm
 * against where it is registered).
 */
void
nasd_nl_reg_print_coblks_helper_time(
  void  *ignored)
{
  nasd_printf("nasd_nl_reg_get_coblks_helper() took %d:%09d\n",
    nasd_nl_reg_get_coblks_helper_time.ts_sec,
    nasd_nl_reg_get_coblks_helper_time.ts_nsec);
  /* make sure no earlier TMT definition is in effect */
#ifdef TMT
#undef TMT
#endif
  /* TMT(x): print the name of timespec x and its sec:nsec value */
#define TMT(_tm_) { \
  nasd_printf("%50s: %d:%09d\n", NASD_STRING(_tm_), (_tm_).ts_sec, \
    (_tm_).ts_nsec); \
}
  TMT(nasd_nl_reg_get_coblks_helper_get_regent_time);
  TMT(nasd_nl_reg_get_coblks_helper_o1_time);
  TMT(nasd_nl_reg_get_coblks_helper_o2_time);
  TMT(nasd_nl_reg_get_coblks_helper_getblk_time);
  TMT(nasd_nl_reg_get_coblks_helper_bail1_time);
  TMT(nasd_nl_reg_get_coblks_helper_relblk_time);
  TMT(nasd_nl_reg_get_coblks_helper_dance_time);
}
#endif /* NASD_NL_REG_TIME_COBLKS_HELPER > 0 */

/*
 * Init function for no-cluster-fetching regioning.
 *
 * Runs the regular region layout init and, if that
 * succeeds, clears nasd_odc_read_regions. Returns the
 * init error on failure, NASD_SUCCESS otherwise.
 */
nasd_status_t
nasd_nl_reg_nocl_init(
  nasd_od_config_t *config)
{
  nasd_status_t status;

  status = nasd_nl_reg_init(config);
  if (status == 0) {
    nasd_odc_read_regions = 0;
    return(NASD_SUCCESS);
  }

  return(status);
}

#endif /* NASD_DRIVE_LAYOUT_REGION_INCLUDE > 0 */

/* Local Variables:  */
/* indent-tabs-mode: nil */
/* tab-width: 2 */
/* End: */
