/* 
 * Mach Operating System
 * Copyright (c) 1989 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * HISTORY
 * $Log:	afs_cache.c,v $
 * Revision 2.17  90/03/28  14:26:02  dlb
 * 	More afs_GetDownD and afs_CheckSize fixes.  Use reserved value
 * 	(AFS_GETDOWND_SLOT_ONLY = -1) to tell afs_GetDownD that only
 * 	slots are needed instead of setting ablocks to zero.  Fix afs_CheckSize
 * 	to pass zero (requesting complete cache flush) instead of a negative
 * 	number if the cache size comes up negative.  Change printf at end of
 * 	afs_GetDownD to complain about incomplete flush instead of cache size
 * 	over limit if flush was requested.
 * 	[90/03/28            dlb]
 * 
 * Revision 2.16  90/02/09  12:33:02  berman
 * 	Complete rewrite of afs_GetDownD() and changes to afs_CheckSize
 * 	to implement new disk space allocation technique.  This lets
 * 	the afs cache grow whenever it is feasible.   from jsb
 * 	[90/02/09            berman]
 * 
 * Revision 2.15  89/10/19  13:13:43  af
 * 	If out of luck in get cache space just print a warning and return.
 * 	Presumably users will take action and enlarge the AFS cache.
 * 	[89/10/19  12:58:00  af]
 * 
 * Revision 2.14  89/09/05  20:39:39  jsb
 * 	Added a panic in afs_GetDCache to ensure that rx_EndCall doesn't
 * 	wipe out any error returned from CacheFetchProc.
 * 	Added a panic in afs_InitCacheInfo to catch osi_Write failure.
 * 	Added a panic in afs_WriteDCache to catch osi_Write failure,
 * 	since everyone ignores afs_WriteDCache's return value.
 * 	In general, a panic is better than an ignored error from osi_Write,
 * 	which can silently corrupt cache files.
 * 	[89/09/05  20:24:45  jsb]
 * 
 * Revision 2.13  89/08/24  19:35:29  jsb
 * 	Eliminated callback extension hack. Added use of afs_IsWired in
 * 	afs_GetDownD (so that no wired cache files get flushed) and in
 * 	afs_VerifyVCache and afs_GetVCache (so that callbacks and callback
 * 	expirations are ignored while a file is wired).
 * 	[89/08/24  17:02:04  jsb]
 * 
 * Revision 2.12  89/08/10  22:41:56  jsb
 * 	Added missing ReleaseWriteLock in code to extend callback.
 * 	[89/08/10  22:38:36  jsb]
 * 
 * Revision 2.11  89/08/09  09:47:43  jsb
 * 	Added support for resource pausing. Fixed bug in afs_GetDownV which
 * 	made it too stingy about allocating new vcache entries.
 * 	[89/08/09  09:32:07  jsb]
 * 
 * Revision 2.10  89/08/02  07:58:21  jsb
 * 	Replaced osi_Alloc with osi_Zalloc whereever possible. Allocate
 * 	vcaches on demand instead of preallocating a huge free list.
 * 	Print more informative message during cache scan.
 * 	Added hack so that if a callback expires while a server is
 * 	down, we extend the callback for a few seconds (this is
 * 	primarily to keep demand paged executables from dying).
 * 	[89/07/31  19:24:54  jsb]
 * 
 * Revision 2.9  89/06/12  14:50:10  jsb
 * 	Changed initialization of vm_info fields to use vm_info_init.
 * 	[89/06/12  10:28:55  jsb]
 * 
 * Revision 2.8  89/06/03  15:26:23  jsb
 * 	Merged with newer ITC sources. This includes a fix to not flush chunks
 * 	from the cache that belong to open files or running executables.
 * 	[89/05/26  19:22:29  jsb]
 * 
 * Revision 2.7  89/05/06  15:54:10  rpd
 * 	From jsb: Initialize vm_info when allocating a vcache in afs_NewVCache.
 * 	[89/05/06            rpd]
 * 
 * Revision 2.6  89/04/22  15:12:58  gm0w
 * 	Updated to RX version.
 * 	[89/04/14            gm0w]
 * 
 */
/*
 * P_R_P_Q_# (C) COPYRIGHT IBM CORPORATION 1987, 1988
 * LICENSED MATERIALS - PROPERTY OF IBM
 * REFER TO COPYRIGHT INSTRUCTIONS FORM NUMBER G120-2083
 */

#include <afs/param.h>
#include <sys/types.h>
#ifdef	AFS_AUX_ENV
#include <sys/mmu.h>
#include <sys/seg.h>
#include <sys/sysmacros.h>
#include <sys/signal.h>
#include <sys/errno.h>
#endif
#include <sys/param.h>
#if	!defined(AFS_IBM_ENV) || !defined(sys_rt_r3)
#include <sys/time.h>
#endif	AFS_IBM_ENV
#ifdef	AFS_AIX_ENV
#include <sys/errno.h>
#include <afs/aix_vfs.h>
#else
#include <sys/kernel.h>
#endif
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/dir.h>
#include <sys/user.h>
#include <sys/file.h>
#include <sys/uio.h>
#ifdef	AFS_GFS_ENV
#include <sys/mount.h>
#include <afs/gfs_vfs.h>
#include <afs/gfs_vnode.h>
#else
#ifdef	AFS_MACH_ENV
#include <vfs/vfs.h>
#include <vfs/vnode.h>
#include <sys/inode.h>
#include <vm/vm_pager.h>
#include <kern/mfs.h>
#else	AFS_MACH_ENV
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <ufs/inode.h>
#endif	AFS_MACH_ENV
#endif	AFS_GFS_ENV
#include <netinet/in.h>
#include <sys/mbuf.h>
#include <rpc/types.h>
#include <rpc/xdr.h>
#include <sys/stat.h>

#include <afs/osi.h>
#include <rx/rx.h>

#include <afs/lock.h>
#include <afs/volerrors.h>
#include <afsint/afsint.h>
#include <afs/afs.h>

extern char *afs_GetMariner();
extern struct vfs *afs_globalVFS;	/* vfs that afs_NewVCache attaches every new vcache to */

/*
 * Convenient release macro for use when afs_PutDCache would cause deadlock
 * on the afs_xdcache lock; it only drops the reference count.
 */
#define	lockedPutDCache(ad) ((ad)->refCount--)

struct osi_dev cacheDev;		    /* cache device */ 
struct vfs *cacheVFS;			    /* cache VFS (statfs'ed by afs_CheckSize) */
struct afs_lock afs_xvcache;		    /* lock: alloc new stat cache entries */
long afs_mariner = 0;
struct afs_lock afs_xdcache;		    /* lock: alloc new disk cache entries */
struct afs_lock afs_xvcb;		    /* lock: per-server queued callback lists (afs_QueueVCB, afs_RemoveVCB) */
static long cacheCounter=0;		    /* number of disk cache entries */
static long cacheInfoModTime=0;
struct vcache *freeVCList = 0;		    /* free list for stat cache entries, linked through lruq.next */
short freeDCList = NULLIDX;		    /* free list for disk cache entries: slot index, chained via f.hvNextp */
long  freeDCCount = 0;			    /* count of elts in freeDCList */
/*
 * freeDSList is the free list of in-memory dcache structures, linked through
 * lruq.next.  Initial_freeDSList keeps the base of the array that
 * afs_CacheInit allocated for them.
 */
struct dcache *freeDSList = 0, *Initial_freeDSList;    /* free list for disk slots */
long cacheInode;
long volumeInode;
struct afs_q VLRU, DLRU;		    /* LRU queues: vcache entries and in-memory dcache entries */
struct vcache *afs_FindVCache();

short afs_dvhashTable[DVHASHSIZE];	/* Data cache hash table keyed by DVHash(fid); chained via f.hvNextp */
short afs_dchashTable[DCHASHSIZE];	/* Data cache hash table keyed by DCHash(fid, chunk); chained via f.hcNextp */
struct dcache **afs_indexTable;		/* pointers to dcache entries */
unsigned long *afs_indexTimes;		/* Dcache entry Access times */
unsigned short *afs_indexLRU;		/* LRU sorting index, for GetDownD */
char *afs_indexFlags;			/* per-slot flag bits (IFFree and IFDataMod are used in this file) */
unsigned long afs_indexCounter=0;	/* fake time for marking index entries */
long afs_cacheFiles;			/* size of afs_indexTable */
long afs_cacheBlocks;			/* 1K blocks in cache */
long afs_origCacheBlocks;		/* from boot */
long afs_minCacheBlocks;		/* minimum cache size */
long afs_cacheStats;			/* stat entries in cache */
struct vcache *afs_vhashTable[VCSIZE];	/* stat cache hash table */
long afs_blocksUsed;			/* number of blocks in use */
long afs_reusedFiles = 0;		/* number of files reused */
long afs_vcount = 0;			/* number of vcache entries */
long afs_maxvcount = 0;			/* max num vcache entries */

/*
 * Special value of blocks requested for afs_GetDownD if only slots needed.
 * WARNING: afs_GetDownD assumes any number >= this is an ok block req.
 * The expansion is parenthesized so the negative constant stays a single
 * operand wherever the macro is used.
 */
#define	AFS_GETDOWND_SLOT_ONLY	(-1)

/* Initialization order is important.  Must first call afs_CacheInit.
  Next must call cache file and volume file initialization routines.
  Next must call individual cache entry initialization routines.
  
  In this routine, astatSize is the number of stat cache (vnode) entries to allocate.
  afiles is the number of disk files to allocate to the cache
  ablocks is the max number of 1024 byte units that all of the files in the cache may occupy
  
  This routine should only be called at initialization time, since it reclaims no resources
  and doesn't sufficiently synchronize with other processes.
*/

#define	DDSIZE	    100		/* max # of struct dcache's resident at any time */

afs_CacheInit(astatSize, afiles, ablocks)
    long afiles;
    long astatSize, ablocks; {
    register struct vcache *tvp;
    register struct dcache *tdp;
    register long i;

    if (afiles > 0xffff) {
	/* since afs_indexLRU is an array of unsigned shorts... */
	printf("afs: Cannot handle %d files; will use %d files instead\n",
	    afiles, 0xffff);
	afiles = 0xffff;
    }
    printf("afs: starting cache scan: %d files (%d blocks).\n",
	afiles, ablocks);
    Lock_Init(&afs_xvcache);
    Lock_Init(&afs_xdcache);
    Lock_Init(&afs_xvcb);

    /* initialize hash tables */
    for(i=0;i<DVHASHSIZE;i++) afs_dvhashTable[i] = NULLIDX;
    for(i=0;i<DCHASHSIZE;i++) afs_dchashTable[i] = NULLIDX;

    /* Allocate and thread the struct vcache entries */
    freeVCList = (struct vcache *) 0;
    if (!afs_maxvcount) afs_maxvcount = astatSize;
    /* Allocate and zero the pointer array to the dcache entries */
    afs_indexTable = (struct dcache **)
      osi_Alloc(sizeof(struct dcache *) * afiles);
    bzero(afs_indexTable, sizeof(struct dcache *) * afiles);
    afs_indexTimes = (unsigned long *) osi_Alloc(afiles * sizeof(long));
    bzero(afs_indexTimes, afiles * sizeof(long));
    afs_indexFlags = (char *) osi_Alloc(afiles * sizeof(char));
    bzero(afs_indexFlags, afiles * sizeof(char));
    afs_indexLRU = (unsigned short *) osi_Alloc(afiles * sizeof(short));
    for(i=0;i<afiles;i++) afs_indexLRU[i] = i;

    /* Allocate and thread the struct dcache entries themselves */
    tdp = Initial_freeDSList = (struct dcache *) osi_Alloc(DDSIZE * sizeof(struct dcache));
    bzero(tdp, DDSIZE * sizeof(struct dcache));
    freeDSList = &tdp[0];
    for(i=0; i < DDSIZE-1; i++)
	tdp[i].lruq.next = (struct afs_q *) (&tdp[i+1]);
    tdp[DDSIZE-1].lruq.next = (struct afs_q *) 0;
    afs_cacheFiles = afiles;
    afs_cacheStats = astatSize;
    afs_origCacheBlocks = afs_cacheBlocks = ablocks;
    afs_blocksUsed = 0;
    QInit(&VLRU);
    QInit(&DLRU);
    return 0;
}

/*
 *  This routine ensures that there is room for aextra KB more data in
 *  the cache. It will let the cache grow whenever the disk is less
 *  than 95% full (as reported by df). It will shrink the cache only
 *  when disk usage has exceeded 105%, or when disk usage has exceeded 95%
 *  and the cache is larger than afs_minCacheBlocks (the minimum cache size).
 *  Afs_minCacheBlocks is initialized to the number of blocks in the cache
 *  at startup, and is increased whenever the cache is allowed to grow,
 *  until it reaches afs_cacheBlocks (the cache size specified in
 *  /usr/vice/etc/cacheinfo, nee vstab).
 *
 *  These semantics, while complicated, achieve the following goals:
 *  1. The cache will never try to use more disk space than is available.
 *  2. While the cache is smaller than the cacheinfo limit, it will
 *     accumulate space, while leaving some breathing space for other
 *     users of the same partition. However, it will aggressively
 *     protect space it has accumulated, only giving up blocks when
 *     the super-user requires them (since only root can write to a
 *     disk that is more than 100% full).
 *  3. While the cache is larger than the cacheinfo limit, it will grow
 *     and shrink to keep the disk 95% full, making effective use of
 *     disk space that would otherwise be unused.
 *
 *  The numbers 95% and 105% are actually adjustable with the variables
 *  afs_minavail and afs_maxminavail. I have left 95 and 105 in the
 *  comments and variable names for their mnemonic value.
 *
 *  Afs_minavail is the percentage around 100% to use to calculate
 *  {blocks,avail}At{95,105}. The default is 5%.
 *
 *  Afs_maxminavail is the upper bound, in KB, on the distance from 100%
 *  calculated from afs_minavail. The default is 10 MB.
 */
int afs_minavail = 5;		/* for 95% and 105% */
int afs_maxminavail = 10240;	/* 10 Meg */

afs_CheckSize(aextra)
    register long aextra; {
    struct statfs fstat;
    int bavail, blocksAt95, blocksAt105, blocksToBeUsed, reqBlocks;
    static int availAt95 = 0, availAt105 = 0;

    /*
     *  Get stats for the filesystem that the cache lives on.
     */
    VFS_STATFS(cacheVFS, &fstat);
    /*
     *  If we have not done so before, calculate the values that bavail will
     *  be at when the disk is 95% and 105% full (as reported by df).
     *  Note that bavail is 0 at 100% and thus availAt105 will be negative.
     *
     *  While we're at it, set initial value for afs_minCacheBlocks.
     */
    if (availAt95 == 0) {
	availAt95 = afs_minavail * fstat.f_bsize * fstat.f_blocks / 100 / 1024;
	availAt95 = MIN(availAt95, afs_maxminavail);
	availAt105 = -availAt95; /* since bavail at 100% is 0 */
	afs_minCacheBlocks = afs_blocksUsed;
    }
    /*
     *  Calculate the available disk space, in 1K blocks.
     *  Note that this can be negative (despite comment in ufs_statfs).
     */
    bavail = (fstat.f_bavail * fstat.f_bsize) / 1024;
    /*
     *  Calculate the number of blocks that would be in the cache if the disk
     *  were 95% or 105% full, holding non-cache disk usage fixed.
     */
    blocksAt95  = afs_blocksUsed - (availAt95  - bavail);
    blocksAt105 = afs_blocksUsed - (availAt105 - bavail);
    /*
     *  Calculate the number of blocks that would be in the cache if
     *  aextra blocks were added. If this would result in less
     *  than 95% of the disk being used, then there is no need to
     *  shrink cache, so return.
     */
    blocksToBeUsed = afs_blocksUsed + aextra;
    if (blocksToBeUsed <= blocksAt95) {
	/*
	 *  Compute new minimum cache size, if necessary.
	 *  This is the only place where it is increased.
	 *  The MIN ensures that the minimum cache size never
	 *  exceeds afs_cacheBlocks (the cacheinfo limit).
	 */
	if (afs_minCacheBlocks < MIN(afs_cacheBlocks, blocksToBeUsed)) {
	    afs_minCacheBlocks = MIN(afs_cacheBlocks, blocksToBeUsed);
	}
	return;
    }
    /*
     *  If disk is getting too full, back off on minimum cache size.
     *  This is the only place where it is decreased.
     */
    if (afs_minCacheBlocks > blocksAt105) {
	afs_minCacheBlocks = blocksAt105;
    }
    /*
     *  If we will not exceed minimum cache size, then return
     *  without shrinking cache.
     */
    if (blocksToBeUsed <= afs_minCacheBlocks) {
	return;
    }
    /*
     *  At this point, if we do not shrink the cache, we will
     *  exceed the minimum cache size, and disk usage will exceed 95%.
     *  We must shrink cache by enough such that one of these conditions
     *  will not be true. This is achieved by shrinking cache to
     *      MAX(afs_minCacheBlocks, blocksAt95) - aextra
     *  since after adding aextra blocks to the cache, either
     *      afs_blocksUsed <= afs_minCacheBlocks
     *  or
     *      afs_blocksUsed <= blocksAt95
     *  will be true.
     */
    reqBlocks = MAX(afs_minCacheBlocks, blocksAt95) - aextra;
    if (reqBlocks <= 0) {
	printf("afs_CheckSize: flushing cache to make room for %d blocks!\n",
		aextra);
	printf("afs_CheckSize: cache size too small, or partition too full\n");
	reqBlocks = 0;
    }
    ObtainWriteLock(&afs_xdcache);
    afs_GetDownD(reqBlocks, 0);
    ReleaseWriteLock(&afs_xdcache);
}

/*
 * afs_AdjustSize -- record a new byte count for a disk cache entry.
 *
 * Stores anewSize in adc->f.chunkBytes, marks the entry as modified, and
 * adjusts afs_blocksUsed by the change in size, with both the old and the
 * new size rounded up to a multiple of 1024 bytes first.
 */
afs_AdjustSize(adc, anewSize)
    register struct dcache *adc;
    register long anewSize;
{
    register long prevRounded, nextRounded;

    /* round both byte counts up to the next 1K boundary */
    prevRounded = ((adc->f.chunkBytes - 1) | 1023) + 1;
    nextRounded = ((anewSize - 1) | 1023) + 1;

    adc->f.chunkBytes = anewSize;
    adc->flags |= DFEntryMod;

    /* account for the difference in whole 1K blocks */
    afs_blocksUsed += ((nextRounded - prevRounded) >> 10);
}

/*
 *  Auxiliary routines for afs_SortTimes (see below).
 */

/*
 *  Compare two afs_indexLRU elements by the access times of the slots they
 *  name.  The unsigned difference of the two afs_indexTimes values is cast
 *  to long, so the result is > 0 when *x is "later" than *y, 0 when the
 *  times are equal, and < 0 otherwise.
 */
#define	afs_qcmp(x, y)	((long)(afs_indexTimes[*(x)] - afs_indexTimes[*(y)]))

/*
 *  Quicksort pass over the elements in [base, max), ordered by afs_qcmp.
 *  Partitions of fewer than 4 elements are left unsorted here; the
 *  insertion sort in afs_SortTimes finishes them off.
 */
afs_qst(base, max)
    unsigned short *base, *max; {
    register unsigned short c, *i, *j, *jj;
    unsigned short *mid, *tmp;
    int lo, hi;

    lo = max - base;			/* number of elements in this partition */
    do {
	mid = i = base + (lo >> 1);
	if (lo >= 6) {
	    /*
	     * Median of three: j ends up at the median of the first, middle
	     * and last elements, which is then swapped into the middle.
	     */
	    j = (afs_qcmp((jj = base), i) > 0 ? jj : i);
	    if (afs_qcmp(j, (tmp = max - 1)) > 0) {
		j = (j == jj ? i : jj);	/* switch to the smaller of first/middle */
		if (afs_qcmp(j, tmp) < 0) j = tmp;
	    }
	    if (j != i) { c = *i; *i++ = *j; *j++ = c; }
	}
	/*
	 * Partition around *mid: i walks up from base, j walks down from
	 * the last element.  mid follows the pivot whenever it is moved.
	 */
	for (i = base, j = max - 1; ; ) {
	    while (i < mid && afs_qcmp(i, mid) <= 0) i++;
	    while (j > mid) {
		if (afs_qcmp(mid, j) <= 0) { j--; continue; }
		tmp = i + 1;		/* value of i after the swap */
		if (i == mid) mid = jj = j; else jj = j--;
		goto swap;
	    }
	    if (i == mid) break;	/* both scans met at the pivot: done */
	    jj = mid; tmp = mid = i; j--;	/* swap i with the pivot; pivot is now at i */
	swap:
	    c = *i; *i++ = *jj; *jj++ = c; i = tmp;
	}
	/*
	 * Recurse on the smaller side (if it has at least 4 elements) and
	 * keep looping on the larger one.
	 */
	i = (j = mid) + 1;
	if ((lo = j - base) <= (hi = max - i)) {
	    if (lo >= 4) afs_qst(base, j);
	    base = i; lo = hi;
	} else {
	    if (hi >= 4) afs_qst(i, max);
	    max = j;
	}
    } while (lo >= 4);
}

/*
 *  afs_SortTimes -- sort afs_indexLRU so that slots appear from least to
 *  most recently used (ascending afs_indexTimes, as ordered by afs_qcmp).
 *  It is called by afs_GetDownD.
 *
 *  Adapted from the 4.3BSD qsort library routine: afs_qst does a coarse
 *  quicksort that leaves runs of fewer than 4 elements unsorted, then an
 *  insertion sort with a sentinel in slot 0 finishes the job.
 *
 *  As in the library routine, arrays of fewer than 4 elements skip the
 *  quicksort and limit the sentinel search to the elements that really
 *  exist; without that, a cache of fewer than 4 files would make the
 *  sentinel search read past the end of afs_indexLRU.
 */
afs_SortTimes() {
    register unsigned short c, *i, *j, *lo, *hi;
    unsigned short *min, *max;

    if (afs_cacheFiles <= 1) return;	/* nothing to sort */

    max = &afs_indexLRU[afs_cacheFiles];
    if (afs_cacheFiles >= 4) {
	afs_qst(afs_indexLRU, max);
	hi = afs_indexLRU + 4;		/* smallest element is among the first 4 */
    } else {
	hi = max;			/* too small for afs_qst: look at everything */
    }

    /* find the smallest element in [afs_indexLRU, hi) ... */
    for (j = lo = afs_indexLRU; ++lo < hi; )
	if (afs_qcmp(j, lo) > 0) j = lo;
    /* ... and put it in the first position as a sentinel */
    if (j != afs_indexLRU) {
	c = *j; *j = *afs_indexLRU; *afs_indexLRU = c;
    }

    /*
     * Insertion sort.  The sentinel guarantees the backwards scan stops
     * without an explicit bounds check.
     */
    for (min = afs_indexLRU; (hi = ++min) < max; ) {
	while (afs_qcmp(--hi, min) > 0) ;
	if (++hi != min) {
	    /* slide [hi, min) up by one and drop *min into the gap at hi */
	    c = *min;
	    for (i = min; i > hi; i--) *i = *(i - 1);
	    *hi = c;
	}
    }
}

/*
 *  afs_GetDownD -- flush cache files until the cache is small enough.
 *
 *  This routine tries to ensure that the disk space used by the cache is
 *  no greater than ablocks, and that there are at least aslots free cache
 *  files.
 *
 *  ablocks:	block ceiling for the cache.  0 requests a complete flush;
 *		AFS_GETDOWND_SLOT_ONLY means that only free slots are
 *		wanted and the block count is not to be looked at.
 *		Anything smaller than AFS_GETDOWND_SLOT_ONLY panics.
 *  aslots:	number of free cache files wanted; must be less than
 *		afs_cacheFiles or the routine panics.
 *
 *  The routine makes three passes over the cache files, each time going
 *  from least to most recently used (afs_indexLRU, freshly sorted):
 *
 *  Pass 0: only slots with no in-memory dcache structure
 *  Pass 1: also slots whose in-memory dcache has a zero refCount
 *  Pass 2: as pass 1, but files that are active are flushed as well
 *
 *  Slots flagged IFDataMod or IFFree, and files that are wired, are never
 *  flushed.  The routine returns as soon as a flush leaves both requests
 *  satisfied.  If the passes run out first, it panics when there are too
 *  few free files and prints a warning when the cache is still too big.
 *
 *  This routine must be called with afs_xdcache write-locked.
 */
afs_GetDownD(ablocks, aslots)
    int	ablocks;
    int aslots;
{
    struct dcache *tdc;
    struct vcache *tvc;
    int i, i0, pass;

    if (CheckLock(&afs_xdcache) != -1) {
	panic("afs_GetDownD: nolock");
    }
    if (ablocks < AFS_GETDOWND_SLOT_ONLY || aslots >= afs_cacheFiles) {
	printf("afs_GetDownD(blocks=%d, slots=%d)\n", ablocks, aslots);
	panic("afs_GetDownD: absurd request\n");
    }
    if (afs_debug & AFSDEB_GENERAL) {
	afs_dp("afs_GetDownD(blocks=%d, slots=%d)\n", ablocks, aslots);
    }

    afs_SortTimes();

    for (pass = 0; pass <= 2; pass++) {
	for (i0 = 0; i0 < afs_cacheFiles; i0++) {
	    i = afs_indexLRU[i0];
	    if (afs_indexFlags[i] & (IFDataMod | IFFree)) {
		continue; /* can't flush, or already free */
	    }
	    tdc = afs_indexTable[i];
	    if (tdc) {
		if (tdc->refCount > 0 || pass < 1) {
		    continue; /* can't flush, or not desperate enough yet */
		}
		tdc->refCount++;	/* hold it while we work on it */
	    } else {
		/* not in memory; the matching refCount-- is below */
		tdc = afs_GetDSlot(i, 0);
	    }
	    /*
	     * Every path below drops vrefCount again, so afs_FindVCache
	     * presumably returns the vcache with a reference held.
	     */
	    tvc = afs_FindVCache(&tdc->f.fid);
	    if (tvc) {
		if ((pass < 2 && osi_Active(tvc)) || afs_IsWired(tvc)) {
		    /* file in use (and not desperate yet), or wired */
		    tdc->refCount--;
		    tvc->vrefCount--;
		    continue;
		}
		tvc->vrefCount--;
	    }
	    afs_FlushDCache(tdc);
	    tdc->refCount--;
	    if (pass == 2) {
		printf("afs: cache too small: flushing active file\n");
	    }
	    /* stop as soon as both the slot and the block request are met */
	    if (freeDCCount >= aslots) {
		if (ablocks == AFS_GETDOWND_SLOT_ONLY ||
		    afs_blocksUsed <= ablocks) {
			return;
		}
	    }
	}
    }
    if (freeDCCount < aslots) {
	panic("afs: not enough files in cache");
    }
    /*
     * Only complain about blocks if the caller actually asked for a block
     * ceiling: for a slot-only request ablocks is -1, and comparing
     * afs_blocksUsed against it would report a bogus overrun.
     */
    if (ablocks != AFS_GETDOWND_SLOT_ONLY && afs_blocksUsed > ablocks) {
	if (ablocks == 0)
	    printf("afs: incomplete cache flush, %d blocks remain\n",
		afs_blocksUsed);
	else
	    printf("afs: cache size exceeding limit by %d blocks\n",
		afs_blocksUsed - ablocks);
    }
}

/*
 * afs_FlushDCache -- discard the cached data held by one disk cache entry.
 *
 * Unlinks adc from both data cache hash chains (if it is on them), marks
 * it as belonging to no file, truncates its cache file to zero length, and
 * puts the slot on the free disk cache entry list (freeDCList).
 *
 * Panics if the entry cannot be found on a hash chain it should be on, or
 * if its cache file cannot be opened.
 *
 * this routine must be called with the afs_xdcache lock held (in write mode)
 */
afs_FlushDCache(adc)
    register struct dcache *adc; {
    register struct dcache *udc;
    register long i;
    register short us;
    struct osi_file *tfile;
    struct dcache tmpdc;

    /* we know this guy's in the LRUQ (NOTE(review): nothing in this routine moves it off that queue) */
    if (afs_debug & AFSDEB_GENERAL) afs_dp("flushing %x (slot %d) bytes: %d\n", adc, adc->index, adc->f.chunkBytes);

    DZap(&adc->f.inode);
    /* if this guy is in the hash table, pull him out (Volume == 0 means "no file") */
    if (adc->f.fid.Fid.Volume != 0) {
	/* remove entry from first hash chain: (fid, chunk), linked by f.hcNextp */
	i = DCHash(&adc->f.fid, adc->f.chunk);
	us = afs_dchashTable[i];
	if (us == adc->index) {
	    /* first dude in the list */
	    afs_dchashTable[i] = adc->f.hcNextp;
	}
	else {
	    /* somewhere on the chain */
	    while (us != NULLIDX) {
		/*
		 * Supply a temporary dcache structure to use in looking up the
		 * next slot in case it's not already in memory - we're here
		 * because there's a shortage of them!
		 */
		udc = afs_GetDSlot(us, &tmpdc);
		if (udc->f.hcNextp == adc->index) {
		    /* found item pointing at the one to delete */
		    udc->f.hcNextp = adc->f.hcNextp;
		    afs_WriteDCache(udc, 1);
		    lockedPutDCache(udc); /* fix refCount*/
		    break;
		}
		us = udc->f.hcNextp;
		lockedPutDCache(udc);
	    }
	    /* ran off the end of the chain without finding adc */
	    if (us == NULLIDX) panic("dcache hc");
	}

	/* remove entry from *other* hash chain: fid only, linked by f.hvNextp */
	i = DVHash(&adc->f.fid);
	us = afs_dvhashTable[i];
	if (us == adc->index) {
	    /* first dude in the list */
	    afs_dvhashTable[i] = adc->f.hvNextp;
	}
	else {
	    /* somewhere on the chain */
	    while (us != NULLIDX) {
		/*
		 * Same as above: don't ask the slot lookup to grab an
		 * in-memory dcache structure - we can't spare one.
		 */
		udc = afs_GetDSlot(us, &tmpdc);
		if (udc->f.hvNextp == adc->index) {
		    /* found item pointing at the one to delete */
		    udc->f.hvNextp = adc->f.hvNextp;
		    afs_WriteDCache(udc, 1);
		    lockedPutDCache(udc); /* fix refCount */
		    break;
		}
		us = udc->f.hvNextp;
		lockedPutDCache(udc);
	    }
	    /* ran off the end of the chain without finding adc */
	    if (us == NULLIDX) panic("dcache hv");
	}
    }

    /* format the entry to look like it has no associated file any more */
    adc->f.fid.Fid.Volume = 0;	/* invalid */
    
    /* free its space: truncate the cache file and fix the block accounting */
    tfile = osi_UFSOpen(&cacheDev, adc->f.inode);
    if (!tfile) panic("flushdcache truncate");
    osi_Truncate(tfile, 0);
    afs_AdjustSize(adc,	0);	/* fix up size */
    osi_Close(tfile);

    /* finally put the entry in the free list (which reuses the f.hvNextp link) */
    adc->f.hvNextp = freeDCList;
    freeDCList = adc->index;
    freeDCCount++;
    afs_indexFlags[adc->index] |= IFFree;
    adc->flags |= DFEntryMod;	/* entry changed in memory; it is not written out here */
}

/*
 * afs_GetDownDSlot -- try to have anumber in-memory dcache structures free.
 *
 * Structures already on freeDSList count toward anumber.  For the rest,
 * the DLRU queue is walked from its tail (DLRU.prev) and every entry with
 * a zero refCount is written back if modified, detached from its cache
 * slot, and pushed onto freeDSList.  The walk stops when enough entries
 * have been freed or the queue is exhausted.
 *
 * Must be called with afs_xdcache write-locked; panics otherwise.
 */
afs_GetDownDSlot(anumber)
    int anumber;
{
    register struct afs_q *cursor, *older;
    register struct dcache *entry;
    register long slot;
    int shortfall;

    if (CheckLock(&afs_xdcache) != -1) {
	panic("getdowndslot nolock");
    }

    /* whatever is already free counts toward the request */
    shortfall = anumber;
    entry = freeDSList;
    while (entry) {
	shortfall--;
	entry = (struct dcache *) entry->lruq.next;
    }
    if (shortfall <= 0) {
	return;			/* enough already free */
    }

    cursor = DLRU.prev;
    while (cursor != &DLRU && shortfall > 0) {
	entry = (struct dcache *) cursor;	/* queue link is the first field of a dcache */
	older = QPrev(cursor);			/* fetch now, in case we unlink cursor */
	cursor = older;

	if (entry->refCount != 0) {
	    continue;			/* still in use */
	}

	slot = entry->index;
	if (slot == NULLIDX) {
	    panic("getdowndslot");
	}

	/* take it off the LRU queue */
	QRemove(&entry->lruq);

	/* write-through if modified */
	if (entry->flags & DFEntryMod) {
	    entry->flags &= ~DFEntryMod;
	    afs_WriteDCache(entry, 1);
	}

	/* detach it from its cache slot and stamp the slot's access time */
	afs_indexTable[slot] = (struct dcache *) 0;
	afs_indexTimes[slot] = afs_indexCounter++;
	entry->index = NULLIDX;

	/* and push it onto the free list */
	entry->lruq.next = (struct afs_q *) freeDSList;
	freeDSList = entry;
	shortfall--;
    }
}

/*
 * afs_GetDownV -- try to get up to anumber vcache entries onto freeVCList.
 *
 * anumber is just a hint.  Entries already on the free list count toward
 * it.  If fewer than afs_maxvcount vcaches exist, the routine returns
 * without flushing anything so that the caller can allocate a new one.
 * Otherwise VLRU is walked from its tail (VLRU.prev) and entries with a
 * zero vrefCount are handed to afs_FlushVCache.
 *
 * This routine must be called with afs_xvcache write-locked; it panics
 * otherwise.
 */
afs_GetDownV(anumber)
    int anumber;
{
    register struct afs_q *cursor;
    struct afs_q *older;
    register struct vcache *entry;
    int shortfall;

    if (CheckLock(&afs_xvcache) != -1) {
	panic("getdownv lock");
    }

    /* whatever is already free counts toward the request */
    shortfall = anumber;
    entry = freeVCList;
    while (entry) {
	shortfall--;
	entry = (struct vcache *) (entry->lruq.next);
    }
    if (shortfall <= 0) {
	return;			/* enough already free */
    }

    if (afs_vcount < afs_maxvcount) {
	return;			/* let caller zalloc new one */
    }

    cursor = VLRU.prev;
    while (cursor != &VLRU && shortfall > 0) {
	entry = QTOV(cursor);
	older = QPrev(cursor);	/* fetch before the entry may leave the queue */
	if (entry->vrefCount == 0) {
	    afs_FlushVCache(entry);
	    shortfall--;
	}
	cursor = older;
    }
    return;
}

/*
 * afs_FlushVCache -- retire a stat cache entry and put it on freeVCList.
 *
 * Returns EBUSY, leaving the entry untouched, if avc is still referenced.
 * Otherwise the entry is taken off the LRU queue and its hash chain, its
 * mvid and linkData storage is released, a callback give-up is queued for
 * it (unless the entry is marked CRO or holds no callback), and 0 is
 * returned.  Panics if the entry is not in the hash bucket for its fid.
 *
 * This routine must be called with the afs_xvcache lock held for writing.
 */
afs_FlushVCache(avc)
    register struct vcache *avc;
{
    register struct vcache **link;

    if (avc->vrefCount != 0) {
	return EBUSY;
    }

    /* off the LRU queue */
    QRemove(&avc->lruq);

    /* walk the hash chain until link points at the pointer that names avc */
    link = &afs_vhashTable[VCHash(&avc->fid)];
    while (*link != (struct vcache *) 0 && *link != avc) {
	link = &(*link)->hnext;
    }
    if (*link) {
	*link = avc->hnext;
    } else {
	panic("flushvcache");	/* not in correct hash bucket */
    }

    if (avc->mvid) {
	osi_Zfree(afs_VenusFid_zone, avc->mvid);
    }
#ifdef	AFS_AIX_ENV
    /* Free the alloced gnode that was accompanying the vcache's vnode */
    aix_gnode_rele((struct vnode *)avc);
#endif
    if (avc->linkData) {
	osi_Free(avc->linkData, strlen(avc->linkData)+1);
    }

    /* onto the free list */
    avc->lruq.next = (struct afs_q *) freeVCList;
    freeVCList = avc;

    /* give the callback back, if we hold one on something not marked CRO */
    if (avc->callback && (avc->states & CRO) == 0) {
	afs_QueueVCB(&avc->fid);
    }

    /* keep track of which vnodes we've deleted for create's optimistic
	synchronization algorithm */
    if ((avc->fid.Fid.Vnode & 1) == 0) {
	afs_evenZaps++;
    } else {
	afs_oddZaps++;
    }
    return 0;
}

/*
 * afs_QueueVCB -- queue a callback to be given back to the file server.
 *
 * Looks up the volume for afid and adds afid's fid to the list of queued
 * callbacks (ts->vcbs) kept for the volume's first server, unless it is
 * already there.  If that list is full (VCBS entries), the whole list is
 * first sent to the server with RXAFS_GiveUpCallBacks and emptied.
 *
 * Returns EINVAL if the volume or its server cannot be found, and 0
 * otherwise.  The success path used to fall off the end of the function,
 * leaving the return value undefined.
 */
afs_QueueVCB(afid)
    register struct VenusFid *afid; {
    register struct server *ts;
    register struct conn *tc;
    struct volume *tv;
    register long code;
    register struct ViceFid *tfid;
    int foundFlag;
    struct vrequest treq;
    struct AFSCallBack CallBacks[1];
    struct AFSCBFids FidArray;
    struct AFSCBs CBArray;

    tv = afs_GetVolume(afid, (struct vrequest *)0);
    if (!tv) return EINVAL;
    /* otherwise, serverHost[0] is the server of interest */
    ts = tv->serverHost[0];
    afs_PutVolume(tv);
    if (!ts) return EINVAL;

    /* now obtain vcb lock, purge queued delete callbacks if need be, and add
	this one, if it isn't already in the list */
    ObtainWriteLock(&afs_xvcb);
    FidArray.AFSCBFids_len = ts->vcbCount;
    FidArray.AFSCBFids_val = (struct AFSFid *) ts->vcbs;
    CBArray.AFSCBs_len = 1;
    CBArray.AFSCBs_val = CallBacks;
    CallBacks[0].CallBackType = EXCLUSIVE;
    if (ts->vcbCount >= VCBS) {
	/* list is full: hand everything queued so far back to the server */
	afs_InitReq(&treq, &osi_cred);
	do {
	    tc = afs_ConnByHost(ts, AFS_FSPORT, ts->cell->cell, &treq, 0);
	    if (tc) code = RXAFS_GiveUpCallBacks(tc->id, &FidArray, &CBArray);
	    else code = -1;
	} while(afs_Analyze(tc, code, 0, &treq));
	/* code doesn't matter, server may have done the work */
	ts->vcbCount = 0;
    }

    /* code is reused here as the loop index */
    foundFlag = 0;
    tfid = ts->vcbs;
    for(code = 0; code < ts->vcbCount; tfid++, code++) {
	if (afid->Fid.Volume == tfid->Volume && afid->Fid.Unique == tfid->Unique && afid->Fid.Vnode == tfid->Vnode) {
	    foundFlag = 1;
	    break;
	}
    }
    if (!foundFlag) ts->vcbs[ts->vcbCount++] = afid->Fid;
    ReleaseWriteLock(&afs_xvcb);
    return 0;
}

/*
 * afs_RemoveVCB -- cancel a queued callback give-up for afid.
 *
 * Looks up the volume for afid and searches the queued callback list
 * (ts->vcbs) of the volume's first server.  A matching entry is not
 * removed from the list; its Volume field is set to 0 instead.
 *
 * Returns EINVAL if the volume or its server cannot be found, and 0
 * otherwise, whether or not a matching entry was present.  The success
 * path used to fall off the end of the function, leaving the return value
 * undefined.
 */
afs_RemoveVCB(afid)
    register struct VenusFid *afid; {
    register int i;
    struct volume *tv;
    register struct server *ts;
    register struct ViceFid *tfid;

    tv = afs_GetVolume(afid, 0);
    if (!tv) return EINVAL;
    /* otherwise, serverHost[0] is the server of interest */
    ts = tv->serverHost[0];
    afs_PutVolume(tv);
    if (!ts) return EINVAL;

    ObtainWriteLock(&afs_xvcb);
    tfid = ts->vcbs;
    for(i=0; i<ts->vcbCount; i++,tfid++) {
	if (afid->Fid.Volume == tfid->Volume && afid->Fid.Unique == tfid->Unique && afid->Fid.Vnode == tfid->Vnode) {
	    tfid->Volume = 0;	/* blank the entry in place; the list keeps its length */
	    break;
	}
    }
    ReleaseWriteLock(&afs_xvcb);
    return 0;
}

/* This routine is responsible for allocating a new cache entry from the free list.  It formats
    the cache entry and inserts it into the appropriate hash tables.  It must be called with
    afs_xvcache write-locked so as to prevent several processes from trying to create
    a new cache entry simultaneously.
    
    The afid parameter is the file id of the file whose cache entry is being created.
*/
struct vcache *afs_NewVCache(afid, ahost)
    long ahost;	    /* host to create callback from */
    register struct VenusFid *afid; {
    register struct vcache *tvc;
    register long i;
#ifdef	AFS_AIX_ENV
    struct gnode *gnodepnt;
#endif
#ifdef	AFS_MACH_ENV
    struct vm_info * vm_info_ptr;
#endif

    /* pull out a free cache entry */
    if (!freeVCList) afs_GetDownV(5);
    if (!freeVCList) {
	/* none free, making one is better than a panic */
	afs_vcount++;	/* count in case we have a leak */
	tvc = (struct vcache *) osi_Zalloc(afs_vcache_zone);
#ifdef	AFS_MACH_ENV
	tvc->v.v_vm_info = VM_INFO_NULL;
	vm_info_init(tvc);
#endif
    }
    else {
	tvc = freeVCList;   /* take from free list */
	freeVCList = (struct vcache *) (tvc->lruq.next);
    }
#ifdef	AFS_MACH_ENV
    vm_info_ptr = tvc->v.v_vm_info;
#endif
    bzero(&tvc->v, sizeof(struct vnode));
#ifdef	AFS_MACH_ENV
    tvc->v.v_vm_info = vm_info_ptr;
    tvc->v.v_vm_info->pager = MEMORY_OBJECT_NULL;
#endif
    tvc->parentVnode = 0;
    tvc->mvid = (struct VenusFid *) 0;
    tvc->linkData = (char *) 0;
    tvc->cbExpires = 0;
    tvc->opens = 0;
    tvc->execsOrWriters = 0;
    tvc->flockCount = 0;
    tvc->anyAccess = 0;
    tvc->states = 0;
    tvc->fid = *afid;
    tvc->flushDV = AFS_MAXDV;	/* don't have to flush text/buffers until vnode read once */
    tvc->truncPos = AFS_NOTRUNC;	/* don't truncate until we need to */
    tvc->m.DataVersion = 0;	/* in case we copy it into flushDV */
    tvc->vrefCount = 1;	/* us */
    i = VCHash(afid);
    tvc->hnext = afs_vhashTable[i];
    afs_vhashTable[i] = tvc;
    QAdd(&VLRU, &tvc->lruq);				/* put in lruq */
    Lock_Init(&tvc->lock);
#ifdef	AFS_GATEWAY
    for(i=0;i<CPSIZE;i++) {
	tvc->randomUid[i] = -1;
	tvc->randomHostaddr[i] = -1;
    }
#else	AFS_GATEWAY
    for(i=0;i<CPSIZE;i++) tvc->randomUid[i] = -1;
#endif	AFS_GATEWAY
    tvc->callback = ahost;	/* to minimize chance that clear request is lost */
    tvc->chunkShift = 30;
    /* initialize vnode data, note vrefCount is v.v_count */
#ifdef	AFS_AIX_ENV
    /* Don't forget to free the gnode space */
    tvc->v.v_gnode = gnodepnt = (struct gnode *) osi_Alloc(sizeof(struct gnode));
    bzero((char *)gnodepnt, sizeof(struct gnode));
#endif    
#ifdef	AFS_MACH_ENV
    tvc->v.v_type = ITYPE_AFS;
#else
    tvc->v.v_op = afs_ops;
#endif
    if (afid->Fid.Vnode == 1 && afid->Fid.Unique == 1)
	tvc->mvstat = 2;
    else
	tvc->mvstat = 0;
    if (afs_globalVFS == (struct vfs *) 0) panic("afs globalvfs");
    vSetVfsp(tvc, afs_globalVFS);
    vSetType(tvc, VREG);
#ifdef	AFS_AIX_ENV
    tvc->v.v_vfsnext = afs_globalVFS->vfs_vnodes;   /* link off vfs */
    tvc->v.v_vfsprev = NULL;
    afs_globalVFS->vfs_vnodes = &tvc->v;
    if (tvc->v.v_vfsnext != NULL)
	tvc->v.v_vfsnext->v_vfsprev = &tvc->v;
    tvc->v.v_next = gnodepnt->gn_vnode;	    /* Single vnode per gnode for us! */
    gnodepnt->gn_vnode = &tvc->v;
#endif
#ifdef	AFS_GFS_ENV
    tvc->v.g_dev = ((struct mount *)afs_globalVFS->vfs_data)->m_dev;
#endif	AFS_GFS_ENN
    return tvc;
}

/*
 * Walk every chain of afs_vhashTable and, for each vcache entry whose
 * flockCount is non-zero, issue an RXAFS_ExtendLock call for its fid.
 * The call is retried for as long as afs_Analyze() returns non-zero; the
 * final result code is not reported to anyone.
 *
 * The requests are made with the credentials in osi_cred (see afs_InitReq
 * below).  No value is returned.
 */
afs_KeepFlocksAlive() {
    register struct vcache *tvc;
    register int i;
    register struct conn *tc;
    register long code;
    struct vrequest treq;
    struct AFSVolSync tsync;

    afs_InitReq(&treq, &osi_cred);
    ObtainReadLock(&afs_xvcache);
    for(i=0;i<VCSIZE;i++) {
	for(tvc = afs_vhashTable[i]; tvc; tvc=tvc->hnext) {
	    /* if this entry has an flock, send a keep-alive call out */
	    if (tvc->flockCount != 0) {
		/* Bump vrefCount so we keep a reference on the entry, drop
		    the xvcache lock for the duration of the RPC, and
		    write-lock the entry itself while the call is made.
		    Everything is undone in the reverse order afterwards.
		    NOTE(review): vrefCount is modified while afs_xvcache is
		    held only at read level -- confirm this is acceptable. */
		tvc->vrefCount++;
		ReleaseReadLock(&afs_xvcache);
		ObtainWriteLock(&tvc->lock);
		do {
		    tc = afs_Conn(&tvc->fid, &treq);
		    if (tc) code = RXAFS_ExtendLock(tc->id, (struct AFSFid *) &tvc->fid.Fid, &tsync);
		    else code = -1;	/* no connection available */
		} while(afs_Analyze(tc, code, &tvc->fid, &treq));
		ReleaseWriteLock(&tvc->lock);
		ObtainReadLock(&afs_xvcache);
		tvc->vrefCount--;   /* our tvc ptr is still good until now */
	    }
	}
    }
    ReleaseReadLock(&afs_xvcache);
}

/*
 * Make sure the status information held in a cache entry is current.
 *
 *	avc	the cache entry to check
 *	areq	identifies the requesting user
 *
 * Nothing is fetched when the entry already has valid status (CStatd) and
 * is either read-only (CRO) or covered by an unexpired callback, or when
 * afs_IsWired() reports the entry as wired.  Otherwise the status is
 * refetched through afs_GetVCache().
 *
 * Returns 0 on success, ENOENT if afs_GetVCache() could not produce the entry.
 */
int afs_VerifyVCache(avc, areq)
    register struct vrequest *areq;
    register struct vcache *avc; {
    register struct vcache *fresh;
    int upToDate;

    ObtainReadLock(&avc->lock);
    /* a callback that has run out is no callback at all */
    if (avc->callback && avc->cbExpires <= osi_Time()) {
	avc->callback = 0;
    }
    /* the wired test is consulted only when the status test fails */
    upToDate = ((avc->states & CStatd) && ((avc->states & CRO) || avc->callback))
	|| afs_IsWired(avc);
    ReleaseReadLock(&avc->lock);
    if (upToDate) {
	return 0;
    }

    /* status is stale: go and get it again */
    fresh = afs_GetVCache(&avc->fid, areq);
    if (fresh == (struct vcache *) 0) {
	return ENOENT;
    }
    /* the caller already holds its own reference; give back the one we just got */
    afs_PutVCache(fresh);
    return 0;
}

/* This function takes a vcache entry, a store-status block and a vrequest structure,
    and is responsible for storing the status information *only* back to the server.

    avc	    the cache entry whose status is being stored
    astatus the new status to send with RXAFS_StoreStatus
    areq    identifies the requesting user

    Locking: the body calls UpgradeSToWLock(&avc->lock) and later
    ConvertWToSLock(&avc->lock), so avc->lock is evidently expected to be held
    at shared level on entry and is left at shared level on return.
    NOTE(review): this comment previously said "read lock" -- confirm with callers.

    Returns the final result code of the store (0 on success).  On failure the
    entry's CStatd bit is cleared so that status is refetched later. */
afs_WriteVCache(avc, astatus, areq)
    register struct vcache *avc;
    register struct AFSStoreStatus *astatus;
    struct vrequest *areq; {
    register long code;
    register struct conn *tc;
    struct AFSFetchStatus OutStatus;
    struct AFSVolSync tsync;

    /* keep trying for as long as afs_Analyze asks for a retry */
    do {
	tc = afs_Conn(&avc->fid, areq);
	if (tc) {
	    code = RXAFS_StoreStatus(tc->id, (struct AFSFid *) &avc->fid.Fid, astatus, &OutStatus, &tsync);
	}
	else code = -1;	    /* no connection available */
    } while (afs_Analyze(tc, code, &avc->fid, areq));
    UpgradeSToWLock(&avc->lock);
    if (code == 0) {
	/* success, do the changes locally */
	afs_SimpleVStat(avc, &OutStatus);
	/* now update the date, too.  SimpleVStat didn't do this, since it thought
	    we were doing this after fetching new status over a file being written. */
	avc->m.Date = OutStatus.ClientModTime;
    }
    else {
	/* failure, set up to check with server next time */
	avc->states &= ~CStatd;	    /* turn off stat valid flag */
    }
    ConvertWToSLock(&avc->lock);
    return code;
}

/* This function takes a file id and a Venus request structure, and is responsible
  for fetching the status information associated with the file.
  
      The file is identified by afid.
  
      The user whose authentication tokens will be used is specified by areq.
  
      The cache entry is returned with an increased vrefCount field.  The entry must be
      discarded by calling afs_PutVCache when you are through using the pointer to the
      cache entry.

      If the status fetch fails, the reference taken here is dropped again and a
      null pointer is returned.

      You should not hold any locks when calling this function, except locks on other vcache entries.
      If you lock more than one vcache entry simultaneously, you should lock them in this order:

      1.  Lock all files first, then directories.
  
      2.  Within a particular type, lock entries in Fid.Vnode order.
  
      This locking hierarchy is convenient because it allows locking of a parent dir cache entry, given a
      file (to check its access control list).  It also allows renames to be handled easily by locking
      directories in a constant order.

*/
struct vcache *afs_GetVCache(afid, areq)
    register struct VenusFid *afid;
    struct vrequest *areq; {
    register long code, i;
    register struct conn *tc;
    register struct vcache *tvc;
    struct AFSFetchStatus OutStatus;
    struct AFSCallBack CallBack;
    struct AFSVolSync tsync;
    struct volume *tvp;

    /* look the fid up in its hash chain */
    ObtainSharedLock(&afs_xvcache);
    i = VCHash(afid);
    for(tvc = afs_vhashTable[i]; tvc; tvc = tvc->hnext) {
	if (tvc->fid.Fid.Unique == afid->Fid.Unique &&
	    tvc->fid.Fid.Volume == afid->Fid.Volume &&
	    tvc->fid.Cell == afid->Cell && tvc->fid.Fid.Vnode == afid->Fid.Vnode) {
	    break;
	}
    }
    if (!tvc) {
	/* no cache entry, better grab one */
	UpgradeSToWLock(&afs_xvcache);
	tvc = afs_NewVCache(afid, 0);
	ConvertWToSLock(&afs_xvcache);
    }
    else {
	/* found it: take a reference for the caller */
	tvc->vrefCount++;
	QRemove(&tvc->lruq);		/* move to lruq head */
	QAdd(&VLRU, &tvc->lruq);
    }
    ReleaseSharedLock(&afs_xvcache);
    ObtainReadLock(&tvc->lock);
    /* first convert an expired callback into a non-callback */
    if (tvc->callback && tvc->cbExpires <= osi_Time())
	tvc->callback = 0;
    /* check if we're all done */
    if ((tvc->states & CStatd) && ((tvc->states & CRO) || tvc->callback)) {
	ReleaseReadLock(&tvc->lock);
	return tvc;
    }
    /* wired entries are returned as they are, without contacting the server */
    if (afs_IsWired(tvc)) {
	ReleaseReadLock(&tvc->lock);
	return tvc;
    }
    /* the read lock is dropped and a write lock taken; the lock is not held in between */
    ReleaseReadLock(&tvc->lock);
    ObtainWriteLock(&tvc->lock);
    /* reset every slot of the per-entry randomUid (and randomHostaddr) arrays to -1 */
#ifdef	AFS_GATEWAY
    for(i=0;i<CPSIZE;i++) {
	tvc->randomUid[i] = -1;
	tvc->randomHostaddr[i] = -1;
    }
#else	AFS_GATEWAY
    for(i=0;i<CPSIZE;i++) tvc->randomUid[i] = -1;
#endif	AFS_GATEWAY
    tvp	= afs_GetVolume(afid, areq);	    /* copy useful per-volume info */
    if (tvp) {
	if (tvp->states & VRO) tvc->states |= CRO;
	/* now copy ".." entry back out of volume structure, if necessary */
	if (tvc->mvstat == 2  && tvp->dotdot.Fid.Volume != 0) {
	    /* NOTE(review): the osi_Zalloc result is dereferenced without a check */
	    if (!tvc->mvid) tvc->mvid = (struct VenusFid *) osi_Zalloc(afs_VenusFid_zone);
	    *tvc->mvid = tvp->dotdot;
	}
	afs_PutVolume(tvp);
    }
    /* stat the file */
    afs_RemoveVCB(afid);
    do {
	tc = afs_Conn(afid, areq);
	if (tc) {
	    /* record the callback host before the call is made */
	    tvc->callback = tc->server->host;
	    i = osi_Time();	/* i is reused here as the time the call started */
	    code = RXAFS_FetchStatus(tc->id, (struct AFSFid *) &afid->Fid, &OutStatus, &CallBack, &tsync);
	    if (code == 0 && CallBack.ExpirationTime != 0) {
		/* ExpirationTime is added to the start time to get the expiry */
		tvc->cbExpires = CallBack.ExpirationTime+i;
	    }
	    else tvc->callback = 0;
	}
	else code = -1;	/* no connection available */
    } while (afs_Analyze(tc, code, afid, areq));
    if (code) {
	/* fetch failed: invalidate the status and give back our reference */
	tvc->states &= ~CStatd;
	ReleaseWriteLock(&tvc->lock);
	ObtainReadLock(&afs_xvcache);
	tvc->vrefCount--;
	ReleaseReadLock(&afs_xvcache);
	return (struct vcache *) 0;
    }
    tvc->states |= CStatd;
    afs_ProcessFS(tvc, &OutStatus);
    ReleaseWriteLock(&tvc->lock);
    return tvc;
}

/*
 * Give back one reference on a vcache entry: avc->vrefCount is decremented
 * by one while afs_xvcache is held at read level.  This is the counterpart
 * of the reference handed out by afs_GetVCache() and afs_FindVCache().
 */
afs_PutVCache(avc)
    register struct vcache *avc;
{
    ObtainReadLock(&afs_xvcache);	/* can we use read lock here? */
    avc->vrefCount -= 1;
    ReleaseReadLock(&afs_xvcache);
}

/*
 * Look up the vcache entry for a fid without contacting the server.
 *
 * The caller must hold afs_xvcache at read level or better.
 *
 * On a hit the entry's vrefCount is incremented, the entry is moved to the
 * head of the VLRU queue, and the entry is returned.  On a miss a null
 * pointer is returned and nothing is changed.
 */
struct vcache *afs_FindVCache(afid)
    register struct VenusFid *afid; {
    register struct vcache *entry;
    register long bucket;

    bucket = VCHash(afid);
    for (entry = afs_vhashTable[bucket]; entry; entry = entry->hnext) {
	/* skip anything that differs in any fid component */
	if (entry->fid.Fid.Unique != afid->Fid.Unique) continue;
	if (entry->fid.Fid.Volume != afid->Fid.Volume) continue;
	if (entry->fid.Cell != afid->Cell) continue;
	if (entry->fid.Fid.Vnode != afid->Fid.Vnode) continue;

	/* match: hand out a reference and mark as most recently used */
	entry->vrefCount++;
	QRemove(&entry->lruq);
	QAdd(&VLRU, &entry->lruq);
	return entry;
    }
    return (struct vcache *) 0;
}

/*
 * Release one reference on a disk cache entry.  The work is done under the
 * afs_xdcache write lock.  Panics ("putdcache") if the entry's refCount is
 * not positive.  Always returns 0.
 */
afs_PutDCache(ad)
    register struct dcache *ad;
{
    ObtainWriteLock(&afs_xdcache);
    if (ad->refCount <= 0) {
	panic("putdcache");
    }
    ad->refCount -= 1;
    ReleaseWriteLock(&afs_xdcache);
    return 0;
}

/* try to discard all data associated with this file from the cache.

    Every dcache entry on the fid's DVHash chain is examined under the
    afs_xdcache write lock.  An entry belonging to avc is flushed with
    afs_FlushDCache only if its slot is not flagged IFDataMod and nobody
    else holds a reference on it; other entries are left alone.  No value
    is returned. */
afs_TryToSmush(avc)
    register struct vcache *avc; {
    register struct dcache *tdc;
    register int index;
    register int i;

    i =	DVHash(&avc->fid);  /* hash chain containing all dce's for this fid */
    ObtainWriteLock(&afs_xdcache);
    /* from here on i holds the index of the next slot on the chain */
    for(index = afs_dvhashTable[i]; index != NULLIDX; index=i) {
	tdc = afs_GetDSlot(index, (struct dcache *)0);
	i = tdc->f.hvNextp;	/* next pointer this hash table */
	/* drop the reference afs_GetDSlot just took, so that the refCount
	    test below only sees references held by others; the xdcache
	    lock is still held while tdc is examined */
	lockedPutDCache(tdc);
	if (!FidCmp(&tdc->f.fid, &avc->fid)) {
	    if ((afs_indexFlags[index] & IFDataMod) == 0 && tdc->refCount == 0) {
		afs_FlushDCache(tdc);
		if (afs_debug & AFSDEB_GENERAL) afs_dp("Smushed v %x, d %x\n", avc, tdc);
	    }
	}
    }
    ReleaseWriteLock(&afs_xdcache);
}

/* Find the disk cache entry, if any, that holds the chunk containing byte
    abyte of the file avc.  Nothing is created or fetched.

    Returns the entry with the reference taken by afs_GetDSlot still held
    (release it with afs_PutDCache), after stamping its slot in
    afs_indexTimes; returns a null pointer if no entry matches.

    NOTE(review): the chunk number is computed here as
    abyte >> avc->chunkShift, whereas afs_GetDCache uses AFS_CHUNK(abyte)
    and forces chunk 0 for directories -- confirm the two always agree. */
struct dcache *afs_FindDCache(avc, abyte)
    register struct vcache *avc;    /*Held*/
    long abyte; {
    long chunk;
    register long i, index;
    register struct dcache *tdc;

    chunk = (abyte >> avc->chunkShift);
    /*
         * Hash on the [fid, chunk] and get the corresponding dcache index after
         * write-locking the dcache.
         */
    i = DCHash(&avc->fid, chunk);
    ObtainWriteLock(&afs_xdcache);
    for(index = afs_dchashTable[i]; index != NULLIDX;) {
	tdc = afs_GetDSlot(index, (struct dcache *)0);
	if (!FidCmp(&tdc->f.fid, &avc->fid) && chunk == tdc->f.chunk) {
	    break;  /* leaving refCount high for caller */
	}
	/* not ours: step to the next slot on the chain and drop the reference */
	index = tdc->f.hcNextp;
	lockedPutDCache(tdc);
    }
    ReleaseWriteLock(&afs_xdcache);
    /* index is NULLIDX exactly when the chain was exhausted without a match */
    if (index != NULLIDX) {
	afs_indexTimes[tdc->index] = afs_indexCounter++;
	return tdc;
    }
    else return (struct dcache *) 0;
}

/*
 * Routine called on store: copy alen bytes from the cache file afile into
 * the Rx call acall, at most osi_PACKETSIZE bytes at a time.
 *
 * Returns 0 when everything was sent, EIO if the cache file could not be
 * read in full, and -33 if rx_Write() accepted fewer bytes than offered.
 *
 * As a side effect, sleepers on avc may be woken once (via afs_wakeup) when
 * the low bit of rx_GetRemoteStatus() is seen set; see the comments below.
 */
static int CacheStoreProc(acall, afile, alen, avc)
register struct rx_call *acall;
struct osi_file *afile;
register long alen;
struct vcache *avc;
{
    register long moved;
    register char *sendBuf;
    register int piece;
    int wakePending;
    int result;

    /* for now, only do 'continue from close' code if file fits in one
	chunk.  Could clearly do better: if only one modified chunk
	then can still do this.  can do this on *last* modified chunk */
    piece = avc->m.Length-1; /* byte position of last byte we'll store */
    wakePending = (AFS_CHUNK(piece) != 0) ? 0 : 1;

    result = 0;
    sendBuf = osi_AllocSendSpace();
    for (; alen > 0; alen -= piece) {
	piece = (alen > osi_PACKETSIZE? osi_PACKETSIZE : alen);

	/* cache file -> buffer */
	moved = osi_Read(afile, sendBuf, piece);
	if (moved != piece) {
	    result = EIO;
	    break;
	}

	/* buffer -> network */
	moved = rx_Write(acall, sendBuf, piece);
	if (moved != piece) {
	    result = -33;
	    break;
	}

	/* if file has been locked on server, can allow store to continue */
	if (wakePending && (rx_GetRemoteStatus(acall) & 1)) {
	    wakePending = 0;
	    afs_wakeup(avc);
	}
    }
    /* one release covers both the normal and the error exits */
    osi_FreeSendSpace(sendBuf);
    return result;
}

/*
 * Routine called on fetch; also tells people waiting for data that more has
 * arrived.
 *
 *	acall	the Rx call the data is read from
 *	afile	the open cache file the data is written to
 *	abase	file position corresponding to the first byte received
 *	adc	the dcache entry being filled
 *
 * The stream starts with a length word in network byte order, followed by
 * that many bytes of data, which are copied to afile at most osi_PACKETSIZE
 * bytes at a time.  After each piece adc->validPos is advanced, and anyone
 * who set DFWaiting on the entry is woken.
 *
 * Returns 0 on success, -1 if the length word could not be read, -34 if the
 * data stream ended short, and EIO if the cache file write came up short.
 */
static int CacheFetchProc(acall, afile, abase, adc)
register struct rx_call *acall;
long abase;
struct dcache *adc;
struct osi_file *afile;
{
    long length;
    register long code;
    register char *tbuffer;
    register int tlen;

    code = rx_Read(acall, &length, sizeof(long));
    /* only look at length once we know it was actually filled in */
    if (code != sizeof(long)) return -1;
    length = ntohl(length);
    tbuffer = osi_AllocSendSpace();
    while (length > 0) {
	tlen = (length > osi_PACKETSIZE? osi_PACKETSIZE : length);
	code = rx_Read(acall, tbuffer, tlen);
	if (code != tlen) {
	    osi_FreeSendSpace(tbuffer);
	    return -34;
	}
	code = osi_Write(afile, tbuffer, tlen);
	if (code != tlen) {
	    osi_FreeSendSpace(tbuffer);
	    return EIO;
	}
	abase += tlen;
	length -= tlen;
	/* publish how far the valid data now extends, then wake any waiter */
	adc->validPos = abase;
	if (adc->flags & DFWaiting) {
	    adc->flags &= ~DFWaiting;
	    osi_Wakeup(&adc->validPos);
	}
    }
    osi_FreeSendSpace(tbuffer);
    return 0;
}

/* This function is called to obtain a reference to data stored in the disk
  cache.  Passed in are an unlocked vcache entry, the byte position in the
  file desired and a Venus request structure identifying the requesting user.
  
  This function is responsible for locating a chunk of data containing the desired
  byte and returning a reference to the disk cache entry, with its reference
  count incremented.  In addition, *aoffset is set to the offset within
  the chunk where the request byte resides, and *alen is set to the
  number of bytes of data after the desired byte (including the desired byte)
  which can be read from this chunk.
  
  Flags are 1->set locks, 2->return after creating entry.

  When flag 2 is given the entry is returned as soon as it exists, and
  *aoffset and *alen are NOT filled in.

  If the fetch from the server fails, the reference on the entry is dropped,
  the vcache's CStatd bit is cleared and a null pointer is returned; *aoffset
  and *alen are not filled in then either.

  Locking: when flag 1 is set this routine takes and releases avc->lock
  itself, and the lock is not held on return.  Every lock transition made on
  the zero-length path below is balanced: the write lock taken for the
  re-check is always converted back to a read lock before the final
  ReleaseReadLock, whether or not the re-check succeeded.

*/
struct dcache *afs_GetDCache(avc, abyte, areq, aoffset, alen, aflags)
    register struct vcache *avc;    /*Held*/
    long abyte;
    int	aflags;			    /* should we set locks? */
    long *aoffset, *alen;	    /*Return values*/
    register struct vrequest *areq; {

    register long i, code, savecode;
    int setLocks;
    long index;
    long chunk;
    long maxGoodLength;	/* amount of good data at server */
    struct rx_call *tcall;
    long Position = 0; /* file position of the start of the chunk being fetched */
    long size;		/* size of segment to transfer */
    struct AFSVolSync tsync;
    struct AFSFetchStatus OutStatus;
    struct AFSCallBack CallBack;
    register struct dcache *tdc;
    struct osi_file *file;
    register struct conn *tc;

    /*
     * Determine the chunk number and offset within the chunk corresponding to the
     * desired byte.  Directories always live in chunk 0.
     */
    if (vType(avc) == VDIR) {
	chunk = 0;
    }
    else {
	chunk = AFS_CHUNK(abyte);
    }

    /*
     * Hash on the [fid, chunk] and get the corresponding dcache index after
     * write-locking the dcache.
     */
    i = DCHash(&avc->fid, chunk);
    setLocks = aflags & 1;
    ObtainWriteLock(&afs_xdcache);
    for(index = afs_dchashTable[i]; index != NULLIDX;) {
	tdc = afs_GetDSlot(index, (struct dcache *)0);
	if (!FidCmp(&tdc->f.fid, &avc->fid) && chunk == tdc->f.chunk) {
	    ReleaseWriteLock(&afs_xdcache);
	    break;  /* leaving refCount high for caller */
	}
	index = tdc->f.hcNextp;
	lockedPutDCache(tdc);
    }

    /*
     * If we didn't find the entry, we'll create one.  The afs_xdcache write
     * lock is still held in that case and is released at the end of this block.
     */
    if (index == NULLIDX) {
	if (afs_debug & AFSDEB_GENERAL) afs_dp("getdcache failed to find %x.%d\n", avc, chunk);
	if (freeDCList == NULLIDX)
	    afs_GetDownD(AFS_GETDOWND_SLOT_ONLY, 1);	/* just need a slot */
	if (freeDCList == NULLIDX) panic("getdcache");
	afs_indexFlags[freeDCList] &= ~IFFree;
	tdc = afs_GetDSlot(freeDCList, 0);
	freeDCList = tdc->f.hvNextp;	/* free slots are chained through hvNextp */
	freeDCCount--;

	/*
	 * Fill in the newly-allocated dcache record.
	 */
	tdc->f.fid = avc->fid;
	tdc->f.versionNo = -1;	    /* invalid value */
	tdc->f.chunk = chunk;
	if (tdc->lruq.prev == &tdc->lruq) panic("lruq 1");
	/* now add to the two hash chains */
	tdc->f.hcNextp = afs_dchashTable[i];	/* i still set from above DCHash call */
	afs_dchashTable[i] = tdc->index;
	i = DVHash(&avc->fid);
	tdc->f.hvNextp = afs_dvhashTable[i];
	afs_dvhashTable[i] = tdc->index;
	tdc->flags = DFEntryMod;
	tdc->f.states = 0;
	ReleaseWriteLock(&afs_xdcache);
    }

    /* Here we have the unlocked entry in tdc, with its refCount incremented. */

    /* don't use S-lock, it costs concurrency when storing a file back to the server */
    if (setLocks) ObtainReadLock(&avc->lock);
    /* here we check for 0 length fetch */
    maxGoodLength = avc->m.Length;
    if (avc->truncPos < maxGoodLength) maxGoodLength = avc->truncPos;
    if (AFS_CHUNKTOBASE(chunk) >= maxGoodLength && avc->m.DataVersion != tdc->f.versionNo) {
	/* no data in file to read at this position */
	if (setLocks) {
	    ReleaseReadLock(&avc->lock);
	    ObtainWriteLock(&avc->lock);
	}
	/* check again, now that we have a write lock */
	maxGoodLength = avc->m.Length;
	if (avc->truncPos < maxGoodLength) maxGoodLength = avc->truncPos;
	if (AFS_CHUNKTOBASE(chunk) >= maxGoodLength && avc->m.DataVersion != tdc->f.versionNo) {
	    file = osi_UFSOpen(&cacheDev, tdc->f.inode);
	    if (!file) panic("getdcache open0");
	    osi_Truncate(file, 0);
	    afs_AdjustSize(tdc, 0);
	    osi_Close(file);
	    tdc->f.versionNo = avc->m.DataVersion;
	    tdc->flags |= DFEntryMod;
	    if (afs_debug & AFSDEB_GENERAL) afs_dp("faking 0 byte entry\n");
	}
	/* Go back to a read lock exactly once, and do so whether or not the
	    re-check succeeded, so that the ReleaseReadLock below always
	    matches the lock we actually hold. */
	if (setLocks) {
	    ReleaseWriteLock(&avc->lock);
	    ObtainReadLock(&avc->lock);
	}
    }
    if (setLocks) ReleaseReadLock(&avc->lock);

    /*
     * We must read in the whole chunk iff the version number doesn't match.
     */

    if (aflags & 2) {
	/* don't need data, just a unique dcache entry */
	afs_indexTimes[tdc->index] = afs_indexCounter++;
	return tdc;	/* check if we're done */
    }

    if (setLocks) ObtainReadLock(&avc->lock);
    if (avc->m.DataVersion != tdc->f.versionNo) {
	if (setLocks) {
	    ReleaseReadLock(&avc->lock);
	    ObtainWriteLock(&avc->lock);
	}

	/* if data ever existed for this vnode, and this is a text object, do
	    some clearing.  Now, you'd think you need only do the flush when
	    VTEXT is on, but VTEXT is turned off when the text object is freed,
	    while pages are left lying around in memory marked with this vnode.
	    If we would reactivate (create a new text object from) this vnode,
	    we could easily stumble upon some of these old pages in pagein.
	    So, we always flush these guys.  Sun has a wonderful lack of useful
	    invariants in this system. */
	if (avc->flushDV < avc->m.DataVersion) {
	    if (setLocks) ReleaseWriteLock(&avc->lock);
	    osi_FlushText(avc);
	    if (setLocks) ObtainWriteLock(&avc->lock);
	}

	/* watch for standard race condition */
	if (avc->m.DataVersion == tdc->f.versionNo) {
	    if (setLocks) ReleaseWriteLock(&avc->lock);
	    goto done;
	}
	/* recompute again, due to relocking */
	maxGoodLength = avc->m.Length;
	if (avc->truncPos < maxGoodLength) maxGoodLength = avc->truncPos;
	Position = AFS_CHUNKBASE(abyte);
	if (vType(avc) == VDIR) {
	    size = avc->m.Length;
	    if (size > tdc->f.chunkBytes) {
		/* pre-reserve space for file */
		afs_AdjustSize(tdc, size);
	    }
	    size = 999999999;	    /* max size for transfer */
	}
	else {
	    size = AFS_CHUNKSIZE(abyte);	/* expected max size */
	    /* don't read past end of good data on server */
	    if (Position + size > maxGoodLength)
		size = maxGoodLength - Position;
	    if (size < 0) size = 0; /* Handle random races */
	    if (size > tdc->f.chunkBytes) {
		/* pre-reserve space for file */
		afs_AdjustSize(tdc, size);	/* changes chunkBytes */
		/* max size for transfer still in size */
	    }
	}
	if (afs_mariner && !tdc->f.chunk) afs_MarinerLog("fetch$Fetching", avc);
	/* right now, we only have one tool, and it's a hammer.  So, we fetch the whole file. */
	DZap(&tdc->f.inode);	/* pages in cache may be old */
	file = osi_UFSOpen(&cacheDev, tdc->f.inode);
	if (!file) panic("getdcache open");
	afs_RemoveVCB(&avc->fid);
	tdc->f.states |= DWriting;
	tdc->flags |= DFFetching;
	tdc->validPos = Position;	    /* last valid position in this chunk */
	if (tdc->flags & DFFetchReq) {
	    tdc->flags &= ~DFFetchReq;
	    osi_Wakeup(&tdc->validPos);
	}
	do {
	    osi_Seek(file, 0);	/* on retries, we need this */
	    tc = afs_Conn(&avc->fid, areq);
	    if (tc) {
		avc->callback = tc->server->host;
		/* NOTE(review): these two lock conversions are made even when
		    setLocks is 0 -- confirm what callers hold in that case */
		ConvertWToSLock(&avc->lock);
		i = osi_Time();	/* time the call started, for the callback expiry */
		tcall = rx_NewCall(tc->id);
		code = StartRXAFS_FetchData(tcall, (struct AFSFid *) &avc->fid.Fid, Position, size);
		if (code == 0) {
		    code = CacheFetchProc(tcall, file, Position, tdc);
		}
		if (code == 0) code = EndRXAFS_FetchData(tcall, &OutStatus, &CallBack, &tsync);
		savecode = code;
		code = rx_EndCall(tcall, code);
		/* rx_EndCall must not turn an earlier failure into success */
		if (savecode && !code) panic("lost error in afs_GetDCache\n");
		UpgradeSToWLock(&avc->lock);
	    }
	    else {
		code = -1;
	    }
	    if (code == 0) {
		if (CallBack.ExpirationTime == 0) avc->callback = 0;
		else avc->cbExpires = CallBack.ExpirationTime+i;
		/* validPos is updated by CacheFetchProc, and can only be modifed under an S or W lock, which we've blocked out */
		size = tdc->validPos - Position;	/* actual segment size */
		if (size < 0) size = 0;
		osi_Truncate(file, size);   /* prune it */
	    }
	    else avc->callback = 0;
	} while(afs_Analyze(tc, code, &avc->fid, areq));
	tdc->flags &= ~DFFetching;
	if (tdc->flags & DFWaiting) {
	    tdc->flags &= ~DFWaiting;
	    osi_Wakeup(&tdc->validPos);
	}
	if (avc->execsOrWriters == 0) tdc->f.states &= ~DWriting;

	/* now, if code != 0, we have an error and should punt */
	if (code) {
	    osi_Truncate(file, 0);	/* discard old data */
	    afs_AdjustSize(tdc, 0);
	    osi_Close(file);
	    tdc->f.versionNo = -1;	/* invalid value */
	    tdc->refCount--;
	    tdc->flags |= DFEntryMod;	/* we've modified the entry! */
	    avc->states &= ~CStatd;
	    if (setLocks) ReleaseWriteLock(&avc->lock);
	    return (struct dcache *) 0;
	}

	/* otherwise we copy in the just-fetched info */
	osi_Close(file);
	afs_AdjustSize(tdc, size);  /* new size */
	afs_ProcessFS(avc, &OutStatus);	/* copy appropriate fields into vcache */
	tdc->f.versionNo = OutStatus.DataVersion;
	tdc->flags |= DFEntryMod;
	if (setLocks) ReleaseWriteLock(&avc->lock);
    }
    else {
	if (setLocks) ReleaseReadLock(&avc->lock);
    }

done:
    /* fixup lru info */
    afs_indexTimes[tdc->index] = afs_indexCounter++;

    /* return the data */
    if (vType(avc) == VDIR)
	*aoffset = abyte;
    else
	*aoffset = AFS_CHUNKOFFSET(abyte);
    *alen = (tdc->f.chunkBytes - *aoffset);
    return tdc;
}

/*
 * Simple copy of status information into a cache entry.
 *
 * Owner, group and mode are always taken from astat.  Length and date are
 * taken only when nobody has the file open for execution or writing
 * (execsOrWriters <= 0).  The file-type bits added to the mode come from the
 * type the vnode already has, not from astat; a symbolic link with no
 * execute bits additionally gets mvstat set to 1.
 */
afs_SimpleVStat(avc, astat)
    register struct vcache *avc;
    register struct AFSFetchStatus *astat; {

    avc->m.Owner = astat->Owner;
    avc->m.Group = astat->Group;
    if (avc->execsOrWriters <= 0) {
	/* if writing the file, don't fetch over this value */
	avc->m.Length = astat->Length;
	avc->m.Date = astat->ClientModTime;
    }

    avc->m.Mode = astat->UnixModeBits;
    switch (vType(avc)) {
    case VREG:
	avc->m.Mode |= S_IFREG;
	break;
    case VDIR:
	avc->m.Mode |= S_IFDIR;
	break;
    case VLNK:
	avc->m.Mode |= S_IFLNK;
	if ((avc->m.Mode & 0111) == 0) avc->mvstat = 1;
	break;
    default:
	/* other types get no file-type bits */
	break;
    }
}

/*
 * Copy a fetched status block into a cache entry; must be called under a
 * write lock.
 *
 * Unlike afs_SimpleVStat, this also records the data version, link count
 * and anonymous access rights, and sets the vnode type from the file type
 * the server reported.  Length and date are left alone while the file is
 * open for execution or writing (execsOrWriters > 0).
 */
afs_ProcessFS(avc, astat)
    register struct vcache *avc;
    register struct AFSFetchStatus *astat; {

    if (avc->execsOrWriters <= 0) {
	/* if writing the file, don't fetch over these values */
	avc->m.Length = astat->Length;
	avc->m.Date = astat->ClientModTime;
    }
    avc->m.DataVersion = astat->DataVersion;
    avc->m.LinkCount = astat->LinkCount;
    avc->m.Owner = astat->Owner;
    avc->m.Group = astat->Group;
    avc->m.Mode = astat->UnixModeBits;

    switch (astat->FileType) {
    case File:
	vSetType(avc, VREG);
	avc->m.Mode |= S_IFREG;
	break;
    case Directory:
	vSetType(avc, VDIR);
	avc->m.Mode |= S_IFDIR;
	break;
    case SymbolicLink:
	vSetType(avc, VLNK);
	avc->m.Mode |= S_IFLNK;
	if ((avc->m.Mode & 0111) == 0) avc->mvstat = 1;
	break;
    default:
	/* unknown file type: vnode type and mode type bits are left as they are */
	break;
    }

    avc->anyAccess = astat->AnonymousAccess;
}

/* This function is called only during initialization, and is passed one parameter,
  a file name.  This file is declared to be the volume info storage file for
  the Andrew file system.  Its inode number is recorded in volumeInode and the
  file is truncated to 0 length here.
  
  Warning: data will be written to this file over time by the Andrew file system.

  Returns 0 on success, or ENOENT if the name cannot be looked up.  Panics if
  the file cannot be opened, as the other users of osi_UFSOpen in this file do.

*/
afs_InitVolumeInfo (afile)
    register char *afile; {
    register long code;
    register struct osi_file *tfile;
    struct vnode *filevp;

    code = gop_lookupname(afile, AFS_UIOSYS, 0, (struct vnode *) 0, &filevp);
    if (code) return ENOENT;
    /* only the inode number is kept; the vnode itself is released right away */
    volumeInode = VTOI(filevp)->i_number;
    VN_RELE(filevp);
    tfile = osi_UFSOpen(&cacheDev, volumeInode);
    /* don't hand a null file to osi_Truncate/osi_Close */
    if (!tfile) panic("initvolumeinfo");
    osi_Truncate(tfile, 0);
    osi_Close(tfile);
    return 0;
}

/* This function is called only during initialization, and is passed one parameter,
  a file name.  This file is assumed to be the cache info file for venus, and
  will be used as such.  This file should *not* be truncated to 0 length; its
  contents describe what data is really in the cache.
  
  Warning: data will be written to this file over time by the Andrew file system.

*/
afs_InitCacheInfo (afile)
    register char *afile; {
    register long code;
    struct osi_stat tstat;
    register struct osi_file *tfile;
    struct afs_fheader theader;
    struct vnode *filevp;
    int goodFile;

    code = gop_lookupname(afile, AFS_UIOSYS, 0, (struct vnode *) 0, &filevp);
    if (code) return ENOENT;
    cacheInode = VTOI(filevp)->i_number;
    cacheDev.dev = VTOI(filevp)->i_dev;
    cacheVFS = filevp->v_vfsp;
    dirp_SetCacheDev(&cacheDev);	/* tell dir package where cache is */
    VN_RELE(filevp);
    tfile = osi_UFSOpen(&cacheDev, cacheInode);
    if (!tfile) panic("initcacheinfo");
    osi_Stat(tfile, &tstat);
    cacheInfoModTime = tstat.mtime;
    code = osi_Read(tfile, &theader, sizeof(theader));
    goodFile = 0;
    if (code == sizeof(theader)) {
	/* read the header correctly */
	if (theader.magic == AFS_FHMAGIC && theader.firstCSize == AFS_FIRSTCSIZE && theader.otherCSize == AFS_OTHERCSIZE)
	    goodFile = 1;
    }
    if (!goodFile) {
	/* write out a good file label */
	theader.magic = AFS_FHMAGIC;
	theader.firstCSize = AFS_FIRSTCSIZE;
	theader.otherCSize = AFS_OTHERCSIZE;
	osi_Seek(tfile, 0);
	code = osi_Write(tfile, &theader, sizeof(theader));
	if (code != sizeof(theader)) panic("write initcacheinfo");
	/* now truncate the rest of the file, since it may be arbitrarily wrong */
	osi_Truncate(tfile, sizeof(struct afs_fheader));
    }
    osi_Close(tfile);
    return 0;
}

/* This function is called only during initialization.  It registers one file
  of the cache, named either by path (afile) or, when afile is null, by inode
  number (ainode).
  
  The file specified will be written to by the Andrew file system.

  The file is bound to the next unused slot (cacheCounter).  If the slot's
  saved contents look trustworthy the entry is re-entered into the hash
  tables and its data kept; otherwise the file is truncated and the slot put
  on the free list.  Either way the slot is written back to the cache info
  file and cacheCounter is advanced.

  Returns 0 on success, EINVAL if all afs_cacheFiles slots are already in
  use, or the error from gop_lookupname if the path cannot be looked up.
*/
int afs_InitCacheFile(afile, ainode)
    long ainode;
    char *afile; {
    register long code;
    struct vnode *filevp;
    long index;
    int fileIsBad;
    struct osi_file *tfile;
    struct osi_stat tstat;
    register struct dcache *tdc;

    index = cacheCounter;
    if (index >= afs_cacheFiles) return EINVAL;

    ObtainWriteLock(&afs_xdcache);
    tdc = afs_GetDSlot(index, (struct dcache *)0);
    ReleaseWriteLock(&afs_xdcache);
    /* note, leaves newly-formatted cache entry in LRUQ */
    if (!tdc) {
	/* filevp has not been looked up yet at this point, so there is
	    nothing to release before panicking */
	panic("initcachefile 1");
    }

    if (afile) {
	code = gop_lookupname(afile, AFS_UIOSYS, 0, (struct vnode *) 0, &filevp);
	if (code) {
	    afs_PutDCache(tdc);
	    return code;
	}
	/* otherwise we have a VN_HOLD on filevp.  Get the useful info out and return.
	 we make use here of the fact that the cache is in the UFS file system,
	 and just record the inode number. */
	tdc->f.inode = VTOI(filevp)->i_number;
	VN_RELE(filevp);
    }
    else {
	tdc->f.inode = ainode;
    }
    tdc->f.chunkBytes = 0;
    fileIsBad = 0;
    /* a slot that was still marked DWriting, or that names no volume, cannot be trusted */
    if ((tdc->f.states & DWriting) || tdc->f.fid.Fid.Volume == 0) fileIsBad = 1;
    tfile = osi_UFSOpen(&cacheDev, tdc->f.inode);
    if (!tfile) panic("initcachefile open");
    code = osi_Stat(tfile, &tstat);
    if (code) panic("initcachefile stat");
    /* if file changed within T (120?) seconds of cache info file, it's probably bad.  In addition, if
	slot changed within last T seconds, the cache info file may be incorrectly identified, and so
	slot may be bad. */
    if (cacheInfoModTime < tstat.mtime + 120) fileIsBad = 1;
    if (cacheInfoModTime < tdc->f.modTime + 120) fileIsBad = 1;
    if (fileIsBad) {
	tdc->f.fid.Fid.Volume =	0;  /* not in the hash table */
	if (tstat.size != 0)
	    osi_Truncate(tfile, 0);
	/* put entry in free cache slot list */
	tdc->f.hvNextp = freeDCList;
	tdc->f.chunkBytes = 0;
	freeDCList = index;
	freeDCCount++;
	afs_indexFlags[index] |= IFFree;
    }
    else {
	/* we must put this entry in the appropriate hash tables */
	code = DCHash(&tdc->f.fid, tdc->f.chunk);
	tdc->f.hcNextp = afs_dchashTable[code];	/* code holds the DCHash bucket computed just above */
	afs_dchashTable[code] = tdc->index;
	code = DVHash(&tdc->f.fid);
	tdc->f.hvNextp = afs_dvhashTable[code];
	afs_dvhashTable[code] = tdc->index;
	tdc->f.chunkBytes = 0;		    /* assumed old size */
	afs_AdjustSize(tdc, tstat.size);    /* adjust to new size */
	if (tstat.size > 0) afs_indexFlags[index] |= IFEverUsed;    /* has nontrivial amt. of data */
	afs_reusedFiles++;
	/* initialize index times to the file's access time; keep indexCounter at the max thereof */
	afs_indexTimes[index] = tstat.atime;
	if (afs_indexCounter < tstat.atime) afs_indexCounter = tstat.atime;
    }
    osi_Close(tfile);
    tdc->f.states &= ~DWriting;
    tdc->flags &= ~DFEntryMod;
    afs_WriteDCache(tdc, 0);	    /* don't set f.modTime; we're just cleaning up */
    afs_PutDCache(tdc);
    cacheCounter++;
    return 0;
}

/*
 * Bring the cache info file up to date: every in-memory dcache entry that
 * is flagged DFEntryMod has the flag cleared and is written back with
 * afs_WriteDCache (which is asked to stamp the modification time).  The
 * whole pass runs under the afs_xdcache write lock.
 */
afs_WriteThroughDSlots() {
    register struct dcache *entry;
    register long slot;

    ObtainWriteLock(&afs_xdcache);
    for (slot = 0; slot < afs_cacheFiles; slot++) {
	entry = afs_indexTable[slot];
	if (!entry) {
	    continue;		/* slot not in memory */
	}
	if ((entry->flags & DFEntryMod) == 0) {
	    continue;		/* nothing changed since the last write */
	}
	entry->flags &= ~DFEntryMod;
	afs_WriteDCache(entry, 1);
    }
    ReleaseWriteLock(&afs_xdcache);
}

/* This function is responsible for returning a particular dcache entry, named by its slot
    index.  If the entry is already present, it is returned, otherwise the contents are read
    from the CacheInfo file.  If the caller has supplied us with a pointer to an in-memory
    dcache structure, we place the info there.  Otherwise, we allocate a dcache entry from
    the free list and use it.

    We return the address of the in-memory copy of the file record.  This entry's refCount
    field has been incremented (or set to 1 for a freshly read entry); use afs_PutDCache()
    to release it.

    If the record cannot be read in full from the CacheInfo file, the entry is
    initialized to an empty state and flagged DFEntryMod instead.
    
    This function must be called with the afs_xdcache lock write-locked; it panics
    if the lock check, the slot range check, the CacheInfo open or the free-list
    allocation fails.
*/
struct dcache *afs_GetDSlot(aslot, tmpdc)
    register long aslot;
    register struct dcache *tmpdc; {

    register long code;
    register struct dcache *tdc;
    register struct osi_file *tfile;

    if (CheckLock(&afs_xdcache) != -1) panic("getdslot nolock");
    if (aslot < 0 || aslot >= afs_cacheFiles) panic("getdslot slot");
    tdc = afs_indexTable[aslot];
    if (tdc) {
	/* already in memory; note that tmpdc is ignored on this path */
	QRemove(&tdc->lruq);	    /* move to queue head */
	QAdd(&DLRU, &tdc->lruq);
	tdc->refCount++;
	return tdc;
    }
    /* otherwise we should read it in from the cache file */
    tfile = osi_UFSOpen(&cacheDev, cacheInode);
    if (!tfile) panic("CacheInfo");

    /*
         * If we weren't passed an in-memory region to place the file info, we have
         * to allocate one.
	 */
    if (tmpdc == (struct dcache *)0) {
	/* refill the free list if it is empty, then take its first element */
	if (!freeDSList) afs_GetDownDSlot(5);
	if (!freeDSList) panic("getdslot 17");
	tdc = freeDSList;
	freeDSList = (struct dcache *) tdc->lruq.next;	/* free entries are chained through lruq.next */
	tdc->flags = 0;	/* up-to-date, not in free q */
	QAdd(&DLRU, &tdc->lruq);
	if (tdc->lruq.prev == &tdc->lruq) panic("lruq 3");
    }
    else {
	/* caller-supplied storage: not put on the LRU queue */
	tdc = tmpdc;
	tdc->f.states = 0;
    }

    /*
      * Seek to the aslot'th entry and read it in.
      */
    osi_Seek(tfile, sizeof(struct fcache) * aslot + sizeof(struct afs_fheader));
    code = osi_Read(tfile, (char *)(&tdc->f), sizeof(struct fcache));
    if (code != sizeof(struct fcache)) {
	/* short read: make the record look empty and mark it for write-back */
	tdc->f.fid.Cell = 0;
	tdc->f.fid.Fid.Volume = 0;
	tdc->f.chunk = -1;
	tdc->f.versionNo = -1;
	tdc->f.hcNextp = tdc->f.hvNextp = tdc->f.chunkNextp = NULLIDX;
	tdc->flags |= DFEntryMod;
    }
    tdc->refCount = 1;
    tdc->index = aslot;

    /*
         * If we didn't read into a temporary dcache region, update the slot pointer table.
         */
    if (tmpdc == (struct dcache *)0)
	afs_indexTable[aslot] = tdc;
    osi_Close(tfile);
    return tdc;
}

/*
 * Write one dcache entry back to its home in the CacheInfo file.
 *
 *	adc	the entry to write; its reference count is not changed
 *	atime	when non-zero, adc->f.modTime is first set to the current time
 *
 * The record is stored at the offset belonging to slot adc->index, just
 * past the file header.  Panics if the CacheInfo file cannot be opened or
 * the record is not written in full; otherwise returns 0.
 *
 * This function must be called with the afs_xdcache lock at least
 * read-locked.
 */
int afs_WriteDCache(adc, atime)
    int atime;
    register struct dcache *adc; {
    register struct osi_file *infoFile;
    register long written;

    if (atime) {
	adc->f.modTime = osi_Time();
    }

    infoFile = osi_UFSOpen(&cacheDev, cacheInode);
    if (!infoFile) {
	panic("cacheinfo 2");
    }

    /* position on this entry's slot, then dump the in-memory image */
    osi_Seek(infoFile, sizeof(struct fcache) * adc->index + sizeof(struct afs_fheader));
    written = osi_Write(infoFile, (char *)(&adc->f), sizeof(struct fcache));
    osi_Close(infoFile);

    if (written != sizeof(struct fcache)) {
	panic("write writedcache");
    }
    return 0;
}

/* this function is called under a write locked to send a truncation request to a
    file server.
*/
int afs_StoreMini(avc, areq)
register struct vcache *avc;
struct vrequest *areq;
{
    register struct conn *tc;
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus OutStatus;
    struct AFSVolSync tsync;
    register long code;
    register struct rx_call *tcall;
    long tlen;

    tlen = avc->m.Length;
    if (avc->truncPos < tlen) tlen = avc->truncPos;
    avc->truncPos = AFS_NOTRUNC;

    InStatus.Mask = 0;	    /* not sending anything special */
    do {
	tc = afs_Conn(&avc->fid, areq);
	if (tc) {
	    tcall = rx_NewCall(tc->id);
	    code = StartRXAFS_StoreData(tcall, (struct AFSFid *)&avc->fid.Fid, &InStatus, 0, 0, tlen);
	    if (code == 0) {
		code = EndRXAFS_StoreData(tcall, &OutStatus, &tsync);
	    }
	    code = rx_EndCall(tcall, code);
	}
	else code = -1;
    } while (afs_Analyze(tc, code, &avc->fid, areq));
    if (code == 0) {
	afs_ProcessFS(avc, &OutStatus);
    }
    else {
	/* blew it away */
	avc->states &= ~CStatd;
    }
    return code;
}

/* this function is called under a shared lock to write a dcache entry back
  to the server.
*/

int afs_StoreDCache(avc, adc, areq)
    register struct vcache *avc;
    struct vrequest *areq;
    register struct dcache *adc; {
    register struct conn *tc;
    register long code;
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus OutStatus;
    struct rx_call *tcall;
    struct AFSVolSync tsync;
    long base;
    long tlen;
    struct osi_file *tfile;

    /* store the file */
    if (afs_mariner && !adc->f.chunk) afs_MarinerLog("store$Storing", avc);
    if (afs_debug & AFSDEB_GENERAL) afs_dp("storing file %x\n", avc);
    tlen = avc->m.Length;
    if (avc->truncPos < tlen) tlen = avc->truncPos;
    avc->truncPos = AFS_NOTRUNC;
    base = AFS_CHUNKTOBASE(adc->f.chunk);
    do {
	tc = afs_Conn(&avc->fid, areq);
	if (tc) {
	    /* The writes done before a store back will clear setuid-ness in cache file. */
	    tfile = osi_UFSOpen(&cacheDev, adc->f.inode);	/* So fchmod below works */
	    if (!tfile) panic("bad inode in store");
	    InStatus.Mask = AFS_SETMODTIME;
	    InStatus.ClientModTime = avc->m.Date;
	    tcall = rx_NewCall(tc->id);
	    code = StartRXAFS_StoreData(tcall, (struct AFSFid *) &avc->fid.Fid, &InStatus, base, adc->f.chunkBytes, tlen);
	    if (code == 0) {
		/* transfer the file */
		code = CacheStoreProc(tcall, tfile, adc->f.chunkBytes, avc);
	    }
	    if (code == 0) code = EndRXAFS_StoreData(tcall, &OutStatus, &tsync);
	    code = rx_EndCall(tcall, code);
	    osi_Close(tfile);
	}
	else code = -1;
#ifdef	AFS_MACH_ENV
    } while (afs_Analyze(tc, code, &avc->fid, areq) || afs_fspause(avc,code));
#else
    } while (afs_Analyze(tc, code, &avc->fid, areq));
#endif
    /* now copy stuff back out */
    UpgradeSToWLock(&avc->lock);    /* keep out others for a while */
    if (code == 0) {
	afs_ProcessFS(avc, &OutStatus);
	adc->f.versionNo = avc->m.DataVersion;
	afs_indexFlags[adc->index] &= ~IFDataMod;
    }
    else {
	adc->f.versionNo = -1;
	avc->states &= ~CStatd;
    }
    adc->flags |= DFEntryMod;
    ConvertWToSLock(&avc->lock);
    if (afs_debug & AFSDEB_GENERAL) afs_dp("store done\n");
    return code;
}


shutdown_cache() {
    afs_WriteThroughDSlots();
    osi_Free(afs_indexTable, afs_cacheFiles * sizeof(struct dcache *));
    osi_Free(afs_indexTimes, afs_cacheFiles * sizeof(long));
    osi_Free(afs_indexFlags, afs_cacheFiles * sizeof(char));
    osi_Free(afs_indexLRU,   afs_cacheFiles * sizeof(short));
    osi_Free(Initial_freeDSList, DDSIZE * sizeof(struct dcache));
    /* XXX free all vcaches? */
    {
	register struct afs_q *tq, *uq;
	register struct vcache *tvc;
	int i;
	for (tq = VLRU.prev; tq != &VLRU; tq = uq) {
	    tvc = QTOV(tq);
	    uq = QPrev(tq);
	    if (tvc->mvid) {
		osi_Zfree(afs_VenusFid_zone, tvc->mvid);
		tvc->mvid = 0;
	    }
#ifdef	AFS_AIX_ENV
	    aix_gnode_rele((struct vnode *)tvc);
#endif
	    if (tvc->linkData) {
		osi_Free(tvc->linkData, strlen(tvc->linkData)+1);
		tvc->linkData = 0;
	    }
	}
	/* Also free the remaining ones in the Cache */
	for (i=0; i < VCSIZE; i++) {
	    for (tvc = afs_vhashTable[i]; tvc; tvc = tvc->hnext) {
		if (tvc->mvid) {
		    osi_Zfree(afs_VenusFid_zone, tvc->mvid);
		    tvc->mvid = 0;
		}
#ifdef	AFS_AIX_ENV
		if (tvc->v.v_gnode) osi_Free(tvc->v.v_gnode, sizeof(struct gnode));
#endif
		if (tvc->linkData) {
		    osi_Free(tvc->linkData, strlen(tvc->linkData)+1);
		    tvc->linkData = 0;
		}
	    }
	    afs_vhashTable[i] = 0;
	}
    }
    afs_reusedFiles = afs_cacheStats = afs_blocksUsed = afs_indexCounter = 0;
    afs_cacheFiles = afs_cacheBlocks = afs_origCacheBlocks = 0;
    freeDCCount = 0;
    freeDCList = NULLIDX;
    freeDSList = Initial_freeDSList = 0;
    freeVCList = 0;
    cacheInode = volumeInode = afs_mariner = 0;
    cacheCounter = cacheInfoModTime = 0;
    bzero(&cacheDev, sizeof(struct osi_dev));
}

/*
 * afs_StoreAllSegments
 *
 * Called with avc write-locked; stores all modified segments (dcache
 * entries) of the file back to the server.
 *	avc	vcache of the file to store
 *	areq	request block passed through to the store routines
 *
 * Works in three steps:
 *   1. Walk the file's dcache hash chain and call afs_StoreDCache() for
 *	every entry that belongs to this file and has IFDataMod set.  The
 *	walk stops at the first store that fails.
 *   2. If nothing was stored and no error occurred, send a truncation
 *	request via afs_StoreMini() when one is pending or the file's
 *	DataVersion is 0.
 *   3. Walk the chain again and, for every entry of this file, clear
 *	IFDataMod and DWriting, set DFEntryMod, and set f.versionNo (to -1
 *	if an error occurred).  On error CStatd is also cleared on avc.
 *
 * Returns 0 on success, otherwise the code from the failing store.
 * avc is write-locked again on return; during step 1 its lock is
 * temporarily converted to a shared lock around each store.
 */
afs_StoreAllSegments(avc, areq)
register struct vcache *avc;
struct vrequest *areq; {
    register struct dcache *tdc;
    register long code;
    register long index;
    int hash;
    int didAny;

    /* hash selects the chain in afs_dvhashTable that is walked below */
    hash = DVHash(&avc->fid);
    didAny = 0;	    /* didn't do any stores yet */
    code = 0;

    /* store modified entries */
    ObtainWriteLock(&afs_xdcache);  /* block out others from updating this table */
    for(index = afs_dvhashTable[hash]; index != NULLIDX;) {
	/* afs_GetDSlot hands back the entry with its refCount raised; it is released via lockedPutDCache below */
	tdc = afs_GetDSlot(index, 0);
	if ((afs_indexFlags[index] & IFDataMod) && !FidCmp(&tdc->f.fid, &avc->fid)) {
	    /* same file, and modified, we'll store it back */
	    /*
	     * afs_xdcache is released and avc's lock converted to shared
	     * for the duration of the store (afs_StoreDCache is called under
	     * a shared lock); both are restored, in the opposite order,
	     * once the store returns.
	     */
	    ReleaseWriteLock(&afs_xdcache);
	    ConvertWToSLock(&avc->lock);
	    code = afs_StoreDCache(avc, tdc, areq);
	    didAny = 1;	    /* set even if the store failed */
	    UpgradeSToWLock(&avc->lock);
	    ObtainWriteLock(&afs_xdcache);
	    if (code) {
		/* give up on the first failure, releasing the entry before leaving the loop */
		lockedPutDCache(tdc);
		break;
	    }
	}
	/* pick up the next chain index before releasing the entry */
	index = tdc->f.hvNextp;
	lockedPutDCache(tdc);
    }
    ReleaseWriteLock(&afs_xdcache);

    /* send a trivial truncation store if did nothing else */
    if (code == 0 && !didAny) {
	/*
	 * if either we're supposed to truncate the file or the file has no
	 * associated inode (dataversion == 0), then send a truncation
	 * request (which allocates an inode, btw)
	 */
	if ((avc->truncPos != AFS_NOTRUNC) || avc->m.DataVersion == 0) {
	    code = afs_StoreMini(avc, areq);
	}
    }

    /* finally, turn off IFDataMod and DWriting, turn on DFEntryMod, update f.versionNo
	and clear CStatd if anything went wrong. */
    ObtainWriteLock(&afs_xdcache);
    for(index = afs_dvhashTable[hash]; index != NULLIDX;) {
	tdc = afs_GetDSlot(index, 0);
	if (!tdc) panic("store 2");
	/* unlike the first pass, every entry of this file is updated, whether or not it was modified */
	if (!FidCmp(&tdc->f.fid, &avc->fid)) {
	    /* this is the file */
	    afs_indexFlags[index] &= ~IFDataMod;
	    tdc->f.states &= ~DWriting;
	    tdc->flags |= DFEntryMod;
	    /* -1 on failure; otherwise the vcache's current data version */
	    tdc->f.versionNo = (code? -1 : avc->m.DataVersion);
	}
	index = tdc->f.hvNextp;
	lockedPutDCache(tdc);
    }
    if (code) avc->states &= ~CStatd;
    ReleaseWriteLock(&afs_xdcache);

    return code;
}

/*
 * afs_TruncateAllSegments
 *
 * Called with avc write-locked; truncates the cache files appropriately.
 *	avc	vcache of the file being truncated
 *	alen	new length of the file
 *	areq	request block (not used by this routine)
 *
 * avc->m.Date is always set to the current time.  If alen is not smaller
 * than the current length nothing else happens.  Otherwise the length
 * (and truncPos, if it lies beyond alen) is lowered to alen, and every
 * dcache entry of the file holding more than its share of the first alen
 * bytes has its cache file truncated and its size adjusted.
 *
 * Always returns 0.
 */
afs_TruncateAllSegments(avc, alen, areq)
long alen;
register struct vcache *avc;
struct vrequest *areq; {
    register struct dcache *dc;
    register long hashIdx;
    register long slot;
    struct osi_file *chunkFile;
    long keepBytes;

    avc->m.Date = osi_Time();
    if (alen >= avc->m.Length) {
	return 0;		    /* not shrinking the file */
    }

    avc->m.Length = alen;
    if (avc->truncPos > alen) {
	avc->truncPos = alen;
    }

    hashIdx = DVHash(&avc->fid);
    ObtainWriteLock(&afs_xdcache);  /* block out others from screwing with this table */
    slot = afs_dvhashTable[hashIdx];
    while (slot != NULLIDX) {
	dc = afs_GetDSlot(slot, 0);
	if (FidCmp(&dc->f.fid, &avc->fid) == 0) {
	    /* a chunk of this file: work out how many of its bytes lie below alen */
	    keepBytes = alen - AFS_CHUNKTOBASE(dc->f.chunk);
	    if (keepBytes < 0) {
		keepBytes = 0;	    /* chunk lies entirely past the new end */
	    }
	    if (dc->f.chunkBytes > keepBytes) {
		chunkFile = osi_UFSOpen(&cacheDev, dc->f.inode);
		if (!chunkFile) {
		    panic("truncateall open");
		}
		osi_Truncate(chunkFile, keepBytes);
		osi_Close(chunkFile);
		afs_AdjustSize(dc, keepBytes);
	    }
	}
	/* fetch the next chain index before releasing the entry */
	slot = dc->f.hvNextp;
	lockedPutDCache(dc);
    }
    ReleaseWriteLock(&afs_xdcache);
    return 0;
}

/*
 * afs_wakeup
 *
 * Look through the afs_brs request table for the first in-use entry
 * (refCount > 0) whose vnode is avc.  Unless avc has CSafeStore set, the
 * request is marked complete (code 0, BUVALID) and anyone waiting on it
 * (BUWAIT) is woken.  At most one request is processed.
 *
 * Always returns 0.
 */
afs_wakeup(avc)
register struct vcache *avc; {
    register struct brequest *req;
    register int slot;

    for (req = afs_brs, slot = 0; slot < NBRS; slot++, req++) {
	/* skip requests that are not valid or not for this file */
	if (req->refCount <= 0 || avc != req->vnode) {
	    continue;
	}

	if (afs_debug & AFSDEB_GENERAL) {
	    afs_dp("bkg store found %x\n", avc);
	}

	/*
	 * If CSafeStore is on, then we don't awaken the guy waiting for the
	 * store until the whole store has finished; in that case the BStore
	 * routine actually wakes up the user, instead of us.  Otherwise, we
	 * do it now.
	 */
	if (avc->states & CSafeStore) {
	    if (afs_debug & AFSDEB_GENERAL) {
		afs_dp("safestore ignoring %x\n", avc);
	    }
	}
	else {
	    req->code = 0;
	    req->flags |= BUVALID;
	    if (req->flags & BUWAIT) {
		req->flags &= ~BUWAIT;
		osi_Wakeup(req);
	    }
	}
	break;		/* only the first matching request is handled */
    }
    return 0;
}
