/*-
 * Copyright (c) 1991, 1992, 1994, 1995 Berkeley Software Design, Inc.
 * All rights reserved.
 * The Berkeley Software Design Inc. software License Agreement specifies
 * the terms and conditions for redistribution.
 *
 *	BSDI $Id: vfs_bio.c,v 2.2 1995/05/04 00:40:39 karels Exp $
 */

/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vfs_bio.c	7.40 (Berkeley) 5/8/91
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>

/*
 * Convenience wrappers around the b_flags word of a struct buf.
 * The set/clear forms modify b_flags in place; the test forms yield
 * the masked flag bits (nonzero if any of the named flags are set).
 *
 * buf_isfilled is what bread/breadn use to decide whether a read
 * must be started: it is true when B_DONE or B_DELWRI is set.
 * buf_iswrite is simply "B_READ is clear".
 */
#define	buf_setflags(vbuf, flags)	((vbuf)->b_flags |= (flags))
#define	buf_clrflags(vbuf, flags)	((vbuf)->b_flags &= ~(flags))
#define	buf_tstflags(vbuf, flags)	((vbuf)->b_flags & (flags))
#define	buf_isasync(vbuf)		(buf_tstflags(vbuf, B_ASYNC))
#define	buf_isfilled(vbuf)		(buf_tstflags(vbuf, B_DONE | B_DELWRI))
#define	buf_isread(vbuf)		(buf_tstflags(vbuf, B_READ))
#define	buf_iswrite(vbuf)		(buf_tstflags(vbuf, B_READ) == 0)
#define	buf_setread(vbuf)		(buf_setflags(vbuf, B_READ))

/*
 * Structures associated with buffer caching.
 *
 * bufhashtbl is the array of hash chain heads allocated by bufinit();
 * emptybufs is a separate chain that holds the buffers handed out by
 * geteblk(), which are not associated with any logical block.
 */
LIST_HEAD(bufhashhdr, buf) *bufhashtbl, emptybufs;

/*
 * bufcachemem is adjusted by reallocbuf() and getnewbuf(); bufneeded is
 * set by getnewbuf() before it sleeps and cleared by brelse() when it
 * wakes the sleepers.
 */
u_long	bufhash;			/* size of hash table - 1 */
long	bufcachemem;			/* memory occupied by cache buffers */
int	numbufcache;			/* number of buffer headers allocated */
struct	bufq bufq_lru;			/* LRU queue */
int	bufneeded;			/* 1 => waiting for free buffer */

/* Insert a buffer at the head of a hash chain / remove it from its chain. */
#define	binshash(bp, bhp)	LIST_INSERT_HEAD((bhp), (bp), b_hash)
#define	bremhash(bp)		LIST_REMOVE((bp), b_hash)

/*
 * Hash function for vnode and logical block.
 * Simply divide the vnode address by its power-of-two size as allocated,
 * then add the lbn.  We want the vnodes to be distributed uniformly
 * throughout the table, with sequential lbn's in subsequent buckets.
 * This is simple-minded but seems to work.
 *
 * The result is masked with bufhash (table size - 1), so it is always
 * a valid index into bufhashtbl.  The vn argument is parenthesized
 * everywhere it is used so that an expression may be passed safely.
 *
 * The malloc_size macro should be in malloc.h.
 */
#define	malloc_size(size)  (1 << BUCKETINDX(size))
#define	BUFHASH(vn, lbn)  (((u_int)(vn) / malloc_size(sizeof(*(vn))) + \
				(u_int)(lbn)) & bufhash)

/*
 * We size the hash table according to the amount of memory in the cache,
 * but rounding to an integral number of pages.  Using 1 slot per 4K,
 * or 256 slots per megabyte, we get 1024 entries on a 4K page for
 * up to 4 MB of cache.
 *
 * MEMPERHASHSLOT is the number of bytes of buffer memory (maxbufmem)
 * accounted to each hash slot when bufinit() sizes the table.
 */
#define	MEMPERHASHSLOT	4096

/*
 * Initialize buffer subsystem, allocating and initializing hash headers
 * based on amount of memory to be used for buffers.
 */
void
bufinit()
{
	register struct bufhashhdr *bhp;
	long bufhashsize;

	TAILQ_INIT(&bufq_lru);
	/* XXX could use hashinit() here; later. */
	bufhashsize = roundup(maxbufmem / MEMPERHASHSLOT * (sizeof *bhp),
	    CLBYTES);
	bufhashtbl = (struct bufhashhdr *)malloc((u_long)bufhashsize,
	    M_BUFFER, M_WAITOK);
	for (bufhash = 1; bufhash <= bufhashsize / sizeof(*bhp); bufhash <<= 1)
		/* void */;
	bufhash = (bufhash >> 1) - 1;
	for (bhp = &bufhashtbl[bufhash]; bhp >= bufhashtbl; bhp--)
		LIST_INIT(bhp);
	LIST_INIT(&emptybufs);
}

/*
 * Get a new buffer of the specified size.
 * Allocate if not yet enough in existence, otherwise
 * take one from the front of the LRU list.
 * If we have to block, return null after waiting
 * to allow the caller to recheck the state of its world.
 *
 * vbufsize is the requested data size in bytes; catch and timo are
 * passed through to tsleep() when waiting for a free buffer.
 * On success the returned buffer has b_flags == B_BUSY, is on neither
 * the LRU queue nor any hash chain, and has been sized by reallocbuf()
 * (without preserving old contents).
 */
struct buf *
getnewbuf(vbufsize, catch, timo)
	int vbufsize, catch, timo;
{
	register struct buf *vbuf;
	struct ucred *cred;
	int needfree, s;

	/*
	 * Allocate a new buffer.
	 */
	if (bufcachemem + vbufsize < maxbufmem) {
		MALLOC(vbuf, struct buf *, sizeof(*vbuf), M_BUFFER, M_NOWAIT);
		if (vbuf == NULL) {
			/*
			 * Wait here to help ensure that a later call will
			 * succeed, but then return NULL to get caller to
			 * recheck state.
			 */
			vbuf = malloc(sizeof(*vbuf), M_BUFFER, M_WAITOK);
			free(vbuf, M_BUFFER);
			return ((struct buf *) NULL);
		}
		numbufcache++;
		bzero((char *)vbuf, sizeof *vbuf);
		vbuf->b_dev = NODEV;
		vbuf->b_rcred = NOCRED;
		vbuf->b_wcred = NOCRED;
		vbuf->b_flags = B_BUSY;
		vbuf->b_vnbufs.le_next = NOLIST;
	} else {

		/*
		 * Free the cache slot at head of lru chain.
		 *
		 * needfree tracks the net growth this request would
		 * cause: the requested size less the sizes of the
		 * buffers taken off the LRU so far in this loop.
		 */
		needfree = vbufsize;
		for (;;) {
			s = splbio();
			if ((vbuf = bufq_lru.tqh_first) == NULL) {
				/* Nothing free; brelse() will wake us. */
				bufneeded = 1;
				(void) tsleep((caddr_t)&bufneeded,
				    (PRIBIO + 1) | catch, "newbuf", timo);
				splx(s);
				/*
				 * Allow caller to recheck state,
				 * which may have changed.
				 */
				return ((struct buf *) NULL);
			}
			bremfree(vbuf);

			/*
			 * Clean up any vestiges of the old buffer.
			 * A delayed-write buffer is pushed out
			 * asynchronously (marked B_AGE, since it has
			 * reached the end of the LRU) and we go on to
			 * the next candidate.
			 */
			if (buf_tstflags(vbuf, B_DELWRI)) {
				splx(s);
				buf_setflags(vbuf, B_BUSY | B_AGE);
				(void) bawrite(vbuf);
				continue;
			}
			/*
			 * Remove from old hash chain
			 */
			bremhash(vbuf);
			splx(s);
			/* Drop the vnode association and credentials. */
			if (vbuf->b_vp)
				brelvp(vbuf);
			if (vbuf->b_rcred != NOCRED) {
				cred = vbuf->b_rcred;
				vbuf->b_rcred = NOCRED;
				crfree(cred);
			}
			if (vbuf->b_wcred != NOCRED) {
				cred = vbuf->b_wcred;
				vbuf->b_wcred = NOCRED;
				crfree(cred);
			}

			/*
			 * We may need to expand this buffer, and reallocbuf
			 * may have expanded other buffers.  Also, some other
			 * code (e.g. the page daemon) might adjust maxbufmem
			 * downward.  Trim a buffer if we will be over our
			 * quota.  We may overrun for a short time, but this
			 * should make the cache gradually shrink to fit when
			 * there is a change in buffer size usage.
			 */
			needfree -= vbuf->b_bufsize;
			if (bufcachemem + needfree > maxbufmem) {
				/* Discard header and data; try the next one. */
				bufcachemem -= vbuf->b_bufsize;
				FREE(vbuf->b_un.b_addr, M_BUFFER);
				FREE(vbuf, M_BUFFER);
				numbufcache--;
				continue;
			}
			break;
		}

		vbuf->b_dirtyoff = vbuf->b_dirtyend = 0;
	}
	/* Reset all flags; the caller owns the buffer from here on. */
	vbuf->b_flags = B_BUSY;
	reallocbuf(vbuf, vbufsize, 0);
	return (vbuf);
}

/*
 * Smallest data area reallocbuf() will allocate.  Requests of at most
 * CLBYTES / 2 are rounded up to minbufsize times a power of two.
 */
int minbufsize = DEV_BSIZE;

/*
 * Resize the data area of an existing buffer to hold vbufsize bytes.
 *
 * The allocation size is vbufsize rounded up: to a multiple of CLBYTES
 * when vbufsize exceeds half a cluster, otherwise to minbufsize times a
 * power of two.  If the rounded size matches the current b_bufsize only
 * the byte counts are updated.  Otherwise a new area is allocated, the
 * old contents are copied over when "copy" is nonzero (as many bytes as
 * fit in both the old count and the new size), and the old area is freed.
 * b_bcount and b_iocount are always set to vbufsize.
 */
reallocbuf(vbuf, vbufsize, copy)
	register struct buf *vbuf;
	int vbufsize, copy;
{
	caddr_t fresh;
	int rounded;

#ifdef DIAGNOSTIC
	if (vbufsize <= 0)
		panic("reallocbuf");
#endif
	if (vbufsize <= CLBYTES / 2) {
		rounded = minbufsize;
		while (rounded < vbufsize)
			rounded <<= 1;
	} else
		rounded = roundup(vbufsize, CLBYTES);

	if (rounded != vbuf->b_bufsize) {
		/*
		 * Although increasing the size of this buffer might push
		 * us over maxbufmem, we'll shrink later in getnewbuf
		 * rather than duplicating the logic for cleaning and
		 * freeing buffers.
		 */
		bufcachemem += rounded - vbuf->b_bufsize;
		MALLOC(fresh, caddr_t, rounded, M_BUFFER, M_WAITOK);
		if (vbuf->b_bufsize > 0) {
			if (copy)
				bcopy(vbuf->b_un.b_addr, fresh,
				    min(vbuf->b_bcount, vbufsize));
			FREE(vbuf->b_un.b_addr, M_BUFFER);
		}
		vbuf->b_un.b_addr = fresh;
		vbuf->b_bufsize = rounded;
	}
	vbuf->b_bcount = vbufsize;
	vbuf->b_iocount = vbufsize;
}

/*
 * Get a buffer identified with the vnode and logical block number,
 * creating one if none is found in the cache.
 * One special case: if vbufsize is 0, we return null if the buffer
 * is not in the cache.  This is used to find a buffer if in the cache
 * without creating it unnecessarily (like incore, but returning
 * a pointer to the buffer).
 *
 * catch and timo are handed to tsleep() (directly, or via getnewbuf)
 * whenever we must wait.  If the sleep waiting for a busy buffer
 * returns an error, null is returned.  Otherwise the buffer comes
 * back marked B_BUSY; a cache hit is additionally marked B_CACHE.
 */
struct buf *
getblk(vn, lbn, vbufsize, catch, timo)
	register struct vnode *vn;
	daddr_t lbn;
	int vbufsize, catch, timo;
{
	register struct buf *vbuf;
	register struct bufhashhdr *nhp;
	int s, error;

#ifdef DIAGNOSTIC
	if ((unsigned) vbufsize > MAXBSIZE)
		panic("getblk: bad size");
#endif
	nhp = &bufhashtbl[BUFHASH(vn, lbn)];
	for (;;) {
start_again:
		for (vbuf = nhp->lh_first; vbuf != NULL;
		    vbuf = vbuf->b_hash.le_next)
			if (vbuf->b_lblkno == lbn && vbuf->b_vp == vn) {
				s = splbio();
				if (buf_tstflags(vbuf, B_BUSY)) {
					/*
					 * Someone else holds it: wait for
					 * brelse/biodone to wake us, then
					 * rescan since the chain may have
					 * changed while we slept.
					 */
					buf_setflags(vbuf, B_WANTED);
					error = tsleep((caddr_t)vbuf,
					    (PRIBIO + 1) | catch, "buf", timo);
					splx(s);
					if (error)
						return ((struct buf *) NULL);
					goto start_again;
				}
				if (buf_tstflags(vbuf, B_INVAL)) {
					/* Skip invalid entries; keep looking. */
					splx(s);
					continue;
				}
				/* Take it off the LRU queue; it is ours. */
				bremfree(vbuf);
				splx(s);
				break;
			}
		if (vbuf) {
			/* found it in cache */
			buf_setflags(vbuf, B_BUSY | B_CACHE);
#ifdef DIAGNOSTIC
			if (vbufsize != 0 && vbuf->b_bcount != vbufsize)
				panic("getblk: stray size");
#endif
#if 0
			vbuf->b_iocount = vbuf->b_bcount;
#endif
		} else {
			if (vbufsize == 0)
				return (NULL);
			/* not in cache; create new */
			vbuf = getnewbuf(vbufsize, catch, timo);
			if (vbuf == NULL)
				continue;	/* XXX or return null? */
			binshash(vbuf, nhp);
			bgetvp(vn, vbuf);
			vbuf->b_blkno = lbn;
			vbuf->b_lblkno = lbn;
			vbuf->b_resid = 0;
			vbuf->b_error = 0;
		}
		return (vbuf);
	}
}

/*
 * Check whether a buffer identified with the vnode and logical
 * block number is present in the cache.  Return the buffer
 * if present.  The buffer is not locked by the calling process,
 * but might be locked by another process.
 *
 * Buffers marked B_INVAL are ignored.  Returns null when no valid
 * buffer for (vn, lbn) is on the hash chain.
 */
struct buf *
incore(vn, lbn)
	register struct vnode *vn;
	daddr_t lbn;
{
	register struct buf *vbuf;
	register struct bufhashhdr *nhp;

	nhp = &bufhashtbl[BUFHASH(vn, lbn)];
	for (vbuf = nhp->lh_first; vbuf != NULL; vbuf = vbuf->b_hash.le_next)
		if (vbuf->b_lblkno == lbn && vbuf->b_vp == vn &&
		    !buf_tstflags(vbuf, B_INVAL))
			return (vbuf);
	return (NULL);
}

/*
 * Hand out a buffer of vbufsize bytes that does not correspond to any
 * logical block.  The buffer is parked on the emptybufs chain, marked
 * B_INVAL, and has its block numbers, residual and error cleared.
 * Never returns null: getnewbuf() is retried until it succeeds.
 */
struct buf *
geteblk(vbufsize)
	int vbufsize;
{
	register struct buf *fresh;

	/* getnewbuf() returns null after it had to wait; just ask again. */
	while ((fresh = getnewbuf(vbufsize, 0, 0)) == NULL)
		continue;
	binshash(fresh, &emptybufs);
	buf_setflags(fresh, B_INVAL);
	fresh->b_lblkno = 0;
	fresh->b_blkno = 0;
	fresh->b_error = 0;
	fresh->b_resid = 0;
	return (fresh);
}

/*
 * Return, through retvbufp, the buffer for logical block lbn of vnode
 * vn, reading the block in if the cached copy is not usable.
 *
 * The buffer is obtained with getblk().  If it is already filled
 * (B_DONE or B_DELWRI) 0 is returned at once; otherwise a read is
 * started, charged to the current process, and the result of
 * biowait() is returned.  cred, if not NOCRED, is held and recorded
 * as the buffer's read credential when it has none yet.
 */
bread(vn, lbn, vbufsize, cred, retvbufp)
	struct vnode *vn;
	daddr_t lbn;
	int vbufsize;
	struct ucred *cred;
	struct buf **retvbufp;
{
	struct proc *p = curproc;	/* XXX */
	register struct buf *vbuf;

	vbuf = getblk(vn, lbn, vbufsize, 0, 0);
	*retvbufp = vbuf;
	/*
	 * A block that was a delayed write and has since been written
	 * out still carries B_AGE, which would send it to the head of
	 * the LRU rather than the tail when freed.  That can make us
	 * rewrite cylinder group blocks over and over while writing a
	 * file.  Clearing B_AGE records that the block has been used
	 * since the write-back.
	 */
	buf_clrflags(vbuf, B_AGE);
	if (!buf_isfilled(vbuf)) {
		buf_setread(vbuf);
		if (cred != NOCRED && vbuf->b_rcred == NOCRED) {
			crhold(cred);
			vbuf->b_rcred = cred;
		}
		VOP_STRATEGY(vbuf);
		p->p_stats->p_ru.ru_inblock++;
		return (biowait(vbuf));
	}
	return (0);
}

/*
 * Find or create a vbuf for the specified vnode and logical block number,
 * and fill the contents if it is not resident.
 * Also prepare for an expected read on the (presumably next) logical block.
 *
 * nextlbn and nextvbufsize are parallel arrays of nreadahead entries
 * giving the block number and size of each read-ahead block.  Read-ahead
 * blocks that are already filled are released at once; the others are
 * read asynchronously.  The requested buffer is returned through
 * retvbufp and the function value is the result of biowait() on it.
 */
breadn(vn, lbn, vbufsize, nextlbn, nextvbufsize, nreadahead, cred, retvbufp)
	struct vnode *vn;
	daddr_t lbn; int vbufsize;
	daddr_t *nextlbn; int *nextvbufsize;
	int nreadahead;
	struct ucred *cred;
	struct buf **retvbufp;
{
	register struct buf *vbuf, *nextvbuf;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Get a cache entry for lbn; if not resident, start a read.
	 */
	*retvbufp = vbuf = getblk(vn, lbn, vbufsize, 0, 0);
	if (buf_isfilled(vbuf) == 0) {
		buf_setread(vbuf);
		if (vbuf->b_rcred == NOCRED && cred != NOCRED) {
			crhold(cred);
			vbuf->b_rcred = cred;
		}
		VOP_STRATEGY(vbuf);
		p->p_stats->p_ru.ru_inblock++;
	}
	/*
	 * For each readahead block, get a cache entry;
	 * if not resident, start a read.
	 * Both array cursors advance together; only the pointers are
	 * stepped here, nothing is dereferenced in the loop step.
	 */
	for (; nreadahead > 0; nreadahead--, nextlbn++, nextvbufsize++) {
		nextvbuf = getblk(vn, *nextlbn, *nextvbufsize, 0, 0);
		if (buf_isfilled(nextvbuf))
			brelse(nextvbuf);
		else {
			buf_setflags(nextvbuf, B_ASYNC | B_READ);
			if (nextvbuf->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				nextvbuf->b_rcred = cred;
			}
			VOP_STRATEGY(nextvbuf);
			p->p_stats->p_ru.ru_inblock++;
		}
	}
	/*
	 * Return the desired block, waiting for I/O to
	 * complete if necessary.
	 */
	return (biowait(vbuf));
}

/*
 * Start an asynchronous write of the buffer and return without
 * waiting: the buffer is flagged B_ASYNC and handed to VOP_BWRITE,
 * whose result is ignored.  The buffer is freed when the I/O completes.
 */
bawrite(vbuf)
	struct buf *vbuf;
{
	buf_setflags(vbuf, B_ASYNC);
	(void) VOP_BWRITE(vbuf);
}

/*
 * Write the buffer's contents back.
 *
 * If the buffer was not marked B_ASYNC on entry, wait for the write
 * to finish, release the buffer and return the biowait() status.
 * If it was B_ASYNC, return 0 as soon as the I/O has been started;
 * the buffer is then released at completion time.
 */
bwrite(vbuf)
	register struct buf *vbuf;
{
	struct proc *p = curproc;	/* XXX */
	register int prevflags;
	int error, s;

	/* Remember the incoming state, then reset it for a fresh write. */
	prevflags = vbuf->b_flags;
	buf_clrflags(vbuf, B_DELWRI | B_DONE | B_ERROR | B_READ);

	/*
	 * v_numoutput and reassignbuf both need splbio; doing the
	 * accounting test inside the same protected region covers a
	 * little more than strictly required but is cheaper on average.
	 */
	s = splbio();
	vbuf->b_vp->v_numoutput++;
	if ((prevflags & B_DELWRI) == 0) {
		/* Nobody has been charged for this write yet. */
		if (p)
			p->p_stats->p_ru.ru_oublock++;
	} else if (vbuf->b_vp) {
		/*
		 * If there is an error and B_ASYNC is set, the vnode
		 * may be dissociated from the buffer.
		 * NOTE(review): b_vp was already dereferenced just
		 * above, so this check cannot protect that access --
		 * confirm whether b_vp can really be null here.
		 */
		reassignbuf(vbuf, vbuf->b_vp);
	}
	splx(s);

	buf_setflags(vbuf, B_WRITEINPROG);
	VOP_STRATEGY(vbuf);
	if ((prevflags & B_ASYNC) == 0) {
		error = biowait(vbuf);
		brelse(vbuf);
		return (error);
	}
	return (0);
}

/*
 * Vnode-operation entry point for buffer writes: unpacks the buffer
 * from the argument structure and passes it to bwrite(), returning
 * bwrite()'s result.
 */
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{
	int status;

	status = bwrite(ap->a_bp);
	return (status);
}

/*
 * Release a modified buffer, marking it B_DELWRI so that its contents
 * get written eventually.  Meant for blocks that are expected to be
 * modified again soon.
 *
 * The first time a buffer is marked, it is moved to the proper vnode
 * list via reassignbuf() and the write is charged to the current
 * process (if any).
 */
bdwrite(vbuf)
	register struct buf *vbuf;
{
	struct proc *p = curproc;	/* XXX */

	if (!buf_tstflags(vbuf, B_DELWRI)) {
		buf_setflags(vbuf, B_DELWRI);
		reassignbuf(vbuf, vbuf->b_vp);
		if (p)
			p->p_stats->p_ru.ru_oublock++;
	}
	/*
	 * The B_TAPE ioctl succeeding identifies a tape drive, for
	 * which the write must be issued now; anything else is simply
	 * marked done and released with the delayed write pending.
	 */
	if (VOP_IOCTL(vbuf->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) != 0) {
		buf_setflags(vbuf, B_DONE);
		brelse(vbuf);
	} else
		bawrite(vbuf);
}

/*
 * Release a buffer: put it back on the LRU queue, clear its
 * B_ASYNC, B_BUSY and B_WANTED flags, and wake up anyone waiting
 * for this buffer or for any free buffer.
 *
 * N.B. - Buffers marked B_AGE should be given less than the full
 * LRU timeouts implemented by this code.  For now, we put buffers
 * marked B_DELWRI | B_AGE at the front of the queue, as they have
 * already gotten to the end of the LRU.  Other B_AGE buffers should
 * be held for a bit longer.
 */
brelse(vbuf)
	register struct buf *vbuf;
{
	struct ucred *oldcred;
	int s, towake, athead;

	/*
	 * An error on a locked (active) buffer is forgotten so the I/O
	 * can be retried; on any other buffer it invalidates the data.
	 */
	if (buf_tstflags(vbuf, B_ERROR)) {
		if (!buf_tstflags(vbuf, B_LOCKED))
			buf_setflags(vbuf, B_INVAL);
		else
			buf_clrflags(vbuf, B_ERROR);
	}

	/*
	 * Invalid, failed or do-not-cache buffers give up their vnode
	 * and credentials and lose any pending delayed write.
	 */
	if (buf_tstflags(vbuf, B_ERROR | B_INVAL | B_NOCACHE)) {
		if (buf_tstflags(vbuf, B_NOCACHE))
			buf_setflags(vbuf, B_INVAL);
		if (vbuf->b_vp)
			brelvp(vbuf);
		if ((oldcred = vbuf->b_rcred) != NOCRED) {
			vbuf->b_rcred = NOCRED;
			crfree(oldcred);
		}
		if ((oldcred = vbuf->b_wcred) != NOCRED) {
			vbuf->b_wcred = NOCRED;
			crfree(oldcred);
		}
		buf_clrflags(vbuf, B_DELWRI);
	}

	s = splbio();
	/* Useless or already-aged dirty buffers are reused first. */
	athead = buf_tstflags(vbuf, B_ERROR | B_INVAL) ||
	    (vbuf->b_flags & (B_DELWRI | B_AGE)) == (B_DELWRI | B_AGE);
	if (athead) {
		TAILQ_INSERT_HEAD(&bufq_lru, vbuf, b_freelist);
	} else {
		TAILQ_INSERT_TAIL(&bufq_lru, vbuf, b_freelist);
	}
	towake = buf_tstflags(vbuf, B_WANTED);
	buf_clrflags(vbuf, B_ASYNC | B_BUSY | B_WANTED);
	splx(s);

	if (towake)
		wakeup((caddr_t)vbuf);
	if (bufneeded) {
		bufneeded = 0;
		wakeup((caddr_t)&bufneeded);
	}
}

/*
 * Sleep until the I/O on vbuf is flagged B_DONE, then report how it
 * went: 0 if B_ERROR is clear, otherwise the buffer's b_error, or EIO
 * when b_error is zero.
 */
biowait(vbuf)
	register struct buf *vbuf;
{
	int s;

	s = splbio();
	for (;;) {
		if (buf_tstflags(vbuf, B_DONE))
			break;
		sleep((caddr_t)vbuf, PRIBIO);
	}
	splx(s);

	if (buf_tstflags(vbuf, B_ERROR)) {
		if (vbuf->b_error)
			return (vbuf->b_error);
		return (EIO);
	}
	return (0);
}

/*
 * Callback from device drivers to indicate completion of an operation.
 * Note that this may be the first of a chain; if so, all are considered
 * `done' if the B_CHAIN flag was set by the driver.
 *
 * bhead is the completed buffer (the head of the chain, if any).
 * Buffers are linked through b_chain.  When B_CHAIN is not set on the
 * head, only the head is completed here and the next buffer of the
 * chain is started with VOP_STRATEGY.  Each completed buffer is marked
 * B_DONE and then either its b_iodone callback is invoked (B_CALL),
 * or it is released (B_ASYNC), or its sleepers are woken.
 */
void
biodone(bhead)
	struct buf *bhead;
{
	register struct buf *vbuf, *next;
	int error = 0, seterr = 0, chaindone = (bhead->b_flags & B_CHAIN);

	vbuf = bhead;
	do {
		/* Break from chain and restore `normal' i/o count. */
		next = vbuf->b_chain;
		vbuf->b_chain = NULL;

		/*
		 * Check whether driver implemented chaining;
		 * if B_CHAIN is set on first buffer, all are done;
		 * otherwise, start the next operation of the chain.
		 */
		if (next && chaindone == 0) {
			if (buf_iswrite(vbuf))
				buf_setflags(next, B_WRITEINPROG);
			VOP_STRATEGY(next);
			/* The rest of the chain completes separately. */
			next = NULL;
		}
		vbuf->b_flags &= ~(B_CHAIN | B_WRITEINPROG);
		vbuf->b_iocount = vbuf->b_bcount;	/* XXX */
#ifdef DIAGNOSTIC
		if (buf_tstflags(vbuf, B_DONE))
			panic("biodone: dup");
#endif
		/* BEGIN XXX (at least, Chris does not like this) */
		/*
		 * Once a buffer of the chain has failed, every later
		 * buffer is given the same error.  A nonzero residual
		 * on an otherwise good buffer makes the later buffers
		 * fail with EIO, but does not flag this one.
		 */
		if (vbuf->b_flags & B_ERROR) {
			seterr = 1;	/* propagate error down chain */
			error = vbuf->b_error;
		} else if (seterr) {
			vbuf->b_flags |= B_ERROR;
			vbuf->b_error = error;
		} else if (vbuf->b_resid) {
			seterr = 1;
			error = EIO;
		}
		/* END XXX */
		buf_setflags(vbuf, B_DONE);
		/* Completed writes are reported via vwakeup(). */
		if (buf_iswrite(vbuf))
			vwakeup(vbuf);
		if (buf_tstflags(vbuf, B_CALL)) {
			/* One-shot completion callback. */
			buf_clrflags(vbuf, B_CALL);
			(*vbuf->b_iodone)(vbuf);
		} else {
			/*
			 * Asynchronous: release buffer
			 * Synchronous: wakeup those awaiting I/O completion
			 */
			if (buf_isasync(vbuf))
				brelse(vbuf);
			else {
				buf_clrflags(vbuf, B_WANTED);
				wakeup((caddr_t)vbuf);
			}
		}
	} while ((vbuf = next) != NULL);
}

/*
 * Called from machine-dependent code to flush pending buffers before
 * reboot.
 *
 * After calling sync(), the LRU queue is scanned up to 20 times.
 * Each pass starts with nbusy equal to the number of allocated buffer
 * headers and subtracts one for every idle, clean buffer found on the
 * queue; delayed-write buffers found there are written asynchronously.
 * The scan ends early once nbusy reaches zero; otherwise the remaining
 * count is printed and the next pass follows a delay that grows with
 * the iteration number.
 */
void
bootsync()
{
	register struct buf *bp;
	int iter, nbusy, s;
	extern int cold;
	extern struct proc proc0;

	printf("syncing disks... ");
	cold = 1;		/* at least chilly, can't sleep normally */

#ifdef notyet
	/*
	 * Make sure that any buffer allocation, etc.
	 * can use all of memory now (otherwise some
	 * is reserved for the page daemon).
	 */
	vm_page_free_reserved = 0;
#endif

	/*
	 * Release vnodes held by texts before sync.
	 */
	if (panicstr == NULL)
		vnode_pager_umount((struct mount *)NULL);
#ifdef notyet
	sync((struct proc *)NULL, (void *)NULL, (int *)NULL);
#else
	sync(&proc0, (void *)NULL, (int *)NULL); /* should be null proc */
#endif

	for (iter = 0; iter < 20; iter++) {
		/*
		 * NOTE(review): "goto restart" rescans without advancing
		 * iter, so the 20-pass limit does not bound the restarts.
		 */
	restart:
		nbusy = numbufcache;
		for (bp = bufq_lru.tqh_first; bp != NULL;
		    bp = bp->b_freelist.tqe_next) {
			s = splbio();

			/* avoid races */
			if (bp->b_flags & B_BUSY) {
				printf("<b>");	/* XXX busy on free list??? */
				splx(s);
				goto restart;
			}

			/* shouldn't happen? */
			if (bp->b_flags & B_DELWRI) {
				/* Claim it and push it out, then rescan. */
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				printf("<d>");	/* XXX */
				bawrite(bp);
				goto restart;
			}
			splx(s);
			--nbusy;
		}
		if (nbusy <= 0)
			break;
		printf("%d ", nbusy);
		DELAY(40000 * iter);
	}
	if (nbusy > 0)
		printf("giving up\n");
	else
		printf("done\n");
}
