/*-
 * Copyright (c) 1992, 1993, 1994, 1995 Berkeley Software Design, Inc.
 * All rights reserved.
 * The Berkeley Software Design Inc. software License Agreement specifies
 * the terms and conditions for redistribution.
 *
 *      BSDI $Id: wd.c,v 2.9 1995/12/22 19:23:52 ewv Exp $
 */
 
/*-
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)wd.c	8.1 (Berkeley) 6/11/93
 */

/*
 * ST506/RLL/IDE/ESDI disk driver for Western Digital 1002 style controllers
 */

#include <sys/param.h>
#include <sys/dkbad.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/syslog.h>
#include <sys/reboot.h>

#include <machine/bootblock.h>

#include <i386/isa/isavar.h>
#include <i386/isa/icu.h>

#include <vm/vm.h>

#include "wdreg.h"
#include "wdvar.h"

#ifdef EWVDEBUG
#include "ewvdebug.h"
#else
/* Trace hook: compiles away to nothing unless EWVDEBUG is defined. */
#define TR(desc, parm1, parm2)
#endif

#define	RETRIES		5	/* number of retries before giving up */
#define	TIMEOUT		10	/* seconds for timeout (for laptops) */
#define	INTR_BUSYWAIT	100	/* Timeout (ms) waiting for busy in intr */

/* Decode a dev_t into drive unit / partition; wdno() is the config unit. */
#define	wdunit(dev)	dk_unit(dev)
#define	wdpart(dev)	dk_part(dev)
#define	wdno(du)	((du)->wd_dk.dk_dev.dv_unit)

/*
 * Defining SHORT_BLK adds code to handle non-DEV_BSIZEd reads, this
 * doesn't work perfectly since the low bits of the offset are not available
 * to us (due to physio()). This reproduces historical behavior. If SHORT_BLK
 * is not defined any non-DEV_BSIZE-sized transfers will be aborted with
 * an error.
 *
 * This only affects raw reads through specfs.
 */
#define	SHORT_BLK

/*
 * Debug output.  WDDEBUG is unconditionally defined here, so dprintf()
 * is always compiled in and is gated at run time by wddebug (default 0).
 * The argument must be a parenthesized printf argument list:
 *	dprintf(("fmt %d\n", val));
 * NOTE(review): the expansion is a bare brace block, so the macro is not
 * safe as the unbraced body of an if that has an else.
 */
#define	WDDEBUG
#ifdef	WDDEBUG
int	wddebug = 0;
#define	dprintf(x)	{ if (wddebug) printf x; }
#else
#define	dprintf(x)
#endif

/*
 * Drive states.  Used for open and format operations.
 * States < OPEN (> 0) are transient, during an open operation.
 * OPENRAW is used for unlabeled disks to inhibit partition
 * bounds checking.
 * The low bits hold the basic state (CLOSED..OPEN); RAWDISK and REINIT
 * are modifier bits or'ed on top, and DISKSTATE() strips both modifiers.
 * (DISKSTATE refers to REINIT before its #define below; this is fine
 * because macros are only expanded at the point of use.)
 * XXX should use DK_* states in disk.h, but those miss RECAL, RDBADTBL
 */
#define RAWDISK		0x08		/* raw disk operation, no label */
#define ISRAWSTATE(s)	(RAWDISK&(s))	/* are we in a raw state? */
#define DISKSTATE(s)	(~(REINIT|RAWDISK)&(s)) /* basic state of state mach. */
#define REINIT		0x10		/* doing reinitialization sequence */

#define	CLOSED		0		/* disk is closed. */
					/* "cooked" disk states */
#define	WANTOPEN	1		/* open requested, not started */
#define	RECAL		2		/* doing restore */
#define	RDLABEL		3		/* reading pack label */
#define	RDBADTBL	4		/* reading bad-sector table */
#define	OPEN		5		/* done with open */

#define	WANTOPENRAW	(WANTOPEN|RAWDISK)	/* raw WANTOPEN */
#define	RECALRAW	(RECAL|RAWDISK)	/* raw open, doing restore */
#define	OPENRAW		(OPEN|RAWDISK)	/* open, but unlabeled disk */

/*
 * The max number of bad blocks -- kludge, yeah?
 * Computed as the element count of the bt_bad[] array in struct dkbad.
 */
#define NDKBAD	(sizeof(((struct dkbad *)0)->bt_bad)/sizeof(struct bt_bad))

/*
 * End of bad block list -- should be greater than any valid block number.
 * Stored as the terminating entry of wd_bad[].
 */
#define EOBADLIST	0x7fffffff

#define	NCYLGRP		64	/* number of cyl groups for badsect lookup */

/*
 * Per-drive software state.
 *
 * The structure begins with the generic dkdevice so that a pointer to it
 * can be used as a struct device pointer (see wdattach and wdcont()).
 * The wd_bc/wd_wbc/wd_cbc/wd_addr/wd_blknum group tracks the progress of
 * the transfer currently in flight; wd_bad/wd_badindx hold the in-core
 * copy of the bad-sector table built by wd_setbad().
 */
typedef struct wd_softc {
	struct	dkdevice wd_dk;  /* base device */
	struct	device *wd_parent; /* Controller device (shortcut) */
	wdc_callback_t wd_reset; /* controller reset callback */
	int	wd_iobase;	/* I/O base address */
	int	wd_aiobase;	/* "Alternate" registers I/O base */
	long	wd_bc;		/* byte count left */
	long	wd_wbc;		/* write byte count left (used for recovery) */
	long	wd_cbc;		/* contiguous byte count left (this buf) */
	int	wd_seccount;	/* sectors of sequential i/o in one operation */
	int	wd_bcount;	/* bytes of sequential i/o in one operation */
	caddr_t	wd_addr;	/* user buffer address */
	daddr_t	wd_blknum;	/* number of the block to r/w */
	int	wd_bpint;	/* bytes per interrupt */
	struct	buf *wd_bp;	/* cur bp of cur transfer */
	struct	buf *wd_startbp;/* first bp of cur transfer */
	u_char	wd_unit;	/* physical unit number */
	u_char	wd_statusreg;	/* copy of status reg. (on errors) */
	u_char	wd_errorreg;	/* copy of error reg. (on errors) */
	int	wd_errcnt;	/* Number of errors in this req */
	int	wd_flags;	/* DK_* flag bits, see below */
	int	wd_cylpergrp;	/* number of cylinders in each wd_badindx group */
	struct	buf wd_tab;	/* head of drive queue */
	wdc_req_t wd_ioreq;	/* Main I/O controller queue element */
	wdc_req_t wd_rawreq;	/* Misc (raw) I/O queue element */
	long	wd_bad[NDKBAD + 1]; /* bad block list, EOBADLIST terminated */
	/* using char for wd_badindx assumes a limit of <= 127 bad sectors... */
	char	wd_badindx[NCYLGRP]; /* indices into wd_bad for cyl grps */
#ifdef later
	short	wd_badindx[NCYLGRP]; /* indices into wd_bad for cyl grps */
#endif
} wd_softc_t;

/*
 * redefinitions from dkdevice, pending implementation of generic driver.
 * Each wd_* name below is an alias for a member of the embedded wd_dk,
 * so e.g. du->wd_state is really du->wd_dk.dk_state.
 */
#define	wd_state	wd_dk.dk_state
#define	wd_wlabel	wd_dk.dk_wlabel
#define	wd_labelsector	wd_dk.dk_labelsector
#define	wd_copenmask	wd_dk.dk_copenmask
#define	wd_bopenmask	wd_dk.dk_bopenmask
#define	wd_openmask	wd_dk.dk_openmask
#define	wd_dd		wd_dk.dk_label

/* from wd_softc, produce pointer to parent wdc_softc */
#define	wdcont(du)	((struct wdc_softc *)((du)->wd_dk.dk_dev.dv_parent))

/* wd_flags: bit values kept in wd_softc.wd_flags */
#define	DK_OPSTARTED	0x01	/* continuing operation already started */
#define	DK_BADSRCH	0x02	/* search bad-sector table before trying I/O */
#define	DK_SEBYSE	0x04	/* doing I/O sector by sector */
#define	DK_TIMEOUT	0x08	/* controller timed out */
#define	DK_WAITRAW	0x10	/* waiting for wd_rawreq to attach */
#define	DK_LOCKED	0x20	/* Unit locked for raw I/O */

/* Patchable tunables (plain globals, read at attach / transfer start). */
int	wd_badsearch = 1;	/* force DK_BADSRCH on all controllers */
int	wd_timeout = TIMEOUT;	/* copied into wd_ioreq.timeout per transfer */
int	wd_maxsecperint = 4;	/* default secs/int */

/* config file flags */
#define	WD_NOMULT	0x01	/* If set only do 1 sec/int */

/*
 * This label is used as a default when initializing a new or raw disk.
 * This is used while reading the dos partition table and/or the disk label,
 * and thus must be set for maximum values.
 * wdopen() falls back to these values (and sets D_DEFAULT) when neither
 * the bootstrap nor the BIOS supplied a geometry.
 */
#define	DFL_CYL	1023
#define	DFL_TRK	5
#define	DFL_SEC	17

/* Geometry tables provided elsewhere in the kernel; consulted by wdopen(). */
extern	struct biosgeom *biosgeomp;
#ifdef BSDGEOM
extern	struct bsdgeom bsdgeom;
#endif
struct bootparam *getbootparam(int, struct bootparam *);

/*
 * Forward declarations for the driver's entry points and helpers.
 * __P() wraps the parameter lists so the file also builds with
 * pre-ANSI compilers; the definitions themselves are K&R style.
 */
int	wdmatch __P((struct device *parent, struct cfdata *cf, void *aux));
void	wdattach __P((struct device *parent, struct device *self, void *aux));
int	wdopen __P((dev_t dev, int flags, int fmt, struct proc *p));
int	wdclose __P((dev_t dev, int flags, int fmt, struct proc *p));
void	wd_setbad __P((wd_softc_t *du, struct dkbad *db));
void	wdstrategy __P((struct buf *bp));
void	wd_reqctl __P((wd_softc_t *du));
void	wd_wait_drq __P((int wdc, int unit, char *s));
void	wdstart __P((wd_softc_t *du));
void	wdnextbp __P((wd_softc_t *du, struct buf *bp));
void	wd_reinit __P((wd_softc_t *du, struct buf *bp));
void	wd_ctl_reset __P((struct device *arg));
int	wdintr __P((struct device *arg));
int	wd_rawintr __P((struct device *arg));
void	wdtimeout __P((struct device *arg));
int	wdcontrol __P((wd_softc_t *du, struct buf *bp));
void	wdsetctlr __P((wd_softc_t *du, int own));
void	wd_getraw __P((wd_softc_t *du));
void	wd_rawgo __P((struct device *arg));
void	wd_relraw __P((wd_softc_t *du));
int	wdioctl __P((dev_t dev, int cmd, caddr_t addr, int flag,
	    struct proc *p));
int	wdreadp __P((wd_softc_t *du, caddr_t tb));
int	wdgetgeom __P((wd_softc_t *du, struct disklabel *lp));
int	wdsize __P((dev_t dev));
int	wddump __P((dev_t dev, daddr_t blknum, caddr_t addr, int num));

/*
 * Autoconfiguration glue: driver name "wd", match/attach routines,
 * device class, and the size of the per-unit softc to allocate.
 */
struct cfdriver wdcd =
    { NULL, "wd", wdmatch, wdattach, DV_DISK, sizeof(wd_softc_t) };
 
/*
 * Device switch entry.  Raw read/write go through the generic
 * rawread/rawwrite routines; block I/O enters through wdstrategy.
 */
struct devsw wdsw = {
	&wdcd,
	wdopen, wdclose, rawread, rawwrite, wdioctl, seltrue, nommap,
	wdstrategy, wddump, wdsize, 0,
	nostop
};

/*
 * Inline fast path for waiting on DRQ: test the status register once and
 * only call the polling routine wd_wait_drq() if DRQ is not already set.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe as the unbraced body of an if/else); arguments are parenthesized.
 * Note that `wdc' is evaluated twice when DRQ is not yet set.
 */
#define	wait_drq(wdc, unit, msg) do { \
	if ((inb((wdc) + wd_status) & WDCS_DRQ) == 0) \
		wd_wait_drq((wdc), (unit), (msg)); \
} while (0)

/*
 * Check for unit presence.
 */
int
wdmatch(parent, cf, aux)
	struct device *parent;
	struct cfdata *cf;
	void *aux;
{
	int wdc = wdc_getiobase(parent);
	wdc_attach_args_t *ap = (wdc_attach_args_t *)aux;
	int drive = ap->drive;
	int drive_loc = cf->cf_loc[LOC_DRIVE];

	if (ap->found || (drive != drive_loc && drive_loc != -1))
		return (0);

	/*
	 * Select drive and test for READY to see whether drive exists.
	 * Nonexistent IDE drives return trash, so check that BUSY is off.
	 * ATAPI drives maintain !WDCS_READY after a reset.
	 */
	outb(wdc + wd_sdh, WDSD_IBM | (drive << 4));
	DELAY(10000);
	if ((inb(wdc + wd_status) & (WDCS_BUSY|WDCS_READY)) == WDCS_READY) {
		ap->found = 1;
		return (1);
	}
	/* Clear spurious interrupts on missing second drive */
	if (drive == 1)
		outb(wdc + wd_sdh, WDSD_IBM);
	return (0);		/* didn't find ready drive */
}

/*
 * Attach a drive found by wdmatch: record controller addresses, register
 * the reset callback, initialize both controller request blocks, attach
 * the generic disk state, and (unless WD_NOMULT is set in the config
 * flags) read the drive parameters to choose the sectors-per-interrupt
 * count and report the drive size.
 */
void
wdattach(parent, self, aux)
	struct device *parent;
	struct device *self;
	void *aux;
{
	wd_softc_t *sc = (wd_softc_t *)self;
	wdc_attach_args_t *ap = (wdc_attach_args_t *)aux;
	struct wdparams *wp;
	char parmbuf[WD_SECSIZE];
	char *sep;
	int cfflags;
	int nsec;

	aprint_naive(": disk");
	/* Check for flags overrides from the boot command line */
	cfflags = getdevconf(self->dv_cfdata, NULL, wdno(sc));

	sc->wd_unit = ap->drive;
	sc->wd_parent = parent;
	sc->wd_iobase = wdc_getiobase(parent);
	sc->wd_aiobase = wdc_getaiobase(parent);
	sc->wd_errcnt = 0;

	/* Hook into the controller's reset notification list. */
	sc->wd_reset.func = wd_ctl_reset;
	sc->wd_reset.arg = sc;
	wdc_attach_reset(parent, &sc->wd_reset);

	/* Request block used for normal (strategy) I/O. */
	sc->wd_ioreq.self = (struct device *)sc;
	sc->wd_ioreq.go = (wdc_gofunc)wdstart;
	sc->wd_ioreq.intr = wdintr;
	sc->wd_ioreq.tout = wdtimeout;
	sc->wd_ioreq.timeout = 0;
	sc->wd_ioreq.next = WDC_UNALLOC;

	/* Request block used for raw (miscellaneous) operations. */
	sc->wd_rawreq.self = (struct device *)sc;
	sc->wd_rawreq.go = wd_rawgo;
	sc->wd_rawreq.intr = wd_rawintr;
	sc->wd_rawreq.tout = NULL;
	sc->wd_rawreq.timeout = 0;
	sc->wd_rawreq.next = WDC_UNALLOC;

	disk_attach(&sc->wd_dk);
	if (wd_badsearch)
		sc->wd_flags |= DK_BADSRCH;

	/* Default to one sector per interrupt. */
	sc->wd_bpint = WD_SECSIZE;
	if (!(cfflags & WD_NOMULT) && wdreadp(sc, parmbuf) == 0) {
		wp = (struct wdparams *)parmbuf;
		sep = ": ";

		/* Use multi-sector mode if offered, capped at the tunable. */
		nsec = wp->wdp_nsecperint & 0xff;
		if (nsec > 1) {
			nsec = min(nsec, wd_maxsecperint);
			sc->wd_bpint = WD_SECSIZE * nsec;
			aprint_normal("%ssec/int=%d", sep, nsec);
			sep = " ";
		}
		aprint_normal("%s%d*%d",
		    sep, (wp->wdp_fixedcyl + wp->wdp_removcyl) *
		    wp->wdp_heads * wp->wdp_sectors, WD_SECSIZE);
	}
	printf("\n");
}

/*
 * Initialize a drive.
 */
int
wdopen(dev, flags, fmt, p)
	dev_t dev;
	int flags;
	int fmt;
	struct proc *p;
{
	unsigned int unit = wdunit(dev);
	register struct buf *bp;
	register wd_softc_t *du;
	dev_t rawdev;
	struct dkbad *db;
	int i, s, error = 0, use_fdisk;
	struct mbpart dospart;
	daddr_t labelsect;
	char *msg;
	extern int cold;
	struct biosgeom *bgp = 0;

	TR("wdopen",dev,0);
	if (unit >= wdcd.cd_ndevs || (du = wdcd.cd_devs[unit]) == NULL)
		return (ENXIO);
	s = splbio();
	while (du->wd_state != OPEN && du->wd_state != OPENRAW &&
	    du->wd_state != CLOSED)
		if (error = tsleep((caddr_t)du, PRIBIO | PCATCH, devopn, 0))
			break;
	splx(s);
	if (error)
		return (error);
	if (du->wd_openmask)
		goto checkpart;	/* already is open, don't mess with it */
	if (flags & O_NONBLOCK) {
		du->wd_state = WANTOPENRAW;
		use_fdisk = 0;
	} else {
		du->wd_state = WANTOPEN;
		use_fdisk = 1;
	}

	/* begin d_init1 */
	/*
	 * If we don't yet have a disklabel, use the geometry passed from
	 * the bootstrap for the boot disk if we have it (ifdef BSDGEOM),
	 * otherwise use the geometry from the BIOS/CMOS if available,
	 * otherwise use the default sizes until we've read the label,
	 * or longer if there isn't one there.
	 */
	if (du->wd_dd.d_ncylinders == 0) {
		labelsect = LABELSECTOR;
		du->wd_dd.d_flags = D_SOFT;
		du->wd_dd.d_type = DTYPE_ST506;
		du->wd_dd.d_secsize = WD_SECSIZE;
#ifdef BSDGEOM
		if (du->wd_parent->dv_unit == 0 &&
		    bsdgeom.unit == du->wd_unit && bsdgeom.ncylinders) {
			du->wd_dd.d_ncylinders = bsdgeom.ncylinders;
			du->wd_dd.d_ntracks = bsdgeom.ntracks;
			du->wd_dd.d_nsectors = bsdgeom.nsectors;
			labelsect += bsdgeom.bsd_startsec;
			use_fdisk = 0;
		} else
#endif
		if (du->wd_parent->dv_unit == 0 && biosgeomp &&
		    ((bgp = &biosgeomp[du->wd_unit])->flags&BIOSGEOM_PRESENT)) {
			du->wd_dd.d_nsectors = bgp->nsectors;
			du->wd_dd.d_ntracks = bgp->ntracks;
			du->wd_dd.d_ncylinders = bgp->ncylinders;
		} else {
			du->wd_dd.d_flags |= D_DEFAULT;
			du->wd_dd.d_nsectors = DFL_SEC;
			du->wd_dd.d_ntracks = DFL_TRK;
			du->wd_dd.d_ncylinders = DFL_CYL;
		}
		du->wd_dd.d_npartitions = 8;
		du->wd_dd.d_secpercyl =
		    du->wd_dd.d_ntracks * du->wd_dd.d_nsectors;
		du->wd_dd.d_partitions[DK_RAWPART].p_offset = 0;
		du->wd_dd.d_partitions[DK_RAWPART].p_size =
		    du->wd_dd.d_ncylinders * du->wd_dd.d_secpercyl;
		du->wd_dd.d_partitions[DK_RAWPART].p_fstype = FS_UNUSED;
		du->wd_dd.d_partitions[0] = du->wd_dd.d_partitions[DK_RAWPART];
		du->wd_labelsector = labelsect;
	} else {
		labelsect = du->wd_labelsector;
		use_fdisk = 0;
	}

	/* no bad block forwarding unless/until we read bad-sector table */
	du->wd_bad[0] = EOBADLIST;
	du->wd_cylpergrp = INT_MAX;
	du->wd_badindx[0] = -1;
	du->wd_dk.dk_stats.dk_bpms = 32 * (60 * WD_SECSIZE / 2); /* XXX */
	du->wd_dk.dk_stats.dk_secsize = WD_SECSIZE;
	/* end d_init1 */

	/*
	 * Check for a DOS partition table.  If we have one,
	 * use its idea of the BSD partition for the label location.
	 * Note that we are running with the default geometry,
	 * and thus the labelsector value we compute is likely to be wrong,
	 * but the read will hopefully work using the same (wrong) geometry.
	 * We recompute the label location once we have the real geometry.
	 */
	rawdev = dv_makedev(major(dev), unit, DK_RAWPART);
	if (use_fdisk && (error = getbsdpartition(rawdev,
	    wdstrategy, &du->wd_dd, &dospart)) == 0) {
		/*
		 * For extended IDE with BIOS mapping, use logical sector
		 * because the BIOS geometry cannot be used here.
		 */
		if (bgp && (bgp->flags & BIOSGEOM_MAPPED)) {
			labelsect += dospart.start;
			dprintf(("label at %d\n", labelsect));
		} else {
			labelsect  +=
			    mbpssec(&dospart) +
			    mbpstrk(&dospart) * du->wd_dd.d_nsectors +
			    mbpscyl(&dospart) * du->wd_dd.d_secpercyl;
			dprintf(("label at %d/%d/%d => %d\n", mbpscyl(&dospart),
			    mbpstrk(&dospart), mbpssec(&dospart), labelsect));
		}
	} else
		use_fdisk = 0;

	/*
	 * Recal will be done in wdcontrol during first read operation,
	 * either from getbsdpartition call above or readdisklabel here.
	 * If the state is WANTOPENRAW, the read operation will "fail"
	 * after doing a recal, and we don't actually read a label.
	 */
	if (msg = readdisklabel(rawdev, wdstrategy, &du->wd_dd, labelsect)) {
		if (du->wd_state != OPENRAW) {
			/*
			 * don't log "no disk label" warning when opening
			 * the raw c partition.  This is a hack for disksetup.
			 */
			extern char no_disk_label[];
			if (msg != no_disk_label || fmt != S_IFCHR ||
			    wdpart(dev) != DK_RAWPART)
				log(LOG_ERR, "wd%d: %s\n", unit, msg);

			if (cold) {
				du->wd_state = CLOSED;
				return (ENXIO);
			} else
				du->wd_state = OPENRAW;
		}
		goto checkpart;
	}
	/*
	 * turn label sect/trk/cyl into block number
	 */
	if (use_fdisk) {
		du->wd_labelsector = LABELSECTOR + mbpssec(&dospart) +
		    mbpstrk(&dospart) * du->wd_dd.d_nsectors +
		    mbpscyl(&dospart) * du->wd_dd.d_secpercyl;
		dprintf(("label really %d\n", du->wd_labelsector));
	} else
		du->wd_labelsector = labelsect;
	du->wd_dk.dk_label.d_bsd_startsec = labelsect - LABELSECTOR;
	du->wd_dk.dk_stats.dk_bpms = (du->wd_dk.dk_label.d_rpm / 60) *
	    du->wd_dk.dk_label.d_nsectors * du->wd_dk.dk_label.d_secsize;

	/* begin d_init2 */
	wdsetctlr(du, 0);		/* XXX redundant */

	/*
	 * Read bad sector table into memory.
	 */
	du->wd_state = RDBADTBL;
	i = 0;
	bp = geteblk(du->wd_dd.d_secsize);
	do {
		bp->b_flags = B_BUSY | B_READ;
		bp->b_dev = rawdev;
		bp->b_blkno = du->wd_dd.d_secperunit - du->wd_dd.d_nsectors + i;
		if (du->wd_dd.d_secsize > DEV_BSIZE)
			bp->b_blkno *= du->wd_dd.d_secsize / DEV_BSIZE;
		else
			bp->b_blkno /= DEV_BSIZE / du->wd_dd.d_secsize;
		bp->b_bcount = bp->b_iocount = du->wd_dd.d_secsize;
		bp->b_cylin = du->wd_dd.d_ncylinders - 1;
		s = splbio();
		wdstrategy(bp);
		biowait(bp);
		splx(s);
	} while ((bp->b_flags & B_ERROR) && (i += 2) < 10 &&
		i < du->wd_dd.d_nsectors);

	db = (struct dkbad *)(bp->b_un.b_addr);
#define DKBAD_MAGIC 0x4321
	if ((bp->b_flags & B_ERROR) == 0 && db->bt_mbz == 0 &&
	    db->bt_flag == DKBAD_MAGIC)
		wd_setbad(du, db);
	else if (wdpart(dev) != DK_RAWPART || fmt != S_IFCHR)
		printf("wd%d: %s bad-sector file\n", unit,
		    (bp->b_flags & B_ERROR) ? "can't read" : "format error in");
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	/* end d_init2 */

	du->wd_state = OPEN;
	wakeup((caddr_t) du);

checkpart:
	return (dkopenpart(&du->wd_dk, dev, fmt));
}

/*
 * Close a partition: look up the unit's softc and hand the request to
 * the generic dkclose(), whose result is returned unchanged.
 */
int
wdclose(dev, flags, fmt, p)
	dev_t dev;
	int flags;
	int fmt;
	struct proc *p;
{
	wd_softc_t *du;

	du = wdcd.cd_devs[wdunit(dev)];
	TR("wdclose",dev,du);

	return (dkclose(&du->wd_dk, dev, flags, fmt, p));
}

/*
 * Set up bad sector info for disk from bad-sector table.
 *
 * du	drive whose wd_bad[] / wd_badindx[] / wd_cylpergrp are rebuilt
 * db	bad-sector table as read from the disk
 *
 * Each table entry (terminated by a cylinder of 0xffff, or after NDKBAD
 * entries) is converted to an absolute sector number in wd_bad[], and the
 * list is terminated with EOBADLIST.  We also initialize an index table
 * to find the starting position in the bad sector list for each group of
 * cylinders.  The lookup uses pre-increment, so we decrement each index.
 *
 * The index table has exactly NCYLGRP slots.  An entry naming a cylinder
 * beyond the last group (e.g. a damaged table) must not push the index
 * pointer past the end of wd_badindx[], so the fill loop is bounded; and
 * the cylinders-per-group value is kept >= 1 so that the fill loop makes
 * progress and later divisions by wd_cylpergrp are safe.
 */
void
wd_setbad(du, db)
	wd_softc_t *du;
	struct dkbad *db;
{
	daddr_t *xp = du->wd_bad;
	struct bt_bad *bb;
	char *indxp = du->wd_badindx;
	char *indxend = &du->wd_badindx[NCYLGRP];
	int cyl, i;

	TR("wd_setbad",du,db);
	du->wd_cylpergrp = (du->wd_dd.d_ncylinders + NCYLGRP - 1) / NCYLGRP;
	if (du->wd_cylpergrp < 1)
		du->wd_cylpergrp = 1;

	/* first group starts at 0 */
	*indxp++ = (0 - 1);
	cyl = du->wd_cylpergrp;

	for (i = 0, bb = db->bt_bad; i < NDKBAD; i++, bb++) {
		if (bb->bt_cyl == 0xffff)
			break;
		/* bt_trksec holds the track in the high byte, sector in low */
		*xp++ = bb->bt_cyl * (long)(du->wd_dd.d_secpercyl) +
			(bb->bt_trksec >> 8) * du->wd_dd.d_nsectors +
			(bb->bt_trksec & 0xff);
		dprintf(("BAD: %d; ", xp[-1]));
		/* start every group up to this entry's cylinder here */
		while (bb->bt_cyl >= cyl && indxp < indxend) {
			*indxp++ = i - 1;
			cyl += du->wd_cylpergrp;
			dprintf(("INDX: %d\n", i - 1));
		}
	}
	*xp = EOBADLIST;	/* End of a list */

	/* remaining groups point at the last real entry */
	i--;
	while (indxp < indxend) {
		*indxp++ = i;
		dprintf(("INDX: %d; ", i));
	}
}

/*
 * Read/write routine for a buffer.  Finds the proper unit, range checks
 * arguments, and schedules the transfer.  Does not wait for the transfer
 * to complete.  Multi-page transfers are supported.  All I/O requests must
 * be a multiple of a sector in length.
 */
void
wdstrategy(bp)
	struct buf *bp;
{
	register struct buf *dp;
	int unit = wdunit(bp->b_dev);
	wd_softc_t *du = wdcd.cd_devs[unit];
	register struct partition *p;
	daddr_t bn;
	int sz;
	int s;

	TR("wdstrategy du/bp",du,bp);
	if (DISKSTATE(du->wd_state) < OPEN)
		goto raw;

	/*
	 * Determine the size of the transfer, and make sure it is
	 * within the boundaries of the partition.
	 */
	p = &du->wd_dd.d_partitions[wdpart(bp->b_dev)];
	bn = bp->b_blkno;
	sz = (bp->b_iocount + DEV_BSIZE - 1) >> DEV_BSHIFT;
	if ((unsigned)bn >= p->p_size || bn + sz > p->p_size)
		if ((sz = dktrim(bp, p, bn, sz)) == 0)
			return;
#ifndef SHORT_BLK
	if ((bp->b_iocount & (du->wd_dd.d_secsize - 1)) != 0) {
		bp->b_error = EINVAL;
		goto bad;
	}
#else
	if ((bp->b_iocount & (du->wd_dd.d_secsize - 1)) != 0 &&
	    (bp->b_flags & B_READ) == 0) {
		bp->b_error = EINVAL;
		goto bad;
	}
#endif
	if (bn + p->p_offset <= du->wd_labelsector &&
	    bn + p->p_offset + sz > du->wd_labelsector &&
	    (bp->b_flags & B_READ) == 0 && du->wd_wlabel == 0) {
		bp->b_error = EROFS;
		goto bad;
	}
	bp->b_cylin = (bn + p->p_offset) / du->wd_dd.d_secpercyl;

raw:
	if (bp->b_chain)
		bp->b_flags |= B_CHAIN;		/* we support chaining */
	dp = &du->wd_tab;
	s = splhigh();
	disksort(dp, bp);

	/* Start unit if idle */
	if (dp->b_active == 0)
		wd_reqctl(du);

	splx(s);
	return;

bad:
	bp->b_flags |= B_ERROR;
	biodone(bp);
	return;
}

/*
 * Ask the controller for access on behalf of an idle unit.
 * Nothing happens if the drive is already active or its queue is empty;
 * otherwise the drive is marked busy and the main I/O request block is
 * submitted to the parent controller.
 */
void
wd_reqctl(du)
	wd_softc_t *du;
{
	register struct buf *queue = &du->wd_tab;

	TR("wd_reqctl",du,0);
	if (!queue->b_active && queue->b_actf != NULL) {
		queue->b_active = 1;		/* drive is now busy */
		wdc_request(du->wd_parent, &du->wd_ioreq);
	}
}

/*
 * Slow path of wait_drq: poll the status register until DRQ comes on,
 * pausing between polls.  If the poll budget runs out, a "no drq"
 * message tagged with the caller-supplied string is printed and we
 * give up.
 */
void
wd_wait_drq(wdc, unit, s)
	int wdc;
	int unit;
	char *s;
{
	int budget = 100000;

	while ((inb(wdc + wd_status) & WDCS_DRQ) == 0) {
		if (budget < 0) {
			printf("wd%d: %s, no drq\n", unit, s);
			return;
		}
		budget--;
		DELAY(10);
	}
}

/*
 * Controller startup routine.  This does the calculation, and starts
 * a read or write operation of up to wd_seccount sectors.  Called to
 * start a transfer, or from the interrupt routine to continue a
 * multi-sector transfer.
 * Always called with access to the controller.
 *
 * For reads the command is issued and we return; the data is collected
 * elsewhere.  For writes the first block of data (at most wd_bpint
 * bytes) is pushed to the drive here once DRQ is seen.
 */
void
wdstart(du)
	wd_softc_t *du;
{
	struct buf *bp;
	daddr_t blknum;
	int i;
	int drive_resid;
	int this_len;
	int unit = wdno(du);
	int wdc = du->wd_iobase;
	int wda = du->wd_aiobase;

	TR("wdstart",du,0);
	/* Get current BP being worked on */
	if ((bp = du->wd_tab.b_actf) == NULL) {
		panic("wdstart called with no work to do");
#ifdef notdef
		wdc_release(du->wd_parent);
		return;
#endif
	}

	/*
	 * Start/continue a sequenced operation in progress.
	 * Basic states up to RECAL mean the drive still needs its
	 * open/recal sequence, which wdcontrol drives.
	 */
	if (DISKSTATE(du->wd_state) <= RECAL) {
		du->wd_startbp = bp;
		if (wdcontrol(du, bp)) {
			/* Operation done, should never happen here */
			panic("wdstart: wdcontrol returned done");
#ifdef notdef
			/* This is fine but no-one ever biodone's the bp */
			du->wd_tab.b_actf = bp->av_forw;
			goto loop;
#endif
		}
		return;
	}

	/*
	 * NOTE(review): this path completes the buffer but does not
	 * remove it from the queue here -- confirm that is intended.
	 */
	if (bp->b_bcount == 0) {
		printf("wdcount 0?\n");
		biodone(bp);
		return;
	}

	/* First call for this request: latch the transfer state. */
	if ((du->wd_flags & DK_OPSTARTED) == 0) {
		du->wd_flags |= DK_OPSTARTED;
		du->wd_bc = du->wd_wbc = bp->b_iocount;
		du->wd_startbp = bp;

		du->wd_bp = bp;
		du->wd_cbc = bp->b_bcount;
		du->wd_addr = bp->b_un.b_addr;

		/*
		 * Convert DEV_BSIZE "blocks" to sectors, and
		 * calculate the physical block number and
		 * number of blocks to r/w in one operation.
		 */
		du->wd_blknum = bp->b_blkno * DEV_BSIZE / du->wd_dd.d_secsize;
		if (DISKSTATE(du->wd_state) == OPEN)
			du->wd_blknum +=
			    du->wd_dd.d_partitions[wdpart(bp->b_dev)].p_offset;
		du->wd_dk.dk_stats.dk_xfers++;
		du->wd_dk.dk_stats.dk_sectors +=
		    bp->b_iocount / du->wd_dd.d_secsize;
	} else
		bp = du->wd_bp;		/* continuing: resume on current buf */
	du->wd_dk.dk_stats.dk_busy = 1;

	/*
	 * Calculate number of sectors to read/write
	 */
	blknum = du->wd_blknum;
	if (du->wd_flags & DK_SEBYSE)
		du->wd_seccount = 1;
	else
#ifdef SHORT_BLK
		/* round up so a short final block still transfers a sector */
		du->wd_seccount = (du->wd_bc + du->wd_dd.d_secsize - 1) /
		    du->wd_dd.d_secsize;
#else
		du->wd_seccount = du->wd_bc / du->wd_dd.d_secsize;
#endif
	dprintf(("\nwdstart %d: %s %d bytes blk %d; cbc %d, seccnt %d\n", unit,
		(bp->b_flags & B_READ) ? "read" : "write",
		du->wd_bc, blknum, du->wd_cbc, du->wd_seccount));

	/* 
	 * After an error, or always if forced, see if any part
	 * of the current transfer is in the bad block list.
	 */
	if (du->wd_flags & (DK_SEBYSE|DK_BADSRCH)) {
	    	register daddr_t *xp;
	    	daddr_t eblk = blknum + du->wd_seccount - 1;

		/* Note: b_cylin is set only on the first buffer of a chain. */
	    	xp = &du->wd_bad[du->wd_badindx[blknum / du->wd_dd.d_secpercyl /
		    du->wd_cylpergrp]];
		dprintf(("bad lookup %d-%d\n", blknum, eblk));
		/*
		 * Scan forward (pre-increment, unrolled four at a time)
		 * to the first list entry >= blknum; the EOBADLIST
		 * terminator stops the scan.
		 */
		while (*++xp < blknum && *++xp < blknum &&
		       *++xp < blknum && *++xp < blknum)
			 ;
		if (*xp <= eblk) {	
			/*
			 * If we find one of the blocks, see whether it's
			 * the first block of the transfer (revector now),
			 * or a subsequent block.  In the latter case,
			 * shorten the transfer to end just before the
			 * revectored sector.
			 */
			if (*xp == blknum) {
				dprintf(("wd%d: blk %d replaced with ",
				    unit, blknum));
				/*
				 * Replacement sectors are taken counting
				 * down from just below the last track,
				 * one per bad-list position.
				 */
				blknum = du->wd_dd.d_secperunit -
					 du->wd_dd.d_nsectors - 1 -
					 (xp - du->wd_bad);
				du->wd_seccount = 1;
				dprintf(("%d\n", blknum));
			} else
				du->wd_seccount = *xp - blknum;
		}
	}
	du->wd_bcount = du->wd_seccount * du->wd_dd.d_secsize;

	/*
	 * Start the IO.
	 * The wait for BUSY to clear should not be necessary
	 * (and it hangs on non-existent IDE drives).
	 */
	for (i = 100; (inb(wdc + wd_status) & WDCS_BUSY); ) {
		if (i-- < 0) {
			printf("wd%d: wdstart, still busy\n", unit);
			break;
		}
		DELAY(10);
	}

	/* set timeout */
	du->wd_ioreq.timeout = wd_timeout;

	/*
	 * Pin 2 on the daisy chain cable has a different meaning depending
	 * on the type of drive attached, and here we have to tell the
	 * disk controller which signal to send.  If the drive has more
	 * than 8 heads, then the signal is the most significant bit
	 * of the head number.  But, if the drive has fewer than 8 heads,
	 * it may (or may not) use this wire to signal "Reduced Write 
	 * Current".  This signal is asserted by the disk controller
	 * chip whenever the cylinder number of the request is greater
	 * than or equal to the "precomp cylinder".  All drives with
	 * >= 8 heads, and some other drives keep up with the cylinder
	 * number internally, so they don't need this signal from the
	 * controller.
	 *
	 * Virtually all of the pre-production testing of BSDI was done 
	 * with this signal permanently set to Head Select, so any drives 
	 * that actually interpret this signal for Write Current control 
	 * were effectively told never to use Reduced Write Current.  Also, 
	 * in all existing labels, d_precompcyl has the value 0 (it is 
	 * referred to in disktab(5) as "d0").
	 *
	 * So, in the interest of not rocking the boat, we only enable
	 * the Reduced Write Current function if someone has taken the
	 * extra step of setting a reasonable precomp cylinder in the 
	 * disk label.  Otherwise, we keep the old behavior.
	 */
	if (du->wd_dd.d_ntracks < 8 && du->wd_dd.d_precompcyl > 0 &&
	    du->wd_dd.d_precompcyl < 1024) {
		outb(wdc + wd_precomp, du->wd_dd.d_precompcyl / 4);
		outb(wda + wda_ctlr, 0);
	} else {
		outb(wdc + wd_precomp, 0xff);
		outb(wda + wda_ctlr, WDCTL_HEAD3ENB);
	}

#ifdef notdef
	if (bp->b_flags & B_FORMAT) {
		wr(wdc + wd_sector, du->wd_dd.d_gap3);
		wr(wdc + wd_seccnt, du->wd_dd.d_nsectors);
	} else {
#endif
		/* sector numbers on the drive are 1-based */
		outb(wdc + wd_seccnt, du->wd_seccount);
		outb(wdc + wd_sector, 1 + (blknum % du->wd_dd.d_nsectors));
#ifdef notdef
	}
#endif
	/* cylinder number, low byte then high byte */
	i = blknum / du->wd_dd.d_secpercyl;
	outb(wdc + wd_cyl_lo, i);
	outb(wdc + wd_cyl_hi, i >> 8);
	TR("wdstart: seccnt/cyl", du->wd_seccount, i);

	/* Set up the SDH register (select drive); i is the head number. */
	i = (blknum % du->wd_dd.d_secpercyl) / du->wd_dd.d_nsectors;
	outb(wdc + wd_sdh, WDSD_IBM | (du->wd_unit << 4) | i);
	for (i = 100; (inb(wdc + wd_status) & WDCS_READY) == 0; ) {
		if (i-- < 0) {
			printf("wd%d: wdstart, not ready\n", unit);
			break;
		}
		DELAY(10);
	}
#ifdef notdef
	if (bp->b_flags & B_FORMAT) {
		outb(wdc + wd_command, WDCC_FORMAT);
		return;
	}
#endif
	/*
	 * If this is a read operation, just go away until it's done.
	 * The multi-sector command is used whenever wd_bpint was raised
	 * above one sector at attach time.
	 */
	if (bp->b_flags & B_READ) {
		if (du->wd_bpint == WD_SECSIZE)
			outb(wdc + wd_command, WDCC_READ);
		else
			outb(wdc + wd_command, WDCC_RMULT);
		TR("wdstart: start read bpint", du->wd_bpint, 0);
		return;
	}

	/*
	 * Write command
	 */
	if (du->wd_bpint == WD_SECSIZE)
		outb(wdc + wd_command, WDCC_WRITE);
	else
		outb(wdc + wd_command, WDCC_WMULT);
	TR("wdstart: start write, bpint", du->wd_bpint, 0);

	/* Send data to drive: one interrupt's worth, or less if that's all */
	drive_resid = min(du->wd_bcount, du->wd_bpint);
#ifdef DEBUG
	/* 
	 * This can only happen if a write request comes in that is not
	 * an exact number of disk sectors.
	 */
	if (drive_resid > du->wd_bc)
		panic("wdstart: small buffer");
#endif
	TR("wdstart: wait drq stat/resid", inb(wdc + wd_status), drive_resid);
	wait_drq(wdc, unit, "wdstart");
	/*
	 * Copy out in pieces, moving to the next buffer of the chain
	 * whenever the current one is exhausted.
	 * NOTE(review): wdnextbp is handed the local bp, which is not
	 * refreshed inside this loop -- confirm a single block never
	 * spans more than two chained buffers.
	 */
	do {
		if (du->wd_cbc == 0)
			wdnextbp(du, bp);
		this_len = min(drive_resid, du->wd_cbc);
		outsw(wdc + wd_data, du->wd_addr, this_len >> 1);
		TR("wdstart: outsw addr/len", du->wd_addr, this_len >> 1);
		du->wd_cbc -= this_len;
		du->wd_addr += this_len;
		drive_resid -= this_len;
	} while (drive_resid != 0);
}

/*
 * Advance the transfer state to the buffer chained after `bp': the
 * current-buffer pointer, data address and contiguous byte count are
 * reloaded from that next buffer.
 */
void
wdnextbp(du, bp)
	wd_softc_t *du;
	struct buf *bp;
{
	struct buf *next;

	TR("wdnextbp",du,bp);
#ifdef notyet
	/*
	 * A finished read buffer could be unchained and completed right
	 * away so its reader can proceed while later blocks are still
	 * being read.  Write buffers must all be kept, since a retry
	 * may need them again.
	 */
	if (bp->b_flags & B_READ) {
		next = bp->b_chain;
		bp->b_chain = 0;
		next->b_iocount = bp->b_iocount - bp->b_bcount;
		biodone(bp);
		du->wd_tab.b_actf = next;
		du->wd_startbp = next;
	} else
#endif
		next = bp->b_chain;

	du->wd_cbc = next->b_bcount;
	du->wd_addr = next->b_un.b_addr;
	du->wd_bp = next;
	dprintf(("wdnextbp: %d; ", du->wd_cbc));
}

/*
 * Fix up state before retrying a write.
 * We have to recompute the starting address and contiguous byte count 
 * before starting the previous write op to the drive (this is why 
 * wd_wbc is maintained, we cannot use wd_bc because it is not safe
 * to assume that the drive has gotten the data to disk just because
 * the outsw was done).
 *
 * On return wd_bp/wd_addr/wd_cbc point at the first byte that is not
 * yet known to be on disk, wd_bc is rewound to wd_wbc, and wd_blknum
 * is the disk sector that byte belongs to.
 */
void
wd_fixbp(du)
	wd_softc_t *du;
{
	struct buf *bp = du->wd_startbp;
	/* Bytes of the whole request that are known to be written. */
	int total = bp->b_iocount - du->wd_wbc;
	/* Walked down to the byte offset inside the buffer we stop at. */
	int off = total;

	dprintf(("wd_fixbp: %d done, ", total));
	while (bp->b_bcount < off) {
		off -= bp->b_bcount;
		bp = bp->b_chain;
	}
	du->wd_bp = bp;
	du->wd_bc = du->wd_wbc;
	du->wd_addr = bp->b_un.b_addr + off;
	du->wd_cbc = bp->b_bcount - off;
	/*
	 * The restart sector is relative to the start of the whole request,
	 * so it must be derived from the total byte count, not from the
	 * offset within the current chain buffer (the two differ as soon
	 * as the loop above has skipped a buffer).
	 */
	du->wd_blknum = (du->wd_startbp->b_blkno * DEV_BSIZE / 
	    du->wd_dd.d_secsize) + (total / du->wd_dd.d_secsize);
	if (DISKSTATE(du->wd_state) == OPEN)
		du->wd_blknum +=
		    du->wd_dd.d_partitions[wdpart(bp->b_dev)].p_offset;
	dprintf((" cbc %d\n", du->wd_cbc));
}

/*
 * Perform a hard reset during error recovery.
 * Sometimes a controller/drive combination wedges so hard
 * that nothing else will unwedge it (and maybe not even this).
 * All drives on this controller will then need to be reinitialized,
 * doing a recal and setting geometry.
 *
 * du is the drive that hit the error; bp is handed on to wdcontrol(),
 * which flags it with B_ERROR if the recalibrate ultimately fails.
 */
void
wd_reinit(du, bp)
	wd_softc_t *du;
	struct buf *bp;
{
	TR("wd_reinit",du,bp);
	/* Hit the controller, our reset callback arranges for a recal */
	wdc_reinit(du->wd_parent);

	/* Start the recal on this drive, others must queue for it */
	wdcontrol(du, bp);
}

/*
 * Controller reset callback: put the drive into a state from which the
 * next I/O will recalibrate it.  Nothing is started here.
 *
 *  - already reinitializing, or closed: leave the state alone;
 *  - open: REINIT | WANTOPEN (keeping the raw bit) so that the recal
 *    is followed by a return to the previous open state;
 *  - anything else was only partly initialized: restart at WANTOPEN,
 *    again keeping the raw bit.
 */
void
wd_ctl_reset(arg)
	struct device *arg;
{
	wd_softc_t *du = (wd_softc_t *)arg;
	int rawbit;

	TR("wd_ctl_reset",du,0);

	if (du->wd_state & REINIT)
		return;
	if (DISKSTATE(du->wd_state) == CLOSED)
		return;

	rawbit = du->wd_state & RAWDISK;
	if (DISKSTATE(du->wd_state) == OPEN)
		du->wd_state = REINIT|WANTOPEN|rawbit;
	else
		du->wd_state = WANTOPEN|rawbit;
}

/*
 * Interrupt routine for the controller.  Acknowledge the interrupt, check for
 * errors on the current operation, mark it done if necessary, and start
 * the next request.  Also check for a partially done transfer, and
 * continue with the next chunk if so.
 *
 * Also entered from wdtimeout() with DK_TIMEOUT set to simulate a lost
 * interrupt.  Always returns 1.
 *
 * While copying data the current chain buffer is always taken from
 * du->wd_bp (which wdnextbp() keeps up to date) rather than from the
 * local bp, so that a single interrupt's worth of data may span more
 * than two buffers of a chain.
 */
int
wdintr(arg)
	struct device *arg;
{
	wd_softc_t *du = (wd_softc_t *)arg;
	register struct buf *bp;
	int unit = wdno(du);
	int wdc = du->wd_iobase;
	struct buf *dp = &du->wd_tab;
	int status;
	int done;
	int i;
	int drive_resid;
	int this_len;

	/* Shouldn't need this, but it may be a slow controller. */
	i = INTR_BUSYWAIT;
	while ((status = inb(wdc + wd_status)) & WDCS_BUSY) {
		if (i-- <= 0) {
			printf("wd%d: controller wedged (status %b)\n", unit,
			    status, WDCS_BITS);
			du->wd_flags |= DK_TIMEOUT;
			break;
		}
		DELAY(1000);
	}
	du->wd_dk.dk_stats.dk_busy = 0;
	TR("wdintr",du,status);

	dprintf(("I "));
	/* Still in the open/recal state machine: let wdcontrol() run it. */
	if (DISKSTATE(du->wd_state) <= RECAL) {
		if (wdcontrol(du, du->wd_tab.b_actf))
			goto done;
		return (1);
	}
	bp = du->wd_bp;

	/*
	 * Check for errors or timeouts.
	 */
	if (du->wd_flags & DK_TIMEOUT &&
	    (status & (WDCS_BUSY | WDCS_ERR | WDCS_ECCCOR | WDCS_DRQ)) ==
	    WDCS_DRQ) {
		/*
		 * The drive is simply waiting for data: the interrupt
		 * was lost.  In multi-sector mode fall back to one
		 * sector per interrupt and leave DK_TIMEOUT set so the
		 * error path below resets and retries.
		 */
		log(LOG_ERR, "wd%d: timeout ignored, processing interrupt\n",
		    unit);
		if (du->wd_bpint != WD_SECSIZE) {
			log(LOG_ERR, "wd%d: switching to one sector/interrupt "
			    "mode\n", unit);
			du->wd_bpint = WD_SECSIZE;
			du->wd_errcnt = 1;	/* Force a reset/retry */
		} else
			du->wd_flags &= ~DK_TIMEOUT;
	}

	if (status & (WDCS_ERR | WDCS_ECCCOR) ||
	    du->wd_flags & DK_TIMEOUT) {
		du->wd_statusreg = status;
		du->wd_errorreg = inb(wdc + wd_error);	/* save error status */
		dprintf(("status %x error %x\n", status, du->wd_errorreg));
#ifdef notdef
		if (bp->b_flags & B_FORMAT) {
			bp->b_flags |= B_ERROR;
			goto done;
		}
#endif

		if (status & WDCS_ERR || du->wd_flags & DK_TIMEOUT) {
			int timedout = du->wd_flags & DK_TIMEOUT;

			du->wd_flags &= ~DK_TIMEOUT;
			if (++du->wd_errcnt <= RETRIES) {
				/*
				 * Retry i/o sector by sector
				 */
				du->wd_ioreq.timeout = 0;
				du->wd_flags |= DK_SEBYSE;
				if ((bp->b_flags & B_READ) == 0)
					wd_fixbp(du);
				if ((du->wd_errorreg & WDERR_ABORT ||
				    timedout) && du->wd_errcnt == 2) {
					/*
					 * Some controller/disk combinations
					 * have a pathological state
					 * characterized by an ABORT error;
					 * only a hard reset seems to make
					 * things better.
					 */
					printf("wd%d: resetting controller\n",
					    unit);
					wd_reinit(du, bp);
					return (1);
				}
				wdstart(du);
				return (1);
			}
			/* Out of retries: report and fail the request. */
			diskerr(du->wd_startbp, "wd", "hard error", LOG_PRINTF,
			    (du->wd_startbp->b_iocount - du->wd_bc) /
			    du->wd_dd.d_secsize, &du->wd_dd);
			printf(" status %b error %b\n",
			    du->wd_statusreg, WDCS_BITS,
			    du->wd_errorreg, WDERR_BITS);
			du->wd_startbp->b_flags |= B_ERROR; /* flag error */
			goto done;
		} else {
			/* Only WDCS_ECCCOR was set: log it and carry on. */
			log(LOG_WARNING, "wd%d%c: soft ecc bn %d\n",
			    unit, wdpart(bp->b_dev) + 'a', du->wd_blknum);
#if 0
			du->wd_seccount = 1;	/* start new transfer */
			du->wd_bcount = du->wd_dd.d_secsize;
			if ((bp->b_flags & B_READ) == 0)
				wd_fixbp(du);
#endif
		}
	}

	/*
	 * If this was a successful read operation, fetch the data.
	 */
	done = min(du->wd_bpint, du->wd_bcount);
	if (bp->b_flags & B_READ) {
		drive_resid = done;

		/* Ready to receive data? */
		wait_drq(wdc, unit, "wdintr read");

		do {
			if (du->wd_cbc == 0) {
				/*
				 * Current buffer is full; move to the
				 * one after the *current* chain buffer.
				 */
				if (du->wd_bp->b_chain)
					wdnextbp(du, du->wd_bp);
				else {
					/* Soak up excess data */
					while (drive_resid > 0) {
						inw(wdc + wd_data);
						drive_resid -= 2;
					}
					break;
				}
			}
			this_len = min(drive_resid, du->wd_cbc);
			insw(wdc + wd_data, (int) du->wd_addr, this_len >> 1);
			TR("wdintr: insw addr/len", du->wd_addr,
			    this_len >> 1);
			du->wd_cbc -= this_len;
			du->wd_addr += this_len;
			drive_resid -= this_len;
		} while (drive_resid != 0);
	}

	/* If we got an error earlier, report it */
	if (du->wd_errcnt) {
		diskerr(du->wd_startbp, "wd", "soft error", LOG_WARNING,
		    (du->wd_startbp->b_iocount - du->wd_bc) /
		    du->wd_dd.d_secsize, &du->wd_dd);
		addlog(" status %b error %b retries %d\n",
		    du->wd_statusreg, WDCS_BITS,
		    du->wd_errorreg, WDERR_BITS, du->wd_errcnt);
		du->wd_errcnt = 0;
	}

	du->wd_bc -= done;
	du->wd_blknum += done / du->wd_dd.d_secsize;

	/*
	 * If this transfer isn't finished,
	 * proceed with the next contiguous i/o sector.
	 */
	du->wd_seccount -= done / du->wd_dd.d_secsize;
	if (du->wd_seccount > 0) {
		du->wd_bcount -= done;
		/* Write the next piece of data on writing */
		if (!(bp->b_flags & B_READ)) {

			/* Ready to accept data? */
			wait_drq(wdc, unit, "wdintr write");

			drive_resid = min(du->wd_bpint, du->wd_bcount);
			TR("wdintr: write cont stat/resid", 
			    inb(wdc + wd_status), drive_resid);
#ifdef DEBUG
			if (drive_resid > du->wd_bc)
				panic("wdintr: small bufr");
#endif
			do {
				/* Advance from the current chain buffer. */
				if (du->wd_cbc == 0)
					wdnextbp(du, du->wd_bp);
				this_len = min(drive_resid, du->wd_cbc);
				outsw(wdc + wd_data, du->wd_addr,
				    this_len >> 1);
				TR("wdintr: outsw addr/len", du->wd_addr,
				    this_len >> 1);
				du->wd_cbc -= this_len;
				du->wd_addr += this_len;
				drive_resid -= this_len;
			} while (drive_resid != 0);
		}
		return (1);
	}

	/*
	 * Do we need to start another i/o to finish this request?
	 */
	if (du->wd_bc > 0) {
		/* Data can be assumed to be written now */
		du->wd_wbc = du->wd_bc;

		/* Start another operation in service of this bp */
		wdstart(du);
		return (1);
	}
#ifdef SHORT_BLK
	else
		/* Fix resid for short blocks (wd_bc may be negative) */
		du->wd_bc = 0;
#endif

done:
	/*
	 * Done with this transfer, with or without error
	 */
	du->wd_errcnt = 0;
	du->wd_flags &= ~(DK_OPSTARTED|DK_SEBYSE);
	bp = du->wd_startbp;
	TR("wdintr: xfr done", bp, 0);
	dp->b_active = 0;
	dp->b_actf = bp->av_forw;
	dp->b_errcnt = 0;
	bp->b_resid = du->wd_bc;	/* 0 except on error... */
	biodone(bp);

	/*
	 * Release controller (even if we are just going to get 
	 * it back again, gives a chance to the other device)
	 */
	wdc_release(du->wd_parent);

	/*
	 * Start the next request.
	 */
	du->wd_ioreq.timeout = 0;
	if (dp->b_actf)
		wd_reqctl(du);		/* requeue disk if more io to do */
	
	return (1);
}

/*
 * Interrupt handler for the raw request block.  No interrupt is
 * expected while the controller is held through that block (see
 * wdreadp()), so just read the status register and complain.
 * Always returns 1.
 */
int
wd_rawintr(arg)
	struct device *arg;
{
	wd_softc_t *du = (wd_softc_t *)arg;
	int status;

	status = inb(du->wd_iobase + wd_status);
	printf("wd%d: unexpected intr on rawreq, stat=%b\n", wdno(du), status,
		WDCS_BITS);
	return (1);
}

/*
 * Driver timeout hook; the controller timeout routine calls it at
 * splbio().  Mark the drive as timed out, re-arm the timeout counter
 * and run the interrupt handler by hand in place of the interrupt
 * that (hopefully) just went missing.
 */
void
wdtimeout(arg)
	struct device *arg;
{
	wd_softc_t *du = (wd_softc_t *)arg;

	TR("wdtimeout",du,0);
	printf("wd%d: lost interrupt\n", wdno(du));

	/* wdintr() keys its recovery off DK_TIMEOUT. */
	du->wd_flags |= DK_TIMEOUT;
	du->wd_ioreq.timeout += 1;	/* Re-arm for next tick */

	wdintr((struct device *)du);
}

/*
 * Implement initialization operations.
 * Called from wdstart or wdintr during opens.
 * Uses finite-state-machine to track progress of operation in progress.
 * Returns 0 if operation still in progress, 1 if completed.
 */
int
wdcontrol(du, bp)
	wd_softc_t *du;
	struct buf *bp;
{
	u_char stat;
	u_char err;
	int s;
	int unit = wdno(du);
	int punit = du->wd_unit;
	int wdc = du->wd_iobase;

	TR("wdcontrol",du,bp);

	switch (DISKSTATE(du->wd_state)) {

	case WANTOPEN:			/* set SDH, step rate, do restore */
		/* Advance to RECAL state and maintain REINIT/RAWDISK */
		du->wd_state++;
tryagainrecal:
		dprintf(("wd%d: recal ", unit));
		s = splbio();		/* not called from intr level ... */
		outb(wdc + wd_sdh, WDSD_IBM | (punit << 4));
		du->wd_ioreq.timeout = wd_timeout;
		outb(wdc + wd_command, WDCC_RESTORE | WD_STEP);
		splx(s);
		return (0);

	case RECAL:
		if ((((stat = inb(wdc + wd_status)) & WDCS_ERR) &&
		    (err = inb(wdc + wd_error)) != WDERR_MCHG) ||
		    du->wd_flags & DK_TIMEOUT) {
			du->wd_flags &= ~DK_TIMEOUT;
			printf("wd%d: recal %s", unit,
			    du->wd_flags & DK_TIMEOUT ?
			    "timed out" : "failed");
			if (du->wd_state & REINIT)
				printf(" after controller reset");
			printf(": status %b error %b\n",
			    stat, WDCS_BITS, err, WDERR_BITS);
			if (++du->wd_errcnt < RETRIES)
				goto tryagainrecal;

			bp->b_flags |= B_ERROR;		/* didn't read label */
			du->wd_state = OPENRAW;
			return (1);
		}

		/* Set controller parms */
		wdsetctlr(du, 1);

		/*
		 * If reinitializing after a controller reset,
		 * we are now ready to continue the current operation.
		 */
		if (du->wd_state & REINIT) {
			du->wd_state = (du->wd_state&RAWDISK) ? OPENRAW : OPEN;
			wdstart(du);
			return (0);
		}

		if (ISRAWSTATE(du->wd_state)) {
			bp->b_flags |= B_ERROR;		/* didn't read label */
			du->wd_state = OPENRAW;
			return (1);
		}
		dprintf(("rdlabel "));
		du->wd_state = RDLABEL;
		wdstart(du);
		return (0);

	default:
		panic("wdcontrol");
	}
	/* NOTREACHED */
}

/*
 * Set controller parameters from disk label.
 * If own is 1 wd_ioreq owns the controller and this should just do the
 * operation, otherwise do it synchronously (using sleeps) using the 
 * wd_rawreq block.
 *
 * This also sets the multiple sectors/interrupt feature up.
 */
void
wdsetctlr(du, own)
	wd_softc_t *du;
	int own;
{
	struct wdc_softc *wdcp = wdcont(du);
	int wdc = du->wd_iobase;
	int s;
	int cnt;
	int stat;
	int timeout = 1000000;

	TR("wdsetctlr",du,own);

	if (!own)
		wd_getraw(du);

	outb(wdc + wd_cyl_lo, du->wd_dd.d_ncylinders);
	outb(wdc + wd_cyl_hi, du->wd_dd.d_ncylinders >> 8);
	outb(wdc + wd_sdh,
	    WDSD_IBM | (du->wd_unit << 4) | du->wd_dd.d_ntracks - 1);
	outb(wdc + wd_seccnt, du->wd_dd.d_nsectors);

	/* Run the command, wait for it, and eat the interrupt */
	s = splbio();
	outb(wdc + wd_command, WDCC_SETGEOM);
	while (inb(wdc + wd_status) & WDCS_BUSY && timeout > 0) {
		timeout--;
		DELAY(1);
	}
	if (timeout <= 0) {
		printf("wd: Timeout setting drive geometry\n");
		goto out;
	}

	/* Set number of sectors/interrupt */
	if (du->wd_bpint > WD_SECSIZE) {
		cnt = du->wd_bpint / WD_SECSIZE;
		outb(wdc + wd_seccnt, cnt);
		outb(wdc + wd_command, WDCC_SETMULT);
		while (((stat = inb(wdc + wd_status)) & WDCS_BUSY) &&
		    timeout > 0) {
			timeout--;
			DELAY(1);
		}
		if (timeout <= 0) {
			printf("wd: Timeout setting nsecperint\n");
			goto onesec;
		}
		if (stat & WDCS_ERR) {
			printf("wd: Error setting nsecperint: stat=%x "
			    "err=%x\n", stat, inb(wdc + wd_error));
			goto onesec;
		}
	} else {
onesec:
		du->wd_bpint = WD_SECSIZE;
	}

out:
	DELAY(1000);		/* Sometimes BUSY clears before interrupt */
	inb(wdc + wd_status);
	splx(s);
	if (!own)
		wd_relraw(du);
}

/*
 * Acquire the controller for a raw (synchronous) operation.
 * Sleeps until no other process holds DK_LOCKED, then queues the raw
 * request block and sleeps until wd_rawgo() reports that access was
 * granted.  Must not be called recursively.
 */
void
wd_getraw(du)
	wd_softc_t *du;
{
	int s;

	s = splbio();
	TR("wd_getraw",du,0);

	/* Wait for any other raw user to finish. */
	while ((du->wd_flags & DK_LOCKED) != 0)
		sleep((caddr_t)du, PRIBIO);

	/* Queue behind the async I/O path and wait for our turn. */
	du->wd_flags |= DK_LOCKED | DK_WAITRAW;
	wdc_request(du->wd_parent, &du->wd_rawreq);
	while ((du->wd_flags & DK_WAITRAW) != 0)
		sleep((caddr_t)&du->wd_rawreq, PRIBIO);

	splx(s);
}

/*
 * "Go" callback for the raw request block: the controller is now
 * ours.  Clear DK_WAITRAW and wake the sleeper in wd_getraw().
 */
void
wd_rawgo(arg)
	struct device *arg;
{
	wd_softc_t *du = (wd_softc_t *)arg;

	TR("wd_rawgo",du,0);
#ifdef DIAGNOSTIC
	/* Nobody asked for raw access. */
	if ((du->wd_flags & DK_WAITRAW) == 0)
		panic("wd_rawgo: spurious");
#endif
	du->wd_flags &= ~DK_WAITRAW;
	wakeup((caddr_t)&du->wd_rawreq);
}

/*
 * Give up raw access obtained with wd_getraw(): release the
 * controller, drop DK_LOCKED and wake anyone waiting for the lock.
 */
void
wd_relraw(du)
	wd_softc_t *du;
{
	int s;

	s = splbio();
	TR("wd_relraw",du,0);

	wdc_release(du->wd_parent);
	du->wd_flags &= ~DK_LOCKED;
	wakeup((caddr_t)du);

	splx(s);
}

/*
 * Main ioctl entry point
 *
 * dev   device the ioctl was issued on (selects unit and partition)
 * cmd   DIOC* request code
 * addr  request argument, interpreted according to cmd
 * flag  open flags; requests that modify state require FWRITE
 * p     calling process (unused here)
 *
 * Returns 0 on success or an errno value: EBADF if a modifying request
 * is made without FWRITE, EDOM if a label has more than 16 tracks,
 * ENOTTY for an unknown request, or whatever the label helpers return.
 */
int
wdioctl(dev, cmd, addr, flag, p)
	dev_t dev;
	int cmd;
	caddr_t addr;
	int flag;
	struct proc *p;
{
	int unit = wdunit(dev);
	wd_softc_t *du = wdcd.cd_devs[unit];
	int error = 0;

	TR("wdioctl",dev,cmd);

	switch (cmd) {

	case DIOCGDINFO:
		*(struct disklabel *)addr = du->wd_dd;
		break;

	case DIOCGHWINFO:
		error = wdgetgeom(du, (struct disklabel *)addr);
		break;

	case DIOCGPART:
		((struct partinfo *)addr)->disklab = &du->wd_dd;
		((struct partinfo *)addr)->part =
		    &du->wd_dd.d_partitions[wdpart(dev)];
		break;

	case DIOCSDINFO:
		if ((flag & FWRITE) == 0)
			error = EBADF;
		else if (((struct disklabel *) addr)->d_ntracks > 16)
			/* there is only a 4-bit field for head number... */
			error = EDOM;			/* XXX */
		else
			error = setdisklabel(&du->wd_dd,
			    (struct disklabel *) addr,
			    (du->wd_state == OPENRAW) ?  0 : du->wd_openmask);
		if (error == 0) {
			if (du->wd_state == OPENRAW)
				du->wd_state = OPEN;
			du->wd_labelsector = du->wd_dd.d_bsd_startsec +
			    LABELSECTOR;
			/*
			 * Only touch the hardware when the label really
			 * changed; a rejected request must have no side
			 * effects on the controller.
			 */
			wdsetctlr(du, 0);
		}
		break;

	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			error = EBADF;
		else
			du->wd_wlabel = *(int *)addr;
		break;

	case DIOCWDINFO:
		if ((flag & FWRITE) == 0)
			error = EBADF;
		else if (((struct disklabel *) addr)->d_ntracks > 16)
			/* there is only a 4-bit field for head number... */
			error = EDOM;			/* XXX */
		else if ((error = setdisklabel(&du->wd_dd,
		    (struct disklabel *)addr,
		    (du->wd_state == OPENRAW) ? 0 : (du->wd_bopenmask |
		    (du->wd_copenmask &~ (1 << DK_RAWPART))))) == 0) {
			int wlab;

			if (du->wd_state == OPENRAW)
				du->wd_state = OPEN;
			du->wd_labelsector = du->wd_dd.d_bsd_startsec +
			    LABELSECTOR;
			/*
			 * We are in process context and do not own the
			 * controller, so it has to be acquired through
			 * the raw request block (own == 0), exactly as
			 * for DIOCSDINFO.
			 */
			wdsetctlr(du, 0);

			/* simulate opening partition 0 so write succeeds */
			du->wd_openmask |= (1 << 0);	    /* XXX */
			/* Temporarily allow the label sector to be written. */
			wlab = du->wd_wlabel;
			du->wd_wlabel = 1;
			error = writedisklabel(dev, wdstrategy, &du->wd_dd,
			    du->wd_labelsector);
			du->wd_openmask = du->wd_copenmask | du->wd_bopenmask;
			du->wd_wlabel = wlab;
		}
		break;

	case DIOCSBAD:
		if ((flag & FWRITE) == 0)
			error = EBADF;
		else
			wd_setbad(du, (struct dkbad *) addr);
		break;

#ifdef notyet
	case DIOCGDINFOP:
		*(struct disklabel **)addr = &(du->wd_dd);
		break;

	case DIOCWFORMAT:
		if ((flag & FWRITE) == 0)
			error = EBADF;
		else {
			register struct format_op *fop;
			struct uio auio;
			struct iovec aiov;

			fop = (struct format_op *)addr;
			aiov.iov_base = fop->df_buf;
			aiov.iov_len = fop->df_count;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_resid = fop->df_count;
			auio.uio_segflg = 0;
			auio.uio_offset =
				fop->df_startblk * du->wd_dd.d_secsize;
			error = physio(wd_format, &rwdbuf[unit], dev, B_WRITE,
				minphys, &auio);
			fop->df_count -= auio.uio_resid;
			fop->df_reg[0] = du->wd_statusreg;
			fop->df_reg[1] = du->wd_errorreg;
		}
		break;
#endif

	default:
		error = ENOTTY;
		break;
	}
	return (error);
}

#ifdef notdef
/*
 * Strategy wrapper for formatting (compiled out): tags the buffer
 * with B_FORMAT and hands it to wdstrategy().  It is referenced by the
 * DIOCWFORMAT case of wdioctl(), which is itself under "notyet".
 */
wd_format(bp)
	struct buf *bp;
{

	bp->b_flags |= B_FORMAT;
	wdstrategy(bp);
}
#endif

/*
 * The following section is derived from the 386BSD wd driver.
 */

/*
 * issue READP to drive to ask it what it is.
 * As this command may not be implemented by all controllers,
 * we use it only on demand in setting up a disk.
 */
int
wdreadp(du, tb)
	wd_softc_t *du;
	caddr_t tb;
{
	int stat, x, i;
	int error = 0;
	int wdc = du->wd_iobase;
	int timeout = 1000000;

	TR("wdreadp",du,tb);
	wd_getraw(du);
	x = splbio();		/* not called from intr level ... */
	outb(wdc + wd_sdh, WDSD_IBM | (du->wd_unit << 4));

	/* controller ready for command? */
	while (((stat = inb(wdc + wd_status)) & WDCS_BUSY) && timeout > 0)
		timeout--;
	if (timeout <= 0) {
		error = ETIMEDOUT;
		goto out;
	}

	/* send command, await results */
	outb(wdc + wd_command, WDCC_READP);
	while (((stat = inb(wdc + wd_status)) & WDCS_BUSY) && timeout > 0)
		timeout--;
	if (timeout <= 0) {
		error = ETIMEDOUT;
		goto out;
	}

	/* is controller ready to return data? */
	while (((stat = inb(wdc + wd_status)) & (WDCS_ERR|WDCS_DRQ)) == 0 &&
	    timeout > 0)
		timeout--;
	if (timeout <= 0) {
		error = ETIMEDOUT;
		goto out;
	}

	if (stat & WDCS_ERR) {
		error = EIO;
		goto out;
	}

	/* obtain parameters */
	insw(wdc + wd_data, tb, WD_SECSIZE / sizeof(short));

	/* XXX sometimes possibly needed */
	(void) inb(wdc + wd_status);
out:
	wd_relraw(du);
	splx(x);
	return (error);
}

int
wdgetgeom(du, lp)
	wd_softc_t *du;
	struct disklabel *lp;
{
	int error;
	char tb[WD_SECSIZE];
	struct wdparams *wp = (struct wdparams *) tb;

	TR("wdgetgeom",du,lp);
	if (error = wdreadp(du, tb))
		return (error);

	lp->d_ncylinders = wp->wdp_fixedcyl + wp->wdp_removcyl /*+- 1*/;
	lp->d_ntracks = wp->wdp_heads;
	lp->d_nsectors = wp->wdp_sectors;
	return (0);
}

/*
 * Return the size of the partition named by dev in DEV_BSIZE units,
 * or -1 if the unit does not exist, cannot be opened, or the
 * partition number is out of range.  A unit whose state is 0 is opened
 * first; the open masks are put back afterwards so the unit is not
 * left marked open.
 */
int
wdsize(dev)
	dev_t dev;
{
	wd_softc_t *du;
	int unit, part;
	int saved_b, saved_c;

	unit = wdunit(dev);
	part = wdpart(dev);

	TR("wdsize",dev,0);
	if (unit >= wdcd.cd_ndevs)
		return (-1);
	du = wdcd.cd_devs[unit];
	if (du == NULL)
		return (-1);

	saved_c = du->wd_copenmask;
	saved_b = du->wd_bopenmask;
	if (du->wd_state == 0) {
		if (wdopen(dev, 0, S_IFBLK, (struct proc *) NULL) != 0)
			return (-1);
	}

	/* undo effect of open, in particular marking the unit open */
	du->wd_copenmask = saved_c;
	du->wd_bopenmask = saved_b;
	du->wd_openmask = saved_b | saved_c;

	if (part >= du->wd_dd.d_npartitions)
		return (-1);
	return ((int)((u_long)du->wd_dd.d_partitions[part].p_size *
		du->wd_dd.d_secsize / DEV_BSIZE));
}

extern	caddr_t dumpbufp;

/*
 * dump core after a system crash
 *
 * Writes num bytes starting at addr to the partition selected by dev,
 * beginning at partition-relative sector blknum, using polled
 * programmed I/O (no interrupts, no sleeping).
 *
 * Returns 0 on success, ENXIO if the unit does not exist or was never
 * opened, EINVAL if blknum lies outside the partition, and EIO on a
 * drive error or completion timeout.  A transfer that would run past
 * the end of the partition is silently shortened.
 *
 * NOTE(review): several of the status polls below have no iteration
 * limit and will spin forever if the drive never responds.
 */
int
wddump(dev, blknum, addr, num)
	dev_t dev;
	daddr_t blknum;
	caddr_t addr;
	int num;
{
	register wd_softc_t *du;	/* disk unit to do the IO */
	int wdc;
	int wda;
	int unit = wdunit(dev);
	int part = wdpart(dev);
	daddr_t sblk;
	daddr_t eblk;
	daddr_t blkcnt;
	daddr_t *xp;
	long cylin, head, sector;
	long secpertrk, secpercyl, nblocks, i;
	/* Set once the drive has been recalibrated for dumping. */
	static int in_dump = 0;
	
	/* size of memory to dump */
	/* check for acceptable drive number */
	if (unit >= wdcd.cd_ndevs || (du = wdcd.cd_devs[unit]) == NULL)
		return (ENXIO);

	wdc = du->wd_iobase;
	wda = du->wd_aiobase;

	/* was it ever initialized ? */
	if (du->wd_state < OPEN)
		return (ENXIO);

	/* Convert to disk sectors */
	num = (u_long) num / du->wd_dd.d_secsize;

	secpertrk = du->wd_dd.d_nsectors;
	secpercyl = du->wd_dd.d_secpercyl;
	nblocks = du->wd_dd.d_partitions[part].p_size;

	/* check transfer bounds against partition size */
	/* XXX should have been done in dumpsys */
	if (blknum < 0 || blknum >= nblocks)
		return (EINVAL);
	if (blknum + num > nblocks)
		num = nblocks - blknum;
	/* XXX end should have been done in dumpsys */
	/* From here on blknum is an absolute sector number. */
	blknum += du->wd_dd.d_partitions[part].p_offset;

	/* First call only: recalibrate and reprogram the controller. */
	if (!in_dump) {
		i = 100000;
		while (inb(wdc + wd_status) & WDCS_BUSY && i-- > 0)
			;
		outb(wdc + wd_sdh, WDSD_IBM | (du->wd_unit << 4));
		outb(wdc + wd_command, WDCC_RESTORE | WD_STEP);
		/* Unbounded wait for the recalibrate to finish. */
		while (inb(wdc + wd_status) & WDCS_BUSY)
			;

		wdsetctlr(du, 1);

		in_dump = 1;
	}
	/*
	 * Drives with fewer than 8 heads and a usable precompensation
	 * cylinder get precomp programmed; everything else gets 0xff
	 * and head-select bit 3 enabled.
	 */
	if (du->wd_dd.d_ntracks < 8 && du->wd_dd.d_precompcyl > 0 &&
	    du->wd_dd.d_precompcyl < 1024) {
		outb(wdc + wd_precomp, du->wd_dd.d_precompcyl / 4);
		outb(wda + wda_ctlr, 0);   /* enable Reduced Write Current */
	} else {
		outb(wdc + wd_precomp, 0xff);
		outb(wda + wda_ctlr, WDCTL_HEAD3ENB); /* enable head bit 3 */
	}
	
	while (num > 0) {
		/* At most one CLBYTES cluster per pass; map it at dumpbufp. */
		blkcnt = min(num, CLBYTES / WD_SECSIZE);
		pmap_enter(kernel_pmap, (vm_offset_t)dumpbufp,
		    (vm_offset_t)addr, VM_PROT_READ, TRUE);

	again:
		/* Re-entered without remapping while addr is mid-cluster. */
		sblk = blknum;
	    	eblk = blknum + blkcnt - 1;

		/*
		 * Look for a bad sector inside [blknum, eblk].
		 * NOTE(review): this relies on wd_bad being sorted and
		 * terminated by a large sentinel, and on wd_badindx
		 * pointing one entry before the first candidate (note
		 * the pre-increment) -- confirm against wd_setbad().
		 */
	    	xp = &du->wd_bad[du->wd_badindx[(blknum / secpercyl) /
		    du->wd_cylpergrp]];
		dprintf(("bad lookup %d-%d\n", blknum, eblk));
		while (*++xp < blknum)
			 ;
		if (*xp <= eblk) {	
			/*
			 * If we find one of the blocks, see whether it's
			 * the first block of the transfer (revector now),
			 * or a subsequent block.  In the latter case,
			 * shorten the transfer to end just before the
			 * revectored sector.
			 */
			if (*xp == blknum) {
				dprintf(("blk %d replaced with ", blknum));
				/*
				 * Replacement sectors are counted back
				 * from the end of the disk by the entry's
				 * index in wd_bad.
				 */
				sblk = du->wd_dd.d_secperunit -
					 du->wd_dd.d_nsectors - 1 -
					 (xp - du->wd_bad);
				blkcnt = 1;
				dprintf(("%d\n", sblk));
			} else
				blkcnt = *xp - blknum;
		}

		/* compute disk address */
		cylin = sblk / secpercyl;
		head = (sblk % secpercyl) / secpertrk;
		sector = (sblk % secpertrk) + 1;	/* origin 1 */

		/* select drive and head. */
		outb(wdc + wd_sdh, WDSD_IBM | (du->wd_unit<<4) | (head & 0xf));
		/* Unbounded wait for the drive to report ready. */
		while ((inb(wdc + wd_status) & WDCS_READY) == 0)
			;

		/* transfer some blocks */
		outb(wdc + wd_sector, sector);
		outb(wdc + wd_seccnt, blkcnt);
		outb(wdc + wd_cyl_lo, cylin);
		outb(wdc + wd_cyl_hi, cylin >> 8);
#ifdef notdef
		/* lets just talk about this first...*/
		pg("sdh 0%o sector %d cyl %d addr 0x%x",
			inb(wdc + wd_sdh), inb(wdc + wd_sector),
			inb(wdc + wd_cyl_hi)*256 + inb(wdc + wd_cyl_lo), addr);
#endif
		outb(wdc + wd_command, WDCC_WRITE);
		
		/* Feed the drive one sector at a time. */
		while (blkcnt-- > 0) {
			/* Ready to send data?	*/
			/* Unbounded wait for DRQ. */
			while ((inb(wdc + wd_status) & WDCS_DRQ) == 0)
				;
			if (inb(wdc + wd_status) & WDCS_ERR)
				return (EIO);

			/* Source is the dumpbufp mapping plus addr's page offset. */
			outsw(wdc + wd_data, dumpbufp + ((int)addr & PGOFSET),
			    WD_SECSIZE/sizeof(short));
			addr += WD_SECSIZE;
			num--;
			blknum++;

			if (inb(wdc + wd_status) & WDCS_ERR)
				return (EIO);
		}
		/* Check data request (should be done).	 */
		if (inb(wdc + wd_status) & WDCS_DRQ)
			return (EIO);

		/* wait for completion */
		for (i = 1000000; inb(wdc + wd_status) & WDCS_BUSY; i--)
			if (i < 0)
				return (EIO);
		/* error check the xfer */
		if (inb(wdc + wd_status) & WDCS_ERR)
			return (EIO);

		/*
		 * A transfer cut short by a bad sector leaves addr inside
		 * the mapped cluster: write the rest of it before mapping
		 * the next one.
		 */
		if (num > 0 && (int)addr & PGOFSET) {
			blkcnt = min(num,
			    (CLBYTES - ((int)addr & PGOFSET)) / WD_SECSIZE);
			goto again;
		}
	}
	return (0);
}
