/*-
 * Copyright (c) 1991, 1994, 1995 Berkeley Software Design, Inc.
 * All rights reserved.
 * The Berkeley Software Design Inc. software License Agreement specifies
 * the terms and conditions for redistribution.
 *
 *	BSDI $Id: kern_physio.c,v 2.2 1995/05/10 17:52:42 donn Exp $
 */

/*
 * Copyright (c) 1982, 1986, 1990 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)kern_physio.c	7.20 (Berkeley) 5/11/91
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/trace.h>
#include <sys/map.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>

/*
 * We use the Mach approach of vm_read()/vm_write() to update user buffers.
 * Earlier attempts to minimize copying ran into trouble with transfers
 * that spanned objects and with incorrect COW behavior.
 */
int
physio_start_vm(map, uaddr, bp)
	vm_map_t map;
	vm_offset_t uaddr;
	struct buf *bp;
{
	vm_offset_t kpage = vm_map_min(kernel_map);
	vm_offset_t start = trunc_page(uaddr);
	vm_size_t len = round_page(uaddr + bp->b_bcount) - start;
	int error = 0;

	if (len == 0)
		panic("physio_start_vm zero transfer");

#ifdef i386
	ptpage_botch(map, start);
	ptpage_botch(map, start + len - 1);
#endif

	if (error = vm_map_find(kernel_map, 0, 0, &kpage, len, 1))
		return (error);
	bp->b_un.b_addr = (caddr_t) (kpage + uaddr - start);
	if (bp->b_flags & B_READ) {
		/*
		 * If the transfer isn't aligned, copy in the trailing
		 * or leading data.  We may take write faults here.
		 */
		if (uaddr != start)
			error = copyin((void *) start, (void *) kpage,
			    uaddr - start);
		uaddr += bp->b_bcount;
		if (!error && uaddr != start + len)
			error = copyin((void *) uaddr,
			    (void *) kpage + uaddr - start,
			    start + len - uaddr);
	} else
		/*
		 * We're writing -- grab the user's buffer for reading.
		 */
		error = vm_map_copy(kernel_map, map, kpage, len, start, 0, 0);
	if (!error)
		error = vm_map_pageable(kernel_map, kpage, kpage + len, FALSE);
	if (error)
		vm_map_delete(kernel_map, kpage, kpage + len);
	return (error);
}

/*
 * Undo physio_start_vm() once the I/O for a transfer has completed.
 *
 * The transfer length is passed explicitly in cnt, in addition to the
 * buf pointer, because the buffer's own count field can't be trusted
 * after an I/O.  The kernel range is located from bp->b_un.b_addr.
 *
 *	map	user map the transfer belongs to
 *	uaddr	user address of the first byte of the transfer
 *	bp	buffer header used for the transfer
 *	cnt	byte count the transfer was started with
 *
 * The kernel range is made pageable again; for a read the page-rounded
 * user range is deleted from the user map and replaced by a copy of the
 * kernel range; the kernel range is then deleted.  Return values of the
 * vm_map_*() calls are not examined.
 */
void
physio_finish_vm(map, uaddr, bp, cnt)
	vm_map_t map;
	vm_offset_t uaddr;
	struct buf *bp;
	vm_size_t cnt;
{
	vm_offset_t ubase = trunc_page(uaddr);
	vm_size_t span = round_page(uaddr + cnt) - ubase;
	vm_offset_t kbase = trunc_page(bp->b_un.b_addr);
	vm_offset_t kend = kbase + span;

	vm_map_pageable(kernel_map, kbase, kend, TRUE);

	if ((bp->b_flags & B_READ) != 0) {
		/* Hand the freshly read pages over to the user map. */
		vm_map_delete(map, ubase, ubase + span);
		vm_map_copy(map, kernel_map, ubase, span, kbase, 1, 0);
	}

	vm_map_delete(kernel_map, kbase, kend);
}

/*
 * Perform I/O directly between a device and the user-space memory
 * described by a uio.
 *
 *	devio		driver routine called with the buf to start each
 *			transfer
 *	bufhdr		buf to use for the transfers, or NULL to have one
 *			MALLOC'd for the duration of the call
 *	deviceid	device number, stored in b_dev
 *	ioflag		flag bits OR'd into b_flags (B_READ for a read)
 *	sizelimit	called with the buf before each transfer; it may
 *			lower b_bcount.  Its return value is ignored.
 *	uio		the request; must be UIO_USERSPACE and its direction
 *			must agree with B_READ in the buf flags, otherwise
 *			we panic
 *
 * Each iovec is handled in pieces of at most the size left in b_bcount
 * by sizelimit.  The uio (iov_base, iov_len, uio_resid, uio_offset) is
 * advanced by the amount actually transferred.  The loop stops on the
 * first error or the first short transfer.
 *
 * Returns 0, or EFAULT for a bad user address range, or whatever nonzero
 * value physio_start_vm() or biowait() returned.
 */
int
physio(devio, bufhdr, deviceid, ioflag, sizelimit, uio)
	int (*devio)(); 
	register struct buf *bufhdr;
	dev_t deviceid;
	int ioflag;
	u_int (*sizelimit)();
	struct uio *uio;
{
	vm_map_t map = &uio->uio_procp->p_vmspace->vm_map;
	vm_offset_t base;
	vm_size_t cnt, req;
	struct iovec *iov;
	int s;
	int error = 0;

	/*
	 * Complete pseudo-code for physio() can be found on
	 * p. 232 of the BSD book (figure 8.1).  Oh joy!
	 */

	if (uio->uio_segflg != UIO_USERSPACE)
		panic("physio uio_segflg");

	/*
	 * 4.3 Tahoe evidently 'borrow[ed] swap buffer structures' (p. 232);
	 * we just malloc() bufs when they aren't provided.
	 *
	 * A caller-supplied buf is claimed at splbio: sleep until nobody
	 * has it B_BUSY, then mark it busy.  B_PHYS is set only on
	 * caller-supplied bufs; the cleanup code at the bottom relies on
	 * that to tell them apart from MALLOC'd ones.
	 *
	 * NOTE(review): for a caller-supplied buf only B_WANTED and B_DONE
	 * are cleared here, and only B_BUSY/B_WANTED at the end, so other
	 * bits (e.g. B_READ from an earlier call) carry over between uses.
	 * Confirm callers never reuse one buf for both directions.
	 */
	if (bufhdr) {
		s = splbio();
		while (bufhdr->b_flags & B_BUSY) {
			bufhdr->b_flags |= B_WANTED;
			sleep((caddr_t)bufhdr, PRIBIO + 1);
		}
		bufhdr->b_flags |= B_BUSY | B_PHYS | ioflag;
		bufhdr->b_flags &= ~(B_WANTED | B_DONE);
		splx(s);
	} else {
		MALLOC(bufhdr, struct buf *, sizeof *bufhdr, M_BUFFER,
		    M_WAITOK);
		bufhdr->b_flags = B_BUSY | ioflag;
	}

	/* The uio direction and B_READ in the buf flags must agree. */
	if ((uio->uio_rw == UIO_READ) == ((bufhdr->b_flags & B_READ) == 0))
		panic("physio inconsistent directions");

	bufhdr->b_error = 0;
	bufhdr->b_dev = deviceid;
	bufhdr->b_resid = 0;
	bufhdr->b_chain = NULL;
	bufhdr->b_proc = uio->uio_procp;
	bufhdr->b_iodone = NULL;
	bufhdr->b_vp = NULL;

	/*
	 * The uio/iov idiom is modeled on uiomove() in kern_subr.c.
	 */
	while (uio->uio_resid) {
		iov = uio->uio_iov;
		base = (vm_offset_t) iov->iov_base;
		cnt = iov->iov_len;
		if (cnt == 0) {
			/* This iovec is used up; move on to the next one. */
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}

		/*
		 * Adjust the size of the transfer.
		 * req remembers what we asked for so that a short
		 * transfer can be detected below.
		 */
		bufhdr->b_bcount = cnt;
		(*sizelimit)(bufhdr);
		req = cnt = bufhdr->b_bcount;

		/*
		 * Check for attempts to penetrate kernel space.
		 * ??? WHY ARE WE DOING THIS RATHER THAN USING useracc?
		 * The last test catches base + cnt wrapping around.
		 */
		if (base + cnt > VM_MAXUSER_ADDRESS)
			error = EFAULT;
		if (VM_MIN_ADDRESS > 0 && base < VM_MIN_ADDRESS)
			error = EFAULT;
		if (base > base + cnt)
			error = EFAULT;
		if (error)
			break;

		/* Sets bufhdr->b_un.b_addr to the kernel address to use. */
		if (error = physio_start_vm(map, base, bufhdr))
			break;

		/*
		 * Perform the transfer.
		 */
		bufhdr->b_blkno = btodb(uio->uio_offset);
		bufhdr->b_iocount = cnt;
		(*devio)(bufhdr);
		error = biowait(bufhdr);
		/* Clear B_DONE so the buf can be used for the next piece. */
		bufhdr->b_flags &= ~B_DONE;
		/*
		 * cnt still holds the size the transfer was started with;
		 * physio_finish_vm() needs that, not the post-I/O b_bcount.
		 * This runs whether or not biowait() reported an error.
		 */
		physio_finish_vm(map, base, bufhdr, cnt);

		/*
		 * The driver might decrease b_bcount and/or set b_resid.
		 */
		cnt = bufhdr->b_bcount - bufhdr->b_resid;

		/* Account for what was transferred, even on error. */
		iov->iov_base += cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;

		if (error)
			break;

		if (cnt != req)
			/* EOF or a short read */
			break;
	}

	/*
	 * B_PHYS means the buf came from the caller: release it and wake
	 * anyone sleeping on it.  Otherwise it was MALLOC'd above.
	 */
	if (bufhdr->b_flags & B_PHYS) {
		s = splbio();
		bufhdr->b_flags &= ~B_BUSY;
		if (bufhdr->b_flags & B_WANTED) {
			bufhdr->b_flags &= ~B_WANTED;
			wakeup((caddr_t) bufhdr);
		}
		splx(s);
	} else
		FREE(bufhdr, M_BUFFER);

	return (error);
}

/*
 * Calculate the maximum size of I/O request that can be requested
 * in a single operation. This limit is necessary to prevent a single
 * process from being able to lock more than a fixed amount of memory
 * in the kernel.
 *
 * Clamps bufhdr->b_bcount in place to at most MAXPHYS and returns the
 * resulting count, so the declared u_int return type is honoured
 * instead of falling off the end of the function.
 */
u_int
minphys(bufhdr)
	struct buf *bufhdr;
{

	bufhdr->b_bcount = min(bufhdr->b_bcount, MAXPHYS);
	return (bufhdr->b_bcount);
}

/*
 * Do a read on a device for a user process.
 */
int
rawread(deviceid, uio, flag)
	dev_t deviceid;
	struct uio *uio;
	int flag;
{
	return (physio(devsw[major(deviceid)]->d_strategy, (struct buf *)NULL,
	    deviceid, B_READ, minphys, uio));
}

/*
 * Do a write on a device for a user process.
 */
int
rawwrite(deviceid, uio, flag)
	dev_t deviceid;
	struct uio *uio;
	int flag;
{
	return (physio(devsw[major(deviceid)]->d_strategy, (struct buf *)NULL,
	    deviceid, B_WRITE, minphys, uio));
}
