
/*
 * $Id: store_io_diskd.c,v 1.33.2.5 2008/02/16 12:45:48 adrian Exp $
 *
 * DEBUG: section 79    Squid-side DISKD I/O functions.
 * AUTHOR: Duane Wessels
 *
 * SQUID Web Proxy Cache          http://www.squid-cache.org/
 * ----------------------------------------------------------
 *
 *  Squid is the result of efforts by numerous individuals from
 *  the Internet community; see the CONTRIBUTORS file for full
 *  details.   Many organizations have provided support for Squid's
 *  development; see the SPONSORS file for full details.  Squid is
 *  Copyrighted (C) 2001 by the Regents of the University of
 *  California; see the COPYRIGHT file for full details.  Squid
 *  incorporates software developed and/or copyrighted by other
 *  sources; see the CREDITS file for full details.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *  
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *  
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 */

#include "config.h"
#include "squid.h"

#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/shm.h>

#include "store_diskd.h"

/* Forward declarations for file-local helpers defined further down. */
static int storeDiskdSend(int, SwapDir *, int, storeIOState *, int, off_t, int);
static void storeDiskdIOCallback(storeIOState * sio, int errflag);
/* Passed to CBDATA_INIT_TYPE_FREECB as the free callback for storeIOState. */
static CBDUNL storeDiskdIOFreeEntry;

CBDATA_TYPE(storeIOState);

/* === PUBLIC =========================================================== */

/*
 * Queue an asynchronous open of the entry's existing swap file.
 *
 * A storeIOState and its diskd-private state are allocated, the full
 * path of the swap file is copied into a shared-memory block and an
 * _MQD_OPEN message is sent.  Returns the new storeIOState, or NULL when
 * too many requests are already outstanding or the message could not be
 * sent.  The file_callback argument is not used by this function.
 */
storeIOState *
storeDiskdOpen(SwapDir * SD, StoreEntry * e, STFNCB * file_callback,
    STIOCB * callback, void *callback_data)
{
    diskdinfo_t *info = SD->fsdata;
    sfileno filn = e->swap_filen;
    storeIOState *io;
    diskdstate_t *state;
    char *path_buf;
    int shm_off;
    int rc;

    debug(79, 3) ("storeDiskdOpen: fileno %08X\n", filn);

    /* Refuse new opens while more than magic1 requests are away. */
    if (info->away > info->magic1) {
	debug(79, 3) ("storeDiskdOpen: FAILING, too many requests away\n");
	diskd_stats.open_fail_queue_len++;
	return NULL;
    }

    CBDATA_INIT_TYPE_FREECB(storeIOState, storeDiskdIOFreeEntry);
    io = cbdataAlloc(storeIOState);
    state = memPoolAlloc(diskd_state_pool);
    io->fsstate = state;

    io->swap_filen = filn;
    io->swap_dirn = SD->index;
    io->mode = O_RDONLY | O_BINARY | O_NOATIME;
    io->callback = callback;
    io->callback_data = callback_data;
    io->e = e;
    cbdataLock(callback_data);

    state->flags.writing = 0;
    state->flags.reading = 0;
    state->flags.close_request = 0;
    state->id = diskd_stats.sio_id++;

    /* The path travels to diskd through a shared-memory block. */
    path_buf = storeDiskdShmGet(SD, &shm_off);
    xstrncpy(path_buf, storeDiskdDirFullPath(SD, filn, NULL), SHMBUF_BLKSZ);

    /* Note: plain O_RDONLY is sent here, not the full io->mode. */
    rc = storeDiskdSend(_MQD_OPEN, SD, state->id, io,
	strlen(path_buf) + 1, O_RDONLY, shm_off);
    if (rc >= 0) {
	diskd_stats.open.ops++;
	return io;
    }

    /* Send failed: give back the shm block and undo the allocation. */
    debug(79, 1) ("storeDiskdSend OPEN: %s\n", xstrerror());
    storeDiskdShmPut(SD, shm_off);
    cbdataUnlock(io->callback_data);
    cbdataFree(io);
    return NULL;
}

/*
 * Queue an asynchronous create of a new swap file for the entry.
 *
 * A fresh file number is allocated from the directory's map, a
 * storeIOState and its diskd-private state are set up, the full path is
 * copied into a shared-memory block and an _MQD_OPEN message is sent
 * with O_WRONLY | O_CREAT | O_TRUNC.  On success the entry is added to
 * the replacement policy via storeDiskdDirReplAdd().
 *
 * Returns the new storeIOState, or NULL when too many requests are
 * already outstanding or the message could not be sent.  The
 * file_callback argument is not used by this function.
 */
storeIOState *
storeDiskdCreate(SwapDir * SD, StoreEntry * e, STFNCB * file_callback,
    STIOCB * callback, void *callback_data)
{
    sfileno f;
    int x;
    storeIOState *sio;
    char *buf;
    int shm_offset;
    diskdinfo_t *diskdinfo = SD->fsdata;
    diskdstate_t *diskdstate;
    /*
     * Fail on open() if there are too many requests queued.
     */
    if (diskdinfo->away > diskdinfo->magic1) {
	diskd_stats.open_fail_queue_len++;
	return NULL;
    }
    /* Allocate a number */
    f = storeDiskdDirMapBitAllocate(SD);
    debug(79, 3) ("storeDiskdCreate: fileno %08X\n", f);

    CBDATA_INIT_TYPE_FREECB(storeIOState, storeDiskdIOFreeEntry);
    sio = cbdataAlloc(storeIOState);
    sio->fsstate = diskdstate = memPoolAlloc(diskd_state_pool);

    sio->swap_filen = f;
    sio->swap_dirn = SD->index;
    sio->mode = O_WRONLY | O_CREAT | O_TRUNC;
    sio->callback = callback;
    sio->callback_data = callback_data;
    sio->e = e;
    cbdataLock(callback_data);

    diskdstate->flags.writing = 0;
    diskdstate->flags.reading = 0;
    diskdstate->flags.close_request = 0;
    diskdstate->id = diskd_stats.sio_id++;

    buf = storeDiskdShmGet(SD, &shm_offset);
    xstrncpy(buf, storeDiskdDirFullPath(SD, f, NULL), SHMBUF_BLKSZ);
    x = storeDiskdSend(_MQD_OPEN,
	SD,
	diskdstate->id,
	sio,
	strlen(buf) + 1,
	sio->mode,
	shm_offset);
    if (x < 0) {
	debug(79, 1) ("storeDiskdSend OPEN: %s\n", xstrerror());
	storeDiskdShmPut(SD, shm_offset);
	/*
	 * Nothing was queued, so no file will ever exist for this
	 * number.  Release it, otherwise the map slot stays marked
	 * in-use although nobody references it.
	 */
	storeDiskdDirMapBitReset(SD, f);
	cbdataUnlock(sio->callback_data);
	cbdataFree(sio);
	return NULL;
    }
    storeDiskdDirReplAdd(SD, e);
    diskd_stats.create.ops++;
    return sio;
}


/*
 * Request that diskd close the file behind sio.
 *
 * The close_request flag is set first, then an _MQD_CLOSE message is
 * sent.  If sending fails the I/O callback is invoked right away with
 * DISK_ERROR.  The close operation counter is bumped in either case.
 */
void
storeDiskdClose(SwapDir * SD, storeIOState * sio)
{
    diskdstate_t *state = sio->fsstate;

    debug(79, 3) ("storeDiskdClose: dirno %d, fileno %08X\n", SD->index,
	sio->swap_filen);
    state->flags.close_request = 1;
    if (storeDiskdSend(_MQD_CLOSE, SD, state->id, sio, 0, 0, -1) < 0) {
	debug(79, 1) ("storeDiskdSend CLOSE: %s\n", xstrerror());
	storeDiskdIOCallback(sio, DISK_ERROR);
    }
    diskd_stats.close.ops++;
}

/*
 * Queue an asynchronous read of `size` bytes at `offset` from the file
 * behind sio.
 *
 * The request is silently dropped when a close has been requested, when
 * sio is no longer valid cbdata, or when a read is already in flight.
 * Otherwise the read callback and the caller's buffer are recorded, the
 * callback data is locked, a shared-memory block is obtained and an
 * _MQD_READ message is sent.  The data is copied into `buf` later by
 * storeDiskdReadDone().
 *
 * If sending fails, the shared-memory block and the lock on the read
 * callback data are released and the I/O callback is invoked with
 * DISK_ERROR.
 */
void
storeDiskdRead(SwapDir * SD, storeIOState * sio, char *buf, size_t size, squid_off_t offset, STRCB * callback, void *callback_data)
{
    int x;
    int shm_offset;
    char *rbuf;
    diskdstate_t *diskdstate = sio->fsstate;
    debug(79, 3) ("storeDiskdRead: dirno %d, fileno %08X\n", sio->swap_dirn, sio->swap_filen);
    if (diskdstate->flags.close_request) {
	debug(79, 2) ("storeDiskRead: closing, so ignore!\n");
	return;
    }
    if (!cbdataValid(sio))
	return;
    if (diskdstate->flags.reading) {
	debug(79, 1) ("storeDiskdRead: already reading!\n");
	return;
    }
    assert(sio->read.callback == NULL);
    assert(sio->read.callback_data == NULL);
    sio->read.callback = callback;
    sio->read.callback_data = callback_data;
    diskdstate->read_buf = buf;	/* the one passed from above */
    cbdataLock(sio->read.callback_data);
    sio->offset = offset;
    diskdstate->flags.reading = 1;
    rbuf = storeDiskdShmGet(SD, &shm_offset);
    assert(rbuf);
    x = storeDiskdSend(_MQD_READ,
	SD,
	diskdstate->id,
	sio,
	size,
	(off_t) offset,
	shm_offset);
    if (x < 0) {
	debug(79, 1) ("storeDiskdSend READ: %s\n", xstrerror());
	storeDiskdShmPut(SD, shm_offset);
	/*
	 * No message was sent, so no reply will ever arrive to release
	 * the lock taken on the read callback data above.  Drop it here
	 * and forget the callback so it cannot be unlocked twice.
	 */
	cbdataUnlock(sio->read.callback_data);
	sio->read.callback = NULL;
	sio->read.callback_data = NULL;
	storeDiskdIOCallback(sio, DISK_ERROR);
    }
    diskd_stats.read.ops++;
}

/*
 * Queue an asynchronous write of `size` bytes from `buf` at `offset` to
 * the file behind sio.
 *
 * The data is copied into a shared-memory block and `buf` is handed to
 * `free_func` (when one was supplied) before the _MQD_WRITE message is
 * sent.  On the early-exit paths (close already requested, or sio no
 * longer valid) the buffer is released the same way and nothing is
 * queued.  If sending fails the shared-memory block is returned and the
 * I/O callback is invoked with DISK_ERROR.
 *
 * `free_func` may be NULL on every path.
 */
void
storeDiskdWrite(SwapDir * SD, storeIOState * sio, char *buf, size_t size, squid_off_t offset, FREE * free_func)
{
    int x;
    char *sbuf;
    int shm_offset;
    diskdstate_t *diskdstate = sio->fsstate;
    debug(79, 3) ("storeDiskdWrite: dirno %d, fileno %08X\n", SD->index, sio->swap_filen);
    assert(!diskdstate->flags.close_request);
    /* Still handled explicitly for builds where assert() is a no-op. */
    if (diskdstate->flags.close_request) {
	debug(79, 2) ("storeDiskWrite: closing, so ignore!\n");
	if (free_func)
	    free_func(buf);
	return;
    }
    if (!cbdataValid(sio)) {
	if (free_func)
	    free_func(buf);
	return;
    }
    diskdstate->flags.writing = 1;
    sbuf = storeDiskdShmGet(SD, &shm_offset);
    xmemcpy(sbuf, buf, size);
    if (free_func)
	free_func(buf);
    x = storeDiskdSend(_MQD_WRITE,
	SD,
	diskdstate->id,
	sio,
	size,
	(off_t) offset,
	shm_offset);
    if (x < 0) {
	debug(79, 1) ("storeDiskdSend WRITE: %s\n", xstrerror());
	storeDiskdShmPut(SD, shm_offset);
	storeDiskdIOCallback(sio, DISK_ERROR);
    }
    diskd_stats.write.ops++;
}

/*
 * Remove the swap file belonging to entry e.
 *
 * The entry is taken out of the replacement policy and its file number
 * is cleared from the map first.  When the request queue is at or above
 * magic1 the file is unlinked synchronously via
 * storeDiskdDirUnlinkFile(); otherwise the path is placed in a
 * shared-memory block and an _MQD_UNLINK message is sent.  If that send
 * fails, the file is unlinked directly and the block is returned.
 */
void
storeDiskdUnlink(SwapDir * SD, StoreEntry * e)
{
    diskdinfo_t *info = SD->fsdata;
    char *path_buf;
    int shm_off;

    debug(79, 3) ("storeDiskdUnlink: dirno %d, fileno %08X\n", SD->index,
	e->swap_filen);
    storeDiskdDirReplRemove(e);
    storeDiskdDirMapBitReset(SD, e->swap_filen);

    if (info->away >= info->magic1) {
	/* No room in the queue: fall back to a synchronous unlink. */
	debug(79, 2) ("storeDiskUnlink: Out of queue space, sync unlink\n");
	storeDiskdDirUnlinkFile(SD, e->swap_filen);
	return;
    }

    /* Queue space is available, hand the unlink to diskd. */
    path_buf = storeDiskdShmGet(SD, &shm_off);
    xstrncpy(path_buf, storeDiskdDirFullPath(SD, e->swap_filen, NULL), SHMBUF_BLKSZ);
    if (storeDiskdSend(_MQD_UNLINK, SD, e->swap_filen, NULL, 0, 0, shm_off) < 0) {
	debug(79, 1) ("storeDiskdSend UNLINK: %s\n", xstrerror());
	unlink(path_buf);	/* XXX EWW! */
	storeDiskdShmPut(SD, shm_off);
    }
    diskd_stats.unlink.ops++;
}

/*
 * Detach entry e from its swap file without unlinking the file.
 *
 * When the entry has a file number (swap_filen > -1) it is removed from
 * the replacement policy, its bit is cleared in the directory map, and
 * swap_filen / swap_dirn are reset to -1.  Otherwise nothing happens.
 */
void
storeDiskdRecycle(SwapDir * SD, StoreEntry * e)
{
    /* Log under this function's own name, not storeDiskdUnlink's. */
    debug(79, 3) ("storeDiskdRecycle: fileno %08X\n", e->swap_filen);

    /* Detach from the underlying physical object */
    if (e->swap_filen > -1) {
	storeDiskdDirReplRemove(e);
	storeDiskdDirMapBitReset(SD, e->swap_filen);
	e->swap_filen = -1;
	e->swap_dirn = -1;
    }
}



/*  === STATIC =========================================================== */

/*
 * Completion handler for _MQD_OPEN replies.
 *
 * Updates the open or create success/fail counter, chosen by whether
 * the sio was opened read-only.  A negative status additionally reports
 * DISK_ERROR through the I/O callback.
 */
static void
storeDiskdOpenDone(diomsg * M)
{
    storeIOState *sio = M->callback_data;
    const int read_only = (FILE_MODE(sio->mode) == O_RDONLY);

    statCounter.syscalls.disk.opens++;
    debug(79, 3) ("storeDiskdOpenDone: dirno %d, fileno %08x status %d\n",
	sio->swap_dirn, sio->swap_filen, M->status);

    if (M->status >= 0) {
	if (read_only)
	    diskd_stats.open.success++;
	else
	    diskd_stats.create.success++;
	return;
    }

    if (read_only)
	diskd_stats.open.fail++;
    else
	diskd_stats.create.fail++;
    storeDiskdIOCallback(sio, DISK_ERROR);
}

/*
 * Completion handler for _MQD_CLOSE replies.
 *
 * Counts the result and always finishes the sio through the I/O
 * callback: DISK_ERROR for a negative status, DISK_OK otherwise.
 */
static void
storeDiskdCloseDone(diomsg * M)
{
    storeIOState *sio = M->callback_data;
    int errflag;

    statCounter.syscalls.disk.closes++;
    debug(79, 3) ("storeDiskdCloseDone: dirno %d, fileno %08x status %d\n",
	sio->swap_dirn, sio->swap_filen, M->status);

    if (M->status < 0) {
	diskd_stats.close.fail++;
	errflag = DISK_ERROR;
    } else {
	diskd_stats.close.success++;
	errflag = DISK_OK;
    }
    storeDiskdIOCallback(sio, errflag);
}

/*
 * Completion handler for _MQD_READ replies.
 *
 * Clears the "reading" flag, releases the lock that storeDiskdRead()
 * took on the read callback data and, for a successful read, copies
 * M->status bytes from the shared-memory block at M->shm_offset into
 * the buffer supplied to storeDiskdRead() before invoking the read
 * callback.  A negative status is reported through the I/O callback
 * with DISK_ERROR instead.
 *
 * This function does not return the shared-memory block itself;
 * presumably the caller does -- TODO confirm.
 */
static void
storeDiskdReadDone(diomsg * M)
{
    storeIOState *sio = M->callback_data;
    /* Snapshot callback details now; the sio fields are cleared below. */
    STRCB *callback = sio->read.callback;
    SwapDir *sd = INDEXSD(sio->swap_dirn);
    diskdstate_t *diskdstate = sio->fsstate;
    diskdinfo_t *diskdinfo = sd->fsdata;
    void *their_data = sio->read.callback_data;
    char *their_buf = diskdstate->read_buf;
    char *sbuf;
    size_t len;
    int valid;
    statCounter.syscalls.disk.reads++;
    diskdstate->flags.reading = 0;
    if (diskdstate->flags.close_request) {
	/*
	 * NOTE(review): this path returns before the cbdataUnlock() of
	 * read.callback_data below, so the lock taken in
	 * storeDiskdRead() is not released here -- confirm whether it
	 * is released elsewhere.
	 */
	debug(79, 2) ("storeDiskReadDone: closing, so ignore!\n");
	return;
    }
    /* Check validity before unlocking: the unlock may drop the last lock. */
    valid = cbdataValid(sio->read.callback_data);
    cbdataUnlock(sio->read.callback_data);
    debug(79, 3) ("storeDiskdReadDone: dirno %d, fileno %08x status %d\n",
	sio->swap_dirn, sio->swap_filen, M->status);
    if (M->status < 0) {
	diskd_stats.read.fail++;
	storeDiskdIOCallback(sio, DISK_ERROR);
	return;
    }
    diskd_stats.read.success++;
    /* The data sits in the shared-memory area at the offset in the reply. */
    sbuf = diskdinfo->shm.buf + M->shm_offset;
    /* A non-negative status is the number of bytes read. */
    len = M->status;
    sio->offset += len;
    assert(callback);
    assert(their_data);
    /* Clear before calling out so storeDiskdRead() may be called again. */
    sio->read.callback = NULL;
    sio->read.callback_data = NULL;
    if (valid) {
	assert(!diskdstate->flags.close_request);
	/*
	 * Only copy the data if the callback is still valid,
	 * if it isn't valid then the request should have been
	 * aborted.
	 *   -- adrian
	 */
	xmemcpy(their_buf, sbuf, len);	/* yucky copy */
	callback(their_data, their_buf, len);
    }
}

/*
 * Completion handler for _MQD_WRITE replies.
 *
 * Clears the "writing" flag.  On success the sio offset advances by the
 * number of bytes reported in M->status.  On failure the error is
 * passed to the I/O callback unless a close has already been requested.
 */
static void
storeDiskdWriteDone(diomsg * M)
{
    storeIOState *sio = M->callback_data;
    diskdstate_t *state = sio->fsstate;

    statCounter.syscalls.disk.writes++;
    state->flags.writing = 0;
    debug(79, 3) ("storeDiskdWriteDone: dirno %d, fileno %08x status %d\n",
	sio->swap_dirn, sio->swap_filen, M->status);

    if (M->status >= 0) {
	diskd_stats.write.success++;
	sio->offset += M->status;
	return;
    }

    diskd_stats.write.fail++;
    if (!state->flags.close_request)
	storeDiskdIOCallback(sio, DISK_ERROR);
}

/*
 * Completion handler for _MQD_UNLINK replies.
 *
 * Only statistics are updated; unlink requests carry no sio.
 */
static void
storeDiskdUnlinkDone(diomsg * M)
{
    debug(79, 3) ("storeDiskdUnlinkDone: fileno %08x status %d\n",
	M->id, M->status);
    statCounter.syscalls.disk.unlinks++;
    if (M->status >= 0) {
	diskd_stats.unlink.success++;
    } else {
	diskd_stats.unlink.fail++;
    }
}

/*
 * Dispatch a reply message received from diskd.
 *
 * Releases the cbdata lock that storeDiskdSend() took on
 * M->callback_data and, if that data is still valid (or absent), hands
 * the message to the completion handler matching M->mtype.  A negative
 * M->status is mirrored into errno first so that error messages are
 * meaningful.
 *
 * When the callback data is no longer valid, no handler runs; for a
 * read reply the lock on the read callback data is released here
 * because storeDiskdReadDone() will not get the chance to do it.
 */
void
storeDiskdHandle(diomsg * M)
{
    int valid = M->callback_data ? cbdataValid(M->callback_data) : 1;
    if (!valid) {
	debug(79, 3) ("storeDiskdHandle: Invalid callback_data %p\n",
	    M->callback_data);
	/*
	 * The read operation has its own callback.  If we don't
	 * call storeDiskdReadDone(), then we must make sure the
	 * callback_data gets unlocked!
	 *
	 * This has to happen BEFORE our own lock on the sio is
	 * released: that unlock may drop the last reference to an
	 * already-invalid sio, after which it must not be touched.
	 */
	if (_MQD_READ == M->mtype) {
	    storeIOState *sio = M->callback_data;
	    cbdataUnlock(sio->read.callback_data);
	}
	cbdataUnlock(M->callback_data);
	return;
    }
    if (M->callback_data)
	cbdataUnlock(M->callback_data);
    /* set errno passed from diskd.  makes debugging more meaningful */
    if (M->status < 0)
	errno = -M->status;
    switch (M->mtype) {
    case _MQD_OPEN:
	storeDiskdOpenDone(M);
	break;
    case _MQD_CLOSE:
	storeDiskdCloseDone(M);
	break;
    case _MQD_READ:
	storeDiskdReadDone(M);
	break;
    case _MQD_WRITE:
	storeDiskdWriteDone(M);
	break;
    case _MQD_UNLINK:
	storeDiskdUnlinkDone(M);
	break;
    default:
	assert(0);
	break;
    }
}

/*
 * Finish an sio: notify its owner and release it.
 *
 * The owner's callback runs only when its callback data is still valid.
 * The lock on that data is then dropped and the sio itself is freed.
 */
static void
storeDiskdIOCallback(storeIOState * sio, int errflag)
{
    void *owner_data = sio->callback_data;

    debug(79, 3) ("storeDiskdIOCallback: errflag=%d\n", errflag);
    if (cbdataValid(owner_data)) {
	sio->callback(owner_data, errflag, sio);
    }
    cbdataUnlock(owner_data);
    cbdataFree(sio);
}

/*
 * Build a diomsg and send it to the diskd process for swap dir `sd`.
 *
 *   mtype       message type (_MQD_OPEN, _MQD_READ, ...)
 *   id          request id stored in the message
 *   sio         callback data for the reply; may be NULL.  It is
 *               cbdata-locked here and unlocked again if sending fails.
 *   size        stored in the message's size field
 *   offset      stored in the message's offset field
 *   shm_offset  offset of the associated shared-memory block
 *
 * Before sending, the function blocks (servicing storeDirCallback())
 * for as long as more than magic2 requests are outstanding.
 *
 * Returns the result of msgsnd(): 0 on success, in which case the
 * "away" counter is incremented; non-zero on failure.
 */
static int
storeDiskdSend(int mtype, SwapDir * sd, int id, storeIOState * sio, int size, off_t offset, int shm_offset)
{
    int x;
    diomsg M;
    static int send_errors = 0;
    static int last_seq_no = 0;
    static int seq_no = 0;
    diskdinfo_t *diskdinfo = sd->fsdata;
    long backoff_usec = 1;
    struct timeval delay;

    M.mtype = mtype;
    M.callback_data = sio;
    M.size = size;
    M.offset = offset;
    M.status = -1;
    M.shm_offset = (int) shm_offset;
    M.id = id;
    M.seq_no = ++seq_no;
    if (M.callback_data)
	cbdataLock(M.callback_data);
    if (M.seq_no < last_seq_no)
	debug(79, 1) ("WARNING: sequencing out of order\n");

    /*
     * We have to drain the queue here if necessary.  If we don't,
     * then we can have a lot of messages in the queue (probably
     * up to 2*magic1) and we can run out of shared memory buffers.
     */
    /*
     * NOTE that it is important that we call storeDirCallback AFTER
     * locking the callback data M.callback_data because we need
     * to make sure the cbdata lock count doesn't go to zero (and
     * get freed) before we have a chance to send the current message
     * M!
     */
    /*
     * Note that we call storeDirCallback (for all SDs), rather
     * than storeDiskdDirCallback for just this SD, so that while
     * we're "blocking" on this SD we can also handle callbacks
     * from other SDs that might be ready.
     */
    while (diskdinfo->away > diskdinfo->magic2) {
	/*
	 * select() may overwrite its timeout argument (Linux stores the
	 * time not slept), so the timeval is rebuilt on every pass from
	 * a separate backoff value.  Reusing it would turn the backoff
	 * into a zero timeout and this loop into a busy spin.
	 */
	delay.tv_sec = 0;
	delay.tv_usec = backoff_usec;
	select(0, NULL, NULL, NULL, &delay);
	storeDirCallback();
	/* Double up to 524288us; tv_usec must stay below one second. */
	if (backoff_usec < 500000)
	    backoff_usec <<= 1;
    }

    x = msgsnd(diskdinfo->smsgid, &M, msg_snd_rcv_sz, IPC_NOWAIT);
    last_seq_no = M.seq_no;
    if (0 == x) {
	diskd_stats.sent_count++;
	diskdinfo->away++;
    } else {
	debug(79, 1) ("storeDiskdSend: msgsnd: %s\n", xstrerror());
	if (M.callback_data)
	    cbdataUnlock(M.callback_data);
	/*
	 * Count outside the assert so the error counter still advances
	 * when assertions are compiled out.
	 */
	send_errors++;
	assert(send_errors < 100);
    }
    return x;
}


/*
 * We can't pass memFree() as a free function here, because we need to free
 * the fsstate variable ..
 */
static void
storeDiskdIOFreeEntry(void *sio)
{
    memPoolFree(diskd_state_pool, ((storeIOState *) sio)->fsstate);
}
