/*----------------------------------------------------------------------*
** File: eng.c
** 
** 
** Project:	PQL
** =====================================================================*
** Description:	Database Engine
**
**  Author:	Bjoern Lemke
**  E-Mail:	lemke@lf.net
**
** Copyright (c) 1994 Bjoern Lemke.
**----------------------------------------------------------------------*/

#ifdef LINUX
char* gdbm_strerror(int x) 
{
   /*
    * Fallback used when building with LINUX defined: formats the
    * numeric gdbm error code as text.  The result lives in a static
    * buffer, so it is overwritten by the next call.
    */
   static char msgbuf[80];

   (void)sprintf(msgbuf, "gdbm_errno %d \n", x);
   return msgbuf;
}
#endif

/* include this first, FD_SETSIZE must be defined */
#include "eng.h"

#include <unistd.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>

#define LMSIZE sizeof(unsigned) /* level mark size */
#define DELETEMASK 0x8000 /* D-bit: set in a level mark when the tuple is deleted on that level */
#define ACTIVEMASK 0x7FFF /* bits of a level mark that carry the transaction level */
/*
 * MAXLEVEL TA before reset must be performed.
 *
 * A level is stored in a mark as (level & ACTIVEMASK), optionally or-ed
 * with DELETEMASK, so the highest level that survives a round trip
 * through a mark is ACTIVEMASK.  A larger limit would let levels be
 * truncated or collide with the D-bit.
 */
#define MAXLEVEL ACTIVEMASK

/**********************/
/* private prototypes */
/**********************/

/* Lock kinds accepted by eng_lck(); mapped there to LOCK_SH / LOCK_EX. */
enum LockType {
  SHARED,
  EXCLUSIVE
};

/* open the relation's lock file and take a non-blocking flock on it */
static int eng_lck(Relation *rel, int lock);
/* release the flock taken by eng_lck() and close the lock file */
static int eng_unlck(Relation *rel);
/* decide from rel->tastats whether transaction level ta counts as aborted */
static int isaborted(Relation *rel, unsigned ta);
/* local strerror() replacement built on sys_errlist.
   NOTE(review): the parameter name matches the errno identifier from
   <errno.h>; where errno is a macro this declaration may not compile
   as intended - confirm on the target platform. */
static char *strerror(int errno);

/********************/
/* global variables */
/********************/
/* table of system error messages, indexed by errno value; used by the
   local strerror() at the end of this file */
extern char *sys_errlist[];

/*******************/
/* public routines */
/*******************/

/*
 * eng_tst - check that the data file and the context file of a relation
 * can both be opened read-only.
 *
 * relname: path of the data file; the context file name is this path
 *          with CTXSUF appended.
 *
 * Returns 0 if both files can be opened, -1 otherwise (a diagnostic is
 * written to stderr).
 */
int eng_tst(char *relname)
{
  int fd, len;
  char ctxfile[ENGNAMELEN];

  /* build the context file name without overrunning ctxfile */
  len = snprintf(ctxfile, sizeof(ctxfile), "%s%s", relname, CTXSUF);
  if (len < 0 || len >= (int)sizeof(ctxfile)) {
    fprintf(stderr, "ENGINE: file name %s is too long\n", relname);
    return(-1);
  }

  if ((fd = open(relname, O_RDONLY)) == -1) {
    fprintf(stderr, "ENGINE: data file %s, %s\n", 
	    relname, strerror(errno));
    return(-1);
  }
  close(fd);

  if ((fd = open(ctxfile, O_RDONLY)) == -1) {
    fprintf(stderr, "ENGINE: context file %s, %s\n", 
	    ctxfile, strerror(errno));
    return(-1);
  }
  close(fd);
  
  return(0);
}

/*
 * eng_init - create a new, empty relation.
 *
 * path:    path of the GDBM data file to create; the context file is
 *          path with CTXSUF appended.
 * relname: relation name stored in the header (and used in messages).
 * pkey:    primary key attributes (types, names, sizes).
 *
 * The context file receives the relation header followed by one TA
 * status word initialised to 0.  Existing files are truncated/replaced.
 *
 * Returns 0 on success, -1 on any error (a diagnostic is written to
 * stderr); both files are closed in either case.
 */
int eng_init(char *path, char *relname, AttList *pkey)
{
  int kptr, i, len, ctxfd;
  int rc = -1;
  GDBM_FILE relfd;
  Relation relation = {0}; /* zeroed so no stack garbage ends up on disk */
  char ctxfile[ENGNAMELEN];
  unsigned tainit = 0;

  /* build the context file name without overrunning ctxfile */
  len = snprintf(ctxfile, sizeof(ctxfile), "%s%s", path, CTXSUF);
  if (len < 0 || len >= (int)sizeof(ctxfile)) {
    fprintf(stderr, "ENGINE: file name %s is too long\n", path);
    return(-1);
  }

  /* fill up relation head: key attributes are laid out back to back */
  kptr=0;
  for (i=0; i<pkey->num_f; i++) {
    relation.type[i] = pkey->type[i];
    strcpy(relation.name[i], pkey->name[i]);
    relation.atype[i] = PKEY;
    relation.start[i] = kptr;
    kptr = kptr + pkey->size[i];
  }
  relation.pksize = kptr;
  relation.num_pk = pkey->num_f;
  relation.num_f = pkey->num_f;
  relation.maxidx = pkey->num_f+1;
  relation.start[pkey->num_f]=kptr;
  relation.atype[pkey->num_f]=SEP; /* the separator attribute */
  relation.type[pkey->num_f]=NULLMAP; /* the separator attribute */
  strcpy(relation.name[pkey->num_f], BMNAME);
  relation.attsize=0;
  strcpy(relation.relname, relname);
  relation.tasize = 1; /* initialize tasize */
  relation.level=0; /* starting level */

  /* create new context file */
  if ((ctxfd = open(ctxfile, O_TRUNC | O_WRONLY | O_CREAT, 0664)) == -1) {
    fprintf(stderr, "ENGINE: relation %s contextfile, %s\n", 
	    relname, strerror(errno));
    return(-1);
  }
  
  /* create new data file */
  if ((relfd = gdbm_open(path, 512, GDBM_NEWDB, 0664, 0)) == (GDBM_FILE)0) {
    fprintf(stderr, "ENGINE: relation %s datafile, %s\n", 
	    relname, gdbm_strerror (gdbm_errno));
    close(ctxfd);
    return(-1);
  }
  
  /* and store relation head in context file; a short write is an error too */
  if (write(ctxfd, (char*)&relation, sizeof(Relation))
      != (ssize_t)sizeof(Relation)) {
    fprintf(stderr, "ENGINE: cannot write head for relation %s, %s\n", 
	    relname, strerror(errno));
    goto done;
  }

  /* write the first TA entry to the ctx file */
  if (write(ctxfd, (char*)&tainit, sizeof(unsigned))
      != (ssize_t)sizeof(unsigned)) {
    fprintf(stderr, "ENGINE: cannot write TA status for relation %s, %s\n", 
	    relname, strerror(errno));
    goto done;
  }
  rc = 0;

done:
  gdbm_close(relfd);
  close(ctxfd);
  return(rc);
}

/*
 * eng_extend - append ordinary (non-key) attributes to a relation.
 *
 * path: path of the GDBM data file; the context file is path + CTXSUF.
 * atts: attributes to add (types, names, sizes).
 *
 * Under an exclusive lock every stored record is enlarged by the total
 * size of the new attributes, the new attributes are flagged in the
 * record's leading fd_set bitmap, and the relation header in the
 * context file is updated.
 *
 * NOTE(review): each record is treated as one bitmap + attribute area
 * followed by one trailing level mark; records are rewritten while the
 * database is being traversed with gdbm_nextkey().  Both follow the
 * original implementation - confirm against the GDBM documentation on
 * modifying a database during sequential access.
 *
 * Returns 0 on success, -1 on error (diagnostic on stderr).
 */
int eng_extend(char *path, AttList *atts)
{
  int kptr, i, j, ctxfd, incsize, start, len;
  int rc = -1;
  GDBM_FILE relfd;
  datum key, okey, attfld, incattfld;
  fd_set mask;
  Relation relation;
  char ctxfile[ENGNAMELEN];

  /* build the context file name without overrunning ctxfile */
  len = snprintf(ctxfile, sizeof(ctxfile), "%s%s", path, CTXSUF);
  if (len < 0 || len >= (int)sizeof(ctxfile)) {
    fprintf(stderr, "ENGINE: file name %s is too long\n", path);
    return(-1);
  }
  
  if ((ctxfd = open(ctxfile, O_RDWR, 0)) == -1) {
    fprintf(stderr, "ENGINE: contextfile %s, %s\n",
	    ctxfile, strerror(errno));
    return(-1);
  }

  /* get the relation head */
  if (read(ctxfd, (char*)&relation, sizeof(Relation))
      != (ssize_t)sizeof(Relation)) {
    fprintf(stderr, "ENGINE: contextfile %s, %s\n", 
	    ctxfile, strerror(errno));
    close(ctxfd);
    return(-1);
  }
  
  /* locking point for writer */
  if (eng_lck(&relation, EXCLUSIVE) == -1) {
    close(ctxfd);
    return(-1);
  }

  if ((relfd = gdbm_open(path, 512, GDBM_WRITER, 0664, 0)) == NULL) {
    fprintf(stderr, "ENGINE: datafile %s, %s\n",
	    path, gdbm_strerror(gdbm_errno));
    eng_unlck(&relation);
    close(ctxfd);
    return(-1);
  }

  /* growth per record and first bitmap index are the same for every tuple */
  incsize = 0;
  for (j = 0; j < atts->num_f; j++)
    incsize += atts->size[j];
  start = relation.num_f - relation.num_pk;

  /* fill up relation with NULL values */
  okey.dptr = NULL;
  for (key = gdbm_firstkey(relfd); 
       key.dptr != NULL; 
       key = gdbm_nextkey(relfd, key)) {
    /* the previous key was still needed by gdbm_nextkey(); drop it now */
    free(okey.dptr);
    okey = key;

    attfld = gdbm_fetch(relfd, key);
    if (attfld.dptr == NULL
	|| attfld.dsize < (int)(sizeof(fd_set) + LMSIZE)) {
      fprintf(stderr, "ENGINE: datafile %s, unreadable or truncated tuple\n",
	      path);
      free(attfld.dptr);
      free(okey.dptr);
      goto done;
    }

    /* flag every new attribute in the record's bitmap */
    bcopy(attfld.dptr, (char*)&mask, sizeof(fd_set));
    for (i = start; i < start + atts->num_f; i++)
      FD_SET(i + BMOFFSET, &mask);

    /* malloc here to prepare for variable attribute fields */
    incattfld.dsize = attfld.dsize + incsize;
    if ((incattfld.dptr = (char*)malloc(incattfld.dsize)) == NULL) {
      fprintf(stderr, "ENGINE: malloc error for %s, %s\n",
	      path, strerror (errno));
      free(attfld.dptr);
      free(okey.dptr);
      goto done;
    }

    /* copy new NULL value bitmask */
    bcopy((char*)&mask, incattfld.dptr, sizeof(fd_set));
    /* copy old contents */
    bcopy(attfld.dptr + sizeof(fd_set), incattfld.dptr + sizeof(fd_set),
	  attfld.dsize - sizeof(fd_set) - LMSIZE);
    /* copy lmark to the end of the enlarged record */
    bcopy(attfld.dptr + attfld.dsize - LMSIZE, 
	  incattfld.dptr + incattfld.dsize - LMSIZE, LMSIZE);
    
    if (gdbm_store(relfd, key, incattfld, GDBM_REPLACE) != 0) {
      fprintf(stderr, "ENGINE: datafile %s, %s\n",
	      path, gdbm_strerror(gdbm_errno));
      free(incattfld.dptr);
      free(attfld.dptr);
      free(okey.dptr);
      goto done;
    }
    free(incattfld.dptr);
    free(attfld.dptr);
  }
  free(okey.dptr); /* last key of the traversal */
  
  /* fill up relhead */
  kptr = sizeof(fd_set) + relation.attsize;
  j=0; /* index counter for attlist */
  for (i = relation.maxidx; 
       i < atts->num_f + relation.maxidx; 
       i++) {
    relation.type[i] = atts->type[j];
    strcpy(relation.name[i], atts->name[j]);
    relation.atype[i] = ORDY;
    relation.start[i] = kptr;
    kptr += atts->size[j];
    relation.attsize += atts->size[j];
    j++;
  }
  relation.start[i] = kptr; 
  
  /* update number of atts and maxidx */
  relation.num_f += atts->num_f;
  relation.maxidx += atts->num_f;
  
  /* and store relation head */
  if (lseek(ctxfd, 0, SEEK_SET) == -1) {
    fprintf(stderr, "ENGINE: seek error for %s, %s\n",
	    path, strerror(errno));
    goto done;
  }
  if (write(ctxfd, (char*)&relation, sizeof(Relation))
      != (ssize_t)sizeof(Relation)) {
    fprintf(stderr, "ENGINE: contextfile %s, %s\n", 
	    ctxfile, strerror(errno));
    goto done;
  }
  rc = 0;

done:
  gdbm_close(relfd);
  /* unlock point for writer */
  eng_unlck(&relation);
  close(ctxfd);
  return(rc);
}

/*
 * eng_shrink - logically drop an attribute from a relation.
 *
 * path:    path of the data file; the context file is path + CTXSUF.
 * attname: name of the attribute to drop.
 *
 * Only the header is changed: the first attribute whose name matches is
 * marked DROP and num_f is decremented; stored tuples are not touched.
 * Primary key attributes cannot be dropped.  If no attribute matches,
 * the header is rewritten unchanged and 0 is returned.
 *
 * Returns 0 on success, -1 on error (diagnostic on stderr).  The
 * exclusive lock taken here is released before returning.
 */
int eng_shrink(char *path, char *attname)
{
  int ctxfd, i, len;
  int rc = -1;
  Relation relation;
  char ctxfile[ENGNAMELEN];
  
  /* build the context file name without overrunning ctxfile */
  len = snprintf(ctxfile, sizeof(ctxfile), "%s%s", path, CTXSUF);
  if (len < 0 || len >= (int)sizeof(ctxfile)) {
    fprintf(stderr, "ENGINE: file name %s is too long\n", path);
    return(-1);
  }
  
  if ((ctxfd = open(ctxfile, O_RDWR, 0)) == -1) {
    fprintf(stderr, "ENGINE: contextfile %s, %s\n",
	    ctxfile, strerror(errno));
    return(-1);
  }

  /* get the relation head */
  if (read(ctxfd, (char*)&relation, sizeof(Relation))
      != (ssize_t)sizeof(Relation)) {
    fprintf(stderr, "ENGINE: contextfile %s, %s\n", 
	    ctxfile, strerror(errno));
    close(ctxfd);
    return(-1);
  }

  /* locking point for writer */
  if (eng_lck(&relation, EXCLUSIVE) == -1) {
    close(ctxfd);
    return(-1);
  }

  for (i = 0; i < relation.maxidx; i++) {
    if (strcmp(attname, relation.name[i]) != 0)
      continue;
    if (relation.atype[i] == PKEY) {
      fprintf(stderr, "ENGINE: cannot shrink primary key\n");
      goto done;
    }
    relation.atype[i] = DROP;
    relation.num_f--;
    break;
  } 
 
  /* and store relation head */
  if (lseek(ctxfd, 0, SEEK_SET) == -1) {
    fprintf(stderr, "ENGINE: seek error for %s, %s\n",
	    path, strerror(errno));
    goto done;
  }
  if (write(ctxfd, (char*)&relation, sizeof(Relation))
      != (ssize_t)sizeof(Relation)) {
    fprintf(stderr, "ENGINE: contextfile %s, %s\n", 
	    ctxfile, strerror(errno));
    goto done;
  }
  rc = 0;

done:
  /* unlock point for writer: without it the lock file descriptor leaks
     and the relation stays locked for this process */
  eng_unlck(&relation);
  close(ctxfd);
  return(rc);
}

/*
 * eng_begin - open a relation and start a transaction on it.
 *
 * path: path of the GDBM data file; the context file is path + CTXSUF.
 * mode: READ (shared lock, GDBM reader) or WRITE (exclusive lock,
 *       GDBM writer).
 *
 * The relation header and the TA status field are read from the
 * context file.  The level is incremented (for readers this is only a
 * pseudo level kept in memory).  In WRITE mode the TA field is marked
 * so that the new level counts as aborted until eng_commit() runs, and
 * header plus TA field are written back.
 *
 * Returns a malloc'ed Relation to be passed to eng_commit() or
 * eng_abort(), or NULL on error (diagnostic on stderr; everything
 * acquired so far is released again).
 */
Relation *eng_begin(char *path, int mode)
{
  GDBM_FILE relfd = NULL;
  int ctxfd, len, gdbm_mode;
  int locked = 0;
  size_t tabytes, pos;
  ssize_t num;
  Relation *relation = NULL;
  char ctxfile[ENGNAMELEN];

  switch (mode) {
  case READ:
    gdbm_mode = GDBM_READER;
    break;
  case WRITE:
    gdbm_mode = GDBM_WRITER;
    break;
  default:
    fprintf(stderr, "ENGINE: unknown access mode %d for %s\n", mode, path);
    return(NULL);
  }

  /* build the context file name without overrunning ctxfile */
  len = snprintf(ctxfile, sizeof(ctxfile), "%s%s", path, CTXSUF);
  if (len < 0 || len >= (int)sizeof(ctxfile)) {
    fprintf(stderr, "ENGINE: file name %s is too long\n", path);
    return(NULL);
  }

  if ((ctxfd = open(ctxfile, O_RDWR, 0)) == -1) {
    fprintf(stderr, "ENGINE: contextfile %s, %s\n",
	    ctxfile, strerror(errno));
    return(NULL);
  }
  
  if ((relation = (Relation*)malloc(sizeof(Relation))) == NULL) {
    fprintf(stderr, "ENGINE: malloc error for %s, %s\n", 
	    path, strerror (errno));
    goto fail;
  }

  /* read the header */
  num = read(ctxfd, (char*)relation, sizeof(Relation));
  /* the pointer stored in the file is stale; keep cleanup safe */
  relation->tastats = NULL;
  if (num != (ssize_t)sizeof(Relation)) {
    fprintf(stderr, "ENGINE: read error for %s, %s\n", 
	    path, strerror(errno));
    goto fail;
  }

  /* locking point */
  if (mode == WRITE) {
    /* this should really not occur :) */
    if (relation->level >= MAXLEVEL) {
      fprintf(stderr, "ENGINE: maximum transaction level %d reached, please perform a reset\n", relation->level);
      goto fail;
    }
    if (eng_lck(relation, EXCLUSIVE) == -1)
      goto fail;
  } else {
    /* read mode */
    if (eng_lck(relation, SHARED) == -1)
      goto fail;
  }
  locked = 1;
  
  /* allocate TA status field (one spare slot for a new phase) and
     read it from the context file */
  if ((relation->tastats = (int*)malloc((relation->tasize+1) * sizeof(unsigned))) == NULL) {
    fprintf(stderr, "ENGINE: malloc error for %s, %s\n", 
	    path, strerror (errno));
    goto fail;
  }
  
  /* pos counts bytes, so the buffer is addressed through a char pointer */
  tabytes = relation->tasize * sizeof(unsigned);
  pos = 0;
  while (pos < tabytes) {
    num = read(ctxfd, (char*)relation->tastats + pos, tabytes - pos);
    if (num <= 0)
      break; /* error or premature end of file */
    pos += (size_t)num;
  }
  if (pos != tabytes) {
    fprintf(stderr, "ENGINE: tastats read error for %s, %s\n", 
	    path, strerror(errno));
    goto fail;
  }
  
  /* now open the data file */
  if ((relfd = gdbm_open(path, 512, gdbm_mode, 0664, 0)) == NULL) {
    fprintf(stderr, "ENGINE: %s\n", gdbm_strerror (gdbm_errno));
    goto fail;
  }
  
  relation->level++; /* increase for READER to pseudo level */
  relation->relfd = relfd;
  strcpy(relation->ctxfile, ctxfile);
  relation->mode = mode;
  
  if (mode == WRITE) {  
    /* set up abort status */
    switch (relation->tasize % 2) {
    case 1: /* actual phase is commited */
      relation->tastats[relation->tasize]=1;
      relation->tasize++; /* increase the TA field size */
      break;
    case 0: /* actual phase is aborted */
      relation->tastats[relation->tasize - 1]++;
      break;
    } 

    /* reset file cursor */
    if (lseek(ctxfd, 0L, SEEK_SET) == -1) {
      fprintf(stderr, "ENGINE: seek error for %s, %s\n",
	      path, strerror(errno));
      goto fail;
    }

    /* write back the relation head */
    if (write(ctxfd, (char*)relation, sizeof(Relation))
	!= (ssize_t)sizeof(Relation)) {
      fprintf(stderr, "ENGINE: contextfile %s, %s\n", 
	      ctxfile, strerror(errno));
      goto fail;
    }

    /* write back the TA field */
    tabytes = relation->tasize * sizeof(unsigned);
    pos = 0;
    while (pos < tabytes) {
      num = write(ctxfd, (char*)relation->tastats + pos, tabytes - pos);
      if (num <= 0)
	break;
      pos += (size_t)num;
    }
    if (pos != tabytes) {
      fprintf(stderr, "ENGINE: tastats write error for %s, %s\n", 
	      path, strerror(errno));
      goto fail;
    }
  } /* end of WRITE mode */
  
  close(ctxfd);
  return(relation);

fail:
  if (relfd != NULL)
    gdbm_close(relfd);
  if (relation != NULL) {
    if (locked)
      eng_unlck(relation);
    free(relation->tastats);
    free(relation);
  }
  close(ctxfd);
  return(NULL);
}

/*
 * eng_commit - finish a transaction started with eng_begin().
 *
 * For a WRITE transaction the last two TA status words and the
 * relation header are rewritten in the context file so that the
 * current level counts as committed.  For every mode the TA field is
 * freed, the data file is closed, the lock is released and rel itself
 * is freed.
 *
 * Returns 0 on success.  Returns -1 on error (diagnostic on stderr);
 * if the failure happens while updating the context file, rel is left
 * allocated and locked, as in the original implementation.
 */
int eng_commit(Relation *rel)
{
  /* set up commit status */
  int offset, ctxfd;
  unsigned nstats[2];
  
  if (rel->mode == WRITE) {
    if ((ctxfd = open(rel->ctxfile, O_RDWR, 0)) == -1) {
      fprintf(stderr, "ENGINE: contextfile %s, %s\n",
	      rel->ctxfile, strerror(errno));
      return(-1);
    }
    
    if (rel->tastats[rel->tasize-1] > 1) {
      /* start a new commit level */
      nstats[0]=rel->tastats[rel->tasize-1] -1;
      nstats[1]=1;
      offset = ((rel->tasize - 1) * sizeof(unsigned)) + sizeof(Relation);
      rel->tasize++;
    } else {
      /* fold the single pending level into the preceding phase */
      nstats[0] = rel->tastats[rel->tasize - 2] + 1;
      nstats[1] = rel->tastats[rel->tasize-1] -1;
      offset = ((rel->tasize - 2) * sizeof(unsigned)) + sizeof(Relation);
      rel->tasize--;
    }
    
    if (lseek(ctxfd, offset, SEEK_SET) == -1) {
      fprintf(stderr, "ENGINE: seek error for %s, %s\n",
	      rel->relname, strerror(errno));
      goto ctxfail;
    }
    
    /* COMMIT WRITE OPERATION */
    /* this operation should be performed atomically */
    
    /*****************************/
    /* begin of atomic operation */
    if (write(ctxfd, (char*)nstats, 2 * sizeof(unsigned))
	!= (ssize_t)(2 * sizeof(unsigned))) {
      fprintf(stderr, "ENGINE: write error for %s, %s\n",
	      rel->relname, strerror(errno));
      fprintf(stderr, "ENGINE: WARNING !");
      goto ctxfail;
    }
    if (lseek(ctxfd, 0L, SEEK_SET) == -1) {
      fprintf(stderr, "ENGINE: seek error for %s, %s\n",
	      rel->relname, strerror(errno));
      goto ctxfail;
    }
    if (write(ctxfd, (char*)rel, sizeof(Relation))
	!= (ssize_t)sizeof(Relation)) {
      fprintf(stderr, "ENGINE: write error for %s, %s\n",
	      rel->relname, strerror(errno));
      goto ctxfail;
    }
    /* end of atomic operation */
    /***************************/
    close(ctxfd);

  } /* end of WRITE mode */
 
  free(rel->tastats);
  gdbm_close(rel->relfd);
  
  /* unlocking point */
  if (eng_unlck(rel) == -1) {
    return(-1);
  }
  free(rel);
  return(0);

ctxfail:
  /* do not leak the context file descriptor on a failed commit */
  close(ctxfd);
  return(-1);
}

/*
 * eng_abort - end a transaction without committing it.
 *
 * Nothing is written to the context file.  The TA field is freed, the
 * data file is closed, the lock is released and rel is freed.
 *
 * eng_begin() takes a lock in both READ (shared) and WRITE (exclusive)
 * mode, so the lock is released here regardless of the mode.
 *
 * Returns 0 on success, -1 if unlocking fails (rel is not freed then).
 */
int eng_abort(Relation *rel)
{
  free(rel->tastats);
  gdbm_close(rel->relfd);
  
  /* unlocking point */
  if (eng_unlck(rel) == -1) {
    return(-1);
  }
  free(rel);
  return(0);
}


/*
 * eng_fetch - sequentially read the next visible tuple of a relation.
 *
 * rel:    relation handle from eng_begin().
 * pkey:   receives the key bytes of the tuple (key.dsize bytes).
 * atts:   receives the bitmap and attribute area
 *         (rel->attsize + sizeof(fd_set) bytes).
 * action: FIRST to start a scan, NEXT to continue after the key
 *         remembered in rel->nkey.
 *
 * For each key the stored versions are walked from the newest mark
 * backwards past versions whose level is aborted; the tuple is
 * returned if the version found is neither deleted nor aborted,
 * otherwise the scan moves on to the next key.
 *
 * Returns MORETUP when a tuple was copied out, ENDOFTUP when the scan
 * is exhausted, -1 on error.
 */
int eng_fetch(Relation *rel, char *pkey, char *atts, int action)
{
  datum key, contents;
  int dstat, aborted;
  char *cptr;
  unsigned lmark;

  switch (action) {
  case FIRST:
    /* rel->nkey is not trusted here: it may still hold the bytes
       read from the context file */
    key = gdbm_firstkey(rel->relfd);
    break;
  case NEXT:
    key = gdbm_nextkey(rel->relfd, rel->nkey);
    /* the previous cursor key was malloc'ed by gdbm and is no longer
       needed - assumes nothing else frees rel->nkey.dptr */
    free(rel->nkey.dptr);
    break;
  default:
    fprintf(stderr, "ENGINE: unknown fetch action %d\n", action);
    return(-1);
  }
    
  rel->nkey.dptr=NULL;

  while(1) { /* loop until a non-deleted tuple is found */

    /* release the key skipped in the previous round */
    free(rel->nkey.dptr);
    
    rel->nkey.dptr = key.dptr;    
    rel->nkey.dsize = key.dsize;
    
    if (key.dptr == NULL) {
      return(ENDOFTUP);
    }

    contents = gdbm_fetch(rel->relfd, key);
    if (contents.dptr == NULL) {
      fprintf(stderr, "ENGINE: something mystic occured\n");
      return(-1); /* something mystic occured */
    }
    
    /* get the valid data now */
    bcopy(key.dptr, pkey, key.dsize);
    
    cptr=contents.dptr + contents.dsize;
#ifdef DEBUG
    /* dump every version mark of this record: <level>[deleted] */
    do {
      /* set up cptr */
      cptr -= LMSIZE;
      /* get the actual mark */
      bcopy(cptr, &lmark, LMSIZE);
      if (lmark & DELETEMASK) {
	/* tuple is deleted on this level */
	dstat=1;
	lmark &= ACTIVEMASK; /* skip the D-Bit */
      } else dstat=0;
      if (!dstat) cptr -= rel->attsize + sizeof(fd_set);
      aborted = isaborted(rel, lmark);
      if (aborted)
	fprintf(stderr, "*");
      fprintf(stderr, "<%d>[%d] ", lmark, dstat);
    } while (cptr > contents.dptr);
    fprintf(stderr, "\n");
    cptr=contents.dptr + contents.dsize;
#endif
    
    /* walk back from the newest version to the first non-aborted one */
    do {
      /* set up cptr */
      cptr -= LMSIZE;
      /* get the actual mark */
      bcopy(cptr, &lmark, LMSIZE);
      if (lmark & DELETEMASK) {
	/* tuple is deleted on this level */
	dstat=1;
	lmark &= ACTIVEMASK; /* skip the D-Bit */
      } else dstat=0;
      /* a non-delete version carries bitmap + attributes before its mark */
      if (!dstat) cptr -= rel->attsize + sizeof(fd_set);
      aborted = isaborted(rel, lmark);
    } while (aborted && cptr > contents.dptr);

    if (!dstat && !aborted) {
      bcopy(cptr, atts, rel->attsize + sizeof(fd_set));
      free(contents.dptr);
      return(MORETUP);
    }
    free(contents.dptr);
    /* get next key */
    key = gdbm_nextkey(rel->relfd, rel->nkey);
  } 
}

/*
 * eng_add - insert a tuple on the current transaction level.
 *
 * rel:  relation handle from eng_begin().
 * pkey: primary key (rel->pksize bytes).
 * atts: bitmap and attribute area (rel->attsize + sizeof(fd_set) bytes).
 *
 * If no record exists for the key, a new one is stored.  If a record
 * exists, its versions are walked back past aborted levels:
 *   - visible and not deleted: the tuple exists, the add is refused;
 *   - visible and deleted:     the new version is appended;
 *   - only aborted versions:   the newest version is replaced.
 * The new version is followed by a level mark holding the current
 * level with the D-bit clear.
 *
 * Returns 0 on success, -1 on error (diagnostic on stderr).
 */
int eng_add(Relation *rel, char *pkey, char *atts)
{
  datum key, newcont, contents;
  unsigned lmark, mark;
  char *cptr;
  int dstat, aborted, copysize, rc;

  key.dsize=rel->pksize;
  key.dptr=pkey;

  /* level mark for the version written below */
  lmark = rel->level;
  lmark &= ACTIVEMASK;
  
  contents = gdbm_fetch(rel->relfd, key);
  if (contents.dptr == NULL) {
    /* no entry yet: the record is just atts followed by the mark */
    newcont.dsize = rel->attsize + sizeof(fd_set) + LMSIZE;
    if ((newcont.dptr = (char*)malloc(newcont.dsize)) == NULL) {
      perror("ENGINE");
      return(-1);
    }
    bcopy(atts, newcont.dptr, rel->attsize + sizeof(fd_set));
    bcopy(&lmark, newcont.dptr + rel->attsize + sizeof(fd_set), LMSIZE);

    /* gdbm_store() yields -1 on error and +1 if the key already exists;
       both mean the tuple was not stored */
    rc = gdbm_store(rel->relfd, key, newcont, GDBM_INSERT);
    if (rc != 0)
      fprintf(stderr, "ENGINE: %s\n", gdbm_strerror(gdbm_errno));
    free(newcont.dptr);
    return(rc == 0 ? 0 : -1);
  }

  /* an entry exists, check if deleted */
  mark = 0;
  cptr=contents.dptr + contents.dsize;
  do {
    /* set up cptr */
    cptr -= LMSIZE;
    /* get the actual mark */
    bcopy(cptr, &mark, LMSIZE);
    if (mark & DELETEMASK) {
      /* tuple is deleted on this level */
      dstat=1;
      mark &= ACTIVEMASK; /* skip the D-Bit */
    } else dstat=0;
    if (!dstat) cptr -= rel->attsize + sizeof(fd_set);
    aborted = isaborted(rel, mark);
  } while (aborted && cptr > contents.dptr);
        
  if (aborted) { 
    /* drop the newest (aborted) version before appending */
    if (dstat) 
      copysize=contents.dsize - LMSIZE;
    else
      copysize=contents.dsize - LMSIZE - rel->attsize - sizeof(fd_set);
  } else if (dstat) {
    copysize=contents.dsize;
  } else {
    fprintf(stderr, "ENGINE: cannot add, tuple exists\n");
    free(contents.dptr);
    return(-1);
  }
    
  /* we need some space to write the new tuple */
  newcont.dsize = copysize + sizeof(fd_set) + rel->attsize + LMSIZE;
  if ((newcont.dptr = (char*)malloc(newcont.dsize)) == NULL) {
    perror("ENGINE");
    free(contents.dptr);
    return(-1);
  }
	
  bcopy(contents.dptr, newcont.dptr, copysize);
  bcopy(atts, newcont.dptr + copysize, rel->attsize + sizeof(fd_set));
  bcopy(&lmark, newcont.dptr + copysize + rel->attsize + sizeof(fd_set), 
	LMSIZE);

  rc = gdbm_store(rel->relfd, key, newcont, GDBM_REPLACE);
  if (rc != 0)
    fprintf(stderr, "ENGINE: %s\n", gdbm_strerror(gdbm_errno));

  free(newcont.dptr);
  free(contents.dptr);
  return(rc == 0 ? 0 : -1);
}

/*
 * eng_del - delete a tuple on the current transaction level.
 *
 * rel:  relation handle from eng_begin().
 * pkey: primary key (rel->pksize bytes).
 *
 * The record's versions are walked back past aborted levels.  If the
 * version found is visible and already deleted the call fails;
 * otherwise a delete mark (current level with the D-bit set) is
 * appended, replacing the newest version when that one is aborted.
 *
 * Returns 0 on success, -1 if the tuple does not exist or on error
 * (diagnostic on stderr).
 */
int eng_del(Relation *rel, char *pkey)
{
  datum key, newcont, contents;
  char *cptr;
  int dstat, aborted, copysize, rc;
  unsigned lmark;
  
  key.dsize=rel->pksize;
  key.dptr=pkey;

  contents = gdbm_fetch(rel->relfd, key);
  if (contents.dptr == NULL) {
    fprintf(stderr, "ENGINE: tuple not found\n");
    return(-1);
  }

  /* an entry exists, check if we can delete it */
  lmark=0;
  cptr=contents.dptr + contents.dsize;

  do {
    /* set up cptr */
    cptr -= LMSIZE;
    /* get the actual mark */
    bcopy(cptr, &lmark, LMSIZE);
    if (lmark & DELETEMASK) {
      /* tuple is deleted on this level */
      dstat=1;
      lmark &= ACTIVEMASK; /* skip the D-Bit */
    } else dstat=0;
    if (!dstat) cptr -= rel->attsize + sizeof(fd_set);
    aborted = isaborted(rel, lmark);
  } while (aborted && cptr > contents.dptr);
    
  if (aborted) { 
    /* drop the newest (aborted) version before appending */
    if (dstat) 
      copysize=contents.dsize - LMSIZE;
    else
      copysize=contents.dsize - LMSIZE - rel->attsize - sizeof(fd_set);
  } else if (!dstat) {
    copysize=contents.dsize;
  } else {
    fprintf(stderr, "ENGINE: cannot delete, tuple does not exists\n");
    free(contents.dptr);
    return(-1);
  }

  /* mark the tuple as deleted on the new level */
  lmark = rel->level;
  lmark |= DELETEMASK;

  /* we need LMSIZE more space to write the delete update */
  newcont.dsize = copysize + LMSIZE;
  if ((newcont.dptr = (char*)malloc(newcont.dsize)) == NULL) {
    perror("ENGINE");
    free(contents.dptr);
    return(-1);
  }
  bcopy(contents.dptr, newcont.dptr, copysize);
  bcopy(&lmark, newcont.dptr + copysize, LMSIZE);
    
  rc = gdbm_store(rel->relfd, key, newcont, GDBM_REPLACE);
  if (rc != 0)
    fprintf(stderr, "ENGINE: %s\n", gdbm_strerror(gdbm_errno));

  free(newcont.dptr);
  free(contents.dptr);
  return(rc == 0 ? 0 : -1);
}

/*
 * eng_upd - update a tuple on the current transaction level.
 *
 * rel:   relation handle from eng_begin().
 * opkey: primary key of the tuple before the update (rel->pksize bytes).
 * pkey:  primary key after the update.
 * atts:  new bitmap and attribute area
 *        (rel->attsize + sizeof(fd_set) bytes).
 *
 * If the key changes, the update is an eng_del() of opkey followed by
 * an eng_add() of pkey (two separate steps; a failing add does not
 * undo the delete).  Otherwise a new version with the current level is
 * appended to the record, replacing the newest version when that one
 * is aborted.
 *
 * NOTE(review): with an unchanged key the update is applied even if
 * the visible version is a delete mark - confirm that callers never
 * update deleted tuples.
 *
 * Returns 0 on success, -1 on error (diagnostic on stderr).
 */
int eng_upd(Relation *rel, char *opkey, char *pkey, char *atts)
{
  datum key, newcont, contents;
  char *cptr;
  int dstat, aborted, copysize, rc;
  unsigned lmark;

  if (bcmp(opkey, pkey, rel->pksize) != 0) {
    /* primary key changes: delete the old tuple, add the new one */
    if (eng_del(rel, opkey) == -1)
      return(-1);
    if (eng_add(rel, pkey, atts) == -1)
      return(-1);
    return(0);
  }

  /* primaries are identical */
  key.dsize=rel->pksize;    
  key.dptr=pkey; /* pkey or opkey, no matter */
    
  contents = gdbm_fetch(rel->relfd, key);
  if (contents.dptr == NULL) {
    fprintf(stderr, "ENGINE: tuple not found\n");
    return(-1);
  }

  /* an entry exists */
  lmark=0;
  cptr=contents.dptr + contents.dsize;
      
  do {
    /* set up cptr */
    cptr -= LMSIZE;
    /* get the actual mark */
    bcopy(cptr, &lmark, LMSIZE);
    if (lmark & DELETEMASK) {
      /* tuple is deleted on this level */
      dstat=1;
      lmark &= ACTIVEMASK; /* skip the D-Bit */
    } else dstat=0;
    if (!dstat) cptr -= rel->attsize + sizeof(fd_set);
    aborted = isaborted(rel, lmark);
  } while (aborted && cptr > contents.dptr);
      
  if (aborted) { 
    /* drop the newest (aborted) version before appending */
    if (dstat) 
      copysize=contents.dsize - LMSIZE;
    else
      copysize=contents.dsize - LMSIZE - rel->attsize - sizeof(fd_set);
  } else 
    copysize=contents.dsize;
      
  /* level mark of the new version, D-bit clear */
  lmark = rel->level;
  lmark &= ACTIVEMASK;
	  
  newcont.dsize = copysize + LMSIZE + rel->attsize + sizeof(fd_set);
  if ((newcont.dptr = (char*)malloc(newcont.dsize)) == NULL) {
    perror("ENGINE");
    free(contents.dptr);
    return(-1);
  }

  /* keep only the part of the old record that survives (copysize bytes) */
  bcopy(contents.dptr, newcont.dptr, copysize);
  bcopy(atts, newcont.dptr + copysize, rel->attsize + sizeof(fd_set));
  bcopy(&lmark, newcont.dptr + copysize + rel->attsize 
	+ sizeof(fd_set), LMSIZE);

  rc = gdbm_store(rel->relfd, key, newcont, GDBM_REPLACE);
  if (rc != 0)
    fprintf(stderr, "ENGINE: %s\n", gdbm_strerror(gdbm_errno));

  free(contents.dptr);
  free(newcont.dptr);     
  return(rc == 0 ? 0 : -1);
}

/*
 * eng_reset - compact a relation and restart its transaction history.
 *
 * path: path of the GDBM data file; the context file is path + CTXSUF.
 *
 * Under an exclusive lock every record is reduced to its visible
 * version, re-marked with level 1; records whose visible version is
 * deleted or aborted are removed.  The context file is then rewritten
 * with level 1, a TA field of size 1 and a single TA word of 1.
 *
 * After a removal the scan restarts at gdbm_firstkey(); records that
 * were already reset are simply rewritten again.
 *
 * Returns 0 on success, -1 on error (diagnostic on stderr).  All
 * resources acquired here are released in both cases.
 */
int eng_reset(char *path)
{
  GDBM_FILE relfd = NULL;
  int ctxfd, len;
  int rc = -1;
  int locked = 0;
  datum key, contents, resetcont;
  Relation *relation = NULL;
  char ctxfile[ENGNAMELEN], *cptr;
  unsigned lmark, resetlevel;
  int aborted, dstat;
  size_t tabytes, pos;
  ssize_t num;
  unsigned tainit;
  char *dptr;

  /* build the context file name without overrunning ctxfile */
  len = snprintf(ctxfile, sizeof(ctxfile), "%s%s", path, CTXSUF);
  if (len < 0 || len >= (int)sizeof(ctxfile)) {
    fprintf(stderr, "ENGINE: file name %s is too long\n", path);
    return(-1);
  }
  
  if ((ctxfd = open(ctxfile, O_RDWR, 0)) == -1) {
    fprintf(stderr, "ENGINE: contextfile %s, %s\n",
	    ctxfile, strerror(errno));
    return(-1);
  }
  
  if ((relation = (Relation*)malloc(sizeof(Relation))) == NULL) {
    fprintf(stderr, "ENGINE: malloc error for %s, %s\n", 
	    path, strerror (errno));
    goto done;
  }

  /* read the header */
  num = read(ctxfd, (char*)relation, sizeof(Relation));
  /* the pointer stored in the file is stale; keep cleanup safe */
  relation->tastats = NULL;
  if (num != (ssize_t)sizeof(Relation)) {
    fprintf(stderr, "ENGINE: read error for %s, %s\n", 
	    path, strerror(errno));
    goto done;
  }

  /* locking point */
  if (eng_lck(relation, EXCLUSIVE) == -1)
    goto done;
  locked = 1;

  if ((relfd = gdbm_open(path, 512, GDBM_WRITER, 0664, 0)) == NULL) {
    fprintf(stderr, "ENGINE: %s\n", gdbm_strerror (gdbm_errno));
    goto done;
  }

  /* read the TA field */
  if ((relation->tastats = (int*)malloc(relation->tasize * sizeof(unsigned))) == NULL) {
    fprintf(stderr, "ENGINE: malloc error for %s, %s\n", 
	    path, strerror (errno));
    goto done;
  }
  
  /* pos counts bytes, so the buffer is addressed through a char pointer */
  tabytes = relation->tasize * sizeof(unsigned);
  pos = 0;
  while (pos < tabytes) {
    num = read(ctxfd, (char*)relation->tastats + pos, tabytes - pos);
    if (num <= 0)
      break; /* error or premature end of file */
    pos += (size_t)num;
  }
  if (pos != tabytes) {
    fprintf(stderr, "ENGINE: tastats read error for %s, %s\n", 
	    path, strerror(errno));
    goto done;
  }

  /* start reset here */
  key = gdbm_firstkey(relfd);
  
  while(key.dptr != NULL) { /* loop until all tuples are reset */
     
    contents = gdbm_fetch(relfd, key);
    if (contents.dptr == NULL) {
      free(key.dptr);
      goto done; /* something mystic occured */
    }
    
    lmark=0;
    cptr=contents.dptr + contents.dsize;
    
    /* walk back from the newest version to the first non-aborted one */
    do {
      /* set up cptr */
      cptr -= LMSIZE;
      /* get the actual mark */
      bcopy(cptr, &lmark, LMSIZE);
      if (lmark & DELETEMASK) {
	/* tuple is deleted on this level */
	dstat=1;
	lmark &= ACTIVEMASK; /* skip the D-Bit */
      } else dstat=0;
      if (!dstat) cptr -= relation->attsize + sizeof(fd_set);
      aborted = isaborted(relation, lmark);
    } while (aborted && cptr > contents.dptr);

    if (!dstat && !aborted) {
      resetlevel=1;
      resetlevel &= ACTIVEMASK; /* not really necessary */

      /* the surviving version is stored straight out of the fetched
	 buffer, with its mark overwritten in place */
      resetcont.dsize=relation->attsize + sizeof(fd_set) + LMSIZE;
      resetcont.dptr=cptr;
      bcopy(&resetlevel, cptr + relation->attsize + sizeof(fd_set), LMSIZE);

      if (gdbm_store(relfd, key, resetcont, GDBM_REPLACE) != 0) {
	fprintf(stderr, "ENGINE: %s\n", gdbm_strerror(gdbm_errno));
	free(contents.dptr);
	free(key.dptr);
	goto done;
      }
      dptr = key.dptr;
      key = gdbm_nextkey(relfd, key);
      free(dptr);
    } else {
      /* delete this entry; a failed delete would otherwise make the
	 restart below loop forever on the same key */
      if (gdbm_delete(relfd, key) != 0) {
	fprintf(stderr, "ENGINE: %s\n", gdbm_strerror(gdbm_errno));
	free(contents.dptr);
	free(key.dptr);
	goto done;
      }
      free(key.dptr);
      key = gdbm_firstkey(relfd);
    }
    free(contents.dptr);
  } 
  gdbm_close(relfd);
  relfd = NULL;

  /* reset the context file */
  relation->level=1;
  relation->tasize=1;

  close(ctxfd);
  if ((ctxfd = open(ctxfile, O_RDWR | O_TRUNC, 0)) == -1) {
    fprintf(stderr, "ENGINE: contextfile %s, %s\n",
	    ctxfile, strerror(errno));
    goto done;
  }
  /* and store reseted head in context file*/
  if (write(ctxfd, (char*)relation, sizeof(Relation))
      != (ssize_t)sizeof(Relation)) {
    fprintf(stderr, "ENGINE: cannot write head for relation %s, %s\n", 
	    relation->relname, strerror(errno));
    goto done;
  }

  tainit=1;
  /* write the first TA entry to the ctx file */
  if (write(ctxfd, (char*)&tainit, sizeof(unsigned))
      != (ssize_t)sizeof(unsigned)) {
    fprintf(stderr, "ENGINE: cannot write TA status for relation %s, %s\n", 
	    relation->relname, strerror(errno));
    goto done;
  }
  rc = 0;

done:
  if (relfd != NULL)
    gdbm_close(relfd);
  if (ctxfd != -1)
    close(ctxfd);  
  if (relation != NULL) {
    if (locked && eng_unlck(relation) == -1)
      rc = -1;
    free(relation->tastats);
    free(relation);
  }
  return(rc);
}

/********************/
/* private routines */
/********************/

/*
 * eng_lck - take a non-blocking advisory lock on a relation.
 *
 * rel:  relation whose relname selects the lock file
 *       LOCKPATH + relname + LOCKSUF; rel->lckfd receives the open
 *       descriptor of that file.
 * lock: SHARED or EXCLUSIVE.
 *
 * The lock file is created if it does not exist.  The call never
 * waits: if the lock is held elsewhere it fails immediately.
 *
 * Returns 0 with the lock held, -1 otherwise (diagnostic on stderr;
 * no descriptor is left open in that case).
 */
static int eng_lck(Relation *rel, int lock)
{
  char rpath[ENGNAMELEN];
  int len;
  int flag;

  /* validate the request before touching the file system */
  if (lock == SHARED)
    flag=LOCK_SH;
  else if (lock == EXCLUSIVE)
    flag=LOCK_EX;
  else {
    fprintf(stderr, "ENGINE: unknown lock type\n");
    return(-1);
  }

  /* build the lock file name without overrunning rpath */
  len = snprintf(rpath, sizeof(rpath), "%s%s%s",
		 LOCKPATH, rel->relname, LOCKSUF);
  if (len < 0 || len >= (int)sizeof(rpath)) {
    fprintf(stderr, "ENGINE: lock file name for relation %s is too long\n",
	    rel->relname);
    return(-1);
  }

  if ((rel->lckfd = open(rpath, O_RDWR | O_CREAT, 0666)) == -1) {
    perror("ENGINE");
    return(-1);
  }

  if (flock(rel->lckfd, flag | LOCK_NB) == -1) {
    if (errno == EWOULDBLOCK)
      fprintf(stderr, "ENGINE: relation %s is locked\n", rel->relname);
    else
      perror("ENGINE");
    /* the lock was not obtained, so the descriptor is of no further use */
    close(rel->lckfd);
    return(-1);
  }
  return(0);
}

/*
 * eng_unlck - release the lock taken by eng_lck().
 *
 * rel: relation whose lckfd holds the locked descriptor.
 *
 * The descriptor is closed in every case, so it cannot leak when the
 * explicit unlock fails.
 *
 * Returns 0 on success, -1 if flock() reports an error.
 */
static int eng_unlck(Relation *rel)
{
  int rc = 0;

  if (flock(rel->lckfd, LOCK_UN) == -1) {
    perror("ENGINE");
    rc = -1;
  }
  close(rel->lckfd);
  return(rc);
}


/*
 * isaborted - classify transaction level ta using rel->tastats.
 *
 * The level of the running transaction (rel->level) is never aborted.
 * For any other level the entries of rel->tastats are summed from the
 * front until the sum reaches ta; the result is 0 when an odd number
 * of entries was needed and 1 when an even number (including none)
 * was needed.
 *
 * NOTE(review): the walk has no upper bound on the tastats index; it
 * relies on the entries summing up to at least ta.
 */
static int isaborted(Relation *rel, unsigned ta)
{
  int covered; /* levels covered by the entries consumed so far */
  int used;    /* number of tastats entries consumed */

  /* we ourself NOT aborted */
  if (ta == rel->level)
    return(0);

  used = 0;
  for (covered = 0; covered < ta; used++)
    covered += rel->tastats[used];

  return((used + 1) % 2);
}  

/* to be compliant */
static char *strerror(int errno) 
{
  /*
   * Local replacement for strerror(): looks the message up in the
   * sys_errlist table without any range check on the index.
   * NOTE(review): the parameter shares its name with the errno
   * identifier from <errno.h>; confirm this still compiles as intended
   * on platforms where errno is a macro.
   */
  char *msg;

  msg = sys_errlist[errno];
  return(msg);
}
