
	12) Sequence Number generation is Slow

From: teklabs!decvax!ittvax!swatt
Date: Thu Jul  1 13:07:20 1982
Subject: UUCP sequence number bogs
Newsgroups: net.bugs
References: gename.c


Some time ago several people mentioned that sequence numbers  can
wrap  around  too  quickly  on busy systems and possibly generate
duplicate names.  Decvax simply went  to  hex  for  the  sequence
numbers  and  ran  into  no  problems.   I wrote some routines to
manage the 4-digit sequence number in base 62  to  be  absolutely
sure wraparound would be a long time coming.

As a side effect of all this, I noticed that uux, in  particular,
is very stupid about the way it grabs sequence numbers: it gets 6
sequence  numbers  for  the typical command (such as used by mail
and news), but only uses 3.  Since the sequence number  file  has
to  be  locked for each new number, uux spends a lot of time just
getting sequence numbers.

With sequence numbers in base 62, I decided to look into grabbing
numbers in a HUNK, instead of  1  at  a  time.   Below  are  some
comparisons  (run  during  day  on  VAX780  running  4.1bsd, RP06
disks):

First, the time to generate 100 sequence numbers the old way:

[ Many lines of profil output have been deleted here for the sake of
brevity - contact the author or the editor for the full justification.]

Here is the code for gename.c, simply replace your existing  file
with  the new version.  You can experiment with the included test
main yourself.  The original  also  had  a  bug  such  that  9999
wrapped around to 1000 instead of 0.

	- Alan S. Watt

=================================================================
	/*  gename 3.4  01/15/82 (ITT)  */

/*
 * Build configuration.
 *
 * With TEST defined the file is built as a stand-alone test program:
 * the names normally supplied by uucp.h are stubbed out locally and
 * the locking code is pulled in by including its source directly.
 */
#ifdef TEST
/* finish() simply exits the test program */
# define finish(x)	exit(x)
/* debug output is compiled out: DEBUG() expands to an empty statement */
# define DEBUG(a,b,c)	;
/* lock file and sequence file names used by the test build */
# define SEQLOCK	"SEQLCK"
# define SEQFILE	"SEQFILE"
# include "uucplock.c"
#else !TEST
# include "uucp.h"
/*
 * ITTVAX configuration: keep sequence numbers with the "ts" routines
 * in base 62 and allocate them 10 at a time.
 */
# ifdef ITTVAX
#  define TSSEQ
#  define SEQHUNK 10
#  define BASE 62
# endif ITTVAX
#endif TEST


/*******
 *	gename(pre, sys, grade, file)	generate file name
 *	char grade, *sys, pre, *file;
 *
 *	return codes:  none
 */

gename(pre, sys, grade, file)
char pre, *sys, grade, *file;
{
	char sqnum[5];

	getseq(sqnum);
	sprintf(file, "%c.%.7s%c%.4s", pre, sys, grade, sqnum);
	DEBUG(4, "file - %s\n", file);
	return;
}


/* Second argument handed to ulockf() when taking SEQLOCK.
 * NOTE(review): presumably a lock age/timeout in seconds -- confirm
 * against ulockf().
 */
#define SLOCKTIME 10L
/* Number of attempts getseq() makes to obtain SEQLOCK before giving up */
#define SLOCKTRIES 5
/* Number of characters kept in a sequence-number string */
#define SEQLEN 4

/*******
 *	getseq(snum)	get next sequence number
 *	char *snum;
 *
 *	Stores the next sequence number in snum as a NUL-terminated
 *	string holding the low-order SEQLEN characters of the number;
 *	snum must have room for SEQLEN+1 characters.
 *
 *	return codes:  0    - ok
 *		       FAIL - sequence file could not be created
 *
 * Fri Jan 15 15:34:02 EST 1982 ittvax!swatt:
 * if "TSSEQ" is defined, use "ts" routines to keep sequence numbers
 * in either base 36 or 62.
 * if "SEQHUNK" is defined, then sequence numbers are allocated in
 * hunks of that size.  Using SEQHUNK on very busy systems is not
 * advisable unless TSSEQ is also enabled.
 *
 * Also fix so wraparound is correct; the old code did 9999=>1000
 *
 * The hunk idea saves a lot of filesystem activity.  To get a
 * new sequence number from the sequence file involves:
 *
 *  1)	Lock the file (creat + link)
 *  2)	Open file
 *  3)	Read from file
 *  4)	Reopen file for writing
 *  5)	Change mode to 0666
 *  6)	Write to file
 *  7)  Unlock file (unlink)
 *
 * The static variables n (next number to hand out) and nseq (numbers
 * left in the current hunk) persist across calls, so the sequence
 * file is only touched once per SEQHUNK calls.
 *
 * Fixes relative to the 3.4 posting:
 *  - the lock is released when the sequence file can not be created
 *    (it used to be left behind on the FAIL return);
 *  - overflow digits are dropped by pointing at the tail of the
 *    string instead of strcpy() onto an overlapping buffer;
 *  - an empty or unreadable sequence value restarts at 0 instead of
 *    using whatever happened to be in n / tseqbuf;
 *  - the lock retry loop no longer borrows the static n as counter.
 */

/* Default: allocate one number at a time when no hunk size is configured */
#ifndef SEQHUNK
# define SEQHUNK 1
#endif /* !SEQHUNK */

/* Return a pointer to the last SEQLEN characters of s (all of s if shorter) */
static char *
seqtail(s)
char *s;
{
	int len;

	len = strlen (s);
	if (len > SEQLEN)
		s += len - SEQLEN;
	return (s);
}

getseq(snum)
char *snum;
{
	FILE *fp;
#ifdef TSSEQ
	typedef unsigned long int ts_t;
	ts_t atots();
	char *tstoa();
#else /* !TSSEQ */
	typedef int ts_t;
#endif /* TSSEQ */
	char tseqbuf[64];
	int tries;
	static ts_t n;
	static int nseq = 0;

	if (nseq <= 0) {
		/* current hunk is used up: reserve a new one in the file */
		for (tries = 0; tries < SLOCKTRIES; tries++) {
			if (!ulockf( SEQLOCK, SLOCKTIME))
				break;
			sleep(5);
		}

		ASSERT(tries < SLOCKTRIES, "CAN NOT GET", SEQLOCK, 0);

		/* @@@(ittvax!swatt):
		 * can save something by using "r+" on fopen
		 */
		if ((fp = fopen(SEQFILE, "r")) != NULL) {
#ifdef TSSEQ
			if (fgets (tseqbuf, sizeof tseqbuf, fp) != NULL)
				n = atots (tseqbuf);
			else
				n = 0;	/* empty file: start over */
#else /* !TSSEQ */
			/* read sequence number file */
			if (fscanf(fp, "%4d", &n) != 1)
				n = 0;	/* empty or garbled file: start over */
#endif /* TSSEQ */
			fp = freopen(SEQFILE, "w", fp);
			ASSERT(fp != NULL, "CAN NOT OPEN", SEQFILE, 0);
			chmod(SEQFILE, 0666);
		}
		else {
			/* can not read file - create a new one */
			if ((fp = fopen(SEQFILE, "w")) == NULL) {
				/* can not write new sequence file;
				 * do not leave the lock behind
				 */
				rmlock (SEQLOCK);
				return(FAIL);
			}
			chmod(SEQFILE, 0666);
			n = 0;
		}

		/* record the first number of the NEXT hunk in the file */
#ifdef TSSEQ
		tstoa (n+SEQHUNK, tseqbuf, SEQLEN);
#else /* !TSSEQ */
		sprintf (tseqbuf, "%04d", n+SEQHUNK);
#endif /* TSSEQ */
		/* discard high order digits on overflow */
		fprintf (fp, "%s", seqtail (tseqbuf));
		fclose (fp);
		rmlock (SEQLOCK);
		nseq = SEQHUNK;
	}

#ifdef TSSEQ
	/* Convert n to base 36 (or base 62) digit string */
	tstoa (n, tseqbuf, SEQLEN);
#else /* !TSSEQ */
	sprintf (tseqbuf, "%04d", n);
#endif /* TSSEQ */
	/* discard high-order digits on overflow */
	strcpy (snum, seqtail (tseqbuf));
	++n;
	--nseq;
	return(0);
}

#ifdef TSSEQ
/*********************************************************************
function:	ts
description:	Convert between binary integers and base thirty-six
		strings (hence the name "ts")
programmer:	Alan S. Watt

history:
	01/14/82	original version
*********************************************************************/


/* Alphanumeric string-to-integer conversion routines */

/* BASE is either 36 or 62
 * Base 36 allows magnitudes from 0 to 1679615
 * Base 62 allows magnitudes from 0 to 14776335
 */
/* Radix of the sequence strings; 62 unless configured otherwise */
#ifndef BASE
# define BASE	62
#endif /* BASE */
#define EOS	'\0'
/* Digit characters in value order: 0-9, then A-Z, then a-z */
static char tsdigits[]	=
	"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

#include <ctype.h>

/* Ordinal values for characters
 * Change for non-ASCII
 *
 * The argument is parenthesized so that an expression argument
 * (e.g. a conditional) is subtracted from as a whole.
 */
#define DIGORD(c)	((c)-'0')
#define UCORD(c)	((c)-'A')
#define LCORD(c)	((c)-'a')

/* Integer type that holds a decoded sequence number */
typedef unsigned long int ts_t;

/* Convert TS string to binary integer.
 *
 * Digits are accumulated in base BASE until the end of the string or
 * the first character that is not a digit or a letter (so a trailing
 * newline left by fgets() simply ends the number).  '0'-'9' count as
 * 0-9 and 'A'-'Z' as 10-35.  'a'-'z' count as 36-61 when BASE is 62;
 * when BASE is 36 they fold onto the same values as upper case.
 *
 * Characters are classified by explicit range tests rather than the
 * <ctype.h> routines: those are not defined for negative char values
 * and may accept non-ASCII letters in some locales, while the ordinal
 * macros used here already assume ASCII.
 */
ts_t
atots (str)
register char *str;
{
	register ts_t ret;
	register int digit;
	register int c;

	for (ret = 0; (c = *str) != EOS; str++) {
		if (c >= '0' && c <= '9')
			digit = DIGORD(c);
		else if (c >= 'A' && c <= 'Z')
			digit = UCORD(c) + 10;
		else if (c >= 'a' && c <= 'z')
			digit = LCORD(c) + 10 + (BASE-36);
		else
			break;
		ret = ret * BASE + digit;
	}
	return (ret);
}

/* Convert binary integer to TS string.
 * The result has at least 'prec' characters (and never fewer than
 * one), zero-filled on the left; a number needing more digits than
 * 'prec' is written out in full.  No truncation is done here, the
 * calling routine must do it if required.
 * Returns a pointer to the terminating EOS stored in 'buf'.
 */
char *
tstoa (num, buf, prec)
register ts_t num;
register char *buf;
int prec;
{
	register ts_t rest;
	register char *p;
	char *end;
	int width;

	/* how many digits does num itself need? */
	width = 1;
	for (rest = num / BASE; rest != 0; rest /= BASE)
		width++;
	/* pad out to the requested width */
	if (width < prec)
		width = prec;

	/* terminate, then fill in the digits from right to left */
	end = buf + width;
	*end = EOS;
	for (p = end; p > buf; num /= BASE)
		*--p = tsdigits[num % BASE];
	return (end);
}
#endif TSSEQ

#ifdef TEST
#include <stdio.h>
/* Test driver: generate file names with gename() and print them.
 *
 *	argv[1]	number of names to generate (default 10)
 *	argv[2]	if present, the names are generated but not printed
 *
 * Returns 0; the original fell off the end of main without a value,
 * leaving the exit status undefined.
 */
main (argc, argv)
int argc;
char **argv;
{
	char buf[128];
	int nseq;

	nseq = 10;
	if (argc > 1)
		nseq = atoi (argv[1]);

	while (nseq-- > 0) {
		gename ('C', "ittvax", 'n', buf);
		if (argc < 3)
			printf ("%s\n", buf);
	}
	return (0);
}
#endif TEST
=================================================================




From: teklabs!decvax!ittvax!swatt
Date: Tue Jul 20 09:20:19 1982
Subject: your uucp/gename.c
Newsgroups: net.bugs.uucp
References: gename.c

[I have removed many lines of letters that Alan got about
'unsigned long' ints not working on some compilers. - Mcg]


"unsigned long" is only recognized by the portable C compiler, and
even then I think not all versions do so.

The "unsigned" qualifier is unnecessary for base 62 sequence numbers
of length 4 (max == 14 million something).  You can just change that
to "typedef long int ts_t" and it should all work.

There are also some complaints about "mismatched arguments" to the
ASSERT macro.  The ASSERT calls are all as I found them; I think
the mismatch only occurs in the test versions.

Here is the new source with the change; I've tested it to be sure
that "zzzz" wraps around to "0000" as it should.  This is on a VAX
of course; you should verify it all works on your pdp-11.  This is
what the "#ifdef TEST" junk is for; you can generate a local test
copy.  It expects to include the file "uucplock.c", which on most
installations is "ulockf.c" in the uucp source area.

	- Alan
======================================================================
	/*  gename 3.3  1/5/80  13:51:41  */


[ An old version was included here ]


