/* Copyright 1983 by David Zittin, Biosciences Data Centre. */


/* purpose:a very watered down version of AWK, but what it can do it does
 * approx 5-8 times faster than AWK and takes much less core space.
 * qawk, therefore, is great on small machines to extract fields from 
 * awkable data files.  we found that a large proportion of our awk programming
 * was simple data extraction with reformatting and arithmetic transformations
 * and real AWK was a resource hog on our 11/45; qawk helped solve this problem.
 * usage:
 *	qawk [-v] args...
 * qawk, quietly turns over good and bad AWK programs to real AWK if it decides
 * it cannot handle the user program. qawk, therefore, does little 
 * error reporting. the -v flag will cause qawk to print a message if it
 * execs real awk. at present qawk will only process unconditional print
 * and printf statements with no arithmetic expression capabilities (i told
 * you it was watered down). all of the AWK built in arithmetic functions
 * plus a random number generator are available. 
 * Future plans (not yet implemented): pattern capabilities??
 * examples of programs qawk will handle:
 *	$ qawk '-F|' '{print $1, $6, sqrt($NF)}'
 *	$ qawk '{printf("%d %20s %12.2f\n", NR, $NF, log($3))}'
 *	$ qawk '{print NR, NF, $0}'
 *	$ qawk '{print rand(3), $0}'	 (print rand# in front of record,seed=3)
 */

#include <stdio.h>
#include <ctype.h>
#include <debug.h>

/* values of proglist.field that stand for something other than an
 * ordinary field (an ordinary $n is stored as n-1, so $0 lands on -1) */
#define DOLLAR0		(-1)
#define NR		(-2)
#define NF		(-3)
#define DOLLARNF	(-4)
#define DOLLARNR	(-5)
#define FILENAME	(-6)

/* values of proglist.special */
#define LENGTH		(-7)
#define RAND		(-8)

/* values of proglist.type */
#define FLOAT		1
#define DIGIT		2
#define STRING		3
#define NUMERIC		4

/* punctuation looked for in the program text */
#define OPENBRACE	'{'
#define CLOSEBRACE	'}'
#define OPENPAR		'('
#define CLOSEPAR	')'

#define MAXREC	1024
		/* input record buffer.  it is filled, copied and parsed as
		 * characters, so it is an array of char, not of char pointers */
char record[MAXREC];
char *recptr;			/* copy of the whole record, kept for $0 */
#define MAXFLDS	120
char *bp[MAXFLDS];		/* place to store pointers to parsed fields */
char FS=' ';			/* default field separator */
int line;			/* lineno in data stream */
int dollar0;			/* true if there is a reference to $0 in prog */
char *progname;			/* argv[0] as given, used in error messages */
int fmtflg;			/* true if a printf statement is in program */
/* one node per argument of the print(f) statement, in argument order;
 * "head" is the first of them */
struct proglist {
	struct proglist *next;	/* following argument, if any */
	int field;		/* field no. - 1, or a code such as NR */
	int type;		/* as determined from printf fmt ctrl char */
	int special;		/* special conditions e.g. $NF */
	char *outfmt;		/* this argument's slice of the printf format */
	double (*func)();	/* function to subject field to */
} *head;

/* the available arithmetic functions; each is called with one double
 * and hands back a double */
double	sqrt(), exp(), log(), log10();
double	sin(), cos(), tan();
double	asin(), acos(), atan();
double	fabs();
double	asinsqrt();

/* pairs a function name as written in the program with the C routine
 * that does the work */
struct fmap {
	char *fname;
	double (*funcptr)();
};

/* the functions a program may call.  "length" and "rand" have no
 * routine of their own; argprocess() marks them in proglist.special
 * instead.  the entry with the empty name ends the table. */
struct fmap flist[] = {
	{ "sqrt",	sqrt },
	{ "log",	log },
	{ "log10",	log10 },
	{ "exp",	exp },
	{ "length",	NULL },
	{ "asinsqrt",	asinsqrt },
	{ "asin",	asin },
	{ "sin",	sin },
	{ "atan",	atan },
	{ "fabs",	fabs },
	{ "tan",	tan },
	{ "cos",	cos },
	{ "acos",	acos },
	{ "rand",	NULL },
	{ "",		NULL }
};


/* C library routines that return a char pointer */
char	*rindex(), *index(), *strcpy(), *malloc();
/* helpers whose definitions are not in the visible part of this file */
char	*save(), *strsave(), *myfgets(), *any();
/* helpers defined further down */
char	*fmtctl(), *notblank(), *nwstrcpy();
char	lastchar();

char *noprogram = "no program";	/* reason given when no program was seen */
char **cmdline;			/* argv, kept for exec_awk() */
char *binawk = "/bin/awk";	/* the real awk */
char *nospace = "qawk:nospace";	/* out of memory message */
char *tail;	/* tail end of printf control string */
char *datafile = "-";		/* data file being read */
int ncmdargs;			/* argc, kept for exec_awk() */
int vflg;			/* -v: say so when real awk is exec-ed */
FILE *myopen();


/*
 * main - scan the command line.
 * flags: -d bumps the debug level, -v makes exec_awk() announce itself,
 * -Fc makes c the field separator; -f and any unknown flag hand the
 * whole command line to real awk.  the first non-flag argument is the
 * program; each later one is a data file, processed as soon as it is
 * met.  with no data file at all, "datafile" keeps its default of "-".
 * returns 0 when qawk did the work itself.
 */
main(argc,argv)
int argc;
char *argv[];
{
	register char *argp;
	register int i;
	FILE *fp=NULL;
	int compswitch=0;	/* set once the program has been compiled */

	cmdline = argv;		/* kept so exec_awk() can pass them on */
	ncmdargs = argc;

	progname = argv[0];
	for(i=1; i < argc; i++) {
		argp = argv[i];
		if(*argp == '-') {
			switch(*++argp){

			case 'd':	++debug;
					break;
			case 'v':	++vflg;
					break;
			case 'F':	FS = *++argp;
					break;
			case 'f':	exec_awk("a");	
					break;
			default:	exec_awk("Unknown flag");
					break;
			}
		}
		else if(!compswitch) {		/* the program itself */
			++compswitch;
			compile(argp);
		}
		else {				/* a data file */
			datafile = argp;
			fp=myopen(fp, datafile, "r");
			qawk(fp);
		}
	}
	if(!fp) {		/* no data file was named */
		if(!compswitch)
			exec_awk(noprogram);
		fp = myopen(fp, datafile, "r");
		qawk(fp);
	}
	return(0);	/* do not leave the exit status to chance */
}

compile(s)
register char *s;
{
	register struct proglist *tmp;
	char *fmtptr=s, *tmpstr;

	awkcheck(s);

	TRACEF(("just finished awk checking %s", s));
	if(fmtflg) {
		s = rindex(s, '"');		/* find last quote */
		s = index(++s, ',') + 1;	/* find 1 past the comma */
	}
	else
		s = index(s, ' ');		/* find the blk after "print" */

	tmpstr = strsave(s);		/* send getargs a cpy of arg string */
	getargs(tmpstr, bp, MAXFLDS);		/* get the awk args */
	free(tmpstr);	/* free the tmp string */

	if(fmtflg) {
		TRACEF(("starting fmtcontrol scanf at string '%s'",fmtptr));
		fmtptr = index(fmtptr, '"');
		*rindex(++fmtptr, '"') = '\0';		/* null 2nd quote */

		for(tmp=head; tmp; tmp = tmp->next) {

				/* too few ctl chars */
			if(!(tmp->outfmt = fmtctl(&fmtptr)))
				exec_awk("7");

			TRACEF(("outfmt:fld %d='%s'\n",tmp->field,tmp->outfmt));
			TRACEF(("lastchar of fmt segment=%c\n",lastchar(tmp->outfmt)));
			switch(lastchar(tmp->outfmt)) {
			case 	'g' :
			case	'f' :
			case	'e' :	tmp->type = FLOAT;
					break;
			case	'd' :
			case	'x' :
			case	'o' :	tmp->type = DIGIT;
					break;
			case	's' :	tmp->type = STRING;
					break;
			default:	exec_awk("8");
					break;
			}
			TRACEF(("\t\ttype=%d", tmp->type));
		}

		tail = fmtctl(&fmtptr);
		TRACEF(("fmt control tail string='%s'", tail));

		if(fmtctl(&fmtptr)) 	/* more ctl char than args */
			exec_awk("9");
	}
#ifdef DEBUG
if(debug){
	printf("\n+\tCompiled fields structs=");
	for(tmp=head; tmp;tmp = tmp->next)
		printf("\n+\t\tfield=%d\tspecial=%d\toutfmt=%s",
				tmp->field,tmp->special,tmp->outfmt);
	printf("\n+\tend of Compiled field structs\n");
}
#endif
}

			/* given the address of a ptr into a fmt str,
			 * return a saved copy of the next segment of
			 * the string, i.e. everything up to and
			 * including the next fmt control char ("%%" is
			 * passed over).  the caller's ptr is moved to
			 * one char past the segment.  when no control
			 * char is left, or a '%' is never finished by a
			 * letter, the rest of the string is the segment
			 * and the caller's ptr is set to 0; a call made
			 * with the ptr already 0 returns 0.
			 */
char *fmtctl(p)
char **p;
{
	register char *start = *p, *endstr = *p;
	char tmp;

	if (!*p)
		return((char *) 0);
retry:
	if(!(endstr=index(endstr,'%'))) {		/* find % */
		*p = (char *) 0;
		return(save(start));
	}
	if(*++endstr == '%') {		/* leave %% alone */
		++endstr;
		goto retry;
	}
			/* consume width specs, but never run off the
			 * end of the string */
	while(*endstr && !isalpha((unsigned char) *endstr))
		++endstr;
	if(!*endstr) {			/* '%' with no ctl char */
		*p = (char *) 0;
		return(save(start));
	}
	tmp = *++endstr;		/* save 1 char beyond ctl char */
	*endstr = '\0';			/* null this char to make str */
	start = save (start);		/* save the fmt segment */
	*endstr = tmp;			/* restore the nulled char */
	*p = endstr;			/* update the caller's str ptr */
	if(!start)
		err(nospace);
	return(start);
}

/*
 * getargs - parse out the awk field arg list.
 * s is the text after "print" (or after the printf format and its
 * comma).  a blank-free copy is split at commas into bp[] (at most
 * bpsiz entries are offered to gparse), the closing ')' or '}' is cut
 * from the last piece, and every piece is given to argprocess().
 * returns the number of args; with fewer than one, real awk takes over.
 * note that the parameter bp hides the global of the same name.
 */
getargs(s, bp, bpsiz)
char *s, *bp[];
int bpsiz;
{
	register char *closer;
	register int count, k;
	char *squeezed;

	squeezed = malloc(strlen(s)+1);
	if(!squeezed)
		err(nospace);
	nwstrcpy(squeezed, s);

			/* parse on comma separators */
	count = gparse(squeezed, bp, bpsiz, ',');
	if(count < 1)
		exec_awk("arg count");

			/* clean up parenthesis/brace of last arg */
	if(fmtflg)
		closer = rindex(bp[count - 1], CLOSEPAR);
	else
		closer = rindex(bp[count - 1], CLOSEBRACE);
	if(closer)
		*closer = '\0';

	for(k = 0; k < count; k++)
		argprocess(bp[k]);

	free(squeezed);
	return(count);
}

			/* look at each field of print(f) arg list.
			 * make a linked list for later arg printing.
			 * recognize special AWK conditions like NF,
			 * NR, $NF, etc.  Also recognize math and length
			 * functions.  Set appropriate members of each
			 * list element so that the data field can be processed
			 * accordingly.
			 */
argprocess(s)
register char *s;
{
	register struct proglist *tmp;
	register char *c;
	static struct proglist *prev;
	char *numstr;
	int i;


				/* alloc list node */
	if(!(tmp=(struct proglist *) malloc( sizeof(struct proglist))))
		err(nospace);

	tmp->special = 0;	/* zero for safety, used for a flag below */
	if(!head)		/* link */
		head = tmp;
	else
		prev->next = tmp;

	prev = tmp;

retry:
	TRACEF(("argproccessing '%s' \t tmp->special=%d", s, tmp->special));
	if(*s == '$') {		/* special awk constructs beginning with '$' */
		if(!strcmp(++s, "NF")) {
			tmp->field = DOLLARNF;
			return;
		}
		if(!strcmp(s, "NR")) {
			tmp->field = DOLLARNR;
			return;
		}

		numstr = s;		/* arg of the form $digits */
		while(isdigit(*s))
			++s;
		if(*s || numstr == s) 	/* no digits! */
			goto error;
		if( (tmp->field = cvtint(&numstr) - 1) == DOLLAR0)
			++dollar0;
		return;
	}
	else {	/* dealing with other AWK constructs or seeding rand gentr */
		if(!strcmp(s, "FILENAME")) {
			tmp->field = FILENAME;
			return;
		}
		if(!strcmp(s, "NF")) {
			tmp->field = NF;
			tmp->type = NUMERIC;
			return;
		}
		if(!strcmp(s, "NR")) {
			tmp->field = NR;
			tmp->type = NUMERIC;
			return;
		}
		if (tmp->special == RAND) {
			TRACEF(("s='%s'", s));
			if (sscanf( s, "%d", &i) != 1)
				goto error;
			srand(i);			/* seed randnum gentr */
			return;
		}

		if((c=index(s, OPENPAR))) {	/* dealing with a function */
			*c = '\0';			/* null the openpar */
			tmp->type = NUMERIC;
			for(i=0; *(flist[i].fname); i++)
				if(!strcmp(s, flist[i].fname))
					break;
			if(!*flist[i].fname)
				goto error;
			TRACEF(("using function '%s'", flist[i].fname));
			tmp->func = flist[i].funcptr;

			if( !strcmp(s,"length") )
				tmp->special = LENGTH;
			else if ( !strcmp(s, "rand") )
				tmp->special = RAND;

			s = ++c;		/* set "s" to openpar+1 */
			if(!(c=index(s, CLOSEPAR)))
				goto error;
			*c = '\0';
			TRACEF(("retry %s", s));
			goto retry;	/* now proccess the funct argument */
		}
	}
error:
	exec_awk("funny arg");
}

		/* ugly checks to make sure the qawk program 
		 * looks like a legal awk prog of the one shape qawk
		 * handles:  { print args }  or  { printf ( "fmt" , args ) }
		 * sets fmtflg for printf.  any other shape is handed
		 * to real awk through exec_awk().
		 * DO NOT DIDDLE WITH ANY CHARS IN THE STRING!
		 */
awkcheck(s)
register char *s;
{
	register char *tmp = s;		/* later: the opening quote */
	TRACEF(("awkchecking=%s\n",s));

	if(*(s = notblank(s)) != OPENBRACE)
		exec_awk(s);

	if(strncmp((s=notblank(++s)), "print ", 6) &&
		!(fmtflg = !strncmp(s, "printf", 6))) 
			exec_awk(s);

	TRACEF(("printf flg=%d\n", fmtflg));

				/* printf ( "controlstr" , */
	if(fmtflg) {		/* printf statement look +- ok? */
			/* 1st comes the '(', then the '"': both are
			 * needed, and tmp must be set before the
			 * comparison below */
		if(*(s=notblank(s+6))!=OPENPAR || *(tmp=s=notblank(++s)) != '"')
			exec_awk(s);
					/* start from 1st quote & find last " */
		if(!(s = rindex(s, '"')) || s <= tmp)
			exec_awk("quote");
		if(*(s=notblank(++s)) != ',')	/* comma after quote? */
			exec_awk(s);
		if(!(s=rindex(++s, CLOSEPAR)))	/* find last close par */
			exec_awk("paren");
		if(*notblank(++s) != CLOSEBRACE)
			exec_awk(s);

	}
			/* nothing but blanks may follow the '}' */
	if(!(s = index(s, CLOSEBRACE)) || *notblank(++s) )
		exec_awk("brace");
}

/*
 * exec_awk - give up and run real awk on the original command line.
 * s says why; it is shown only with -v or when debugging, and may be 0.
 * the saved argument vector is reused with its first entry replaced by
 * the path of real awk.  execv() comes back only when it fails, in
 * which case err() is called.
 */
exec_awk(s)
char *s;
{
#ifdef DEBUG
	int i;
#endif

	if(!s)			/* never give a null pointer to %s */
		s = "(null)";

	cmdline[0] = binawk;
	cmdline[ncmdargs] = (char *) 0;

#ifdef DEBUG
if(debug) {
	fprintf(stderr,"\nquit string=%s \t nargs=%d\n", s, ncmdargs);
	fprintf(stderr,"Can't COPE!...exec-ing %s with arguments:\n",binawk); 
	for(i=0; i < ncmdargs; i++)
		fprintf(stderr," %s", cmdline[i]);
	fprintf(stderr,"\n");
}
#endif
	if(vflg)
		fprintf(stderr,"execing %s (%s)\n", binawk, s);
	execv(binawk, cmdline);
	err("%s:No %s",progname, binawk);
}

/* step over leading white space; returns a pointer to the first char
 * of s that is not white space (the terminating null if there is none) */
char *notblank(s)
register char *s;
{
	for( ; isspace(*s); ++s)
		;
	return(s);
}

/* text of field n (counted from 0) of the current record, or "" when
 * the record has no such field.  needs "nf" in scope. */
#define FIELDNO(n) (((n) >= 0 && (n) < nf) ? (bp[(n)]) : (""))

/*
 * qawk - run the compiled program over every record of dataf.
 * each record is read with getln(), split on FS by gparse() into bp[],
 * and then every node of the list at "head" is printed in order: plain
 * print puts a blank between items and a newline after the last,
 * printf uses each node's format slice followed by "tail".
 * NOTE(review): getln() and gparse() are not visible here; gparse()
 * presumably cuts "record" up in place, which would be why a copy is
 * kept for $0 -- confirm.
 */
qawk(dataf)
FILE *dataf;
{
	register struct proglist *p;
	register char *s, c;
	int nf;
	double x;
	char numbuf[32];	/* NR or NF spelled out, for %s and length() */

	if(dollar0 && !recptr && !(recptr=malloc(MAXREC)))
		err(nospace);

	while(getln(record, MAXREC, dataf) != EOF) {
		++line;
		if(dollar0)
			strcpy(recptr, record);
		nf = gparse(record, bp, MAXFLDS, FS);
		for(p=head; p; p = p->next) {

			x = -1.0;	/* negative: no number yet, scan s for one */
			s = "";		/* never leave s dangling */

			switch(p->field) {
			case NR:	x = line;
					sprintf(numbuf, "%d", line);
					s = numbuf;
					break;
			case NF:	x = nf;
					sprintf(numbuf, "%d", nf);
					s = numbuf;
					break;
			case FILENAME:	s = datafile;
					break;
			case DOLLARNF:	s = FIELDNO(nf-1);  /* "" if no fields */
					break;
			case DOLLARNR:	s = FIELDNO(line-1);
					break;
			case DOLLAR0:	s = recptr;
					break;
			default:	s = FIELDNO(p->field);
					break;
			}

			switch(p->special) {
			case LENGTH:	x = strlen(s);
					break;
			case RAND:	x = rand();
					break;
			default:	break;
			}

			if(!fmtflg) {
				if(p != head)
					putc(' ',stdout);
				if(p->type == NUMERIC) {
						/* %lf: x is a double */
					if(x < 0.0 && sscanf(s, "%lf", &x) != 1)
						x = 0.0;
					if(p->func) x = (*p->func)(x);
					printf("%g", x);
				}
				else
					while((c = *s++))
						putc(c, stdout);
			}
			else
				switch(p->type) {
				case STRING:
					printf(p->outfmt, s);
					break;
				case DIGIT:
					if(x < 0.0 && sscanf(s, "%lf", &x) != 1)
						x = 0.0;
					if(p->func) x = (*p->func)(x);
					printf(p->outfmt, (int) x);
					break;
				case FLOAT:
					if(x < 0.0 && sscanf(s, "%lf", &x) != 1)
						x = 0.0;
					if(p->func) x = (*p->func)(x);
					printf(p->outfmt, x);
					break;
				default:
					break;
				}
		}

			/* end of record: printf supplies its own ending */
		if(fmtflg) {
			if(tail)
				for(s=tail; (c = *s++); )
					putc(c, stdout);
		}
		else
			putc('\n',stdout);
	}
}
			

/* return the last char of string s, i.e. the one just before the
 * terminating null; an empty (or null) string gives '\0' rather than
 * a look at whatever lies in front of the string */
char lastchar(s)
register char *s;
{
	if(!s || !*s)
		return('\0');
	while(s[1])
		++s;
	return(*s);
}

/* "no white space" strcpy: copy s to newbuf leaving out every white
 * space char, null terminate the result and return newbuf.  newbuf
 * must have room for strlen(s)+1 chars. */
char *nwstrcpy(newbuf, s)
register char *newbuf, *s;
{
	register char *out = newbuf;

	while(*s) {
		if(!isspace(*s))
			*out++ = *s;
		++s;
	}
	*out = '\0';
	return(newbuf);
}

double asinsqrt(x)
double x;
{
	return( asin(sqrt(x)) );
}
