/*
 * yylex and supporting cast
 *
 * adapted from PCC
 * Alan Hastings 2/12/85
 */

/*
 * SCCS identification string embedded in the object file.
 * NOTE(review): the text still names "scan.c 1.3 (Berkeley)" -- presumably
 * the id of the file this one was adapted from; confirm before relying on it.
 */
static char *sccsid ="@(#)scan.c	1.3 (Berkeley) 12/24/82";

#include "defs.h"
#include "y.tab.h"

/*
 * lexical actions for LXdope
 *
 * One of these codes is stored in the lxact member of every LXdope entry;
 * yylex switches on it to decide how to handle the character just read.
 * The value 15 is not assigned to any action.
 */

# define A_ERR  0		/* illegal character */
# define A_LET  1		/* saw a letter */
# define A_DIG  2		/* saw a digit */
# define A_1C   3		/* return a single character */
# define A_STR  4		/* string */
# define A_CC   5		/* character constant */
# define A_BCD  6		/* GCOS BCD constant */
# define A_SL   7		/* saw a / */
# define A_DOT  8		/* saw a . */
# define A_PL   9		/* + */
# define A_MI  10		/* - */
# define A_EQ  11		/* = */
# define A_NOT 12		/* ! */
# define A_LT  13		/* < */
# define A_GT  14		/* > */
# define A_AND 16		/* & */
# define A_OR  17		/* | */
# define A_WS  18		/* whitespace (not \n) */
# define A_NL  19		/* \n */

/*
 * character classes for LXmask
 *
 * Bit flags; a character may belong to several classes at once.
 * Lxinit fills in LXmask, and Lxclass() tests membership.
 */

# define LEXLET	 01	/* letters, '_' and '$' */
# define LEXDIG	 02	/* decimal digits */
# define LEXOCT	 04	/* octal digits 0-7 */
# define LEXHEX	010	/* hexadecimal digits, either case */
# define LEXWS	020	/* blank, \t, \r, \b, \f, \013 (not \n) */
# define LEXDOT	040	/* the '.' character only */

/*
 * local macros
 */

/*
 * CSSZ is the number of character codes the tables cover; the tables
 * themselves have CSSZ+1 slots because they are indexed by character+1.
 * CSMASK is the matching bit mask; it is not referenced in the part of
 * the file visible here.
 */
#ifdef ibm
# define CSMASK 0377
# define CSSZ 256
#else
# define CSMASK 0177
# define CSSZ 128
#endif

#define	LXTSZ	BUFSIZ	/* size of LXtext (and of Lxstr's string buffer) */

#define LEXVAL	yylval	/* lexical value shared with yyparse */

	/*
	 * fill LXtext from beginning: store c as the first character, then
	 * let Lxmore append every following input character of class m
	 */
#define Lxget(c,m)	(LXgcp=LXtext,Lxmore(c,m))
	/*
	 * true if bits are on for given character and mask; the +1 lets
	 * EOF (-1) use slot 0 of the table
	 */
#define Lxclass(ch, cl)	(LXmask[(ch)+1] & (cl))

/*
 * local types
 */

/*
 * special character actions
 *
 * One entry describes how yylex treats a character: which A_* action to
 * take, and (for actions that return immediately) the token number and
 * the value stored in LEXVAL.intval.
 */
struct LXdope {
	short lxch;	/* the character (-1 marks the final, EOF entry) */
	short lxact;	/* the action to be performed (an A_* code) */
	short lxtok;	/* the token number to be returned */
	short lxval;	/* the value to be returned */
};

/*
 * global declarations
 *
 * LXmask and LXcp are indexed by character+1 so that EOF (-1) maps to
 * slot 0.  LXintcon and LXfloatcon are handed to the parser by address
 * through LEXVAL, so each is overwritten when the next constant of the
 * same kind is scanned.
 */
private char		LXtext[LXTSZ];	/* buffer for partial line processing */
private char		*LXgcp;		/* global pointer into LXtext array */
private short		LXmask[CSSZ+1];	/* character class bitmap */
private struct LXdope	*LXcp[CSSZ+1];	/* index per character into LXdope */
private struct Intcon	LXintcon;	/* integer constant sent back to yyparse */
private struct Floatcon	LXfloatcon;	/* float constant */

/*
 * primary actions on characters
 *
 * Columns are: character, action, token number, value.
 * Lxinit refers to entries 0 through 3 by index (illegal character,
 * letter, digit, whitespace), so those four must stay in this order.
 * The last entry has lxch == -1; it both terminates the table scan in
 * Lxinit and serves as the entry for EOF, which is returned as token 0.
 */
private struct LXdope LXdope[] = {	/* first 4 slots are "known" in Lxinit */
	'@',	A_ERR,	0,	0,	/* illegal characters go here... */
	'_',	A_LET,	0,	0,	/* letters point here */
	'0',	A_DIG,	0,	0,	/* digits point here */
	' ',	A_WS,	0,	0,	/* whitespace goes here */
	'\n',	A_NL,	0,	0,	/* special handling */
	'"',	A_STR,	0,	0,	/* character string */
	'\'',	A_CC,	0,	0,	/* character constant */
	'`',	A_BCD,	0,	0,	/* GCOS BCD constant */
	'(',	A_1C,	LP,	0,
	')',	A_1C,	RP,	0,
	'{',	A_1C,	LC,	0,
	'}',	A_1C,	RC,	0,
	'[',	A_1C,	LB,	0,
	']',	A_1C,	RB,	0,
	'*',	A_1C,	MUL,	MUL,
	'?',	A_1C,	QUEST,	0,
	':',	A_1C,	COLON,	0,
	'+',	A_PL,	PLUS,	PLUS,
	'-',	A_MI,	MINUS,	MINUS,
	'/',	A_SL,	DIVOP,	DIV,
	'%',	A_1C,	DIVOP,	MOD,
	'&',	A_AND,	AND,	AND,
	'|',	A_OR,	OR,	OR,
	'^',	A_1C,	ER,	ER,
	'!',	A_NOT,	UNOP,	NOT,
	'~',	A_1C,	UNOP,	COMPL,
	',',	A_1C,	CM,	CM,
	';',	A_1C,	SM,	0,
	'.',	A_DOT,	STROP,	DOT,
	'<',	A_LT,	RELOP,	LT,
	'>',	A_GT,	RELOP,	GT,
	'=',	A_EQ,	ASSIGN,	ASSIGN,
	-1,	A_1C,	0,	0,	/* EOF; also ends the table */
};

/*
 * Lxinit - initialize tables
 *
 * First input line may be filename specification.
 */
public Lxinit()
{
	register struct LXdope *p;
	register i;
	register char *cp;

	/* set up character classes */
	Lxenter("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$", LEXLET);
	Lxenter("0123456789", LEXDIG);
	Lxenter("0123456789abcdefABCDEF", LEXHEX);
	/* \013 should become \v someday; \013 is OK for ASCII and EBCDIC */
	Lxenter(" \t\r\b\f\013", LEXWS);
	Lxenter("01234567", LEXOCT);
	LXmask['.'+1] |= LEXDOT;

	/* make LXcp point to appropriate LXdope entry for each character */

	/* initialize error entries */
	for (i= 0; i<=CSSZ; ++i) LXcp[i] = LXdope;

	/* make unique entries */
	for (p=LXdope; ; ++p) {
		LXcp[p->lxch+1] = p;
		if (p->lxch < 0) break;
	}

	/* handle letters, digits, and whitespace */
	/* by convention, first, second, and third places */

	cp = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$";
	while (*cp)
		LXcp[*cp++ + 1] = &LXdope[1];
	cp = "123456789";
	while (*cp)
		LXcp[*cp++ + 1] = &LXdope[2];
	cp = "\t\b\r\f\013";
	while (*cp)
		LXcp[*cp++ + 1] = &LXdope[3];

	/* first line might have title */
	Lxcontrol();
}

/*
 * Lxenter - add class bits to LXmask
 *
 * s	NUL-terminated list of characters
 * m	class bit(s) to turn on for each character in s
 */
private Lxenter(s, m)
register char *s;
register short m;
{
	register int ch;

	for (ch = *s; ch != 0; ch = *++s)
		LXmask[ch+1] |= m;	/* table is indexed by character+1 */
}

/*
 * yylex - lexical analysis for yyparse
 *
 * Reads characters from stdin and dispatches on the action stored in the
 * LXdope entry of each one until a complete token has been recognized.
 * Returns the token number; the value that goes with it, if any, is left
 * in LEXVAL.  End of file is the table entry with lxch == -1, whose
 * action is A_1C and whose token number is 0.
 *
 * NOTE(review): LXcp is indexed with the raw getchar() value plus one, so
 * an input byte larger than CSSZ-1 indexes past the end of the table.
 */
public yylex()
{
	register lxchar;
	register struct LXdope *p;

	for (;;) {
		switch((p=LXcp[(lxchar=getchar())+1])->lxact) {

		/*
		 * Reached only by goto: the character after an operator did
		 * not extend it, so push it back and return the operator
		 * itself using the values in its table entry.
		 */
    onechar:
			ungetc(lxchar ,stdin);

		case A_1C:
			/*
			 * one-character actions (EOF comes here)
			 */
			LEXVAL.intval = p->lxval;
			Ltrace(printf("A_1C tok=%d lval=%d\n",p->lxtok,p->lxval));
			return(p->lxtok);

		case A_ERR:
			uerror("illegal character: %03o (octal)", lxchar);
			/* leaves the switch and reaches the cerror() below */
			break;

		case A_LET: {
			Name np;
			Symbol sp;
			/* collect an identifier, check for reserved word, and return */
			Lxget(lxchar, LEXLET|LEXDIG);
			if ((lxchar=Lxres()) > 0) {
				return(lxchar); /* reserved word */
			}
			if (lxchar == 0) continue; /* ASM in-line processed */
			np = Nlookup(LXtext);
			sp = Symlookup(np, 0, Curlevel);
			/* return pointer to name in any case */
			LEXVAL.nameptr = np;
			/* check this name to see if it's really a typedef */
			if (sp != SNIL && sp->s_class==STYPEDEF) {
				Ltrace(printf("A_LET TYPE %s\n",Nstring(np)));
				return(TYPE);
			} else {
				Ltrace(printf("A_LET %s\n",Nstring(np)));
				return(NAME);
			}
		    }

		case A_DIG:
			/* collect a digit string, then look at last one... */
			LXintcon.i_value = 0;
			Lxget(lxchar, LEXDIG);
			/* the character after the digits selects the number format */
			return(Lxgetnum(getchar()));

		case A_DOT:
			/* look for a dot: if followed by a digit, floating point */
			lxchar = getchar();
			if (Lxclass(lxchar, LEXDIG)) {
				ungetc(lxchar,stdin);
				/* no integer part: the number text starts at the dot */
				LXgcp = LXtext;
				return(Lxgetnum('.'));
			}
			goto onechar;

		case A_STR:
			/* string constant */
			Lxstr('"');
			return(STRING);

		case A_CC:
			/* character constant */
			Lxstr('\'');
			return(ICON);

		case A_BCD:
			/* GCOS BCD constant */
			return(Lxgetbcd());

		case A_SL:
			/* a plain slash, or the start of a comment */
			if ((lxchar=getchar()) != '*') goto onechar;
			Lxcom();
			/* a comment is treated like whitespace: fall through */
		case A_WS:
			continue;

		case A_NL:
			++Lineno;
			/* the next line may be a "# line file" directive */
			Lxcontrol();
			continue;

		case A_NOT:
			/* ! or != */
			if ((lxchar=getchar()) != '=') goto onechar;
			LEXVAL.intval = NE;
			return(EQUOP);

		case A_MI:
			/* - or -- or -> */
			if ((lxchar=getchar()) == '-') {
				LEXVAL.intval = DECR;
				return(INCOP);
			}
			if (lxchar != '>') goto onechar;
			LEXVAL.intval=STREF;
			return(STROP);

		case A_PL:
			/* + or ++ */
			if ((lxchar=getchar()) != '+') goto onechar;
			LEXVAL.intval = INCR;
			return(INCOP);

		case A_AND:
			/* & or && */
			if ((lxchar=getchar()) != '&') goto onechar;
			return(LEXVAL.intval = ANDAND);

		case A_OR:
			/* | or || */
			if ((lxchar=getchar()) != '|') goto onechar;
			return(LEXVAL.intval = OROR);

		case A_LT:
			/* < or << or <= */
			if ((lxchar=getchar()) == '<') {
				LEXVAL.intval = LS;
				return(SHIFTOP);
			}
			if (lxchar != '=') goto onechar;
			LEXVAL.intval = LE;
			return(RELOP);

		case A_GT:
			/* > or >> or >= */
			if ((lxchar=getchar()) == '>') {
				LEXVAL.intval = RS;
				return(SHIFTOP);
			}
			if (lxchar != '=') goto onechar;
			LEXVAL.intval = GE;
			return(RELOP);

		case A_EQ:
			/* = or == or archaic op= */
			/* The =op operators are handled in the grammar */
			switch(lxchar = getchar()) {

			case '=':
				LEXVAL.intval = EQ;
				return(EQUOP);

			case '+':
				LEXVAL.intval = ASG PLUS;
				break;

			case '-':
				LEXVAL.intval = ASG MINUS;

				/*
				 * "=-" and "=*" directly followed by a letter,
				 * digit or dot could also be read as "=" plus a
				 * unary operator; warn that the assignment
				 * operator reading was taken.
				 */
			warn:
				if (Lxclass((lxchar=getchar()), (LEXLET|LEXDIG|LEXDOT))) {
					werror("ambiguous assignment: assignment op taken");
				}
				ungetc(lxchar ,stdin);
				break;

			case '*':
				LEXVAL.intval = ASG MUL;
				goto warn;

			case '/':
				LEXVAL.intval = ASG DIV;
				break;

			case '%':
				LEXVAL.intval = ASG MOD;
				break;

			case '&':
				LEXVAL.intval = ASG AND;
				break;

			case '|':
				LEXVAL.intval = ASG OR;
				break;

			case '^':
				LEXVAL.intval = ASG ER;
				break;

			case '<':
				/* only "=<<" is legal; the offending character is not pushed back */
				if ((lxchar=getchar()) != '<') {
					uerror("=<%c illegal", lxchar);
				}
				LEXVAL.intval = ASG LS;
				break;

			case '>':
				/* only "=>>" is legal; the offending character is not pushed back */
				if ((lxchar=getchar()) != '>') {
					uerror("=>%c illegal", lxchar);
				}
				LEXVAL.intval = ASG RS;
				break;

			default:
				/* plain '=' */
				goto onechar;

			}

			return(ASOP);

		default:
			cerror("yylex error, character %03o (octal)", lxchar);

		}

		/*
		 * Every normal path above returns or continues; only the A_ERR
		 * break and the default case arrive here.
		 */
		cerror("out of switch in yylex");
	}
}

/*
 * Lxmore - snarf up characters of some class
 *
 * Stores c at LXgcp, then appends every following input character that
 * belongs to class mask m.  The first character outside the class is
 * pushed back onto stdin.  On return the text is NUL-terminated and
 * LXgcp points at the terminator, so a later call continues the text.
 *
 * Characters that do not fit in LXtext are read and discarded.  The
 * last element of LXtext is always kept free for the terminator; a
 * start position beyond the last usable slot (callers may advance LXgcp
 * themselves) is pulled back so that nothing is written outside the
 * buffer.
 */
private Lxmore(c, m)
register c, m;
{
	register char *cp;
	register char *last;

	/* last slot that may hold a character; the one after it is for '\0' */
	last = &LXtext[LXTSZ-2];

	cp = LXgcp;
	if (cp > last)
		cp = last;
	*cp = c;
	while (c=getchar(), Lxclass(c, m)) {
		if (cp < last) {
			*++cp = c;
		}
	}
	ungetc(c,stdin);
	*(LXgcp = cp+1) = '\0';
}

/*
 * Lxstr - match a string or character constant, bounded by flagchar
 *
 * flagchar is '"' for a string constant or '\'' for a character
 * constant; the opening delimiter has already been read by yylex.
 * Characters are read up to the closing delimiter, with backslash
 * escapes translated on the way.
 *
 * For a string the text is copied into freshly malloc'ed storage and
 * its address is left in LEXVAL.stringcon.  For a character constant
 * the value is built up in LXintcon and LEXVAL.intcon is pointed at it.
 * An unescaped newline or EOF ends the constant with an error.
 *
 * NOTE(review): the malloc() result is not checked, and strcpy() stops
 * at the first NUL, so a string containing an embedded \0 is cut short
 * there even though room for i+1 bytes is allocated.
 */
private Lxstr(flagchar)
Integer flagchar;
{
	register c;
	register val;
	register i;		/* number of characters seen so far */
	char strcon[LXTSZ];	/* text of a string constant */

	i=0;
	while ((c=getchar()) != flagchar) {
		switch(c) {

		case EOF:
			uerror("unexpected EOF in char or string");
			break;	/* out of the switch, then out of the loop */

		case '\n':
			uerror("newline in string or char constant");
			++Lineno;
			break;	/* out of the switch, then out of the loop */

		case '\\':
			/* every case below leaves through goto or continue */
			switch(c = getchar()) {

			case '\n':
				/* backslash-newline: the line continues, nothing stored */
				++Lineno;
				continue;

			default:
				/* any other character stands for itself */
				val = c;
				goto mkcc;

			case 'n':
				val = '\n';
				goto mkcc;

			case 'r':
				val = '\r';
				goto mkcc;

			case 'b':
				val = '\b';
				goto mkcc;

			case 't':
				val = '\t';
				goto mkcc;

			case 'f':
				val = '\f';
				goto mkcc;

			case 'v':
				val = '\013';
				goto mkcc;

			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				/* octal escape of one to three digits */
				val = c-'0';
				c=getchar();  /* try for 2 */
				if (Lxclass(c, LEXOCT)) {
					val = (val<<3) | (c-'0');
					c = getchar();  /* try for 3 */
					if (Lxclass(c, LEXOCT)) {
						val = (val<<3) | (c-'0');
					}
					else ungetc(c ,stdin);
				} else
					ungetc(c ,stdin);

				/* a numeric value skips the CCTRANS step */
				goto mkcc1;

			}
		default:
			/* ordinary character */
			val =c;
		mkcc:
			/* presumably maps to the target character set -- confirm against CCTRANS */
			val = CCTRANS(val);
		mkcc1:
			if (flagchar == '\'') {
				/* it is, after all, a "character" constant */
				val = CHARCAST(val);
				/* presumably adds val to LXintcon.i_value as character number i -- confirm against makecc */
				makecc(LXintcon.i_value, val, i);
			} else {
				/* the error is reported once for every character that does not fit */
				if (i >= LXTSZ)
					uerror("string constant too long");
				else
					strcon[i] = val;
			}
			++i;
			continue;
		}
		/* reached only from the EOF and newline cases */
		break;
	}

	/* end of string or char constant */

	if (flagchar == '"') {
		/* an overlong string is cut so that the terminator fits */
		if (i >= LXTSZ)
			i = LXTSZ-1;
		strcon[i] = 0;
		LEXVAL.stringcon = strcpy(malloc((unsigned)i+1), strcon);
	} else {
		/* end the character constant */
		if (i == 0)
			uerror("empty character constant");
		if (i>(SZINT/SZCHAR))
			uerror("too many characters in character constant");
		LXintcon.i_islong = false;
		LEXVAL.intcon = &LXintcon;
	}
}

/*
 * Lxcom - skip a comment
 *
 * Called after the opening slash-star has been read; consumes input up
 * to and including the closing star-slash, counting newlines in Lineno.
 * When LintFlag is set, a word made of letters that spells VARARGS sets
 * VAflag: to 1 for plain VARARGS, or to n+1 when a single digit n
 * follows.  Hitting EOF inside the comment is an error.
 */
private Lxcom() {
	register int ch;

	while ((ch = getchar()) != EOF) {
		if (ch == '\n') {
			++Lineno;
		} else if (ch == '*') {
			ch = getchar();
			if (ch == '/')
				return;
			/* not the end; the character may itself be a '*' */
			ungetc(ch ,stdin);
		} else if (ch == 'V') {
			/* check for varargs if -L set */
			Lxget(ch, LEXLET);
			if (LintFlag && strcmp(LXtext, "VARARGS") == 0) {
				/* VAflag is arg# + 1 */
				ch = getchar();
				if (ch >= '0' && ch <= '9') {
					VAflag = (ch - '0')+1;
				} else {
					/* varargs */
					VAflag = 1;
					ungetc(ch, stdin);
				}
			}
		}
	}
	uerror("unexpected EOF");
}

/*
 * Lxres - check for reserved keywords
 *
 * Looks up the identifier held in LXtext.  Only identifiers that begin
 * with a lower-case letter are looked up at all.
 *
 * Returns:
 *	the token number (> 0), with LEXVAL set, for a reserved word;
 *	0 for "asm", after Lxgetasm has consumed the construct;
 *	-1 when LXtext is not reserved.
 */
private Lxres()
{
	register int first;
	register Keyword kw;

	first = LXtext[0];
	if (!islower(first))
		return(-1);

	kw = Keylook(LXtext);
	if (kw == (Keyword)0)
		return(-1);

	switch(kw->k_action) {

	case AR_TY:
		/* type keyword */
		LEXVAL.nameptr = Nlookup(kw->k_name);
		checkref(LEXVAL.nameptr);
		return(TYPE);

	case AR_RW:
		/* ordinary reserved word */
		LEXVAL.intval = kw->k_value;
		return(LEXVAL.intval);

	case AR_CL:
		/* class word */
		LEXVAL.intval = kw->k_value;
		return(CLASS);

	case AR_S:
		/* struct */
		LEXVAL.location = IN_STRUCT;
		return(STRUCT);

	case AR_U:
		/* union: same token as struct, different location */
		LEXVAL.location = IN_UNION;
		return(STRUCT);

	case AR_E:
		/* enums */
		LEXVAL.intval = ENUM;
		return(LEXVAL.intval);

	case AR_A:
		/* asm */
		Lxgetasm();
		return(0);

	default:
		cerror("bad AR_?? action");
		break;
	}

	/* only reached if cerror returns */
	return(-1);
}

/*
 * Lxcontrol - process # linenumber filename directives
 *
 * Called at the start of a line (after a newline, and once from Lxinit).
 * For as long as the next line begins with '#', reads a decimal line
 * number into Lineno and, if anything other than whitespace follows it,
 * copies the rest of the line into a static buffer and points Filename
 * at that buffer.  A line that does not begin with '#' is left unread.
 *
 * A file name that does not fit in the buffer is truncated, and EOF
 * ends a directive in the same way a newline does.
 */
private Lxcontrol()
{
	register c, val;
	register String cp;
	register int n;		/* characters stored in fname so far */
	static Byte fname[256]; /* stub ARH */

	for (;;) {  /* might be several such lines in a row */
		if ((c=getchar()) != '#') {
			if (c != EOF) ungetc(c,stdin);
			return;
		}

		/* skip whitespace, then gather the line number */
		Lxget(' ', LEXWS);
		val = 0;
		for (c=getchar(); isdigit(c); c=getchar()) {
			val = val*10+ c - '0';
		}
		ungetc(c, stdin);
		Lineno = val;

		/* anything left on the line is the file name */
		Lxget(' ', LEXWS);
		if ((c=getchar()) != '\n' && c != EOF) {
			cp = fname;
			n = 0;
			for (; c != '\n' && c != EOF; c = getchar()) {
				/* keep the last byte of fname for the terminator */
				if (n < (int)sizeof(fname) - 1) {
					*cp++ = c;
					++n;
				}
			}
			*cp = '\0';
			Filename = fname;
		}
	}
}

/*
 * Lxgetasm - snarf up asm() inline code
 *
 * Called after the keyword asm has been read.  Expects
 *	( "text" )
 * on the current line, with optional whitespace around the parentheses,
 * and discards it.  If the construct is malformed (a missing parenthesis
 * or quote, or a newline or EOF inside the quoted text), "bad asm
 * construction" is reported unless LintFlag is set.  A well-formed
 * construct is accepted silently.
 */
private Lxgetasm()
{
	register int c;
	register int bad;

	bad = 1;	/* malformed until the closing ')' has been seen */

	Lxget(' ', LEXWS);
	if (getchar() != '(') goto done;
	Lxget(' ', LEXWS);
	if (getchar() != '"') goto done;
	while ((c=getchar()) != '"') {
		if (c=='\n' || c==EOF) goto done;
	}
	Lxget(' ', LEXWS);
	if (getchar() == ')')
		bad = 0;

done:
	if (bad && !LintFlag)
		uerror("bad asm construction");
}

/*
 * Lxgetbcd - snatch a GCOS BCD constant
 */
private Lxgetbcd()
{
	register i;
	int j;

	for (i=0; i<LXTSZ; ++i) {
		if ((j = getchar()) == '`') break;
		if (j == '\n') {
			uerror("newline in BCD constant");
			break;
		}
		LXtext[i] = j;
	}
	LXtext[i] = '\0';
	if (i>6) uerror("BCD constant exceeds 6 characters");
#ifdef gcos
	else strtob(LXtext, &LXintcon.i_value, i);
	LXintcon.i_value >>= 6*(6-i);
#else
	uerror("gcos BCD constant illegal");
#endif
	LXintcon.i_islong = false;
	LEXVAL.intcon = &LXintcon;
	return(ICON);
}

/*
 * Lxgetnum - finish getting a number from LXtext
 *
 * On entry LXtext holds the leading decimal digits (possibly none, when
 * the number starts with a dot), LXgcp points just past them, and
 * lxchar is the character that followed them:
 *	x or X		hexadecimal constant; the digits must be exactly "0"
 *	.		floating constant, fraction follows
 *	e or E		floating constant, exponent follows
 *	anything else	pushed back; the digits are octal if they start
 *			with 0, otherwise decimal
 * LXintcon.i_value must be 0 on entry for the integer cases.
 *
 * Returns ICON with LEXVAL.intcon set, or FCON with LEXVAL.floatcon set.
 */
private Lxgetnum(lxchar)
register int lxchar;
{
	double atof();

	switch(lxchar) {
	case 'x':
	case 'X':
		/*
		 * Only a single "0" may precede the x; the conversion below
		 * relies on the hex digits starting at LXtext+2.
		 */
		if (LXtext[0] != '0' || LXtext[1])
			uerror("illegal hex constant");
		Lxmore(lxchar, LEXHEX);
		/* convert the value */
		{
			register char *cp;
			for (cp = LXtext+2; *cp; ++cp) {
				/* this code won't work for all wild character sets,
				   but seems ok for ascii and ebcdic */
				LXintcon.i_value <<= 4;
				if (isdigit(*cp)) LXintcon.i_value += *cp-'0';
				else if (isupper(*cp)) LXintcon.i_value += *cp - 'A'+ 10;
				else LXintcon.i_value += *cp - 'a'+ 10;
			}
		}

    hexlong:
		/* a hex or octal constant is long unless it fits
		 * within 0177777
		 */
		if (LXintcon.i_value & ~0177777L)
			LXintcon.i_islong = true;
		else
			LXintcon.i_islong = false;

		goto islong;

	case '.':
		/* append the dot and the fraction digits */
		Lxmore(lxchar, LEXDIG);

    getfp:
		if ((lxchar=getchar()) == 'e' || lxchar == 'E') { /* exponent */

	/* digits followed directly by e or E enter the exponent code here */
	case 'e':
	case 'E':
			if ((lxchar=getchar()) == '+' || lxchar == '-') {
				/* store the e; Lxmore then stores the sign first */
				*LXgcp++ = 'e';
			} else {
				/* no sign: Lxmore stores the e itself */
				ungetc(lxchar,stdin);
				lxchar = 'e';
			}
			Lxmore(lxchar, LEXDIG);
			/* now have the whole thing... */
		} else {  /* no exponent */
			ungetc(lxchar ,stdin);
		}
		LXfloatcon.f_value = atof(LXtext);
		LEXVAL.floatcon = &LXfloatcon;
		Ltrace(printf("A_DIG FCON %f\n",LXfloatcon.f_value));
		return(FCON);

	default:
		ungetc(lxchar ,stdin);
		if (LXtext[0] == '0') {
			/* convert in octal */
			register char *cp;
			for (cp = LXtext+1; *cp; ++cp) {
				LXintcon.i_value <<= 3;
				LXintcon.i_value += *cp - '0';
			}
			goto hexlong;
		} else {
			/* convert in decimal */
			register char *cp;
			for (cp = LXtext; *cp; ++cp) {
				LXintcon.i_value = LXintcon.i_value * 10 + *cp - '0';
			}
		}

		/* decide if it is long or not (decimal case) */

		/* if it is positive and fits in 15 bits, or
		 * negative and fits in 15 bits plus an
		 * extended sign, it is int; otherwise long
		 * if there is an l or L following, all bets
		 * are off...
		 */
		{	CONSZ v;
			v = LXintcon.i_value & ~077777L;
			if (v == 0 || v == ~077777L) LXintcon.i_islong = false;
			else LXintcon.i_islong = true;
		}

    islong:
		/* finally, look for trailing L or l */
		if ((lxchar = getchar()) == 'L' || lxchar == 'l')
			LXintcon.i_islong = true;
		else
			ungetc(lxchar ,stdin);
		LEXVAL.intcon = &LXintcon;
		Ltrace(printf("A_DIG ICON %d\n",LXintcon.i_value));
		return(ICON);
	}
}
