/*
 * 5799-WZQ (C) COPYRIGHT IBM CORPORATION 1986
 * LICENSED MATERIALS - PROPERTY OF IBM
 * REFER TO COPYRIGHT INSTRUCTIONS FORM NUMBER G120-2083
 */
/* $Header:token.c 12.0$ */
/* $ACIS:token.c 12.0$ */
/* $Source: /ibm/acis/usr/src/ibm/fdb_ca/lib/src/RCS/token.c,v $ */

#ifndef lint
static char *rcsid = "$Header:token.c 12.0$";
#endif

/***************************************************************************\
* 									    *
* Name									    *
* 									    *
* tok_get: get the next token from a character string			    *
* 									    *
* Synopsis								    *
* 									    *
* int tok_get (in_str, tok_type, tok_start, tok_length, next_str)	    *
* unsigned char *in_str;						    *
* int *tok_type;							    *
* unsigned char **tok_start;						    *
* int *tok_length;							    *
* unsigned char **next_str;						    *
* 									    *
* Description								    *
* 									    *
* This routine returns information concerning the next token in in_str 	    *
* using C language tokenizing rules, as follows:			    *
* 									    *
* Parameters:								    *
* 									    *
*    in_str(in): the string to be scanned. After skipping any leading	    *
* 	   white space the first token found is returned.		    *
*    tok_type(out): the type of the token returned as follows-		    *
* 	   1 - alphanumeric						    *
* 	   2 - special, any non-alphanumeric which does not have a	    *
* 		special meaning as defined below.  Generally only one	    *
* 		character long, "C" multi-character operators are	    *
* 		returned as one token, e.g.				    *
* 	       == ++ =+ >> >>= etc.					    *
* 	   3 - left grouper, i.e. (, [, or {				    *
* 	   4 - right grouper, i.e. ), ], or }				    *
* 	   5 - comment, either ` ... \n or standard. Comment		    *
* 		delimiters not removed					    *
* 	   6 - integer, always unsigned.  No range checks are made.	    *
* 	   7 - explicit  long integer, always unsigned.  No range checks    *
* 	       are made.						    *
* 	   8 - floating point, always unsigned.  No range checks are	    *
* 		made.							    *
* 	   9 - string, i.e. enclosed in ". Constructions involving \	    *
* 		are not converted, i.e. they are left as they were	    *
* 		Note the "s are not removed.				    *
* 	  10 - character, i.e. enclosed in '. Constructions involving \	    *
* 		are not converted, i.e. they are left as they were.	    *
* 		Note the 's are not removed.				    *
* 	  11 - new line or equivalent control code (\f)			    *
* 	  12 - no token found, end of input string encountered		    *
* 	  13 - unexpected control character, i.e. <32 and not \n \f \t	    *
* 		or \0							    *
* 	  14 - unrecognized character, should never occur		    *
*    tok_start(out): a pointer to the first character of the token.  In     *
* 	the case of strings, characters, etc. enclosing marks are	    *
* 	considered to be part of the token				    *
*    tok_length(out): the number of characters in the token, always >= 1;   *
*         except when tok_type = 12 (i.e. end of string)		    *
*    next_str(out): a pointer to the character in in_str where		    *
* 	scanning for the next token can be started			    *
* 									    *
* Diagnostics								    *
* 									    *
* The following codes are returned as the value of the function:	    *
* 									    *
*    -1: parameter error						    *
*     0: normal return							    *
*     1: unexpected control character					    *
*     2: syntax error in token						    *
*     100 + n: unrecognized character, should never occur		    *
* 	       n is translation code of character			    *
* 									    *
* 									    *
* 									    *
* 									    *
* main translation table: used to translate any character into a 	    *
* 	small integer as follows:					    *
* 									    *
*   code  class								    *
*     0   whitespace 1: sp, \t						    *
*     1   alphabetic: A-Z, a-z, _, #, $					    *
*     2   digit: 0-9							    *
*     3   comment 1: `							    *
*     4   whitespace 2: \n, \f						    *
*     5   multi 1: !, *, %, ^, =					    *
* 	  symbol that can be followed by = only				    *
*     6   multi 2: |, &, +						    *
* 	  symbol that can be followed by = or itself			    *
*     7   multi 3: <, >							    *
* 	  symbol that can be followed by =, itself, or itself and =	    *
* 	  e.g. >, >=, >>, >>= are all valid				    *
*     8   multi 4: -							    *
* 	  symbol that can be followed by =, itself, or >		    *
*     9   comment 2: /							    *
*    10   string quote: "						    *
*    11   character quote: '						    *
*    12   possible number: .						    *
*    13   left grouper: (, [, {						    *
*    14   right grouper: ), ], }					    *
*    15   any character not otherwise covered				    *
*    16   end of string: \0						    *
*    17   unexpected control character: <32 and not \f \n \0 \t		    *
* 									    *
\***************************************************************************/
#include "c_lex.h"

/*
 * tok_get: scan the next token in in_str using C-like tokenizing rules.
 *
 * Characters are classified through the c_trans translation table.
 * Leading characters of class 0 (blank, tab) are skipped, then the token
 * that follows is described through the output parameters:
 *
 *    tok_type   - token type code (1 .. 13, or 0 for an unknown class)
 *    tok_start  - first character of the token (enclosing quotes and
 *		   comment delimiters are part of the token)
 *    tok_length - number of characters in the token
 *    next_str   - where scanning for the following token should resume
 *
 * Return value:
 *    -1	a parameter is a null pointer; no output is written
 *     0	normal return
 *     2	syntax error in token (unterminated comment, string or
 *		character constant); the outputs still describe what
 *		was scanned
 *     3	end of input string: tok_type is 12, tok_length is 1 and
 *		next_str is left pointing at the terminating \0
 *     100 + n	unrecognized translation code n (should never occur)
 *
 * NOTE(review): an unexpected control character yields tok_type 13 with
 * a return value of 0, although the file header documents a return value
 * of 1 for that case.  Likewise the value 3 for end of string is not
 * listed there.  Both are kept as they were because existing callers may
 * rely on them -- confirm before changing.
 */
int tok_get (in_str, tok_type, tok_start, tok_length, next_str)
unsigned char  *in_str;
int    *tok_type;
unsigned char  *(*tok_start);
int    *tok_length;
unsigned char  *(*next_str);
{
    int     code;		/* translation code of current character */
    int     rc;			/* return code */
    int     is_float;		/* set once a fraction or exponent is seen */

/* reject null parameters instead of dereferencing them */
    if (!in_str || !tok_type || !tok_start || !tok_length || !next_str)
	return (-1);

/* skip leading white space (blank, tab) */
    while (c_trans[*in_str] == 0)
	in_str++;

/* Primary switch; in_str is advanced past the first token character */
    *tok_start = in_str;
    rc = 0;			/* assume normal return */

    switch (c_trans[*in_str++]) {

	case 1: 		/* begin alphanumeric token */
	    *tok_type = 1;
	/* absorb every following alphabetic or digit */
	    code = c_trans[*in_str];
	    while (code == 1 || code == 2)
		code = c_trans[*++in_str];
	    break;

	case 12: 		/* possible number beginning with . */
	    if (c_trans[*in_str] != 2) {
	    /* no digit follows, so the . is a special */
		*tok_type = 2;
		break;
	    }
	    in_str--;		/* back up onto the . and let the number
				   code below handle it */
	/* FALLTHROUGH */

	case 2: 		/* begin numeric token */
	    is_float = 0;
	    while (c_trans[*in_str] == 2)
		in_str++;	/* scan over integer part */
	    if (*in_str == '.') {
		is_float = 1;
		in_str++;
		while (c_trans[*in_str] == 2)
		    in_str++;	/* scan over fraction part */
	    }
	    if (*in_str == 'e' || *in_str == 'E') {
	    /* exponent present, with or without a fraction */
		is_float = 1;
		in_str++;
		if (*in_str == '+' || *in_str == '-')
		    in_str++;
		while (c_trans[*in_str] == 2)
		    in_str++;	/* scan over exponent */
	    }
	    if (is_float) {
		*tok_type = 8;	/* floating point */
	    }
	    else
		if (*in_str == 'l' || *in_str == 'L') {
		    *tok_type = 7;/* explicit long integer */
		    in_str++;
		}
		else {
		    *tok_type = 6;/* regular integer */
		}
	    break;

	case 3: 		/* comment denoted by ` */
	    *tok_type = 5;
	/* scan to end of line (class 4) or end of string (class 16);
	   the terminator itself is not part of the token */
	    code = c_trans[*in_str];
	    while (code != 4 && code != 16) {
		code = c_trans[*++in_str];
	    }
	    break;

	case 4: 		/* whitespace 2: \n or \f */
	    *tok_type = 11;
	    break;

	case 9: 		/* possible standard comment */
	    if (*in_str == '*') {/* it is a standard comment */
		in_str++;
		*tok_type = 5;
		while (*in_str) {
		/* scan until end of string or end comment found */
		    if (*in_str == '*') {
			in_str++;
		    /* the character after a * is re-examined by the
		       loop, so a run such as **\/ still terminates */
			if (*in_str == '/') {
			    break;/* end of comment found */
			}
		    }
		    else {
			in_str++;
		    }
		}
		if (*in_str == '\0') {
		/* scan terminated by end of in_str so syntax error */
		    rc = 2;
		}
		else {
		    in_str++;	/* syntax ok, so skip / */
		}
		break;
	    }
	/* not a comment so treat / like a class 5 character */
	/* FALLTHROUGH */

	case 5: 		/* multi 1: can be followed by = only */
	    *tok_type = 2;
	    if (*in_str == '=')
		in_str++;
	    break;

	case 6: 		/* multi 2: can be followed by = or itself */
	    *tok_type = 2;
	    if (*in_str == '=' || **tok_start == *in_str)
		in_str++;
	    break;

	case 7: 		/* multi 3: can be followed by =, itself,
				   or itself and = */
	    *tok_type = 2;
	    if (*in_str == '=') {
		in_str++;
	    }
	    else
		if (*in_str == **tok_start) {
		    in_str++;
		    if (*in_str == '=')
			in_str++;
		}
	    break;

	case 8: 		/* multi 4: can be followed by =, itself,
				   or > */
	    *tok_type = 2;
	/* "itself" was previously not accepted, which split -- into
	   two separate tokens */
	    if (*in_str == '=' || *in_str == '>'
		    || *in_str == **tok_start)
		in_str++;
	    break;

	case 10: 		/* string quote " */
	/* scan until end of input string (in_str) or " */
	/* processing \ along the way */
	    *tok_type = 9;
	    while (*in_str && *in_str != '"') {
		if (*in_str == '\\') {
		/* \ found: skip it and the character it escapes,
		   unless the input ends right after the \ */
		    in_str++;
		    if (*in_str == '\0')
			break;
		}
		in_str++;
	    }
	    if (*in_str == '\0') {
	    /* scan terminated by end of in_str so syntax error */
		rc = 2;
	    }
	    else {
		in_str++;	/* syntax ok, so skip closing " */
	    }
	    break;

	case 11: 		/* character quote, ' */
	/* scan one character or \ construction or until end of in_str */
	    *tok_type = 10;
	    if (*in_str == '\'') {
	    /* empty constant '' is accepted as is */
		in_str++;
		break;
	    }
	    if (*in_str == '\\') {/* \ found so process */
		in_str++;
		if (c_trans[*in_str] == 2) {
		/* octal code found so scan past all its digits */
		    while (c_trans[*in_str] == 2)
			in_str++;
		}
		else {
		    if (*in_str) {
		    /* not an octal code so skip one character */
			in_str++;
		    }
		    else {	/* syntax error, end of string found
				   before closing mark */
			rc = 2;
			break;
		    }
		}
	    }
	    else {
		if (*in_str) {
		/* ordinary character, skip it */
		    in_str++;
		}
		else {		/* syntax error, end of string found
				   before closing mark */
		    rc = 2;
		    break;
		}
	    }
	    if (*in_str == '\'') {
		in_str++;	/* skip closing ' */
	    }
	    else {		/* syntax error, closing ' is missing */
		rc = 2;
	    }
	/* this break was missing: a valid character constant used to
	   fall into the left grouper case and be reported as type 3 */
	    break;

	case 13: 		/* left grouper */
	    *tok_type = 3;
	    break;

	case 14: 		/* right grouper */
	    *tok_type = 4;
	    break;

	case 15: 		/* any character not otherwise covered,
				   this includes most special characters
				   and almost all >127.  Treated as a one
				   character token */
	    *tok_type = 2;
	    break;

	case 16: 		/* end of string */
	    *tok_type = 12;
	    *tok_length = 1;
	/* do not advance scan position past end of string */
	    *next_str = *tok_start;
	    return (3);

	case 17: 		/* unexpected control character, i.e. not
				   \n, \t, \f, or \0 */
	    *tok_type = 13;
	    break;

	default: 		/* unrecognized character, this should
				   never occur */
	    *tok_type = 0;
	    rc = 100 + c_trans[**tok_start];
	    break;
    }				/* end of primary switch */

    *tok_length = (int) (in_str - *tok_start);
    *next_str = in_str;
    return (rc);
}
