#include <ctype.h>
#include <string.h>
#include "lex.h"


// Returns 1 when c may appear inside an identifier (a letter, a decimal
// digit or an underscore) and 0 otherwise.
inline int isidchar(int c)
{
    if (c == '_') return 1;
    if (isdigit(c)) return 1;
    return isalpha(c) ? 1 : 0;
}


struct lex_entry {
    erule_symbol sym;
    erule_symbol sym2;
    char char2;
    lex_entry() { sym = sym2 = LEX_SENTINEL;  char2 = 0; }
};
    
    
// Table of lex_entry records addressed by character code.  operator[]
// stores index c at table[c+1], so index -1 is valid in addition to
// 0..255; the constructor faults unless EOF == -1, i.e. slot 0 is the
// entry for EOF.
class lex_table {
    lex_entry table[257];  // Extra entry for EOF (index -1 maps to table[0])
public:
    lex_table();
#ifndef DEBUGI
    // Unchecked lookup: c must lie in [-1, 255].
    lex_entry& operator[](int c) { return table[c+1]; }
#else
    // The debugi version tickles a c++ compiler bug if made inline
    lex_entry& operator[](int c);
#endif
};


#ifdef DEBUGI
// Range-checked lookup used in DEBUGI builds: asserts that c lies in
// [-1, 255] before returning the entry stored at table[c+1].
// (No `static` here: a storage-class specifier is not permitted on the
// out-of-class definition of a member function.)
lex_entry& lex_table::operator[](int c)
{ AS(c < 256 && c >= -1);  return table[c+1]; }
#endif


// Fill in the entries for every punctuation token.  All other entries keep
// the LEX_SENTINEL values set by lex_entry's constructor.
// (No `static` here: a constructor definition may not carry a storage-class
// specifier.)
lex_table::lex_table()
{
    // operator[] maps index c to table[c+1]; the EOF slot is only at
    // table[0] when EOF is -1.
    if (EOF != -1) fault("lex_table broken (EOF != -1)");

    // Single-char tokens
    (*this)[','].sym = LEX_COMMA;
    (*this)[';'].sym = LEX_SEMI;
    (*this)[':'].sym = LEX_COLON;
    (*this)['-'].sym = LEX_MINUS;
    (*this)['+'].sym = LEX_PLUS;
    (*this)['.'].sym = LEX_DOT;
    (*this)['='].sym = LEX_EQ;
    (*this)['('].sym = LEX_LPAREN;
    (*this)[')'].sym = LEX_RPAREN;
    (*this)['<'].sym = LEX_LT;
    (*this)['>'].sym = LEX_GT;
    (*this)['%'].sym = LEX_PERCENT;

    // Potential double-char tokens: sym2 is the token produced when the
    // character is immediately followed by char2.
    (*this)['<'].sym2 = LEX_LE;  (*this)['<'].char2 = '=';
    (*this)['>'].sym2 = LEX_GE;  (*this)['>'].char2 = '=';
    (*this)['.'].sym2 = LEX_DOTDOT;  (*this)['.'].char2 = '.';
    (*this)[':'].sym2 = LEX_COLONCOLON;  (*this)[':'].char2 = ':';

    // Alternative double-char tokens
    /* "<>" is the only one -- handled by special case */
}

    

// Report error number n through erule_err.parse(), passing along the
// lexer's file name, current line, current line number and current column.
inline void erule_lex::char_error(int n)
{
    erule_err.parse(n,
                    filename(),
                    current_line(),
                    current_lineno(),
                    current_column());
}


// Construct a lexer.  max_id_length is stored in _max_id_length, which
// next_token() uses as the longest identifier it accepts (0 disables the
// limit).  tok_buf is constructed from erule_heap.
// NOTE(review): the first initializer has no name -- this looks like the
// pre-standard syntax for initializing a sole base class with &erule_heap;
// confirm against the class declaration in lex.h.
erule_lex::erule_lex(int max_id_length)
    : (&erule_heap), tok_buf(erule_heap)
    { _max_id_length = max_id_length; }


void erule_lex::double_token(char ch, char c2, erule_symbol s2)
{
    tok_buf.put(ch);
    int c = snextc();
    if (c == c2) { tok_buf.put(c);  snextc();  token = s2; }
}


void erule_lex::double_token(char ch,
			     char c2, erule_symbol s2,
			     char c3, erule_symbol s3)
{
    tok_buf.put(ch);
    int c = snextc();
    if (c == c2) { tok_buf.put(c);  snextc();  token = s2; }
    else if (c == c3) { tok_buf.put(c);  snextc();  token = s3; }
}


static lex_table ltable;  // should be local to next_token()

// Scan the next token from the input.
//
// Leading whitespace is skipped and tok_buf is rebuilt with the token's
// text.  The token code is stored in `token` and also returned:
//   - punctuation found in ltable (one or two characters),
//   - LEX_ID for identifiers (always uppercased; "END" becomes LEX_END;
//     characters beyond _max_id_length are dropped and ERR_ID_TOO_LONG is
//     reported),
//   - LEX_INT for decimal constants (value accumulated in _int_val),
//   - LEX_STRING for text quoted with ' or " (a doubled quote character
//     stands for one quote; tabs become spaces; the text is uppercased when
//     `upshift` is set; a newline or EOF inside the string reports
//     ERR_EOL_IN_STR),
//   - LEX_EOF at end of input (tok_buf left empty) or after an unclosed
//     { ... } comment (ERR_UNCLOSED_COMMENT is reported).
// Comments in braces are skipped.  Any other character is consumed and
// reported with ERR_UNKNOWN_SY; `token` then keeps the LEX_SENTINEL value
// read from the table.
erule_symbol erule_lex::next_token(erule_lex_upshift upshift)
{
    const char QUOTE1 = '\'';
    const char QUOTE2 = '"';
    const char SPACE = ' ';
    const char NEWLINE = '\n';
    const char TAB = '\t';
    const char LCOMMENT = '{';
    const char RCOMMENT = '}';

    tok_buf.clear();

    int c = sgetc();
    while (isspace(c)) c = snextc();

    token = ltable[c].sym;

    if (token != LEX_SENTINEL) {
        // Punctuation: token already holds the single-character reading.
        erule_symbol sym2 = ltable[c].sym2;
        if (sym2 == LEX_SENTINEL) { tok_buf.put(c);  snextc();  return token; }
        // '<' has two continuations: "<=" is LEX_LE and "<>" is LEX_NE.
        if (c == '<') { double_token(c, '=', LEX_LE, '>', LEX_NE);  return token; }
        double_token(c, ltable[c].char2, sym2);  return token;
    }

    if (isalpha(c)) {
        // Identifier
        token = LEX_ID;
        int id_length = 0;
        do {
            // Keep counting past the limit so the overflow can be reported,
            // but store only the first _max_id_length characters.
            if (!_max_id_length || id_length < _max_id_length) tok_buf.put(c);
            id_length++;
            c = snextc();
        } while (isidchar(c));  // pass the int unchanged: a char cast could go negative
        if (_max_id_length && id_length > _max_id_length)
            error(ERR_ID_TOO_LONG);
        tok_buf.uppercase();
        if (!strcmp("END", tok_buf)) token = LEX_END;
        return token;
    }

    if (isdigit(c)) {
        // Decimal constant
        token = LEX_INT;
        _int_val = 0;
        do {
            _int_val *= 10;  _int_val += c - '0';
            tok_buf.put(c); c = snextc();
        } while (isdigit(c));
        return token;
    }

    switch (c) {
    case QUOTE1 :
    case QUOTE2 : {
        // Braces keep `quote` out of the scope of the case labels below.
        const int quote = c;
        token = LEX_STRING;
        c = snextc();
        if (c != EOF) do {
            switch (c) {
            case QUOTE1:
            case QUOTE2:
                // The opening quote character ends the string unless it is
                // immediately doubled, which stands for a literal quote.
                if (c == quote) if (snextc() != quote) {
                    if (upshift) tok_buf.uppercase();
                    return token;
                }
                // Otherwise, fall through to default case
            default : tok_buf.put(c);  break;
            case TAB :
                tok_buf.put(SPACE);	// SCALD eats tabs
                break;
            case NEWLINE:
                if (upshift) tok_buf.uppercase();
                snextc();
                char_error(ERR_EOL_IN_STR);
                return token;
            }
        } while ((c = snextc()) != EOF);
        // EOF reached before the closing quote.
        if (upshift) tok_buf.uppercase();
        char_error(ERR_EOL_IN_STR);
        return token;
    }
    case LCOMMENT:
        // Parenthesize the assignment so c holds the character read, not
        // the result of the comparison; otherwise EOF is never detected.
        while ((c = snextc()) != RCOMMENT && c != EOF) ;
        if (c == EOF) {
            char_error(ERR_UNCLOSED_COMMENT);
            token = LEX_EOF;
        }
        else { snextc();  next_token(upshift); }  // scan the token after the comment
        return token;
    case EOF: token = LEX_EOF;  return token; // Leave tok_buf empty
    default:
        tok_buf.put(c);
        snextc();
        char_error(ERR_UNKNOWN_SY);
        return token;
    }
}


#define FERROR(num) { error(num);  return 0; }
// Parse a `FILE_TYPE = <identifier>` header, starting at the current token.
// Returns val() for the identifier on success.  On a mismatch the matching
// error (ERR_EXP_FILETYPE, ERR_EXP_EQ or ERR_EXP_ID) is reported through
// error() and 0 is returned.
const char* erule_lex::scald_file_type()
{
    // val() is only consulted when the current token is an identifier.
    const int at_keyword = (token == LEX_ID) && (strcmp(val(), "FILE_TYPE") == 0);
    if (!at_keyword) {
        FERROR(ERR_EXP_FILETYPE);
    }

    next_token();
    if (token != LEX_EQ) {
        FERROR(ERR_EXP_EQ);
    }

    next_token();
    if (token != LEX_ID) {
        FERROR(ERR_EXP_ID);
    }

    return val();
}


// Advance until the current token is a member of `tokens`, is LEX_END
// (always added to the set), or is a zero token.
// NOTE(review): the zero test presumably detects LEX_EOF -- confirm that
// LEX_EOF is 0 in lex.h.
void erule_lex::skip(smallset tokens)
{
    tokens |= LEX_END;
    for (;;) {
        if (!token) break;
        if (tokens[token]) break;
        next_token();
    }
}


void erule_lex::skip_to_id(const char* id)
{
    while (1) {
	skip(LEX_ID);
	if (token == LEX_ID && !strcmp(val(), id)) return;
        next_token();
    }
}


