/*
 * reader.c -- Implementation of Scheme reader
 *
 * (C) m.b (Matthias Blume); Apr 1992, HUB; Jan 1993 PU/CS
 *         Humboldt-University of Berlin
 *         Princeton University, Dept. of Computer Science
 *
 * $Id: reader.c,v 2.10 1994/11/12 22:22:41 blume Exp $
 */

# include "rcs.h"
RCSID ("$Id: reader.c,v 2.10 1994/11/12 22:22:41 blume Exp $")

# include <string.h>
# include <ctype.h>
# include <stdlib.h>
# include <stdio.h>

# include "reader.h"
# include "keyword.h"
# include "type.h"
# include "Cons.h"
# include "Cont.h"
# include "Vector.h"
# include "Character.h"
# include "String.h"
# include "Symbol.h"
# include "Boolean.h"
# include "Numeric.h"
# include "except.h"

# include "realloc.h"

static getc_proc gcp;
static ungetc_proc ugcp;
static void *stream;

static void *read_any (void);

static int
  next_nonwhite (void)
{
  int c;
  int in_comment;

  in_comment = 0;
  while ((c = (* gcp) (stream)) != EOF &&
	 (in_comment || c == ';' || isspace (c)))
    if (in_comment) {
      if (c == '\n')
        in_comment = 0;
    } else if (c == ';')
      in_comment = 1;
  return c;
}

enum {
  NO_TOK,			/* no token */
  EOF_TOK,			/* end of file */
  LPAR_TOK,			/* ( */
  RPAR_TOK,			/* ) */
  LBRAC_TOK,			/* [ */
  RBRAC_TOK,			/* ] */
  VPAR_TOK,			/* #( */
  VBRAC_TOK,			/* #[ */
  QUOT_TOK,			/* ' */
  QQUOT_TOK,			/* ` */
  COMMA_TOK,			/* , */
  SPLICE_TOK,			/* ,@ */
  DOT_TOK,			/* . */
  T_TOK,			/* #t */
  F_TOK,			/* #f */
  CHAR_TOK,			/* #\... */
  STRG_TOK,			/* "..." */
  IDNUM_TOK,			/* anything else */
  ERROR_TOK			/* lexical error */
};

static int current_tok;

void *
  ScmRead (getc_proc ngcp, ungetc_proc nugcp, void *nstream)
{
  void *res;
  int c;

  gcp = ngcp;
  ugcp = nugcp;
  stream = nstream;
  current_tok = NO_TOK;
  if ((c = next_nonwhite ()) == EOF)
    return &ScmEof;
  (* ugcp) (c, stream);
  res = read_any ();
  if (res == NULL)
    error ("unexpected end of input stream");
  return res;
}

static char *tb = NULL; /* text buffer for atoms */
static int tb_len = 0;
static int tb_pos = 0;
# define TB_SIZE_INCR 128

static int initialized = 0;

static void deallocate_all (void)
{
  if (tb != NULL)
    free (tb);
}

static void initialize (void)
{
  if (!initialized) {
    initialized = 1;
    atexit (deallocate_all);
  }
}

static void tb_reset (void)
{
  tb_pos = 0;
}

static void tb_insert (char c)
{
  if (tb_pos >= tb_len) {
    initialize ();
    tb_len += TB_SIZE_INCR;
    tb = REALLOC (tb, tb_len);
    if (tb == NULL) {
      tb_len = 0;
      reset ("Out of space (read/tb_insert)");
    }
  }
  tb [tb_pos++] = c;
}

# define DELIM "()[]`',\";"

static void fill_atomic (void)
{
  int c;

  while ((c = (* gcp) (stream)) != EOF && !isspace (c) &&
	 strchr (DELIM, c) == NULL)
    tb_insert (c);
  if (c != EOF)
    (* ugcp) (c, stream);
}

static int next_token (void)
{
  int c;

  tb_reset ();
  switch ((c = next_nonwhite ())) {
  case EOF:
    return EOF_TOK;
  case '(':
    return LPAR_TOK;
  case ')':
    return RPAR_TOK;
  case '[':
    return LBRAC_TOK;
  case ']':
    return RBRAC_TOK;
  case '\'':
    return QUOT_TOK;
  case '`':
    return QQUOT_TOK;
  case ',':
    if ((c = (* gcp) (stream)) == '@')
      return SPLICE_TOK;
    else if (c != EOF)
      (* ugcp) (c, stream);
    return COMMA_TOK;
  case '"':
    while ((c = (* gcp) (stream)) != '"') {
      if (c == '\\')
	switch ((c = (* gcp) (stream))) {
	case 'n': c = '\n'; break;
	case 'r': c = '\r'; break;
	case 'b': c = '\b'; break;
	case 't': c = '\t'; break;
	case 'a': c = '\a'; break;
	case 'v': c = '\v'; break;
	case 'f': c = '\f'; break;
	default: break;
	}
      if (c == EOF)
	return ERROR_TOK;
      tb_insert (c);
    }
    return STRG_TOK;
  case '#':
    switch ((c = (* gcp) (stream))) {
    case EOF:
      tb_insert ('#');
      return ERROR_TOK;
    case '(':
      return VPAR_TOK;
    case '[':
      return VBRAC_TOK;
    case 't':
    case 'T':
      return T_TOK;
    case 'f':
    case 'F':
      return F_TOK;
    case '\\':
      if ((c = (* gcp) (stream)) == EOF) {
	tb_insert ('#');
	tb_insert ('\\');
	return ERROR_TOK;
      }
      tb_insert (c);		/* at least one character */
      fill_atomic ();		/* maybe more */
      return CHAR_TOK;
    default:
      tb_insert ('#');
      tb_insert (c);
      fill_atomic ();
      return IDNUM_TOK;
    }
  default:
    tb_insert (c);
    fill_atomic ();
    if (tb_pos == 1 && tb [0] == '.')
      return DOT_TOK;
    return IDNUM_TOK;
  }
}

static int peek_token (void)
{
  if (current_tok == NO_TOK)
    current_tok = next_token ();
  return current_tok;
}

static int get_token (void)
{
  int t;
  if (current_tok == NO_TOK)
    return next_token ();
  t = current_tok;
  current_tok = NO_TOK;
  return t;
}

static void finish_up (int bracket, int t)
{
  if (t == RPAR_TOK)
    if (bracket)
      error ("mismatch -- opening bracket vs. closing parenthesis");
    else
      (void) get_token ();
  else
    if (bracket)
      (void) get_token ();
}

static
void *read_restlist (int bracket)
{
  int t;
  void *tmp;
  ScmCons *cons;

  Push (&ScmNil);
  while ((t = peek_token ()) != EOF_TOK &&
	 t != DOT_TOK && t != RPAR_TOK && t != RBRAC_TOK) {
    tmp = read_any ();
    if (tmp == NULL)
      goto eof_error;
    Push (tmp);
    SCM_NEW (cons, Cons);
    cons->car = POP ();
    cons->cdr = PEEK ();
    SET_TOP (cons);
  }
  if (t == EOF_TOK)
    goto eof_error;
  if (t == DOT_TOK) {
    (void) get_token ();
    tmp = read_any ();
    if (tmp == NULL)
      goto eof_error;
    while ((t = peek_token ()) != EOF_TOK && t != RPAR_TOK && t != RBRAC_TOK) {
      (void) get_token ();
      warning ("illegal extra stuff at end of improper list");
    }
    if (t == EOF_TOK)
      goto eof_error;
  } else
    tmp = &ScmNil;
  finish_up (bracket, t);
  return ScmReverseIP2 (POP (), tmp);
 eof_error:
  (void) POP ();
  return NULL;
}

static void *quote_like (void *key)
{
  void *tmp;
  ScmCons *r;

  Push (key);
  if ((tmp = read_any ()) == NULL) {
    (void) POP ();
    return NULL;
  }
  Push (tmp);
  SCM_NEW (r, Cons);
  r->cdr = &ScmNil;
  r->car = PEEK ();
  SET_TOP (r);
  SCM_NEW (r, Cons);
  r->cdr = POP ();
  r->car = POP ();
  return r;
}

static void *read_vector (int bracket)
{
  int cnt, t;
  void *tmp;
  ScmVector *v;

  cnt = 0;
  while ((t = peek_token ()) != EOF_TOK && t != RPAR_TOK && t != RBRAC_TOK) {
    if ((tmp = read_any ()) == NULL) {
      while (cnt-- > 0)
	(void) POP ();
      return NULL;
    }
    Push (tmp);
    ++cnt;
  }
  finish_up (bracket, t);
  SCM_NEW_VECTOR (v, cnt);
  while (cnt-- > 0)
    v->array [cnt] = POP ();
  return t == EOF_TOK ? NULL : v;
}

static void *read_char (void)
{
  int len, c, i, base, d;
  char *tmp;
  static char digits [] = "0123456789abcdef";

  static struct {
    char *name;
    char c;
  } chartab [] = {
    { "space", ' ' },
    { "newline", '\n' },
    { "nl", '\n' },
    { "tab", '\t' },
    { "ht", '\t' },
    { "alarm", '\a' },
    { "escape", '\033' },
    { "esc", '\033' },
    { "vtab", '\v' },
    { "vt", '\v' },
    { "return", '\r' },
    { "cr", '\r' },
    { "backspace", '\b' },
    { "bs", '\b' },
    { "backslash", '\\' },
  };

  len = tb_pos;
  tb_insert ('\0');
  c = tb [0];			/* keep the original first character */
  if (len > 1) {
    for (i = len; i-- > 0; tb [i] = tolower ((unsigned char) tb [i]));
    for (i = 0; i < sizeof chartab / sizeof chartab [0]; i++)
      if (strcmp (tb, chartab [i].name) == 0) {
	c = chartab [i].c;
	goto found;
      }
    switch (tb [0]) {
    case 'o': base = 8; break;
    case 'x': base = 16; break;
    case 'd': base = 10; break;
    case 'b': base = 2; break;
    default:
      warning ("bogus symbolic character specification: %s", tb);
      goto found;
    }
    c = 0;
    for (i = 1; i < len; i++)
      if ((tmp = strchr (digits, tb [i])) != NULL
	  && (d = tmp - digits) < base) {
	if ((c = base * c + d) > 0377) {
	  warning ("numeric character specification out of range: %s", tb);
	  break;
	}
      }	else {
	warning ("bogus numeric character specification: %s", tb);
	break;
      }
  }
 found:
  return &ScmCharacter_array [((int) c) & 0377];
}

static void *read_string (void)
{
  ScmString *strg;

  SCM_VNEW (strg, String, tb_pos, char);
  strg->length = tb_pos;
  memcpy (strg->array, tb, tb_pos);
  return strg;
}

static void *read_idnum (void)
{
  int i;
  void *tmp;

  if ((tmp = ScmParseNumberString (tb, tb_pos, 10)) != NULL)
    return tmp;
  for (i = 0; i < tb_pos; i++)
    tb [i] = tolower ((unsigned char) tb [i]);
  return ScmMakeSymbol (tb, tb_pos);
}

static void *read_any (void)
{
  int t;
  
  switch ((t = get_token ())) {
  case EOF_TOK:
    return NULL;
  case LPAR_TOK:
    return read_restlist (0);
  case LBRAC_TOK:
    return read_restlist (1);
  case RPAR_TOK:
  case RBRAC_TOK:
    error ("unbalanced parentheses");
  case VPAR_TOK:
    return read_vector (0);
  case VBRAC_TOK:
    return read_vector (1);
  case QUOT_TOK:
    return quote_like (ScmQuotePtr);
  case QQUOT_TOK:
    return quote_like (ScmQuasiquotePtr);
  case COMMA_TOK:
    return quote_like (ScmUnquotePtr);
  case SPLICE_TOK:
    return quote_like (ScmUnquoteSplicingPtr);
  case DOT_TOK:
    error ("bogus ``.'' in input");
  case T_TOK:
    return &ScmTrue;
  case F_TOK:
    return &ScmFalse;
  case CHAR_TOK:
    return read_char ();
  case STRG_TOK:
    return read_string ();
  case IDNUM_TOK:
    return read_idnum ();
  case ERROR_TOK:
    tb_insert ('\0');
    error ("unrecognized input: ``%s''", tb);
  default:
    fatal ("unknown token type (runtime system failure)");
  }
}
