%{
 /**********************************************************************
 @Yacc-file{
    author              = "Nelson H. F. Beebe",
    version             = "1.06",
    date                = "23 September 2004",
    time                = "15:06:41 MDT",
    filename            = "bibparse.y",
    address             = "University of Utah
                           Department of Mathematics, 110 LCB
                           155 S 1400 E RM 233
                           Salt Lake City, UT 84112-0090
                           USA",
    telephone           = "+1 801 581 5254",
    FAX                 = "+1 801 581 4148",
    URL                 = "http://www.math.utah.edu/~beebe",
    checksum            = "19519 556 1660 13859",
    email               = "beebe@math.utah.edu, beebe@acm.org,
                           beebe@computer.org (Internet)",
    codetable           = "ISO/ASCII",
    keywords            = "BibTeX, bibliography, lexical analysis, lexer,
                           parsing, parser",
    supported           = "yes",
    docstring           = "This yacc file defines a parser for a
                           prototype BibTeX grammar.

                           The resulting program reads BibTeX data,
                           and/or BibTeX lexical token streams produced
                           by biblex or bibclean, from one or more files
                           specified on the command line, or standard
                           input, and parses the token stream to verify
                           its conformance to the BibTeX grammar.

                           If a command-line -d (or -D) option is
                           specified, debug mode is selected, and
                           extensive commentary on the parsing actions
                           will be output.  It will be necessary to
                           have a copy of the y.output file produced
                           by bison or yacc in order to make sense out
                           of the state and rule numbers in this
                           output.

                           The checksum field above contains a CRC-16
                           checksum as the first value, followed by the
                           equivalent of the standard UNIX wc (word
                           count) utility output of lines, words, and
                           characters.  This is produced by Robert
                           Solovay's checksum utility.",
 }
 **********************************************************************/

#include <config.h>

#include <stdio.h>
#include <string.h>
#include <ctype.h>

#if defined(HAVE_STDLIB_H)
#include <stdlib.h>
#endif

#if HAVE_ALLOCA_H
 /* needed for alloca() macro definition if bison is used instead of yacc */
#include <alloca.h>
#endif

#include "args.h"
#include "bibyydcl.h"

static void		doparse ARGS((void));
int			main ARGS((int argc_, char *argv_[]));
static int		nextchar ARGS((void));
static void		recognize ARGS((const char *s_));
int			yyparse ARGS((void));
static void		yywarning ARGS((const char *s_));

#if defined(__cplusplus)
extern "C" {
#endif

/* Pointer to the lexical analyzer chosen by doparse() for the current
   input; yylex() calls through it. */
static int		(*pyylex) ARGS((void));
/* Lexer for BibTeX text; per the note in yylex(), this is the yylex()
   of biblex.c and returns token values biased by 1000. */
int			xxlex ARGS((void));
/* Lexer for token streams; defined at the end of this file. */
int			zzlex ARGS((void));

#if defined(__cplusplus)
}
#endif

/* Number of calls to yyerror() so far; main() returns EXIT_FAILURE
   when it is nonzero. */
int			error_count;
char			*program_name;	/* for error messages */

/* These variables are defined in biblex.c: */
extern int		do_lex_output;
extern long		line_number;
extern const char	*the_filename;
extern char		yytext[BIBYYLMAX];

#define	ERROR_PREFIX	"??"	/* this prefixes all error messages */
#define WARNING_PREFIX	"%%"	/* this prefixes all warning messages */

#define RECOGNIZE(s)	recognize(s)

#define YYDEBUG		1		/* need for -d option support */

#if defined(_CRAY)
extern long yydebug;
#else
extern int yydebug;
#endif

%}
/* Token numbers are assigned explicitly here rather than being left to
   yacc.  yylex() at the end of this file must return these numbers:
   zzlex() reads them directly from a token stream, and the values
   obtained from xxlex() have a bias of 1000 removed first. */
%token TOKEN_ABBREV	1
%token TOKEN_AT		2
%token TOKEN_COMMA	3
%token TOKEN_COMMENT	4
%token TOKEN_ENTRY	5
%token TOKEN_EQUALS	6
%token TOKEN_FIELD	7
%token TOKEN_INCLUDE	8
%token TOKEN_INLINE	9
%token TOKEN_KEY	10
%token TOKEN_LBRACE	11
%token TOKEN_LITERAL	12
%token TOKEN_NEWLINE	13
%token TOKEN_PREAMBLE	14
%token TOKEN_RBRACE	15
%token TOKEN_SHARP	16
%token TOKEN_SPACE	17
%token TOKEN_STRING	18
%token TOKEN_VALUE	19

 /**********************************************************************
 Without the following precedence specifications, the BibTeX grammar
 is ambiguous, since the two productions

 bibtex_value:		bibtex_value bibtex_space TOKEN_SHARP
				bibtex_simple_value

 bibtex_assignment:	bibtex_assignment_lhs TOKEN_EQUALS bibtex_value

 result in a shift/reduce conflict when the input stack contains

	bibtex_assignment_lhs TOKEN_EQUALS bibtex_value

 and the next token is one of the bibtex_space tokens.  We could shift
 that token to match the start of the bibtex_value production, or we
 could reduce to bibtex_assignment using the second production.

 The way to remove the ambiguity is described in the book

 @String{pub-PH          = "Pren{\-}tice-Hall"}
 @String{pub-PH:adr      = "Englewood Cliffs, NJ 07632, USA"}

 @Book{Holub:CDC90,
  author =       "Allen I. Holub",
  title =        "Compiler Design in {C}",
  publisher =    pub-PH,
  address =      pub-PH:adr,
  year =         "1990",
  note =         pub-PH # " Software Series, Editor: Brian W.
                 Kernighan.",
  ISBN =         "0-13-155045-4",
 }

 on pp. 871--875.

 The authors of the book

 @String{pub-AW          = "Ad{\-d}i{\-s}on-Wes{\-l}ey"}
 @String{pub-AW:adr      = "Reading, MA, USA"}

 @Book{Aho:CPT86,
  author =       "Alfred V. Aho and Ravi Sethi and Jeffrey D. Ullman",
  title =        "Compilers\emdash Prin\-ci\-ples, Techniques, and
                 Tools",
  publisher =    pub-AW,
  address =      pub-AW:adr,
  year =         "1986",
  ISBN =         "0-201-10088-6",
 }

 on p. 201 point out that the ambiguity above is also met in the
 (in)famous dangling else problem, where the productions

 stmt: IF expr THEN stmt
     | IF expr THEN stmt ELSE stmt
     | OTHER

 produce a shift/reduce conflict when the input stack contains

	IF expr THEN stmt

 and the next input item is ELSE, since we could reduce using the
 first production, or shift using the second production.

 The grammar in this case is not LR(k) for any k, because there could
 be an unending chain of "ELSE xxx" tokens following in the input
 stream.

 The solution adopted by yacc and most other LALR(1) generators is to
 warn of the conflict, then shift, rather than reduce, adopting a
 "maximal munch" strategy that will match ELSE with the nearest
 preceding THEN.

 In our case, we want the "maximal munch" operation to consume

	v # v # ... # v

 as a bibtex_value before doing an assignment.  This is easily done by
 declaring TOKEN_EQUALS to have LOWER precedence than space, and space
 to have lower precedence than TOKEN_SHARP.
 **********************************************************************/

/* Precedence increases from one declaration to the next: TOKEN_EQUALS
   is lowest, the three space tokens are in the middle, and TOKEN_SHARP
   is highest. */
%nonassoc TOKEN_EQUALS
%left TOKEN_SPACE TOKEN_INLINE TOKEN_NEWLINE
%left TOKEN_SHARP

%%
/* Start symbol: the input is either nothing but optional space, or a
   list of objects with optional space before and after it. */
file:		  opt_space
			{RECOGNIZE("file-1");}
		| opt_space object_list opt_space
			{RECOGNIZE("file-2");}
		;

/* One or more objects, separated by optional space (left recursive). */
object_list:	  object
			{RECOGNIZE("object-1");}
		| object_list opt_space object
			{RECOGNIZE("object-2");}
		;

/* Every object begins with TOKEN_AT, followed by optional space and
   the body of the object. */
object:	  	  TOKEN_AT opt_space at_object
			{RECOGNIZE("object");}
		;

/* The part of an object that follows TOKEN_AT: one of the five object
   kinds.  The last alternative uses yacc's predefined error token so
   that, after a syntax error, the parser can resynchronize at the next
   TOKEN_RBRACE. */
at_object:	  comment
			{RECOGNIZE("comment");}
		| entry
			{RECOGNIZE("entry");}
		| include
			{RECOGNIZE("include");}
		| preamble
			{RECOGNIZE("preamble");}
		| string
			{RECOGNIZE("string");}
		| error TOKEN_RBRACE
			{RECOGNIZE("error");}
		;

/* TOKEN_COMMENT, optional space, then a single TOKEN_LITERAL. */
comment:	  TOKEN_COMMENT opt_space
			TOKEN_LITERAL
			{RECOGNIZE("comment");}
		;

/* An entry is an entry_head followed by (1) an assignment list, (2) an
   assignment list with a trailing comma and optional space, or (3)
   nothing at all, and in every case a closing TOKEN_RBRACE. */
entry:		  entry_head
			assignment_list
			TOKEN_RBRACE
			{RECOGNIZE("entry-1");}
		| entry_head
			assignment_list
			TOKEN_COMMA opt_space
			TOKEN_RBRACE
			{RECOGNIZE("entry-2");}
		| entry_head TOKEN_RBRACE
			{RECOGNIZE("entry-3");}
		;

/* Everything up to and including the comma after the key: TOKEN_ENTRY,
   TOKEN_LBRACE, the key name, and TOKEN_COMMA, each followed by
   optional space. */
entry_head:	  TOKEN_ENTRY opt_space
			TOKEN_LBRACE opt_space
			key_name opt_space
			TOKEN_COMMA opt_space
			{RECOGNIZE("entry_head");}
		;

/* The key in an entry_head may arrive as TOKEN_KEY or as TOKEN_ABBREV. */
key_name:	  TOKEN_KEY
			{RECOGNIZE("key_name-1");}
		| TOKEN_ABBREV
			{RECOGNIZE("key_name-2");}
		;

/* TOKEN_INCLUDE, optional space, then a single TOKEN_LITERAL. */
include:	  TOKEN_INCLUDE opt_space
			TOKEN_LITERAL
			{RECOGNIZE("include");}
		;

/* TOKEN_PREAMBLE followed by a braced value; optional space is allowed
   around the keyword, the left brace, and the value. */
preamble:	  TOKEN_PREAMBLE opt_space
			TOKEN_LBRACE opt_space
			value opt_space
			TOKEN_RBRACE
			{RECOGNIZE("preamble");}
		;

/* TOKEN_STRING followed by exactly one braced assignment. */
string:		  TOKEN_STRING opt_space
			TOKEN_LBRACE opt_space
			assignment
			opt_space TOKEN_RBRACE
			{RECOGNIZE("string");}
		;

/* One or more simple values joined by TOKEN_SHARP, with optional space
   on either side of the operator (left recursive).  The second
   alternative contains two mid-rule actions, one just before and one
   just after TOKEN_SHARP, in addition to the final action. */
value:	  	  simple_value
			{RECOGNIZE("value-1");}
		| value opt_space
			{RECOGNIZE("value-1-1");}
			TOKEN_SHARP
			{RECOGNIZE("value-1-2");}
			opt_space simple_value
			{RECOGNIZE("value-2");}
		;

/* A single operand of a value: TOKEN_VALUE or TOKEN_ABBREV. */
simple_value:	  TOKEN_VALUE
			{RECOGNIZE("simple_value-1");}
		| TOKEN_ABBREV
			{RECOGNIZE("simple_value-2");}
		;

/* One or more assignments separated by TOKEN_COMMA; optional space may
   follow each comma (left recursive). */
assignment_list:  assignment
			{RECOGNIZE("single assignment");}
		| assignment_list
			TOKEN_COMMA opt_space
			assignment
			{RECOGNIZE("assignment-list");}
		;

/* lhs = value, with optional space around TOKEN_EQUALS and after the
   value.  The "assignment-0" action is a mid-rule action placed
   between the equals sign (plus optional space) and the value. */
assignment:	  assignment_lhs opt_space
			TOKEN_EQUALS opt_space
			{RECOGNIZE("assignment-0");}
			value opt_space
			{RECOGNIZE("assignment");}
		;

/* The left-hand side of an assignment: TOKEN_FIELD or TOKEN_ABBREV. */
assignment_lhs:	  TOKEN_FIELD
			{RECOGNIZE("assignment_lhs-1");}
		| TOKEN_ABBREV
			{RECOGNIZE("assignment_lhs-2");}
		;

/* Zero or more space tokens; the first alternative matches nothing. */
opt_space:	/* empty */
			{RECOGNIZE("opt_space-1");}
		| space
			{RECOGNIZE("opt_space-2");}
		;

/* One or more space tokens (left recursive). */
space:		  single_space
			{RECOGNIZE("single space");}
		| space single_space
			{RECOGNIZE("multiple spaces");}
		;

/* Any one of the three space tokens; these alternatives have no
   actions. */
single_space:	  TOKEN_SPACE
		| TOKEN_INLINE
		| TOKEN_NEWLINE
		;
%%

/* end of grammar */

static void
doparse(VOID)
{
    /* Parse everything available on stdin, which holds either a
       complete BibTeX file or a complete BibTeX token stream. */
    int first;				/* one-character lookahead */

    line_number = 1L;

    /* Peek at the first input character and push it straight back.  A
       token stream opens with a line number directive, so it starts
       with '#'; a BibTeX file does not.  That is enough to decide
       which lexical analyzer yylex() should call. */
    first = getchar();
    (void)ungetc(first,stdin);

    if (first == '#')
	pyylex = zzlex;			/* token stream */
    else
	pyylex = xxlex;			/* BibTeX text */

    /* One call handles the entire input; its return value is not
       examined here. */
    (void)yyparse();
}


/***********************************************************************
 Program entry point.

 Every -d or -D argument turns on parser debugging (yydebug) and lexer
 output (do_lex_output) and is removed from argv[].  Each remaining
 argument is taken as a file name: the file is attached to stdin with
 freopen() and parsed by doparse().  A file that cannot be opened is
 reported on stderr and skipped.  With no file arguments, stdin itself
 is parsed.

 The exit code is EXIT_FAILURE if error_count is nonzero after all
 input has been parsed, and EXIT_SUCCESS otherwise.
 ***********************************************************************/
#if NEW_STYLE
int
main(int argc, char *argv[])
#else /* K&R style */
int
main(argc, argv)
int argc;
char *argv[];
#endif /* NEW_STYLE */
{
    int code;				/* error return code */
    int k;				/* index into argv[] */
    int n;				/* index into argv[] */
    FILE *fp;				/* pointer to current input file */

    do_lex_output = 0;			/* prevent output from xxlex() */
    error_count = 0;
    program_name = argv[0];

    k = 1;
    while (k < argc)
    {					/* check for command-line options */
	if ((strcmp(argv[k],"-d") == 0) || (strcmp(argv[k],"-D") == 0))
	{
	    yydebug = 1;
	    do_lex_output = 1;
	    /* Shuffle the remaining arguments down over the option.  The
	       last copy moves the terminating NULL in argv[argc] down as
	       well, so argv[] stays NULL-terminated. */
	    for (n = k; n < argc; ++n)
		argv[n] = argv[n+1];
	    argc--;			/* and reduce argument count */

	    /* Do NOT advance k here: slot k now holds the argument that
	       followed the option, and it must be examined too.
	       Otherwise the second option in "-d -d file" would be
	       mistaken for a file name. */
	}
	else
	    ++k;
    }

    if (argc > 1)		/* files to parse named on command line */
    {
	for (k = 1; k < argc; ++k)
	{
	    /* The lexers read from stdin, so attach each file to it */
	    fp = freopen(argv[k],"r",stdin);
	    if (fp == (FILE*)NULL)
	    {
		(void)fprintf(stderr,
			      "\n%s Ignoring open failure on file [%s]\n",
			      ERROR_PREFIX, argv[k]);
		(void)perror("perror() says");
	    }
	    else
	    {
		the_filename = argv[k];
		doparse();
		(void)fclose(fp);
	    }
	}
    }
    else				/* parse stdin */
    {
	the_filename = "stdin";
	doparse();
    }
    code = (error_count > 0) ? EXIT_FAILURE : EXIT_SUCCESS;

    exit(code);
    return (code);			/* NOT REACHED */
}


/* Read and return the next character from stdin, or EOF at end of
   input.  When yydebug is nonzero, each character read is also echoed
   to stdout. */
static int
nextchar (VOID)
{
    int c;

    c = getchar ();

    /* EOF is not a character: putchar(EOF) would convert it to unsigned
       char and write a spurious byte, so it is never echoed. */
    if (yydebug && (c != EOF))
	(void)putchar (c);
    return (c);
}



/* Report a recognized grammar construct: when yydebug is nonzero, print
   the label s in square brackets on a line of its own on stdout.
   Otherwise do nothing. */
#if NEW_STYLE
static void
recognize(const char *s)
#else
static void
recognize(s)
const char *s;
#endif
{
    if (!yydebug)
	return;				/* silent unless debugging */

    (void)printf("[%s]\n", s);
}


/* Error reporter called with a message s: count the error, then write
   the error prefix, the current file name and line number, the
   message, and the current contents of yytext to stderr. */
#if NEW_STYLE
void
yyerror(const char *s)
#else
void
yyerror(s)
const char *s;
#endif
{
    ++error_count;			/* main() derives the exit code from this */

    /* Flush stdout first, so that any pending output there is written
       before the message. */
    (void)fflush(stdout);

    (void)fprintf(stderr,
		  "%s \"%s\", line %ld: %s\tNext token = \"%s\"\n",
		  ERROR_PREFIX,
		  the_filename,
		  line_number,
		  s,
		  yytext);

    (void)fflush(stderr);
}


/* Lexer entry point called by the parser: fetch the next token from
   whichever lexical analyzer doparse() selected, and return it using
   the token numbering declared in the grammar. */
int
yylex (VOID)
{
    int token = (*pyylex) ();

    /* Token streams produced by biblex and "bibclean --no-prettyprint"
       carry unbiased token numbers, for compact output, so zzlex()
       results are returned unchanged.  xxlex() (== yylex() in
       biblex.c) biases token values by 1000; take that bias off. */
    if (pyylex != zzlex)
	token -= 1000;

    return (token);
}


/* Write a warning to stderr: the warning prefix, the message s, and the
   current contents of yytext.  Unlike yyerror(), this does not change
   error_count and does not print the file name or line number. */
#if NEW_STYLE
static void
yywarning(const char *s)
#else
static void
yywarning(s)
const char *s;
#endif
{
    /* Flush stdout first, so that any pending output there is written
       before the message. */
    (void)fflush(stdout);

    (void)fprintf(stderr,
		  "%s %s\tNext token = \"%s\"\n",
		  WARNING_PREFIX,
		  s,
		  yytext);

    (void)fflush(stderr);
}


/***********************************************************************
 Lexical analyzer for token streams.

 Input is read a line at a time through nextchar().  Only the first
 whitespace-delimited word on each line is examined; the rest of the
 line is discarded.

	word starts with '#'	the line is a comment and is skipped
	word starts with digit	the word is converted with strtol() and
				returned as the token number
	otherwise		the line is skipped if it ended with a
				newline; else 0 is returned at end of
				file, or -1 if neither applies

 line_number is incremented for every newline consumed, including the
 newlines of blank lines.
 ***********************************************************************/
int
zzlex (VOID)
{
    char buffer[BUFSIZ];	/* only needs space for a single number token */
    char *limit;		/* how far we can store into buffer[] */
    char *p;			/* pointer into buffer[] */
    int c;			/* current input character */

    limit = &buffer[sizeof(buffer) - 1]; /* address of last slot in buffer[] */

    for (;;)			/* `infinite' loop for ignoring comments and empty lines */
    {
	/* Skip leading whitespace.  Each pass through the outer loop
	   starts at the beginning of a line, so any newline seen here
	   ends a blank line, which must still be counted. */
	while (((c = nextchar ()) > 0) && isspace (c))
	{
	    if (c == '\n')
		line_number++;
	}

	/* Collect first non-blank token on line, truncating if too big
	   for buffer[].  If input is already exhausted (c <= 0), store a
	   NUL in the first slot instead of a converted EOF value, so the
	   tests on buffer[0] below cannot match. */
	p = &buffer[0];
	*p++ = (char) ((c > 0) ? c : '\0');
	while (((c = nextchar ()) > 0) && !isspace (c))
	{
	    if (p < limit)
		*p++ = (char) c;
	}
	*p = '\0';

	while ((c != '\n') && ((c = nextchar ()) > 0))
	    ;			/* skip rest of line */

	if (c == '\n')		/* should always be true */
	    line_number++;

	if (buffer[0] == '#')	/* comment: pseudo return tail-recursively */
	    continue;
	/* <ctype.h> functions require an argument representable as
	   unsigned char (or EOF); plain char may be negative. */
	else if (isdigit ((unsigned char) buffer[0]))	/* token number */
	    return ((int) strtol ((const char *) &buffer[0], (char **) NULL, 10));
	else if (c == '\n')	/* empty line: pseudo return tail-recursively */
	    continue;
	else if (feof (stdin))	/* assume end of file */
	    return (0);		/* standard lex end-of-file signal */
	else
	    return (-1);	/* should never happen */
    }
}
