%{
 /**********************************************************************
 @Lex-file{
    author              = "Nelson H. F. Beebe",
    version             = "1.01",
    date                = "05 October 1993",
    time                = "14:07:22 MDT",
    filename            = "bibunlex.l",
    address             = "Center for Scientific Computing
                           Department of Mathematics
                           University of Utah
                           Salt Lake City, UT 84112
                           USA",
    telephone           = "+1 801 581 5254",
    FAX                 = "+1 801 581 4148",
    checksum            = "24051 544 1283 10629",
    email               = "beebe@math.utah.edu (Internet)",
    codetable           = "ISO/ASCII",
    keywords            = "BibTeX, bibliography, lexical analysis, lexer",
    supported           = "yes",
    docstring           = "Convert a lexical token stream from biblex
                           or bibclean back into a BibTeX file on
                           stdout.

                           Usage:
				bibunlex < infile > outfile
			   or
				bibunlex file(s) >outfile

			   This utility can be conveniently used at
                           the end of a biblex or bibclean pipeline
                           that filters BibTeX files for some purpose.

                           The checksum field above contains a CRC-16
                           checksum as the first value, followed by the
                           equivalent of the standard UNIX wc (word
                           count) utility output of lines, words, and
                           characters.  This is produced by Robert
                           Solovay's checksum utility.",
 }
 **********************************************************************/

/* Select between ANSI/ISO C (or C++) function syntax and old K&R syntax.
   NEW_STYLE is 1 when the compiler announces itself as standard C or
   C++, and 0 otherwise; the rest of the file tests it with #if. */
#if (defined(__cplusplus) || defined(__STDC__) || defined(c_plusplus))
#define NEW_STYLE 1
#else
#define NEW_STYLE 0
#endif

/* VOID stands for an empty parameter list in a function definition, and
   ARGS() wraps the parameter list of a prototype: under NEW_STYLE both
   expand to real ANSI syntax, otherwise VOID vanishes and ARGS() leaves
   just "()".  <stdlib.h> is included only in the NEW_STYLE case. */
#if NEW_STYLE
#define VOID	void
#define ARGS(parenthesized_list) parenthesized_list
#include <stdlib.h>
#else /* K&R style */
#define VOID
#define ARGS(parenthesized_list) ()
#endif /* NEW_STYLE */

/* Fallback exit codes for environments whose headers do not provide
   EXIT_SUCCESS (both names are defined together). */
#if !defined(EXIT_SUCCESS)
#define EXIT_SUCCESS	0
#define EXIT_FAILURE	1
#endif

#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include "bibyydcl.h"

/* Token types for BibTeX grammar.

   The numeric values are significant: dounlex() converts the number that
   starts each token-stream line directly into a token_t.  The names are
   in alphabetical order starting at 1, with 0 reserved for "unknown".
   The K&R macros and the enum must stay in step. */
#if !NEW_STYLE /* K&R style: plain int plus macros */
typedef int token_t;
#define	TOKEN_UNKNOWN	0
#define	TOKEN_ABBREV	1
#define	TOKEN_AT	2
#define	TOKEN_COMMA	3
#define	TOKEN_COMMENT	4
#define	TOKEN_ENTRY	5
#define	TOKEN_EQUALS	6
#define	TOKEN_FIELD	7
#define	TOKEN_INCLUDE	8
#define	TOKEN_INLINE	9
#define	TOKEN_KEY	10
#define	TOKEN_LBRACE	11
#define	TOKEN_LITERAL	12
#define	TOKEN_NEWLINE	13
#define	TOKEN_PREAMBLE	14
#define	TOKEN_RBRACE	15
#define	TOKEN_SHARP	16
#define	TOKEN_SPACE	17
#define	TOKEN_STRING	18
#define	TOKEN_VALUE	19
#else /* NEW_STYLE: a real enumeration with the same values */
typedef enum token_list {
    TOKEN_UNKNOWN  = 0,
    TOKEN_ABBREV   = 1,
    TOKEN_AT       = 2,
    TOKEN_COMMA    = 3,
    TOKEN_COMMENT  = 4,
    TOKEN_ENTRY    = 5,
    TOKEN_EQUALS   = 6,
    TOKEN_FIELD    = 7,
    TOKEN_INCLUDE  = 8,
    TOKEN_INLINE   = 9,
    TOKEN_KEY      = 10,
    TOKEN_LBRACE   = 11,
    TOKEN_LITERAL  = 12,
    TOKEN_NEWLINE  = 13,
    TOKEN_PREAMBLE = 14,
    TOKEN_RBRACE   = 15,
    TOKEN_SHARP    = 16,
    TOKEN_SPACE    = 17,
    TOKEN_STRING   = 18,
    TOKEN_VALUE    = 19
} token_t;
#endif /* !NEW_STYLE */

/* Token types for biblex/bibclean token stream grammar.

   These are the codes that the lex rules return to dounlex() to say
   what kind of item was just scanned on a token-stream line.  They
   start at 2001 and are consecutive. */

#if !NEW_STYLE /* K&R style: plain int plus macros */
typedef int biblex_token_t;
#define T_NUMBER	2001
#define T_NAME		2002
#define T_UNKNOWN	2003
#define T_VALUE		2004
#else /* NEW_STYLE: a real enumeration with the same values */
typedef enum t_list {
    T_NUMBER  = 2001,
    T_NAME    = 2002,
    T_UNKNOWN = 2003,
    T_VALUE   = 2004
} biblex_token_t;
#endif /* !NEW_STYLE */

/* Character helpers used when decoding escape sequences.

   digval(c)        numeric value of the decimal digit character c.
   isoctal(c)       nonzero when c is one of '0'..'7'.
   ishexadecimal(c) nonzero when c is a hexadecimal digit.

   isoctal() uses plain comparisons and ishexadecimal() casts through
   unsigned char, so neither hands a negative plain char to a <ctype.h>
   function.  ishexadecimal() no longer uses strchr(), which matched the
   string terminator and so reported '\0' as a hexadecimal digit.
   Note that isoctal() evaluates its argument twice. */
#define digval(c)	((int)((c) - '0'))
#define isoctal(c)	(((c) >= '0') && ((c) <= '7'))
#define ishexadecimal(c) (isxdigit((int)(unsigned char)(c)) != 0)

/* Prototypes for the functions defined after the rules section.  ARGS()
   reduces each parameter list to "()" when compiling in K&R style. */
static void		bad_token ARGS((void));
static void		dounlex ARGS((void));
static int		hexval ARGS((int c_));
int			main ARGS((int argc_, char *argv_[]));
static void		out_s ARGS((char *s_));
static void		out_space ARGS((int n_));
static void		out_string ARGS((const char *s_));
static void		out_token ARGS((token_t token_));
static char		*unescape ARGS((char *s_));

/* line_number: current input line; reset by dounlex(), incremented by
   the newline rule, and reported by bad_token().
   line_width: current output column as tracked by out_string().
   the_filename: name of the input being processed, set by main() and
   reported by bad_token(). */
static long		line_number = 1L;
static long		line_width = 0L;
static const char	*the_filename = "";

#define	ERROR_PREFIX	"??"	/* this prefixes all error messages */

#define FIELD_INDENTATION 2	/* how far to indent "field = value," pairs */

/* May be overridden from the compiler command line. */
#if !defined(MAX_FIELD_LENGTH)
#define MAX_FIELD_LENGTH 12	/* "howpublished" */
#endif /* !defined(MAX_FIELD_LENGTH) */

/* Output column to which out_token() pads before a value.  The extra 3
   presumably accounts for the " = " between field name and value --
   TODO confirm. */
#define VALUE_INDENTATION	(FIELD_INDENTATION + MAX_FIELD_LENGTH + 3)

/* Replace any previous YYLMAX setting with 40960.  NOTE(review): this is
   presumably the size of lex's yytext buffer, enlarged so that a long
   value line fits in a single token -- confirm for the lex in use. */
#undef YYLMAX
#define YYLMAX	40960

  /* here is a lex grammar for the bibclean and biblex output token stream */

%}

W		[ \b\f\r\t\v]

%%
^[%?#].*$	; /* ignore messages and line number directives */

^[0-9]+		return (T_NUMBER); /* digits at start of line: dounlex() converts them to a token_t */

[A-Z]+		return (T_NAME); /* upper-case name: dounlex() discards it */

{W}		; /* ignore horizontal space */

["].*$		return (T_VALUE); /* from a double quote to end of line: the text that out_token() emits */

[\n]		line_number++; /* keep the line count used by bad_token() */

.		return (T_UNKNOWN); /* anything else is reported by dounlex() via bad_token() */
%%
/* Report the current lexer text (yytext) on stderr as an unrecognized
   token, prefixed by ERROR_PREFIX and located by input file name and
   line number.  Processing continues after the report. */
static void
bad_token(VOID)
{
    static const char *complaint = "Unrecognized token";

    (void)fprintf(stderr,
		  "%s \"%s\", line %ld: %s [%s].\n",
		  ERROR_PREFIX, the_filename, line_number, complaint, yytext);
}


/* Read the whole current input through yylex() and regenerate BibTeX
   text from it.

   A T_NUMBER sets the pending token type, a T_NAME is discarded, and a
   T_VALUE is handed to out_token() together with the pending type.
   Anything else is reported through bad_token().  The input line
   counter is restarted at 1 on entry. */
static void
dounlex(VOID)
{
    biblex_token_t kind;
    token_t pending = TOKEN_UNKNOWN;	/* type named by the latest number */

    line_number = 1L;
    for (;;)
    {
	kind = (biblex_token_t)yylex();
	if (!(kind > 0))		/* yylex() is finished */
	    break;

	if (kind == T_NUMBER)
	    pending = (token_t)atoi((const char*)&yytext[0]);
	else if (kind == T_VALUE)
	    out_token(pending);
	else if (kind != T_NAME)	/* names are silently discarded */
	    bad_token();
    }
}


/* Return the value (0..15) of the hexadecimal digit character c, in
   either letter case, or -1 if c is not a hexadecimal digit.

   NUL, negative values and values that do not fit in an unsigned char
   are rejected first: strchr() would otherwise match the terminator of
   the digit table (giving 16 for NUL), and the <ctype.h> functions are
   only defined for unsigned char values and EOF. */
#if NEW_STYLE
static int
hexval(int c)
#else /* K&R style */
static int
hexval(c)
int c;
#endif /* NEW_STYLE */
{
    static const char *hexdigits = "0123456789abcdef";
    const char *p;

    if ((c <= 0) || (c != (int)(unsigned char)c))
	return (-1);

    p = (const char *)strchr(hexdigits, isupper(c) ? tolower(c) : c);

    return ((p == (const char*)NULL) ? (-1) : (int)(p - hexdigits));
}


/* Program entry point.

   With file arguments, each file in turn is attached to stdin with
   freopen() and converted by dounlex(); a file that cannot be opened is
   reported on stderr and skipped.  Without arguments, standard input is
   converted.  The exit status is always EXIT_SUCCESS. */
#if NEW_STYLE
int
main(int argc, char *argv[])
#else /* K&R style */
int
main(argc, argv)
int argc;
char *argv[];
#endif /* NEW_STYLE */
{
    int k;
    int saved_errno;

    if (argc > 1)		/* files to unlex named on command line */
    {
	for (k = 1; k < argc; ++k)
	{
	    if (freopen(argv[k],"r",stdin) == (FILE*)NULL)
	    {
		/* fprintf() may change errno, so preserve the reason
		   for the open failure for perror(). */
		saved_errno = errno;
		(void)fprintf(stderr,
		    "\n%s Ignoring open failure on file [%s]\n",
		    ERROR_PREFIX, argv[k]);
		errno = saved_errno;
		(void)perror("perror() says");
	    }
	    else
	    {
		the_filename = argv[k];
		dounlex();
		/* stdin is deliberately NOT closed here: the next
		   freopen() closes the old file itself, and a FILE
		   pointer must not be used again after fclose(). */
	    }
	}
    }
    else				/* unlex stdin */
    {
	the_filename = "stdin";
	dounlex();
    }
    exit (EXIT_SUCCESS);
    return (0);			/* not reached */
}


/* Output one quoted token value: strip the enclosing double quotes,
   convert escape sequences with unescape(), and write the result with
   out_string().

   s[] is modified.  The first character is assumed to be the opening
   quote (the T_VALUE rule guarantees this) and is always skipped.  The
   closing quote is removed only if it is really there; horizontal
   space after it (for example the CR of a CRLF line ending) is removed
   with it.  A value with no closing quote is output unshortened rather
   than losing its last character.  An empty string produces no output. */
#if NEW_STYLE
static void
out_s(char *s)			/* output a string, wrapping long lines */
#else /* K&R style */
static void
out_s(s)			/* output a string, wrapping long lines */
char *s;
#endif /* NEW_STYLE */
{				/* s[] is modified on return */
    char *end;

    if (*s == '\0')		/* not even an opening quote */
	return;

    /* Find the last character that is not trailing horizontal space,
       never backing up onto the opening quote at s[0]. */
    end = s + strlen(s);
    while ((end > s + 1) &&
	   (strchr(" \b\f\r\t\v", end[-1]) != (char*)NULL))
	--end;

    if ((end > s + 1) && (end[-1] == '"'))
	end[-1] = '\0';		/* zap final quote and what follows it */

    out_string(unescape(s+1));	/* discard initial quote */
}


/* Write n blanks through out_string(); nothing is written when n is
   zero or negative. */
#if NEW_STYLE
static void
out_space(int n)
#else /* K&R style */
static void
out_space(n)
int n;
#endif /* NEW_STYLE */
{
    while (n > 0)
    {
	out_string(" ");
	n--;
    }
}


/* Write the string s to stdout one character at a time, keeping
   line_width equal to the current output column: a carriage return or
   newline resets it to 0, a tab advances it to the next multiple of 8,
   and any other character adds 1. */
#if NEW_STYLE
static void
out_string(const char *s)
#else /* K&R style */
static void
out_string(s)
const char *s;
#endif /* NEW_STYLE */
{
    for (; *s; ++s)
    {
	switch (*s)
	{
	case '\r':
	case '\n':
	    line_width = 0L;
	    break;

	case '\t':
	    /* A tab always moves at least one column, so add 8 (not 7)
	       before rounding down: column 0 goes to 8 and 8 to 16. */
	    line_width = (line_width + 8L) & ~7L;
	    break;

	default:
	    line_width++;
	    break;
	}
	putchar(*s);
    }
}


/* Emit the value currently held in yytext as a token of the given type.

   Every recognized token is written with out_s(); the token type only
   decides how many blanks are written first:
     - an abbreviation directly after "=" and a value not preceded by
       space are padded out to column VALUE_INDENTATION;
     - "=" and "#" get one blank unless space was just output;
     - a field name gets FIELD_INDENTATION blanks unless space was just
       output.
   An unrecognized token type is reported with bad_token() and produces
   no output.  The type is remembered for the next call either way. */
#if NEW_STYLE
static void
out_token(token_t token)
#else /* K&R style */
static void
out_token(token)
token_t token;
#endif /* NEW_STYLE */
{		/* ALL token output is directed through this function */
    static token_t last_token = TOKEN_UNKNOWN;
    int pad = 0;			/* blanks to write before the text */
    int recognized = 1;

    switch (token)
    {
    case TOKEN_ABBREV:
	if (last_token == TOKEN_EQUALS)
	    pad = (int)(VALUE_INDENTATION - line_width);
	break;

    case TOKEN_VALUE:
	if (last_token != TOKEN_SPACE)
	    pad = (int)(VALUE_INDENTATION - line_width);
	break;

    case TOKEN_EQUALS:
    case TOKEN_SHARP:
	if (last_token != TOKEN_SPACE)
	    pad = 1;
	break;

    case TOKEN_FIELD:
	if (last_token != TOKEN_SPACE)
	    pad = FIELD_INDENTATION;
	break;

    case TOKEN_AT:			/* these are written as they are */
    case TOKEN_COMMA:
    case TOKEN_COMMENT:
    case TOKEN_ENTRY:
    case TOKEN_INCLUDE:
    case TOKEN_INLINE:
    case TOKEN_KEY:
    case TOKEN_LBRACE:
    case TOKEN_LITERAL:
    case TOKEN_NEWLINE:
    case TOKEN_PREAMBLE:
    case TOKEN_RBRACE:
    case TOKEN_SPACE:
    case TOKEN_STRING:
	break;

    case TOKEN_UNKNOWN:
    default:
	recognized = 0;
	break;
    }

    if (recognized)
    {
	out_space(pad);			/* writes nothing when pad <= 0 */
	out_s((char*)&yytext[0]);
    }
    else
	bad_token();

    last_token = token;
}


/* Convert the backslash escape sequences in s to the characters they
   stand for, in place, and return s.  The result is never longer than
   the input.

   Recognized after a backslash:
     - one to three octal digits;
     - a, b, f, n, r, t, v with their usual C meanings;
     - x followed by one or more hexadecimal digits (all of them are
       consumed and the low 8 bits of the value are kept); an x with no
       hexadecimal digit after it stands for itself;
     - any other character stands for itself.
   A lone backslash at the very end of the string is dropped.  The scan
   never moves past the terminating NUL. */
#if NEW_STYLE
static char*
unescape(char *s)	/* convert escape sequences to chars in place */
#else /* K&R style */
static char*
unescape(s)
char *s;
#endif /* NEW_STYLE */
{
    int c;
    int d;				/* value of one hexadecimal digit */
    int n;				/* number of octal digits consumed */
    char *original = s;
    char *t;				/* target string (overwrites s[]) */

    for (t = s ; *s; )
    {
	if (*s != '\\')
	{
	    *t++ = *s++;
	    continue;
	}

	++s;				/* step over the backslash */
	switch (*s)
	{
	case '\0':			/* backslash was the last character: */
	    continue;			/* s stays on the NUL, ending the loop */

	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	    /* At most three digits in all, so that a digit following a
	       three-digit escape is kept as ordinary text. */
	    c = digval(*s);
	    ++s;
	    for (n = 1; (n < 3) && isoctal(*s); n++, s++)
		c = 8*c + digval(*s);
	    break;

	case 'a':
	    ++s;
	    c = '\007';			/* '\a' */
	    break;

	case 'b':
	    ++s;
	    c = '\b';
	    break;

	case 'f':
	    ++s;
	    c = '\f';
	    break;

	case 'n':
	    ++s;
	    c = '\n';
	    break;

	case 'r':
	    ++s;
	    c = '\r';
	    break;

	case 't':
	    ++s;
	    c = '\t';
	    break;

	case 'v':
	    ++s;
	    c = '\v';
	    break;

	case 'x':
	    if ((s[1] == '\0') || (hexval((int)(unsigned char)s[1]) < 0))
	    {				/* no digits: x stands for itself */
		c = *s++;
		break;
	    }
	    /* The explicit NUL test keeps the scan inside the string no
	       matter what hexval() returns for 0.  Masking to 8 bits
	       prevents overflow on long digit strings. */
	    c = 0;
	    for (++s;
		 (*s != '\0') &&
		 ((d = hexval((int)(unsigned char)*s)) >= 0);
		 ++s)
		c = ((c << 4) | d) & 0xff;
	    break;

	default:
	    c = *s++;
	    break;
	}
	*t++ = (char)c;
    }
    *t = '\0';
    return (original);
}
