NSIS/Docs/src/bin/halibut/input.c
2021-09-14 22:42:37 +00:00

1537 lines
40 KiB
C

/*
* input.c: read the source form
*/
#include <stdio.h>
#include <assert.h>
#include <time.h>
#include "halibut.h"
#define TAB_STOP 8 /* for column number tracking */
static void setpos(input * in, char *fname)
{
in->pos.filename = fname;
in->pos.line = 1;
in->pos.col = (in->reportcols ? 1 : -1);
}
static void unget(input * in, int c, filepos * pos)
{
if (in->npushback >= in->pushbacksize)
{
in->pushbacksize = in->npushback + 16;
in->pushback = resize(in->pushback, in->pushbacksize);
}
in->pushback[in->npushback].chr = c;
in->pushback[in->npushback].pos = *pos; /* structure copy */
in->npushback++;
}
/* ---------------------------------------------------------------------- */
/*
* Macro subsystem
*/
typedef struct macro_Tag macro;
struct macro_Tag {
wchar_t *name, *text;
};
struct macrostack_Tag {
macrostack *next;
wchar_t *text;
int ptr, npushback;
filepos pos;
};
static int macrocmp(void *av, void *bv)
{
macro *a = (macro *) av, *b = (macro *) bv;
return ustrcmp(a->name, b->name);
}
static void
macrodef(tree234 * macros, wchar_t * name, wchar_t * text, filepos fpos)
{
macro *m = mknew(macro);
m->name = name;
m->text = text;
if (add234(macros, m) != m)
{
error(err_macroexists, &fpos, name);
sfree(name);
sfree(text);
}
}
static int
macrolookup(tree234 * macros, input * in, wchar_t * name, filepos * pos)
{
macro m, *gotit;
m.name = name;
gotit = find234(macros, &m, NULL);
if (gotit)
{
macrostack *expansion = mknew(macrostack);
expansion->next = in->stack;
expansion->text = gotit->text;
expansion->pos = *pos; /* structure copy */
expansion->ptr = 0;
expansion->npushback = in->npushback;
in->stack = expansion;
return TRUE;
} else
return FALSE;
}
static void macrocleanup(tree234 * macros)
{
int ti;
macro *m;
for (ti = 0; (m = (macro *) index234(macros, ti)) != NULL; ti++)
{
sfree(m->name);
sfree(m->text);
sfree(m);
}
freetree234(macros);
}
/*
* Can return EOF
*/
static int get(input * in, filepos * pos)
{
int pushbackpt = in->stack ? in->stack->npushback : 0;
if (in->npushback > pushbackpt)
{
--in->npushback;
if (pos)
*pos = in->pushback[in->npushback].pos; /* structure copy */
return in->pushback[in->npushback].chr;
} else if (in->stack)
{
wchar_t c = in->stack->text[in->stack->ptr];
if (pos) *pos = in->stack->pos;
if (in->stack->text[++in->stack->ptr] == L'\0')
{
macrostack *tmp = in->stack;
in->stack = tmp->next;
sfree(tmp);
}
return c;
} else if (in->currfp)
{
int c = getc(in->currfp);
if (c == EOF)
{
fclose(in->currfp);
in->currfp = NULL;
}
/* Track line numbers, for error reporting */
if (pos)
*pos = in->pos;
if (in->reportcols)
{
switch (c)
{
case '\t':
in->pos.col = 1 + (in->pos.col + TAB_STOP - 1) % TAB_STOP;
break;
case '\n':
in->pos.col = 1;
in->pos.line++;
break;
default:
in->pos.col++;
break;
}
} else
{
in->pos.col = -1;
if (c == '\n')
in->pos.line++;
}
/* FIXME: do input charmap translation. We should be returning
* Unicode here. */
return c;
} else
return EOF;
}
/*
* Lexical analysis of source files.
*/
typedef struct token_Tag token;
struct token_Tag {
int type;
int cmd, aux;
wchar_t *text;
filepos pos;
};
enum {
tok_eof, /* end of file */
tok_eop, /* end of paragraph */
tok_white, /* whitespace */
tok_word, /* a word or word fragment */
tok_cmd, /* \command */
tok_lbrace, /* { */
tok_rbrace /* } */
};
#define tokiscmd(t,c) ( (t).type == tok_cmd && (t).cmd == (c) )
/* Halibut command keywords. */
enum {
c__invalid, /* invalid command */
c__comment, /* comment command (\#) */
c__escaped, /* escaped character */
c__nop, /* no-op */
c__nbsp, /* nonbreaking space */
c__midparacmd_unixnow,
c_A, /* appendix heading */
c_B, /* bibliography entry */
c_BR, /* bibliography rewrite */
c_C, /* chapter heading */
c_H, /* heading */
c_I, /* invisible index mark */
c_IM, /* index merge/rewrite */
c_K, /* capitalised cross-reference */
c_S, /* aux field is 0, 1, 2, ... */
c_U, /* unnumbered-chapter heading */
c_W, /* Web hyperlink */
c_L, /* Relative/local hyperlink */
c_b, /* bulletted list */
c_c, /* code */
c_cfg, /* configuration directive */
c_copyright, /* copyright statement */
c_cw, /* weak code */
c_date, /* document processing date */
c_define, /* macro definition */
c_e, /* emphasis */
c_i, /* visible index mark */
c_ii, /* uncapitalised visible index mark */
c_k, /* uncapitalised cross-reference */
c_R, /* free text cross-reference */
c_n, /* numbered list */
c_nocite, /* bibliography trickery */
c_preamble, /* document preamble text */
c_q, /* quote marks */
c_rule, /* horizontal rule */
c_title, /* document title */
c_u, /* aux field is char code */
c_versionid /* document RCS id */
};
#define getcmdstyle(c) \
(c) == c_c ? word_Code : \
(c) == c_cw ? word_WeakCode : \
(c) == c_e ? word_Emph : \
word_Normal
/* Perhaps whitespace should be defined in a more Unicode-friendly way? */
#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )
#define isnl(c) ( (c)==10 )
#define isdec(c) ( ((c)>='0'&&(c)<='9') )
#define fromdec(c) ( (c)-'0' )
#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))
#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))
/*
* Keyword comparison function. Like strcmp, but between a wchar_t *
* and a char *.
*/
static int kwcmp(wchar_t const *p, char const *q)
{
int i;
do
{
i = *p - *q;
}
while (*p++ && *q++ && !i);
return i;
}
/*
* Match a keyword.
*/
static void match_kw(token * tok)
{
/*
* FIXME. The ids are explicit in here so as to allow long-name
* equivalents to the various very short keywords.
*
* This list must be sorted, it's searched using binary search.
*/
static const struct {
char const *name;
int id;
} keywords[] = {
{
"#", c__comment}
, /* comment command (\#) */
{
"-", c__escaped}
, /* nonbreaking hyphen */
{ ".", c__nop },
{
"A", c_A}
, /* appendix heading */
{
"B", c_B}
, /* bibliography entry */
{
"BR", c_BR}
, /* bibliography rewrite */
{
"C", c_C}
, /* chapter heading */
{
"H", c_H}
, /* heading */
{
"I", c_I}
, /* invisible index mark */
{
"IM", c_IM}
, /* index merge/rewrite */
{
"K", c_K}
, /* capitalised cross-reference */
{
"L", c_L}
, /* Relative/local hyperlink */
{
"R", c_R}
, /* free text cross-reference */
{
"U", c_U}
, /* unnumbered-chapter heading */
{
"W", c_W}
, /* Web hyperlink */
{
"\\", c__escaped}
, /* escaped backslash (\\) */
{
"_", c__nbsp}
, /* nonbreaking space (\_) */
{
"b", c_b}
, /* bulletted list */
{
"c", c_c}
, /* code */
{
"cfg", c_cfg}
, /* configuration directive */
{
"copyright", c_copyright}
, /* copyright statement */
{
"cw", c_cw}
, /* weak code */
{
"date", c_date}
, /* document processing date */
{
"define", c_define}
, /* macro definition */
{
"e", c_e}
, /* emphasis */
{ "hackunixnow", c__midparacmd_unixnow },
{
"i", c_i}
, /* visible index mark */
{
"ii", c_ii}
, /* uncapitalised visible index mark */
{
"k", c_k}
, /* uncapitalised cross-reference */
{
"n", c_n}
, /* numbered list */
{
"nocite", c_nocite}
, /* bibliography trickery */
{
"preamble", c_preamble}
, /* document preamble text */
{
"q", c_q}
, /* quote marks */
{
"rule", c_rule}
, /* horizontal rule */
{
"title", c_title}
, /* document title */
{
"versionid", c_versionid}
, /* document RCS id */
{
"{", c__escaped}
, /* escaped lbrace (\{) */
{
"}", c__escaped}
, /* escaped rbrace (\}) */
};
int i, j, k, c;
/*
* Special cases: \S{0,1,2,...} and \uABCD. If the syntax
* doesn't match correctly, we just fall through to the
* binary-search phase.
*/
if (tok->text[0] == 'S')
{
/* We expect numeric characters thereafter. */
wchar_t *p = tok->text + 1;
int n;
if (!*p)
n = 1;
else
{
n = 0;
while (*p && isdec(*p))
{
n = 10 * n + fromdec(*p);
p++;
}
}
if (!*p)
{
tok->cmd = c_S;
tok->aux = n;
return;
}
} else if (tok->text[0] == 'u')
{
/* We expect hex characters thereafter. */
wchar_t *p = tok->text + 1;
int n = 0;
while (*p && ishex(*p))
{
n = 16 * n + fromhex(*p);
p++;
}
if (!*p)
{
tok->cmd = c_u;
tok->aux = n;
return;
}
}
i = -1;
j = sizeof(keywords) / sizeof(*keywords);
while (j - i > 1)
{
k = (i + j) / 2;
c = kwcmp(tok->text, keywords[k].name);
if (c < 0)
j = k;
else if (c > 0)
i = k;
else
{ /* c == 0 */
tok->cmd = keywords[k].id;
return;
}
}
tok->cmd = c__invalid;
}
/*
* Read a token from the input file, in the normal way (`normal' in
* the sense that code paragraphs work a different way).
*/
token get_token(input * in)
{
int c;
int nls;
token ret;
rdstring rs = { 0, 0, NULL };
filepos cpos;
ret.cmd = c__invalid;
ret.aux = FALSE;
ret.text = NULL; /* default */
c = get(in, &cpos);
ret.pos = cpos;
if (iswhite(c))
{ /* tok_white or tok_eop */
nls = 0;
do
{
if (isnl(c))
nls++;
}
while ((c = get(in, &cpos)) != EOF && iswhite(c));
if (c == EOF)
{
ret.type = tok_eof;
return ret;
}
unget(in, c, &cpos);
ret.type = (nls > 1 ? tok_eop : tok_white);
return ret;
} else if (c == EOF)
{ /* tok_eof */
ret.type = tok_eof;
return ret;
} else if (c == '\\')
{ /* tok_cmd */
c = get(in, &cpos);
if (c == '-' || c == '\\' || c == '_' ||
c == '#' || c == '{' || c == '}' || c == '.')
{
/* single-char command */
rdadd(&rs, (wchar_t)c);
} else if (c == 'u')
{
int len = 0;
do
{
rdadd(&rs, (wchar_t)c);
len++;
c = get(in, &cpos);
}
while (ishex(c) && len < 5);
unget(in, c, &cpos);
} else if (iscmd(c))
{
do
{
rdadd(&rs, (wchar_t)c);
c = get(in, &cpos);
}
while (iscmd(c));
unget(in, c, &cpos);
}
/*
* Now match the command against the list of available
* ones.
*/
ret.type = tok_cmd;
ret.text = ustrdup(rs.text);
match_kw(&ret);
sfree(rs.text);
return ret;
} else if (c == '{')
{ /* tok_lbrace */
ret.type = tok_lbrace;
return ret;
} else if (c == '}')
{ /* tok_rbrace */
ret.type = tok_rbrace;
return ret;
} else
{ /* tok_word */
/*
* Read a word: the longest possible contiguous sequence of
* things other than whitespace, backslash, braces and
* hyphen. A hyphen terminates the word but is returned as
* part of it; everything else is pushed back for the next
* token. The `aux' field contains TRUE if the word ends in
* a hyphen.
*/
ret.aux = FALSE; /* assumed for now */
while (1)
{
if (iswhite(c) || c == '{' || c == '}' || c == '\\' || c == EOF)
{
/* Put back the character that caused termination */
unget(in, c, &cpos);
break;
} else
{
rdadd(&rs, (wchar_t)c);
if (c == '-')
{
ret.aux = TRUE;
break; /* hyphen terminates word */
}
}
c = get(in, &cpos);
}
ret.type = tok_word;
ret.text = ustrdup(rs.text);
sfree(rs.text);
return ret;
}
}
/*
* Determine whether the next input character is an open brace (for
* telling code paragraphs from paragraphs which merely start with
* code).
*/
int isbrace(input * in)
{
int c;
filepos cpos;
c = get(in, &cpos);
unget(in, c, &cpos);
return (c == '{');
}
/*
* Read the rest of a line that starts `\c'. Including nothing at
* all (tok_word with empty text).
*/
token get_codepar_token(input * in)
{
int c;
token ret;
rdstring rs = { 0, 0, NULL };
filepos cpos;
ret.type = tok_word;
c = get(in, &cpos); /* expect (and discard) one space */
ret.pos = cpos;
if (c == ' ')
{
c = get(in, &cpos);
ret.pos = cpos;
}
while (!isnl(c) && c != EOF)
{
int c2 = c;
c = get(in, &cpos);
/* Discard \r just before \n. */
if (c2 != 13 || !isnl(c))
rdadd(&rs, (wchar_t)c2);
}
unget(in, c, &cpos);
ret.text = ustrdup(rs.text);
sfree(rs.text);
return ret;
}
/*
* Adds a new word to a linked list
*/
static word *addword(word newword, word *** hptrptr)
{
word *mnewword;
if (!hptrptr)
return NULL;
mnewword = mknew(word);
*mnewword = newword; /* structure copy */
mnewword->next = NULL;
**hptrptr = mnewword;
*hptrptr = &mnewword->next;
return mnewword;
}
/*
* Adds a new paragraph to a linked list
*/
static paragraph *addpara(paragraph newpara, paragraph *** hptrptr)
{
paragraph *mnewpara = mknew(paragraph);
*mnewpara = newpara; /* structure copy */
mnewpara->next = NULL;
**hptrptr = mnewpara;
*hptrptr = &mnewpara->next;
return mnewpara;
}
/*
* Destructor before token is reassigned; should catch most memory
* leaks
*/
#define dtor(t) ( sfree(t.text) )
static int is_special_midpara_cmd(token*t)
{
return tokiscmd(*t, c__midparacmd_unixnow);
}
static int handle_special_midpara_cmd(token*t, rdstring*rs, paragraph ***hptrptr)
{
wchar_t wbuf[100];
paragraph par;
if (t->type != tok_cmd) return 0;
initpara(par);
par.fpos = t->pos;
switch(t->cmd)
{
case c__midparacmd_unixnow:
ultou(getutcunixtime(), wbuf);
rdadds(rs, wbuf);
return 1;
}
return 0;
}
#define stack_item_push(stck__, sitype__) do { \
struct stack_item *si__ = mknew(struct stack_item); \
si__->type = sitype__; \
stk_push((stck__), si__); \
} while(!__LINE__)
/*
* Reads a single file (ie until get() returns EOF)
*/
static void read_file(paragraph *** ret, input * in, indexdata * idx, tree234 *macros)
{
token t;
paragraph par;
word wd, **whptr, **idximplicit;
wchar_t utext[2], *wdtext;
int style, spcstyle, tmpstyle;
int already;
int iswhite, seenwhite;
int type;
struct stack_item {
enum {
stack_nop = 0, /* do nothing (for error recovery) */
stack_ualt = 1, /* \u alternative */
stack_style = 2, /* \e, \c, \cw */
stack_idx = 4, /* \I, \i, \ii */
stack_hyper = 8, /* \W */
stack_quote = 16, /* \q */
} type;
word **whptr; /* to restore from \u alternatives */
word **idximplicit; /* to restore from \u alternatives */
} *sitem;
stack parsestk;
word *indexword=NULL, *uword=NULL, *iword=NULL;
word *idxwordlist;
rdstring indexstr;
int index_downcase=0, index_visible=0, indexing=0;
const rdstring nullrs = { 0, 0, NULL };
wchar_t uchr;
t.text = NULL;
already = FALSE;
/*
* Loop on each paragraph.
*/
while (1)
{
int start_cmd = c__invalid;
par.words = NULL;
par.keyword = NULL;
whptr = &par.words;
/*
* Get a token.
*/
if (!already)
{
dtor(t), t = get_token(in);
}
already = FALSE;
if (t.type == tok_eof)
break;
/*
* Parse code paragraphs separately.
*/
if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in))
{
par.type = para_Code;
par.fpos = t.pos;
while (1)
{
dtor(t), t = get_codepar_token(in);
wd.type = word_WeakCode;
wd.breaks = FALSE; /* shouldn't need this... */
wd.text = ustrdup(t.text);
wd.alt = NULL;
wd.fpos = t.pos;
addword(wd, &whptr);
dtor(t), t = get_token(in);
if (t.type == tok_white)
{
/*
* The newline after a code-paragraph line
*/
dtor(t), t = get_token(in);
}
if (t.type == tok_eop || t.type == tok_eof)
break;
else if (t.type != tok_cmd || t.cmd != c_c)
{
error(err_brokencodepara, &t.pos);
addpara(par, ret);
while (t.type != tok_eop) /* error recovery: */
dtor(t), t = get_token(in); /* eat rest of paragraph */
goto codeparabroken; /* ick, but such is life */
}
}
addpara(par, ret);
codeparabroken:
continue;
}
while (t.type == tok_cmd && macrolookup(macros, in, t.text, &t.pos))
{
dtor(t), t = get_token(in);
}
/*
* This token begins a paragraph. See if it's one of the
* special commands that define a paragraph type.
*
* (note that \# is special in a way, and \nocite takes no
* text)
*/
par.type = para_Normal;
if (t.type == tok_cmd)
{
int needkw=0;
int is_macro = FALSE;
par.fpos = t.pos;
switch (t.cmd)
{
default:
needkw = -1;
break;
case c__invalid:
error(err_badparatype, t.text, &t.pos);
needkw = 4;
break;
case c__comment:
if (isbrace(in))
{
needkw = -1; // Upstream 56b96573 (r8312)
break; /* `\#{': isn't a comment para */
}
do
{
dtor(t), t = get_token(in);
}
while (t.type != tok_eop && t.type != tok_eof);
continue; /* next paragraph */
/*
* `needkw' values:
*
* 1 -- exactly one keyword
* 2 -- at least one keyword
* 4 -- any number of keywords including zero
* 8 -- at least one keyword and then nothing else
* 16 -- nothing at all! no keywords, no body
* 32 -- no keywords at all
*/
case c_A:
needkw = 2;
par.type = para_Appendix;
break;
case c_B:
needkw = 2;
par.type = para_Biblio;
break;
case c_BR:
needkw = 1;
par.type = para_BR;
start_cmd = c_BR;
break;
case c_C:
needkw = 2;
par.type = para_Chapter;
break;
case c_H:
needkw = 2;
par.type = para_Heading;
par.aux = 0;
break;
case c_IM:
needkw = 2;
par.type = para_IM;
start_cmd = c_IM;
break;
case c_S:
needkw = 2;
par.type = para_Subsect;
par.aux = t.aux;
break;
case c_U:
needkw = 32;
par.type = para_UnnumberedChapter;
break;
/* For \b and \n the keyword is optional */
case c_b:
needkw = 4;
par.type = para_Bullet;
break;
case c_n:
needkw = 4;
par.type = para_NumberedList;
break;
case c_cfg:
needkw = 8;
par.type = para_Config;
start_cmd = c_cfg;
break;
case c_copyright:
needkw = 32;
par.type = para_Copyright;
break;
case c_define:
is_macro = TRUE;
needkw = 1;
break;
/* For \nocite the keyword is _everything_ */
case c_nocite:
needkw = 8;
par.type = para_NoCite;
break;
case c_preamble:
needkw = 32;
par.type = para_Preamble;
break;
case c_rule:
needkw = 16;
par.type = para_Rule;
break;
case c_title:
needkw = 32;
par.type = para_Title;
break;
case c_versionid:
needkw = 32;
par.type = para_VersionID;
break;
}
if (needkw > 0)
{
rdstring rs = { 0, 0, NULL };
int nkeys = 0;
filepos fp;
/* Get keywords. */
dtor(t), t = get_token(in);
fp = t.pos;
while (t.type == tok_lbrace)
{
/* This is a keyword. */
nkeys++;
/* FIXME: there will be bugs if anyone specifies an
* empty keyword (\foo{}), so trap this case. */
while (dtor(t), t = get_token(in),
t.type == tok_word ||
t.type == tok_white ||
(t.type == tok_cmd && t.cmd == c__nbsp) ||
(t.type == tok_cmd && t.cmd == c__escaped) ||
/* TODO: Merge from upstream?: (t.type == tok_cmd && t.cmd == c_u) || */
is_special_midpara_cmd(&t))
{
if (t.type == tok_white ||
(t.type == tok_cmd && t.cmd == c__nbsp))
rdadd(&rs, ' ');
else if (!handle_special_midpara_cmd(&t, &rs, ret))
rdadds(&rs, t.text);
}
if (t.type != tok_rbrace)
{
error(err_kwunclosed, &t.pos);
continue;
}
rdadd(&rs, 0); /* add string terminator */
dtor(t), t = get_token(in); /* eat right brace */
}
rdadd(&rs, 0); /* add string terminator */
/* See whether we have the right number of keywords. */
if ((needkw & 48) && nkeys > 0)
error(err_kwillegal, &fp);
if ((needkw & 11) && nkeys == 0)
error(err_kwexpected, &fp);
if ((needkw & 5) && nkeys > 1)
error(err_kwtoomany, &fp);
if (is_macro)
{
/*
* Macro definition. Get the rest of the line
* as a code-paragraph token, repeatedly until
* there's nothing more left of it. Separate
* with newlines.
*/
rdstring macrotext = { 0, 0, NULL };
while (1)
{
dtor(t), t = get_codepar_token(in);
if (macrotext.pos > 0)
rdadd(&macrotext, L'\n');
rdadds(&macrotext, t.text);
dtor(t), t = get_token(in);
if (t.type == tok_eop || t.type == tok_eof)
break;
}
macrodef(macros, rs.text, macrotext.text, fp);
continue; /* next paragraph */
}
par.keyword = rdtrim(&rs);
/* Move to EOP in case of needkw==8 or 16 (no body) */
if (needkw & 24)
{
/* We allow whitespace even when we expect no para body */
while (t.type == tok_white)
dtor(t), t = get_token(in);
if (t.type != tok_eop && t.type != tok_eof &&
(start_cmd == c__invalid ||
t.type != tok_cmd || t.cmd != start_cmd))
{
error(err_bodyillegal, &t.pos);
/* Error recovery: eat the rest of the paragraph */
while (t.type != tok_eop && t.type != tok_eof &&
(start_cmd == c__invalid ||
t.type != tok_cmd || t.cmd != start_cmd))
dtor(t), t = get_token(in);
}
if (t.type == tok_cmd)
already = TRUE; /* inhibit get_token at top of loop */
addpara(par, ret);
continue; /* next paragraph */
}
}
}
/*
* Now read the actual paragraph, word by word, adding to
* the paragraph list.
*
* Mid-paragraph commands:
*
* \K \k
* \c \cw
* \e
* \i \ii
* \I
* \u
* \W
* \date
* \\ \{ \}
*/
parsestk = stk_new();
style = word_Normal;
spcstyle = word_WhiteSpace;
indexing = FALSE;
seenwhite = TRUE;
while (t.type != tok_eop && t.type != tok_eof)
{
iswhite = FALSE;
already = FALSE;
/* Handle implicit paragraph breaks after \IM, \BR etc */
if (start_cmd != c__invalid &&
t.type == tok_cmd && t.cmd == start_cmd)
{
already = TRUE; /* inhibit get_token at top of loop */
break;
}
if (t.type == tok_cmd && t.cmd == c__nop) {
dtor(t), t = get_token(in);
continue; /* do nothing! */
}
if (t.type == tok_cmd && t.cmd == c__escaped)
{
t.type = tok_word; /* nice and simple */
t.aux = 0; /* even if `\-' - nonbreaking! */
}
if (t.type == tok_cmd && t.cmd == c__nbsp)
{
t.type = tok_word; /* nice and simple */
sfree(t.text);
t.text = ustrdup(L" "); /* text is ` ' not `_' */
t.aux = 0; /* (nonbreaking) */
}
switch (t.type)
{
case tok_white:
if (whptr == &par.words)
break; /* strip whitespace at start of para */
wd.text = NULL;
wd.type = spcstyle;
wd.alt = NULL;
wd.aux = 0;
wd.fpos = t.pos;
wd.breaks = FALSE;
/*
* Inhibit use of whitespace if it's (probably the
* newline) before a repeat \IM / \BR type
* directive.
*/
if (start_cmd != c__invalid)
{
dtor(t), t = get_token(in);
already = TRUE;
if (t.type == tok_cmd && t.cmd == start_cmd)
break;
}
if (indexing)
rdadd(&indexstr, ' ');
if (!indexing || index_visible)
addword(wd, &whptr);
if (indexing)
addword(wd, &idximplicit);
iswhite = TRUE;
break;
case tok_word:
if (indexing)
rdadds(&indexstr, t.text);
wd.type = style;
wd.alt = NULL;
wd.aux = 0;
wd.fpos = t.pos;
wd.breaks = t.aux;
if (!indexing || index_visible)
{
wd.text = ustrdup(t.text);
addword(wd, &whptr);
}
if (indexing)
{
wd.text = ustrdup(t.text);
addword(wd, &idximplicit);
}
break;
case tok_lbrace:
error(err_unexbrace, &t.pos);
/* Error recovery: push nop */
sitem = mknew(struct stack_item);
sitem->type = stack_nop;
stk_push(parsestk, sitem);
break;
case tok_rbrace:
sitem = stk_pop(parsestk);
if (!sitem)
error(err_unexbrace, &t.pos);
else
{
if (sitem->type & stack_ualt)
{
whptr = sitem->whptr;
idximplicit = sitem->idximplicit;
}
if (sitem->type & stack_style)
{
style = word_Normal;
spcstyle = word_WhiteSpace;
}
if (sitem->type & stack_idx ) {
indexword->text = ustrdup(indexstr.text);
if (index_downcase)
ustrlow(indexword->text);
indexing = FALSE;
rdadd(&indexstr, L'\0');
index_merge(idx, FALSE, indexstr.text, idxwordlist);
sfree(indexstr.text);
}
if (sitem->type & stack_hyper)
{
wd.text = NULL;
wd.type = word_HyperEnd;
wd.alt = NULL;
wd.aux = 0;
wd.fpos = t.pos;
wd.breaks = FALSE;
if (!indexing || index_visible)
addword(wd, &whptr);
if (indexing)
addword(wd, &idximplicit);
}
if (sitem->type & stack_quote)
{
wd.text = NULL;
wd.type = toquotestyle(style);
wd.alt = NULL;
wd.aux = quote_Close;
wd.fpos = t.pos;
wd.breaks = FALSE;
if (!indexing || index_visible)
addword(wd, &whptr);
if (indexing)
{
rdadd(&indexstr, L'"');
addword(wd, &idximplicit);
}
}
}
sfree(sitem);
break;
case tok_cmd:
switch (t.cmd)
{
case c__comment:
/*
* In-paragraph comment: \#{ balanced braces }
*
* Anything goes here; even tok_eop. We should
* eat whitespace after the close brace _if_
* there was whitespace before the \#.
*/
dtor(t), t = get_token(in);
if (t.type != tok_lbrace)
{
error(err_explbr, &t.pos);
} else
{
int braces = 1;
while (braces > 0)
{
dtor(t), t = get_token(in);
if (t.type == tok_lbrace)
braces++;
else if (t.type == tok_rbrace)
braces--;
else if (t.type == tok_eof)
{
error(err_commenteof, &t.pos);
break;
}
}
}
if (seenwhite)
{
already = TRUE;
dtor(t), t = get_token(in);
if (t.type == tok_white)
{
iswhite = TRUE;
already = FALSE;
}
}
break;
case c_q:
dtor(t), t = get_token(in);
if (t.type != tok_lbrace)
{
error(err_explbr, &t.pos);
} else
{
wd.text = NULL;
wd.type = toquotestyle(style);
wd.alt = NULL;
wd.aux = quote_Open;
wd.fpos = t.pos;
wd.breaks = FALSE;
if (!indexing || index_visible)
addword(wd, &whptr);
if (indexing)
{
rdadd(&indexstr, L'"');
addword(wd, &idximplicit);
}
sitem = mknew(struct stack_item);
sitem->type = stack_quote;
stk_push(parsestk, sitem);
}
break;
case c_K:
case c_k:
case c_R:
case c_W:
case c_L:
case c_date:
/*
* Keyword, hyperlink, or \date. We expect a
* left brace, some text, and then a right
* brace. No nesting; no arguments.
*/
wd.fpos = t.pos;
wd.breaks = FALSE;
if (t.cmd == c_K)
wd.type = word_UpperXref;
else if (t.cmd == c_k)
wd.type = word_LowerXref;
else if (t.cmd == c_R)
wd.type = word_FreeTextXref;
else if (t.cmd == c_W)
wd.type = word_HyperLink;
else if (t.cmd == c_L)
wd.type = word_LocalHyperLink;
else
wd.type = word_Normal;
dtor(t), t = get_token(in);
if (t.type != tok_lbrace)
{
if (wd.type == word_Normal)
{
time_t thetime = time(NULL);
struct tm *broken = localtime(&thetime);
already = TRUE;
wdtext = ustrftime(NULL, broken);
wd.type = style;
} else
{
error(err_explbr, &t.pos);
wdtext = NULL;
}
} else
{
rdstring rs = { 0, 0, NULL };
while (dtor(t), t = get_token(in),
t.type == tok_word || t.type == tok_white)
{
if (t.type == tok_white)
rdadd(&rs, ' ');
else
rdadds(&rs, t.text);
}
if (wd.type == word_Normal)
{
time_t thetime = time(NULL);
struct tm *broken = localtime(&thetime);
wdtext = ustrftime(rs.text, broken);
wd.type = style;
} else
{
wdtext = ustrdup(rs.text);
}
sfree(rs.text);
if (t.type != tok_rbrace)
{
error(err_kwexprbr, &t.pos);
}
}
wd.alt = NULL;
wd.aux = 0;
if (!indexing || index_visible)
{
wd.text = ustrdup(wdtext);
addword(wd, &whptr);
}
if (indexing)
{
wd.text = ustrdup(wdtext);
addword(wd, &idximplicit);
}
sfree(wdtext);
if (wd.type == word_FreeTextXref || wd.type == word_HyperLink || wd.type == word_LocalHyperLink)
{
/*
* Hyperlinks are different: they then
* expect another left brace, to begin
* delimiting the text marked by the link.
*/
dtor(t), t = get_token(in);
/*
* Special cases: \W{}\c, \W{}\e, \W{}\cw
*/
sitem = mknew(struct stack_item);
sitem->type = stack_hyper;
if (t.type == tok_cmd && (tmpstyle = getcmdstyle(t.cmd)))
{
if (style != word_Normal)
error(err_nestedstyles, &t.pos);
else
{
style = tmpstyle;
spcstyle = tospacestyle(style);
sitem->type |= stack_style;
}
dtor(t), t = get_token(in);
}
if (t.type != tok_lbrace)
{
error(err_explbr, &t.pos);
sfree(sitem);
} else
{
stk_push(parsestk, sitem);
}
}
break;
case c_c:
case c_cw:
case c_e:
type = t.cmd;
if (style != word_Normal)
{
error(err_nestedstyles, &t.pos);
/* Error recovery: eat lbrace, push nop. */
dtor(t), t = get_token(in);
sitem = mknew(struct stack_item);
sitem->type = stack_nop;
stk_push(parsestk, sitem);
}
dtor(t), t = get_token(in);
if (t.type != tok_lbrace)
{
error(err_explbr, &t.pos);
} else
{
style = getcmdstyle(type);
spcstyle = tospacestyle(style);
sitem = mknew(struct stack_item);
sitem->type = stack_style;
stk_push(parsestk, sitem);
}
break;
case c_i:
case c_ii:
case c_I:
type = t.cmd;
if (indexing)
{
error(err_nestedindex, &t.pos);
/* Error recovery: eat lbrace, push nop. */
dtor(t), t = get_token(in);
sitem = mknew(struct stack_item);
sitem->type = stack_nop;
stk_push(parsestk, sitem);
}
sitem = mknew(struct stack_item);
sitem->type = stack_idx;
dtor(t), t = get_token(in);
/*
* Special cases: \i\c, \i\e, \i\cw
*/
wd.fpos = t.pos;
if (t.type == tok_cmd && (tmpstyle = getcmdstyle(t.cmd)))
{
if (style != word_Normal)
error(err_nestedstyles, &t.pos);
else
{
style = tmpstyle;
spcstyle = tospacestyle(style);
sitem->type |= stack_style;
}
dtor(t), t = get_token(in);
}
if (t.type != tok_lbrace)
{
sfree(sitem);
error(err_explbr, &t.pos);
} else
{
/* Add an index-reference word with no text as yet */
wd.type = word_IndexRef;
wd.text = NULL;
wd.alt = NULL;
wd.aux = 0;
wd.breaks = FALSE;
indexword = addword(wd, &whptr);
/* Set up a rdstring to read the index text */
indexstr = nullrs;
/* Flags so that we do the Right Things with text */
index_visible = (type != c_I);
index_downcase = (type == c_ii);
indexing = TRUE;
idxwordlist = NULL;
idximplicit = &idxwordlist;
/* Stack item to close the indexing on exit */
stk_push(parsestk, sitem);
}
break;
case c_u:
uchr = t.aux;
utext[0] = uchr;
utext[1] = 0;
wd.type = style;
wd.breaks = FALSE;
wd.alt = NULL;
wd.aux = 0;
wd.fpos = t.pos;
if (!indexing || index_visible)
{
wd.text = ustrdup(utext);
uword = addword(wd, &whptr);
} else
uword = NULL;
if (indexing)
{
wd.text = ustrdup(utext);
iword = addword(wd, &idximplicit);
} else
iword = NULL;
dtor(t), t = get_token(in);
if (t.type == tok_lbrace)
{
/*
* \u with a left brace. Until the brace
* closes, all further words go on a
* sidetrack from the main thread of the
* paragraph.
*/
sitem = mknew(struct stack_item);
sitem->type = stack_ualt;
sitem->whptr = whptr;
sitem->idximplicit = idximplicit;
stk_push(parsestk, sitem);
whptr = uword ? &uword->alt : NULL;
idximplicit = iword ? &iword->alt : NULL;
} else
{
if (indexing)
rdadd(&indexstr, uchr);
already = TRUE;
}
break;
default:
if (!macrolookup(macros, in, t.text, &t.pos))
error(err_badmidcmd, t.text, &t.pos);
break;
}
}
if (!already)
dtor(t), t = get_token(in);
seenwhite = iswhite;
}
/* Check the stack is empty */
if (NULL != (sitem = stk_pop(parsestk)))
{
do
{
sfree(sitem);
sitem = stk_pop(parsestk);
}
while (sitem);
error(err_missingrbrace, &t.pos);
}
stk_free(parsestk);
addpara(par, ret);
}
/*
* We break to here rather than returning, because otherwise
* this cleanup doesn't happen.
*/
dtor(t);
}
paragraph *read_input(input * in, indexdata * idx)
{
paragraph *head = NULL;
paragraph **hptr = &head;
tree234 *macros;
macros = newtree234(macrocmp);
while (in->currindex < in->nfiles)
{
in->currfp = fopen(in->filenames[in->currindex], "r");
if (in->currfp)
{
setpos(in, in->filenames[in->currindex]);
read_file(&hptr, in, idx, macros);
}
in->currindex++;
}
macrocleanup(macros);
return head;
}