From b23696e64c01daf4021dd95dcb24d5b66734a9a3 Mon Sep 17 00:00:00 2001 From: kichik Date: Fri, 1 Nov 2002 18:22:40 +0000 Subject: [PATCH] Hacked up halibut source code git-svn-id: https://svn.code.sf.net/p/nsis/code/NSIS/trunk@1534 212acab6-be3b-0410-9dea-997c60f758d6 --- Docs/src/bin/halibut/LICENCE | 21 + Docs/src/bin/halibut/biblio.c | 127 ++ Docs/src/bin/halibut/bk_text.c | 751 +++++++++ Docs/src/bin/halibut/bk_whlp.c | 711 +++++++++ Docs/src/bin/halibut/bk_xhtml.c | 1933 +++++++++++++++++++++++ Docs/src/bin/halibut/contents.c | 258 ++++ Docs/src/bin/halibut/error.c | 227 +++ Docs/src/bin/halibut/halibut.h | 438 ++++++ Docs/src/bin/halibut/help.c | 38 + Docs/src/bin/halibut/index.c | 278 ++++ Docs/src/bin/halibut/input.c | 1516 ++++++++++++++++++ Docs/src/bin/halibut/keywords.c | 179 +++ Docs/src/bin/halibut/licence.c | 18 + Docs/src/bin/halibut/main.c | 343 +++++ Docs/src/bin/halibut/makefile | 22 + Docs/src/bin/halibut/malloc.c | 173 +++ Docs/src/bin/halibut/misc.c | 377 +++++ Docs/src/bin/halibut/style.c | 7 + Docs/src/bin/halibut/tree234.c | 2555 +++++++++++++++++++++++++++++++ Docs/src/bin/halibut/tree234.h | 203 +++ Docs/src/bin/halibut/ustring.c | 216 +++ Docs/src/bin/halibut/version.c | 13 + Docs/src/bin/halibut/winhelp.c | 2272 +++++++++++++++++++++++++++ Docs/src/bin/halibut/winhelp.h | 173 +++ 24 files changed, 12849 insertions(+) create mode 100644 Docs/src/bin/halibut/LICENCE create mode 100644 Docs/src/bin/halibut/biblio.c create mode 100644 Docs/src/bin/halibut/bk_text.c create mode 100644 Docs/src/bin/halibut/bk_whlp.c create mode 100644 Docs/src/bin/halibut/bk_xhtml.c create mode 100644 Docs/src/bin/halibut/contents.c create mode 100644 Docs/src/bin/halibut/error.c create mode 100644 Docs/src/bin/halibut/halibut.h create mode 100644 Docs/src/bin/halibut/help.c create mode 100644 Docs/src/bin/halibut/index.c create mode 100644 Docs/src/bin/halibut/input.c create mode 100644 Docs/src/bin/halibut/keywords.c create mode 100644 
Docs/src/bin/halibut/licence.c create mode 100644 Docs/src/bin/halibut/main.c create mode 100644 Docs/src/bin/halibut/makefile create mode 100644 Docs/src/bin/halibut/malloc.c create mode 100644 Docs/src/bin/halibut/misc.c create mode 100644 Docs/src/bin/halibut/style.c create mode 100644 Docs/src/bin/halibut/tree234.c create mode 100644 Docs/src/bin/halibut/tree234.h create mode 100644 Docs/src/bin/halibut/ustring.c create mode 100644 Docs/src/bin/halibut/version.c create mode 100644 Docs/src/bin/halibut/winhelp.c create mode 100644 Docs/src/bin/halibut/winhelp.h diff --git a/Docs/src/bin/halibut/LICENCE b/Docs/src/bin/halibut/LICENCE new file mode 100644 index 00000000..b1bfa8bc --- /dev/null +++ b/Docs/src/bin/halibut/LICENCE @@ -0,0 +1,21 @@ +Halibut is copyright (c) 1999-2001 Simon Tatham and James Aylett. + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation files +(the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
/*
 * Build the default citation text for bibliography entry number
 * `num', in the form "[num]".
 *
 * The string is assembled backwards from the end of a fixed-size
 * scratch buffer, and the finished text is handed to ustrdup(); the
 * return value is whatever ustrdup() returns (presumably a heap copy
 * owned by the caller -- confirm against ustring.c).
 *
 * `num' must be non-negative; gen_citations() only ever passes
 * values starting from 1.
 */
static wchar_t *
gentext (int num)
{
  wchar_t text[22];
  /*
   * Start one past the last _element_ of the buffer. Using
   * sizeof (text) on its own would add a byte count to a wchar_t
   * pointer and start writing far beyond the end of the array.
   */
  wchar_t *p = text + sizeof (text) / sizeof (text[0]);

  assert (num >= 0);

  *--p = L'\0';
  *--p = L']';
  /* do/while, so that zero is rendered as "[0]" rather than "[]". */
  do
    {
      assert (p > text);
      *--p = L"0123456789"[num % 10];
      num /= 10;
    }
  while (num != 0);
  assert (p > text);
  *--p = L'[';
  return ustrdup (p);
}
+ */ + for (ptr = para->words; ptr; ptr = ptr->next) + { + if (ptr->type == word_UpperXref || ptr->type == word_LowerXref) + cite_biblio (kl, ptr->text, ptr->fpos); + } + } + + /* + * We're now almost done; all that remains is to scan through + * the cited bibliography entries and invent default citation + * texts for the ones that don't already have explicitly + * provided \BR text. + */ + for (para = source; para; para = para->next) + { + if (para->type == para_BiblioCited) + { + keyword *kw = kw_lookup (kl, para->keyword); + assert (kw != NULL); + if (!kw->text) + { + word *wd = smalloc (sizeof (word)); + wd->text = gentext (++bibnum); + wd->type = word_Normal; + wd->alt = NULL; + wd->next = NULL; + kw->text = wd; + } + para->kwtext = kw->text; + } + } +} diff --git a/Docs/src/bin/halibut/bk_text.c b/Docs/src/bin/halibut/bk_text.c new file mode 100644 index 00000000..e100a63e --- /dev/null +++ b/Docs/src/bin/halibut/bk_text.c @@ -0,0 +1,751 @@ +/* + * text backend for Halibut + */ + +#include +#include +#include +#include "halibut.h" + +typedef enum +{ LEFT, LEFTPLUS, CENTRE } +alignment; +typedef struct +{ + alignment align; + int just_numbers; + wchar_t underline; + wchar_t *number_suffix; +} +alignstruct; + +typedef struct +{ + int indent, indent_code; + int listindentbefore, listindentafter; + int width; + alignstruct atitle, achapter, *asect; + int nasect; + int include_version_id; + int indent_preambles; + word bullet; +} +textconfig; + +static int text_convert (wchar_t *, char **); + +static void text_heading (FILE *, word *, word *, word *, alignstruct, int, + int); +static void text_rule (FILE *, int, int); +static void text_para (FILE *, word *, char *, word *, int, int, int); +static void text_codepara (FILE *, word *, int, int); +static void text_versionid (FILE *, word *); + +static alignment +utoalign (wchar_t * p) +{ + if (!ustricmp (p, L"centre") || !ustricmp (p, L"center")) + return CENTRE; + if (!ustricmp (p, L"leftplus")) + return LEFTPLUS; + 
return LEFT; +} + +static textconfig +text_configure (paragraph * source) +{ + textconfig ret; + + /* + * Non-negotiables. + */ + ret.bullet.next = NULL; + ret.bullet.alt = NULL; + ret.bullet.type = word_Normal; + ret.atitle.just_numbers = FALSE; /* ignored */ + + /* + * Defaults. + */ + ret.indent = 7; + ret.indent_code = 2; + ret.listindentbefore = 1; + ret.listindentafter = 3; + ret.width = 68; + ret.atitle.align = CENTRE; + ret.atitle.underline = L'='; + ret.achapter.align = LEFT; + ret.achapter.just_numbers = FALSE; + ret.achapter.number_suffix = ustrdup (L": "); + ret.achapter.underline = L'-'; + ret.nasect = 1; + ret.asect = mknewa (alignstruct, ret.nasect); + ret.asect[0].align = LEFTPLUS; + ret.asect[0].just_numbers = TRUE; + ret.asect[0].number_suffix = ustrdup (L" "); + ret.asect[0].underline = L'\0'; + ret.include_version_id = TRUE; + ret.indent_preambles = FALSE; + ret.bullet.text = ustrdup (L"-"); + + for (; source; source = source->next) + { + if (source->type == para_Config) + { + if (!ustricmp (source->keyword, L"text-indent")) + { + ret.indent = utoi (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"text-indent-code")) + { + ret.indent_code = utoi (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"text-width")) + { + ret.width = utoi (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"text-list-indent")) + { + ret.listindentbefore = utoi (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"text-listitem-indent")) + { + ret.listindentafter = utoi (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"text-chapter-align")) + { + ret.achapter.align = utoalign (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"text-chapter-underline")) + { + ret.achapter.underline = *uadv (source->keyword); + } + else if (!ustricmp (source->keyword, L"text-chapter-numeric")) + { + ret.achapter.just_numbers = utob (uadv (source->keyword)); + } + else if 
(!ustricmp (source->keyword, L"text-chapter-suffix")) + { + ret.achapter.number_suffix = ustrdup (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"text-section-align")) + { + wchar_t *p = uadv (source->keyword); + int n = 0; + if (uisdigit (*p)) + { + n = utoi (p); + p = uadv (p); + } + if (n >= ret.nasect) + { + int i; + ret.asect = resize (ret.asect, n + 1); + for (i = ret.nasect; i <= n; i++) + ret.asect[i] = ret.asect[ret.nasect - 1]; + ret.nasect = n + 1; + } + ret.asect[n].align = utoalign (p); + } + else if (!ustricmp (source->keyword, L"text-section-underline")) + { + wchar_t *p = uadv (source->keyword); + int n = 0; + if (uisdigit (*p)) + { + n = utoi (p); + p = uadv (p); + } + if (n >= ret.nasect) + { + int i; + ret.asect = resize (ret.asect, n + 1); + for (i = ret.nasect; i <= n; i++) + ret.asect[i] = ret.asect[ret.nasect - 1]; + ret.nasect = n + 1; + } + ret.asect[n].underline = *p; + } + else if (!ustricmp (source->keyword, L"text-section-numeric")) + { + wchar_t *p = uadv (source->keyword); + int n = 0; + if (uisdigit (*p)) + { + n = utoi (p); + p = uadv (p); + } + if (n >= ret.nasect) + { + int i; + ret.asect = resize (ret.asect, n + 1); + for (i = ret.nasect; i <= n; i++) + ret.asect[i] = ret.asect[ret.nasect - 1]; + ret.nasect = n + 1; + } + ret.asect[n].just_numbers = utob (p); + } + else if (!ustricmp (source->keyword, L"text-section-suffix")) + { + wchar_t *p = uadv (source->keyword); + int n = 0; + if (uisdigit (*p)) + { + n = utoi (p); + p = uadv (p); + } + if (n >= ret.nasect) + { + int i; + ret.asect = resize (ret.asect, n + 1); + for (i = ret.nasect; i <= n; i++) + ret.asect[i] = ret.asect[ret.nasect - 1]; + ret.nasect = n + 1; + } + ret.asect[n].number_suffix = ustrdup (p); + } + else if (!ustricmp (source->keyword, L"text-title-align")) + { + ret.atitle.align = utoalign (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"text-title-underline")) + { + ret.atitle.underline = *uadv (source->keyword); + 
} + else if (!ustricmp (source->keyword, L"text-versionid")) + { + ret.include_version_id = utob (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"text-indent-preamble")) + { + ret.indent_preambles = utob (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"text-bullet")) + { + ret.bullet.text = uadv (source->keyword); + } + } + } + + return ret; +} + +void +text_backend (paragraph * sourceform, keywordlist * keywords, indexdata * idx) +{ + paragraph *p; + textconfig conf; + word *prefix, *body, *wp; + word spaceword; + FILE *fp; + char *prefixextra; + int indentb, indenta; + + IGNORE (keywords); /* we don't happen to need this */ + IGNORE (idx); /* or this */ + + conf = text_configure (sourceform); + + /* + * Determine the output file name, and open the output file + * + * FIXME: want configurable output file names here. For the + * moment, we'll just call it `output.txt'. + */ + fp = fopen ("output.txt", "w"); + if (!fp) + { + error (err_cantopenw, "output.txt"); + return; + } + + /* Do the title */ + for (p = sourceform; p; p = p->next) + if (p->type == para_Title) + text_heading (fp, NULL, NULL, p->words, + conf.atitle, conf.indent, conf.width); + + /* Do the preamble and copyright */ + for (p = sourceform; p; p = p->next) + if (p->type == para_Preamble) + text_para (fp, NULL, NULL, p->words, + conf.indent_preambles ? conf.indent : 0, 0, + conf.width + (conf.indent_preambles ? 0 : conf.indent)); + for (p = sourceform; p; p = p->next) + if (p->type == para_Copyright) + text_para (fp, NULL, NULL, p->words, + conf.indent_preambles ? conf.indent : 0, 0, + conf.width + (conf.indent_preambles ? 0 : conf.indent)); + + /* Do the main document */ + for (p = sourceform; p; p = p->next) + switch (p->type) + { + + /* + * Things we ignore because we've already processed them or + * aren't going to touch them in this pass. 
+ */ + case para_IM: + case para_BR: + case para_Biblio: /* only touch BiblioCited */ + case para_VersionID: + case para_Copyright: + case para_Preamble: + case para_NoCite: + case para_Title: + break; + + /* + * Chapter titles. + */ + case para_Chapter: + case para_Appendix: + case para_UnnumberedChapter: + text_heading (fp, p->kwtext, p->kwtext2, p->words, + conf.achapter, conf.indent, conf.width); + break; + + case para_Heading: + case para_Subsect: + text_heading (fp, p->kwtext, p->kwtext2, p->words, + conf.asect[p->aux >= + conf.nasect ? conf.nasect - 1 : p->aux], + conf.indent, conf.width); + break; + + case para_Rule: + text_rule (fp, conf.indent, conf.width); + break; + + case para_Normal: + case para_BiblioCited: + case para_Bullet: + case para_NumberedList: + if (p->type == para_Bullet) + { + prefix = &conf.bullet; + prefixextra = NULL; + indentb = conf.listindentbefore; + indenta = conf.listindentafter; + } + else if (p->type == para_NumberedList) + { + prefix = p->kwtext; + prefixextra = "."; /* FIXME: configurability */ + indentb = conf.listindentbefore; + indenta = conf.listindentafter; + } + else + { + prefix = NULL; + prefixextra = NULL; + indentb = indenta = 0; + } + if (p->type == para_BiblioCited) + { + body = dup_word_list (p->kwtext); + for (wp = body; wp->next; wp = wp->next); + wp->next = &spaceword; + spaceword.next = p->words; + spaceword.alt = NULL; + spaceword.type = word_WhiteSpace; + spaceword.text = NULL; + } + else + { + wp = NULL; + body = p->words; + } + text_para (fp, prefix, prefixextra, body, + conf.indent + indentb, indenta, + conf.width - indentb - indenta); + if (wp) + { + wp->next = NULL; + free_word_list (body); + } + break; + + case para_Code: + text_codepara (fp, p->words, conf.indent + conf.indent_code, + conf.width - 2 * conf.indent_code); + break; + } + + /* Do the version ID */ + if (conf.include_version_id) + { + for (p = sourceform; p; p = p->next) + if (p->type == para_VersionID) + text_versionid (fp, p->words); + } 
+ + /* + * Tidy up + */ + fclose (fp); + { + int i; + sfree (conf.achapter.number_suffix); + for (i = 0; i < conf.nasect; i++) + sfree (conf.asect[i].number_suffix); + sfree (conf.asect); + sfree (conf.bullet.text); + } +} + +/* + * Convert a wide string into a string of chars. If `result' is + * non-NULL, mallocs the resulting string and stores a pointer to + * it in `*result'. If `result' is NULL, merely checks whether all + * characters in the string are feasible for the output character + * set. + * + * Return is nonzero if all characters are OK. If not all + * characters are OK but `result' is non-NULL, a result _will_ + * still be generated! + */ +static int +text_convert (wchar_t * s, char **result) +{ + /* + * FIXME. Currently this is ISO8859-1 only. + */ + int doing = (result != 0); + int ok = TRUE; + char *p = NULL; + int plen = 0, psize = 0; + + for (; *s; s++) + { + wchar_t c = *s; + char outc; + + if ((c >= 32 && c <= 126) || (c >= 160 && c <= 255)) + { + /* Char is OK. */ + outc = (char) c; + } + else + { + /* Char is not OK. */ + ok = FALSE; + outc = 0xBF; /* approximate the good old DEC `uh?' 
*/ + } + if (doing) + { + if (plen >= psize) + { + psize = plen + 256; + p = resize (p, psize); + } + p[plen++] = outc; + } + } + if (doing) + { + p = resize (p, plen + 1); + p[plen] = '\0'; + *result = p; + } + return ok; +} + +static void +text_rdaddwc (rdstringc * rs, word * text, word * end) +{ + char *c; + + for (; text && text != end; text = text->next) + switch (text->type) + { + case word_HyperLink: + case word_HyperEnd: + case word_UpperXref: + case word_LowerXref: + case word_XrefEnd: + case word_IndexRef: + break; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + assert (text->type != word_CodeQuote && + text->type != word_WkCodeQuote); + if (towordstyle (text->type) == word_Emph && + (attraux (text->aux) == attr_First || + attraux (text->aux) == attr_Only)) + rdaddc (rs, '_'); /* FIXME: configurability */ + else if (towordstyle (text->type) == word_Code && + (attraux (text->aux) == attr_First || + attraux (text->aux) == attr_Only)) + rdaddc (rs, '`'); /* FIXME: configurability */ + if (removeattr (text->type) == word_Normal) + { + if (text_convert (text->text, &c)) + rdaddsc (rs, c); + else + text_rdaddwc (rs, text->alt, NULL); + sfree (c); + } + else if (removeattr (text->type) == word_WhiteSpace) + { + rdaddc (rs, ' '); + } + else if (removeattr (text->type) == word_Quote) + { + rdaddc (rs, quoteaux (text->aux) == quote_Open ? 
'`' : '\''); + /* FIXME: configurability */ + } + if (towordstyle (text->type) == word_Emph && + (attraux (text->aux) == attr_Last || + attraux (text->aux) == attr_Only)) + rdaddc (rs, '_'); /* FIXME: configurability */ + else if (towordstyle (text->type) == word_Code && + (attraux (text->aux) == attr_Last || + attraux (text->aux) == attr_Only)) + rdaddc (rs, '\''); /* FIXME: configurability */ + break; + } +} + +static int text_width (word *); + +static int +text_width_list (word * text) +{ + int w = 0; + while (text) + { + w += text_width (text); + text = text->next; + } + return w; +} + +static int +text_width (word * text) +{ + switch (text->type) + { + case word_HyperLink: + case word_HyperEnd: + case word_UpperXref: + case word_LowerXref: + case word_XrefEnd: + case word_IndexRef: + return 0; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + return (((text->type == word_Emph || + text->type == word_Code) + ? (attraux (text->aux) == attr_Only ? 2 : + attraux (text->aux) == attr_Always ? 0 : 1) + : 0) + + (text_convert (text->text, NULL) ? + ustrlen (text->text) : text_width_list (text->alt))); + + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + assert (text->type != word_CodeQuote && text->type != word_WkCodeQuote); + return (((towordstyle (text->type) == word_Emph || + towordstyle (text->type) == word_Code) + ? (attraux (text->aux) == attr_Only ? 2 : + attraux (text->aux) == attr_Always ? 
0 : 1) : 0) + 1); + } + return 0; /* should never happen */ +} + +static void +text_heading (FILE * fp, word * tprefix, word * nprefix, word * text, + alignstruct align, int indent, int width) +{ + rdstringc t = { 0, 0, NULL }; + int margin, length; + int firstlinewidth, wrapwidth; + wrappedline *wrapping, *p; + + if (align.just_numbers && nprefix) + { + char *c; + text_rdaddwc (&t, nprefix, NULL); + if (text_convert (align.number_suffix, &c)) + { + rdaddsc (&t, c); + sfree (c); + } + } + else if (!align.just_numbers && tprefix) + { + char *c; + text_rdaddwc (&t, tprefix, NULL); + if (text_convert (align.number_suffix, &c)) + { + rdaddsc (&t, c); + sfree (c); + } + } + margin = length = (t.text ? strlen (t.text) : 0); + + if (align.align == LEFTPLUS) + { + margin = indent - margin; + if (margin < 0) + margin = 0; + firstlinewidth = indent + width - margin - length; + wrapwidth = width; + } + else if (align.align == LEFT || align.align == CENTRE) + { + margin = 0; + firstlinewidth = indent + width - length; + wrapwidth = indent + width; + } + + wrapping = wrap_para (text, firstlinewidth, wrapwidth, text_width); + for (p = wrapping; p; p = p->next) + { + text_rdaddwc (&t, p->begin, p->end); + length = (t.text ? 
strlen (t.text) : 0); + if (align.align == CENTRE) + { + margin = (indent + width - length) / 2; + if (margin < 0) + margin = 0; + } + fprintf (fp, "%*s%s\n", margin, "", t.text); + if (align.underline != L'\0') + { + char *u, uc; + wchar_t uw[2]; + uw[0] = align.underline; + uw[1] = L'\0'; + text_convert (uw, &u); + uc = u[0]; + sfree (u); + fprintf (fp, "%*s", margin, ""); + while (length--) + putc (uc, fp); + putc ('\n', fp); + } + if (align.align == LEFTPLUS) + margin = indent; + else + margin = 0; + sfree (t.text); + t = empty_rdstringc; + } + wrap_free (wrapping); + putc ('\n', fp); + + sfree (t.text); +} + +static void +text_rule (FILE * fp, int indent, int width) +{ + while (indent--) + putc (' ', fp); + while (width--) + putc ('-', fp); /* FIXME: configurability! */ + putc ('\n', fp); + putc ('\n', fp); +} + +static void +text_para (FILE * fp, word * prefix, char *prefixextra, word * text, + int indent, int extraindent, int width) +{ + wrappedline *wrapping, *p; + rdstringc pfx = { 0, 0, NULL }; + int e; + int firstlinewidth = width; + + if (prefix) + { + text_rdaddwc (&pfx, prefix, NULL); + if (prefixextra) + rdaddsc (&pfx, prefixextra); + fprintf (fp, "%*s%s", indent, "", pfx.text); + /* If the prefix is too long, shorten the first line to fit. 
*/ + e = extraindent - strlen (pfx.text); + if (e < 0) + { + firstlinewidth += e; /* this decreases it, since e < 0 */ + if (firstlinewidth < 0) + { + e = indent + extraindent; + firstlinewidth = width; + fprintf (fp, "\n"); + } + else + e = 0; + } + sfree (pfx.text); + } + else + e = indent + extraindent; + + wrapping = wrap_para (text, firstlinewidth, width, text_width); + for (p = wrapping; p; p = p->next) + { + rdstringc t = { 0, 0, NULL }; + text_rdaddwc (&t, p->begin, p->end); + fprintf (fp, "%*s%s\n", e, "", t.text); + e = indent + extraindent; + sfree (t.text); + } + wrap_free (wrapping); + putc ('\n', fp); +} + +static void +text_codepara (FILE * fp, word * text, int indent, int width) +{ + for (; text; text = text->next) + if (text->type == word_WeakCode) + { + char *c; + text_convert (text->text, &c); + if (strlen (c) > (size_t) width) + { + /* FIXME: warn */ + } + fprintf (fp, "%*s%s\n", indent, "", c); + sfree (c); + } + + putc ('\n', fp); +} + +static void +text_versionid (FILE * fp, word * text) +{ + rdstringc t = { 0, 0, NULL }; + + rdaddc (&t, '['); /* FIXME: configurability */ + text_rdaddwc (&t, text, NULL); + rdaddc (&t, ']'); /* FIXME: configurability */ + + fprintf (fp, "%s\n", t.text); + sfree (t.text); +} diff --git a/Docs/src/bin/halibut/bk_whlp.c b/Docs/src/bin/halibut/bk_whlp.c new file mode 100644 index 00000000..51a8a595 --- /dev/null +++ b/Docs/src/bin/halibut/bk_whlp.c @@ -0,0 +1,711 @@ +/* + * Windows Help backend for Halibut + * + * TODO: + * - allow user to specify section contexts. + */ + +#include +#include +#include + +#include "halibut.h" +#include "winhelp.h" + +struct bk_whlp_state +{ + WHLP h; + indexdata *idx; + keywordlist *keywords; + WHLP_TOPIC curr_topic; + FILE *cntfp; + int cnt_last_level, cnt_workaround; +}; + +/* + * Indexes of fonts in our standard font descriptor set. 
+ */ +enum +{ + FONT_NORMAL, + FONT_EMPH, + FONT_CODE, + FONT_TITLE, + FONT_TITLE_EMPH, + FONT_TITLE_CODE, + FONT_RULE +}; + +static void whlp_rdaddwc (rdstringc * rs, word * text); +static int whlp_convert (wchar_t * s, char **result, int hard_spaces); +static void whlp_mkparagraph (struct bk_whlp_state *state, + int font, word * text, int subsidiary); +static void whlp_navmenu (struct bk_whlp_state *state, paragraph * p); +static void whlp_contents_write (struct bk_whlp_state *state, + int level, char *text, WHLP_TOPIC topic); + +void +whlp_backend (paragraph * sourceform, keywordlist * keywords, indexdata * idx) +{ + WHLP h; + char *filename, *cntname; + paragraph *p, *lastsect; + struct bk_whlp_state state; + WHLP_TOPIC contents_topic; + int i; + indexentry *ie; + + filename = "output.hlp"; /* FIXME: configurability */ + cntname = "output.cnt"; /* corresponding contents file */ + + state.cntfp = fopen (cntname, "wb"); + state.cnt_last_level = -1; + state.cnt_workaround = 0; + + h = state.h = whlp_new (); + state.keywords = keywords; + state.idx = idx; + + whlp_start_macro (h, "CB(\"btn_about\",\"&About\",\"About()\")"); + whlp_start_macro (h, "CB(\"btn_up\",\"&Up\",\"Contents()\")"); + whlp_start_macro (h, "BrowseButtons()"); + + whlp_create_font (h, "Times New Roman", WHLP_FONTFAM_SERIF, 24, 0, 0, 0, 0); + whlp_create_font (h, "Times New Roman", WHLP_FONTFAM_SERIF, 24, + WHLP_FONT_ITALIC, 0, 0, 0); + whlp_create_font (h, "Courier New", WHLP_FONTFAM_FIXED, 24, 0, 0, 0, 0); + whlp_create_font (h, "Arial", WHLP_FONTFAM_SERIF, 30, + WHLP_FONT_BOLD, 0, 0, 0); + whlp_create_font (h, "Arial", WHLP_FONTFAM_SERIF, 30, + WHLP_FONT_BOLD | WHLP_FONT_ITALIC, 0, 0, 0); + whlp_create_font (h, "Courier New", WHLP_FONTFAM_FIXED, 30, + WHLP_FONT_BOLD, 0, 0, 0); + whlp_create_font (h, "Courier New", WHLP_FONTFAM_SANS, 18, + WHLP_FONT_STRIKEOUT, 0, 0, 0); + + /* + * Loop over the source form finding out whether the user has + * specified particular help topic names for anything. 
+ */ + for (p = sourceform; p; p = p->next) + { + p->private_data = NULL; + if (p->type == para_Config && p->parent) + { + if (!ustricmp (p->keyword, L"winhelp-topic")) + { + char *topicname; + whlp_convert (uadv (p->keyword), &topicname, 0); + /* Store the topic name in the private_data field of the + * containing section. */ + p->parent->private_data = topicname; + } + } + } + + /* + * Loop over the source form registering WHLP_TOPICs for + * everything. + */ + + contents_topic = whlp_register_topic (h, "Top", NULL); + whlp_primary_topic (h, contents_topic); + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Chapter || + p->type == para_Appendix || + p->type == para_UnnumberedChapter || + p->type == para_Heading || p->type == para_Subsect) + { + char *topicid = p->private_data; + char *errstr; + + p->private_data = whlp_register_topic (h, topicid, &errstr); + if (!p->private_data) + { + p->private_data = whlp_register_topic (h, NULL, NULL); + error (err_winhelp_ctxclash, &p->fpos, topicid, errstr); + } + sfree (topicid); + } + } + + /* + * Loop over the index entries, preparing final text forms for + * each one. + */ + for (i = 0; (ie = index234 (idx->entries, i)) != NULL; i++) + { + rdstringc rs = { 0, 0, NULL }; + whlp_rdaddwc (&rs, ie->text); + ie->backend_data = rs.text; + } + + whlp_prepare (h); + + /* ------------------------------------------------------------------ + * Do the contents page, containing title, preamble and + * copyright. + */ + + whlp_begin_topic (h, contents_topic, "Contents", "DB(\"btn_up\")", NULL); + + /* + * The manual title goes in the non-scroll region, and also + * goes into the system title slot. 
+ */ + { + rdstringc rs = { 0, 0, NULL }; + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Title) + { + whlp_begin_para (h, WHLP_PARA_NONSCROLL); + whlp_mkparagraph (&state, FONT_TITLE, p->words, FALSE); + whlp_rdaddwc (&rs, p->words); + whlp_end_para (h); + } + } + if (rs.text) + { + whlp_title (h, rs.text); + fprintf (state.cntfp, ":Title %s\r\n", rs.text); + sfree (rs.text); + } + whlp_contents_write (&state, 1, "Title page", contents_topic); + /* FIXME: configurability in that string */ + } + + /* + * Next comes the preamble, which just goes into the ordinary + * scrolling region. + */ + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Preamble) + { + whlp_para_attr (h, WHLP_PARA_SPACEBELOW, 12); + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_mkparagraph (&state, FONT_NORMAL, p->words, FALSE); + whlp_end_para (h); + } + } + + /* + * The copyright goes to two places, again: into the contents + * page and also into the system section. + */ + { + rdstringc rs = { 0, 0, NULL }; + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Copyright) + { + whlp_para_attr (h, WHLP_PARA_SPACEBELOW, 12); + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_mkparagraph (&state, FONT_NORMAL, p->words, FALSE); + whlp_end_para (h); + whlp_rdaddwc (&rs, p->words); + } + } + if (rs.text) + { + whlp_copyright (h, rs.text); + sfree (rs.text); + } + } + + /* + * Now do the primary navigation menu. + */ + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Chapter || + p->type == para_Appendix || p->type == para_UnnumberedChapter) + whlp_navmenu (&state, p); + } + + state.curr_topic = contents_topic; + lastsect = NULL; + + /* ------------------------------------------------------------------ + * Now we've done the contents page, we're ready to go through + * and do the main manual text. Ooh. 
+ */ + for (p = sourceform; p; p = p->next) + switch (p->type) + { + /* + * Things we ignore because we've already processed them or + * aren't going to touch them in this pass. + */ + case para_IM: + case para_BR: + case para_Biblio: /* only touch BiblioCited */ + case para_VersionID: + case para_Copyright: + case para_Preamble: + case para_NoCite: + case para_Title: + break; + + /* + * Chapter and section titles: start a new Help topic. + */ + case para_Chapter: + case para_Appendix: + case para_UnnumberedChapter: + case para_Heading: + case para_Subsect: + if (lastsect && lastsect->child) + { + paragraph *q; + /* + * Do a navigation menu for the previous section we + * were in. + */ + for (q = lastsect->child; q; q = q->sibling) + whlp_navmenu (&state, q); + } + { + rdstringc rs = { 0, 0, NULL }; + WHLP_TOPIC new_topic, parent_topic; + char *macro, *topicid; + + new_topic = p->private_data; + whlp_browse_link (h, state.curr_topic, new_topic); + state.curr_topic = new_topic; + + if (p->kwtext) + { + whlp_rdaddwc (&rs, p->kwtext); + rdaddsc (&rs, ": "); /* FIXME: configurability */ + } + whlp_rdaddwc (&rs, p->words); + if (p->parent == NULL) + parent_topic = contents_topic; + else + parent_topic = (WHLP_TOPIC) p->parent->private_data; + topicid = whlp_topic_id (parent_topic); + macro = smalloc (100 + strlen (topicid)); + sprintf (macro, + "CBB(\"btn_up\",\"JI(`',`%s')\");EB(\"btn_up\")", topicid); + whlp_begin_topic (h, new_topic, + rs.text ? rs.text : "", macro, NULL); + sfree (macro); + + { + /* + * Output the .cnt entry. + * + * WinHelp has a bug involving having an internal + * node followed by a leaf at the same level: the + * leaf is output at the wrong level. We can mostly + * work around this by modifying the leaf level + * itself (see whlp_contents_write), but this + * doesn't work for top-level sections since we + * can't turn a level-1 leaf into a level-0 one. 
So + * for top-level leaf sections (Bibliography + * springs to mind), we output an internal node + * containing only the leaf for that section. + */ + int i; + paragraph *q; + + /* Count up the level. */ + i = 1; + for (q = p; q->parent; q = q->parent) + i++; + + if (p->child || !p->parent) + { + /* + * If p has children then it needs to be a + * folder; if it has no parent then it needs to + * be a folder to work around the bug. + */ + whlp_contents_write (&state, i, rs.text, NULL); + i++; + } + whlp_contents_write (&state, i, rs.text, new_topic); + } + + sfree (rs.text); + + whlp_begin_para (h, WHLP_PARA_NONSCROLL); + if (p->kwtext) + { + whlp_mkparagraph (&state, FONT_TITLE, p->kwtext, FALSE); + whlp_set_font (h, FONT_TITLE); + whlp_text (h, ": "); /* FIXME: configurability */ + } + whlp_mkparagraph (&state, FONT_TITLE, p->words, FALSE); + whlp_end_para (h); + + lastsect = p; + } + break; + + case para_Rule: + whlp_para_attr (h, WHLP_PARA_SPACEBELOW, 12); + whlp_para_attr (h, WHLP_PARA_ALIGNMENT, WHLP_ALIGN_CENTRE); + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, FONT_RULE); +#define TEN "\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0" +#define TWENTY TEN TEN +#define FORTY TWENTY TWENTY +#define EIGHTY FORTY FORTY + whlp_text (h, EIGHTY); +#undef TEN +#undef TWENTY +#undef FORTY +#undef EIGHTY + whlp_end_para (h); + break; + + case para_Normal: + case para_BiblioCited: + case para_Bullet: + case para_NumberedList: + whlp_para_attr (h, WHLP_PARA_SPACEBELOW, 12); + if (p->type == para_Bullet || p->type == para_NumberedList) + { + whlp_para_attr (h, WHLP_PARA_LEFTINDENT, 72); + whlp_para_attr (h, WHLP_PARA_FIRSTLINEINDENT, -36); + whlp_set_tabstop (h, 72, WHLP_ALIGN_LEFT); + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, FONT_NORMAL); + if (p->type == para_Bullet) + { + whlp_text (h, "\x95"); + } + else + { + whlp_mkparagraph (&state, FONT_NORMAL, p->kwtext, FALSE); + whlp_text (h, "."); + } + whlp_tab (h); + } + else + { + whlp_begin_para (h, 
WHLP_PARA_SCROLL); + } + + if (p->type == para_BiblioCited) + { + whlp_mkparagraph (&state, FONT_NORMAL, p->kwtext, FALSE); + whlp_text (h, " "); + } + + whlp_mkparagraph (&state, FONT_NORMAL, p->words, FALSE); + whlp_end_para (h); + break; + + case para_Code: + /* + * In a code paragraph, each individual word is a line. For + * Help files, we will have to output this as a set of + * paragraphs, all but the last of which don't set + * SPACEBELOW. + */ + { + word *w; + char *c; + for (w = p->words; w; w = w->next) + { + if (!w->next) + whlp_para_attr (h, WHLP_PARA_SPACEBELOW, 12); + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, FONT_CODE); + whlp_convert (w->text, &c, FALSE); + whlp_text (h, c); + sfree (c); + whlp_end_para (h); + } + } + break; + } + + fclose (state.cntfp); + whlp_close (h, filename); + + /* + * Loop over the index entries, cleaning up our final text + * forms. + */ + for (i = 0; (ie = index234 (idx->entries, i)) != NULL; i++) + { + sfree (ie->backend_data); + } +} + +static void +whlp_contents_write (struct bk_whlp_state *state, + int level, char *text, WHLP_TOPIC topic) +{ + /* + * Horrifying bug in WinHelp. When dropping a section level or + * more without using a folder-type entry, WinHelp accidentally + * adds one to the section level. So we correct for that here. 
+ */ + if (state->cnt_last_level > level && topic) + state->cnt_workaround = -1; + else if (!topic) + state->cnt_workaround = 0; + state->cnt_last_level = level; + + fprintf (state->cntfp, "%d ", level + state->cnt_workaround); + while (*text) + { + if (*text == '=') + fputc ('\\', state->cntfp); + fputc (*text, state->cntfp); + text++; + } + if (topic) + fprintf (state->cntfp, "=%s", whlp_topic_id (topic)); + fputc ('\n', state->cntfp); +} + +static void +whlp_navmenu (struct bk_whlp_state *state, paragraph * p) +{ + whlp_begin_para (state->h, WHLP_PARA_NONSCROLL); + whlp_start_hyperlink (state->h, (WHLP_TOPIC) p->private_data); + if (p->kwtext) + { + whlp_mkparagraph (state, FONT_NORMAL, p->kwtext, TRUE); + whlp_set_font (state->h, FONT_NORMAL); + whlp_text (state->h, ": "); /* FIXME: configurability */ + } + whlp_mkparagraph (state, FONT_NORMAL, p->words, TRUE); + whlp_end_hyperlink (state->h); + whlp_end_para (state->h); + +} + +static void +whlp_mkparagraph (struct bk_whlp_state *state, + int font, word * text, int subsidiary) +{ + keyword *kwl; + int deffont = font; + int currfont = -1; + int newfont; + char *c; + paragraph *xref_target = NULL; + + for (; text; text = text->next) + switch (text->type) + { + case word_HyperLink: + case word_HyperEnd: + break; + + case word_IndexRef: + if (subsidiary) + break; /* disabled in subsidiary bits */ + { + indextag *tag = index_findtag (state->idx, text->text); + int i; + if (!tag) + break; + for (i = 0; i < tag->nrefs; i++) + whlp_index_term (state->h, tag->refs[i]->backend_data, + state->curr_topic); + } + break; + + case word_UpperXref: + case word_LowerXref: + if (subsidiary) + break; /* disabled in subsidiary bits */ + kwl = kw_lookup (state->keywords, text->text); + assert (xref_target == NULL); + if (kwl->para->type == para_NumberedList) + { + break; /* don't xref to numbered list items */ + } + else if (kwl->para->type == para_BiblioCited) + { + /* + * An xref to a bibliography item jumps to the section + * 
containing it. + */ + if (kwl->para->parent) + xref_target = kwl->para->parent; + else + break; + } + else + { + xref_target = kwl->para; + } + whlp_start_hyperlink (state->h, + (WHLP_TOPIC) xref_target->private_data); + break; + + case word_XrefEnd: + if (subsidiary) + break; /* disabled in subsidiary bits */ + if (xref_target) + whlp_end_hyperlink (state->h); + xref_target = NULL; + break; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + if (towordstyle (text->type) == word_Emph) + newfont = deffont + FONT_EMPH; + else if (towordstyle (text->type) == word_Code || + towordstyle (text->type) == word_WeakCode) + newfont = deffont + FONT_CODE; + else + newfont = deffont; + if (newfont != currfont) + { + currfont = newfont; + whlp_set_font (state->h, newfont); + } + if (removeattr (text->type) == word_Normal) + { + if (whlp_convert (text->text, &c, TRUE)) + whlp_text (state->h, c); + else + whlp_mkparagraph (state, deffont, text->alt, FALSE); + sfree (c); + } + else if (removeattr (text->type) == word_WhiteSpace) + { + whlp_text (state->h, " "); + } + else if (removeattr (text->type) == word_Quote) + { + whlp_text (state->h, + quoteaux (text->aux) == quote_Open ? 
"\x91" : "\x92"); + /* FIXME: configurability */ + } + break; + } +} + +static void +whlp_rdaddwc (rdstringc * rs, word * text) +{ + char *c; + + for (; text; text = text->next) + switch (text->type) + { + case word_HyperLink: + case word_HyperEnd: + case word_UpperXref: + case word_LowerXref: + case word_XrefEnd: + case word_IndexRef: + break; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + assert (text->type != word_CodeQuote && + text->type != word_WkCodeQuote); + if (removeattr (text->type) == word_Normal) + { + if (whlp_convert (text->text, &c, FALSE)) + rdaddsc (rs, c); + else + whlp_rdaddwc (rs, text->alt); + sfree (c); + } + else if (removeattr (text->type) == word_WhiteSpace) + { + rdaddc (rs, ' '); + } + else if (removeattr (text->type) == word_Quote) + { + rdaddc (rs, quoteaux (text->aux) == quote_Open ? '\x91' : '\x92'); + /* FIXME: configurability */ + } + break; + } +} + +/* + * Convert a wide string into a string of chars. If `result' is + * non-NULL, mallocs the resulting string and stores a pointer to + * it in `*result'. If `result' is NULL, merely checks whether all + * characters in the string are feasible for the output character + * set. + * + * Return is nonzero if all characters are OK. If not all + * characters are OK but `result' is non-NULL, a result _will_ + * still be generated! + */ +static int +whlp_convert (wchar_t * s, char **result, int hard_spaces) +{ + /* + * FIXME. Currently this is ISO8859-1 only. + */ + int doing = (result != 0); + int ok = TRUE; + char *p = NULL; + int plen = 0, psize = 0; + + for (; *s; s++) + { + wchar_t c = *s; + char outc; + + if ((c >= 32 && c <= 126) || (c >= 160 && c <= 255)) + { + /* Char is OK. 
*/ + if (c == 32 && hard_spaces) + outc = '\240'; + else + outc = (char) c; + } + else + { + /* Char is not OK. */ + ok = FALSE; + outc = 0xBF; /* approximate the good old DEC `uh?' */ + } + if (doing) + { + if (plen >= psize) + { + psize = plen + 256; + p = resize (p, psize); + } + p[plen++] = outc; + } + } + if (doing) + { + p = resize (p, plen + 1); + p[plen] = '\0'; + *result = p; + } + return ok; +} diff --git a/Docs/src/bin/halibut/bk_xhtml.c b/Docs/src/bin/halibut/bk_xhtml.c new file mode 100644 index 00000000..b437f069 --- /dev/null +++ b/Docs/src/bin/halibut/bk_xhtml.c @@ -0,0 +1,1933 @@ +/* + * xhtml backend for Halibut + * (initial implementation by James Aylett) + * + * Still to do: + * + * +++ doesn't handle non-breaking hyphens. Not sure how to yet. + * +++ entity names (from a file -- ideally supply normal SGML files) + * +++ configuration directive to file split where the current layout + * code wouldn't. Needs changes to _ponder_layout() and _do_paras(), + * perhaps others. + * + * Limitations: + * + * +++ biblio/index references target the nearest section marker, rather + * than having a dedicated target themselves. In large bibliographies + * this will cause problems. (The solution is to fake up a response + * from xhtml_find_section(), probably linking it into the sections + * chain just in case we need it again, and to make freeing it up + * easier.) docsrc.pl used to work as we do, however, and SGT agrees that + * this is acceptable for now. + * +++ can't cope with leaf-level == 0. It's all to do with the + * top-level file not being normal, probably not even having a valid + * section level, and stuff like that. I question whether this is an + * issue, frankly; small manuals that fit on one page should probably + * not be written in halibut at all. 
+ */ + +#include +#include +#include +#include +#include "halibut.h" + +struct xhtmlsection_Struct +{ + struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */ + struct xhtmlsection_Struct *child; /* NULL if split across files */ + struct xhtmlsection_Struct *parent; /* NULL if split across files */ + struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */ + paragraph *para; + struct xhtmlfile_Struct *file; /* which file is this a part of? */ + char *fragment; /* fragment id within the file */ + int level; +}; + +struct xhtmlfile_Struct +{ + struct xhtmlfile_Struct *next; + struct xhtmlfile_Struct *child; + struct xhtmlfile_Struct *parent; + char *filename; + struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */ + int is_leaf; /* is this file a leaf file, ie does it not have any children? */ +}; + +typedef struct xhtmlsection_Struct xhtmlsection; +typedef struct xhtmlfile_Struct xhtmlfile; +typedef struct xhtmlindex_Struct xhtmlindex; + +struct xhtmlindex_Struct +{ + int nsection; + int size; + xhtmlsection **sections; +}; + +typedef struct +{ + int just_numbers; + wchar_t *number_suffix; +} +xhtmlheadfmt; + +typedef struct +{ + int contents_depth[6]; + int leaf_contains_contents; + int leaf_level; + int leaf_smallest_contents; + int include_version_id; + wchar_t *author, *description; + wchar_t *head_end, *body, *body_start, *body_end, *address_start, + *address_end, *nav_attrs; + int suppress_address; + xhtmlheadfmt fchapter, *fsect; + int nfsect; +} +xhtmlconfig; + +/*static void xhtml_level(paragraph *, int); +static void xhtml_level_0(paragraph *); +static void xhtml_docontents(FILE *, paragraph *, int); +static void xhtml_dosections(FILE *, paragraph *, int); +static void xhtml_dobody(FILE *, paragraph *, int);*/ + +static void xhtml_doheader (FILE *, word *); +static void xhtml_dofooter (FILE *); +static void xhtml_versionid (FILE *, word *, int); + +static void 
xhtml_utostr (wchar_t *, char **); +static int xhtml_para_level (paragraph *); +static int xhtml_reservedchar (int); + +static int xhtml_convert (wchar_t *, char **, int); +static void xhtml_rdaddwc (rdstringc *, word *, word *); +static void xhtml_para (FILE *, word *); +static void xhtml_codepara (FILE *, word *); +static void xhtml_heading (FILE *, paragraph *); + +/* File-global variables are much easier than passing these things + * all over the place. Evil, but easier. We can replace this with a single + * structure at some point. + */ +static xhtmlconfig conf; +static keywordlist *keywords; +static indexdata *idx; +static xhtmlfile *topfile; +static xhtmlsection *topsection; +static paragraph *sourceparas; +static xhtmlfile *lastfile; +static xhtmlfile *xhtml_last_file = NULL; +static int last_level = -1; +static xhtmlsection *currentsection; + +static xhtmlconfig +xhtml_configure (paragraph * source) +{ + xhtmlconfig ret; + + /* + * Defaults. + */ + ret.contents_depth[0] = 2; + ret.contents_depth[1] = 3; + ret.contents_depth[2] = 4; + ret.contents_depth[3] = 5; + ret.contents_depth[4] = 6; + ret.contents_depth[5] = 7; + ret.leaf_level = 2; + ret.leaf_smallest_contents = 4; + ret.leaf_contains_contents = FALSE; + ret.include_version_id = TRUE; + ret.author = NULL; + ret.description = NULL; + ret.head_end = NULL; + ret.body = NULL; + ret.body_start = NULL; + ret.body_end = NULL; + ret.address_start = NULL; + ret.address_end = NULL; + ret.nav_attrs = NULL; + ret.suppress_address = FALSE; + + ret.fchapter.just_numbers = FALSE; + ret.fchapter.number_suffix = ustrdup (L": "); + ret.nfsect = 2; + ret.fsect = mknewa (xhtmlheadfmt, ret.nfsect); + ret.fsect[0].just_numbers = FALSE; + ret.fsect[0].number_suffix = ustrdup (L": "); + ret.fsect[1].just_numbers = TRUE; + ret.fsect[1].number_suffix = ustrdup (L" "); + + for (; source; source = source->next) + { + if (source->type == para_Config) + { + if (!ustricmp (source->keyword, L"xhtml-contents-depth-0")) + { + 
ret.contents_depth[0] = utoi (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"xhtml-contents-depth-1")) + { + ret.contents_depth[1] = utoi (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"xhtml-contents-depth-2")) + { + ret.contents_depth[2] = utoi (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"xhtml-contents-depth-3")) + { + ret.contents_depth[3] = utoi (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"xhtml-contents-depth-4")) + { + ret.contents_depth[4] = utoi (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"xhtml-contents-depth-5")) + { + ret.contents_depth[5] = utoi (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"xhtml-leaf-level")) + { + ret.leaf_level = utoi (uadv (source->keyword)); + } + else + if (!ustricmp (source->keyword, L"xhtml-leaf-smallest-contents")) + { + ret.leaf_smallest_contents = utoi (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"xhtml-versionid")) + { + ret.include_version_id = utob (uadv (source->keyword)); + } + else + if (!ustricmp (source->keyword, L"xhtml-leaf-contains-contents")) + { + ret.leaf_contains_contents = utob (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"xhtml-suppress-address")) + { + ret.suppress_address = utob (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"xhtml-author")) + { + ret.author = uadv (source->keyword); + } + else if (!ustricmp (source->keyword, L"xhtml-description")) + { + ret.description = uadv (source->keyword); + } + else if (!ustricmp (source->keyword, L"xhtml-head-end")) + { + ret.head_end = uadv (source->keyword); + } + else if (!ustricmp (source->keyword, L"xhtml-body-start")) + { + ret.body_start = uadv (source->keyword); + } + else if (!ustricmp (source->keyword, L"xhtml-body-tag")) + { + ret.body = uadv (source->keyword); + } + else if (!ustricmp (source->keyword, L"xhtml-body-end")) + { + 
ret.body_end = uadv (source->keyword); + } + else if (!ustricmp (source->keyword, L"xhtml-address-start")) + { + ret.address_start = uadv (source->keyword); + } + else if (!ustricmp (source->keyword, L"xhtml-address-end")) + { + ret.address_end = uadv (source->keyword); + } + else + if (!ustricmp (source->keyword, L"xhtml-navigation-attributes")) + { + ret.nav_attrs = uadv (source->keyword); + } + else if (!ustricmp (source->keyword, L"xhtml-chapter-numeric")) + { + ret.fchapter.just_numbers = utob (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"xhtml-chapter-suffix")) + { + ret.fchapter.number_suffix = ustrdup (uadv (source->keyword)); + } + else if (!ustricmp (source->keyword, L"xhtml-section-numeric")) + { + wchar_t *p = uadv (source->keyword); + int n = 0; + if (uisdigit (*p)) + { + n = utoi (p); + p = uadv (p); + } + if (n >= ret.nfsect) + { + int i; + ret.fsect = resize (ret.fsect, n + 1); + for (i = ret.nfsect; i <= n; i++) + ret.fsect[i] = ret.fsect[ret.nfsect - 1]; + ret.nfsect = n + 1; + } + ret.fsect[n].just_numbers = utob (p); + } + else if (!ustricmp (source->keyword, L"xhtml-section-suffix")) + { + wchar_t *p = uadv (source->keyword); + int n = 0; + if (uisdigit (*p)) + { + n = utoi (p); + p = uadv (p); + } + if (n >= ret.nfsect) + { + int i; + ret.fsect = resize (ret.fsect, n + 1); + for (i = ret.nfsect; i <= n; i++) + ret.fsect[i] = ret.fsect[ret.nfsect - 1]; + ret.nfsect = n + 1; + } + ret.fsect[n].number_suffix = ustrdup (p); + } + } + } + + /* printf(" !!! leaf_level = %i\n", ret.leaf_level); + printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]); + printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]); + printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]); + printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]); + printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]); + printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]); + printf(" !!! 
leaf_contains_contents = %i\n", ret.leaf_contains_contents); */ + return ret; +} + +static xhtmlsection * +xhtml_new_section (xhtmlsection * last) +{ + xhtmlsection *ret = mknew (xhtmlsection); + ret->next = NULL; + ret->child = NULL; + ret->parent = NULL; + ret->chain = last; + ret->para = NULL; + ret->file = NULL; + ret->fragment = NULL; + ret->level = -1; /* marker: end of chain */ + return ret; +} + +/* Returns NULL or the section that marks that paragraph */ +static xhtmlsection * +xhtml_find_section (paragraph * p) +{ + xhtmlsection *ret = topsection; + if (xhtml_para_level (p) == -1) + { /* first, we back-track to a section paragraph */ + paragraph *p2 = sourceparas; + paragraph *p3 = NULL; + while (p2 && p2 != p) + { + if (xhtml_para_level (p2) != -1) + { + p3 = p2; + } + p2 = p2->next; + } + if (p3 == NULL) + { /* for some reason, we couldn't find a section before this paragraph ... ? */ + /* Note that this can happen, if you have a cross-reference to before the first chapter starts. + * So don't do that, then. 
+ */ + return NULL; + } + p = p3; + } + while (ret && ret->para != p) + { +/* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/ + ret = ret->chain; + } + return ret; +} + +static xhtmlfile * +xhtml_new_file (xhtmlsection * sect) +{ + xhtmlfile *ret = mknew (xhtmlfile); + + ret->next = NULL; + ret->child = NULL; + ret->parent = NULL; + ret->filename = NULL; + ret->sections = sect; + ret->is_leaf = (sect != NULL && sect->level == conf.leaf_level); + if (sect == NULL) + { + if (conf.leaf_level == 0) + { /* currently unused */ +#define FILENAME_MANUAL "Manual.html" +#define FILENAME_CONTENTS "Contents.html" + ret->filename = smalloc (strlen (FILENAME_MANUAL) + 1); + sprintf (ret->filename, FILENAME_MANUAL); + } + else + { + ret->filename = smalloc (strlen (FILENAME_CONTENTS) + 1); + sprintf (ret->filename, FILENAME_CONTENTS); + } + } + else + { + paragraph *p = sect->para; + rdstringc fname_c = { 0, 0, NULL }; + char *c; + word *w; + for (w = (p->kwtext) ? (p->kwtext) : (p->words); w; w = w->next) + { + switch (removeattr (w->type)) + { + case word_Normal: + /*case word_Emph: + case word_Code: + case word_WeakCode: */ + xhtml_utostr (w->text, &c); + rdaddsc (&fname_c, c); + sfree (c); + break; + } + } + rdaddsc (&fname_c, ".html"); + ret->filename = rdtrimc (&fname_c); + } + /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false")); */ + return ret; +} + +/* + * Walk the tree fixing up files which are actually leaf (ie + * have no children) but aren't at leaf level, so they have the + * leaf flag set. + */ +void +xhtml_fixup_layout (xhtmlfile * file) +{ + if (file->child == NULL) + { + file->is_leaf = TRUE; + } + else + { + xhtml_fixup_layout (file->child); + } + if (file->next) + xhtml_fixup_layout (file->next); +} + +/* + * Create the tree structure so we know where everything goes. 
 * Method:
 *
 * Ignoring file splitting, we have three choices with each new section:
 *
 *       +-----------------+-----------------+
 *       |                 |                 |
 *       X            +----X----+           (1)
 *                    |         |
 *                    Y        (2)
 *                    |
 *                   (3)
 *
 * Y is the last section we added (currentsect).
 * If sect is the section we want to add, then:
 *
 * (1) if sect->level < currentsect->level
 * (2) if sect->level == currentsect->level
 * (3) if sect->level > currentsect->level
 *
 * This requires the constraint that you never skip section numbers
 * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
 *
 * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
 * more than one level at a time. Lots of asserts, and probably part of
 * the algorithm here, rely on this being true. (It currently isn't
 * enforced by halibut, however.)
 *
 * File splitting makes this harder. For instance, say we added at (3)
 * above and now need to add another section. We are splitting at level
 * 2, ie the level of Y. Z is the last section we added:
 *
 *       +-----------------+-----------------+
 *       |                 |                 |
 *       X            +----X----+           (1)
 *                    |         |
 *               +----Y----+   (1)
 *               |         |
 *               Z        (2)
 *               |
 *              (3)
 *
 * The (1) case is now split; we need to search upwards to find where
 * to actually link in. The other two cases remain the same (and will
 * always be like this).
 *
 * Although file splitting complicates the linking, the decision of
 * whether to split to a new file is always made on the same condition
 * (is the level of this section higher than the leaf_level
 * configuration value or not).
+ * + * Treating the cases backwards: + * + * (3) same file if sect->level > conf.leaf_level, otherwise new file + * + * if in the same file, currentsect->child points to sect + * otherwise the linking is done through the file tree (which works + * in more or less the same way, ie currentfile->child points to + * the new file) + * + * (2) same file if sect->level > conf.leaf_level, otherwise new file + * + * if in the same file, currentsect->next points to sect + * otherwise file linking and currentfile->next points to the new + * file (we know that Z must have caused a new file to be created) + * + * (1) same file if sect->level > conf.leaf_level, otherwise new file + * + * this is actually effectively the same case as (2) here, + * except that we first have to travel up the sections to figure + * out which section this new one will be a sibling of. In doing + * so, we may disappear off the top of a file and have to go up + * to its parent in the file tree. + * + */ +static void +xhtml_ponder_layout (paragraph * p) +{ + xhtmlsection *lastsection; + xhtmlsection *currentsect; + xhtmlfile *currentfile; + + lastfile = NULL; + topsection = xhtml_new_section (NULL); + topfile = xhtml_new_file (NULL); + lastsection = topsection; + currentfile = topfile; + currentsect = topsection; + + if (conf.leaf_level == 0) + { + topfile->is_leaf = 1; + topfile->sections = topsection; + topsection->file = topfile; + } + + for (; p; p = p->next) + { + int level = xhtml_para_level (p); + if (level > 0) /* actually a section */ + { + xhtmlsection *sect; + word *w; + char *c; + rdstringc fname_c = { 0, 0, NULL }; + + sect = xhtml_new_section (lastsection); + lastsection = sect; + sect->para = p; + for (w = (p->kwtext2) ? (p->kwtext2) : (p->words); w; w = w->next) /* kwtext2 because we want numbers only! 
*/ + { + switch (removeattr (w->type)) + { + case word_Normal: + /*case word_Emph: + case word_Code: + case word_WeakCode: */ + xhtml_utostr (w->text, &c); + rdaddsc (&fname_c, c); + sfree (c); + break; + } + } +/* rdaddsc(&fname_c, ".html");*/ + sect->fragment = rdtrimc (&fname_c); + sect->level = level; + /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level); */ + + if (level > currentsect->level) + { /* case (3) */ + if (level > conf.leaf_level) + { /* same file */ + assert (currentfile->is_leaf); + currentsect->child = sect; + sect->parent = currentsect; + sect->file = currentfile; + /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename); */ + currentsect = sect; + } + else + { /* new file */ + xhtmlfile *file = xhtml_new_file (sect); + assert (!currentfile->is_leaf); + currentfile->child = file; + sect->file = file; + file->parent = currentfile; + /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename); */ + currentfile = file; + currentsect = sect; + } + } + else if (level >= currentsect->file->sections->level) + { + /* Case (1) or (2) *AND* still under the section that starts + * the current file. + * + * I'm not convinced that this couldn't be rolled in with the + * final else {} leg further down. It seems a lot of effort + * this way. 
+ */ + if (level > conf.leaf_level) + { /* stick within the same file */ + assert (currentfile->is_leaf); + sect->file = currentfile; + while (currentsect && currentsect->level > level && + currentsect->file == currentsect->parent->file) + { + currentsect = currentsect->parent; + } + assert (currentsect); + currentsect->next = sect; + assert (currentsect->level == sect->level); + sect->parent = currentsect->parent; + currentsect = sect; + /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename); */ + } + else + { /* new file */ + xhtmlfile *file = xhtml_new_file (sect); + sect->file = file; + currentfile->next = file; + file->parent = currentfile->parent; + file->is_leaf = (level == conf.leaf_level); + file->sections = sect; + /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename); */ + currentfile = file; + currentsect = sect; + } + } + else + { /* Case (1) or (2) and we must move up the file tree first */ + /* this loop is now probably irrelevant - we know we can't connect + * to anything in the current file */ + while (currentsect && level < currentsect->level) + { + currentsect = currentsect->parent; + if (currentsect) + { + /* printf(" * up one level to '%s'\n", currentsect->fragment); */ + } + else + { + /* printf(" * up one level (off top of current file)\n"); */ + } + } + if (currentsect) + { + /* I'm pretty sure this can now never fire */ + assert (currentfile->is_leaf); + /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename); */ + sect->file = currentfile; + currentsect->next = sect; + currentsect = sect; + } + else + { /* find a file we can attach to */ + while (currentfile && currentfile->sections + && level < currentfile->sections->level) + { + currentfile = currentfile->parent; + if (currentfile) + { + /* printf(" * up one file level to '%s'\n", currentfile->filename); */ + } + else + { + /* printf(" * up one file level (off top of tree)\n"); 
*/ + } + } + if (currentfile) + { /* new file (we had to skip up a file to + get here, so we must be dealing with a + level no lower than the configured + leaf_level */ + xhtmlfile *file = xhtml_new_file (sect); + currentfile->next = file; + sect->file = file; + file->parent = currentfile->parent; + file->is_leaf = (level == conf.leaf_level); + file->sections = sect; + /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename); */ + currentfile = file; + currentsect = sect; + } + else + { + fatal (err_whatever, + "Ran off the top trying to connect sibling: strange document."); + } + } + } + } + } + topsection = lastsection; /* get correct end of the chain */ + xhtml_fixup_layout (topfile); /* leaf files not at leaf level marked as such */ +} + +static void xhtml_do_index (); +static void xhtml_do_file (xhtmlfile * file); +static void xhtml_do_top_file (xhtmlfile * file, paragraph * sourceform); +static void xhtml_do_paras (FILE * fp, paragraph * p); +static int xhtml_do_contents_limit (FILE * fp, xhtmlfile * file, int limit); +static int xhtml_do_contents_section_limit (FILE * fp, xhtmlsection * section, + int limit); +static int xhtml_add_contents_entry (FILE * fp, xhtmlsection * section, + int limit); +static int xhtml_do_contents (FILE * fp, xhtmlfile * file); +static int xhtml_do_naked_contents (FILE * fp, xhtmlfile * file); +static void xhtml_do_sections (FILE * fp, xhtmlsection * sections); + +/* + * Do all the files in this structure. 
+ */ +static void +xhtml_do_files (xhtmlfile * file) +{ + xhtml_do_file (file); + if (file->child) + xhtml_do_files (file->child); + if (file->next) + xhtml_do_files (file->next); +} + +/* + * Free up all memory used by the file tree from 'xfile' downwards + */ +static void +xhtml_free_file (xhtmlfile * xfile) +{ + if (xfile == NULL) + { + return; + } + + if (xfile->filename) + { + sfree (xfile->filename); + } + xhtml_free_file (xfile->child); + xhtml_free_file (xfile->next); + sfree (xfile); +} + +/* + * Main function. + */ +void +xhtml_backend (paragraph * sourceform, keywordlist * in_keywords, + indexdata * in_idx) +{ +/* int i;*/ + indexentry *ientry; + int ti; + xhtmlsection *xsect; + + sourceparas = sourceform; + conf = xhtml_configure (sourceform); + keywords = in_keywords; + idx = in_idx; + + /* Clear up the index entries backend data pointers */ + for (ti = 0; (ientry = (indexentry *) index234 (idx->entries, ti)) != NULL; + ti++) + { + ientry->backend_data = NULL; + } + + xhtml_ponder_layout (sourceform); + + /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */ +/* xhtml_level_0(sourceform); + for (i=1; i<=conf.leaf_level; i++) + { + xhtml_level(sourceform, i); + }*/ + + /* new system ... 
(writes to *.html, but isn't fully trusted) */ + xhtml_do_top_file (topfile, sourceform); + assert (!topfile->next); /* shouldn't have a sibling at all */ + if (topfile->child) + { + xhtml_do_files (topfile->child); + xhtml_do_index (); + } + + /* release file, section, index data structures */ + xsect = topsection; + while (xsect) + { + xhtmlsection *tmp = xsect->chain; + if (xsect->fragment) + { + sfree (xsect->fragment); + } + sfree (xsect); + xsect = tmp; + } + xhtml_free_file (topfile); + for (ti = 0; (ientry = (indexentry *) index234 (idx->entries, ti)) != NULL; + ti++) + { + if (ientry->backend_data != NULL) + { + xhtmlindex *xi = (xhtmlindex *) ientry->backend_data; + if (xi->sections != NULL) + { + sfree (xi->sections); + } + sfree (xi); + } + ientry->backend_data = NULL; + } + { + int i; + sfree (conf.fchapter.number_suffix); + for (i = 0; i < conf.nfsect; i++) + sfree (conf.fsect[i].number_suffix); + sfree (conf.fsect); + } +} + +static int +xhtml_para_level (paragraph * p) +{ + switch (p->type) + { + case para_Title: + return 0; + break; + case para_UnnumberedChapter: + case para_Chapter: + case para_Appendix: + return 1; + break; +/* case para_BiblioCited: + return 2; + break;*/ + case para_Heading: + case para_Subsect: + return p->aux + 2; + break; + default: + return -1; + break; + } +} + +static char *xhtml_index_filename = "IndexPage.html"; + +/* Output the nav links for the current file. 
+ * file == NULL means we're doing the index + */ +static void +xhtml_donavlinks (FILE * fp, xhtmlfile * file) +{ + xhtmlfile *xhtml_next_file = NULL; + fprintf (fp, "", conf.nav_attrs); + } + else + { + fprintf (fp, ">"); + } + if (xhtml_last_file == NULL) + { + fprintf (fp, "Previous | "); + } + else + { + fprintf (fp, "Previous | ", xhtml_last_file->filename); + } + fprintf (fp, "Contents | "); + if (file != NULL) + { /* otherwise we're doing nav links for the index */ + if (xhtml_next_file == NULL) + xhtml_next_file = file->child; + if (xhtml_next_file == NULL) + xhtml_next_file = file->next; + if (xhtml_next_file == NULL) + xhtml_next_file = file->parent->next; + } + if (xhtml_next_file == NULL) + { + if (file == NULL) + { /* index, so no next file */ + fprintf (fp, "Next "); + } + else + { + fprintf (fp, "Next", xhtml_index_filename); + } + } + else + { + fprintf (fp, "Next", xhtml_next_file->filename); + } + fprintf (fp, "

\n"); +} + +/* Write out the index file */ +static void +xhtml_do_index_body (FILE * fp) +{ + indexentry *y; + int ti; + + if (count234 (idx->entries) == 0) + return; /* don't write anything at all */ + + fprintf (fp, "
\n"); + /* iterate over idx->entries using the tree functions and display everything */ + for (ti = 0; (y = (indexentry *) index234 (idx->entries, ti)) != NULL; ti++) + { + if (y->backend_data) + { + int i; + xhtmlindex *xi; + + fprintf (fp, "
"); + xhtml_para (fp, y->text); + fprintf (fp, "
\n
"); + + xi = (xhtmlindex *) y->backend_data; + for (i = 0; i < xi->nsection; i++) + { + xhtmlsection *sect = xi->sections[i]; + if (sect) + { + fprintf (fp, "", sect->file->filename, + sect->fragment); + if (sect->para->kwtext) + { + xhtml_para (fp, sect->para->kwtext); + } + else if (sect->para->words) + { + xhtml_para (fp, sect->para->words); + } + fprintf (fp, ""); + if (i + 1 < xi->nsection) + { + fprintf (fp, ", "); + } + } + } + fprintf (fp, "
\n"); + } + } + fprintf (fp, "
\n"); +} +static void +xhtml_do_index () +{ + word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", {NULL, 0, 0} }; + FILE *fp = fopen (xhtml_index_filename, "w"); + + if (fp == NULL) + fatal (err_cantopenw, xhtml_index_filename); + xhtml_doheader (fp, &temp_word); + xhtml_donavlinks (fp, NULL); + + xhtml_do_index_body (fp); + + xhtml_donavlinks (fp, NULL); + xhtml_dofooter (fp); + fclose (fp); +} + +/* Output the given file. This includes whatever contents at beginning and end, etc. etc. */ +static void +xhtml_do_file (xhtmlfile * file) +{ + FILE *fp = fopen (file->filename, "w"); + if (fp == NULL) + fatal (err_cantopenw, file->filename); + + if (file->sections->para->words) + { + xhtml_doheader (fp, file->sections->para->words); + } + else if (file->sections->para->kwtext) + { + xhtml_doheader (fp, file->sections->para->kwtext); + } + else + { + xhtml_doheader (fp, NULL); + } + + xhtml_donavlinks (fp, file); + + if (file->is_leaf && conf.leaf_contains_contents && + xhtml_do_contents (NULL, file) >= conf.leaf_smallest_contents) + xhtml_do_contents (fp, file); + xhtml_do_sections (fp, file->sections); + if (!file->is_leaf) + xhtml_do_naked_contents (fp, file); + + xhtml_donavlinks (fp, file); + + xhtml_dofooter (fp); + fclose (fp); + + xhtml_last_file = file; +} + +/* Output the top-level file. */ +static void +xhtml_do_top_file (xhtmlfile * file, paragraph * sourceform) +{ + paragraph *p; + int done = FALSE; + FILE *fp = fopen (file->filename, "w"); + if (fp == NULL) + fatal (err_cantopenw, file->filename); + + /* Do the title -- only one allowed */ + for (p = sourceform; p && !done; p = p->next) + { + if (p->type == para_Title) + { + xhtml_doheader (fp, p->words); + done = TRUE; + } + } + if (!done) + xhtml_doheader (fp, NULL /* Eek! */ ); + + /* + * Display the title. 
+ */ + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Title) + { + xhtml_heading (fp, p); + break; + } + } + + /* Do the preamble and copyright */ + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Preamble) + { + fprintf (fp, "

"); + xhtml_para (fp, p->words); + fprintf (fp, "

\n"); + } + } + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Copyright) + { + fprintf (fp, "

"); + xhtml_para (fp, p->words); + fprintf (fp, "

\n"); + } + } + + xhtml_do_contents (fp, file); + xhtml_do_sections (fp, file->sections); + + /* + * Put the index in the top file if we're in single-file mode + * (leaf-level 0). + */ + if (conf.leaf_level == 0 && count234 (idx->entries) > 0) + { + fprintf (fp, "

Index

\n"); + xhtml_do_index_body (fp); + } + + xhtml_dofooter (fp); + fclose (fp); +} + +/* Convert a Unicode string to an ASCII one. '?' is + * used for unmappable characters. + */ +static void +xhtml_utostr (wchar_t * in, char **out) +{ + int l = ustrlen (in); + int i; + *out = smalloc (l + 1); + for (i = 0; i < l; i++) + { + if (in[i] >= 32 && in[i] <= 126) + (*out)[i] = (char) in[i]; + else + (*out)[i] = '?'; + } + (*out)[i] = 0; +} + +/* + * Write contents for the given file, and subfiles, down to + * the appropriate contents depth. Returns the number of + * entries written. + */ +static int +xhtml_do_contents (FILE * fp, xhtmlfile * file) +{ + int level, limit, start_level, count = 0; + if (!file) + return 0; + + level = (file->sections) ? (file->sections->level) : (0); + limit = conf.contents_depth[(level > 5) ? (5) : (level)]; + start_level = (file->is_leaf) ? (level - 1) : (level); + last_level = start_level; + + count += xhtml_do_contents_section_limit (fp, file->sections, limit); + count += xhtml_do_contents_limit (fp, file->child, limit); + if (fp != NULL) + { + while (last_level > start_level) + { + last_level--; + fprintf (fp, "\n"); + } + } + return count; +} + +/* As above, but doesn't do anything in the current file */ +static int +xhtml_do_naked_contents (FILE * fp, xhtmlfile * file) +{ + int level, limit, start_level, count = 0; + if (!file) + return 0; + + level = (file->sections) ? (file->sections->level) : (0); + limit = conf.contents_depth[(level > 5) ? (5) : (level)]; + start_level = (file->is_leaf) ? (level - 1) : (level); + last_level = start_level; + + count = xhtml_do_contents_limit (fp, file->child, limit); + if (fp != NULL) + { + while (last_level > start_level) + { + last_level--; + fprintf (fp, "\n"); + } + } + return count; +} + +/* + * Write contents for the given file, children, and siblings, down to + * given limit contents depth. 
+ */ +static int +xhtml_do_contents_limit (FILE * fp, xhtmlfile * file, int limit) +{ + int count = 0; + while (file) + { + count += xhtml_do_contents_section_limit (fp, file->sections, limit); + count += xhtml_do_contents_limit (fp, file->child, limit); + file = file->next; + } + return count; +} + +/* + * Write contents entries for the given section tree, down to the + * limit contents depth. + */ +static int +xhtml_do_contents_section_deep_limit (FILE * fp, xhtmlsection * section, + int limit) +{ + int count = 0; + while (section) + { + if (!xhtml_add_contents_entry (fp, section, limit)) + return 0; + else + count++; + count += + xhtml_do_contents_section_deep_limit (fp, section->child, limit); + section = section->next; + } + return count; +} + +/* + * Write contents entries for the given section tree, down to the + * limit contents depth. + */ +static int +xhtml_do_contents_section_limit (FILE * fp, xhtmlsection * section, int limit) +{ + int count = 0; + if (!section) + return 0; + xhtml_add_contents_entry (fp, section, limit); + count = 1; + count += xhtml_do_contents_section_deep_limit (fp, section->child, limit); + /* section=section->child; + while (section && xhtml_add_contents_entry(fp, section, limit)) { + section = section->next; + } */ + return count; +} + +/* + * Add a section entry, unless we're exceeding the limit, in which + * case return FALSE (otherwise return TRUE). + */ +static int +xhtml_add_contents_entry (FILE * fp, xhtmlsection * section, int limit) +{ + if (!section || section->level > limit) + return FALSE; + if (fp == NULL || section->level < 0) + return TRUE; + while (last_level > section->level) + { + last_level--; + fprintf (fp, "\n"); + } + while (last_level < section->level) + { + last_level++; + fprintf (fp, "
    \n"); + } + fprintf (fp, "
  • ", section->file->filename, + section->fragment); + if (section->para->kwtext) + { + xhtml_para (fp, section->para->kwtext); + if (section->para->words) + { + fprintf (fp, ": "); + } + } + if (section->para->words) + { + xhtml_para (fp, section->para->words); + } + fprintf (fp, "
  • \n"); + return TRUE; +} + +/* + * Write all the sections in this file. Do all paragraphs in this section, then all + * children (recursively), then go on to the next one (tail recursively). + */ +static void +xhtml_do_sections (FILE * fp, xhtmlsection * sections) +{ + while (sections) + { + currentsection = sections; + xhtml_do_paras (fp, sections->para); + xhtml_do_sections (fp, sections->child); + sections = sections->next; + } +} + +/* Write this list of paragraphs. Close off all lists at the end. */ +static void +xhtml_do_paras (FILE * fp, paragraph * p) +{ + int last_type = -1, first = TRUE; + if (!p) + return; + +/* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/ + for (; p && (xhtml_para_level (p) == -1 || first); p = p->next) + { + first = FALSE; + switch (p->type) + { + /* + * Things we ignore because we've already processed them or + * aren't going to touch them in this pass. + */ + case para_IM: + case para_BR: + case para_Biblio: /* only touch BiblioCited */ + case para_VersionID: + case para_Copyright: + case para_Preamble: + case para_NoCite: + case para_Title: + break; + + /* + * Chapter titles. + */ + case para_Chapter: + case para_Appendix: + case para_UnnumberedChapter: + xhtml_heading (fp, p); + break; + + case para_Heading: + case para_Subsect: + xhtml_heading (fp, p); + break; + + case para_Rule: + fprintf (fp, "\n
    \n"); + break; + + case para_Normal: + fprintf (fp, "\n

    "); + xhtml_para (fp, p->words); + fprintf (fp, "

    \n"); + break; + + case para_Bullet: + case para_NumberedList: + case para_BiblioCited: + if (last_type != p->type) + { + /* start up list if necessary */ + if (p->type == para_Bullet) + { + fprintf (fp, "
      \n"); + } + else if (p->type == para_NumberedList) + { + fprintf (fp, "
        \n"); + } + else if (p->type == para_BiblioCited) + { + fprintf (fp, "
        \n"); + } + } + if (p->type == para_Bullet || p->type == para_NumberedList) + fprintf (fp, "
      1. "); + else if (p->type == para_BiblioCited) + { + fprintf (fp, "
        "); + xhtml_para (fp, p->kwtext); + fprintf (fp, "
        \n
        "); + } + xhtml_para (fp, p->words); + if (p->type == para_BiblioCited) + { + fprintf (fp, "
        \n"); + } + else if (p->type == para_Bullet || p->type == para_NumberedList) + { + fprintf (fp, "
      2. "); + } + if (p->type == para_Bullet || p->type == para_NumberedList + || p->type == para_BiblioCited) + /* close off list if necessary */ + { + paragraph *p2 = p->next; + int close_off = FALSE; +/* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/ + if (p2 && xhtml_para_level (p2) == -1) + { + if (p2->type != p->type) + close_off = TRUE; + } + else + { + close_off = TRUE; + } + if (close_off) + { + if (p->type == para_Bullet) + { + fprintf (fp, "
    \n"); + } + else if (p->type == para_NumberedList) + { + fprintf (fp, "\n"); + } + else if (p->type == para_BiblioCited) + { + fprintf (fp, "\n"); + } + } + } + break; + + case para_Code: + xhtml_codepara (fp, p->words); + break; + } + last_type = p->type; + } +} + +/* + * Output a header for this XHTML file. + */ +static void +xhtml_doheader (FILE * fp, word * title) +{ + fprintf (fp, + "\n"); + fprintf (fp, + "\n\n\n"); + if (title == NULL) + fprintf (fp, "The thing with no name!"); + else + xhtml_para (fp, title); + fprintf (fp, "\n"); + fprintf (fp, + "\n", + version); + if (conf.author) + fprintf (fp, "\n", conf.author); + if (conf.description) + fprintf (fp, "\n", + conf.description); + if (conf.head_end) + fprintf (fp, "%ls\n", conf.head_end); + fprintf (fp, "\n\n"); + if (conf.body) + fprintf (fp, "%ls\n", conf.body); + else + fprintf (fp, "\n"); + if (conf.body_start) + fprintf (fp, "%ls\n", conf.body_start); +} + +/* + * Output a footer for this XHTML file. + */ +static void +xhtml_dofooter (FILE * fp) +{ + fprintf (fp, "\n
    \n\n"); + if (conf.body_end) + fprintf (fp, "%ls\n", conf.body_end); + if (!conf.suppress_address) + { + fprintf (fp, "
    \n"); + if (conf.address_start) + fprintf (fp, "%ls\n", conf.address_start); + /* Do the version ID */ + if (conf.include_version_id) + { + paragraph *p; + int started = 0; + for (p = sourceparas; p; p = p->next) + if (p->type == para_VersionID) + { + xhtml_versionid (fp, p->words, started); + started = 1; + } + } + if (conf.address_end) + fprintf (fp, "%ls\n", conf.address_end); + fprintf (fp, "
    \n"); + } + fprintf (fp, "\n\n\n"); +} + +/* + * Output the versionid paragraph. Typically this is a version control + * ID string (such as $Id...$ in RCS). + */ +static void +xhtml_versionid (FILE * fp, word * text, int started) +{ + rdstringc t = { 0, 0, NULL }; + + rdaddc (&t, '['); /* FIXME: configurability */ + xhtml_rdaddwc (&t, text, NULL); + rdaddc (&t, ']'); /* FIXME: configurability */ + + if (started) + fprintf (fp, "
    \n"); + fprintf (fp, "%s\n", t.text); + sfree (t.text); +} + +/* Is this an XHTML reserved character? */ +static int +xhtml_reservedchar (int c) +{ + if (c == '&' || c == '<' || c == '>' || c == '"') + return TRUE; + else + return FALSE; +} + +/* + * Convert a wide string into valid XHTML: Anything outside ASCII will + * be fixed up as an entity. Currently we don't worry about constraining the + * encoded character set, which we should probably do at some point (we can + * still fix up and return FALSE - see the last comment here). We also don't + * currently + * + * Because this is only used for words, spaces are HARD spaces (any other + * spaces will be word_Whitespace not word_Normal). So they become   + * Unless hard_spaces is FALSE, of course (code paragraphs break the above + * rule). + * + * If `result' is non-NULL, mallocs the resulting string and stores a pointer to + * it in `*result'. If `result' is NULL, merely checks whether all + * characters in the string are feasible. + * + * Return is nonzero if all characters are OK. If not all + * characters are OK but `result' is non-NULL, a result _will_ + * still be generated! + */ +static int +xhtml_convert (wchar_t * s, char **result, int hard_spaces) +{ + int doing = (result != 0); + int ok = TRUE; + char *p = NULL; + int plen = 0, psize = 0; + + for (; *s; s++) + { + wchar_t c = *s; + +#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); } + + if (((c == 32 && !hard_spaces) + || (c > 32 && c <= 126 && !xhtml_reservedchar (c)))) + { + /* Char is OK. */ + if (doing) + { + ensure_size (plen); + p[plen++] = (char) c; + } + } + else + { + /* Char needs fixing up. */ + /* ok = FALSE; -- currently we never return FALSE; we + * might want to when considering a character set for the + * encoded document. 
+ */ + if (doing) + { + if (c == 32) + { /* a space in a word is a hard space */ + ensure_size (plen + 7); /* includes space for the NUL, which is subsequently stomped on */ + sprintf (p + plen, " "); + plen += 6; + } + else + { + switch (c) + { + case '&': + ensure_size (plen + 6); /* includes space for the NUL, which is subsequently stomped on */ + plen += sprintf (p + plen, "&"); + break; + case '"': + ensure_size (plen + 7); /* includes space for the NUL, which is subsequently stomped on */ + plen += sprintf (p + plen, """); + break; + case '<': + if (plen > 1 && *(s-1) == '\\' && *(s-2) == '\\') + { + ensure_size(--plen); + p[plen-1] = (char) c; + p[plen] = 0; + } + else + { + ensure_size (plen + 5); /* includes space for the NUL, which is subsequently stomped on */ + plen += sprintf (p + plen, "<"); + } + break; + case '>': + if (plen > 1 && *(s-1) == '\\' && *(s-2) == '\\') + { + ensure_size(--plen); + p[plen-1] = (char) c; + p[plen] = 0; + } + else + { + ensure_size (plen + 5); /* includes space for the NUL, which is subsequently stomped on */ + plen += sprintf (p + plen, ">"); + } + break; + default: + ensure_size (plen + 8); /* includes space for the NUL, which is subsequently stomped on */ + plen += sprintf (p + plen, "&#%04i;", (int) c); + break; + } + } + } + } + } + if (doing) + { + p = resize (p, plen + 1); + p[plen] = '\0'; + *result = p; + } + + return ok; +} + +/* + * This formats the given words as XHTML. 
+ */ +static void +xhtml_rdaddwc (rdstringc * rs, word * text, word * end) +{ + char *c; + keyword *kwl; + xhtmlsection *sect; + indextag *itag; + int ti; + + for (; text && text != end; text = text->next) + { + switch (text->type) + { + case word_HyperLink: + xhtml_utostr (text->text, &c); + rdaddsc (rs, ""); + sfree (c); + break; + + case word_UpperXref: + case word_LowerXref: + kwl = kw_lookup (keywords, text->text); + if (kwl) + { + sect = xhtml_find_section (kwl->para); + if (sect) + { + rdaddsc (rs, "file->filename); + rdaddc (rs, '#'); + rdaddsc (rs, sect->fragment); + rdaddsc (rs, "\">"); + } + else + { + rdaddsc (rs, + ""); + error (err_whatever, + "Couldn't locate cross-reference! (Probably a bibliography entry.)"); + } + } + else + { + rdaddsc (rs, + ""); + error (err_whatever, + "Couldn't locate cross-reference! (Wasn't in source file.)"); + } + break; + + case word_IndexRef: /* in theory we could make an index target here */ +/* rdaddsc(rs, "text, &c); + rdaddsc(rs, c); + sfree(c); + rdaddsc(rs, "\">");*/ + /* what we _do_ need to do is to fix up the backend data + * for any indexentry this points to. + */ + for (ti = 0; (itag = (indextag *) index234 (idx->tags, ti)) != NULL; + ti++) + { + /* FIXME: really ustricmp() and not ustrcmp()? 
*/ + if (ustricmp (itag->name, text->text) == 0) + { + break; + } + } + if (itag != NULL) + { + if (itag->refs != NULL) + { + int i; + for (i = 0; i < itag->nrefs; i++) + { + xhtmlindex *idx_ref; + indexentry *ientry; + + ientry = itag->refs[i]; + if (ientry->backend_data == NULL) + { + idx_ref = + (xhtmlindex *) smalloc (sizeof (xhtmlindex)); + if (idx_ref == NULL) + fatal (err_nomemory); + idx_ref->nsection = 0; + idx_ref->size = 4; + idx_ref->sections = + (xhtmlsection **) smalloc (idx_ref->size * + sizeof (xhtmlsection + *)); + if (idx_ref->sections == NULL) + fatal (err_nomemory); + ientry->backend_data = idx_ref; + } + else + { + idx_ref = ientry->backend_data; + if (idx_ref->nsection + 1 > idx_ref->size) + { + int new_size = idx_ref->size * 2; + idx_ref->sections = + srealloc (idx_ref->sections, + new_size * sizeof (xhtmlsection)); + if (idx_ref->sections == NULL) + { + fatal (err_nomemory); + } + idx_ref->size = new_size; + } + } + idx_ref->sections[idx_ref->nsection++] = currentsection; +#if 0 +#endif + } + } + else + { + fatal (err_whatever, "Index tag had no entries!"); + } + } + else + { + fprintf (stderr, "Looking for index entry '%ls'\n", text->text); + fatal (err_whatever, + "Couldn't locate index entry! 
(Wasn't in index.)"); + } + break; + + case word_HyperEnd: + case word_XrefEnd: + rdaddsc (rs, ""); + break; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + assert (text->type != word_CodeQuote && + text->type != word_WkCodeQuote); + if (towordstyle (text->type) == word_Emph && + (attraux (text->aux) == attr_First || + attraux (text->aux) == attr_Only)) + rdaddsc (rs, ""); + else + if ((towordstyle (text->type) == word_Code + || towordstyle (text->type) == word_WeakCode) + && (attraux (text->aux) == attr_First + || attraux (text->aux) == attr_Only)) + rdaddsc (rs, ""); + + if (removeattr (text->type) == word_Normal) + { + static int dont_convert = 0; + if (dont_convert) + { + char buf[2] = " "; + dont_convert = 0; + wchar_t *s = text->text; + for (; *s; s++) { + buf[0] = (char) *s; + rdaddsc (rs, buf); + } + buf[0] = 0; + rdaddsc (rs, buf); + } + else + { + if (*text->text == '\\' && text->next && text->next->text && (*text->next->text == '&' || *text->next->text == '<' || *text->next->text == '>' || *text->next->text == '"')) + dont_convert = 1; + else + { + if (xhtml_convert (text->text, &c, TRUE)) /* spaces in the word are hard */ + rdaddsc (rs, c); + else + xhtml_rdaddwc (rs, text->alt, NULL); + sfree (c); + } + } + } + else if (removeattr (text->type) == word_WhiteSpace) + { + rdaddc (rs, ' '); + } + else if (removeattr (text->type) == word_Quote) + { + rdaddsc (rs, """); + } + + if (towordstyle (text->type) == word_Emph && + (attraux (text->aux) == attr_Last || + attraux (text->aux) == attr_Only)) + rdaddsc (rs, ""); + else + if ((towordstyle (text->type) == word_Code + || towordstyle (text->type) == word_WeakCode) + && (attraux (text->aux) == attr_Last + || attraux (text->aux) == attr_Only)) + rdaddsc (rs, ""); + break; + } + } +} + +/* Output 
a heading, formatted as XHTML. + */ +static void +xhtml_heading (FILE * fp, paragraph * p) +{ + rdstringc t = { 0, 0, NULL }; + word *tprefix = p->kwtext; + word *nprefix = p->kwtext2; + word *text = p->words; + int level = xhtml_para_level (p); + xhtmlsection *sect = xhtml_find_section (p); + xhtmlheadfmt *fmt; + char *fragment; + if (sect) + { + fragment = sect->fragment; + } + else + { + if (p->type == para_Title) + fragment = "title"; + else + { + fragment = ""; /* FIXME: what else can we do? */ + error (err_whatever, "Couldn't locate heading cross-reference!"); + } + } + + if (p->type == para_Title) + fmt = NULL; + else if (level == 1) + fmt = &conf.fchapter; + else if (level - 1 < conf.nfsect) + fmt = &conf.fsect[level - 1]; + else + fmt = &conf.fsect[conf.nfsect - 1]; + + if (fmt && fmt->just_numbers && nprefix) + { + xhtml_rdaddwc (&t, nprefix, NULL); + if (fmt) + { + char *c; + if (xhtml_convert (fmt->number_suffix, &c, FALSE)) + { + rdaddsc (&t, c); + sfree (c); + } + } + } + else if (fmt && !fmt->just_numbers && tprefix) + { + xhtml_rdaddwc (&t, tprefix, NULL); + if (fmt) + { + char *c; + if (xhtml_convert (fmt->number_suffix, &c, FALSE)) + { + rdaddsc (&t, c); + sfree (c); + } + } + } + xhtml_rdaddwc (&t, text, NULL); + /* + * If we're outputting in single-file mode, we need to lower + * the level of each heading by one, because the overall + * document title will be sitting right at the top as an

    + * and so chapters and sections should start at

    . + * + * Even if not, the document title will come back from + * xhtml_para_level() as level zero, so we must increment that + * no matter what leaf_level is set to. + */ + if (conf.leaf_level == 0 || level == 0) + level++; + fprintf (fp, "%s\n", fragment, level, t.text, + level); + sfree (t.text); +} + +/* Output a paragraph. Styles are handled by xhtml_rdaddwc(). + * This looks pretty simple; I may have missed something ... + */ +static void +xhtml_para (FILE * fp, word * text) +{ + rdstringc out = { 0, 0, NULL }; + xhtml_rdaddwc (&out, text, NULL); + fprintf (fp, "%s", out.text); + sfree (out.text); +} + +/* Output a code paragraph. I'm treating this as preformatted, which + * may not be entirely correct. See xhtml_para() for my worries about + * this being overly-simple; however I think that most of the complexity + * of the text backend came entirely out of word wrapping anyway. + */ +static void +xhtml_codepara (FILE * fp, word * text) +{ + fprintf (fp, "
    ");
    +  for (; text; text = text->next)
    +    if (text->type == word_WeakCode)
    +      {
    +        char *c;
    +        xhtml_convert (text->text, &c, FALSE);
    +        fprintf (fp, "%s\n", c);
    +        sfree (c);
    +      }
    +  fprintf (fp, "
    \n"); +} diff --git a/Docs/src/bin/halibut/contents.c b/Docs/src/bin/halibut/contents.c new file mode 100644 index 00000000..bbabbbce --- /dev/null +++ b/Docs/src/bin/halibut/contents.c @@ -0,0 +1,258 @@ +/* + * contents.c: build a table of contents + */ + +#include +#include +#include +#include +#include "halibut.h" + +struct numberstate_Tag +{ + int chapternum; + int appendixnum; + int ischapter; + int *sectionlevels; + paragraph **currentsects; + paragraph *lastsect; + int oklevel; + int maxsectlevel; + int listitem; + wchar_t *chaptertext; /* the word for a chapter */ + wchar_t *sectiontext; /* the word for a section */ + wchar_t *apptext; /* the word for an appendix */ +}; + +numberstate * +number_init (void) +{ + numberstate *ret = mknew (numberstate); + ret->chapternum = 0; + ret->appendixnum = -1; + ret->ischapter = 1; + ret->oklevel = -1; /* not even in a chapter yet */ + ret->maxsectlevel = 32; + ret->sectionlevels = mknewa (int, ret->maxsectlevel); + ret->currentsects = mknewa (paragraph *, ret->maxsectlevel + 1); + memset (ret->currentsects, 0, + (ret->maxsectlevel + 1) * sizeof (paragraph *)); + ret->lastsect = NULL; + ret->listitem = -1; + return ret; +} + +void +number_free (numberstate * state) +{ + sfree (state->sectionlevels); + sfree (state->currentsects); + sfree (state); +} + +static void +dotext (word *** wret, wchar_t * text) +{ + word *mnewword = mknew (word); + mnewword->text = ustrdup (text); + mnewword->type = word_Normal; + mnewword->alt = NULL; + mnewword->next = NULL; + **wret = mnewword; + *wret = &mnewword->next; +} + +static void +dospace (word *** wret) +{ + word *mnewword = mknew (word); + mnewword->text = NULL; + mnewword->type = word_WhiteSpace; + mnewword->alt = NULL; + mnewword->next = NULL; + **wret = mnewword; + *wret = &mnewword->next; +} + +static void +donumber (word *** wret, int num) +{ + wchar_t text[20]; + wchar_t *p = text + sizeof (text); + *--p = L'\0'; + while (num != 0) + { + assert (p > text); + *--p = 
L"0123456789"[num % 10]; + num /= 10; + } + dotext (wret, p); +} + +static void +doanumber (word *** wret, int num) +{ + wchar_t text[20]; + wchar_t *p; + int nletters, aton; + nletters = 1; + aton = 25; + while (num > aton) + { + nletters++; + num -= aton + 1; + if (aton < INT_MAX / 26) + aton = (aton + 1) * 26 - 1; + else + aton = INT_MAX; + } + p = text + sizeof (text); + *--p = L'\0'; + while (nletters--) + { + assert (p > text); + *--p = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[num % 26]; + num /= 26; + } + dotext (wret, p); +} + +void +number_cfg (numberstate * state, paragraph * source) +{ + /* + * Defaults + */ + state->chaptertext = L"Chapter"; + state->sectiontext = L"Section"; + state->apptext = L"Appendix"; + + for (; source; source = source->next) + { + if (source->type == para_Config) + { + if (!ustricmp (source->keyword, L"chapter")) + { + state->chaptertext = uadv (source->keyword); + } + else if (!ustricmp (source->keyword, L"section")) + { + state->sectiontext = uadv (source->keyword); + } + else if (!ustricmp (source->keyword, L"appendix")) + { + state->apptext = uadv (source->keyword); + } + } + } +} + +word * +number_mktext (numberstate * state, paragraph * p, wchar_t * category, + int prev, int *errflag) +{ + word *ret = NULL; + word **ret2 = &ret; + word **pret = &ret; + int i, level; + + level = -2; /* default for non-section-heading */ + switch (p->type) + { + case para_Chapter: + state->chapternum++; + for (i = 0; i < state->maxsectlevel; i++) + state->sectionlevels[i] = 0; + dotext (&pret, category ? category : state->chaptertext); + dospace (&pret); + ret2 = pret; + donumber (&pret, state->chapternum); + state->ischapter = 1; + state->oklevel = 0; + level = -1; + break; + case para_Heading: + case para_Subsect: + level = (p->type == para_Heading ? 
0 : p->aux); + if (level > state->oklevel) + { + error (err_sectjump, &p->fpos); + *errflag = TRUE; + ret = NULL; + break; + } + state->oklevel = level + 1; + if (state->maxsectlevel <= level) + { + state->maxsectlevel = level + 32; + state->sectionlevels = resize (state->sectionlevels, + state->maxsectlevel); + } + state->sectionlevels[level]++; + for (i = level + 1; i < state->maxsectlevel; i++) + state->sectionlevels[i] = 0; + dotext (&pret, category ? category : state->sectiontext); + dospace (&pret); + ret2 = pret; + if (state->ischapter) + donumber (&pret, state->chapternum); + else + doanumber (&pret, state->appendixnum); + for (i = 0; i <= level; i++) + { + dotext (&pret, L"."); + if (state->sectionlevels[i] == 0) + state->sectionlevels[i] = 1; + donumber (&pret, state->sectionlevels[i]); + } + break; + case para_Appendix: + state->appendixnum++; + for (i = 0; i < state->maxsectlevel; i++) + state->sectionlevels[i] = 0; + dotext (&pret, category ? category : state->apptext); + dospace (&pret); + ret2 = pret; + doanumber (&pret, state->appendixnum); + state->ischapter = 0; + state->oklevel = 0; + level = -1; + break; + case para_UnnumberedChapter: + level = -1; + break; + case para_NumberedList: + ret2 = pret; + if (prev != para_NumberedList) + state->listitem = 0; + state->listitem++; + donumber (&pret, state->listitem); + break; + } + + /* + * Now set up parent, child and sibling links. 
+ */ + p->parent = p->child = p->sibling = NULL; + if (level != -2) + { + if (state->currentsects[level + 1]) + state->currentsects[level + 1]->sibling = p; + if (level >= 0 && state->currentsects[level]) + { + p->parent = state->currentsects[level]; + if (!state->currentsects[level]->child) + state->currentsects[level]->child = p; + } + state->currentsects[level + 1] = state->lastsect = p; + for (i = level + 2; i < state->maxsectlevel + 1; i++) + state->currentsects[i] = NULL; + } + else + { + p->parent = state->lastsect; + } + + p->kwtext2 = *ret2; + return ret; +} diff --git a/Docs/src/bin/halibut/error.c b/Docs/src/bin/halibut/error.c new file mode 100644 index 00000000..209ead9e --- /dev/null +++ b/Docs/src/bin/halibut/error.c @@ -0,0 +1,227 @@ +/* + * error.c: Halibut error handling + */ + +#include +#include +#include +#include "halibut.h" + +/* + * Error flags + */ +#define PREFIX 0x0001 /* give `halibut:' prefix */ +#define FILEPOS 0x0002 /* give file position prefix */ + +static void +do_error (int code, va_list ap) +{ + char error[1024]; + char auxbuf[256]; + char *sp, *sp2; + wchar_t *wsp; + filepos fpos, fpos2; + int flags; + + switch (code) + { + case err_nomemory: /* no arguments */ + sprintf (error, "out of memory"); + flags = PREFIX; + break; + case err_optnoarg: + sp = va_arg (ap, char *); + sprintf (error, "option `-%.200s' requires an argument", sp); + flags = PREFIX; + break; + case err_nosuchopt: + sp = va_arg (ap, char *); + sprintf (error, "unrecognised option `-%.200s'", sp); + flags = PREFIX; + break; + case err_noinput: /* no arguments */ + sprintf (error, "no input files"); + flags = PREFIX; + break; + case err_cantopen: + sp = va_arg (ap, char *); + sprintf (error, "unable to open input file `%.200s'", sp); + flags = PREFIX; + break; + case err_nodata: /* no arguments */ + sprintf (error, "no data in input files"); + flags = PREFIX; + break; + case err_brokencodepara: + fpos = *va_arg (ap, filepos *); + sprintf (error, "every line of a 
code paragraph should begin `\\c'"); + flags = FILEPOS; + break; + case err_kwunclosed: + fpos = *va_arg (ap, filepos *); + sprintf (error, "expected `}' after paragraph keyword"); + flags = FILEPOS; + break; + case err_kwexpected: + fpos = *va_arg (ap, filepos *); + sprintf (error, "expected a paragraph keyword"); + flags = FILEPOS; + break; + case err_kwillegal: + fpos = *va_arg (ap, filepos *); + sprintf (error, "expected no paragraph keyword"); + flags = FILEPOS; + break; + case err_kwtoomany: + fpos = *va_arg (ap, filepos *); + sprintf (error, "expected only one paragraph keyword"); + flags = FILEPOS; + break; + case err_bodyillegal: + fpos = *va_arg (ap, filepos *); + sprintf (error, "expected no text after paragraph keyword"); + flags = FILEPOS; + break; + case err_badparatype: + wsp = va_arg (ap, wchar_t *); + sp = ustrtoa (wsp, auxbuf, sizeof (auxbuf)); + fpos = *va_arg (ap, filepos *); + sprintf (error, "command `%.200s' unrecognised at start of" + " paragraph", sp); + flags = FILEPOS; + break; + case err_badmidcmd: + wsp = va_arg (ap, wchar_t *); + sp = ustrtoa (wsp, auxbuf, sizeof (auxbuf)); + fpos = *va_arg (ap, filepos *); + sprintf (error, "command `%.200s' unexpected in mid-paragraph", sp); + flags = FILEPOS; + break; + case err_unexbrace: + fpos = *va_arg (ap, filepos *); + sprintf (error, "brace character unexpected in mid-paragraph"); + flags = FILEPOS; + break; + case err_explbr: + fpos = *va_arg (ap, filepos *); + sprintf (error, "expected `{' after command"); + flags = FILEPOS; + break; + case err_commenteof: + fpos = *va_arg (ap, filepos *); + sprintf (error, "end of file unexpected inside `\\#{...}' comment"); + flags = FILEPOS; + break; + case err_kwexprbr: + fpos = *va_arg (ap, filepos *); + sprintf (error, "expected `}' after cross-reference"); + flags = FILEPOS; + break; + case err_missingrbrace: + fpos = *va_arg (ap, filepos *); + sprintf (error, "unclosed braces at end of paragraph"); + flags = FILEPOS; + break; + case 
err_nestedstyles: + fpos = *va_arg (ap, filepos *); + sprintf (error, "unable to nest text styles"); + flags = FILEPOS; + break; + case err_nestedindex: + fpos = *va_arg (ap, filepos *); + sprintf (error, "unable to nest index markings"); + flags = FILEPOS; + break; + case err_nosuchkw: + fpos = *va_arg (ap, filepos *); + wsp = va_arg (ap, wchar_t *); + sp = ustrtoa (wsp, auxbuf, sizeof (auxbuf)); + sprintf (error, "unable to resolve cross-reference to `%.200s'", sp); + flags = FILEPOS; + break; + case err_multiBR: + fpos = *va_arg (ap, filepos *); + wsp = va_arg (ap, wchar_t *); + sp = ustrtoa (wsp, auxbuf, sizeof (auxbuf)); + sprintf (error, "multiple `\\BR' entries given for `%.200s'", sp); + flags = FILEPOS; + break; + case err_nosuchidxtag: + wsp = va_arg (ap, wchar_t *); + sp = ustrtoa (wsp, auxbuf, sizeof (auxbuf)); + sprintf (error, "`\\IM' on unknown index tag `%.200s'", sp); + flags = 0; + /* FIXME: need to get a filepos to here somehow */ + break; + case err_cantopenw: + sp = va_arg (ap, char *); + sprintf (error, "unable to open output file `%.200s'", sp); + flags = PREFIX; + break; + case err_macroexists: + fpos = *va_arg (ap, filepos *); + wsp = va_arg (ap, wchar_t *); + sp = ustrtoa (wsp, auxbuf, sizeof (auxbuf)); + sprintf (error, "macro `%.200s' already defined", sp); + flags = FILEPOS; + break; + case err_sectjump: + fpos = *va_arg (ap, filepos *); + sprintf (error, "expected higher heading levels before this one"); + flags = FILEPOS; + break; + case err_winhelp_ctxclash: + fpos = *va_arg (ap, filepos *); + sp = va_arg (ap, char *); + sp2 = va_arg (ap, char *); + sprintf (error, "Windows Help context id `%.200s' clashes with " + "previously defined `%.200s'", sp, sp2); + flags = FILEPOS; + break; + case err_multikw: + fpos = *va_arg (ap, filepos *); + fpos2 = *va_arg (ap, filepos *); + wsp = va_arg (ap, wchar_t *); + sp = ustrtoa (wsp, auxbuf, sizeof (auxbuf)); + sprintf (error, "paragraph keyword `%.200s' already defined at ", sp); + sprintf 
(error + strlen (error), "%s:%d", fpos2.filename, fpos2.line); + flags = FILEPOS; + break; + case err_whatever: + sp = va_arg (ap, char *); + vsprintf (error, sp, ap); + flags = PREFIX; + break; + } + + if (flags & PREFIX) + fputs ("halibut: ", stderr); + if (flags & FILEPOS) + { + fprintf (stderr, "%s:%d:", fpos.filename, fpos.line); + if (fpos.col > 0) + fprintf (stderr, "%d:", fpos.col); + fputc (' ', stderr); + } + fputs (error, stderr); + fputc ('\n', stderr); +} + +void +fatal (int code, ...) +{ + va_list ap; + va_start (ap, code); + do_error (code, ap); + va_end (ap); + exit (EXIT_FAILURE); +} + +void +error (int code, ...) +{ + va_list ap; + va_start (ap, code); + do_error (code, ap); + va_end (ap); +} diff --git a/Docs/src/bin/halibut/halibut.h b/Docs/src/bin/halibut/halibut.h new file mode 100644 index 00000000..5a4da275 --- /dev/null +++ b/Docs/src/bin/halibut/halibut.h @@ -0,0 +1,438 @@ +#ifndef HALIBUT_HALIBUT_H +#define HALIBUT_HALIBUT_H + +#include +#include +#include + +#ifdef __GNUC__ +#define NORETURN __attribute__((__noreturn__)) +#else +#define NORETURN /* nothing */ +#endif + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +/* For suppressing unused-parameter warnings */ +#define IGNORE(x) ( (x) = (x) ) + +#include "tree234.h" + +/* + * Structure tags + */ +typedef struct input_Tag input; +typedef struct filepos_Tag filepos; +typedef struct paragraph_Tag paragraph; +typedef struct word_Tag word; +typedef struct keywordlist_Tag keywordlist; +typedef struct keyword_Tag keyword; +typedef struct userstyle_Tag userstyle; +typedef struct numberstate_Tag numberstate; +typedef struct indexdata_Tag indexdata; +typedef struct indextag_Tag indextag; +typedef struct indexentry_Tag indexentry; +typedef struct macrostack_Tag macrostack; + +/* + * Data structure to hold a file name and index, a line and a + * column number, for reporting errors + */ +struct filepos_Tag +{ + char *filename; + int line, col; +}; + +/* + * Data 
structure to hold all the file names etc for input + */ +typedef struct pushback_Tag +{ + int chr; + filepos pos; +} +pushback; +struct input_Tag +{ + char **filenames; /* complete list of input files */ + int nfiles; /* how many in the list */ + FILE *currfp; /* the currently open one */ + int currindex; /* which one is that in the list */ + pushback *pushback; /* pushed-back input characters */ + int npushback, pushbacksize; + filepos pos; + int reportcols; /* report column numbers in errors */ + macrostack *stack; /* macro expansions in force */ +}; + +/* + * Data structure to hold the input form of the source, ie a linked + * list of paragraphs + */ +struct paragraph_Tag +{ + paragraph *next; + int type; + wchar_t *keyword; /* for most special paragraphs */ + word *words; /* list of words in paragraph */ + int aux; /* number, in a numbered paragraph + * or subsection level + */ + word *kwtext; /* chapter/section indication */ + word *kwtext2; /* numeric-only form of kwtext */ + filepos fpos; + + paragraph *parent, *child, *sibling; /* for hierarchy navigation */ + + void *private_data; /* for temp use in backends */ +}; +enum +{ + para_IM, /* index merge */ + para_BR, /* bibliography rewrite */ + para_Rule, /* random horizontal rule */ + para_Chapter, + para_Appendix, + para_UnnumberedChapter, + para_Heading, + para_Subsect, + para_Normal, + para_Biblio, /* causes no output unless turned ... */ + para_BiblioCited, /* ... into this paragraph type */ + para_Bullet, + para_NumberedList, + para_Code, + para_Copyright, + para_Preamble, + para_NoCite, + para_Title, + para_VersionID, + para_Config, /* configuration directive */ + para_NotParaType /* placeholder value */ +}; + +/* + * Data structure to hold an individual word + */ +struct word_Tag +{ + word *next, *alt; + int type; + int aux; + int breaks; /* can a line break after it? */ + wchar_t *text; + filepos fpos; +}; +enum +{ + /* ORDERING CONSTRAINT: these normal-word types ... 
*/ + word_Normal, + word_Emph, + word_Code, /* monospaced; `quoted' in text */ + word_WeakCode, /* monospaced, normal in text */ + /* ... must be in the same order as these space types ... */ + word_WhiteSpace, /* text is NULL or ignorable */ + word_EmphSpace, /* WhiteSpace when emphasised */ + word_CodeSpace, /* WhiteSpace when code */ + word_WkCodeSpace, /* WhiteSpace when weak code */ + /* ... and must be in the same order as these quote types ... */ + word_Quote, /* text is NULL or ignorable */ + word_EmphQuote, /* Quote when emphasised */ + word_CodeQuote, /* (can't happen) */ + word_WkCodeQuote, /* (can't happen) */ + /* END ORDERING CONSTRAINT */ + word_internal_endattrs, + word_UpperXref, /* \K */ + word_LowerXref, /* \k */ + word_XrefEnd, /* (invisible; no text) */ + word_IndexRef, /* (always an invisible one) */ + word_HyperLink, /* (invisible) */ + word_HyperEnd /* (also invisible; no text) */ +}; +/* aux values for attributed words */ +enum +{ + attr_Only = 0x0000, /* a lone word with the attribute */ + attr_First = 0x0001, /* the first of a series */ + attr_Last = 0x0002, /* the last of a series */ + attr_Always = 0x0003, /* any other part of a series */ + attr_mask = 0x0003, +}; +/* aux values for quote-type words */ +enum +{ + quote_Open = 0x0010, + quote_Close = 0x0020, + quote_mask = 0x0030, +}; +#define isattr(x) ( ( (x) > word_Normal && (x) < word_WhiteSpace ) || \ + ( (x) > word_WhiteSpace && (x) < word_internal_endattrs ) ) +#define sameattr(x,y) ( (((x)-(y)) & 3) == 0 ) +#define towordstyle(x) ( word_Normal + ((x) & 3) ) +#define tospacestyle(x) ( word_WhiteSpace + ((x) & 3) ) +#define toquotestyle(x) ( word_Quote + ((x) & 3) ) +#define removeattr(x) ( word_Normal + ((x) &~ 3) ) + +#define attraux(x) ( (x) & attr_mask ) +#define quoteaux(x) ( (x) & quote_mask ) + +/* + * error.c + */ +void +fatal (int code, ...) 
+ NORETURN; + void error (int code, ...); + enum + { + err_nomemory, /* out of memory */ + err_optnoarg, /* option `-%s' requires an argument */ + err_nosuchopt, /* unrecognised option `-%s' */ + err_noinput, /* no input files */ + err_cantopen, /* unable to open input file `%s' */ + err_nodata, /* no data in input files */ + err_brokencodepara, /* line in codepara didn't begin `\c' */ + err_kwunclosed, /* expected `}' after keyword */ + err_kwillegal, /* paragraph type expects no keyword */ + err_kwexpected, /* paragraph type expects a keyword */ + err_kwtoomany, /* paragraph type expects only 1 */ + err_bodyillegal, /* paragraph type expects only kws! */ + err_badparatype, /* invalid command at start of para */ + err_badmidcmd, /* invalid command in mid-para */ + err_unexbrace, /* unexpected brace */ + err_explbr, /* expected `{' after command */ + err_commenteof, /* EOF inside braced comment */ + err_kwexprbr, /* expected `}' after cross-ref */ + err_missingrbrace, /* unclosed braces at end of para */ + err_nestedstyles, /* unable to nest text styles */ + err_nestedindex, /* unable to nest `\i' thingys */ + err_nosuchkw, /* unresolved cross-reference */ + err_multiBR, /* multiple \BRs on same keyword */ + err_nosuchidxtag, /* \IM on unknown index tag (warning) */ + err_cantopenw, /* can't open output file for write */ + err_macroexists, /* this macro already exists */ + err_sectjump, /* jump a heading level, eg \C -> \S */ + err_winhelp_ctxclash, /* WinHelp context ID hash clash */ + err_multikw, /* keyword clash in sections */ + err_whatever /* random error of another type */ + }; + +/* + * malloc.c + */ +#ifdef LOGALLOC + void *smalloc (char *file, int line, int size); + void *srealloc (char *file, int line, void *p, int size); + void sfree (char *file, int line, void *p); +#define smalloc(x) smalloc(__FILE__, __LINE__, x) +#define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y) +#define sfree(x) sfree(__FILE__, __LINE__, x) +#else + void *smalloc (int 
size); + void *srealloc (void *p, int size); + void sfree (void *p); +#endif + void free_word_list (word * w); + void free_para_list (paragraph * p); + word *dup_word_list (word * w); + char *dupstr (char *s); + +#define mknew(type) ( (type *) smalloc (sizeof (type)) ) +#define mknewa(type, number) ( (type *) smalloc ((number) * sizeof (type)) ) +#define resize(array, len) ( srealloc ((array), (len) * sizeof (*(array))) ) +#define lenof(array) ( sizeof(array) / sizeof(*(array)) ) + +/* + * ustring.c + */ + wchar_t *ustrdup (wchar_t * s); + char *ustrtoa (wchar_t * s, char *outbuf, int size); + int ustrlen (wchar_t * s); + wchar_t *uadv (wchar_t * s); + wchar_t *ustrcpy (wchar_t * dest, wchar_t * source); + wchar_t utolower (wchar_t); + int ustrcmp (wchar_t * lhs, wchar_t * rhs); + int ustricmp (wchar_t * lhs, wchar_t * rhs); + int utoi (wchar_t *); + int utob (wchar_t *); + int uisdigit (wchar_t); + wchar_t *ustrlow (wchar_t * s); + wchar_t *ustrftime (wchar_t * fmt, struct tm *timespec); + +/* + * help.c + */ + void help (void); + void usage (void); + void showversion (void); + +/* + * licence.c + */ + void licence (void); + +/* + * version.c + */ + const char *const version; + +/* + * misc.c + */ + typedef struct stackTag *stack; + stack stk_new (void); + void stk_free (stack); + void stk_push (stack, void *); + void *stk_pop (stack); + + typedef struct tagRdstring rdstring; + struct tagRdstring + { + int pos, size; + wchar_t *text; + }; + typedef struct tagRdstringc rdstringc; + struct tagRdstringc + { + int pos, size; + char *text; + }; + extern const rdstring empty_rdstring; + extern const rdstringc empty_rdstringc; + void rdadd (rdstring * rs, wchar_t c); + void rdadds (rdstring * rs, wchar_t * p); + wchar_t *rdtrim (rdstring * rs); + void rdaddc (rdstringc * rs, char c); + void rdaddsc (rdstringc * rs, char *p); + char *rdtrimc (rdstringc * rs); + + int compare_wordlists (word * a, word * b); + + void mark_attr_ends (paragraph * sourceform); + + typedef 
struct tagWrappedLine wrappedline; + struct tagWrappedLine + { + wrappedline *next; + word *begin, *end; /* first & last words of line */ + int nspaces; /* number of whitespaces in line */ + int shortfall; /* how much shorter than max width */ + }; + wrappedline *wrap_para (word *, int, int, int (*)(word *)); + void wrap_free (wrappedline *); + +/* + * input.c + */ + paragraph *read_input (input * in, indexdata * idx); + +/* + * keywords.c + */ + struct keywordlist_Tag + { + int nkeywords; + int size; + tree234 *keys; /* sorted by `key' field */ + word **looseends; /* non-keyword list element numbers */ + int nlooseends; + int looseendssize; + }; + struct keyword_Tag + { + wchar_t *key; /* the keyword itself */ + word *text; /* "Chapter 2", "Appendix Q"... */ + /* (NB: filepos are not set) */ + paragraph *para; /* the paragraph referenced */ + }; + keyword *kw_lookup (keywordlist *, wchar_t *); + keywordlist *get_keywords (paragraph *); + void free_keywords (keywordlist *); + void subst_keywords (paragraph *, keywordlist *); + +/* + * index.c + */ + +/* + * Data structure to hold both sides of the index. + */ + struct indexdata_Tag + { + tree234 *tags; /* holds type `indextag' */ + tree234 *entries; /* holds type `indexentry' */ + }; + +/* + * Data structure to hold an index tag (LHS of index). + */ + struct indextag_Tag + { + wchar_t *name; + word *implicit_text; + word **explicit_texts; + int nexplicit, explicit_size; + int nrefs; + indexentry **refs; /* array of entries referenced by tag */ + }; + +/* + * Data structure to hold an index entry (RHS of index). 
+ */ + struct indexentry_Tag + { + word *text; + void *backend_data; /* private to back end */ + }; + + indexdata *make_index (void); + void cleanup_index (indexdata *); +/* index_merge takes responsibility for freeing arg 3 iff implicit; never + * takes responsibility for arg 2 */ + void index_merge (indexdata *, int is_explicit, wchar_t *, word *); + void build_index (indexdata *); + void index_debug (indexdata *); + indextag *index_findtag (indexdata * idx, wchar_t * name); + +/* + * contents.c + */ + numberstate *number_init (void); + void number_cfg (numberstate *, paragraph *); + word *number_mktext (numberstate *, paragraph *, wchar_t *, int, int *); + void number_free (numberstate *); + +/* + * biblio.c + */ + void gen_citations (paragraph *, keywordlist *); + +/* + * style.c + */ + struct userstyle_Tag + { + }; + +/* + * bk_text.c + */ + void text_backend (paragraph *, keywordlist *, indexdata *); + +/* + * bk_xhtml.c + */ + void xhtml_backend (paragraph *, keywordlist *, indexdata *); + +/* + * bk_whlp.c + */ + void whlp_backend (paragraph *, keywordlist *, indexdata *); + +#endif diff --git a/Docs/src/bin/halibut/help.c b/Docs/src/bin/halibut/help.c new file mode 100644 index 00000000..af1d5157 --- /dev/null +++ b/Docs/src/bin/halibut/help.c @@ -0,0 +1,38 @@ +/* + * help.c: usage instructions + */ + +#include +#include "halibut.h" + +static char *helptext[] = { + "FIXME: help text goes here", + NULL +}; + +static char *usagetext[] = { + "FIXME: usage text goes here", + NULL +}; + +void +help (void) +{ + char **p; + for (p = helptext; *p; p++) + puts (*p); +} + +void +usage (void) +{ + char **p; + for (p = usagetext; *p; p++) + puts (*p); +} + +void +showversion (void) +{ + printf ("Halibut, %s\n", version); +} diff --git a/Docs/src/bin/halibut/index.c b/Docs/src/bin/halibut/index.c new file mode 100644 index 00000000..15e55432 --- /dev/null +++ b/Docs/src/bin/halibut/index.c @@ -0,0 +1,278 @@ +/* + * index.c: create and collate index data structures + */ 
+ +#include +#include +#include "halibut.h" + +static int compare_tags (void *av, void *bv); +static int compare_entries (void *av, void *bv); + +indexdata * +make_index (void) +{ + indexdata *ret = mknew (indexdata); + ret->tags = newtree234 (compare_tags); + ret->entries = newtree234 (compare_entries); + return ret; +} + +static indextag * +make_indextag (void) +{ + indextag *ret = mknew (indextag); + ret->name = NULL; + ret->implicit_text = NULL; + ret->explicit_texts = NULL; + ret->nexplicit = ret->explicit_size = ret->nrefs = 0; + ret->refs = NULL; + return ret; +} + +static int +compare_tags (void *av, void *bv) +{ + indextag *a = (indextag *) av, *b = (indextag *) bv; + return ustricmp (a->name, b->name); +} + +static int +compare_to_find_tag (void *av, void *bv) +{ + wchar_t *a = (wchar_t *) av; + indextag *b = (indextag *) bv; + return ustricmp (a, b->name); +} + +static int +compare_entries (void *av, void *bv) +{ + indexentry *a = (indexentry *) av, *b = (indexentry *) bv; + return compare_wordlists (a->text, b->text); +} + +/* + * Back-end utility: find the indextag with a given name. + */ +indextag * +index_findtag (indexdata * idx, wchar_t * name) +{ + return find234 (idx->tags, name, compare_to_find_tag); +} + +/* + * Add a \IM. `tags' points to a zero-terminated chain of + * zero-terminated strings ("first\0second\0thirdandlast\0\0"). + * `text' points to a word list. + * + * Guarantee on calling sequence: all implicit merges are given + * before the explicit ones. + */ +void +index_merge (indexdata * idx, int is_explicit, wchar_t * tags, word * text) +{ + indextag *t, *existing; + + /* + * FIXME: want to warn on overlapping source sets. + */ + for (; *tags; tags = uadv (tags)) + { + t = make_indextag (); + t->name = tags; + existing = add234 (idx->tags, t); + if (existing == t) + { + /* + * Duplicate this so we can free it independently. + */ + t->name = ustrdup (tags); + + /* + * Every tag has an implicit \IM. 
So if this tag + * doesn't exist and we're explicit, then we should + * warn (and drop it, since it won't be referenced). + */ + if (is_explicit) + { + error (err_nosuchidxtag, tags); + continue; + } + + /* + * Otherwise, this is a new tag with an implicit \IM. + */ + t->implicit_text = text; + } + else + { + sfree (t); + t = existing; + if (!is_explicit) + { + /* + * An implicit \IM for a tag that's had an implicit + * \IM before. FIXME: we should check the text + * against the existing text and warn on + * differences. And check the tag for case match + * against the existing tag, likewise. + */ + } + else + { + /* + * An explicit \IM added to a valid tag. In + * particular, this removes the implicit \IM if + * present. + */ + if (t->implicit_text) + { + free_word_list (t->implicit_text); + t->implicit_text = NULL; + } + if (t->nexplicit >= t->explicit_size) + { + t->explicit_size = t->nexplicit + 8; + t->explicit_texts = resize (t->explicit_texts, + t->explicit_size); + } + t->explicit_texts[t->nexplicit++] = text; + } + } + } +} + +/* + * Build the final-form index. We now have every tag, with every + * \IM, set up in a 2-3 tree indexed by tag. We now want to collate + * the RHSes of the \IMs, and sort by final form, and decorate the + * entries in the original 2-3 tree with pointers to the RHS + * entries. 
+ */ +void +build_index (indexdata * i) +{ + indextag *t; + word **ta; + int ti; + int j; + + for (ti = 0; (t = (indextag *) index234 (i->tags, ti)) != NULL; ti++) + { + if (t->implicit_text) + { + t->nrefs = 1; + ta = &t->implicit_text; + } + else + { + t->nrefs = t->nexplicit; + ta = t->explicit_texts; + } + if (t->nrefs) + { + t->refs = mknewa (indexentry *, t->nrefs); + for (j = 0; j < t->nrefs; j++) + { + indexentry *ent = mknew (indexentry); + ent->text = *ta++; + t->refs[j] = add234 (i->entries, ent); + if (t->refs[j] != ent) /* duplicate */ + sfree (ent); + } + } + } +} + +void +cleanup_index (indexdata * i) +{ + indextag *t; + indexentry *ent; + int ti; + + for (ti = 0; (t = (indextag *) index234 (i->tags, ti)) != NULL; ti++) + { + sfree (t->name); + free_word_list (t->implicit_text); + sfree (t->explicit_texts); + sfree (t->refs); + sfree (t); + } + freetree234 (i->tags); + for (ti = 0; (ent = (indexentry *) index234 (i->entries, ti)) != NULL; ti++) + { + sfree (ent); + } + freetree234 (i->entries); + sfree (i); +} + +static void dbg_prtwordlist (int level, word * w); +static void dbg_prtmerge (int is_explicit, wchar_t * tag, word * text); + +void +index_debug (indexdata * i) +{ + indextag *t; + indexentry *y; + int ti; + int j; + + printf ("\nINDEX TAGS\n==========\n\n"); + for (ti = 0; (t = (indextag *) index234 (i->tags, ti)) != NULL; ti++) + { + printf ("\n"); + if (t->implicit_text) + dbg_prtmerge (0, t->name, t->implicit_text); + for (j = 0; j < t->nexplicit; j++) + dbg_prtmerge (1, t->name, t->explicit_texts[j]); + } + + printf ("\nINDEX ENTRIES\n=============\n\n"); + for (ti = 0; (y = (indexentry *) index234 (i->entries, ti)) != NULL; ti++) + { + printf ("\n"); + printf ("{\n"); + dbg_prtwordlist (1, y->text); + printf ("}\n"); + } +} + +static void +dbg_prtmerge (int is_explicit, wchar_t * tag, word * text) +{ + printf ("\\IM: %splicit: \"", is_explicit ? 
"ex" : "im"); + for (; *tag; tag++) + putchar (*tag); + printf ("\" {\n"); + dbg_prtwordlist (1, text); + printf ("}\n"); +} + +static void +dbg_prtwordlist (int level, word * w) +{ + for (; w; w = w->next) + { + wchar_t *wp; + printf ("%*sword %d ", level * 4, "", w->type); + if (w->text) + { + printf ("\""); + for (wp = w->text; *wp; wp++) + putchar (*wp); + printf ("\""); + } + else + printf ("(no text)"); + if (w->alt) + { + printf (" alt = {\n"); + dbg_prtwordlist (level + 1, w->alt); + printf ("%*s}", level * 4, ""); + } + printf ("\n"); + } +} diff --git a/Docs/src/bin/halibut/input.c b/Docs/src/bin/halibut/input.c new file mode 100644 index 00000000..d3c4c2e4 --- /dev/null +++ b/Docs/src/bin/halibut/input.c @@ -0,0 +1,1516 @@ +/* + * input.c: read the source form + */ + +#include +#include +#include +#include "halibut.h" + +#define TAB_STOP 8 /* for column number tracking */ + +static void +setpos (input * in, char *fname) +{ + in->pos.filename = fname; + in->pos.line = 1; + in->pos.col = (in->reportcols ? 
1 : -1); +} + +static void +unget (input * in, int c, filepos * pos) +{ + if (in->npushback >= in->pushbacksize) + { + in->pushbacksize = in->npushback + 16; + in->pushback = resize (in->pushback, in->pushbacksize); + } + in->pushback[in->npushback].chr = c; + in->pushback[in->npushback].pos = *pos; /* structure copy */ + in->npushback++; +} + +/* ---------------------------------------------------------------------- */ +/* + * Macro subsystem + */ +typedef struct macro_Tag macro; +struct macro_Tag +{ + wchar_t *name, *text; +}; +struct macrostack_Tag +{ + macrostack *next; + wchar_t *text; + int ptr, npushback; + filepos pos; +}; +static int +macrocmp (void *av, void *bv) +{ + macro *a = (macro *) av, *b = (macro *) bv; + return ustrcmp (a->name, b->name); +} +static void +macrodef (tree234 * macros, wchar_t * name, wchar_t * text, filepos fpos) +{ + macro *m = mknew (macro); + m->name = name; + m->text = text; + if (add234 (macros, m) != m) + { + error (err_macroexists, &fpos, name); + sfree (name); + sfree (text); + } +} +static int +macrolookup (tree234 * macros, input * in, wchar_t * name, filepos * pos) +{ + macro m, *gotit; + m.name = name; + gotit = find234 (macros, &m, NULL); + if (gotit) + { + macrostack *expansion = mknew (macrostack); + expansion->next = in->stack; + expansion->text = gotit->text; + expansion->pos = *pos; /* structure copy */ + expansion->ptr = 0; + expansion->npushback = in->npushback; + in->stack = expansion; + return TRUE; + } + else + return FALSE; +} +static void +macrocleanup (tree234 * macros) +{ + int ti; + macro *m; + for (ti = 0; (m = (macro *) index234 (macros, ti)) != NULL; ti++) + { + sfree (m->name); + sfree (m->text); + sfree (m); + } + freetree234 (macros); +} + +/* + * Can return EOF + */ +static int +get (input * in, filepos * pos) +{ + int pushbackpt = in->stack ? 
in->stack->npushback : 0; + if (in->npushback > pushbackpt) + { + --in->npushback; + if (pos) + *pos = in->pushback[in->npushback].pos; /* structure copy */ + return in->pushback[in->npushback].chr; + } + else if (in->stack) + { + wchar_t c = in->stack->text[in->stack->ptr]; + if (in->stack->text[++in->stack->ptr] == L'\0') + { + macrostack *tmp = in->stack; + in->stack = tmp->next; + sfree (tmp); + } + return c; + } + else if (in->currfp) + { + int c = getc (in->currfp); + + if (c == EOF) + { + fclose (in->currfp); + in->currfp = NULL; + } + /* Track line numbers, for error reporting */ + if (pos) + *pos = in->pos; + if (in->reportcols) + { + switch (c) + { + case '\t': + in->pos.col = 1 + (in->pos.col + TAB_STOP - 1) % TAB_STOP; + break; + case '\n': + in->pos.col = 1; + in->pos.line++; + break; + default: + in->pos.col++; + break; + } + } + else + { + in->pos.col = -1; + if (c == '\n') + in->pos.line++; + } + /* FIXME: do input charmap translation. We should be returning + * Unicode here. */ + return c; + } + else + return EOF; +} + +/* + * Lexical analysis of source files. + */ +typedef struct token_Tag token; +struct token_Tag +{ + int type; + int cmd, aux; + wchar_t *text; + filepos pos; +}; +enum +{ + tok_eof, /* end of file */ + tok_eop, /* end of paragraph */ + tok_white, /* whitespace */ + tok_word, /* a word or word fragment */ + tok_cmd, /* \command */ + tok_lbrace, /* { */ + tok_rbrace /* } */ +}; + +/* Halibut command keywords. */ +enum +{ + c__invalid, /* invalid command */ + c__comment, /* comment command (\#) */ + c__escaped, /* escaped character */ + c__nbsp, /* nonbreaking space */ + c_A, /* appendix heading */ + c_B, /* bibliography entry */ + c_BR, /* bibliography rewrite */ + c_C, /* chapter heading */ + c_H, /* heading */ + c_I, /* invisible index mark */ + c_IM, /* index merge/rewrite */ + c_K, /* capitalised cross-reference */ + c_S, /* aux field is 0, 1, 2, ... 
*/ + c_U, /* unnumbered-chapter heading */ + c_W, /* Web hyperlink */ + c_b, /* bulletted list */ + c_c, /* code */ + c_cfg, /* configuration directive */ + c_copyright, /* copyright statement */ + c_cw, /* weak code */ + c_date, /* document processing date */ + c_define, /* macro definition */ + c_e, /* emphasis */ + c_i, /* visible index mark */ + c_ii, /* uncapitalised visible index mark */ + c_k, /* uncapitalised cross-reference */ + c_n, /* numbered list */ + c_nocite, /* bibliography trickery */ + c_preamble, /* document preamble text */ + c_q, /* quote marks */ + c_rule, /* horizontal rule */ + c_title, /* document title */ + c_u, /* aux field is char code */ + c_versionid /* document RCS id */ +}; + +/* Perhaps whitespace should be defined in a more Unicode-friendly way? */ +#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 ) +#define isnl(c) ( (c)==10 ) +#define isdec(c) ( ((c)>='0'&&(c)<='9') ) +#define fromdec(c) ( (c)-'0' ) +#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f')) +#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) ) +#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z')) + +/* + * Keyword comparison function. Like strcmp, but between a wchar_t * + * and a char *. + */ +static int +kwcmp (wchar_t const *p, char const *q) +{ + int i; + do + { + i = *p - *q; + } + while (*p++ && *q++ && !i); + return i; +} + +/* + * Match a keyword. + */ +static void +match_kw (token * tok) +{ + /* + * FIXME. The ids are explicit in here so as to allow long-name + * equivalents to the various very short keywords. 
+ */ + static const struct + { + char const *name; + int id; + } + keywords[] = + { + { + "#", c__comment} + , /* comment command (\#) */ + { + "-", c__escaped} + , /* nonbreaking hyphen */ + { + "A", c_A} + , /* appendix heading */ + { + "B", c_B} + , /* bibliography entry */ + { + "BR", c_BR} + , /* bibliography rewrite */ + { + "C", c_C} + , /* chapter heading */ + { + "H", c_H} + , /* heading */ + { + "I", c_I} + , /* invisible index mark */ + { + "IM", c_IM} + , /* index merge/rewrite */ + { + "K", c_K} + , /* capitalised cross-reference */ + { + "U", c_U} + , /* unnumbered-chapter heading */ + { + "W", c_W} + , /* Web hyperlink */ + { + "\\", c__escaped} + , /* escaped backslash (\\) */ + { + "_", c__nbsp} + , /* nonbreaking space (\_) */ + { + "b", c_b} + , /* bulletted list */ + { + "c", c_c} + , /* code */ + { + "cfg", c_cfg} + , /* configuration directive */ + { + "copyright", c_copyright} + , /* copyright statement */ + { + "cw", c_cw} + , /* weak code */ + { + "date", c_date} + , /* document processing date */ + { + "define", c_define} + , /* macro definition */ + { + "e", c_e} + , /* emphasis */ + { + "i", c_i} + , /* visible index mark */ + { + "ii", c_ii} + , /* uncapitalised visible index mark */ + { + "k", c_k} + , /* uncapitalised cross-reference */ + { + "n", c_n} + , /* numbered list */ + { + "nocite", c_nocite} + , /* bibliography trickery */ + { + "preamble", c_preamble} + , /* document preamble text */ + { + "q", c_q} + , /* quote marks */ + { + "rule", c_rule} + , /* horizontal rule */ + { + "title", c_title} + , /* document title */ + { + "versionid", c_versionid} + , /* document RCS id */ + { + "{", c__escaped} + , /* escaped lbrace (\{) */ + { + "}", c__escaped} + , /* escaped rbrace (\}) */ + }; + int i, j, k, c; + + /* + * Special cases: \S{0,1,2,...} and \uABCD. If the syntax + * doesn't match correctly, we just fall through to the + * binary-search phase. 
+ */ + if (tok->text[0] == 'S') + { + /* We expect numeric characters thereafter. */ + wchar_t *p = tok->text + 1; + int n; + if (!*p) + n = 1; + else + { + n = 0; + while (*p && isdec (*p)) + { + n = 10 * n + fromdec (*p); + p++; + } + } + if (!*p) + { + tok->cmd = c_S; + tok->aux = n; + return; + } + } + else if (tok->text[0] == 'u') + { + /* We expect hex characters thereafter. */ + wchar_t *p = tok->text + 1; + int n = 0; + while (*p && ishex (*p)) + { + n = 16 * n + fromhex (*p); + p++; + } + if (!*p) + { + tok->cmd = c_u; + tok->aux = n; + return; + } + } + + i = -1; + j = sizeof (keywords) / sizeof (*keywords); + while (j - i > 1) + { + k = (i + j) / 2; + c = kwcmp (tok->text, keywords[k].name); + if (c < 0) + j = k; + else if (c > 0) + i = k; + else /* c == 0 */ + { + tok->cmd = keywords[k].id; + return; + } + } + + tok->cmd = c__invalid; +} + + +/* + * Read a token from the input file, in the normal way (`normal' in + * the sense that code paragraphs work a different way). + */ +token +get_token (input * in) +{ + int c; + int nls; + token ret; + rdstring rs = { 0, 0, NULL }; + filepos cpos; + + ret.text = NULL; /* default */ + c = get (in, &cpos); + ret.pos = cpos; + if (iswhite (c)) + { /* tok_white or tok_eop */ + nls = 0; + do + { + if (isnl (c)) + nls++; + } + while ((c = get (in, &cpos)) != EOF && iswhite (c)); + if (c == EOF) + { + ret.type = tok_eof; + return ret; + } + unget (in, c, &cpos); + ret.type = (nls > 1 ? 
tok_eop : tok_white); + return ret; + } + else if (c == EOF) + { /* tok_eof */ + ret.type = tok_eof; + return ret; + } + else if (c == '\\') + { /* tok_cmd */ + c = get (in, &cpos); + if (c == '-' || c == '\\' || c == '_' || + c == '#' || c == '{' || c == '}') + { + /* single-char command */ + rdadd (&rs, c); + } + else if (c == 'u') + { + int len = 0; + do + { + rdadd (&rs, c); + len++; + c = get (in, &cpos); + } + while (ishex (c) && len < 5); + unget (in, c, &cpos); + } + else if (iscmd (c)) + { + do + { + rdadd (&rs, c); + c = get (in, &cpos); + } + while (iscmd (c)); + unget (in, c, &cpos); + } + /* + * Now match the command against the list of available + * ones. + */ + ret.type = tok_cmd; + ret.text = ustrdup (rs.text); + match_kw (&ret); + sfree (rs.text); + return ret; + } + else if (c == '{') + { /* tok_lbrace */ + ret.type = tok_lbrace; + return ret; + } + else if (c == '}') + { /* tok_rbrace */ + ret.type = tok_rbrace; + return ret; + } + else + { /* tok_word */ + /* + * Read a word: the longest possible contiguous sequence of + * things other than whitespace, backslash, braces and + * hyphen. A hyphen terminates the word but is returned as + * part of it; everything else is pushed back for the next + * token. The `aux' field contains TRUE if the word ends in + * a hyphen. + */ + ret.aux = FALSE; /* assumed for now */ + while (1) + { + if (iswhite (c) || c == '{' || c == '}' || c == '\\' || c == EOF) + { + /* Put back the character that caused termination */ + unget (in, c, &cpos); + break; + } + else + { + rdadd (&rs, c); + if (c == '-') + { + ret.aux = TRUE; + break; /* hyphen terminates word */ + } + } + c = get (in, &cpos); + } + ret.type = tok_word; + ret.text = ustrdup (rs.text); + sfree (rs.text); + return ret; + } +} + +/* + * Determine whether the next input character is an open brace (for + * telling code paragraphs from paragraphs which merely start with + * code). 
+ */ +int +isbrace (input * in) +{ + int c; + filepos cpos; + + c = get (in, &cpos); + unget (in, c, &cpos); + return (c == '{'); +} + +/* + * Read the rest of a line that starts `\c'. Including nothing at + * all (tok_word with empty text). + */ +token +get_codepar_token (input * in) +{ + int c; + token ret; + rdstring rs = { 0, 0, NULL }; + filepos cpos; + + ret.type = tok_word; + c = get (in, &cpos); /* expect (and discard) one space */ + ret.pos = cpos; + if (c == ' ') + { + c = get (in, &cpos); + ret.pos = cpos; + } + while (!isnl (c) && c != EOF) + { + int c2 = c; + c = get (in, &cpos); + /* Discard \r just before \n. */ + if (c2 != 13 || !isnl (c)) + rdadd (&rs, c2); + } + unget (in, c, &cpos); + ret.text = ustrdup (rs.text); + sfree (rs.text); + return ret; +} + +/* + * Adds a new word to a linked list + */ +static word * +addword (word newword, word *** hptrptr) +{ + word *mnewword; + if (!hptrptr) + return NULL; + mnewword = mknew (word); + *mnewword = newword; /* structure copy */ + mnewword->next = NULL; + **hptrptr = mnewword; + *hptrptr = &mnewword->next; + return mnewword; +} + +/* + * Adds a new paragraph to a linked list + */ +static paragraph * +addpara (paragraph newpara, paragraph *** hptrptr) +{ + paragraph *mnewpara = mknew (paragraph); + *mnewpara = newpara; /* structure copy */ + mnewpara->next = NULL; + **hptrptr = mnewpara; + *hptrptr = &mnewpara->next; + return mnewpara; +} + +/* + * Destructor before token is reassigned; should catch most memory + * leaks + */ +#define dtor(t) ( sfree(t.text) ) + +/* + * Reads a single file (ie until get() returns EOF) + */ +static void +read_file (paragraph *** ret, input * in, indexdata * idx) +{ + token t; + paragraph par; + word wd, **whptr, **idximplicit; + tree234 *macros; + wchar_t utext[2], *wdtext; + int style, spcstyle; + int already; + int iswhite, seenwhite; + int type; + struct stack_item + { + enum + { + stack_nop = 0, /* do nothing (for error recovery) */ + stack_ualt = 1, /* \u 
alternative */ + stack_style = 2, /* \e, \c, \cw */ + stack_idx = 4, /* \I, \i, \ii */ + stack_hyper = 8, /* \W */ + stack_quote = 16, /* \q */ + } + type; + word **whptr; /* to restore from \u alternatives */ + word **idximplicit; /* to restore from \u alternatives */ + } + *sitem; + stack parsestk; + word *indexword, *uword, *iword; + word *idxwordlist; + rdstring indexstr; + int index_downcase, index_visible, indexing; + const rdstring nullrs = { 0, 0, NULL }; + wchar_t uchr; + + t.text = NULL; + macros = newtree234 (macrocmp); + already = FALSE; + + /* + * Loop on each paragraph. + */ + while (1) + { + int start_cmd = c__invalid; + par.words = NULL; + par.keyword = NULL; + whptr = &par.words; + + /* + * Get a token. + */ + if (!already) + { + dtor (t), t = get_token (in); + } + already = FALSE; + if (t.type == tok_eof) + break; + + /* + * Parse code paragraphs separately. + */ + if (t.type == tok_cmd && t.cmd == c_c && !isbrace (in)) + { + par.type = para_Code; + par.fpos = t.pos; + while (1) + { + dtor (t), t = get_codepar_token (in); + wd.type = word_WeakCode; + wd.breaks = FALSE; /* shouldn't need this... */ + wd.text = ustrdup (t.text); + wd.alt = NULL; + wd.fpos = t.pos; + addword (wd, &whptr); + dtor (t), t = get_token (in); + if (t.type == tok_white) + { + /* + * The newline after a code-paragraph line + */ + dtor (t), t = get_token (in); + } + if (t.type == tok_eop || t.type == tok_eof) + break; + else if (t.type != tok_cmd || t.cmd != c_c) + { + error (err_brokencodepara, &t.pos); + addpara (par, ret); + while (t.type != tok_eop) /* error recovery: */ + dtor (t), t = get_token (in); /* eat rest of paragraph */ + goto codeparabroken; /* ick, but such is life */ + } + } + addpara (par, ret); + codeparabroken: + continue; + } + + /* + * This token begins a paragraph. See if it's one of the + * special commands that define a paragraph type. 
+ * + * (note that \# is special in a way, and \nocite takes no + * text) + */ + par.type = para_Normal; + if (t.type == tok_cmd) + { + int needkw; + int is_macro = FALSE; + + par.fpos = t.pos; + switch (t.cmd) + { + default: + needkw = -1; + break; + case c__invalid: + error (err_badparatype, t.text, &t.pos); + needkw = 4; + break; + case c__comment: + if (isbrace (in)) + break; /* `\#{': isn't a comment para */ + do + { + dtor (t), t = get_token (in); + } + while (t.type != tok_eop && t.type != tok_eof); + continue; /* next paragraph */ + /* + * `needkw' values: + * + * 1 -- exactly one keyword + * 2 -- at least one keyword + * 4 -- any number of keywords including zero + * 8 -- at least one keyword and then nothing else + * 16 -- nothing at all! no keywords, no body + * 32 -- no keywords at all + */ + case c_A: + needkw = 2; + par.type = para_Appendix; + break; + case c_B: + needkw = 2; + par.type = para_Biblio; + break; + case c_BR: + needkw = 1; + par.type = para_BR; + start_cmd = c_BR; + break; + case c_C: + needkw = 2; + par.type = para_Chapter; + break; + case c_H: + needkw = 2; + par.type = para_Heading; + par.aux = 0; + break; + case c_IM: + needkw = 2; + par.type = para_IM; + start_cmd = c_IM; + break; + case c_S: + needkw = 2; + par.type = para_Subsect; + par.aux = t.aux; + break; + case c_U: + needkw = 32; + par.type = para_UnnumberedChapter; + break; + /* For \b and \n the keyword is optional */ + case c_b: + needkw = 4; + par.type = para_Bullet; + break; + case c_n: + needkw = 4; + par.type = para_NumberedList; + break; + case c_cfg: + needkw = 8; + par.type = para_Config; + start_cmd = c_cfg; + break; + case c_copyright: + needkw = 32; + par.type = para_Copyright; + break; + case c_define: + is_macro = TRUE; + needkw = 1; + break; + /* For \nocite the keyword is _everything_ */ + case c_nocite: + needkw = 8; + par.type = para_NoCite; + break; + case c_preamble: + needkw = 32; + par.type = para_Preamble; + break; + case c_rule: + needkw = 16; + 
par.type = para_Rule; + break; + case c_title: + needkw = 32; + par.type = para_Title; + break; + case c_versionid: + needkw = 32; + par.type = para_VersionID; + break; + } + + if (needkw > 0) + { + rdstring rs = { 0, 0, NULL }; + int nkeys = 0; + filepos fp; + + /* Get keywords. */ + dtor (t), t = get_token (in); + fp = t.pos; + while (t.type == tok_lbrace) + { + /* This is a keyword. */ + nkeys++; + /* FIXME: there will be bugs if anyone specifies an + * empty keyword (\foo{}), so trap this case. */ + while (dtor (t), t = get_token (in), + t.type == tok_word || + t.type == tok_white || + (t.type == tok_cmd && t.cmd == c__nbsp) || + (t.type == tok_cmd && t.cmd == c__escaped)) + { + if (t.type == tok_white || + (t.type == tok_cmd && t.cmd == c__nbsp)) + rdadd (&rs, ' '); + else + rdadds (&rs, t.text); + } + if (t.type != tok_rbrace) + { + error (err_kwunclosed, &t.pos); + continue; + } + rdadd (&rs, 0); /* add string terminator */ + dtor (t), t = get_token (in); /* eat right brace */ + } + + rdadd (&rs, 0); /* add string terminator */ + + /* See whether we have the right number of keywords. */ + if ((needkw & 48) && nkeys > 0) + error (err_kwillegal, &fp); + if ((needkw & 11) && nkeys == 0) + error (err_kwexpected, &fp); + if ((needkw & 5) && nkeys > 1) + error (err_kwtoomany, &fp); + + if (is_macro) + { + /* + * Macro definition. Get the rest of the line + * as a code-paragraph token, repeatedly until + * there's nothing more left of it. Separate + * with newlines. 
+ */ + rdstring macrotext = { 0, 0, NULL }; + while (1) + { + dtor (t), t = get_codepar_token (in); + if (macrotext.pos > 0) + rdadd (¯otext, L'\n'); + rdadds (¯otext, t.text); + dtor (t), t = get_token (in); + if (t.type == tok_eop) + break; + } + macrodef (macros, rs.text, macrotext.text, fp); + continue; /* next paragraph */ + } + + par.keyword = rdtrim (&rs); + + /* Move to EOP in case of needkw==8 or 16 (no body) */ + if (needkw & 24) + { + /* We allow whitespace even when we expect no para body */ + while (t.type == tok_white) + dtor (t), t = get_token (in); + if (t.type != tok_eop && t.type != tok_eof && + (start_cmd == c__invalid || + t.type != tok_cmd || t.cmd != start_cmd)) + { + error (err_bodyillegal, &t.pos); + /* Error recovery: eat the rest of the paragraph */ + while (t.type != tok_eop && t.type != tok_eof && + (start_cmd == c__invalid || + t.type != tok_cmd || t.cmd != start_cmd)) + dtor (t), t = get_token (in); + } + if (t.type == tok_cmd) + already = TRUE; /* inhibit get_token at top of loop */ + addpara (par, ret); + continue; /* next paragraph */ + } + } + } + + /* + * Now read the actual paragraph, word by word, adding to + * the paragraph list. + * + * Mid-paragraph commands: + * + * \K \k + * \c \cw + * \e + * \i \ii + * \I + * \u + * \W + * \date + * \\ \{ \} + */ + parsestk = stk_new (); + style = word_Normal; + spcstyle = word_WhiteSpace; + indexing = FALSE; + seenwhite = TRUE; + while (t.type != tok_eop && t.type != tok_eof) + { + iswhite = FALSE; + already = FALSE; + + /* Handle implicit paragraph breaks after \IM, \BR etc */ + if (start_cmd != c__invalid && + t.type == tok_cmd && t.cmd == start_cmd) + { + already = TRUE; /* inhibit get_token at top of loop */ + break; + } + + if (t.type == tok_cmd && t.cmd == c__escaped) + { + t.type = tok_word; /* nice and simple */ + t.aux = 0; /* even if `\-' - nonbreaking! 
*/ + } + if (t.type == tok_cmd && t.cmd == c__nbsp) + { + t.type = tok_word; /* nice and simple */ + sfree (t.text); + t.text = ustrdup (L" "); /* text is ` ' not `_' */ + t.aux = 0; /* (nonbreaking) */ + } + switch (t.type) + { + case tok_white: + if (whptr == &par.words) + break; /* strip whitespace at start of para */ + wd.text = NULL; + wd.type = spcstyle; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + wd.breaks = FALSE; + + /* + * Inhibit use of whitespace if it's (probably the + * newline) before a repeat \IM / \BR type + * directive. + */ + if (start_cmd != c__invalid) + { + dtor (t), t = get_token (in); + already = TRUE; + if (t.type == tok_cmd && t.cmd == start_cmd) + break; + } + + if (indexing) + rdadd (&indexstr, ' '); + if (!indexing || index_visible) + addword (wd, &whptr); + if (indexing) + addword (wd, &idximplicit); + iswhite = TRUE; + break; + case tok_word: + if (indexing) + rdadds (&indexstr, t.text); + wd.type = style; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + wd.breaks = t.aux; + if (!indexing || index_visible) + { + wd.text = ustrdup (t.text); + addword (wd, &whptr); + } + if (indexing) + { + wd.text = ustrdup (t.text); + addword (wd, &idximplicit); + } + break; + case tok_lbrace: + error (err_unexbrace, &t.pos); + /* Error recovery: push nop */ + sitem = mknew (struct stack_item); + sitem->type = stack_nop; + stk_push (parsestk, sitem); + break; + case tok_rbrace: + sitem = stk_pop (parsestk); + if (!sitem) + error (err_unexbrace, &t.pos); + else + { + if (sitem->type & stack_ualt) + { + whptr = sitem->whptr; + idximplicit = sitem->idximplicit; + } + if (sitem->type & stack_style) + { + style = word_Normal; + spcstyle = word_WhiteSpace; + } + if (sitem->type & stack_idx) + { + indexword->text = ustrdup (indexstr.text); + if (index_downcase) + ustrlow (indexword->text); + indexing = FALSE; + rdadd (&indexstr, L'\0'); + index_merge (idx, FALSE, indexstr.text, idxwordlist); + sfree (indexstr.text); + } + if (sitem->type & 
stack_hyper) + { + wd.text = NULL; + wd.type = word_HyperEnd; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword (wd, &whptr); + if (indexing) + addword (wd, &idximplicit); + } + if (sitem->type & stack_quote) + { + wd.text = NULL; + wd.type = toquotestyle (style); + wd.alt = NULL; + wd.aux = quote_Close; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword (wd, &whptr); + if (indexing) + { + rdadd (&indexstr, L'"'); + addword (wd, &idximplicit); + } + } + } + sfree (sitem); + break; + case tok_cmd: + switch (t.cmd) + { + case c__comment: + /* + * In-paragraph comment: \#{ balanced braces } + * + * Anything goes here; even tok_eop. We should + * eat whitespace after the close brace _if_ + * there was whitespace before the \#. + */ + dtor (t), t = get_token (in); + if (t.type != tok_lbrace) + { + error (err_explbr, &t.pos); + } + else + { + int braces = 1; + while (braces > 0) + { + dtor (t), t = get_token (in); + if (t.type == tok_lbrace) + braces++; + else if (t.type == tok_rbrace) + braces--; + else if (t.type == tok_eof) + { + error (err_commenteof, &t.pos); + break; + } + } + } + if (seenwhite) + { + already = TRUE; + dtor (t), t = get_token (in); + if (t.type == tok_white) + { + iswhite = TRUE; + already = FALSE; + } + } + break; + case c_q: + dtor (t), t = get_token (in); + if (t.type != tok_lbrace) + { + error (err_explbr, &t.pos); + } + else + { + wd.text = NULL; + wd.type = toquotestyle (style); + wd.alt = NULL; + wd.aux = quote_Open; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword (wd, &whptr); + if (indexing) + { + rdadd (&indexstr, L'"'); + addword (wd, &idximplicit); + } + sitem = mknew (struct stack_item); + sitem->type = stack_quote; + stk_push (parsestk, sitem); + } + break; + case c_K: + case c_k: + case c_W: + case c_date: + /* + * Keyword, hyperlink, or \date. 
We expect a + * left brace, some text, and then a right + * brace. No nesting; no arguments. + */ + wd.fpos = t.pos; + wd.breaks = FALSE; + if (t.cmd == c_K) + wd.type = word_UpperXref; + else if (t.cmd == c_k) + wd.type = word_LowerXref; + else if (t.cmd == c_W) + wd.type = word_HyperLink; + else + wd.type = word_Normal; + dtor (t), t = get_token (in); + if (t.type != tok_lbrace) + { + if (wd.type == word_Normal) + { + time_t thetime = time (NULL); + struct tm *broken = localtime (&thetime); + already = TRUE; + wdtext = ustrftime (NULL, broken); + wd.type = style; + } + else + { + error (err_explbr, &t.pos); + wdtext = NULL; + } + } + else + { + rdstring rs = { 0, 0, NULL }; + while (dtor (t), t = get_token (in), + t.type == tok_word || t.type == tok_white) + { + if (t.type == tok_white) + rdadd (&rs, ' '); + else + rdadds (&rs, t.text); + } + if (wd.type == word_Normal) + { + time_t thetime = time (NULL); + struct tm *broken = localtime (&thetime); + wdtext = ustrftime (rs.text, broken); + wd.type = style; + } + else + { + wdtext = ustrdup (rs.text); + } + sfree (rs.text); + if (t.type != tok_rbrace) + { + error (err_kwexprbr, &t.pos); + } + } + wd.alt = NULL; + wd.aux = 0; + if (!indexing || index_visible) + { + wd.text = ustrdup (wdtext); + addword (wd, &whptr); + } + if (indexing) + { + wd.text = ustrdup (wdtext); + addword (wd, &idximplicit); + } + sfree (wdtext); + if (wd.type == word_HyperLink) + { + /* + * Hyperlinks are different: they then + * expect another left brace, to begin + * delimiting the text marked by the link. + */ + dtor (t), t = get_token (in); + /* + * Special cases: \W{}\c, \W{}\e, \W{}\cw + */ + sitem = mknew (struct stack_item); + sitem->type = stack_hyper; + if (t.type == tok_cmd && + (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) + { + if (style != word_Normal) + error (err_nestedstyles, &t.pos); + else + { + style = (t.cmd == c_c ? word_Code : + t.cmd == c_cw ? 
word_WeakCode : + word_Emph); + spcstyle = tospacestyle (style); + sitem->type |= stack_style; + } + dtor (t), t = get_token (in); + } + if (t.type != tok_lbrace) + { + error (err_explbr, &t.pos); + sfree (sitem); + } + else + { + stk_push (parsestk, sitem); + } + } + break; + case c_c: + case c_cw: + case c_e: + type = t.cmd; + if (style != word_Normal) + { + error (err_nestedstyles, &t.pos); + /* Error recovery: eat lbrace, push nop. */ + dtor (t), t = get_token (in); + sitem = mknew (struct stack_item); + sitem->type = stack_nop; + stk_push (parsestk, sitem); + } + dtor (t), t = get_token (in); + if (t.type != tok_lbrace) + { + error (err_explbr, &t.pos); + } + else + { + style = (type == c_c ? word_Code : + type == c_cw ? word_WeakCode : word_Emph); + spcstyle = tospacestyle (style); + sitem = mknew (struct stack_item); + sitem->type = stack_style; + stk_push (parsestk, sitem); + } + break; + case c_i: + case c_ii: + case c_I: + type = t.cmd; + if (indexing) + { + error (err_nestedindex, &t.pos); + /* Error recovery: eat lbrace, push nop. */ + dtor (t), t = get_token (in); + sitem = mknew (struct stack_item); + sitem->type = stack_nop; + stk_push (parsestk, sitem); + } + sitem = mknew (struct stack_item); + sitem->type = stack_idx; + dtor (t), t = get_token (in); + /* + * Special cases: \i\c, \i\e, \i\cw + */ + wd.fpos = t.pos; + if (t.type == tok_cmd && + (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) + { + if (style != word_Normal) + error (err_nestedstyles, &t.pos); + else + { + style = (t.cmd == c_c ? word_Code : + t.cmd == c_cw ? 
word_WeakCode : word_Emph); + spcstyle = tospacestyle (style); + sitem->type |= stack_style; + } + dtor (t), t = get_token (in); + } + if (t.type != tok_lbrace) + { + sfree (sitem); + error (err_explbr, &t.pos); + } + else + { + /* Add an index-reference word with no text as yet */ + wd.type = word_IndexRef; + wd.text = NULL; + wd.alt = NULL; + wd.aux = 0; + wd.breaks = FALSE; + indexword = addword (wd, &whptr); + /* Set up a rdstring to read the index text */ + indexstr = nullrs; + /* Flags so that we do the Right Things with text */ + index_visible = (type != c_I); + index_downcase = (type == c_ii); + indexing = TRUE; + idxwordlist = NULL; + idximplicit = &idxwordlist; + /* Stack item to close the indexing on exit */ + stk_push (parsestk, sitem); + } + break; + case c_u: + uchr = t.aux; + utext[0] = uchr; + utext[1] = 0; + wd.type = style; + wd.breaks = FALSE; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + if (!indexing || index_visible) + { + wd.text = ustrdup (utext); + uword = addword (wd, &whptr); + } + else + uword = NULL; + if (indexing) + { + wd.text = ustrdup (utext); + iword = addword (wd, &idximplicit); + } + else + iword = NULL; + dtor (t), t = get_token (in); + if (t.type == tok_lbrace) + { + /* + * \u with a left brace. Until the brace + * closes, all further words go on a + * sidetrack from the main thread of the + * paragraph. + */ + sitem = mknew (struct stack_item); + sitem->type = stack_ualt; + sitem->whptr = whptr; + sitem->idximplicit = idximplicit; + stk_push (parsestk, sitem); + whptr = uword ? &uword->alt : NULL; + idximplicit = iword ? 
&iword->alt : NULL; + } + else + { + if (indexing) + rdadd (&indexstr, uchr); + already = TRUE; + } + break; + default: + if (!macrolookup (macros, in, t.text, &t.pos)) + error (err_badmidcmd, t.text, &t.pos); + break; + } + } + if (!already) + dtor (t), t = get_token (in); + seenwhite = iswhite; + } + /* Check the stack is empty */ + if (NULL != (sitem = stk_pop (parsestk))) + { + do + { + sfree (sitem); + sitem = stk_pop (parsestk); + } + while (sitem); + error (err_missingrbrace, &t.pos); + } + stk_free (parsestk); + addpara (par, ret); + } + + /* + * We break to here rather than returning, because otherwise + * this cleanup doesn't happen. + */ + dtor (t); + macrocleanup (macros); +} + +paragraph * +read_input (input * in, indexdata * idx) +{ + paragraph *head = NULL; + paragraph **hptr = &head; + + while (in->currindex < in->nfiles) + { + in->currfp = fopen (in->filenames[in->currindex], "r"); + if (in->currfp) + { + setpos (in, in->filenames[in->currindex]); + read_file (&hptr, in, idx); + } + in->currindex++; + } + + return head; +} diff --git a/Docs/src/bin/halibut/keywords.c b/Docs/src/bin/halibut/keywords.c new file mode 100644 index 00000000..7de3bc0b --- /dev/null +++ b/Docs/src/bin/halibut/keywords.c @@ -0,0 +1,179 @@ +/* + * keywords.c: keep track of all cross-reference keywords + */ + +#include +#include +#include +#include "halibut.h" + +static int +kwcmp (void *av, void *bv) +{ + const keyword *a = (const keyword *) av; + const keyword *b = (const keyword *) bv; + return ustrcmp (a->key, b->key); +} + +static int +kwfind (void *av, void *bv) +{ + wchar_t *a = (wchar_t *) av; + const keyword *b = (const keyword *) bv; + return ustrcmp (a, b->key); +} + +keyword * +kw_lookup (keywordlist * kl, wchar_t * str) +{ + return find234 (kl->keys, str, kwfind); +} + +/* + * This function reads through source form and collects the + * keywords. 
They get collected in a heap, sorted by Unicode + * collation, last at the top (so that we can Heapsort them when we + * finish). + */ +keywordlist * +get_keywords (paragraph * source) +{ + int errors = FALSE; + keywordlist *kl = mknew (keywordlist); + numberstate *n = number_init (); + int prevpara = para_NotParaType; + + number_cfg (n, source); + + kl->size = 0; + kl->keys = newtree234 (kwcmp); + kl->nlooseends = kl->looseendssize = 0; + kl->looseends = NULL; + for (; source; source = source->next) + { + wchar_t *p, *q; + p = q = source->keyword; + + /* + * Look for the section type override (`example', + * `question' or whatever - to replace `chapter' or + * `section' on a per-section basis). + */ + if (q) + { + q = uadv (q); /* point q at the word beyond */ + if (!*q) + q = NULL; + } + + /* + * Number the chapter / section / list-item / whatever. + * This also sets up the `parent', `child' and `sibling' + * links. + */ + source->kwtext = number_mktext (n, source, q, prevpara, &errors); + prevpara = source->type; + + if (p && *p) + { + if (source->kwtext || source->type == para_Biblio) + { + keyword *kw, *ret; + + kw = mknew (keyword); + kw->key = p; + kw->text = source->kwtext; + kw->para = source; + ret = add234 (kl->keys, kw); + if (ret != kw) + { + error (err_multikw, &source->fpos, &ret->para->fpos, p); + sfree (kw); + /* FIXME: what happens to kw->text? Does it leak? 
*/ + } + } + } + else + { + if (kl->nlooseends >= kl->looseendssize) + { + kl->looseendssize = kl->nlooseends + 32; + kl->looseends = resize (kl->looseends, kl->looseendssize); + } + kl->looseends[kl->nlooseends++] = source->kwtext; + } + } + + number_free (n); + + if (errors) + { + free_keywords (kl); + return NULL; + } + + return kl; +} + +void +free_keywords (keywordlist * kl) +{ + keyword *kw; + while (kl->nlooseends) + free_word_list (kl->looseends[--kl->nlooseends]); + sfree (kl->looseends); + while ((kw = index234 (kl->keys, 0)) != NULL) + { + delpos234 (kl->keys, 0); + free_word_list (kw->text); + sfree (kw); + } + freetree234 (kl->keys); + sfree (kl); +} + +void +subst_keywords (paragraph * source, keywordlist * kl) +{ + for (; source; source = source->next) + { + word *ptr; + for (ptr = source->words; ptr; ptr = ptr->next) + { + if (ptr->type == word_UpperXref || ptr->type == word_LowerXref) + { + keyword *kw; + word **endptr, *close, *subst; + + kw = kw_lookup (kl, ptr->text); + if (!kw) + { + error (err_nosuchkw, &ptr->fpos, ptr->text); + subst = NULL; + } + else + subst = dup_word_list (kw->text); + + if (subst && ptr->type == word_LowerXref && + kw->para->type != para_Biblio && + kw->para->type != para_BiblioCited) + ustrlow (subst->text); + + close = mknew (word); + close->text = NULL; + close->alt = NULL; + close->type = word_XrefEnd; + close->fpos = ptr->fpos; + + close->next = ptr->next; + ptr->next = subst; + + for (endptr = &ptr->next; *endptr; endptr = &(*endptr)->next) + (*endptr)->fpos = ptr->fpos; + + *endptr = close; + ptr = close; + } + } + } +} diff --git a/Docs/src/bin/halibut/licence.c b/Docs/src/bin/halibut/licence.c new file mode 100644 index 00000000..7457f27c --- /dev/null +++ b/Docs/src/bin/halibut/licence.c @@ -0,0 +1,18 @@ +/* + * licence.c: licence text + */ + +#include + +static char *licencetext[] = { + "FIXME: licence text goes here", + NULL +}; + +void +licence (void) +{ + char **p; + for (p = licencetext; *p; p++) + puts 
(*p); +} diff --git a/Docs/src/bin/halibut/main.c b/Docs/src/bin/halibut/main.c new file mode 100644 index 00000000..87b63315 --- /dev/null +++ b/Docs/src/bin/halibut/main.c @@ -0,0 +1,343 @@ +/* + * main.c: command line parsing and top level + */ + +#include +#include +#include "halibut.h" + +static void dbg_prtsource (paragraph * sourceform); +static void dbg_prtwordlist (int level, word * w); +static void dbg_prtkws (keywordlist * kws); + +int +main (int argc, char **argv) +{ + char **infiles; + char *outfile; + int nfiles; + int nogo; + int errs; + int reportcols; + int debug; + + /* + * Set up initial (default) parameters. + */ + infiles = mknewa (char *, argc); + outfile = NULL; + nfiles = 0; + nogo = errs = FALSE; + reportcols = 0; + debug = 0; + + if (argc == 1) + { + usage (); + exit (EXIT_SUCCESS); + } + + /* + * Parse command line arguments. + */ + while (--argc) + { + char *p = *++argv; + if (*p == '-') + { + /* + * An option. + */ + while (p && *++p) + { + char c = *p; + switch (c) + { + case '-': + /* + * Long option. + */ + { + char *opt, *val; + opt = p++; /* opt will have _one_ leading - */ + while (*p && *p != '=') + p++; /* find end of option */ + if (*p == '=') + { + *p++ = '\0'; + val = p; + } + else + val = NULL; + if (!strcmp (opt, "-help")) + { + help (); + nogo = TRUE; + } + else if (!strcmp (opt, "-version")) + { + showversion (); + nogo = TRUE; + } + else if (!strcmp (opt, "-licence") || + !strcmp (opt, "-license")) + { + licence (); + nogo = TRUE; + } + else if (!strcmp (opt, "-output")) + { + if (!val) + errs = TRUE, error (err_optnoarg, opt); + else + outfile = val; + } + else if (!strcmp (opt, "-precise")) + { + reportcols = 1; + } + else + { + errs = TRUE, error (err_nosuchopt, opt); + } + } + p = NULL; + break; + case 'h': + case 'V': + case 'L': + case 'P': + case 'd': + /* + * Option requiring no parameter. 
+ */ + switch (c) + { + case 'h': + help (); + nogo = TRUE; + break; + case 'V': + showversion (); + nogo = TRUE; + break; + case 'L': + licence (); + nogo = TRUE; + break; + case 'P': + reportcols = 1; + break; + case 'd': + debug = TRUE; + break; + } + break; + case 'o': + /* + * Option requiring parameter. + */ + p++; + if (!*p && argc > 1) + --argc, p = *++argv; + else if (!*p) + { + char opt[2]; + opt[0] = c; + opt[1] = '\0'; + errs = TRUE, error (err_optnoarg, opt); + } + /* + * Now c is the option and p is the parameter. + */ + switch (c) + { + case 'o': + outfile = p; + break; + } + p = NULL; /* prevent continued processing */ + break; + default: + /* + * Unrecognised option. + */ + { + char opt[2]; + opt[0] = c; + opt[1] = '\0'; + errs = TRUE, error (err_nosuchopt, opt); + } + } + } + } + else + { + /* + * A non-option argument. + */ + infiles[nfiles++] = p; + } + } + + if (errs) + exit (EXIT_FAILURE); + if (nogo) + exit (EXIT_SUCCESS); + + /* + * Do the work. + */ + if (nfiles == 0) + { + error (err_noinput); + usage (); + exit (EXIT_FAILURE); + } + + { + input in; + paragraph *sourceform, *p; + indexdata *idx; + keywordlist *keywords; + + in.filenames = infiles; + in.nfiles = nfiles; + in.currfp = NULL; + in.currindex = 0; + in.npushback = in.pushbacksize = 0; + in.pushback = NULL; + in.reportcols = reportcols; + in.stack = NULL; + + idx = make_index (); + + sourceform = read_input (&in, idx); + if (!sourceform) + exit (EXIT_FAILURE); + + sfree (in.pushback); + + mark_attr_ends (sourceform); + + sfree (infiles); + + keywords = get_keywords (sourceform); + if (!keywords) + exit (EXIT_FAILURE); + gen_citations (sourceform, keywords); + subst_keywords (sourceform, keywords); + + for (p = sourceform; p; p = p->next) + if (p->type == para_IM) + index_merge (idx, TRUE, p->keyword, p->words); + + build_index (idx); + + if (debug) + { + index_debug (idx); + dbg_prtkws (keywords); + dbg_prtsource (sourceform); + } + + text_backend (sourceform, keywords, idx); + 
xhtml_backend (sourceform, keywords, idx); + whlp_backend (sourceform, keywords, idx); + + free_para_list (sourceform); + free_keywords (keywords); + cleanup_index (idx); + } + + return 0; +} + +static void +dbg_prtsource (paragraph * sourceform) +{ + /* + * Output source form in debugging format. + */ + + paragraph *p; + for (p = sourceform; p; p = p->next) + { + wchar_t *wp; + printf ("para %d ", p->type); + if (p->keyword) + { + wp = p->keyword; + while (*wp) + { + putchar ('\"'); + for (; *wp; wp++) + putchar (*wp); + putchar ('\"'); + if (*++wp) + printf (", "); + } + } + else + printf ("(no keyword)"); + printf (" {\n"); + dbg_prtwordlist (1, p->words); + printf ("}\n"); + } +} + +static void +dbg_prtkws (keywordlist * kws) +{ + /* + * Output keywords in debugging format. + */ + + int i; + keyword *kw; + + for (i = 0; (kw = index234 (kws->keys, i)) != NULL; i++) + { + wchar_t *wp; + printf ("keyword "); + wp = kw->key; + while (*wp) + { + putchar ('\"'); + for (; *wp; wp++) + putchar (*wp); + putchar ('\"'); + if (*++wp) + printf (", "); + } + printf (" {\n"); + dbg_prtwordlist (1, kw->text); + printf ("}\n"); + } +} + +static void +dbg_prtwordlist (int level, word * w) +{ + for (; w; w = w->next) + { + wchar_t *wp; + printf ("%*sword %d ", level * 4, "", w->type); + if (w->text) + { + printf ("\""); + for (wp = w->text; *wp; wp++) + putchar (*wp); + printf ("\""); + } + else + printf ("(no text)"); + if (w->alt) + { + printf (" alt = {\n"); + dbg_prtwordlist (level + 1, w->alt); + printf ("%*s}", level * 4, ""); + } + printf ("\n"); + } +} diff --git a/Docs/src/bin/halibut/makefile b/Docs/src/bin/halibut/makefile new file mode 100644 index 00000000..5589cdc0 --- /dev/null +++ b/Docs/src/bin/halibut/makefile @@ -0,0 +1,22 @@ +OBJS = biblio.o bk_text.o bk_whlp.o bk_xhtml.o contents.o error.o help.o index.o input.o keywords.o licence.o main.o malloc.o misc.o style.o tree234.o ustring.o version.o winhelp.o +LIBS = + +# -- Programs -- +MAKE = make +CC = gcc +RM = 
del + +# -- Compilers and linker flags -- +DEFINES = +CFLAGS = -Wall -W $(DEFINES) +LFLAGS = -s + + +all : halibut + +halibut : $(OBJS) + $(CC) $(CFLAGS) $(LFLAGS) -o ..\halibut.exe $(OBJS) $(LIBS) + +clean :: + $(RM) *.o + $(RM) ..\halibut.exe diff --git a/Docs/src/bin/halibut/malloc.c b/Docs/src/bin/halibut/malloc.c new file mode 100644 index 00000000..a3652b68 --- /dev/null +++ b/Docs/src/bin/halibut/malloc.c @@ -0,0 +1,173 @@ +/* + * malloc.c: safe wrappers around malloc, realloc, free, strdup + */ + +#include +#include +#include "halibut.h" + +#ifdef LOGALLOC +#define LOGPARAMS char *file, int line, +static FILE *logallocfp = NULL; +static int logline = 2; /* off by 1: `null pointer is' */ +static void +loginc (void) +{ +} +static void +logallocinit (void) +{ + if (!logallocfp) + { + logallocfp = fopen ("malloc.log", "w"); + if (!logallocfp) + { + fprintf (stderr, "panic: unable to open malloc.log\n"); + exit (10); + } + setvbuf (logallocfp, NULL, _IOLBF, BUFSIZ); + fprintf (logallocfp, "null pointer is %p\n", NULL); + } +} +static void +logprintf (char *fmt, ...) +{ + va_list ap; + va_start (ap, fmt); + vfprintf (logallocfp, fmt, ap); + va_end (ap); +} + +#define LOGPRINT(x) ( logallocinit(), logprintf x ) +#define LOGINC do { loginc(); logline++; } while (0) +#else +#define LOGPARAMS +#define LOGPRINT(x) +#define LOGINC ((void)0) +#endif + +/* + * smalloc should guarantee to return a useful pointer - Halibut + * can do nothing except die when it's out of memory anyway. 
+ */ +void *(smalloc) (LOGPARAMS int size) +{ + void *p; + LOGINC; + LOGPRINT (("%s %d malloc(%ld)", file, line, (long) size)); + p = malloc (size); + if (!p) + fatal (err_nomemory); + LOGPRINT ((" returns %p\n", p)); + return p; +} + +/* + * sfree should guaranteeably deal gracefully with freeing NULL + */ +void (sfree) (LOGPARAMS void *p) +{ + if (p) + { + LOGINC; + LOGPRINT (("%s %d free(%p)\n", file, line, p)); + free (p); + } +} + +/* + * srealloc should guaranteeably be able to realloc NULL + */ +void *(srealloc) (LOGPARAMS void *p, int size) +{ + void *q; + if (p) + { + LOGINC; + LOGPRINT (("%s %d realloc(%p,%ld)", file, line, p, (long) size)); + q = realloc (p, size); + LOGPRINT ((" returns %p\n", q)); + } + else + { + LOGINC; + LOGPRINT (("%s %d malloc(%ld)", file, line, (long) size)); + q = malloc (size); + LOGPRINT ((" returns %p\n", q)); + } + if (!q) + fatal (err_nomemory); + return q; +} + +/* + * dupstr is like strdup, but with the never-return-NULL property + * of smalloc (and also reliably defined in all environments :-) + */ +char * +dupstr (char *s) +{ + char *r = smalloc (1 + strlen (s)); + strcpy (r, s); + return r; +} + +/* + * Duplicate a linked list of words + */ +word * +dup_word_list (word * w) +{ + word *head, **eptr = &head; + + while (w) + { + word *newwd = mknew (word); + *newwd = *w; /* structure copy */ + newwd->text = ustrdup (w->text); + if (w->alt) + newwd->alt = dup_word_list (w->alt); + *eptr = newwd; + newwd->next = NULL; + eptr = &newwd->next; + + w = w->next; + } + + return head; +} + +/* + * Free a linked list of words + */ +void +free_word_list (word * w) +{ + word *t; + while (w) + { + t = w; + w = w->next; + sfree (t->text); + if (t->alt) + free_word_list (t->alt); + sfree (t); + } +} + +/* + * Free a linked list of paragraphs + */ +void +free_para_list (paragraph * p) +{ + paragraph *t; + while (p) + { + t = p; + p = p->next; + sfree (t->keyword); + free_word_list (t->words); + sfree (t); + } +} diff --git 
a/Docs/src/bin/halibut/misc.c b/Docs/src/bin/halibut/misc.c new file mode 100644 index 00000000..c2f6c9e3 --- /dev/null +++ b/Docs/src/bin/halibut/misc.c @@ -0,0 +1,377 @@ +/* + * misc.c: miscellaneous useful items + */ + +#include "halibut.h" + +struct stackTag +{ + void **data; + int sp; + int size; +}; + +stack +stk_new (void) +{ + stack s; + + s = mknew (struct stackTag); + s->sp = 0; + s->size = 0; + s->data = NULL; + + return s; +} + +void +stk_free (stack s) +{ + sfree (s->data); + sfree (s); +} + +void +stk_push (stack s, void *item) +{ + if (s->size <= s->sp) + { + s->size = s->sp + 32; + s->data = resize (s->data, s->size); + } + s->data[s->sp++] = item; +} + +void * +stk_pop (stack s) +{ + if (s->sp > 0) + return s->data[--s->sp]; + else + return NULL; +} + +/* + * Small routines to amalgamate a string from an input source. + */ +const rdstring empty_rdstring = { 0, 0, NULL }; +const rdstringc empty_rdstringc = { 0, 0, NULL }; + +void +rdadd (rdstring * rs, wchar_t c) +{ + if (rs->pos >= rs->size - 1) + { + rs->size = rs->pos + 128; + rs->text = resize (rs->text, rs->size); + } + rs->text[rs->pos++] = c; + rs->text[rs->pos] = 0; +} + +void +rdadds (rdstring * rs, wchar_t * p) +{ + int len = ustrlen (p); + if (rs->pos >= rs->size - len) + { + rs->size = rs->pos + len + 128; + rs->text = resize (rs->text, rs->size); + } + ustrcpy (rs->text + rs->pos, p); + rs->pos += len; +} + +wchar_t * +rdtrim (rdstring * rs) +{ + rs->text = resize (rs->text, rs->pos + 1); + return rs->text; +} + +void +rdaddc (rdstringc * rs, char c) +{ + if (rs->pos >= rs->size - 1) + { + rs->size = rs->pos + 128; + rs->text = resize (rs->text, rs->size); + } + rs->text[rs->pos++] = c; + rs->text[rs->pos] = 0; +} + +void +rdaddsc (rdstringc * rs, char *p) +{ + int len = strlen (p); + if (rs->pos >= rs->size - len) + { + rs->size = rs->pos + len + 128; + rs->text = resize (rs->text, rs->size); + } + strcpy (rs->text + rs->pos, p); + rs->pos += len; +} + +char * +rdtrimc (rdstringc * rs) 
+{ + rs->text = resize (rs->text, rs->pos + 1); + return rs->text; +} + +int +compare_wordlists (word * a, word * b) +{ + int t; + while (a && b) + { + if (a->type != b->type) + return (a->type < b->type ? -1 : +1); /* FIXME? */ + t = a->type; + if ((t != word_Normal && t != word_Code && + t != word_WeakCode && t != word_Emph) || a->alt || b->alt) + { + int c; + if (a->text && b->text) + { + c = ustricmp (a->text, b->text); + if (c) + return c; + } + c = compare_wordlists (a->alt, b->alt); + if (c) + return c; + a = a->next; + b = b->next; + } + else + { + wchar_t *ap = a->text, *bp = b->text; + while (*ap && *bp) + { + wchar_t ac = utolower (*ap), bc = utolower (*bp); + if (ac != bc) + return (ac < bc ? -1 : +1); + if (!*++ap && a->next && a->next->type == t && !a->next->alt) + a = a->next, ap = a->text; + if (!*++bp && b->next && b->next->type == t && !b->next->alt) + b = b->next, bp = b->text; + } + if (*ap || *bp) + return (*ap ? +1 : -1); + a = a->next; + b = b->next; + } + } + + if (a || b) + return (a ? +1 : -1); + else + return 0; +} + +void +mark_attr_ends (paragraph * sourceform) +{ + paragraph *p; + word *w, *wp; + for (p = sourceform; p; p = p->next) + { + wp = NULL; + for (w = p->words; w; w = w->next) + { + if (isattr (w->type)) + { + int before = (wp && isattr (wp->type) && + sameattr (wp->type, w->type)); + int after = (w->next && isattr (w->next->type) && + sameattr (w->next->type, w->type)); + w->aux |= (before ? + (after ? attr_Always : attr_Last) : + (after ? attr_First : attr_Only)); + } + wp = w; + } + } +} + +wrappedline * +wrap_para (word * text, int width, int subsequentwidth, + int (*widthfn) (word *)) +{ + wrappedline *head = NULL, **ptr = &head; + int nwords, wordsize; + struct wrapword + { + word *begin, *end; + int width; + int spacewidth; + int cost; + int nwords; + } + *wrapwords; + int i, j, n; + + /* + * Break the line up into wrappable components. 
+ */ + nwords = wordsize = 0; + wrapwords = NULL; + while (text) + { + if (nwords >= wordsize) + { + wordsize = nwords + 64; + wrapwords = srealloc (wrapwords, wordsize * sizeof (*wrapwords)); + } + wrapwords[nwords].width = 0; + wrapwords[nwords].begin = text; + while (text) + { + wrapwords[nwords].width += widthfn (text); + wrapwords[nwords].end = text->next; + if (text->next && (text->next->type == word_WhiteSpace || + text->next->type == word_EmphSpace || + text->breaks)) + break; + text = text->next; + } + if (text && text->next && (text->next->type == word_WhiteSpace || + text->next->type == word_EmphSpace)) + { + wrapwords[nwords].spacewidth = widthfn (text->next); + text = text->next; + } + else + { + wrapwords[nwords].spacewidth = 0; + } + nwords++; + if (text) + text = text->next; + } + + /* + * Perform the dynamic wrapping algorithm: work backwards from + * nwords-1, determining the optimal wrapping for each terminal + * subsequence of the paragraph. + */ + for (i = nwords; i--;) + { + int best = -1; + int bestcost = 0; + int cost; + int linelen = 0, spacewidth = 0; + int seenspace; + int thiswidth = (i == 0 ? width : subsequentwidth); + + j = 0; + seenspace = 0; + while (i + j < nwords) + { + /* + * See what happens if we put j+1 words on this line. + */ + if (spacewidth) + seenspace = 1; + linelen += spacewidth + wrapwords[i + j].width; + spacewidth = wrapwords[i + j].spacewidth; + j++; + if (linelen > thiswidth) + { + /* + * If we're over the width limit, abandon ship, + * _unless_ there is no best-effort yet (which will + * only happen if the first word is too long all by + * itself). + */ + if (best > 0) + break; + } + if (i + j == nwords) + { + /* + * Special case: if we're at the very end of the + * paragraph, we don't score penalty points for the + * white space left on the line. 
+ */ + cost = 0; + } + else + { + cost = (thiswidth - linelen) * (thiswidth - linelen); + cost += wrapwords[i + j].cost; + } + /* + * We compare bestcost >= cost, not bestcost > cost, + * because in cases where the costs are identical we + * want to try to look like the greedy algorithm, + * because readers are likely to have spent a lot of + * time looking at greedy-wrapped paragraphs and + * there's no point violating the Principle of Least + * Surprise if it doesn't actually gain anything. + */ + if (best < 0 || bestcost >= cost) + { + bestcost = cost; + best = j; + } + } + /* + * Now we know the optimal answer for this terminal + * subsequence, so put it in wrapwords. + */ + wrapwords[i].cost = bestcost; + wrapwords[i].nwords = best; + } + + /* + * We've wrapped the paragraph. Now build the output + * `wrappedline' list. + */ + i = 0; + while (i < nwords) + { + wrappedline *w = mknew (wrappedline); + *ptr = w; + ptr = &w->next; + w->next = NULL; + + n = wrapwords[i].nwords; + w->begin = wrapwords[i].begin; + w->end = wrapwords[i + n - 1].end; + + /* + * Count along the words to find nspaces and shortfall. 
+ */ + w->nspaces = 0; + w->shortfall = width; + for (j = 0; j < n; j++) + { + w->shortfall -= wrapwords[i + j].width; + if (j < n - 1 && wrapwords[i + j].spacewidth) + { + w->nspaces++; + w->shortfall -= wrapwords[i + j].spacewidth; + } + } + i += n; + } + + sfree (wrapwords); + + return head; +} + +void +wrap_free (wrappedline * w) +{ + while (w) + { + wrappedline *t = w->next; + sfree (w); + w = t; + } +} diff --git a/Docs/src/bin/halibut/style.c b/Docs/src/bin/halibut/style.c new file mode 100644 index 00000000..8a3f3e64 --- /dev/null +++ b/Docs/src/bin/halibut/style.c @@ -0,0 +1,7 @@ +/* + * style.c: load and keep track of user style preferences + */ + +#include +#include +#include "halibut.h" diff --git a/Docs/src/bin/halibut/tree234.c b/Docs/src/bin/halibut/tree234.c new file mode 100644 index 00000000..b79ffbf8 --- /dev/null +++ b/Docs/src/bin/halibut/tree234.c @@ -0,0 +1,2555 @@ +/* + * tree234.c: reasonably generic counted 2-3-4 tree routines. + * + * This file is copyright 1999-2001 Simon Tatham. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "tree234.h" + +#define smalloc malloc +#define sfree free + +#define mknew(typ) ( (typ *) smalloc (sizeof (typ)) ) + +#ifdef TEST +#define LOG(x) (printf x) +#else +#define LOG(x) +#endif + +typedef struct node234_Tag node234; + +struct tree234_Tag +{ + node234 *root; + cmpfn234 cmp; +}; + +struct node234_Tag +{ + node234 *parent; + node234 *kids[4]; + int counts[4]; + void *elems[3]; +}; + +/* + * Create a 2-3-4 tree. + */ +tree234 * +newtree234 (cmpfn234 cmp) +{ + tree234 *ret = mknew (tree234); + LOG (("created tree %p\n", ret)); + ret->root = NULL; + ret->cmp = cmp; + return ret; +} + +/* + * Free a 2-3-4 tree (not including freeing the elements). + */ +static void +freenode234 (node234 * n) +{ + if (!n) + return; + freenode234 (n->kids[0]); + freenode234 (n->kids[1]); + freenode234 (n->kids[2]); + freenode234 (n->kids[3]); + sfree (n); +} + +void +freetree234 (tree234 * t) +{ + freenode234 (t->root); + sfree (t); +} + +/* + * Internal function to count a node. + */ +static int +countnode234 (node234 * n) +{ + int count = 0; + int i; + if (!n) + return 0; + for (i = 0; i < 4; i++) + count += n->counts[i]; + for (i = 0; i < 3; i++) + if (n->elems[i]) + count++; + return count; +} + +/* + * Count the elements in a tree. + */ +int +count234 (tree234 * t) +{ + if (t->root) + return countnode234 (t->root); + else + return 0; +} + +/* + * Propagate a node overflow up a tree until it stops. Returns 0 or + * 1, depending on whether the root had to be split or not. 
+ */ +static int +add234_insert (node234 * left, void *e, node234 * right, + node234 ** root, node234 * n, int ki) +{ + int lcount, rcount; + /* + * We need to insert the new left/element/right set in n at + * child position ki. + */ + lcount = countnode234 (left); + rcount = countnode234 (right); + while (n) + { + LOG ((" at %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG ((" need to insert %p/%d \"%s\" %p/%d at position %d\n", + left, lcount, e, right, rcount, ki)); + if (n->elems[1] == NULL) + { + /* + * Insert in a 2-node; simple. + */ + if (ki == 0) + { + LOG ((" inserting on left of 2-node\n")); + n->kids[2] = n->kids[1]; + n->counts[2] = n->counts[1]; + n->elems[1] = n->elems[0]; + n->kids[1] = right; + n->counts[1] = rcount; + n->elems[0] = e; + n->kids[0] = left; + n->counts[0] = lcount; + } + else + { /* ki == 1 */ + LOG ((" inserting on right of 2-node\n")); + n->kids[2] = right; + n->counts[2] = rcount; + n->elems[1] = e; + n->kids[1] = left; + n->counts[1] = lcount; + } + if (n->kids[0]) + n->kids[0]->parent = n; + if (n->kids[1]) + n->kids[1]->parent = n; + if (n->kids[2]) + n->kids[2]->parent = n; + LOG ((" done\n")); + break; + } + else if (n->elems[2] == NULL) + { + /* + * Insert in a 3-node; simple. 
+ */ + if (ki == 0) + { + LOG ((" inserting on left of 3-node\n")); + n->kids[3] = n->kids[2]; + n->counts[3] = n->counts[2]; + n->elems[2] = n->elems[1]; + n->kids[2] = n->kids[1]; + n->counts[2] = n->counts[1]; + n->elems[1] = n->elems[0]; + n->kids[1] = right; + n->counts[1] = rcount; + n->elems[0] = e; + n->kids[0] = left; + n->counts[0] = lcount; + } + else if (ki == 1) + { + LOG ((" inserting in middle of 3-node\n")); + n->kids[3] = n->kids[2]; + n->counts[3] = n->counts[2]; + n->elems[2] = n->elems[1]; + n->kids[2] = right; + n->counts[2] = rcount; + n->elems[1] = e; + n->kids[1] = left; + n->counts[1] = lcount; + } + else + { /* ki == 2 */ + LOG ((" inserting on right of 3-node\n")); + n->kids[3] = right; + n->counts[3] = rcount; + n->elems[2] = e; + n->kids[2] = left; + n->counts[2] = lcount; + } + if (n->kids[0]) + n->kids[0]->parent = n; + if (n->kids[1]) + n->kids[1]->parent = n; + if (n->kids[2]) + n->kids[2]->parent = n; + if (n->kids[3]) + n->kids[3]->parent = n; + LOG ((" done\n")); + break; + } + else + { + node234 *m = mknew (node234); + m->parent = n->parent; + LOG ((" splitting a 4-node; created new node %p\n", m)); + /* + * Insert in a 4-node; split into a 2-node and a + * 3-node, and move focus up a level. + * + * I don't think it matters which way round we put the + * 2 and the 3. For simplicity, we'll put the 3 first + * always. 
+ */ + if (ki == 0) + { + m->kids[0] = left; + m->counts[0] = lcount; + m->elems[0] = e; + m->kids[1] = right; + m->counts[1] = rcount; + m->elems[1] = n->elems[0]; + m->kids[2] = n->kids[1]; + m->counts[2] = n->counts[1]; + e = n->elems[1]; + n->kids[0] = n->kids[2]; + n->counts[0] = n->counts[2]; + n->elems[0] = n->elems[2]; + n->kids[1] = n->kids[3]; + n->counts[1] = n->counts[3]; + } + else if (ki == 1) + { + m->kids[0] = n->kids[0]; + m->counts[0] = n->counts[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = left; + m->counts[1] = lcount; + m->elems[1] = e; + m->kids[2] = right; + m->counts[2] = rcount; + e = n->elems[1]; + n->kids[0] = n->kids[2]; + n->counts[0] = n->counts[2]; + n->elems[0] = n->elems[2]; + n->kids[1] = n->kids[3]; + n->counts[1] = n->counts[3]; + } + else if (ki == 2) + { + m->kids[0] = n->kids[0]; + m->counts[0] = n->counts[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = n->kids[1]; + m->counts[1] = n->counts[1]; + m->elems[1] = n->elems[1]; + m->kids[2] = left; + m->counts[2] = lcount; + /* e = e; */ + n->kids[0] = right; + n->counts[0] = rcount; + n->elems[0] = n->elems[2]; + n->kids[1] = n->kids[3]; + n->counts[1] = n->counts[3]; + } + else + { /* ki == 3 */ + m->kids[0] = n->kids[0]; + m->counts[0] = n->counts[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = n->kids[1]; + m->counts[1] = n->counts[1]; + m->elems[1] = n->elems[1]; + m->kids[2] = n->kids[2]; + m->counts[2] = n->counts[2]; + n->kids[0] = left; + n->counts[0] = lcount; + n->elems[0] = e; + n->kids[1] = right; + n->counts[1] = rcount; + e = n->elems[2]; + } + m->kids[3] = n->kids[3] = n->kids[2] = NULL; + m->counts[3] = n->counts[3] = n->counts[2] = 0; + m->elems[2] = n->elems[2] = n->elems[1] = NULL; + if (m->kids[0]) + m->kids[0]->parent = m; + if (m->kids[1]) + m->kids[1]->parent = m; + if (m->kids[2]) + m->kids[2]->parent = m; + if (n->kids[0]) + n->kids[0]->parent = n; + if (n->kids[1]) + n->kids[1]->parent = n; + LOG ((" left (%p): %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", m, + 
m->kids[0], m->counts[0], m->elems[0], + m->kids[1], m->counts[1], m->elems[1], + m->kids[2], m->counts[2])); + LOG ((" right (%p): %p/%d \"%s\" %p/%d\n", n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1])); + left = m; + lcount = countnode234 (left); + right = n; + rcount = countnode234 (right); + } + if (n->parent) + ki = (n->parent->kids[0] == n ? 0 : + n->parent->kids[1] == n ? 1 : n->parent->kids[2] == n ? 2 : 3); + n = n->parent; + } + + /* + * If we've come out of here by `break', n will still be + * non-NULL and all we need to do is go back up the tree + * updating counts. If we've come here because n is NULL, we + * need to create a new root for the tree because the old one + * has just split into two. */ + if (n) + { + while (n->parent) + { + int count = countnode234 (n); + int childnum; + childnum = (n->parent->kids[0] == n ? 0 : + n->parent->kids[1] == n ? 1 : + n->parent->kids[2] == n ? 2 : 3); + n->parent->counts[childnum] = count; + n = n->parent; + } + return 0; /* root unchanged */ + } + else + { + LOG ((" root is overloaded, split into two\n")); + (*root) = mknew (node234); + (*root)->kids[0] = left; + (*root)->counts[0] = lcount; + (*root)->elems[0] = e; + (*root)->kids[1] = right; + (*root)->counts[1] = rcount; + (*root)->elems[1] = NULL; + (*root)->kids[2] = NULL; + (*root)->counts[2] = 0; + (*root)->elems[2] = NULL; + (*root)->kids[3] = NULL; + (*root)->counts[3] = 0; + (*root)->parent = NULL; + if ((*root)->kids[0]) + (*root)->kids[0]->parent = (*root); + if ((*root)->kids[1]) + (*root)->kids[1]->parent = (*root); + LOG ((" new root is %p/%d \"%s\" %p/%d\n", + (*root)->kids[0], (*root)->counts[0], + (*root)->elems[0], (*root)->kids[1], (*root)->counts[1])); + return 1; /* root moved */ + } +} + +/* + * Add an element e to a 2-3-4 tree t. Returns e on success, or if + * an existing element compares equal, returns that. 
+ */ +static void * +add234_internal (tree234 * t, void *e, int index) +{ + node234 *n; + int ki; + void *orig_e = e; + int c; + + LOG (("adding element \"%s\" to tree %p\n", e, t)); + if (t->root == NULL) + { + t->root = mknew (node234); + t->root->elems[1] = t->root->elems[2] = NULL; + t->root->kids[0] = t->root->kids[1] = NULL; + t->root->kids[2] = t->root->kids[3] = NULL; + t->root->counts[0] = t->root->counts[1] = 0; + t->root->counts[2] = t->root->counts[3] = 0; + t->root->parent = NULL; + t->root->elems[0] = e; + LOG ((" created root %p\n", t->root)); + return orig_e; + } + + n = t->root; + while (n) + { + LOG ((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + if (index >= 0) + { + if (!n->kids[0]) + { + /* + * Leaf node. We want to insert at kid position + * equal to the index: + * + * 0 A 1 B 2 C 3 + */ + ki = index; + } + else + { + /* + * Internal node. We always descend through it (add + * always starts at the bottom, never in the + * middle). 
+ */ + if (index <= n->counts[0]) + { + ki = 0; + } + else if (index -= n->counts[0] + 1, index <= n->counts[1]) + { + ki = 1; + } + else if (index -= n->counts[1] + 1, index <= n->counts[2]) + { + ki = 2; + } + else if (index -= n->counts[2] + 1, index <= n->counts[3]) + { + ki = 3; + } + else + return NULL; /* error: index out of range */ + } + } + else + { + if ((c = t->cmp (e, n->elems[0])) < 0) + ki = 0; + else if (c == 0) + return n->elems[0]; /* already exists */ + else if (n->elems[1] == NULL || (c = t->cmp (e, n->elems[1])) < 0) + ki = 1; + else if (c == 0) + return n->elems[1]; /* already exists */ + else if (n->elems[2] == NULL || (c = t->cmp (e, n->elems[2])) < 0) + ki = 2; + else if (c == 0) + return n->elems[2]; /* already exists */ + else + ki = 3; + } + LOG ((" moving to child %d (%p)\n", ki, n->kids[ki])); + if (!n->kids[ki]) + break; + n = n->kids[ki]; + } + + add234_insert (NULL, e, NULL, &t->root, n, ki); + + return orig_e; +} + +void * +add234 (tree234 * t, void *e) +{ + if (!t->cmp) /* tree is unsorted */ + return NULL; + + return add234_internal (t, e, -1); +} + +void * +addpos234 (tree234 * t, void *e, int index) +{ + if (index < 0 || /* index out of range */ + t->cmp) /* tree is sorted */ + return NULL; /* return failure */ + + return add234_internal (t, e, index); /* this checks the upper bound */ +} + +/* + * Look up the element at a given numeric index in a 2-3-4 tree. + * Returns NULL if the index is out of range. 
+ */ +void * +index234 (tree234 * t, int index) +{ + node234 *n; + + if (!t->root) + return NULL; /* tree is empty */ + + if (index < 0 || index >= countnode234 (t->root)) + return NULL; /* out of range */ + + n = t->root; + + while (n) + { + if (index < n->counts[0]) + n = n->kids[0]; + else if (index -= n->counts[0] + 1, index < 0) + return n->elems[0]; + else if (index < n->counts[1]) + n = n->kids[1]; + else if (index -= n->counts[1] + 1, index < 0) + return n->elems[1]; + else if (index < n->counts[2]) + n = n->kids[2]; + else if (index -= n->counts[2] + 1, index < 0) + return n->elems[2]; + else + n = n->kids[3]; + } + + /* We shouldn't ever get here. I wonder how we did. */ + return NULL; +} + +/* + * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not + * found. e is always passed as the first argument to cmp, so cmp + * can be an asymmetric function if desired. cmp can also be passed + * as NULL, in which case the compare function from the tree proper + * will be used. + */ +void * +findrelpos234 (tree234 * t, void *e, cmpfn234 cmp, int relation, int *index) +{ + node234 *n; + void *ret; + int c; + int idx, ecount, kcount, cmpret; + + if (t->root == NULL) + return NULL; + + if (cmp == NULL) + cmp = t->cmp; + + n = t->root; + /* + * Attempt to find the element itself. + */ + idx = 0; + ecount = -1; + /* + * Prepare a fake `cmp' result if e is NULL. + */ + cmpret = 0; + if (e == NULL) + { + assert (relation == REL234_LT || relation == REL234_GT); + if (relation == REL234_LT) + cmpret = +1; /* e is a max: always greater */ + else if (relation == REL234_GT) + cmpret = -1; /* e is a min: always smaller */ + } + while (1) + { + for (kcount = 0; kcount < 4; kcount++) + { + if (kcount >= 3 || n->elems[kcount] == NULL || + (c = cmpret ? 
cmpret : cmp (e, n->elems[kcount])) < 0) + { + break; + } + if (n->kids[kcount]) + idx += n->counts[kcount]; + if (c == 0) + { + ecount = kcount; + break; + } + idx++; + } + if (ecount >= 0) + break; + if (n->kids[kcount]) + n = n->kids[kcount]; + else + break; + } + + if (ecount >= 0) + { + /* + * We have found the element we're looking for. It's + * n->elems[ecount], at tree index idx. If our search + * relation is EQ, LE or GE we can now go home. + */ + if (relation != REL234_LT && relation != REL234_GT) + { + if (index) + *index = idx; + return n->elems[ecount]; + } + + /* + * Otherwise, we'll do an indexed lookup for the previous + * or next element. (It would be perfectly possible to + * implement these search types in a non-counted tree by + * going back up from where we are, but far more fiddly.) + */ + if (relation == REL234_LT) + idx--; + else + idx++; + } + else + { + /* + * We've found our way to the bottom of the tree and we + * know where we would insert this node if we wanted to: + * we'd put it in in place of the (empty) subtree + * n->kids[kcount], and it would have index idx + * + * But the actual element isn't there. So if our search + * relation is EQ, we're doomed. + */ + if (relation == REL234_EQ) + return NULL; + + /* + * Otherwise, we must do an index lookup for index idx-1 + * (if we're going left - LE or LT) or index idx (if we're + * going right - GE or GT). + */ + if (relation == REL234_LT || relation == REL234_LE) + { + idx--; + } + } + + /* + * We know the index of the element we want; just call index234 + * to do the rest. This will return NULL if the index is out of + * bounds, which is exactly what we want. 
+ */ + ret = index234 (t, idx); + if (ret && index) + *index = idx; + return ret; +} + +void * +find234 (tree234 * t, void *e, cmpfn234 cmp) +{ + return findrelpos234 (t, e, cmp, REL234_EQ, NULL); +} + +void * +findrel234 (tree234 * t, void *e, cmpfn234 cmp, int relation) +{ + return findrelpos234 (t, e, cmp, relation, NULL); +} + +void * +findpos234 (tree234 * t, void *e, cmpfn234 cmp, int *index) +{ + return findrelpos234 (t, e, cmp, REL234_EQ, index); +} + +/* + * Tree transformation used in delete and split: move a subtree + * right, from child ki of a node to the next child. Update k and + * index so that they still point to the same place in the + * transformed tree. Assumes the destination child is not full, and + * that the source child does have a subtree to spare. Can cope if + * the destination child is undersized. + * + * . C . . B . + * / \ -> / \ + * [more] a A b B c d D e [more] a A b c C d D e + * + * . C . . B . + * / \ -> / \ + * [more] a A b B c d [more] a A b c C d + */ +static void +trans234_subtree_right (node234 * n, int ki, int *k, int *index) +{ + node234 *src, *dest; + int i, srclen, adjust; + + src = n->kids[ki]; + dest = n->kids[ki + 1]; + + LOG ((" trans234_subtree_right(%p, %d):\n", n, ki)); + LOG ((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG ((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + src, + src->kids[0], src->counts[0], src->elems[0], + src->kids[1], src->counts[1], src->elems[1], + src->kids[2], src->counts[2], src->elems[2], + src->kids[3], src->counts[3])); + LOG ((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + dest, + dest->kids[0], dest->counts[0], dest->elems[0], + dest->kids[1], dest->counts[1], dest->elems[1], + dest->kids[2], dest->counts[2], dest->elems[2], + dest->kids[3], dest->counts[3])); + /* + * Move over 
the rest of the destination node to make space. + */ + dest->kids[3] = dest->kids[2]; + dest->counts[3] = dest->counts[2]; + dest->elems[2] = dest->elems[1]; + dest->kids[2] = dest->kids[1]; + dest->counts[2] = dest->counts[1]; + dest->elems[1] = dest->elems[0]; + dest->kids[1] = dest->kids[0]; + dest->counts[1] = dest->counts[0]; + + /* which element to move over */ + i = (src->elems[2] ? 2 : src->elems[1] ? 1 : 0); + + dest->elems[0] = n->elems[ki]; + n->elems[ki] = src->elems[i]; + src->elems[i] = NULL; + + dest->kids[0] = src->kids[i + 1]; + dest->counts[0] = src->counts[i + 1]; + src->kids[i + 1] = NULL; + src->counts[i + 1] = 0; + + if (dest->kids[0]) + dest->kids[0]->parent = dest; + + adjust = dest->counts[0] + 1; + + n->counts[ki] -= adjust; + n->counts[ki + 1] += adjust; + + srclen = n->counts[ki]; + + if (k) + { + LOG ((" before: k,index = %d,%d\n", (*k), (*index))); + if ((*k) == ki && (*index) > srclen) + { + (*index) -= srclen + 1; + (*k)++; + } + else if ((*k) == ki + 1) + { + (*index) += adjust; + } + LOG ((" after: k,index = %d,%d\n", (*k), (*index))); + } + + LOG ((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG ((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + src, + src->kids[0], src->counts[0], src->elems[0], + src->kids[1], src->counts[1], src->elems[1], + src->kids[2], src->counts[2], src->elems[2], + src->kids[3], src->counts[3])); + LOG ((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + dest, + dest->kids[0], dest->counts[0], dest->elems[0], + dest->kids[1], dest->counts[1], dest->elems[1], + dest->kids[2], dest->counts[2], dest->elems[2], + dest->kids[3], dest->counts[3])); +} + +/* + * Tree transformation used in delete and split: move a subtree + * left, from child ki of a node to the previous child. 
Update k + * and index so that they still point to the same place in the + * transformed tree. Assumes the destination child is not full, and + * that the source child does have a subtree to spare. Can cope if + * the destination child is undersized. + * + * . B . . C . + * / \ -> / \ + * a A b c C d D e [more] a A b B c d D e [more] + * + * . A . . B . + * / \ -> / \ + * a b B c C d [more] a A b c C d [more] + */ +static void +trans234_subtree_left (node234 * n, int ki, int *k, int *index) +{ + node234 *src, *dest; + int i, adjust; + + src = n->kids[ki]; + dest = n->kids[ki - 1]; + + LOG ((" trans234_subtree_left(%p, %d):\n", n, ki)); + LOG ((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG ((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + dest, + dest->kids[0], dest->counts[0], dest->elems[0], + dest->kids[1], dest->counts[1], dest->elems[1], + dest->kids[2], dest->counts[2], dest->elems[2], + dest->kids[3], dest->counts[3])); + LOG ((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + src, + src->kids[0], src->counts[0], src->elems[0], + src->kids[1], src->counts[1], src->elems[1], + src->kids[2], src->counts[2], src->elems[2], + src->kids[3], src->counts[3])); + + /* where in dest to put it */ + i = (dest->elems[1] ? 2 : dest->elems[0] ? 1 : 0); + dest->elems[i] = n->elems[ki - 1]; + n->elems[ki - 1] = src->elems[0]; + + dest->kids[i + 1] = src->kids[0]; + dest->counts[i + 1] = src->counts[0]; + + if (dest->kids[i + 1]) + dest->kids[i + 1]->parent = dest; + + /* + * Move over the rest of the source node. 
+ */ + src->kids[0] = src->kids[1]; + src->counts[0] = src->counts[1]; + src->elems[0] = src->elems[1]; + src->kids[1] = src->kids[2]; + src->counts[1] = src->counts[2]; + src->elems[1] = src->elems[2]; + src->kids[2] = src->kids[3]; + src->counts[2] = src->counts[3]; + src->elems[2] = NULL; + src->kids[3] = NULL; + src->counts[3] = 0; + + adjust = dest->counts[i + 1] + 1; + + n->counts[ki] -= adjust; + n->counts[ki - 1] += adjust; + + if (k) + { + LOG ((" before: k,index = %d,%d\n", (*k), (*index))); + if ((*k) == ki) + { + (*index) -= adjust; + if ((*index) < 0) + { + (*index) += n->counts[ki - 1] + 1; + (*k)--; + } + } + LOG ((" after: k,index = %d,%d\n", (*k), (*index))); + } + + LOG ((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG ((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + dest, + dest->kids[0], dest->counts[0], dest->elems[0], + dest->kids[1], dest->counts[1], dest->elems[1], + dest->kids[2], dest->counts[2], dest->elems[2], + dest->kids[3], dest->counts[3])); + LOG ((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + src, + src->kids[0], src->counts[0], src->elems[0], + src->kids[1], src->counts[1], src->elems[1], + src->kids[2], src->counts[2], src->elems[2], + src->kids[3], src->counts[3])); +} + +/* + * Tree transformation used in delete and split: merge child nodes + * ki and ki+1 of a node. Update k and index so that they still + * point to the same place in the transformed tree. Assumes both + * children _are_ sufficiently small. + * + * . B . . + * / \ -> | + * a A b c C d a A b B c C d + * + * This routine can also cope with either child being undersized: + * + * . A . . + * / \ -> | + * a b B c a A b B c + * + * . A . . 
+ * / \ -> | + * a b B c C d a A b B c C d + */ +static void +trans234_subtree_merge (node234 * n, int ki, int *k, int *index) +{ + node234 *left, *right; + int i, leftlen, rightlen, lsize, rsize; + + left = n->kids[ki]; + leftlen = n->counts[ki]; + right = n->kids[ki + 1]; + rightlen = n->counts[ki + 1]; + + LOG ((" trans234_subtree_merge(%p, %d):\n", n, ki)); + LOG ((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG ((" left %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + left, + left->kids[0], left->counts[0], left->elems[0], + left->kids[1], left->counts[1], left->elems[1], + left->kids[2], left->counts[2], left->elems[2], + left->kids[3], left->counts[3])); + LOG ((" right %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + right, + right->kids[0], right->counts[0], right->elems[0], + right->kids[1], right->counts[1], right->elems[1], + right->kids[2], right->counts[2], right->elems[2], + right->kids[3], right->counts[3])); + + assert (!left->elems[2] && !right->elems[2]); /* neither is large! */ + lsize = (left->elems[1] ? 2 : left->elems[0] ? 1 : 0); + rsize = (right->elems[1] ? 2 : right->elems[0] ? 1 : 0); + + left->elems[lsize] = n->elems[ki]; + + for (i = 0; i < rsize + 1; i++) + { + left->kids[lsize + 1 + i] = right->kids[i]; + left->counts[lsize + 1 + i] = right->counts[i]; + if (left->kids[lsize + 1 + i]) + left->kids[lsize + 1 + i]->parent = left; + if (i < rsize) + left->elems[lsize + 1 + i] = right->elems[i]; + } + + n->counts[ki] += rightlen + 1; + + sfree (right); + + /* + * Move the rest of n up by one. 
+ */ + for (i = ki + 1; i < 3; i++) + { + n->kids[i] = n->kids[i + 1]; + n->counts[i] = n->counts[i + 1]; + } + for (i = ki; i < 2; i++) + { + n->elems[i] = n->elems[i + 1]; + } + n->kids[3] = NULL; + n->counts[3] = 0; + n->elems[2] = NULL; + + if (k) + { + LOG ((" before: k,index = %d,%d\n", (*k), (*index))); + if ((*k) == ki + 1) + { + (*k)--; + (*index) += leftlen + 1; + } + else if ((*k) > ki + 1) + { + (*k)--; + } + LOG ((" after: k,index = %d,%d\n", (*k), (*index))); + } + + LOG ((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG ((" merged %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + left, + left->kids[0], left->counts[0], left->elems[0], + left->kids[1], left->counts[1], left->elems[1], + left->kids[2], left->counts[2], left->elems[2], + left->kids[3], left->counts[3])); + +} + +/* + * Delete an element e in a 2-3-4 tree. Does not free the element, + * merely removes all links to it from the tree nodes. 
+ */
+static void *
+delpos234_internal (tree234 * t, int index)
+{
+  node234 *n;
+  void *retval;
+  int ki, i;
+  /*
+   * Contract: removes the element at in-order position `index' and
+   * returns the pointer that was stored there. The tree must be
+   * non-empty and `index' must be valid; delpos234 range-checks it
+   * and del234 only passes an index reported by findrelpos234.
+   */
+  retval = NULL;
+
+  n = t->root;                  /* by assumption this is non-NULL */
+  LOG (("deleting item %d from tree %p\n", index, t));
+  while (1)
+    {
+      node234 *sub;
+      /*
+       * Work out which child (ki) covers `index', rebasing index so
+       * that it is relative to that child. Each rejected child
+       * costs counts[k] + 1: its whole subtree plus the separator
+       * element that follows it.
+       */
+      LOG ((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n", n, n->kids[0], n->counts[0], n->elems[0], n->kids[1], n->counts[1], n->elems[1], n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3], index));
+      if (index <= n->counts[0])
+        {
+          ki = 0;
+        }
+      else if (index -= n->counts[0] + 1, index <= n->counts[1])
+        {
+          ki = 1;
+        }
+      else if (index -= n->counts[1] + 1, index <= n->counts[2])
+        {
+          ki = 2;
+        }
+      else if (index -= n->counts[2] + 1, index <= n->counts[3])
+        {
+          ki = 3;
+        }
+      else
+        {
+          assert (0);           /* can't happen */
+        }
+
+      if (!n->kids[0])
+        break;                  /* n is a leaf node; we're here! */
+
+      /*
+       * Check to see if we've found our target element. If so,
+       * we must choose a new target (we'll use the old target's
+       * successor, which will be in a leaf), move it into the
+       * place of the old one, continue down to the leaf and
+       * delete the old copy of the new target.
+       */
+      if (index == n->counts[ki])
+        {
+          node234 *m;
+          LOG ((" found element in internal node, index %d\n", ki));
+          assert (n->elems[ki]);        /* must be a kid _before_ an element */
+          ki++;
+          index = 0;
+          for (m = n->kids[ki]; m->kids[0]; m = m->kids[0])
+            continue;
+          LOG ((" replacing with element \"%s\" from leaf node %p\n",
+                m->elems[0], m));
+          retval = n->elems[ki - 1];
+          n->elems[ki - 1] = m->elems[0];
+        }
+
+      /*
+       * Recurse down to subtree ki. If it has only one element,
+       * we have to do some transformation to start with.
+       */
+      LOG ((" moving to subtree %d\n", ki));
+      sub = n->kids[ki];
+      if (!sub->elems[1])
+        {
+          LOG ((" subtree has only one element!\n"));
+          if (ki > 0 && n->kids[ki - 1]->elems[1])
+            {
+              /*
+               * Child ki has only one element, but child
+               * ki-1 has two or more. So we need to move a
+               * subtree from ki-1 to ki.
+               */
+              trans234_subtree_right (n, ki - 1, &ki, &index);
+            }
+          else if (ki < 3 && n->kids[ki + 1] && n->kids[ki + 1]->elems[1])
+            {
+              /*
+               * Child ki has only one element, but ki+1 has
+               * two or more. Move a subtree from ki+1 to ki.
+               */
+              trans234_subtree_left (n, ki + 1, &ki, &index);
+            }
+          else
+            {
+              /*
+               * ki is small with only small neighbours. Pick a
+               * neighbour and merge with it.
+               */
+              trans234_subtree_merge (n, ki > 0 ? ki - 1 : ki, &ki, &index);
+              sub = n->kids[ki];
+
+              if (!n->elems[0])
+                {
+                  /*
+                   * The root is empty and needs to be
+                   * removed.
+                   */
+                  LOG ((" shifting root!\n"));
+                  t->root = sub;
+                  sub->parent = NULL;
+                  sfree (n);
+                  n = NULL;
+                }
+            }
+        }
+      /*
+       * n is NULL only when the emptied root was freed just above.
+       * Otherwise subtree ki is about to lose one element, so bring
+       * its cached count up to date before descending into it.
+       */
+      if (n)
+        n->counts[ki]--;
+      n = sub;
+    }
+
+  /*
+   * Now n is a leaf node, and ki marks the element number we
+   * want to delete. We've already arranged for the leaf to be
+   * bigger than minimum size, so let's just go to it.
+   */
+  assert (!n->kids[0]);
+  if (!retval)                  /* not already captured from an internal node */
+    retval = n->elems[ki];
+  /*
+   * Close the gap by shifting the later elements down one slot;
+   * the slot vacated at the end is set to NULL.
+   */
+  for (i = ki; i < 2 && n->elems[i + 1]; i++)
+    n->elems[i] = n->elems[i + 1];
+  n->elems[i] = NULL;
+
+  /*
+   * It's just possible that we have reduced the leaf to zero
+   * size. This can only happen if it was the root - so destroy
+   * it and make the tree empty.
+   */
+  if (!n->elems[0])
+    {
+      LOG ((" removed last element in tree, destroying empty root\n"));
+      assert (n == t->root);
+      sfree (n);
+      t->root = NULL;
+    }
+
+  return retval;                /* finished! */
+}
+/*
+ * Delete the element at numeric position `index'. Returns the
+ * removed element, or NULL (leaving the tree untouched) if index is
+ * negative or not less than the number of elements in the tree.
+ */
+void *
+delpos234 (tree234 * t, int index)
+{
+  if (index < 0 || index >= countnode234 (t->root))
+    return NULL;
+  return delpos234_internal (t, index);
+}
+/*
+ * Delete element e: look it up with findrelpos234 (REL234_EQ) to
+ * obtain its position, then delete by position. Returns NULL if the
+ * lookup fails. The NULL comparison argument presumably selects the
+ * tree's own ordering - confirm against findrelpos234.
+ */
+void *
+del234 (tree234 * t, void *e)
+{
+  int index;
+  if (!findrelpos234 (t, e, NULL, REL234_EQ, &index))
+    return NULL;                /* it wasn't in there anyway */
+  return delpos234_internal (t, index); /* it's there; delete it. */
+}
+
+/*
+ * Join two subtrees together with a separator element between
+ * them, given their relative height.
+ *
+ * (Height<0 means the left tree is shorter, >0 means the right
+ * tree is shorter, =0 means (duh) they're equal.)
+ *
+ * It is assumed that any checks needed on the ordering criterion
+ * have _already_ been done.
+ *
+ * The value returned in `height' is 0 or 1 depending on whether the
+ * resulting tree is the same height as the original larger one, or
+ * one higher.
+ */
+static node234 *
+join234_internal (node234 * left, void *sep, node234 * right, int *height)
+{
+  node234 *root, *node;
+  int relht = *height;
+  int ki;
+  /*
+   * On entry *height is height(left) - height(right), as computed
+   * by join234 and join234r. Either subtree may be NULL (an empty
+   * tree). The return value is the root node of the joined tree.
+   */
+  LOG ((" join: joining %p \"%s\" %p, relative height is %d\n",
+        left, sep, right, relht));
+  if (relht == 0)
+    {
+      /*
+       * The trees are the same height. Create a new one-element
+       * root containing the separator and pointers to the two
+       * nodes.
+       */
+      node234 *newroot;
+      newroot = mknew (node234);
+      newroot->kids[0] = left;
+      newroot->counts[0] = countnode234 (left);
+      newroot->elems[0] = sep;
+      newroot->kids[1] = right;
+      newroot->counts[1] = countnode234 (right);
+      newroot->elems[1] = NULL;
+      newroot->kids[2] = NULL;
+      newroot->counts[2] = 0;
+      newroot->elems[2] = NULL;
+      newroot->kids[3] = NULL;
+      newroot->counts[3] = 0;
+      newroot->parent = NULL;
+      if (left)
+        left->parent = newroot;
+      if (right)
+        right->parent = newroot;
+      *height = 1;
+      LOG ((" join: same height, brand new root\n"));
+      return newroot;
+    }
+
+  /*
+   * This now works like the addition algorithm on the larger
+   * tree. We're replacing a single kid pointer with two kid
+   * pointers separated by an element; if that causes the node to
+   * overload, we split it in two, move a separator element up to
+   * the next node, and repeat.
+   */
+  if (relht < 0)
+    {
+      /*
+       * Left tree is shorter. Search down the right tree to find
+       * the pointer we're inserting at.
+       */
+      node = root = right;
+      while (++relht < 0)
+        {
+          node = node->kids[0];
+        }
+      ki = 0;
+      right = node->kids[ki];
+    }
+  else
+    {
+      /*
+       * Right tree is shorter; search down the left to find the
+       * pointer we're inserting at.
+       */
+      node = root = left;
+      while (--relht > 0)
+        {
+          if (node->elems[2])
+            node = node->kids[3];
+          else if (node->elems[1])
+            node = node->kids[2];
+          else
+            node = node->kids[1];
+        }
+      if (node->elems[2])
+        ki = 3;
+      else if (node->elems[1])
+        ki = 2;
+      else
+        ki = 1;
+      left = node->kids[ki];
+    }
+
+  /*
+   * Now proceed as for addition.
+   */
+  *height = add234_insert (left, sep, right, &root, node, ki);
+
+  return root;
+}
+static int
+height234 (tree234 * t)
+{
+  int level = 0;                /* nodes on the leftmost root-to-leaf path; 0 if empty */
+  node234 *n = t->root;
+  while (n)
+    {
+      level++;
+      n = n->kids[0];
+    }
+  return level;
+}
+/*
+ * Append every element of t2 to the end of t1 and return t1, leaving
+ * t2 empty. If t2 is already empty, t1 is returned unchanged. If t1
+ * is sorted and the result would break its ordering, NULL is
+ * returned and neither tree is modified.
+ */
+tree234 *
+join234 (tree234 * t1, tree234 * t2)
+{
+  int size2 = countnode234 (t2->root);
+  if (size2 > 0)
+    {
+      void *element;
+      int relht;
+      /*
+       * Ordering check for a sorted t1: a REL234_GE hit means some
+       * element of t1 is >= the first element of t2, so the join
+       * must be refused.
+       */
+      if (t1->cmp)
+        {
+          element = index234 (t2, 0);
+          element = findrelpos234 (t1, element, NULL, REL234_GE, NULL);
+          if (element)
+            return NULL;
+        }
+      /*
+       * The first element of t2 becomes the separator. The heights
+       * are measured after it has been removed, because the removal
+       * can itself shrink t2.
+       */
+      element = delpos234 (t2, 0);
+      relht = height234 (t1) - height234 (t2);
+      t1->root = join234_internal (t1->root, element, t2->root, &relht);
+      t2->root = NULL;
+    }
+  return t1;
+}
+/*
+ * Mirror image of join234: prepend every element of t1 to the front
+ * of t2 and return t2, leaving t1 empty. The last element of t1 is
+ * used as the separator. Returns NULL, changing nothing, if t2 is
+ * sorted and already holds an element <= that separator.
+ */
+tree234 *
+join234r (tree234 * t1, tree234 * t2)
+{
+  int size1 = countnode234 (t1->root);
+  if (size1 > 0)
+    {
+      void *element;
+      int relht;
+
+      if (t2->cmp)
+        {
+          element = index234 (t1, size1 - 1);
+          element = findrelpos234 (t2, element, NULL, REL234_LE, NULL);
+          if (element)
+            return NULL;
+        }
+
+      element = delpos234 (t1, size1 - 1);
+      relht = height234 (t1) - height234 (t2);
+      t2->root = join234_internal (t1->root, element, t2->root, &relht);
+      t1->root = NULL;
+    }
+  return t2;
+}
+
+/*
+ * Split out the first elements in a tree and return a
+ * pointer to the root node. Leave the root node of the remainder
+ * in t.
+ */ +static node234 * +split234_internal (tree234 * t, int index) +{ + node234 *halves[2], *n, *sib, *sub; + node234 *lparent, *rparent; + int ki, pki, i, half, lcount, rcount; + + n = t->root; + LOG (("splitting tree %p at point %d\n", t, index)); + + /* + * Easy special cases. After this we have also dealt completely + * with the empty-tree case and we can assume the root exists. + */ + if (index == 0) /* return nothing */ + return NULL; + if (index == countnode234 (t->root)) + { /* return the whole tree */ + node234 *ret = t->root; + t->root = NULL; + return ret; + } + + /* + * Search down the tree to find the split point. + */ + lparent = rparent = NULL; + while (n) + { + LOG ((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n", n, n->kids[0], n->counts[0], n->elems[0], n->kids[1], n->counts[1], n->elems[1], n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3], index)); + lcount = index; + rcount = countnode234 (n) - lcount; + if (index <= n->counts[0]) + { + ki = 0; + } + else if (index -= n->counts[0] + 1, index <= n->counts[1]) + { + ki = 1; + } + else if (index -= n->counts[1] + 1, index <= n->counts[2]) + { + ki = 2; + } + else + { + index -= n->counts[2] + 1; + ki = 3; + } + + LOG ((" splitting at subtree %d\n", ki)); + sub = n->kids[ki]; + + LOG ((" splitting at child index %d\n", ki)); + + /* + * Split the node, put halves[0] on the right of the left + * one and halves[1] on the left of the right one, put the + * new node pointers in halves[0] and halves[1], and go up + * a level. 
+ */ + sib = mknew (node234); + for (i = 0; i < 3; i++) + { + if (i + ki < 3 && n->elems[i + ki]) + { + sib->elems[i] = n->elems[i + ki]; + sib->kids[i + 1] = n->kids[i + ki + 1]; + if (sib->kids[i + 1]) + sib->kids[i + 1]->parent = sib; + sib->counts[i + 1] = n->counts[i + ki + 1]; + n->elems[i + ki] = NULL; + n->kids[i + ki + 1] = NULL; + n->counts[i + ki + 1] = 0; + } + else + { + sib->elems[i] = NULL; + sib->kids[i + 1] = NULL; + sib->counts[i + 1] = 0; + } + } + if (lparent) + { + lparent->kids[pki] = n; + lparent->counts[pki] = lcount; + n->parent = lparent; + rparent->kids[0] = sib; + rparent->counts[0] = rcount; + sib->parent = rparent; + } + else + { + halves[0] = n; + n->parent = NULL; + halves[1] = sib; + sib->parent = NULL; + } + lparent = n; + rparent = sib; + pki = ki; + LOG ((" left node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG ((" right node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + sib, + sib->kids[0], sib->counts[0], sib->elems[0], + sib->kids[1], sib->counts[1], sib->elems[1], + sib->kids[2], sib->counts[2], sib->elems[2], + sib->kids[3], sib->counts[3])); + + n = sub; + } + + /* + * We've come off the bottom here, so we've successfully split + * the tree into two equally high subtrees. The only problem is + * that some of the nodes down the fault line will be smaller + * than the minimum permitted size. (Since this is a 2-3-4 + * tree, that means they'll be zero-element one-child nodes.) + */ + LOG ((" fell off bottom, lroot is %p, rroot is %p\n", + halves[0], halves[1])); + lparent->counts[pki] = rparent->counts[0] = 0; + lparent->kids[pki] = rparent->kids[0] = NULL; + + /* + * So now we go back down the tree from each of the two roots, + * fixing up undersize nodes. 
+ */ + for (half = 0; half < 2; half++) + { + /* + * Remove the root if it's undersize (it will contain only + * one child pointer, so just throw it away and replace it + * with its child). This might happen several times. + */ + while (halves[half] && !halves[half]->elems[0]) + { + LOG ((" root %p is undersize, throwing away\n", halves[half])); + halves[half] = halves[half]->kids[0]; + sfree (halves[half]->parent); + halves[half]->parent = NULL; + LOG ((" new root is %p\n", halves[half])); + } + + n = halves[half]; + while (n) + { + void (*toward) (node234 * n, int ki, int *k, int *index); + int ni, merge; + + /* + * Now we have a potentially undersize node on the + * right (if half==0) or left (if half==1). Sort it + * out, by merging with a neighbour or by transferring + * subtrees over. At this time we must also ensure that + * nodes are bigger than minimum, in case we need an + * element to merge two nodes below. + */ + LOG ((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3])); + if (half == 1) + { + ki = 0; /* the kid we're interested in */ + ni = 1; /* the neighbour */ + merge = 0; /* for merge: leftmost of the two */ + toward = trans234_subtree_left; + } + else + { + ki = (n->kids[3] ? 3 : n->kids[2] ? 2 : 1); + ni = ki - 1; + merge = ni; + toward = trans234_subtree_right; + } + + sub = n->kids[ki]; + if (sub && !sub->elems[1]) + { + /* + * This node is undersized or minimum-size. If we + * can merge it with its neighbour, we do so; + * otherwise we must be able to transfer subtrees + * over to it until it is greater than minimum + * size. + */ + int undersized = (!sub->elems[0]); + LOG ((" child %d is %ssize\n", ki, + undersized ? "under" : "minimum-")); + LOG ((" neighbour is %s\n", + n->kids[ni]->elems[2] ? "large" : + n->kids[ni]->elems[1] ? 
"medium" : "small")); + if (!n->kids[ni]->elems[1] || + (undersized && !n->kids[ni]->elems[2])) + { + /* + * Neighbour is small, or possibly neighbour is + * medium and we are undersize. + */ + trans234_subtree_merge (n, merge, NULL, NULL); + sub = n->kids[merge]; + if (!n->elems[0]) + { + /* + * n is empty, and hence must have been the + * root and needs to be removed. + */ + assert (!n->parent); + LOG ((" shifting root!\n")); + halves[half] = sub; + halves[half]->parent = NULL; + sfree (n); + } + } + else + { + /* Neighbour is big enough to move trees over. */ + toward (n, ni, NULL, NULL); + if (undersized) + toward (n, ni, NULL, NULL); + } + } + n = sub; + } + } + + t->root = halves[1]; + return halves[0]; +} + +tree234 * +splitpos234 (tree234 * t, int index, int before) +{ + tree234 *ret; + node234 *n; + int count; + + count = countnode234 (t->root); + if (index < 0 || index > count) + return NULL; /* error */ + ret = newtree234 (t->cmp); + n = split234_internal (t, index); + if (before) + { + /* We want to return the ones before the index. */ + ret->root = n; + } + else + { + /* + * We want to keep the ones before the index and return the + * ones after. + */ + ret->root = t->root; + t->root = n; + } + return ret; +} + +tree234 * +split234 (tree234 * t, void *e, cmpfn234 cmp, int rel) +{ + int before; + int index; + + assert (rel != REL234_EQ); + + if (rel == REL234_GT || rel == REL234_GE) + { + before = 1; + rel = (rel == REL234_GT ? 
REL234_LE : REL234_LT); + } + else + { + before = 0; + } + if (!findrelpos234 (t, e, cmp, rel, &index)) + index = 0; + + return splitpos234 (t, index + 1, before); +} + +static node234 * +copynode234 (node234 * n, copyfn234 copyfn, void *copyfnstate) +{ + int i; + node234 *n2 = mknew (node234); + + for (i = 0; i < 3; i++) + { + if (n->elems[i] && copyfn) + n2->elems[i] = copyfn (copyfnstate, n->elems[i]); + else + n2->elems[i] = n->elems[i]; + } + + for (i = 0; i < 4; i++) + { + if (n->kids[i]) + { + n2->kids[i] = copynode234 (n->kids[i], copyfn, copyfnstate); + n2->kids[i]->parent = n2; + } + else + { + n2->kids[i] = NULL; + } + n2->counts[i] = n->counts[i]; + } + + return n2; +} + +tree234 * +copytree234 (tree234 * t, copyfn234 copyfn, void *copyfnstate) +{ + tree234 *t2; + + t2 = newtree234 (t->cmp); + t2->root = copynode234 (t->root, copyfn, copyfnstate); + t2->root->parent = NULL; + + return t2; +} + +#ifdef TEST + +/* + * Test code for the 2-3-4 tree. This code maintains an alternative + * representation of the data in the tree, in an array (using the + * obvious and slow insert and delete functions). After each tree + * operation, the verify() function is called, which ensures all + * the tree properties are preserved: + * - node->child->parent always equals node + * - tree->root->parent always equals NULL + * - number of kids == 0 or number of elements + 1; + * - tree has the same depth everywhere + * - every node has at least one element + * - subtree element counts are accurate + * - any NULL kid pointer is accompanied by a zero count + * - in a sorted tree: ordering property between elements of a + * node and elements of its children is preserved + * and also ensures the list represented by the tree is the same + * list it should be. (This last check also doubly verifies the + * ordering properties, because the `same list it should be' is by + * definition correctly ordered. 
It also ensures all nodes are + * distinct, because the enum functions would get caught in a loop + * if not.) + */ + +#include + +#define srealloc realloc + +/* + * Error reporting function. + */ +void +error (char *fmt, ...) +{ + va_list ap; + printf ("ERROR: "); + va_start (ap, fmt); + vfprintf (stdout, fmt, ap); + va_end (ap); + printf ("\n"); +} + +/* The array representation of the data. */ +void **array; +int arraylen, arraysize; +cmpfn234 cmp; + +/* The tree representation of the same data. */ +tree234 *tree; + +/* + * Routines to provide a diagnostic printout of a tree. Currently + * relies on every element in the tree being a one-character string + * :-) + */ +typedef struct +{ + char **levels; +} +dispctx; + +int +dispnode (node234 * n, int level, dispctx * ctx) +{ + if (level == 0) + { + int xpos = strlen (ctx->levels[0]); + int len; + + if (n->elems[2]) + len = sprintf (ctx->levels[0] + xpos, " %s%s%s", + n->elems[0], n->elems[1], n->elems[2]); + else if (n->elems[1]) + len = sprintf (ctx->levels[0] + xpos, " %s%s", + n->elems[0], n->elems[1]); + else + len = sprintf (ctx->levels[0] + xpos, " %s", n->elems[0]); + return xpos + 1 + (len - 1) / 2; + } + else + { + int xpos[4], nkids; + int nodelen, mypos, myleft, x, i; + + xpos[0] = dispnode (n->kids[0], level - 3, ctx); + xpos[1] = dispnode (n->kids[1], level - 3, ctx); + nkids = 2; + if (n->kids[2]) + { + xpos[2] = dispnode (n->kids[2], level - 3, ctx); + nkids = 3; + } + if (n->kids[3]) + { + xpos[3] = dispnode (n->kids[3], level - 3, ctx); + nkids = 4; + } + + if (nkids == 4) + mypos = (xpos[1] + xpos[2]) / 2; + else if (nkids == 3) + mypos = xpos[1]; + else + mypos = (xpos[0] + xpos[1]) / 2; + nodelen = nkids * 2 - 1; + myleft = mypos - ((nodelen - 1) / 2); + assert (myleft >= xpos[0]); + assert (myleft + nodelen - 1 <= xpos[nkids - 1]); + + x = strlen (ctx->levels[level]); + while (x <= xpos[0] && x < myleft) + ctx->levels[level][x++] = ' '; + while (x < myleft) + ctx->levels[level][x++] = '_'; + 
if (nkids == 4) + x += sprintf (ctx->levels[level] + x, ".%s.%s.%s.", + n->elems[0], n->elems[1], n->elems[2]); + else if (nkids == 3) + x += sprintf (ctx->levels[level] + x, ".%s.%s.", + n->elems[0], n->elems[1]); + else + x += sprintf (ctx->levels[level] + x, ".%s.", n->elems[0]); + while (x < xpos[nkids - 1]) + ctx->levels[level][x++] = '_'; + ctx->levels[level][x] = '\0'; + + x = strlen (ctx->levels[level - 1]); + for (i = 0; i < nkids; i++) + { + int rpos, pos; + rpos = xpos[i]; + if (i > 0 && i < nkids - 1) + pos = myleft + 2 * i; + else + pos = rpos; + if (rpos < pos) + rpos++; + while (x < pos && x < rpos) + ctx->levels[level - 1][x++] = ' '; + if (x == pos) + ctx->levels[level - 1][x++] = '|'; + while (x < pos || x < rpos) + ctx->levels[level - 1][x++] = '_'; + if (x == pos) + ctx->levels[level - 1][x++] = '|'; + } + ctx->levels[level - 1][x] = '\0'; + + x = strlen (ctx->levels[level - 2]); + for (i = 0; i < nkids; i++) + { + int rpos = xpos[i]; + + while (x < rpos) + ctx->levels[level - 2][x++] = ' '; + ctx->levels[level - 2][x++] = '|'; + } + ctx->levels[level - 2][x] = '\0'; + + return mypos; + } +} + +void +disptree (tree234 * t) +{ + dispctx ctx; + char *leveldata; + int width = count234 (t); + int ht = height234 (t) * 3 - 2; + int i; + + if (!t->root) + { + printf ("[empty tree]\n"); + } + + leveldata = smalloc (ht * (width + 2)); + ctx.levels = smalloc (ht * sizeof (char *)); + for (i = 0; i < ht; i++) + { + ctx.levels[i] = leveldata + i * (width + 2); + ctx.levels[i][0] = '\0'; + } + + (void) dispnode (t->root, ht - 1, &ctx); + + for (i = ht; i--;) + printf ("%s\n", ctx.levels[i]); + + sfree (ctx.levels); + sfree (leveldata); +} + +typedef struct +{ + int treedepth; + int elemcount; +} +chkctx; + +int +chknode (chkctx * ctx, int level, node234 * node, + void *lowbound, void *highbound) +{ + int nkids, nelems; + int i; + int count; + + /* Count the non-NULL kids. 
*/ + for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++); + /* Ensure no kids beyond the first NULL are non-NULL. */ + for (i = nkids; i < 4; i++) + if (node->kids[i]) + { + error ("node %p: nkids=%d but kids[%d] non-NULL", node, nkids, i); + } + else if (node->counts[i]) + { + error ("node %p: kids[%d] NULL but count[%d]=%d nonzero", + node, i, i, node->counts[i]); + } + + /* Count the non-NULL elements. */ + for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++); + /* Ensure no elements beyond the first NULL are non-NULL. */ + for (i = nelems; i < 3; i++) + if (node->elems[i]) + { + error ("node %p: nelems=%d but elems[%d] non-NULL", node, nelems, i); + } + + if (nkids == 0) + { + /* + * If nkids==0, this is a leaf node; verify that the tree + * depth is the same everywhere. + */ + if (ctx->treedepth < 0) + ctx->treedepth = level; /* we didn't know the depth yet */ + else if (ctx->treedepth != level) + error ("node %p: leaf at depth %d, previously seen depth %d", + node, level, ctx->treedepth); + } + else + { + /* + * If nkids != 0, then it should be nelems+1, unless nelems + * is 0 in which case nkids should also be 0 (and so we + * shouldn't be in this condition at all). + */ + int shouldkids = (nelems ? nelems + 1 : 0); + if (nkids != shouldkids) + { + error ("node %p: %d elems should mean %d kids but has %d", + node, nelems, shouldkids, nkids); + } + } + + /* + * nelems should be at least 1. + */ + if (nelems == 0) + { + error ("node %p: no elems", node, nkids); + } + + /* + * Add nelems to the running element count of the whole tree. + */ + ctx->elemcount += nelems; + + /* + * Check ordering property: all elements should be strictly > + * lowbound, strictly < highbound, and strictly < each other in + * sequence. (lowbound and highbound are NULL at edges of tree + * - both NULL at root node - and NULL is considered to be < + * everything and > everything. IYSWIM.) + */ + if (cmp) + { + for (i = -1; i < nelems; i++) + { + void *lower = (i == -1 ? 
lowbound : node->elems[i]); + void *higher = (i + 1 == nelems ? highbound : node->elems[i + 1]); + if (lower && higher && cmp (lower, higher) >= 0) + { + error ("node %p: kid comparison [%d=%s,%d=%s] failed", + node, i, lower, i + 1, higher); + } + } + } + + /* + * Check parent pointers: all non-NULL kids should have a + * parent pointer coming back to this node. + */ + for (i = 0; i < nkids; i++) + if (node->kids[i]->parent != node) + { + error ("node %p kid %d: parent ptr is %p not %p", + node, i, node->kids[i]->parent, node); + } + + + /* + * Now (finally!) recurse into subtrees. + */ + count = nelems; + + for (i = 0; i < nkids; i++) + { + void *lower = (i == 0 ? lowbound : node->elems[i - 1]); + void *higher = (i >= nelems ? highbound : node->elems[i]); + int subcount = chknode (ctx, level + 1, node->kids[i], lower, higher); + if (node->counts[i] != subcount) + { + error ("node %p kid %d: count says %d, subtree really has %d", + node, i, node->counts[i], subcount); + } + count += subcount; + } + + return count; +} + +void +verifytree (tree234 * tree, void **array, int arraylen) +{ + chkctx ctx; + int i; + void *p; + + ctx.treedepth = -1; /* depth unknown yet */ + ctx.elemcount = 0; /* no elements seen yet */ + /* + * Verify validity of tree properties. + */ + if (tree->root) + { + if (tree->root->parent != NULL) + error ("root->parent is %p should be null", tree->root->parent); + chknode (&ctx, 0, tree->root, NULL, NULL); + } + printf ("tree depth: %d\n", ctx.treedepth); + /* + * Enumerate the tree and ensure it matches up to the array. 
+ */ + for (i = 0; NULL != (p = index234 (tree, i)); i++) + { + if (i >= arraylen) + error ("tree contains more than %d elements", arraylen); + if (array[i] != p) + error ("enum at position %d: array says %s, tree says %s", + i, array[i], p); + } + if (ctx.elemcount != i) + { + error ("tree really contains %d elements, enum gave %d", + ctx.elemcount, i); + } + if (i < arraylen) + { + error ("enum gave only %d elements, array has %d", i, arraylen); + } + i = count234 (tree); + if (ctx.elemcount != i) + { + error ("tree really contains %d elements, count234 gave %d", + ctx.elemcount, i); + } +} +void +verify (void) +{ + verifytree (tree, array, arraylen); +} + +void +internal_addtest (void *elem, int index, void *realret) +{ + int i, j; + void *retval; + + if (arraysize < arraylen + 1) + { + arraysize = arraylen + 1 + 256; + array = (array == NULL ? smalloc (arraysize * sizeof (*array)) : + srealloc (array, arraysize * sizeof (*array))); + } + + i = index; + /* now i points to the first element >= elem */ + retval = elem; /* expect elem returned (success) */ + for (j = arraylen; j > i; j--) + array[j] = array[j - 1]; + array[i] = elem; /* add elem to array */ + arraylen++; + + if (realret != retval) + { + error ("add: retval was %p expected %p", realret, retval); + } + + verify (); +} + +void +addtest (void *elem) +{ + int i; + void *realret; + + realret = add234 (tree, elem); + + i = 0; + while (i < arraylen && cmp (elem, array[i]) > 0) + i++; + if (i < arraylen && !cmp (elem, array[i])) + { + void *retval = array[i]; /* expect that returned not elem */ + if (realret != retval) + { + error ("add: retval was %p expected %p", realret, retval); + } + } + else + internal_addtest (elem, i, realret); +} + +void +addpostest (void *elem, int i) +{ + void *realret; + + realret = addpos234 (tree, elem, i); + + internal_addtest (elem, i, realret); +} + +void +delpostest (int i) +{ + int index = i; + void *elem = array[i], *ret; + + /* i points to the right element */ + while 
(i < arraylen - 1) + { + array[i] = array[i + 1]; + i++; + } + arraylen--; /* delete elem from array */ + + if (tree->cmp) + ret = del234 (tree, elem); + else + ret = delpos234 (tree, index); + + if (ret != elem) + { + error ("del returned %p, expected %p", ret, elem); + } + + verify (); +} + +void +deltest (void *elem) +{ + int i; + + i = 0; + while (i < arraylen && cmp (elem, array[i]) > 0) + i++; + if (i >= arraylen || cmp (elem, array[i]) != 0) + return; /* don't do it! */ + delpostest (i); +} + +/* A sample data set and test utility. Designed for pseudo-randomness, + * and yet repeatability. */ + +/* + * This random number generator uses the `portable implementation' + * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits; + * change it if not. + */ +int +randomnumber (unsigned *seed) +{ + *seed *= 1103515245; + *seed += 12345; + return ((*seed) / 65536) % 32768; +} + +int +mycmp (void *av, void *bv) +{ + char const *a = (char const *) av; + char const *b = (char const *) bv; + return strcmp (a, b); +} + +#define lenof(x) ( sizeof((x)) / sizeof(*(x)) ) + +char *strings[] = { + "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i", + "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E", + "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u", + "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y", + "m", "s", "l", "4", +#if 0 + "a", "ab", "absque", "coram", "de", + "palam", "clam", "cum", "ex", "e", + "sine", "tenus", "pro", "prae", + "banana", "carrot", "cabbage", "broccoli", "onion", "zebra", + "penguin", "blancmange", "pangolin", "whale", "hedgehog", + "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux", + "murfl", "spoo", "breen", "flarn", "octothorpe", + "snail", "tiger", "elephant", "octopus", "warthog", "armadillo", + "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin", + "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper", + "wand", "ring", 
"amulet" +#endif +}; + +#define NSTR lenof(strings) + +void +findtest (void) +{ + static const int rels[] = { + REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT + }; + static const char *const relnames[] = { + "EQ", "GE", "LE", "LT", "GT" + }; + int i, j, rel, index; + char *p, *ret, *realret, *realret2; + int lo, hi, mid, c; + + for (i = 0; i < (int) NSTR; i++) + { + p = strings[i]; + for (j = 0; j < (int) (sizeof (rels) / sizeof (*rels)); j++) + { + rel = rels[j]; + + lo = 0; + hi = arraylen - 1; + while (lo <= hi) + { + mid = (lo + hi) / 2; + c = strcmp (p, array[mid]); + if (c < 0) + hi = mid - 1; + else if (c > 0) + lo = mid + 1; + else + break; + } + + if (c == 0) + { + if (rel == REL234_LT) + ret = (mid > 0 ? array[--mid] : NULL); + else if (rel == REL234_GT) + ret = (mid < arraylen - 1 ? array[++mid] : NULL); + else + ret = array[mid]; + } + else + { + assert (lo == hi + 1); + if (rel == REL234_LT || rel == REL234_LE) + { + mid = hi; + ret = (hi >= 0 ? array[hi] : NULL); + } + else if (rel == REL234_GT || rel == REL234_GE) + { + mid = lo; + ret = (lo < arraylen ? 
array[lo] : NULL); + } + else + ret = NULL; + } + + realret = findrelpos234 (tree, p, NULL, rel, &index); + if (realret != ret) + { + error ("find(\"%s\",%s) gave %s should be %s", + p, relnames[j], realret, ret); + } + if (realret && index != mid) + { + error ("find(\"%s\",%s) gave %d should be %d", + p, relnames[j], index, mid); + } + if (realret && rel == REL234_EQ) + { + realret2 = index234 (tree, index); + if (realret2 != realret) + { + error ("find(\"%s\",%s) gave %s(%d) but %d -> %s", + p, relnames[j], realret, index, index, realret2); + } + } +#if 0 + printf ("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j], + realret, index); +#endif + } + } + + realret = findrelpos234 (tree, NULL, NULL, REL234_GT, &index); + if (arraylen && (realret != array[0] || index != 0)) + { + error ("find(NULL,GT) gave %s(%d) should be %s(0)", + realret, index, array[0]); + } + else if (!arraylen && (realret != NULL)) + { + error ("find(NULL,GT) gave %s(%d) should be NULL", realret, index); + } + + realret = findrelpos234 (tree, NULL, NULL, REL234_LT, &index); + if (arraylen && (realret != array[arraylen - 1] || index != arraylen - 1)) + { + error ("find(NULL,LT) gave %s(%d) should be %s(0)", + realret, index, array[arraylen - 1]); + } + else if (!arraylen && (realret != NULL)) + { + error ("find(NULL,LT) gave %s(%d) should be NULL", realret, index); + } +} + +void +splittest (tree234 * tree, void **array, int arraylen) +{ + int i; + tree234 *tree3, *tree4; + for (i = 0; i <= arraylen; i++) + { + tree3 = copytree234 (tree, NULL, NULL); + tree4 = splitpos234 (tree3, i, 0); + verifytree (tree3, array, i); + verifytree (tree4, array + i, arraylen - i); + join234 (tree3, tree4); + freetree234 (tree4); /* left empty by join */ + verifytree (tree3, array, arraylen); + freetree234 (tree3); + } +} + +int +main (void) +{ + int in[NSTR]; + int i, j, k; + int tworoot, tmplen; + unsigned seed = 0; + tree234 *tree2, *tree3, *tree4; + int c; + + setvbuf (stdout, NULL, _IOLBF, 0); + + for (i = 0; 
i < (int) NSTR; i++) + in[i] = 0; + array = NULL; + arraylen = arraysize = 0; + tree = newtree234 (mycmp); + cmp = mycmp; + + verify (); + for (i = 0; i < 10000; i++) + { + j = randomnumber (&seed); + j %= NSTR; + printf ("trial: %d\n", i); + if (in[j]) + { + printf ("deleting %s (%d)\n", strings[j], j); + deltest (strings[j]); + in[j] = 0; + } + else + { + printf ("adding %s (%d)\n", strings[j], j); + addtest (strings[j]); + in[j] = 1; + } + disptree (tree); + findtest (); + } + + while (arraylen > 0) + { + j = randomnumber (&seed); + j %= arraylen; + deltest (array[j]); + } + + freetree234 (tree); + + /* + * Now try an unsorted tree. We don't really need to test + * delpos234 because we know del234 is based on it, so it's + * already been tested in the above sorted-tree code; but for + * completeness we'll use it to tear down our unsorted tree + * once we've built it. + */ + tree = newtree234 (NULL); + cmp = NULL; + verify (); + for (i = 0; i < 1000; i++) + { + printf ("trial: %d\n", i); + j = randomnumber (&seed); + j %= NSTR; + k = randomnumber (&seed); + k %= count234 (tree) + 1; + printf ("adding string %s at index %d\n", strings[j], k); + addpostest (strings[j], k); + } + + /* + * While we have this tree in its full form, we'll take a copy + * of it to use in split and join testing. + */ + tree2 = copytree234 (tree, NULL, NULL); + verifytree (tree2, array, arraylen); /* check the copy is accurate */ + /* + * Split tests. Split the tree at every possible point and + * check the resulting subtrees. + */ + tworoot = (!tree2->root->elems[1]); /* see if it has a 2-root */ + splittest (tree2, array, arraylen); + /* + * Now do the split test again, but on a tree that has a 2-root + * (if the previous one didn't) or doesn't (if the previous one + * did). 
+ */ + tmplen = arraylen; + while ((!tree2->root->elems[1]) == tworoot) + { + delpos234 (tree2, --tmplen); + } + printf ("now trying splits on second tree\n"); + splittest (tree2, array, tmplen); + freetree234 (tree2); + + /* + * Back to the main testing of uncounted trees. + */ + while (count234 (tree) > 0) + { + printf ("cleanup: tree size %d\n", count234 (tree)); + j = randomnumber (&seed); + j %= count234 (tree); + printf ("deleting string %s from index %d\n", (char *) array[j], j); + delpostest (j); + } + freetree234 (tree); + + /* + * Finally, do some testing on split/join on _sorted_ trees. At + * the same time, we'll be testing split on very small trees. + */ + tree = newtree234 (mycmp); + cmp = mycmp; + arraylen = 0; + for (i = 0; i < 16; i++) + { + addtest (strings[i]); + tree2 = copytree234 (tree, NULL, NULL); + splittest (tree2, array, arraylen); + freetree234 (tree2); + } + freetree234 (tree); + + /* + * Test silly cases of join: join(emptytree, emptytree), and + * also ensure join correctly spots when sorted trees fail the + * ordering constraint. + */ + tree = newtree234 (mycmp); + tree2 = newtree234 (mycmp); + tree3 = newtree234 (mycmp); + tree4 = newtree234 (mycmp); + assert (mycmp (strings[0], strings[1]) < 0); /* just in case :-) */ + add234 (tree2, strings[1]); + add234 (tree4, strings[0]); + array[0] = strings[0]; + array[1] = strings[1]; + verifytree (tree, array, 0); + verifytree (tree2, array + 1, 1); + verifytree (tree3, array, 0); + verifytree (tree4, array, 1); + + /* + * So: + * - join(tree,tree3) should leave both tree and tree3 unchanged. + * - joinr(tree,tree2) should leave both tree and tree2 unchanged. + * - join(tree4,tree3) should leave both tree3 and tree4 unchanged. + * - join(tree, tree2) should move the element from tree2 to tree. + * - joinr(tree4, tree3) should move the element from tree4 to tree3. + * - join(tree,tree3) should return NULL and leave both unchanged. 
+ * - join(tree3,tree) should work and create a bigger tree in tree3. + */ + assert (tree == join234 (tree, tree3)); + verifytree (tree, array, 0); + verifytree (tree3, array, 0); + assert (tree2 == join234r (tree, tree2)); + verifytree (tree, array, 0); + verifytree (tree2, array + 1, 1); + assert (tree4 == join234 (tree4, tree3)); + verifytree (tree3, array, 0); + verifytree (tree4, array, 1); + assert (tree == join234 (tree, tree2)); + verifytree (tree, array + 1, 1); + verifytree (tree2, array, 0); + assert (tree3 == join234r (tree4, tree3)); + verifytree (tree3, array, 1); + verifytree (tree4, array, 0); + assert (NULL == join234 (tree, tree3)); + verifytree (tree, array + 1, 1); + verifytree (tree3, array, 1); + assert (tree3 == join234 (tree3, tree)); + verifytree (tree3, array, 2); + verifytree (tree, array, 0); + + return 0; +} + +#endif + +#if 0 /* sorted list of strings might be useful */ +{ +"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", + "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", + "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", + "v", "w", "x",} +#endif diff --git a/Docs/src/bin/halibut/tree234.h b/Docs/src/bin/halibut/tree234.h new file mode 100644 index 00000000..44163ff7 --- /dev/null +++ b/Docs/src/bin/halibut/tree234.h @@ -0,0 +1,203 @@ +/* + * tree234.h: header defining functions in tree234.c. + * + * This file is copyright 1999-2001 Simon Tatham. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef TREE234_H +#define TREE234_H + +/* + * This typedef is opaque outside tree234.c itself. + */ +typedef struct tree234_Tag tree234; + +typedef int (*cmpfn234) (void *, void *); + +typedef void *(*copyfn234) (void *state, void *element); + +/* + * Create a 2-3-4 tree. If `cmp' is NULL, the tree is unsorted, and + * lookups by key will fail: you can only look things up by numeric + * index, and you have to use addpos234() and delpos234(). + */ +tree234 *newtree234 (cmpfn234 cmp); + +/* + * Free a 2-3-4 tree (not including freeing the elements). + */ +void freetree234 (tree234 * t); + +/* + * Add an element e to a sorted 2-3-4 tree t. Returns e on success, + * or if an existing element compares equal, returns that. + */ +void *add234 (tree234 * t, void *e); + +/* + * Add an element e to an unsorted 2-3-4 tree t. Returns e on + * success, NULL on failure. 
(Failure should only occur if the + * index is out of range or the tree is sorted.) + * + * Index range can be from 0 to the tree's current element count, + * inclusive. + */ +void *addpos234 (tree234 * t, void *e, int index); + +/* + * Look up the element at a given numeric index in a 2-3-4 tree. + * Returns NULL if the index is out of range. + * + * One obvious use for this function is in iterating over the whole + * of a tree (sorted or unsorted): + * + * for (i = 0; (p = index234(tree, i)) != NULL; i++) consume(p); + * + * or + * + * int maxcount = count234(tree); + * for (i = 0; i < maxcount; i++) { + * p = index234(tree, i); + * assert(p != NULL); + * consume(p); + * } + */ +void *index234 (tree234 * t, int index); + +/* + * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not + * found. e is always passed as the first argument to cmp, so cmp + * can be an asymmetric function if desired. cmp can also be passed + * as NULL, in which case the compare function from the tree proper + * will be used. + * + * Three of these functions are special cases of findrelpos234. The + * non-`pos' variants lack the `index' parameter: if the parameter + * is present and non-NULL, it must point to an integer variable + * which will be filled with the numeric index of the returned + * element. + * + * The non-`rel' variants lack the `relation' parameter. This + * parameter allows you to specify what relation the element you + * provide has to the element you're looking for. This parameter + * can be: + * + * REL234_EQ - find only an element that compares equal to e + * REL234_LT - find the greatest element that compares < e + * REL234_LE - find the greatest element that compares <= e + * REL234_GT - find the smallest element that compares > e + * REL234_GE - find the smallest element that compares >= e + * + * Non-`rel' variants assume REL234_EQ. + * + * If `rel' is REL234_GT or REL234_LT, the `e' parameter may be + * NULL. 
In this case, REL234_GT will return the smallest element + * in the tree, and REL234_LT will return the greatest. This gives + * an alternative means of iterating over a sorted tree, instead of + * using index234: + * + * // to loop forwards + * for (p = NULL; (p = findrel234(tree, p, NULL, REL234_GT)) != NULL ;) + * consume(p); + * + * // to loop backwards + * for (p = NULL; (p = findrel234(tree, p, NULL, REL234_LT)) != NULL ;) + * consume(p); + */ +enum +{ + REL234_EQ, REL234_LT, REL234_LE, REL234_GT, REL234_GE +}; +void *find234 (tree234 * t, void *e, cmpfn234 cmp); +void *findrel234 (tree234 * t, void *e, cmpfn234 cmp, int relation); +void *findpos234 (tree234 * t, void *e, cmpfn234 cmp, int *index); +void *findrelpos234 (tree234 * t, void *e, cmpfn234 cmp, int relation, + int *index); + +/* + * Delete an element e in a 2-3-4 tree. Does not free the element, + * merely removes all links to it from the tree nodes. + * + * delpos234 deletes the element at a particular tree index: it + * works on both sorted and unsorted trees. + * + * del234 deletes the element passed to it, so it only works on + * sorted trees. (It's equivalent to using findpos234 to determine + * the index of an element, and then passing that index to + * delpos234.) + * + * Both functions return a pointer to the element they delete, for + * the user to free or pass on elsewhere or whatever. If the index + * is out of range (delpos234) or the element is already not in the + * tree (del234) then they return NULL. + */ +void *del234 (tree234 * t, void *e); +void *delpos234 (tree234 * t, int index); + +/* + * Return the total element count of a tree234. + */ +int count234 (tree234 * t); + +/* + * Split a tree234 into two valid tree234s. + * + * splitpos234 splits at a given index. If `before' is TRUE, the + * items at and after that index are left in t and the ones before + * are returned; if `before' is FALSE, the items before that index + * are left in t and the rest are returned. 
+ * + * split234 splits at a given key. You can pass any of the + * relations used with findrel234, except for REL234_EQ. The items + * in the tree that satisfy the relation are returned; the + * remainder are left. + */ +tree234 *splitpos234 (tree234 * t, int index, int before); +tree234 *split234 (tree234 * t, void *e, cmpfn234 cmp, int rel); + +/* + * Join two tree234s together into a single one. + * + * All the elements in t1 are placed to the left of all the + * elements in t2. If the trees are sorted, there will be a test to + * ensure that this satisfies the ordering criterion, and NULL will + * be returned otherwise. If the trees are unsorted, there is no + * restriction on the use of join234. + * + * The tree returned is t1 (join234) or t2 (join234r), if the + * operation is successful. + */ +tree234 *join234 (tree234 * t1, tree234 * t2); +tree234 *join234r (tree234 * t1, tree234 * t2); + +/* + * Make a complete copy of a tree234. Element pointers will be + * reused unless copyfn is non-NULL, in which case it will be used + * to copy each element. (copyfn takes two `void *' parameters; the + * first is private state and the second is the element. A simple + * copy routine probably won't need private state.) 
/*
 * ustrtoa: narrow a wide string into a caller-supplied char buffer.
 *
 * Copies at most `size - 1' characters of `s' into `outbuf' and
 * always NUL-terminates the result, truncating if necessary. A NULL
 * `s' produces the empty string. Each wide character is simply
 * converted to char, so only characters representable in a char
 * survive intact.
 *
 * If `size' is zero or negative there is no room even for the
 * terminator, so `outbuf' is left untouched. (The previous code
 * stored the terminator at outbuf[size - 1] in that case, i.e.
 * before the start of the buffer.)
 *
 * Returns `outbuf'.
 */
char *
ustrtoa (wchar_t * s, char *outbuf, int size)
{
  char *p;
  char *limit;

  if (size <= 0)
    return outbuf;		/* nowhere to write anything at all */

  if (!s)
    {
      *outbuf = '\0';
      return outbuf;
    }

  /* Reserve the final slot of the buffer for the terminator. */
  limit = outbuf + size - 1;
  for (p = outbuf; *s && p < limit; p++, s++)
    *p = (char) *s;
  *p = '\0';

  return outbuf;
}
/*
 * utoi: parse a decimal integer from the start of a wide string.
 *
 * Accepts an optional leading `-' followed by ASCII digits; parsing
 * stops at the first character that is not a digit. A string with
 * no digits yields 0. No overflow checking is performed.
 *
 * The sign is applied to the result. (It used to be parsed and then
 * thrown away, so L"-5" came back as 5.)
 */
int
utoi (wchar_t * s)
{
  int sign = +1;
  int n = 0;

  if (*s == L'-')
    {
      s++;
      sign = -1;
    }

  while (*s >= L'0' && *s <= L'9')
    {
      n = n * 10 + (int) (*s - L'0');
      s++;
    }

  return sign * n;
}
+ */ + for (wp = wblk, p = text; *p; p++, wp++) + *wp = *p; + *wp = 0; + if (wfmt) + sfree (fmt); + sfree (text); + return wblk; +} diff --git a/Docs/src/bin/halibut/version.c b/Docs/src/bin/halibut/version.c new file mode 100644 index 00000000..38fbca1e --- /dev/null +++ b/Docs/src/bin/halibut/version.c @@ -0,0 +1,13 @@ +/* + * version.c: version string + */ + +#include + +#ifndef VERSION +#define VER "anonymous build (" __DATE__ " " __TIME__ ")" +#else +#define VER "version " VERSION +#endif + +const char *const version = VER; diff --git a/Docs/src/bin/halibut/winhelp.c b/Docs/src/bin/halibut/winhelp.c new file mode 100644 index 00000000..6beac093 --- /dev/null +++ b/Docs/src/bin/halibut/winhelp.c @@ -0,0 +1,2272 @@ +/* + * winhelp.c a module to generate Windows .HLP files + * + * Documentation of the .HLP file format comes from the excellent + * HELPFILE.TXT, published alongside the Help decompiler HELPDECO + * by Manfred Winterhoff. This code would not have been possible + * without his efforts. Many thanks. + */ + +/* + * Potential future features: + * + * - perhaps LZ77 compression? This appears to cause a phase order + * problem: it's hard to do the compression until the data to be + * compressed is finalised, and yet you can't finalise the data + * to be compressed until you know how much of it is going into + * which TOPICBLOCK in order to work out the offsets in the + * topic headers - for which you have to have already done the + * compression. Perhaps the thing to do is to implement an LZ77 + * compressor that can guarantee to leave particular bytes in + * the stream as literals, and then go back and fix the offsets + * up later. Not pleasant. + * + * - It would be good to find out what relation (if any) the LCID + * record in the |SYSTEM section bears to the codepage used in + * the actual help text, so as to be able to vary that if the + * user needs it. For the moment I suspect we're stuck with + * Win1252. + * + * - tables might be nice. 
+ * + * Unlikely future features: + * + * - Phrase compression sounds harder. It's reasonably easy + * (though space-costly) to analyse all the text in the file to + * determine the one key phrase which would save most space if + * replaced by a reference everywhere it appears; but finding + * the _1024_ most effective phrases seems much harder since a + * naive analysis might find lots of phrases that all overlap + * (so you wouldn't get the saving you expected, as after taking + * out the first phrase the rest would never crop up). In + * addition, MS hold US patent number 4955066 which may cover + * phrase compression, so perhaps it's best just to leave it. + * + * Cleanup work: + * + * - sort out begin_topic. Ideally we should have a separate + * topic_macro function that adds to the existing linkdata for + * the topic, because that's more flexible than a variadic + * function. This will be fiddly, though: if it's called before + * whlp_begin_topic then we must buffer macros, and if it's + * called afterwards then we must be able to go back and modify + * the linkdata2 of the topic start block. Foo. + * + * - find out what should happen if a single topiclink crosses + * _two_ topicblock boundaries. + * + * - What is the BlockSize in a topic header (first 4 bytes of + * LinkData1 in a type 2 record) supposed to mean? How on earth + * is it measured? The help file doesn't become perceptibly + * corrupt if I frob it randomly; and on some occasions taking a + * bit _out_ of the help file _increases_ that value. I have a + * feeling it's completely made up and/or vestigial, so for the + * moment I'm just making up a plausible value as I go along. + */ + +#include +#include +#include +#include +#include +#include + +#include "halibut.h" +#include "winhelp.h" +#include "tree234.h" + +#ifdef TESTMODE +/* + * This lot is useful for testing. Something like it will also be + * needed to use this module standalone. 
/*
 * Mark a function parameter as deliberately unused. Casting to void
 * evaluates the argument and discards the value; unlike the
 * self-assignment `(x) = (x)' this also works for const-qualified
 * parameters and does not provoke self-assignment warnings.
 */
#define UNUSEDARG(x) ( (void) (x) )
*data1, *data2; + context *context; + struct topiclink *nonscroll, *scroll, *nexttopic; + int block_size; /* for the topic header - *boggle* */ +}; + +struct WHLP_TOPIC_tag +{ + char *name; /* needs freeing */ + unsigned long hash; + struct topiclink *link; /* this provides TOPICOFFSET */ + context *browse_next, *browse_prev; + char *title; /* needs freeing */ + int index; /* arbitrary number */ +}; + +struct fontdesc +{ + char *font; + int family, rendition, halfpoints; + int r, g, b; +}; + +struct WHLP_tag +{ + tree234 *files; /* stores `struct file' */ + tree234 *pre_contexts; /* stores `context' */ + tree234 *contexts; /* also stores `context' */ + tree234 *titles; /* _also_ stores `context' */ + tree234 *text; /* stores `struct topiclink' */ + tree234 *index; /* stores `struct indexrec' */ + tree234 *tabstops; /* stores `int' */ + tree234 *fontnames; /* stores `char *' */ + tree234 *fontdescs; /* stores `struct fontdesc' */ + struct file *systemfile; /* the |SYSTEM internal file */ + context *ptopic; /* primary topic */ + struct topiclink *prevtopic; /* to link type-2 records together */ + struct topiclink *link; /* while building a topiclink */ + unsigned char linkdata1[TOPIC_BLKSIZE]; /* while building a topiclink */ + unsigned char linkdata2[TOPIC_BLKSIZE]; /* while building a topiclink */ + int topicblock_remaining; /* while building |TOPIC section */ + int lasttopiclink; /* while building |TOPIC section */ + int firsttopiclink_offset; /* while building |TOPIC section */ + int lasttopicstart; /* while building |TOPIC section */ + int para_flags; + int para_attrs[7]; + int ncontexts; +}; + +/* Functions to return the index and leaf data for B-tree contents. */ +typedef int (*bt_index_fn) (const void *item, unsigned char *outbuf); +typedef int (*bt_leaf_fn) (const void *item, unsigned char *outbuf); + +/* Forward references. 
*/ +static void whlp_para_reset (WHLP h); +static struct file *whlp_new_file (WHLP h, char *name); +static void whlp_file_add (struct file *f, const void *data, int len); +static void whlp_file_add_char (struct file *f, int data); +static void whlp_file_add_short (struct file *f, int data); +static void whlp_file_add_long (struct file *f, int data); +static void whlp_file_fill (struct file *f, int len); +static void whlp_file_seek (struct file *f, int pos, int whence); +static int whlp_file_offset (struct file *f); + +/* ---------------------------------------------------------------------- + * Fiddly little functions: B-tree compare, index and leaf functions. + */ + +/* The master index maps file names to help-file offsets. */ + +static int +filecmp (void *av, void *bv) +{ + const struct file *a = (const struct file *) av; + const struct file *b = (const struct file *) bv; + return strcmp (a->name, b->name); +} + +static int +fileindex (const void *av, unsigned char *outbuf) +{ + const struct file *a = (const struct file *) av; + int len = 1 + strlen (a->name); + memcpy (outbuf, a->name, len); + return len; +} + +static int +fileleaf (const void *av, unsigned char *outbuf) +{ + const struct file *a = (const struct file *) av; + int len = 1 + strlen (a->name); + memcpy (outbuf, a->name, len); + PUT_32BIT_LSB_FIRST (outbuf + len, a->fileoffset); + return len + 4; +} + +/* The |CONTEXT internal file maps help context hashes to TOPICOFFSETs. 
*/ + +static int +ctxcmp (void *av, void *bv) +{ + const context *a = (const context *) av; + const context *b = (const context *) bv; + if ((signed long) a->hash < (signed long) b->hash) + return -1; + if ((signed long) a->hash > (signed long) b->hash) + return +1; + return 0; +} + +static int +ctxindex (const void *av, unsigned char *outbuf) +{ + const context *a = (const context *) av; + PUT_32BIT_LSB_FIRST (outbuf, a->hash); + return 4; +} + +static int +ctxleaf (const void *av, unsigned char *outbuf) +{ + const context *a = (const context *) av; + PUT_32BIT_LSB_FIRST (outbuf, a->hash); + PUT_32BIT_LSB_FIRST (outbuf + 4, a->link->topicoffset); + return 8; +} + +/* The |TTLBTREE internal file maps TOPICOFFSETs to title strings. */ + +static int +ttlcmp (void *av, void *bv) +{ + const context *a = (const context *) av; + const context *b = (const context *) bv; + if (a->link->topicoffset < b->link->topicoffset) + return -1; + if (a->link->topicoffset > b->link->topicoffset) + return +1; + return 0; +} + +static int +ttlindex (const void *av, unsigned char *outbuf) +{ + const context *a = (const context *) av; + PUT_32BIT_LSB_FIRST (outbuf, a->link->topicoffset); + return 4; +} + +static int +ttlleaf (const void *av, unsigned char *outbuf) +{ + const context *a = (const context *) av; + int slen; + PUT_32BIT_LSB_FIRST (outbuf, a->link->topicoffset); + slen = 1 + strlen (a->title); + memcpy (outbuf + 4, a->title, slen); + return 4 + slen; +} + +/* The |KWBTREE internal file maps index strings to TOPICOFFSETs. */ + +static int +idxcmp (void *av, void *bv) +{ + const struct indexrec *a = (const struct indexrec *) av; + const struct indexrec *b = (const struct indexrec *) bv; + int cmp; + if ((cmp = strcmp (a->term, b->term)) != 0) + return cmp; + /* Now sort on the index field of the topics. 
/*
 * Ordering function for the `tabstops' tree, whose elements are
 * pointers to int. Only the low 16 bits of each value take part in
 * the comparison; the bits above that hold flags and are ignored.
 * Returns -1, 0 or +1.
 */
static int
tabcmp (void *av, void *bv)
{
  const int lhs_pos = *(const int *) av & 0xFFFF;
  const int rhs_pos = *(const int *) bv & 0xFFFF;

  return (lhs_pos > rhs_pos) - (lhs_pos < rhs_pos);
}
/*
 * Compute the 32-bit hash of a context name, using the byte mapping
 * table and multiply-by-43 scheme taken from Winterhoff's
 * documentation.
 *
 * The result is always reduced to 32 bits, so the function gives the
 * same answer whatever the width of unsigned long. (It previously
 * relied on unsigned long being exactly 32 bits and contained a
 * compile-time check that was a division by zero on any platform
 * where that was not the case.)
 *
 * The mapping table is static const, so it is not rebuilt on the
 * stack for every call.
 */
static unsigned long
context_hash (char *context)
{
  static const signed char bytemapping[256] =
    "\x00\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
    "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
    "\xF0\x0B\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\x0C\xFF"
    "\x0A\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
    "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
    "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x0B\x0C\x0D\x0E\x0D"
    "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
    "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
    "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F"
    "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
    "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"
    "\x80\x81\x82\x83\x0B\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
    "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
    "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
    "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF";
  unsigned long hash;

  /*
   * The hash algorithm starts the hash at 0 and updates it with
   * each character, so logically the hash of an empty string
   * should be 0; but Winterhoff says it is in fact 1. Empty
   * context names are not expected, but the special case is kept
   * here anyway.
   */
  if (!*context)
    return 1;

  /*
   * Now compute the hash in the normal way. Table entries are
   * signed, so a negative entry is converted to unsigned long
   * (wrapping modulo its width) before being added, and the running
   * value is then cut back to 32 bits.
   */
  hash = 0;
  while (*context)
    {
      long mapped = bytemapping[(unsigned char) *context];

      hash = (hash * 43UL + (unsigned long) mapped) & 0xFFFFFFFFUL;
      context++;
    }
  return hash;
}
+ */ + int ctx_num = 0; + context *ctx, *otherctx; + + while ((ctx = index234 (h->pre_contexts, 0)) != NULL) + { + delpos234 (h->pre_contexts, 0); + ctx->name = mknewa (char, 20); + do + { + sprintf (ctx->name, "t%08d", ctx_num++); + ctx->hash = context_hash (ctx->name); + otherctx = add234 (h->contexts, ctx); + } + while (otherctx != ctx); + } + + /* + * Ensure paragraph attributes are clear for the start of text + * output. + */ + whlp_para_reset (h); +} + +char * +whlp_topic_id (WHLP_TOPIC topic) +{ + return topic->name; +} + +void +whlp_begin_topic (WHLP h, WHLP_TOPIC topic, char *title, ...) +{ + struct topiclink *link = mknew (struct topiclink); + int len, slen; + char *macro; + va_list ap; + + link->nexttopic = NULL; + if (h->prevtopic) + h->prevtopic->nexttopic = link; + h->prevtopic = link; + + link->nonscroll = link->scroll = NULL; + link->context = topic; + link->block_size = 0; + + link->recordtype = 2; /* topic header */ + link->len1 = 4 * 7; /* standard linkdata1 size */ + link->data1 = mknewa (unsigned char, link->len1); + + slen = strlen (title); + assert (slen + 1 <= TOPIC_BLKSIZE); + memcpy (h->linkdata2, title, slen + 1); + len = slen + 1; + + va_start (ap, title); + while ((macro = va_arg (ap, char *)) != NULL) + { + slen = strlen (macro); + assert (len + slen + 1 <= TOPIC_BLKSIZE); + memcpy (h->linkdata2 + len, macro, slen + 1); + len += slen + 1; + } + va_end (ap); + len--; /* lose the last \0 on the last macro */ + + link->len2 = len; + link->data2 = mknewa (unsigned char, link->len2); + memcpy (link->data2, h->linkdata2, link->len2); + + topic->title = dupstr (title); + topic->link = link; + + addpos234 (h->text, link, count234 (h->text)); +} + +void +whlp_browse_link (WHLP h, WHLP_TOPIC before, WHLP_TOPIC after) +{ + UNUSEDARG (h); + + /* + * See if the `before' topic is already linked to another one, + * and break the link to that if so. Likewise the `after' + * topic. 
+ */
+  if (before->browse_next)
+    before->browse_next->browse_prev = NULL;
+  if (after->browse_prev)
+    after->browse_prev->browse_next = NULL;
+  before->browse_next = after;
+  after->browse_prev = before;
+}
+
+/* ----------------------------------------------------------------------
+ * Manage the actual generation of paragraph and text records.
+ */
+
+/*
+ * Append one byte to the paragraph under construction. `which' == 1
+ * selects h->linkdata1 (length h->link->len1); any other value
+ * selects h->linkdata2 (length h->link->len2). Asserts that the
+ * current length is still below TOPIC_BLKSIZE.
+ */
+static void
+whlp_linkdata (WHLP h, int which, int c)
+{
+  int *len = (which == 1 ? &h->link->len1 : &h->link->len2);
+  char *data = (which == 1 ? h->linkdata1 : h->linkdata2);
+  assert (*len < TOPIC_BLKSIZE);
+  data[(*len)++] = c;
+}
+
+/*
+ * Append the low 16 bits of `data', least significant byte first.
+ */
+static void
+whlp_linkdata_short (WHLP h, int which, int data)
+{
+  whlp_linkdata (h, which, data & 0xFF);
+  whlp_linkdata (h, which, (data >> 8) & 0xFF);
+}
+
+/*
+ * Append the low 32 bits of `data', least significant byte first.
+ */
+static void
+whlp_linkdata_long (WHLP h, int which, int data)
+{
+  whlp_linkdata (h, which, data & 0xFF);
+  whlp_linkdata (h, which, (data >> 8) & 0xFF);
+  whlp_linkdata (h, which, (data >> 16) & 0xFF);
+  whlp_linkdata (h, which, (data >> 24) & 0xFF);
+}
+
+/*
+ * Append a compressed unsigned short: values up to 0x7F take one
+ * byte (value * 2, low bit clear); larger values take two bytes,
+ * the first having its low bit set.
+ */
+static void
+whlp_linkdata_cushort (WHLP h, int which, int data)
+{
+  if (data <= 0x7F)
+    {
+      whlp_linkdata (h, which, data * 2);
+    }
+  else
+    {
+      whlp_linkdata (h, which, 1 + (data % 128 * 2));
+      whlp_linkdata (h, which, data / 128);
+    }
+}
+
+/*
+ * Append a compressed signed short: the value is biased by 64 when
+ * it lies in [-0x40, 0x3F] and by 16384 otherwise, then written as
+ * a compressed unsigned short.
+ */
+static void
+whlp_linkdata_csshort (WHLP h, int which, int data)
+{
+  if (data >= -0x40 && data <= 0x3F)
+    whlp_linkdata_cushort (h, which, data + 64);
+  else
+    whlp_linkdata_cushort (h, which, data + 16384);
+}
+
+/*
+ * Append a compressed unsigned long: values up to 0x7FFF take one
+ * 16-bit word (value * 2, low bit clear); larger values take two
+ * words, the first having its low bit set.
+ */
+static void
+whlp_linkdata_culong (WHLP h, int which, int data)
+{
+  if (data <= 0x7FFF)
+    {
+      whlp_linkdata_short (h, which, data * 2);
+    }
+  else
+    {
+      whlp_linkdata_short (h, which, 1 + (data % 32768 * 2));
+      whlp_linkdata_short (h, which, data / 32768);
+    }
+}
+
+/*
+ * Append a compressed signed long: the value is biased by 16384
+ * when it lies in [-0x4000, 0x3FFF] and by 67108864 otherwise,
+ * then written as a compressed unsigned long.
+ */
+static void
+whlp_linkdata_cslong (WHLP h, int which, int data)
+{
+  if (data >= -0x4000 && data <= 0x3FFF)
+    whlp_linkdata_culong (h, which, data + 16384);
+  else
+    whlp_linkdata_culong (h, which, data + 67108864);
+}
+
+/*
+ * Return the paragraph attributes to their default state: clear
+ * para_flags and free every entry in the tabstops tree.
+ */
+static void
+whlp_para_reset (WHLP h)
+{
+  int *p;
+
+  h->para_flags = 0;
+
+  while ((p = index234 (h->tabstops, 0)) != NULL)
+    {
+      delpos234 (h->tabstops, 0);
+      sfree (p);
+    }
+}
+
+/*
+ * Set one attribute for the next paragraph. Attribute ids from
+ * WHLP_PARA_SPACEABOVE to WHLP_PARA_FIRSTLINEINDENT store their
+ * parameter and set bit `attr_id' in para_flags;
+ * WHLP_PARA_ALIGNMENT is encoded in bits 0x400 (right) and 0x800
+ * (centre). Any other id is ignored.
+ */
+void
+whlp_para_attr (WHLP h, int attr_id, int attr_param)
+{
+  if (attr_id >= WHLP_PARA_SPACEABOVE && attr_id <= WHLP_PARA_FIRSTLINEINDENT)
+    {
+      h->para_flags |= 1 << attr_id;
+      h->para_attrs[attr_id] = attr_param;
+    }
+  else if (attr_id == WHLP_PARA_ALIGNMENT)
+    {
+      h->para_flags &= ~0xC00;
+      if (attr_param == WHLP_ALIGN_RIGHT)
+        h->para_flags |= 0x400;
+      else if (attr_param == WHLP_ALIGN_CENTRE)
+        h->para_flags |= 0x800;
+    }
+}
+
+/*
+ * Add a tab stop for the next paragraph. The alignment is folded
+ * into the stored value (0x20000 for centre, 0x10000 for right)
+ * and bit 0x0200 of para_flags records that tab stops are present.
+ */
+void
+whlp_set_tabstop (WHLP h, int tabstop, int alignment)
+{
+  int *p;
+
+  if (alignment == WHLP_ALIGN_CENTRE)
+    tabstop |= 0x20000;
+  if (alignment == WHLP_ALIGN_RIGHT)
+    tabstop |= 0x10000;
+
+  p = mknew (int);
+  *p = tabstop;
+  /*
+   * If an equal tab stop is already in the tree, add234 hands back
+   * the existing element; free our copy rather than leaking it.
+   */
+  if (add234 (h->tabstops, p) != p)
+    sfree (p);
+  h->para_flags |= 0x0200;
+}
+
+/*
+ * Begin a new text record (record type 32). Allocates the
+ * topiclink, points it at the scratch buffers in `h', registers it
+ * as the first non-scrolling or scrolling paragraph of the current
+ * topic when `para_type' asks for that and none is set yet, and
+ * writes the paragraph-info header into linkdata1.
+ */
+void
+whlp_begin_para (WHLP h, int para_type)
+{
+  struct topiclink *link = mknew (struct topiclink);
+  int i;
+
+  /*
+   * Clear these to NULL out of paranoia, although in records
+   * that aren't type 2 they should never actually be needed.
+   */
+  link->nexttopic = NULL;
+  link->context = NULL;
+  link->nonscroll = link->scroll = NULL;
+
+  link->recordtype = 32;        /* text record */
+
+  h->link = link;
+  link->len1 = link->len2 = 0;
+  link->data1 = h->linkdata1;
+  link->data2 = h->linkdata2;
+
+  if (para_type == WHLP_PARA_NONSCROLL && h->prevtopic &&
+      !h->prevtopic->nonscroll)
+    h->prevtopic->nonscroll = link;
+  if (para_type == WHLP_PARA_SCROLL && h->prevtopic && !h->prevtopic->scroll)
+    h->prevtopic->scroll = link;
+
+  /*
+   * Now we're ready to start accumulating stuff in linkdata1 and
+   * linkdata2. Next we build up the paragraph info. Note that
+   * the TopicSize (cslong: size of LinkData1 minus the topicsize
+   * and topiclength fields) and TopicLength (cushort: size of
+   * LinkData2) fields are missing; we will put those on when we
+   * end the paragraph.
+   */
+  whlp_linkdata (h, 1, 0);      /* must-be-0x00 */
+  whlp_linkdata (h, 1, 0x80);   /* must-be-0x80 */
+  whlp_linkdata_short (h, 1, 0);        /* Winterhoff says `id'; always 0 AFAICT */
+  whlp_linkdata_short (h, 1, h->para_flags);
+  for (i = WHLP_PARA_SPACEABOVE; i <= WHLP_PARA_FIRSTLINEINDENT; i++)
+    {
+      if (h->para_flags & (1 << i))
+        whlp_linkdata_csshort (h, 1, h->para_attrs[i]);
+    }
+  if (h->para_flags & 0x0200)
+    {
+      int ntabs;
+      /*
+       * Write out tab stop data.
+       */
+      ntabs = count234 (h->tabstops);
+      whlp_linkdata_csshort (h, 1, ntabs);
+      for (i = 0; i < ntabs; i++)
+        {
+          int tab, *tabp;
+          tabp = index234 (h->tabstops, i);
+          tab = *tabp;
+          /* 0x4000 flags that an alignment word follows the position. */
+          if (tab & 0x30000)
+            tab |= 0x4000;
+          whlp_linkdata_cushort (h, 1, tab & 0xFFFF);
+          if (tab & 0x4000)
+            whlp_linkdata_cushort (h, 1, tab >> 16);
+        }
+    }
+
+  /*
+   * Fine. Now we're ready to start writing actual text and
+   * formatting commands.
+   */
+}
+
+/*
+ * Emit a font-change command for font `font_id' at the current
+ * point in the paragraph.
+ */
+void
+whlp_set_font (WHLP h, int font_id)
+{
+  /*
+   * Write a NUL into linkdata2 to cause the reader to flip over
+   * to linkdata1 to see the formatting command.
+   */
+  whlp_linkdata (h, 2, 0);
+  /*
+   * Now the formatting command is 0x80 followed by a short.
+   */
+  whlp_linkdata (h, 1, 0x80);
+  whlp_linkdata_short (h, 1, font_id);
+}
+
+/*
+ * Emit the command that starts a hyperlink to `target'.
+ */
+void
+whlp_start_hyperlink (WHLP h, WHLP_TOPIC target)
+{
+  /*
+   * Write a NUL into linkdata2.
+   */
+  whlp_linkdata (h, 2, 0);
+  /*
+   * Now the formatting command is 0xE3 followed by the context
+   * hash.
+   */
+  whlp_linkdata (h, 1, 0xE3);
+  whlp_linkdata_long (h, 1, target->hash);
+}
+
+/*
+ * Emit the command that ends the current hyperlink.
+ */
+void
+whlp_end_hyperlink (WHLP h)
+{
+  /*
+   * Write a NUL into linkdata2.
+   */
+  whlp_linkdata (h, 2, 0);
+  /*
+   * Now the formatting command is 0x89.
+   */
+  whlp_linkdata (h, 1, 0x89);
+}
+
+/*
+ * Emit a tab command at the current point in the paragraph.
+ */
+void
+whlp_tab (WHLP h)
+{
+  /*
+   * Write a NUL into linkdata2.
+   */
+  whlp_linkdata (h, 2, 0);
+  /*
+   * Now the formatting command is 0x83.
+   */
+  whlp_linkdata (h, 1, 0x83);
+}
+
+/*
+ * Append the NUL-terminated string `text' (without its terminator)
+ * to linkdata2.
+ */
+void
+whlp_text (WHLP h, char *text)
+{
+  while (*text)
+    {
+      whlp_linkdata (h, 2, *text++);
+    }
+}
+
+/*
+ * Finish the paragraph begun by whlp_begin_para: terminate the
+ * command stream, prepend the size fields to linkdata1, copy both
+ * buffers into freshly allocated storage owned by the topiclink,
+ * append the link to the `text' tree and reset the paragraph
+ * attributes.
+ */
+void
+whlp_end_para (WHLP h)
+{
+  int data1cut;
+
+  /*
+   * Round off the paragraph with 0x82 and 0xFF formatting
+   * commands. Each requires a NUL in linkdata2.
+   */
+  whlp_linkdata (h, 2, 0);
+  whlp_linkdata (h, 1, 0x82);
+  whlp_linkdata (h, 2, 0);
+  whlp_linkdata (h, 1, 0xFF);
+
+  /*
+   * Now finish up: create the header of linkdata1 (TopicLength
+   * and TopicSize fields), allocate the real linkdata1 and
+   * linkdata2 fields, and copy them out of the buffers in h.
+   * Then insert the finished topiclink into the `text' tree, and
+   * clean up.
+   */
+  data1cut = h->link->len1;
+  whlp_linkdata_cslong (h, 1, data1cut);
+  whlp_linkdata_cushort (h, 1, h->link->len2);
+
+  /*
+   * The two size fields were appended after the body, at offset
+   * data1cut; copy them first so that they end up in front of it.
+   */
+  h->link->data1 = mknewa (unsigned char, h->link->len1);
+  memcpy (h->link->data1, h->linkdata1 + data1cut, h->link->len1 - data1cut);
+  memcpy (h->link->data1 + h->link->len1 - data1cut, h->linkdata1, data1cut);
+  h->link->data2 = mknewa (unsigned char, h->link->len2);
+  memcpy (h->link->data2, h->linkdata2, h->link->len2);
+
+  addpos234 (h->text, h->link, count234 (h->text));
+
+  /* Hack: accumulate the `blocksize' parameter in the topic header. */
+  if (h->prevtopic)
+    h->prevtopic->block_size += 21 + h->link->len1 + h->link->len2;
+
+  h->link = NULL;               /* this is now in the tree */
+
+  whlp_para_reset (h);
+}
+
+/* ----------------------------------------------------------------------
+ * Manage the layout and generation of the |TOPIC section.
+ */
+
+/*
+ * Write the 12-byte header of a new topic block to `f': the last
+ * topiclink position, a placeholder for the first topiclink of this
+ * block (its file offset is remembered in h->firsttopiclink_offset
+ * so that it can be patched later), and the last topic start. Then
+ * reset the count of bytes remaining in the block.
+ */
+static void
+whlp_topicsect_newblock (WHLP h, struct file *f)
+{
+  whlp_file_add_long (f, h->lasttopiclink);
+  h->firsttopiclink_offset = whlp_file_offset (f);
+  whlp_file_add_long (f, -1L);  /* this will be filled in later */
+  whlp_file_add_long (f, h->lasttopicstart);
+  h->topicblock_remaining = TOPIC_BLKSIZE - 12;
+}
+
+/*
+ * Write `len' bytes from `data' into the |TOPIC section, starting
+ * new topic blocks as required. `can_break' is the number of
+ * leading bytes that must not be split across a block boundary: if
+ * fewer than that remain in the current block, the block is padded
+ * out and a new one begun before anything is written.
+ */
+static void
+whlp_topicsect_write (WHLP h, struct file *f, void *data, int len,
+                      int can_break)
+{
+  unsigned char *p = (unsigned char *) data;
+
+  if (h->topicblock_remaining <= 0 || h->topicblock_remaining < can_break)
+    {
+      /*
+       * Start a new block.
+       */
+      if (h->topicblock_remaining > 0)
+        whlp_file_fill (f, h->topicblock_remaining);
+      whlp_topicsect_newblock (h, f);
+    }
+  while (len > 0)
+    {
+      int thislen = (h->topicblock_remaining < len ?
+                     h->topicblock_remaining : len);
+      whlp_file_add (f, p, thislen);
+      p += thislen;
+      len -= thislen;
+      h->topicblock_remaining -= thislen;
+      if (len > 0 && h->topicblock_remaining <= 0)
+        {
+          /*
+           * Start a new block.
+           */
+          whlp_topicsect_newblock (h, f);
+        }
+    }
+}
+
+/*
+ * Lay out every topiclink in the `text' tree into topic blocks,
+ * fill in the headers of the type-2 (topic header) records, and
+ * write the finished |TOPIC internal file.
+ */
+static void
+whlp_topic_layout (WHLP h)
+{
+  int block, offset, pos;
+  int i, nlinks, size;
+  int topicnum;
+  struct topiclink *link;
+  struct file *f;
+
+  /*
+   * Create a final TOPICLINK containing no usable data.
+   */
+  link = mknew (struct topiclink);
+  link->nexttopic = NULL;
+  if (h->prevtopic)
+    h->prevtopic->nexttopic = link;
+  h->prevtopic = link;
+  link->data1 = mknewa (unsigned char, 0x1c);
+  link->block_size = 0;
+  link->data2 = NULL;
+  link->len1 = 0x1c;
+  link->len2 = 0;
+  link->recordtype = 2;
+  link->nonscroll = link->scroll = NULL;
+  link->context = NULL;
+  addpos234 (h->text, link, count234 (h->text));
+
+  /*
+   * Each TOPICBLOCK has space for TOPIC_BLKSIZE-12 bytes. The
+   * size of each TOPICLINK is 21 bytes plus the combined lengths
+   * of LinkData1 and LinkData2. So we can now go through and
+   * break up the TOPICLINKs into TOPICBLOCKs, and also set up
+   * the TOPICOFFSET and TOPICPOS of each one while we do so.
+   */
+
+  block = 0;
+  offset = 0;
+  pos = 12;
+  nlinks = count234 (h->text);
+  for (i = 0; i < nlinks; i++)
+    {
+      link = index234 (h->text, i);
+      size = 21 + link->len1 + link->len2;
+      /*
+       * We can't split within the topicblock header or within
+       * linkdata1. So if the split would fall in that area,
+       * start a new block _now_.
+       */
+      if (TOPIC_BLKSIZE - pos < 21 + link->len1)
+        {
+          block++;
+          offset = 0;
+          pos = 12;
+        }
+      link->topicoffset = block * 0x8000 + offset;
+      link->topicpos = block * 0x4000 + pos;
+      pos += size;
+      if (link->recordtype != 2)        /* TOPICOFFSET doesn't count titles */
+        offset += link->len2;
+      while (pos > TOPIC_BLKSIZE)
+        {
+          block++;
+          offset = 0;
+          pos -= TOPIC_BLKSIZE - 12;
+        }
+    }
+
+  /*
+   * Now we have laid out the TOPICLINKs into blocks, and
+   * determined the final TOPICOFFSET and TOPICPOS of each one.
+   * So now we can go through and write the headers of the type-2
+   * records.
+   */
+
+  topicnum = 0;
+  for (i = 0; i < nlinks; i++)
+    {
+      link = index234 (h->text, i);
+      if (link->recordtype != 2)
+        continue;
+
+      PUT_32BIT_LSB_FIRST (link->data1 + 0, link->block_size);
+      if (link->context && link->context->browse_prev)
+        PUT_32BIT_LSB_FIRST (link->data1 + 4,
+                             link->context->browse_prev->link->topicoffset);
+      else
+        PUT_32BIT_LSB_FIRST (link->data1 + 4, 0xFFFFFFFFL);
+      if (link->context && link->context->browse_next)
+        PUT_32BIT_LSB_FIRST (link->data1 + 8,
+                             link->context->browse_next->link->topicoffset);
+      else
+        PUT_32BIT_LSB_FIRST (link->data1 + 8, 0xFFFFFFFFL);
+      PUT_32BIT_LSB_FIRST (link->data1 + 12, topicnum);
+      topicnum++;
+      if (link->nonscroll)
+        PUT_32BIT_LSB_FIRST (link->data1 + 16, link->nonscroll->topicpos);
+      else
+        PUT_32BIT_LSB_FIRST (link->data1 + 16, 0xFFFFFFFFL);
+      if (link->scroll)
+        PUT_32BIT_LSB_FIRST (link->data1 + 20, link->scroll->topicpos);
+      else
+        PUT_32BIT_LSB_FIRST (link->data1 + 20, 0xFFFFFFFFL);
+      if (link->nexttopic)
+        PUT_32BIT_LSB_FIRST (link->data1 + 24, link->nexttopic->topicpos);
+      else
+        PUT_32BIT_LSB_FIRST (link->data1 + 24, 0xFFFFFFFFL);
+    }
+
+  /*
+   * Having done all _that_, we're now finally ready to go
+   * through and create the |TOPIC section in its final form.
+   */
+
+  h->lasttopiclink = -1L;
+  h->lasttopicstart = 0L;
+  f = whlp_new_file (h, "|TOPIC");
+  h->topicblock_remaining = -1;
+  whlp_topicsect_write (h, f, NULL, 0, 0);      /* start the first block */
+  for (i = 0; i < nlinks; i++)
+    {
+      unsigned char header[21];
+      struct topiclink *otherlink;
+
+      link = index234 (h->text, i);
+
+      /*
+       * Create and output the TOPICLINK header.
+       */
+      PUT_32BIT_LSB_FIRST (header + 0, 21 + link->len1 + link->len2);
+      PUT_32BIT_LSB_FIRST (header + 4, link->len2);
+      if (i == 0)
+        {
+          PUT_32BIT_LSB_FIRST (header + 8, 0xFFFFFFFFL);
+        }
+      else
+        {
+          otherlink = index234 (h->text, i - 1);
+          PUT_32BIT_LSB_FIRST (header + 8, otherlink->topicpos);
+        }
+      if (i + 1 >= nlinks)
+        {
+          PUT_32BIT_LSB_FIRST (header + 12, 0xFFFFFFFFL);
+        }
+      else
+        {
+          otherlink = index234 (h->text, i + 1);
+          PUT_32BIT_LSB_FIRST (header + 12, otherlink->topicpos);
+        }
+      PUT_32BIT_LSB_FIRST (header + 16, 21 + link->len1);
+      header[20] = link->recordtype;
+      whlp_topicsect_write (h, f, header, 21, 21 + link->len1);
+
+      /*
+       * Fill in the `first topiclink' pointer in the block
+       * header if appropriate. (We do this _after_ outputting
+       * the header because then we can be sure we'll be in the
+       * same block as we think we are.)
+       */
+      if (h->firsttopiclink_offset > 0)
+        {
+          whlp_file_seek (f, h->firsttopiclink_offset, 0);
+          whlp_file_add_long (f, link->topicpos);
+          h->firsttopiclink_offset = 0;
+          whlp_file_seek (f, 0, 2);
+        }
+
+      /*
+       * Update the `last topiclink', and possibly `last
+       * topicstart', pointers.
+       */
+      h->lasttopiclink = link->topicpos;
+      if (link->recordtype == 2)
+        h->lasttopicstart = link->topicpos;
+
+      /*
+       * Output LinkData1 and LinkData2.
+       */
+      whlp_topicsect_write (h, f, link->data1, link->len1, link->len1);
+      whlp_topicsect_write (h, f, link->data2, link->len2, 0);
+    }
+}
+
+/* ----------------------------------------------------------------------
+ * Manage the index sections (|KWDATA, |KWMAP, |KWBTREE).
+ */
+
+/*
+ * Record that index term `index' refers to `topic'. The term
+ * string is copied.
+ */
+void
+whlp_index_term (WHLP h, char *index, WHLP_TOPIC topic)
+{
+  struct indexrec *idx = mknew (struct indexrec);
+
+  idx->term = dupstr (index);
+  idx->topic = topic;
+  /*
+   * If this reference is already in the tree, just silently drop
+   * the duplicate.
+   */
+  if (add234 (h->index, idx) != idx)
+    {
+      sfree (idx->term);
+      sfree (idx);
+    }
+}
+
+/*
+ * Build the |KWDATA internal file and collapse the index tree so
+ * that each distinct term is represented by exactly one record
+ * carrying a (count, offset) pair into |KWDATA.
+ */
+static void
+whlp_build_kwdata (WHLP h)
+{
+  struct file *f;
+  int i;
+  struct indexrec *first, *next;
+
+  f = whlp_new_file (h, "|KWDATA");
+
+  /*
+   * Go through the index B-tree, condensing all sequences of
+   * records with the same term into a single one with a valid
+   * (count,offset) pair, and building up the KWDATA section.
+   */
+  i = 0;
+  while ((first = index234 (h->index, i)) != NULL)
+    {
+      first->count = 1;
+      first->offset = whlp_file_offset (f);
+      whlp_file_add_long (f, first->topic->link->topicoffset);
+      i++;
+      while ((next = index234 (h->index, i)) != NULL &&
+             !strcmp (first->term, next->term))
+        {
+          /*
+           * The next index record has the same term. Fold it
+           * into this one and remove from the tree.
+           */
+          whlp_file_add_long (f, next->topic->link->topicoffset);
+          first->count++;
+          delpos234 (h->index, i);
+          sfree (next->term);
+          sfree (next);
+        }
+    }
+
+  /*
+   * Now we should have `index' in a form that's ready to
+   * construct |KWBTREE. So we can return.
+   */
+}
+
+/* ----------------------------------------------------------------------
+ * Standard chunks of data for the |SYSTEM and |FONT sections.
+ */
+
+/*
+ * Append one |SYSTEM record to `f': a 16-bit record id, a 16-bit
+ * length, then `length' bytes of `data'.
+ */
+static void
+whlp_system_record (struct file *f, int id, const void *data, int length)
+{
+  whlp_file_add_short (f, id);
+  whlp_file_add_short (f, length);
+  whlp_file_add (f, data, length);
+}
+
+/*
+ * Write the fixed start of the |SYSTEM section: the header fields
+ * followed by the two locale records (ids 9 and 11).
+ */
+static void
+whlp_standard_systemsection (struct file *f)
+{
+  const char lcid[] = { 0, 0, 0, 0, 0, 0, 0, 0, 9, 4 };
+  const char charset[] = { 0, 0, 0, 2, 0 };
+
+  whlp_file_add_short (f, 0x36C);       /* magic number */
+  whlp_file_add_short (f, 33);  /* minor version: HCW 4.00 Win95+ */
+  whlp_file_add_short (f, 1);   /* major version */
+  whlp_file_add_long (f, time (NULL));  /* generation date */
+  whlp_file_add_short (f, 0);   /* flags=0 means no compression */
+
+  /*
+   * Add some magic locale identifier information. (We ought to
+   * find out something about what all this means; see the TODO
+   * list at the top of the file.)
+   */
+  whlp_system_record (f, 9, lcid, sizeof (lcid));
+  whlp_system_record (f, 11, charset, sizeof (charset));
+}
+
+/*
+ * Add the help file title (system record 1), including its
+ * terminating NUL.
+ */
+void
+whlp_title (WHLP h, char *title)
+{
+  whlp_system_record (h->systemfile, 1, title, 1 + strlen (title));
+}
+
+/*
+ * Add the copyright string (system record 2), including its
+ * terminating NUL.
+ */
+void
+whlp_copyright (WHLP h, char *copyright)
+{
+  whlp_system_record (h->systemfile, 2, copyright, 1 + strlen (copyright));
+}
+
+/*
+ * Add a startup macro (system record 4), including its terminating
+ * NUL. May be called several times.
+ */
+void
+whlp_start_macro (WHLP h, char *macro)
+{
+  whlp_system_record (h->systemfile, 4, macro, 1 + strlen (macro));
+}
+
+/*
+ * Remember `t' as the primary topic; it is written to the |SYSTEM
+ * section later by whlp_do_primary_topic.
+ */
+void
+whlp_primary_topic (WHLP h, WHLP_TOPIC t)
+{
+  h->ptopic = t;
+}
+
+/*
+ * Write the primary topic's TOPICOFFSET as system record 3.
+ * Dereferences h->ptopic, so a primary topic must have been set
+ * and laid out before this is called.
+ */
+static void
+whlp_do_primary_topic (WHLP h)
+{
+  unsigned char firsttopic[4];
+  PUT_32BIT_LSB_FIRST (firsttopic, h->ptopic->link->topicoffset);
+  whlp_system_record (h->systemfile, 3, firsttopic, sizeof (firsttopic));
+}
+
+/*
+ * Register a font descriptor and return its index, which is the
+ * value to pass to whlp_set_font. The font name is copied; if an
+ * equal name is already in the fontnames tree the existing string
+ * is shared and the new copy is freed.
+ */
+int
+whlp_create_font (WHLP h, char *font, int family, int halfpoints,
+                  int rendition, int r, int g, int b)
+{
+  char *fontname = dupstr (font);
+  struct fontdesc *fontdesc;
+  int index;
+
+  font = add234 (h->fontnames, fontname);
+  if (font != fontname)
+    {
+      /* The font name was already present. Free the new copy. */
+      sfree (fontname);
+    }
+
+  fontdesc = mknew (struct fontdesc);
+  fontdesc->font = font;
+  fontdesc->family = family;
+  fontdesc->halfpoints = halfpoints;
+  fontdesc->rendition = rendition;
+  fontdesc->r = r;
+  fontdesc->g = g;
+  fontdesc->b = b;
+
+  index = count234 (h->fontdescs);
+  addpos234 (h->fontdescs, fontdesc, index);
+  return index;
+}
+
+/*
+ * Write the |FONT section to `f': an 8-byte header, one 32-byte
+ * record per font name, then one 11-byte record per font
+ * descriptor.
+ */
+static void
+whlp_make_fontsection (WHLP h, struct file *f)
+{
+  int i;
+  char *fontname;
+  struct fontdesc *fontdesc;
+
+  /*
+   * Header block: number of font names, number of font
+   * descriptors, offset to font names, and offset to font
+   * descriptors.
+   */
+  whlp_file_add_short (f, count234 (h->fontnames));
+  whlp_file_add_short (f, count234 (h->fontdescs));
+  whlp_file_add_short (f, 8);
+  whlp_file_add_short (f, 8 + 32 * count234 (h->fontnames));
+
+  /*
+   * Font names.
+   */
+  for (i = 0; (fontname = index234 (h->fontnames, i)) != NULL; i++)
+    {
+      char data[32];
+      /*
+       * Zero-fill the record and copy at most 31 bytes, so that
+       * the stored name is always NUL-terminated even when the
+       * caller supplied an over-long font name.
+       */
+      memset (data, 0, sizeof (data));
+      strncpy (data, fontname, sizeof (data) - 1);
+      whlp_file_add (f, data, sizeof (data));
+    }
+
+  /*
+   * Font descriptors.
+   */
+  for (i = 0; (fontdesc = index234 (h->fontdescs, i)) != NULL; i++)
+    {
+      int fontpos;
+      void *ret;
+
+      /* Each descriptor refers to its name by position in the tree. */
+      ret = findpos234 (h->fontnames, fontdesc->font, NULL, &fontpos);
+      assert (ret != NULL);
+
+      whlp_file_add_char (f, fontdesc->rendition);
+      whlp_file_add_char (f, fontdesc->halfpoints);
+      whlp_file_add_char (f, fontdesc->family);
+      whlp_file_add_short (f, fontpos);
+      /* Foreground RGB */
+      whlp_file_add_char (f, fontdesc->r);
+      whlp_file_add_char (f, fontdesc->g);
+      whlp_file_add_char (f, fontdesc->b);
+      /* Background RGB is apparently unused and always set to zero */
+      whlp_file_add_char (f, 0);
+      whlp_file_add_char (f, 0);
+      whlp_file_add_char (f, 0);
+    }
+
+}
+
+/* ----------------------------------------------------------------------
+ * Routines to manage a B-tree type file.
+ */
+
+/*
+ * Serialise the sorted tree `tree' into `f' as a B-tree file.
+ *
+ *  - `flags', `pagesize' and `dataformat' go into the B-tree
+ *    header; `pagesize' must not exceed MAX_PAGE_SIZE and
+ *    `dataformat' must not be longer than 16 bytes.
+ *  - `leaffn' formats one element as a leaf entry into a buffer of
+ *    MAX_PAGE_SIZE bytes and returns its length; `indexfn' does the
+ *    same for the key part of an index entry.
+ *  - if `map' is non-NULL it receives a 16-bit count followed by one
+ *    (first entry number, leaf page number) pair per leaf page.
+ *
+ * A single leaf entry must fit in an empty leaf page (pagesize - 8
+ * bytes), and an index key plus its 2-byte page number must fit in
+ * an empty index page (pagesize - 6 bytes); otherwise no progress
+ * could be made, so this is asserted.
+ */
+static void
+whlp_make_btree (struct file *f, int flags, int pagesize,
+                 char *dataformat, tree234 * tree,
+                 struct file *map, bt_index_fn indexfn, bt_leaf_fn leaffn)
+{
+  void **page_elements = NULL;
+  int npages = 0, pagessize = 0;
+  int npages_this_level, nentries, nlevels;
+  int total_leaf_entries;
+  char btdata[MAX_PAGE_SIZE];
+  int btlen;
+  int page_start, fixups_offset, unused_bytes;
+  void *element;
+  int index;
+
+  assert (pagesize <= MAX_PAGE_SIZE);
+
+  /*
+   * Start with the B-tree header. We'll have to come back and
+   * fill in a few bits later.
+   */
+  whlp_file_add_short (f, 0x293B);      /* magic number */
+  whlp_file_add_short (f, flags);
+  whlp_file_add_short (f, pagesize);
+  {
+    char data[16];
+    memset (data, 0, sizeof (data));
+    assert (strlen (dataformat) <= sizeof (data));
+    memcpy (data, dataformat, strlen (dataformat));
+    whlp_file_add (f, data, sizeof (data));
+  }
+  whlp_file_add_short (f, 0);   /* must-be-zero */
+  fixups_offset = whlp_file_offset (f);
+  whlp_file_add_short (f, 0);   /* page splits; fix up later */
+  whlp_file_add_short (f, 0);   /* root page index; fix up later */
+  whlp_file_add_short (f, -1);  /* must-be-minus-one */
+  whlp_file_add_short (f, 0);   /* total number of pages; fix later */
+  whlp_file_add_short (f, 0);   /* number of levels; fix later */
+  whlp_file_add_long (f, count234 (tree));      /* total B-tree entries */
+
+  /*
+   * If we have a map section, leave space at the start for its
+   * element count.
+   */
+  if (map)
+    {
+      whlp_file_add_short (map, 0);
+    }
+
+  /*
+   * Now create the leaf pages.
+   */
+  index = 0;
+
+  npages_this_level = 0;
+  total_leaf_entries = 0;
+
+  element = index234 (tree, index);
+  while (element)
+    {
+      /*
+       * Make a new leaf page. page_elements[] remembers the
+       * first element of every page, for use as index keys.
+       */
+      npages_this_level++;
+      if (npages >= pagessize)
+        {
+          pagessize = npages + 32;
+          page_elements = resize (page_elements, pagessize);
+        }
+      page_elements[npages++] = element;
+
+      /*
+       * Leave space in the leaf page for the header. We'll
+       * come back and add it later.
+       */
+      page_start = whlp_file_offset (f);
+      whlp_file_add (f, "12345678", 8);
+      unused_bytes = pagesize - 8;
+      nentries = 0;
+
+      /*
+       * Now add leaf entries until we run out of room, or out of
+       * elements.
+       */
+      while (element)
+        {
+          btlen = leaffn (element, btdata);
+          /*
+           * An entry too big for an empty page would never be
+           * consumed, and the outer loop would emit empty pages
+           * for ever.
+           */
+          assert (btlen <= pagesize - 8);
+          if (btlen > unused_bytes)
+            break;
+          whlp_file_add (f, btdata, btlen);
+          unused_bytes -= btlen;
+          nentries++;
+          index++;
+          element = index234 (tree, index);
+        }
+
+      /*
+       * Now add the unused bytes, and then go back and put
+       * in the header.
+       */
+      whlp_file_fill (f, unused_bytes);
+      whlp_file_seek (f, page_start, 0);
+      whlp_file_add_short (f, unused_bytes);
+      whlp_file_add_short (f, nentries);
+      /* Previous-page indicator will automatically go to -1 when
+       * absent. */
+      whlp_file_add_short (f, npages - 2);
+      /* Next-page indicator must be -1 if we're at the end. */
+      if (!element)
+        whlp_file_add_short (f, -1);
+      else
+        whlp_file_add_short (f, npages);
+      whlp_file_seek (f, 0, 2);
+
+      /*
+       * If we have a map section, add a map entry.
+       */
+      if (map)
+        {
+          whlp_file_add_long (map, total_leaf_entries);
+          whlp_file_add_short (map, npages_this_level - 1);
+        }
+      total_leaf_entries += nentries;
+    }
+
+  /*
+   * If we have a map section, write the total number of map
+   * entries into it.
+   */
+  if (map)
+    {
+      whlp_file_seek (map, 0, 0);
+      whlp_file_add_short (map, npages_this_level);
+      whlp_file_seek (map, 0, 2);
+    }
+
+  /*
+   * Now create further levels until we're down to one page.
+   */
+  nlevels = 1;
+  while (npages_this_level > 1)
+    {
+      int first = npages - npages_this_level;
+      int last = npages - 1;
+      int current;
+
+      nlevels++;
+      npages_this_level = 0;
+
+      current = first;
+      while (current <= last)
+        {
+          /*
+           * Make a new index page.
+           */
+          npages_this_level++;
+          if (npages >= pagessize)
+            {
+              pagessize = npages + 32;
+              page_elements = resize (page_elements, pagessize);
+            }
+          page_elements[npages++] = page_elements[current];
+
+          /*
+           * Leave space for some of the header, but we can put
+           * in the PreviousPage link already.
+           */
+          page_start = whlp_file_offset (f);
+          whlp_file_add (f, "1234", 4);
+          whlp_file_add_short (f, current);
+          unused_bytes = pagesize - 6;
+
+          /*
+           * Now add index entries until we run out of either
+           * space or pages.
+           */
+          current++;
+          nentries = 0;
+          while (current <= last)
+            {
+              btlen = indexfn (page_elements[current], btdata);
+              /*
+               * Every index page must be able to hold at least
+               * one entry, or this level would be no smaller
+               * than the one below and we would never finish.
+               */
+              assert (btlen + 2 <= pagesize - 6);
+              if (btlen + 2 > unused_bytes)
+                break;
+              whlp_file_add (f, btdata, btlen);
+              whlp_file_add_short (f, current);
+              unused_bytes -= btlen + 2;
+              nentries++;
+              current++;
+            }
+
+          /*
+           * Now add the unused bytes, and then go back and put
+           * in the header.
+           */
+          whlp_file_fill (f, unused_bytes);
+          whlp_file_seek (f, page_start, 0);
+          whlp_file_add_short (f, unused_bytes);
+          whlp_file_add_short (f, nentries);
+          whlp_file_seek (f, 0, 2);
+        }
+    }
+
+  /*
+   * Now we have all our pages ready, and we know where our root
+   * page is. Fix up the main B-tree header.
+   */
+  whlp_file_seek (f, fixups_offset, 0);
+  /* Creation of every page requires a split unless it's the first in
+   * a new level. Hence, page splits equals pages minus levels. */
+  whlp_file_add_short (f, npages - nlevels);
+  whlp_file_add_short (f, npages - 1);  /* root page index */
+  whlp_file_add_short (f, -1);  /* must-be-minus-one */
+  whlp_file_add_short (f, npages);      /* total number of pages */
+  whlp_file_add_short (f, nlevels);     /* number of levels */
+
+  /* Just for tidiness, seek to the end of the file :-) */
+  whlp_file_seek (f, 0, 2);
+
+  /* Clean up. */
+  sfree (page_elements);
+}
+
+
+/* ----------------------------------------------------------------------
+ * Routines to manage the `internal file' structure.
+ */
+
+/*
+ * Create an empty internal file. When `name' is non-NULL the name
+ * is copied and the file is added to h->files; an unnamed file is
+ * not entered in the tree and must be freed by the caller.
+ */
+static struct file *
+whlp_new_file (WHLP h, char *name)
+{
+  struct file *f;
+  f = mknew (struct file);
+  f->data = NULL;
+  f->pos = f->len = f->size = 0;
+  if (name)
+    {
+      f->name = dupstr (name);
+      add234 (h->files, f);
+    }
+  else
+    {
+      f->name = NULL;
+    }
+  return f;
+}
+
+/*
+ * Free an internal file together with its data and name.
+ */
+static void
+whlp_free_file (struct file *f)
+{
+  sfree (f->data);
+  sfree (f->name);              /* may be NULL */
+  sfree (f);
+}
+
+/*
+ * Write `len' bytes from `data' at the current position of `f',
+ * growing the buffer if needed, and advance the position. The
+ * file length is extended when the write goes past the old end.
+ */
+static void
+whlp_file_add (struct file *f, const void *data, int len)
+{
+  if (f->pos + len > f->size)
+    {
+      f->size = f->pos + len + 1024;
+      f->data = resize (f->data, f->size);
+    }
+  memcpy (f->data + f->pos, data, len);
+  f->pos += len;
+  if (f->len < f->pos)
+    f->len = f->pos;
+}
+
+/*
+ * Write the low 8 bits of `data' as a single byte.
+ */
+static void
+whlp_file_add_char (struct file *f, int data)
+{
+  unsigned char s;
+  s = data & 0xFF;
+  whlp_file_add (f, &s, 1);
+}
+
+/*
+ * Write `data' as a 16-bit value, least significant byte first.
+ */
+static void
+whlp_file_add_short (struct file *f, int data)
+{
+  unsigned char s[2];
+  PUT_16BIT_LSB_FIRST (s, data);
+  whlp_file_add (f, s, 2);
+}
+
+/*
+ * Write `data' as a 32-bit value, least significant byte first.
+ */
+static void
+whlp_file_add_long (struct file *f, int data)
+{
+  unsigned char s[4];
+  PUT_32BIT_LSB_FIRST (s, data);
+  whlp_file_add (f, s, 4);
+}
+
+/*
+ * Write `len' zero bytes at the current position of `f', with the
+ * same growth and length rules as whlp_file_add.
+ */
+static void
+whlp_file_fill (struct file *f, int len)
+{
+  if (f->pos + len > f->size)
+    {
+      f->size = f->pos + len + 1024;
+      f->data = resize (f->data, f->size);
+    }
+  memset (f->data + f->pos, 0, len);
+  f->pos += len;
+  if (f->len < f->pos)
+    f->len = f->pos;
+}
+
+/*
+ * Move the current position of `f' to `pos' relative to the start
+ * (whence 0), the current position (whence 1) or the end of the
+ * data written so far (any other value).
+ */
+static void
+whlp_file_seek (struct file *f, int pos, int whence)
+{
+  f->pos = (whence == 0 ? 0 : whence == 1 ? f->pos : f->len) + pos;
+}
+
+/*
+ * Return the current position of `f'.
+ */
+static int
+whlp_file_offset (struct file *f)
+{
+  return f->pos;
+}
+
+/* ----------------------------------------------------------------------
+ * Open and close routines; final wrapper around everything.
+ */
+
+/*
+ * Create a new help-file builder: allocate the state, create the
+ * internal trees, and start the |CTXOMAP and |SYSTEM internal
+ * files. The result is disposed of by whlp_close or whlp_abandon.
+ */
+WHLP
+whlp_new (void)
+{
+  WHLP ret;
+  struct file *f;
+
+  ret = mknew (struct WHLP_tag);
+
+  /*
+   * Internal B-trees.
+   */
+  ret->files = newtree234 (filecmp);
+  ret->pre_contexts = newtree234 (NULL);
+  ret->contexts = newtree234 (ctxcmp);
+  ret->titles = newtree234 (ttlcmp);
+  ret->text = newtree234 (NULL);
+  ret->index = newtree234 (idxcmp);
+  ret->tabstops = newtree234 (tabcmp);
+  ret->fontnames = newtree234 (fontcmp);
+  ret->fontdescs = newtree234 (NULL);
+
+  /*
+   * Some standard files.
+   */
+  f = whlp_new_file (ret, "|CTXOMAP");
+  whlp_file_add_short (f, 0);   /* dummy section */
+  f = whlp_new_file (ret, "|SYSTEM");
+  whlp_standard_systemsection (f);
+  ret->systemfile = f;
+
+  /*
+   * Other variables.
+   */
+  ret->prevtopic = NULL;
+  ret->ncontexts = 0;
+  ret->link = NULL;
+
+  return ret;
+}
+
+/*
+ * Finish the help file and write it to `filename', then free `h'.
+ * If the output file cannot be opened, everything is freed and
+ * nothing is written; no error is reported to the caller.
+ */
+void
+whlp_close (WHLP h, char *filename)
+{
+  FILE *fp;
+  int filecount, offset, index, filelen;
+  struct file *file, *map, *md;
+  context *ctx;
+  int has_index;
+
+  /*
+   * Lay out the topic section.
+   */
+  whlp_topic_layout (h);
+
+  /*
+   * Finish off the system section.
+   */
+  whlp_do_primary_topic (h);
+
+  /*
+   * Assemble the font section.
+   */
+  file = whlp_new_file (h, "|FONT");
+  whlp_make_fontsection (h, file);
+
+  /*
+   * Set up the index.
+   */
+  has_index = (count234 (h->index) != 0);
+  if (has_index)
+    whlp_build_kwdata (h);
+
+  /*
+   * Set up the `titles' B-tree for the |TTLBTREE section.
+   */
+  for (index = 0; (ctx = index234 (h->contexts, index)) != NULL; index++)
+    add234 (h->titles, ctx);
+
+  /*
+   * Construct the various B-trees.
+   */
+  file = whlp_new_file (h, "|CONTEXT");
+  whlp_make_btree (file, 0x0002, 0x0800, "L4",
+                   h->contexts, NULL, ctxindex, ctxleaf);
+
+  file = whlp_new_file (h, "|TTLBTREE");
+  whlp_make_btree (file, 0x0002, 0x0800, "Lz",
+                   h->titles, NULL, ttlindex, ttlleaf);
+
+  if (has_index)
+    {
+      file = whlp_new_file (h, "|KWBTREE");
+      map = whlp_new_file (h, "|KWMAP");
+      whlp_make_btree (file, 0x0002, 0x0800, "F24",
+                       h->index, map, idxindex, idxleaf);
+    }
+
+  /*
+   * Open the output file.
+   */
+  fp = fopen (filename, "wb");
+  if (!fp)
+    {
+      whlp_abandon (h);
+      return;
+    }
+
+  /*
+   * Work out all the file offsets.
+   */
+  filecount = count234 (h->files);
+  offset = 16;                  /* just after header */
+  for (index = 0; index < filecount; index++)
+    {
+      file = index234 (h->files, index);
+      file->fileoffset = offset;
+      offset += 9 + file->len;  /* 9 is size of file header */
+    }
+  /* Now `offset' holds what will be the offset of the master directory. */
+
+  md = whlp_new_file (h, NULL); /* master directory file */
+  whlp_make_btree (md, 0x0402, 0x0400, "z4",
+                   h->files, NULL, fileindex, fileleaf);
+
+  filelen = offset + 9 + md->len;
+
+  /*
+   * Write out the file header.
+   */
+  {
+    unsigned char header[16];
+    PUT_32BIT_LSB_FIRST (header + 0, 0x00035F3FL);      /* magic */
+    PUT_32BIT_LSB_FIRST (header + 4, offset);   /* offset to directory */
+    PUT_32BIT_LSB_FIRST (header + 8, 0xFFFFFFFFL);      /* first free block */
+    PUT_32BIT_LSB_FIRST (header + 12, filelen); /* total file length */
+    fwrite (header, 1, 16, fp);
+  }
+
+  /*
+   * Now write out each file.
+   */
+  for (index = 0; index <= filecount; index++)
+    {
+      int used, reserved;
+      unsigned char header[9];
+
+      if (index == filecount)
+        file = md;              /* master directory comes last */
+      else
+        file = index234 (h->files, index);
+
+      used = file->len;
+      reserved = used + 9;
+
+      /* File header. */
+      PUT_32BIT_LSB_FIRST (header + 0, reserved);
+      PUT_32BIT_LSB_FIRST (header + 4, used);
+      header[8] = 0;            /* flags */
+      fwrite (header, 1, 9, fp);
+
+      /* File data. */
+      fwrite (file->data, 1, file->len, fp);
+    }
+
+  fclose (fp);
+
+  /* The master directory is unnamed, so it is not in h->files. */
+  whlp_free_file (md);
+
+  whlp_abandon (h);             /* now free everything */
+}
+
+/*
+ * Free `h' and everything it owns without writing any output.
+ */
+void
+whlp_abandon (WHLP h)
+{
+  struct file *f;
+  struct indexrec *idx;
+  struct topiclink *link;
+  struct fontdesc *fontdesc;
+  char *fontname;
+  context *ctx;
+
+  /* Get rid of any lingering tab stops. */
+  whlp_para_reset (h);
+
+  /* Delete the (now empty) tabstops tree. */
+  freetree234 (h->tabstops);
+
+  /* Delete the index tree and all its entries. */
+  while ((idx = index234 (h->index, 0)) != NULL)
+    {
+      delpos234 (h->index, 0);
+      sfree (idx->term);
+      sfree (idx);
+    }
+  freetree234 (h->index);
+
+  /* Delete the text tree and all its topiclinks. */
+  while ((link = index234 (h->text, 0)) != NULL)
+    {
+      delpos234 (h->text, 0);
+      sfree (link->data1);      /* may be NULL */
+      sfree (link->data2);      /* may be NULL */
+      sfree (link);
+    }
+  freetree234 (h->text);
+
+  /* Delete the fontdescs tree and all its entries. */
+  while ((fontdesc = index234 (h->fontdescs, 0)) != NULL)
+    {
+      delpos234 (h->fontdescs, 0);
+      sfree (fontdesc);
+    }
+  freetree234 (h->fontdescs);
+
+  /* Delete the fontnames tree and all its entries. */
+  while ((fontname = index234 (h->fontnames, 0)) != NULL)
+    {
+      delpos234 (h->fontnames, 0);
+      sfree (fontname);
+    }
+  freetree234 (h->fontnames);
+
+  /* There might be an unclosed paragraph in h->link. */
+  if (h->link)
+    sfree (h->link);            /* if so it won't have data1 or data2 */
+
+  /*
+   * `titles' contains copies of the `contexts' entries, so we
+   * don't need to free them here.
+   */
+  freetree234 (h->titles);
+
+  /*
+   * `contexts' and `pre_contexts' _both_ contain contexts that
+   * need freeing. (pre_contexts shouldn't contain any, unless
+   * the help generation was abandoned half-way through.)
+   */
+  while ((ctx = index234 (h->pre_contexts, 0)) != NULL)
+    {
+      /*
+       * Remove the entry from the tree it was fetched from. The
+       * index tree has already been freed above and must not be
+       * touched again.
+       */
+      delpos234 (h->pre_contexts, 0);
+      sfree (ctx->name);
+      sfree (ctx->title);
+      sfree (ctx);
+    }
+  freetree234 (h->pre_contexts);
+  while ((ctx = index234 (h->contexts, 0)) != NULL)
+    {
+      delpos234 (h->contexts, 0);
+      sfree (ctx->name);
+      sfree (ctx->title);
+      sfree (ctx);
+    }
+  freetree234 (h->contexts);
+
+  /*
+   * Free all the internal files.
+   */
+  while ((f = index234 (h->files, 0)) != NULL)
+    {
+      delpos234 (h->files, 0);
+      whlp_free_file (f);
+    }
+  freetree234 (h->files);
+
+  sfree (h);
+}
+
+#ifdef TESTMODE
+
+int
+main (void)
+{
+  WHLP h;
+  WHLP_TOPIC t1, t2, t3;
+  char *e;
+  char mymacro[100];
+
+  h = whlp_new ();
+
+  whlp_title (h, "Test Help File");
+  whlp_copyright (h, "This manual is copyright \251 2001 Simon Tatham."
+                  " All rights reversed.");
+  whlp_start_macro (h, "CB(\"btn_about\",\"&About\",\"About()\")");
+  whlp_start_macro (h, "CB(\"btn_up\",\"&Up\",\"Contents()\")");
+  whlp_start_macro (h, "BrowseButtons()");
+
+  whlp_create_font (h, "Arial", WHLP_FONTFAM_SANS, 30, 0, 0, 0, 0);
+  whlp_create_font (h, "Times New Roman", WHLP_FONTFAM_SERIF, 24,
+                    WHLP_FONT_STRIKEOUT, 0, 0, 0);
+  whlp_create_font (h, "Times New Roman", WHLP_FONTFAM_SERIF, 24,
+                    WHLP_FONT_ITALIC, 0, 0, 0);
+  whlp_create_font (h, "Courier New", WHLP_FONTFAM_FIXED, 24, 0, 0, 0, 0);
+
+  t1 = whlp_register_topic (h, "foobar", &e);
+  assert (t1 != NULL);
+  t2 = whlp_register_topic (h, "M359HPEHGW", &e);
+  assert (t2 != NULL);
+  t3 = whlp_register_topic (h, "Y5VQEXZQVJ", &e);
+  assert (t3 == NULL && !strcmp (e, "M359HPEHGW"));
+  t3 = whlp_register_topic (h, NULL, NULL);
+  assert (t3 != NULL);
+
+  whlp_primary_topic (h, t2);
+
+  whlp_prepare (h);
+
+  whlp_begin_topic (h, t1, "First Topic", "DB(\"btn_up\")", NULL);
+
+  whlp_begin_para (h, WHLP_PARA_NONSCROLL);
+  whlp_set_font (h, 0);
+  whlp_text (h, "Foobar");
+  whlp_end_para (h);
+
+  whlp_begin_para (h, WHLP_PARA_SCROLL);
+  whlp_set_font (h, 1);
+  whlp_text (h, "This is a silly paragraph with ");
+  whlp_set_font (h, 3);
+  whlp_text (h, "code");
+  whlp_set_font (h, 1);
+  whlp_text (h, " in it.");
+  whlp_end_para (h);
+
+  whlp_para_attr (h, WHLP_PARA_SPACEABOVE, 12);
+  whlp_begin_para (h, WHLP_PARA_SCROLL);
+  whlp_set_font (h, 1);
+  whlp_text (h, "This second, equally silly, paragraph has ");
+  whlp_set_font (h, 2);
+  whlp_text (h, "emphasis");
+  whlp_set_font (h, 1);
+  whlp_text
(h, " just to prove we can do it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. 
This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. 
This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. 
This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. 
This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para (h); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Have a "); + whlp_start_hyperlink (h, t2); + whlp_text (h, "hyperlink"); + whlp_end_hyperlink (h); + whlp_text (h, " to another topic."); + whlp_end_para (h); + + sprintf (mymacro, "CBB(\"btn_up\",\"JI(`',`%s')\");EB(\"btn_up\")", + whlp_topic_id (t3)); + + whlp_begin_topic (h, t2, "Second Topic", mymacro, NULL); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "This topic contains no non-scrolling region. I would" + " illustrate this with a ludicrously long paragraph, but that" + " would get very tedious very quickly. Instead I'll just waffle" + " on pointlessly for a little bit and then shut up."); + whlp_end_para (h); + + whlp_set_tabstop (h, 36, WHLP_ALIGN_LEFT); + whlp_para_attr (h, WHLP_PARA_LEFTINDENT, 36); + whlp_para_attr (h, WHLP_PARA_FIRSTLINEINDENT, -36); + whlp_para_attr (h, WHLP_PARA_SPACEABOVE, 12); + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "\225"); /* bullet */ + whlp_tab (h); + whlp_text (h, "This is a paragraph with a bullet. 
With any luck it should" + " work exactly like it used to in the old NASM help file."); + whlp_end_para (h); + + whlp_set_tabstop (h, 128, WHLP_ALIGN_RIGHT); + whlp_set_tabstop (h, 256, WHLP_ALIGN_CENTRE); + whlp_set_tabstop (h, 384, WHLP_ALIGN_LEFT); + whlp_para_attr (h, WHLP_PARA_SPACEABOVE, 12); + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Ooh:"); + whlp_tab (h); + whlp_text (h, "Right?"); + whlp_tab (h); + whlp_text (h, "Centre?"); + whlp_tab (h); + whlp_text (h, "Left?"); + whlp_end_para (h); + + whlp_set_tabstop (h, 128, WHLP_ALIGN_RIGHT); + whlp_set_tabstop (h, 256, WHLP_ALIGN_CENTRE); + whlp_set_tabstop (h, 384, WHLP_ALIGN_LEFT); + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "Aah:"); + whlp_tab (h); + whlp_text (h, "R?"); + whlp_tab (h); + whlp_text (h, "C?"); + whlp_tab (h); + whlp_text (h, "L?"); + whlp_end_para (h); + + sprintf (mymacro, "CBB(\"btn_up\",\"JI(`',`%s')\");EB(\"btn_up\")", + whlp_topic_id (t1)); + + whlp_begin_topic (h, t3, "Third Topic", mymacro, NULL); + + whlp_begin_para (h, WHLP_PARA_SCROLL); + whlp_set_font (h, 1); + whlp_text (h, "This third topic is almost as boring as the first. Woo!"); + whlp_end_para (h); + + /* + * Browse sequence. + */ + whlp_browse_link (h, t1, t2); + whlp_browse_link (h, t2, t3); + + /* + * Index terms. 
+ */ + whlp_index_term (h, "foobarbaz", t1); + whlp_index_term (h, "foobarbaz", t2); + whlp_index_term (h, "foobarbaz", t3); + whlp_index_term (h, "foobar", t1); + whlp_index_term (h, "foobar", t2); + whlp_index_term (h, "foobaz", t1); + whlp_index_term (h, "foobaz", t3); + whlp_index_term (h, "barbaz", t2); + whlp_index_term (h, "barbaz", t3); + whlp_index_term (h, "foo", t1); + whlp_index_term (h, "bar", t2); + whlp_index_term (h, "baz", t3); + + whlp_close (h, "test.hlp"); + return 0; +} + +#endif diff --git a/Docs/src/bin/halibut/winhelp.h b/Docs/src/bin/halibut/winhelp.h new file mode 100644 index 00000000..7780de2f --- /dev/null +++ b/Docs/src/bin/halibut/winhelp.h @@ -0,0 +1,173 @@ +/* + * winhelp.h header file for winhelp.c + */ + +typedef struct WHLP_tag *WHLP; + +typedef struct WHLP_TOPIC_tag *WHLP_TOPIC; + +/* + * Initialise a new WHlp context and begin accumulating data in it. + */ +WHLP whlp_new (void); + +/* + * Close a WHlp context and write out the help file it has created. + */ +void whlp_close (WHLP h, char *filename); + +/* + * Abandon and free a WHlp context without writing out anything. + */ +void whlp_abandon (WHLP h); + +/* + * Specify the title and copyright notice of a help file. Also + * specify Help macros to be run on loading. + */ +void whlp_title (WHLP h, char *title); +void whlp_copyright (WHLP h, char *copyright); +void whlp_start_macro (WHLP h, char *macro); + +/* + * Register a help topic. Irritatingly, due to weird phase-order + * issues with the whole file format, you have to register all your + * topics _before_ actually outputting your text. This seems likely + * to require two passes over the source document. + * + * If you want to specify a particular context string (for + * reference from other programs, to provide context-sensitive + * help), you can supply it here. Otherwise, just pass NULL and a + * nondescript one will be allocated automatically. 
+ * + * If you specify two context strings which clash under the Windows + * help file hash algorithm, this function will return NULL and + * provide a pointer to the other context string that this one + * clashed with, and you must tell your user to fix the clash. + * Sadly this is the only way to do it; despite HLP files having a + * perfectly good method of mapping arbitrary strings to things, + * they didn't see fit to use that method for help contexts, so + * instead they hash the context names and expect the hashes to be + * unique. Sigh. + * + * On success (i.e. in any circumstance other than a hash clash), a + * valid WHLP_TOPIC is returned for later use. + */ + WHLP_TOPIC whlp_register_topic (WHLP h, char *context_name, char **clash); + + /* + * Link two topics together in a browse sequence. Automatically + * takes care of the forward and reverse links. + */ + void whlp_browse_link (WHLP h, WHLP_TOPIC before, WHLP_TOPIC after); + + /* + * After calling whlp_register_topic for all topics, you should + * call this, which will sort out all loose ends and allocate + * context names for all anonymous topics. Then you can start + * writing actual text. + */ + void whlp_prepare (WHLP h); + + /* + * Create a link from an index term to a topic. + */ + void whlp_index_term (WHLP h, char *index, WHLP_TOPIC topic); + + /* + * Call this if you need the id of a topic and you don't already + * know it (for example, if whlp_prepare has allocated it + * anonymously for you). You might need this, for example, in + * creating macros for button-bar bindings. + * + * The string returned will be freed when the WHLP context is + * closed. You should not free it yourself. + * + * Do not call this before calling whlp_prepare(). + */ + char *whlp_topic_id (WHLP_TOPIC topic); + + /* + * Call this to specify which help topic will be the first one + * displayed when the help file is loaded. 
+ */ + void whlp_primary_topic (WHLP h, WHLP_TOPIC topic); + + /* + * Call this when about to begin writing out the text for a topic. + * + * Any additional arguments are Help macros, terminated with a + * NULL. So the minimum call sequence is + * + * whlp_begin_topic(helpfile, mytopic, "Title", NULL); + */ + void whlp_begin_topic (WHLP h, WHLP_TOPIC topic, char *title, ...); + + /* + * Call this to set up a font descriptor. You supply the font name, + * the font size (in half-points), the graphic rendition flags + * (bold, italic etc), and the general font family (for Windows to + * select a fallback font if yours is unavailable). You can also + * specify a foreground colour for the text (but unfortunately not + * a background). + * + * Font descriptors are identified in whlp_set_font() by small + * integers, which are allocated from 0 upwards in the order you + * call whlp_create_font(). For your convenience, + * whlp_create_font() returns the integer allocated to each font + * descriptor you create, but you could work this out just as + * easily yourself by counting. + */ + enum /* rendition flags for whlp_create_font(); each value is a distinct bit, so presumably they can be combined with bitwise OR - confirm in winhelp.c */ + { + WHLP_FONT_BOLD = 1, + WHLP_FONT_ITALIC = 2, + WHLP_FONT_UNDERLINE = 4, + WHLP_FONT_STRIKEOUT = 8, + WHLP_FONT_DOUBLEUND = 16, + WHLP_FONT_SMALLCAPS = 32 + }; + enum /* values for the family argument of whlp_create_font() */ + { + WHLP_FONTFAM_FIXED = 1, + WHLP_FONTFAM_SERIF = 2, + WHLP_FONTFAM_SANS = 3, + WHLP_FONTFAM_SCRIPT = 4, + WHLP_FONTFAM_DECOR = 5 + }; + int whlp_create_font (WHLP h, char *font, int family, int halfpoints, + int rendition, int r, int g, int b); /* note the argument order: family comes before size and rendition; r, g, b are presumably the foreground colour components mentioned above */ + + /* + * Routines to output paragraphs and actual text (at last). 
+ * + * You should start by calling whlp_para_attr() to set any + * paragraph attributes that differ from the standard settings. + * Next call whlp_begin_para() to start the paragraph. Then call + * the various in-paragraph functions until you have output the + * whole paragraph, and finally call whlp_end_para() to finish it + * off. 
+ */ + enum /* values for the attr_id argument of whlp_para_attr() */ + { + WHLP_PARA_SPACEABOVE = 1, WHLP_PARA_SPACEBELOW, WHLP_PARA_SPACELINES, + WHLP_PARA_LEFTINDENT, WHLP_PARA_RIGHTINDENT, WHLP_PARA_FIRSTLINEINDENT, + WHLP_PARA_ALIGNMENT + }; + enum /* values for the alignment argument of whlp_set_tabstop(); presumably also the parameter for WHLP_PARA_ALIGNMENT - confirm */ + { + WHLP_ALIGN_LEFT, WHLP_ALIGN_RIGHT, WHLP_ALIGN_CENTRE + }; + enum /* values for the para_type argument of whlp_begin_para() */ + { + WHLP_PARA_SCROLL, WHLP_PARA_NONSCROLL + }; + void whlp_para_attr (WHLP h, int attr_id, int attr_param); + void whlp_set_tabstop (WHLP h, int tabstop, int alignment); + void whlp_begin_para (WHLP h, int para_type); + void whlp_end_para (WHLP h); + void whlp_set_font (WHLP h, int font_id); + void whlp_text (WHLP h, char *text); + void whlp_start_hyperlink (WHLP h, WHLP_TOPIC target); + void whlp_end_hyperlink (WHLP h); + void whlp_tab (WHLP h);