Print this page
9718 update mandoc to 1.14.4
   1 /*      $Id: html.c,v 1.219 2017/07/15 17:57:51 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #include "config.h"
  19 
  20 #include <sys/types.h>
  21 
  22 #include <assert.h>
  23 #include <ctype.h>
  24 #include <stdarg.h>

  25 #include <stdio.h>
  26 #include <stdint.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29 #include <unistd.h>
  30 
  31 #include "mandoc_aux.h"

  32 #include "mandoc.h"
  33 #include "roff.h"
  34 #include "out.h"
  35 #include "html.h"
  36 #include "manconf.h"
  37 #include "main.h"
  38 
  39 struct  htmldata {
             /*
              * Static description of one HTML element: the tag name that
              * is printed, and a bit mask of the HTML_* formatting flags
              * defined below.  print_otag() reads both via htmltags[tag].
              */
  40         const char       *name;
  41         int               flags;
     /* print_otag() does not push a struct tag and returns NULL. */
  42 #define HTML_NOSTACK     (1 << 0)
     /* print_otag() writes '/' before the closing '>' of the tag. */
  43 #define HTML_AUTOCLOSE   (1 << 1)
     /* print_otag() calls print_endline() before writing the opening tag. */
  44 #define HTML_NLBEFORE    (1 << 2)
     /* print_otag() calls print_endline() right after the opening tag. */
  45 #define HTML_NLBEGIN     (1 << 3)
     /* NOTE(review): presumably a line break before the closing tag; */
     /* the code testing this flag is not visible here - confirm. */
  46 #define HTML_NLEND       (1 << 4)
     /* print_endline() is called after the closing tag has been written. */
  47 #define HTML_NLAFTER     (1 << 5)
     /* Convenience combinations of the four line break flags. */
  48 #define HTML_NLAROUND    (HTML_NLBEFORE | HTML_NLAFTER)
  49 #define HTML_NLINSIDE    (HTML_NLBEGIN | HTML_NLEND)
  50 #define HTML_NLALL       (HTML_NLAROUND | HTML_NLINSIDE)
     /* print_otag() increments h->indent after opening the element. */
  51 #define HTML_INDENT      (1 << 6)
     /* print_otag() increments h->noindent; print_indent() writes */
     /* no indentation while that counter is non-zero. */
  52 #define HTML_NOINDENT    (1 << 7)
  53 };
  54 
  55 static  const struct htmldata htmltags[TAG_MAX] = {
             /*
              * Element name and formatting flags for every HTML tag.
              * print_otag() indexes this table with its enum htmltag
              * argument, so the entries presumably have to stay in the
              * same order as that enum, which is declared outside this
              * file - TODO confirm against html.h.
              */
  56         {"html",        HTML_NLALL},
  57         {"head",        HTML_NLALL | HTML_INDENT},
  58         {"body",        HTML_NLALL},
  59         {"meta",        HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
  60         {"title",       HTML_NLAROUND},
  61         {"div",         HTML_NLAROUND},

  62         {"h1",          HTML_NLAROUND},
  63         {"h2",          HTML_NLAROUND},
  64         {"span",        0},
  65         {"link",        HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
  66         {"br",          HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
  67         {"a",           0},
  68         {"table",       HTML_NLALL | HTML_INDENT},
  69         {"colgroup",    HTML_NLALL | HTML_INDENT},
  70         {"col",         HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
  71         {"tr",          HTML_NLALL | HTML_INDENT},
  72         {"td",          HTML_NLAROUND},
  73         {"li",          HTML_NLAROUND | HTML_INDENT},
  74         {"ul",          HTML_NLALL | HTML_INDENT},
  75         {"ol",          HTML_NLALL | HTML_INDENT},
  76         {"dl",          HTML_NLALL | HTML_INDENT},
  77         {"dt",          HTML_NLAROUND},
  78         {"dd",          HTML_NLAROUND | HTML_INDENT},
             /* The only element that suppresses indentation of its content. */
  79         {"pre",         HTML_NLALL | HTML_NOINDENT},
  80         {"var",         0},
  81         {"cite",        0},
  82         {"b",           0},
  83         {"i",           0},
  84         {"code",        0},
  85         {"small",       0},
  86         {"style",       HTML_NLALL | HTML_INDENT},
  87         {"math",        HTML_NLALL | HTML_INDENT},
  88         {"mrow",        0},
  89         {"mi",          0},
  90         {"mn",          0},
  91         {"mo",          0},
  92         {"msup",        0},
  93         {"msub",        0},
  94         {"msubsup",     0},
  95         {"mfrac",       0},
  96         {"msqrt",       0},
  97         {"mfenced",     0},
  98         {"mtable",      0},
  99         {"mtr",         0},
 100         {"mtd",         0},
 101         {"munderover",  0},
 102         {"munder",      0},
 103         {"mover",       0},
 104 };
 105 
 106 static  const char      *const roffscales[SCALE_MAX] = {
             /*
              * CSS unit suffix printed after a style value, indexed by
              * the unit member of a struct roffsu (see the use of
              * roffscales[su->unit] in print_otag()).  Several roff
              * units share a suffix; print_otag() divides SCALE_MM
              * values by 100 and SCALE_BU values by 24 before printing.
              */
 107         "cm", /* SCALE_CM */
 108         "in", /* SCALE_IN */
 109         "pc", /* SCALE_PC */
 110         "pt", /* SCALE_PT */
 111         "em", /* SCALE_EM */
 112         "em", /* SCALE_MM */
 113         "ex", /* SCALE_EN */
 114         "ex", /* SCALE_BU */
 115         "em", /* SCALE_VS */
 116         "ex", /* SCALE_FS */
 117 };
 118 
 119 static  void     a2width(const char *, struct roffsu *); /* -width argument to scaling unit */
 120 static  void     print_byte(struct html *, char);
 121 static  void     print_endword(struct html *);
 122 static  void     print_indent(struct html *); /* indentation at the start of an output line */
 123 static  void     print_word(struct html *, const char *); /* string output, one print_byte() per character */
 124 
 125 static  void     print_ctag(struct html *, struct tag *); /* writes a closing tag and pops it from h->tag */
 126 static  int      print_escape(struct html *, char); /* writes HTML entities for special characters */
 127 static  int      print_encode(struct html *, const char *, const char *, int);
 128 static  void     print_href(struct html *, const char *, const char *, int);
 129 static  void     print_metaf(struct html *, enum mandoc_esc); /* switches the font for a font escape */
 130 
 130 
 131 
 132 void *
 133 html_alloc(const struct manoutput *outopts)
 134 {
             /*
              * Create the state of the HTML formatter.
              * The struct html is obtained from mandoc_calloc() and the
              * style sheet, manual page base and include base settings
              * are copied from outopts.  HTML_FRAGMENT is set in oflags
              * when outopts->fragment is non-zero.
              * Returns the new object as a void pointer; html_free()
              * releases it.
              */
 135         struct html     *h;
 136 
 137         h = mandoc_calloc(1, sizeof(struct html));
 138 
             /* Start with an empty stack of open elements. */
 139         h->tag = NULL;
 140         h->style = outopts->style;
 141         h->base_man = outopts->man;
 142         h->base_includes = outopts->includes;
 143         if (outopts->fragment)
 144                 h->oflags |= HTML_FRAGMENT;
 145 


 146         return h;
 147 }
 148 
 149 void
 150 html_free(void *p)
 151 {
             /*
              * Release a formatter state created by html_alloc().
              * p is cast to struct html *; every struct tag still linked
              * from h->tag is freed, then the struct html itself.
              */
 152         struct tag      *tag;
 153         struct html     *h;


 154 
 155         h = (struct html *)p;
 156 
             /* Unlink each entry before freeing it, so tag->next is */
             /* never read from freed memory. */
 157         while ((tag = h->tag) != NULL) {
 158                 h->tag = tag->next;
 159                 free(tag);
 160         }
 161 
 162         free(h);







 163 }
 164 
 165 void
 166 print_gen_head(struct html *h)
 167 {
             /*
              * Write the generic part of the document head: a charset
              * meta element, an embedded default style sheet, and, if
              * h->style is set, a link element referring to it.
              * In this revision the embedded style sheet is written
              * even when h->style is set.
              */
 168         struct tag      *t;
 169 
             /* The "?" format takes the attribute name and its value */
             /* as two consecutive string arguments. */
 170         print_otag(h, TAG_META, "?", "charset", "utf-8");





 171 
 172         /*
 173          * Print a default style-sheet.
 174          */
 175 
 176         t = print_otag(h, TAG_STYLE, "");
 177         print_text(h, "table.head, table.foot { width: 100%; }");
 178         print_endline(h);
 179         print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
 180         print_endline(h);
 181         print_text(h, "td.head-vol { text-align: center; }");
 182         print_endline(h);
 183         print_text(h, "div.Pp { margin: 1ex 0ex; }");
















             /* NOTE(review): print_tagq() is defined elsewhere; */
             /* presumably it closes the style element opened above. */
 184         print_tagq(h, t);
 185 
             /* "?h??" yields rel=, href= (from h->style), type= and media=. */
 186         if (h->style)
 187                 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
 188                     h->style, "type", "text/css", "media", "all");
 189 }
 190 
 191 static void
 192 print_metaf(struct html *h, enum mandoc_esc deco)
 193 {
 194         enum htmlfont    font;
 195 
 196         switch (deco) {
 197         case ESCAPE_FONTPREV:
 198                 font = h->metal;
 199                 break;
 200         case ESCAPE_FONTITALIC:
 201                 font = HTMLFONT_ITALIC;
 202                 break;
 203         case ESCAPE_FONTBOLD:
 204                 font = HTMLFONT_BOLD;
 205                 break;
 206         case ESCAPE_FONTBI:
 207                 font = HTMLFONT_BI;
 208                 break;


 222         h->metal = h->metac;
 223         h->metac = font;
 224 
 225         switch (font) {
 226         case HTMLFONT_ITALIC:
 227                 h->metaf = print_otag(h, TAG_I, "");
 228                 break;
 229         case HTMLFONT_BOLD:
 230                 h->metaf = print_otag(h, TAG_B, "");
 231                 break;
 232         case HTMLFONT_BI:
 233                 h->metaf = print_otag(h, TAG_B, "");
 234                 print_otag(h, TAG_I, "");
 235                 break;
 236         default:
 237                 break;
 238         }
 239 }
 240 
 241 char *
 242 html_make_id(const struct roff_node *n)
 243 {
             /*
              * Build a string for use as an HTML id attribute from the
              * text below node n.
              * Returns NULL if any child of n is not a text node.
              * Otherwise returns the string produced by deroff() with
              * every space replaced by an underscore.
              * NOTE(review): the buffer presumably comes from the heap
              * and is owned by the caller - confirm against deroff().
              */
 244         const struct roff_node  *nch;
 245         char                    *buf, *cp;


 246 
             /* Only nodes consisting purely of text get an id. */
 247         for (nch = n->child; nch != NULL; nch = nch->next)
 248                 if (nch->type != ROFFT_TEXT)
 249                         return NULL;
 250 
 251         buf = NULL;
 252         deroff(&buf, n);


 253 
 254         /* http://www.w3.org/TR/html5/dom.html#the-id-attribute */





 255 
             /* NOTE(review): buf is dereferenced without a NULL check; */
             /* if deroff() can leave it NULL, this loop faults - confirm. */
 256         for (cp = buf; *cp != '\0'; cp++)
 257                 if (*cp == ' ')

 258                         *cp = '_';
 259 

 260         return buf;
 261 }
 262 
 263 int
 264 html_strlen(const char *cp)
 265 {
             /*
              * Return the approximate display width of the roff string
              * cp, counting one column per ordinary character and one
              * per character-producing escape sequence.  If
              * mandoc_escape() reports ESCAPE_ERROR, the width counted
              * so far is returned.
              */
 266         size_t           rsz;
 267         int              skip, sz;
 268 
 269         /*
 270          * Account for escaped sequences within string length
 271          * calculations.  This follows the logic in term_strlen() as we
 272          * must calculate the width of produced strings.
 273          * Assume that characters are always width of "1".  This is
 274          * hacky, but it gets the job done for approximation of widths.
 275          */
 276 
 277         sz = 0;
 278         skip = 0;
 279         while (1) {
                     /* Length of the run of characters before the next backslash. */
 280                 rsz = strcspn(cp, "\\");
 281                 if (rsz) {
 282                         cp += rsz;
                             /* A pending ESCAPE_SKIPCHAR hides one character of the run. */
 283                         if (skip) {
 284                                 skip = 0;
 285                                 rsz--;
 286                         }
 287                         sz += rsz;


 288                 }
 289                 if ('\0' == *cp)
 290                         break;
                     /* Step over the backslash; mandoc_escape() is handed &cp */
                     /* and presumably advances it past the escape sequence. */
 291                 cp++;
 292                 switch (mandoc_escape(&cp, NULL, NULL)) {
 293                 case ESCAPE_ERROR:
 294                         return sz;
                     /* These escapes count as one column unless skipped. */
 295                 case ESCAPE_UNICODE:
 296                 case ESCAPE_NUMBERED:
 297                 case ESCAPE_SPECIAL:
 298                 case ESCAPE_OVERSTRIKE:
 299                         if (skip)
 300                                 skip = 0;
 301                         else
 302                                 sz++;
 303                         break;
                     /* Remember that the next character is not to be counted. */
 304                 case ESCAPE_SKIPCHAR:
 305                         skip = 1;
 306                         break;
                     /* All other escapes contribute no width. */
 307                 default:
 308                         break;
 309                 }
 310         }
 311         return sz;
 312 }
 313 
 314 static int
 315 print_escape(struct html *h, char c)
 316 {
 317 
 318         switch (c) {
 319         case '<':
 320                 print_word(h, "&lt;");
 321                 break;
 322         case '>':
 323                 print_word(h, "&gt;");
 324                 break;
 325         case '&':
 326                 print_word(h, "&amp;");
 327                 break;
 328         case '"':
 329                 print_word(h, "&quot;");
 330                 break;
 331         case ASCII_NBRSP:


 473                 if (man && p[1] == 'S') {
 474                         if (sec == NULL)
 475                                 print_byte(h, '1');
 476                         else
 477                                 print_encode(h, sec, NULL, 1);
 478                 } else if ((man && p[1] == 'N') ||
 479                     (man == 0 && p[1] == 'I'))
 480                         print_encode(h, name, NULL, 1);
 481                 else
 482                         print_encode(h, p, p + 2, 1);
 483                 pp = p + 2;
 484         }
 485         if (*pp != '\0')
 486                 print_encode(h, pp, NULL, 1);
 487 }
 488 
 489 struct tag *
 490 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
 491 {
 492         va_list          ap;
 493         struct roffsu    mysu, *su;
 494         char             numbuf[16];
 495         struct tag      *t;
 496         const char      *attr;
 497         char            *arg1, *arg2;
 498         double           v;
 499         int              i, have_style, tflags;
 500 
 501         tflags = htmltags[tag].flags;
 502 
 503         /* Push this tag onto the stack of open scopes. */
 504 
 505         if ((tflags & HTML_NOSTACK) == 0) {
 506                 t = mandoc_malloc(sizeof(struct tag));
 507                 t->tag = tag;
 508                 t->next = h->tag;
 509                 h->tag = t;
 510         } else
 511                 t = NULL;
 512 
 513         if (tflags & HTML_NLBEFORE)
 514                 print_endline(h);
 515         if (h->col == 0)
 516                 print_indent(h);
 517         else if ((h->flags & HTML_NOSPACE) == 0) {
 518                 if (h->flags & HTML_KEEP)
 519                         print_word(h, "&#x00A0;");
 520                 else {
 521                         if (h->flags & HTML_PREKEEP)
 522                                 h->flags |= HTML_KEEP;
 523                         print_endword(h);
 524                 }
 525         }
 526 
 527         if ( ! (h->flags & HTML_NONOSPACE))
 528                 h->flags &= ~HTML_NOSPACE;
 529         else
 530                 h->flags |= HTML_NOSPACE;
 531 
 532         /* Print out the tag name and attributes. */
 533 
 534         print_byte(h, '<');
 535         print_word(h, htmltags[tag].name);
 536 
 537         va_start(ap, fmt);
 538 
 539         have_style = 0;
 540         while (*fmt != '\0') {
 541                 if (*fmt == 's') {
 542                         have_style = 1;
 543                         fmt++;
 544                         break;
 545                 }
 546 
 547                 /* Parse a non-style attribute and its arguments. */
 548 
 549                 arg1 = va_arg(ap, char *);

 550                 switch (*fmt++) {
 551                 case 'c':
 552                         attr = "class";
 553                         break;
 554                 case 'h':
 555                         attr = "href";
 556                         break;
 557                 case 'i':
 558                         attr = "id";
 559                         break;




 560                 case '?':
 561                         attr = arg1;
 562                         arg1 = va_arg(ap, char *);
 563                         break;
 564                 default:
 565                         abort();
 566                 }
 567                 arg2 = NULL;
 568                 if (*fmt == 'M')
 569                         arg2 = va_arg(ap, char *);
 570                 if (arg1 == NULL)
 571                         continue;
 572 
 573                 /* Print the non-style attributes. */
 574 
 575                 print_byte(h, ' ');
 576                 print_word(h, attr);
 577                 print_byte(h, '=');
 578                 print_byte(h, '"');
 579                 switch (*fmt) {
 580                 case 'I':
 581                         print_href(h, arg1, NULL, 0);
 582                         fmt++;
 583                         break;
 584                 case 'M':
 585                         print_href(h, arg1, arg2, 1);
 586                         fmt++;
 587                         break;
 588                 case 'R':
 589                         print_byte(h, '#');
 590                         print_encode(h, arg1, NULL, 1);
 591                         fmt++;
 592                         break;
 593                 case 'T':
 594                         print_encode(h, arg1, NULL, 1);
 595                         print_word(h, "\" title=\"");
 596                         print_encode(h, arg1, NULL, 1);
 597                         fmt++;
 598                         break;
 599                 default:

 600                         print_encode(h, arg1, NULL, 1);
 601                         break;
 602                 }
 603                 print_byte(h, '"');
 604         }
 605 
 606         /* Print out styles. */
 607 
 608         while (*fmt != '\0') {
 609                 arg1 = NULL;
 610                 su = NULL;
 611 
 612                 /* First letter: input argument type. */
 613 
 614                 switch (*fmt++) {
 615                 case 'h':
 616                         i = va_arg(ap, int);
 617                         su = &mysu;
 618                         SCALE_HS_INIT(su, i);
 619                         break;
 620                 case 's':
 621                         arg1 = va_arg(ap, char *);
 622                         break;
 623                 case 'u':
 624                         su = va_arg(ap, struct roffsu *);
 625                         break;
 626                 case 'w':
 627                         if ((arg2 = va_arg(ap, char *)) != NULL) {
 628                                 su = &mysu;
 629                                 a2width(arg2, su);
 630                         }
 631                         if (*fmt == '*') {
 632                                 if (su != NULL && su->unit == SCALE_EN &&
 633                                     su->scale > 5.9 && su->scale < 6.1)
 634                                         su = NULL;
 635                                 fmt++;
 636                         }
 637                         if (*fmt == '+') {
 638                                 if (su != NULL) {
 639                                         /* Make even bold text fit. */
 640                                         su->scale *= 1.2;
 641                                         /* Add padding. */
 642                                         su->scale += 3.0;
 643                                 }
 644                                 fmt++;
 645                         }
 646                         if (*fmt == '-') {
 647                                 if (su != NULL)
 648                                         su->scale *= -1.0;
 649                                 fmt++;
 650                         }
 651                         break;
 652                 default:
 653                         abort();
 654                 }
 655 
 656                 /* Second letter: style name. */
 657 
 658                 switch (*fmt++) {
 659                 case 'h':
 660                         attr = "height";
 661                         break;
 662                 case 'i':
 663                         attr = "text-indent";
 664                         break;
 665                 case 'l':
 666                         attr = "margin-left";
 667                         break;
 668                 case 'w':
 669                         attr = "width";
 670                         break;
 671                 case 'W':
 672                         attr = "min-width";
 673                         break;
 674                 case '?':
 675                         attr = arg1;
 676                         arg1 = va_arg(ap, char *);
 677                         break;
 678                 default:
 679                         abort();
 680                 }
 681                 if (su == NULL && arg1 == NULL)
 682                         continue;
 683 
 684                 if (have_style == 1)
 685                         print_word(h, " style=\"");
 686                 else
 687                         print_byte(h, ' ');
 688                 print_word(h, attr);
 689                 print_byte(h, ':');
 690                 print_byte(h, ' ');
 691                 if (su != NULL) {
 692                         v = su->scale;
 693                         if (su->unit == SCALE_MM && (v /= 100.0) == 0.0)
 694                                 v = 1.0;
 695                         else if (su->unit == SCALE_BU)
 696                                 v /= 24.0;
 697                         (void)snprintf(numbuf, sizeof(numbuf), "%.2f", v);
 698                         print_word(h, numbuf);
 699                         print_word(h, roffscales[su->unit]);
 700                 } else
 701                         print_word(h, arg1);
 702                 print_byte(h, ';');
 703                 have_style = 2;
 704         }
 705         if (have_style == 2)

 706                 print_byte(h, '"');
 707 
 708         va_end(ap);
 709 
 710         /* Accommodate for "well-formed" singleton escaping. */
 711 
 712         if (HTML_AUTOCLOSE & htmltags[tag].flags)
 713                 print_byte(h, '/');
 714 
 715         print_byte(h, '>');
 716 
 717         if (tflags & HTML_NLBEGIN)
 718                 print_endline(h);
 719         else
 720                 h->flags |= HTML_NOSPACE;
 721 
 722         if (tflags & HTML_INDENT)
 723                 h->indent++;
 724         if (tflags & HTML_NOINDENT)
 725                 h->noindent++;
 726 
 727         return t;


 752         print_indent(h);
 753         print_byte(h, '<');
 754         print_byte(h, '/');
 755         print_word(h, htmltags[tag->tag].name);
 756         print_byte(h, '>');
 757         if (tflags & HTML_NLAFTER)
 758                 print_endline(h);
 759 
 760         h->tag = tag->next;
 761         free(tag);
 762 }
 763 
 764 void
 765 print_gen_decls(struct html *h)
 766 {
             /* Write the HTML5 document type declaration and end the line. */
 767         print_word(h, "<!DOCTYPE html>");
 768         print_endline(h);
 769 }
 770 
 771 void


























 772 print_text(struct html *h, const char *word)
 773 {
 774         if (h->col && (h->flags & HTML_NOSPACE) == 0) {
 775                 if ( ! (HTML_KEEP & h->flags)) {
 776                         if (HTML_PREKEEP & h->flags)
 777                                 h->flags |= HTML_KEEP;
 778                         print_endword(h);
 779                 } else
 780                         print_word(h, "&#x00A0;");
 781         }
 782 
 783         assert(NULL == h->metaf);
 784         switch (h->metac) {
 785         case HTMLFONT_ITALIC:
 786                 h->metaf = print_otag(h, TAG_I, "");
 787                 break;
 788         case HTMLFONT_BOLD:
 789                 h->metaf = print_otag(h, TAG_B, "");
 790                 break;
 791         case HTMLFONT_BI:


 941         if (h->col)
 942                 return;
 943 
 944         if (h->noindent == 0) {
 945                 h->col = h->indent * 2;
 946                 for (i = 0; i < h->col; i++)
 947                         putchar(' ');
 948         }
 949         h->flags &= ~HTML_NOSPACE;
 950 }
 951 
 952 /*
 953  * Print or buffer some characters
 954  * depending on the current HTML output buffer state.
      * Each character of the NUL-terminated string cp is passed to
      * print_byte() in order; the terminating NUL is not passed on.
 955  */
 956 static void
 957 print_word(struct html *h, const char *cp)
 958 {
 959         while (*cp != '\0')
 960                 print_byte(h, *cp++);
 961 }
 962 
 963 /*
 964  * Calculate the scaling unit passed in a `-width' argument.  This uses
 965  * either a native scaling unit (e.g., 1i, 2m) or the string length of
 966  * the value.
      * The result is stored in *su.  If a2roffsu() returns NULL or does
      * not consume the whole string, the width becomes html_strlen(p)
      * in SCALE_EN units.  A successfully parsed negative scale is
      * replaced by 0.
 967  */
 968 static void
 969 a2width(const char *p, struct roffsu *su)
 970 {
 971         const char      *end;
 972 
 973         end = a2roffsu(p, su, SCALE_MAX);
             /* Not a pure scaling unit: measure the text instead. */
 974         if (end == NULL || *end != '\0') {
 975                 su->unit = SCALE_EN;
 976                 su->scale = html_strlen(p);
 977         } else if (su->scale < 0.0)
 978                 su->scale = 0.0;
 979 }
   1 /*      $Id: html.c,v 1.238 2018/06/25 16:54:59 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2011-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #include "config.h"
  19 
  20 #include <sys/types.h>
  21 
  22 #include <assert.h>
  23 #include <ctype.h>
  24 #include <stdarg.h>
  25 #include <stddef.h>
  26 #include <stdio.h>
  27 #include <stdint.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #include <unistd.h>
  31 
  32 #include "mandoc_aux.h"
  33 #include "mandoc_ohash.h"
  34 #include "mandoc.h"
  35 #include "roff.h"
  36 #include "out.h"
  37 #include "html.h"
  38 #include "manconf.h"
  39 #include "main.h"
  40 
  41 struct  htmldata {
             /*
              * Static description of one HTML element: the tag name and
              * a bit mask of the HTML_* formatting flags defined below.
              * NOTE(review): the flag meanings given here are taken from
              * the print_otag(), print_ctag() and print_indent() code of
              * the previous revision; confirm that they still hold.
              */
  42         const char       *name;
  43         int               flags;
     /* The element is not pushed onto the stack of open elements. */
  44 #define HTML_NOSTACK     (1 << 0)
     /* A '/' is written before the closing '>' of the tag. */
  45 #define HTML_AUTOCLOSE   (1 << 1)
     /* Line break before the opening tag. */
  46 #define HTML_NLBEFORE    (1 << 2)
     /* Line break after the opening tag. */
  47 #define HTML_NLBEGIN     (1 << 3)
     /* Presumably a line break before the closing tag - TODO confirm. */
  48 #define HTML_NLEND       (1 << 4)
     /* Line break after the closing tag. */
  49 #define HTML_NLAFTER     (1 << 5)
     /* Convenience combinations of the four line break flags. */
  50 #define HTML_NLAROUND    (HTML_NLBEFORE | HTML_NLAFTER)
  51 #define HTML_NLINSIDE    (HTML_NLBEGIN | HTML_NLEND)
  52 #define HTML_NLALL       (HTML_NLAROUND | HTML_NLINSIDE)
     /* Content of the element is indented one level deeper. */
  53 #define HTML_INDENT      (1 << 6)
     /* Indentation is suppressed inside the element. */
  54 #define HTML_NOINDENT    (1 << 7)
  55 };
  56 
  57 static  const struct htmldata htmltags[TAG_MAX] = {
             /*
              * Element name and formatting flags for every HTML tag.
              * NOTE(review): presumably indexed by enum htmltag, so the
              * order has to match that enum, which is declared outside
              * this file - confirm against html.h.
              */
  58         {"html",        HTML_NLALL},
  59         {"head",        HTML_NLALL | HTML_INDENT},
  60         {"body",        HTML_NLALL},
  61         {"meta",        HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
  62         {"title",       HTML_NLAROUND},
             /* Two "div" entries: one with line breaks around the */
             /* element and one without any formatting flags. */
  63         {"div",         HTML_NLAROUND},
  64         {"div",         0},
  65         {"h1",          HTML_NLAROUND},
  66         {"h2",          HTML_NLAROUND},
  67         {"span",        0},
  68         {"link",        HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
  69         {"br",          HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
  70         {"a",           0},
  71         {"table",       HTML_NLALL | HTML_INDENT},


  72         {"tr",          HTML_NLALL | HTML_INDENT},
  73         {"td",          HTML_NLAROUND},
  74         {"li",          HTML_NLAROUND | HTML_INDENT},
  75         {"ul",          HTML_NLALL | HTML_INDENT},
  76         {"ol",          HTML_NLALL | HTML_INDENT},
  77         {"dl",          HTML_NLALL | HTML_INDENT},
  78         {"dt",          HTML_NLAROUND},
  79         {"dd",          HTML_NLAROUND | HTML_INDENT},
  80         {"pre",         HTML_NLALL | HTML_NOINDENT},
  81         {"var",         0},
  82         {"cite",        0},
  83         {"b",           0},
  84         {"i",           0},
  85         {"code",        0},
  86         {"small",       0},
  87         {"style",       HTML_NLALL | HTML_INDENT},
  88         {"math",        HTML_NLALL | HTML_INDENT},
  89         {"mrow",        0},
  90         {"mi",          0},
  91         {"mn",          0},
  92         {"mo",          0},
  93         {"msup",        0},
  94         {"msub",        0},
  95         {"msubsup",     0},
  96         {"mfrac",       0},
  97         {"msqrt",       0},
  98         {"mfenced",     0},
  99         {"mtable",      0},
 100         {"mtr",         0},
 101         {"mtd",         0},
 102         {"munderover",  0},
 103         {"munder",      0},
 104         {"mover",       0},
 105 };
 106 
 107 /* Avoid duplicate HTML id= attributes. */
     /*
      * File-wide hash table consulted by html_make_id().  It is
      * initialised in html_alloc(); html_free() frees every entry and
      * then deletes the table.
      */
 108 static  struct ohash     id_unique;










 109 

 110 static  void     print_byte(struct html *, char);
 111 static  void     print_endword(struct html *);
 112 static  void     print_indent(struct html *);
 113 static  void     print_word(struct html *, const char *);
 114 
     /* Helpers for closing tags, character encoding, links and fonts. */
 115 static  void     print_ctag(struct html *, struct tag *);
 116 static  int      print_escape(struct html *, char);
 117 static  int      print_encode(struct html *, const char *, const char *, int);
 118 static  void     print_href(struct html *, const char *, const char *, int);
 119 static  void     print_metaf(struct html *, enum mandoc_esc); /* switches the font for a font escape */
 120 
 120 
 121 
 122 void *
 123 html_alloc(const struct manoutput *outopts)
 124 {
             /*
              * Create the state of the HTML formatter.
              * The struct html is obtained from mandoc_calloc() and the
              * style sheet, manual page base and include base settings
              * are copied from outopts.  HTML_FRAGMENT is set in oflags
              * when outopts->fragment is non-zero.
              * As a side effect the file-wide id_unique hash table is
              * initialised.
              * Returns the new object as a void pointer; html_free()
              * releases it together with the hash table.
              */
 125         struct html     *h;
 126 
 127         h = mandoc_calloc(1, sizeof(struct html));
 128 
             /* Start with an empty stack of open elements. */
 129         h->tag = NULL;
 130         h->style = outopts->style;
 131         h->base_man = outopts->man;
 132         h->base_includes = outopts->includes;
 133         if (outopts->fragment)
 134                 h->oflags |= HTML_FRAGMENT;
 135 
             /* NOTE(review): id_unique is a single static table, so this */
             /* presumably assumes one formatter is alive at a time - confirm. */
 136         mandoc_ohash_init(&id_unique, 4, 0);
 137 
 138         return h;
 139 }
 140 
 141 void
 142 html_free(void *p)
 143 {
             /*
              * Release a formatter state created by html_alloc().
              * Frees every struct tag still linked from h->tag and the
              * struct html itself, then frees every entry stored in the
              * id_unique hash table and deletes the table.
              */
 144         struct tag      *tag;
 145         struct html     *h;
 146         char            *cp;
 147         unsigned int     slot;
 148 
 149         h = (struct html *)p;

             /* Unlink each entry before freeing it, so tag->next is */
             /* never read from freed memory. */
 150         while ((tag = h->tag) != NULL) {
 151                 h->tag = tag->next;
 152                 free(tag);
 153         }

 154         free(h);
 155 
             /* Walk all occupied slots and free the stored strings */
             /* before the table itself is destroyed. */
 156         cp = ohash_first(&id_unique, &slot);
 157         while (cp != NULL) {
 158                 free(cp);
 159                 cp = ohash_next(&id_unique, &slot);
 160         }
 161         ohash_delete(&id_unique);
 162 }
 163 
 164 void
 165 print_gen_head(struct html *h)
 166 {
             /*
              * Write the generic part of the document head: a charset
              * meta element followed by either a link element referring
              * to h->style, if that is set, or a minimal embedded style
              * sheet otherwise.  The two are mutually exclusive.
              */
 167         struct tag      *t;
 168 
 169         print_otag(h, TAG_META, "?", "charset", "utf-8");
             /* A user supplied style sheet replaces the embedded one. */
 170         if (h->style != NULL) {
 171                 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
 172                     h->style, "type", "text/css", "media", "all");
 173                 return;
 174         }
 175 
 176         /*
 177          * Print a minimal embedded style sheet.
 178          */
 179 
 180         t = print_otag(h, TAG_STYLE, "");
 181         print_text(h, "table.head, table.foot { width: 100%; }");
 182         print_endline(h);
 183         print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
 184         print_endline(h);
 185         print_text(h, "td.head-vol { text-align: center; }");
 186         print_endline(h);
 187         print_text(h, "div.Pp { margin: 1ex 0ex; }");
 188         print_endline(h);
 189         print_text(h, "div.Nd, div.Bf, div.Op { display: inline; }");
 190         print_endline(h);
 191         print_text(h, "span.Pa, span.Ad { font-style: italic; }");
 192         print_endline(h);
 193         print_text(h, "span.Ms { font-weight: bold; }");
 194         print_endline(h);
             /* The CSS child combinator is written with print_byte(); */
             /* NOTE(review): presumably because print_text() would */
             /* turn '>' into an entity - confirm. */
 195         print_text(h, "dl.Bl-diag ");
 196         print_byte(h, '>');
 197         print_text(h, " dt { font-weight: bold; }");
 198         print_endline(h);
 199         print_text(h, "code.Nm, code.Fl, code.Cm, code.Ic, "
 200             "code.In, code.Fd, code.Fn,");
 201         print_endline(h);
 202         print_text(h, "code.Cd { font-weight: bold; "
 203             "font-family: inherit; }");
             /* NOTE(review): print_tagq() is defined elsewhere; */
             /* presumably it closes the style element opened above. */
 204         print_tagq(h, t);




 205 }
 206 
 207 static void
 208 print_metaf(struct html *h, enum mandoc_esc deco)
 209 {
 210         enum htmlfont    font;
 211 
 212         switch (deco) {
 213         case ESCAPE_FONTPREV:
 214                 font = h->metal;
 215                 break;
 216         case ESCAPE_FONTITALIC:
 217                 font = HTMLFONT_ITALIC;
 218                 break;
 219         case ESCAPE_FONTBOLD:
 220                 font = HTMLFONT_BOLD;
 221                 break;
 222         case ESCAPE_FONTBI:
 223                 font = HTMLFONT_BI;
 224                 break;


 238         h->metal = h->metac;
 239         h->metac = font;
 240 
 241         switch (font) {
 242         case HTMLFONT_ITALIC:
 243                 h->metaf = print_otag(h, TAG_I, "");
 244                 break;
 245         case HTMLFONT_BOLD:
 246                 h->metaf = print_otag(h, TAG_B, "");
 247                 break;
 248         case HTMLFONT_BI:
 249                 h->metaf = print_otag(h, TAG_B, "");
 250                 print_otag(h, TAG_I, "");
 251                 break;
 252         default:
 253                 break;
 254         }
 255 }
 256 
 257 char *
 258 html_make_id(const struct roff_node *n, int unique)
 259 {
 260         const struct roff_node  *nch;
 261         char                    *buf, *bufs, *cp;
 262         unsigned int             slot;
 263         int                      suffix;
 264 
 265         for (nch = n->child; nch != NULL; nch = nch->next)
 266                 if (nch->type != ROFFT_TEXT)
 267                         return NULL;
 268 
 269         buf = NULL;
 270         deroff(&buf, n);
 271         if (buf == NULL)
 272                 return NULL;
 273 
 274         /*
 275          * In ID attributes, only use ASCII characters that are
 276          * permitted in URL-fragment strings according to the
 277          * explicit list at:
 278          * https://url.spec.whatwg.org/#url-fragment-string
 279          */
 280 
 281         for (cp = buf; *cp != '\0'; cp++)
 282                 if (isalnum((unsigned char)*cp) == 0 &&
 283                     strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
 284                         *cp = '_';
 285 
 286         if (unique == 0)
 287                 return buf;

 288 
 289         /* Avoid duplicate HTML id= attributes. */




 290 
 291         bufs = NULL;
 292         suffix = 1;
 293         slot = ohash_qlookup(&id_unique, buf);
 294         cp = ohash_find(&id_unique, slot);
 295         if (cp != NULL) {
 296                 while (cp != NULL) {
 297                         free(bufs);
 298                         if (++suffix > 127) {
 299                                 free(buf);
 300                                 return NULL;







 301                         }
 302                         mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
 303                         slot = ohash_qlookup(&id_unique, bufs);
 304                         cp = ohash_find(&id_unique, slot);
 305                 }
 306                 free(buf);
 307                 buf = bufs;


















 308         }
 309         ohash_insert(&id_unique, slot, buf);
 310         return buf;
 311 }
 312 
 313 static int
 314 print_escape(struct html *h, char c)
 315 {
 316 
 317         switch (c) {
 318         case '<':
 319                 print_word(h, "&lt;");
 320                 break;
 321         case '>':
 322                 print_word(h, "&gt;");
 323                 break;
 324         case '&':
 325                 print_word(h, "&amp;");
 326                 break;
 327         case '"':
 328                 print_word(h, "&quot;");
 329                 break;
 330         case ASCII_NBRSP:


 472                 if (man && p[1] == 'S') {
 473                         if (sec == NULL)
 474                                 print_byte(h, '1');
 475                         else
 476                                 print_encode(h, sec, NULL, 1);
 477                 } else if ((man && p[1] == 'N') ||
 478                     (man == 0 && p[1] == 'I'))
 479                         print_encode(h, name, NULL, 1);
 480                 else
 481                         print_encode(h, p, p + 2, 1);
 482                 pp = p + 2;
 483         }
 484         if (*pp != '\0')
 485                 print_encode(h, pp, NULL, 1);
 486 }
 487 
 488 struct tag *
 489 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
 490 {
 491         va_list          ap;


 492         struct tag      *t;
 493         const char      *attr;
 494         char            *arg1, *arg2;
 495         int              tflags;

 496 
 497         tflags = htmltags[tag].flags;
 498 
 499         /* Push this tag onto the stack of open scopes. */
 500 
 501         if ((tflags & HTML_NOSTACK) == 0) {
 502                 t = mandoc_malloc(sizeof(struct tag));
 503                 t->tag = tag;
 504                 t->next = h->tag;
 505                 h->tag = t;
 506         } else
 507                 t = NULL;
 508 
 509         if (tflags & HTML_NLBEFORE)
 510                 print_endline(h);
 511         if (h->col == 0)
 512                 print_indent(h);
 513         else if ((h->flags & HTML_NOSPACE) == 0) {
 514                 if (h->flags & HTML_KEEP)
 515                         print_word(h, "&#x00A0;");
 516                 else {
 517                         if (h->flags & HTML_PREKEEP)
 518                                 h->flags |= HTML_KEEP;
 519                         print_endword(h);
 520                 }
 521         }
 522 
 523         if ( ! (h->flags & HTML_NONOSPACE))
 524                 h->flags &= ~HTML_NOSPACE;
 525         else
 526                 h->flags |= HTML_NOSPACE;
 527 
 528         /* Print out the tag name and attributes. */
 529 
 530         print_byte(h, '<');
 531         print_word(h, htmltags[tag].name);
 532 
 533         va_start(ap, fmt);
 534 

 535         while (*fmt != '\0') {





 536 
 537                 /* Parse attributes and arguments. */
 538 
 539                 arg1 = va_arg(ap, char *);
 540                 arg2 = NULL;
 541                 switch (*fmt++) {
 542                 case 'c':
 543                         attr = "class";
 544                         break;
 545                 case 'h':
 546                         attr = "href";
 547                         break;
 548                 case 'i':
 549                         attr = "id";
 550                         break;
 551                 case 's':
 552                         attr = "style";
 553                         arg2 = va_arg(ap, char *);
 554                         break;
 555                 case '?':
 556                         attr = arg1;
 557                         arg1 = va_arg(ap, char *);
 558                         break;
 559                 default:
 560                         abort();
 561                 }

 562                 if (*fmt == 'M')
 563                         arg2 = va_arg(ap, char *);
 564                 if (arg1 == NULL)
 565                         continue;
 566 
 567                 /* Print the attributes. */
 568 
 569                 print_byte(h, ' ');
 570                 print_word(h, attr);
 571                 print_byte(h, '=');
 572                 print_byte(h, '"');
 573                 switch (*fmt) {
 574                 case 'I':
 575                         print_href(h, arg1, NULL, 0);
 576                         fmt++;
 577                         break;
 578                 case 'M':
 579                         print_href(h, arg1, arg2, 1);
 580                         fmt++;
 581                         break;
 582                 case 'R':
 583                         print_byte(h, '#');
 584                         print_encode(h, arg1, NULL, 1);
 585                         fmt++;
 586                         break;
 587                 case 'T':
 588                         print_encode(h, arg1, NULL, 1);
 589                         print_word(h, "\" title=\"");
 590                         print_encode(h, arg1, NULL, 1);
 591                         fmt++;
 592                         break;
 593                 default:
 594                         if (arg2 == NULL)
 595                                 print_encode(h, arg1, NULL, 1);
 596                         else {
 597                                 print_word(h, arg1);






















































































 598                                 print_byte(h, ':');
 599                                 print_byte(h, ' ');
 600                                 print_word(h, arg2);










 601                                 print_byte(h, ';');

 602                         }
 603                         break;
 604                 }
 605                 print_byte(h, '"');
 606         }
 607         va_end(ap);
 608 
 609         /* Accommodate for "well-formed" singleton escaping. */
 610 
 611         if (HTML_AUTOCLOSE & htmltags[tag].flags)
 612                 print_byte(h, '/');
 613 
 614         print_byte(h, '>');
 615 
 616         if (tflags & HTML_NLBEGIN)
 617                 print_endline(h);
 618         else
 619                 h->flags |= HTML_NOSPACE;
 620 
 621         if (tflags & HTML_INDENT)
 622                 h->indent++;
 623         if (tflags & HTML_NOINDENT)
 624                 h->noindent++;
 625 
 626         return t;


 651         print_indent(h);
 652         print_byte(h, '<');
 653         print_byte(h, '/');
 654         print_word(h, htmltags[tag->tag].name);
 655         print_byte(h, '>');
 656         if (tflags & HTML_NLAFTER)
 657                 print_endline(h);
 658 
 659         h->tag = tag->next;
 660         free(tag);
 661 }
 662 
 663 void
 664 print_gen_decls(struct html *h)
 665 {
 666         print_word(h, "<!DOCTYPE html>");
 667         print_endline(h);
 668 }
 669 
 670 void
 671 print_gen_comment(struct html *h, struct roff_node *n)
 672 {
 673         int      wantblank;
 674 
 675         print_word(h, "<!-- This is an automatically generated file."
 676             "  Do not edit.");
 677         h->indent = 1;
 678         wantblank = 0;
 679         while (n != NULL && n->type == ROFFT_COMMENT) {
 680                 if (strstr(n->string, "-->") == NULL &&
 681                     (wantblank || *n->string != '\0')) {
 682                         print_endline(h);
 683                         print_indent(h);
 684                         print_word(h, n->string);
 685                         wantblank = *n->string != '\0';
 686                 }
 687                 n = n->next;
 688         }
 689         if (wantblank)
 690                 print_endline(h);
 691         print_word(h, " -->");
 692         print_endline(h);
 693         h->indent = 0;
 694 }
 695 
 696 void
 697 print_text(struct html *h, const char *word)
 698 {
 699         if (h->col && (h->flags & HTML_NOSPACE) == 0) {
 700                 if ( ! (HTML_KEEP & h->flags)) {
 701                         if (HTML_PREKEEP & h->flags)
 702                                 h->flags |= HTML_KEEP;
 703                         print_endword(h);
 704                 } else
 705                         print_word(h, "&#x00A0;");
 706         }
 707 
 708         assert(NULL == h->metaf);
 709         switch (h->metac) {
 710         case HTMLFONT_ITALIC:
 711                 h->metaf = print_otag(h, TAG_I, "");
 712                 break;
 713         case HTMLFONT_BOLD:
 714                 h->metaf = print_otag(h, TAG_B, "");
 715                 break;
 716         case HTMLFONT_BI:


 866         if (h->col)
 867                 return;
 868 
 869         if (h->noindent == 0) {
 870                 h->col = h->indent * 2;
 871                 for (i = 0; i < h->col; i++)
 872                         putchar(' ');
 873         }
 874         h->flags &= ~HTML_NOSPACE;
 875 }
 876 
 877 /*
 878  * Print or buffer some characters
 879  * depending on the current HTML output buffer state.
 880  */
 881 static void
 882 print_word(struct html *h, const char *cp)
 883 {
 884         while (*cp != '\0')
 885                 print_byte(h, *cp++);


















 886 }