1 /*      $Id: html.c,v 1.219 2017/07/15 17:57:51 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #include "config.h"
  19 
  20 #include <sys/types.h>
  21 
  22 #include <assert.h>
  23 #include <ctype.h>
  24 #include <stdarg.h>
  25 #include <stdio.h>
  26 #include <stdint.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29 #include <unistd.h>
  30 
  31 #include "mandoc_aux.h"
  32 #include "mandoc.h"
  33 #include "roff.h"
  34 #include "out.h"
  35 #include "html.h"
  36 #include "manconf.h"
  37 #include "main.h"
  38 
/*
 * Static description of one HTML element: its name as written in
 * the output and a set of flags steering how print_otag() and
 * print_ctag() format it.
 */
struct  htmldata {
        const char       *name;
        int               flags;
/* Do not push the element onto the stack of open tags. */
#define HTML_NOSTACK     (1 << 0)
/* Write a '/' before the closing '>' of the opening tag. */
#define HTML_AUTOCLOSE   (1 << 1)
/* End the current output line before the opening tag. */
#define HTML_NLBEFORE    (1 << 2)
/* End the output line right after the opening tag. */
#define HTML_NLBEGIN     (1 << 3)
/* End the current output line before the closing tag. */
#define HTML_NLEND       (1 << 4)
/* End the output line right after the closing tag. */
#define HTML_NLAFTER     (1 << 5)
#define HTML_NLAROUND    (HTML_NLBEFORE | HTML_NLAFTER)
#define HTML_NLINSIDE    (HTML_NLBEGIN | HTML_NLEND)
#define HTML_NLALL       (HTML_NLAROUND | HTML_NLINSIDE)
/* Increment the indentation level while the element is open. */
#define HTML_INDENT      (1 << 6)
/* Increment the no-indentation counter while the element is open. */
#define HTML_NOINDENT    (1 << 7)
};
  54 
/*
 * Element names and formatting flags, indexed by enum htmltag
 * (print_otag() and print_ctag() look entries up as htmltags[tag]).
 * NOTE(review): the entry order presumably has to match the
 * declaration order of enum htmltag, which is defined outside
 * this file - confirm before adding or reordering entries.
 */
static  const struct htmldata htmltags[TAG_MAX] = {
        {"html",        HTML_NLALL},
        {"head",        HTML_NLALL | HTML_INDENT},
        {"body",        HTML_NLALL},
        {"meta",        HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
        {"title",       HTML_NLAROUND},
        {"div",         HTML_NLAROUND},
        {"h1",          HTML_NLAROUND},
        {"h2",          HTML_NLAROUND},
        {"span",        0},
        {"link",        HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
        {"br",          HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
        {"a",           0},
        {"table",       HTML_NLALL | HTML_INDENT},
        {"colgroup",    HTML_NLALL | HTML_INDENT},
        {"col",         HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
        {"tr",          HTML_NLALL | HTML_INDENT},
        {"td",          HTML_NLAROUND},
        {"li",          HTML_NLAROUND | HTML_INDENT},
        {"ul",          HTML_NLALL | HTML_INDENT},
        {"ol",          HTML_NLALL | HTML_INDENT},
        {"dl",          HTML_NLALL | HTML_INDENT},
        {"dt",          HTML_NLAROUND},
        {"dd",          HTML_NLAROUND | HTML_INDENT},
        {"pre",         HTML_NLALL | HTML_NOINDENT},
        {"var",         0},
        {"cite",        0},
        {"b",           0},
        {"i",           0},
        {"code",        0},
        {"small",       0},
        {"style",       HTML_NLALL | HTML_INDENT},
        {"math",        HTML_NLALL | HTML_INDENT},
        {"mrow",        0},
        {"mi",          0},
        {"mn",          0},
        {"mo",          0},
        {"msup",        0},
        {"msub",        0},
        {"msubsup",     0},
        {"mfrac",       0},
        {"msqrt",       0},
        {"mfenced",     0},
        {"mtable",      0},
        {"mtr",         0},
        {"mtd",         0},
        {"munderover",  0},
        {"munder",      0},
        {"mover",       0},
};
 105 
/*
 * Unit suffixes written after a style value, indexed by the unit
 * member of struct roffsu (see the lookup in print_otag()).
 * Several roff units share one suffix; print_otag() rescales
 * SCALE_MM and SCALE_BU values before printing them.
 */
static  const char      *const roffscales[SCALE_MAX] = {
        "cm", /* SCALE_CM */
        "in", /* SCALE_IN */
        "pc", /* SCALE_PC */
        "pt", /* SCALE_PT */
        "em", /* SCALE_EM */
        "em", /* SCALE_MM */
        "ex", /* SCALE_EN */
        "ex", /* SCALE_BU */
        "em", /* SCALE_VS */
        "ex", /* SCALE_FS */
};
 118 
/* Width parsing and low level, line-breaking output helpers. */
static  void     a2width(const char *, struct roffsu *);
static  void     print_byte(struct html *, char);
static  void     print_endword(struct html *);
static  void     print_indent(struct html *);
static  void     print_word(struct html *, const char *);

/* Tag closing, text encoding, link and font helpers. */
static  void     print_ctag(struct html *, struct tag *);
static  int      print_escape(struct html *, char);
static  int      print_encode(struct html *, const char *, const char *, int);
static  void     print_href(struct html *, const char *, const char *, int);
static  void     print_metaf(struct html *, enum mandoc_esc);
 130 
 131 
 132 void *
 133 html_alloc(const struct manoutput *outopts)
 134 {
 135         struct html     *h;
 136 
 137         h = mandoc_calloc(1, sizeof(struct html));
 138 
 139         h->tag = NULL;
 140         h->style = outopts->style;
 141         h->base_man = outopts->man;
 142         h->base_includes = outopts->includes;
 143         if (outopts->fragment)
 144                 h->oflags |= HTML_FRAGMENT;
 145 
 146         return h;
 147 }
 148 
 149 void
 150 html_free(void *p)
 151 {
 152         struct tag      *tag;
 153         struct html     *h;
 154 
 155         h = (struct html *)p;
 156 
 157         while ((tag = h->tag) != NULL) {
 158                 h->tag = tag->next;
 159                 free(tag);
 160         }
 161 
 162         free(h);
 163 }
 164 
 165 void
 166 print_gen_head(struct html *h)
 167 {
 168         struct tag      *t;
 169 
 170         print_otag(h, TAG_META, "?", "charset", "utf-8");
 171 
 172         /*
 173          * Print a default style-sheet.
 174          */
 175 
 176         t = print_otag(h, TAG_STYLE, "");
 177         print_text(h, "table.head, table.foot { width: 100%; }");
 178         print_endline(h);
 179         print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
 180         print_endline(h);
 181         print_text(h, "td.head-vol { text-align: center; }");
 182         print_endline(h);
 183         print_text(h, "div.Pp { margin: 1ex 0ex; }");
 184         print_tagq(h, t);
 185 
 186         if (h->style)
 187                 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
 188                     h->style, "type", "text/css", "media", "all");
 189 }
 190 
 191 static void
 192 print_metaf(struct html *h, enum mandoc_esc deco)
 193 {
 194         enum htmlfont    font;
 195 
 196         switch (deco) {
 197         case ESCAPE_FONTPREV:
 198                 font = h->metal;
 199                 break;
 200         case ESCAPE_FONTITALIC:
 201                 font = HTMLFONT_ITALIC;
 202                 break;
 203         case ESCAPE_FONTBOLD:
 204                 font = HTMLFONT_BOLD;
 205                 break;
 206         case ESCAPE_FONTBI:
 207                 font = HTMLFONT_BI;
 208                 break;
 209         case ESCAPE_FONT:
 210         case ESCAPE_FONTROMAN:
 211                 font = HTMLFONT_NONE;
 212                 break;
 213         default:
 214                 abort();
 215         }
 216 
 217         if (h->metaf) {
 218                 print_tagq(h, h->metaf);
 219                 h->metaf = NULL;
 220         }
 221 
 222         h->metal = h->metac;
 223         h->metac = font;
 224 
 225         switch (font) {
 226         case HTMLFONT_ITALIC:
 227                 h->metaf = print_otag(h, TAG_I, "");
 228                 break;
 229         case HTMLFONT_BOLD:
 230                 h->metaf = print_otag(h, TAG_B, "");
 231                 break;
 232         case HTMLFONT_BI:
 233                 h->metaf = print_otag(h, TAG_B, "");
 234                 print_otag(h, TAG_I, "");
 235                 break;
 236         default:
 237                 break;
 238         }
 239 }
 240 
 241 char *
 242 html_make_id(const struct roff_node *n)
 243 {
 244         const struct roff_node  *nch;
 245         char                    *buf, *cp;
 246 
 247         for (nch = n->child; nch != NULL; nch = nch->next)
 248                 if (nch->type != ROFFT_TEXT)
 249                         return NULL;
 250 
 251         buf = NULL;
 252         deroff(&buf, n);
 253 
 254         /* http://www.w3.org/TR/html5/dom.html#the-id-attribute */
 255 
 256         for (cp = buf; *cp != '\0'; cp++)
 257                 if (*cp == ' ')
 258                         *cp = '_';
 259 
 260         return buf;
 261 }
 262 
 263 int
 264 html_strlen(const char *cp)
 265 {
 266         size_t           rsz;
 267         int              skip, sz;
 268 
 269         /*
 270          * Account for escaped sequences within string length
 271          * calculations.  This follows the logic in term_strlen() as we
 272          * must calculate the width of produced strings.
 273          * Assume that characters are always width of "1".  This is
 274          * hacky, but it gets the job done for approximation of widths.
 275          */
 276 
 277         sz = 0;
 278         skip = 0;
 279         while (1) {
 280                 rsz = strcspn(cp, "\\");
 281                 if (rsz) {
 282                         cp += rsz;
 283                         if (skip) {
 284                                 skip = 0;
 285                                 rsz--;
 286                         }
 287                         sz += rsz;
 288                 }
 289                 if ('\0' == *cp)
 290                         break;
 291                 cp++;
 292                 switch (mandoc_escape(&cp, NULL, NULL)) {
 293                 case ESCAPE_ERROR:
 294                         return sz;
 295                 case ESCAPE_UNICODE:
 296                 case ESCAPE_NUMBERED:
 297                 case ESCAPE_SPECIAL:
 298                 case ESCAPE_OVERSTRIKE:
 299                         if (skip)
 300                                 skip = 0;
 301                         else
 302                                 sz++;
 303                         break;
 304                 case ESCAPE_SKIPCHAR:
 305                         skip = 1;
 306                         break;
 307                 default:
 308                         break;
 309                 }
 310         }
 311         return sz;
 312 }
 313 
 314 static int
 315 print_escape(struct html *h, char c)
 316 {
 317 
 318         switch (c) {
 319         case '<':
 320                 print_word(h, "&lt;");
 321                 break;
 322         case '>':
 323                 print_word(h, "&gt;");
 324                 break;
 325         case '&':
 326                 print_word(h, "&amp;");
 327                 break;
 328         case '"':
 329                 print_word(h, "&quot;");
 330                 break;
 331         case ASCII_NBRSP:
 332                 print_word(h, "&nbsp;");
 333                 break;
 334         case ASCII_HYPH:
 335                 print_byte(h, '-');
 336                 break;
 337         case ASCII_BREAK:
 338                 break;
 339         default:
 340                 return 0;
 341         }
 342         return 1;
 343 }
 344 
/*
 * Encode and output the text from p up to, but not including, pend.
 * If pend is NULL, the text extends to the terminating NUL byte.
 * Plain bytes are copied, characters handled by print_escape() are
 * replaced, blanks end the current output word, and roff escape
 * sequences are interpreted.  If norecurse is non-zero, font escapes
 * are parsed but do not change the font.
 * Returns 1 if a no-space escape was found with nothing following
 * it in the string, or 0 otherwise.
 */
static int
print_encode(struct html *h, const char *p, const char *pend, int norecurse)
{
        char             numbuf[16];
        struct tag      *t;
        const char      *seq;
        size_t           sz;
        int              c, len, breakline, nospace;
        enum mandoc_esc  esc;
        /* Bytes that terminate a run of directly copyable characters. */
        static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
                ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };

        if (pend == NULL)
                pend = strchr(p, '\0');

        breakline = 0;
        nospace = 0;

        while (p < pend) {
                /* A pending skip request swallows one ordinary byte. */
                if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
                        h->flags &= ~HTML_SKIPCHAR;
                        p++;
                        continue;
                }

                /* Copy the run of bytes that need no translation. */
                for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
                        print_byte(h, *p);

                /*
                 * A break escape was seen earlier: at the next blank
                 * or at the end of the text, emit a div element and
                 * skip the blanks that follow.
                 */
                if (breakline &&
                    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
                        t = print_otag(h, TAG_DIV, "");
                        print_text(h, "\\~");
                        print_tagq(h, t);
                        breakline = 0;
                        while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
                                p++;
                        continue;
                }

                if (p >= pend)
                        break;

                if (*p == ' ') {
                        print_endword(h);
                        p++;
                        continue;
                }

                /* Note that p is advanced even if nothing was printed. */
                if (print_escape(h, *p++))
                        continue;

                /* What remains is a backslash: parse the escape. */
                esc = mandoc_escape(&p, &seq, &len);
                if (ESCAPE_ERROR == esc)
                        break;

                /* Escapes that change state rather than print. */
                switch (esc) {
                case ESCAPE_FONT:
                case ESCAPE_FONTPREV:
                case ESCAPE_FONTBOLD:
                case ESCAPE_FONTITALIC:
                case ESCAPE_FONTBI:
                case ESCAPE_FONTROMAN:
                        if (0 == norecurse)
                                print_metaf(h, esc);
                        continue;
                case ESCAPE_SKIPCHAR:
                        h->flags |= HTML_SKIPCHAR;
                        continue;
                default:
                        break;
                }

                /* A pending skip request swallows a whole escape, too. */
                if (h->flags & HTML_SKIPCHAR) {
                        h->flags &= ~HTML_SKIPCHAR;
                        continue;
                }

                /* Escapes that yield a single character code in c. */
                switch (esc) {
                case ESCAPE_UNICODE:
                        /* Skip past "u" header. */
                        c = mchars_num2uc(seq + 1, len - 1);
                        break;
                case ESCAPE_NUMBERED:
                        c = mchars_num2char(seq, len);
                        if (c < 0)
                                continue;
                        break;
                case ESCAPE_SPECIAL:
                        c = mchars_spec2cp(seq, len);
                        if (c <= 0)
                                continue;
                        break;
                case ESCAPE_BREAK:
                        breakline = 1;
                        continue;
                case ESCAPE_NOSPACE:
                        /* Only relevant at the very end of the string. */
                        if ('\0' == *p)
                                nospace = 1;
                        continue;
                case ESCAPE_OVERSTRIKE:
                        if (len == 0)
                                continue;
                        /* Only the last overstruck character is shown. */
                        c = seq[len - 1];
                        break;
                default:
                        continue;
                }
                /*
                 * Replace control codes other than the tab, and the
                 * range 0x7F to 0x9F, by U+FFFD.
                 */
                if ((c < 0x20 && c != 0x09) ||
                    (c > 0x7E && c < 0xA0))
                        c = 0xFFFD;
                /* Codes above 0x7E become numeric character references. */
                if (c > 0x7E) {
                        (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
                        print_word(h, numbuf);
                } else if (print_escape(h, c) == 0)
                        print_byte(h, c);
        }

        return nospace;
}
 464 
 465 static void
 466 print_href(struct html *h, const char *name, const char *sec, int man)
 467 {
 468         const char      *p, *pp;
 469 
 470         pp = man ? h->base_man : h->base_includes;
 471         while ((p = strchr(pp, '%')) != NULL) {
 472                 print_encode(h, pp, p, 1);
 473                 if (man && p[1] == 'S') {
 474                         if (sec == NULL)
 475                                 print_byte(h, '1');
 476                         else
 477                                 print_encode(h, sec, NULL, 1);
 478                 } else if ((man && p[1] == 'N') ||
 479                     (man == 0 && p[1] == 'I'))
 480                         print_encode(h, name, NULL, 1);
 481                 else
 482                         print_encode(h, p, p + 2, 1);
 483                 pp = p + 2;
 484         }
 485         if (*pp != '\0')
 486                 print_encode(h, pp, NULL, 1);
 487 }
 488 
/*
 * Print an opening tag, including attributes and styles, and push
 * the element onto the stack of open tags unless it is flagged
 * HTML_NOSTACK.  Returns the new stack entry, or NULL for elements
 * that are not stacked.
 *
 * The format string fmt describes the variable arguments.
 *
 * Attribute section, one letter per attribute:
 *   c  class attribute, one string argument
 *   h  href attribute, one string argument
 *   i  id attribute, one string argument
 *   ?  arbitrary attribute, two string arguments: name and value
 * An attribute letter may be followed by one modifier letter:
 *   I  expand the value with the template for include files
 *   M  expand the value with the template for manual pages;
 *      takes one additional string argument, the section
 *   R  prefix the value with a hash sign
 *   T  additionally print a title attribute with the same value
 * An attribute with a NULL value is not printed.
 *
 * The letter s ends the attribute section and starts the style
 * section, which consists of pairs of letters.
 * First letter, type of the argument:
 *   h  int, a horizontal width passed to SCALE_HS_INIT()
 *   s  string
 *   u  pointer to a struct roffsu
 *   w  string parsed by a2width(), NULL to omit the style;
 *      may be followed by "*" (omit a width of about 6 en),
 *      "+" (enlarge and pad), and "-" (negate), in that order
 * Second letter, name of the style:
 *   h  height, i  text-indent, l  margin-left,
 *   w  width, W  min-width,
 *   ?  arbitrary style; the first string argument is the name
 *      and one more string argument provides the value
 * Any other letter aborts the program.
 *
 * NOTE(review): when an attribute value is NULL and a modifier
 * letter follows, the modifier is not consumed and the next loop
 * iteration appears to reach abort() - confirm that no caller
 * combines a possibly NULL value with a modifier.
 */
struct tag *
print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
{
        va_list          ap;
        struct roffsu    mysu, *su;
        char             numbuf[16];
        struct tag      *t;
        const char      *attr;
        char            *arg1, *arg2;
        double           v;
        int              i, have_style, tflags;

        tflags = htmltags[tag].flags;

        /* Push this tag onto the stack of open scopes. */

        if ((tflags & HTML_NOSTACK) == 0) {
                t = mandoc_malloc(sizeof(struct tag));
                t->tag = tag;
                t->next = h->tag;
                h->tag = t;
        } else
                t = NULL;

        /* Handle line breaks and spacing before the tag. */

        if (tflags & HTML_NLBEFORE)
                print_endline(h);
        if (h->col == 0)
                print_indent(h);
        else if ((h->flags & HTML_NOSPACE) == 0) {
                if (h->flags & HTML_KEEP)
                        print_word(h, "&#x00A0;");
                else {
                        if (h->flags & HTML_PREKEEP)
                                h->flags |= HTML_KEEP;
                        print_endword(h);
                }
        }

        if ( ! (h->flags & HTML_NONOSPACE))
                h->flags &= ~HTML_NOSPACE;
        else
                h->flags |= HTML_NOSPACE;

        /* Print out the tag name and attributes. */

        print_byte(h, '<');
        print_word(h, htmltags[tag].name);

        va_start(ap, fmt);

        /* have_style: 0 no styles, 1 requested, 2 at least one printed. */
        have_style = 0;
        while (*fmt != '\0') {
                if (*fmt == 's') {
                        have_style = 1;
                        fmt++;
                        break;
                }

                /* Parse a non-style attribute and its arguments. */

                arg1 = va_arg(ap, char *);
                switch (*fmt++) {
                case 'c':
                        attr = "class";
                        break;
                case 'h':
                        attr = "href";
                        break;
                case 'i':
                        attr = "id";
                        break;
                case '?':
                        attr = arg1;
                        arg1 = va_arg(ap, char *);
                        break;
                default:
                        abort();
                }
                /* The M modifier always consumes a section argument. */
                arg2 = NULL;
                if (*fmt == 'M')
                        arg2 = va_arg(ap, char *);
                if (arg1 == NULL)
                        continue;

                /* Print the non-style attributes. */

                print_byte(h, ' ');
                print_word(h, attr);
                print_byte(h, '=');
                print_byte(h, '"');
                switch (*fmt) {
                case 'I':
                        print_href(h, arg1, NULL, 0);
                        fmt++;
                        break;
                case 'M':
                        print_href(h, arg1, arg2, 1);
                        fmt++;
                        break;
                case 'R':
                        print_byte(h, '#');
                        print_encode(h, arg1, NULL, 1);
                        fmt++;
                        break;
                case 'T':
                        print_encode(h, arg1, NULL, 1);
                        print_word(h, "\" title=\"");
                        print_encode(h, arg1, NULL, 1);
                        fmt++;
                        break;
                default:
                        print_encode(h, arg1, NULL, 1);
                        break;
                }
                print_byte(h, '"');
        }

        /* Print out styles. */

        while (*fmt != '\0') {
                arg1 = NULL;
                su = NULL;

                /* First letter: input argument type. */

                switch (*fmt++) {
                case 'h':
                        i = va_arg(ap, int);
                        su = &mysu;
                        SCALE_HS_INIT(su, i);
                        break;
                case 's':
                        arg1 = va_arg(ap, char *);
                        break;
                case 'u':
                        su = va_arg(ap, struct roffsu *);
                        break;
                case 'w':
                        if ((arg2 = va_arg(ap, char *)) != NULL) {
                                su = &mysu;
                                a2width(arg2, su);
                        }
                        if (*fmt == '*') {
                                /* Suppress a width of about 6 en. */
                                if (su != NULL && su->unit == SCALE_EN &&
                                    su->scale > 5.9 && su->scale < 6.1)
                                        su = NULL;
                                fmt++;
                        }
                        if (*fmt == '+') {
                                if (su != NULL) {
                                        /* Make even bold text fit. */
                                        su->scale *= 1.2;
                                        /* Add padding. */
                                        su->scale += 3.0;
                                }
                                fmt++;
                        }
                        if (*fmt == '-') {
                                if (su != NULL)
                                        su->scale *= -1.0;
                                fmt++;
                        }
                        break;
                default:
                        abort();
                }

                /* Second letter: style name. */

                switch (*fmt++) {
                case 'h':
                        attr = "height";
                        break;
                case 'i':
                        attr = "text-indent";
                        break;
                case 'l':
                        attr = "margin-left";
                        break;
                case 'w':
                        attr = "width";
                        break;
                case 'W':
                        attr = "min-width";
                        break;
                case '?':
                        attr = arg1;
                        arg1 = va_arg(ap, char *);
                        break;
                default:
                        abort();
                }
                /* Neither a scaled value nor a string: omit the style. */
                if (su == NULL && arg1 == NULL)
                        continue;

                /* The first style printed opens the style attribute. */
                if (have_style == 1)
                        print_word(h, " style=\"");
                else
                        print_byte(h, ' ');
                print_word(h, attr);
                print_byte(h, ':');
                print_byte(h, ' ');
                if (su != NULL) {
                        v = su->scale;
                        /* Rescale units that share a suffix with others. */
                        if (su->unit == SCALE_MM && (v /= 100.0) == 0.0)
                                v = 1.0;
                        else if (su->unit == SCALE_BU)
                                v /= 24.0;
                        (void)snprintf(numbuf, sizeof(numbuf), "%.2f", v);
                        print_word(h, numbuf);
                        print_word(h, roffscales[su->unit]);
                } else
                        print_word(h, arg1);
                print_byte(h, ';');
                have_style = 2;
        }
        /* Close the style attribute if it was opened. */
        if (have_style == 2)
                print_byte(h, '"');

        va_end(ap);

        /* Accommodate for "well-formed" singleton escaping. */

        if (HTML_AUTOCLOSE & htmltags[tag].flags)
                print_byte(h, '/');

        print_byte(h, '>');

        if (tflags & HTML_NLBEGIN)
                print_endline(h);
        else
                h->flags |= HTML_NOSPACE;

        /* These counters are decremented again in print_ctag(). */
        if (tflags & HTML_INDENT)
                h->indent++;
        if (tflags & HTML_NOINDENT)
                h->noindent++;

        return t;
}
 729 
 730 static void
 731 print_ctag(struct html *h, struct tag *tag)
 732 {
 733         int      tflags;
 734 
 735         /*
 736          * Remember to close out and nullify the current
 737          * meta-font and table, if applicable.
 738          */
 739         if (tag == h->metaf)
 740                 h->metaf = NULL;
 741         if (tag == h->tblt)
 742                 h->tblt = NULL;
 743 
 744         tflags = htmltags[tag->tag].flags;
 745 
 746         if (tflags & HTML_INDENT)
 747                 h->indent--;
 748         if (tflags & HTML_NOINDENT)
 749                 h->noindent--;
 750         if (tflags & HTML_NLEND)
 751                 print_endline(h);
 752         print_indent(h);
 753         print_byte(h, '<');
 754         print_byte(h, '/');
 755         print_word(h, htmltags[tag->tag].name);
 756         print_byte(h, '>');
 757         if (tflags & HTML_NLAFTER)
 758                 print_endline(h);
 759 
 760         h->tag = tag->next;
 761         free(tag);
 762 }
 763 
 764 void
 765 print_gen_decls(struct html *h)
 766 {
 767         print_word(h, "<!DOCTYPE html>");
 768         print_endline(h);
 769 }
 770 
 771 void
 772 print_text(struct html *h, const char *word)
 773 {
 774         if (h->col && (h->flags & HTML_NOSPACE) == 0) {
 775                 if ( ! (HTML_KEEP & h->flags)) {
 776                         if (HTML_PREKEEP & h->flags)
 777                                 h->flags |= HTML_KEEP;
 778                         print_endword(h);
 779                 } else
 780                         print_word(h, "&#x00A0;");
 781         }
 782 
 783         assert(NULL == h->metaf);
 784         switch (h->metac) {
 785         case HTMLFONT_ITALIC:
 786                 h->metaf = print_otag(h, TAG_I, "");
 787                 break;
 788         case HTMLFONT_BOLD:
 789                 h->metaf = print_otag(h, TAG_B, "");
 790                 break;
 791         case HTMLFONT_BI:
 792                 h->metaf = print_otag(h, TAG_B, "");
 793                 print_otag(h, TAG_I, "");
 794                 break;
 795         default:
 796                 print_indent(h);
 797                 break;
 798         }
 799 
 800         assert(word);
 801         if ( ! print_encode(h, word, NULL, 0)) {
 802                 if ( ! (h->flags & HTML_NONOSPACE))
 803                         h->flags &= ~HTML_NOSPACE;
 804                 h->flags &= ~HTML_NONEWLINE;
 805         } else
 806                 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
 807 
 808         if (h->metaf) {
 809                 print_tagq(h, h->metaf);
 810                 h->metaf = NULL;
 811         }
 812 
 813         h->flags &= ~HTML_IGNDELIM;
 814 }
 815 
 816 void
 817 print_tagq(struct html *h, const struct tag *until)
 818 {
 819         struct tag      *tag;
 820 
 821         while ((tag = h->tag) != NULL) {
 822                 print_ctag(h, tag);
 823                 if (until && tag == until)
 824                         return;
 825         }
 826 }
 827 
 828 void
 829 print_stagq(struct html *h, const struct tag *suntil)
 830 {
 831         struct tag      *tag;
 832 
 833         while ((tag = h->tag) != NULL) {
 834                 if (suntil && tag == suntil)
 835                         return;
 836                 print_ctag(h, tag);
 837         }
 838 }
 839 
 840 void
 841 print_paragraph(struct html *h)
 842 {
 843         struct tag      *t;
 844 
 845         t = print_otag(h, TAG_DIV, "c", "Pp");
 846         print_tagq(h, t);
 847 }
 848 
 849 
 850 /***********************************************************************
 851  * Low level output functions.
 852  * They implement line breaking using a short static buffer.
 853  ***********************************************************************/
 854 
 855 /*
 856  * Buffer one HTML output byte.
 857  * If the buffer is full, flush and deactivate it and start a new line.
 858  * If the buffer is inactive, print directly.
 859  */
 860 static void
 861 print_byte(struct html *h, char c)
 862 {
 863         if ((h->flags & HTML_BUFFER) == 0) {
 864                 putchar(c);
 865                 h->col++;
 866                 return;
 867         }
 868 
 869         if (h->col + h->bufcol < sizeof(h->buf)) {
 870                 h->buf[h->bufcol++] = c;
 871                 return;
 872         }
 873 
 874         putchar('\n');
 875         h->col = 0;
 876         print_indent(h);
 877         putchar(' ');
 878         putchar(' ');
 879         fwrite(h->buf, h->bufcol, 1, stdout);
 880         putchar(c);
 881         h->col = (h->indent + 1) * 2 + h->bufcol + 1;
 882         h->bufcol = 0;
 883         h->flags &= ~HTML_BUFFER;
 884 }
 885 
 886 /*
 887  * If something was printed on the current output line, end it.
 888  * Not to be called right after print_indent().
 889  */
 890 void
 891 print_endline(struct html *h)
 892 {
 893         if (h->col == 0)
 894                 return;
 895 
 896         if (h->bufcol) {
 897                 putchar(' ');
 898                 fwrite(h->buf, h->bufcol, 1, stdout);
 899                 h->bufcol = 0;
 900         }
 901         putchar('\n');
 902         h->col = 0;
 903         h->flags |= HTML_NOSPACE;
 904         h->flags &= ~HTML_BUFFER;
 905 }
 906 
 907 /*
 908  * Flush the HTML output buffer.
 909  * If it is inactive, activate it.
 910  */
 911 static void
 912 print_endword(struct html *h)
 913 {
 914         if (h->noindent) {
 915                 print_byte(h, ' ');
 916                 return;
 917         }
 918 
 919         if ((h->flags & HTML_BUFFER) == 0) {
 920                 h->col++;
 921                 h->flags |= HTML_BUFFER;
 922         } else if (h->bufcol) {
 923                 putchar(' ');
 924                 fwrite(h->buf, h->bufcol, 1, stdout);
 925                 h->col += h->bufcol + 1;
 926         }
 927         h->bufcol = 0;
 928 }
 929 
 930 /*
 931  * If at the beginning of a new output line,
 932  * perform indentation and mark the line as containing output.
 933  * Make sure to really produce some output right afterwards,
 934  * but do not use print_otag() for producing it.
 935  */
 936 static void
 937 print_indent(struct html *h)
 938 {
 939         size_t   i;
 940 
 941         if (h->col)
 942                 return;
 943 
 944         if (h->noindent == 0) {
 945                 h->col = h->indent * 2;
 946                 for (i = 0; i < h->col; i++)
 947                         putchar(' ');
 948         }
 949         h->flags &= ~HTML_NOSPACE;
 950 }
 951 
 952 /*
 953  * Print or buffer some characters
 954  * depending on the current HTML output buffer state.
 955  */
 956 static void
 957 print_word(struct html *h, const char *cp)
 958 {
 959         while (*cp != '\0')
 960                 print_byte(h, *cp++);
 961 }
 962 
 963 /*
 964  * Calculate the scaling unit passed in a `-width' argument.  This uses
 965  * either a native scaling unit (e.g., 1i, 2m) or the string length of
 966  * the value.
 967  */
 968 static void
 969 a2width(const char *p, struct roffsu *su)
 970 {
 971         const char      *end;
 972 
 973         end = a2roffsu(p, su, SCALE_MAX);
 974         if (end == NULL || *end != '\0') {
 975                 su->unit = SCALE_EN;
 976                 su->scale = html_strlen(p);
 977         } else if (su->scale < 0.0)
 978                 su->scale = 0.0;
 979 }