Print this page
5051 import mdocml-1.12.3
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Approved by: TBD
   1 /*      $Id: html.c,v 1.150 2011/10/05 21:35:17 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #ifdef HAVE_CONFIG_H
  19 #include "config.h"
  20 #endif
  21 
  22 #include <sys/types.h>
  23 
  24 #include <assert.h>


 218                 tag[3].val = "all";
 219                 print_otag(h, TAG_LINK, 4, tag);
 220         }
 221 }
 222 
 223 static void
 224 print_metaf(struct html *h, enum mandoc_esc deco)
 225 {
 226         enum htmlfont    font;
 227 
 228         switch (deco) {
 229         case (ESCAPE_FONTPREV):
 230                 font = h->metal;
 231                 break;
 232         case (ESCAPE_FONTITALIC):
 233                 font = HTMLFONT_ITALIC;
 234                 break;
 235         case (ESCAPE_FONTBOLD):
 236                 font = HTMLFONT_BOLD;
 237                 break;



 238         case (ESCAPE_FONT):
 239                 /* FALLTHROUGH */
 240         case (ESCAPE_FONTROMAN):
 241                 font = HTMLFONT_NONE;
 242                 break;
 243         default:
 244                 abort();
 245                 /* NOTREACHED */
 246         }
 247 
 248         if (h->metaf) {
 249                 print_tagq(h, h->metaf);
 250                 h->metaf = NULL;
 251         }
 252 
 253         h->metal = h->metac;
 254         h->metac = font;
 255 
 256         if (HTMLFONT_NONE != font)
 257                 h->metaf = HTMLFONT_BOLD == font ?
 258                         print_otag(h, TAG_B, 0, NULL) :






 259                         print_otag(h, TAG_I, 0, NULL);




 260 }
 261 
 262 int
 263 html_strlen(const char *cp)
 264 {
 265         int              ssz, sz;
 266         const char      *seq, *p;
 267 
 268         /*
 269          * Account for escaped sequences within string length
 270          * calculations.  This follows the logic in term_strlen() as we
 271          * must calculate the width of produced strings.
 272          * Assume that characters are always width of "1".  This is
 273          * hacky, but it gets the job done for approximation of widths.
 274          */
 275 
 276         sz = 0;
 277         while (NULL != (p = strchr(cp, '\\'))) {
 278                 sz += (int)(p - cp);
 279                 ++cp;
 280                 switch (mandoc_escape(&cp, &seq, &ssz)) {











 281                 case (ESCAPE_ERROR):
 282                         return(sz);
 283                 case (ESCAPE_UNICODE):
 284                         /* FALLTHROUGH */
 285                 case (ESCAPE_NUMBERED):
 286                         /* FALLTHROUGH */
 287                 case (ESCAPE_SPECIAL):



 288                         sz++;
 289                         break;



 290                 default:
 291                         break;
 292                 }
 293         }
 294 
 295         assert(sz >= 0);
 296         return(sz + strlen(cp));
 297 }
 298 
 299 static int
 300 print_encode(struct html *h, const char *p, int norecurse)
 301 {
 302         size_t           sz;
 303         int              c, len, nospace;
 304         const char      *seq;
 305         enum mandoc_esc  esc;
 306         static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
 307 
 308         nospace = 0;
 309 
 310         while ('\0' != *p) {






 311                 sz = strcspn(p, rejs);
 312 
 313                 fwrite(p, 1, sz, stdout);
 314                 p += (int)sz;
 315 
 316                 if ('\0' == *p)
 317                         break;
 318 
 319                 switch (*p++) {
 320                 case ('<'):
 321                         printf("&lt;");
 322                         continue;
 323                 case ('>'):
 324                         printf("&gt;");
 325                         continue;
 326                 case ('&'):
 327                         printf("&amp;");
 328                         continue;
 329                 case (ASCII_HYPH):
 330                         putchar('-');
 331                         continue;
 332                 default:
 333                         break;
 334                 }
 335 
 336                 esc = mandoc_escape(&p, &seq, &len);
 337                 if (ESCAPE_ERROR == esc)
 338                         break;
 339 
 340                 switch (esc) {



























 341                 case (ESCAPE_UNICODE):
 342                         /* Skip passed "u" header. */
 343                         c = mchars_num2uc(seq + 1, len - 1);
 344                         if ('\0' != c)
 345                                 printf("&#x%x;", c);
 346                         break;
 347                 case (ESCAPE_NUMBERED):
 348                         c = mchars_num2char(seq, len);
 349                         if ('\0' != c)
 350                                 putchar(c);
 351                         break;
 352                 case (ESCAPE_SPECIAL):
 353                         c = mchars_spec2cp(h->symtab, seq, len);
 354                         if (c > 0)
 355                                 printf("&#%d;", c);
 356                         else if (-1 == c && 1 == len)
 357                                 putchar((int)*seq);
 358                         break;
 359                 case (ESCAPE_FONT):
 360                         /* FALLTHROUGH */
 361                 case (ESCAPE_FONTPREV):
 362                         /* FALLTHROUGH */
 363                 case (ESCAPE_FONTBOLD):
 364                         /* FALLTHROUGH */
 365                 case (ESCAPE_FONTITALIC):
 366                         /* FALLTHROUGH */
 367                 case (ESCAPE_FONTROMAN):
 368                         if (norecurse)
 369                                 break;
 370                         print_metaf(h, esc);
 371                         break;
 372                 case (ESCAPE_NOSPACE):
 373                         if ('\0' == *p)
 374                                 nospace = 1;
 375                         break;
 376                 default:
 377                         break;
 378                 }
 379         }
 380 
 381         return(nospace);
 382 }
 383 
 384 
 385 static void
 386 print_attr(struct html *h, const char *key, const char *val)
 387 {
 388         printf(" %s=\"", key);
 389         (void)print_encode(h, val, 1);
 390         putchar('\"');
 391 }


 494 
 495         printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", 
 496                         name, doctype, dtd);
 497 }
 498 
 499 void
 500 print_text(struct html *h, const char *word)
 501 {
 502 
 503         if ( ! (HTML_NOSPACE & h->flags)) {
 504                 /* Manage keeps! */
 505                 if ( ! (HTML_KEEP & h->flags)) {
 506                         if (HTML_PREKEEP & h->flags)
 507                                 h->flags |= HTML_KEEP;
 508                         putchar(' ');
 509                 } else
 510                         printf("&#160;");
 511         }
 512 
 513         assert(NULL == h->metaf);
 514         if (HTMLFONT_NONE != h->metac)
 515                 h->metaf = HTMLFONT_BOLD == h->metac ?
 516                         print_otag(h, TAG_B, 0, NULL) :






 517                         print_otag(h, TAG_I, 0, NULL);




 518 
 519         assert(word);
 520         if ( ! print_encode(h, word, 0)) {
 521                 if ( ! (h->flags & HTML_NONOSPACE))
 522                         h->flags &= ~HTML_NOSPACE;
 523         } else
 524                 h->flags |= HTML_NOSPACE;
 525 
 526         if (h->metaf) {
 527                 print_tagq(h, h->metaf);
 528                 h->metaf = NULL;
 529         }
 530 
 531         h->flags &= ~HTML_IGNDELIM;
 532 }
 533 
 534 
 535 void
 536 print_tagq(struct html *h, const struct tag *until)
 537 {


   1 /*      $Id: html.c,v 1.152 2013/08/08 20:07:47 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #ifdef HAVE_CONFIG_H
  19 #include "config.h"
  20 #endif
  21 
  22 #include <sys/types.h>
  23 
  24 #include <assert.h>


 218                 tag[3].val = "all";
 219                 print_otag(h, TAG_LINK, 4, tag);
 220         }
 221 }
 222 
 223 static void
 224 print_metaf(struct html *h, enum mandoc_esc deco)
 225 {
 226         enum htmlfont    font;
 227 
 228         switch (deco) {
 229         case (ESCAPE_FONTPREV):
 230                 font = h->metal;
 231                 break;
 232         case (ESCAPE_FONTITALIC):
 233                 font = HTMLFONT_ITALIC;
 234                 break;
 235         case (ESCAPE_FONTBOLD):
 236                 font = HTMLFONT_BOLD;
 237                 break;
 238         case (ESCAPE_FONTBI):
 239                 font = HTMLFONT_BI;
 240                 break;
 241         case (ESCAPE_FONT):
 242                 /* FALLTHROUGH */
 243         case (ESCAPE_FONTROMAN):
 244                 font = HTMLFONT_NONE;
 245                 break;
 246         default:
 247                 abort();
 248                 /* NOTREACHED */
 249         }
 250 
 251         if (h->metaf) {
 252                 print_tagq(h, h->metaf);
 253                 h->metaf = NULL;
 254         }
 255 
 256         h->metal = h->metac;
 257         h->metac = font;
 258 
 259         switch (font) {
 260         case (HTMLFONT_ITALIC):
 261                 h->metaf = print_otag(h, TAG_I, 0, NULL);
 262                 break;
 263         case (HTMLFONT_BOLD):
 264                 h->metaf = print_otag(h, TAG_B, 0, NULL);
 265                 break;
 266         case (HTMLFONT_BI):
 267                 h->metaf = print_otag(h, TAG_B, 0, NULL);
 268                 print_otag(h, TAG_I, 0, NULL);
 269                 break;
 270         default:
 271                 break;
 272         }
 273 }
 274 
 275 int
 276 html_strlen(const char *cp)
 277 {
 278         size_t           rsz;
 279         int              skip, sz;
 280 
 281         /*
 282          * Account for escaped sequences within string length
 283          * calculations.  This follows the logic in term_strlen() as we
 284          * must calculate the width of produced strings.
 285          * Assume that characters are always width of "1".  This is
 286          * hacky, but it gets the job done for approximation of widths.
 287          */
 288 
 289         sz = 0;
 290         skip = 0;
 291         while (1) {
 292                 rsz = strcspn(cp, "\\");
 293                 if (rsz) {
 294                         cp += rsz;
 295                         if (skip) {
 296                                 skip = 0;
 297                                 rsz--;
 298                         }
 299                         sz += rsz;
 300                 }
 301                 if ('\0' == *cp)
 302                         break;
 303                 cp++;
 304                 switch (mandoc_escape(&cp, NULL, NULL)) {
 305                 case (ESCAPE_ERROR):
 306                         return(sz);
 307                 case (ESCAPE_UNICODE):
 308                         /* FALLTHROUGH */
 309                 case (ESCAPE_NUMBERED):
 310                         /* FALLTHROUGH */
 311                 case (ESCAPE_SPECIAL):
 312                         if (skip)
 313                                 skip = 0;
 314                         else
 315                                 sz++;
 316                         break;
 317                 case (ESCAPE_SKIPCHAR):
 318                         skip = 1;
 319                         break;
 320                 default:
 321                         break;
 322                 }
 323         }
 324         return(sz);


 325 }
 326 
 327 static int
 328 print_encode(struct html *h, const char *p, int norecurse)
 329 {
 330         size_t           sz;
 331         int              c, len, nospace;
 332         const char      *seq;
 333         enum mandoc_esc  esc;
 334         static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
 335 
 336         nospace = 0;
 337 
 338         while ('\0' != *p) {
 339                 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
 340                         h->flags &= ~HTML_SKIPCHAR;
 341                         p++;
 342                         continue;
 343                 }
 344 
 345                 sz = strcspn(p, rejs);
 346 
 347                 fwrite(p, 1, sz, stdout);
 348                 p += (int)sz;
 349 
 350                 if ('\0' == *p)
 351                         break;
 352 
 353                 switch (*p++) {
 354                 case ('<'):
 355                         printf("&lt;");
 356                         continue;
 357                 case ('>'):
 358                         printf("&gt;");
 359                         continue;
 360                 case ('&'):
 361                         printf("&amp;");
 362                         continue;
 363                 case (ASCII_HYPH):
 364                         putchar('-');
 365                         continue;
 366                 default:
 367                         break;
 368                 }
 369 
 370                 esc = mandoc_escape(&p, &seq, &len);
 371                 if (ESCAPE_ERROR == esc)
 372                         break;
 373 
 374                 switch (esc) {
 375                 case (ESCAPE_FONT):
 376                         /* FALLTHROUGH */
 377                 case (ESCAPE_FONTPREV):
 378                         /* FALLTHROUGH */
 379                 case (ESCAPE_FONTBOLD):
 380                         /* FALLTHROUGH */
 381                 case (ESCAPE_FONTITALIC):
 382                         /* FALLTHROUGH */
 383                 case (ESCAPE_FONTBI):
 384                         /* FALLTHROUGH */
 385                 case (ESCAPE_FONTROMAN):
 386                         if (0 == norecurse)
 387                                 print_metaf(h, esc);
 388                         continue;
 389                 case (ESCAPE_SKIPCHAR):
 390                         h->flags |= HTML_SKIPCHAR;
 391                         continue;
 392                 default:
 393                         break;
 394                 }
 395 
 396                 if (h->flags & HTML_SKIPCHAR) {
 397                         h->flags &= ~HTML_SKIPCHAR;
 398                         continue;
 399                 }
 400 
 401                 switch (esc) {
 402                 case (ESCAPE_UNICODE):
 403                         /* Skip passed "u" header. */
 404                         c = mchars_num2uc(seq + 1, len - 1);
 405                         if ('\0' != c)
 406                                 printf("&#x%x;", c);
 407                         break;
 408                 case (ESCAPE_NUMBERED):
 409                         c = mchars_num2char(seq, len);
 410                         if ('\0' != c)
 411                                 putchar(c);
 412                         break;
 413                 case (ESCAPE_SPECIAL):
 414                         c = mchars_spec2cp(h->symtab, seq, len);
 415                         if (c > 0)
 416                                 printf("&#%d;", c);
 417                         else if (-1 == c && 1 == len)
 418                                 putchar((int)*seq);
 419                         break;













 420                 case (ESCAPE_NOSPACE):
 421                         if ('\0' == *p)
 422                                 nospace = 1;
 423                         break;
 424                 default:
 425                         break;
 426                 }
 427         }
 428 
 429         return(nospace);
 430 }
 431 
 432 
 433 static void
 434 print_attr(struct html *h, const char *key, const char *val)
 435 {
 436         printf(" %s=\"", key);
 437         (void)print_encode(h, val, 1);
 438         putchar('\"');
 439 }


 542 
 543         printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", 
 544                         name, doctype, dtd);
 545 }
 546 
 547 void
 548 print_text(struct html *h, const char *word)
 549 {
 550 
 551         if ( ! (HTML_NOSPACE & h->flags)) {
 552                 /* Manage keeps! */
 553                 if ( ! (HTML_KEEP & h->flags)) {
 554                         if (HTML_PREKEEP & h->flags)
 555                                 h->flags |= HTML_KEEP;
 556                         putchar(' ');
 557                 } else
 558                         printf("&#160;");
 559         }
 560 
 561         assert(NULL == h->metaf);
 562         switch (h->metac) {
 563         case (HTMLFONT_ITALIC):
 564                 h->metaf = print_otag(h, TAG_I, 0, NULL);
 565                 break;
 566         case (HTMLFONT_BOLD):
 567                 h->metaf = print_otag(h, TAG_B, 0, NULL);
 568                 break;
 569         case (HTMLFONT_BI):
 570                 h->metaf = print_otag(h, TAG_B, 0, NULL);
 571                 print_otag(h, TAG_I, 0, NULL);
 572                 break;
 573         default:
 574                 break;
 575         }
 576 
 577         assert(word);
 578         if ( ! print_encode(h, word, 0)) {
 579                 if ( ! (h->flags & HTML_NONOSPACE))
 580                         h->flags &= ~HTML_NOSPACE;
 581         } else
 582                 h->flags |= HTML_NOSPACE;
 583 
 584         if (h->metaf) {
 585                 print_tagq(h, h->metaf);
 586                 h->metaf = NULL;
 587         }
 588 
 589         h->flags &= ~HTML_IGNDELIM;
 590 }
 591 
 592 
 593 void
 594 print_tagq(struct html *h, const struct tag *until)
 595 {