1 /* $Id: man.c,v 1.121 2013/11/10 22:54:40 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <sys/types.h> 22 23 #include <assert.h> 24 #include <stdarg.h> 25 #include <stdlib.h> 26 #include <stdio.h> 27 #include <string.h> 28 29 #include "man.h" 30 #include "mandoc.h" 31 #include "libman.h" 32 #include "libmandoc.h" 33 34 const char *const __man_macronames[MAN_MAX] = { 35 "br", "TH", "SH", "SS", 36 "TP", "LP", "PP", "P", 37 "IP", "HP", "SM", "SB", 38 "BI", "IB", "BR", "RB", 39 "R", "B", "I", "IR", 40 "RI", "na", "sp", "nf", 41 "fi", "RE", "RS", "DT", 42 "UC", "PD", "AT", "in", 43 "ft", "OP", "EX", "EE", 44 "UR", "UE" 45 }; 46 47 const char * const *man_macronames = __man_macronames; 48 49 static struct man_node *man_node_alloc(struct man *, int, int, 50 enum man_type, enum mant); 51 static int man_node_append(struct man *, 52 struct man_node *); 53 static void man_node_free(struct man_node *); 54 static void man_node_unlink(struct man *, 55 struct man_node *); 56 static int man_ptext(struct man *, int, char *, int); 57 static int man_pmacro(struct man *, int, char *, int); 58 static void man_free1(struct man *); 59 static void man_alloc1(struct man *); 60 static int man_descope(struct man *, int, int); 61 62 63 const struct man_node * 64 man_node(const struct man *man) 65 { 66 67 assert( ! (MAN_HALT & man->flags)); 68 return(man->first); 69 } 70 71 72 const struct man_meta * 73 man_meta(const struct man *man) 74 { 75 76 assert( ! (MAN_HALT & man->flags)); 77 return(&man->meta); 78 } 79 80 81 void 82 man_reset(struct man *man) 83 { 84 85 man_free1(man); 86 man_alloc1(man); 87 } 88 89 90 void 91 man_free(struct man *man) 92 { 93 94 man_free1(man); 95 free(man); 96 } 97 98 99 struct man * 100 man_alloc(struct roff *roff, struct mparse *parse) 101 { 102 struct man *p; 103 104 p = mandoc_calloc(1, sizeof(struct man)); 105 106 man_hash_init(); 107 p->parse = parse; 108 p->roff = roff; 109 110 man_alloc1(p); 111 return(p); 112 } 113 114 115 int 116 man_endparse(struct man *man) 117 { 118 119 assert( ! (MAN_HALT & man->flags)); 120 if (man_macroend(man)) 121 return(1); 122 man->flags |= MAN_HALT; 123 return(0); 124 } 125 126 127 int 128 man_parseln(struct man *man, int ln, char *buf, int offs) 129 { 130 131 man->flags |= MAN_NEWLINE; 132 133 assert( ! (MAN_HALT & man->flags)); 134 135 return (roff_getcontrol(man->roff, buf, &offs) ? 136 man_pmacro(man, ln, buf, offs) : 137 man_ptext(man, ln, buf, offs)); 138 } 139 140 141 static void 142 man_free1(struct man *man) 143 { 144 145 if (man->first) 146 man_node_delete(man, man->first); 147 if (man->meta.title) 148 free(man->meta.title); 149 if (man->meta.source) 150 free(man->meta.source); 151 if (man->meta.date) 152 free(man->meta.date); 153 if (man->meta.vol) 154 free(man->meta.vol); 155 if (man->meta.msec) 156 free(man->meta.msec); 157 } 158 159 160 static void 161 man_alloc1(struct man *man) 162 { 163 164 memset(&man->meta, 0, sizeof(struct man_meta)); 165 man->flags = 0; 166 man->last = mandoc_calloc(1, sizeof(struct man_node)); 167 man->first = man->last; 168 man->last->type = MAN_ROOT; 169 man->last->tok = MAN_MAX; 170 man->next = MAN_NEXT_CHILD; 171 } 172 173 174 static int 175 man_node_append(struct man *man, struct man_node *p) 176 { 177 178 assert(man->last); 179 assert(man->first); 180 assert(MAN_ROOT != p->type); 181 182 switch (man->next) { 183 case (MAN_NEXT_SIBLING): 184 man->last->next = p; 185 p->prev = man->last; 186 p->parent = man->last->parent; 187 break; 188 case (MAN_NEXT_CHILD): 189 man->last->child = p; 190 p->parent = man->last; 191 break; 192 default: 193 abort(); 194 /* NOTREACHED */ 195 } 196 197 assert(p->parent); 198 p->parent->nchild++; 199 200 if ( ! man_valid_pre(man, p)) 201 return(0); 202 203 switch (p->type) { 204 case (MAN_HEAD): 205 assert(MAN_BLOCK == p->parent->type); 206 p->parent->head = p; 207 break; 208 case (MAN_TAIL): 209 assert(MAN_BLOCK == p->parent->type); 210 p->parent->tail = p; 211 break; 212 case (MAN_BODY): 213 assert(MAN_BLOCK == p->parent->type); 214 p->parent->body = p; 215 break; 216 default: 217 break; 218 } 219 220 man->last = p; 221 222 switch (p->type) { 223 case (MAN_TBL): 224 /* FALLTHROUGH */ 225 case (MAN_TEXT): 226 if ( ! man_valid_post(man)) 227 return(0); 228 break; 229 default: 230 break; 231 } 232 233 return(1); 234 } 235 236 237 static struct man_node * 238 man_node_alloc(struct man *man, int line, int pos, 239 enum man_type type, enum mant tok) 240 { 241 struct man_node *p; 242 243 p = mandoc_calloc(1, sizeof(struct man_node)); 244 p->line = line; 245 p->pos = pos; 246 p->type = type; 247 p->tok = tok; 248 249 if (MAN_NEWLINE & man->flags) 250 p->flags |= MAN_LINE; 251 man->flags &= ~MAN_NEWLINE; 252 return(p); 253 } 254 255 256 int 257 man_elem_alloc(struct man *man, int line, int pos, enum mant tok) 258 { 259 struct man_node *p; 260 261 p = man_node_alloc(man, line, pos, MAN_ELEM, tok); 262 if ( ! man_node_append(man, p)) 263 return(0); 264 man->next = MAN_NEXT_CHILD; 265 return(1); 266 } 267 268 269 int 270 man_tail_alloc(struct man *man, int line, int pos, enum mant tok) 271 { 272 struct man_node *p; 273 274 p = man_node_alloc(man, line, pos, MAN_TAIL, tok); 275 if ( ! man_node_append(man, p)) 276 return(0); 277 man->next = MAN_NEXT_CHILD; 278 return(1); 279 } 280 281 282 int 283 man_head_alloc(struct man *man, int line, int pos, enum mant tok) 284 { 285 struct man_node *p; 286 287 p = man_node_alloc(man, line, pos, MAN_HEAD, tok); 288 if ( ! man_node_append(man, p)) 289 return(0); 290 man->next = MAN_NEXT_CHILD; 291 return(1); 292 } 293 294 295 int 296 man_body_alloc(struct man *man, int line, int pos, enum mant tok) 297 { 298 struct man_node *p; 299 300 p = man_node_alloc(man, line, pos, MAN_BODY, tok); 301 if ( ! man_node_append(man, p)) 302 return(0); 303 man->next = MAN_NEXT_CHILD; 304 return(1); 305 } 306 307 308 int 309 man_block_alloc(struct man *man, int line, int pos, enum mant tok) 310 { 311 struct man_node *p; 312 313 p = man_node_alloc(man, line, pos, MAN_BLOCK, tok); 314 if ( ! man_node_append(man, p)) 315 return(0); 316 man->next = MAN_NEXT_CHILD; 317 return(1); 318 } 319 320 int 321 man_word_alloc(struct man *man, int line, int pos, const char *word) 322 { 323 struct man_node *n; 324 325 n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX); 326 n->string = roff_strdup(man->roff, word); 327 328 if ( ! man_node_append(man, n)) 329 return(0); 330 331 man->next = MAN_NEXT_SIBLING; 332 return(1); 333 } 334 335 336 /* 337 * Free all of the resources held by a node. This does NOT unlink a 338 * node from its context; for that, see man_node_unlink(). 339 */ 340 static void 341 man_node_free(struct man_node *p) 342 { 343 344 if (p->string) 345 free(p->string); 346 free(p); 347 } 348 349 350 void 351 man_node_delete(struct man *man, struct man_node *p) 352 { 353 354 while (p->child) 355 man_node_delete(man, p->child); 356 357 man_node_unlink(man, p); 358 man_node_free(p); 359 } 360 361 int 362 man_addeqn(struct man *man, const struct eqn *ep) 363 { 364 struct man_node *n; 365 366 assert( ! (MAN_HALT & man->flags)); 367 368 n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX); 369 n->eqn = ep; 370 371 if ( ! man_node_append(man, n)) 372 return(0); 373 374 man->next = MAN_NEXT_SIBLING; 375 return(man_descope(man, ep->ln, ep->pos)); 376 } 377 378 int 379 man_addspan(struct man *man, const struct tbl_span *sp) 380 { 381 struct man_node *n; 382 383 assert( ! (MAN_HALT & man->flags)); 384 385 n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX); 386 n->span = sp; 387 388 if ( ! man_node_append(man, n)) 389 return(0); 390 391 man->next = MAN_NEXT_SIBLING; 392 return(man_descope(man, sp->line, 0)); 393 } 394 395 static int 396 man_descope(struct man *man, int line, int offs) 397 { 398 /* 399 * Co-ordinate what happens with having a next-line scope open: 400 * first close out the element scope (if applicable), then close 401 * out the block scope (also if applicable). 402 */ 403 404 if (MAN_ELINE & man->flags) { 405 man->flags &= ~MAN_ELINE; 406 if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX)) 407 return(0); 408 } 409 410 if ( ! (MAN_BLINE & man->flags)) 411 return(1); 412 man->flags &= ~MAN_BLINE; 413 414 if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX)) 415 return(0); 416 return(man_body_alloc(man, line, offs, man->last->tok)); 417 } 418 419 static int 420 man_ptext(struct man *man, int line, char *buf, int offs) 421 { 422 int i; 423 424 /* Literal free-form text whitespace is preserved. */ 425 426 if (MAN_LITERAL & man->flags) { 427 if ( ! man_word_alloc(man, line, offs, buf + offs)) 428 return(0); 429 return(man_descope(man, line, offs)); 430 } 431 432 for (i = offs; ' ' == buf[i]; i++) 433 /* Skip leading whitespace. */ ; 434 435 /* 436 * Blank lines are ignored right after headings 437 * but add a single vertical space elsewhere. 438 */ 439 440 if ('\0' == buf[i]) { 441 /* Allocate a blank entry. */ 442 if (MAN_SH != man->last->tok && 443 MAN_SS != man->last->tok) { 444 if ( ! man_elem_alloc(man, line, offs, MAN_sp)) 445 return(0); 446 man->next = MAN_NEXT_SIBLING; 447 } 448 return(1); 449 } 450 451 /* 452 * Warn if the last un-escaped character is whitespace. Then 453 * strip away the remaining spaces (tabs stay!). 454 */ 455 456 i = (int)strlen(buf); 457 assert(i); 458 459 if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { 460 if (i > 1 && '\\' != buf[i - 2]) 461 man_pmsg(man, line, i - 1, MANDOCERR_EOLNSPACE); 462 463 for (--i; i && ' ' == buf[i]; i--) 464 /* Spin back to non-space. */ ; 465 466 /* Jump ahead of escaped whitespace. */ 467 i += '\\' == buf[i] ? 2 : 1; 468 469 buf[i] = '\0'; 470 } 471 472 if ( ! man_word_alloc(man, line, offs, buf + offs)) 473 return(0); 474 475 /* 476 * End-of-sentence check. If the last character is an unescaped 477 * EOS character, then flag the node as being the end of a 478 * sentence. The front-end will know how to interpret this. 479 */ 480 481 assert(i); 482 if (mandoc_eos(buf, (size_t)i, 0)) 483 man->last->flags |= MAN_EOS; 484 485 return(man_descope(man, line, offs)); 486 } 487 488 static int 489 man_pmacro(struct man *man, int ln, char *buf, int offs) 490 { 491 int i, ppos; 492 enum mant tok; 493 char mac[5]; 494 struct man_node *n; 495 496 if ('"' == buf[offs]) { 497 man_pmsg(man, ln, offs, MANDOCERR_BADCOMMENT); 498 return(1); 499 } else if ('\0' == buf[offs]) 500 return(1); 501 502 ppos = offs; 503 504 /* 505 * Copy the first word into a nil-terminated buffer. 506 * Stop copying when a tab, space, or eoln is encountered. 507 */ 508 509 i = 0; 510 while (i < 4 && '\0' != buf[offs] && 511 ' ' != buf[offs] && '\t' != buf[offs]) 512 mac[i++] = buf[offs++]; 513 514 mac[i] = '\0'; 515 516 tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX; 517 518 if (MAN_MAX == tok) { 519 mandoc_vmsg(MANDOCERR_MACRO, man->parse, ln, 520 ppos, "%s", buf + ppos - 1); 521 return(1); 522 } 523 524 /* The macro is sane. Jump to the next word. */ 525 526 while (buf[offs] && ' ' == buf[offs]) 527 offs++; 528 529 /* 530 * Trailing whitespace. Note that tabs are allowed to be passed 531 * into the parser as "text", so we only warn about spaces here. 532 */ 533 534 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 535 man_pmsg(man, ln, offs - 1, MANDOCERR_EOLNSPACE); 536 537 /* 538 * Remove prior ELINE macro, as it's being clobbered by a new 539 * macro. Note that NSCOPED macros do not close out ELINE 540 * macros---they don't print text---so we let those slip by. 541 */ 542 543 if ( ! (MAN_NSCOPED & man_macros[tok].flags) && 544 man->flags & MAN_ELINE) { 545 n = man->last; 546 assert(MAN_TEXT != n->type); 547 548 /* Remove repeated NSCOPED macros causing ELINE. */ 549 550 if (MAN_NSCOPED & man_macros[n->tok].flags) 551 n = n->parent; 552 553 mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line, 554 n->pos, "%s breaks %s", man_macronames[tok], 555 man_macronames[n->tok]); 556 557 man_node_delete(man, n); 558 man->flags &= ~MAN_ELINE; 559 } 560 561 /* 562 * Remove prior BLINE macro that is being clobbered. 563 */ 564 if ((man->flags & MAN_BLINE) && 565 (MAN_BSCOPE & man_macros[tok].flags)) { 566 n = man->last; 567 568 /* Might be a text node like 8 in 569 * .TP 8 570 * .SH foo 571 */ 572 if (MAN_TEXT == n->type) 573 n = n->parent; 574 575 /* Remove element that didn't end BLINE, if any. */ 576 if ( ! (MAN_BSCOPE & man_macros[n->tok].flags)) 577 n = n->parent; 578 579 assert(MAN_HEAD == n->type); 580 n = n->parent; 581 assert(MAN_BLOCK == n->type); 582 assert(MAN_SCOPED & man_macros[n->tok].flags); 583 584 mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line, 585 n->pos, "%s breaks %s", man_macronames[tok], 586 man_macronames[n->tok]); 587 588 man_node_delete(man, n); 589 man->flags &= ~MAN_BLINE; 590 } 591 592 /* 593 * Save the fact that we're in the next-line for a block. In 594 * this way, embedded roff instructions can "remember" state 595 * when they exit. 596 */ 597 598 if (MAN_BLINE & man->flags) 599 man->flags |= MAN_BPLINE; 600 601 /* Call to handler... */ 602 603 assert(man_macros[tok].fp); 604 if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf)) 605 goto err; 606 607 /* 608 * We weren't in a block-line scope when entering the 609 * above-parsed macro, so return. 610 */ 611 612 if ( ! (MAN_BPLINE & man->flags)) { 613 man->flags &= ~MAN_ILINE; 614 return(1); 615 } 616 man->flags &= ~MAN_BPLINE; 617 618 /* 619 * If we're in a block scope, then allow this macro to slip by 620 * without closing scope around it. 621 */ 622 623 if (MAN_ILINE & man->flags) { 624 man->flags &= ~MAN_ILINE; 625 return(1); 626 } 627 628 /* 629 * If we've opened a new next-line element scope, then return 630 * now, as the next line will close out the block scope. 631 */ 632 633 if (MAN_ELINE & man->flags) 634 return(1); 635 636 /* Close out the block scope opened in the prior line. */ 637 638 assert(MAN_BLINE & man->flags); 639 man->flags &= ~MAN_BLINE; 640 641 if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX)) 642 return(0); 643 return(man_body_alloc(man, ln, ppos, man->last->tok)); 644 645 err: /* Error out. */ 646 647 man->flags |= MAN_HALT; 648 return(0); 649 } 650 651 /* 652 * Unlink a node from its context. If "man" is provided, the last parse 653 * point will also be adjusted accordingly. 654 */ 655 static void 656 man_node_unlink(struct man *man, struct man_node *n) 657 { 658 659 /* Adjust siblings. */ 660 661 if (n->prev) 662 n->prev->next = n->next; 663 if (n->next) 664 n->next->prev = n->prev; 665 666 /* Adjust parent. */ 667 668 if (n->parent) { 669 n->parent->nchild--; 670 if (n->parent->child == n) 671 n->parent->child = n->prev ? n->prev : n->next; 672 } 673 674 /* Adjust parse point, if applicable. */ 675 676 if (man && man->last == n) { 677 /*XXX: this can occur when bailing from validation. */ 678 /*assert(NULL == n->next);*/ 679 if (n->prev) { 680 man->last = n->prev; 681 man->next = MAN_NEXT_SIBLING; 682 } else { 683 man->last = n->parent; 684 man->next = MAN_NEXT_CHILD; 685 } 686 } 687 688 if (man && man->first == n) 689 man->first = NULL; 690 } 691 692 const struct mparse * 693 man_mparse(const struct man *man) 694 { 695 696 assert(man && man->parse); 697 return(man->parse); 698 }