1 /* $Id: mdoc.c,v 1.196 2011/09/30 00:13:28 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <stdarg.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <time.h> 30 31 #include "mdoc.h" 32 #include "mandoc.h" 33 #include "libmdoc.h" 34 #include "libmandoc.h" 35 36 const char *const __mdoc_macronames[MDOC_MAX] = { 37 "Ap", "Dd", "Dt", "Os", 38 "Sh", "Ss", "Pp", "D1", 39 "Dl", "Bd", "Ed", "Bl", 40 "El", "It", "Ad", "An", 41 "Ar", "Cd", "Cm", "Dv", 42 "Er", "Ev", "Ex", "Fa", 43 "Fd", "Fl", "Fn", "Ft", 44 "Ic", "In", "Li", "Nd", 45 "Nm", "Op", "Ot", "Pa", 46 "Rv", "St", "Va", "Vt", 47 /* LINTED */ 48 "Xr", "%A", "%B", "%D", 49 /* LINTED */ 50 "%I", "%J", "%N", "%O", 51 /* LINTED */ 52 "%P", "%R", "%T", "%V", 53 "Ac", "Ao", "Aq", "At", 54 "Bc", "Bf", "Bo", "Bq", 55 "Bsx", "Bx", "Db", "Dc", 56 "Do", "Dq", "Ec", "Ef", 57 "Em", "Eo", "Fx", "Ms", 58 "No", "Ns", "Nx", "Ox", 59 "Pc", "Pf", "Po", "Pq", 60 "Qc", "Ql", "Qo", "Qq", 61 "Re", "Rs", "Sc", "So", 62 "Sq", "Sm", "Sx", "Sy", 63 "Tn", "Ux", "Xc", "Xo", 64 "Fo", "Fc", "Oo", "Oc", 65 "Bk", "Ek", "Bt", "Hf", 66 "Fr", "Ud", "Lb", "Lp", 67 "Lk", "Mt", "Brq", "Bro", 68 /* LINTED */ 69 "Brc", "%C", "Es", "En", 70 /* LINTED */ 71 "Dx", "%Q", "br", "sp", 72 /* LINTED */ 73 "%U", "Ta" 74 }; 75 76 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 77 "split", "nosplit", "ragged", 78 "unfilled", "literal", "file", 79 "offset", "bullet", "dash", 80 "hyphen", "item", "enum", 81 "tag", "diag", "hang", 82 "ohang", "inset", "column", 83 "width", "compact", "std", 84 "filled", "words", "emphasis", 85 "symbolic", "nested", "centered" 86 }; 87 88 const char * const *mdoc_macronames = __mdoc_macronames; 89 const char * const *mdoc_argnames = __mdoc_argnames; 90 91 static void mdoc_node_free(struct mdoc_node *); 92 static void mdoc_node_unlink(struct mdoc *, 93 struct mdoc_node *); 94 static void mdoc_free1(struct mdoc *); 95 static void mdoc_alloc1(struct mdoc *); 96 static struct mdoc_node *node_alloc(struct mdoc *, int, int, 97 enum mdoct, enum mdoc_type); 98 static int node_append(struct mdoc *, 99 struct mdoc_node *); 100 #if 0 101 static int mdoc_preptext(struct mdoc *, int, char *, int); 102 #endif 103 static int mdoc_ptext(struct mdoc *, int, char *, int); 104 static int mdoc_pmacro(struct mdoc *, int, char *, int); 105 106 const struct mdoc_node * 107 mdoc_node(const struct mdoc *m) 108 { 109 110 assert( ! (MDOC_HALT & m->flags)); 111 return(m->first); 112 } 113 114 115 const struct mdoc_meta * 116 mdoc_meta(const struct mdoc *m) 117 { 118 119 assert( ! (MDOC_HALT & m->flags)); 120 return(&m->meta); 121 } 122 123 124 /* 125 * Frees volatile resources (parse tree, meta-data, fields). 126 */ 127 static void 128 mdoc_free1(struct mdoc *mdoc) 129 { 130 131 if (mdoc->first) 132 mdoc_node_delete(mdoc, mdoc->first); 133 if (mdoc->meta.title) 134 free(mdoc->meta.title); 135 if (mdoc->meta.os) 136 free(mdoc->meta.os); 137 if (mdoc->meta.name) 138 free(mdoc->meta.name); 139 if (mdoc->meta.arch) 140 free(mdoc->meta.arch); 141 if (mdoc->meta.vol) 142 free(mdoc->meta.vol); 143 if (mdoc->meta.msec) 144 free(mdoc->meta.msec); 145 if (mdoc->meta.date) 146 free(mdoc->meta.date); 147 } 148 149 150 /* 151 * Allocate all volatile resources (parse tree, meta-data, fields). 152 */ 153 static void 154 mdoc_alloc1(struct mdoc *mdoc) 155 { 156 157 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 158 mdoc->flags = 0; 159 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 160 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 161 mdoc->first = mdoc->last; 162 mdoc->last->type = MDOC_ROOT; 163 mdoc->last->tok = MDOC_MAX; 164 mdoc->next = MDOC_NEXT_CHILD; 165 } 166 167 168 /* 169 * Free up volatile resources (see mdoc_free1()) then re-initialises the 170 * data with mdoc_alloc1(). After invocation, parse data has been reset 171 * and the parser is ready for re-invocation on a new tree; however, 172 * cross-parse non-volatile data is kept intact. 173 */ 174 void 175 mdoc_reset(struct mdoc *mdoc) 176 { 177 178 mdoc_free1(mdoc); 179 mdoc_alloc1(mdoc); 180 } 181 182 183 /* 184 * Completely free up all volatile and non-volatile parse resources. 185 * After invocation, the pointer is no longer usable. 186 */ 187 void 188 mdoc_free(struct mdoc *mdoc) 189 { 190 191 mdoc_free1(mdoc); 192 free(mdoc); 193 } 194 195 196 /* 197 * Allocate volatile and non-volatile parse resources. 198 */ 199 struct mdoc * 200 mdoc_alloc(struct roff *roff, struct mparse *parse) 201 { 202 struct mdoc *p; 203 204 p = mandoc_calloc(1, sizeof(struct mdoc)); 205 206 p->parse = parse; 207 p->roff = roff; 208 209 mdoc_hash_init(); 210 mdoc_alloc1(p); 211 return(p); 212 } 213 214 215 /* 216 * Climb back up the parse tree, validating open scopes. Mostly calls 217 * through to macro_end() in macro.c. 218 */ 219 int 220 mdoc_endparse(struct mdoc *m) 221 { 222 223 assert( ! (MDOC_HALT & m->flags)); 224 if (mdoc_macroend(m)) 225 return(1); 226 m->flags |= MDOC_HALT; 227 return(0); 228 } 229 230 int 231 mdoc_addeqn(struct mdoc *m, const struct eqn *ep) 232 { 233 struct mdoc_node *n; 234 235 assert( ! (MDOC_HALT & m->flags)); 236 237 /* No text before an initial macro. */ 238 239 if (SEC_NONE == m->lastnamed) { 240 mdoc_pmsg(m, ep->ln, ep->pos, MANDOCERR_NOTEXT); 241 return(1); 242 } 243 244 n = node_alloc(m, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); 245 n->eqn = ep; 246 247 if ( ! node_append(m, n)) 248 return(0); 249 250 m->next = MDOC_NEXT_SIBLING; 251 return(1); 252 } 253 254 int 255 mdoc_addspan(struct mdoc *m, const struct tbl_span *sp) 256 { 257 struct mdoc_node *n; 258 259 assert( ! (MDOC_HALT & m->flags)); 260 261 /* No text before an initial macro. */ 262 263 if (SEC_NONE == m->lastnamed) { 264 mdoc_pmsg(m, sp->line, 0, MANDOCERR_NOTEXT); 265 return(1); 266 } 267 268 n = node_alloc(m, sp->line, 0, MDOC_MAX, MDOC_TBL); 269 n->span = sp; 270 271 if ( ! node_append(m, n)) 272 return(0); 273 274 m->next = MDOC_NEXT_SIBLING; 275 return(1); 276 } 277 278 279 /* 280 * Main parse routine. Parses a single line -- really just hands off to 281 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 282 */ 283 int 284 mdoc_parseln(struct mdoc *m, int ln, char *buf, int offs) 285 { 286 287 assert( ! (MDOC_HALT & m->flags)); 288 289 m->flags |= MDOC_NEWLINE; 290 291 /* 292 * Let the roff nS register switch SYNOPSIS mode early, 293 * such that the parser knows at all times 294 * whether this mode is on or off. 295 * Note that this mode is also switched by the Sh macro. 296 */ 297 if (roff_regisset(m->roff, REG_nS)) { 298 if (roff_regget(m->roff, REG_nS)) 299 m->flags |= MDOC_SYNOPSIS; 300 else 301 m->flags &= ~MDOC_SYNOPSIS; 302 } 303 304 return(mandoc_getcontrol(buf, &offs) ? 305 mdoc_pmacro(m, ln, buf, offs) : 306 mdoc_ptext(m, ln, buf, offs)); 307 } 308 309 int 310 mdoc_macro(MACRO_PROT_ARGS) 311 { 312 assert(tok < MDOC_MAX); 313 314 /* If we're in the body, deny prologue calls. */ 315 316 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 317 MDOC_PBODY & m->flags) { 318 mdoc_pmsg(m, line, ppos, MANDOCERR_BADBODY); 319 return(1); 320 } 321 322 /* If we're in the prologue, deny "body" macros. */ 323 324 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 325 ! (MDOC_PBODY & m->flags)) { 326 mdoc_pmsg(m, line, ppos, MANDOCERR_BADPROLOG); 327 if (NULL == m->meta.msec) 328 m->meta.msec = mandoc_strdup("1"); 329 if (NULL == m->meta.title) 330 m->meta.title = mandoc_strdup("UNKNOWN"); 331 if (NULL == m->meta.vol) 332 m->meta.vol = mandoc_strdup("LOCAL"); 333 if (NULL == m->meta.os) 334 m->meta.os = mandoc_strdup("LOCAL"); 335 if (NULL == m->meta.date) 336 m->meta.date = mandoc_normdate 337 (m->parse, NULL, line, ppos); 338 m->flags |= MDOC_PBODY; 339 } 340 341 return((*mdoc_macros[tok].fp)(m, tok, line, ppos, pos, buf)); 342 } 343 344 345 static int 346 node_append(struct mdoc *mdoc, struct mdoc_node *p) 347 { 348 349 assert(mdoc->last); 350 assert(mdoc->first); 351 assert(MDOC_ROOT != p->type); 352 353 switch (mdoc->next) { 354 case (MDOC_NEXT_SIBLING): 355 mdoc->last->next = p; 356 p->prev = mdoc->last; 357 p->parent = mdoc->last->parent; 358 break; 359 case (MDOC_NEXT_CHILD): 360 mdoc->last->child = p; 361 p->parent = mdoc->last; 362 break; 363 default: 364 abort(); 365 /* NOTREACHED */ 366 } 367 368 p->parent->nchild++; 369 370 /* 371 * Copy over the normalised-data pointer of our parent. Not 372 * everybody has one, but copying a null pointer is fine. 373 */ 374 375 switch (p->type) { 376 case (MDOC_BODY): 377 /* FALLTHROUGH */ 378 case (MDOC_TAIL): 379 /* FALLTHROUGH */ 380 case (MDOC_HEAD): 381 p->norm = p->parent->norm; 382 break; 383 default: 384 break; 385 } 386 387 if ( ! mdoc_valid_pre(mdoc, p)) 388 return(0); 389 390 switch (p->type) { 391 case (MDOC_HEAD): 392 assert(MDOC_BLOCK == p->parent->type); 393 p->parent->head = p; 394 break; 395 case (MDOC_TAIL): 396 assert(MDOC_BLOCK == p->parent->type); 397 p->parent->tail = p; 398 break; 399 case (MDOC_BODY): 400 if (p->end) 401 break; 402 assert(MDOC_BLOCK == p->parent->type); 403 p->parent->body = p; 404 break; 405 default: 406 break; 407 } 408 409 mdoc->last = p; 410 411 switch (p->type) { 412 case (MDOC_TBL): 413 /* FALLTHROUGH */ 414 case (MDOC_TEXT): 415 if ( ! mdoc_valid_post(mdoc)) 416 return(0); 417 break; 418 default: 419 break; 420 } 421 422 return(1); 423 } 424 425 426 static struct mdoc_node * 427 node_alloc(struct mdoc *m, int line, int pos, 428 enum mdoct tok, enum mdoc_type type) 429 { 430 struct mdoc_node *p; 431 432 p = mandoc_calloc(1, sizeof(struct mdoc_node)); 433 p->sec = m->lastsec; 434 p->line = line; 435 p->pos = pos; 436 p->tok = tok; 437 p->type = type; 438 439 /* Flag analysis. */ 440 441 if (MDOC_SYNOPSIS & m->flags) 442 p->flags |= MDOC_SYNPRETTY; 443 else 444 p->flags &= ~MDOC_SYNPRETTY; 445 if (MDOC_NEWLINE & m->flags) 446 p->flags |= MDOC_LINE; 447 m->flags &= ~MDOC_NEWLINE; 448 449 return(p); 450 } 451 452 453 int 454 mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 455 { 456 struct mdoc_node *p; 457 458 p = node_alloc(m, line, pos, tok, MDOC_TAIL); 459 if ( ! node_append(m, p)) 460 return(0); 461 m->next = MDOC_NEXT_CHILD; 462 return(1); 463 } 464 465 466 int 467 mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 468 { 469 struct mdoc_node *p; 470 471 assert(m->first); 472 assert(m->last); 473 474 p = node_alloc(m, line, pos, tok, MDOC_HEAD); 475 if ( ! node_append(m, p)) 476 return(0); 477 m->next = MDOC_NEXT_CHILD; 478 return(1); 479 } 480 481 482 int 483 mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 484 { 485 struct mdoc_node *p; 486 487 p = node_alloc(m, line, pos, tok, MDOC_BODY); 488 if ( ! node_append(m, p)) 489 return(0); 490 m->next = MDOC_NEXT_CHILD; 491 return(1); 492 } 493 494 495 int 496 mdoc_endbody_alloc(struct mdoc *m, int line, int pos, enum mdoct tok, 497 struct mdoc_node *body, enum mdoc_endbody end) 498 { 499 struct mdoc_node *p; 500 501 p = node_alloc(m, line, pos, tok, MDOC_BODY); 502 p->pending = body; 503 p->end = end; 504 if ( ! node_append(m, p)) 505 return(0); 506 m->next = MDOC_NEXT_SIBLING; 507 return(1); 508 } 509 510 511 int 512 mdoc_block_alloc(struct mdoc *m, int line, int pos, 513 enum mdoct tok, struct mdoc_arg *args) 514 { 515 struct mdoc_node *p; 516 517 p = node_alloc(m, line, pos, tok, MDOC_BLOCK); 518 p->args = args; 519 if (p->args) 520 (args->refcnt)++; 521 522 switch (tok) { 523 case (MDOC_Bd): 524 /* FALLTHROUGH */ 525 case (MDOC_Bf): 526 /* FALLTHROUGH */ 527 case (MDOC_Bl): 528 /* FALLTHROUGH */ 529 case (MDOC_Rs): 530 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 531 break; 532 default: 533 break; 534 } 535 536 if ( ! node_append(m, p)) 537 return(0); 538 m->next = MDOC_NEXT_CHILD; 539 return(1); 540 } 541 542 543 int 544 mdoc_elem_alloc(struct mdoc *m, int line, int pos, 545 enum mdoct tok, struct mdoc_arg *args) 546 { 547 struct mdoc_node *p; 548 549 p = node_alloc(m, line, pos, tok, MDOC_ELEM); 550 p->args = args; 551 if (p->args) 552 (args->refcnt)++; 553 554 switch (tok) { 555 case (MDOC_An): 556 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 557 break; 558 default: 559 break; 560 } 561 562 if ( ! node_append(m, p)) 563 return(0); 564 m->next = MDOC_NEXT_CHILD; 565 return(1); 566 } 567 568 int 569 mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) 570 { 571 struct mdoc_node *n; 572 573 n = node_alloc(m, line, pos, MDOC_MAX, MDOC_TEXT); 574 n->string = roff_strdup(m->roff, p); 575 576 if ( ! node_append(m, n)) 577 return(0); 578 579 m->next = MDOC_NEXT_SIBLING; 580 return(1); 581 } 582 583 584 static void 585 mdoc_node_free(struct mdoc_node *p) 586 { 587 588 if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) 589 free(p->norm); 590 if (p->string) 591 free(p->string); 592 if (p->args) 593 mdoc_argv_free(p->args); 594 free(p); 595 } 596 597 598 static void 599 mdoc_node_unlink(struct mdoc *m, struct mdoc_node *n) 600 { 601 602 /* Adjust siblings. */ 603 604 if (n->prev) 605 n->prev->next = n->next; 606 if (n->next) 607 n->next->prev = n->prev; 608 609 /* Adjust parent. */ 610 611 if (n->parent) { 612 n->parent->nchild--; 613 if (n->parent->child == n) 614 n->parent->child = n->prev ? n->prev : n->next; 615 if (n->parent->last == n) 616 n->parent->last = n->prev ? n->prev : NULL; 617 } 618 619 /* Adjust parse point, if applicable. */ 620 621 if (m && m->last == n) { 622 if (n->prev) { 623 m->last = n->prev; 624 m->next = MDOC_NEXT_SIBLING; 625 } else { 626 m->last = n->parent; 627 m->next = MDOC_NEXT_CHILD; 628 } 629 } 630 631 if (m && m->first == n) 632 m->first = NULL; 633 } 634 635 636 void 637 mdoc_node_delete(struct mdoc *m, struct mdoc_node *p) 638 { 639 640 while (p->child) { 641 assert(p->nchild); 642 mdoc_node_delete(m, p->child); 643 } 644 assert(0 == p->nchild); 645 646 mdoc_node_unlink(m, p); 647 mdoc_node_free(p); 648 } 649 650 #if 0 651 /* 652 * Pre-treat a text line. 653 * Text lines can consist of equations, which must be handled apart from 654 * the regular text. 655 * Thus, use this function to step through a line checking if it has any 656 * equations embedded in it. 657 * This must handle multiple equations AND equations that do not end at 658 * the end-of-line, i.e., will re-enter in the next roff parse. 659 */ 660 static int 661 mdoc_preptext(struct mdoc *m, int line, char *buf, int offs) 662 { 663 char *start, *end; 664 char delim; 665 666 while ('\0' != buf[offs]) { 667 /* Mark starting position if eqn is set. */ 668 start = NULL; 669 if ('\0' != (delim = roff_eqndelim(m->roff))) 670 if (NULL != (start = strchr(buf + offs, delim))) 671 *start++ = '\0'; 672 673 /* Parse text as normal. */ 674 if ( ! mdoc_ptext(m, line, buf, offs)) 675 return(0); 676 677 /* Continue only if an equation exists. */ 678 if (NULL == start) 679 break; 680 681 /* Read past the end of the equation. */ 682 offs += start - (buf + offs); 683 assert(start == &buf[offs]); 684 if (NULL != (end = strchr(buf + offs, delim))) { 685 *end++ = '\0'; 686 while (' ' == *end) 687 end++; 688 } 689 690 /* Parse the equation itself. */ 691 roff_openeqn(m->roff, NULL, line, offs, buf); 692 693 /* Process a finished equation? */ 694 if (roff_closeeqn(m->roff)) 695 if ( ! mdoc_addeqn(m, roff_eqn(m->roff))) 696 return(0); 697 offs += (end - (buf + offs)); 698 } 699 700 return(1); 701 } 702 #endif 703 704 /* 705 * Parse free-form text, that is, a line that does not begin with the 706 * control character. 707 */ 708 static int 709 mdoc_ptext(struct mdoc *m, int line, char *buf, int offs) 710 { 711 char *c, *ws, *end; 712 struct mdoc_node *n; 713 714 /* No text before an initial macro. */ 715 716 if (SEC_NONE == m->lastnamed) { 717 mdoc_pmsg(m, line, offs, MANDOCERR_NOTEXT); 718 return(1); 719 } 720 721 assert(m->last); 722 n = m->last; 723 724 /* 725 * Divert directly to list processing if we're encountering a 726 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry 727 * (a MDOC_BODY means it's already open, in which case we should 728 * process within its context in the normal way). 729 */ 730 731 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 732 LIST_column == n->norm->Bl.type) { 733 /* `Bl' is open without any children. */ 734 m->flags |= MDOC_FREECOL; 735 return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); 736 } 737 738 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 739 NULL != n->parent && 740 MDOC_Bl == n->parent->tok && 741 LIST_column == n->parent->norm->Bl.type) { 742 /* `Bl' has block-level `It' children. */ 743 m->flags |= MDOC_FREECOL; 744 return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); 745 } 746 747 /* 748 * Search for the beginning of unescaped trailing whitespace (ws) 749 * and for the first character not to be output (end). 750 */ 751 752 /* FIXME: replace with strcspn(). */ 753 ws = NULL; 754 for (c = end = buf + offs; *c; c++) { 755 switch (*c) { 756 case ' ': 757 if (NULL == ws) 758 ws = c; 759 continue; 760 case '\t': 761 /* 762 * Always warn about trailing tabs, 763 * even outside literal context, 764 * where they should be put on the next line. 765 */ 766 if (NULL == ws) 767 ws = c; 768 /* 769 * Strip trailing tabs in literal context only; 770 * outside, they affect the next line. 771 */ 772 if (MDOC_LITERAL & m->flags) 773 continue; 774 break; 775 case '\\': 776 /* Skip the escaped character, too, if any. */ 777 if (c[1]) 778 c++; 779 /* FALLTHROUGH */ 780 default: 781 ws = NULL; 782 break; 783 } 784 end = c + 1; 785 } 786 *end = '\0'; 787 788 if (ws) 789 mdoc_pmsg(m, line, (int)(ws-buf), MANDOCERR_EOLNSPACE); 790 791 if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) { 792 mdoc_pmsg(m, line, (int)(c-buf), MANDOCERR_NOBLANKLN); 793 794 /* 795 * Insert a `sp' in the case of a blank line. Technically, 796 * blank lines aren't allowed, but enough manuals assume this 797 * behaviour that we want to work around it. 798 */ 799 if ( ! mdoc_elem_alloc(m, line, offs, MDOC_sp, NULL)) 800 return(0); 801 802 m->next = MDOC_NEXT_SIBLING; 803 return(1); 804 } 805 806 if ( ! mdoc_word_alloc(m, line, offs, buf+offs)) 807 return(0); 808 809 if (MDOC_LITERAL & m->flags) 810 return(1); 811 812 /* 813 * End-of-sentence check. If the last character is an unescaped 814 * EOS character, then flag the node as being the end of a 815 * sentence. The front-end will know how to interpret this. 816 */ 817 818 assert(buf < end); 819 820 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0)) 821 m->last->flags |= MDOC_EOS; 822 823 return(1); 824 } 825 826 827 /* 828 * Parse a macro line, that is, a line beginning with the control 829 * character. 830 */ 831 static int 832 mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs) 833 { 834 enum mdoct tok; 835 int i, sv; 836 char mac[5]; 837 struct mdoc_node *n; 838 839 /* Empty post-control lines are ignored. */ 840 841 if ('"' == buf[offs]) { 842 mdoc_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT); 843 return(1); 844 } else if ('\0' == buf[offs]) 845 return(1); 846 847 sv = offs; 848 849 /* 850 * Copy the first word into a nil-terminated buffer. 851 * Stop copying when a tab, space, or eoln is encountered. 852 */ 853 854 i = 0; 855 while (i < 4 && '\0' != buf[offs] && 856 ' ' != buf[offs] && '\t' != buf[offs]) 857 mac[i++] = buf[offs++]; 858 859 mac[i] = '\0'; 860 861 tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; 862 863 if (MDOC_MAX == tok) { 864 mandoc_vmsg(MANDOCERR_MACRO, m->parse, 865 ln, sv, "%s", buf + sv - 1); 866 return(1); 867 } 868 869 /* Disregard the first trailing tab, if applicable. */ 870 871 if ('\t' == buf[offs]) 872 offs++; 873 874 /* Jump to the next non-whitespace word. */ 875 876 while (buf[offs] && ' ' == buf[offs]) 877 offs++; 878 879 /* 880 * Trailing whitespace. Note that tabs are allowed to be passed 881 * into the parser as "text", so we only warn about spaces here. 882 */ 883 884 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 885 mdoc_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE); 886 887 /* 888 * If an initial macro or a list invocation, divert directly 889 * into macro processing. 890 */ 891 892 if (NULL == m->last || MDOC_It == tok || MDOC_El == tok) { 893 if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf)) 894 goto err; 895 return(1); 896 } 897 898 n = m->last; 899 assert(m->last); 900 901 /* 902 * If the first macro of a `Bl -column', open an `It' block 903 * context around the parsed macro. 904 */ 905 906 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 907 LIST_column == n->norm->Bl.type) { 908 m->flags |= MDOC_FREECOL; 909 if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) 910 goto err; 911 return(1); 912 } 913 914 /* 915 * If we're following a block-level `It' within a `Bl -column' 916 * context (perhaps opened in the above block or in ptext()), 917 * then open an `It' block context around the parsed macro. 918 */ 919 920 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 921 NULL != n->parent && 922 MDOC_Bl == n->parent->tok && 923 LIST_column == n->parent->norm->Bl.type) { 924 m->flags |= MDOC_FREECOL; 925 if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) 926 goto err; 927 return(1); 928 } 929 930 /* Normal processing of a macro. */ 931 932 if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf)) 933 goto err; 934 935 return(1); 936 937 err: /* Error out. */ 938 939 m->flags |= MDOC_HALT; 940 return(0); 941 } 942 943 enum mdelim 944 mdoc_isdelim(const char *p) 945 { 946 947 if ('\0' == p[0]) 948 return(DELIM_NONE); 949 950 if ('\0' == p[1]) 951 switch (p[0]) { 952 case('('): 953 /* FALLTHROUGH */ 954 case('['): 955 return(DELIM_OPEN); 956 case('|'): 957 return(DELIM_MIDDLE); 958 case('.'): 959 /* FALLTHROUGH */ 960 case(','): 961 /* FALLTHROUGH */ 962 case(';'): 963 /* FALLTHROUGH */ 964 case(':'): 965 /* FALLTHROUGH */ 966 case('?'): 967 /* FALLTHROUGH */ 968 case('!'): 969 /* FALLTHROUGH */ 970 case(')'): 971 /* FALLTHROUGH */ 972 case(']'): 973 return(DELIM_CLOSE); 974 default: 975 return(DELIM_NONE); 976 } 977 978 if ('\\' != p[0]) 979 return(DELIM_NONE); 980 981 if (0 == strcmp(p + 1, ".")) 982 return(DELIM_CLOSE); 983 if (0 == strcmp(p + 1, "*(Ba")) 984 return(DELIM_MIDDLE); 985 986 return(DELIM_NONE); 987 }