1 /* $Id: roff.c,v 1.189 2013/12/30 18:44:06 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 28 #include "mandoc.h" 29 #include "libroff.h" 30 #include "libmandoc.h" 31 32 /* Maximum number of nested if-else conditionals. */ 33 #define RSTACK_MAX 128 34 35 /* Maximum number of string expansions per line, to break infinite loops. 
*/
#define EXPAND_LIMIT	1000

/*
 * Tokens for the roff requests known to this file.
 * The enumerator order must match the row order of the roffs[] table,
 * which is indexed by these values.  ROFF_USERDEF stands for a
 * user-defined macro and ROFF_MAX doubles as the "not found" result
 * of the lookup functions.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_cc,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,
	ROFF_ccond,
	ROFF_USERDEF,
	ROFF_MAX
};

/*
 * Result of evaluating a conditional request:
 * content governed by a DENY rule is ignored,
 * content governed by an ALLOW rule is processed.
 */
enum	roffrule {
	ROFFRULE_DENY,
	ROFFRULE_ALLOW
};

/*
 * An incredibly-simple string buffer.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};

/*
 * A key-value roffstr pair as part of a singly-linked list.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};

/*
 * A single number register as part of a singly-linked list.
102 */ 103 struct roffreg { 104 struct roffstr key; 105 int val; 106 struct roffreg *next; 107 }; 108 109 struct roff { 110 enum mparset parsetype; /* requested parse type */ 111 struct mparse *parse; /* parse point */ 112 struct roffnode *last; /* leaf of stack */ 113 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */ 114 char control; /* control character */ 115 int rstackpos; /* position in rstack */ 116 struct roffreg *regtab; /* number registers */ 117 struct roffkv *strtab; /* user-defined strings & macros */ 118 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 119 struct roffstr *xtab; /* single-byte trans table (`tr') */ 120 const char *current_string; /* value of last called user macro */ 121 struct tbl_node *first_tbl; /* first table parsed */ 122 struct tbl_node *last_tbl; /* last table parsed */ 123 struct tbl_node *tbl; /* current table being parsed */ 124 struct eqn_node *last_eqn; /* last equation parsed */ 125 struct eqn_node *first_eqn; /* first equation parsed */ 126 struct eqn_node *eqn; /* current equation being parsed */ 127 }; 128 129 struct roffnode { 130 enum rofft tok; /* type of node */ 131 struct roffnode *parent; /* up one in stack */ 132 int line; /* parse line */ 133 int col; /* parse col */ 134 char *name; /* node name, e.g. 
macro name */ 135 char *end; /* end-rules: custom token */ 136 int endspan; /* end-rules: next-line or infty */ 137 enum roffrule rule; /* current evaluation rule */ 138 }; 139 140 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 141 enum rofft tok, /* tok of macro */ \ 142 char **bufp, /* input buffer */ \ 143 size_t *szp, /* size of input buffer */ \ 144 int ln, /* parse line */ \ 145 int ppos, /* original pos in buffer */ \ 146 int pos, /* current pos in buffer */ \ 147 int *offs /* reset offset of buffer data */ 148 149 typedef enum rofferr (*roffproc)(ROFF_ARGS); 150 151 struct roffmac { 152 const char *name; /* macro name */ 153 roffproc proc; /* process new macro */ 154 roffproc text; /* process as child text of macro */ 155 roffproc sub; /* process as child of macro */ 156 int flags; 157 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 158 struct roffmac *next; 159 }; 160 161 struct predef { 162 const char *name; /* predefined input name */ 163 const char *str; /* replacement symbol */ 164 }; 165 166 #define PREDEF(__name, __str) \ 167 { (__name), (__str) }, 168 169 static enum rofft roffhash_find(const char *, size_t); 170 static void roffhash_init(void); 171 static void roffnode_cleanscope(struct roff *); 172 static void roffnode_pop(struct roff *); 173 static void roffnode_push(struct roff *, enum rofft, 174 const char *, int, int); 175 static enum rofferr roff_block(ROFF_ARGS); 176 static enum rofferr roff_block_text(ROFF_ARGS); 177 static enum rofferr roff_block_sub(ROFF_ARGS); 178 static enum rofferr roff_cblock(ROFF_ARGS); 179 static enum rofferr roff_cc(ROFF_ARGS); 180 static enum rofferr roff_ccond(ROFF_ARGS); 181 static enum rofferr roff_cond(ROFF_ARGS); 182 static enum rofferr roff_cond_text(ROFF_ARGS); 183 static enum rofferr roff_cond_sub(ROFF_ARGS); 184 static enum rofferr roff_ds(ROFF_ARGS); 185 static enum roffrule roff_evalcond(const char *, int *); 186 static void roff_free1(struct roff *); 187 static void roff_freereg(struct 
roffreg *); 188 static void roff_freestr(struct roffkv *); 189 static char *roff_getname(struct roff *, char **, int, int); 190 static int roff_getnum(const char *, int *, int *); 191 static int roff_getop(const char *, int *, char *); 192 static int roff_getregn(const struct roff *, 193 const char *, size_t); 194 static const char *roff_getstrn(const struct roff *, 195 const char *, size_t); 196 static enum rofferr roff_it(ROFF_ARGS); 197 static enum rofferr roff_line_ignore(ROFF_ARGS); 198 static enum rofferr roff_nr(ROFF_ARGS); 199 static void roff_openeqn(struct roff *, const char *, 200 int, int, const char *); 201 static enum rofft roff_parse(struct roff *, const char *, int *); 202 static enum rofferr roff_parsetext(char **, size_t *, int, int *); 203 static enum rofferr roff_res(struct roff *, 204 char **, size_t *, int, int); 205 static enum rofferr roff_rm(ROFF_ARGS); 206 static void roff_setstr(struct roff *, 207 const char *, const char *, int); 208 static void roff_setstrn(struct roffkv **, const char *, 209 size_t, const char *, size_t, int); 210 static enum rofferr roff_so(ROFF_ARGS); 211 static enum rofferr roff_tr(ROFF_ARGS); 212 static enum rofferr roff_Dd(ROFF_ARGS); 213 static enum rofferr roff_TH(ROFF_ARGS); 214 static enum rofferr roff_TE(ROFF_ARGS); 215 static enum rofferr roff_TS(ROFF_ARGS); 216 static enum rofferr roff_EQ(ROFF_ARGS); 217 static enum rofferr roff_EN(ROFF_ARGS); 218 static enum rofferr roff_T_(ROFF_ARGS); 219 static enum rofferr roff_userdef(ROFF_ARGS); 220 221 /* See roffhash_find() */ 222 223 #define ASCII_HI 126 224 #define ASCII_LO 33 225 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1) 226 227 static struct roffmac *hash[HASHWIDTH]; 228 229 static struct roffmac roffs[ROFF_MAX] = { 230 { "ad", roff_line_ignore, NULL, NULL, 0, NULL }, 231 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 232 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 233 { "am1", roff_block, roff_block_text, roff_block_sub, 
0, NULL }, 234 { "cc", roff_cc, NULL, NULL, 0, NULL }, 235 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 236 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 237 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 238 { "ds", roff_ds, NULL, NULL, 0, NULL }, 239 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 240 { "fam", roff_line_ignore, NULL, NULL, 0, NULL }, 241 { "hw", roff_line_ignore, NULL, NULL, 0, NULL }, 242 { "hy", roff_line_ignore, NULL, NULL, 0, NULL }, 243 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 244 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 245 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 246 { "it", roff_it, NULL, NULL, 0, NULL }, 247 { "ne", roff_line_ignore, NULL, NULL, 0, NULL }, 248 { "nh", roff_line_ignore, NULL, NULL, 0, NULL }, 249 { "nr", roff_nr, NULL, NULL, 0, NULL }, 250 { "ns", roff_line_ignore, NULL, NULL, 0, NULL }, 251 { "ps", roff_line_ignore, NULL, NULL, 0, NULL }, 252 { "rm", roff_rm, NULL, NULL, 0, NULL }, 253 { "so", roff_so, NULL, NULL, 0, NULL }, 254 { "ta", roff_line_ignore, NULL, NULL, 0, NULL }, 255 { "tr", roff_tr, NULL, NULL, 0, NULL }, 256 { "Dd", roff_Dd, NULL, NULL, 0, NULL }, 257 { "TH", roff_TH, NULL, NULL, 0, NULL }, 258 { "TS", roff_TS, NULL, NULL, 0, NULL }, 259 { "TE", roff_TE, NULL, NULL, 0, NULL }, 260 { "T&", roff_T_, NULL, NULL, 0, NULL }, 261 { "EQ", roff_EQ, NULL, NULL, 0, NULL }, 262 { "EN", roff_EN, NULL, NULL, 0, NULL }, 263 { ".", roff_cblock, NULL, NULL, 0, NULL }, 264 { "\\}", roff_ccond, NULL, NULL, 0, NULL }, 265 { NULL, roff_userdef, NULL, NULL, 0, NULL }, 266 }; 267 268 const char *const __mdoc_reserved[] = { 269 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", 270 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", 271 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx", 272 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq", 273 "Ds", "Dt", "Dv", "Dx", "D1", 274 "Ec", 
"Ed", "Ef", "Ek", "El", "Em", "em", 275 "En", "Eo", "Eq", "Er", "Es", "Ev", "Ex", 276 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx", 277 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP", 278 "Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx", 279 "Oc", "Oo", "Op", "Os", "Ot", "Ox", 280 "Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq", 281 "Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv", 282 "Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq", 283 "Ss", "St", "Sx", "Sy", 284 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr", 285 "%A", "%B", "%D", "%I", "%J", "%N", "%O", 286 "%P", "%Q", "%R", "%T", "%U", "%V", 287 NULL 288 }; 289 290 const char *const __man_reserved[] = { 291 "AT", "B", "BI", "BR", "BT", "DE", "DS", "DT", 292 "EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR", 293 "LP", "ME", "MT", "OP", "P", "PD", "PP", "PT", 294 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY", 295 "TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS", 296 NULL 297 }; 298 299 /* Array of injected predefined strings. */ 300 #define PREDEFS_MAX 38 301 static const struct predef predefs[PREDEFS_MAX] = { 302 #include "predefs.in" 303 }; 304 305 /* See roffhash_find() */ 306 #define ROFF_HASH(p) (p[0] - ASCII_LO) 307 308 static int roffit_lines; /* number of lines to delay */ 309 static char *roffit_macro; /* nil-terminated macro line */ 310 311 static void 312 roffhash_init(void) 313 { 314 struct roffmac *n; 315 int buc, i; 316 317 for (i = 0; i < (int)ROFF_USERDEF; i++) { 318 assert(roffs[i].name[0] >= ASCII_LO); 319 assert(roffs[i].name[0] <= ASCII_HI); 320 321 buc = ROFF_HASH(roffs[i].name); 322 323 if (NULL != (n = hash[buc])) { 324 for ( ; n->next; n = n->next) 325 /* Do nothing. */ ; 326 n->next = &roffs[i]; 327 } else 328 hash[buc] = &roffs[i]; 329 } 330 } 331 332 /* 333 * Look up a roff token by its name. Returns ROFF_MAX if no macro by 334 * the nil-terminated string name could be found. 
335 */ 336 static enum rofft 337 roffhash_find(const char *p, size_t s) 338 { 339 int buc; 340 struct roffmac *n; 341 342 /* 343 * libroff has an extremely simple hashtable, for the time 344 * being, which simply keys on the first character, which must 345 * be printable, then walks a chain. It works well enough until 346 * optimised. 347 */ 348 349 if (p[0] < ASCII_LO || p[0] > ASCII_HI) 350 return(ROFF_MAX); 351 352 buc = ROFF_HASH(p); 353 354 if (NULL == (n = hash[buc])) 355 return(ROFF_MAX); 356 for ( ; n; n = n->next) 357 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s]) 358 return((enum rofft)(n - roffs)); 359 360 return(ROFF_MAX); 361 } 362 363 364 /* 365 * Pop the current node off of the stack of roff instructions currently 366 * pending. 367 */ 368 static void 369 roffnode_pop(struct roff *r) 370 { 371 struct roffnode *p; 372 373 assert(r->last); 374 p = r->last; 375 376 r->last = r->last->parent; 377 free(p->name); 378 free(p->end); 379 free(p); 380 } 381 382 383 /* 384 * Push a roff node onto the instruction stack. This must later be 385 * removed with roffnode_pop(). 386 */ 387 static void 388 roffnode_push(struct roff *r, enum rofft tok, const char *name, 389 int line, int col) 390 { 391 struct roffnode *p; 392 393 p = mandoc_calloc(1, sizeof(struct roffnode)); 394 p->tok = tok; 395 if (name) 396 p->name = mandoc_strdup(name); 397 p->parent = r->last; 398 p->line = line; 399 p->col = col; 400 p->rule = p->parent ? 
p->parent->rule : ROFFRULE_DENY; 401 402 r->last = p; 403 } 404 405 406 static void 407 roff_free1(struct roff *r) 408 { 409 struct tbl_node *tbl; 410 struct eqn_node *e; 411 int i; 412 413 while (NULL != (tbl = r->first_tbl)) { 414 r->first_tbl = tbl->next; 415 tbl_free(tbl); 416 } 417 418 r->first_tbl = r->last_tbl = r->tbl = NULL; 419 420 while (NULL != (e = r->first_eqn)) { 421 r->first_eqn = e->next; 422 eqn_free(e); 423 } 424 425 r->first_eqn = r->last_eqn = r->eqn = NULL; 426 427 while (r->last) 428 roffnode_pop(r); 429 430 roff_freestr(r->strtab); 431 roff_freestr(r->xmbtab); 432 433 r->strtab = r->xmbtab = NULL; 434 435 roff_freereg(r->regtab); 436 437 r->regtab = NULL; 438 439 if (r->xtab) 440 for (i = 0; i < 128; i++) 441 free(r->xtab[i].p); 442 443 free(r->xtab); 444 r->xtab = NULL; 445 } 446 447 void 448 roff_reset(struct roff *r) 449 { 450 int i; 451 452 roff_free1(r); 453 454 r->control = 0; 455 456 for (i = 0; i < PREDEFS_MAX; i++) 457 roff_setstr(r, predefs[i].name, predefs[i].str, 0); 458 } 459 460 461 void 462 roff_free(struct roff *r) 463 { 464 465 roff_free1(r); 466 free(r); 467 } 468 469 470 struct roff * 471 roff_alloc(enum mparset type, struct mparse *parse) 472 { 473 struct roff *r; 474 int i; 475 476 r = mandoc_calloc(1, sizeof(struct roff)); 477 r->parsetype = type; 478 r->parse = parse; 479 r->rstackpos = -1; 480 481 roffhash_init(); 482 483 for (i = 0; i < PREDEFS_MAX; i++) 484 roff_setstr(r, predefs[i].name, predefs[i].str, 0); 485 486 return(r); 487 } 488 489 /* 490 * In the current line, expand user-defined strings ("\*") 491 * and references to number registers ("\n"). 492 * Also check the syntax of other escape sequences. 
*/
/*
 * Scanning starts at offset pos of *bufp.  Whenever an escape is
 * expanded, *bufp is freed and replaced by a newly allocated buffer,
 * *szp is updated, and scanning restarts at the place of the replaced
 * escape, such that the replacement text itself gets expanded, too.
 * Returns ROFF_IGN if more than EXPAND_LIMIT expansions were needed
 * on this line, or ROFF_CONT otherwise, including after reporting
 * an invalid escape sequence.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	char		 ubuf[12]; /* buffer to print the number */
	const char	*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy bufp to */
	size_t		 nsz;	/* size of the new buffer */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	int		 expand_count;	/* to avoid infinite loops */

	expand_count = 0;

again:
	cp = *bufp + pos;
	while (NULL != (cp = strchr(cp, '\\'))) {
		stesc = cp++;

		/*
		 * The second character must be an asterisk or an n.
		 * If it isn't, skip it anyway:  It is escaped,
		 * so it can't start another escape sequence.
		 */

		if ('\0' == *cp)
			return(ROFF_CONT);

		/*
		 * From here on, res doubles as the type marker:
		 * NULL means string, ubuf means number register.
		 */

		switch (*cp) {
		case ('*'):
			res = NULL;
			break;
		case ('n'):
			res = ubuf;
			break;
		default:
			/* Let mandoc_escape() skip any other escape. */
			if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
				continue;
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			return(ROFF_CONT);
		}

		cp++;

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		switch (*cp) {
		case ('\0'):
			return(ROFF_CONT);
		case ('('):
			cp++;
			maxl = 2;
			break;
		case ('['):
			/* Length zero means: read up to the ']'. */
			cp++;
			maxl = 0;
			break;
		default:
			maxl = 1;
			break;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg
					(MANDOCERR_BADESCAPE,
					 r->parse, ln,
					 (int)(stesc - *bufp), NULL);
				return(ROFF_CONT);
			}
			if (0 == maxl && ']' == *cp)
				break;
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		if (NULL == res)
			res = roff_getstrn(r, stnam, naml);
		else
			snprintf(ubuf, sizeof(ubuf), "%d",
			    roff_getregn(r, stnam, naml));

		/* An undefined string is reported and expands to nothing. */

		if (NULL == res) {
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		pos = stesc - *bufp;

		nsz = *szp + strlen(res) + 1;
		nbuf = mandoc_malloc(nsz);

		/*
		 * Copy what precedes the escape, append the replacement,
		 * then append what follows the name; in the bracketed
		 * form, cp points to the ']', which is skipped.
		 */

		strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
		strlcat(nbuf, res, nsz);
		strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);

		free(*bufp);

		*bufp = nbuf;
		*szp = nsz;

		if (EXPAND_LIMIT >= ++expand_count)
			goto again;

		/* Just leave the string unexpanded. */
		mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
		return(ROFF_IGN);
	}
	return(ROFF_CONT);
}

/*
 * Process text streams:
 * Convert all breakable hyphens into ASCII_HYPH.
 * Decrement and spring input line trap.
 */
static enum rofferr
roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
{
	size_t		 sz;
	const char	*start;
	char		*p;
	int		 isz;
	enum mandoc_esc	 esc;

	start = p = *bufp + pos;

	while ('\0' != *p) {
		/* Jump to the next hyphen or backslash. */
		sz = strcspn(p, "-\\");
		p += sz;

		if ('\0' == *p)
			break;

		if ('\\' == *p) {
			/* Skip over escapes.
*/ 648 p++; 649 esc = mandoc_escape((const char **)&p, NULL, NULL); 650 if (ESCAPE_ERROR == esc) 651 break; 652 continue; 653 } else if (p == start) { 654 p++; 655 continue; 656 } 657 658 if (isalpha((unsigned char)p[-1]) && 659 isalpha((unsigned char)p[1])) 660 *p = ASCII_HYPH; 661 p++; 662 } 663 664 /* Spring the input line trap. */ 665 if (1 == roffit_lines) { 666 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro); 667 if (-1 == isz) { 668 perror(NULL); 669 exit((int)MANDOCLEVEL_SYSERR); 670 } 671 free(*bufp); 672 *bufp = p; 673 *szp = isz + 1; 674 *offs = 0; 675 free(roffit_macro); 676 roffit_lines = 0; 677 return(ROFF_REPARSE); 678 } else if (1 < roffit_lines) 679 --roffit_lines; 680 return(ROFF_CONT); 681 } 682 683 enum rofferr 684 roff_parseln(struct roff *r, int ln, char **bufp, 685 size_t *szp, int pos, int *offs) 686 { 687 enum rofft t; 688 enum rofferr e; 689 int ppos, ctl; 690 691 /* 692 * Run the reserved-word filter only if we have some reserved 693 * words to fill in. 694 */ 695 696 e = roff_res(r, bufp, szp, ln, pos); 697 if (ROFF_IGN == e) 698 return(e); 699 assert(ROFF_CONT == e); 700 701 ppos = pos; 702 ctl = roff_getcontrol(r, *bufp, &pos); 703 704 /* 705 * First, if a scope is open and we're not a macro, pass the 706 * text through the macro's filter. If a scope isn't open and 707 * we're not a macro, just let it through. 708 * Finally, if there's an equation scope open, divert it into it 709 * no matter our state. 710 */ 711 712 if (r->last && ! ctl) { 713 t = r->last->tok; 714 assert(roffs[t].text); 715 e = (*roffs[t].text) 716 (r, t, bufp, szp, ln, pos, pos, offs); 717 assert(ROFF_IGN == e || ROFF_CONT == e); 718 if (ROFF_CONT != e) 719 return(e); 720 } 721 if (r->eqn) 722 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs)); 723 if ( ! 
ctl) { 724 if (r->tbl) 725 return(tbl_read(r->tbl, ln, *bufp, pos)); 726 return(roff_parsetext(bufp, szp, pos, offs)); 727 } 728 729 /* 730 * If a scope is open, go to the child handler for that macro, 731 * as it may want to preprocess before doing anything with it. 732 * Don't do so if an equation is open. 733 */ 734 735 if (r->last) { 736 t = r->last->tok; 737 assert(roffs[t].sub); 738 return((*roffs[t].sub) 739 (r, t, bufp, szp, 740 ln, ppos, pos, offs)); 741 } 742 743 /* 744 * Lastly, as we've no scope open, try to look up and execute 745 * the new macro. If no macro is found, simply return and let 746 * the compilers handle it. 747 */ 748 749 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) 750 return(ROFF_CONT); 751 752 assert(roffs[t].proc); 753 return((*roffs[t].proc) 754 (r, t, bufp, szp, 755 ln, ppos, pos, offs)); 756 } 757 758 759 void 760 roff_endparse(struct roff *r) 761 { 762 763 if (r->last) 764 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 765 r->last->line, r->last->col, NULL); 766 767 if (r->eqn) { 768 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 769 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL); 770 eqn_end(&r->eqn); 771 } 772 773 if (r->tbl) { 774 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 775 r->tbl->line, r->tbl->pos, NULL); 776 tbl_end(&r->tbl); 777 } 778 } 779 780 /* 781 * Parse a roff node's type from the input buffer. This must be in the 782 * form of ".foo xxx" in the usual way. 783 */ 784 static enum rofft 785 roff_parse(struct roff *r, const char *buf, int *pos) 786 { 787 const char *mac; 788 size_t maclen; 789 enum rofft t; 790 791 if ('\0' == buf[*pos] || '"' == buf[*pos] || 792 '\t' == buf[*pos] || ' ' == buf[*pos]) 793 return(ROFF_MAX); 794 795 /* 796 * We stop the macro parse at an escape, tab, space, or nil. 797 * However, `\}' is also a valid macro, so make sure we don't 798 * clobber it by seeing the `\' as the end of token. 
799 */ 800 801 mac = buf + *pos; 802 maclen = strcspn(mac + 1, " \\\t\0") + 1; 803 804 t = (r->current_string = roff_getstrn(r, mac, maclen)) 805 ? ROFF_USERDEF : roffhash_find(mac, maclen); 806 807 *pos += (int)maclen; 808 809 while (buf[*pos] && ' ' == buf[*pos]) 810 (*pos)++; 811 812 return(t); 813 } 814 815 /* ARGSUSED */ 816 static enum rofferr 817 roff_cblock(ROFF_ARGS) 818 { 819 820 /* 821 * A block-close `..' should only be invoked as a child of an 822 * ignore macro, otherwise raise a warning and just ignore it. 823 */ 824 825 if (NULL == r->last) { 826 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 827 return(ROFF_IGN); 828 } 829 830 switch (r->last->tok) { 831 case (ROFF_am): 832 /* FALLTHROUGH */ 833 case (ROFF_ami): 834 /* FALLTHROUGH */ 835 case (ROFF_am1): 836 /* FALLTHROUGH */ 837 case (ROFF_de): 838 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 839 /* FALLTHROUGH */ 840 case (ROFF_dei): 841 /* FALLTHROUGH */ 842 case (ROFF_ig): 843 break; 844 default: 845 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 846 return(ROFF_IGN); 847 } 848 849 if ((*bufp)[pos]) 850 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 851 852 roffnode_pop(r); 853 roffnode_cleanscope(r); 854 return(ROFF_IGN); 855 856 } 857 858 859 static void 860 roffnode_cleanscope(struct roff *r) 861 { 862 863 while (r->last) { 864 if (--r->last->endspan != 0) 865 break; 866 roffnode_pop(r); 867 } 868 } 869 870 871 /* ARGSUSED */ 872 static enum rofferr 873 roff_ccond(ROFF_ARGS) 874 { 875 876 if (NULL == r->last) { 877 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 878 return(ROFF_IGN); 879 } 880 881 switch (r->last->tok) { 882 case (ROFF_el): 883 /* FALLTHROUGH */ 884 case (ROFF_ie): 885 /* FALLTHROUGH */ 886 case (ROFF_if): 887 break; 888 default: 889 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 890 return(ROFF_IGN); 891 } 892 893 if (r->last->endspan > -1) { 894 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 895 
return(ROFF_IGN); 896 } 897 898 if ((*bufp)[pos]) 899 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 900 901 roffnode_pop(r); 902 roffnode_cleanscope(r); 903 return(ROFF_IGN); 904 } 905 906 907 /* ARGSUSED */ 908 static enum rofferr 909 roff_block(ROFF_ARGS) 910 { 911 int sv; 912 size_t sz; 913 char *name; 914 915 name = NULL; 916 917 if (ROFF_ig != tok) { 918 if ('\0' == (*bufp)[pos]) { 919 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); 920 return(ROFF_IGN); 921 } 922 923 /* 924 * Re-write `de1', since we don't really care about 925 * groff's strange compatibility mode, into `de'. 926 */ 927 928 if (ROFF_de1 == tok) 929 tok = ROFF_de; 930 if (ROFF_de == tok) 931 name = *bufp + pos; 932 else 933 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, 934 roffs[tok].name); 935 936 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) 937 pos++; 938 939 while (isspace((unsigned char)(*bufp)[pos])) 940 (*bufp)[pos++] = '\0'; 941 } 942 943 roffnode_push(r, tok, name, ln, ppos); 944 945 /* 946 * At the beginning of a `de' macro, clear the existing string 947 * with the same name, if there is one. New content will be 948 * added from roff_block_text() in multiline mode. 949 */ 950 951 if (ROFF_de == tok) 952 roff_setstr(r, name, "", 0); 953 954 if ('\0' == (*bufp)[pos]) 955 return(ROFF_IGN); 956 957 /* If present, process the custom end-of-line marker. */ 958 959 sv = pos; 960 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) 961 pos++; 962 963 /* 964 * Note: groff does NOT like escape characters in the input. 965 * Instead of detecting this, we're just going to let it fly and 966 * to hell with it. 967 */ 968 969 assert(pos > sv); 970 sz = (size_t)(pos - sv); 971 972 if (1 == sz && '.' 
== (*bufp)[sv]) 973 return(ROFF_IGN); 974 975 r->last->end = mandoc_malloc(sz + 1); 976 977 memcpy(r->last->end, *bufp + sv, sz); 978 r->last->end[(int)sz] = '\0'; 979 980 if ((*bufp)[pos]) 981 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 982 983 return(ROFF_IGN); 984 } 985 986 987 /* ARGSUSED */ 988 static enum rofferr 989 roff_block_sub(ROFF_ARGS) 990 { 991 enum rofft t; 992 int i, j; 993 994 /* 995 * First check whether a custom macro exists at this level. If 996 * it does, then check against it. This is some of groff's 997 * stranger behaviours. If we encountered a custom end-scope 998 * tag and that tag also happens to be a "real" macro, then we 999 * need to try interpreting it again as a real macro. If it's 1000 * not, then return ignore. Else continue. 1001 */ 1002 1003 if (r->last->end) { 1004 for (i = pos, j = 0; r->last->end[j]; j++, i++) 1005 if ((*bufp)[i] != r->last->end[j]) 1006 break; 1007 1008 if ('\0' == r->last->end[j] && 1009 ('\0' == (*bufp)[i] || 1010 ' ' == (*bufp)[i] || 1011 '\t' == (*bufp)[i])) { 1012 roffnode_pop(r); 1013 roffnode_cleanscope(r); 1014 1015 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i]) 1016 i++; 1017 1018 pos = i; 1019 if (ROFF_MAX != roff_parse(r, *bufp, &pos)) 1020 return(ROFF_RERUN); 1021 return(ROFF_IGN); 1022 } 1023 } 1024 1025 /* 1026 * If we have no custom end-query or lookup failed, then try 1027 * pulling it out of the hashtable. 1028 */ 1029 1030 t = roff_parse(r, *bufp, &pos); 1031 1032 /* 1033 * Macros other than block-end are only significant 1034 * in `de' blocks; elsewhere, simply throw them away. 
1035 */ 1036 if (ROFF_cblock != t) { 1037 if (ROFF_de == tok) 1038 roff_setstr(r, r->last->name, *bufp + ppos, 1); 1039 return(ROFF_IGN); 1040 } 1041 1042 assert(roffs[t].proc); 1043 return((*roffs[t].proc)(r, t, bufp, szp, 1044 ln, ppos, pos, offs)); 1045 } 1046 1047 1048 /* ARGSUSED */ 1049 static enum rofferr 1050 roff_block_text(ROFF_ARGS) 1051 { 1052 1053 if (ROFF_de == tok) 1054 roff_setstr(r, r->last->name, *bufp + pos, 1); 1055 1056 return(ROFF_IGN); 1057 } 1058 1059 1060 /* ARGSUSED */ 1061 static enum rofferr 1062 roff_cond_sub(ROFF_ARGS) 1063 { 1064 enum rofft t; 1065 enum roffrule rr; 1066 char *ep; 1067 1068 rr = r->last->rule; 1069 roffnode_cleanscope(r); 1070 t = roff_parse(r, *bufp, &pos); 1071 1072 /* 1073 * Fully handle known macros when they are structurally 1074 * required or when the conditional evaluated to true. 1075 */ 1076 1077 if ((ROFF_MAX != t) && 1078 (ROFF_ccond == t || ROFFRULE_ALLOW == rr || 1079 ROFFMAC_STRUCT & roffs[t].flags)) { 1080 assert(roffs[t].proc); 1081 return((*roffs[t].proc)(r, t, bufp, szp, 1082 ln, ppos, pos, offs)); 1083 } 1084 1085 /* Always check for the closing delimiter `\}'. */ 1086 1087 ep = &(*bufp)[pos]; 1088 while (NULL != (ep = strchr(ep, '\\'))) { 1089 if ('}' != *(++ep)) 1090 continue; 1091 1092 /* 1093 * If we're at the end of line, then just chop 1094 * off the \} and resize the buffer. 1095 * If we aren't, then convert it to spaces. 1096 */ 1097 1098 if ('\0' == *(ep + 1)) { 1099 *--ep = '\0'; 1100 *szp -= 2; 1101 } else 1102 *(ep - 1) = *ep = ' '; 1103 1104 roff_ccond(r, ROFF_ccond, bufp, szp, 1105 ln, pos, pos + 2, offs); 1106 break; 1107 } 1108 return(ROFFRULE_DENY == rr ? 
ROFF_IGN : ROFF_CONT); 1109 } 1110 1111 /* ARGSUSED */ 1112 static enum rofferr 1113 roff_cond_text(ROFF_ARGS) 1114 { 1115 char *ep; 1116 enum roffrule rr; 1117 1118 rr = r->last->rule; 1119 roffnode_cleanscope(r); 1120 1121 ep = &(*bufp)[pos]; 1122 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { 1123 ep++; 1124 if ('}' != *ep) 1125 continue; 1126 *ep = '&'; 1127 roff_ccond(r, ROFF_ccond, bufp, szp, 1128 ln, pos, pos + 2, offs); 1129 } 1130 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); 1131 } 1132 1133 static int 1134 roff_getnum(const char *v, int *pos, int *res) 1135 { 1136 int p, n; 1137 1138 p = *pos; 1139 n = v[p] == '-'; 1140 if (n) 1141 p++; 1142 1143 for (*res = 0; isdigit((unsigned char)v[p]); p++) 1144 *res += 10 * *res + v[p] - '0'; 1145 if (p == *pos + n) 1146 return 0; 1147 1148 if (n) 1149 *res = -*res; 1150 1151 *pos = p; 1152 return 1; 1153 } 1154 1155 static int 1156 roff_getop(const char *v, int *pos, char *res) 1157 { 1158 int e; 1159 1160 *res = v[*pos]; 1161 e = v[*pos + 1] == '='; 1162 1163 switch (*res) { 1164 case '=': 1165 break; 1166 case '>': 1167 if (e) 1168 *res = 'g'; 1169 break; 1170 case '<': 1171 if (e) 1172 *res = 'l'; 1173 break; 1174 default: 1175 return(0); 1176 } 1177 1178 *pos += 1 + e; 1179 1180 return(*res); 1181 } 1182 1183 static enum roffrule 1184 roff_evalcond(const char *v, int *pos) 1185 { 1186 int not, lh, rh; 1187 char op; 1188 1189 switch (v[*pos]) { 1190 case ('n'): 1191 (*pos)++; 1192 return(ROFFRULE_ALLOW); 1193 case ('e'): 1194 /* FALLTHROUGH */ 1195 case ('o'): 1196 /* FALLTHROUGH */ 1197 case ('t'): 1198 (*pos)++; 1199 return(ROFFRULE_DENY); 1200 case ('!'): 1201 (*pos)++; 1202 not = 1; 1203 break; 1204 default: 1205 not = 0; 1206 break; 1207 } 1208 1209 if (!roff_getnum(v, pos, &lh)) 1210 return ROFFRULE_DENY; 1211 if (!roff_getop(v, pos, &op)) { 1212 if (lh < 0) 1213 lh = 0; 1214 goto out; 1215 } 1216 if (!roff_getnum(v, pos, &rh)) 1217 return ROFFRULE_DENY; 1218 switch (op) { 1219 case 'g': 1220 lh = 
lh >= rh; 1221 break; 1222 case 'l': 1223 lh = lh <= rh; 1224 break; 1225 case '=': 1226 lh = lh == rh; 1227 break; 1228 case '>': 1229 lh = lh > rh; 1230 break; 1231 case '<': 1232 lh = lh < rh; 1233 break; 1234 default: 1235 return ROFFRULE_DENY; 1236 } 1237 out: 1238 if (not) 1239 lh = !lh; 1240 return lh ? ROFFRULE_ALLOW : ROFFRULE_DENY; 1241 } 1242 1243 /* ARGSUSED */ 1244 static enum rofferr 1245 roff_line_ignore(ROFF_ARGS) 1246 { 1247 1248 return(ROFF_IGN); 1249 } 1250 1251 /* ARGSUSED */ 1252 static enum rofferr 1253 roff_cond(ROFF_ARGS) 1254 { 1255 1256 roffnode_push(r, tok, NULL, ln, ppos); 1257 1258 /* 1259 * An `.el' has no conditional body: it will consume the value 1260 * of the current rstack entry set in prior `ie' calls or 1261 * defaults to DENY. 1262 * 1263 * If we're not an `el', however, then evaluate the conditional. 1264 */ 1265 1266 r->last->rule = ROFF_el == tok ? 1267 (r->rstackpos < 0 ? 1268 ROFFRULE_DENY : r->rstack[r->rstackpos--]) : 1269 roff_evalcond(*bufp, &pos); 1270 1271 /* 1272 * An if-else will put the NEGATION of the current evaluated 1273 * conditional into the stack of rules. 1274 */ 1275 1276 if (ROFF_ie == tok) { 1277 if (r->rstackpos == RSTACK_MAX - 1) { 1278 mandoc_msg(MANDOCERR_MEM, 1279 r->parse, ln, ppos, NULL); 1280 return(ROFF_ERR); 1281 } 1282 r->rstack[++r->rstackpos] = 1283 ROFFRULE_DENY == r->last->rule ? 1284 ROFFRULE_ALLOW : ROFFRULE_DENY; 1285 } 1286 1287 /* If the parent has false as its rule, then so do we. */ 1288 1289 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule) 1290 r->last->rule = ROFFRULE_DENY; 1291 1292 /* 1293 * Determine scope. 1294 * If there is nothing on the line after the conditional, 1295 * not even whitespace, use next-line scope. 1296 */ 1297 1298 if ('\0' == (*bufp)[pos]) { 1299 r->last->endspan = 2; 1300 goto out; 1301 } 1302 1303 while (' ' == (*bufp)[pos]) 1304 pos++; 1305 1306 /* An opening brace requests multiline scope. 
*/ 1307 1308 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { 1309 r->last->endspan = -1; 1310 pos += 2; 1311 goto out; 1312 } 1313 1314 /* 1315 * Anything else following the conditional causes 1316 * single-line scope. Warn if the scope contains 1317 * nothing but trailing whitespace. 1318 */ 1319 1320 if ('\0' == (*bufp)[pos]) 1321 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); 1322 1323 r->last->endspan = 1; 1324 1325 out: 1326 *offs = pos; 1327 return(ROFF_RERUN); 1328 } 1329 1330 1331 /* ARGSUSED */ 1332 static enum rofferr 1333 roff_ds(ROFF_ARGS) 1334 { 1335 char *name, *string; 1336 1337 /* 1338 * A symbol is named by the first word following the macro 1339 * invocation up to a space. Its value is anything after the 1340 * name's trailing whitespace and optional double-quote. Thus, 1341 * 1342 * [.ds foo "bar " ] 1343 * 1344 * will have `bar " ' as its value. 1345 */ 1346 1347 string = *bufp + pos; 1348 name = roff_getname(r, &string, ln, pos); 1349 if ('\0' == *name) 1350 return(ROFF_IGN); 1351 1352 /* Read past initial double-quote. */ 1353 if ('"' == *string) 1354 string++; 1355 1356 /* The rest is the value. */ 1357 roff_setstr(r, name, string, 0); 1358 return(ROFF_IGN); 1359 } 1360 1361 void 1362 roff_setreg(struct roff *r, const char *name, int val, char sign) 1363 { 1364 struct roffreg *reg; 1365 1366 /* Search for an existing register with the same name. */ 1367 reg = r->regtab; 1368 1369 while (reg && strcmp(name, reg->key.p)) 1370 reg = reg->next; 1371 1372 if (NULL == reg) { 1373 /* Create a new register. 
*/ 1374 reg = mandoc_malloc(sizeof(struct roffreg)); 1375 reg->key.p = mandoc_strdup(name); 1376 reg->key.sz = strlen(name); 1377 reg->val = 0; 1378 reg->next = r->regtab; 1379 r->regtab = reg; 1380 } 1381 1382 if ('+' == sign) 1383 reg->val += val; 1384 else if ('-' == sign) 1385 reg->val -= val; 1386 else 1387 reg->val = val; 1388 } 1389 1390 int 1391 roff_getreg(const struct roff *r, const char *name) 1392 { 1393 struct roffreg *reg; 1394 1395 for (reg = r->regtab; reg; reg = reg->next) 1396 if (0 == strcmp(name, reg->key.p)) 1397 return(reg->val); 1398 1399 return(0); 1400 } 1401 1402 static int 1403 roff_getregn(const struct roff *r, const char *name, size_t len) 1404 { 1405 struct roffreg *reg; 1406 1407 for (reg = r->regtab; reg; reg = reg->next) 1408 if (len == reg->key.sz && 1409 0 == strncmp(name, reg->key.p, len)) 1410 return(reg->val); 1411 1412 return(0); 1413 } 1414 1415 static void 1416 roff_freereg(struct roffreg *reg) 1417 { 1418 struct roffreg *old_reg; 1419 1420 while (NULL != reg) { 1421 free(reg->key.p); 1422 old_reg = reg; 1423 reg = reg->next; 1424 free(old_reg); 1425 } 1426 } 1427 1428 /* ARGSUSED */ 1429 static enum rofferr 1430 roff_nr(ROFF_ARGS) 1431 { 1432 const char *key; 1433 char *val; 1434 size_t sz; 1435 int iv; 1436 char sign; 1437 1438 val = *bufp + pos; 1439 key = roff_getname(r, &val, ln, pos); 1440 1441 sign = *val; 1442 if ('+' == sign || '-' == sign) 1443 val++; 1444 1445 sz = strspn(val, "0123456789"); 1446 iv = sz ? 
mandoc_strntoi(val, sz, 10) : 0; 1447 1448 roff_setreg(r, key, iv, sign); 1449 1450 return(ROFF_IGN); 1451 } 1452 1453 /* ARGSUSED */ 1454 static enum rofferr 1455 roff_rm(ROFF_ARGS) 1456 { 1457 const char *name; 1458 char *cp; 1459 1460 cp = *bufp + pos; 1461 while ('\0' != *cp) { 1462 name = roff_getname(r, &cp, ln, (int)(cp - *bufp)); 1463 if ('\0' != *name) 1464 roff_setstr(r, name, NULL, 0); 1465 } 1466 return(ROFF_IGN); 1467 } 1468 1469 /* ARGSUSED */ 1470 static enum rofferr 1471 roff_it(ROFF_ARGS) 1472 { 1473 char *cp; 1474 size_t len; 1475 int iv; 1476 1477 /* Parse the number of lines. */ 1478 cp = *bufp + pos; 1479 len = strcspn(cp, " \t"); 1480 cp[len] = '\0'; 1481 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) { 1482 mandoc_msg(MANDOCERR_NUMERIC, r->parse, 1483 ln, ppos, *bufp + 1); 1484 return(ROFF_IGN); 1485 } 1486 cp += len + 1; 1487 1488 /* Arm the input line trap. */ 1489 roffit_lines = iv; 1490 roffit_macro = mandoc_strdup(cp); 1491 return(ROFF_IGN); 1492 } 1493 1494 /* ARGSUSED */ 1495 static enum rofferr 1496 roff_Dd(ROFF_ARGS) 1497 { 1498 const char *const *cp; 1499 1500 if (MPARSE_MDOC != r->parsetype) 1501 for (cp = __mdoc_reserved; *cp; cp++) 1502 roff_setstr(r, *cp, NULL, 0); 1503 1504 return(ROFF_CONT); 1505 } 1506 1507 /* ARGSUSED */ 1508 static enum rofferr 1509 roff_TH(ROFF_ARGS) 1510 { 1511 const char *const *cp; 1512 1513 if (MPARSE_MDOC != r->parsetype) 1514 for (cp = __man_reserved; *cp; cp++) 1515 roff_setstr(r, *cp, NULL, 0); 1516 1517 return(ROFF_CONT); 1518 } 1519 1520 /* ARGSUSED */ 1521 static enum rofferr 1522 roff_TE(ROFF_ARGS) 1523 { 1524 1525 if (NULL == r->tbl) 1526 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1527 else 1528 tbl_end(&r->tbl); 1529 1530 return(ROFF_IGN); 1531 } 1532 1533 /* ARGSUSED */ 1534 static enum rofferr 1535 roff_T_(ROFF_ARGS) 1536 { 1537 1538 if (NULL == r->tbl) 1539 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1540 else 1541 tbl_restart(ppos, ln, r->tbl); 1542 1543 
return(ROFF_IGN); 1544 } 1545 1546 #if 0 1547 static int 1548 roff_closeeqn(struct roff *r) 1549 { 1550 1551 return(r->eqn && ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0); 1552 } 1553 #endif 1554 1555 static void 1556 roff_openeqn(struct roff *r, const char *name, int line, 1557 int offs, const char *buf) 1558 { 1559 struct eqn_node *e; 1560 int poff; 1561 1562 assert(NULL == r->eqn); 1563 e = eqn_alloc(name, offs, line, r->parse); 1564 1565 if (r->last_eqn) 1566 r->last_eqn->next = e; 1567 else 1568 r->first_eqn = r->last_eqn = e; 1569 1570 r->eqn = r->last_eqn = e; 1571 1572 if (buf) { 1573 poff = 0; 1574 eqn_read(&r->eqn, line, buf, offs, &poff); 1575 } 1576 } 1577 1578 /* ARGSUSED */ 1579 static enum rofferr 1580 roff_EQ(ROFF_ARGS) 1581 { 1582 1583 roff_openeqn(r, *bufp + pos, ln, ppos, NULL); 1584 return(ROFF_IGN); 1585 } 1586 1587 /* ARGSUSED */ 1588 static enum rofferr 1589 roff_EN(ROFF_ARGS) 1590 { 1591 1592 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1593 return(ROFF_IGN); 1594 } 1595 1596 /* ARGSUSED */ 1597 static enum rofferr 1598 roff_TS(ROFF_ARGS) 1599 { 1600 struct tbl_node *tbl; 1601 1602 if (r->tbl) { 1603 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL); 1604 tbl_end(&r->tbl); 1605 } 1606 1607 tbl = tbl_alloc(ppos, ln, r->parse); 1608 1609 if (r->last_tbl) 1610 r->last_tbl->next = tbl; 1611 else 1612 r->first_tbl = r->last_tbl = tbl; 1613 1614 r->tbl = r->last_tbl = tbl; 1615 return(ROFF_IGN); 1616 } 1617 1618 /* ARGSUSED */ 1619 static enum rofferr 1620 roff_cc(ROFF_ARGS) 1621 { 1622 const char *p; 1623 1624 p = *bufp + pos; 1625 1626 if ('\0' == *p || '.' 
== (r->control = *p++)) 1627 r->control = 0; 1628 1629 if ('\0' != *p) 1630 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL); 1631 1632 return(ROFF_IGN); 1633 } 1634 1635 /* ARGSUSED */ 1636 static enum rofferr 1637 roff_tr(ROFF_ARGS) 1638 { 1639 const char *p, *first, *second; 1640 size_t fsz, ssz; 1641 enum mandoc_esc esc; 1642 1643 p = *bufp + pos; 1644 1645 if ('\0' == *p) { 1646 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL); 1647 return(ROFF_IGN); 1648 } 1649 1650 while ('\0' != *p) { 1651 fsz = ssz = 1; 1652 1653 first = p++; 1654 if ('\\' == *first) { 1655 esc = mandoc_escape(&p, NULL, NULL); 1656 if (ESCAPE_ERROR == esc) { 1657 mandoc_msg 1658 (MANDOCERR_BADESCAPE, r->parse, 1659 ln, (int)(p - *bufp), NULL); 1660 return(ROFF_IGN); 1661 } 1662 fsz = (size_t)(p - first); 1663 } 1664 1665 second = p++; 1666 if ('\\' == *second) { 1667 esc = mandoc_escape(&p, NULL, NULL); 1668 if (ESCAPE_ERROR == esc) { 1669 mandoc_msg 1670 (MANDOCERR_BADESCAPE, r->parse, 1671 ln, (int)(p - *bufp), NULL); 1672 return(ROFF_IGN); 1673 } 1674 ssz = (size_t)(p - second); 1675 } else if ('\0' == *second) { 1676 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, 1677 ln, (int)(p - *bufp), NULL); 1678 second = " "; 1679 p--; 1680 } 1681 1682 if (fsz > 1) { 1683 roff_setstrn(&r->xmbtab, first, 1684 fsz, second, ssz, 0); 1685 continue; 1686 } 1687 1688 if (NULL == r->xtab) 1689 r->xtab = mandoc_calloc 1690 (128, sizeof(struct roffstr)); 1691 1692 free(r->xtab[(int)*first].p); 1693 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 1694 r->xtab[(int)*first].sz = ssz; 1695 } 1696 1697 return(ROFF_IGN); 1698 } 1699 1700 /* ARGSUSED */ 1701 static enum rofferr 1702 roff_so(ROFF_ARGS) 1703 { 1704 char *name; 1705 1706 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL); 1707 1708 /* 1709 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 1710 * opening anything that's not in our cwd or anything beneath 1711 * it. 
Thus, explicitly disallow traversing up the file-system 1712 * or using absolute paths. 1713 */ 1714 1715 name = *bufp + pos; 1716 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) { 1717 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL); 1718 return(ROFF_ERR); 1719 } 1720 1721 *offs = pos; 1722 return(ROFF_SO); 1723 } 1724 1725 /* ARGSUSED */ 1726 static enum rofferr 1727 roff_userdef(ROFF_ARGS) 1728 { 1729 const char *arg[9]; 1730 char *cp, *n1, *n2; 1731 int i; 1732 1733 /* 1734 * Collect pointers to macro argument strings 1735 * and NUL-terminate them. 1736 */ 1737 cp = *bufp + pos; 1738 for (i = 0; i < 9; i++) 1739 arg[i] = '\0' == *cp ? "" : 1740 mandoc_getarg(r->parse, &cp, ln, &pos); 1741 1742 /* 1743 * Expand macro arguments. 1744 */ 1745 *szp = 0; 1746 n1 = cp = mandoc_strdup(r->current_string); 1747 while (NULL != (cp = strstr(cp, "\\$"))) { 1748 i = cp[2] - '1'; 1749 if (0 > i || 8 < i) { 1750 /* Not an argument invocation. */ 1751 cp += 2; 1752 continue; 1753 } 1754 1755 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1; 1756 n2 = mandoc_malloc(*szp); 1757 1758 strlcpy(n2, n1, (size_t)(cp - n1 + 1)); 1759 strlcat(n2, arg[i], *szp); 1760 strlcat(n2, cp + 3, *szp); 1761 1762 cp = n2 + (cp - n1); 1763 free(n1); 1764 n1 = n2; 1765 } 1766 1767 /* 1768 * Replace the macro invocation 1769 * by the expanded macro. 1770 */ 1771 free(*bufp); 1772 *bufp = n1; 1773 if (0 == *szp) 1774 *szp = strlen(*bufp) + 1; 1775 1776 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ? 1777 ROFF_REPARSE : ROFF_APPEND); 1778 } 1779 1780 static char * 1781 roff_getname(struct roff *r, char **cpp, int ln, int pos) 1782 { 1783 char *name, *cp; 1784 1785 name = *cpp; 1786 if ('\0' == *name) 1787 return(name); 1788 1789 /* Read until end of name. 
*/ 1790 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) { 1791 if ('\\' != *cp) 1792 continue; 1793 cp++; 1794 if ('\\' == *cp) 1795 continue; 1796 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL); 1797 *cp = '\0'; 1798 name = cp; 1799 } 1800 1801 /* Nil-terminate name. */ 1802 if ('\0' != *cp) 1803 *(cp++) = '\0'; 1804 1805 /* Read past spaces. */ 1806 while (' ' == *cp) 1807 cp++; 1808 1809 *cpp = cp; 1810 return(name); 1811 } 1812 1813 /* 1814 * Store *string into the user-defined string called *name. 1815 * In multiline mode, append to an existing entry and append '\n'; 1816 * else replace the existing entry, if there is one. 1817 * To clear an existing entry, call with (*r, *name, NULL, 0). 1818 */ 1819 static void 1820 roff_setstr(struct roff *r, const char *name, const char *string, 1821 int multiline) 1822 { 1823 1824 roff_setstrn(&r->strtab, name, strlen(name), string, 1825 string ? strlen(string) : 0, multiline); 1826 } 1827 1828 static void 1829 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 1830 const char *string, size_t stringsz, int multiline) 1831 { 1832 struct roffkv *n; 1833 char *c; 1834 int i; 1835 size_t oldch, newch; 1836 1837 /* Search for an existing string with the same name. */ 1838 n = *r; 1839 1840 while (n && strcmp(name, n->key.p)) 1841 n = n->next; 1842 1843 if (NULL == n) { 1844 /* Create a new string table entry. */ 1845 n = mandoc_malloc(sizeof(struct roffkv)); 1846 n->key.p = mandoc_strndup(name, namesz); 1847 n->key.sz = namesz; 1848 n->val.p = NULL; 1849 n->val.sz = 0; 1850 n->next = *r; 1851 *r = n; 1852 } else if (0 == multiline) { 1853 /* In multiline mode, append; else replace. */ 1854 free(n->val.p); 1855 n->val.p = NULL; 1856 n->val.sz = 0; 1857 } 1858 1859 if (NULL == string) 1860 return; 1861 1862 /* 1863 * One additional byte for the '\n' in multiline mode, 1864 * and one for the terminating '\0'. 1865 */ 1866 newch = stringsz + (multiline ? 
2u : 1u); 1867 1868 if (NULL == n->val.p) { 1869 n->val.p = mandoc_malloc(newch); 1870 *n->val.p = '\0'; 1871 oldch = 0; 1872 } else { 1873 oldch = n->val.sz; 1874 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 1875 } 1876 1877 /* Skip existing content in the destination buffer. */ 1878 c = n->val.p + (int)oldch; 1879 1880 /* Append new content to the destination buffer. */ 1881 i = 0; 1882 while (i < (int)stringsz) { 1883 /* 1884 * Rudimentary roff copy mode: 1885 * Handle escaped backslashes. 1886 */ 1887 if ('\\' == string[i] && '\\' == string[i + 1]) 1888 i++; 1889 *c++ = string[i++]; 1890 } 1891 1892 /* Append terminating bytes. */ 1893 if (multiline) 1894 *c++ = '\n'; 1895 1896 *c = '\0'; 1897 n->val.sz = (int)(c - n->val.p); 1898 } 1899 1900 static const char * 1901 roff_getstrn(const struct roff *r, const char *name, size_t len) 1902 { 1903 const struct roffkv *n; 1904 1905 for (n = r->strtab; n; n = n->next) 1906 if (0 == strncmp(name, n->key.p, len) && 1907 '\0' == n->key.p[(int)len]) 1908 return(n->val.p); 1909 1910 return(NULL); 1911 } 1912 1913 static void 1914 roff_freestr(struct roffkv *r) 1915 { 1916 struct roffkv *n, *nn; 1917 1918 for (n = r; n; n = nn) { 1919 free(n->key.p); 1920 free(n->val.p); 1921 nn = n->next; 1922 free(n); 1923 } 1924 } 1925 1926 const struct tbl_span * 1927 roff_span(const struct roff *r) 1928 { 1929 1930 return(r->tbl ? tbl_span(r->tbl) : NULL); 1931 } 1932 1933 const struct eqn * 1934 roff_eqn(const struct roff *r) 1935 { 1936 1937 return(r->last_eqn ? &r->last_eqn->eqn : NULL); 1938 } 1939 1940 /* 1941 * Duplicate an input string, making the appropriate character 1942 * conversations (as stipulated by `tr') along the way. 1943 * Returns a heap-allocated string with all the replacements made. 
1944 */ 1945 char * 1946 roff_strdup(const struct roff *r, const char *p) 1947 { 1948 const struct roffkv *cp; 1949 char *res; 1950 const char *pp; 1951 size_t ssz, sz; 1952 enum mandoc_esc esc; 1953 1954 if (NULL == r->xmbtab && NULL == r->xtab) 1955 return(mandoc_strdup(p)); 1956 else if ('\0' == *p) 1957 return(mandoc_strdup("")); 1958 1959 /* 1960 * Step through each character looking for term matches 1961 * (remember that a `tr' can be invoked with an escape, which is 1962 * a glyph but the escape is multi-character). 1963 * We only do this if the character hash has been initialised 1964 * and the string is >0 length. 1965 */ 1966 1967 res = NULL; 1968 ssz = 0; 1969 1970 while ('\0' != *p) { 1971 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { 1972 sz = r->xtab[(int)*p].sz; 1973 res = mandoc_realloc(res, ssz + sz + 1); 1974 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 1975 ssz += sz; 1976 p++; 1977 continue; 1978 } else if ('\\' != *p) { 1979 res = mandoc_realloc(res, ssz + 2); 1980 res[ssz++] = *p++; 1981 continue; 1982 } 1983 1984 /* Search for term matches. */ 1985 for (cp = r->xmbtab; cp; cp = cp->next) 1986 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 1987 break; 1988 1989 if (NULL != cp) { 1990 /* 1991 * A match has been found. 1992 * Append the match to the array and move 1993 * forward by its keysize. 1994 */ 1995 res = mandoc_realloc 1996 (res, ssz + cp->val.sz + 1); 1997 memcpy(res + ssz, cp->val.p, cp->val.sz); 1998 ssz += cp->val.sz; 1999 p += (int)cp->key.sz; 2000 continue; 2001 } 2002 2003 /* 2004 * Handle escapes carefully: we need to copy 2005 * over just the escape itself, or else we might 2006 * do replacements within the escape itself. 2007 * Make sure to pass along the bogus string. 2008 */ 2009 pp = p++; 2010 esc = mandoc_escape(&p, NULL, NULL); 2011 if (ESCAPE_ERROR == esc) { 2012 sz = strlen(pp); 2013 res = mandoc_realloc(res, ssz + sz + 1); 2014 memcpy(res + ssz, pp, sz); 2015 break; 2016 } 2017 /* 2018 * We bail out on bad escapes. 
2019 * No need to warn: we already did so when 2020 * roff_res() was called. 2021 */ 2022 sz = (int)(p - pp); 2023 res = mandoc_realloc(res, ssz + sz + 1); 2024 memcpy(res + ssz, pp, sz); 2025 ssz += sz; 2026 } 2027 2028 res[(int)ssz] = '\0'; 2029 return(res); 2030 } 2031 2032 /* 2033 * Find out whether a line is a macro line or not. 2034 * If it is, adjust the current position and return one; if it isn't, 2035 * return zero and don't change the current position. 2036 * If the control character has been set with `.cc', then let that grain 2037 * precedence. 2038 * This is slighly contrary to groff, where using the non-breaking 2039 * control character when `cc' has been invoked will cause the 2040 * non-breaking macro contents to be printed verbatim. 2041 */ 2042 int 2043 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 2044 { 2045 int pos; 2046 2047 pos = *ppos; 2048 2049 if (0 != r->control && cp[pos] == r->control) 2050 pos++; 2051 else if (0 != r->control) 2052 return(0); 2053 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 2054 pos += 2; 2055 else if ('.' == cp[pos] || '\'' == cp[pos]) 2056 pos++; 2057 else 2058 return(0); 2059 2060 while (' ' == cp[pos] || '\t' == cp[pos]) 2061 pos++; 2062 2063 *ppos = pos; 2064 return(1); 2065 }