1 /*      $Id: man.c,v 1.121 2013/11/10 22:54:40 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  *
   5  * Permission to use, copy, modify, and distribute this software for any
   6  * purpose with or without fee is hereby granted, provided that the above
   7  * copyright notice and this permission notice appear in all copies.
   8  *
   9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16  */
  17 #ifdef HAVE_CONFIG_H
  18 #include "config.h"
  19 #endif
  20 
  21 #include <sys/types.h>
  22 
  23 #include <assert.h>
  24 #include <stdarg.h>
  25 #include <stdlib.h>
  26 #include <stdio.h>
  27 #include <string.h>
  28 
  29 #include "man.h"
  30 #include "mandoc.h"
  31 #include "libman.h"
  32 #include "libmandoc.h"
  33 
  34 const   char *const __man_macronames[MAN_MAX] = {                
  35         "br",           "TH",           "SH",           "SS",
  36         "TP",           "LP",           "PP",           "P",
  37         "IP",           "HP",           "SM",           "SB",
  38         "BI",           "IB",           "BR",           "RB",
  39         "R",            "B",            "I",            "IR",
  40         "RI",           "na",           "sp",           "nf",
  41         "fi",           "RE",           "RS",           "DT",
  42         "UC",           "PD",           "AT",           "in",
  43         "ft",           "OP",           "EX",           "EE",
  44         "UR",           "UE"
  45         };
  46 
  47 const   char * const *man_macronames = __man_macronames;
  48 
  49 static  struct man_node *man_node_alloc(struct man *, int, int, 
  50                                 enum man_type, enum mant);
  51 static  int              man_node_append(struct man *, 
  52                                 struct man_node *);
  53 static  void             man_node_free(struct man_node *);
  54 static  void             man_node_unlink(struct man *, 
  55                                 struct man_node *);
  56 static  int              man_ptext(struct man *, int, char *, int);
  57 static  int              man_pmacro(struct man *, int, char *, int);
  58 static  void             man_free1(struct man *);
  59 static  void             man_alloc1(struct man *);
  60 static  int              man_descope(struct man *, int, int);
  61 
  62 
  63 const struct man_node *
  64 man_node(const struct man *man)
  65 {
  66 
  67         assert( ! (MAN_HALT & man->flags));
  68         return(man->first);
  69 }
  70 
  71 
  72 const struct man_meta *
  73 man_meta(const struct man *man)
  74 {
  75 
  76         assert( ! (MAN_HALT & man->flags));
  77         return(&man->meta);
  78 }
  79 
  80 
  81 void
  82 man_reset(struct man *man)
  83 {
  84 
  85         man_free1(man);
  86         man_alloc1(man);
  87 }
  88 
  89 
  90 void
  91 man_free(struct man *man)
  92 {
  93 
  94         man_free1(man);
  95         free(man);
  96 }
  97 
  98 
  99 struct man *
 100 man_alloc(struct roff *roff, struct mparse *parse)
 101 {
 102         struct man      *p;
 103 
 104         p = mandoc_calloc(1, sizeof(struct man));
 105 
 106         man_hash_init();
 107         p->parse = parse;
 108         p->roff = roff;
 109 
 110         man_alloc1(p);
 111         return(p);
 112 }
 113 
 114 
 115 int
 116 man_endparse(struct man *man)
 117 {
 118 
 119         assert( ! (MAN_HALT & man->flags));
 120         if (man_macroend(man))
 121                 return(1);
 122         man->flags |= MAN_HALT;
 123         return(0);
 124 }
 125 
 126 
 127 int
 128 man_parseln(struct man *man, int ln, char *buf, int offs)
 129 {
 130 
 131         man->flags |= MAN_NEWLINE;
 132 
 133         assert( ! (MAN_HALT & man->flags));
 134 
 135         return (roff_getcontrol(man->roff, buf, &offs) ?
 136                         man_pmacro(man, ln, buf, offs) : 
 137                         man_ptext(man, ln, buf, offs));
 138 }
 139 
 140 
 141 static void
 142 man_free1(struct man *man)
 143 {
 144 
 145         if (man->first)
 146                 man_node_delete(man, man->first);
 147         if (man->meta.title)
 148                 free(man->meta.title);
 149         if (man->meta.source)
 150                 free(man->meta.source);
 151         if (man->meta.date)
 152                 free(man->meta.date);
 153         if (man->meta.vol)
 154                 free(man->meta.vol);
 155         if (man->meta.msec)
 156                 free(man->meta.msec);
 157 }
 158 
 159 
 160 static void
 161 man_alloc1(struct man *man)
 162 {
 163 
 164         memset(&man->meta, 0, sizeof(struct man_meta));
 165         man->flags = 0;
 166         man->last = mandoc_calloc(1, sizeof(struct man_node));
 167         man->first = man->last;
 168         man->last->type = MAN_ROOT;
 169         man->last->tok = MAN_MAX;
 170         man->next = MAN_NEXT_CHILD;
 171 }
 172 
 173 
 174 static int
 175 man_node_append(struct man *man, struct man_node *p)
 176 {
 177 
 178         assert(man->last);
 179         assert(man->first);
 180         assert(MAN_ROOT != p->type);
 181 
 182         switch (man->next) {
 183         case (MAN_NEXT_SIBLING):
 184                 man->last->next = p;
 185                 p->prev = man->last;
 186                 p->parent = man->last->parent;
 187                 break;
 188         case (MAN_NEXT_CHILD):
 189                 man->last->child = p;
 190                 p->parent = man->last;
 191                 break;
 192         default:
 193                 abort();
 194                 /* NOTREACHED */
 195         }
 196         
 197         assert(p->parent);
 198         p->parent->nchild++;
 199 
 200         if ( ! man_valid_pre(man, p))
 201                 return(0);
 202 
 203         switch (p->type) {
 204         case (MAN_HEAD):
 205                 assert(MAN_BLOCK == p->parent->type);
 206                 p->parent->head = p;
 207                 break;
 208         case (MAN_TAIL):
 209                 assert(MAN_BLOCK == p->parent->type);
 210                 p->parent->tail = p;
 211                 break;
 212         case (MAN_BODY):
 213                 assert(MAN_BLOCK == p->parent->type);
 214                 p->parent->body = p;
 215                 break;
 216         default:
 217                 break;
 218         }
 219 
 220         man->last = p;
 221 
 222         switch (p->type) {
 223         case (MAN_TBL):
 224                 /* FALLTHROUGH */
 225         case (MAN_TEXT):
 226                 if ( ! man_valid_post(man))
 227                         return(0);
 228                 break;
 229         default:
 230                 break;
 231         }
 232 
 233         return(1);
 234 }
 235 
 236 
 237 static struct man_node *
 238 man_node_alloc(struct man *man, int line, int pos, 
 239                 enum man_type type, enum mant tok)
 240 {
 241         struct man_node *p;
 242 
 243         p = mandoc_calloc(1, sizeof(struct man_node));
 244         p->line = line;
 245         p->pos = pos;
 246         p->type = type;
 247         p->tok = tok;
 248 
 249         if (MAN_NEWLINE & man->flags)
 250                 p->flags |= MAN_LINE;
 251         man->flags &= ~MAN_NEWLINE;
 252         return(p);
 253 }
 254 
 255 
 256 int
 257 man_elem_alloc(struct man *man, int line, int pos, enum mant tok)
 258 {
 259         struct man_node *p;
 260 
 261         p = man_node_alloc(man, line, pos, MAN_ELEM, tok);
 262         if ( ! man_node_append(man, p))
 263                 return(0);
 264         man->next = MAN_NEXT_CHILD;
 265         return(1);
 266 }
 267 
 268 
 269 int
 270 man_tail_alloc(struct man *man, int line, int pos, enum mant tok)
 271 {
 272         struct man_node *p;
 273 
 274         p = man_node_alloc(man, line, pos, MAN_TAIL, tok);
 275         if ( ! man_node_append(man, p))
 276                 return(0);
 277         man->next = MAN_NEXT_CHILD;
 278         return(1);
 279 }
 280 
 281 
 282 int
 283 man_head_alloc(struct man *man, int line, int pos, enum mant tok)
 284 {
 285         struct man_node *p;
 286 
 287         p = man_node_alloc(man, line, pos, MAN_HEAD, tok);
 288         if ( ! man_node_append(man, p))
 289                 return(0);
 290         man->next = MAN_NEXT_CHILD;
 291         return(1);
 292 }
 293 
 294 
 295 int
 296 man_body_alloc(struct man *man, int line, int pos, enum mant tok)
 297 {
 298         struct man_node *p;
 299 
 300         p = man_node_alloc(man, line, pos, MAN_BODY, tok);
 301         if ( ! man_node_append(man, p))
 302                 return(0);
 303         man->next = MAN_NEXT_CHILD;
 304         return(1);
 305 }
 306 
 307 
 308 int
 309 man_block_alloc(struct man *man, int line, int pos, enum mant tok)
 310 {
 311         struct man_node *p;
 312 
 313         p = man_node_alloc(man, line, pos, MAN_BLOCK, tok);
 314         if ( ! man_node_append(man, p))
 315                 return(0);
 316         man->next = MAN_NEXT_CHILD;
 317         return(1);
 318 }
 319 
 320 int
 321 man_word_alloc(struct man *man, int line, int pos, const char *word)
 322 {
 323         struct man_node *n;
 324 
 325         n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX);
 326         n->string = roff_strdup(man->roff, word);
 327 
 328         if ( ! man_node_append(man, n))
 329                 return(0);
 330 
 331         man->next = MAN_NEXT_SIBLING;
 332         return(1);
 333 }
 334 
 335 
 336 /*
 337  * Free all of the resources held by a node.  This does NOT unlink a
 338  * node from its context; for that, see man_node_unlink().
 339  */
 340 static void
 341 man_node_free(struct man_node *p)
 342 {
 343 
 344         if (p->string)
 345                 free(p->string);
 346         free(p);
 347 }
 348 
 349 
 350 void
 351 man_node_delete(struct man *man, struct man_node *p)
 352 {
 353 
 354         while (p->child)
 355                 man_node_delete(man, p->child);
 356 
 357         man_node_unlink(man, p);
 358         man_node_free(p);
 359 }
 360 
 361 int
 362 man_addeqn(struct man *man, const struct eqn *ep)
 363 {
 364         struct man_node *n;
 365 
 366         assert( ! (MAN_HALT & man->flags));
 367 
 368         n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
 369         n->eqn = ep;
 370 
 371         if ( ! man_node_append(man, n))
 372                 return(0);
 373 
 374         man->next = MAN_NEXT_SIBLING;
 375         return(man_descope(man, ep->ln, ep->pos));
 376 }
 377 
 378 int
 379 man_addspan(struct man *man, const struct tbl_span *sp)
 380 {
 381         struct man_node *n;
 382 
 383         assert( ! (MAN_HALT & man->flags));
 384 
 385         n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX);
 386         n->span = sp;
 387 
 388         if ( ! man_node_append(man, n))
 389                 return(0);
 390 
 391         man->next = MAN_NEXT_SIBLING;
 392         return(man_descope(man, sp->line, 0));
 393 }
 394 
 395 static int
 396 man_descope(struct man *man, int line, int offs)
 397 {
 398         /*
 399          * Co-ordinate what happens with having a next-line scope open:
 400          * first close out the element scope (if applicable), then close
 401          * out the block scope (also if applicable).
 402          */
 403 
 404         if (MAN_ELINE & man->flags) {
 405                 man->flags &= ~MAN_ELINE;
 406                 if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
 407                         return(0);
 408         }
 409 
 410         if ( ! (MAN_BLINE & man->flags))
 411                 return(1);
 412         man->flags &= ~MAN_BLINE;
 413 
 414         if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
 415                 return(0);
 416         return(man_body_alloc(man, line, offs, man->last->tok));
 417 }
 418 
 419 static int
 420 man_ptext(struct man *man, int line, char *buf, int offs)
 421 {
 422         int              i;
 423 
 424         /* Literal free-form text whitespace is preserved. */
 425 
 426         if (MAN_LITERAL & man->flags) {
 427                 if ( ! man_word_alloc(man, line, offs, buf + offs))
 428                         return(0);
 429                 return(man_descope(man, line, offs));
 430         }
 431 
 432         for (i = offs; ' ' == buf[i]; i++)
 433                 /* Skip leading whitespace. */ ;
 434 
 435         /*
 436          * Blank lines are ignored right after headings
 437          * but add a single vertical space elsewhere.
 438          */
 439 
 440         if ('\0' == buf[i]) {
 441                 /* Allocate a blank entry. */
 442                 if (MAN_SH != man->last->tok &&
 443                     MAN_SS != man->last->tok) {
 444                         if ( ! man_elem_alloc(man, line, offs, MAN_sp))
 445                                 return(0);
 446                         man->next = MAN_NEXT_SIBLING;
 447                 }
 448                 return(1);
 449         }
 450 
 451         /* 
 452          * Warn if the last un-escaped character is whitespace. Then
 453          * strip away the remaining spaces (tabs stay!).   
 454          */
 455 
 456         i = (int)strlen(buf);
 457         assert(i);
 458 
 459         if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
 460                 if (i > 1 && '\\' != buf[i - 2])
 461                         man_pmsg(man, line, i - 1, MANDOCERR_EOLNSPACE);
 462 
 463                 for (--i; i && ' ' == buf[i]; i--)
 464                         /* Spin back to non-space. */ ;
 465 
 466                 /* Jump ahead of escaped whitespace. */
 467                 i += '\\' == buf[i] ? 2 : 1;
 468 
 469                 buf[i] = '\0';
 470         }
 471 
 472         if ( ! man_word_alloc(man, line, offs, buf + offs))
 473                 return(0);
 474 
 475         /*
 476          * End-of-sentence check.  If the last character is an unescaped
 477          * EOS character, then flag the node as being the end of a
 478          * sentence.  The front-end will know how to interpret this.
 479          */
 480 
 481         assert(i);
 482         if (mandoc_eos(buf, (size_t)i, 0))
 483                 man->last->flags |= MAN_EOS;
 484 
 485         return(man_descope(man, line, offs));
 486 }
 487 
 488 static int
 489 man_pmacro(struct man *man, int ln, char *buf, int offs)
 490 {
 491         int              i, ppos;
 492         enum mant        tok;
 493         char             mac[5];
 494         struct man_node *n;
 495 
 496         if ('"' == buf[offs]) {
 497                 man_pmsg(man, ln, offs, MANDOCERR_BADCOMMENT);
 498                 return(1);
 499         } else if ('\0' == buf[offs])
 500                 return(1);
 501 
 502         ppos = offs;
 503 
 504         /*
 505          * Copy the first word into a nil-terminated buffer.
 506          * Stop copying when a tab, space, or eoln is encountered.
 507          */
 508 
 509         i = 0;
 510         while (i < 4 && '\0' != buf[offs] && 
 511                         ' ' != buf[offs] && '\t' != buf[offs])
 512                 mac[i++] = buf[offs++];
 513 
 514         mac[i] = '\0';
 515 
 516         tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
 517 
 518         if (MAN_MAX == tok) {
 519                 mandoc_vmsg(MANDOCERR_MACRO, man->parse, ln, 
 520                                 ppos, "%s", buf + ppos - 1);
 521                 return(1);
 522         }
 523 
 524         /* The macro is sane.  Jump to the next word. */
 525 
 526         while (buf[offs] && ' ' == buf[offs])
 527                 offs++;
 528 
 529         /* 
 530          * Trailing whitespace.  Note that tabs are allowed to be passed
 531          * into the parser as "text", so we only warn about spaces here.
 532          */
 533 
 534         if ('\0' == buf[offs] && ' ' == buf[offs - 1])
 535                 man_pmsg(man, ln, offs - 1, MANDOCERR_EOLNSPACE);
 536 
 537         /* 
 538          * Remove prior ELINE macro, as it's being clobbered by a new
 539          * macro.  Note that NSCOPED macros do not close out ELINE
 540          * macros---they don't print text---so we let those slip by.
 541          */
 542 
 543         if ( ! (MAN_NSCOPED & man_macros[tok].flags) &&
 544                         man->flags & MAN_ELINE) {
 545                 n = man->last;
 546                 assert(MAN_TEXT != n->type);
 547 
 548                 /* Remove repeated NSCOPED macros causing ELINE. */
 549 
 550                 if (MAN_NSCOPED & man_macros[n->tok].flags)
 551                         n = n->parent;
 552 
 553                 mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line, 
 554                     n->pos, "%s breaks %s", man_macronames[tok],
 555                     man_macronames[n->tok]);
 556 
 557                 man_node_delete(man, n);
 558                 man->flags &= ~MAN_ELINE;
 559         }
 560 
 561         /*
 562          * Remove prior BLINE macro that is being clobbered.
 563          */
 564         if ((man->flags & MAN_BLINE) &&
 565             (MAN_BSCOPE & man_macros[tok].flags)) {
 566                 n = man->last;
 567 
 568                 /* Might be a text node like 8 in
 569                  * .TP 8
 570                  * .SH foo
 571                  */
 572                 if (MAN_TEXT == n->type)
 573                         n = n->parent;
 574 
 575                 /* Remove element that didn't end BLINE, if any. */
 576                 if ( ! (MAN_BSCOPE & man_macros[n->tok].flags))
 577                         n = n->parent;
 578 
 579                 assert(MAN_HEAD == n->type);
 580                 n = n->parent;
 581                 assert(MAN_BLOCK == n->type);
 582                 assert(MAN_SCOPED & man_macros[n->tok].flags);
 583 
 584                 mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line, 
 585                     n->pos, "%s breaks %s", man_macronames[tok],
 586                     man_macronames[n->tok]);
 587 
 588                 man_node_delete(man, n);
 589                 man->flags &= ~MAN_BLINE;
 590         }
 591 
 592         /*
 593          * Save the fact that we're in the next-line for a block.  In
 594          * this way, embedded roff instructions can "remember" state
 595          * when they exit.
 596          */
 597 
 598         if (MAN_BLINE & man->flags)
 599                 man->flags |= MAN_BPLINE;
 600 
 601         /* Call to handler... */
 602 
 603         assert(man_macros[tok].fp);
 604         if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf))
 605                 goto err;
 606 
 607         /* 
 608          * We weren't in a block-line scope when entering the
 609          * above-parsed macro, so return.
 610          */
 611 
 612         if ( ! (MAN_BPLINE & man->flags)) {
 613                 man->flags &= ~MAN_ILINE; 
 614                 return(1);
 615         }
 616         man->flags &= ~MAN_BPLINE;
 617 
 618         /*
 619          * If we're in a block scope, then allow this macro to slip by
 620          * without closing scope around it.
 621          */
 622 
 623         if (MAN_ILINE & man->flags) {
 624                 man->flags &= ~MAN_ILINE;
 625                 return(1);
 626         }
 627 
 628         /* 
 629          * If we've opened a new next-line element scope, then return
 630          * now, as the next line will close out the block scope.
 631          */
 632 
 633         if (MAN_ELINE & man->flags)
 634                 return(1);
 635 
 636         /* Close out the block scope opened in the prior line.  */
 637 
 638         assert(MAN_BLINE & man->flags);
 639         man->flags &= ~MAN_BLINE;
 640 
 641         if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
 642                 return(0);
 643         return(man_body_alloc(man, ln, ppos, man->last->tok));
 644 
 645 err:    /* Error out. */
 646 
 647         man->flags |= MAN_HALT;
 648         return(0);
 649 }
 650 
 651 /*
 652  * Unlink a node from its context.  If "man" is provided, the last parse
 653  * point will also be adjusted accordingly.
 654  */
 655 static void
 656 man_node_unlink(struct man *man, struct man_node *n)
 657 {
 658 
 659         /* Adjust siblings. */
 660 
 661         if (n->prev)
 662                 n->prev->next = n->next;
 663         if (n->next)
 664                 n->next->prev = n->prev;
 665 
 666         /* Adjust parent. */
 667 
 668         if (n->parent) {
 669                 n->parent->nchild--;
 670                 if (n->parent->child == n)
 671                         n->parent->child = n->prev ? n->prev : n->next;
 672         }
 673 
 674         /* Adjust parse point, if applicable. */
 675 
 676         if (man && man->last == n) {
 677                 /*XXX: this can occur when bailing from validation. */
 678                 /*assert(NULL == n->next);*/
 679                 if (n->prev) {
 680                         man->last = n->prev;
 681                         man->next = MAN_NEXT_SIBLING;
 682                 } else {
 683                         man->last = n->parent;
 684                         man->next = MAN_NEXT_CHILD;
 685                 }
 686         }
 687 
 688         if (man && man->first == n)
 689                 man->first = NULL;
 690 }
 691 
 692 const struct mparse *
 693 man_mparse(const struct man *man)
 694 {
 695 
 696         assert(man && man->parse);
 697         return(man->parse);
 698 }