1 /*      $Id: man.c,v 1.115 2012/01/03 15:16:24 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  *
   5  * Permission to use, copy, modify, and distribute this software for any
   6  * purpose with or without fee is hereby granted, provided that the above
   7  * copyright notice and this permission notice appear in all copies.
   8  *
   9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16  */
  17 #ifdef HAVE_CONFIG_H
  18 #include "config.h"
  19 #endif
  20 
  21 #include <sys/types.h>
  22 
  23 #include <assert.h>
  24 #include <stdarg.h>
  25 #include <stdlib.h>
  26 #include <stdio.h>
  27 #include <string.h>
  28 
  29 #include "man.h"
  30 #include "mandoc.h"
  31 #include "libman.h"
  32 #include "libmandoc.h"
  33 
  34 const   char *const __man_macronames[MAN_MAX] = {                
  35         "br",           "TH",           "SH",           "SS",
  36         "TP",           "LP",           "PP",           "P",
  37         "IP",           "HP",           "SM",           "SB",
  38         "BI",           "IB",           "BR",           "RB",
  39         "R",            "B",            "I",            "IR",
  40         "RI",           "na",           "sp",           "nf",
  41         "fi",           "RE",           "RS",           "DT",
  42         "UC",           "PD",           "AT",           "in",
  43         "ft",           "OP"
  44         };
  45 
  46 const   char * const *man_macronames = __man_macronames;
  47 
  48 static  struct man_node *man_node_alloc(struct man *, int, int, 
  49                                 enum man_type, enum mant);
  50 static  int              man_node_append(struct man *, 
  51                                 struct man_node *);
  52 static  void             man_node_free(struct man_node *);
  53 static  void             man_node_unlink(struct man *, 
  54                                 struct man_node *);
  55 static  int              man_ptext(struct man *, int, char *, int);
  56 static  int              man_pmacro(struct man *, int, char *, int);
  57 static  void             man_free1(struct man *);
  58 static  void             man_alloc1(struct man *);
  59 static  int              man_descope(struct man *, int, int);
  60 
  61 
  62 const struct man_node *
  63 man_node(const struct man *m)
  64 {
  65 
  66         assert( ! (MAN_HALT & m->flags));
  67         return(m->first);
  68 }
  69 
  70 
  71 const struct man_meta *
  72 man_meta(const struct man *m)
  73 {
  74 
  75         assert( ! (MAN_HALT & m->flags));
  76         return(&m->meta);
  77 }
  78 
  79 
  80 void
  81 man_reset(struct man *man)
  82 {
  83 
  84         man_free1(man);
  85         man_alloc1(man);
  86 }
  87 
  88 
  89 void
  90 man_free(struct man *man)
  91 {
  92 
  93         man_free1(man);
  94         free(man);
  95 }
  96 
  97 
  98 struct man *
  99 man_alloc(struct roff *roff, struct mparse *parse)
 100 {
 101         struct man      *p;
 102 
 103         p = mandoc_calloc(1, sizeof(struct man));
 104 
 105         man_hash_init();
 106         p->parse = parse;
 107         p->roff = roff;
 108 
 109         man_alloc1(p);
 110         return(p);
 111 }
 112 
 113 
 114 int
 115 man_endparse(struct man *m)
 116 {
 117 
 118         assert( ! (MAN_HALT & m->flags));
 119         if (man_macroend(m))
 120                 return(1);
 121         m->flags |= MAN_HALT;
 122         return(0);
 123 }
 124 
 125 
 126 int
 127 man_parseln(struct man *m, int ln, char *buf, int offs)
 128 {
 129 
 130         m->flags |= MAN_NEWLINE;
 131 
 132         assert( ! (MAN_HALT & m->flags));
 133 
 134         return (mandoc_getcontrol(buf, &offs) ?
 135                         man_pmacro(m, ln, buf, offs) : 
 136                         man_ptext(m, ln, buf, offs));
 137 }
 138 
 139 
 140 static void
 141 man_free1(struct man *man)
 142 {
 143 
 144         if (man->first)
 145                 man_node_delete(man, man->first);
 146         if (man->meta.title)
 147                 free(man->meta.title);
 148         if (man->meta.source)
 149                 free(man->meta.source);
 150         if (man->meta.date)
 151                 free(man->meta.date);
 152         if (man->meta.vol)
 153                 free(man->meta.vol);
 154         if (man->meta.msec)
 155                 free(man->meta.msec);
 156 }
 157 
 158 
 159 static void
 160 man_alloc1(struct man *m)
 161 {
 162 
 163         memset(&m->meta, 0, sizeof(struct man_meta));
 164         m->flags = 0;
 165         m->last = mandoc_calloc(1, sizeof(struct man_node));
 166         m->first = m->last;
 167         m->last->type = MAN_ROOT;
 168         m->last->tok = MAN_MAX;
 169         m->next = MAN_NEXT_CHILD;
 170 }
 171 
 172 
 173 static int
 174 man_node_append(struct man *man, struct man_node *p)
 175 {
 176 
 177         assert(man->last);
 178         assert(man->first);
 179         assert(MAN_ROOT != p->type);
 180 
 181         switch (man->next) {
 182         case (MAN_NEXT_SIBLING):
 183                 man->last->next = p;
 184                 p->prev = man->last;
 185                 p->parent = man->last->parent;
 186                 break;
 187         case (MAN_NEXT_CHILD):
 188                 man->last->child = p;
 189                 p->parent = man->last;
 190                 break;
 191         default:
 192                 abort();
 193                 /* NOTREACHED */
 194         }
 195         
 196         assert(p->parent);
 197         p->parent->nchild++;
 198 
 199         if ( ! man_valid_pre(man, p))
 200                 return(0);
 201 
 202         switch (p->type) {
 203         case (MAN_HEAD):
 204                 assert(MAN_BLOCK == p->parent->type);
 205                 p->parent->head = p;
 206                 break;
 207         case (MAN_TAIL):
 208                 assert(MAN_BLOCK == p->parent->type);
 209                 p->parent->tail = p;
 210                 break;
 211         case (MAN_BODY):
 212                 assert(MAN_BLOCK == p->parent->type);
 213                 p->parent->body = p;
 214                 break;
 215         default:
 216                 break;
 217         }
 218 
 219         man->last = p;
 220 
 221         switch (p->type) {
 222         case (MAN_TBL):
 223                 /* FALLTHROUGH */
 224         case (MAN_TEXT):
 225                 if ( ! man_valid_post(man))
 226                         return(0);
 227                 break;
 228         default:
 229                 break;
 230         }
 231 
 232         return(1);
 233 }
 234 
 235 
 236 static struct man_node *
 237 man_node_alloc(struct man *m, int line, int pos, 
 238                 enum man_type type, enum mant tok)
 239 {
 240         struct man_node *p;
 241 
 242         p = mandoc_calloc(1, sizeof(struct man_node));
 243         p->line = line;
 244         p->pos = pos;
 245         p->type = type;
 246         p->tok = tok;
 247 
 248         if (MAN_NEWLINE & m->flags)
 249                 p->flags |= MAN_LINE;
 250         m->flags &= ~MAN_NEWLINE;
 251         return(p);
 252 }
 253 
 254 
 255 int
 256 man_elem_alloc(struct man *m, int line, int pos, enum mant tok)
 257 {
 258         struct man_node *p;
 259 
 260         p = man_node_alloc(m, line, pos, MAN_ELEM, tok);
 261         if ( ! man_node_append(m, p))
 262                 return(0);
 263         m->next = MAN_NEXT_CHILD;
 264         return(1);
 265 }
 266 
 267 
 268 int
 269 man_tail_alloc(struct man *m, int line, int pos, enum mant tok)
 270 {
 271         struct man_node *p;
 272 
 273         p = man_node_alloc(m, line, pos, MAN_TAIL, tok);
 274         if ( ! man_node_append(m, p))
 275                 return(0);
 276         m->next = MAN_NEXT_CHILD;
 277         return(1);
 278 }
 279 
 280 
 281 int
 282 man_head_alloc(struct man *m, int line, int pos, enum mant tok)
 283 {
 284         struct man_node *p;
 285 
 286         p = man_node_alloc(m, line, pos, MAN_HEAD, tok);
 287         if ( ! man_node_append(m, p))
 288                 return(0);
 289         m->next = MAN_NEXT_CHILD;
 290         return(1);
 291 }
 292 
 293 
 294 int
 295 man_body_alloc(struct man *m, int line, int pos, enum mant tok)
 296 {
 297         struct man_node *p;
 298 
 299         p = man_node_alloc(m, line, pos, MAN_BODY, tok);
 300         if ( ! man_node_append(m, p))
 301                 return(0);
 302         m->next = MAN_NEXT_CHILD;
 303         return(1);
 304 }
 305 
 306 
 307 int
 308 man_block_alloc(struct man *m, int line, int pos, enum mant tok)
 309 {
 310         struct man_node *p;
 311 
 312         p = man_node_alloc(m, line, pos, MAN_BLOCK, tok);
 313         if ( ! man_node_append(m, p))
 314                 return(0);
 315         m->next = MAN_NEXT_CHILD;
 316         return(1);
 317 }
 318 
 319 int
 320 man_word_alloc(struct man *m, int line, int pos, const char *word)
 321 {
 322         struct man_node *n;
 323 
 324         n = man_node_alloc(m, line, pos, MAN_TEXT, MAN_MAX);
 325         n->string = roff_strdup(m->roff, word);
 326 
 327         if ( ! man_node_append(m, n))
 328                 return(0);
 329 
 330         m->next = MAN_NEXT_SIBLING;
 331         return(1);
 332 }
 333 
 334 
 335 /*
 336  * Free all of the resources held by a node.  This does NOT unlink a
 337  * node from its context; for that, see man_node_unlink().
 338  */
 339 static void
 340 man_node_free(struct man_node *p)
 341 {
 342 
 343         if (p->string)
 344                 free(p->string);
 345         free(p);
 346 }
 347 
 348 
 349 void
 350 man_node_delete(struct man *m, struct man_node *p)
 351 {
 352 
 353         while (p->child)
 354                 man_node_delete(m, p->child);
 355 
 356         man_node_unlink(m, p);
 357         man_node_free(p);
 358 }
 359 
 360 int
 361 man_addeqn(struct man *m, const struct eqn *ep)
 362 {
 363         struct man_node *n;
 364 
 365         assert( ! (MAN_HALT & m->flags));
 366 
 367         n = man_node_alloc(m, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
 368         n->eqn = ep;
 369 
 370         if ( ! man_node_append(m, n))
 371                 return(0);
 372 
 373         m->next = MAN_NEXT_SIBLING;
 374         return(man_descope(m, ep->ln, ep->pos));
 375 }
 376 
 377 int
 378 man_addspan(struct man *m, const struct tbl_span *sp)
 379 {
 380         struct man_node *n;
 381 
 382         assert( ! (MAN_HALT & m->flags));
 383 
 384         n = man_node_alloc(m, sp->line, 0, MAN_TBL, MAN_MAX);
 385         n->span = sp;
 386 
 387         if ( ! man_node_append(m, n))
 388                 return(0);
 389 
 390         m->next = MAN_NEXT_SIBLING;
 391         return(man_descope(m, sp->line, 0));
 392 }
 393 
 394 static int
 395 man_descope(struct man *m, int line, int offs)
 396 {
 397         /*
 398          * Co-ordinate what happens with having a next-line scope open:
 399          * first close out the element scope (if applicable), then close
 400          * out the block scope (also if applicable).
 401          */
 402 
 403         if (MAN_ELINE & m->flags) {
 404                 m->flags &= ~MAN_ELINE;
 405                 if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX))
 406                         return(0);
 407         }
 408 
 409         if ( ! (MAN_BLINE & m->flags))
 410                 return(1);
 411         m->flags &= ~MAN_BLINE;
 412 
 413         if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX))
 414                 return(0);
 415         return(man_body_alloc(m, line, offs, m->last->tok));
 416 }
 417 
 418 static int
 419 man_ptext(struct man *m, int line, char *buf, int offs)
 420 {
 421         int              i;
 422 
 423         /* Literal free-form text whitespace is preserved. */
 424 
 425         if (MAN_LITERAL & m->flags) {
 426                 if ( ! man_word_alloc(m, line, offs, buf + offs))
 427                         return(0);
 428                 return(man_descope(m, line, offs));
 429         }
 430 
 431         /* Pump blank lines directly into the backend. */
 432 
 433         for (i = offs; ' ' == buf[i]; i++)
 434                 /* Skip leading whitespace. */ ;
 435 
 436         if ('\0' == buf[i]) {
 437                 /* Allocate a blank entry. */
 438                 if ( ! man_word_alloc(m, line, offs, ""))
 439                         return(0);
 440                 return(man_descope(m, line, offs));
 441         }
 442 
 443         /* 
 444          * Warn if the last un-escaped character is whitespace. Then
 445          * strip away the remaining spaces (tabs stay!).   
 446          */
 447 
 448         i = (int)strlen(buf);
 449         assert(i);
 450 
 451         if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
 452                 if (i > 1 && '\\' != buf[i - 2])
 453                         man_pmsg(m, line, i - 1, MANDOCERR_EOLNSPACE);
 454 
 455                 for (--i; i && ' ' == buf[i]; i--)
 456                         /* Spin back to non-space. */ ;
 457 
 458                 /* Jump ahead of escaped whitespace. */
 459                 i += '\\' == buf[i] ? 2 : 1;
 460 
 461                 buf[i] = '\0';
 462         }
 463 
 464         if ( ! man_word_alloc(m, line, offs, buf + offs))
 465                 return(0);
 466 
 467         /*
 468          * End-of-sentence check.  If the last character is an unescaped
 469          * EOS character, then flag the node as being the end of a
 470          * sentence.  The front-end will know how to interpret this.
 471          */
 472 
 473         assert(i);
 474         if (mandoc_eos(buf, (size_t)i, 0))
 475                 m->last->flags |= MAN_EOS;
 476 
 477         return(man_descope(m, line, offs));
 478 }
 479 
 480 static int
 481 man_pmacro(struct man *m, int ln, char *buf, int offs)
 482 {
 483         int              i, ppos;
 484         enum mant        tok;
 485         char             mac[5];
 486         struct man_node *n;
 487 
 488         if ('"' == buf[offs]) {
 489                 man_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT);
 490                 return(1);
 491         } else if ('\0' == buf[offs])
 492                 return(1);
 493 
 494         ppos = offs;
 495 
 496         /*
 497          * Copy the first word into a nil-terminated buffer.
 498          * Stop copying when a tab, space, or eoln is encountered.
 499          */
 500 
 501         i = 0;
 502         while (i < 4 && '\0' != buf[offs] && 
 503                         ' ' != buf[offs] && '\t' != buf[offs])
 504                 mac[i++] = buf[offs++];
 505 
 506         mac[i] = '\0';
 507 
 508         tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
 509 
 510         if (MAN_MAX == tok) {
 511                 mandoc_vmsg(MANDOCERR_MACRO, m->parse, ln, 
 512                                 ppos, "%s", buf + ppos - 1);
 513                 return(1);
 514         }
 515 
 516         /* The macro is sane.  Jump to the next word. */
 517 
 518         while (buf[offs] && ' ' == buf[offs])
 519                 offs++;
 520 
 521         /* 
 522          * Trailing whitespace.  Note that tabs are allowed to be passed
 523          * into the parser as "text", so we only warn about spaces here.
 524          */
 525 
 526         if ('\0' == buf[offs] && ' ' == buf[offs - 1])
 527                 man_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE);
 528 
 529         /* 
 530          * Remove prior ELINE macro, as it's being clobbered by a new
 531          * macro.  Note that NSCOPED macros do not close out ELINE
 532          * macros---they don't print text---so we let those slip by.
 533          */
 534 
 535         if ( ! (MAN_NSCOPED & man_macros[tok].flags) &&
 536                         m->flags & MAN_ELINE) {
 537                 n = m->last;
 538                 assert(MAN_TEXT != n->type);
 539 
 540                 /* Remove repeated NSCOPED macros causing ELINE. */
 541 
 542                 if (MAN_NSCOPED & man_macros[n->tok].flags)
 543                         n = n->parent;
 544 
 545                 mandoc_vmsg(MANDOCERR_LINESCOPE, m->parse, n->line, 
 546                     n->pos, "%s breaks %s", man_macronames[tok],
 547                     man_macronames[n->tok]);
 548 
 549                 man_node_delete(m, n);
 550                 m->flags &= ~MAN_ELINE;
 551         }
 552 
 553         /*
 554          * Remove prior BLINE macro that is being clobbered.
 555          */
 556         if ((m->flags & MAN_BLINE) &&
 557             (MAN_BSCOPE & man_macros[tok].flags)) {
 558                 n = m->last;
 559 
 560                 /* Might be a text node like 8 in
 561                  * .TP 8
 562                  * .SH foo
 563                  */
 564                 if (MAN_TEXT == n->type)
 565                         n = n->parent;
 566 
 567                 /* Remove element that didn't end BLINE, if any. */
 568                 if ( ! (MAN_BSCOPE & man_macros[n->tok].flags))
 569                         n = n->parent;
 570 
 571                 assert(MAN_HEAD == n->type);
 572                 n = n->parent;
 573                 assert(MAN_BLOCK == n->type);
 574                 assert(MAN_SCOPED & man_macros[n->tok].flags);
 575 
 576                 mandoc_vmsg(MANDOCERR_LINESCOPE, m->parse, n->line, 
 577                     n->pos, "%s breaks %s", man_macronames[tok],
 578                     man_macronames[n->tok]);
 579 
 580                 man_node_delete(m, n);
 581                 m->flags &= ~MAN_BLINE;
 582         }
 583 
 584         /*
 585          * Save the fact that we're in the next-line for a block.  In
 586          * this way, embedded roff instructions can "remember" state
 587          * when they exit.
 588          */
 589 
 590         if (MAN_BLINE & m->flags)
 591                 m->flags |= MAN_BPLINE;
 592 
 593         /* Call to handler... */
 594 
 595         assert(man_macros[tok].fp);
 596         if ( ! (*man_macros[tok].fp)(m, tok, ln, ppos, &offs, buf))
 597                 goto err;
 598 
 599         /* 
 600          * We weren't in a block-line scope when entering the
 601          * above-parsed macro, so return.
 602          */
 603 
 604         if ( ! (MAN_BPLINE & m->flags)) {
 605                 m->flags &= ~MAN_ILINE; 
 606                 return(1);
 607         }
 608         m->flags &= ~MAN_BPLINE;
 609 
 610         /*
 611          * If we're in a block scope, then allow this macro to slip by
 612          * without closing scope around it.
 613          */
 614 
 615         if (MAN_ILINE & m->flags) {
 616                 m->flags &= ~MAN_ILINE;
 617                 return(1);
 618         }
 619 
 620         /* 
 621          * If we've opened a new next-line element scope, then return
 622          * now, as the next line will close out the block scope.
 623          */
 624 
 625         if (MAN_ELINE & m->flags)
 626                 return(1);
 627 
 628         /* Close out the block scope opened in the prior line.  */
 629 
 630         assert(MAN_BLINE & m->flags);
 631         m->flags &= ~MAN_BLINE;
 632 
 633         if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX))
 634                 return(0);
 635         return(man_body_alloc(m, ln, ppos, m->last->tok));
 636 
 637 err:    /* Error out. */
 638 
 639         m->flags |= MAN_HALT;
 640         return(0);
 641 }
 642 
 643 /*
 644  * Unlink a node from its context.  If "m" is provided, the last parse
 645  * point will also be adjusted accordingly.
 646  */
 647 static void
 648 man_node_unlink(struct man *m, struct man_node *n)
 649 {
 650 
 651         /* Adjust siblings. */
 652 
 653         if (n->prev)
 654                 n->prev->next = n->next;
 655         if (n->next)
 656                 n->next->prev = n->prev;
 657 
 658         /* Adjust parent. */
 659 
 660         if (n->parent) {
 661                 n->parent->nchild--;
 662                 if (n->parent->child == n)
 663                         n->parent->child = n->prev ? n->prev : n->next;
 664         }
 665 
 666         /* Adjust parse point, if applicable. */
 667 
 668         if (m && m->last == n) {
 669                 /*XXX: this can occur when bailing from validation. */
 670                 /*assert(NULL == n->next);*/
 671                 if (n->prev) {
 672                         m->last = n->prev;
 673                         m->next = MAN_NEXT_SIBLING;
 674                 } else {
 675                         m->last = n->parent;
 676                         m->next = MAN_NEXT_CHILD;
 677                 }
 678         }
 679 
 680         if (m && m->first == n)
 681                 m->first = NULL;
 682 }
 683 
 684 const struct mparse *
 685 man_mparse(const struct man *m)
 686 {
 687 
 688         assert(m && m->parse);
 689         return(m->parse);
 690 }