Print this page
5051 import mdocml-1.12.3
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Approved by: TBD

Split Close
Expand all
Collapse all
          --- old/usr/src/cmd/mandoc/read.c
          +++ new/usr/src/cmd/mandoc/read.c
   1      -/*      $Id: read.c,v 1.28 2012/02/16 20:51:31 joerg Exp $ */
        1 +/*      $Id: read.c,v 1.39 2013/09/16 00:25:07 schwarze Exp $ */
   2    2  /*
   3    3   * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4      - * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
        4 + * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
   5    5   *
   6    6   * Permission to use, copy, modify, and distribute this software for any
   7    7   * purpose with or without fee is hereby granted, provided that the above
   8    8   * copyright notice and this permission notice appear in all copies.
   9    9   *
  10   10   * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11   11   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12   12   * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13   13   * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14   14   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
↓ open down ↓ 18 lines elided ↑ open up ↑
  33   33  #include <stdlib.h>
  34   34  #include <string.h>
  35   35  #include <unistd.h>
  36   36  
  37   37  #include "mandoc.h"
  38   38  #include "libmandoc.h"
  39   39  #include "mdoc.h"
  40   40  #include "man.h"
  41   41  #include "main.h"
  42   42  
  43      -#ifndef MAP_FILE
  44      -#define MAP_FILE        0
  45      -#endif
  46      -
  47   43  #define REPARSE_LIMIT   1000
  48   44  
  49   45  struct  buf {
  50   46          char             *buf; /* binary input buffer */
  51   47          size_t            sz; /* size of binary buffer */
  52   48  };
  53   49  
  54   50  struct  mparse {
  55   51          enum mandoclevel  file_status; /* status of current parse */
  56   52          enum mandoclevel  wlevel; /* ignore messages below this */
↓ open down ↓ 2 lines elided ↑ open up ↑
  59   55          struct man       *pman; /* persistent man parser */
  60   56          struct mdoc      *pmdoc; /* persistent mdoc parser */
  61   57          struct man       *man; /* man parser */
  62   58          struct mdoc      *mdoc; /* mdoc parser */
  63   59          struct roff      *roff; /* roff parser (!NULL) */
  64   60          int               reparse_count; /* finite interp. stack */
  65   61          mandocmsg         mmsg; /* warning/error message handler */
  66   62          void             *arg; /* argument to mmsg */
  67   63          const char       *file; 
  68   64          struct buf       *secondary;
       65 +        char             *defos; /* default operating system */
  69   66  };
  70   67  
  71   68  static  void      resize_buf(struct buf *, size_t);
  72   69  static  void      mparse_buf_r(struct mparse *, struct buf, int);
  73      -static  void      mparse_readfd_r(struct mparse *, int, const char *, int);
  74   70  static  void      pset(const char *, int, struct mparse *);
  75   71  static  int       read_whole_file(const char *, int, struct buf *, int *);
  76   72  static  void      mparse_end(struct mparse *);
       73 +static  void      mparse_parse_buffer(struct mparse *, struct buf,
       74 +                        const char *);
  77   75  
  78   76  static  const enum mandocerr    mandoclimits[MANDOCLEVEL_MAX] = {
  79   77          MANDOCERR_OK,
  80   78          MANDOCERR_WARNING,
  81   79          MANDOCERR_WARNING,
  82   80          MANDOCERR_ERROR,
  83   81          MANDOCERR_FATAL,
  84   82          MANDOCERR_MAX,
  85   83          MANDOCERR_MAX
  86   84  };
  87   85  
  88   86  static  const char * const      mandocerrs[MANDOCERR_MAX] = {
  89   87          "ok",
  90   88  
  91   89          "generic warning",
  92   90  
  93   91          /* related to the prologue */
  94   92          "no title in document",
  95   93          "document title should be all caps",
  96   94          "unknown manual section",
       95 +        "unknown manual volume or arch",
  97   96          "date missing, using today's date",
  98   97          "cannot parse date, using it verbatim",
  99   98          "prologue macros out of order",
 100   99          "duplicate prologue macro",
 101  100          "macro not allowed in prologue",
 102  101          "macro not allowed in body",
 103  102  
 104  103          /* related to document structure */
 105  104          ".so is fragile, better use ln(1)",
 106  105          "NAME section must come first",
 107  106          "bad NAME section contents",
 108      -        "manual name not yet set",
 109  107          "sections out of conventional order",
 110  108          "duplicate section name",
 111      -        "section not in conventional manual section",
      109 +        "section header suited to sections 2, 3, and 9 only",
 112  110  
 113  111          /* related to macros and nesting */
 114  112          "skipping obsolete macro",
 115  113          "skipping paragraph macro",
      114 +        "moving paragraph macro out of list",
 116  115          "skipping no-space macro",
 117  116          "blocks badly nested",
 118  117          "child violates parent syntax",
 119  118          "nested displays are not portable",
 120  119          "already in literal mode",
 121  120          "line scope broken",
 122  121  
 123  122          /* related to missing macro arguments */
 124  123          "skipping empty macro",
 125  124          "argument count wrong",
↓ open down ↓ 40 lines elided ↑ open up ↑
 166  165          "bad table layout",
 167  166          "no table layout cells specified",
 168  167          "no table data cells specified",
 169  168          "ignore data in cell",
 170  169          "data block still open",
 171  170          "ignoring extra data cells",
 172  171  
 173  172          "input stack limit exceeded, infinite loop?",
 174  173          "skipping bad character",
 175  174          "escaped character not allowed in a name",
      175 +        "manual name not yet set",
 176  176          "skipping text before the first section header",
 177  177          "skipping unknown macro",
 178  178          "NOT IMPLEMENTED, please use groff: skipping request",
 179  179          "argument count wrong",
      180 +        "skipping column outside column list",
 180  181          "skipping end of block that is not open",
 181  182          "missing end of block",
 182  183          "scope open on exit",
 183  184          "uname(3) system call failed",
 184  185          "macro requires line argument(s)",
 185  186          "macro requires body argument(s)",
 186  187          "macro requires argument(s)",
      188 +        "request requires a numeric argument",
 187  189          "missing list type",
 188  190          "line argument(s) will be lost",
 189  191          "body argument(s) will be lost",
 190  192  
 191  193          "generic fatal error",
 192  194  
 193  195          "not a manual",
 194  196          "column syntax is inconsistent",
 195  197          "NOT IMPLEMENTED: .Bd -file",
 196  198          "argument count wrong, violates syntax",
↓ open down ↓ 43 lines elided ↑ open up ↑
 240  242                  for (i = 1; buf[i]; i++)
 241  243                          if (' ' != buf[i] && '\t' != buf[i])
 242  244                                  break;
 243  245                  if ('\0' == buf[i])
 244  246                          return;
 245  247          }
 246  248  
 247  249          switch (curp->inttype) {
 248  250          case (MPARSE_MDOC):
 249  251                  if (NULL == curp->pmdoc) 
 250      -                        curp->pmdoc = mdoc_alloc(curp->roff, curp);
      252 +                        curp->pmdoc = mdoc_alloc(curp->roff, curp,
      253 +                                        curp->defos);
 251  254                  assert(curp->pmdoc);
 252  255                  curp->mdoc = curp->pmdoc;
 253  256                  return;
 254  257          case (MPARSE_MAN):
 255  258                  if (NULL == curp->pman) 
 256  259                          curp->pman = man_alloc(curp->roff, curp);
 257  260                  assert(curp->pman);
 258  261                  curp->man = curp->pman;
 259  262                  return;
 260  263          default:
 261  264                  break;
 262  265          }
 263  266  
 264  267          if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
 265  268                  if (NULL == curp->pmdoc) 
 266      -                        curp->pmdoc = mdoc_alloc(curp->roff, curp);
      269 +                        curp->pmdoc = mdoc_alloc(curp->roff, curp,
      270 +                                        curp->defos);
 267  271                  assert(curp->pmdoc);
 268  272                  curp->mdoc = curp->pmdoc;
 269  273                  return;
 270  274          } 
 271  275  
 272  276          if (NULL == curp->pman) 
 273  277                  curp->pman = man_alloc(curp->roff, curp);
 274  278          assert(curp->pman);
 275  279          curp->man = curp->pman;
 276  280  }
↓ open down ↓ 38 lines elided ↑ open up ↑
 315  319  
 316  320                          if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
 317  321                              '\n' == blk.buf[i + 1])
 318  322                                  ++i;
 319  323                          if ('\n' == blk.buf[i]) {
 320  324                                  ++i;
 321  325                                  ++lnn;
 322  326                                  break;
 323  327                          }
 324  328  
      329 +                        /*
      330 +                         * Make sure we have space for at least
      331 +                         * one backslash and one other character
      332 +                         * and the trailing NUL byte.
      333 +                         */
      334 +
      335 +                        if (pos + 2 >= (int)ln.sz)
      336 +                                resize_buf(&ln, 256);
      337 +
 325  338                          /* 
 326  339                           * Warn about bogus characters.  If you're using
 327  340                           * non-ASCII encoding, you're screwing your
 328  341                           * readers.  Since I'd rather this not happen,
 329  342                           * I'll be helpful and replace these characters
 330  343                           * with "?", so we don't display gibberish.
 331  344                           * Note to manual writers: use special characters.
 332  345                           */
 333  346  
 334  347                          c = (unsigned char) blk.buf[i];
 335  348  
 336  349                          if ( ! (isascii(c) && 
 337  350                                          (isgraph(c) || isblank(c)))) {
 338  351                                  mandoc_msg(MANDOCERR_BADCHAR, curp,
 339  352                                                  curp->line, pos, NULL);
 340  353                                  i++;
 341      -                                if (pos >= (int)ln.sz)
 342      -                                        resize_buf(&ln, 256);
 343  354                                  ln.buf[pos++] = '?';
 344  355                                  continue;
 345  356                          }
 346  357  
 347  358                          /* Trailing backslash = a plain char. */
 348  359  
 349  360                          if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
 350      -                                if (pos >= (int)ln.sz)
 351      -                                        resize_buf(&ln, 256);
 352  361                                  ln.buf[pos++] = blk.buf[i++];
 353  362                                  continue;
 354  363                          }
 355  364  
 356  365                          /*
 357  366                           * Found escape and at least one other character.
 358  367                           * When it's a newline character, skip it.
 359  368                           * When there is a carriage return in between,
 360  369                           * skip that one as well.
 361  370                           */
↓ open down ↓ 21 lines elided ↑ open up ↑
 383  392                                  /* Backout trailing whitespaces */
 384  393                                  for (; pos > 0; --pos) {
 385  394                                          if (ln.buf[pos - 1] != ' ')
 386  395                                                  break;
 387  396                                          if (pos > 2 && ln.buf[pos - 2] == '\\')
 388  397                                                  break;
 389  398                                  }
 390  399                                  break;
 391  400                          }
 392  401  
 393      -                        /* Some other escape sequence, copy & cont. */
      402 +                        /* Catch escaped bogus characters. */
 394  403  
 395      -                        if (pos + 1 >= (int)ln.sz)
 396      -                                resize_buf(&ln, 256);
      404 +                        c = (unsigned char) blk.buf[i+1];
 397  405  
      406 +                        if ( ! (isascii(c) && 
      407 +                                        (isgraph(c) || isblank(c)))) {
      408 +                                mandoc_msg(MANDOCERR_BADCHAR, curp,
      409 +                                                curp->line, pos, NULL);
      410 +                                i += 2;
      411 +                                ln.buf[pos++] = '?';
      412 +                                continue;
      413 +                        }
      414 +
      415 +                        /* Some other escape sequence, copy & cont. */
      416 +
 398  417                          ln.buf[pos++] = blk.buf[i++];
 399  418                          ln.buf[pos++] = blk.buf[i++];
 400  419                  }
 401  420  
 402  421                  if (pos >= (int)ln.sz)
 403  422                          resize_buf(&ln, 256);
 404  423  
 405  424                  ln.buf[pos] = '\0';
 406  425  
 407  426                  /*
↓ open down ↓ 54 lines elided ↑ open up ↑
 462  481                          assert(MANDOCLEVEL_FATAL <= curp->file_status);
 463  482                          break;
 464  483                  case (ROFF_SO):
 465  484                          /*
 466  485                           * We remove `so' clauses from our lookaside
 467  486                           * buffer because we're going to descend into
 468  487                           * the file recursively.
 469  488                           */
 470  489                          if (curp->secondary) 
 471  490                                  curp->secondary->sz -= pos + 1;
 472      -                        mparse_readfd_r(curp, -1, ln.buf + of, 1);
      491 +                        mparse_readfd(curp, -1, ln.buf + of);
 473  492                          if (MANDOCLEVEL_FATAL <= curp->file_status)
 474  493                                  break;
 475  494                          pos = 0;
 476  495                          continue;
 477  496                  default:
 478  497                          break;
 479  498                  }
 480  499  
 481  500                  /*
 482  501                   * If we encounter errors in the recursive parse, make
↓ open down ↓ 85 lines elided ↑ open up ↑
 568  587           * concerned that this is going to tank any machines.
 569  588           */
 570  589  
 571  590          if (S_ISREG(st.st_mode)) {
 572  591                  if (st.st_size >= (1U << 31)) {
 573  592                          fprintf(stderr, "%s: input too large\n", file);
 574  593                          return(0);
 575  594                  }
 576  595                  *with_mmap = 1;
 577  596                  fb->sz = (size_t)st.st_size;
 578      -                fb->buf = mmap(NULL, fb->sz, PROT_READ, 
 579      -                                MAP_FILE|MAP_SHARED, fd, 0);
      597 +                fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
 580  598                  if (fb->buf != MAP_FAILED)
 581  599                          return(1);
 582  600          }
 583  601  #endif
 584  602  
 585  603          /*
 586  604           * If this isn't a regular file (like, say, stdin), then we must
 587  605           * go the old way and just read things in bit by bit.
 588  606           */
 589  607  
↓ open down ↓ 46 lines elided ↑ open up ↑
 636  654          if ( ! (curp->man || curp->mdoc)) {
 637  655                  mandoc_msg(MANDOCERR_NOTMANUAL, curp, 1, 0, NULL);
 638  656                  curp->file_status = MANDOCLEVEL_FATAL;
 639  657                  return;
 640  658          }
 641  659  
 642  660          roff_endparse(curp->roff);
 643  661  }
 644  662  
 645  663  static void
 646      -mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file,
 647      -                int re)
      664 +mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
 648  665  {
 649  666          const char      *svfile;
      667 +        static int       recursion_depth;
 650  668  
      669 +        if (64 < recursion_depth) {
      670 +                mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
      671 +                return;
      672 +        }
      673 +
 651  674          /* Line number is per-file. */
 652  675          svfile = curp->file;
 653  676          curp->file = file;
 654  677          curp->line = 1;
      678 +        recursion_depth++;
 655  679  
 656  680          mparse_buf_r(curp, blk, 1);
 657  681  
 658      -        if (0 == re && MANDOCLEVEL_FATAL > curp->file_status)
      682 +        if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
 659  683                  mparse_end(curp);
 660  684  
 661  685          curp->file = svfile;
 662  686  }
 663  687  
 664  688  enum mandoclevel
 665  689  mparse_readmem(struct mparse *curp, const void *buf, size_t len,
 666  690                  const char *file)
 667  691  {
 668  692          struct buf blk;
 669  693  
 670  694          blk.buf = UNCONST(buf);
 671  695          blk.sz = len;
 672  696  
 673      -        mparse_parse_buffer(curp, blk, file, 0);
      697 +        mparse_parse_buffer(curp, blk, file);
 674  698          return(curp->file_status);
 675  699  }
 676  700  
 677      -static void
 678      -mparse_readfd_r(struct mparse *curp, int fd, const char *file, int re)
      701 +enum mandoclevel
      702 +mparse_readfd(struct mparse *curp, int fd, const char *file)
 679  703  {
 680  704          struct buf       blk;
 681  705          int              with_mmap;
 682  706  
 683  707          if (-1 == fd)
 684  708                  if (-1 == (fd = open(file, O_RDONLY, 0))) {
 685  709                          perror(file);
 686  710                          curp->file_status = MANDOCLEVEL_SYSERR;
 687      -                        return;
      711 +                        goto out;
 688  712                  }
 689  713          /*
 690  714           * Run for each opened file; may be called more than once for
 691  715           * each full parse sequence if the opened file is nested (i.e.,
 692  716           * from `so').  Simply sucks in the whole file and moves into
 693  717           * the parse phase for the file.
 694  718           */
 695  719  
 696  720          if ( ! read_whole_file(file, fd, &blk, &with_mmap)) {
 697  721                  curp->file_status = MANDOCLEVEL_SYSERR;
 698      -                return;
      722 +                goto out;
 699  723          }
 700  724  
 701      -        mparse_parse_buffer(curp, blk, file, re);
      725 +        mparse_parse_buffer(curp, blk, file);
 702  726  
 703  727  #ifdef  HAVE_MMAP
 704  728          if (with_mmap)
 705  729                  munmap(blk.buf, blk.sz);
 706  730          else
 707  731  #endif
 708  732                  free(blk.buf);
 709  733  
 710  734          if (STDIN_FILENO != fd && -1 == close(fd))
 711  735                  perror(file);
 712      -}
 713      -
 714      -enum mandoclevel
 715      -mparse_readfd(struct mparse *curp, int fd, const char *file)
 716      -{
 717      -
 718      -        mparse_readfd_r(curp, fd, file, 0);
      736 +out:
 719  737          return(curp->file_status);
 720  738  }
 721  739  
 722  740  struct mparse *
 723      -mparse_alloc(enum mparset inttype, enum mandoclevel wlevel, mandocmsg mmsg, void *arg)
      741 +mparse_alloc(enum mparset inttype, enum mandoclevel wlevel,
      742 +                mandocmsg mmsg, void *arg, char *defos)
 724  743  {
 725  744          struct mparse   *curp;
 726  745  
 727  746          assert(wlevel <= MANDOCLEVEL_FATAL);
 728  747  
 729  748          curp = mandoc_calloc(1, sizeof(struct mparse));
 730  749  
 731  750          curp->wlevel = wlevel;
 732  751          curp->mmsg = mmsg;
 733  752          curp->arg = arg;
 734  753          curp->inttype = inttype;
      754 +        curp->defos = defos;
 735  755  
 736      -        curp->roff = roff_alloc(curp);
      756 +        curp->roff = roff_alloc(inttype, curp);
 737  757          return(curp);
 738  758  }
 739  759  
 740  760  void
 741  761  mparse_reset(struct mparse *curp)
 742  762  {
 743  763  
 744  764          roff_reset(curp->roff);
 745  765  
 746  766          if (curp->mdoc)
↓ open down ↓ 100 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX