Print this page
    
9718 update mandoc to 1.14.4
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/cmd/mandoc/mdoc.c
          +++ new/usr/src/cmd/mandoc/mdoc.c
   1      -/*      $Id: mdoc.c,v 1.267 2017/06/17 13:06:16 schwarze Exp $ */
        1 +/*      $Id: mdoc.c,v 1.268 2017/08/11 16:56:21 schwarze Exp $ */
   2    2  /*
   3    3   * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4    4   * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org>
   5    5   *
   6    6   * Permission to use, copy, modify, and distribute this software for any
   7    7   * purpose with or without fee is hereby granted, provided that the above
   8    8   * copyright notice and this permission notice appear in all copies.
   9    9   *
  10   10   * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11   11   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12   12   * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13   13   * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14   14   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15   15   * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16   16   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17   17   */
  18   18  #include "config.h"
  19   19  
  20   20  #include <sys/types.h>
  21   21  
  22   22  #include <assert.h>
  23   23  #include <ctype.h>
  24   24  #include <stdarg.h>
  25   25  #include <stdio.h>
  26   26  #include <stdlib.h>
  27   27  #include <string.h>
  28   28  #include <time.h>
  29   29  
  30   30  #include "mandoc_aux.h"
  31   31  #include "mandoc.h"
  32   32  #include "roff.h"
  33   33  #include "mdoc.h"
  34   34  #include "libmandoc.h"
  35   35  #include "roff_int.h"
  36   36  #include "libmdoc.h"
  37   37  
           /*
            * Names of the mdoc macro arguments; the array is declared with
            * MDOC_ARG_MAX entries.
            * NOTE(review): presumably ordered to match the argument
            * enumeration declared in mdoc.h -- confirm against the header.
            */
  38   38  const   char *const __mdoc_argnames[MDOC_ARG_MAX] = {
  39   39          "split",                "nosplit",              "ragged",
  40   40          "unfilled",             "literal",              "file",
  41   41          "offset",               "bullet",               "dash",
  42   42          "hyphen",               "item",                 "enum",
  43   43          "tag",                  "diag",                 "hang",
  44   44          "ohang",                "inset",                "column",
  45   45          "width",                "compact",              "std",
  46   46          "filled",               "words",                "emphasis",
  47   47          "symbolic",             "nested",               "centered"
  48   48  };
           /* Non-static pointer to the first entry of the table above. */
  49   49  const   char * const *mdoc_argnames = __mdoc_argnames;
  50   50  
           /*
            * Forward declarations of the two file-local line parsers that
            * mdoc_parseln() chooses between; both are defined further down.
            */
  51   51  static  int               mdoc_ptext(struct roff_man *, int, char *, int);
  52   52  static  int               mdoc_pmacro(struct roff_man *, int, char *, int);
  53   53  
  54   54  
  55   55  /*
  56   56   * Main parse routine.  Parses a single line -- really just hands off to
  57   57   * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
            *
            * mdoc is the parse state, ln the input line number, buf the line
            * buffer and offs the offset in buf at which parsing starts.
            * Before dispatching, the MDOC_NEWLINE and MDOC_SYNOPSIS flags in
            * mdoc->flags are updated.
            * Returns whatever the selected parser returns.
  58   58   */
  59   59  int
  60   60  mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
  61   61  {
  62   62  
                   /*
                    * Flag a new input line, unless the last node is a ROFFT_EQN
                    * node whose line number is not smaller than ln.
                    */
  63   63          if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
  64   64                  mdoc->flags |= MDOC_NEWLINE;
  65   65  
  66   66          /*
  67   67           * Let the roff nS register switch SYNOPSIS mode early,
  68   68           * such that the parser knows at all times
  69   69           * whether this mode is on or off.
  70   70           * Note that this mode is also switched by the Sh macro.
  71   71           */
  72   72          if (roff_getreg(mdoc->roff, "nS"))
  73   73                  mdoc->flags |= MDOC_SYNOPSIS;
  74   74          else
  75   75                  mdoc->flags &= ~MDOC_SYNOPSIS;
  76   76  
                   /*
                    * roff_getcontrol() receives &offs and so may change it.
                    * NOTE(review): presumably it advances offs past the control
                    * character when one is found -- confirm in roff.c.
                    */
  77   77          return roff_getcontrol(mdoc->roff, buf, &offs) ?
  78   78              mdoc_pmacro(mdoc, ln, buf, offs) :
  79   79              mdoc_ptext(mdoc, ln, buf, offs);
  80   80  }
  81   81  
           /*
            * Invoke the handler stored for tok in the mdoc_macros table,
            * forwarding mdoc, tok, line, ppos, pos and buf unchanged.
            * The parameter list comes from the MACRO_PROT_ARGS macro.
            * Asserts that tok lies in the range [MDOC_Dd, MDOC_MAX).
            */
  82   82  void
  83   83  mdoc_macro(MACRO_PROT_ARGS)
  84   84  {
  85   85          assert(tok >= MDOC_Dd && tok < MDOC_MAX);
  86   86          (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf);
  87   87  }
  88   88  
           /*
            * Allocate a ROFFT_TAIL node for macro tok at (line, pos), append
            * it to the tree, and request that the next node be added as a
            * child (ROFF_NEXT_CHILD).
            */
  89   89  void
  90   90  mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok)
  91   91  {
  92   92          struct roff_node *p;
  93   93  
  94   94          p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
  95   95          roff_node_append(mdoc, p);
  96   96          mdoc->next = ROFF_NEXT_CHILD;
  97   97  }
  98   98  
           /*
            * Allocate a ROFFT_BODY node for macro tok at (line, pos) that
            * marks the end of the given body.
            * Both body and its parent are flagged NODE_ENDED.  The new node
            * points back at body, shares body's norm pointer (no copy is
            * made), and has its end field set to ENDBODY_SPACE.
            * After appending, the next node is requested as a sibling.
            * Returns the new node.
            */
  99   99  struct roff_node *
 100  100  mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos,
 101  101      enum roff_tok tok, struct roff_node *body)
 102  102  {
 103  103          struct roff_node *p;
 104  104  
 105  105          body->flags |= NODE_ENDED;
 106  106          body->parent->flags |= NODE_ENDED;
 107  107          p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
 108  108          p->body = body;
 109  109          p->norm = body->norm;
 110  110          p->end = ENDBODY_SPACE;
 111  111          roff_node_append(mdoc, p);
 112  112          mdoc->next = ROFF_NEXT_SIBLING;
 113  113          return p;
 114  114  }
 115  115  
           /*
            * Allocate a ROFFT_BLOCK node for macro tok at (line, pos) and
            * append it to the tree.
            * args may be NULL; when it is not, it is attached to the node and
            * its reference count is incremented.
            * Returns the new node; the next node is requested as its child.
            */
 116  116  struct roff_node *
 117  117  mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
 118  118      enum roff_tok tok, struct mdoc_arg *args)
 119  119  {
 120  120          struct roff_node *p;
 121  121  
 122  122          p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
 123  123          p->args = args;
 124  124          if (p->args)
 125  125                  (args->refcnt)++;
 126  126  
                   /*
                    * Only these block macros get a union mdoc_data attached.
                    * NOTE(review): mandoc_calloc() presumably zero-fills like
                    * calloc(3) -- confirm in mandoc_aux.c.
                    */
 127  127          switch (tok) {
 128  128          case MDOC_Bd:
 129  129          case MDOC_Bf:
 130  130          case MDOC_Bl:
 131  131          case MDOC_En:
 132  132          case MDOC_Rs:
 133  133                  p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
 134  134                  break;
 135  135          default:
 136  136                  break;
 137  137          }
 138  138          roff_node_append(mdoc, p);
 139  139          mdoc->next = ROFF_NEXT_CHILD;
 140  140          return p;
 141  141  }
 142  142  
           /*
            * Allocate a ROFFT_ELEM node for macro tok at (line, pos) and
            * append it to the tree.
            * args may be NULL; when it is not, it is attached to the node and
            * its reference count is incremented.
            * The next node is requested as a child of the new element.
            */
 143  143  void
 144  144  mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
 145  145       enum roff_tok tok, struct mdoc_arg *args)
 146  146  {
 147  147          struct roff_node *p;
 148  148  
 149  149          p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
 150  150          p->args = args;
 151  151          if (p->args)
 152  152                  (args->refcnt)++;
 153  153  
                   /* Among element macros, only An gets a union mdoc_data. */
 154  154          switch (tok) {
 155  155          case MDOC_An:
 156  156                  p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
 157  157                  break;
 158  158          default:
 159  159                  break;
 160  160          }
 161  161          roff_node_append(mdoc, p);
 162  162          mdoc->next = ROFF_NEXT_CHILD;
 163  163  }
 164  164  
           /*
            * Unlink node p from the tree, clear its prev and next pointers,
            * and append it again with roff_node_append().
            * NOTE(review): presumably this moves p to the parser's current
            * insertion point (mdoc->last / mdoc->next) -- confirm in roff.c.
            */
 165  165  void
 166  166  mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p)
 167  167  {
 168  168  
 169  169          roff_node_unlink(mdoc, p);
 170  170          p->prev = p->next = NULL;
 171  171          roff_node_append(mdoc, p);
 172  172  }
 173  173  
 174  174  /*
 175  175   * Parse free-form text, that is, a line that does not begin with the
 176  176   * control character.
            *
            * mdoc is the parse state, line the input line number, buf the
            * line buffer and offs the offset of the text within buf.
            * buf is modified in place: it is cut after the last character
            * that is to be output.
            * Every path through this function returns 1.
 177  177   */
 178  178  static int
 179  179  mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
 180  180  {
 181  181          struct roff_node *n;
 182  182          const char       *cp, *sp;
 183  183          char             *c, *ws, *end;
 184  184  
 185  185          n = mdoc->last;
 186  186  
 187  187          /*
 188  188           * If a column list contains plain text, assume an implicit item
 189  189           * macro.  This can happen one or more times at the beginning
 190  190           * of such a list, intermixed with non-It mdoc macros and with
 191  191           * nodes generated on the roff level, for example by tbl.
 192  192           */
 193  193  
 194  194          if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
 195  195               n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
 196  196              (n->parent != NULL && n->parent->tok == MDOC_Bl &&
 197  197               n->parent->norm->Bl.type == LIST_column)) {
 198  198                  mdoc->flags |= MDOC_FREECOL;
 199  199                  mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf);
 200  200                  return 1;
 201  201          }
 202  202  
 203  203          /*
 204  204           * Search for the beginning of unescaped trailing whitespace (ws)
 205  205           * and for the first character not to be output (end).
 206  206           */
 207  207  
 208  208          /* FIXME: replace with strcspn(). */
 209  209          ws = NULL;
 210  210          for (c = end = buf + offs; *c; c++) {
 211  211                  switch (*c) {
 212  212                  case ' ':
 213  213                          if (NULL == ws)
 214  214                                  ws = c;
 215  215                          continue;
 216  216                  case '\t':
 217  217                          /*
 218  218                           * Always warn about trailing tabs,
 219  219                           * even outside literal context,
 220  220                           * where they should be put on the next line.
 221  221                           */
 222  222                          if (NULL == ws)
 223  223                                  ws = c;
 224  224                          /*
 225  225                           * Strip trailing tabs in literal context only;
 226  226                           * outside, they affect the next line.
 227  227                           */
 228  228                          if (MDOC_LITERAL & mdoc->flags)
 229  229                                  continue;
 230  230                          break;
 231  231                  case '\\':
 232  232                          /* Skip the escaped character, too, if any. */
 233  233                          if (c[1])
 234  234                                  c++;
 235  235                          /* FALLTHROUGH */
 236  236                  default:
 237  237                          ws = NULL;
 238  238                          break;
 239  239                  }
                           /* Reached only via break: c is to be output. */
 240  240                  end = c + 1;
 241  241          }
                   /* Cut the line after the last character to be output. */
 242  242          *end = '\0';
 243  243  
 244  244          if (ws)
 245  245                  mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
 246  246                      line, (int)(ws-buf), NULL);
 247  247  
 248  248          /*
 249  249           * Blank lines are allowed in no-fill mode
 250  250           * and cancel preceding \c,
 251  251           * but add a single vertical space elsewhere.
 252  252           */
 253  253  
 254  254          if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) {
 255  255                  switch (mdoc->last->type) {
 256  256                  case ROFFT_TEXT:
                                   /*
                                    * Check whether the previous text node ends
                                    * in "\c".  The backslashes directly before
                                    * it are counted: if their number is odd,
                                    * the backslash of "\c" is itself escaped
                                    * and nothing is cancelled.  Otherwise the
                                    * "\c" is cut off the previous node and no
                                    * blank-line node is generated.
                                    */
 257  257                          sp = mdoc->last->string;
 258  258                          cp = end = strchr(sp, '\0') - 2;
 259  259                          if (cp < sp || cp[0] != '\\' || cp[1] != 'c')
 260  260                                  break;
 261  261                          while (cp > sp && cp[-1] == '\\')
 262  262                                  cp--;
 263  263                          if ((end - cp) % 2)
 264  264                                  break;
 265  265                          *end = '\0';
 266  266                          return 1;
 267  267                  default:
 268  268                          break;
 269  269                  }
                           /*
                            * Report the blank line and represent it by a
                            * ROFF_sp element that is already marked as valid
                            * and ended.
                            */
 270  270                  mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse,
 271  271                      line, (int)(c - buf), NULL);
 272  272                  roff_elem_alloc(mdoc, line, offs, ROFF_sp);
 273  273                  mdoc->last->flags |= NODE_VALID | NODE_ENDED;
 274  274                  mdoc->next = ROFF_NEXT_SIBLING;
 275  275                  return 1;
 276  276          }
 277  277  
                   /* Ordinary text: hand the rest of the line to a word node. */
 278  278          roff_word_alloc(mdoc, line, offs, buf+offs);
 279  279  
                   /* No end-of-sentence processing in literal mode. */
 280  280          if (mdoc->flags & MDOC_LITERAL)
 281  281                  return 1;
 282  282  
 283  283          /*
 284  284           * End-of-sentence check.  If the last character is an unescaped
 285  285           * EOS character, then flag the node as being the end of a
 286  286           * sentence.  The front-end will know how to interpret this.
 287  287           */
 288  288  
 289  289          assert(buf < end);
  
    | 
      ↓ open down ↓ | 
    278 lines elided | 
    
      ↑ open up ↑ | 
  
 290  290  
 291  291          if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
 292  292                  mdoc->last->flags |= NODE_EOS;
 293  293  
                   /*
                    * Look for sentence ends in the middle of the line: a
                    * period followed by a blank and preceded by two characters
                    * passing the character-class test, other than "nc" or
                    * "vs".  In this listing the '-' lines show the old
                    * isalpha() test and the '+' lines the new isalnum() test.
                    * Periods with fewer than two characters before them are
                    * skipped, and the scan stops when fewer than three
                    * characters remain.  After the period, the blank and up
                    * to two more blanks are skipped; an uppercase letter at
                    * that position is reported as MANDOCERR_EOS.
                    */
 294  294          for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) {
 295  295                  if (c - buf < offs + 2)
 296  296                          continue;
 297  297                  if (end - c < 3)
 298  298                          break;
 299  299                  if (c[1] != ' ' ||
 300      -                    isalpha((unsigned char)c[-2]) == 0 ||
 301      -                    isalpha((unsigned char)c[-1]) == 0 ||
      300 +                    isalnum((unsigned char)c[-2]) == 0 ||
      301 +                    isalnum((unsigned char)c[-1]) == 0 ||
 302  302                      (c[-2] == 'n' && c[-1] == 'c') ||
 303  303                      (c[-2] == 'v' && c[-1] == 's'))
 304  304                          continue;
 305  305                  c += 2;
 306  306                  if (*c == ' ')
 307  307                          c++;
 308  308                  if (*c == ' ')
 309  309                          c++;
 310  310                  if (isupper((unsigned char)(*c)))
 311  311                          mandoc_msg(MANDOCERR_EOS, mdoc->parse,
 312  312                              line, (int)(c - buf), NULL);
 313  313          }
 314  314  
 315  315          return 1;
 316  316  }
 317  317  
 318  318  /*
 319  319   * Parse a macro line, that is, a line beginning with the control
 320  320   * character.
            *
            * mdoc is the parse state, ln the input line number, buf the line
            * buffer and offs the offset of the macro name within buf.
            * Returns 2 when, in quick mode, an Sh macro has been processed
            * and the section of the last node is not SEC_NAME; returns 1 in
            * every other case, including unknown macros.
 321  321   */
 322  322  static int
 323  323  mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
 324  324  {
 325  325          struct roff_node *n;
 326  326          const char       *cp;
 327  327          size_t            sz;
 328  328          enum roff_tok     tok;
 329  329          int               sv;
 330  330  
 331  331          /* Determine the line macro. */
 332  332  
                   /*
                    * Step over at most four characters of the name, stopping
                    * at a blank, tab or backslash, or at the end of the line
                    * (strchr() also matches the terminating NUL).
                    * Only names of two or three characters are looked up.
                    */
 333  333          sv = offs;
 334  334          tok = TOKEN_NONE;
 335  335          for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
 336  336                  offs++;
 337  337          if (sz == 2 || sz == 3)
 338  338                  tok = roffhash_find(mdoc->mdocmac, buf + sv, sz);
 339  339          if (tok == TOKEN_NONE) {
                           /*
                            * NOTE(review): buf + sv - 1 presumably points at
                            * the control character in front of the name --
                            * confirm against the callers.
                            */
 340  340                  mandoc_msg(MANDOCERR_MACRO, mdoc->parse,
 341  341                      ln, sv, buf + sv - 1);
 342  342                  return 1;
 343  343          }
 344  344  
 345  345          /* Skip a leading escape sequence or tab. */
 346  346  
 347  347          switch (buf[offs]) {
 348  348          case '\\':
 349  349                  cp = buf + offs + 1;
 350  350                  mandoc_escape(&cp, NULL, NULL);
 351  351                  offs = cp - buf;
 352  352                  break;
 353  353          case '\t':
 354  354                  offs++;
 355  355                  break;
 356  356          default:
 357  357                  break;
 358  358          }
 359  359  
 360  360          /* Jump to the next non-whitespace word. */
 361  361  
 362  362          while (buf[offs] == ' ')
 363  363                  offs++;
 364  364  
 365  365          /*
 366  366           * Trailing whitespace.  Note that tabs are allowed to be passed
 367  367           * into the parser as "text", so we only warn about spaces here.
 368  368           */
 369  369  
 370  370          if ('\0' == buf[offs] && ' ' == buf[offs - 1])
 371  371                  mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
 372  372                      ln, offs - 1, NULL);
 373  373  
 374  374          /*
 375  375           * If an initial macro or a list invocation, divert directly
 376  376           * into macro processing.
 377  377           */
 378  378  
 379  379          n = mdoc->last;
 380  380          if (n == NULL || tok == MDOC_It || tok == MDOC_El) {
 381  381                  mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
 382  382                  return 1;
 383  383          }
 384  384  
 385  385          /*
 386  386           * If a column list contains a non-It macro, assume an implicit
 387  387           * item macro.  This can happen one or more times at the
 388  388           * beginning of such a list, intermixed with text lines and
 389  389           * with nodes generated on the roff level, for example by tbl.
 390  390           */
 391  391  
 392  392          if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
 393  393               n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
 394  394              (n->parent != NULL && n->parent->tok == MDOC_Bl &&
 395  395               n->parent->norm->Bl.type == LIST_column)) {
 396  396                  mdoc->flags |= MDOC_FREECOL;
                           /*
                            * The position passed here is &sv, the start of the
                            * macro name, not &offs as in the other calls.
                            * NOTE(review): presumably so that the implicit It
                            * parses the macro itself as item content --
                            * confirm in mdoc_macro.c.
                            */
 397  397                  mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
 398  398                  return 1;
 399  399          }
 400  400  
 401  401          /* Normal processing of a macro. */
 402  402  
 403  403          mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
 404  404  
 405  405          /* In quick mode (for mandocdb), abort after the NAME section. */
 406  406  
 407  407          if (mdoc->quick && MDOC_Sh == tok &&
 408  408              SEC_NAME != mdoc->last->sec)
 409  409                  return 2;
 410  410  
 411  411          return 1;
 412  412  }
 413  413  
           /*
            * Classify the NUL-terminated string p as a delimiter.
            * The empty string is DELIM_NONE.  A one-character string is
            * DELIM_OPEN for ( and [, DELIM_MIDDLE for |, DELIM_CLOSE for
            * . , ; : ? ! ) and ], and DELIM_NONE otherwise.
            * A longer string is only recognized when it starts with a
            * backslash: backslash-period is DELIM_CLOSE and the sequence
            * \fR|\fP is DELIM_MIDDLE.  Everything else is DELIM_NONE.
            */
 414  414  enum mdelim
 415  415  mdoc_isdelim(const char *p)
 416  416  {
 417  417  
 418  418          if ('\0' == p[0])
 419  419                  return DELIM_NONE;
 420  420  
                   /* Exactly one character long. */
 421  421          if ('\0' == p[1])
 422  422                  switch (p[0]) {
 423  423                  case '(':
 424  424                  case '[':
 425  425                          return DELIM_OPEN;
 426  426                  case '|':
 427  427                          return DELIM_MIDDLE;
 428  428                  case '.':
 429  429                  case ',':
 430  430                  case ';':
 431  431                  case ':':
 432  432                  case '?':
 433  433                  case '!':
 434  434                  case ')':
 435  435                  case ']':
 436  436                          return DELIM_CLOSE;
 437  437                  default:
 438  438                          return DELIM_NONE;
 439  439                  }
 440  440  
                   /* Two or more characters: only escape sequences qualify. */
 441  441          if ('\\' != p[0])
 442  442                  return DELIM_NONE;
 443  443  
 444  444          if (0 == strcmp(p + 1, "."))
 445  445                  return DELIM_CLOSE;
 446  446          if (0 == strcmp(p + 1, "fR|\\fP"))
 447  447                  return DELIM_MIDDLE;
 448  448  
 449  449          return DELIM_NONE;
 450  450  }
 451  451  
           /*
            * Point mdoc->last back at the first node, run
            * mdoc_node_validate() on the parse state, and finally call
            * mdoc_state_reset().
            */
 452  452  void
 453  453  mdoc_validate(struct roff_man *mdoc)
 454  454  {
 455  455  
 456  456          mdoc->last = mdoc->first;
 457  457          mdoc_node_validate(mdoc);
 458  458          mdoc_state_reset(mdoc);
 459  459  }
  
    | 
      ↓ open down ↓ | 
    148 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX