1 /*      $Id: mdoc.c,v 1.267 2017/06/17 13:06:16 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #include "config.h"
  19 
  20 #include <sys/types.h>
  21 
  22 #include <assert.h>
  23 #include <ctype.h>
  24 #include <stdarg.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <time.h>
  29 
  30 #include "mandoc_aux.h"
  31 #include "mandoc.h"
  32 #include "roff.h"
  33 #include "mdoc.h"
  34 #include "libmandoc.h"
  35 #include "roff_int.h"
  36 #include "libmdoc.h"
  37 
  38 const   char *const __mdoc_argnames[MDOC_ARG_MAX] = {
  39         "split",                "nosplit",              "ragged",
  40         "unfilled",             "literal",              "file",
  41         "offset",               "bullet",               "dash",
  42         "hyphen",               "item",                 "enum",
  43         "tag",                  "diag",                 "hang",
  44         "ohang",                "inset",                "column",
  45         "width",                "compact",              "std",
  46         "filled",               "words",                "emphasis",
  47         "symbolic",             "nested",               "centered"
  48 };
  49 const   char * const *mdoc_argnames = __mdoc_argnames;
  50 
  51 static  int               mdoc_ptext(struct roff_man *, int, char *, int);
  52 static  int               mdoc_pmacro(struct roff_man *, int, char *, int);
  53 
  54 
  55 /*
  56  * Main parse routine.  Parses a single line -- really just hands off to
  57  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
  58  */
  59 int
  60 mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
  61 {
  62 
  63         if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
  64                 mdoc->flags |= MDOC_NEWLINE;
  65 
  66         /*
  67          * Let the roff nS register switch SYNOPSIS mode early,
  68          * such that the parser knows at all times
  69          * whether this mode is on or off.
  70          * Note that this mode is also switched by the Sh macro.
  71          */
  72         if (roff_getreg(mdoc->roff, "nS"))
  73                 mdoc->flags |= MDOC_SYNOPSIS;
  74         else
  75                 mdoc->flags &= ~MDOC_SYNOPSIS;
  76 
  77         return roff_getcontrol(mdoc->roff, buf, &offs) ?
  78             mdoc_pmacro(mdoc, ln, buf, offs) :
  79             mdoc_ptext(mdoc, ln, buf, offs);
  80 }
  81 
  82 void
  83 mdoc_macro(MACRO_PROT_ARGS)
  84 {
  85         assert(tok >= MDOC_Dd && tok < MDOC_MAX);
  86         (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf);
  87 }
  88 
  89 void
  90 mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok)
  91 {
  92         struct roff_node *p;
  93 
  94         p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
  95         roff_node_append(mdoc, p);
  96         mdoc->next = ROFF_NEXT_CHILD;
  97 }
  98 
  99 struct roff_node *
 100 mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos,
 101     enum roff_tok tok, struct roff_node *body)
 102 {
 103         struct roff_node *p;
 104 
 105         body->flags |= NODE_ENDED;
 106         body->parent->flags |= NODE_ENDED;
 107         p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
 108         p->body = body;
 109         p->norm = body->norm;
 110         p->end = ENDBODY_SPACE;
 111         roff_node_append(mdoc, p);
 112         mdoc->next = ROFF_NEXT_SIBLING;
 113         return p;
 114 }
 115 
 116 struct roff_node *
 117 mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
 118     enum roff_tok tok, struct mdoc_arg *args)
 119 {
 120         struct roff_node *p;
 121 
 122         p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
 123         p->args = args;
 124         if (p->args)
 125                 (args->refcnt)++;
 126 
 127         switch (tok) {
 128         case MDOC_Bd:
 129         case MDOC_Bf:
 130         case MDOC_Bl:
 131         case MDOC_En:
 132         case MDOC_Rs:
 133                 p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
 134                 break;
 135         default:
 136                 break;
 137         }
 138         roff_node_append(mdoc, p);
 139         mdoc->next = ROFF_NEXT_CHILD;
 140         return p;
 141 }
 142 
 143 void
 144 mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
 145      enum roff_tok tok, struct mdoc_arg *args)
 146 {
 147         struct roff_node *p;
 148 
 149         p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
 150         p->args = args;
 151         if (p->args)
 152                 (args->refcnt)++;
 153 
 154         switch (tok) {
 155         case MDOC_An:
 156                 p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
 157                 break;
 158         default:
 159                 break;
 160         }
 161         roff_node_append(mdoc, p);
 162         mdoc->next = ROFF_NEXT_CHILD;
 163 }
 164 
 165 void
 166 mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p)
 167 {
 168 
 169         roff_node_unlink(mdoc, p);
 170         p->prev = p->next = NULL;
 171         roff_node_append(mdoc, p);
 172 }
 173 
 174 /*
 175  * Parse free-form text, that is, a line that does not begin with the
 176  * control character.
 177  */
 178 static int
 179 mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
 180 {
 181         struct roff_node *n;
 182         const char       *cp, *sp;
 183         char             *c, *ws, *end;
 184 
 185         n = mdoc->last;
 186 
 187         /*
 188          * If a column list contains plain text, assume an implicit item
 189          * macro.  This can happen one or more times at the beginning
 190          * of such a list, intermixed with non-It mdoc macros and with
 191          * nodes generated on the roff level, for example by tbl.
 192          */
 193 
 194         if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
 195              n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
 196             (n->parent != NULL && n->parent->tok == MDOC_Bl &&
 197              n->parent->norm->Bl.type == LIST_column)) {
 198                 mdoc->flags |= MDOC_FREECOL;
 199                 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf);
 200                 return 1;
 201         }
 202 
 203         /*
 204          * Search for the beginning of unescaped trailing whitespace (ws)
 205          * and for the first character not to be output (end).
 206          */
 207 
 208         /* FIXME: replace with strcspn(). */
 209         ws = NULL;
 210         for (c = end = buf + offs; *c; c++) {
 211                 switch (*c) {
 212                 case ' ':
 213                         if (NULL == ws)
 214                                 ws = c;
 215                         continue;
 216                 case '\t':
 217                         /*
 218                          * Always warn about trailing tabs,
 219                          * even outside literal context,
 220                          * where they should be put on the next line.
 221                          */
 222                         if (NULL == ws)
 223                                 ws = c;
 224                         /*
 225                          * Strip trailing tabs in literal context only;
 226                          * outside, they affect the next line.
 227                          */
 228                         if (MDOC_LITERAL & mdoc->flags)
 229                                 continue;
 230                         break;
 231                 case '\\':
 232                         /* Skip the escaped character, too, if any. */
 233                         if (c[1])
 234                                 c++;
 235                         /* FALLTHROUGH */
 236                 default:
 237                         ws = NULL;
 238                         break;
 239                 }
 240                 end = c + 1;
 241         }
 242         *end = '\0';
 243 
 244         if (ws)
 245                 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
 246                     line, (int)(ws-buf), NULL);
 247 
 248         /*
 249          * Blank lines are allowed in no-fill mode
 250          * and cancel preceding \c,
 251          * but add a single vertical space elsewhere.
 252          */
 253 
 254         if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) {
 255                 switch (mdoc->last->type) {
 256                 case ROFFT_TEXT:
 257                         sp = mdoc->last->string;
 258                         cp = end = strchr(sp, '\0') - 2;
 259                         if (cp < sp || cp[0] != '\\' || cp[1] != 'c')
 260                                 break;
 261                         while (cp > sp && cp[-1] == '\\')
 262                                 cp--;
 263                         if ((end - cp) % 2)
 264                                 break;
 265                         *end = '\0';
 266                         return 1;
 267                 default:
 268                         break;
 269                 }
 270                 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse,
 271                     line, (int)(c - buf), NULL);
 272                 roff_elem_alloc(mdoc, line, offs, ROFF_sp);
 273                 mdoc->last->flags |= NODE_VALID | NODE_ENDED;
 274                 mdoc->next = ROFF_NEXT_SIBLING;
 275                 return 1;
 276         }
 277 
 278         roff_word_alloc(mdoc, line, offs, buf+offs);
 279 
 280         if (mdoc->flags & MDOC_LITERAL)
 281                 return 1;
 282 
 283         /*
 284          * End-of-sentence check.  If the last character is an unescaped
 285          * EOS character, then flag the node as being the end of a
 286          * sentence.  The front-end will know how to interpret this.
 287          */
 288 
 289         assert(buf < end);
 290 
 291         if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
 292                 mdoc->last->flags |= NODE_EOS;
 293 
 294         for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) {
 295                 if (c - buf < offs + 2)
 296                         continue;
 297                 if (end - c < 3)
 298                         break;
 299                 if (c[1] != ' ' ||
 300                     isalpha((unsigned char)c[-2]) == 0 ||
 301                     isalpha((unsigned char)c[-1]) == 0 ||
 302                     (c[-2] == 'n' && c[-1] == 'c') ||
 303                     (c[-2] == 'v' && c[-1] == 's'))
 304                         continue;
 305                 c += 2;
 306                 if (*c == ' ')
 307                         c++;
 308                 if (*c == ' ')
 309                         c++;
 310                 if (isupper((unsigned char)(*c)))
 311                         mandoc_msg(MANDOCERR_EOS, mdoc->parse,
 312                             line, (int)(c - buf), NULL);
 313         }
 314 
 315         return 1;
 316 }
 317 
 318 /*
 319  * Parse a macro line, that is, a line beginning with the control
 320  * character.
 321  */
 322 static int
 323 mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
 324 {
 325         struct roff_node *n;
 326         const char       *cp;
 327         size_t            sz;
 328         enum roff_tok     tok;
 329         int               sv;
 330 
 331         /* Determine the line macro. */
 332 
 333         sv = offs;
 334         tok = TOKEN_NONE;
 335         for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
 336                 offs++;
 337         if (sz == 2 || sz == 3)
 338                 tok = roffhash_find(mdoc->mdocmac, buf + sv, sz);
 339         if (tok == TOKEN_NONE) {
 340                 mandoc_msg(MANDOCERR_MACRO, mdoc->parse,
 341                     ln, sv, buf + sv - 1);
 342                 return 1;
 343         }
 344 
 345         /* Skip a leading escape sequence or tab. */
 346 
 347         switch (buf[offs]) {
 348         case '\\':
 349                 cp = buf + offs + 1;
 350                 mandoc_escape(&cp, NULL, NULL);
 351                 offs = cp - buf;
 352                 break;
 353         case '\t':
 354                 offs++;
 355                 break;
 356         default:
 357                 break;
 358         }
 359 
 360         /* Jump to the next non-whitespace word. */
 361 
 362         while (buf[offs] == ' ')
 363                 offs++;
 364 
 365         /*
 366          * Trailing whitespace.  Note that tabs are allowed to be passed
 367          * into the parser as "text", so we only warn about spaces here.
 368          */
 369 
 370         if ('\0' == buf[offs] && ' ' == buf[offs - 1])
 371                 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
 372                     ln, offs - 1, NULL);
 373 
 374         /*
 375          * If an initial macro or a list invocation, divert directly
 376          * into macro processing.
 377          */
 378 
 379         n = mdoc->last;
 380         if (n == NULL || tok == MDOC_It || tok == MDOC_El) {
 381                 mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
 382                 return 1;
 383         }
 384 
 385         /*
 386          * If a column list contains a non-It macro, assume an implicit
 387          * item macro.  This can happen one or more times at the
 388          * beginning of such a list, intermixed with text lines and
 389          * with nodes generated on the roff level, for example by tbl.
 390          */
 391 
 392         if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
 393              n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
 394             (n->parent != NULL && n->parent->tok == MDOC_Bl &&
 395              n->parent->norm->Bl.type == LIST_column)) {
 396                 mdoc->flags |= MDOC_FREECOL;
 397                 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
 398                 return 1;
 399         }
 400 
 401         /* Normal processing of a macro. */
 402 
 403         mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
 404 
 405         /* In quick mode (for mandocdb), abort after the NAME section. */
 406 
 407         if (mdoc->quick && MDOC_Sh == tok &&
 408             SEC_NAME != mdoc->last->sec)
 409                 return 2;
 410 
 411         return 1;
 412 }
 413 
 414 enum mdelim
 415 mdoc_isdelim(const char *p)
 416 {
 417 
 418         if ('\0' == p[0])
 419                 return DELIM_NONE;
 420 
 421         if ('\0' == p[1])
 422                 switch (p[0]) {
 423                 case '(':
 424                 case '[':
 425                         return DELIM_OPEN;
 426                 case '|':
 427                         return DELIM_MIDDLE;
 428                 case '.':
 429                 case ',':
 430                 case ';':
 431                 case ':':
 432                 case '?':
 433                 case '!':
 434                 case ')':
 435                 case ']':
 436                         return DELIM_CLOSE;
 437                 default:
 438                         return DELIM_NONE;
 439                 }
 440 
 441         if ('\\' != p[0])
 442                 return DELIM_NONE;
 443 
 444         if (0 == strcmp(p + 1, "."))
 445                 return DELIM_CLOSE;
 446         if (0 == strcmp(p + 1, "fR|\\fP"))
 447                 return DELIM_MIDDLE;
 448 
 449         return DELIM_NONE;
 450 }
 451 
 452 void
 453 mdoc_validate(struct roff_man *mdoc)
 454 {
 455 
 456         mdoc->last = mdoc->first;
 457         mdoc_node_validate(mdoc);
 458         mdoc_state_reset(mdoc);
 459 }