1 /*      $Id: man_macro.c,v 1.71 2012/01/03 15:16:24 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  *
   5  * Permission to use, copy, modify, and distribute this software for any
   6  * purpose with or without fee is hereby granted, provided that the above
   7  * copyright notice and this permission notice appear in all copies.
   8  *
   9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16  */
  17 #ifdef HAVE_CONFIG_H
  18 #include "config.h"
  19 #endif
  20 
  21 #include <assert.h>
  22 #include <ctype.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 
  26 #include "man.h"
  27 #include "mandoc.h"
  28 #include "libmandoc.h"
  29 #include "libman.h"
  30 
/*
 * Verdict produced for a single node by the rewind predicates
 * (rew_dohalt, rew_block) while rew_scope walks up the parse tree.
 */
enum    rew {
        REW_REWIND, /* stop the walk here and rewind up to this node */
        REW_NOHALT, /* keep walking towards the parent */
        REW_HALT /* stop immediately; nothing is rewound */
};
  36 
/* Macro handlers installed in the __man_macros dispatch table. */
static  int              blk_close(MACRO_PROT_ARGS);
static  int              blk_exp(MACRO_PROT_ARGS);
static  int              blk_imp(MACRO_PROT_ARGS);
static  int              in_line_eoln(MACRO_PROT_ARGS);
/* Pulls the next argument off the current input line. */
static  int              man_args(struct man *, int, 
                                int *, char *, char **);

/* Scope-rewinding helpers used by the block handlers. */
static  int              rew_scope(enum man_type, 
                                struct man *, enum mant);
static  enum rew         rew_dohalt(enum mant, enum man_type, 
                                const struct man_node *);
static  enum rew         rew_block(enum mant, enum man_type, 
                                const struct man_node *);
static  void             rew_warn(struct man *, 
                                struct man_node *, enum mandocerr);
  52 
/*
 * Per-macro dispatch table: one row per macro, giving the handler
 * function and a set of flag bits.  The trailing comment on each row
 * names the macro.  The table is indexed by token (see the
 * man_macros[tok] and man_macros[n->tok] lookups in this file), so the
 * rows presumably follow the order of the token enumeration -- verify
 * against its declaration before reordering.
 *
 * Flags as consulted in this file:
 *   MAN_SCOPED    next-line scope: with no arguments on the macro line,
 *                 the handler leaves the scope open for the next line.
 *   MAN_FSCOPED   blk_imp always leaves the head scope open (`TP').
 *   MAN_NSCOPED   in_line_eoln sets MAN_ILINE; asserted to be mutually
 *                 exclusive with MAN_SCOPED.
 *   MAN_EXPLICIT  rew_warn only reports blocks of macros with this flag.
 *   MAN_BSCOPE    not consulted in this file.
 *
 * NOTE(review): identifiers containing a double underscore are reserved
 * for the implementation in ISO C; renaming __man_macros needs a check
 * that nothing outside this file refers to it.
 */
const   struct man_macro __man_macros[MAN_MAX] = {
        { in_line_eoln, MAN_NSCOPED }, /* br */
        { in_line_eoln, MAN_BSCOPE }, /* TH */
        { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SH */
        { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SS */
        { blk_imp, MAN_BSCOPE | MAN_SCOPED | MAN_FSCOPED }, /* TP */
        { blk_imp, MAN_BSCOPE }, /* LP */
        { blk_imp, MAN_BSCOPE }, /* PP */
        { blk_imp, MAN_BSCOPE }, /* P */
        { blk_imp, MAN_BSCOPE }, /* IP */
        { blk_imp, MAN_BSCOPE }, /* HP */
        { in_line_eoln, MAN_SCOPED }, /* SM */
        { in_line_eoln, MAN_SCOPED }, /* SB */
        { in_line_eoln, 0 }, /* BI */
        { in_line_eoln, 0 }, /* IB */
        { in_line_eoln, 0 }, /* BR */
        { in_line_eoln, 0 }, /* RB */
        { in_line_eoln, MAN_SCOPED }, /* R */
        { in_line_eoln, MAN_SCOPED }, /* B */
        { in_line_eoln, MAN_SCOPED }, /* I */
        { in_line_eoln, 0 }, /* IR */
        { in_line_eoln, 0 }, /* RI */
        { in_line_eoln, MAN_NSCOPED }, /* na */
        { in_line_eoln, MAN_NSCOPED }, /* sp */
        { in_line_eoln, MAN_BSCOPE }, /* nf */
        { in_line_eoln, MAN_BSCOPE }, /* fi */
        { blk_close, 0 }, /* RE */
        { blk_exp, MAN_EXPLICIT }, /* RS */
        { in_line_eoln, 0 }, /* DT */
        { in_line_eoln, 0 }, /* UC */
        { in_line_eoln, 0 }, /* PD */
        { in_line_eoln, 0 }, /* AT */
        { in_line_eoln, 0 }, /* in */
        { in_line_eoln, 0 }, /* ft */
        { in_line_eoln, 0 }, /* OP */
};

/* Public alias through which the rest of this file reads the table. */
const   struct man_macro * const man_macros = __man_macros;
  91 
  92 
  93 /*
  94  * Warn when "n" is an explicit non-roff macro.
  95  */
  96 static void
  97 rew_warn(struct man *m, struct man_node *n, enum mandocerr er)
  98 {
  99 
 100         if (er == MANDOCERR_MAX || MAN_BLOCK != n->type)
 101                 return;
 102         if (MAN_VALID & n->flags)
 103                 return;
 104         if ( ! (MAN_EXPLICIT & man_macros[n->tok].flags))
 105                 return;
 106 
 107         assert(er < MANDOCERR_FATAL);
 108         man_nmsg(m, n, er);
 109 }
 110 
 111 
 112 /*
 113  * Rewind scope.  If a code "er" != MANDOCERR_MAX has been provided, it
 114  * will be used if an explicit block scope is being closed out.
 115  */
 116 int
 117 man_unscope(struct man *m, const struct man_node *to, 
 118                 enum mandocerr er)
 119 {
 120         struct man_node *n;
 121 
 122         assert(to);
 123 
 124         m->next = MAN_NEXT_SIBLING;
 125 
 126         /* LINTED */
 127         while (m->last != to) {
 128                 /*
 129                  * Save the parent here, because we may delete the
 130                  * m->last node in the post-validation phase and reset
 131                  * it to m->last->parent, causing a step in the closing
 132                  * out to be lost.
 133                  */
 134                 n = m->last->parent;
 135                 rew_warn(m, m->last, er);
 136                 if ( ! man_valid_post(m))
 137                         return(0);
 138                 m->last = n;
 139                 assert(m->last);
 140         }
 141 
 142         rew_warn(m, m->last, er);
 143         if ( ! man_valid_post(m))
 144                 return(0);
 145 
 146         return(1);
 147 }
 148 
 149 
 150 static enum rew
 151 rew_block(enum mant ntok, enum man_type type, const struct man_node *n)
 152 {
 153 
 154         if (MAN_BLOCK == type && ntok == n->parent->tok && 
 155                         MAN_BODY == n->parent->type)
 156                 return(REW_REWIND);
 157         return(ntok == n->tok ? REW_HALT : REW_NOHALT);
 158 }
 159 
 160 
 161 /*
 162  * There are three scope levels: scoped to the root (all), scoped to the
 163  * section (all less sections), and scoped to subsections (all less
 164  * sections and subsections).
 165  */
 166 static enum rew 
 167 rew_dohalt(enum mant tok, enum man_type type, const struct man_node *n)
 168 {
 169         enum rew         c;
 170 
 171         /* We cannot progress beyond the root ever. */
 172         if (MAN_ROOT == n->type)
 173                 return(REW_HALT);
 174 
 175         assert(n->parent);
 176 
 177         /* Normal nodes shouldn't go to the level of the root. */
 178         if (MAN_ROOT == n->parent->type)
 179                 return(REW_REWIND);
 180 
 181         /* Already-validated nodes should be closed out. */
 182         if (MAN_VALID & n->flags)
 183                 return(REW_NOHALT);
 184 
 185         /* First: rewind to ourselves. */
 186         if (type == n->type && tok == n->tok)
 187                 return(REW_REWIND);
 188 
 189         /* 
 190          * Next follow the implicit scope-smashings as defined by man.7:
 191          * section, sub-section, etc.
 192          */
 193 
 194         switch (tok) {
 195         case (MAN_SH):
 196                 break;
 197         case (MAN_SS):
 198                 /* Rewind to a section, if a block. */
 199                 if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
 200                         return(c);
 201                 break;
 202         case (MAN_RS):
 203                 /* Rewind to a subsection, if a block. */
 204                 if (REW_NOHALT != (c = rew_block(MAN_SS, type, n)))
 205                         return(c);
 206                 /* Rewind to a section, if a block. */
 207                 if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
 208                         return(c);
 209                 break;
 210         default:
 211                 /* Rewind to an offsetter, if a block. */
 212                 if (REW_NOHALT != (c = rew_block(MAN_RS, type, n)))
 213                         return(c);
 214                 /* Rewind to a subsection, if a block. */
 215                 if (REW_NOHALT != (c = rew_block(MAN_SS, type, n)))
 216                         return(c);
 217                 /* Rewind to a section, if a block. */
 218                 if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
 219                         return(c);
 220                 break;
 221         }
 222 
 223         return(REW_NOHALT);
 224 }
 225 
 226 
 227 /*
 228  * Rewinding entails ascending the parse tree until a coherent point,
 229  * for example, the `SH' macro will close out any intervening `SS'
 230  * scopes.  When a scope is closed, it must be validated and actioned.
 231  */
 232 static int
 233 rew_scope(enum man_type type, struct man *m, enum mant tok)
 234 {
 235         struct man_node *n;
 236         enum rew         c;
 237 
 238         /* LINTED */
 239         for (n = m->last; n; n = n->parent) {
 240                 /* 
 241                  * Whether we should stop immediately (REW_HALT), stop
 242                  * and rewind until this point (REW_REWIND), or keep
 243                  * rewinding (REW_NOHALT).
 244                  */
 245                 c = rew_dohalt(tok, type, n);
 246                 if (REW_HALT == c)
 247                         return(1);
 248                 if (REW_REWIND == c)
 249                         break;
 250         }
 251 
 252         /* 
 253          * Rewind until the current point.  Warn if we're a roff
 254          * instruction that's mowing over explicit scopes.
 255          */
 256         assert(n);
 257 
 258         return(man_unscope(m, n, MANDOCERR_MAX));
 259 }
 260 
 261 
 262 /*
 263  * Close out a generic explicit macro.
 264  */
 265 /* ARGSUSED */
 266 int
 267 blk_close(MACRO_PROT_ARGS)
 268 {
 269         enum mant                ntok;
 270         const struct man_node   *nn;
 271 
 272         switch (tok) {
 273         case (MAN_RE):
 274                 ntok = MAN_RS;
 275                 break;
 276         default:
 277                 abort();
 278                 /* NOTREACHED */
 279         }
 280 
 281         for (nn = m->last->parent; nn; nn = nn->parent)
 282                 if (ntok == nn->tok)
 283                         break;
 284 
 285         if (NULL == nn)
 286                 man_pmsg(m, line, ppos, MANDOCERR_NOSCOPE);
 287 
 288         if ( ! rew_scope(MAN_BODY, m, ntok))
 289                 return(0);
 290         if ( ! rew_scope(MAN_BLOCK, m, ntok))
 291                 return(0);
 292 
 293         return(1);
 294 }
 295 
 296 
 297 /* ARGSUSED */
 298 int
 299 blk_exp(MACRO_PROT_ARGS)
 300 {
 301         int              la;
 302         char            *p;
 303 
 304         /* 
 305          * Close out prior scopes.  "Regular" explicit macros cannot be
 306          * nested, but we allow roff macros to be placed just about
 307          * anywhere.
 308          */
 309 
 310         if ( ! man_block_alloc(m, line, ppos, tok))
 311                 return(0);
 312         if ( ! man_head_alloc(m, line, ppos, tok))
 313                 return(0);
 314 
 315         for (;;) {
 316                 la = *pos;
 317                 if ( ! man_args(m, line, pos, buf, &p))
 318                         break;
 319                 if ( ! man_word_alloc(m, line, la, p))
 320                         return(0);
 321         }
 322 
 323         assert(m);
 324         assert(tok != MAN_MAX);
 325 
 326         if ( ! rew_scope(MAN_HEAD, m, tok))
 327                 return(0);
 328         return(man_body_alloc(m, line, ppos, tok));
 329 }
 330 
 331 
 332 
 333 /*
 334  * Parse an implicit-block macro.  These contain a MAN_HEAD and a
 335  * MAN_BODY contained within a MAN_BLOCK.  Rules for closing out other
 336  * scopes, such as `SH' closing out an `SS', are defined in the rew
 337  * routines.
 338  */
 339 /* ARGSUSED */
 340 int
 341 blk_imp(MACRO_PROT_ARGS)
 342 {
 343         int              la;
 344         char            *p;
 345         struct man_node *n;
 346 
 347         /* Close out prior scopes. */
 348 
 349         if ( ! rew_scope(MAN_BODY, m, tok))
 350                 return(0);
 351         if ( ! rew_scope(MAN_BLOCK, m, tok))
 352                 return(0);
 353 
 354         /* Allocate new block & head scope. */
 355 
 356         if ( ! man_block_alloc(m, line, ppos, tok))
 357                 return(0);
 358         if ( ! man_head_alloc(m, line, ppos, tok))
 359                 return(0);
 360 
 361         n = m->last;
 362 
 363         /* Add line arguments. */
 364 
 365         for (;;) {
 366                 la = *pos;
 367                 if ( ! man_args(m, line, pos, buf, &p))
 368                         break;
 369                 if ( ! man_word_alloc(m, line, la, p))
 370                         return(0);
 371         }
 372 
 373         /* Close out head and open body (unless MAN_SCOPE). */
 374 
 375         if (MAN_SCOPED & man_macros[tok].flags) {
 376                 /* If we're forcing scope (`TP'), keep it open. */
 377                 if (MAN_FSCOPED & man_macros[tok].flags) {
 378                         m->flags |= MAN_BLINE;
 379                         return(1);
 380                 } else if (n == m->last) {
 381                         m->flags |= MAN_BLINE;
 382                         return(1);
 383                 }
 384         }
 385 
 386         if ( ! rew_scope(MAN_HEAD, m, tok))
 387                 return(0);
 388         return(man_body_alloc(m, line, ppos, tok));
 389 }
 390 
 391 
 392 /* ARGSUSED */
 393 int
 394 in_line_eoln(MACRO_PROT_ARGS)
 395 {
 396         int              la;
 397         char            *p;
 398         struct man_node *n;
 399 
 400         if ( ! man_elem_alloc(m, line, ppos, tok))
 401                 return(0);
 402 
 403         n = m->last;
 404 
 405         for (;;) {
 406                 la = *pos;
 407                 if ( ! man_args(m, line, pos, buf, &p))
 408                         break;
 409                 if ( ! man_word_alloc(m, line, la, p))
 410                         return(0);
 411         }
 412 
 413         /*
 414          * If no arguments are specified and this is MAN_SCOPED (i.e.,
 415          * next-line scoped), then set our mode to indicate that we're
 416          * waiting for terms to load into our context.
 417          */
 418 
 419         if (n == m->last && MAN_SCOPED & man_macros[tok].flags) {
 420                 assert( ! (MAN_NSCOPED & man_macros[tok].flags));
 421                 m->flags |= MAN_ELINE;
 422                 return(1);
 423         } 
 424 
 425         /* Set ignorable context, if applicable. */
 426 
 427         if (MAN_NSCOPED & man_macros[tok].flags) {
 428                 assert( ! (MAN_SCOPED & man_macros[tok].flags));
 429                 m->flags |= MAN_ILINE;
 430         }
 431 
 432         assert(MAN_ROOT != m->last->type);
 433         m->next = MAN_NEXT_SIBLING;
 434         
 435         /*
 436          * Rewind our element scope.  Note that when TH is pruned, we'll
 437          * be back at the root, so make sure that we don't clobber as
 438          * its sibling.
 439          */
 440 
 441         for ( ; m->last; m->last = m->last->parent) {
 442                 if (m->last == n)
 443                         break;
 444                 if (m->last->type == MAN_ROOT)
 445                         break;
 446                 if ( ! man_valid_post(m))
 447                         return(0);
 448         }
 449 
 450         assert(m->last);
 451 
 452         /*
 453          * Same here regarding whether we're back at the root. 
 454          */
 455 
 456         if (m->last->type != MAN_ROOT && ! man_valid_post(m))
 457                 return(0);
 458 
 459         return(1);
 460 }
 461 
 462 
 463 int
 464 man_macroend(struct man *m)
 465 {
 466 
 467         return(man_unscope(m, m->first, MANDOCERR_SCOPEEXIT));
 468 }
 469 
 470 static int
 471 man_args(struct man *m, int line, int *pos, char *buf, char **v)
 472 {
 473         char     *start;
 474 
 475         assert(*pos);
 476         *v = start = buf + *pos;
 477         assert(' ' != *start);
 478 
 479         if ('\0' == *start)
 480                 return(0);
 481 
 482         *v = mandoc_getarg(m->parse, v, line, pos);
 483         return(1);
 484 }