1 /*      $Id: man_validate.c,v 1.86 2013/10/17 20:54:58 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #ifdef HAVE_CONFIG_H
  19 #include "config.h"
  20 #endif
  21 
  22 #include <sys/types.h>
  23 
  24 #include <assert.h>
  25 #include <ctype.h>
  26 #include <errno.h>
  27 #include <limits.h>
  28 #include <stdarg.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <time.h>
  32 
  33 #include "man.h"
  34 #include "mandoc.h"
  35 #include "libman.h"
  36 #include "libmandoc.h"
  37 
  38 #define CHKARGS   struct man *man, struct man_node *n
  39 
  40 typedef int     (*v_check)(CHKARGS);
  41 
  42 struct  man_valid {
  43         v_check  *pres;
  44         v_check  *posts;
  45 };
  46 
  47 static  int       check_eq0(CHKARGS);
  48 static  int       check_eq2(CHKARGS);
  49 static  int       check_le1(CHKARGS);
  50 static  int       check_ge2(CHKARGS);
  51 static  int       check_le5(CHKARGS);
  52 static  int       check_head1(CHKARGS);
  53 static  int       check_par(CHKARGS);
  54 static  int       check_part(CHKARGS);
  55 static  int       check_root(CHKARGS);
  56 static  void      check_text(CHKARGS);
  57 
  58 static  int       post_AT(CHKARGS);
  59 static  int       post_IP(CHKARGS);
  60 static  int       post_vs(CHKARGS);
  61 static  int       post_fi(CHKARGS);
  62 static  int       post_ft(CHKARGS);
  63 static  int       post_nf(CHKARGS);
  64 static  int       post_sec(CHKARGS);
  65 static  int       post_TH(CHKARGS);
  66 static  int       post_UC(CHKARGS);
  67 static  int       pre_sec(CHKARGS);
  68 
  69 static  v_check   posts_at[] = { post_AT, NULL };
  70 static  v_check   posts_br[] = { post_vs, check_eq0, NULL };
  71 static  v_check   posts_eq0[] = { check_eq0, NULL };
  72 static  v_check   posts_eq2[] = { check_eq2, NULL };
  73 static  v_check   posts_fi[] = { check_eq0, post_fi, NULL };
  74 static  v_check   posts_ft[] = { post_ft, NULL };
  75 static  v_check   posts_ip[] = { post_IP, NULL };
  76 static  v_check   posts_le1[] = { check_le1, NULL };
  77 static  v_check   posts_nf[] = { check_eq0, post_nf, NULL };
  78 static  v_check   posts_par[] = { check_par, NULL };
  79 static  v_check   posts_part[] = { check_part, NULL };
  80 static  v_check   posts_sec[] = { post_sec, NULL };
  81 static  v_check   posts_sp[] = { post_vs, check_le1, NULL };
  82 static  v_check   posts_th[] = { check_ge2, check_le5, post_TH, NULL };
  83 static  v_check   posts_uc[] = { post_UC, NULL };
  84 static  v_check   posts_ur[] = { check_head1, check_part, NULL };
  85 static  v_check   pres_sec[] = { pre_sec, NULL };
  86 
  87 static  const struct man_valid man_valids[MAN_MAX] = {
  88         { NULL, posts_br }, /* br */
  89         { NULL, posts_th }, /* TH */
  90         { pres_sec, posts_sec }, /* SH */
  91         { pres_sec, posts_sec }, /* SS */
  92         { NULL, NULL }, /* TP */
  93         { NULL, posts_par }, /* LP */
  94         { NULL, posts_par }, /* PP */
  95         { NULL, posts_par }, /* P */
  96         { NULL, posts_ip }, /* IP */
  97         { NULL, NULL }, /* HP */
  98         { NULL, NULL }, /* SM */
  99         { NULL, NULL }, /* SB */
 100         { NULL, NULL }, /* BI */
 101         { NULL, NULL }, /* IB */
 102         { NULL, NULL }, /* BR */
 103         { NULL, NULL }, /* RB */
 104         { NULL, NULL }, /* R */
 105         { NULL, NULL }, /* B */
 106         { NULL, NULL }, /* I */
 107         { NULL, NULL }, /* IR */
 108         { NULL, NULL }, /* RI */
 109         { NULL, posts_eq0 }, /* na */
 110         { NULL, posts_sp }, /* sp */
 111         { NULL, posts_nf }, /* nf */
 112         { NULL, posts_fi }, /* fi */
 113         { NULL, NULL }, /* RE */
 114         { NULL, posts_part }, /* RS */
 115         { NULL, NULL }, /* DT */
 116         { NULL, posts_uc }, /* UC */
 117         { NULL, posts_le1 }, /* PD */
 118         { NULL, posts_at }, /* AT */
 119         { NULL, NULL }, /* in */
 120         { NULL, posts_ft }, /* ft */
 121         { NULL, posts_eq2 }, /* OP */
 122         { NULL, posts_nf }, /* EX */
 123         { NULL, posts_fi }, /* EE */
 124         { NULL, posts_ur }, /* UR */
 125         { NULL, NULL }, /* UE */
 126 };
 127 
 128 
 129 int
 130 man_valid_pre(struct man *man, struct man_node *n)
 131 {
 132         v_check         *cp;
 133 
 134         switch (n->type) {
 135         case (MAN_TEXT):
 136                 /* FALLTHROUGH */
 137         case (MAN_ROOT):
 138                 /* FALLTHROUGH */
 139         case (MAN_EQN):
 140                 /* FALLTHROUGH */
 141         case (MAN_TBL):
 142                 return(1);
 143         default:
 144                 break;
 145         }
 146 
 147         if (NULL == (cp = man_valids[n->tok].pres))
 148                 return(1);
 149         for ( ; *cp; cp++)
 150                 if ( ! (*cp)(man, n)) 
 151                         return(0);
 152         return(1);
 153 }
 154 
 155 
 156 int
 157 man_valid_post(struct man *man)
 158 {
 159         v_check         *cp;
 160 
 161         if (MAN_VALID & man->last->flags)
 162                 return(1);
 163         man->last->flags |= MAN_VALID;
 164 
 165         switch (man->last->type) {
 166         case (MAN_TEXT): 
 167                 check_text(man, man->last);
 168                 return(1);
 169         case (MAN_ROOT):
 170                 return(check_root(man, man->last));
 171         case (MAN_EQN):
 172                 /* FALLTHROUGH */
 173         case (MAN_TBL):
 174                 return(1);
 175         default:
 176                 break;
 177         }
 178 
 179         if (NULL == (cp = man_valids[man->last->tok].posts))
 180                 return(1);
 181         for ( ; *cp; cp++)
 182                 if ( ! (*cp)(man, man->last))
 183                         return(0);
 184 
 185         return(1);
 186 }
 187 
 188 
 189 static int
 190 check_root(CHKARGS) 
 191 {
 192 
 193         if (MAN_BLINE & man->flags)
 194                 man_nmsg(man, n, MANDOCERR_SCOPEEXIT);
 195         else if (MAN_ELINE & man->flags)
 196                 man_nmsg(man, n, MANDOCERR_SCOPEEXIT);
 197 
 198         man->flags &= ~MAN_BLINE;
 199         man->flags &= ~MAN_ELINE;
 200 
 201         if (NULL == man->first->child) {
 202                 man_nmsg(man, n, MANDOCERR_NODOCBODY);
 203                 return(0);
 204         } else if (NULL == man->meta.title) {
 205                 man_nmsg(man, n, MANDOCERR_NOTITLE);
 206 
 207                 /*
 208                  * If a title hasn't been set, do so now (by
 209                  * implication, date and section also aren't set).
 210                  */
 211 
 212                 man->meta.title = mandoc_strdup("unknown");
 213                 man->meta.msec = mandoc_strdup("1");
 214                 man->meta.date = mandoc_normdate
 215                         (man->parse, NULL, n->line, n->pos);
 216         }
 217 
 218         return(1);
 219 }
 220 
 221 static void
 222 check_text(CHKARGS)
 223 {
 224         char            *cp, *p;
 225 
 226         if (MAN_LITERAL & man->flags)
 227                 return;
 228 
 229         cp = n->string;
 230         for (p = cp; NULL != (p = strchr(p, '\t')); p++)
 231                 man_pmsg(man, n->line, (int)(p - cp), MANDOCERR_BADTAB);
 232 }
 233 
 234 #define INEQ_DEFINE(x, ineq, name) \
 235 static int \
 236 check_##name(CHKARGS) \
 237 { \
 238         if (n->nchild ineq (x)) \
 239                 return(1); \
 240         mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, n->pos, \
 241                         "line arguments %s %d (have %d)", \
 242                         #ineq, (x), n->nchild); \
 243         return(1); \
 244 }
 245 
 246 INEQ_DEFINE(0, ==, eq0)
 247 INEQ_DEFINE(2, ==, eq2)
 248 INEQ_DEFINE(1, <=, le1)
 249 INEQ_DEFINE(2, >=, ge2)
 250 INEQ_DEFINE(5, <=, le5)
 251 
 252 static int
 253 check_head1(CHKARGS)
 254 {
 255 
 256         if (MAN_HEAD == n->type && 1 != n->nchild)
 257                 mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line,
 258                     n->pos, "line arguments eq 1 (have %d)", n->nchild);
 259 
 260         return(1);
 261 }
 262 
 263 static int
 264 post_ft(CHKARGS)
 265 {
 266         char    *cp;
 267         int      ok;
 268 
 269         if (0 == n->nchild)
 270                 return(1);
 271 
 272         ok = 0;
 273         cp = n->child->string;
 274         switch (*cp) {
 275         case ('1'):
 276                 /* FALLTHROUGH */
 277         case ('2'):
 278                 /* FALLTHROUGH */
 279         case ('3'):
 280                 /* FALLTHROUGH */
 281         case ('4'):
 282                 /* FALLTHROUGH */
 283         case ('I'):
 284                 /* FALLTHROUGH */
 285         case ('P'):
 286                 /* FALLTHROUGH */
 287         case ('R'):
 288                 if ('\0' == cp[1])
 289                         ok = 1;
 290                 break;
 291         case ('B'):
 292                 if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2]))
 293                         ok = 1;
 294                 break;
 295         case ('C'):
 296                 if ('W' == cp[1] && '\0' == cp[2])
 297                         ok = 1;
 298                 break;
 299         default:
 300                 break;
 301         }
 302 
 303         if (0 == ok) {
 304                 mandoc_vmsg
 305                         (MANDOCERR_BADFONT, man->parse,
 306                          n->line, n->pos, "%s", cp);
 307                 *cp = '\0';
 308         }
 309 
 310         if (1 < n->nchild)
 311                 mandoc_vmsg
 312                         (MANDOCERR_ARGCOUNT, man->parse, n->line, 
 313                          n->pos, "want one child (have %d)", 
 314                          n->nchild);
 315 
 316         return(1);
 317 }
 318 
 319 static int
 320 pre_sec(CHKARGS)
 321 {
 322 
 323         if (MAN_BLOCK == n->type)
 324                 man->flags &= ~MAN_LITERAL;
 325         return(1);
 326 }
 327 
 328 static int
 329 post_sec(CHKARGS)
 330 {
 331 
 332         if ( ! (MAN_HEAD == n->type && 0 == n->nchild)) 
 333                 return(1);
 334 
 335         man_nmsg(man, n, MANDOCERR_SYNTARGCOUNT);
 336         return(0);
 337 }
 338 
 339 static int
 340 check_part(CHKARGS)
 341 {
 342 
 343         if (MAN_BODY == n->type && 0 == n->nchild)
 344                 mandoc_msg(MANDOCERR_ARGCWARN, man->parse, n->line, 
 345                                 n->pos, "want children (have none)");
 346 
 347         return(1);
 348 }
 349 
 350 
 351 static int
 352 check_par(CHKARGS)
 353 {
 354 
 355         switch (n->type) {
 356         case (MAN_BLOCK):
 357                 if (0 == n->body->nchild)
 358                         man_node_delete(man, n);
 359                 break;
 360         case (MAN_BODY):
 361                 if (0 == n->nchild)
 362                         man_nmsg(man, n, MANDOCERR_IGNPAR);
 363                 break;
 364         case (MAN_HEAD):
 365                 if (n->nchild)
 366                         man_nmsg(man, n, MANDOCERR_ARGSLOST);
 367                 break;
 368         default:
 369                 break;
 370         }
 371 
 372         return(1);
 373 }
 374 
 375 static int
 376 post_IP(CHKARGS)
 377 {
 378 
 379         switch (n->type) {
 380         case (MAN_BLOCK):
 381                 if (0 == n->head->nchild && 0 == n->body->nchild)
 382                         man_node_delete(man, n);
 383                 break;
 384         case (MAN_BODY):
 385                 if (0 == n->parent->head->nchild && 0 == n->nchild)
 386                         man_nmsg(man, n, MANDOCERR_IGNPAR);
 387                 break;
 388         default:
 389                 break;
 390         }
 391         return(1);
 392 }
 393 
 394 static int
 395 post_TH(CHKARGS)
 396 {
 397         const char      *p;
 398         int              line, pos;
 399 
 400         free(man->meta.title);
 401         free(man->meta.vol);
 402         free(man->meta.source);
 403         free(man->meta.msec);
 404         free(man->meta.date);
 405 
 406         line = n->line;
 407         pos = n->pos;
 408         man->meta.title = man->meta.vol = man->meta.date =
 409                 man->meta.msec = man->meta.source = NULL;
 410 
 411         /* ->TITLE<- MSEC DATE SOURCE VOL */
 412 
 413         n = n->child;
 414         if (n && n->string) {
 415                 for (p = n->string; '\0' != *p; p++) {
 416                         /* Only warn about this once... */
 417                         if (isalpha((unsigned char)*p) && 
 418                                         ! isupper((unsigned char)*p)) {
 419                                 man_nmsg(man, n, MANDOCERR_UPPERCASE);
 420                                 break;
 421                         }
 422                 }
 423                 man->meta.title = mandoc_strdup(n->string);
 424         } else
 425                 man->meta.title = mandoc_strdup("");
 426 
 427         /* TITLE ->MSEC<- DATE SOURCE VOL */
 428 
 429         if (n)
 430                 n = n->next;
 431         if (n && n->string)
 432                 man->meta.msec = mandoc_strdup(n->string);
 433         else
 434                 man->meta.msec = mandoc_strdup("");
 435 
 436         /* TITLE MSEC ->DATE<- SOURCE VOL */
 437 
 438         if (n)
 439                 n = n->next;
 440         if (n && n->string && '\0' != n->string[0]) {
 441                 pos = n->pos;
 442                 man->meta.date = mandoc_normdate
 443                     (man->parse, n->string, line, pos);
 444         } else
 445                 man->meta.date = mandoc_strdup("");
 446 
 447         /* TITLE MSEC DATE ->SOURCE<- VOL */
 448 
 449         if (n && (n = n->next))
 450                 man->meta.source = mandoc_strdup(n->string);
 451 
 452         /* TITLE MSEC DATE SOURCE ->VOL<- */
 453         /* If missing, use the default VOL name for MSEC. */
 454 
 455         if (n && (n = n->next))
 456                 man->meta.vol = mandoc_strdup(n->string);
 457         else if ('\0' != man->meta.msec[0] &&
 458             (NULL != (p = mandoc_a2msec(man->meta.msec))))
 459                 man->meta.vol = mandoc_strdup(p);
 460 
 461         /*
 462          * Remove the `TH' node after we've processed it for our
 463          * meta-data.
 464          */
 465         man_node_delete(man, man->last);
 466         return(1);
 467 }
 468 
 469 static int
 470 post_nf(CHKARGS)
 471 {
 472 
 473         if (MAN_LITERAL & man->flags)
 474                 man_nmsg(man, n, MANDOCERR_SCOPEREP);
 475 
 476         man->flags |= MAN_LITERAL;
 477         return(1);
 478 }
 479 
 480 static int
 481 post_fi(CHKARGS)
 482 {
 483 
 484         if ( ! (MAN_LITERAL & man->flags))
 485                 man_nmsg(man, n, MANDOCERR_WNOSCOPE);
 486 
 487         man->flags &= ~MAN_LITERAL;
 488         return(1);
 489 }
 490 
 491 static int
 492 post_UC(CHKARGS)
 493 {
 494         static const char * const bsd_versions[] = {
 495             "3rd Berkeley Distribution",
 496             "4th Berkeley Distribution",
 497             "4.2 Berkeley Distribution",
 498             "4.3 Berkeley Distribution",
 499             "4.4 Berkeley Distribution",
 500         };
 501 
 502         const char      *p, *s;
 503 
 504         n = n->child;
 505 
 506         if (NULL == n || MAN_TEXT != n->type)
 507                 p = bsd_versions[0];
 508         else {
 509                 s = n->string;
 510                 if (0 == strcmp(s, "3"))
 511                         p = bsd_versions[0];
 512                 else if (0 == strcmp(s, "4"))
 513                         p = bsd_versions[1];
 514                 else if (0 == strcmp(s, "5"))
 515                         p = bsd_versions[2];
 516                 else if (0 == strcmp(s, "6"))
 517                         p = bsd_versions[3];
 518                 else if (0 == strcmp(s, "7"))
 519                         p = bsd_versions[4];
 520                 else
 521                         p = bsd_versions[0];
 522         }
 523 
 524         free(man->meta.source);
 525         man->meta.source = mandoc_strdup(p);
 526         return(1);
 527 }
 528 
 529 static int
 530 post_AT(CHKARGS)
 531 {
 532         static const char * const unix_versions[] = {
 533             "7th Edition",
 534             "System III",
 535             "System V",
 536             "System V Release 2",
 537         };
 538 
 539         const char      *p, *s;
 540         struct man_node *nn;
 541 
 542         n = n->child;
 543 
 544         if (NULL == n || MAN_TEXT != n->type)
 545                 p = unix_versions[0];
 546         else {
 547                 s = n->string;
 548                 if (0 == strcmp(s, "3"))
 549                         p = unix_versions[0];
 550                 else if (0 == strcmp(s, "4"))
 551                         p = unix_versions[1];
 552                 else if (0 == strcmp(s, "5")) {
 553                         nn = n->next;
 554                         if (nn && MAN_TEXT == nn->type && nn->string[0])
 555                                 p = unix_versions[3];
 556                         else
 557                                 p = unix_versions[2];
 558                 } else
 559                         p = unix_versions[0];
 560         }
 561 
 562         free(man->meta.source);
 563         man->meta.source = mandoc_strdup(p);
 564         return(1);
 565 }
 566 
 567 static int
 568 post_vs(CHKARGS)
 569 {
 570 
 571         if (NULL != n->prev)
 572                 return(1);
 573 
 574         switch (n->parent->tok) {
 575         case (MAN_SH):
 576                 /* FALLTHROUGH */
 577         case (MAN_SS):
 578                 man_nmsg(man, n, MANDOCERR_IGNPAR);
 579                 /* FALLTHROUGH */
 580         case (MAN_MAX):
 581                 /* 
 582                  * Don't warn about this because it occurs in pod2man
 583                  * and would cause considerable (unfixable) warnage.
 584                  */
 585                 man_node_delete(man, n);
 586                 break;
 587         default:
 588                 break;
 589         }
 590 
 591         return(1);
 592 }