1 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
   2 /*        All Rights Reserved   */
   3 
   4 
   5 /*
   6  * Copyright (c) 1980 Regents of the University of California.
   7  * All rights reserved. The Berkeley software License Agreement
   8  * specifies the terms and conditions for redistribution.
   9  */
  10 
  11 /*
  12  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
  13  * Use is subject to license terms.
  14  */
  15 
  16 /*
  17  * Copyright (c) 2018, Joyent, Inc.
  18  */
  19 
  20 /*
  21  * checknr: check an nroff/troff input file for matching macro calls.
  22  * we also attempt to match size and font changes, but only the embedded
  23  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
  24  * later but for now think of these restrictions as contributions to
  25  * structured typesetting.
  26  */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <locale.h>
#include <libintl.h>
  33 
#define MAXSTK  100     /* Stack size */
static  int     maxstk; /* number of entries currently allocated for stk */
#define MAXBR   100     /* Max number of bracket pairs known */
#define MAXCMDS 500     /* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 *
 * Each entry describes one opener that has not been closed yet: either
 * a bracketing request found at the start of a line (pl == 0) or an
 * embedded \s / \f escape (pl != 0).  stk is heap allocated and grown
 * by growstk(); stktop is the index of the top entry, -1 when empty.
 */
static struct stkstr {
        int opno;       /* number of opening bracket */
        int pl;         /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
        int parm;       /* parm to size, font, etc */
        int lno;        /* line number the thing came in in */
} *stk;
static int stktop;
  49 
/*
 * The kinds of opening and closing brackets.
 *
 * Each entry pairs the name of an opening request (opbr) with the name
 * of the request that closes it (clbr).  The list is terminated by an
 * entry whose opbr is a null pointer; the -a option appends further
 * pairs in front of that terminator.  Entries SZ and FT are referenced
 * by index for the embedded \s and \f escapes.
 *
 * Some pairs appear twice because they exist in more than one macro
 * package; chkcmd() stops at the first entry that matches, so the
 * duplicates are harmless.
 */
static struct brstr {
        char *opbr;
        char *clbr;
} br[MAXBR] = {
        /* A few bare bones troff commands */
#define SZ      0
        "sz",   "sz",   /* also \s */
#define FT      1
        "ft",   "ft",   /* also \f */
        /* the -mm package */
        "AL",   "LE",
        "AS",   "AE",
        "BL",   "LE",
        "BS",   "BE",
        "DF",   "DE",
        "DL",   "LE",
        "DS",   "DE",
        "FS",   "FE",
        "ML",   "LE",
        "NS",   "NE",
        "RL",   "LE",
        "VL",   "LE",
        /* the -ms package */
        "AB",   "AE",
        "BD",   "DE",
        "CD",   "DE",
        "DS",   "DE",
        "FS",   "FE",
        "ID",   "DE",
        "KF",   "KE",
        "KS",   "KE",
        "LD",   "DE",
        "LG",   "NL",
        "QS",   "QE",
        "RS",   "RE",
        "SM",   "NL",
        "XA",   "XE",
        "XS",   "XE",
        /* The -me package */
        "(b",   ")b",
        "(c",   ")c",
        "(d",   ")d",
        "(f",   ")f",
        "(l",   ")l",
        "(q",   ")q",
        "(x",   ")x",
        "(z",   ")z",
        /* Things needed by preprocessors */
        "EQ",   "EN",
        "TS",   "TE",
        /* Refer */
        "[",    "]",
        0,      0
};
 107 
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * The table MUST stay sorted by the character codes of the first two
 * characters of each name, because binsrch() does a binary search on
 * exactly those two characters.  addmac() inserts new names at the
 * position binsrch() reports, which keeps the order intact.  The
 * trailing 0 is only used by main() to count the initial entries;
 * afterwards the number of valid entries is kept in ncmds.
 */
static char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};
 145 
/*
 * State shared by the routines below.  line is filled by fgets() in
 * process(), so an input line longer than sizeof (line) - 1 characters
 * arrives in several pieces, each of which is counted as a line.
 */
static  int     lineno;         /* current line number in input file */
static  char    line[256];      /* the current line */
static  char    *cfilename;     /* name of current file */
static  int     nfiles;         /* number of files to process */
static  int     fflag;          /* -f: ignore \f */
static  int     sflag;          /* -s: ignore \s */
static  int     ncmds;          /* size of knowncmds */
static  int     slot;           /* slot in knowncmds found by binsrch */
 154 
/*
 * Forward declarations.  Everything is file-local; main() is the only
 * external entry point.
 */
static void growstk();                          /* push a stack entry */
static void usage();                            /* print synopsis, exit */
static void process(FILE *f);                   /* check one input file */
static void complain(int i);                    /* report stack entry i */
static void prop(int i);                        /* print stack entry i */
static void chkcmd(char *line, char *mac);      /* match request to stack */
static void nomatch(char *mac);                 /* closer != top of stack */
static int eq(char *s1, char *s2);              /* string equality */
static void pe(int lineno);                     /* diagnostic prefix */
static void checkknown(char *mac);              /* warn if unknown */
static void addcmd(char *line);                 /* learn name from .de */
static void addmac(char *mac);                  /* insert in knowncmds */
static int binsrch(char *mac);                  /* look up in knowncmds */
 168 
 169 static void
 170 growstk()
 171 {
 172         stktop++;
 173         if (stktop >= maxstk) {
 174                 maxstk *= 2;
 175                 stk = (struct stkstr *)realloc(stk,
 176                     sizeof (struct stkstr) * maxstk);
 177         }
 178 }
 179 
 180 int
 181 main(argc, argv)
 182 int argc;
 183 char **argv;
 184 {
 185         FILE *f;
 186         int i;
 187         char *cp;
 188         char b1[4];
 189 
 190         (void) setlocale(LC_ALL, "");
 191 #if !defined(TEXT_DOMAIN)
 192 #define TEXT_DOMAIN     "SYS_TEST"
 193 #endif
 194         (void) textdomain(TEXT_DOMAIN);
 195         stk = (struct stkstr *)calloc(100, sizeof (struct stkstr));
 196         maxstk = 100;
 197         /* Figure out how many known commands there are */
 198         while (knowncmds[ncmds])
 199                 ncmds++;
 200         while (argc > 1 && argv[1][0] == '-') {
 201                 switch (argv[1][1]) {
 202 
 203                 /* -a: add pairs of macros */
 204                 case 'a':
 205                         i = strlen(argv[1]) - 2;
 206                         if (i % 6 != 0)
 207                                 usage();
 208                         /* look for empty macro slots */
 209                         for (i = 0; br[i].opbr; i++)
 210                                 ;
 211                         for (cp = argv[1]+3; cp[-1]; cp += 6) {
 212                                 br[i].opbr = malloc(3);
 213                                 (void) strncpy(br[i].opbr, cp, 2);
 214                                 br[i].clbr = malloc(3);
 215                                 (void) strncpy(br[i].clbr, cp+3, 2);
 216                                 /* knows pairs are also known cmds */
 217                                 addmac(br[i].opbr);
 218                                 addmac(br[i].clbr);
 219                                 i++;
 220                         }
 221                         break;
 222 
 223                 /* -c: add known commands */
 224                 case 'c':
 225                         i = strlen(argv[1]) - 2;
 226                         if (i % 3 != 0)
 227                                 usage();
 228                         for (cp = argv[1]+3; cp[-1]; cp += 3) {
 229                                 if (cp[2] && cp[2] != '.')
 230                                         usage();
 231                                 (void) strncpy(b1, cp, 2);
 232                                 addmac(b1);
 233                         }
 234                         break;
 235 
 236                 /* -f: ignore font changes */
 237                 case 'f':
 238                         fflag = 1;
 239                         break;
 240 
 241                 /* -s: ignore size changes */
 242                 case 's':
 243                         sflag = 1;
 244                         break;
 245                 default:
 246                         usage();
 247                 }
 248                 argc--; argv++;
 249         }
 250 
 251         nfiles = argc - 1;
 252 
 253         if (nfiles > 0) {
 254                 for (i = 1; i < argc; i++) {
 255                         cfilename = argv[i];
 256                         f = fopen(cfilename, "r");
 257                         if (f == NULL) {
 258                                 perror(cfilename);
 259                                 exit(1);
 260                                 }
 261                         else
 262                                 process(f);
 263                 }
 264         } else {
 265                 cfilename = "stdin";
 266                 process(stdin);
 267         }
 268         return (0);
 269 }
 270 
 271 static void
 272 usage()
 273 {
 274         (void) printf(gettext("Usage: \
 275 checknr [ -fs ] [ -a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [ filename .. ]\n"));
 276         exit(1);
 277 }
 278 
 279 static void
 280 process(FILE *f)
 281 {
 282         int i, n;
 283         char mac[5];    /* The current macro or nroff command */
 284         int pl;
 285 
 286         stktop = -1;
 287         for (lineno = 1; fgets(line, sizeof (line), f); lineno++) {
 288                 if (line[0] == '.') {
 289                         /*
 290                          * find and isolate the macro/command name.
 291                          */
 292                         (void) strncpy(mac, line+1, 4);
 293                         if (isspace(mac[0])) {
 294                                 pe(lineno);
 295                                 (void) printf(gettext("Empty command\n"));
 296                         } else if (isspace(mac[1])) {
 297                                 mac[1] = 0;
 298                         } else if (isspace(mac[2])) {
 299                                 mac[2] = 0;
 300                         } else if (mac[0] != '\\' || mac[1] != '\"') {
 301                                 pe(lineno);
 302                                 (void) printf(gettext("Command too long\n"));
 303                         }
 304 
 305                         /*
 306                          * Is it a known command?
 307                          */
 308                         checkknown(mac);
 309 
 310                         /*
 311                          * Should we add it?
 312                          */
 313                         if (eq(mac, "de"))
 314                                 addcmd(line);
 315 
 316                         chkcmd(line, mac);
 317                 }
 318 
 319                 /*
 320                  * At this point we process the line looking
 321                  * for \s and \f.
 322                  */
 323                 for (i = 0; line[i]; i++)
 324                         if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
 325                                 if (!sflag && line[++i] == 's') {
 326                                         pl = line[++i];
 327                                         if (isdigit(pl)) {
 328                                                 n = pl - '0';
 329                                                 pl = ' ';
 330                                         } else
 331                                                 n = 0;
 332                                         while (isdigit(line[++i]))
 333                                                 n = 10 * n + line[i] - '0';
 334                                         i--;
 335                                         if (n == 0) {
 336                                                 if (stk[stktop].opno == SZ) {
 337                                                         stktop--;
 338                                                 } else {
 339                                                         pe(lineno);
 340                                                         (void) printf(
 341                                                 gettext("unmatched \\s0\n"));
 342                                                 }
 343                                         } else {
 344                                                 growstk();
 345                                                 stk[stktop].opno = SZ;
 346                                                 stk[stktop].pl = pl;
 347                                                 stk[stktop].parm = n;
 348                                                 stk[stktop].lno = lineno;
 349                                         }
 350                                 } else if (!fflag && line[i] == 'f') {
 351                                         n = line[++i];
 352                                         if (n == 'P') {
 353                                                 if (stk[stktop].opno == FT) {
 354                                                         stktop--;
 355                                                 } else {
 356                                                         pe(lineno);
 357                                                         (void) printf(
 358                                                 gettext("unmatched \\fP\n"));
 359                                                 }
 360                                         } else {
 361                                                 growstk();
 362                                                 stk[stktop].opno = FT;
 363                                                 stk[stktop].pl = 1;
 364                                                 stk[stktop].parm = n;
 365                                                 stk[stktop].lno = lineno;
 366                                         }
 367                                 }
 368                         }
 369         }
 370         /*
 371          * We've hit the end and look at all this stuff that hasn't been
 372          * matched yet!  Complain, complain.
 373          */
 374         for (i = stktop; i >= 0; i--) {
 375                 complain(i);
 376         }
 377 }
 378 
 379 static void
 380 complain(int i)
 381 {
 382         pe(stk[i].lno);
 383         (void) printf(gettext("Unmatched "));
 384         prop(i);
 385         (void) printf("\n");
 386 }
 387 
 388 static void
 389 prop(int i)
 390 {
 391         if (stk[i].pl == 0)
 392                 (void) printf(".%s", br[stk[i].opno].opbr);
 393         else switch (stk[i].opno) {
 394         case SZ:
 395                 (void) printf("\\s%c%d", stk[i].pl, stk[i].parm);
 396                 break;
 397         case FT:
 398                 (void) printf("\\f%c", stk[i].parm);
 399                 break;
 400         default:
 401                 (void) printf(gettext("Bug: stk[%d].opno = %d = .%s, .%s"),
 402                         i, stk[i].opno, br[stk[i].opno].opbr,
 403                         br[stk[i].opno].clbr);
 404         }
 405 }
 406 
 407 /* ARGSUSED */
 408 static void
 409 chkcmd(char *line, char *mac)
 410 {
 411         int i;
 412 
 413         /*
 414          * Check to see if it matches top of stack.
 415          */
 416         if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
 417                 stktop--;       /* OK. Pop & forget */
 418         else {
 419                 /* No. Maybe it's an opener */
 420                 for (i = 0; br[i].opbr; i++) {
 421                         if (eq(mac, br[i].opbr)) {
 422                                 /* Found. Push it. */
 423                                 growstk();
 424                                 stk[stktop].opno = i;
 425                                 stk[stktop].pl = 0;
 426                                 stk[stktop].parm = 0;
 427                                 stk[stktop].lno = lineno;
 428                                 break;
 429                         }
 430                         /*
 431                          * Maybe it's an unmatched closer.
 432                          * NOTE: this depends on the fact
 433                          * that none of the closers can be
 434                          * openers too.
 435                          */
 436                         if (eq(mac, br[i].clbr)) {
 437                                 nomatch(mac);
 438                                 break;
 439                         }
 440                 }
 441         }
 442 }
 443 
 444 static void
 445 nomatch(char *mac)
 446 {
 447         int i, j;
 448 
 449         /*
 450          * Look for a match further down on stack
 451          * If we find one, it suggests that the stuff in
 452          * between is supposed to match itself.
 453          */
 454         for (j = stktop; j >= 0; j--)
 455                 if (eq(mac, br[stk[j].opno].clbr)) {
 456                         /* Found.  Make a good diagnostic. */
 457                         if (j == stktop-2) {
 458                                 /*
 459                                  * Check for special case \fx..\fR and don't
 460                                  * complain.
 461                                  */
 462                                 if (stk[j+1].opno == FT &&
 463                                     stk[j+1].parm != 'R' &&
 464                                     stk[j+2].opno == FT &&
 465                                     stk[j+2].parm == 'R') {
 466                                         stktop = j -1;
 467                                         return;
 468                                 }
 469                                 /*
 470                                  * We have two unmatched frobs.  Chances are
 471                                  * they were intended to match, so we mention
 472                                  * them together.
 473                                  */
 474                                 pe(stk[j+1].lno);
 475                                 prop(j+1);
 476                                 (void) printf(gettext(" does not match %d: "),
 477                                         stk[j+2].lno);
 478                                 prop(j+2);
 479                                 (void) printf("\n");
 480                         } else for (i = j+1; i <= stktop; i++) {
 481                                 complain(i);
 482                         }
 483                         stktop = j-1;
 484                         return;
 485                 }
 486         /* Didn't find one.  Throw this away. */
 487         pe(lineno);
 488         (void) printf(gettext("Unmatched .%s\n"), mac);
 489 }
 490 
 491 /* eq: are two strings equal? */
 492 static int
 493 eq(char *s1, char *s2)
 494 {
 495         return (strcmp(s1, s2) == 0);
 496 }
 497 
 498 /* print the first part of an error message, given the line number */
 499 static void
 500 pe(int lineno)
 501 {
 502         if (nfiles > 1)
 503                 (void) printf("%s: ", cfilename);
 504         (void) printf("%d: ", lineno);
 505 }
 506 
 507 static void
 508 checkknown(char *mac)
 509 {
 510 
 511         if (eq(mac, "."))
 512                 return;
 513         if (binsrch(mac) >= 0)
 514                 return;
 515         if (mac[0] == '\\' && mac[1] == '"')    /* comments */
 516                 return;
 517 
 518         pe(lineno);
 519         (void) printf(gettext("Unknown command: .%s\n"), mac);
 520 }
 521 
 522 /*
 523  * We have a .de xx line in "line".  Add xx to the list of known commands.
 524  */
 525 static void
 526 addcmd(char *line)
 527 {
 528         char *mac;
 529 
 530         /* grab the macro being defined */
 531         mac = line+4;
 532         while (isspace(*mac))
 533                 mac++;
 534         if (*mac == 0) {
 535                 pe(lineno);
 536                 (void) printf(gettext("illegal define: %s\n"), line);
 537                 return;
 538         }
 539         mac[2] = 0;
 540         if (isspace(mac[1]) || mac[1] == '\\')
 541                 mac[1] = 0;
 542         if (ncmds >= MAXCMDS) {
 543                 (void) printf(gettext("Only %d known commands allowed\n"),
 544                     MAXCMDS);
 545                 exit(1);
 546         }
 547         addmac(mac);
 548 }
 549 
 550 /*
 551  * Add mac to the list.  We should really have some kind of tree
 552  * structure here but this is a quick-and-dirty job and I just don't
 553  * have time to mess with it.  (I wonder if this will come back to haunt
 554  * me someday?)  Anyway, I claim that .de is fairly rare in user
 555  * nroff programs, and the loop below is pretty fast.
 556  */
 557 static void
 558 addmac(char *mac)
 559 {
 560         char **src, **dest, **loc;
 561 
 562         if (binsrch(mac) >= 0) {     /* it's OK to redefine something */
 563 #ifdef DEBUG
 564                 (void) printf("binsrch(%s) -> already in table\n", mac);
 565 #endif
 566                 return;
 567         }
 568         /* binsrch sets slot as a side effect */
 569 #ifdef DEBUG
 570 printf("binsrch(%s) -> %d\n", mac, slot);
 571 #endif
 572         loc = &knowncmds[slot];
 573         src = &knowncmds[ncmds-1];
 574         dest = src+1;
 575         while (dest > loc)
 576                 *dest-- = *src--;
 577         *loc = malloc(3);
 578         (void) strcpy(*loc, mac);
 579         ncmds++;
 580 #ifdef DEBUG
 581         (void) printf("after: %s %s %s %s %s, %d cmds\n",
 582             knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot],
 583             knowncmds[slot+1], knowncmds[slot+2], ncmds);
 584 #endif
 585 }
 586 
 587 /*
 588  * Do a binary search in knowncmds for mac.
 589  * If found, return the index.  If not, return -1.
 590  */
 591 static int
 592 binsrch(char *mac)
 593 {
 594         char *p;        /* pointer to current cmd in list */
 595         int d;          /* difference if any */
 596         int mid;        /* mid point in binary search */
 597         int top, bot;   /* boundaries of bin search, inclusive */
 598 
 599         top = ncmds-1;
 600         bot = 0;
 601         while (top >= bot) {
 602                 mid = (top+bot)/2;
 603                 p = knowncmds[mid];
 604                 d = p[0] - mac[0];
 605                 if (d == 0)
 606                         d = p[1] - mac[1];
 607                 if (d == 0)
 608                         return (mid);
 609                 if (d < 0)
 610                         bot = mid + 1;
 611                 else
 612                         top = mid - 1;
 613         }
 614         slot = bot;     /* place it would have gone */
 615         return (-1);
 616 }