1 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
   2 /*        All Rights Reserved   */
   3 
   4 
   5 /*
   6  * Copyright (c) 1980 Regents of the University of California.
   7  * All rights reserved. The Berkeley software License Agreement
   8  * specifies the terms and conditions for redistribution.
   9  */
  10 
  11 /*
  12  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
  13  * Use is subject to license terms.
  14  */
  15 
  16 #pragma ident   "%Z%%M% %I%     %E% SMI"
  17 
  18 /*
  19  * checknr: check an nroff/troff input file for matching macro calls.
  20  * we also attempt to match size and font changes, but only the embedded
  21  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
  22  * later but for now think of these restrictions as contributions to
  23  * structured typesetting.
  24  */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <locale.h>
#include <libintl.h>
  31 
  32 #define MAXSTK  100     /* Stack size */
  33 static  int     maxstk;
  34 #define MAXBR   100     /* Max number of bracket pairs known */
  35 #define MAXCMDS 500     /* Max number of commands known */
  36 
  37 /*
  38  * The stack on which we remember what we've seen so far.
  39  */
  40 static struct stkstr {
  41         int opno;       /* number of opening bracket */
  42         int pl;         /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
  43         int parm;       /* parm to size, font, etc */
  44         int lno;        /* line number the thing came in in */
  45 } *stk;
  46 static int stktop;
  47 
  48 /*
  49  * The kinds of opening and closing brackets.
  50  */
  51 static struct brstr {
  52         char *opbr;
  53         char *clbr;
  54 } br[MAXBR] = {
  55         /* A few bare bones troff commands */
  56 #define SZ      0
  57         "sz",   "sz",   /* also \s */
  58 #define FT      1
  59         "ft",   "ft",   /* also \f */
  60         /* the -mm package */
  61         "AL",   "LE",
  62         "AS",   "AE",
  63         "BL",   "LE",
  64         "BS",   "BE",
  65         "DF",   "DE",
  66         "DL",   "LE",
  67         "DS",   "DE",
  68         "FS",   "FE",
  69         "ML",   "LE",
  70         "NS",   "NE",
  71         "RL",   "LE",
  72         "VL",   "LE",
  73         /* the -ms package */
  74         "AB",   "AE",
  75         "BD",   "DE",
  76         "CD",   "DE",
  77         "DS",   "DE",
  78         "FS",   "FE",
  79         "ID",   "DE",
  80         "KF",   "KE",
  81         "KS",   "KE",
  82         "LD",   "DE",
  83         "LG",   "NL",
  84         "QS",   "QE",
  85         "RS",   "RE",
  86         "SM",   "NL",
  87         "XA",   "XE",
  88         "XS",   "XE",
  89         /* The -me package */
  90         "(b",   ")b",
  91         "(c",   ")c",
  92         "(d",   ")d",
  93         "(f",   ")f",
  94         "(l",   ")l",
  95         "(q",   ")q",
  96         "(x",   ")x",
  97         "(z",   ")z",
  98         /* Things needed by preprocessors */
  99         "EQ",   "EN",
 100         "TS",   "TE",
 101         /* Refer */
 102         "[",    "]",
 103         0,      0
 104 };
 105 
 106 /*
 107  * All commands known to nroff, plus macro packages.
 108  * Used so we can complain about unrecognized commands.
 109  */
 110 static char *knowncmds[MAXCMDS] = {
 111 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
 112 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
 113 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
 114 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
 115 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
 116 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
 117 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
 118 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
 119 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
 120 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
 121 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
 122 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
 123 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
 124 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
 125 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
 126 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
 127 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
 128 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
 129 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
 130 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
 131 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
 132 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
 133 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
 134 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
 135 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
 136 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
 137 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
 138 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
 139 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
 140 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
 141 "yr", 0
 142 };
 143 
 144 static  int     lineno;         /* current line number in input file */
 145 static  char    line[256];      /* the current line */
 146 static  char    *cfilename;     /* name of current file */
 147 static  int     nfiles;         /* number of files to process */
 148 static  int     fflag;          /* -f: ignore \f */
 149 static  int     sflag;          /* -s: ignore \s */
 150 static  int     ncmds;          /* size of knowncmds */
 151 static  int     slot;           /* slot in knowncmds found by binsrch */
 152 
 153 static void growstk();
 154 static void usage();
 155 static void process(FILE *f);
 156 static void complain(int i);
 157 static void prop(int i);
 158 static void chkcmd(char *line, char *mac);
 159 static void nomatch(char *mac);
 160 static int eq(char *s1, char *s2);
 161 static void pe(int lineno);
 162 static void checkknown(char *mac);
 163 static void addcmd(char *line);
 164 static void addmac(char *mac);
 165 static int binsrch(char *mac);
 166 
 167 static void
 168 growstk()
 169 {
 170         stktop++;
 171         if (stktop >= maxstk) {
 172                 maxstk *= 2;
 173                 stk = (struct stkstr *)realloc(stk,
 174                     sizeof (struct stkstr) * maxstk);
 175         }
 176 }
 177 
 178 int
 179 main(argc, argv)
 180 int argc;
 181 char **argv;
 182 {
 183         FILE *f;
 184         int i;
 185         char *cp;
 186         char b1[4];
 187 
 188         (void) setlocale(LC_ALL, "");
 189 #if !defined(TEXT_DOMAIN)
 190 #define TEXT_DOMAIN     "SYS_TEST"
 191 #endif
 192         (void) textdomain(TEXT_DOMAIN);
 193         stk = (struct stkstr *)calloc(sizeof (struct stkstr), 100);
 194         maxstk = 100;
 195         /* Figure out how many known commands there are */
 196         while (knowncmds[ncmds])
 197                 ncmds++;
 198         while (argc > 1 && argv[1][0] == '-') {
 199                 switch (argv[1][1]) {
 200 
 201                 /* -a: add pairs of macros */
 202                 case 'a':
 203                         i = strlen(argv[1]) - 2;
 204                         if (i % 6 != 0)
 205                                 usage();
 206                         /* look for empty macro slots */
 207                         for (i = 0; br[i].opbr; i++)
 208                                 ;
 209                         for (cp = argv[1]+3; cp[-1]; cp += 6) {
 210                                 br[i].opbr = malloc(3);
 211                                 (void) strncpy(br[i].opbr, cp, 2);
 212                                 br[i].clbr = malloc(3);
 213                                 (void) strncpy(br[i].clbr, cp+3, 2);
 214                                 /* knows pairs are also known cmds */
 215                                 addmac(br[i].opbr);
 216                                 addmac(br[i].clbr);
 217                                 i++;
 218                         }
 219                         break;
 220 
 221                 /* -c: add known commands */
 222                 case 'c':
 223                         i = strlen(argv[1]) - 2;
 224                         if (i % 3 != 0)
 225                                 usage();
 226                         for (cp = argv[1]+3; cp[-1]; cp += 3) {
 227                                 if (cp[2] && cp[2] != '.')
 228                                         usage();
 229                                 (void) strncpy(b1, cp, 2);
 230                                 addmac(b1);
 231                         }
 232                         break;
 233 
 234                 /* -f: ignore font changes */
 235                 case 'f':
 236                         fflag = 1;
 237                         break;
 238 
 239                 /* -s: ignore size changes */
 240                 case 's':
 241                         sflag = 1;
 242                         break;
 243                 default:
 244                         usage();
 245                 }
 246                 argc--; argv++;
 247         }
 248 
 249         nfiles = argc - 1;
 250 
 251         if (nfiles > 0) {
 252                 for (i = 1; i < argc; i++) {
 253                         cfilename = argv[i];
 254                         f = fopen(cfilename, "r");
 255                         if (f == NULL) {
 256                                 perror(cfilename);
 257                                 exit(1);
 258                                 }
 259                         else
 260                                 process(f);
 261                 }
 262         } else {
 263                 cfilename = "stdin";
 264                 process(stdin);
 265         }
 266         return (0);
 267 }
 268 
 269 static void
 270 usage()
 271 {
 272         (void) printf(gettext("Usage: \
 273 checknr [ -fs ] [ -a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [ filename .. ]\n"));
 274         exit(1);
 275 }
 276 
 277 static void
 278 process(FILE *f)
 279 {
 280         int i, n;
 281         char mac[5];    /* The current macro or nroff command */
 282         int pl;
 283 
 284         stktop = -1;
 285         for (lineno = 1; fgets(line, sizeof (line), f); lineno++) {
 286                 if (line[0] == '.') {
 287                         /*
 288                          * find and isolate the macro/command name.
 289                          */
 290                         (void) strncpy(mac, line+1, 4);
 291                         if (isspace(mac[0])) {
 292                                 pe(lineno);
 293                                 (void) printf(gettext("Empty command\n"));
 294                         } else if (isspace(mac[1])) {
 295                                 mac[1] = 0;
 296                         } else if (isspace(mac[2])) {
 297                                 mac[2] = 0;
 298                         } else if (mac[0] != '\\' || mac[1] != '\"') {
 299                                 pe(lineno);
 300                                 (void) printf(gettext("Command too long\n"));
 301                         }
 302 
 303                         /*
 304                          * Is it a known command?
 305                          */
 306                         checkknown(mac);
 307 
 308                         /*
 309                          * Should we add it?
 310                          */
 311                         if (eq(mac, "de"))
 312                                 addcmd(line);
 313 
 314                         chkcmd(line, mac);
 315                 }
 316 
 317                 /*
 318                  * At this point we process the line looking
 319                  * for \s and \f.
 320                  */
 321                 for (i = 0; line[i]; i++)
 322                         if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
 323                                 if (!sflag && line[++i] == 's') {
 324                                         pl = line[++i];
 325                                         if (isdigit(pl)) {
 326                                                 n = pl - '0';
 327                                                 pl = ' ';
 328                                         } else
 329                                                 n = 0;
 330                                         while (isdigit(line[++i]))
 331                                                 n = 10 * n + line[i] - '0';
 332                                         i--;
 333                                         if (n == 0) {
 334                                                 if (stk[stktop].opno == SZ) {
 335                                                         stktop--;
 336                                                 } else {
 337                                                         pe(lineno);
 338                                                         (void) printf(
 339                                                 gettext("unmatched \\s0\n"));
 340                                                 }
 341                                         } else {
 342                                                 growstk();
 343                                                 stk[stktop].opno = SZ;
 344                                                 stk[stktop].pl = pl;
 345                                                 stk[stktop].parm = n;
 346                                                 stk[stktop].lno = lineno;
 347                                         }
 348                                 } else if (!fflag && line[i] == 'f') {
 349                                         n = line[++i];
 350                                         if (n == 'P') {
 351                                                 if (stk[stktop].opno == FT) {
 352                                                         stktop--;
 353                                                 } else {
 354                                                         pe(lineno);
 355                                                         (void) printf(
 356                                                 gettext("unmatched \\fP\n"));
 357                                                 }
 358                                         } else {
 359                                                 growstk();
 360                                                 stk[stktop].opno = FT;
 361                                                 stk[stktop].pl = 1;
 362                                                 stk[stktop].parm = n;
 363                                                 stk[stktop].lno = lineno;
 364                                         }
 365                                 }
 366                         }
 367         }
 368         /*
 369          * We've hit the end and look at all this stuff that hasn't been
 370          * matched yet!  Complain, complain.
 371          */
 372         for (i = stktop; i >= 0; i--) {
 373                 complain(i);
 374         }
 375 }
 376 
 377 static void
 378 complain(int i)
 379 {
 380         pe(stk[i].lno);
 381         (void) printf(gettext("Unmatched "));
 382         prop(i);
 383         (void) printf("\n");
 384 }
 385 
 386 static void
 387 prop(int i)
 388 {
 389         if (stk[i].pl == 0)
 390                 (void) printf(".%s", br[stk[i].opno].opbr);
 391         else switch (stk[i].opno) {
 392         case SZ:
 393                 (void) printf("\\s%c%d", stk[i].pl, stk[i].parm);
 394                 break;
 395         case FT:
 396                 (void) printf("\\f%c", stk[i].parm);
 397                 break;
 398         default:
 399                 (void) printf(gettext("Bug: stk[%d].opno = %d = .%s, .%s"),
 400                         i, stk[i].opno, br[stk[i].opno].opbr,
 401                         br[stk[i].opno].clbr);
 402         }
 403 }
 404 
 405 /* ARGSUSED */
 406 static void
 407 chkcmd(char *line, char *mac)
 408 {
 409         int i;
 410 
 411         /*
 412          * Check to see if it matches top of stack.
 413          */
 414         if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
 415                 stktop--;       /* OK. Pop & forget */
 416         else {
 417                 /* No. Maybe it's an opener */
 418                 for (i = 0; br[i].opbr; i++) {
 419                         if (eq(mac, br[i].opbr)) {
 420                                 /* Found. Push it. */
 421                                 growstk();
 422                                 stk[stktop].opno = i;
 423                                 stk[stktop].pl = 0;
 424                                 stk[stktop].parm = 0;
 425                                 stk[stktop].lno = lineno;
 426                                 break;
 427                         }
 428                         /*
 429                          * Maybe it's an unmatched closer.
 430                          * NOTE: this depends on the fact
 431                          * that none of the closers can be
 432                          * openers too.
 433                          */
 434                         if (eq(mac, br[i].clbr)) {
 435                                 nomatch(mac);
 436                                 break;
 437                         }
 438                 }
 439         }
 440 }
 441 
 442 static void
 443 nomatch(char *mac)
 444 {
 445         int i, j;
 446 
 447         /*
 448          * Look for a match further down on stack
 449          * If we find one, it suggests that the stuff in
 450          * between is supposed to match itself.
 451          */
 452         for (j = stktop; j >= 0; j--)
 453                 if (eq(mac, br[stk[j].opno].clbr)) {
 454                         /* Found.  Make a good diagnostic. */
 455                         if (j == stktop-2) {
 456                                 /*
 457                                  * Check for special case \fx..\fR and don't
 458                                  * complain.
 459                                  */
 460                                 if (stk[j+1].opno == FT &&
 461                                     stk[j+1].parm != 'R' &&
 462                                     stk[j+2].opno == FT &&
 463                                     stk[j+2].parm == 'R') {
 464                                         stktop = j -1;
 465                                         return;
 466                                 }
 467                                 /*
 468                                  * We have two unmatched frobs.  Chances are
 469                                  * they were intended to match, so we mention
 470                                  * them together.
 471                                  */
 472                                 pe(stk[j+1].lno);
 473                                 prop(j+1);
 474                                 (void) printf(gettext(" does not match %d: "),
 475                                         stk[j+2].lno);
 476                                 prop(j+2);
 477                                 (void) printf("\n");
 478                         } else for (i = j+1; i <= stktop; i++) {
 479                                 complain(i);
 480                         }
 481                         stktop = j-1;
 482                         return;
 483                 }
 484         /* Didn't find one.  Throw this away. */
 485         pe(lineno);
 486         (void) printf(gettext("Unmatched .%s\n"), mac);
 487 }
 488 
 489 /* eq: are two strings equal? */
 490 static int
 491 eq(char *s1, char *s2)
 492 {
 493         return (strcmp(s1, s2) == 0);
 494 }
 495 
 496 /* print the first part of an error message, given the line number */
 497 static void
 498 pe(int lineno)
 499 {
 500         if (nfiles > 1)
 501                 (void) printf("%s: ", cfilename);
 502         (void) printf("%d: ", lineno);
 503 }
 504 
 505 static void
 506 checkknown(char *mac)
 507 {
 508 
 509         if (eq(mac, "."))
 510                 return;
 511         if (binsrch(mac) >= 0)
 512                 return;
 513         if (mac[0] == '\\' && mac[1] == '"')    /* comments */
 514                 return;
 515 
 516         pe(lineno);
 517         (void) printf(gettext("Unknown command: .%s\n"), mac);
 518 }
 519 
 520 /*
 521  * We have a .de xx line in "line".  Add xx to the list of known commands.
 522  */
 523 static void
 524 addcmd(char *line)
 525 {
 526         char *mac;
 527 
 528         /* grab the macro being defined */
 529         mac = line+4;
 530         while (isspace(*mac))
 531                 mac++;
 532         if (*mac == 0) {
 533                 pe(lineno);
 534                 (void) printf(gettext("illegal define: %s\n"), line);
 535                 return;
 536         }
 537         mac[2] = 0;
 538         if (isspace(mac[1]) || mac[1] == '\\')
 539                 mac[1] = 0;
 540         if (ncmds >= MAXCMDS) {
 541                 (void) printf(gettext("Only %d known commands allowed\n"),
 542                     MAXCMDS);
 543                 exit(1);
 544         }
 545         addmac(mac);
 546 }
 547 
 548 /*
 549  * Add mac to the list.  We should really have some kind of tree
 550  * structure here but this is a quick-and-dirty job and I just don't
 551  * have time to mess with it.  (I wonder if this will come back to haunt
 552  * me someday?)  Anyway, I claim that .de is fairly rare in user
 553  * nroff programs, and the loop below is pretty fast.
 554  */
 555 static void
 556 addmac(char *mac)
 557 {
 558         char **src, **dest, **loc;
 559 
 560         if (binsrch(mac) >= 0) {     /* it's OK to redefine something */
 561 #ifdef DEBUG
 562                 (void) printf("binsrch(%s) -> already in table\n", mac);
 563 #endif
 564                 return;
 565         }
 566         /* binsrch sets slot as a side effect */
 567 #ifdef DEBUG
 568 printf("binsrch(%s) -> %d\n", mac, slot);
 569 #endif
 570         loc = &knowncmds[slot];
 571         src = &knowncmds[ncmds-1];
 572         dest = src+1;
 573         while (dest > loc)
 574                 *dest-- = *src--;
 575         *loc = malloc(3);
 576         (void) strcpy(*loc, mac);
 577         ncmds++;
 578 #ifdef DEBUG
 579         (void) printf("after: %s %s %s %s %s, %d cmds\n",
 580             knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot],
 581             knowncmds[slot+1], knowncmds[slot+2], ncmds);
 582 #endif
 583 }
 584 
 585 /*
 586  * Do a binary search in knowncmds for mac.
 587  * If found, return the index.  If not, return -1.
 588  */
 589 static int
 590 binsrch(char *mac)
 591 {
 592         char *p;        /* pointer to current cmd in list */
 593         int d;          /* difference if any */
 594         int mid;        /* mid point in binary search */
 595         int top, bot;   /* boundaries of bin search, inclusive */
 596 
 597         top = ncmds-1;
 598         bot = 0;
 599         while (top >= bot) {
 600                 mid = (top+bot)/2;
 601                 p = knowncmds[mid];
 602                 d = p[0] - mac[0];
 603                 if (d == 0)
 604                         d = p[1] - mac[1];
 605                 if (d == 0)
 606                         return (mid);
 607                 if (d < 0)
 608                         bot = mid + 1;
 609                 else
 610                         top = mid - 1;
 611         }
 612         slot = bot;     /* place it would have gone */
 613         return (-1);
 614 }