1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  *
  21  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  22  * Use is subject to license terms.
  23  */
  24 
  25 /*
  26  * Finds all unreferenced files in a source tree that do not match a list of
  27  * permitted pathnames.
  28  */
  29 
  30 #include <ctype.h>
  31 #include <errno.h>
  32 #include <fnmatch.h>
  33 #include <ftw.h>
  34 #include <stdarg.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <string.h>
  38 #include <time.h>
  39 #include <unistd.h>
  40 #include <sys/param.h>
  41 #include <sys/stat.h>
  42 #include <sys/types.h>
  43 
  44 /*
  45  * Pathname set: a simple datatype for storing pathname pattern globs and
  46  * for checking whether a given pathname is matched by a pattern glob in
  47  * the set.
  48  */
  49 typedef struct {
  50         char            **paths;
  51         unsigned int    npath;
  52         unsigned int    maxpaths;
  53 } pnset_t;
  54 
  55 /*
  56  * Data associated with the current Mercurial manifest.
  57  */
  58 typedef struct hgdata {
  59         pnset_t         *manifest;
  60         char            hgpath[MAXPATHLEN];
  61         char            root[MAXPATHLEN];
  62         unsigned int    rootlen;
  63         boolean_t       rootwarn;
  64 } hgdata_t;
  65 
  66 /*
  67  * Hooks used to check if a given unreferenced file is known to an SCM
  68  * (currently Mercurial and TeamWare).
  69  */
  70 typedef int checkscm_func_t(const char *, const struct FTW *);
  71 typedef void chdirscm_func_t(const char *);
  72 
  73 typedef struct {
  74         const char      *name;
  75         checkscm_func_t *checkfunc;
  76         chdirscm_func_t *chdirfunc;
  77 } scm_t;
  78 
  79 static checkscm_func_t check_tw, check_hg;
  80 static chdirscm_func_t chdir_hg;
  81 static int      pnset_add(pnset_t *, const char *);
  82 static int      pnset_check(const pnset_t *, const char *);
  83 static void     pnset_empty(pnset_t *);
  84 static void     pnset_free(pnset_t *);
  85 static int      checkpath(const char *, const struct stat *, int, struct FTW *);
  86 static pnset_t  *make_exset(const char *);
  87 static void     warn(const char *, ...);
  88 static void     die(const char *, ...);
  89 
  90 static const scm_t scms[] = {
  91         { "tw",         check_tw,       NULL            },
  92         { "teamware",   check_tw,       NULL            },
  93         { "hg",         check_hg,       chdir_hg        },
  94         { "mercurial",  check_hg,       chdir_hg        },
  95         { NULL,         NULL,           NULL            }
  96 };
  97 
  98 static const scm_t      *scm;
  99 static hgdata_t         hgdata;
 100 static time_t           tstamp;         /* timestamp to compare files to */
 101 static pnset_t          *exsetp;        /* pathname globs to ignore */
 102 static const char       *progname;
 103 
 104 int
 105 main(int argc, char *argv[])
 106 {
 107         int c;
 108         char path[MAXPATHLEN];
 109         char subtree[MAXPATHLEN] = "./";
 110         char *tstampfile = ".build.tstamp";
 111         struct stat tsstat;
 112 
 113         progname = strrchr(argv[0], '/');
 114         if (progname == NULL)
 115                 progname = argv[0];
 116         else
 117                 progname++;
 118 
 119         while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {
 120                 switch (c) {
 121                 case 'a':
 122                         /* for compatibility; now the default */
 123                         break;
 124 
 125                 case 's':
 126                         (void) strlcat(subtree, optarg, MAXPATHLEN);
 127                         break;
 128 
 129                 case 't':
 130                         tstampfile = optarg;
 131                         break;
 132 
 133                 case 'S':
 134                         for (scm = scms; scm->name != NULL; scm++) {
 135                                 if (strcmp(scm->name, optarg) == 0)
 136                                         break;
 137                         }
 138                         if (scm->name == NULL)
 139                                 die("unsupported SCM `%s'\n", optarg);
 140                         break;
 141 
 142                 default:
 143                 case '?':
 144                         goto usage;
 145                 }
 146         }
 147 
 148         argc -= optind;
 149         argv += optind;
 150 
 151         if (argc != 2) {
 152 usage:          (void) fprintf(stderr, "usage: %s [-s <subtree>] "
 153                     "[-t <tstampfile>] [-S hg|tw] <srcroot> <exceptfile>\n",
 154                     progname);
 155                 return (EXIT_FAILURE);
 156         }
 157 
 158         /*
 159          * Interpret a relative timestamp path as relative to srcroot.
 160          */
 161         if (tstampfile[0] == '/')
 162                 (void) strlcpy(path, tstampfile, MAXPATHLEN);
 163         else
 164                 (void) snprintf(path, MAXPATHLEN, "%s/%s", argv[0], tstampfile);
 165 
 166         if (stat(path, &tsstat) == -1)
 167                 die("cannot stat timestamp file \"%s\"", path);
 168         tstamp = tsstat.st_mtime;
 169 
 170         /*
 171          * Create the exception pathname set.
 172          */
 173         exsetp = make_exset(argv[1]);
 174         if (exsetp == NULL)
 175                 die("cannot make exception pathname set\n");
 176 
 177         /*
 178          * Walk the specified subtree of the tree rooted at argv[0].
 179          */
 180         if (chdir(argv[0]) == -1)
 181                 die("cannot change directory to \"%s\"", argv[0]);
 182 
 183         if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
 184                 die("cannot walk tree rooted at \"%s\"\n", argv[0]);
 185 
 186         pnset_empty(exsetp);
 187         return (EXIT_SUCCESS);
 188 }
 189 
 190 /*
 191  * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
 192  */
 193 static pnset_t *
 194 load_manifest(const char *hgroot)
 195 {
 196         FILE    *fp = NULL;
 197         char    *hgcmd = NULL;
 198         char    *newline;
 199         pnset_t *pnsetp;
 200         char    path[MAXPATHLEN];
 201 
 202         pnsetp = calloc(sizeof (pnset_t), 1);
 203         if (pnsetp == NULL ||
 204             asprintf(&hgcmd, "/usr/bin/hg manifest -R %s", hgroot) == -1)
 205                 goto fail;
 206 
 207         fp = popen(hgcmd, "r");
 208         if (fp == NULL)
 209                 goto fail;
 210 
 211         while (fgets(path, sizeof (path), fp) != NULL) {
 212                 newline = strrchr(path, '\n');
 213                 if (newline != NULL)
 214                         *newline = '\0';
 215 
 216                 if (pnset_add(pnsetp, path) == 0)
 217                         goto fail;
 218         }
 219 
 220         (void) pclose(fp);
 221         free(hgcmd);
 222         return (pnsetp);
 223 fail:
 224         warn("cannot load hg manifest at %s", hgroot);
 225         if (fp != NULL)
 226                 (void) pclose(fp);
 227         free(hgcmd);
 228         pnset_free(pnsetp);
 229         return (NULL);
 230 }
 231 
 232 /*
 233  * If necessary, change our active manifest to be appropriate for `path'.
 234  */
 235 static void
 236 chdir_hg(const char *path)
 237 {
 238         char hgpath[MAXPATHLEN];
 239         char basepath[MAXPATHLEN];
 240         char *slash;
 241 
 242         (void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", path);
 243 
 244         /*
 245          * Change our active manifest if any one of the following is true:
 246          *
 247          *   1. No manifest is loaded.  Find the nearest hgroot to load from.
 248          *
 249          *   2. A manifest is loaded, but we've moved into a directory with
 250          *      its own hgroot (e.g., usr/closed).  Load from its hgroot.
 251          *
 252          *   3. A manifest is loaded, but no longer applies (e.g., the manifest
 253          *      under usr/closed is loaded, but we've moved to usr/src).
 254          */
 255         if (hgdata.manifest == NULL ||
 256             strcmp(hgpath, hgdata.hgpath) != 0 && access(hgpath, X_OK) == 0 ||
 257             strncmp(path, hgdata.root, hgdata.rootlen - 1) != 0) {
 258                 pnset_free(hgdata.manifest);
 259                 hgdata.manifest = NULL;
 260 
 261                 (void) strlcpy(basepath, path, MAXPATHLEN);
 262 
 263                 /*
 264                  * Walk up the directory tree looking for .hg subdirectories.
 265                  */
 266                 while (access(hgpath, X_OK) == -1) {
 267                         slash = strrchr(basepath, '/');
 268                         if (slash == NULL) {
 269                                 if (!hgdata.rootwarn) {
 270                                         warn("no hg root for \"%s\"\n", path);
 271                                         hgdata.rootwarn = B_TRUE;
 272                                 }
 273                                 return;
 274                         }
 275                         *slash = '\0';
 276                         (void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", basepath);
 277                 }
 278 
 279                 /*
 280                  * We found a directory with an .hg subdirectory; record it
 281                  * and load its manifest.
 282                  */
 283                 (void) strlcpy(hgdata.hgpath, hgpath, MAXPATHLEN);
 284                 (void) strlcpy(hgdata.root, basepath, MAXPATHLEN);
 285                 hgdata.manifest = load_manifest(hgdata.root);
 286 
 287                 /*
 288                  * The logic in check_hg() depends on hgdata.root having a
 289                  * single trailing slash, so only add it if it's missing.
 290                  */
 291                 if (hgdata.root[strlen(hgdata.root) - 1] != '/')
 292                         (void) strlcat(hgdata.root, "/", MAXPATHLEN);
 293                 hgdata.rootlen = strlen(hgdata.root);
 294         }
 295 }
 296 
 297 /*
 298  * Check if a file is under Mercurial control by checking against the manifest.
 299  */
 300 /* ARGSUSED */
 301 static int
 302 check_hg(const char *path, const struct FTW *ftwp)
 303 {
 304         /*
 305          * The manifest paths are relative to the manifest root; skip past it.
 306          */
 307         path += hgdata.rootlen;
 308 
 309         return (hgdata.manifest != NULL && pnset_check(hgdata.manifest, path));
 310 }
 311 
 312 /*
 313  * Check if a file is under TeamWare control by checking for its corresponding
 314  * SCCS "s-dot" file.
 315  */
 316 static int
 317 check_tw(const char *path, const struct FTW *ftwp)
 318 {
 319         char sccspath[MAXPATHLEN];
 320 
 321         (void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
 322             path, path + ftwp->base);
 323 
 324         return (access(sccspath, F_OK) == 0);
 325 }
 326 
 327 /*
 328  * Using `exceptfile' and a built-in list of exceptions, build and return a
 329  * pnset_t consisting of all of the pathnames globs which are allowed to be
 330  * unreferenced in the source tree.
 331  */
 332 static pnset_t *
 333 make_exset(const char *exceptfile)
 334 {
 335         FILE            *fp;
 336         char            line[MAXPATHLEN];
 337         char            *newline;
 338         pnset_t         *pnsetp;
 339         unsigned int    i;
 340 
 341         pnsetp = calloc(sizeof (pnset_t), 1);
 342         if (pnsetp == NULL)
 343                 return (NULL);
 344 
 345         /*
 346          * Add any exceptions from the file.
 347          */
 348         fp = fopen(exceptfile, "r");
 349         if (fp == NULL) {
 350                 warn("cannot open exception file \"%s\"", exceptfile);
 351                 goto fail;
 352         }
 353 
 354         while (fgets(line, sizeof (line), fp) != NULL) {
 355                 newline = strrchr(line, '\n');
 356                 if (newline != NULL)
 357                         *newline = '\0';
 358 
 359                 for (i = 0; isspace(line[i]); i++)
 360                         ;
 361 
 362                 if (line[i] == '#' || line[i] == '\0')
 363                         continue;
 364 
 365                 if (pnset_add(pnsetp, line) == 0) {
 366                         (void) fclose(fp);
 367                         goto fail;
 368                 }
 369         }
 370 
 371         (void) fclose(fp);
 372         return (pnsetp);
 373 fail:
 374         pnset_free(pnsetp);
 375         return (NULL);
 376 }
 377 
 378 /*
 379  * FTW callback: print `path' if it's older than `tstamp' and not in `exsetp'.
 380  */
 381 static int
 382 checkpath(const char *path, const struct stat *statp, int type,
 383     struct FTW *ftwp)
 384 {
 385         switch (type) {
 386         case FTW_F:
 387                 /*
 388                  * Skip if the file is referenced or in the exception list.
 389                  */
 390                 if (statp->st_atime >= tstamp || pnset_check(exsetp, path))
 391                         return (0);
 392 
 393                 /*
 394                  * If requested, restrict ourselves to unreferenced files
 395                  * under SCM control.
 396                  */
 397                 if (scm == NULL || scm->checkfunc(path, ftwp))
 398                         (void) puts(path);
 399                 return (0);
 400 
 401         case FTW_D:
 402                 /*
 403                  * Prune any directories in the exception list.
 404                  */
 405                 if (pnset_check(exsetp, path)) {
 406                         ftwp->quit = FTW_PRUNE;
 407                         return (0);
 408                 }
 409 
 410                 /*
 411                  * If necessary, advise the SCM logic of our new directory.
 412                  */
 413                 if (scm != NULL && scm->chdirfunc != NULL)
 414                         scm->chdirfunc(path);
 415 
 416                 return (0);
 417 
 418         case FTW_DNR:
 419                 warn("cannot read \"%s\"", path);
 420                 return (0);
 421 
 422         case FTW_NS:
 423                 warn("cannot stat \"%s\"", path);
 424                 return (0);
 425 
 426         default:
 427                 break;
 428         }
 429 
 430         return (0);
 431 }
 432 
 433 /*
 434  * Add `path' to the pnset_t pointed to by `pnsetp'.
 435  */
 436 static int
 437 pnset_add(pnset_t *pnsetp, const char *path)
 438 {
 439         char **newpaths;
 440         unsigned int maxpaths;
 441 
 442         if (pnsetp->npath == pnsetp->maxpaths) {
 443                 maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2;
 444                 newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths);
 445                 if (newpaths == NULL)
 446                         return (0);
 447                 pnsetp->paths = newpaths;
 448                 pnsetp->maxpaths = maxpaths;
 449         }
 450 
 451         pnsetp->paths[pnsetp->npath] = strdup(path);
 452         if (pnsetp->paths[pnsetp->npath] == NULL)
 453                 return (0);
 454 
 455         pnsetp->npath++;
 456         return (1);
 457 }
 458 
 459 /*
 460  * Check `path' against the pnset_t pointed to by `pnsetp'.
 461  */
 462 static int
 463 pnset_check(const pnset_t *pnsetp, const char *path)
 464 {
 465         unsigned int i;
 466 
 467         for (i = 0; i < pnsetp->npath; i++) {
 468                 if (fnmatch(pnsetp->paths[i], path, 0) == 0)
 469                         return (1);
 470         }
 471         return (0);
 472 }
 473 
 474 /*
 475  * Empty the pnset_t pointed to by `pnsetp'.
 476  */
 477 static void
 478 pnset_empty(pnset_t *pnsetp)
 479 {
 480         while (pnsetp->npath-- != 0)
 481                 free(pnsetp->paths[pnsetp->npath]);
 482 
 483         free(pnsetp->paths);
 484         pnsetp->maxpaths = 0;
 485 }
 486 
 487 /*
 488  * Free the pnset_t pointed to by `pnsetp'.
 489  */
 490 static void
 491 pnset_free(pnset_t *pnsetp)
 492 {
 493         if (pnsetp != NULL) {
 494                 pnset_empty(pnsetp);
 495                 free(pnsetp);
 496         }
 497 }
 498 
 499 /* PRINTFLIKE1 */
 500 static void
 501 warn(const char *format, ...)
 502 {
 503         va_list alist;
 504         char *errstr = strerror(errno);
 505 
 506         if (errstr == NULL)
 507                 errstr = "<unknown error>";
 508 
 509         (void) fprintf(stderr, "%s: ", progname);
 510 
 511         va_start(alist, format);
 512         (void) vfprintf(stderr, format, alist);
 513         va_end(alist);
 514 
 515         if (strrchr(format, '\n') == NULL)
 516                 (void) fprintf(stderr, ": %s\n", errstr);
 517 }
 518 
 519 /* PRINTFLIKE1 */
 520 static void
 521 die(const char *format, ...)
 522 {
 523         va_list alist;
 524         char *errstr = strerror(errno);
 525 
 526         if (errstr == NULL)
 527                 errstr = "<unknown error>";
 528 
 529         (void) fprintf(stderr, "%s: fatal: ", progname);
 530 
 531         va_start(alist, format);
 532         (void) vfprintf(stderr, format, alist);
 533         va_end(alist);
 534 
 535         if (strrchr(format, '\n') == NULL)
 536                 (void) fprintf(stderr, ": %s\n", errstr);
 537 
 538         exit(EXIT_FAILURE);
 539 }