1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  *
  21  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  22  * Use is subject to license terms.
  23  */
  24 
  25 /*
  26  * Finds all unreferenced files in a source tree that do not match a list of
  27  * permitted pathnames.
  28  */
  29 
  30 #include <ctype.h>
  31 #include <errno.h>
  32 #include <fnmatch.h>
  33 #include <ftw.h>
  34 #include <stdarg.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <string.h>
  38 #include <time.h>
  39 #include <unistd.h>
  40 #include <sys/param.h>
  41 #include <sys/stat.h>
  42 #include <sys/types.h>
  43 
  44 /*
  45  * Pathname set: a simple datatype for storing pathname pattern globs and
  46  * for checking whether a given pathname is matched by a pattern glob in
  47  * the set.
  48  */
  49 typedef struct {
  50         char            **paths;        /* array of strdup()'d pattern globs */
  51         unsigned int    npath;          /* number of entries of paths[] in use */
  52         unsigned int    maxpaths;       /* number of entries allocated in paths[] */
  53 } pnset_t;
  54 
  55 /*
  56  * Data associated with the current SCM manifest.
  57  */
  58 typedef struct scmdata {
  59         pnset_t         *manifest;      /* loaded manifest, or NULL if none */
  60         char            metapath[MAXPATHLEN];   /* metadata directory of `root' */
  61         char            root[MAXPATHLEN];       /* SCM root, kept with a trailing '/' */
  62         unsigned int    rootlen;        /* strlen(root) */
  63         boolean_t       rootwarn;       /* "no metadata directory" already warned */
  64 } scmdata_t;
  65 
  66 /*
  67  * Hooks used to check if a given unreferenced file is known to an SCM
  68  * (currently Git, Mercurial and TeamWare).
  69  */
     /* Returns nonzero if the given file is known to the SCM. */
  70 typedef int checkscm_func_t(const char *, const struct FTW *);
     /* Called by checkpath() with each non-pruned directory it visits. */
  71 typedef void chdirscm_func_t(const char *);
  72 
  73 typedef struct {
  74         const char      *name;          /* name accepted by the -S option */
  75         checkscm_func_t *checkfunc;     /* per-file check hook */
  76         chdirscm_func_t *chdirfunc;     /* per-directory hook; may be NULL */
  77 } scm_t;
  78 
     /* SCM hooks referenced from the scms[] table. */
  79 static checkscm_func_t check_tw, check_scmdata;
  80 static chdirscm_func_t chdir_hg, chdir_git;
     /* Pathname set primitives; add and check return 1 or 0. */
  81 static int      pnset_add(pnset_t *, const char *);
  82 static int      pnset_check(const pnset_t *, const char *);
  83 static void     pnset_empty(pnset_t *);
  84 static void     pnset_free(pnset_t *);
     /* nftw() callback. */
  85 static int      checkpath(const char *, const struct stat *, int, struct FTW *);
  86 static pnset_t  *make_exset(const char *);
     /* Diagnostics on stderr; die() additionally exits with EXIT_FAILURE. */
  87 static void     warn(const char *, ...);
  88 static void     die(const char *, ...);
  89 
     /*
      * Supported SCMs, searched by name when -S is given.  TeamWare needs no
      * per-directory hook; Mercurial and Git share check_scmdata() and differ
      * only in how the manifest is located and loaded.
      */
  90 static const scm_t scms[] = {
  91         { "tw",         check_tw,       NULL            },
  92         { "teamware",   check_tw,       NULL            },
  93         { "hg",         check_scmdata,  chdir_hg        },
  94         { "mercurial",  check_scmdata,  chdir_hg        },
  95         { "git",        check_scmdata,  chdir_git       },
  96         { NULL,         NULL,           NULL            }       /* end of table */
  97 };
  98 
  99 static const scm_t      *scm;           /* SCM selected with -S, or NULL */
 100 static scmdata_t        scmdata;        /* state of the currently loaded manifest */
 101 static time_t           tstamp;         /* timestamp to compare files to */
 102 static pnset_t          *exsetp;        /* pathname globs to ignore */
 103 static const char       *progname;      /* basename of argv[0], for messages */
 104 
 105 int
 106 main(int argc, char *argv[])
 107 {
 108         int c;
 109         char path[MAXPATHLEN];
 110         char subtree[MAXPATHLEN] = "./";
 111         char *tstampfile = ".build.tstamp";
 112         struct stat tsstat;
 113 
 114         progname = strrchr(argv[0], '/');
 115         if (progname == NULL)
 116                 progname = argv[0];
 117         else
 118                 progname++;
 119 
 120         while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {
 121                 switch (c) {
 122                 case 'a':
 123                         /* for compatibility; now the default */
 124                         break;
 125 
 126                 case 's':
 127                         (void) strlcat(subtree, optarg, MAXPATHLEN);
 128                         break;
 129 
 130                 case 't':
 131                         tstampfile = optarg;
 132                         break;
 133 
 134                 case 'S':
 135                         for (scm = scms; scm->name != NULL; scm++) {
 136                                 if (strcmp(scm->name, optarg) == 0)
 137                                         break;
 138                         }
 139                         if (scm->name == NULL)
 140                                 die("unsupported SCM `%s'\n", optarg);
 141                         break;
 142 
 143                 default:
 144                 case '?':
 145                         goto usage;
 146                 }
 147         }
 148 
 149         argc -= optind;
 150         argv += optind;
 151 
 152         if (argc != 2) {
 153 usage:          (void) fprintf(stderr, "usage: %s [-s <subtree>] "
 154                     "[-t <tstampfile>] [-S hg|tw|git] <srcroot> <exceptfile>\n",
 155                     progname);
 156                 return (EXIT_FAILURE);
 157         }
 158 
 159         /*
 160          * Interpret a relative timestamp path as relative to srcroot.
 161          */
 162         if (tstampfile[0] == '/')
 163                 (void) strlcpy(path, tstampfile, MAXPATHLEN);
 164         else
 165                 (void) snprintf(path, MAXPATHLEN, "%s/%s", argv[0], tstampfile);
 166 
 167         if (stat(path, &tsstat) == -1)
 168                 die("cannot stat timestamp file \"%s\"", path);
 169         tstamp = tsstat.st_mtime;
 170 
 171         /*
 172          * Create the exception pathname set.
 173          */
 174         exsetp = make_exset(argv[1]);
 175         if (exsetp == NULL)
 176                 die("cannot make exception pathname set\n");
 177 
 178         /*
 179          * Walk the specified subtree of the tree rooted at argv[0].
 180          */
 181         if (chdir(argv[0]) == -1)
 182                 die("cannot change directory to \"%s\"", argv[0]);
 183 
 184         if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
 185                 die("cannot walk tree rooted at \"%s\"\n", argv[0]);
 186 
 187         pnset_empty(exsetp);
 188         return (EXIT_SUCCESS);
 189 }
 190 
 191 /*
 192  * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
 193  */
 194 static pnset_t *
 195 hg_manifest(const char *hgroot)
 196 {
 197         FILE    *fp = NULL;
 198         char    *hgcmd = NULL;
 199         char    *newline;
 200         pnset_t *pnsetp;
 201         char    path[MAXPATHLEN];
 202 
 203         pnsetp = calloc(sizeof (pnset_t), 1);
 204         if (pnsetp == NULL ||
 205             asprintf(&hgcmd, "hg manifest -R %s", hgroot) == -1)
 206                 goto fail;
 207 
 208         fp = popen(hgcmd, "r");
 209         if (fp == NULL)
 210                 goto fail;
 211 
 212         while (fgets(path, sizeof (path), fp) != NULL) {
 213                 newline = strrchr(path, '\n');
 214                 if (newline != NULL)
 215                         *newline = '\0';
 216 
 217                 if (pnset_add(pnsetp, path) == 0)
 218                         goto fail;
 219         }
 220 
 221         (void) pclose(fp);
 222         free(hgcmd);
 223         return (pnsetp);
 224 fail:
 225         warn("cannot load hg manifest at %s", hgroot);
 226         if (fp != NULL)
 227                 (void) pclose(fp);
 228         free(hgcmd);
 229         pnset_free(pnsetp);
 230         return (NULL);
 231 }
 232 
 233 /*
 234  * Load and return a pnset for the manifest for the Git repo at `gitroot'.
 235  */
 236 static pnset_t *
 237 git_manifest(const char *gitroot)
 238 {
 239         FILE    *fp = NULL;
 240         char    *gitcmd = NULL;
 241         char    *newline;
 242         pnset_t *pnsetp;
 243         char    path[MAXPATHLEN];
 244 
 245         pnsetp = calloc(sizeof (pnset_t), 1);
 246         if (pnsetp == NULL ||
 247             asprintf(&gitcmd, "git --git-dir=%s/.git ls-files", gitroot) == -1)
 248                 goto fail;
 249 
 250         fp = popen(gitcmd, "r");
 251         if (fp == NULL)
 252                 goto fail;
 253 
 254         while (fgets(path, sizeof (path), fp) != NULL) {
 255                 newline = strrchr(path, '\n');
 256                 if (newline != NULL)
 257                         *newline = '\0';
 258 
 259                 if (pnset_add(pnsetp, path) == 0)
 260                         goto fail;
 261         }
 262 
 263         (void) pclose(fp);
 264         free(gitcmd);
 265         return (pnsetp);
 266 fail:
 267         warn("cannot load git manifest at %s", gitroot);
 268         if (fp != NULL)
 269                 (void) pclose(fp);
 270         free(gitcmd);
 271         pnset_free(pnsetp);
 272         return (NULL);
 273 }
 274 
 275 /*
 276  * If necessary, change our active manifest to be appropriate for `path'.
 277  */
 278 static void
 279 chdir_scmdata(const char *path, const char *meta,
 280     pnset_t *(*manifest_func)(const char *path))
 281 {
 282         char scmpath[MAXPATHLEN];
 283         char basepath[MAXPATHLEN];
 284         char *slash;
 285 
 286         (void) snprintf(scmpath, MAXPATHLEN, "%s/%s", path, meta);
 287 
 288         /*
 289          * Change our active manifest if any one of the following is true:
 290          *
 291          *   1. No manifest is loaded.  Find the nearest SCM root to load from.
 292          *
 293          *   2. A manifest is loaded, but we've moved into a directory with
 294          *      its own metadata directory (e.g., usr/closed).  Load from its
 295          *      root.
 296          *
 297          *   3. A manifest is loaded, but no longer applies (e.g., the manifest
 298          *      under usr/closed is loaded, but we've moved to usr/src).
 299          */
 300         if (scmdata.manifest == NULL ||
 301             (strcmp(scmpath, scmdata.metapath) != 0 &&
 302             access(scmpath, X_OK) == 0) ||
 303             strncmp(path, scmdata.root, scmdata.rootlen - 1) != 0) {
 304                 pnset_free(scmdata.manifest);
 305                 scmdata.manifest = NULL;
 306 
 307                 (void) strlcpy(basepath, path, MAXPATHLEN);
 308 
 309                 /*
 310                  * Walk up the directory tree looking for metadata
 311                  * subdirectories.
 312                  */
 313                 while (access(scmpath, X_OK) == -1) {
 314                         slash = strrchr(basepath, '/');
 315                         if (slash == NULL) {
 316                                 if (!scmdata.rootwarn) {
 317                                         warn("no metadata directory "
 318                                             "for \"%s\"\n", path);
 319                                         scmdata.rootwarn = B_TRUE;
 320                                 }
 321                                 return;
 322                         }
 323                         *slash = '\0';
 324                         (void) snprintf(scmpath, MAXPATHLEN, "%s/%s", basepath,
 325                             meta);
 326                 }
 327 
 328                 /*
 329                  * We found a directory with an SCM metadata directory; record
 330                  * it and load its manifest.
 331                  */
 332                 (void) strlcpy(scmdata.metapath, scmpath, MAXPATHLEN);
 333                 (void) strlcpy(scmdata.root, basepath, MAXPATHLEN);
 334                 scmdata.manifest = manifest_func(scmdata.root);
 335 
 336                 /*
 337                  * The logic in check_scmdata() depends on scmdata.root having
 338                  * a single trailing slash, so only add it if it's missing.
 339                  */
 340                 if (scmdata.root[strlen(scmdata.root) - 1] != '/')
 341                         (void) strlcat(scmdata.root, "/", MAXPATHLEN);
 342                 scmdata.rootlen = strlen(scmdata.root);
 343         }
 344 }
 345 
 346 static void
 347 chdir_git(const char *path)
 348 {
 349         chdir_scmdata(path, ".git", git_manifest);
 350 }
 351 
 352 /*
 353  * If necessary, change our active manifest to be appropriate for `path'.
 354  */
 355 static void
 356 chdir_hg(const char *path)
 357 {
 358         chdir_scmdata(path, ".hg", hg_manifest);
 359 }
 360 
 361 /* ARGSUSED */
 362 static int
 363 check_scmdata(const char *path, const struct FTW *ftwp)
 364 {
 365         /*
 366          * The manifest paths are relative to the manifest root; skip past it.
 367          */
 368         path += scmdata.rootlen;
 369 
 370         return (scmdata.manifest != NULL && pnset_check(scmdata.manifest,
 371             path));
 372 }
 373 
 374 /*
 375  * Check if a file is under TeamWare control by checking for its corresponding
 376  * SCCS "s-dot" file.
 377  */
 378 static int
 379 check_tw(const char *path, const struct FTW *ftwp)
 380 {
 381         char sccspath[MAXPATHLEN];
 382 
 383         (void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
 384             path, path + ftwp->base);
 385 
 386         return (access(sccspath, F_OK) == 0);
 387 }
 388 
 389 /*
 390  * Using `exceptfile' and a built-in list of exceptions, build and return a
 391  * pnset_t consisting of all of the pathnames globs which are allowed to be
 392  * unreferenced in the source tree.
 393  */
 394 static pnset_t *
 395 make_exset(const char *exceptfile)
 396 {
 397         FILE            *fp;
 398         char            line[MAXPATHLEN];
 399         char            *newline;
 400         pnset_t         *pnsetp;
 401         unsigned int    i;
 402 
 403         pnsetp = calloc(sizeof (pnset_t), 1);
 404         if (pnsetp == NULL)
 405                 return (NULL);
 406 
 407         /*
 408          * Add any exceptions from the file.
 409          */
 410         fp = fopen(exceptfile, "r");
 411         if (fp == NULL) {
 412                 warn("cannot open exception file \"%s\"", exceptfile);
 413                 goto fail;
 414         }
 415 
 416         while (fgets(line, sizeof (line), fp) != NULL) {
 417                 newline = strrchr(line, '\n');
 418                 if (newline != NULL)
 419                         *newline = '\0';
 420 
 421                 for (i = 0; isspace(line[i]); i++)
 422                         ;
 423 
 424                 if (line[i] == '#' || line[i] == '\0')
 425                         continue;
 426 
 427                 if (pnset_add(pnsetp, line) == 0) {
 428                         (void) fclose(fp);
 429                         goto fail;
 430                 }
 431         }
 432 
 433         (void) fclose(fp);
 434         return (pnsetp);
 435 fail:
 436         pnset_free(pnsetp);
 437         return (NULL);
 438 }
 439 
 440 /*
 441  * FTW callback: print `path' if it's older than `tstamp' and not in `exsetp'.
 442  */
 443 static int
 444 checkpath(const char *path, const struct stat *statp, int type,
 445     struct FTW *ftwp)
 446 {
 447         switch (type) {
 448         case FTW_F:
 449                 /*
 450                  * Skip if the file is referenced or in the exception list.
 451                  */
 452                 if (statp->st_atime >= tstamp || pnset_check(exsetp, path))
 453                         return (0);
 454 
 455                 /*
 456                  * If requested, restrict ourselves to unreferenced files
 457                  * under SCM control.
 458                  */
 459                 if (scm == NULL || scm->checkfunc(path, ftwp))
 460                         (void) puts(path);
 461                 return (0);
 462 
 463         case FTW_D:
 464                 /*
 465                  * Prune any directories in the exception list.
 466                  */
 467                 if (pnset_check(exsetp, path)) {
 468                         ftwp->quit = FTW_PRUNE;
 469                         return (0);
 470                 }
 471 
 472                 /*
 473                  * If necessary, advise the SCM logic of our new directory.
 474                  */
 475                 if (scm != NULL && scm->chdirfunc != NULL)
 476                         scm->chdirfunc(path);
 477 
 478                 return (0);
 479 
 480         case FTW_DNR:
 481                 warn("cannot read \"%s\"", path);
 482                 return (0);
 483 
 484         case FTW_NS:
 485                 warn("cannot stat \"%s\"", path);
 486                 return (0);
 487 
 488         default:
 489                 break;
 490         }
 491 
 492         return (0);
 493 }
 494 
 495 /*
 496  * Add `path' to the pnset_t pointed to by `pnsetp'.
 497  */
 498 static int
 499 pnset_add(pnset_t *pnsetp, const char *path)
 500 {
 501         char **newpaths;
 502         unsigned int maxpaths;
 503 
 504         if (pnsetp->npath == pnsetp->maxpaths) {
 505                 maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2;
 506                 newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths);
 507                 if (newpaths == NULL)
 508                         return (0);
 509                 pnsetp->paths = newpaths;
 510                 pnsetp->maxpaths = maxpaths;
 511         }
 512 
 513         pnsetp->paths[pnsetp->npath] = strdup(path);
 514         if (pnsetp->paths[pnsetp->npath] == NULL)
 515                 return (0);
 516 
 517         pnsetp->npath++;
 518         return (1);
 519 }
 520 
 521 /*
 522  * Check `path' against the pnset_t pointed to by `pnsetp'.
 523  */
 524 static int
 525 pnset_check(const pnset_t *pnsetp, const char *path)
 526 {
 527         unsigned int i;
 528 
 529         for (i = 0; i < pnsetp->npath; i++) {
 530                 if (fnmatch(pnsetp->paths[i], path, 0) == 0)
 531                         return (1);
 532         }
 533         return (0);
 534 }
 535 
 536 /*
 537  * Empty the pnset_t pointed to by `pnsetp'.
 538  */
 539 static void
 540 pnset_empty(pnset_t *pnsetp)
 541 {
 542         while (pnsetp->npath-- != 0)
 543                 free(pnsetp->paths[pnsetp->npath]);
 544 
 545         free(pnsetp->paths);
 546         pnsetp->maxpaths = 0;
 547 }
 548 
 549 /*
 550  * Free the pnset_t pointed to by `pnsetp'.
 551  */
 552 static void
 553 pnset_free(pnset_t *pnsetp)
 554 {
 555         if (pnsetp != NULL) {
 556                 pnset_empty(pnsetp);
 557                 free(pnsetp);
 558         }
 559 }
 560 
 561 /* PRINTFLIKE1 */
 562 static void
 563 warn(const char *format, ...)
 564 {
 565         va_list alist;
 566         char *errstr = strerror(errno);
 567 
 568         if (errstr == NULL)
 569                 errstr = "<unknown error>";
 570 
 571         (void) fprintf(stderr, "%s: ", progname);
 572 
 573         va_start(alist, format);
 574         (void) vfprintf(stderr, format, alist);
 575         va_end(alist);
 576 
 577         if (strrchr(format, '\n') == NULL)
 578                 (void) fprintf(stderr, ": %s\n", errstr);
 579 }
 580 
 581 /* PRINTFLIKE1 */
 582 static void
 583 die(const char *format, ...)
 584 {
 585         va_list alist;
 586         char *errstr = strerror(errno);
 587 
 588         if (errstr == NULL)
 589                 errstr = "<unknown error>";
 590 
 591         (void) fprintf(stderr, "%s: fatal: ", progname);
 592 
 593         va_start(alist, format);
 594         (void) vfprintf(stderr, format, alist);
 595         va_end(alist);
 596 
 597         if (strrchr(format, '\n') == NULL)
 598                 (void) fprintf(stderr, ": %s\n", errstr);
 599 
 600         exit(EXIT_FAILURE);
 601 }