1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  *
  21  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  22  * Use is subject to license terms.
  23  */
  24 
  25 /*
  26  * Finds all unreferenced files in a source tree that do not match a list of
  27  * permitted pathnames.
  28  */
  29 
  30 #include <ctype.h>
  31 #include <errno.h>
  32 #include <fnmatch.h>
  33 #include <ftw.h>
  34 #include <stdarg.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <string.h>
  38 #include <time.h>
  39 #include <unistd.h>
  40 #include <sys/param.h>
  41 #include <sys/stat.h>
  42 #include <sys/types.h>
  43 
  44 /*
  45  * Pathname set: a simple datatype for storing pathname pattern globs and
  46  * for checking whether a given pathname is matched by a pattern glob in
  47  * the set.
  48  */
  49 typedef struct {
  50         char            **paths;        /* array of strdup()ed glob strings */
  51         unsigned int    npath;          /* number of entries in use in `paths' */
  52         unsigned int    maxpaths;       /* number of slots allocated in `paths' */
  53 } pnset_t;
  54 
  55 /*
  56  * Data associated with the current Mercurial manifest.
  57  */
  58 typedef struct hgdata {
  59         pnset_t         *manifest;      /* loaded manifest, or NULL if none */
  60         char            hgpath[MAXPATHLEN];     /* path of the .hg directory in use */
  61         char            root[MAXPATHLEN];       /* repo root, kept with a trailing '/' */
  62         unsigned int    rootlen;        /* strlen(root), set by chdir_hg() */
  63         boolean_t       rootwarn;       /* B_TRUE once "no hg root" was warned */
  64 } hgdata_t;
  65 
  66 /*
  67  * Hooks used to check if a given unreferenced file is known to an SCM
  68  * (currently Mercurial, TeamWare, and Git).
  69  */
  70 typedef int checkscm_func_t(const char *, const struct FTW *);  /* nonzero if file is known */
  71 typedef void chdirscm_func_t(const char *);     /* called for each directory walked */
  72 
  73 typedef struct {
  74         const char      *name;          /* name accepted by the -S option */
  75         checkscm_func_t *checkfunc;     /* per-file check, see checkpath() */
  76         chdirscm_func_t *chdirfunc;     /* per-directory hook; may be NULL */
  77 } scm_t;
  78 
  79 static checkscm_func_t check_tw, check_hg, check_git;   /* per-SCM file checks */
  80 static chdirscm_func_t chdir_hg, chdir_git;             /* per-SCM directory hooks */
  81 static int      pnset_add(pnset_t *, const char *);     /* returns 0 on failure */
  82 static int      pnset_check(const pnset_t *, const char *);     /* 1 if matched */
  83 static void     pnset_empty(pnset_t *);
  84 static void     pnset_free(pnset_t *);
  85 static int      checkpath(const char *, const struct stat *, int, struct FTW *);
  86 static pnset_t  *make_exset(const char *);              /* NULL on failure */
  87 static void     warn(const char *, ...);
  88 static void     die(const char *, ...);                 /* does not return */
  89 
  90 static const scm_t scms[] = {   /* SCMs selectable with -S */
  91         { "tw",         check_tw,       NULL            },
  92         { "teamware",   check_tw,       NULL            },
  93         { "hg",         check_hg,       chdir_hg        },
  94         { "mercurial",  check_hg,       chdir_hg        },
  95         { "git",        check_git,      chdir_git       },
  96         { NULL,         NULL,           NULL            }       /* end-of-table marker */
  97 };
  98 
  99 static const scm_t      *scm;           /* SCM chosen with -S; NULL if none */
 100 static hgdata_t         hgdata;         /* state kept by chdir_hg()/check_hg() */
 101 static pnset_t          *gitmanifest = NULL;    /* `git ls-files' output, see chdir_git() */
 102 static time_t           tstamp;         /* timestamp to compare files to */
 103 static pnset_t          *exsetp;        /* pathname globs to ignore */
 104 static const char       *progname;      /* basename of argv[0], used in messages */
 105 
 106 int
 107 main(int argc, char *argv[])
 108 {
 109         int c;
 110         char path[MAXPATHLEN];
 111         char subtree[MAXPATHLEN] = "./";
 112         char *tstampfile = ".build.tstamp";
 113         struct stat tsstat;
 114 
 115         progname = strrchr(argv[0], '/');
 116         if (progname == NULL)
 117                 progname = argv[0];
 118         else
 119                 progname++;
 120 
 121         while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {
 122                 switch (c) {
 123                 case 'a':
 124                         /* for compatibility; now the default */
 125                         break;
 126 
 127                 case 's':
 128                         (void) strlcat(subtree, optarg, MAXPATHLEN);
 129                         break;
 130 
 131                 case 't':
 132                         tstampfile = optarg;
 133                         break;
 134 
 135                 case 'S':
 136                         for (scm = scms; scm->name != NULL; scm++) {
 137                                 if (strcmp(scm->name, optarg) == 0)
 138                                         break;
 139                         }
 140                         if (scm->name == NULL)
 141                                 die("unsupported SCM `%s'\n", optarg);
 142                         break;
 143 
 144                 default:
 145                 case '?':
 146                         goto usage;
 147                 }
 148         }
 149 
 150         argc -= optind;
 151         argv += optind;
 152 
 153         if (argc != 2) {
 154 usage:          (void) fprintf(stderr, "usage: %s [-s <subtree>] "
 155                     "[-t <tstampfile>] [-S hg|tw|git] <srcroot> <exceptfile>\n",
 156                     progname);
 157                 return (EXIT_FAILURE);
 158         }
 159 
 160         /*
 161          * Interpret a relative timestamp path as relative to srcroot.
 162          */
 163         if (tstampfile[0] == '/')
 164                 (void) strlcpy(path, tstampfile, MAXPATHLEN);
 165         else
 166                 (void) snprintf(path, MAXPATHLEN, "%s/%s", argv[0], tstampfile);
 167 
 168         if (stat(path, &tsstat) == -1)
 169                 die("cannot stat timestamp file \"%s\"", path);
 170         tstamp = tsstat.st_mtime;
 171 
 172         /*
 173          * Create the exception pathname set.
 174          */
 175         exsetp = make_exset(argv[1]);
 176         if (exsetp == NULL)
 177                 die("cannot make exception pathname set\n");
 178 
 179         /*
 180          * Walk the specified subtree of the tree rooted at argv[0].
 181          */
 182         if (chdir(argv[0]) == -1)
 183                 die("cannot change directory to \"%s\"", argv[0]);
 184 
 185         if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
 186                 die("cannot walk tree rooted at \"%s\"\n", argv[0]);
 187 
 188         pnset_empty(exsetp);
 189         return (EXIT_SUCCESS);
 190 }
 191 
 192 /*
 193  * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
 194  */
 195 static pnset_t *
 196 load_manifest(const char *hgroot)
 197 {
 198         FILE    *fp = NULL;
 199         char    *hgcmd = NULL;
 200         char    *newline;
 201         pnset_t *pnsetp;
 202         char    path[MAXPATHLEN];
 203 
 204         pnsetp = calloc(sizeof (pnset_t), 1);
 205         if (pnsetp == NULL ||
 206             asprintf(&hgcmd, "hg manifest -R %s", hgroot) == -1)
 207                 goto fail;
 208 
 209         fp = popen(hgcmd, "r");
 210         if (fp == NULL)
 211                 goto fail;
 212 
 213         while (fgets(path, sizeof (path), fp) != NULL) {
 214                 newline = strrchr(path, '\n');
 215                 if (newline != NULL)
 216                         *newline = '\0';
 217 
 218                 if (pnset_add(pnsetp, path) == 0)
 219                         goto fail;
 220         }
 221 
 222         (void) pclose(fp);
 223         free(hgcmd);
 224         return (pnsetp);
 225 fail:
 226         warn("cannot load hg manifest at %s", hgroot);
 227         if (fp != NULL)
 228                 (void) pclose(fp);
 229         free(hgcmd);
 230         pnset_free(pnsetp);
 231         return (NULL);
 232 }
 233 
 234 static void
 235 chdir_git(const char *path)
 236 {
 237         FILE *fp = NULL;
 238         char *gitcmd = NULL;
 239         char *newline;
 240         char fn[MAXPATHLEN];
 241         pnset_t *pnsetp;
 242 
 243         pnsetp = calloc(sizeof (pnset_t), 1);
 244         if ((pnsetp == NULL) ||
 245             (asprintf(&gitcmd, "git ls-files %s", path) == -1))
 246                 goto fail;
 247 
 248         if ((fp = popen(gitcmd, "r")) == NULL)
 249                 goto fail;
 250 
 251         while (fgets(fn, sizeof (fn), fp) != NULL) {
 252                 if ((newline = strrchr(fn, '\n')) != NULL)
 253                         *newline = '\0';
 254 
 255                 if (pnset_add(pnsetp, fn) == 0)
 256                         goto fail;
 257         }
 258 
 259         (void) pclose(fp);
 260         free(gitcmd);
 261         gitmanifest = pnsetp;
 262         return;
 263 fail:
 264         warn("cannot load git manifest");
 265         if (fp != NULL)
 266                 (void) pclose(fp);
 267         if (pnsetp != NULL)
 268                 free(pnsetp);
 269         if (gitcmd != NULL)
 270                 free(gitcmd);
 271 }
 272 
 273 /*
 274  * If necessary, change our active manifest to be appropriate for `path'.
 275  */
 276 static void
 277 chdir_hg(const char *path)
 278 {
 279         char hgpath[MAXPATHLEN];
 280         char basepath[MAXPATHLEN];
 281         char *slash;
 282 
 283         (void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", path);
 284 
 285         /*
 286          * Change our active manifest if any one of the following is true:
 287          *
 288          *   1. No manifest is loaded.  Find the nearest hgroot to load from.
 289          *
 290          *   2. A manifest is loaded, but we've moved into a directory with
 291          *      its own hgroot (e.g., usr/closed).  Load from its hgroot.
 292          *
 293          *   3. A manifest is loaded, but no longer applies (e.g., the manifest
 294          *      under usr/closed is loaded, but we've moved to usr/src).
 295          */
 296         if (hgdata.manifest == NULL ||
 297             strcmp(hgpath, hgdata.hgpath) != 0 && access(hgpath, X_OK) == 0 ||
 298             strncmp(path, hgdata.root, hgdata.rootlen - 1) != 0) {
 299                 pnset_free(hgdata.manifest);
 300                 hgdata.manifest = NULL;
 301 
 302                 (void) strlcpy(basepath, path, MAXPATHLEN);
 303 
 304                 /*
 305                  * Walk up the directory tree looking for .hg subdirectories.
 306                  */
 307                 while (access(hgpath, X_OK) == -1) {
 308                         slash = strrchr(basepath, '/');
 309                         if (slash == NULL) {
 310                                 if (!hgdata.rootwarn) {
 311                                         warn("no hg root for \"%s\"\n", path);
 312                                         hgdata.rootwarn = B_TRUE;
 313                                 }
 314                                 return;
 315                         }
 316                         *slash = '\0';
 317                         (void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", basepath);
 318                 }
 319 
 320                 /*
 321                  * We found a directory with an .hg subdirectory; record it
 322                  * and load its manifest.
 323                  */
 324                 (void) strlcpy(hgdata.hgpath, hgpath, MAXPATHLEN);
 325                 (void) strlcpy(hgdata.root, basepath, MAXPATHLEN);
 326                 hgdata.manifest = load_manifest(hgdata.root);
 327 
 328                 /*
 329                  * The logic in check_hg() depends on hgdata.root having a
 330                  * single trailing slash, so only add it if it's missing.
 331                  */
 332                 if (hgdata.root[strlen(hgdata.root) - 1] != '/')
 333                         (void) strlcat(hgdata.root, "/", MAXPATHLEN);
 334                 hgdata.rootlen = strlen(hgdata.root);
 335         }
 336 }
 337 
 338 /*
 339  * Check if a file is under Mercurial control by checking against the manifest.
 340  */
 341 /* ARGSUSED */
 342 static int
 343 check_hg(const char *path, const struct FTW *ftwp)
 344 {
 345         /*
 346          * The manifest paths are relative to the manifest root; skip past it.
 347          */
 348         path += hgdata.rootlen;
 349 
 350         return (hgdata.manifest != NULL && pnset_check(hgdata.manifest, path));
 351 }
 352 /* ARGSUSED */
 353 static int
 354 check_git(const char *path, const struct FTW *ftwp)
 355 {
 356         path += 2;              /* Skip "./" */
 357         return (gitmanifest != NULL && pnset_check(gitmanifest, path));
 358 }
 359 
 360 /*
 361  * Check if a file is under TeamWare control by checking for its corresponding
 362  * SCCS "s-dot" file.
 363  */
 364 static int
 365 check_tw(const char *path, const struct FTW *ftwp)
 366 {
 367         char sccspath[MAXPATHLEN];
 368 
 369         (void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
 370             path, path + ftwp->base);
 371 
 372         return (access(sccspath, F_OK) == 0);
 373 }
 374 
 375 /*
 376  * Using `exceptfile' and a built-in list of exceptions, build and return a
 377  * pnset_t consisting of all of the pathnames globs which are allowed to be
 378  * unreferenced in the source tree.
 379  */
 380 static pnset_t *
 381 make_exset(const char *exceptfile)
 382 {
 383         FILE            *fp;
 384         char            line[MAXPATHLEN];
 385         char            *newline;
 386         pnset_t         *pnsetp;
 387         unsigned int    i;
 388 
 389         pnsetp = calloc(sizeof (pnset_t), 1);
 390         if (pnsetp == NULL)
 391                 return (NULL);
 392 
 393         /*
 394          * Add any exceptions from the file.
 395          */
 396         fp = fopen(exceptfile, "r");
 397         if (fp == NULL) {
 398                 warn("cannot open exception file \"%s\"", exceptfile);
 399                 goto fail;
 400         }
 401 
 402         while (fgets(line, sizeof (line), fp) != NULL) {
 403                 newline = strrchr(line, '\n');
 404                 if (newline != NULL)
 405                         *newline = '\0';
 406 
 407                 for (i = 0; isspace(line[i]); i++)
 408                         ;
 409 
 410                 if (line[i] == '#' || line[i] == '\0')
 411                         continue;
 412 
 413                 if (pnset_add(pnsetp, line) == 0) {
 414                         (void) fclose(fp);
 415                         goto fail;
 416                 }
 417         }
 418 
 419         (void) fclose(fp);
 420         return (pnsetp);
 421 fail:
 422         pnset_free(pnsetp);
 423         return (NULL);
 424 }
 425 
 426 /*
 427  * FTW callback: print `path' if it's older than `tstamp' and not in `exsetp'.
 428  */
 429 static int
 430 checkpath(const char *path, const struct stat *statp, int type,
 431     struct FTW *ftwp)
 432 {
 433         switch (type) {
 434         case FTW_F:
 435                 /*
 436                  * Skip if the file is referenced or in the exception list.
 437                  */
 438                 if (statp->st_atime >= tstamp || pnset_check(exsetp, path))
 439                         return (0);
 440 
 441                 /*
 442                  * If requested, restrict ourselves to unreferenced files
 443                  * under SCM control.
 444                  */
 445                 if (scm == NULL || scm->checkfunc(path, ftwp))
 446                         (void) puts(path);
 447                 return (0);
 448 
 449         case FTW_D:
 450                 /*
 451                  * Prune any directories in the exception list.
 452                  */
 453                 if (pnset_check(exsetp, path)) {
 454                         ftwp->quit = FTW_PRUNE;
 455                         return (0);
 456                 }
 457 
 458                 /*
 459                  * If necessary, advise the SCM logic of our new directory.
 460                  */
 461                 if (scm != NULL && scm->chdirfunc != NULL)
 462                         scm->chdirfunc(path);
 463 
 464                 return (0);
 465 
 466         case FTW_DNR:
 467                 warn("cannot read \"%s\"", path);
 468                 return (0);
 469 
 470         case FTW_NS:
 471                 warn("cannot stat \"%s\"", path);
 472                 return (0);
 473 
 474         default:
 475                 break;
 476         }
 477 
 478         return (0);
 479 }
 480 
 481 /*
 482  * Add `path' to the pnset_t pointed to by `pnsetp'.
 483  */
 484 static int
 485 pnset_add(pnset_t *pnsetp, const char *path)
 486 {
 487         char **newpaths;
 488         unsigned int maxpaths;
 489 
 490         if (pnsetp->npath == pnsetp->maxpaths) {
 491                 maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2;
 492                 newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths);
 493                 if (newpaths == NULL)
 494                         return (0);
 495                 pnsetp->paths = newpaths;
 496                 pnsetp->maxpaths = maxpaths;
 497         }
 498 
 499         pnsetp->paths[pnsetp->npath] = strdup(path);
 500         if (pnsetp->paths[pnsetp->npath] == NULL)
 501                 return (0);
 502 
 503         pnsetp->npath++;
 504         return (1);
 505 }
 506 
 507 /*
 508  * Check `path' against the pnset_t pointed to by `pnsetp'.
 509  */
 510 static int
 511 pnset_check(const pnset_t *pnsetp, const char *path)
 512 {
 513         unsigned int i;
 514 
 515         for (i = 0; i < pnsetp->npath; i++) {
 516                 if (fnmatch(pnsetp->paths[i], path, 0) == 0)
 517                         return (1);
 518         }
 519         return (0);
 520 }
 521 
 522 /*
 523  * Empty the pnset_t pointed to by `pnsetp'.
 524  */
 525 static void
 526 pnset_empty(pnset_t *pnsetp)
 527 {
 528         while (pnsetp->npath-- != 0)
 529                 free(pnsetp->paths[pnsetp->npath]);
 530 
 531         free(pnsetp->paths);
 532         pnsetp->maxpaths = 0;
 533 }
 534 
 535 /*
 536  * Free the pnset_t pointed to by `pnsetp'.
 537  */
 538 static void
 539 pnset_free(pnset_t *pnsetp)
 540 {
 541         if (pnsetp != NULL) {
 542                 pnset_empty(pnsetp);
 543                 free(pnsetp);
 544         }
 545 }
 546 
 547 /* PRINTFLIKE1 */
 548 static void
 549 warn(const char *format, ...)
 550 {
 551         va_list alist;
 552         char *errstr = strerror(errno);
 553 
 554         if (errstr == NULL)
 555                 errstr = "<unknown error>";
 556 
 557         (void) fprintf(stderr, "%s: ", progname);
 558 
 559         va_start(alist, format);
 560         (void) vfprintf(stderr, format, alist);
 561         va_end(alist);
 562 
 563         if (strrchr(format, '\n') == NULL)
 564                 (void) fprintf(stderr, ": %s\n", errstr);
 565 }
 566 
 567 /* PRINTFLIKE1 */
 568 static void
 569 die(const char *format, ...)
 570 {
 571         va_list alist;
 572         char *errstr = strerror(errno);
 573 
 574         if (errstr == NULL)
 575                 errstr = "<unknown error>";
 576 
 577         (void) fprintf(stderr, "%s: fatal: ", progname);
 578 
 579         va_start(alist, format);
 580         (void) vfprintf(stderr, format, alist);
 581         va_end(alist);
 582 
 583         if (strrchr(format, '\n') == NULL)
 584                 (void) fprintf(stderr, ": %s\n", errstr);
 585 
 586         exit(EXIT_FAILURE);
 587 }