1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  *
  21  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  22  * Use is subject to license terms.
  23  */
  24 
  25 /*
  26  * Copyright (c) 2018, Joyent, Inc.
  27  */
  28 
  29 /*
  30  * Finds all unreferenced files in a source tree that do not match a list of
  31  * permitted pathnames.
  32  */
  33 
  34 #include <ctype.h>
  35 #include <errno.h>
  36 #include <fnmatch.h>
  37 #include <ftw.h>
  38 #include <stdarg.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42 #include <time.h>
  43 #include <unistd.h>
  44 #include <sys/param.h>
  45 #include <sys/stat.h>
  46 #include <sys/types.h>
  47 
  48 /*
  49  * Pathname set: a simple datatype for storing pathname pattern globs and
  50  * for checking whether a given pathname is matched by a pattern glob in
  51  * the set.
  52  */
  53 typedef struct {
  54         char            **paths;
  55         unsigned int    npath;
  56         unsigned int    maxpaths;
  57 } pnset_t;
  58 
  59 /*
  60  * Data associated with the current SCM manifest.
  61  */
  62 typedef struct scmdata {
  63         pnset_t         *manifest;
  64         char            metapath[MAXPATHLEN];
  65         char            root[MAXPATHLEN];
  66         unsigned int    rootlen;
  67         boolean_t       rootwarn;
  68 } scmdata_t;
  69 
  70 /*
  71  * Hooks used to check if a given unreferenced file is known to an SCM
  72  * (currently Git, Mercurial and TeamWare).
  73  */
  74 typedef int checkscm_func_t(const char *, const struct FTW *);
  75 typedef void chdirscm_func_t(const char *);
  76 
  77 typedef struct {
  78         const char      *name;
  79         checkscm_func_t *checkfunc;
  80         chdirscm_func_t *chdirfunc;
  81 } scm_t;
  82 
  83 static checkscm_func_t check_tw, check_scmdata;
  84 static chdirscm_func_t chdir_hg, chdir_git;
  85 static int      pnset_add(pnset_t *, const char *);
  86 static int      pnset_check(const pnset_t *, const char *);
  87 static void     pnset_empty(pnset_t *);
  88 static void     pnset_free(pnset_t *);
  89 static int      checkpath(const char *, const struct stat *, int, struct FTW *);
  90 static pnset_t  *make_exset(const char *);
  91 static void     warn(const char *, ...);
  92 static void     die(const char *, ...);
  93 
  94 static const scm_t scms[] = {
  95         { "tw",         check_tw,       NULL            },
  96         { "teamware",   check_tw,       NULL            },
  97         { "hg",         check_scmdata,  chdir_hg        },
  98         { "mercurial",  check_scmdata,  chdir_hg        },
  99         { "git",        check_scmdata,  chdir_git       },
 100         { NULL,         NULL,           NULL            }
 101 };
 102 
 103 static const scm_t      *scm;
 104 static scmdata_t        scmdata;
 105 static time_t           tstamp;         /* timestamp to compare files to */
 106 static pnset_t          *exsetp;        /* pathname globs to ignore */
 107 static const char       *progname;
 108 
 109 int
 110 main(int argc, char *argv[])
 111 {
 112         int c;
 113         char path[MAXPATHLEN];
 114         char subtree[MAXPATHLEN] = "./";
 115         char *tstampfile = ".build.tstamp";
 116         struct stat tsstat;
 117 
 118         progname = strrchr(argv[0], '/');
 119         if (progname == NULL)
 120                 progname = argv[0];
 121         else
 122                 progname++;
 123 
 124         while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {
 125                 switch (c) {
 126                 case 'a':
 127                         /* for compatibility; now the default */
 128                         break;
 129 
 130                 case 's':
 131                         (void) strlcat(subtree, optarg, MAXPATHLEN);
 132                         break;
 133 
 134                 case 't':
 135                         tstampfile = optarg;
 136                         break;
 137 
 138                 case 'S':
 139                         for (scm = scms; scm->name != NULL; scm++) {
 140                                 if (strcmp(scm->name, optarg) == 0)
 141                                         break;
 142                         }
 143                         if (scm->name == NULL)
 144                                 die("unsupported SCM `%s'\n", optarg);
 145                         break;
 146 
 147                 default:
 148                 case '?':
 149                         goto usage;
 150                 }
 151         }
 152 
 153         argc -= optind;
 154         argv += optind;
 155 
 156         if (argc != 2) {
 157 usage:          (void) fprintf(stderr, "usage: %s [-s <subtree>] "
 158                     "[-t <tstampfile>] [-S hg|tw|git] <srcroot> <exceptfile>\n",
 159                     progname);
 160                 return (EXIT_FAILURE);
 161         }
 162 
 163         /*
 164          * Interpret a relative timestamp path as relative to srcroot.
 165          */
 166         if (tstampfile[0] == '/')
 167                 (void) strlcpy(path, tstampfile, MAXPATHLEN);
 168         else
 169                 (void) snprintf(path, MAXPATHLEN, "%s/%s", argv[0], tstampfile);
 170 
 171         if (stat(path, &tsstat) == -1)
 172                 die("cannot stat timestamp file \"%s\"", path);
 173         tstamp = tsstat.st_mtime;
 174 
 175         /*
 176          * Create the exception pathname set.
 177          */
 178         exsetp = make_exset(argv[1]);
 179         if (exsetp == NULL)
 180                 die("cannot make exception pathname set\n");
 181 
 182         /*
 183          * Walk the specified subtree of the tree rooted at argv[0].
 184          */
 185         if (chdir(argv[0]) == -1)
 186                 die("cannot change directory to \"%s\"", argv[0]);
 187 
 188         if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
 189                 die("cannot walk tree rooted at \"%s\"\n", argv[0]);
 190 
 191         pnset_empty(exsetp);
 192         return (EXIT_SUCCESS);
 193 }
 194 
 195 /*
 196  * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
 197  */
 198 static pnset_t *
 199 hg_manifest(const char *hgroot)
 200 {
 201         FILE    *fp = NULL;
 202         char    *hgcmd = NULL;
 203         char    *newline;
 204         pnset_t *pnsetp;
 205         char    path[MAXPATHLEN];
 206 
 207         pnsetp = calloc(1, sizeof (pnset_t));
 208         if (pnsetp == NULL ||
 209             asprintf(&hgcmd, "hg manifest -R %s", hgroot) == -1)
 210                 goto fail;
 211 
 212         fp = popen(hgcmd, "r");
 213         if (fp == NULL)
 214                 goto fail;
 215 
 216         while (fgets(path, sizeof (path), fp) != NULL) {
 217                 newline = strrchr(path, '\n');
 218                 if (newline != NULL)
 219                         *newline = '\0';
 220 
 221                 if (pnset_add(pnsetp, path) == 0)
 222                         goto fail;
 223         }
 224 
 225         (void) pclose(fp);
 226         free(hgcmd);
 227         return (pnsetp);
 228 fail:
 229         warn("cannot load hg manifest at %s", hgroot);
 230         if (fp != NULL)
 231                 (void) pclose(fp);
 232         free(hgcmd);
 233         pnset_free(pnsetp);
 234         return (NULL);
 235 }
 236 
 237 /*
 238  * Load and return a pnset for the manifest for the Git repo at `gitroot'.
 239  */
 240 static pnset_t *
 241 git_manifest(const char *gitroot)
 242 {
 243         FILE    *fp = NULL;
 244         char    *gitcmd = NULL;
 245         char    *newline;
 246         pnset_t *pnsetp;
 247         char    path[MAXPATHLEN];
 248 
 249         pnsetp = calloc(1, sizeof (pnset_t));
 250         if (pnsetp == NULL ||
 251             asprintf(&gitcmd, "git --git-dir=%s/.git ls-files", gitroot) == -1)
 252                 goto fail;
 253 
 254         fp = popen(gitcmd, "r");
 255         if (fp == NULL)
 256                 goto fail;
 257 
 258         while (fgets(path, sizeof (path), fp) != NULL) {
 259                 newline = strrchr(path, '\n');
 260                 if (newline != NULL)
 261                         *newline = '\0';
 262 
 263                 if (pnset_add(pnsetp, path) == 0)
 264                         goto fail;
 265         }
 266 
 267         (void) pclose(fp);
 268         free(gitcmd);
 269         return (pnsetp);
 270 fail:
 271         warn("cannot load git manifest at %s", gitroot);
 272         if (fp != NULL)
 273                 (void) pclose(fp);
 274         free(gitcmd);
 275         pnset_free(pnsetp);
 276         return (NULL);
 277 }
 278 
 279 /*
 280  * If necessary, change our active manifest to be appropriate for `path'.
 281  */
 282 static void
 283 chdir_scmdata(const char *path, const char *meta,
 284     pnset_t *(*manifest_func)(const char *path))
 285 {
 286         char scmpath[MAXPATHLEN];
 287         char basepath[MAXPATHLEN];
 288         char *slash;
 289 
 290         (void) snprintf(scmpath, MAXPATHLEN, "%s/%s", path, meta);
 291 
 292         /*
 293          * Change our active manifest if any one of the following is true:
 294          *
 295          *   1. No manifest is loaded.  Find the nearest SCM root to load from.
 296          *
 297          *   2. A manifest is loaded, but we've moved into a directory with
 298          *      its own metadata directory (e.g., usr/closed).  Load from its
 299          *      root.
 300          *
 301          *   3. A manifest is loaded, but no longer applies (e.g., the manifest
 302          *      under usr/closed is loaded, but we've moved to usr/src).
 303          */
 304         if (scmdata.manifest == NULL ||
 305             (strcmp(scmpath, scmdata.metapath) != 0 &&
 306             access(scmpath, X_OK) == 0) ||
 307             strncmp(path, scmdata.root, scmdata.rootlen - 1) != 0) {
 308                 pnset_free(scmdata.manifest);
 309                 scmdata.manifest = NULL;
 310 
 311                 (void) strlcpy(basepath, path, MAXPATHLEN);
 312 
 313                 /*
 314                  * Walk up the directory tree looking for metadata
 315                  * subdirectories.
 316                  */
 317                 while (access(scmpath, X_OK) == -1) {
 318                         slash = strrchr(basepath, '/');
 319                         if (slash == NULL) {
 320                                 if (!scmdata.rootwarn) {
 321                                         warn("no metadata directory "
 322                                             "for \"%s\"\n", path);
 323                                         scmdata.rootwarn = B_TRUE;
 324                                 }
 325                                 return;
 326                         }
 327                         *slash = '\0';
 328                         (void) snprintf(scmpath, MAXPATHLEN, "%s/%s", basepath,
 329                             meta);
 330                 }
 331 
 332                 /*
 333                  * We found a directory with an SCM metadata directory; record
 334                  * it and load its manifest.
 335                  */
 336                 (void) strlcpy(scmdata.metapath, scmpath, MAXPATHLEN);
 337                 (void) strlcpy(scmdata.root, basepath, MAXPATHLEN);
 338                 scmdata.manifest = manifest_func(scmdata.root);
 339 
 340                 /*
 341                  * The logic in check_scmdata() depends on scmdata.root having
 342                  * a single trailing slash, so only add it if it's missing.
 343                  */
 344                 if (scmdata.root[strlen(scmdata.root) - 1] != '/')
 345                         (void) strlcat(scmdata.root, "/", MAXPATHLEN);
 346                 scmdata.rootlen = strlen(scmdata.root);
 347         }
 348 }
 349 
 350 /*
 351  * If necessary, change our active manifest to be appropriate for `path'.
 352  */
 353 static void
 354 chdir_git(const char *path)
 355 {
 356         chdir_scmdata(path, ".git", git_manifest);
 357 }
 358 
 359 static void
 360 chdir_hg(const char *path)
 361 {
 362         chdir_scmdata(path, ".hg", hg_manifest);
 363 }
 364 
 365 /* ARGSUSED */
 366 static int
 367 check_scmdata(const char *path, const struct FTW *ftwp)
 368 {
 369         /*
 370          * The manifest paths are relative to the manifest root; skip past it.
 371          */
 372         path += scmdata.rootlen;
 373 
 374         return (scmdata.manifest != NULL && pnset_check(scmdata.manifest,
 375             path));
 376 }
 377 
 378 /*
 379  * Check if a file is under TeamWare control by checking for its corresponding
 380  * SCCS "s-dot" file.
 381  */
 382 static int
 383 check_tw(const char *path, const struct FTW *ftwp)
 384 {
 385         char sccspath[MAXPATHLEN];
 386 
 387         (void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
 388             path, path + ftwp->base);
 389 
 390         return (access(sccspath, F_OK) == 0);
 391 }
 392 
 393 /*
 394  * Using `exceptfile' and a built-in list of exceptions, build and return a
 395  * pnset_t consisting of all of the pathnames globs which are allowed to be
 396  * unreferenced in the source tree.
 397  */
 398 static pnset_t *
 399 make_exset(const char *exceptfile)
 400 {
 401         FILE            *fp;
 402         char            line[MAXPATHLEN];
 403         char            *newline;
 404         pnset_t         *pnsetp;
 405         unsigned int    i;
 406 
 407         pnsetp = calloc(1, sizeof (pnset_t));
 408         if (pnsetp == NULL)
 409                 return (NULL);
 410 
 411         /*
 412          * Add any exceptions from the file.
 413          */
 414         fp = fopen(exceptfile, "r");
 415         if (fp == NULL) {
 416                 warn("cannot open exception file \"%s\"", exceptfile);
 417                 goto fail;
 418         }
 419 
 420         while (fgets(line, sizeof (line), fp) != NULL) {
 421                 newline = strrchr(line, '\n');
 422                 if (newline != NULL)
 423                         *newline = '\0';
 424 
 425                 for (i = 0; isspace(line[i]); i++)
 426                         ;
 427 
 428                 if (line[i] == '#' || line[i] == '\0')
 429                         continue;
 430 
 431                 if (pnset_add(pnsetp, line) == 0) {
 432                         (void) fclose(fp);
 433                         goto fail;
 434                 }
 435         }
 436 
 437         (void) fclose(fp);
 438         return (pnsetp);
 439 fail:
 440         pnset_free(pnsetp);
 441         return (NULL);
 442 }
 443 
 444 /*
 445  * FTW callback: print `path' if it's older than `tstamp' and not in `exsetp'.
 446  */
 447 static int
 448 checkpath(const char *path, const struct stat *statp, int type,
 449     struct FTW *ftwp)
 450 {
 451         switch (type) {
 452         case FTW_F:
 453                 /*
 454                  * Skip if the file is referenced or in the exception list.
 455                  */
 456                 if (statp->st_atime >= tstamp || pnset_check(exsetp, path))
 457                         return (0);
 458 
 459                 /*
 460                  * If requested, restrict ourselves to unreferenced files
 461                  * under SCM control.
 462                  */
 463                 if (scm == NULL || scm->checkfunc(path, ftwp))
 464                         (void) puts(path);
 465                 return (0);
 466 
 467         case FTW_D:
 468                 /*
 469                  * Prune any directories in the exception list.
 470                  */
 471                 if (pnset_check(exsetp, path)) {
 472                         ftwp->quit = FTW_PRUNE;
 473                         return (0);
 474                 }
 475 
 476                 /*
 477                  * If necessary, advise the SCM logic of our new directory.
 478                  */
 479                 if (scm != NULL && scm->chdirfunc != NULL)
 480                         scm->chdirfunc(path);
 481 
 482                 return (0);
 483 
 484         case FTW_DNR:
 485                 warn("cannot read \"%s\"", path);
 486                 return (0);
 487 
 488         case FTW_NS:
 489                 warn("cannot stat \"%s\"", path);
 490                 return (0);
 491 
 492         default:
 493                 break;
 494         }
 495 
 496         return (0);
 497 }
 498 
 499 /*
 500  * Add `path' to the pnset_t pointed to by `pnsetp'.
 501  */
 502 static int
 503 pnset_add(pnset_t *pnsetp, const char *path)
 504 {
 505         char **newpaths;
 506         unsigned int maxpaths;
 507 
 508         if (pnsetp->npath == pnsetp->maxpaths) {
 509                 maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2;
 510                 newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths);
 511                 if (newpaths == NULL)
 512                         return (0);
 513                 pnsetp->paths = newpaths;
 514                 pnsetp->maxpaths = maxpaths;
 515         }
 516 
 517         pnsetp->paths[pnsetp->npath] = strdup(path);
 518         if (pnsetp->paths[pnsetp->npath] == NULL)
 519                 return (0);
 520 
 521         pnsetp->npath++;
 522         return (1);
 523 }
 524 
 525 /*
 526  * Check `path' against the pnset_t pointed to by `pnsetp'.
 527  */
 528 static int
 529 pnset_check(const pnset_t *pnsetp, const char *path)
 530 {
 531         unsigned int i;
 532 
 533         for (i = 0; i < pnsetp->npath; i++) {
 534                 if (fnmatch(pnsetp->paths[i], path, 0) == 0)
 535                         return (1);
 536         }
 537         return (0);
 538 }
 539 
 540 /*
 541  * Empty the pnset_t pointed to by `pnsetp'.
 542  */
 543 static void
 544 pnset_empty(pnset_t *pnsetp)
 545 {
 546         while (pnsetp->npath-- != 0)
 547                 free(pnsetp->paths[pnsetp->npath]);
 548 
 549         free(pnsetp->paths);
 550         pnsetp->maxpaths = 0;
 551 }
 552 
 553 /*
 554  * Free the pnset_t pointed to by `pnsetp'.
 555  */
 556 static void
 557 pnset_free(pnset_t *pnsetp)
 558 {
 559         if (pnsetp != NULL) {
 560                 pnset_empty(pnsetp);
 561                 free(pnsetp);
 562         }
 563 }
 564 
 565 /* PRINTFLIKE1 */
 566 static void
 567 warn(const char *format, ...)
 568 {
 569         va_list alist;
 570         char *errstr = strerror(errno);
 571 
 572         if (errstr == NULL)
 573                 errstr = "<unknown error>";
 574 
 575         (void) fprintf(stderr, "%s: ", progname);
 576 
 577         va_start(alist, format);
 578         (void) vfprintf(stderr, format, alist);
 579         va_end(alist);
 580 
 581         if (strrchr(format, '\n') == NULL)
 582                 (void) fprintf(stderr, ": %s\n", errstr);
 583 }
 584 
 585 /* PRINTFLIKE1 */
 586 static void
 587 die(const char *format, ...)
 588 {
 589         va_list alist;
 590         char *errstr = strerror(errno);
 591 
 592         if (errstr == NULL)
 593                 errstr = "<unknown error>";
 594 
 595         (void) fprintf(stderr, "%s: fatal: ", progname);
 596 
 597         va_start(alist, format);
 598         (void) vfprintf(stderr, format, alist);
 599         va_end(alist);
 600 
 601         if (strrchr(format, '\n') == NULL)
 602                 (void) fprintf(stderr, ": %s\n", errstr);
 603 
 604         exit(EXIT_FAILURE);
 605 }