Print this page
5292 findunref is both slow and broken


  36 #include <stdlib.h>
  37 #include <string.h>
  38 #include <time.h>
  39 #include <unistd.h>
  40 #include <sys/param.h>
  41 #include <sys/stat.h>
  42 #include <sys/types.h>
  43 
  44 /*
  45  * Pathname set: a simple datatype for storing pathname pattern globs and
  46  * for checking whether a given pathname is matched by a pattern glob in
  47  * the set.
      *
      * The same type is also used to hold SCM manifests: load_manifest() and
      * chdir_git() add one entry per line of SCM output with pnset_add().
  48  */
  49 typedef struct {
  50         char            **paths;        /* the entries (strings) held by the set */
  51         unsigned int    npath;          /* presumably entries in use -- confirm in pnset_add() */
  52         unsigned int    maxpaths;       /* presumably allocated slots -- confirm in pnset_add() */
  53 } pnset_t;
  54 
  55 /*
  56  * Data associated with the current Mercurial manifest.
      *
      * Maintained by chdir_hg() and consulted by check_hg().
  57  */
  58 typedef struct hgdata {
  59         pnset_t         *manifest;              /* loaded manifest, or NULL if none is loaded */
  60         char            hgpath[MAXPATHLEN];     /* the ".hg" directory the manifest was loaded for */
  61         char            root[MAXPATHLEN];       /* repository root; chdir_hg() ensures a trailing '/' */
  62         unsigned int    rootlen;                /* strlen(root), trailing '/' included */
  63         boolean_t       rootwarn;               /* B_TRUE once "no hg root" has been warned about */
  64 } hgdata_t;
  65 
  66 /*
  67  * Hooks used to check if a given unreferenced file is known to an SCM
  68  * (currently Git, Mercurial and TeamWare).
  69  */
  70 typedef int checkscm_func_t(const char *, const struct FTW *);
  71 typedef void chdirscm_func_t(const char *);
  72 
  73 typedef struct {
  74         const char      *name;          /* SCM name as spelled in scms[], e.g. "hg" */
  75         checkscm_func_t *checkfunc;     /* hook: is this file known to the SCM? */
  76         chdirscm_func_t *chdirfunc;     /* directory hook; NULL for the TeamWare entries in scms[] */
  77 } scm_t;
  78 
  79 static checkscm_func_t check_tw, check_hg, check_git;
  80 static chdirscm_func_t chdir_hg, chdir_git;
  81 static int      pnset_add(pnset_t *, const char *);
  82 static int      pnset_check(const pnset_t *, const char *);
  83 static void     pnset_empty(pnset_t *);
  84 static void     pnset_free(pnset_t *);
  85 static int      checkpath(const char *, const struct stat *, int, struct FTW *);
  86 static pnset_t  *make_exset(const char *);
  87 static void     warn(const char *, ...);
  88 static void     die(const char *, ...);
  89 
  90 static const scm_t scms[] = {
             /*
              * Supported SCMs.  Each SCM is listed under a short and a long
              * name sharing the same hooks, except git which has one name.
              *
              * name          checkfunc       chdirfunc
              */
  91         { "tw",         check_tw,       NULL            },
  92         { "teamware",   check_tw,       NULL            },
  93         { "hg",         check_hg,       chdir_hg        },
  94         { "mercurial",  check_hg,       chdir_hg        },
  95         { "git",        check_git,      chdir_git       },
  96         { NULL,         NULL,           NULL            }       /* presumably the end-of-table marker -- confirm at the lookup site */
  97 };
  98 
  99 static const scm_t      *scm;
 100 static hgdata_t         hgdata;
 101 static pnset_t          *gitmanifest = NULL;
 102 static time_t           tstamp;         /* timestamp to compare files to */
 103 static pnset_t          *exsetp;        /* pathname globs to ignore */
 104 static const char       *progname;
 105 
 106 int
 107 main(int argc, char *argv[])
 108 {
 109         int c;
 110         char path[MAXPATHLEN];
 111         char subtree[MAXPATHLEN] = "./";
 112         char *tstampfile = ".build.tstamp";
 113         struct stat tsstat;
 114 
 115         progname = strrchr(argv[0], '/');
 116         if (progname == NULL)
 117                 progname = argv[0];
 118         else
 119                 progname++;
 120 
 121         while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {


 176         if (exsetp == NULL)
 177                 die("cannot make exception pathname set\n");
 178 
 179         /*
 180          * Walk the specified subtree of the tree rooted at argv[0].
 181          */
 182         if (chdir(argv[0]) == -1)
 183                 die("cannot change directory to \"%s\"", argv[0]);
 184 
 185         if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
 186                 die("cannot walk tree rooted at \"%s\"\n", argv[0]);
 187 
 188         pnset_empty(exsetp);
 189         return (EXIT_SUCCESS);
 190 }
 191 
 192 /*
 193  * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
 194  */
 195 static pnset_t *
 196 load_manifest(const char *hgroot)
 197 {
 198         FILE    *fp = NULL;
 199         char    *hgcmd = NULL;
 200         char    *newline;
 201         pnset_t *pnsetp;
 202         char    path[MAXPATHLEN];
 203 
 204         pnsetp = calloc(sizeof (pnset_t), 1);
 205         if (pnsetp == NULL ||
 206             asprintf(&hgcmd, "hg manifest -R %s", hgroot) == -1)
 207                 goto fail;
 208 
 209         fp = popen(hgcmd, "r");
 210         if (fp == NULL)
 211                 goto fail;
 212 
 213         while (fgets(path, sizeof (path), fp) != NULL) {
 214                 newline = strrchr(path, '\n');
 215                 if (newline != NULL)
 216                         *newline = '\0';
 217 
 218                 if (pnset_add(pnsetp, path) == 0)
 219                         goto fail;
 220         }
 221 
 222         (void) pclose(fp);
 223         free(hgcmd);
 224         return (pnsetp);
 225 fail:
 226         warn("cannot load hg manifest at %s", hgroot);
 227         if (fp != NULL)
 228                 (void) pclose(fp);
 229         free(hgcmd);
 230         pnset_free(pnsetp);
 231         return (NULL);
 232 }
 233 
 234 static void
 235 chdir_git(const char *path)



 236 {
 237         FILE *fp = NULL;
 238         char *gitcmd = NULL;
 239         char *newline;
 240         char fn[MAXPATHLEN];
 241         pnset_t *pnsetp;

 242 
 243         pnsetp = calloc(sizeof (pnset_t), 1);
 244         if ((pnsetp == NULL) ||
 245             (asprintf(&gitcmd, "git ls-files %s", path) == -1))
 246                 goto fail;
 247 
 248         if ((fp = popen(gitcmd, "r")) == NULL)

 249                 goto fail;
 250 
 251         while (fgets(fn, sizeof (fn), fp) != NULL) {
 252                 if ((newline = strrchr(fn, '\n')) != NULL)

 253                         *newline = '\0';
 254 
 255                 if (pnset_add(pnsetp, fn) == 0)
 256                         goto fail;
 257         }
 258 
 259         (void) pclose(fp);
 260         free(gitcmd);
 261         gitmanifest = pnsetp;
 262         return;
 263 fail:
 264         warn("cannot load git manifest");
 265         if (fp != NULL)
 266                 (void) pclose(fp);
 267         if (pnsetp != NULL)
 268                 free(pnsetp);
 269         if (gitcmd != NULL)
 270                 free(gitcmd);


 271 }
 272 
 273 /*
 274  * If necessary, change our active manifest to be appropriate for `path'.
 275  */
 276 static void
 277 chdir_hg(const char *path)

 278 {
 279         char hgpath[MAXPATHLEN];
 280         char basepath[MAXPATHLEN];
 281         char *slash;
 282 
 283         (void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", path);
 284 
 285         /*
 286          * Change our active manifest if any one of the following is true:
 287          *
 288          *   1. No manifest is loaded.  Find the nearest hgroot to load from.
 289          *
 290          *   2. A manifest is loaded, but we've moved into a directory with
 291          *      its own hgroot (e.g., usr/closed).  Load from its hgroot.

 292          *
 293          *   3. A manifest is loaded, but no longer applies (e.g., the manifest
 294          *      under usr/closed is loaded, but we've moved to usr/src).
 295          */
 296         if (hgdata.manifest == NULL ||
 297             strcmp(hgpath, hgdata.hgpath) != 0 && access(hgpath, X_OK) == 0 ||
 298             strncmp(path, hgdata.root, hgdata.rootlen - 1) != 0) {
 299                 pnset_free(hgdata.manifest);
 300                 hgdata.manifest = NULL;

 301 
 302                 (void) strlcpy(basepath, path, MAXPATHLEN);
 303 
 304                 /*
 305                  * Walk up the directory tree looking for .hg subdirectories.

 306                  */
 307                 while (access(hgpath, X_OK) == -1) {
 308                         slash = strrchr(basepath, '/');
 309                         if (slash == NULL) {
 310                                 if (!hgdata.rootwarn) {
 311                                         warn("no hg root for \"%s\"\n", path);
 312                                         hgdata.rootwarn = B_TRUE;

 313                                 }
 314                                 return;
 315                         }
 316                         *slash = '\0';
 317                         (void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", basepath);

 318                 }
 319 
 320                 /*
 321                  * We found a directory with an .hg subdirectory; record it
 322                  * and load its manifest.
 323                  */
 324                 (void) strlcpy(hgdata.hgpath, hgpath, MAXPATHLEN);
 325                 (void) strlcpy(hgdata.root, basepath, MAXPATHLEN);
 326                 hgdata.manifest = load_manifest(hgdata.root);
 327 
 328                 /*
 329                  * The logic in check_hg() depends on hgdata.root having a
 330                  * single trailing slash, so only add it if it's missing.
 331                  */
 332                 if (hgdata.root[strlen(hgdata.root) - 1] != '/')
 333                         (void) strlcat(hgdata.root, "/", MAXPATHLEN);
 334                 hgdata.rootlen = strlen(hgdata.root);
 335         }
 336 }
 337 






 338 /*
 339  * Check if a file is under Mercurial control by checking against the manifest.
 340  */






 341 /* ARGSUSED */
 342 static int
 343 check_hg(const char *path, const struct FTW *ftwp)
 344 {
 345         /*
 346          * The manifest paths are relative to the manifest root; skip past it.
 347          */
 348         path += hgdata.rootlen;
 349 
 350         return (hgdata.manifest != NULL && pnset_check(hgdata.manifest, path));
 351 }
 352 /* ARGSUSED */
 353 static int
 354 check_git(const char *path, const struct FTW *ftwp)
 355 {
 356         path += 2;              /* Skip "./" */
 357         return (gitmanifest != NULL && pnset_check(gitmanifest, path));
 358 }
 359 
 360 /*
 361  * Check if a file is under TeamWare control by checking for its corresponding
 362  * SCCS "s-dot" file.
 363  */
 364 static int
 365 check_tw(const char *path, const struct FTW *ftwp)
 366 {
 367         char sccspath[MAXPATHLEN];
 368 
 369         (void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
 370             path, path + ftwp->base);
 371 
 372         return (access(sccspath, F_OK) == 0);
 373 }
 374 
 375 /*
 376  * Using `exceptfile' and a built-in list of exceptions, build and return a
 377  * pnset_t consisting of all of the pathnames globs which are allowed to be




  36 #include <stdlib.h>
  37 #include <string.h>
  38 #include <time.h>
  39 #include <unistd.h>
  40 #include <sys/param.h>
  41 #include <sys/stat.h>
  42 #include <sys/types.h>
  43 
  44 /*
  45  * Pathname set: a simple datatype for storing pathname pattern globs and
  46  * for checking whether a given pathname is matched by a pattern glob in
  47  * the set.
      *
      * The same type is also used to hold SCM manifests: hg_manifest() and
      * git_manifest() add one entry per line of SCM output with pnset_add().
  48  */
  49 typedef struct {
  50         char            **paths;        /* the entries (strings) held by the set */
  51         unsigned int    npath;          /* presumably entries in use -- confirm in pnset_add() */
  52         unsigned int    maxpaths;       /* presumably allocated slots -- confirm in pnset_add() */
  53 } pnset_t;
  54 
  55 /*
  56  * Data associated with the current SCM manifest.
      *
      * Maintained by chdir_scmdata() and consulted by check_scmdata().
  57  */
  58 typedef struct scmdata {
  59         pnset_t         *manifest;              /* loaded manifest, or NULL if none is loaded */
  60         char            metapath[MAXPATHLEN];   /* the metadata directory (e.g. ".hg", ".git") the manifest was loaded for */
  61         char            root[MAXPATHLEN];       /* repository root; chdir_scmdata() ensures a trailing '/' */
  62         unsigned int    rootlen;                /* strlen(root), trailing '/' included */
  63         boolean_t       rootwarn;               /* B_TRUE once "no metadata directory" has been warned about */
  64 } scmdata_t;
  65 
  66 /*
  67  * Hooks used to check if a given unreferenced file is known to an SCM
  68  * (currently Git, Mercurial and TeamWare).
  69  */
  70 typedef int checkscm_func_t(const char *, const struct FTW *);
  71 typedef void chdirscm_func_t(const char *);
  72 
  73 typedef struct {
  74         const char      *name;          /* SCM name as spelled in scms[], e.g. "hg" */
  75         checkscm_func_t *checkfunc;     /* hook: is this file known to the SCM? */
  76         chdirscm_func_t *chdirfunc;     /* directory hook; NULL for the TeamWare entries in scms[] */
  77 } scm_t;
  78 
  79 static checkscm_func_t check_tw, check_scmdata;
  80 static chdirscm_func_t chdir_hg, chdir_git;
  81 static int      pnset_add(pnset_t *, const char *);
  82 static int      pnset_check(const pnset_t *, const char *);
  83 static void     pnset_empty(pnset_t *);
  84 static void     pnset_free(pnset_t *);
  85 static int      checkpath(const char *, const struct stat *, int, struct FTW *);
  86 static pnset_t  *make_exset(const char *);
  87 static void     warn(const char *, ...);
  88 static void     die(const char *, ...);
  89 
  90 static const scm_t scms[] = {
             /*
              * Supported SCMs.  Mercurial and Git share one check hook,
              * check_scmdata(), and differ only in their chdir hook.
              *
              * name          checkfunc       chdirfunc
              */
  91         { "tw",         check_tw,       NULL            },
  92         { "teamware",   check_tw,       NULL            },
  93         { "hg",         check_scmdata,  chdir_hg        },
  94         { "mercurial",  check_scmdata,  chdir_hg        },
  95         { "git",        check_scmdata,  chdir_git       },
  96         { NULL,         NULL,           NULL            }       /* presumably the end-of-table marker -- confirm at the lookup site */
  97 };
  98 
  99 static const scm_t      *scm;
 100 static scmdata_t        scmdata;

 101 static time_t           tstamp;         /* timestamp to compare files to */
 102 static pnset_t          *exsetp;        /* pathname globs to ignore */
 103 static const char       *progname;
 104 
 105 int
 106 main(int argc, char *argv[])
 107 {
 108         int c;
 109         char path[MAXPATHLEN];
 110         char subtree[MAXPATHLEN] = "./";
 111         char *tstampfile = ".build.tstamp";
 112         struct stat tsstat;
 113 
 114         progname = strrchr(argv[0], '/');
 115         if (progname == NULL)
 116                 progname = argv[0];
 117         else
 118                 progname++;
 119 
 120         while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {


 175         if (exsetp == NULL)
 176                 die("cannot make exception pathname set\n");
 177 
 178         /*
 179          * Walk the specified subtree of the tree rooted at argv[0].
 180          */
 181         if (chdir(argv[0]) == -1)
 182                 die("cannot change directory to \"%s\"", argv[0]);
 183 
 184         if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
 185                 die("cannot walk tree rooted at \"%s\"\n", argv[0]);
 186 
 187         pnset_empty(exsetp);
 188         return (EXIT_SUCCESS);
 189 }
 190 
 191 /*
 192  * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
 193  */
 194 static pnset_t *
 195 hg_manifest(const char *hgroot)
 196 {
 197         FILE    *fp = NULL;
 198         char    *hgcmd = NULL;
 199         char    *newline;
 200         pnset_t *pnsetp;
 201         char    path[MAXPATHLEN];
 202 
 203         pnsetp = calloc(sizeof (pnset_t), 1);
 204         if (pnsetp == NULL ||
 205             asprintf(&hgcmd, "hg manifest -R %s", hgroot) == -1)
 206                 goto fail;
 207 
 208         fp = popen(hgcmd, "r");
 209         if (fp == NULL)
 210                 goto fail;
 211 
 212         while (fgets(path, sizeof (path), fp) != NULL) {
 213                 newline = strrchr(path, '\n');
 214                 if (newline != NULL)
 215                         *newline = '\0';
 216 
 217                 if (pnset_add(pnsetp, path) == 0)
 218                         goto fail;
 219         }
 220 
 221         (void) pclose(fp);
 222         free(hgcmd);
 223         return (pnsetp);
 224 fail:
 225         warn("cannot load hg manifest at %s", hgroot);
 226         if (fp != NULL)
 227                 (void) pclose(fp);
 228         free(hgcmd);
 229         pnset_free(pnsetp);
 230         return (NULL);
 231 }
 232 
 233 /*
 234  * Load and return a pnset for the manifest for the Git repo at `gitroot'.
 235  */
 236 static pnset_t *
 237 git_manifest(const char *gitroot)
 238 {
 239         FILE    *fp = NULL;
 240         char    *gitcmd = NULL;
 241         char    *newline;

 242         pnset_t *pnsetp;
 243         char    path[MAXPATHLEN];
 244 
 245         pnsetp = calloc(sizeof (pnset_t), 1);
 246         if (pnsetp == NULL ||
 247             asprintf(&gitcmd, "git --git-dir=%s/.git ls-files", gitroot) == -1)
 248                 goto fail;
 249 
 250         fp = popen(gitcmd, "r");
 251         if (fp == NULL)
 252                 goto fail;
 253 
 254         while (fgets(path, sizeof (path), fp) != NULL) {
 255                 newline = strrchr(path, '\n');
 256                 if (newline != NULL)
 257                         *newline = '\0';
 258 
 259                 if (pnset_add(pnsetp, path) == 0)
 260                         goto fail;
 261         }
 262 
 263         (void) pclose(fp);
 264         free(gitcmd);
 265         return (pnsetp);

 266 fail:
 267         warn("cannot load git manifest at %s", gitroot);
 268         if (fp != NULL)
 269                 (void) pclose(fp);



 270         free(gitcmd);
 271         pnset_free(pnsetp);
 272         return (NULL);
 273 }
 274 
 275 /*
 276  * If necessary, change our active manifest to be appropriate for `path'.
 277  */
 278 static void
 279 chdir_scmdata(const char *path, const char *meta,
 280     pnset_t *(*manifest_func)(const char *path))
 281 {
 282         char scmpath[MAXPATHLEN];
 283         char basepath[MAXPATHLEN];
 284         char *slash;
 285 
 286         (void) snprintf(scmpath, MAXPATHLEN, "%s/%s", path, meta);
 287 
 288         /*
 289          * Change our active manifest if any one of the following is true:
 290          *
 291          *   1. No manifest is loaded.  Find the nearest SCM root to load from.
 292          *
 293          *   2. A manifest is loaded, but we've moved into a directory with
 294          *      its own metadata directory (e.g., usr/closed).  Load from its
 295          *      root.
 296          *
 297          *   3. A manifest is loaded, but no longer applies (e.g., the manifest
 298          *      under usr/closed is loaded, but we've moved to usr/src).
 299          */
 300         if (scmdata.manifest == NULL ||
 301             (strcmp(scmpath, scmdata.metapath) != 0 &&
 302             access(scmpath, X_OK) == 0) ||
 303             strncmp(path, scmdata.root, scmdata.rootlen - 1) != 0) {
 304                 pnset_free(scmdata.manifest);
 305                 scmdata.manifest = NULL;
 306 
 307                 (void) strlcpy(basepath, path, MAXPATHLEN);
 308 
 309                 /*
 310                  * Walk up the directory tree looking for metadata
 311                  * subdirectories.
 312                  */
 313                 while (access(scmpath, X_OK) == -1) {
 314                         slash = strrchr(basepath, '/');
 315                         if (slash == NULL) {
 316                                 if (!scmdata.rootwarn) {
 317                                         warn("no metadata directory "
 318                                             "for \"%s\"\n", path);
 319                                         scmdata.rootwarn = B_TRUE;
 320                                 }
 321                                 return;
 322                         }
 323                         *slash = '\0';
 324                         (void) snprintf(scmpath, MAXPATHLEN, "%s/%s", basepath,
 325                             meta);
 326                 }
 327 
 328                 /*
 329                  * We found a directory with an SCM metadata directory; record
 330                  * it and load its manifest.
 331                  */
 332                 (void) strlcpy(scmdata.metapath, scmpath, MAXPATHLEN);
 333                 (void) strlcpy(scmdata.root, basepath, MAXPATHLEN);
 334                 scmdata.manifest = manifest_func(scmdata.root);
 335 
 336                 /*
 337                  * The logic in check_scmdata() depends on scmdata.root having
 338                  * a single trailing slash, so only add it if it's missing.
 339                  */
 340                 if (scmdata.root[strlen(scmdata.root) - 1] != '/')
 341                         (void) strlcat(scmdata.root, "/", MAXPATHLEN);
 342                 scmdata.rootlen = strlen(scmdata.root);
 343         }
 344 }
 345 
 346 static void
 347 chdir_git(const char *path)
 348 {
             /*
              * Git flavour of chdir_scmdata(): the metadata directory is
              * ".git" and manifests are loaded with git_manifest().
              */
 349         chdir_scmdata(path, ".git", git_manifest);
 350 }
 351 
 352 /*
 353  * If necessary, change our active manifest to be appropriate for `path'.
      *
      * Mercurial flavour of chdir_scmdata(): the metadata directory is ".hg"
      * and manifests are loaded with hg_manifest().
 354  */
 355 static void
 356 chdir_hg(const char *path)
 357 {
 358         chdir_scmdata(path, ".hg", hg_manifest);
 359 }
 360 
 361 /* ARGSUSED */
 362 static int
 363 check_scmdata(const char *path, const struct FTW *ftwp)
 364 {
 365         /*
 366          * The manifest paths are relative to the manifest root; skip past it.
 367          */
 368         path += scmdata.rootlen;
 369 
 370         return (scmdata.manifest != NULL && pnset_check(scmdata.manifest,
 371             path));






 372 }
 373 
 374 /*
 375  * Check if a file is under TeamWare control by checking for its corresponding
 376  * SCCS "s-dot" file.
 377  */
 378 static int
 379 check_tw(const char *path, const struct FTW *ftwp)
 380 {
 381         char sccspath[MAXPATHLEN];
 382 
 383         (void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
 384             path, path + ftwp->base);
 385 
 386         return (access(sccspath, F_OK) == 0);
 387 }
 388 
 389 /*
 390  * Using `exceptfile' and a built-in list of exceptions, build and return a
 391  * pnset_t consisting of all of the pathnames globs which are allowed to be