1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright (c) 1995 Sun Microsystems, Inc.  All Rights Reserved
  24  *
  25  * module:
  26  *      anal.c
  27  *
  28  * purpose:
  29  *      routines to analyze the file trees and figure out what has changed
  30  *      and queue files for reconciliation.  It also contains tree enumeration
  31  *      routines to for other purposes (pruning and link location).
  32  *
  33  * contents:
  34  *
  35  *  change analysis:
  36  *      analyze .... (top level) analyze all files in the tree for changes
  37  *      summary .... print out change/reconciliation statistics for each base
  38  *      check_file . (static) look for changes and queue file for reconciliation
  39  *      check_changes (static) figure out if a particular file has changed
  40  *      queue_file . (static) add a file to the reconciliation list
  41  *
  42  *  other tree enumeration functions:
  43  *      prune_file . (static) recursive descent and actual pruning
  44  *      prune ...... (top level) initiate pruning analysis for nonexistant files
  45  *      find_link .. look for other files to which a file may be a link
  46  *      link_update. propagate changed stat info to all other links
  47  *      same_name .. (static) figure out if two nodes describe same file
  48  *
  49  *  misc:
  50  *      push_name .. maintain a running full pathname as we descend
  51  *      pop_name ... maintain a running full pathname as we pop back
  52  *      get_name ... return full pathname for the current file
  53  *
  54  * notes:
  55  *      analysis is limited to files that were evaluated in the previous
  56  *      pass ... since we don't have complete information about files that
  57  *      were not evaluated in the previous pass.
  58  */
  59 #pragma ident   "%Z%%M% %I%     %E% SMI"
  60 
  61 #include <stdio.h>
  62 #include <stdlib.h>
  63 #include <strings.h>
  64 
  65 #include "messages.h"
  66 #include "filesync.h"
  67 #include "database.h"
  68 #include "debug.h"
  69 
  70 /*
  71  * routines:
  72  */
  73 void push_name(const char *);
  74 void pop_name();
  75 char *get_name(struct file *);
  76 static errmask_t check_file(struct file *fp);
  77 static diffmask_t check_changes(struct file *fp, int first, int second);
  78 static int prune_file(struct file *fp);
  79 static void queue_file(struct file *fp);
  80 
  81 /*
  82  * globals
  83  */
  84 static struct file *changes;    /* list of files to be reconciled       */
  85 
  86 static long total_files;        /* total number of files being considered  */
  87 static long est_deletes;        /* estimated number of files to be deleted */
  88 static long est_rmdirs;         /* est rmdirs of non-empty directories     */
  89 
  90 int inum_changes;               /* LISTed directories whose I#s changed    */
  91 
  92 /*
  93  * routine:
  94  *      analyze
  95  *
  96  * purpose:
  97  *      top level routine for the analysis/reconciliation process
  98  *
  99  * parameters:
 100  *      none
 101  *
 102  * returns:
 103  *      error mask
 104  *
 105  * notes:
 106  *      a critical side effect of this routine is the creation of
 107  *      the reconciliation list, an ordered list of files that
 108  *      needed to be processed in the subsequent reconciliation pass
 109  */
 110 errmask_t
 111 analyze()
 112 {       struct base *bp;
 113         struct file *fp;
 114         int errs = 0;
 115         int err;
 116         int percentage;
 117         bool_t aborted = FALSE;
 118         char msgbuf[MAX_LINE];
 119 
 120         /*
 121          * run through all bases and directories looking for files
 122          * that have been renamed.  This must be done before the
 123          * difference analysis because a directory rename can introduce
 124          * radical restructuring into a name-based tree.
 125          */
 126         for (bp = bases; bp; bp = bp->b_next) {
 127                 for (fp = bp->b_files; fp; fp = fp->f_next)
 128                         if (fp->f_flags & F_EVALUATE)
 129                                 errs |= find_renames(fp);
 130         }
 131 
 132         /*
 133          * run through all bases and files looking for candidates
 134          * note, however that we only descend into trees that have
 135          * the evaluate flag turned on.  As a result of new rules or
 136          * restriction arguments, we may be deliberatly ignoring
 137          * large amounts of the baseline.   This means we won't do
 138          * any stats to update the information in those nodes, and
 139          * they will be written back just as they were.
 140          *
 141          * note that there is code to prune out baseline nodes for
 142          * files that no longer exist, but that code is in reconcile
 143          * and will never get a chance to run on nodes that aren't
 144          * analyzed.
 145          *
 146          * we also want to run though all nodes with STAT errors
 147          * so that we can put them on the reconciliation list.
 148          */
 149         for (bp = bases; bp; bp = bp->b_next) {
 150                 for (fp = bp->b_files; fp; fp = fp->f_next)
 151                         if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR))
 152                                 errs |= check_file(fp);
 153         }
 154 
 155         /*
 156          * my greatest fear is that someday, somehow, by messing with
 157          * variables or baselines or who-knows-what, that someone will
 158          * run a reconciliation against a large tree that doesn't correspond
 159          * to the baseline, and I will infer that a bazillion files have
 160          * been deleted and will propagate the slaughter before anyone
 161          * can say somebody stop that maniac.
 162          *
 163          * in order to prevent such a possibility, we have a few different
 164          * sanity checks.  There is, of course, a tradeoff here between
 165          * danger and irritation.  The current set of heuristics for whether
 166          * or not to generate a warning are (any of)
 167          *
 168          *      at least CONFIRM_MIN files have been deleted AND
 169          *      CONFIRM_PCT of all files have been deleted
 170          *
 171          *      the inode number on a LISTed directory has changed
 172          *
 173          *      a non-empty directory has been deleted.
 174          */
 175         msgbuf[0] = 0;
 176 
 177         percentage = (est_deletes * 100) / (total_files ? total_files : 1);
 178         if (est_deletes >= CONFIRM_MIN && percentage >= CONFIRM_PCT)
 179                 sprintf(msgbuf, gettext(WARN_deletes), est_deletes);
 180         else if (inum_changes > 0)
 181                 sprintf(msgbuf, gettext(WARN_ichange), inum_changes);
 182         else if (est_rmdirs)
 183                 sprintf(msgbuf, gettext(WARN_rmdirs), est_rmdirs);
 184 
 185         if (msgbuf[0])
 186                 confirm(msgbuf);
 187 
 188         /*
 189          * TRICK:
 190          *      the change list contains both files that have changed
 191          *      (and probably warrant reconciliation) and files that
 192          *      we couldn't get up-to-date stat information on.  The
 193          *      latter files should just be flagged as being in conflict
 194          *      so they can be reported in the summary.  The same is
 195          *      true of all subsequent files if we abort reconciliation.
 196          */
 197         for (fp = changes; fp; fp = fp->f_rnext)
 198                 if (aborted || (fp->f_flags & F_STAT_ERROR)) {
 199                         fp->f_flags |= F_CONFLICT;
 200                         /* if it isn't in the baseline yet, don't add it */
 201                         if ((fp->f_flags & F_IN_BASELINE) == 0)
 202                                 fp->f_flags |= F_REMOVE;
 203                         fp->f_problem = aborted ? PROB_aborted : PROB_restat;
 204                         (fp->f_base)->b_unresolved++;
 205                         errs |= ERR_UNRESOLVED;
 206                         if (opt_verbose)
 207                                 fprintf(stdout,
 208                                         gettext(aborted ? V_suppressed
 209                                                         : V_nostat),
 210                                         fp->f_fullname);
 211                 } else {
 212                         err = reconcile(fp);
 213                         errs |= err;
 214                         if (opt_halt && (err & ERR_ABORT)) {
 215                                 fprintf(stderr, gettext(ERR_abort_h));
 216                                 aborted = TRUE;
 217                         }
 218                 }
 219 
 220         return (errs);
 221 }
 222 
 223 /*
 224  * routine:
 225  *      prune_file
 226  *
 227  * purpose:
 228  *      to look for file entries that should be pruned from baseline
 229  *      prune the current file if it needs pruning, and recursively
 230  *      descend if it is a directory.
 231  *
 232  * parameters:
 233  *      pointer to file node
 234  */
 235 static int
 236 prune_file(struct file *fp)
 237 {       struct file *cp;
 238         int prunes = 0;
 239 
 240         /* if node hasn't been evaluated, mark it for removal   */
 241         if ((fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) == 0) {
 242                 fp->f_flags |= F_REMOVE;
 243                 prunes++;
 244                 if (opt_debug & DBG_ANAL)
 245                         fprintf(stderr, "ANAL: PRUNE %s\n", fp->f_name);
 246         }
 247 
 248         /* now check our children                               */
 249         for (cp = fp->f_files; cp; cp = cp->f_next)
 250                 prunes += prune_file(cp);
 251 
 252         return (prunes);
 253 }
 254 
 255 /*
 256  * routine:
 257  *      prune
 258  *
 259  * purpose:
 260  *      to prune the baseline of entries that no longer correspond to
 261  *      existing rules.
 262  *
 263  * notes:
 264  *      This routine just calls prune_file on the top of each base tree.
 265  */
 266 int
 267 prune()
 268 {       struct base *bp;
 269         struct file *fp;
 270         int prunes = 0;
 271 
 272         for (bp = bases; bp; bp = bp->b_next) {
 273                 for (fp = bp->b_files; fp; fp = fp->f_next)
 274                         prunes += prune_file(fp);
 275 
 276                 if ((bp->b_flags & F_EVALUATE) == 0)
 277                         bp->b_flags |= F_REMOVE;
 278         }
 279 
 280         return (prunes);
 281 }
 282 
 283 /*
 284  * routine:
 285  *      summary
 286  *
 287  * purpose:
 288  *      to print out statics and conflict lists
 289  */
 290 void
 291 summary()
 292 {       struct base *bp;
 293         struct file *fp;
 294         extern bool_t need_super;
 295 
 296         (void) fflush(stdout);
 297 
 298         for (bp = bases; bp; bp = bp->b_next) {
 299 
 300                 /* see if this base was irrelevant      */
 301                 if ((bp->b_flags & F_EVALUATE) == 0)
 302                         continue;
 303 
 304                 /* print out a summary for this base    */
 305                 fprintf(stderr, gettext(SUM_hd),
 306                         bp->b_src_spec, bp->b_dst_spec, bp->b_totfiles);
 307                 fprintf(stderr, gettext(SUM_dst),
 308                         bp->b_dst_copies, bp->b_dst_deletes, bp->b_dst_misc);
 309                 fprintf(stderr, gettext(SUM_src),
 310                         bp->b_src_copies, bp->b_src_deletes, bp->b_src_misc);
 311                 if (bp->b_unresolved)
 312                         fprintf(stderr, gettext(SUM_unresolved),
 313                                 bp->b_unresolved);
 314 
 315 
 316                 /* print out a list of unreconciled files for this base */
 317                 for (fp = changes; fp; fp = fp->f_rnext) {
 318                         if (fp->f_base != bp)
 319                                 continue;
 320                         if ((fp->f_flags & F_CONFLICT) == 0)
 321                                 continue;
 322                         fprintf(stderr, "\t\t%s (%s)\n", fp->f_fullname,
 323                                 fp->f_problem ? fp->f_problem : "???");
 324                 }
 325 
 326                 fprintf(stderr, "\n");
 327         }
 328 
 329         if (need_super)
 330                 fprintf(stderr, gettext(WARN_super));
 331 }
 332 
 333 /*
 334  * routine:
 335  *      check_file
 336  *
 337  * purpose:
 338  *      figure out if a file requires reconciliation and recursively
 339  *      descend into all sub-files and directories
 340  *
 341  * parameters:
 342  *      base pointer
 343  *      file pointer
 344  *
 345  * returns:
 346  *      error mask
 347  *      built up changes needed list
 348  *      updated statistics
 349  *
 350  * notes:
 351  *      this routine builds up a path name as it descends through
 352  *      the tree (see push_name, pop_name, get_name).
 353  */
 354 static errmask_t
 355 check_file(struct file *fp)
 356 {       struct file *cp;
 357         int errs = 0;
 358 
 359         if ((fp->f_flags & F_STAT_ERROR) == 0) {
 360                 /* see if the source has changed        */
 361                 fp->f_info[OPT_BASE].f_modtime       = fp->f_s_modtime;
 362                 fp->f_info[OPT_BASE].f_ino   = fp->f_s_inum;
 363                 fp->f_info[OPT_BASE].f_d_maj = fp->f_s_maj;
 364                 fp->f_info[OPT_BASE].f_d_min = fp->f_s_min;
 365                 fp->f_info[OPT_BASE].f_nlink = fp->f_s_nlink;
 366                 fp->f_srcdiffs |= check_changes(fp, OPT_BASE, OPT_SRC);
 367 
 368                 /* see if the destination has changed   */
 369                 fp->f_info[OPT_BASE].f_modtime       = fp->f_d_modtime;
 370                 fp->f_info[OPT_BASE].f_ino           = fp->f_d_inum;
 371                 fp->f_info[OPT_BASE].f_d_maj    = fp->f_d_maj;
 372                 fp->f_info[OPT_BASE].f_d_min    = fp->f_d_min;
 373                 fp->f_info[OPT_BASE].f_nlink = fp->f_d_nlink;
 374                 fp->f_dstdiffs |= check_changes(fp, OPT_BASE, OPT_DST);
 375 
 376                 /* if nobody thinks the file exists, baseline needs pruning */
 377                 if ((fp->f_flags & (F_IN_SOURCE|F_IN_DEST)) == 0) {
 378                         fp->f_srcdiffs |= D_DELETE;
 379                         fp->f_dstdiffs |= D_DELETE;
 380                 }
 381 
 382                 /* keep track of possible deletions to look for trouble */
 383                 if ((fp->f_dstdiffs | fp->f_srcdiffs) & D_DELETE) {
 384                         est_deletes++;
 385 
 386                         /* see if file is (or has been) a non-empty directory */
 387                         if (fp->f_files)
 388                                 est_rmdirs++;
 389                 }
 390         }
 391 
 392         /* if we found differences, queue the file for reconciliation   */
 393         if (fp->f_srcdiffs || fp->f_dstdiffs || fp->f_flags & F_STAT_ERROR) {
 394                 queue_file(fp);
 395 
 396                 if (opt_debug & DBG_ANAL) {
 397                         fprintf(stderr, "ANAL: src=%s",
 398                                 showflags(diffmap, fp->f_srcdiffs));
 399                         fprintf(stderr, " dst=%s",
 400                                 showflags(diffmap, fp->f_dstdiffs));
 401                         fprintf(stderr, " flgs=%s",
 402                                 showflags(fileflags, fp->f_flags));
 403                         fprintf(stderr, " name=%s\n", fp->f_fullname);
 404                 }
 405         }
 406 
 407         /* bump the total file count    */
 408         fp->f_base->b_totfiles++;
 409         total_files++;
 410 
 411         /* if this is not a directory, we're done       */
 412         if (fp->f_files == 0)
 413                 return (errs);
 414 
 415         /*
 416          * If this is a directory, we need to recursively analyze
 417          * our children, but only children who have been evaluated.
 418          * If a node has not been evaluated, then we don't have
 419          * updated stat information and there is nothing to analyze.
 420          *
 421          * we also want to run though all nodes with STAT errors
 422          * so that we can put them on the reconciliation list.
 423          * If a directory is unreadable on one side, all files
 424          * under that directory (ON BOTH SIDES) must be marked as
 425          * blocked by stat errors.
 426          */
 427         push_name(fp->f_name);
 428 
 429         for (cp = fp->f_files; cp; cp = cp->f_next) {
 430                 if (fp->f_flags & F_STAT_ERROR)
 431                         cp->f_flags |= F_STAT_ERROR;
 432                 if (cp->f_flags & (F_EVALUATE|F_STAT_ERROR))
 433                         errs |= check_file(cp);
 434         }
 435 
 436         pop_name();
 437 
 438         return (errs);
 439 }
 440 
 441 /*
 442  * routine:
 443  *      check_changes
 444  *
 445  * purpose:
 446  *      to figure out what has changed for a specific file
 447  *
 448  * parameters:
 449  *      file pointer
 450  *      the reference info
 451  *      the info to be checked for changes
 452  *
 453  * returns:
 454  *      diff mask
 455  *
 456  * notes:
 457  *      this routine doesn't pretend to understand what happened.
 458  *      it merely enumerates the ways in which the files differ.
 459  */
 460 static diffmask_t
 461 check_changes(struct file *fp, int ref, int new)
 462 {       struct fileinfo *rp, *np;
 463         int mask = 0;
 464         int type;
 465 
 466         rp = &fp->f_info[ref];
 467         np = &fp->f_info[new];
 468 
 469         if (np->f_uid != rp->f_uid)
 470                 mask |= D_UID;
 471 
 472         if (np->f_gid != rp->f_gid)
 473                 mask |= D_GID;
 474 
 475         if (np->f_mode != rp->f_mode)
 476                 mask |= D_PROT;
 477 
 478         type = np->f_type;
 479         if (type != rp->f_type) {
 480                 if (type == 0)
 481                         mask |= D_DELETE;
 482                 else if (rp->f_type == 0)
 483                         mask |= D_CREATE;
 484                 else
 485                         mask |= D_TYPE;
 486         } else if (type == S_IFBLK || type == S_IFCHR) {
 487                 /*
 488                  * for special files, we only look at the maj/min
 489                  */
 490                 if (np->f_rd_maj != rp->f_rd_maj)
 491                         mask |= D_SIZE;
 492                 if (np->f_rd_min != rp->f_rd_min)
 493                         mask |= D_SIZE;
 494         } else if (type != S_IFDIR) {
 495                 /*
 496                  * for directories, we don't look directly at
 497                  * the contents, so these fields don't mean
 498                  * anything.  If the directories have changed
 499                  * in any interesting way, we'll find it by
 500                  * walking the tree.
 501                  */
 502                 if (np->f_modtime > rp->f_modtime)
 503                         mask |= D_MTIME;
 504 
 505                 if (np->f_size != rp->f_size)
 506                         mask |= D_SIZE;
 507 
 508                 if (np->f_nlink != rp->f_nlink)
 509                         mask |= D_LINKS;
 510         }
 511 
 512         if (cmp_acls(rp, np) == 0)
 513                 mask |= D_FACLS;
 514 
 515         return (mask);
 516 }
 517 
 518 /*
 519  * routine:
 520  *      same_name
 521  *
 522  * purpose:
 523  *      to figure out whether or not two databsae nodes actually refer to
 524  *      the same file.
 525  *
 526  * parameters:
 527  *      pointers to two file description nodes
 528  *      which side we should check
 529  *
 530  * returns:
 531  *      TRUE/FALSE
 532  *
 533  * notes:
 534  *      if a single directory is specified in multiple base pairs, it
 535  *      is possible to have multiple nodes in the database describing
 536  *      the same file.  This routine is supposed to detect those cases.
 537  *
 538  *      what should be a trivial string comparison is complicated by
 539  *      the possibility that the two nodes might describe the same file
 540  *      from base directories at different depths.  Thus, rather than
 541  *      comparing two strings, we really want to compare the concatenation
 542  *      of two pairs of strings.  Unfortunately calling full_name would
 543  *      be awkward right now, so instead we have our own comparison
 544  *      routine that automatically skips from the first string to
 545  *      the second.
 546  */
 547 static bool_t
 548 same_name(struct file *f1, struct file *f2, side_t srcdst)
 549 {
 550         char *s1, *s2, *x1, *x2;
 551 
 552         if (srcdst == OPT_SRC) {
 553                 s1 = (f1->f_base)->b_src_name;
 554                 s2 = (f2->f_base)->b_src_name;
 555         } else {
 556                 s1 = (f1->f_base)->b_dst_name;
 557                 s2 = (f2->f_base)->b_dst_name;
 558         }
 559         x1 = f1->f_fullname;
 560         x2 = f2->f_fullname;
 561 
 562         /*
 563          * Compare the two names, and if they differ before they end
 564          * this is a non-match.  If they both end at the same time,
 565          * this is a match.
 566          *
 567          * The trick here is that each string is actually the logical
 568          * concatenation of two strings, and we need to automatically
 569          * wrap from the first to the second string in each pair.  There
 570          * is no requirement that the two (concatenated) strings be
 571          * broken at the same point, so we have a slightly baroque
 572          * comparsion loop.
 573          */
 574         while (*s1 && *s1 == *s2) {
 575 
 576                 /*
 577                  * strings have been identical so far, so advance the
 578                  * pointers and continue the comparison.  The trick
 579                  * is that when either string ends, we have to wrap
 580                  * over to its extension.
 581                  */
 582                 s1++; s2++;
 583                 if (*s1 && *s2)
 584                         continue;
 585 
 586                 /*
 587                  * at least one of the strings has ended.
 588                  * there is an implicit slash between the string
 589                  * and its extension, and this has to be matched
 590                  * against the other string.
 591                  */
 592                 if (*s1 != *s2) {
 593                         if (*s1 == 0 && *s2 == '/')
 594                                 s2++;
 595                         else if (*s2 == 0 && *s1 == '/')
 596                                 s1++;
 597                         else
 598                                 /* the disagreement doesn't come at a slash */
 599                                 break;
 600                 }
 601 
 602                 /*
 603                  * if either string has ended, wrap to its extension
 604                  */
 605                 if (*s1 == 0 && x1 != 0) {
 606                         s1 = x1;
 607                         x1 = 0;
 608                 }
 609                 if (*s2 == 0 && x2 != 0) {
 610                         s2 = x2;
 611                         x2 = 0;
 612                 }
 613         }
 614 
 615         return (*s1 == *s2);
 616 }
 617 
 618 /*
 619  * routine:
 620  *      find_link
 621  *
 622  * purpose:
 623  *      to figure out if there is a file to which we should
 624  *      be creating a link (rather than making a copy)
 625  *
 626  * parameters:
 627  *      file node for the file to be created (that we hope is merely a link)
 628  *      which side is to be changed (src/dst)
 629  *
 630  * return:
 631  *      0       no link is appropriate
 632  *      else    pointer to file node for link referent
 633  *
 634  * notes:
 635  *      there are a few strange heuristics in this routine and I
 636  *      wouldn't bet my soul that I got all of them right.  The general
 637  *      theory is that when a new file is created, we look to see if it
 638  *      is a link to another file on the changed side, and if it is, we
 639  *      find the corresponding file on the unchanged side.
 640  *
 641  *      cases we want to be able to handle:
 642  *          1.  one or more links are created to a prexisting file
 643  *          2.  a preexisting only link is renamed
 644  *          3.  a rename of one of multiple links to a preexisting file
 645  *          4.  a single file is created with multiple links
 646  */
 647 struct file *
 648 find_link(struct file *fp, side_t srcdst)
 649 {       struct file *lp;
 650         side_t chgside, tgtside;
 651         struct fileinfo *chgp, *tgtp, *basp, *fcp, *ftp;
 652 
 653         /* chg = side on which the change was noticed           */
 654         /* tgt = side to which the change is to be propagated   */
 655         chgside = (srcdst == OPT_SRC) ? OPT_DST : OPT_SRC;
 656         tgtside = (srcdst == OPT_SRC) ? OPT_SRC : OPT_DST;
 657         fcp = &fp->f_info[chgside];
 658         ftp = &fp->f_info[tgtside];
 659 
 660         /*
 661          * cases 1 and 3
 662          *
 663          * When a new link is created, we should be able to find
 664          * another file in the changed hierarchy that has the same
 665          * I-node number.  We expect it to be on the changed list
 666          * because the link count will have gone up or because all
 667          * of the copies are new.  If we find one, then the new file
 668          * on the receiving file should be a link to the corresponding
 669          * existing file.
 670          *
 671          * case 4
 672          *
 673          * the first link will be dealt with as a copy, but all
 674          * subsequent links should find an existing file analogous
 675          * to one of the links on the changed side, and create
 676          * corresponding links on the other side.
 677          *
 678          * in each of these cases, there should be multiple links
 679          * on the changed side.  If the linkcount on the changed
 680          * side is one, we needn't bother searching for other links.
 681          */
 682         if (fcp->f_nlink > 1)
 683         for (lp = changes; lp; lp = lp->f_rnext) {
 684                 /* finding the same node doesn't count  */
 685                 if (fp == lp)
 686                         continue;
 687 
 688                 tgtp = &lp->f_info[tgtside];
 689                 chgp = &lp->f_info[chgside];
 690 
 691                 /*
 692                  * if the file doesn't already exist on the target side
 693                  * we cannot make a link to it
 694                  */
 695                 if (tgtp->f_mode == 0)
 696                         continue;
 697 
 698                 /*
 699                  * if this is indeed a link, then the prospective file on
 700                  * the changed side will have the same dev/inum as the file
 701                  * we are looking for
 702                  */
 703                 if (fcp->f_d_maj != chgp->f_d_maj)
 704                         continue;
 705                 if (fcp->f_d_min != chgp->f_d_min)
 706                         continue;
 707                 if (fcp->f_ino != chgp->f_ino)
 708                         continue;
 709 
 710                 /*
 711                  * if the target side is already a link to this file,
 712                  * then there is no new link to be created
 713                  * FIX: how does this interact with copies over links
 714                  */
 715                 if ((ftp->f_d_maj == tgtp->f_d_maj) &&
 716                     (ftp->f_d_min == tgtp->f_d_min) &&
 717                     (ftp->f_ino   == tgtp->f_ino))
 718                         continue;
 719 
 720                 /*
 721                  * there is a pathological situation where a single file
 722                  * might appear under multiple base directories.  This is
 723                  * damned awkward to detect in any other way, so we must
 724                  * check to see if we have just found another database
 725                  * instance for the same file (on the changed side).
 726                  */
 727                 if ((fp->f_base != lp->f_base) && same_name(fp, lp, chgside))
 728                         continue;
 729 
 730                 if (opt_debug & DBG_ANAL)
 731                         fprintf(stderr, "ANAL: FIND LINK %s and %s\n",
 732                                 fp->f_fullname, lp->f_fullname);
 733 
 734                 return (lp);
 735         }
 736 
 737         /*
 738          * case 2: a simple rename of the only link
 739          *
 740          * In this case, there may not be any other existing file on
 741          * the changed side that has the same I-node number.  There
 742          * might, however, be a record of such a file in the baseline.
 743          * If we can find an identical file with a different name that
 744          * has recently disappeared, we have a likely rename.
 745          */
 746         for (lp = changes; lp; lp = lp->f_rnext) {
 747 
 748                 /* finding the same node doesn't count                  */
 749                 if (fp == lp)
 750                         continue;
 751 
 752                 tgtp = &lp->f_info[tgtside];
 753                 chgp = &lp->f_info[chgside];
 754 
 755                 /*
 756                  * if the file still exists on the changed side this is
 757                  * not a simple rename, and in fact the previous pass
 758                  * would have found it.
 759                  */
 760                 if (chgp->f_mode != 0)
 761                         continue;
 762 
 763                 /*
 764                  * the inode number for the new link on the changed
 765                  * side must match the inode number for the old link
 766                  * from the baseline.
 767                  */
 768                 if (fcp->f_d_maj != ((srcdst == OPT_SRC) ? lp->f_d_maj
 769                                                         : lp->f_s_maj))
 770                         continue;
 771                 if (fcp->f_d_min != ((srcdst == OPT_SRC) ? lp->f_d_min
 772                                                         : lp->f_s_min))
 773                         continue;
 774                 if (fcp->f_ino != ((srcdst == OPT_SRC) ? lp->f_d_inum
 775                                                         : lp->f_s_inum))
 776                         continue;
 777 
 778                 /* finding a file we are already linked to doesn't help */
 779                 if ((ftp->f_d_maj == tgtp->f_d_maj) &&
 780                     (ftp->f_d_min == tgtp->f_d_min) &&
 781                     (ftp->f_ino   == tgtp->f_ino))
 782                         continue;
 783 
 784                 /*
 785                  * there is a danger that we will confuse an
 786                  * inode reallocation with a rename.  We should
 787                  * only consider this to be a rename if the
 788                  * new file is identical to the old one
 789                  */
 790                 basp = &lp->f_info[OPT_BASE];
 791                 if (fcp->f_type != basp->f_type)
 792                         continue;
 793                 if (fcp->f_size != basp->f_size)
 794                         continue;
 795                 if (fcp->f_mode != basp->f_mode)
 796                         continue;
 797                 if (fcp->f_uid != basp->f_uid)
 798                         continue;
 799                 if (fcp->f_gid != basp->f_gid)
 800                         continue;
 801 
 802                 if (opt_debug & DBG_ANAL)
 803                         fprintf(stderr, "ANAL: FIND RENAME %s and %s\n",
 804                                 fp->f_fullname, lp->f_fullname);
 805 
 806                 return (lp);
 807         }
 808 
 809         return (0);
 810 }
 811 
 812 /*
 813  * routine:
 814  *      has_other_links
 815  *
 816  * purpose:
 817  *      to determine whether or not there is more that one link to a
 818  *      particular file.  We are willing to delete a link to a file
 819  *      that has changed if we will still have other links to it.
 820  *      The trick here is that we only care about links under our
 821  *      dominion.
 822  *
 823  * parameters:
 824  *      file pointer to node we are interested in
 825  *      which side we are looking to additional links on
 826  *
 827  * returns:
 828  *      TRUE if there are multiple links
 829  *      FALSE if this is the only one we know of
 830  */
 831 bool_t
 832 has_other_links(struct file *fp, side_t srcdst)
 833 {       struct file *lp;
 834         struct fileinfo *fip, *lip;
 835 
 836         fip = &fp->f_info[srcdst];
 837 
 838         /* if the link count is one, there couldn't be others   */
 839         if (fip->f_nlink < 2)
 840                 return (FALSE);
 841 
 842         /* look for any other files for the same inode          */
 843         for (lp = changes; lp; lp = lp->f_rnext) {
 844                 /* finding the same node doesn't count  */
 845                 if (fp == lp)
 846                         continue;
 847 
 848                 lip = &lp->f_info[srcdst];
 849 
 850                 /*
 851                  * file must still exist on this side
 852                  */
 853                 if (lip->f_mode == 0)
 854                         continue;
 855 
 856                 /*
 857                  * if this is indeed a link, then the prospective file on
 858                  * the changed side will have the same dev/inum as the file
 859                  * we are looking for
 860                  */
 861                 if (lip->f_d_maj != fip->f_d_maj)
 862                         continue;
 863                 if (lip->f_d_min != fip->f_d_min)
 864                         continue;
 865                 if (lip->f_ino != fip->f_ino)
 866                         continue;
 867 
 868                 /*
 869                  * we have found at least one other link
 870                  */
 871                 return (TRUE);
 872         }
 873 
 874         return (FALSE);
 875 }
 876 
 877 /*
 878  * routine:
 879  *      link_update
 880  *
 881  * purpose:
 882  *      to propoagate a stat change to all other file nodes that
 883  *      correspond to the same I-node on the changed side
 884  *
 885  * parameters:
 886  *      file pointer for the updated file
 887  *      which side was changed
 888  *
 889  * returns:
 890  *      void
 891  *
 892  * notes:
 893  *      if we have copied onto a file, we have copied onto all
 894  *      of its links, but since we do all stats before we do any
 895  *      copies, the stat information recently collected for links
 896  *      is no longer up-to-date, and this would result in incorrect
 897  *      reconciliation (redundant copies).
 898  *
 899  *      There is an assumption here that all links to a changed
 900  *      file will be in the change list.  This is true for almost
 901  *      all cases not involving restriction.  If we do fail to
 902  *      update the baseline for a file that was off the change list,
 903  *      the worst that is likely to happen is that we will think
 904  *      it changed later (but will almost surely find that both
 905  *      copies agree).
 906  */
 907 void
 908 link_update(struct file *fp, side_t which)
 909 {       struct file *lp;
 910 
 911         for (lp = changes; lp; lp = lp->f_rnext) {
 912                 /* finding the current entry doesn't count      */
 913                 if (lp == fp)
 914                         continue;
 915 
 916                 /* look for same i#, maj, min on changed side   */
 917                 if (lp->f_info[which].f_ino != fp->f_info[which].f_ino)
 918                         continue;
 919                 if (lp->f_info[which].f_d_maj != fp->f_info[which].f_d_maj)
 920                         continue;
 921                 if (lp->f_info[which].f_d_min != fp->f_info[which].f_d_min)
 922                         continue;
 923 
 924                 /*
 925                  * this appears to be another link to the same file
 926                  * so the updated stat information for one must be
 927                  * correct for the other.
 928                  */
 929                 lp->f_info[which].f_type     = fp->f_info[which].f_type;
 930                 lp->f_info[which].f_size     = fp->f_info[which].f_size;
 931                 lp->f_info[which].f_mode     = fp->f_info[which].f_mode;
 932                 lp->f_info[which].f_uid              = fp->f_info[which].f_uid;
 933                 lp->f_info[which].f_gid              = fp->f_info[which].f_gid;
 934                 lp->f_info[which].f_modtime  = fp->f_info[which].f_modtime;
 935                 lp->f_info[which].f_modns    = fp->f_info[which].f_modns;
 936                 lp->f_info[which].f_nlink    = fp->f_info[which].f_nlink;
 937                 lp->f_info[which].f_rd_maj   = fp->f_info[which].f_rd_maj;
 938                 lp->f_info[which].f_rd_min   = fp->f_info[which].f_rd_min;
 939 
 940                 if (opt_debug & DBG_STAT)
 941                         fprintf(stderr,
 942                                 "STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n",
 943                                 lp->f_name, lp->f_info[which].f_modtime,
 944                                 lp->f_info[which].f_modns);
 945         }
 946 }
 947 
 948 /*
 949  * routine:
 950  *      queue_file
 951  *
 952  * purpose:
 953  *      append a file to the list of needed reconciliations
 954  *
 955  * parameters:
 956  *      pointer to file
 957  *
 958  * notes:
 959  *      when a request is appended to the reconciliation list,
 960  *      we fill in the full name.  We delayed this in hopes that
 961  *      it wouldn't be necessary (saving cycles and memory)
 962  *
 963  *      There is some funny business with modification times.
 964  *      In general, we queue files in order of the latest modification
 965  *      time so that propagations preserve relative ordering.  There
 966  *      are, however, a few important exceptions:
 967  *          1.  all directory creations happen at time zero,
 968  *              so that they are created before any files can
 969  *              be added to them.
 970  *          2.  all directory deletions happen at time infinity-depth,
 971  *              so that everything else can be removed before the
 972  *              directories themselves are removed.
 973  *          3.  all file deletions happen at time infinity-depth
 974  *              so that (in renames) the links will preceed the unlinks.
 975  */
 976 static void
 977 queue_file(struct file *fp)
 978 {       struct file **pp, *np;
 979 
 980 #define TIME_ZERO       0L              /* the earliest possible time   */
 981 #define TIME_LONG       0x7FFFFFFF      /* the latest possible time     */
 982 
 983         /*
 984          * figure out the modification time for sequencing purposes
 985          */
 986         if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_DELETE) {
 987                 /*
 988                  * deletions are performed last, and depth first
 989                  */
 990                 fp->f_modtime = TIME_LONG - fp->f_depth;
 991         } else if (fp->f_info[OPT_SRC].f_type != S_IFDIR &&
 992             fp->f_info[OPT_DST].f_type != S_IFDIR) {
 993                 /*
 994                  * for most files we use the latest mod time
 995                  */
 996                 fp->f_modtime = fp->f_info[OPT_SRC].f_modtime;
 997                 fp->f_modns   = fp->f_info[OPT_SRC].f_modns;
 998                 if (fp->f_modtime < fp->f_info[OPT_DST].f_modtime) {
 999                         fp->f_modtime = fp->f_info[OPT_DST].f_modtime;
1000                         fp->f_modns   = fp->f_info[OPT_DST].f_modns;
1001                 }
1002         } else {
1003                 /*
1004                  * new directory creations need to happen before anything
1005                  * else and are automatically sequenced in traversal order
1006                  */
1007                 fp->f_modtime = TIME_ZERO;
1008         }
1009 
1010         /*
1011          * insertion is time ordered, and for equal times,
1012          * insertions is in (pre-order) traversal order
1013          */
1014         for (pp = &changes; (np = *pp) != 0; pp = &np->f_rnext) {
1015                 if (fp->f_modtime > np->f_modtime)
1016                         continue;
1017                 if (fp->f_modtime < np->f_modtime)
1018                         break;
1019                 if (fp->f_modns < np->f_modns)
1020                         break;
1021         }
1022 
1023         fp->f_fullname = strdup(get_name(fp));
1024         fp->f_rnext = np;
1025         *pp = fp;
1026 }
1027 
1028 
1029 /*
1030  * routines:
1031  *      push_name/pop_name/get_name
1032  *
1033  * purpose:
1034  *      maintain a name stack so we can form name of a particular file
1035  *      as the concatenation of all of the names between it and the
1036  *      (know to be fully qualified) base directory.
1037  *
1038  * notes:
1039  *      we go to this trouble because most files never change and
1040  *      so we don't need to associate full names with every one.
1041  *      This stack is maintained during analysis, and if we decide
1042  *      to add a file to the reconciliation list, we can use the
1043  *      stack to generate a fully qualified name at that time.
1044  *
1045  *      we compress out '/./' when we return a name.  Given that the
1046  *      stack was built by a tree walk, the only place a /./ should
1047  *      appear is at the first level after the base ... but there
1048  *      are legitimate ways for them to appear there.
1049  *
1050  *      these names can get deep, so we dynamically size our name buffer
1051  */
1052 static const char *namestack[ MAX_DEPTH + 1 ];
1053 static int namedepth = 0;
1054 static int namelen = 0;
1055 
1056 void
1057 push_name(const char *name)
1058 {
1059         namestack[ namedepth++ ] = name;
1060         namelen += 2 + strlen(name);
1061 
1062         /* make sure we don't overflow our name stack   */
1063         if (namedepth >= MAX_DEPTH) {
1064                 fprintf(stderr, gettext(ERR_deep), name);
1065                 exit(ERR_OTHER);
1066         }
1067 }
1068 
1069 void
1070 pop_name(void)
1071 {
1072         namelen -= 2 + strlen(namestack[--namedepth]);
1073         namestack[ namedepth ] = 0;
1074 
1075 #ifdef  DBG_ERRORS
1076         /* just a little sanity check here      */
1077         if (namedepth <= 0) {
1078                 if (namedepth < 0) {
1079                         fprintf(stderr, "ASSERTION FAILURE: namedepth < 0\n");
1080                         exit(ERR_OTHER);
1081                 } else if (namelen != 0) {
1082                         fprintf(stderr, "ASSERTION FAILURE: namelen != 0\n");
1083                         exit(ERR_OTHER);
1084                 }
1085         }
1086 #endif
1087 }
1088 
1089 char
1090 *get_name(struct file *fp)
1091 {       int i;
1092         static char *namebuf = 0;
1093         static int buflen = 0;
1094 
1095         /* make sure we have an adequate buffer */
1096         i = namelen + 1 + strlen(fp->f_name);
1097         if (buflen < i) {
1098                 for (buflen = MAX_PATH; buflen < i; buflen += MAX_NAME);
1099                 namebuf = (char *) realloc(namebuf, buflen);
1100         }
1101 
1102         /* assemble the name    */
1103         namebuf[0] = 0;
1104         for (i = 0; i < namedepth; i++) {
1105                 if (strcmp(namestack[i], ".")) {
1106                         strcat(namebuf, namestack[i]);
1107                         strcat(namebuf, "/");
1108                 }
1109         }
1110 
1111         strcat(namebuf, fp->f_name);
1112 
1113         return (namebuf);
1114 }