1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  *
  25  * Copyright 2016 RackTop Systems.
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/param.h>
  30 #include <sys/sysmacros.h>
  31 #include <sys/systm.h>
  32 #include <sys/time.h>
  33 #include <sys/vfs.h>
  34 #include <sys/vnode.h>
  35 #include <sys/errno.h>
  36 #include <sys/cmn_err.h>
  37 #include <sys/cred.h>
  38 #include <sys/stat.h>
  39 #include <sys/debug.h>
  40 #include <sys/policy.h>
  41 #include <sys/fs/tmpnode.h>
  42 #include <sys/fs/tmp.h>
  43 #include <sys/vtrace.h>
  44 
  45 static int tdircheckpath(struct tmpnode *, struct tmpnode *, struct cred *);
  46 static int tdirrename(struct tmpnode *, struct tmpnode *, struct tmpnode *,
  47         char *, struct tmpnode *, struct tdirent *, struct cred *);
  48 static void tdirfixdotdot(struct tmpnode *, struct tmpnode *, struct tmpnode *);
  49 static int tdirmaketnode(struct tmpnode *, struct tmount *, struct vattr *,
  50         enum de_op, struct tmpnode **, struct cred *);
  51 static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *,
  52         enum de_op, struct tmpnode *);
  53 
  54 
  55 #define T_HASH_SIZE     8192            /* must be power of 2 */
  56 #define T_MUTEX_SIZE    64
  57 
  58 static struct tdirent   *t_hashtable[T_HASH_SIZE];
  59 static kmutex_t          t_hashmutex[T_MUTEX_SIZE];
  60 
  61 #define T_HASH_INDEX(a)         ((a) & (T_HASH_SIZE-1))
  62 #define T_MUTEX_INDEX(a)        ((a) & (T_MUTEX_SIZE-1))
  63 
  64 #define TMPFS_HASH(tp, name, hash)                              \
  65         {                                                       \
  66                 char Xc, *Xcp;                                  \
  67                 hash = (uint_t)(uintptr_t)(tp) >> 8;              \
  68                 for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++)     \
  69                         hash = (hash << 4) + hash + (uint_t)Xc;   \
  70         }
  71 
  72 void
  73 tmpfs_hash_init(void)
  74 {
  75         int     ix;
  76 
  77         for (ix = 0; ix < T_MUTEX_SIZE; ix++)
  78                 mutex_init(&t_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL);
  79 }
  80 
  81 /*
  82  * This routine is where the rubber meets the road for identities.
  83  */
  84 static void
  85 tmpfs_hash_in(struct tdirent *t)
  86 {
  87         uint_t          hash;
  88         struct tdirent  **prevpp;
  89         kmutex_t        *t_hmtx;
  90 
  91         TMPFS_HASH(t->td_parent, t->td_name, hash);
  92         t->td_hash = hash;
  93         prevpp = &t_hashtable[T_HASH_INDEX(hash)];
  94         t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
  95         mutex_enter(t_hmtx);
  96         t->td_link = *prevpp;
  97         *prevpp = t;
  98         mutex_exit(t_hmtx);
  99 }
 100 
 101 /*
 102  * Remove tdirent *t from the hash list.
 103  */
 104 static void
 105 tmpfs_hash_out(struct tdirent *t)
 106 {
 107         uint_t          hash;
 108         struct tdirent  **prevpp;
 109         kmutex_t        *t_hmtx;
 110 
 111         hash = t->td_hash;
 112         prevpp = &t_hashtable[T_HASH_INDEX(hash)];
 113         t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
 114         mutex_enter(t_hmtx);
 115         while (*prevpp != t)
 116                 prevpp = &(*prevpp)->td_link;
 117         *prevpp = t->td_link;
 118         mutex_exit(t_hmtx);
 119 }
 120 
 121 /*
 122  * Currently called by tdirrename() only.
 123  * rename operation needs to be done with lock held, to ensure that
 124  * no other operations can access the tmpnode at the same instance.
 125  */
 126 static void
 127 tmpfs_hash_change(struct tdirent *tdp, struct tmpnode *fromtp)
 128 {
 129         uint_t          hash;
 130         kmutex_t        *t_hmtx;
 131 
 132         hash = tdp->td_hash;
 133         t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
 134         mutex_enter(t_hmtx);
 135         tdp->td_tmpnode = fromtp;
 136         mutex_exit(t_hmtx);
 137 }
 138 
 139 static struct tdirent *
 140 tmpfs_hash_lookup(char *name, struct tmpnode *parent, uint_t hold,
 141         struct tmpnode **found)
 142 {
 143         struct tdirent  *l;
 144         uint_t          hash;
 145         kmutex_t        *t_hmtx;
 146         struct tmpnode  *tnp;
 147 
 148         TMPFS_HASH(parent, name, hash);
 149         t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
 150         mutex_enter(t_hmtx);
 151         l = t_hashtable[T_HASH_INDEX(hash)];
 152         while (l) {
 153                 if ((l->td_hash == hash) &&
 154                     (l->td_parent == parent) &&
 155                     (strcmp(l->td_name, name) == 0)) {
 156                         /*
 157                          * We need to make sure that the tmpnode that
 158                          * we put a hold on is the same one that we pass back.
 159                          * Hence, temporary variable tnp is necessary.
 160                          */
 161                         tnp = l->td_tmpnode;
 162                         if (hold) {
 163                                 ASSERT(tnp);
 164                                 tmpnode_hold(tnp);
 165                         }
 166                         if (found)
 167                                 *found = tnp;
 168                         mutex_exit(t_hmtx);
 169                         return (l);
 170                 } else {
 171                         l = l->td_link;
 172                 }
 173         }
 174         mutex_exit(t_hmtx);
 175         return (NULL);
 176 }
 177 
 178 /*
 179  * Search directory 'parent' for entry 'name'.
 180  *
 181  * The calling thread can't hold the write version
 182  * of the rwlock for the directory being searched
 183  *
 184  * 0 is returned on success and *foundtp points
 185  * to the found tmpnode with its vnode held.
 186  */
 187 int
 188 tdirlookup(
 189         struct tmpnode *parent,
 190         char *name,
 191         struct tmpnode **foundtp,
 192         struct cred *cred)
 193 {
 194         int error;
 195 
 196         *foundtp = NULL;
 197         if (parent->tn_type != VDIR)
 198                 return (ENOTDIR);
 199 
 200         if ((error = tmp_taccess(parent, VEXEC, cred)))
 201                 return (error);
 202 
 203         if (*name == '\0') {
 204                 tmpnode_hold(parent);
 205                 *foundtp = parent;
 206                 return (0);
 207         }
 208 
 209         /*
 210          * Search the directory for the matching name
 211          * We need the lock protecting the tn_dir list
 212          * so that it doesn't change out from underneath us.
 213          * tmpfs_hash_lookup() will pass back the tmpnode
 214          * with a hold on it.
 215          */
 216 
 217         if (tmpfs_hash_lookup(name, parent, 1, foundtp) != NULL) {
 218                 ASSERT(*foundtp);
 219                 return (0);
 220         }
 221 
 222         return (ENOENT);
 223 }
 224 
 225 /*
 226  * Enter a directory entry for 'name' and 'tp' into directory 'dir'
 227  *
 228  * Returns 0 on success.
 229  */
 230 int
 231 tdirenter(
 232         struct tmount   *tm,
 233         struct tmpnode  *dir,           /* target directory to make entry in */
 234         char            *name,          /* name of entry */
 235         enum de_op      op,             /* entry operation */
 236         struct tmpnode  *fromparent,    /* source directory if rename */
 237         struct tmpnode  *tp,            /* source tmpnode, if link/rename */
 238         struct vattr    *va,
 239         struct tmpnode  **tpp,          /* return tmpnode, if create/mkdir */
 240         struct cred     *cred,
 241         caller_context_t *ctp)
 242 {
 243         struct tdirent *tdp;
 244         struct tmpnode *found = NULL;
 245         int error = 0;
 246         char *s;
 247 
 248         /*
 249          * tn_rwlock is held to serialize direnter and dirdeletes
 250          */
 251         ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
 252         ASSERT(dir->tn_type == VDIR);
 253 
 254         /*
 255          * Don't allow '/' characters in pathname component
 256          * (thus in ufs_direnter()).
 257          */
 258         for (s = name; *s; s++)
 259                 if (*s == '/')
 260                         return (EACCES);
 261 
 262         if (name[0] == '\0')
 263                 panic("tdirenter: NULL name");
 264 
 265         /*
 266          * For link and rename lock the source entry and check the link count
 267          * to see if it has been removed while it was unlocked.
 268          */
 269         if (op == DE_LINK || op == DE_RENAME) {
 270                 if (tp != dir)
 271                         rw_enter(&tp->tn_rwlock, RW_WRITER);
 272                 mutex_enter(&tp->tn_tlock);
 273                 if (tp->tn_nlink == 0) {
 274                         mutex_exit(&tp->tn_tlock);
 275                         if (tp != dir)
 276                                 rw_exit(&tp->tn_rwlock);
 277                         return (ENOENT);
 278                 }
 279 
 280                 if (tp->tn_nlink == MAXLINK) {
 281                         mutex_exit(&tp->tn_tlock);
 282                         if (tp != dir)
 283                                 rw_exit(&tp->tn_rwlock);
 284                         return (EMLINK);
 285                 }
 286                 tp->tn_nlink++;
 287                 gethrestime(&tp->tn_ctime);
 288                 mutex_exit(&tp->tn_tlock);
 289                 if (tp != dir)
 290                         rw_exit(&tp->tn_rwlock);
 291         }
 292 
 293         /*
 294          * This might be a "dangling detached directory".
 295          * it could have been removed, but a reference
 296          * to it kept in u_cwd.  don't bother searching
 297          * it, and with any luck the user will get tired
 298          * of dealing with us and cd to some absolute
 299          * pathway.  *sigh*, thus in ufs, too.
 300          */
 301         if (dir->tn_nlink == 0) {
 302                 error = ENOENT;
 303                 goto out;
 304         }
 305 
 306         /*
 307          * If this is a rename of a directory and the parent is
 308          * different (".." must be changed), then the source
 309          * directory must not be in the directory hierarchy
 310          * above the target, as this would orphan everything
 311          * below the source directory.
 312          */
 313         if (op == DE_RENAME) {
 314                 if (tp == dir) {
 315                         error = EINVAL;
 316                         goto out;
 317                 }
 318                 if (tp->tn_type == VDIR) {
 319                         if ((fromparent != dir) &&
 320                             (error = tdircheckpath(tp, dir, cred))) {
 321                                 goto out;
 322                         }
 323                 }
 324         }
 325 
 326         /*
 327          * Search for the entry.  Return "found" if it exists.
 328          */
 329         tdp = tmpfs_hash_lookup(name, dir, 1, &found);
 330 
 331         if (tdp) {
 332                 ASSERT(found);
 333                 switch (op) {
 334                 case DE_CREATE:
 335                 case DE_MKDIR:
 336                         if (tpp) {
 337                                 *tpp = found;
 338                                 error = EEXIST;
 339                         } else {
 340                                 tmpnode_rele(found);
 341                         }
 342                         break;
 343 
 344                 case DE_RENAME:
 345                         error = tdirrename(fromparent, tp,
 346                             dir, name, found, tdp, cred);
 347                         if (error == 0) {
 348                                 if (found != NULL) {
 349                                         vnevent_rename_dest(TNTOV(found),
 350                                             TNTOV(dir), name, ctp);
 351                                 }
 352                         }
 353 
 354                         tmpnode_rele(found);
 355                         break;
 356 
 357                 case DE_LINK:
 358                         /*
 359                          * Can't link to an existing file.
 360                          */
 361                         error = EEXIST;
 362                         tmpnode_rele(found);
 363                         break;
 364                 }
 365         } else {
 366 
 367                 /*
 368                  * The entry does not exist. Check write permission in
 369                  * directory to see if entry can be created.
 370                  */
 371                 if (error = tmp_taccess(dir, VWRITE, cred))
 372                         goto out;
 373                 if (op == DE_CREATE || op == DE_MKDIR) {
 374                         /*
 375                          * Make new tmpnode and directory entry as required.
 376                          */
 377                         error = tdirmaketnode(dir, tm, va, op, &tp, cred);
 378                         if (error)
 379                                 goto out;
 380                 }
 381                 if (error = tdiraddentry(dir, tp, name, op, fromparent)) {
 382                         if (op == DE_CREATE || op == DE_MKDIR) {
 383                                 /*
 384                                  * Unmake the inode we just made.
 385                                  */
 386                                 rw_enter(&tp->tn_rwlock, RW_WRITER);
 387                                 if ((tp->tn_type) == VDIR) {
 388                                         ASSERT(tdp == NULL);
 389                                         /*
 390                                          * cleanup allocs made by tdirinit()
 391                                          */
 392                                         tdirtrunc(tp);
 393                                 }
 394                                 mutex_enter(&tp->tn_tlock);
 395                                 tp->tn_nlink = 0;
 396                                 mutex_exit(&tp->tn_tlock);
 397                                 gethrestime(&tp->tn_ctime);
 398                                 rw_exit(&tp->tn_rwlock);
 399                                 tmpnode_rele(tp);
 400                                 tp = NULL;
 401                         }
 402                 } else if (tpp) {
 403                         *tpp = tp;
 404                 } else if (op == DE_CREATE || op == DE_MKDIR) {
 405                         tmpnode_rele(tp);
 406                 }
 407         }
 408 
 409 out:
 410         if (error && (op == DE_LINK || op == DE_RENAME)) {
 411                 /*
 412                  * Undo bumped link count.
 413                  */
 414                 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
 415                 gethrestime(&tp->tn_ctime);
 416         }
 417         return (error);
 418 }
 419 
 420 /*
 421  * Delete entry tp of name "nm" from dir.
 422  * Free dir entry space and decrement link count on tmpnode(s).
 423  *
 424  * Return 0 on success.
 425  */
 426 int
 427 tdirdelete(
 428         struct tmpnode *dir,
 429         struct tmpnode *tp,
 430         char *nm,
 431         enum dr_op op,
 432         struct cred *cred)
 433 {
 434         struct tdirent *tpdp;
 435         int error;
 436         size_t namelen;
 437         struct tmpnode *tnp;
 438         timestruc_t now;
 439 
 440         ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
 441         ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
 442         ASSERT(dir->tn_type == VDIR);
 443 
 444         if (nm[0] == '\0')
 445                 panic("tdirdelete: NULL name for %p", (void *)tp);
 446 
 447         /*
 448          * return error when removing . and ..
 449          */
 450         if (nm[0] == '.') {
 451                 if (nm[1] == '\0')
 452                         return (EINVAL);
 453                 if (nm[1] == '.' && nm[2] == '\0')
 454                         return (EEXIST); /* thus in ufs */
 455         }
 456 
 457         if (error = tmp_taccess(dir, VEXEC|VWRITE, cred))
 458                 return (error);
 459 
 460         /*
 461          * If the parent directory is "sticky", then the user must
 462          * own the parent directory or the file in it, or else must
 463          * have permission to write the file.  Otherwise it may not
 464          * be deleted (except by privileged users).
 465          * Same as ufs_dirremove.
 466          */
 467         if ((error = tmp_sticky_remove_access(dir, tp, cred)) != 0)
 468                 return (error);
 469 
 470         if (dir->tn_dir == NULL)
 471                 return (ENOENT);
 472 
 473         tpdp = tmpfs_hash_lookup(nm, dir, 0, &tnp);
 474         if (tpdp == NULL) {
 475                 /*
 476                  * If it is gone, some other thread got here first!
 477                  * Return error ENOENT.
 478                  */
 479                 return (ENOENT);
 480         }
 481 
 482         /*
 483          * If the tmpnode in the tdirent changed, we were probably
 484          * the victim of a concurrent rename operation.  The original
 485          * is gone, so return that status (same as UFS).
 486          */
 487         if (tp != tnp)
 488                 return (ENOENT);
 489 
 490         tmpfs_hash_out(tpdp);
 491 
 492         /*
 493          * Take tpdp out of the directory list.
 494          */
 495         ASSERT(tpdp->td_next != tpdp);
 496         ASSERT(tpdp->td_prev != tpdp);
 497         if (tpdp->td_prev) {
 498                 tpdp->td_prev->td_next = tpdp->td_next;
 499         }
 500         if (tpdp->td_next) {
 501                 tpdp->td_next->td_prev = tpdp->td_prev;
 502         }
 503 
 504         /*
 505          * If the roving slot pointer happens to match tpdp,
 506          * point it at the previous dirent.
 507          */
 508         if (dir->tn_dir->td_prev == tpdp) {
 509                 dir->tn_dir->td_prev = tpdp->td_prev;
 510         }
 511         ASSERT(tpdp->td_next != tpdp);
 512         ASSERT(tpdp->td_prev != tpdp);
 513 
 514         /*
 515          * tpdp points to the correct directory entry
 516          */
 517         namelen = strlen(tpdp->td_name) + 1;
 518 
 519         tmp_memfree(tpdp, sizeof (struct tdirent) + namelen);
 520         dir->tn_size -= (sizeof (struct tdirent) + namelen);
 521         dir->tn_dirents--;
 522 
 523         gethrestime(&now);
 524         dir->tn_mtime = now;
 525         dir->tn_ctime = now;
 526         tp->tn_ctime = now;
 527 
 528         /*
 529          * If this is a _REMOVE (unlink) operation there may
 530          * be other links to the directory entry.
 531          */
 532         ASSERT(tp->tn_nlink > 0);
 533         DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
 534         if (op == DR_RMDIR || (op == DR_REMOVE && tp->tn_type == VDIR)) {
 535                 if (tp->tn_nlink > 1) {
 536                         ASSERT(op == DR_REMOVE);
 537                 } else {
 538                         tdirtrunc(tp);
 539                         ASSERT(tp->tn_nlink == 0);
 540                 }
 541         }
 542         return (0);
 543 }
 544 
 545 /*
 546  * tdirinit is used internally to initialize a directory (dir)
 547  * with '.' and '..' entries without checking permissions and locking
 548  */
 549 void
 550 tdirinit(
 551         struct tmpnode *parent,         /* parent of directory to initialize */
 552         struct tmpnode *dir)            /* the new directory */
 553 {
 554         struct tdirent *dot, *dotdot;
 555         timestruc_t now;
 556 
 557         ASSERT(RW_WRITE_HELD(&parent->tn_rwlock));
 558         ASSERT(dir->tn_type == VDIR);
 559 
 560         dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE);
 561         dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE);
 562 
 563         /*
 564          * Initialize the entries
 565          */
 566         dot->td_tmpnode = dir;
 567         dot->td_offset = 0;
 568         dot->td_name = (char *)dot + sizeof (struct tdirent);
 569         dot->td_name[0] = '.';
 570         dot->td_parent = dir;
 571         tmpfs_hash_in(dot);
 572 
 573         dotdot->td_tmpnode = parent;
 574         dotdot->td_offset = 1;
 575         dotdot->td_name = (char *)dotdot + sizeof (struct tdirent);
 576         dotdot->td_name[0] = '.';
 577         dotdot->td_name[1] = '.';
 578         dotdot->td_parent = dir;
 579         tmpfs_hash_in(dotdot);
 580 
 581         /*
 582          * Initialize directory entry list.
 583          */
 584         dot->td_next = dotdot;
 585         dot->td_prev = dotdot;       /* dot's td_prev holds roving slot pointer */
 586         dotdot->td_next = NULL;
 587         dotdot->td_prev = dot;
 588 
 589         gethrestime(&now);
 590         dir->tn_mtime = now;
 591         dir->tn_ctime = now;
 592 
 593         /*
 594          * Link counts are special for the hidden attribute directory.
 595          * The only explicit reference in the name space is "." and
 596          * the reference through ".." is not counted on the parent
 597          * file. The attrdir is created as a side effect to lookup,
 598          * so don't change the ctime of the parent.
 599          * Since tdirinit is called with both dir and parent being the
 600          * same for the root vnode, we need to increment this before we set
 601          * tn_nlink = 2 below.
 602          */
 603         if (!(dir->tn_vnode->v_flag & V_XATTRDIR)) {
 604                 INCR_COUNT(&parent->tn_nlink, &parent->tn_tlock);
 605                 parent->tn_ctime = now;
 606         }
 607 
 608         dir->tn_dir = dot;
 609         dir->tn_size = 2 * sizeof (struct tdirent) + 5;      /* dot and dotdot */
 610         dir->tn_dirents = 2;
 611         dir->tn_nlink = 2;
 612 }
 613 
 614 
 615 /*
 616  * tdirtrunc is called to remove all directory entries under this directory.
 617  */
 618 void
 619 tdirtrunc(struct tmpnode *dir)
 620 {
 621         struct tdirent *tdp;
 622         struct tmpnode *tp;
 623         size_t namelen;
 624         timestruc_t now;
 625         int isvattrdir, isdotdot, skip_decr;
 626         int lock_held;
 627 
 628         ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
 629         ASSERT(dir->tn_type == VDIR);
 630 
 631         isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0;
 632         for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) {
 633                 ASSERT(tdp->td_next != tdp);
 634                 ASSERT(tdp->td_prev != tdp);
 635                 ASSERT(tdp->td_tmpnode);
 636 
 637                 dir->tn_dir = tdp->td_next;
 638                 namelen = strlen(tdp->td_name) + 1;
 639 
 640                 /*
 641                  * Adjust the link counts to account for this directory
 642                  * entry removal. Hidden attribute directories may
 643                  * not be empty as they may be truncated as a side-
 644                  * effect of removing the parent. We do hold/rele
 645                  * operations to free up these tmpnodes.
 646                  *
 647                  * Skip the link count adjustment for parents of
 648                  * attribute directories as those link counts
 649                  * do not include the ".." reference in the hidden
 650                  * directories.
 651                  */
 652                 tp = tdp->td_tmpnode;
 653                 isdotdot = (strcmp("..", tdp->td_name) == 0);
 654                 skip_decr = (isvattrdir && isdotdot);
 655                 if (!skip_decr) {
 656                         ASSERT(tp->tn_nlink > 0);
 657                         DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
 658                 }
 659 
 660                 tmpfs_hash_out(tdp);
 661 
 662                 tmp_memfree(tdp, sizeof (struct tdirent) + namelen);
 663                 dir->tn_size -= (sizeof (struct tdirent) + namelen);
 664                 dir->tn_dirents--;
 665 
 666                 /*
 667                  * This directory entry may itself be a directory with
 668                  * entries and removing it may have created orphans.
 669                  * On a normal filesystem like UFS this wouldn't be
 670                  * a huge problem because fcsk can reclaim them.  For
 671                  * TMPFS which resides in RAM however, it means we
 672                  * end up leaking memory.
 673                  *
 674                  * To avoid this we also truncate child directories,
 675                  * but only if they have no other links to them.
 676                  */
 677                 if (!isdotdot && tp->tn_type == VDIR && tp != dir) {
 678                         if (tp->tn_nlink > 1)
 679                                 continue;
 680                         lock_held = RW_WRITE_HELD(&tp->tn_rwlock);
 681                         if (!lock_held)
 682                                 rw_enter(&tp->tn_rwlock, RW_WRITER);
 683                         tdirtrunc(tp);
 684                         if (!lock_held)
 685                                 rw_exit(&tp->tn_rwlock);
 686                         ASSERT(tp->tn_nlink == 0);
 687                 }
 688         }
 689 
 690         gethrestime(&now);
 691         dir->tn_mtime = now;
 692         dir->tn_ctime = now;
 693 
 694         ASSERT(dir->tn_dir == NULL);
 695         ASSERT(dir->tn_size == 0);
 696         ASSERT(dir->tn_dirents == 0);
 697 }
 698 
 699 /*
 700  * Check if the source directory is in the path of the target directory.
 701  * The target directory is locked by the caller.
 702  *
 703  * XXX - The source and target's should be different upon entry.
 704  */
 705 static int
 706 tdircheckpath(
 707         struct tmpnode *fromtp,
 708         struct tmpnode  *toparent,
 709         struct cred     *cred)
 710 {
 711         int     error = 0;
 712         struct tmpnode *dir, *dotdot;
 713         struct tdirent *tdp;
 714 
 715         ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
 716 
 717         tdp = tmpfs_hash_lookup("..", toparent, 1, &dotdot);
 718         if (tdp == NULL)
 719                 return (ENOENT);
 720 
 721         ASSERT(dotdot);
 722 
 723         if (dotdot == toparent) {
 724                 /* root of fs.  search trivially satisfied. */
 725                 tmpnode_rele(dotdot);
 726                 return (0);
 727         }
 728         for (;;) {
 729                 /*
 730                  * Return error for cases like "mv c c/d",
 731                  * "mv c c/d/e" and so on.
 732                  */
 733                 if (dotdot == fromtp) {
 734                         tmpnode_rele(dotdot);
 735                         error = EINVAL;
 736                         break;
 737                 }
 738                 dir = dotdot;
 739                 error = tdirlookup(dir, "..", &dotdot, cred);
 740                 if (error) {
 741                         tmpnode_rele(dir);
 742                         break;
 743                 }
 744                 /*
 745                  * We're okay if we traverse the directory tree up to
 746                  * the root directory and don't run into the
 747                  * parent directory.
 748                  */
 749                 if (dir == dotdot) {
 750                         tmpnode_rele(dir);
 751                         tmpnode_rele(dotdot);
 752                         break;
 753                 }
 754                 tmpnode_rele(dir);
 755         }
 756         return (error);
 757 }
 758 
 759 static int
 760 tdirrename(
 761         struct tmpnode *fromparent,     /* parent directory of source */
 762         struct tmpnode *fromtp,         /* source tmpnode */
 763         struct tmpnode *toparent,       /* parent directory of target */
 764         char *nm,                       /* entry we are trying to change */
 765         struct tmpnode *to,             /* target tmpnode */
 766         struct tdirent *where,          /* target tmpnode directory entry */
 767         struct cred *cred)              /* credentials */
 768 {
 769         int error = 0;
 770         int doingdirectory;
 771         timestruc_t now;
 772 
 773 #if defined(lint)
 774         nm = nm;
 775 #endif
 776         ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
 777 
 778         /*
 779          * Short circuit rename of something to itself.
 780          */
 781         if (fromtp == to)
 782                 return (ESAME);         /* special KLUDGE error code */
 783 
 784         rw_enter(&fromtp->tn_rwlock, RW_READER);
 785         rw_enter(&to->tn_rwlock, RW_READER);
 786 
 787         /*
 788          * Check that everything is on the same filesystem.
 789          */
 790         if (to->tn_vnode->v_vfsp != toparent->tn_vnode->v_vfsp ||
 791             to->tn_vnode->v_vfsp != fromtp->tn_vnode->v_vfsp) {
 792                 error = EXDEV;
 793                 goto out;
 794         }
 795 
 796         /*
 797          * Must have write permission to rewrite target entry.
 798          * Check for stickyness.
 799          */
 800         if ((error = tmp_taccess(toparent, VWRITE, cred)) != 0 ||
 801             (error = tmp_sticky_remove_access(toparent, to, cred)) != 0)
 802                 goto out;
 803 
 804         /*
 805          * Ensure source and target are compatible (both directories
 806          * or both not directories).  If target is a directory it must
 807          * be empty and have no links to it; in addition it must not
 808          * be a mount point, and both the source and target must be
 809          * writable.
 810          */
 811         doingdirectory = (fromtp->tn_type == VDIR);
 812         if (to->tn_type == VDIR) {
 813                 if (!doingdirectory) {
 814                         error = EISDIR;
 815                         goto out;
 816                 }
 817                 /*
 818                  * vn_vfswlock will prevent mounts from using the directory
 819                  * until we are done.
 820                  */
 821                 if (vn_vfswlock(TNTOV(to))) {
 822                         error = EBUSY;
 823                         goto out;
 824                 }
 825                 if (vn_mountedvfs(TNTOV(to)) != NULL) {
 826                         vn_vfsunlock(TNTOV(to));
 827                         error = EBUSY;
 828                         goto out;
 829                 }
 830 
 831                 mutex_enter(&to->tn_tlock);
 832                 if (to->tn_dirents > 2 || to->tn_nlink > 2) {
 833                         mutex_exit(&to->tn_tlock);
 834                         vn_vfsunlock(TNTOV(to));
 835                         error = EEXIST; /* SIGH should be ENOTEMPTY */
 836                         /*
 837                          * Update atime because checking tn_dirents is
 838                          * logically equivalent to reading the directory
 839                          */
 840                         gethrestime(&to->tn_atime);
 841                         goto out;
 842                 }
 843                 mutex_exit(&to->tn_tlock);
 844         } else if (doingdirectory) {
 845                 error = ENOTDIR;
 846                 goto out;
 847         }
 848 
 849         tmpfs_hash_change(where, fromtp);
 850         gethrestime(&now);
 851         toparent->tn_mtime = now;
 852         toparent->tn_ctime = now;
 853 
 854         /*
 855          * Upgrade to write lock on "to" (i.e., the target tmpnode).
 856          */
 857         rw_exit(&to->tn_rwlock);
 858         rw_enter(&to->tn_rwlock, RW_WRITER);
 859 
 860         /*
 861          * Decrement the link count of the target tmpnode.
 862          */
 863         DECR_COUNT(&to->tn_nlink, &to->tn_tlock);
 864         to->tn_ctime = now;
 865 
 866         if (doingdirectory) {
 867                 /*
 868                  * The entry for "to" no longer exists so release the vfslock.
 869                  */
 870                 vn_vfsunlock(TNTOV(to));
 871 
 872                 /*
 873                  * Decrement the target link count and delete all entires.
 874                  */
 875                 tdirtrunc(to);
 876                 ASSERT(to->tn_nlink == 0);
 877 
 878                 /*
 879                  * Renaming a directory with the parent different
 880                  * requires that ".." be rewritten.  The window is
 881                  * still there for ".." to be inconsistent, but this
 882                  * is unavoidable, and a lot shorter than when it was
 883                  * done in a user process.
 884                  */
 885                 if (fromparent != toparent)
 886                         tdirfixdotdot(fromtp, fromparent, toparent);
 887         }
 888 out:
 889         rw_exit(&to->tn_rwlock);
 890         rw_exit(&fromtp->tn_rwlock);
 891         return (error);
 892 }
 893 
 894 static void
 895 tdirfixdotdot(
 896         struct tmpnode  *fromtp,        /* child directory */
 897         struct tmpnode  *fromparent,    /* old parent directory */
 898         struct tmpnode  *toparent)      /* new parent directory */
 899 {
 900         struct tdirent  *dotdot;
 901 
 902         ASSERT(RW_LOCK_HELD(&toparent->tn_rwlock));
 903 
 904         /*
 905          * Increment the link count in the new parent tmpnode
 906          */
 907         INCR_COUNT(&toparent->tn_nlink, &toparent->tn_tlock);
 908         gethrestime(&toparent->tn_ctime);
 909 
 910         dotdot = tmpfs_hash_lookup("..", fromtp, 0, NULL);
 911 
 912         ASSERT(dotdot->td_tmpnode == fromparent);
 913         dotdot->td_tmpnode = toparent;
 914 
 915         /*
 916          * Decrement the link count of the old parent tmpnode.
 917          * If fromparent is NULL, then this is a new directory link;
 918          * it has no parent, so we need not do anything.
 919          */
 920         if (fromparent != NULL) {
 921                 mutex_enter(&fromparent->tn_tlock);
 922                 if (fromparent->tn_nlink != 0) {
 923                         fromparent->tn_nlink--;
 924                         gethrestime(&fromparent->tn_ctime);
 925                 }
 926                 mutex_exit(&fromparent->tn_tlock);
 927         }
 928 }
 929 
 930 static int
 931 tdiraddentry(
 932         struct tmpnode  *dir,   /* target directory to make entry in */
 933         struct tmpnode  *tp,    /* new tmpnode */
 934         char            *name,
 935         enum de_op      op,
 936         struct tmpnode  *fromtp)
 937 {
 938         struct tdirent *tdp, *tpdp;
 939         size_t          namelen, alloc_size;
 940         timestruc_t     now;
 941 
 942         /*
 943          * Make sure the parent directory wasn't removed from
 944          * underneath the caller.
 945          */
 946         if (dir->tn_dir == NULL)
 947                 return (ENOENT);
 948 
 949         /*
 950          * Check that everything is on the same filesystem.
 951          */
 952         if (tp->tn_vnode->v_vfsp != dir->tn_vnode->v_vfsp)
 953                 return (EXDEV);
 954 
 955         /*
 956          * Allocate and initialize directory entry
 957          */
 958         namelen = strlen(name) + 1;
 959         alloc_size = namelen + sizeof (struct tdirent);
 960         tdp = tmp_memalloc(alloc_size, 0);
 961         if (tdp == NULL)
 962                 return (ENOSPC);
 963 
 964         if ((op == DE_RENAME) && (tp->tn_type == VDIR))
 965                 tdirfixdotdot(tp, fromtp, dir);
 966 
 967         dir->tn_size += alloc_size;
 968         dir->tn_dirents++;
 969         tdp->td_tmpnode = tp;
 970         tdp->td_parent = dir;
 971 
 972         /*
 973          * The directory entry and its name were allocated sequentially.
 974          */
 975         tdp->td_name = (char *)tdp + sizeof (struct tdirent);
 976         (void) strcpy(tdp->td_name, name);
 977 
 978         tmpfs_hash_in(tdp);
 979 
 980         /*
 981          * Some utilities expect the size of a directory to remain
 982          * somewhat static.  For example, a routine which unlinks
 983          * files between calls to readdir(); the size of the
 984          * directory changes from underneath it and so the real
 985          * directory offset in bytes is invalid.  To circumvent
 986          * this problem, we initialize a directory entry with an
 987          * phony offset, and use this offset to determine end of
 988          * file in tmp_readdir.
 989          */
 990         tpdp = dir->tn_dir->td_prev;
 991         /*
 992          * Install at first empty "slot" in directory list.
 993          */
 994         while (tpdp->td_next != NULL && (tpdp->td_next->td_offset -
 995             tpdp->td_offset) <= 1) {
 996                 ASSERT(tpdp->td_next != tpdp);
 997                 ASSERT(tpdp->td_prev != tpdp);
 998                 ASSERT(tpdp->td_next->td_offset > tpdp->td_offset);
 999                 tpdp = tpdp->td_next;
1000         }
1001         tdp->td_offset = tpdp->td_offset + 1;
1002 
1003         /*
1004          * If we're at the end of the dirent list and the offset (which
1005          * is necessarily the largest offset in this directory) is more
1006          * than twice the number of dirents, that means the directory is
1007          * 50% holes.  At this point we reset the slot pointer back to
1008          * the beginning of the directory so we start using the holes.
1009          * The idea is that if there are N dirents, there must also be
1010          * N holes, so we can satisfy the next N creates by walking at
1011          * most 2N entries; thus the average cost of a create is constant.
1012          * Note that we use the first dirent's td_prev as the roving
1013          * slot pointer; it's ugly, but it saves a word in every dirent.
1014          */
1015         if (tpdp->td_next == NULL && tpdp->td_offset > 2 * dir->tn_dirents)
1016                 dir->tn_dir->td_prev = dir->tn_dir->td_next;
1017         else
1018                 dir->tn_dir->td_prev = tdp;
1019 
1020         ASSERT(tpdp->td_next != tpdp);
1021         ASSERT(tpdp->td_prev != tpdp);
1022 
1023         tdp->td_next = tpdp->td_next;
1024         if (tdp->td_next) {
1025                 tdp->td_next->td_prev = tdp;
1026         }
1027         tdp->td_prev = tpdp;
1028         tpdp->td_next = tdp;
1029 
1030         ASSERT(tdp->td_next != tdp);
1031         ASSERT(tdp->td_prev != tdp);
1032         ASSERT(tpdp->td_next != tpdp);
1033         ASSERT(tpdp->td_prev != tpdp);
1034 
1035         gethrestime(&now);
1036         dir->tn_mtime = now;
1037         dir->tn_ctime = now;
1038 
1039         return (0);
1040 }
1041 
1042 static int
1043 tdirmaketnode(
1044         struct tmpnode *dir,
1045         struct tmount   *tm,
1046         struct vattr    *va,
1047         enum    de_op   op,
1048         struct tmpnode **newnode,
1049         struct cred     *cred)
1050 {
1051         struct tmpnode *tp;
1052         enum vtype      type;
1053 
1054         ASSERT(va != NULL);
1055         ASSERT(op == DE_CREATE || op == DE_MKDIR);
1056         if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
1057             ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
1058                 return (EOVERFLOW);
1059         type = va->va_type;
1060         tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
1061         tmpnode_init(tm, tp, va, cred);
1062 
1063         /* setup normal file/dir's extended attribute directory */
1064         if (dir->tn_flags & ISXATTR) {
1065                 /* parent dir is , mark file as xattr */
1066                 tp->tn_flags |= ISXATTR;
1067         }
1068 
1069 
1070         if (type == VBLK || type == VCHR) {
1071                 tp->tn_vnode->v_rdev = tp->tn_rdev = va->va_rdev;
1072         } else {
1073                 tp->tn_vnode->v_rdev = tp->tn_rdev = NODEV;
1074         }
1075         tp->tn_vnode->v_type = type;
1076         tp->tn_uid = crgetuid(cred);
1077 
1078         /*
1079          * To determine the group-id of the created file:
1080          *   1) If the gid is set in the attribute list (non-Sun & pre-4.0
1081          *      clients are not likely to set the gid), then use it if
1082          *      the process is privileged, belongs to the target group,
1083          *      or the group is the same as the parent directory.
1084          *   2) If the filesystem was not mounted with the Old-BSD-compatible
1085          *      GRPID option, and the directory's set-gid bit is clear,
1086          *      then use the process's gid.
1087          *   3) Otherwise, set the group-id to the gid of the parent directory.
1088          */
1089         if ((va->va_mask & AT_GID) &&
1090             ((va->va_gid == dir->tn_gid) || groupmember(va->va_gid, cred) ||
1091             secpolicy_vnode_create_gid(cred) == 0)) {
1092                 /*
1093                  * XXX - is this only the case when a 4.0 NFS client, or a
1094                  * client derived from that code, makes a call over the wire?
1095                  */
1096                 tp->tn_gid = va->va_gid;
1097         } else {
1098                 if (dir->tn_mode & VSGID)
1099                         tp->tn_gid = dir->tn_gid;
1100                 else
1101                         tp->tn_gid = crgetgid(cred);
1102         }
1103         /*
1104          * If we're creating a directory, and the parent directory has the
1105          * set-GID bit set, set it on the new directory.
1106          * Otherwise, if the user is neither privileged nor a member of the
1107          * file's new group, clear the file's set-GID bit.
1108          */
1109         if (dir->tn_mode & VSGID && type == VDIR)
1110                 tp->tn_mode |= VSGID;
1111         else {
1112                 if ((tp->tn_mode & VSGID) &&
1113                     secpolicy_vnode_setids_setgids(cred, tp->tn_gid) != 0)
1114                         tp->tn_mode &= ~VSGID;
1115         }
1116 
1117         if (va->va_mask & AT_ATIME)
1118                 tp->tn_atime = va->va_atime;
1119         if (va->va_mask & AT_MTIME)
1120                 tp->tn_mtime = va->va_mtime;
1121 
1122         if (op == DE_MKDIR)
1123                 tdirinit(dir, tp);
1124 
1125         *newnode = tp;
1126         return (0);
1127 }