1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 #pragma ident   "%Z%%M% %I%     %E% SMI"
  26 
  27 #include <sys/param.h>
  28 #include <sys/types.h>
  29 #include <sys/systm.h>
  30 #include <sys/file.h>
  31 #include <sys/cred.h>
  32 #include <sys/proc.h>
  33 #include <sys/user.h>
  34 #include <sys/vfs.h>
  35 #include <sys/vnode.h>
  36 #include <sys/pathname.h>
  37 #include <sys/uio.h>
  38 #include <sys/tiuser.h>
  39 #include <sys/sysmacros.h>
  40 #include <sys/kmem.h>
  41 #include <sys/mount.h>
  42 #include <sys/ioctl.h>
  43 #include <sys/statvfs.h>
  44 #include <sys/errno.h>
  45 #include <sys/debug.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/utsname.h>
  48 #include <sys/modctl.h>
  49 #include <sys/stat.h>
  50 #include <sys/fcntl.h>
  51 #include <sys/fbuf.h>
  52 #include <rpc/types.h>
  53 
  54 #include <vm/hat.h>
  55 #include <vm/as.h>
  56 #include <vm/page.h>
  57 #include <vm/pvn.h>
  58 #include <vm/seg.h>
  59 #include <vm/seg_map.h>
  60 #include <vm/seg_vn.h>
  61 #include <vm/rm.h>
  62 #include <sys/fs/cachefs_fs.h>
  63 #include <sys/fs/cachefs_dlog.h>
  64 #include <sys/fs/cachefs_ioctl.h>
  65 
  66 /* external references */
  67 extern struct cachefsops nopcfsops, strictcfsops, codcfsops;
  68 
  69 /* forward references */
  70 int fscdir_create(cachefscache_t *cachep, char *namep, fscache_t *fscp);
  71 int fscdir_find(cachefscache_t *cachep, ino64_t fsid, fscache_t *fscp);
  72 static int fscache_info_sync(fscache_t *fscp);
  73 
  74 struct kmem_cache *cachefs_fscache_cache = NULL;
  75 
  76 /*
  77  * ------------------------------------------------------------------
  78  *
  79  *              fscache_create
  80  *
  81  * Description:
  82  *      Creates a fscache object.
  83  * Arguments:
  84  *      cachep          cache to create fscache object for
  85  * Returns:
  86  *      Returns a fscache object.
  87  * Preconditions:
  88  *      precond(cachep)
  89  */
  90 
  91 fscache_t *
  92 fscache_create(cachefscache_t *cachep)
  93 {
  94         fscache_t *fscp;
  95 
  96         /* create and initialize the fscache object */
  97         fscp = kmem_cache_alloc(cachefs_fscache_cache, KM_SLEEP);
  98 
  99         bzero(fscp, sizeof (*fscp));
 100 
 101         mutex_init(&fscp->fs_fslock, NULL, MUTEX_DEFAULT, NULL);
 102         mutex_init(&fscp->fs_idlelock, NULL, MUTEX_DEFAULT, NULL);
 103         mutex_init(&fscp->fs_dlock, NULL, MUTEX_DEFAULT, NULL);
 104         mutex_init(&fscp->fs_cdlock, NULL, MUTEX_DEFAULT, NULL);
 105         cv_init(&fscp->fs_cdwaitcv, NULL, CV_DEFAULT, NULL);
 106 
 107         fscp->fs_cache = cachep;
 108         fscp->fs_info.fi_mntflags = CFS_WRITE_AROUND;
 109         fscp->fs_info.fi_popsize = DEF_POP_SIZE;
 110         fscp->fs_info.fi_fgsize = DEF_FILEGRP_SIZE;
 111         fscp->fs_cfsops = &nopcfsops;
 112         fscp->fs_consttype = CFS_FS_CONST_NOCONST;
 113         fscp->fs_acregmin = 30;
 114         fscp->fs_acregmax = 30;
 115         fscp->fs_acdirmin = 30;
 116         fscp->fs_acdirmax = 30;
 117         fscp->fs_cdconnected = CFS_CD_CONNECTED;
 118         fscp->fs_mntpt = NULL;
 119         fscp->fs_hostname = NULL;
 120         fscp->fs_backfsname = NULL;
 121         cachefs_workq_init(&fscp->fs_workq);
 122         return (fscp);
 123 }
 124 
 125 /*
 126  * ------------------------------------------------------------------
 127  *
 128  *              fscache_destroy
 129  *
 130  * Description:
 131  *      Destroys the fscache object.
 132  * Arguments:
 133  *      fscp    the fscache object to destroy
 134  * Returns:
 135  * Preconditions:
 136  *      precond(fscp)
 137  *      precond(fs_ref == 0)
 138  */
 139 
 140 void
 141 fscache_destroy(fscache_t *fscp)
 142 {
 143         size_t strl;
 144 
 145         ASSERT(fscp->fs_ref == 0);
 146 
 147         (void) fscache_info_sync(fscp);
 148 
 149         if (fscp->fs_mntpt) {
 150                 strl = strlen(fscp->fs_mntpt);
 151                 if (strl != 0)
 152                         kmem_free(fscp->fs_mntpt, strl + 1);
 153         }
 154         if (fscp->fs_hostname) {
 155                 strl = strlen(fscp->fs_hostname);
 156                 if (strl != 0)
 157                         kmem_free(fscp->fs_hostname, strl + 1);
 158         }
 159         if (fscp->fs_backfsname) {
 160                 strl = strlen(fscp->fs_backfsname);
 161                 if (strl != 0)
 162                         kmem_free(fscp->fs_backfsname, strl + 1);
 163         }
 164 
 165         /* drop the inum translation table */
 166         if (fscp->fs_inum_size > 0)
 167                 cachefs_kmem_free(fscp->fs_inum_trans,
 168                     fscp->fs_inum_size * sizeof (cachefs_inum_trans_t));
 169 
 170         /* drop references to the fscache directory */
 171         if (fscp->fs_fscdirvp)
 172                 VN_RELE(fscp->fs_fscdirvp);
 173         if (fscp->fs_fsattrdir)
 174                 VN_RELE(fscp->fs_fsattrdir);
 175         if (fscp->fs_infovp)
 176                 VN_RELE(fscp->fs_infovp);
 177 
 178         /* drop logging references */
 179         cachefs_dlog_teardown(fscp);
 180 
 181         mutex_destroy(&fscp->fs_fslock);
 182         mutex_destroy(&fscp->fs_idlelock);
 183         mutex_destroy(&fscp->fs_dlock);
 184         mutex_destroy(&fscp->fs_cdlock);
 185         cv_destroy(&fscp->fs_cdwaitcv);
 186 
 187         kmem_cache_free(cachefs_fscache_cache, fscp);
 188 }
 189 
 190 /*
 191  * ------------------------------------------------------------------
 192  *
 193  *              fscache_setup
 194  *
 195  * Description:
 196  *      Activates a fscache by associating the fscache object
 197  *      with on disk data.
 198  *      If the fscache directory of the specified fsid exists then
 199  *      it will be used.
 200  *      Otherwise a new fscache directory will be created using namep
 201  *      and optp with fsid being ignored.  However if namep or optp
 202  *      are not NULL or the cache is in NOFILL then this routine fails.
 203  * Arguments:
 204  *      fscp    the fscache object to activate
 205  *      fsid    unique identifier for the cache
 206  *      namep   name of the cache
 207  *      optp    options for the cache
 208  * Returns:
 209  *      Returns 0 for success, !0 on failure.
 210  * Preconditions:
 211  *      precond(fscp)
 212  *      precond(the cache must not be in NOCACHE mode)
 213  *      precond(the cache must not alread by active)
 214  */
 215 
 216 static int
 217 fscache_setup(fscache_t *fscp, ino64_t fsid, char *namep,
 218     struct cachefsoptions *optp, ino64_t backfileno, int setflags)
 219 {
 220         int error;
 221         cachefscache_t *cachep = fscp->fs_cache;
 222 
 223         ASSERT((cachep->c_flags & CACHE_NOCACHE) == 0);
 224 
 225         /* see if the fscache directory already exists */
 226         error = fscdir_find(cachep, fsid, fscp);
 227         if (error) {
 228                 /* return error if cannot create the directory */
 229                 if ((namep == NULL) || (optp == NULL) ||
 230                     (cachep->c_flags & CACHE_NOFILL)) {
 231                         return (error);
 232                 }
 233                 if (backfileno == 0)
 234                         return (EAGAIN);
 235 
 236                 /* remember the root back fileno for disconnected mounts */
 237                 fscp->fs_info.fi_root = backfileno;
 238 
 239                 /* copy options into the fscache */
 240                 fscp->fs_info.fi_mntflags = optp->opt_flags;
 241                 fscp->fs_info.fi_popsize = optp->opt_popsize;
 242                 fscp->fs_info.fi_fgsize = optp->opt_fgsize;
 243                 fscp->fs_flags |= CFS_FS_DIRTYINFO;
 244 
 245                 /* create the directory */
 246                 error = fscdir_create(cachep, namep, fscp);
 247                 if (error) {
 248                         if (error == ENOSPC)
 249                                 cmn_err(CE_WARN,
 250                                     "CacheFS: not enough space to create %s",
 251                                     namep);
 252                         else
 253                                 cmn_err(CE_WARN,
 254                                     "CacheFS: error %d creating %s",
 255                                     error, namep);
 256                         return (error);
 257                 }
 258         } else if (optp) {
 259                 /* compare the options to make sure they are compatible */
 260                 error = fscache_compare_options(fscp, optp);
 261                 if (error) {
 262                         cmn_err(CE_WARN,
 263                                 "CacheFS: mount failed, options do not match.");
 264                         return (error);
 265                 }
 266 
 267                 /* copy options into the fscache */
 268                 fscp->fs_info.fi_mntflags = optp->opt_flags;
 269                 fscp->fs_info.fi_popsize = optp->opt_popsize;
 270                 fscp->fs_info.fi_fgsize = optp->opt_fgsize;
 271                 fscp->fs_flags |= CFS_FS_DIRTYINFO;
 272 
 273                 /*
 274                  * The fileid of the root of the filesystem can change
 275                  * in NFSv4, so make sure we update the fi_root
 276                  * with the new filenumber.
 277                  */
 278                 if (CFS_ISFS_BACKFS_NFSV4(fscp) &&
 279                     fscp->fs_info.fi_root != backfileno) {
 280                         fscp->fs_info.fi_root = backfileno;
 281                 }
 282         }
 283 
 284         if (setflags) {
 285                 mutex_enter(&fscp->fs_fslock);
 286                 fscp->fs_flags |= CFS_FS_READ;
 287                 if ((cachep->c_flags & CACHE_NOFILL) == 0)
 288                         fscp->fs_flags |= CFS_FS_WRITE;
 289                 mutex_exit(&fscp->fs_fslock);
 290         }
 291 
 292         return (0);
 293 }
 294 
 295 /*
 296  * ------------------------------------------------------------------
 297  *
 298  *              fscache_activate
 299  *
 300  * Description:
 301  *      A wrapper routine for fscache_setup, telling it to setup the
 302  *      fscache for general use.
 303  *
 304  */
 305 int
 306 fscache_activate(fscache_t *fscp, ino64_t fsid, char *namep,
 307     struct cachefsoptions *optp, ino64_t backfileno)
 308 {
 309         return (fscache_setup(fscp, fsid, namep, optp, backfileno, 1));
 310 }
 311 
 312 /*
 313  * ------------------------------------------------------------------
 314  *
 315  *              fscache_enable
 316  *
 317  * Description:
 318  *      A wrapper routine for fscache_setup, telling it to create a
 319  *      fscache that can be used during remount.  In this case the
 320  *      fscache flags that allow general use are not yet turned on.
 321  *      A later call to fscache_activate_rw will set the flags.
 322  *
 323  */
 324 int
 325 fscache_enable(fscache_t *fscp, ino64_t fsid, char *namep,
 326     struct cachefsoptions *optp, ino64_t backfileno)
 327 {
 328         return (fscache_setup(fscp, fsid, namep, optp, backfileno, 0));
 329 }
 330 
 331 /*
 332  * ------------------------------------------------------------------
 333  *
 334  *              fscache_activate_rw
 335  *
 336  * Description:
 337  *      Makes the fscache both readable and writable.
 338  * Arguments:
 339  *      fscp            fscache object
 340  * Returns:
 341  * Preconditions:
 342  *      precond(fscp)
 343  */
 344 
 345 void
 346 fscache_activate_rw(fscache_t *fscp)
 347 {
 348         mutex_enter(&fscp->fs_fslock);
 349         fscp->fs_flags |= (CFS_FS_WRITE|CFS_FS_READ);
 350         mutex_exit(&fscp->fs_fslock);
 351 }
 352 
 353 /*
 354  * ------------------------------------------------------------------
 355  *
 356  *              fscache_hold
 357  *
 358  * Description:
 359  *      Increments the reference count on the fscache object
 360  * Arguments:
 361  *      fscp            fscache object to incriment reference count on
 362  * Returns:
 363  * Preconditions:
 364  *      precond(fscp)
 365  */
 366 
 367 void
 368 fscache_hold(fscache_t *fscp)
 369 {
 370         mutex_enter(&fscp->fs_fslock);
 371         fscp->fs_ref++;
 372         ASSERT(fscp->fs_ref > 0);
 373         mutex_exit(&fscp->fs_fslock);
 374 }
 375 
 376 /*
 377  * ------------------------------------------------------------------
 378  *
 379  *              fscache_rele
 380  *
 381  * Description:
 382  *      Decriments the reference count on the fscache object
 383  * Arguments:
 384  *      fscp            fscache object to decriment reference count on
 385  * Returns:
 386  * Preconditions:
 387  *      precond(fscp)
 388  */
 389 
 390 void
 391 fscache_rele(fscache_t *fscp)
 392 {
 393         mutex_enter(&fscp->fs_fslock);
 394         ASSERT(fscp->fs_ref > 0);
 395         fscp->fs_ref--;
 396         mutex_exit(&fscp->fs_fslock);
 397 }
 398 
 399 /*
 400  * ------------------------------------------------------------------
 401  *
 402  *              fscache_cnodecnt
 403  *
 404  * Description:
 405  *      Changes the count of number of cnodes on this fscache
 406  *      by the specified amount.
 407  * Arguments:
 408  *      fscp            fscache object to to modify count on
 409  *      cnt             amount to adjust by
 410  * Returns:
 411  *      Returns new count of number of cnodes.
 412  * Preconditions:
 413  *      precond(fscp)
 414  */
 415 
 416 int
 417 fscache_cnodecnt(fscache_t *fscp, int cnt)
 418 {
 419         int xx;
 420 
 421         mutex_enter(&fscp->fs_fslock);
 422         fscp->fs_cnodecnt += cnt;
 423         ASSERT(fscp->fs_cnodecnt >= 0);
 424         xx = fscp->fs_cnodecnt;
 425         mutex_exit(&fscp->fs_fslock);
 426         return (xx);
 427 }
 428 
 429 /*
 430  * ------------------------------------------------------------------
 431  *
 432  *              fscache_mounted
 433  *
 434  * Description:
 435  *      Called to indicate the the fscache is mounted.
 436  * Arguments:
 437  *      fscp            fscache object
 438  *      cfsvfsp         cachefs vfsp
 439  *      backvfsp        vfsp of back file system
 440  * Returns:
 441  *      Returns 0 for success, -1 if the cache is already mounted.
 442  * Preconditions:
 443  *      precond(fscp)
 444  */
 445 
 446 int
 447 fscache_mounted(fscache_t *fscp, struct vfs *cfsvfsp, struct vfs *backvfsp)
 448 {
 449         int error = 0;
 450 
 451         mutex_enter(&fscp->fs_fslock);
 452         if (fscp->fs_flags & CFS_FS_MOUNTED) {
 453                 error = -1;
 454                 goto out;
 455         }
 456 
 457         fscp->fs_backvfsp = backvfsp;
 458         fscp->fs_cfsvfsp = cfsvfsp;
 459         gethrestime(&fscp->fs_cod_time);
 460         fscp->fs_flags |= CFS_FS_MOUNTED;
 461 
 462         if (CFS_ISFS_SNR(fscp)) {
 463                 /*
 464                  * If there is a dlog file present, then we assume the cache
 465                  * was left in disconnected mode.
 466                  * Also if the back file system was not mounted we also
 467                  * start off in disconnected mode.
 468                  */
 469                 error = cachefs_dlog_setup(fscp, 0);
 470                 if (!error || (backvfsp == NULL)) {
 471                         mutex_enter(&fscp->fs_cdlock);
 472                         fscp->fs_cdconnected = CFS_CD_DISCONNECTED;
 473                         fscp->fs_cdtransition = 0;
 474                         cv_broadcast(&fscp->fs_cdwaitcv);
 475                         mutex_exit(&fscp->fs_cdlock);
 476                 }
 477 
 478                 /* invalidate any local fileno mappings */
 479                 fscp->fs_info.fi_resetfileno++;
 480                 fscp->fs_flags |= CFS_FS_DIRTYINFO;
 481 
 482                 /* if connected, invalidate any local time mappings */
 483                 if (backvfsp)
 484                         fscp->fs_info.fi_resettimes++;
 485         }
 486 
 487                 error = 0;
 488 
 489         /* set up the consistency mode */
 490         if (fscp->fs_info.fi_mntflags & CFS_NOCONST_MODE) {
 491                 fscp->fs_cfsops = &nopcfsops;
 492                 fscp->fs_consttype = CFS_FS_CONST_NOCONST;
 493         } else if (fscp->fs_info.fi_mntflags & CFS_CODCONST_MODE) {
 494                 fscp->fs_cfsops = &codcfsops;
 495                 fscp->fs_consttype = CFS_FS_CONST_CODCONST;
 496         } else {
 497                 fscp->fs_cfsops = &strictcfsops;
 498                 fscp->fs_consttype = CFS_FS_CONST_STRICT;
 499         }
 500 
 501 out:
 502         mutex_exit(&fscp->fs_fslock);
 503         (void) fscache_info_sync(fscp);
 504         return (error);
 505 }
 506 
 507 /*
 508  * Compares fscache state with new mount options
 509  * to make sure compatible.
 510  * Returns ESRCH if not compatible or 0 for success.
 511  */
 512 int
 513 fscache_compare_options(fscache_t *fscp, struct cachefsoptions *optp)
 514 {
 515         if ((fscp->fs_info.fi_popsize == optp->opt_popsize) &&
 516             (fscp->fs_info.fi_fgsize == optp->opt_fgsize)) {
 517                 return (0);
 518         } else {
 519                 return (ESRCH);
 520         }
 521 }
 522 
 523 /*
 524  * ------------------------------------------------------------------
 525  *
 526  *              fscache_sync
 527  *
 528  * Description:
 529  *      Syncs any data for this fscache to the front file system.
 530  * Arguments:
 531  *      fscp    fscache to sync
 532  * Returns:
 533  * Preconditions:
 534  *      precond(fscp)
 535  */
 536 
 537 void
 538 fscache_sync(struct fscache *fscp)
 539 {
 540         struct filegrp *fgp;
 541         int xx;
 542 
 543         (void) fscache_info_sync(fscp);
 544 
 545         /* sync the cnodes */
 546         cachefs_cnode_traverse(fscp, cachefs_cnode_sync);
 547 
 548         mutex_enter(&fscp->fs_fslock);
 549 
 550         /* sync the attrcache files */
 551         for (xx = 0; xx < CFS_FS_FGP_BUCKET_SIZE; xx++) {
 552                 for (fgp = fscp->fs_filegrp[xx]; fgp != NULL;
 553                         fgp = fgp->fg_next) {
 554                         (void) filegrp_sync(fgp);
 555                 }
 556         }
 557 
 558         /* garbage collect any unused file groups */
 559         filegrp_list_gc(fscp);
 560 
 561         mutex_exit(&fscp->fs_fslock);
 562 }
 563 
 564 /*
 565  * ------------------------------------------------------------------
 566  *
 567  *              fscache_acset
 568  *
 569  * Description:
 570  *      Sets the ac timeout values for the fscache.
 571  * Arguments:
 572  *      fscp    fscache object
 573  * Returns:
 574  * Preconditions:
 575  *      precond(fscp)
 576  */
 577 
 578 void
 579 fscache_acset(fscache_t *fscp,
 580         uint_t acregmin, uint_t acregmax, uint_t acdirmin, uint_t acdirmax)
 581 {
 582         mutex_enter(&fscp->fs_fslock);
 583         if (acregmin > acregmax)
 584                 acregmin = acregmax;
 585         if (acdirmin > acdirmax)
 586                 acdirmin = acdirmax;
 587         if (acregmin != 0)
 588                 fscp->fs_acregmin = acregmin;
 589         if (acregmax != 0)
 590                 fscp->fs_acregmax = acregmax;
 591         if (acdirmin != 0)
 592                 fscp->fs_acdirmin = acdirmin;
 593         if (acdirmax != 0)
 594                 fscp->fs_acdirmax = acdirmax;
 595         mutex_exit(&fscp->fs_fslock);
 596 }
 597 
 598 /*
 599  * ------------------------------------------------------------------
 600  *
 601  *              fscache_list_find
 602  *
 603  * Description:
 604  *      Finds the desired fscache structure on a cache's
 605  *      file system list.
 606  * Arguments:
 607  *      cachep  holds the list of fscache objects to search
 608  *      fsid    the numeric identifier of the fscache
 609  * Returns:
 610  *      Returns an fscache object on success or NULL on failure.
 611  * Preconditions:
 612  *      precond(cachep)
 613  *      precond(the fslistlock must be held)
 614  */
 615 
 616 fscache_t *
 617 fscache_list_find(cachefscache_t *cachep, ino64_t fsid)
 618 {
 619         fscache_t *fscp = cachep->c_fslist;
 620 
 621         ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
 622 
 623         while (fscp != NULL) {
 624                 if (fscp->fs_cfsid == fsid) {
 625                         ASSERT(fscp->fs_cache == cachep);
 626                         break;
 627                 }
 628                 fscp = fscp->fs_next;
 629         }
 630 
 631         return (fscp);
 632 }
 633 
 634 /*
 635  * ------------------------------------------------------------------
 636  *
 637  *              fscache_list_add
 638  *
 639  * Description:
 640  *      Adds the specified fscache object to the list on
 641  *      the specified cachep.
 642  * Arguments:
 643  *      cachep  holds the list of fscache objects
 644  *      fscp    fscache object to add to list
 645  * Returns:
 646  * Preconditions:
 647  *      precond(cachep)
 648  *      precond(fscp)
 649  *      precond(fscp cannot already be on a list)
 650  *      precond(the fslistlock must be held)
 651  */
 652 
 653 void
 654 fscache_list_add(cachefscache_t *cachep, fscache_t *fscp)
 655 {
 656         ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
 657 
 658         fscp->fs_next = cachep->c_fslist;
 659         cachep->c_fslist = fscp;
 660         cachep->c_refcnt++;
 661 }
 662 
 663 /*
 664  * ------------------------------------------------------------------
 665  *
 666  *              fscache_list_remove
 667  *
 668  * Description:
 669  *      Removes the specified fscache object from the list
 670  *      on the specified cachep.
 671  * Arguments:
 672  *      cachep  holds the list of fscache objects
 673  *      fscp    fscache object to remove from list
 674  * Returns:
 675  * Preconditions:
 676  *      precond(cachep)
 677  *      precond(fscp)
 678  *      precond(the fslistlock must be held)
 679  */
 680 
 681 void
 682 fscache_list_remove(cachefscache_t *cachep, fscache_t *fscp)
 683 {
 684         struct fscache **pfscp = &cachep->c_fslist;
 685 
 686         ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
 687 
 688         while (*pfscp != NULL) {
 689                 if (fscp == *pfscp) {
 690                         *pfscp = fscp->fs_next;
 691                         cachep->c_refcnt--;
 692                         break;
 693                 }
 694                 pfscp = &(*pfscp)->fs_next;
 695         }
 696 }
 697 
 698 /*
 699  * ------------------------------------------------------------------
 700  *
 701  *              fscache_list_gc
 702  *
 703  * Description:
 704  *      Traverses the list of fscache objects on the cachep
 705  *      list and destroys any that are not mounted and
 706  *      that are not referenced.
 707  * Arguments:
 708  *      cachep  holds the list of fscache objects
 709  * Returns:
 710  * Preconditions:
 711  *      precond(cachep)
 712  *      precond(the fslistlock must be held)
 713  */
 714 
 715 void
 716 fscache_list_gc(cachefscache_t *cachep)
 717 {
 718         struct fscache *next, *fscp;
 719 
 720         ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
 721 
 722         for (fscp = cachep->c_fslist; fscp != NULL; fscp = next) {
 723                 next = fscp->fs_next;
 724                 mutex_enter(&fscp->fs_fslock);
 725                 if (((fscp->fs_flags & CFS_FS_MOUNTED) == 0) &&
 726                     (fscp->fs_ref == 0)) {
 727                         mutex_exit(&fscp->fs_fslock);
 728                         fscache_list_remove(cachep, fscp);
 729                         fscache_destroy(fscp);
 730                 } else {
 731                         mutex_exit(&fscp->fs_fslock);
 732                 }
 733         }
 734 }
 735 
 736 /*
 737  * ------------------------------------------------------------------
 738  *
 739  *              fscache_list_mounted
 740  *
 741  * Description:
 742  *      Returns the number of fscache objects that are mounted.
 743  * Arguments:
 744  *      cachep  holds the list of fscache objects
 745  * Returns:
 746  * Preconditions:
 747  *      precond(cachep)
 748  *      precond(the fslistlock must be held)
 749  */
 750 
 751 int
 752 fscache_list_mounted(cachefscache_t *cachep)
 753 {
 754         struct fscache *fscp;
 755         int count;
 756 
 757         ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
 758 
 759         count = 0;
 760         for (fscp = cachep->c_fslist; fscp != NULL; fscp = fscp->fs_next) {
 761                 mutex_enter(&fscp->fs_fslock);
 762                 if (fscp->fs_flags & CFS_FS_MOUNTED)
 763                         count++;
 764                 mutex_exit(&fscp->fs_fslock);
 765         }
 766 
 767         return (count);
 768 }
 769 
 770 /*
 771  * Creates the fs cache directory.
 772  * The directory name is the ascii version of the fsid.
 773  * Also makes a symlink to the directory using the specified name.
 774  */
 775 int
 776 fscdir_create(cachefscache_t *cachep, char *namep, fscache_t *fscp)
 777 {
 778         int error;
 779         vnode_t *fscdirvp = NULL;
 780         vnode_t *infovp = NULL;
 781         vnode_t *attrvp = NULL;
 782         struct vattr *attrp = (struct vattr *)NULL;
 783         char name[CFS_FRONTFILE_NAME_SIZE];
 784         int files;
 785         int blocks = 0;
 786         cfs_cid_t cid;
 787         ino64_t fsid;
 788 
 789         ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
 790         ASSERT(fscp->fs_infovp == NULL);
 791         ASSERT(fscp->fs_fscdirvp == NULL);
 792         ASSERT(fscp->fs_fsattrdir == NULL);
 793 
 794         /* directory, symlink and options file + attrcache dir */
 795         files = 0;
 796         while (files < 4) {
 797                 error = cachefs_allocfile(cachep);
 798                 if (error)
 799                         goto out;
 800                 files++;
 801         }
 802         error = cachefs_allocblocks(cachep, 4, CACHEFS_RL_NONE);
 803         if (error)
 804                 goto out;
 805         blocks = 4;
 806 
 807         attrp = cachefs_kmem_alloc(sizeof (struct vattr), KM_SLEEP);
 808         attrp->va_mode = S_IFDIR | 0777;
 809         attrp->va_uid = 0;
 810         attrp->va_gid = 0;
 811         attrp->va_type = VDIR;
 812         attrp->va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
 813         error = VOP_MKDIR(cachep->c_dirvp, namep, attrp, &fscdirvp, kcred,
 814             NULL, 0, NULL);
 815         if (error) {
 816                 cmn_err(CE_WARN, "Can't create fs cache directory");
 817                 goto out;
 818         }
 819 
 820         /*
 821          * Created the directory. Get the fileno. That'll be the cachefs_fsid.
 822          */
 823         attrp->va_mask = AT_NODEID;
 824         error = VOP_GETATTR(fscdirvp, attrp, 0, kcred, NULL);
 825         if (error) {
 826                 goto out;
 827         }
 828         fsid = attrp->va_nodeid;
 829         attrp->va_mode = S_IFREG | 0666;
 830         attrp->va_uid = 0;
 831         attrp->va_gid = 0;
 832         attrp->va_type = VREG;
 833         attrp->va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
 834         error = VOP_CREATE(fscdirvp, CACHEFS_FSINFO, attrp, EXCL,
 835                         0600, &infovp, kcred, 0, NULL, NULL);
 836         if (error) {
 837                 cmn_err(CE_WARN, "Can't create fs option file");
 838                 goto out;
 839         }
 840         attrp->va_size = MAXBSIZE;
 841         attrp->va_mask = AT_SIZE;
 842         error = VOP_SETATTR(infovp, attrp, 0, kcred, NULL);
 843         if (error) {
 844                 cmn_err(CE_WARN, "Can't set size of fsinfo file");
 845                 goto out;
 846         }
 847 
 848         /* write out the info file */
 849         fscp->fs_flags |= CFS_FS_DIRTYINFO;
 850         error = fscache_info_sync(fscp);
 851         if (error)
 852                 goto out;
 853 
 854         /*
 855          * Install the symlink from cachefs_fsid -> directory.
 856          */
 857         cid.cid_flags = 0;
 858         cid.cid_fileno = fsid;
 859         make_ascii_name(&cid, name);
 860         error = VOP_RENAME(cachep->c_dirvp, namep, cachep->c_dirvp,
 861                 name, kcred, NULL, 0);
 862         if (error) {
 863                 cmn_err(CE_WARN, "Can't rename cache directory");
 864                 goto out;
 865         }
 866         attrp->va_mask = AT_MODE | AT_TYPE;
 867         attrp->va_mode = 0777;
 868         attrp->va_type = VLNK;
 869         error = VOP_SYMLINK(cachep->c_dirvp, namep, attrp, name, kcred, NULL,
 870             0);
 871         if (error) {
 872                 cmn_err(CE_WARN, "Can't create cache directory symlink");
 873                 goto out;
 874         }
 875 
 876         /*
 877          * Finally, make the attrcache directory
 878          */
 879         attrp->va_mode = S_IFDIR | 0777;
 880         attrp->va_uid = 0;
 881         attrp->va_gid = 0;
 882         attrp->va_type = VDIR;
 883         attrp->va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
 884         error = VOP_MKDIR(fscdirvp, ATTRCACHE_NAME, attrp, &attrvp, kcred, NULL,
 885             0, NULL);
 886         if (error) {
 887                 cmn_err(CE_WARN, "Can't create attrcache dir for fscache");
 888                 goto out;
 889         }
 890 
 891         mutex_enter(&fscp->fs_fslock);
 892         fscp->fs_cfsid = fsid;
 893         fscp->fs_fscdirvp = fscdirvp;
 894         fscp->fs_fsattrdir = attrvp;
 895         fscp->fs_infovp = infovp;
 896         mutex_exit(&fscp->fs_fslock);
 897 
 898 out:
 899 
 900         if (error) {
 901                 while (files-- > 0)
 902                         cachefs_freefile(cachep);
 903                 if (fscdirvp)
 904                         VN_RELE(fscdirvp);
 905                 if (blocks)
 906                         cachefs_freeblocks(cachep, blocks, CACHEFS_RL_NONE);
 907                 if (attrvp)
 908                         VN_RELE(attrvp);
 909                 if (infovp)
 910                         VN_RELE(infovp);
 911         }
 912         if (attrp)
 913                 cachefs_kmem_free(attrp, sizeof (struct vattr));
 914         return (error);
 915 }
 916 
 917 /*
 918  * Tries to find the fscache directory indicated by fsid.
 919  */
 920 int
 921 fscdir_find(cachefscache_t *cachep, ino64_t fsid, fscache_t *fscp)
 922 {
 923         int error;
 924         vnode_t *infovp = NULL;
 925         vnode_t *fscdirvp = NULL;
 926         vnode_t *attrvp = NULL;
 927         char dirname[CFS_FRONTFILE_NAME_SIZE];
 928         cfs_cid_t cid;
 929         cachefs_fsinfo_t fsinfo;
 930         caddr_t addr;
 931 
 932         ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
 933         ASSERT(fscp->fs_infovp == NULL);
 934         ASSERT(fscp->fs_fscdirvp == NULL);
 935         ASSERT(fscp->fs_fsattrdir == NULL);
 936 
 937         /* convert the fsid value to the name of the directory */
 938         cid.cid_flags = 0;
 939         cid.cid_fileno = fsid;
 940         make_ascii_name(&cid, dirname);
 941 
 942         /* try to find the directory */
 943         error = VOP_LOOKUP(cachep->c_dirvp, dirname, &fscdirvp, NULL,
 944                         0, NULL, kcred, NULL, NULL, NULL);
 945         if (error)
 946                 goto out;
 947 
 948         /* this better be a directory or we are hosed */
 949         if (fscdirvp->v_type != VDIR) {
 950                 cmn_err(CE_WARN, "cachefs: fscdir_find_a: cache corruption"
 951                         " run fsck, %s", dirname);
 952                 error = ENOTDIR;
 953                 goto out;
 954         }
 955 
 956         /* try to find the info file */
 957         error = VOP_LOOKUP(fscdirvp, CACHEFS_FSINFO, &infovp,
 958             NULL, 0, NULL, kcred, NULL, NULL, NULL);
 959         if (error) {
 960                 cmn_err(CE_WARN, "cachefs: fscdir_find_b: cache corruption"
 961                         " run fsck, %s", dirname);
 962                 goto out;
 963         }
 964 
 965         /* read in info struct */
 966         addr = segmap_getmapflt(segkmap, infovp, (offset_t)0,
 967                                 MAXBSIZE, 1, S_READ);
 968 
 969         /*LINTED alignment okay*/
 970         fsinfo = *(cachefs_fsinfo_t *)addr;
 971         error =  segmap_release(segkmap, addr, 0);
 972         if (error) {
 973                 cmn_err(CE_WARN, "cachefs: fscdir_find_c: cache corruption"
 974                         " run fsck, %s", dirname);
 975                 goto out;
 976         }
 977 
 978         /* try to find the attrcache directory */
 979         error = VOP_LOOKUP(fscdirvp, ATTRCACHE_NAME,
 980             &attrvp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
 981         if (error) {
 982                 cmn_err(CE_WARN, "cachefs: fscdir_find_d: cache corruption"
 983                         " run fsck, %s", dirname);
 984                 goto out;
 985         }
 986 
 987         mutex_enter(&fscp->fs_fslock);
 988         fscp->fs_info = fsinfo;
 989         fscp->fs_cfsid = fsid;
 990         fscp->fs_fscdirvp = fscdirvp;
 991         fscp->fs_fsattrdir = attrvp;
 992         fscp->fs_infovp = infovp;
 993         mutex_exit(&fscp->fs_fslock);
 994 
 995 out:
 996         if (error) {
 997                 if (infovp)
 998                         VN_RELE(infovp);
 999                 if (fscdirvp)
1000                         VN_RELE(fscdirvp);
1001         }
1002         return (error);
1003 }
1004 
1005 /*
1006  * fscache_info_sync
1007  * Writes out the fs_info data if necessary.
1008  */
1009 static int
1010 fscache_info_sync(fscache_t *fscp)
1011 {
1012         caddr_t addr;
1013         int error = 0;
1014 
1015         mutex_enter(&fscp->fs_fslock);
1016 
1017         if (fscp->fs_cache->c_flags & CACHE_NOFILL) {
1018                 error = EROFS;
1019                 goto out;
1020         }
1021 
1022         /* if the data is dirty and we have the file vnode */
1023         if ((fscp->fs_flags & CFS_FS_DIRTYINFO) && fscp->fs_infovp) {
1024                 addr = segmap_getmapflt(segkmap, fscp->fs_infovp, 0,
1025                                         MAXBSIZE, 1, S_WRITE);
1026 
1027                 /*LINTED alignment okay*/
1028                 *(cachefs_fsinfo_t *)addr = fscp->fs_info;
1029                 error = segmap_release(segkmap, addr, SM_WRITE);
1030 
1031                 if (error) {
1032                         cmn_err(CE_WARN,
1033                             "cachefs: Can not write to info file.");
1034                 } else {
1035                         fscp->fs_flags &= ~CFS_FS_DIRTYINFO;
1036                 }
1037         }
1038 
1039 out:
1040 
1041         mutex_exit(&fscp->fs_fslock);
1042 
1043         return (error);
1044 }
1045 
1046 /*
1047  * ------------------------------------------------------------------
1048  *
1049  *              fscache_name_to_fsid
1050  *
1051  * Description:
1052  *      Takes the name of a cache and determines it corresponding
1053  *      fsid.
1054  * Arguments:
1055  *      cachep  cache object to find name of fs cache in
1056  *      namep   the name of the fs cache
1057  *      fsidp   set to the fsid if found
1058  * Returns:
1059  *      Returns 0 on success, !0 on error.
1060  * Preconditions:
1061  *      precond(cachep)
1062  *      precond(namep)
1063  *      precond(fsidp)
1064  */
1065 
1066 int
1067 fscache_name_to_fsid(cachefscache_t *cachep, char *namep, ino64_t *fsidp)
1068 {
1069         int error;
1070         char dirname[CFS_FRONTFILE_NAME_SIZE];
1071         vnode_t *linkvp = NULL;
1072         struct uio uio;
1073         struct iovec iov;
1074         ino64_t nodeid;
1075         char *pd;
1076         int xx;
1077         int c;
1078 
1079         /* get the vnode of the name */
1080         error = VOP_LOOKUP(cachep->c_dirvp, namep, &linkvp, NULL, 0, NULL,
1081                 kcred, NULL, NULL, NULL);
1082         if (error)
1083                 goto out;
1084 
1085         /* the vnode had better be a link */
1086         if (linkvp->v_type != VLNK) {
1087                 error = EINVAL;
1088                 goto out;
1089         }
1090 
1091         /* read the contents of the link */
1092         iov.iov_len = CFS_FRONTFILE_NAME_SIZE;
1093         iov.iov_base = dirname;
1094         uio.uio_iov = &iov;
1095         uio.uio_iovcnt = 1;
1096         uio.uio_resid = iov.iov_len;
1097         uio.uio_segflg = UIO_SYSSPACE;
1098         uio.uio_loffset = 0;
1099         uio.uio_fmode = 0;
1100         uio.uio_extflg = UIO_COPY_CACHED;
1101         error = VOP_READLINK(linkvp, &uio, kcred, NULL);
1102         if (error) {
1103                 cmn_err(CE_WARN, "cachefs: Can't read filesystem cache link");
1104                 goto out;
1105         }
1106 
1107         /* convert the contents of the link to a ino64_t */
1108         nodeid = 0;
1109         pd = dirname;
1110         for (xx = 0; xx < (CFS_FRONTFILE_NAME_SIZE - 2); xx++) {
1111                 nodeid <<= 4;
1112                 c = *pd++;
1113                 if (c <= '9')
1114                         c -= '0';
1115                 else if (c <= 'F')
1116                         c = c - 'A' + 10;
1117                 else
1118                         c = c - 'a' + 10;
1119                 nodeid += c;
1120         }
1121         *fsidp = nodeid;
1122 out:
1123         if (linkvp)
1124                 VN_RELE(linkvp);
1125 
1126         return (error);
1127 }
1128 
1129 
1130 /*
1131  * Suspends the thread until access to the cache is granted.
1132  * If !SOFT then
1133  *      waitconnected == 1 means wait until connected
1134  *      waitconnected == 0 means wait until connected or disconnected
1135  * else then
1136  *      wait until connected or disconnected
1137  * writing is set to 1 if writing, 0 if reading
1138  * Returns 0, EINTR, or ETIMEDOUT.
1139  */
1140 int
1141 cachefs_cd_access(fscache_t *fscp, int waitconnected, int writing)
1142 {
1143         int nosig;
1144         int error = 0;
1145         cachefscache_t *cachep;
1146         int waithappens = 0;
1147         pid_t pid;
1148 
1149         mutex_enter(&fscp->fs_cdlock);
1150 
1151 #ifdef CFS_CD_DEBUG
1152         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
1153 #endif
1154 
1155         for (;;) {
1156                 /* if we have to wait */
1157                 if (waithappens ||
1158                     (waitconnected &&
1159                     (fscp->fs_cdconnected != CFS_CD_CONNECTED))) {
1160 
1161                         /* do not make soft mounts wait until connected */
1162                         if ((waithappens == 0) && CFS_ISFS_SOFT(fscp)) {
1163                                 error = ETIMEDOUT;
1164                                 break;
1165                         }
1166 
1167                         /* wait for a wakeup or a signal */
1168                         nosig = cv_wait_sig(&fscp->fs_cdwaitcv,
1169                             &fscp->fs_cdlock);
1170 
1171                         /* if we got a signal */
1172                         if (nosig == 0) {
1173                                 error = EINTR;
1174                                 break;
1175                         }
1176 
1177                         if (waitconnected &&
1178                             (fscp->fs_cdconnected == CFS_CD_CONNECTED))
1179                                 waitconnected = 0;
1180 
1181                         /* try again to get access */
1182                         waithappens = 0;
1183                         continue;
1184                 }
1185 
1186                 /* if transitioning modes */
1187                 if (fscp->fs_cdtransition) {
1188                         waithappens = 1;
1189                         continue;
1190                 }
1191 
1192                 /* if rolling the log */
1193                 if (fscp->fs_cdconnected == CFS_CD_RECONNECTING) {
1194                         pid = ttoproc(curthread)->p_pid;
1195                         cachep = fscp->fs_cache;
1196 
1197                         /* if writing or not the cachefsd */
1198                         if (writing ||
1199                             ((fscp->fs_cddaemonid != pid) &&
1200                             (cachep->c_rootdaemonid != pid))) {
1201                                 waithappens = 1;
1202                                 continue;
1203                         }
1204                 }
1205 
1206                 /* if the daemon is not running */
1207                 if (fscp->fs_cddaemonid == 0) {
1208                         /* if writing and not connected */
1209                         if (writing &&
1210                             (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
1211                                 waithappens = 1;
1212                                 continue;
1213                         }
1214                 }
1215 
1216                 /*
1217                  * Verify don't set wait for NFSv4 (doesn't support
1218                  * disconnected behavior).
1219                  */
1220                 ASSERT(!CFS_ISFS_BACKFS_NFSV4(fscp) ||
1221                                 (waithappens == 0 && waitconnected == 0));
1222 
1223                 ASSERT(fscp->fs_cdrefcnt >= 0);
1224                 fscp->fs_cdrefcnt++;
1225 #ifdef CFS_CD_DEBUG
1226                 curthread->t_flag |= T_CD_HELD;
1227 #endif
1228                 break;
1229         }
1230         mutex_exit(&fscp->fs_cdlock);
1231 
1232         return (error);
1233 }
1234 
1235 /*
1236  * Call to check if can have access after a cache miss has occurred.
1237  * Only read access is allowed, do not call this routine if want
1238  * to write.
1239  * Returns 1 if yes, 0 if no.
1240  */
1241 int
1242 cachefs_cd_access_miss(fscache_t *fscp)
1243 {
1244         cachefscache_t *cachep;
1245         pid_t pid;
1246 
1247 #ifdef CFS_CD_DEBUG
1248         ASSERT(curthread->t_flag & T_CD_HELD);
1249 #endif
1250 
1251         /* should not get called if connected */
1252         ASSERT(fscp->fs_cdconnected != CFS_CD_CONNECTED);
1253 
1254         /* if no back file system, then no */
1255         if (fscp->fs_backvfsp == NULL)
1256                 return (0);
1257 
1258         /* if daemon is not running, then yes */
1259         if (fscp->fs_cddaemonid == 0) {
1260                 return (1);
1261         }
1262 
1263         pid = ttoproc(curthread)->p_pid;
1264         cachep = fscp->fs_cache;
1265 
1266         /* if daemon is running, only daemon is allowed to have access */
1267         if ((fscp->fs_cddaemonid != pid) &&
1268             (cachep->c_rootdaemonid != pid)) {
1269                 return (0);
1270         }
1271 
1272         return (1);
1273 }
1274 
1275 /*
1276  * Releases an access to the file system.
1277  */
1278 void
1279 cachefs_cd_release(fscache_t *fscp)
1280 {
1281         mutex_enter(&fscp->fs_cdlock);
1282 
1283 #ifdef CFS_CD_DEBUG
1284         ASSERT(curthread->t_flag & T_CD_HELD);
1285         curthread->t_flag &= ~T_CD_HELD;
1286 #endif
1287         /* decriment hold on file system */
1288         fscp->fs_cdrefcnt--;
1289         ASSERT(fscp->fs_cdrefcnt >= 0);
1290 
1291         /* Verify no connected state transitions for NFSv4 */
1292         ASSERT(!CFS_ISFS_BACKFS_NFSV4(fscp) || fscp->fs_cdtransition == 0);
1293 
1294         /* wake up cachefsd */
1295         if ((fscp->fs_cdrefcnt == 0) && fscp->fs_cdtransition)
1296                 cv_broadcast(&fscp->fs_cdwaitcv);
1297 
1298         mutex_exit(&fscp->fs_cdlock);
1299 }
1300 
1301 /*
1302  * Called when a network timeout error has occurred.
1303  * If connected, switches state to disconnected.
1304  */
1305 void
1306 cachefs_cd_timedout(fscache_t *fscp)
1307 {
1308         int state;
1309 
1310         /* nothing to do if not snr or not connected */
1311         if (!CFS_ISFS_SNR(fscp) || (fscp->fs_cdconnected != CFS_CD_CONNECTED))
1312                 return;
1313 
1314 #ifdef CFS_CD_DEBUG
1315         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
1316 #endif
1317 
1318         /* Verify no state changes done for NFSv4 */
1319         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
1320 
1321         state = CFS_FS_DISCONNECTED;
1322         (void) cachefs_io_stateset(fscp->fs_rootvp, &state, NULL);
1323 }