1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/param.h>
  27 #include <sys/types.h>
  28 #include <sys/systm.h>
  29 #include <sys/cred.h>
  30 #include <sys/proc.h>
  31 #include <sys/user.h>
  32 #include <sys/vfs.h>
  33 #include <sys/vnode.h>
  34 #include <sys/pathname.h>
  35 #include <sys/uio.h>
  36 #include <sys/tiuser.h>
  37 #include <sys/sysmacros.h>
  38 #include <sys/kmem.h>
  39 #include <sys/mount.h>
  40 #include <sys/ioctl.h>
  41 #include <sys/statvfs.h>
  42 #include <sys/errno.h>
  43 #include <sys/debug.h>
  44 #include <sys/cmn_err.h>
  45 #include <sys/utsname.h>
  46 #include <sys/modctl.h>
  47 #include <sys/file.h>
  48 #include <sys/stat.h>
  49 #include <sys/fcntl.h>
  50 #include <sys/fbuf.h>
  51 #include <sys/dnlc.h>
  52 #include <sys/callb.h>
  53 #include <sys/kobj.h>
  54 #include <sys/rwlock.h>
  55 
  56 #include <sys/vmsystm.h>
  57 #include <vm/hat.h>
  58 #include <vm/as.h>
  59 #include <vm/page.h>
  60 #include <vm/pvn.h>
  61 #include <vm/seg.h>
  62 #include <vm/seg_map.h>
  63 #include <vm/seg_vn.h>
  64 #include <vm/rm.h>
  65 #include <sys/fs/cachefs_fs.h>
  66 #include <sys/fs/cachefs_log.h>
  67 #include <sys/fs/cachefs_dir.h>
  68 
  69 extern struct seg *segkmap;
  70 caddr_t segmap_getmap();
  71 int segmap_release();
  72 
  73 extern struct cnode *cachefs_freeback;
  74 extern struct cnode *cachefs_freefront;
  75 extern cachefscache_t *cachefs_cachelist;
  76 
  77 #ifdef CFSDEBUG
  78 int cachefsdebug = 0;
  79 #endif
  80 
  81 int cachefs_max_threads = CFS_MAX_THREADS;
  82 ino64_t cachefs_check_fileno = 0;
  83 struct kmem_cache *cachefs_cache_kmcache = NULL;
  84 struct kmem_cache *cachefs_req_cache = NULL;
  85 
  86 static int
  87 cachefs_async_populate_reg(struct cachefs_populate_req *, cred_t *,
  88     vnode_t *, vnode_t *);
  89 
  90 /*
  91  * Cache routines
  92  */
  93 
  94 /*
  95  * ------------------------------------------------------------------
  96  *
  97  *              cachefs_cache_create
  98  *
  99  * Description:
 100  *      Creates a cachefscache_t object and initializes it to
 101  *      be NOCACHE and NOFILL mode.
 102  * Arguments:
 103  * Returns:
 104  *      Returns a pointer to the created object or NULL if
 105  *      threads could not be created.
 106  * Preconditions:
 107  */
 108 
 109 cachefscache_t *
 110 cachefs_cache_create(void)
 111 {
 112         cachefscache_t *cachep;
 113         struct cachefs_req *rp;
 114 
 115         /* allocate zeroed memory for the object */
 116         cachep = kmem_cache_alloc(cachefs_cache_kmcache, KM_SLEEP);
 117 
 118         bzero(cachep, sizeof (*cachep));
 119 
 120         cv_init(&cachep->c_cwcv, NULL, CV_DEFAULT, NULL);
 121         cv_init(&cachep->c_cwhaltcv, NULL, CV_DEFAULT, NULL);
 122         mutex_init(&cachep->c_contentslock, NULL, MUTEX_DEFAULT, NULL);
 123         mutex_init(&cachep->c_fslistlock, NULL, MUTEX_DEFAULT, NULL);
 124         mutex_init(&cachep->c_log_mutex, NULL, MUTEX_DEFAULT, NULL);
 125 
 126         /* set up the work queue and get the sync thread created */
 127         cachefs_workq_init(&cachep->c_workq);
 128         cachep->c_workq.wq_keepone = 1;
 129         cachep->c_workq.wq_cachep = cachep;
 130         rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
 131         rp->cfs_cmd = CFS_NOOP;
 132         rp->cfs_cr = kcred;
 133         rp->cfs_req_u.cu_fs_sync.cf_cachep = cachep;
 134         crhold(rp->cfs_cr);
 135         cachefs_addqueue(rp, &cachep->c_workq);
 136         cachep->c_flags |= CACHE_NOCACHE | CACHE_NOFILL | CACHE_ALLOC_PENDING;
 137 
 138         return (cachep);
 139 }
 140 
 141 /*
 142  * ------------------------------------------------------------------
 143  *
 144  *              cachefs_cache_destroy
 145  *
 146  * Description:
 147  *      Destroys the cachefscache_t object.
 148  * Arguments:
 149  *      cachep  the cachefscache_t object to destroy
 150  * Returns:
 151  * Preconditions:
 152  *      precond(cachep)
 153  */
 154 
 155 void
 156 cachefs_cache_destroy(cachefscache_t *cachep)
 157 {
 158         int error = 0;
 159 #ifdef CFSRLDEBUG
 160         uint_t index;
 161 #endif /* CFSRLDEBUG */
 162         clock_t wakeup = (60 * hz);
 163 
 164         /* stop async threads */
 165         while (cachep->c_workq.wq_thread_count > 0)
 166                 (void) cachefs_async_halt(&cachep->c_workq, 1);
 167 
 168         /* kill off the cachep worker thread */
 169         mutex_enter(&cachep->c_contentslock);
 170         while (cachep->c_flags & CACHE_CACHEW_THREADRUN) {
 171                 cachep->c_flags |= CACHE_CACHEW_THREADEXIT;
 172                 cv_signal(&cachep->c_cwcv);
 173                 (void) cv_reltimedwait(&cachep->c_cwhaltcv,
 174                     &cachep->c_contentslock, wakeup, TR_CLOCK_TICK);
 175         }
 176 
 177         if ((cachep->c_flags & CACHE_ALLOC_PENDING) == 0) {
 178                 cachep->c_usage.cu_flags &= ~CUSAGE_ACTIVE;
 179                 (void) cachefs_cache_rssync(cachep);
 180         }
 181         mutex_exit(&cachep->c_contentslock);
 182 
 183         /* if there is a cache */
 184         if ((cachep->c_flags & CACHE_NOCACHE) == 0) {
 185                 if ((cachep->c_flags & CACHE_NOFILL) == 0) {
 186 #ifdef CFSRLDEBUG
 187                         /* blow away dangling rl debugging info */
 188                         mutex_enter(&cachep->c_contentslock);
 189                         for (index = 0;
 190                             index <= cachep->c_rlinfo.rl_entries;
 191                             index++) {
 192                                 rl_entry_t *rlent;
 193 
 194                                 error = cachefs_rl_entry_get(cachep, index,
 195                                     rlent);
 196                                 /*
 197                                  * Since we are destroying the cache,
 198                                  * better to ignore and proceed
 199                                  */
 200                                 if (error)
 201                                         break;
 202                                 cachefs_rl_debug_destroy(rlent);
 203                         }
 204                         mutex_exit(&cachep->c_contentslock);
 205 #endif /* CFSRLDEBUG */
 206 
 207                         /* sync the cache */
 208                         if (!error)
 209                                 cachefs_cache_sync(cachep);
 210                 } else {
 211                         /* get rid of any unused fscache objects */
 212                         mutex_enter(&cachep->c_fslistlock);
 213                         fscache_list_gc(cachep);
 214                         mutex_exit(&cachep->c_fslistlock);
 215                 }
 216                 ASSERT(cachep->c_fslist == NULL);
 217 
 218                 VN_RELE(cachep->c_resfilevp);
 219                 VN_RELE(cachep->c_dirvp);
 220                 VN_RELE(cachep->c_lockvp);
 221                 VN_RELE(cachep->c_lostfoundvp);
 222         }
 223 
 224         if (cachep->c_log_ctl != NULL)
 225                 cachefs_kmem_free(cachep->c_log_ctl,
 226                     sizeof (cachefs_log_control_t));
 227         if (cachep->c_log != NULL)
 228                 cachefs_log_destroy_cookie(cachep->c_log);
 229 
 230         cv_destroy(&cachep->c_cwcv);
 231         cv_destroy(&cachep->c_cwhaltcv);
 232         mutex_destroy(&cachep->c_contentslock);
 233         mutex_destroy(&cachep->c_fslistlock);
 234         mutex_destroy(&cachep->c_log_mutex);
 235 
 236         kmem_cache_free(cachefs_cache_kmcache, cachep);
 237 }
 238 
 239 /*
 240  * ------------------------------------------------------------------
 241  *
 242  *              cachefs_cache_active_ro
 243  *
 244  * Description:
 245  *      Activates the cachefscache_t object for a read-only file system.
 246  * Arguments:
 247  *      cachep  the cachefscache_t object to activate
 248  *      cdvp    the vnode of the cache directory
 249  * Returns:
 250  *      Returns 0 for success, !0 if there is a problem with the cache.
 251  * Preconditions:
 252  *      precond(cachep)
 253  *      precond(cdvp)
 254  *      precond(cachep->c_flags & CACHE_NOCACHE)
 255  */
 256 
int
cachefs_cache_activate_ro(cachefscache_t *cachep, vnode_t *cdvp)
{
        cachefs_log_control_t *lc;
        vnode_t *labelvp = NULL;
        vnode_t *rifvp = NULL;
        vnode_t *lockvp = NULL;
        vnode_t *statevp = NULL;
        vnode_t *lostfoundvp = NULL;
        struct vattr *attrp = NULL;
        int error;

        ASSERT(cachep->c_flags & CACHE_NOCACHE);
        mutex_enter(&cachep->c_contentslock);

        /* vattr is heap-allocated rather than put on the kernel stack */
        attrp = cachefs_kmem_alloc(sizeof (struct vattr), KM_SLEEP);

        /* get the mode bits of the cache directory */
        attrp->va_mask = AT_ALL;
        error = VOP_GETATTR(cdvp, attrp, 0, kcred, NULL);
        if (error)
                goto out;

        /* ensure the mode bits are 000 to keep out casual users */
        if (attrp->va_mode & S_IAMB) {
                cmn_err(CE_WARN, "cachefs: Cache Directory Mode must be 000\n");
                error = EPERM;
                goto out;
        }

        /* Get the lock file */
        error = VOP_LOOKUP(cdvp, CACHEFS_LOCK_FILE, &lockvp, NULL, 0, NULL,
            kcred, NULL, NULL, NULL);
        if (error) {
                cmn_err(CE_WARN, "cachefs: activate_a: cache corruption"
                    " run fsck.\n");
                goto out;
        }

        /* Get the label file */
        error = VOP_LOOKUP(cdvp, CACHELABEL_NAME, &labelvp, NULL, 0, NULL,
            kcred, NULL, NULL, NULL);
        if (error) {
                cmn_err(CE_WARN, "cachefs: activate_b: cache corruption"
                    " run fsck.\n");
                goto out;
        }

        /* read in the label */
        error = vn_rdwr(UIO_READ, labelvp, (caddr_t)&cachep->c_label,
            sizeof (struct cache_label), 0LL, UIO_SYSSPACE,
            0, (rlim64_t)0, kcred, NULL);
        if (error) {
                cmn_err(CE_WARN, "cachefs: activate_c: cache corruption"
                    " run fsck.\n");
                goto out;
        }

        /* Verify that we can handle the version this cache was created under */
        if (cachep->c_label.cl_cfsversion != CFSVERSION) {
                cmn_err(CE_WARN, "cachefs: Invalid Cache Version, run fsck\n");
                error = EINVAL;
                goto out;
        }

        /* Open the resource file */
        error = VOP_LOOKUP(cdvp, RESOURCE_NAME, &rifvp, NULL, 0, NULL, kcred,
            NULL, NULL, NULL);
        if (error) {
                cmn_err(CE_WARN, "cachefs: activate_d: cache corruption"
                    " run fsck.\n");
                goto out;
        }

        /*  Read the usage struct for this cache (offset 0 in the resource
         *  file; the rl info follows it -- see cachefs_cache_rssync). */
        error = vn_rdwr(UIO_READ, rifvp, (caddr_t)&cachep->c_usage,
            sizeof (struct cache_usage), 0LL, UIO_SYSSPACE, 0,
            (rlim64_t)0, kcred, NULL);
        if (error) {
                cmn_err(CE_WARN, "cachefs: activate_e: cache corruption"
                    " run fsck.\n");
                goto out;
        }

        /* a still-set ACTIVE flag means the cache was not shut down cleanly */
        if (cachep->c_usage.cu_flags & CUSAGE_ACTIVE) {
                cmn_err(CE_WARN, "cachefs: cache not clean.  Run fsck\n");
                /* ENOSPC is what UFS uses for clean flag check */
                error = ENOSPC;
                goto out;
        }

        /*  Read the rlinfo for this cache */
        error = vn_rdwr(UIO_READ, rifvp, (caddr_t)&cachep->c_rlinfo,
            sizeof (cachefs_rl_info_t), (offset_t)sizeof (struct cache_usage),
            UIO_SYSSPACE, 0, 0, kcred, NULL);
        if (error) {
                cmn_err(CE_WARN, "cachefs: activate_f: cache corruption"
                    " run fsck.\n");
                goto out;
        }

        /* Open the lost+found directory */
        error = VOP_LOOKUP(cdvp, CACHEFS_LOSTFOUND_NAME, &lostfoundvp,
            NULL, 0, NULL, kcred, NULL, NULL, NULL);
        if (error) {
                cmn_err(CE_WARN, "cachefs: activate_g: cache corruption"
                    " run fsck.\n");
                goto out;
        }

        /*
         * Take our own holds on the vnodes stashed in the cache object;
         * the VN_RELEs at `out' drop the references the lookups above
         * returned.  These holds are dropped in cachefs_cache_destroy.
         */
        VN_HOLD(rifvp);
        VN_HOLD(cdvp);
        VN_HOLD(lockvp);
        VN_HOLD(lostfoundvp);
        cachep->c_resfilevp = rifvp;
        cachep->c_dirvp = cdvp;
        cachep->c_lockvp = lockvp;
        cachep->c_lostfoundvp = lostfoundvp;

        /* get the cachep worker thread created */
        cachep->c_flags |= CACHE_CACHEW_THREADRUN;
        (void) thread_create(NULL, 0, cachefs_cachep_worker_thread,
            cachep, 0, &p0, TS_RUN, minclsyspri);

        /* allocate the `logging control' field */
        mutex_enter(&cachep->c_log_mutex);
        cachep->c_log_ctl =
            cachefs_kmem_zalloc(sizeof (cachefs_log_control_t), KM_SLEEP);
        lc = (cachefs_log_control_t *)cachep->c_log_ctl;

        /* if the LOG_STATUS_NAME file exists, read it in and set up logging */
        error = VOP_LOOKUP(cachep->c_dirvp, LOG_STATUS_NAME, &statevp,
            NULL, 0, NULL, kcred, NULL, NULL, NULL);
        if (error == 0) {
                int vnrw_error;

                vnrw_error = vn_rdwr(UIO_READ, statevp, (caddr_t)lc,
                    sizeof (*lc), 0LL, UIO_SYSSPACE, 0, (rlim64_t)RLIM_INFINITY,
                    kcred, NULL);
                VN_RELE(statevp);

                /* logging setup failures are logged but do not fail activate */
                if (vnrw_error == 0) {
                        if ((cachep->c_log = cachefs_log_create_cookie(lc))
                            == NULL)
                                cachefs_log_error(cachep, ENOMEM, 0);
                        else if ((lc->lc_magic != CACHEFS_LOG_MAGIC) ||
                            (lc->lc_path[0] != '/') ||
                            (cachefs_log_logfile_open(cachep,
                            lc->lc_path) != 0))
                                cachefs_log_error(cachep, EINVAL, 0);
                }
        } else {
                /* a missing log status file is not an activation error */
                error = 0;
        }
        lc->lc_magic = CACHEFS_LOG_MAGIC;
        lc->lc_cachep = (uint64_t)(uintptr_t)cachep;
        mutex_exit(&cachep->c_log_mutex);

out:
        /* on success, leave NOCACHE mode; cache is now usable read-only */
        if (error == 0) {
                cachep->c_flags &= ~(CACHE_NOCACHE | CACHE_ALLOC_PENDING);
        }
        if (attrp)
                cachefs_kmem_free(attrp, sizeof (struct vattr));
        if (labelvp != NULL)
                VN_RELE(labelvp);
        if (rifvp != NULL)
                VN_RELE(rifvp);
        if (lockvp)
                VN_RELE(lockvp);
        if (lostfoundvp)
                VN_RELE(lostfoundvp);

        mutex_exit(&cachep->c_contentslock);
        return (error);
}
 433 
/*
 * Disables caching for the file system of the given cnode: tears down
 * the front-file state built by cachefs_cache_activate_ro and puts the
 * cache back into CACHE_NOCACHE mode.  Only legal while the cache is
 * still in NOFILL mode.  Returns 0 or an errno.
 */
int
cachefs_stop_cache(cnode_t *cp)
{
        fscache_t *fscp = C_TO_FSCACHE(cp);
        cachefscache_t *cachep = fscp->fs_cache;
        filegrp_t *fgp;
        int i;
        int error = 0;
        clock_t wakeup = (60 * hz);

        /* XXX verify lock-ordering for this function */

        mutex_enter(&cachep->c_contentslock);

        /*
         * no work if we're already in nocache mode.  hopefully this
         * will be the usual case.
         */

        if (cachep->c_flags & CACHE_NOCACHE) {
                mutex_exit(&cachep->c_contentslock);
                return (0);
        }

        /* only a cache still in NOFILL mode may be stopped */
        if ((cachep->c_flags & CACHE_NOFILL) == 0) {
                mutex_exit(&cachep->c_contentslock);
                return (EINVAL);
        }

        mutex_exit(&cachep->c_contentslock);

        /* We are already not caching if nfsv4 */
        if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
                return (0);
        }

#ifdef CFSDEBUG
        mutex_enter(&cachep->c_fslistlock);
        ASSERT(fscp == cachep->c_fslist);
        ASSERT(fscp->fs_next == NULL);
        mutex_exit(&cachep->c_fslistlock);

        printf("cachefs_stop_cache: resetting CACHE_NOCACHE\n");
#endif

        /* XXX should i worry about disconnected during boot? */
        error = cachefs_cd_access(fscp, 1, 1);
        if (error)
                goto out;

        /* halt the per-fs and cache-wide async worker queues */
        error = cachefs_async_halt(&fscp->fs_workq, 1);
        ASSERT(error == 0);
        error = cachefs_async_halt(&cachep->c_workq, 1);
        ASSERT(error == 0);
        /* sigh -- best to keep going if async_halt failed. */
        error = 0;

        /* XXX current order: cnode, fgp, fscp, cache. okay? */

        /* detach every cnode of this fs from its front-file state */
        cachefs_cnode_traverse(fscp, cachefs_cnode_disable_caching);

        /*
         * Walk every filegrp hash bucket, dropping the front-file
         * directory and attrcache vnodes and marking the filegrp so
         * they would be re-allocated if caching were re-enabled.
         */
        for (i = 0; i < CFS_FS_FGP_BUCKET_SIZE; i++) {
                for (fgp = fscp->fs_filegrp[i]; fgp != NULL;
                    fgp = fgp->fg_next) {
                        mutex_enter(&fgp->fg_mutex);

                        ASSERT((fgp->fg_flags &
                            (CFS_FG_WRITE | CFS_FG_UPDATED)) == 0);
                        fgp->fg_flags |=
                            CFS_FG_ALLOC_FILE |
                            CFS_FG_ALLOC_ATTR;
                        fgp->fg_flags &= ~CFS_FG_READ;

                        if (fgp->fg_dirvp) {
                                fgp->fg_flags |= CFS_FG_ALLOC_FILE;
                                VN_RELE(fgp->fg_dirvp);
                                fgp->fg_dirvp = NULL;
                        }
                        if (fgp->fg_attrvp) {
                                fgp->fg_flags |= CFS_FG_ALLOC_ATTR;
                                VN_RELE(fgp->fg_attrvp);
                                fgp->fg_attrvp = NULL;
                        }

                        mutex_exit(&fgp->fg_mutex);
                }
        }

        /* drop the fscache's own front-file vnodes */
        mutex_enter(&fscp->fs_fslock);
        ASSERT((fscp->fs_flags & (CFS_FS_WRITE)) == 0);
        fscp->fs_flags &= ~(CFS_FS_READ | CFS_FS_DIRTYINFO);

        if (fscp->fs_fscdirvp) {
                VN_RELE(fscp->fs_fscdirvp);
                fscp->fs_fscdirvp = NULL;
        }
        if (fscp->fs_fsattrdir) {
                VN_RELE(fscp->fs_fsattrdir);
                fscp->fs_fsattrdir = NULL;
        }
        if (fscp->fs_infovp) {
                VN_RELE(fscp->fs_infovp);
                fscp->fs_infovp = NULL;
        }
        /* XXX dlog stuff? */

        mutex_exit(&fscp->fs_fslock);

        /*
         * release resources grabbed in cachefs_cache_activate_ro
         */

        mutex_enter(&cachep->c_contentslock);

        /* kill off the cachep worker thread */
        while (cachep->c_flags & CACHE_CACHEW_THREADRUN) {
                cachep->c_flags |= CACHE_CACHEW_THREADEXIT;
                cv_signal(&cachep->c_cwcv);
                (void) cv_reltimedwait(&cachep->c_cwhaltcv,
                    &cachep->c_contentslock, wakeup, TR_CLOCK_TICK);
        }

        /* release the cache-directory vnodes held at activation */
        if (cachep->c_resfilevp) {
                VN_RELE(cachep->c_resfilevp);
                cachep->c_resfilevp = NULL;
        }
        if (cachep->c_dirvp) {
                VN_RELE(cachep->c_dirvp);
                cachep->c_dirvp = NULL;
        }
        if (cachep->c_lockvp) {
                VN_RELE(cachep->c_lockvp);
                cachep->c_lockvp = NULL;
        }
        if (cachep->c_lostfoundvp) {
                VN_RELE(cachep->c_lostfoundvp);
                cachep->c_lostfoundvp = NULL;
        }

        /* tear down the logging state allocated at activation */
        mutex_enter(&cachep->c_log_mutex);
        if (cachep->c_log_ctl) {
                cachefs_kmem_free(cachep->c_log_ctl,
                    sizeof (cachefs_log_control_t));
                cachep->c_log_ctl = NULL;
        }
        if (cachep->c_log) {
                cachefs_log_destroy_cookie(cachep->c_log);
                cachep->c_log = NULL;
        }
        mutex_exit(&cachep->c_log_mutex);

        /* XXX do what mountroot_init does when ! foundcache */

        cachep->c_flags |= CACHE_NOCACHE;
        mutex_exit(&cachep->c_contentslock);

        /* XXX should i release this here? */
        cachefs_cd_release(fscp);

out:

        return (error);
}
 597 
 598 /*
 599  * ------------------------------------------------------------------
 600  *
 601  *              cachefs_cache_active_rw
 602  *
 603  * Description:
 604  *      Activates the cachefscache_t object for a read-write file system.
 605  * Arguments:
 606  *      cachep  the cachefscache_t object to activate
 607  * Returns:
 608  * Preconditions:
 609  *      precond(cachep)
 610  *      precond((cachep->c_flags & CACHE_NOCACHE) == 0)
 611  *      precond(cachep->c_flags & CACHE_NOFILL)
 612  */
 613 
 614 void
 615 cachefs_cache_activate_rw(cachefscache_t *cachep)
 616 {
 617         cachefs_rl_listhead_t *lhp;
 618 
 619         ASSERT((cachep->c_flags & CACHE_NOCACHE) == 0);
 620         ASSERT(cachep->c_flags & CACHE_NOFILL);
 621 
 622         mutex_enter(&cachep->c_contentslock);
 623         cachep->c_flags &= ~CACHE_NOFILL;
 624 
 625         /* move the active list to the rl list */
 626         cachefs_rl_cleanup(cachep);
 627 
 628         lhp = &cachep->c_rlinfo.rl_items[
 629             CACHEFS_RL_INDEX(CACHEFS_RL_PACKED_PENDING)];
 630         if (lhp->rli_itemcnt != 0)
 631                 cachep->c_flags |= CACHE_PACKED_PENDING;
 632         cachefs_cache_dirty(cachep, 0);
 633         mutex_exit(&cachep->c_contentslock);
 634 }
 635 
 636 /*
 637  * ------------------------------------------------------------------
 638  *
 639  *              cachefs_cache_dirty
 640  *
 641  * Description:
 642  *      Marks the cache as dirty (active).
 643  * Arguments:
 644  *      cachep  the cachefscache_t to mark as dirty
 645  *      lockit  1 means grab contents lock, 0 means caller grabbed it
 646  * Returns:
 647  * Preconditions:
 648  *      precond(cachep)
 649  *      precond(cache is in rw mode)
 650  */
 651 
 652 void
 653 cachefs_cache_dirty(struct cachefscache *cachep, int lockit)
 654 {
 655         int error;
 656 
 657         ASSERT((cachep->c_flags & (CACHE_NOCACHE | CACHE_NOFILL)) == 0);
 658 
 659         if (lockit) {
 660                 mutex_enter(&cachep->c_contentslock);
 661         } else {
 662                 ASSERT(MUTEX_HELD(&cachep->c_contentslock));
 663         }
 664         if (cachep->c_flags & CACHE_DIRTY) {
 665                 ASSERT(cachep->c_usage.cu_flags & CUSAGE_ACTIVE);
 666         } else {
 667                 /*
 668                  * turn on the "cache active" (dirty) flag and write it
 669                  * synchronously to disk
 670                  */
 671                 cachep->c_flags |= CACHE_DIRTY;
 672                 cachep->c_usage.cu_flags |= CUSAGE_ACTIVE;
 673                 if (error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
 674                     (caddr_t)&cachep->c_usage, sizeof (struct cache_usage),
 675                     0LL, UIO_SYSSPACE, FSYNC, (rlim64_t)RLIM_INFINITY,
 676                     kcred, NULL)) {
 677                         cmn_err(CE_WARN,
 678                             "cachefs: clean flag write error: %d\n", error);
 679                 }
 680         }
 681 
 682         if (lockit)
 683                 mutex_exit(&cachep->c_contentslock);
 684 }
 685 
 686 /*
 687  * ------------------------------------------------------------------
 688  *
 689  *              cachefs_cache_rssync
 690  *
 691  * Description:
 692  *      Syncs out the resource file for the cachefscache_t object.
 693  * Arguments:
 694  *      cachep  the cachefscache_t object to operate on
 695  * Returns:
 696  *      Returns 0 for success, !0 on an error writing data.
 697  * Preconditions:
 698  *      precond(cachep)
 699  *      precond(cache is in rw mode)
 700  */
 701 
int
cachefs_cache_rssync(struct cachefscache *cachep)
{
        int error;

        ASSERT((cachep->c_flags & (CACHE_NOCACHE | CACHE_NOFILL |
            CACHE_ALLOC_PENDING)) == 0);

        /*
         * If a window of rl entries is cached in memory, write it back
         * to its slot in the resource file (window 0 holds the header,
         * hence the +1) and free the buffer.
         */
        if (cachep->c_rl_entries != NULL) {
                error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
                    (caddr_t)cachep->c_rl_entries, MAXBSIZE,
                    (offset_t)((cachep->c_rl_window + 1) * MAXBSIZE),
                    UIO_SYSSPACE, FSYNC, RLIM_INFINITY, kcred, NULL);
                if (error)
                        cmn_err(CE_WARN,
                            "cachefs: Can't Write rl entries Info\n");
                cachefs_kmem_free(cachep->c_rl_entries, MAXBSIZE);
                cachep->c_rl_entries = NULL;
        }

        /* write the usage struct for this cache */
        error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
            (caddr_t)&cachep->c_usage, sizeof (struct cache_usage),
            0LL, UIO_SYSSPACE, 0, (rlim64_t)RLIM_INFINITY, kcred, NULL);
        if (error) {
                cmn_err(CE_WARN, "cachefs: Can't Write Cache Usage Info\n");
        }

        /* write the rlinfo for this cache, just past the usage struct */
        error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
            (caddr_t)&cachep->c_rlinfo, sizeof (cachefs_rl_info_t),
            (offset_t)sizeof (struct cache_usage), UIO_SYSSPACE,
            0, (rlim64_t)RLIM_INFINITY, kcred, NULL);
        if (error) {
                cmn_err(CE_WARN, "cachefs: Can't Write Cache RL Info\n");
        }
        /*
         * NOTE(review): each vn_rdwr overwrites `error', so earlier
         * write failures are only logged; the return value reflects
         * the final write and this fsync -- confirm that is intended.
         */
        error = VOP_FSYNC(cachep->c_resfilevp, FSYNC, kcred, NULL);
        return (error);
}
 741 
 742 /*
 743  * ------------------------------------------------------------------
 744  *
 745  *              cachefs_cache_sync
 746  *
 747  * Description:
 748  *      Sync a cache which includes all of its fscaches.
 749  * Arguments:
 750  *      cachep  the cachefscache_t object to sync
 751  * Returns:
 752  * Preconditions:
 753  *      precond(cachep)
 754  *      precond(cache is in rw mode)
 755  */
 756 
void
cachefs_cache_sync(struct cachefscache *cachep)
{
        struct fscache *fscp;
        struct fscache **syncfsc;
        int nfscs, fscidx;
        int try;
        int done;

        /* nothing to sync for a disabled or nofill cache */
        if (cachep->c_flags & (CACHE_NOCACHE | CACHE_NOFILL))
                return;

        /* retry once if the cache was re-dirtied during the first pass */
        done = 0;
        for (try = 0; (try < 2) && !done; try++) {

                nfscs = 0;

                /*
                 * here we turn off the cache-wide DIRTY flag.  If it's still
                 * off when the sync completes we can write the clean flag to
                 * disk telling fsck it has no work to do.
                 */
#ifdef CFSCLEANFLAG
                mutex_enter(&cachep->c_contentslock);
                cachep->c_flags &= ~CACHE_DIRTY;
                mutex_exit(&cachep->c_contentslock);
#endif /* CFSCLEANFLAG */

                cachefs_log_process_queue(cachep, 1);

                /*
                 * Snapshot the fscache list under the list lock, taking a
                 * hold on each, so fscache_sync can run without the lock.
                 */
                mutex_enter(&cachep->c_fslistlock);
                syncfsc = cachefs_kmem_alloc(
                    cachep->c_refcnt * sizeof (struct fscache *), KM_SLEEP);
                for (fscp = cachep->c_fslist; fscp; fscp = fscp->fs_next) {
                        fscache_hold(fscp);
                        ASSERT(nfscs < cachep->c_refcnt);
                        syncfsc[nfscs++] = fscp;
                }
                ASSERT(nfscs == cachep->c_refcnt);
                mutex_exit(&cachep->c_fslistlock);
                for (fscidx = 0; fscidx < nfscs; fscidx++) {
                        fscp = syncfsc[fscidx];
                        fscache_sync(fscp);
                        fscache_rele(fscp);
                }

                /* get rid of any unused fscache objects */
                mutex_enter(&cachep->c_fslistlock);
                fscache_list_gc(cachep);
                mutex_exit(&cachep->c_fslistlock);

                /*
                 * here we check the cache-wide DIRTY flag.
                 * If it's off,
                 * we can write the clean flag to disk.
                 */
#ifdef CFSCLEANFLAG
                mutex_enter(&cachep->c_contentslock);
                if ((cachep->c_flags & CACHE_DIRTY) == 0) {
                        if (cachep->c_usage.cu_flags & CUSAGE_ACTIVE) {
                                cachep->c_usage.cu_flags &= ~CUSAGE_ACTIVE;
                                if (cachefs_cache_rssync(cachep) == 0) {
                                        done = 1;
                                } else {
                                        /* restore on failed sync; retry */
                                        cachep->c_usage.cu_flags |=
                                            CUSAGE_ACTIVE;
                                }
                        } else {
                                done = 1;
                        }
                }
                mutex_exit(&cachep->c_contentslock);
#else /* CFSCLEANFLAG */
                mutex_enter(&cachep->c_contentslock);
                (void) cachefs_cache_rssync(cachep);
                mutex_exit(&cachep->c_contentslock);
                done = 1;
#endif /* CFSCLEANFLAG */
                cachefs_kmem_free(syncfsc, nfscs * sizeof (struct fscache *));
        }
}
 838 
 839 /*
 840  * ------------------------------------------------------------------
 841  *
 842  *              cachefs_cache_unique
 843  *
 * Description:
 *	Generates a number unique across the life of the cache by
 *	combining the cache-wide generation count (cu_unique) with a
 *	per-boot counter (c_unique).
 * Arguments:
 *	cachep	the cache to generate the unique number for
 * Returns:
 *      Returns a unique number, or 0 if the resource file could not
 *      be updated.
 848  * Preconditions:
 849  *      precond(cachep)
 850  */
 851 
 852 uint_t
 853 cachefs_cache_unique(cachefscache_t *cachep)
 854 {
 855         uint_t unique = 0;
 856         int error = 0;
 857 
 858         mutex_enter(&cachep->c_contentslock);
 859         if (cachep->c_usage.cu_flags & CUSAGE_NEED_ADJUST ||
 860             ++(cachep->c_unique) == 0) {
 861                 cachep->c_usage.cu_unique++;
 862 
 863                 if (cachep->c_unique == 0)
 864                         cachep->c_unique = 1;
 865                 cachep->c_flags &= ~CUSAGE_NEED_ADJUST;
 866                 error = cachefs_cache_rssync(cachep);
 867         }
 868         if (error == 0)
 869                 unique = (cachep->c_usage.cu_unique << 16) + cachep->c_unique;
 870         mutex_exit(&cachep->c_contentslock);
 871         return (unique);
 872 }
 873 
 874 /*
 875  * Called from c_getfrontfile. Shouldn't be called from anywhere else !
 876  */
static int
cachefs_createfrontfile(cnode_t *cp, struct filegrp *fgp)
{
	char name[CFS_FRONTFILE_NAME_SIZE];
	struct vattr *attrp = NULL;
	int error = 0;
	int mode;
	int alloc = 0;		/* nonzero: attrp must be freed */
	int freefile = 0;	/* nonzero: undo cachefs_allocfile() on error */
	int ffrele = 0;		/* nonzero: undo filegrp_ffhold() on error */
	int rlfree = 0;		/* nonzero: return the rl entry on error */
	rl_entry_t rl_ent;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_FRONT)
		printf("c_createfrontfile: ENTER cp %p fgp %p\n",
		    (void *)cp, (void *)fgp);
#endif

	ASSERT(cp->c_frontvp == NULL);
	ASSERT(CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp) == 0);

	/* quit if we cannot write to the filegrp */
	if ((fgp->fg_flags & CFS_FG_WRITE) == 0) {
		error = ENOENT;
		goto out;
	}

	/* find or create the filegrp attrcache file if necessary */
	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
		error = filegrp_allocattr(fgp);
		if (error)
			goto out;
	}

	/* front file names are the ascii-hex form of the cnode id */
	make_ascii_name(&cp->c_id, name);

	/* set up attributes for the front file we want to create */
	attrp = cachefs_kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
	alloc++;
	attrp->va_mode = S_IFREG | 0666;
	mode = 0666;
	attrp->va_uid = 0;
	attrp->va_gid = 0;
	attrp->va_type = VREG;
	attrp->va_size = 0;
	attrp->va_mask = AT_SIZE | AT_TYPE | AT_MODE | AT_UID | AT_GID;

	/* get a file from the resource counts */
	error = cachefs_allocfile(fgp->fg_fscp->fs_cache);
	if (error) {
		error = EINVAL;
		goto out;
	}
	freefile++;

	/* create the metadata slot if necessary */
	if (cp->c_flags & CN_ALLOC_PENDING) {
		error = filegrp_create_metadata(fgp, &cp->c_metadata,
		    &cp->c_id);
		if (error) {
			error = EINVAL;
			goto out;
		}
		cp->c_flags &= ~CN_ALLOC_PENDING;
		cp->c_flags |= CN_UPDATED;
	}

	/* get an rl entry if necessary */
	if (cp->c_metadata.md_rlno == 0) {
		rl_ent.rl_fileno = cp->c_id.cid_fileno;
		rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
		rl_ent.rl_fsid = fgp->fg_fscp->fs_cfsid;
		rl_ent.rl_attrc = 0;
		error = cachefs_rl_alloc(fgp->fg_fscp->fs_cache, &rl_ent,
		    &cp->c_metadata.md_rlno);
		if (error)
			goto out;
		/* mark the fresh rl entry as actively in use */
		cachefs_rlent_moveto(fgp->fg_fscp->fs_cache,
		    CACHEFS_RL_ACTIVE, cp->c_metadata.md_rlno,
		    cp->c_metadata.md_frontblks);
		cp->c_metadata.md_rltype = CACHEFS_RL_ACTIVE;
		rlfree++;
		cp->c_flags |= CN_UPDATED; /* XXX sam: do we need this? */

		/* increment number of front files */
		error = filegrp_ffhold(fgp);
		if (error) {
			error = EINVAL;
			goto out;
		}
		ffrele++;
	}

	if (cp->c_flags & CN_ASYNC_POP_WORKING) {
		/* lookup the already created front file */
		error = VOP_LOOKUP(fgp->fg_dirvp, name, &cp->c_frontvp,
		    NULL, 0, NULL, kcred, NULL, NULL, NULL);
	} else {
		/* create the front file */
		error = VOP_CREATE(fgp->fg_dirvp, name, attrp, EXCL, mode,
		    &cp->c_frontvp, kcred, 0, NULL, NULL);
	}
	if (error) {
#ifdef CFSDEBUG
		CFS_DEBUG(CFSDEBUG_FRONT)
			printf("c_createfrontfile: Can't create cached object"
			    " error %u, fileno %llx\n", error,
			    (u_longlong_t)cp->c_id.cid_fileno);
#endif
		goto out;
	}

	/* get a copy of the fid of the front file */
	cp->c_metadata.md_fid.fid_len = MAXFIDSZ;
	error = VOP_FID(cp->c_frontvp, &cp->c_metadata.md_fid, NULL);
	if (error) {
		/*
		 * If we get back ENOSPC then the fid we passed in was too
		 * small.  For now we don't do anything and map to EINVAL.
		 */
		if (error == ENOSPC) {
			error = EINVAL;
		}
		goto out;
	}

	/* drop any stale name cache entries for the new front vnode */
	dnlc_purge_vp(cp->c_frontvp);

	cp->c_metadata.md_flags |= MD_FILE;
	cp->c_flags |= CN_UPDATED | CN_NEED_FRONT_SYNC;

out:
	/* on error, undo everything acquired above, in reverse order */
	if (error) {
		if (cp->c_frontvp) {
			VN_RELE(cp->c_frontvp);
			(void) VOP_REMOVE(fgp->fg_dirvp, name, kcred, NULL, 0);
			cp->c_frontvp = NULL;
		}
		if (ffrele)
			filegrp_ffrele(fgp);
		if (freefile)
			cachefs_freefile(fgp->fg_fscp->fs_cache);
		if (rlfree) {
#ifdef CFSDEBUG
			cachefs_rlent_verify(fgp->fg_fscp->fs_cache,
			    CACHEFS_RL_ACTIVE, cp->c_metadata.md_rlno);
#endif /* CFSDEBUG */
			cachefs_rlent_moveto(fgp->fg_fscp->fs_cache,
			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
			cp->c_metadata.md_rlno = 0;
			cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
		}
		/* give up on caching this cnode entirely */
		cachefs_nocache(cp);
	}
	if (alloc)
		cachefs_kmem_free(attrp, sizeof (struct vattr));
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_FRONT)
		printf("c_createfrontfile: EXIT error = %d name %s\n", error,
		    name);
#endif
	return (error);
}
1041 
1042 /*
1043  * Releases resources associated with the front file.
1044  * Only call this routine if a ffhold has been done.
1045  * Its okay to call this routine if the front file does not exist.
1046  * Note: this routine is used even if there is no front file.
1047  */
1048 void
1049 cachefs_removefrontfile(cachefs_metadata_t *mdp, cfs_cid_t *cidp,
1050     filegrp_t *fgp)
1051 {
1052         int error, enoent;
1053         char name[CFS_FRONTFILE_NAME_SIZE + 2];
1054 
1055         ASSERT(CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp) == 0);
1056 
1057         enoent = 0;
1058         if (mdp->md_flags & MD_FILE) {
1059                 if (fgp->fg_dirvp == NULL) {
1060                         cmn_err(CE_WARN, "cachefs: remove error, run fsck\n");
1061                         return;
1062                 }
1063                 make_ascii_name(cidp, name);
1064                 error = VOP_REMOVE(fgp->fg_dirvp, name, kcred, NULL, 0);
1065                 if (error == ENOENT)
1066                         enoent = 1;
1067                 if ((error) && (error != ENOENT)) {
1068                         cmn_err(CE_WARN, "UFS remove error %s %d, run fsck\n",
1069                             name, error);
1070                 }
1071                 if (mdp->md_flags & MD_ACLDIR) {
1072                         (void) strcat(name, ".d");
1073                         error = VOP_RMDIR(fgp->fg_dirvp, name, fgp->fg_dirvp,
1074                             kcred, NULL, 0);
1075                         if ((error) && (error != ENOENT)) {
1076                                 cmn_err(CE_WARN, "frontfs rmdir error %s %d"
1077                                     "; run fsck\n", name, error);
1078                         }
1079                 }
1080                 mdp->md_flags &= ~(MD_FILE | MD_POPULATED | MD_ACL | MD_ACLDIR);
1081                 bzero(&mdp->md_allocinfo, mdp->md_allocents *
1082                     sizeof (struct cachefs_allocmap));
1083                 cachefs_freefile(fgp->fg_fscp->fs_cache);
1084         }
1085 
1086         /*
1087          * Clear packed bit, fastsymlinks and special files
1088          * do not have a front file.
1089          */
1090         mdp->md_flags &= ~MD_PACKED;
1091 
1092         /* XXX either rename routine or move this to caller */
1093         if (enoent == 0)
1094                 filegrp_ffrele(fgp);
1095 
1096         if (mdp->md_frontblks) {
1097                 cachefs_freeblocks(fgp->fg_fscp->fs_cache, mdp->md_frontblks,
1098                     mdp->md_rltype);
1099                 mdp->md_frontblks = 0;
1100         }
1101 }
1102 
1103 /*
1104  * This is the interface to the rest of CFS. This takes a cnode, and returns
1105  * the frontvp (stuffs it in the cnode). This creates an attrcache slot and
1106  * and frontfile if necessary.
1107  */
1108 
int
cachefs_getfrontfile(cnode_t *cp)
{
	struct filegrp *fgp = cp->c_filegrp;
	int error;
	struct vattr va;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_SUBR)
		printf("c_getfrontfile: ENTER cp %p\n", (void *)cp);
#endif

	ASSERT(CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp) == 0);
	ASSERT(MUTEX_HELD(&cp->c_statelock));

	/*
	 * Now we check to see if there is a front file for this entry.
	 * If there is, we get the vnode for it and stick it in the cnode.
	 * Otherwise, we create a front file, get the vnode for it and stick
	 * it in the cnode.
	 */
	if (cp->c_flags & CN_STALE) {
		cp->c_flags |= CN_NOCACHE;
		error = ESTALE;
		goto out;
	}

	/*
	 * If the cnode is being populated, and we're not the populating
	 * thread, then block until the pop thread completes.  If we are the
	 * pop thread, then we may come in here, but not to nuke the directory
	 * cnode at a critical juncture.  If we return from a cv_wait and the
	 * cnode is now stale, don't bother trying to get the front file.
	 */
	while ((cp->c_flags & CN_ASYNC_POP_WORKING) &&
	    (cp->c_popthrp != curthread)) {
		cv_wait(&cp->c_popcv, &cp->c_statelock);
		if (cp->c_flags & CN_STALE) {
			cp->c_flags |= CN_NOCACHE;
			error = ESTALE;
			goto out;
		}
	}

	/* no front file on disk yet; create one */
	if ((cp->c_metadata.md_flags & MD_FILE) == 0) {
#ifdef CFSDEBUG
		if (cp->c_frontvp != NULL)
			CFS_DEBUG(CFSDEBUG_FRONT)
				printf("c_getfrontfile: !MD_FILE and frontvp "
				    "not null cp %p\n", (void *)cp);
#endif
		if (CTOV(cp)->v_type == VDIR)
			ASSERT((cp->c_metadata.md_flags & MD_POPULATED) == 0);
		error = cachefs_createfrontfile(cp, fgp);
		if (error)
			goto out;
	} else {
		/*
		 * A front file exists, all we need to do is to grab the fid,
		 * do a VFS_VGET() on the fid, stuff the vnode in the cnode,
		 * and return.
		 */
		if (fgp->fg_dirvp == NULL) {
			cmn_err(CE_WARN, "cachefs: gff0: corrupted file system"
			    " run fsck\n");
			cachefs_inval_object(cp);
			cp->c_flags |= CN_NOCACHE;
			error = ESTALE;
			goto out;
		}
		error = VFS_VGET(fgp->fg_dirvp->v_vfsp, &cp->c_frontvp,
		    &cp->c_metadata.md_fid);
		if (error || (cp->c_frontvp == NULL)) {
#ifdef CFSDEBUG
			CFS_DEBUG(CFSDEBUG_FRONT)
				printf("cachefs: "
				    "gff1: front file system error %d\n",
				    error);
#endif /* CFSDEBUG */
			cachefs_inval_object(cp);
			cp->c_flags |= CN_NOCACHE;
			error = ESTALE;
			goto out;
		}

		/* don't need to check timestamps if need_front_sync is set */
		if (cp->c_flags & CN_NEED_FRONT_SYNC) {
			error = 0;
			goto out;
		}

		/* don't need to check empty directories */
		if (CTOV(cp)->v_type == VDIR &&
		    ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
			error = 0;
			goto out;
		}

		/* get modify time of the front file */
		va.va_mask = AT_MTIME;
		error = VOP_GETATTR(cp->c_frontvp, &va, 0, kcred, NULL);
		if (error) {
			cmn_err(CE_WARN, "cachefs: gff2: front file"
			    " system error %d", error);
			cachefs_inval_object(cp);
			/* stale only if inval could not re-enable caching */
			error = (cp->c_flags & CN_NOCACHE) ? ESTALE : 0;
			goto out;
		}

		/*
		 * Compare with modify time stored in metadata.  A mismatch
		 * means the front file changed behind our back (or was
		 * deliberately zeroed by cachefs_nocache), so its contents
		 * cannot be trusted and the object is invalidated.
		 */
		if (bcmp(&va.va_mtime, &cp->c_metadata.md_timestamp,
		    sizeof (timestruc_t)) != 0) {
#ifdef CFSDEBUG
			CFS_DEBUG(CFSDEBUG_GENERAL | CFSDEBUG_INVALIDATE) {
				long sec, nsec;
				sec = cp->c_metadata.md_timestamp.tv_sec;
				nsec = cp->c_metadata.md_timestamp.tv_nsec;
				printf("c_getfrontfile: timestamps don't"
				    " match fileno %lld va %lx %lx"
				    " meta %lx %lx\n",
				    (u_longlong_t)cp->c_id.cid_fileno,
				    va.va_mtime.tv_sec,
				    va.va_mtime.tv_nsec, sec, nsec);
			}
#endif
			cachefs_inval_object(cp);
			error = (cp->c_flags & CN_NOCACHE) ? ESTALE : 0;
		}
	}
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_FRONT)
		printf("c_getfrontfile: EXIT error = %d\n", error);
#endif
	return (error);
}
1245 
/*
 * cachefs_inval_object
 *
 * Invalidates the cached contents of a cnode: the front file (if any)
 * is truncated to zero length and the populated state is cleared so the
 * object will be refilled from the back file system on the next access.
 * CN_NOCACHE is left set if the front file could not be reset.
 * Caller must hold cp->c_statelock.
 */
void
cachefs_inval_object(cnode_t *cp)
{
	cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
	struct filegrp *fgp = cp->c_filegrp;
	int error;

	ASSERT(CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)) == 0);
	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT((cp->c_flags & CN_ASYNC_POP_WORKING) == 0 ||
	    cp->c_popthrp == curthread);
#if 0
	CFS_DEBUG(CFSDEBUG_SUBR)
		printf("c_inval_object: ENTER cp %p\n", (void *)cp);
	if (cp->c_flags & (CN_ASYNC_POPULATE | CN_ASYNC_POP_WORKING))
		debug_enter("inval object during async pop");
#endif
	/* assume failure; cleared below once the front file is reset */
	cp->c_flags |= CN_NOCACHE;

	/* if we cannot modify the cache */
	if (C_TO_FSCACHE(cp)->fs_cache->c_flags &
	    (CACHE_NOFILL | CACHE_NOCACHE)) {
		goto out;
	}

	/* if there is a front file */
	if (cp->c_metadata.md_flags & MD_FILE) {
		if (fgp->fg_dirvp == NULL)
			goto out;

		/* get the front file vp if necessary */
		if (cp->c_frontvp == NULL) {

			error = VFS_VGET(fgp->fg_dirvp->v_vfsp, &cp->c_frontvp,
			    &cp->c_metadata.md_fid);
			if (error || (cp->c_frontvp == NULL)) {
#ifdef CFSDEBUG
				CFS_DEBUG(CFSDEBUG_FRONT)
					printf("cachefs: "
					    "io: front file error %d\n", error);
#endif /* CFSDEBUG */
				goto out;
			}
		}

		/* truncate the file to zero size */
		error = cachefs_frontfile_size(cp, 0);
		if (error)
			goto out;
		cp->c_flags &= ~CN_NOCACHE;

		/* if a directory, v_type is zero if called from initcnode */
		if (cp->c_attr.va_type == VDIR) {
			/* count invalidations of low-usage directories */
			if (cp->c_usage < CFS_DIRCACHE_COST) {
				cp->c_invals++;
				if (cp->c_invals > CFS_DIRCACHE_INVAL) {
					cp->c_invals = 0;
				}
			} else
				cp->c_invals = 0;
			cp->c_usage = 0;
		}
	} else {
		cp->c_flags &= ~CN_NOCACHE;
	}

out:
	/* a packed object that lost its data must be repacked later */
	if ((cp->c_metadata.md_flags & MD_PACKED) &&
	    (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) &&
	    ((cachep->c_flags & CACHE_NOFILL) == 0)) {
		ASSERT(cp->c_metadata.md_rlno != 0);
		if (cp->c_metadata.md_rltype != CACHEFS_RL_PACKED_PENDING) {
			cachefs_rlent_moveto(cachep,
			    CACHEFS_RL_PACKED_PENDING,
			    cp->c_metadata.md_rlno,
			    cp->c_metadata.md_frontblks);
			cp->c_metadata.md_rltype = CACHEFS_RL_PACKED_PENDING;
			/* unconditionally set CN_UPDATED below */
		}
	}

	cachefs_purgeacl(cp);

	/* an in-progress async populate must not use the reset front file */
	if (cp->c_flags & CN_ASYNC_POP_WORKING)
		cp->c_flags |= CN_NOCACHE;
	cp->c_metadata.md_flags &= ~(MD_POPULATED | MD_INVALREADDIR |
	    MD_FASTSYMLNK);
	cp->c_flags &= ~CN_NEED_FRONT_SYNC;
	cp->c_flags |= CN_UPDATED;

	/*
	 * If the object invalidated is a directory, the dnlc should be purged
	 * to elide all references to this (directory) vnode.
	 */
	if (CTOV(cp)->v_type == VDIR)
		dnlc_purge_vp(CTOV(cp));

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_SUBR)
		printf("c_inval_object: EXIT\n");
#endif
}
1348 
1349 void
1350 make_ascii_name(cfs_cid_t *cidp, char *strp)
1351 {
1352         int i = sizeof (uint_t) * 4;
1353         u_longlong_t index;
1354         ino64_t name;
1355 
1356         if (cidp->cid_flags & CFS_CID_LOCAL)
1357                 *strp++ = 'L';
1358         name = (ino64_t)cidp->cid_fileno;
1359         do {
1360                 index = (((u_longlong_t)name) & 0xf000000000000000) >> 60;
1361                 index &= (u_longlong_t)0xf;
1362                 ASSERT(index < (u_longlong_t)16);
1363                 *strp++ = "0123456789abcdef"[index];
1364                 name <<= 4;
1365         } while (--i);
1366         *strp = '\0';
1367 }
1368 
/*
 * cachefs_nocache
 *
 * Puts the cnode into NOCACHE mode so the front file is bypassed.
 * The actual invalidation of the front file is deferred to inactive
 * time; zeroing the metadata timestamp here guarantees getfrontfile
 * will discard the front file even if inactive processing never runs
 * (e.g. after a crash).
 * Caller must hold cp->c_statelock.
 */
void
cachefs_nocache(cnode_t *cp)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_SUBR)
		printf("c_nocache: ENTER cp %p\n", (void *)cp);
#endif

	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
	ASSERT(MUTEX_HELD(&cp->c_statelock));
	if ((cp->c_flags & CN_NOCACHE) == 0) {
#ifdef CFSDEBUG
		CFS_DEBUG(CFSDEBUG_INVALIDATE)
			printf("cachefs_nocache: invalidating %llu\n",
			    (u_longlong_t)cp->c_id.cid_fileno);
#endif
		/*
		 * Here we are waiting until inactive time to do
		 * the inval_object.  In case we don't get to inactive
		 * (because of a crash, say) we set up a timestamp mismatch
		 * such that getfrontfile will blow the front file away
		 * next time we try to use it.
		 */
		cp->c_metadata.md_timestamp.tv_sec = 0;
		cp->c_metadata.md_timestamp.tv_nsec = 0;
		cp->c_metadata.md_flags &= ~(MD_POPULATED | MD_INVALREADDIR |
		    MD_FASTSYMLNK);
		cp->c_flags &= ~CN_NEED_FRONT_SYNC;

		cachefs_purgeacl(cp);

		/*
		 * It is possible we can nocache while disconnected.
		 * A directory could be nocached by running out of space.
		 * A regular file should only be nocached if an I/O error
		 * occurs to the front fs.
		 * We count on the item staying on the modified list
		 * so we do not loose the cid to fid mapping for directories.
		 */

		/* a packed object losing its cache must be repacked later */
		if ((cp->c_metadata.md_flags & MD_PACKED) &&
		    (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) &&
		    ((cachep->c_flags & CACHE_NOFILL) == 0)) {
			ASSERT(cp->c_metadata.md_rlno != 0);
			if (cp->c_metadata.md_rltype !=
			    CACHEFS_RL_PACKED_PENDING) {
				cachefs_rlent_moveto(cachep,
				    CACHEFS_RL_PACKED_PENDING,
				    cp->c_metadata.md_rlno,
				    cp->c_metadata.md_frontblks);
				cp->c_metadata.md_rltype =
				    CACHEFS_RL_PACKED_PENDING;
				/* unconditionally set CN_UPDATED below */
			}
		}

		/* drop name cache references to an invalidated directory */
		if (CTOV(cp)->v_type == VDIR)
			dnlc_purge_vp(CTOV(cp));
		cp->c_flags |= (CN_NOCACHE | CN_UPDATED);
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_NOCACHE))
		cachefs_log_nocache(cachep, 0, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno);

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_SUBR)
		printf("c_nocache: EXIT cp %p\n", (void *)cp);
#endif
}
1442 
1443 /*
1444  * Checks to see if the page is in the disk cache, by checking the allocmap.
1445  */
1446 int
1447 cachefs_check_allocmap(cnode_t *cp, u_offset_t off)
1448 {
1449         int i;
1450         size_t dbl_size_to_look = cp->c_attr.va_size - off;
1451         uint_t  size_to_look;
1452 
1453         if (dbl_size_to_look > (u_offset_t)PAGESIZE)
1454                 size_to_look = (uint_t)PAGESIZE;
1455         else
1456                 /*LINTED alignment okay*/
1457                 size_to_look = (uint_t)dbl_size_to_look;
1458 
1459         for (i = 0; i < cp->c_metadata.md_allocents; i++) {
1460                 struct cachefs_allocmap *allocp =
1461                     cp->c_metadata.md_allocinfo + i;
1462 
1463                 if (off >= allocp->am_start_off) {
1464                         if ((off + size_to_look) <=
1465                             (allocp->am_start_off + allocp->am_size)) {
1466                                 struct fscache *fscp = C_TO_FSCACHE(cp);
1467                                 cachefscache_t *cachep = fscp->fs_cache;
1468 
1469                                 if (CACHEFS_LOG_LOGGING(cachep,
1470                                     CACHEFS_LOG_CALLOC))
1471                                         cachefs_log_calloc(cachep, 0,
1472                                             fscp->fs_cfsvfsp,
1473                                             &cp->c_metadata.md_cookie,
1474                                             cp->c_id.cid_fileno,
1475                                             off, size_to_look);
1476                         /*
1477                          * Found the page in the CFS disk cache.
1478                          */
1479                                 return (1);
1480                         }
1481                 } else {
1482                         return (0);
1483                 }
1484         }
1485         return (0);
1486 }
1487 
1488 /*
1489  * Merges adjacent allocmap entries together where possible, e.g.
1490  *   offset=0x0,     size=0x40000
 *   offset=0x40000, size=0x20000       becomes just offset=0x0, size=0x90000
1492  *   offset=0x60000, size=0x30000
1493  */
1494 
1495 
1496 void
1497 cachefs_coalesce_allocmap(struct cachefs_metadata *cmd)
1498 {
1499         int i, reduced = 0;
1500         struct cachefs_allocmap *allocp, *nallocp;
1501 
1502         nallocp = allocp = cmd->md_allocinfo;
1503         allocp++;
1504         for (i = 1; i < cmd->md_allocents; i++, allocp++) {
1505                 if (nallocp->am_start_off + nallocp->am_size ==
1506                     allocp->am_start_off) {
1507                         nallocp->am_size += allocp->am_size;
1508                         reduced++;
1509                 } else {
1510                         nallocp++;
1511                         nallocp->am_start_off = allocp->am_start_off;
1512                         nallocp->am_size = allocp->am_size;
1513                 }
1514         }
1515         cmd->md_allocents -= reduced;
1516 }
1517 
1518 /*
1519  * Updates the allocmap to reflect a new chunk of data that has been
1520  * populated.
1521  */
void
cachefs_update_allocmap(cnode_t *cp, u_offset_t off, size_t size)
{
	int i;
	struct cachefs_allocmap *allocp;
	struct fscache *fscp =  C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	u_offset_t saveoff;
	u_offset_t savesize;
	u_offset_t logoff = off;	/* original args, saved for logging */
	size_t logsize = size;
	u_offset_t endoff;
	u_offset_t tmpendoff;

	/*
	 * We try to see if we can coalesce the current block into an existing
	 * allocation and mark it as such.
	 * If we can't do that then we make a new entry in the allocmap.
	 * when we run out of allocmaps, put the cnode in NOCACHE mode.
	 */
again:
	allocp = cp->c_metadata.md_allocinfo;
	for (i = 0; i < cp->c_metadata.md_allocents; i++, allocp++) {

		if (off <= (allocp->am_start_off)) {
			endoff = off + size;
			if (endoff >= allocp->am_start_off) {
				/* new chunk overlaps/abuts this entry from
				 * the left: grow the entry to cover both */
				tmpendoff = allocp->am_start_off +
				    allocp->am_size;
				if (endoff < tmpendoff)
					endoff = tmpendoff;
				allocp->am_size = endoff - off;
				allocp->am_start_off = off;
				cachefs_coalesce_allocmap(&cp->c_metadata);
				allocp = cp->c_metadata.md_allocinfo;
				if (allocp->am_size >= cp->c_size)
					cp->c_metadata.md_flags |= MD_POPULATED;
				return;
			} else {
				/*
				 * New chunk belongs strictly before this
				 * entry: store it in this slot, then loop
				 * back to re-insert the displaced entry so
				 * the map stays sorted.
				 */
				saveoff = off;
				savesize = size;
				off = allocp->am_start_off;
				size = allocp->am_size;
				allocp->am_size = savesize;
				allocp->am_start_off = saveoff;
				goto again;
			}
		} else {
			endoff = allocp->am_start_off + allocp->am_size;
			if (off < endoff) {
				/* new chunk overlaps this entry from the
				 * right: extend the entry if needed */
				tmpendoff = off + size;
				if (endoff < tmpendoff)
					endoff = tmpendoff;
				allocp->am_size = endoff - allocp->am_start_off;
				cachefs_coalesce_allocmap(&cp->c_metadata);
				allocp = cp->c_metadata.md_allocinfo;
				if (allocp->am_size >= cp->c_size)
					cp->c_metadata.md_flags |= MD_POPULATED;
				return;
			}
			/* new chunk starts exactly at the end: extend */
			if (off == (allocp->am_start_off + allocp->am_size)) {
				allocp->am_size += size;
				cachefs_coalesce_allocmap(&cp->c_metadata);
				allocp = cp->c_metadata.md_allocinfo;
				if (allocp->am_size >= cp->c_size)
					cp->c_metadata.md_flags |= MD_POPULATED;
				return;
			}
		}
	}
	/* no room for another entry: give up and stop caching this cnode */
	if (i == C_MAX_ALLOCINFO_SLOTS) {
#ifdef CFSDEBUG
		CFS_DEBUG(CFSDEBUG_ALLOCMAP)
			printf("c_update_alloc_map: "
			    "Too many allinfo entries cp %p fileno %llu %p\n",
			    (void *)cp, (u_longlong_t)cp->c_id.cid_fileno,
			    (void *)cp->c_metadata.md_allocinfo);
#endif
		cachefs_nocache(cp);
		return;
	}
	/* append the chunk as a new entry at the end of the map */
	allocp->am_start_off = off;
	allocp->am_size = (u_offset_t)size;
	if (allocp->am_size >= cp->c_size)
		cp->c_metadata.md_flags |= MD_POPULATED;
	cp->c_metadata.md_allocents++;

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_UALLOC))
		cachefs_log_ualloc(cachep, 0, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    logoff, logsize);
}
1614 
1615 /*
1616  * CFS population function
1617  *
1618  * before async population, this function used to turn on the cnode
1619  * flags CN_UPDATED, CN_NEED_FRONT_SYNC, and CN_POPULATION_PENDING.
1620  * now, however, it's the responsibility of the caller to do this if
1621  * this function returns 0 (no error).
1622  */
1623 
int
cachefs_populate(cnode_t *cp, u_offset_t off, size_t popsize, vnode_t *frontvp,
    vnode_t *backvp, u_offset_t cpsize, cred_t *cr)
{
	int error = 0;
	caddr_t addr;
	u_offset_t upto;		/* end of the range to populate */
	uint_t size;			/* bytes handled this iteration */
	u_offset_t from = off;		/* current file offset */
	cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
	ssize_t resid;
	struct fbuf *fbp;
	/* staging buffer: data flows back file -> buf -> front file */
	caddr_t buf = kmem_alloc(MAXBSIZE, KM_SLEEP);

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_populate: ENTER cp %p off %lld\n",
		    (void *)cp, off);
#endif

	/* never populate past the back file's current size */
	upto = MIN((off + popsize), cpsize);

	while (from < upto) {
		/* MAXBSIZE-aligned block containing `from', offset within */
		u_offset_t blkoff = (from & (offset_t)MAXBMASK);
		uint_t n = from - blkoff;

		size = upto - from;
		if (upto > (blkoff + MAXBSIZE))
			size = MAXBSIZE - n;	/* clamp to this block */

		/* read [blkoff, blkoff + n + size) from the back file */
		error = fbread(backvp, (offset_t)blkoff, n + size,
		    S_OTHER, &fbp);
		if (CFS_TIMEOUT(C_TO_FSCACHE(cp), error))
			goto out;
		else if (error) {
#ifdef CFSDEBUG
			CFS_DEBUG(CFSDEBUG_BACK)
				printf("cachefs_populate: fbread error %d\n",
				    error);
#endif
			goto out;
		}

		/* copy out of the fbuf so it can be released before writing */
		addr = fbp->fb_addr;
		ASSERT(addr != NULL);
		ASSERT(n + size <= MAXBSIZE);
		bcopy(addr, buf, n + size);
		fbrelse(fbp, S_OTHER);

		/*
		 * Charge one front-file block unless the allocmap shows this
		 * block is already accounted for.
		 */
		if (n == 0 || cachefs_check_allocmap(cp, blkoff) == 0) {
			if (error = cachefs_allocblocks(cachep, 1,
			    cp->c_metadata.md_rltype))
				goto out;
			cp->c_metadata.md_frontblks++;
		}
		resid = 0;
		/* write the data at the same offset in the front file */
		error = vn_rdwr(UIO_WRITE, frontvp, buf + n, size,
		    (offset_t)from, UIO_SYSSPACE, 0,
		    (rlim64_t)RLIM64_INFINITY, cr, &resid);
		if (error) {
#ifdef CFSDEBUG
			CFS_DEBUG(CFSDEBUG_FRONT)
				printf("cachefs_populate: "
				    "Got error = %d from vn_rdwr\n", error);
#endif
			goto out;
		}
#ifdef CFSDEBUG
		if (resid)
			CFS_DEBUG(CFSDEBUG_FRONT)
				printf("cachefs_populate: non-zero resid %ld\n",
				    resid);
#endif
		from += size;
	}
	/* record the whole populated range in the cnode's allocmap */
	(void) cachefs_update_allocmap(cp, off, upto - off);
out:
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_POPULATE))
		cachefs_log_populate(cachep, error,
		    C_TO_FSCACHE(cp)->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno, off,
		    popsize);

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_populate: EXIT cp %p error %d\n",
		    (void *)cp, error);
#endif
	kmem_free(buf, MAXBSIZE);

	return (error);
}
1716 
1717 /*
1718  * due to compiler error we shifted cnode to the last argument slot.
1719  * occurred during large files project - XXX.
1720  */
1721 void
1722 cachefs_cluster_allocmap(u_offset_t off, u_offset_t *popoffp, size_t *popsizep,
1723     size_t size, struct cnode *cp)
1724 {
1725         int i;
1726         u_offset_t lastoff = 0;
1727         u_offset_t forward_diff = 0;
1728         u_offset_t backward_diff = 0;
1729 
1730         ASSERT(size <= C_TO_FSCACHE(cp)->fs_info.fi_popsize);
1731 
1732 #ifdef CFSDEBUG
1733         CFS_DEBUG(CFSDEBUG_SUBR)
1734                 printf("cachefs_cluster_allocmap: off %llx, size %llx, "
1735                     "c_size %llx\n", off, size, (longlong_t)cp->c_size);
1736 #endif /* CFSDEBUG */
1737         for (i = 0; i < cp->c_metadata.md_allocents; i++) {
1738                 struct cachefs_allocmap *allocp =
1739                     cp->c_metadata.md_allocinfo + i;
1740 
1741                 if (allocp->am_start_off > off) {
1742                         if ((off + size) > allocp->am_start_off) {
1743                                 forward_diff = allocp->am_start_off - off;
1744                                 backward_diff = size - forward_diff;
1745                                 if (backward_diff > off)
1746                                         backward_diff = off;
1747                                 if (lastoff > (off - backward_diff))
1748                                         backward_diff = off - lastoff;
1749                         } else {
1750                                 forward_diff = size;
1751                         }
1752                         *popoffp = (off - backward_diff) & (offset_t)PAGEMASK;
1753                         *popsizep = ((off + forward_diff) - *popoffp) &
1754                             (offset_t)PAGEMASK;
1755                         return;
1756                 } else {
1757                         lastoff = allocp->am_start_off + allocp->am_size;
1758                 }
1759         }
1760         if ((lastoff + size) > off) {
1761                 *popoffp = (lastoff & (offset_t)PAGEMASK);
1762         } else {
1763                 *popoffp = off & (offset_t)PAGEMASK;
1764         }
1765 
1766         /*
1767          * 64bit project: popsize is the chunk size used to populate the
1768          * cache (default 64K). As such, 32 bit should suffice.
1769          */
1770         if ((*popoffp + size) > cp->c_size)
1771                 *popsizep = (cp->c_size - *popoffp + PAGEOFFSET) &
1772                     (offset_t)PAGEMASK;
1773         else if (size < PAGESIZE)
1774                 *popsizep = (size + PAGEOFFSET) & (offset_t)PAGEMASK;
1775         else
1776                 *popsizep = size & (offset_t)PAGEMASK;
1777 
1778 #ifdef CFSDEBUG
1779         CFS_DEBUG(CFSDEBUG_SUBR)
1780                 printf("cachefs_cluster_allocmap: popoff %llx, popsize %llx\n",
1781                     (u_longlong_t)(*popoffp), (u_longlong_t)(*popsizep));
1782 #endif /* CFSDEBUG */
1783 }
1784 
1785 /*
1786  * "populate" a symlink in the cache
1787  */
int
cachefs_stuffsymlink(cnode_t *cp, caddr_t buf, int buflen)
{
	int error = 0;
	struct fscache *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	struct cachefs_metadata *mdp = &cp->c_metadata;

	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
	ASSERT(MUTEX_HELD(&cp->c_statelock));

	/* NFSv4 pass-through mode: nothing to cache, just log and return */
	if (CFS_ISFS_BACKFS_NFSV4(fscp))
		goto out;

	/* note: this path returns without logging the csymlink event */
	if (cp->c_flags & CN_NOCACHE)
		return (ENOENT);

	cp->c_size = (u_offset_t)buflen;

	/* if can create a fast sym link */
	if (buflen <= C_FSL_SIZE) {
		/* give up the front file resources */
		if (mdp->md_rlno) {
			cachefs_removefrontfile(mdp, &cp->c_id, cp->c_filegrp);
			cachefs_rlent_moveto(cachep, CACHEFS_RL_FREE,
			    mdp->md_rlno, 0);
			mdp->md_rlno = 0;
			mdp->md_rltype = CACHEFS_RL_NONE;
		}
		/* put sym link contents in allocinfo in metadata */
		bzero(mdp->md_allocinfo, C_FSL_SIZE);
		bcopy(buf, mdp->md_allocinfo, buflen);

		/* no front file, so no front sync is needed */
		mdp->md_flags |= MD_FASTSYMLNK;
		cp->c_flags &= ~CN_NEED_FRONT_SYNC;
		cp->c_flags |= CN_UPDATED;
		goto out;
	}

	/* else create a sym link in a front file */
	if (cp->c_frontvp == NULL)
		error = cachefs_getfrontfile(cp);
	if (error)
		goto out;

	/* truncate front file */
	error = cachefs_frontfile_size(cp, 0);
	mdp->md_flags &= ~(MD_FASTSYMLNK | MD_POPULATED);
	if (error)
		goto out;

	/* get space for the sym link */
	error = cachefs_allocblocks(cachep, 1, cp->c_metadata.md_rltype);
	if (error)
		goto out;

	/* write the sym link to the front file */
	error = vn_rdwr(UIO_WRITE, cp->c_frontvp, buf, buflen, 0,
	    UIO_SYSSPACE, 0, RLIM_INFINITY, kcred, NULL);
	if (error) {
		/* give back the block charged above */
		cachefs_freeblocks(cachep, 1, cp->c_metadata.md_rltype);
		goto out;
	}

	cp->c_metadata.md_flags |= MD_POPULATED;
	cp->c_flags |= CN_NEED_FRONT_SYNC;
	cp->c_flags |= CN_UPDATED;

out:
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CSYMLINK))
		cachefs_log_csymlink(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno, buflen);

	return (error);
}
1863 
1864 /*
1865  * Reads the full contents of the symbolic link from the back file system.
1866  * *bufp is set to a MAXPATHLEN buffer that must be freed when done
1867  * *buflenp is the length of the link
1868  */
1869 int
1870 cachefs_readlink_back(cnode_t *cp, cred_t *cr, caddr_t *bufp, int *buflenp)
1871 {
1872         int error;
1873         struct uio uio;
1874         struct iovec iov;
1875         caddr_t buf;
1876         fscache_t *fscp = C_TO_FSCACHE(cp);
1877 
1878         ASSERT(MUTEX_HELD(&cp->c_statelock));
1879 
1880         *bufp = NULL;
1881 
1882         /* get back vnode */
1883         if (cp->c_backvp == NULL) {
1884                 error = cachefs_getbackvp(fscp, cp);
1885                 if (error)
1886                         return (error);
1887         }
1888 
1889         /* set up for the readlink */
1890         bzero(&uio, sizeof (struct uio));
1891         bzero(&iov, sizeof (struct iovec));
1892         buf = cachefs_kmem_alloc(MAXPATHLEN, KM_SLEEP);
1893         iov.iov_base = buf;
1894         iov.iov_len = MAXPATHLEN;
1895         uio.uio_iov = &iov;
1896         uio.uio_iovcnt = 1;
1897         uio.uio_resid = MAXPATHLEN;
1898         uio.uio_segflg = UIO_SYSSPACE;
1899         uio.uio_loffset = 0;
1900         uio.uio_fmode = 0;
1901         uio.uio_extflg = UIO_COPY_CACHED;
1902         uio.uio_llimit = MAXOFFSET_T;
1903 
1904         /* get the link data */
1905         CFS_DPRINT_BACKFS_NFSV4(fscp,
1906             ("cachefs_readlink (nfsv4): cnode %p, backvp %p\n",
1907             cp, cp->c_backvp));
1908         error = VOP_READLINK(cp->c_backvp, &uio, cr, NULL);
1909         if (error) {
1910                 cachefs_kmem_free(buf, MAXPATHLEN);
1911         } else {
1912                 *bufp = buf;
1913                 /*LINTED alignment okay*/
1914                 *buflenp = MAXPATHLEN - (int)uio.uio_resid;
1915         }
1916 
1917         return (error);
1918 }
1919 
/*
 * Obtains the back filesystem vnode for a cnode from its stored cookie
 * (fid), opening it if needed so delegation open counts stay balanced.
 */
int
cachefs_getbackvp(struct fscache *fscp, struct cnode *cp)
{
	int error = 0;
	int flag;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_CHEAT | CFSDEBUG_BACK)
		printf("cachefs_getbackvp: ENTER fscp %p cp %p\n",
		    (void *)fscp, (void *)cp);
#endif
	ASSERT(cp != NULL);
	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT(cp->c_backvp == NULL);
	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);

	/*
	 * If destroy is set then the last link to a file has been
	 * removed.  Oddly enough NFS will still return a vnode
	 * for the file if the timeout has not expired.
	 * This causes headaches for cachefs_push because the
	 * vnode is really stale.
	 * So we just short circuit the problem here.
	 */
	if (cp->c_flags & CN_DESTROY)
		return (ESTALE);

	/* non-DEBUG safety net when the back vfs is not mounted */
	ASSERT(fscp->fs_backvfsp);
	if (fscp->fs_backvfsp == NULL)
		return (ETIMEDOUT);
	/* map the stored cookie (fid) back to a vnode */
	error = VFS_VGET(fscp->fs_backvfsp, &cp->c_backvp,
	    (struct fid *)&cp->c_cookie);
	if (cp->c_backvp && cp->c_cred &&
	    ((cp->c_flags & CN_NEEDOPEN) || (cp->c_attr.va_type == VREG))) {
		/*
		 * XXX bob: really should pass in the correct flag,
		 * fortunately nobody pays attention to it
		 */
		flag = 0;
		/*
		 * If NEEDOOPEN is set, then this file was opened VOP_OPEN'd
		 * but the backvp was not.  So, for the sake of the vnode
		 * open counts used by delegation, we need to OPEN the backvp
		 * with the same flags that were used for this cnode.  That way
		 * when the file is VOP_CLOSE'd the counts won't go negative.
		 */
		if (cp->c_flags & CN_NEEDOPEN) {
			cp->c_flags &= ~CN_NEEDOPEN;
			if (cp->c_rdcnt > 0) {
				cp->c_rdcnt--;
				flag |= FREAD;
			}
			if (cp->c_wrcnt > 0) {
				cp->c_wrcnt--;
				flag |= FWRITE;
			}
		}
		error = VOP_OPEN(&cp->c_backvp, flag, cp->c_cred, NULL);
		if (error) {
			/* open failed; drop the vnode we just acquired */
			VN_RELE(cp->c_backvp);
			cp->c_backvp = NULL;
		}
	}

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_GENERAL | CFSDEBUG_BACK) {
		if (error || cp->c_backvp == NULL) {
			printf("Stale cookie cp %p fileno %llu type %d \n",
			    (void *)cp, (u_longlong_t)cp->c_id.cid_fileno,
			    CTOV(cp)->v_type);
		}
	}
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_CHEAT | CFSDEBUG_BACK)
		printf("cachefs_getbackvp: EXIT error = %d\n", error);
#endif
	return (error);
}
2000 
2001 int
2002 cachefs_getcookie(
2003         vnode_t *vp,
2004         struct fid *cookiep,
2005         struct vattr *attrp,
2006         cred_t *cr,
2007         uint32_t valid_fid)
2008 {
2009         int error = 0;
2010 
2011 #ifdef CFSDEBUG
2012         CFS_DEBUG(CFSDEBUG_CHEAT)
2013                 printf("cachefs_getcookie: ENTER vp %p\n", (void *)vp);
2014 #endif
2015         /*
2016          * Get the FID only if the caller has indicated it is valid,
2017          * otherwise, zero the cookie.
2018          */
2019         if (valid_fid) {
2020                 /*
2021                  * This assumes that the cookie is a full size fid, if we go to
2022                  * variable length fids we will need to change this.
2023                  */
2024                 cookiep->fid_len = MAXFIDSZ;
2025                 error = VOP_FID(vp, cookiep, NULL);
2026         } else {
2027                 bzero(cookiep, sizeof (*cookiep));
2028         }
2029 
2030         if (!error) {
2031                 if (attrp) {
2032                         ASSERT(attrp != NULL);
2033                         attrp->va_mask = AT_ALL;
2034                         error = VOP_GETATTR(vp, attrp, 0, cr, NULL);
2035                 }
2036         } else {
2037                 if (error == ENOSPC) {
2038                         /*
2039                          * This is an indication that the underlying filesystem
2040                          * needs a bigger fid.  For now just map to EINVAL.
2041                          */
2042                         error = EINVAL;
2043                 }
2044         }
2045 #ifdef CFSDEBUG
2046         CFS_DEBUG(CFSDEBUG_CHEAT)
2047                 printf("cachefs_getcookie: EXIT error = %d\n", error);
2048 #endif
2049         return (error);
2050 }
2051 
2052 void
2053 cachefs_workq_init(struct cachefs_workq *qp)
2054 {
2055         qp->wq_head = qp->wq_tail = NULL;
2056         qp->wq_length =
2057             qp->wq_thread_count =
2058             qp->wq_max_len =
2059             qp->wq_halt_request = 0;
2060         qp->wq_keepone = 0;
2061         cv_init(&qp->wq_req_cv, NULL, CV_DEFAULT, NULL);
2062         cv_init(&qp->wq_halt_cv, NULL, CV_DEFAULT, NULL);
2063         mutex_init(&qp->wq_queue_lock, NULL, MUTEX_DEFAULT, NULL);
2064 }
2065 
2066 /*
2067  * return non-zero if it's `okay' to queue more requests (policy)
2068  */
2069 
/* cap on queued async requests; 0 = freemem limit only, -1 = no limit */
static int cachefs_async_max = 512;
/* current number of queued async requests, across all work queues */
static int cachefs_async_count = 0;
/* protects updates to cachefs_async_count */
kmutex_t cachefs_async_lock;
2073 
2074 int
2075 cachefs_async_okay(void)
2076 {
2077         /*
2078          * a value of -1 for max means to ignore freemem
2079          */
2080 
2081         if (cachefs_async_max == -1)
2082                 return (1);
2083 
2084         if (freemem < minfree)
2085                 return (0);
2086 
2087         /*
2088          * a value of 0 for max means no arbitrary limit (only `freemen')
2089          */
2090 
2091         if (cachefs_async_max == 0)
2092                 return (1);
2093 
2094         ASSERT(cachefs_async_max > 0);
2095 
2096         /*
2097          * check the global count against the max.
2098          *
2099          * we don't need to grab cachefs_async_lock -- we're just
2100          * looking, and a little bit of `fuzz' is okay.
2101          */
2102 
2103         if (cachefs_async_count >= cachefs_async_max)
2104                 return (0);
2105 
2106         return (1);
2107 }
2108 
/*
 * Worker thread body for a cachefs work queue: loops pulling requests
 * off qp and executing them, doing log flush work when requested, and
 * exits when halted or after an idle timeout (subject to wq_keepone).
 */
void
cachefs_async_start(struct cachefs_workq *qp)
{
	struct cachefs_req *rp;
	int left;
	callb_cpr_t cprinfo;

	/* register with CPR (suspend/resume) while holding the queue lock */
	CALLB_CPR_INIT(&cprinfo, &qp->wq_queue_lock, callb_generic_cpr, "cas");
	mutex_enter(&qp->wq_queue_lock);
	left = 1;
	for (;;) {
		/* if there are no pending requests */
		if ((qp->wq_head == NULL) && (qp->wq_logwork == 0)) {
			/* see if thread should exit */
			if (qp->wq_halt_request || (left == -1)) {
				/* left == -1 means the idle wait timed out */
				if ((qp->wq_thread_count > 1) ||
				    (qp->wq_keepone == 0))
					break;
			}

			/* wake up thread in async_halt if necessary */
			if (qp->wq_halt_request)
				cv_broadcast(&qp->wq_halt_cv);

			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			/* sleep until there is something to do */
			left = cv_reltimedwait(&qp->wq_req_cv,
			    &qp->wq_queue_lock, CFS_ASYNC_TIMEOUT,
			    TR_CLOCK_TICK);
			CALLB_CPR_SAFE_END(&cprinfo, &qp->wq_queue_lock);
			if ((qp->wq_head == NULL) && (qp->wq_logwork == 0))
				continue;
		}
		left = 1;

		/* flush pending log records before servicing requests */
		if (qp->wq_logwork) {
			qp->wq_logwork = 0;
			mutex_exit(&qp->wq_queue_lock);
			cachefs_log_process_queue(qp->wq_cachep, 1);
			mutex_enter(&qp->wq_queue_lock);
			continue;
		}

		/* remove request from the list */
		rp = qp->wq_head;
		qp->wq_head = rp->cfs_next;
		if (rp->cfs_next == NULL)
			qp->wq_tail = NULL;

		/* do the request (queue lock dropped across the work) */
		mutex_exit(&qp->wq_queue_lock);
		cachefs_do_req(rp);
		mutex_enter(&qp->wq_queue_lock);

		/* decrement count of requests */
		qp->wq_length--;
		mutex_enter(&cachefs_async_lock);
		--cachefs_async_count;
		mutex_exit(&cachefs_async_lock);
	}
	ASSERT(qp->wq_head == NULL);
	qp->wq_thread_count--;
	/* tell cachefs_async_halt when the last thread is gone */
	if (qp->wq_halt_request && qp->wq_thread_count == 0)
		cv_broadcast(&qp->wq_halt_cv);
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
	/*NOTREACHED*/
}
2177 
2178 /*
2179  * attempt to halt all the async threads associated with a given workq
2180  */
int
cachefs_async_halt(struct cachefs_workq *qp, int force)
{
	int error = 0;

	mutex_enter(&qp->wq_queue_lock);
	/* force means even the normally retained last thread must go */
	if (force)
		qp->wq_keepone = 0;

	if (qp->wq_thread_count > 0) {
		/* ask the workers to exit and give them up to a minute */
		qp->wq_halt_request++;
		cv_broadcast(&qp->wq_req_cv);
		(void) cv_reltimedwait(&qp->wq_halt_cv,
		    &qp->wq_queue_lock, (60 * hz), TR_CLOCK_TICK);
		qp->wq_halt_request--;
		if (qp->wq_thread_count > 0) {
			/*
			 * EAGAIN: only the deliberately-kept idle thread
			 * remains; EBUSY: workers are still busy.
			 */
			if ((qp->wq_thread_count == 1) &&
			    (qp->wq_length == 0) && qp->wq_keepone)
				error = EAGAIN;
			else
				error = EBUSY;
		} else {
			ASSERT(qp->wq_length == 0 && qp->wq_head == NULL);
		}
	}
	mutex_exit(&qp->wq_queue_lock);
	return (error);
}
2209 
/*
 * Appends request rp to work queue qp, spawning an additional worker
 * thread if the queue is getting long (up to cachefs_max_threads).
 */
void
cachefs_addqueue(struct cachefs_req *rp, struct cachefs_workq *qp)
{
	mutex_enter(&qp->wq_queue_lock);
	if (qp->wq_thread_count < cachefs_max_threads) {
		/* grow the pool when there is no thread or a backlog */
		if (qp->wq_thread_count == 0 ||
		    (qp->wq_length >= (qp->wq_thread_count * 2))) {
			(void) thread_create(NULL, 0, cachefs_async_start,
			    qp, 0, &p0, TS_RUN, minclsyspri);
			qp->wq_thread_count++;
		}
	}
	/* lock order: queue lock, then request lock, then global lock */
	mutex_enter(&rp->cfs_req_lock);
	if (qp->wq_tail)
		qp->wq_tail->cfs_next = rp;
	else
		qp->wq_head = rp;
	qp->wq_tail = rp;
	rp->cfs_next = NULL;
	qp->wq_length++;
	/* track the high-water mark for observability */
	if (qp->wq_length > qp->wq_max_len)
		qp->wq_max_len = qp->wq_length;
	mutex_enter(&cachefs_async_lock);
	++cachefs_async_count;
	mutex_exit(&cachefs_async_lock);

	/* wake one worker to service the new request */
	cv_signal(&qp->wq_req_cv);
	mutex_exit(&rp->cfs_req_lock);
	mutex_exit(&qp->wq_queue_lock);
}
2240 
/*
 * Async worker for a queued putpage request: performs the VOP_PUTPAGE
 * and then balances the cnode's outstanding-I/O accounting.
 */
void
cachefs_async_putpage(struct cachefs_putpage_req *prp, cred_t *cr)
{
	struct cnode *cp = VTOC(prp->cp_vp);

	ASSERT(CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)) == 0);

	(void) VOP_PUTPAGE(prp->cp_vp, prp->cp_off, prp->cp_len,
	    prp->cp_flags, cr, NULL);

	/* drop the i/o count taken when the request was queued */
	mutex_enter(&cp->c_iomutex);
	if (--cp->c_nio == 0)
		cv_broadcast(&cp->c_iocv);
	/* a (0, 0) request means "all pages"; clear the pending flag */
	if (prp->cp_off == 0 && prp->cp_len == 0 &&
	    (cp->c_ioflags & CIO_PUTPAGES)) {
		cp->c_ioflags &= ~CIO_PUTPAGES;
	}
	mutex_exit(&cp->c_iomutex);
}
2260 
2261 void
2262 cachefs_async_populate(struct cachefs_populate_req *pop, cred_t *cr)
2263 {
2264         struct cnode *cp = VTOC(pop->cpop_vp);
2265         struct fscache *fscp = C_TO_FSCACHE(cp);
2266         struct filegrp *fgp = cp->c_filegrp;
2267         int error = 0; /* not returned -- used as a place-holder */
2268         vnode_t *frontvp = NULL, *backvp = NULL;
2269         int havelock = 0;
2270         vattr_t va;
2271 
2272         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2273 
2274         if (((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0) ||
2275             (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
2276                 mutex_enter(&cp->c_statelock);
2277                 cp->c_flags &= ~CN_ASYNC_POPULATE;
2278                 mutex_exit(&cp->c_statelock);
2279                 return; /* goto out */
2280         }
2281 
2282         error = cachefs_cd_access(fscp, 0, 0);
2283         if (error) {
2284 #ifdef CFSDEBUG
2285                 CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2286                         printf("async_pop: cd_access: err %d con %d\n",
2287                             error, fscp->fs_cdconnected);
2288 #endif /* CFSDEBUG */
2289                 mutex_enter(&cp->c_statelock);
2290                 cp->c_flags &= ~CN_ASYNC_POPULATE;
2291                 mutex_exit(&cp->c_statelock);
2292                 return; /* goto out */
2293         }
2294 
2295         /*
2296          * grab the statelock for some minimal things
2297          */
2298 
2299         rw_enter(&cp->c_rwlock, RW_WRITER);
2300         mutex_enter(&cp->c_statelock);
2301         havelock = 1;
2302 
2303         if ((cp->c_flags & CN_ASYNC_POPULATE) == 0)
2304                 goto out;
2305 
2306         /* there can be only one */
2307         ASSERT((cp->c_flags & CN_ASYNC_POP_WORKING) == 0);
2308         cp->c_flags |= CN_ASYNC_POP_WORKING;
2309         cp->c_popthrp = curthread;
2310 
2311         if (cp->c_metadata.md_flags & MD_POPULATED)
2312                 goto out;
2313 
2314         if (cp->c_flags & CN_NOCACHE) {
2315 #ifdef CFSDEBUG
2316                 CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2317                         printf("cachefs_async_populate: nocache bit on\n");
2318 #endif /* CFSDEBUG */
2319                 error = EINVAL;
2320                 goto out;
2321         }
2322 
2323         if (cp->c_frontvp == NULL) {
2324                 if ((cp->c_metadata.md_flags & MD_FILE) == 0) {
2325                         struct cfs_cid cid = cp->c_id;
2326 
2327                         mutex_exit(&cp->c_statelock);
2328                         havelock = 0;
2329 
2330                         /*
2331                          * if frontfile doesn't exist, drop the lock
2332                          * to do some of the file creation stuff.
2333                          */
2334 
2335                         if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
2336                                 error = filegrp_allocattr(fgp);
2337                                 if (error != 0)
2338                                         goto out;
2339                         }
2340                         if (fgp->fg_flags & CFS_FG_ALLOC_FILE) {
2341                                 mutex_enter(&fgp->fg_mutex);
2342                                 if (fgp->fg_flags & CFS_FG_ALLOC_FILE) {
2343                                         if (fgp->fg_header->ach_nffs == 0)
2344                                                 error = filegrpdir_create(fgp);
2345                                         else
2346                                                 error = filegrpdir_find(fgp);
2347                                         if (error != 0) {
2348                                                 mutex_exit(&fgp->fg_mutex);
2349                                                 goto out;
2350                                         }
2351                                 }
2352                                 mutex_exit(&fgp->fg_mutex);
2353                         }
2354 
2355                         if (fgp->fg_dirvp != NULL) {
2356                                 char name[CFS_FRONTFILE_NAME_SIZE];
2357                                 struct vattr *attrp;
2358 
2359                                 attrp = cachefs_kmem_zalloc(
2360                                     sizeof (struct vattr), KM_SLEEP);
2361                                 attrp->va_mode = S_IFREG | 0666;
2362                                 attrp->va_uid = 0;
2363                                 attrp->va_gid = 0;
2364                                 attrp->va_type = VREG;
2365                                 attrp->va_size = 0;
2366                                 attrp->va_mask =
2367                                     AT_SIZE | AT_TYPE | AT_MODE |
2368                                     AT_UID | AT_GID;
2369 
2370                                 make_ascii_name(&cid, name);
2371 
2372                                 (void) VOP_CREATE(fgp->fg_dirvp, name, attrp,
2373                                     EXCL, 0666, &frontvp, kcred, 0, NULL, NULL);
2374 
2375                                 cachefs_kmem_free(attrp,
2376                                     sizeof (struct vattr));
2377                         }
2378 
2379                         mutex_enter(&cp->c_statelock);
2380                         havelock = 1;
2381                 }
2382                 error = cachefs_getfrontfile(cp);
2383                 ASSERT((error != 0) ||
2384                     (frontvp == NULL) ||
2385                     (frontvp == cp->c_frontvp));
2386         }
2387         if ((error != 0) || (cp->c_frontvp == NULL))
2388                 goto out;
2389 
2390         if (frontvp != NULL)
2391                 VN_RELE(frontvp);
2392 
2393         frontvp = cp->c_frontvp;
2394         VN_HOLD(frontvp);
2395 
2396         if (cp->c_backvp == NULL) {
2397                 error = cachefs_getbackvp(fscp, cp);
2398                 if ((error != 0) || (cp->c_backvp == NULL))
2399                         goto out;
2400         }
2401         backvp = cp->c_backvp;
2402         VN_HOLD(backvp);
2403 
2404         switch (pop->cpop_vp->v_type) {
2405         case VREG:
2406                 mutex_exit(&cp->c_statelock);
2407                 havelock = 0;
2408                 error = cachefs_async_populate_reg(pop, cr, backvp, frontvp);
2409                 break;
2410         case VDIR:
2411                 error = cachefs_async_populate_dir(pop, cr, backvp, frontvp);
2412                 mutex_exit(&cp->c_statelock);
2413                 havelock = 0;
2414                 break;
2415         default:
2416 #ifdef CFSDEBUG
2417                 printf("cachefs_async_populate: warning: vnode type = %d\n",
2418                     pop->cpop_vp->v_type);
2419                 ASSERT(0);
2420 #endif /* CFSDEBUG */
2421                 error = EINVAL;
2422                 break;
2423         }
2424 
2425         if (error != 0)
2426                 goto out;
2427 
2428         error = VOP_FSYNC(frontvp, FSYNC, cr, NULL);
2429         if (error != 0) {
2430 #ifdef CFSDEBUG
2431                 CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2432                         printf("cachefs_async_populate: fsync\n");
2433 #endif /* CFSDEBUG */
2434                 goto out;
2435         }
2436 
2437         /* grab the lock and finish up */
2438         mutex_enter(&cp->c_statelock);
2439         havelock = 1;
2440 
2441         /* if went nocache while lock was dropped, get out */
2442         if ((cp->c_flags & CN_NOCACHE) || (cp->c_frontvp == NULL)) {
2443                 error = EINVAL;
2444                 goto out;
2445         }
2446 
2447         va.va_mask = AT_MTIME;
2448         error = VOP_GETATTR(cp->c_frontvp, &va, 0, cr, NULL);
2449         if (error) {
2450 #ifdef CFSDEBUG
2451                 CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2452                         printf("cachefs_async_populate: getattr\n");
2453 #endif /* CFSDEBUG */
2454                 goto out;
2455         }
2456         cp->c_metadata.md_timestamp = va.va_mtime;
2457         cp->c_metadata.md_flags |= MD_POPULATED;
2458         cp->c_metadata.md_flags &= ~MD_INVALREADDIR;
2459         cp->c_flags |= CN_UPDATED;
2460 
2461 out:
2462         if (! havelock)
2463                 mutex_enter(&cp->c_statelock);
2464 
2465         /* see if an error happened behind our backs */
2466         if ((error == 0) && (cp->c_flags & CN_NOCACHE)) {
2467 #ifdef CFSDEBUG
2468                 CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2469                         printf("cachefs_async_populate: "
2470                             "nocache behind our backs\n");
2471 #endif /* CFSDEBUG */
2472                 error = EINVAL;
2473         }
2474 
2475         cp->c_flags &= ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING |
2476             CN_ASYNC_POPULATE | CN_ASYNC_POP_WORKING);
2477         cp->c_popthrp = NULL;
2478 
2479         if (error != 0)
2480                 cachefs_nocache(cp);
2481 
2482         /* unblock any threads waiting for populate to finish */
2483         cv_broadcast(&cp->c_popcv);
2484         mutex_exit(&cp->c_statelock);
2485         rw_exit(&cp->c_rwlock);
2486         cachefs_cd_release(fscp);
2487 
2488         if (backvp != NULL) {
2489                 VN_RELE(backvp);
2490         }
2491         if (frontvp != NULL) {
2492                 VN_RELE(frontvp);
2493         }
2494 }
2495 
2496 /*
2497  * only to be called from cachefs_async_populate
2498  */
2499 
2500 static int
2501 cachefs_async_populate_reg(struct cachefs_populate_req *pop, cred_t *cr,
2502     vnode_t *backvp, vnode_t *frontvp)
2503 {
2504         struct cnode *cp = VTOC(pop->cpop_vp);
2505         int error = 0;
2506         u_offset_t popoff;
2507         size_t popsize;
2508 
2509         cachefs_cluster_allocmap(pop->cpop_off, &popoff,
2510             &popsize, pop->cpop_size, cp);
2511         if (popsize == 0) {
2512 #ifdef CFSDEBUG
2513                 CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2514                         printf("cachefs_async_populate: popsize == 0\n");
2515 #endif /* CFSDEBUG */
2516                 goto out;
2517         }
2518 
2519         error = cachefs_populate(cp, popoff, popsize, frontvp, backvp,
2520             cp->c_size, cr);
2521         if (error != 0) {
2522 #ifdef CFSDEBUG
2523                 CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2524                         printf("cachefs_async_populate: cachefs_populate\n");
2525 #endif /* CFSDEBUG */
2526                 goto out;
2527         }
2528 
2529 out:
2530         return (error);
2531 }
2532 
2533 void
2534 cachefs_do_req(struct cachefs_req *rp)
2535 {
2536         struct cachefscache *cachep;
2537 
2538         mutex_enter(&rp->cfs_req_lock);
2539         switch (rp->cfs_cmd) {
2540         case CFS_INVALID:
2541                 panic("cachefs_do_req: CFS_INVALID operation on queue");
2542                 /*NOTREACHED*/
2543         case CFS_CACHE_SYNC:
2544                 cachep = rp->cfs_req_u.cu_fs_sync.cf_cachep;
2545                 cachefs_cache_sync(cachep);
2546                 break;
2547         case CFS_IDLE:
2548                 cachefs_cnode_idle(rp->cfs_req_u.cu_idle.ci_vp, rp->cfs_cr);
2549                 break;
2550         case CFS_PUTPAGE:
2551                 cachefs_async_putpage(&rp->cfs_req_u.cu_putpage, rp->cfs_cr);
2552                 VN_RELE(rp->cfs_req_u.cu_putpage.cp_vp);
2553                 break;
2554         case CFS_POPULATE:
2555                 cachefs_async_populate(&rp->cfs_req_u.cu_populate, rp->cfs_cr);
2556                 VN_RELE(rp->cfs_req_u.cu_populate.cpop_vp);
2557                 break;
2558         case CFS_NOOP:
2559                 break;
2560         default:
2561                 panic("c_do_req: Invalid CFS async operation");
2562         }
2563         crfree(rp->cfs_cr);
2564         rp->cfs_cmd = CFS_INVALID;
2565         mutex_exit(&rp->cfs_req_lock);
2566         kmem_cache_free(cachefs_req_cache, rp);
2567 }
2568 
2569 
2570 
2571 
/* bytes currently allocated via the cachefs_kmem_* wrappers (DEBUG only) */
ssize_t cachefs_mem_usage = 0;

/*
 * Guard record placed at both ends of every DEBUG allocation; each end
 * records the rounded allocation size and points at its twin so
 * cachefs_kmem_free() can detect size mismatches and overruns.
 */
struct km_wrap {
	size_t kw_size;
	struct km_wrap *kw_other;
};

/* protects cachefs_mem_usage */
kmutex_t cachefs_kmem_lock;
2580 
/*
 * DEBUG wrapper for kmem_alloc().  Pads the request with a km_wrap
 * guard record at each end, adds the padded size to cachefs_mem_usage,
 * and returns a pointer just past the front guard.  Returns NULL only
 * if kmem_alloc() does (possible with KM_NOSLEEP).  Non-DEBUG builds
 * call kmem_alloc() directly.
 */
void *
cachefs_kmem_alloc(size_t size, int flag)
{
#ifdef DEBUG
	caddr_t mp = NULL;
	struct km_wrap *kwp;
	/* room for both guards, rounded up to an 8-byte boundary */
	size_t n = (size + (2 * sizeof (struct km_wrap)) + 7) & ~7;

	ASSERT(n >= (size + 8));
	mp = kmem_alloc(n, flag);
	if (mp == NULL) {
		return (NULL);
	}
	/* front guard: padded size plus a pointer to the back guard */
	/*LINTED alignment okay*/
	kwp = (struct km_wrap *)mp;
	kwp->kw_size = n;
	/*LINTED alignment okay*/
	kwp->kw_other = (struct km_wrap *)(mp + n - sizeof (struct km_wrap));
	/* back guard: mirror image pointing back at the front guard */
	kwp = (struct km_wrap *)kwp->kw_other;
	kwp->kw_size = n;
	/*LINTED alignment okay*/
	kwp->kw_other = (struct km_wrap *)mp;

	mutex_enter(&cachefs_kmem_lock);
	ASSERT(cachefs_mem_usage >= 0);
	cachefs_mem_usage += n;
	mutex_exit(&cachefs_kmem_lock);

	/* caller's usable memory starts just past the front guard */
	return (mp + sizeof (struct km_wrap));
#else /* DEBUG */
	return (kmem_alloc(size, flag));
#endif /* DEBUG */
}
2614 
2615 void *
2616 cachefs_kmem_zalloc(size_t size, int flag)
2617 {
2618 #ifdef DEBUG
2619         caddr_t mp = NULL;
2620         struct km_wrap *kwp;
2621         size_t n = (size + (2 * sizeof (struct km_wrap)) + 7) & ~7;
2622 
2623         ASSERT(n >= (size + 8));
2624         mp = kmem_zalloc(n, flag);
2625         if (mp == NULL) {
2626                 return (NULL);
2627         }
2628         /*LINTED alignment okay*/
2629         kwp = (struct km_wrap *)mp;
2630         kwp->kw_size = n;
2631         /*LINTED alignment okay*/
2632         kwp->kw_other = (struct km_wrap *)(mp + n - sizeof (struct km_wrap));
2633         kwp = (struct km_wrap *)kwp->kw_other;
2634         kwp->kw_size = n;
2635         /*LINTED alignment okay*/
2636         kwp->kw_other = (struct km_wrap *)mp;
2637 
2638         mutex_enter(&cachefs_kmem_lock);
2639         ASSERT(cachefs_mem_usage >= 0);
2640         cachefs_mem_usage += n;
2641         mutex_exit(&cachefs_kmem_lock);
2642 
2643         return (mp + sizeof (struct km_wrap));
2644 #else /* DEBUG */
2645         return (kmem_zalloc(size, flag));
2646 #endif /* DEBUG */
2647 }
2648 
/*
 * DEBUG counterpart of cachefs_kmem_alloc()/cachefs_kmem_zalloc().
 * Recomputes the padded size from the caller-supplied size, validates
 * both km_wrap guard records, decrements cachefs_mem_usage, clears the
 * guards (so a double free or stale pointer trips the ASSERTs), and
 * frees the whole padded chunk.  Non-DEBUG builds call kmem_free()
 * directly.  size must match the size passed at allocation time.
 */
void
cachefs_kmem_free(void *mp, size_t size)
{
#ifdef DEBUG
	struct km_wrap *front_kwp;
	struct km_wrap *back_kwp;
	/* must reproduce the rounding done at allocation time */
	size_t n = (size + (2 * sizeof (struct km_wrap)) + 7) & ~7;
	void *p;

	ASSERT(n >= (size + 8));
	front_kwp = (struct km_wrap *)((uintptr_t)mp - sizeof (struct km_wrap));
	back_kwp = (struct km_wrap *)
	    ((uintptr_t)front_kwp + n - sizeof (struct km_wrap));

	/* both guards must agree with each other and with size */
	ASSERT(front_kwp->kw_other == back_kwp);
	ASSERT(front_kwp->kw_size == n);
	ASSERT(back_kwp->kw_other == front_kwp);
	ASSERT(back_kwp->kw_size == n);

	mutex_enter(&cachefs_kmem_lock);
	cachefs_mem_usage -= n;
	ASSERT(cachefs_mem_usage >= 0);
	mutex_exit(&cachefs_kmem_lock);

	/* scribble on the guards so reuse of this pointer is caught */
	p = front_kwp;
	front_kwp->kw_size = back_kwp->kw_size = 0;
	front_kwp->kw_other = back_kwp->kw_other = NULL;
	kmem_free(p, n);
#else /* DEBUG */
	kmem_free(mp, size);
#endif /* DEBUG */
}
2681 
2682 char *
2683 cachefs_strdup(char *s)
2684 {
2685         char *rc;
2686 
2687         ASSERT(s != NULL);
2688 
2689         rc = cachefs_kmem_alloc(strlen(s) + 1, KM_SLEEP);
2690         (void) strcpy(rc, s);
2691 
2692         return (rc);
2693 }
2694 
/*
 * kstat snapshot routine for per-fscache statistics.
 * KSTAT_WRITE: copies the user-supplied stats into the fscache and
 * pushes the garbage-collection fields down to the underlying cache.
 * KSTAT_READ (any other rw): refreshes the gc fields from the cache,
 * then copies the stats out to buf.
 * Returns 0 or an error produced by the CACHEFS_TIME_TO_CFS_TIME_COPY
 * macros (presumably on time-conversion overflow — confirm against the
 * macro definition).
 */
int
cachefs_stats_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct fscache *fscp = (struct fscache *)ksp->ks_data;
	cachefscache_t *cachep = fscp->fs_cache;
	int	error = 0;

	if (rw == KSTAT_WRITE) {
		/* user -> kernel: install stats and propagate gc state */
		bcopy(buf, &fscp->fs_stats, sizeof (fscp->fs_stats));
		cachep->c_gc_count = fscp->fs_stats.st_gc_count;
		CACHEFS_CFS_TIME_TO_TIME_COPY(fscp->fs_stats.st_gc_time,
		    cachep->c_gc_time);
		CACHEFS_CFS_TIME_TO_TIME_COPY(fscp->fs_stats.st_gc_before_atime,
		    cachep->c_gc_before);
		CACHEFS_CFS_TIME_TO_TIME_COPY(fscp->fs_stats.st_gc_after_atime,
		    cachep->c_gc_after);
		return (error);
	}

	/* kernel -> user: refresh gc fields from the cache, then copy out */
	fscp->fs_stats.st_gc_count = cachep->c_gc_count;
	CACHEFS_TIME_TO_CFS_TIME_COPY(cachep->c_gc_time,
	    fscp->fs_stats.st_gc_time, error);
	CACHEFS_TIME_TO_CFS_TIME_COPY(cachep->c_gc_before,
	    fscp->fs_stats.st_gc_before_atime, error);
	CACHEFS_TIME_TO_CFS_TIME_COPY(cachep->c_gc_after,
	    fscp->fs_stats.st_gc_after_atime, error);
	bcopy(&fscp->fs_stats, buf, sizeof (fscp->fs_stats));

	return (error);
}
2725 
2726 #ifdef DEBUG
/*
 * Records a snapshot of debugging state (message, flags, an arbitrary
 * int and pointer, the cnode/fscache/cache triple, the current thread,
 * a timestamp, and a stack trace) into a cachefs_debug_info_t.
 * If chain is set (or oldcdb is NULL) a new record is allocated; when
 * chaining, the new record is linked in front of oldcdb.  Otherwise
 * oldcdb is updated in place.  Returns the record written.  DEBUG only.
 */
cachefs_debug_info_t *
cachefs_debug_save(cachefs_debug_info_t *oldcdb, int chain,
    char *message, uint_t flags, int number, void *pointer,
    cachefscache_t *cachep, struct fscache *fscp, struct cnode *cp)
{
	cachefs_debug_info_t *cdb;

	if ((chain) || (oldcdb == NULL))
		cdb = cachefs_kmem_zalloc(sizeof (*cdb), KM_SLEEP);
	else
		cdb = oldcdb;
	if (chain)
		cdb->cdb_next = oldcdb;

	if (message != NULL) {
		/* replace any previous message (reused records) */
		if (cdb->cdb_message != NULL)
			cachefs_kmem_free(cdb->cdb_message,
			    strlen(cdb->cdb_message) + 1);
		cdb->cdb_message = cachefs_kmem_alloc(strlen(message) + 1,
		    KM_SLEEP);
		(void) strcpy(cdb->cdb_message, message);
	}
	cdb->cdb_flags = flags;
	cdb->cdb_int = number;
	cdb->cdb_pointer = pointer;

	cdb->cdb_count++;

	cdb->cdb_cnode = cp;
	if (cp != NULL) {
		cdb->cdb_frontvp = cp->c_frontvp;
		cdb->cdb_backvp = cp->c_backvp;
	}
	/* derive fscp/cachep from the cnode/fscache when not given */
	if (fscp != NULL)
		cdb->cdb_fscp = fscp;
	else if (cp != NULL)
		cdb->cdb_fscp = C_TO_FSCACHE(cp);
	if (cachep != NULL)
		cdb->cdb_cachep = cachep;
	else if (cdb->cdb_fscp != NULL)
		cdb->cdb_cachep = cdb->cdb_fscp->fs_cache;

	cdb->cdb_thread = curthread;
	cdb->cdb_timestamp = gethrtime();
	cdb->cdb_depth = getpcstack(cdb->cdb_stack, CACHEFS_DEBUG_DEPTH);

	return (cdb);
}
2775 
2776 void
2777 cachefs_debug_show(cachefs_debug_info_t *cdb)
2778 {
2779         hrtime_t now = gethrtime();
2780         timestruc_t ts;
2781         int i;
2782 
2783         while (cdb != NULL) {
2784                 hrt2ts(now - cdb->cdb_timestamp, &ts);
2785                 printf("cdb: %p count: %d timelapse: %ld.%9ld\n",
2786                     (void *)cdb, cdb->cdb_count, ts.tv_sec, ts.tv_nsec);
2787                 if (cdb->cdb_message != NULL)
2788                         printf("message: %s", cdb->cdb_message);
2789                 printf("flags: %x int: %d pointer: %p\n",
2790                     cdb->cdb_flags, cdb->cdb_int, (void *)cdb->cdb_pointer);
2791 
2792                 printf("cnode: %p fscp: %p cachep: %p\n",
2793                     (void *)cdb->cdb_cnode,
2794                     (void *)cdb->cdb_fscp, (void *)cdb->cdb_cachep);
2795                 printf("frontvp: %p backvp: %p\n",
2796                     (void *)cdb->cdb_frontvp, (void *)cdb->cdb_backvp);
2797 
2798                 printf("thread: %p stack...\n", (void *)cdb->cdb_thread);
2799                 for (i = 0; i < cdb->cdb_depth; i++) {
2800                         ulong_t off;
2801                         char *sym;
2802 
2803                         sym = kobj_getsymname(cdb->cdb_stack[i], &off);
2804                         printf("%s+%lx\n", sym ? sym : "?", off);
2805                 }
2806                 delay(2*hz);
2807                 cdb = cdb->cdb_next;
2808         }
2809         debug_enter(NULL);
2810 }
2811 #endif /* DEBUG */
2812 
2813 /*
2814  * Changes the size of the front file.
2815  * Returns 0 for success or error if cannot set file size.
2816  * NOCACHE bit is ignored.
2817  * c_size is ignored.
2818  * statelock must be held, frontvp must be set.
2819  * File must be populated if setting to a size other than zero.
2820  */
int
cachefs_frontfile_size(cnode_t *cp, u_offset_t length)
{
	cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
	vattr_t va;
	size_t nblks, blkdelta;
	int error = 0;
	int alloc = 0;		/* set once blkdelta blocks are charged */
	struct cachefs_allocmap *allocp;

	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT(cp->c_frontvp);

	/* if growing the file, allocate space first, we charge for holes */
	if (length) {
		ASSERT(cp->c_metadata.md_flags & MD_POPULATED);

		nblks = (length + MAXBSIZE - 1) / MAXBSIZE;
		if (nblks > cp->c_metadata.md_frontblks) {
			blkdelta = nblks - cp->c_metadata.md_frontblks;
			error = cachefs_allocblocks(cachep, blkdelta,
			    cp->c_metadata.md_rltype);
			if (error)
				goto out;
			alloc = 1;
			/*
			 * NOTE(review): md_frontblks is not advanced to
			 * nblks on this grow path — confirm callers (or
			 * later code) account for the new blocks.
			 */
		}
	}

	/* change the size of the front file */
	va.va_mask = AT_SIZE;
	va.va_size = length;
	error = VOP_SETATTR(cp->c_frontvp, &va, 0, kcred, NULL);
	if (error)
		goto out;

	/* zero out the alloc map */
	bzero(&cp->c_metadata.md_allocinfo,
	    cp->c_metadata.md_allocents * sizeof (struct cachefs_allocmap));
	cp->c_metadata.md_allocents = 0;

	if (length == 0) {
		/* free up blocks */
		if (cp->c_metadata.md_frontblks) {
			cachefs_freeblocks(cachep, cp->c_metadata.md_frontblks,
			    cp->c_metadata.md_rltype);
			cp->c_metadata.md_frontblks = 0;
		}
	} else {
		/* update number of blocks if shrinking file */
		nblks = (length + MAXBSIZE - 1) / MAXBSIZE;
		if (nblks < cp->c_metadata.md_frontblks) {
			blkdelta = cp->c_metadata.md_frontblks - nblks;
			cachefs_freeblocks(cachep, blkdelta,
			    cp->c_metadata.md_rltype);
			cp->c_metadata.md_frontblks = (uint_t)nblks;
		}

		/* fix up alloc map to reflect new size */
		allocp = cp->c_metadata.md_allocinfo;
		allocp->am_start_off = 0;
		allocp->am_size = length;
		cp->c_metadata.md_allocents = 1;
	}
	cp->c_flags |= CN_UPDATED | CN_NEED_FRONT_SYNC;

out:
	/* undo the block charge if anything failed after the allocation */
	if (error && alloc)
		cachefs_freeblocks(cachep, blkdelta, cp->c_metadata.md_rltype);
	return (error);
}
2891 
2892 /*ARGSUSED*/
2893 int
2894 cachefs_req_create(void *voidp, void *cdrarg, int kmflags)
2895 {
2896         struct cachefs_req *rp = (struct cachefs_req *)voidp;
2897 
2898         /*
2899          * XXX don't do this!  if you need this, you can't use this
2900          * constructor.
2901          */
2902 
2903         bzero(rp, sizeof (struct cachefs_req));
2904 
2905         mutex_init(&rp->cfs_req_lock, NULL, MUTEX_DEFAULT, NULL);
2906         return (0);
2907 }
2908 
2909 /*ARGSUSED*/
2910 void
2911 cachefs_req_destroy(void *voidp, void *cdrarg)
2912 {
2913         struct cachefs_req *rp = (struct cachefs_req *)voidp;
2914 
2915         mutex_destroy(&rp->cfs_req_lock);
2916 }