1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 #include <sys/param.h>
  27 #include <sys/types.h>
  28 #include <sys/systm.h>
  29 #include <sys/cred.h>
  30 #include <sys/proc.h>
  31 #include <sys/user.h>
  32 #include <sys/time.h>
  33 #include <sys/vnode.h>
  34 #include <sys/vfs.h>
  35 #include <sys/vfs_opreg.h>
  36 #include <sys/file.h>
  37 #include <sys/filio.h>
  38 #include <sys/uio.h>
  39 #include <sys/buf.h>
  40 #include <sys/mman.h>
  41 #include <sys/tiuser.h>
  42 #include <sys/pathname.h>
  43 #include <sys/dirent.h>
  44 #include <sys/conf.h>
  45 #include <sys/debug.h>
  46 #include <sys/vmsystm.h>
  47 #include <sys/fcntl.h>
  48 #include <sys/flock.h>
  49 #include <sys/swap.h>
  50 #include <sys/errno.h>
  51 #include <sys/sysmacros.h>
  52 #include <sys/disp.h>
  53 #include <sys/kmem.h>
  54 #include <sys/cmn_err.h>
  55 #include <sys/vtrace.h>
  56 #include <sys/mount.h>
  57 #include <sys/bootconf.h>
  58 #include <sys/dnlc.h>
  59 #include <sys/stat.h>
  60 #include <sys/acl.h>
  61 #include <sys/policy.h>
  62 #include <rpc/types.h>
  63 
  64 #include <vm/hat.h>
  65 #include <vm/as.h>
  66 #include <vm/page.h>
  67 #include <vm/pvn.h>
  68 #include <vm/seg.h>
  69 #include <vm/seg_map.h>
  70 #include <vm/seg_vn.h>
  71 #include <vm/rm.h>
  72 #include <sys/fs/cachefs_fs.h>
  73 #include <sys/fs/cachefs_dir.h>
  74 #include <sys/fs/cachefs_dlog.h>
  75 #include <sys/fs/cachefs_ioctl.h>
  76 #include <sys/fs/cachefs_log.h>
  77 #include <fs/fs_subr.h>
  78 
  79 int cachefs_dnlc;       /* debugging knob ("use dnlc"); no reader is visible in this part of the file */
  80 
  /*
   * Prototypes for file-local (static) helpers.
   *
   * Several helpers are declared in _connected / _disconnected pairs.
   * cachefs_open() in this file distinguishes the two cases by comparing
   * fscp->fs_cdconnected with CFS_CD_CONNECTED; presumably each pair is
   * selected the same way, but the definitions and their callers are not
   * all visible here -- confirm before relying on that.
   */
  81 static void cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp,
  82     cred_t *cr);
  83 static void cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap,
  84     cred_t *cr);
  85 static void cachefs_createacl(cnode_t *dcp, cnode_t *newcp);
  86 static int cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec);
  87 static int cachefs_getacldirvp(cnode_t *cp);
  88 static void cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec);
  89 static int cachefs_access_local(void *cp, int mode, cred_t *cr);
  90 static int cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr);
  91 static int cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
  92     u_offset_t iooff, cred_t *cr);
  93 static int cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
  94     u_offset_t iooff, cred_t *cr);
  95 static int cachefs_setattr_connected(vnode_t *vp, vattr_t *vap, int flags,
  96     cred_t *cr, caller_context_t *ct);
  97 static int cachefs_setattr_disconnected(vnode_t *vp, vattr_t *vap,
  98     int flags, cred_t *cr, caller_context_t *ct);
  99 static int cachefs_access_connected(struct vnode *vp, int mode,
 100     int flags, cred_t *cr);
 101 static int cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
 102     cred_t *cr);
 103 static int cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
 104     char *tnm, cred_t *cr);
 105 static int cachefs_symlink_disconnected(vnode_t *dvp, char *lnm,
 106     vattr_t *tva, char *tnm, cred_t *cr);
 107 static int cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
 108     cred_t *cr);
 109 static int cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp,
 110     char *tnm, cred_t *cr);
 111 static int cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
 112     vnode_t **vpp, cred_t *cr);
 113 static int cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
 114     vnode_t **vpp, cred_t *cr);
 115 static int cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr);
 116 static int cachefs_rmdir_connected(vnode_t *dvp, char *nm,
 117     vnode_t *cdir, cred_t *cr, vnode_t *vp);
 118 static int cachefs_rmdir_disconnected(vnode_t *dvp, char *nm,
 119     vnode_t *cdir, cred_t *cr, vnode_t *vp);
 120 static char *cachefs_newname(void);
 121 static int cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm,
 122     cred_t *cr);
 123 static int cachefs_rename_connected(vnode_t *odvp, char *onm,
 124     vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
 125 static int cachefs_rename_disconnected(vnode_t *odvp, char *onm,
 126     vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
 127 static int cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr,
 128     int *eofp);
 129 static int cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop,
 130     cred_t *cr, int *eofp);
 131 static int cachefs_readback_translate(cnode_t *cp, uio_t *uiop,
 132         cred_t *cr, int *eofp);
 133 
     /* Takes the same argument list as the cachefs_setattr_* pair above. */
 134 static int cachefs_setattr_common(vnode_t *vp, vattr_t *vap, int flags,
 135     cred_t *cr, caller_context_t *ct);
 136 
 /*
  * Prototypes for the vnode operation entry points.  The functions whose
  * names appear in cachefs_vnodeops_template (below) are the ones handed to
  * vn_make_ops(); the remaining declarations in this group
  * (cachefs_create_connected/_disconnected, cachefs_readlink_connected/
  * _disconnected, cachefs_getapage, cachefs_getapage_back,
  * cachefs_getsecattr_connected/_disconnected, cachefs_writepage) are
  * helpers that are not registered in the template.
  */
 137 static  int     cachefs_open(struct vnode **, int, cred_t *,
 138                         caller_context_t *);
 139 static  int     cachefs_close(struct vnode *, int, int, offset_t,
 140                         cred_t *, caller_context_t *);
 141 static  int     cachefs_read(struct vnode *, struct uio *, int, cred_t *,
 142                         caller_context_t *);
 143 static  int     cachefs_write(struct vnode *, struct uio *, int, cred_t *,
 144                         caller_context_t *);
 145 static  int     cachefs_ioctl(struct vnode *, int, intptr_t, int, cred_t *,
 146                         int *, caller_context_t *);
 147 static  int     cachefs_getattr(struct vnode *, struct vattr *, int,
 148                         cred_t *, caller_context_t *);
 149 static  int     cachefs_setattr(struct vnode *, struct vattr *,
 150                         int, cred_t *, caller_context_t *);
 151 static  int     cachefs_access(struct vnode *, int, int, cred_t *,
 152                         caller_context_t *);
 153 static  int     cachefs_lookup(struct vnode *, char *, struct vnode **,
 154                         struct pathname *, int, struct vnode *, cred_t *,
 155                         caller_context_t *, int *, pathname_t *);
 156 static  int     cachefs_create(struct vnode *, char *, struct vattr *,
 157                         enum vcexcl, int, struct vnode **, cred_t *, int,
 158                         caller_context_t *, vsecattr_t *);
 159 static  int     cachefs_create_connected(vnode_t *dvp, char *nm,
 160                         vattr_t *vap, enum vcexcl exclusive, int mode,
 161                         vnode_t **vpp, cred_t *cr);
 162 static  int     cachefs_create_disconnected(vnode_t *dvp, char *nm,
 163                         vattr_t *vap, enum vcexcl exclusive, int mode,
 164                         vnode_t **vpp, cred_t *cr);
 165 static  int     cachefs_remove(struct vnode *, char *, cred_t *,
 166                         caller_context_t *, int);
 167 static  int     cachefs_link(struct vnode *, struct vnode *, char *,
 168                         cred_t *, caller_context_t *, int);
 169 static  int     cachefs_rename(struct vnode *, char *, struct vnode *,
 170                         char *, cred_t *, caller_context_t *, int);
 171 static  int     cachefs_mkdir(struct vnode *, char *, struct
 172                         vattr *, struct vnode **, cred_t *, caller_context_t *,
 173                         int, vsecattr_t *);
 174 static  int     cachefs_rmdir(struct vnode *, char *, struct vnode *,
 175                         cred_t *, caller_context_t *, int);
 176 static  int     cachefs_readdir(struct vnode *, struct uio *,
 177                         cred_t *, int *, caller_context_t *, int);
 178 static  int     cachefs_symlink(struct vnode *, char *, struct vattr *,
 179                         char *, cred_t *, caller_context_t *, int);
 180 static  int     cachefs_readlink(struct vnode *, struct uio *, cred_t *,
 181                         caller_context_t *);
 182 static int cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr);
 183 static int cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop);
 184 static  int     cachefs_fsync(struct vnode *, int, cred_t *,
 185                         caller_context_t *);
 186 static  void    cachefs_inactive(struct vnode *, cred_t *, caller_context_t *);
 187 static  int     cachefs_fid(struct vnode *, struct fid *, caller_context_t *);
 188 static  int     cachefs_rwlock(struct vnode *, int, caller_context_t *);
 189 static  void    cachefs_rwunlock(struct vnode *, int, caller_context_t *);
 190 static  int     cachefs_seek(struct vnode *, offset_t, offset_t *,
 191                         caller_context_t *);
 192 static  int     cachefs_frlock(struct vnode *, int, struct flock64 *,
 193                         int, offset_t, struct flk_callback *, cred_t *,
 194                         caller_context_t *);
 195 static  int     cachefs_space(struct vnode *, int, struct flock64 *, int,
 196                         offset_t, cred_t *, caller_context_t *);
 197 static  int     cachefs_realvp(struct vnode *, struct vnode **,
 198                         caller_context_t *);
 199 static  int     cachefs_getpage(struct vnode *, offset_t, size_t, uint_t *,
 200                         struct page *[], size_t, struct seg *, caddr_t,
 201                         enum seg_rw, cred_t *, caller_context_t *);
 202 static  int     cachefs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
 203                         struct page *[], size_t, struct seg *, caddr_t,
 204                         enum seg_rw, cred_t *);
 205 static  int     cachefs_getapage_back(struct vnode *, u_offset_t, size_t,
 206                 uint_t *, struct page *[], size_t, struct seg *, caddr_t,
 207                         enum seg_rw, cred_t *);
 208 static  int     cachefs_putpage(struct vnode *, offset_t, size_t, int,
 209                         cred_t *, caller_context_t *);
 210 static  int     cachefs_map(struct vnode *, offset_t, struct as *,
 211                         caddr_t *, size_t, uchar_t, uchar_t, uint_t, cred_t *,
 212                         caller_context_t *);
 213 static  int     cachefs_addmap(struct vnode *, offset_t, struct as *,
 214                         caddr_t, size_t, uchar_t, uchar_t, uint_t, cred_t *,
 215                         caller_context_t *);
 216 static  int     cachefs_delmap(struct vnode *, offset_t, struct as *,
 217                         caddr_t, size_t, uint_t, uint_t, uint_t, cred_t *,
 218                         caller_context_t *);
 219 static int      cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec,
 220                         int flag, cred_t *cr, caller_context_t *);
 221 static int      cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec,
 222                         int flag, cred_t *cr, caller_context_t *);
 223 static  int     cachefs_shrlock(vnode_t *, int, struct shrlock *, int,
 224                         cred_t *, caller_context_t *);
 225 static int cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
 226     cred_t *cr);
 227 static int cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec,
 228     int flag, cred_t *cr);
 229 
 230 static int      cachefs_dump(struct vnode *, caddr_t, offset_t, offset_t,
 231                         caller_context_t *);
 232 static int      cachefs_pageio(struct vnode *, page_t *,
 233                     u_offset_t, size_t, int, cred_t *, caller_context_t *);
 234 static int      cachefs_writepage(struct vnode *vp, caddr_t base,
 235                     int tcount, struct uio *uiop);
 236 static int      cachefs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
 237                         caller_context_t *);
 238 
     /*
      * Variants carrying the _backfs_nfsv4 suffix.  The comments in
      * cachefs_open()/cachefs_close() state that with an NFSv4 back file
      * system every vnode operation is passed through to the back file
      * system; presumably these are the pass-through implementations
      * (definitions are not visible here -- confirm).
      */
 239 static int      cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
 240                         cred_t *cr, caller_context_t *ct);
 241 static int      cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
 242                         cred_t *cr, caller_context_t *ct);
 243 static int      cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
 244                         int flags, cred_t *cr, caller_context_t *ct);
 245 static int      cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr,
 246                         vnode_t *vp);
 247 static int      cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off,
 248                         size_t len, uint_t *protp, struct page *pl[],
 249                         size_t plsz, struct seg *seg, caddr_t addr,
 250                         enum seg_rw rw, cred_t *cr);
 251 static int      cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off,
 252                         size_t len, int flags, cred_t *cr);
 253 static int      cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off,
 254                         struct as *as, caddr_t *addrp, size_t len, uchar_t prot,
 255                         uchar_t maxprot, uint_t flags, cred_t *cr);
 256 static int      cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd,
 257                         struct flock64 *bfp, int flag, offset_t offset,
 258                         cred_t *cr, caller_context_t *ct);
 259 
 /*
  * The cachefs vnode operations vector.  Its address is passed to
  * vn_make_ops() by cachefs_init_vnops(), and cachefs_getvnodeops() returns
  * the pointer; as a file-scope object it is NULL until something assigns
  * it.
  */
 260 struct vnodeops *cachefs_vnodeops;
 261 
     /*
      * Table handed to vn_make_ops(): each entry pairs a VOPNAME_* operation
      * name with the cachefs function implementing it.  The list ends with a
      * NULL, NULL entry.
      */
 262 static const fs_operation_def_t cachefs_vnodeops_template[] = {
 263         VOPNAME_OPEN,           { .vop_open = cachefs_open },
 264         VOPNAME_CLOSE,          { .vop_close = cachefs_close },
 265         VOPNAME_READ,           { .vop_read = cachefs_read },
 266         VOPNAME_WRITE,          { .vop_write = cachefs_write },
 267         VOPNAME_IOCTL,          { .vop_ioctl = cachefs_ioctl },
 268         VOPNAME_GETATTR,        { .vop_getattr = cachefs_getattr },
 269         VOPNAME_SETATTR,        { .vop_setattr = cachefs_setattr },
 270         VOPNAME_ACCESS,         { .vop_access = cachefs_access },
 271         VOPNAME_LOOKUP,         { .vop_lookup = cachefs_lookup },
 272         VOPNAME_CREATE,         { .vop_create = cachefs_create },
 273         VOPNAME_REMOVE,         { .vop_remove = cachefs_remove },
 274         VOPNAME_LINK,           { .vop_link = cachefs_link },
 275         VOPNAME_RENAME,         { .vop_rename = cachefs_rename },
 276         VOPNAME_MKDIR,          { .vop_mkdir = cachefs_mkdir },
 277         VOPNAME_RMDIR,          { .vop_rmdir = cachefs_rmdir },
 278         VOPNAME_READDIR,        { .vop_readdir = cachefs_readdir },
 279         VOPNAME_SYMLINK,        { .vop_symlink = cachefs_symlink },
 280         VOPNAME_READLINK,       { .vop_readlink = cachefs_readlink },
 281         VOPNAME_FSYNC,          { .vop_fsync = cachefs_fsync },
 282         VOPNAME_INACTIVE,       { .vop_inactive = cachefs_inactive },
 283         VOPNAME_FID,            { .vop_fid = cachefs_fid },
 284         VOPNAME_RWLOCK,         { .vop_rwlock = cachefs_rwlock },
 285         VOPNAME_RWUNLOCK,       { .vop_rwunlock = cachefs_rwunlock },
 286         VOPNAME_SEEK,           { .vop_seek = cachefs_seek },
 287         VOPNAME_FRLOCK,         { .vop_frlock = cachefs_frlock },
 288         VOPNAME_SPACE,          { .vop_space = cachefs_space },
 289         VOPNAME_REALVP,         { .vop_realvp = cachefs_realvp },
 290         VOPNAME_GETPAGE,        { .vop_getpage = cachefs_getpage },
 291         VOPNAME_PUTPAGE,        { .vop_putpage = cachefs_putpage },
 292         VOPNAME_MAP,            { .vop_map = cachefs_map },
 293         VOPNAME_ADDMAP,         { .vop_addmap = cachefs_addmap },
 294         VOPNAME_DELMAP,         { .vop_delmap = cachefs_delmap },
 295         VOPNAME_DUMP,           { .vop_dump = cachefs_dump },
 296         VOPNAME_PATHCONF,       { .vop_pathconf = cachefs_pathconf },
 297         VOPNAME_PAGEIO,         { .vop_pageio = cachefs_pageio },
 298         VOPNAME_SETSECATTR,     { .vop_setsecattr = cachefs_setsecattr },
 299         VOPNAME_GETSECATTR,     { .vop_getsecattr = cachefs_getsecattr },
 300         VOPNAME_SHRLOCK,        { .vop_shrlock = cachefs_shrlock },
 301         NULL,                   NULL
 302 };
 303 
 304 /* forward declarations of static helpers; being static, they are defined elsewhere in this file */
 305 static void cachefs_modified(cnode_t *cp);
 306 static int cachefs_modified_alloc(cnode_t *cp);
 307 
 /*
  * cachefs_init_vnops - build the cachefs vnode operations vector.
  *
  * Calls vn_make_ops() with the given name, cachefs_vnodeops_template and
  * the address of the global cachefs_vnodeops as the place to store the
  * result.
  *
  * Returns the value of vn_make_ops() unchanged (presumably 0 on success and
  * an errno value on failure -- confirm against vn_make_ops()).
  */
 308 int
 309 cachefs_init_vnops(char *name)
 310 {
 311         return (vn_make_ops(name,
 312             cachefs_vnodeops_template, &cachefs_vnodeops));
 313 }
 314 
 /*
  * cachefs_getvnodeops - return the global cachefs_vnodeops pointer.
  *
  * No check is made that cachefs_init_vnops() has run; until the pointer
  * has been assigned the caller gets NULL.
  */
 315 struct vnodeops *
 316 cachefs_getvnodeops(void)
 317 {
 318         return (cachefs_vnodeops);
 319 }
 320 
 /*
  * cachefs_open - open entry point for cachefs vnodes (registered under
  * VOPNAME_OPEN in cachefs_vnodeops_template).
  *
  * Arguments:
  *   vpp   address of the vnode pointer being opened; *vpp is read but
  *         never replaced by this function
  *   flag  open flags; only FREAD and FWRITE are examined here
  *   cr    credentials; a hold is taken and the pointer cached in
  *         cp->c_cred when the cnode has none, and cr is handed to the
  *         back file system calls
  *   ct    caller context, forwarded to VOP_OPEN() on the back vnode
  *
  * Returns 0 or an errno value: EPERM when the caller is not in the global
  * zone, EISDIR when FWRITE is requested on a VDIR or VLNK vnode, otherwise
  * the error from cachefs_cd_access(), from VOP_OPEN() on the back vnode,
  * or from CFSOP_CHECK_COBJECT().
  *
  * The work is done in a retry loop.  Each pass (re)acquires access with
  * cachefs_cd_access() and then, under cp->c_statelock, handles either the
  * disconnected or the connected case.  A pass is repeated when
  *   - a disconnected open for writing cannot be satisfied locally; the
  *     next pass hands connected == 1 to cachefs_cd_access(), or
  *   - a back file system call fails with an error that CFS_TIMEOUT()
  *     recognizes; access is released and cachefs_cd_timedout() is called
  *     before retrying.
  */
 321 static int
 322 cachefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 323 {
 324         int error = 0;
 325         cnode_t *cp = VTOC(*vpp);
 326         fscache_t *fscp = C_TO_FSCACHE(cp);
 327         int held = 0;
 328         int type;
 329         int connected = 0;
 330 
 331 #ifdef CFSDEBUG
 332         CFS_DEBUG(CFSDEBUG_VOPS)
 333                 printf("cachefs_open: ENTER vpp %p flag %x\n",
 334                     (void *)vpp, flag);
 335 #endif
             /* opens are refused outside the global zone */
 336         if (getzoneid() != GLOBAL_ZONEID) {
 337                 error = EPERM;
 338                 goto out;
 339         }
             /* neither directories nor symlinks may be opened for writing */
 340         if ((flag & FWRITE) &&
 341             ((*vpp)->v_type == VDIR || (*vpp)->v_type == VLNK)) {
 342                 error = EISDIR;
 343                 goto out;
 344         }
 345 
 346         /*
 347          * Cachefs only provides pass-through support for NFSv4,
 348          * and all vnode operations are passed through to the
 349          * back file system. For NFSv4 pass-through to work, only
 350          * connected operation is supported, the cnode backvp must
 351          * exist, and cachefs optional (eg., disconnectable) flags
 352          * are turned off. Assert these conditions to ensure that
 353          * the backfilesystem is called for the open operation.
 354          */
 355         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
 356         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
 357 
 358         for (;;) {
 359                 /* get (or renew) access to the file system */
 360                 if (held) {
 361                         /* Won't loop with NFSv4 connected behavior */
 362                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
 363                         cachefs_cd_release(fscp);
 364                         held = 0;
 365                 }
                     /*
                      * connected is 0 on the first pass and becomes 1 once a
                      * disconnected write has to be retried.
                      * NOTE(review): a nonzero value presumably makes
                      * cachefs_cd_access() insist on connected mode -- confirm.
                      */
 366                 error = cachefs_cd_access(fscp, connected, 0);
 367                 if (error)
 368                         goto out;
 369                 held = 1;
 370 
 371                 mutex_enter(&cp->c_statelock);
 372 
 373                 /* grab creds if we do not have any yet */
 374                 if (cp->c_cred == NULL) {
 375                         crhold(cr);
 376                         cp->c_cred = cr;
 377                 }
                     /*
                      * Assume the back vnode still has to be opened; the flag
                      * is cleared below when VOP_OPEN() is issued on c_backvp.
                      */
 378                 cp->c_flags |= CN_NEEDOPEN;
 379 
 380                 /* if we are disconnected */
 381                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
 382                         /* if we cannot write to the file system */
 383                         if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp)) {
 384                                 mutex_exit(&cp->c_statelock);
 385                                 connected = 1;
 386                                 continue;
 387                         }
 388                         /*
 389                          * Allow read only requests to continue
 390                          */
 391                         if ((flag & (FWRITE|FREAD)) == FREAD) {
 392                                 /* track the flag for opening the backvp */
 393                                 cp->c_rdcnt++;
 394                                 mutex_exit(&cp->c_statelock);
 395                                 error = 0;
 396                                 break;
 397                         }
 398 
 399                         /*
 400                          * check credentials  - if this procs
 401                          * credentials don't match the creds in the
 402                          * cnode disallow writing while disconnected.
 403                          */
 404                         if (crcmp(cp->c_cred, CRED()) != 0 &&
 405                             secpolicy_vnode_access2(CRED(), *vpp,
 406                             cp->c_attr.va_uid, 0, VWRITE) != 0) {
 407                                 mutex_exit(&cp->c_statelock);
 408                                 connected = 1;
 409                                 continue;
 410                         }
 411                         /* to get here, we know that the WRITE flag is on */
 412                         cp->c_wrcnt++;
 413                         if (flag & FREAD)
 414                                 cp->c_rdcnt++;
 415                 }
 416 
 417                 /* else if we are connected */
 418                 else {
 419                         /* if cannot use the cached copy of the file */
 420                         if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp) &&
 421                             ((cp->c_flags & CN_NOCACHE) == 0))
 422                                 cachefs_nocache(cp);
 423 
 424                         /* pass open to the back file */
 425                         if (cp->c_backvp) {
 426                                 cp->c_flags &= ~CN_NEEDOPEN;
 427                                 CFS_DPRINT_BACKFS_NFSV4(fscp,
 428                                     ("cachefs_open (nfsv4): cnode %p, "
 429                                     "backvp %p\n", cp, cp->c_backvp));
                                     /* c_statelock stays held across this call */
 430                                 error = VOP_OPEN(&cp->c_backvp, flag, cr, ct);
 431                                 if (CFS_TIMEOUT(fscp, error)) {
 432                                         mutex_exit(&cp->c_statelock);
 433                                         cachefs_cd_release(fscp);
 434                                         held = 0;
 435                                         cachefs_cd_timedout(fscp);
 436                                         continue;
 437                                 } else if (error) {
 438                                         mutex_exit(&cp->c_statelock);
 439                                         break;
 440                                 }
 441                         } else {
 442                                 /* backvp will be VOP_OPEN'd later */
 443                                 if (flag & FREAD)
 444                                         cp->c_rdcnt++;
 445                                 if (flag & FWRITE)
 446                                         cp->c_wrcnt++;
 447                         }
 448 
 449                         /*
 450                          * Now perform a consistency check on the file.
 451                          * If strict consistency then force a check to
 452                          * the backfs even if the timeout has not expired
 453                          * for close-to-open consistency.
 454                          */
 455                         type = 0;
 456                         if (fscp->fs_consttype == CFS_FS_CONST_STRICT)
 457                                 type = C_BACK_CHECK;
 458                         error = CFSOP_CHECK_COBJECT(fscp, cp, type, cr);
 459                         if (CFS_TIMEOUT(fscp, error)) {
 460                                 mutex_exit(&cp->c_statelock);
 461                                 cachefs_cd_release(fscp);
 462                                 held = 0;
 463                                 cachefs_cd_timedout(fscp);
 464                                 continue;
 465                         }
 466                 }
 467                 mutex_exit(&cp->c_statelock);
 468                 break;
 469         }
 470         if (held)
 471                 cachefs_cd_release(fscp);
             /*
              * Every "goto out" above is taken with held == 0, so no release
              * is needed once control is past this label.
              */
 472 out:
 473 #ifdef CFS_CD_DEBUG
 474         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
 475 #endif
 476 #ifdef CFSDEBUG
 477         CFS_DEBUG(CFSDEBUG_VOPS)
 478                 printf("cachefs_open: EXIT vpp %p error %d\n",
 479                     (void *)vpp, error);
 480 #endif
 481         return (error);
 482 }
 483 
 484 /* ARGSUSED */
 485 static int
 486 cachefs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
 487         caller_context_t *ct)
 488 {
 489         int error = 0;
 490         cnode_t *cp = VTOC(vp);
 491         fscache_t *fscp = C_TO_FSCACHE(cp);
 492         int held = 0;
 493         int connected = 0;
 494         int close_cnt = 1;
 495         cachefscache_t *cachep;
 496 
 497 #ifdef CFSDEBUG
 498         CFS_DEBUG(CFSDEBUG_VOPS)
 499                 printf("cachefs_close: ENTER vp %p\n", (void *)vp);
 500 #endif
 501         /*
 502          * Cachefs only provides pass-through support for NFSv4,
 503          * and all vnode operations are passed through to the
 504          * back file system. For NFSv4 pass-through to work, only
 505          * connected operation is supported, the cnode backvp must
 506          * exist, and cachefs optional (eg., disconnectable) flags
 507          * are turned off. Assert these conditions to ensure that
 508          * the backfilesystem is called for the close operation.
 509          */
 510         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
 511         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
 512 
 513         /*
 514          * File could have been passed in or inherited from the global zone, so
 515          * we don't want to flat out reject the request; we'll just leave things
 516          * the way they are and let the backfs (NFS) deal with it.
 517          */
 518         /* get rid of any local locks */
 519         if (CFS_ISFS_LLOCK(fscp)) {
 520                 (void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
 521         }
 522 
 523         /* clean up if this is the daemon closing down */
 524         if ((fscp->fs_cddaemonid == ttoproc(curthread)->p_pid) &&
 525             ((ttoproc(curthread)->p_pid) != 0) &&
 526             (vp == fscp->fs_rootvp) &&
 527             (count == 1)) {
 528                 mutex_enter(&fscp->fs_cdlock);
 529                 fscp->fs_cddaemonid = 0;
 530                 if (fscp->fs_dlogfile)
 531                         fscp->fs_cdconnected = CFS_CD_DISCONNECTED;
 532                 else
 533                         fscp->fs_cdconnected = CFS_CD_CONNECTED;
 534                 cv_broadcast(&fscp->fs_cdwaitcv);
 535                 mutex_exit(&fscp->fs_cdlock);
 536                 if (fscp->fs_flags & CFS_FS_ROOTFS) {
 537                         cachep = fscp->fs_cache;
 538                         mutex_enter(&cachep->c_contentslock);
 539                         ASSERT(cachep->c_rootdaemonid != 0);
 540                         cachep->c_rootdaemonid = 0;
 541                         mutex_exit(&cachep->c_contentslock);
 542                 }
 543                 return (0);
 544         }
 545 
 546         for (;;) {
 547                 /* get (or renew) access to the file system */
 548                 if (held) {
 549                         /* Won't loop with NFSv4 connected behavior */
 550                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
 551                         cachefs_cd_release(fscp);
 552                         held = 0;
 553                 }
 554                 error = cachefs_cd_access(fscp, connected, 0);
 555                 if (error)
 556                         goto out;
 557                 held = 1;
 558                 connected = 0;
 559 
 560                 /* if not the last close */
 561                 if (count > 1) {
 562                         if (fscp->fs_cdconnected != CFS_CD_CONNECTED)
 563                                 goto out;
 564                         mutex_enter(&cp->c_statelock);
 565                         if (cp->c_backvp) {
 566                                 CFS_DPRINT_BACKFS_NFSV4(fscp,
 567                                     ("cachefs_close (nfsv4): cnode %p, "
 568                                     "backvp %p\n", cp, cp->c_backvp));
 569                                 error = VOP_CLOSE(cp->c_backvp, flag, count,
 570                                     offset, cr, ct);
 571                                 if (CFS_TIMEOUT(fscp, error)) {
 572                                         mutex_exit(&cp->c_statelock);
 573                                         cachefs_cd_release(fscp);
 574                                         held = 0;
 575                                         cachefs_cd_timedout(fscp);
 576                                         continue;
 577                                 }
 578                         }
 579                         mutex_exit(&cp->c_statelock);
 580                         goto out;
 581                 }
 582 
 583                 /*
 584                  * If the file is an unlinked file, then flush the lookup
 585                  * cache so that inactive will be called if this is
 586                  * the last reference.  It will invalidate all of the
 587                  * cached pages, without writing them out.  Writing them
 588                  * out is not required because they will be written to a
 589                  * file which will be immediately removed.
 590                  */
 591                 if (cp->c_unldvp != NULL) {
 592                         dnlc_purge_vp(vp);
 593                         mutex_enter(&cp->c_statelock);
 594                         error = cp->c_error;
 595                         cp->c_error = 0;
 596                         mutex_exit(&cp->c_statelock);
 597                         /* always call VOP_CLOSE() for back fs vnode */
 598                 }
 599 
 600                 /* force dirty data to stable storage */
 601                 else if ((vp->v_type == VREG) && (flag & FWRITE) &&
 602                     !CFS_ISFS_BACKFS_NFSV4(fscp)) {
 603                         /* clean the cachefs pages synchronously */
 604                         error = cachefs_putpage_common(vp, (offset_t)0,
 605                             0, 0, cr);
 606                         if (CFS_TIMEOUT(fscp, error)) {
 607                                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
 608                                         cachefs_cd_release(fscp);
 609                                         held = 0;
 610                                         cachefs_cd_timedout(fscp);
 611                                         continue;
 612                                 } else {
 613                                         connected = 1;
 614                                         continue;
 615                                 }
 616                         }
 617 
 618                         /* if no space left in cache, wait until connected */
 619                         if ((error == ENOSPC) &&
 620                             (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
 621                                 connected = 1;
 622                                 continue;
 623                         }
 624 
 625                         /* clear the cnode error if putpage worked */
 626                         if ((error == 0) && cp->c_error) {
 627                                 mutex_enter(&cp->c_statelock);
 628                                 cp->c_error = 0;
 629                                 mutex_exit(&cp->c_statelock);
 630                         }
 631 
 632                         /* if any other important error */
 633                         if (cp->c_error) {
 634                                 /* get rid of the pages */
 635                                 (void) cachefs_putpage_common(vp,
 636                                     (offset_t)0, 0, B_INVAL | B_FORCE, cr);
 637                                 dnlc_purge_vp(vp);
 638                         }
 639                 }
 640 
 641                 mutex_enter(&cp->c_statelock);
 642                 if (cp->c_backvp &&
 643                     (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
 644                         error = VOP_CLOSE(cp->c_backvp, flag, close_cnt,
 645                             offset, cr, ct);
 646                         if (CFS_TIMEOUT(fscp, error)) {
 647                                 mutex_exit(&cp->c_statelock);
 648                                 cachefs_cd_release(fscp);
 649                                 held = 0;
 650                                 cachefs_cd_timedout(fscp);
 651                                 /* don't decrement the vnode counts again */
 652                                 close_cnt = 0;
 653                                 continue;
 654                         }
 655                 }
 656                 mutex_exit(&cp->c_statelock);
 657                 break;
 658         }
 659 
 660         mutex_enter(&cp->c_statelock);
 661         if (!error)
 662                 error = cp->c_error;
 663         cp->c_error = 0;
 664         mutex_exit(&cp->c_statelock);
 665 
 666 out:
 667         if (held)
 668                 cachefs_cd_release(fscp);
 669 #ifdef CFS_CD_DEBUG
 670         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
 671 #endif
 672 
 673 #ifdef CFSDEBUG
 674         CFS_DEBUG(CFSDEBUG_VOPS)
 675                 printf("cachefs_close: EXIT vp %p\n", (void *)vp);
 676 #endif
 677         return (error);
 678 }
 679 
 680 /*ARGSUSED*/
 681 static int
 682 cachefs_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 683         caller_context_t *ct)
 684 {
 685         struct cnode *cp = VTOC(vp);
 686         fscache_t *fscp = C_TO_FSCACHE(cp);
 687         register u_offset_t off;
 688         register int mapoff;
 689         register caddr_t base;
 690         int n;
 691         offset_t diff;
 692         uint_t flags = 0;
 693         int error = 0;
 694 
 695 #if 0
 696         if (vp->v_flag & VNOCACHE)
 697                 flags = SM_INVAL;
 698 #endif
 699         if (getzoneid() != GLOBAL_ZONEID)
 700                 return (EPERM);
 701         if (vp->v_type != VREG)
 702                 return (EISDIR);
 703 
 704         ASSERT(RW_READ_HELD(&cp->c_rwlock));
 705 
 706         if (uiop->uio_resid == 0)
 707                 return (0);
 708 
 709 
 710         if (uiop->uio_loffset < (offset_t)0)
 711                 return (EINVAL);
 712 
 713         /*
 714          * Call backfilesystem to read if NFSv4, the cachefs code
 715          * does the read from the back filesystem asynchronously
 716          * which is not supported by pass-through functionality.
 717          */
 718         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
 719                 error = cachefs_read_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
 720                 goto out;
 721         }
 722 
 723         if (MANDLOCK(vp, cp->c_attr.va_mode)) {
 724                 error = chklock(vp, FREAD, (offset_t)uiop->uio_loffset,
 725                     uiop->uio_resid, uiop->uio_fmode, ct);
 726                 if (error)
 727                         return (error);
 728         }
 729 
 730         /*
 731          * Sit in a loop and transfer (uiomove) the data in up to
 732          * MAXBSIZE chunks. Each chunk is mapped into the kernel's
 733          * address space as needed and then released.
 734          */
 735         do {
 736                 /*
 737                  *      off     Offset of current MAXBSIZE chunk
 738                  *      mapoff  Offset within the current chunk
 739                  *      n       Number of bytes to move from this chunk
 740                  *      base    kernel address of mapped in chunk
 741                  */
 742                 off = uiop->uio_loffset & (offset_t)MAXBMASK;
 743                 mapoff = uiop->uio_loffset & MAXBOFFSET;
 744                 n = MAXBSIZE - mapoff;
 745                 if (n > uiop->uio_resid)
 746                         n = (uint_t)uiop->uio_resid;
 747 
 748                 /* perform consistency check */
 749                 error = cachefs_cd_access(fscp, 0, 0);
 750                 if (error)
 751                         break;
 752                 mutex_enter(&cp->c_statelock);
 753                 error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
 754                 diff = cp->c_size - uiop->uio_loffset;
 755                 mutex_exit(&cp->c_statelock);
 756                 if (CFS_TIMEOUT(fscp, error)) {
 757                         cachefs_cd_release(fscp);
 758                         cachefs_cd_timedout(fscp);
 759                         error = 0;
 760                         continue;
 761                 }
 762                 cachefs_cd_release(fscp);
 763 
 764                 if (error)
 765                         break;
 766 
 767                 if (diff <= (offset_t)0)
 768                         break;
 769                 if (diff < (offset_t)n)
 770                         n = diff;
 771 
 772                 base = segmap_getmapflt(segkmap, vp, off, (uint_t)n, 1, S_READ);
 773 
 774                 error = segmap_fault(kas.a_hat, segkmap, base, n,
 775                     F_SOFTLOCK, S_READ);
 776                 if (error) {
 777                         (void) segmap_release(segkmap, base, 0);
 778                         if (FC_CODE(error) == FC_OBJERR)
 779                                 error =  FC_ERRNO(error);
 780                         else
 781                                 error = EIO;
 782                         break;
 783                 }
 784                 error = uiomove(base+mapoff, n, UIO_READ, uiop);
 785                 (void) segmap_fault(kas.a_hat, segkmap, base, n,
 786                     F_SOFTUNLOCK, S_READ);
 787                 if (error == 0) {
 788                         /*
 789                          * if we read a whole page(s), or to eof,
 790                          *  we won't need this page(s) again soon.
 791                          */
 792                         if (n + mapoff == MAXBSIZE ||
 793                             uiop->uio_loffset == cp->c_size)
 794                                 flags |= SM_DONTNEED;
 795                 }
 796                 (void) segmap_release(segkmap, base, flags);
 797         } while (error == 0 && uiop->uio_resid > 0);
 798 
 799 out:
 800 #ifdef CFSDEBUG
 801         CFS_DEBUG(CFSDEBUG_VOPS)
 802                 printf("cachefs_read: EXIT error %d resid %ld\n", error,
 803                     uiop->uio_resid);
 804 #endif
 805         return (error);
 806 }
 807 
 808 /*
 809  * cachefs_read_backfs_nfsv4
 810  *
 811  * Call NFSv4 back filesystem to handle the read (cachefs
 812  * pass-through support for NFSv4).
 813  */
 814 static int
 815 cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 816                         caller_context_t *ct)
 817 {
 818         cnode_t *cp = VTOC(vp);
 819         fscache_t *fscp = C_TO_FSCACHE(cp);
 820         vnode_t *backvp;
 821         int error;
 822 
 823         /*
 824          * For NFSv4 pass-through to work, only connected operation
 825          * is supported, the cnode backvp must exist, and cachefs
 826          * optional (eg., disconnectable) flags are turned off. Assert
 827          * these conditions for the read operation.
 828          */
 829         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
 830         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
 831 
 832         /* Call backfs vnode op after extracting backvp */
 833         mutex_enter(&cp->c_statelock);
 834         backvp = cp->c_backvp;
 835         mutex_exit(&cp->c_statelock);
 836 
 837         CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_read_backfs_nfsv4: cnode %p, "
 838             "backvp %p\n", cp, backvp));
 839 
 840         (void) VOP_RWLOCK(backvp, V_WRITELOCK_FALSE, ct);
 841         error = VOP_READ(backvp, uiop, ioflag, cr, ct);
 842         VOP_RWUNLOCK(backvp, V_WRITELOCK_FALSE, ct);
 843 
 844         /* Increment cache miss counter */
 845         fscp->fs_stats.st_misses++;
 846 
 847         return (error);
 848 }
 849 
 850 /*ARGSUSED*/
 851 static int
 852 cachefs_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 853         caller_context_t *ct)
 854 {
 855         struct cnode *cp = VTOC(vp);
 856         fscache_t *fscp = C_TO_FSCACHE(cp);
 857         int error = 0;
 858         u_offset_t off;
 859         caddr_t base;
 860         uint_t bsize;
 861         uint_t flags;
 862         int n, on;
 863         rlim64_t limit = uiop->uio_llimit;
 864         ssize_t resid;
 865         offset_t offset;
 866         offset_t remainder;
 867 
 868 #ifdef CFSDEBUG
 869         CFS_DEBUG(CFSDEBUG_VOPS)
 870                 printf(
 871                 "cachefs_write: ENTER vp %p offset %llu count %ld cflags %x\n",
 872                     (void *)vp, uiop->uio_loffset, uiop->uio_resid,
 873                     cp->c_flags);
 874 #endif
 875         if (getzoneid() != GLOBAL_ZONEID) {
 876                 error = EPERM;
 877                 goto out;
 878         }
 879         if (vp->v_type != VREG) {
 880                 error = EISDIR;
 881                 goto out;
 882         }
 883 
 884         ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
 885 
 886         if (uiop->uio_resid == 0) {
 887                 goto out;
 888         }
 889 
 890         /* Call backfilesystem to write if NFSv4 */
 891         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
 892                 error = cachefs_write_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
 893                 goto out2;
 894         }
 895 
 896         if (MANDLOCK(vp, cp->c_attr.va_mode)) {
 897                 error = chklock(vp, FWRITE, (offset_t)uiop->uio_loffset,
 898                     uiop->uio_resid, uiop->uio_fmode, ct);
 899                 if (error)
 900                         goto out;
 901         }
 902 
 903         if (ioflag & FAPPEND) {
 904                 for (;;) {
 905                         /* do consistency check to get correct file size */
 906                         error = cachefs_cd_access(fscp, 0, 1);
 907                         if (error)
 908                                 goto out;
 909                         mutex_enter(&cp->c_statelock);
 910                         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
 911                         uiop->uio_loffset = cp->c_size;
 912                         mutex_exit(&cp->c_statelock);
 913                         if (CFS_TIMEOUT(fscp, error)) {
 914                                 cachefs_cd_release(fscp);
 915                                 cachefs_cd_timedout(fscp);
 916                                 continue;
 917                         }
 918                         cachefs_cd_release(fscp);
 919                         if (error)
 920                                 goto out;
 921                         break;
 922                 }
 923         }
 924 
 925         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 926                 limit = MAXOFFSET_T;
 927 
 928         if (uiop->uio_loffset >= limit) {
 929                 proc_t *p = ttoproc(curthread);
 930 
 931                 mutex_enter(&p->p_lock);
 932                 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
 933                     p, RCA_UNSAFE_SIGINFO);
 934                 mutex_exit(&p->p_lock);
 935                 error = EFBIG;
 936                 goto out;
 937         }
 938         if (uiop->uio_loffset > fscp->fs_offmax) {
 939                 error = EFBIG;
 940                 goto out;
 941         }
 942 
 943         if (limit > fscp->fs_offmax)
 944                 limit = fscp->fs_offmax;
 945 
 946         if (uiop->uio_loffset < (offset_t)0) {
 947                 error = EINVAL;
 948                 goto out;
 949         }
 950 
 951         offset = uiop->uio_loffset + uiop->uio_resid;
 952         /*
 953          * Check to make sure that the process will not exceed
 954          * its limit on file size.  It is okay to write up to
 955          * the limit, but not beyond.  Thus, the write which
 956          * reaches the limit will be short and the next write
 957          * will return an error.
 958          */
 959         remainder = 0;
 960         if (offset > limit) {
 961                 remainder = (int)(offset - (u_offset_t)limit);
 962                 uiop->uio_resid = limit - uiop->uio_loffset;
 963                 if (uiop->uio_resid <= 0) {
 964                         proc_t *p = ttoproc(curthread);
 965 
 966                         uiop->uio_resid += remainder;
 967                         mutex_enter(&p->p_lock);
 968                         (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
 969                             p->p_rctls, p, RCA_UNSAFE_SIGINFO);
 970                         mutex_exit(&p->p_lock);
 971                         error = EFBIG;
 972                         goto out;
 973                 }
 974         }
 975 
 976         resid = uiop->uio_resid;
 977         offset = uiop->uio_loffset;
 978         bsize = vp->v_vfsp->vfs_bsize;
 979 
 980         /* loop around and do the write in MAXBSIZE chunks */
 981         do {
 982                 /* mapping offset */
 983                 off = uiop->uio_loffset & (offset_t)MAXBMASK;
 984                 on = uiop->uio_loffset & MAXBOFFSET; /* Rel. offset */
 985                 n = MAXBSIZE - on;
 986                 if (n > uiop->uio_resid)
 987                         n = (int)uiop->uio_resid;
 988 
 989                 /*
 990                  * Touch the page and fault it in if it is not in
 991                  * core before segmap_getmapflt can lock it. This
 992                  * is to avoid the deadlock if the buffer is mapped
 993                  * to the same file through mmap which we want to
 994                  * write to.
 995                  */
 996                 uio_prefaultpages((long)n, uiop);
 997 
 998                 base = segmap_getmap(segkmap, vp, off);
 999                 error = cachefs_writepage(vp, (base + on), n, uiop);
1000                 if (error == 0) {
1001                         flags = 0;
1002                         /*
1003                          * Have written a whole block.Start an
1004                          * asynchronous write and mark the buffer to
1005                          * indicate that it won't be needed again
1006                          * soon.
1007                          */
1008                         if (n + on == bsize) {
1009                                 flags = SM_WRITE |SM_ASYNC |SM_DONTNEED;
1010                         }
1011 #if 0
1012                         /* XXX need to understand this */
1013                         if ((ioflag & (FSYNC|FDSYNC)) ||
1014                             (cp->c_backvp && vn_has_flocks(cp->c_backvp))) {
1015                                 flags &= ~SM_ASYNC;
1016                                 flags |= SM_WRITE;
1017                         }
1018 #else
1019                         if (ioflag & (FSYNC|FDSYNC)) {
1020                                 flags &= ~SM_ASYNC;
1021                                 flags |= SM_WRITE;
1022                         }
1023 #endif
1024                         error = segmap_release(segkmap, base, flags);
1025                 } else {
1026                         (void) segmap_release(segkmap, base, 0);
1027                 }
1028         } while (error == 0 && uiop->uio_resid > 0);
1029 
1030 out:
1031         if (error == EINTR && (ioflag & (FSYNC|FDSYNC))) {
1032                 uiop->uio_resid = resid;
1033                 uiop->uio_loffset = offset;
1034         } else
1035                 uiop->uio_resid += remainder;
1036 
1037 out2:
1038 #ifdef CFSDEBUG
1039         CFS_DEBUG(CFSDEBUG_VOPS)
1040                 printf("cachefs_write: EXIT error %d\n", error);
1041 #endif
1042         return (error);
1043 }
1044 
1045 /*
1046  * cachefs_write_backfs_nfsv4
1047  *
1048  * Call NFSv4 back filesystem to handle the write (cachefs
1049  * pass-through support for NFSv4).
1050  */
1051 static int
1052 cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
1053                         caller_context_t *ct)
1054 {
1055         cnode_t *cp = VTOC(vp);
1056         fscache_t *fscp = C_TO_FSCACHE(cp);
1057         vnode_t *backvp;
1058         int error;
1059 
1060         /*
1061          * For NFSv4 pass-through to work, only connected operation
1062          * is supported, the cnode backvp must exist, and cachefs
1063          * optional (eg., disconnectable) flags are turned off. Assert
1064          * these conditions for the read operation.
1065          */
1066         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1067         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1068 
1069         /* Call backfs vnode op after extracting the backvp */
1070         mutex_enter(&cp->c_statelock);
1071         backvp = cp->c_backvp;
1072         mutex_exit(&cp->c_statelock);
1073 
1074         CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_write_backfs_nfsv4: cnode %p, "
1075             "backvp %p\n", cp, backvp));
1076         (void) VOP_RWLOCK(backvp, V_WRITELOCK_TRUE, ct);
1077         error = VOP_WRITE(backvp, uiop, ioflag, cr, ct);
1078         VOP_RWUNLOCK(backvp, V_WRITELOCK_TRUE, ct);
1079 
1080         return (error);
1081 }
1082 
1083 /*
1084  * see if we've charged ourselves for frontfile data at
1085  * the given offset.  If not, allocate a block for it now.
1086  */
1087 static int
1088 cachefs_charge_page(struct cnode *cp, u_offset_t offset)
1089 {
1090         u_offset_t blockoff;
1091         int error;
1092         int inc;
1093 
1094         ASSERT(MUTEX_HELD(&cp->c_statelock));
1095         /*LINTED*/
1096         ASSERT(PAGESIZE <= MAXBSIZE);
1097 
1098         error = 0;
1099         blockoff = offset & (offset_t)MAXBMASK;
1100 
1101         /* get the front file if necessary so allocblocks works */
1102         if ((cp->c_frontvp == NULL) &&
1103             ((cp->c_flags & CN_NOCACHE) == 0)) {
1104                 (void) cachefs_getfrontfile(cp);
1105         }
1106         if (cp->c_flags & CN_NOCACHE)
1107                 return (1);
1108 
1109         if (cachefs_check_allocmap(cp, blockoff))
1110                 return (0);
1111 
1112         for (inc = PAGESIZE; inc < MAXBSIZE; inc += PAGESIZE)
1113                 if (cachefs_check_allocmap(cp, blockoff+inc))
1114                         return (0);
1115 
1116         error = cachefs_allocblocks(C_TO_FSCACHE(cp)->fs_cache, 1,
1117             cp->c_metadata.md_rltype);
1118         if (error == 0) {
1119                 cp->c_metadata.md_frontblks++;
1120                 cp->c_flags |= CN_UPDATED;
1121         }
1122         return (error);
1123 }
1124 
1125 /*
1126  * Called only by cachefs_write to write 1 page or less of data.
1127  *      base   - base address kernel addr space
1128  *      tcount - Total bytes to move - < MAXBSIZE
1129  */
1130 static int
1131 cachefs_writepage(vnode_t *vp, caddr_t base, int tcount, uio_t *uiop)
1132 {
1133         struct cnode *cp =  VTOC(vp);
1134         fscache_t *fscp = C_TO_FSCACHE(cp);
1135         register int n;
1136         register u_offset_t offset;
1137         int error = 0, terror;
1138         extern struct as kas;
1139         u_offset_t lastpage_off;
1140         int pagecreate = 0;
1141         int newpage;
1142 
1143 #ifdef CFSDEBUG
1144         CFS_DEBUG(CFSDEBUG_VOPS)
1145                 printf(
1146                     "cachefs_writepage: ENTER vp %p offset %llu len %ld\\\n",
1147                     (void *)vp, uiop->uio_loffset, uiop->uio_resid);
1148 #endif
1149 
1150         /*
1151          * Move bytes in PAGESIZE chunks. We must avoid spanning pages in
1152          * uiomove() because page faults may cause the cache to be invalidated
1153          * out from under us.
1154          */
1155         do {
1156                 offset = uiop->uio_loffset;
1157                 lastpage_off = (cp->c_size - 1) & (offset_t)PAGEMASK;
1158 
1159                 /*
1160                  * If not connected then need to make sure we have space
1161                  * to perform the write.  We could make this check
1162                  * a little tighter by only doing it if we are growing the file.
1163                  */
1164                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1165                         error = cachefs_allocblocks(fscp->fs_cache, 1,
1166                             cp->c_metadata.md_rltype);
1167                         if (error)
1168                                 break;
1169                         cachefs_freeblocks(fscp->fs_cache, 1,
1170                             cp->c_metadata.md_rltype);
1171                 }
1172 
1173                 /*
1174                  * n is the number of bytes required to satisfy the request
1175                  * or the number of bytes to fill out the page.
1176                  */
1177                 n = (int)(PAGESIZE - ((uintptr_t)base & PAGEOFFSET));
1178                 if (n > tcount)
1179                         n = tcount;
1180 
1181                 /*
1182                  * The number of bytes of data in the last page can not
1183                  * be accurately be determined while page is being
1184                  * uiomove'd to and the size of the file being updated.
1185                  * Thus, inform threads which need to know accurately
1186                  * how much data is in the last page of the file.  They
1187                  * will not do the i/o immediately, but will arrange for
1188                  * the i/o to happen later when this modify operation
1189                  * will have finished.
1190                  *
1191                  * in similar NFS code, this is done right before the
1192                  * uiomove(), which is best.  but here in cachefs, we
1193                  * have two uiomove()s, so we must do it here.
1194                  */
1195                 ASSERT(!(cp->c_flags & CN_CMODINPROG));
1196                 mutex_enter(&cp->c_statelock);
1197                 cp->c_flags |= CN_CMODINPROG;
1198                 cp->c_modaddr = (offset & (offset_t)MAXBMASK);
1199                 mutex_exit(&cp->c_statelock);
1200 
1201                 /*
1202                  * Check to see if we can skip reading in the page
1203                  * and just allocate the memory.  We can do this
1204                  * if we are going to rewrite the entire mapping
1205                  * or if we are going to write to or beyond the current
1206                  * end of file from the beginning of the mapping.
1207                  */
1208                 if ((offset > (lastpage_off + PAGEOFFSET)) ||
1209                     ((cp->c_size == 0) && (offset < PAGESIZE)) ||
1210                     ((uintptr_t)base & PAGEOFFSET) == 0 && (n == PAGESIZE ||
1211                     ((offset + n) >= cp->c_size))) {
1212                         pagecreate = 1;
1213 
1214                         /*
1215                          * segmap_pagecreate() returns 1 if it calls
1216                          * page_create_va() to allocate any pages.
1217                          */
1218                         newpage = segmap_pagecreate(segkmap,
1219                             (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK),
1220                             PAGESIZE, 0);
1221                         /* do not zero page if we are overwriting all of it */
1222                         if (!((((uintptr_t)base & PAGEOFFSET) == 0) &&
1223                             (n == PAGESIZE))) {
1224                                 (void) kzero((void *)
1225                                     ((uintptr_t)base & (uintptr_t)PAGEMASK),
1226                                     PAGESIZE);
1227                         }
1228                         error = uiomove(base, n, UIO_WRITE, uiop);
1229 
1230                         /*
1231                          * Unlock the page allocated by page_create_va()
1232                          * in segmap_pagecreate()
1233                          */
1234                         if (newpage)
1235                                 segmap_pageunlock(segkmap,
1236                                     (caddr_t)((uintptr_t)base &
1237                                     (uintptr_t)PAGEMASK),
1238                                     PAGESIZE, S_WRITE);
1239                 } else {
1240                         /*
1241                          * KLUDGE ! Use segmap_fault instead of faulting and
1242                          * using as_fault() to avoid a recursive readers lock
1243                          * on kas.
1244                          */
1245                         error = segmap_fault(kas.a_hat, segkmap, (caddr_t)
1246                             ((uintptr_t)base & (uintptr_t)PAGEMASK),
1247                             PAGESIZE, F_SOFTLOCK, S_WRITE);
1248                         if (error) {
1249                                 if (FC_CODE(error) == FC_OBJERR)
1250                                         error =  FC_ERRNO(error);
1251                                 else
1252                                         error = EIO;
1253                                 break;
1254                         }
1255                         error = uiomove(base, n, UIO_WRITE, uiop);
1256                         (void) segmap_fault(kas.a_hat, segkmap, (caddr_t)
1257                             ((uintptr_t)base & (uintptr_t)PAGEMASK),
1258                             PAGESIZE, F_SOFTUNLOCK, S_WRITE);
1259                 }
1260                 n = (int)(uiop->uio_loffset - offset); /* n = # bytes written */
1261                 base += n;
1262                 tcount -= n;
1263 
1264                 /* get access to the file system */
1265                 if ((terror = cachefs_cd_access(fscp, 0, 1)) != 0) {
1266                         error = terror;
1267                         break;
1268                 }
1269 
1270                 /*
1271                  * cp->c_attr.va_size is the maximum number of
1272                  * bytes known to be in the file.
1273                  * Make sure it is at least as high as the
1274                  * last byte we just wrote into the buffer.
1275                  */
1276                 mutex_enter(&cp->c_statelock);
1277                 if (cp->c_size < uiop->uio_loffset) {
1278                         cp->c_size = uiop->uio_loffset;
1279                 }
1280                 if (cp->c_size != cp->c_attr.va_size) {
1281                         cp->c_attr.va_size = cp->c_size;
1282                         cp->c_flags |= CN_UPDATED;
1283                 }
1284                 /* c_size is now correct, so we can clear modinprog */
1285                 cp->c_flags &= ~CN_CMODINPROG;
1286                 if (error == 0) {
1287                         cp->c_flags |= CDIRTY;
1288                         if (pagecreate && (cp->c_flags & CN_NOCACHE) == 0) {
1289                                 /*
1290                                  * if we're not in NOCACHE mode
1291                                  * (i.e., single-writer), we update the
1292                                  * allocmap here rather than waiting until
1293                                  * cachefspush is called.  This prevents
1294                                  * getpage from clustering up pages from
1295                                  * the backfile and stomping over the changes
1296                                  * we make here.
1297                                  */
1298                                 if (cachefs_charge_page(cp, offset) == 0) {
1299                                         cachefs_update_allocmap(cp,
1300                                             offset & (offset_t)PAGEMASK,
1301                                             (size_t)PAGESIZE);
1302                                 }
1303 
1304                                 /* else we ran out of space */
1305                                 else {
1306                                         /* nocache file if connected */
1307                                         if (fscp->fs_cdconnected ==
1308                                             CFS_CD_CONNECTED)
1309                                                 cachefs_nocache(cp);
1310                                         /*
1311                                          * If disconnected then cannot
1312                                          * nocache the file.  Let it have
1313                                          * the space.
1314                                          */
1315                                         else {
1316                                                 cp->c_metadata.md_frontblks++;
1317                                                 cp->c_flags |= CN_UPDATED;
1318                                                 cachefs_update_allocmap(cp,
1319                                                     offset & (offset_t)PAGEMASK,
1320                                                     (size_t)PAGESIZE);
1321                                         }
1322                                 }
1323                         }
1324                 }
1325                 mutex_exit(&cp->c_statelock);
1326                 cachefs_cd_release(fscp);
1327         } while (tcount > 0 && error == 0);
1328 
1329         if (cp->c_flags & CN_CMODINPROG) {
1330                 /* XXX assert error != 0?  FC_ERRNO() makes this more risky. */
1331                 mutex_enter(&cp->c_statelock);
1332                 cp->c_flags &= ~CN_CMODINPROG;
1333                 mutex_exit(&cp->c_statelock);
1334         }
1335 
1336 #ifdef CFS_CD_DEBUG
1337         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
1338 #endif
1339 
1340 #ifdef CFSDEBUG
1341         CFS_DEBUG(CFSDEBUG_VOPS)
1342                 printf("cachefs_writepage: EXIT error %d\n", error);
1343 #endif
1344 
1345         return (error);
1346 }
1347 
1348 /*
1349  * Pushes out pages to the back and/or front file system.
      *
      * The dirty page pp of vp is klustered within the block that contains
      * it, and the resulting range is written to the back file (when
      * connected) and/or to the front file.  B_ASYNC is not supported and
      * cr must be non-NULL (both are asserted).  When offp/lenp are
      * non-NULL they are set to the iooff/iolen values in effect when the
      * function returns.
      *
      * Returns 0 on success, and also when the push is deferred because a
      * write to the same region is still in progress (CN_CMODINPROG);
      * otherwise returns ENOMEM if pageio_setup() fails, or the error from
      * cachefs_push_connected()/cachefs_push_front().
1350  */
1351 static int
1352 cachefs_push(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
1353     int flags, cred_t *cr)
1354 {
1355         struct cnode *cp = VTOC(vp);
1356         struct buf *bp;
1357         int error;
1358         fscache_t *fscp = C_TO_FSCACHE(cp);
1359         u_offset_t iooff;
1360         size_t iolen;
1361         u_offset_t lbn;
1362         u_offset_t lbn_off;
1363         uint_t bsize;
1364
1365         ASSERT((flags & B_ASYNC) == 0);
1366         ASSERT(!vn_is_readonly(vp));
1367         ASSERT(pp != NULL);
1368         ASSERT(cr != NULL);
1369
             /*
              * The kluster unit is the larger of the fs block size and a
              * page; lbn_off is the start of the unit that contains pp.
              */
1370         bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
1371         lbn = pp->p_offset / bsize;
1372         lbn_off = lbn * bsize;
1373
1374         /*
1375          * Find a kluster that fits in one block, or in
1376          * one page if pages are bigger than blocks.  If
1377          * there is less file space allocated than a whole
1378          * page, we'll shorten the i/o request below.
1379          */
1380
1381         pp = pvn_write_kluster(vp, pp, &iooff, &iolen, lbn_off,
1382             roundup(bsize, PAGESIZE), flags);
1383
1384         /*
1385          * The CN_CMODINPROG flag makes sure that we use a correct
1386          * value of c_size, below.  CN_CMODINPROG is set in
1387          * cachefs_writepage().  When CN_CMODINPROG is set it
1388          * indicates that a uiomove() is in progress and the c_size
1389          * has not been made consistent with the new size of the
1390          * file. When the uiomove() completes the c_size is updated
1391          * and the CN_CMODINPROG flag is cleared.
1392          *
1393          * The CN_CMODINPROG flag makes sure that cachefs_push_front
1394          * and cachefs_push_connected see a consistent value of
1395          * c_size.  Without this handshaking, it is possible that
1396          * these routines will pick up the old value of c_size before
1397          * the uiomove() in cachefs_writepage() completes.  This will
1398          * result in the vn_rdwr() being too small, and data loss.
1399          *
1400          * More precisely, there is a window between the time the
1401          * uiomove() completes and the time the c_size is updated. If
1402          * a VOP_PUTPAGE() operation intervenes in this window, the
1403          * page will be picked up, because it is dirty; it will be
1404          * unlocked, unless it was pagecreate'd. When the page is
1405          * picked up as dirty, the dirty bit is reset
1406          * (pvn_getdirty()). In cachefs_push_connected(), c_size is
1407          * checked.  This will still be the old size.  Therefore, the
1408          * page will not be written out to the correct length, and the
1409          * page will be clean, so the data may disappear.
1410          */
             /*
              * The flag is tested once without the lock as a cheap filter
              * and then re-tested under c_statelock before it is acted on.
              */
1411         if (cp->c_flags & CN_CMODINPROG) {
1412                 mutex_enter(&cp->c_statelock);
1413                 if ((cp->c_flags & CN_CMODINPROG) &&
1414                     cp->c_modaddr + MAXBSIZE > iooff &&
1415                     cp->c_modaddr < iooff + iolen) {
1416                         page_t *plist;
1417
1418                         /*
1419                          * A write is in progress for this region of
1420                          * the file.  If we did not detect
1421                          * CN_CMODINPROG here then this path through
1422                          * cachefs_push_connected() would eventually
1423                          * do the vn_rdwr() and may not write out all
1424                          * of the data in the pages.  We end up losing
1425                          * data. So we decide to set the modified bit
1426                          * on each page in the page list and mark the
1427                          * cnode with CDIRTY.  This push will be
1428                          * restarted at some later time.
1429                          */
1430
1431                         plist = pp;
1432                         while (plist != NULL) {
1433                                 pp = plist;
1434                                 page_sub(&plist, pp);
1435                                 hat_setmod(pp);
1436                                 page_io_unlock(pp);
1437                                 page_unlock(pp);
1438                         }
1439                         cp->c_flags |= CDIRTY;
1440                         mutex_exit(&cp->c_statelock);
                             /*
                              * Nothing was written, but the kluster range
                              * is still reported to the caller.
                              */
1441                         if (offp)
1442                                 *offp = iooff;
1443                         if (lenp)
1444                                 *lenp = iolen;
1445                         return (0);
1446                 }
1447                 mutex_exit(&cp->c_statelock);
1448         }
1449
1450         /*
1451          * Set the pages up for pageout.
1452          */
1453         bp = pageio_setup(pp, iolen, CTOV(cp), B_WRITE | flags);
1454         if (bp == NULL) {
1455
1456                 /*
1457                  * currently, there is no way for pageio_setup() to
1458                  * return NULL, since it uses its own scheme for
1459                  * kmem_alloc()ing that shouldn't return NULL, and
1460                  * since pageio_setup() itself dereferences the thing
1461                  * it's about to return.  still, we need to be ready
1462                  * in case this ever does start happening.
1463                  */
1464
1465                 error = ENOMEM;
1466                 goto writedone;
1467         }
1468         /*
1469          * pageio_setup should have set b_addr to 0.  This
1470          * is correct since we want to do I/O on a page
1471          * boundary.  bp_mapin will use this addr to calculate
1472          * an offset, and then set b_addr to the kernel virtual
1473          * address it allocated for us.
1474          */
1475         bp->b_edev = 0;
1476         bp->b_dev = 0;
1477         bp->b_lblkno = (diskaddr_t)lbtodb(iooff);
1478         bp_mapin(bp);
1479
             /*
              * Limit the transfer to what lies below c_size, and to at most
              * b_bcount bytes.
              * NOTE(review): b_lblkno is set above but b_blkno is read
              * here; presumably the two alias the same storage -- confirm
              * against the buf structure definition.
              */
1480         iolen  = cp->c_size - ldbtob(bp->b_blkno);
1481         if (iolen > bp->b_bcount)
1482                 iolen  = bp->b_bcount;
1483
1484         /* if connected */
1485         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1486                 /* write to the back file first */
1487                 error = cachefs_push_connected(vp, bp, iolen, iooff, cr);
1488
1489                 /* write to the front file if allowed */
1490                 if ((error == 0) && CFS_ISFS_NONSHARED(fscp) &&
1491                     ((cp->c_flags & CN_NOCACHE) == 0)) {
1492                         /* try to write to the front file */
                             /* (its result is ignored; the back write succeeded) */
1493                         (void) cachefs_push_front(vp, bp, iolen, iooff, cr);
1494                 }
1495         }
1496
1497         /* else if disconnected */
1498         else {
1499                 /* try to write to the front file */
1500                 error = cachefs_push_front(vp, bp, iolen, iooff, cr);
1501         }
1502
1503         bp_mapout(bp);
1504         pageio_done(bp);
1505
1506 writedone:
1507
             /*
              * Reached directly (with bp == NULL) when pageio_setup() failed.
              * The pages are handed to pvn_write_done(), with B_ERROR set if
              * the push failed.
              */
1508         pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
1509         if (offp)
1510                 *offp = iooff;
1511         if (lenp)
1512                 *lenp = iolen;
1513
1514         /* XXX ask bob mastors how to fix this someday */
             /*
              * ENOSPC invalidates the cache object only when connected or
              * when CFS_ISFS_SOFT() is true; any other error invalidates it
              * unless it is a timeout (CFS_TIMEOUT) or EINTR.  In both cases
              * the error is also saved in c_error.  On success while
              * connected the object is marked modified.
              */
1515         mutex_enter(&cp->c_statelock);
1516         if (error) {
1517                 if (error == ENOSPC) {
1518                         if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
1519                             CFS_ISFS_SOFT(fscp)) {
1520                                 CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1521                                 cp->c_error = error;
1522                         }
1523                 } else if ((CFS_TIMEOUT(fscp, error) == 0) &&
1524                     (error != EINTR)) {
1525                         CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1526                         cp->c_error = error;
1527                 }
1528         } else if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1529                 CFSOP_MODIFY_COBJECT(fscp, cp, cr);
1530         }
1531         mutex_exit(&cp->c_statelock);
1532
1533         return (error);
1534 }
1535 
1536 /*
1537  * Write one mapped buffer to the back file system.  The back vnode is
1538  * obtained with cachefs_getbackvp() if c_backvp is not yet set, and is
1539  * held while iolen bytes at bp's mapped address are written at iooff.
1540  * A failure is recorded in bp (b_error, B_ERROR) and returned.
1541  */
1542 static int
1543 cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
1544     u_offset_t iooff, cred_t *cr)
1545 {
1546         struct cnode *cnp = VTOC(vp);
1547         fscache_t *fsp = C_TO_FSCACHE(cnp);
1548         vnode_t *bvp;
1549         ssize_t left;
1550         int ioflag;
1551         int rc;
1552
1553         mutex_enter(&cnp->c_statelock);
1554         if (cnp->c_backvp == NULL) {
1555                 rc = cachefs_getbackvp(fsp, cnp);
1556                 if (rc != 0) {
1557                         mutex_exit(&cnp->c_statelock);
1558                         return (rc);
1559                 }
1560         }
1561         bvp = cnp->c_backvp;
1562         VN_HOLD(bvp);
1563         mutex_exit(&cnp->c_statelock);
1564
1565         /* FSYNC is requested only for non-shared, SNR file systems */
1566         ioflag = (CFS_ISFS_NONSHARED(fsp) && CFS_ISFS_SNR(fsp)) ? FSYNC : 0;
1567
1568         bp->b_error = vn_rdwr(UIO_WRITE, bvp, bp->b_un.b_addr, iolen, iooff,
1569             UIO_SYSSPACE, ioflag, RLIM64_INFINITY, cr, &left);
1570         rc = bp->b_error;
1571         if (rc != 0) {
1572 #ifdef CFSDEBUG
1573                 CFS_DEBUG(CFSDEBUG_VOPS | CFSDEBUG_BACK)
1574                         printf("cachefspush: error %d cr %p\n",
1575                             rc, (void *)cr);
1576 #endif
1577                 bp->b_flags |= B_ERROR;
1578         }
1579         VN_RELE(bvp);
1580         return (rc);
1581 }
1582 
1583 /*
1584  * Pushes out pages to the front file system.
1585  * Called for both connected and disconnected states.
      *
      * Writes iolen bytes from bp's mapped address to the cnode's front
      * file at offset iooff (the write itself uses kcred), after
      * cachefs_charge_page() has succeeded for each MAXBSIZE step of the
      * range.  While disconnected the first putpage is also logged with
      * cachefs_dlog_modify() and later ones get a new sequence number.
      *
      * Returns:
      *   0          success; also returned while connected when charging
      *              or the write fails, in which case cachefs_nocache()
      *              is called on the cnode
      *   ETIMEDOUT  the file system is not non-shared, the cnode is
      *              CN_NOCACHE, or (disconnected) the file is not
      *              populated or needs attributes
      *   ENOSPC     (disconnected) charging or a dlog operation failed
      *   other      (disconnected) the error returned by vn_rdwr()
1586  */
1587 static int
1588 cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
1589     u_offset_t iooff, cred_t *cr)
1590 {
1591         struct cnode *cp = VTOC(vp);
1592         fscache_t *fscp = C_TO_FSCACHE(cp);
1593         int error = 0;
1594         ssize_t resid;
1595         u_offset_t popoff;
1596         off_t commit = 0;
1597         uint_t seq;
1598         enum cachefs_rl_type type;
1599         vnode_t *frontvp = NULL;
1600
             /* c_statelock is held from here to "out", except across vn_rdwr() */
1601         mutex_enter(&cp->c_statelock);
1602
1603         if (!CFS_ISFS_NONSHARED(fscp)) {
1604                 error = ETIMEDOUT;
1605                 goto out;
1606         }
1607
1608         /* get the front file if necessary */
1609         if ((cp->c_frontvp == NULL) &&
1610             ((cp->c_flags & CN_NOCACHE) == 0)) {
1611                 (void) cachefs_getfrontfile(cp);
1612         }
             /*
              * NOTE(review): the result of cachefs_getfrontfile() is ignored;
              * presumably it sets CN_NOCACHE on failure so that the check
              * below catches it -- confirm, since c_frontvp is used later.
              */
1613         if (cp->c_flags & CN_NOCACHE) {
1614                 error = ETIMEDOUT;
1615                 goto out;
1616         }
1617
1618         /* if disconnected, needs to be populated and have good attributes */
1619         if ((fscp->fs_cdconnected != CFS_CD_CONNECTED) &&
1620             (((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
1621             (cp->c_metadata.md_flags & MD_NEEDATTRS))) {
1622                 error = ETIMEDOUT;
1623                 goto out;
1624         }
1625
             /*
              * Charge for every MAXBSIZE step of the range.  A failure is an
              * ENOSPC error when disconnected; when connected the cnode is
              * passed to cachefs_nocache() and 0 is returned.
              */
1626         for (popoff = iooff; popoff < (iooff + iolen); popoff += MAXBSIZE) {
1627                 if (cachefs_charge_page(cp, popoff)) {
1628                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1629                                 cachefs_nocache(cp);
1630                                 goto out;
1631                         } else {
1632                                 error = ENOSPC;
1633                                 goto out;
1634                         }
1635                 }
1636         }
1637
1638         if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1639                 /* log the first putpage to a file */
1640                 if ((cp->c_metadata.md_flags & MD_PUTPAGE) == 0) {
1641                         /* uses open's creds if we have them */
1642                         if (cp->c_cred)
1643                                 cr = cp->c_cred;
1644
1645                         if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
1646                                 error = cachefs_dlog_cidmap(fscp);
1647                                 if (error) {
1648                                         error = ENOSPC;
1649                                         goto out;
1650                                 }
1651                                 cp->c_metadata.md_flags |= MD_MAPPING;
1652                         }
1653
1654                         commit = cachefs_dlog_modify(fscp, cp, cr, &seq);
1655                         if (commit == 0) {
1656                                 /* out of space */
1657                                 error = ENOSPC;
1658                                 goto out;
1659                         }
1660
1661                         cp->c_metadata.md_seq = seq;
                             /* saved so the error path at "out" can restore it */
1662                         type = cp->c_metadata.md_rltype;
1663                         cachefs_modified(cp);
1664                         cp->c_metadata.md_flags |= MD_PUTPAGE;
1665                         cp->c_metadata.md_flags &= ~MD_PUSHDONE;
1666                         cp->c_flags |= CN_UPDATED;
1667                 }
1668
1669                 /* subsequent putpages just get a new sequence number */
1670                 else {
1671                         /* but only if it matters */
1672                         if (cp->c_metadata.md_seq != fscp->fs_dlogseq) {
1673                                 seq = cachefs_dlog_seqnext(fscp);
1674                                 if (seq == 0) {
1675                                         error = ENOSPC;
1676                                         goto out;
1677                                 }
1678                                 cp->c_metadata.md_seq = seq;
1679                                 cp->c_flags |= CN_UPDATED;
1680                                 /* XXX maybe should do write_metadata here */
1681                         }
1682                 }
1683         }
1684
             /*
              * Take a hold on the front vnode and drop c_statelock for the
              * duration of the write; the lock is retaken before the hold
              * is released.  resid is not examined afterwards.
              */
1685         frontvp = cp->c_frontvp;
1686         VN_HOLD(frontvp);
1687         mutex_exit(&cp->c_statelock);
1688         error = bp->b_error = vn_rdwr(UIO_WRITE, frontvp,
1689             bp->b_un.b_addr, iolen, iooff, UIO_SYSSPACE, 0,
1690             RLIM64_INFINITY, kcred, &resid);
1691         mutex_enter(&cp->c_statelock);
1692         VN_RELE(frontvp);
1693         frontvp = NULL;
1694         if (error) {
1695                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
                             /* connected: the error is cleared, not returned */
1696                         cachefs_nocache(cp);
1697                         error = 0;
1698                         goto out;
1699                 } else {
1700                         goto out;
1701                 }
1702         }
1703
1704         (void) cachefs_update_allocmap(cp, iooff, iolen);
1705         cp->c_flags |= (CN_UPDATED | CN_NEED_FRONT_SYNC |
1706             CN_POPULATION_PENDING);
1707         if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1708                 gethrestime(&cp->c_metadata.md_localmtime);
1709                 cp->c_metadata.md_flags |= MD_LOCALMTIME;
1710         }
1711
1712 out:
             /*
              * commit is non-zero only when cachefs_dlog_modify() above
              * started a log record; that same path is the only one that
              * assigns "type", so type is valid whenever commit is set.
              */
1713         if (commit) {
1714                 /* commit the log record */
1715                 ASSERT(fscp->fs_cdconnected == CFS_CD_DISCONNECTED);
1716                 if (cachefs_dlog_commit(fscp, commit, error)) {
1717                         /*EMPTY*/
1718                         /* XXX fix on panic */
1719                 }
1720         }
1721
             /*
              * The push failed after the record was logged: clear MD_PUTPAGE
              * and move the rl entry back to the type saved before
              * cachefs_modified() was called.
              */
1722         if (error && commit) {
1723                 cp->c_metadata.md_flags &= ~MD_PUTPAGE;
1724                 cachefs_rlent_moveto(fscp->fs_cache, type,
1725                     cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
1726                 cp->c_metadata.md_rltype = type;
1727                 cp->c_flags |= CN_UPDATED;
1728         }
1729         mutex_exit(&cp->c_statelock);
1730         return (error);
1731 }
1732 
1733 /*ARGSUSED*/
     /*
      * cachefs_dump is not implemented: every argument is ignored and
      * ENOSYS is always returned.
      */
1734 static int
1735 cachefs_dump(struct vnode *vp, caddr_t foo1, offset_t foo2, offset_t foo3,
1736     caller_context_t *ct)
1737 {
1738         return (ENOSYS); /* should we panic if we get here? */
1739 }
1740 
1741 /*ARGSUSED*/
     /*
      * ioctl entry point for cachefs vnodes.
      *
      * Only callable from the global zone (EPERM otherwise).  Handles the
      * packing commands (CACHEFSIO_PACK, CACHEFSIO_UNPACK,
      * CACHEFSIO_PACKINFO, CACHEFSIO_UNPACKALL), the private daemon
      * command interface (CACHEFSIO_DCMD), _FIOCOD and _FIOSTOPCACHE; any
      * other cmd yields ENOTTY.  CACHEFSIO_DCMD and _FIOCOD additionally
      * require secpolicy_fs_config() to succeed.
      *
      * arg is a user address for the packing and DCMD commands and a plain
      * flag for _FIOCOD.  rvalp and ct are unused.
      *
      * Returns 0 on success or an errno value.
      */
1742 static int
1743 cachefs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cred,
1744         int *rvalp, caller_context_t *ct)
1745 {
1746         int error;
1747         struct cnode *cp = VTOC(vp);
1748         struct fscache *fscp = C_TO_FSCACHE(cp);
1749         struct cachefscache *cachep;
1750         extern kmutex_t cachefs_cachelock;
1751         extern cachefscache_t *cachefs_cachelist;
1752         cachefsio_pack_t *packp;
1753         STRUCT_DECL(cachefsio_dcmd, dcmd);
1754         int     inlen, outlen;  /* LP64: generic int for struct in/out len */
1755         void *dinp, *doutp;
1756         int (*dcmd_routine)(vnode_t *, void *, void *);
1757
1758         if (getzoneid() != GLOBAL_ZONEID)
1759                 return (EPERM);
1760
1761         /*
1762          * Cachefs only provides pass-through support for NFSv4,
1763          * and all vnode operations are passed through to the
1764          * back file system. For NFSv4 pass-through to work, only
1765          * connected operation is supported, the cnode backvp must
1766          * exist, and cachefs optional (eg., disconnectable) flags
1767          * are turned off. Assert these conditions which ensure
1768          * that only a subset of the ioctls are "truly supported"
1769          * for NFSv4 (these are CFSDCMD_DAEMONID and CFSDCMD_GETSTATS).
1770          * The packing operations are meaningless since there is
1771          * no caching for NFSv4, and the called functions silently
1772          * return if the backfilesystem is NFSv4. The daemon
1773          * commands except for those above are essentially used
1774          * for disconnectable operation support (including log
1775          * rolling), so in each called function, we assert that
1776          * NFSv4 is not in use. The _FIO* calls (except _FIOCOD)
1777          * are from "cfsfstype" which is not a documented
1778          * command. However, the command is visible in
1779          * /usr/lib/fs/cachefs so the commands are simply let
1780          * through (don't seem to impact pass-through functionality).
1781          */
1782         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1783         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1784
1785         switch (cmd) {
1786         case CACHEFSIO_PACK:
1787                 packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1788                 error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1789                 if (!error)
1790                         error = cachefs_pack(vp, packp->p_name, cred);
1791                 cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1792                 break;
1793
1794         case CACHEFSIO_UNPACK:
1795                 packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1796                 error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1797                 if (!error)
1798                         error = cachefs_unpack(vp, packp->p_name, cred);
1799                 cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1800                 break;
1801
1802         case CACHEFSIO_PACKINFO:
                     /* like PACK/UNPACK, but the packet is copied back out */
1803                 packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1804                 error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1805                 if (!error)
1806                         error = cachefs_packinfo(vp, packp->p_name,
1807                             &packp->p_status, cred);
1808                 if (!error)
1809                         error = xcopyout(packp, (void *)arg,
1810                             sizeof (cachefsio_pack_t));
1811                 cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1812                 break;
1813
1814         case CACHEFSIO_UNPACKALL:
1815                 error = cachefs_unpackall(vp);
1816                 break;
1817
1818         case CACHEFSIO_DCMD:
1819                 /*
1820                  * This is a private interface between the cachefsd and
1821                  * this file system.
1822                  */
1823
1824                 /* must be root to use these commands */
1825                 if (secpolicy_fs_config(cred, vp->v_vfsp) != 0)
1826                         return (EPERM);
1827
1828                 /* get the command packet */
                     /*
                      * NOTE(review): the packet is copied in with the native
                      * structure size although dcmd is initialised with the
                      * caller's data model; confirm whether the size for the
                      * caller's model was intended.
                      */
1829                 STRUCT_INIT(dcmd, flag & DATAMODEL_MASK);
1830                 error = xcopyin((void *)arg, STRUCT_BUF(dcmd),
1831                     SIZEOF_STRUCT(cachefsio_dcmd, DATAMODEL_NATIVE));
1832                 if (error)
1833                         return (error);
1834
1835                 /* copy in the data for the operation */
1836                 dinp = NULL;
1837                 if ((inlen = STRUCT_FGET(dcmd, d_slen)) > 0) {
1838                         dinp = cachefs_kmem_alloc(inlen, KM_SLEEP);
1839                         error = xcopyin(STRUCT_FGETP(dcmd, d_sdata), dinp,
1840                             inlen);
1841                         if (error) {
                                     /*
                                      * Release the input buffer before
                                      * bailing out; returning directly
                                      * used to leak it.
                                      */
                                     cachefs_kmem_free(dinp, inlen);
1842                                 return (error);
                             }
1843                 }
1844
1845                 /* allocate space for the result */
1846                 doutp = NULL;
1847                 if ((outlen = STRUCT_FGET(dcmd, d_rlen)) > 0)
1848                         doutp = cachefs_kmem_alloc(outlen, KM_SLEEP);
1849
1850                 /*
1851                  * Assert NFSv4 only allows the daemonid and getstats
1852                  * daemon requests
1853                  */
1854                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0 ||
1855                     STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_DAEMONID ||
1856                     STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_GETSTATS);
1857
1858                 /* get the routine to execute */
1859                 dcmd_routine = NULL;
1860                 switch (STRUCT_FGET(dcmd, d_cmd)) {
1861                 case CFSDCMD_DAEMONID:
1862                         dcmd_routine = cachefs_io_daemonid;
1863                         break;
1864                 case CFSDCMD_STATEGET:
1865                         dcmd_routine = cachefs_io_stateget;
1866                         break;
1867                 case CFSDCMD_STATESET:
1868                         dcmd_routine = cachefs_io_stateset;
1869                         break;
1870                 case CFSDCMD_XWAIT:
1871                         dcmd_routine = cachefs_io_xwait;
1872                         break;
1873                 case CFSDCMD_EXISTS:
1874                         dcmd_routine = cachefs_io_exists;
1875                         break;
1876                 case CFSDCMD_LOSTFOUND:
1877                         dcmd_routine = cachefs_io_lostfound;
1878                         break;
1879                 case CFSDCMD_GETINFO:
1880                         dcmd_routine = cachefs_io_getinfo;
1881                         break;
1882                 case CFSDCMD_CIDTOFID:
1883                         dcmd_routine = cachefs_io_cidtofid;
1884                         break;
1885                 case CFSDCMD_GETATTRFID:
1886                         dcmd_routine = cachefs_io_getattrfid;
1887                         break;
1888                 case CFSDCMD_GETATTRNAME:
1889                         dcmd_routine = cachefs_io_getattrname;
1890                         break;
1891                 case CFSDCMD_GETSTATS:
1892                         dcmd_routine = cachefs_io_getstats;
1893                         break;
1894                 case CFSDCMD_ROOTFID:
1895                         dcmd_routine = cachefs_io_rootfid;
1896                         break;
1897                 case CFSDCMD_CREATE:
1898                         dcmd_routine = cachefs_io_create;
1899                         break;
1900                 case CFSDCMD_REMOVE:
1901                         dcmd_routine = cachefs_io_remove;
1902                         break;
1903                 case CFSDCMD_LINK:
1904                         dcmd_routine = cachefs_io_link;
1905                         break;
1906                 case CFSDCMD_RENAME:
1907                         dcmd_routine = cachefs_io_rename;
1908                         break;
1909                 case CFSDCMD_MKDIR:
1910                         dcmd_routine = cachefs_io_mkdir;
1911                         break;
1912                 case CFSDCMD_RMDIR:
1913                         dcmd_routine = cachefs_io_rmdir;
1914                         break;
1915                 case CFSDCMD_SYMLINK:
1916                         dcmd_routine = cachefs_io_symlink;
1917                         break;
1918                 case CFSDCMD_SETATTR:
1919                         dcmd_routine = cachefs_io_setattr;
1920                         break;
1921                 case CFSDCMD_SETSECATTR:
1922                         dcmd_routine = cachefs_io_setsecattr;
1923                         break;
1924                 case CFSDCMD_PUSHBACK:
1925                         dcmd_routine = cachefs_io_pushback;
1926                         break;
1927                 default:
1928                         error = ENOTTY;
1929                         break;
1930                 }
1931
1932                 /* execute the routine */
                     /*
                      * NOTE(review): the handler is given the buffers but not
                      * inlen/outlen, which come from the caller; presumably
                      * each handler assumes its own fixed sizes -- verify.
                      */
1933                 if (dcmd_routine)
1934                         error = (*dcmd_routine)(vp, dinp, doutp);
1935
1936                 /* copy out the result */
1937                 if ((error == 0) && doutp)
1938                         error = xcopyout(doutp, STRUCT_FGETP(dcmd, d_rdata),
1939                             outlen);
1940
1941                 /* free allocated memory */
1942                 if (dinp)
1943                         cachefs_kmem_free(dinp, inlen);
1944                 if (doutp)
1945                         cachefs_kmem_free(doutp, outlen);
1946
1947                 break;
1948
1949         case _FIOCOD:
1950                 if (secpolicy_fs_config(cred, vp->v_vfsp) != 0) {
1951                         error = EPERM;
1952                         break;
1953                 }
1954
                     /* EBUSY unless at least one CFS_ISFS_CODCONST fs is stamped */
1955                 error = EBUSY;
1956                 if (arg) {
1957                         /* non-zero arg means do all filesystems */
1958                         mutex_enter(&cachefs_cachelock);
1959                         for (cachep = cachefs_cachelist; cachep != NULL;
1960                             cachep = cachep->c_next) {
1961                                 mutex_enter(&cachep->c_fslistlock);
1962                                 for (fscp = cachep->c_fslist;
1963                                     fscp != NULL;
1964                                     fscp = fscp->fs_next) {
1965                                         if (CFS_ISFS_CODCONST(fscp)) {
1966                                                 gethrestime(&fscp->fs_cod_time);
1967                                                 error = 0;
1968                                         }
1969                                 }
1970                                 mutex_exit(&cachep->c_fslistlock);
1971                         }
1972                         mutex_exit(&cachefs_cachelock);
1973                 } else {
1974                         if (CFS_ISFS_CODCONST(fscp)) {
1975                                 gethrestime(&fscp->fs_cod_time);
1976                                 error = 0;
1977                         }
1978                 }
1979                 break;
1980
1981         case _FIOSTOPCACHE:
1982                 error = cachefs_stop_cache(cp);
1983                 break;
1984
1985         default:
1986                 error = ENOTTY;
1987                 break;
1988         }
1989
1990         /* return the result */
1991         return (error);
1992 }
1993 
1994 /*
1995  * Picks the next fi_localfileno value that cachefs_fileno_inuse() does
1996  * not report as taken (a wrap to 0 restarts at 3), registers old -> new
1997  * and new -> 0 with cachefs_inum_register(), and returns the new value.
1998  * The caller must hold fs_fslock.
1999  */
2000 ino64_t
2001 cachefs_fileno_conflict(fscache_t *fscp, ino64_t old)
2002 {
2003         ino64_t candidate;
2004
2005         ASSERT(MUTEX_HELD(&fscp->fs_fslock));
2006         do {
2007                 if (++fscp->fs_info.fi_localfileno == 0)
2008                         fscp->fs_info.fi_localfileno = 3;
2009                 fscp->fs_flags |= CFS_FS_DIRTYINFO;
2010                 candidate = fscp->fs_info.fi_localfileno;
2011         } while (cachefs_fileno_inuse(fscp, candidate));
2012         cachefs_inum_register(fscp, old, candidate);
2013         cachefs_inum_register(fscp, candidate, 0);
2014         return (candidate);
2015 }
2016 
2017 /*ARGSUSED*/
2018 static int
2019 cachefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2020         caller_context_t *ct)
2021 {
2022         struct cnode *cp = VTOC(vp);
2023         fscache_t *fscp = C_TO_FSCACHE(cp);
2024         int error = 0;
2025         int held = 0;
2026         int connected = 0;
2027 
2028 #ifdef CFSDEBUG
2029         CFS_DEBUG(CFSDEBUG_VOPS)
2030                 printf("cachefs_getattr: ENTER vp %p\n", (void *)vp);
2031 #endif
2032 
2033         if (getzoneid() != GLOBAL_ZONEID)
2034                 return (EPERM);
2035 
2036         /* Call backfilesystem getattr if NFSv4 */
2037         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
2038                 error = cachefs_getattr_backfs_nfsv4(vp, vap, flags, cr, ct);
2039                 goto out;
2040         }
2041 
2042         /*
2043          * If it has been specified that the return value will
2044          * just be used as a hint, and we are only being asked
2045          * for size, fsid or rdevid, then return the client's
2046          * notion of these values without checking to make sure
2047          * that the attribute cache is up to date.
2048          * The whole point is to avoid an over the wire GETATTR
2049          * call.
2050          */
2051         if (flags & ATTR_HINT) {
2052                 if (vap->va_mask ==
2053                     (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2054                         if (vap->va_mask | AT_SIZE)
2055                                 vap->va_size = cp->c_size;
2056                         /*
2057                          * Return the FSID of the cachefs filesystem,
2058                          * not the back filesystem
2059                          */
2060                         if (vap->va_mask | AT_FSID)
2061                                 vap->va_fsid = vp->v_vfsp->vfs_dev;
2062                         if (vap->va_mask | AT_RDEV)
2063                                 vap->va_rdev = cp->c_attr.va_rdev;
2064                         return (0);
2065                 }
2066         }
2067 
2068         /*
2069          * Only need to flush pages if asking for the mtime
2070          * and if there any dirty pages.
2071          */
2072         if (vap->va_mask & AT_MTIME) {
2073                 /*EMPTY*/
2074 #if 0
2075                 /*
2076                  * XXX bob: stolen from nfs code, need to do something similar
2077                  */
2078                 rp = VTOR(vp);
2079                 if ((rp->r_flags & RDIRTY) || rp->r_iocnt > 0)
2080                         (void) nfs3_putpage(vp, (offset_t)0, 0, 0, cr);
2081 #endif
2082         }
2083 
2084         for (;;) {
2085                 /* get (or renew) access to the file system */
2086                 if (held) {
2087                         cachefs_cd_release(fscp);
2088                         held = 0;
2089                 }
2090                 error = cachefs_cd_access(fscp, connected, 0);
2091                 if (error)
2092                         goto out;
2093                 held = 1;
2094 
2095                 /*
2096                  * If it has been specified that the return value will
2097                  * just be used as a hint, and we are only being asked
2098                  * for size, fsid or rdevid, then return the client's
2099                  * notion of these values without checking to make sure
2100                  * that the attribute cache is up to date.
2101                  * The whole point is to avoid an over the wire GETATTR
2102                  * call.
2103                  */
2104                 if (flags & ATTR_HINT) {
2105                         if (vap->va_mask ==
2106                             (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2107                                 if (vap->va_mask | AT_SIZE)
2108                                         vap->va_size = cp->c_size;
2109                                 /*
2110                                  * Return the FSID of the cachefs filesystem,
2111                                  * not the back filesystem
2112                                  */
2113                                 if (vap->va_mask | AT_FSID)
2114                                         vap->va_fsid = vp->v_vfsp->vfs_dev;
2115                                 if (vap->va_mask | AT_RDEV)
2116                                         vap->va_rdev = cp->c_attr.va_rdev;
2117                                 goto out;
2118                         }
2119                 }
2120 
2121                 mutex_enter(&cp->c_statelock);
2122                 if ((cp->c_metadata.md_flags & MD_NEEDATTRS) &&
2123                     (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
2124                         mutex_exit(&cp->c_statelock);
2125                         connected = 1;
2126                         continue;
2127                 }
2128 
2129                 error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2130                 if (CFS_TIMEOUT(fscp, error)) {
2131                         mutex_exit(&cp->c_statelock);
2132                         cachefs_cd_release(fscp);
2133                         held = 0;
2134                         cachefs_cd_timedout(fscp);
2135                         continue;
2136                 }
2137                 if (error) {
2138                         mutex_exit(&cp->c_statelock);
2139                         break;
2140                 }
2141 
2142                 /* check for fileno conflict */
2143                 if ((fscp->fs_inum_size > 0) &&
2144                     ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) {
2145                         ino64_t fakenum;
2146 
2147                         mutex_exit(&cp->c_statelock);
2148                         mutex_enter(&fscp->fs_fslock);
2149                         fakenum = cachefs_inum_real2fake(fscp,
2150                             cp->c_attr.va_nodeid);
2151                         if (fakenum == 0) {
2152                                 fakenum = cachefs_fileno_conflict(fscp,
2153                                     cp->c_attr.va_nodeid);
2154                         }
2155                         mutex_exit(&fscp->fs_fslock);
2156 
2157                         mutex_enter(&cp->c_statelock);
2158                         cp->c_metadata.md_flags |= MD_LOCALFILENO;
2159                         cp->c_metadata.md_localfileno = fakenum;
2160                         cp->c_flags |= CN_UPDATED;
2161                 }
2162 
2163                 /* copy out the attributes */
2164                 *vap = cp->c_attr;
2165 
2166                 /*
2167                  * return the FSID of the cachefs filesystem,
2168                  * not the back filesystem
2169                  */
2170                 vap->va_fsid = vp->v_vfsp->vfs_dev;
2171 
2172                 /* return our idea of the size */
2173                 if (cp->c_size > vap->va_size)
2174                         vap->va_size = cp->c_size;
2175 
2176                 /* overwrite with our version of fileno and timestamps */
2177                 vap->va_nodeid = cp->c_metadata.md_localfileno;
2178                 vap->va_mtime = cp->c_metadata.md_localmtime;
2179                 vap->va_ctime = cp->c_metadata.md_localctime;
2180 
2181                 mutex_exit(&cp->c_statelock);
2182                 break;
2183         }
2184 out:
2185         if (held)
2186                 cachefs_cd_release(fscp);
2187 #ifdef CFS_CD_DEBUG
2188         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2189 #endif
2190 
2191 #ifdef CFSDEBUG
2192         CFS_DEBUG(CFSDEBUG_VOPS)
2193                 printf("cachefs_getattr: EXIT error = %d\n", error);
2194 #endif
2195         return (error);
2196 }
2197 
2198 /*
2199  * cachefs_getattr_backfs_nfsv4
2200  *
2201  * Call NFSv4 back filesystem to handle the getattr (cachefs
2202  * pass-through support for NFSv4).
2203  */
2204 static int
2205 cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
2206     int flags, cred_t *cr, caller_context_t *ct)
2207 {
2208         cnode_t *cp = VTOC(vp);
2209         fscache_t *fscp = C_TO_FSCACHE(cp);
2210         vnode_t *backvp;
2211         int error;
2212 
2213         /*
2214          * For NFSv4 pass-through to work, only connected operation
2215          * is supported, the cnode backvp must exist, and cachefs
2216          * optional (eg., disconnectable) flags are turned off. Assert
2217          * these conditions for the getattr operation.
2218          */
2219         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2220         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2221 
2222         /* Call backfs vnode op after extracting backvp */
2223         mutex_enter(&cp->c_statelock);
2224         backvp = cp->c_backvp;
2225         mutex_exit(&cp->c_statelock);
2226 
2227         CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_getattr_backfs_nfsv4: cnode %p,"
2228             " backvp %p\n", cp, backvp));
2229         error = VOP_GETATTR(backvp, vap, flags, cr, ct);
2230 
2231         /* Update attributes */
2232         cp->c_attr = *vap;
2233 
2234         /*
2235          * return the FSID of the cachefs filesystem,
2236          * not the back filesystem
2237          */
2238         vap->va_fsid = vp->v_vfsp->vfs_dev;
2239 
2240         return (error);
2241 }
2242 
2243 /*ARGSUSED4*/
2244 static int
2245 cachefs_setattr(
2246         vnode_t *vp,
2247         vattr_t *vap,
2248         int flags,
2249         cred_t *cr,
2250         caller_context_t *ct)
2251 {
2252         cnode_t *cp = VTOC(vp);
2253         fscache_t *fscp = C_TO_FSCACHE(cp);
2254         int error;
2255         int connected;
2256         int held = 0;
2257 
2258         if (getzoneid() != GLOBAL_ZONEID)
2259                 return (EPERM);
2260 
2261         /*
2262          * Cachefs only provides pass-through support for NFSv4,
2263          * and all vnode operations are passed through to the
2264          * back file system. For NFSv4 pass-through to work, only
2265          * connected operation is supported, the cnode backvp must
2266          * exist, and cachefs optional (eg., disconnectable) flags
2267          * are turned off. Assert these conditions to ensure that
2268          * the backfilesystem is called for the setattr operation.
2269          */
2270         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2271         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2272 
2273         connected = 0;
2274         for (;;) {
2275                 /* drop hold on file system */
2276                 if (held) {
2277                         /* Won't loop with NFSv4 connected behavior */
2278                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2279                         cachefs_cd_release(fscp);
2280                         held = 0;
2281                 }
2282 
2283                 /* acquire access to the file system */
2284                 error = cachefs_cd_access(fscp, connected, 1);
2285                 if (error)
2286                         break;
2287                 held = 1;
2288 
2289                 /* perform the setattr */
2290                 error = cachefs_setattr_common(vp, vap, flags, cr, ct);
2291                 if (error) {
2292                         /* if connected */
2293                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2294                                 if (CFS_TIMEOUT(fscp, error)) {
2295                                         cachefs_cd_release(fscp);
2296                                         held = 0;
2297                                         cachefs_cd_timedout(fscp);
2298                                         connected = 0;
2299                                         continue;
2300                                 }
2301                         }
2302 
2303                         /* else must be disconnected */
2304                         else {
2305                                 if (CFS_TIMEOUT(fscp, error)) {
2306                                         connected = 1;
2307                                         continue;
2308                                 }
2309                         }
2310                 }
2311                 break;
2312         }
2313 
2314         if (held) {
2315                 cachefs_cd_release(fscp);
2316         }
2317 #ifdef CFS_CD_DEBUG
2318         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2319 #endif
2320         return (error);
2321 }
2322 
2323 static int
2324 cachefs_setattr_common(
2325         vnode_t *vp,
2326         vattr_t *vap,
2327         int flags,
2328         cred_t *cr,
2329         caller_context_t *ct)
2330 {
2331         cnode_t *cp = VTOC(vp);
2332         fscache_t *fscp = C_TO_FSCACHE(cp);
2333         cachefscache_t *cachep = fscp->fs_cache;
2334         uint_t mask = vap->va_mask;
2335         int error = 0;
2336         uint_t bcnt;
2337 
2338         /* Cannot set these attributes. */
2339         if (mask & AT_NOSET)
2340                 return (EINVAL);
2341 
2342         /*
2343          * Truncate file.  Must have write permission and not be a directory.
2344          */
2345         if (mask & AT_SIZE) {
2346                 if (vp->v_type == VDIR) {
2347                         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE))
2348                                 cachefs_log_truncate(cachep, EISDIR,
2349                                     fscp->fs_cfsvfsp,
2350                                     &cp->c_metadata.md_cookie,
2351                                     cp->c_id.cid_fileno,
2352                                     crgetuid(cr), vap->va_size);
2353                         return (EISDIR);
2354                 }
2355         }
2356 
2357         /*
2358          * Gotta deal with one special case here, where we're setting the
2359          * size of the file. First, we zero out part of the page after the
2360          * new size of the file. Then we toss (not write) all pages after
2361          * page in which the new offset occurs. Note that the NULL passed
2362          * in instead of a putapage() fn parameter is correct, since
2363          * no dirty pages will be found (B_TRUNC | B_INVAL).
2364          */
2365 
2366         rw_enter(&cp->c_rwlock, RW_WRITER);
2367 
2368         /* sync dirty pages */
2369         if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
2370                 error = cachefs_putpage_common(vp, (offset_t)0, 0, 0, cr);
2371                 if (error == EINTR)
2372                         goto out;
2373         }
2374         error = 0;
2375 
2376         /* if connected */
2377         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2378                 error = cachefs_setattr_connected(vp, vap, flags, cr, ct);
2379         }
2380         /* else must be disconnected */
2381         else {
2382                 error = cachefs_setattr_disconnected(vp, vap, flags, cr, ct);
2383         }
2384         if (error)
2385                 goto out;
2386 
2387         /*
2388          * If the file size has been changed then
2389          * toss whole pages beyond the end of the file and zero
2390          * the portion of the last page that is beyond the end of the file.
2391          */
2392         if (mask & AT_SIZE && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2393                 bcnt = (uint_t)(cp->c_size & PAGEOFFSET);
2394                 if (bcnt)
2395                         pvn_vpzero(vp, cp->c_size, PAGESIZE - bcnt);
2396                 (void) pvn_vplist_dirty(vp, cp->c_size, cachefs_push,
2397                     B_TRUNC | B_INVAL, cr);
2398         }
2399 
2400 out:
2401         rw_exit(&cp->c_rwlock);
2402 
2403         if ((mask & AT_SIZE) &&
2404             (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE)))
2405                 cachefs_log_truncate(cachep, error, fscp->fs_cfsvfsp,
2406                     &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
2407                     crgetuid(cr), vap->va_size);
2408 
2409         return (error);
2410 }
2411 
2412 static int
2413 cachefs_setattr_connected(
2414         vnode_t *vp,
2415         vattr_t *vap,
2416         int flags,
2417         cred_t *cr,
2418         caller_context_t *ct)
2419 {
2420         cnode_t *cp = VTOC(vp);
2421         fscache_t *fscp = C_TO_FSCACHE(cp);
2422         uint_t mask = vap->va_mask;
2423         int error = 0;
2424         int setsize;
2425 
2426         mutex_enter(&cp->c_statelock);
2427 
2428         if (cp->c_backvp == NULL) {
2429                 error = cachefs_getbackvp(fscp, cp);
2430                 if (error)
2431                         goto out;
2432         }
2433 
2434         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2435         if (error)
2436                 goto out;
2437 
2438         CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_setattr (nfsv4): cnode %p, "
2439             "backvp %p\n", cp, cp->c_backvp));
2440         error = VOP_SETATTR(cp->c_backvp, vap, flags, cr, ct);
2441         if (error) {
2442                 goto out;
2443         }
2444 
2445         /* if the size of the file is being changed */
2446         if (mask & AT_SIZE) {
2447                 cp->c_size = vap->va_size;
2448                 error = 0;
2449                 setsize = 0;
2450 
2451                 /* see if okay to try to set the file size */
2452                 if (((cp->c_flags & CN_NOCACHE) == 0) &&
2453                     CFS_ISFS_NONSHARED(fscp)) {
2454                         /* okay to set size if file is populated */
2455                         if (cp->c_metadata.md_flags & MD_POPULATED)
2456                                 setsize = 1;
2457 
2458                         /*
2459                          * Okay to set size if front file exists and setting
2460                          * file size to zero.
2461                          */
2462                         if ((cp->c_metadata.md_flags & MD_FILE) &&
2463                             (vap->va_size == 0))
2464                                 setsize = 1;
2465                 }
2466 
2467                 /* if okay to try to set the file size */
2468                 if (setsize) {
2469                         error = 0;
2470                         if (cp->c_frontvp == NULL)
2471                                 error = cachefs_getfrontfile(cp);
2472                         if (error == 0)
2473                                 error = cachefs_frontfile_size(cp, cp->c_size);
2474                 } else if (cp->c_metadata.md_flags & MD_FILE) {
2475                         /* make sure file gets nocached */
2476                         error = EEXIST;
2477                 }
2478 
2479                 /* if we have to nocache the file */
2480                 if (error) {
2481                         if ((cp->c_flags & CN_NOCACHE) == 0 &&
2482                             !CFS_ISFS_BACKFS_NFSV4(fscp))
2483                                 cachefs_nocache(cp);
2484                         error = 0;
2485                 }
2486         }
2487 
2488         cp->c_flags |= CN_UPDATED;
2489 
2490         /* XXX bob: given what modify_cobject does this seems unnecessary */
2491         cp->c_attr.va_mask = AT_ALL;
2492         error = VOP_GETATTR(cp->c_backvp, &cp->c_attr, 0, cr, ct);
2493         if (error)
2494                 goto out;
2495 
2496         cp->c_attr.va_size = MAX(cp->c_attr.va_size, cp->c_size);
2497         cp->c_size = cp->c_attr.va_size;
2498 
2499         CFSOP_MODIFY_COBJECT(fscp, cp, cr);
2500 out:
2501         mutex_exit(&cp->c_statelock);
2502         return (error);
2503 }
2504 
2505 /*
2506  * perform the setattr on the local file system
2507  */
2508 /*ARGSUSED4*/
2509 static int
2510 cachefs_setattr_disconnected(
2511         vnode_t *vp,
2512         vattr_t *vap,
2513         int flags,
2514         cred_t *cr,
2515         caller_context_t *ct)
2516 {
2517         cnode_t *cp = VTOC(vp);
2518         fscache_t *fscp = C_TO_FSCACHE(cp);
2519         int mask;
2520         int error;
2521         int newfile;
2522         off_t commit = 0;
2523 
2524         if (CFS_ISFS_WRITE_AROUND(fscp))
2525                 return (ETIMEDOUT);
2526 
2527         /* if we do not have good attributes */
2528         if (cp->c_metadata.md_flags & MD_NEEDATTRS)
2529                 return (ETIMEDOUT);
2530 
2531         /* primary concern is to keep this routine as much like ufs_setattr */
2532 
2533         mutex_enter(&cp->c_statelock);
2534 
2535         error = secpolicy_vnode_setattr(cr, vp, vap, &cp->c_attr, flags,
2536             cachefs_access_local, cp);
2537 
2538         if (error)
2539                 goto out;
2540 
2541         mask = vap->va_mask;
2542 
2543         /* if changing the size of the file */
2544         if (mask & AT_SIZE) {
2545                 if (vp->v_type == VDIR) {
2546                         error = EISDIR;
2547                         goto out;
2548                 }
2549 
2550                 if (vp->v_type == VFIFO) {
2551                         error = 0;
2552                         goto out;
2553                 }
2554 
2555                 if ((vp->v_type != VREG) &&
2556                     !((vp->v_type == VLNK) && (vap->va_size == 0))) {
2557                         error = EINVAL;
2558                         goto out;
2559                 }
2560 
2561                 if (vap->va_size > fscp->fs_offmax) {
2562                         error = EFBIG;
2563                         goto out;
2564                 }
2565 
2566                 /* if the file is not populated and we are not truncating it */
2567                 if (((cp->c_metadata.md_flags & MD_POPULATED) == 0) &&
2568                     (vap->va_size != 0)) {
2569                         error = ETIMEDOUT;
2570                         goto out;
2571                 }
2572 
2573                 if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2574                         error = cachefs_dlog_cidmap(fscp);
2575                         if (error) {
2576                                 error = ENOSPC;
2577                                 goto out;
2578                         }
2579                         cp->c_metadata.md_flags |= MD_MAPPING;
2580                 }
2581 
2582                 /* log the operation */
2583                 commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2584                 if (commit == 0) {
2585                         error = ENOSPC;
2586                         goto out;
2587                 }
2588                 cp->c_flags &= ~CN_NOCACHE;
2589 
2590                 /* special case truncating fast sym links */
2591                 if ((vp->v_type == VLNK) &&
2592                     (cp->c_metadata.md_flags & MD_FASTSYMLNK)) {
2593                         /* XXX how can we get here */
2594                         /* XXX should update mtime */
2595                         cp->c_size = 0;
2596                         error = 0;
2597                         goto out;
2598                 }
2599 
2600                 /* get the front file, this may create one */
2601                 newfile = (cp->c_metadata.md_flags & MD_FILE) ? 0 : 1;
2602                 if (cp->c_frontvp == NULL) {
2603                         error = cachefs_getfrontfile(cp);
2604                         if (error)
2605                                 goto out;
2606                 }
2607                 ASSERT(cp->c_frontvp);
2608                 if (newfile && (cp->c_flags & CN_UPDATED)) {
2609                         /* allocate space for the metadata */
2610                         ASSERT((cp->c_flags & CN_ALLOC_PENDING) == 0);
2611                         ASSERT((cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR)
2612                             == 0);
2613                         error = filegrp_write_metadata(cp->c_filegrp,
2614                             &cp->c_id, &cp->c_metadata);
2615                         if (error)
2616                                 goto out;
2617                 }
2618 
2619                 /* change the size of the front file */
2620                 error = cachefs_frontfile_size(cp, vap->va_size);
2621                 if (error)
2622                         goto out;
2623                 cp->c_attr.va_size = cp->c_size = vap->va_size;
2624                 gethrestime(&cp->c_metadata.md_localmtime);
2625                 cp->c_metadata.md_flags |= MD_POPULATED | MD_LOCALMTIME;
2626                 cachefs_modified(cp);
2627                 cp->c_flags |= CN_UPDATED;
2628         }
2629 
2630         if (mask & AT_MODE) {
2631                 /* mark as modified */
2632                 if (cachefs_modified_alloc(cp)) {
2633                         error = ENOSPC;
2634                         goto out;
2635                 }
2636 
2637                 if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2638                         error = cachefs_dlog_cidmap(fscp);
2639                         if (error) {
2640                                 error = ENOSPC;
2641                                 goto out;
2642                         }
2643                         cp->c_metadata.md_flags |= MD_MAPPING;
2644                 }
2645 
2646                 /* log the operation if not already logged */
2647                 if (commit == 0) {
2648                         commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2649                         if (commit == 0) {
2650                                 error = ENOSPC;
2651                                 goto out;
2652                         }
2653                 }
2654 
2655                 cp->c_attr.va_mode &= S_IFMT;
2656                 cp->c_attr.va_mode |= vap->va_mode & ~S_IFMT;
2657                 gethrestime(&cp->c_metadata.md_localctime);
2658                 cp->c_metadata.md_flags |= MD_LOCALCTIME;
2659                 cp->c_flags |= CN_UPDATED;
2660         }
2661 
2662         if (mask & (AT_UID|AT_GID)) {
2663 
2664                 /* mark as modified */
2665                 if (cachefs_modified_alloc(cp)) {
2666                         error = ENOSPC;
2667                         goto out;
2668                 }
2669 
2670                 if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2671                         error = cachefs_dlog_cidmap(fscp);
2672                         if (error) {
2673                                 error = ENOSPC;
2674                                 goto out;
2675                         }
2676                         cp->c_metadata.md_flags |= MD_MAPPING;
2677                 }
2678 
2679                 /* log the operation if not already logged */
2680                 if (commit == 0) {
2681                         commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2682                         if (commit == 0) {
2683                                 error = ENOSPC;
2684                                 goto out;
2685                         }
2686                 }
2687 
2688                 if (mask & AT_UID)
2689                         cp->c_attr.va_uid = vap->va_uid;
2690 
2691                 if (mask & AT_GID)
2692                         cp->c_attr.va_gid = vap->va_gid;
2693                 gethrestime(&cp->c_metadata.md_localctime);
2694                 cp->c_metadata.md_flags |= MD_LOCALCTIME;
2695                 cp->c_flags |= CN_UPDATED;
2696         }
2697 
2698 
2699         if (mask & (AT_MTIME|AT_ATIME)) {
2700                 /* mark as modified */
2701                 if (cachefs_modified_alloc(cp)) {
2702                         error = ENOSPC;
2703                         goto out;
2704                 }
2705 
2706                 if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2707                         error = cachefs_dlog_cidmap(fscp);
2708                         if (error) {
2709                                 error = ENOSPC;
2710                                 goto out;
2711                         }
2712                         cp->c_metadata.md_flags |= MD_MAPPING;
2713                 }
2714 
2715                 /* log the operation if not already logged */
2716                 if (commit == 0) {
2717                         commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2718                         if (commit == 0) {
2719                                 error = ENOSPC;
2720                                 goto out;
2721                         }
2722                 }
2723 
2724                 if (mask & AT_MTIME) {
2725                         cp->c_metadata.md_localmtime = vap->va_mtime;
2726                         cp->c_metadata.md_flags |= MD_LOCALMTIME;
2727                 }
2728                 if (mask & AT_ATIME)
2729                         cp->c_attr.va_atime = vap->va_atime;
2730                 gethrestime(&cp->c_metadata.md_localctime);
2731                 cp->c_metadata.md_flags |= MD_LOCALCTIME;
2732                 cp->c_flags |= CN_UPDATED;
2733         }
2734 
2735 out:
2736         mutex_exit(&cp->c_statelock);
2737 
2738         /* commit the log entry */
2739         if (commit) {
2740                 if (cachefs_dlog_commit(fscp, commit, error)) {
2741                         /*EMPTY*/
2742                         /* XXX bob: fix on panic */
2743                 }
2744         }
2745         return (error);
2746 }
2747 
2748 /* ARGSUSED */
2749 static int
2750 cachefs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
2751         caller_context_t *ct)
2752 {
2753         cnode_t *cp = VTOC(vp);
2754         fscache_t *fscp = C_TO_FSCACHE(cp);
2755         int error;
2756         int held = 0;
2757         int connected = 0;
2758 
2759 #ifdef CFSDEBUG
2760         CFS_DEBUG(CFSDEBUG_VOPS)
2761                 printf("cachefs_access: ENTER vp %p\n", (void *)vp);
2762 #endif
2763         if (getzoneid() != GLOBAL_ZONEID) {
2764                 error = EPERM;
2765                 goto out;
2766         }
2767 
2768         /*
2769          * Cachefs only provides pass-through support for NFSv4,
2770          * and all vnode operations are passed through to the
2771          * back file system. For NFSv4 pass-through to work, only
2772          * connected operation is supported, the cnode backvp must
2773          * exist, and cachefs optional (eg., disconnectable) flags
2774          * are turned off. Assert these conditions to ensure that
2775          * the backfilesystem is called for the access operation.
2776          */
2777         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2778         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2779 
2780         for (;;) {
2781                 /* get (or renew) access to the file system */
2782                 if (held) {
2783                         /* Won't loop with NFSv4 connected behavior */
2784                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2785                         cachefs_cd_release(fscp);
2786                         held = 0;
2787                 }
2788                 error = cachefs_cd_access(fscp, connected, 0);
2789                 if (error)
2790                         break;
2791                 held = 1;
2792 
2793                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2794                         error = cachefs_access_connected(vp, mode, flags,
2795                             cr);
2796                         if (CFS_TIMEOUT(fscp, error)) {
2797                                 cachefs_cd_release(fscp);
2798                                 held = 0;
2799                                 cachefs_cd_timedout(fscp);
2800                                 connected = 0;
2801                                 continue;
2802                         }
2803                 } else {
2804                         mutex_enter(&cp->c_statelock);
2805                         error = cachefs_access_local(cp, mode, cr);
2806                         mutex_exit(&cp->c_statelock);
2807                         if (CFS_TIMEOUT(fscp, error)) {
2808                                 if (cachefs_cd_access_miss(fscp)) {
2809                                         mutex_enter(&cp->c_statelock);
2810                                         if (cp->c_backvp == NULL) {
2811                                                 (void) cachefs_getbackvp(fscp,
2812                                                     cp);
2813                                         }
2814                                         mutex_exit(&cp->c_statelock);
2815                                         error = cachefs_access_connected(vp,
2816                                             mode, flags, cr);
2817                                         if (!CFS_TIMEOUT(fscp, error))
2818                                                 break;
2819                                         delay(5*hz);
2820                                         connected = 0;
2821                                         continue;
2822                                 }
2823                                 connected = 1;
2824                                 continue;
2825                         }
2826                 }
2827                 break;
2828         }
2829         if (held)
2830                 cachefs_cd_release(fscp);
2831 #ifdef CFS_CD_DEBUG
2832         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2833 #endif
2834 out:
2835 #ifdef CFSDEBUG
2836         CFS_DEBUG(CFSDEBUG_VOPS)
2837                 printf("cachefs_access: EXIT error = %d\n", error);
2838 #endif
2839         return (error);
2840 }
2841 
2842 static int
2843 cachefs_access_connected(struct vnode *vp, int mode, int flags, cred_t *cr)
2844 {
2845         cnode_t *cp = VTOC(vp);
2846         fscache_t *fscp = C_TO_FSCACHE(cp);
2847         int error = 0;
2848 
2849         mutex_enter(&cp->c_statelock);
2850 
2851         /* Make sure the cnode attrs are valid first. */
2852         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2853         if (error)
2854                 goto out;
2855 
2856         /* see if can do a local file system check */
2857         if ((fscp->fs_info.fi_mntflags & CFS_ACCESS_BACKFS) == 0 &&
2858             !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2859                 error = cachefs_access_local(cp, mode, cr);
2860                 goto out;
2861         }
2862 
2863         /* else do a remote file system check */
2864         else {
2865                 if (cp->c_backvp == NULL) {
2866                         error = cachefs_getbackvp(fscp, cp);
2867                         if (error)
2868                                 goto out;
2869                 }
2870 
2871                 CFS_DPRINT_BACKFS_NFSV4(fscp,
2872                     ("cachefs_access (nfsv4): cnode %p, backvp %p\n",
2873                     cp, cp->c_backvp));
2874                 error = VOP_ACCESS(cp->c_backvp, mode, flags, cr, NULL);
2875 
2876                 /*
2877                  * even though we don't `need' the ACL to do access
2878                  * via the backvp, we should cache it here to make our
2879                  * behavior more reasonable if we go disconnected.
2880                  */
2881 
2882                 if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
2883                     (cachefs_vtype_aclok(vp)) &&
2884                     ((cp->c_flags & CN_NOCACHE) == 0) &&
2885                     (!CFS_ISFS_BACKFS_NFSV4(fscp)) &&
2886                     ((cp->c_metadata.md_flags & MD_ACL) == 0))
2887                         (void) cachefs_cacheacl(cp, NULL);
2888         }
2889 out:
2890         /*
2891          * If NFS returned ESTALE, mark this cnode as stale, so that
2892          * the vn_open retry will read the file anew from backfs
2893          */
2894         if (error == ESTALE)
2895                 cachefs_cnode_stale(cp);
2896 
2897         mutex_exit(&cp->c_statelock);
2898         return (error);
2899 }
2900 
2901 /*
2902  * CFS has a fastsymlink scheme. If the size of the link is < C_FSL_SIZE, then
2903  * the link is placed in the metadata itself (no front file is allocated).
      *
      * cachefs_readlink() copies the contents of the symbolic link vp out
      * through uiop.  It returns EPERM outside the global zone and EINVAL if
      * vp is not a VLNK; otherwise it returns the result of
      * cachefs_cd_access() or of the connected/disconnected helper that ran,
      * with ENXIO translated to EINVAL.
      *
      * The body is a retry loop: each pass obtains access with
      * cachefs_cd_access(), runs cachefs_readlink_connected() or
      * cachefs_readlink_disconnected() depending on fs_cdconnected, and goes
      * around again when CFS_TIMEOUT() is true for the result.  `held' records
      * whether the access obtained from cachefs_cd_access() still has to be
      * released; `connected' is the value handed to the next
      * cachefs_cd_access() call.
      *
      * NOTE(review): `connected' presumably asks cachefs_cd_access() to wait
      * for connected mode -- confirm against cachefs_cd_access().
2904  */
2905 /*ARGSUSED*/
2906 static int
2907 cachefs_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
2908 {
2909         int error = 0;
2910         cnode_t *cp = VTOC(vp);
2911         fscache_t *fscp = C_TO_FSCACHE(cp);
2912         cachefscache_t *cachep = fscp->fs_cache;
2913         int held = 0;
2914         int connected = 0;
2915 
             /* refuse callers that are not in the global zone */
2916         if (getzoneid() != GLOBAL_ZONEID)
2917                 return (EPERM);
2918 
2919         if (vp->v_type != VLNK)
2920                 return (EINVAL);
2921 
2922         /*
2923          * Cachefs only provides pass-through support for NFSv4,
2924          * and all vnode operations are passed through to the
2925          * back file system. For NFSv4 pass-through to work, only
2926          * connected operation is supported, the cnode backvp must
2927          * exist, and cachefs optional (eg., disconnectable) flags
2928          * are turned off. Assert these conditions to ensure that
2929          * the backfilesystem is called for the readlink operation.
2930          */
2931         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2932         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2933 
2934         for (;;) {
2935                 /* get (or renew) access to the file system */
2936                 if (held) {
2937                         /* Won't loop with NFSv4 connected behavior */
2938                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2939                         cachefs_cd_release(fscp);
2940                         held = 0;
2941                 }
2942                 error = cachefs_cd_access(fscp, connected, 0);
2943                 if (error)
2944                         break;
2945                 held = 1;
2946 
2947                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2948                         /*
2949                          * since readlink_connected will call stuffsymlink
2950                          * on success, have to serialize access
2951                          */
2952                         if (!rw_tryenter(&cp->c_rwlock, RW_WRITER)) {
                                     /*
                                      * The lock is busy: give up the cd access
                                      * before blocking on c_rwlock, then obtain
                                      * the access again once the lock is ours.
                                      */
2953                                 cachefs_cd_release(fscp);
2954                                 rw_enter(&cp->c_rwlock, RW_WRITER);
2955                                 error = cachefs_cd_access(fscp, connected, 0);
2956                                 if (error) {
                                             /* access was released above */
2957                                         held = 0;
2958                                         rw_exit(&cp->c_rwlock);
2959                                         break;
2960                                 }
2961                         }
2962                         error = cachefs_readlink_connected(vp, uiop, cr);
2963                         rw_exit(&cp->c_rwlock);
2964                         if (CFS_TIMEOUT(fscp, error)) {
                                     /*
                                      * Timed out while connected: release the
                                      * access, report the timeout with
                                      * cachefs_cd_timedout() and start over.
                                      */
2965                                 cachefs_cd_release(fscp);
2966                                 held = 0;
2967                                 cachefs_cd_timedout(fscp);
2968                                 connected = 0;
2969                                 continue;
2970                         }
2971                 } else {
                             /*
                              * cachefs_readlink_disconnected() returns
                              * ETIMEDOUT when the link is not in the cache.
                              */
2972                         error = cachefs_readlink_disconnected(vp, uiop);
2973                         if (CFS_TIMEOUT(fscp, error)) {
                                     /*
                                      * When cachefs_cd_access_miss() returns
                                      * non-zero, run the connected variant
                                      * even though fs_cdconnected is not
                                      * CFS_CD_CONNECTED.
                                      */
2974                                 if (cachefs_cd_access_miss(fscp)) {
2975                                         /* as above */
2976                                         if (!rw_tryenter(&cp->c_rwlock,
2977                                             RW_WRITER)) {
2978                                                 cachefs_cd_release(fscp);
2979                                                 rw_enter(&cp->c_rwlock,
2980                                                     RW_WRITER);
2981                                                 error = cachefs_cd_access(fscp,
2982                                                     connected, 0);
2983                                                 if (error) {
2984                                                         held = 0;
2985                                                         rw_exit(&cp->c_rwlock);
2986                                                         break;
2987                                                 }
2988                                         }
2989                                         error = cachefs_readlink_connected(vp,
2990                                             uiop, cr);
2991                                         rw_exit(&cp->c_rwlock);
2992                                         if (!CFS_TIMEOUT(fscp, error))
2993                                                 break;
                                             /* still timing out: pause 5*hz ticks */
2994                                         delay(5*hz);
2995                                         connected = 0;
2996                                         continue;
2997                                 }
                                     /*
                                      * Otherwise retry, passing connected == 1
                                      * to the next cachefs_cd_access() call.
                                      */
2998                                 connected = 1;
2999                                 continue;
3000                         }
3001                 }
3002                 break;
3003         }
             /* log the outcome if readlink logging is enabled for this cache */
3004         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READLINK))
3005                 cachefs_log_readlink(cachep, error, fscp->fs_cfsvfsp,
3006                     &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
3007                     crgetuid(cr), cp->c_size);
3008 
3009         if (held)
3010                 cachefs_cd_release(fscp);
3011 #ifdef CFS_CD_DEBUG
3012         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3013 #endif
3014 
3015         /*
3016          * The over the wire error for attempting to readlink something
3017          * other than a symbolic link is ENXIO.  However, we need to
3018          * return EINVAL instead of ENXIO, so we map it here.
3019          */
3020         return (error == ENXIO ? EINVAL : error);
3021 }
3022 
     /*
      * Connected-mode helper for cachefs_readlink().
      *
      * With cp->c_statelock held, runs CFSOP_CHECK_COBJECT() on the cnode and
      * then copies the link text to uiop from the first usable source:
      *   1. the fast symlink kept in c_metadata.md_allocinfo (MD_FASTSYMLNK),
      *   2. the front file (MD_POPULATED),
      *   3. the back file system, through cachefs_readlink_back().
      * A link fetched from the back file system is handed to
      * cachefs_stuffsymlink(); if that returns non-zero, cachefs_nocache() is
      * called on the cnode.
      *
      * On success fs_stats.st_hits is incremented when the data came from
      * the cache (sources 1 and 2) and fs_stats.st_misses otherwise.
      * Returns 0 or the error from the step that failed.
      *
      * cachefs_readlink() calls this with cp->c_rwlock held as writer.
      */
3023 static int
3024 cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr)
3025 {
3026         int error;
3027         cnode_t *cp = VTOC(vp);
3028         fscache_t *fscp = C_TO_FSCACHE(cp);
3029         caddr_t buf;
3030         int buflen;
3031         int readcache = 0;
3032 
3033         mutex_enter(&cp->c_statelock);
3034 
3035         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
3036         if (error)
3037                 goto out;
3038 
3039         /* if the sym link is cached as a fast sym link */
3040         if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3041                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3042                 error = uiomove(cp->c_metadata.md_allocinfo,
3043                     MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
                     /*
                      * CFSDEBUG builds always return the cached copy here.
                      * Other builds fall through to the remaining sources
                      * when the cached link has zero length.
                      */
3044 #ifdef CFSDEBUG
3045                 readcache = 1;
3046                 goto out;
3047 #else /* CFSDEBUG */
3048                 /* XXX KLUDGE! correct for insidious 0-len symlink */
3049                 if (cp->c_size != 0) {
3050                         readcache = 1;
3051                         goto out;
3052                 }
3053 #endif /* CFSDEBUG */
3054         }
3055 
3056         /* if the sym link is cached in a front file */
3057         if (cp->c_metadata.md_flags & MD_POPULATED) {
3058                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3059                 ASSERT(cp->c_metadata.md_flags & MD_FILE);
3060                 if (cp->c_frontvp == NULL) {
                             /*
                              * Return value ignored; MD_POPULATED is tested
                              * again below.  NOTE(review): presumably
                              * cachefs_getfrontfile() clears MD_POPULATED when
                              * it fails -- confirm.
                              */
3061                         (void) cachefs_getfrontfile(cp);
3062                 }
3063                 if (cp->c_metadata.md_flags & MD_POPULATED) {
3064                         /* read symlink data from frontfile */
3065                         uiop->uio_offset = 0;
3066                         (void) VOP_RWLOCK(cp->c_frontvp,
3067                             V_WRITELOCK_FALSE, NULL);
3068                         error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3069                         VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3070 
3071                         /* XXX KLUDGE! correct for insidious 0-len symlink */
3072                         if (cp->c_size != 0) {
3073                                 readcache = 1;
3074                                 goto out;
3075                         }
3076                 }
3077         }
3078 
3079         /* get the sym link contents from the back fs */
3080         error = cachefs_readlink_back(cp, cr, &buf, &buflen);
3081         if (error)
3082                 goto out;
3083 
3084         /* copy the contents out to the user */
3085         error = uiomove(buf, MIN(buflen, uiop->uio_resid), UIO_READ, uiop);
3086 
3087         /*
3088          * try to cache the sym link, note that it's a no-op if NOCACHE is set
3089          * or if NFSv4 pass-through is enabled.
3090          */
3091         if (cachefs_stuffsymlink(cp, buf, buflen)) {
3092                 cachefs_nocache(cp);
3093         }
3094 
             /*
              * NOTE(review): freed as MAXPATHLEN bytes, so the buffer from
              * cachefs_readlink_back() is presumably allocated with that
              * size -- confirm.
              */
3095         cachefs_kmem_free(buf, MAXPATHLEN);
3096 
3097 out:
3098         mutex_exit(&cp->c_statelock);
             /* the hit/miss counters are updated after c_statelock is dropped */
3099         if (error == 0) {
3100                 if (readcache)
3101                         fscp->fs_stats.st_hits++;
3102                 else
3103                         fscp->fs_stats.st_misses++;
3104         }
3105         return (error);
3106 }
3107 
     /*
      * Disconnected-mode helper for cachefs_readlink().
      *
      * With cp->c_statelock held, copies the link text to uiop from the fast
      * symlink in c_metadata.md_allocinfo (MD_FASTSYMLNK) or, failing that,
      * from the front file (MD_POPULATED).  The back file system is never
      * consulted: when neither cached copy is usable the function returns
      * ETIMEDOUT, and cachefs_readlink() tests that result with CFS_TIMEOUT()
      * to decide whether to retry.
      *
      * Unlike cachefs_readlink_connected(), there is no special handling of
      * zero-length links here.
      *
      * Returns 0, the error from uiomove()/VOP_READ(), or ETIMEDOUT.
      */
3108 static int
3109 cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop)
3110 {
3111         int error;
3112         cnode_t *cp = VTOC(vp);
3113         fscache_t *fscp = C_TO_FSCACHE(cp);
3114         int readcache = 0;
3115 
3116         mutex_enter(&cp->c_statelock);
3117 
3118         /* if the sym link is cached as a fast sym link */
3119         if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3120                 error = uiomove(cp->c_metadata.md_allocinfo,
3121                     MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3122                 readcache = 1;
3123                 goto out;
3124         }
3125 
3126         /* if the sym link is cached in a front file */
3127         if (cp->c_metadata.md_flags & MD_POPULATED) {
3128                 ASSERT(cp->c_metadata.md_flags & MD_FILE);
3129                 if (cp->c_frontvp == NULL) {
                             /* result ignored; MD_POPULATED is tested again */
3130                         (void) cachefs_getfrontfile(cp);
3131                 }
3132                 if (cp->c_metadata.md_flags & MD_POPULATED) {
3133                         /* read symlink data from frontfile */
3134                         uiop->uio_offset = 0;
3135                         (void) VOP_RWLOCK(cp->c_frontvp,
3136                             V_WRITELOCK_FALSE, NULL);
3137                         error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3138                         VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3139                         readcache = 1;
3140                         goto out;
3141                 }
3142         }
             /* not available from the cache */
3143         error = ETIMEDOUT;
3144 
3145 out:
3146         mutex_exit(&cp->c_statelock);
             /*
              * Every path that arrives here with error == 0 has set
              * readcache, so only st_hits is incremented in practice.
              */
3147         if (error == 0) {
3148                 if (readcache)
3149                         fscp->fs_stats.st_hits++;
3150                 else
3151                         fscp->fs_stats.st_misses++;
3152         }
3153         return (error);
3154 }
3155 
     /*
      * fsync entry point for a cachefs vnode.
      *
      * Returns EPERM outside the global zone, and 0 without doing any work
      * when the back vfs has VFS_RDONLY set.  Otherwise, inside a retry loop:
      *   - for a regular file with cached pages (not NFSv4 pass-through),
      *     pushes the pages with cachefs_putpage_common();
      *   - when connected and a backvp exists, calls VOP_FSYNC() on the
      *     backvp, recording any error other than EINTR in cp->c_error;
      *   - when not NFSv4 pass-through, calls cachefs_sync_metadata().
      * The loop restarts whenever CFS_TIMEOUT() is true for a result, or on
      * ENOSPC while not connected.
      *
      * If everything succeeded the function still returns cp->c_error, so an
      * error recorded on the cnode is reported to the caller.
      *
      * NOTE(review): the third argument to cachefs_cd_access() is 1 here and
      * 0 in the readlink and lookup paths; its meaning is not visible in
      * this part of the file -- confirm against cachefs_cd_access().
      */
3156 /*ARGSUSED*/
3157 static int
3158 cachefs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
3159 {
3160         cnode_t *cp = VTOC(vp);
3161         int error = 0;
3162         fscache_t *fscp = C_TO_FSCACHE(cp);
3163         int held = 0;
3164         int connected = 0;
3165 
3166 #ifdef CFSDEBUG
3167         CFS_DEBUG(CFSDEBUG_VOPS)
3168                 printf("cachefs_fsync: ENTER vp %p\n", (void *)vp);
3169 #endif
3170 
3171         if (getzoneid() != GLOBAL_ZONEID) {
3172                 error = EPERM;
3173                 goto out;
3174         }
3175 
             /* read-only back file system: return success (error is still 0) */
3176         if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
3177                 goto out;
3178 
3179         /*
3180          * Cachefs only provides pass-through support for NFSv4,
3181          * and all vnode operations are passed through to the
3182          * back file system. For NFSv4 pass-through to work, only
3183          * connected operation is supported, the cnode backvp must
3184          * exist, and cachefs optional (eg., disconnectable) flags
3185          * are turned off. Assert these conditions to ensure that
3186          * the backfilesystem is called for the fsync operation.
3187          */
3188         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3189         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3190 
3191         for (;;) {
3192                 /* get (or renew) access to the file system */
3193                 if (held) {
3194                         /* Won't loop with NFSv4 connected behavior */
3195                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3196                         cachefs_cd_release(fscp);
3197                         held = 0;
3198                 }
3199                 error = cachefs_cd_access(fscp, connected, 1);
3200                 if (error)
3201                         break;
3202                 held = 1;
                     /*
                      * Reset for the next pass; the retry paths below set it
                      * back to 1 when they need it.
                      */
3203                 connected = 0;
3204 
3205                 /* if a regular file, write out the pages */
3206                 if ((vp->v_type == VREG) && vn_has_cached_data(vp) &&
3207                     !CFS_ISFS_BACKFS_NFSV4(fscp)) {
                             /*
                              * NOTE(review): offset 0 and length 0 presumably
                              * select the whole file -- confirm against
                              * cachefs_putpage_common().
                              */
3208                         error = cachefs_putpage_common(vp, (offset_t)0,
3209                             0, 0, cr);
3210                         if (CFS_TIMEOUT(fscp, error)) {
3211                                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3212                                         cachefs_cd_release(fscp);
3213                                         held = 0;
3214                                         cachefs_cd_timedout(fscp);
3215                                         continue;
3216                                 } else {
3217                                         connected = 1;
3218                                         continue;
3219                                 }
3220                         }
3221 
3222                         /* if no space left in cache, wait until connected */
3223                         if ((error == ENOSPC) &&
3224                             (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
3225                                 connected = 1;
3226                                 continue;
3227                         }
3228 
3229                         /* clear the cnode error if putpage worked */
3230                         if ((error == 0) && cp->c_error) {
3231                                 mutex_enter(&cp->c_statelock);
3232                                 cp->c_error = 0;
3233                                 mutex_exit(&cp->c_statelock);
3234                         }
3235 
3236                         if (error)
3237                                 break;
3238                 }
3239 
3240                 /* if connected, sync the backvp */
3241                 if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
3242                     cp->c_backvp) {
3243                         mutex_enter(&cp->c_statelock);
                             /* test again: the check above was made unlocked */
3244                         if (cp->c_backvp) {
3245                                 CFS_DPRINT_BACKFS_NFSV4(fscp,
3246                                     ("cachefs_fsync (nfsv4): cnode %p, "
3247                                     "backvp %p\n", cp, cp->c_backvp));
3248                                 error = VOP_FSYNC(cp->c_backvp, syncflag, cr,
3249                                     ct);
3250                                 if (CFS_TIMEOUT(fscp, error)) {
                                             /* drop the lock before retrying */
3251                                         mutex_exit(&cp->c_statelock);
3252                                         cachefs_cd_release(fscp);
3253                                         held = 0;
3254                                         cachefs_cd_timedout(fscp);
3255                                         continue;
3256                                 } else if (error && (error != EINTR))
                                             /* EINTR is not recorded on the cnode */
3257                                         cp->c_error = error;
3258                         }
3259                         mutex_exit(&cp->c_statelock);
3260                 }
3261 
3262                 /* sync the metadata and the front file to the front fs */
3263                 if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
3264                         error = cachefs_sync_metadata(cp);
                             /* a metadata sync failure is ignored while connected */
3265                         if (error &&
3266                             (fscp->fs_cdconnected == CFS_CD_CONNECTED))
3267                                 error = 0;
3268                 }
3269                 break;
3270         }
3271 
             /* report an error recorded on the cnode, if there is one */
3272         if (error == 0)
3273                 error = cp->c_error;
3274 
3275         if (held)
3276                 cachefs_cd_release(fscp);
3277 
3278 out:
3279 #ifdef CFS_CD_DEBUG
3280         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3281 #endif
3282 
3283 #ifdef CFSDEBUG
3284         CFS_DEBUG(CFSDEBUG_VOPS)
3285                 printf("cachefs_fsync: EXIT vp %p\n", (void *)vp);
3286 #endif
3287         return (error);
3288 }
3289 
3290 /*
3291  * Called from cachefs_inactive(), to make sure all the data goes out to disk.
      * Also called from cachefs_fsync().
      *
      * Works under cp->c_statelock.  Nothing is written unless CN_UPDATED is
      * set, the cnode is neither CN_STALE nor CN_DESTROY, the file group has
      * CFS_FG_WRITE, and the back file system is not NFSv4 pass-through.
      * Otherwise the function, in order:
      *   - allocates the file group attributes if CFS_FG_ALLOC_ATTR is set
      *     (a failure here ends the call with a return value of 0),
      *   - creates the metadata slot if CN_ALLOC_PENDING is set,
      *   - syncs the front file and refreshes md_timestamp if
      *     CN_NEED_FRONT_SYNC is set,
      *   - writes the metadata with filegrp_write_metadata().
      *
      * On error the front file and its rl entry are given up, the metadata
      * slot is destroyed if one had been allocated, CN_ALLOC_PENDING is set
      * and cachefs_nocache() is called.  CN_UPDATED is cleared on every path.
      *
      * Returns 0 or the error that caused the cleanup.
3292  */
3293 int
3294 cachefs_sync_metadata(cnode_t *cp)
3295 {
3296         int error = 0;
3297         struct filegrp *fgp;
3298         struct vattr va;
3299         fscache_t *fscp = C_TO_FSCACHE(cp);
3300 
3301 #ifdef CFSDEBUG
3302         CFS_DEBUG(CFSDEBUG_VOPS)
3303                 printf("c_sync_metadata: ENTER cp %p cflag %x\n",
3304                     (void *)cp, cp->c_flags);
3305 #endif
3306 
3307         mutex_enter(&cp->c_statelock);
3308         if ((cp->c_flags & CN_UPDATED) == 0)
3309                 goto out;
3310         if (cp->c_flags & (CN_STALE | CN_DESTROY))
3311                 goto out;
3312         fgp = cp->c_filegrp;
3313         if ((fgp->fg_flags & CFS_FG_WRITE) == 0)
3314                 goto out;
3315         if (CFS_ISFS_BACKFS_NFSV4(fscp))
3316                 goto out;
3317 
3318         if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
                     /* c_statelock is dropped around filegrp_allocattr() */
3319                 mutex_exit(&cp->c_statelock);
3320                 error = filegrp_allocattr(fgp);
3321                 mutex_enter(&cp->c_statelock);
3322                 if (error) {
                             /* not reported: leave without the error cleanup */
3323                         error = 0;
3324                         goto out;
3325                 }
3326         }
3327 
3328         if (cp->c_flags & CN_ALLOC_PENDING) {
3329                 error = filegrp_create_metadata(fgp, &cp->c_metadata,
3330                     &cp->c_id);
3331                 if (error)
3332                         goto out;
3333                 cp->c_flags &= ~CN_ALLOC_PENDING;
3334         }
3335 
3336         if (cp->c_flags & CN_NEED_FRONT_SYNC) {
3337                 if (cp->c_frontvp != NULL) {
3338                         error = VOP_FSYNC(cp->c_frontvp, FSYNC, kcred, NULL);
3339                         if (error) {
                                     /*
                                      * NOTE(review): there is no goto out
                                      * here, so this error is replaced by the
                                      * filegrp_write_metadata() result below
                                      * whenever that call is made.
                                      */
3340                                 cp->c_metadata.md_timestamp.tv_sec = 0;
3341                         } else {
                                     /* record the front file's mtime */
3342                                 va.va_mask = AT_MTIME;
3343                                 error = VOP_GETATTR(cp->c_frontvp, &va, 0,
3344                                     kcred, NULL);
3345                                 if (error)
3346                                         goto out;
3347                                 cp->c_metadata.md_timestamp = va.va_mtime;
3348                                 cp->c_flags &=
3349                                     ~(CN_NEED_FRONT_SYNC |
3350                                     CN_POPULATION_PENDING);
3351                         }
3352                 } else {
                             /* no front vnode: just clear the pending flags */
3353                         cp->c_flags &=
3354                             ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
3355                 }
3356         }
3357 
3358         /*
3359          * XXX tony: How can CN_ALLOC_PENDING still be set??
3360          * XXX tony: How can CN_UPDATED not be set?????
3361          */
3362         if ((cp->c_flags & CN_ALLOC_PENDING) == 0 &&
3363             (cp->c_flags & CN_UPDATED)) {
3364                 error = filegrp_write_metadata(fgp, &cp->c_id,
3365                     &cp->c_metadata);
3366                 if (error)
3367                         goto out;
3368         }
3369 out:
             /*
              * fgp is only used below when error is non-zero, and every
              * goto out taken before fgp is assigned has error == 0.
              */
3370         if (error) {
3371                 /* XXX modified files? */
3372                 if (cp->c_metadata.md_rlno) {
3373                         cachefs_removefrontfile(&cp->c_metadata,
3374                             &cp->c_id, fgp);
3375                         cachefs_rlent_moveto(C_TO_FSCACHE(cp)->fs_cache,
3376                             CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
3377                         cp->c_metadata.md_rlno = 0;
3378                         cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
3379                         if (cp->c_frontvp) {
3380                                 VN_RELE(cp->c_frontvp);
3381                                 cp->c_frontvp = NULL;
3382                         }
3383                 }
                     /* destroy the metadata slot only if one was allocated */
3384                 if ((cp->c_flags & CN_ALLOC_PENDING) == 0)
3385                         (void) filegrp_destroy_metadata(fgp, &cp->c_id);
3386                 cp->c_flags |= CN_ALLOC_PENDING;
3387                 cachefs_nocache(cp);
3388         }
3389         /*
3390          * we clear the updated bit even on errors because a retry
3391          * will probably fail also.
3392          */
3393         cp->c_flags &= ~CN_UPDATED;
3394         mutex_exit(&cp->c_statelock);
3395 
3396 #ifdef CFSDEBUG
3397         CFS_DEBUG(CFSDEBUG_VOPS)
3398                 printf("c_sync_metadata: EXIT cp %p cflag %x\n",
3399                     (void *)cp, cp->c_flags);
3400 #endif
3401 
3402         return (error);
3403 }
3404 
3405 /*
3406  * This is the vop entry point for inactivating a vnode.
3407  * It just queues the request for the async thread which
3408  * calls cachefs_inactive.
3409  * Because of the dnlc, it is not safe to grab most locks here.
      *
      * The function sets cp->c_ipending, allocates a cachefs_req with
      * KM_SLEEP, fills it in as a CFS_IDLE request carrying vp and a held
      * reference on cr, and adds it to the fscache's fs_workq.  No lock is
      * taken in this function itself, and nothing is returned.
      *
      * NOTE(review): the sentence above names this same function as the one
      * the async thread calls; the routine that actually services CFS_IDLE
      * requests is not visible in this part of the file -- confirm the name.
3410  */
3411 /*ARGSUSED*/
3412 static void
3413 cachefs_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ct)
3414 {
3415         cnode_t *cp;
3416         struct cachefs_req *rp;
3417         fscache_t *fscp;
3418 
3419 #ifdef CFSDEBUG
3420         CFS_DEBUG(CFSDEBUG_VOPS)
3421                 printf("cachefs_inactive: ENTER vp %p\n", (void *)vp);
3422 #endif
3423 
3424         cp = VTOC(vp);
3425         fscp = C_TO_FSCACHE(cp);
3426 
3427         ASSERT((cp->c_flags & CN_IDLE) == 0);
3428 
3429         /*
3430          * Cachefs only provides pass-through support for NFSv4,
3431          * and all vnode operations are passed through to the
3432          * back file system. For NFSv4 pass-through to work, only
3433          * connected operation is supported, the cnode backvp must
3434          * exist, and cachefs optional (eg., disconnectable) flags
3435          * are turned off. Assert these conditions to ensure that
3436          * the backfilesystem is called for the inactive operation.
3437          */
3438         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3439         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3440 
3441         /* vn_rele() set the v_count == 1 */
3442 
             /* flag the cnode before the request is queued */
3443         cp->c_ipending = 1;
3444 
             /* build the CFS_IDLE request; KM_SLEEP may block for memory */
3445         rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3446         rp->cfs_cmd = CFS_IDLE;
3447         rp->cfs_cr = cr;
             /*
              * The request keeps its own hold on the cred.
              * NOTE(review): presumably dropped by whoever services the
              * request -- confirm.
              */
3448         crhold(rp->cfs_cr);
3449         rp->cfs_req_u.cu_idle.ci_vp = vp;
3450         cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
3451 
3452 #ifdef CFSDEBUG
3453         CFS_DEBUG(CFSDEBUG_VOPS)
3454                 printf("cachefs_inactive: EXIT vp %p\n", (void *)vp);
3455 #endif
3456 }
3457 
     /*
      * Lookup entry point: find the name nm in directory dvp and return the
      * resulting vnode through vpp.
      *
      * Returns EPERM outside the global zone.  Otherwise a retry loop
      * obtains access with cachefs_cd_access() and calls
      * cachefs_lookup_common().  When CFS_TIMEOUT() is true for the result:
      *   - if connected, the access is released, cachefs_cd_timedout() is
      *     called and the loop restarts;
      *   - if not connected and cachefs_cd_access_miss() returns non-zero,
      *     cachefs_lookup_back() is tried with dcp->c_rwlock held as reader;
      *   - otherwise the loop restarts, passing connected == 1 to
      *     cachefs_cd_access().
      *
      * If the lookup succeeds and IS_DEVVP() is true for the vnode found, it
      * is replaced by the vnode returned from specvp(); ENOSYS is returned if
      * specvp() yields NULL.
      *
      * Returns 0 with a vnode in *vpp, or an errno value.
      */
3458 /* ARGSUSED */
3459 static int
3460 cachefs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
3461     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
3462     caller_context_t *ct, int *direntflags, pathname_t *realpnp)
3463 
3464 {
3465         int error = 0;
3466         cnode_t *dcp = VTOC(dvp);
3467         fscache_t *fscp = C_TO_FSCACHE(dcp);
3468         int held = 0;
3469         int connected = 0;
3470 
3471 #ifdef CFSDEBUG
3472         CFS_DEBUG(CFSDEBUG_VOPS)
3473                 printf("cachefs_lookup: ENTER dvp %p nm %s\n", (void *)dvp, nm);
3474 #endif
3475 
3476         if (getzoneid() != GLOBAL_ZONEID) {
3477                 error = EPERM;
3478                 goto out;
3479         }
3480 
3481         /*
3482          * Cachefs only provides pass-through support for NFSv4,
3483          * and all vnode operations are passed through to the
3484          * back file system. For NFSv4 pass-through to work, only
3485          * connected operation is supported, the cnode backvp must
3486          * exist, and cachefs optional (eg., disconnectable) flags
3487          * are turned off. Assert these conditions to ensure that
3488          * the backfilesystem is called for the lookup operation.
3489          */
3490         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3491         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3492 
3493         for (;;) {
3494                 /* get (or renew) access to the file system */
3495                 if (held) {
3496                         /* Won't loop with NFSv4 connected behavior */
3497                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3498                         cachefs_cd_release(fscp);
3499                         held = 0;
3500                 }
3501                 error = cachefs_cd_access(fscp, connected, 0);
3502                 if (error)
3503                         break;
3504                 held = 1;
3505 
3506                 error = cachefs_lookup_common(dvp, nm, vpp, pnp,
3507                         flags, rdir, cr);
3508                 if (CFS_TIMEOUT(fscp, error)) {
3509                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
                                     /* timed out while connected: report, retry */
3510                                 cachefs_cd_release(fscp);
3511                                 held = 0;
3512                                 cachefs_cd_timedout(fscp);
3513                                 connected = 0;
3514                                 continue;
3515                         } else {
3516                                 if (cachefs_cd_access_miss(fscp)) {
                                             /* go to the back fs for this name */
3517                                         rw_enter(&dcp->c_rwlock, RW_READER);
3518                                         error = cachefs_lookup_back(dvp, nm,
3519                                             vpp, cr);
3520                                         rw_exit(&dcp->c_rwlock);
3521                                         if (!CFS_TIMEOUT(fscp, error))
3522                                                 break;
                                             /* still timing out: pause 5*hz ticks */
3523                                         delay(5*hz);
3524                                         connected = 0;
3525                                         continue;
3526                                 }
                                     /* retry with connected == 1 */
3527                                 connected = 1;
3528                                 continue;
3529                         }
3530                 }
3531                 break;
3532         }
3533         if (held)
3534                 cachefs_cd_release(fscp);
3535 
             /*
              * Replace a device vnode with the one specvp() returns.  The hold
              * on the original vnode is released in either case.
              * NOTE(review): when specvp() returns NULL, *vpp is left
              * pointing at the vnode that was just released.
              */
3536         if (error == 0 && IS_DEVVP(*vpp)) {
3537                 struct vnode *newvp;
3538                 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3539                 VN_RELE(*vpp);
3540                 if (newvp == NULL) {
3541                         error = ENOSYS;
3542                 } else {
3543                         *vpp = newvp;
3544                 }
3545         }
3546 
3547 #ifdef CFS_CD_DEBUG
3548         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3549 #endif
3550 out:
3551 #ifdef CFSDEBUG
3552         CFS_DEBUG(CFSDEBUG_VOPS)
3553                 printf("cachefs_lookup: EXIT error = %d\n", error);
3554 #endif
3555 
3556         return (error);
3557 }
3558 
3559 /* ARGSUSED */
3560 int
3561 cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
3562     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr)
3563 {
3564         int error = 0;
3565         cnode_t *cp, *dcp = VTOC(dvp);
3566         fscache_t *fscp = C_TO_FSCACHE(dcp);
3567         struct fid cookie;
3568         u_offset_t d_offset;
3569         struct cachefs_req *rp;
3570         cfs_cid_t cid, dircid;
3571         uint_t flag;
3572         uint_t uncached = 0;
3573 
3574         *vpp = NULL;
3575 
3576         /*
3577          * If lookup is for "", just return dvp.  Don't need
3578          * to send it over the wire, look it up in the dnlc,
3579          * or perform any access checks.
3580          */
3581         if (*nm == '\0') {
3582                 VN_HOLD(dvp);
3583                 *vpp = dvp;
3584                 return (0);
3585         }
3586 
3587         /* can't do lookups in non-directories */
3588         if (dvp->v_type != VDIR)
3589                 return (ENOTDIR);
3590 
3591         /* perform access check, also does consistency check if connected */
3592         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3593                 error = cachefs_access_connected(dvp, VEXEC, 0, cr);
3594         } else {
3595                 mutex_enter(&dcp->c_statelock);
3596                 error = cachefs_access_local(dcp, VEXEC, cr);
3597                 mutex_exit(&dcp->c_statelock);
3598         }
3599         if (error)
3600                 return (error);
3601 
3602         /*
3603          * If lookup is for ".", just return dvp.  Don't need
3604          * to send it over the wire or look it up in the dnlc,
3605          * just need to check access.
3606          */
3607         if (strcmp(nm, ".") == 0) {
3608                 VN_HOLD(dvp);
3609                 *vpp = dvp;
3610                 return (0);
3611         }
3612 
3613         /* check the dnlc */
3614         *vpp = (vnode_t *)dnlc_lookup(dvp, nm);
3615         if (*vpp)
3616                 return (0);
3617 
3618         /* read lock the dir before starting the search */
3619         rw_enter(&dcp->c_rwlock, RW_READER);
3620 
3621         mutex_enter(&dcp->c_statelock);
3622         dircid = dcp->c_id;
3623 
3624         dcp->c_usage++;
3625 
3626         /* if front file is not usable, lookup on the back fs */
3627         if ((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
3628             CFS_ISFS_BACKFS_NFSV4(fscp) ||
3629             ((dcp->c_filegrp->fg_flags & CFS_FG_READ) == 0)) {
3630                 mutex_exit(&dcp->c_statelock);
3631                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3632                         error = cachefs_lookup_back(dvp, nm, vpp, cr);
3633                 else
3634                         error = ETIMEDOUT;
3635                 goto out;
3636         }
3637 
3638         /* if the front file is not populated, try to populate it */
3639         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
3640                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
3641                         error = ETIMEDOUT;
3642                         mutex_exit(&dcp->c_statelock);
3643                         goto out;
3644                 }
3645 
3646                 if (cachefs_async_okay()) {
3647                         /* cannot populate if cache is not writable */
3648                         ASSERT((dcp->c_flags &
3649                             (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0);
3650                         dcp->c_flags |= CN_ASYNC_POPULATE;
3651 
3652                         rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3653                         rp->cfs_cmd = CFS_POPULATE;
3654                         rp->cfs_req_u.cu_populate.cpop_vp = dvp;
3655                         rp->cfs_cr = cr;
3656 
3657                         crhold(cr);
3658                         VN_HOLD(dvp);
3659 
3660                         cachefs_addqueue(rp, &fscp->fs_workq);
3661                 } else if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
3662                         error = cachefs_dir_fill(dcp, cr);
3663                         if (error != 0) {
3664                                 mutex_exit(&dcp->c_statelock);
3665                                 goto out;
3666                         }
3667                 }
3668                 /* no populate if too many asyncs and we have to cache ACLs */
3669 
3670                 mutex_exit(&dcp->c_statelock);
3671 
3672                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3673                         error = cachefs_lookup_back(dvp, nm, vpp, cr);
3674                 else
3675                         error = ETIMEDOUT;
3676                 goto out;
3677         }
3678 
3679         /* by now we have a valid cached front file that we can search */
3680 
3681         ASSERT((dcp->c_flags & CN_ASYNC_POPULATE) == 0);
3682         error = cachefs_dir_look(dcp, nm, &cookie, &flag,
3683             &d_offset, &cid);
3684         mutex_exit(&dcp->c_statelock);
3685 
3686         if (error) {
3687                 /* if the entry does not have the fid, go get it */
3688                 if (error == EINVAL) {
3689                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3690                                 error = cachefs_lookup_back(dvp, nm, vpp, cr);
3691                         else
3692                                 error = ETIMEDOUT;
3693                 }
3694 
3695                 /* errors other than does not exist */
3696                 else if (error != ENOENT) {
3697                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3698                                 error = cachefs_lookup_back(dvp, nm, vpp, cr);
3699                         else
3700                                 error = ETIMEDOUT;
3701                 }
3702                 goto out;
3703         }
3704 
3705         /*
3706          * Else we found the entry in the cached directory.
3707          * Make a cnode for it.
3708          */
3709         error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
3710             cr, 0, &cp);
3711         if (error == ESTALE) {
3712                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3713                 mutex_enter(&dcp->c_statelock);
3714                 cachefs_nocache(dcp);
3715                 mutex_exit(&dcp->c_statelock);
3716                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3717                         error = cachefs_lookup_back(dvp, nm, vpp, cr);
3718                         uncached = 1;
3719                 } else
3720                         error = ETIMEDOUT;
3721         } else if (error == 0) {
3722                 *vpp = CTOV(cp);
3723         }
3724 
3725 out:
3726         if (error == 0) {
3727                 /* put the entry in the dnlc */
3728                 if (cachefs_dnlc)
3729                         dnlc_enter(dvp, nm, *vpp);
3730 
3731                 /* save the cid of the parent so can find the name */
3732                 cp = VTOC(*vpp);
3733                 if (bcmp(&cp->c_metadata.md_parent, &dircid,
3734                     sizeof (cfs_cid_t)) != 0) {
3735                         mutex_enter(&cp->c_statelock);
3736                         cp->c_metadata.md_parent = dircid;
3737                         cp->c_flags |= CN_UPDATED;
3738                         mutex_exit(&cp->c_statelock);
3739                 }
3740         }
3741 
3742         rw_exit(&dcp->c_rwlock);
3743         if (uncached && dcp->c_metadata.md_flags & MD_PACKED)
3744                 (void) cachefs_pack_common(dvp, cr);
3745         return (error);
3746 }
3747 
3748 /*
3749  * Called from cachefs_lookup_common when the back file system needs to be
3750  * examined to perform the lookup.
3751  */
3752 static int
3753 cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
3754     cred_t *cr)
3755 {
3756         int error = 0;
3757         cnode_t *cp, *dcp = VTOC(dvp);
3758         fscache_t *fscp = C_TO_FSCACHE(dcp);
3759         vnode_t *backvp = NULL;
3760         struct vattr va;
3761         struct fid cookie;
3762         cfs_cid_t cid;
3763         uint32_t valid_fid;
3764 
3765         mutex_enter(&dcp->c_statelock);
3766 
3767         /* do a lookup on the back FS to get the back vnode */
3768         if (dcp->c_backvp == NULL) {
3769                 error = cachefs_getbackvp(fscp, dcp);
3770                 if (error)
3771                         goto out;
3772         }
3773 
3774         CFS_DPRINT_BACKFS_NFSV4(fscp,
3775             ("cachefs_lookup (nfsv4): dcp %p, dbackvp %p, name %s\n",
3776             dcp, dcp->c_backvp, nm));
3777         error = VOP_LOOKUP(dcp->c_backvp, nm, &backvp, (struct pathname *)NULL,
3778             0, (vnode_t *)NULL, cr, NULL, NULL, NULL);
3779         if (error)
3780                 goto out;
3781         if (IS_DEVVP(backvp)) {
3782                 struct vnode *devvp = backvp;
3783 
3784                 if (VOP_REALVP(devvp, &backvp, NULL) == 0) {
3785                         VN_HOLD(backvp);
3786                         VN_RELE(devvp);
3787                 }
3788         }
3789 
3790         /* get the fid and attrs from the back fs */
3791         valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
3792         error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
3793         if (error)
3794                 goto out;
3795 
3796         cid.cid_fileno = va.va_nodeid;
3797         cid.cid_flags = 0;
3798 
3799 #if 0
3800         /* XXX bob: this is probably no longer necessary */
3801         /* if the directory entry was incomplete, we can complete it now */
3802         if ((dcp->c_metadata.md_flags & MD_POPULATED) &&
3803             ((dcp->c_flags & CN_ASYNC_POPULATE) == 0) &&
3804             (dcp->c_filegrp->fg_flags & CFS_FG_WRITE)) {
3805                 cachefs_dir_modentry(dcp, d_offset, &cookie, &cid);
3806         }
3807 #endif
3808 
3809 out:
3810         mutex_exit(&dcp->c_statelock);
3811 
3812         /* create the cnode */
3813         if (error == 0) {
3814                 error = cachefs_cnode_make(&cid, fscp,
3815                     (valid_fid ? &cookie : NULL),
3816                     &va, backvp, cr, 0, &cp);
3817                 if (error == 0) {
3818                         *vpp = CTOV(cp);
3819                 }
3820         }
3821 
3822         if (backvp)
3823                 VN_RELE(backvp);
3824 
3825         return (error);
3826 }
3827 
3828 /*ARGSUSED7*/
3829 static int
3830 cachefs_create(vnode_t *dvp, char *nm, vattr_t *vap,
3831     vcexcl_t exclusive, int mode, vnode_t **vpp, cred_t *cr, int flag,
3832     caller_context_t *ct, vsecattr_t *vsecp)
3833 
3834 {
3835         cnode_t *dcp = VTOC(dvp);
3836         fscache_t *fscp = C_TO_FSCACHE(dcp);
3837         cachefscache_t *cachep = fscp->fs_cache;
3838         int error;
3839         int connected = 0;
3840         int held = 0;
3841 
3842 #ifdef CFSDEBUG
3843         CFS_DEBUG(CFSDEBUG_VOPS)
3844                 printf("cachefs_create: ENTER dvp %p, nm %s\n",
3845                     (void *)dvp, nm);
3846 #endif
3847         if (getzoneid() != GLOBAL_ZONEID) {
3848                 error = EPERM;
3849                 goto out;
3850         }
3851 
3852         /*
3853          * Cachefs only provides pass-through support for NFSv4,
3854          * and all vnode operations are passed through to the
3855          * back file system. For NFSv4 pass-through to work, only
3856          * connected operation is supported, the cnode backvp must
3857          * exist, and cachefs optional (eg., disconnectable) flags
3858          * are turned off. Assert these conditions to ensure that
3859          * the backfilesystem is called for the create operation.
3860          */
3861         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3862         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3863 
3864         for (;;) {
3865                 /* get (or renew) access to the file system */
3866                 if (held) {
3867                         /* Won't loop with NFSv4 connected behavior */
3868                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3869                         cachefs_cd_release(fscp);
3870                         held = 0;
3871                 }
3872                 error = cachefs_cd_access(fscp, connected, 1);
3873                 if (error)
3874                         break;
3875                 held = 1;
3876 
3877                 /*
3878                  * if we are connected, perform the remote portion of the
3879                  * create.
3880                  */
3881                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3882                         error = cachefs_create_connected(dvp, nm, vap,
3883                             exclusive, mode, vpp, cr);
3884                         if (CFS_TIMEOUT(fscp, error)) {
3885                                 cachefs_cd_release(fscp);
3886                                 held = 0;
3887                                 cachefs_cd_timedout(fscp);
3888                                 connected = 0;
3889                                 continue;
3890                         } else if (error) {
3891                                 break;
3892                         }
3893                 }
3894 
3895                 /* else we must be disconnected */
3896                 else {
3897                         error = cachefs_create_disconnected(dvp, nm, vap,
3898                             exclusive, mode, vpp, cr);
3899                         if (CFS_TIMEOUT(fscp, error)) {
3900                                 connected = 1;
3901                                 continue;
3902                         } else if (error) {
3903                                 break;
3904                         }
3905                 }
3906                 break;
3907         }
3908 
3909         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CREATE)) {
3910                 fid_t *fidp = NULL;
3911                 ino64_t fileno = 0;
3912                 cnode_t *cp = NULL;
3913                 if (error == 0)
3914                         cp = VTOC(*vpp);
3915 
3916                 if (cp != NULL) {
3917                         fidp = &cp->c_metadata.md_cookie;
3918                         fileno = cp->c_id.cid_fileno;
3919                 }
3920                 cachefs_log_create(cachep, error, fscp->fs_cfsvfsp,
3921                     fidp, fileno, crgetuid(cr));
3922         }
3923 
3924         if (held)
3925                 cachefs_cd_release(fscp);
3926 
3927         if (error == 0 && CFS_ISFS_NONSHARED(fscp))
3928                 (void) cachefs_pack(dvp, nm, cr);
3929         if (error == 0 && IS_DEVVP(*vpp)) {
3930                 struct vnode *spcvp;
3931 
3932                 spcvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3933                 VN_RELE(*vpp);
3934                 if (spcvp == NULL) {
3935                         error = ENOSYS;
3936                 } else {
3937                         *vpp = spcvp;
3938                 }
3939         }
3940 
3941 #ifdef CFS_CD_DEBUG
3942         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3943 #endif
3944 out:
3945 #ifdef CFSDEBUG
3946         CFS_DEBUG(CFSDEBUG_VOPS)
3947                 printf("cachefs_create: EXIT error %d\n", error);
3948 #endif
3949         return (error);
3950 }
3951 
3952 
3953 static int
3954 cachefs_create_connected(vnode_t *dvp, char *nm, vattr_t *vap,
3955     enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
3956 {
3957         cnode_t *dcp = VTOC(dvp);
3958         fscache_t *fscp = C_TO_FSCACHE(dcp);
3959         int error;
3960         vnode_t *tvp = NULL;
3961         vnode_t *devvp;
3962         fid_t cookie;
3963         vattr_t va;
3964         cnode_t *ncp;
3965         cfs_cid_t cid;
3966         vnode_t *vp;
3967         uint32_t valid_fid;
3968 
3969         /* special case if file already exists */
3970         error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
3971         if (CFS_TIMEOUT(fscp, error))
3972                 return (error);
3973         if (error == 0) {
3974                 if (exclusive == EXCL)
3975                         error = EEXIST;
3976                 else if (vp->v_type == VDIR && (mode & VWRITE))
3977                         error = EISDIR;
3978                 else if ((error =
3979                     cachefs_access_connected(vp, mode, 0, cr)) == 0) {
3980                         if ((vap->va_mask & AT_SIZE) && (vp->v_type == VREG)) {
3981                                 vap->va_mask = AT_SIZE;
3982                                 error = cachefs_setattr_common(vp, vap, 0,
3983                                     cr, NULL);
3984                         }
3985                 }
3986                 if (error) {
3987                         VN_RELE(vp);
3988                 } else
3989                         *vpp = vp;
3990                 return (error);
3991         }
3992 
3993         rw_enter(&dcp->c_rwlock, RW_WRITER);
3994         mutex_enter(&dcp->c_statelock);
3995 
3996         /* consistency check the directory */
3997         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
3998         if (error) {
3999                 mutex_exit(&dcp->c_statelock);
4000                 goto out;
4001         }
4002 
4003         /* get the backvp if necessary */
4004         if (dcp->c_backvp == NULL) {
4005                 error = cachefs_getbackvp(fscp, dcp);
4006                 if (error) {
4007                         mutex_exit(&dcp->c_statelock);
4008                         goto out;
4009                 }
4010         }
4011 
4012         /* create the file on the back fs */
4013         CFS_DPRINT_BACKFS_NFSV4(fscp,
4014             ("cachefs_create (nfsv4): dcp %p, dbackvp %p,"
4015             "name %s\n", dcp, dcp->c_backvp, nm));
4016         error = VOP_CREATE(dcp->c_backvp, nm, vap, exclusive, mode,
4017             &devvp, cr, 0, NULL, NULL);
4018         mutex_exit(&dcp->c_statelock);
4019         if (error)
4020                 goto out;
4021         if (VOP_REALVP(devvp, &tvp, NULL) == 0) {
4022                 VN_HOLD(tvp);
4023                 VN_RELE(devvp);
4024         } else {
4025                 tvp = devvp;
4026         }
4027 
4028         /* get the fid and attrs from the back fs */
4029         valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
4030         error = cachefs_getcookie(tvp, &cookie, &va, cr, valid_fid);
4031         if (error)
4032                 goto out;
4033 
4034         /* make the cnode */
4035         cid.cid_fileno = va.va_nodeid;
4036         cid.cid_flags = 0;
4037         error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
4038             &va, tvp, cr, 0, &ncp);
4039         if (error)
4040                 goto out;
4041 
4042         *vpp = CTOV(ncp);
4043 
4044         /* enter it in the parent directory */
4045         mutex_enter(&dcp->c_statelock);
4046         if (CFS_ISFS_NONSHARED(fscp) &&
4047             (dcp->c_metadata.md_flags & MD_POPULATED)) {
4048                 /* see if entry already exists */
4049                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4050                 error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
4051                 if (error == ENOENT) {
4052                         /* entry, does not exist, add the new file */
4053                         error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4054                             &ncp->c_id, SM_ASYNC);
4055                         if (error) {
4056                                 cachefs_nocache(dcp);
4057                                 error = 0;
4058                         }
4059                         /* XXX should this be done elsewhere, too? */
4060                         dnlc_enter(dvp, nm, *vpp);
4061                 } else {
4062                         /* entry exists or some other problem */
4063                         cachefs_nocache(dcp);
4064                         error = 0;
4065                 }
4066         }
4067         CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4068         mutex_exit(&dcp->c_statelock);
4069 
4070 out:
4071         rw_exit(&dcp->c_rwlock);
4072         if (tvp)
4073                 VN_RELE(tvp);
4074 
4075         return (error);
4076 }
4077 
4078 static int
4079 cachefs_create_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
4080         enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
4081 {
4082         cnode_t *dcp = VTOC(dvp);
4083         cnode_t *cp;
4084         cnode_t *ncp = NULL;
4085         vnode_t *vp;
4086         fscache_t *fscp = C_TO_FSCACHE(dcp);
4087         int error = 0;
4088         struct vattr va;
4089         timestruc_t current_time;
4090         off_t commit = 0;
4091         fid_t cookie;
4092         cfs_cid_t cid;
4093 
4094         rw_enter(&dcp->c_rwlock, RW_WRITER);
4095         mutex_enter(&dcp->c_statelock);
4096 
4097         /* give up if the directory is not populated */
4098         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4099                 mutex_exit(&dcp->c_statelock);
4100                 rw_exit(&dcp->c_rwlock);
4101                 return (ETIMEDOUT);
4102         }
4103 
4104         /* special case if file already exists */
4105         error = cachefs_dir_look(dcp, nm, &cookie, NULL, NULL, &cid);
4106         if (error == EINVAL) {
4107                 mutex_exit(&dcp->c_statelock);
4108                 rw_exit(&dcp->c_rwlock);
4109                 return (ETIMEDOUT);
4110         }
4111         if (error == 0) {
4112                 mutex_exit(&dcp->c_statelock);
4113                 rw_exit(&dcp->c_rwlock);
4114                 error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
4115                     cr, 0, &cp);
4116                 if (error) {
4117                         return (error);
4118                 }
4119                 vp = CTOV(cp);
4120 
4121                 if (cp->c_metadata.md_flags & MD_NEEDATTRS)
4122                         error = ETIMEDOUT;
4123                 else if (exclusive == EXCL)
4124                         error = EEXIST;
4125                 else if (vp->v_type == VDIR && (mode & VWRITE))
4126                         error = EISDIR;
4127                 else {
4128                         mutex_enter(&cp->c_statelock);
4129                         error = cachefs_access_local(cp, mode, cr);
4130                         mutex_exit(&cp->c_statelock);
4131                         if (!error) {
4132                                 if ((vap->va_mask & AT_SIZE) &&
4133                                     (vp->v_type == VREG)) {
4134                                         vap->va_mask = AT_SIZE;
4135                                         error = cachefs_setattr_common(vp,
4136                                             vap, 0, cr, NULL);
4137                                 }
4138                         }
4139                 }
4140                 if (error) {
4141                         VN_RELE(vp);
4142                 } else
4143                         *vpp = vp;
4144                 return (error);
4145         }
4146 
4147         /* give up if cannot modify the cache */
4148         if (CFS_ISFS_WRITE_AROUND(fscp)) {
4149                 mutex_exit(&dcp->c_statelock);
4150                 error = ETIMEDOUT;
4151                 goto out;
4152         }
4153 
4154         /* check access */
4155         if (error = cachefs_access_local(dcp, VWRITE, cr)) {
4156                 mutex_exit(&dcp->c_statelock);
4157                 goto out;
4158         }
4159 
4160         /* mark dir as modified */
4161         cachefs_modified(dcp);
4162         mutex_exit(&dcp->c_statelock);
4163 
4164         /* must be privileged to set sticky bit */
4165         if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
4166                 vap->va_mode &= ~VSVTX;
4167 
4168         /* make up a reasonable set of attributes */
4169         cachefs_attr_setup(vap, &va, dcp, cr);
4170 
4171         /* create the cnode */
4172         error = cachefs_cnode_create(fscp, &va, 0, &ncp);
4173         if (error)
4174                 goto out;
4175 
4176         mutex_enter(&ncp->c_statelock);
4177 
4178         /* get the front file now instead of later */
4179         if (vap->va_type == VREG) {
4180                 error = cachefs_getfrontfile(ncp);
4181                 if (error) {
4182                         mutex_exit(&ncp->c_statelock);
4183                         goto out;
4184                 }
4185                 ASSERT(ncp->c_frontvp != NULL);
4186                 ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
4187                 ncp->c_metadata.md_flags |= MD_POPULATED;
4188         } else {
4189                 ASSERT(ncp->c_flags & CN_ALLOC_PENDING);
4190                 if (ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
4191                         (void) filegrp_allocattr(ncp->c_filegrp);
4192                 }
4193                 error = filegrp_create_metadata(ncp->c_filegrp,
4194                     &ncp->c_metadata, &ncp->c_id);
4195                 if (error) {
4196                         mutex_exit(&ncp->c_statelock);
4197                         goto out;
4198                 }
4199                 ncp->c_flags &= ~CN_ALLOC_PENDING;
4200         }
4201         mutex_enter(&dcp->c_statelock);
4202         cachefs_creategid(dcp, ncp, vap, cr);
4203         cachefs_createacl(dcp, ncp);
4204         mutex_exit(&dcp->c_statelock);
4205 
4206         /* set times on the file */
4207         gethrestime(&current_time);
4208         ncp->c_metadata.md_vattr.va_atime = current_time;
4209         ncp->c_metadata.md_localctime = current_time;
4210         ncp->c_metadata.md_localmtime = current_time;
4211         ncp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
4212 
4213         /* reserve space for the daemon cid mapping */
4214         error = cachefs_dlog_cidmap(fscp);
4215         if (error) {
4216                 mutex_exit(&ncp->c_statelock);
4217                 goto out;
4218         }
4219         ncp->c_metadata.md_flags |= MD_MAPPING;
4220 
4221         /* mark the new file as modified */
4222         if (cachefs_modified_alloc(ncp)) {
4223                 mutex_exit(&ncp->c_statelock);
4224                 error = ENOSPC;
4225                 goto out;
4226         }
4227         ncp->c_flags |= CN_UPDATED;
4228 
4229         /*
4230          * write the metadata now rather than waiting until
4231          * inactive so that if there's no space we can let
4232          * the caller know.
4233          */
4234         ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
4235         ASSERT((ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
4236         error = filegrp_write_metadata(ncp->c_filegrp,
4237             &ncp->c_id, &ncp->c_metadata);
4238         if (error) {
4239                 mutex_exit(&ncp->c_statelock);
4240                 goto out;
4241         }
4242 
4243         /* log the operation */
4244         commit = cachefs_dlog_create(fscp, dcp, nm, vap, exclusive,
4245             mode, ncp, 0, cr);
4246         if (commit == 0) {
4247                 mutex_exit(&ncp->c_statelock);
4248                 error = ENOSPC;
4249                 goto out;
4250         }
4251 
4252         mutex_exit(&ncp->c_statelock);
4253 
4254         mutex_enter(&dcp->c_statelock);
4255 
4256         /* update parent dir times */
4257         dcp->c_metadata.md_localmtime = current_time;
4258         dcp->c_metadata.md_flags |= MD_LOCALMTIME;
4259         dcp->c_flags |= CN_UPDATED;
4260 
4261         /* enter new file name in the parent directory */
4262         if (dcp->c_metadata.md_flags & MD_POPULATED) {
4263                 error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4264                     &ncp->c_id, 0);
4265                 if (error) {
4266                         cachefs_nocache(dcp);
4267                         mutex_exit(&dcp->c_statelock);
4268                         error = ETIMEDOUT;
4269                         goto out;
4270                 }
4271                 dnlc_enter(dvp, nm, CTOV(ncp));
4272         } else {
4273                 mutex_exit(&dcp->c_statelock);
4274                 error = ETIMEDOUT;
4275                 goto out;
4276         }
4277         mutex_exit(&dcp->c_statelock);
4278 
4279 out:
4280         rw_exit(&dcp->c_rwlock);
4281 
4282         if (commit) {
4283                 if (cachefs_dlog_commit(fscp, commit, error)) {
4284                         /*EMPTY*/
4285                         /* XXX bob: fix on panic */
4286                 }
4287         }
4288         if (error) {
4289                 /* destroy the cnode we created */
4290                 if (ncp) {
4291                         mutex_enter(&ncp->c_statelock);
4292                         ncp->c_flags |= CN_DESTROY;
4293                         mutex_exit(&ncp->c_statelock);
4294                         VN_RELE(CTOV(ncp));
4295                 }
4296         } else {
4297                 *vpp = CTOV(ncp);
4298         }
4299         return (error);
4300 }
4301 
4302 /*ARGSUSED*/
4303 static int
4304 cachefs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
4305     int flags)
4306 {
4307         cnode_t *dcp = VTOC(dvp);
4308         fscache_t *fscp = C_TO_FSCACHE(dcp);
4309         cachefscache_t *cachep = fscp->fs_cache;
4310         int error = 0;
4311         int held = 0;
4312         int connected = 0;
4313         size_t namlen;
4314         vnode_t *vp = NULL;
4315         int vfslock = 0;
4316 
4317 #ifdef CFSDEBUG
4318         CFS_DEBUG(CFSDEBUG_VOPS)
4319                 printf("cachefs_remove: ENTER dvp %p name %s\n",
4320                     (void *)dvp, nm);
4321 #endif
4322         if (getzoneid() != GLOBAL_ZONEID) {
4323                 error = EPERM;
4324                 goto out;
4325         }
4326 
4327         if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4328                 ASSERT(dcp->c_flags & CN_NOCACHE);
4329 
4330         /*
4331          * Cachefs only provides pass-through support for NFSv4,
4332          * and all vnode operations are passed through to the
4333          * back file system. For NFSv4 pass-through to work, only
4334          * connected operation is supported, the cnode backvp must
4335          * exist, and cachefs optional (eg., disconnectable) flags
4336          * are turned off. Assert these conditions to ensure that
4337          * the backfilesystem is called for the remove operation.
4338          */
4339         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4340         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
4341 
4342         for (;;) {
4343                 if (vfslock) {
4344                         vn_vfsunlock(vp);
4345                         vfslock = 0;
4346                 }
4347                 if (vp) {
4348                         VN_RELE(vp);
4349                         vp = NULL;
4350                 }
4351 
4352                 /* get (or renew) access to the file system */
4353                 if (held) {
4354                         /* Won't loop with NFSv4 connected behavior */
4355                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4356                         cachefs_cd_release(fscp);
4357                         held = 0;
4358                 }
4359                 error = cachefs_cd_access(fscp, connected, 1);
4360                 if (error)
4361                         break;
4362                 held = 1;
4363 
4364                 /* if disconnected, do some extra error checking */
4365                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
4366                         /* check permissions */
4367                         mutex_enter(&dcp->c_statelock);
4368                         error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
4369                         mutex_exit(&dcp->c_statelock);
4370                         if (CFS_TIMEOUT(fscp, error)) {
4371                                 connected = 1;
4372                                 continue;
4373                         }
4374                         if (error)
4375                                 break;
4376 
4377                         namlen = strlen(nm);
4378                         if (namlen == 0) {
4379                                 error = EINVAL;
4380                                 break;
4381                         }
4382 
4383                         /* cannot remove . and .. */
4384                         if (nm[0] == '.') {
4385                                 if (namlen == 1) {
4386                                         error = EINVAL;
4387                                         break;
4388                                 } else if (namlen == 2 && nm[1] == '.') {
4389                                         error = EEXIST;
4390                                         break;
4391                                 }
4392                         }
4393 
4394                 }
4395 
4396                 /* get the cnode of the file to delete */
4397                 error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
4398                 if (error) {
4399                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4400                                 if (CFS_TIMEOUT(fscp, error)) {
4401                                         cachefs_cd_release(fscp);
4402                                         held = 0;
4403                                         cachefs_cd_timedout(fscp);
4404                                         connected = 0;
4405                                         continue;
4406                                 }
4407                         } else {
4408                                 if (CFS_TIMEOUT(fscp, error)) {
4409                                         connected = 1;
4410                                         continue;
4411                                 }
4412                         }
4413                         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE)) {
4414                                 struct fid foo;
4415 
4416                                 bzero(&foo, sizeof (foo));
4417                                 cachefs_log_remove(cachep, error,
4418                                     fscp->fs_cfsvfsp, &foo, 0, crgetuid(cr));
4419                         }
4420                         break;
4421                 }
4422 
4423                 if (vp->v_type == VDIR) {
4424                         /* must be privileged to remove dirs with unlink() */
4425                         if ((error = secpolicy_fs_linkdir(cr, vp->v_vfsp)) != 0)
4426                                 break;
4427 
4428                         /* see ufs_dirremove for why this is done, mount race */
4429                         if (vn_vfswlock(vp)) {
4430                                 error = EBUSY;
4431                                 break;
4432                         }
4433                         vfslock = 1;
4434                         if (vn_mountedvfs(vp) != NULL) {
4435                                 error = EBUSY;
4436                                 break;
4437                         }
4438                 }
4439 
4440                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4441                         error = cachefs_remove_connected(dvp, nm, cr, vp);
4442                         if (CFS_TIMEOUT(fscp, error)) {
4443                                 cachefs_cd_release(fscp);
4444                                 held = 0;
4445                                 cachefs_cd_timedout(fscp);
4446                                 connected = 0;
4447                                 continue;
4448                         }
4449                 } else {
4450                         error = cachefs_remove_disconnected(dvp, nm, cr,
4451                             vp);
4452                         if (CFS_TIMEOUT(fscp, error)) {
4453                                 connected = 1;
4454                                 continue;
4455                         }
4456                 }
4457                 break;
4458         }
4459 
4460 #if 0
4461         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE))
4462                 cachefs_log_remove(cachep, error, fscp->fs_cfsvfsp,
4463                     &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
4464                     crgetuid(cr));
4465 #endif
4466 
4467         if (held)
4468                 cachefs_cd_release(fscp);
4469 
4470         if (vfslock)
4471                 vn_vfsunlock(vp);
4472 
4473         if (vp)
4474                 VN_RELE(vp);
4475 
4476 #ifdef CFS_CD_DEBUG
4477         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4478 #endif
4479 out:
4480 #ifdef CFSDEBUG
4481         CFS_DEBUG(CFSDEBUG_VOPS)
4482                 printf("cachefs_remove: EXIT dvp %p\n", (void *)dvp);
4483 #endif
4484 
4485         return (error);
4486 }
4487 
4488 int
4489 cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4490 {
4491         cnode_t *dcp = VTOC(dvp);
4492         cnode_t *cp = VTOC(vp);
4493         fscache_t *fscp = C_TO_FSCACHE(dcp);
4494         int error = 0;
4495 
4496         /*
4497          * Acquire the rwlock (WRITER) on the directory to prevent other
4498          * activity on the directory.
4499          */
4500         rw_enter(&dcp->c_rwlock, RW_WRITER);
4501 
4502         /* purge dnlc of this entry so can get accurate vnode count */
4503         dnlc_purge_vp(vp);
4504 
4505         /*
4506          * If the cnode is active, make a link to the file
4507          * so operations on the file will continue.
4508          */
4509         if ((vp->v_type != VDIR) &&
4510             !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4511                 error = cachefs_remove_dolink(dvp, vp, nm, cr);
4512                 if (error)
4513                         goto out;
4514         }
4515 
4516         /* else call backfs NFSv4 handler if NFSv4 */
4517         else if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
4518                 error = cachefs_remove_backfs_nfsv4(dvp, nm, cr, vp);
4519                 goto out;
4520         }
4521 
4522         /* else drop the backvp so nfs does not do rename */
4523         else if (cp->c_backvp) {
4524                 mutex_enter(&cp->c_statelock);
4525                 if (cp->c_backvp) {
4526                         VN_RELE(cp->c_backvp);
4527                         cp->c_backvp = NULL;
4528                 }
4529                 mutex_exit(&cp->c_statelock);
4530         }
4531 
4532         mutex_enter(&dcp->c_statelock);
4533 
4534         /* get the backvp */
4535         if (dcp->c_backvp == NULL) {
4536                 error = cachefs_getbackvp(fscp, dcp);
4537                 if (error) {
4538                         mutex_exit(&dcp->c_statelock);
4539                         goto out;
4540                 }
4541         }
4542 
4543         /* check directory consistency */
4544         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4545         if (error) {
4546                 mutex_exit(&dcp->c_statelock);
4547                 goto out;
4548         }
4549 
4550         /* perform the remove on the back fs */
4551         error = VOP_REMOVE(dcp->c_backvp, nm, cr, NULL, 0);
4552         if (error) {
4553                 mutex_exit(&dcp->c_statelock);
4554                 goto out;
4555         }
4556 
4557         /* the dir has been modified */
4558         CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4559 
4560         /* remove the entry from the populated directory */
4561         if (CFS_ISFS_NONSHARED(fscp) &&
4562             (dcp->c_metadata.md_flags & MD_POPULATED)) {
4563                 error = cachefs_dir_rmentry(dcp, nm);
4564                 if (error) {
4565                         cachefs_nocache(dcp);
4566                         error = 0;
4567                 }
4568         }
4569         mutex_exit(&dcp->c_statelock);
4570 
4571         /* fix up the file we deleted */
4572         mutex_enter(&cp->c_statelock);
4573         if (cp->c_attr.va_nlink == 1)
4574                 cp->c_flags |= CN_DESTROY;
4575         else
4576                 cp->c_flags |= CN_UPDATED;
4577 
4578         cp->c_attr.va_nlink--;
4579         CFSOP_MODIFY_COBJECT(fscp, cp, cr);
4580         mutex_exit(&cp->c_statelock);
4581 
4582 out:
4583         rw_exit(&dcp->c_rwlock);
4584         return (error);
4585 }
4586 
4587 /*
4588  * cachefs_remove_backfs_nfsv4
4589  *
4590  * Call NFSv4 back filesystem to handle the remove (cachefs
4591  * pass-through support for NFSv4).
4592  */
4593 int
4594 cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4595 {
4596         cnode_t *dcp = VTOC(dvp);
4597         cnode_t *cp = VTOC(vp);
4598         vnode_t *dbackvp;
4599         fscache_t *fscp = C_TO_FSCACHE(dcp);
4600         int error = 0;
4601 
4602         /*
4603          * For NFSv4 pass-through to work, only connected operation
4604          * is supported, the cnode backvp must exist, and cachefs
4605          * optional (eg., disconnectable) flags are turned off. Assert
4606          * these conditions for the getattr operation.
4607          */
4608         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4609         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
4610 
4611         /* Should hold the directory readwrite lock to update directory */
4612         ASSERT(RW_WRITE_HELD(&dcp->c_rwlock));
4613 
4614         /*
4615          * Update attributes for directory. Note that
4616          * CFSOP_CHECK_COBJECT asserts for c_statelock being
4617          * held, so grab it before calling the routine.
4618          */
4619         mutex_enter(&dcp->c_statelock);
4620         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4621         mutex_exit(&dcp->c_statelock);
4622         if (error)
4623                 goto out;
4624 
4625         /*
4626          * Update attributes for cp. Note that CFSOP_CHECK_COBJECT
4627          * asserts for c_statelock being held, so grab it before
4628          * calling the routine.
4629          */
4630         mutex_enter(&cp->c_statelock);
4631         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
4632         if (error) {
4633                 mutex_exit(&cp->c_statelock);
4634                 goto out;
4635         }
4636 
4637         /*
4638          * Drop the backvp so nfs if the link count is 1 so that
4639          * nfs does not do rename. Ensure that we will destroy the cnode
4640          * since this cnode no longer contains the backvp. Note that we
4641          * maintain lock on this cnode to prevent change till the remove
4642          * completes, otherwise other operations will encounter an ESTALE
4643          * if they try to use the cnode with CN_DESTROY set (see
4644          * cachefs_get_backvp()), or change the state of the cnode
4645          * while we're removing it.
4646          */
4647         if (cp->c_attr.va_nlink == 1) {
4648                 /*
4649                  * The unldvp information is created for the case
4650                  * when there is more than one reference on the
4651                  * vnode when a remove operation is called. If the
4652                  * remove itself was holding a reference to the
4653                  * vnode, then a subsequent remove will remove the
4654                  * backvp, so we need to get rid of the unldvp
4655                  * before removing the backvp. An alternate would
4656                  * be to simply ignore the remove and let the
4657                  * inactivation routine do the deletion of the
4658                  * unldvp.
4659                  */
4660                 if (cp->c_unldvp) {
4661                         VN_RELE(cp->c_unldvp);
4662                         cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
4663                         crfree(cp->c_unlcred);
4664                         cp->c_unldvp = NULL;
4665                         cp->c_unlcred = NULL;
4666                 }
4667                 cp->c_flags |= CN_DESTROY;
4668                 cp->c_attr.va_nlink = 0;
4669                 VN_RELE(cp->c_backvp);
4670                 cp->c_backvp = NULL;
4671         }
4672 
4673         /* perform the remove on back fs after extracting directory backvp */
4674         mutex_enter(&dcp->c_statelock);
4675         dbackvp = dcp->c_backvp;
4676         mutex_exit(&dcp->c_statelock);
4677 
4678         CFS_DPRINT_BACKFS_NFSV4(fscp,
4679             ("cachefs_remove (nfsv4): dcp %p, dbackvp %p, name %s\n",
4680             dcp, dbackvp, nm));
4681         error = VOP_REMOVE(dbackvp, nm, cr, NULL, 0);
4682         if (error) {
4683                 mutex_exit(&cp->c_statelock);
4684                 goto out;
4685         }
4686 
4687         /* fix up the file we deleted, if not destroying the cnode */
4688         if ((cp->c_flags & CN_DESTROY) == 0) {
4689                 cp->c_attr.va_nlink--;
4690                 cp->c_flags |= CN_UPDATED;
4691         }
4692 
4693         mutex_exit(&cp->c_statelock);
4694 
4695 out:
4696         return (error);
4697 }
4698 
4699 int
4700 cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr,
4701     vnode_t *vp)
4702 {
4703         cnode_t *dcp = VTOC(dvp);
4704         cnode_t *cp = VTOC(vp);
4705         fscache_t *fscp = C_TO_FSCACHE(dcp);
4706         int error = 0;
4707         off_t commit = 0;
4708         timestruc_t current_time;
4709 
4710         if (CFS_ISFS_WRITE_AROUND(fscp))
4711                 return (ETIMEDOUT);
4712 
4713         if (cp->c_metadata.md_flags & MD_NEEDATTRS)
4714                 return (ETIMEDOUT);
4715 
4716         /*
4717          * Acquire the rwlock (WRITER) on the directory to prevent other
4718          * activity on the directory.
4719          */
4720         rw_enter(&dcp->c_rwlock, RW_WRITER);
4721 
4722         /* dir must be populated */
4723         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4724                 error = ETIMEDOUT;
4725                 goto out;
4726         }
4727 
4728         mutex_enter(&dcp->c_statelock);
4729         mutex_enter(&cp->c_statelock);
4730 
4731         error = cachefs_stickyrmchk(dcp, cp, cr);
4732 
4733         mutex_exit(&cp->c_statelock);
4734         mutex_exit(&dcp->c_statelock);
4735         if (error)
4736                 goto out;
4737 
4738         /* purge dnlc of this entry so can get accurate vnode count */
4739         dnlc_purge_vp(vp);
4740 
4741         /*
4742          * If the cnode is active, make a link to the file
4743          * so operations on the file will continue.
4744          */
4745         if ((vp->v_type != VDIR) &&
4746             !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4747                 error = cachefs_remove_dolink(dvp, vp, nm, cr);
4748                 if (error)
4749                         goto out;
4750         }
4751 
4752         if (cp->c_attr.va_nlink > 1) {
4753                 mutex_enter(&cp->c_statelock);
4754                 if (cachefs_modified_alloc(cp)) {
4755                         mutex_exit(&cp->c_statelock);
4756                         error = ENOSPC;
4757                         goto out;
4758                 }
4759                 if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
4760                         error = cachefs_dlog_cidmap(fscp);
4761                         if (error) {
4762                                 mutex_exit(&cp->c_statelock);
4763                                 error = ENOSPC;
4764                                 goto out;
4765                         }
4766                         cp->c_metadata.md_flags |= MD_MAPPING;
4767                         cp->c_flags |= CN_UPDATED;
4768                 }
4769                 mutex_exit(&cp->c_statelock);
4770         }
4771 
4772         /* log the remove */
4773         commit = cachefs_dlog_remove(fscp, dcp, nm, cp, cr);
4774         if (commit == 0) {
4775                 error = ENOSPC;
4776                 goto out;
4777         }
4778 
4779         /* remove the file from the dir */
4780         mutex_enter(&dcp->c_statelock);
4781         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4782                 mutex_exit(&dcp->c_statelock);
4783                 error = ETIMEDOUT;
4784                 goto out;
4785 
4786         }
4787         cachefs_modified(dcp);
4788         error = cachefs_dir_rmentry(dcp, nm);
4789         if (error) {
4790                 mutex_exit(&dcp->c_statelock);
4791                 if (error == ENOTDIR)
4792                         error = ETIMEDOUT;
4793                 goto out;
4794         }
4795 
4796         /* update parent dir times */
4797         gethrestime(&current_time);
4798         dcp->c_metadata.md_localctime = current_time;
4799         dcp->c_metadata.md_localmtime = current_time;
4800         dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
4801         dcp->c_flags |= CN_UPDATED;
4802         mutex_exit(&dcp->c_statelock);
4803 
4804         /* adjust file we are deleting */
4805         mutex_enter(&cp->c_statelock);
4806         cp->c_attr.va_nlink--;
4807         cp->c_metadata.md_localctime = current_time;
4808         cp->c_metadata.md_flags |= MD_LOCALCTIME;
4809         if (cp->c_attr.va_nlink == 0) {
4810                 cp->c_flags |= CN_DESTROY;
4811         } else {
4812                 cp->c_flags |= CN_UPDATED;
4813         }
4814         mutex_exit(&cp->c_statelock);
4815 
4816 out:
4817         if (commit) {
4818                 /* commit the log entry */
4819                 if (cachefs_dlog_commit(fscp, commit, error)) {
4820                         /*EMPTY*/
4821                         /* XXX bob: fix on panic */
4822                 }
4823         }
4824 
4825         rw_exit(&dcp->c_rwlock);
4826         return (error);
4827 }
4828 
4829 /*ARGSUSED*/
4830 static int
4831 cachefs_link(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr,
4832     caller_context_t *ct, int flags)
4833 {
4834         fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4835         cnode_t *tdcp = VTOC(tdvp);
4836         struct vnode *realvp;
4837         int error = 0;
4838         int held = 0;
4839         int connected = 0;
4840 
4841 #ifdef CFSDEBUG
4842         CFS_DEBUG(CFSDEBUG_VOPS)
4843                 printf("cachefs_link: ENTER fvp %p tdvp %p tnm %s\n",
4844                     (void *)fvp, (void *)tdvp, tnm);
4845 #endif
4846 
4847         if (getzoneid() != GLOBAL_ZONEID) {
4848                 error = EPERM;
4849                 goto out;
4850         }
4851 
4852         if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4853                 ASSERT(tdcp->c_flags & CN_NOCACHE);
4854 
4855         if (VOP_REALVP(fvp, &realvp, ct) == 0) {
4856                 fvp = realvp;
4857         }
4858 
4859         /*
4860          * Cachefs only provides pass-through support for NFSv4,
4861          * and all vnode operations are passed through to the
4862          * back file system. For NFSv4 pass-through to work, only
4863          * connected operation is supported, the cnode backvp must
4864          * exist, and cachefs optional (eg., disconnectable) flags
4865          * are turned off. Assert these conditions to ensure that
4866          * the backfilesystem is called for the link operation.
4867          */
4868 
4869         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4870         CFS_BACKFS_NFSV4_ASSERT_CNODE(tdcp);
4871 
4872         for (;;) {
4873                 /* get (or renew) access to the file system */
4874                 if (held) {
4875                         /* Won't loop with NFSv4 connected behavior */
4876                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4877                         rw_exit(&tdcp->c_rwlock);
4878                         cachefs_cd_release(fscp);
4879                         held = 0;
4880                 }
4881                 error = cachefs_cd_access(fscp, connected, 1);
4882                 if (error)
4883                         break;
4884                 rw_enter(&tdcp->c_rwlock, RW_WRITER);
4885                 held = 1;
4886 
4887                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4888                         error = cachefs_link_connected(tdvp, fvp, tnm, cr);
4889                         if (CFS_TIMEOUT(fscp, error)) {
4890                                 rw_exit(&tdcp->c_rwlock);
4891                                 cachefs_cd_release(fscp);
4892                                 held = 0;
4893                                 cachefs_cd_timedout(fscp);
4894                                 connected = 0;
4895                                 continue;
4896                         }
4897                 } else {
4898                         error = cachefs_link_disconnected(tdvp, fvp, tnm,
4899                             cr);
4900                         if (CFS_TIMEOUT(fscp, error)) {
4901                                 connected = 1;
4902                                 continue;
4903                         }
4904                 }
4905                 break;
4906         }
4907 
4908         if (held) {
4909                 rw_exit(&tdcp->c_rwlock);
4910                 cachefs_cd_release(fscp);
4911         }
4912 
4913 #ifdef CFS_CD_DEBUG
4914         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4915 #endif
4916 out:
4917 #ifdef CFSDEBUG
4918         CFS_DEBUG(CFSDEBUG_VOPS)
4919                 printf("cachefs_link: EXIT fvp %p tdvp %p tnm %s\n",
4920                     (void *)fvp, (void *)tdvp, tnm);
4921 #endif
4922         return (error);
4923 }
4924 
4925 static int
4926 cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr)
4927 {
4928         cnode_t *tdcp = VTOC(tdvp);
4929         cnode_t *fcp = VTOC(fvp);
4930         fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4931         int error = 0;
4932         vnode_t *backvp = NULL;
4933 
4934         if (tdcp != fcp) {
4935                 mutex_enter(&fcp->c_statelock);
4936 
4937                 if (fcp->c_backvp == NULL) {
4938                         error = cachefs_getbackvp(fscp, fcp);
4939                         if (error) {
4940                                 mutex_exit(&fcp->c_statelock);
4941                                 goto out;
4942                         }
4943                 }
4944 
4945                 error = CFSOP_CHECK_COBJECT(fscp, fcp, 0, cr);
4946                 if (error) {
4947                         mutex_exit(&fcp->c_statelock);
4948                         goto out;
4949                 }
4950                 backvp = fcp->c_backvp;
4951                 VN_HOLD(backvp);
4952                 mutex_exit(&fcp->c_statelock);
4953         }
4954 
4955         mutex_enter(&tdcp->c_statelock);
4956 
4957         /* get backvp of target directory */
4958         if (tdcp->c_backvp == NULL) {
4959                 error = cachefs_getbackvp(fscp, tdcp);
4960                 if (error) {
4961                         mutex_exit(&tdcp->c_statelock);
4962                         goto out;
4963                 }
4964         }
4965 
4966         /* consistency check target directory */
4967         error = CFSOP_CHECK_COBJECT(fscp, tdcp, 0, cr);
4968         if (error) {
4969                 mutex_exit(&tdcp->c_statelock);
4970                 goto out;
4971         }
4972         if (backvp == NULL) {
4973                 backvp = tdcp->c_backvp;
4974                 VN_HOLD(backvp);
4975         }
4976 
4977         /* perform the link on the back fs */
4978         CFS_DPRINT_BACKFS_NFSV4(fscp,
4979             ("cachefs_link (nfsv4): tdcp %p, tdbackvp %p, "
4980             "name %s\n", tdcp, tdcp->c_backvp, tnm));
4981         error = VOP_LINK(tdcp->c_backvp, backvp, tnm, cr, NULL, 0);
4982         if (error) {
4983                 mutex_exit(&tdcp->c_statelock);
4984                 goto out;
4985         }
4986 
4987         CFSOP_MODIFY_COBJECT(fscp, tdcp, cr);
4988 
4989         /* if the dir is populated, add the new link */
4990         if (CFS_ISFS_NONSHARED(fscp) &&
4991             (tdcp->c_metadata.md_flags & MD_POPULATED)) {
4992                 error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
4993                     &fcp->c_id, SM_ASYNC);
4994                 if (error) {
4995                         cachefs_nocache(tdcp);
4996                         error = 0;
4997                 }
4998         }
4999         mutex_exit(&tdcp->c_statelock);
5000 
5001         /* get the new link count on the file */
5002         mutex_enter(&fcp->c_statelock);
5003         fcp->c_flags |= CN_UPDATED;
5004         CFSOP_MODIFY_COBJECT(fscp, fcp, cr);
5005         if (fcp->c_backvp == NULL) {
5006                 error = cachefs_getbackvp(fscp, fcp);
5007                 if (error) {
5008                         mutex_exit(&fcp->c_statelock);
5009                         goto out;
5010                 }
5011         }
5012 
5013         /* XXX bob: given what modify_cobject does this seems unnecessary */
5014         fcp->c_attr.va_mask = AT_ALL;
5015         error = VOP_GETATTR(fcp->c_backvp, &fcp->c_attr, 0, cr, NULL);
5016         mutex_exit(&fcp->c_statelock);
5017 out:
5018         if (backvp)
5019                 VN_RELE(backvp);
5020 
5021         return (error);
5022 }
5023 
5024 static int
5025 cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
5026     cred_t *cr)
5027 {
5028         cnode_t *tdcp = VTOC(tdvp);
5029         cnode_t *fcp = VTOC(fvp);
5030         fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
5031         int error = 0;
5032         timestruc_t current_time;
5033         off_t commit = 0;
5034 
5035         if (fvp->v_type == VDIR && secpolicy_fs_linkdir(cr, fvp->v_vfsp) != 0 ||
5036             fcp->c_attr.va_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
5037                 return (EPERM);
5038 
5039         if (CFS_ISFS_WRITE_AROUND(fscp))
5040                 return (ETIMEDOUT);
5041 
5042         if (fcp->c_metadata.md_flags & MD_NEEDATTRS)
5043                 return (ETIMEDOUT);
5044 
5045         mutex_enter(&tdcp->c_statelock);
5046 
5047         /* check permissions */
5048         if (error = cachefs_access_local(tdcp, (VEXEC|VWRITE), cr)) {
5049                 mutex_exit(&tdcp->c_statelock);
5050                 goto out;
5051         }
5052 
5053         /* the directory front file must be populated */
5054         if ((tdcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5055                 error = ETIMEDOUT;
5056                 mutex_exit(&tdcp->c_statelock);
5057                 goto out;
5058         }
5059 
5060         /* make sure tnm does not already exist in the directory */
5061         error = cachefs_dir_look(tdcp, tnm, NULL, NULL, NULL, NULL);
5062         if (error == ENOTDIR) {
5063                 error = ETIMEDOUT;
5064                 mutex_exit(&tdcp->c_statelock);
5065                 goto out;
5066         }
5067         if (error != ENOENT) {
5068                 error = EEXIST;
5069                 mutex_exit(&tdcp->c_statelock);
5070                 goto out;
5071         }
5072 
5073         mutex_enter(&fcp->c_statelock);
5074 
5075         /* create a mapping for the file if necessary */
5076         if ((fcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5077                 error = cachefs_dlog_cidmap(fscp);
5078                 if (error) {
5079                         mutex_exit(&fcp->c_statelock);
5080                         mutex_exit(&tdcp->c_statelock);
5081                         error = ENOSPC;
5082                         goto out;
5083                 }
5084                 fcp->c_metadata.md_flags |= MD_MAPPING;
5085                 fcp->c_flags |= CN_UPDATED;
5086         }
5087 
5088         /* mark file as modified */
5089         if (cachefs_modified_alloc(fcp)) {
5090                 mutex_exit(&fcp->c_statelock);
5091                 mutex_exit(&tdcp->c_statelock);
5092                 error = ENOSPC;
5093                 goto out;
5094         }
5095         mutex_exit(&fcp->c_statelock);
5096 
5097         /* log the operation */
5098         commit = cachefs_dlog_link(fscp, tdcp, tnm, fcp, cr);
5099         if (commit == 0) {
5100                 mutex_exit(&tdcp->c_statelock);
5101                 error = ENOSPC;
5102                 goto out;
5103         }
5104 
5105         gethrestime(&current_time);
5106 
5107         /* make the new link */
5108         cachefs_modified(tdcp);
5109         error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
5110             &fcp->c_id, SM_ASYNC);
5111         if (error) {
5112                 error = 0;
5113                 mutex_exit(&tdcp->c_statelock);
5114                 goto out;
5115         }
5116 
5117         /* Update mtime/ctime of parent dir */
5118         tdcp->c_metadata.md_localmtime = current_time;
5119         tdcp->c_metadata.md_localctime = current_time;
5120         tdcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5121         tdcp->c_flags |= CN_UPDATED;
5122         mutex_exit(&tdcp->c_statelock);
5123 
5124         /* update the file we linked to */
5125         mutex_enter(&fcp->c_statelock);
5126         fcp->c_attr.va_nlink++;
5127         fcp->c_metadata.md_localctime = current_time;
5128         fcp->c_metadata.md_flags |= MD_LOCALCTIME;
5129         fcp->c_flags |= CN_UPDATED;
5130         mutex_exit(&fcp->c_statelock);
5131 
5132 out:
5133         if (commit) {
5134                 /* commit the log entry */
5135                 if (cachefs_dlog_commit(fscp, commit, error)) {
5136                         /*EMPTY*/
5137                         /* XXX bob: fix on panic */
5138                 }
5139         }
5140 
5141         return (error);
5142 }
5143 
5144 /*
5145  * Lock used to serialize all renames in CFS.  A rename has to hold two
5146  * cnodes atomically, so renames are serialized to avoid deadlocks.
5147  */
5148 kmutex_t cachefs_rename_lock;
5149 
5150 /*ARGSUSED*/
5151 static int
5152 cachefs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp,
5153     char *nnm, cred_t *cr, caller_context_t *ct, int flags)
5154 {
5155         fscache_t *fscp = C_TO_FSCACHE(VTOC(odvp));
5156         cachefscache_t *cachep = fscp->fs_cache;
5157         int error = 0;
5158         int held = 0;
5159         int connected = 0;
5160         vnode_t *delvp = NULL;
5161         vnode_t *tvp = NULL;
5162         int vfslock = 0;
5163         struct vnode *realvp;
5164 
5165         if (getzoneid() != GLOBAL_ZONEID)
5166                 return (EPERM);
5167 
5168         if (VOP_REALVP(ndvp, &realvp, ct) == 0)
5169                 ndvp = realvp;
5170 
5171         /*
5172          * if the fs NOFILL or NOCACHE flags are on, then the old and new
5173          * directory cnodes better indicate NOCACHE mode as well.
5174          */
5175         ASSERT(
5176             (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0 ||
5177             ((VTOC(odvp)->c_flags & CN_NOCACHE) &&
5178             (VTOC(ndvp)->c_flags & CN_NOCACHE)));
5179 
5180         /*
5181          * Cachefs only provides pass-through support for NFSv4,
5182          * and all vnode operations are passed through to the
5183          * back file system. For NFSv4 pass-through to work, only
5184          * connected operation is supported, the cnode backvp must
5185          * exist, and cachefs optional (eg., disconnectable) flags
5186          * are turned off. Assert these conditions to ensure that
5187          * the backfilesystem is called for the rename operation.
5188          */
5189         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5190         CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(odvp));
5191         CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(ndvp));
5192 
5193         for (;;) {
5194                 if (vfslock) {
5195                         vn_vfsunlock(delvp);
5196                         vfslock = 0;
5197                 }
5198                 if (delvp) {
5199                         VN_RELE(delvp);
5200                         delvp = NULL;
5201                 }
5202 
5203                 /* get (or renew) access to the file system */
5204                 if (held) {
5205                         /* Won't loop for NFSv4 connected support */
5206                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5207                         cachefs_cd_release(fscp);
5208                         held = 0;
5209                 }
5210                 error = cachefs_cd_access(fscp, connected, 1);
5211                 if (error)
5212                         break;
5213                 held = 1;
5214 
5215                 /* sanity check */
5216                 if ((odvp->v_type != VDIR) || (ndvp->v_type != VDIR)) {
5217                         error = EINVAL;
5218                         break;
5219                 }
5220 
5221                 /* cannot rename from or to . or .. */
5222                 if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
5223                     strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) {
5224                         error = EINVAL;
5225                         break;
5226                 }
5227 
5228                 if (odvp != ndvp) {
5229                         /*
5230                          * if moving a directory, its notion
5231                          * of ".." will change
5232                          */
5233                         error = cachefs_lookup_common(odvp, onm, &tvp,
5234                             NULL, 0, NULL, cr);
5235                         if (error == 0) {
5236                                 ASSERT(tvp != NULL);
5237                                 if (tvp->v_type == VDIR) {
5238                                         cnode_t *cp = VTOC(tvp);
5239 
5240                                         dnlc_remove(tvp, "..");
5241 
5242                                         mutex_enter(&cp->c_statelock);
5243                                         CFSOP_MODIFY_COBJECT(fscp, cp, cr);
5244                                         mutex_exit(&cp->c_statelock);
5245                                 }
5246                         } else {
5247                                 tvp = NULL;
5248                                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5249                                         if (CFS_TIMEOUT(fscp, error)) {
5250                                                 cachefs_cd_release(fscp);
5251                                                 held = 0;
5252                                                 cachefs_cd_timedout(fscp);
5253                                                 connected = 0;
5254                                                 continue;
5255                                         }
5256                                 } else {
5257                                         if (CFS_TIMEOUT(fscp, error)) {
5258                                                 connected = 1;
5259                                                 continue;
5260                                         }
5261                                 }
5262                                 break;
5263                         }
5264                 }
5265 
5266                 /* get the cnode if file being deleted */
5267                 error = cachefs_lookup_common(ndvp, nnm, &delvp, NULL, 0,
5268                     NULL, cr);
5269                 if (error) {
5270                         delvp = NULL;
5271                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5272                                 if (CFS_TIMEOUT(fscp, error)) {
5273                                         cachefs_cd_release(fscp);
5274                                         held = 0;
5275                                         cachefs_cd_timedout(fscp);
5276                                         connected = 0;
5277                                         continue;
5278                                 }
5279                         } else {
5280                                 if (CFS_TIMEOUT(fscp, error)) {
5281                                         connected = 1;
5282                                         continue;
5283                                 }
5284                         }
5285                         if (error != ENOENT)
5286                                 break;
5287                 }
5288 
5289                 if (delvp && delvp->v_type == VDIR) {
5290                         /* see ufs_dirremove for why this is done, mount race */
5291                         if (vn_vfswlock(delvp)) {
5292                                 error = EBUSY;
5293                                 break;
5294                         }
5295                         vfslock = 1;
5296                         if (vn_mountedvfs(delvp) != NULL) {
5297                                 error = EBUSY;
5298                                 break;
5299                         }
5300                 }
5301 
5302                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5303                         error = cachefs_rename_connected(odvp, onm,
5304                             ndvp, nnm, cr, delvp);
5305                         if (CFS_TIMEOUT(fscp, error)) {
5306                                 cachefs_cd_release(fscp);
5307                                 held = 0;
5308                                 cachefs_cd_timedout(fscp);
5309                                 connected = 0;
5310                                 continue;
5311                         }
5312                 } else {
5313                         error = cachefs_rename_disconnected(odvp, onm,
5314                             ndvp, nnm, cr, delvp);
5315                         if (CFS_TIMEOUT(fscp, error)) {
5316                                 connected = 1;
5317                                 continue;
5318                         }
5319                 }
5320                 break;
5321         }
5322 
5323         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RENAME)) {
5324                 struct fid gone;
5325 
5326                 bzero(&gone, sizeof (gone));
5327                 gone.fid_len = MAXFIDSZ;
5328                 if (delvp != NULL)
5329                         (void) VOP_FID(delvp, &gone, ct);
5330 
5331                 cachefs_log_rename(cachep, error, fscp->fs_cfsvfsp,
5332                     &gone, 0, (delvp != NULL), crgetuid(cr));
5333         }
5334 
5335         if (held)
5336                 cachefs_cd_release(fscp);
5337 
5338         if (vfslock)
5339                 vn_vfsunlock(delvp);
5340 
5341         if (delvp)
5342                 VN_RELE(delvp);
5343         if (tvp)
5344                 VN_RELE(tvp);
5345 
5346 #ifdef CFS_CD_DEBUG
5347         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5348 #endif
5349         return (error);
5350 }
5351 
/*
 * Connected-mode half of rename: rename onm in directory odvp to nnm in
 * directory ndvp by calling VOP_RENAME() on the directories' back vnodes,
 * then update the cached state of everything the rename touched.
 *
 *   delvp - vnode currently named nnm in ndvp that the rename replaces,
 *           or NULL if there is none.
 *
 * The c_rwlock of both directories is held as writer from just after the
 * source lookup until out:.  If delvp is the very vnode being renamed,
 * nothing is done and 0 is returned.
 *
 * Returns 0, or the error from cachefs_lookup_common(),
 * cachefs_getbackvp(), CFSOP_CHECK_COBJECT(), cachefs_remove_dolink() or
 * VOP_RENAME().  Once the back file system rename has succeeded, failures
 * to update the cached directories are not reported; the directory is
 * handed to cachefs_inval_object() or cachefs_nocache() instead.
 */
5352 static int
5353 cachefs_rename_connected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5354     char *nnm, cred_t *cr, vnode_t *delvp)
5355 {
5356         cnode_t *odcp = VTOC(odvp);
5357         cnode_t *ndcp = VTOC(ndvp);
5358         vnode_t *revp = NULL;
5359         cnode_t *recp;
5360         cnode_t *delcp;         /* only assigned and used when delvp != NULL */
5361         fscache_t *fscp = C_TO_FSCACHE(odcp);
5362         int error = 0;
5363         struct fid cookie;
5364         struct fid *cookiep;
5365         cfs_cid_t cid;
5366         int gotdirent;          /* onm was found in the cached copy of odcp */
5367 
5368         /* find the file we are renaming; revp is released at out: */
5369         error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5370         if (error)
5371                 return (error);
5372         recp = VTOC(revp);
5373 
5374         /*
5375          * To avoid deadlock, we acquire this global rename lock before
5376          * we try to get the locks for the source and target directories.
5377          */
5378         mutex_enter(&cachefs_rename_lock);
5379         rw_enter(&odcp->c_rwlock, RW_WRITER);
5380         if (odcp != ndcp) {
5381                 rw_enter(&ndcp->c_rwlock, RW_WRITER);
5382         }
5383         mutex_exit(&cachefs_rename_lock);
5384 
5385         ASSERT((odcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5386         ASSERT((ndcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5387 
             /* source directory: make sure it has a back vnode and is consistent */
5388         mutex_enter(&odcp->c_statelock);
5389         if (odcp->c_backvp == NULL) {
5390                 error = cachefs_getbackvp(fscp, odcp);
5391                 if (error) {
5392                         mutex_exit(&odcp->c_statelock);
5393                         goto out;
5394                 }
5395         }
5396 
5397         error = CFSOP_CHECK_COBJECT(fscp, odcp, 0, cr);
5398         if (error) {
5399                 mutex_exit(&odcp->c_statelock);
5400                 goto out;
5401         }
5402         mutex_exit(&odcp->c_statelock);
5403 
             /* same preparation for the target directory when it is a different cnode */
5404         if (odcp != ndcp) {
5405                 mutex_enter(&ndcp->c_statelock);
5406                 if (ndcp->c_backvp == NULL) {
5407                         error = cachefs_getbackvp(fscp, ndcp);
5408                         if (error) {
5409                                 mutex_exit(&ndcp->c_statelock);
5410                                 goto out;
5411                         }
5412                 }
5413 
5414                 error = CFSOP_CHECK_COBJECT(fscp, ndcp, 0, cr);
5415                 if (error) {
5416                         mutex_exit(&ndcp->c_statelock);
5417                         goto out;
5418                 }
5419                 mutex_exit(&ndcp->c_statelock);
5420         }
5421 
5422         /* if a file is being deleted because of this rename */
5423         if (delvp) {
5424                 /* if src and dest file are same */
5425                 if (delvp == revp) {
5426                         error = 0;
5427                         goto out;
5428                 }
5429 
5430                 /*
5431                  * If the cnode is active, make a link to the file
5432                  * so operations on the file will continue.
                      * "Active" here means: not a directory, and v_count is
                      * neither 1 nor 2-with-c_ipending-set.
5433                  */
5434                 dnlc_purge_vp(delvp);
5435                 delcp = VTOC(delvp);
5436                 if ((delvp->v_type != VDIR) &&
5437                     !((delvp->v_count == 1) ||
5438                     ((delvp->v_count == 2) && delcp->c_ipending))) {
5439                         error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5440                         if (error)
5441                                 goto out;
5442                 }
5443         }
5444 
5445         /* do the rename on the back fs */
5446         CFS_DPRINT_BACKFS_NFSV4(fscp,
5447             ("cachefs_rename (nfsv4): odcp %p, odbackvp %p, "
5448             " ndcp %p, ndbackvp %p, onm %s, nnm %s\n",
5449             odcp, odcp->c_backvp, ndcp, ndcp->c_backvp, onm, nnm));
5450         error = VOP_RENAME(odcp->c_backvp, onm, ndcp->c_backvp, nnm, cr, NULL,
5451             0);
5452         if (error)
5453                 goto out;
5454 
             /*
              * From here on the back file system has done the rename; nothing
              * below makes this function return an error.
              */
5455         /* purge mappings to file in the old directory */
5456         dnlc_purge_vp(odvp);
5457 
5458         /* purge mappings in the new dir if we deleted a file */
5459         if (delvp && (odvp != ndvp))
5460                 dnlc_purge_vp(ndvp);
5461 
5462         /* update the file we just deleted */
5463         if (delvp) {
5464                 mutex_enter(&delcp->c_statelock);
                     /* link count is tested before the decrement: 1 means last link */
5465                 if (delcp->c_attr.va_nlink == 1) {
5466                         delcp->c_flags |= CN_DESTROY;
5467                 } else {
5468                         delcp->c_flags |= CN_UPDATED;
5469                 }
5470                 delcp->c_attr.va_nlink--;
5471                 CFSOP_MODIFY_COBJECT(fscp, delcp, cr);
5472                 mutex_exit(&delcp->c_statelock);
5473         }
5474 
5475         /* find the entry in the old directory */
5476         mutex_enter(&odcp->c_statelock);
5477         gotdirent = 0;
5478         cookiep = NULL;
5479         if (CFS_ISFS_NONSHARED(fscp) &&
5480             (odcp->c_metadata.md_flags & MD_POPULATED)) {
5481                 error = cachefs_dir_look(odcp, onm, &cookie,
5482                     NULL, NULL, &cid);
                     /*
                      * EINVAL still counts as "entry found" (cid is used later),
                      * but the cookie is not passed on: cookiep stays NULL.
                      */
5483                 if (error == 0 || error == EINVAL) {
5484                         gotdirent = 1;
5485                         if (error == 0)
5486                                 cookiep = &cookie;
5487                 } else {
5488                         cachefs_inval_object(odcp);
5489                 }
5490         }
             /* a failed lookup in the cached directory does not fail the rename */
5491         error = 0;
5492 
5493         /* remove the directory entry from the old directory */
5494         if (gotdirent) {
5495                 error = cachefs_dir_rmentry(odcp, onm);
5496                 if (error) {
5497                         cachefs_nocache(odcp);
5498                         error = 0;
5499                 }
5500         }
5501         CFSOP_MODIFY_COBJECT(fscp, odcp, cr);
5502         mutex_exit(&odcp->c_statelock);
5503 
5504         /* install the directory entry in the new directory */
5505         mutex_enter(&ndcp->c_statelock);
5506         if (CFS_ISFS_NONSHARED(fscp) &&
5507             (ndcp->c_metadata.md_flags & MD_POPULATED)) {
                     /*
                      * Start from "failed"; the value 1 is only a flag that
                      * selects cachefs_nocache() below and is never returned.
                      */
5508                 error = 1;
5509                 if (gotdirent) {
5510                         ASSERT(cid.cid_fileno != 0);
5511                         error = 0;
                             /* drop the entry of the replaced file first */
5512                         if (delvp) {
5513                                 error = cachefs_dir_rmentry(ndcp, nnm);
5514                         }
5515                         if (error == 0) {
5516                                 error = cachefs_dir_enter(ndcp, nnm, cookiep,
5517                                     &cid, SM_ASYNC);
5518                         }
5519                 }
5520                 if (error) {
5521                         cachefs_nocache(ndcp);
5522                         error = 0;
5523                 }
5524         }
             /* when odcp == ndcp the MODIFY call was already made above */
5525         if (odcp != ndcp)
5526                 CFSOP_MODIFY_COBJECT(fscp, ndcp, cr);
5527         mutex_exit(&ndcp->c_statelock);
5528 
5529         /* ctime of renamed file has changed */
5530         mutex_enter(&recp->c_statelock);
5531         CFSOP_MODIFY_COBJECT(fscp, recp, cr);
5532         mutex_exit(&recp->c_statelock);
5533 
             /* common exit: drop the directory locks and the hold on revp */
5534 out:
5535         if (odcp != ndcp)
5536                 rw_exit(&ndcp->c_rwlock);
5537         rw_exit(&odcp->c_rwlock);
5538 
5539         VN_RELE(revp);
5540 
5541         return (error);
5542 }
5543 
/*
 * Disconnected-mode half of rename: rename onm in odvp to nnm in ndvp
 * using only cached state.  The change is applied to the cached
 * directories and recorded through cachefs_dlog_rename(); the log entry
 * is committed at out: together with the final error value.
 *
 *   delvp - vnode currently named nnm in ndvp that the rename replaces,
 *           or NULL if there is none.
 *
 * Returns 0 on success, including the case where delvp is the vnode being
 * renamed (nothing is done then).  ETIMEDOUT is returned when the rename
 * is not carried out from the cache: write-around file system,
 * MD_NEEDATTRS set on the source or on delvp, a directory that is not
 * MD_POPULATED, a directory being moved to a different parent, the source
 * resolving to odvp itself, or ENOTDIR from the cached-directory
 * routines.  ENOSPC is returned when cachefs_dlog_cidmap(),
 * cachefs_modified_alloc() or cachefs_dlog_rename() fail.  Failures of
 * the access and sticky-bit checks are returned unchanged.
 *
 * NOTE(review): the caller tests the result with CFS_TIMEOUT() and then
 * retries with connected = 1; presumably ETIMEDOUT is what requests that
 * retry -- confirm against the CFS_TIMEOUT definition.
 */
5544 static int
5545 cachefs_rename_disconnected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5546     char *nnm, cred_t *cr, vnode_t *delvp)
5547 {
5548         cnode_t *odcp = VTOC(odvp);
5549         cnode_t *ndcp = VTOC(ndvp);
5550         cnode_t *delcp = NULL;  /* stays NULL when nothing is replaced */
5551         vnode_t *revp = NULL;
5552         cnode_t *recp;
5553         fscache_t *fscp = C_TO_FSCACHE(odcp);
5554         int error = 0;
5555         struct fid cookie;
5556         struct fid *cookiep;
5557         cfs_cid_t cid;
5558         off_t commit = 0;       /* non-zero once the log entry was written */
5559         timestruc_t current_time;
5560 
5561         if (CFS_ISFS_WRITE_AROUND(fscp))
5562                 return (ETIMEDOUT);
5563 
5564         /* find the file we are renaming; revp is released at out: */
5565         error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5566         if (error)
5567                 return (error);
5568         recp = VTOC(revp);
5569 
5570         /*
5571          * To avoid deadlock, we acquire this global rename lock before
5572          * we try to get the locks for the source and target directories.
5573          */
5574         mutex_enter(&cachefs_rename_lock);
5575         rw_enter(&odcp->c_rwlock, RW_WRITER);
5576         if (odcp != ndcp) {
5577                 rw_enter(&ndcp->c_rwlock, RW_WRITER);
5578         }
5579         mutex_exit(&cachefs_rename_lock);
5580 
5581         if (recp->c_metadata.md_flags & MD_NEEDATTRS) {
5582                 error = ETIMEDOUT;
5583                 goto out;
5584         }
5585 
             /*
              * Set MD_MAPPING on the source if it is not set yet.  The flag is
              * tested once without the lock and again under c_statelock before
              * cachefs_dlog_cidmap() is called.
              */
5586         if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5587                 mutex_enter(&recp->c_statelock);
5588                 if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5589                         error = cachefs_dlog_cidmap(fscp);
5590                         if (error) {
5591                                 mutex_exit(&recp->c_statelock);
                                     /* any cidmap failure is reported as ENOSPC */
5592                                 error = ENOSPC;
5593                                 goto out;
5594                         }
5595                         recp->c_metadata.md_flags |= MD_MAPPING;
5596                         recp->c_flags |= CN_UPDATED;
5597                 }
5598                 mutex_exit(&recp->c_statelock);
5599         }
5600 
5601         /* check permissions */
5602         /* XXX clean up this mutex junk sometime */
5603         mutex_enter(&odcp->c_statelock);
5604         error = cachefs_access_local(odcp, (VEXEC|VWRITE), cr);
5605         mutex_exit(&odcp->c_statelock);
5606         if (error != 0)
5607                 goto out;
5608         mutex_enter(&ndcp->c_statelock);
5609         error = cachefs_access_local(ndcp, (VEXEC|VWRITE), cr);
5610         mutex_exit(&ndcp->c_statelock);
5611         if (error != 0)
5612                 goto out;
5613         mutex_enter(&odcp->c_statelock);
5614         error = cachefs_stickyrmchk(odcp, recp, cr);
5615         mutex_exit(&odcp->c_statelock);
5616         if (error != 0)
5617                 goto out;
5618 
5619         /* dirs must be populated */
5620         if (((odcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
5621             ((ndcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
5622                 error = ETIMEDOUT;
5623                 goto out;
5624         }
5625 
5626         /* for now do not allow moving dirs because could cause cycles */
5627         if ((((revp->v_type == VDIR) && (odvp != ndvp))) ||
5628             (revp == odvp)) {
5629                 error = ETIMEDOUT;
5630                 goto out;
5631         }
5632 
5633         /* if a file is being deleted because of this rename */
5634         if (delvp) {
5635                 delcp = VTOC(delvp);
5636 
5637                 /* if src and dest file are the same */
5638                 if (delvp == revp) {
5639                         error = 0;
5640                         goto out;
5641                 }
5642 
5643                 if (delcp->c_metadata.md_flags & MD_NEEDATTRS) {
5644                         error = ETIMEDOUT;
5645                         goto out;
5646                 }
5647 
5648                 /* if there are hard links to this file */
5649                 if (delcp->c_attr.va_nlink > 1) {
5650                         mutex_enter(&delcp->c_statelock);
5651                         if (cachefs_modified_alloc(delcp)) {
5652                                 mutex_exit(&delcp->c_statelock);
5653                                 error = ENOSPC;
5654                                 goto out;
5655                         }
5656 
5657                         if ((delcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5658                                 error = cachefs_dlog_cidmap(fscp);
5659                                 if (error) {
5660                                         mutex_exit(&delcp->c_statelock);
5661                                         error = ENOSPC;
5662                                         goto out;
5663                                 }
5664                                 delcp->c_metadata.md_flags |= MD_MAPPING;
5665                                 delcp->c_flags |= CN_UPDATED;
5666                         }
5667                         mutex_exit(&delcp->c_statelock);
5668                 }
5669 
5670                 /* make sure we can delete file */
5671                 mutex_enter(&ndcp->c_statelock);
5672                 error = cachefs_stickyrmchk(ndcp, delcp, cr);
5673                 mutex_exit(&ndcp->c_statelock);
5674                 if (error != 0)
5675                         goto out;
5676 
5677                 /*
5678                  * If the cnode is active, make a link to the file
5679                  * so operations on the file will continue.
                      * "Active" here means: not a directory, and v_count is
                      * neither 1 nor 2-with-c_ipending-set.
5680                  */
5681                 dnlc_purge_vp(delvp);
5682                 if ((delvp->v_type != VDIR) &&
5683                     !((delvp->v_count == 1) ||
5684                     ((delvp->v_count == 2) && delcp->c_ipending))) {
5685                         error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5686                         if (error)
5687                                 goto out;
5688                 }
5689         }
5690 
5691         /* purge mappings to file in the old directory */
5692         dnlc_purge_vp(odvp);
5693 
5694         /* purge mappings in the new dir if we deleted a file */
5695         if (delvp && (odvp != ndvp))
5696                 dnlc_purge_vp(ndvp);
5697 
5698         /* find the entry in the old directory */
5699         mutex_enter(&odcp->c_statelock);
             /* repeat the populated test, this time under c_statelock */
5700         if ((odcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5701                 mutex_exit(&odcp->c_statelock);
5702                 error = ETIMEDOUT;
5703                 goto out;
5704         }
5705         cookiep = NULL;
5706         error = cachefs_dir_look(odcp, onm, &cookie, NULL, NULL, &cid);
             /*
              * EINVAL still counts as "entry found" (cid is used later), but
              * the cookie is not passed on: cookiep stays NULL.
              */
5707         if (error == 0 || error == EINVAL) {
5708                 if (error == 0)
5709                         cookiep = &cookie;
5710         } else {
5711                 mutex_exit(&odcp->c_statelock);
5712                 if (error == ENOTDIR)
5713                         error = ETIMEDOUT;
5714                 goto out;
5715         }
5716         error = 0;
5717 
5718         /* write the log entry; odcp->c_statelock is still held here */
5719         commit = cachefs_dlog_rename(fscp, odcp, onm, ndcp, nnm, cr,
5720             recp, delcp);
5721         if (commit == 0) {
5722                 mutex_exit(&odcp->c_statelock);
5723                 error = ENOSPC;
5724                 goto out;
5725         }
5726 
             /*
              * From here on commit is non-zero, so every exit through out:
              * also commits the log entry with the error value at that time.
              */
5727         /* remove the directory entry from the old directory */
5728         cachefs_modified(odcp);
5729         error = cachefs_dir_rmentry(odcp, onm);
5730         if (error) {
5731                 mutex_exit(&odcp->c_statelock);
5732                 if (error == ENOTDIR)
5733                         error = ETIMEDOUT;
5734                 goto out;
5735         }
5736         mutex_exit(&odcp->c_statelock);
5737 
5738         /* install the directory entry in the new directory */
5739         mutex_enter(&ndcp->c_statelock);
             /* result if ndcp turns out not to be populated any more */
5740         error = ENOTDIR;
5741         if (ndcp->c_metadata.md_flags & MD_POPULATED) {
5742                 ASSERT(cid.cid_fileno != 0);
5743                 cachefs_modified(ndcp);
5744                 error = 0;
                     /* drop the entry of the replaced file first */
5745                 if (delvp) {
5746                         error = cachefs_dir_rmentry(ndcp, nnm);
5747                 }
5748                 if (error == 0) {
5749                         error = cachefs_dir_enter(ndcp, nnm, cookiep,
5750                             &cid, SM_ASYNC);
5751                 }
5752         }
             /*
              * The old entry has already been removed from odcp above, so on
              * failure both directories are handed to cachefs_nocache().
              */
5753         if (error) {
5754                 cachefs_nocache(ndcp);
5755                 mutex_exit(&ndcp->c_statelock);
5756                 mutex_enter(&odcp->c_statelock);
5757                 cachefs_nocache(odcp);
5758                 mutex_exit(&odcp->c_statelock);
5759                 if (error == ENOTDIR)
5760                         error = ETIMEDOUT;
5761                 goto out;
5762         }
5763         mutex_exit(&ndcp->c_statelock);
5764 
             /* one timestamp is shared by all the local time updates below */
5765         gethrestime(&current_time);
5766 
5767         /* update the file we just deleted */
5768         if (delvp) {
5769                 mutex_enter(&delcp->c_statelock);
5770                 delcp->c_attr.va_nlink--;
5771                 delcp->c_metadata.md_localctime = current_time;
5772                 delcp->c_metadata.md_flags |= MD_LOCALCTIME;
                     /* link count is tested after the decrement: 0 means last link */
5773                 if (delcp->c_attr.va_nlink == 0) {
5774                         delcp->c_flags |= CN_DESTROY;
5775                 } else {
5776                         delcp->c_flags |= CN_UPDATED;
5777                 }
5778                 mutex_exit(&delcp->c_statelock);
5779         }
5780 
5781         /* update the file we renamed */
5782         mutex_enter(&recp->c_statelock);
5783         recp->c_metadata.md_localctime = current_time;
5784         recp->c_metadata.md_flags |= MD_LOCALCTIME;
5785         recp->c_flags |= CN_UPDATED;
5786         mutex_exit(&recp->c_statelock);
5787 
5788         /* update the source directory */
5789         mutex_enter(&odcp->c_statelock);
5790         odcp->c_metadata.md_localctime = current_time;
5791         odcp->c_metadata.md_localmtime = current_time;
5792         odcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5793         odcp->c_flags |= CN_UPDATED;
5794         mutex_exit(&odcp->c_statelock);
5795 
5796         /* update the destination directory */
5797         if (odcp != ndcp) {
5798                 mutex_enter(&ndcp->c_statelock);
5799                 ndcp->c_metadata.md_localctime = current_time;
5800                 ndcp->c_metadata.md_localmtime = current_time;
5801                 ndcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5802                 ndcp->c_flags |= CN_UPDATED;
5803                 mutex_exit(&ndcp->c_statelock);
5804         }
5805 
             /* common exit: commit the log entry (if any), drop locks and revp */
5806 out:
5807         if (commit) {
5808                 /* commit the log entry; a commit failure is ignored here */
5809                 if (cachefs_dlog_commit(fscp, commit, error)) {
5810                         /*EMPTY*/
5811                         /* XXX bob: fix on panic */
5812                 }
5813         }
5814 
5815         if (odcp != ndcp)
5816                 rw_exit(&ndcp->c_rwlock);
5817         rw_exit(&odcp->c_rwlock);
5818 
5819         VN_RELE(revp);
5820 
5821         return (error);
5822 }
5823 
5824 /*ARGSUSED*/
5825 static int
5826 cachefs_mkdir(vnode_t *dvp, char *nm, vattr_t *vap, vnode_t **vpp,
5827     cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
5828 {
5829         cnode_t *dcp = VTOC(dvp);
5830         fscache_t *fscp = C_TO_FSCACHE(dcp);
5831         cachefscache_t *cachep = fscp->fs_cache;
5832         int error = 0;
5833         int held = 0;
5834         int connected = 0;
5835 
5836 #ifdef CFSDEBUG
5837         CFS_DEBUG(CFSDEBUG_VOPS)
5838                 printf("cachefs_mkdir: ENTER dvp %p\n", (void *)dvp);
5839 #endif
5840 
5841         if (getzoneid() != GLOBAL_ZONEID) {
5842                 error = EPERM;
5843                 goto out;
5844         }
5845 
5846         if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5847                 ASSERT(dcp->c_flags & CN_NOCACHE);
5848 
5849         /*
5850          * Cachefs only provides pass-through support for NFSv4,
5851          * and all vnode operations are passed through to the
5852          * back file system. For NFSv4 pass-through to work, only
5853          * connected operation is supported, the cnode backvp must
5854          * exist, and cachefs optional (eg., disconnectable) flags
5855          * are turned off. Assert these conditions to ensure that
5856          * the backfilesystem is called for the mkdir operation.
5857          */
5858         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5859         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
5860 
5861         for (;;) {
5862                 /* get (or renew) access to the file system */
5863                 if (held) {
5864                         /* Won't loop with NFSv4 connected behavior */
5865                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5866                         rw_exit(&dcp->c_rwlock);
5867                         cachefs_cd_release(fscp);
5868                         held = 0;
5869                 }
5870                 error = cachefs_cd_access(fscp, connected, 1);
5871                 if (error)
5872                         break;
5873                 rw_enter(&dcp->c_rwlock, RW_WRITER);
5874                 held = 1;
5875 
5876                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5877                         error = cachefs_mkdir_connected(dvp, nm, vap,
5878                             vpp, cr);
5879                         if (CFS_TIMEOUT(fscp, error)) {
5880                                 rw_exit(&dcp->c_rwlock);
5881                                 cachefs_cd_release(fscp);
5882                                 held = 0;
5883                                 cachefs_cd_timedout(fscp);
5884                                 connected = 0;
5885                                 continue;
5886                         }
5887                 } else {
5888                         error = cachefs_mkdir_disconnected(dvp, nm, vap,
5889                             vpp, cr);
5890                         if (CFS_TIMEOUT(fscp, error)) {
5891                                 connected = 1;
5892                                 continue;
5893                         }
5894                 }
5895                 break;
5896         }
5897 
5898         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_MKDIR)) {
5899                 fid_t *fidp = NULL;
5900                 ino64_t fileno = 0;
5901                 cnode_t *cp = NULL;
5902                 if (error == 0)
5903                         cp = VTOC(*vpp);
5904 
5905                 if (cp != NULL) {
5906                         fidp = &cp->c_metadata.md_cookie;
5907                         fileno = cp->c_id.cid_fileno;
5908                 }
5909 
5910                 cachefs_log_mkdir(cachep, error, fscp->fs_cfsvfsp,
5911                     fidp, fileno, crgetuid(cr));
5912         }
5913 
5914         if (held) {
5915                 rw_exit(&dcp->c_rwlock);
5916                 cachefs_cd_release(fscp);
5917         }
5918         if (error == 0 && CFS_ISFS_NONSHARED(fscp))
5919                 (void) cachefs_pack(dvp, nm, cr);
5920 
5921 #ifdef CFS_CD_DEBUG
5922         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5923 #endif
5924 out:
5925 #ifdef CFSDEBUG
5926         CFS_DEBUG(CFSDEBUG_VOPS)
5927                 printf("cachefs_mkdir: EXIT error = %d\n", error);
5928 #endif
5929         return (error);
5930 }
5931 
5932 static int
5933 cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
5934     vnode_t **vpp, cred_t *cr)
5935 {
5936         cnode_t *newcp = NULL, *dcp = VTOC(dvp);
5937         struct vnode *vp = NULL;
5938         int error = 0;
5939         fscache_t *fscp = C_TO_FSCACHE(dcp);
5940         struct fid cookie;
5941         struct vattr attr;
5942         cfs_cid_t cid, dircid;
5943         uint32_t valid_fid;
5944 
5945         if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5946                 ASSERT(dcp->c_flags & CN_NOCACHE);
5947 
5948         mutex_enter(&dcp->c_statelock);
5949 
5950         /* get backvp of dir */
5951         if (dcp->c_backvp == NULL) {
5952                 error = cachefs_getbackvp(fscp, dcp);
5953                 if (error) {
5954                         mutex_exit(&dcp->c_statelock);
5955                         goto out;
5956                 }
5957         }
5958 
5959         /* consistency check the directory */
5960         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
5961         if (error) {
5962                 mutex_exit(&dcp->c_statelock);
5963                 goto out;
5964         }
5965         dircid = dcp->c_id;
5966 
5967         /* make the dir on the back fs */
5968         CFS_DPRINT_BACKFS_NFSV4(fscp,
5969             ("cachefs_mkdir (nfsv4): dcp %p, dbackvp %p, "
5970             "name %s\n", dcp, dcp->c_backvp, nm));
5971         error = VOP_MKDIR(dcp->c_backvp, nm, vap, &vp, cr, NULL, 0, NULL);
5972         mutex_exit(&dcp->c_statelock);
5973         if (error) {
5974                 goto out;
5975         }
5976 
5977         /* get the cookie and make the cnode */
5978         attr.va_mask = AT_ALL;
5979         valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
5980         error = cachefs_getcookie(vp, &cookie, &attr, cr, valid_fid);
5981         if (error) {
5982                 goto out;
5983         }
5984         cid.cid_flags = 0;
5985         cid.cid_fileno = attr.va_nodeid;
5986         error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
5987             &attr, vp, cr, 0, &newcp);
5988         if (error) {
5989                 goto out;
5990         }
5991         ASSERT(CTOV(newcp)->v_type == VDIR);
5992         *vpp = CTOV(newcp);
5993 
5994         /* if the dir is populated, add the new entry */
5995         mutex_enter(&dcp->c_statelock);
5996         if (CFS_ISFS_NONSHARED(fscp) &&
5997             (dcp->c_metadata.md_flags & MD_POPULATED)) {
5998                 error = cachefs_dir_enter(dcp, nm, &cookie, &newcp->c_id,
5999                     SM_ASYNC);
6000                 if (error) {
6001                         cachefs_nocache(dcp);
6002                         error = 0;
6003                 }
6004         }
6005         dcp->c_attr.va_nlink++;
6006         dcp->c_flags |= CN_UPDATED;
6007         CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6008         mutex_exit(&dcp->c_statelock);
6009 
6010         /* XXX bob: should we do a filldir here? or just add . and .. */
6011         /* maybe should kick off an async filldir so caller does not wait */
6012 
6013         /* put the entry in the dnlc */
6014         if (cachefs_dnlc)
6015                 dnlc_enter(dvp, nm, *vpp);
6016 
6017         /* save the fileno of the parent so can find the name */
6018         if (bcmp(&newcp->c_metadata.md_parent, &dircid,
6019             sizeof (cfs_cid_t)) != 0) {
6020                 mutex_enter(&newcp->c_statelock);
6021                 newcp->c_metadata.md_parent = dircid;
6022                 newcp->c_flags |= CN_UPDATED;
6023                 mutex_exit(&newcp->c_statelock);
6024         }
6025 out:
6026         if (vp)
6027                 VN_RELE(vp);
6028 
6029         return (error);
6030 }
6031 
6032 static int
6033 cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
6034     vnode_t **vpp, cred_t *cr)
6035 {
6036         cnode_t *dcp = VTOC(dvp);
6037         fscache_t *fscp = C_TO_FSCACHE(dcp);
6038         int error;
6039         cnode_t *newcp = NULL;
6040         struct vattr va;
6041         timestruc_t current_time;
6042         off_t commit = 0;
6043         char *s;
6044         int namlen;
6045 
6046         /* don't allow '/' characters in pathname component */
6047         for (s = nm, namlen = 0; *s; s++, namlen++)
6048                 if (*s == '/')
6049                         return (EACCES);
6050         if (namlen == 0)
6051                 return (EINVAL);
6052 
6053         if (CFS_ISFS_WRITE_AROUND(fscp))
6054                 return (ETIMEDOUT);
6055 
6056         mutex_enter(&dcp->c_statelock);
6057 
6058         /* check permissions */
6059         if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6060                 mutex_exit(&dcp->c_statelock);
6061                 goto out;
6062         }
6063 
6064         /* the directory front file must be populated */
6065         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6066                 error = ETIMEDOUT;
6067                 mutex_exit(&dcp->c_statelock);
6068                 goto out;
6069         }
6070 
6071         /* make sure nm does not already exist in the directory */
6072         error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
6073         if (error == ENOTDIR) {
6074                 error = ETIMEDOUT;
6075                 mutex_exit(&dcp->c_statelock);
6076                 goto out;
6077         }
6078         if (error != ENOENT) {
6079                 error = EEXIST;
6080                 mutex_exit(&dcp->c_statelock);
6081                 goto out;
6082         }
6083 
6084         /* make up a reasonable set of attributes */
6085         cachefs_attr_setup(vap, &va, dcp, cr);
6086         va.va_type = VDIR;
6087         va.va_mode |= S_IFDIR;
6088         va.va_nlink = 2;
6089 
6090         mutex_exit(&dcp->c_statelock);
6091 
6092         /* create the cnode */
6093         error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6094         if (error)
6095                 goto out;
6096 
6097         mutex_enter(&newcp->c_statelock);
6098 
6099         error = cachefs_dlog_cidmap(fscp);
6100         if (error) {
6101                 mutex_exit(&newcp->c_statelock);
6102                 goto out;
6103         }
6104 
6105         cachefs_creategid(dcp, newcp, vap, cr);
6106         mutex_enter(&dcp->c_statelock);
6107         cachefs_createacl(dcp, newcp);
6108         mutex_exit(&dcp->c_statelock);
6109         gethrestime(&current_time);
6110         newcp->c_metadata.md_vattr.va_atime = current_time;
6111         newcp->c_metadata.md_localctime = current_time;
6112         newcp->c_metadata.md_localmtime = current_time;
6113         newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6114             MD_LOCALCTIME;
6115         newcp->c_flags |= CN_UPDATED;
6116 
6117         /* make a front file for the new directory, add . and .. */
6118         error = cachefs_dir_new(dcp, newcp);
6119         if (error) {
6120                 mutex_exit(&newcp->c_statelock);
6121                 goto out;
6122         }
6123         cachefs_modified(newcp);
6124 
6125         /*
6126          * write the metadata now rather than waiting until
6127          * inactive so that if there's no space we can let
6128          * the caller know.
6129          */
6130         ASSERT(newcp->c_frontvp);
6131         ASSERT((newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
6132         ASSERT((newcp->c_flags & CN_ALLOC_PENDING) == 0);
6133         error = filegrp_write_metadata(newcp->c_filegrp,
6134             &newcp->c_id, &newcp->c_metadata);
6135         if (error) {
6136                 mutex_exit(&newcp->c_statelock);
6137                 goto out;
6138         }
6139         mutex_exit(&newcp->c_statelock);
6140 
6141         /* log the operation */
6142         commit = cachefs_dlog_mkdir(fscp, dcp, newcp, nm, &va, cr);
6143         if (commit == 0) {
6144                 error = ENOSPC;
6145                 goto out;
6146         }
6147 
6148         mutex_enter(&dcp->c_statelock);
6149 
6150         /* make sure directory is still populated */
6151         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6152                 mutex_exit(&dcp->c_statelock);
6153                 error = ETIMEDOUT;
6154                 goto out;
6155         }
6156         cachefs_modified(dcp);
6157 
6158         /* enter the new file in the directory */
6159         error = cachefs_dir_enter(dcp, nm, &newcp->c_metadata.md_cookie,
6160             &newcp->c_id, SM_ASYNC);
6161         if (error) {
6162                 mutex_exit(&dcp->c_statelock);
6163                 goto out;
6164         }
6165 
6166         /* update parent dir times */
6167         dcp->c_metadata.md_localctime = current_time;
6168         dcp->c_metadata.md_localmtime = current_time;
6169         dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6170         dcp->c_attr.va_nlink++;
6171         dcp->c_flags |= CN_UPDATED;
6172         mutex_exit(&dcp->c_statelock);
6173 
6174 out:
6175         if (commit) {
6176                 /* commit the log entry */
6177                 if (cachefs_dlog_commit(fscp, commit, error)) {
6178                         /*EMPTY*/
6179                         /* XXX bob: fix on panic */
6180                 }
6181         }
6182         if (error) {
6183                 if (newcp) {
6184                         mutex_enter(&newcp->c_statelock);
6185                         newcp->c_flags |= CN_DESTROY;
6186                         mutex_exit(&newcp->c_statelock);
6187                         VN_RELE(CTOV(newcp));
6188                 }
6189         } else {
6190                 *vpp = CTOV(newcp);
6191         }
6192         return (error);
6193 }
6194 
/*
 * cachefs_rmdir: remove the directory named nm from the directory dvp.
 *
 *	dvp	directory that contains the entry to remove
 *	nm	name of the directory to remove
 *	cdir	vnode the target is compared against with VOP_CMP(); a match
 *		fails the call with EINVAL ("must not be current dir")
 *	cr	credentials used for the local access check, the lookup,
 *		the removal itself and the log record
 *	ct	passed to VOP_CMP() only
 *	flags	not used
 *
 * Callers outside the global zone get EPERM.  Otherwise the work is done
 * in a retry loop: access to the file system is obtained with
 * cachefs_cd_access(), the target is looked up and validated, and then
 * cachefs_rmdir_connected() or cachefs_rmdir_disconnected() is called
 * depending on fscp->fs_cdconnected.  Whenever a step fails with an error
 * for which CFS_TIMEOUT() is true, the loop starts over with the
 * `connected' argument of cachefs_cd_access() flipped.
 *
 * Returns 0 on success or an errno value.  Errors produced here are
 * ENOTDIR (target is not a directory), EINVAL (target is cdir) and EBUSY
 * (vn_vfswlock() failed or something is mounted on the target); in the
 * disconnected case also EINVAL for "" and "." and EEXIST for "..".
 */
6195 /*ARGSUSED*/
6196 static int
6197 cachefs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6198     caller_context_t *ct, int flags)
6199 {
6200         cnode_t *dcp = VTOC(dvp);
6201         fscache_t *fscp = C_TO_FSCACHE(dcp);
6202         cachefscache_t *cachep = fscp->fs_cache;
6203         int error = 0;
6204         int held = 0;           /* set while cachefs_cd_access() is in effect */
6205         int connected = 0;      /* second argument to cachefs_cd_access() */
6206         size_t namlen;
6207         vnode_t *vp = NULL;     /* directory being removed, held if non-NULL */
6208         int vfslock = 0;        /* set while vn_vfswlock(vp) is held */
6209 
6210 #ifdef CFSDEBUG
6211         CFS_DEBUG(CFSDEBUG_VOPS)
6212                 printf("cachefs_rmdir: ENTER vp %p\n", (void *)dvp);
6213 #endif
6214 
6215         if (getzoneid() != GLOBAL_ZONEID) {
6216                 error = EPERM;
6217                 goto out;
6218         }
6219 
             /* a NOFILL or NOCACHE cache is expected to have a CN_NOCACHE cnode */
6220         if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
6221                 ASSERT(dcp->c_flags & CN_NOCACHE);
6222 
6223         /*
6224          * Cachefs only provides pass-through support for NFSv4,
6225          * and all vnode operations are passed through to the
6226          * back file system. For NFSv4 pass-through to work, only
6227          * connected operation is supported, the cnode backvp must
6228          * exist, and cachefs optional (eg., disconnectable) flags
6229          * are turned off. Assert these conditions to ensure that
6230          * the backfilesystem is called for the rmdir operation.
6231          */
6232         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6233         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6234 
6235         for (;;) {
                     /*
                      * On a retry pass, first give back whatever the
                      * previous pass left behind: the vfs lock, then the
                      * vnode hold, then (below) the file system access.
                      */
6236                 if (vfslock) {
6237                         vn_vfsunlock(vp);
6238                         vfslock = 0;
6239                 }
6240                 if (vp) {
6241                         VN_RELE(vp);
6242                         vp = NULL;
6243                 }
6244 
6245                 /* get (or renew) access to the file system */
6246                 if (held) {
6247                         /* Won't loop with NFSv4 connected behavior */
6248                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6249                         cachefs_cd_release(fscp);
6250                         held = 0;
6251                 }
6252                 error = cachefs_cd_access(fscp, connected, 1);
6253                 if (error)
6254                         break;
6255                 held = 1;
6256 
6257                 /* if disconnected, do some extra error checking */
6258                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
6259                         /* check permissions */
6260                         mutex_enter(&dcp->c_statelock);
6261                         error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
6262                         mutex_exit(&dcp->c_statelock);
                             /* timeout class error: start over with connected = 1 */
6263                         if (CFS_TIMEOUT(fscp, error)) {
6264                                 connected = 1;
6265                                 continue;
6266                         }
6267                         if (error)
6268                                 break;
6269 
6270                         namlen = strlen(nm);
6271                         if (namlen == 0) {
6272                                 error = EINVAL;
6273                                 break;
6274                         }
6275 
6276                         /* cannot remove . and .. */
6277                         if (nm[0] == '.') {
6278                                 if (namlen == 1) {
6279                                         error = EINVAL;
6280                                         break;
6281                                 } else if (namlen == 2 && nm[1] == '.') {
6282                                         error = EEXIST;
6283                                         break;
6284                                 }
6285                         }
6286 
6287                 }
6288 
6289                 /* get the cnode of the dir to remove */
6290                 error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
6291                 if (error) {
                             /*
                              * A timeout class error restarts the loop in
                              * the other mode; when connected, the access
                              * is released and cachefs_cd_timedout() is
                              * called first.  Any other error ends the loop.
                              */
6292                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6293                                 if (CFS_TIMEOUT(fscp, error)) {
6294                                         cachefs_cd_release(fscp);
6295                                         held = 0;
6296                                         cachefs_cd_timedout(fscp);
6297                                         connected = 0;
6298                                         continue;
6299                                 }
6300                         } else {
6301                                 if (CFS_TIMEOUT(fscp, error)) {
6302                                         connected = 1;
6303                                         continue;
6304                                 }
6305                         }
6306                         break;
6307                 }
6308 
6309                 /* must be a dir */
6310                 if (vp->v_type != VDIR) {
6311                         error = ENOTDIR;
6312                         break;
6313                 }
6314 
6315                 /* must not be current dir */
6316                 if (VOP_CMP(vp, cdir, ct)) {
6317                         error = EINVAL;
6318                         break;
6319                 }
6320 
6321                 /* see ufs_dirremove for why this is done, mount race */
6322                 if (vn_vfswlock(vp)) {
6323                         error = EBUSY;
6324                         break;
6325                 }
6326                 vfslock = 1;
6327                 if (vn_mountedvfs(vp) != NULL) {
6328                         error = EBUSY;
6329                         break;
6330                 }
6331 
                     /* do the removal in whichever mode we are in now */
6332                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6333                         error = cachefs_rmdir_connected(dvp, nm, cdir,
6334                             cr, vp);
6335                         if (CFS_TIMEOUT(fscp, error)) {
6336                                 cachefs_cd_release(fscp);
6337                                 held = 0;
6338                                 cachefs_cd_timedout(fscp);
6339                                 connected = 0;
6340                                 continue;
6341                         }
6342                 } else {
6343                         error = cachefs_rmdir_disconnected(dvp, nm, cdir,
6344                             cr, vp);
6345                         if (CFS_TIMEOUT(fscp, error)) {
6346                                 connected = 1;
6347                                 continue;
6348                         }
6349                 }
6350                 break;
6351         }
6352 
             /*
              * Log the outcome.  vp is still NULL if the loop ended before a
              * successful lookup; the record then carries a NULL fid and a
              * fileno of 0.
              */
6353         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RMDIR)) {
6354                 ino64_t fileno = 0;
6355                 fid_t *fidp = NULL;
6356                 cnode_t *cp = NULL;
6357                 if (vp)
6358                         cp = VTOC(vp);
6359 
6360                 if (cp != NULL) {
6361                         fidp = &cp->c_metadata.md_cookie;
6362                         fileno = cp->c_id.cid_fileno;
6363                 }
6364 
6365                 cachefs_log_rmdir(cachep, error, fscp->fs_cfsvfsp,
6366                     fidp, fileno, crgetuid(cr));
6367         }
6368 
             /* release whatever the final pass of the loop still holds */
6369         if (held) {
6370                 cachefs_cd_release(fscp);
6371         }
6372 
6373         if (vfslock)
6374                 vn_vfsunlock(vp);
6375 
6376         if (vp)
6377                 VN_RELE(vp);
6378 
6379 #ifdef CFS_CD_DEBUG
6380         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6381 #endif
             /* the only direct jump here is the zone check, before anything is held */
6382 out:
6383 #ifdef CFSDEBUG
6384         CFS_DEBUG(CFSDEBUG_VOPS)
6385                 printf("cachefs_rmdir: EXIT error = %d\n", error);
6386 #endif
6387 
6388         return (error);
6389 }
6390 
/*
 * cachefs_rmdir_connected: connected-mode half of cachefs_rmdir().
 *
 *	dvp	directory that contains the entry
 *	nm	name of the directory to remove
 *	cdir	passed through to VOP_RMDIR() on the back file system
 *	cr	credentials for the consistency check and the back fs call
 *	vp	vnode of the directory being removed
 *
 * dcp->c_rwlock (as writer) and the c_statelock of both the parent and
 * the target cnode are held from entry until the common exit at `out'.
 * The directory is removed on the back file system first; only if that
 * succeeds are the cached directory, the link counts and the cnode flags
 * updated.
 *
 * Returns 0 on success, otherwise the error from cachefs_getbackvp(),
 * CFSOP_CHECK_COBJECT() or VOP_RMDIR().
 */
6391 static int
6392 cachefs_rmdir_connected(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6393     vnode_t *vp)
6394 {
6395         cnode_t *dcp = VTOC(dvp);
6396         cnode_t *cp = VTOC(vp);
6397         int error = 0;
6398         fscache_t *fscp = C_TO_FSCACHE(dcp);
6399 
             /* lock order: parent rwlock, parent statelock, target statelock */
6400         rw_enter(&dcp->c_rwlock, RW_WRITER);
6401         mutex_enter(&dcp->c_statelock);
6402         mutex_enter(&cp->c_statelock);
6403 
             /* the parent needs a back vnode for the VOP_RMDIR() below */
6404         if (dcp->c_backvp == NULL) {
6405                 error = cachefs_getbackvp(fscp, dcp);
6406                 if (error) {
6407                         goto out;
6408                 }
6409         }
6410 
6411         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6412         if (error)
6413                 goto out;
6414 
6415         /* rmdir on the back fs */
6416         CFS_DPRINT_BACKFS_NFSV4(fscp,
6417             ("cachefs_rmdir (nfsv4): dcp %p, dbackvp %p, "
6418             "name %s\n", dcp, dcp->c_backvp, nm));
6419         error = VOP_RMDIR(dcp->c_backvp, nm, cdir, cr, NULL, 0);
6420         if (error)
6421                 goto out;
6422 
6423         /* if the dir is populated, remove the entry from it */
6424         if (CFS_ISFS_NONSHARED(fscp) &&
6425             (dcp->c_metadata.md_flags & MD_POPULATED)) {
6426                 error = cachefs_dir_rmentry(dcp, nm);
                     /*
                      * The back fs removal already succeeded, so a failure
                      * here is not reported: cachefs_nocache() is called on
                      * the parent and the error is cleared.
                      */
6427                 if (error) {
6428                         cachefs_nocache(dcp);
6429                         error = 0;
6430                 }
6431         }
6432 
6433         /*
6434          * *if* the (hard) link count goes to 0, then we set the CDESTROY
6435          * flag on the cnode. The cached object will then be destroyed
6436          * at inactive time where the chickens come home to roost :-)
6437          * The link cnt for directories is bumped down by 2 'cause the "."
6438          * entry has to be elided too ! The link cnt for the parent goes down
6439          * by 1 (because of "..").
6440          */
6441         cp->c_attr.va_nlink -= 2;
6442         dcp->c_attr.va_nlink--;
6443         if (cp->c_attr.va_nlink == 0) {
6444                 cp->c_flags |= CN_DESTROY;
6445         } else {
6446                 cp->c_flags |= CN_UPDATED;
6447         }
6448         dcp->c_flags |= CN_UPDATED;
6449 
6450         dnlc_purge_vp(vp);
6451         CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6452 
             /* common exit: drop the locks in the reverse order they were taken */
6453 out:
6454         mutex_exit(&cp->c_statelock);
6455         mutex_exit(&dcp->c_statelock);
6456         rw_exit(&dcp->c_rwlock);
6457 
6458         return (error);
6459 }
6460 
/*
 * cachefs_rmdir_disconnected: disconnected-mode half of cachefs_rmdir().
 *
 *	dvp	directory that contains the entry
 *	nm	name of the directory to remove
 *	cdir	not used here
 *	cr	credentials for the sticky-bit check and the log record
 *	vp	vnode of the directory being removed
 *
 * Nothing is sent to the back file system.  The removal is recorded with
 * cachefs_dlog_rmdir() and then applied to the cached parent directory
 * and to the link counts, times and flags of both cnodes.  If a log
 * record was written it is committed at exit together with the final
 * error value.
 *
 * Returns 0 on success or an errno value.  ETIMEDOUT is returned when the
 * file system is write-around, when either directory is not populated,
 * or when cachefs_dir_empty()/cachefs_dir_rmentry() report ENOTDIR;
 * ENOSPC is returned when the log record cannot be written.
 * NOTE(review): ETIMEDOUT is presumably what makes the caller's
 * CFS_TIMEOUT() test retry in connected mode - confirm against the macro.
 */
6461 static int
6462 /*ARGSUSED*/
6463 cachefs_rmdir_disconnected(vnode_t *dvp, char *nm, vnode_t *cdir,
6464     cred_t *cr, vnode_t *vp)
6465 {
6466         cnode_t *dcp = VTOC(dvp);
6467         cnode_t *cp = VTOC(vp);
6468         fscache_t *fscp = C_TO_FSCACHE(dcp);
6469         int error = 0;
6470         off_t commit = 0;       /* from cachefs_dlog_rmdir(); 0 = nothing logged */
6471         timestruc_t current_time;
6472 
             /* returns before any lock is taken, so no cleanup is needed */
6473         if (CFS_ISFS_WRITE_AROUND(fscp))
6474                 return (ETIMEDOUT);
6475 
             /* lock order: parent rwlock, parent statelock, target statelock */
6476         rw_enter(&dcp->c_rwlock, RW_WRITER);
6477         mutex_enter(&dcp->c_statelock);
6478         mutex_enter(&cp->c_statelock);
6479 
6480         /* both directories must be populated */
6481         if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
6482             ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
6483                 error = ETIMEDOUT;
6484                 goto out;
6485         }
6486 
6487         /* if sticky bit set on the dir, more access checks to perform */
6488         if (error = cachefs_stickyrmchk(dcp, cp, cr)) {
6489                 goto out;
6490         }
6491 
6492         /* make sure dir is empty */
             /* cachefs_dir_empty() is consulted only when the link count exceeds 2 */
6493         if (cp->c_attr.va_nlink > 2) {
6494                 error = cachefs_dir_empty(cp);
6495                 if (error) {
6496                         if (error == ENOTDIR)
6497                                 error = ETIMEDOUT;
6498                         goto out;
6499                 }
6500                 cachefs_modified(cp);
6501         }
6502         cachefs_modified(dcp);
6503 
6504         /* log the operation */
6505         commit = cachefs_dlog_rmdir(fscp, dcp, nm, cp, cr);
6506         if (commit == 0) {
6507                 error = ENOSPC;
6508                 goto out;
6509         }
6510 
6511         /* remove name from parent dir */
6512         error = cachefs_dir_rmentry(dcp, nm);
6513         if (error == ENOTDIR) {
6514                 error = ETIMEDOUT;
6515                 goto out;
6516         }
6517         if (error)
6518                 goto out;
6519 
             /* one timestamp is used for both the target and the parent */
6520         gethrestime(&current_time);
6521 
6522         /* update deleted dir values */
             /* drop two links; at zero the cnode is flagged CN_DESTROY instead */
6523         cp->c_attr.va_nlink -= 2;
6524         if (cp->c_attr.va_nlink == 0)
6525                 cp->c_flags |= CN_DESTROY;
6526         else {
6527                 cp->c_metadata.md_localctime = current_time;
6528                 cp->c_metadata.md_flags |= MD_LOCALCTIME;
6529                 cp->c_flags |= CN_UPDATED;
6530         }
6531 
6532         /* update parent values */
6533         dcp->c_metadata.md_localctime = current_time;
6534         dcp->c_metadata.md_localmtime = current_time;
6535         dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6536         dcp->c_attr.va_nlink--;
6537         dcp->c_flags |= CN_UPDATED;
6538 
             /* common exit: unlock first, then settle the log record */
6539 out:
6540         mutex_exit(&cp->c_statelock);
6541         mutex_exit(&dcp->c_statelock);
6542         rw_exit(&dcp->c_rwlock);
6543         if (commit) {
6544                 /* commit the log entry */
6545                 if (cachefs_dlog_commit(fscp, commit, error)) {
6546                         /*EMPTY*/
6547                         /* XXX bob: fix on panic */
6548                 }
                     /* purged whenever a record was logged, whatever error is */
6549                 dnlc_purge_vp(vp);
6550         }
6551         return (error);
6552 }
6553 
/*
 * cachefs_symlink: create the symbolic link lnm in directory dvp.
 *
 *	dvp	directory in which the link is created
 *	lnm	name of the new link
 *	tva	attributes requested for the new link
 *	tnm	text the link points to
 *	cr	credentials passed to the worker routines and the log
 *	ct	not used
 *	flags	not used
 *
 * Callers outside the global zone get EPERM.  Otherwise a retry loop
 * obtains access with cachefs_cd_access(), takes dcp->c_rwlock as writer
 * and calls cachefs_symlink_connected() or cachefs_symlink_disconnected()
 * depending on fscp->fs_cdconnected.  When the worker fails with an error
 * for which CFS_TIMEOUT() is true, the loop starts over with the
 * `connected' argument of cachefs_cd_access() flipped.
 *
 * Returns 0 on success or the errno value from cachefs_cd_access() or
 * the worker routine.  No vnode for the new link is returned.
 */
6554 /*ARGSUSED*/
6555 static int
6556 cachefs_symlink(vnode_t *dvp, char *lnm, vattr_t *tva,
6557     char *tnm, cred_t *cr, caller_context_t *ct, int flags)
6558 {
6559         cnode_t *dcp = VTOC(dvp);
6560         fscache_t *fscp = C_TO_FSCACHE(dcp);
6561         cachefscache_t *cachep = fscp->fs_cache;
6562         int error = 0;
6563         int held = 0;           /* set while access AND dcp->c_rwlock are held */
6564         int connected = 0;      /* second argument to cachefs_cd_access() */
6565 
6566 #ifdef CFSDEBUG
6567         CFS_DEBUG(CFSDEBUG_VOPS)
6568                 printf("cachefs_symlink: ENTER dvp %p lnm %s tnm %s\n",
6569                     (void *)dvp, lnm, tnm);
6570 #endif
6571 
6572         if (getzoneid() != GLOBAL_ZONEID) {
6573                 error = EPERM;
6574                 goto out;
6575         }
6576 
             /* a NOCACHE cache is expected to have a CN_NOCACHE cnode */
6577         if (fscp->fs_cache->c_flags & CACHE_NOCACHE)
6578                 ASSERT(dcp->c_flags & CN_NOCACHE);
6579 
6580         /*
6581          * Cachefs only provides pass-through support for NFSv4,
6582          * and all vnode operations are passed through to the
6583          * back file system. For NFSv4 pass-through to work, only
6584          * connected operation is supported, the cnode backvp must
6585          * exist, and cachefs optional (eg., disconnectable) flags
6586          * are turned off. Assert these conditions to ensure that
6587          * the backfilesystem is called for the symlink operation.
6588          */
6589         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6590         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6591 
6592         for (;;) {
6593                 /* get (or renew) access to the file system */
                     /* on a retry the rwlock from the last pass goes first */
6594                 if (held) {
6595                         /* Won't loop with NFSv4 connected behavior */
6596                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6597                         rw_exit(&dcp->c_rwlock);
6598                         cachefs_cd_release(fscp);
6599                         held = 0;
6600                 }
6601                 error = cachefs_cd_access(fscp, connected, 1);
6602                 if (error)
6603                         break;
6604                 rw_enter(&dcp->c_rwlock, RW_WRITER);
6605                 held = 1;
6606 
6607                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6608                         error = cachefs_symlink_connected(dvp, lnm, tva,
6609                             tnm, cr);
                             /*
                              * Timeout class error while connected: give
                              * everything back, call cachefs_cd_timedout()
                              * and start over with connected = 0.
                              */
6610                         if (CFS_TIMEOUT(fscp, error)) {
6611                                 rw_exit(&dcp->c_rwlock);
6612                                 cachefs_cd_release(fscp);
6613                                 held = 0;
6614                                 cachefs_cd_timedout(fscp);
6615                                 connected = 0;
6616                                 continue;
6617                         }
6618                 } else {
6619                         error = cachefs_symlink_disconnected(dvp, lnm, tva,
6620                             tnm, cr);
                             /* held stays set; the loop top does the release */
6621                         if (CFS_TIMEOUT(fscp, error)) {
6622                                 connected = 1;
6623                                 continue;
6624                         }
6625                 }
6626                 break;
6627         }
6628 
             /* the log record describes the parent directory and the target length */
6629         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_SYMLINK))
6630                 cachefs_log_symlink(cachep, error, fscp->fs_cfsvfsp,
6631                     &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
6632                     crgetuid(cr), (uint_t)strlen(tnm));
6633 
6634         if (held) {
6635                 rw_exit(&dcp->c_rwlock);
6636                 cachefs_cd_release(fscp);
6637         }
6638 
6639 #ifdef CFS_CD_DEBUG
6640         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6641 #endif
             /* the only direct jump here is the zone check, before anything is held */
6642 out:
6643 #ifdef CFSDEBUG
6644         CFS_DEBUG(CFSDEBUG_VOPS)
6645                 printf("cachefs_symlink: EXIT error = %d\n", error);
6646 #endif
6647         return (error);
6648 }
6649 
/*
 * cachefs_symlink_connected: connected-mode half of cachefs_symlink().
 *
 *	dvp	directory in which the link is created
 *	lnm	name of the new link
 *	tva	attributes requested for the new link
 *	tnm	text the link points to
 *	cr	credentials for the back fs calls
 *
 * The link is created on the back file system with VOP_SYMLINK().  After
 * that has succeeded, the remaining steps (looking the link up, getting
 * its cookie and attributes, entering it in the cached directory, making
 * a cnode and caching the link text) are best effort: when one of them
 * fails the function still returns 0, in most cases after calling
 * cachefs_nocache() on the cnode concerned.
 *
 * cachefs_symlink() calls this with dcp->c_rwlock held as writer.
 * dcp->c_statelock is taken here and is not held on return.
 *
 * Returns 0 on success, otherwise the error from cachefs_getbackvp(),
 * CFSOP_CHECK_COBJECT() or VOP_SYMLINK().
 */
6650 static int
6651 cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
6652     char *tnm, cred_t *cr)
6653 {
6654         cnode_t *dcp = VTOC(dvp);
6655         fscache_t *fscp = C_TO_FSCACHE(dcp);
6656         int error = 0;
6657         vnode_t *backvp = NULL;         /* new link on the back fs, held */
6658         cnode_t *newcp = NULL;          /* cnode made for the new link, held */
6659         struct vattr va;
6660         struct fid cookie;
6661         cfs_cid_t cid;
6662         uint32_t valid_fid;             /* FALSE when the back fs is NFSv4 */
6663 
6664         mutex_enter(&dcp->c_statelock);
6665 
6666         if (dcp->c_backvp == NULL) {
6667                 error = cachefs_getbackvp(fscp, dcp);
6668                 if (error) {
6669                         cachefs_nocache(dcp);
6670                         mutex_exit(&dcp->c_statelock);
6671                         goto out;
6672                 }
6673         }
6674 
6675         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6676         if (error) {
6677                 mutex_exit(&dcp->c_statelock);
6678                 goto out;
6679         }
6680         CFS_DPRINT_BACKFS_NFSV4(fscp,
6681             ("cachefs_symlink (nfsv4): dcp %p, dbackvp %p, "
6682             "lnm %s, tnm %s\n", dcp, dcp->c_backvp, lnm, tnm));
6683         error = VOP_SYMLINK(dcp->c_backvp, lnm, tva, tnm, cr, NULL, 0);
6684         if (error) {
6685                 mutex_exit(&dcp->c_statelock);
6686                 goto out;
6687         }
             /*
              * From here on error is 0 and the link exists on the back fs;
              * every early exit below therefore reports success.
              */
6688         if ((dcp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
6689             !CFS_ISFS_BACKFS_NFSV4(fscp)) {
6690                 cachefs_nocache(dcp);
6691                 mutex_exit(&dcp->c_statelock);
6692                 goto out;
6693         }
6694 
6695         CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6696 
6697         /* lookup the symlink we just created and get its fid and attrs */
             /* the return value is ignored; a NULL backvp signals failure */
6698         (void) VOP_LOOKUP(dcp->c_backvp, lnm, &backvp, NULL, 0, NULL, cr,
6699             NULL, NULL, NULL);
6700         if (backvp == NULL) {
6701                 if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
6702                         cachefs_nocache(dcp);
6703                 mutex_exit(&dcp->c_statelock);
6704                 goto out;
6705         }
6706 
6707         valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
6708         error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
6709         if (error) {
6710                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6711                 error = 0;
6712                 cachefs_nocache(dcp);
6713                 mutex_exit(&dcp->c_statelock);
6714                 goto out;
6715         }
6716         cid.cid_fileno = va.va_nodeid;
6717         cid.cid_flags = 0;
6718 
6719         /* if the dir is cached, add the symlink to it */
6720         if (CFS_ISFS_NONSHARED(fscp) &&
6721             (dcp->c_metadata.md_flags & MD_POPULATED)) {
6722                 error = cachefs_dir_enter(dcp, lnm, &cookie, &cid, SM_ASYNC);
6723                 if (error) {
6724                         cachefs_nocache(dcp);
6725                         error = 0;
6726                 }
6727         }
6728         mutex_exit(&dcp->c_statelock);
6729 
6730         /* make the cnode for the sym link */
6731         error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
6732             &va, backvp, cr, 0, &newcp);
6733         if (error) {
6734                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
                     /*
                      * dcp->c_statelock was dropped above.  Take it again
                      * around cachefs_nocache(), as every other call to it
                      * in this function is made with the lock held.
                      */
                     mutex_enter(&dcp->c_statelock);
6735                 cachefs_nocache(dcp);
                     mutex_exit(&dcp->c_statelock);
6736                 error = 0;
6737                 goto out;
6738         }
6739 
6740         /* try to cache the symlink contents */
6741         rw_enter(&newcp->c_rwlock, RW_WRITER);
6742         mutex_enter(&newcp->c_statelock);
6743 
6744         /*
6745          * try to cache the sym link, note that its a noop if NOCACHE
6746          * or NFSv4 is set
6747          */
6748         error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6749         if (error) {
6750                 cachefs_nocache(newcp);
6751                 error = 0;
6752         }
6753         mutex_exit(&newcp->c_statelock);
6754         rw_exit(&newcp->c_rwlock);
6755 
             /* common exit: drop the holds taken above; no vnode is handed back */
6756 out:
6757         if (backvp)
6758                 VN_RELE(backvp);
6759         if (newcp)
6760                 VN_RELE(CTOV(newcp));
6761         return (error);
6762 }
6763 
/*
 * cachefs_symlink_disconnected: disconnected-mode half of
 * cachefs_symlink().
 *
 *	dvp	directory in which the link is created
 *	lnm	name of the new link
 *	tva	attributes requested for the new link
 *	tnm	text the link points to
 *	cr	credentials for the access check, attributes and log record
 *
 * Nothing is sent to the back file system.  A new cnode is created with
 * cachefs_cnode_create(), the operation is recorded with
 * cachefs_dlog_symlink(), the link text is stored with
 * cachefs_stuffsymlink(), the metadata is written out, and finally the
 * name is entered in the cached parent directory.  If a log record was
 * written it is committed at exit together with the final error value.
 * On failure the new cnode, if any, is flagged CN_DESTROY; the hold on
 * it is released in every case.
 *
 * cachefs_symlink() calls this with dcp->c_rwlock held as writer.
 *
 * Returns 0 on success or an errno value.  ETIMEDOUT is returned when the
 * file system is write-around, when the parent is not populated, or when
 * cachefs_dir_look() reports ENOTDIR; EEXIST when the lookup of lnm
 * returns anything other than ENOENT; ENOSPC when the cid map, the log
 * record or cachefs_modified_alloc() fail.
 */
6764 static int
6765 cachefs_symlink_disconnected(vnode_t *dvp, char *lnm, vattr_t *tva,
6766     char *tnm, cred_t *cr)
6767 {
6768         cnode_t *dcp = VTOC(dvp);
6769         fscache_t *fscp = C_TO_FSCACHE(dcp);
6770         int error;
6771         cnode_t *newcp = NULL;  /* cnode for the new link, held if non-NULL */
6772         struct vattr va;
6773         timestruc_t current_time;
6774         off_t commit = 0;       /* from cachefs_dlog_symlink(); 0 = nothing logged */
6775 
             /* returns before any lock is taken, so no cleanup is needed */
6776         if (CFS_ISFS_WRITE_AROUND(fscp))
6777                 return (ETIMEDOUT);
6778 
6779         mutex_enter(&dcp->c_statelock);
6780 
6781         /* check permissions */
6782         if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6783                 mutex_exit(&dcp->c_statelock);
6784                 goto out;
6785         }
6786 
6787         /* the directory front file must be populated */
6788         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6789                 error = ETIMEDOUT;
6790                 mutex_exit(&dcp->c_statelock);
6791                 goto out;
6792         }
6793 
6794         /* make sure lnm does not already exist in the directory */
6795         error = cachefs_dir_look(dcp, lnm, NULL, NULL, NULL, NULL);
6796         if (error == ENOTDIR) {
6797                 error = ETIMEDOUT;
6798                 mutex_exit(&dcp->c_statelock);
6799                 goto out;
6800         }
             /* only ENOENT lets the create go ahead; anything else is EEXIST */
6801         if (error != ENOENT) {
6802                 error = EEXIST;
6803                 mutex_exit(&dcp->c_statelock);
6804                 goto out;
6805         }
6806 
6807         /* make up a reasonable set of attributes */
6808         cachefs_attr_setup(tva, &va, dcp, cr);
6809         va.va_type = VLNK;
6810         va.va_mode |= S_IFLNK;
6811         va.va_size = strlen(tnm);
6812 
             /* the parent statelock is not held across the cnode creation */
6813         mutex_exit(&dcp->c_statelock);
6814 
6815         /* create the cnode */
6816         error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6817         if (error)
6818                 goto out;
6819 
             /*
              * Both locks of the new cnode are held until its metadata has
              * been written; every error exit in between drops them first.
              */
6820         rw_enter(&newcp->c_rwlock, RW_WRITER);
6821         mutex_enter(&newcp->c_statelock);
6822 
6823         error = cachefs_dlog_cidmap(fscp);
             /* whatever cachefs_dlog_cidmap() failed with is reported as ENOSPC */
6824         if (error) {
6825                 mutex_exit(&newcp->c_statelock);
6826                 rw_exit(&newcp->c_rwlock);
6827                 error = ENOSPC;
6828                 goto out;
6829         }
6830 
6831         cachefs_creategid(dcp, newcp, tva, cr);
             /* the parent statelock is taken only around cachefs_createacl() */
6832         mutex_enter(&dcp->c_statelock);
6833         cachefs_createacl(dcp, newcp);
6834         mutex_exit(&dcp->c_statelock);
6835         gethrestime(&current_time);
6836         newcp->c_metadata.md_vattr.va_atime = current_time;
6837         newcp->c_metadata.md_localctime = current_time;
6838         newcp->c_metadata.md_localmtime = current_time;
6839         newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6840             MD_LOCALCTIME;
6841         newcp->c_flags |= CN_UPDATED;
6842 
6843         /* log the operation */
6844         commit = cachefs_dlog_symlink(fscp, dcp, newcp, lnm, tva, tnm, cr);
6845         if (commit == 0) {
6846                 mutex_exit(&newcp->c_statelock);
6847                 rw_exit(&newcp->c_rwlock);
6848                 error = ENOSPC;
6849                 goto out;
6850         }
6851 
6852         /* store the symlink contents */
6853         error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6854         if (error) {
6855                 mutex_exit(&newcp->c_statelock);
6856                 rw_exit(&newcp->c_rwlock);
6857                 goto out;
6858         }
6859         if (cachefs_modified_alloc(newcp)) {
6860                 mutex_exit(&newcp->c_statelock);
6861                 rw_exit(&newcp->c_rwlock);
6862                 error = ENOSPC;
6863                 goto out;
6864         }
6865 
6866         /*
6867          * write the metadata now rather than waiting until
6868          * inactive so that if there's no space we can let
6869          * the caller know.
6870          */
             /* a cnode still flagged CN_ALLOC_PENDING gets its metadata created first */
6871         if (newcp->c_flags & CN_ALLOC_PENDING) {
6872                 if (newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
6873                         (void) filegrp_allocattr(newcp->c_filegrp);
6874                 }
6875                 error = filegrp_create_metadata(newcp->c_filegrp,
6876                     &newcp->c_metadata, &newcp->c_id);
6877                 if (error) {
6878                         mutex_exit(&newcp->c_statelock);
6879                         rw_exit(&newcp->c_rwlock);
6880                         goto out;
6881                 }
6882                 newcp->c_flags &= ~CN_ALLOC_PENDING;
6883         }
6884         error = filegrp_write_metadata(newcp->c_filegrp,
6885             &newcp->c_id, &newcp->c_metadata);
6886         if (error) {
6887                 mutex_exit(&newcp->c_statelock);
6888                 rw_exit(&newcp->c_rwlock);
6889                 goto out;
6890         }
6891         mutex_exit(&newcp->c_statelock);
6892         rw_exit(&newcp->c_rwlock);
6893 
6894         mutex_enter(&dcp->c_statelock);
6895 
6896         /* enter the new file in the directory */
             /* the populated flag is tested again: the statelock was dropped meanwhile */
6897         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6898                 error = ETIMEDOUT;
6899                 mutex_exit(&dcp->c_statelock);
6900                 goto out;
6901         }
6902         cachefs_modified(dcp);
6903         error = cachefs_dir_enter(dcp, lnm, &newcp->c_metadata.md_cookie,
6904             &newcp->c_id, SM_ASYNC);
6905         if (error) {
6906                 mutex_exit(&dcp->c_statelock);
6907                 goto out;
6908         }
6909 
6910         /* update parent dir times */
6911         dcp->c_metadata.md_localctime = current_time;
6912         dcp->c_metadata.md_localmtime = current_time;
6913         dcp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
6914         dcp->c_flags |= CN_UPDATED;
6915         mutex_exit(&dcp->c_statelock);
6916 
             /* common exit: no lock is held on any path that reaches this label */
6917 out:
6918         if (commit) {
6919                 /* commit the log entry */
6920                 if (cachefs_dlog_commit(fscp, commit, error)) {
6921                         /*EMPTY*/
6922                         /* XXX bob: fix on panic */
6923                 }
6924         }
6925 
             /* on failure, flag the half-built cnode CN_DESTROY before releasing it */
6926         if (error) {
6927                 if (newcp) {
6928                         mutex_enter(&newcp->c_statelock);
6929                         newcp->c_flags |= CN_DESTROY;
6930                         mutex_exit(&newcp->c_statelock);
6931                 }
6932         }
             /* the hold is dropped on success too; no vnode is handed back */
6933         if (newcp) {
6934                 VN_RELE(CTOV(newcp));
6935         }
6936 
6937         return (error);
6938 }
6939 
6940 /*ARGSUSED*/
6941 static int
6942 cachefs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
6943     caller_context_t *ct, int flags)
6944 {
6945         cnode_t *dcp = VTOC(vp);
6946         fscache_t *fscp = C_TO_FSCACHE(dcp);
6947         cachefscache_t *cachep = fscp->fs_cache;
6948         int error = 0;
6949         int held = 0;
6950         int connected = 0;
6951 
6952 #ifdef CFSDEBUG
6953         CFS_DEBUG(CFSDEBUG_VOPS)
6954                 printf("cachefs_readdir: ENTER vp %p\n", (void *)vp);
6955 #endif
6956         if (getzoneid() != GLOBAL_ZONEID) {
6957                 error = EPERM;
6958                 goto out;
6959         }
6960 
6961         /*
6962          * Cachefs only provides pass-through support for NFSv4,
6963          * and all vnode operations are passed through to the
6964          * back file system. For NFSv4 pass-through to work, only
6965          * connected operation is supported, the cnode backvp must
6966          * exist, and cachefs optional (eg., disconnectable) flags
6967          * are turned off. Assert these conditions to ensure that
6968          * the backfilesystem is called for the readdir operation.
6969          */
6970         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6971         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6972 
6973         for (;;) {
6974                 /* get (or renew) access to the file system */
6975                 if (held) {
6976                         /* Won't loop with NFSv4 connected behavior */
6977                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6978                         rw_exit(&dcp->c_rwlock);
6979                         cachefs_cd_release(fscp);
6980                         held = 0;
6981                 }
6982                 error = cachefs_cd_access(fscp, connected, 0);
6983                 if (error)
6984                         break;
6985                 rw_enter(&dcp->c_rwlock, RW_READER);
6986                 held = 1;
6987 
6988                 /* quit if link count of zero (posix) */
6989                 if (dcp->c_attr.va_nlink == 0) {
6990                         if (eofp)
6991                                 *eofp = 1;
6992                         error = 0;
6993                         break;
6994                 }
6995 
6996                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6997                         error = cachefs_readdir_connected(vp, uiop, cr,
6998                             eofp);
6999                         if (CFS_TIMEOUT(fscp, error)) {
7000                                 rw_exit(&dcp->c_rwlock);
7001                                 cachefs_cd_release(fscp);
7002                                 held = 0;
7003                                 cachefs_cd_timedout(fscp);
7004                                 connected = 0;
7005                                 continue;
7006                         }
7007                 } else {
7008                         error = cachefs_readdir_disconnected(vp, uiop, cr,
7009                             eofp);
7010                         if (CFS_TIMEOUT(fscp, error)) {
7011                                 if (cachefs_cd_access_miss(fscp)) {
7012                                         error = cachefs_readdir_connected(vp,
7013                                             uiop, cr, eofp);
7014                                         if (!CFS_TIMEOUT(fscp, error))
7015                                                 break;
7016                                         delay(5*hz);
7017                                         connected = 0;
7018                                         continue;
7019                                 }
7020                                 connected = 1;
7021                                 continue;
7022                         }
7023                 }
7024                 break;
7025         }
7026 
7027         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READDIR))
7028                 cachefs_log_readdir(cachep, error, fscp->fs_cfsvfsp,
7029                     &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
7030                     crgetuid(cr), uiop->uio_loffset, *eofp);
7031 
7032         if (held) {
7033                 rw_exit(&dcp->c_rwlock);
7034                 cachefs_cd_release(fscp);
7035         }
7036 
7037 #ifdef CFS_CD_DEBUG
7038         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7039 #endif
7040 out:
7041 #ifdef CFSDEBUG
7042         CFS_DEBUG(CFSDEBUG_VOPS)
7043                 printf("cachefs_readdir: EXIT error = %d\n", error);
7044 #endif
7045 
7046         return (error);
7047 }
7048 
7049 static int
7050 cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp)
7051 {
7052         cnode_t *dcp = VTOC(vp);
7053         int error;
7054         fscache_t *fscp = C_TO_FSCACHE(dcp);
7055         struct cachefs_req *rp;
7056 
7057         mutex_enter(&dcp->c_statelock);
7058 
7059         /* check directory consistency */
7060         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
7061         if (error)
7062                 goto out;
7063         dcp->c_usage++;
7064 
7065         /* if dir was modified, toss old contents */
7066         if (dcp->c_metadata.md_flags & MD_INVALREADDIR) {
7067                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7068                 cachefs_inval_object(dcp);
7069         }
7070 
7071         error = 0;
7072         if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) &&
7073             ((dcp->c_flags & (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0) &&
7074             !CFS_ISFS_BACKFS_NFSV4(fscp) &&
7075             (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
7076 
7077                 if (cachefs_async_okay()) {
7078 
7079                         /*
7080                          * Set up asynchronous request to fill this
7081                          * directory.
7082                          */
7083 
7084                         dcp->c_flags |= CN_ASYNC_POPULATE;
7085 
7086                         rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
7087                         rp->cfs_cmd = CFS_POPULATE;
7088                         rp->cfs_req_u.cu_populate.cpop_vp = vp;
7089                         rp->cfs_cr = cr;
7090 
7091                         crhold(cr);
7092                         VN_HOLD(vp);
7093 
7094                         cachefs_addqueue(rp, &fscp->fs_workq);
7095                 } else {
7096                         error = cachefs_dir_fill(dcp, cr);
7097                         if (error != 0)
7098                                 cachefs_nocache(dcp);
7099                 }
7100         }
7101 
7102         /* if front file is populated */
7103         if (((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) == 0) &&
7104             !CFS_ISFS_BACKFS_NFSV4(fscp) &&
7105             (dcp->c_metadata.md_flags & MD_POPULATED)) {
7106                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7107                 error = cachefs_dir_read(dcp, uiop, eofp);
7108                 if (error == 0)
7109                         fscp->fs_stats.st_hits++;
7110         }
7111 
7112         /* if front file could not be used */
7113         if ((error != 0) ||
7114             CFS_ISFS_BACKFS_NFSV4(fscp) ||
7115             (dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
7116             ((dcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
7117 
7118                 if (error && !(dcp->c_flags & CN_NOCACHE) &&
7119                     !CFS_ISFS_BACKFS_NFSV4(fscp))
7120                         cachefs_nocache(dcp);
7121 
7122                 /* get the back vp */
7123                 if (dcp->c_backvp == NULL) {
7124                         error = cachefs_getbackvp(fscp, dcp);
7125                         if (error)
7126                                 goto out;
7127                 }
7128 
7129                 if (fscp->fs_inum_size > 0) {
7130                         error = cachefs_readback_translate(dcp, uiop, cr, eofp);
7131                 } else {
7132                         /* do the dir read from the back fs */
7133                         (void) VOP_RWLOCK(dcp->c_backvp,
7134                             V_WRITELOCK_FALSE, NULL);
7135                         CFS_DPRINT_BACKFS_NFSV4(fscp,
7136                             ("cachefs_readdir (nfsv4): "
7137                             "dcp %p, dbackvp %p\n", dcp, dcp->c_backvp));
7138                         error = VOP_READDIR(dcp->c_backvp, uiop, cr, eofp,
7139                             NULL, 0);
7140                         VOP_RWUNLOCK(dcp->c_backvp, V_WRITELOCK_FALSE, NULL);
7141                 }
7142 
7143                 if (error == 0)
7144                         fscp->fs_stats.st_misses++;
7145         }
7146 
7147 out:
7148         mutex_exit(&dcp->c_statelock);
7149 
7150         return (error);
7151 }
7152 
7153 static int
7154 cachefs_readback_translate(cnode_t *cp, uio_t *uiop, cred_t *cr, int *eofp)
7155 {
7156         int error = 0;
7157         fscache_t *fscp = C_TO_FSCACHE(cp);
7158         caddr_t buffy = NULL;
7159         int buffysize = MAXBSIZE;
7160         caddr_t chrp, end;
7161         ino64_t newinum;
7162         struct dirent64 *de;
7163         uio_t uioin;
7164         iovec_t iov;
7165 
7166         ASSERT(cp->c_backvp != NULL);
7167         ASSERT(fscp->fs_inum_size > 0);
7168 
7169         if (uiop->uio_resid < buffysize)
7170                 buffysize = (int)uiop->uio_resid;
7171         buffy = cachefs_kmem_alloc(buffysize, KM_SLEEP);
7172 
7173         iov.iov_base = buffy;
7174         iov.iov_len = buffysize;
7175         uioin.uio_iov = &iov;
7176         uioin.uio_iovcnt = 1;
7177         uioin.uio_segflg = UIO_SYSSPACE;
7178         uioin.uio_fmode = 0;
7179         uioin.uio_extflg = UIO_COPY_CACHED;
7180         uioin.uio_loffset = uiop->uio_loffset;
7181         uioin.uio_resid = buffysize;
7182 
7183         (void) VOP_RWLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7184         error = VOP_READDIR(cp->c_backvp, &uioin, cr, eofp, NULL, 0);
7185         VOP_RWUNLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7186 
7187         if (error != 0)
7188                 goto out;
7189 
7190         end = buffy + buffysize - uioin.uio_resid;
7191 
7192         mutex_exit(&cp->c_statelock);
7193         mutex_enter(&fscp->fs_fslock);
7194 
7195 
7196         for (chrp = buffy; chrp < end; chrp += de->d_reclen) {
7197                 de = (dirent64_t *)chrp;
7198                 newinum = cachefs_inum_real2fake(fscp, de->d_ino);
7199                 if (newinum == 0)
7200                         newinum = cachefs_fileno_conflict(fscp, de->d_ino);
7201                 de->d_ino = newinum;
7202         }
7203         mutex_exit(&fscp->fs_fslock);
7204         mutex_enter(&cp->c_statelock);
7205 
7206         error = uiomove(buffy, end - buffy, UIO_READ, uiop);
7207         uiop->uio_loffset = uioin.uio_loffset;
7208 
7209 out:
7210 
7211         if (buffy != NULL)
7212                 cachefs_kmem_free(buffy, buffysize);
7213 
7214         return (error);
7215 }
7216 
7217 static int
7218 /*ARGSUSED*/
7219 cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop, cred_t *cr,
7220     int *eofp)
7221 {
7222         cnode_t *dcp = VTOC(vp);
7223         int error;
7224 
7225         mutex_enter(&dcp->c_statelock);
7226         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
7227                 error = ETIMEDOUT;
7228         } else {
7229                 error = cachefs_dir_read(dcp, uiop, eofp);
7230                 if (error == ENOTDIR)
7231                         error = ETIMEDOUT;
7232         }
7233         mutex_exit(&dcp->c_statelock);
7234 
7235         return (error);
7236 }
7237 
7238 /*ARGSUSED*/
7239 static int
7240 cachefs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
7241 {
7242         int error = 0;
7243         struct cnode *cp = VTOC(vp);
7244         fscache_t *fscp = C_TO_FSCACHE(cp);
7245 
7246         /*
7247          * Cachefs only provides pass-through support for NFSv4,
7248          * and all vnode operations are passed through to the
7249          * back file system. For NFSv4 pass-through to work, only
7250          * connected operation is supported, the cnode backvp must
7251          * exist, and cachefs optional (eg., disconnectable) flags
7252          * are turned off. Assert these conditions, then bail
7253          * as  NFSv4 doesn't support VOP_FID.
7254          */
7255         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7256         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7257         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7258                 return (ENOTSUP);
7259         }
7260 
7261         mutex_enter(&cp->c_statelock);
7262         if (fidp->fid_len < cp->c_metadata.md_cookie.fid_len) {
7263                 fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7264                 error = ENOSPC;
7265         } else {
7266                 bcopy(cp->c_metadata.md_cookie.fid_data, fidp->fid_data,
7267                     cp->c_metadata.md_cookie.fid_len);
7268                 fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7269         }
7270         mutex_exit(&cp->c_statelock);
7271         return (error);
7272 }
7273 
7274 /* ARGSUSED2 */
7275 static int
7276 cachefs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7277 {
7278         cnode_t *cp = VTOC(vp);
7279 
7280         /*
7281          * XXX - This is ifdef'ed out for now. The problem -
7282          * getdents() acquires the read version of rwlock, then we come
7283          * into cachefs_readdir() and that wants to acquire the write version
7284          * of this lock (if its going to populate the directory). This is
7285          * a problem, this can be solved by introducing another lock in the
7286          * cnode.
7287          */
7288 /* XXX */
7289         if (vp->v_type != VREG)
7290                 return (-1);
7291         if (write_lock)
7292                 rw_enter(&cp->c_rwlock, RW_WRITER);
7293         else
7294                 rw_enter(&cp->c_rwlock, RW_READER);
7295         return (write_lock);
7296 }
7297 
7298 /* ARGSUSED */
7299 static void
7300 cachefs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7301 {
7302         cnode_t *cp = VTOC(vp);
7303         if (vp->v_type != VREG)
7304                 return;
7305         rw_exit(&cp->c_rwlock);
7306 }
7307 
/*
 * cachefs_seek
 *
 * Seek vnode operation.  Every request is accepted: none of the
 * arguments are examined, *noffp is left untouched, and 0 is always
 * returned.
 */
/* ARGSUSED */
static int
cachefs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
    caller_context_t *ct)
{
        return (0);
}
7315 
7316 static int cachefs_lostpage = 0;
7317 /*
7318  * Return all the pages from [off..off+len] in file
7319  */
7320 /*ARGSUSED*/
7321 static int
7322 cachefs_getpage(struct vnode *vp, offset_t off, size_t len,
7323         uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7324         caddr_t addr, enum seg_rw rw, cred_t *cr, caller_context_t *ct)
7325 {
7326         cnode_t *cp = VTOC(vp);
7327         int error;
7328         fscache_t *fscp = C_TO_FSCACHE(cp);
7329         cachefscache_t *cachep = fscp->fs_cache;
7330         int held = 0;
7331         int connected = 0;
7332 
7333 #ifdef CFSDEBUG
7334         u_offset_t offx = (u_offset_t)off;
7335 
7336         CFS_DEBUG(CFSDEBUG_VOPS)
7337                 printf("cachefs_getpage: ENTER vp %p off %lld len %lu rw %d\n",
7338                     (void *)vp, offx, len, rw);
7339 #endif
7340         if (getzoneid() != GLOBAL_ZONEID) {
7341                 error = EPERM;
7342                 goto out;
7343         }
7344 
7345         if (vp->v_flag & VNOMAP) {
7346                 error = ENOSYS;
7347                 goto out;
7348         }
7349 
7350         /* Call backfilesystem if NFSv4 */
7351         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7352                 error = cachefs_getpage_backfs_nfsv4(vp, off, len, protp, pl,
7353                     plsz, seg, addr, rw, cr);
7354                 goto out;
7355         }
7356 
7357         /* XXX sam: make this do an async populate? */
7358         if (pl == NULL) {
7359                 error = 0;
7360                 goto out;
7361         }
7362         if (protp != NULL)
7363                 *protp = PROT_ALL;
7364 
7365         for (;;) {
7366                 /* get (or renew) access to the file system */
7367                 if (held) {
7368                         cachefs_cd_release(fscp);
7369                         held = 0;
7370                 }
7371                 error = cachefs_cd_access(fscp, connected, 0);
7372                 if (error)
7373                         break;
7374                 held = 1;
7375 
7376                 /*
7377                  * If we are getting called as a side effect of a
7378                  * cachefs_write()
7379                  * operation the local file size might not be extended yet.
7380                  * In this case we want to be able to return pages of zeroes.
7381                  */
7382                 if ((u_offset_t)off + len >
7383                     ((cp->c_size + PAGEOFFSET) & (offset_t)PAGEMASK)) {
7384                         if (seg != segkmap) {
7385                                 error = EFAULT;
7386                                 break;
7387                         }
7388                 }
7389                 error = pvn_getpages(cachefs_getapage, vp, (u_offset_t)off,
7390                     len, protp, pl, plsz, seg, addr, rw, cr);
7391                 if (error == 0)
7392                         break;
7393 
7394                 if (((cp->c_flags & CN_NOCACHE) && (error == ENOSPC)) ||
7395                     error == EAGAIN) {
7396                         connected = 0;
7397                         continue;
7398                 }
7399                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7400                         if (CFS_TIMEOUT(fscp, error)) {
7401                                 cachefs_cd_release(fscp);
7402                                 held = 0;
7403                                 cachefs_cd_timedout(fscp);
7404                                 connected = 0;
7405                                 continue;
7406                         }
7407                 } else {
7408                         if (CFS_TIMEOUT(fscp, error)) {
7409                                 if (cachefs_cd_access_miss(fscp)) {
7410                                         error = pvn_getpages(
7411                                             cachefs_getapage_back, vp,
7412                                             (u_offset_t)off, len, protp, pl,
7413                                             plsz, seg, addr, rw, cr);
7414                                         if (!CFS_TIMEOUT(fscp, error) &&
7415                                             (error != EAGAIN))
7416                                                 break;
7417                                         delay(5*hz);
7418                                         connected = 0;
7419                                         continue;
7420                                 }
7421                                 connected = 1;
7422                                 continue;
7423                         }
7424                 }
7425                 break;
7426         }
7427 
7428         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GETPAGE))
7429                 cachefs_log_getpage(cachep, error, vp->v_vfsp,
7430                     &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
7431                     crgetuid(cr), off, len);
7432 
7433         if (held) {
7434                 cachefs_cd_release(fscp);
7435         }
7436 
7437 out:
7438 #ifdef CFS_CD_DEBUG
7439         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7440 #endif
7441 #ifdef CFSDEBUG
7442         CFS_DEBUG(CFSDEBUG_VOPS)
7443                 printf("cachefs_getpage: EXIT vp %p error %d\n",
7444                     (void *)vp, error);
7445 #endif
7446         return (error);
7447 }
7448 
7449 /*
7450  * cachefs_getpage_backfs_nfsv4
7451  *
7452  * Call NFSv4 back filesystem to handle the getpage (cachefs
7453  * pass-through support for NFSv4).
7454  */
7455 static int
7456 cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off, size_t len,
7457                         uint_t *protp, struct page *pl[], size_t plsz,
7458                         struct seg *seg, caddr_t addr, enum seg_rw rw,
7459                         cred_t *cr)
7460 {
7461         cnode_t *cp = VTOC(vp);
7462         fscache_t *fscp = C_TO_FSCACHE(cp);
7463         vnode_t *backvp;
7464         int error;
7465 
7466         /*
7467          * For NFSv4 pass-through to work, only connected operation is
7468          * supported, the cnode backvp must exist, and cachefs optional
7469          * (eg., disconnectable) flags are turned off. Assert these
7470          * conditions for the getpage operation.
7471          */
7472         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7473         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7474 
7475         /* Call backfs vnode op after extracting backvp */
7476         mutex_enter(&cp->c_statelock);
7477         backvp = cp->c_backvp;
7478         mutex_exit(&cp->c_statelock);
7479 
7480         CFS_DPRINT_BACKFS_NFSV4(fscp,
7481             ("cachefs_getpage_backfs_nfsv4: cnode %p, backvp %p\n",
7482             cp, backvp));
7483         error = VOP_GETPAGE(backvp, off, len, protp, pl, plsz, seg,
7484             addr, rw, cr, NULL);
7485 
7486         return (error);
7487 }
7488 
/*
 * Called from pvn_getpages to get a particular page.
 *
 * off is rounded down to a page boundary.  If the page already exists
 * on vp it is looked up and returned.  Otherwise it is fetched with
 * VOP_GETPAGE from the back vnode (cnode in CN_NOCACHE mode, or the
 * allocation map shows the range is not cached) or from the front
 * vnode, and the pages obtained are renamed onto vp.
 *
 * On success pl[0] is the requested page and pl[1] is NULL, except on
 * the EFAULT path below where pvn_plist_init() fills in pl.
 *
 * Returns 0 or an errno value.  ETIMEDOUT is returned when the back
 * file system is needed but the file system is not connected.  EAGAIN
 * is returned after the cnode has been put in nocache mode because the
 * front file could not be used, and when a shared page lock could not
 * be upgraded; cachefs_getpage() retries on EAGAIN.
 */
/*ARGSUSED*/
static int
cachefs_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp,
        struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
        enum seg_rw rw, cred_t *cr)
{
        cnode_t *cp = VTOC(vp);
        page_t **ppp, *pp = NULL;
        fscache_t *fscp = C_TO_FSCACHE(cp);
        cachefscache_t *cachep = fscp->fs_cache;
        int error = 0;
        /* NULL-terminated page list handed to VOP_GETPAGE */
        struct page **ourpl;
        struct page *ourstackpl[17]; /* see ASSERT() below for 17 */
        /* number of leading ourpl entries that precede off */
        int index = 0;
        int downgrade;
        int have_statelock = 0;
        u_offset_t popoff;
        size_t popsize = 0;

        /*LINTED*/
        ASSERT(((DEF_POP_SIZE / PAGESIZE) + 1) <= 17);

        /*
         * The on-stack list only covers DEF_POP_SIZE; a larger
         * population size needs a heap list, freed again at out:.
         */
        if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
                ourpl = cachefs_kmem_alloc(sizeof (struct page *) *
                    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1), KM_SLEEP);
        else
                ourpl = ourstackpl;

        ourpl[0] = NULL;
        off = off & (offset_t)PAGEMASK;
again:
        /*
         * Look for the page
         */
        if (page_exists(vp, off) == 0) {
                /*
                 * Need to do work to get the page.
                 * Grab our lock because we are going to
                 * modify the state of the cnode.
                 */
                if (! have_statelock) {
                        mutex_enter(&cp->c_statelock);
                        have_statelock = 1;
                }
                /*
                 * If we're in NOCACHE mode, we will need a backvp
                 */
                if (cp->c_flags & CN_NOCACHE) {
                        if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
                                error = ETIMEDOUT;
                                goto out;
                        }
                        if (cp->c_backvp == NULL) {
                                error = cachefs_getbackvp(fscp, cp);
                                if (error)
                                        goto out;
                        }
                        error = VOP_GETPAGE(cp->c_backvp, off,
                            PAGESIZE, protp, ourpl, PAGESIZE, seg,
                            addr, S_READ, cr, NULL);
                        /*
                         * backfs returns EFAULT when we are trying for a
                         * page beyond EOF but cachefs has the knowledge that
                         * it is not beyond EOF because cp->c_size is
                         * greater than the offset requested.
                         */
                        if (error == EFAULT) {
                                /* hand back a freshly created, zeroed page */
                                error = 0;
                                pp = page_create_va(vp, off, PAGESIZE,
                                    PG_EXCL | PG_WAIT, seg, addr);
                                if (pp == NULL)
                                        goto again;
                                pagezero(pp, 0, PAGESIZE);
                                pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
                                goto out;
                        }
                        if (error)
                                goto out;
                        goto getpages;
                }
                /*
                 * We need a front file. If we can't get it,
                 * put the cnode in NOCACHE mode and try again.
                 */
                if (cp->c_frontvp == NULL) {
                        error = cachefs_getfrontfile(cp);
                        if (error) {
                                cachefs_nocache(cp);
                                error = EAGAIN;
                                goto out;
                        }
                }
                /*
                 * Check if the front file needs population.
                 * If population is necessary, make sure we have a
                 * backvp as well. We will get the page from the backvp.
                 * bug 4152459-
                 * But if the file system is in disconnected mode
                 * and the file is a local file then do not check the
                 * allocmap.
                 */
                if (((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
                    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) &&
                    (cachefs_check_allocmap(cp, off) == 0)) {
                        if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
                                error = ETIMEDOUT;
                                goto out;
                        }
                        if (cp->c_backvp == NULL) {
                                error = cachefs_getbackvp(fscp, cp);
                                if (error)
                                        goto out;
                        }
                        if (cp->c_filegrp->fg_flags & CFS_FG_WRITE) {
                                cachefs_cluster_allocmap(off, &popoff,
                                    &popsize,
                                    fscp->fs_info.fi_popsize, cp);
                                if (popsize != 0) {
                                        error = cachefs_populate(cp,
                                            popoff, popsize,
                                            cp->c_frontvp, cp->c_backvp,
                                            cp->c_size, cr);
                                        if (error) {
                                                cachefs_nocache(cp);
                                                error = EAGAIN;
                                                goto out;
                                        } else {
                                                cp->c_flags |=
                                                    CN_UPDATED |
                                                    CN_NEED_FRONT_SYNC |
                                                    CN_POPULATION_PENDING;
                                        }
                                        /*
                                         * keep only the part of the
                                         * populated range from off on
                                         */
                                        popsize = popsize - (off - popoff);
                                } else {
                                        popsize = PAGESIZE;
                                }
                        }
                        /* else XXX assert CN_NOCACHE? */
                        /*
                         * NOTE(review): when CFS_FG_WRITE is not set,
                         * popsize still holds its initial value of 0 and
                         * is passed below as the page list size; confirm
                         * that this path cannot be reached in that state.
                         */
                        error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
                            PAGESIZE, protp, ourpl, popsize,
                            seg, addr, S_READ, cr, NULL);
                        if (error)
                                goto out;
                        fscp->fs_stats.st_misses++;
                } else {
                        /*
                         * A population is still pending: sync the front
                         * file before reading pages from it.
                         */
                        if (cp->c_flags & CN_POPULATION_PENDING) {
                                error = VOP_FSYNC(cp->c_frontvp, FSYNC, cr,
                                    NULL);
                                cp->c_flags &= ~CN_POPULATION_PENDING;
                                if (error) {
                                        cachefs_nocache(cp);
                                        error = EAGAIN;
                                        goto out;
                                }
                        }
                        /*
                         * File was populated so we get the page from the
                         * frontvp
                         */
                        error = VOP_GETPAGE(cp->c_frontvp, (offset_t)off,
                            PAGESIZE, protp, ourpl, PAGESIZE, seg, addr,
                            rw, cr, NULL);
                        if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GPFRONT))
                                cachefs_log_gpfront(cachep, error,
                                    fscp->fs_cfsvfsp,
                                    &cp->c_metadata.md_cookie, cp->c_fileno,
                                    crgetuid(cr), off, PAGESIZE);
                        if (error) {
                                cachefs_nocache(cp);
                                error = EAGAIN;
                                goto out;
                        }
                        fscp->fs_stats.st_hits++;
                }
getpages:
                ASSERT(have_statelock);
                if (have_statelock) {
                        mutex_exit(&cp->c_statelock);
                        have_statelock = 0;
                }
                /*
                 * Walk the pages that came back.  Pages that lie before
                 * off are unlocked and counted in index, so ourpl[index]
                 * ends up being the first page that is kept.  Every
                 * other page must be held exclusively, is unloaded from
                 * all mappings, and is renamed onto vp at its own offset.
                 */
                downgrade = 0;
                for (ppp = ourpl; *ppp; ppp++) {
                        if ((*ppp)->p_offset < off) {
                                index++;
                                page_unlock(*ppp);
                                continue;
                        }
                        if (PAGE_SHARED(*ppp)) {
                                if (page_tryupgrade(*ppp) == 0) {
                                        /*
                                         * Could not get the page
                                         * exclusively: release every
                                         * page still locked and let
                                         * the caller retry.
                                         */
                                        for (ppp = &ourpl[index]; *ppp; ppp++)
                                                page_unlock(*ppp);
                                        error = EAGAIN;
                                        goto out;
                                }
                                downgrade = 1;
                        }
                        ASSERT(PAGE_EXCL(*ppp));
                        (void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
                        page_rename(*ppp, vp, (*ppp)->p_offset);
                }
                pl[0] = ourpl[index];
                pl[1] = NULL;
                /* a page upgraded above goes back to a shared lock */
                if (downgrade) {
                        page_downgrade(ourpl[index]);
                }
                /* Unlock the rest of the pages from the cluster */
                for (ppp = &ourpl[index+1]; *ppp; ppp++)
                        page_unlock(*ppp);
        } else {
                ASSERT(! have_statelock);
                if (have_statelock) {
                        mutex_exit(&cp->c_statelock);
                        have_statelock = 0;
                }
                /* XXX SE_SHARED probably isn't what we *always* want */
                if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
                        /* the page vanished after page_exists(); retry */
                        cachefs_lostpage++;
                        goto again;
                }
                pl[0] = pp;
                pl[1] = NULL;
                /* XXX increment st_hits?  i don't think so, but... */
        }

out:
        if (have_statelock) {
                mutex_exit(&cp->c_statelock);
                have_statelock = 0;
        }
        /* release the heap page list, using the same size as above */
        if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
                cachefs_kmem_free(ourpl, sizeof (struct page *) *
                    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1));
        return (error);
}
7726 
7727 /* gets a page but only from the back fs */
7728 /*ARGSUSED*/
7729 static int
7730 cachefs_getapage_back(struct vnode *vp, u_offset_t off, size_t len,
7731     uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7732     caddr_t addr, enum seg_rw rw, cred_t *cr)
7733 {
7734         cnode_t *cp = VTOC(vp);
7735         page_t **ppp, *pp = NULL;
7736         fscache_t *fscp = C_TO_FSCACHE(cp);
7737         int error = 0;
7738         struct page *ourpl[17];
7739         int index = 0;
7740         int have_statelock = 0;
7741         int downgrade;
7742 
7743         /*
7744          * Grab the cnode statelock so the cnode state won't change
7745          * while we're in here.
7746          */
7747         ourpl[0] = NULL;
7748         off = off & (offset_t)PAGEMASK;
7749 again:
7750         if (page_exists(vp, off) == 0) {
7751                 if (! have_statelock) {
7752                         mutex_enter(&cp->c_statelock);
7753                         have_statelock = 1;
7754                 }
7755 
7756                 if (cp->c_backvp == NULL) {
7757                         error = cachefs_getbackvp(fscp, cp);
7758                         if (error)
7759                                 goto out;
7760                 }
7761                 error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7762                     PAGESIZE, protp, ourpl, PAGESIZE, seg,
7763                     addr, S_READ, cr, NULL);
7764                 if (error)
7765                         goto out;
7766 
7767                 if (have_statelock) {
7768                         mutex_exit(&cp->c_statelock);
7769                         have_statelock = 0;
7770                 }
7771                 downgrade = 0;
7772                 for (ppp = ourpl; *ppp; ppp++) {
7773                         if ((*ppp)->p_offset < off) {
7774                                 index++;
7775                                 page_unlock(*ppp);
7776                                 continue;
7777                         }
7778                         if (PAGE_SHARED(*ppp)) {
7779                                 if (page_tryupgrade(*ppp) == 0) {
7780                                         for (ppp = &ourpl[index]; *ppp; ppp++)
7781                                                 page_unlock(*ppp);
7782                                         error = EAGAIN;
7783                                         goto out;
7784                                 }
7785                                 downgrade = 1;
7786                         }
7787                         ASSERT(PAGE_EXCL(*ppp));
7788                         (void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7789                         page_rename(*ppp, vp, (*ppp)->p_offset);
7790                 }
7791                 pl[0] = ourpl[index];
7792                 pl[1] = NULL;
7793                 if (downgrade) {
7794                         page_downgrade(ourpl[index]);
7795                 }
7796                 /* Unlock the rest of the pages from the cluster */
7797                 for (ppp = &ourpl[index+1]; *ppp; ppp++)
7798                         page_unlock(*ppp);
7799         } else {
7800                 ASSERT(! have_statelock);
7801                 if (have_statelock) {
7802                         mutex_exit(&cp->c_statelock);
7803                         have_statelock = 0;
7804                 }
7805                 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7806                         cachefs_lostpage++;
7807                         goto again;
7808                 }
7809                 pl[0] = pp;
7810                 pl[1] = NULL;
7811         }
7812 
7813 out:
7814         if (have_statelock) {
7815                 mutex_exit(&cp->c_statelock);
7816                 have_statelock = 0;
7817         }
7818         return (error);
7819 }
7820 
7821 /*ARGSUSED*/
7822 static int
7823 cachefs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
7824     caller_context_t *ct)
7825 {
7826         cnode_t *cp = VTOC(vp);
7827         int error = 0;
7828         fscache_t *fscp = C_TO_FSCACHE(cp);
7829         int held = 0;
7830         int connected = 0;
7831 
7832         if (getzoneid() != GLOBAL_ZONEID)
7833                 return (EPERM);
7834 
7835         /* Call backfilesytem if NFSv4 */
7836         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7837                 error = cachefs_putpage_backfs_nfsv4(vp, off, len, flags, cr);
7838                 goto out;
7839         }
7840 
7841         for (;;) {
7842                 /* get (or renew) access to the file system */
7843                 if (held) {
7844                         cachefs_cd_release(fscp);
7845                         held = 0;
7846                 }
7847                 error = cachefs_cd_access(fscp, connected, 1);
7848                 if (error)
7849                         break;
7850                 held = 1;
7851 
7852                 error = cachefs_putpage_common(vp, off, len, flags, cr);
7853                 if (error == 0)
7854                         break;
7855 
7856                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7857                         if (CFS_TIMEOUT(fscp, error)) {
7858                                 cachefs_cd_release(fscp);
7859                                 held = 0;
7860                                 cachefs_cd_timedout(fscp);
7861                                 connected = 0;
7862                                 continue;
7863                         }
7864                 } else {
7865                         if (NOMEMWAIT()) {
7866                                 error = 0;
7867                                 goto out;
7868                         }
7869                         if (CFS_TIMEOUT(fscp, error)) {
7870                                 connected = 1;
7871                                 continue;
7872                         }
7873                 }
7874                 break;
7875         }
7876 
7877 out:
7878 
7879         if (held) {
7880                 cachefs_cd_release(fscp);
7881         }
7882 
7883 #ifdef CFS_CD_DEBUG
7884         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7885 #endif
7886         return (error);
7887 }
7888 
7889 /*
7890  * cachefs_putpage_backfs_nfsv4
7891  *
7892  * Call NFSv4 back filesystem to handle the putpage (cachefs
7893  * pass-through support for NFSv4).
7894  */
7895 static int
7896 cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off, size_t len, int flags,
7897                         cred_t *cr)
7898 {
7899         cnode_t *cp = VTOC(vp);
7900         fscache_t *fscp = C_TO_FSCACHE(cp);
7901         vnode_t *backvp;
7902         int error;
7903 
7904         /*
7905          * For NFSv4 pass-through to work, only connected operation is
7906          * supported, the cnode backvp must exist, and cachefs optional
7907          * (eg., disconnectable) flags are turned off. Assert these
7908          * conditions for the putpage operation.
7909          */
7910         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7911         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7912 
7913         /* Call backfs vnode op after extracting backvp */
7914         mutex_enter(&cp->c_statelock);
7915         backvp = cp->c_backvp;
7916         mutex_exit(&cp->c_statelock);
7917 
7918         CFS_DPRINT_BACKFS_NFSV4(fscp,
7919             ("cachefs_putpage_backfs_nfsv4: cnode %p, backvp %p\n",
7920             cp, backvp));
7921         error = VOP_PUTPAGE(backvp, off, len, flags, cr, NULL);
7922 
7923         return (error);
7924 }
7925 
7926 /*
7927  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
7928  * If len == 0, do from off to EOF.
7929  *
7930  * The normal cases should be len == 0 & off == 0 (entire vp list),
7931  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
7932  * (from pageout).
7933  */
7934 
7935 /*ARGSUSED*/
7936 int
7937 cachefs_putpage_common(struct vnode *vp, offset_t off, size_t len,
7938     int flags, cred_t *cr)
7939 {
7940         struct cnode *cp  = VTOC(vp);
7941         struct page *pp;
7942         size_t io_len;
7943         u_offset_t eoff, io_off;
7944         int error = 0;
7945         fscache_t *fscp = C_TO_FSCACHE(cp);
7946         cachefscache_t *cachep = fscp->fs_cache;
7947 
7948         if (len == 0 && (flags & B_INVAL) == 0 && vn_is_readonly(vp)) {
7949                 return (0);
7950         }
7951         if (!vn_has_cached_data(vp) || (off >= cp->c_size &&
7952             (flags & B_INVAL) == 0))
7953                 return (0);
7954 
7955         /*
7956          * Should never have cached data for the cachefs vnode
7957          * if NFSv4 is in use.
7958          */
7959         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7960 
7961         /*
7962          * If this is an async putpage let a thread handle it.
7963          */
7964         if (flags & B_ASYNC) {
7965                 struct cachefs_req *rp;
7966                 int tflags = (flags & ~(B_ASYNC|B_DONTNEED));
7967 
7968                 if (ttoproc(curthread) == proc_pageout) {
7969                         /*
7970                          * If this is the page daemon we
7971                          * do the push synchronously (Dangerous!) and hope
7972                          * we can free enough to keep running...
7973                          */
7974                         flags &= ~B_ASYNC;
7975                         goto again;
7976                 }
7977 
7978                 if (! cachefs_async_okay()) {
7979 
7980                         /*
7981                          * this is somewhat like NFS's behavior.  keep
7982                          * the system from thrashing.  we've seen
7983                          * cases where async queues get out of
7984                          * control, especially if
7985                          * madvise(MADV_SEQUENTIAL) is done on a large
7986                          * mmap()ed file that is read sequentially.
7987                          */
7988 
7989                         flags &= ~B_ASYNC;
7990                         goto again;
7991                 }
7992 
7993                 /*
7994                  * if no flags other than B_ASYNC were set,
7995                  * we coalesce putpage requests into a single one for the
7996                  * whole file (len = off = 0).  If such a request is
7997                  * already queued, we're done.
7998                  *
7999                  * If there are other flags set (e.g., B_INVAL), we don't
8000                  * attempt to coalesce and we use the specified length and
8001                  * offset.
8002                  */
8003                 rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
8004                 mutex_enter(&cp->c_iomutex);
8005                 if ((cp->c_ioflags & CIO_PUTPAGES) == 0 || tflags != 0) {
8006                         rp->cfs_cmd = CFS_PUTPAGE;
8007                         rp->cfs_req_u.cu_putpage.cp_vp = vp;
8008                         if (tflags == 0) {
8009                                 off = len = 0;
8010                                 cp->c_ioflags |= CIO_PUTPAGES;
8011                         }
8012                         rp->cfs_req_u.cu_putpage.cp_off = off;
8013                         rp->cfs_req_u.cu_putpage.cp_len = (uint_t)len;
8014                         rp->cfs_req_u.cu_putpage.cp_flags = flags & ~B_ASYNC;
8015                         rp->cfs_cr = cr;
8016                         crhold(rp->cfs_cr);
8017                         VN_HOLD(vp);
8018                         cp->c_nio++;
8019                         cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
8020                 } else {
8021                         kmem_cache_free(cachefs_req_cache, rp);
8022                 }
8023 
8024                 mutex_exit(&cp->c_iomutex);
8025                 return (0);
8026         }
8027 
8028 
8029 again:
8030         if (len == 0) {
8031                 /*
8032                  * Search the entire vp list for pages >= off
8033                  */
8034                 error = pvn_vplist_dirty(vp, off, cachefs_push, flags, cr);
8035         } else {
8036                 /*
8037                  * Do a range from [off...off + len] looking for pages
8038                  * to deal with.
8039                  */
8040                 eoff = (u_offset_t)off + len;
8041                 for (io_off = off; io_off < eoff && io_off < cp->c_size;
8042                     io_off += io_len) {
8043                         /*
8044                          * If we are not invalidating, synchronously
8045                          * freeing or writing pages use the routine
8046                          * page_lookup_nowait() to prevent reclaiming
8047                          * them from the free list.
8048                          */
8049                         if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
8050                                 pp = page_lookup(vp, io_off,
8051                                     (flags & (B_INVAL | B_FREE)) ?
8052                                     SE_EXCL : SE_SHARED);
8053                         } else {
8054                                 /* XXX this looks like dead code */
8055                                 pp = page_lookup_nowait(vp, io_off,
8056                                     (flags & B_FREE) ? SE_EXCL : SE_SHARED);
8057                         }
8058 
8059                         if (pp == NULL || pvn_getdirty(pp, flags) == 0)
8060                                 io_len = PAGESIZE;
8061                         else {
8062                                 error = cachefs_push(vp, pp, &io_off,
8063                                     &io_len, flags, cr);
8064                                 if (error != 0)
8065                                         break;
8066                                 /*
8067                                  * "io_off" and "io_len" are returned as
8068                                  * the range of pages we actually wrote.
8069                                  * This allows us to skip ahead more quickly
8070                                  * since several pages may've been dealt
8071                                  * with by this iteration of the loop.
8072                                  */
8073                         }
8074                 }
8075         }
8076 
8077         if (error == 0 && off == 0 && (len == 0 || len >= cp->c_size)) {
8078                 cp->c_flags &= ~CDIRTY;
8079         }
8080 
8081         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_PUTPAGE))
8082                 cachefs_log_putpage(cachep, error, fscp->fs_cfsvfsp,
8083                     &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
8084                     crgetuid(cr), off, len);
8085 
8086         return (error);
8087 
8088 }
8089 
8090 /*ARGSUSED*/
8091 static int
8092 cachefs_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
8093     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
8094     caller_context_t *ct)
8095 {
8096         cnode_t *cp = VTOC(vp);
8097         fscache_t *fscp = C_TO_FSCACHE(cp);
8098         struct segvn_crargs vn_a;
8099         int error;
8100         int held = 0;
8101         int writing;
8102         int connected = 0;
8103 
8104 #ifdef CFSDEBUG
8105         u_offset_t offx = (u_offset_t)off;
8106 
8107         CFS_DEBUG(CFSDEBUG_VOPS)
8108                 printf("cachefs_map: ENTER vp %p off %lld len %lu flags %d\n",
8109                     (void *)vp, offx, len, flags);
8110 #endif
8111         if (getzoneid() != GLOBAL_ZONEID) {
8112                 error = EPERM;
8113                 goto out;
8114         }
8115 
8116         if (vp->v_flag & VNOMAP) {
8117                 error = ENOSYS;
8118                 goto out;
8119         }
8120         if (off < 0 || (offset_t)(off + len) < 0) {
8121                 error = ENXIO;
8122                 goto out;
8123         }
8124         if (vp->v_type != VREG) {
8125                 error = ENODEV;
8126                 goto out;
8127         }
8128 
8129         /*
8130          * Check to see if the vnode is currently marked as not cachable.
8131          * If so, we have to refuse the map request as this violates the
8132          * don't cache attribute.
8133          */
8134         if (vp->v_flag & VNOCACHE)
8135                 return (EAGAIN);
8136 
8137 #ifdef OBSOLETE
8138         /*
8139          * If file is being locked, disallow mapping.
8140          */
8141         if (vn_has_flocks(vp)) {
8142                 error = EAGAIN;
8143                 goto out;
8144         }
8145 #endif
8146 
8147         /* call backfilesystem if NFSv4 */
8148         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8149                 error = cachefs_map_backfs_nfsv4(vp, off, as, addrp, len, prot,
8150                     maxprot, flags, cr);
8151                 goto out;
8152         }
8153 
8154         writing = (prot & PROT_WRITE && ((flags & MAP_PRIVATE) == 0));
8155 
8156         for (;;) {
8157                 /* get (or renew) access to the file system */
8158                 if (held) {
8159                         cachefs_cd_release(fscp);
8160                         held = 0;
8161                 }
8162                 error = cachefs_cd_access(fscp, connected, writing);
8163                 if (error)
8164                         break;
8165                 held = 1;
8166 
8167                 if (writing) {
8168                         mutex_enter(&cp->c_statelock);
8169                         if (CFS_ISFS_WRITE_AROUND(fscp)) {
8170                                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8171                                         connected = 1;
8172                                         continue;
8173                                 } else {
8174                                         cachefs_nocache(cp);
8175                                 }
8176                         }
8177 
8178                         /*
8179                          * CN_MAPWRITE is for an optimization in cachefs_delmap.
8180                          * If CN_MAPWRITE is not set then cachefs_delmap does
8181                          * not need to try to push out any pages.
8182                          * This bit gets cleared when the cnode goes inactive.
8183                          */
8184                         cp->c_flags |= CN_MAPWRITE;
8185 
8186                         mutex_exit(&cp->c_statelock);
8187                 }
8188                 break;
8189         }
8190 
8191         if (held) {
8192                 cachefs_cd_release(fscp);
8193         }
8194 
8195         as_rangelock(as);
8196         error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
8197         if (error != 0) {
8198                 as_rangeunlock(as);
8199                 goto out;
8200         }
8201 
8202         /*
8203          * package up all the data passed in into a segvn_args struct and
8204          * call as_map with segvn_create function to create a new segment
8205          * in the address space.
8206          */
8207         vn_a.vp = vp;
8208         vn_a.offset = off;
8209         vn_a.type = flags & MAP_TYPE;
8210         vn_a.prot = (uchar_t)prot;
8211         vn_a.maxprot = (uchar_t)maxprot;
8212         vn_a.cred = cr;
8213         vn_a.amp = NULL;
8214         vn_a.flags = flags & ~MAP_TYPE;
8215         vn_a.szc = 0;
8216         vn_a.lgrp_mem_policy_flags = 0;
8217         error = as_map(as, *addrp, len, segvn_create, &vn_a);
8218         as_rangeunlock(as);
8219 out:
8220 
8221 #ifdef CFS_CD_DEBUG
8222         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8223 #endif
8224 #ifdef CFSDEBUG
8225         CFS_DEBUG(CFSDEBUG_VOPS)
8226                 printf("cachefs_map: EXIT vp %p error %d\n", (void *)vp, error);
8227 #endif
8228         return (error);
8229 }
8230 
8231 /*
8232  * cachefs_map_backfs_nfsv4
8233  *
8234  * Call NFSv4 back filesystem to handle the map (cachefs
8235  * pass-through support for NFSv4).
8236  */
8237 static int
8238 cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off, struct as *as,
8239                         caddr_t *addrp, size_t len, uchar_t prot,
8240                         uchar_t maxprot, uint_t flags, cred_t *cr)
8241 {
8242         cnode_t *cp = VTOC(vp);
8243         fscache_t *fscp = C_TO_FSCACHE(cp);
8244         vnode_t *backvp;
8245         int error;
8246 
8247         /*
8248          * For NFSv4 pass-through to work, only connected operation is
8249          * supported, the cnode backvp must exist, and cachefs optional
8250          * (eg., disconnectable) flags are turned off. Assert these
8251          * conditions for the map operation.
8252          */
8253         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8254         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8255 
8256         /* Call backfs vnode op after extracting backvp */
8257         mutex_enter(&cp->c_statelock);
8258         backvp = cp->c_backvp;
8259         mutex_exit(&cp->c_statelock);
8260 
8261         CFS_DPRINT_BACKFS_NFSV4(fscp,
8262             ("cachefs_map_backfs_nfsv4: cnode %p, backvp %p\n",
8263             cp, backvp));
8264         error = VOP_MAP(backvp, off, as, addrp, len, prot, maxprot, flags, cr,
8265             NULL);
8266 
8267         return (error);
8268 }
8269 
8270 /*ARGSUSED*/
8271 static int
8272 cachefs_addmap(struct vnode *vp, offset_t off, struct as *as,
8273     caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
8274     cred_t *cr, caller_context_t *ct)
8275 {
8276         cnode_t *cp = VTOC(vp);
8277         fscache_t *fscp = C_TO_FSCACHE(cp);
8278 
8279         if (getzoneid() != GLOBAL_ZONEID)
8280                 return (EPERM);
8281 
8282         if (vp->v_flag & VNOMAP)
8283                 return (ENOSYS);
8284 
8285         /*
8286          * Check this is not an NFSv4 filesystem, as the mapping
8287          * is not done on the cachefs filesystem if NFSv4 is in
8288          * use.
8289          */
8290         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8291 
8292         mutex_enter(&cp->c_statelock);
8293         cp->c_mapcnt += btopr(len);
8294         mutex_exit(&cp->c_statelock);
8295         return (0);
8296 }
8297 
8298 /*ARGSUSED*/
8299 static int
8300 cachefs_delmap(struct vnode *vp, offset_t off, struct as *as,
8301         caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
8302         cred_t *cr, caller_context_t *ct)
8303 {
8304         cnode_t *cp = VTOC(vp);
8305         fscache_t *fscp = C_TO_FSCACHE(cp);
8306         int error;
8307         int connected = 0;
8308         int held = 0;
8309 
8310         /*
8311          * The file may be passed in to (or inherited into) the zone, so we
8312          * need to let this operation go through since it happens as part of
8313          * exiting.
8314          */
8315         if (vp->v_flag & VNOMAP)
8316                 return (ENOSYS);
8317 
8318         /*
8319          * Check this is not an NFSv4 filesystem, as the mapping
8320          * is not done on the cachefs filesystem if NFSv4 is in
8321          * use.
8322          */
8323         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8324 
8325         mutex_enter(&cp->c_statelock);
8326         cp->c_mapcnt -= btopr(len);
8327         ASSERT(cp->c_mapcnt >= 0);
8328         mutex_exit(&cp->c_statelock);
8329 
8330         if (cp->c_mapcnt || !vn_has_cached_data(vp) ||
8331             ((cp->c_flags & CN_MAPWRITE) == 0))
8332                 return (0);
8333 
8334         for (;;) {
8335                 /* get (or renew) access to the file system */
8336                 if (held) {
8337                         cachefs_cd_release(fscp);
8338                         held = 0;
8339                 }
8340                 error = cachefs_cd_access(fscp, connected, 1);
8341                 if (error)
8342                         break;
8343                 held = 1;
8344                 connected = 0;
8345 
8346                 error = cachefs_putpage_common(vp, (offset_t)0,
8347                     (uint_t)0, 0, cr);
8348                 if (CFS_TIMEOUT(fscp, error)) {
8349                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8350                                 cachefs_cd_release(fscp);
8351                                 held = 0;
8352                                 cachefs_cd_timedout(fscp);
8353                                 continue;
8354                         } else {
8355                                 connected = 1;
8356                                 continue;
8357                         }
8358                 }
8359 
8360                 /* if no space left in cache, wait until connected */
8361                 if ((error == ENOSPC) &&
8362                     (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
8363                         connected = 1;
8364                         continue;
8365                 }
8366 
8367                 mutex_enter(&cp->c_statelock);
8368                 if (!error)
8369                         error = cp->c_error;
8370                 cp->c_error = 0;
8371                 mutex_exit(&cp->c_statelock);
8372                 break;
8373         }
8374 
8375         if (held)
8376                 cachefs_cd_release(fscp);
8377 
8378 #ifdef CFS_CD_DEBUG
8379         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8380 #endif
8381         return (error);
8382 }
8383 
8384 /* ARGSUSED */
8385 static int
8386 cachefs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8387         offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
8388         caller_context_t *ct)
8389 {
8390         struct cnode *cp = VTOC(vp);
8391         int error;
8392         struct fscache *fscp = C_TO_FSCACHE(cp);
8393         vnode_t *backvp;
8394         int held = 0;
8395         int connected = 0;
8396 
8397         if (getzoneid() != GLOBAL_ZONEID)
8398                 return (EPERM);
8399 
8400         if ((cmd != F_GETLK) && (cmd != F_SETLK) && (cmd != F_SETLKW))
8401                 return (EINVAL);
8402 
8403         /* Disallow locking of files that are currently mapped */
8404         if (((cmd == F_SETLK) || (cmd == F_SETLKW)) && (cp->c_mapcnt > 0)) {
8405                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8406                 return (EAGAIN);
8407         }
8408 
8409         /*
8410          * Cachefs only provides pass-through support for NFSv4,
8411          * and all vnode operations are passed through to the
8412          * back file system. For NFSv4 pass-through to work, only
8413          * connected operation is supported, the cnode backvp must
8414          * exist, and cachefs optional (eg., disconnectable) flags
8415          * are turned off. Assert these conditions to ensure that
8416          * the backfilesystem is called for the frlock operation.
8417          */
8418         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8419         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8420 
8421         /* XXX bob: nfs does a bunch more checks than we do */
8422         if (CFS_ISFS_LLOCK(fscp)) {
8423                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8424                 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
8425         }
8426 
8427         for (;;) {
8428                 /* get (or renew) access to the file system */
8429                 if (held) {
8430                         /* Won't loop with NFSv4 connected behavior */
8431                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8432                         cachefs_cd_release(fscp);
8433                         held = 0;
8434                 }
8435                 error = cachefs_cd_access(fscp, connected, 0);
8436                 if (error)
8437                         break;
8438                 held = 1;
8439 
8440                 /* if not connected, quit or wait */
8441                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8442                         connected = 1;
8443                         continue;
8444                 }
8445 
8446                 /* nocache the file */
8447                 if ((cp->c_flags & CN_NOCACHE) == 0 &&
8448                     !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8449                         mutex_enter(&cp->c_statelock);
8450                         cachefs_nocache(cp);
8451                         mutex_exit(&cp->c_statelock);
8452                 }
8453 
8454                 /*
8455                  * XXX bob: probably should do a consistency check
8456                  * Pass arguments unchanged if NFSv4 is the backfs.
8457                  */
8458                 if (bfp->l_whence == 2 && CFS_ISFS_BACKFS_NFSV4(fscp) == 0) {
8459                         bfp->l_start += cp->c_size;
8460                         bfp->l_whence = 0;
8461                 }
8462 
8463                 /* get the back vp */
8464                 mutex_enter(&cp->c_statelock);
8465                 if (cp->c_backvp == NULL) {
8466                         error = cachefs_getbackvp(fscp, cp);
8467                         if (error) {
8468                                 mutex_exit(&cp->c_statelock);
8469                                 break;
8470                         }
8471                 }
8472                 backvp = cp->c_backvp;
8473                 VN_HOLD(backvp);
8474                 mutex_exit(&cp->c_statelock);
8475 
8476                 /*
8477                  * make sure we can flush currently dirty pages before
8478                  * allowing the lock
8479                  */
8480                 if (bfp->l_type != F_UNLCK && cmd != F_GETLK &&
8481                     !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8482                         error = cachefs_putpage(
8483                             vp, (offset_t)0, 0, B_INVAL, cr, ct);
8484                         if (error) {
8485                                 error = ENOLCK;
8486                                 VN_RELE(backvp);
8487                                 break;
8488                         }
8489                 }
8490 
8491                 /* do lock on the back file */
8492                 CFS_DPRINT_BACKFS_NFSV4(fscp,
8493                     ("cachefs_frlock (nfsv4): cp %p, backvp %p\n",
8494                     cp, backvp));
8495                 error = VOP_FRLOCK(backvp, cmd, bfp, flag, offset, NULL, cr,
8496                     ct);
8497                 VN_RELE(backvp);
8498                 if (CFS_TIMEOUT(fscp, error)) {
8499                         connected = 1;
8500                         continue;
8501                 }
8502                 break;
8503         }
8504 
8505         if (held) {
8506                 cachefs_cd_release(fscp);
8507         }
8508 
8509         /*
8510          * If we are setting a lock mark the vnode VNOCACHE so the page
8511          * cache does not give inconsistent results on locked files shared
8512          * between clients.  The VNOCACHE flag is never turned off as long
8513          * as the vnode is active because it is hard to figure out when the
8514          * last lock is gone.
8515          * XXX - what if some already has the vnode mapped in?
8516          * XXX bob: see nfs3_frlock, do not allow locking if vnode mapped in.
8517          */
8518         if ((error == 0) && (bfp->l_type != F_UNLCK) && (cmd != F_GETLK) &&
8519             !CFS_ISFS_BACKFS_NFSV4(fscp))
8520                 vp->v_flag |= VNOCACHE;
8521 
8522 #ifdef CFS_CD_DEBUG
8523         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8524 #endif
8525         return (error);
8526 }
8527 
8528 /*
8529  * Free storage space associated with the specified vnode.  The portion
8530  * to be freed is specified by bfp->l_start and bfp->l_len (already
8531  * normalized to a "whence" of 0).
8532  *
8533  * This is an experimental facility whose continued existence is not
8534  * guaranteed.  Currently, we only support the special case
8535  * of l_len == 0, meaning free to end of file.
8536  */
8537 /* ARGSUSED */
8538 static int
8539 cachefs_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8540         offset_t offset, cred_t *cr, caller_context_t *ct)
8541 {
8542         cnode_t *cp = VTOC(vp);
8543         fscache_t *fscp = C_TO_FSCACHE(cp);
8544         int error;
8545 
8546         ASSERT(vp->v_type == VREG);
8547         if (getzoneid() != GLOBAL_ZONEID)
8548                 return (EPERM);
8549         if (cmd != F_FREESP)
8550                 return (EINVAL);
8551 
8552         /* call backfilesystem if NFSv4 */
8553         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8554                 error = cachefs_space_backfs_nfsv4(vp, cmd, bfp, flag,
8555                     offset, cr, ct);
8556                 goto out;
8557         }
8558 
8559         if ((error = convoff(vp, bfp, 0, offset)) == 0) {
8560                 ASSERT(bfp->l_start >= 0);
8561                 if (bfp->l_len == 0) {
8562                         struct vattr va;
8563 
8564                         va.va_size = bfp->l_start;
8565                         va.va_mask = AT_SIZE;
8566                         error = cachefs_setattr(vp, &va, 0, cr, ct);
8567                 } else
8568                         error = EINVAL;
8569         }
8570 
8571 out:
8572         return (error);
8573 }
8574 
8575 /*
8576  * cachefs_space_backfs_nfsv4
8577  *
8578  * Call NFSv4 back filesystem to handle the space (cachefs
8579  * pass-through support for NFSv4).
8580  */
8581 static int
8582 cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd, struct flock64 *bfp,
8583                 int flag, offset_t offset, cred_t *cr, caller_context_t *ct)
8584 {
8585         cnode_t *cp = VTOC(vp);
8586         fscache_t *fscp = C_TO_FSCACHE(cp);
8587         vnode_t *backvp;
8588         int error;
8589 
8590         /*
8591          * For NFSv4 pass-through to work, only connected operation is
8592          * supported, the cnode backvp must exist, and cachefs optional
8593          * (eg., disconnectable) flags are turned off. Assert these
8594          * conditions for the space operation.
8595          */
8596         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8597         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8598 
8599         /* Call backfs vnode op after extracting backvp */
8600         mutex_enter(&cp->c_statelock);
8601         backvp = cp->c_backvp;
8602         mutex_exit(&cp->c_statelock);
8603 
8604         CFS_DPRINT_BACKFS_NFSV4(fscp,
8605             ("cachefs_space_backfs_nfsv4: cnode %p, backvp %p\n",
8606             cp, backvp));
8607         error = VOP_SPACE(backvp, cmd, bfp, flag, offset, cr, ct);
8608 
8609         return (error);
8610 }
8611 
8612 /*ARGSUSED*/
8613 static int
8614 cachefs_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
8615 {
8616         return (EINVAL);
8617 }
8618 
8619 /*ARGSUSED*/
8620 static int
8621 cachefs_pageio(struct vnode *vp, page_t *pp, u_offset_t io_off, size_t io_len,
8622         int flags, cred_t *cr, caller_context_t *ct)
8623 {
8624         return (ENOSYS);
8625 }
8626 
8627 static int
8628 cachefs_setsecattr_connected(cnode_t *cp,
8629     vsecattr_t *vsec, int flag, cred_t *cr)
8630 {
8631         fscache_t *fscp = C_TO_FSCACHE(cp);
8632         int error = 0;
8633 
8634         ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
8635         ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8636 
8637         mutex_enter(&cp->c_statelock);
8638 
8639         if (cp->c_backvp == NULL) {
8640                 error = cachefs_getbackvp(fscp, cp);
8641                 if (error) {
8642                         cachefs_nocache(cp);
8643                         goto out;
8644                 }
8645         }
8646 
8647         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
8648         if (error)
8649                 goto out;
8650 
8651         /* only owner can set acl */
8652         if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8653                 error = EINVAL;
8654                 goto out;
8655         }
8656 
8657 
8658         CFS_DPRINT_BACKFS_NFSV4(fscp,
8659             ("cachefs_setsecattr (nfsv4): cp %p, backvp %p",
8660             cp, cp->c_backvp));
8661         error = VOP_SETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
8662         if (error) {
8663                 goto out;
8664         }
8665 
8666         if ((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
8667             !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8668                 cachefs_nocache(cp);
8669                 goto out;
8670         }
8671 
8672         CFSOP_MODIFY_COBJECT(fscp, cp, cr);
8673 
8674         /* acl may have changed permissions -- handle this. */
8675         if (!CFS_ISFS_BACKFS_NFSV4(fscp))
8676                 cachefs_acl2perm(cp, vsec);
8677 
8678         if ((cp->c_flags & CN_NOCACHE) == 0 &&
8679             !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8680                 error = cachefs_cacheacl(cp, vsec);
8681                 if (error != 0) {
8682 #ifdef CFSDEBUG
8683                         CFS_DEBUG(CFSDEBUG_VOPS)
8684                                 printf("cachefs_setacl: cacheacl: error %d\n",
8685                                     error);
8686 #endif /* CFSDEBUG */
8687                         error = 0;
8688                         cachefs_nocache(cp);
8689                 }
8690         }
8691 
8692 out:
8693         mutex_exit(&cp->c_statelock);
8694 
8695         return (error);
8696 }
8697 
8698 static int
8699 cachefs_setsecattr_disconnected(cnode_t *cp,
8700     vsecattr_t *vsec, int flag, cred_t *cr)
8701 {
8702         fscache_t *fscp = C_TO_FSCACHE(cp);
8703         mode_t failmode = cp->c_metadata.md_vattr.va_mode;
8704         off_t commit = 0;
8705         int error = 0;
8706 
8707         ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8708 
8709         if (CFS_ISFS_WRITE_AROUND(fscp))
8710                 return (ETIMEDOUT);
8711 
8712         mutex_enter(&cp->c_statelock);
8713 
8714         /* only owner can set acl */
8715         if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8716                 error = EINVAL;
8717                 goto out;
8718         }
8719 
8720         if (cp->c_metadata.md_flags & MD_NEEDATTRS) {
8721                 error = ETIMEDOUT;
8722                 goto out;
8723         }
8724 
8725         /* XXX do i need this?  is this right? */
8726         if (cp->c_flags & CN_ALLOC_PENDING) {
8727                 if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
8728                         (void) filegrp_allocattr(cp->c_filegrp);
8729                 }
8730                 error = filegrp_create_metadata(cp->c_filegrp,
8731                     &cp->c_metadata, &cp->c_id);
8732                 if (error) {
8733                         goto out;
8734                 }
8735                 cp->c_flags &= ~CN_ALLOC_PENDING;
8736         }
8737 
8738         /* XXX is this right? */
8739         if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
8740                 error = cachefs_dlog_cidmap(fscp);
8741                 if (error) {
8742                         error = ENOSPC;
8743                         goto out;
8744                 }
8745                 cp->c_metadata.md_flags |= MD_MAPPING;
8746                 cp->c_flags |= CN_UPDATED;
8747         }
8748 
8749         commit = cachefs_dlog_setsecattr(fscp, vsec, flag, cp, cr);
8750         if (commit == 0)
8751                 goto out;
8752 
8753         /* fix modes in metadata */
8754         cachefs_acl2perm(cp, vsec);
8755 
8756         if ((cp->c_flags & CN_NOCACHE) == 0) {
8757                 error = cachefs_cacheacl(cp, vsec);
8758                 if (error != 0) {
8759                         goto out;
8760                 }
8761         }
8762 
8763         /* XXX is this right? */
8764         if (cachefs_modified_alloc(cp)) {
8765                 error = ENOSPC;
8766                 goto out;
8767         }
8768 
8769 out:
8770         if (error != 0)
8771                 cp->c_metadata.md_vattr.va_mode = failmode;
8772 
8773         mutex_exit(&cp->c_statelock);
8774 
8775         if (commit) {
8776                 if (cachefs_dlog_commit(fscp, commit, error)) {
8777                         /*EMPTY*/
8778                         /* XXX fix on panic? */
8779                 }
8780         }
8781 
8782         return (error);
8783 }
8784 
8785 /*ARGSUSED*/
8786 static int
8787 cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
8788     caller_context_t *ct)
8789 {
8790         cnode_t *cp = VTOC(vp);
8791         fscache_t *fscp = C_TO_FSCACHE(cp);
8792         int connected = 0;
8793         int held = 0;
8794         int error = 0;
8795 
8796 #ifdef CFSDEBUG
8797         CFS_DEBUG(CFSDEBUG_VOPS)
8798                 printf("cachefs_setsecattr: ENTER vp %p\n", (void *)vp);
8799 #endif
8800         if (getzoneid() != GLOBAL_ZONEID) {
8801                 error = EPERM;
8802                 goto out;
8803         }
8804 
8805         if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8806                 error = ENOSYS;
8807                 goto out;
8808         }
8809 
8810         if (! cachefs_vtype_aclok(vp)) {
8811                 error = EINVAL;
8812                 goto out;
8813         }
8814 
8815         /*
8816          * Cachefs only provides pass-through support for NFSv4,
8817          * and all vnode operations are passed through to the
8818          * back file system. For NFSv4 pass-through to work, only
8819          * connected operation is supported, the cnode backvp must
8820          * exist, and cachefs optional (eg., disconnectable) flags
8821          * are turned off. Assert these conditions to ensure that
8822          * the backfilesystem is called for the setsecattr operation.
8823          */
8824         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8825         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8826 
8827         for (;;) {
8828                 /* drop hold on file system */
8829                 if (held) {
8830                         /* Won't loop with NFSv4 connected operation */
8831                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8832                         cachefs_cd_release(fscp);
8833                         held = 0;
8834                 }
8835 
8836                 /* acquire access to the file system */
8837                 error = cachefs_cd_access(fscp, connected, 1);
8838                 if (error)
8839                         break;
8840                 held = 1;
8841 
8842                 /* perform the setattr */
8843                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
8844                         error = cachefs_setsecattr_connected(cp,
8845                             vsec, flag, cr);
8846                 else
8847                         error = cachefs_setsecattr_disconnected(cp,
8848                             vsec, flag, cr);
8849                 if (error) {
8850                         /* if connected */
8851                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8852                                 if (CFS_TIMEOUT(fscp, error)) {
8853                                         cachefs_cd_release(fscp);
8854                                         held = 0;
8855                                         cachefs_cd_timedout(fscp);
8856                                         connected = 0;
8857                                         continue;
8858                                 }
8859                         }
8860 
8861                         /* else must be disconnected */
8862                         else {
8863                                 if (CFS_TIMEOUT(fscp, error)) {
8864                                         connected = 1;
8865                                         continue;
8866                                 }
8867                         }
8868                 }
8869                 break;
8870         }
8871 
8872         if (held) {
8873                 cachefs_cd_release(fscp);
8874         }
8875         return (error);
8876 
8877 out:
8878 #ifdef CFS_CD_DEBUG
8879         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8880 #endif
8881 
8882 #ifdef CFSDEBUG
8883         CFS_DEBUG(CFSDEBUG_VOPS)
8884                 printf("cachefs_setsecattr: EXIT error = %d\n", error);
8885 #endif
8886         return (error);
8887 }
8888 
8889 /*
8890  * call this BEFORE calling cachefs_cacheacl(), as the latter will
8891  * sanitize the acl.
8892  */
8893 
8894 static void
8895 cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec)
8896 {
8897         aclent_t *aclp;
8898         int i;
8899 
8900         for (i = 0; i < vsec->vsa_aclcnt; i++) {
8901                 aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
8902                 switch (aclp->a_type) {
8903                 case USER_OBJ:
8904                         cp->c_metadata.md_vattr.va_mode &= (~0700);
8905                         cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 6);
8906                         break;
8907 
8908                 case GROUP_OBJ:
8909                         cp->c_metadata.md_vattr.va_mode &= (~070);
8910                         cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 3);
8911                         break;
8912 
8913                 case OTHER_OBJ:
8914                         cp->c_metadata.md_vattr.va_mode &= (~07);
8915                         cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm);
8916                         break;
8917 
8918                 case CLASS_OBJ:
8919                         cp->c_metadata.md_aclclass = aclp->a_perm;
8920                         break;
8921                 }
8922         }
8923 
8924         cp->c_flags |= CN_UPDATED;
8925 }
8926 
/*
 * cachefs_getsecattr
 *
 * Return the ACL of the file in vsec.  When the file system is mounted
 * with CFS_NOACL the result comes from fs_fab_acl(); otherwise the work
 * is done by cachefs_getsecattr_connected() or
 * cachefs_getsecattr_disconnected() depending on the connection state,
 * with retries when an attempt fails with a timeout error.
 *
 * Returns 0 on success, EPERM outside the global zone, or the error
 * from fs_fab_acl(), cachefs_cd_access() or the worker that ran last.
 */
static int
cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
    caller_context_t *ct)
{
        cnode_t *cp = VTOC(vp);
        fscache_t *fscp = C_TO_FSCACHE(cp);
        int held = 0, connected = 0;
        int error = 0;

#ifdef CFSDEBUG
        CFS_DEBUG(CFSDEBUG_VOPS)
                printf("cachefs_getsecattr: ENTER vp %p\n", (void *)vp);
#endif

        if (getzoneid() != GLOBAL_ZONEID) {
                error = EPERM;
                goto out;
        }

        /*
         * Cachefs only provides pass-through support for NFSv4,
         * and all vnode operations are passed through to the
         * back file system. For NFSv4 pass-through to work, only
         * connected operation is supported, the cnode backvp must
         * exist, and cachefs optional (eg., disconnectable) flags
         * are turned off. Assert these conditions to ensure that
         * the backfilesystem is called for the getsecattr operation.
         */
        CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
        CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

        /* ACLs are disabled on this mount: let fs_fab_acl() answer */
        if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
                error = fs_fab_acl(vp, vsec, flag, cr, ct);
                goto out;
        }

        for (;;) {
                /* drop the hold left over from the previous iteration */
                if (held) {
                        /* Won't loop with NFSv4 connected behavior */
                        ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
                        cachefs_cd_release(fscp);
                        held = 0;
                }
                /* acquire access to the file system */
                error = cachefs_cd_access(fscp, connected, 0);
                if (error)
                        break;
                held = 1;

                if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
                        error = cachefs_getsecattr_connected(vp, vsec, flag,
                            cr);
                        if (CFS_TIMEOUT(fscp, error)) {
                                /* note the timeout, then go around again */
                                cachefs_cd_release(fscp);
                                held = 0;
                                cachefs_cd_timedout(fscp);
                                connected = 0;
                                continue;
                        }
                } else {
                        error = cachefs_getsecattr_disconnected(vp, vsec, flag,
                            cr);
                        if (CFS_TIMEOUT(fscp, error)) {
                                /*
                                 * The ACL could not be served from the
                                 * cache.  If cachefs_cd_access_miss()
                                 * allows it, try the connected routine
                                 * right away; keep its result unless it
                                 * timed out too, in which case wait
                                 * 5 seconds and start over.
                                 */
                                if (cachefs_cd_access_miss(fscp)) {
                                        error = cachefs_getsecattr_connected(vp,
                                            vsec, flag, cr);
                                        if (!CFS_TIMEOUT(fscp, error))
                                                break;
                                        delay(5*hz);
                                        connected = 0;
                                        continue;
                                }
                                /* otherwise retry asking for connected mode */
                                connected = 1;
                                continue;
                        }
                }
                break;
        }

out:
        /* held is 0 on the early exits above */
        if (held)
                cachefs_cd_release(fscp);

#ifdef CFS_CD_DEBUG
        ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
        CFS_DEBUG(CFSDEBUG_VOPS)
                printf("cachefs_getsecattr: EXIT error = %d\n", error);
#endif
        return (error);
}
9018 
9019 static int
9020 cachefs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
9021     caller_context_t *ct)
9022 {
9023         cnode_t *cp = VTOC(vp);
9024         fscache_t *fscp = C_TO_FSCACHE(cp);
9025         int error = 0;
9026         vnode_t *backvp;
9027 
9028 #ifdef CFSDEBUG
9029         CFS_DEBUG(CFSDEBUG_VOPS)
9030                 printf("cachefs_shrlock: ENTER vp %p\n", (void *)vp);
9031 #endif
9032 
9033         if (getzoneid() != GLOBAL_ZONEID) {
9034                 error = EPERM;
9035                 goto out;
9036         }
9037 
9038         /*
9039          * Cachefs only provides pass-through support for NFSv4,
9040          * and all vnode operations are passed through to the
9041          * back file system. For NFSv4 pass-through to work, only
9042          * connected operation is supported, the cnode backvp must
9043          * exist, and cachefs optional (eg., disconnectable) flags
9044          * are turned off. Assert these conditions to ensure that
9045          * the backfilesystem is called for the shrlock operation.
9046          */
9047         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
9048         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
9049 
9050         mutex_enter(&cp->c_statelock);
9051         if (cp->c_backvp == NULL)
9052                 error = cachefs_getbackvp(fscp, cp);
9053         backvp = cp->c_backvp;
9054         mutex_exit(&cp->c_statelock);
9055         ASSERT((error != 0) || (backvp != NULL));
9056 
9057         if (error == 0) {
9058                 CFS_DPRINT_BACKFS_NFSV4(fscp,
9059                     ("cachefs_shrlock (nfsv4): cp %p, backvp %p",
9060                     cp, backvp));
9061                 error = VOP_SHRLOCK(backvp, cmd, shr, flag, cr, ct);
9062         }
9063 
9064 out:
9065 #ifdef CFSDEBUG
9066         CFS_DEBUG(CFSDEBUG_VOPS)
9067                 printf("cachefs_shrlock: EXIT error = %d\n", error);
9068 #endif
9069         return (error);
9070 }
9071 
9072 static int
9073 cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
9074     cred_t *cr)
9075 {
9076         cnode_t *cp = VTOC(vp);
9077         fscache_t *fscp = C_TO_FSCACHE(cp);
9078         int hit = 0;
9079         int error = 0;
9080 
9081 
9082         mutex_enter(&cp->c_statelock);
9083         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
9084         if (error)
9085                 goto out;
9086 
9087         /* read from the cache if we can */
9088         if ((cp->c_metadata.md_flags & MD_ACL) &&
9089             ((cp->c_flags & CN_NOCACHE) == 0) &&
9090             !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9091                 ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9092                 error = cachefs_getaclfromcache(cp, vsec);
9093                 if (error) {
9094                         cachefs_nocache(cp);
9095                         ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9096                         error = 0;
9097                 } else {
9098                         hit = 1;
9099                         goto out;
9100                 }
9101         }
9102 
9103         ASSERT(error == 0);
9104         if (cp->c_backvp == NULL)
9105                 error = cachefs_getbackvp(fscp, cp);
9106         if (error)
9107                 goto out;
9108 
9109         CFS_DPRINT_BACKFS_NFSV4(fscp,
9110             ("cachefs_getsecattr (nfsv4): cp %p, backvp %p",
9111             cp, cp->c_backvp));
9112         error = VOP_GETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
9113         if (error)
9114                 goto out;
9115 
9116         if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9117             (cachefs_vtype_aclok(vp)) &&
9118             ((cp->c_flags & CN_NOCACHE) == 0) &&
9119             !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9120                 error = cachefs_cacheacl(cp, vsec);
9121                 if (error) {
9122                         error = 0;
9123                         cachefs_nocache(cp);
9124                 }
9125         }
9126 
9127 out:
9128         if (error == 0) {
9129                 if (hit)
9130                         fscp->fs_stats.st_hits++;
9131                 else
9132                         fscp->fs_stats.st_misses++;
9133         }
9134         mutex_exit(&cp->c_statelock);
9135 
9136         return (error);
9137 }
9138 
9139 static int
9140 /*ARGSUSED*/
9141 cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec, int flag,
9142     cred_t *cr)
9143 {
9144         cnode_t *cp = VTOC(vp);
9145         fscache_t *fscp = C_TO_FSCACHE(cp);
9146         int hit = 0;
9147         int error = 0;
9148 
9149 
9150         mutex_enter(&cp->c_statelock);
9151 
9152         /* read from the cache if we can */
9153         if (((cp->c_flags & CN_NOCACHE) == 0) &&
9154             (cp->c_metadata.md_flags & MD_ACL)) {
9155                 error = cachefs_getaclfromcache(cp, vsec);
9156                 if (error) {
9157                         cachefs_nocache(cp);
9158                         ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9159                         error = 0;
9160                 } else {
9161                         hit = 1;
9162                         goto out;
9163                 }
9164         }
9165         error = ETIMEDOUT;
9166 
9167 out:
9168         if (error == 0) {
9169                 if (hit)
9170                         fscp->fs_stats.st_hits++;
9171                 else
9172                         fscp->fs_stats.st_misses++;
9173         }
9174         mutex_exit(&cp->c_statelock);
9175 
9176         return (error);
9177 }
9178 
/*
 * cachefs_cacheacl() -- cache an ACL, which we do by applying it to
 * the frontfile if possible; otherwise, the adjunct directory.
 *
 * inputs:
 * cp - the cnode, with its statelock already held
 * vsecp - a pointer to a vsecattr_t you'd like us to cache as-is,
 *  or NULL if you want us to do the VOP_GETSECATTR(backvp).
 *
 * A caller-supplied ACL array (VSA_ACL set) is not modified: the
 * function works on a private copy and puts the caller's pointer back
 * before returning.
 *
 * returns:
 * 0 - all is well; this includes the case where applying the ACL to
 *  the front file system failed and the cached ACL was purged instead
 * nonzero - errno
 */

int
cachefs_cacheacl(cnode_t *cp, vsecattr_t *vsecp)
{
        fscache_t *fscp = C_TO_FSCACHE(cp);
        vsecattr_t vsec;
        aclent_t *aclp;
        int gotvsec = 0;
        int error = 0;
        vnode_t *vp = NULL;
        void *aclkeep = NULL;
        int i;

        ASSERT(MUTEX_HELD(&cp->c_statelock));
        ASSERT((cp->c_flags & CN_NOCACHE) == 0);
        ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
        ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
        ASSERT(cachefs_vtype_aclok(CTOV(cp)));

        /* checked again here for builds where ASSERT compiles away */
        if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
                error = ENOSYS;
                goto out;
        }

        if (vsecp == NULL) {
                /* no ACL supplied: fetch it from the back file */
                if (cp->c_backvp == NULL)
                        error = cachefs_getbackvp(fscp, cp);
                if (error != 0)
                        goto out;
                vsecp = &vsec;
                bzero(&vsec, sizeof (vsec));
                vsecp->vsa_mask =
                    VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
                error = VOP_GETSECATTR(cp->c_backvp, vsecp, 0, kcred, NULL);
                if (error != 0) {
                        goto out;
                }
                /* the arrays in vsec are ours to free at out: */
                gotvsec = 1;
        } else if (vsecp->vsa_mask & VSA_ACL) {
                /*
                 * The entries are rewritten below, so work on a private
                 * copy and remember the caller's array in aclkeep; it is
                 * put back at out:.
                 */
                aclkeep = vsecp->vsa_aclentp;
                vsecp->vsa_aclentp = cachefs_kmem_alloc(vsecp->vsa_aclcnt *
                    sizeof (aclent_t), KM_SLEEP);
                bcopy(aclkeep, vsecp->vsa_aclentp, vsecp->vsa_aclcnt *
                    sizeof (aclent_t));
        } else if ((vsecp->vsa_mask & (VSA_ACL | VSA_DFACL)) == 0) {
                /* unless there's real data, we can cache nothing. */
                return (0);
        }

        /*
         * prevent the ACL from chmoding our frontfile, and
         * snarf the class info
         */

        if ((vsecp->vsa_mask & (VSA_ACL | VSA_ACLCNT)) ==
            (VSA_ACL | VSA_ACLCNT)) {
                for (i = 0; i < vsecp->vsa_aclcnt; i++) {
                        aclp = ((aclent_t *)vsecp->vsa_aclentp) + i;
                        switch (aclp->a_type) {
                        case CLASS_OBJ:
                                cp->c_metadata.md_aclclass =
                                    aclp->a_perm;
                                /*FALLTHROUGH*/
                        case USER_OBJ:
                        case GROUP_OBJ:
                        case OTHER_OBJ:
                                /* every base entry is stored as rw- */
                                aclp->a_perm = 06;
                        }
                }
        }

        /*
         * if the frontfile exists, then we always do the work.  but,
         * if there's no frontfile, and the ACL isn't a `real' ACL,
         * then we don't want to do the work.  otherwise, an `ls -l'
         * will create tons of empty frontfiles.
         */

        /*
         * NOTE(review): both counts are read regardless of vsa_mask;
         * presumably callers always fill them in -- confirm.
         */
        if (((cp->c_metadata.md_flags & MD_FILE) == 0) &&
            ((vsecp->vsa_aclcnt + vsecp->vsa_dfaclcnt)
            <= MIN_ACL_ENTRIES)) {
                cp->c_metadata.md_flags |= MD_ACL;
                cp->c_flags |= CN_UPDATED;
                goto out;
        }

        /*
         * if we have a default ACL, then we need a
         * real live directory in the frontfs that we
         * can apply the ACL to.  if not, then we just
         * use the frontfile.  we get the frontfile
         * regardless -- that way, we know the
         * directory for the frontfile exists.
         */

        if (vsecp->vsa_dfaclcnt > 0) {
                if (cp->c_acldirvp == NULL)
                        error = cachefs_getacldirvp(cp);
                if (error != 0)
                        goto out;
                vp = cp->c_acldirvp;
        } else {
                if (cp->c_frontvp == NULL)
                        error = cachefs_getfrontfile(cp);
                if (error != 0)
                        goto out;
                vp = cp->c_frontvp;
        }
        ASSERT(vp != NULL);

        /* apply the ACL to the chosen front file system object */
        (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
        error = VOP_SETSECATTR(vp, vsecp, 0, kcred, NULL);
        VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
        if (error != 0) {
#ifdef CFSDEBUG
                CFS_DEBUG(CFSDEBUG_VOPS)
                        printf("cachefs_cacheacl: setsecattr: error %d\n",
                            error);
#endif /* CFSDEBUG */
                /*
                 * If there was an error, we don't want to call
                 * cachefs_nocache(); so, set error to 0.
                 * We will call cachefs_purgeacl(), in order to
                 * clean such things as adjunct ACL directories.
                 */
                cachefs_purgeacl(cp);
                error = 0;
                goto out;
        }
        if (vp == cp->c_frontvp)
                cp->c_flags |= CN_NEED_FRONT_SYNC;

        /* the ACL is now cached */
        cp->c_metadata.md_flags |= MD_ACL;
        cp->c_flags |= CN_UPDATED;

out:
        /* while connected, a failure here turns caching off for the cnode */
        if ((error) && (fscp->fs_cdconnected == CFS_CD_CONNECTED))
                cachefs_nocache(cp);

        if (gotvsec) {
                /* release the arrays returned by VOP_GETSECATTR() */
                if (vsec.vsa_aclcnt)
                        kmem_free(vsec.vsa_aclentp,
                            vsec.vsa_aclcnt * sizeof (aclent_t));
                if (vsec.vsa_dfaclcnt)
                        kmem_free(vsec.vsa_dfaclentp,
                            vsec.vsa_dfaclcnt * sizeof (aclent_t));
        } else if (aclkeep != NULL) {
                /* drop our private copy and hand back the caller's array */
                cachefs_kmem_free(vsecp->vsa_aclentp,
                    vsecp->vsa_aclcnt * sizeof (aclent_t));
                vsecp->vsa_aclentp = aclkeep;
        }

        return (error);
}
9346 
9347 void
9348 cachefs_purgeacl(cnode_t *cp)
9349 {
9350         ASSERT(MUTEX_HELD(&cp->c_statelock));
9351 
9352         ASSERT(!CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)));
9353 
9354         if (cp->c_acldirvp != NULL) {
9355                 VN_RELE(cp->c_acldirvp);
9356                 cp->c_acldirvp = NULL;
9357         }
9358 
9359         if (cp->c_metadata.md_flags & MD_ACLDIR) {
9360                 char name[CFS_FRONTFILE_NAME_SIZE + 2];
9361 
9362                 ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9363                 make_ascii_name(&cp->c_id, name);
9364                 (void) strcat(name, ".d");
9365 
9366                 (void) VOP_RMDIR(cp->c_filegrp->fg_dirvp, name,
9367                     cp->c_filegrp->fg_dirvp, kcred, NULL, 0);
9368         }
9369 
9370         cp->c_metadata.md_flags &= ~(MD_ACL | MD_ACLDIR);
9371         cp->c_flags |= CN_UPDATED;
9372 }
9373 
9374 static int
9375 cachefs_getacldirvp(cnode_t *cp)
9376 {
9377         char name[CFS_FRONTFILE_NAME_SIZE + 2];
9378         int error = 0;
9379 
9380         ASSERT(MUTEX_HELD(&cp->c_statelock));
9381         ASSERT(cp->c_acldirvp == NULL);
9382 
9383         if (cp->c_frontvp == NULL)
9384                 error = cachefs_getfrontfile(cp);
9385         if (error != 0)
9386                 goto out;
9387 
9388         ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9389         make_ascii_name(&cp->c_id, name);
9390         (void) strcat(name, ".d");
9391         error = VOP_LOOKUP(cp->c_filegrp->fg_dirvp,
9392             name, &cp->c_acldirvp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
9393         if ((error != 0) && (error != ENOENT))
9394                 goto out;
9395 
9396         if (error != 0) {
9397                 vattr_t va;
9398 
9399                 va.va_mode = S_IFDIR | 0777;
9400                 va.va_uid = 0;
9401                 va.va_gid = 0;
9402                 va.va_type = VDIR;
9403                 va.va_mask = AT_TYPE | AT_MODE |
9404                     AT_UID | AT_GID;
9405                 error =
9406                     VOP_MKDIR(cp->c_filegrp->fg_dirvp,
9407                     name, &va, &cp->c_acldirvp, kcred, NULL, 0, NULL);
9408                 if (error != 0)
9409                         goto out;
9410         }
9411 
9412         ASSERT(cp->c_acldirvp != NULL);
9413         cp->c_metadata.md_flags |= MD_ACLDIR;
9414         cp->c_flags |= CN_UPDATED;
9415 
9416 out:
9417         if (error != 0)
9418                 cp->c_acldirvp = NULL;
9419         return (error);
9420 }
9421 
9422 static int
9423 cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec)
9424 {
9425         aclent_t *aclp;
9426         int error = 0;
9427         vnode_t *vp = NULL;
9428         int i;
9429 
9430         ASSERT(cp->c_metadata.md_flags & MD_ACL);
9431         ASSERT(MUTEX_HELD(&cp->c_statelock));
9432         ASSERT(vsec->vsa_aclentp == NULL);
9433 
9434         if (cp->c_metadata.md_flags & MD_ACLDIR) {
9435                 if (cp->c_acldirvp == NULL)
9436                         error = cachefs_getacldirvp(cp);
9437                 if (error != 0)
9438                         goto out;
9439                 vp = cp->c_acldirvp;
9440         } else if (cp->c_metadata.md_flags & MD_FILE) {
9441                 if (cp->c_frontvp == NULL)
9442                         error = cachefs_getfrontfile(cp);
9443                 if (error != 0)
9444                         goto out;
9445                 vp = cp->c_frontvp;
9446         } else {
9447 
9448                 /*
9449                  * if we get here, then we know that MD_ACL is on,
9450                  * meaning an ACL was successfully cached.  we also
9451                  * know that neither MD_ACLDIR nor MD_FILE are on, so
9452                  * this has to be an entry without a `real' ACL.
9453                  * thus, we forge whatever is necessary.
9454                  */
9455 
9456                 if (vsec->vsa_mask & VSA_ACLCNT)
9457                         vsec->vsa_aclcnt = MIN_ACL_ENTRIES;
9458 
9459                 if (vsec->vsa_mask & VSA_ACL) {
9460                         vsec->vsa_aclentp =
9461                             kmem_zalloc(MIN_ACL_ENTRIES *
9462                             sizeof (aclent_t), KM_SLEEP);
9463                         aclp = (aclent_t *)vsec->vsa_aclentp;
9464                         aclp->a_type = USER_OBJ;
9465                         ++aclp;
9466                         aclp->a_type = GROUP_OBJ;
9467                         ++aclp;
9468                         aclp->a_type = OTHER_OBJ;
9469                         ++aclp;
9470                         aclp->a_type = CLASS_OBJ;
9471                         ksort((caddr_t)vsec->vsa_aclentp, MIN_ACL_ENTRIES,
9472                             sizeof (aclent_t), cmp2acls);
9473                 }
9474 
9475                 ASSERT(vp == NULL);
9476         }
9477 
9478         if (vp != NULL) {
9479                 if ((error = VOP_GETSECATTR(vp, vsec, 0, kcred, NULL)) != 0) {
9480 #ifdef CFSDEBUG
9481                         CFS_DEBUG(CFSDEBUG_VOPS)
9482                                 printf("cachefs_getaclfromcache: error %d\n",
9483                                     error);
9484 #endif /* CFSDEBUG */
9485                         goto out;
9486                 }
9487         }
9488 
9489         if (vsec->vsa_aclentp != NULL) {
9490                 for (i = 0; i < vsec->vsa_aclcnt; i++) {
9491                         aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
9492                         switch (aclp->a_type) {
9493                         case USER_OBJ:
9494                                 aclp->a_id = cp->c_metadata.md_vattr.va_uid;
9495                                 aclp->a_perm =
9496                                     cp->c_metadata.md_vattr.va_mode & 0700;
9497                                 aclp->a_perm >>= 6;
9498                                 break;
9499 
9500                         case GROUP_OBJ:
9501                                 aclp->a_id = cp->c_metadata.md_vattr.va_gid;
9502                                 aclp->a_perm =
9503                                     cp->c_metadata.md_vattr.va_mode & 070;
9504                                 aclp->a_perm >>= 3;
9505                                 break;
9506 
9507                         case OTHER_OBJ:
9508                                 aclp->a_perm =
9509                                     cp->c_metadata.md_vattr.va_mode & 07;
9510                                 break;
9511 
9512                         case CLASS_OBJ:
9513                                 aclp->a_perm =
9514                                     cp->c_metadata.md_aclclass;
9515                                 break;
9516                         }
9517                 }
9518         }
9519 
9520 out:
9521 
9522         if (error != 0)
9523                 cachefs_nocache(cp);
9524 
9525         return (error);
9526 }
9527 
9528 /*
9529  * Fills in targp with attribute information from srcp, cp
9530  * and if necessary the system.
9531  */
9532 static void
9533 cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp, cred_t *cr)
9534 {
9535         time_t  now;
9536 
9537         ASSERT((srcp->va_mask & (AT_TYPE | AT_MODE)) == (AT_TYPE | AT_MODE));
9538 
9539         /*
9540          * Add code to fill in the va struct.  We use the fields from
9541          * the srcp struct if they are populated, otherwise we guess
9542          */
9543 
9544         targp->va_mask = 0;  /* initialize all fields */
9545         targp->va_mode = srcp->va_mode;
9546         targp->va_type = srcp->va_type;
9547         targp->va_nlink = 1;
9548         targp->va_nodeid = 0;
9549 
9550         if (srcp->va_mask & AT_UID)
9551                 targp->va_uid = srcp->va_uid;
9552         else
9553                 targp->va_uid = crgetuid(cr);
9554 
9555         if (srcp->va_mask & AT_GID)
9556                 targp->va_gid = srcp->va_gid;
9557         else
9558                 targp->va_gid = crgetgid(cr);
9559 
9560         if (srcp->va_mask & AT_FSID)
9561                 targp->va_fsid = srcp->va_fsid;
9562         else
9563                 targp->va_fsid = 0;  /* initialize all fields */
9564 
9565         now = gethrestime_sec();
9566         if (srcp->va_mask & AT_ATIME)
9567                 targp->va_atime = srcp->va_atime;
9568         else
9569                 targp->va_atime.tv_sec = now;
9570 
9571         if (srcp->va_mask & AT_MTIME)
9572                 targp->va_mtime = srcp->va_mtime;
9573         else
9574                 targp->va_mtime.tv_sec = now;
9575 
9576         if (srcp->va_mask & AT_CTIME)
9577                 targp->va_ctime = srcp->va_ctime;
9578         else
9579                 targp->va_ctime.tv_sec = now;
9580 
9581 
9582         if (srcp->va_mask & AT_SIZE)
9583                 targp->va_size = srcp->va_size;
9584         else
9585                 targp->va_size = 0;
9586 
9587         /*
9588          * the remaing fields are set by the fs and not changable.
9589          * we populate these entries useing the parent directory
9590          * values.  It's a small hack, but should work.
9591          */
9592         targp->va_blksize = cp->c_metadata.md_vattr.va_blksize;
9593         targp->va_rdev = cp->c_metadata.md_vattr.va_rdev;
9594         targp->va_nblocks = cp->c_metadata.md_vattr.va_nblocks;
9595         targp->va_seq = 0; /* Never keep the sequence number */
9596 }
9597 
9598 /*
9599  * set the gid for a newly created file.  The algorithm is as follows:
9600  *
9601  *      1) If the gid is set in the attribute list, then use it if
9602  *         the caller is privileged, belongs to the target group, or
9603  *         the group is the same as the parent directory.
9604  *
9605  *      2) If the parent directory's set-gid bit is clear, then use
9606  *         the process gid
9607  *
9608  *      3) Otherwise, use the gid of the parent directory.
9609  *
9610  * Note: newcp->c_attr.va_{mode,type} must already be set before calling
9611  * this routine.
9612  */
9613 static void
9614 cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap, cred_t *cr)
9615 {
9616         if ((vap->va_mask & AT_GID) &&
9617             ((vap->va_gid == dcp->c_attr.va_gid) ||
9618             groupmember(vap->va_gid, cr) ||
9619             secpolicy_vnode_create_gid(cr) != 0)) {
9620                 newcp->c_attr.va_gid = vap->va_gid;
9621         } else {
9622                 if (dcp->c_attr.va_mode & S_ISGID)
9623                         newcp->c_attr.va_gid = dcp->c_attr.va_gid;
9624                 else
9625                         newcp->c_attr.va_gid = crgetgid(cr);
9626         }
9627 
9628         /*
9629          * if we're creating a directory, and the parent directory has the
9630          * set-GID bit set, set it on the new directory.
9631          * Otherwise, if the user is neither privileged nor a member of the
9632          * file's new group, clear the file's set-GID bit.
9633          */
9634         if (dcp->c_attr.va_mode & S_ISGID && newcp->c_attr.va_type == VDIR) {
9635                 newcp->c_attr.va_mode |= S_ISGID;
9636         } else if ((newcp->c_attr.va_mode & S_ISGID) &&
9637             secpolicy_vnode_setids_setgids(cr, newcp->c_attr.va_gid) != 0)
9638                 newcp->c_attr.va_mode &= ~S_ISGID;
9639 }
9640 
/*
 * create an acl for the newly created file.  should be called right
 * after cachefs_creategid.
 *
 *      dcp     cnode of the parent directory; its MD_ACL flag must be
 *              set and its c_statelock held.
 *      newcp   cnode of the new file; its c_statelock must be held.
 *
 * Does nothing if the file system is mounted with CFS_NOACL or the new
 * vnode's type is rejected by cachefs_vtype_aclok().
 *
 * The parent's ACL is read from the cache.  If the parent has default
 * ACL entries they become the new file's ACL (and, for a new directory,
 * also its default ACL) and are stored with cachefs_cacheacl().  In
 * every non-error case the new cnode ends up with md_aclclass 07 and
 * MD_ACL set.  Errors are not reported to the caller; on error the new
 * cnode's ACL metadata is simply left untouched by this function.
 */

static void
cachefs_createacl(cnode_t *dcp, cnode_t *newcp)
{
        fscache_t *fscp = C_TO_FSCACHE(dcp);
        vsecattr_t vsec;
        int gotvsec = 0;        /* set once vsec may own allocated buffers */
        int error = 0; /* placeholder */
        aclent_t *aclp;
        o_mode_t *classp = NULL;        /* points at the CLASS_OBJ perms */
        o_mode_t gunion = 0;    /* union of group-class permissions */
        int i;

        if ((fscp->fs_info.fi_mntflags & CFS_NOACL) ||
            (! cachefs_vtype_aclok(CTOV(newcp))))
                return;

        ASSERT(dcp->c_metadata.md_flags & MD_ACL);
        ASSERT(MUTEX_HELD(&dcp->c_statelock));
        ASSERT(MUTEX_HELD(&newcp->c_statelock));

        /*
         * XXX should probably not do VSA_ACL and VSA_ACLCNT, but that
         * would hit code paths that aren't hit anywhere else.
         */

        bzero(&vsec, sizeof (vsec));
        vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
        error = cachefs_getaclfromcache(dcp, &vsec);
        if (error != 0)
                goto out;
        gotvsec = 1;

        if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL)) {
                /* the parent's own access ACL is not inherited; drop it */
                if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
                        kmem_free(vsec.vsa_aclentp,
                            vsec.vsa_aclcnt * sizeof (aclent_t));

                /* the default entries take over the access-ACL slot */
                vsec.vsa_aclcnt = vsec.vsa_dfaclcnt;
                vsec.vsa_aclentp = vsec.vsa_dfaclentp;
                vsec.vsa_dfaclcnt = 0;
                vsec.vsa_dfaclentp = NULL;

                /*
                 * a new directory also gets a copy of the entries as its
                 * default ACL.  the copy is taken before the loop below
                 * rewrites the entry types, so it keeps the DEF_* types.
                 */
                if (newcp->c_attr.va_type == VDIR) {
                        vsec.vsa_dfaclentp = kmem_alloc(vsec.vsa_aclcnt *
                            sizeof (aclent_t), KM_SLEEP);
                        vsec.vsa_dfaclcnt = vsec.vsa_aclcnt;
                        bcopy(vsec.vsa_aclentp, vsec.vsa_dfaclentp,
                            vsec.vsa_aclcnt * sizeof (aclent_t));
                }

                /*
                 * this function should be called pretty much after
                 * the rest of the file creation stuff is done.  so,
                 * uid, gid, etc. should be `right'.  we'll go with
                 * that, rather than trying to determine whether to
                 * get stuff from cr or va.
                 */

                /*
                 * turn each DEF_* entry into its access-ACL counterpart.
                 * the owner, group and other entries take their ids and
                 * permission bits from the new file's cached attributes.
                 */
                for (i = 0; i < vsec.vsa_aclcnt; i++) {
                        aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
                        switch (aclp->a_type) {
                        case DEF_USER_OBJ:
                                aclp->a_type = USER_OBJ;
                                aclp->a_id = newcp->c_metadata.md_vattr.va_uid;
                                aclp->a_perm =
                                    newcp->c_metadata.md_vattr.va_mode;
                                aclp->a_perm &= 0700;
                                aclp->a_perm >>= 6;
                                break;

                        case DEF_GROUP_OBJ:
                                aclp->a_type = GROUP_OBJ;
                                aclp->a_id = newcp->c_metadata.md_vattr.va_gid;
                                aclp->a_perm =
                                    newcp->c_metadata.md_vattr.va_mode;
                                aclp->a_perm &= 070;
                                aclp->a_perm >>= 3;
                                gunion |= aclp->a_perm;
                                break;

                        case DEF_OTHER_OBJ:
                                aclp->a_type = OTHER_OBJ;
                                aclp->a_perm =
                                    newcp->c_metadata.md_vattr.va_mode & 07;
                                break;

                        case DEF_CLASS_OBJ:
                                aclp->a_type = CLASS_OBJ;
                                /* remember it; it is adjusted after the loop */
                                classp = &(aclp->a_perm);
                                break;

                        case DEF_USER:
                                aclp->a_type = USER;
                                gunion |= aclp->a_perm;
                                break;

                        case DEF_GROUP:
                                aclp->a_type = GROUP;
                                gunion |= aclp->a_perm;
                                break;
                        }
                }

                /* XXX is this the POSIX thing to do? */
                /* limit the class entry to permissions some entry grants */
                if (classp != NULL)
                        *classp &= gunion;

                /*
                 * we don't need to log this; rather, we clear the
                 * MD_ACL bit when we reconnect.
                 */

                error = cachefs_cacheacl(newcp, &vsec);
                if (error != 0)
                        goto out;
        }

        /* reached with or without default entries on the parent */
        newcp->c_metadata.md_aclclass = 07; /* XXX check posix */
        newcp->c_metadata.md_flags |= MD_ACL;
        newcp->c_flags |= CN_UPDATED;

out:

        /* free whatever entry arrays vsec still owns */
        if (gotvsec) {
                if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
                        kmem_free(vsec.vsa_aclentp,
                            vsec.vsa_aclcnt * sizeof (aclent_t));
                if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL))
                        kmem_free(vsec.vsa_dfaclentp,
                            vsec.vsa_dfaclcnt * sizeof (aclent_t));
        }
}
9778 
9779 /*
9780  * this is translated from the UFS code for access checking.
9781  */
9782 
9783 static int
9784 cachefs_access_local(void *vcp, int mode, cred_t *cr)
9785 {
9786         cnode_t *cp = vcp;
9787         fscache_t *fscp = C_TO_FSCACHE(cp);
9788         int shift = 0;
9789 
9790         ASSERT(MUTEX_HELD(&cp->c_statelock));
9791 
9792         if (mode & VWRITE) {
9793                 /*
9794                  * Disallow write attempts on read-only
9795                  * file systems, unless the file is special.
9796                  */
9797                 struct vnode *vp = CTOV(cp);
9798                 if (vn_is_readonly(vp)) {
9799                         if (!IS_DEVVP(vp)) {
9800                                 return (EROFS);
9801                         }
9802                 }
9803         }
9804 
9805         /*
9806          * if we need to do ACLs, do it.  this works whether anyone
9807          * has explicitly made an ACL or not.
9808          */
9809 
9810         if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9811             (cachefs_vtype_aclok(CTOV(cp))))
9812                 return (cachefs_acl_access(cp, mode, cr));
9813 
9814         if (crgetuid(cr) != cp->c_attr.va_uid) {
9815                 shift += 3;
9816                 if (!groupmember(cp->c_attr.va_gid, cr))
9817                         shift += 3;
9818         }
9819 
9820         return (secpolicy_vnode_access2(cr, CTOV(cp), cp->c_attr.va_uid,
9821             cp->c_attr.va_mode << shift, mode));
9822 }
9823 
/*
 * This is transcribed from ufs_acl_access().  If that changes, then
 * this should, too.
 *
 * Check the cnode's ACL's to see if this mode of access is
 * allowed; return 0 if allowed, EACCES if not.
 *
 * We follow the procedure defined in Sec. 3.3.5, ACL Access
 * Check Algorithm, of the POSIX 1003.6 Draft Standard.
 *
 *      cp      cnode being checked; its c_statelock must be held.
 *      mode    the access being requested.
 *      cr      credentials of the caller.
 *
 * The file system must not be mounted with CFS_NOACL.  An error met
 * while obtaining the ACL (from the cache or from the back file system)
 * is returned as is.
 */

/*
 * Wrapper around secpolicy_vnode_access2().  Note that it uses the local
 * variable `owner' of the function it is expanded in.
 */
#define ACL_MODE_CHECK(M, PERM, C, I) \
    secpolicy_vnode_access2(C, CTOV(I), owner, (PERM), (M))

static int
cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr)
{
        int error = 0;

        fscache_t *fscp = C_TO_FSCACHE(cp);

        int mask = ~0;          /* CLASS_OBJ permissions, if there is one */
        int ismask = 0;         /* set when a CLASS_OBJ entry was seen */

        int gperm = 0;          /* union of perms of matching group entries */
        int ngroup = 0;         /* number of group entries matching cr */

        vsecattr_t vsec;
        int gotvsec = 0;        /* set once vsec holds an ACL to be freed */
        aclent_t *aclp;

        /* replaced by the USER_OBJ entry's id if the ACL has one */
        uid_t owner = cp->c_attr.va_uid;

        int i;

        ASSERT(MUTEX_HELD(&cp->c_statelock));
        ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);

        /*
         * strictly speaking, we shouldn't set VSA_DFACL and DFACLCNT,
         * but then i believe we'd be the only thing exercising those
         * code paths -- probably a bad thing.
         */

        bzero(&vsec, sizeof (vsec));
        vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;

        /* XXX KLUDGE! correct insidious 0-class problem */
        if (cp->c_metadata.md_aclclass == 0 &&
            fscp->fs_cdconnected == CFS_CD_CONNECTED)
                cachefs_purgeacl(cp);
again:
        if (cp->c_metadata.md_flags & MD_ACL) {
                /* an ACL is cached; use it */
                error = cachefs_getaclfromcache(cp, &vsec);
                if (error != 0) {
#ifdef CFSDEBUG
                        if (error != ETIMEDOUT)
                                CFS_DEBUG(CFSDEBUG_VOPS)
                                        printf("cachefs_acl_access():"
                                            "error %d from getaclfromcache()\n",
                                            error);
#endif /* CFSDEBUG */
                        /*
                         * if MD_ACL is no longer set after the failed
                         * attempt, go round again, which now takes the
                         * back file system branch; otherwise give up.
                         */
                        if ((cp->c_metadata.md_flags & MD_ACL) == 0) {
                                goto again;
                        } else {
                                goto out;
                        }
                }
        } else {
                /* no cached ACL; ask the back file system */
                if (cp->c_backvp == NULL) {
                        if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
                                error = cachefs_getbackvp(fscp, cp);
                        else
                                error = ETIMEDOUT;
                }
                if (error == 0)
                        error = VOP_GETSECATTR(cp->c_backvp, &vsec, 0, cr,
                            NULL);
                if (error != 0) {
#ifdef CFSDEBUG
                        CFS_DEBUG(CFSDEBUG_VOPS)
                                printf("cachefs_acl_access():"
                                    "error %d from getsecattr(backvp)\n",
                                    error);
#endif /* CFSDEBUG */
                        goto out;
                }
                /* remember the ACL for next time; failure here is ignored */
                if ((cp->c_flags & CN_NOCACHE) == 0 &&
                    !CFS_ISFS_BACKFS_NFSV4(fscp))
                        (void) cachefs_cacheacl(cp, &vsec);
        }
        gotvsec = 1;

        /*
         * first pass: settle the owner case and pick up the class
         * entry, which masks the permissions used in the second pass.
         */
        ASSERT(error == 0);
        for (i = 0; i < vsec.vsa_aclcnt; i++) {
                aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
                switch (aclp->a_type) {
                case USER_OBJ:
                        /*
                         * this might look cleaner in the 2nd loop
                         * below, but we do it here as an
                         * optimization.
                         */

                        owner = aclp->a_id;
                        if (crgetuid(cr) == owner) {
                                error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
                                    cr, cp);
                                goto out;
                        }
                        break;

                case CLASS_OBJ:
                        mask = aclp->a_perm;
                        ismask = 1;
                        break;
                }
        }

        /*
         * second pass: named users, then the group entries, then other.
         * the first entry that decides the matter ends the check.
         */
        ASSERT(error == 0);
        for (i = 0; i < vsec.vsa_aclcnt; i++) {
                aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
                switch (aclp->a_type) {
                case USER:
                        if (crgetuid(cr) == aclp->a_id) {
                                error = ACL_MODE_CHECK(mode,
                                    (aclp->a_perm & mask) << 6, cr, cp);
                                goto out;
                        }
                        break;

                case GROUP_OBJ:
                        if (groupmember(aclp->a_id, cr)) {
                                ++ngroup;
                                gperm |= aclp->a_perm;
                                /* with no class entry this entry decides */
                                if (! ismask) {
                                        error = ACL_MODE_CHECK(mode,
                                            aclp->a_perm << 6,
                                            cr, cp);
                                        goto out;
                                }
                        }
                        break;

                case GROUP:
                        if (groupmember(aclp->a_id, cr)) {
                                ++ngroup;
                                gperm |= aclp->a_perm;
                        }
                        break;

                case OTHER_OBJ:
                        /* only applies if no group entry matched so far */
                        if (ngroup == 0) {
                                error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
                                    cr, cp);
                                goto out;
                        }
                        break;

                default:
                        break;
                }
        }

        /* decided by the masked union of the matching group entries */
        ASSERT(ngroup > 0);
        error = ACL_MODE_CHECK(mode, (gperm & mask) << 6, cr, cp);

out:
        if (gotvsec) {
                if (vsec.vsa_aclcnt && vsec.vsa_aclentp)
                        kmem_free(vsec.vsa_aclentp,
                            vsec.vsa_aclcnt * sizeof (aclent_t));
                if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp)
                        kmem_free(vsec.vsa_dfaclentp,
                            vsec.vsa_dfaclcnt * sizeof (aclent_t));
        }

        return (error);
}
10003 
10004 /*
10005  * see if permissions allow for removal of the given file from
10006  * the given directory.
10007  */
10008 static int
10009 cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr)
10010 {
10011         uid_t uid;
10012         /*
10013          * If the containing directory is sticky, the user must:
10014          *  - own the directory, or
10015          *  - own the file, or
10016          *  - be able to write the file (if it's a plain file), or
10017          *  - be sufficiently privileged.
10018          */
10019         if ((dcp->c_attr.va_mode & S_ISVTX) &&
10020             ((uid = crgetuid(cr)) != dcp->c_attr.va_uid) &&
10021             (uid != cp->c_attr.va_uid) &&
10022             (cp->c_attr.va_type != VREG ||
10023             cachefs_access_local(cp, VWRITE, cr) != 0))
10024                 return (secpolicy_vnode_remove(cr));
10025 
10026         return (0);
10027 }
10028 
10029 /*
10030  * Returns a new name, may even be unique.
10031  * Stolen from nfs code.
10032  * Since now we will use renaming to .cfs* in place of .nfs*
10033  * for CacheFS. Both NFS and CacheFS will rename opened files.
10034  */
10035 static char cachefs_prefix[] = ".cfs";
10036 kmutex_t cachefs_newnum_lock;
10037 
10038 static char *
10039 cachefs_newname(void)
10040 {
10041         static uint_t newnum = 0;
10042         char *news;
10043         char *s, *p;
10044         uint_t id;
10045 
10046         mutex_enter(&cachefs_newnum_lock);
10047         if (newnum == 0) {
10048                 newnum = gethrestime_sec() & 0xfffff;
10049                 newnum |= 0x10000;
10050         }
10051         id = newnum++;
10052         mutex_exit(&cachefs_newnum_lock);
10053 
10054         news = cachefs_kmem_alloc(MAXNAMELEN, KM_SLEEP);
10055         s = news;
10056         p = cachefs_prefix;
10057         while (*p != '\0')
10058                 *s++ = *p++;
10059         while (id != 0) {
10060                 *s++ = "0123456789ABCDEF"[id & 0x0f];
10061                 id >>= 4;
10062         }
10063         *s = '\0';
10064         return (news);
10065 }
10066 
10067 /*
10068  * Called to rename the specified file to a temporary file so
10069  * operations to the file after remove work.
10070  * Must call this routine with the dir c_rwlock held as a writer.
10071  */
10072 static int
10073 /*ARGSUSED*/
10074 cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm, cred_t *cr)
10075 {
10076         cnode_t *cp = VTOC(vp);
10077         char *tmpname;
10078         fscache_t *fscp = C_TO_FSCACHE(cp);
10079         int error;
10080 
10081         ASSERT(RW_WRITE_HELD(&(VTOC(dvp)->c_rwlock)));
10082 
10083         /* get the new name for the file */
10084         tmpname = cachefs_newname();
10085 
10086         /* do the link */
10087         if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
10088                 error = cachefs_link_connected(dvp, vp, tmpname, cr);
10089         else
10090                 error = cachefs_link_disconnected(dvp, vp, tmpname, cr);
10091         if (error) {
10092                 cachefs_kmem_free(tmpname, MAXNAMELEN);
10093                 return (error);
10094         }
10095 
10096         mutex_enter(&cp->c_statelock);
10097         if (cp->c_unldvp) {
10098                 VN_RELE(cp->c_unldvp);
10099                 cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
10100                 crfree(cp->c_unlcred);
10101         }
10102 
10103         VN_HOLD(dvp);
10104         cp->c_unldvp = dvp;
10105         crhold(cr);
10106         cp->c_unlcred = cr;
10107         cp->c_unlname = tmpname;
10108 
10109         /* drop the backvp so NFS does not also do a rename */
10110         mutex_exit(&cp->c_statelock);
10111 
10112         return (0);
10113 }
10114 
10115 /*
10116  * Marks the cnode as modified.
10117  */
10118 static void
10119 cachefs_modified(cnode_t *cp)
10120 {
10121         fscache_t *fscp = C_TO_FSCACHE(cp);
10122         struct vattr va;
10123         int error;
10124 
10125         ASSERT(MUTEX_HELD(&cp->c_statelock));
10126         ASSERT(cp->c_metadata.md_rlno);
10127 
10128         /* if not on the modify list */
10129         if (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) {
10130                 /* put on modified list, also marks the file as modified */
10131                 cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_MODIFIED,
10132                     cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
10133                 cp->c_metadata.md_rltype = CACHEFS_RL_MODIFIED;
10134                 cp->c_flags |= CN_UPDATED;
10135 
10136                 /* if a modified regular file that is not local */
10137                 if (((cp->c_id.cid_flags & CFS_CID_LOCAL) == 0) &&
10138                     (cp->c_metadata.md_flags & MD_FILE) &&
10139                     (cp->c_attr.va_type == VREG)) {
10140 
10141                         if (cp->c_frontvp == NULL)
10142                                 (void) cachefs_getfrontfile(cp);
10143                         if (cp->c_frontvp) {
10144                                 /* identify file so fsck knows it is modified */
10145                                 va.va_mode = 0766;
10146                                 va.va_mask = AT_MODE;
10147                                 error = VOP_SETATTR(cp->c_frontvp,
10148                                     &va, 0, kcred, NULL);
10149                                 if (error) {
10150                                         cmn_err(CE_WARN,
10151                                             "Cannot change ff mode.\n");
10152                                 }
10153                         }
10154                 }
10155         }
10156 }
10157 
10158 /*
10159  * Marks the cnode as modified.
10160  * Allocates a rl slot for the cnode if necessary.
10161  * Returns 0 for success, !0 if cannot get an rl slot.
10162  */
10163 static int
10164 cachefs_modified_alloc(cnode_t *cp)
10165 {
10166         fscache_t *fscp = C_TO_FSCACHE(cp);
10167         filegrp_t *fgp = cp->c_filegrp;
10168         int error;
10169         rl_entry_t rl_ent;
10170 
10171         ASSERT(MUTEX_HELD(&cp->c_statelock));
10172 
10173         /* get the rl slot if needed */
10174         if (cp->c_metadata.md_rlno == 0) {
10175                 /* get a metadata slot if we do not have one yet */
10176                 if (cp->c_flags & CN_ALLOC_PENDING) {
10177                         if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
10178                                 (void) filegrp_allocattr(cp->c_filegrp);
10179                         }
10180                         error = filegrp_create_metadata(cp->c_filegrp,
10181                             &cp->c_metadata, &cp->c_id);
10182                         if (error)
10183                                 return (error);
10184                         cp->c_flags &= ~CN_ALLOC_PENDING;
10185                 }
10186 
10187                 /* get a free rl entry */
10188                 rl_ent.rl_fileno = cp->c_id.cid_fileno;
10189                 rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
10190                 rl_ent.rl_fsid = fscp->fs_cfsid;
10191                 rl_ent.rl_attrc = 0;
10192                 error = cachefs_rl_alloc(fscp->fs_cache, &rl_ent,
10193                     &cp->c_metadata.md_rlno);
10194                 if (error)
10195                         return (error);
10196                 cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
10197 
10198                 /* hold the filegrp so the attrcache file is not gc */
10199                 error = filegrp_ffhold(fgp);
10200                 if (error) {
10201                         cachefs_rlent_moveto(fscp->fs_cache,
10202                             CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
10203                         cp->c_metadata.md_rlno = 0;
10204                         return (error);
10205                 }
10206         }
10207         cachefs_modified(cp);
10208         return (0);
10209 }
10210 
10211 int
10212 cachefs_vtype_aclok(vnode_t *vp)
10213 {
10214         vtype_t *vtp, oktypes[] = {VREG, VDIR, VFIFO, VNON};
10215 
10216         if (vp->v_type == VNON)
10217                 return (0);
10218 
10219         for (vtp = oktypes; *vtp != VNON; vtp++)
10220                 if (vp->v_type == *vtp)
10221                         break;
10222 
10223         return (*vtp != VNON);
10224 }
10225 
10226 static int
10227 cachefs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
10228     caller_context_t *ct)
10229 {
10230         int error = 0;
10231         fscache_t *fscp = C_TO_FSCACHE(VTOC(vp));
10232 
10233         /* Assert cachefs compatibility if NFSv4 is in use */
10234         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
10235         CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(vp));
10236 
10237         if (cmd == _PC_FILESIZEBITS) {
10238                 u_offset_t maxsize = fscp->fs_offmax;
10239                 (*valp) = 0;
10240                 while (maxsize != 0) {
10241                         maxsize >>= 1;
10242                         (*valp)++;
10243                 }
10244                 (*valp)++;
10245         } else
10246                 error = fs_pathconf(vp, cmd, valp, cr, ct);
10247 
10248         return (error);
10249 }