1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2012 by Delphix. All rights reserved.
  26  */
  27 
  28 /*
  29  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30  *      All Rights Reserved
  31  */
  32 
  33 #include <sys/param.h>
  34 #include <sys/types.h>
  35 #include <sys/systm.h>
  36 #include <sys/cred.h>
  37 #include <sys/buf.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vfs_opreg.h>
  40 #include <sys/vnode.h>
  41 #include <sys/uio.h>
  42 #include <sys/errno.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/statvfs.h>
  45 #include <sys/kmem.h>
  46 #include <sys/dirent.h>
  47 #include <sys/cmn_err.h>
  48 #include <sys/debug.h>
  49 #include <sys/systeminfo.h>
  50 #include <sys/flock.h>
  51 #include <sys/pathname.h>
  52 #include <sys/nbmlock.h>
  53 #include <sys/share.h>
  54 #include <sys/atomic.h>
  55 #include <sys/policy.h>
  56 #include <sys/fem.h>
  57 #include <sys/sdt.h>
  58 #include <sys/ddi.h>
  59 #include <sys/zone.h>
  60 #include <sys/kstat.h>
  61 
  62 #include <fs/fs_reparse.h>
  63 
  64 #include <rpc/types.h>
  65 #include <rpc/auth.h>
  66 #include <rpc/rpcsec_gss.h>
  67 #include <rpc/svc.h>
  68 
  69 #include <nfs/nfs.h>
  70 #include <nfs/export.h>
  71 #include <nfs/nfs_cmd.h>
  72 #include <nfs/lm.h>
  73 #include <nfs/nfs4.h>
  74 
  75 #include <sys/strsubr.h>
  76 #include <sys/strsun.h>
  77 
  78 #include <inet/common.h>
  79 #include <inet/ip.h>
  80 #include <inet/ip6.h>
  81 
  82 #include <sys/tsol/label.h>
  83 #include <sys/tsol/tndb.h>
  84 
/*
 * Tunables
 *
 * rfs4_maxlock_tries and rfs4_lock_delay are file-local variables that take
 * their default values from the RFS4_MAXLOCK_TRIES and RFS4_LOCK_DELAY
 * constants.  rdma_svc_ops and nfs_loaned_buffers are defined outside this
 * file.
 */
#define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define RFS4_LOCK_DELAY 10      /* Milliseconds */
static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */

/* Forward declaration; the definition is not part of this section. */
static int rdma_setup_read_data4(READ4args *, READ4res *);
  94 
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 *
 * Pre-increments the bits.chgseq member of the object sp points to and
 * evaluates to the incremented value.  The argument is expanded exactly
 * once.
 */
#define next_stateid(sp) (++(sp)->bits.chgseq)
  99 
 100 /*
 101  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 102  *      This is used to return NFS4ERR_TOOSMALL when clients specify
 103  *      maxcount that isn't large enough to hold the smallest possible
 104  *      XDR encoded dirent.
 105  *
 106  *          sizeof cookie (8 bytes) +
 107  *          sizeof name_len (4 bytes) +
 108  *          sizeof smallest (padded) name (4 bytes) +
 109  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 110  *          sizeof attrlist4_len (4 bytes) +
 111  *          sizeof next boolean (4 bytes)
 112  *
 113  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 114  * the smallest possible entry4 (assumes no attrs requested).
 115  *      sizeof nfsstat4 (4 bytes) +
 116  *      sizeof verifier4 (8 bytes) +
 117  *      sizeof entry4list bool (4 bytes) +
 118  *      sizeof entry4   (36 bytes) +
 119  *      sizeof eof bool  (4 bytes)
 120  *
 121  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 122  *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 123  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 124  *      required for a given name length.  MAXNAMELEN is the maximum
 125  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 126  *      macros are to allow for . and .. entries -- just a minor tweak to try
 127  *      and guarantee that buffer we give to VOP_READDIR will be large enough
 128  *      to hold ., .., and the largest possible solaris dirent64.
 129  */
/* 8 (cookie) + 4 (name_len) + 4 (name) + 12 (bitmap4) + 4 (attrlist4_len) + 4 */
#define RFS4_MINLEN_ENTRY4 36
/* status + verifier + entry4list bool + smallest entry4 + eof bool */
#define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
/* room for a 1-char name, a 2-char name and a MAXNAMELEN-sized name */
#define RFS4_MINLEN_RDDIR_BUF \
        (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 134 
 135 /*
 136  * It would be better to pad to 4 bytes since that's what XDR would do,
 137  * but the dirents UFS gives us are already padded to 8, so just take
 138  * what we're given.  Dircount is only a hint anyway.  Currently the
 139  * solaris kernel is ASCII only, so there's no point in calling the
 140  * UTF8 functions.
 141  *
 142  * dirent64: named padded to provide 8 byte struct alignment
 143  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 144  *
 145  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 146  *
 147  */
/*
 * Dircount contribution of one dirent64: three XDR units plus the name
 * length derived from d_reclen by DIRENT64_NAMELEN().
 * NOTE(review): the three units presumably stand for the 8-byte cookie and
 * the 4-byte name length, assuming BYTES_PER_XDR_UNIT is 4 -- it is not
 * defined in this file, confirm.
 */
#define DIRENT64_TO_DIRCOUNT(dp) \
        (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 150 
time_t rfs4_start_time;                 /* Initialized in rfs4_srvrinit */

/*
 * Allocated with lm_alloc_sysidt() in rfs4_srvrinit() and released in
 * rfs4_srvrfini().
 */
static sysid_t lockt_sysid;             /* dummy sysid for all LOCKT calls */

/* Assigned from fs_new_caller_id() in rfs4_srvrinit(). */
u_longlong_t    nfs4_srv_caller_id;
/* Key handed to vsd_create() in rfs4_srvrinit(). */
uint_t          nfs4_srv_vkey = 0;

/* Write verifier; built from hostid/time in rfs4_srvrinit(). */
verifier4       Write4verf;
verifier4       Readdir4verf;

/* Defined later in this file; resets a compound_state to initial values. */
void    rfs4_init_compound_state(struct compound_state *);
 162 
/*
 * Forward declarations.
 *
 * The rfs4_op_*() routines share one signature (argop, resop, request,
 * compound state) and are referenced from the rfsv4disptab[] dispatch
 * table as dis_proc entries.  The *_free() routines, nullfree() and
 * lock_denied_free() are referenced from the same table as dis_resfree
 * entries.
 */
static void     nullfree(caddr_t);
static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_create_free(nfs_resop4 *resop);
static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_getattr_free(nfs_resop4 *);
static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_getfh_free(nfs_resop4 *);
static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     lock_denied_free(nfs_resop4 *);
static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
                                struct svc_req *req, struct compound_state *cs);
static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_read_free(nfs_resop4 *);
static void     rfs4_op_readdir_free(nfs_resop4 *resop);
static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_readlink_free(nfs_resop4 *);
static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *req, struct compound_state *);
static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_secinfo_free(nfs_resop4 *);

static nfsstat4 check_open_access(uint32_t,
                                struct compound_state *, struct svc_req *);
/* Non-static: given external linkage here; defined outside this section. */
nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
void rfs4_ss_clid(rfs4_client_t *);
 253 
/*
 * translation table for attrs
 *
 * NOTE(review): the field descriptions below are inferred from names and
 * types only; the code that fills this table is outside this section --
 * confirm before relying on them.
 */
struct nfs4_ntov_table {
        union nfs4_attr_u *na;          /* presumably array of attr values */
        uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably index map into na */
        int attrcnt;                    /* presumably number of entries used */
        bool_t vfsstat;                 /* presumably set if vfs stats needed */
};

/* Setup/teardown helpers for the table above; defined later in the file. */
static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
                                    struct nfs4_svgetit_arg *sargp);

static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
                    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
                    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);

/*
 * FEM monitors for read and write delegations.  Created in rfs4_srvrinit()
 * from nfs4_rd_deleg_tmpl[] and nfs4_wr_deleg_tmpl[], released in
 * rfs4_srvrfini().
 */
fem_t           *deleg_rdops;
fem_t           *deleg_wrops;

rfs4_servinst_t *rfs4_cur_servinst = NULL;      /* current server instance */
kmutex_t        rfs4_servinst_lock;     /* protects linked list */
int             rfs4_seen_first_compound;       /* set first time we see one */
 278 
/*
 * NFS4 op dispatch table
 *
 * One rfsv4disp describes a single operation: the handler to run, the
 * routine that releases whatever the handler allocated for its result,
 * RPC_* flag bits, and one of the NFS4_OP_* operation types defined below.
 * Both function pointers are declared without a prototype.
 */

struct rfsv4disp {
        void    (*dis_proc)();          /* proc to call */
        void    (*dis_resfree)();       /* frees space allocated by proc */
        int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
        int     op_type;                /* operation type, see below */
};
 289 
/*
 * operation types; used primarily for the per-exportinfo kstat implementation
 */
#define NFS4_OP_NOFH    0       /* The operation does not operate with any */
                                /* particular filehandle; we cannot associate */
                                /* it with any exportinfo. */

#define NFS4_OP_CFH     1       /* The operation works with the current */
                                /* filehandle; we associate the operation */
                                /* with the exportinfo related to the current */
                                /* filehandle (as set before the operation is */
                                /* executed). */

#define NFS4_OP_SFH     2       /* The operation works with the saved */
                                /* filehandle; we associate the operation */
                                /* with the exportinfo related to the saved */
                                /* filehandle (as set before the operation is */
                                /* executed). */

#define NFS4_OP_POSTCFH 3       /* The operation ignores the current */
                                /* filehandle, but sets the new current */
                                /* filehandle instead; we associate the */
                                /* operation with the exportinfo related to */
                                /* the current filehandle as set after the */
                                /* operation is successfully executed.  Since */
                                /* we do not know the particular exportinfo */
                                /* (and thus the kstat) before the operation */
                                /* is done, there is no simple way to */
                                /* update some I/O kstat statistics related */
                                /* to kstat_queue(9F). */
 320 
/*
 * Dispatch table for NFSv4 operations.  Per the entry comments, an entry's
 * position in the array is its operation number.  Each entry holds
 * { handler, result-free routine, RPC_* flags, NFS4_OP_* type }.
 * Slots 0 through 2 are routed to rfs4_op_illegal.
 */
static struct rfsv4disp rfsv4disptab[] = {
        /*
         * NFS VERSION 4
         */

        /* RFS_NULL = 0 */
        {rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

        /* UNUSED = 1 */
        {rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

        /* UNUSED = 2 */
        {rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

        /* OP_ACCESS = 3 */
        {rfs4_op_access, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_CLOSE = 4 */
        {rfs4_op_close, nullfree, 0, NFS4_OP_CFH},

        /* OP_COMMIT = 5 */
        {rfs4_op_commit, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_CREATE = 6 */
        {rfs4_op_create, nullfree, 0, NFS4_OP_CFH},

        /* OP_DELEGPURGE = 7 */
        {rfs4_op_delegpurge, nullfree, 0, NFS4_OP_NOFH},

        /* OP_DELEGRETURN = 8 */
        {rfs4_op_delegreturn, nullfree, 0, NFS4_OP_CFH},

        /* OP_GETATTR = 9 */
        {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_GETFH = 10 */
        {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL, NFS4_OP_CFH},

        /* OP_LINK = 11 */
        {rfs4_op_link, nullfree, 0, NFS4_OP_CFH},

        /* OP_LOCK = 12 */
        {rfs4_op_lock, lock_denied_free, 0, NFS4_OP_CFH},

        /* OP_LOCKT = 13 */
        {rfs4_op_lockt, lock_denied_free, 0, NFS4_OP_CFH},

        /* OP_LOCKU = 14 */
        {rfs4_op_locku, nullfree, 0, NFS4_OP_CFH},

        /* OP_LOOKUP = 15 */
        {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
            NFS4_OP_CFH},

        /* OP_LOOKUPP = 16 */
        {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
            NFS4_OP_CFH},

        /* OP_NVERIFY = 17 */
        {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_OPEN = 18 */
        {rfs4_op_open, rfs4_free_reply, 0, NFS4_OP_CFH},

        /* OP_OPENATTR = 19 */
        {rfs4_op_openattr, nullfree, 0, NFS4_OP_CFH},

        /* OP_OPEN_CONFIRM = 20 */
        {rfs4_op_open_confirm, nullfree, 0, NFS4_OP_CFH},

        /* OP_OPEN_DOWNGRADE = 21 */
        {rfs4_op_open_downgrade, nullfree, 0, NFS4_OP_CFH},

        /* OP_PUTFH = 22 */
        {rfs4_op_putfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

        /* OP_PUTPUBFH = 23 */
        {rfs4_op_putpubfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

        /* OP_PUTROOTFH = 24 */
        {rfs4_op_putrootfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

        /* OP_READ = 25 */
        {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_READDIR = 26 */
        {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_READLINK = 27 */
        {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_REMOVE = 28 */
        {rfs4_op_remove, nullfree, 0, NFS4_OP_CFH},

        /* OP_RENAME = 29 */
        {rfs4_op_rename, nullfree, 0, NFS4_OP_CFH},

        /* OP_RENEW = 30 */
        {rfs4_op_renew, nullfree, 0, NFS4_OP_NOFH},

        /* OP_RESTOREFH = 31 */
        {rfs4_op_restorefh, nullfree, RPC_ALL, NFS4_OP_SFH},

        /* OP_SAVEFH = 32 */
        {rfs4_op_savefh, nullfree, RPC_ALL, NFS4_OP_CFH},

        /* OP_SECINFO = 33 */
        {rfs4_op_secinfo, rfs4_op_secinfo_free, 0, NFS4_OP_CFH},

        /* OP_SETATTR = 34 */
        {rfs4_op_setattr, nullfree, 0, NFS4_OP_CFH},

        /* OP_SETCLIENTID = 35 */
        {rfs4_op_setclientid, nullfree, 0, NFS4_OP_NOFH},

        /* OP_SETCLIENTID_CONFIRM = 36 */
        {rfs4_op_setclientid_confirm, nullfree, 0, NFS4_OP_NOFH},

        /* OP_VERIFY = 37 */
        {rfs4_op_verify, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

        /* OP_WRITE = 38 */
        {rfs4_op_write, nullfree, 0, NFS4_OP_CFH},

        /* OP_RELEASE_LOCKOWNER = 39 */
        {rfs4_op_release_lockowner, nullfree, 0, NFS4_OP_NOFH},
};
 448 
/* Number of entries in rfsv4disptab[]. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * Index one past the last rfsv4disptab[] entry.  In DEBUG builds this is
 * the position of the trailing "rfs4_op_illegal" name in rfs4_op_string[].
 */
#define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 452 
#ifdef DEBUG

/* Debug-only switches; their consumers are outside this section. */
int             rfs4_fillone_debug = 0;
int             rfs4_no_stub_access = 1;
int             rfs4_rddir_debug = 0;

/*
 * Debug-only operation names, in the same order as rfsv4disptab[], followed
 * by one extra "rfs4_op_illegal" entry at index OP_ILLEGAL_IDX.
 */
static char    *rfs4_op_string[] = {
        "rfs4_op_null",
        "rfs4_op_1 unused",
        "rfs4_op_2 unused",
        "rfs4_op_access",
        "rfs4_op_close",
        "rfs4_op_commit",
        "rfs4_op_create",
        "rfs4_op_delegpurge",
        "rfs4_op_delegreturn",
        "rfs4_op_getattr",
        "rfs4_op_getfh",
        "rfs4_op_link",
        "rfs4_op_lock",
        "rfs4_op_lockt",
        "rfs4_op_locku",
        "rfs4_op_lookup",
        "rfs4_op_lookupp",
        "rfs4_op_nverify",
        "rfs4_op_open",
        "rfs4_op_openattr",
        "rfs4_op_open_confirm",
        "rfs4_op_open_downgrade",
        "rfs4_op_putfh",
        "rfs4_op_putpubfh",
        "rfs4_op_putrootfh",
        "rfs4_op_read",
        "rfs4_op_readdir",
        "rfs4_op_readlink",
        "rfs4_op_remove",
        "rfs4_op_rename",
        "rfs4_op_renew",
        "rfs4_op_restorefh",
        "rfs4_op_savefh",
        "rfs4_op_secinfo",
        "rfs4_op_setattr",
        "rfs4_op_setclientid",
        /*
         * NOTE(review): this label differs from the handler's name,
         * rfs4_op_setclientid_confirm; left as is in case anything
         * matches on the string.
         */
        "rfs4_op_setclient_confirm",
        "rfs4_op_verify",
        "rfs4_op_write",
        "rfs4_op_release_lockowner",
        "rfs4_op_illegal"
};
#endif
 503 
/* Declared here, defined outside this section. */
void    rfs4_ss_chkclid(rfs4_client_t *);

extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);

extern void     rfs4_free_fs_locations4(fs_locations4 *);

/*
 * nextdp(dp): address of the dirent64 that starts d_reclen bytes after dp.
 * Any earlier definition of nextdp is discarded first.
 */
#ifdef  nextdp
#undef nextdp
#endif
#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 514 
/*
 * Operation template for the read-delegation monitor.  Passed to
 * fem_create() in rfs4_srvrinit() to build deleg_rdops.  Each row names a
 * vnode operation and the deleg_rd_*() routine to run for it; the list is
 * terminated by a NULL row.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
        { VOPNAME_OPEN,         { .femop_open = deleg_rd_open } },
        { VOPNAME_WRITE,        { .femop_write = deleg_rd_write } },
        { VOPNAME_SETATTR,      { .femop_setattr = deleg_rd_setattr } },
        { VOPNAME_RWLOCK,       { .femop_rwlock = deleg_rd_rwlock } },
        { VOPNAME_SPACE,        { .femop_space = deleg_rd_space } },
        { VOPNAME_SETSECATTR,   { .femop_setsecattr = deleg_rd_setsecattr } },
        { VOPNAME_VNEVENT,      { .femop_vnevent = deleg_rd_vnevent } },
        { NULL,                 { NULL } }
};
/*
 * Operation template for the write-delegation monitor (deleg_wrops).  Same
 * layout as the read template, with an additional VOPNAME_READ row.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
        { VOPNAME_OPEN,         { .femop_open = deleg_wr_open } },
        { VOPNAME_READ,         { .femop_read = deleg_wr_read } },
        { VOPNAME_WRITE,        { .femop_write = deleg_wr_write } },
        { VOPNAME_SETATTR,      { .femop_setattr = deleg_wr_setattr } },
        { VOPNAME_RWLOCK,       { .femop_rwlock = deleg_wr_rwlock } },
        { VOPNAME_SPACE,        { .femop_space = deleg_wr_space } },
        { VOPNAME_SETSECATTR,   { .femop_setsecattr = deleg_wr_setsecattr } },
        { VOPNAME_VNEVENT,      { .femop_vnevent = deleg_wr_vnevent } },
        { NULL,                 { NULL } }
};
 536 
 537 int
 538 rfs4_srvrinit(void)
 539 {
 540         timespec32_t verf;
 541         int error;
 542         extern void rfs4_attr_init();
 543         extern krwlock_t rfs4_deleg_policy_lock;
 544 
 545         /*
 546          * The following algorithm attempts to find a unique verifier
 547          * to be used as the write verifier returned from the server
 548          * to the client.  It is important that this verifier change
 549          * whenever the server reboots.  Of secondary importance, it
 550          * is important for the verifier to be unique between two
 551          * different servers.
 552          *
 553          * Thus, an attempt is made to use the system hostid and the
 554          * current time in seconds when the nfssrv kernel module is
 555          * loaded.  It is assumed that an NFS server will not be able
 556          * to boot and then to reboot in less than a second.  If the
 557          * hostid has not been set, then the current high resolution
 558          * time is used.  This will ensure different verifiers each
 559          * time the server reboots and minimize the chances that two
 560          * different servers will have the same verifier.
 561          * XXX - this is broken on LP64 kernels.
 562          */
 563         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 564         if (verf.tv_sec != 0) {
 565                 verf.tv_nsec = gethrestime_sec();
 566         } else {
 567                 timespec_t tverf;
 568 
 569                 gethrestime(&tverf);
 570                 verf.tv_sec = (time_t)tverf.tv_sec;
 571                 verf.tv_nsec = tverf.tv_nsec;
 572         }
 573 
 574         Write4verf = *(uint64_t *)&verf;
 575 
 576         rfs4_attr_init();
 577         mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 578 
 579         /* Used to manage create/destroy of server state */
 580         mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
 581 
 582         /* Used to manage access to server instance linked list */
 583         mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 584 
 585         /* Used to manage access to rfs4_deleg_policy */
 586         rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 587 
 588         error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
 589         if (error != 0) {
 590                 rfs4_disable_delegation();
 591         } else {
 592                 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 593                     &deleg_wrops);
 594                 if (error != 0) {
 595                         rfs4_disable_delegation();
 596                         fem_free(deleg_rdops);
 597                 }
 598         }
 599 
 600         nfs4_srv_caller_id = fs_new_caller_id();
 601 
 602         lockt_sysid = lm_alloc_sysidt();
 603 
 604         vsd_create(&nfs4_srv_vkey, NULL);
 605 
 606         return (0);
 607 }
 608 
 609 void
 610 rfs4_srvrfini(void)
 611 {
 612         extern krwlock_t rfs4_deleg_policy_lock;
 613 
 614         if (lockt_sysid != LM_NOSYSID) {
 615                 lm_free_sysidt(lockt_sysid);
 616                 lockt_sysid = LM_NOSYSID;
 617         }
 618 
 619         mutex_destroy(&rfs4_deleg_lock);
 620         mutex_destroy(&rfs4_state_lock);
 621         rw_destroy(&rfs4_deleg_policy_lock);
 622 
 623         fem_free(deleg_rdops);
 624         fem_free(deleg_wrops);
 625 }
 626 
 627 void
 628 rfs4_init_compound_state(struct compound_state *cs)
 629 {
 630         bzero(cs, sizeof (*cs));
 631         cs->cont = TRUE;
 632         cs->access = CS_ACCESS_DENIED;
 633         cs->deleg = FALSE;
 634         cs->mandlock = FALSE;
 635         cs->fh.nfs_fh4_val = cs->fhbuf;
 636 }
 637 
 638 void
 639 rfs4_grace_start(rfs4_servinst_t *sip)
 640 {
 641         rw_enter(&sip->rwlock, RW_WRITER);
 642         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 643         sip->grace_period = rfs4_grace_period;
 644         rw_exit(&sip->rwlock);
 645 }
 646 
 647 /*
 648  * returns true if the instance's grace period has never been started
 649  */
 650 int
 651 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 652 {
 653         time_t start_time;
 654 
 655         rw_enter(&sip->rwlock, RW_READER);
 656         start_time = sip->start_time;
 657         rw_exit(&sip->rwlock);
 658 
 659         return (start_time == 0);
 660 }
 661 
 662 /*
 663  * Indicates if server instance is within the
 664  * grace period.
 665  */
 666 int
 667 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 668 {
 669         time_t grace_expiry;
 670 
 671         rw_enter(&sip->rwlock, RW_READER);
 672         grace_expiry = sip->start_time + sip->grace_period;
 673         rw_exit(&sip->rwlock);
 674 
 675         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 676 }
 677 
 678 int
 679 rfs4_clnt_in_grace(rfs4_client_t *cp)
 680 {
 681         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 682 
 683         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 684 }
 685 
 686 /*
 687  * reset all currently active grace periods
 688  */
 689 void
 690 rfs4_grace_reset_all(void)
 691 {
 692         rfs4_servinst_t *sip;
 693 
 694         mutex_enter(&rfs4_servinst_lock);
 695         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 696                 if (rfs4_servinst_in_grace(sip))
 697                         rfs4_grace_start(sip);
 698         mutex_exit(&rfs4_servinst_lock);
 699 }
 700 
 701 /*
 702  * start any new instances' grace periods
 703  */
 704 void
 705 rfs4_grace_start_new(void)
 706 {
 707         rfs4_servinst_t *sip;
 708 
 709         mutex_enter(&rfs4_servinst_lock);
 710         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 711                 if (rfs4_servinst_grace_new(sip))
 712                         rfs4_grace_start(sip);
 713         mutex_exit(&rfs4_servinst_lock);
 714 }
 715 
 716 static rfs4_dss_path_t *
 717 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
 718 {
 719         size_t len;
 720         rfs4_dss_path_t *dss_path;
 721 
 722         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 723 
 724         /*
 725          * Take a copy of the string, since the original may be overwritten.
 726          * Sadly, no strdup() in the kernel.
 727          */
 728         /* allow for NUL */
 729         len = strlen(path) + 1;
 730         dss_path->path = kmem_alloc(len, KM_SLEEP);
 731         (void) strlcpy(dss_path->path, path, len);
 732 
 733         /* associate with servinst */
 734         dss_path->sip = sip;
 735         dss_path->index = index;
 736 
 737         /*
 738          * Add to list of served paths.
 739          * No locking required, as we're only ever called at startup.
 740          */
 741         if (rfs4_dss_pathlist == NULL) {
 742                 /* this is the first dss_path_t */
 743 
 744                 /* needed for insque/remque */
 745                 dss_path->next = dss_path->prev = dss_path;
 746 
 747                 rfs4_dss_pathlist = dss_path;
 748         } else {
 749                 insque(dss_path, rfs4_dss_pathlist);
 750         }
 751 
 752         return (dss_path);
 753 }
 754 
 755 /*
 756  * Create a new server instance, and make it the currently active instance.
 757  * Note that starting the grace period too early will reduce the clients'
 758  * recovery window.
 759  */
 760 void
 761 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
 762 {
 763         unsigned i;
 764         rfs4_servinst_t *sip;
 765         rfs4_oldstate_t *oldstate;
 766 
 767         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 768         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 769 
 770         sip->start_time = (time_t)0;
 771         sip->grace_period = (time_t)0;
 772         sip->next = NULL;
 773         sip->prev = NULL;
 774 
 775         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 776         /*
 777          * This initial dummy entry is required to setup for insque/remque.
 778          * It must be skipped over whenever the list is traversed.
 779          */
 780         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 781         /* insque/remque require initial list entry to be self-terminated */
 782         oldstate->next = oldstate;
 783         oldstate->prev = oldstate;
 784         sip->oldstate = oldstate;
 785 
 786 
 787         sip->dss_npaths = dss_npaths;
 788         sip->dss_paths = kmem_alloc(dss_npaths *
 789             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 790 
 791         for (i = 0; i < dss_npaths; i++) {
 792                 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
 793         }
 794 
 795         mutex_enter(&rfs4_servinst_lock);
 796         if (rfs4_cur_servinst != NULL) {
 797                 /* add to linked list */
 798                 sip->prev = rfs4_cur_servinst;
 799                 rfs4_cur_servinst->next = sip;
 800         }
 801         if (start_grace)
 802                 rfs4_grace_start(sip);
 803         /* make the new instance "current" */
 804         rfs4_cur_servinst = sip;
 805 
 806         mutex_exit(&rfs4_servinst_lock);
 807 }
 808 
 809 /*
 810  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 811  * all instances directly.
 812  */
 813 void
 814 rfs4_servinst_destroy_all(void)
 815 {
 816         rfs4_servinst_t *sip, *prev, *current;
 817 #ifdef DEBUG
 818         int n = 0;
 819 #endif
 820 
 821         mutex_enter(&rfs4_servinst_lock);
 822         ASSERT(rfs4_cur_servinst != NULL);
 823         current = rfs4_cur_servinst;
 824         rfs4_cur_servinst = NULL;
 825         for (sip = current; sip != NULL; sip = prev) {
 826                 prev = sip->prev;
 827                 rw_destroy(&sip->rwlock);
 828                 if (sip->oldstate)
 829                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 830                 if (sip->dss_paths)
 831                         kmem_free(sip->dss_paths,
 832                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 833                 kmem_free(sip, sizeof (rfs4_servinst_t));
 834 #ifdef DEBUG
 835                 n++;
 836 #endif
 837         }
 838         mutex_exit(&rfs4_servinst_lock);
 839 }
 840 
 841 /*
 842  * Assign the current server instance to a client_t.
 843  * Should be called with cp->rc_dbe held.
 844  */
 845 void
 846 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
 847 {
 848         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 849 
 850         /*
 851          * The lock ensures that if the current instance is in the process
 852          * of changing, we will see the new one.
 853          */
 854         mutex_enter(&rfs4_servinst_lock);
 855         cp->rc_server_instance = sip;
 856         mutex_exit(&rfs4_servinst_lock);
 857 }
 858 
 859 rfs4_servinst_t *
 860 rfs4_servinst(rfs4_client_t *cp)
 861 {
 862         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 863 
 864         return (cp->rc_server_instance);
 865 }
 866 
 867 /* ARGSUSED */
 868 static void
 869 nullfree(caddr_t resop)
 870 {
 871 }
 872 
 873 /*
 874  * This is a fall-through for invalid or not implemented (yet) ops
 875  */
 876 /* ARGSUSED */
 877 static void
 878 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 879     struct compound_state *cs)
 880 {
 881         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 882 }
 883 
 884 /*
 885  * Check if the security flavor, nfsnum, is in the flavor_list.
 886  */
 887 bool_t
 888 in_flavor_list(int nfsnum, int *flavor_list, int count)
 889 {
 890         int i;
 891 
 892         for (i = 0; i < count; i++) {
 893                 if (nfsnum == flavor_list[i])
 894                         return (TRUE);
 895         }
 896         return (FALSE);
 897 }
 898 
 899 /*
 900  * Used by rfs4_op_secinfo to get the security information from the
 901  * export structure associated with the component.
 902  */
 903 /* ARGSUSED */
 904 static nfsstat4
 905 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 906 {
 907         int error, different_export = 0;
 908         vnode_t *dvp, *vp;
 909         struct exportinfo *exi = NULL;
 910         fid_t fid;
 911         uint_t count, i;
 912         secinfo4 *resok_val;
 913         struct secinfo *secp;
 914         seconfig_t *si;
 915         bool_t did_traverse = FALSE;
 916         int dotdot, walk;
 917 
 918         dvp = cs->vp;
 919         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 920 
 921         /*
 922          * If dotdotting, then need to check whether it's above the
 923          * root of a filesystem, or above an export point.
 924          */
 925         if (dotdot) {
 926 
 927                 /*
 928                  * If dotdotting at the root of a filesystem, then
 929                  * need to traverse back to the mounted-on filesystem
 930                  * and do the dotdot lookup there.
 931                  */
 932                 if (cs->vp->v_flag & VROOT) {
 933 
 934                         /*
 935                          * If at the system root, then can
 936                          * go up no further.
 937                          */
 938                         if (VN_CMP(dvp, rootdir))
 939                                 return (puterrno4(ENOENT));
 940 
 941                         /*
 942                          * Traverse back to the mounted-on filesystem
 943                          */
 944                         dvp = untraverse(cs->vp);
 945 
 946                         /*
 947                          * Set the different_export flag so we remember
 948                          * to pick up a new exportinfo entry for
 949                          * this new filesystem.
 950                          */
 951                         different_export = 1;
 952                 } else {
 953 
 954                         /*
 955                          * If dotdotting above an export point then set
 956                          * the different_export to get new export info.
 957                          */
 958                         different_export = nfs_exported(cs->exi, cs->vp);
 959                 }
 960         }
 961 
 962         /*
 963          * Get the vnode for the component "nm".
 964          */
 965         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 966             NULL, NULL, NULL);
 967         if (error)
 968                 return (puterrno4(error));
 969 
 970         /*
 971          * If the vnode is in a pseudo filesystem, or if the security flavor
 972          * used in the request is valid but not an explicitly shared flavor,
 973          * or the access bit indicates that this is a limited access,
 974          * check whether this vnode is visible.
 975          */
 976         if (!different_export &&
 977             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
 978             cs->access & CS_ACCESS_LIMITED)) {
 979                 if (! nfs_visible(cs->exi, vp, &different_export)) {
 980                         VN_RELE(vp);
 981                         return (puterrno4(ENOENT));
 982                 }
 983         }
 984 
 985         /*
 986          * If it's a mountpoint, then traverse it.
 987          */
 988         if (vn_ismntpt(vp)) {
 989                 if ((error = traverse(&vp)) != 0) {
 990                         VN_RELE(vp);
 991                         return (puterrno4(error));
 992                 }
 993                 /* remember that we had to traverse mountpoint */
 994                 did_traverse = TRUE;
 995                 different_export = 1;
 996         } else if (vp->v_vfsp != dvp->v_vfsp) {
 997                 /*
 998                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
 999                  * then vp is probably an LOFS object.  We don't need the
1000                  * realvp, we just need to know that we might have crossed
1001                  * a server fs boundary and need to call checkexport4.
1002                  * (LOFS lookup hides server fs mountpoints, and actually calls
1003                  * traverse)
1004                  */
1005                 different_export = 1;
1006         }
1007 
1008         /*
1009          * Get the export information for it.
1010          */
1011         if (different_export) {
1012 
1013                 bzero(&fid, sizeof (fid));
1014                 fid.fid_len = MAXFIDSZ;
1015                 error = vop_fid_pseudo(vp, &fid);
1016                 if (error) {
1017                         VN_RELE(vp);
1018                         return (puterrno4(error));
1019                 }
1020 
1021                 if (dotdot)
1022                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1023                 else
1024                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1025 
1026                 if (exi == NULL) {
1027                         if (did_traverse == TRUE) {
1028                                 /*
1029                                  * If this vnode is a mounted-on vnode,
1030                                  * but the mounted-on file system is not
1031                                  * exported, send back the secinfo for
1032                                  * the exported node that the mounted-on
1033                                  * vnode lives in.
1034                                  */
1035                                 exi = cs->exi;
1036                         } else {
1037                                 VN_RELE(vp);
1038                                 return (puterrno4(EACCES));
1039                         }
1040                 }
1041         } else {
1042                 exi = cs->exi;
1043         }
1044         ASSERT(exi != NULL);
1045 
1046 
1047         /*
1048          * Create the secinfo result based on the security information
1049          * from the exportinfo structure (exi).
1050          *
1051          * Return all flavors for a pseudo node.
1052          * For a real export node, return the flavor that the client
1053          * has access with.
1054          */
1055         ASSERT(RW_LOCK_HELD(&exported_lock));
1056         if (PSEUDO(exi)) {
1057                 count = exi->exi_export.ex_seccnt; /* total sec count */
1058                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1059                 secp = exi->exi_export.ex_secinfo;
1060 
1061                 for (i = 0; i < count; i++) {
1062                         si = &secp[i].s_secinfo;
1063                         resok_val[i].flavor = si->sc_rpcnum;
1064                         if (resok_val[i].flavor == RPCSEC_GSS) {
1065                                 rpcsec_gss_info *info;
1066 
1067                                 info = &resok_val[i].flavor_info;
1068                                 info->qop = si->sc_qop;
1069                                 info->service = (rpc_gss_svc_t)si->sc_service;
1070 
1071                                 /* get oid opaque data */
1072                                 info->oid.sec_oid4_len =
1073                                     si->sc_gss_mech_type->length;
1074                                 info->oid.sec_oid4_val = kmem_alloc(
1075                                     si->sc_gss_mech_type->length, KM_SLEEP);
1076                                 bcopy(
1077                                     si->sc_gss_mech_type->elements,
1078                                     info->oid.sec_oid4_val,
1079                                     info->oid.sec_oid4_len);
1080                         }
1081                 }
1082                 resp->SECINFO4resok_len = count;
1083                 resp->SECINFO4resok_val = resok_val;
1084         } else {
1085                 int ret_cnt = 0, k = 0;
1086                 int *flavor_list;
1087 
1088                 count = exi->exi_export.ex_seccnt; /* total sec count */
1089                 secp = exi->exi_export.ex_secinfo;
1090 
1091                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1092                 /* find out which flavors to return */
1093                 for (i = 0; i < count; i ++) {
1094                         int access, flavor, perm;
1095 
1096                         flavor = secp[i].s_secinfo.sc_nfsnum;
1097                         perm = secp[i].s_flags;
1098 
1099                         access = nfsauth4_secinfo_access(exi, cs->req,
1100                             flavor, perm, cs->basecr);
1101 
1102                         if (! (access & NFSAUTH_DENIED) &&
1103                             ! (access & NFSAUTH_WRONGSEC)) {
1104                                 flavor_list[ret_cnt] = flavor;
1105                                 ret_cnt++;
1106                         }
1107                 }
1108 
1109                 /* Create the returning SECINFO value */
1110                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1111 
1112                 for (i = 0; i < count; i++) {
1113                         /*
1114                          * If the flavor is in the flavor list,
1115                          * fill in resok_val.
1116                          */
1117                         si = &secp[i].s_secinfo;
1118                         if (in_flavor_list(si->sc_nfsnum,
1119                             flavor_list, ret_cnt)) {
1120                                 resok_val[k].flavor = si->sc_rpcnum;
1121                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1122                                         rpcsec_gss_info *info;
1123 
1124                                         info = &resok_val[k].flavor_info;
1125                                         info->qop = si->sc_qop;
1126                                         info->service = (rpc_gss_svc_t)
1127                                             si->sc_service;
1128 
1129                                         /* get oid opaque data */
1130                                         info->oid.sec_oid4_len =
1131                                             si->sc_gss_mech_type->length;
1132                                         info->oid.sec_oid4_val = kmem_alloc(
1133                                             si->sc_gss_mech_type->length,
1134                                             KM_SLEEP);
1135                                         bcopy(si->sc_gss_mech_type->elements,
1136                                             info->oid.sec_oid4_val,
1137                                             info->oid.sec_oid4_len);
1138                                 }
1139                                 k++;
1140                         }
1141                         if (k >= ret_cnt)
1142                                 break;
1143                 }
1144                 resp->SECINFO4resok_len = ret_cnt;
1145                 resp->SECINFO4resok_val = resok_val;
1146                 kmem_free(flavor_list, count * sizeof (int));
1147         }
1148 
1149         VN_RELE(vp);
1150         return (NFS4_OK);
1151 }
1152 
1153 /*
1154  * SECINFO (Operation 33): Obtain required security information on
1155  * the component name in the format of (security-mechanism-oid, qop, service)
1156  * triplets.
1157  */
1158 /* ARGSUSED */
1159 static void
1160 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1161     struct compound_state *cs)
1162 {
1163         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1164         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1165         utf8string *utfnm = &args->name;
1166         uint_t len;
1167         char *nm;
1168         struct sockaddr *ca;
1169         char *name = NULL;
1170         nfsstat4 status = NFS4_OK;
1171 
1172         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1173             SECINFO4args *, args);
1174 
1175         /*
1176          * Current file handle (cfh) should have been set before getting
1177          * into this function. If not, return error.
1178          */
1179         if (cs->vp == NULL) {
1180                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1181                 goto out;
1182         }
1183 
1184         if (cs->vp->v_type != VDIR) {
1185                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1186                 goto out;
1187         }
1188 
1189         /*
1190          * Verify the component name. If failed, error out, but
1191          * do not error out if the component name is a "..".
1192          * SECINFO will return its parents secinfo data for SECINFO "..".
1193          */
1194         status = utf8_dir_verify(utfnm);
1195         if (status != NFS4_OK) {
1196                 if (utfnm->utf8string_len != 2 ||
1197                     utfnm->utf8string_val[0] != '.' ||
1198                     utfnm->utf8string_val[1] != '.') {
1199                         *cs->statusp = resp->status = status;
1200                         goto out;
1201                 }
1202         }
1203 
1204         nm = utf8_to_str(utfnm, &len, NULL);
1205         if (nm == NULL) {
1206                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1207                 goto out;
1208         }
1209 
1210         if (len > MAXNAMELEN) {
1211                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1212                 kmem_free(nm, len);
1213                 goto out;
1214         }
1215 
1216         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1217         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1218             MAXPATHLEN  + 1);
1219 
1220         if (name == NULL) {
1221                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1222                 kmem_free(nm, len);
1223                 goto out;
1224         }
1225 
1226 
1227         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1228 
1229         if (name != nm)
1230                 kmem_free(name, MAXPATHLEN + 1);
1231         kmem_free(nm, len);
1232 
1233 out:
1234         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1235             SECINFO4res *, resp);
1236 }
1237 
1238 /*
1239  * Free SECINFO result.
1240  */
1241 /* ARGSUSED */
1242 static void
1243 rfs4_op_secinfo_free(nfs_resop4 *resop)
1244 {
1245         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1246         int count, i;
1247         secinfo4 *resok_val;
1248 
1249         /* If this is not an Ok result, nothing to free. */
1250         if (resp->status != NFS4_OK) {
1251                 return;
1252         }
1253 
1254         count = resp->SECINFO4resok_len;
1255         resok_val = resp->SECINFO4resok_val;
1256 
1257         for (i = 0; i < count; i++) {
1258                 if (resok_val[i].flavor == RPCSEC_GSS) {
1259                         rpcsec_gss_info *info;
1260 
1261                         info = &resok_val[i].flavor_info;
1262                         kmem_free(info->oid.sec_oid4_val,
1263                             info->oid.sec_oid4_len);
1264                 }
1265         }
1266         kmem_free(resok_val, count * sizeof (secinfo4));
1267         resp->SECINFO4resok_len = 0;
1268         resp->SECINFO4resok_val = NULL;
1269 }
1270 
1271 /* ARGSUSED */
1272 static void
1273 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1274     struct compound_state *cs)
1275 {
1276         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1277         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1278         int error;
1279         vnode_t *vp;
1280         struct vattr va;
1281         int checkwriteperm;
1282         cred_t *cr = cs->cr;
1283         bslabel_t *clabel, *slabel;
1284         ts_label_t *tslabel;
1285         boolean_t admin_low_client;
1286 
1287         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1288             ACCESS4args *, args);
1289 
1290 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1291         if (cs->access == CS_ACCESS_DENIED) {
1292                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1293                 goto out;
1294         }
1295 #endif
1296         if (cs->vp == NULL) {
1297                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1298                 goto out;
1299         }
1300 
1301         ASSERT(cr != NULL);
1302 
1303         vp = cs->vp;
1304 
1305         /*
1306          * If the file system is exported read only, it is not appropriate
1307          * to check write permissions for regular files and directories.
1308          * Special files are interpreted by the client, so the underlying
1309          * permissions are sent back to the client for interpretation.
1310          */
1311         if (rdonly4(req, cs) &&
1312             (vp->v_type == VREG || vp->v_type == VDIR))
1313                 checkwriteperm = 0;
1314         else
1315                 checkwriteperm = 1;
1316 
1317         /*
1318          * XXX
1319          * We need the mode so that we can correctly determine access
1320          * permissions relative to a mandatory lock file.  Access to
1321          * mandatory lock files is denied on the server, so it might
1322          * as well be reflected to the server during the open.
1323          */
1324         va.va_mask = AT_MODE;
1325         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1326         if (error) {
1327                 *cs->statusp = resp->status = puterrno4(error);
1328                 goto out;
1329         }
1330         resp->access = 0;
1331         resp->supported = 0;
1332 
1333         if (is_system_labeled()) {
1334                 ASSERT(req->rq_label != NULL);
1335                 clabel = req->rq_label;
1336                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1337                     "got client label from request(1)",
1338                     struct svc_req *, req);
1339                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1340                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1341                                 *cs->statusp = resp->status = puterrno4(EACCES);
1342                                 goto out;
1343                         }
1344                         slabel = label2bslabel(tslabel);
1345                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1346                             char *, "got server label(1) for vp(2)",
1347                             bslabel_t *, slabel, vnode_t *, vp);
1348 
1349                         admin_low_client = B_FALSE;
1350                 } else
1351                         admin_low_client = B_TRUE;
1352         }
1353 
1354         if (args->access & ACCESS4_READ) {
1355                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1356                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1357                     (!is_system_labeled() || admin_low_client ||
1358                     bldominates(clabel, slabel)))
1359                         resp->access |= ACCESS4_READ;
1360                 resp->supported |= ACCESS4_READ;
1361         }
1362         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1363                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1364                 if (!error && (!is_system_labeled() || admin_low_client ||
1365                     bldominates(clabel, slabel)))
1366                         resp->access |= ACCESS4_LOOKUP;
1367                 resp->supported |= ACCESS4_LOOKUP;
1368         }
1369         if (checkwriteperm &&
1370             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1371                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1372                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1373                     (!is_system_labeled() || admin_low_client ||
1374                     blequal(clabel, slabel)))
1375                         resp->access |=
1376                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1377                 resp->supported |=
1378                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1379         }
1380 
1381         if (checkwriteperm &&
1382             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1383                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1384                 if (!error && (!is_system_labeled() || admin_low_client ||
1385                     blequal(clabel, slabel)))
1386                         resp->access |= ACCESS4_DELETE;
1387                 resp->supported |= ACCESS4_DELETE;
1388         }
1389         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1390                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1391                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1392                     (!is_system_labeled() || admin_low_client ||
1393                     bldominates(clabel, slabel)))
1394                         resp->access |= ACCESS4_EXECUTE;
1395                 resp->supported |= ACCESS4_EXECUTE;
1396         }
1397 
1398         if (is_system_labeled() && !admin_low_client)
1399                 label_rele(tslabel);
1400 
1401         *cs->statusp = resp->status = NFS4_OK;
1402 out:
1403         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1404             ACCESS4res *, resp);
1405 }
1406 
1407 /* ARGSUSED */
1408 static void
1409 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1410     struct compound_state *cs)
1411 {
1412         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1413         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1414         int error;
1415         vnode_t *vp = cs->vp;
1416         cred_t *cr = cs->cr;
1417         vattr_t va;
1418 
1419         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1420             COMMIT4args *, args);
1421 
1422         if (vp == NULL) {
1423                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1424                 goto out;
1425         }
1426         if (cs->access == CS_ACCESS_DENIED) {
1427                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1428                 goto out;
1429         }
1430 
1431         if (args->offset + args->count < args->offset) {
1432                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1433                 goto out;
1434         }
1435 
1436         va.va_mask = AT_UID;
1437         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1438 
1439         /*
1440          * If we can't get the attributes, then we can't do the
1441          * right access checking.  So, we'll fail the request.
1442          */
1443         if (error) {
1444                 *cs->statusp = resp->status = puterrno4(error);
1445                 goto out;
1446         }
1447         if (rdonly4(req, cs)) {
1448                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1449                 goto out;
1450         }
1451 
1452         if (vp->v_type != VREG) {
1453                 if (vp->v_type == VDIR)
1454                         resp->status = NFS4ERR_ISDIR;
1455                 else
1456                         resp->status = NFS4ERR_INVAL;
1457                 *cs->statusp = resp->status;
1458                 goto out;
1459         }
1460 
1461         if (crgetuid(cr) != va.va_uid &&
1462             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1463                 *cs->statusp = resp->status = puterrno4(error);
1464                 goto out;
1465         }
1466 
1467         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1468 
1469         if (error) {
1470                 *cs->statusp = resp->status = puterrno4(error);
1471                 goto out;
1472         }
1473 
1474         *cs->statusp = resp->status = NFS4_OK;
1475         resp->writeverf = Write4verf;
1476 out:
1477         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1478             COMMIT4res *, resp);
1479 }
1480 
1481 /*
1482  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1483  * was completed. It does the nfsv4 create for special files.
1484  */
1485 /* ARGSUSED */
1486 static vnode_t *
1487 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1488     struct compound_state *cs, vattr_t *vap, char *nm)
1489 {
1490         int error;
1491         cred_t *cr = cs->cr;
1492         vnode_t *dvp = cs->vp;
1493         vnode_t *vp = NULL;
1494         int mode;
1495         enum vcexcl excl;
1496 
1497         switch (args->type) {
1498         case NF4CHR:
1499         case NF4BLK:
1500                 if (secpolicy_sys_devices(cr) != 0) {
1501                         *cs->statusp = resp->status = NFS4ERR_PERM;
1502                         return (NULL);
1503                 }
1504                 if (args->type == NF4CHR)
1505                         vap->va_type = VCHR;
1506                 else
1507                         vap->va_type = VBLK;
1508                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1509                     args->ftype4_u.devdata.specdata2);
1510                 vap->va_mask |= AT_RDEV;
1511                 break;
1512         case NF4SOCK:
1513                 vap->va_type = VSOCK;
1514                 break;
1515         case NF4FIFO:
1516                 vap->va_type = VFIFO;
1517                 break;
1518         default:
1519                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1520                 return (NULL);
1521         }
1522 
1523         /*
1524          * Must specify the mode.
1525          */
1526         if (!(vap->va_mask & AT_MODE)) {
1527                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1528                 return (NULL);
1529         }
1530 
1531         excl = EXCL;
1532 
1533         mode = 0;
1534 
1535         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1536         if (error) {
1537                 *cs->statusp = resp->status = puterrno4(error);
1538                 return (NULL);
1539         }
1540         return (vp);
1541 }
1542 
1543 /*
1544  * nfsv4 create is used to create non-regular files. For regular files,
1545  * use nfsv4 open.
1546  */
1547 /* ARGSUSED */
1548 static void
1549 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1550     struct compound_state *cs)
1551 {
1552         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1553         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1554         int error;
1555         struct vattr bva, iva, iva2, ava, *vap;
1556         cred_t *cr = cs->cr;
1557         vnode_t *dvp = cs->vp;
1558         vnode_t *vp = NULL;
1559         vnode_t *realvp;
1560         char *nm, *lnm;
1561         uint_t len, llen;
1562         int syncval = 0;
1563         struct nfs4_svgetit_arg sarg;
1564         struct nfs4_ntov_table ntov;
1565         struct statvfs64 sb;
1566         nfsstat4 status;
1567         struct sockaddr *ca;
1568         char *name = NULL;
1569         char *lname = NULL;
1570 
1571         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1572             CREATE4args *, args);
1573 
1574         resp->attrset = 0;
1575 
1576         if (dvp == NULL) {
1577                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1578                 goto out;
1579         }
1580 
1581         /*
1582          * If there is an unshared filesystem mounted on this vnode,
1583          * do not allow to create an object in this directory.
1584          */
1585         if (vn_ismntpt(dvp)) {
1586                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1587                 goto out;
1588         }
1589 
1590         /* Verify that type is correct */
1591         switch (args->type) {
1592         case NF4LNK:
1593         case NF4BLK:
1594         case NF4CHR:
1595         case NF4SOCK:
1596         case NF4FIFO:
1597         case NF4DIR:
1598                 break;
1599         default:
1600                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1601                 goto out;
1602         };
1603 
1604         if (cs->access == CS_ACCESS_DENIED) {
1605                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1606                 goto out;
1607         }
1608         if (dvp->v_type != VDIR) {
1609                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1610                 goto out;
1611         }
1612         status = utf8_dir_verify(&args->objname);
1613         if (status != NFS4_OK) {
1614                 *cs->statusp = resp->status = status;
1615                 goto out;
1616         }
1617 
1618         if (rdonly4(req, cs)) {
1619                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1620                 goto out;
1621         }
1622 
1623         /*
1624          * Name of newly created object
1625          */
1626         nm = utf8_to_fn(&args->objname, &len, NULL);
1627         if (nm == NULL) {
1628                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1629                 goto out;
1630         }
1631 
1632         if (len > MAXNAMELEN) {
1633                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1634                 kmem_free(nm, len);
1635                 goto out;
1636         }
1637 
1638         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1639         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1640             MAXPATHLEN  + 1);
1641 
1642         if (name == NULL) {
1643                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1644                 kmem_free(nm, len);
1645                 goto out;
1646         }
1647 
1648         resp->attrset = 0;
1649 
1650         sarg.sbp = &sb;
1651         sarg.is_referral = B_FALSE;
1652         nfs4_ntov_table_init(&ntov);
1653 
1654         status = do_rfs4_set_attrs(&resp->attrset,
1655             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1656 
1657         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1658                 status = NFS4ERR_INVAL;
1659 
1660         if (status != NFS4_OK) {
1661                 *cs->statusp = resp->status = status;
1662                 if (name != nm)
1663                         kmem_free(name, MAXPATHLEN + 1);
1664                 kmem_free(nm, len);
1665                 nfs4_ntov_table_free(&ntov, &sarg);
1666                 resp->attrset = 0;
1667                 goto out;
1668         }
1669 
1670         /* Get "before" change value */
1671         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1672         error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1673         if (error) {
1674                 *cs->statusp = resp->status = puterrno4(error);
1675                 if (name != nm)
1676                         kmem_free(name, MAXPATHLEN + 1);
1677                 kmem_free(nm, len);
1678                 nfs4_ntov_table_free(&ntov, &sarg);
1679                 resp->attrset = 0;
1680                 goto out;
1681         }
1682         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1683 
1684         vap = sarg.vap;
1685 
1686         /*
1687          * Set the default initial values for attributes when the parent
1688          * directory does not have the VSUID/VSGID bit set and they have
1689          * not been specified in createattrs.
1690          */
1691         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1692                 vap->va_uid = crgetuid(cr);
1693                 vap->va_mask |= AT_UID;
1694         }
1695         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1696                 vap->va_gid = crgetgid(cr);
1697                 vap->va_mask |= AT_GID;
1698         }
1699 
1700         vap->va_mask |= AT_TYPE;
1701         switch (args->type) {
1702         case NF4DIR:
1703                 vap->va_type = VDIR;
1704                 if ((vap->va_mask & AT_MODE) == 0) {
1705                         vap->va_mode = 0700; /* default: owner rwx only */
1706                         vap->va_mask |= AT_MODE;
1707                 }
1708                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1709                 if (error)
1710                         break;
1711 
1712                 /*
1713                  * Get the initial "after" sequence number, if it fails,
1714                  * set to zero
1715                  */
1716                 iva.va_mask = AT_SEQ;
1717                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1718                         iva.va_seq = 0;
1719                 break;
1720         case NF4LNK:
1721                 vap->va_type = VLNK;
1722                 if ((vap->va_mask & AT_MODE) == 0) {
1723                         vap->va_mode = 0700; /* default: owner rwx only */
1724                         vap->va_mask |= AT_MODE;
1725                 }
1726 
1727                 /*
1728                  * symlink names must be treated as data
1729                  */
1730                 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1731                     &llen, NULL);
1732 
1733                 if (lnm == NULL) {
1734                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1735                         if (name != nm)
1736                                 kmem_free(name, MAXPATHLEN + 1);
1737                         kmem_free(nm, len);
1738                         nfs4_ntov_table_free(&ntov, &sarg);
1739                         resp->attrset = 0;
1740                         goto out;
1741                 }
1742 
1743                 if (llen > MAXPATHLEN) {
1744                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1745                         if (name != nm)
1746                                 kmem_free(name, MAXPATHLEN + 1);
1747                         kmem_free(nm, len);
1748                         kmem_free(lnm, llen);
1749                         nfs4_ntov_table_free(&ntov, &sarg);
1750                         resp->attrset = 0;
1751                         goto out;
1752                 }
1753 
1754                 lname = nfscmd_convname(ca, cs->exi, lnm,
1755                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1756 
1757                 if (lname == NULL) {
1758                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1759                         if (name != nm)
1760                                 kmem_free(name, MAXPATHLEN + 1);
1761                         kmem_free(nm, len);
1762                         kmem_free(lnm, llen);
1763                         nfs4_ntov_table_free(&ntov, &sarg);
1764                         resp->attrset = 0;
1765                         goto out;
1766                 }
1767 
1768                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1769                 if (lname != lnm)
1770                         kmem_free(lname, MAXPATHLEN + 1);
1771                 kmem_free(lnm, llen);
1772                 if (error)
1773                         break;
1774 
1775                 /*
1776                  * Get the initial "after" sequence number, if it fails,
1777                  * set to zero
1778                  */
1779                 iva.va_mask = AT_SEQ;
1780                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1781                         iva.va_seq = 0;
1782 
1783                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1784                     NULL, NULL, NULL);
1785                 if (error)
1786                         break;
1787 
1788                 /*
1789                  * va_seq is not safe over VOP calls, check it again
1790                  * if it has changed zero out iva to force atomic = FALSE.
1791                  */
1792                 iva2.va_mask = AT_SEQ;
1793                 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1794                     iva2.va_seq != iva.va_seq)
1795                         iva.va_seq = 0;
1796                 break;
1797         default:
1798                 /*
1799                  * probably a special file.
1800                  */
1801                 if ((vap->va_mask & AT_MODE) == 0) {
1802                         vap->va_mode = 0600; /* default: owner rw only */
1803                         vap->va_mask |= AT_MODE;
1804                 }
1805                 syncval = FNODSYNC;
1806                 /*
1807                  * We know this will only generate one VOP call
1808                  */
1809                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1810 
1811                 if (vp == NULL) {
1812                         if (name != nm)
1813                                 kmem_free(name, MAXPATHLEN + 1);
1814                         kmem_free(nm, len);
1815                         nfs4_ntov_table_free(&ntov, &sarg);
1816                         resp->attrset = 0;
1817                         goto out;
1818                 }
1819 
1820                 /*
1821                  * Get the initial "after" sequence number, if it fails,
1822                  * set to zero
1823                  */
1824                 iva.va_mask = AT_SEQ;
1825                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1826                         iva.va_seq = 0;
1827 
1828                 break;
1829         }
1830         if (name != nm)
1831                 kmem_free(name, MAXPATHLEN + 1);
1832         kmem_free(nm, len);
1833 
1834         if (error) {
1835                 *cs->statusp = resp->status = puterrno4(error);
1836         }
1837 
1838         /*
1839          * Force modified data and metadata out to stable storage.
1840          */
1841         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1842 
1843         if (resp->status != NFS4_OK) {
1844                 if (vp != NULL)
1845                         VN_RELE(vp);
1846                 nfs4_ntov_table_free(&ntov, &sarg);
1847                 resp->attrset = 0;
1848                 goto out;
1849         }
1850 
1851         /*
1852          * Finish setup of cinfo response, "before" value already set.
1853          * Get "after" change value, if it fails, simply return the
1854          * before value.
1855          */
1856         ava.va_mask = AT_CTIME|AT_SEQ;
1857         if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1858                 ava.va_ctime = bva.va_ctime;
1859                 ava.va_seq = 0;
1860         }
1861         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1862 
1863         /*
1864          * True verification that object was created with correct
1865          * attrs is impossible.  The attrs could have been changed
1866          * immediately after object creation.  If attributes did
1867          * not verify, the only recourse for the server is to
1868          * destroy the object.  Maybe if some attrs (like gid)
1869          * are set incorrectly, the object should be destroyed;
1870          * however, seems bad as a default policy.  Do we really
1871          * want to destroy an object over one of the times not
1872          * verifying correctly?  For these reasons, the server
1873          * currently sets bits in attrset for createattrs
1874          * that were set; however, no verification is done.
1875          *
1876          * vmask_to_nmask accounts for vattr bits set on create
1877          *      [do_rfs4_set_attrs() only sets resp bits for
1878          *       non-vattr/vfs bits.]
1879          * Mask off any bits set by default so as not to return
1880          * more attrset bits than were requested in createattrs
1881          */
1882         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1883         resp->attrset &= args->createattrs.attrmask;
1884         nfs4_ntov_table_free(&ntov, &sarg);
1885 
1886         error = makefh4(&cs->fh, vp, cs->exi);
1887         if (error) {
1888                 *cs->statusp = resp->status = puterrno4(error);
1889         }
1890 
1891         /*
1892          * The cinfo.atomic = TRUE only if we got no errors, we have
1893          * non-zero va_seq's, and it has incremented by exactly one
1894          * during the creation and it didn't change during the VOP_LOOKUP
1895          * or VOP_FSYNC.
1896          */
1897         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1898             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1899                 resp->cinfo.atomic = TRUE;
1900         else
1901                 resp->cinfo.atomic = FALSE;
1902 
1903         /*
1904          * Force modified metadata out to stable storage.
1905          *
1906          * if a underlying vp exists, pass it to VOP_FSYNC
1907          */
1908         if (VOP_REALVP(vp, &realvp, NULL) == 0)
1909                 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1910         else
1911                 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1912 
1913         if (resp->status != NFS4_OK) {
1914                 VN_RELE(vp);
1915                 goto out;
1916         }
1917         if (cs->vp)
1918                 VN_RELE(cs->vp);
1919 
1920         cs->vp = vp;
1921         *cs->statusp = resp->status = NFS4_OK;
1922 out:
1923         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1924             CREATE4res *, resp);
1925 }
1926 
1927 /*ARGSUSED*/
1928 static void
1929 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1930     struct compound_state *cs)
1931 {
1932         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1933             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1934 
1935         rfs4_op_inval(argop, resop, req, cs);
1936 
1937         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1938             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1939 }
1940 
1941 /*ARGSUSED*/
1942 static void
1943 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1944     struct compound_state *cs)
1945 {
1946         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1947         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1948         rfs4_deleg_state_t *dsp;
1949         nfsstat4 status;
1950 
1951         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1952             DELEGRETURN4args *, args);
1953 
1954         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1955         resp->status = *cs->statusp = status;
1956         if (status != NFS4_OK)
1957                 goto out;
1958 
1959         /* Ensure specified filehandle matches */
1960         if (cs->vp != dsp->rds_finfo->rf_vp) {
1961                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1962         } else
1963                 rfs4_return_deleg(dsp, FALSE);
1964 
1965         rfs4_update_lease(dsp->rds_client);
1966 
1967         rfs4_deleg_state_rele(dsp);
1968 out:
1969         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1970             DELEGRETURN4res *, resp);
1971 }
1972 
1973 /*
1974  * Check to see if a given "flavor" is an explicitly shared flavor.
1975  * The assumption of this routine is the "flavor" is already a valid
1976  * flavor in the secinfo list of "exi".
1977  *
1978  *      e.g.
1979  *              # share -o sec=flavor1 /export
1980  *              # share -o sec=flavor2 /export/home
1981  *
1982  *              flavor2 is not an explicitly shared flavor for /export,
1983  *              however it is in the secinfo list for /export thru the
1984  *              server namespace setup.
1985  */
1986 int
1987 is_exported_sec(int flavor, struct exportinfo *exi)
1988 {
1989         int     i;
1990         struct secinfo *sp;
1991 
1992         sp = exi->exi_export.ex_secinfo;
1993         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1994                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1995                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1996                         return (SEC_REF_EXPORTED(&sp[i]));
1997                 }
1998         }
1999 
2000         /* Should not reach this point based on the assumption */
2001         return (0);
2002 }
2003 
2004 /*
2005  * Check if the security flavor used in the request matches what is
2006  * required at the export point or at the root pseudo node (exi_root).
2007  *
2008  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2009  *
2010  */
2011 static int
2012 secinfo_match_or_authnone(struct compound_state *cs)
2013 {
2014         int     i;
2015         struct secinfo *sp;
2016 
2017         /*
2018          * Check cs->nfsflavor (from the request) against
2019          * the current export data in cs->exi.
2020          */
2021         sp = cs->exi->exi_export.ex_secinfo;
2022         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2023                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2024                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2025                         return (1);
2026         }
2027 
2028         return (0);
2029 }
2030 
2031 /*
2032  * Check the access authority for the client and return the correct error.
2033  */
2034 nfsstat4
2035 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2036 {
2037         int     authres;
2038 
2039         /*
2040          * First, check if the security flavor used in the request
2041          * are among the flavors set in the server namespace.
2042          */
2043         if (!secinfo_match_or_authnone(cs)) {
2044                 *cs->statusp = NFS4ERR_WRONGSEC;
2045                 return (*cs->statusp);
2046         }
2047 
2048         authres = checkauth4(cs, req);
2049 
2050         if (authres > 0) {
2051                 *cs->statusp = NFS4_OK;
2052                 if (! (cs->access & CS_ACCESS_LIMITED))
2053                         cs->access = CS_ACCESS_OK;
2054         } else if (authres == 0) {
2055                 *cs->statusp = NFS4ERR_ACCESS;
2056         } else if (authres == -2) {
2057                 *cs->statusp = NFS4ERR_WRONGSEC;
2058         } else {
2059                 *cs->statusp = NFS4ERR_DELAY;
2060         }
2061         return (*cs->statusp);
2062 }
2063 
2064 /*
2065  * bitmap4_to_attrmask is called by getattr and readdir.
2066  * It sets up the vattr mask and determines whether vfsstat call is needed
2067  * based on the input bitmap.
2068  * Returns nfsv4 status.
2069  */
2070 static nfsstat4
2071 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2072 {
2073         int i;
2074         uint_t  va_mask;
2075         struct statvfs64 *sbp = sargp->sbp;
2076 
2077         sargp->sbp = NULL;
2078         sargp->flag = 0;
2079         sargp->rdattr_error = NFS4_OK;
2080         sargp->mntdfid_set = FALSE;
2081         if (sargp->cs->vp)
2082                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2083                     FH4_ATTRDIR | FH4_NAMEDATTR);
2084         else
2085                 sargp->xattr = 0;
2086 
2087         /*
2088          * Set rdattr_error_req to true if return error per
2089          * failed entry rather than fail the readdir.
2090          */
2091         if (breq & FATTR4_RDATTR_ERROR_MASK)
2092                 sargp->rdattr_error_req = 1;
2093         else
2094                 sargp->rdattr_error_req = 0;
2095 
2096         /*
2097          * generate the va_mask
2098          * Handle the easy cases first
2099          */
2100         switch (breq) {
2101         case NFS4_NTOV_ATTR_MASK:
2102                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2103                 return (NFS4_OK);
2104 
2105         case NFS4_FS_ATTR_MASK:
2106                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2107                 sargp->sbp = sbp;
2108                 return (NFS4_OK);
2109 
2110         case NFS4_NTOV_ATTR_CACHE_MASK:
2111                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2112                 return (NFS4_OK);
2113 
2114         case FATTR4_LEASE_TIME_MASK:
2115                 sargp->vap->va_mask = 0;
2116                 return (NFS4_OK);
2117 
2118         default:
2119                 va_mask = 0;
2120                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2121                         if ((breq & nfs4_ntov_map[i].fbit) &&
2122                             nfs4_ntov_map[i].vbit)
2123                                 va_mask |= nfs4_ntov_map[i].vbit;
2124                 }
2125 
2126                 /*
2127                  * Check is vfsstat is needed
2128                  */
2129                 if (breq & NFS4_FS_ATTR_MASK)
2130                         sargp->sbp = sbp;
2131 
2132                 sargp->vap->va_mask = va_mask;
2133                 return (NFS4_OK);
2134         }
2135         /* NOTREACHED */
2136 }
2137 
2138 /*
2139  * bitmap4_get_sysattrs is called by getattr and readdir.
2140  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2141  * Returns nfsv4 status.
2142  */
2143 static nfsstat4
2144 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2145 {
2146         int error;
2147         struct compound_state *cs = sargp->cs;
2148         vnode_t *vp = cs->vp;
2149 
2150         if (sargp->sbp != NULL) {
2151                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2152                         sargp->sbp = NULL;   /* to identify error */
2153                         return (puterrno4(error));
2154                 }
2155         }
2156 
2157         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2158 }
2159 
2160 static void
2161 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2162 {
2163         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2164             KM_SLEEP);
2165         ntovp->attrcnt = 0;
2166         ntovp->vfsstat = FALSE;
2167 }
2168 
2169 static void
2170 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2171     struct nfs4_svgetit_arg *sargp)
2172 {
2173         int i;
2174         union nfs4_attr_u *na;
2175         uint8_t *amap;
2176 
2177         /*
2178          * XXX Should do the same checks for whether the bit is set
2179          */
2180         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2181             i < ntovp->attrcnt; i++, na++, amap++) {
2182                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2183                     NFS4ATTR_FREEIT, sargp, na);
2184         }
2185         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2186                 /*
2187                  * xdr_free for getattr will be done later
2188                  */
2189                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2190                     i < ntovp->attrcnt; i++, na++, amap++) {
2191                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2192                 }
2193         }
2194         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2195 }
2196 
2197 /*
2198  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2199  */
2200 static nfsstat4
2201 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2202     struct nfs4_svgetit_arg *sargp)
2203 {
2204         int error = 0;
2205         int i, k;
2206         struct nfs4_ntov_table ntov;
2207         XDR xdr;
2208         ulong_t xdr_size;
2209         char *xdr_attrs;
2210         nfsstat4 status = NFS4_OK;
2211         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2212         union nfs4_attr_u *na;
2213         uint8_t *amap;
2214 
2215         sargp->op = NFS4ATTR_GETIT;
2216         sargp->flag = 0;
2217 
2218         fattrp->attrmask = 0;
2219         /* if no bits requested, then return empty fattr4 */
2220         if (breq == 0) {
2221                 fattrp->attrlist4_len = 0;
2222                 fattrp->attrlist4 = NULL;
2223                 return (NFS4_OK);
2224         }
2225 
2226         /*
2227          * return NFS4ERR_INVAL when client requests write-only attrs
2228          */
2229         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2230                 return (NFS4ERR_INVAL);
2231 
2232         nfs4_ntov_table_init(&ntov);
2233         na = ntov.na;
2234         amap = ntov.amap;
2235 
2236         /*
2237          * Now loop to get or verify the attrs
2238          */
2239         for (i = 0; i < nfs4_ntov_map_size; i++) {
2240                 if (breq & nfs4_ntov_map[i].fbit) {
2241                         if ((*nfs4_ntov_map[i].sv_getit)(
2242                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2243 
2244                                 error = (*nfs4_ntov_map[i].sv_getit)(
2245                                     NFS4ATTR_GETIT, sargp, na);
2246 
2247                                 /*
2248                                  * Possible error values:
2249                                  * >0 if sv_getit failed to
2250                                  * get the attr; 0 if succeeded;
2251                                  * <0 if rdattr_error and the
2252                                  * attribute cannot be returned.
2253                                  */
2254                                 if (error && !(sargp->rdattr_error_req))
2255                                         goto done;
2256                                 /*
2257                                  * If error then just for entry
2258                                  */
2259                                 if (error == 0) {
2260                                         fattrp->attrmask |=
2261                                             nfs4_ntov_map[i].fbit;
2262                                         *amap++ =
2263                                             (uint8_t)nfs4_ntov_map[i].nval;
2264                                         na++;
2265                                         (ntov.attrcnt)++;
2266                                 } else if ((error > 0) &&
2267                                     (sargp->rdattr_error == NFS4_OK)) {
2268                                         sargp->rdattr_error = puterrno4(error);
2269                                 }
2270                                 error = 0;
2271                         }
2272                 }
2273         }
2274 
2275         /*
2276          * If rdattr_error was set after the return value for it was assigned,
2277          * update it.
2278          */
2279         if (prev_rdattr_error != sargp->rdattr_error) {
2280                 na = ntov.na;
2281                 amap = ntov.amap;
2282                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2283                         k = *amap;
2284                         if (k < FATTR4_RDATTR_ERROR) {
2285                                 continue;
2286                         }
2287                         if ((k == FATTR4_RDATTR_ERROR) &&
2288                             ((*nfs4_ntov_map[k].sv_getit)(
2289                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2290 
2291                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2292                                     NFS4ATTR_GETIT, sargp, na);
2293                         }
2294                         break;
2295                 }
2296         }
2297 
2298         xdr_size = 0;
2299         na = ntov.na;
2300         amap = ntov.amap;
2301         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2302                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2303         }
2304 
2305         fattrp->attrlist4_len = xdr_size;
2306         if (xdr_size) {
2307                 /* freed by rfs4_op_getattr_free() */
2308                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2309 
2310                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2311 
2312                 na = ntov.na;
2313                 amap = ntov.amap;
2314                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2315                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2316                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2317                                     int, *amap);
2318                                 status = NFS4ERR_SERVERFAULT;
2319                                 break;
2320                         }
2321                 }
2322                 /* xdrmem_destroy(&xdrs); */        /* NO-OP */
2323         } else {
2324                 fattrp->attrlist4 = NULL;
2325         }
2326 done:
2327 
2328         nfs4_ntov_table_free(&ntov, sargp);
2329 
2330         if (error != 0)
2331                 status = puterrno4(error);
2332 
2333         return (status);
2334 }
2335 
2336 /* ARGSUSED */
2337 static void
2338 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2339     struct compound_state *cs)
2340 {
2341         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2342         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2343         struct nfs4_svgetit_arg sarg;
2344         struct statvfs64 sb;
2345         nfsstat4 status;
2346 
2347         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2348             GETATTR4args *, args);
2349 
2350         if (cs->vp == NULL) {
2351                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2352                 goto out;
2353         }
2354 
2355         if (cs->access == CS_ACCESS_DENIED) {
2356                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2357                 goto out;
2358         }
2359 
2360         sarg.sbp = &sb;
2361         sarg.cs = cs;
2362         sarg.is_referral = B_FALSE;
2363 
2364         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2365         if (status == NFS4_OK) {
2366 
2367                 status = bitmap4_get_sysattrs(&sarg);
2368                 if (status == NFS4_OK) {
2369 
2370                         /* Is this a referral? */
2371                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2372                                 /* Older V4 Solaris client sees a link */
2373                                 if (client_is_downrev(req))
2374                                         sarg.vap->va_type = VLNK;
2375                                 else
2376                                         sarg.is_referral = B_TRUE;
2377                         }
2378 
2379                         status = do_rfs4_op_getattr(args->attr_request,
2380                             &resp->obj_attributes, &sarg);
2381                 }
2382         }
2383         *cs->statusp = resp->status = status;
2384 out:
2385         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2386             GETATTR4res *, resp);
2387 }
2388 
2389 static void
2390 rfs4_op_getattr_free(nfs_resop4 *resop)
2391 {
2392         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2393 
2394         nfs4_fattr4_free(&resp->obj_attributes);
2395 }
2396 
2397 /* ARGSUSED */
2398 static void
2399 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2400     struct compound_state *cs)
2401 {
2402         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2403 
2404         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2405 
2406         if (cs->vp == NULL) {
2407                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2408                 goto out;
2409         }
2410         if (cs->access == CS_ACCESS_DENIED) {
2411                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2412                 goto out;
2413         }
2414 
2415         /* check for reparse point at the share point */
2416         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2417                 /* it's all bad */
2418                 cs->exi->exi_moved = 1;
2419                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2420                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2421                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2422                 return;
2423         }
2424 
2425         /* check for reparse point at vp */
2426         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2427                 /* it's not all bad */
2428                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2429                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2430                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2431                 return;
2432         }
2433 
2434         resp->object.nfs_fh4_val =
2435             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2436         nfs_fh4_copy(&cs->fh, &resp->object);
2437         *cs->statusp = resp->status = NFS4_OK;
2438 out:
2439         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2440             GETFH4res *, resp);
2441 }
2442 
2443 static void
2444 rfs4_op_getfh_free(nfs_resop4 *resop)
2445 {
2446         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2447 
2448         if (resp->status == NFS4_OK &&
2449             resp->object.nfs_fh4_val != NULL) {
2450                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2451                 resp->object.nfs_fh4_val = NULL;
2452                 resp->object.nfs_fh4_len = 0;
2453         }
2454 }
2455 
2456 /*
2457  * illegal: args: void
2458  *          res : status (NFS4ERR_OP_ILLEGAL)
2459  */
2460 /* ARGSUSED */
2461 static void
2462 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2463     struct svc_req *req, struct compound_state *cs)
2464 {
2465         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2466 
2467         resop->resop = OP_ILLEGAL;
2468         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2469 }
2470 
2471 /*
2472  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2473  *       res: status. If success - CURRENT_FH unchanged, return change_info
2474  */
2475 /* ARGSUSED */
2476 static void
2477 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2478     struct compound_state *cs)
2479 {
2480         LINK4args *args = &argop->nfs_argop4_u.oplink;
2481         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2482         int error;
2483         vnode_t *vp;
2484         vnode_t *dvp;
2485         struct vattr bdva, idva, adva;
2486         char *nm;
2487         uint_t  len;
2488         struct sockaddr *ca;
2489         char *name = NULL;
2490         nfsstat4 status;
2491 
2492         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2493             LINK4args *, args);
2494 
2495         /* SAVED_FH: source object */
2496         vp = cs->saved_vp;
2497         if (vp == NULL) {
2498                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2499                 goto out;
2500         }
2501 
2502         /* CURRENT_FH: target directory */
2503         dvp = cs->vp;
2504         if (dvp == NULL) {
2505                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2506                 goto out;
2507         }
2508 
2509         /*
2510          * If there is a non-shared filesystem mounted on this vnode,
2511          * do not allow to link any file in this directory.
2512          */
2513         if (vn_ismntpt(dvp)) {
2514                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2515                 goto out;
2516         }
2517 
2518         if (cs->access == CS_ACCESS_DENIED) {
2519                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2520                 goto out;
2521         }
2522 
2523         /* Check source object's type validity */
2524         if (vp->v_type == VDIR) {
2525                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2526                 goto out;
2527         }
2528 
2529         /* Check target directory's type */
2530         if (dvp->v_type != VDIR) {
2531                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2532                 goto out;
2533         }
2534 
2535         if (cs->saved_exi != cs->exi) {
2536                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2537                 goto out;
2538         }
2539 
2540         status = utf8_dir_verify(&args->newname);
2541         if (status != NFS4_OK) {
2542                 *cs->statusp = resp->status = status;
2543                 goto out;
2544         }
2545 
2546         nm = utf8_to_fn(&args->newname, &len, NULL);
2547         if (nm == NULL) {
2548                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2549                 goto out;
2550         }
2551 
2552         if (len > MAXNAMELEN) {
2553                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2554                 kmem_free(nm, len);
2555                 goto out;
2556         }
2557 
2558         if (rdonly4(req, cs)) {
2559                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2560                 kmem_free(nm, len);
2561                 goto out;
2562         }
2563 
2564         /* Get "before" change value */
2565         bdva.va_mask = AT_CTIME|AT_SEQ;
2566         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2567         if (error) {
2568                 *cs->statusp = resp->status = puterrno4(error);
2569                 kmem_free(nm, len);
2570                 goto out;
2571         }
2572 
2573         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2574         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2575             MAXPATHLEN  + 1);
2576 
2577         if (name == NULL) {
2578                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2579                 kmem_free(nm, len);
2580                 goto out;
2581         }
2582 
2583         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2584 
2585         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2586 
2587         if (nm != name)
2588                 kmem_free(name, MAXPATHLEN + 1);
2589         kmem_free(nm, len);
2590 
2591         /*
2592          * Get the initial "after" sequence number, if it fails, set to zero
2593          */
2594         idva.va_mask = AT_SEQ;
2595         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2596                 idva.va_seq = 0;
2597 
2598         /*
2599          * Force modified data and metadata out to stable storage.
2600          */
2601         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2602         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2603 
2604         if (error) {
2605                 *cs->statusp = resp->status = puterrno4(error);
2606                 goto out;
2607         }
2608 
2609         /*
2610          * Get "after" change value, if it fails, simply return the
2611          * before value.
2612          */
2613         adva.va_mask = AT_CTIME|AT_SEQ;
2614         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2615                 adva.va_ctime = bdva.va_ctime;
2616                 adva.va_seq = 0;
2617         }
2618 
2619         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2620 
2621         /*
2622          * The cinfo.atomic = TRUE only if we have
2623          * non-zero va_seq's, and it has incremented by exactly one
2624          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2625          */
2626         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2627             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2628                 resp->cinfo.atomic = TRUE;
2629         else
2630                 resp->cinfo.atomic = FALSE;
2631 
2632         *cs->statusp = resp->status = NFS4_OK;
2633 out:
2634         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2635             LINK4res *, resp);
2636 }
2637 
2638 /*
2639  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2640  */
2641 
2642 /* ARGSUSED */
2643 static nfsstat4
2644 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2645 {
2646         int error;
2647         int different_export = 0;
2648         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2649         struct exportinfo *exi = NULL, *pre_exi = NULL;
2650         nfsstat4 stat;
2651         fid_t fid;
2652         int attrdir, dotdot, walk;
2653         bool_t is_newvp = FALSE;
2654 
2655         if (cs->vp->v_flag & V_XATTRDIR) {
2656                 attrdir = 1;
2657                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2658         } else {
2659                 attrdir = 0;
2660                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2661         }
2662 
2663         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2664 
2665         /*
2666          * If dotdotting, then need to check whether it's
2667          * above the root of a filesystem, or above an
2668          * export point.
2669          */
2670         if (dotdot) {
2671 
2672                 /*
2673                  * If dotdotting at the root of a filesystem, then
2674                  * need to traverse back to the mounted-on filesystem
2675                  * and do the dotdot lookup there.
2676                  */
2677                 if (cs->vp->v_flag & VROOT) {
2678 
2679                         /*
2680                          * If at the system root, then can
2681                          * go up no further.
2682                          */
2683                         if (VN_CMP(cs->vp, rootdir))
2684                                 return (puterrno4(ENOENT));
2685 
2686                         /*
2687                          * Traverse back to the mounted-on filesystem
2688                          */
2689                         cs->vp = untraverse(cs->vp);
2690 
2691                         /*
2692                          * Set the different_export flag so we remember
2693                          * to pick up a new exportinfo entry for
2694                          * this new filesystem.
2695                          */
2696                         different_export = 1;
2697                 } else {
2698 
2699                         /*
2700                          * If dotdotting above an export point then set
2701                          * the different_export to get new export info.
2702                          */
2703                         different_export = nfs_exported(cs->exi, cs->vp);
2704                 }
2705         }
2706 
2707         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2708             NULL, NULL, NULL);
2709         if (error)
2710                 return (puterrno4(error));
2711 
2712         /*
2713          * If the vnode is in a pseudo filesystem, check whether it is visible.
2714          *
2715          * XXX if the vnode is a symlink and it is not visible in
2716          * a pseudo filesystem, return ENOENT (not following symlink).
2717          * V4 client can not mount such symlink. This is a regression
2718          * from V2/V3.
2719          *
2720          * In the same exported filesystem, if the security flavor used
2721          * is not an explicitly shared flavor, limit the view to the visible
2722          * list entries only. This is not a WRONGSEC case because it's already
2723          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2724          */
2725         if (!different_export &&
2726             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2727             cs->access & CS_ACCESS_LIMITED)) {
2728                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2729                         VN_RELE(vp);
2730                         return (puterrno4(ENOENT));
2731                 }
2732         }
2733 
2734         /*
2735          * If it's a mountpoint, then traverse it.
2736          */
2737         if (vn_ismntpt(vp)) {
2738                 pre_exi = cs->exi;   /* save pre-traversed exportinfo */
2739                 pre_tvp = vp;           /* save pre-traversed vnode     */
2740 
2741                 /*
2742                  * hold pre_tvp to counteract rele by traverse.  We will
2743                  * need pre_tvp below if checkexport4 fails
2744                  */
2745                 VN_HOLD(pre_tvp);
2746                 if ((error = traverse(&vp)) != 0) {
2747                         VN_RELE(vp);
2748                         VN_RELE(pre_tvp);
2749                         return (puterrno4(error));
2750                 }
2751                 different_export = 1;
2752         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2753                 /*
2754                  * The vfsp comparison is to handle the case where
2755                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2756                  * and NFS is unaware of local fs transistions because
2757                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2758                  * the dir and the obj returned by lookup will have different
2759                  * vfs ptrs.
2760                  */
2761                 different_export = 1;
2762         }
2763 
2764         if (different_export) {
2765 
2766                 bzero(&fid, sizeof (fid));
2767                 fid.fid_len = MAXFIDSZ;
2768                 error = vop_fid_pseudo(vp, &fid);
2769                 if (error) {
2770                         VN_RELE(vp);
2771                         if (pre_tvp)
2772                                 VN_RELE(pre_tvp);
2773                         return (puterrno4(error));
2774                 }
2775 
2776                 if (dotdot)
2777                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2778                 else
2779                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2780 
2781                 if (exi == NULL) {
2782                         if (pre_tvp) {
2783                                 /*
2784                                  * If this vnode is a mounted-on vnode,
2785                                  * but the mounted-on file system is not
2786                                  * exported, send back the filehandle for
2787                                  * the mounted-on vnode, not the root of
2788                                  * the mounted-on file system.
2789                                  */
2790                                 VN_RELE(vp);
2791                                 vp = pre_tvp;
2792                                 exi = pre_exi;
2793                         } else {
2794                                 VN_RELE(vp);
2795                                 return (puterrno4(EACCES));
2796                         }
2797                 } else if (pre_tvp) {
2798                         /* we're done with pre_tvp now. release extra hold */
2799                         VN_RELE(pre_tvp);
2800                 }
2801 
2802                 cs->exi = exi;
2803 
2804                 /*
2805                  * Now we do a checkauth4. The reason is that
2806                  * this client/user may not have access to the new
2807                  * exported file system, and if he does,
2808                  * the client/user may be mapped to a different uid.
2809                  *
2810                  * We start with a new cr, because the checkauth4 done
2811                  * in the PUT*FH operation over wrote the cred's uid,
2812                  * gid, etc, and we want the real thing before calling
2813                  * checkauth4()
2814                  */
2815                 crfree(cs->cr);
2816                 cs->cr = crdup(cs->basecr);
2817 
2818                 oldvp = cs->vp;
2819                 cs->vp = vp;
2820                 is_newvp = TRUE;
2821 
2822                 stat = call_checkauth4(cs, req);
2823                 if (stat != NFS4_OK) {
2824                         VN_RELE(cs->vp);
2825                         cs->vp = oldvp;
2826                         return (stat);
2827                 }
2828         }
2829 
2830         /*
2831          * After various NFS checks, do a label check on the path
2832          * component. The label on this path should either be the
2833          * global zone's label or a zone's label. We are only
2834          * interested in the zone's label because exported files
2835          * in global zone is accessible (though read-only) to
2836          * clients. The exportability/visibility check is already
2837          * done before reaching this code.
2838          */
2839         if (is_system_labeled()) {
2840                 bslabel_t *clabel;
2841 
2842                 ASSERT(req->rq_label != NULL);
2843                 clabel = req->rq_label;
2844                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2845                     "got client label from request(1)", struct svc_req *, req);
2846 
2847                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2848                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2849                             cs->exi)) {
2850                                 error = EACCES;
2851                                 goto err_out;
2852                         }
2853                 } else {
2854                         /*
2855                          * We grant access to admin_low label clients
2856                          * only if the client is trusted, i.e. also
2857                          * running Solaris Trusted Extension.
2858                          */
2859                         struct sockaddr *ca;
2860                         int             addr_type;
2861                         void            *ipaddr;
2862                         tsol_tpc_t      *tp;
2863 
2864                         ca = (struct sockaddr *)svc_getrpccaller(
2865                             req->rq_xprt)->buf;
2866                         if (ca->sa_family == AF_INET) {
2867                                 addr_type = IPV4_VERSION;
2868                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2869                         } else if (ca->sa_family == AF_INET6) {
2870                                 addr_type = IPV6_VERSION;
2871                                 ipaddr = &((struct sockaddr_in6 *)
2872                                     ca)->sin6_addr;
2873                         }
2874                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2875                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2876                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2877                             SUN_CIPSO) {
2878                                 if (tp != NULL)
2879                                         TPC_RELE(tp);
2880                                 error = EACCES;
2881                                 goto err_out;
2882                         }
2883                         TPC_RELE(tp);
2884                 }
2885         }
2886 
2887         error = makefh4(&cs->fh, vp, cs->exi);
2888 
2889 err_out:
2890         if (error) {
2891                 if (is_newvp) {
2892                         VN_RELE(cs->vp);
2893                         cs->vp = oldvp;
2894                 } else
2895                         VN_RELE(vp);
2896                 return (puterrno4(error));
2897         }
2898 
2899         if (!is_newvp) {
2900                 if (cs->vp)
2901                         VN_RELE(cs->vp);
2902                 cs->vp = vp;
2903         } else if (oldvp)
2904                 VN_RELE(oldvp);
2905 
2906         /*
2907          * if did lookup on attrdir and didn't lookup .., set named
2908          * attr fh flag
2909          */
2910         if (attrdir && ! dotdot)
2911                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2912 
2913         /* Assume false for now, open proc will set this */
2914         cs->mandlock = FALSE;
2915 
2916         return (NFS4_OK);
2917 }
2918 
2919 /* ARGSUSED */
2920 static void
2921 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2922     struct compound_state *cs)
2923 {
2924         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2925         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2926         char *nm;
2927         uint_t len;
2928         struct sockaddr *ca;
2929         char *name = NULL;
2930         nfsstat4 status;
2931 
2932         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2933             LOOKUP4args *, args);
2934 
2935         if (cs->vp == NULL) {
2936                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2937                 goto out;
2938         }
2939 
2940         if (cs->vp->v_type == VLNK) {
2941                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2942                 goto out;
2943         }
2944 
2945         if (cs->vp->v_type != VDIR) {
2946                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2947                 goto out;
2948         }
2949 
2950         status = utf8_dir_verify(&args->objname);
2951         if (status != NFS4_OK) {
2952                 *cs->statusp = resp->status = status;
2953                 goto out;
2954         }
2955 
2956         nm = utf8_to_str(&args->objname, &len, NULL);
2957         if (nm == NULL) {
2958                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2959                 goto out;
2960         }
2961 
2962         if (len > MAXNAMELEN) {
2963                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2964                 kmem_free(nm, len);
2965                 goto out;
2966         }
2967 
2968         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2969         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2970             MAXPATHLEN  + 1);
2971 
2972         if (name == NULL) {
2973                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2974                 kmem_free(nm, len);
2975                 goto out;
2976         }
2977 
2978         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2979 
2980         if (name != nm)
2981                 kmem_free(name, MAXPATHLEN + 1);
2982         kmem_free(nm, len);
2983 
2984 out:
2985         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2986             LOOKUP4res *, resp);
2987 }
2988 
2989 /* ARGSUSED */
2990 static void
2991 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2992     struct compound_state *cs)
2993 {
2994         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2995 
2996         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2997 
2998         if (cs->vp == NULL) {
2999                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3000                 goto out;
3001         }
3002 
3003         if (cs->vp->v_type != VDIR) {
3004                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3005                 goto out;
3006         }
3007 
3008         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3009 
3010         /*
3011          * From NFSV4 Specification, LOOKUPP should not check for
3012          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3013          */
3014         if (resp->status == NFS4ERR_WRONGSEC) {
3015                 *cs->statusp = resp->status = NFS4_OK;
3016         }
3017 
3018 out:
3019         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3020             LOOKUPP4res *, resp);
3021 }
3022 
3023 
3024 /*ARGSUSED2*/
3025 static void
3026 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3027     struct compound_state *cs)
3028 {
3029         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3030         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3031         vnode_t         *avp = NULL;
3032         int             lookup_flags = LOOKUP_XATTR, error;
3033         int             exp_ro = 0;
3034 
3035         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3036             OPENATTR4args *, args);
3037 
3038         if (cs->vp == NULL) {
3039                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3040                 goto out;
3041         }
3042 
3043         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3044             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3045                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3046                 goto out;
3047         }
3048 
3049         /*
3050          * If file system supports passing ACE mask to VOP_ACCESS then
3051          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3052          */
3053 
3054         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3055                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3056                     V_ACE_MASK, cs->cr, NULL);
3057         else
3058                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3059                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3060                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3061 
3062         if (error) {
3063                 *cs->statusp = resp->status = puterrno4(EACCES);
3064                 goto out;
3065         }
3066 
3067         /*
3068          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3069          * the file system is exported read-only -- regardless of
3070          * createdir flag.  Otherwise the attrdir would be created
3071          * (assuming server fs isn't mounted readonly locally).  If
3072          * VOP_LOOKUP returns ENOENT in this case, the error will
3073          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3074          * because specfs has no VOP_LOOKUP op, so the macro would
3075          * return ENOSYS.  EINVAL is returned by all (current)
3076          * Solaris file system implementations when any of their
3077          * restrictions are violated (xattr(dir) can't have xattrdir).
3078          * Returning NOTSUPP is more appropriate in this case
3079          * because the object will never be able to have an attrdir.
3080          */
3081         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3082                 lookup_flags |= CREATE_XATTR_DIR;
3083 
3084         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3085             NULL, NULL, NULL);
3086 
3087         if (error) {
3088                 if (error == ENOENT && args->createdir && exp_ro)
3089                         *cs->statusp = resp->status = puterrno4(EROFS);
3090                 else if (error == EINVAL || error == ENOSYS)
3091                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3092                 else
3093                         *cs->statusp = resp->status = puterrno4(error);
3094                 goto out;
3095         }
3096 
3097         ASSERT(avp->v_flag & V_XATTRDIR);
3098 
3099         error = makefh4(&cs->fh, avp, cs->exi);
3100 
3101         if (error) {
3102                 VN_RELE(avp);
3103                 *cs->statusp = resp->status = puterrno4(error);
3104                 goto out;
3105         }
3106 
3107         VN_RELE(cs->vp);
3108         cs->vp = avp;
3109 
3110         /*
3111          * There is no requirement for an attrdir fh flag
3112          * because the attrdir has a vnode flag to distinguish
3113          * it from regular (non-xattr) directories.  The
3114          * FH4_ATTRDIR flag is set for future sanity checks.
3115          */
3116         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3117         *cs->statusp = resp->status = NFS4_OK;
3118 
3119 out:
3120         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3121             OPENATTR4res *, resp);
3122 }
3123 
3124 static int
3125 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3126     caller_context_t *ct)
3127 {
3128         int error;
3129         int i;
3130         clock_t delaytime;
3131 
3132         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3133 
3134         /*
3135          * Don't block on mandatory locks. If this routine returns
3136          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3137          */
3138         uio->uio_fmode = FNONBLOCK;
3139 
3140         for (i = 0; i < rfs4_maxlock_tries; i++) {
3141 
3142 
3143                 if (direction == FREAD) {
3144                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3145                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3146                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3147                 } else {
3148                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3149                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3150                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3151                 }
3152 
3153                 if (error != EAGAIN)
3154                         break;
3155 
3156                 if (i < rfs4_maxlock_tries - 1) {
3157                         delay(delaytime);
3158                         delaytime *= 2;
3159                 }
3160         }
3161 
3162         return (error);
3163 }
3164 
3165 /* ARGSUSED */
3166 static void
3167 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3168     struct compound_state *cs)
3169 {
3170         READ4args *args = &argop->nfs_argop4_u.opread;
3171         READ4res *resp = &resop->nfs_resop4_u.opread;
3172         int error;
3173         int verror;
3174         vnode_t *vp;
3175         struct vattr va;
3176         struct iovec iov, *iovp = NULL;
3177         int iovcnt;
3178         struct uio uio;
3179         u_offset_t offset;
3180         bool_t *deleg = &cs->deleg;
3181         nfsstat4 stat;
3182         int in_crit = 0;
3183         mblk_t *mp = NULL;
3184         int alloc_err = 0;
3185         int rdma_used = 0;
3186         int loaned_buffers;
3187         caller_context_t ct;
3188         struct uio *uiop;
3189 
3190         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3191             READ4args, args);
3192 
3193         vp = cs->vp;
3194         if (vp == NULL) {
3195                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3196                 goto out;
3197         }
3198         if (cs->access == CS_ACCESS_DENIED) {
3199                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3200                 goto out;
3201         }
3202 
3203         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3204             deleg, TRUE, &ct)) != NFS4_OK) {
3205                 *cs->statusp = resp->status = stat;
3206                 goto out;
3207         }
3208 
3209         /*
3210          * Enter the critical region before calling VOP_RWLOCK
3211          * to avoid a deadlock with write requests.
3212          */
3213         if (nbl_need_check(vp)) {
3214                 nbl_start_crit(vp, RW_READER);
3215                 in_crit = 1;
3216                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3217                     &ct)) {
3218                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3219                         goto out;
3220                 }
3221         }
3222 
3223         if (args->wlist) {
3224                 if (args->count > clist_len(args->wlist)) {
3225                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3226                         goto out;
3227                 }
3228                 rdma_used = 1;
3229         }
3230 
3231         /* use loaned buffers for TCP */
3232         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3233 
3234         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3235         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3236 
3237         /*
3238          * If we can't get the attributes, then we can't do the
3239          * right access checking.  So, we'll fail the request.
3240          */
3241         if (verror) {
3242                 *cs->statusp = resp->status = puterrno4(verror);
3243                 goto out;
3244         }
3245 
3246         if (vp->v_type != VREG) {
3247                 *cs->statusp = resp->status =
3248                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3249                 goto out;
3250         }
3251 
3252         if (crgetuid(cs->cr) != va.va_uid &&
3253             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3254             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3255                 *cs->statusp = resp->status = puterrno4(error);
3256                 goto out;
3257         }
3258 
3259         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3260                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3261                 goto out;
3262         }
3263 
3264         offset = args->offset;
3265         if (offset >= va.va_size) {
3266                 *cs->statusp = resp->status = NFS4_OK;
3267                 resp->eof = TRUE;
3268                 resp->data_len = 0;
3269                 resp->data_val = NULL;
3270                 resp->mblk = NULL;
3271                 /* RDMA */
3272                 resp->wlist = args->wlist;
3273                 resp->wlist_len = resp->data_len;
3274                 *cs->statusp = resp->status = NFS4_OK;
3275                 if (resp->wlist)
3276                         clist_zero_len(resp->wlist);
3277                 goto out;
3278         }
3279 
3280         if (args->count == 0) {
3281                 *cs->statusp = resp->status = NFS4_OK;
3282                 resp->eof = FALSE;
3283                 resp->data_len = 0;
3284                 resp->data_val = NULL;
3285                 resp->mblk = NULL;
3286                 /* RDMA */
3287                 resp->wlist = args->wlist;
3288                 resp->wlist_len = resp->data_len;
3289                 if (resp->wlist)
3290                         clist_zero_len(resp->wlist);
3291                 goto out;
3292         }
3293 
3294         /*
3295          * Do not allocate memory more than maximum allowed
3296          * transfer size
3297          */
3298         if (args->count > rfs4_tsize(req))
3299                 args->count = rfs4_tsize(req);
3300 
3301         if (loaned_buffers) {
3302                 uiop = (uio_t *)rfs_setup_xuio(vp);
3303                 ASSERT(uiop != NULL);
3304                 uiop->uio_segflg = UIO_SYSSPACE;
3305                 uiop->uio_loffset = args->offset;
3306                 uiop->uio_resid = args->count;
3307 
3308                 /* Jump to do the read if successful */
3309                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3310                         /*
3311                          * Need to hold the vnode until after VOP_RETZCBUF()
3312                          * is called.
3313                          */
3314                         VN_HOLD(vp);
3315                         goto doio_read;
3316                 }
3317 
3318                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3319                     uiop->uio_loffset, int, uiop->uio_resid);
3320 
3321                 uiop->uio_extflg = 0;
3322 
3323                 /* failure to setup for zero copy */
3324                 rfs_free_xuio((void *)uiop);
3325                 loaned_buffers = 0;
3326         }
3327 
3328         /*
3329          * If returning data via RDMA Write, then grab the chunk list. If we
3330          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3331          */
3332         if (rdma_used) {
3333                 mp = NULL;
3334                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3335                 uio.uio_iov = &iov;
3336                 uio.uio_iovcnt = 1;
3337         } else {
3338                 /*
3339                  * mp will contain the data to be sent out in the read reply.
3340                  * It will be freed after the reply has been sent.
3341                  */
3342                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3343                 ASSERT(mp != NULL);
3344                 ASSERT(alloc_err == 0);
3345                 uio.uio_iov = iovp;
3346                 uio.uio_iovcnt = iovcnt;
3347         }
3348 
3349         uio.uio_segflg = UIO_SYSSPACE;
3350         uio.uio_extflg = UIO_COPY_CACHED;
3351         uio.uio_loffset = args->offset;
3352         uio.uio_resid = args->count;
3353         uiop = &uio;
3354 
3355 doio_read:
3356         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3357 
3358         va.va_mask = AT_SIZE;
3359         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3360 
3361         if (error) {
3362                 if (mp)
3363                         freemsg(mp);
3364                 *cs->statusp = resp->status = puterrno4(error);
3365                 goto out;
3366         }
3367 
3368         /* make mblk using zc buffers */
3369         if (loaned_buffers) {
3370                 mp = uio_to_mblk(uiop);
3371                 ASSERT(mp != NULL);
3372         }
3373 
3374         *cs->statusp = resp->status = NFS4_OK;
3375 
3376         ASSERT(uiop->uio_resid >= 0);
3377         resp->data_len = args->count - uiop->uio_resid;
3378         if (mp) {
3379                 resp->data_val = (char *)mp->b_datap->db_base;
3380                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3381         } else {
3382                 resp->data_val = (caddr_t)iov.iov_base;
3383         }
3384 
3385         resp->mblk = mp;
3386 
3387         if (!verror && offset + resp->data_len == va.va_size)
3388                 resp->eof = TRUE;
3389         else
3390                 resp->eof = FALSE;
3391 
3392         if (rdma_used) {
3393                 if (!rdma_setup_read_data4(args, resp)) {
3394                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3395                 }
3396         } else {
3397                 resp->wlist = NULL;
3398         }
3399 
3400 out:
3401         if (in_crit)
3402                 nbl_end_crit(vp);
3403 
3404         if (iovp != NULL)
3405                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3406 
3407         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3408             READ4res *, resp);
3409 }
3410 
3411 static void
3412 rfs4_op_read_free(nfs_resop4 *resop)
3413 {
3414         READ4res        *resp = &resop->nfs_resop4_u.opread;
3415 
3416         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3417                 freemsg(resp->mblk);
3418                 resp->mblk = NULL;
3419                 resp->data_val = NULL;
3420                 resp->data_len = 0;
3421         }
3422 }
3423 
3424 static void
3425 rfs4_op_readdir_free(nfs_resop4 * resop)
3426 {
3427         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3428 
3429         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3430                 freeb(resp->mblk);
3431                 resp->mblk = NULL;
3432                 resp->data_len = 0;
3433         }
3434 }
3435 
3436 
3437 /* ARGSUSED */
3438 static void
3439 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3440     struct compound_state *cs)
3441 {
3442         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3443         int             error;
3444         vnode_t         *vp;
3445         struct exportinfo *exi, *sav_exi;
3446         nfs_fh4_fmt_t   *fh_fmtp;
3447 
3448         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3449 
3450         if (cs->vp) {
3451                 VN_RELE(cs->vp);
3452                 cs->vp = NULL;
3453         }
3454 
3455         if (cs->cr)
3456                 crfree(cs->cr);
3457 
3458         cs->cr = crdup(cs->basecr);
3459 
3460         vp = exi_public->exi_vp;
3461         if (vp == NULL) {
3462                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3463                 goto out;
3464         }
3465 
3466         error = makefh4(&cs->fh, vp, exi_public);
3467         if (error != 0) {
3468                 *cs->statusp = resp->status = puterrno4(error);
3469                 goto out;
3470         }
3471         sav_exi = cs->exi;
3472         if (exi_public == exi_root) {
3473                 /*
3474                  * No filesystem is actually shared public, so we default
3475                  * to exi_root. In this case, we must check whether root
3476                  * is exported.
3477                  */
3478                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3479 
3480                 /*
3481                  * if root filesystem is exported, the exportinfo struct that we
3482                  * should use is what checkexport4 returns, because root_exi is
3483                  * actually a mostly empty struct.
3484                  */
3485                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3486                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3487                 cs->exi = ((exi != NULL) ? exi : exi_public);
3488         } else {
3489                 /*
3490                  * it's a properly shared filesystem
3491                  */
3492                 cs->exi = exi_public;
3493         }
3494 
3495         if (is_system_labeled()) {
3496                 bslabel_t *clabel;
3497 
3498                 ASSERT(req->rq_label != NULL);
3499                 clabel = req->rq_label;
3500                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3501                     "got client label from request(1)",
3502                     struct svc_req *, req);
3503                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3504                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3505                             cs->exi)) {
3506                                 *cs->statusp = resp->status =
3507                                     NFS4ERR_SERVERFAULT;
3508                                 goto out;
3509                         }
3510                 }
3511         }
3512 
3513         VN_HOLD(vp);
3514         cs->vp = vp;
3515 
3516         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3517                 VN_RELE(cs->vp);
3518                 cs->vp = NULL;
3519                 cs->exi = sav_exi;
3520                 goto out;
3521         }
3522 
3523         *cs->statusp = resp->status = NFS4_OK;
3524 out:
3525         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3526             PUTPUBFH4res *, resp);
3527 }
3528 
3529 /*
3530  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3531  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3532  * or joe have restrictive search permissions, then we shouldn't let
3533  * the client get a file handle. This is easy to enforce. However, we
3534  * don't know what security flavor should be used until we resolve the
3535  * path name. Another complication is uid mapping. If root is
3536  * the user, then it will be mapped to the anonymous user by default,
3537  * but we won't know that till we've resolved the path name. And we won't
3538  * know what the anonymous user is.
3539  * Luckily, SECINFO is specified to take a full filename.
3540  * So what we will have to in rfs4_op_lookup is check that flavor of
3541  * the target object matches that of the request, and if root was the
3542  * caller, check for the root= and anon= options, and if necessary,
3543  * repeat the lookup using the right cred_t. But that's not done yet.
3544  */
3545 /* ARGSUSED */
3546 static void
3547 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3548     struct compound_state *cs)
3549 {
3550         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3551         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3552         nfs_fh4_fmt_t *fh_fmtp;
3553 
3554         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3555             PUTFH4args *, args);
3556 
3557         if (cs->vp) {
3558                 VN_RELE(cs->vp);
3559                 cs->vp = NULL;
3560         }
3561 
3562         if (cs->cr) {
3563                 crfree(cs->cr);
3564                 cs->cr = NULL;
3565         }
3566 
3567         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3568                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3569                 goto out;
3570         }
3571 
3572         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3573         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3574             NULL);
3575 
3576         if (cs->exi == NULL) {
3577                 *cs->statusp = resp->status = NFS4ERR_STALE;
3578                 goto out;
3579         }
3580 
3581         cs->cr = crdup(cs->basecr);
3582 
3583         ASSERT(cs->cr != NULL);
3584 
3585         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3586                 *cs->statusp = resp->status;
3587                 goto out;
3588         }
3589 
3590         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3591                 VN_RELE(cs->vp);
3592                 cs->vp = NULL;
3593                 goto out;
3594         }
3595 
3596         nfs_fh4_copy(&args->object, &cs->fh);
3597         *cs->statusp = resp->status = NFS4_OK;
3598         cs->deleg = FALSE;
3599 
3600 out:
3601         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3602             PUTFH4res *, resp);
3603 }
3604 
3605 /* ARGSUSED */
3606 static void
3607 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3608     struct compound_state *cs)
3609 {
3610         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3611         int error;
3612         fid_t fid;
3613         struct exportinfo *exi, *sav_exi;
3614 
3615         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3616 
3617         if (cs->vp) {
3618                 VN_RELE(cs->vp);
3619                 cs->vp = NULL;
3620         }
3621 
3622         if (cs->cr)
3623                 crfree(cs->cr);
3624 
3625         cs->cr = crdup(cs->basecr);
3626 
3627         /*
3628          * Using rootdir, the system root vnode,
3629          * get its fid.
3630          */
3631         bzero(&fid, sizeof (fid));
3632         fid.fid_len = MAXFIDSZ;
3633         error = vop_fid_pseudo(rootdir, &fid);
3634         if (error != 0) {
3635                 *cs->statusp = resp->status = puterrno4(error);
3636                 goto out;
3637         }
3638 
3639         /*
3640          * Then use the root fsid & fid it to find out if it's exported
3641          *
3642          * If the server root isn't exported directly, then
3643          * it should at least be a pseudo export based on
3644          * one or more exports further down in the server's
3645          * file tree.
3646          */
3647         exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3648         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3649                 NFS4_DEBUG(rfs4_debug,
3650                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3651                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3652                 goto out;
3653         }
3654 
3655         /*
3656          * Now make a filehandle based on the root
3657          * export and root vnode.
3658          */
3659         error = makefh4(&cs->fh, rootdir, exi);
3660         if (error != 0) {
3661                 *cs->statusp = resp->status = puterrno4(error);
3662                 goto out;
3663         }
3664 
3665         sav_exi = cs->exi;
3666         cs->exi = exi;
3667 
3668         VN_HOLD(rootdir);
3669         cs->vp = rootdir;
3670 
3671         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3672                 VN_RELE(rootdir);
3673                 cs->vp = NULL;
3674                 cs->exi = sav_exi;
3675                 goto out;
3676         }
3677 
3678         *cs->statusp = resp->status = NFS4_OK;
3679         cs->deleg = FALSE;
3680 out:
3681         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3682             PUTROOTFH4res *, resp);
3683 }
3684 
3685 /*
3686  * set_rdattr_params sets up the variables used to manage what information
3687  * to get for each directory entry.
3688  */
3689 static nfsstat4
3690 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3691     bitmap4 attrs, bool_t *need_to_lookup)
3692 {
3693         uint_t  va_mask;
3694         nfsstat4 status;
3695         bitmap4 objbits;
3696 
3697         status = bitmap4_to_attrmask(attrs, sargp);
3698         if (status != NFS4_OK) {
3699                 /*
3700                  * could not even figure attr mask
3701                  */
3702                 return (status);
3703         }
3704         va_mask = sargp->vap->va_mask;
3705 
3706         /*
3707          * dirent's d_ino is always correct value for mounted_on_fileid.
3708          * mntdfid_set is set once here, but mounted_on_fileid is
3709          * set in main dirent processing loop for each dirent.
3710          * The mntdfid_set is a simple optimization that lets the
3711          * server attr code avoid work when caller is readdir.
3712          */
3713         sargp->mntdfid_set = TRUE;
3714 
3715         /*
3716          * Lookup entry only if client asked for any of the following:
3717          * a) vattr attrs
3718          * b) vfs attrs
3719          * c) attrs w/per-object scope requested (change, filehandle, etc)
3720          *    other than mounted_on_fileid (which we can take from dirent)
3721          */
3722         objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3723 
3724         if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3725                 *need_to_lookup = TRUE;
3726         else
3727                 *need_to_lookup = FALSE;
3728 
3729         if (sargp->sbp == NULL)
3730                 return (NFS4_OK);
3731 
3732         /*
3733          * If filesystem attrs are requested, get them now from the
3734          * directory vp, as most entries will have same filesystem. The only
3735          * exception are mounted over entries but we handle
3736          * those as we go (XXX mounted over detection not yet implemented).
3737          */
3738         sargp->vap->va_mask = 0;  /* to avoid VOP_GETATTR */
3739         status = bitmap4_get_sysattrs(sargp);
3740         sargp->vap->va_mask = va_mask;
3741 
3742         if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3743                 /*
3744                  * Failed to get filesystem attributes.
3745                  * Return a rdattr_error for each entry, but don't fail.
3746                  * However, don't get any obj-dependent attrs.
3747                  */
3748                 sargp->rdattr_error = status;        /* for rdattr_error */
3749                 *need_to_lookup = FALSE;
3750                 /*
3751                  * At least get fileid for regular readdir output
3752                  */
3753                 sargp->vap->va_mask &= AT_NODEID;
3754                 status = NFS4_OK;
3755         }
3756 
3757         return (status);
3758 }
3759 
3760 /*
3761  * readlink: args: CURRENT_FH.
3762  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3763  */
3764 
3765 /* ARGSUSED */
3766 static void
3767 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3768     struct compound_state *cs)
3769 {
3770         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3771         int error;
3772         vnode_t *vp;
3773         struct iovec iov;
3774         struct vattr va;
3775         struct uio uio;
3776         char *data;
3777         struct sockaddr *ca;
3778         char *name = NULL;
3779         int is_referral;
3780 
3781         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3782 
3783         /* CURRENT_FH: directory */
3784         vp = cs->vp;
3785         if (vp == NULL) {
3786                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3787                 goto out;
3788         }
3789 
3790         if (cs->access == CS_ACCESS_DENIED) {
3791                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3792                 goto out;
3793         }
3794 
3795         /* Is it a referral? */
3796         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3797 
3798                 is_referral = 1;
3799 
3800         } else {
3801 
3802                 is_referral = 0;
3803 
3804                 if (vp->v_type == VDIR) {
3805                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3806                         goto out;
3807                 }
3808 
3809                 if (vp->v_type != VLNK) {
3810                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3811                         goto out;
3812                 }
3813 
3814         }
3815 
3816         va.va_mask = AT_MODE;
3817         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3818         if (error) {
3819                 *cs->statusp = resp->status = puterrno4(error);
3820                 goto out;
3821         }
3822 
3823         if (MANDLOCK(vp, va.va_mode)) {
3824                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3825                 goto out;
3826         }
3827 
3828         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3829 
3830         if (is_referral) {
3831                 char *s;
3832                 size_t strsz;
3833 
3834                 /* Get an artificial symlink based on a referral */
3835                 s = build_symlink(vp, cs->cr, &strsz);
3836                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3837                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3838                     vnode_t *, vp, char *, s);
3839                 if (s == NULL)
3840                         error = EINVAL;
3841                 else {
3842                         error = 0;
3843                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3844                         kmem_free(s, strsz);
3845                 }
3846 
3847         } else {
3848 
3849                 iov.iov_base = data;
3850                 iov.iov_len = MAXPATHLEN;
3851                 uio.uio_iov = &iov;
3852                 uio.uio_iovcnt = 1;
3853                 uio.uio_segflg = UIO_SYSSPACE;
3854                 uio.uio_extflg = UIO_COPY_CACHED;
3855                 uio.uio_loffset = 0;
3856                 uio.uio_resid = MAXPATHLEN;
3857 
3858                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3859 
3860                 if (!error)
3861                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3862         }
3863 
3864         if (error) {
3865                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3866                 *cs->statusp = resp->status = puterrno4(error);
3867                 goto out;
3868         }
3869 
3870         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3871         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3872             MAXPATHLEN  + 1);
3873 
3874         if (name == NULL) {
3875                 /*
3876                  * Even though the conversion failed, we return
3877                  * something. We just don't translate it.
3878                  */
3879                 name = data;
3880         }
3881 
3882         /*
3883          * treat link name as data
3884          */
3885         (void) str_to_utf8(name, (utf8string *)&resp->link);
3886 
3887         if (name != data)
3888                 kmem_free(name, MAXPATHLEN + 1);
3889         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3890         *cs->statusp = resp->status = NFS4_OK;
3891 
3892 out:
3893         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3894             READLINK4res *, resp);
3895 }
3896 
3897 static void
3898 rfs4_op_readlink_free(nfs_resop4 *resop)
3899 {
3900         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3901         utf8string *symlink = (utf8string *)&resp->link;
3902 
3903         if (symlink->utf8string_val) {
3904                 UTF8STRING_FREE(*symlink)
3905         }
3906 }
3907 
3908 /*
3909  * release_lockowner:
3910  *      Release any state associated with the supplied
3911  *      lockowner. Note if any lo_state is holding locks we will not
3912  *      rele that lo_state and thus the lockowner will not be destroyed.
3913  *      A client using lock after the lock owner stateid has been released
3914  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3915  *      to reissue the lock with new_lock_owner set to TRUE.
3916  *      args: lock_owner
3917  *      res:  status
3918  */
3919 /* ARGSUSED */
3920 static void
3921 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3922     struct svc_req *req, struct compound_state *cs)
3923 {
3924         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3925         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3926         rfs4_lockowner_t *lo;
3927         rfs4_openowner_t *oo;
3928         rfs4_state_t *sp;
3929         rfs4_lo_state_t *lsp;
3930         rfs4_client_t *cp;
3931         bool_t create = FALSE;
3932         locklist_t *llist;
3933         sysid_t sysid;
3934 
3935         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3936             cs, RELEASE_LOCKOWNER4args *, ap);
3937 
3938         /* Make sure there is a clientid around for this request */
3939         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3940 
3941         if (cp == NULL) {
3942                 *cs->statusp = resp->status =
3943                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3944                 goto out;
3945         }
3946         rfs4_client_rele(cp);
3947 
3948         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3949         if (lo == NULL) {
3950                 *cs->statusp = resp->status = NFS4_OK;
3951                 goto out;
3952         }
3953         ASSERT(lo->rl_client != NULL);
3954 
3955         /*
3956          * Check for EXPIRED client. If so will reap state with in a lease
3957          * period or on next set_clientid_confirm step
3958          */
3959         if (rfs4_lease_expired(lo->rl_client)) {
3960                 rfs4_lockowner_rele(lo);
3961                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3962                 goto out;
3963         }
3964 
3965         /*
3966          * If no sysid has been assigned, then no locks exist; just return.
3967          */
3968         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3969         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3970                 rfs4_lockowner_rele(lo);
3971                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3972                 goto out;
3973         }
3974 
3975         sysid = lo->rl_client->rc_sysidt;
3976         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3977 
3978         /*
3979          * Mark the lockowner invalid.
3980          */
3981         rfs4_dbe_hide(lo->rl_dbe);
3982 
3983         /*
3984          * sysid-pid pair should now not be used since the lockowner is
3985          * invalid. If the client were to instantiate the lockowner again
3986          * it would be assigned a new pid. Thus we can get the list of
3987          * current locks.
3988          */
3989 
3990         llist = flk_get_active_locks(sysid, lo->rl_pid);
3991         /* If we are still holding locks fail */
3992         if (llist != NULL) {
3993 
3994                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3995 
3996                 flk_free_locklist(llist);
3997                 /*
3998                  * We need to unhide the lockowner so the client can
3999                  * try it again. The bad thing here is if the client
4000                  * has a logic error that took it here in the first place
4001                  * he probably has lost accounting of the locks that it
4002                  * is holding. So we may have dangling state until the
4003                  * open owner state is reaped via close. One scenario
4004                  * that could possibly occur is that the client has
4005                  * sent the unlock request(s) in separate threads
4006                  * and has not waited for the replies before sending the
4007                  * RELEASE_LOCKOWNER request. Presumably, it would expect
4008                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4009                  * reissuing the request.
4010                  */
4011                 rfs4_dbe_unhide(lo->rl_dbe);
4012                 rfs4_lockowner_rele(lo);
4013                 goto out;
4014         }
4015 
4016         /*
4017          * For the corresponding client we need to check each open
4018          * owner for any opens that have lockowner state associated
4019          * with this lockowner.
4020          */
4021 
4022         rfs4_dbe_lock(lo->rl_client->rc_dbe);
4023         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4024             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4025 
4026                 rfs4_dbe_lock(oo->ro_dbe);
4027                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4028                     sp = list_next(&oo->ro_statelist, sp)) {
4029 
4030                         rfs4_dbe_lock(sp->rs_dbe);
4031                         for (lsp = list_head(&sp->rs_lostatelist);
4032                             lsp != NULL;
4033                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
4034                                 if (lsp->rls_locker == lo) {
4035                                         rfs4_dbe_lock(lsp->rls_dbe);
4036                                         rfs4_dbe_invalidate(lsp->rls_dbe);
4037                                         rfs4_dbe_unlock(lsp->rls_dbe);
4038                                 }
4039                         }
4040                         rfs4_dbe_unlock(sp->rs_dbe);
4041                 }
4042                 rfs4_dbe_unlock(oo->ro_dbe);
4043         }
4044         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4045 
4046         rfs4_lockowner_rele(lo);
4047 
4048         *cs->statusp = resp->status = NFS4_OK;
4049 
4050 out:
4051         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4052             cs, RELEASE_LOCKOWNER4res *, resp);
4053 }
4054 
4055 /*
4056  * short utility function to lookup a file and recall the delegation
4057  */
4058 static rfs4_file_t *
4059 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4060     int *lkup_error, cred_t *cr)
4061 {
4062         vnode_t *vp;
4063         rfs4_file_t *fp = NULL;
4064         bool_t fcreate = FALSE;
4065         int error;
4066 
4067         if (vpp)
4068                 *vpp = NULL;
4069 
4070         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4071             NULL)) == 0) {
4072                 if (vp->v_type == VREG)
4073                         fp = rfs4_findfile(vp, NULL, &fcreate);
4074                 if (vpp)
4075                         *vpp = vp;
4076                 else
4077                         VN_RELE(vp);
4078         }
4079 
4080         if (lkup_error)
4081                 *lkup_error = error;
4082 
4083         return (fp);
4084 }
4085 
4086 /*
4087  * remove: args: CURRENT_FH: directory; name.
4088  *      res: status. If success - CURRENT_FH unchanged, return change_info
4089  *              for directory.
4090  */
4091 /* ARGSUSED */
4092 static void
4093 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4094     struct compound_state *cs)
4095 {
4096         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4097         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4098         int error;
4099         vnode_t *dvp, *vp;
4100         struct vattr bdva, idva, adva;
4101         char *nm;
4102         uint_t len;
4103         rfs4_file_t *fp;
4104         int in_crit = 0;
4105         bslabel_t *clabel;
4106         struct sockaddr *ca;
4107         char *name = NULL;
4108         nfsstat4 status;
4109 
4110         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4111             REMOVE4args *, args);
4112 
4113         /* CURRENT_FH: directory */
4114         dvp = cs->vp;
4115         if (dvp == NULL) {
4116                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4117                 goto out;
4118         }
4119 
4120         if (cs->access == CS_ACCESS_DENIED) {
4121                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4122                 goto out;
4123         }
4124 
4125         /*
4126          * If there is an unshared filesystem mounted on this vnode,
4127          * Do not allow to remove anything in this directory.
4128          */
4129         if (vn_ismntpt(dvp)) {
4130                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4131                 goto out;
4132         }
4133 
4134         if (dvp->v_type != VDIR) {
4135                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4136                 goto out;
4137         }
4138 
4139         status = utf8_dir_verify(&args->target);
4140         if (status != NFS4_OK) {
4141                 *cs->statusp = resp->status = status;
4142                 goto out;
4143         }
4144 
4145         /*
4146          * Lookup the file so that we can check if it's a directory
4147          */
4148         nm = utf8_to_fn(&args->target, &len, NULL);
4149         if (nm == NULL) {
4150                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4151                 goto out;
4152         }
4153 
4154         if (len > MAXNAMELEN) {
4155                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4156                 kmem_free(nm, len);
4157                 goto out;
4158         }
4159 
4160         if (rdonly4(req, cs)) {
4161                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4162                 kmem_free(nm, len);
4163                 goto out;
4164         }
4165 
4166         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4167         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4168             MAXPATHLEN  + 1);
4169 
4170         if (name == NULL) {
4171                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4172                 kmem_free(nm, len);
4173                 goto out;
4174         }
4175 
4176         /*
4177          * Lookup the file to determine type and while we are see if
4178          * there is a file struct around and check for delegation.
4179          * We don't need to acquire va_seq before this lookup, if
4180          * it causes an update, cinfo.before will not match, which will
4181          * trigger a cache flush even if atomic is TRUE.
4182          */
4183         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4184                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4185                     NULL)) {
4186                         VN_RELE(vp);
4187                         rfs4_file_rele(fp);
4188                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4189                         if (nm != name)
4190                                 kmem_free(name, MAXPATHLEN + 1);
4191                         kmem_free(nm, len);
4192                         goto out;
4193                 }
4194         }
4195 
4196         /* Didn't find anything to remove */
4197         if (vp == NULL) {
4198                 *cs->statusp = resp->status = error;
4199                 if (nm != name)
4200                         kmem_free(name, MAXPATHLEN + 1);
4201                 kmem_free(nm, len);
4202                 goto out;
4203         }
4204 
4205         if (nbl_need_check(vp)) {
4206                 nbl_start_crit(vp, RW_READER);
4207                 in_crit = 1;
4208                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4209                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4210                         if (nm != name)
4211                                 kmem_free(name, MAXPATHLEN + 1);
4212                         kmem_free(nm, len);
4213                         nbl_end_crit(vp);
4214                         VN_RELE(vp);
4215                         if (fp) {
4216                                 rfs4_clear_dont_grant(fp);
4217                                 rfs4_file_rele(fp);
4218                         }
4219                         goto out;
4220                 }
4221         }
4222 
4223         /* check label before allowing removal */
4224         if (is_system_labeled()) {
4225                 ASSERT(req->rq_label != NULL);
4226                 clabel = req->rq_label;
4227                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4228                     "got client label from request(1)",
4229                     struct svc_req *, req);
4230                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4231                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4232                             cs->exi)) {
4233                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4234                                 if (name != nm)
4235                                         kmem_free(name, MAXPATHLEN + 1);
4236                                 kmem_free(nm, len);
4237                                 if (in_crit)
4238                                         nbl_end_crit(vp);
4239                                 VN_RELE(vp);
4240                                 if (fp) {
4241                                         rfs4_clear_dont_grant(fp);
4242                                         rfs4_file_rele(fp);
4243                                 }
4244                                 goto out;
4245                         }
4246                 }
4247         }
4248 
4249         /* Get dir "before" change value */
4250         bdva.va_mask = AT_CTIME|AT_SEQ;
4251         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4252         if (error) {
4253                 *cs->statusp = resp->status = puterrno4(error);
4254                 if (nm != name)
4255                         kmem_free(name, MAXPATHLEN + 1);
4256                 kmem_free(nm, len);
4257                 if (in_crit)
4258                         nbl_end_crit(vp);
4259                 VN_RELE(vp);
4260                 if (fp) {
4261                         rfs4_clear_dont_grant(fp);
4262                         rfs4_file_rele(fp);
4263                 }
4264                 goto out;
4265         }
4266         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4267 
4268         /* Actually do the REMOVE operation */
4269         if (vp->v_type == VDIR) {
4270                 /*
4271                  * Can't remove a directory that has a mounted-on filesystem.
4272                  */
4273                 if (vn_ismntpt(vp)) {
4274                         error = EACCES;
4275                 } else {
4276                         /*
4277                          * System V defines rmdir to return EEXIST,
4278                          * not ENOTEMPTY, if the directory is not
4279                          * empty.  A System V NFS server needs to map
4280                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4281                          * transmit over the wire.
4282                          */
4283                         if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4284                             NULL, 0)) == EEXIST)
4285                                 error = ENOTEMPTY;
4286                 }
4287         } else {
4288                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4289                     fp != NULL) {
4290                         struct vattr va;
4291                         vnode_t *tvp;
4292 
4293                         rfs4_dbe_lock(fp->rf_dbe);
4294                         tvp = fp->rf_vp;
4295                         if (tvp)
4296                                 VN_HOLD(tvp);
4297                         rfs4_dbe_unlock(fp->rf_dbe);
4298 
4299                         if (tvp) {
4300                                 /*
4301                                  * This is va_seq safe because we are not
4302                                  * manipulating dvp.
4303                                  */
4304                                 va.va_mask = AT_NLINK;
4305                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4306                                     va.va_nlink == 0) {
4307                                         /* Remove state on file remove */
4308                                         if (in_crit) {
4309                                                 nbl_end_crit(vp);
4310                                                 in_crit = 0;
4311                                         }
4312                                         rfs4_close_all_state(fp);
4313                                 }
4314                                 VN_RELE(tvp);
4315                         }
4316                 }
4317         }
4318 
4319         if (in_crit)
4320                 nbl_end_crit(vp);
4321         VN_RELE(vp);
4322 
4323         if (fp) {
4324                 rfs4_clear_dont_grant(fp);
4325                 rfs4_file_rele(fp);
4326         }
4327         if (nm != name)
4328                 kmem_free(name, MAXPATHLEN + 1);
4329         kmem_free(nm, len);
4330 
4331         if (error) {
4332                 *cs->statusp = resp->status = puterrno4(error);
4333                 goto out;
4334         }
4335 
4336         /*
4337          * Get the initial "after" sequence number, if it fails, set to zero
4338          */
4339         idva.va_mask = AT_SEQ;
4340         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4341                 idva.va_seq = 0;
4342 
4343         /*
4344          * Force modified data and metadata out to stable storage.
4345          */
4346         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4347 
4348         /*
4349          * Get "after" change value, if it fails, simply return the
4350          * before value.
4351          */
4352         adva.va_mask = AT_CTIME|AT_SEQ;
4353         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4354                 adva.va_ctime = bdva.va_ctime;
4355                 adva.va_seq = 0;
4356         }
4357 
4358         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4359 
4360         /*
4361          * The cinfo.atomic = TRUE only if we have
4362          * non-zero va_seq's, and it has incremented by exactly one
4363          * during the VOP_REMOVE/RMDIR and it didn't change during
4364          * the VOP_FSYNC.
4365          */
4366         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4367             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4368                 resp->cinfo.atomic = TRUE;
4369         else
4370                 resp->cinfo.atomic = FALSE;
4371 
4372         *cs->statusp = resp->status = NFS4_OK;
4373 
4374 out:
4375         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4376             REMOVE4res *, resp);
4377 }
4378 
4379 /*
4380  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4381  *              oldname and newname.
4382  *      res: status. If success - CURRENT_FH unchanged, return change_info
4383  *              for both from and target directories.
4384  */
4385 /* ARGSUSED */
4386 static void
4387 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4388     struct compound_state *cs)
4389 {
4390         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4391         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4392         int error;
4393         vnode_t *odvp;
4394         vnode_t *ndvp;
4395         vnode_t *srcvp, *targvp;
4396         struct vattr obdva, oidva, oadva;
4397         struct vattr nbdva, nidva, nadva;
4398         char *onm, *nnm;
4399         uint_t olen, nlen;
4400         rfs4_file_t *fp, *sfp;
4401         int in_crit_src, in_crit_targ;
4402         int fp_rele_grant_hold, sfp_rele_grant_hold;
4403         bslabel_t *clabel;
4404         struct sockaddr *ca;
4405         char *converted_onm = NULL;
4406         char *converted_nnm = NULL;
4407         nfsstat4 status;
4408 
4409         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4410             RENAME4args *, args);
4411 
4412         fp = sfp = NULL;
4413         srcvp = targvp = NULL;
4414         in_crit_src = in_crit_targ = 0;
4415         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4416 
4417         /* CURRENT_FH: target directory */
4418         ndvp = cs->vp;
4419         if (ndvp == NULL) {
4420                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4421                 goto out;
4422         }
4423 
4424         /* SAVED_FH: from directory */
4425         odvp = cs->saved_vp;
4426         if (odvp == NULL) {
4427                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4428                 goto out;
4429         }
4430 
4431         if (cs->access == CS_ACCESS_DENIED) {
4432                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4433                 goto out;
4434         }
4435 
4436         /*
4437          * If there is an unshared filesystem mounted on this vnode,
4438          * do not allow to rename objects in this directory.
4439          */
4440         if (vn_ismntpt(odvp)) {
4441                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4442                 goto out;
4443         }
4444 
4445         /*
4446          * If there is an unshared filesystem mounted on this vnode,
4447          * do not allow to rename to this directory.
4448          */
4449         if (vn_ismntpt(ndvp)) {
4450                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4451                 goto out;
4452         }
4453 
4454         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4455                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4456                 goto out;
4457         }
4458 
4459         if (cs->saved_exi != cs->exi) {
4460                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4461                 goto out;
4462         }
4463 
4464         status = utf8_dir_verify(&args->oldname);
4465         if (status != NFS4_OK) {
4466                 *cs->statusp = resp->status = status;
4467                 goto out;
4468         }
4469 
4470         status = utf8_dir_verify(&args->newname);
4471         if (status != NFS4_OK) {
4472                 *cs->statusp = resp->status = status;
4473                 goto out;
4474         }
4475 
4476         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4477         if (onm == NULL) {
4478                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4479                 goto out;
4480         }
4481         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4482         nlen = MAXPATHLEN + 1;
4483         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4484             nlen);
4485 
4486         if (converted_onm == NULL) {
4487                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4488                 kmem_free(onm, olen);
4489                 goto out;
4490         }
4491 
4492         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4493         if (nnm == NULL) {
4494                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4495                 if (onm != converted_onm)
4496                         kmem_free(converted_onm, MAXPATHLEN + 1);
4497                 kmem_free(onm, olen);
4498                 goto out;
4499         }
4500         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4501             MAXPATHLEN  + 1);
4502 
4503         if (converted_nnm == NULL) {
4504                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4505                 kmem_free(nnm, nlen);
4506                 nnm = NULL;
4507                 if (onm != converted_onm)
4508                         kmem_free(converted_onm, MAXPATHLEN + 1);
4509                 kmem_free(onm, olen);
4510                 goto out;
4511         }
4512 
4513 
4514         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4515                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4516                 kmem_free(onm, olen);
4517                 kmem_free(nnm, nlen);
4518                 goto out;
4519         }
4520 
4521 
4522         if (rdonly4(req, cs)) {
4523                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4524                 if (onm != converted_onm)
4525                         kmem_free(converted_onm, MAXPATHLEN + 1);
4526                 kmem_free(onm, olen);
4527                 if (nnm != converted_nnm)
4528                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4529                 kmem_free(nnm, nlen);
4530                 goto out;
4531         }
4532 
4533         /* check label of the target dir */
4534         if (is_system_labeled()) {
4535                 ASSERT(req->rq_label != NULL);
4536                 clabel = req->rq_label;
4537                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4538                     "got client label from request(1)",
4539                     struct svc_req *, req);
4540                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4541                         if (!do_rfs_label_check(clabel, ndvp,
4542                             EQUALITY_CHECK, cs->exi)) {
4543                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4544                                 goto err_out;
4545                         }
4546                 }
4547         }
4548 
4549         /*
4550          * Is the source a file and have a delegation?
4551          * We don't need to acquire va_seq before these lookups, if
4552          * it causes an update, cinfo.before will not match, which will
4553          * trigger a cache flush even if atomic is TRUE.
4554          */
4555         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4556             &error, cs->cr)) {
4557                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4558                     NULL)) {
4559                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4560                         goto err_out;
4561                 }
4562         }
4563 
4564         if (srcvp == NULL) {
4565                 *cs->statusp = resp->status = puterrno4(error);
4566                 if (onm != converted_onm)
4567                         kmem_free(converted_onm, MAXPATHLEN + 1);
4568                 kmem_free(onm, olen);
4569                 if (nnm != converted_nnm)
4570                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4571                 kmem_free(nnm, nlen);
4572                 goto out;
4573         }
4574 
4575         sfp_rele_grant_hold = 1;
4576 
4577         /* Does the destination exist and a file and have a delegation? */
4578         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4579             NULL, cs->cr)) {
4580                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4581                     NULL)) {
4582                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4583                         goto err_out;
4584                 }
4585         }
4586         fp_rele_grant_hold = 1;
4587 
4588 
4589         /* Check for NBMAND lock on both source and target */
4590         if (nbl_need_check(srcvp)) {
4591                 nbl_start_crit(srcvp, RW_READER);
4592                 in_crit_src = 1;
4593                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4594                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4595                         goto err_out;
4596                 }
4597         }
4598 
4599         if (targvp && nbl_need_check(targvp)) {
4600                 nbl_start_crit(targvp, RW_READER);
4601                 in_crit_targ = 1;
4602                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4603                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4604                         goto err_out;
4605                 }
4606         }
4607 
4608         /* Get source "before" change value */
4609         obdva.va_mask = AT_CTIME|AT_SEQ;
4610         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4611         if (!error) {
4612                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4613                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4614         }
4615         if (error) {
4616                 *cs->statusp = resp->status = puterrno4(error);
4617                 goto err_out;
4618         }
4619 
4620         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4621         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4622 
4623         if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4624             cs->cr, NULL, 0)) == 0 && fp != NULL) {
4625                 struct vattr va;
4626                 vnode_t *tvp;
4627 
4628                 rfs4_dbe_lock(fp->rf_dbe);
4629                 tvp = fp->rf_vp;
4630                 if (tvp)
4631                         VN_HOLD(tvp);
4632                 rfs4_dbe_unlock(fp->rf_dbe);
4633 
4634                 if (tvp) {
4635                         va.va_mask = AT_NLINK;
4636                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4637                             va.va_nlink == 0) {
4638                                 /* The file is gone and so should the state */
4639                                 if (in_crit_targ) {
4640                                         nbl_end_crit(targvp);
4641                                         in_crit_targ = 0;
4642                                 }
4643                                 rfs4_close_all_state(fp);
4644                         }
4645                         VN_RELE(tvp);
4646                 }
4647         }
4648         if (error == 0)
4649                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4650 
4651         if (in_crit_src)
4652                 nbl_end_crit(srcvp);
4653         if (srcvp)
4654                 VN_RELE(srcvp);
4655         if (in_crit_targ)
4656                 nbl_end_crit(targvp);
4657         if (targvp)
4658                 VN_RELE(targvp);
4659 
4660         if (sfp) {
4661                 rfs4_clear_dont_grant(sfp);
4662                 rfs4_file_rele(sfp);
4663         }
4664         if (fp) {
4665                 rfs4_clear_dont_grant(fp);
4666                 rfs4_file_rele(fp);
4667         }
4668 
4669         if (converted_onm != onm)
4670                 kmem_free(converted_onm, MAXPATHLEN + 1);
4671         kmem_free(onm, olen);
4672         if (converted_nnm != nnm)
4673                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4674         kmem_free(nnm, nlen);
4675 
4676         /*
4677          * Get the initial "after" sequence number, if it fails, set to zero
4678          */
4679         oidva.va_mask = AT_SEQ;
4680         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4681                 oidva.va_seq = 0;
4682 
4683         nidva.va_mask = AT_SEQ;
4684         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4685                 nidva.va_seq = 0;
4686 
4687         /*
4688          * Force modified data and metadata out to stable storage.
4689          */
4690         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4691         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4692 
4693         if (error) {
4694                 *cs->statusp = resp->status = puterrno4(error);
4695                 goto out;
4696         }
4697 
4698         /*
4699          * Get "after" change values, if it fails, simply return the
4700          * before value.
4701          */
4702         oadva.va_mask = AT_CTIME|AT_SEQ;
4703         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4704                 oadva.va_ctime = obdva.va_ctime;
4705                 oadva.va_seq = 0;
4706         }
4707 
4708         nadva.va_mask = AT_CTIME|AT_SEQ;
4709         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4710                 nadva.va_ctime = nbdva.va_ctime;
4711                 nadva.va_seq = 0;
4712         }
4713 
4714         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4715         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4716 
4717         /*
4718          * The cinfo.atomic = TRUE only if we have
4719          * non-zero va_seq's, and it has incremented by exactly one
4720          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4721          */
4722         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4723             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4724                 resp->source_cinfo.atomic = TRUE;
4725         else
4726                 resp->source_cinfo.atomic = FALSE;
4727 
4728         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4729             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4730                 resp->target_cinfo.atomic = TRUE;
4731         else
4732                 resp->target_cinfo.atomic = FALSE;
4733 
4734 #ifdef  VOLATILE_FH_TEST
4735         {
4736         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4737 
4738         /*
4739          * Add the renamed file handle to the volatile rename list
4740          */
4741         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4742                 /* file handles may expire on rename */
4743                 vnode_t *vp;
4744 
4745                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4746                 /*
4747                  * Already know that nnm will be a valid string
4748                  */
4749                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4750                     NULL, NULL, NULL);
4751                 kmem_free(nnm, nlen);
4752                 if (!error) {
4753                         add_volrnm_fh(cs->exi, vp);
4754                         VN_RELE(vp);
4755                 }
4756         }
4757         }
4758 #endif  /* VOLATILE_FH_TEST */
4759 
4760         *cs->statusp = resp->status = NFS4_OK;
4761 out:
4762         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4763             RENAME4res *, resp);
4764         return;
4765 
4766 err_out:
4767         if (onm != converted_onm)
4768                 kmem_free(converted_onm, MAXPATHLEN + 1);
4769         if (onm != NULL)
4770                 kmem_free(onm, olen);
4771         if (nnm != converted_nnm)
4772                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4773         if (nnm != NULL)
4774                 kmem_free(nnm, nlen);
4775 
4776         if (in_crit_src) nbl_end_crit(srcvp);
4777         if (in_crit_targ) nbl_end_crit(targvp);
4778         if (targvp) VN_RELE(targvp);
4779         if (srcvp) VN_RELE(srcvp);
4780         if (sfp) {
4781                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4782                 rfs4_file_rele(sfp);
4783         }
4784         if (fp) {
4785                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4786                 rfs4_file_rele(fp);
4787         }
4788 
4789         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4790             RENAME4res *, resp);
4791 }
4792 
4793 /* ARGSUSED */
4794 static void
4795 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4796     struct compound_state *cs)
4797 {
4798         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4799         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4800         rfs4_client_t *cp;
4801 
4802         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4803             RENEW4args *, args);
4804 
4805         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4806                 *cs->statusp = resp->status =
4807                     rfs4_check_clientid(&args->clientid, 0);
4808                 goto out;
4809         }
4810 
4811         if (rfs4_lease_expired(cp)) {
4812                 rfs4_client_rele(cp);
4813                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4814                 goto out;
4815         }
4816 
4817         rfs4_update_lease(cp);
4818 
4819         mutex_enter(cp->rc_cbinfo.cb_lock);
4820         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4821                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4822                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4823         } else {
4824                 *cs->statusp = resp->status = NFS4_OK;
4825         }
4826         mutex_exit(cp->rc_cbinfo.cb_lock);
4827 
4828         rfs4_client_rele(cp);
4829 
4830 out:
4831         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4832             RENEW4res *, resp);
4833 }
4834 
4835 /* ARGSUSED */
4836 static void
4837 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4838     struct compound_state *cs)
4839 {
4840         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4841 
4842         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4843 
4844         /* No need to check cs->access - we are not accessing any object */
4845         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4846                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4847                 goto out;
4848         }
4849         if (cs->vp != NULL) {
4850                 VN_RELE(cs->vp);
4851         }
4852         cs->vp = cs->saved_vp;
4853         cs->saved_vp = NULL;
4854         cs->exi = cs->saved_exi;
4855         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4856         *cs->statusp = resp->status = NFS4_OK;
4857         cs->deleg = FALSE;
4858 
4859 out:
4860         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4861             RESTOREFH4res *, resp);
4862 }
4863 
4864 /* ARGSUSED */
4865 static void
4866 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4867     struct compound_state *cs)
4868 {
4869         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4870 
4871         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4872 
4873         /* No need to check cs->access - we are not accessing any object */
4874         if (cs->vp == NULL) {
4875                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4876                 goto out;
4877         }
4878         if (cs->saved_vp != NULL) {
4879                 VN_RELE(cs->saved_vp);
4880         }
4881         cs->saved_vp = cs->vp;
4882         VN_HOLD(cs->saved_vp);
4883         cs->saved_exi = cs->exi;
4884         /*
4885          * since SAVEFH is fairly rare, don't alloc space for its fh
4886          * unless necessary.
4887          */
4888         if (cs->saved_fh.nfs_fh4_val == NULL) {
4889                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4890         }
4891         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4892         *cs->statusp = resp->status = NFS4_OK;
4893 
4894 out:
4895         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4896             SAVEFH4res *, resp);
4897 }
4898 
4899 /*
4900  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4901  * return the bitmap of attrs that were set successfully. It is also
4902  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4903  * always be called only after rfs4_do_set_attrs().
4904  *
4905  * Verify that the attributes are same as the expected ones. sargp->vap
4906  * and sargp->sbp contain the input attributes as translated from fattr4.
4907  *
4908  * This function verifies only the attrs that correspond to a vattr or
4909  * vfsstat struct. That is because of the extra step needed to get the
4910  * corresponding system structs. Other attributes have already been set or
4911  * verified by do_rfs4_set_attrs.
4912  *
4913  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4914  */
4915 static int
4916 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4917     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4918 {
4919         int error, ret_error = 0;
4920         int i, k;
4921         uint_t sva_mask = sargp->vap->va_mask;
4922         uint_t vbit;
4923         union nfs4_attr_u *na;
4924         uint8_t *amap;
4925         bool_t getsb = ntovp->vfsstat;
4926 
4927         if (sva_mask != 0) {
4928                 /*
4929                  * Okay to overwrite sargp->vap because we verify based
4930                  * on the incoming values.
4931                  */
4932                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4933                     sargp->cs->cr, NULL);
4934                 if (ret_error) {
4935                         if (resp == NULL)
4936                                 return (ret_error);
4937                         /*
4938                          * Must return bitmap of successful attrs
4939                          */
4940                         sva_mask = 0;   /* to prevent checking vap later */
4941                 } else {
4942                         /*
4943                          * Some file systems clobber va_mask. it is probably
4944                          * wrong of them to do so, nonethless we practice
4945                          * defensive coding.
4946                          * See bug id 4276830.
4947                          */
4948                         sargp->vap->va_mask = sva_mask;
4949                 }
4950         }
4951 
4952         if (getsb) {
4953                 /*
4954                  * Now get the superblock and loop on the bitmap, as there is
4955                  * no simple way of translating from superblock to bitmap4.
4956                  */
4957                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4958                 if (ret_error) {
4959                         if (resp == NULL)
4960                                 goto errout;
4961                         getsb = FALSE;
4962                 }
4963         }
4964 
4965         /*
4966          * Now loop and verify each attribute which getattr returned
4967          * whether it's the same as the input.
4968          */
4969         if (resp == NULL && !getsb && (sva_mask == 0))
4970                 goto errout;
4971 
4972         na = ntovp->na;
4973         amap = ntovp->amap;
4974         k = 0;
4975         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4976                 k = *amap;
4977                 ASSERT(nfs4_ntov_map[k].nval == k);
4978                 vbit = nfs4_ntov_map[k].vbit;
4979 
4980                 /*
4981                  * If vattr attribute but VOP_GETATTR failed, or it's
4982                  * superblock attribute but VFS_STATVFS failed, skip
4983                  */
4984                 if (vbit) {
4985                         if ((vbit & sva_mask) == 0)
4986                                 continue;
4987                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4988                         continue;
4989                 }
4990                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4991                 if (resp != NULL) {
4992                         if (error)
4993                                 ret_error = -1; /* not all match */
4994                         else    /* update response bitmap */
4995                                 *resp |= nfs4_ntov_map[k].fbit;
4996                         continue;
4997                 }
4998                 if (error) {
4999                         ret_error = -1; /* not all match */
5000                         break;
5001                 }
5002         }
5003 errout:
5004         return (ret_error);
5005 }
5006 
5007 /*
5008  * Decode the attribute to be set/verified. If the attr requires a sys op
5009  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5010  * call the sv_getit function for it, because the sys op hasn't yet been done.
5011  * Return 0 for success, error code if failed.
5012  *
5013  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5014  */
5015 static int
5016 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5017     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5018 {
5019         int error = 0;
5020         bool_t set_later;
5021 
5022         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5023 
5024         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5025                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5026                 /*
5027                  * don't verify yet if a vattr or sb dependent attr,
5028                  * because we don't have their sys values yet.
5029                  * Will be done later.
5030                  */
5031                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5032                         /*
5033                          * ACLs are a special case, since setting the MODE
5034                          * conflicts with setting the ACL.  We delay setting
5035                          * the ACL until all other attributes have been set.
5036                          * The ACL gets set in do_rfs4_op_setattr().
5037                          */
5038                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5039                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5040                                     sargp, nap);
5041                                 if (error) {
5042                                         xdr_free(nfs4_ntov_map[k].xfunc,
5043                                             (caddr_t)nap);
5044                                 }
5045                         }
5046                 }
5047         } else {
5048 #ifdef  DEBUG
5049                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5050                     "decoding attribute %d\n", k);
5051 #endif
5052                 error = EINVAL;
5053         }
5054         if (!error && resp_bval && !set_later) {
5055                 *resp_bval |= nfs4_ntov_map[k].fbit;
5056         }
5057 
5058         return (error);
5059 }
5060 
5061 /*
5062  * Set vattr based on incoming fattr4 attrs - used by setattr.
5063  * Set response mask. Ignore any values that are not writable vattr attrs.
5064  */
5065 static nfsstat4
5066 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5067     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5068     nfs4_attr_cmd_t cmd)
5069 {
5070         int error = 0;
5071         int i;
5072         char *attrs = fattrp->attrlist4;
5073         uint32_t attrslen = fattrp->attrlist4_len;
5074         XDR xdr;
5075         nfsstat4 status = NFS4_OK;
5076         vnode_t *vp = cs->vp;
5077         union nfs4_attr_u *na;
5078         uint8_t *amap;
5079 
5080 #ifndef lint
5081         /*
5082          * Make sure that maximum attribute number can be expressed as an
5083          * 8 bit quantity.
5084          */
5085         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5086 #endif
5087 
5088         if (vp == NULL) {
5089                 if (resp)
5090                         *resp = 0;
5091                 return (NFS4ERR_NOFILEHANDLE);
5092         }
5093         if (cs->access == CS_ACCESS_DENIED) {
5094                 if (resp)
5095                         *resp = 0;
5096                 return (NFS4ERR_ACCESS);
5097         }
5098 
5099         sargp->op = cmd;
5100         sargp->cs = cs;
5101         sargp->flag = 0;     /* may be set later */
5102         sargp->vap->va_mask = 0;
5103         sargp->rdattr_error = NFS4_OK;
5104         sargp->rdattr_error_req = FALSE;
5105         /* sargp->sbp is set by the caller */
5106 
5107         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5108 
5109         na = ntovp->na;
5110         amap = ntovp->amap;
5111 
5112         /*
5113          * The following loop iterates on the nfs4_ntov_map checking
5114          * if the fbit is set in the requested bitmap.
5115          * If set then we process the arguments using the
5116          * rfs4_fattr4 conversion functions to populate the setattr
5117          * vattr and va_mask. Any settable attrs that are not using vattr
5118          * will be set in this loop.
5119          */
5120         for (i = 0; i < nfs4_ntov_map_size; i++) {
5121                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5122                         continue;
5123                 }
5124                 /*
5125                  * If setattr, must be a writable attr.
5126                  * If verify/nverify, must be a readable attr.
5127                  */
5128                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5129                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5130                         /*
5131                          * Client tries to set/verify an
5132                          * unsupported attribute, tries to set
5133                          * a read only attr or verify a write
5134                          * only one - error!
5135                          */
5136                         break;
5137                 }
5138                 /*
5139                  * Decode the attribute to set/verify
5140                  */
5141                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5142                     &xdr, resp ? resp : NULL, na);
5143                 if (error)
5144                         break;
5145                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5146                 na++;
5147                 (ntovp->attrcnt)++;
5148                 if (nfs4_ntov_map[i].vfsstat)
5149                         ntovp->vfsstat = TRUE;
5150         }
5151 
5152         if (error != 0)
5153                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5154                     puterrno4(error));
5155         /* xdrmem_destroy(&xdrs); */        /* NO-OP */
5156         return (status);
5157 }
5158 
5159 static nfsstat4
5160 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5161     stateid4 *stateid)
5162 {
5163         int error = 0;
5164         struct nfs4_svgetit_arg sarg;
5165         bool_t trunc;
5166 
5167         nfsstat4 status = NFS4_OK;
5168         cred_t *cr = cs->cr;
5169         vnode_t *vp = cs->vp;
5170         struct nfs4_ntov_table ntov;
5171         struct statvfs64 sb;
5172         struct vattr bva;
5173         struct flock64 bf;
5174         int in_crit = 0;
5175         uint_t saved_mask = 0;
5176         caller_context_t ct;
5177 
5178         *resp = 0;
5179         sarg.sbp = &sb;
5180         sarg.is_referral = B_FALSE;
5181         nfs4_ntov_table_init(&ntov);
5182         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5183             NFS4ATTR_SETIT);
5184         if (status != NFS4_OK) {
5185                 /*
5186                  * failed set attrs
5187                  */
5188                 goto done;
5189         }
5190         if ((sarg.vap->va_mask == 0) &&
5191             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5192                 /*
5193                  * no further work to be done
5194                  */
5195                 goto done;
5196         }
5197 
5198         /*
5199          * If we got a request to set the ACL and the MODE, only
5200          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5201          * to change any other bits, along with setting an ACL,
5202          * gives NFS4ERR_INVAL.
5203          */
5204         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5205             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5206                 vattr_t va;
5207 
5208                 va.va_mask = AT_MODE;
5209                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5210                 if (error) {
5211                         status = puterrno4(error);
5212                         goto done;
5213                 }
5214                 if ((sarg.vap->va_mode ^ va.va_mode) &
5215                     ~(VSUID | VSGID | VSVTX)) {
5216                         status = NFS4ERR_INVAL;
5217                         goto done;
5218                 }
5219         }
5220 
5221         /* Check stateid only if size has been set */
5222         if (sarg.vap->va_mask & AT_SIZE) {
5223                 trunc = (sarg.vap->va_size == 0);
5224                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5225                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5226                 if (status != NFS4_OK)
5227                         goto done;
5228         } else {
5229                 ct.cc_sysid = 0;
5230                 ct.cc_pid = 0;
5231                 ct.cc_caller_id = nfs4_srv_caller_id;
5232                 ct.cc_flags = CC_DONTBLOCK;
5233         }
5234 
5235         /* XXX start of possible race with delegations */
5236 
5237         /*
5238          * We need to specially handle size changes because it is
5239          * possible for the client to create a file with read-only
5240          * modes, but with the file opened for writing. If the client
5241          * then tries to set the file size, e.g. ftruncate(3C),
5242          * fcntl(F_FREESP), the normal access checking done in
5243          * VOP_SETATTR would prevent the client from doing it even though
5244          * it should be allowed to do so.  To get around this, we do the
5245          * access checking for ourselves and use VOP_SPACE which doesn't
5246          * do the access checking.
5247          * Also the client should not be allowed to change the file
5248          * size if there is a conflicting non-blocking mandatory lock in
5249          * the region of the change.
5250          */
5251         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5252                 u_offset_t offset;
5253                 ssize_t length;
5254 
5255                 /*
5256                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5257                  * before returning, sarg.vap->va_mask is used to
5258                  * generate the setattr reply bitmap.  We also clear
5259                  * AT_SIZE below before calling VOP_SPACE.  For both
5260                  * of these cases, the va_mask needs to be saved here
5261                  * and restored after calling VOP_SETATTR.
5262                  */
5263                 saved_mask = sarg.vap->va_mask;
5264 
5265                 /*
5266                  * Check any possible conflict due to NBMAND locks.
5267                  * Get into critical region before VOP_GETATTR, so the
5268                  * size attribute is valid when checking conflicts.
5269                  */
5270                 if (nbl_need_check(vp)) {
5271                         nbl_start_crit(vp, RW_READER);
5272                         in_crit = 1;
5273                 }
5274 
5275                 bva.va_mask = AT_UID|AT_SIZE;
5276                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5277                         status = puterrno4(error);
5278                         goto done;
5279                 }
5280 
5281                 if (in_crit) {
5282                         if (sarg.vap->va_size < bva.va_size) {
5283                                 offset = sarg.vap->va_size;
5284                                 length = bva.va_size - sarg.vap->va_size;
5285                         } else {
5286                                 offset = bva.va_size;
5287                                 length = sarg.vap->va_size - bva.va_size;
5288                         }
5289                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5290                             &ct)) {
5291                                 status = NFS4ERR_LOCKED;
5292                                 goto done;
5293                         }
5294                 }
5295 
5296                 if (crgetuid(cr) == bva.va_uid) {
5297                         sarg.vap->va_mask &= ~AT_SIZE;
5298                         bf.l_type = F_WRLCK;
5299                         bf.l_whence = 0;
5300                         bf.l_start = (off64_t)sarg.vap->va_size;
5301                         bf.l_len = 0;
5302                         bf.l_sysid = 0;
5303                         bf.l_pid = 0;
5304                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5305                             (offset_t)sarg.vap->va_size, cr, &ct);
5306                 }
5307         }
5308 
5309         if (!error && sarg.vap->va_mask != 0)
5310                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5311 
5312         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5313         if (saved_mask & AT_SIZE)
5314                 sarg.vap->va_mask |= AT_SIZE;
5315 
5316         /*
5317          * If an ACL was being set, it has been delayed until now,
5318          * in order to set the mode (via the VOP_SETATTR() above) first.
5319          */
5320         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5321                 int i;
5322 
5323                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5324                         if (ntov.amap[i] == FATTR4_ACL)
5325                                 break;
5326                 if (i < NFS4_MAXNUM_ATTRS) {
5327                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5328                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5329                         if (error == 0) {
5330                                 *resp |= FATTR4_ACL_MASK;
5331                         } else if (error == ENOTSUP) {
5332                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5333                                 status = NFS4ERR_ATTRNOTSUPP;
5334                                 goto done;
5335                         }
5336                 } else {
5337                         NFS4_DEBUG(rfs4_debug,
5338                             (CE_NOTE, "do_rfs4_op_setattr: "
5339                             "unable to find ACL in fattr4"));
5340                         error = EINVAL;
5341                 }
5342         }
5343 
5344         if (error) {
5345                 /* check if a monitor detected a delegation conflict */
5346                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5347                         status = NFS4ERR_DELAY;
5348                 else
5349                         status = puterrno4(error);
5350 
5351                 /*
5352                  * Set the response bitmap when setattr failed.
5353                  * If VOP_SETATTR partially succeeded, test by doing a
5354                  * VOP_GETATTR on the object and comparing the data
5355                  * to the setattr arguments.
5356                  */
5357                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5358         } else {
5359                 /*
5360                  * Force modified metadata out to stable storage.
5361                  */
5362                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5363                 /*
5364                  * Set response bitmap
5365                  */
5366                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5367         }
5368 
5369 /* Return early and already have a NFSv4 error */
5370 done:
5371         /*
5372          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5373          * conversion sets both readable and writeable NFS4 attrs
5374          * for AT_MTIME and AT_ATIME.  The line below masks out
5375          * unrequested attrs from the setattr result bitmap.  This
5376          * is placed after the done: label to catch the ATTRNOTSUP
5377          * case.
5378          */
5379         *resp &= fattrp->attrmask;
5380 
5381         if (in_crit)
5382                 nbl_end_crit(vp);
5383 
5384         nfs4_ntov_table_free(&ntov, &sarg);
5385 
5386         return (status);
5387 }
5388 
5389 /* ARGSUSED */
5390 static void
5391 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5392     struct compound_state *cs)
5393 {
5394         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5395         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5396         bslabel_t *clabel;
5397 
5398         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5399             SETATTR4args *, args);
5400 
5401         if (cs->vp == NULL) {
5402                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5403                 goto out;
5404         }
5405 
5406         /*
5407          * If there is an unshared filesystem mounted on this vnode,
5408          * do not allow to setattr on this vnode.
5409          */
5410         if (vn_ismntpt(cs->vp)) {
5411                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5412                 goto out;
5413         }
5414 
5415         resp->attrsset = 0;
5416 
5417         if (rdonly4(req, cs)) {
5418                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5419                 goto out;
5420         }
5421 
5422         /* check label before setting attributes */
5423         if (is_system_labeled()) {
5424                 ASSERT(req->rq_label != NULL);
5425                 clabel = req->rq_label;
5426                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5427                     "got client label from request(1)",
5428                     struct svc_req *, req);
5429                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5430                         if (!do_rfs_label_check(clabel, cs->vp,
5431                             EQUALITY_CHECK, cs->exi)) {
5432                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5433                                 goto out;
5434                         }
5435                 }
5436         }
5437 
5438         *cs->statusp = resp->status =
5439             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5440             &args->stateid);
5441 
5442 out:
5443         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5444             SETATTR4res *, resp);
5445 }
5446 
5447 /* ARGSUSED */
5448 static void
5449 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5450     struct compound_state *cs)
5451 {
5452         /*
5453          * verify and nverify are exactly the same, except that nverify
5454          * succeeds when some argument changed, and verify succeeds when
5455          * when none changed.
5456          */
5457 
5458         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5459         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5460 
5461         int error;
5462         struct nfs4_svgetit_arg sarg;
5463         struct statvfs64 sb;
5464         struct nfs4_ntov_table ntov;
5465 
5466         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5467             VERIFY4args *, args);
5468 
5469         if (cs->vp == NULL) {
5470                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5471                 goto out;
5472         }
5473 
5474         sarg.sbp = &sb;
5475         sarg.is_referral = B_FALSE;
5476         nfs4_ntov_table_init(&ntov);
5477         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5478             &sarg, &ntov, NFS4ATTR_VERIT);
5479         if (resp->status != NFS4_OK) {
5480                 /*
5481                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5482                  * so could return -1 for "no match".
5483                  */
5484                 if (resp->status == -1)
5485                         resp->status = NFS4ERR_NOT_SAME;
5486                 goto done;
5487         }
5488         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5489         switch (error) {
5490         case 0:
5491                 resp->status = NFS4_OK;
5492                 break;
5493         case -1:
5494                 resp->status = NFS4ERR_NOT_SAME;
5495                 break;
5496         default:
5497                 resp->status = puterrno4(error);
5498                 break;
5499         }
5500 done:
5501         *cs->statusp = resp->status;
5502         nfs4_ntov_table_free(&ntov, &sarg);
5503 out:
5504         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5505             VERIFY4res *, resp);
5506 }
5507 
5508 /* ARGSUSED */
5509 static void
5510 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5511     struct compound_state *cs)
5512 {
5513         /*
5514          * verify and nverify are exactly the same, except that nverify
5515          * succeeds when some argument changed, and verify succeeds when
5516          * when none changed.
5517          */
5518 
5519         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5520         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5521 
5522         int error;
5523         struct nfs4_svgetit_arg sarg;
5524         struct statvfs64 sb;
5525         struct nfs4_ntov_table ntov;
5526 
5527         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5528             NVERIFY4args *, args);
5529 
5530         if (cs->vp == NULL) {
5531                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5532                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5533                     NVERIFY4res *, resp);
5534                 return;
5535         }
5536         sarg.sbp = &sb;
5537         sarg.is_referral = B_FALSE;
5538         nfs4_ntov_table_init(&ntov);
5539         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5540             &sarg, &ntov, NFS4ATTR_VERIT);
5541         if (resp->status != NFS4_OK) {
5542                 /*
5543                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5544                  * so could return -1 for "no match".
5545                  */
5546                 if (resp->status == -1)
5547                         resp->status = NFS4_OK;
5548                 goto done;
5549         }
5550         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5551         switch (error) {
5552         case 0:
5553                 resp->status = NFS4ERR_SAME;
5554                 break;
5555         case -1:
5556                 resp->status = NFS4_OK;
5557                 break;
5558         default:
5559                 resp->status = puterrno4(error);
5560                 break;
5561         }
5562 done:
5563         *cs->statusp = resp->status;
5564         nfs4_ntov_table_free(&ntov, &sarg);
5565 
5566         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5567             NVERIFY4res *, resp);
5568 }
5569 
5570 /*
5571  * XXX - This should live in an NFS header file.
5572  */
5573 #define MAX_IOVECS      12
5574 
5575 /* ARGSUSED */
5576 static void
5577 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5578     struct compound_state *cs)
5579 {
5580         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5581         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5582         int error;
5583         vnode_t *vp;
5584         struct vattr bva;
5585         u_offset_t rlimit;
5586         struct uio uio;
5587         struct iovec iov[MAX_IOVECS];
5588         struct iovec *iovp;
5589         int iovcnt;
5590         int ioflag;
5591         cred_t *savecred, *cr;
5592         bool_t *deleg = &cs->deleg;
5593         nfsstat4 stat;
5594         int in_crit = 0;
5595         caller_context_t ct;
5596 
5597         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5598             WRITE4args *, args);
5599 
5600         vp = cs->vp;
5601         if (vp == NULL) {
5602                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5603                 goto out;
5604         }
5605         if (cs->access == CS_ACCESS_DENIED) {
5606                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5607                 goto out;
5608         }
5609 
5610         cr = cs->cr;
5611 
5612         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5613             deleg, TRUE, &ct)) != NFS4_OK) {
5614                 *cs->statusp = resp->status = stat;
5615                 goto out;
5616         }
5617 
5618         /*
5619          * We have to enter the critical region before calling VOP_RWLOCK
5620          * to avoid a deadlock with ufs.
5621          */
5622         if (nbl_need_check(vp)) {
5623                 nbl_start_crit(vp, RW_READER);
5624                 in_crit = 1;
5625                 if (nbl_conflict(vp, NBL_WRITE,
5626                     args->offset, args->data_len, 0, &ct)) {
5627                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5628                         goto out;
5629                 }
5630         }
5631 
5632         bva.va_mask = AT_MODE | AT_UID;
5633         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5634 
5635         /*
5636          * If we can't get the attributes, then we can't do the
5637          * right access checking.  So, we'll fail the request.
5638          */
5639         if (error) {
5640                 *cs->statusp = resp->status = puterrno4(error);
5641                 goto out;
5642         }
5643 
5644         if (rdonly4(req, cs)) {
5645                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5646                 goto out;
5647         }
5648 
5649         if (vp->v_type != VREG) {
5650                 *cs->statusp = resp->status =
5651                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5652                 goto out;
5653         }
5654 
5655         if (crgetuid(cr) != bva.va_uid &&
5656             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5657                 *cs->statusp = resp->status = puterrno4(error);
5658                 goto out;
5659         }
5660 
5661         if (MANDLOCK(vp, bva.va_mode)) {
5662                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5663                 goto out;
5664         }
5665 
5666         if (args->data_len == 0) {
5667                 *cs->statusp = resp->status = NFS4_OK;
5668                 resp->count = 0;
5669                 resp->committed = args->stable;
5670                 resp->writeverf = Write4verf;
5671                 goto out;
5672         }
5673 
5674         if (args->mblk != NULL) {
5675                 mblk_t *m;
5676                 uint_t bytes, round_len;
5677 
5678                 iovcnt = 0;
5679                 bytes = 0;
5680                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5681                 for (m = args->mblk;
5682                     m != NULL && bytes < round_len;
5683                     m = m->b_cont) {
5684                         iovcnt++;
5685                         bytes += MBLKL(m);
5686                 }
5687 #ifdef DEBUG
5688                 /* should have ended on an mblk boundary */
5689                 if (bytes != round_len) {
5690                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5691                             bytes, round_len, args->data_len);
5692                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5693                             (void *)args->mblk, (void *)m);
5694                         ASSERT(bytes == round_len);
5695                 }
5696 #endif
5697                 if (iovcnt <= MAX_IOVECS) {
5698                         iovp = iov;
5699                 } else {
5700                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5701                 }
5702                 mblk_to_iov(args->mblk, iovcnt, iovp);
5703         } else if (args->rlist != NULL) {
5704                 iovcnt = 1;
5705                 iovp = iov;
5706                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5707                 iovp->iov_len = args->data_len;
5708         } else {
5709                 iovcnt = 1;
5710                 iovp = iov;
5711                 iovp->iov_base = args->data_val;
5712                 iovp->iov_len = args->data_len;
5713         }
5714 
5715         uio.uio_iov = iovp;
5716         uio.uio_iovcnt = iovcnt;
5717 
5718         uio.uio_segflg = UIO_SYSSPACE;
5719         uio.uio_extflg = UIO_COPY_DEFAULT;
5720         uio.uio_loffset = args->offset;
5721         uio.uio_resid = args->data_len;
5722         uio.uio_llimit = curproc->p_fsz_ctl;
5723         rlimit = uio.uio_llimit - args->offset;
5724         if (rlimit < (u_offset_t)uio.uio_resid)
5725                 uio.uio_resid = (int)rlimit;
5726 
5727         if (args->stable == UNSTABLE4)
5728                 ioflag = 0;
5729         else if (args->stable == FILE_SYNC4)
5730                 ioflag = FSYNC;
5731         else if (args->stable == DATA_SYNC4)
5732                 ioflag = FDSYNC;
5733         else {
5734                 if (iovp != iov)
5735                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5736                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5737                 goto out;
5738         }
5739 
5740         /*
5741          * We're changing creds because VM may fault and we need
5742          * the cred of the current thread to be used if quota
5743          * checking is enabled.
5744          */
5745         savecred = curthread->t_cred;
5746         curthread->t_cred = cr;
5747         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5748         curthread->t_cred = savecred;
5749 
5750         if (iovp != iov)
5751                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5752 
5753         if (error) {
5754                 *cs->statusp = resp->status = puterrno4(error);
5755                 goto out;
5756         }
5757 
5758         *cs->statusp = resp->status = NFS4_OK;
5759         resp->count = args->data_len - uio.uio_resid;
5760 
5761         if (ioflag == 0)
5762                 resp->committed = UNSTABLE4;
5763         else
5764                 resp->committed = FILE_SYNC4;
5765 
5766         resp->writeverf = Write4verf;
5767 
5768 out:
5769         if (in_crit)
5770                 nbl_end_crit(vp);
5771 
5772         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5773             WRITE4res *, resp);
5774 }
5775 
5776 
5777 /* XXX put in a header file */
5778 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5779 
/*
 * Service one NFSv4 COMPOUND request.
 *
 * Each operation in args->array is dispatched in order through
 * rfsv4disptab[] and its result is stored in the matching slot of
 * resp->array.  Processing stops after the first operation that leaves
 * *cs.statusp != NFS4_OK (or that carries an out-of-range opcode), and the
 * results array is then shrunk to cover only the operations that ran.
 *
 *   args - the compound request
 *   resp - the reply to fill in; tag, status, array and array_len are set
 *          here
 *   exi  - must be NULL when args->array_len is non-zero (asserted)
 *   cr   - must be NULL when args->array_len is non-zero (asserted); the
 *          credential actually used is obtained below with crget() and
 *          sec_svc_getcred()
 *   req  - the RPC request; req->rq_label is freed and cleared before a
 *          normal return
 *   rv   - optional; cleared to 0 on entry and set to 1 if
 *          sec_svc_getcred() fails
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, int *rv)
{
        uint_t i;
        struct compound_state cs;

        if (rv != NULL)
                *rv = 0;
        rfs4_init_compound_state(&cs);
        /*
         * Form a reply tag by copying over the request tag.
         */
        resp->tag.utf8string_val =
            kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
        resp->tag.utf8string_len = args->tag.utf8string_len;
        bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
            resp->tag.utf8string_len);

        cs.statusp = &resp->status;
        cs.req = req;
        resp->array = NULL;
        resp->array_len = 0;

        /*
         * XXX for now, minorversion should be zero
         */
        if (args->minorversion != NFS4_MINORVERSION) {
                DTRACE_NFSV4_2(compound__start, struct compound_state *,
                    &cs, COMPOUND4args *, args);
                resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
                DTRACE_NFSV4_2(compound__done, struct compound_state *,
                    &cs, COMPOUND4res *, resp);
                return;
        }

        /*
         * An empty compound succeeds trivially.  Note that this path
         * returns before any credential or results array is set up and
         * without firing the compound start/done probes.
         */
        if (args->array_len == 0) {
                resp->status = NFS4_OK;
                return;
        }

        ASSERT(exi == NULL);
        ASSERT(cr == NULL);

        cr = crget();
        ASSERT(cr != NULL);

        /*
         * On failure the fresh cred is released, the transport is told
         * about the bad credential and *rv (if supplied) is set to 1.
         */
        if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
                DTRACE_NFSV4_2(compound__start, struct compound_state *,
                    &cs, COMPOUND4args *, args);
                crfree(cr);
                DTRACE_NFSV4_2(compound__done, struct compound_state *,
                    &cs, COMPOUND4res *, resp);
                svcerr_badcred(req->rq_xprt);
                if (rv != NULL)
                        *rv = 1;
                return;
        }
        /*
         * One zero-filled result slot per requested op; the per-op code
         * below relies on resop->exi starting out NULL.
         */
        resp->array_len = args->array_len;
        resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
            KM_SLEEP);

        cs.basecr = cr;

        DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
            COMPOUND4args *, args);

        /*
         * For now, NFS4 compound processing must be protected by
         * exported_lock because it can access more than one exportinfo
         * per compound and share/unshare can now change multiple
         * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
         * per proc (excluding public exinfo), and exi_count design
         * is sufficient to protect concurrent execution of NFS2/3
         * ops along with unexport.  This lock will be removed as
         * part of the NFSv4 phase 2 namespace redesign work.
         */
        rw_enter(&exported_lock, RW_READER);

        /*
         * If this is the first compound we've seen, we need to start all
         * new instances' grace periods.
         */
        if (rfs4_seen_first_compound == 0) {
                rfs4_grace_start_new();
                /*
                 * This must be set after rfs4_grace_start_new(), otherwise
                 * another thread could proceed past here before the former
                 * is finished.
                 */
                rfs4_seen_first_compound = 1;
        }

        for (i = 0; i < args->array_len && cs.cont; i++) {
                nfs_argop4 *argop;
                nfs_resop4 *resop;
                uint_t op;

                argop = &args->array[i];
                resop = &resp->array[i];
                resop->resop = argop->argop;
                op = (uint_t)resop->resop;

                if (op < rfsv4disp_cnt) {
                        kstat_t *ksp = rfsprocio_v4_ptr[op];
                        kstat_t *exi_ksp = NULL;

                        /*
                         * Count the individual ops here; NULL and COMPOUND
                         * are counted in common_dispatch()
                         */
                        rfsproccnt_v4_ptr[op].value.ui64++;

                        if (ksp != NULL) {
                                mutex_enter(ksp->ks_lock);
                                kstat_runq_enter(KSTAT_IO_PTR(ksp));
                                mutex_exit(ksp->ks_lock);
                        }

                        /*
                         * Pick the exportinfo this op is accounted against
                         * before it runs: NFS4_OP_CFH ops use cs.exi and
                         * NFS4_OP_SFH ops use cs.saved_exi.  Every other op
                         * type leaves resop->exi at its zero-filled NULL.
                         */
                        switch (rfsv4disptab[op].op_type) {
                        case NFS4_OP_CFH:
                                resop->exi = cs.exi;
                                break;
                        case NFS4_OP_SFH:
                                resop->exi = cs.saved_exi;
                                break;
                        default:
                                ASSERT(resop->exi == NULL);
                                break;
                        }

                        if (resop->exi != NULL) {
                                exi_ksp = resop->exi->exi_kstats->
                                    rfsprocio_v4_ptr[op];
                                if (exi_ksp != NULL) {
                                        mutex_enter(exi_ksp->ks_lock);
                                        kstat_runq_enter(KSTAT_IO_PTR(exi_ksp));
                                        mutex_exit(exi_ksp->ks_lock);
                                }
                        }

                        NFS4_DEBUG(rfs4_debug > 1,
                            (CE_NOTE, "Executing %s", rfs4_op_string[op]));
                        (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
                        NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
                            rfs4_op_string[op], *cs.statusp));
                        if (*cs.statusp != NFS4_OK)
                                cs.cont = FALSE;

                        /*
                         * NFS4_OP_POSTCFH ops are accounted against cs.exi
                         * as it stands after a successful dispatch.  No
                         * runq enter was done for them above, which is why
                         * the runq exit below is skipped for this op type.
                         */
                        if (rfsv4disptab[op].op_type == NFS4_OP_POSTCFH &&
                            *cs.statusp == NFS4_OK &&
                            (resop->exi = cs.exi) != NULL) {
                                exi_ksp = resop->exi->exi_kstats->
                                    rfsprocio_v4_ptr[op];
                        }

                        /*
                         * With a per-export kstat available, charge the
                         * request's opsize to it and take a hold on the
                         * exportinfo left in resop->exi (presumably paired
                         * with the exi_rele() in rfs4_compound_kstat_res()).
                         * Otherwise drop the exportinfo pointer from the
                         * result.
                         */
                        if (exi_ksp != NULL) {
                                mutex_enter(exi_ksp->ks_lock);
                                KSTAT_IO_PTR(exi_ksp)->nwritten +=
                                    argop->opsize;
                                KSTAT_IO_PTR(exi_ksp)->writes++;
                                if (rfsv4disptab[op].op_type != NFS4_OP_POSTCFH)
                                        kstat_runq_exit(KSTAT_IO_PTR(exi_ksp));
                                mutex_exit(exi_ksp->ks_lock);

                                exi_hold(resop->exi);
                        } else {
                                resop->exi = NULL;
                        }

                        if (ksp != NULL) {
                                mutex_enter(ksp->ks_lock);
                                kstat_runq_exit(KSTAT_IO_PTR(ksp));
                                mutex_exit(ksp->ks_lock);
                        }
                } else {
                        /*
                         * This is effectively dead code since XDR code
                         * will have already returned BADXDR if op doesn't
                         * decode to legal value.  This only done for a
                         * day when XDR code doesn't verify v4 opcodes.
                         */
                        op = OP_ILLEGAL;
                        rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;

                        rfs4_op_illegal(argop, resop, req, &cs);
                        cs.cont = FALSE;
                }

                /*
                 * If not at last op, and if we are to stop, then
                 * compact the results array.
                 */
                if ((i + 1) < args->array_len && !cs.cont) {
                        nfs_resop4 *new_res = kmem_alloc(
                            (i + 1) * sizeof (nfs_resop4), KM_SLEEP);
                        bcopy(resp->array,
                            new_res, (i + 1) * sizeof (nfs_resop4));
                        kmem_free(resp->array,
                            args->array_len * sizeof (nfs_resop4));

                        resp->array_len = i + 1;
                        resp->array = new_res;
                }
        }

        rw_exit(&exported_lock);

        DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
            COMPOUND4res *, resp);

        /* Drop whatever the compound state still references. */
        if (cs.vp)
                VN_RELE(cs.vp);
        if (cs.saved_vp)
                VN_RELE(cs.saved_vp);
        if (cs.saved_fh.nfs_fh4_val)
                kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);

        if (cs.basecr)
                crfree(cs.basecr);
        if (cs.cr)
                crfree(cs.cr);
        /*
         * done with this compound request, free the label
         */

        if (req->rq_label != NULL) {
                kmem_free(req->rq_label, sizeof (bslabel_t));
                req->rq_label = NULL;
        }
}
6011 
6012 /*
6013  * XXX because of what appears to be duplicate calls to rfs4_compound_free
6014  * XXX zero out the tag and array values. Need to investigate why the
6015  * XXX calls occur, but at least prevent the panic for now.
6016  */
6017 void
6018 rfs4_compound_free(COMPOUND4res *resp)
6019 {
6020         uint_t i;
6021 
6022         if (resp->tag.utf8string_val) {
6023                 UTF8STRING_FREE(resp->tag)
6024         }
6025 
6026         for (i = 0; i < resp->array_len; i++) {
6027                 nfs_resop4 *resop;
6028                 uint_t op;
6029 
6030                 resop = &resp->array[i];
6031                 op = (uint_t)resop->resop;
6032                 if (op < rfsv4disp_cnt) {
6033                         (*rfsv4disptab[op].dis_resfree)(resop);
6034                 }
6035         }
6036         if (resp->array != NULL) {
6037                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6038         }
6039 }
6040 
6041 /*
6042  * Process the value of the compound request rpc flags, as a bit-AND
6043  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6044  */
6045 void
6046 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6047 {
6048         int i;
6049         int flag = RPC_ALL;
6050 
6051         for (i = 0; flag && i < args->array_len; i++) {
6052                 uint_t op;
6053 
6054                 op = (uint_t)args->array[i].argop;
6055 
6056                 if (op < rfsv4disp_cnt)
6057                         flag &= rfsv4disptab[op].dis_flags;
6058                 else
6059                         flag = 0;
6060         }
6061         *flagp = flag;
6062 }
6063 
6064 void
6065 rfs4_compound_kstat_args(COMPOUND4args *args)
6066 {
6067         int i;
6068 
6069         for (i = 0; i < args->array_len; i++) {
6070                 uint_t op = (uint_t)args->array[i].argop;
6071 
6072                 if (op < rfsv4disp_cnt) {
6073                         kstat_t *ksp = rfsprocio_v4_ptr[op];
6074 
6075                         if (ksp != NULL) {
6076                                 mutex_enter(ksp->ks_lock);
6077                                 KSTAT_IO_PTR(ksp)->nwritten +=
6078                                     args->array[i].opsize;
6079                                 KSTAT_IO_PTR(ksp)->writes++;
6080                                 mutex_exit(ksp->ks_lock);
6081                         }
6082                 }
6083         }
6084 }
6085 
6086 void
6087 rfs4_compound_kstat_res(COMPOUND4res *res)
6088 {
6089         int i;
6090 
6091         for (i = 0; i < res->array_len; i++) {
6092                 uint_t op = (uint_t)res->array[i].resop;
6093 
6094                 if (op < rfsv4disp_cnt) {
6095                         kstat_t *ksp = rfsprocio_v4_ptr[op];
6096                         struct exportinfo *exi = res->array[i].exi;
6097 
6098                         if (ksp != NULL) {
6099                                 mutex_enter(ksp->ks_lock);
6100                                 KSTAT_IO_PTR(ksp)->nread +=
6101                                     res->array[i].opsize;
6102                                 KSTAT_IO_PTR(ksp)->reads++;
6103                                 mutex_exit(ksp->ks_lock);
6104                         }
6105 
6106                         if (exi != NULL) {
6107                                 kstat_t *exi_ksp;
6108 
6109                                 rw_enter(&exported_lock, RW_READER);
6110 
6111                                 exi_ksp = exi->exi_kstats->rfsprocio_v4_ptr[op];
6112                                 if (exi_ksp != NULL) {
6113                                         mutex_enter(exi_ksp->ks_lock);
6114                                         KSTAT_IO_PTR(exi_ksp)->nread +=
6115                                             res->array[i].opsize;
6116                                         KSTAT_IO_PTR(exi_ksp)->reads++;
6117                                         mutex_exit(exi_ksp->ks_lock);
6118                                 }
6119 
6120                                 rw_exit(&exported_lock);
6121 
6122                                 exi_rele(exi);
6123                         }
6124                 }
6125         }
6126 }
6127 
6128 nfsstat4
6129 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6130 {
6131         nfsstat4 e;
6132 
6133         rfs4_dbe_lock(cp->rc_dbe);
6134 
6135         if (cp->rc_sysidt != LM_NOSYSID) {
6136                 *sp = cp->rc_sysidt;
6137                 e = NFS4_OK;
6138 
6139         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6140                 *sp = cp->rc_sysidt;
6141                 e = NFS4_OK;
6142 
6143                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6144                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
6145         } else
6146                 e = NFS4ERR_DELAY;
6147 
6148         rfs4_dbe_unlock(cp->rc_dbe);
6149         return (e);
6150 }
6151 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug aid: log a one-line description of a lock request with cmn_err().
 *
 *   str       - caller-supplied prefix for the message
 *   operation - F_GETLK, F_SETLK or F_SETLK_NBMAND; anything else is
 *               reported as "F_UNKNOWN"
 *   flk       - the lock being described; l_whence must be 0 (asserted)
 *
 * A zero l_len is printed as ~0LL rather than 0.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        char *op, *type;

        switch (operation) {
        case F_GETLK: op = "F_GETLK";
                break;
        case F_SETLK: op = "F_SETLK";
                break;
        case F_SETLK_NBMAND: op = "F_SETLK_NBMAND";
                break;
        default: op = "F_UNKNOWN";
                break;
        }
        switch (flk->l_type) {
        case F_UNLCK: type = "F_UNLCK";
                break;
        case F_RDLCK: type = "F_RDLCK";
                break;
        case F_WRLCK: type = "F_WRLCK";
                break;
        default: type = "F_UNKNOWN";
                break;
        }

        ASSERT(flk->l_whence == 0);
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op, type, (longlong_t)flk->l_start,
            flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Wrapped in do/while (0) so the expansion is a single statement: a bare
 * "if (d) ..." would capture an "else" that follows the macro at the call
 * site, making DEBUG builds behave differently from non-DEBUG ones.
 */
#define LOCK_PRINT(d, s, t, f) \
        do { if (d) lock_print(s, t, f); } while (0)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
6188 
/*
 * Stub: all three arguments are ignored and TRUE is returned
 * unconditionally.  Judging by the name this is presumably a placeholder
 * for a credential check -- confirm with the callers before relying on it.
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
{
        return (TRUE);
}
6195 
6196 /*
6197  * Look up the pathname using the vp in cs as the directory vnode.
6198  * cs->vp will be the vnode for the file on success
6199  */
6200 
6201 static nfsstat4
6202 rfs4_lookup(component4 *component, struct svc_req *req,
6203     struct compound_state *cs)
6204 {
6205         char *nm;
6206         uint32_t len;
6207         nfsstat4 status;
6208         struct sockaddr *ca;
6209         char *name;
6210 
6211         if (cs->vp == NULL) {
6212                 return (NFS4ERR_NOFILEHANDLE);
6213         }
6214         if (cs->vp->v_type != VDIR) {
6215                 return (NFS4ERR_NOTDIR);
6216         }
6217 
6218         status = utf8_dir_verify(component);
6219         if (status != NFS4_OK)
6220                 return (status);
6221 
6222         nm = utf8_to_fn(component, &len, NULL);
6223         if (nm == NULL) {
6224                 return (NFS4ERR_INVAL);
6225         }
6226 
6227         if (len > MAXNAMELEN) {
6228                 kmem_free(nm, len);
6229                 return (NFS4ERR_NAMETOOLONG);
6230         }
6231 
6232         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6233         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6234             MAXPATHLEN + 1);
6235 
6236         if (name == NULL) {
6237                 kmem_free(nm, len);
6238                 return (NFS4ERR_INVAL);
6239         }
6240 
6241         status = do_rfs4_op_lookup(name, req, cs);
6242 
6243         if (name != nm)
6244                 kmem_free(name, MAXPATHLEN + 1);
6245 
6246         kmem_free(nm, len);
6247 
6248         return (status);
6249 }
6250 
6251 static nfsstat4
6252 rfs4_lookupfile(component4 *component, struct svc_req *req,
6253     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6254 {
6255         nfsstat4 status;
6256         vnode_t *dvp = cs->vp;
6257         vattr_t bva, ava, fva;
6258         int error;
6259 
6260         /* Get "before" change value */
6261         bva.va_mask = AT_CTIME|AT_SEQ;
6262         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6263         if (error)
6264                 return (puterrno4(error));
6265 
6266         /* rfs4_lookup may VN_RELE directory */
6267         VN_HOLD(dvp);
6268 
6269         status = rfs4_lookup(component, req, cs);
6270         if (status != NFS4_OK) {
6271                 VN_RELE(dvp);
6272                 return (status);
6273         }
6274 
6275         /*
6276          * Get "after" change value, if it fails, simply return the
6277          * before value.
6278          */
6279         ava.va_mask = AT_CTIME|AT_SEQ;
6280         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6281                 ava.va_ctime = bva.va_ctime;
6282                 ava.va_seq = 0;
6283         }
6284         VN_RELE(dvp);
6285 
6286         /*
6287          * Validate the file is a file
6288          */
6289         fva.va_mask = AT_TYPE|AT_MODE;
6290         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6291         if (error)
6292                 return (puterrno4(error));
6293 
6294         if (fva.va_type != VREG) {
6295                 if (fva.va_type == VDIR)
6296                         return (NFS4ERR_ISDIR);
6297                 if (fva.va_type == VLNK)
6298                         return (NFS4ERR_SYMLINK);
6299                 return (NFS4ERR_INVAL);
6300         }
6301 
6302         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6303         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6304 
6305         /*
6306          * It is undefined if VOP_LOOKUP will change va_seq, so
6307          * cinfo.atomic = TRUE only if we have
6308          * non-zero va_seq's, and they have not changed.
6309          */
6310         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6311                 cinfo->atomic = TRUE;
6312         else
6313                 cinfo->atomic = FALSE;
6314 
6315         /* Check for mandatory locking */
6316         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6317         return (check_open_access(access, cs, req));
6318 }
6319 
6320 static nfsstat4
6321 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6322     timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
6323 {
6324         int error;
6325         nfsstat4 status = NFS4_OK;
6326         vattr_t va;
6327 
6328 tryagain:
6329 
6330         /*
6331          * The file open mode used is VWRITE.  If the client needs
6332          * some other semantic, then it should do the access checking
6333          * itself.  It would have been nice to have the file open mode
6334          * passed as part of the arguments.
6335          */
6336 
6337         *created = TRUE;
6338         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6339 
6340         if (error) {
6341                 *created = FALSE;
6342 
6343                 /*
6344                  * If we got something other than file already exists
6345                  * then just return this error.  Otherwise, we got
6346                  * EEXIST.  If we were doing a GUARDED create, then
6347                  * just return this error.  Otherwise, we need to
6348                  * make sure that this wasn't a duplicate of an
6349                  * exclusive create request.
6350                  *
6351                  * The assumption is made that a non-exclusive create
6352                  * request will never return EEXIST.
6353                  */
6354 
6355                 if (error != EEXIST || mode == GUARDED4) {
6356                         status = puterrno4(error);
6357                         return (status);
6358                 }
6359                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6360                     NULL, NULL, NULL);
6361 
6362                 if (error) {
6363                         /*
6364                          * We couldn't find the file that we thought that
6365                          * we just created.  So, we'll just try creating
6366                          * it again.
6367                          */
6368                         if (error == ENOENT)
6369                                 goto tryagain;
6370 
6371                         status = puterrno4(error);
6372                         return (status);
6373                 }
6374 
6375                 if (mode == UNCHECKED4) {
6376                         /* existing object must be regular file */
6377                         if ((*vpp)->v_type != VREG) {
6378                                 if ((*vpp)->v_type == VDIR)
6379                                         status = NFS4ERR_ISDIR;
6380                                 else if ((*vpp)->v_type == VLNK)
6381                                         status = NFS4ERR_SYMLINK;
6382                                 else
6383                                         status = NFS4ERR_INVAL;
6384                                 VN_RELE(*vpp);
6385                                 return (status);
6386                         }
6387 
6388                         return (NFS4_OK);
6389                 }
6390 
6391                 /* Check for duplicate request */
6392                 ASSERT(mtime != 0);
6393                 va.va_mask = AT_MTIME;
6394                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6395                 if (!error) {
6396                         /* We found the file */
6397                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6398                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6399                                 /* but its not our creation */
6400                                 VN_RELE(*vpp);
6401                                 return (NFS4ERR_EXIST);
6402                         }
6403                         *created = TRUE; /* retrans of create == created */
6404                         return (NFS4_OK);
6405                 }
6406                 VN_RELE(*vpp);
6407                 return (NFS4ERR_EXIST);
6408         }
6409 
6410         return (NFS4_OK);
6411 }
6412 
6413 static nfsstat4
6414 check_open_access(uint32_t access, struct compound_state *cs,
6415     struct svc_req *req)
6416 {
6417         int error;
6418         vnode_t *vp;
6419         bool_t readonly;
6420         cred_t *cr = cs->cr;
6421 
6422         /* For now we don't allow mandatory locking as per V2/V3 */
6423         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6424                 return (NFS4ERR_ACCESS);
6425         }
6426 
6427         vp = cs->vp;
6428         ASSERT(cr != NULL && vp->v_type == VREG);
6429 
6430         /*
6431          * If the file system is exported read only and we are trying
6432          * to open for write, then return NFS4ERR_ROFS
6433          */
6434 
6435         readonly = rdonly4(req, cs);
6436 
6437         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6438                 return (NFS4ERR_ROFS);
6439 
6440         if (access & OPEN4_SHARE_ACCESS_READ) {
6441                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6442                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6443                         return (NFS4ERR_ACCESS);
6444                 }
6445         }
6446 
6447         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6448                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6449                 if (error)
6450                         return (NFS4ERR_ACCESS);
6451         }
6452 
6453         return (NFS4_OK);
6454 }
6455 
6456 static nfsstat4
6457 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6458     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6459 {
6460         struct nfs4_svgetit_arg sarg;
6461         struct nfs4_ntov_table ntov;
6462 
6463         bool_t ntov_table_init = FALSE;
6464         struct statvfs64 sb;
6465         nfsstat4 status;
6466         vnode_t *vp;
6467         vattr_t bva, ava, iva, cva, *vap;
6468         vnode_t *dvp;
6469         timespec32_t *mtime;
6470         char *nm = NULL;
6471         uint_t buflen;
6472         bool_t created;
6473         bool_t setsize = FALSE;
6474         len_t reqsize;
6475         int error;
6476         bool_t trunc;
6477         caller_context_t ct;
6478         component4 *component;
6479         bslabel_t *clabel;
6480         struct sockaddr *ca;
6481         char *name = NULL;
6482 
6483         sarg.sbp = &sb;
6484         sarg.is_referral = B_FALSE;
6485 
6486         dvp = cs->vp;
6487 
6488         /* Check if the file system is read only */
6489         if (rdonly4(req, cs))
6490                 return (NFS4ERR_ROFS);
6491 
6492         /* check the label of including directory */
6493         if (is_system_labeled()) {
6494                 ASSERT(req->rq_label != NULL);
6495                 clabel = req->rq_label;
6496                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6497                     "got client label from request(1)",
6498                     struct svc_req *, req);
6499                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6500                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6501                             cs->exi)) {
6502                                 return (NFS4ERR_ACCESS);
6503                         }
6504                 }
6505         }
6506 
6507         /*
6508          * Get the last component of path name in nm. cs will reference
6509          * the including directory on success.
6510          */
6511         component = &args->open_claim4_u.file;
6512         status = utf8_dir_verify(component);
6513         if (status != NFS4_OK)
6514                 return (status);
6515 
6516         nm = utf8_to_fn(component, &buflen, NULL);
6517 
6518         if (nm == NULL)
6519                 return (NFS4ERR_RESOURCE);
6520 
6521         if (buflen > MAXNAMELEN) {
6522                 kmem_free(nm, buflen);
6523                 return (NFS4ERR_NAMETOOLONG);
6524         }
6525 
6526         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6527         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6528         if (error) {
6529                 kmem_free(nm, buflen);
6530                 return (puterrno4(error));
6531         }
6532 
6533         if (bva.va_type != VDIR) {
6534                 kmem_free(nm, buflen);
6535                 return (NFS4ERR_NOTDIR);
6536         }
6537 
6538         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6539 
6540         switch (args->mode) {
6541         case GUARDED4:
6542                 /*FALLTHROUGH*/
6543         case UNCHECKED4:
6544                 nfs4_ntov_table_init(&ntov);
6545                 ntov_table_init = TRUE;
6546 
6547                 *attrset = 0;
6548                 status = do_rfs4_set_attrs(attrset,
6549                     &args->createhow4_u.createattrs,
6550                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6551 
6552                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6553                     sarg.vap->va_type != VREG) {
6554                         if (sarg.vap->va_type == VDIR)
6555                                 status = NFS4ERR_ISDIR;
6556                         else if (sarg.vap->va_type == VLNK)
6557                                 status = NFS4ERR_SYMLINK;
6558                         else
6559                                 status = NFS4ERR_INVAL;
6560                 }
6561 
6562                 if (status != NFS4_OK) {
6563                         kmem_free(nm, buflen);
6564                         nfs4_ntov_table_free(&ntov, &sarg);
6565                         *attrset = 0;
6566                         return (status);
6567                 }
6568 
6569                 vap = sarg.vap;
6570                 vap->va_type = VREG;
6571                 vap->va_mask |= AT_TYPE;
6572 
6573                 if ((vap->va_mask & AT_MODE) == 0) {
6574                         vap->va_mask |= AT_MODE;
6575                         vap->va_mode = (mode_t)0600;
6576                 }
6577 
6578                 if (vap->va_mask & AT_SIZE) {
6579 
6580                         /* Disallow create with a non-zero size */
6581 
6582                         if ((reqsize = sarg.vap->va_size) != 0) {
6583                                 kmem_free(nm, buflen);
6584                                 nfs4_ntov_table_free(&ntov, &sarg);
6585                                 *attrset = 0;
6586                                 return (NFS4ERR_INVAL);
6587                         }
6588                         setsize = TRUE;
6589                 }
6590                 break;
6591 
6592         case EXCLUSIVE4:
6593                 /* prohibit EXCL create of named attributes */
6594                 if (dvp->v_flag & V_XATTRDIR) {
6595                         kmem_free(nm, buflen);
6596                         *attrset = 0;
6597                         return (NFS4ERR_INVAL);
6598                 }
6599 
6600                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6601                 cva.va_type = VREG;
6602                 /*
6603                  * Ensure no time overflows. Assumes underlying
6604                  * filesystem supports at least 32 bits.
6605                  * Truncate nsec to usec resolution to allow valid
6606                  * compares even if the underlying filesystem truncates.
6607                  */
6608                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6609                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6610                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6611                 cva.va_mode = (mode_t)0;
6612                 vap = &cva;
6613 
6614                 /*
6615                  * For EXCL create, attrset is set to the server attr
6616                  * used to cache the client's verifier.
6617                  */
6618                 *attrset = FATTR4_TIME_MODIFY_MASK;
6619                 break;
6620         }
6621 
6622         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6623         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6624             MAXPATHLEN  + 1);
6625 
6626         if (name == NULL) {
6627                 kmem_free(nm, buflen);
6628                 return (NFS4ERR_SERVERFAULT);
6629         }
6630 
6631         status = create_vnode(dvp, name, vap, args->mode, mtime,
6632             cs->cr, &vp, &created);
6633         if (nm != name)
6634                 kmem_free(name, MAXPATHLEN + 1);
6635         kmem_free(nm, buflen);
6636 
6637         if (status != NFS4_OK) {
6638                 if (ntov_table_init)
6639                         nfs4_ntov_table_free(&ntov, &sarg);
6640                 *attrset = 0;
6641                 return (status);
6642         }
6643 
6644         trunc = (setsize && !created);
6645 
6646         if (args->mode != EXCLUSIVE4) {
6647                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6648 
6649                 /*
6650                  * True verification that object was created with correct
6651                  * attrs is impossible.  The attrs could have been changed
6652                  * immediately after object creation.  If attributes did
6653                  * not verify, the only recourse for the server is to
6654                  * destroy the object.  Maybe if some attrs (like gid)
6655                  * are set incorrectly, the object should be destroyed;
6656                  * however, seems bad as a default policy.  Do we really
6657                  * want to destroy an object over one of the times not
6658                  * verifying correctly?  For these reasons, the server
6659                  * currently sets bits in attrset for createattrs
6660                  * that were set; however, no verification is done.
6661                  *
6662                  * vmask_to_nmask accounts for vattr bits set on create
6663                  *      [do_rfs4_set_attrs() only sets resp bits for
6664                  *       non-vattr/vfs bits.]
6665                  * Mask off any bits we set by default so as not to return
6666                  * more attrset bits than were requested in createattrs
6667                  */
6668                 if (created) {
6669                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6670                         *attrset &= createmask;
6671                 } else {
6672                         /*
6673                          * We did not create the vnode (we tried but it
6674                          * already existed).  In this case, the only createattr
6675                          * that the spec allows the server to set is size,
6676                          * and even then, it can only be set if it is 0.
6677                          */
6678                         *attrset = 0;
6679                         if (trunc)
6680                                 *attrset = FATTR4_SIZE_MASK;
6681                 }
6682         }
6683         if (ntov_table_init)
6684                 nfs4_ntov_table_free(&ntov, &sarg);
6685 
6686         /*
6687          * Get the initial "after" sequence number, if it fails,
6688          * set to zero, time to before.
6689          */
6690         iva.va_mask = AT_CTIME|AT_SEQ;
6691         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6692                 iva.va_seq = 0;
6693                 iva.va_ctime = bva.va_ctime;
6694         }
6695 
6696         /*
6697          * create_vnode attempts to create the file exclusive,
6698          * if it already exists the VOP_CREATE will fail and
6699          * may not increase va_seq. It is atomic if
6700          * we haven't changed the directory, but if it has changed
6701          * we don't know what changed it.
6702          */
6703         if (!created) {
6704                 if (bva.va_seq && iva.va_seq &&
6705                     bva.va_seq == iva.va_seq)
6706                         cinfo->atomic = TRUE;
6707                 else
6708                         cinfo->atomic = FALSE;
6709                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6710         } else {
6711                 /*
6712                  * The entry was created, we need to sync the
6713                  * directory metadata.
6714                  */
6715                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6716 
6717                 /*
6718                  * Get "after" change value, if it fails, simply return the
6719                  * before value.
6720                  */
6721                 ava.va_mask = AT_CTIME|AT_SEQ;
6722                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6723                         ava.va_ctime = bva.va_ctime;
6724                         ava.va_seq = 0;
6725                 }
6726 
6727                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6728 
6729                 /*
6730                  * The cinfo->atomic = TRUE only if we have
6731                  * non-zero va_seq's, and it has incremented by exactly one
6732                  * during the create_vnode and it didn't
6733                  * change during the VOP_FSYNC.
6734                  */
6735                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6736                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6737                         cinfo->atomic = TRUE;
6738                 else
6739                         cinfo->atomic = FALSE;
6740         }
6741 
6742         /* Check for mandatory locking and that the size gets set. */
6743         cva.va_mask = AT_MODE;
6744         if (setsize)
6745                 cva.va_mask |= AT_SIZE;
6746 
6747         /* Assume the worst */
6748         cs->mandlock = TRUE;
6749 
6750         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6751                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6752 
6753                 /*
6754                  * Truncate the file if necessary; this would be
6755                  * the case for create over an existing file.
6756                  */
6757 
6758                 if (trunc) {
6759                         int in_crit = 0;
6760                         rfs4_file_t *fp;
6761                         bool_t create = FALSE;
6762 
6763                         /*
6764                          * We are writing over an existing file.
6765                          * Check to see if we need to recall a delegation.
6766                          */
6767                         rfs4_hold_deleg_policy();
6768                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6769                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6770                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6771                                         rfs4_file_rele(fp);
6772                                         rfs4_rele_deleg_policy();
6773                                         VN_RELE(vp);
6774                                         *attrset = 0;
6775                                         return (NFS4ERR_DELAY);
6776                                 }
6777                                 rfs4_file_rele(fp);
6778                         }
6779                         rfs4_rele_deleg_policy();
6780 
6781                         if (nbl_need_check(vp)) {
6782                                 in_crit = 1;
6783 
6784                                 ASSERT(reqsize == 0);
6785 
6786                                 nbl_start_crit(vp, RW_READER);
6787                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6788                                     cva.va_size, 0, NULL)) {
6789                                         in_crit = 0;
6790                                         nbl_end_crit(vp);
6791                                         VN_RELE(vp);
6792                                         *attrset = 0;
6793                                         return (NFS4ERR_ACCESS);
6794                                 }
6795                         }
6796                         ct.cc_sysid = 0;
6797                         ct.cc_pid = 0;
6798                         ct.cc_caller_id = nfs4_srv_caller_id;
6799                         ct.cc_flags = CC_DONTBLOCK;
6800 
6801                         cva.va_mask = AT_SIZE;
6802                         cva.va_size = reqsize;
6803                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6804                         if (in_crit)
6805                                 nbl_end_crit(vp);
6806                 }
6807         }
6808 
6809         error = makefh4(&cs->fh, vp, cs->exi);
6810 
6811         /*
6812          * Force modified data and metadata out to stable storage.
6813          */
6814         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6815 
6816         if (error) {
6817                 VN_RELE(vp);
6818                 *attrset = 0;
6819                 return (puterrno4(error));
6820         }
6821 
6822         /* if parent dir is attrdir, set namedattr fh flag */
6823         if (dvp->v_flag & V_XATTRDIR)
6824                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6825 
6826         if (cs->vp)
6827                 VN_RELE(cs->vp);
6828 
6829         cs->vp = vp;
6830 
6831         /*
6832          * if we did not create the file, we will need to check
6833          * the access bits on the file
6834          */
6835 
6836         if (!created) {
6837                 if (setsize)
6838                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6839                 status = check_open_access(args->share_access, cs, req);
6840                 if (status != NFS4_OK)
6841                         *attrset = 0;
6842         }
6843         return (status);
6844 }
6845 
6846 /*ARGSUSED*/
6847 static void
6848 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6849     rfs4_openowner_t *oo, delegreq_t deleg,
6850     uint32_t access, uint32_t deny,
6851     OPEN4res *resp, int deleg_cur)
6852 {
6853         /* XXX Currently not using req  */
6854         rfs4_state_t *sp;
6855         rfs4_file_t *fp;
6856         bool_t screate = TRUE;
6857         bool_t fcreate = TRUE;
6858         uint32_t open_a, share_a;
6859         uint32_t open_d, share_d;
6860         rfs4_deleg_state_t *dsp;
6861         sysid_t sysid;
6862         nfsstat4 status;
6863         caller_context_t ct;
6864         int fflags = 0;
6865         int recall = 0;
6866         int err;
6867         int first_open;
6868 
6869         /* get the file struct and hold a lock on it during initial open */
6870         fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6871         if (fp == NULL) {
6872                 resp->status = NFS4ERR_RESOURCE;
6873                 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6874                 return;
6875         }
6876 
6877         sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6878         if (sp == NULL) {
6879                 resp->status = NFS4ERR_RESOURCE;
6880                 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6881                 /* No need to keep any reference */
6882                 rw_exit(&fp->rf_file_rwlock);
6883                 rfs4_file_rele(fp);
6884                 return;
6885         }
6886 
6887         /* try to get the sysid before continuing */
6888         if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6889                 resp->status = status;
6890                 rfs4_file_rele(fp);
6891                 /* Not a fully formed open; "close" it */
6892                 if (screate == TRUE)
6893                         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6894                 rfs4_state_rele(sp);
6895                 return;
6896         }
6897 
6898         /* Calculate the fflags for this OPEN. */
6899         if (access & OPEN4_SHARE_ACCESS_READ)
6900                 fflags |= FREAD;
6901         if (access & OPEN4_SHARE_ACCESS_WRITE)
6902                 fflags |= FWRITE;
6903 
6904         rfs4_dbe_lock(sp->rs_dbe);
6905 
6906         /*
6907          * Calculate the new deny and access mode that this open is adding to
6908          * the file for this open owner;
6909          */
6910         open_d = (deny & ~sp->rs_open_deny);
6911         open_a = (access & ~sp->rs_open_access);
6912 
6913         /*
6914          * Calculate the new share access and share deny modes that this open
6915          * is adding to the file for this open owner;
6916          */
6917         share_a = (access & ~sp->rs_share_access);
6918         share_d = (deny & ~sp->rs_share_deny);
6919 
6920         first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6921 
6922         /*
6923          * Check to see the client has already sent an open for this
6924          * open owner on this file with the same share/deny modes.
6925          * If so, we don't need to check for a conflict and we don't
6926          * need to add another shrlock.  If not, then we need to
6927          * check for conflicts in deny and access before checking for
6928          * conflicts in delegation.  We don't want to recall a
6929          * delegation based on an open that will eventually fail based
6930          * on shares modes.
6931          */
6932 
6933         if (share_a || share_d) {
6934                 if ((err = rfs4_share(sp, access, deny)) != 0) {
6935                         rfs4_dbe_unlock(sp->rs_dbe);
6936                         resp->status = err;
6937 
6938                         rfs4_file_rele(fp);
6939                         /* Not a fully formed open; "close" it */
6940                         if (screate == TRUE)
6941                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6942                         rfs4_state_rele(sp);
6943                         return;
6944                 }
6945         }
6946 
6947         rfs4_dbe_lock(fp->rf_dbe);
6948 
6949         /*
6950          * Check to see if this file is delegated and if so, if a
6951          * recall needs to be done.
6952          */
6953         if (rfs4_check_recall(sp, access)) {
6954                 rfs4_dbe_unlock(fp->rf_dbe);
6955                 rfs4_dbe_unlock(sp->rs_dbe);
6956                 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6957                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6958                 rfs4_dbe_lock(sp->rs_dbe);
6959 
6960                 /* if state closed while lock was dropped */
6961                 if (sp->rs_closed) {
6962                         if (share_a || share_d)
6963                                 (void) rfs4_unshare(sp);
6964                         rfs4_dbe_unlock(sp->rs_dbe);
6965                         rfs4_file_rele(fp);
6966                         /* Not a fully formed open; "close" it */
6967                         if (screate == TRUE)
6968                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6969                         rfs4_state_rele(sp);
6970                         resp->status = NFS4ERR_OLD_STATEID;
6971                         return;
6972                 }
6973 
6974                 rfs4_dbe_lock(fp->rf_dbe);
6975                 /* Let's see if the delegation was returned */
6976                 if (rfs4_check_recall(sp, access)) {
6977                         rfs4_dbe_unlock(fp->rf_dbe);
6978                         if (share_a || share_d)
6979                                 (void) rfs4_unshare(sp);
6980                         rfs4_dbe_unlock(sp->rs_dbe);
6981                         rfs4_file_rele(fp);
6982                         rfs4_update_lease(sp->rs_owner->ro_client);
6983 
6984                         /* Not a fully formed open; "close" it */
6985                         if (screate == TRUE)
6986                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6987                         rfs4_state_rele(sp);
6988                         resp->status = NFS4ERR_DELAY;
6989                         return;
6990                 }
6991         }
6992         /*
6993          * the share check passed and any delegation conflict has been
6994          * taken care of, now call vop_open.
6995          * if this is the first open then call vop_open with fflags.
6996          * if not, call vn_open_upgrade with just the upgrade flags.
6997          *
6998          * if the file has been opened already, it will have the current
6999          * access mode in the state struct.  if it has no share access, then
7000          * this is a new open.
7001          *
7002          * However, if this is open with CLAIM_DLEGATE_CUR, then don't
7003          * call VOP_OPEN(), just do the open upgrade.
7004          */
7005         if (first_open && !deleg_cur) {
7006                 ct.cc_sysid = sysid;
7007                 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
7008                 ct.cc_caller_id = nfs4_srv_caller_id;
7009                 ct.cc_flags = CC_DONTBLOCK;
7010                 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
7011                 if (err) {
7012                         rfs4_dbe_unlock(fp->rf_dbe);
7013                         if (share_a || share_d)
7014                                 (void) rfs4_unshare(sp);
7015                         rfs4_dbe_unlock(sp->rs_dbe);
7016                         rfs4_file_rele(fp);
7017 
7018                         /* Not a fully formed open; "close" it */
7019                         if (screate == TRUE)
7020                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7021                         rfs4_state_rele(sp);
7022                         /* check if a monitor detected a delegation conflict */
7023                         if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
7024                                 resp->status = NFS4ERR_DELAY;
7025                         else
7026                                 resp->status = NFS4ERR_SERVERFAULT;
7027                         return;
7028                 }
7029         } else { /* open upgrade */
7030                 /*
7031                  * calculate the fflags for the new mode that is being added
7032                  * by this upgrade.
7033                  */
7034                 fflags = 0;
7035                 if (open_a & OPEN4_SHARE_ACCESS_READ)
7036                         fflags |= FREAD;
7037                 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7038                         fflags |= FWRITE;
7039                 vn_open_upgrade(cs->vp, fflags);
7040         }
7041         sp->rs_open_access |= access;
7042         sp->rs_open_deny |= deny;
7043 
7044         if (open_d & OPEN4_SHARE_DENY_READ)
7045                 fp->rf_deny_read++;
7046         if (open_d & OPEN4_SHARE_DENY_WRITE)
7047                 fp->rf_deny_write++;
7048         fp->rf_share_deny |= deny;
7049 
7050         if (open_a & OPEN4_SHARE_ACCESS_READ)
7051                 fp->rf_access_read++;
7052         if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7053                 fp->rf_access_write++;
7054         fp->rf_share_access |= access;
7055 
7056         /*
7057          * Check for delegation here. if the deleg argument is not
7058          * DELEG_ANY, then this is a reclaim from a client and
7059          * we must honor the delegation requested. If necessary we can
7060          * set the recall flag.
7061          */
7062 
7063         dsp = rfs4_grant_delegation(deleg, sp, &recall);
7064 
7065         cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
7066 
7067         next_stateid(&sp->rs_stateid);
7068 
7069         resp->stateid = sp->rs_stateid.stateid;
7070 
7071         rfs4_dbe_unlock(fp->rf_dbe);
7072         rfs4_dbe_unlock(sp->rs_dbe);
7073 
7074         if (dsp) {
7075                 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
7076                 rfs4_deleg_state_rele(dsp);
7077         }
7078 
7079         rfs4_file_rele(fp);
7080         rfs4_state_rele(sp);
7081 
7082         resp->status = NFS4_OK;
7083 }
7084 
7085 /*ARGSUSED*/
7086 static void
7087 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
7088     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7089 {
7090         change_info4 *cinfo = &resp->cinfo;
7091         bitmap4 *attrset = &resp->attrset;
7092 
7093         if (args->opentype == OPEN4_NOCREATE)
7094                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
7095                     req, cs, args->share_access, cinfo);
7096         else {
7097                 /* inhibit delegation grants during exclusive create */
7098 
7099                 if (args->mode == EXCLUSIVE4)
7100                         rfs4_disable_delegation();
7101 
7102                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7103                     oo->ro_client->rc_clientid);
7104         }
7105 
7106         if (resp->status == NFS4_OK) {
7107 
7108                 /* cs->vp cs->fh now reference the desired file */
7109 
7110                 rfs4_do_open(cs, req, oo,
7111                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7112                     args->share_access, args->share_deny, resp, 0);
7113 
7114                 /*
7115                  * If rfs4_createfile set attrset, we must
7116                  * clear this attrset before the response is copied.
7117                  */
7118                 if (resp->status != NFS4_OK && resp->attrset) {
7119                         resp->attrset = 0;
7120                 }
7121         }
7122         else
7123                 *cs->statusp = resp->status;
7124 
7125         if (args->mode == EXCLUSIVE4)
7126                 rfs4_enable_delegation();
7127 }
7128 
7129 /*ARGSUSED*/
7130 static void
7131 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7132     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7133 {
7134         change_info4 *cinfo = &resp->cinfo;
7135         vattr_t va;
7136         vtype_t v_type = cs->vp->v_type;
7137         int error = 0;
7138 
7139         /* Verify that we have a regular file */
7140         if (v_type != VREG) {
7141                 if (v_type == VDIR)
7142                         resp->status = NFS4ERR_ISDIR;
7143                 else if (v_type == VLNK)
7144                         resp->status = NFS4ERR_SYMLINK;
7145                 else
7146                         resp->status = NFS4ERR_INVAL;
7147                 return;
7148         }
7149 
7150         va.va_mask = AT_MODE|AT_UID;
7151         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7152         if (error) {
7153                 resp->status = puterrno4(error);
7154                 return;
7155         }
7156 
7157         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7158 
7159         /*
7160          * Check if we have access to the file, Note the the file
7161          * could have originally been open UNCHECKED or GUARDED
7162          * with mode bits that will now fail, but there is nothing
7163          * we can really do about that except in the case that the
7164          * owner of the file is the one requesting the open.
7165          */
7166         if (crgetuid(cs->cr) != va.va_uid) {
7167                 resp->status = check_open_access(args->share_access, cs, req);
7168                 if (resp->status != NFS4_OK) {
7169                         return;
7170                 }
7171         }
7172 
7173         /*
7174          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7175          */
7176         cinfo->before = 0;
7177         cinfo->after = 0;
7178         cinfo->atomic = FALSE;
7179 
7180         rfs4_do_open(cs, req, oo,
7181             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7182             args->share_access, args->share_deny, resp, 0);
7183 }
7184 
7185 static void
7186 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7187     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7188 {
7189         int error;
7190         nfsstat4 status;
7191         stateid4 stateid =
7192             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7193         rfs4_deleg_state_t *dsp;
7194 
7195         /*
7196          * Find the state info from the stateid and confirm that the
7197          * file is delegated.  If the state openowner is the same as
7198          * the supplied openowner we're done. If not, get the file
7199          * info from the found state info. Use that file info to
7200          * create the state for this lock owner. Note solaris doen't
7201          * really need the pathname to find the file. We may want to
7202          * lookup the pathname and make sure that the vp exist and
7203          * matches the vp in the file structure. However it is
7204          * possible that the pathname nolonger exists (local process
7205          * unlinks the file), so this may not be that useful.
7206          */
7207 
7208         status = rfs4_get_deleg_state(&stateid, &dsp);
7209         if (status != NFS4_OK) {
7210                 resp->status = status;
7211                 return;
7212         }
7213 
7214         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7215 
7216         /*
7217          * New lock owner, create state. Since this was probably called
7218          * in response to a CB_RECALL we set deleg to DELEG_NONE
7219          */
7220 
7221         ASSERT(cs->vp != NULL);
7222         VN_RELE(cs->vp);
7223         VN_HOLD(dsp->rds_finfo->rf_vp);
7224         cs->vp = dsp->rds_finfo->rf_vp;
7225 
7226         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7227                 rfs4_deleg_state_rele(dsp);
7228                 *cs->statusp = resp->status = puterrno4(error);
7229                 return;
7230         }
7231 
7232         /* Mark progress for delegation returns */
7233         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7234         rfs4_deleg_state_rele(dsp);
7235         rfs4_do_open(cs, req, oo, DELEG_NONE,
7236             args->share_access, args->share_deny, resp, 1);
7237 }
7238 
7239 /*ARGSUSED*/
7240 static void
7241 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7242     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7243 {
7244         /*
7245          * Lookup the pathname, it must already exist since this file
7246          * was delegated.
7247          *
7248          * Find the file and state info for this vp and open owner pair.
7249          *      check that they are in fact delegated.
7250          *      check that the state access and deny modes are the same.
7251          *
7252          * Return the delgation possibly seting the recall flag.
7253          */
7254         rfs4_file_t *fp;
7255         rfs4_state_t *sp;
7256         bool_t create = FALSE;
7257         bool_t dcreate = FALSE;
7258         rfs4_deleg_state_t *dsp;
7259         nfsace4 *ace;
7260 
7261         /* Note we ignore oflags */
7262         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7263             req, cs, args->share_access, &resp->cinfo);
7264 
7265         if (resp->status != NFS4_OK) {
7266                 return;
7267         }
7268 
7269         /* get the file struct and hold a lock on it during initial open */
7270         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7271         if (fp == NULL) {
7272                 resp->status = NFS4ERR_RESOURCE;
7273                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7274                 return;
7275         }
7276 
7277         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7278         if (sp == NULL) {
7279                 resp->status = NFS4ERR_SERVERFAULT;
7280                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7281                 rw_exit(&fp->rf_file_rwlock);
7282                 rfs4_file_rele(fp);
7283                 return;
7284         }
7285 
7286         rfs4_dbe_lock(sp->rs_dbe);
7287         rfs4_dbe_lock(fp->rf_dbe);
7288         if (args->share_access != sp->rs_share_access ||
7289             args->share_deny != sp->rs_share_deny ||
7290             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7291                 NFS4_DEBUG(rfs4_debug,
7292                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7293                 rfs4_dbe_unlock(fp->rf_dbe);
7294                 rfs4_dbe_unlock(sp->rs_dbe);
7295                 rfs4_file_rele(fp);
7296                 rfs4_state_rele(sp);
7297                 resp->status = NFS4ERR_SERVERFAULT;
7298                 return;
7299         }
7300         rfs4_dbe_unlock(fp->rf_dbe);
7301         rfs4_dbe_unlock(sp->rs_dbe);
7302 
7303         dsp = rfs4_finddeleg(sp, &dcreate);
7304         if (dsp == NULL) {
7305                 rfs4_state_rele(sp);
7306                 rfs4_file_rele(fp);
7307                 resp->status = NFS4ERR_SERVERFAULT;
7308                 return;
7309         }
7310 
7311         next_stateid(&sp->rs_stateid);
7312 
7313         resp->stateid = sp->rs_stateid.stateid;
7314 
7315         resp->delegation.delegation_type = dsp->rds_dtype;
7316 
7317         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7318                 open_read_delegation4 *rv =
7319                     &resp->delegation.open_delegation4_u.read;
7320 
7321                 rv->stateid = dsp->rds_delegid.stateid;
7322                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7323                 ace = &rv->permissions;
7324         } else {
7325                 open_write_delegation4 *rv =
7326                     &resp->delegation.open_delegation4_u.write;
7327 
7328                 rv->stateid = dsp->rds_delegid.stateid;
7329                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7330                 ace = &rv->permissions;
7331                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7332                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7333         }
7334 
7335         /* XXX For now */
7336         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7337         ace->flag = 0;
7338         ace->access_mask = 0;
7339         ace->who.utf8string_len = 0;
7340         ace->who.utf8string_val = 0;
7341 
7342         rfs4_deleg_state_rele(dsp);
7343         rfs4_state_rele(sp);
7344         rfs4_file_rele(fp);
7345 }
7346 
/*
 * Result of comparing the sequence id carried by a request against the
 * sequence id last recorded for an owner.
 */
typedef enum {
        NFS4_CHKSEQ_OKAY,       /* 0: request carries the next expected id */
        NFS4_CHKSEQ_REPLAY,     /* 1: same id and same operation as last time */
        NFS4_CHKSEQ_BAD         /* 2: any other sequence id */
} rfs4_chkseq_t;
7352 
7353 /*
7354  * Generic function for sequence number checks.
7355  */
7356 static rfs4_chkseq_t
7357 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7358     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7359 {
7360         /* Same sequence ids and matching operations? */
7361         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7362                 if (copyres == TRUE) {
7363                         rfs4_free_reply(resop);
7364                         rfs4_copy_reply(resop, lastop);
7365                 }
7366                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7367                     "Replayed SEQID %d\n", seqid));
7368                 return (NFS4_CHKSEQ_REPLAY);
7369         }
7370 
7371         /* If the incoming sequence is not the next expected then it is bad */
7372         if (rqst_seq != seqid + 1) {
7373                 if (rqst_seq == seqid) {
7374                         NFS4_DEBUG(rfs4_debug,
7375                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7376                             "but last op was %d current op is %d\n",
7377                             lastop->resop, resop->resop));
7378                         return (NFS4_CHKSEQ_BAD);
7379                 }
7380                 NFS4_DEBUG(rfs4_debug,
7381                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7382                     rqst_seq, seqid));
7383                 return (NFS4_CHKSEQ_BAD);
7384         }
7385 
7386         /* Everything okay -- next expected */
7387         return (NFS4_CHKSEQ_OKAY);
7388 }
7389 
7390 
7391 static rfs4_chkseq_t
7392 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7393 {
7394         rfs4_chkseq_t rc;
7395 
7396         rfs4_dbe_lock(op->ro_dbe);
7397         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7398             TRUE);
7399         rfs4_dbe_unlock(op->ro_dbe);
7400 
7401         if (rc == NFS4_CHKSEQ_OKAY)
7402                 rfs4_update_lease(op->ro_client);
7403 
7404         return (rc);
7405 }
7406 
7407 static rfs4_chkseq_t
7408 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7409 {
7410         rfs4_chkseq_t rc;
7411 
7412         rfs4_dbe_lock(op->ro_dbe);
7413         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7414             olo_seqid, resop, FALSE);
7415         rfs4_dbe_unlock(op->ro_dbe);
7416 
7417         return (rc);
7418 }
7419 
7420 static rfs4_chkseq_t
7421 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7422 {
7423         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7424 
7425         rfs4_dbe_lock(lsp->rls_dbe);
7426         if (!lsp->rls_skip_seqid_check)
7427                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7428                     resop, TRUE);
7429         rfs4_dbe_unlock(lsp->rls_dbe);
7430 
7431         return (rc);
7432 }
7433 
7434 static void
7435 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7436     struct svc_req *req, struct compound_state *cs)
7437 {
7438         OPEN4args *args = &argop->nfs_argop4_u.opopen;
7439         OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7440         open_owner4 *owner = &args->owner;
7441         open_claim_type4 claim = args->claim;
7442         rfs4_client_t *cp;
7443         rfs4_openowner_t *oo;
7444         bool_t create;
7445         bool_t replay = FALSE;
7446         int can_reclaim;
7447 
7448         DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7449             OPEN4args *, args);
7450 
7451         if (cs->vp == NULL) {
7452                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7453                 goto end;
7454         }
7455 
7456         /*
7457          * Need to check clientid and lease expiration first based on
7458          * error ordering and incrementing sequence id.
7459          */
7460         cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7461         if (cp == NULL) {
7462                 *cs->statusp = resp->status =
7463                     rfs4_check_clientid(&owner->clientid, 0);
7464                 goto end;
7465         }
7466 
7467         if (rfs4_lease_expired(cp)) {
7468                 rfs4_client_close(cp);
7469                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7470                 goto end;
7471         }
7472         can_reclaim = cp->rc_can_reclaim;
7473 
7474         /*
7475          * Find the open_owner for use from this point forward.  Take
7476          * care in updating the sequence id based on the type of error
7477          * being returned.
7478          */
7479 retry:
7480         create = TRUE;
7481         oo = rfs4_findopenowner(owner, &create, args->seqid);
7482         if (oo == NULL) {
7483                 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7484                 rfs4_client_rele(cp);
7485                 goto end;
7486         }
7487 
7488         /* Hold off access to the sequence space while the open is done */
7489         rfs4_sw_enter(&oo->ro_sw);
7490 
7491         /*
7492          * If the open_owner existed before at the server, then check
7493          * the sequence id.
7494          */
7495         if (!create && !oo->ro_postpone_confirm) {
7496                 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7497                 case NFS4_CHKSEQ_BAD:
7498                         if ((args->seqid > oo->ro_open_seqid) &&
7499                             oo->ro_need_confirm) {
7500                                 rfs4_free_opens(oo, TRUE, FALSE);
7501                                 rfs4_sw_exit(&oo->ro_sw);
7502                                 rfs4_openowner_rele(oo);
7503                                 goto retry;
7504                         }
7505                         resp->status = NFS4ERR_BAD_SEQID;
7506                         goto out;
7507                 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7508                         replay = TRUE;
7509                         goto out;
7510                 default:
7511                         break;
7512                 }
7513 
7514                 /*
7515                  * Sequence was ok and open owner exists
7516                  * check to see if we have yet to see an
7517                  * open_confirm.
7518                  */
7519                 if (oo->ro_need_confirm) {
7520                         rfs4_free_opens(oo, TRUE, FALSE);
7521                         rfs4_sw_exit(&oo->ro_sw);
7522                         rfs4_openowner_rele(oo);
7523                         goto retry;
7524                 }
7525         }
7526         /* Grace only applies to regular-type OPENs */
7527         if (rfs4_clnt_in_grace(cp) &&
7528             (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7529                 *cs->statusp = resp->status = NFS4ERR_GRACE;
7530                 goto out;
7531         }
7532 
7533         /*
7534          * If previous state at the server existed then can_reclaim
7535          * will be set. If not reply NFS4ERR_NO_GRACE to the
7536          * client.
7537          */
7538         if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7539                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7540                 goto out;
7541         }
7542 
7543 
7544         /*
7545          * Reject the open if the client has missed the grace period
7546          */
7547         if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7548                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7549                 goto out;
7550         }
7551 
7552         /* Couple of up-front bookkeeping items */
7553         if (oo->ro_need_confirm) {
7554                 /*
7555                  * If this is a reclaim OPEN then we should not ask
7556                  * for a confirmation of the open_owner per the
7557                  * protocol specification.
7558                  */
7559                 if (claim == CLAIM_PREVIOUS)
7560                         oo->ro_need_confirm = FALSE;
7561                 else
7562                         resp->rflags |= OPEN4_RESULT_CONFIRM;
7563         }
7564         resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7565 
7566         /*
7567          * If there is an unshared filesystem mounted on this vnode,
7568          * do not allow to open/create in this directory.
7569          */
7570         if (vn_ismntpt(cs->vp)) {
7571                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7572                 goto out;
7573         }
7574 
7575         /*
7576          * access must READ, WRITE, or BOTH.  No access is invalid.
7577          * deny can be READ, WRITE, BOTH, or NONE.
7578          * bits not defined for access/deny are invalid.
7579          */
7580         if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7581             (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7582             (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7583                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7584                 goto out;
7585         }
7586 
7587 
7588         /*
7589          * make sure attrset is zero before response is built.
7590          */
7591         resp->attrset = 0;
7592 
7593         switch (claim) {
7594         case CLAIM_NULL:
7595                 rfs4_do_opennull(cs, req, args, oo, resp);
7596                 break;
7597         case CLAIM_PREVIOUS:
7598                 rfs4_do_openprev(cs, req, args, oo, resp);
7599                 break;
7600         case CLAIM_DELEGATE_CUR:
7601                 rfs4_do_opendelcur(cs, req, args, oo, resp);
7602                 break;
7603         case CLAIM_DELEGATE_PREV:
7604                 rfs4_do_opendelprev(cs, req, args, oo, resp);
7605                 break;
7606         default:
7607                 resp->status = NFS4ERR_INVAL;
7608                 break;
7609         }
7610 
7611 out:
7612         rfs4_client_rele(cp);
7613 
7614         /* Catch sequence id handling here to make it a little easier */
7615         switch (resp->status) {
7616         case NFS4ERR_BADXDR:
7617         case NFS4ERR_BAD_SEQID:
7618         case NFS4ERR_BAD_STATEID:
7619         case NFS4ERR_NOFILEHANDLE:
7620         case NFS4ERR_RESOURCE:
7621         case NFS4ERR_STALE_CLIENTID:
7622         case NFS4ERR_STALE_STATEID:
7623                 /*
7624                  * The protocol states that if any of these errors are
7625                  * being returned, the sequence id should not be
7626                  * incremented.  Any other return requires an
7627                  * increment.
7628                  */
7629                 break;
7630         default:
7631                 /* Always update the lease in this case */
7632                 rfs4_update_lease(oo->ro_client);
7633 
7634                 /* Regular response - copy the result */
7635                 if (!replay)
7636                         rfs4_update_open_resp(oo, resop, &cs->fh);
7637 
7638                 /*
7639                  * REPLAY case: Only if the previous response was OK
7640                  * do we copy the filehandle.  If not OK, no
7641                  * filehandle to copy.
7642                  */
7643                 if (replay == TRUE &&
7644                     resp->status == NFS4_OK &&
7645                     oo->ro_reply_fh.nfs_fh4_val) {
7646                         /*
7647                          * If this is a replay, we must restore the
7648                          * current filehandle/vp to that of what was
7649                          * returned originally.  Try our best to do
7650                          * it.
7651                          */
7652                         nfs_fh4_fmt_t *fh_fmtp =
7653                             (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7654 
7655                         cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7656                             (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7657 
7658                         if (cs->exi == NULL) {
7659                                 resp->status = NFS4ERR_STALE;
7660                                 goto finish;
7661                         }
7662 
7663                         VN_RELE(cs->vp);
7664 
7665                         cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7666                             &resp->status);
7667 
7668                         if (cs->vp == NULL)
7669                                 goto finish;
7670 
7671                         nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7672                 }
7673 
7674                 /*
7675                  * If this was a replay, no need to update the
7676                  * sequence id. If the open_owner was not created on
7677                  * this pass, then update.  The first use of an
7678                  * open_owner will not bump the sequence id.
7679                  */
7680                 if (replay == FALSE && !create)
7681                         rfs4_update_open_sequence(oo);
7682                 /*
7683                  * If the client is receiving an error and the
7684                  * open_owner needs to be confirmed, there is no way
7685                  * to notify the client of this fact ignoring the fact
7686                  * that the server has no method of returning a
7687                  * stateid to confirm.  Therefore, the server needs to
7688                  * mark this open_owner in a way as to avoid the
7689                  * sequence id checking the next time the client uses
7690                  * this open_owner.
7691                  */
7692                 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7693                         oo->ro_postpone_confirm = TRUE;
7694                 /*
7695                  * If OK response then clear the postpone flag and
7696                  * reset the sequence id to keep in sync with the
7697                  * client.
7698                  */
7699                 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7700                         oo->ro_postpone_confirm = FALSE;
7701                         oo->ro_open_seqid = args->seqid;
7702                 }
7703                 break;
7704         }
7705 
7706 finish:
7707         *cs->statusp = resp->status;
7708 
7709         rfs4_sw_exit(&oo->ro_sw);
7710         rfs4_openowner_rele(oo);
7711 
7712 end:
7713         DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7714             OPEN4res *, resp);
7715 }
7716 
7717 /*ARGSUSED*/
7718 void
7719 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7720     struct svc_req *req, struct compound_state *cs)
7721 {
7722         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7723         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7724         rfs4_state_t *sp;
7725         nfsstat4 status;
7726 
7727         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7728             OPEN_CONFIRM4args *, args);
7729 
7730         if (cs->vp == NULL) {
7731                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7732                 goto out;
7733         }
7734 
7735         if (cs->vp->v_type != VREG) {
7736                 *cs->statusp = resp->status =
7737                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7738                 return;
7739         }
7740 
7741         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7742         if (status != NFS4_OK) {
7743                 *cs->statusp = resp->status = status;
7744                 goto out;
7745         }
7746 
7747         /* Ensure specified filehandle matches */
7748         if (cs->vp != sp->rs_finfo->rf_vp) {
7749                 rfs4_state_rele(sp);
7750                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7751                 goto out;
7752         }
7753 
7754         /* hold off other access to open_owner while we tinker */
7755         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7756 
7757         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7758         case NFS4_CHECK_STATEID_OKAY:
7759                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7760                     resop) != 0) {
7761                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7762                         break;
7763                 }
7764                 /*
7765                  * If it is the appropriate stateid and determined to
7766                  * be "OKAY" then this means that the stateid does not
7767                  * need to be confirmed and the client is in error for
7768                  * sending an OPEN_CONFIRM.
7769                  */
7770                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7771                 break;
7772         case NFS4_CHECK_STATEID_OLD:
7773                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7774                 break;
7775         case NFS4_CHECK_STATEID_BAD:
7776                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7777                 break;
7778         case NFS4_CHECK_STATEID_EXPIRED:
7779                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7780                 break;
7781         case NFS4_CHECK_STATEID_CLOSED:
7782                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7783                 break;
7784         case NFS4_CHECK_STATEID_REPLAY:
7785                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7786                     resop)) {
7787                 case NFS4_CHKSEQ_OKAY:
7788                         /*
7789                          * This is replayed stateid; if seqid matches
7790                          * next expected, then client is using wrong seqid.
7791                          */
7792                         /* fall through */
7793                 case NFS4_CHKSEQ_BAD:
7794                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7795                         break;
7796                 case NFS4_CHKSEQ_REPLAY:
7797                         /*
7798                          * Note this case is the duplicate case so
7799                          * resp->status is already set.
7800                          */
7801                         *cs->statusp = resp->status;
7802                         rfs4_update_lease(sp->rs_owner->ro_client);
7803                         break;
7804                 }
7805                 break;
7806         case NFS4_CHECK_STATEID_UNCONFIRMED:
7807                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7808                     resop) != NFS4_CHKSEQ_OKAY) {
7809                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7810                         break;
7811                 }
7812                 *cs->statusp = resp->status = NFS4_OK;
7813 
7814                 next_stateid(&sp->rs_stateid);
7815                 resp->open_stateid = sp->rs_stateid.stateid;
7816                 sp->rs_owner->ro_need_confirm = FALSE;
7817                 rfs4_update_lease(sp->rs_owner->ro_client);
7818                 rfs4_update_open_sequence(sp->rs_owner);
7819                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7820                 break;
7821         default:
7822                 ASSERT(FALSE);
7823                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7824                 break;
7825         }
7826         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7827         rfs4_state_rele(sp);
7828 
7829 out:
7830         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7831             OPEN_CONFIRM4res *, resp);
7832 }
7833 
7834 /*ARGSUSED*/
7835 void
7836 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7837     struct svc_req *req, struct compound_state *cs)
7838 {
7839         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7840         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7841         uint32_t access = args->share_access;
7842         uint32_t deny = args->share_deny;
7843         nfsstat4 status;
7844         rfs4_state_t *sp;
7845         rfs4_file_t *fp;
7846         int fflags = 0;
7847 
7848         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7849             OPEN_DOWNGRADE4args *, args);
7850 
7851         if (cs->vp == NULL) {
7852                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7853                 goto out;
7854         }
7855 
7856         if (cs->vp->v_type != VREG) {
7857                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7858                 return;
7859         }
7860 
7861         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7862         if (status != NFS4_OK) {
7863                 *cs->statusp = resp->status = status;
7864                 goto out;
7865         }
7866 
7867         /* Ensure specified filehandle matches */
7868         if (cs->vp != sp->rs_finfo->rf_vp) {
7869                 rfs4_state_rele(sp);
7870                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7871                 goto out;
7872         }
7873 
7874         /* hold off other access to open_owner while we tinker */
7875         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7876 
7877         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7878         case NFS4_CHECK_STATEID_OKAY:
7879                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7880                     resop) != NFS4_CHKSEQ_OKAY) {
7881                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7882                         goto end;
7883                 }
7884                 break;
7885         case NFS4_CHECK_STATEID_OLD:
7886                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7887                 goto end;
7888         case NFS4_CHECK_STATEID_BAD:
7889                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7890                 goto end;
7891         case NFS4_CHECK_STATEID_EXPIRED:
7892                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7893                 goto end;
7894         case NFS4_CHECK_STATEID_CLOSED:
7895                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7896                 goto end;
7897         case NFS4_CHECK_STATEID_UNCONFIRMED:
7898                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7899                 goto end;
7900         case NFS4_CHECK_STATEID_REPLAY:
7901                 /* Check the sequence id for the open owner */
7902                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7903                     resop)) {
7904                 case NFS4_CHKSEQ_OKAY:
7905                         /*
7906                          * This is replayed stateid; if seqid matches
7907                          * next expected, then client is using wrong seqid.
7908                          */
7909                         /* fall through */
7910                 case NFS4_CHKSEQ_BAD:
7911                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7912                         goto end;
7913                 case NFS4_CHKSEQ_REPLAY:
7914                         /*
7915                          * Note this case is the duplicate case so
7916                          * resp->status is already set.
7917                          */
7918                         *cs->statusp = resp->status;
7919                         rfs4_update_lease(sp->rs_owner->ro_client);
7920                         goto end;
7921                 }
7922                 break;
7923         default:
7924                 ASSERT(FALSE);
7925                 break;
7926         }
7927 
7928         rfs4_dbe_lock(sp->rs_dbe);
7929         /*
7930          * Check that the new access modes and deny modes are valid.
7931          * Check that no invalid bits are set.
7932          */
7933         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7934             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7935                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7936                 rfs4_update_open_sequence(sp->rs_owner);
7937                 rfs4_dbe_unlock(sp->rs_dbe);
7938                 goto end;
7939         }
7940 
7941         /*
7942          * The new modes must be a subset of the current modes and
7943          * the access must specify at least one mode. To test that
7944          * the new mode is a subset of the current modes we bitwise
7945          * AND them together and check that the result equals the new
7946          * mode. For example:
7947          * New mode, access == R and current mode, sp->rs_open_access  == RW
7948          * access & sp->rs_open_access == R == access, so the new access mode
7949          * is valid. Consider access == RW, sp->rs_open_access = R
7950          * access & sp->rs_open_access == R != access, so the new access mode
7951          * is invalid.
7952          */
7953         if ((access & sp->rs_open_access) != access ||
7954             (deny & sp->rs_open_deny) != deny ||
7955             (access &
7956             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7957                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7958                 rfs4_update_open_sequence(sp->rs_owner);
7959                 rfs4_dbe_unlock(sp->rs_dbe);
7960                 goto end;
7961         }
7962 
7963         /*
7964          * Release any share locks associated with this stateID.
7965          * Strictly speaking, this violates the spec because the
7966          * spec effectively requires that open downgrade be atomic.
7967          * At present, fs_shrlock does not have this capability.
7968          */
7969         (void) rfs4_unshare(sp);
7970 
7971         status = rfs4_share(sp, access, deny);
7972         if (status != NFS4_OK) {
7973                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7974                 rfs4_update_open_sequence(sp->rs_owner);
7975                 rfs4_dbe_unlock(sp->rs_dbe);
7976                 goto end;
7977         }
7978 
7979         fp = sp->rs_finfo;
7980         rfs4_dbe_lock(fp->rf_dbe);
7981 
7982         /*
7983          * If the current mode has deny read and the new mode
7984          * does not, decrement the number of deny read mode bits
7985          * and if it goes to zero turn off the deny read bit
7986          * on the file.
7987          */
7988         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7989             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7990                 fp->rf_deny_read--;
7991                 if (fp->rf_deny_read == 0)
7992                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7993         }
7994 
7995         /*
7996          * If the current mode has deny write and the new mode
7997          * does not, decrement the number of deny write mode bits
7998          * and if it goes to zero turn off the deny write bit
7999          * on the file.
8000          */
8001         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
8002             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
8003                 fp->rf_deny_write--;
8004                 if (fp->rf_deny_write == 0)
8005                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8006         }
8007 
8008         /*
8009          * If the current mode has access read and the new mode
8010          * does not, decrement the number of access read mode bits
8011          * and if it goes to zero turn off the access read bit
8012          * on the file.  set fflags to FREAD for the call to
8013          * vn_open_downgrade().
8014          */
8015         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
8016             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
8017                 fp->rf_access_read--;
8018                 if (fp->rf_access_read == 0)
8019                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8020                 fflags |= FREAD;
8021         }
8022 
8023         /*
8024          * If the current mode has access write and the new mode
8025          * does not, decrement the number of access write mode bits
8026          * and if it goes to zero turn off the access write bit
8027          * on the file.  set fflags to FWRITE for the call to
8028          * vn_open_downgrade().
8029          */
8030         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
8031             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8032                 fp->rf_access_write--;
8033                 if (fp->rf_access_write == 0)
8034                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
8035                 fflags |= FWRITE;
8036         }
8037 
8038         /* Check that the file is still accessible */
8039         ASSERT(fp->rf_share_access);
8040 
8041         rfs4_dbe_unlock(fp->rf_dbe);
8042 
8043         /* now set the new open access and deny modes */
8044         sp->rs_open_access = access;
8045         sp->rs_open_deny = deny;
8046 
8047         /*
8048          * we successfully downgraded the share lock, now we need to downgrade
8049          * the open. it is possible that the downgrade was only for a deny
8050          * mode and we have nothing else to do.
8051          */
8052         if ((fflags & (FREAD|FWRITE)) != 0)
8053                 vn_open_downgrade(cs->vp, fflags);
8054 
8055         /* Update the stateid */
8056         next_stateid(&sp->rs_stateid);
8057         resp->open_stateid = sp->rs_stateid.stateid;
8058 
8059         rfs4_dbe_unlock(sp->rs_dbe);
8060 
8061         *cs->statusp = resp->status = NFS4_OK;
8062         /* Update the lease */
8063         rfs4_update_lease(sp->rs_owner->ro_client);
8064         /* And the sequence */
8065         rfs4_update_open_sequence(sp->rs_owner);
8066         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8067 
8068 end:
8069         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8070         rfs4_state_rele(sp);
8071 out:
8072         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
8073             OPEN_DOWNGRADE4res *, resp);
8074 }
8075 
/*
 * Find the first occurrence of the NUL-terminated string s2 within the
 * first n bytes of the buffer s1.  The buffer itself need not be
 * NUL-terminated.
 *
 * Returns a pointer to the start of the match, or NULL when s2 does not
 * occur within those n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        size_t patlen = strlen(s2);
        size_t remaining;
        char *cursor;

        /* Slide a patlen-sized window while enough bytes are left */
        for (cursor = (char *)s1, remaining = n; remaining >= patlen;
            cursor++, remaining--) {
                if (bcmp(cursor, s2, patlen) == 0)
                        return (cursor);
        }

        return (NULL);
}
8091 
8092 /*
8093  * The logic behind this function is detailed in the NFSv4 RFC in the
8094  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
8095  * that section for explicit guidance to server behavior for
8096  * SETCLIENTID.
8097  */
8098 void
8099 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8100     struct svc_req *req, struct compound_state *cs)
8101 {
8102         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8103         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8104         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8105         rfs4_clntip_t *ci;
8106         bool_t create;
8107         char *addr, *netid;
8108         int len;
8109 
8110         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8111             SETCLIENTID4args *, args);
8112 retry:
8113         newcp = cp_confirmed = cp_unconfirmed = NULL;
8114 
8115         /*
8116          * Save the caller's IP address
8117          */
8118         args->client.cl_addr =
8119             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8120 
8121         /*
8122          * Record if it is a Solaris client that cannot handle referrals.
8123          */
8124         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8125             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8126                 /* Add a "yes, it's downrev" record */
8127                 create = TRUE;
8128                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8129                 ASSERT(ci != NULL);
8130                 rfs4_dbe_rele(ci->ri_dbe);
8131         } else {
8132                 /* Remove any previous record */
8133                 rfs4_invalidate_clntip(args->client.cl_addr);
8134         }
8135 
8136         /*
8137          * In search of an EXISTING client matching the incoming
8138          * request to establish a new client identifier at the server
8139          */
8140         create = TRUE;
8141         cp = rfs4_findclient(&args->client, &create, NULL);
8142 
8143         /* Should never happen */
8144         ASSERT(cp != NULL);
8145 
8146         if (cp == NULL) {
8147                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8148                 goto out;
8149         }
8150 
8151         /*
8152          * Easiest case. Client identifier is newly created and is
8153          * unconfirmed.  Also note that for this case, no other
8154          * entries exist for the client identifier.  Nothing else to
8155          * check.  Just setup the response and respond.
8156          */
8157         if (create) {
8158                 *cs->statusp = res->status = NFS4_OK;
8159                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8160                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8161                     cp->rc_confirm_verf;
8162                 /* Setup callback information; CB_NULL confirmation later */
8163                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8164 
8165                 rfs4_client_rele(cp);
8166                 goto out;
8167         }
8168 
8169         /*
8170          * An existing, confirmed client may exist but it may not have
8171          * been active for at least one lease period.  If so, then
8172          * "close" the client and create a new client identifier
8173          */
8174         if (rfs4_lease_expired(cp)) {
8175                 rfs4_client_close(cp);
8176                 goto retry;
8177         }
8178 
8179         if (cp->rc_need_confirm == TRUE)
8180                 cp_unconfirmed = cp;
8181         else
8182                 cp_confirmed = cp;
8183 
8184         cp = NULL;
8185 
8186         /*
8187          * We have a confirmed client, now check for an
8188          * unconfimred entry
8189          */
8190         if (cp_confirmed) {
8191                 /* If creds don't match then client identifier is inuse */
8192                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8193                         rfs4_cbinfo_t *cbp;
8194                         /*
8195                          * Some one else has established this client
8196                          * id. Try and say * who they are. We will use
8197                          * the call back address supplied by * the
8198                          * first client.
8199                          */
8200                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8201 
8202                         addr = netid = NULL;
8203 
8204                         cbp = &cp_confirmed->rc_cbinfo;
8205                         if (cbp->cb_callback.cb_location.r_addr &&
8206                             cbp->cb_callback.cb_location.r_netid) {
8207                                 cb_client4 *cbcp = &cbp->cb_callback;
8208 
8209                                 len = strlen(cbcp->cb_location.r_addr)+1;
8210                                 addr = kmem_alloc(len, KM_SLEEP);
8211                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8212                                 len = strlen(cbcp->cb_location.r_netid)+1;
8213                                 netid = kmem_alloc(len, KM_SLEEP);
8214                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8215                         }
8216 
8217                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8218                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8219 
8220                         rfs4_client_rele(cp_confirmed);
8221                 }
8222 
8223                 /*
8224                  * Confirmed, creds match, and verifier matches; must
8225                  * be an update of the callback info
8226                  */
8227                 if (cp_confirmed->rc_nfs_client.verifier ==
8228                     args->client.verifier) {
8229                         /* Setup callback information */
8230                         rfs4_client_setcb(cp_confirmed, &args->callback,
8231                             args->callback_ident);
8232 
8233                         /* everything okay -- move ahead */
8234                         *cs->statusp = res->status = NFS4_OK;
8235                         res->SETCLIENTID4res_u.resok4.clientid =
8236                             cp_confirmed->rc_clientid;
8237 
8238                         /* update the confirm_verifier and return it */
8239                         rfs4_client_scv_next(cp_confirmed);
8240                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8241                             cp_confirmed->rc_confirm_verf;
8242 
8243                         rfs4_client_rele(cp_confirmed);
8244                         goto out;
8245                 }
8246 
8247                 /*
8248                  * Creds match but the verifier doesn't.  Must search
8249                  * for an unconfirmed client that would be replaced by
8250                  * this request.
8251                  */
8252                 create = FALSE;
8253                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8254                     cp_confirmed);
8255         }
8256 
8257         /*
8258          * At this point, we have taken care of the brand new client
8259          * struct, INUSE case, update of an existing, and confirmed
8260          * client struct.
8261          */
8262 
8263         /*
8264          * check to see if things have changed while we originally
8265          * picked up the client struct.  If they have, then return and
8266          * retry the processing of this SETCLIENTID request.
8267          */
8268         if (cp_unconfirmed) {
8269                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8270                 if (!cp_unconfirmed->rc_need_confirm) {
8271                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8272                         rfs4_client_rele(cp_unconfirmed);
8273                         if (cp_confirmed)
8274                                 rfs4_client_rele(cp_confirmed);
8275                         goto retry;
8276                 }
8277                 /* do away with the old unconfirmed one */
8278                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8279                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8280                 rfs4_client_rele(cp_unconfirmed);
8281                 cp_unconfirmed = NULL;
8282         }
8283 
8284         /*
8285          * This search will temporarily hide the confirmed client
8286          * struct while a new client struct is created as the
8287          * unconfirmed one.
8288          */
8289         create = TRUE;
8290         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8291 
8292         ASSERT(newcp != NULL);
8293 
8294         if (newcp == NULL) {
8295                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8296                 rfs4_client_rele(cp_confirmed);
8297                 goto out;
8298         }
8299 
8300         /*
8301          * If one was not created, then a similar request must be in
8302          * process so release and start over with this one
8303          */
8304         if (create != TRUE) {
8305                 rfs4_client_rele(newcp);
8306                 if (cp_confirmed)
8307                         rfs4_client_rele(cp_confirmed);
8308                 goto retry;
8309         }
8310 
8311         *cs->statusp = res->status = NFS4_OK;
8312         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8313         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8314             newcp->rc_confirm_verf;
8315         /* Setup callback information; CB_NULL confirmation later */
8316         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8317 
8318         newcp->rc_cp_confirmed = cp_confirmed;
8319 
8320         rfs4_client_rele(newcp);
8321 
8322 out:
8323         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8324             SETCLIENTID4res *, res);
8325 }
8326 
8327 /*ARGSUSED*/
8328 void
8329 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8330     struct svc_req *req, struct compound_state *cs)
8331 {
8332         SETCLIENTID_CONFIRM4args *args =
8333             &argop->nfs_argop4_u.opsetclientid_confirm;
8334         SETCLIENTID_CONFIRM4res *res =
8335             &resop->nfs_resop4_u.opsetclientid_confirm;
8336         rfs4_client_t *cp, *cptoclose = NULL;
8337 
8338         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8339             struct compound_state *, cs,
8340             SETCLIENTID_CONFIRM4args *, args);
8341 
8342         *cs->statusp = res->status = NFS4_OK;
8343 
8344         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8345 
8346         if (cp == NULL) {
8347                 *cs->statusp = res->status =
8348                     rfs4_check_clientid(&args->clientid, 1);
8349                 goto out;
8350         }
8351 
8352         if (!creds_ok(cp, req, cs)) {
8353                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8354                 rfs4_client_rele(cp);
8355                 goto out;
8356         }
8357 
8358         /* If the verifier doesn't match, the record doesn't match */
8359         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8360                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8361                 rfs4_client_rele(cp);
8362                 goto out;
8363         }
8364 
8365         rfs4_dbe_lock(cp->rc_dbe);
8366         cp->rc_need_confirm = FALSE;
8367         if (cp->rc_cp_confirmed) {
8368                 cptoclose = cp->rc_cp_confirmed;
8369                 cptoclose->rc_ss_remove = 1;
8370                 cp->rc_cp_confirmed = NULL;
8371         }
8372 
8373         /*
8374          * Update the client's associated server instance, if it's changed
8375          * since the client was created.
8376          */
8377         if (rfs4_servinst(cp) != rfs4_cur_servinst)
8378                 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8379 
8380         /*
8381          * Record clientid in stable storage.
8382          * Must be done after server instance has been assigned.
8383          */
8384         rfs4_ss_clid(cp);
8385 
8386         rfs4_dbe_unlock(cp->rc_dbe);
8387 
8388         if (cptoclose)
8389                 /* don't need to rele, client_close does it */
8390                 rfs4_client_close(cptoclose);
8391 
8392         /* If needed, initiate CB_NULL call for callback path */
8393         rfs4_deleg_cb_check(cp);
8394         rfs4_update_lease(cp);
8395 
8396         /*
8397          * Check to see if client can perform reclaims
8398          */
8399         rfs4_ss_chkclid(cp);
8400 
8401         rfs4_client_rele(cp);
8402 
8403 out:
8404         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8405             struct compound_state *, cs,
8406             SETCLIENTID_CONFIRM4 *, res);
8407 }
8408 
8409 
8410 /*ARGSUSED*/
8411 void
8412 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8413     struct svc_req *req, struct compound_state *cs)
8414 {
8415         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8416         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8417         rfs4_state_t *sp;
8418         nfsstat4 status;
8419 
8420         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8421             CLOSE4args *, args);
8422 
8423         if (cs->vp == NULL) {
8424                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8425                 goto out;
8426         }
8427 
8428         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8429         if (status != NFS4_OK) {
8430                 *cs->statusp = resp->status = status;
8431                 goto out;
8432         }
8433 
8434         /* Ensure specified filehandle matches */
8435         if (cs->vp != sp->rs_finfo->rf_vp) {
8436                 rfs4_state_rele(sp);
8437                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8438                 goto out;
8439         }
8440 
8441         /* hold off other access to open_owner while we tinker */
8442         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8443 
8444         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8445         case NFS4_CHECK_STATEID_OKAY:
8446                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8447                     resop) != NFS4_CHKSEQ_OKAY) {
8448                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8449                         goto end;
8450                 }
8451                 break;
8452         case NFS4_CHECK_STATEID_OLD:
8453                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8454                 goto end;
8455         case NFS4_CHECK_STATEID_BAD:
8456                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8457                 goto end;
8458         case NFS4_CHECK_STATEID_EXPIRED:
8459                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8460                 goto end;
8461         case NFS4_CHECK_STATEID_CLOSED:
8462                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8463                 goto end;
8464         case NFS4_CHECK_STATEID_UNCONFIRMED:
8465                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8466                 goto end;
8467         case NFS4_CHECK_STATEID_REPLAY:
8468                 /* Check the sequence id for the open owner */
8469                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8470                     resop)) {
8471                 case NFS4_CHKSEQ_OKAY:
8472                         /*
8473                          * This is replayed stateid; if seqid matches
8474                          * next expected, then client is using wrong seqid.
8475                          */
8476                         /* FALL THROUGH */
8477                 case NFS4_CHKSEQ_BAD:
8478                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8479                         goto end;
8480                 case NFS4_CHKSEQ_REPLAY:
8481                         /*
8482                          * Note this case is the duplicate case so
8483                          * resp->status is already set.
8484                          */
8485                         *cs->statusp = resp->status;
8486                         rfs4_update_lease(sp->rs_owner->ro_client);
8487                         goto end;
8488                 }
8489                 break;
8490         default:
8491                 ASSERT(FALSE);
8492                 break;
8493         }
8494 
8495         rfs4_dbe_lock(sp->rs_dbe);
8496 
8497         /* Update the stateid. */
8498         next_stateid(&sp->rs_stateid);
8499         resp->open_stateid = sp->rs_stateid.stateid;
8500 
8501         rfs4_dbe_unlock(sp->rs_dbe);
8502 
8503         rfs4_update_lease(sp->rs_owner->ro_client);
8504         rfs4_update_open_sequence(sp->rs_owner);
8505         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8506 
8507         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8508 
8509         *cs->statusp = resp->status = status;
8510 
8511 end:
8512         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8513         rfs4_state_rele(sp);
8514 out:
8515         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8516             CLOSE4res *, resp);
8517 }
8518 
8519 /*
8520  * Manage the counts on the file struct and close all file locks
8521  */
8522 /*ARGSUSED*/
8523 void
8524 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8525     bool_t close_of_client)
8526 {
8527         rfs4_file_t *fp = sp->rs_finfo;
8528         rfs4_lo_state_t *lsp;
8529         int fflags = 0;
8530 
8531         /*
8532          * If this call is part of the larger closing down of client
8533          * state then it is just easier to release all locks
8534          * associated with this client instead of going through each
8535          * individual file and cleaning locks there.
8536          */
8537         if (close_of_client) {
8538                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8539                     !list_is_empty(&sp->rs_lostatelist) &&
8540                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8541                         /* Is the PxFS kernel module loaded? */
8542                         if (lm_remove_file_locks != NULL) {
8543                                 int new_sysid;
8544 
8545                                 /* Encode the cluster nodeid in new sysid */
8546                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8547                                 lm_set_nlmid_flk(&new_sysid);
8548 
8549                                 /*
8550                                  * This PxFS routine removes file locks for a
8551                                  * client over all nodes of a cluster.
8552                                  */
8553                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8554                                     "lm_remove_file_locks(sysid=0x%x)\n",
8555                                     new_sysid));
8556                                 (*lm_remove_file_locks)(new_sysid);
8557                         } else {
8558                                 struct flock64 flk;
8559 
8560                                 /* Release all locks for this client */
8561                                 flk.l_type = F_UNLKSYS;
8562                                 flk.l_whence = 0;
8563                                 flk.l_start = 0;
8564                                 flk.l_len = 0;
8565                                 flk.l_sysid =
8566                                     sp->rs_owner->ro_client->rc_sysidt;
8567                                 flk.l_pid = 0;
8568                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8569                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8570                                     (u_offset_t)0, NULL, CRED(), NULL);
8571                         }
8572 
8573                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8574                 }
8575         }
8576 
8577         /*
8578          * Release all locks on this file by this lock owner or at
8579          * least mark the locks as having been released
8580          */
8581         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8582             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8583                 lsp->rls_locks_cleaned = TRUE;
8584 
8585                 /* Was this already taken care of above? */
8586                 if (!close_of_client &&
8587                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8588                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8589                             lsp->rls_locker->rl_pid,
8590                             lsp->rls_locker->rl_client->rc_sysidt);
8591         }
8592 
8593         /*
8594          * Release any shrlocks associated with this open state ID.
8595          * This must be done before the rfs4_state gets marked closed.
8596          */
8597         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8598                 (void) rfs4_unshare(sp);
8599 
8600         if (sp->rs_open_access) {
8601                 rfs4_dbe_lock(fp->rf_dbe);
8602 
8603                 /*
8604                  * Decrement the count for each access and deny bit that this
8605                  * state has contributed to the file.
8606                  * If the file counts go to zero
8607                  * clear the appropriate bit in the appropriate mask.
8608                  */
8609                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8610                         fp->rf_access_read--;
8611                         fflags |= FREAD;
8612                         if (fp->rf_access_read == 0)
8613                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8614                 }
8615                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8616                         fp->rf_access_write--;
8617                         fflags |= FWRITE;
8618                         if (fp->rf_access_write == 0)
8619                                 fp->rf_share_access &=
8620                                     ~OPEN4_SHARE_ACCESS_WRITE;
8621                 }
8622                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8623                         fp->rf_deny_read--;
8624                         if (fp->rf_deny_read == 0)
8625                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8626                 }
8627                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8628                         fp->rf_deny_write--;
8629                         if (fp->rf_deny_write == 0)
8630                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8631                 }
8632 
8633                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8634 
8635                 rfs4_dbe_unlock(fp->rf_dbe);
8636 
8637                 sp->rs_open_access = 0;
8638                 sp->rs_open_deny = 0;
8639         }
8640 }
8641 
8642 /*
8643  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8644  */
8645 static nfsstat4
8646 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8647 {
8648         rfs4_lockowner_t *lo;
8649         rfs4_client_t *cp;
8650         uint32_t len;
8651 
8652         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8653         if (lo != NULL) {
8654                 cp = lo->rl_client;
8655                 if (rfs4_lease_expired(cp)) {
8656                         rfs4_lockowner_rele(lo);
8657                         rfs4_dbe_hold(cp->rc_dbe);
8658                         rfs4_client_close(cp);
8659                         return (NFS4ERR_EXPIRED);
8660                 }
8661                 dp->owner.clientid = lo->rl_owner.clientid;
8662                 len = lo->rl_owner.owner_len;
8663                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8664                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8665                 dp->owner.owner_len = len;
8666                 rfs4_lockowner_rele(lo);
8667                 goto finish;
8668         }
8669 
8670         /*
8671          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8672          * of the client id contain the boot time for a NFS4 lock. So we
8673          * fabricate and identity by setting clientid to the sysid, and
8674          * the lock owner to the pid.
8675          */
8676         dp->owner.clientid = flk->l_sysid;
8677         len = sizeof (pid_t);
8678         dp->owner.owner_len = len;
8679         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8680         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8681 finish:
8682         dp->offset = flk->l_start;
8683         dp->length = flk->l_len;
8684 
8685         if (flk->l_type == F_RDLCK)
8686                 dp->locktype = READ_LT;
8687         else if (flk->l_type == F_WRLCK)
8688                 dp->locktype = WRITE_LT;
8689         else
8690                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8691 
8692         return (NFS4_OK);
8693 }
8694 
8695 /*
8696  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8697  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8698  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8699  * for that (obviously); they are sending the LOCK requests with some delays
8700  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8701  * locking and delay implementation at the client side.
8702  *
8703  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8704  * fast retries on its own (the for loop below) in a hope the lock will be
8705  * available soon.  And if not, the client won't need to resend the LOCK
8706  * requests so fast to check the lock availability.  This basically saves some
8707  * network traffic and tries to make sure the client gets the lock ASAP.
8708  */
8709 static int
8710 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8711 {
8712         int error;
8713         struct flock64 flk;
8714         int i;
8715         clock_t delaytime;
8716         int cmd;
8717         int spin_cnt = 0;
8718 
8719         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8720 retry:
8721         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8722 
8723         for (i = 0; i < rfs4_maxlock_tries; i++) {
8724                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8725                 error = VOP_FRLOCK(vp, cmd,
8726                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8727 
8728                 if (error != EAGAIN && error != EACCES)
8729                         break;
8730 
8731                 if (i < rfs4_maxlock_tries - 1) {
8732                         delay(delaytime);
8733                         delaytime *= 2;
8734                 }
8735         }
8736 
8737         if (error == EAGAIN || error == EACCES) {
8738                 /* Get the owner of the lock */
8739                 flk = *flock;
8740                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8741                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8742                     NULL) == 0) {
8743                         /*
8744                          * There's a race inherent in the current VOP_FRLOCK
8745                          * design where:
8746                          * a: "other guy" takes a lock that conflicts with a
8747                          * lock we want
8748                          * b: we attempt to take our lock (non-blocking) and
8749                          * the attempt fails.
8750                          * c: "other guy" releases the conflicting lock
8751                          * d: we ask what lock conflicts with the lock we want,
8752                          * getting F_UNLCK (no lock blocks us)
8753                          *
8754                          * If we retry the non-blocking lock attempt in this
8755                          * case (restart at step 'b') there's some possibility
8756                          * that many such attempts might fail.  However a test
8757                          * designed to actually provoke this race shows that
8758                          * the vast majority of cases require no retry, and
8759                          * only a few took as many as three retries.  Here's
8760                          * the test outcome:
8761                          *
8762                          *         number of retries    how many times we needed
8763                          *                              that many retries
8764                          *         0                    79461
8765                          *         1                      862
8766                          *         2                       49
8767                          *         3                        5
8768                          *
8769                          * Given those empirical results, we arbitrarily limit
8770                          * the retry count to ten.
8771                          *
8772                          * If we actually make to ten retries and give up,
8773                          * nothing catastrophic happens, but we're unable to
8774                          * return the information about the conflicting lock to
8775                          * the NFS client.  That's an acceptable trade off vs.
8776                          * letting this retry loop run forever.
8777                          */
8778                         if (flk.l_type == F_UNLCK) {
8779                                 if (spin_cnt++ < 10) {
8780                                         /* No longer locked, retry */
8781                                         goto retry;
8782                                 }
8783                         } else {
8784                                 *flock = flk;
8785                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8786                                     F_GETLK, &flk);
8787                         }
8788                 }
8789         }
8790 
8791         return (error);
8792 }
8793 
8794 /*ARGSUSED*/
8795 static nfsstat4
8796 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8797     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8798 {
8799         nfsstat4 status;
8800         rfs4_lockowner_t *lo = lsp->rls_locker;
8801         rfs4_state_t *sp = lsp->rls_state;
8802         struct flock64 flock;
8803         int16_t ltype;
8804         int flag;
8805         int error;
8806         sysid_t sysid;
8807         LOCK4res *lres;
8808         vnode_t *vp;
8809 
8810         if (rfs4_lease_expired(lo->rl_client)) {
8811                 return (NFS4ERR_EXPIRED);
8812         }
8813 
8814         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8815                 return (status);
8816 
8817         /* Check for zero length. To lock to end of file use all ones for V4 */
8818         if (length == 0)
8819                 return (NFS4ERR_INVAL);
8820         else if (length == (length4)(~0))
8821                 length = 0;             /* Posix to end of file  */
8822 
8823 retry:
8824         rfs4_dbe_lock(sp->rs_dbe);
8825         if (sp->rs_closed == TRUE) {
8826                 rfs4_dbe_unlock(sp->rs_dbe);
8827                 return (NFS4ERR_OLD_STATEID);
8828         }
8829 
8830         if (resop->resop != OP_LOCKU) {
8831                 switch (locktype) {
8832                 case READ_LT:
8833                 case READW_LT:
8834                         if ((sp->rs_share_access
8835                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8836                                 rfs4_dbe_unlock(sp->rs_dbe);
8837 
8838                                 return (NFS4ERR_OPENMODE);
8839                         }
8840                         ltype = F_RDLCK;
8841                         break;
8842                 case WRITE_LT:
8843                 case WRITEW_LT:
8844                         if ((sp->rs_share_access
8845                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8846                                 rfs4_dbe_unlock(sp->rs_dbe);
8847 
8848                                 return (NFS4ERR_OPENMODE);
8849                         }
8850                         ltype = F_WRLCK;
8851                         break;
8852                 }
8853         } else
8854                 ltype = F_UNLCK;
8855 
8856         flock.l_type = ltype;
8857         flock.l_whence = 0;             /* SEEK_SET */
8858         flock.l_start = offset;
8859         flock.l_len = length;
8860         flock.l_sysid = sysid;
8861         flock.l_pid = lsp->rls_locker->rl_pid;
8862 
8863         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8864         if (flock.l_len < 0 || flock.l_start < 0) {
8865                 rfs4_dbe_unlock(sp->rs_dbe);
8866                 return (NFS4ERR_INVAL);
8867         }
8868 
8869         /*
8870          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8871          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8872          */
8873         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8874 
8875         vp = sp->rs_finfo->rf_vp;
8876         VN_HOLD(vp);
8877 
8878         /*
8879          * We need to unlock sp before we call the underlying filesystem to
8880          * acquire the file lock.
8881          */
8882         rfs4_dbe_unlock(sp->rs_dbe);
8883 
8884         error = setlock(vp, &flock, flag, cred);
8885 
8886         /*
8887          * Make sure the file is still open.  In a case the file was closed in
8888          * the meantime, clean the lock we acquired using the setlock() call
8889          * above, and return the appropriate error.
8890          */
8891         rfs4_dbe_lock(sp->rs_dbe);
8892         if (sp->rs_closed == TRUE) {
8893                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8894                 rfs4_dbe_unlock(sp->rs_dbe);
8895 
8896                 VN_RELE(vp);
8897 
8898                 return (NFS4ERR_OLD_STATEID);
8899         }
8900         rfs4_dbe_unlock(sp->rs_dbe);
8901 
8902         VN_RELE(vp);
8903 
8904         if (error == 0) {
8905                 rfs4_dbe_lock(lsp->rls_dbe);
8906                 next_stateid(&lsp->rls_lockid);
8907                 rfs4_dbe_unlock(lsp->rls_dbe);
8908         }
8909 
8910         /*
8911          * N.B. We map error values to nfsv4 errors. This is differrent
8912          * than puterrno4 routine.
8913          */
8914         switch (error) {
8915         case 0:
8916                 status = NFS4_OK;
8917                 break;
8918         case EAGAIN:
8919         case EACCES:            /* Old value */
8920                 /* Can only get here if op is OP_LOCK */
8921                 ASSERT(resop->resop == OP_LOCK);
8922                 lres = &resop->nfs_resop4_u.oplock;
8923                 status = NFS4ERR_DENIED;
8924                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8925                     == NFS4ERR_EXPIRED)
8926                         goto retry;
8927                 break;
8928         case ENOLCK:
8929                 status = NFS4ERR_DELAY;
8930                 break;
8931         case EOVERFLOW:
8932                 status = NFS4ERR_INVAL;
8933                 break;
8934         case EINVAL:
8935                 status = NFS4ERR_NOTSUPP;
8936                 break;
8937         default:
8938                 status = NFS4ERR_SERVERFAULT;
8939                 break;
8940         }
8941 
8942         return (status);
8943 }
8944 
8945 /*ARGSUSED*/
8946 void
8947 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8948     struct svc_req *req, struct compound_state *cs)
8949 {
8950         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8951         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8952         nfsstat4 status;
8953         stateid4 *stateid;
8954         rfs4_lockowner_t *lo;
8955         rfs4_client_t *cp;
8956         rfs4_state_t *sp = NULL;
8957         rfs4_lo_state_t *lsp = NULL;
8958         bool_t ls_sw_held = FALSE;
8959         bool_t create = TRUE;
8960         bool_t lcreate = TRUE;
8961         bool_t dup_lock = FALSE;
8962         int rc;
8963 
8964         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8965             LOCK4args *, args);
8966 
8967         if (cs->vp == NULL) {
8968                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8969                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8970                     cs, LOCK4res *, resp);
8971                 return;
8972         }
8973 
8974         if (args->locker.new_lock_owner) {
8975                 /* Create a new lockowner for this instance */
8976                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8977 
8978                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8979 
8980                 stateid = &olo->open_stateid;
8981                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8982                 if (status != NFS4_OK) {
8983                         NFS4_DEBUG(rfs4_debug,
8984                             (CE_NOTE, "Get state failed in lock %d", status));
8985                         *cs->statusp = resp->status = status;
8986                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8987                             cs, LOCK4res *, resp);
8988                         return;
8989                 }
8990 
8991                 /* Ensure specified filehandle matches */
8992                 if (cs->vp != sp->rs_finfo->rf_vp) {
8993                         rfs4_state_rele(sp);
8994                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8995                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8996                             cs, LOCK4res *, resp);
8997                         return;
8998                 }
8999 
9000                 /* hold off other access to open_owner while we tinker */
9001                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
9002 
9003                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
9004                 case NFS4_CHECK_STATEID_OLD:
9005                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9006                         goto end;
9007                 case NFS4_CHECK_STATEID_BAD:
9008                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9009                         goto end;
9010                 case NFS4_CHECK_STATEID_EXPIRED:
9011                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9012                         goto end;
9013                 case NFS4_CHECK_STATEID_UNCONFIRMED:
9014                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9015                         goto end;
9016                 case NFS4_CHECK_STATEID_CLOSED:
9017                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9018                         goto end;
9019                 case NFS4_CHECK_STATEID_OKAY:
9020                 case NFS4_CHECK_STATEID_REPLAY:
9021                         switch (rfs4_check_olo_seqid(olo->open_seqid,
9022                             sp->rs_owner, resop)) {
9023                         case NFS4_CHKSEQ_OKAY:
9024                                 if (rc == NFS4_CHECK_STATEID_OKAY)
9025                                         break;
9026                                 /*
9027                                  * This is replayed stateid; if seqid
9028                                  * matches next expected, then client
9029                                  * is using wrong seqid.
9030                                  */
9031                                 /* FALLTHROUGH */
9032                         case NFS4_CHKSEQ_BAD:
9033                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9034                                 goto end;
9035                         case NFS4_CHKSEQ_REPLAY:
9036                                 /* This is a duplicate LOCK request */
9037                                 dup_lock = TRUE;
9038 
9039                                 /*
9040                                  * For a duplicate we do not want to
9041                                  * create a new lockowner as it should
9042                                  * already exist.
9043                                  * Turn off the lockowner create flag.
9044                                  */
9045                                 lcreate = FALSE;
9046                         }
9047                         break;
9048                 }
9049 
9050                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
9051                 if (lo == NULL) {
9052                         NFS4_DEBUG(rfs4_debug,
9053                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
9054                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
9055                         goto end;
9056                 }
9057 
9058                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
9059                 if (lsp == NULL) {
9060                         rfs4_update_lease(sp->rs_owner->ro_client);
9061                         /*
9062                          * Only update theh open_seqid if this is not
9063                          * a duplicate request
9064                          */
9065                         if (dup_lock == FALSE) {
9066                                 rfs4_update_open_sequence(sp->rs_owner);
9067                         }
9068 
9069                         NFS4_DEBUG(rfs4_debug,
9070                             (CE_NOTE, "rfs4_op_lock: no state"));
9071                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
9072                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9073                         rfs4_lockowner_rele(lo);
9074                         goto end;
9075                 }
9076 
9077                 /*
9078                  * This is the new_lock_owner branch and the client is
9079                  * supposed to be associating a new lock_owner with
9080                  * the open file at this point.  If we find that a
9081                  * lock_owner/state association already exists and a
9082                  * successful LOCK request was returned to the client,
9083                  * an error is returned to the client since this is
9084                  * not appropriate.  The client should be using the
9085                  * existing lock_owner branch.
9086                  */
9087                 if (dup_lock == FALSE && create == FALSE) {
9088                         if (lsp->rls_lock_completed == TRUE) {
9089                                 *cs->statusp =
9090                                     resp->status = NFS4ERR_BAD_SEQID;
9091                                 rfs4_lockowner_rele(lo);
9092                                 goto end;
9093                         }
9094                 }
9095 
9096                 rfs4_update_lease(sp->rs_owner->ro_client);
9097 
9098                 /*
9099                  * Only update theh open_seqid if this is not
9100                  * a duplicate request
9101                  */
9102                 if (dup_lock == FALSE) {
9103                         rfs4_update_open_sequence(sp->rs_owner);
9104                 }
9105 
9106                 /*
9107                  * If this is a duplicate lock request, just copy the
9108                  * previously saved reply and return.
9109                  */
9110                 if (dup_lock == TRUE) {
9111                         /* verify that lock_seqid's match */
9112                         if (lsp->rls_seqid != olo->lock_seqid) {
9113                                 NFS4_DEBUG(rfs4_debug,
9114                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9115                                     "lsp->seqid=%d old->seqid=%d",
9116                                     lsp->rls_seqid, olo->lock_seqid));
9117                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9118                         } else {
9119                                 rfs4_copy_reply(resop, &lsp->rls_reply);
9120                                 /*
9121                                  * Make sure to copy the just
9122                                  * retrieved reply status into the
9123                                  * overall compound status
9124                                  */
9125                                 *cs->statusp = resp->status;
9126                         }
9127                         rfs4_lockowner_rele(lo);
9128                         goto end;
9129                 }
9130 
9131                 rfs4_dbe_lock(lsp->rls_dbe);
9132 
9133                 /* Make sure to update the lock sequence id */
9134                 lsp->rls_seqid = olo->lock_seqid;
9135 
9136                 NFS4_DEBUG(rfs4_debug,
9137                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9138 
9139                 /*
9140                  * This is used to signify the newly created lockowner
9141                  * stateid and its sequence number.  The checks for
9142                  * sequence number and increment don't occur on the
9143                  * very first lock request for a lockowner.
9144                  */
9145                 lsp->rls_skip_seqid_check = TRUE;
9146 
9147                 /* hold off other access to lsp while we tinker */
9148                 rfs4_sw_enter(&lsp->rls_sw);
9149                 ls_sw_held = TRUE;
9150 
9151                 rfs4_dbe_unlock(lsp->rls_dbe);
9152 
9153                 rfs4_lockowner_rele(lo);
9154         } else {
9155                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9156                 /* get lsp and hold the lock on the underlying file struct */
9157                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9158                     != NFS4_OK) {
9159                         *cs->statusp = resp->status = status;
9160                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9161                             cs, LOCK4res *, resp);
9162                         return;
9163                 }
9164                 create = FALSE; /* We didn't create lsp */
9165 
9166                 /* Ensure specified filehandle matches */
9167                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9168                         rfs4_lo_state_rele(lsp, TRUE);
9169                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9170                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9171                             cs, LOCK4res *, resp);
9172                         return;
9173                 }
9174 
9175                 /* hold off other access to lsp while we tinker */
9176                 rfs4_sw_enter(&lsp->rls_sw);
9177                 ls_sw_held = TRUE;
9178 
9179                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9180                 /*
9181                  * The stateid looks like it was okay (expected to be
9182                  * the next one)
9183                  */
9184                 case NFS4_CHECK_STATEID_OKAY:
9185                         /*
9186                          * The sequence id is now checked.  Determine
9187                          * if this is a replay or if it is in the
9188                          * expected (next) sequence.  In the case of a
9189                          * replay, there are two replay conditions
9190                          * that may occur.  The first is the normal
9191                          * condition where a LOCK is done with a
9192                          * NFS4_OK response and the stateid is
9193                          * updated.  That case is handled below when
9194                          * the stateid is identified as a REPLAY.  The
9195                          * second is the case where an error is
9196                          * returned, like NFS4ERR_DENIED, and the
9197                          * sequence number is updated but the stateid
9198                          * is not updated.  This second case is dealt
9199                          * with here.  So it may seem odd that the
9200                          * stateid is okay but the sequence id is a
9201                          * replay but it is okay.
9202                          */
9203                         switch (rfs4_check_lock_seqid(
9204                             args->locker.locker4_u.lock_owner.lock_seqid,
9205                             lsp, resop)) {
9206                         case NFS4_CHKSEQ_REPLAY:
9207                                 if (resp->status != NFS4_OK) {
9208                                         /*
9209                                          * Here is our replay and need
9210                                          * to verify that the last
9211                                          * response was an error.
9212                                          */
9213                                         *cs->statusp = resp->status;
9214                                         goto end;
9215                                 }
9216                                 /*
9217                                  * This is done since the sequence id
9218                                  * looked like a replay but it didn't
9219                                  * pass our check so a BAD_SEQID is
9220                                  * returned as a result.
9221                                  */
9222                                 /*FALLTHROUGH*/
9223                         case NFS4_CHKSEQ_BAD:
9224                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9225                                 goto end;
9226                         case NFS4_CHKSEQ_OKAY:
9227                                 /* Everything looks okay move ahead */
9228                                 break;
9229                         }
9230                         break;
9231                 case NFS4_CHECK_STATEID_OLD:
9232                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9233                         goto end;
9234                 case NFS4_CHECK_STATEID_BAD:
9235                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9236                         goto end;
9237                 case NFS4_CHECK_STATEID_EXPIRED:
9238                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9239                         goto end;
9240                 case NFS4_CHECK_STATEID_CLOSED:
9241                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9242                         goto end;
9243                 case NFS4_CHECK_STATEID_REPLAY:
9244                         switch (rfs4_check_lock_seqid(
9245                             args->locker.locker4_u.lock_owner.lock_seqid,
9246                             lsp, resop)) {
9247                         case NFS4_CHKSEQ_OKAY:
9248                                 /*
9249                                  * This is a replayed stateid; if
9250                                  * seqid matches the next expected,
9251                                  * then client is using wrong seqid.
9252                                  */
9253                         case NFS4_CHKSEQ_BAD:
9254                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9255                                 goto end;
9256                         case NFS4_CHKSEQ_REPLAY:
9257                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9258                                 *cs->statusp = status = resp->status;
9259                                 goto end;
9260                         }
9261                         break;
9262                 default:
9263                         ASSERT(FALSE);
9264                         break;
9265                 }
9266 
9267                 rfs4_update_lock_sequence(lsp);
9268                 rfs4_update_lease(lsp->rls_locker->rl_client);
9269         }
9270 
9271         /*
9272          * NFS4 only allows locking on regular files, so
9273          * verify type of object.
9274          */
9275         if (cs->vp->v_type != VREG) {
9276                 if (cs->vp->v_type == VDIR)
9277                         status = NFS4ERR_ISDIR;
9278                 else
9279                         status = NFS4ERR_INVAL;
9280                 goto out;
9281         }
9282 
9283         cp = lsp->rls_state->rs_owner->ro_client;
9284 
9285         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9286                 status = NFS4ERR_GRACE;
9287                 goto out;
9288         }
9289 
9290         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9291                 status = NFS4ERR_NO_GRACE;
9292                 goto out;
9293         }
9294 
9295         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9296                 status = NFS4ERR_NO_GRACE;
9297                 goto out;
9298         }
9299 
9300         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9301                 cs->deleg = TRUE;
9302 
9303         status = rfs4_do_lock(lsp, args->locktype,
9304             args->offset, args->length, cs->cr, resop);
9305 
9306 out:
9307         lsp->rls_skip_seqid_check = FALSE;
9308 
9309         *cs->statusp = resp->status = status;
9310 
9311         if (status == NFS4_OK) {
9312                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9313                 lsp->rls_lock_completed = TRUE;
9314         }
9315         /*
9316          * Only update the "OPEN" response here if this was a new
9317          * lock_owner
9318          */
9319         if (sp)
9320                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9321 
9322         rfs4_update_lock_resp(lsp, resop);
9323 
9324 end:
9325         if (lsp) {
9326                 if (ls_sw_held)
9327                         rfs4_sw_exit(&lsp->rls_sw);
9328                 /*
9329                  * If an sp obtained, then the lsp does not represent
9330                  * a lock on the file struct.
9331                  */
9332                 if (sp != NULL)
9333                         rfs4_lo_state_rele(lsp, FALSE);
9334                 else
9335                         rfs4_lo_state_rele(lsp, TRUE);
9336         }
9337         if (sp) {
9338                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9339                 rfs4_state_rele(sp);
9340         }
9341 
9342         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9343             LOCK4res *, resp);
9344 }
9345 
9346 /* free function for LOCK/LOCKT */
9347 static void
9348 lock_denied_free(nfs_resop4 *resop)
9349 {
9350         LOCK4denied *dp = NULL;
9351 
9352         switch (resop->resop) {
9353         case OP_LOCK:
9354                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9355                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9356                 break;
9357         case OP_LOCKT:
9358                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9359                         dp = &resop->nfs_resop4_u.oplockt.denied;
9360                 break;
9361         default:
9362                 break;
9363         }
9364 
9365         if (dp)
9366                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9367 }
9368 
9369 /*ARGSUSED*/
9370 void
9371 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9372     struct svc_req *req, struct compound_state *cs)
9373 {
9374         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9375         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9376         nfsstat4 status;
9377         stateid4 *stateid = &args->lock_stateid;
9378         rfs4_lo_state_t *lsp;
9379 
9380         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9381             LOCKU4args *, args);
9382 
9383         if (cs->vp == NULL) {
9384                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9385                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9386                     LOCKU4res *, resp);
9387                 return;
9388         }
9389 
9390         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9391                 *cs->statusp = resp->status = status;
9392                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9393                     LOCKU4res *, resp);
9394                 return;
9395         }
9396 
9397         /* Ensure specified filehandle matches */
9398         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9399                 rfs4_lo_state_rele(lsp, TRUE);
9400                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9401                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9402                     LOCKU4res *, resp);
9403                 return;
9404         }
9405 
9406         /* hold off other access to lsp while we tinker */
9407         rfs4_sw_enter(&lsp->rls_sw);
9408 
9409         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9410         case NFS4_CHECK_STATEID_OKAY:
9411                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9412                     != NFS4_CHKSEQ_OKAY) {
9413                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9414                         goto end;
9415                 }
9416                 break;
9417         case NFS4_CHECK_STATEID_OLD:
9418                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9419                 goto end;
9420         case NFS4_CHECK_STATEID_BAD:
9421                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9422                 goto end;
9423         case NFS4_CHECK_STATEID_EXPIRED:
9424                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9425                 goto end;
9426         case NFS4_CHECK_STATEID_CLOSED:
9427                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9428                 goto end;
9429         case NFS4_CHECK_STATEID_REPLAY:
9430                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9431                 case NFS4_CHKSEQ_OKAY:
9432                                 /*
9433                                  * This is a replayed stateid; if
9434                                  * seqid matches the next expected,
9435                                  * then client is using wrong seqid.
9436                                  */
9437                 case NFS4_CHKSEQ_BAD:
9438                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9439                         goto end;
9440                 case NFS4_CHKSEQ_REPLAY:
9441                         rfs4_update_lease(lsp->rls_locker->rl_client);
9442                         *cs->statusp = status = resp->status;
9443                         goto end;
9444                 }
9445                 break;
9446         default:
9447                 ASSERT(FALSE);
9448                 break;
9449         }
9450 
9451         rfs4_update_lock_sequence(lsp);
9452         rfs4_update_lease(lsp->rls_locker->rl_client);
9453 
9454         /*
9455          * NFS4 only allows locking on regular files, so
9456          * verify type of object.
9457          */
9458         if (cs->vp->v_type != VREG) {
9459                 if (cs->vp->v_type == VDIR)
9460                         status = NFS4ERR_ISDIR;
9461                 else
9462                         status = NFS4ERR_INVAL;
9463                 goto out;
9464         }
9465 
9466         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9467                 status = NFS4ERR_GRACE;
9468                 goto out;
9469         }
9470 
9471         status = rfs4_do_lock(lsp, args->locktype,
9472             args->offset, args->length, cs->cr, resop);
9473 
9474 out:
9475         *cs->statusp = resp->status = status;
9476 
9477         if (status == NFS4_OK)
9478                 resp->lock_stateid = lsp->rls_lockid.stateid;
9479 
9480         rfs4_update_lock_resp(lsp, resop);
9481 
9482 end:
9483         rfs4_sw_exit(&lsp->rls_sw);
9484         rfs4_lo_state_rele(lsp, TRUE);
9485 
9486         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9487             LOCKU4res *, resp);
9488 }
9489 
9490 /*
9491  * LOCKT is a best effort routine, the client can not be guaranteed that
9492  * the status return is still in effect by the time the reply is received.
 * There are numerous race conditions in this routine, but we are not
 * required to be, and cannot be, accurate.
9495  */
9496 /*ARGSUSED*/
9497 void
9498 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9499     struct svc_req *req, struct compound_state *cs)
9500 {
9501         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9502         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9503         rfs4_lockowner_t *lo;
9504         rfs4_client_t *cp;
9505         bool_t create = FALSE;
9506         struct flock64 flk;
9507         int error;
9508         int flag = FREAD | FWRITE;
9509         int ltype;
9510         length4 posix_length;
9511         sysid_t sysid;
9512         pid_t pid;
9513 
9514         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9515             LOCKT4args *, args);
9516 
9517         if (cs->vp == NULL) {
9518                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9519                 goto out;
9520         }
9521 
9522         /*
9523          * NFS4 only allows locking on regular files, so
9524          * verify type of object.
9525          */
9526         if (cs->vp->v_type != VREG) {
9527                 if (cs->vp->v_type == VDIR)
9528                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9529                 else
9530                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9531                 goto out;
9532         }
9533 
9534         /*
9535          * Check out the clientid to ensure the server knows about it
9536          * so that we correctly inform the client of a server reboot.
9537          */
9538         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9539             == NULL) {
9540                 *cs->statusp = resp->status =
9541                     rfs4_check_clientid(&args->owner.clientid, 0);
9542                 goto out;
9543         }
9544         if (rfs4_lease_expired(cp)) {
9545                 rfs4_client_close(cp);
9546                 /*
9547                  * Protocol doesn't allow returning NFS4ERR_STALE as
9548                  * other operations do on this check so STALE_CLIENTID
9549                  * is returned instead
9550                  */
9551                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9552                 goto out;
9553         }
9554 
9555         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9556                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9557                 rfs4_client_rele(cp);
9558                 goto out;
9559         }
9560         rfs4_client_rele(cp);
9561 
9562         resp->status = NFS4_OK;
9563 
9564         switch (args->locktype) {
9565         case READ_LT:
9566         case READW_LT:
9567                 ltype = F_RDLCK;
9568                 break;
9569         case WRITE_LT:
9570         case WRITEW_LT:
9571                 ltype = F_WRLCK;
9572                 break;
9573         }
9574 
9575         posix_length = args->length;
9576         /* Check for zero length. To lock to end of file use all ones for V4 */
9577         if (posix_length == 0) {
9578                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9579                 goto out;
9580         } else if (posix_length == (length4)(~0)) {
9581                 posix_length = 0;       /* Posix to end of file  */
9582         }
9583 
9584         /* Find or create a lockowner */
9585         lo = rfs4_findlockowner(&args->owner, &create);
9586 
9587         if (lo) {
9588                 pid = lo->rl_pid;
9589                 if ((resp->status =
9590                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9591                         goto err;
9592         } else {
9593                 pid = 0;
9594                 sysid = lockt_sysid;
9595         }
9596 retry:
9597         flk.l_type = ltype;
9598         flk.l_whence = 0;               /* SEEK_SET */
9599         flk.l_start = args->offset;
9600         flk.l_len = posix_length;
9601         flk.l_sysid = sysid;
9602         flk.l_pid = pid;
9603         flag |= F_REMOTELOCK;
9604 
9605         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9606 
9607         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9608         if (flk.l_len < 0 || flk.l_start < 0) {
9609                 resp->status = NFS4ERR_INVAL;
9610                 goto err;
9611         }
9612         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9613             NULL, cs->cr, NULL);
9614 
9615         /*
         * N.B. We map error values to nfsv4 errors. This is different
         * from the puterrno4 routine.
9618          */
9619         switch (error) {
9620         case 0:
9621                 if (flk.l_type == F_UNLCK)
9622                         resp->status = NFS4_OK;
9623                 else {
9624                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9625                                 goto retry;
9626                         resp->status = NFS4ERR_DENIED;
9627                 }
9628                 break;
9629         case EOVERFLOW:
9630                 resp->status = NFS4ERR_INVAL;
9631                 break;
9632         case EINVAL:
9633                 resp->status = NFS4ERR_NOTSUPP;
9634                 break;
9635         default:
9636                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9637                     error);
9638                 resp->status = NFS4ERR_SERVERFAULT;
9639                 break;
9640         }
9641 
9642 err:
9643         if (lo)
9644                 rfs4_lockowner_rele(lo);
9645         *cs->statusp = resp->status;
9646 out:
9647         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9648             LOCKT4res *, resp);
9649 }
9650 
9651 int
9652 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9653 {
9654         int err;
9655         int cmd;
9656         vnode_t *vp;
9657         struct shrlock shr;
9658         struct shr_locowner shr_loco;
9659         int fflags = 0;
9660 
9661         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9662         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9663 
9664         if (sp->rs_closed)
9665                 return (NFS4ERR_OLD_STATEID);
9666 
9667         vp = sp->rs_finfo->rf_vp;
9668         ASSERT(vp);
9669 
9670         shr.s_access = shr.s_deny = 0;
9671 
9672         if (access & OPEN4_SHARE_ACCESS_READ) {
9673                 fflags |= FREAD;
9674                 shr.s_access |= F_RDACC;
9675         }
9676         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9677                 fflags |= FWRITE;
9678                 shr.s_access |= F_WRACC;
9679         }
9680         ASSERT(shr.s_access);
9681 
9682         if (deny & OPEN4_SHARE_DENY_READ)
9683                 shr.s_deny |= F_RDDNY;
9684         if (deny & OPEN4_SHARE_DENY_WRITE)
9685                 shr.s_deny |= F_WRDNY;
9686 
9687         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9688         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9689         shr_loco.sl_pid = shr.s_pid;
9690         shr_loco.sl_id = shr.s_sysid;
9691         shr.s_owner = (caddr_t)&shr_loco;
9692         shr.s_own_len = sizeof (shr_loco);
9693 
9694         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9695 
9696         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9697         if (err != 0) {
9698                 if (err == EAGAIN)
9699                         err = NFS4ERR_SHARE_DENIED;
9700                 else
9701                         err = puterrno4(err);
9702                 return (err);
9703         }
9704 
9705         sp->rs_share_access |= access;
9706         sp->rs_share_deny |= deny;
9707 
9708         return (0);
9709 }
9710 
9711 int
9712 rfs4_unshare(rfs4_state_t *sp)
9713 {
9714         int err;
9715         struct shrlock shr;
9716         struct shr_locowner shr_loco;
9717 
9718         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9719 
9720         if (sp->rs_closed || sp->rs_share_access == 0)
9721                 return (0);
9722 
9723         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9724         ASSERT(sp->rs_finfo->rf_vp);
9725 
9726         shr.s_access = shr.s_deny = 0;
9727         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9728         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9729         shr_loco.sl_pid = shr.s_pid;
9730         shr_loco.sl_id = shr.s_sysid;
9731         shr.s_owner = (caddr_t)&shr_loco;
9732         shr.s_own_len = sizeof (shr_loco);
9733 
9734         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9735             NULL);
9736         if (err != 0) {
9737                 err = puterrno4(err);
9738                 return (err);
9739         }
9740 
9741         sp->rs_share_access = 0;
9742         sp->rs_share_deny = 0;
9743 
9744         return (0);
9745 
9746 }
9747 
9748 static int
9749 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9750 {
9751         struct clist    *wcl;
9752         count4          count = rok->data_len;
9753         int             wlist_len;
9754 
9755         wcl = args->wlist;
9756         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9757                 return (FALSE);
9758         }
9759         wcl = args->wlist;
9760         rok->wlist_len = wlist_len;
9761         rok->wlist = wcl;
9762         return (TRUE);
9763 }
9764 
/* Tunable: set to a non-zero value to disable server referrals. */
9766 int rfs4_no_referrals = 0;
9767 
9768 /*
9769  * Find an NFS record in reparse point data.
9770  * Returns 0 for success and <0 or an errno value on failure.
9771  */
9772 int
9773 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9774 {
9775         int err;
9776         char *stype, *val;
9777         nvlist_t *nvl;
9778         nvpair_t *curr;
9779 
9780         if ((nvl = reparse_init()) == NULL)
9781                 return (-1);
9782 
9783         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9784                 reparse_free(nvl);
9785                 return (err);
9786         }
9787 
9788         curr = NULL;
9789         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9790                 if ((stype = nvpair_name(curr)) == NULL) {
9791                         reparse_free(nvl);
9792                         return (-2);
9793                 }
9794                 if (strncasecmp(stype, "NFS", 3) == 0)
9795                         break;
9796         }
9797 
9798         if ((curr == NULL) ||
9799             (nvpair_value_string(curr, &val))) {
9800                 reparse_free(nvl);
9801                 return (-3);
9802         }
9803         *nvlp = nvl;
9804         *svcp = stype;
9805         *datap = val;
9806         return (0);
9807 }
9808 
9809 int
9810 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9811 {
9812         nvlist_t *nvl;
9813         char *s, *d;
9814 
9815         if (rfs4_no_referrals != 0)
9816                 return (B_FALSE);
9817 
9818         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9819                 return (B_FALSE);
9820 
9821         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9822                 return (B_FALSE);
9823 
9824         reparse_free(nvl);
9825 
9826         return (B_TRUE);
9827 }
9828 
9829 /*
9830  * There is a user-level copy of this routine in ref_subr.c.
9831  * Changes should be kept in sync.
9832  */
9833 static int
9834 nfs4_create_components(char *path, component4 *comp4)
9835 {
9836         int slen, plen, ncomp;
9837         char *ori_path, *nxtc, buf[MAXNAMELEN];
9838 
9839         if (path == NULL)
9840                 return (0);
9841 
9842         plen = strlen(path) + 1;        /* include the terminator */
9843         ori_path = path;
9844         ncomp = 0;
9845 
9846         /* count number of components in the path */
9847         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9848                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9849                         if ((slen = nxtc - path) == 0) {
9850                                 path = nxtc + 1;
9851                                 continue;
9852                         }
9853 
9854                         if (comp4 != NULL) {
9855                                 bcopy(path, buf, slen);
9856                                 buf[slen] = '\0';
9857                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9858                         }
9859 
9860                         ncomp++;        /* 1 valid component */
9861                         path = nxtc + 1;
9862                 }
9863                 if (*nxtc == '\0' || *nxtc == '\n')
9864                         break;
9865         }
9866 
9867         return (ncomp);
9868 }
9869 
9870 /*
9871  * There is a user-level copy of this routine in ref_subr.c.
9872  * Changes should be kept in sync.
9873  */
9874 static int
9875 make_pathname4(char *path, pathname4 *pathname)
9876 {
9877         int ncomp;
9878         component4 *comp4;
9879 
9880         if (pathname == NULL)
9881                 return (0);
9882 
9883         if (path == NULL) {
9884                 pathname->pathname4_val = NULL;
9885                 pathname->pathname4_len = 0;
9886                 return (0);
9887         }
9888 
9889         /* count number of components to alloc buffer */
9890         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9891                 pathname->pathname4_val = NULL;
9892                 pathname->pathname4_len = 0;
9893                 return (0);
9894         }
9895         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9896 
9897         /* copy components into allocated buffer */
9898         ncomp = nfs4_create_components(path, comp4);
9899 
9900         pathname->pathname4_val = comp4;
9901         pathname->pathname4_len = ncomp;
9902 
9903         return (ncomp);
9904 }
9905 
9906 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9907 
9908 fs_locations4 *
9909 fetch_referral(vnode_t *vp, cred_t *cr)
9910 {
9911         nvlist_t *nvl;
9912         char *stype, *sdata;
9913         fs_locations4 *result;
9914         char buf[1024];
9915         size_t bufsize;
9916         XDR xdr;
9917         int err;
9918 
9919         /*
9920          * Check attrs to ensure it's a reparse point
9921          */
9922         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9923                 return (NULL);
9924 
9925         /*
9926          * Look for an NFS record and get the type and data
9927          */
9928         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9929                 return (NULL);
9930 
9931         /*
9932          * With the type and data, upcall to get the referral
9933          */
9934         bufsize = sizeof (buf);
9935         bzero(buf, sizeof (buf));
9936         err = reparse_kderef((const char *)stype, (const char *)sdata,
9937             buf, &bufsize);
9938         reparse_free(nvl);
9939 
9940         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9941             char *, stype, char *, sdata, char *, buf, int, err);
9942         if (err) {
9943                 cmn_err(CE_NOTE,
9944                     "reparsed daemon not running: unable to get referral (%d)",
9945                     err);
9946                 return (NULL);
9947         }
9948 
9949         /*
9950          * We get an XDR'ed record back from the kderef call
9951          */
9952         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9953         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9954         err = xdr_fs_locations4(&xdr, result);
9955         XDR_DESTROY(&xdr);
9956         if (err != TRUE) {
9957                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9958                     int, err);
9959                 return (NULL);
9960         }
9961 
9962         /*
9963          * Look at path to recover fs_root, ignoring the leading '/'
9964          */
9965         (void) make_pathname4(vp->v_path, &result->fs_root);
9966 
9967         return (result);
9968 }
9969 
9970 char *
9971 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9972 {
9973         fs_locations4 *fsl;
9974         fs_location4 *fs;
9975         char *server, *path, *symbuf;
9976         static char *prefix = "/net/";
9977         int i, size, npaths;
9978         uint_t len;
9979 
9980         /* Get the referral */
9981         if ((fsl = fetch_referral(vp, cr)) == NULL)
9982                 return (NULL);
9983 
9984         /* Deal with only the first location and first server */
9985         fs = &fsl->locations_val[0];
9986         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9987         if (server == NULL) {
9988                 rfs4_free_fs_locations4(fsl);
9989                 kmem_free(fsl, sizeof (fs_locations4));
9990                 return (NULL);
9991         }
9992 
9993         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9994         size = strlen(prefix) + len;
9995         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9996                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9997 
9998         /* Allocate the symlink buffer and fill it */
9999         symbuf = kmem_zalloc(size, KM_SLEEP);
10000         (void) strcat(symbuf, prefix);
10001         (void) strcat(symbuf, server);
10002         kmem_free(server, len);
10003 
10004         npaths = 0;
10005         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
10006                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
10007                 if (path == NULL)
10008                         continue;
10009                 (void) strcat(symbuf, "/");
10010                 (void) strcat(symbuf, path);
10011                 npaths++;
10012                 kmem_free(path, len);
10013         }
10014 
10015         rfs4_free_fs_locations4(fsl);
10016         kmem_free(fsl, sizeof (fs_locations4));
10017 
10018         if (strsz != NULL)
10019                 *strsz = size;
10020         return (symbuf);
10021 }
10022 
10023 /*
10024  * Check to see if we have a downrev Solaris client, so that we
10025  * can send it a symlink instead of a referral.
10026  */
10027 int
10028 client_is_downrev(struct svc_req *req)
10029 {
10030         struct sockaddr *ca;
10031         rfs4_clntip_t *ci;
10032         bool_t create = FALSE;
10033         int is_downrev;
10034 
10035         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10036         ASSERT(ca);
10037         ci = rfs4_find_clntip(ca, &create);
10038         if (ci == NULL)
10039                 return (0);
10040         is_downrev = ci->ri_no_referrals;
10041         rfs4_dbe_rele(ci->ri_dbe);
10042         return (is_downrev);
10043 }