1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2012 by Delphix. All rights reserved.
  26  */
  27 
  28 /*
  29  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30  *      All Rights Reserved
  31  */
  32 
  33 #include <sys/param.h>
  34 #include <sys/types.h>
  35 #include <sys/systm.h>
  36 #include <sys/cred.h>
  37 #include <sys/buf.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vfs_opreg.h>
  40 #include <sys/vnode.h>
  41 #include <sys/uio.h>
  42 #include <sys/errno.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/statvfs.h>
  45 #include <sys/kmem.h>
  46 #include <sys/dirent.h>
  47 #include <sys/cmn_err.h>
  48 #include <sys/debug.h>
  49 #include <sys/systeminfo.h>
  50 #include <sys/flock.h>
  51 #include <sys/pathname.h>
  52 #include <sys/nbmlock.h>
  53 #include <sys/share.h>
  54 #include <sys/atomic.h>
  55 #include <sys/policy.h>
  56 #include <sys/fem.h>
  57 #include <sys/sdt.h>
  58 #include <sys/ddi.h>
  59 #include <sys/zone.h>
  60 
  61 #include <fs/fs_reparse.h>
  62 
  63 #include <rpc/types.h>
  64 #include <rpc/auth.h>
  65 #include <rpc/rpcsec_gss.h>
  66 #include <rpc/svc.h>
  67 
  68 #include <nfs/nfs.h>
  69 #include <nfs/export.h>
  70 #include <nfs/nfs_cmd.h>
  71 #include <nfs/lm.h>
  72 #include <nfs/nfs4.h>
  73 
  74 #include <sys/strsubr.h>
  75 #include <sys/strsun.h>
  76 
  77 #include <inet/common.h>
  78 #include <inet/ip.h>
  79 #include <inet/ip6.h>
  80 
  81 #include <sys/tsol/label.h>
  82 #include <sys/tsol/tndb.h>
  83 
/*
 * Tunables.
 *
 * rfs4_maxlock_tries and rfs4_lock_delay hold the lock retry count and the
 * delay (in milliseconds, per the macro comment) between attempts.  The code
 * that consumes them is not in this part of the file.
 */
#define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define RFS4_LOCK_DELAY 10      /* Milliseconds */
static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
/* Defined in other kernel modules/files; only referenced from here. */
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */

/* Forward declaration; the definition is not in this part of the file. */
static int rdma_setup_read_data4(READ4args *, READ4res *);
  93 
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 *
 * Pre-increments (sp)->bits.chgseq in place, so the expression evaluates to
 * the new sequence value.  The argument is evaluated exactly once.
 */
#define next_stateid(sp) (++(sp)->bits.chgseq)
  98 
  99 /*
 100  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 101  *      This is used to return NFS4ERR_TOOSMALL when clients specify
 102  *      maxcount that isn't large enough to hold the smallest possible
 103  *      XDR encoded dirent.
 104  *
 105  *          sizeof cookie (8 bytes) +
 106  *          sizeof name_len (4 bytes) +
 107  *          sizeof smallest (padded) name (4 bytes) +
 108  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 109  *          sizeof attrlist4_len (4 bytes) +
 110  *          sizeof next boolean (4 bytes)
 111  *
 112  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 113  * the smallest possible entry4 (assumes no attrs requested).
 114  *      sizeof nfsstat4 (4 bytes) +
 115  *      sizeof verifier4 (8 bytes) +
 116  *      sizeof entry4list bool (4 bytes) +
 117  *      sizeof entry4   (36 bytes) +
 118  *      sizeof eof bool  (4 bytes)
 119  *
 120  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 121  *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 122  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 123  *      required for a given name length.  MAXNAMELEN is the maximum
 124  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 125  *      macros are to allow for . and .. entries -- just a minor tweak to try
 126  *      and guarantee that buffer we give to VOP_READDIR will be large enough
 127  *      to hold ., .., and the largest possible solaris dirent64.
 128  */
/* 8 + 4 + 4 + 12 + 4 + 4 bytes: the smallest entry4, as itemized above. */
#define RFS4_MINLEN_ENTRY4 36
/* status (4) + verifier + entry-list bool (4) + one entry4 + eof bool (4) */
#define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
/* room for ".", ".." and one dirent64 carrying a MAXNAMELEN-sized name */
#define RFS4_MINLEN_RDDIR_BUF \
        (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 133 
/*
 * It would be better to pad to 4 bytes since that's what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we're given.  Dircount is only a hint anyway.  Currently the
 * solaris kernel is ASCII only, so there's no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 *
 * The macro therefore charges three XDR units for the fixed part (the
 * 64-bit cookie plus the name length word) and adds the name space that
 * DIRENT64_NAMELEN() derives from the entry's d_reclen.
 */
#define DIRENT64_TO_DIRCOUNT(dp) \
        (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 149 
/*
 * NOTE(review): the comment below says rfs4_srvrinit() initializes this,
 * but rfs4_srvrinit() as written in this file never assigns it -- confirm
 * where it is actually set.
 */
time_t rfs4_start_time;                 /* Initialized in rfs4_srvrinit */

/* Allocated in rfs4_srvrinit(), released in rfs4_srvrfini(). */
static sysid_t lockt_sysid;             /* dummy sysid for all LOCKT calls */

/* Both are set up in rfs4_srvrinit() (fs_new_caller_id() / vsd_create()). */
u_longlong_t    nfs4_srv_caller_id;
uint_t          nfs4_srv_vkey = 0;

/*
 * Write4verf is built in rfs4_srvrinit() from the hostid and the time.
 * Readdir4verf is not assigned anywhere in this part of the file.
 */
verifier4       Write4verf;
verifier4       Readdir4verf;

void    rfs4_init_compound_state(struct compound_state *);
 161 
/*
 * Forward declarations.
 *
 * The rfs4_op_*() routines are the per-operation handlers installed in
 * rfsv4disptab[] below; they all share the signature
 * (argop, resop, svc_req, compound_state).  The *_free() routines (and
 * nullfree()) are the matching result-free entries of that table.
 */
static void     nullfree(caddr_t);
static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_create_free(nfs_resop4 *resop);
static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_getattr_free(nfs_resop4 *);
static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_getfh_free(nfs_resop4 *);
static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     lock_denied_free(nfs_resop4 *);
static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
                                struct svc_req *req, struct compound_state *cs);
static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_read_free(nfs_resop4 *);
static void     rfs4_op_readdir_free(nfs_resop4 *resop);
static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_readlink_free(nfs_resop4 *);
static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *, struct compound_state *);
static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
                        struct svc_req *req, struct compound_state *);
static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
                        struct compound_state *);
static void     rfs4_op_secinfo_free(nfs_resop4 *);

/* Helpers declared here and defined outside this part of the file. */
static nfsstat4 check_open_access(uint32_t,
                                struct compound_state *, struct svc_req *);
nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
void rfs4_ss_clid(rfs4_client_t *);
 252 
/*
 * translation table for attrs
 *
 * Set up by nfs4_ntov_table_init() and released by nfs4_ntov_table_free().
 * NOTE(review): the field descriptions below are inferred from the names and
 * types only; the code that fills them is outside this excerpt -- confirm.
 */
struct nfs4_ntov_table {
        union nfs4_attr_u *na;                  /* presumably per-attr values */
        uint8_t amap[NFS4_MAXNUM_ATTRS];        /* presumably attr index map */
        int attrcnt;                            /* presumably entries in use */
        bool_t vfsstat;                         /* presumably: statvfs needed */
};
 262 
/* Lifecycle helpers for struct nfs4_ntov_table (defined outside this view). */
static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
                                    struct nfs4_svgetit_arg *sargp);

static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
                    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
                    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);

/*
 * FEM monitors for read and write delegations; created by fem_create() in
 * rfs4_srvrinit() and released by fem_free() in rfs4_srvrfini().
 */
fem_t           *deleg_rdops;
fem_t           *deleg_wrops;

/*
 * Server instances form a list linked through ->prev/->next with
 * rfs4_cur_servinst naming the newest one (see rfs4_servinst_create()).
 */
rfs4_servinst_t *rfs4_cur_servinst = NULL;      /* current server instance */
kmutex_t        rfs4_servinst_lock;     /* protects linked list */
int             rfs4_seen_first_compound;       /* set first time we see one */
 277 
/*
 * NFS4 op dispatch table
 *
 * One rfsv4disp entry describes one operation: the handler to run, the
 * routine that releases whatever the handler allocated for its result, and
 * a set of RPC_* flag bits.  Both function pointers are declared without a
 * prototype, which is what lets the table mix free routines taking caddr_t
 * (nullfree) with ones taking nfs_resop4 *.
 */

struct rfsv4disp {
        void    (*dis_proc)();          /* proc to call */
        void    (*dis_resfree)();       /* frees space allocated by proc */
        int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
};
 287 
/*
 * Dispatch table indexed by operation number.  Each initializer is
 * { handler, result-free routine, flags }.  Slots 0-2 have no operation
 * assigned and are routed to rfs4_op_illegal.  The position of an entry is
 * its operation number, so entries must not be reordered.
 */
static struct rfsv4disp rfsv4disptab[] = {
        /*
         * NFS VERSION 4
         */

        /* RFS_NULL = 0 */
        {rfs4_op_illegal, nullfree, 0},

        /* UNUSED = 1 */
        {rfs4_op_illegal, nullfree, 0},

        /* UNUSED = 2 */
        {rfs4_op_illegal, nullfree, 0},

        /* OP_ACCESS = 3 */
        {rfs4_op_access, nullfree, RPC_IDEMPOTENT},

        /* OP_CLOSE = 4 */
        {rfs4_op_close, nullfree, 0},

        /* OP_COMMIT = 5 */
        {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

        /* OP_CREATE = 6 */
        {rfs4_op_create, nullfree, 0},

        /* OP_DELEGPURGE = 7 */
        {rfs4_op_delegpurge, nullfree, 0},

        /* OP_DELEGRETURN = 8 */
        {rfs4_op_delegreturn, nullfree, 0},

        /* OP_GETATTR = 9 */
        {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

        /* OP_GETFH = 10 */
        {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

        /* OP_LINK = 11 */
        {rfs4_op_link, nullfree, 0},

        /* OP_LOCK = 12 */
        {rfs4_op_lock, lock_denied_free, 0},

        /* OP_LOCKT = 13 */
        {rfs4_op_lockt, lock_denied_free, 0},

        /* OP_LOCKU = 14 */
        {rfs4_op_locku, nullfree, 0},

        /* OP_LOOKUP = 15 */
        {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

        /* OP_LOOKUPP = 16 */
        {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

        /* OP_NVERIFY = 17 */
        {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

        /* OP_OPEN = 18 */
        {rfs4_op_open, rfs4_free_reply, 0},

        /* OP_OPENATTR = 19 */
        {rfs4_op_openattr, nullfree, 0},

        /* OP_OPEN_CONFIRM = 20 */
        {rfs4_op_open_confirm, nullfree, 0},

        /* OP_OPEN_DOWNGRADE = 21 */
        {rfs4_op_open_downgrade, nullfree, 0},

        /* OP_PUTFH = 22 */
        {rfs4_op_putfh, nullfree, RPC_ALL},

        /* OP_PUTPUBFH = 23 */
        {rfs4_op_putpubfh, nullfree, RPC_ALL},

        /* OP_PUTROOTFH = 24 */
        {rfs4_op_putrootfh, nullfree, RPC_ALL},

        /* OP_READ = 25 */
        {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

        /* OP_READDIR = 26 */
        {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

        /* OP_READLINK = 27 */
        {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

        /* OP_REMOVE = 28 */
        {rfs4_op_remove, nullfree, 0},

        /* OP_RENAME = 29 */
        {rfs4_op_rename, nullfree, 0},

        /* OP_RENEW = 30 */
        {rfs4_op_renew, nullfree, 0},

        /* OP_RESTOREFH = 31 */
        {rfs4_op_restorefh, nullfree, RPC_ALL},

        /* OP_SAVEFH = 32 */
        {rfs4_op_savefh, nullfree, RPC_ALL},

        /* OP_SECINFO = 33 */
        {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

        /* OP_SETATTR = 34 */
        {rfs4_op_setattr, nullfree, 0},

        /* OP_SETCLIENTID = 35 */
        {rfs4_op_setclientid, nullfree, 0},

        /* OP_SETCLIENTID_CONFIRM = 36 */
        {rfs4_op_setclientid_confirm, nullfree, 0},

        /* OP_VERIFY = 37 */
        {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

        /* OP_WRITE = 38 */
        {rfs4_op_write, nullfree, 0},

        /* OP_RELEASE_LOCKOWNER = 39 */
        {rfs4_op_release_lockowner, nullfree, 0},
};
 413 
/* Number of entries in rfsv4disptab[], computed from the array itself. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * Index reserved for illegal operations: one past the last slot of
 * rfsv4disptab[].  It is a valid index only for arrays carrying an extra
 * trailing entry (the DEBUG-only rfs4_op_string[] has one); it must not be
 * used to index rfsv4disptab[] itself.
 */
#define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 417 
 418 #ifdef DEBUG
 419 
 420 int             rfs4_fillone_debug = 0;
 421 int             rfs4_no_stub_access = 1;
 422 int             rfs4_rddir_debug = 0;
 423 
 424 static char    *rfs4_op_string[] = {
 425         "rfs4_op_null",
 426         "rfs4_op_1 unused",
 427         "rfs4_op_2 unused",
 428         "rfs4_op_access",
 429         "rfs4_op_close",
 430         "rfs4_op_commit",
 431         "rfs4_op_create",
 432         "rfs4_op_delegpurge",
 433         "rfs4_op_delegreturn",
 434         "rfs4_op_getattr",
 435         "rfs4_op_getfh",
 436         "rfs4_op_link",
 437         "rfs4_op_lock",
 438         "rfs4_op_lockt",
 439         "rfs4_op_locku",
 440         "rfs4_op_lookup",
 441         "rfs4_op_lookupp",
 442         "rfs4_op_nverify",
 443         "rfs4_op_open",
 444         "rfs4_op_openattr",
 445         "rfs4_op_open_confirm",
 446         "rfs4_op_open_downgrade",
 447         "rfs4_op_putfh",
 448         "rfs4_op_putpubfh",
 449         "rfs4_op_putrootfh",
 450         "rfs4_op_read",
 451         "rfs4_op_readdir",
 452         "rfs4_op_readlink",
 453         "rfs4_op_remove",
 454         "rfs4_op_rename",
 455         "rfs4_op_renew",
 456         "rfs4_op_restorefh",
 457         "rfs4_op_savefh",
 458         "rfs4_op_secinfo",
 459         "rfs4_op_setattr",
 460         "rfs4_op_setclientid",
 461         "rfs4_op_setclient_confirm",
 462         "rfs4_op_verify",
 463         "rfs4_op_write",
 464         "rfs4_op_release_lockowner",
 465         "rfs4_op_illegal"
 466 };
 467 #endif
 468 
/* Declarations of routines defined outside this part of the file. */
void    rfs4_ss_chkclid(rfs4_client_t *);

extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);

extern void     rfs4_free_fs_locations4(fs_locations4 *);

/*
 * Step from one dirent64 to the record that follows it in the same buffer
 * by advancing d_reclen bytes.  Any earlier definition of nextdp is
 * discarded first.  `dp' is evaluated twice, so it must be free of side
 * effects.
 */
#ifdef  nextdp
#undef nextdp
#endif
#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 479 
/*
 * FEM template for read delegations, handed to fem_create() in
 * rfs4_srvrinit().  It names the vnode operations to intercept (open, write,
 * setattr, rwlock, space, setsecattr, vnevent) and the deleg_rd_*() routine
 * installed for each.  The NULL/NULL pair terminates the list.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
        VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
        VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
        VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
        VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
        VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
        VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
        VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
        NULL,                   NULL
};
/*
 * FEM template for write delegations.  Same operations as the read
 * template plus VOPNAME_READ, each routed to the deleg_wr_*() routine.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
        VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
        VOPNAME_READ,           { .femop_read = deleg_wr_read },
        VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
        VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
        VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
        VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
        VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
        VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
        NULL,                   NULL
};
 501 
 502 int
 503 rfs4_srvrinit(void)
 504 {
 505         timespec32_t verf;
 506         int error;
 507         extern void rfs4_attr_init();
 508         extern krwlock_t rfs4_deleg_policy_lock;
 509 
 510         /*
 511          * The following algorithm attempts to find a unique verifier
 512          * to be used as the write verifier returned from the server
 513          * to the client.  It is important that this verifier change
 514          * whenever the server reboots.  Of secondary importance, it
 515          * is important for the verifier to be unique between two
 516          * different servers.
 517          *
 518          * Thus, an attempt is made to use the system hostid and the
 519          * current time in seconds when the nfssrv kernel module is
 520          * loaded.  It is assumed that an NFS server will not be able
 521          * to boot and then to reboot in less than a second.  If the
 522          * hostid has not been set, then the current high resolution
 523          * time is used.  This will ensure different verifiers each
 524          * time the server reboots and minimize the chances that two
 525          * different servers will have the same verifier.
 526          * XXX - this is broken on LP64 kernels.
 527          */
 528         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 529         if (verf.tv_sec != 0) {
 530                 verf.tv_nsec = gethrestime_sec();
 531         } else {
 532                 timespec_t tverf;
 533 
 534                 gethrestime(&tverf);
 535                 verf.tv_sec = (time_t)tverf.tv_sec;
 536                 verf.tv_nsec = tverf.tv_nsec;
 537         }
 538 
 539         Write4verf = *(uint64_t *)&verf;
 540 
 541         rfs4_attr_init();
 542         mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 543 
 544         /* Used to manage create/destroy of server state */
 545         mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
 546 
 547         /* Used to manage access to server instance linked list */
 548         mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 549 
 550         /* Used to manage access to rfs4_deleg_policy */
 551         rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 552 
 553         error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
 554         if (error != 0) {
 555                 rfs4_disable_delegation();
 556         } else {
 557                 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 558                     &deleg_wrops);
 559                 if (error != 0) {
 560                         rfs4_disable_delegation();
 561                         fem_free(deleg_rdops);
 562                 }
 563         }
 564 
 565         nfs4_srv_caller_id = fs_new_caller_id();
 566 
 567         lockt_sysid = lm_alloc_sysidt();
 568 
 569         vsd_create(&nfs4_srv_vkey, NULL);
 570 
 571         return (0);
 572 }
 573 
 574 void
 575 rfs4_srvrfini(void)
 576 {
 577         extern krwlock_t rfs4_deleg_policy_lock;
 578 
 579         if (lockt_sysid != LM_NOSYSID) {
 580                 lm_free_sysidt(lockt_sysid);
 581                 lockt_sysid = LM_NOSYSID;
 582         }
 583 
 584         mutex_destroy(&rfs4_deleg_lock);
 585         mutex_destroy(&rfs4_state_lock);
 586         rw_destroy(&rfs4_deleg_policy_lock);
 587 
 588         fem_free(deleg_rdops);
 589         fem_free(deleg_wrops);
 590 }
 591 
 592 void
 593 rfs4_init_compound_state(struct compound_state *cs)
 594 {
 595         bzero(cs, sizeof (*cs));
 596         cs->cont = TRUE;
 597         cs->access = CS_ACCESS_DENIED;
 598         cs->deleg = FALSE;
 599         cs->mandlock = FALSE;
 600         cs->fh.nfs_fh4_val = cs->fhbuf;
 601 }
 602 
 603 void
 604 rfs4_grace_start(rfs4_servinst_t *sip)
 605 {
 606         rw_enter(&sip->rwlock, RW_WRITER);
 607         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 608         sip->grace_period = rfs4_grace_period;
 609         rw_exit(&sip->rwlock);
 610 }
 611 
 612 /*
 613  * returns true if the instance's grace period has never been started
 614  */
 615 int
 616 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 617 {
 618         time_t start_time;
 619 
 620         rw_enter(&sip->rwlock, RW_READER);
 621         start_time = sip->start_time;
 622         rw_exit(&sip->rwlock);
 623 
 624         return (start_time == 0);
 625 }
 626 
 627 /*
 628  * Indicates if server instance is within the
 629  * grace period.
 630  */
 631 int
 632 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 633 {
 634         time_t grace_expiry;
 635 
 636         rw_enter(&sip->rwlock, RW_READER);
 637         grace_expiry = sip->start_time + sip->grace_period;
 638         rw_exit(&sip->rwlock);
 639 
 640         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 641 }
 642 
 643 int
 644 rfs4_clnt_in_grace(rfs4_client_t *cp)
 645 {
 646         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 647 
 648         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 649 }
 650 
 651 /*
 652  * reset all currently active grace periods
 653  */
 654 void
 655 rfs4_grace_reset_all(void)
 656 {
 657         rfs4_servinst_t *sip;
 658 
 659         mutex_enter(&rfs4_servinst_lock);
 660         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 661                 if (rfs4_servinst_in_grace(sip))
 662                         rfs4_grace_start(sip);
 663         mutex_exit(&rfs4_servinst_lock);
 664 }
 665 
 666 /*
 667  * start any new instances' grace periods
 668  */
 669 void
 670 rfs4_grace_start_new(void)
 671 {
 672         rfs4_servinst_t *sip;
 673 
 674         mutex_enter(&rfs4_servinst_lock);
 675         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 676                 if (rfs4_servinst_grace_new(sip))
 677                         rfs4_grace_start(sip);
 678         mutex_exit(&rfs4_servinst_lock);
 679 }
 680 
 681 static rfs4_dss_path_t *
 682 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
 683 {
 684         size_t len;
 685         rfs4_dss_path_t *dss_path;
 686 
 687         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 688 
 689         /*
 690          * Take a copy of the string, since the original may be overwritten.
 691          * Sadly, no strdup() in the kernel.
 692          */
 693         /* allow for NUL */
 694         len = strlen(path) + 1;
 695         dss_path->path = kmem_alloc(len, KM_SLEEP);
 696         (void) strlcpy(dss_path->path, path, len);
 697 
 698         /* associate with servinst */
 699         dss_path->sip = sip;
 700         dss_path->index = index;
 701 
 702         /*
 703          * Add to list of served paths.
 704          * No locking required, as we're only ever called at startup.
 705          */
 706         if (rfs4_dss_pathlist == NULL) {
 707                 /* this is the first dss_path_t */
 708 
 709                 /* needed for insque/remque */
 710                 dss_path->next = dss_path->prev = dss_path;
 711 
 712                 rfs4_dss_pathlist = dss_path;
 713         } else {
 714                 insque(dss_path, rfs4_dss_pathlist);
 715         }
 716 
 717         return (dss_path);
 718 }
 719 
 720 /*
 721  * Create a new server instance, and make it the currently active instance.
 722  * Note that starting the grace period too early will reduce the clients'
 723  * recovery window.
 724  */
 725 void
 726 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
 727 {
 728         unsigned i;
 729         rfs4_servinst_t *sip;
 730         rfs4_oldstate_t *oldstate;
 731 
 732         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 733         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 734 
 735         sip->start_time = (time_t)0;
 736         sip->grace_period = (time_t)0;
 737         sip->next = NULL;
 738         sip->prev = NULL;
 739 
 740         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 741         /*
 742          * This initial dummy entry is required to setup for insque/remque.
 743          * It must be skipped over whenever the list is traversed.
 744          */
 745         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 746         /* insque/remque require initial list entry to be self-terminated */
 747         oldstate->next = oldstate;
 748         oldstate->prev = oldstate;
 749         sip->oldstate = oldstate;
 750 
 751 
 752         sip->dss_npaths = dss_npaths;
 753         sip->dss_paths = kmem_alloc(dss_npaths *
 754             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 755 
 756         for (i = 0; i < dss_npaths; i++) {
 757                 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
 758         }
 759 
 760         mutex_enter(&rfs4_servinst_lock);
 761         if (rfs4_cur_servinst != NULL) {
 762                 /* add to linked list */
 763                 sip->prev = rfs4_cur_servinst;
 764                 rfs4_cur_servinst->next = sip;
 765         }
 766         if (start_grace)
 767                 rfs4_grace_start(sip);
 768         /* make the new instance "current" */
 769         rfs4_cur_servinst = sip;
 770 
 771         mutex_exit(&rfs4_servinst_lock);
 772 }
 773 
 774 /*
 775  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 776  * all instances directly.
 777  */
 778 void
 779 rfs4_servinst_destroy_all(void)
 780 {
 781         rfs4_servinst_t *sip, *prev, *current;
 782 #ifdef DEBUG
 783         int n = 0;
 784 #endif
 785 
 786         mutex_enter(&rfs4_servinst_lock);
 787         ASSERT(rfs4_cur_servinst != NULL);
 788         current = rfs4_cur_servinst;
 789         rfs4_cur_servinst = NULL;
 790         for (sip = current; sip != NULL; sip = prev) {
 791                 prev = sip->prev;
 792                 rw_destroy(&sip->rwlock);
 793                 if (sip->oldstate)
 794                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 795                 if (sip->dss_paths)
 796                         kmem_free(sip->dss_paths,
 797                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 798                 kmem_free(sip, sizeof (rfs4_servinst_t));
 799 #ifdef DEBUG
 800                 n++;
 801 #endif
 802         }
 803         mutex_exit(&rfs4_servinst_lock);
 804 }
 805 
 806 /*
 807  * Assign the current server instance to a client_t.
 808  * Should be called with cp->rc_dbe held.
 809  */
 810 void
 811 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
 812 {
 813         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 814 
 815         /*
 816          * The lock ensures that if the current instance is in the process
 817          * of changing, we will see the new one.
 818          */
 819         mutex_enter(&rfs4_servinst_lock);
 820         cp->rc_server_instance = sip;
 821         mutex_exit(&rfs4_servinst_lock);
 822 }
 823 
 824 rfs4_servinst_t *
 825 rfs4_servinst(rfs4_client_t *cp)
 826 {
 827         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 828 
 829         return (cp->rc_server_instance);
 830 }
 831 
/*
 * Result-free routine that does nothing: resop is ignored and the body is
 * intentionally empty.
 * NOTE(review): presumably installed for operations whose results own no
 * allocated memory -- confirm against the operation dispatch table.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
 837 
 838 /*
 839  * This is a fall-through for invalid or not implemented (yet) ops
 840  */
 841 /* ARGSUSED */
 842 static void
 843 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 844     struct compound_state *cs)
 845 {
 846         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 847 }
 848 
 849 /*
 850  * Check if the security flavor, nfsnum, is in the flavor_list.
 851  */
 852 bool_t
 853 in_flavor_list(int nfsnum, int *flavor_list, int count)
 854 {
 855         int i;
 856 
 857         for (i = 0; i < count; i++) {
 858                 if (nfsnum == flavor_list[i])
 859                         return (TRUE);
 860         }
 861         return (FALSE);
 862 }
 863 
 864 /*
 865  * Used by rfs4_op_secinfo to get the security information from the
 866  * export structure associated with the component.
 867  */
 868 /* ARGSUSED */
 869 static nfsstat4
 870 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 871 {
 872         int error, different_export = 0;
 873         vnode_t *dvp, *vp;
 874         struct exportinfo *exi = NULL;
 875         fid_t fid;
 876         uint_t count, i;
 877         secinfo4 *resok_val;
 878         struct secinfo *secp;
 879         seconfig_t *si;
 880         bool_t did_traverse = FALSE;
 881         int dotdot, walk;
 882 
 883         dvp = cs->vp;
 884         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 885 
 886         /*
 887          * If dotdotting, then need to check whether it's above the
 888          * root of a filesystem, or above an export point.
 889          */
 890         if (dotdot) {
 891 
 892                 /*
 893                  * If dotdotting at the root of a filesystem, then
 894                  * need to traverse back to the mounted-on filesystem
 895                  * and do the dotdot lookup there.
 896                  */
 897                 if (cs->vp->v_flag & VROOT) {
 898 
 899                         /*
 900                          * If at the system root, then can
 901                          * go up no further.
 902                          */
 903                         if (VN_CMP(dvp, rootdir))
 904                                 return (puterrno4(ENOENT));
 905 
 906                         /*
 907                          * Traverse back to the mounted-on filesystem
 908                          */
 909                         dvp = untraverse(cs->vp);
 910 
 911                         /*
 912                          * Set the different_export flag so we remember
 913                          * to pick up a new exportinfo entry for
 914                          * this new filesystem.
 915                          */
 916                         different_export = 1;
 917                 } else {
 918 
 919                         /*
 920                          * If dotdotting above an export point then set
 921                          * the different_export to get new export info.
 922                          */
 923                         different_export = nfs_exported(cs->exi, cs->vp);
 924                 }
 925         }
 926 
 927         /*
 928          * Get the vnode for the component "nm".
 929          */
 930         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 931             NULL, NULL, NULL);
 932         if (error)
 933                 return (puterrno4(error));
 934 
 935         /*
 936          * If the vnode is in a pseudo filesystem, or if the security flavor
 937          * used in the request is valid but not an explicitly shared flavor,
 938          * or the access bit indicates that this is a limited access,
 939          * check whether this vnode is visible.
 940          */
 941         if (!different_export &&
 942             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
 943             cs->access & CS_ACCESS_LIMITED)) {
 944                 if (! nfs_visible(cs->exi, vp, &different_export)) {
 945                         VN_RELE(vp);
 946                         return (puterrno4(ENOENT));
 947                 }
 948         }
 949 
 950         /*
 951          * If it's a mountpoint, then traverse it.
 952          */
 953         if (vn_ismntpt(vp)) {
 954                 if ((error = traverse(&vp)) != 0) {
 955                         VN_RELE(vp);
 956                         return (puterrno4(error));
 957                 }
 958                 /* remember that we had to traverse mountpoint */
 959                 did_traverse = TRUE;
 960                 different_export = 1;
 961         } else if (vp->v_vfsp != dvp->v_vfsp) {
 962                 /*
 963                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
 964                  * then vp is probably an LOFS object.  We don't need the
 965                  * realvp, we just need to know that we might have crossed
 966                  * a server fs boundary and need to call checkexport4.
 967                  * (LOFS lookup hides server fs mountpoints, and actually calls
 968                  * traverse)
 969                  */
 970                 different_export = 1;
 971         }
 972 
 973         /*
 974          * Get the export information for it.
 975          */
 976         if (different_export) {
 977 
 978                 bzero(&fid, sizeof (fid));
 979                 fid.fid_len = MAXFIDSZ;
 980                 error = vop_fid_pseudo(vp, &fid);
 981                 if (error) {
 982                         VN_RELE(vp);
 983                         return (puterrno4(error));
 984                 }
 985 
 986                 if (dotdot)
 987                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
 988                 else
 989                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
 990 
 991                 if (exi == NULL) {
 992                         if (did_traverse == TRUE) {
 993                                 /*
 994                                  * If this vnode is a mounted-on vnode,
 995                                  * but the mounted-on file system is not
 996                                  * exported, send back the secinfo for
 997                                  * the exported node that the mounted-on
 998                                  * vnode lives in.
 999                                  */
1000                                 exi = cs->exi;
1001                         } else {
1002                                 VN_RELE(vp);
1003                                 return (puterrno4(EACCES));
1004                         }
1005                 }
1006         } else {
1007                 exi = cs->exi;
1008         }
1009         ASSERT(exi != NULL);
1010 
1011 
1012         /*
1013          * Create the secinfo result based on the security information
1014          * from the exportinfo structure (exi).
1015          *
1016          * Return all flavors for a pseudo node.
1017          * For a real export node, return the flavor that the client
1018          * has access with.
1019          */
1020         ASSERT(RW_LOCK_HELD(&exported_lock));
1021         if (PSEUDO(exi)) {
1022                 count = exi->exi_export.ex_seccnt; /* total sec count */
1023                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1024                 secp = exi->exi_export.ex_secinfo;
1025 
1026                 for (i = 0; i < count; i++) {
1027                         si = &secp[i].s_secinfo;
1028                         resok_val[i].flavor = si->sc_rpcnum;
1029                         if (resok_val[i].flavor == RPCSEC_GSS) {
1030                                 rpcsec_gss_info *info;
1031 
1032                                 info = &resok_val[i].flavor_info;
1033                                 info->qop = si->sc_qop;
1034                                 info->service = (rpc_gss_svc_t)si->sc_service;
1035 
1036                                 /* get oid opaque data */
1037                                 info->oid.sec_oid4_len =
1038                                     si->sc_gss_mech_type->length;
1039                                 info->oid.sec_oid4_val = kmem_alloc(
1040                                     si->sc_gss_mech_type->length, KM_SLEEP);
1041                                 bcopy(
1042                                     si->sc_gss_mech_type->elements,
1043                                     info->oid.sec_oid4_val,
1044                                     info->oid.sec_oid4_len);
1045                         }
1046                 }
1047                 resp->SECINFO4resok_len = count;
1048                 resp->SECINFO4resok_val = resok_val;
1049         } else {
1050                 int ret_cnt = 0, k = 0;
1051                 int *flavor_list;
1052 
1053                 count = exi->exi_export.ex_seccnt; /* total sec count */
1054                 secp = exi->exi_export.ex_secinfo;
1055 
1056                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1057                 /* find out which flavors to return */
1058                 for (i = 0; i < count; i ++) {
1059                         int access, flavor, perm;
1060 
1061                         flavor = secp[i].s_secinfo.sc_nfsnum;
1062                         perm = secp[i].s_flags;
1063 
1064                         access = nfsauth4_secinfo_access(exi, cs->req,
1065                             flavor, perm, cs->basecr);
1066 
1067                         if (! (access & NFSAUTH_DENIED) &&
1068                             ! (access & NFSAUTH_WRONGSEC)) {
1069                                 flavor_list[ret_cnt] = flavor;
1070                                 ret_cnt++;
1071                         }
1072                 }
1073 
1074                 /* Create the returning SECINFO value */
1075                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1076 
1077                 for (i = 0; i < count; i++) {
1078                         /*
1079                          * If the flavor is in the flavor list,
1080                          * fill in resok_val.
1081                          */
1082                         si = &secp[i].s_secinfo;
1083                         if (in_flavor_list(si->sc_nfsnum,
1084                             flavor_list, ret_cnt)) {
1085                                 resok_val[k].flavor = si->sc_rpcnum;
1086                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1087                                         rpcsec_gss_info *info;
1088 
1089                                         info = &resok_val[k].flavor_info;
1090                                         info->qop = si->sc_qop;
1091                                         info->service = (rpc_gss_svc_t)
1092                                             si->sc_service;
1093 
1094                                         /* get oid opaque data */
1095                                         info->oid.sec_oid4_len =
1096                                             si->sc_gss_mech_type->length;
1097                                         info->oid.sec_oid4_val = kmem_alloc(
1098                                             si->sc_gss_mech_type->length,
1099                                             KM_SLEEP);
1100                                         bcopy(si->sc_gss_mech_type->elements,
1101                                             info->oid.sec_oid4_val,
1102                                             info->oid.sec_oid4_len);
1103                                 }
1104                                 k++;
1105                         }
1106                         if (k >= ret_cnt)
1107                                 break;
1108                 }
1109                 resp->SECINFO4resok_len = ret_cnt;
1110                 resp->SECINFO4resok_val = resok_val;
1111                 kmem_free(flavor_list, count * sizeof (int));
1112         }
1113 
1114         VN_RELE(vp);
1115         return (NFS4_OK);
1116 }
1117 
1118 /*
1119  * SECINFO (Operation 33): Obtain required security information on
1120  * the component name in the format of (security-mechanism-oid, qop, service)
1121  * triplets.
1122  */
1123 /* ARGSUSED */
1124 static void
1125 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1126     struct compound_state *cs)
1127 {
1128         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1129         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1130         utf8string *utfnm = &args->name;
1131         uint_t len;
1132         char *nm;
1133         struct sockaddr *ca;
1134         char *name = NULL;
1135         nfsstat4 status = NFS4_OK;
1136 
1137         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1138             SECINFO4args *, args);
1139 
1140         /*
1141          * Current file handle (cfh) should have been set before getting
1142          * into this function. If not, return error.
1143          */
1144         if (cs->vp == NULL) {
1145                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1146                 goto out;
1147         }
1148 
1149         if (cs->vp->v_type != VDIR) {
1150                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1151                 goto out;
1152         }
1153 
1154         /*
1155          * Verify the component name. If failed, error out, but
1156          * do not error out if the component name is a "..".
1157          * SECINFO will return its parents secinfo data for SECINFO "..".
1158          */
1159         status = utf8_dir_verify(utfnm);
1160         if (status != NFS4_OK) {
1161                 if (utfnm->utf8string_len != 2 ||
1162                     utfnm->utf8string_val[0] != '.' ||
1163                     utfnm->utf8string_val[1] != '.') {
1164                         *cs->statusp = resp->status = status;
1165                         goto out;
1166                 }
1167         }
1168 
1169         nm = utf8_to_str(utfnm, &len, NULL);
1170         if (nm == NULL) {
1171                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1172                 goto out;
1173         }
1174 
1175         if (len > MAXNAMELEN) {
1176                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1177                 kmem_free(nm, len);
1178                 goto out;
1179         }
1180 
1181         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1182         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1183             MAXPATHLEN  + 1);
1184 
1185         if (name == NULL) {
1186                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1187                 kmem_free(nm, len);
1188                 goto out;
1189         }
1190 
1191 
1192         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1193 
1194         if (name != nm)
1195                 kmem_free(name, MAXPATHLEN + 1);
1196         kmem_free(nm, len);
1197 
1198 out:
1199         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1200             SECINFO4res *, resp);
1201 }
1202 
1203 /*
1204  * Free SECINFO result.
1205  */
1206 /* ARGSUSED */
1207 static void
1208 rfs4_op_secinfo_free(nfs_resop4 *resop)
1209 {
1210         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1211         int count, i;
1212         secinfo4 *resok_val;
1213 
1214         /* If this is not an Ok result, nothing to free. */
1215         if (resp->status != NFS4_OK) {
1216                 return;
1217         }
1218 
1219         count = resp->SECINFO4resok_len;
1220         resok_val = resp->SECINFO4resok_val;
1221 
1222         for (i = 0; i < count; i++) {
1223                 if (resok_val[i].flavor == RPCSEC_GSS) {
1224                         rpcsec_gss_info *info;
1225 
1226                         info = &resok_val[i].flavor_info;
1227                         kmem_free(info->oid.sec_oid4_val,
1228                             info->oid.sec_oid4_len);
1229                 }
1230         }
1231         kmem_free(resok_val, count * sizeof (secinfo4));
1232         resp->SECINFO4resok_len = 0;
1233         resp->SECINFO4resok_val = NULL;
1234 }
1235 
1236 /* ARGSUSED */
1237 static void
1238 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1239     struct compound_state *cs)
1240 {
1241         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1242         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1243         int error;
1244         vnode_t *vp;
1245         struct vattr va;
1246         int checkwriteperm;
1247         cred_t *cr = cs->cr;
1248         bslabel_t *clabel, *slabel;
1249         ts_label_t *tslabel;
1250         boolean_t admin_low_client;
1251 
1252         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1253             ACCESS4args *, args);
1254 
1255 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1256         if (cs->access == CS_ACCESS_DENIED) {
1257                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1258                 goto out;
1259         }
1260 #endif
1261         if (cs->vp == NULL) {
1262                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1263                 goto out;
1264         }
1265 
1266         ASSERT(cr != NULL);
1267 
1268         vp = cs->vp;
1269 
1270         /*
1271          * If the file system is exported read only, it is not appropriate
1272          * to check write permissions for regular files and directories.
1273          * Special files are interpreted by the client, so the underlying
1274          * permissions are sent back to the client for interpretation.
1275          */
1276         if (rdonly4(req, cs) &&
1277             (vp->v_type == VREG || vp->v_type == VDIR))
1278                 checkwriteperm = 0;
1279         else
1280                 checkwriteperm = 1;
1281 
1282         /*
1283          * XXX
1284          * We need the mode so that we can correctly determine access
1285          * permissions relative to a mandatory lock file.  Access to
1286          * mandatory lock files is denied on the server, so it might
1287          * as well be reflected to the server during the open.
1288          */
1289         va.va_mask = AT_MODE;
1290         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1291         if (error) {
1292                 *cs->statusp = resp->status = puterrno4(error);
1293                 goto out;
1294         }
1295         resp->access = 0;
1296         resp->supported = 0;
1297 
1298         if (is_system_labeled()) {
1299                 ASSERT(req->rq_label != NULL);
1300                 clabel = req->rq_label;
1301                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1302                     "got client label from request(1)",
1303                     struct svc_req *, req);
1304                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1305                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1306                                 *cs->statusp = resp->status = puterrno4(EACCES);
1307                                 goto out;
1308                         }
1309                         slabel = label2bslabel(tslabel);
1310                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1311                             char *, "got server label(1) for vp(2)",
1312                             bslabel_t *, slabel, vnode_t *, vp);
1313 
1314                         admin_low_client = B_FALSE;
1315                 } else
1316                         admin_low_client = B_TRUE;
1317         }
1318 
1319         if (args->access & ACCESS4_READ) {
1320                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1321                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1322                     (!is_system_labeled() || admin_low_client ||
1323                     bldominates(clabel, slabel)))
1324                         resp->access |= ACCESS4_READ;
1325                 resp->supported |= ACCESS4_READ;
1326         }
1327         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1328                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1329                 if (!error && (!is_system_labeled() || admin_low_client ||
1330                     bldominates(clabel, slabel)))
1331                         resp->access |= ACCESS4_LOOKUP;
1332                 resp->supported |= ACCESS4_LOOKUP;
1333         }
1334         if (checkwriteperm &&
1335             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1336                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1337                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1338                     (!is_system_labeled() || admin_low_client ||
1339                     blequal(clabel, slabel)))
1340                         resp->access |=
1341                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1342                 resp->supported |=
1343                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1344         }
1345 
1346         if (checkwriteperm &&
1347             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1348                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1349                 if (!error && (!is_system_labeled() || admin_low_client ||
1350                     blequal(clabel, slabel)))
1351                         resp->access |= ACCESS4_DELETE;
1352                 resp->supported |= ACCESS4_DELETE;
1353         }
1354         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1355                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1356                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1357                     (!is_system_labeled() || admin_low_client ||
1358                     bldominates(clabel, slabel)))
1359                         resp->access |= ACCESS4_EXECUTE;
1360                 resp->supported |= ACCESS4_EXECUTE;
1361         }
1362 
1363         if (is_system_labeled() && !admin_low_client)
1364                 label_rele(tslabel);
1365 
1366         *cs->statusp = resp->status = NFS4_OK;
1367 out:
1368         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1369             ACCESS4res *, resp);
1370 }
1371 
1372 /* ARGSUSED */
1373 static void
1374 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1375     struct compound_state *cs)
1376 {
1377         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1378         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1379         int error;
1380         vnode_t *vp = cs->vp;
1381         cred_t *cr = cs->cr;
1382         vattr_t va;
1383 
1384         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1385             COMMIT4args *, args);
1386 
1387         if (vp == NULL) {
1388                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1389                 goto out;
1390         }
1391         if (cs->access == CS_ACCESS_DENIED) {
1392                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1393                 goto out;
1394         }
1395 
1396         if (args->offset + args->count < args->offset) {
1397                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1398                 goto out;
1399         }
1400 
1401         va.va_mask = AT_UID;
1402         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1403 
1404         /*
1405          * If we can't get the attributes, then we can't do the
1406          * right access checking.  So, we'll fail the request.
1407          */
1408         if (error) {
1409                 *cs->statusp = resp->status = puterrno4(error);
1410                 goto out;
1411         }
1412         if (rdonly4(req, cs)) {
1413                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1414                 goto out;
1415         }
1416 
1417         if (vp->v_type != VREG) {
1418                 if (vp->v_type == VDIR)
1419                         resp->status = NFS4ERR_ISDIR;
1420                 else
1421                         resp->status = NFS4ERR_INVAL;
1422                 *cs->statusp = resp->status;
1423                 goto out;
1424         }
1425 
1426         if (crgetuid(cr) != va.va_uid &&
1427             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1428                 *cs->statusp = resp->status = puterrno4(error);
1429                 goto out;
1430         }
1431 
1432         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1433 
1434         if (error) {
1435                 *cs->statusp = resp->status = puterrno4(error);
1436                 goto out;
1437         }
1438 
1439         *cs->statusp = resp->status = NFS4_OK;
1440         resp->writeverf = Write4verf;
1441 out:
1442         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1443             COMMIT4res *, resp);
1444 }
1445 
1446 /*
1447  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1448  * was completed. It does the nfsv4 create for special files.
1449  */
1450 /* ARGSUSED */
1451 static vnode_t *
1452 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1453     struct compound_state *cs, vattr_t *vap, char *nm)
1454 {
1455         int error;
1456         cred_t *cr = cs->cr;
1457         vnode_t *dvp = cs->vp;
1458         vnode_t *vp = NULL;
1459         int mode;
1460         enum vcexcl excl;
1461 
1462         switch (args->type) {
1463         case NF4CHR:
1464         case NF4BLK:
1465                 if (secpolicy_sys_devices(cr) != 0) {
1466                         *cs->statusp = resp->status = NFS4ERR_PERM;
1467                         return (NULL);
1468                 }
1469                 if (args->type == NF4CHR)
1470                         vap->va_type = VCHR;
1471                 else
1472                         vap->va_type = VBLK;
1473                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1474                     args->ftype4_u.devdata.specdata2);
1475                 vap->va_mask |= AT_RDEV;
1476                 break;
1477         case NF4SOCK:
1478                 vap->va_type = VSOCK;
1479                 break;
1480         case NF4FIFO:
1481                 vap->va_type = VFIFO;
1482                 break;
1483         default:
1484                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1485                 return (NULL);
1486         }
1487 
1488         /*
1489          * Must specify the mode.
1490          */
1491         if (!(vap->va_mask & AT_MODE)) {
1492                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1493                 return (NULL);
1494         }
1495 
1496         excl = EXCL;
1497 
1498         mode = 0;
1499 
1500         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1501         if (error) {
1502                 *cs->statusp = resp->status = puterrno4(error);
1503                 return (NULL);
1504         }
1505         return (vp);
1506 }
1507 
1508 /*
1509  * nfsv4 create is used to create non-regular files. For regular files,
1510  * use nfsv4 open.
1511  */
1512 /* ARGSUSED */
1513 static void
1514 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1515     struct compound_state *cs)
1516 {
1517         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1518         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1519         int error;
1520         struct vattr bva, iva, iva2, ava, *vap;
1521         cred_t *cr = cs->cr;
1522         vnode_t *dvp = cs->vp;
1523         vnode_t *vp = NULL;
1524         vnode_t *realvp;
1525         char *nm, *lnm;
1526         uint_t len, llen;
1527         int syncval = 0;
1528         struct nfs4_svgetit_arg sarg;
1529         struct nfs4_ntov_table ntov;
1530         struct statvfs64 sb;
1531         nfsstat4 status;
1532         struct sockaddr *ca;
1533         char *name = NULL;
1534         char *lname = NULL;
1535 
1536         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1537             CREATE4args *, args);
1538 
1539         resp->attrset = 0;
1540 
1541         if (dvp == NULL) {
1542                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1543                 goto out;
1544         }
1545 
1546         /*
1547          * If there is an unshared filesystem mounted on this vnode,
1548          * do not allow to create an object in this directory.
1549          */
1550         if (vn_ismntpt(dvp)) {
1551                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1552                 goto out;
1553         }
1554 
1555         /* Verify that type is correct */
1556         switch (args->type) {
1557         case NF4LNK:
1558         case NF4BLK:
1559         case NF4CHR:
1560         case NF4SOCK:
1561         case NF4FIFO:
1562         case NF4DIR:
1563                 break;
1564         default:
1565                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1566                 goto out;
1567         };
1568 
1569         if (cs->access == CS_ACCESS_DENIED) {
1570                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1571                 goto out;
1572         }
1573         if (dvp->v_type != VDIR) {
1574                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1575                 goto out;
1576         }
1577         status = utf8_dir_verify(&args->objname);
1578         if (status != NFS4_OK) {
1579                 *cs->statusp = resp->status = status;
1580                 goto out;
1581         }
1582 
1583         if (rdonly4(req, cs)) {
1584                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1585                 goto out;
1586         }
1587 
1588         /*
1589          * Name of newly created object
1590          */
1591         nm = utf8_to_fn(&args->objname, &len, NULL);
1592         if (nm == NULL) {
1593                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1594                 goto out;
1595         }
1596 
1597         if (len > MAXNAMELEN) {
1598                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1599                 kmem_free(nm, len);
1600                 goto out;
1601         }
1602 
1603         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1604         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1605             MAXPATHLEN  + 1);
1606 
1607         if (name == NULL) {
1608                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1609                 kmem_free(nm, len);
1610                 goto out;
1611         }
1612 
1613         resp->attrset = 0;
1614 
1615         sarg.sbp = &sb;
1616         sarg.is_referral = B_FALSE;
1617         nfs4_ntov_table_init(&ntov);
1618 
1619         status = do_rfs4_set_attrs(&resp->attrset,
1620             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1621 
1622         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1623                 status = NFS4ERR_INVAL;
1624 
1625         if (status != NFS4_OK) {
1626                 *cs->statusp = resp->status = status;
1627                 if (name != nm)
1628                         kmem_free(name, MAXPATHLEN + 1);
1629                 kmem_free(nm, len);
1630                 nfs4_ntov_table_free(&ntov, &sarg);
1631                 resp->attrset = 0;
1632                 goto out;
1633         }
1634 
1635         /* Get "before" change value */
1636         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1637         error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1638         if (error) {
1639                 *cs->statusp = resp->status = puterrno4(error);
1640                 if (name != nm)
1641                         kmem_free(name, MAXPATHLEN + 1);
1642                 kmem_free(nm, len);
1643                 nfs4_ntov_table_free(&ntov, &sarg);
1644                 resp->attrset = 0;
1645                 goto out;
1646         }
1647         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1648 
1649         vap = sarg.vap;
1650 
1651         /*
1652          * Set the default initial values for attributes when the parent
1653          * directory does not have the VSUID/VSGID bit set and they have
1654          * not been specified in createattrs.
1655          */
1656         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1657                 vap->va_uid = crgetuid(cr);
1658                 vap->va_mask |= AT_UID;
1659         }
1660         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1661                 vap->va_gid = crgetgid(cr);
1662                 vap->va_mask |= AT_GID;
1663         }
1664 
1665         vap->va_mask |= AT_TYPE;
1666         switch (args->type) {
1667         case NF4DIR:
1668                 vap->va_type = VDIR;
1669                 if ((vap->va_mask & AT_MODE) == 0) {
1670                         vap->va_mode = 0700; /* default: owner rwx only */
1671                         vap->va_mask |= AT_MODE;
1672                 }
1673                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1674                 if (error)
1675                         break;
1676 
1677                 /*
1678                  * Get the initial "after" sequence number, if it fails,
1679                  * set to zero
1680                  */
1681                 iva.va_mask = AT_SEQ;
1682                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1683                         iva.va_seq = 0;
1684                 break;
1685         case NF4LNK:
1686                 vap->va_type = VLNK;
1687                 if ((vap->va_mask & AT_MODE) == 0) {
1688                         vap->va_mode = 0700; /* default: owner rwx only */
1689                         vap->va_mask |= AT_MODE;
1690                 }
1691 
1692                 /*
1693                  * symlink names must be treated as data
1694                  */
1695                 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1696                     &llen, NULL);
1697 
1698                 if (lnm == NULL) {
1699                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1700                         if (name != nm)
1701                                 kmem_free(name, MAXPATHLEN + 1);
1702                         kmem_free(nm, len);
1703                         nfs4_ntov_table_free(&ntov, &sarg);
1704                         resp->attrset = 0;
1705                         goto out;
1706                 }
1707 
1708                 if (llen > MAXPATHLEN) {
1709                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1710                         if (name != nm)
1711                                 kmem_free(name, MAXPATHLEN + 1);
1712                         kmem_free(nm, len);
1713                         kmem_free(lnm, llen);
1714                         nfs4_ntov_table_free(&ntov, &sarg);
1715                         resp->attrset = 0;
1716                         goto out;
1717                 }
1718 
1719                 lname = nfscmd_convname(ca, cs->exi, lnm,
1720                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1721 
1722                 if (lname == NULL) {
1723                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1724                         if (name != nm)
1725                                 kmem_free(name, MAXPATHLEN + 1);
1726                         kmem_free(nm, len);
1727                         kmem_free(lnm, llen);
1728                         nfs4_ntov_table_free(&ntov, &sarg);
1729                         resp->attrset = 0;
1730                         goto out;
1731                 }
1732 
1733                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1734                 if (lname != lnm)
1735                         kmem_free(lname, MAXPATHLEN + 1);
1736                 kmem_free(lnm, llen);
1737                 if (error)
1738                         break;
1739 
1740                 /*
1741                  * Get the initial "after" sequence number, if it fails,
1742                  * set to zero
1743                  */
1744                 iva.va_mask = AT_SEQ;
1745                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1746                         iva.va_seq = 0;
1747 
1748                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1749                     NULL, NULL, NULL);
1750                 if (error)
1751                         break;
1752 
1753                 /*
1754                  * va_seq is not safe over VOP calls, check it again
1755                  * if it has changed zero out iva to force atomic = FALSE.
1756                  */
1757                 iva2.va_mask = AT_SEQ;
1758                 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1759                     iva2.va_seq != iva.va_seq)
1760                         iva.va_seq = 0;
1761                 break;
1762         default:
1763                 /*
1764                  * probably a special file.
1765                  */
1766                 if ((vap->va_mask & AT_MODE) == 0) {
1767                         vap->va_mode = 0600; /* default: owner rw only */
1768                         vap->va_mask |= AT_MODE;
1769                 }
1770                 syncval = FNODSYNC;
1771                 /*
1772                  * We know this will only generate one VOP call
1773                  */
1774                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1775 
1776                 if (vp == NULL) {
1777                         if (name != nm)
1778                                 kmem_free(name, MAXPATHLEN + 1);
1779                         kmem_free(nm, len);
1780                         nfs4_ntov_table_free(&ntov, &sarg);
1781                         resp->attrset = 0;
1782                         goto out;
1783                 }
1784 
1785                 /*
1786                  * Get the initial "after" sequence number, if it fails,
1787                  * set to zero
1788                  */
1789                 iva.va_mask = AT_SEQ;
1790                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1791                         iva.va_seq = 0;
1792 
1793                 break;
1794         }
1795         if (name != nm)
1796                 kmem_free(name, MAXPATHLEN + 1);
1797         kmem_free(nm, len);
1798 
1799         if (error) {
1800                 *cs->statusp = resp->status = puterrno4(error);
1801         }
1802 
1803         /*
1804          * Force modified data and metadata out to stable storage.
1805          */
1806         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1807 
1808         if (resp->status != NFS4_OK) {
1809                 if (vp != NULL)
1810                         VN_RELE(vp);
1811                 nfs4_ntov_table_free(&ntov, &sarg);
1812                 resp->attrset = 0;
1813                 goto out;
1814         }
1815 
1816         /*
1817          * Finish setup of cinfo response, "before" value already set.
1818          * Get "after" change value, if it fails, simply return the
1819          * before value.
1820          */
1821         ava.va_mask = AT_CTIME|AT_SEQ;
1822         if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1823                 ava.va_ctime = bva.va_ctime;
1824                 ava.va_seq = 0;
1825         }
1826         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1827 
1828         /*
1829          * True verification that object was created with correct
1830          * attrs is impossible.  The attrs could have been changed
1831          * immediately after object creation.  If attributes did
1832          * not verify, the only recourse for the server is to
1833          * destroy the object.  Maybe if some attrs (like gid)
1834          * are set incorrectly, the object should be destroyed;
1835          * however, seems bad as a default policy.  Do we really
1836          * want to destroy an object over one of the times not
1837          * verifying correctly?  For these reasons, the server
1838          * currently sets bits in attrset for createattrs
1839          * that were set; however, no verification is done.
1840          *
1841          * vmask_to_nmask accounts for vattr bits set on create
1842          *      [do_rfs4_set_attrs() only sets resp bits for
1843          *       non-vattr/vfs bits.]
1844          * Mask off any bits set by default so as not to return
1845          * more attrset bits than were requested in createattrs
1846          */
1847         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1848         resp->attrset &= args->createattrs.attrmask;
1849         nfs4_ntov_table_free(&ntov, &sarg);
1850 
1851         error = makefh4(&cs->fh, vp, cs->exi);
1852         if (error) {
1853                 *cs->statusp = resp->status = puterrno4(error);
1854         }
1855 
1856         /*
1857          * The cinfo.atomic = TRUE only if we got no errors, we have
1858          * non-zero va_seq's, and it has incremented by exactly one
1859          * during the creation and it didn't change during the VOP_LOOKUP
1860          * or VOP_FSYNC.
1861          */
1862         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1863             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1864                 resp->cinfo.atomic = TRUE;
1865         else
1866                 resp->cinfo.atomic = FALSE;
1867 
1868         /*
1869          * Force modified metadata out to stable storage.
1870          *
1871          * if a underlying vp exists, pass it to VOP_FSYNC
1872          */
1873         if (VOP_REALVP(vp, &realvp, NULL) == 0)
1874                 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1875         else
1876                 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1877 
1878         if (resp->status != NFS4_OK) {
1879                 VN_RELE(vp);
1880                 goto out;
1881         }
1882         if (cs->vp)
1883                 VN_RELE(cs->vp);
1884 
1885         cs->vp = vp;
1886         *cs->statusp = resp->status = NFS4_OK;
1887 out:
1888         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1889             CREATE4res *, resp);
1890 }
1891 
1892 /*ARGSUSED*/
1893 static void
1894 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1895     struct compound_state *cs)
1896 {
1897         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1898             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1899 
1900         rfs4_op_inval(argop, resop, req, cs);
1901 
1902         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1903             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1904 }
1905 
1906 /*ARGSUSED*/
1907 static void
1908 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1909     struct compound_state *cs)
1910 {
1911         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1912         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1913         rfs4_deleg_state_t *dsp;
1914         nfsstat4 status;
1915 
1916         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1917             DELEGRETURN4args *, args);
1918 
1919         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1920         resp->status = *cs->statusp = status;
1921         if (status != NFS4_OK)
1922                 goto out;
1923 
1924         /* Ensure specified filehandle matches */
1925         if (cs->vp != dsp->rds_finfo->rf_vp) {
1926                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1927         } else
1928                 rfs4_return_deleg(dsp, FALSE);
1929 
1930         rfs4_update_lease(dsp->rds_client);
1931 
1932         rfs4_deleg_state_rele(dsp);
1933 out:
1934         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1935             DELEGRETURN4res *, resp);
1936 }
1937 
1938 /*
1939  * Check to see if a given "flavor" is an explicitly shared flavor.
1940  * The assumption of this routine is the "flavor" is already a valid
1941  * flavor in the secinfo list of "exi".
1942  *
1943  *      e.g.
1944  *              # share -o sec=flavor1 /export
1945  *              # share -o sec=flavor2 /export/home
1946  *
1947  *              flavor2 is not an explicitly shared flavor for /export,
1948  *              however it is in the secinfo list for /export thru the
1949  *              server namespace setup.
1950  */
1951 int
1952 is_exported_sec(int flavor, struct exportinfo *exi)
1953 {
1954         int     i;
1955         struct secinfo *sp;
1956 
1957         sp = exi->exi_export.ex_secinfo;
1958         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1959                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1960                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1961                         return (SEC_REF_EXPORTED(&sp[i]));
1962                 }
1963         }
1964 
1965         /* Should not reach this point based on the assumption */
1966         return (0);
1967 }
1968 
1969 /*
1970  * Check if the security flavor used in the request matches what is
1971  * required at the export point or at the root pseudo node (exi_root).
1972  *
1973  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1974  *
1975  */
1976 static int
1977 secinfo_match_or_authnone(struct compound_state *cs)
1978 {
1979         int     i;
1980         struct secinfo *sp;
1981 
1982         /*
1983          * Check cs->nfsflavor (from the request) against
1984          * the current export data in cs->exi.
1985          */
1986         sp = cs->exi->exi_export.ex_secinfo;
1987         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1988                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1989                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1990                         return (1);
1991         }
1992 
1993         return (0);
1994 }
1995 
1996 /*
1997  * Check the access authority for the client and return the correct error.
1998  */
1999 nfsstat4
2000 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2001 {
2002         int     authres;
2003 
2004         /*
2005          * First, check if the security flavor used in the request
2006          * are among the flavors set in the server namespace.
2007          */
2008         if (!secinfo_match_or_authnone(cs)) {
2009                 *cs->statusp = NFS4ERR_WRONGSEC;
2010                 return (*cs->statusp);
2011         }
2012 
2013         authres = checkauth4(cs, req);
2014 
2015         if (authres > 0) {
2016                 *cs->statusp = NFS4_OK;
2017                 if (! (cs->access & CS_ACCESS_LIMITED))
2018                         cs->access = CS_ACCESS_OK;
2019         } else if (authres == 0) {
2020                 *cs->statusp = NFS4ERR_ACCESS;
2021         } else if (authres == -2) {
2022                 *cs->statusp = NFS4ERR_WRONGSEC;
2023         } else {
2024                 *cs->statusp = NFS4ERR_DELAY;
2025         }
2026         return (*cs->statusp);
2027 }
2028 
2029 /*
2030  * bitmap4_to_attrmask is called by getattr and readdir.
2031  * It sets up the vattr mask and determines whether vfsstat call is needed
2032  * based on the input bitmap.
2033  * Returns nfsv4 status.
2034  */
2035 static nfsstat4
2036 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2037 {
2038         int i;
2039         uint_t  va_mask;
2040         struct statvfs64 *sbp = sargp->sbp;
2041 
2042         sargp->sbp = NULL;
2043         sargp->flag = 0;
2044         sargp->rdattr_error = NFS4_OK;
2045         sargp->mntdfid_set = FALSE;
2046         if (sargp->cs->vp)
2047                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2048                     FH4_ATTRDIR | FH4_NAMEDATTR);
2049         else
2050                 sargp->xattr = 0;
2051 
2052         /*
2053          * Set rdattr_error_req to true if return error per
2054          * failed entry rather than fail the readdir.
2055          */
2056         if (breq & FATTR4_RDATTR_ERROR_MASK)
2057                 sargp->rdattr_error_req = 1;
2058         else
2059                 sargp->rdattr_error_req = 0;
2060 
2061         /*
2062          * generate the va_mask
2063          * Handle the easy cases first
2064          */
2065         switch (breq) {
2066         case NFS4_NTOV_ATTR_MASK:
2067                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2068                 return (NFS4_OK);
2069 
2070         case NFS4_FS_ATTR_MASK:
2071                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2072                 sargp->sbp = sbp;
2073                 return (NFS4_OK);
2074 
2075         case NFS4_NTOV_ATTR_CACHE_MASK:
2076                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2077                 return (NFS4_OK);
2078 
2079         case FATTR4_LEASE_TIME_MASK:
2080                 sargp->vap->va_mask = 0;
2081                 return (NFS4_OK);
2082 
2083         default:
2084                 va_mask = 0;
2085                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2086                         if ((breq & nfs4_ntov_map[i].fbit) &&
2087                             nfs4_ntov_map[i].vbit)
2088                                 va_mask |= nfs4_ntov_map[i].vbit;
2089                 }
2090 
2091                 /*
2092                  * Check is vfsstat is needed
2093                  */
2094                 if (breq & NFS4_FS_ATTR_MASK)
2095                         sargp->sbp = sbp;
2096 
2097                 sargp->vap->va_mask = va_mask;
2098                 return (NFS4_OK);
2099         }
2100         /* NOTREACHED */
2101 }
2102 
2103 /*
2104  * bitmap4_get_sysattrs is called by getattr and readdir.
2105  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2106  * Returns nfsv4 status.
2107  */
2108 static nfsstat4
2109 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2110 {
2111         int error;
2112         struct compound_state *cs = sargp->cs;
2113         vnode_t *vp = cs->vp;
2114 
2115         if (sargp->sbp != NULL) {
2116                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2117                         sargp->sbp = NULL;   /* to identify error */
2118                         return (puterrno4(error));
2119                 }
2120         }
2121 
2122         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2123 }
2124 
2125 static void
2126 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2127 {
2128         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2129             KM_SLEEP);
2130         ntovp->attrcnt = 0;
2131         ntovp->vfsstat = FALSE;
2132 }
2133 
2134 static void
2135 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2136     struct nfs4_svgetit_arg *sargp)
2137 {
2138         int i;
2139         union nfs4_attr_u *na;
2140         uint8_t *amap;
2141 
2142         /*
2143          * XXX Should do the same checks for whether the bit is set
2144          */
2145         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2146             i < ntovp->attrcnt; i++, na++, amap++) {
2147                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2148                     NFS4ATTR_FREEIT, sargp, na);
2149         }
2150         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2151                 /*
2152                  * xdr_free for getattr will be done later
2153                  */
2154                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2155                     i < ntovp->attrcnt; i++, na++, amap++) {
2156                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2157                 }
2158         }
2159         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2160 }
2161 
2162 /*
2163  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2164  */
2165 static nfsstat4
2166 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2167     struct nfs4_svgetit_arg *sargp)
2168 {
2169         int error = 0;
2170         int i, k;
2171         struct nfs4_ntov_table ntov;
2172         XDR xdr;
2173         ulong_t xdr_size;
2174         char *xdr_attrs;
2175         nfsstat4 status = NFS4_OK;
2176         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2177         union nfs4_attr_u *na;
2178         uint8_t *amap;
2179 
2180         sargp->op = NFS4ATTR_GETIT;
2181         sargp->flag = 0;
2182 
2183         fattrp->attrmask = 0;
2184         /* if no bits requested, then return empty fattr4 */
2185         if (breq == 0) {
2186                 fattrp->attrlist4_len = 0;
2187                 fattrp->attrlist4 = NULL;
2188                 return (NFS4_OK);
2189         }
2190 
2191         /*
2192          * return NFS4ERR_INVAL when client requests write-only attrs
2193          */
2194         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2195                 return (NFS4ERR_INVAL);
2196 
2197         nfs4_ntov_table_init(&ntov);
2198         na = ntov.na;
2199         amap = ntov.amap;
2200 
2201         /*
2202          * Now loop to get or verify the attrs
2203          */
2204         for (i = 0; i < nfs4_ntov_map_size; i++) {
2205                 if (breq & nfs4_ntov_map[i].fbit) {
2206                         if ((*nfs4_ntov_map[i].sv_getit)(
2207                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2208 
2209                                 error = (*nfs4_ntov_map[i].sv_getit)(
2210                                     NFS4ATTR_GETIT, sargp, na);
2211 
2212                                 /*
2213                                  * Possible error values:
2214                                  * >0 if sv_getit failed to
2215                                  * get the attr; 0 if succeeded;
2216                                  * <0 if rdattr_error and the
2217                                  * attribute cannot be returned.
2218                                  */
2219                                 if (error && !(sargp->rdattr_error_req))
2220                                         goto done;
2221                                 /*
2222                                  * If error then just for entry
2223                                  */
2224                                 if (error == 0) {
2225                                         fattrp->attrmask |=
2226                                             nfs4_ntov_map[i].fbit;
2227                                         *amap++ =
2228                                             (uint8_t)nfs4_ntov_map[i].nval;
2229                                         na++;
2230                                         (ntov.attrcnt)++;
2231                                 } else if ((error > 0) &&
2232                                     (sargp->rdattr_error == NFS4_OK)) {
2233                                         sargp->rdattr_error = puterrno4(error);
2234                                 }
2235                                 error = 0;
2236                         }
2237                 }
2238         }
2239 
2240         /*
2241          * If rdattr_error was set after the return value for it was assigned,
2242          * update it.
2243          */
2244         if (prev_rdattr_error != sargp->rdattr_error) {
2245                 na = ntov.na;
2246                 amap = ntov.amap;
2247                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2248                         k = *amap;
2249                         if (k < FATTR4_RDATTR_ERROR) {
2250                                 continue;
2251                         }
2252                         if ((k == FATTR4_RDATTR_ERROR) &&
2253                             ((*nfs4_ntov_map[k].sv_getit)(
2254                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2255 
2256                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2257                                     NFS4ATTR_GETIT, sargp, na);
2258                         }
2259                         break;
2260                 }
2261         }
2262 
2263         xdr_size = 0;
2264         na = ntov.na;
2265         amap = ntov.amap;
2266         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2267                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2268         }
2269 
2270         fattrp->attrlist4_len = xdr_size;
2271         if (xdr_size) {
2272                 /* freed by rfs4_op_getattr_free() */
2273                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2274 
2275                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2276 
2277                 na = ntov.na;
2278                 amap = ntov.amap;
2279                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2280                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2281                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2282                                     int, *amap);
2283                                 status = NFS4ERR_SERVERFAULT;
2284                                 break;
2285                         }
2286                 }
2287                 /* xdrmem_destroy(&xdrs); */        /* NO-OP */
2288         } else {
2289                 fattrp->attrlist4 = NULL;
2290         }
2291 done:
2292 
2293         nfs4_ntov_table_free(&ntov, sargp);
2294 
2295         if (error != 0)
2296                 status = puterrno4(error);
2297 
2298         return (status);
2299 }
2300 
2301 /* ARGSUSED */
2302 static void
2303 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2304     struct compound_state *cs)
2305 {
2306         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2307         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2308         struct nfs4_svgetit_arg sarg;
2309         struct statvfs64 sb;
2310         nfsstat4 status;
2311 
2312         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2313             GETATTR4args *, args);
2314 
2315         if (cs->vp == NULL) {
2316                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2317                 goto out;
2318         }
2319 
2320         if (cs->access == CS_ACCESS_DENIED) {
2321                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2322                 goto out;
2323         }
2324 
2325         sarg.sbp = &sb;
2326         sarg.cs = cs;
2327         sarg.is_referral = B_FALSE;
2328 
2329         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2330         if (status == NFS4_OK) {
2331 
2332                 status = bitmap4_get_sysattrs(&sarg);
2333                 if (status == NFS4_OK) {
2334 
2335                         /* Is this a referral? */
2336                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2337                                 /* Older V4 Solaris client sees a link */
2338                                 if (client_is_downrev(req))
2339                                         sarg.vap->va_type = VLNK;
2340                                 else
2341                                         sarg.is_referral = B_TRUE;
2342                         }
2343 
2344                         status = do_rfs4_op_getattr(args->attr_request,
2345                             &resp->obj_attributes, &sarg);
2346                 }
2347         }
2348         *cs->statusp = resp->status = status;
2349 out:
2350         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2351             GETATTR4res *, resp);
2352 }
2353 
2354 static void
2355 rfs4_op_getattr_free(nfs_resop4 *resop)
2356 {
2357         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2358 
2359         nfs4_fattr4_free(&resp->obj_attributes);
2360 }
2361 
2362 /* ARGSUSED */
2363 static void
2364 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2365     struct compound_state *cs)
2366 {
2367         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2368 
2369         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2370 
2371         if (cs->vp == NULL) {
2372                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2373                 goto out;
2374         }
2375         if (cs->access == CS_ACCESS_DENIED) {
2376                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2377                 goto out;
2378         }
2379 
2380         /* check for reparse point at the share point */
2381         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2382                 /* it's all bad */
2383                 cs->exi->exi_moved = 1;
2384                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2385                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2386                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2387                 return;
2388         }
2389 
2390         /* check for reparse point at vp */
2391         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2392                 /* it's not all bad */
2393                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2394                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2395                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2396                 return;
2397         }
2398 
2399         resp->object.nfs_fh4_val =
2400             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2401         nfs_fh4_copy(&cs->fh, &resp->object);
2402         *cs->statusp = resp->status = NFS4_OK;
2403 out:
2404         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2405             GETFH4res *, resp);
2406 }
2407 
2408 static void
2409 rfs4_op_getfh_free(nfs_resop4 *resop)
2410 {
2411         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2412 
2413         if (resp->status == NFS4_OK &&
2414             resp->object.nfs_fh4_val != NULL) {
2415                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2416                 resp->object.nfs_fh4_val = NULL;
2417                 resp->object.nfs_fh4_len = 0;
2418         }
2419 }
2420 
2421 /*
2422  * illegal: args: void
2423  *          res : status (NFS4ERR_OP_ILLEGAL)
2424  */
2425 /* ARGSUSED */
2426 static void
2427 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2428     struct svc_req *req, struct compound_state *cs)
2429 {
2430         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2431 
2432         resop->resop = OP_ILLEGAL;
2433         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2434 }
2435 
2436 /*
2437  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2438  *       res: status. If success - CURRENT_FH unchanged, return change_info
2439  */
2440 /* ARGSUSED */
2441 static void
2442 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2443     struct compound_state *cs)
2444 {
2445         LINK4args *args = &argop->nfs_argop4_u.oplink;
2446         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2447         int error;
2448         vnode_t *vp;
2449         vnode_t *dvp;
2450         struct vattr bdva, idva, adva;
2451         char *nm;
2452         uint_t  len;
2453         struct sockaddr *ca;
2454         char *name = NULL;
2455         nfsstat4 status;
2456 
2457         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2458             LINK4args *, args);
2459 
2460         /* SAVED_FH: source object */
2461         vp = cs->saved_vp;
2462         if (vp == NULL) {
2463                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2464                 goto out;
2465         }
2466 
2467         /* CURRENT_FH: target directory */
2468         dvp = cs->vp;
2469         if (dvp == NULL) {
2470                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2471                 goto out;
2472         }
2473 
2474         /*
2475          * If there is a non-shared filesystem mounted on this vnode,
2476          * do not allow to link any file in this directory.
2477          */
2478         if (vn_ismntpt(dvp)) {
2479                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2480                 goto out;
2481         }
2482 
2483         if (cs->access == CS_ACCESS_DENIED) {
2484                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2485                 goto out;
2486         }
2487 
2488         /* Check source object's type validity */
2489         if (vp->v_type == VDIR) {
2490                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2491                 goto out;
2492         }
2493 
2494         /* Check target directory's type */
2495         if (dvp->v_type != VDIR) {
2496                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2497                 goto out;
2498         }
2499 
2500         if (cs->saved_exi != cs->exi) {
2501                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2502                 goto out;
2503         }
2504 
2505         status = utf8_dir_verify(&args->newname);
2506         if (status != NFS4_OK) {
2507                 *cs->statusp = resp->status = status;
2508                 goto out;
2509         }
2510 
2511         nm = utf8_to_fn(&args->newname, &len, NULL);
2512         if (nm == NULL) {
2513                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2514                 goto out;
2515         }
2516 
2517         if (len > MAXNAMELEN) {
2518                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2519                 kmem_free(nm, len);
2520                 goto out;
2521         }
2522 
2523         if (rdonly4(req, cs)) {
2524                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2525                 kmem_free(nm, len);
2526                 goto out;
2527         }
2528 
2529         /* Get "before" change value */
2530         bdva.va_mask = AT_CTIME|AT_SEQ;
2531         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2532         if (error) {
2533                 *cs->statusp = resp->status = puterrno4(error);
2534                 kmem_free(nm, len);
2535                 goto out;
2536         }
2537 
2538         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2539         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2540             MAXPATHLEN  + 1);
2541 
2542         if (name == NULL) {
2543                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2544                 kmem_free(nm, len);
2545                 goto out;
2546         }
2547 
2548         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2549 
2550         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2551 
2552         if (nm != name)
2553                 kmem_free(name, MAXPATHLEN + 1);
2554         kmem_free(nm, len);
2555 
2556         /*
2557          * Get the initial "after" sequence number, if it fails, set to zero
2558          */
2559         idva.va_mask = AT_SEQ;
2560         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2561                 idva.va_seq = 0;
2562 
2563         /*
2564          * Force modified data and metadata out to stable storage.
2565          */
2566         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2567         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2568 
2569         if (error) {
2570                 *cs->statusp = resp->status = puterrno4(error);
2571                 goto out;
2572         }
2573 
2574         /*
2575          * Get "after" change value, if it fails, simply return the
2576          * before value.
2577          */
2578         adva.va_mask = AT_CTIME|AT_SEQ;
2579         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2580                 adva.va_ctime = bdva.va_ctime;
2581                 adva.va_seq = 0;
2582         }
2583 
2584         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2585 
2586         /*
2587          * The cinfo.atomic = TRUE only if we have
2588          * non-zero va_seq's, and it has incremented by exactly one
2589          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2590          */
2591         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2592             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2593                 resp->cinfo.atomic = TRUE;
2594         else
2595                 resp->cinfo.atomic = FALSE;
2596 
2597         *cs->statusp = resp->status = NFS4_OK;
2598 out:
2599         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2600             LINK4res *, resp);
2601 }
2602 
2603 /*
2604  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2605  */
2606 
2607 /* ARGSUSED */
2608 static nfsstat4
2609 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2610 {
2611         int error;
2612         int different_export = 0;
2613         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2614         struct exportinfo *exi = NULL, *pre_exi = NULL;
2615         nfsstat4 stat;
2616         fid_t fid;
2617         int attrdir, dotdot, walk;
2618         bool_t is_newvp = FALSE;
2619 
2620         if (cs->vp->v_flag & V_XATTRDIR) {
2621                 attrdir = 1;
2622                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2623         } else {
2624                 attrdir = 0;
2625                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2626         }
2627 
2628         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2629 
2630         /*
2631          * If dotdotting, then need to check whether it's
2632          * above the root of a filesystem, or above an
2633          * export point.
2634          */
2635         if (dotdot) {
2636 
2637                 /*
2638                  * If dotdotting at the root of a filesystem, then
2639                  * need to traverse back to the mounted-on filesystem
2640                  * and do the dotdot lookup there.
2641                  */
2642                 if (cs->vp->v_flag & VROOT) {
2643 
2644                         /*
2645                          * If at the system root, then can
2646                          * go up no further.
2647                          */
2648                         if (VN_CMP(cs->vp, rootdir))
2649                                 return (puterrno4(ENOENT));
2650 
2651                         /*
2652                          * Traverse back to the mounted-on filesystem
2653                          */
2654                         cs->vp = untraverse(cs->vp);
2655 
2656                         /*
2657                          * Set the different_export flag so we remember
2658                          * to pick up a new exportinfo entry for
2659                          * this new filesystem.
2660                          */
2661                         different_export = 1;
2662                 } else {
2663 
2664                         /*
2665                          * If dotdotting above an export point then set
2666                          * the different_export to get new export info.
2667                          */
2668                         different_export = nfs_exported(cs->exi, cs->vp);
2669                 }
2670         }
2671 
2672         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2673             NULL, NULL, NULL);
2674         if (error)
2675                 return (puterrno4(error));
2676 
2677         /*
2678          * If the vnode is in a pseudo filesystem, check whether it is visible.
2679          *
2680          * XXX if the vnode is a symlink and it is not visible in
2681          * a pseudo filesystem, return ENOENT (not following symlink).
2682          * V4 client can not mount such symlink. This is a regression
2683          * from V2/V3.
2684          *
2685          * In the same exported filesystem, if the security flavor used
2686          * is not an explicitly shared flavor, limit the view to the visible
2687          * list entries only. This is not a WRONGSEC case because it's already
2688          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2689          */
2690         if (!different_export &&
2691             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2692             cs->access & CS_ACCESS_LIMITED)) {
2693                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2694                         VN_RELE(vp);
2695                         return (puterrno4(ENOENT));
2696                 }
2697         }
2698 
2699         /*
2700          * If it's a mountpoint, then traverse it.
2701          */
2702         if (vn_ismntpt(vp)) {
2703                 pre_exi = cs->exi;   /* save pre-traversed exportinfo */
2704                 pre_tvp = vp;           /* save pre-traversed vnode     */
2705 
2706                 /*
2707                  * hold pre_tvp to counteract rele by traverse.  We will
2708                  * need pre_tvp below if checkexport4 fails
2709                  */
2710                 VN_HOLD(pre_tvp);
2711                 if ((error = traverse(&vp)) != 0) {
2712                         VN_RELE(vp);
2713                         VN_RELE(pre_tvp);
2714                         return (puterrno4(error));
2715                 }
2716                 different_export = 1;
2717         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2718                 /*
2719                  * The vfsp comparison is to handle the case where
2720                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2721                  * and NFS is unaware of local fs transistions because
2722                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2723                  * the dir and the obj returned by lookup will have different
2724                  * vfs ptrs.
2725                  */
2726                 different_export = 1;
2727         }
2728 
2729         if (different_export) {
2730 
2731                 bzero(&fid, sizeof (fid));
2732                 fid.fid_len = MAXFIDSZ;
2733                 error = vop_fid_pseudo(vp, &fid);
2734                 if (error) {
2735                         VN_RELE(vp);
2736                         if (pre_tvp)
2737                                 VN_RELE(pre_tvp);
2738                         return (puterrno4(error));
2739                 }
2740 
2741                 if (dotdot)
2742                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2743                 else
2744                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2745 
2746                 if (exi == NULL) {
2747                         if (pre_tvp) {
2748                                 /*
2749                                  * If this vnode is a mounted-on vnode,
2750                                  * but the mounted-on file system is not
2751                                  * exported, send back the filehandle for
2752                                  * the mounted-on vnode, not the root of
2753                                  * the mounted-on file system.
2754                                  */
2755                                 VN_RELE(vp);
2756                                 vp = pre_tvp;
2757                                 exi = pre_exi;
2758                         } else {
2759                                 VN_RELE(vp);
2760                                 return (puterrno4(EACCES));
2761                         }
2762                 } else if (pre_tvp) {
2763                         /* we're done with pre_tvp now. release extra hold */
2764                         VN_RELE(pre_tvp);
2765                 }
2766 
2767                 cs->exi = exi;
2768 
2769                 /*
2770                  * Now we do a checkauth4. The reason is that
2771                  * this client/user may not have access to the new
2772                  * exported file system, and if he does,
2773                  * the client/user may be mapped to a different uid.
2774                  *
2775                  * We start with a new cr, because the checkauth4 done
2776                  * in the PUT*FH operation over wrote the cred's uid,
2777                  * gid, etc, and we want the real thing before calling
2778                  * checkauth4()
2779                  */
2780                 crfree(cs->cr);
2781                 cs->cr = crdup(cs->basecr);
2782 
2783                 oldvp = cs->vp;
2784                 cs->vp = vp;
2785                 is_newvp = TRUE;
2786 
2787                 stat = call_checkauth4(cs, req);
2788                 if (stat != NFS4_OK) {
2789                         VN_RELE(cs->vp);
2790                         cs->vp = oldvp;
2791                         return (stat);
2792                 }
2793         }
2794 
2795         /*
2796          * After various NFS checks, do a label check on the path
2797          * component. The label on this path should either be the
2798          * global zone's label or a zone's label. We are only
2799          * interested in the zone's label because exported files
2800          * in global zone is accessible (though read-only) to
2801          * clients. The exportability/visibility check is already
2802          * done before reaching this code.
2803          */
2804         if (is_system_labeled()) {
2805                 bslabel_t *clabel;
2806 
2807                 ASSERT(req->rq_label != NULL);
2808                 clabel = req->rq_label;
2809                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2810                     "got client label from request(1)", struct svc_req *, req);
2811 
2812                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2813                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2814                             cs->exi)) {
2815                                 error = EACCES;
2816                                 goto err_out;
2817                         }
2818                 } else {
2819                         /*
2820                          * We grant access to admin_low label clients
2821                          * only if the client is trusted, i.e. also
2822                          * running Solaris Trusted Extension.
2823                          */
2824                         struct sockaddr *ca;
2825                         int             addr_type;
2826                         void            *ipaddr;
2827                         tsol_tpc_t      *tp;
2828 
2829                         ca = (struct sockaddr *)svc_getrpccaller(
2830                             req->rq_xprt)->buf;
2831                         if (ca->sa_family == AF_INET) {
2832                                 addr_type = IPV4_VERSION;
2833                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2834                         } else if (ca->sa_family == AF_INET6) {
2835                                 addr_type = IPV6_VERSION;
2836                                 ipaddr = &((struct sockaddr_in6 *)
2837                                     ca)->sin6_addr;
2838                         }
2839                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2840                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2841                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2842                             SUN_CIPSO) {
2843                                 if (tp != NULL)
2844                                         TPC_RELE(tp);
2845                                 error = EACCES;
2846                                 goto err_out;
2847                         }
2848                         TPC_RELE(tp);
2849                 }
2850         }
2851 
2852         error = makefh4(&cs->fh, vp, cs->exi);
2853 
2854 err_out:
2855         if (error) {
2856                 if (is_newvp) {
2857                         VN_RELE(cs->vp);
2858                         cs->vp = oldvp;
2859                 } else
2860                         VN_RELE(vp);
2861                 return (puterrno4(error));
2862         }
2863 
2864         if (!is_newvp) {
2865                 if (cs->vp)
2866                         VN_RELE(cs->vp);
2867                 cs->vp = vp;
2868         } else if (oldvp)
2869                 VN_RELE(oldvp);
2870 
2871         /*
2872          * if did lookup on attrdir and didn't lookup .., set named
2873          * attr fh flag
2874          */
2875         if (attrdir && ! dotdot)
2876                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2877 
2878         /* Assume false for now, open proc will set this */
2879         cs->mandlock = FALSE;
2880 
2881         return (NFS4_OK);
2882 }
2883 
2884 /* ARGSUSED */
2885 static void
2886 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2887     struct compound_state *cs)
2888 {
2889         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2890         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2891         char *nm;
2892         uint_t len;
2893         struct sockaddr *ca;
2894         char *name = NULL;
2895         nfsstat4 status;
2896 
2897         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2898             LOOKUP4args *, args);
2899 
2900         if (cs->vp == NULL) {
2901                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2902                 goto out;
2903         }
2904 
2905         if (cs->vp->v_type == VLNK) {
2906                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2907                 goto out;
2908         }
2909 
2910         if (cs->vp->v_type != VDIR) {
2911                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2912                 goto out;
2913         }
2914 
2915         status = utf8_dir_verify(&args->objname);
2916         if (status != NFS4_OK) {
2917                 *cs->statusp = resp->status = status;
2918                 goto out;
2919         }
2920 
2921         nm = utf8_to_str(&args->objname, &len, NULL);
2922         if (nm == NULL) {
2923                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2924                 goto out;
2925         }
2926 
2927         if (len > MAXNAMELEN) {
2928                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2929                 kmem_free(nm, len);
2930                 goto out;
2931         }
2932 
2933         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2934         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2935             MAXPATHLEN  + 1);
2936 
2937         if (name == NULL) {
2938                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2939                 kmem_free(nm, len);
2940                 goto out;
2941         }
2942 
2943         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2944 
2945         if (name != nm)
2946                 kmem_free(name, MAXPATHLEN + 1);
2947         kmem_free(nm, len);
2948 
2949 out:
2950         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2951             LOOKUP4res *, resp);
2952 }
2953 
2954 /* ARGSUSED */
2955 static void
2956 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2957     struct compound_state *cs)
2958 {
2959         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2960 
2961         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2962 
2963         if (cs->vp == NULL) {
2964                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2965                 goto out;
2966         }
2967 
2968         if (cs->vp->v_type != VDIR) {
2969                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2970                 goto out;
2971         }
2972 
2973         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2974 
2975         /*
2976          * From NFSV4 Specification, LOOKUPP should not check for
2977          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2978          */
2979         if (resp->status == NFS4ERR_WRONGSEC) {
2980                 *cs->statusp = resp->status = NFS4_OK;
2981         }
2982 
2983 out:
2984         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2985             LOOKUPP4res *, resp);
2986 }
2987 
2988 
2989 /*ARGSUSED2*/
2990 static void
2991 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2992     struct compound_state *cs)
2993 {
2994         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
2995         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
2996         vnode_t         *avp = NULL;
2997         int             lookup_flags = LOOKUP_XATTR, error;
2998         int             exp_ro = 0;
2999 
3000         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3001             OPENATTR4args *, args);
3002 
3003         if (cs->vp == NULL) {
3004                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3005                 goto out;
3006         }
3007 
3008         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3009             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3010                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3011                 goto out;
3012         }
3013 
3014         /*
3015          * If file system supports passing ACE mask to VOP_ACCESS then
3016          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3017          */
3018 
3019         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3020                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3021                     V_ACE_MASK, cs->cr, NULL);
3022         else
3023                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3024                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3025                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3026 
3027         if (error) {
3028                 *cs->statusp = resp->status = puterrno4(EACCES);
3029                 goto out;
3030         }
3031 
3032         /*
3033          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3034          * the file system is exported read-only -- regardless of
3035          * createdir flag.  Otherwise the attrdir would be created
3036          * (assuming server fs isn't mounted readonly locally).  If
3037          * VOP_LOOKUP returns ENOENT in this case, the error will
3038          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3039          * because specfs has no VOP_LOOKUP op, so the macro would
3040          * return ENOSYS.  EINVAL is returned by all (current)
3041          * Solaris file system implementations when any of their
3042          * restrictions are violated (xattr(dir) can't have xattrdir).
3043          * Returning NOTSUPP is more appropriate in this case
3044          * because the object will never be able to have an attrdir.
3045          */
3046         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3047                 lookup_flags |= CREATE_XATTR_DIR;
3048 
3049         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3050             NULL, NULL, NULL);
3051 
3052         if (error) {
3053                 if (error == ENOENT && args->createdir && exp_ro)
3054                         *cs->statusp = resp->status = puterrno4(EROFS);
3055                 else if (error == EINVAL || error == ENOSYS)
3056                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3057                 else
3058                         *cs->statusp = resp->status = puterrno4(error);
3059                 goto out;
3060         }
3061 
3062         ASSERT(avp->v_flag & V_XATTRDIR);
3063 
3064         error = makefh4(&cs->fh, avp, cs->exi);
3065 
3066         if (error) {
3067                 VN_RELE(avp);
3068                 *cs->statusp = resp->status = puterrno4(error);
3069                 goto out;
3070         }
3071 
3072         VN_RELE(cs->vp);
3073         cs->vp = avp;
3074 
3075         /*
3076          * There is no requirement for an attrdir fh flag
3077          * because the attrdir has a vnode flag to distinguish
3078          * it from regular (non-xattr) directories.  The
3079          * FH4_ATTRDIR flag is set for future sanity checks.
3080          */
3081         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3082         *cs->statusp = resp->status = NFS4_OK;
3083 
3084 out:
3085         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3086             OPENATTR4res *, resp);
3087 }
3088 
3089 static int
3090 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3091     caller_context_t *ct)
3092 {
3093         int error;
3094         int i;
3095         clock_t delaytime;
3096 
3097         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3098 
3099         /*
3100          * Don't block on mandatory locks. If this routine returns
3101          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3102          */
3103         uio->uio_fmode = FNONBLOCK;
3104 
3105         for (i = 0; i < rfs4_maxlock_tries; i++) {
3106 
3107 
3108                 if (direction == FREAD) {
3109                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3110                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3111                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3112                 } else {
3113                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3114                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3115                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3116                 }
3117 
3118                 if (error != EAGAIN)
3119                         break;
3120 
3121                 if (i < rfs4_maxlock_tries - 1) {
3122                         delay(delaytime);
3123                         delaytime *= 2;
3124                 }
3125         }
3126 
3127         return (error);
3128 }
3129 
3130 /* ARGSUSED */
3131 static void
3132 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3133     struct compound_state *cs)
3134 {
3135         READ4args *args = &argop->nfs_argop4_u.opread;
3136         READ4res *resp = &resop->nfs_resop4_u.opread;
3137         int error;
3138         int verror;
3139         vnode_t *vp;
3140         struct vattr va;
3141         struct iovec iov, *iovp = NULL;
3142         int iovcnt;
3143         struct uio uio;
3144         u_offset_t offset;
3145         bool_t *deleg = &cs->deleg;
3146         nfsstat4 stat;
3147         int in_crit = 0;
3148         mblk_t *mp = NULL;
3149         int alloc_err = 0;
3150         int rdma_used = 0;
3151         int loaned_buffers;
3152         caller_context_t ct;
3153         struct uio *uiop;
3154 
3155         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3156             READ4args, args);
3157 
3158         vp = cs->vp;
3159         if (vp == NULL) {
3160                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3161                 goto out;
3162         }
3163         if (cs->access == CS_ACCESS_DENIED) {
3164                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3165                 goto out;
3166         }
3167 
3168         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3169             deleg, TRUE, &ct)) != NFS4_OK) {
3170                 *cs->statusp = resp->status = stat;
3171                 goto out;
3172         }
3173 
3174         /*
3175          * Enter the critical region before calling VOP_RWLOCK
3176          * to avoid a deadlock with write requests.
3177          */
3178         if (nbl_need_check(vp)) {
3179                 nbl_start_crit(vp, RW_READER);
3180                 in_crit = 1;
3181                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3182                     &ct)) {
3183                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3184                         goto out;
3185                 }
3186         }
3187 
3188         if (args->wlist) {
3189                 if (args->count > clist_len(args->wlist)) {
3190                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3191                         goto out;
3192                 }
3193                 rdma_used = 1;
3194         }
3195 
3196         /* use loaned buffers for TCP */
3197         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3198 
3199         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3200         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3201 
3202         /*
3203          * If we can't get the attributes, then we can't do the
3204          * right access checking.  So, we'll fail the request.
3205          */
3206         if (verror) {
3207                 *cs->statusp = resp->status = puterrno4(verror);
3208                 goto out;
3209         }
3210 
3211         if (vp->v_type != VREG) {
3212                 *cs->statusp = resp->status =
3213                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3214                 goto out;
3215         }
3216 
3217         if (crgetuid(cs->cr) != va.va_uid &&
3218             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3219             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3220                 *cs->statusp = resp->status = puterrno4(error);
3221                 goto out;
3222         }
3223 
3224         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3225                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3226                 goto out;
3227         }
3228 
3229         offset = args->offset;
3230         if (offset >= va.va_size) {
3231                 *cs->statusp = resp->status = NFS4_OK;
3232                 resp->eof = TRUE;
3233                 resp->data_len = 0;
3234                 resp->data_val = NULL;
3235                 resp->mblk = NULL;
3236                 /* RDMA */
3237                 resp->wlist = args->wlist;
3238                 resp->wlist_len = resp->data_len;
3239                 *cs->statusp = resp->status = NFS4_OK;
3240                 if (resp->wlist)
3241                         clist_zero_len(resp->wlist);
3242                 goto out;
3243         }
3244 
3245         if (args->count == 0) {
3246                 *cs->statusp = resp->status = NFS4_OK;
3247                 resp->eof = FALSE;
3248                 resp->data_len = 0;
3249                 resp->data_val = NULL;
3250                 resp->mblk = NULL;
3251                 /* RDMA */
3252                 resp->wlist = args->wlist;
3253                 resp->wlist_len = resp->data_len;
3254                 if (resp->wlist)
3255                         clist_zero_len(resp->wlist);
3256                 goto out;
3257         }
3258 
3259         /*
3260          * Do not allocate memory more than maximum allowed
3261          * transfer size
3262          */
3263         if (args->count > rfs4_tsize(req))
3264                 args->count = rfs4_tsize(req);
3265 
3266         if (loaned_buffers) {
3267                 uiop = (uio_t *)rfs_setup_xuio(vp);
3268                 ASSERT(uiop != NULL);
3269                 uiop->uio_segflg = UIO_SYSSPACE;
3270                 uiop->uio_loffset = args->offset;
3271                 uiop->uio_resid = args->count;
3272 
3273                 /* Jump to do the read if successful */
3274                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3275                         /*
3276                          * Need to hold the vnode until after VOP_RETZCBUF()
3277                          * is called.
3278                          */
3279                         VN_HOLD(vp);
3280                         goto doio_read;
3281                 }
3282 
3283                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3284                     uiop->uio_loffset, int, uiop->uio_resid);
3285 
3286                 uiop->uio_extflg = 0;
3287 
3288                 /* failure to setup for zero copy */
3289                 rfs_free_xuio((void *)uiop);
3290                 loaned_buffers = 0;
3291         }
3292 
3293         /*
3294          * If returning data via RDMA Write, then grab the chunk list. If we
3295          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3296          */
3297         if (rdma_used) {
3298                 mp = NULL;
3299                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3300                 uio.uio_iov = &iov;
3301                 uio.uio_iovcnt = 1;
3302         } else {
3303                 /*
3304                  * mp will contain the data to be sent out in the read reply.
3305                  * It will be freed after the reply has been sent.
3306                  */
3307                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3308                 ASSERT(mp != NULL);
3309                 ASSERT(alloc_err == 0);
3310                 uio.uio_iov = iovp;
3311                 uio.uio_iovcnt = iovcnt;
3312         }
3313 
3314         uio.uio_segflg = UIO_SYSSPACE;
3315         uio.uio_extflg = UIO_COPY_CACHED;
3316         uio.uio_loffset = args->offset;
3317         uio.uio_resid = args->count;
3318         uiop = &uio;
3319 
3320 doio_read:
3321         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3322 
3323         va.va_mask = AT_SIZE;
3324         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3325 
3326         if (error) {
3327                 if (mp)
3328                         freemsg(mp);
3329                 *cs->statusp = resp->status = puterrno4(error);
3330                 goto out;
3331         }
3332 
3333         /* make mblk using zc buffers */
3334         if (loaned_buffers) {
3335                 mp = uio_to_mblk(uiop);
3336                 ASSERT(mp != NULL);
3337         }
3338 
3339         *cs->statusp = resp->status = NFS4_OK;
3340 
3341         ASSERT(uiop->uio_resid >= 0);
3342         resp->data_len = args->count - uiop->uio_resid;
3343         if (mp) {
3344                 resp->data_val = (char *)mp->b_datap->db_base;
3345                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3346         } else {
3347                 resp->data_val = (caddr_t)iov.iov_base;
3348         }
3349 
3350         resp->mblk = mp;
3351 
3352         if (!verror && offset + resp->data_len == va.va_size)
3353                 resp->eof = TRUE;
3354         else
3355                 resp->eof = FALSE;
3356 
3357         if (rdma_used) {
3358                 if (!rdma_setup_read_data4(args, resp)) {
3359                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3360                 }
3361         } else {
3362                 resp->wlist = NULL;
3363         }
3364 
3365 out:
3366         if (in_crit)
3367                 nbl_end_crit(vp);
3368 
3369         if (iovp != NULL)
3370                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3371 
3372         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3373             READ4res *, resp);
3374 }
3375 
3376 static void
3377 rfs4_op_read_free(nfs_resop4 *resop)
3378 {
3379         READ4res        *resp = &resop->nfs_resop4_u.opread;
3380 
3381         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3382                 freemsg(resp->mblk);
3383                 resp->mblk = NULL;
3384                 resp->data_val = NULL;
3385                 resp->data_len = 0;
3386         }
3387 }
3388 
3389 static void
3390 rfs4_op_readdir_free(nfs_resop4 * resop)
3391 {
3392         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3393 
3394         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3395                 freeb(resp->mblk);
3396                 resp->mblk = NULL;
3397                 resp->data_len = 0;
3398         }
3399 }
3400 
3401 
3402 /* ARGSUSED */
3403 static void
3404 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3405     struct compound_state *cs)
3406 {
3407         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3408         int             error;
3409         vnode_t         *vp;
3410         struct exportinfo *exi, *sav_exi;
3411         nfs_fh4_fmt_t   *fh_fmtp;
3412 
3413         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3414 
3415         if (cs->vp) {
3416                 VN_RELE(cs->vp);
3417                 cs->vp = NULL;
3418         }
3419 
3420         if (cs->cr)
3421                 crfree(cs->cr);
3422 
3423         cs->cr = crdup(cs->basecr);
3424 
3425         vp = exi_public->exi_vp;
3426         if (vp == NULL) {
3427                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3428                 goto out;
3429         }
3430 
3431         error = makefh4(&cs->fh, vp, exi_public);
3432         if (error != 0) {
3433                 *cs->statusp = resp->status = puterrno4(error);
3434                 goto out;
3435         }
3436         sav_exi = cs->exi;
3437         if (exi_public == exi_root) {
3438                 /*
3439                  * No filesystem is actually shared public, so we default
3440                  * to exi_root. In this case, we must check whether root
3441                  * is exported.
3442                  */
3443                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3444 
3445                 /*
3446                  * if root filesystem is exported, the exportinfo struct that we
3447                  * should use is what checkexport4 returns, because root_exi is
3448                  * actually a mostly empty struct.
3449                  */
3450                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3451                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3452                 cs->exi = ((exi != NULL) ? exi : exi_public);
3453         } else {
3454                 /*
3455                  * it's a properly shared filesystem
3456                  */
3457                 cs->exi = exi_public;
3458         }
3459 
3460         if (is_system_labeled()) {
3461                 bslabel_t *clabel;
3462 
3463                 ASSERT(req->rq_label != NULL);
3464                 clabel = req->rq_label;
3465                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3466                     "got client label from request(1)",
3467                     struct svc_req *, req);
3468                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3469                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3470                             cs->exi)) {
3471                                 *cs->statusp = resp->status =
3472                                     NFS4ERR_SERVERFAULT;
3473                                 goto out;
3474                         }
3475                 }
3476         }
3477 
3478         VN_HOLD(vp);
3479         cs->vp = vp;
3480 
3481         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3482                 VN_RELE(cs->vp);
3483                 cs->vp = NULL;
3484                 cs->exi = sav_exi;
3485                 goto out;
3486         }
3487 
3488         *cs->statusp = resp->status = NFS4_OK;
3489 out:
3490         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3491             PUTPUBFH4res *, resp);
3492 }
3493 
3494 /*
3495  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3496  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3497  * or joe have restrictive search permissions, then we shouldn't let
3498  * the client get a file handle. This is easy to enforce. However, we
3499  * don't know what security flavor should be used until we resolve the
3500  * path name. Another complication is uid mapping. If root is
3501  * the user, then it will be mapped to the anonymous user by default,
3502  * but we won't know that till we've resolved the path name. And we won't
3503  * know what the anonymous user is.
3504  * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that flavor of
3506  * the target object matches that of the request, and if root was the
3507  * caller, check for the root= and anon= options, and if necessary,
3508  * repeat the lookup using the right cred_t. But that's not done yet.
3509  */
3510 /* ARGSUSED */
3511 static void
3512 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3513     struct compound_state *cs)
3514 {
3515         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3516         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3517         nfs_fh4_fmt_t *fh_fmtp;
3518 
3519         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3520             PUTFH4args *, args);
3521 
3522         if (cs->vp) {
3523                 VN_RELE(cs->vp);
3524                 cs->vp = NULL;
3525         }
3526 
3527         if (cs->cr) {
3528                 crfree(cs->cr);
3529                 cs->cr = NULL;
3530         }
3531 
3532 
3533         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3534                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3535                 goto out;
3536         }
3537 
3538         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3539         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3540             NULL);
3541 
3542         if (cs->exi == NULL) {
3543                 *cs->statusp = resp->status = NFS4ERR_STALE;
3544                 goto out;
3545         }
3546 
3547         cs->cr = crdup(cs->basecr);
3548 
3549         ASSERT(cs->cr != NULL);
3550 
3551         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3552                 *cs->statusp = resp->status;
3553                 goto out;
3554         }
3555 
3556         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3557                 VN_RELE(cs->vp);
3558                 cs->vp = NULL;
3559                 goto out;
3560         }
3561 
3562         nfs_fh4_copy(&args->object, &cs->fh);
3563         *cs->statusp = resp->status = NFS4_OK;
3564         cs->deleg = FALSE;
3565 
3566 out:
3567         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3568             PUTFH4res *, resp);
3569 }
3570 
3571 /* ARGSUSED */
3572 static void
3573 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3574     struct compound_state *cs)
3575 {
3576         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3577         int error;
3578         fid_t fid;
3579         struct exportinfo *exi, *sav_exi;
3580 
3581         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3582 
3583         if (cs->vp) {
3584                 VN_RELE(cs->vp);
3585                 cs->vp = NULL;
3586         }
3587 
3588         if (cs->cr)
3589                 crfree(cs->cr);
3590 
3591         cs->cr = crdup(cs->basecr);
3592 
3593         /*
3594          * Using rootdir, the system root vnode,
3595          * get its fid.
3596          */
3597         bzero(&fid, sizeof (fid));
3598         fid.fid_len = MAXFIDSZ;
3599         error = vop_fid_pseudo(rootdir, &fid);
3600         if (error != 0) {
3601                 *cs->statusp = resp->status = puterrno4(error);
3602                 goto out;
3603         }
3604 
3605         /*
3606          * Then use the root fsid & fid it to find out if it's exported
3607          *
3608          * If the server root isn't exported directly, then
3609          * it should at least be a pseudo export based on
3610          * one or more exports further down in the server's
3611          * file tree.
3612          */
3613         exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3614         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3615                 NFS4_DEBUG(rfs4_debug,
3616                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3617                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3618                 goto out;
3619         }
3620 
3621         /*
3622          * Now make a filehandle based on the root
3623          * export and root vnode.
3624          */
3625         error = makefh4(&cs->fh, rootdir, exi);
3626         if (error != 0) {
3627                 *cs->statusp = resp->status = puterrno4(error);
3628                 goto out;
3629         }
3630 
3631         sav_exi = cs->exi;
3632         cs->exi = exi;
3633 
3634         VN_HOLD(rootdir);
3635         cs->vp = rootdir;
3636 
3637         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3638                 VN_RELE(rootdir);
3639                 cs->vp = NULL;
3640                 cs->exi = sav_exi;
3641                 goto out;
3642         }
3643 
3644         *cs->statusp = resp->status = NFS4_OK;
3645         cs->deleg = FALSE;
3646 out:
3647         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3648             PUTROOTFH4res *, resp);
3649 }
3650 
3651 /*
3652  * set_rdattr_params sets up the variables used to manage what information
3653  * to get for each directory entry.
3654  */
3655 static nfsstat4
3656 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3657     bitmap4 attrs, bool_t *need_to_lookup)
3658 {
3659         uint_t  va_mask;
3660         nfsstat4 status;
3661         bitmap4 objbits;
3662 
3663         status = bitmap4_to_attrmask(attrs, sargp);
3664         if (status != NFS4_OK) {
3665                 /*
3666                  * could not even figure attr mask
3667                  */
3668                 return (status);
3669         }
3670         va_mask = sargp->vap->va_mask;
3671 
3672         /*
3673          * dirent's d_ino is always correct value for mounted_on_fileid.
3674          * mntdfid_set is set once here, but mounted_on_fileid is
3675          * set in main dirent processing loop for each dirent.
3676          * The mntdfid_set is a simple optimization that lets the
3677          * server attr code avoid work when caller is readdir.
3678          */
3679         sargp->mntdfid_set = TRUE;
3680 
3681         /*
3682          * Lookup entry only if client asked for any of the following:
3683          * a) vattr attrs
3684          * b) vfs attrs
3685          * c) attrs w/per-object scope requested (change, filehandle, etc)
3686          *    other than mounted_on_fileid (which we can take from dirent)
3687          */
3688         objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3689 
3690         if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3691                 *need_to_lookup = TRUE;
3692         else
3693                 *need_to_lookup = FALSE;
3694 
3695         if (sargp->sbp == NULL)
3696                 return (NFS4_OK);
3697 
3698         /*
3699          * If filesystem attrs are requested, get them now from the
3700          * directory vp, as most entries will have same filesystem. The only
3701          * exception are mounted over entries but we handle
3702          * those as we go (XXX mounted over detection not yet implemented).
3703          */
3704         sargp->vap->va_mask = 0;  /* to avoid VOP_GETATTR */
3705         status = bitmap4_get_sysattrs(sargp);
3706         sargp->vap->va_mask = va_mask;
3707 
3708         if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3709                 /*
3710                  * Failed to get filesystem attributes.
3711                  * Return a rdattr_error for each entry, but don't fail.
3712                  * However, don't get any obj-dependent attrs.
3713                  */
3714                 sargp->rdattr_error = status;        /* for rdattr_error */
3715                 *need_to_lookup = FALSE;
3716                 /*
3717                  * At least get fileid for regular readdir output
3718                  */
3719                 sargp->vap->va_mask &= AT_NODEID;
3720                 status = NFS4_OK;
3721         }
3722 
3723         return (status);
3724 }
3725 
3726 /*
3727  * readlink: args: CURRENT_FH.
3728  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3729  */
3730 
3731 /* ARGSUSED */
3732 static void
3733 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3734     struct compound_state *cs)
3735 {
3736         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3737         int error;
3738         vnode_t *vp;
3739         struct iovec iov;
3740         struct vattr va;
3741         struct uio uio;
3742         char *data;
3743         struct sockaddr *ca;
3744         char *name = NULL;
3745         int is_referral;
3746 
3747         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3748 
3749         /* CURRENT_FH: directory */
3750         vp = cs->vp;
3751         if (vp == NULL) {
3752                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3753                 goto out;
3754         }
3755 
3756         if (cs->access == CS_ACCESS_DENIED) {
3757                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3758                 goto out;
3759         }
3760 
3761         /* Is it a referral? */
3762         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3763 
3764                 is_referral = 1;
3765 
3766         } else {
3767 
3768                 is_referral = 0;
3769 
3770                 if (vp->v_type == VDIR) {
3771                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3772                         goto out;
3773                 }
3774 
3775                 if (vp->v_type != VLNK) {
3776                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3777                         goto out;
3778                 }
3779 
3780         }
3781 
3782         va.va_mask = AT_MODE;
3783         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3784         if (error) {
3785                 *cs->statusp = resp->status = puterrno4(error);
3786                 goto out;
3787         }
3788 
3789         if (MANDLOCK(vp, va.va_mode)) {
3790                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3791                 goto out;
3792         }
3793 
3794         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3795 
3796         if (is_referral) {
3797                 char *s;
3798                 size_t strsz;
3799 
3800                 /* Get an artificial symlink based on a referral */
3801                 s = build_symlink(vp, cs->cr, &strsz);
3802                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3803                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3804                     vnode_t *, vp, char *, s);
3805                 if (s == NULL)
3806                         error = EINVAL;
3807                 else {
3808                         error = 0;
3809                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3810                         kmem_free(s, strsz);
3811                 }
3812 
3813         } else {
3814 
3815                 iov.iov_base = data;
3816                 iov.iov_len = MAXPATHLEN;
3817                 uio.uio_iov = &iov;
3818                 uio.uio_iovcnt = 1;
3819                 uio.uio_segflg = UIO_SYSSPACE;
3820                 uio.uio_extflg = UIO_COPY_CACHED;
3821                 uio.uio_loffset = 0;
3822                 uio.uio_resid = MAXPATHLEN;
3823 
3824                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3825 
3826                 if (!error)
3827                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3828         }
3829 
3830         if (error) {
3831                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3832                 *cs->statusp = resp->status = puterrno4(error);
3833                 goto out;
3834         }
3835 
3836         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3837         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3838             MAXPATHLEN  + 1);
3839 
3840         if (name == NULL) {
3841                 /*
3842                  * Even though the conversion failed, we return
3843                  * something. We just don't translate it.
3844                  */
3845                 name = data;
3846         }
3847 
3848         /*
3849          * treat link name as data
3850          */
3851         (void) str_to_utf8(name, (utf8string *)&resp->link);
3852 
3853         if (name != data)
3854                 kmem_free(name, MAXPATHLEN + 1);
3855         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3856         *cs->statusp = resp->status = NFS4_OK;
3857 
3858 out:
3859         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3860             READLINK4res *, resp);
3861 }
3862 
3863 static void
3864 rfs4_op_readlink_free(nfs_resop4 *resop)
3865 {
3866         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3867         utf8string *symlink = (utf8string *)&resp->link;
3868 
3869         if (symlink->utf8string_val) {
3870                 UTF8STRING_FREE(*symlink)
3871         }
3872 }
3873 
3874 /*
3875  * release_lockowner:
3876  *      Release any state associated with the supplied
3877  *      lockowner. Note if any lo_state is holding locks we will not
3878  *      rele that lo_state and thus the lockowner will not be destroyed.
3879  *      A client using lock after the lock owner stateid has been released
3880  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3881  *      to reissue the lock with new_lock_owner set to TRUE.
3882  *      args: lock_owner
3883  *      res:  status
3884  */
3885 /* ARGSUSED */
3886 static void
3887 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3888     struct svc_req *req, struct compound_state *cs)
3889 {
3890         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3891         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3892         rfs4_lockowner_t *lo;
3893         rfs4_openowner_t *oo;
3894         rfs4_state_t *sp;
3895         rfs4_lo_state_t *lsp;
3896         rfs4_client_t *cp;
3897         bool_t create = FALSE;
3898         locklist_t *llist;
3899         sysid_t sysid;
3900 
3901         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3902             cs, RELEASE_LOCKOWNER4args *, ap);
3903 
3904         /* Make sure there is a clientid around for this request */
3905         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3906 
3907         if (cp == NULL) {
3908                 *cs->statusp = resp->status =
3909                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3910                 goto out;
3911         }
3912         rfs4_client_rele(cp);
3913 
3914         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3915         if (lo == NULL) {
3916                 *cs->statusp = resp->status = NFS4_OK;
3917                 goto out;
3918         }
3919         ASSERT(lo->rl_client != NULL);
3920 
3921         /*
3922          * Check for EXPIRED client. If so will reap state with in a lease
3923          * period or on next set_clientid_confirm step
3924          */
3925         if (rfs4_lease_expired(lo->rl_client)) {
3926                 rfs4_lockowner_rele(lo);
3927                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3928                 goto out;
3929         }
3930 
3931         /*
3932          * If no sysid has been assigned, then no locks exist; just return.
3933          */
3934         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3935         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3936                 rfs4_lockowner_rele(lo);
3937                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3938                 goto out;
3939         }
3940 
3941         sysid = lo->rl_client->rc_sysidt;
3942         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3943 
3944         /*
3945          * Mark the lockowner invalid.
3946          */
3947         rfs4_dbe_hide(lo->rl_dbe);
3948 
3949         /*
3950          * sysid-pid pair should now not be used since the lockowner is
3951          * invalid. If the client were to instantiate the lockowner again
3952          * it would be assigned a new pid. Thus we can get the list of
3953          * current locks.
3954          */
3955 
3956         llist = flk_get_active_locks(sysid, lo->rl_pid);
3957         /* If we are still holding locks fail */
3958         if (llist != NULL) {
3959 
3960                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3961 
3962                 flk_free_locklist(llist);
3963                 /*
3964                  * We need to unhide the lockowner so the client can
3965                  * try it again. The bad thing here is if the client
3966                  * has a logic error that took it here in the first place
3967                  * he probably has lost accounting of the locks that it
3968                  * is holding. So we may have dangling state until the
3969                  * open owner state is reaped via close. One scenario
3970                  * that could possibly occur is that the client has
3971                  * sent the unlock request(s) in separate threads
3972                  * and has not waited for the replies before sending the
3973                  * RELEASE_LOCKOWNER request. Presumably, it would expect
3974                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3975                  * reissuing the request.
3976                  */
3977                 rfs4_dbe_unhide(lo->rl_dbe);
3978                 rfs4_lockowner_rele(lo);
3979                 goto out;
3980         }
3981 
3982         /*
3983          * For the corresponding client we need to check each open
3984          * owner for any opens that have lockowner state associated
3985          * with this lockowner.
3986          */
3987 
3988         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3989         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3990             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3991 
3992                 rfs4_dbe_lock(oo->ro_dbe);
3993                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3994                     sp = list_next(&oo->ro_statelist, sp)) {
3995 
3996                         rfs4_dbe_lock(sp->rs_dbe);
3997                         for (lsp = list_head(&sp->rs_lostatelist);
3998                             lsp != NULL;
3999                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
4000                                 if (lsp->rls_locker == lo) {
4001                                         rfs4_dbe_lock(lsp->rls_dbe);
4002                                         rfs4_dbe_invalidate(lsp->rls_dbe);
4003                                         rfs4_dbe_unlock(lsp->rls_dbe);
4004                                 }
4005                         }
4006                         rfs4_dbe_unlock(sp->rs_dbe);
4007                 }
4008                 rfs4_dbe_unlock(oo->ro_dbe);
4009         }
4010         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4011 
4012         rfs4_lockowner_rele(lo);
4013 
4014         *cs->statusp = resp->status = NFS4_OK;
4015 
4016 out:
4017         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4018             cs, RELEASE_LOCKOWNER4res *, resp);
4019 }
4020 
4021 /*
4022  * short utility function to lookup a file and recall the delegation
4023  */
4024 static rfs4_file_t *
4025 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4026     int *lkup_error, cred_t *cr)
4027 {
4028         vnode_t *vp;
4029         rfs4_file_t *fp = NULL;
4030         bool_t fcreate = FALSE;
4031         int error;
4032 
4033         if (vpp)
4034                 *vpp = NULL;
4035 
4036         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4037             NULL)) == 0) {
4038                 if (vp->v_type == VREG)
4039                         fp = rfs4_findfile(vp, NULL, &fcreate);
4040                 if (vpp)
4041                         *vpp = vp;
4042                 else
4043                         VN_RELE(vp);
4044         }
4045 
4046         if (lkup_error)
4047                 *lkup_error = error;
4048 
4049         return (fp);
4050 }
4051 
4052 /*
4053  * remove: args: CURRENT_FH: directory; name.
4054  *      res: status. If success - CURRENT_FH unchanged, return change_info
4055  *              for directory.
4056  */
4057 /* ARGSUSED */
4058 static void
4059 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4060     struct compound_state *cs)
4061 {
4062         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4063         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4064         int error;
4065         vnode_t *dvp, *vp;
4066         struct vattr bdva, idva, adva;
4067         char *nm;
4068         uint_t len;
4069         rfs4_file_t *fp;
4070         int in_crit = 0;
4071         bslabel_t *clabel;
4072         struct sockaddr *ca;
4073         char *name = NULL;
4074         nfsstat4 status;
4075 
4076         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4077             REMOVE4args *, args);
4078 
4079         /* CURRENT_FH: directory */
4080         dvp = cs->vp;
4081         if (dvp == NULL) {
4082                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4083                 goto out;
4084         }
4085 
4086         if (cs->access == CS_ACCESS_DENIED) {
4087                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4088                 goto out;
4089         }
4090 
4091         /*
4092          * If there is an unshared filesystem mounted on this vnode,
4093          * Do not allow to remove anything in this directory.
4094          */
4095         if (vn_ismntpt(dvp)) {
4096                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4097                 goto out;
4098         }
4099 
4100         if (dvp->v_type != VDIR) {
4101                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4102                 goto out;
4103         }
4104 
4105         status = utf8_dir_verify(&args->target);
4106         if (status != NFS4_OK) {
4107                 *cs->statusp = resp->status = status;
4108                 goto out;
4109         }
4110 
4111         /*
4112          * Lookup the file so that we can check if it's a directory
4113          */
4114         nm = utf8_to_fn(&args->target, &len, NULL);
4115         if (nm == NULL) {
4116                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4117                 goto out;
4118         }
4119 
4120         if (len > MAXNAMELEN) {
4121                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4122                 kmem_free(nm, len);
4123                 goto out;
4124         }
4125 
4126         if (rdonly4(req, cs)) {
4127                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4128                 kmem_free(nm, len);
4129                 goto out;
4130         }
4131 
4132         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4133         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4134             MAXPATHLEN  + 1);
4135 
4136         if (name == NULL) {
4137                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4138                 kmem_free(nm, len);
4139                 goto out;
4140         }
4141 
4142         /*
4143          * Lookup the file to determine type and while we are see if
4144          * there is a file struct around and check for delegation.
4145          * We don't need to acquire va_seq before this lookup, if
4146          * it causes an update, cinfo.before will not match, which will
4147          * trigger a cache flush even if atomic is TRUE.
4148          */
4149         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4150                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4151                     NULL)) {
4152                         VN_RELE(vp);
4153                         rfs4_file_rele(fp);
4154                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4155                         if (nm != name)
4156                                 kmem_free(name, MAXPATHLEN + 1);
4157                         kmem_free(nm, len);
4158                         goto out;
4159                 }
4160         }
4161 
4162         /* Didn't find anything to remove */
4163         if (vp == NULL) {
4164                 *cs->statusp = resp->status = error;
4165                 if (nm != name)
4166                         kmem_free(name, MAXPATHLEN + 1);
4167                 kmem_free(nm, len);
4168                 goto out;
4169         }
4170 
4171         if (nbl_need_check(vp)) {
4172                 nbl_start_crit(vp, RW_READER);
4173                 in_crit = 1;
4174                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4175                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4176                         if (nm != name)
4177                                 kmem_free(name, MAXPATHLEN + 1);
4178                         kmem_free(nm, len);
4179                         nbl_end_crit(vp);
4180                         VN_RELE(vp);
4181                         if (fp) {
4182                                 rfs4_clear_dont_grant(fp);
4183                                 rfs4_file_rele(fp);
4184                         }
4185                         goto out;
4186                 }
4187         }
4188 
4189         /* check label before allowing removal */
4190         if (is_system_labeled()) {
4191                 ASSERT(req->rq_label != NULL);
4192                 clabel = req->rq_label;
4193                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4194                     "got client label from request(1)",
4195                     struct svc_req *, req);
4196                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4197                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4198                             cs->exi)) {
4199                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4200                                 if (name != nm)
4201                                         kmem_free(name, MAXPATHLEN + 1);
4202                                 kmem_free(nm, len);
4203                                 if (in_crit)
4204                                         nbl_end_crit(vp);
4205                                 VN_RELE(vp);
4206                                 if (fp) {
4207                                         rfs4_clear_dont_grant(fp);
4208                                         rfs4_file_rele(fp);
4209                                 }
4210                                 goto out;
4211                         }
4212                 }
4213         }
4214 
4215         /* Get dir "before" change value */
4216         bdva.va_mask = AT_CTIME|AT_SEQ;
4217         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4218         if (error) {
4219                 *cs->statusp = resp->status = puterrno4(error);
4220                 if (nm != name)
4221                         kmem_free(name, MAXPATHLEN + 1);
4222                 kmem_free(nm, len);
4223                 if (in_crit)
4224                         nbl_end_crit(vp);
4225                 VN_RELE(vp);
4226                 if (fp) {
4227                         rfs4_clear_dont_grant(fp);
4228                         rfs4_file_rele(fp);
4229                 }
4230                 goto out;
4231         }
4232         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4233 
4234         /* Actually do the REMOVE operation */
4235         if (vp->v_type == VDIR) {
4236                 /*
4237                  * Can't remove a directory that has a mounted-on filesystem.
4238                  */
4239                 if (vn_ismntpt(vp)) {
4240                         error = EACCES;
4241                 } else {
4242                         /*
4243                          * System V defines rmdir to return EEXIST,
4244                          * not ENOTEMPTY, if the directory is not
4245                          * empty.  A System V NFS server needs to map
4246                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4247                          * transmit over the wire.
4248                          */
4249                         if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4250                             NULL, 0)) == EEXIST)
4251                                 error = ENOTEMPTY;
4252                 }
4253         } else {
4254                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4255                     fp != NULL) {
4256                         struct vattr va;
4257                         vnode_t *tvp;
4258 
4259                         rfs4_dbe_lock(fp->rf_dbe);
4260                         tvp = fp->rf_vp;
4261                         if (tvp)
4262                                 VN_HOLD(tvp);
4263                         rfs4_dbe_unlock(fp->rf_dbe);
4264 
4265                         if (tvp) {
4266                                 /*
4267                                  * This is va_seq safe because we are not
4268                                  * manipulating dvp.
4269                                  */
4270                                 va.va_mask = AT_NLINK;
4271                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4272                                     va.va_nlink == 0) {
4273                                         /* Remove state on file remove */
4274                                         if (in_crit) {
4275                                                 nbl_end_crit(vp);
4276                                                 in_crit = 0;
4277                                         }
4278                                         rfs4_close_all_state(fp);
4279                                 }
4280                                 VN_RELE(tvp);
4281                         }
4282                 }
4283         }
4284 
4285         if (in_crit)
4286                 nbl_end_crit(vp);
4287         VN_RELE(vp);
4288 
4289         if (fp) {
4290                 rfs4_clear_dont_grant(fp);
4291                 rfs4_file_rele(fp);
4292         }
4293         if (nm != name)
4294                 kmem_free(name, MAXPATHLEN + 1);
4295         kmem_free(nm, len);
4296 
4297         if (error) {
4298                 *cs->statusp = resp->status = puterrno4(error);
4299                 goto out;
4300         }
4301 
4302         /*
4303          * Get the initial "after" sequence number, if it fails, set to zero
4304          */
4305         idva.va_mask = AT_SEQ;
4306         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4307                 idva.va_seq = 0;
4308 
4309         /*
4310          * Force modified data and metadata out to stable storage.
4311          */
4312         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4313 
4314         /*
4315          * Get "after" change value, if it fails, simply return the
4316          * before value.
4317          */
4318         adva.va_mask = AT_CTIME|AT_SEQ;
4319         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4320                 adva.va_ctime = bdva.va_ctime;
4321                 adva.va_seq = 0;
4322         }
4323 
4324         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4325 
4326         /*
4327          * The cinfo.atomic = TRUE only if we have
4328          * non-zero va_seq's, and it has incremented by exactly one
4329          * during the VOP_REMOVE/RMDIR and it didn't change during
4330          * the VOP_FSYNC.
4331          */
4332         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4333             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4334                 resp->cinfo.atomic = TRUE;
4335         else
4336                 resp->cinfo.atomic = FALSE;
4337 
4338         *cs->statusp = resp->status = NFS4_OK;
4339 
4340 out:
4341         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4342             REMOVE4res *, resp);
4343 }
4344 
4345 /*
4346  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4347  *              oldname and newname.
4348  *      res: status. If success - CURRENT_FH unchanged, return change_info
4349  *              for both from and target directories.
4350  */
4351 /* ARGSUSED */
4352 static void
4353 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4354     struct compound_state *cs)
4355 {
4356         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4357         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4358         int error;
4359         vnode_t *odvp;
4360         vnode_t *ndvp;
4361         vnode_t *srcvp, *targvp;
4362         struct vattr obdva, oidva, oadva;
4363         struct vattr nbdva, nidva, nadva;
4364         char *onm, *nnm;
4365         uint_t olen, nlen;
4366         rfs4_file_t *fp, *sfp;
4367         int in_crit_src, in_crit_targ;
4368         int fp_rele_grant_hold, sfp_rele_grant_hold;
4369         bslabel_t *clabel;
4370         struct sockaddr *ca;
4371         char *converted_onm = NULL;
4372         char *converted_nnm = NULL;
4373         nfsstat4 status;
4374 
4375         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4376             RENAME4args *, args);
4377 
4378         fp = sfp = NULL;
4379         srcvp = targvp = NULL;
4380         in_crit_src = in_crit_targ = 0;
4381         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4382 
4383         /* CURRENT_FH: target directory */
4384         ndvp = cs->vp;
4385         if (ndvp == NULL) {
4386                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4387                 goto out;
4388         }
4389 
4390         /* SAVED_FH: from directory */
4391         odvp = cs->saved_vp;
4392         if (odvp == NULL) {
4393                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4394                 goto out;
4395         }
4396 
4397         if (cs->access == CS_ACCESS_DENIED) {
4398                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4399                 goto out;
4400         }
4401 
4402         /*
4403          * If there is an unshared filesystem mounted on this vnode,
4404          * do not allow to rename objects in this directory.
4405          */
4406         if (vn_ismntpt(odvp)) {
4407                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4408                 goto out;
4409         }
4410 
4411         /*
4412          * If there is an unshared filesystem mounted on this vnode,
4413          * do not allow to rename to this directory.
4414          */
4415         if (vn_ismntpt(ndvp)) {
4416                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4417                 goto out;
4418         }
4419 
4420         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4421                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4422                 goto out;
4423         }
4424 
4425         if (cs->saved_exi != cs->exi) {
4426                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4427                 goto out;
4428         }
4429 
4430         status = utf8_dir_verify(&args->oldname);
4431         if (status != NFS4_OK) {
4432                 *cs->statusp = resp->status = status;
4433                 goto out;
4434         }
4435 
4436         status = utf8_dir_verify(&args->newname);
4437         if (status != NFS4_OK) {
4438                 *cs->statusp = resp->status = status;
4439                 goto out;
4440         }
4441 
4442         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4443         if (onm == NULL) {
4444                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4445                 goto out;
4446         }
4447         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4448         nlen = MAXPATHLEN + 1;
4449         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4450             nlen);
4451 
4452         if (converted_onm == NULL) {
4453                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4454                 kmem_free(onm, olen);
4455                 goto out;
4456         }
4457 
4458         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4459         if (nnm == NULL) {
4460                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4461                 if (onm != converted_onm)
4462                         kmem_free(converted_onm, MAXPATHLEN + 1);
4463                 kmem_free(onm, olen);
4464                 goto out;
4465         }
4466         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4467             MAXPATHLEN  + 1);
4468 
4469         if (converted_nnm == NULL) {
4470                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4471                 kmem_free(nnm, nlen);
4472                 nnm = NULL;
4473                 if (onm != converted_onm)
4474                         kmem_free(converted_onm, MAXPATHLEN + 1);
4475                 kmem_free(onm, olen);
4476                 goto out;
4477         }
4478 
4479 
4480         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4481                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4482                 kmem_free(onm, olen);
4483                 kmem_free(nnm, nlen);
4484                 goto out;
4485         }
4486 
4487 
4488         if (rdonly4(req, cs)) {
4489                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4490                 if (onm != converted_onm)
4491                         kmem_free(converted_onm, MAXPATHLEN + 1);
4492                 kmem_free(onm, olen);
4493                 if (nnm != converted_nnm)
4494                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4495                 kmem_free(nnm, nlen);
4496                 goto out;
4497         }
4498 
4499         /* check label of the target dir */
4500         if (is_system_labeled()) {
4501                 ASSERT(req->rq_label != NULL);
4502                 clabel = req->rq_label;
4503                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4504                     "got client label from request(1)",
4505                     struct svc_req *, req);
4506                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4507                         if (!do_rfs_label_check(clabel, ndvp,
4508                             EQUALITY_CHECK, cs->exi)) {
4509                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4510                                 goto err_out;
4511                         }
4512                 }
4513         }
4514 
4515         /*
4516          * Is the source a file and have a delegation?
4517          * We don't need to acquire va_seq before these lookups, if
4518          * it causes an update, cinfo.before will not match, which will
4519          * trigger a cache flush even if atomic is TRUE.
4520          */
4521         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4522             &error, cs->cr)) {
4523                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4524                     NULL)) {
4525                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4526                         goto err_out;
4527                 }
4528         }
4529 
4530         if (srcvp == NULL) {
4531                 *cs->statusp = resp->status = puterrno4(error);
4532                 if (onm != converted_onm)
4533                         kmem_free(converted_onm, MAXPATHLEN + 1);
4534                 kmem_free(onm, olen);
4535                 if (nnm != converted_nnm)
4536                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4537                 kmem_free(nnm, nlen);
4538                 goto out;
4539         }
4540 
4541         sfp_rele_grant_hold = 1;
4542 
4543         /* Does the destination exist and a file and have a delegation? */
4544         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4545             NULL, cs->cr)) {
4546                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4547                     NULL)) {
4548                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4549                         goto err_out;
4550                 }
4551         }
4552         fp_rele_grant_hold = 1;
4553 
4554 
4555         /* Check for NBMAND lock on both source and target */
4556         if (nbl_need_check(srcvp)) {
4557                 nbl_start_crit(srcvp, RW_READER);
4558                 in_crit_src = 1;
4559                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4560                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4561                         goto err_out;
4562                 }
4563         }
4564 
4565         if (targvp && nbl_need_check(targvp)) {
4566                 nbl_start_crit(targvp, RW_READER);
4567                 in_crit_targ = 1;
4568                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4569                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4570                         goto err_out;
4571                 }
4572         }
4573 
4574         /* Get source "before" change value */
4575         obdva.va_mask = AT_CTIME|AT_SEQ;
4576         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4577         if (!error) {
4578                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4579                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4580         }
4581         if (error) {
4582                 *cs->statusp = resp->status = puterrno4(error);
4583                 goto err_out;
4584         }
4585 
4586         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4587         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4588 
4589         if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4590             cs->cr, NULL, 0)) == 0 && fp != NULL) {
4591                 struct vattr va;
4592                 vnode_t *tvp;
4593 
4594                 rfs4_dbe_lock(fp->rf_dbe);
4595                 tvp = fp->rf_vp;
4596                 if (tvp)
4597                         VN_HOLD(tvp);
4598                 rfs4_dbe_unlock(fp->rf_dbe);
4599 
4600                 if (tvp) {
4601                         va.va_mask = AT_NLINK;
4602                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4603                             va.va_nlink == 0) {
4604                                 /* The file is gone and so should the state */
4605                                 if (in_crit_targ) {
4606                                         nbl_end_crit(targvp);
4607                                         in_crit_targ = 0;
4608                                 }
4609                                 rfs4_close_all_state(fp);
4610                         }
4611                         VN_RELE(tvp);
4612                 }
4613         }
4614         if (error == 0)
4615                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4616 
4617         if (in_crit_src)
4618                 nbl_end_crit(srcvp);
4619         if (srcvp)
4620                 VN_RELE(srcvp);
4621         if (in_crit_targ)
4622                 nbl_end_crit(targvp);
4623         if (targvp)
4624                 VN_RELE(targvp);
4625 
4626         if (sfp) {
4627                 rfs4_clear_dont_grant(sfp);
4628                 rfs4_file_rele(sfp);
4629         }
4630         if (fp) {
4631                 rfs4_clear_dont_grant(fp);
4632                 rfs4_file_rele(fp);
4633         }
4634 
4635         if (converted_onm != onm)
4636                 kmem_free(converted_onm, MAXPATHLEN + 1);
4637         kmem_free(onm, olen);
4638         if (converted_nnm != nnm)
4639                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4640         kmem_free(nnm, nlen);
4641 
4642         /*
4643          * Get the initial "after" sequence number, if it fails, set to zero
4644          */
4645         oidva.va_mask = AT_SEQ;
4646         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4647                 oidva.va_seq = 0;
4648 
4649         nidva.va_mask = AT_SEQ;
4650         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4651                 nidva.va_seq = 0;
4652 
4653         /*
4654          * Force modified data and metadata out to stable storage.
4655          */
4656         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4657         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4658 
4659         if (error) {
4660                 *cs->statusp = resp->status = puterrno4(error);
4661                 goto out;
4662         }
4663 
4664         /*
4665          * Get "after" change values, if it fails, simply return the
4666          * before value.
4667          */
4668         oadva.va_mask = AT_CTIME|AT_SEQ;
4669         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4670                 oadva.va_ctime = obdva.va_ctime;
4671                 oadva.va_seq = 0;
4672         }
4673 
4674         nadva.va_mask = AT_CTIME|AT_SEQ;
4675         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4676                 nadva.va_ctime = nbdva.va_ctime;
4677                 nadva.va_seq = 0;
4678         }
4679 
4680         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4681         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4682 
4683         /*
4684          * The cinfo.atomic = TRUE only if we have
4685          * non-zero va_seq's, and it has incremented by exactly one
4686          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4687          */
4688         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4689             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4690                 resp->source_cinfo.atomic = TRUE;
4691         else
4692                 resp->source_cinfo.atomic = FALSE;
4693 
4694         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4695             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4696                 resp->target_cinfo.atomic = TRUE;
4697         else
4698                 resp->target_cinfo.atomic = FALSE;
4699 
4700 #ifdef  VOLATILE_FH_TEST
4701         {
4702         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4703 
4704         /*
4705          * Add the renamed file handle to the volatile rename list
4706          */
4707         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4708                 /* file handles may expire on rename */
4709                 vnode_t *vp;
4710 
4711                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4712                 /*
4713                  * Already know that nnm will be a valid string
4714                  */
4715                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4716                     NULL, NULL, NULL);
4717                 kmem_free(nnm, nlen);
4718                 if (!error) {
4719                         add_volrnm_fh(cs->exi, vp);
4720                         VN_RELE(vp);
4721                 }
4722         }
4723         }
4724 #endif  /* VOLATILE_FH_TEST */
4725 
4726         *cs->statusp = resp->status = NFS4_OK;
4727 out:
4728         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4729             RENAME4res *, resp);
4730         return;
4731 
4732 err_out:
4733         if (onm != converted_onm)
4734                 kmem_free(converted_onm, MAXPATHLEN + 1);
4735         if (onm != NULL)
4736                 kmem_free(onm, olen);
4737         if (nnm != converted_nnm)
4738                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4739         if (nnm != NULL)
4740                 kmem_free(nnm, nlen);
4741 
4742         if (in_crit_src) nbl_end_crit(srcvp);
4743         if (in_crit_targ) nbl_end_crit(targvp);
4744         if (targvp) VN_RELE(targvp);
4745         if (srcvp) VN_RELE(srcvp);
4746         if (sfp) {
4747                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4748                 rfs4_file_rele(sfp);
4749         }
4750         if (fp) {
4751                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4752                 rfs4_file_rele(fp);
4753         }
4754 
4755         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4756             RENAME4res *, resp);
4757 }
4758 
4759 /* ARGSUSED */
4760 static void
4761 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4762     struct compound_state *cs)
4763 {
4764         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4765         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4766         rfs4_client_t *cp;
4767 
4768         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4769             RENEW4args *, args);
4770 
4771         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4772                 *cs->statusp = resp->status =
4773                     rfs4_check_clientid(&args->clientid, 0);
4774                 goto out;
4775         }
4776 
4777         if (rfs4_lease_expired(cp)) {
4778                 rfs4_client_rele(cp);
4779                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4780                 goto out;
4781         }
4782 
4783         rfs4_update_lease(cp);
4784 
4785         mutex_enter(cp->rc_cbinfo.cb_lock);
4786         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4787                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4788                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4789         } else {
4790                 *cs->statusp = resp->status = NFS4_OK;
4791         }
4792         mutex_exit(cp->rc_cbinfo.cb_lock);
4793 
4794         rfs4_client_rele(cp);
4795 
4796 out:
4797         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4798             RENEW4res *, resp);
4799 }
4800 
4801 /* ARGSUSED */
4802 static void
4803 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4804     struct compound_state *cs)
4805 {
4806         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4807 
4808         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4809 
4810         /* No need to check cs->access - we are not accessing any object */
4811         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4812                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4813                 goto out;
4814         }
4815         if (cs->vp != NULL) {
4816                 VN_RELE(cs->vp);
4817         }
4818         cs->vp = cs->saved_vp;
4819         cs->saved_vp = NULL;
4820         cs->exi = cs->saved_exi;
4821         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4822         *cs->statusp = resp->status = NFS4_OK;
4823         cs->deleg = FALSE;
4824 
4825 out:
4826         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4827             RESTOREFH4res *, resp);
4828 }
4829 
4830 /* ARGSUSED */
4831 static void
4832 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4833     struct compound_state *cs)
4834 {
4835         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4836 
4837         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4838 
4839         /* No need to check cs->access - we are not accessing any object */
4840         if (cs->vp == NULL) {
4841                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4842                 goto out;
4843         }
4844         if (cs->saved_vp != NULL) {
4845                 VN_RELE(cs->saved_vp);
4846         }
4847         cs->saved_vp = cs->vp;
4848         VN_HOLD(cs->saved_vp);
4849         cs->saved_exi = cs->exi;
4850         /*
4851          * since SAVEFH is fairly rare, don't alloc space for its fh
4852          * unless necessary.
4853          */
4854         if (cs->saved_fh.nfs_fh4_val == NULL) {
4855                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4856         }
4857         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4858         *cs->statusp = resp->status = NFS4_OK;
4859 
4860 out:
4861         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4862             SAVEFH4res *, resp);
4863 }
4864 
4865 /*
4866  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4867  * return the bitmap of attrs that were set successfully. It is also
4868  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4869  * always be called only after rfs4_do_set_attrs().
4870  *
4871  * Verify that the attributes are same as the expected ones. sargp->vap
4872  * and sargp->sbp contain the input attributes as translated from fattr4.
4873  *
4874  * This function verifies only the attrs that correspond to a vattr or
4875  * vfsstat struct. That is because of the extra step needed to get the
4876  * corresponding system structs. Other attributes have already been set or
4877  * verified by do_rfs4_set_attrs.
4878  *
4879  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4880  */
4881 static int
4882 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4883     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4884 {
4885         int error, ret_error = 0;
4886         int i, k;
4887         uint_t sva_mask = sargp->vap->va_mask;
4888         uint_t vbit;
4889         union nfs4_attr_u *na;
4890         uint8_t *amap;
4891         bool_t getsb = ntovp->vfsstat;
4892 
4893         if (sva_mask != 0) {
4894                 /*
4895                  * Okay to overwrite sargp->vap because we verify based
4896                  * on the incoming values.
4897                  */
4898                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4899                     sargp->cs->cr, NULL);
4900                 if (ret_error) {
4901                         if (resp == NULL)
4902                                 return (ret_error);
4903                         /*
4904                          * Must return bitmap of successful attrs
4905                          */
4906                         sva_mask = 0;   /* to prevent checking vap later */
4907                 } else {
4908                         /*
4909                          * Some file systems clobber va_mask. it is probably
4910                          * wrong of them to do so, nonethless we practice
4911                          * defensive coding.
4912                          * See bug id 4276830.
4913                          */
4914                         sargp->vap->va_mask = sva_mask;
4915                 }
4916         }
4917 
4918         if (getsb) {
4919                 /*
4920                  * Now get the superblock and loop on the bitmap, as there is
4921                  * no simple way of translating from superblock to bitmap4.
4922                  */
4923                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4924                 if (ret_error) {
4925                         if (resp == NULL)
4926                                 goto errout;
4927                         getsb = FALSE;
4928                 }
4929         }
4930 
4931         /*
4932          * Now loop and verify each attribute which getattr returned
4933          * whether it's the same as the input.
4934          */
4935         if (resp == NULL && !getsb && (sva_mask == 0))
4936                 goto errout;
4937 
4938         na = ntovp->na;
4939         amap = ntovp->amap;
4940         k = 0;
4941         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4942                 k = *amap;
4943                 ASSERT(nfs4_ntov_map[k].nval == k);
4944                 vbit = nfs4_ntov_map[k].vbit;
4945 
4946                 /*
4947                  * If vattr attribute but VOP_GETATTR failed, or it's
4948                  * superblock attribute but VFS_STATVFS failed, skip
4949                  */
4950                 if (vbit) {
4951                         if ((vbit & sva_mask) == 0)
4952                                 continue;
4953                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4954                         continue;
4955                 }
4956                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4957                 if (resp != NULL) {
4958                         if (error)
4959                                 ret_error = -1; /* not all match */
4960                         else    /* update response bitmap */
4961                                 *resp |= nfs4_ntov_map[k].fbit;
4962                         continue;
4963                 }
4964                 if (error) {
4965                         ret_error = -1; /* not all match */
4966                         break;
4967                 }
4968         }
4969 errout:
4970         return (ret_error);
4971 }
4972 
4973 /*
4974  * Decode the attribute to be set/verified. If the attr requires a sys op
4975  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4976  * call the sv_getit function for it, because the sys op hasn't yet been done.
4977  * Return 0 for success, error code if failed.
4978  *
4979  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4980  */
4981 static int
4982 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4983     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4984 {
4985         int error = 0;
4986         bool_t set_later;
4987 
4988         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4989 
4990         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4991                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4992                 /*
4993                  * don't verify yet if a vattr or sb dependent attr,
4994                  * because we don't have their sys values yet.
4995                  * Will be done later.
4996                  */
4997                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4998                         /*
4999                          * ACLs are a special case, since setting the MODE
5000                          * conflicts with setting the ACL.  We delay setting
5001                          * the ACL until all other attributes have been set.
5002                          * The ACL gets set in do_rfs4_op_setattr().
5003                          */
5004                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5005                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5006                                     sargp, nap);
5007                                 if (error) {
5008                                         xdr_free(nfs4_ntov_map[k].xfunc,
5009                                             (caddr_t)nap);
5010                                 }
5011                         }
5012                 }
5013         } else {
5014 #ifdef  DEBUG
5015                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5016                     "decoding attribute %d\n", k);
5017 #endif
5018                 error = EINVAL;
5019         }
5020         if (!error && resp_bval && !set_later) {
5021                 *resp_bval |= nfs4_ntov_map[k].fbit;
5022         }
5023 
5024         return (error);
5025 }
5026 
5027 /*
5028  * Set vattr based on incoming fattr4 attrs - used by setattr.
5029  * Set response mask. Ignore any values that are not writable vattr attrs.
5030  */
5031 static nfsstat4
5032 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5033     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5034     nfs4_attr_cmd_t cmd)
5035 {
5036         int error = 0;
5037         int i;
5038         char *attrs = fattrp->attrlist4;
5039         uint32_t attrslen = fattrp->attrlist4_len;
5040         XDR xdr;
5041         nfsstat4 status = NFS4_OK;
5042         vnode_t *vp = cs->vp;
5043         union nfs4_attr_u *na;
5044         uint8_t *amap;
5045 
5046 #ifndef lint
5047         /*
5048          * Make sure that maximum attribute number can be expressed as an
5049          * 8 bit quantity.
5050          */
5051         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5052 #endif
5053 
5054         if (vp == NULL) {
5055                 if (resp)
5056                         *resp = 0;
5057                 return (NFS4ERR_NOFILEHANDLE);
5058         }
5059         if (cs->access == CS_ACCESS_DENIED) {
5060                 if (resp)
5061                         *resp = 0;
5062                 return (NFS4ERR_ACCESS);
5063         }
5064 
5065         sargp->op = cmd;
5066         sargp->cs = cs;
5067         sargp->flag = 0;     /* may be set later */
5068         sargp->vap->va_mask = 0;
5069         sargp->rdattr_error = NFS4_OK;
5070         sargp->rdattr_error_req = FALSE;
5071         /* sargp->sbp is set by the caller */
5072 
5073         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5074 
5075         na = ntovp->na;
5076         amap = ntovp->amap;
5077 
5078         /*
5079          * The following loop iterates on the nfs4_ntov_map checking
5080          * if the fbit is set in the requested bitmap.
5081          * If set then we process the arguments using the
5082          * rfs4_fattr4 conversion functions to populate the setattr
5083          * vattr and va_mask. Any settable attrs that are not using vattr
5084          * will be set in this loop.
5085          */
5086         for (i = 0; i < nfs4_ntov_map_size; i++) {
5087                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5088                         continue;
5089                 }
5090                 /*
5091                  * If setattr, must be a writable attr.
5092                  * If verify/nverify, must be a readable attr.
5093                  */
5094                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5095                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5096                         /*
5097                          * Client tries to set/verify an
5098                          * unsupported attribute, tries to set
5099                          * a read only attr or verify a write
5100                          * only one - error!
5101                          */
5102                         break;
5103                 }
5104                 /*
5105                  * Decode the attribute to set/verify
5106                  */
5107                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5108                     &xdr, resp ? resp : NULL, na);
5109                 if (error)
5110                         break;
5111                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5112                 na++;
5113                 (ntovp->attrcnt)++;
5114                 if (nfs4_ntov_map[i].vfsstat)
5115                         ntovp->vfsstat = TRUE;
5116         }
5117 
5118         if (error != 0)
5119                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5120                     puterrno4(error));
5121         /* xdrmem_destroy(&xdrs); */        /* NO-OP */
5122         return (status);
5123 }
5124 
5125 static nfsstat4
5126 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5127     stateid4 *stateid)
5128 {
5129         int error = 0;
5130         struct nfs4_svgetit_arg sarg;
5131         bool_t trunc;
5132 
5133         nfsstat4 status = NFS4_OK;
5134         cred_t *cr = cs->cr;
5135         vnode_t *vp = cs->vp;
5136         struct nfs4_ntov_table ntov;
5137         struct statvfs64 sb;
5138         struct vattr bva;
5139         struct flock64 bf;
5140         int in_crit = 0;
5141         uint_t saved_mask = 0;
5142         caller_context_t ct;
5143 
5144         *resp = 0;
5145         sarg.sbp = &sb;
5146         sarg.is_referral = B_FALSE;
5147         nfs4_ntov_table_init(&ntov);
5148         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5149             NFS4ATTR_SETIT);
5150         if (status != NFS4_OK) {
5151                 /*
5152                  * failed set attrs
5153                  */
5154                 goto done;
5155         }
5156         if ((sarg.vap->va_mask == 0) &&
5157             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5158                 /*
5159                  * no further work to be done
5160                  */
5161                 goto done;
5162         }
5163 
5164         /*
5165          * If we got a request to set the ACL and the MODE, only
5166          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5167          * to change any other bits, along with setting an ACL,
5168          * gives NFS4ERR_INVAL.
5169          */
5170         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5171             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5172                 vattr_t va;
5173 
5174                 va.va_mask = AT_MODE;
5175                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5176                 if (error) {
5177                         status = puterrno4(error);
5178                         goto done;
5179                 }
5180                 if ((sarg.vap->va_mode ^ va.va_mode) &
5181                     ~(VSUID | VSGID | VSVTX)) {
5182                         status = NFS4ERR_INVAL;
5183                         goto done;
5184                 }
5185         }
5186 
5187         /* Check stateid only if size has been set */
5188         if (sarg.vap->va_mask & AT_SIZE) {
5189                 trunc = (sarg.vap->va_size == 0);
5190                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5191                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5192                 if (status != NFS4_OK)
5193                         goto done;
5194         } else {
5195                 ct.cc_sysid = 0;
5196                 ct.cc_pid = 0;
5197                 ct.cc_caller_id = nfs4_srv_caller_id;
5198                 ct.cc_flags = CC_DONTBLOCK;
5199         }
5200 
5201         /* XXX start of possible race with delegations */
5202 
5203         /*
5204          * We need to specially handle size changes because it is
5205          * possible for the client to create a file with read-only
5206          * modes, but with the file opened for writing. If the client
5207          * then tries to set the file size, e.g. ftruncate(3C),
5208          * fcntl(F_FREESP), the normal access checking done in
5209          * VOP_SETATTR would prevent the client from doing it even though
5210          * it should be allowed to do so.  To get around this, we do the
5211          * access checking for ourselves and use VOP_SPACE which doesn't
5212          * do the access checking.
5213          * Also the client should not be allowed to change the file
5214          * size if there is a conflicting non-blocking mandatory lock in
5215          * the region of the change.
5216          */
5217         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5218                 u_offset_t offset;
5219                 ssize_t length;
5220 
5221                 /*
5222                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5223                  * before returning, sarg.vap->va_mask is used to
5224                  * generate the setattr reply bitmap.  We also clear
5225                  * AT_SIZE below before calling VOP_SPACE.  For both
5226                  * of these cases, the va_mask needs to be saved here
5227                  * and restored after calling VOP_SETATTR.
5228                  */
5229                 saved_mask = sarg.vap->va_mask;
5230 
5231                 /*
5232                  * Check any possible conflict due to NBMAND locks.
5233                  * Get into critical region before VOP_GETATTR, so the
5234                  * size attribute is valid when checking conflicts.
5235                  */
5236                 if (nbl_need_check(vp)) {
5237                         nbl_start_crit(vp, RW_READER);
5238                         in_crit = 1;
5239                 }
5240 
5241                 bva.va_mask = AT_UID|AT_SIZE;
5242                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5243                         status = puterrno4(error);
5244                         goto done;
5245                 }
5246 
5247                 if (in_crit) {
5248                         if (sarg.vap->va_size < bva.va_size) {
5249                                 offset = sarg.vap->va_size;
5250                                 length = bva.va_size - sarg.vap->va_size;
5251                         } else {
5252                                 offset = bva.va_size;
5253                                 length = sarg.vap->va_size - bva.va_size;
5254                         }
5255                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5256                             &ct)) {
5257                                 status = NFS4ERR_LOCKED;
5258                                 goto done;
5259                         }
5260                 }
5261 
5262                 if (crgetuid(cr) == bva.va_uid) {
5263                         sarg.vap->va_mask &= ~AT_SIZE;
5264                         bf.l_type = F_WRLCK;
5265                         bf.l_whence = 0;
5266                         bf.l_start = (off64_t)sarg.vap->va_size;
5267                         bf.l_len = 0;
5268                         bf.l_sysid = 0;
5269                         bf.l_pid = 0;
5270                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5271                             (offset_t)sarg.vap->va_size, cr, &ct);
5272                 }
5273         }
5274 
5275         if (!error && sarg.vap->va_mask != 0)
5276                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5277 
5278         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5279         if (saved_mask & AT_SIZE)
5280                 sarg.vap->va_mask |= AT_SIZE;
5281 
5282         /*
5283          * If an ACL was being set, it has been delayed until now,
5284          * in order to set the mode (via the VOP_SETATTR() above) first.
5285          */
5286         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5287                 int i;
5288 
5289                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5290                         if (ntov.amap[i] == FATTR4_ACL)
5291                                 break;
5292                 if (i < NFS4_MAXNUM_ATTRS) {
5293                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5294                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5295                         if (error == 0) {
5296                                 *resp |= FATTR4_ACL_MASK;
5297                         } else if (error == ENOTSUP) {
5298                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5299                                 status = NFS4ERR_ATTRNOTSUPP;
5300                                 goto done;
5301                         }
5302                 } else {
5303                         NFS4_DEBUG(rfs4_debug,
5304                             (CE_NOTE, "do_rfs4_op_setattr: "
5305                             "unable to find ACL in fattr4"));
5306                         error = EINVAL;
5307                 }
5308         }
5309 
5310         if (error) {
5311                 /* check if a monitor detected a delegation conflict */
5312                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5313                         status = NFS4ERR_DELAY;
5314                 else
5315                         status = puterrno4(error);
5316 
5317                 /*
5318                  * Set the response bitmap when setattr failed.
5319                  * If VOP_SETATTR partially succeeded, test by doing a
5320                  * VOP_GETATTR on the object and comparing the data
5321                  * to the setattr arguments.
5322                  */
5323                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5324         } else {
5325                 /*
5326                  * Force modified metadata out to stable storage.
5327                  */
5328                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5329                 /*
5330                  * Set response bitmap
5331                  */
5332                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5333         }
5334 
5335 /* Return early and already have a NFSv4 error */
5336 done:
5337         /*
5338          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5339          * conversion sets both readable and writeable NFS4 attrs
5340          * for AT_MTIME and AT_ATIME.  The line below masks out
5341          * unrequested attrs from the setattr result bitmap.  This
5342          * is placed after the done: label to catch the ATTRNOTSUP
5343          * case.
5344          */
5345         *resp &= fattrp->attrmask;
5346 
5347         if (in_crit)
5348                 nbl_end_crit(vp);
5349 
5350         nfs4_ntov_table_free(&ntov, &sarg);
5351 
5352         return (status);
5353 }
5354 
5355 /* ARGSUSED */
5356 static void
5357 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5358     struct compound_state *cs)
5359 {
5360         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5361         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5362         bslabel_t *clabel;
5363 
5364         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5365             SETATTR4args *, args);
5366 
5367         if (cs->vp == NULL) {
5368                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5369                 goto out;
5370         }
5371 
5372         /*
5373          * If there is an unshared filesystem mounted on this vnode,
5374          * do not allow to setattr on this vnode.
5375          */
5376         if (vn_ismntpt(cs->vp)) {
5377                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5378                 goto out;
5379         }
5380 
5381         resp->attrsset = 0;
5382 
5383         if (rdonly4(req, cs)) {
5384                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5385                 goto out;
5386         }
5387 
5388         /* check label before setting attributes */
5389         if (is_system_labeled()) {
5390                 ASSERT(req->rq_label != NULL);
5391                 clabel = req->rq_label;
5392                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5393                     "got client label from request(1)",
5394                     struct svc_req *, req);
5395                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5396                         if (!do_rfs_label_check(clabel, cs->vp,
5397                             EQUALITY_CHECK, cs->exi)) {
5398                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5399                                 goto out;
5400                         }
5401                 }
5402         }
5403 
5404         *cs->statusp = resp->status =
5405             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5406             &args->stateid);
5407 
5408 out:
5409         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5410             SETATTR4res *, resp);
5411 }
5412 
5413 /* ARGSUSED */
5414 static void
5415 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5416     struct compound_state *cs)
5417 {
5418         /*
5419          * verify and nverify are exactly the same, except that nverify
5420          * succeeds when some argument changed, and verify succeeds when
5421          * when none changed.
5422          */
5423 
5424         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5425         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5426 
5427         int error;
5428         struct nfs4_svgetit_arg sarg;
5429         struct statvfs64 sb;
5430         struct nfs4_ntov_table ntov;
5431 
5432         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5433             VERIFY4args *, args);
5434 
5435         if (cs->vp == NULL) {
5436                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5437                 goto out;
5438         }
5439 
5440         sarg.sbp = &sb;
5441         sarg.is_referral = B_FALSE;
5442         nfs4_ntov_table_init(&ntov);
5443         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5444             &sarg, &ntov, NFS4ATTR_VERIT);
5445         if (resp->status != NFS4_OK) {
5446                 /*
5447                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5448                  * so could return -1 for "no match".
5449                  */
5450                 if (resp->status == -1)
5451                         resp->status = NFS4ERR_NOT_SAME;
5452                 goto done;
5453         }
5454         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5455         switch (error) {
5456         case 0:
5457                 resp->status = NFS4_OK;
5458                 break;
5459         case -1:
5460                 resp->status = NFS4ERR_NOT_SAME;
5461                 break;
5462         default:
5463                 resp->status = puterrno4(error);
5464                 break;
5465         }
5466 done:
5467         *cs->statusp = resp->status;
5468         nfs4_ntov_table_free(&ntov, &sarg);
5469 out:
5470         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5471             VERIFY4res *, resp);
5472 }
5473 
5474 /* ARGSUSED */
5475 static void
5476 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5477     struct compound_state *cs)
5478 {
5479         /*
5480          * verify and nverify are exactly the same, except that nverify
5481          * succeeds when some argument changed, and verify succeeds when
5482          * when none changed.
5483          */
5484 
5485         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5486         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5487 
5488         int error;
5489         struct nfs4_svgetit_arg sarg;
5490         struct statvfs64 sb;
5491         struct nfs4_ntov_table ntov;
5492 
5493         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5494             NVERIFY4args *, args);
5495 
5496         if (cs->vp == NULL) {
5497                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5498                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5499                     NVERIFY4res *, resp);
5500                 return;
5501         }
5502         sarg.sbp = &sb;
5503         sarg.is_referral = B_FALSE;
5504         nfs4_ntov_table_init(&ntov);
5505         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5506             &sarg, &ntov, NFS4ATTR_VERIT);
5507         if (resp->status != NFS4_OK) {
5508                 /*
5509                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5510                  * so could return -1 for "no match".
5511                  */
5512                 if (resp->status == -1)
5513                         resp->status = NFS4_OK;
5514                 goto done;
5515         }
5516         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5517         switch (error) {
5518         case 0:
5519                 resp->status = NFS4ERR_SAME;
5520                 break;
5521         case -1:
5522                 resp->status = NFS4_OK;
5523                 break;
5524         default:
5525                 resp->status = puterrno4(error);
5526                 break;
5527         }
5528 done:
5529         *cs->statusp = resp->status;
5530         nfs4_ntov_table_free(&ntov, &sarg);
5531 
5532         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5533             NVERIFY4res *, resp);
5534 }
5535 
/*
 * Number of iovec entries rfs4_op_write() keeps in its on-stack array.
 * A write whose mblk chain needs more entries than this gets a temporary
 * array from kmem_alloc() instead.
 *
 * XXX - This should live in an NFS header file.
 */
#define MAX_IOVECS      12
5540 
5541 /* ARGSUSED */
5542 static void
5543 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5544     struct compound_state *cs)
5545 {
5546         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5547         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5548         int error;
5549         vnode_t *vp;
5550         struct vattr bva;
5551         u_offset_t rlimit;
5552         struct uio uio;
5553         struct iovec iov[MAX_IOVECS];
5554         struct iovec *iovp;
5555         int iovcnt;
5556         int ioflag;
5557         cred_t *savecred, *cr;
5558         bool_t *deleg = &cs->deleg;
5559         nfsstat4 stat;
5560         int in_crit = 0;
5561         caller_context_t ct;
5562 
5563         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5564             WRITE4args *, args);
5565 
5566         vp = cs->vp;
5567         if (vp == NULL) {
5568                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5569                 goto out;
5570         }
5571         if (cs->access == CS_ACCESS_DENIED) {
5572                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5573                 goto out;
5574         }
5575 
5576         cr = cs->cr;
5577 
5578         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5579             deleg, TRUE, &ct)) != NFS4_OK) {
5580                 *cs->statusp = resp->status = stat;
5581                 goto out;
5582         }
5583 
5584         /*
5585          * We have to enter the critical region before calling VOP_RWLOCK
5586          * to avoid a deadlock with ufs.
5587          */
5588         if (nbl_need_check(vp)) {
5589                 nbl_start_crit(vp, RW_READER);
5590                 in_crit = 1;
5591                 if (nbl_conflict(vp, NBL_WRITE,
5592                     args->offset, args->data_len, 0, &ct)) {
5593                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5594                         goto out;
5595                 }
5596         }
5597 
5598         bva.va_mask = AT_MODE | AT_UID;
5599         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5600 
5601         /*
5602          * If we can't get the attributes, then we can't do the
5603          * right access checking.  So, we'll fail the request.
5604          */
5605         if (error) {
5606                 *cs->statusp = resp->status = puterrno4(error);
5607                 goto out;
5608         }
5609 
5610         if (rdonly4(req, cs)) {
5611                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5612                 goto out;
5613         }
5614 
5615         if (vp->v_type != VREG) {
5616                 *cs->statusp = resp->status =
5617                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5618                 goto out;
5619         }
5620 
5621         if (crgetuid(cr) != bva.va_uid &&
5622             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5623                 *cs->statusp = resp->status = puterrno4(error);
5624                 goto out;
5625         }
5626 
5627         if (MANDLOCK(vp, bva.va_mode)) {
5628                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5629                 goto out;
5630         }
5631 
5632         if (args->data_len == 0) {
5633                 *cs->statusp = resp->status = NFS4_OK;
5634                 resp->count = 0;
5635                 resp->committed = args->stable;
5636                 resp->writeverf = Write4verf;
5637                 goto out;
5638         }
5639 
5640         if (args->mblk != NULL) {
5641                 mblk_t *m;
5642                 uint_t bytes, round_len;
5643 
5644                 iovcnt = 0;
5645                 bytes = 0;
5646                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5647                 for (m = args->mblk;
5648                     m != NULL && bytes < round_len;
5649                     m = m->b_cont) {
5650                         iovcnt++;
5651                         bytes += MBLKL(m);
5652                 }
5653 #ifdef DEBUG
5654                 /* should have ended on an mblk boundary */
5655                 if (bytes != round_len) {
5656                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5657                             bytes, round_len, args->data_len);
5658                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5659                             (void *)args->mblk, (void *)m);
5660                         ASSERT(bytes == round_len);
5661                 }
5662 #endif
5663                 if (iovcnt <= MAX_IOVECS) {
5664                         iovp = iov;
5665                 } else {
5666                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5667                 }
5668                 mblk_to_iov(args->mblk, iovcnt, iovp);
5669         } else if (args->rlist != NULL) {
5670                 iovcnt = 1;
5671                 iovp = iov;
5672                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5673                 iovp->iov_len = args->data_len;
5674         } else {
5675                 iovcnt = 1;
5676                 iovp = iov;
5677                 iovp->iov_base = args->data_val;
5678                 iovp->iov_len = args->data_len;
5679         }
5680 
5681         uio.uio_iov = iovp;
5682         uio.uio_iovcnt = iovcnt;
5683 
5684         uio.uio_segflg = UIO_SYSSPACE;
5685         uio.uio_extflg = UIO_COPY_DEFAULT;
5686         uio.uio_loffset = args->offset;
5687         uio.uio_resid = args->data_len;
5688         uio.uio_llimit = curproc->p_fsz_ctl;
5689         rlimit = uio.uio_llimit - args->offset;
5690         if (rlimit < (u_offset_t)uio.uio_resid)
5691                 uio.uio_resid = (int)rlimit;
5692 
5693         if (args->stable == UNSTABLE4)
5694                 ioflag = 0;
5695         else if (args->stable == FILE_SYNC4)
5696                 ioflag = FSYNC;
5697         else if (args->stable == DATA_SYNC4)
5698                 ioflag = FDSYNC;
5699         else {
5700                 if (iovp != iov)
5701                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5702                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5703                 goto out;
5704         }
5705 
5706         /*
5707          * We're changing creds because VM may fault and we need
5708          * the cred of the current thread to be used if quota
5709          * checking is enabled.
5710          */
5711         savecred = curthread->t_cred;
5712         curthread->t_cred = cr;
5713         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5714         curthread->t_cred = savecred;
5715 
5716         if (iovp != iov)
5717                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5718 
5719         if (error) {
5720                 *cs->statusp = resp->status = puterrno4(error);
5721                 goto out;
5722         }
5723 
5724         *cs->statusp = resp->status = NFS4_OK;
5725         resp->count = args->data_len - uio.uio_resid;
5726 
5727         if (ioflag == 0)
5728                 resp->committed = UNSTABLE4;
5729         else
5730                 resp->committed = FILE_SYNC4;
5731 
5732         resp->writeverf = Write4verf;
5733 
5734 out:
5735         if (in_crit)
5736                 nbl_end_crit(vp);
5737 
5738         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5739             WRITE4res *, resp);
5740 }
5741 
5742 
/*
 * Local prototype for sec_svc_getcred().  rfs4_compound() calls it with the
 * request, a freshly allocated cred, and pointers to the compound state's
 * principal and nfsflavor fields, and treats a return of 0 as a bad
 * credential.
 *
 * XXX put in a header file
 */
extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5745 
5746 void
5747 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5748     struct svc_req *req, cred_t *cr, int *rv)
5749 {
5750         uint_t i;
5751         struct compound_state cs;
5752 
5753         if (rv != NULL)
5754                 *rv = 0;
5755         rfs4_init_compound_state(&cs);
5756         /*
         * Form a reply tag by copying over the request tag.
5758          */
5759         resp->tag.utf8string_val =
5760             kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5761         resp->tag.utf8string_len = args->tag.utf8string_len;
5762         bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5763             resp->tag.utf8string_len);
5764 
5765         cs.statusp = &resp->status;
5766         cs.req = req;
5767         resp->array = NULL;
5768         resp->array_len = 0;
5769 
5770         /*
5771          * XXX for now, minorversion should be zero
5772          */
5773         if (args->minorversion != NFS4_MINORVERSION) {
5774                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5775                     &cs, COMPOUND4args *, args);
5776                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5777                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5778                     &cs, COMPOUND4res *, resp);
5779                 return;
5780         }
5781 
5782         if (args->array_len == 0) {
5783                 resp->status = NFS4_OK;
5784                 return;
5785         }
5786 
5787         ASSERT(exi == NULL);
5788         ASSERT(cr == NULL);
5789 
5790         cr = crget();
5791         ASSERT(cr != NULL);
5792 
5793         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5794                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5795                     &cs, COMPOUND4args *, args);
5796                 crfree(cr);
5797                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5798                     &cs, COMPOUND4res *, resp);
5799                 svcerr_badcred(req->rq_xprt);
5800                 if (rv != NULL)
5801                         *rv = 1;
5802                 return;
5803         }
5804         resp->array_len = args->array_len;
5805         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5806             KM_SLEEP);
5807 
5808         cs.basecr = cr;
5809 
5810         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5811             COMPOUND4args *, args);
5812 
5813         /*
5814          * For now, NFS4 compound processing must be protected by
5815          * exported_lock because it can access more than one exportinfo
5816          * per compound and share/unshare can now change multiple
5817          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5818          * per proc (excluding public exinfo), and exi_count design
5819          * is sufficient to protect concurrent execution of NFS2/3
5820          * ops along with unexport.  This lock will be removed as
5821          * part of the NFSv4 phase 2 namespace redesign work.
5822          */
5823         rw_enter(&exported_lock, RW_READER);
5824 
5825         /*
5826          * If this is the first compound we've seen, we need to start all
5827          * new instances' grace periods.
5828          */
5829         if (rfs4_seen_first_compound == 0) {
5830                 rfs4_grace_start_new();
5831                 /*
5832                  * This must be set after rfs4_grace_start_new(), otherwise
5833                  * another thread could proceed past here before the former
5834                  * is finished.
5835                  */
5836                 rfs4_seen_first_compound = 1;
5837         }
5838 
5839         for (i = 0; i < args->array_len && cs.cont; i++) {
5840                 nfs_argop4 *argop;
5841                 nfs_resop4 *resop;
5842                 uint_t op;
5843 
5844                 argop = &args->array[i];
5845                 resop = &resp->array[i];
5846                 resop->resop = argop->argop;
5847                 op = (uint_t)resop->resop;
5848 
5849                 if (op < rfsv4disp_cnt) {
5850                         /*
5851                          * Count the individual ops here; NULL and COMPOUND
5852                          * are counted in common_dispatch()
5853                          */
5854                         rfsproccnt_v4_ptr[op].value.ui64++;
5855 
5856                         NFS4_DEBUG(rfs4_debug > 1,
5857                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5858                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5859                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5860                             rfs4_op_string[op], *cs.statusp));
5861                         if (*cs.statusp != NFS4_OK)
5862                                 cs.cont = FALSE;
5863                 } else {
5864                         /*
5865                          * This is effectively dead code since XDR code
5866                          * will have already returned BADXDR if op doesn't
5867                          * decode to legal value.  This only done for a
5868                          * day when XDR code doesn't verify v4 opcodes.
5869                          */
5870                         op = OP_ILLEGAL;
5871                         rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5872 
5873                         rfs4_op_illegal(argop, resop, req, &cs);
5874                         cs.cont = FALSE;
5875                 }
5876 
5877                 /*
5878                  * If not at last op, and if we are to stop, then
5879                  * compact the results array.
5880                  */
5881                 if ((i + 1) < args->array_len && !cs.cont) {
5882                         nfs_resop4 *new_res = kmem_alloc(
5883                             (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5884                         bcopy(resp->array,
5885                             new_res, (i+1) * sizeof (nfs_resop4));
5886                         kmem_free(resp->array,
5887                             args->array_len * sizeof (nfs_resop4));
5888 
5889                         resp->array_len =  i + 1;
5890                         resp->array = new_res;
5891                 }
5892         }
5893 
5894         rw_exit(&exported_lock);
5895 
5896         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5897             COMPOUND4res *, resp);
5898 
5899         if (cs.vp)
5900                 VN_RELE(cs.vp);
5901         if (cs.saved_vp)
5902                 VN_RELE(cs.saved_vp);
5903         if (cs.saved_fh.nfs_fh4_val)
5904                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5905 
5906         if (cs.basecr)
5907                 crfree(cs.basecr);
5908         if (cs.cr)
5909                 crfree(cs.cr);
5910         /*
5911          * done with this compound request, free the label
5912          */
5913 
5914         if (req->rq_label != NULL) {
5915                 kmem_free(req->rq_label, sizeof (bslabel_t));
5916                 req->rq_label = NULL;
5917         }
5918 }
5919 
5920 /*
5921  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5922  * XXX zero out the tag and array values. Need to investigate why the
5923  * XXX calls occur, but at least prevent the panic for now.
5924  */
5925 void
5926 rfs4_compound_free(COMPOUND4res *resp)
5927 {
5928         uint_t i;
5929 
5930         if (resp->tag.utf8string_val) {
5931                 UTF8STRING_FREE(resp->tag)
5932         }
5933 
5934         for (i = 0; i < resp->array_len; i++) {
5935                 nfs_resop4 *resop;
5936                 uint_t op;
5937 
5938                 resop = &resp->array[i];
5939                 op = (uint_t)resop->resop;
5940                 if (op < rfsv4disp_cnt) {
5941                         (*rfsv4disptab[op].dis_resfree)(resop);
5942                 }
5943         }
5944         if (resp->array != NULL) {
5945                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5946         }
5947 }
5948 
5949 /*
5950  * Process the value of the compound request rpc flags, as a bit-AND
5951  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5952  */
5953 void
5954 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5955 {
5956         int i;
5957         int flag = RPC_ALL;
5958 
5959         for (i = 0; flag && i < args->array_len; i++) {
5960                 uint_t op;
5961 
5962                 op = (uint_t)args->array[i].argop;
5963 
5964                 if (op < rfsv4disp_cnt)
5965                         flag &= rfsv4disptab[op].dis_flags;
5966                 else
5967                         flag = 0;
5968         }
5969         *flagp = flag;
5970 }
5971 
5972 nfsstat4
5973 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5974 {
5975         nfsstat4 e;
5976 
5977         rfs4_dbe_lock(cp->rc_dbe);
5978 
5979         if (cp->rc_sysidt != LM_NOSYSID) {
5980                 *sp = cp->rc_sysidt;
5981                 e = NFS4_OK;
5982 
5983         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5984                 *sp = cp->rc_sysidt;
5985                 e = NFS4_OK;
5986 
5987                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5988                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
5989         } else
5990                 e = NFS4ERR_DELAY;
5991 
5992         rfs4_dbe_unlock(cp->rc_dbe);
5993         return (e);
5994 }
5995 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug aid: log a one-line description of a file lock request via
 * cmn_err().  `str' prefixes the message, `operation' is the fcntl
 * command and `flk' the lock being described.  A zero l_len is printed
 * as all ones.  Only built for DEBUG, non-lint kernels.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        char *opname;
        char *tname;
        longlong_t span;

        /* Name of the fcntl command. */
        if (operation == F_GETLK)
                opname = "F_GETLK";
        else if (operation == F_SETLK)
                opname = "F_SETLK";
        else if (operation == F_SETLK_NBMAND)
                opname = "F_SETLK_NBMAND";
        else
                opname = "F_UNKNOWN";

        /* Name of the lock type. */
        if (flk->l_type == F_UNLCK)
                tname = "F_UNLCK";
        else if (flk->l_type == F_RDLCK)
                tname = "F_RDLCK";
        else if (flk->l_type == F_WRLCK)
                tname = "F_WRLCK";
        else
                tname = "F_UNKNOWN";

        ASSERT(flk->l_whence == 0);

        span = flk->l_len ? (longlong_t)flk->l_len : ~0LL;
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, opname, tname, (longlong_t)flk->l_start, span, flk->l_pid);
}

#define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
6032 
6033 /*ARGSUSED*/
6034 static bool_t
6035 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6036 {
6037         return (TRUE);
6038 }
6039 
6040 /*
6041  * Look up the pathname using the vp in cs as the directory vnode.
6042  * cs->vp will be the vnode for the file on success
6043  */
6044 
6045 static nfsstat4
6046 rfs4_lookup(component4 *component, struct svc_req *req,
6047     struct compound_state *cs)
6048 {
6049         char *nm;
6050         uint32_t len;
6051         nfsstat4 status;
6052         struct sockaddr *ca;
6053         char *name;
6054 
6055         if (cs->vp == NULL) {
6056                 return (NFS4ERR_NOFILEHANDLE);
6057         }
6058         if (cs->vp->v_type != VDIR) {
6059                 return (NFS4ERR_NOTDIR);
6060         }
6061 
6062         status = utf8_dir_verify(component);
6063         if (status != NFS4_OK)
6064                 return (status);
6065 
6066         nm = utf8_to_fn(component, &len, NULL);
6067         if (nm == NULL) {
6068                 return (NFS4ERR_INVAL);
6069         }
6070 
6071         if (len > MAXNAMELEN) {
6072                 kmem_free(nm, len);
6073                 return (NFS4ERR_NAMETOOLONG);
6074         }
6075 
6076         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6077         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6078             MAXPATHLEN + 1);
6079 
6080         if (name == NULL) {
6081                 kmem_free(nm, len);
6082                 return (NFS4ERR_INVAL);
6083         }
6084 
6085         status = do_rfs4_op_lookup(name, req, cs);
6086 
6087         if (name != nm)
6088                 kmem_free(name, MAXPATHLEN + 1);
6089 
6090         kmem_free(nm, len);
6091 
6092         return (status);
6093 }
6094 
6095 static nfsstat4
6096 rfs4_lookupfile(component4 *component, struct svc_req *req,
6097     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6098 {
6099         nfsstat4 status;
6100         vnode_t *dvp = cs->vp;
6101         vattr_t bva, ava, fva;
6102         int error;
6103 
6104         /* Get "before" change value */
6105         bva.va_mask = AT_CTIME|AT_SEQ;
6106         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6107         if (error)
6108                 return (puterrno4(error));
6109 
6110         /* rfs4_lookup may VN_RELE directory */
6111         VN_HOLD(dvp);
6112 
6113         status = rfs4_lookup(component, req, cs);
6114         if (status != NFS4_OK) {
6115                 VN_RELE(dvp);
6116                 return (status);
6117         }
6118 
6119         /*
6120          * Get "after" change value, if it fails, simply return the
6121          * before value.
6122          */
6123         ava.va_mask = AT_CTIME|AT_SEQ;
6124         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6125                 ava.va_ctime = bva.va_ctime;
6126                 ava.va_seq = 0;
6127         }
6128         VN_RELE(dvp);
6129 
6130         /*
6131          * Validate the file is a file
6132          */
6133         fva.va_mask = AT_TYPE|AT_MODE;
6134         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6135         if (error)
6136                 return (puterrno4(error));
6137 
6138         if (fva.va_type != VREG) {
6139                 if (fva.va_type == VDIR)
6140                         return (NFS4ERR_ISDIR);
6141                 if (fva.va_type == VLNK)
6142                         return (NFS4ERR_SYMLINK);
6143                 return (NFS4ERR_INVAL);
6144         }
6145 
6146         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6147         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6148 
6149         /*
6150          * It is undefined if VOP_LOOKUP will change va_seq, so
6151          * cinfo.atomic = TRUE only if we have
6152          * non-zero va_seq's, and they have not changed.
6153          */
6154         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6155                 cinfo->atomic = TRUE;
6156         else
6157                 cinfo->atomic = FALSE;
6158 
6159         /* Check for mandatory locking */
6160         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6161         return (check_open_access(access, cs, req));
6162 }
6163 
6164 static nfsstat4
6165 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6166     timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
6167 {
6168         int error;
6169         nfsstat4 status = NFS4_OK;
6170         vattr_t va;
6171 
6172 tryagain:
6173 
6174         /*
6175          * The file open mode used is VWRITE.  If the client needs
6176          * some other semantic, then it should do the access checking
6177          * itself.  It would have been nice to have the file open mode
6178          * passed as part of the arguments.
6179          */
6180 
6181         *created = TRUE;
6182         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6183 
6184         if (error) {
6185                 *created = FALSE;
6186 
6187                 /*
6188                  * If we got something other than file already exists
6189                  * then just return this error.  Otherwise, we got
6190                  * EEXIST.  If we were doing a GUARDED create, then
6191                  * just return this error.  Otherwise, we need to
6192                  * make sure that this wasn't a duplicate of an
6193                  * exclusive create request.
6194                  *
6195                  * The assumption is made that a non-exclusive create
6196                  * request will never return EEXIST.
6197                  */
6198 
6199                 if (error != EEXIST || mode == GUARDED4) {
6200                         status = puterrno4(error);
6201                         return (status);
6202                 }
6203                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6204                     NULL, NULL, NULL);
6205 
6206                 if (error) {
6207                         /*
6208                          * We couldn't find the file that we thought that
6209                          * we just created.  So, we'll just try creating
6210                          * it again.
6211                          */
6212                         if (error == ENOENT)
6213                                 goto tryagain;
6214 
6215                         status = puterrno4(error);
6216                         return (status);
6217                 }
6218 
6219                 if (mode == UNCHECKED4) {
6220                         /* existing object must be regular file */
6221                         if ((*vpp)->v_type != VREG) {
6222                                 if ((*vpp)->v_type == VDIR)
6223                                         status = NFS4ERR_ISDIR;
6224                                 else if ((*vpp)->v_type == VLNK)
6225                                         status = NFS4ERR_SYMLINK;
6226                                 else
6227                                         status = NFS4ERR_INVAL;
6228                                 VN_RELE(*vpp);
6229                                 return (status);
6230                         }
6231 
6232                         return (NFS4_OK);
6233                 }
6234 
6235                 /* Check for duplicate request */
6236                 ASSERT(mtime != 0);
6237                 va.va_mask = AT_MTIME;
6238                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6239                 if (!error) {
6240                         /* We found the file */
6241                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6242                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6243                                 /* but its not our creation */
6244                                 VN_RELE(*vpp);
6245                                 return (NFS4ERR_EXIST);
6246                         }
6247                         *created = TRUE; /* retrans of create == created */
6248                         return (NFS4_OK);
6249                 }
6250                 VN_RELE(*vpp);
6251                 return (NFS4ERR_EXIST);
6252         }
6253 
6254         return (NFS4_OK);
6255 }
6256 
6257 static nfsstat4
6258 check_open_access(uint32_t access, struct compound_state *cs,
6259     struct svc_req *req)
6260 {
6261         int error;
6262         vnode_t *vp;
6263         bool_t readonly;
6264         cred_t *cr = cs->cr;
6265 
6266         /* For now we don't allow mandatory locking as per V2/V3 */
6267         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6268                 return (NFS4ERR_ACCESS);
6269         }
6270 
6271         vp = cs->vp;
6272         ASSERT(cr != NULL && vp->v_type == VREG);
6273 
6274         /*
6275          * If the file system is exported read only and we are trying
6276          * to open for write, then return NFS4ERR_ROFS
6277          */
6278 
6279         readonly = rdonly4(req, cs);
6280 
6281         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6282                 return (NFS4ERR_ROFS);
6283 
6284         if (access & OPEN4_SHARE_ACCESS_READ) {
6285                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6286                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6287                         return (NFS4ERR_ACCESS);
6288                 }
6289         }
6290 
6291         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6292                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6293                 if (error)
6294                         return (NFS4ERR_ACCESS);
6295         }
6296 
6297         return (NFS4_OK);
6298 }
6299 
6300 static nfsstat4
6301 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6302     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6303 {
6304         struct nfs4_svgetit_arg sarg;
6305         struct nfs4_ntov_table ntov;
6306 
6307         bool_t ntov_table_init = FALSE;
6308         struct statvfs64 sb;
6309         nfsstat4 status;
6310         vnode_t *vp;
6311         vattr_t bva, ava, iva, cva, *vap;
6312         vnode_t *dvp;
6313         timespec32_t *mtime;
6314         char *nm = NULL;
6315         uint_t buflen;
6316         bool_t created;
6317         bool_t setsize = FALSE;
6318         len_t reqsize;
6319         int error;
6320         bool_t trunc;
6321         caller_context_t ct;
6322         component4 *component;
6323         bslabel_t *clabel;
6324         struct sockaddr *ca;
6325         char *name = NULL;
6326 
6327         sarg.sbp = &sb;
6328         sarg.is_referral = B_FALSE;
6329 
6330         dvp = cs->vp;
6331 
6332         /* Check if the file system is read only */
6333         if (rdonly4(req, cs))
6334                 return (NFS4ERR_ROFS);
6335 
6336         /* check the label of including directory */
6337         if (is_system_labeled()) {
6338                 ASSERT(req->rq_label != NULL);
6339                 clabel = req->rq_label;
6340                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6341                     "got client label from request(1)",
6342                     struct svc_req *, req);
6343                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6344                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6345                             cs->exi)) {
6346                                 return (NFS4ERR_ACCESS);
6347                         }
6348                 }
6349         }
6350 
6351         /*
6352          * Get the last component of path name in nm. cs will reference
6353          * the including directory on success.
6354          */
6355         component = &args->open_claim4_u.file;
6356         status = utf8_dir_verify(component);
6357         if (status != NFS4_OK)
6358                 return (status);
6359 
6360         nm = utf8_to_fn(component, &buflen, NULL);
6361 
6362         if (nm == NULL)
6363                 return (NFS4ERR_RESOURCE);
6364 
6365         if (buflen > MAXNAMELEN) {
6366                 kmem_free(nm, buflen);
6367                 return (NFS4ERR_NAMETOOLONG);
6368         }
6369 
6370         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6371         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6372         if (error) {
6373                 kmem_free(nm, buflen);
6374                 return (puterrno4(error));
6375         }
6376 
6377         if (bva.va_type != VDIR) {
6378                 kmem_free(nm, buflen);
6379                 return (NFS4ERR_NOTDIR);
6380         }
6381 
6382         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6383 
6384         switch (args->mode) {
6385         case GUARDED4:
6386                 /*FALLTHROUGH*/
6387         case UNCHECKED4:
6388                 nfs4_ntov_table_init(&ntov);
6389                 ntov_table_init = TRUE;
6390 
6391                 *attrset = 0;
6392                 status = do_rfs4_set_attrs(attrset,
6393                     &args->createhow4_u.createattrs,
6394                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6395 
6396                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6397                     sarg.vap->va_type != VREG) {
6398                         if (sarg.vap->va_type == VDIR)
6399                                 status = NFS4ERR_ISDIR;
6400                         else if (sarg.vap->va_type == VLNK)
6401                                 status = NFS4ERR_SYMLINK;
6402                         else
6403                                 status = NFS4ERR_INVAL;
6404                 }
6405 
6406                 if (status != NFS4_OK) {
6407                         kmem_free(nm, buflen);
6408                         nfs4_ntov_table_free(&ntov, &sarg);
6409                         *attrset = 0;
6410                         return (status);
6411                 }
6412 
6413                 vap = sarg.vap;
6414                 vap->va_type = VREG;
6415                 vap->va_mask |= AT_TYPE;
6416 
6417                 if ((vap->va_mask & AT_MODE) == 0) {
6418                         vap->va_mask |= AT_MODE;
6419                         vap->va_mode = (mode_t)0600;
6420                 }
6421 
6422                 if (vap->va_mask & AT_SIZE) {
6423 
6424                         /* Disallow create with a non-zero size */
6425 
6426                         if ((reqsize = sarg.vap->va_size) != 0) {
6427                                 kmem_free(nm, buflen);
6428                                 nfs4_ntov_table_free(&ntov, &sarg);
6429                                 *attrset = 0;
6430                                 return (NFS4ERR_INVAL);
6431                         }
6432                         setsize = TRUE;
6433                 }
6434                 break;
6435 
6436         case EXCLUSIVE4:
6437                 /* prohibit EXCL create of named attributes */
6438                 if (dvp->v_flag & V_XATTRDIR) {
6439                         kmem_free(nm, buflen);
6440                         *attrset = 0;
6441                         return (NFS4ERR_INVAL);
6442                 }
6443 
6444                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6445                 cva.va_type = VREG;
6446                 /*
6447                  * Ensure no time overflows. Assumes underlying
6448                  * filesystem supports at least 32 bits.
6449                  * Truncate nsec to usec resolution to allow valid
6450                  * compares even if the underlying filesystem truncates.
6451                  */
6452                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6453                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6454                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6455                 cva.va_mode = (mode_t)0;
6456                 vap = &cva;
6457 
6458                 /*
6459                  * For EXCL create, attrset is set to the server attr
6460                  * used to cache the client's verifier.
6461                  */
6462                 *attrset = FATTR4_TIME_MODIFY_MASK;
6463                 break;
6464         }
6465 
6466         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6467         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6468             MAXPATHLEN  + 1);
6469 
6470         if (name == NULL) {
6471                 kmem_free(nm, buflen);
6472                 return (NFS4ERR_SERVERFAULT);
6473         }
6474 
6475         status = create_vnode(dvp, name, vap, args->mode, mtime,
6476             cs->cr, &vp, &created);
6477         if (nm != name)
6478                 kmem_free(name, MAXPATHLEN + 1);
6479         kmem_free(nm, buflen);
6480 
6481         if (status != NFS4_OK) {
6482                 if (ntov_table_init)
6483                         nfs4_ntov_table_free(&ntov, &sarg);
6484                 *attrset = 0;
6485                 return (status);
6486         }
6487 
6488         trunc = (setsize && !created);
6489 
6490         if (args->mode != EXCLUSIVE4) {
6491                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6492 
6493                 /*
6494                  * True verification that object was created with correct
6495                  * attrs is impossible.  The attrs could have been changed
6496                  * immediately after object creation.  If attributes did
6497                  * not verify, the only recourse for the server is to
6498                  * destroy the object.  Maybe if some attrs (like gid)
6499                  * are set incorrectly, the object should be destroyed;
6500                  * however, seems bad as a default policy.  Do we really
6501                  * want to destroy an object over one of the times not
6502                  * verifying correctly?  For these reasons, the server
6503                  * currently sets bits in attrset for createattrs
6504                  * that were set; however, no verification is done.
6505                  *
6506                  * vmask_to_nmask accounts for vattr bits set on create
6507                  *      [do_rfs4_set_attrs() only sets resp bits for
6508                  *       non-vattr/vfs bits.]
6509                  * Mask off any bits we set by default so as not to return
6510                  * more attrset bits than were requested in createattrs
6511                  */
6512                 if (created) {
6513                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6514                         *attrset &= createmask;
6515                 } else {
6516                         /*
6517                          * We did not create the vnode (we tried but it
6518                          * already existed).  In this case, the only createattr
6519                          * that the spec allows the server to set is size,
6520                          * and even then, it can only be set if it is 0.
6521                          */
6522                         *attrset = 0;
6523                         if (trunc)
6524                                 *attrset = FATTR4_SIZE_MASK;
6525                 }
6526         }
6527         if (ntov_table_init)
6528                 nfs4_ntov_table_free(&ntov, &sarg);
6529 
6530         /*
6531          * Get the initial "after" sequence number, if it fails,
6532          * set to zero, time to before.
6533          */
6534         iva.va_mask = AT_CTIME|AT_SEQ;
6535         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6536                 iva.va_seq = 0;
6537                 iva.va_ctime = bva.va_ctime;
6538         }
6539 
6540         /*
6541          * create_vnode attempts to create the file exclusive,
6542          * if it already exists the VOP_CREATE will fail and
6543          * may not increase va_seq. It is atomic if
6544          * we haven't changed the directory, but if it has changed
6545          * we don't know what changed it.
6546          */
6547         if (!created) {
6548                 if (bva.va_seq && iva.va_seq &&
6549                     bva.va_seq == iva.va_seq)
6550                         cinfo->atomic = TRUE;
6551                 else
6552                         cinfo->atomic = FALSE;
6553                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6554         } else {
6555                 /*
6556                  * The entry was created, we need to sync the
6557                  * directory metadata.
6558                  */
6559                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6560 
6561                 /*
6562                  * Get "after" change value, if it fails, simply return the
6563                  * before value.
6564                  */
6565                 ava.va_mask = AT_CTIME|AT_SEQ;
6566                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6567                         ava.va_ctime = bva.va_ctime;
6568                         ava.va_seq = 0;
6569                 }
6570 
6571                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6572 
6573                 /*
6574                  * The cinfo->atomic = TRUE only if we have
6575                  * non-zero va_seq's, and it has incremented by exactly one
6576                  * during the create_vnode and it didn't
6577                  * change during the VOP_FSYNC.
6578                  */
6579                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6580                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6581                         cinfo->atomic = TRUE;
6582                 else
6583                         cinfo->atomic = FALSE;
6584         }
6585 
6586         /* Check for mandatory locking and that the size gets set. */
6587         cva.va_mask = AT_MODE;
6588         if (setsize)
6589                 cva.va_mask |= AT_SIZE;
6590 
6591         /* Assume the worst */
6592         cs->mandlock = TRUE;
6593 
6594         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6595                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6596 
6597                 /*
6598                  * Truncate the file if necessary; this would be
6599                  * the case for create over an existing file.
6600                  */
6601 
6602                 if (trunc) {
6603                         int in_crit = 0;
6604                         rfs4_file_t *fp;
6605                         bool_t create = FALSE;
6606 
6607                         /*
6608                          * We are writing over an existing file.
6609                          * Check to see if we need to recall a delegation.
6610                          */
6611                         rfs4_hold_deleg_policy();
6612                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6613                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6614                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6615                                         rfs4_file_rele(fp);
6616                                         rfs4_rele_deleg_policy();
6617                                         VN_RELE(vp);
6618                                         *attrset = 0;
6619                                         return (NFS4ERR_DELAY);
6620                                 }
6621                                 rfs4_file_rele(fp);
6622                         }
6623                         rfs4_rele_deleg_policy();
6624 
6625                         if (nbl_need_check(vp)) {
6626                                 in_crit = 1;
6627 
6628                                 ASSERT(reqsize == 0);
6629 
6630                                 nbl_start_crit(vp, RW_READER);
6631                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6632                                     cva.va_size, 0, NULL)) {
6633                                         in_crit = 0;
6634                                         nbl_end_crit(vp);
6635                                         VN_RELE(vp);
6636                                         *attrset = 0;
6637                                         return (NFS4ERR_ACCESS);
6638                                 }
6639                         }
6640                         ct.cc_sysid = 0;
6641                         ct.cc_pid = 0;
6642                         ct.cc_caller_id = nfs4_srv_caller_id;
6643                         ct.cc_flags = CC_DONTBLOCK;
6644 
6645                         cva.va_mask = AT_SIZE;
6646                         cva.va_size = reqsize;
6647                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6648                         if (in_crit)
6649                                 nbl_end_crit(vp);
6650                 }
6651         }
6652 
6653         error = makefh4(&cs->fh, vp, cs->exi);
6654 
6655         /*
6656          * Force modified data and metadata out to stable storage.
6657          */
6658         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6659 
6660         if (error) {
6661                 VN_RELE(vp);
6662                 *attrset = 0;
6663                 return (puterrno4(error));
6664         }
6665 
6666         /* if parent dir is attrdir, set namedattr fh flag */
6667         if (dvp->v_flag & V_XATTRDIR)
6668                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6669 
6670         if (cs->vp)
6671                 VN_RELE(cs->vp);
6672 
6673         cs->vp = vp;
6674 
6675         /*
6676          * if we did not create the file, we will need to check
6677          * the access bits on the file
6678          */
6679 
6680         if (!created) {
6681                 if (setsize)
6682                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6683                 status = check_open_access(args->share_access, cs, req);
6684                 if (status != NFS4_OK)
6685                         *attrset = 0;
6686         }
6687         return (status);
6688 }
6689 
/*
 * rfs4_do_open: establish (or upgrade) the open state for open owner `oo'
 * on the file currently referenced by cs->vp / cs->fh.
 *
 *      cs        - compound state; cs->vp, cs->fh and cs->cr are consumed
 *                  and cs->deleg is set on success.
 *      req       - currently unused.
 *      oo        - open owner performing the OPEN.
 *      deleg     - delegation request passed to rfs4_grant_delegation().
 *      access    - requested OPEN4_SHARE_ACCESS_* bits.
 *      deny      - requested OPEN4_SHARE_DENY_* bits.
 *      resp      - OPEN result; status, stateid and delegation are filled in.
 *      deleg_cur - non-zero to skip VOP_OPEN() and only do an open upgrade.
 *
 * The outcome is reported only through resp->status:
 *      NFS4ERR_RESOURCE     no file or state structure could be obtained
 *      (sysid status)       whatever rfs4_client_sysid() returned
 *      (share status)       whatever rfs4_share() returned
 *      NFS4ERR_OLD_STATEID  the state was closed while a recall was pending
 *      NFS4ERR_DELAY        a conflicting delegation is still outstanding,
 *                           or VOP_OPEN() reported EAGAIN with CC_WOULDBLOCK
 *      NFS4ERR_SERVERFAULT  any other VOP_OPEN() failure
 *      NFS4_OK              success
 *
 * Locking: sp->rs_dbe is taken before fp->rf_dbe, and they are dropped in
 * the reverse order.  Every failure path releases the locks it holds, undoes
 * a share reservation added by this call, "closes" a state that this call
 * created (screate == TRUE), and drops the fp and sp references.
 */
/*ARGSUSED*/
static void
rfs4_do_open(struct compound_state *cs, struct svc_req *req,
    rfs4_openowner_t *oo, delegreq_t deleg,
    uint32_t access, uint32_t deny,
    OPEN4res *resp, int deleg_cur)
{
        /* XXX Currently not using req  */
        rfs4_state_t *sp;
        rfs4_file_t *fp;
        bool_t screate = TRUE;
        bool_t fcreate = TRUE;
        uint32_t open_a, share_a;
        uint32_t open_d, share_d;
        rfs4_deleg_state_t *dsp;
        sysid_t sysid;
        nfsstat4 status;
        caller_context_t ct;
        int fflags = 0;
        int recall = 0;
        int err;
        int first_open;

        /* get the file struct and hold a lock on it during initial open */
        fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
        if (fp == NULL) {
                resp->status = NFS4ERR_RESOURCE;
                DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
                return;
        }

        /*
         * NOTE(review): this is the only path that drops rf_file_rwlock
         * explicitly; presumably rfs4_findstate_by_owner_file() releases it
         * when it succeeds -- confirm against its implementation.
         */
        sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
        if (sp == NULL) {
                resp->status = NFS4ERR_RESOURCE;
                DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
                /* No need to keep any reference */
                rw_exit(&fp->rf_file_rwlock);
                rfs4_file_rele(fp);
                return;
        }

        /* try to get the sysid before continuing */
        if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
                resp->status = status;
                rfs4_file_rele(fp);
                /* Not a fully formed open; "close" it */
                if (screate == TRUE)
                        rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                rfs4_state_rele(sp);
                return;
        }

        /* Calculate the fflags for this OPEN. */
        if (access & OPEN4_SHARE_ACCESS_READ)
                fflags |= FREAD;
        if (access & OPEN4_SHARE_ACCESS_WRITE)
                fflags |= FWRITE;

        rfs4_dbe_lock(sp->rs_dbe);

        /*
         * Calculate the new deny and access mode that this open is adding to
         * the file for this open owner;
         */
        open_d = (deny & ~sp->rs_open_deny);
        open_a = (access & ~sp->rs_open_access);

        /*
         * Calculate the new share access and share deny modes that this open
         * is adding to the file for this open owner;
         */
        share_a = (access & ~sp->rs_share_access);
        share_d = (deny & ~sp->rs_share_deny);

        /* No access bits recorded yet means this state has never been opened */
        first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;

        /*
         * Check to see the client has already sent an open for this
         * open owner on this file with the same share/deny modes.
         * If so, we don't need to check for a conflict and we don't
         * need to add another shrlock.  If not, then we need to
         * check for conflicts in deny and access before checking for
         * conflicts in delegation.  We don't want to recall a
         * delegation based on an open that will eventually fail based
         * on shares modes.
         */

        if (share_a || share_d) {
                if ((err = rfs4_share(sp, access, deny)) != 0) {
                        rfs4_dbe_unlock(sp->rs_dbe);
                        resp->status = err;

                        rfs4_file_rele(fp);
                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        return;
                }
        }

        rfs4_dbe_lock(fp->rf_dbe);

        /*
         * Check to see if this file is delegated and if so, if a
         * recall needs to be done.
         */
        if (rfs4_check_recall(sp, access)) {
                /* Both locks are dropped across the recall and the delay */
                rfs4_dbe_unlock(fp->rf_dbe);
                rfs4_dbe_unlock(sp->rs_dbe);
                rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
                delay(NFS4_DELEGATION_CONFLICT_DELAY);
                rfs4_dbe_lock(sp->rs_dbe);

                /* if state closed while lock was dropped */
                if (sp->rs_closed) {
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);
                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        resp->status = NFS4ERR_OLD_STATEID;
                        return;
                }

                rfs4_dbe_lock(fp->rf_dbe);
                /* Let's see if the delegation was returned */
                if (rfs4_check_recall(sp, access)) {
                        rfs4_dbe_unlock(fp->rf_dbe);
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);
                        rfs4_update_lease(sp->rs_owner->ro_client);

                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        resp->status = NFS4ERR_DELAY;
                        return;
                }
        }
        /*
         * the share check passed and any delegation conflict has been
         * taken care of, now call vop_open.
         * if this is the first open then call vop_open with fflags.
         * if not, call vn_open_upgrade with just the upgrade flags.
         *
         * if the file has been opened already, it will have the current
         * access mode in the state struct.  if it has no share access, then
         * this is a new open.
         *
         * However, if this is open with CLAIM_DELEGATE_CUR, then don't
         * call VOP_OPEN(), just do the open upgrade.
         */
        if (first_open && !deleg_cur) {
                ct.cc_sysid = sysid;
                ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
                ct.cc_caller_id = nfs4_srv_caller_id;
                ct.cc_flags = CC_DONTBLOCK;
                err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
                if (err) {
                        rfs4_dbe_unlock(fp->rf_dbe);
                        if (share_a || share_d)
                                (void) rfs4_unshare(sp);
                        rfs4_dbe_unlock(sp->rs_dbe);
                        rfs4_file_rele(fp);

                        /* Not a fully formed open; "close" it */
                        if (screate == TRUE)
                                rfs4_state_close(sp, FALSE, FALSE, cs->cr);
                        rfs4_state_rele(sp);
                        /* check if a monitor detected a delegation conflict */
                        if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
                                resp->status = NFS4ERR_DELAY;
                        else
                                resp->status = NFS4ERR_SERVERFAULT;
                        return;
                }
        } else { /* open upgrade */
                /*
                 * calculate the fflags for the new mode that is being added
                 * by this upgrade.
                 */
                fflags = 0;
                if (open_a & OPEN4_SHARE_ACCESS_READ)
                        fflags |= FREAD;
                if (open_a & OPEN4_SHARE_ACCESS_WRITE)
                        fflags |= FWRITE;
                vn_open_upgrade(cs->vp, fflags);
        }
        /* Record the (possibly widened) modes in the open state */
        sp->rs_open_access |= access;
        sp->rs_open_deny |= deny;

        /*
         * Per-file counters are bumped only for the bits this open newly
         * added (open_d / open_a); the summary masks take the full request.
         */
        if (open_d & OPEN4_SHARE_DENY_READ)
                fp->rf_deny_read++;
        if (open_d & OPEN4_SHARE_DENY_WRITE)
                fp->rf_deny_write++;
        fp->rf_share_deny |= deny;

        if (open_a & OPEN4_SHARE_ACCESS_READ)
                fp->rf_access_read++;
        if (open_a & OPEN4_SHARE_ACCESS_WRITE)
                fp->rf_access_write++;
        fp->rf_share_access |= access;

        /*
         * Check for delegation here. if the deleg argument is not
         * DELEG_ANY, then this is a reclaim from a client and
         * we must honor the delegation requested. If necessary we can
         * set the recall flag.
         */

        dsp = rfs4_grant_delegation(deleg, sp, &recall);

        /* Remember for the rest of the compound whether a write deleg exists */
        cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);

        next_stateid(&sp->rs_stateid);

        resp->stateid = sp->rs_stateid.stateid;

        rfs4_dbe_unlock(fp->rf_dbe);
        rfs4_dbe_unlock(sp->rs_dbe);

        /* A delegation was granted: describe it in the reply, drop our hold */
        if (dsp) {
                rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
                rfs4_deleg_state_rele(dsp);
        }

        rfs4_file_rele(fp);
        rfs4_state_rele(sp);

        resp->status = NFS4_OK;
}
6928 
6929 /*ARGSUSED*/
6930 static void
6931 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6932     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6933 {
6934         change_info4 *cinfo = &resp->cinfo;
6935         bitmap4 *attrset = &resp->attrset;
6936 
6937         if (args->opentype == OPEN4_NOCREATE)
6938                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6939                     req, cs, args->share_access, cinfo);
6940         else {
6941                 /* inhibit delegation grants during exclusive create */
6942 
6943                 if (args->mode == EXCLUSIVE4)
6944                         rfs4_disable_delegation();
6945 
6946                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6947                     oo->ro_client->rc_clientid);
6948         }
6949 
6950         if (resp->status == NFS4_OK) {
6951 
6952                 /* cs->vp cs->fh now reference the desired file */
6953 
6954                 rfs4_do_open(cs, req, oo,
6955                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6956                     args->share_access, args->share_deny, resp, 0);
6957 
6958                 /*
6959                  * If rfs4_createfile set attrset, we must
6960                  * clear this attrset before the response is copied.
6961                  */
6962                 if (resp->status != NFS4_OK && resp->attrset) {
6963                         resp->attrset = 0;
6964                 }
6965         }
6966         else
6967                 *cs->statusp = resp->status;
6968 
6969         if (args->mode == EXCLUSIVE4)
6970                 rfs4_enable_delegation();
6971 }
6972 
6973 /*ARGSUSED*/
6974 static void
6975 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6976     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6977 {
6978         change_info4 *cinfo = &resp->cinfo;
6979         vattr_t va;
6980         vtype_t v_type = cs->vp->v_type;
6981         int error = 0;
6982 
6983         /* Verify that we have a regular file */
6984         if (v_type != VREG) {
6985                 if (v_type == VDIR)
6986                         resp->status = NFS4ERR_ISDIR;
6987                 else if (v_type == VLNK)
6988                         resp->status = NFS4ERR_SYMLINK;
6989                 else
6990                         resp->status = NFS4ERR_INVAL;
6991                 return;
6992         }
6993 
6994         va.va_mask = AT_MODE|AT_UID;
6995         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
6996         if (error) {
6997                 resp->status = puterrno4(error);
6998                 return;
6999         }
7000 
7001         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7002 
7003         /*
7004          * Check if we have access to the file, Note the the file
7005          * could have originally been open UNCHECKED or GUARDED
7006          * with mode bits that will now fail, but there is nothing
7007          * we can really do about that except in the case that the
7008          * owner of the file is the one requesting the open.
7009          */
7010         if (crgetuid(cs->cr) != va.va_uid) {
7011                 resp->status = check_open_access(args->share_access, cs, req);
7012                 if (resp->status != NFS4_OK) {
7013                         return;
7014                 }
7015         }
7016 
7017         /*
7018          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7019          */
7020         cinfo->before = 0;
7021         cinfo->after = 0;
7022         cinfo->atomic = FALSE;
7023 
7024         rfs4_do_open(cs, req, oo,
7025             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7026             args->share_access, args->share_deny, resp, 0);
7027 }
7028 
7029 static void
7030 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7031     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7032 {
7033         int error;
7034         nfsstat4 status;
7035         stateid4 stateid =
7036             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7037         rfs4_deleg_state_t *dsp;
7038 
7039         /*
7040          * Find the state info from the stateid and confirm that the
7041          * file is delegated.  If the state openowner is the same as
7042          * the supplied openowner we're done. If not, get the file
7043          * info from the found state info. Use that file info to
7044          * create the state for this lock owner. Note solaris doen't
7045          * really need the pathname to find the file. We may want to
7046          * lookup the pathname and make sure that the vp exist and
7047          * matches the vp in the file structure. However it is
7048          * possible that the pathname nolonger exists (local process
7049          * unlinks the file), so this may not be that useful.
7050          */
7051 
7052         status = rfs4_get_deleg_state(&stateid, &dsp);
7053         if (status != NFS4_OK) {
7054                 resp->status = status;
7055                 return;
7056         }
7057 
7058         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7059 
7060         /*
7061          * New lock owner, create state. Since this was probably called
7062          * in response to a CB_RECALL we set deleg to DELEG_NONE
7063          */
7064 
7065         ASSERT(cs->vp != NULL);
7066         VN_RELE(cs->vp);
7067         VN_HOLD(dsp->rds_finfo->rf_vp);
7068         cs->vp = dsp->rds_finfo->rf_vp;
7069 
7070         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7071                 rfs4_deleg_state_rele(dsp);
7072                 *cs->statusp = resp->status = puterrno4(error);
7073                 return;
7074         }
7075 
7076         /* Mark progress for delegation returns */
7077         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7078         rfs4_deleg_state_rele(dsp);
7079         rfs4_do_open(cs, req, oo, DELEG_NONE,
7080             args->share_access, args->share_deny, resp, 1);
7081 }
7082 
7083 /*ARGSUSED*/
7084 static void
7085 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7086     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7087 {
7088         /*
7089          * Lookup the pathname, it must already exist since this file
7090          * was delegated.
7091          *
7092          * Find the file and state info for this vp and open owner pair.
7093          *      check that they are in fact delegated.
7094          *      check that the state access and deny modes are the same.
7095          *
7096          * Return the delgation possibly seting the recall flag.
7097          */
7098         rfs4_file_t *fp;
7099         rfs4_state_t *sp;
7100         bool_t create = FALSE;
7101         bool_t dcreate = FALSE;
7102         rfs4_deleg_state_t *dsp;
7103         nfsace4 *ace;
7104 
7105         /* Note we ignore oflags */
7106         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7107             req, cs, args->share_access, &resp->cinfo);
7108 
7109         if (resp->status != NFS4_OK) {
7110                 return;
7111         }
7112 
7113         /* get the file struct and hold a lock on it during initial open */
7114         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7115         if (fp == NULL) {
7116                 resp->status = NFS4ERR_RESOURCE;
7117                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7118                 return;
7119         }
7120 
7121         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7122         if (sp == NULL) {
7123                 resp->status = NFS4ERR_SERVERFAULT;
7124                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7125                 rw_exit(&fp->rf_file_rwlock);
7126                 rfs4_file_rele(fp);
7127                 return;
7128         }
7129 
7130         rfs4_dbe_lock(sp->rs_dbe);
7131         rfs4_dbe_lock(fp->rf_dbe);
7132         if (args->share_access != sp->rs_share_access ||
7133             args->share_deny != sp->rs_share_deny ||
7134             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7135                 NFS4_DEBUG(rfs4_debug,
7136                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7137                 rfs4_dbe_unlock(fp->rf_dbe);
7138                 rfs4_dbe_unlock(sp->rs_dbe);
7139                 rfs4_file_rele(fp);
7140                 rfs4_state_rele(sp);
7141                 resp->status = NFS4ERR_SERVERFAULT;
7142                 return;
7143         }
7144         rfs4_dbe_unlock(fp->rf_dbe);
7145         rfs4_dbe_unlock(sp->rs_dbe);
7146 
7147         dsp = rfs4_finddeleg(sp, &dcreate);
7148         if (dsp == NULL) {
7149                 rfs4_state_rele(sp);
7150                 rfs4_file_rele(fp);
7151                 resp->status = NFS4ERR_SERVERFAULT;
7152                 return;
7153         }
7154 
7155         next_stateid(&sp->rs_stateid);
7156 
7157         resp->stateid = sp->rs_stateid.stateid;
7158 
7159         resp->delegation.delegation_type = dsp->rds_dtype;
7160 
7161         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7162                 open_read_delegation4 *rv =
7163                     &resp->delegation.open_delegation4_u.read;
7164 
7165                 rv->stateid = dsp->rds_delegid.stateid;
7166                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7167                 ace = &rv->permissions;
7168         } else {
7169                 open_write_delegation4 *rv =
7170                     &resp->delegation.open_delegation4_u.write;
7171 
7172                 rv->stateid = dsp->rds_delegid.stateid;
7173                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7174                 ace = &rv->permissions;
7175                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7176                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7177         }
7178 
7179         /* XXX For now */
7180         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7181         ace->flag = 0;
7182         ace->access_mask = 0;
7183         ace->who.utf8string_len = 0;
7184         ace->who.utf8string_val = 0;
7185 
7186         rfs4_deleg_state_rele(dsp);
7187         rfs4_state_rele(sp);
7188         rfs4_file_rele(fp);
7189 }
7190 
/*
 * Verdict of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
        NFS4_CHKSEQ_OKAY = 0,   /* request carries the next expected seqid */
        NFS4_CHKSEQ_REPLAY,     /* 1: same seqid and same op as last reply */
        NFS4_CHKSEQ_BAD         /* 2: any other seqid, or op mismatch */
} rfs4_chkseq_t;
7196 
7197 /*
7198  * Generic function for sequence number checks.
7199  */
7200 static rfs4_chkseq_t
7201 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7202     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7203 {
7204         /* Same sequence ids and matching operations? */
7205         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7206                 if (copyres == TRUE) {
7207                         rfs4_free_reply(resop);
7208                         rfs4_copy_reply(resop, lastop);
7209                 }
7210                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7211                     "Replayed SEQID %d\n", seqid));
7212                 return (NFS4_CHKSEQ_REPLAY);
7213         }
7214 
7215         /* If the incoming sequence is not the next expected then it is bad */
7216         if (rqst_seq != seqid + 1) {
7217                 if (rqst_seq == seqid) {
7218                         NFS4_DEBUG(rfs4_debug,
7219                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7220                             "but last op was %d current op is %d\n",
7221                             lastop->resop, resop->resop));
7222                         return (NFS4_CHKSEQ_BAD);
7223                 }
7224                 NFS4_DEBUG(rfs4_debug,
7225                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7226                     rqst_seq, seqid));
7227                 return (NFS4_CHKSEQ_BAD);
7228         }
7229 
7230         /* Everything okay -- next expected */
7231         return (NFS4_CHKSEQ_OKAY);
7232 }
7233 
7234 
7235 static rfs4_chkseq_t
7236 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7237 {
7238         rfs4_chkseq_t rc;
7239 
7240         rfs4_dbe_lock(op->ro_dbe);
7241         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7242             TRUE);
7243         rfs4_dbe_unlock(op->ro_dbe);
7244 
7245         if (rc == NFS4_CHKSEQ_OKAY)
7246                 rfs4_update_lease(op->ro_client);
7247 
7248         return (rc);
7249 }
7250 
7251 static rfs4_chkseq_t
7252 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7253 {
7254         rfs4_chkseq_t rc;
7255 
7256         rfs4_dbe_lock(op->ro_dbe);
7257         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7258             olo_seqid, resop, FALSE);
7259         rfs4_dbe_unlock(op->ro_dbe);
7260 
7261         return (rc);
7262 }
7263 
7264 static rfs4_chkseq_t
7265 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7266 {
7267         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7268 
7269         rfs4_dbe_lock(lsp->rls_dbe);
7270         if (!lsp->rls_skip_seqid_check)
7271                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7272                     resop, TRUE);
7273         rfs4_dbe_unlock(lsp->rls_dbe);
7274 
7275         return (rc);
7276 }
7277 
7278 static void
7279 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7280     struct svc_req *req, struct compound_state *cs)
7281 {
7282         OPEN4args *args = &argop->nfs_argop4_u.opopen;
7283         OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7284         open_owner4 *owner = &args->owner;
7285         open_claim_type4 claim = args->claim;
7286         rfs4_client_t *cp;
7287         rfs4_openowner_t *oo;
7288         bool_t create;
7289         bool_t replay = FALSE;
7290         int can_reclaim;
7291 
7292         DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7293             OPEN4args *, args);
7294 
7295         if (cs->vp == NULL) {
7296                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7297                 goto end;
7298         }
7299 
7300         /*
7301          * Need to check clientid and lease expiration first based on
7302          * error ordering and incrementing sequence id.
7303          */
7304         cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7305         if (cp == NULL) {
7306                 *cs->statusp = resp->status =
7307                     rfs4_check_clientid(&owner->clientid, 0);
7308                 goto end;
7309         }
7310 
7311         if (rfs4_lease_expired(cp)) {
7312                 rfs4_client_close(cp);
7313                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7314                 goto end;
7315         }
7316         can_reclaim = cp->rc_can_reclaim;
7317 
7318         /*
7319          * Find the open_owner for use from this point forward.  Take
7320          * care in updating the sequence id based on the type of error
7321          * being returned.
7322          */
7323 retry:
7324         create = TRUE;
7325         oo = rfs4_findopenowner(owner, &create, args->seqid);
7326         if (oo == NULL) {
7327                 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7328                 rfs4_client_rele(cp);
7329                 goto end;
7330         }
7331 
7332         /* Hold off access to the sequence space while the open is done */
7333         rfs4_sw_enter(&oo->ro_sw);
7334 
7335         /*
7336          * If the open_owner existed before at the server, then check
7337          * the sequence id.
7338          */
7339         if (!create && !oo->ro_postpone_confirm) {
7340                 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7341                 case NFS4_CHKSEQ_BAD:
7342                         if ((args->seqid > oo->ro_open_seqid) &&
7343                             oo->ro_need_confirm) {
7344                                 rfs4_free_opens(oo, TRUE, FALSE);
7345                                 rfs4_sw_exit(&oo->ro_sw);
7346                                 rfs4_openowner_rele(oo);
7347                                 goto retry;
7348                         }
7349                         resp->status = NFS4ERR_BAD_SEQID;
7350                         goto out;
7351                 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7352                         replay = TRUE;
7353                         goto out;
7354                 default:
7355                         break;
7356                 }
7357 
7358                 /*
7359                  * Sequence was ok and open owner exists
7360                  * check to see if we have yet to see an
7361                  * open_confirm.
7362                  */
7363                 if (oo->ro_need_confirm) {
7364                         rfs4_free_opens(oo, TRUE, FALSE);
7365                         rfs4_sw_exit(&oo->ro_sw);
7366                         rfs4_openowner_rele(oo);
7367                         goto retry;
7368                 }
7369         }
7370         /* Grace only applies to regular-type OPENs */
7371         if (rfs4_clnt_in_grace(cp) &&
7372             (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7373                 *cs->statusp = resp->status = NFS4ERR_GRACE;
7374                 goto out;
7375         }
7376 
7377         /*
7378          * If previous state at the server existed then can_reclaim
7379          * will be set. If not reply NFS4ERR_NO_GRACE to the
7380          * client.
7381          */
7382         if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7383                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7384                 goto out;
7385         }
7386 
7387 
7388         /*
7389          * Reject the open if the client has missed the grace period
7390          */
7391         if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7392                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7393                 goto out;
7394         }
7395 
7396         /* Couple of up-front bookkeeping items */
7397         if (oo->ro_need_confirm) {
7398                 /*
7399                  * If this is a reclaim OPEN then we should not ask
7400                  * for a confirmation of the open_owner per the
7401                  * protocol specification.
7402                  */
7403                 if (claim == CLAIM_PREVIOUS)
7404                         oo->ro_need_confirm = FALSE;
7405                 else
7406                         resp->rflags |= OPEN4_RESULT_CONFIRM;
7407         }
7408         resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7409 
7410         /*
7411          * If there is an unshared filesystem mounted on this vnode,
7412          * do not allow to open/create in this directory.
7413          */
7414         if (vn_ismntpt(cs->vp)) {
7415                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7416                 goto out;
7417         }
7418 
7419         /*
7420          * access must READ, WRITE, or BOTH.  No access is invalid.
7421          * deny can be READ, WRITE, BOTH, or NONE.
7422          * bits not defined for access/deny are invalid.
7423          */
7424         if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7425             (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7426             (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7427                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7428                 goto out;
7429         }
7430 
7431 
7432         /*
7433          * make sure attrset is zero before response is built.
7434          */
7435         resp->attrset = 0;
7436 
7437         switch (claim) {
7438         case CLAIM_NULL:
7439                 rfs4_do_opennull(cs, req, args, oo, resp);
7440                 break;
7441         case CLAIM_PREVIOUS:
7442                 rfs4_do_openprev(cs, req, args, oo, resp);
7443                 break;
7444         case CLAIM_DELEGATE_CUR:
7445                 rfs4_do_opendelcur(cs, req, args, oo, resp);
7446                 break;
7447         case CLAIM_DELEGATE_PREV:
7448                 rfs4_do_opendelprev(cs, req, args, oo, resp);
7449                 break;
7450         default:
7451                 resp->status = NFS4ERR_INVAL;
7452                 break;
7453         }
7454 
7455 out:
7456         rfs4_client_rele(cp);
7457 
7458         /* Catch sequence id handling here to make it a little easier */
7459         switch (resp->status) {
7460         case NFS4ERR_BADXDR:
7461         case NFS4ERR_BAD_SEQID:
7462         case NFS4ERR_BAD_STATEID:
7463         case NFS4ERR_NOFILEHANDLE:
7464         case NFS4ERR_RESOURCE:
7465         case NFS4ERR_STALE_CLIENTID:
7466         case NFS4ERR_STALE_STATEID:
7467                 /*
7468                  * The protocol states that if any of these errors are
7469                  * being returned, the sequence id should not be
7470                  * incremented.  Any other return requires an
7471                  * increment.
7472                  */
7473                 break;
7474         default:
7475                 /* Always update the lease in this case */
7476                 rfs4_update_lease(oo->ro_client);
7477 
7478                 /* Regular response - copy the result */
7479                 if (!replay)
7480                         rfs4_update_open_resp(oo, resop, &cs->fh);
7481 
7482                 /*
7483                  * REPLAY case: Only if the previous response was OK
7484                  * do we copy the filehandle.  If not OK, no
7485                  * filehandle to copy.
7486                  */
7487                 if (replay == TRUE &&
7488                     resp->status == NFS4_OK &&
7489                     oo->ro_reply_fh.nfs_fh4_val) {
7490                         /*
7491                          * If this is a replay, we must restore the
7492                          * current filehandle/vp to that of what was
7493                          * returned originally.  Try our best to do
7494                          * it.
7495                          */
7496                         nfs_fh4_fmt_t *fh_fmtp =
7497                             (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7498 
7499                         cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7500                             (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7501 
7502                         if (cs->exi == NULL) {
7503                                 resp->status = NFS4ERR_STALE;
7504                                 goto finish;
7505                         }
7506 
7507                         VN_RELE(cs->vp);
7508 
7509                         cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7510                             &resp->status);
7511 
7512                         if (cs->vp == NULL)
7513                                 goto finish;
7514 
7515                         nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7516                 }
7517 
7518                 /*
7519                  * If this was a replay, no need to update the
7520                  * sequence id. If the open_owner was not created on
7521                  * this pass, then update.  The first use of an
7522                  * open_owner will not bump the sequence id.
7523                  */
7524                 if (replay == FALSE && !create)
7525                         rfs4_update_open_sequence(oo);
7526                 /*
7527                  * If the client is receiving an error and the
7528                  * open_owner needs to be confirmed, there is no way
7529                  * to notify the client of this fact ignoring the fact
7530                  * that the server has no method of returning a
7531                  * stateid to confirm.  Therefore, the server needs to
7532                  * mark this open_owner in a way as to avoid the
7533                  * sequence id checking the next time the client uses
7534                  * this open_owner.
7535                  */
7536                 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7537                         oo->ro_postpone_confirm = TRUE;
7538                 /*
7539                  * If OK response then clear the postpone flag and
7540                  * reset the sequence id to keep in sync with the
7541                  * client.
7542                  */
7543                 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7544                         oo->ro_postpone_confirm = FALSE;
7545                         oo->ro_open_seqid = args->seqid;
7546                 }
7547                 break;
7548         }
7549 
7550 finish:
7551         *cs->statusp = resp->status;
7552 
7553         rfs4_sw_exit(&oo->ro_sw);
7554         rfs4_openowner_rele(oo);
7555 
7556 end:
7557         DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7558             OPEN4res *, resp);
7559 }
7560 
7561 /*ARGSUSED*/
7562 void
7563 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7564     struct svc_req *req, struct compound_state *cs)
7565 {
7566         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7567         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7568         rfs4_state_t *sp;
7569         nfsstat4 status;
7570 
7571         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7572             OPEN_CONFIRM4args *, args);
7573 
7574         if (cs->vp == NULL) {
7575                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7576                 goto out;
7577         }
7578 
7579         if (cs->vp->v_type != VREG) {
7580                 *cs->statusp = resp->status =
7581                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7582                 return;
7583         }
7584 
7585         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7586         if (status != NFS4_OK) {
7587                 *cs->statusp = resp->status = status;
7588                 goto out;
7589         }
7590 
7591         /* Ensure specified filehandle matches */
7592         if (cs->vp != sp->rs_finfo->rf_vp) {
7593                 rfs4_state_rele(sp);
7594                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7595                 goto out;
7596         }
7597 
7598         /* hold off other access to open_owner while we tinker */
7599         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7600 
7601         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7602         case NFS4_CHECK_STATEID_OKAY:
7603                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7604                     resop) != 0) {
7605                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7606                         break;
7607                 }
7608                 /*
7609                  * If it is the appropriate stateid and determined to
7610                  * be "OKAY" then this means that the stateid does not
7611                  * need to be confirmed and the client is in error for
7612                  * sending an OPEN_CONFIRM.
7613                  */
7614                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7615                 break;
7616         case NFS4_CHECK_STATEID_OLD:
7617                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7618                 break;
7619         case NFS4_CHECK_STATEID_BAD:
7620                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7621                 break;
7622         case NFS4_CHECK_STATEID_EXPIRED:
7623                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7624                 break;
7625         case NFS4_CHECK_STATEID_CLOSED:
7626                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7627                 break;
7628         case NFS4_CHECK_STATEID_REPLAY:
7629                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7630                     resop)) {
7631                 case NFS4_CHKSEQ_OKAY:
7632                         /*
7633                          * This is replayed stateid; if seqid matches
7634                          * next expected, then client is using wrong seqid.
7635                          */
7636                         /* fall through */
7637                 case NFS4_CHKSEQ_BAD:
7638                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7639                         break;
7640                 case NFS4_CHKSEQ_REPLAY:
7641                         /*
7642                          * Note this case is the duplicate case so
7643                          * resp->status is already set.
7644                          */
7645                         *cs->statusp = resp->status;
7646                         rfs4_update_lease(sp->rs_owner->ro_client);
7647                         break;
7648                 }
7649                 break;
7650         case NFS4_CHECK_STATEID_UNCONFIRMED:
7651                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7652                     resop) != NFS4_CHKSEQ_OKAY) {
7653                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7654                         break;
7655                 }
7656                 *cs->statusp = resp->status = NFS4_OK;
7657 
7658                 next_stateid(&sp->rs_stateid);
7659                 resp->open_stateid = sp->rs_stateid.stateid;
7660                 sp->rs_owner->ro_need_confirm = FALSE;
7661                 rfs4_update_lease(sp->rs_owner->ro_client);
7662                 rfs4_update_open_sequence(sp->rs_owner);
7663                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7664                 break;
7665         default:
7666                 ASSERT(FALSE);
7667                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7668                 break;
7669         }
7670         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7671         rfs4_state_rele(sp);
7672 
7673 out:
7674         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7675             OPEN_CONFIRM4res *, resp);
7676 }
7677 
7678 /*ARGSUSED*/
7679 void
7680 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7681     struct svc_req *req, struct compound_state *cs)
7682 {
7683         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7684         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7685         uint32_t access = args->share_access;
7686         uint32_t deny = args->share_deny;
7687         nfsstat4 status;
7688         rfs4_state_t *sp;
7689         rfs4_file_t *fp;
7690         int fflags = 0;
7691 
7692         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7693             OPEN_DOWNGRADE4args *, args);
7694 
7695         if (cs->vp == NULL) {
7696                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7697                 goto out;
7698         }
7699 
7700         if (cs->vp->v_type != VREG) {
7701                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7702                 return;
7703         }
7704 
7705         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7706         if (status != NFS4_OK) {
7707                 *cs->statusp = resp->status = status;
7708                 goto out;
7709         }
7710 
7711         /* Ensure specified filehandle matches */
7712         if (cs->vp != sp->rs_finfo->rf_vp) {
7713                 rfs4_state_rele(sp);
7714                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7715                 goto out;
7716         }
7717 
7718         /* hold off other access to open_owner while we tinker */
7719         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7720 
7721         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7722         case NFS4_CHECK_STATEID_OKAY:
7723                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7724                     resop) != NFS4_CHKSEQ_OKAY) {
7725                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7726                         goto end;
7727                 }
7728                 break;
7729         case NFS4_CHECK_STATEID_OLD:
7730                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7731                 goto end;
7732         case NFS4_CHECK_STATEID_BAD:
7733                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7734                 goto end;
7735         case NFS4_CHECK_STATEID_EXPIRED:
7736                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7737                 goto end;
7738         case NFS4_CHECK_STATEID_CLOSED:
7739                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7740                 goto end;
7741         case NFS4_CHECK_STATEID_UNCONFIRMED:
7742                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7743                 goto end;
7744         case NFS4_CHECK_STATEID_REPLAY:
7745                 /* Check the sequence id for the open owner */
7746                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7747                     resop)) {
7748                 case NFS4_CHKSEQ_OKAY:
7749                         /*
7750                          * This is replayed stateid; if seqid matches
7751                          * next expected, then client is using wrong seqid.
7752                          */
7753                         /* fall through */
7754                 case NFS4_CHKSEQ_BAD:
7755                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7756                         goto end;
7757                 case NFS4_CHKSEQ_REPLAY:
7758                         /*
7759                          * Note this case is the duplicate case so
7760                          * resp->status is already set.
7761                          */
7762                         *cs->statusp = resp->status;
7763                         rfs4_update_lease(sp->rs_owner->ro_client);
7764                         goto end;
7765                 }
7766                 break;
7767         default:
7768                 ASSERT(FALSE);
7769                 break;
7770         }
7771 
7772         rfs4_dbe_lock(sp->rs_dbe);
7773         /*
7774          * Check that the new access modes and deny modes are valid.
7775          * Check that no invalid bits are set.
7776          */
7777         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7778             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7779                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7780                 rfs4_update_open_sequence(sp->rs_owner);
7781                 rfs4_dbe_unlock(sp->rs_dbe);
7782                 goto end;
7783         }
7784 
7785         /*
7786          * The new modes must be a subset of the current modes and
7787          * the access must specify at least one mode. To test that
7788          * the new mode is a subset of the current modes we bitwise
7789          * AND them together and check that the result equals the new
7790          * mode. For example:
7791          * New mode, access == R and current mode, sp->rs_open_access  == RW
7792          * access & sp->rs_open_access == R == access, so the new access mode
7793          * is valid. Consider access == RW, sp->rs_open_access = R
7794          * access & sp->rs_open_access == R != access, so the new access mode
7795          * is invalid.
7796          */
7797         if ((access & sp->rs_open_access) != access ||
7798             (deny & sp->rs_open_deny) != deny ||
7799             (access &
7800             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7801                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7802                 rfs4_update_open_sequence(sp->rs_owner);
7803                 rfs4_dbe_unlock(sp->rs_dbe);
7804                 goto end;
7805         }
7806 
7807         /*
7808          * Release any share locks associated with this stateID.
7809          * Strictly speaking, this violates the spec because the
7810          * spec effectively requires that open downgrade be atomic.
7811          * At present, fs_shrlock does not have this capability.
7812          */
7813         (void) rfs4_unshare(sp);
7814 
7815         status = rfs4_share(sp, access, deny);
7816         if (status != NFS4_OK) {
7817                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7818                 rfs4_update_open_sequence(sp->rs_owner);
7819                 rfs4_dbe_unlock(sp->rs_dbe);
7820                 goto end;
7821         }
7822 
7823         fp = sp->rs_finfo;
7824         rfs4_dbe_lock(fp->rf_dbe);
7825 
7826         /*
7827          * If the current mode has deny read and the new mode
7828          * does not, decrement the number of deny read mode bits
7829          * and if it goes to zero turn off the deny read bit
7830          * on the file.
7831          */
7832         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7833             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7834                 fp->rf_deny_read--;
7835                 if (fp->rf_deny_read == 0)
7836                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7837         }
7838 
7839         /*
7840          * If the current mode has deny write and the new mode
7841          * does not, decrement the number of deny write mode bits
7842          * and if it goes to zero turn off the deny write bit
7843          * on the file.
7844          */
7845         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7846             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7847                 fp->rf_deny_write--;
7848                 if (fp->rf_deny_write == 0)
7849                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7850         }
7851 
7852         /*
7853          * If the current mode has access read and the new mode
7854          * does not, decrement the number of access read mode bits
7855          * and if it goes to zero turn off the access read bit
7856          * on the file.  set fflags to FREAD for the call to
7857          * vn_open_downgrade().
7858          */
7859         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7860             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7861                 fp->rf_access_read--;
7862                 if (fp->rf_access_read == 0)
7863                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7864                 fflags |= FREAD;
7865         }
7866 
7867         /*
7868          * If the current mode has access write and the new mode
7869          * does not, decrement the number of access write mode bits
7870          * and if it goes to zero turn off the access write bit
7871          * on the file.  set fflags to FWRITE for the call to
7872          * vn_open_downgrade().
7873          */
7874         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7875             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7876                 fp->rf_access_write--;
7877                 if (fp->rf_access_write == 0)
7878                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7879                 fflags |= FWRITE;
7880         }
7881 
7882         /* Check that the file is still accessible */
7883         ASSERT(fp->rf_share_access);
7884 
7885         rfs4_dbe_unlock(fp->rf_dbe);
7886 
7887         /* now set the new open access and deny modes */
7888         sp->rs_open_access = access;
7889         sp->rs_open_deny = deny;
7890 
7891         /*
7892          * we successfully downgraded the share lock, now we need to downgrade
7893          * the open. it is possible that the downgrade was only for a deny
7894          * mode and we have nothing else to do.
7895          */
7896         if ((fflags & (FREAD|FWRITE)) != 0)
7897                 vn_open_downgrade(cs->vp, fflags);
7898 
7899         /* Update the stateid */
7900         next_stateid(&sp->rs_stateid);
7901         resp->open_stateid = sp->rs_stateid.stateid;
7902 
7903         rfs4_dbe_unlock(sp->rs_dbe);
7904 
7905         *cs->statusp = resp->status = NFS4_OK;
7906         /* Update the lease */
7907         rfs4_update_lease(sp->rs_owner->ro_client);
7908         /* And the sequence */
7909         rfs4_update_open_sequence(sp->rs_owner);
7910         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7911 
7912 end:
7913         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7914         rfs4_state_rele(sp);
7915 out:
7916         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7917             OPEN_DOWNGRADE4res *, resp);
7918 }
7919 
/*
 * Find the first occurrence of the NUL-terminated string s2 inside the
 * n-byte buffer s1.  The buffer does not need to be NUL-terminated and
 * the terminator of s2 is not part of the match.
 *
 * Returns a pointer to the start of the match within s1, or NULL when
 * s2 does not occur in the first n bytes.  An empty s2 matches at s1.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        const size_t needle_len = strlen(s2);
        char *cursor = (char *)s1;
        size_t remaining;

        /* Stop as soon as fewer bytes remain than the needle needs */
        for (remaining = n; remaining >= needle_len; remaining--, cursor++) {
                if (bcmp(cursor, s2, needle_len) == 0)
                        return (cursor);
        }

        return (NULL);
}
7935 
7936 /*
7937  * The logic behind this function is detailed in the NFSv4 RFC in the
7938  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7939  * that section for explicit guidance to server behavior for
7940  * SETCLIENTID.
7941  */
7942 void
7943 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7944     struct svc_req *req, struct compound_state *cs)
7945 {
7946         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7947         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7948         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7949         rfs4_clntip_t *ci;
7950         bool_t create;
7951         char *addr, *netid;
7952         int len;
7953 
7954         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7955             SETCLIENTID4args *, args);
7956 retry:
7957         newcp = cp_confirmed = cp_unconfirmed = NULL;
7958 
7959         /*
7960          * Save the caller's IP address
7961          */
7962         args->client.cl_addr =
7963             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7964 
7965         /*
7966          * Record if it is a Solaris client that cannot handle referrals.
7967          */
7968         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
7969             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
7970                 /* Add a "yes, it's downrev" record */
7971                 create = TRUE;
7972                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
7973                 ASSERT(ci != NULL);
7974                 rfs4_dbe_rele(ci->ri_dbe);
7975         } else {
7976                 /* Remove any previous record */
7977                 rfs4_invalidate_clntip(args->client.cl_addr);
7978         }
7979 
7980         /*
7981          * In search of an EXISTING client matching the incoming
7982          * request to establish a new client identifier at the server
7983          */
7984         create = TRUE;
7985         cp = rfs4_findclient(&args->client, &create, NULL);
7986 
7987         /* Should never happen */
7988         ASSERT(cp != NULL);
7989 
7990         if (cp == NULL) {
7991                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7992                 goto out;
7993         }
7994 
7995         /*
7996          * Easiest case. Client identifier is newly created and is
7997          * unconfirmed.  Also note that for this case, no other
7998          * entries exist for the client identifier.  Nothing else to
7999          * check.  Just setup the response and respond.
8000          */
8001         if (create) {
8002                 *cs->statusp = res->status = NFS4_OK;
8003                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8004                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8005                     cp->rc_confirm_verf;
8006                 /* Setup callback information; CB_NULL confirmation later */
8007                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8008 
8009                 rfs4_client_rele(cp);
8010                 goto out;
8011         }
8012 
8013         /*
8014          * An existing, confirmed client may exist but it may not have
8015          * been active for at least one lease period.  If so, then
8016          * "close" the client and create a new client identifier
8017          */
8018         if (rfs4_lease_expired(cp)) {
8019                 rfs4_client_close(cp);
8020                 goto retry;
8021         }
8022 
8023         if (cp->rc_need_confirm == TRUE)
8024                 cp_unconfirmed = cp;
8025         else
8026                 cp_confirmed = cp;
8027 
8028         cp = NULL;
8029 
8030         /*
8031          * We have a confirmed client, now check for an
8032          * unconfimred entry
8033          */
8034         if (cp_confirmed) {
8035                 /* If creds don't match then client identifier is inuse */
8036                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8037                         rfs4_cbinfo_t *cbp;
8038                         /*
8039                          * Some one else has established this client
8040                          * id. Try and say * who they are. We will use
8041                          * the call back address supplied by * the
8042                          * first client.
8043                          */
8044                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8045 
8046                         addr = netid = NULL;
8047 
8048                         cbp = &cp_confirmed->rc_cbinfo;
8049                         if (cbp->cb_callback.cb_location.r_addr &&
8050                             cbp->cb_callback.cb_location.r_netid) {
8051                                 cb_client4 *cbcp = &cbp->cb_callback;
8052 
8053                                 len = strlen(cbcp->cb_location.r_addr)+1;
8054                                 addr = kmem_alloc(len, KM_SLEEP);
8055                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8056                                 len = strlen(cbcp->cb_location.r_netid)+1;
8057                                 netid = kmem_alloc(len, KM_SLEEP);
8058                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8059                         }
8060 
8061                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8062                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8063 
8064                         rfs4_client_rele(cp_confirmed);
8065                 }
8066 
8067                 /*
8068                  * Confirmed, creds match, and verifier matches; must
8069                  * be an update of the callback info
8070                  */
8071                 if (cp_confirmed->rc_nfs_client.verifier ==
8072                     args->client.verifier) {
8073                         /* Setup callback information */
8074                         rfs4_client_setcb(cp_confirmed, &args->callback,
8075                             args->callback_ident);
8076 
8077                         /* everything okay -- move ahead */
8078                         *cs->statusp = res->status = NFS4_OK;
8079                         res->SETCLIENTID4res_u.resok4.clientid =
8080                             cp_confirmed->rc_clientid;
8081 
8082                         /* update the confirm_verifier and return it */
8083                         rfs4_client_scv_next(cp_confirmed);
8084                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8085                             cp_confirmed->rc_confirm_verf;
8086 
8087                         rfs4_client_rele(cp_confirmed);
8088                         goto out;
8089                 }
8090 
8091                 /*
8092                  * Creds match but the verifier doesn't.  Must search
8093                  * for an unconfirmed client that would be replaced by
8094                  * this request.
8095                  */
8096                 create = FALSE;
8097                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8098                     cp_confirmed);
8099         }
8100 
8101         /*
8102          * At this point, we have taken care of the brand new client
8103          * struct, INUSE case, update of an existing, and confirmed
8104          * client struct.
8105          */
8106 
8107         /*
8108          * check to see if things have changed while we originally
8109          * picked up the client struct.  If they have, then return and
8110          * retry the processing of this SETCLIENTID request.
8111          */
8112         if (cp_unconfirmed) {
8113                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8114                 if (!cp_unconfirmed->rc_need_confirm) {
8115                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8116                         rfs4_client_rele(cp_unconfirmed);
8117                         if (cp_confirmed)
8118                                 rfs4_client_rele(cp_confirmed);
8119                         goto retry;
8120                 }
8121                 /* do away with the old unconfirmed one */
8122                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8123                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8124                 rfs4_client_rele(cp_unconfirmed);
8125                 cp_unconfirmed = NULL;
8126         }
8127 
8128         /*
8129          * This search will temporarily hide the confirmed client
8130          * struct while a new client struct is created as the
8131          * unconfirmed one.
8132          */
8133         create = TRUE;
8134         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8135 
8136         ASSERT(newcp != NULL);
8137 
8138         if (newcp == NULL) {
8139                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8140                 rfs4_client_rele(cp_confirmed);
8141                 goto out;
8142         }
8143 
8144         /*
8145          * If one was not created, then a similar request must be in
8146          * process so release and start over with this one
8147          */
8148         if (create != TRUE) {
8149                 rfs4_client_rele(newcp);
8150                 if (cp_confirmed)
8151                         rfs4_client_rele(cp_confirmed);
8152                 goto retry;
8153         }
8154 
8155         *cs->statusp = res->status = NFS4_OK;
8156         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8157         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8158             newcp->rc_confirm_verf;
8159         /* Setup callback information; CB_NULL confirmation later */
8160         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8161 
8162         newcp->rc_cp_confirmed = cp_confirmed;
8163 
8164         rfs4_client_rele(newcp);
8165 
8166 out:
8167         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8168             SETCLIENTID4res *, res);
8169 }
8170 
8171 /*ARGSUSED*/
8172 void
8173 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8174     struct svc_req *req, struct compound_state *cs)
8175 {
8176         SETCLIENTID_CONFIRM4args *args =
8177             &argop->nfs_argop4_u.opsetclientid_confirm;
8178         SETCLIENTID_CONFIRM4res *res =
8179             &resop->nfs_resop4_u.opsetclientid_confirm;
8180         rfs4_client_t *cp, *cptoclose = NULL;
8181 
8182         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8183             struct compound_state *, cs,
8184             SETCLIENTID_CONFIRM4args *, args);
8185 
8186         *cs->statusp = res->status = NFS4_OK;
8187 
8188         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8189 
8190         if (cp == NULL) {
8191                 *cs->statusp = res->status =
8192                     rfs4_check_clientid(&args->clientid, 1);
8193                 goto out;
8194         }
8195 
8196         if (!creds_ok(cp, req, cs)) {
8197                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8198                 rfs4_client_rele(cp);
8199                 goto out;
8200         }
8201 
8202         /* If the verifier doesn't match, the record doesn't match */
8203         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8204                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8205                 rfs4_client_rele(cp);
8206                 goto out;
8207         }
8208 
8209         rfs4_dbe_lock(cp->rc_dbe);
8210         cp->rc_need_confirm = FALSE;
8211         if (cp->rc_cp_confirmed) {
8212                 cptoclose = cp->rc_cp_confirmed;
8213                 cptoclose->rc_ss_remove = 1;
8214                 cp->rc_cp_confirmed = NULL;
8215         }
8216 
8217         /*
8218          * Update the client's associated server instance, if it's changed
8219          * since the client was created.
8220          */
8221         if (rfs4_servinst(cp) != rfs4_cur_servinst)
8222                 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8223 
8224         /*
8225          * Record clientid in stable storage.
8226          * Must be done after server instance has been assigned.
8227          */
8228         rfs4_ss_clid(cp);
8229 
8230         rfs4_dbe_unlock(cp->rc_dbe);
8231 
8232         if (cptoclose)
8233                 /* don't need to rele, client_close does it */
8234                 rfs4_client_close(cptoclose);
8235 
8236         /* If needed, initiate CB_NULL call for callback path */
8237         rfs4_deleg_cb_check(cp);
8238         rfs4_update_lease(cp);
8239 
8240         /*
8241          * Check to see if client can perform reclaims
8242          */
8243         rfs4_ss_chkclid(cp);
8244 
8245         rfs4_client_rele(cp);
8246 
8247 out:
8248         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8249             struct compound_state *, cs,
8250             SETCLIENTID_CONFIRM4 *, res);
8251 }
8252 
8253 
8254 /*ARGSUSED*/
8255 void
8256 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8257     struct svc_req *req, struct compound_state *cs)
8258 {
8259         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8260         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8261         rfs4_state_t *sp;
8262         nfsstat4 status;
8263 
8264         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8265             CLOSE4args *, args);
8266 
8267         if (cs->vp == NULL) {
8268                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8269                 goto out;
8270         }
8271 
8272         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8273         if (status != NFS4_OK) {
8274                 *cs->statusp = resp->status = status;
8275                 goto out;
8276         }
8277 
8278         /* Ensure specified filehandle matches */
8279         if (cs->vp != sp->rs_finfo->rf_vp) {
8280                 rfs4_state_rele(sp);
8281                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8282                 goto out;
8283         }
8284 
8285         /* hold off other access to open_owner while we tinker */
8286         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8287 
8288         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8289         case NFS4_CHECK_STATEID_OKAY:
8290                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8291                     resop) != NFS4_CHKSEQ_OKAY) {
8292                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8293                         goto end;
8294                 }
8295                 break;
8296         case NFS4_CHECK_STATEID_OLD:
8297                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8298                 goto end;
8299         case NFS4_CHECK_STATEID_BAD:
8300                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8301                 goto end;
8302         case NFS4_CHECK_STATEID_EXPIRED:
8303                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8304                 goto end;
8305         case NFS4_CHECK_STATEID_CLOSED:
8306                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8307                 goto end;
8308         case NFS4_CHECK_STATEID_UNCONFIRMED:
8309                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8310                 goto end;
8311         case NFS4_CHECK_STATEID_REPLAY:
8312                 /* Check the sequence id for the open owner */
8313                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8314                     resop)) {
8315                 case NFS4_CHKSEQ_OKAY:
8316                         /*
8317                          * This is replayed stateid; if seqid matches
8318                          * next expected, then client is using wrong seqid.
8319                          */
8320                         /* FALL THROUGH */
8321                 case NFS4_CHKSEQ_BAD:
8322                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8323                         goto end;
8324                 case NFS4_CHKSEQ_REPLAY:
8325                         /*
8326                          * Note this case is the duplicate case so
8327                          * resp->status is already set.
8328                          */
8329                         *cs->statusp = resp->status;
8330                         rfs4_update_lease(sp->rs_owner->ro_client);
8331                         goto end;
8332                 }
8333                 break;
8334         default:
8335                 ASSERT(FALSE);
8336                 break;
8337         }
8338 
8339         rfs4_dbe_lock(sp->rs_dbe);
8340 
8341         /* Update the stateid. */
8342         next_stateid(&sp->rs_stateid);
8343         resp->open_stateid = sp->rs_stateid.stateid;
8344 
8345         rfs4_dbe_unlock(sp->rs_dbe);
8346 
8347         rfs4_update_lease(sp->rs_owner->ro_client);
8348         rfs4_update_open_sequence(sp->rs_owner);
8349         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8350 
8351         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8352 
8353         *cs->statusp = resp->status = status;
8354 
8355 end:
8356         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8357         rfs4_state_rele(sp);
8358 out:
8359         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8360             CLOSE4res *, resp);
8361 }
8362 
8363 /*
8364  * Manage the counts on the file struct and close all file locks
8365  */
8366 /*ARGSUSED*/
8367 void
8368 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8369     bool_t close_of_client)
8370 {
8371         rfs4_file_t *fp = sp->rs_finfo;
8372         rfs4_lo_state_t *lsp;
8373         int fflags = 0;
8374 
8375         /*
8376          * If this call is part of the larger closing down of client
8377          * state then it is just easier to release all locks
8378          * associated with this client instead of going through each
8379          * individual file and cleaning locks there.
8380          */
8381         if (close_of_client) {
8382                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8383                     !list_is_empty(&sp->rs_lostatelist) &&
8384                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8385                         /* Is the PxFS kernel module loaded? */
8386                         if (lm_remove_file_locks != NULL) {
8387                                 int new_sysid;
8388 
8389                                 /* Encode the cluster nodeid in new sysid */
8390                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8391                                 lm_set_nlmid_flk(&new_sysid);
8392 
8393                                 /*
8394                                  * This PxFS routine removes file locks for a
8395                                  * client over all nodes of a cluster.
8396                                  */
8397                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8398                                     "lm_remove_file_locks(sysid=0x%x)\n",
8399                                     new_sysid));
8400                                 (*lm_remove_file_locks)(new_sysid);
8401                         } else {
8402                                 struct flock64 flk;
8403 
8404                                 /* Release all locks for this client */
8405                                 flk.l_type = F_UNLKSYS;
8406                                 flk.l_whence = 0;
8407                                 flk.l_start = 0;
8408                                 flk.l_len = 0;
8409                                 flk.l_sysid =
8410                                     sp->rs_owner->ro_client->rc_sysidt;
8411                                 flk.l_pid = 0;
8412                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8413                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8414                                     (u_offset_t)0, NULL, CRED(), NULL);
8415                         }
8416 
8417                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8418                 }
8419         }
8420 
8421         /*
8422          * Release all locks on this file by this lock owner or at
8423          * least mark the locks as having been released
8424          */
8425         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8426             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8427                 lsp->rls_locks_cleaned = TRUE;
8428 
8429                 /* Was this already taken care of above? */
8430                 if (!close_of_client &&
8431                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8432                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8433                             lsp->rls_locker->rl_pid,
8434                             lsp->rls_locker->rl_client->rc_sysidt);
8435         }
8436 
8437         /*
8438          * Release any shrlocks associated with this open state ID.
8439          * This must be done before the rfs4_state gets marked closed.
8440          */
8441         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8442                 (void) rfs4_unshare(sp);
8443 
8444         if (sp->rs_open_access) {
8445                 rfs4_dbe_lock(fp->rf_dbe);
8446 
8447                 /*
8448                  * Decrement the count for each access and deny bit that this
8449                  * state has contributed to the file.
8450                  * If the file counts go to zero
8451                  * clear the appropriate bit in the appropriate mask.
8452                  */
8453                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8454                         fp->rf_access_read--;
8455                         fflags |= FREAD;
8456                         if (fp->rf_access_read == 0)
8457                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8458                 }
8459                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8460                         fp->rf_access_write--;
8461                         fflags |= FWRITE;
8462                         if (fp->rf_access_write == 0)
8463                                 fp->rf_share_access &=
8464                                     ~OPEN4_SHARE_ACCESS_WRITE;
8465                 }
8466                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8467                         fp->rf_deny_read--;
8468                         if (fp->rf_deny_read == 0)
8469                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8470                 }
8471                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8472                         fp->rf_deny_write--;
8473                         if (fp->rf_deny_write == 0)
8474                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8475                 }
8476 
8477                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8478 
8479                 rfs4_dbe_unlock(fp->rf_dbe);
8480 
8481                 sp->rs_open_access = 0;
8482                 sp->rs_open_deny = 0;
8483         }
8484 }
8485 
8486 /*
8487  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8488  */
8489 static nfsstat4
8490 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8491 {
8492         rfs4_lockowner_t *lo;
8493         rfs4_client_t *cp;
8494         uint32_t len;
8495 
8496         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8497         if (lo != NULL) {
8498                 cp = lo->rl_client;
8499                 if (rfs4_lease_expired(cp)) {
8500                         rfs4_lockowner_rele(lo);
8501                         rfs4_dbe_hold(cp->rc_dbe);
8502                         rfs4_client_close(cp);
8503                         return (NFS4ERR_EXPIRED);
8504                 }
8505                 dp->owner.clientid = lo->rl_owner.clientid;
8506                 len = lo->rl_owner.owner_len;
8507                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8508                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8509                 dp->owner.owner_len = len;
8510                 rfs4_lockowner_rele(lo);
8511                 goto finish;
8512         }
8513 
8514         /*
8515          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8516          * of the client id contain the boot time for a NFS4 lock. So we
8517          * fabricate and identity by setting clientid to the sysid, and
8518          * the lock owner to the pid.
8519          */
8520         dp->owner.clientid = flk->l_sysid;
8521         len = sizeof (pid_t);
8522         dp->owner.owner_len = len;
8523         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8524         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8525 finish:
8526         dp->offset = flk->l_start;
8527         dp->length = flk->l_len;
8528 
8529         if (flk->l_type == F_RDLCK)
8530                 dp->locktype = READ_LT;
8531         else if (flk->l_type == F_WRLCK)
8532                 dp->locktype = WRITE_LT;
8533         else
8534                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8535 
8536         return (NFS4_OK);
8537 }
8538 
8539 /*
8540  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8541  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8542  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8543  * for that (obviously); they are sending the LOCK requests with some delays
8544  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8545  * locking and delay implementation at the client side.
8546  *
8547  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8548  * fast retries on its own (the for loop below) in a hope the lock will be
8549  * available soon.  And if not, the client won't need to resend the LOCK
8550  * requests so fast to check the lock availability.  This basically saves some
8551  * network traffic and tries to make sure the client gets the lock ASAP.
8552  */
8553 static int
8554 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8555 {
8556         int error;
8557         struct flock64 flk;
8558         int i;
8559         clock_t delaytime;
8560         int cmd;
8561         int spin_cnt = 0;
8562 
8563         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8564 retry:
8565         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8566 
8567         for (i = 0; i < rfs4_maxlock_tries; i++) {
8568                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8569                 error = VOP_FRLOCK(vp, cmd,
8570                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8571 
8572                 if (error != EAGAIN && error != EACCES)
8573                         break;
8574 
8575                 if (i < rfs4_maxlock_tries - 1) {
8576                         delay(delaytime);
8577                         delaytime *= 2;
8578                 }
8579         }
8580 
8581         if (error == EAGAIN || error == EACCES) {
8582                 /* Get the owner of the lock */
8583                 flk = *flock;
8584                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8585                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8586                     NULL) == 0) {
8587                         /*
8588                          * There's a race inherent in the current VOP_FRLOCK
8589                          * design where:
8590                          * a: "other guy" takes a lock that conflicts with a
8591                          * lock we want
8592                          * b: we attempt to take our lock (non-blocking) and
8593                          * the attempt fails.
8594                          * c: "other guy" releases the conflicting lock
8595                          * d: we ask what lock conflicts with the lock we want,
8596                          * getting F_UNLCK (no lock blocks us)
8597                          *
8598                          * If we retry the non-blocking lock attempt in this
8599                          * case (restart at step 'b') there's some possibility
8600                          * that many such attempts might fail.  However a test
8601                          * designed to actually provoke this race shows that
8602                          * the vast majority of cases require no retry, and
8603                          * only a few took as many as three retries.  Here's
8604                          * the test outcome:
8605                          *
8606                          *         number of retries    how many times we needed
8607                          *                              that many retries
8608                          *         0                    79461
8609                          *         1                      862
8610                          *         2                       49
8611                          *         3                        5
8612                          *
8613                          * Given those empirical results, we arbitrarily limit
8614                          * the retry count to ten.
8615                          *
8616                          * If we actually make to ten retries and give up,
8617                          * nothing catastrophic happens, but we're unable to
8618                          * return the information about the conflicting lock to
8619                          * the NFS client.  That's an acceptable trade off vs.
8620                          * letting this retry loop run forever.
8621                          */
8622                         if (flk.l_type == F_UNLCK) {
8623                                 if (spin_cnt++ < 10) {
8624                                         /* No longer locked, retry */
8625                                         goto retry;
8626                                 }
8627                         } else {
8628                                 *flock = flk;
8629                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8630                                     F_GETLK, &flk);
8631                         }
8632                 }
8633         }
8634 
8635         return (error);
8636 }
8637 
8638 /*ARGSUSED*/
8639 static nfsstat4
8640 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8641     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8642 {
8643         nfsstat4 status;
8644         rfs4_lockowner_t *lo = lsp->rls_locker;
8645         rfs4_state_t *sp = lsp->rls_state;
8646         struct flock64 flock;
8647         int16_t ltype;
8648         int flag;
8649         int error;
8650         sysid_t sysid;
8651         LOCK4res *lres;
8652         vnode_t *vp;
8653 
8654         if (rfs4_lease_expired(lo->rl_client)) {
8655                 return (NFS4ERR_EXPIRED);
8656         }
8657 
8658         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8659                 return (status);
8660 
8661         /* Check for zero length. To lock to end of file use all ones for V4 */
8662         if (length == 0)
8663                 return (NFS4ERR_INVAL);
8664         else if (length == (length4)(~0))
8665                 length = 0;             /* Posix to end of file  */
8666 
8667 retry:
8668         rfs4_dbe_lock(sp->rs_dbe);
8669         if (sp->rs_closed == TRUE) {
8670                 rfs4_dbe_unlock(sp->rs_dbe);
8671                 return (NFS4ERR_OLD_STATEID);
8672         }
8673 
8674         if (resop->resop != OP_LOCKU) {
8675                 switch (locktype) {
8676                 case READ_LT:
8677                 case READW_LT:
8678                         if ((sp->rs_share_access
8679                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8680                                 rfs4_dbe_unlock(sp->rs_dbe);
8681 
8682                                 return (NFS4ERR_OPENMODE);
8683                         }
8684                         ltype = F_RDLCK;
8685                         break;
8686                 case WRITE_LT:
8687                 case WRITEW_LT:
8688                         if ((sp->rs_share_access
8689                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8690                                 rfs4_dbe_unlock(sp->rs_dbe);
8691 
8692                                 return (NFS4ERR_OPENMODE);
8693                         }
8694                         ltype = F_WRLCK;
8695                         break;
8696                 }
8697         } else
8698                 ltype = F_UNLCK;
8699 
8700         flock.l_type = ltype;
8701         flock.l_whence = 0;             /* SEEK_SET */
8702         flock.l_start = offset;
8703         flock.l_len = length;
8704         flock.l_sysid = sysid;
8705         flock.l_pid = lsp->rls_locker->rl_pid;
8706 
8707         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8708         if (flock.l_len < 0 || flock.l_start < 0) {
8709                 rfs4_dbe_unlock(sp->rs_dbe);
8710                 return (NFS4ERR_INVAL);
8711         }
8712 
8713         /*
8714          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8715          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8716          */
8717         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8718 
8719         vp = sp->rs_finfo->rf_vp;
8720         VN_HOLD(vp);
8721 
8722         /*
8723          * We need to unlock sp before we call the underlying filesystem to
8724          * acquire the file lock.
8725          */
8726         rfs4_dbe_unlock(sp->rs_dbe);
8727 
8728         error = setlock(vp, &flock, flag, cred);
8729 
8730         /*
8731          * Make sure the file is still open.  In a case the file was closed in
8732          * the meantime, clean the lock we acquired using the setlock() call
8733          * above, and return the appropriate error.
8734          */
8735         rfs4_dbe_lock(sp->rs_dbe);
8736         if (sp->rs_closed == TRUE) {
8737                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8738                 rfs4_dbe_unlock(sp->rs_dbe);
8739 
8740                 VN_RELE(vp);
8741 
8742                 return (NFS4ERR_OLD_STATEID);
8743         }
8744         rfs4_dbe_unlock(sp->rs_dbe);
8745 
8746         VN_RELE(vp);
8747 
8748         if (error == 0) {
8749                 rfs4_dbe_lock(lsp->rls_dbe);
8750                 next_stateid(&lsp->rls_lockid);
8751                 rfs4_dbe_unlock(lsp->rls_dbe);
8752         }
8753 
8754         /*
8755          * N.B. We map error values to nfsv4 errors. This is differrent
8756          * than puterrno4 routine.
8757          */
8758         switch (error) {
8759         case 0:
8760                 status = NFS4_OK;
8761                 break;
8762         case EAGAIN:
8763         case EACCES:            /* Old value */
8764                 /* Can only get here if op is OP_LOCK */
8765                 ASSERT(resop->resop == OP_LOCK);
8766                 lres = &resop->nfs_resop4_u.oplock;
8767                 status = NFS4ERR_DENIED;
8768                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8769                     == NFS4ERR_EXPIRED)
8770                         goto retry;
8771                 break;
8772         case ENOLCK:
8773                 status = NFS4ERR_DELAY;
8774                 break;
8775         case EOVERFLOW:
8776                 status = NFS4ERR_INVAL;
8777                 break;
8778         case EINVAL:
8779                 status = NFS4ERR_NOTSUPP;
8780                 break;
8781         default:
8782                 status = NFS4ERR_SERVERFAULT;
8783                 break;
8784         }
8785 
8786         return (status);
8787 }
8788 
8789 /*ARGSUSED*/
8790 void
8791 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8792     struct svc_req *req, struct compound_state *cs)
8793 {
8794         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8795         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8796         nfsstat4 status;
8797         stateid4 *stateid;
8798         rfs4_lockowner_t *lo;
8799         rfs4_client_t *cp;
8800         rfs4_state_t *sp = NULL;
8801         rfs4_lo_state_t *lsp = NULL;
8802         bool_t ls_sw_held = FALSE;
8803         bool_t create = TRUE;
8804         bool_t lcreate = TRUE;
8805         bool_t dup_lock = FALSE;
8806         int rc;
8807 
8808         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8809             LOCK4args *, args);
8810 
8811         if (cs->vp == NULL) {
8812                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8813                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8814                     cs, LOCK4res *, resp);
8815                 return;
8816         }
8817 
8818         if (args->locker.new_lock_owner) {
8819                 /* Create a new lockowner for this instance */
8820                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8821 
8822                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8823 
8824                 stateid = &olo->open_stateid;
8825                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8826                 if (status != NFS4_OK) {
8827                         NFS4_DEBUG(rfs4_debug,
8828                             (CE_NOTE, "Get state failed in lock %d", status));
8829                         *cs->statusp = resp->status = status;
8830                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8831                             cs, LOCK4res *, resp);
8832                         return;
8833                 }
8834 
8835                 /* Ensure specified filehandle matches */
8836                 if (cs->vp != sp->rs_finfo->rf_vp) {
8837                         rfs4_state_rele(sp);
8838                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8839                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8840                             cs, LOCK4res *, resp);
8841                         return;
8842                 }
8843 
8844                 /* hold off other access to open_owner while we tinker */
8845                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8846 
8847                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8848                 case NFS4_CHECK_STATEID_OLD:
8849                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8850                         goto end;
8851                 case NFS4_CHECK_STATEID_BAD:
8852                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8853                         goto end;
8854                 case NFS4_CHECK_STATEID_EXPIRED:
8855                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8856                         goto end;
8857                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8858                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8859                         goto end;
8860                 case NFS4_CHECK_STATEID_CLOSED:
8861                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8862                         goto end;
8863                 case NFS4_CHECK_STATEID_OKAY:
8864                 case NFS4_CHECK_STATEID_REPLAY:
8865                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8866                             sp->rs_owner, resop)) {
8867                         case NFS4_CHKSEQ_OKAY:
8868                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8869                                         break;
8870                                 /*
8871                                  * This is replayed stateid; if seqid
8872                                  * matches next expected, then client
8873                                  * is using wrong seqid.
8874                                  */
8875                                 /* FALLTHROUGH */
8876                         case NFS4_CHKSEQ_BAD:
8877                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8878                                 goto end;
8879                         case NFS4_CHKSEQ_REPLAY:
8880                                 /* This is a duplicate LOCK request */
8881                                 dup_lock = TRUE;
8882 
8883                                 /*
8884                                  * For a duplicate we do not want to
8885                                  * create a new lockowner as it should
8886                                  * already exist.
8887                                  * Turn off the lockowner create flag.
8888                                  */
8889                                 lcreate = FALSE;
8890                         }
8891                         break;
8892                 }
8893 
8894                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8895                 if (lo == NULL) {
8896                         NFS4_DEBUG(rfs4_debug,
8897                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8898                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8899                         goto end;
8900                 }
8901 
8902                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8903                 if (lsp == NULL) {
8904                         rfs4_update_lease(sp->rs_owner->ro_client);
8905                         /*
8906                          * Only update the open_seqid if this is not
8907                          * a duplicate request
8908                          */
8909                         if (dup_lock == FALSE) {
8910                                 rfs4_update_open_sequence(sp->rs_owner);
8911                         }
8912 
8913                         NFS4_DEBUG(rfs4_debug,
8914                             (CE_NOTE, "rfs4_op_lock: no state"));
8915                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8916                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8917                         rfs4_lockowner_rele(lo);
8918                         goto end;
8919                 }
8920 
8921                 /*
8922                  * This is the new_lock_owner branch and the client is
8923                  * supposed to be associating a new lock_owner with
8924                  * the open file at this point.  If we find that a
8925                  * lock_owner/state association already exists and a
8926                  * successful LOCK request was returned to the client,
8927                  * an error is returned to the client since this is
8928                  * not appropriate.  The client should be using the
8929                  * existing lock_owner branch.
8930                  */
8931                 if (dup_lock == FALSE && create == FALSE) {
8932                         if (lsp->rls_lock_completed == TRUE) {
8933                                 *cs->statusp =
8934                                     resp->status = NFS4ERR_BAD_SEQID;
8935                                 rfs4_lockowner_rele(lo);
8936                                 goto end;
8937                         }
8938                 }
8939 
8940                 rfs4_update_lease(sp->rs_owner->ro_client);
8941 
8942                 /*
8943                  * Only update the open_seqid if this is not
8944                  * a duplicate request
8945                  */
8946                 if (dup_lock == FALSE) {
8947                         rfs4_update_open_sequence(sp->rs_owner);
8948                 }
8949 
8950                 /*
8951                  * If this is a duplicate lock request, just copy the
8952                  * previously saved reply and return.
8953                  */
8954                 if (dup_lock == TRUE) {
8955                         /* verify that lock_seqid's match */
8956                         if (lsp->rls_seqid != olo->lock_seqid) {
8957                                 NFS4_DEBUG(rfs4_debug,
8958                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8959                                     "lsp->seqid=%d old->seqid=%d",
8960                                     lsp->rls_seqid, olo->lock_seqid));
8961                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8962                         } else {
8963                                 rfs4_copy_reply(resop, &lsp->rls_reply);
8964                                 /*
8965                                  * Make sure to copy the just
8966                                  * retrieved reply status into the
8967                                  * overall compound status
8968                                  */
8969                                 *cs->statusp = resp->status;
8970                         }
8971                         rfs4_lockowner_rele(lo);
8972                         goto end;
8973                 }
8974 
8975                 rfs4_dbe_lock(lsp->rls_dbe);
8976 
8977                 /* Make sure to update the lock sequence id */
8978                 lsp->rls_seqid = olo->lock_seqid;
8979 
8980                 NFS4_DEBUG(rfs4_debug,
8981                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
8982 
8983                 /*
8984                  * This is used to signify the newly created lockowner
8985                  * stateid and its sequence number.  The checks for
8986                  * sequence number and increment don't occur on the
8987                  * very first lock request for a lockowner.
8988                  */
8989                 lsp->rls_skip_seqid_check = TRUE;
8990 
8991                 /* hold off other access to lsp while we tinker */
8992                 rfs4_sw_enter(&lsp->rls_sw);
8993                 ls_sw_held = TRUE;
8994 
8995                 rfs4_dbe_unlock(lsp->rls_dbe);
8996 
8997                 rfs4_lockowner_rele(lo);
8998         } else {
8999                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9000                 /* get lsp and hold the lock on the underlying file struct */
9001                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9002                     != NFS4_OK) {
9003                         *cs->statusp = resp->status = status;
9004                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9005                             cs, LOCK4res *, resp);
9006                         return;
9007                 }
9008                 create = FALSE; /* We didn't create lsp */
9009 
9010                 /* Ensure specified filehandle matches */
9011                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9012                         rfs4_lo_state_rele(lsp, TRUE);
9013                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9014                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9015                             cs, LOCK4res *, resp);
9016                         return;
9017                 }
9018 
9019                 /* hold off other access to lsp while we tinker */
9020                 rfs4_sw_enter(&lsp->rls_sw);
9021                 ls_sw_held = TRUE;
9022 
9023                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9024                 /*
9025                  * The stateid looks like it was okay (expected to be
9026                  * the next one)
9027                  */
9028                 case NFS4_CHECK_STATEID_OKAY:
9029                         /*
9030                          * The sequence id is now checked.  Determine
9031                          * if this is a replay or if it is in the
9032                          * expected (next) sequence.  In the case of a
9033                          * replay, there are two replay conditions
9034                          * that may occur.  The first is the normal
9035                          * condition where a LOCK is done with a
9036                          * NFS4_OK response and the stateid is
9037                          * updated.  That case is handled below when
9038                          * the stateid is identified as a REPLAY.  The
9039                          * second is the case where an error is
9040                          * returned, like NFS4ERR_DENIED, and the
9041                          * sequence number is updated but the stateid
9042                          * is not updated.  This second case is dealt
9043                          * with here.  So it may seem odd that the
9044                          * stateid is okay but the sequence id is a
9045                          * replay but it is okay.
9046                          */
9047                         switch (rfs4_check_lock_seqid(
9048                             args->locker.locker4_u.lock_owner.lock_seqid,
9049                             lsp, resop)) {
9050                         case NFS4_CHKSEQ_REPLAY:
9051                                 if (resp->status != NFS4_OK) {
9052                                         /*
9053                                          * Here is our replay and need
9054                                          * to verify that the last
9055                                          * response was an error.
9056                                          */
9057                                         *cs->statusp = resp->status;
9058                                         goto end;
9059                                 }
9060                                 /*
9061                                  * This is done since the sequence id
9062                                  * looked like a replay but it didn't
9063                                  * pass our check so a BAD_SEQID is
9064                                  * returned as a result.
9065                                  */
9066                                 /*FALLTHROUGH*/
9067                         case NFS4_CHKSEQ_BAD:
9068                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9069                                 goto end;
9070                         case NFS4_CHKSEQ_OKAY:
9071                                 /* Everything looks okay move ahead */
9072                                 break;
9073                         }
9074                         break;
9075                 case NFS4_CHECK_STATEID_OLD:
9076                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9077                         goto end;
9078                 case NFS4_CHECK_STATEID_BAD:
9079                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9080                         goto end;
9081                 case NFS4_CHECK_STATEID_EXPIRED:
9082                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9083                         goto end;
9084                 case NFS4_CHECK_STATEID_CLOSED:
9085                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9086                         goto end;
9087                 case NFS4_CHECK_STATEID_REPLAY:
9088                         switch (rfs4_check_lock_seqid(
9089                             args->locker.locker4_u.lock_owner.lock_seqid,
9090                             lsp, resop)) {
9091                         case NFS4_CHKSEQ_OKAY:
9092                                 /*
9093                                  * This is a replayed stateid; if
9094                                  * seqid matches the next expected,
9095                                  * then client is using wrong seqid.
9096                                  */
9097                         case NFS4_CHKSEQ_BAD:
9098                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9099                                 goto end;
9100                         case NFS4_CHKSEQ_REPLAY:
9101                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9102                                 *cs->statusp = status = resp->status;
9103                                 goto end;
9104                         }
9105                         break;
9106                 default:
9107                         ASSERT(FALSE);
9108                         break;
9109                 }
9110 
9111                 rfs4_update_lock_sequence(lsp);
9112                 rfs4_update_lease(lsp->rls_locker->rl_client);
9113         }
9114 
9115         /*
9116          * NFS4 only allows locking on regular files, so
9117          * verify type of object.
9118          */
9119         if (cs->vp->v_type != VREG) {
9120                 if (cs->vp->v_type == VDIR)
9121                         status = NFS4ERR_ISDIR;
9122                 else
9123                         status = NFS4ERR_INVAL;
9124                 goto out;
9125         }
9126 
9127         cp = lsp->rls_state->rs_owner->ro_client;
9128 
9129         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9130                 status = NFS4ERR_GRACE;
9131                 goto out;
9132         }
9133 
9134         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9135                 status = NFS4ERR_NO_GRACE;
9136                 goto out;
9137         }
9138 
9139         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9140                 status = NFS4ERR_NO_GRACE;
9141                 goto out;
9142         }
9143 
9144         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9145                 cs->deleg = TRUE;
9146 
9147         status = rfs4_do_lock(lsp, args->locktype,
9148             args->offset, args->length, cs->cr, resop);
9149 
9150 out:
9151         lsp->rls_skip_seqid_check = FALSE;
9152 
9153         *cs->statusp = resp->status = status;
9154 
9155         if (status == NFS4_OK) {
9156                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9157                 lsp->rls_lock_completed = TRUE;
9158         }
9159         /*
9160          * Only update the "OPEN" response here if this was a new
9161          * lock_owner
9162          */
9163         if (sp)
9164                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9165 
9166         rfs4_update_lock_resp(lsp, resop);
9167 
9168 end:
9169         if (lsp) {
9170                 if (ls_sw_held)
9171                         rfs4_sw_exit(&lsp->rls_sw);
9172                 /*
9173                  * If an sp obtained, then the lsp does not represent
9174                  * a lock on the file struct.
9175                  */
9176                 if (sp != NULL)
9177                         rfs4_lo_state_rele(lsp, FALSE);
9178                 else
9179                         rfs4_lo_state_rele(lsp, TRUE);
9180         }
9181         if (sp) {
9182                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9183                 rfs4_state_rele(sp);
9184         }
9185 
9186         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9187             LOCK4res *, resp);
9188 }
9189 
9190 /* free function for LOCK/LOCKT */
9191 static void
9192 lock_denied_free(nfs_resop4 *resop)
9193 {
9194         LOCK4denied *dp = NULL;
9195 
9196         switch (resop->resop) {
9197         case OP_LOCK:
9198                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9199                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9200                 break;
9201         case OP_LOCKT:
9202                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9203                         dp = &resop->nfs_resop4_u.oplockt.denied;
9204                 break;
9205         default:
9206                 break;
9207         }
9208 
9209         if (dp)
9210                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9211 }
9212 
9213 /*ARGSUSED*/
9214 void
9215 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9216     struct svc_req *req, struct compound_state *cs)
9217 {
9218         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9219         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9220         nfsstat4 status;
9221         stateid4 *stateid = &args->lock_stateid;
9222         rfs4_lo_state_t *lsp;
9223 
9224         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9225             LOCKU4args *, args);
9226 
9227         if (cs->vp == NULL) {
9228                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9229                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9230                     LOCKU4res *, resp);
9231                 return;
9232         }
9233 
9234         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9235                 *cs->statusp = resp->status = status;
9236                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9237                     LOCKU4res *, resp);
9238                 return;
9239         }
9240 
9241         /* Ensure specified filehandle matches */
9242         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9243                 rfs4_lo_state_rele(lsp, TRUE);
9244                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9245                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9246                     LOCKU4res *, resp);
9247                 return;
9248         }
9249 
9250         /* hold off other access to lsp while we tinker */
9251         rfs4_sw_enter(&lsp->rls_sw);
9252 
9253         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9254         case NFS4_CHECK_STATEID_OKAY:
9255                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9256                     != NFS4_CHKSEQ_OKAY) {
9257                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9258                         goto end;
9259                 }
9260                 break;
9261         case NFS4_CHECK_STATEID_OLD:
9262                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9263                 goto end;
9264         case NFS4_CHECK_STATEID_BAD:
9265                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9266                 goto end;
9267         case NFS4_CHECK_STATEID_EXPIRED:
9268                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9269                 goto end;
9270         case NFS4_CHECK_STATEID_CLOSED:
9271                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9272                 goto end;
9273         case NFS4_CHECK_STATEID_REPLAY:
9274                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9275                 case NFS4_CHKSEQ_OKAY:
9276                                 /*
9277                                  * This is a replayed stateid; if
9278                                  * seqid matches the next expected,
9279                                  * then client is using wrong seqid.
9280                                  */
9281                 case NFS4_CHKSEQ_BAD:
9282                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9283                         goto end;
9284                 case NFS4_CHKSEQ_REPLAY:
9285                         rfs4_update_lease(lsp->rls_locker->rl_client);
9286                         *cs->statusp = status = resp->status;
9287                         goto end;
9288                 }
9289                 break;
9290         default:
9291                 ASSERT(FALSE);
9292                 break;
9293         }
9294 
9295         rfs4_update_lock_sequence(lsp);
9296         rfs4_update_lease(lsp->rls_locker->rl_client);
9297 
9298         /*
9299          * NFS4 only allows locking on regular files, so
9300          * verify type of object.
9301          */
9302         if (cs->vp->v_type != VREG) {
9303                 if (cs->vp->v_type == VDIR)
9304                         status = NFS4ERR_ISDIR;
9305                 else
9306                         status = NFS4ERR_INVAL;
9307                 goto out;
9308         }
9309 
9310         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9311                 status = NFS4ERR_GRACE;
9312                 goto out;
9313         }
9314 
9315         status = rfs4_do_lock(lsp, args->locktype,
9316             args->offset, args->length, cs->cr, resop);
9317 
9318 out:
9319         *cs->statusp = resp->status = status;
9320 
9321         if (status == NFS4_OK)
9322                 resp->lock_stateid = lsp->rls_lockid.stateid;
9323 
9324         rfs4_update_lock_resp(lsp, resop);
9325 
9326 end:
9327         rfs4_sw_exit(&lsp->rls_sw);
9328         rfs4_lo_state_rele(lsp, TRUE);
9329 
9330         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9331             LOCKU4res *, resp);
9332 }
9333 
9334 /*
9335  * LOCKT is a best effort routine, the client can not be guaranteed that
9336  * the status return is still in effect by the time the reply is received.
9337  * They are numerous race conditions in this routine, but we are not required
9338  * and can not be accurate.
9339  */
9340 /*ARGSUSED*/
9341 void
9342 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9343     struct svc_req *req, struct compound_state *cs)
9344 {
9345         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9346         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9347         rfs4_lockowner_t *lo;
9348         rfs4_client_t *cp;
9349         bool_t create = FALSE;
9350         struct flock64 flk;
9351         int error;
9352         int flag = FREAD | FWRITE;
9353         int ltype;
9354         length4 posix_length;
9355         sysid_t sysid;
9356         pid_t pid;
9357 
9358         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9359             LOCKT4args *, args);
9360 
9361         if (cs->vp == NULL) {
9362                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9363                 goto out;
9364         }
9365 
9366         /*
9367          * NFS4 only allows locking on regular files, so
9368          * verify type of object.
9369          */
9370         if (cs->vp->v_type != VREG) {
9371                 if (cs->vp->v_type == VDIR)
9372                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9373                 else
9374                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9375                 goto out;
9376         }
9377 
9378         /*
9379          * Check out the clientid to ensure the server knows about it
9380          * so that we correctly inform the client of a server reboot.
9381          */
9382         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9383             == NULL) {
9384                 *cs->statusp = resp->status =
9385                     rfs4_check_clientid(&args->owner.clientid, 0);
9386                 goto out;
9387         }
9388         if (rfs4_lease_expired(cp)) {
9389                 rfs4_client_close(cp);
9390                 /*
9391                  * Protocol doesn't allow returning NFS4ERR_STALE as
9392                  * other operations do on this check so STALE_CLIENTID
9393                  * is returned instead
9394                  */
9395                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9396                 goto out;
9397         }
9398 
9399         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9400                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9401                 rfs4_client_rele(cp);
9402                 goto out;
9403         }
9404         rfs4_client_rele(cp);
9405 
9406         resp->status = NFS4_OK;
9407 
9408         switch (args->locktype) {
9409         case READ_LT:
9410         case READW_LT:
9411                 ltype = F_RDLCK;
9412                 break;
9413         case WRITE_LT:
9414         case WRITEW_LT:
9415                 ltype = F_WRLCK;
9416                 break;
9417         }
9418 
9419         posix_length = args->length;
9420         /* Check for zero length. To lock to end of file use all ones for V4 */
9421         if (posix_length == 0) {
9422                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9423                 goto out;
9424         } else if (posix_length == (length4)(~0)) {
9425                 posix_length = 0;       /* Posix to end of file  */
9426         }
9427 
9428         /* Find or create a lockowner */
9429         lo = rfs4_findlockowner(&args->owner, &create);
9430 
9431         if (lo) {
9432                 pid = lo->rl_pid;
9433                 if ((resp->status =
9434                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9435                         goto err;
9436         } else {
9437                 pid = 0;
9438                 sysid = lockt_sysid;
9439         }
9440 retry:
9441         flk.l_type = ltype;
9442         flk.l_whence = 0;               /* SEEK_SET */
9443         flk.l_start = args->offset;
9444         flk.l_len = posix_length;
9445         flk.l_sysid = sysid;
9446         flk.l_pid = pid;
9447         flag |= F_REMOTELOCK;
9448 
9449         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9450 
9451         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9452         if (flk.l_len < 0 || flk.l_start < 0) {
9453                 resp->status = NFS4ERR_INVAL;
9454                 goto err;
9455         }
9456         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9457             NULL, cs->cr, NULL);
9458 
9459         /*
9460          * N.B. We map error values to nfsv4 errors. This is differrent
9461          * than puterrno4 routine.
9462          */
9463         switch (error) {
9464         case 0:
9465                 if (flk.l_type == F_UNLCK)
9466                         resp->status = NFS4_OK;
9467                 else {
9468                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9469                                 goto retry;
9470                         resp->status = NFS4ERR_DENIED;
9471                 }
9472                 break;
9473         case EOVERFLOW:
9474                 resp->status = NFS4ERR_INVAL;
9475                 break;
9476         case EINVAL:
9477                 resp->status = NFS4ERR_NOTSUPP;
9478                 break;
9479         default:
9480                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9481                     error);
9482                 resp->status = NFS4ERR_SERVERFAULT;
9483                 break;
9484         }
9485 
9486 err:
9487         if (lo)
9488                 rfs4_lockowner_rele(lo);
9489         *cs->statusp = resp->status;
9490 out:
9491         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9492             LOCKT4res *, resp);
9493 }
9494 
9495 int
9496 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9497 {
9498         int err;
9499         int cmd;
9500         vnode_t *vp;
9501         struct shrlock shr;
9502         struct shr_locowner shr_loco;
9503         int fflags = 0;
9504 
9505         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9506         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9507 
9508         if (sp->rs_closed)
9509                 return (NFS4ERR_OLD_STATEID);
9510 
9511         vp = sp->rs_finfo->rf_vp;
9512         ASSERT(vp);
9513 
9514         shr.s_access = shr.s_deny = 0;
9515 
9516         if (access & OPEN4_SHARE_ACCESS_READ) {
9517                 fflags |= FREAD;
9518                 shr.s_access |= F_RDACC;
9519         }
9520         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9521                 fflags |= FWRITE;
9522                 shr.s_access |= F_WRACC;
9523         }
9524         ASSERT(shr.s_access);
9525 
9526         if (deny & OPEN4_SHARE_DENY_READ)
9527                 shr.s_deny |= F_RDDNY;
9528         if (deny & OPEN4_SHARE_DENY_WRITE)
9529                 shr.s_deny |= F_WRDNY;
9530 
9531         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9532         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9533         shr_loco.sl_pid = shr.s_pid;
9534         shr_loco.sl_id = shr.s_sysid;
9535         shr.s_owner = (caddr_t)&shr_loco;
9536         shr.s_own_len = sizeof (shr_loco);
9537 
9538         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9539 
9540         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9541         if (err != 0) {
9542                 if (err == EAGAIN)
9543                         err = NFS4ERR_SHARE_DENIED;
9544                 else
9545                         err = puterrno4(err);
9546                 return (err);
9547         }
9548 
9549         sp->rs_share_access |= access;
9550         sp->rs_share_deny |= deny;
9551 
9552         return (0);
9553 }
9554 
9555 int
9556 rfs4_unshare(rfs4_state_t *sp)
9557 {
9558         int err;
9559         struct shrlock shr;
9560         struct shr_locowner shr_loco;
9561 
9562         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9563 
9564         if (sp->rs_closed || sp->rs_share_access == 0)
9565                 return (0);
9566 
9567         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9568         ASSERT(sp->rs_finfo->rf_vp);
9569 
9570         shr.s_access = shr.s_deny = 0;
9571         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9572         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9573         shr_loco.sl_pid = shr.s_pid;
9574         shr_loco.sl_id = shr.s_sysid;
9575         shr.s_owner = (caddr_t)&shr_loco;
9576         shr.s_own_len = sizeof (shr_loco);
9577 
9578         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9579             NULL);
9580         if (err != 0) {
9581                 err = puterrno4(err);
9582                 return (err);
9583         }
9584 
9585         sp->rs_share_access = 0;
9586         sp->rs_share_deny = 0;
9587 
9588         return (0);
9589 
9590 }
9591 
9592 static int
9593 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9594 {
9595         struct clist    *wcl;
9596         count4          count = rok->data_len;
9597         int             wlist_len;
9598 
9599         wcl = args->wlist;
9600         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9601                 return (FALSE);
9602         }
9603         wcl = args->wlist;
9604         rok->wlist_len = wlist_len;
9605         rok->wlist = wcl;
9606         return (TRUE);
9607 }
9608 
9609 /* Tunable: non-zero disables server referrals; see vn_is_nfs_reparse(). */
9610 int rfs4_no_referrals = 0;
9611 
9612 /*
9613  * Find an NFS record in reparse point data.
9614  * Returns 0 for success and <0 or an errno value on failure.
9615  */
9616 int
9617 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9618 {
9619         int err;
9620         char *stype, *val;
9621         nvlist_t *nvl;
9622         nvpair_t *curr;
9623 
9624         if ((nvl = reparse_init()) == NULL)
9625                 return (-1);
9626 
9627         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9628                 reparse_free(nvl);
9629                 return (err);
9630         }
9631 
9632         curr = NULL;
9633         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9634                 if ((stype = nvpair_name(curr)) == NULL) {
9635                         reparse_free(nvl);
9636                         return (-2);
9637                 }
9638                 if (strncasecmp(stype, "NFS", 3) == 0)
9639                         break;
9640         }
9641 
9642         if ((curr == NULL) ||
9643             (nvpair_value_string(curr, &val))) {
9644                 reparse_free(nvl);
9645                 return (-3);
9646         }
9647         *nvlp = nvl;
9648         *svcp = stype;
9649         *datap = val;
9650         return (0);
9651 }
9652 
9653 int
9654 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9655 {
9656         nvlist_t *nvl;
9657         char *s, *d;
9658 
9659         if (rfs4_no_referrals != 0)
9660                 return (B_FALSE);
9661 
9662         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9663                 return (B_FALSE);
9664 
9665         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9666                 return (B_FALSE);
9667 
9668         reparse_free(nvl);
9669 
9670         return (B_TRUE);
9671 }
9672 
9673 /*
9674  * There is a user-level copy of this routine in ref_subr.c.
9675  * Changes should be kept in sync.
9676  */
9677 static int
9678 nfs4_create_components(char *path, component4 *comp4)
9679 {
9680         int slen, plen, ncomp;
9681         char *ori_path, *nxtc, buf[MAXNAMELEN];
9682 
9683         if (path == NULL)
9684                 return (0);
9685 
9686         plen = strlen(path) + 1;        /* include the terminator */
9687         ori_path = path;
9688         ncomp = 0;
9689 
9690         /* count number of components in the path */
9691         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9692                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9693                         if ((slen = nxtc - path) == 0) {
9694                                 path = nxtc + 1;
9695                                 continue;
9696                         }
9697 
9698                         if (comp4 != NULL) {
9699                                 bcopy(path, buf, slen);
9700                                 buf[slen] = '\0';
9701                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9702                         }
9703 
9704                         ncomp++;        /* 1 valid component */
9705                         path = nxtc + 1;
9706                 }
9707                 if (*nxtc == '\0' || *nxtc == '\n')
9708                         break;
9709         }
9710 
9711         return (ncomp);
9712 }
9713 
9714 /*
9715  * There is a user-level copy of this routine in ref_subr.c.
9716  * Changes should be kept in sync.
9717  */
9718 static int
9719 make_pathname4(char *path, pathname4 *pathname)
9720 {
9721         int ncomp;
9722         component4 *comp4;
9723 
9724         if (pathname == NULL)
9725                 return (0);
9726 
9727         if (path == NULL) {
9728                 pathname->pathname4_val = NULL;
9729                 pathname->pathname4_len = 0;
9730                 return (0);
9731         }
9732 
9733         /* count number of components to alloc buffer */
9734         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9735                 pathname->pathname4_val = NULL;
9736                 pathname->pathname4_len = 0;
9737                 return (0);
9738         }
9739         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9740 
9741         /* copy components into allocated buffer */
9742         ncomp = nfs4_create_components(path, comp4);
9743 
9744         pathname->pathname4_val = comp4;
9745         pathname->pathname4_len = ncomp;
9746 
9747         return (ncomp);
9748 }
9749 
9750 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9751 
9752 fs_locations4 *
9753 fetch_referral(vnode_t *vp, cred_t *cr)
9754 {
9755         nvlist_t *nvl;
9756         char *stype, *sdata;
9757         fs_locations4 *result;
9758         char buf[1024];
9759         size_t bufsize;
9760         XDR xdr;
9761         int err;
9762 
9763         /*
9764          * Check attrs to ensure it's a reparse point
9765          */
9766         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9767                 return (NULL);
9768 
9769         /*
9770          * Look for an NFS record and get the type and data
9771          */
9772         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9773                 return (NULL);
9774 
9775         /*
9776          * With the type and data, upcall to get the referral
9777          */
9778         bufsize = sizeof (buf);
9779         bzero(buf, sizeof (buf));
9780         err = reparse_kderef((const char *)stype, (const char *)sdata,
9781             buf, &bufsize);
9782         reparse_free(nvl);
9783 
9784         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9785             char *, stype, char *, sdata, char *, buf, int, err);
9786         if (err) {
9787                 cmn_err(CE_NOTE,
9788                     "reparsed daemon not running: unable to get referral (%d)",
9789                     err);
9790                 return (NULL);
9791         }
9792 
9793         /*
9794          * We get an XDR'ed record back from the kderef call
9795          */
9796         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9797         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9798         err = xdr_fs_locations4(&xdr, result);
9799         XDR_DESTROY(&xdr);
9800         if (err != TRUE) {
9801                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9802                     int, err);
9803                 return (NULL);
9804         }
9805 
9806         /*
9807          * Look at path to recover fs_root, ignoring the leading '/'
9808          */
9809         (void) make_pathname4(vp->v_path, &result->fs_root);
9810 
9811         return (result);
9812 }
9813 
9814 char *
9815 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9816 {
9817         fs_locations4 *fsl;
9818         fs_location4 *fs;
9819         char *server, *path, *symbuf;
9820         static char *prefix = "/net/";
9821         int i, size, npaths;
9822         uint_t len;
9823 
9824         /* Get the referral */
9825         if ((fsl = fetch_referral(vp, cr)) == NULL)
9826                 return (NULL);
9827 
9828         /* Deal with only the first location and first server */
9829         fs = &fsl->locations_val[0];
9830         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9831         if (server == NULL) {
9832                 rfs4_free_fs_locations4(fsl);
9833                 kmem_free(fsl, sizeof (fs_locations4));
9834                 return (NULL);
9835         }
9836 
9837         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9838         size = strlen(prefix) + len;
9839         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9840                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9841 
9842         /* Allocate the symlink buffer and fill it */
9843         symbuf = kmem_zalloc(size, KM_SLEEP);
9844         (void) strcat(symbuf, prefix);
9845         (void) strcat(symbuf, server);
9846         kmem_free(server, len);
9847 
9848         npaths = 0;
9849         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9850                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9851                 if (path == NULL)
9852                         continue;
9853                 (void) strcat(symbuf, "/");
9854                 (void) strcat(symbuf, path);
9855                 npaths++;
9856                 kmem_free(path, len);
9857         }
9858 
9859         rfs4_free_fs_locations4(fsl);
9860         kmem_free(fsl, sizeof (fs_locations4));
9861 
9862         if (strsz != NULL)
9863                 *strsz = size;
9864         return (symbuf);
9865 }
9866 
9867 /*
9868  * Check to see if we have a downrev Solaris client, so that we
9869  * can send it a symlink instead of a referral.
9870  */
9871 int
9872 client_is_downrev(struct svc_req *req)
9873 {
9874         struct sockaddr *ca;
9875         rfs4_clntip_t *ci;
9876         bool_t create = FALSE;
9877         int is_downrev;
9878 
9879         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9880         ASSERT(ca);
9881         ci = rfs4_find_clntip(ca, &create);
9882         if (ci == NULL)
9883                 return (0);
9884         is_downrev = ci->ri_no_referrals;
9885         rfs4_dbe_rele(ci->ri_dbe);
9886         return (is_downrev);
9887 }