1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2012 by Delphix. All rights reserved.
  26  */
  27 
  28 /*
  29  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30  *      All Rights Reserved
  31  */
  32 
  33 #include <sys/param.h>
  34 #include <sys/types.h>
  35 #include <sys/systm.h>
  36 #include <sys/cred.h>
  37 #include <sys/buf.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vfs_opreg.h>
  40 #include <sys/vnode.h>
  41 #include <sys/uio.h>
  42 #include <sys/errno.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/statvfs.h>
  45 #include <sys/kmem.h>
  46 #include <sys/dirent.h>
  47 #include <sys/cmn_err.h>
  48 #include <sys/debug.h>
  49 #include <sys/systeminfo.h>
  50 #include <sys/flock.h>
  51 #include <sys/pathname.h>
  52 #include <sys/nbmlock.h>
  53 #include <sys/share.h>
  54 #include <sys/atomic.h>
  55 #include <sys/policy.h>
  56 #include <sys/fem.h>
  57 #include <sys/sdt.h>
  58 #include <sys/ddi.h>
  59 #include <sys/zone.h>
  60 
  61 #include <fs/fs_reparse.h>
  62 
  63 #include <rpc/types.h>
  64 #include <rpc/auth.h>
  65 #include <rpc/rpcsec_gss.h>
  66 #include <rpc/svc.h>
  67 
  68 #include <nfs/nfs.h>
  69 #include <nfs/export.h>
  70 #include <nfs/nfs_cmd.h>
  71 #include <nfs/lm.h>
  72 #include <nfs/nfs4.h>
  73 
  74 #include <sys/strsubr.h>
  75 #include <sys/strsun.h>
  76 
  77 #include <inet/common.h>
  78 #include <inet/ip.h>
  79 #include <inet/ip6.h>
  80 
  81 #include <sys/tsol/label.h>
  82 #include <sys/tsol/tndb.h>
  83 
  84 #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  85 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  86 #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  87 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  88 extern struct svc_ops rdma_svc_ops;
  89 extern int nfs_loaned_buffers;
  90 /* End of Tunables */
  91 
  92 static int rdma_setup_read_data4(READ4args *, READ4res *);
  93 
  94 /*
  95  * Used to bump the stateid4.seqid value and show changes in the stateid
  96  */
  97 #define next_stateid(sp) (++(sp)->bits.chgseq)
  98 
  99 /*
 100  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 101  *      This is used to return NFS4ERR_TOOSMALL when clients specify
 102  *      maxcount that isn't large enough to hold the smallest possible
 103  *      XDR encoded dirent.
 104  *
 105  *          sizeof cookie (8 bytes) +
 106  *          sizeof name_len (4 bytes) +
 107  *          sizeof smallest (padded) name (4 bytes) +
 108  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 109  *          sizeof attrlist4_len (4 bytes) +
 110  *          sizeof next boolean (4 bytes)
 111  *
 112  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 113  * the smallest possible entry4 (assumes no attrs requested).
 114  *      sizeof nfsstat4 (4 bytes) +
 115  *      sizeof verifier4 (8 bytes) +
 116  *      sizeof entry4list bool (4 bytes) +
 117  *      sizeof entry4   (36 bytes) +
 118  *      sizeof eof bool  (4 bytes)
 119  *
 120  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 121  *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 122  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 123  *      required for a given name length.  MAXNAMELEN is the maximum
 124  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 125  *      macros are to allow for . and .. entries -- just a minor tweak to try
 126  *      and guarantee that buffer we give to VOP_READDIR will be large enough
 127  *      to hold ., .., and the largest possible solaris dirent64.
 128  */
 129 #define RFS4_MINLEN_ENTRY4 36
 130 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 131 #define RFS4_MINLEN_RDDIR_BUF \
 132         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 133 
 134 /*
 135  * It would be better to pad to 4 bytes since that's what XDR would do,
 136  * but the dirents UFS gives us are already padded to 8, so just take
 137  * what we're given.  Dircount is only a hint anyway.  Currently the
 138  * solaris kernel is ASCII only, so there's no point in calling the
 139  * UTF8 functions.
 140  *
 * dirent64: name padded to provide 8 byte struct alignment
 142  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 143  *
 144  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 145  *
 146  */
 147 #define DIRENT64_TO_DIRCOUNT(dp) \
 148         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 149 
 150 time_t rfs4_start_time;                 /* Initialized in rfs4_srvrinit */
 151 
 152 static sysid_t lockt_sysid;             /* dummy sysid for all LOCKT calls */
 153 
 154 u_longlong_t    nfs4_srv_caller_id;
 155 uint_t          nfs4_srv_vkey = 0;
 156 
 157 verifier4       Write4verf;
 158 verifier4       Readdir4verf;
 159 
 160 void    rfs4_init_compound_state(struct compound_state *);
 161 
 162 static void     nullfree(caddr_t);
 163 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 164                         struct compound_state *);
 165 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166                         struct compound_state *);
 167 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168                         struct compound_state *);
 169 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 170                         struct compound_state *);
 171 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 172                         struct compound_state *);
 173 static void     rfs4_op_create_free(nfs_resop4 *resop);
 174 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 175                         struct svc_req *, struct compound_state *);
 176 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 177                         struct svc_req *, struct compound_state *);
 178 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 179                         struct compound_state *);
 180 static void     rfs4_op_getattr_free(nfs_resop4 *);
 181 static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 182                         struct compound_state *);
 183 static void     rfs4_op_getfh_free(nfs_resop4 *);
 184 static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 185                         struct compound_state *);
 186 static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 187                         struct compound_state *);
 188 static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 189                         struct compound_state *);
 190 static void     lock_denied_free(nfs_resop4 *);
 191 static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 192                         struct compound_state *);
 193 static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 194                         struct compound_state *);
 195 static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 196                         struct compound_state *);
 197 static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 198                         struct compound_state *);
 199 static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 200                                 struct svc_req *req, struct compound_state *cs);
 201 static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 202                         struct compound_state *);
 203 static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 204                         struct compound_state *);
 205 static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 206                         struct svc_req *, struct compound_state *);
 207 static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 208                         struct svc_req *, struct compound_state *);
 209 static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 210                         struct compound_state *);
 211 static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 212                         struct compound_state *);
 213 static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 214                         struct compound_state *);
 215 static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 216                         struct compound_state *);
 217 static void     rfs4_op_read_free(nfs_resop4 *);
 218 static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 219 static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 220                         struct compound_state *);
 221 static void     rfs4_op_readlink_free(nfs_resop4 *);
 222 static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 223                         struct svc_req *, struct compound_state *);
 224 static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 225                         struct compound_state *);
 226 static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 227                         struct compound_state *);
 228 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229                         struct compound_state *);
 230 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231                         struct compound_state *);
 232 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233                         struct compound_state *);
 234 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235                         struct compound_state *);
 236 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 237                         struct compound_state *);
 238 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 239                         struct compound_state *);
 240 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 241                         struct svc_req *, struct compound_state *);
 242 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 243                         struct svc_req *req, struct compound_state *);
 244 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 245                         struct compound_state *);
 246 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 247 
 248 static nfsstat4 check_open_access(uint32_t,
 249                                 struct compound_state *, struct svc_req *);
 250 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 251 void rfs4_ss_clid(rfs4_client_t *);
 252 
 253 /*
 254  * translation table for attrs
 255  */
 256 struct nfs4_ntov_table {
 257         union nfs4_attr_u *na;
 258         uint8_t amap[NFS4_MAXNUM_ATTRS];
 259         int attrcnt;
 260         bool_t vfsstat;
 261 };
 262 
 263 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 264 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 265                                     struct nfs4_svgetit_arg *sargp);
 266 
 267 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 268                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 269                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 270 
 271 fem_t           *deleg_rdops;
 272 fem_t           *deleg_wrops;
 273 
 274 rfs4_servinst_t *rfs4_cur_servinst = NULL;      /* current server instance */
 275 kmutex_t        rfs4_servinst_lock;     /* protects linked list */
 276 int             rfs4_seen_first_compound;       /* set first time we see one */
 277 
 278 /*
 279  * NFS4 op dispatch table
 280  */
 281 
 282 struct rfsv4disp {
 283         void    (*dis_proc)();          /* proc to call */
 284         void    (*dis_resfree)();       /* frees space allocated by proc */
 285         int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 286 };
 287 
 288 static struct rfsv4disp rfsv4disptab[] = {
 289         /*
 290          * NFS VERSION 4
 291          */
 292 
 293         /* RFS_NULL = 0 */
 294         {rfs4_op_illegal, nullfree, 0},
 295 
 296         /* UNUSED = 1 */
 297         {rfs4_op_illegal, nullfree, 0},
 298 
 299         /* UNUSED = 2 */
 300         {rfs4_op_illegal, nullfree, 0},
 301 
 302         /* OP_ACCESS = 3 */
 303         {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 304 
 305         /* OP_CLOSE = 4 */
 306         {rfs4_op_close, nullfree, 0},
 307 
 308         /* OP_COMMIT = 5 */
 309         {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 310 
 311         /* OP_CREATE = 6 */
 312         {rfs4_op_create, nullfree, 0},
 313 
 314         /* OP_DELEGPURGE = 7 */
 315         {rfs4_op_delegpurge, nullfree, 0},
 316 
 317         /* OP_DELEGRETURN = 8 */
 318         {rfs4_op_delegreturn, nullfree, 0},
 319 
 320         /* OP_GETATTR = 9 */
 321         {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 322 
 323         /* OP_GETFH = 10 */
 324         {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 325 
 326         /* OP_LINK = 11 */
 327         {rfs4_op_link, nullfree, 0},
 328 
 329         /* OP_LOCK = 12 */
 330         {rfs4_op_lock, lock_denied_free, 0},
 331 
 332         /* OP_LOCKT = 13 */
 333         {rfs4_op_lockt, lock_denied_free, 0},
 334 
 335         /* OP_LOCKU = 14 */
 336         {rfs4_op_locku, nullfree, 0},
 337 
 338         /* OP_LOOKUP = 15 */
 339         {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 340 
 341         /* OP_LOOKUPP = 16 */
 342         {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 343 
 344         /* OP_NVERIFY = 17 */
 345         {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 346 
 347         /* OP_OPEN = 18 */
 348         {rfs4_op_open, rfs4_free_reply, 0},
 349 
 350         /* OP_OPENATTR = 19 */
 351         {rfs4_op_openattr, nullfree, 0},
 352 
 353         /* OP_OPEN_CONFIRM = 20 */
 354         {rfs4_op_open_confirm, nullfree, 0},
 355 
 356         /* OP_OPEN_DOWNGRADE = 21 */
 357         {rfs4_op_open_downgrade, nullfree, 0},
 358 
 359         /* OP_OPEN_PUTFH = 22 */
 360         {rfs4_op_putfh, nullfree, RPC_ALL},
 361 
 362         /* OP_PUTPUBFH = 23 */
 363         {rfs4_op_putpubfh, nullfree, RPC_ALL},
 364 
 365         /* OP_PUTROOTFH = 24 */
 366         {rfs4_op_putrootfh, nullfree, RPC_ALL},
 367 
 368         /* OP_READ = 25 */
 369         {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 370 
 371         /* OP_READDIR = 26 */
 372         {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 373 
 374         /* OP_READLINK = 27 */
 375         {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 376 
 377         /* OP_REMOVE = 28 */
 378         {rfs4_op_remove, nullfree, 0},
 379 
 380         /* OP_RENAME = 29 */
 381         {rfs4_op_rename, nullfree, 0},
 382 
 383         /* OP_RENEW = 30 */
 384         {rfs4_op_renew, nullfree, 0},
 385 
 386         /* OP_RESTOREFH = 31 */
 387         {rfs4_op_restorefh, nullfree, RPC_ALL},
 388 
 389         /* OP_SAVEFH = 32 */
 390         {rfs4_op_savefh, nullfree, RPC_ALL},
 391 
 392         /* OP_SECINFO = 33 */
 393         {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 394 
 395         /* OP_SETATTR = 34 */
 396         {rfs4_op_setattr, nullfree, 0},
 397 
 398         /* OP_SETCLIENTID = 35 */
 399         {rfs4_op_setclientid, nullfree, 0},
 400 
 401         /* OP_SETCLIENTID_CONFIRM = 36 */
 402         {rfs4_op_setclientid_confirm, nullfree, 0},
 403 
 404         /* OP_VERIFY = 37 */
 405         {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 406 
 407         /* OP_WRITE = 38 */
 408         {rfs4_op_write, nullfree, 0},
 409 
 410         /* OP_RELEASE_LOCKOWNER = 39 */
 411         {rfs4_op_release_lockowner, nullfree, 0},
 412 };
 413 
 414 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 415 
 416 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 417 
 418 #ifdef DEBUG
 419 
 420 int             rfs4_fillone_debug = 0;
 421 int             rfs4_no_stub_access = 1;
 422 int             rfs4_rddir_debug = 0;
 423 
 424 static char    *rfs4_op_string[] = {
 425         "rfs4_op_null",
 426         "rfs4_op_1 unused",
 427         "rfs4_op_2 unused",
 428         "rfs4_op_access",
 429         "rfs4_op_close",
 430         "rfs4_op_commit",
 431         "rfs4_op_create",
 432         "rfs4_op_delegpurge",
 433         "rfs4_op_delegreturn",
 434         "rfs4_op_getattr",
 435         "rfs4_op_getfh",
 436         "rfs4_op_link",
 437         "rfs4_op_lock",
 438         "rfs4_op_lockt",
 439         "rfs4_op_locku",
 440         "rfs4_op_lookup",
 441         "rfs4_op_lookupp",
 442         "rfs4_op_nverify",
 443         "rfs4_op_open",
 444         "rfs4_op_openattr",
 445         "rfs4_op_open_confirm",
 446         "rfs4_op_open_downgrade",
 447         "rfs4_op_putfh",
 448         "rfs4_op_putpubfh",
 449         "rfs4_op_putrootfh",
 450         "rfs4_op_read",
 451         "rfs4_op_readdir",
 452         "rfs4_op_readlink",
 453         "rfs4_op_remove",
 454         "rfs4_op_rename",
 455         "rfs4_op_renew",
 456         "rfs4_op_restorefh",
 457         "rfs4_op_savefh",
 458         "rfs4_op_secinfo",
 459         "rfs4_op_setattr",
 460         "rfs4_op_setclientid",
 461         "rfs4_op_setclient_confirm",
 462         "rfs4_op_verify",
 463         "rfs4_op_write",
 464         "rfs4_op_release_lockowner",
 465         "rfs4_op_illegal"
 466 };
 467 #endif
 468 
 469 void    rfs4_ss_chkclid(rfs4_client_t *);
 470 
 471 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 472 
 473 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 474 
 475 #ifdef  nextdp
 476 #undef nextdp
 477 #endif
 478 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 479 
 480 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 481         VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 482         VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 483         VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 484         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 485         VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 486         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 487         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 488         NULL,                   NULL
 489 };
 490 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
 491         VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 492         VOPNAME_READ,           { .femop_read = deleg_wr_read },
 493         VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 494         VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 495         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 496         VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 497         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 498         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 499         NULL,                   NULL
 500 };
 501 
 502 int
 503 rfs4_srvrinit(void)
 504 {
 505         timespec32_t verf;
 506         int error;
 507         extern void rfs4_attr_init();
 508         extern krwlock_t rfs4_deleg_policy_lock;
 509 
 510         /*
 511          * The following algorithm attempts to find a unique verifier
 512          * to be used as the write verifier returned from the server
 513          * to the client.  It is important that this verifier change
 514          * whenever the server reboots.  Of secondary importance, it
 515          * is important for the verifier to be unique between two
 516          * different servers.
 517          *
 518          * Thus, an attempt is made to use the system hostid and the
 519          * current time in seconds when the nfssrv kernel module is
 520          * loaded.  It is assumed that an NFS server will not be able
 521          * to boot and then to reboot in less than a second.  If the
 522          * hostid has not been set, then the current high resolution
 523          * time is used.  This will ensure different verifiers each
 524          * time the server reboots and minimize the chances that two
 525          * different servers will have the same verifier.
 526          * XXX - this is broken on LP64 kernels.
 527          */
 528         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 529         if (verf.tv_sec != 0) {
 530                 verf.tv_nsec = gethrestime_sec();
 531         } else {
 532                 timespec_t tverf;
 533 
 534                 gethrestime(&tverf);
 535                 verf.tv_sec = (time_t)tverf.tv_sec;
 536                 verf.tv_nsec = tverf.tv_nsec;
 537         }
 538 
 539         Write4verf = *(uint64_t *)&verf;
 540 
 541         rfs4_attr_init();
 542         mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 543 
 544         /* Used to manage create/destroy of server state */
 545         mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
 546 
 547         /* Used to manage access to server instance linked list */
 548         mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 549 
 550         /* Used to manage access to rfs4_deleg_policy */
 551         rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 552 
 553         error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
 554         if (error != 0) {
 555                 rfs4_disable_delegation();
 556         } else {
 557                 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 558                     &deleg_wrops);
 559                 if (error != 0) {
 560                         rfs4_disable_delegation();
 561                         fem_free(deleg_rdops);
 562                 }
 563         }
 564 
 565         nfs4_srv_caller_id = fs_new_caller_id();
 566 
 567         lockt_sysid = lm_alloc_sysidt();
 568 
 569         vsd_create(&nfs4_srv_vkey, NULL);
 570 
 571         return (0);
 572 }
 573 
 574 void
 575 rfs4_srvrfini(void)
 576 {
 577         extern krwlock_t rfs4_deleg_policy_lock;
 578 
 579         if (lockt_sysid != LM_NOSYSID) {
 580                 lm_free_sysidt(lockt_sysid);
 581                 lockt_sysid = LM_NOSYSID;
 582         }
 583 
 584         mutex_destroy(&rfs4_deleg_lock);
 585         mutex_destroy(&rfs4_state_lock);
 586         rw_destroy(&rfs4_deleg_policy_lock);
 587 
 588         fem_free(deleg_rdops);
 589         fem_free(deleg_wrops);
 590 }
 591 
 592 void
 593 rfs4_init_compound_state(struct compound_state *cs)
 594 {
 595         bzero(cs, sizeof (*cs));
 596         cs->cont = TRUE;
 597         cs->access = CS_ACCESS_DENIED;
 598         cs->deleg = FALSE;
 599         cs->mandlock = FALSE;
 600         cs->fh.nfs_fh4_val = cs->fhbuf;
 601 }
 602 
 603 void
 604 rfs4_grace_start(rfs4_servinst_t *sip)
 605 {
 606         rw_enter(&sip->rwlock, RW_WRITER);
 607         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 608         sip->grace_period = rfs4_grace_period;
 609         rw_exit(&sip->rwlock);
 610 }
 611 
 612 /*
 613  * returns true if the instance's grace period has never been started
 614  */
 615 int
 616 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 617 {
 618         time_t start_time;
 619 
 620         rw_enter(&sip->rwlock, RW_READER);
 621         start_time = sip->start_time;
 622         rw_exit(&sip->rwlock);
 623 
 624         return (start_time == 0);
 625 }
 626 
 627 /*
 628  * Indicates if server instance is within the
 629  * grace period.
 630  */
 631 int
 632 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 633 {
 634         time_t grace_expiry;
 635 
 636         rw_enter(&sip->rwlock, RW_READER);
 637         grace_expiry = sip->start_time + sip->grace_period;
 638         rw_exit(&sip->rwlock);
 639 
 640         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 641 }
 642 
 643 int
 644 rfs4_clnt_in_grace(rfs4_client_t *cp)
 645 {
 646         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 647 
 648         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 649 }
 650 
 651 /*
 652  * reset all currently active grace periods
 653  */
 654 void
 655 rfs4_grace_reset_all(void)
 656 {
 657         rfs4_servinst_t *sip;
 658 
 659         mutex_enter(&rfs4_servinst_lock);
 660         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 661                 if (rfs4_servinst_in_grace(sip))
 662                         rfs4_grace_start(sip);
 663         mutex_exit(&rfs4_servinst_lock);
 664 }
 665 
 666 /*
 667  * start any new instances' grace periods
 668  */
 669 void
 670 rfs4_grace_start_new(void)
 671 {
 672         rfs4_servinst_t *sip;
 673 
 674         mutex_enter(&rfs4_servinst_lock);
 675         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 676                 if (rfs4_servinst_grace_new(sip))
 677                         rfs4_grace_start(sip);
 678         mutex_exit(&rfs4_servinst_lock);
 679 }
 680 
 681 static rfs4_dss_path_t *
 682 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
 683 {
 684         size_t len;
 685         rfs4_dss_path_t *dss_path;
 686 
 687         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 688 
 689         /*
 690          * Take a copy of the string, since the original may be overwritten.
 691          * Sadly, no strdup() in the kernel.
 692          */
 693         /* allow for NUL */
 694         len = strlen(path) + 1;
 695         dss_path->path = kmem_alloc(len, KM_SLEEP);
 696         (void) strlcpy(dss_path->path, path, len);
 697 
 698         /* associate with servinst */
 699         dss_path->sip = sip;
 700         dss_path->index = index;
 701 
 702         /*
 703          * Add to list of served paths.
 704          * No locking required, as we're only ever called at startup.
 705          */
 706         if (rfs4_dss_pathlist == NULL) {
 707                 /* this is the first dss_path_t */
 708 
 709                 /* needed for insque/remque */
 710                 dss_path->next = dss_path->prev = dss_path;
 711 
 712                 rfs4_dss_pathlist = dss_path;
 713         } else {
 714                 insque(dss_path, rfs4_dss_pathlist);
 715         }
 716 
 717         return (dss_path);
 718 }
 719 
 720 /*
 721  * Create a new server instance, and make it the currently active instance.
 722  * Note that starting the grace period too early will reduce the clients'
 723  * recovery window.
 724  */
 725 void
 726 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
 727 {
 728         unsigned i;
 729         rfs4_servinst_t *sip;
 730         rfs4_oldstate_t *oldstate;
 731 
 732         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 733         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 734 
 735         sip->start_time = (time_t)0;
 736         sip->grace_period = (time_t)0;
 737         sip->next = NULL;
 738         sip->prev = NULL;
 739 
 740         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 741         /*
 742          * This initial dummy entry is required to setup for insque/remque.
 743          * It must be skipped over whenever the list is traversed.
 744          */
 745         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 746         /* insque/remque require initial list entry to be self-terminated */
 747         oldstate->next = oldstate;
 748         oldstate->prev = oldstate;
 749         sip->oldstate = oldstate;
 750 
 751 
 752         sip->dss_npaths = dss_npaths;
 753         sip->dss_paths = kmem_alloc(dss_npaths *
 754             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 755 
 756         for (i = 0; i < dss_npaths; i++) {
 757                 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
 758         }
 759 
 760         mutex_enter(&rfs4_servinst_lock);
 761         if (rfs4_cur_servinst != NULL) {
 762                 /* add to linked list */
 763                 sip->prev = rfs4_cur_servinst;
 764                 rfs4_cur_servinst->next = sip;
 765         }
 766         if (start_grace)
 767                 rfs4_grace_start(sip);
 768         /* make the new instance "current" */
 769         rfs4_cur_servinst = sip;
 770 
 771         mutex_exit(&rfs4_servinst_lock);
 772 }
 773 
 774 /*
 775  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 776  * all instances directly.
 777  */
 778 void
 779 rfs4_servinst_destroy_all(void)
 780 {
 781         rfs4_servinst_t *sip, *prev, *current;
 782 #ifdef DEBUG
 783         int n = 0;
 784 #endif
 785 
 786         mutex_enter(&rfs4_servinst_lock);
 787         ASSERT(rfs4_cur_servinst != NULL);
 788         current = rfs4_cur_servinst;
 789         rfs4_cur_servinst = NULL;
 790         for (sip = current; sip != NULL; sip = prev) {
 791                 prev = sip->prev;
 792                 rw_destroy(&sip->rwlock);
 793                 if (sip->oldstate)
 794                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 795                 if (sip->dss_paths)
 796                         kmem_free(sip->dss_paths,
 797                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 798                 kmem_free(sip, sizeof (rfs4_servinst_t));
 799 #ifdef DEBUG
 800                 n++;
 801 #endif
 802         }
 803         mutex_exit(&rfs4_servinst_lock);
 804 }
 805 
 806 /*
 807  * Assign the current server instance to a client_t.
 808  * Should be called with cp->rc_dbe held.
 809  */
 810 void
 811 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
 812 {
 813         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 814 
 815         /*
 816          * The lock ensures that if the current instance is in the process
 817          * of changing, we will see the new one.
 818          */
 819         mutex_enter(&rfs4_servinst_lock);
 820         cp->rc_server_instance = sip;
 821         mutex_exit(&rfs4_servinst_lock);
 822 }
 823 
 824 rfs4_servinst_t *
 825 rfs4_servinst(rfs4_client_t *cp)
 826 {
 827         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 828 
 829         return (cp->rc_server_instance);
 830 }
 831 
 832 /* ARGSUSED */
 833 static void
 834 nullfree(caddr_t resop)
 835 {
 836 }
 837 
 838 /*
 839  * This is a fall-through for invalid or not implemented (yet) ops
 840  */
 841 /* ARGSUSED */
 842 static void
 843 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 844     struct compound_state *cs)
 845 {
 846         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 847 }
 848 
 849 /*
 850  * Check if the security flavor, nfsnum, is in the flavor_list.
 851  */
 852 bool_t
 853 in_flavor_list(int nfsnum, int *flavor_list, int count)
 854 {
 855         int i;
 856 
 857         for (i = 0; i < count; i++) {
 858                 if (nfsnum == flavor_list[i])
 859                         return (TRUE);
 860         }
 861         return (FALSE);
 862 }
 863 
 864 /*
 865  * Used by rfs4_op_secinfo to get the security information from the
 866  * export structure associated with the component.
 867  */
 868 /* ARGSUSED */
 869 static nfsstat4
 870 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 871 {
 872         int error, different_export = 0;
 873         vnode_t *dvp, *vp;
 874         struct exportinfo *exi = NULL;
 875         struct exportinfo *oexi = NULL;
 876         fid_t fid;
 877         uint_t count, i;
 878         secinfo4 *resok_val;
 879         struct secinfo *secp;
 880         seconfig_t *si;
 881         bool_t did_traverse = FALSE;
 882         int dotdot, walk;
 883 
 884         dvp = cs->vp;
 885         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 886 
 887         /*
 888          * If dotdotting, then need to check whether it's above the
 889          * root of a filesystem, or above an export point.
 890          */
 891         if (dotdot) {
 892 
 893                 /*
 894                  * If dotdotting at the root of a filesystem, then
 895                  * need to traverse back to the mounted-on filesystem
 896                  * and do the dotdot lookup there.
 897                  */
 898                 if (cs->vp->v_flag & VROOT) {
 899 
 900                         /*
 901                          * If at the system root, then can
 902                          * go up no further.
 903                          */
 904                         if (VN_CMP(dvp, rootdir))
 905                                 return (puterrno4(ENOENT));
 906 
 907                         /*
 908                          * Traverse back to the mounted-on filesystem
 909                          */
 910                         dvp = untraverse(cs->vp);
 911 
 912                         /*
 913                          * Set the different_export flag so we remember
 914                          * to pick up a new exportinfo entry for
 915                          * this new filesystem.
 916                          */
 917                         different_export = 1;
 918                 } else {
 919 
 920                         /*
 921                          * If dotdotting above an export point then set
 922                          * the different_export to get new export info.
 923                          */
 924                         different_export = nfs_exported(cs->exi, cs->vp);
 925                 }
 926         }
 927 
 928         /*
 929          * Get the vnode for the component "nm".
 930          */
 931         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 932             NULL, NULL, NULL);
 933         if (error)
 934                 return (puterrno4(error));
 935 
 936         /*
 937          * If the vnode is in a pseudo filesystem, or if the security flavor
 938          * used in the request is valid but not an explicitly shared flavor,
 939          * or the access bit indicates that this is a limited access,
 940          * check whether this vnode is visible.
 941          */
 942         if (!different_export &&
 943             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
 944             cs->access & CS_ACCESS_LIMITED)) {
 945                 if (! nfs_visible(cs->exi, vp, &different_export)) {
 946                         VN_RELE(vp);
 947                         return (puterrno4(ENOENT));
 948                 }
 949         }
 950 
 951         /*
 952          * If it's a mountpoint, then traverse it.
 953          */
 954         if (vn_ismntpt(vp)) {
 955                 if ((error = traverse(&vp)) != 0) {
 956                         VN_RELE(vp);
 957                         return (puterrno4(error));
 958                 }
 959                 /* remember that we had to traverse mountpoint */
 960                 did_traverse = TRUE;
 961                 different_export = 1;
 962         } else if (vp->v_vfsp != dvp->v_vfsp) {
 963                 /*
 964                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
 965                  * then vp is probably an LOFS object.  We don't need the
 966                  * realvp, we just need to know that we might have crossed
 967                  * a server fs boundary and need to call checkexport.
 968                  * (LOFS lookup hides server fs mountpoints, and actually calls
 969                  * traverse)
 970                  */
 971                 different_export = 1;
 972         }
 973 
 974         /*
 975          * Get the export information for it.
 976          */
 977         if (different_export) {
 978 
 979                 bzero(&fid, sizeof (fid));
 980                 fid.fid_len = MAXFIDSZ;
 981                 error = vop_fid_pseudo(vp, &fid);
 982                 if (error) {
 983                         VN_RELE(vp);
 984                         return (puterrno4(error));
 985                 }
 986 
 987                 if (dotdot)
 988                         oexi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
 989                 else
 990                         oexi = checkexport(&vp->v_vfsp->vfs_fsid, &fid, vp);
 991 
 992                 if (oexi == NULL) {
 993                         if (did_traverse == TRUE) {
 994                                 /*
 995                                  * If this vnode is a mounted-on vnode,
 996                                  * but the mounted-on file system is not
 997                                  * exported, send back the secinfo for
 998                                  * the exported node that the mounted-on
 999                                  * vnode lives in.
1000                                  */
1001                                 exi = cs->exi;
1002                         } else {
1003                                 VN_RELE(vp);
1004                                 return (puterrno4(EACCES));
1005                         }
1006                 } else {
1007                         exi = oexi;
1008                 }
1009         } else {
1010                 exi = cs->exi;
1011         }
1012         ASSERT(exi != NULL);
1013 
1014 
1015         /*
1016          * Create the secinfo result based on the security information
1017          * from the exportinfo structure (exi).
1018          *
1019          * Return all flavors for a pseudo node.
1020          * For a real export node, return the flavor that the client
1021          * has access with.
1022          */
1023         rw_enter(&exported_lock, RW_READER);
1024         if (PSEUDO(exi)) {
1025                 count = exi->exi_export.ex_seccnt; /* total sec count */
1026                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1027                 secp = exi->exi_export.ex_secinfo;
1028 
1029                 for (i = 0; i < count; i++) {
1030                         si = &secp[i].s_secinfo;
1031                         resok_val[i].flavor = si->sc_rpcnum;
1032                         if (resok_val[i].flavor == RPCSEC_GSS) {
1033                                 rpcsec_gss_info *info;
1034 
1035                                 info = &resok_val[i].flavor_info;
1036                                 info->qop = si->sc_qop;
1037                                 info->service = (rpc_gss_svc_t)si->sc_service;
1038 
1039                                 /* get oid opaque data */
1040                                 info->oid.sec_oid4_len =
1041                                     si->sc_gss_mech_type->length;
1042                                 info->oid.sec_oid4_val = kmem_alloc(
1043                                     si->sc_gss_mech_type->length, KM_SLEEP);
1044                                 bcopy(
1045                                     si->sc_gss_mech_type->elements,
1046                                     info->oid.sec_oid4_val,
1047                                     info->oid.sec_oid4_len);
1048                         }
1049                 }
1050                 resp->SECINFO4resok_len = count;
1051                 resp->SECINFO4resok_val = resok_val;
1052         } else {
1053                 int ret_cnt = 0, k = 0;
1054                 int *flavor_list;
1055 
1056                 count = exi->exi_export.ex_seccnt; /* total sec count */
1057                 secp = exi->exi_export.ex_secinfo;
1058 
1059                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1060                 /* find out which flavors to return */
1061                 for (i = 0; i < count; i ++) {
1062                         int access, flavor, perm;
1063 
1064                         flavor = secp[i].s_secinfo.sc_nfsnum;
1065                         perm = secp[i].s_flags;
1066 
1067                         access = nfsauth4_secinfo_access(exi, cs->req,
1068                             flavor, perm, cs->basecr);
1069 
1070                         if (! (access & NFSAUTH_DENIED) &&
1071                             ! (access & NFSAUTH_WRONGSEC)) {
1072                                 flavor_list[ret_cnt] = flavor;
1073                                 ret_cnt++;
1074                         }
1075                 }
1076 
1077                 /* Create the returning SECINFO value */
1078                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1079 
1080                 for (i = 0; i < count; i++) {
1081                         /*
1082                          * If the flavor is in the flavor list,
1083                          * fill in resok_val.
1084                          */
1085                         si = &secp[i].s_secinfo;
1086                         if (in_flavor_list(si->sc_nfsnum,
1087                             flavor_list, ret_cnt)) {
1088                                 resok_val[k].flavor = si->sc_rpcnum;
1089                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1090                                         rpcsec_gss_info *info;
1091 
1092                                         info = &resok_val[k].flavor_info;
1093                                         info->qop = si->sc_qop;
1094                                         info->service = (rpc_gss_svc_t)
1095                                             si->sc_service;
1096 
1097                                         /* get oid opaque data */
1098                                         info->oid.sec_oid4_len =
1099                                             si->sc_gss_mech_type->length;
1100                                         info->oid.sec_oid4_val = kmem_alloc(
1101                                             si->sc_gss_mech_type->length,
1102                                             KM_SLEEP);
1103                                         bcopy(si->sc_gss_mech_type->elements,
1104                                             info->oid.sec_oid4_val,
1105                                             info->oid.sec_oid4_len);
1106                                 }
1107                                 k++;
1108                         }
1109                         if (k >= ret_cnt)
1110                                 break;
1111                 }
1112                 resp->SECINFO4resok_len = ret_cnt;
1113                 resp->SECINFO4resok_val = resok_val;
1114                 kmem_free(flavor_list, count * sizeof (int));
1115         }
1116         rw_exit(&exported_lock);
1117         if (oexi)
1118                 exi_rele(oexi);
1119         VN_RELE(vp);
1120         return (NFS4_OK);
1121 }
1122 
1123 /*
1124  * SECINFO (Operation 33): Obtain required security information on
1125  * the component name in the format of (security-mechanism-oid, qop, service)
1126  * triplets.
1127  */
1128 /* ARGSUSED */
1129 static void
1130 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1131     struct compound_state *cs)
1132 {
1133         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1134         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1135         utf8string *utfnm = &args->name;
1136         uint_t len;
1137         char *nm;
1138         struct sockaddr *ca;
1139         char *name = NULL;
1140         nfsstat4 status = NFS4_OK;
1141 
1142         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1143             SECINFO4args *, args);
1144 
1145         /*
1146          * Current file handle (cfh) should have been set before getting
1147          * into this function. If not, return error.
1148          */
1149         if (cs->vp == NULL) {
1150                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1151                 goto out;
1152         }
1153 
1154         if (cs->vp->v_type != VDIR) {
1155                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1156                 goto out;
1157         }
1158 
1159         /*
1160          * Verify the component name. If failed, error out, but
1161          * do not error out if the component name is a "..".
1162          * SECINFO will return its parents secinfo data for SECINFO "..".
1163          */
1164         status = utf8_dir_verify(utfnm);
1165         if (status != NFS4_OK) {
1166                 if (utfnm->utf8string_len != 2 ||
1167                     utfnm->utf8string_val[0] != '.' ||
1168                     utfnm->utf8string_val[1] != '.') {
1169                         *cs->statusp = resp->status = status;
1170                         goto out;
1171                 }
1172         }
1173 
1174         nm = utf8_to_str(utfnm, &len, NULL);
1175         if (nm == NULL) {
1176                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1177                 goto out;
1178         }
1179 
1180         if (len > MAXNAMELEN) {
1181                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1182                 kmem_free(nm, len);
1183                 goto out;
1184         }
1185 
1186         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1187         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1188             MAXPATHLEN  + 1);
1189 
1190         if (name == NULL) {
1191                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1192                 kmem_free(nm, len);
1193                 goto out;
1194         }
1195 
1196 
1197         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1198 
1199         if (name != nm)
1200                 kmem_free(name, MAXPATHLEN + 1);
1201         kmem_free(nm, len);
1202 
1203 out:
1204         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1205             SECINFO4res *, resp);
1206 }
1207 
1208 /*
1209  * Free SECINFO result.
1210  */
1211 /* ARGSUSED */
1212 static void
1213 rfs4_op_secinfo_free(nfs_resop4 *resop)
1214 {
1215         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1216         int count, i;
1217         secinfo4 *resok_val;
1218 
1219         /* If this is not an Ok result, nothing to free. */
1220         if (resp->status != NFS4_OK) {
1221                 return;
1222         }
1223 
1224         count = resp->SECINFO4resok_len;
1225         resok_val = resp->SECINFO4resok_val;
1226 
1227         for (i = 0; i < count; i++) {
1228                 if (resok_val[i].flavor == RPCSEC_GSS) {
1229                         rpcsec_gss_info *info;
1230 
1231                         info = &resok_val[i].flavor_info;
1232                         kmem_free(info->oid.sec_oid4_val,
1233                             info->oid.sec_oid4_len);
1234                 }
1235         }
1236         kmem_free(resok_val, count * sizeof (secinfo4));
1237         resp->SECINFO4resok_len = 0;
1238         resp->SECINFO4resok_val = NULL;
1239 }
1240 
1241 /* ARGSUSED */
1242 static void
1243 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1244     struct compound_state *cs)
1245 {
1246         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1247         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1248         int error;
1249         vnode_t *vp;
1250         struct vattr va;
1251         int checkwriteperm;
1252         cred_t *cr = cs->cr;
1253         bslabel_t *clabel, *slabel;
1254         ts_label_t *tslabel;
1255         boolean_t admin_low_client;
1256 
1257         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1258             ACCESS4args *, args);
1259 
1260 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1261         if (cs->access == CS_ACCESS_DENIED) {
1262                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1263                 goto out;
1264         }
1265 #endif
1266         if (cs->vp == NULL) {
1267                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1268                 goto out;
1269         }
1270 
1271         ASSERT(cr != NULL);
1272 
1273         vp = cs->vp;
1274 
1275         /*
1276          * If the file system is exported read only, it is not appropriate
1277          * to check write permissions for regular files and directories.
1278          * Special files are interpreted by the client, so the underlying
1279          * permissions are sent back to the client for interpretation.
1280          */
1281         if (rdonly4(req, cs) &&
1282             (vp->v_type == VREG || vp->v_type == VDIR))
1283                 checkwriteperm = 0;
1284         else
1285                 checkwriteperm = 1;
1286 
1287         /*
1288          * XXX
1289          * We need the mode so that we can correctly determine access
1290          * permissions relative to a mandatory lock file.  Access to
1291          * mandatory lock files is denied on the server, so it might
1292          * as well be reflected to the server during the open.
1293          */
1294         va.va_mask = AT_MODE;
1295         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1296         if (error) {
1297                 *cs->statusp = resp->status = puterrno4(error);
1298                 goto out;
1299         }
1300         resp->access = 0;
1301         resp->supported = 0;
1302 
1303         if (is_system_labeled()) {
1304                 ASSERT(req->rq_label != NULL);
1305                 clabel = req->rq_label;
1306                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1307                     "got client label from request(1)",
1308                     struct svc_req *, req);
1309                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1310                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1311                                 *cs->statusp = resp->status = puterrno4(EACCES);
1312                                 goto out;
1313                         }
1314                         slabel = label2bslabel(tslabel);
1315                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1316                             char *, "got server label(1) for vp(2)",
1317                             bslabel_t *, slabel, vnode_t *, vp);
1318 
1319                         admin_low_client = B_FALSE;
1320                 } else
1321                         admin_low_client = B_TRUE;
1322         }
1323 
1324         if (args->access & ACCESS4_READ) {
1325                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1326                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1327                     (!is_system_labeled() || admin_low_client ||
1328                     bldominates(clabel, slabel)))
1329                         resp->access |= ACCESS4_READ;
1330                 resp->supported |= ACCESS4_READ;
1331         }
1332         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1333                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1334                 if (!error && (!is_system_labeled() || admin_low_client ||
1335                     bldominates(clabel, slabel)))
1336                         resp->access |= ACCESS4_LOOKUP;
1337                 resp->supported |= ACCESS4_LOOKUP;
1338         }
1339         if (checkwriteperm &&
1340             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1341                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1342                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1343                     (!is_system_labeled() || admin_low_client ||
1344                     blequal(clabel, slabel)))
1345                         resp->access |=
1346                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1347                 resp->supported |=
1348                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1349         }
1350 
1351         if (checkwriteperm &&
1352             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1353                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1354                 if (!error && (!is_system_labeled() || admin_low_client ||
1355                     blequal(clabel, slabel)))
1356                         resp->access |= ACCESS4_DELETE;
1357                 resp->supported |= ACCESS4_DELETE;
1358         }
1359         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1360                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1361                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1362                     (!is_system_labeled() || admin_low_client ||
1363                     bldominates(clabel, slabel)))
1364                         resp->access |= ACCESS4_EXECUTE;
1365                 resp->supported |= ACCESS4_EXECUTE;
1366         }
1367 
1368         if (is_system_labeled() && !admin_low_client)
1369                 label_rele(tslabel);
1370 
1371         *cs->statusp = resp->status = NFS4_OK;
1372 out:
1373         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1374             ACCESS4res *, resp);
1375 }
1376 
1377 /* ARGSUSED */
1378 static void
1379 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1380     struct compound_state *cs)
1381 {
1382         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1383         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1384         int error;
1385         vnode_t *vp = cs->vp;
1386         cred_t *cr = cs->cr;
1387         vattr_t va;
1388 
1389         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1390             COMMIT4args *, args);
1391 
1392         if (vp == NULL) {
1393                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1394                 goto out;
1395         }
1396         if (cs->access == CS_ACCESS_DENIED) {
1397                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1398                 goto out;
1399         }
1400 
1401         if (args->offset + args->count < args->offset) {
1402                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1403                 goto out;
1404         }
1405 
1406         va.va_mask = AT_UID;
1407         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1408 
1409         /*
1410          * If we can't get the attributes, then we can't do the
1411          * right access checking.  So, we'll fail the request.
1412          */
1413         if (error) {
1414                 *cs->statusp = resp->status = puterrno4(error);
1415                 goto out;
1416         }
1417         if (rdonly4(req, cs)) {
1418                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1419                 goto out;
1420         }
1421 
1422         if (vp->v_type != VREG) {
1423                 if (vp->v_type == VDIR)
1424                         resp->status = NFS4ERR_ISDIR;
1425                 else
1426                         resp->status = NFS4ERR_INVAL;
1427                 *cs->statusp = resp->status;
1428                 goto out;
1429         }
1430 
1431         if (crgetuid(cr) != va.va_uid &&
1432             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1433                 *cs->statusp = resp->status = puterrno4(error);
1434                 goto out;
1435         }
1436 
1437         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1438 
1439         if (error) {
1440                 *cs->statusp = resp->status = puterrno4(error);
1441                 goto out;
1442         }
1443 
1444         *cs->statusp = resp->status = NFS4_OK;
1445         resp->writeverf = Write4verf;
1446 out:
1447         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1448             COMMIT4res *, resp);
1449 }
1450 
1451 /*
1452  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1453  * was completed. It does the nfsv4 create for special files.
1454  */
1455 /* ARGSUSED */
1456 static vnode_t *
1457 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1458     struct compound_state *cs, vattr_t *vap, char *nm)
1459 {
1460         int error;
1461         cred_t *cr = cs->cr;
1462         vnode_t *dvp = cs->vp;
1463         vnode_t *vp = NULL;
1464         int mode;
1465         enum vcexcl excl;
1466 
1467         switch (args->type) {
1468         case NF4CHR:
1469         case NF4BLK:
1470                 if (secpolicy_sys_devices(cr) != 0) {
1471                         *cs->statusp = resp->status = NFS4ERR_PERM;
1472                         return (NULL);
1473                 }
1474                 if (args->type == NF4CHR)
1475                         vap->va_type = VCHR;
1476                 else
1477                         vap->va_type = VBLK;
1478                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1479                     args->ftype4_u.devdata.specdata2);
1480                 vap->va_mask |= AT_RDEV;
1481                 break;
1482         case NF4SOCK:
1483                 vap->va_type = VSOCK;
1484                 break;
1485         case NF4FIFO:
1486                 vap->va_type = VFIFO;
1487                 break;
1488         default:
1489                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1490                 return (NULL);
1491         }
1492 
1493         /*
1494          * Must specify the mode.
1495          */
1496         if (!(vap->va_mask & AT_MODE)) {
1497                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1498                 return (NULL);
1499         }
1500 
1501         excl = EXCL;
1502 
1503         mode = 0;
1504 
1505         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1506         if (error) {
1507                 *cs->statusp = resp->status = puterrno4(error);
1508                 return (NULL);
1509         }
1510         return (vp);
1511 }
1512 
1513 /*
1514  * nfsv4 create is used to create non-regular files. For regular files,
1515  * use nfsv4 open.
1516  */
1517 /* ARGSUSED */
1518 static void
1519 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1520     struct compound_state *cs)
1521 {
1522         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1523         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1524         int error;
1525         struct vattr bva, iva, iva2, ava, *vap;
1526         cred_t *cr = cs->cr;
1527         vnode_t *dvp = cs->vp;
1528         vnode_t *vp = NULL;
1529         vnode_t *realvp;
1530         char *nm, *lnm;
1531         uint_t len, llen;
1532         int syncval = 0;
1533         struct nfs4_svgetit_arg sarg;
1534         struct nfs4_ntov_table ntov;
1535         struct statvfs64 sb;
1536         nfsstat4 status;
1537         struct sockaddr *ca;
1538         char *name = NULL;
1539         char *lname = NULL;
1540 
1541         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1542             CREATE4args *, args);
1543 
1544         resp->attrset = 0;
1545 
1546         if (dvp == NULL) {
1547                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1548                 goto out;
1549         }
1550 
1551         /*
1552          * If there is an unshared filesystem mounted on this vnode,
1553          * do not allow to create an object in this directory.
1554          */
1555         if (vn_ismntpt(dvp)) {
1556                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1557                 goto out;
1558         }
1559 
1560         /* Verify that type is correct */
1561         switch (args->type) {
1562         case NF4LNK:
1563         case NF4BLK:
1564         case NF4CHR:
1565         case NF4SOCK:
1566         case NF4FIFO:
1567         case NF4DIR:
1568                 break;
1569         default:
1570                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1571                 goto out;
1572         };
1573 
1574         if (cs->access == CS_ACCESS_DENIED) {
1575                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1576                 goto out;
1577         }
1578         if (dvp->v_type != VDIR) {
1579                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1580                 goto out;
1581         }
1582         status = utf8_dir_verify(&args->objname);
1583         if (status != NFS4_OK) {
1584                 *cs->statusp = resp->status = status;
1585                 goto out;
1586         }
1587 
1588         if (rdonly4(req, cs)) {
1589                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1590                 goto out;
1591         }
1592 
1593         /*
1594          * Name of newly created object
1595          */
1596         nm = utf8_to_fn(&args->objname, &len, NULL);
1597         if (nm == NULL) {
1598                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1599                 goto out;
1600         }
1601 
1602         if (len > MAXNAMELEN) {
1603                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1604                 kmem_free(nm, len);
1605                 goto out;
1606         }
1607 
1608         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1609         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1610             MAXPATHLEN  + 1);
1611 
1612         if (name == NULL) {
1613                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1614                 kmem_free(nm, len);
1615                 goto out;
1616         }
1617 
1618         resp->attrset = 0;
1619 
1620         sarg.sbp = &sb;
1621         sarg.is_referral = B_FALSE;
1622         nfs4_ntov_table_init(&ntov);
1623 
1624         status = do_rfs4_set_attrs(&resp->attrset,
1625             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1626 
1627         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1628                 status = NFS4ERR_INVAL;
1629 
1630         if (status != NFS4_OK) {
1631                 *cs->statusp = resp->status = status;
1632                 if (name != nm)
1633                         kmem_free(name, MAXPATHLEN + 1);
1634                 kmem_free(nm, len);
1635                 nfs4_ntov_table_free(&ntov, &sarg);
1636                 resp->attrset = 0;
1637                 goto out;
1638         }
1639 
1640         /* Get "before" change value */
1641         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1642         error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1643         if (error) {
1644                 *cs->statusp = resp->status = puterrno4(error);
1645                 if (name != nm)
1646                         kmem_free(name, MAXPATHLEN + 1);
1647                 kmem_free(nm, len);
1648                 nfs4_ntov_table_free(&ntov, &sarg);
1649                 resp->attrset = 0;
1650                 goto out;
1651         }
1652         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1653 
1654         vap = sarg.vap;
1655 
1656         /*
1657          * Set the default initial values for attributes when the parent
1658          * directory does not have the VSUID/VSGID bit set and they have
1659          * not been specified in createattrs.
1660          */
1661         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1662                 vap->va_uid = crgetuid(cr);
1663                 vap->va_mask |= AT_UID;
1664         }
1665         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1666                 vap->va_gid = crgetgid(cr);
1667                 vap->va_mask |= AT_GID;
1668         }
1669 
1670         vap->va_mask |= AT_TYPE;
1671         switch (args->type) {
1672         case NF4DIR:
1673                 vap->va_type = VDIR;
1674                 if ((vap->va_mask & AT_MODE) == 0) {
1675                         vap->va_mode = 0700; /* default: owner rwx only */
1676                         vap->va_mask |= AT_MODE;
1677                 }
1678                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1679                 if (error)
1680                         break;
1681 
1682                 /*
1683                  * Get the initial "after" sequence number, if it fails,
1684                  * set to zero
1685                  */
1686                 iva.va_mask = AT_SEQ;
1687                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1688                         iva.va_seq = 0;
1689                 break;
1690         case NF4LNK:
1691                 vap->va_type = VLNK;
1692                 if ((vap->va_mask & AT_MODE) == 0) {
1693                         vap->va_mode = 0700; /* default: owner rwx only */
1694                         vap->va_mask |= AT_MODE;
1695                 }
1696 
1697                 /*
1698                  * symlink names must be treated as data
1699                  */
1700                 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1701                     &llen, NULL);
1702 
1703                 if (lnm == NULL) {
1704                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1705                         if (name != nm)
1706                                 kmem_free(name, MAXPATHLEN + 1);
1707                         kmem_free(nm, len);
1708                         nfs4_ntov_table_free(&ntov, &sarg);
1709                         resp->attrset = 0;
1710                         goto out;
1711                 }
1712 
1713                 if (llen > MAXPATHLEN) {
1714                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1715                         if (name != nm)
1716                                 kmem_free(name, MAXPATHLEN + 1);
1717                         kmem_free(nm, len);
1718                         kmem_free(lnm, llen);
1719                         nfs4_ntov_table_free(&ntov, &sarg);
1720                         resp->attrset = 0;
1721                         goto out;
1722                 }
1723 
1724                 lname = nfscmd_convname(ca, cs->exi, lnm,
1725                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1726 
1727                 if (lname == NULL) {
1728                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1729                         if (name != nm)
1730                                 kmem_free(name, MAXPATHLEN + 1);
1731                         kmem_free(nm, len);
1732                         kmem_free(lnm, llen);
1733                         nfs4_ntov_table_free(&ntov, &sarg);
1734                         resp->attrset = 0;
1735                         goto out;
1736                 }
1737 
1738                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1739                 if (lname != lnm)
1740                         kmem_free(lname, MAXPATHLEN + 1);
1741                 kmem_free(lnm, llen);
1742                 if (error)
1743                         break;
1744 
1745                 /*
1746                  * Get the initial "after" sequence number, if it fails,
1747                  * set to zero
1748                  */
1749                 iva.va_mask = AT_SEQ;
1750                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1751                         iva.va_seq = 0;
1752 
1753                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1754                     NULL, NULL, NULL);
1755                 if (error)
1756                         break;
1757 
1758                 /*
1759                  * va_seq is not safe over VOP calls, check it again
1760                  * if it has changed zero out iva to force atomic = FALSE.
1761                  */
1762                 iva2.va_mask = AT_SEQ;
1763                 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1764                     iva2.va_seq != iva.va_seq)
1765                         iva.va_seq = 0;
1766                 break;
1767         default:
1768                 /*
1769                  * probably a special file.
1770                  */
1771                 if ((vap->va_mask & AT_MODE) == 0) {
1772                         vap->va_mode = 0600; /* default: owner rw only */
1773                         vap->va_mask |= AT_MODE;
1774                 }
1775                 syncval = FNODSYNC;
1776                 /*
1777                  * We know this will only generate one VOP call
1778                  */
1779                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1780 
1781                 if (vp == NULL) {
1782                         if (name != nm)
1783                                 kmem_free(name, MAXPATHLEN + 1);
1784                         kmem_free(nm, len);
1785                         nfs4_ntov_table_free(&ntov, &sarg);
1786                         resp->attrset = 0;
1787                         goto out;
1788                 }
1789 
1790                 /*
1791                  * Get the initial "after" sequence number, if it fails,
1792                  * set to zero
1793                  */
1794                 iva.va_mask = AT_SEQ;
1795                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1796                         iva.va_seq = 0;
1797 
1798                 break;
1799         }
1800         if (name != nm)
1801                 kmem_free(name, MAXPATHLEN + 1);
1802         kmem_free(nm, len);
1803 
1804         if (error) {
1805                 *cs->statusp = resp->status = puterrno4(error);
1806         }
1807 
1808         /*
1809          * Force modified data and metadata out to stable storage.
1810          */
1811         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1812 
1813         if (resp->status != NFS4_OK) {
1814                 if (vp != NULL)
1815                         VN_RELE(vp);
1816                 nfs4_ntov_table_free(&ntov, &sarg);
1817                 resp->attrset = 0;
1818                 goto out;
1819         }
1820 
1821         /*
1822          * Finish setup of cinfo response, "before" value already set.
1823          * Get "after" change value, if it fails, simply return the
1824          * before value.
1825          */
1826         ava.va_mask = AT_CTIME|AT_SEQ;
1827         if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1828                 ava.va_ctime = bva.va_ctime;
1829                 ava.va_seq = 0;
1830         }
1831         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1832 
1833         /*
1834          * True verification that object was created with correct
1835          * attrs is impossible.  The attrs could have been changed
1836          * immediately after object creation.  If attributes did
1837          * not verify, the only recourse for the server is to
1838          * destroy the object.  Maybe if some attrs (like gid)
1839          * are set incorrectly, the object should be destroyed;
1840          * however, seems bad as a default policy.  Do we really
1841          * want to destroy an object over one of the times not
1842          * verifying correctly?  For these reasons, the server
1843          * currently sets bits in attrset for createattrs
1844          * that were set; however, no verification is done.
1845          *
1846          * vmask_to_nmask accounts for vattr bits set on create
1847          *      [do_rfs4_set_attrs() only sets resp bits for
1848          *       non-vattr/vfs bits.]
1849          * Mask off any bits set by default so as not to return
1850          * more attrset bits than were requested in createattrs
1851          */
1852         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1853         resp->attrset &= args->createattrs.attrmask;
1854         nfs4_ntov_table_free(&ntov, &sarg);
1855 
1856         error = makefh4(&cs->fh, vp, cs->exi);
1857         if (error) {
1858                 *cs->statusp = resp->status = puterrno4(error);
1859         }
1860 
1861         /*
1862          * The cinfo.atomic = TRUE only if we got no errors, we have
1863          * non-zero va_seq's, and it has incremented by exactly one
1864          * during the creation and it didn't change during the VOP_LOOKUP
1865          * or VOP_FSYNC.
1866          */
1867         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1868             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1869                 resp->cinfo.atomic = TRUE;
1870         else
1871                 resp->cinfo.atomic = FALSE;
1872 
1873         /*
1874          * Force modified metadata out to stable storage.
1875          *
1876          * if a underlying vp exists, pass it to VOP_FSYNC
1877          */
1878         if (VOP_REALVP(vp, &realvp, NULL) == 0)
1879                 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1880         else
1881                 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1882 
1883         if (resp->status != NFS4_OK) {
1884                 VN_RELE(vp);
1885                 goto out;
1886         }
1887         if (cs->vp)
1888                 VN_RELE(cs->vp);
1889 
1890         cs->vp = vp;
1891         *cs->statusp = resp->status = NFS4_OK;
1892 out:
1893         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1894             CREATE4res *, resp);
1895 }
1896 
1897 /*ARGSUSED*/
1898 static void
1899 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1900     struct compound_state *cs)
1901 {
1902         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1903             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1904 
1905         rfs4_op_inval(argop, resop, req, cs);
1906 
1907         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1908             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1909 }
1910 
1911 /*ARGSUSED*/
1912 static void
1913 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1914     struct compound_state *cs)
1915 {
1916         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1917         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1918         rfs4_deleg_state_t *dsp;
1919         nfsstat4 status;
1920 
1921         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1922             DELEGRETURN4args *, args);
1923 
1924         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1925         resp->status = *cs->statusp = status;
1926         if (status != NFS4_OK)
1927                 goto out;
1928 
1929         /* Ensure specified filehandle matches */
1930         if (cs->vp != dsp->rds_finfo->rf_vp) {
1931                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1932         } else
1933                 rfs4_return_deleg(dsp, FALSE);
1934 
1935         rfs4_update_lease(dsp->rds_client);
1936 
1937         rfs4_deleg_state_rele(dsp);
1938 out:
1939         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1940             DELEGRETURN4res *, resp);
1941 }
1942 
1943 /*
1944  * Check to see if a given "flavor" is an explicitly shared flavor.
1945  * The assumption of this routine is the "flavor" is already a valid
1946  * flavor in the secinfo list of "exi".
1947  *
1948  *      e.g.
1949  *              # share -o sec=flavor1 /export
1950  *              # share -o sec=flavor2 /export/home
1951  *
1952  *              flavor2 is not an explicitly shared flavor for /export,
1953  *              however it is in the secinfo list for /export thru the
1954  *              server namespace setup.
1955  */
1956 int
1957 is_exported_sec(int flavor, struct exportinfo *exi)
1958 {
1959         int     i;
1960         struct secinfo *sp;
1961 
1962         sp = exi->exi_export.ex_secinfo;
1963         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1964                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1965                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1966                         return (SEC_REF_EXPORTED(&sp[i]));
1967                 }
1968         }
1969 
1970         /* Should not reach this point based on the assumption */
1971         return (0);
1972 }
1973 
1974 /*
1975  * Check if the security flavor used in the request matches what is
1976  * required at the export point or at the root pseudo node (exi_root).
1977  *
1978  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1979  *
1980  */
1981 static int
1982 secinfo_match_or_authnone(struct compound_state *cs)
1983 {
1984         int     i;
1985         struct secinfo *sp;
1986 
1987         /*
1988          * Check cs->nfsflavor (from the request) against
1989          * the current export data in cs->exi.
1990          */
1991         sp = cs->exi->exi_export.ex_secinfo;
1992         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1993                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1994                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1995                         return (1);
1996         }
1997 
1998         return (0);
1999 }
2000 
2001 /*
2002  * Check the access authority for the client and return the correct error.
2003  */
2004 nfsstat4
2005 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2006 {
2007         int     authres;
2008 
2009         /*
2010          * First, check if the security flavor used in the request
2011          * are among the flavors set in the server namespace.
2012          */
2013         if (!secinfo_match_or_authnone(cs)) {
2014                 *cs->statusp = NFS4ERR_WRONGSEC;
2015                 return (*cs->statusp);
2016         }
2017 
2018         authres = checkauth4(cs, req);
2019 
2020         if (authres > 0) {
2021                 *cs->statusp = NFS4_OK;
2022                 if (! (cs->access & CS_ACCESS_LIMITED))
2023                         cs->access = CS_ACCESS_OK;
2024         } else if (authres == 0) {
2025                 *cs->statusp = NFS4ERR_ACCESS;
2026         } else if (authres == -2) {
2027                 *cs->statusp = NFS4ERR_WRONGSEC;
2028         } else {
2029                 *cs->statusp = NFS4ERR_DELAY;
2030         }
2031         return (*cs->statusp);
2032 }
2033 
2034 /*
2035  * bitmap4_to_attrmask is called by getattr and readdir.
2036  * It sets up the vattr mask and determines whether vfsstat call is needed
2037  * based on the input bitmap.
2038  * Returns nfsv4 status.
2039  */
2040 static nfsstat4
2041 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2042 {
2043         int i;
2044         uint_t  va_mask;
2045         struct statvfs64 *sbp = sargp->sbp;
2046 
2047         sargp->sbp = NULL;
2048         sargp->flag = 0;
2049         sargp->rdattr_error = NFS4_OK;
2050         sargp->mntdfid_set = FALSE;
2051         if (sargp->cs->vp)
2052                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2053                     FH4_ATTRDIR | FH4_NAMEDATTR);
2054         else
2055                 sargp->xattr = 0;
2056 
2057         /*
2058          * Set rdattr_error_req to true if return error per
2059          * failed entry rather than fail the readdir.
2060          */
2061         if (breq & FATTR4_RDATTR_ERROR_MASK)
2062                 sargp->rdattr_error_req = 1;
2063         else
2064                 sargp->rdattr_error_req = 0;
2065 
2066         /*
2067          * generate the va_mask
2068          * Handle the easy cases first
2069          */
2070         switch (breq) {
2071         case NFS4_NTOV_ATTR_MASK:
2072                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2073                 return (NFS4_OK);
2074 
2075         case NFS4_FS_ATTR_MASK:
2076                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2077                 sargp->sbp = sbp;
2078                 return (NFS4_OK);
2079 
2080         case NFS4_NTOV_ATTR_CACHE_MASK:
2081                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2082                 return (NFS4_OK);
2083 
2084         case FATTR4_LEASE_TIME_MASK:
2085                 sargp->vap->va_mask = 0;
2086                 return (NFS4_OK);
2087 
2088         default:
2089                 va_mask = 0;
2090                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2091                         if ((breq & nfs4_ntov_map[i].fbit) &&
2092                             nfs4_ntov_map[i].vbit)
2093                                 va_mask |= nfs4_ntov_map[i].vbit;
2094                 }
2095 
2096                 /*
2097                  * Check is vfsstat is needed
2098                  */
2099                 if (breq & NFS4_FS_ATTR_MASK)
2100                         sargp->sbp = sbp;
2101 
2102                 sargp->vap->va_mask = va_mask;
2103                 return (NFS4_OK);
2104         }
2105         /* NOTREACHED */
2106 }
2107 
2108 /*
2109  * bitmap4_get_sysattrs is called by getattr and readdir.
2110  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2111  * Returns nfsv4 status.
2112  */
2113 static nfsstat4
2114 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2115 {
2116         int error;
2117         struct compound_state *cs = sargp->cs;
2118         vnode_t *vp = cs->vp;
2119 
2120         if (sargp->sbp != NULL) {
2121                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2122                         sargp->sbp = NULL;   /* to identify error */
2123                         return (puterrno4(error));
2124                 }
2125         }
2126 
2127         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2128 }
2129 
2130 static void
2131 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2132 {
2133         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2134             KM_SLEEP);
2135         ntovp->attrcnt = 0;
2136         ntovp->vfsstat = FALSE;
2137 }
2138 
2139 static void
2140 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2141     struct nfs4_svgetit_arg *sargp)
2142 {
2143         int i;
2144         union nfs4_attr_u *na;
2145         uint8_t *amap;
2146 
2147         /*
2148          * XXX Should do the same checks for whether the bit is set
2149          */
2150         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2151             i < ntovp->attrcnt; i++, na++, amap++) {
2152                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2153                     NFS4ATTR_FREEIT, sargp, na);
2154         }
2155         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2156                 /*
2157                  * xdr_free for getattr will be done later
2158                  */
2159                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2160                     i < ntovp->attrcnt; i++, na++, amap++) {
2161                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2162                 }
2163         }
2164         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2165 }
2166 
2167 /*
2168  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2169  */
2170 static nfsstat4
2171 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2172     struct nfs4_svgetit_arg *sargp)
2173 {
2174         int error = 0;
2175         int i, k;
2176         struct nfs4_ntov_table ntov;
2177         XDR xdr;
2178         ulong_t xdr_size;
2179         char *xdr_attrs;
2180         nfsstat4 status = NFS4_OK;
2181         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2182         union nfs4_attr_u *na;
2183         uint8_t *amap;
2184 
2185         sargp->op = NFS4ATTR_GETIT;
2186         sargp->flag = 0;
2187 
2188         fattrp->attrmask = 0;
2189         /* if no bits requested, then return empty fattr4 */
2190         if (breq == 0) {
2191                 fattrp->attrlist4_len = 0;
2192                 fattrp->attrlist4 = NULL;
2193                 return (NFS4_OK);
2194         }
2195 
2196         /*
2197          * return NFS4ERR_INVAL when client requests write-only attrs
2198          */
2199         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2200                 return (NFS4ERR_INVAL);
2201 
2202         nfs4_ntov_table_init(&ntov);
2203         na = ntov.na;
2204         amap = ntov.amap;
2205 
2206         /*
2207          * Now loop to get or verify the attrs
2208          */
2209         for (i = 0; i < nfs4_ntov_map_size; i++) {
2210                 if (breq & nfs4_ntov_map[i].fbit) {
2211                         if ((*nfs4_ntov_map[i].sv_getit)(
2212                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2213 
2214                                 error = (*nfs4_ntov_map[i].sv_getit)(
2215                                     NFS4ATTR_GETIT, sargp, na);
2216 
2217                                 /*
2218                                  * Possible error values:
2219                                  * >0 if sv_getit failed to
2220                                  * get the attr; 0 if succeeded;
2221                                  * <0 if rdattr_error and the
2222                                  * attribute cannot be returned.
2223                                  */
2224                                 if (error && !(sargp->rdattr_error_req))
2225                                         goto done;
2226                                 /*
2227                                  * If error then just for entry
2228                                  */
2229                                 if (error == 0) {
2230                                         fattrp->attrmask |=
2231                                             nfs4_ntov_map[i].fbit;
2232                                         *amap++ =
2233                                             (uint8_t)nfs4_ntov_map[i].nval;
2234                                         na++;
2235                                         (ntov.attrcnt)++;
2236                                 } else if ((error > 0) &&
2237                                     (sargp->rdattr_error == NFS4_OK)) {
2238                                         sargp->rdattr_error = puterrno4(error);
2239                                 }
2240                                 error = 0;
2241                         }
2242                 }
2243         }
2244 
2245         /*
2246          * If rdattr_error was set after the return value for it was assigned,
2247          * update it.
2248          */
2249         if (prev_rdattr_error != sargp->rdattr_error) {
2250                 na = ntov.na;
2251                 amap = ntov.amap;
2252                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2253                         k = *amap;
2254                         if (k < FATTR4_RDATTR_ERROR) {
2255                                 continue;
2256                         }
2257                         if ((k == FATTR4_RDATTR_ERROR) &&
2258                             ((*nfs4_ntov_map[k].sv_getit)(
2259                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2260 
2261                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2262                                     NFS4ATTR_GETIT, sargp, na);
2263                         }
2264                         break;
2265                 }
2266         }
2267 
2268         xdr_size = 0;
2269         na = ntov.na;
2270         amap = ntov.amap;
2271         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2272                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2273         }
2274 
2275         fattrp->attrlist4_len = xdr_size;
2276         if (xdr_size) {
2277                 /* freed by rfs4_op_getattr_free() */
2278                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2279 
2280                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2281 
2282                 na = ntov.na;
2283                 amap = ntov.amap;
2284                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2285                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2286                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2287                                     int, *amap);
2288                                 status = NFS4ERR_SERVERFAULT;
2289                                 break;
2290                         }
2291                 }
2292                 /* xdrmem_destroy(&xdrs); */        /* NO-OP */
2293         } else {
2294                 fattrp->attrlist4 = NULL;
2295         }
2296 done:
2297 
2298         nfs4_ntov_table_free(&ntov, sargp);
2299 
2300         if (error != 0)
2301                 status = puterrno4(error);
2302 
2303         return (status);
2304 }
2305 
2306 /* ARGSUSED */
2307 static void
2308 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2309     struct compound_state *cs)
2310 {
2311         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2312         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2313         struct nfs4_svgetit_arg sarg;
2314         struct statvfs64 sb;
2315         nfsstat4 status;
2316 
2317         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2318             GETATTR4args *, args);
2319 
2320         if (cs->vp == NULL) {
2321                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2322                 goto out;
2323         }
2324 
2325         if (cs->access == CS_ACCESS_DENIED) {
2326                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2327                 goto out;
2328         }
2329 
2330         sarg.sbp = &sb;
2331         sarg.cs = cs;
2332         sarg.is_referral = B_FALSE;
2333 
2334         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2335         if (status == NFS4_OK) {
2336 
2337                 status = bitmap4_get_sysattrs(&sarg);
2338                 if (status == NFS4_OK) {
2339 
2340                         /* Is this a referral? */
2341                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2342                                 /* Older V4 Solaris client sees a link */
2343                                 if (client_is_downrev(req))
2344                                         sarg.vap->va_type = VLNK;
2345                                 else
2346                                         sarg.is_referral = B_TRUE;
2347                         }
2348 
2349                         status = do_rfs4_op_getattr(args->attr_request,
2350                             &resp->obj_attributes, &sarg);
2351                 }
2352         }
2353         *cs->statusp = resp->status = status;
2354 out:
2355         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2356             GETATTR4res *, resp);
2357 }
2358 
2359 static void
2360 rfs4_op_getattr_free(nfs_resop4 *resop)
2361 {
2362         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2363 
2364         nfs4_fattr4_free(&resp->obj_attributes);
2365 }
2366 
2367 /* ARGSUSED */
2368 static void
2369 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2370     struct compound_state *cs)
2371 {
2372         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2373 
2374         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2375 
2376         if (cs->vp == NULL) {
2377                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2378                 goto out;
2379         }
2380         if (cs->access == CS_ACCESS_DENIED) {
2381                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2382                 goto out;
2383         }
2384 
2385         /* check for reparse point at the share point */
2386         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2387                 /* it's all bad */
2388                 cs->exi->exi_moved = 1;
2389                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2390                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2391                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2392                 return;
2393         }
2394 
2395         /* check for reparse point at vp */
2396         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2397                 /* it's not all bad */
2398                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2399                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2400                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2401                 return;
2402         }
2403 
2404         resp->object.nfs_fh4_val =
2405             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2406         nfs_fh4_copy(&cs->fh, &resp->object);
2407         *cs->statusp = resp->status = NFS4_OK;
2408 out:
2409         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2410             GETFH4res *, resp);
2411 }
2412 
2413 static void
2414 rfs4_op_getfh_free(nfs_resop4 *resop)
2415 {
2416         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2417 
2418         if (resp->status == NFS4_OK &&
2419             resp->object.nfs_fh4_val != NULL) {
2420                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2421                 resp->object.nfs_fh4_val = NULL;
2422                 resp->object.nfs_fh4_len = 0;
2423         }
2424 }
2425 
2426 /*
2427  * illegal: args: void
2428  *          res : status (NFS4ERR_OP_ILLEGAL)
2429  */
2430 /* ARGSUSED */
2431 static void
2432 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2433     struct svc_req *req, struct compound_state *cs)
2434 {
2435         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2436 
2437         resop->resop = OP_ILLEGAL;
2438         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2439 }
2440 
2441 /*
2442  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2443  *       res: status. If success - CURRENT_FH unchanged, return change_info
2444  */
2445 /* ARGSUSED */
2446 static void
2447 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2448     struct compound_state *cs)
2449 {
2450         LINK4args *args = &argop->nfs_argop4_u.oplink;
2451         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2452         int error;
2453         vnode_t *vp;
2454         vnode_t *dvp;
2455         struct vattr bdva, idva, adva;
2456         char *nm;
2457         uint_t  len;
2458         struct sockaddr *ca;
2459         char *name = NULL;
2460         nfsstat4 status;
2461 
2462         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2463             LINK4args *, args);
2464 
2465         /* SAVED_FH: source object */
2466         vp = cs->saved_vp;
2467         if (vp == NULL) {
2468                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2469                 goto out;
2470         }
2471 
2472         /* CURRENT_FH: target directory */
2473         dvp = cs->vp;
2474         if (dvp == NULL) {
2475                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2476                 goto out;
2477         }
2478 
2479         /*
2480          * If there is a non-shared filesystem mounted on this vnode,
2481          * do not allow to link any file in this directory.
2482          */
2483         if (vn_ismntpt(dvp)) {
2484                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2485                 goto out;
2486         }
2487 
2488         if (cs->access == CS_ACCESS_DENIED) {
2489                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2490                 goto out;
2491         }
2492 
2493         /* Check source object's type validity */
2494         if (vp->v_type == VDIR) {
2495                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2496                 goto out;
2497         }
2498 
2499         /* Check target directory's type */
2500         if (dvp->v_type != VDIR) {
2501                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2502                 goto out;
2503         }
2504 
2505         if (cs->saved_exi != cs->exi) {
2506                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2507                 goto out;
2508         }
2509 
2510         status = utf8_dir_verify(&args->newname);
2511         if (status != NFS4_OK) {
2512                 *cs->statusp = resp->status = status;
2513                 goto out;
2514         }
2515 
2516         nm = utf8_to_fn(&args->newname, &len, NULL);
2517         if (nm == NULL) {
2518                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2519                 goto out;
2520         }
2521 
2522         if (len > MAXNAMELEN) {
2523                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2524                 kmem_free(nm, len);
2525                 goto out;
2526         }
2527 
2528         if (rdonly4(req, cs)) {
2529                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2530                 kmem_free(nm, len);
2531                 goto out;
2532         }
2533 
2534         /* Get "before" change value */
2535         bdva.va_mask = AT_CTIME|AT_SEQ;
2536         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2537         if (error) {
2538                 *cs->statusp = resp->status = puterrno4(error);
2539                 kmem_free(nm, len);
2540                 goto out;
2541         }
2542 
2543         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2544         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2545             MAXPATHLEN  + 1);
2546 
2547         if (name == NULL) {
2548                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2549                 kmem_free(nm, len);
2550                 goto out;
2551         }
2552 
2553         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2554 
2555         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2556 
2557         if (nm != name)
2558                 kmem_free(name, MAXPATHLEN + 1);
2559         kmem_free(nm, len);
2560 
2561         /*
2562          * Get the initial "after" sequence number, if it fails, set to zero
2563          */
2564         idva.va_mask = AT_SEQ;
2565         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2566                 idva.va_seq = 0;
2567 
2568         /*
2569          * Force modified data and metadata out to stable storage.
2570          */
2571         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2572         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2573 
2574         if (error) {
2575                 *cs->statusp = resp->status = puterrno4(error);
2576                 goto out;
2577         }
2578 
2579         /*
2580          * Get "after" change value, if it fails, simply return the
2581          * before value.
2582          */
2583         adva.va_mask = AT_CTIME|AT_SEQ;
2584         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2585                 adva.va_ctime = bdva.va_ctime;
2586                 adva.va_seq = 0;
2587         }
2588 
2589         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2590 
2591         /*
2592          * The cinfo.atomic = TRUE only if we have
2593          * non-zero va_seq's, and it has incremented by exactly one
2594          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2595          */
2596         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2597             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2598                 resp->cinfo.atomic = TRUE;
2599         else
2600                 resp->cinfo.atomic = FALSE;
2601 
2602         *cs->statusp = resp->status = NFS4_OK;
2603 out:
2604         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2605             LINK4res *, resp);
2606 }
2607 
2608 /*
2609  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2610  */
2611 
2612 /* ARGSUSED */
2613 static nfsstat4
2614 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2615 {
2616         int error;
2617         int different_export = 0;
2618         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2619         struct exportinfo *exi = NULL, *pre_exi = NULL, *oexi = NULL;
2620         nfsstat4 stat;
2621         fid_t fid;
2622         int attrdir, dotdot, walk;
2623         bool_t is_newvp = FALSE;
2624 
2625         if (cs->vp->v_flag & V_XATTRDIR) {
2626                 attrdir = 1;
2627                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2628         } else {
2629                 attrdir = 0;
2630                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2631         }
2632 
2633         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2634 
2635         /*
2636          * If dotdotting, then need to check whether it's
2637          * above the root of a filesystem, or above an
2638          * export point.
2639          */
2640         if (dotdot) {
2641 
2642                 /*
2643                  * If dotdotting at the root of a filesystem, then
2644                  * need to traverse back to the mounted-on filesystem
2645                  * and do the dotdot lookup there.
2646                  */
2647                 if (cs->vp->v_flag & VROOT) {
2648 
2649                         /*
2650                          * If at the system root, then can
2651                          * go up no further.
2652                          */
2653                         if (VN_CMP(cs->vp, rootdir))
2654                                 return (puterrno4(ENOENT));
2655 
2656                         /*
2657                          * Traverse back to the mounted-on filesystem
2658                          */
2659                         cs->vp = untraverse(cs->vp);
2660 
2661                         /*
2662                          * Set the different_export flag so we remember
2663                          * to pick up a new exportinfo entry for
2664                          * this new filesystem.
2665                          */
2666                         different_export = 1;
2667                 } else {
2668 
2669                         /*
2670                          * If dotdotting above an export point then set
2671                          * the different_export to get new export info.
2672                          */
2673                         different_export = nfs_exported(cs->exi, cs->vp);
2674                 }
2675         }
2676 
2677         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2678             NULL, NULL, NULL);
2679         if (error)
2680                 return (puterrno4(error));
2681 
2682         /*
2683          * If the vnode is in a pseudo filesystem, check whether it is visible.
2684          *
2685          * XXX if the vnode is a symlink and it is not visible in
2686          * a pseudo filesystem, return ENOENT (not following symlink).
2687          * V4 client can not mount such symlink. This is a regression
2688          * from V2/V3.
2689          *
2690          * In the same exported filesystem, if the security flavor used
2691          * is not an explicitly shared flavor, limit the view to the visible
2692          * list entries only. This is not a WRONGSEC case because it's already
2693          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2694          */
2695         if (!different_export &&
2696             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2697             cs->access & CS_ACCESS_LIMITED)) {
2698                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2699                         VN_RELE(vp);
2700                         return (puterrno4(ENOENT));
2701                 }
2702         }
2703 
2704         /*
2705          * If it's a mountpoint, then traverse it.
2706          */
2707         if (vn_ismntpt(vp)) {
2708                 pre_exi = cs->exi;   /* save pre-traversed exportinfo */
2709                 pre_tvp = vp;           /* save pre-traversed vnode     */
2710 
2711                 /*
2712                  * hold pre_tvp to counteract rele by traverse.  We will
2713                  * need pre_tvp below if checkexport fails
2714                  */
2715                 VN_HOLD(pre_tvp);
2716                 if ((error = traverse(&vp)) != 0) {
2717                         VN_RELE(vp);
2718                         VN_RELE(pre_tvp);
2719                         return (puterrno4(error));
2720                 }
2721                 different_export = 1;
2722         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2723                 /*
2724                  * The vfsp comparison is to handle the case where
2725                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2726                  * and NFS is unaware of local fs transistions because
2727                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2728                  * the dir and the obj returned by lookup will have different
2729                  * vfs ptrs.
2730                  */
2731                 different_export = 1;
2732         }
2733 
2734         if (different_export) {
2735 
2736                 bzero(&fid, sizeof (fid));
2737                 fid.fid_len = MAXFIDSZ;
2738                 error = vop_fid_pseudo(vp, &fid);
2739                 if (error) {
2740                         VN_RELE(vp);
2741                         if (pre_tvp)
2742                                 VN_RELE(pre_tvp);
2743                         return (puterrno4(error));
2744                 }
2745 
2746                 if (dotdot)
2747                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2748                 else
2749                         exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid, vp);
2750 
2751                 if (exi == NULL) {
2752                         if (pre_tvp) {
2753                                 /*
2754                                  * If this vnode is a mounted-on vnode,
2755                                  * but the mounted-on file system is not
2756                                  * exported, send back the filehandle for
2757                                  * the mounted-on vnode, not the root of
2758                                  * the mounted-on file system.
2759                                  */
2760                                 VN_RELE(vp);
2761                                 vp = pre_tvp;
2762                                 exi = pre_exi;
2763                                 if (exi)
2764                                         exi_hold(exi);
2765                         } else {
2766                                 VN_RELE(vp);
2767                                 return (puterrno4(EACCES));
2768                         }
2769                 } else if (pre_tvp) {
2770                         /* we're done with pre_tvp now. release extra hold */
2771                         VN_RELE(pre_tvp);
2772                 }
2773 
2774                 if (cs->exi)
2775                         exi_rele(cs->exi);
2776                 cs->exi = exi;
2777 
2778                 /*
2779                  * Now we do a checkauth4. The reason is that
2780                  * this client/user may not have access to the new
2781                  * exported file system, and if he does,
2782                  * the client/user may be mapped to a different uid.
2783                  *
2784                  * We start with a new cr, because the checkauth4 done
2785                  * in the PUT*FH operation over wrote the cred's uid,
2786                  * gid, etc, and we want the real thing before calling
2787                  * checkauth4()
2788                  */
2789                 crfree(cs->cr);
2790                 cs->cr = crdup(cs->basecr);
2791 
2792                 oldvp = cs->vp;
2793                 cs->vp = vp;
2794                 is_newvp = TRUE;
2795 
2796                 stat = call_checkauth4(cs, req);
2797                 if (stat != NFS4_OK) {
2798                         VN_RELE(cs->vp);
2799                         cs->vp = oldvp;
2800                         return (stat);
2801                 }
2802         }
2803 
2804         /*
2805          * After various NFS checks, do a label check on the path
2806          * component. The label on this path should either be the
2807          * global zone's label or a zone's label. We are only
2808          * interested in the zone's label because exported files
2809          * in global zone is accessible (though read-only) to
2810          * clients. The exportability/visibility check is already
2811          * done before reaching this code.
2812          */
2813         if (is_system_labeled()) {
2814                 bslabel_t *clabel;
2815 
2816                 ASSERT(req->rq_label != NULL);
2817                 clabel = req->rq_label;
2818                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2819                     "got client label from request(1)", struct svc_req *, req);
2820 
2821                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2822                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2823                             cs->exi)) {
2824                                 error = EACCES;
2825                                 goto err_out;
2826                         }
2827                 } else {
2828                         /*
2829                          * We grant access to admin_low label clients
2830                          * only if the client is trusted, i.e. also
2831                          * running Solaris Trusted Extension.
2832                          */
2833                         struct sockaddr *ca;
2834                         int             addr_type;
2835                         void            *ipaddr;
2836                         tsol_tpc_t      *tp;
2837 
2838                         ca = (struct sockaddr *)svc_getrpccaller(
2839                             req->rq_xprt)->buf;
2840                         if (ca->sa_family == AF_INET) {
2841                                 addr_type = IPV4_VERSION;
2842                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2843                         } else if (ca->sa_family == AF_INET6) {
2844                                 addr_type = IPV6_VERSION;
2845                                 ipaddr = &((struct sockaddr_in6 *)
2846                                     ca)->sin6_addr;
2847                         }
2848                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2849                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2850                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2851                             SUN_CIPSO) {
2852                                 if (tp != NULL)
2853                                         TPC_RELE(tp);
2854                                 error = EACCES;
2855                                 goto err_out;
2856                         }
2857                         TPC_RELE(tp);
2858                 }
2859         }
2860 
2861         error = makefh4(&cs->fh, vp, cs->exi);
2862 
2863 err_out:
2864         if (error) {
2865                 if (is_newvp) {
2866                         VN_RELE(cs->vp);
2867                         cs->vp = oldvp;
2868                 } else
2869                         VN_RELE(vp);
2870                 return (puterrno4(error));
2871         }
2872 
2873         if (!is_newvp) {
2874                 if (cs->vp)
2875                         VN_RELE(cs->vp);
2876                 cs->vp = vp;
2877         } else if (oldvp)
2878                 VN_RELE(oldvp);
2879 
2880         /*
2881          * if did lookup on attrdir and didn't lookup .., set named
2882          * attr fh flag
2883          */
2884         if (attrdir && ! dotdot)
2885                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2886 
2887         /* Assume false for now, open proc will set this */
2888         cs->mandlock = FALSE;
2889 
2890         return (NFS4_OK);
2891 }
2892 
2893 /* ARGSUSED */
2894 static void
2895 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2896     struct compound_state *cs)
2897 {
2898         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2899         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2900         char *nm;
2901         uint_t len;
2902         struct sockaddr *ca;
2903         char *name = NULL;
2904         nfsstat4 status;
2905 
2906         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2907             LOOKUP4args *, args);
2908 
2909         if (cs->vp == NULL) {
2910                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2911                 goto out;
2912         }
2913 
2914         if (cs->vp->v_type == VLNK) {
2915                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2916                 goto out;
2917         }
2918 
2919         if (cs->vp->v_type != VDIR) {
2920                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2921                 goto out;
2922         }
2923 
2924         status = utf8_dir_verify(&args->objname);
2925         if (status != NFS4_OK) {
2926                 *cs->statusp = resp->status = status;
2927                 goto out;
2928         }
2929 
2930         nm = utf8_to_str(&args->objname, &len, NULL);
2931         if (nm == NULL) {
2932                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2933                 goto out;
2934         }
2935 
2936         if (len > MAXNAMELEN) {
2937                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2938                 kmem_free(nm, len);
2939                 goto out;
2940         }
2941 
2942         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2943         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2944             MAXPATHLEN  + 1);
2945 
2946         if (name == NULL) {
2947                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2948                 kmem_free(nm, len);
2949                 goto out;
2950         }
2951 
2952         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2953 
2954         if (name != nm)
2955                 kmem_free(name, MAXPATHLEN + 1);
2956         kmem_free(nm, len);
2957 
2958 out:
2959         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2960             LOOKUP4res *, resp);
2961 }
2962 
2963 /* ARGSUSED */
2964 static void
2965 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2966     struct compound_state *cs)
2967 {
2968         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2969 
2970         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2971 
2972         if (cs->vp == NULL) {
2973                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2974                 goto out;
2975         }
2976 
2977         if (cs->vp->v_type != VDIR) {
2978                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2979                 goto out;
2980         }
2981 
2982         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2983 
2984         /*
2985          * From NFSV4 Specification, LOOKUPP should not check for
2986          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2987          */
2988         if (resp->status == NFS4ERR_WRONGSEC) {
2989                 *cs->statusp = resp->status = NFS4_OK;
2990         }
2991 
2992 out:
2993         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2994             LOOKUPP4res *, resp);
2995 }
2996 
2997 
2998 /*ARGSUSED2*/
2999 static void
3000 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3001     struct compound_state *cs)
3002 {
3003         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
3004         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
3005         vnode_t         *avp = NULL;
3006         int             lookup_flags = LOOKUP_XATTR, error;
3007         int             exp_ro = 0;
3008 
3009         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3010             OPENATTR4args *, args);
3011 
3012         if (cs->vp == NULL) {
3013                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3014                 goto out;
3015         }
3016 
3017         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3018             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3019                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3020                 goto out;
3021         }
3022 
3023         /*
3024          * If file system supports passing ACE mask to VOP_ACCESS then
3025          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3026          */
3027 
3028         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3029                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3030                     V_ACE_MASK, cs->cr, NULL);
3031         else
3032                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3033                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3034                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3035 
3036         if (error) {
3037                 *cs->statusp = resp->status = puterrno4(EACCES);
3038                 goto out;
3039         }
3040 
3041         /*
3042          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3043          * the file system is exported read-only -- regardless of
3044          * createdir flag.  Otherwise the attrdir would be created
3045          * (assuming server fs isn't mounted readonly locally).  If
3046          * VOP_LOOKUP returns ENOENT in this case, the error will
3047          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3048          * because specfs has no VOP_LOOKUP op, so the macro would
3049          * return ENOSYS.  EINVAL is returned by all (current)
3050          * Solaris file system implementations when any of their
3051          * restrictions are violated (xattr(dir) can't have xattrdir).
3052          * Returning NOTSUPP is more appropriate in this case
3053          * because the object will never be able to have an attrdir.
3054          */
3055         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3056                 lookup_flags |= CREATE_XATTR_DIR;
3057 
3058         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3059             NULL, NULL, NULL);
3060 
3061         if (error) {
3062                 if (error == ENOENT && args->createdir && exp_ro)
3063                         *cs->statusp = resp->status = puterrno4(EROFS);
3064                 else if (error == EINVAL || error == ENOSYS)
3065                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3066                 else
3067                         *cs->statusp = resp->status = puterrno4(error);
3068                 goto out;
3069         }
3070 
3071         ASSERT(avp->v_flag & V_XATTRDIR);
3072 
3073         error = makefh4(&cs->fh, avp, cs->exi);
3074 
3075         if (error) {
3076                 VN_RELE(avp);
3077                 *cs->statusp = resp->status = puterrno4(error);
3078                 goto out;
3079         }
3080 
3081         VN_RELE(cs->vp);
3082         cs->vp = avp;
3083 
3084         /*
3085          * There is no requirement for an attrdir fh flag
3086          * because the attrdir has a vnode flag to distinguish
3087          * it from regular (non-xattr) directories.  The
3088          * FH4_ATTRDIR flag is set for future sanity checks.
3089          */
3090         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3091         *cs->statusp = resp->status = NFS4_OK;
3092 
3093 out:
3094         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3095             OPENATTR4res *, resp);
3096 }
3097 
3098 static int
3099 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3100     caller_context_t *ct)
3101 {
3102         int error;
3103         int i;
3104         clock_t delaytime;
3105 
3106         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3107 
3108         /*
3109          * Don't block on mandatory locks. If this routine returns
3110          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3111          */
3112         uio->uio_fmode = FNONBLOCK;
3113 
3114         for (i = 0; i < rfs4_maxlock_tries; i++) {
3115 
3116 
3117                 if (direction == FREAD) {
3118                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3119                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3120                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3121                 } else {
3122                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3123                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3124                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3125                 }
3126 
3127                 if (error != EAGAIN)
3128                         break;
3129 
3130                 if (i < rfs4_maxlock_tries - 1) {
3131                         delay(delaytime);
3132                         delaytime *= 2;
3133                 }
3134         }
3135 
3136         return (error);
3137 }
3138 
3139 /* ARGSUSED */
3140 static void
3141 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3142     struct compound_state *cs)
3143 {
3144         READ4args *args = &argop->nfs_argop4_u.opread;
3145         READ4res *resp = &resop->nfs_resop4_u.opread;
3146         int error;
3147         int verror;
3148         vnode_t *vp;
3149         struct vattr va;
3150         struct iovec iov, *iovp = NULL;
3151         int iovcnt;
3152         struct uio uio;
3153         u_offset_t offset;
3154         bool_t *deleg = &cs->deleg;
3155         nfsstat4 stat;
3156         int in_crit = 0;
3157         mblk_t *mp = NULL;
3158         int alloc_err = 0;
3159         int rdma_used = 0;
3160         int loaned_buffers;
3161         caller_context_t ct;
3162         struct uio *uiop;
3163 
3164         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3165             READ4args, args);
3166 
3167         vp = cs->vp;
3168         if (vp == NULL) {
3169                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3170                 goto out;
3171         }
3172         if (cs->access == CS_ACCESS_DENIED) {
3173                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3174                 goto out;
3175         }
3176 
3177         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3178             deleg, TRUE, &ct)) != NFS4_OK) {
3179                 *cs->statusp = resp->status = stat;
3180                 goto out;
3181         }
3182 
3183         /*
3184          * Enter the critical region before calling VOP_RWLOCK
3185          * to avoid a deadlock with write requests.
3186          */
3187         if (nbl_need_check(vp)) {
3188                 nbl_start_crit(vp, RW_READER);
3189                 in_crit = 1;
3190                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3191                     &ct)) {
3192                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3193                         goto out;
3194                 }
3195         }
3196 
3197         if (args->wlist) {
3198                 if (args->count > clist_len(args->wlist)) {
3199                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3200                         goto out;
3201                 }
3202                 rdma_used = 1;
3203         }
3204 
3205         /* use loaned buffers for TCP */
3206         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3207 
3208         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3209         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3210 
3211         /*
3212          * If we can't get the attributes, then we can't do the
3213          * right access checking.  So, we'll fail the request.
3214          */
3215         if (verror) {
3216                 *cs->statusp = resp->status = puterrno4(verror);
3217                 goto out;
3218         }
3219 
3220         if (vp->v_type != VREG) {
3221                 *cs->statusp = resp->status =
3222                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3223                 goto out;
3224         }
3225 
3226         if (crgetuid(cs->cr) != va.va_uid &&
3227             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3228             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3229                 *cs->statusp = resp->status = puterrno4(error);
3230                 goto out;
3231         }
3232 
3233         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3234                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3235                 goto out;
3236         }
3237 
3238         offset = args->offset;
3239         if (offset >= va.va_size) {
3240                 *cs->statusp = resp->status = NFS4_OK;
3241                 resp->eof = TRUE;
3242                 resp->data_len = 0;
3243                 resp->data_val = NULL;
3244                 resp->mblk = NULL;
3245                 /* RDMA */
3246                 resp->wlist = args->wlist;
3247                 resp->wlist_len = resp->data_len;
3248                 *cs->statusp = resp->status = NFS4_OK;
3249                 if (resp->wlist)
3250                         clist_zero_len(resp->wlist);
3251                 goto out;
3252         }
3253 
3254         if (args->count == 0) {
3255                 *cs->statusp = resp->status = NFS4_OK;
3256                 resp->eof = FALSE;
3257                 resp->data_len = 0;
3258                 resp->data_val = NULL;
3259                 resp->mblk = NULL;
3260                 /* RDMA */
3261                 resp->wlist = args->wlist;
3262                 resp->wlist_len = resp->data_len;
3263                 if (resp->wlist)
3264                         clist_zero_len(resp->wlist);
3265                 goto out;
3266         }
3267 
3268         /*
3269          * Do not allocate memory more than maximum allowed
3270          * transfer size
3271          */
3272         if (args->count > rfs4_tsize(req))
3273                 args->count = rfs4_tsize(req);
3274 
3275         if (loaned_buffers) {
3276                 uiop = (uio_t *)rfs_setup_xuio(vp);
3277                 ASSERT(uiop != NULL);
3278                 uiop->uio_segflg = UIO_SYSSPACE;
3279                 uiop->uio_loffset = args->offset;
3280                 uiop->uio_resid = args->count;
3281 
3282                 /* Jump to do the read if successful */
3283                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3284                         /*
3285                          * Need to hold the vnode until after VOP_RETZCBUF()
3286                          * is called.
3287                          */
3288                         VN_HOLD(vp);
3289                         goto doio_read;
3290                 }
3291 
3292                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3293                     uiop->uio_loffset, int, uiop->uio_resid);
3294 
3295                 uiop->uio_extflg = 0;
3296 
3297                 /* failure to setup for zero copy */
3298                 rfs_free_xuio((void *)uiop);
3299                 loaned_buffers = 0;
3300         }
3301 
3302         /*
3303          * If returning data via RDMA Write, then grab the chunk list. If we
3304          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3305          */
3306         if (rdma_used) {
3307                 mp = NULL;
3308                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3309                 uio.uio_iov = &iov;
3310                 uio.uio_iovcnt = 1;
3311         } else {
3312                 /*
3313                  * mp will contain the data to be sent out in the read reply.
3314                  * It will be freed after the reply has been sent.
3315                  */
3316                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3317                 ASSERT(mp != NULL);
3318                 ASSERT(alloc_err == 0);
3319                 uio.uio_iov = iovp;
3320                 uio.uio_iovcnt = iovcnt;
3321         }
3322 
3323         uio.uio_segflg = UIO_SYSSPACE;
3324         uio.uio_extflg = UIO_COPY_CACHED;
3325         uio.uio_loffset = args->offset;
3326         uio.uio_resid = args->count;
3327         uiop = &uio;
3328 
3329 doio_read:
3330         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3331 
3332         va.va_mask = AT_SIZE;
3333         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3334 
3335         if (error) {
3336                 if (mp)
3337                         freemsg(mp);
3338                 *cs->statusp = resp->status = puterrno4(error);
3339                 goto out;
3340         }
3341 
3342         /* make mblk using zc buffers */
3343         if (loaned_buffers) {
3344                 mp = uio_to_mblk(uiop);
3345                 ASSERT(mp != NULL);
3346         }
3347 
3348         *cs->statusp = resp->status = NFS4_OK;
3349 
3350         ASSERT(uiop->uio_resid >= 0);
3351         resp->data_len = args->count - uiop->uio_resid;
3352         if (mp) {
3353                 resp->data_val = (char *)mp->b_datap->db_base;
3354                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3355         } else {
3356                 resp->data_val = (caddr_t)iov.iov_base;
3357         }
3358 
3359         resp->mblk = mp;
3360 
3361         if (!verror && offset + resp->data_len == va.va_size)
3362                 resp->eof = TRUE;
3363         else
3364                 resp->eof = FALSE;
3365 
3366         if (rdma_used) {
3367                 if (!rdma_setup_read_data4(args, resp)) {
3368                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3369                 }
3370         } else {
3371                 resp->wlist = NULL;
3372         }
3373 
3374 out:
3375         if (in_crit)
3376                 nbl_end_crit(vp);
3377 
3378         if (iovp != NULL)
3379                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3380 
3381         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3382             READ4res *, resp);
3383 }
3384 
3385 static void
3386 rfs4_op_read_free(nfs_resop4 *resop)
3387 {
3388         READ4res        *resp = &resop->nfs_resop4_u.opread;
3389 
3390         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3391                 freemsg(resp->mblk);
3392                 resp->mblk = NULL;
3393                 resp->data_val = NULL;
3394                 resp->data_len = 0;
3395         }
3396 }
3397 
3398 static void
3399 rfs4_op_readdir_free(nfs_resop4 * resop)
3400 {
3401         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3402 
3403         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3404                 freeb(resp->mblk);
3405                 resp->mblk = NULL;
3406                 resp->data_len = 0;
3407         }
3408 }
3409 
3410 
3411 /* ARGSUSED */
3412 static void
3413 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3414     struct compound_state *cs)
3415 {
3416         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3417         int             error;
3418         vnode_t         *vp;
3419         struct exportinfo *exi, *sav_exi;
3420         nfs_fh4_fmt_t   *fh_fmtp;
3421 
3422         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3423 
3424         if (cs->vp) {
3425                 VN_RELE(cs->vp);
3426                 cs->vp = NULL;
3427         }
3428 
3429         if (cs->cr)
3430                 crfree(cs->cr);
3431 
3432         cs->cr = crdup(cs->basecr);
3433 
3434         vp = exi_public->exi_vp;
3435         if (vp == NULL) {
3436                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3437                 goto out;
3438         }
3439 
3440         error = makefh4(&cs->fh, vp, exi_public);
3441         if (error != 0) {
3442                 *cs->statusp = resp->status = puterrno4(error);
3443                 goto out;
3444         }
3445         sav_exi = cs->exi;
3446         if (exi_public == exi_root) {
3447                 /*
3448                  * No filesystem is actually shared public, so we default
3449                  * to exi_root. In this case, we must check whether root
3450                  * is exported.
3451                  */
3452                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3453 
3454                 /*
3455                  * if root filesystem is exported, the exportinfo struct that we
3456                  * should use is what checkexport returns, because root_exi is
3457                  * actually a mostly empty struct.
3458                  */
3459                 exi = checkexport(&fh_fmtp->fh4_fsid,
3460                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3461                 if (exi) {
3462                         cs->exi = exi;
3463                 } else {
3464                         exi_hold(exi_public);
3465                         cs->exi = exi_public;
3466                 }
3467         } else {
3468                 /*
3469                  * it's a properly shared filesystem
3470                  */
3471                 exi_hold(exi_public);
3472                 cs->exi = exi_public;
3473         }
3474 
3475         if (is_system_labeled()) {
3476                 bslabel_t *clabel;
3477 
3478                 ASSERT(req->rq_label != NULL);
3479                 clabel = req->rq_label;
3480                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3481                     "got client label from request(1)",
3482                     struct svc_req *, req);
3483                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3484                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3485                             cs->exi)) {
3486                                 *cs->statusp = resp->status =
3487                                     NFS4ERR_SERVERFAULT;
3488                                 if (sav_exi)
3489                                         exi_rele(sav_exi);
3490                                 goto out;
3491                         }
3492                 }
3493         }
3494 
3495         VN_HOLD(vp);
3496         cs->vp = vp;
3497 
3498         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3499                 VN_RELE(cs->vp);
3500                 cs->vp = NULL;
3501                 exi_rele(cs->exi);
3502                 cs->exi = sav_exi;
3503                 goto out;
3504         }
3505         if (sav_exi)
3506                 exi_rele(sav_exi);
3507 
3508         *cs->statusp = resp->status = NFS4_OK;
3509 out:
3510         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3511             PUTPUBFH4res *, resp);
3512 }
3513 
3514 /*
3515  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3516  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3517  * or joe have restrictive search permissions, then we shouldn't let
3518  * the client get a file handle. This is easy to enforce. However, we
3519  * don't know what security flavor should be used until we resolve the
3520  * path name. Another complication is uid mapping. If root is
3521  * the user, then it will be mapped to the anonymous user by default,
3522  * but we won't know that till we've resolved the path name. And we won't
3523  * know what the anonymous user is.
3524  * Luckily, SECINFO is specified to take a full filename.
3525  * So what we will have to in rfs4_op_lookup is check that flavor of
3526  * the target object matches that of the request, and if root was the
3527  * caller, check for the root= and anon= options, and if necessary,
3528  * repeat the lookup using the right cred_t. But that's not done yet.
3529  */
3530 /* ARGSUSED */
3531 static void
3532 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3533     struct compound_state *cs)
3534 {
3535         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3536         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3537         nfs_fh4_fmt_t *fh_fmtp;
3538 
3539         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3540             PUTFH4args *, args);
3541 
3542         if (cs->vp) {
3543                 VN_RELE(cs->vp);
3544                 cs->vp = NULL;
3545         }
3546 
3547         if (cs->cr) {
3548                 crfree(cs->cr);
3549                 cs->cr = NULL;
3550         }
3551 
3552 
3553         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3554                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3555                 goto out;
3556         }
3557 
3558         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3559         if (cs->exi)
3560                 exi_rele(cs->exi);
3561         cs->exi = checkexport(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3562             NULL);
3563 
3564         if (cs->exi == NULL) {
3565                 *cs->statusp = resp->status = NFS4ERR_STALE;
3566                 goto out;
3567         }
3568 
3569         cs->cr = crdup(cs->basecr);
3570 
3571         ASSERT(cs->cr != NULL);
3572 
3573         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3574                 *cs->statusp = resp->status;
3575                 goto out;
3576         }
3577 
3578         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3579                 VN_RELE(cs->vp);
3580                 cs->vp = NULL;
3581                 goto out;
3582         }
3583 
3584         nfs_fh4_copy(&args->object, &cs->fh);
3585         *cs->statusp = resp->status = NFS4_OK;
3586         cs->deleg = FALSE;
3587 
3588 out:
3589         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3590             PUTFH4res *, resp);
3591 }
3592 
3593 /* ARGSUSED */
3594 static void
3595 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3596     struct compound_state *cs)
3597 {
3598         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3599         int error;
3600         fid_t fid;
3601         struct exportinfo *exi, *sav_exi;
3602 
3603         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3604 
3605         if (cs->vp) {
3606                 VN_RELE(cs->vp);
3607                 cs->vp = NULL;
3608         }
3609 
3610         if (cs->cr)
3611                 crfree(cs->cr);
3612 
3613         cs->cr = crdup(cs->basecr);
3614 
3615         /*
3616          * Using rootdir, the system root vnode,
3617          * get its fid.
3618          */
3619         bzero(&fid, sizeof (fid));
3620         fid.fid_len = MAXFIDSZ;
3621         error = vop_fid_pseudo(rootdir, &fid);
3622         if (error != 0) {
3623                 *cs->statusp = resp->status = puterrno4(error);
3624                 goto out;
3625         }
3626 
3627         /*
3628          * Then use the root fsid & fid it to find out if it's exported
3629          *
3630          * If the server root isn't exported directly, then
3631          * it should at least be a pseudo export based on
3632          * one or more exports further down in the server's
3633          * file tree.
3634          */
3635         exi = checkexport(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3636         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3637                 NFS4_DEBUG(rfs4_debug,
3638                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3639                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3640                 if (exi)
3641                         exi_rele(exi);
3642                 goto out;
3643         }
3644 
3645         /*
3646          * Now make a filehandle based on the root
3647          * export and root vnode.
3648          */
3649         error = makefh4(&cs->fh, rootdir, exi);
3650         if (error != 0) {
3651                 *cs->statusp = resp->status = puterrno4(error);
3652                 exi_rele(exi);
3653                 goto out;
3654         }
3655 
3656         sav_exi = cs->exi;
3657         cs->exi = exi;
3658 
3659         VN_HOLD(rootdir);
3660         cs->vp = rootdir;
3661 
3662         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3663                 VN_RELE(rootdir);
3664                 cs->vp = NULL;
3665                 exi_rele(exi);
3666                 cs->exi = sav_exi;
3667                 goto out;
3668         }
3669         if (sav_exi)
3670                 exi_rele(sav_exi);
3671 
3672         *cs->statusp = resp->status = NFS4_OK;
3673         cs->deleg = FALSE;
3674 out:
3675         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3676             PUTROOTFH4res *, resp);
3677 }
3678 
3679 /*
3680  * set_rdattr_params sets up the variables used to manage what information
3681  * to get for each directory entry.
3682  */
3683 static nfsstat4
3684 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3685     bitmap4 attrs, bool_t *need_to_lookup)
3686 {
3687         uint_t  va_mask;
3688         nfsstat4 status;
3689         bitmap4 objbits;
3690 
3691         status = bitmap4_to_attrmask(attrs, sargp);
3692         if (status != NFS4_OK) {
3693                 /*
3694                  * could not even figure attr mask
3695                  */
3696                 return (status);
3697         }
3698         va_mask = sargp->vap->va_mask;
3699 
3700         /*
3701          * dirent's d_ino is always correct value for mounted_on_fileid.
3702          * mntdfid_set is set once here, but mounted_on_fileid is
3703          * set in main dirent processing loop for each dirent.
3704          * The mntdfid_set is a simple optimization that lets the
3705          * server attr code avoid work when caller is readdir.
3706          */
3707         sargp->mntdfid_set = TRUE;
3708 
3709         /*
3710          * Lookup entry only if client asked for any of the following:
3711          * a) vattr attrs
3712          * b) vfs attrs
3713          * c) attrs w/per-object scope requested (change, filehandle, etc)
3714          *    other than mounted_on_fileid (which we can take from dirent)
3715          */
3716         objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3717 
3718         if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3719                 *need_to_lookup = TRUE;
3720         else
3721                 *need_to_lookup = FALSE;
3722 
3723         if (sargp->sbp == NULL)
3724                 return (NFS4_OK);
3725 
3726         /*
3727          * If filesystem attrs are requested, get them now from the
3728          * directory vp, as most entries will have same filesystem. The only
3729          * exception are mounted over entries but we handle
3730          * those as we go (XXX mounted over detection not yet implemented).
3731          */
3732         sargp->vap->va_mask = 0;  /* to avoid VOP_GETATTR */
3733         status = bitmap4_get_sysattrs(sargp);
3734         sargp->vap->va_mask = va_mask;
3735 
3736         if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3737                 /*
3738                  * Failed to get filesystem attributes.
3739                  * Return a rdattr_error for each entry, but don't fail.
3740                  * However, don't get any obj-dependent attrs.
3741                  */
3742                 sargp->rdattr_error = status;        /* for rdattr_error */
3743                 *need_to_lookup = FALSE;
3744                 /*
3745                  * At least get fileid for regular readdir output
3746                  */
3747                 sargp->vap->va_mask &= AT_NODEID;
3748                 status = NFS4_OK;
3749         }
3750 
3751         return (status);
3752 }
3753 
3754 /*
3755  * readlink: args: CURRENT_FH.
3756  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3757  */
3758 
3759 /* ARGSUSED */
3760 static void
3761 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3762     struct compound_state *cs)
3763 {
3764         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3765         int error;
3766         vnode_t *vp;
3767         struct iovec iov;
3768         struct vattr va;
3769         struct uio uio;
3770         char *data;
3771         struct sockaddr *ca;
3772         char *name = NULL;
3773         int is_referral;
3774 
3775         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3776 
3777         /* CURRENT_FH: directory */
3778         vp = cs->vp;
3779         if (vp == NULL) {
3780                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3781                 goto out;
3782         }
3783 
3784         if (cs->access == CS_ACCESS_DENIED) {
3785                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3786                 goto out;
3787         }
3788 
3789         /* Is it a referral? */
3790         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3791 
3792                 is_referral = 1;
3793 
3794         } else {
3795 
3796                 is_referral = 0;
3797 
3798                 if (vp->v_type == VDIR) {
3799                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3800                         goto out;
3801                 }
3802 
3803                 if (vp->v_type != VLNK) {
3804                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3805                         goto out;
3806                 }
3807 
3808         }
3809 
3810         va.va_mask = AT_MODE;
3811         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3812         if (error) {
3813                 *cs->statusp = resp->status = puterrno4(error);
3814                 goto out;
3815         }
3816 
3817         if (MANDLOCK(vp, va.va_mode)) {
3818                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3819                 goto out;
3820         }
3821 
3822         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3823 
3824         if (is_referral) {
3825                 char *s;
3826                 size_t strsz;
3827 
3828                 /* Get an artificial symlink based on a referral */
3829                 s = build_symlink(vp, cs->cr, &strsz);
3830                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3831                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3832                     vnode_t *, vp, char *, s);
3833                 if (s == NULL)
3834                         error = EINVAL;
3835                 else {
3836                         error = 0;
3837                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3838                         kmem_free(s, strsz);
3839                 }
3840 
3841         } else {
3842 
3843                 iov.iov_base = data;
3844                 iov.iov_len = MAXPATHLEN;
3845                 uio.uio_iov = &iov;
3846                 uio.uio_iovcnt = 1;
3847                 uio.uio_segflg = UIO_SYSSPACE;
3848                 uio.uio_extflg = UIO_COPY_CACHED;
3849                 uio.uio_loffset = 0;
3850                 uio.uio_resid = MAXPATHLEN;
3851 
3852                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3853 
3854                 if (!error)
3855                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3856         }
3857 
3858         if (error) {
3859                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3860                 *cs->statusp = resp->status = puterrno4(error);
3861                 goto out;
3862         }
3863 
3864         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3865         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3866             MAXPATHLEN  + 1);
3867 
3868         if (name == NULL) {
3869                 /*
3870                  * Even though the conversion failed, we return
3871                  * something. We just don't translate it.
3872                  */
3873                 name = data;
3874         }
3875 
3876         /*
3877          * treat link name as data
3878          */
3879         (void) str_to_utf8(name, (utf8string *)&resp->link);
3880 
3881         if (name != data)
3882                 kmem_free(name, MAXPATHLEN + 1);
3883         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3884         *cs->statusp = resp->status = NFS4_OK;
3885 
3886 out:
3887         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3888             READLINK4res *, resp);
3889 }
3890 
3891 static void
3892 rfs4_op_readlink_free(nfs_resop4 *resop)
3893 {
3894         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3895         utf8string *symlink = (utf8string *)&resp->link;
3896 
3897         if (symlink->utf8string_val) {
3898                 UTF8STRING_FREE(*symlink)
3899         }
3900 }
3901 
3902 /*
3903  * release_lockowner:
3904  *      Release any state associated with the supplied
3905  *      lockowner. Note if any lo_state is holding locks we will not
3906  *      rele that lo_state and thus the lockowner will not be destroyed.
3907  *      A client using lock after the lock owner stateid has been released
3908  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3909  *      to reissue the lock with new_lock_owner set to TRUE.
3910  *      args: lock_owner
3911  *      res:  status
3912  */
3913 /* ARGSUSED */
3914 static void
3915 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3916     struct svc_req *req, struct compound_state *cs)
3917 {
3918         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3919         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3920         rfs4_lockowner_t *lo;
3921         rfs4_openowner_t *oo;
3922         rfs4_state_t *sp;
3923         rfs4_lo_state_t *lsp;
3924         rfs4_client_t *cp;
3925         bool_t create = FALSE;
3926         locklist_t *llist;
3927         sysid_t sysid;
3928 
3929         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3930             cs, RELEASE_LOCKOWNER4args *, ap);
3931 
3932         /* Make sure there is a clientid around for this request */
3933         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3934 
3935         if (cp == NULL) {
3936                 *cs->statusp = resp->status =
3937                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3938                 goto out;
3939         }
3940         rfs4_client_rele(cp);
3941 
3942         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3943         if (lo == NULL) {
3944                 *cs->statusp = resp->status = NFS4_OK;
3945                 goto out;
3946         }
3947         ASSERT(lo->rl_client != NULL);
3948 
3949         /*
3950          * Check for EXPIRED client. If so will reap state with in a lease
3951          * period or on next set_clientid_confirm step
3952          */
3953         if (rfs4_lease_expired(lo->rl_client)) {
3954                 rfs4_lockowner_rele(lo);
3955                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3956                 goto out;
3957         }
3958 
3959         /*
3960          * If no sysid has been assigned, then no locks exist; just return.
3961          */
3962         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3963         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3964                 rfs4_lockowner_rele(lo);
3965                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3966                 goto out;
3967         }
3968 
3969         sysid = lo->rl_client->rc_sysidt;
3970         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3971 
3972         /*
3973          * Mark the lockowner invalid.
3974          */
3975         rfs4_dbe_hide(lo->rl_dbe);
3976 
3977         /*
3978          * sysid-pid pair should now not be used since the lockowner is
3979          * invalid. If the client were to instantiate the lockowner again
3980          * it would be assigned a new pid. Thus we can get the list of
3981          * current locks.
3982          */
3983 
3984         llist = flk_get_active_locks(sysid, lo->rl_pid);
3985         /* If we are still holding locks fail */
3986         if (llist != NULL) {
3987 
3988                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3989 
3990                 flk_free_locklist(llist);
3991                 /*
3992                  * We need to unhide the lockowner so the client can
3993                  * try it again. The bad thing here is if the client
3994                  * has a logic error that took it here in the first place
3995                  * he probably has lost accounting of the locks that it
3996                  * is holding. So we may have dangling state until the
3997                  * open owner state is reaped via close. One scenario
3998                  * that could possibly occur is that the client has
3999                  * sent the unlock request(s) in separate threads
4000                  * and has not waited for the replies before sending the
4001                  * RELEASE_LOCKOWNER request. Presumably, it would expect
4002                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4003                  * reissuing the request.
4004                  */
4005                 rfs4_dbe_unhide(lo->rl_dbe);
4006                 rfs4_lockowner_rele(lo);
4007                 goto out;
4008         }
4009 
4010         /*
4011          * For the corresponding client we need to check each open
4012          * owner for any opens that have lockowner state associated
4013          * with this lockowner.
4014          */
4015 
4016         rfs4_dbe_lock(lo->rl_client->rc_dbe);
4017         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4018             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4019 
4020                 rfs4_dbe_lock(oo->ro_dbe);
4021                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4022                     sp = list_next(&oo->ro_statelist, sp)) {
4023 
4024                         rfs4_dbe_lock(sp->rs_dbe);
4025                         for (lsp = list_head(&sp->rs_lostatelist);
4026                             lsp != NULL;
4027                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
4028                                 if (lsp->rls_locker == lo) {
4029                                         rfs4_dbe_lock(lsp->rls_dbe);
4030                                         rfs4_dbe_invalidate(lsp->rls_dbe);
4031                                         rfs4_dbe_unlock(lsp->rls_dbe);
4032                                 }
4033                         }
4034                         rfs4_dbe_unlock(sp->rs_dbe);
4035                 }
4036                 rfs4_dbe_unlock(oo->ro_dbe);
4037         }
4038         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4039 
4040         rfs4_lockowner_rele(lo);
4041 
4042         *cs->statusp = resp->status = NFS4_OK;
4043 
4044 out:
4045         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4046             cs, RELEASE_LOCKOWNER4res *, resp);
4047 }
4048 
4049 /*
4050  * short utility function to lookup a file and recall the delegation
4051  */
4052 static rfs4_file_t *
4053 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4054     int *lkup_error, cred_t *cr)
4055 {
4056         vnode_t *vp;
4057         rfs4_file_t *fp = NULL;
4058         bool_t fcreate = FALSE;
4059         int error;
4060 
4061         if (vpp)
4062                 *vpp = NULL;
4063 
4064         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4065             NULL)) == 0) {
4066                 if (vp->v_type == VREG)
4067                         fp = rfs4_findfile(vp, NULL, &fcreate);
4068                 if (vpp)
4069                         *vpp = vp;
4070                 else
4071                         VN_RELE(vp);
4072         }
4073 
4074         if (lkup_error)
4075                 *lkup_error = error;
4076 
4077         return (fp);
4078 }
4079 
4080 /*
4081  * remove: args: CURRENT_FH: directory; name.
4082  *      res: status. If success - CURRENT_FH unchanged, return change_info
4083  *              for directory.
4084  */
4085 /* ARGSUSED */
4086 static void
4087 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4088     struct compound_state *cs)
4089 {
4090         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4091         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4092         int error;
4093         vnode_t *dvp, *vp;
4094         struct vattr bdva, idva, adva;
4095         char *nm;
4096         uint_t len;
4097         rfs4_file_t *fp;
4098         int in_crit = 0;
4099         bslabel_t *clabel;
4100         struct sockaddr *ca;
4101         char *name = NULL;
4102         nfsstat4 status;
4103 
4104         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4105             REMOVE4args *, args);
4106 
4107         /* CURRENT_FH: directory */
4108         dvp = cs->vp;
4109         if (dvp == NULL) {
4110                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4111                 goto out;
4112         }
4113 
4114         if (cs->access == CS_ACCESS_DENIED) {
4115                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4116                 goto out;
4117         }
4118 
4119         /*
4120          * If there is an unshared filesystem mounted on this vnode,
4121          * Do not allow to remove anything in this directory.
4122          */
4123         if (vn_ismntpt(dvp)) {
4124                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4125                 goto out;
4126         }
4127 
4128         if (dvp->v_type != VDIR) {
4129                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4130                 goto out;
4131         }
4132 
4133         status = utf8_dir_verify(&args->target);
4134         if (status != NFS4_OK) {
4135                 *cs->statusp = resp->status = status;
4136                 goto out;
4137         }
4138 
4139         /*
4140          * Lookup the file so that we can check if it's a directory
4141          */
4142         nm = utf8_to_fn(&args->target, &len, NULL);
4143         if (nm == NULL) {
4144                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4145                 goto out;
4146         }
4147 
4148         if (len > MAXNAMELEN) {
4149                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4150                 kmem_free(nm, len);
4151                 goto out;
4152         }
4153 
4154         if (rdonly4(req, cs)) {
4155                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4156                 kmem_free(nm, len);
4157                 goto out;
4158         }
4159 
4160         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4161         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4162             MAXPATHLEN  + 1);
4163 
4164         if (name == NULL) {
4165                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4166                 kmem_free(nm, len);
4167                 goto out;
4168         }
4169 
4170         /*
4171          * Lookup the file to determine type and while we are see if
4172          * there is a file struct around and check for delegation.
4173          * We don't need to acquire va_seq before this lookup, if
4174          * it causes an update, cinfo.before will not match, which will
4175          * trigger a cache flush even if atomic is TRUE.
4176          */
4177         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4178                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4179                     NULL)) {
4180                         VN_RELE(vp);
4181                         rfs4_file_rele(fp);
4182                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4183                         if (nm != name)
4184                                 kmem_free(name, MAXPATHLEN + 1);
4185                         kmem_free(nm, len);
4186                         goto out;
4187                 }
4188         }
4189 
4190         /* Didn't find anything to remove */
4191         if (vp == NULL) {
4192                 *cs->statusp = resp->status = error;
4193                 if (nm != name)
4194                         kmem_free(name, MAXPATHLEN + 1);
4195                 kmem_free(nm, len);
4196                 goto out;
4197         }
4198 
4199         if (nbl_need_check(vp)) {
4200                 nbl_start_crit(vp, RW_READER);
4201                 in_crit = 1;
4202                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4203                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4204                         if (nm != name)
4205                                 kmem_free(name, MAXPATHLEN + 1);
4206                         kmem_free(nm, len);
4207                         nbl_end_crit(vp);
4208                         VN_RELE(vp);
4209                         if (fp) {
4210                                 rfs4_clear_dont_grant(fp);
4211                                 rfs4_file_rele(fp);
4212                         }
4213                         goto out;
4214                 }
4215         }
4216 
4217         /* check label before allowing removal */
4218         if (is_system_labeled()) {
4219                 ASSERT(req->rq_label != NULL);
4220                 clabel = req->rq_label;
4221                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4222                     "got client label from request(1)",
4223                     struct svc_req *, req);
4224                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4225                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4226                             cs->exi)) {
4227                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4228                                 if (name != nm)
4229                                         kmem_free(name, MAXPATHLEN + 1);
4230                                 kmem_free(nm, len);
4231                                 if (in_crit)
4232                                         nbl_end_crit(vp);
4233                                 VN_RELE(vp);
4234                                 if (fp) {
4235                                         rfs4_clear_dont_grant(fp);
4236                                         rfs4_file_rele(fp);
4237                                 }
4238                                 goto out;
4239                         }
4240                 }
4241         }
4242 
4243         /* Get dir "before" change value */
4244         bdva.va_mask = AT_CTIME|AT_SEQ;
4245         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4246         if (error) {
4247                 *cs->statusp = resp->status = puterrno4(error);
4248                 if (nm != name)
4249                         kmem_free(name, MAXPATHLEN + 1);
4250                 kmem_free(nm, len);
4251                 if (in_crit)
4252                         nbl_end_crit(vp);
4253                 VN_RELE(vp);
4254                 if (fp) {
4255                         rfs4_clear_dont_grant(fp);
4256                         rfs4_file_rele(fp);
4257                 }
4258                 goto out;
4259         }
4260         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4261 
4262         /* Actually do the REMOVE operation */
4263         if (vp->v_type == VDIR) {
4264                 /*
4265                  * Can't remove a directory that has a mounted-on filesystem.
4266                  */
4267                 if (vn_ismntpt(vp)) {
4268                         error = EACCES;
4269                 } else {
4270                         /*
4271                          * System V defines rmdir to return EEXIST,
4272                          * not ENOTEMPTY, if the directory is not
4273                          * empty.  A System V NFS server needs to map
4274                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4275                          * transmit over the wire.
4276                          */
4277                         if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4278                             NULL, 0)) == EEXIST)
4279                                 error = ENOTEMPTY;
4280                 }
4281         } else {
4282                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4283                     fp != NULL) {
4284                         struct vattr va;
4285                         vnode_t *tvp;
4286 
4287                         rfs4_dbe_lock(fp->rf_dbe);
4288                         tvp = fp->rf_vp;
4289                         if (tvp)
4290                                 VN_HOLD(tvp);
4291                         rfs4_dbe_unlock(fp->rf_dbe);
4292 
4293                         if (tvp) {
4294                                 /*
4295                                  * This is va_seq safe because we are not
4296                                  * manipulating dvp.
4297                                  */
4298                                 va.va_mask = AT_NLINK;
4299                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4300                                     va.va_nlink == 0) {
4301                                         /* Remove state on file remove */
4302                                         if (in_crit) {
4303                                                 nbl_end_crit(vp);
4304                                                 in_crit = 0;
4305                                         }
4306                                         rfs4_close_all_state(fp);
4307                                 }
4308                                 VN_RELE(tvp);
4309                         }
4310                 }
4311         }
4312 
4313         if (in_crit)
4314                 nbl_end_crit(vp);
4315         VN_RELE(vp);
4316 
4317         if (fp) {
4318                 rfs4_clear_dont_grant(fp);
4319                 rfs4_file_rele(fp);
4320         }
4321         if (nm != name)
4322                 kmem_free(name, MAXPATHLEN + 1);
4323         kmem_free(nm, len);
4324 
4325         if (error) {
4326                 *cs->statusp = resp->status = puterrno4(error);
4327                 goto out;
4328         }
4329 
4330         /*
4331          * Get the initial "after" sequence number, if it fails, set to zero
4332          */
4333         idva.va_mask = AT_SEQ;
4334         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4335                 idva.va_seq = 0;
4336 
4337         /*
4338          * Force modified data and metadata out to stable storage.
4339          */
4340         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4341 
4342         /*
4343          * Get "after" change value, if it fails, simply return the
4344          * before value.
4345          */
4346         adva.va_mask = AT_CTIME|AT_SEQ;
4347         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4348                 adva.va_ctime = bdva.va_ctime;
4349                 adva.va_seq = 0;
4350         }
4351 
4352         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4353 
4354         /*
4355          * The cinfo.atomic = TRUE only if we have
4356          * non-zero va_seq's, and it has incremented by exactly one
4357          * during the VOP_REMOVE/RMDIR and it didn't change during
4358          * the VOP_FSYNC.
4359          */
4360         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4361             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4362                 resp->cinfo.atomic = TRUE;
4363         else
4364                 resp->cinfo.atomic = FALSE;
4365 
4366         *cs->statusp = resp->status = NFS4_OK;
4367 
4368 out:
4369         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4370             REMOVE4res *, resp);
4371 }
4372 
4373 /*
4374  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4375  *              oldname and newname.
4376  *      res: status. If success - CURRENT_FH unchanged, return change_info
4377  *              for both from and target directories.
4378  */
4379 /* ARGSUSED */
4380 static void
4381 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4382     struct compound_state *cs)
4383 {
4384         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4385         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4386         int error;
4387         vnode_t *odvp;
4388         vnode_t *ndvp;
4389         vnode_t *srcvp, *targvp;
4390         struct vattr obdva, oidva, oadva;
4391         struct vattr nbdva, nidva, nadva;
4392         char *onm, *nnm;
4393         uint_t olen, nlen;
4394         rfs4_file_t *fp, *sfp;
4395         int in_crit_src, in_crit_targ;
4396         int fp_rele_grant_hold, sfp_rele_grant_hold;
4397         bslabel_t *clabel;
4398         struct sockaddr *ca;
4399         char *converted_onm = NULL;
4400         char *converted_nnm = NULL;
4401         nfsstat4 status;
4402 
4403         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4404             RENAME4args *, args);
4405 
4406         fp = sfp = NULL;
4407         srcvp = targvp = NULL;
4408         in_crit_src = in_crit_targ = 0;
4409         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4410 
4411         /* CURRENT_FH: target directory */
4412         ndvp = cs->vp;
4413         if (ndvp == NULL) {
4414                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4415                 goto out;
4416         }
4417 
4418         /* SAVED_FH: from directory */
4419         odvp = cs->saved_vp;
4420         if (odvp == NULL) {
4421                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4422                 goto out;
4423         }
4424 
4425         if (cs->access == CS_ACCESS_DENIED) {
4426                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4427                 goto out;
4428         }
4429 
4430         /*
4431          * If there is an unshared filesystem mounted on this vnode,
4432          * do not allow to rename objects in this directory.
4433          */
4434         if (vn_ismntpt(odvp)) {
4435                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4436                 goto out;
4437         }
4438 
4439         /*
4440          * If there is an unshared filesystem mounted on this vnode,
4441          * do not allow to rename to this directory.
4442          */
4443         if (vn_ismntpt(ndvp)) {
4444                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4445                 goto out;
4446         }
4447 
4448         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4449                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4450                 goto out;
4451         }
4452 
4453         if (cs->saved_exi != cs->exi) {
4454                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4455                 goto out;
4456         }
4457 
4458         status = utf8_dir_verify(&args->oldname);
4459         if (status != NFS4_OK) {
4460                 *cs->statusp = resp->status = status;
4461                 goto out;
4462         }
4463 
4464         status = utf8_dir_verify(&args->newname);
4465         if (status != NFS4_OK) {
4466                 *cs->statusp = resp->status = status;
4467                 goto out;
4468         }
4469 
4470         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4471         if (onm == NULL) {
4472                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4473                 goto out;
4474         }
4475         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4476         nlen = MAXPATHLEN + 1;
4477         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4478             nlen);
4479 
4480         if (converted_onm == NULL) {
4481                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4482                 kmem_free(onm, olen);
4483                 goto out;
4484         }
4485 
4486         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4487         if (nnm == NULL) {
4488                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4489                 if (onm != converted_onm)
4490                         kmem_free(converted_onm, MAXPATHLEN + 1);
4491                 kmem_free(onm, olen);
4492                 goto out;
4493         }
4494         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4495             MAXPATHLEN  + 1);
4496 
4497         if (converted_nnm == NULL) {
4498                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4499                 kmem_free(nnm, nlen);
4500                 nnm = NULL;
4501                 if (onm != converted_onm)
4502                         kmem_free(converted_onm, MAXPATHLEN + 1);
4503                 kmem_free(onm, olen);
4504                 goto out;
4505         }
4506 
4507 
4508         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4509                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4510                 kmem_free(onm, olen);
4511                 kmem_free(nnm, nlen);
4512                 goto out;
4513         }
4514 
4515 
4516         if (rdonly4(req, cs)) {
4517                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4518                 if (onm != converted_onm)
4519                         kmem_free(converted_onm, MAXPATHLEN + 1);
4520                 kmem_free(onm, olen);
4521                 if (nnm != converted_nnm)
4522                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4523                 kmem_free(nnm, nlen);
4524                 goto out;
4525         }
4526 
4527         /* check label of the target dir */
4528         if (is_system_labeled()) {
4529                 ASSERT(req->rq_label != NULL);
4530                 clabel = req->rq_label;
4531                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4532                     "got client label from request(1)",
4533                     struct svc_req *, req);
4534                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4535                         if (!do_rfs_label_check(clabel, ndvp,
4536                             EQUALITY_CHECK, cs->exi)) {
4537                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4538                                 goto err_out;
4539                         }
4540                 }
4541         }
4542 
4543         /*
4544          * Is the source a file and have a delegation?
4545          * We don't need to acquire va_seq before these lookups, if
4546          * it causes an update, cinfo.before will not match, which will
4547          * trigger a cache flush even if atomic is TRUE.
4548          */
4549         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4550             &error, cs->cr)) {
4551                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4552                     NULL)) {
4553                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4554                         goto err_out;
4555                 }
4556         }
4557 
4558         if (srcvp == NULL) {
4559                 *cs->statusp = resp->status = puterrno4(error);
4560                 if (onm != converted_onm)
4561                         kmem_free(converted_onm, MAXPATHLEN + 1);
4562                 kmem_free(onm, olen);
4563                 if (nnm != converted_nnm)
4564                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4565                 kmem_free(nnm, nlen);
4566                 goto out;
4567         }
4568 
4569         sfp_rele_grant_hold = 1;
4570 
4571         /* Does the destination exist and a file and have a delegation? */
4572         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4573             NULL, cs->cr)) {
4574                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4575                     NULL)) {
4576                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4577                         goto err_out;
4578                 }
4579         }
4580         fp_rele_grant_hold = 1;
4581 
4582 
4583         /* Check for NBMAND lock on both source and target */
4584         if (nbl_need_check(srcvp)) {
4585                 nbl_start_crit(srcvp, RW_READER);
4586                 in_crit_src = 1;
4587                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4588                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4589                         goto err_out;
4590                 }
4591         }
4592 
4593         if (targvp && nbl_need_check(targvp)) {
4594                 nbl_start_crit(targvp, RW_READER);
4595                 in_crit_targ = 1;
4596                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4597                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4598                         goto err_out;
4599                 }
4600         }
4601 
4602         /* Get source "before" change value */
4603         obdva.va_mask = AT_CTIME|AT_SEQ;
4604         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4605         if (!error) {
4606                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4607                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4608         }
4609         if (error) {
4610                 *cs->statusp = resp->status = puterrno4(error);
4611                 goto err_out;
4612         }
4613 
4614         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4615         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4616 
4617         if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4618             cs->cr, NULL, 0)) == 0 && fp != NULL) {
4619                 struct vattr va;
4620                 vnode_t *tvp;
4621 
4622                 rfs4_dbe_lock(fp->rf_dbe);
4623                 tvp = fp->rf_vp;
4624                 if (tvp)
4625                         VN_HOLD(tvp);
4626                 rfs4_dbe_unlock(fp->rf_dbe);
4627 
4628                 if (tvp) {
4629                         va.va_mask = AT_NLINK;
4630                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4631                             va.va_nlink == 0) {
4632                                 /* The file is gone and so should the state */
4633                                 if (in_crit_targ) {
4634                                         nbl_end_crit(targvp);
4635                                         in_crit_targ = 0;
4636                                 }
4637                                 rfs4_close_all_state(fp);
4638                         }
4639                         VN_RELE(tvp);
4640                 }
4641         }
4642         if (error == 0)
4643                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4644 
4645         if (in_crit_src)
4646                 nbl_end_crit(srcvp);
4647         if (srcvp)
4648                 VN_RELE(srcvp);
4649         if (in_crit_targ)
4650                 nbl_end_crit(targvp);
4651         if (targvp)
4652                 VN_RELE(targvp);
4653 
4654         if (sfp) {
4655                 rfs4_clear_dont_grant(sfp);
4656                 rfs4_file_rele(sfp);
4657         }
4658         if (fp) {
4659                 rfs4_clear_dont_grant(fp);
4660                 rfs4_file_rele(fp);
4661         }
4662 
4663         if (converted_onm != onm)
4664                 kmem_free(converted_onm, MAXPATHLEN + 1);
4665         kmem_free(onm, olen);
4666         if (converted_nnm != nnm)
4667                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4668         kmem_free(nnm, nlen);
4669 
4670         /*
4671          * Get the initial "after" sequence number, if it fails, set to zero
4672          */
4673         oidva.va_mask = AT_SEQ;
4674         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4675                 oidva.va_seq = 0;
4676 
4677         nidva.va_mask = AT_SEQ;
4678         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4679                 nidva.va_seq = 0;
4680 
4681         /*
4682          * Force modified data and metadata out to stable storage.
4683          */
4684         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4685         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4686 
4687         if (error) {
4688                 *cs->statusp = resp->status = puterrno4(error);
4689                 goto out;
4690         }
4691 
4692         /*
4693          * Get "after" change values, if it fails, simply return the
4694          * before value.
4695          */
4696         oadva.va_mask = AT_CTIME|AT_SEQ;
4697         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4698                 oadva.va_ctime = obdva.va_ctime;
4699                 oadva.va_seq = 0;
4700         }
4701 
4702         nadva.va_mask = AT_CTIME|AT_SEQ;
4703         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4704                 nadva.va_ctime = nbdva.va_ctime;
4705                 nadva.va_seq = 0;
4706         }
4707 
4708         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4709         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4710 
4711         /*
4712          * The cinfo.atomic = TRUE only if we have
4713          * non-zero va_seq's, and it has incremented by exactly one
4714          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4715          */
4716         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4717             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4718                 resp->source_cinfo.atomic = TRUE;
4719         else
4720                 resp->source_cinfo.atomic = FALSE;
4721 
4722         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4723             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4724                 resp->target_cinfo.atomic = TRUE;
4725         else
4726                 resp->target_cinfo.atomic = FALSE;
4727 
4728 #ifdef  VOLATILE_FH_TEST
4729         {
4730         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4731 
4732         /*
4733          * Add the renamed file handle to the volatile rename list
4734          */
4735         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4736                 /* file handles may expire on rename */
4737                 vnode_t *vp;
4738 
4739                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4740                 /*
4741                  * Already know that nnm will be a valid string
4742                  */
4743                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4744                     NULL, NULL, NULL);
4745                 kmem_free(nnm, nlen);
4746                 if (!error) {
4747                         add_volrnm_fh(cs->exi, vp);
4748                         VN_RELE(vp);
4749                 }
4750         }
4751         }
4752 #endif  /* VOLATILE_FH_TEST */
4753 
4754         *cs->statusp = resp->status = NFS4_OK;
4755 out:
4756         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4757             RENAME4res *, resp);
4758         return;
4759 
4760 err_out:
4761         if (onm != converted_onm)
4762                 kmem_free(converted_onm, MAXPATHLEN + 1);
4763         if (onm != NULL)
4764                 kmem_free(onm, olen);
4765         if (nnm != converted_nnm)
4766                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4767         if (nnm != NULL)
4768                 kmem_free(nnm, nlen);
4769 
4770         if (in_crit_src) nbl_end_crit(srcvp);
4771         if (in_crit_targ) nbl_end_crit(targvp);
4772         if (targvp) VN_RELE(targvp);
4773         if (srcvp) VN_RELE(srcvp);
4774         if (sfp) {
4775                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4776                 rfs4_file_rele(sfp);
4777         }
4778         if (fp) {
4779                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4780                 rfs4_file_rele(fp);
4781         }
4782 
4783         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4784             RENAME4res *, resp);
4785 }
4786 
4787 /* ARGSUSED */
4788 static void
4789 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4790     struct compound_state *cs)
4791 {
4792         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4793         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4794         rfs4_client_t *cp;
4795 
4796         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4797             RENEW4args *, args);
4798 
4799         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4800                 *cs->statusp = resp->status =
4801                     rfs4_check_clientid(&args->clientid, 0);
4802                 goto out;
4803         }
4804 
4805         if (rfs4_lease_expired(cp)) {
4806                 rfs4_client_rele(cp);
4807                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4808                 goto out;
4809         }
4810 
4811         rfs4_update_lease(cp);
4812 
4813         mutex_enter(cp->rc_cbinfo.cb_lock);
4814         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4815                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4816                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4817         } else {
4818                 *cs->statusp = resp->status = NFS4_OK;
4819         }
4820         mutex_exit(cp->rc_cbinfo.cb_lock);
4821 
4822         rfs4_client_rele(cp);
4823 
4824 out:
4825         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4826             RENEW4res *, resp);
4827 }
4828 
4829 /* ARGSUSED */
4830 static void
4831 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4832     struct compound_state *cs)
4833 {
4834         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4835 
4836         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4837 
4838         /* No need to check cs->access - we are not accessing any object */
4839         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4840                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4841                 goto out;
4842         }
4843         if (cs->vp != NULL) {
4844                 VN_RELE(cs->vp);
4845         }
4846         cs->vp = cs->saved_vp;
4847         cs->saved_vp = NULL;
4848         if (cs->exi)
4849                 exi_rele(cs->exi);
4850         cs->exi = cs->saved_exi;
4851         if (cs->exi)
4852                 exi_hold(cs->exi);
4853         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4854         *cs->statusp = resp->status = NFS4_OK;
4855         cs->deleg = FALSE;
4856 
4857 out:
4858         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4859             RESTOREFH4res *, resp);
4860 }
4861 
4862 /* ARGSUSED */
4863 static void
4864 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4865     struct compound_state *cs)
4866 {
4867         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4868 
4869         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4870 
4871         /* No need to check cs->access - we are not accessing any object */
4872         if (cs->vp == NULL) {
4873                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4874                 goto out;
4875         }
4876         if (cs->saved_vp != NULL) {
4877                 VN_RELE(cs->saved_vp);
4878         }
4879         cs->saved_vp = cs->vp;
4880         VN_HOLD(cs->saved_vp);
4881         if (cs->saved_exi)
4882                 exi_rele(cs->saved_exi);
4883         cs->saved_exi = cs->exi;
4884         if (cs->saved_exi)
4885                 exi_hold(cs->saved_exi);
4886         /*
4887          * since SAVEFH is fairly rare, don't alloc space for its fh
4888          * unless necessary.
4889          */
4890         if (cs->saved_fh.nfs_fh4_val == NULL) {
4891                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4892         }
4893         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4894         *cs->statusp = resp->status = NFS4_OK;
4895 
4896 out:
4897         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4898             SAVEFH4res *, resp);
4899 }
4900 
4901 /*
4902  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4903  * return the bitmap of attrs that were set successfully. It is also
4904  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4905  * always be called only after rfs4_do_set_attrs().
4906  *
4907  * Verify that the attributes are same as the expected ones. sargp->vap
4908  * and sargp->sbp contain the input attributes as translated from fattr4.
4909  *
4910  * This function verifies only the attrs that correspond to a vattr or
4911  * vfsstat struct. That is because of the extra step needed to get the
4912  * corresponding system structs. Other attributes have already been set or
4913  * verified by do_rfs4_set_attrs.
4914  *
4915  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4916  */
4917 static int
4918 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4919     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4920 {
4921         int error, ret_error = 0;
4922         int i, k;
4923         uint_t sva_mask = sargp->vap->va_mask;
4924         uint_t vbit;
4925         union nfs4_attr_u *na;
4926         uint8_t *amap;
4927         bool_t getsb = ntovp->vfsstat;
4928 
4929         if (sva_mask != 0) {
4930                 /*
4931                  * Okay to overwrite sargp->vap because we verify based
4932                  * on the incoming values.
4933                  */
4934                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4935                     sargp->cs->cr, NULL);
4936                 if (ret_error) {
4937                         if (resp == NULL)
4938                                 return (ret_error);
4939                         /*
4940                          * Must return bitmap of successful attrs
4941                          */
4942                         sva_mask = 0;   /* to prevent checking vap later */
4943                 } else {
4944                         /*
4945                          * Some file systems clobber va_mask. it is probably
4946                          * wrong of them to do so, nonethless we practice
4947                          * defensive coding.
4948                          * See bug id 4276830.
4949                          */
4950                         sargp->vap->va_mask = sva_mask;
4951                 }
4952         }
4953 
4954         if (getsb) {
4955                 /*
4956                  * Now get the superblock and loop on the bitmap, as there is
4957                  * no simple way of translating from superblock to bitmap4.
4958                  */
4959                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4960                 if (ret_error) {
4961                         if (resp == NULL)
4962                                 goto errout;
4963                         getsb = FALSE;
4964                 }
4965         }
4966 
4967         /*
4968          * Now loop and verify each attribute which getattr returned
4969          * whether it's the same as the input.
4970          */
4971         if (resp == NULL && !getsb && (sva_mask == 0))
4972                 goto errout;
4973 
4974         na = ntovp->na;
4975         amap = ntovp->amap;
4976         k = 0;
4977         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4978                 k = *amap;
4979                 ASSERT(nfs4_ntov_map[k].nval == k);
4980                 vbit = nfs4_ntov_map[k].vbit;
4981 
4982                 /*
4983                  * If vattr attribute but VOP_GETATTR failed, or it's
4984                  * superblock attribute but VFS_STATVFS failed, skip
4985                  */
4986                 if (vbit) {
4987                         if ((vbit & sva_mask) == 0)
4988                                 continue;
4989                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4990                         continue;
4991                 }
4992                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4993                 if (resp != NULL) {
4994                         if (error)
4995                                 ret_error = -1; /* not all match */
4996                         else    /* update response bitmap */
4997                                 *resp |= nfs4_ntov_map[k].fbit;
4998                         continue;
4999                 }
5000                 if (error) {
5001                         ret_error = -1; /* not all match */
5002                         break;
5003                 }
5004         }
5005 errout:
5006         return (ret_error);
5007 }
5008 
5009 /*
5010  * Decode the attribute to be set/verified. If the attr requires a sys op
5011  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5012  * call the sv_getit function for it, because the sys op hasn't yet been done.
5013  * Return 0 for success, error code if failed.
5014  *
5015  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5016  */
5017 static int
5018 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5019     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5020 {
5021         int error = 0;
5022         bool_t set_later;
5023 
5024         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5025 
5026         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5027                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5028                 /*
5029                  * don't verify yet if a vattr or sb dependent attr,
5030                  * because we don't have their sys values yet.
5031                  * Will be done later.
5032                  */
5033                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5034                         /*
5035                          * ACLs are a special case, since setting the MODE
5036                          * conflicts with setting the ACL.  We delay setting
5037                          * the ACL until all other attributes have been set.
5038                          * The ACL gets set in do_rfs4_op_setattr().
5039                          */
5040                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5041                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5042                                     sargp, nap);
5043                                 if (error) {
5044                                         xdr_free(nfs4_ntov_map[k].xfunc,
5045                                             (caddr_t)nap);
5046                                 }
5047                         }
5048                 }
5049         } else {
5050 #ifdef  DEBUG
5051                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5052                     "decoding attribute %d\n", k);
5053 #endif
5054                 error = EINVAL;
5055         }
5056         if (!error && resp_bval && !set_later) {
5057                 *resp_bval |= nfs4_ntov_map[k].fbit;
5058         }
5059 
5060         return (error);
5061 }
5062 
5063 /*
5064  * Set vattr based on incoming fattr4 attrs - used by setattr.
5065  * Set response mask. Ignore any values that are not writable vattr attrs.
5066  */
5067 static nfsstat4
5068 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5069     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5070     nfs4_attr_cmd_t cmd)
5071 {
5072         int error = 0;
5073         int i;
5074         char *attrs = fattrp->attrlist4;
5075         uint32_t attrslen = fattrp->attrlist4_len;
5076         XDR xdr;
5077         nfsstat4 status = NFS4_OK;
5078         vnode_t *vp = cs->vp;
5079         union nfs4_attr_u *na;
5080         uint8_t *amap;
5081 
5082 #ifndef lint
5083         /*
5084          * Make sure that maximum attribute number can be expressed as an
5085          * 8 bit quantity.
5086          */
5087         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5088 #endif
5089 
5090         if (vp == NULL) {
5091                 if (resp)
5092                         *resp = 0;
5093                 return (NFS4ERR_NOFILEHANDLE);
5094         }
5095         if (cs->access == CS_ACCESS_DENIED) {
5096                 if (resp)
5097                         *resp = 0;
5098                 return (NFS4ERR_ACCESS);
5099         }
5100 
5101         sargp->op = cmd;
5102         sargp->cs = cs;
5103         sargp->flag = 0;     /* may be set later */
5104         sargp->vap->va_mask = 0;
5105         sargp->rdattr_error = NFS4_OK;
5106         sargp->rdattr_error_req = FALSE;
5107         /* sargp->sbp is set by the caller */
5108 
5109         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5110 
5111         na = ntovp->na;
5112         amap = ntovp->amap;
5113 
5114         /*
5115          * The following loop iterates on the nfs4_ntov_map checking
5116          * if the fbit is set in the requested bitmap.
5117          * If set then we process the arguments using the
5118          * rfs4_fattr4 conversion functions to populate the setattr
5119          * vattr and va_mask. Any settable attrs that are not using vattr
5120          * will be set in this loop.
5121          */
5122         for (i = 0; i < nfs4_ntov_map_size; i++) {
5123                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5124                         continue;
5125                 }
5126                 /*
5127                  * If setattr, must be a writable attr.
5128                  * If verify/nverify, must be a readable attr.
5129                  */
5130                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5131                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5132                         /*
5133                          * Client tries to set/verify an
5134                          * unsupported attribute, tries to set
5135                          * a read only attr or verify a write
5136                          * only one - error!
5137                          */
5138                         break;
5139                 }
5140                 /*
5141                  * Decode the attribute to set/verify
5142                  */
5143                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5144                     &xdr, resp ? resp : NULL, na);
5145                 if (error)
5146                         break;
5147                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5148                 na++;
5149                 (ntovp->attrcnt)++;
5150                 if (nfs4_ntov_map[i].vfsstat)
5151                         ntovp->vfsstat = TRUE;
5152         }
5153 
5154         if (error != 0)
5155                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5156                     puterrno4(error));
5157         /* xdrmem_destroy(&xdrs); */        /* NO-OP */
5158         return (status);
5159 }
5160 
5161 static nfsstat4
5162 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5163     stateid4 *stateid)
5164 {
5165         int error = 0;
5166         struct nfs4_svgetit_arg sarg;
5167         bool_t trunc;
5168 
5169         nfsstat4 status = NFS4_OK;
5170         cred_t *cr = cs->cr;
5171         vnode_t *vp = cs->vp;
5172         struct nfs4_ntov_table ntov;
5173         struct statvfs64 sb;
5174         struct vattr bva;
5175         struct flock64 bf;
5176         int in_crit = 0;
5177         uint_t saved_mask = 0;
5178         caller_context_t ct;
5179 
5180         *resp = 0;
5181         sarg.sbp = &sb;
5182         sarg.is_referral = B_FALSE;
5183         nfs4_ntov_table_init(&ntov);
5184         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5185             NFS4ATTR_SETIT);
5186         if (status != NFS4_OK) {
5187                 /*
5188                  * failed set attrs
5189                  */
5190                 goto done;
5191         }
5192         if ((sarg.vap->va_mask == 0) &&
5193             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5194                 /*
5195                  * no further work to be done
5196                  */
5197                 goto done;
5198         }
5199 
5200         /*
5201          * If we got a request to set the ACL and the MODE, only
5202          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5203          * to change any other bits, along with setting an ACL,
5204          * gives NFS4ERR_INVAL.
5205          */
5206         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5207             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5208                 vattr_t va;
5209 
5210                 va.va_mask = AT_MODE;
5211                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5212                 if (error) {
5213                         status = puterrno4(error);
5214                         goto done;
5215                 }
5216                 if ((sarg.vap->va_mode ^ va.va_mode) &
5217                     ~(VSUID | VSGID | VSVTX)) {
5218                         status = NFS4ERR_INVAL;
5219                         goto done;
5220                 }
5221         }
5222 
5223         /* Check stateid only if size has been set */
5224         if (sarg.vap->va_mask & AT_SIZE) {
5225                 trunc = (sarg.vap->va_size == 0);
5226                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5227                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5228                 if (status != NFS4_OK)
5229                         goto done;
5230         } else {
5231                 ct.cc_sysid = 0;
5232                 ct.cc_pid = 0;
5233                 ct.cc_caller_id = nfs4_srv_caller_id;
5234                 ct.cc_flags = CC_DONTBLOCK;
5235         }
5236 
5237         /* XXX start of possible race with delegations */
5238 
5239         /*
5240          * We need to specially handle size changes because it is
5241          * possible for the client to create a file with read-only
5242          * modes, but with the file opened for writing. If the client
5243          * then tries to set the file size, e.g. ftruncate(3C),
5244          * fcntl(F_FREESP), the normal access checking done in
5245          * VOP_SETATTR would prevent the client from doing it even though
5246          * it should be allowed to do so.  To get around this, we do the
5247          * access checking for ourselves and use VOP_SPACE which doesn't
5248          * do the access checking.
5249          * Also the client should not be allowed to change the file
5250          * size if there is a conflicting non-blocking mandatory lock in
5251          * the region of the change.
5252          */
5253         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5254                 u_offset_t offset;
5255                 ssize_t length;
5256 
5257                 /*
5258                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5259                  * before returning, sarg.vap->va_mask is used to
5260                  * generate the setattr reply bitmap.  We also clear
5261                  * AT_SIZE below before calling VOP_SPACE.  For both
5262                  * of these cases, the va_mask needs to be saved here
5263                  * and restored after calling VOP_SETATTR.
5264                  */
5265                 saved_mask = sarg.vap->va_mask;
5266 
5267                 /*
5268                  * Check any possible conflict due to NBMAND locks.
5269                  * Get into critical region before VOP_GETATTR, so the
5270                  * size attribute is valid when checking conflicts.
5271                  */
5272                 if (nbl_need_check(vp)) {
5273                         nbl_start_crit(vp, RW_READER);
5274                         in_crit = 1;
5275                 }
5276 
5277                 bva.va_mask = AT_UID|AT_SIZE;
5278                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5279                         status = puterrno4(error);
5280                         goto done;
5281                 }
5282 
5283                 if (in_crit) {
5284                         if (sarg.vap->va_size < bva.va_size) {
5285                                 offset = sarg.vap->va_size;
5286                                 length = bva.va_size - sarg.vap->va_size;
5287                         } else {
5288                                 offset = bva.va_size;
5289                                 length = sarg.vap->va_size - bva.va_size;
5290                         }
5291                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5292                             &ct)) {
5293                                 status = NFS4ERR_LOCKED;
5294                                 goto done;
5295                         }
5296                 }
5297 
5298                 if (crgetuid(cr) == bva.va_uid) {
5299                         sarg.vap->va_mask &= ~AT_SIZE;
5300                         bf.l_type = F_WRLCK;
5301                         bf.l_whence = 0;
5302                         bf.l_start = (off64_t)sarg.vap->va_size;
5303                         bf.l_len = 0;
5304                         bf.l_sysid = 0;
5305                         bf.l_pid = 0;
5306                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5307                             (offset_t)sarg.vap->va_size, cr, &ct);
5308                 }
5309         }
5310 
5311         if (!error && sarg.vap->va_mask != 0)
5312                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5313 
5314         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5315         if (saved_mask & AT_SIZE)
5316                 sarg.vap->va_mask |= AT_SIZE;
5317 
5318         /*
5319          * If an ACL was being set, it has been delayed until now,
5320          * in order to set the mode (via the VOP_SETATTR() above) first.
5321          */
5322         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5323                 int i;
5324 
5325                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5326                         if (ntov.amap[i] == FATTR4_ACL)
5327                                 break;
5328                 if (i < NFS4_MAXNUM_ATTRS) {
5329                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5330                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5331                         if (error == 0) {
5332                                 *resp |= FATTR4_ACL_MASK;
5333                         } else if (error == ENOTSUP) {
5334                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5335                                 status = NFS4ERR_ATTRNOTSUPP;
5336                                 goto done;
5337                         }
5338                 } else {
5339                         NFS4_DEBUG(rfs4_debug,
5340                             (CE_NOTE, "do_rfs4_op_setattr: "
5341                             "unable to find ACL in fattr4"));
5342                         error = EINVAL;
5343                 }
5344         }
5345 
5346         if (error) {
5347                 /* check if a monitor detected a delegation conflict */
5348                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5349                         status = NFS4ERR_DELAY;
5350                 else
5351                         status = puterrno4(error);
5352 
5353                 /*
5354                  * Set the response bitmap when setattr failed.
5355                  * If VOP_SETATTR partially succeeded, test by doing a
5356                  * VOP_GETATTR on the object and comparing the data
5357                  * to the setattr arguments.
5358                  */
5359                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5360         } else {
5361                 /*
5362                  * Force modified metadata out to stable storage.
5363                  */
5364                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5365                 /*
5366                  * Set response bitmap
5367                  */
5368                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5369         }
5370 
5371 /* Return early and already have a NFSv4 error */
5372 done:
5373         /*
5374          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5375          * conversion sets both readable and writeable NFS4 attrs
5376          * for AT_MTIME and AT_ATIME.  The line below masks out
5377          * unrequested attrs from the setattr result bitmap.  This
5378          * is placed after the done: label to catch the ATTRNOTSUP
5379          * case.
5380          */
5381         *resp &= fattrp->attrmask;
5382 
5383         if (in_crit)
5384                 nbl_end_crit(vp);
5385 
5386         nfs4_ntov_table_free(&ntov, &sarg);
5387 
5388         return (status);
5389 }
5390 
5391 /* ARGSUSED */
5392 static void
5393 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5394     struct compound_state *cs)
5395 {
5396         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5397         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5398         bslabel_t *clabel;
5399 
5400         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5401             SETATTR4args *, args);
5402 
5403         if (cs->vp == NULL) {
5404                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5405                 goto out;
5406         }
5407 
5408         /*
5409          * If there is an unshared filesystem mounted on this vnode,
5410          * do not allow to setattr on this vnode.
5411          */
5412         if (vn_ismntpt(cs->vp)) {
5413                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5414                 goto out;
5415         }
5416 
5417         resp->attrsset = 0;
5418 
5419         if (rdonly4(req, cs)) {
5420                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5421                 goto out;
5422         }
5423 
5424         /* check label before setting attributes */
5425         if (is_system_labeled()) {
5426                 ASSERT(req->rq_label != NULL);
5427                 clabel = req->rq_label;
5428                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5429                     "got client label from request(1)",
5430                     struct svc_req *, req);
5431                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5432                         if (!do_rfs_label_check(clabel, cs->vp,
5433                             EQUALITY_CHECK, cs->exi)) {
5434                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5435                                 goto out;
5436                         }
5437                 }
5438         }
5439 
5440         *cs->statusp = resp->status =
5441             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5442             &args->stateid);
5443 
5444 out:
5445         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5446             SETATTR4res *, resp);
5447 }
5448 
5449 /* ARGSUSED */
5450 static void
5451 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5452     struct compound_state *cs)
5453 {
5454         /*
5455          * verify and nverify are exactly the same, except that nverify
5456          * succeeds when some argument changed, and verify succeeds when
5457          * when none changed.
5458          */
5459 
5460         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5461         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5462 
5463         int error;
5464         struct nfs4_svgetit_arg sarg;
5465         struct statvfs64 sb;
5466         struct nfs4_ntov_table ntov;
5467 
5468         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5469             VERIFY4args *, args);
5470 
5471         if (cs->vp == NULL) {
5472                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5473                 goto out;
5474         }
5475 
5476         sarg.sbp = &sb;
5477         sarg.is_referral = B_FALSE;
5478         nfs4_ntov_table_init(&ntov);
5479         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5480             &sarg, &ntov, NFS4ATTR_VERIT);
5481         if (resp->status != NFS4_OK) {
5482                 /*
5483                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5484                  * so could return -1 for "no match".
5485                  */
5486                 if (resp->status == -1)
5487                         resp->status = NFS4ERR_NOT_SAME;
5488                 goto done;
5489         }
5490         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5491         switch (error) {
5492         case 0:
5493                 resp->status = NFS4_OK;
5494                 break;
5495         case -1:
5496                 resp->status = NFS4ERR_NOT_SAME;
5497                 break;
5498         default:
5499                 resp->status = puterrno4(error);
5500                 break;
5501         }
5502 done:
5503         *cs->statusp = resp->status;
5504         nfs4_ntov_table_free(&ntov, &sarg);
5505 out:
5506         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5507             VERIFY4res *, resp);
5508 }
5509 
5510 /* ARGSUSED */
5511 static void
5512 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5513     struct compound_state *cs)
5514 {
5515         /*
5516          * verify and nverify are exactly the same, except that nverify
5517          * succeeds when some argument changed, and verify succeeds when
5518          * when none changed.
5519          */
5520 
5521         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5522         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5523 
5524         int error;
5525         struct nfs4_svgetit_arg sarg;
5526         struct statvfs64 sb;
5527         struct nfs4_ntov_table ntov;
5528 
5529         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5530             NVERIFY4args *, args);
5531 
5532         if (cs->vp == NULL) {
5533                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5534                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5535                     NVERIFY4res *, resp);
5536                 return;
5537         }
5538         sarg.sbp = &sb;
5539         sarg.is_referral = B_FALSE;
5540         nfs4_ntov_table_init(&ntov);
5541         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5542             &sarg, &ntov, NFS4ATTR_VERIT);
5543         if (resp->status != NFS4_OK) {
5544                 /*
5545                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5546                  * so could return -1 for "no match".
5547                  */
5548                 if (resp->status == -1)
5549                         resp->status = NFS4_OK;
5550                 goto done;
5551         }
5552         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5553         switch (error) {
5554         case 0:
5555                 resp->status = NFS4ERR_SAME;
5556                 break;
5557         case -1:
5558                 resp->status = NFS4_OK;
5559                 break;
5560         default:
5561                 resp->status = puterrno4(error);
5562                 break;
5563         }
5564 done:
5565         *cs->statusp = resp->status;
5566         nfs4_ntov_table_free(&ntov, &sarg);
5567 
5568         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5569             NVERIFY4res *, resp);
5570 }
5571 
/*
 * Number of entries in the on-stack iovec array used by rfs4_op_write().
 * A write whose mblk chain needs more entries than this gets its iovec
 * array from kmem_alloc() instead.
 *
 * XXX - This should live in an NFS header file.
 */
#define MAX_IOVECS      12
5576 
/*
 * WRITE operation: write args->data_len bytes at args->offset to the
 * current filehandle.
 *
 * The request is checked in this order: filehandle present, compound
 * access not denied, stateid valid for FWRITE, no conflicting NBMAND lock,
 * attributes obtainable, export not read-only, object is a regular file,
 * caller is the owner or passes VOP_ACCESS(VWRITE), and the file is not
 * subject to mandatory locking. A zero length write then succeeds without
 * doing any I/O.
 *
 * The data is described by an iovec array built from one of three sources
 * (args->mblk chain, args->rlist, or args->data_val) and written with
 * do_io(). The status is stored in both resp->status and *cs->statusp; on
 * success resp->count, resp->committed and resp->writeverf are filled in.
 */
/* ARGSUSED */
static void
rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
        WRITE4args *args = &argop->nfs_argop4_u.opwrite;
        WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
        int error;
        vnode_t *vp;
        struct vattr bva;
        u_offset_t rlimit;
        struct uio uio;
        struct iovec iov[MAX_IOVECS];
        struct iovec *iovp;
        int iovcnt;
        int ioflag;
        cred_t *savecred, *cr;
        bool_t *deleg = &cs->deleg;
        nfsstat4 stat;
        int in_crit = 0;
        caller_context_t ct;

        DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
            WRITE4args *, args);

        vp = cs->vp;
        if (vp == NULL) {
                *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                goto out;
        }
        if (cs->access == CS_ACCESS_DENIED) {
                *cs->statusp = resp->status = NFS4ERR_ACCESS;
                goto out;
        }

        cr = cs->cr;

        if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
            deleg, TRUE, &ct)) != NFS4_OK) {
                *cs->statusp = resp->status = stat;
                goto out;
        }

        /*
         * We have to enter the critical region before calling VOP_RWLOCK
         * to avoid a deadlock with ufs.
         */
        if (nbl_need_check(vp)) {
                nbl_start_crit(vp, RW_READER);
                in_crit = 1;    /* left again at the out: label */
                if (nbl_conflict(vp, NBL_WRITE,
                    args->offset, args->data_len, 0, &ct)) {
                        *cs->statusp = resp->status = NFS4ERR_LOCKED;
                        goto out;
                }
        }

        /* mode is needed for MANDLOCK(), uid for the owner check below */
        bva.va_mask = AT_MODE | AT_UID;
        error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

        /*
         * If we can't get the attributes, then we can't do the
         * right access checking.  So, we'll fail the request.
         */
        if (error) {
                *cs->statusp = resp->status = puterrno4(error);
                goto out;
        }

        if (rdonly4(req, cs)) {
                *cs->statusp = resp->status = NFS4ERR_ROFS;
                goto out;
        }

        if (vp->v_type != VREG) {
                *cs->statusp = resp->status =
                    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
                goto out;
        }

        /* the owner is not subjected to the VOP_ACCESS write check */
        if (crgetuid(cr) != bva.va_uid &&
            (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
                *cs->statusp = resp->status = puterrno4(error);
                goto out;
        }

        if (MANDLOCK(vp, bva.va_mode)) {
                *cs->statusp = resp->status = NFS4ERR_ACCESS;
                goto out;
        }

        /* nothing to write: report success without touching the file */
        if (args->data_len == 0) {
                *cs->statusp = resp->status = NFS4_OK;
                resp->count = 0;
                resp->committed = args->stable;
                resp->writeverf = Write4verf;
                goto out;
        }

        if (args->mblk != NULL) {
                mblk_t *m;
                uint_t bytes, round_len;

                /*
                 * Count the mblks needed to cover the data length rounded
                 * up to a multiple of BYTES_PER_XDR_UNIT; one iovec is
                 * used per mblk.
                 */
                iovcnt = 0;
                bytes = 0;
                round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
                for (m = args->mblk;
                    m != NULL && bytes < round_len;
                    m = m->b_cont) {
                        iovcnt++;
                        bytes += MBLKL(m);
                }
#ifdef DEBUG
                /* should have ended on an mblk boundary */
                if (bytes != round_len) {
                        printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
                            bytes, round_len, args->data_len);
                        printf("args=%p, args->mblk=%p, m=%p", (void *)args,
                            (void *)args->mblk, (void *)m);
                        ASSERT(bytes == round_len);
                }
#endif
                /*
                 * Use the on-stack array when it is big enough; otherwise
                 * allocate one, which must be freed on every path below
                 * (iovp != iov identifies the allocated case).
                 */
                if (iovcnt <= MAX_IOVECS) {
                        iovp = iov;
                } else {
                        iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
                }
                mblk_to_iov(args->mblk, iovcnt, iovp);
        } else if (args->rlist != NULL) {
                /* data is described by args->rlist: a single iovec */
                iovcnt = 1;
                iovp = iov;
                iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
                iovp->iov_len = args->data_len;
        } else {
                /* data is in the contiguous args->data_val buffer */
                iovcnt = 1;
                iovp = iov;
                iovp->iov_base = args->data_val;
                iovp->iov_len = args->data_len;
        }

        uio.uio_iov = iovp;
        uio.uio_iovcnt = iovcnt;

        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_DEFAULT;
        uio.uio_loffset = args->offset;
        uio.uio_resid = args->data_len;
        /* trim the request so it does not go past the file size limit */
        uio.uio_llimit = curproc->p_fsz_ctl;
        rlimit = uio.uio_llimit - args->offset;
        if (rlimit < (u_offset_t)uio.uio_resid)
                uio.uio_resid = (int)rlimit;

        /* translate the requested stability level into an I/O flag */
        if (args->stable == UNSTABLE4)
                ioflag = 0;
        else if (args->stable == FILE_SYNC4)
                ioflag = FSYNC;
        else if (args->stable == DATA_SYNC4)
                ioflag = FDSYNC;
        else {
                /* unknown stability level: undo the allocation and fail */
                if (iovp != iov)
                        kmem_free(iovp, sizeof (*iovp) * iovcnt);
                *cs->statusp = resp->status = NFS4ERR_INVAL;
                goto out;
        }

        /*
         * We're changing creds because VM may fault and we need
         * the cred of the current thread to be used if quota
         * checking is enabled.
         */
        savecred = curthread->t_cred;
        curthread->t_cred = cr;
        error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
        curthread->t_cred = savecred;

        if (iovp != iov)
                kmem_free(iovp, sizeof (*iovp) * iovcnt);

        if (error) {
                *cs->statusp = resp->status = puterrno4(error);
                goto out;
        }

        *cs->statusp = resp->status = NFS4_OK;
        /* bytes written = bytes requested minus what is left in the uio */
        resp->count = args->data_len - uio.uio_resid;

        /* any synchronous request (FSYNC or FDSYNC) reports FILE_SYNC4 */
        if (ioflag == 0)
                resp->committed = UNSTABLE4;
        else
                resp->committed = FILE_SYNC4;

        resp->writeverf = Write4verf;

out:
        if (in_crit)
                nbl_end_crit(vp);

        DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
            WRITE4res *, resp);
}
5777 
5778 
5779 /* XXX put in a header file */
5780 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5781 
/*
 * Execute one NFSv4 COMPOUND request.
 *
 * The operations in args->array are dispatched in order through
 * rfsv4disptab[] until one of them leaves a status other than NFS4_OK in
 * resp->status (or an out-of-range opcode is seen), at which point
 * processing stops and the results array is trimmed to the operations
 * actually attempted.
 *
 *	args	decoded COMPOUND arguments
 *	resp	result structure filled in here; the tag copy and results
 *		array are allocated with kmem and are released by
 *		rfs4_compound_free()
 *	exi	asserted to be NULL once past the early-return checks
 *	req	the RPC request; its rq_label is freed on the normal exit path
 *	cr	asserted to be NULL once past the early-return checks; a
 *		fresh cred is obtained with crget() instead
 *	rv	optional; set to 0 on entry and to 1 when the credentials
 *		could not be obtained and svcerr_badcred() was called
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, int *rv)
{
        uint_t i;
        struct compound_state cs;

        if (rv != NULL)
                *rv = 0;
        rfs4_init_compound_state(&cs);
        /*
         * Form a reply tag by copying over the request tag.
         */
        resp->tag.utf8string_val =
            kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
        resp->tag.utf8string_len = args->tag.utf8string_len;
        bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
            resp->tag.utf8string_len);

        cs.statusp = &resp->status;
        cs.req = req;
        /* No results yet; the early returns below leave these values. */
        resp->array = NULL;
        resp->array_len = 0;

        /*
         * XXX for now, minorversion should be zero
         */
        if (args->minorversion != NFS4_MINORVERSION) {
                DTRACE_NFSV4_2(compound__start, struct compound_state *,
                    &cs, COMPOUND4args *, args);
                resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
                DTRACE_NFSV4_2(compound__done, struct compound_state *,
                    &cs, COMPOUND4res *, resp);
                return;
        }

        /* An empty compound succeeds trivially; only the tag is returned. */
        if (args->array_len == 0) {
                resp->status = NFS4_OK;
                return;
        }

        ASSERT(exi == NULL);
        ASSERT(cr == NULL);

        cr = crget();
        ASSERT(cr != NULL);

        /*
         * A zero return from sec_svc_getcred() is treated as failure:
         * drop the cred, send the bad-credentials RPC error and tell the
         * caller through *rv.
         */
        if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
                DTRACE_NFSV4_2(compound__start, struct compound_state *,
                    &cs, COMPOUND4args *, args);
                crfree(cr);
                DTRACE_NFSV4_2(compound__done, struct compound_state *,
                    &cs, COMPOUND4res *, resp);
                svcerr_badcred(req->rq_xprt);
                if (rv != NULL)
                        *rv = 1;
                return;
        }
        /*
         * One zero-filled result slot per requested operation; slots for
         * operations that never run stay zeroed.
         */
        resp->array_len = args->array_len;
        resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
            KM_SLEEP);

        /* cs now owns the cred; it is released via cs.basecr below. */
        cs.basecr = cr;

        DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
            COMPOUND4args *, args);

        /*
         * If this is the first compound we've seen, we need to start all
         * new instances' grace periods.
         */
        if (rfs4_seen_first_compound == 0) {
                rfs4_grace_start_new();
                /*
                 * This must be set after rfs4_grace_start_new(), otherwise
                 * another thread could proceed past here before the former
                 * is finished.
                 */
                rfs4_seen_first_compound = 1;
        }

        for (i = 0; i < args->array_len && cs.cont; i++) {
                nfs_argop4 *argop;
                nfs_resop4 *resop;
                uint_t op;

                argop = &args->array[i];
                resop = &resp->array[i];
                /* The result always echoes the opcode of its argument. */
                resop->resop = argop->argop;
                op = (uint_t)resop->resop;

                if (op < rfsv4disp_cnt) {
                        /*
                         * Count the individual ops here; NULL and COMPOUND
                         * are counted in common_dispatch()
                         */
                        rfsproccnt_v4_ptr[op].value.ui64++;

                        NFS4_DEBUG(rfs4_debug > 1,
                            (CE_NOTE, "Executing %s", rfs4_op_string[op]));
                        (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
                        NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
                            rfs4_op_string[op], *cs.statusp));
                        /* The first failing operation ends the compound. */
                        if (*cs.statusp != NFS4_OK)
                                cs.cont = FALSE;
                } else {
                        /*
                         * This is effectively dead code since XDR code
                         * will have already returned BADXDR if op doesn't
                         * decode to legal value.  This only done for a
                         * day when XDR code doesn't verify v4 opcodes.
                         */
                        op = OP_ILLEGAL;
                        rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;

                        rfs4_op_illegal(argop, resop, req, &cs);
                        cs.cont = FALSE;
                }

                /*
                 * If not at last op, and if we are to stop, then
                 * compact the results array.
                 */
                if ((i + 1) < args->array_len && !cs.cont) {
                        nfs_resop4 *new_res = kmem_alloc(
                            (i+1) * sizeof (nfs_resop4), KM_SLEEP);
                        bcopy(resp->array,
                            new_res, (i+1) * sizeof (nfs_resop4));
                        /* The old array was sized for every requested op. */
                        kmem_free(resp->array,
                            args->array_len * sizeof (nfs_resop4));

                        resp->array_len =  i + 1;
                        resp->array = new_res;
                }
        }


        DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
            COMPOUND4res *, resp);

        /* Drop whatever holds and allocations are still recorded in cs. */
        if (cs.exi)
                exi_rele(cs.exi);
        if (cs.saved_exi)
                exi_rele(cs.saved_exi);
        if (cs.vp)
                VN_RELE(cs.vp);
        if (cs.saved_vp)
                VN_RELE(cs.saved_vp);
        if (cs.saved_fh.nfs_fh4_val)
                kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);

        if (cs.basecr)
                crfree(cs.basecr);
        if (cs.cr)
                crfree(cs.cr);
        /*
         * done with this compound request, free the label
         */

        if (req->rq_label != NULL) {
                kmem_free(req->rq_label, sizeof (bslabel_t));
                req->rq_label = NULL;
        }
}
5946 
5947 /*
5948  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5949  * XXX zero out the tag and array values. Need to investigate why the
5950  * XXX calls occur, but at least prevent the panic for now.
5951  */
5952 void
5953 rfs4_compound_free(COMPOUND4res *resp)
5954 {
5955         uint_t i;
5956 
5957         if (resp->tag.utf8string_val) {
5958                 UTF8STRING_FREE(resp->tag)
5959         }
5960 
5961         for (i = 0; i < resp->array_len; i++) {
5962                 nfs_resop4 *resop;
5963                 uint_t op;
5964 
5965                 resop = &resp->array[i];
5966                 op = (uint_t)resop->resop;
5967                 if (op < rfsv4disp_cnt) {
5968                         (*rfsv4disptab[op].dis_resfree)(resop);
5969                 }
5970         }
5971         if (resp->array != NULL) {
5972                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5973         }
5974 }
5975 
5976 /*
5977  * Process the value of the compound request rpc flags, as a bit-AND
5978  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5979  */
5980 void
5981 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5982 {
5983         int i;
5984         int flag = RPC_ALL;
5985 
5986         for (i = 0; flag && i < args->array_len; i++) {
5987                 uint_t op;
5988 
5989                 op = (uint_t)args->array[i].argop;
5990 
5991                 if (op < rfsv4disp_cnt)
5992                         flag &= rfsv4disptab[op].dis_flags;
5993                 else
5994                         flag = 0;
5995         }
5996         *flagp = flag;
5997 }
5998 
5999 nfsstat4
6000 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6001 {
6002         nfsstat4 e;
6003 
6004         rfs4_dbe_lock(cp->rc_dbe);
6005 
6006         if (cp->rc_sysidt != LM_NOSYSID) {
6007                 *sp = cp->rc_sysidt;
6008                 e = NFS4_OK;
6009 
6010         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6011                 *sp = cp->rc_sysidt;
6012                 e = NFS4_OK;
6013 
6014                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6015                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
6016         } else
6017                 e = NFS4ERR_DELAY;
6018 
6019         rfs4_dbe_unlock(cp->rc_dbe);
6020         return (e);
6021 }
6022 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: emit a single CE_NOTE line describing a lock request.
 *
 *	str		caller-supplied prefix for the message
 *	operation	lock command; F_GETLK, F_SETLK and F_SETLK_NBMAND are
 *			named, anything else prints as "F_UNKNOWN"
 *	flk		the lock description; l_whence is asserted to be 0,
 *			and a zero l_len is printed as all ones
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        char *op, *type;

        switch (operation) {
        case F_GETLK:
                op = "F_GETLK";
                break;
        case F_SETLK:
                op = "F_SETLK";
                break;
        case F_SETLK_NBMAND:
                op = "F_SETLK_NBMAND";
                break;
        default:
                op = "F_UNKNOWN";
                break;
        }

        switch (flk->l_type) {
        case F_UNLCK:
                type = "F_UNLCK";
                break;
        case F_RDLCK:
                type = "F_RDLCK";
                break;
        case F_WRLCK:
                type = "F_WRLCK";
                break;
        default:
                type = "F_UNKNOWN";
                break;
        }

        ASSERT(flk->l_whence == 0);
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op, type, (longlong_t)flk->l_start,
            flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Wrapped in do/while so the expansion is a single statement; a bare
 * "if (d) ..." would capture the else of an enclosing unbraced if/else.
 */
#define LOCK_PRINT(d, s, t, f) \
        do { \
                if (d) \
                        lock_print(s, t, f); \
        } while (0)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
6059 
/*
 * Credential check stub: none of the arguments are examined and the
 * answer is always TRUE.
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
{
        return (TRUE);
}
6066 
6067 /*
6068  * Look up the pathname using the vp in cs as the directory vnode.
6069  * cs->vp will be the vnode for the file on success
6070  */
6071 
6072 static nfsstat4
6073 rfs4_lookup(component4 *component, struct svc_req *req,
6074     struct compound_state *cs)
6075 {
6076         char *nm;
6077         uint32_t len;
6078         nfsstat4 status;
6079         struct sockaddr *ca;
6080         char *name;
6081 
6082         if (cs->vp == NULL) {
6083                 return (NFS4ERR_NOFILEHANDLE);
6084         }
6085         if (cs->vp->v_type != VDIR) {
6086                 return (NFS4ERR_NOTDIR);
6087         }
6088 
6089         status = utf8_dir_verify(component);
6090         if (status != NFS4_OK)
6091                 return (status);
6092 
6093         nm = utf8_to_fn(component, &len, NULL);
6094         if (nm == NULL) {
6095                 return (NFS4ERR_INVAL);
6096         }
6097 
6098         if (len > MAXNAMELEN) {
6099                 kmem_free(nm, len);
6100                 return (NFS4ERR_NAMETOOLONG);
6101         }
6102 
6103         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6104         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6105             MAXPATHLEN + 1);
6106 
6107         if (name == NULL) {
6108                 kmem_free(nm, len);
6109                 return (NFS4ERR_INVAL);
6110         }
6111 
6112         status = do_rfs4_op_lookup(name, req, cs);
6113 
6114         if (name != nm)
6115                 kmem_free(name, MAXPATHLEN + 1);
6116 
6117         kmem_free(nm, len);
6118 
6119         return (status);
6120 }
6121 
6122 static nfsstat4
6123 rfs4_lookupfile(component4 *component, struct svc_req *req,
6124     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6125 {
6126         nfsstat4 status;
6127         vnode_t *dvp = cs->vp;
6128         vattr_t bva, ava, fva;
6129         int error;
6130 
6131         /* Get "before" change value */
6132         bva.va_mask = AT_CTIME|AT_SEQ;
6133         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6134         if (error)
6135                 return (puterrno4(error));
6136 
6137         /* rfs4_lookup may VN_RELE directory */
6138         VN_HOLD(dvp);
6139 
6140         status = rfs4_lookup(component, req, cs);
6141         if (status != NFS4_OK) {
6142                 VN_RELE(dvp);
6143                 return (status);
6144         }
6145 
6146         /*
6147          * Get "after" change value, if it fails, simply return the
6148          * before value.
6149          */
6150         ava.va_mask = AT_CTIME|AT_SEQ;
6151         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6152                 ava.va_ctime = bva.va_ctime;
6153                 ava.va_seq = 0;
6154         }
6155         VN_RELE(dvp);
6156 
6157         /*
6158          * Validate the file is a file
6159          */
6160         fva.va_mask = AT_TYPE|AT_MODE;
6161         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6162         if (error)
6163                 return (puterrno4(error));
6164 
6165         if (fva.va_type != VREG) {
6166                 if (fva.va_type == VDIR)
6167                         return (NFS4ERR_ISDIR);
6168                 if (fva.va_type == VLNK)
6169                         return (NFS4ERR_SYMLINK);
6170                 return (NFS4ERR_INVAL);
6171         }
6172 
6173         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6174         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6175 
6176         /*
6177          * It is undefined if VOP_LOOKUP will change va_seq, so
6178          * cinfo.atomic = TRUE only if we have
6179          * non-zero va_seq's, and they have not changed.
6180          */
6181         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6182                 cinfo->atomic = TRUE;
6183         else
6184                 cinfo->atomic = FALSE;
6185 
6186         /* Check for mandatory locking */
6187         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6188         return (check_open_access(access, cs, req));
6189 }
6190 
6191 static nfsstat4
6192 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6193     timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
6194 {
6195         int error;
6196         nfsstat4 status = NFS4_OK;
6197         vattr_t va;
6198 
6199 tryagain:
6200 
6201         /*
6202          * The file open mode used is VWRITE.  If the client needs
6203          * some other semantic, then it should do the access checking
6204          * itself.  It would have been nice to have the file open mode
6205          * passed as part of the arguments.
6206          */
6207 
6208         *created = TRUE;
6209         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6210 
6211         if (error) {
6212                 *created = FALSE;
6213 
6214                 /*
6215                  * If we got something other than file already exists
6216                  * then just return this error.  Otherwise, we got
6217                  * EEXIST.  If we were doing a GUARDED create, then
6218                  * just return this error.  Otherwise, we need to
6219                  * make sure that this wasn't a duplicate of an
6220                  * exclusive create request.
6221                  *
6222                  * The assumption is made that a non-exclusive create
6223                  * request will never return EEXIST.
6224                  */
6225 
6226                 if (error != EEXIST || mode == GUARDED4) {
6227                         status = puterrno4(error);
6228                         return (status);
6229                 }
6230                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6231                     NULL, NULL, NULL);
6232 
6233                 if (error) {
6234                         /*
6235                          * We couldn't find the file that we thought that
6236                          * we just created.  So, we'll just try creating
6237                          * it again.
6238                          */
6239                         if (error == ENOENT)
6240                                 goto tryagain;
6241 
6242                         status = puterrno4(error);
6243                         return (status);
6244                 }
6245 
6246                 if (mode == UNCHECKED4) {
6247                         /* existing object must be regular file */
6248                         if ((*vpp)->v_type != VREG) {
6249                                 if ((*vpp)->v_type == VDIR)
6250                                         status = NFS4ERR_ISDIR;
6251                                 else if ((*vpp)->v_type == VLNK)
6252                                         status = NFS4ERR_SYMLINK;
6253                                 else
6254                                         status = NFS4ERR_INVAL;
6255                                 VN_RELE(*vpp);
6256                                 return (status);
6257                         }
6258 
6259                         return (NFS4_OK);
6260                 }
6261 
6262                 /* Check for duplicate request */
6263                 ASSERT(mtime != 0);
6264                 va.va_mask = AT_MTIME;
6265                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6266                 if (!error) {
6267                         /* We found the file */
6268                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6269                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6270                                 /* but its not our creation */
6271                                 VN_RELE(*vpp);
6272                                 return (NFS4ERR_EXIST);
6273                         }
6274                         *created = TRUE; /* retrans of create == created */
6275                         return (NFS4_OK);
6276                 }
6277                 VN_RELE(*vpp);
6278                 return (NFS4ERR_EXIST);
6279         }
6280 
6281         return (NFS4_OK);
6282 }
6283 
6284 static nfsstat4
6285 check_open_access(uint32_t access, struct compound_state *cs,
6286     struct svc_req *req)
6287 {
6288         int error;
6289         vnode_t *vp;
6290         bool_t readonly;
6291         cred_t *cr = cs->cr;
6292 
6293         /* For now we don't allow mandatory locking as per V2/V3 */
6294         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6295                 return (NFS4ERR_ACCESS);
6296         }
6297 
6298         vp = cs->vp;
6299         ASSERT(cr != NULL && vp->v_type == VREG);
6300 
6301         /*
6302          * If the file system is exported read only and we are trying
6303          * to open for write, then return NFS4ERR_ROFS
6304          */
6305 
6306         readonly = rdonly4(req, cs);
6307 
6308         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6309                 return (NFS4ERR_ROFS);
6310 
6311         if (access & OPEN4_SHARE_ACCESS_READ) {
6312                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6313                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6314                         return (NFS4ERR_ACCESS);
6315                 }
6316         }
6317 
6318         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6319                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6320                 if (error)
6321                         return (NFS4ERR_ACCESS);
6322         }
6323 
6324         return (NFS4_OK);
6325 }
6326 
6327 static nfsstat4
6328 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6329     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6330 {
6331         struct nfs4_svgetit_arg sarg;
6332         struct nfs4_ntov_table ntov;
6333 
6334         bool_t ntov_table_init = FALSE;
6335         struct statvfs64 sb;
6336         nfsstat4 status;
6337         vnode_t *vp;
6338         vattr_t bva, ava, iva, cva, *vap;
6339         vnode_t *dvp;
6340         timespec32_t *mtime;
6341         char *nm = NULL;
6342         uint_t buflen;
6343         bool_t created;
6344         bool_t setsize = FALSE;
6345         len_t reqsize;
6346         int error;
6347         bool_t trunc;
6348         caller_context_t ct;
6349         component4 *component;
6350         bslabel_t *clabel;
6351         struct sockaddr *ca;
6352         char *name = NULL;
6353 
6354         sarg.sbp = &sb;
6355         sarg.is_referral = B_FALSE;
6356 
6357         dvp = cs->vp;
6358 
6359         /* Check if the file system is read only */
6360         if (rdonly4(req, cs))
6361                 return (NFS4ERR_ROFS);
6362 
6363         /* check the label of including directory */
6364         if (is_system_labeled()) {
6365                 ASSERT(req->rq_label != NULL);
6366                 clabel = req->rq_label;
6367                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6368                     "got client label from request(1)",
6369                     struct svc_req *, req);
6370                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6371                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6372                             cs->exi)) {
6373                                 return (NFS4ERR_ACCESS);
6374                         }
6375                 }
6376         }
6377 
6378         /*
6379          * Get the last component of path name in nm. cs will reference
6380          * the including directory on success.
6381          */
6382         component = &args->open_claim4_u.file;
6383         status = utf8_dir_verify(component);
6384         if (status != NFS4_OK)
6385                 return (status);
6386 
6387         nm = utf8_to_fn(component, &buflen, NULL);
6388 
6389         if (nm == NULL)
6390                 return (NFS4ERR_RESOURCE);
6391 
6392         if (buflen > MAXNAMELEN) {
6393                 kmem_free(nm, buflen);
6394                 return (NFS4ERR_NAMETOOLONG);
6395         }
6396 
6397         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6398         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6399         if (error) {
6400                 kmem_free(nm, buflen);
6401                 return (puterrno4(error));
6402         }
6403 
6404         if (bva.va_type != VDIR) {
6405                 kmem_free(nm, buflen);
6406                 return (NFS4ERR_NOTDIR);
6407         }
6408 
6409         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6410 
6411         switch (args->mode) {
6412         case GUARDED4:
6413                 /*FALLTHROUGH*/
6414         case UNCHECKED4:
6415                 nfs4_ntov_table_init(&ntov);
6416                 ntov_table_init = TRUE;
6417 
6418                 *attrset = 0;
6419                 status = do_rfs4_set_attrs(attrset,
6420                     &args->createhow4_u.createattrs,
6421                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6422 
6423                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6424                     sarg.vap->va_type != VREG) {
6425                         if (sarg.vap->va_type == VDIR)
6426                                 status = NFS4ERR_ISDIR;
6427                         else if (sarg.vap->va_type == VLNK)
6428                                 status = NFS4ERR_SYMLINK;
6429                         else
6430                                 status = NFS4ERR_INVAL;
6431                 }
6432 
6433                 if (status != NFS4_OK) {
6434                         kmem_free(nm, buflen);
6435                         nfs4_ntov_table_free(&ntov, &sarg);
6436                         *attrset = 0;
6437                         return (status);
6438                 }
6439 
6440                 vap = sarg.vap;
6441                 vap->va_type = VREG;
6442                 vap->va_mask |= AT_TYPE;
6443 
6444                 if ((vap->va_mask & AT_MODE) == 0) {
6445                         vap->va_mask |= AT_MODE;
6446                         vap->va_mode = (mode_t)0600;
6447                 }
6448 
6449                 if (vap->va_mask & AT_SIZE) {
6450 
6451                         /* Disallow create with a non-zero size */
6452 
6453                         if ((reqsize = sarg.vap->va_size) != 0) {
6454                                 kmem_free(nm, buflen);
6455                                 nfs4_ntov_table_free(&ntov, &sarg);
6456                                 *attrset = 0;
6457                                 return (NFS4ERR_INVAL);
6458                         }
6459                         setsize = TRUE;
6460                 }
6461                 break;
6462 
6463         case EXCLUSIVE4:
6464                 /* prohibit EXCL create of named attributes */
6465                 if (dvp->v_flag & V_XATTRDIR) {
6466                         kmem_free(nm, buflen);
6467                         *attrset = 0;
6468                         return (NFS4ERR_INVAL);
6469                 }
6470 
6471                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6472                 cva.va_type = VREG;
6473                 /*
6474                  * Ensure no time overflows. Assumes underlying
6475                  * filesystem supports at least 32 bits.
6476                  * Truncate nsec to usec resolution to allow valid
6477                  * compares even if the underlying filesystem truncates.
6478                  */
6479                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6480                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6481                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6482                 cva.va_mode = (mode_t)0;
6483                 vap = &cva;
6484 
6485                 /*
6486                  * For EXCL create, attrset is set to the server attr
6487                  * used to cache the client's verifier.
6488                  */
6489                 *attrset = FATTR4_TIME_MODIFY_MASK;
6490                 break;
6491         }
6492 
6493         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6494         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6495             MAXPATHLEN  + 1);
6496 
6497         if (name == NULL) {
6498                 kmem_free(nm, buflen);
6499                 return (NFS4ERR_SERVERFAULT);
6500         }
6501 
6502         status = create_vnode(dvp, name, vap, args->mode, mtime,
6503             cs->cr, &vp, &created);
6504         if (nm != name)
6505                 kmem_free(name, MAXPATHLEN + 1);
6506         kmem_free(nm, buflen);
6507 
6508         if (status != NFS4_OK) {
6509                 if (ntov_table_init)
6510                         nfs4_ntov_table_free(&ntov, &sarg);
6511                 *attrset = 0;
6512                 return (status);
6513         }
6514 
6515         trunc = (setsize && !created);
6516 
6517         if (args->mode != EXCLUSIVE4) {
6518                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6519 
6520                 /*
6521                  * True verification that object was created with correct
6522                  * attrs is impossible.  The attrs could have been changed
6523                  * immediately after object creation.  If attributes did
6524                  * not verify, the only recourse for the server is to
6525                  * destroy the object.  Maybe if some attrs (like gid)
6526                  * are set incorrectly, the object should be destroyed;
6527                  * however, seems bad as a default policy.  Do we really
6528                  * want to destroy an object over one of the times not
6529                  * verifying correctly?  For these reasons, the server
6530                  * currently sets bits in attrset for createattrs
6531                  * that were set; however, no verification is done.
6532                  *
6533                  * vmask_to_nmask accounts for vattr bits set on create
6534                  *      [do_rfs4_set_attrs() only sets resp bits for
6535                  *       non-vattr/vfs bits.]
6536                  * Mask off any bits we set by default so as not to return
6537                  * more attrset bits than were requested in createattrs
6538                  */
6539                 if (created) {
6540                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6541                         *attrset &= createmask;
6542                 } else {
6543                         /*
6544                          * We did not create the vnode (we tried but it
6545                          * already existed).  In this case, the only createattr
6546                          * that the spec allows the server to set is size,
6547                          * and even then, it can only be set if it is 0.
6548                          */
6549                         *attrset = 0;
6550                         if (trunc)
6551                                 *attrset = FATTR4_SIZE_MASK;
6552                 }
6553         }
6554         if (ntov_table_init)
6555                 nfs4_ntov_table_free(&ntov, &sarg);
6556 
6557         /*
6558          * Get the initial "after" sequence number, if it fails,
6559          * set to zero, time to before.
6560          */
6561         iva.va_mask = AT_CTIME|AT_SEQ;
6562         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6563                 iva.va_seq = 0;
6564                 iva.va_ctime = bva.va_ctime;
6565         }
6566 
6567         /*
6568          * create_vnode attempts to create the file exclusive,
6569          * if it already exists the VOP_CREATE will fail and
6570          * may not increase va_seq. It is atomic if
6571          * we haven't changed the directory, but if it has changed
6572          * we don't know what changed it.
6573          */
6574         if (!created) {
6575                 if (bva.va_seq && iva.va_seq &&
6576                     bva.va_seq == iva.va_seq)
6577                         cinfo->atomic = TRUE;
6578                 else
6579                         cinfo->atomic = FALSE;
6580                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6581         } else {
6582                 /*
6583                  * The entry was created, we need to sync the
6584                  * directory metadata.
6585                  */
6586                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6587 
6588                 /*
6589                  * Get "after" change value, if it fails, simply return the
6590                  * before value.
6591                  */
6592                 ava.va_mask = AT_CTIME|AT_SEQ;
6593                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6594                         ava.va_ctime = bva.va_ctime;
6595                         ava.va_seq = 0;
6596                 }
6597 
6598                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6599 
6600                 /*
6601                  * The cinfo->atomic = TRUE only if we have
6602                  * non-zero va_seq's, and it has incremented by exactly one
6603                  * during the create_vnode and it didn't
6604                  * change during the VOP_FSYNC.
6605                  */
6606                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6607                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6608                         cinfo->atomic = TRUE;
6609                 else
6610                         cinfo->atomic = FALSE;
6611         }
6612 
6613         /* Check for mandatory locking and that the size gets set. */
6614         cva.va_mask = AT_MODE;
6615         if (setsize)
6616                 cva.va_mask |= AT_SIZE;
6617 
6618         /* Assume the worst */
6619         cs->mandlock = TRUE;
6620 
6621         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6622                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6623 
6624                 /*
6625                  * Truncate the file if necessary; this would be
6626                  * the case for create over an existing file.
6627                  */
6628 
6629                 if (trunc) {
6630                         int in_crit = 0;
6631                         rfs4_file_t *fp;
6632                         bool_t create = FALSE;
6633 
6634                         /*
6635                          * We are writing over an existing file.
6636                          * Check to see if we need to recall a delegation.
6637                          */
6638                         rfs4_hold_deleg_policy();
6639                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6640                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6641                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6642                                         rfs4_file_rele(fp);
6643                                         rfs4_rele_deleg_policy();
6644                                         VN_RELE(vp);
6645                                         *attrset = 0;
6646                                         return (NFS4ERR_DELAY);
6647                                 }
6648                                 rfs4_file_rele(fp);
6649                         }
6650                         rfs4_rele_deleg_policy();
6651 
6652                         if (nbl_need_check(vp)) {
6653                                 in_crit = 1;
6654 
6655                                 ASSERT(reqsize == 0);
6656 
6657                                 nbl_start_crit(vp, RW_READER);
6658                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6659                                     cva.va_size, 0, NULL)) {
6660                                         in_crit = 0;
6661                                         nbl_end_crit(vp);
6662                                         VN_RELE(vp);
6663                                         *attrset = 0;
6664                                         return (NFS4ERR_ACCESS);
6665                                 }
6666                         }
6667                         ct.cc_sysid = 0;
6668                         ct.cc_pid = 0;
6669                         ct.cc_caller_id = nfs4_srv_caller_id;
6670                         ct.cc_flags = CC_DONTBLOCK;
6671 
6672                         cva.va_mask = AT_SIZE;
6673                         cva.va_size = reqsize;
6674                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6675                         if (in_crit)
6676                                 nbl_end_crit(vp);
6677                 }
6678         }
6679 
6680         error = makefh4(&cs->fh, vp, cs->exi);
6681 
6682         /*
6683          * Force modified data and metadata out to stable storage.
6684          */
6685         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6686 
6687         if (error) {
6688                 VN_RELE(vp);
6689                 *attrset = 0;
6690                 return (puterrno4(error));
6691         }
6692 
6693         /* if parent dir is attrdir, set namedattr fh flag */
6694         if (dvp->v_flag & V_XATTRDIR)
6695                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6696 
6697         if (cs->vp)
6698                 VN_RELE(cs->vp);
6699 
6700         cs->vp = vp;
6701 
6702         /*
6703          * if we did not create the file, we will need to check
6704          * the access bits on the file
6705          */
6706 
6707         if (!created) {
6708                 if (setsize)
6709                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6710                 status = check_open_access(args->share_access, cs, req);
6711                 if (status != NFS4_OK)
6712                         *attrset = 0;
6713         }
6714         return (status);
6715 }
6716 
6717 /*ARGSUSED*/
6718 static void
6719 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6720     rfs4_openowner_t *oo, delegreq_t deleg,
6721     uint32_t access, uint32_t deny,
6722     OPEN4res *resp, int deleg_cur)
6723 {
6724         /* XXX Currently not using req  */
6725         rfs4_state_t *sp;
6726         rfs4_file_t *fp;
6727         bool_t screate = TRUE;
6728         bool_t fcreate = TRUE;
6729         uint32_t open_a, share_a;
6730         uint32_t open_d, share_d;
6731         rfs4_deleg_state_t *dsp;
6732         sysid_t sysid;
6733         nfsstat4 status;
6734         caller_context_t ct;
6735         int fflags = 0;
6736         int recall = 0;
6737         int err;
6738         int first_open;
6739 
6740         /* get the file struct and hold a lock on it during initial open */
6741         fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6742         if (fp == NULL) {
6743                 resp->status = NFS4ERR_RESOURCE;
6744                 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6745                 return;
6746         }
6747 
6748         sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6749         if (sp == NULL) {
6750                 resp->status = NFS4ERR_RESOURCE;
6751                 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6752                 /* No need to keep any reference */
6753                 rw_exit(&fp->rf_file_rwlock);
6754                 rfs4_file_rele(fp);
6755                 return;
6756         }
6757 
6758         /* try to get the sysid before continuing */
6759         if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6760                 resp->status = status;
6761                 rfs4_file_rele(fp);
6762                 /* Not a fully formed open; "close" it */
6763                 if (screate == TRUE)
6764                         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6765                 rfs4_state_rele(sp);
6766                 return;
6767         }
6768 
6769         /* Calculate the fflags for this OPEN. */
6770         if (access & OPEN4_SHARE_ACCESS_READ)
6771                 fflags |= FREAD;
6772         if (access & OPEN4_SHARE_ACCESS_WRITE)
6773                 fflags |= FWRITE;
6774 
6775         rfs4_dbe_lock(sp->rs_dbe);
6776 
6777         /*
6778          * Calculate the new deny and access mode that this open is adding to
6779          * the file for this open owner;
6780          */
6781         open_d = (deny & ~sp->rs_open_deny);
6782         open_a = (access & ~sp->rs_open_access);
6783 
6784         /*
6785          * Calculate the new share access and share deny modes that this open
6786          * is adding to the file for this open owner;
6787          */
6788         share_a = (access & ~sp->rs_share_access);
6789         share_d = (deny & ~sp->rs_share_deny);
6790 
6791         first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6792 
6793         /*
6794          * Check to see the client has already sent an open for this
6795          * open owner on this file with the same share/deny modes.
6796          * If so, we don't need to check for a conflict and we don't
6797          * need to add another shrlock.  If not, then we need to
6798          * check for conflicts in deny and access before checking for
6799          * conflicts in delegation.  We don't want to recall a
6800          * delegation based on an open that will eventually fail based
6801          * on shares modes.
6802          */
6803 
6804         if (share_a || share_d) {
6805                 if ((err = rfs4_share(sp, access, deny)) != 0) {
6806                         rfs4_dbe_unlock(sp->rs_dbe);
6807                         resp->status = err;
6808 
6809                         rfs4_file_rele(fp);
6810                         /* Not a fully formed open; "close" it */
6811                         if (screate == TRUE)
6812                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6813                         rfs4_state_rele(sp);
6814                         return;
6815                 }
6816         }
6817 
6818         rfs4_dbe_lock(fp->rf_dbe);
6819 
6820         /*
6821          * Check to see if this file is delegated and if so, if a
6822          * recall needs to be done.
6823          */
6824         if (rfs4_check_recall(sp, access)) {
6825                 rfs4_dbe_unlock(fp->rf_dbe);
6826                 rfs4_dbe_unlock(sp->rs_dbe);
6827                 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6828                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6829                 rfs4_dbe_lock(sp->rs_dbe);
6830 
6831                 /* if state closed while lock was dropped */
6832                 if (sp->rs_closed) {
6833                         if (share_a || share_d)
6834                                 (void) rfs4_unshare(sp);
6835                         rfs4_dbe_unlock(sp->rs_dbe);
6836                         rfs4_file_rele(fp);
6837                         /* Not a fully formed open; "close" it */
6838                         if (screate == TRUE)
6839                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6840                         rfs4_state_rele(sp);
6841                         resp->status = NFS4ERR_OLD_STATEID;
6842                         return;
6843                 }
6844 
6845                 rfs4_dbe_lock(fp->rf_dbe);
6846                 /* Let's see if the delegation was returned */
6847                 if (rfs4_check_recall(sp, access)) {
6848                         rfs4_dbe_unlock(fp->rf_dbe);
6849                         if (share_a || share_d)
6850                                 (void) rfs4_unshare(sp);
6851                         rfs4_dbe_unlock(sp->rs_dbe);
6852                         rfs4_file_rele(fp);
6853                         rfs4_update_lease(sp->rs_owner->ro_client);
6854 
6855                         /* Not a fully formed open; "close" it */
6856                         if (screate == TRUE)
6857                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6858                         rfs4_state_rele(sp);
6859                         resp->status = NFS4ERR_DELAY;
6860                         return;
6861                 }
6862         }
6863         /*
6864          * the share check passed and any delegation conflict has been
6865          * taken care of, now call vop_open.
6866          * if this is the first open then call vop_open with fflags.
6867          * if not, call vn_open_upgrade with just the upgrade flags.
6868          *
6869          * if the file has been opened already, it will have the current
6870          * access mode in the state struct.  if it has no share access, then
6871          * this is a new open.
6872          *
6873          * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6874          * call VOP_OPEN(), just do the open upgrade.
6875          */
6876         if (first_open && !deleg_cur) {
6877                 ct.cc_sysid = sysid;
6878                 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6879                 ct.cc_caller_id = nfs4_srv_caller_id;
6880                 ct.cc_flags = CC_DONTBLOCK;
6881                 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6882                 if (err) {
6883                         rfs4_dbe_unlock(fp->rf_dbe);
6884                         if (share_a || share_d)
6885                                 (void) rfs4_unshare(sp);
6886                         rfs4_dbe_unlock(sp->rs_dbe);
6887                         rfs4_file_rele(fp);
6888 
6889                         /* Not a fully formed open; "close" it */
6890                         if (screate == TRUE)
6891                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6892                         rfs4_state_rele(sp);
6893                         /* check if a monitor detected a delegation conflict */
6894                         if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6895                                 resp->status = NFS4ERR_DELAY;
6896                         else
6897                                 resp->status = NFS4ERR_SERVERFAULT;
6898                         return;
6899                 }
6900         } else { /* open upgrade */
6901                 /*
6902                  * calculate the fflags for the new mode that is being added
6903                  * by this upgrade.
6904                  */
6905                 fflags = 0;
6906                 if (open_a & OPEN4_SHARE_ACCESS_READ)
6907                         fflags |= FREAD;
6908                 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6909                         fflags |= FWRITE;
6910                 vn_open_upgrade(cs->vp, fflags);
6911         }
6912         sp->rs_open_access |= access;
6913         sp->rs_open_deny |= deny;
6914 
6915         if (open_d & OPEN4_SHARE_DENY_READ)
6916                 fp->rf_deny_read++;
6917         if (open_d & OPEN4_SHARE_DENY_WRITE)
6918                 fp->rf_deny_write++;
6919         fp->rf_share_deny |= deny;
6920 
6921         if (open_a & OPEN4_SHARE_ACCESS_READ)
6922                 fp->rf_access_read++;
6923         if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6924                 fp->rf_access_write++;
6925         fp->rf_share_access |= access;
6926 
6927         /*
6928          * Check for delegation here. if the deleg argument is not
6929          * DELEG_ANY, then this is a reclaim from a client and
6930          * we must honor the delegation requested. If necessary we can
6931          * set the recall flag.
6932          */
6933 
6934         dsp = rfs4_grant_delegation(deleg, sp, &recall);
6935 
6936         cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6937 
6938         next_stateid(&sp->rs_stateid);
6939 
6940         resp->stateid = sp->rs_stateid.stateid;
6941 
6942         rfs4_dbe_unlock(fp->rf_dbe);
6943         rfs4_dbe_unlock(sp->rs_dbe);
6944 
6945         if (dsp) {
6946                 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6947                 rfs4_deleg_state_rele(dsp);
6948         }
6949 
6950         rfs4_file_rele(fp);
6951         rfs4_state_rele(sp);
6952 
6953         resp->status = NFS4_OK;
6954 }
6955 
6956 /*ARGSUSED*/
6957 static void
6958 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6959     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6960 {
6961         change_info4 *cinfo = &resp->cinfo;
6962         bitmap4 *attrset = &resp->attrset;
6963 
6964         if (args->opentype == OPEN4_NOCREATE)
6965                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6966                     req, cs, args->share_access, cinfo);
6967         else {
6968                 /* inhibit delegation grants during exclusive create */
6969 
6970                 if (args->mode == EXCLUSIVE4)
6971                         rfs4_disable_delegation();
6972 
6973                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6974                     oo->ro_client->rc_clientid);
6975         }
6976 
6977         if (resp->status == NFS4_OK) {
6978 
6979                 /* cs->vp cs->fh now reference the desired file */
6980 
6981                 rfs4_do_open(cs, req, oo,
6982                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6983                     args->share_access, args->share_deny, resp, 0);
6984 
6985                 /*
6986                  * If rfs4_createfile set attrset, we must
6987                  * clear this attrset before the response is copied.
6988                  */
6989                 if (resp->status != NFS4_OK && resp->attrset) {
6990                         resp->attrset = 0;
6991                 }
6992         }
6993         else
6994                 *cs->statusp = resp->status;
6995 
6996         if (args->mode == EXCLUSIVE4)
6997                 rfs4_enable_delegation();
6998 }
6999 
7000 /*ARGSUSED*/
7001 static void
7002 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7003     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7004 {
7005         change_info4 *cinfo = &resp->cinfo;
7006         vattr_t va;
7007         vtype_t v_type = cs->vp->v_type;
7008         int error = 0;
7009 
7010         /* Verify that we have a regular file */
7011         if (v_type != VREG) {
7012                 if (v_type == VDIR)
7013                         resp->status = NFS4ERR_ISDIR;
7014                 else if (v_type == VLNK)
7015                         resp->status = NFS4ERR_SYMLINK;
7016                 else
7017                         resp->status = NFS4ERR_INVAL;
7018                 return;
7019         }
7020 
7021         va.va_mask = AT_MODE|AT_UID;
7022         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7023         if (error) {
7024                 resp->status = puterrno4(error);
7025                 return;
7026         }
7027 
7028         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7029 
7030         /*
7031          * Check if we have access to the file, Note the the file
7032          * could have originally been open UNCHECKED or GUARDED
7033          * with mode bits that will now fail, but there is nothing
7034          * we can really do about that except in the case that the
7035          * owner of the file is the one requesting the open.
7036          */
7037         if (crgetuid(cs->cr) != va.va_uid) {
7038                 resp->status = check_open_access(args->share_access, cs, req);
7039                 if (resp->status != NFS4_OK) {
7040                         return;
7041                 }
7042         }
7043 
7044         /*
7045          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7046          */
7047         cinfo->before = 0;
7048         cinfo->after = 0;
7049         cinfo->atomic = FALSE;
7050 
7051         rfs4_do_open(cs, req, oo,
7052             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7053             args->share_access, args->share_deny, resp, 0);
7054 }
7055 
7056 static void
7057 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7058     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7059 {
7060         int error;
7061         nfsstat4 status;
7062         stateid4 stateid =
7063             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7064         rfs4_deleg_state_t *dsp;
7065 
7066         /*
7067          * Find the state info from the stateid and confirm that the
7068          * file is delegated.  If the state openowner is the same as
7069          * the supplied openowner we're done. If not, get the file
7070          * info from the found state info. Use that file info to
7071          * create the state for this lock owner. Note solaris doen't
7072          * really need the pathname to find the file. We may want to
7073          * lookup the pathname and make sure that the vp exist and
7074          * matches the vp in the file structure. However it is
7075          * possible that the pathname nolonger exists (local process
7076          * unlinks the file), so this may not be that useful.
7077          */
7078 
7079         status = rfs4_get_deleg_state(&stateid, &dsp);
7080         if (status != NFS4_OK) {
7081                 resp->status = status;
7082                 return;
7083         }
7084 
7085         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7086 
7087         /*
7088          * New lock owner, create state. Since this was probably called
7089          * in response to a CB_RECALL we set deleg to DELEG_NONE
7090          */
7091 
7092         ASSERT(cs->vp != NULL);
7093         VN_RELE(cs->vp);
7094         VN_HOLD(dsp->rds_finfo->rf_vp);
7095         cs->vp = dsp->rds_finfo->rf_vp;
7096 
7097         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7098                 rfs4_deleg_state_rele(dsp);
7099                 *cs->statusp = resp->status = puterrno4(error);
7100                 return;
7101         }
7102 
7103         /* Mark progress for delegation returns */
7104         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7105         rfs4_deleg_state_rele(dsp);
7106         rfs4_do_open(cs, req, oo, DELEG_NONE,
7107             args->share_access, args->share_deny, resp, 1);
7108 }
7109 
7110 /*ARGSUSED*/
7111 static void
7112 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7113     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7114 {
7115         /*
7116          * Lookup the pathname, it must already exist since this file
7117          * was delegated.
7118          *
7119          * Find the file and state info for this vp and open owner pair.
7120          *      check that they are in fact delegated.
7121          *      check that the state access and deny modes are the same.
7122          *
7123          * Return the delgation possibly seting the recall flag.
7124          */
7125         rfs4_file_t *fp;
7126         rfs4_state_t *sp;
7127         bool_t create = FALSE;
7128         bool_t dcreate = FALSE;
7129         rfs4_deleg_state_t *dsp;
7130         nfsace4 *ace;
7131 
7132         /* Note we ignore oflags */
7133         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7134             req, cs, args->share_access, &resp->cinfo);
7135 
7136         if (resp->status != NFS4_OK) {
7137                 return;
7138         }
7139 
7140         /* get the file struct and hold a lock on it during initial open */
7141         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7142         if (fp == NULL) {
7143                 resp->status = NFS4ERR_RESOURCE;
7144                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7145                 return;
7146         }
7147 
7148         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7149         if (sp == NULL) {
7150                 resp->status = NFS4ERR_SERVERFAULT;
7151                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7152                 rw_exit(&fp->rf_file_rwlock);
7153                 rfs4_file_rele(fp);
7154                 return;
7155         }
7156 
7157         rfs4_dbe_lock(sp->rs_dbe);
7158         rfs4_dbe_lock(fp->rf_dbe);
7159         if (args->share_access != sp->rs_share_access ||
7160             args->share_deny != sp->rs_share_deny ||
7161             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7162                 NFS4_DEBUG(rfs4_debug,
7163                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7164                 rfs4_dbe_unlock(fp->rf_dbe);
7165                 rfs4_dbe_unlock(sp->rs_dbe);
7166                 rfs4_file_rele(fp);
7167                 rfs4_state_rele(sp);
7168                 resp->status = NFS4ERR_SERVERFAULT;
7169                 return;
7170         }
7171         rfs4_dbe_unlock(fp->rf_dbe);
7172         rfs4_dbe_unlock(sp->rs_dbe);
7173 
7174         dsp = rfs4_finddeleg(sp, &dcreate);
7175         if (dsp == NULL) {
7176                 rfs4_state_rele(sp);
7177                 rfs4_file_rele(fp);
7178                 resp->status = NFS4ERR_SERVERFAULT;
7179                 return;
7180         }
7181 
7182         next_stateid(&sp->rs_stateid);
7183 
7184         resp->stateid = sp->rs_stateid.stateid;
7185 
7186         resp->delegation.delegation_type = dsp->rds_dtype;
7187 
7188         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7189                 open_read_delegation4 *rv =
7190                     &resp->delegation.open_delegation4_u.read;
7191 
7192                 rv->stateid = dsp->rds_delegid.stateid;
7193                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7194                 ace = &rv->permissions;
7195         } else {
7196                 open_write_delegation4 *rv =
7197                     &resp->delegation.open_delegation4_u.write;
7198 
7199                 rv->stateid = dsp->rds_delegid.stateid;
7200                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7201                 ace = &rv->permissions;
7202                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7203                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7204         }
7205 
7206         /* XXX For now */
7207         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7208         ace->flag = 0;
7209         ace->access_mask = 0;
7210         ace->who.utf8string_len = 0;
7211         ace->who.utf8string_val = 0;
7212 
7213         rfs4_deleg_state_rele(dsp);
7214         rfs4_state_rele(sp);
7215         rfs4_file_rele(fp);
7216 }
7217 
/*
 * Outcome of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum rfs4_chkseq {
        /* request carries the next expected sequence id */
        NFS4_CHKSEQ_OKAY = 0,
        /* same sequence id and same operation as the saved reply */
        NFS4_CHKSEQ_REPLAY = 1,
        /* any other sequence id, or a repeated id on a different op */
        NFS4_CHKSEQ_BAD = 2
} rfs4_chkseq_t;
7223 
7224 /*
7225  * Generic function for sequence number checks.
7226  */
7227 static rfs4_chkseq_t
7228 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7229     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7230 {
7231         /* Same sequence ids and matching operations? */
7232         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7233                 if (copyres == TRUE) {
7234                         rfs4_free_reply(resop);
7235                         rfs4_copy_reply(resop, lastop);
7236                 }
7237                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7238                     "Replayed SEQID %d\n", seqid));
7239                 return (NFS4_CHKSEQ_REPLAY);
7240         }
7241 
7242         /* If the incoming sequence is not the next expected then it is bad */
7243         if (rqst_seq != seqid + 1) {
7244                 if (rqst_seq == seqid) {
7245                         NFS4_DEBUG(rfs4_debug,
7246                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7247                             "but last op was %d current op is %d\n",
7248                             lastop->resop, resop->resop));
7249                         return (NFS4_CHKSEQ_BAD);
7250                 }
7251                 NFS4_DEBUG(rfs4_debug,
7252                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7253                     rqst_seq, seqid));
7254                 return (NFS4_CHKSEQ_BAD);
7255         }
7256 
7257         /* Everything okay -- next expected */
7258         return (NFS4_CHKSEQ_OKAY);
7259 }
7260 
7261 
7262 static rfs4_chkseq_t
7263 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7264 {
7265         rfs4_chkseq_t rc;
7266 
7267         rfs4_dbe_lock(op->ro_dbe);
7268         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7269             TRUE);
7270         rfs4_dbe_unlock(op->ro_dbe);
7271 
7272         if (rc == NFS4_CHKSEQ_OKAY)
7273                 rfs4_update_lease(op->ro_client);
7274 
7275         return (rc);
7276 }
7277 
7278 static rfs4_chkseq_t
7279 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7280 {
7281         rfs4_chkseq_t rc;
7282 
7283         rfs4_dbe_lock(op->ro_dbe);
7284         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7285             olo_seqid, resop, FALSE);
7286         rfs4_dbe_unlock(op->ro_dbe);
7287 
7288         return (rc);
7289 }
7290 
7291 static rfs4_chkseq_t
7292 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7293 {
7294         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7295 
7296         rfs4_dbe_lock(lsp->rls_dbe);
7297         if (!lsp->rls_skip_seqid_check)
7298                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7299                     resop, TRUE);
7300         rfs4_dbe_unlock(lsp->rls_dbe);
7301 
7302         return (rc);
7303 }
7304 
7305 static void
7306 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7307     struct svc_req *req, struct compound_state *cs)
7308 {
7309         OPEN4args *args = &argop->nfs_argop4_u.opopen;
7310         OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7311         open_owner4 *owner = &args->owner;
7312         open_claim_type4 claim = args->claim;
7313         rfs4_client_t *cp;
7314         rfs4_openowner_t *oo;
7315         bool_t create;
7316         bool_t replay = FALSE;
7317         int can_reclaim;
7318 
7319         DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7320             OPEN4args *, args);
7321 
7322         if (cs->vp == NULL) {
7323                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7324                 goto end;
7325         }
7326 
7327         /*
7328          * Need to check clientid and lease expiration first based on
7329          * error ordering and incrementing sequence id.
7330          */
7331         cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7332         if (cp == NULL) {
7333                 *cs->statusp = resp->status =
7334                     rfs4_check_clientid(&owner->clientid, 0);
7335                 goto end;
7336         }
7337 
7338         if (rfs4_lease_expired(cp)) {
7339                 rfs4_client_close(cp);
7340                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7341                 goto end;
7342         }
7343         can_reclaim = cp->rc_can_reclaim;
7344 
7345         /*
7346          * Find the open_owner for use from this point forward.  Take
7347          * care in updating the sequence id based on the type of error
7348          * being returned.
7349          */
7350 retry:
7351         create = TRUE;
7352         oo = rfs4_findopenowner(owner, &create, args->seqid);
7353         if (oo == NULL) {
7354                 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7355                 rfs4_client_rele(cp);
7356                 goto end;
7357         }
7358 
7359         /* Hold off access to the sequence space while the open is done */
7360         rfs4_sw_enter(&oo->ro_sw);
7361 
7362         /*
7363          * If the open_owner existed before at the server, then check
7364          * the sequence id.
7365          */
7366         if (!create && !oo->ro_postpone_confirm) {
7367                 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7368                 case NFS4_CHKSEQ_BAD:
7369                         if ((args->seqid > oo->ro_open_seqid) &&
7370                             oo->ro_need_confirm) {
7371                                 rfs4_free_opens(oo, TRUE, FALSE);
7372                                 rfs4_sw_exit(&oo->ro_sw);
7373                                 rfs4_openowner_rele(oo);
7374                                 goto retry;
7375                         }
7376                         resp->status = NFS4ERR_BAD_SEQID;
7377                         goto out;
7378                 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7379                         replay = TRUE;
7380                         goto out;
7381                 default:
7382                         break;
7383                 }
7384 
7385                 /*
7386                  * Sequence was ok and open owner exists
7387                  * check to see if we have yet to see an
7388                  * open_confirm.
7389                  */
7390                 if (oo->ro_need_confirm) {
7391                         rfs4_free_opens(oo, TRUE, FALSE);
7392                         rfs4_sw_exit(&oo->ro_sw);
7393                         rfs4_openowner_rele(oo);
7394                         goto retry;
7395                 }
7396         }
7397         /* Grace only applies to regular-type OPENs */
7398         if (rfs4_clnt_in_grace(cp) &&
7399             (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7400                 *cs->statusp = resp->status = NFS4ERR_GRACE;
7401                 goto out;
7402         }
7403 
7404         /*
7405          * If previous state at the server existed then can_reclaim
7406          * will be set. If not reply NFS4ERR_NO_GRACE to the
7407          * client.
7408          */
7409         if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7410                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7411                 goto out;
7412         }
7413 
7414 
7415         /*
7416          * Reject the open if the client has missed the grace period
7417          */
7418         if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7419                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7420                 goto out;
7421         }
7422 
7423         /* Couple of up-front bookkeeping items */
7424         if (oo->ro_need_confirm) {
7425                 /*
7426                  * If this is a reclaim OPEN then we should not ask
7427                  * for a confirmation of the open_owner per the
7428                  * protocol specification.
7429                  */
7430                 if (claim == CLAIM_PREVIOUS)
7431                         oo->ro_need_confirm = FALSE;
7432                 else
7433                         resp->rflags |= OPEN4_RESULT_CONFIRM;
7434         }
7435         resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7436 
7437         /*
7438          * If there is an unshared filesystem mounted on this vnode,
7439          * do not allow to open/create in this directory.
7440          */
7441         if (vn_ismntpt(cs->vp)) {
7442                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7443                 goto out;
7444         }
7445 
7446         /*
7447          * access must be READ, WRITE, or BOTH.  No access is invalid.
7448          * deny can be READ, WRITE, BOTH, or NONE.
7449          * bits not defined for access/deny are invalid.
7450          */
7451         if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7452             (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7453             (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7454                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7455                 goto out;
7456         }
7457 
7458 
7459         /*
7460          * make sure attrset is zero before response is built.
7461          */
7462         resp->attrset = 0;
7463 
7464         switch (claim) {
7465         case CLAIM_NULL:
7466                 rfs4_do_opennull(cs, req, args, oo, resp);
7467                 break;
7468         case CLAIM_PREVIOUS:
7469                 rfs4_do_openprev(cs, req, args, oo, resp);
7470                 break;
7471         case CLAIM_DELEGATE_CUR:
7472                 rfs4_do_opendelcur(cs, req, args, oo, resp);
7473                 break;
7474         case CLAIM_DELEGATE_PREV:
7475                 rfs4_do_opendelprev(cs, req, args, oo, resp);
7476                 break;
7477         default:
7478                 resp->status = NFS4ERR_INVAL;
7479                 break;
7480         }
7481 
7482 out:
7483         rfs4_client_rele(cp);
7484 
7485         /* Catch sequence id handling here to make it a little easier */
7486         switch (resp->status) {
7487         case NFS4ERR_BADXDR:
7488         case NFS4ERR_BAD_SEQID:
7489         case NFS4ERR_BAD_STATEID:
7490         case NFS4ERR_NOFILEHANDLE:
7491         case NFS4ERR_RESOURCE:
7492         case NFS4ERR_STALE_CLIENTID:
7493         case NFS4ERR_STALE_STATEID:
7494                 /*
7495                  * The protocol states that if any of these errors are
7496                  * being returned, the sequence id should not be
7497                  * incremented.  Any other return requires an
7498                  * increment.
7499                  */
7500                 break;
7501         default:
7502                 /* Always update the lease in this case */
7503                 rfs4_update_lease(oo->ro_client);
7504 
7505                 /* Regular response - copy the result */
7506                 if (!replay)
7507                         rfs4_update_open_resp(oo, resop, &cs->fh);
7508 
7509                 /*
7510                  * REPLAY case: Only if the previous response was OK
7511                  * do we copy the filehandle.  If not OK, no
7512                  * filehandle to copy.
7513                  */
7514                 if (replay == TRUE &&
7515                     resp->status == NFS4_OK &&
7516                     oo->ro_reply_fh.nfs_fh4_val) {
7517                         /*
7518                          * If this is a replay, we must restore the
7519                          * current filehandle/vp to that of what was
7520                          * returned originally.  Try our best to do
7521                          * it.
7522                          */
7523                         nfs_fh4_fmt_t *fh_fmtp =
7524                             (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7525 
7526                         if (cs->exi)
7527                                 exi_rele(cs->exi);
7528                         cs->exi = checkexport(&fh_fmtp->fh4_fsid,
7529                             (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7530 
7531                         if (cs->exi == NULL) {
7532                                 resp->status = NFS4ERR_STALE;
7533                                 goto finish;
7534                         }
7535 
7536                         VN_RELE(cs->vp);
7537 
7538                         cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7539                             &resp->status);
7540 
7541                         if (cs->vp == NULL)
7542                                 goto finish;
7543 
7544                         nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7545                 }
7546 
7547                 /*
7548                  * If this was a replay, no need to update the
7549                  * sequence id. If the open_owner was not created on
7550                  * this pass, then update.  The first use of an
7551                  * open_owner will not bump the sequence id.
7552                  */
7553                 if (replay == FALSE && !create)
7554                         rfs4_update_open_sequence(oo);
7555                 /*
7556                  * If the client is receiving an error and the
7557                  * open_owner needs to be confirmed, there is no way
7558                  * to notify the client of this fact ignoring the fact
7559                  * that the server has no method of returning a
7560                  * stateid to confirm.  Therefore, the server needs to
7561                  * mark this open_owner in a way as to avoid the
7562                  * sequence id checking the next time the client uses
7563                  * this open_owner.
7564                  */
7565                 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7566                         oo->ro_postpone_confirm = TRUE;
7567                 /*
7568                  * If OK response then clear the postpone flag and
7569                  * reset the sequence id to keep in sync with the
7570                  * client.
7571                  */
7572                 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7573                         oo->ro_postpone_confirm = FALSE;
7574                         oo->ro_open_seqid = args->seqid;
7575                 }
7576                 break;
7577         }
7578 
7579 finish:
7580         *cs->statusp = resp->status;
7581 
7582         rfs4_sw_exit(&oo->ro_sw);
7583         rfs4_openowner_rele(oo);
7584 
7585 end:
7586         DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7587             OPEN4res *, resp);
7588 }
7589 
7590 /*ARGSUSED*/
7591 void
7592 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7593     struct svc_req *req, struct compound_state *cs)
7594 {
7595         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7596         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7597         rfs4_state_t *sp;
7598         nfsstat4 status;
7599 
7600         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7601             OPEN_CONFIRM4args *, args);
7602 
7603         if (cs->vp == NULL) {
7604                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7605                 goto out;
7606         }
7607 
7608         if (cs->vp->v_type != VREG) {
7609                 *cs->statusp = resp->status =
7610                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7611                 return;
7612         }
7613 
7614         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7615         if (status != NFS4_OK) {
7616                 *cs->statusp = resp->status = status;
7617                 goto out;
7618         }
7619 
7620         /* Ensure specified filehandle matches */
7621         if (cs->vp != sp->rs_finfo->rf_vp) {
7622                 rfs4_state_rele(sp);
7623                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7624                 goto out;
7625         }
7626 
7627         /* hold off other access to open_owner while we tinker */
7628         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7629 
7630         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7631         case NFS4_CHECK_STATEID_OKAY:
7632                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7633                     resop) != 0) {
7634                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7635                         break;
7636                 }
7637                 /*
7638                  * If it is the appropriate stateid and determined to
7639                  * be "OKAY" then this means that the stateid does not
7640                  * need to be confirmed and the client is in error for
7641                  * sending an OPEN_CONFIRM.
7642                  */
7643                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7644                 break;
7645         case NFS4_CHECK_STATEID_OLD:
7646                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7647                 break;
7648         case NFS4_CHECK_STATEID_BAD:
7649                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7650                 break;
7651         case NFS4_CHECK_STATEID_EXPIRED:
7652                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7653                 break;
7654         case NFS4_CHECK_STATEID_CLOSED:
7655                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7656                 break;
7657         case NFS4_CHECK_STATEID_REPLAY:
7658                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7659                     resop)) {
7660                 case NFS4_CHKSEQ_OKAY:
7661                         /*
7662                          * This is replayed stateid; if seqid matches
7663                          * next expected, then client is using wrong seqid.
7664                          */
7665                         /* fall through */
7666                 case NFS4_CHKSEQ_BAD:
7667                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7668                         break;
7669                 case NFS4_CHKSEQ_REPLAY:
7670                         /*
7671                          * Note this case is the duplicate case so
7672                          * resp->status is already set.
7673                          */
7674                         *cs->statusp = resp->status;
7675                         rfs4_update_lease(sp->rs_owner->ro_client);
7676                         break;
7677                 }
7678                 break;
7679         case NFS4_CHECK_STATEID_UNCONFIRMED:
7680                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7681                     resop) != NFS4_CHKSEQ_OKAY) {
7682                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7683                         break;
7684                 }
7685                 *cs->statusp = resp->status = NFS4_OK;
7686 
7687                 next_stateid(&sp->rs_stateid);
7688                 resp->open_stateid = sp->rs_stateid.stateid;
7689                 sp->rs_owner->ro_need_confirm = FALSE;
7690                 rfs4_update_lease(sp->rs_owner->ro_client);
7691                 rfs4_update_open_sequence(sp->rs_owner);
7692                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7693                 break;
7694         default:
7695                 ASSERT(FALSE);
7696                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7697                 break;
7698         }
7699         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7700         rfs4_state_rele(sp);
7701 
7702 out:
7703         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7704             OPEN_CONFIRM4res *, resp);
7705 }
7706 
7707 /*ARGSUSED*/
7708 void
7709 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7710     struct svc_req *req, struct compound_state *cs)
7711 {
7712         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7713         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7714         uint32_t access = args->share_access;
7715         uint32_t deny = args->share_deny;
7716         nfsstat4 status;
7717         rfs4_state_t *sp;
7718         rfs4_file_t *fp;
7719         int fflags = 0;
7720 
7721         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7722             OPEN_DOWNGRADE4args *, args);
7723 
7724         if (cs->vp == NULL) {
7725                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7726                 goto out;
7727         }
7728 
7729         if (cs->vp->v_type != VREG) {
7730                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7731                 return;
7732         }
7733 
7734         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7735         if (status != NFS4_OK) {
7736                 *cs->statusp = resp->status = status;
7737                 goto out;
7738         }
7739 
7740         /* Ensure specified filehandle matches */
7741         if (cs->vp != sp->rs_finfo->rf_vp) {
7742                 rfs4_state_rele(sp);
7743                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7744                 goto out;
7745         }
7746 
7747         /* hold off other access to open_owner while we tinker */
7748         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7749 
7750         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7751         case NFS4_CHECK_STATEID_OKAY:
7752                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7753                     resop) != NFS4_CHKSEQ_OKAY) {
7754                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7755                         goto end;
7756                 }
7757                 break;
7758         case NFS4_CHECK_STATEID_OLD:
7759                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7760                 goto end;
7761         case NFS4_CHECK_STATEID_BAD:
7762                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7763                 goto end;
7764         case NFS4_CHECK_STATEID_EXPIRED:
7765                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7766                 goto end;
7767         case NFS4_CHECK_STATEID_CLOSED:
7768                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7769                 goto end;
7770         case NFS4_CHECK_STATEID_UNCONFIRMED:
7771                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7772                 goto end;
7773         case NFS4_CHECK_STATEID_REPLAY:
7774                 /* Check the sequence id for the open owner */
7775                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7776                     resop)) {
7777                 case NFS4_CHKSEQ_OKAY:
7778                         /*
7779                          * This is replayed stateid; if seqid matches
7780                          * next expected, then client is using wrong seqid.
7781                          */
7782                         /* fall through */
7783                 case NFS4_CHKSEQ_BAD:
7784                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7785                         goto end;
7786                 case NFS4_CHKSEQ_REPLAY:
7787                         /*
7788                          * Note this case is the duplicate case so
7789                          * resp->status is already set.
7790                          */
7791                         *cs->statusp = resp->status;
7792                         rfs4_update_lease(sp->rs_owner->ro_client);
7793                         goto end;
7794                 }
7795                 break;
7796         default:
7797                 ASSERT(FALSE);
7798                 break;
7799         }
7800 
7801         rfs4_dbe_lock(sp->rs_dbe);
7802         /*
7803          * Check that the new access modes and deny modes are valid.
7804          * Check that no invalid bits are set.
7805          */
7806         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7807             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7808                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7809                 rfs4_update_open_sequence(sp->rs_owner);
7810                 rfs4_dbe_unlock(sp->rs_dbe);
7811                 goto end;
7812         }
7813 
7814         /*
7815          * The new modes must be a subset of the current modes and
7816          * the access must specify at least one mode. To test that
7817          * the new mode is a subset of the current modes we bitwise
7818          * AND them together and check that the result equals the new
7819          * mode. For example:
7820          * New mode, access == R and current mode, sp->rs_open_access  == RW
7821          * access & sp->rs_open_access == R == access, so the new access mode
7822          * is valid. Consider access == RW, sp->rs_open_access = R
7823          * access & sp->rs_open_access == R != access, so the new access mode
7824          * is invalid.
7825          */
7826         if ((access & sp->rs_open_access) != access ||
7827             (deny & sp->rs_open_deny) != deny ||
7828             (access &
7829             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7830                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7831                 rfs4_update_open_sequence(sp->rs_owner);
7832                 rfs4_dbe_unlock(sp->rs_dbe);
7833                 goto end;
7834         }
7835 
7836         /*
7837          * Release any share locks associated with this stateID.
7838          * Strictly speaking, this violates the spec because the
7839          * spec effectively requires that open downgrade be atomic.
7840          * At present, fs_shrlock does not have this capability.
7841          */
7842         (void) rfs4_unshare(sp);
7843 
7844         status = rfs4_share(sp, access, deny);
7845         if (status != NFS4_OK) {
7846                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7847                 rfs4_update_open_sequence(sp->rs_owner);
7848                 rfs4_dbe_unlock(sp->rs_dbe);
7849                 goto end;
7850         }
7851 
7852         fp = sp->rs_finfo;
7853         rfs4_dbe_lock(fp->rf_dbe);
7854 
7855         /*
7856          * If the current mode has deny read and the new mode
7857          * does not, decrement the number of deny read mode bits
7858          * and if it goes to zero turn off the deny read bit
7859          * on the file.
7860          */
7861         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7862             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7863                 fp->rf_deny_read--;
7864                 if (fp->rf_deny_read == 0)
7865                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7866         }
7867 
7868         /*
7869          * If the current mode has deny write and the new mode
7870          * does not, decrement the number of deny write mode bits
7871          * and if it goes to zero turn off the deny write bit
7872          * on the file.
7873          */
7874         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7875             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7876                 fp->rf_deny_write--;
7877                 if (fp->rf_deny_write == 0)
7878                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7879         }
7880 
7881         /*
7882          * If the current mode has access read and the new mode
7883          * does not, decrement the number of access read mode bits
7884          * and if it goes to zero turn off the access read bit
7885          * on the file.  set fflags to FREAD for the call to
7886          * vn_open_downgrade().
7887          */
7888         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7889             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7890                 fp->rf_access_read--;
7891                 if (fp->rf_access_read == 0)
7892                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7893                 fflags |= FREAD;
7894         }
7895 
7896         /*
7897          * If the current mode has access write and the new mode
7898          * does not, decrement the number of access write mode bits
7899          * and if it goes to zero turn off the access write bit
7900          * on the file.  set fflags to FWRITE for the call to
7901          * vn_open_downgrade().
7902          */
7903         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7904             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7905                 fp->rf_access_write--;
7906                 if (fp->rf_access_write == 0)
7907                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7908                 fflags |= FWRITE;
7909         }
7910 
7911         /* Check that the file is still accessible */
7912         ASSERT(fp->rf_share_access);
7913 
7914         rfs4_dbe_unlock(fp->rf_dbe);
7915 
7916         /* now set the new open access and deny modes */
7917         sp->rs_open_access = access;
7918         sp->rs_open_deny = deny;
7919 
7920         /*
7921          * we successfully downgraded the share lock, now we need to downgrade
7922          * the open. it is possible that the downgrade was only for a deny
7923          * mode and we have nothing else to do.
7924          */
7925         if ((fflags & (FREAD|FWRITE)) != 0)
7926                 vn_open_downgrade(cs->vp, fflags);
7927 
7928         /* Update the stateid */
7929         next_stateid(&sp->rs_stateid);
7930         resp->open_stateid = sp->rs_stateid.stateid;
7931 
7932         rfs4_dbe_unlock(sp->rs_dbe);
7933 
7934         *cs->statusp = resp->status = NFS4_OK;
7935         /* Update the lease */
7936         rfs4_update_lease(sp->rs_owner->ro_client);
7937         /* And the sequence */
7938         rfs4_update_open_sequence(sp->rs_owner);
7939         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7940 
7941 end:
7942         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7943         rfs4_state_rele(sp);
7944 out:
7945         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7946             OPEN_DOWNGRADE4res *, resp);
7947 }
7948 
/*
 * Search the first n bytes of the buffer s1 for the NUL-terminated string
 * s2.  The buffer itself does not have to be NUL-terminated.  Returns a
 * pointer to the first match inside s1, or NULL when s2 does not fit in
 * or does not occur within those n bytes.  An empty s2 matches at s1.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        const size_t patlen = strlen(s2);
        char *cur;
        size_t left;

        /* Slide a patlen-sized window while enough bytes remain. */
        for (cur = (char *)s1, left = n; left >= patlen; cur++, left--) {
                if (bcmp(cur, s2, patlen) == 0)
                        return (cur);
        }

        return (NULL);
}
7964 
7965 /*
7966  * The logic behind this function is detailed in the NFSv4 RFC in the
7967  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7968  * that section for explicit guidance to server behavior for
7969  * SETCLIENTID.
7970  */
7971 void
7972 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7973     struct svc_req *req, struct compound_state *cs)
7974 {
7975         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7976         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7977         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7978         rfs4_clntip_t *ci;
7979         bool_t create;
7980         char *addr, *netid;
7981         int len;
7982 
7983         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7984             SETCLIENTID4args *, args);
7985 retry:
7986         newcp = cp_confirmed = cp_unconfirmed = NULL;
7987 
7988         /*
7989          * Save the caller's IP address
7990          */
7991         args->client.cl_addr =
7992             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7993 
7994         /*
7995          * Record if it is a Solaris client that cannot handle referrals.
7996          */
7997         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
7998             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
7999                 /* Add a "yes, it's downrev" record */
8000                 create = TRUE;
8001                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8002                 ASSERT(ci != NULL);
8003                 rfs4_dbe_rele(ci->ri_dbe);
8004         } else {
8005                 /* Remove any previous record */
8006                 rfs4_invalidate_clntip(args->client.cl_addr);
8007         }
8008 
8009         /*
8010          * In search of an EXISTING client matching the incoming
8011          * request to establish a new client identifier at the server
8012          */
8013         create = TRUE;
8014         cp = rfs4_findclient(&args->client, &create, NULL);
8015 
8016         /* Should never happen */
8017         ASSERT(cp != NULL);
8018 
8019         if (cp == NULL) {
8020                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8021                 goto out;
8022         }
8023 
8024         /*
8025          * Easiest case. Client identifier is newly created and is
8026          * unconfirmed.  Also note that for this case, no other
8027          * entries exist for the client identifier.  Nothing else to
8028          * check.  Just setup the response and respond.
8029          */
8030         if (create) {
8031                 *cs->statusp = res->status = NFS4_OK;
8032                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8033                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8034                     cp->rc_confirm_verf;
8035                 /* Setup callback information; CB_NULL confirmation later */
8036                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8037 
8038                 rfs4_client_rele(cp);
8039                 goto out;
8040         }
8041 
8042         /*
8043          * An existing, confirmed client may exist but it may not have
8044          * been active for at least one lease period.  If so, then
8045          * "close" the client and create a new client identifier
8046          */
8047         if (rfs4_lease_expired(cp)) {
8048                 rfs4_client_close(cp);
8049                 goto retry;
8050         }
8051 
8052         if (cp->rc_need_confirm == TRUE)
8053                 cp_unconfirmed = cp;
8054         else
8055                 cp_confirmed = cp;
8056 
8057         cp = NULL;
8058 
8059         /*
8060          * We have a confirmed client, now check for an
8061          * unconfirmed entry
8062          */
8063         if (cp_confirmed) {
8064                 /* If creds don't match then client identifier is inuse */
8065                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8066                         rfs4_cbinfo_t *cbp;
8067                         /*
8068                          * Someone else has established this client
8069                          * id. Try and say who they are. We will use
8070                          * the callback address supplied by the
8071                          * first client.
8072                          */
8073                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8074 
8075                         addr = netid = NULL;
8076 
8077                         cbp = &cp_confirmed->rc_cbinfo;
8078                         if (cbp->cb_callback.cb_location.r_addr &&
8079                             cbp->cb_callback.cb_location.r_netid) {
8080                                 cb_client4 *cbcp = &cbp->cb_callback;
8081 
8082                                 len = strlen(cbcp->cb_location.r_addr)+1;
8083                                 addr = kmem_alloc(len, KM_SLEEP);
8084                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8085                                 len = strlen(cbcp->cb_location.r_netid)+1;
8086                                 netid = kmem_alloc(len, KM_SLEEP);
8087                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8088                         }
8089 
8090                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8091                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8092 
8093                         rfs4_client_rele(cp_confirmed);
8094                 }
8095 
8096                 /*
8097                  * Confirmed, creds match, and verifier matches; must
8098                  * be an update of the callback info
8099                  */
8100                 if (cp_confirmed->rc_nfs_client.verifier ==
8101                     args->client.verifier) {
8102                         /* Setup callback information */
8103                         rfs4_client_setcb(cp_confirmed, &args->callback,
8104                             args->callback_ident);
8105 
8106                         /* everything okay -- move ahead */
8107                         *cs->statusp = res->status = NFS4_OK;
8108                         res->SETCLIENTID4res_u.resok4.clientid =
8109                             cp_confirmed->rc_clientid;
8110 
8111                         /* update the confirm_verifier and return it */
8112                         rfs4_client_scv_next(cp_confirmed);
8113                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8114                             cp_confirmed->rc_confirm_verf;
8115 
8116                         rfs4_client_rele(cp_confirmed);
8117                         goto out;
8118                 }
8119 
8120                 /*
8121                  * Creds match but the verifier doesn't.  Must search
8122                  * for an unconfirmed client that would be replaced by
8123                  * this request.
8124                  */
8125                 create = FALSE;
8126                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8127                     cp_confirmed);
8128         }
8129 
8130         /*
8131          * At this point, we have taken care of the brand new client
8132          * struct, INUSE case, update of an existing, and confirmed
8133          * client struct.
8134          */
8135 
8136         /*
8137          * check to see if things have changed while we originally
8138          * picked up the client struct.  If they have, then return and
8139          * retry the processing of this SETCLIENTID request.
8140          */
8141         if (cp_unconfirmed) {
8142                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8143                 if (!cp_unconfirmed->rc_need_confirm) {
8144                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8145                         rfs4_client_rele(cp_unconfirmed);
8146                         if (cp_confirmed)
8147                                 rfs4_client_rele(cp_confirmed);
8148                         goto retry;
8149                 }
8150                 /* do away with the old unconfirmed one */
8151                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8152                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8153                 rfs4_client_rele(cp_unconfirmed);
8154                 cp_unconfirmed = NULL;
8155         }
8156 
8157         /*
8158          * This search will temporarily hide the confirmed client
8159          * struct while a new client struct is created as the
8160          * unconfirmed one.
8161          */
8162         create = TRUE;
8163         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8164 
8165         ASSERT(newcp != NULL);
8166 
8167         if (newcp == NULL) {
8168                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8169                 rfs4_client_rele(cp_confirmed);
8170                 goto out;
8171         }
8172 
8173         /*
8174          * If one was not created, then a similar request must be in
8175          * process so release and start over with this one
8176          */
8177         if (create != TRUE) {
8178                 rfs4_client_rele(newcp);
8179                 if (cp_confirmed)
8180                         rfs4_client_rele(cp_confirmed);
8181                 goto retry;
8182         }
8183 
8184         *cs->statusp = res->status = NFS4_OK;
8185         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8186         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8187             newcp->rc_confirm_verf;
8188         /* Setup callback information; CB_NULL confirmation later */
8189         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8190 
8191         newcp->rc_cp_confirmed = cp_confirmed;
8192 
8193         rfs4_client_rele(newcp);
8194 
8195 out:
8196         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8197             SETCLIENTID4res *, res);
8198 }
8199 
8200 /*ARGSUSED*/
8201 void
8202 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8203     struct svc_req *req, struct compound_state *cs)
8204 {
8205         SETCLIENTID_CONFIRM4args *args =
8206             &argop->nfs_argop4_u.opsetclientid_confirm;
8207         SETCLIENTID_CONFIRM4res *res =
8208             &resop->nfs_resop4_u.opsetclientid_confirm;
8209         rfs4_client_t *cp, *cptoclose = NULL;
8210 
8211         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8212             struct compound_state *, cs,
8213             SETCLIENTID_CONFIRM4args *, args);
8214 
8215         *cs->statusp = res->status = NFS4_OK;
8216 
8217         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8218 
8219         if (cp == NULL) {
8220                 *cs->statusp = res->status =
8221                     rfs4_check_clientid(&args->clientid, 1);
8222                 goto out;
8223         }
8224 
8225         if (!creds_ok(cp, req, cs)) {
8226                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8227                 rfs4_client_rele(cp);
8228                 goto out;
8229         }
8230 
8231         /* If the verifier doesn't match, the record doesn't match */
8232         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8233                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8234                 rfs4_client_rele(cp);
8235                 goto out;
8236         }
8237 
8238         rfs4_dbe_lock(cp->rc_dbe);
8239         cp->rc_need_confirm = FALSE;
8240         if (cp->rc_cp_confirmed) {
8241                 cptoclose = cp->rc_cp_confirmed;
8242                 cptoclose->rc_ss_remove = 1;
8243                 cp->rc_cp_confirmed = NULL;
8244         }
8245 
8246         /*
8247          * Update the client's associated server instance, if it's changed
8248          * since the client was created.
8249          */
8250         if (rfs4_servinst(cp) != rfs4_cur_servinst)
8251                 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8252 
8253         /*
8254          * Record clientid in stable storage.
8255          * Must be done after server instance has been assigned.
8256          */
8257         rfs4_ss_clid(cp);
8258 
8259         rfs4_dbe_unlock(cp->rc_dbe);
8260 
8261         if (cptoclose)
8262                 /* don't need to rele, client_close does it */
8263                 rfs4_client_close(cptoclose);
8264 
8265         /* If needed, initiate CB_NULL call for callback path */
8266         rfs4_deleg_cb_check(cp);
8267         rfs4_update_lease(cp);
8268 
8269         /*
8270          * Check to see if client can perform reclaims
8271          */
8272         rfs4_ss_chkclid(cp);
8273 
8274         rfs4_client_rele(cp);
8275 
8276 out:
8277         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8278             struct compound_state *, cs,
8279             SETCLIENTID_CONFIRM4 *, res);
8280 }
8281 
8282 
8283 /*ARGSUSED*/
8284 void
8285 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8286     struct svc_req *req, struct compound_state *cs)
8287 {
8288         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8289         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8290         rfs4_state_t *sp;
8291         nfsstat4 status;
8292 
8293         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8294             CLOSE4args *, args);
8295 
8296         if (cs->vp == NULL) {
8297                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8298                 goto out;
8299         }
8300 
8301         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8302         if (status != NFS4_OK) {
8303                 *cs->statusp = resp->status = status;
8304                 goto out;
8305         }
8306 
8307         /* Ensure specified filehandle matches */
8308         if (cs->vp != sp->rs_finfo->rf_vp) {
8309                 rfs4_state_rele(sp);
8310                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8311                 goto out;
8312         }
8313 
8314         /* hold off other access to open_owner while we tinker */
8315         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8316 
8317         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8318         case NFS4_CHECK_STATEID_OKAY:
8319                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8320                     resop) != NFS4_CHKSEQ_OKAY) {
8321                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8322                         goto end;
8323                 }
8324                 break;
8325         case NFS4_CHECK_STATEID_OLD:
8326                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8327                 goto end;
8328         case NFS4_CHECK_STATEID_BAD:
8329                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8330                 goto end;
8331         case NFS4_CHECK_STATEID_EXPIRED:
8332                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8333                 goto end;
8334         case NFS4_CHECK_STATEID_CLOSED:
8335                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8336                 goto end;
8337         case NFS4_CHECK_STATEID_UNCONFIRMED:
8338                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8339                 goto end;
8340         case NFS4_CHECK_STATEID_REPLAY:
8341                 /* Check the sequence id for the open owner */
8342                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8343                     resop)) {
8344                 case NFS4_CHKSEQ_OKAY:
8345                         /*
8346                          * This is replayed stateid; if seqid matches
8347                          * next expected, then client is using wrong seqid.
8348                          */
8349                         /* FALL THROUGH */
8350                 case NFS4_CHKSEQ_BAD:
8351                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8352                         goto end;
8353                 case NFS4_CHKSEQ_REPLAY:
8354                         /*
8355                          * Note this case is the duplicate case so
8356                          * resp->status is already set.
8357                          */
8358                         *cs->statusp = resp->status;
8359                         rfs4_update_lease(sp->rs_owner->ro_client);
8360                         goto end;
8361                 }
8362                 break;
8363         default:
8364                 ASSERT(FALSE);
8365                 break;
8366         }
8367 
8368         rfs4_dbe_lock(sp->rs_dbe);
8369 
8370         /* Update the stateid. */
8371         next_stateid(&sp->rs_stateid);
8372         resp->open_stateid = sp->rs_stateid.stateid;
8373 
8374         rfs4_dbe_unlock(sp->rs_dbe);
8375 
8376         rfs4_update_lease(sp->rs_owner->ro_client);
8377         rfs4_update_open_sequence(sp->rs_owner);
8378         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8379 
8380         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8381 
8382         *cs->statusp = resp->status = status;
8383 
8384 end:
8385         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8386         rfs4_state_rele(sp);
8387 out:
8388         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8389             CLOSE4res *, resp);
8390 }
8391 
8392 /*
8393  * Manage the counts on the file struct and close all file locks
8394  */
8395 /*ARGSUSED*/
8396 void
8397 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8398     bool_t close_of_client)
8399 {
8400         rfs4_file_t *fp = sp->rs_finfo;
8401         rfs4_lo_state_t *lsp;
8402         int fflags = 0;
8403 
8404         /*
8405          * If this call is part of the larger closing down of client
8406          * state then it is just easier to release all locks
8407          * associated with this client instead of going through each
8408          * individual file and cleaning locks there.
8409          */
8410         if (close_of_client) {
8411                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8412                     !list_is_empty(&sp->rs_lostatelist) &&
8413                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8414                         /* Is the PxFS kernel module loaded? */
8415                         if (lm_remove_file_locks != NULL) {
8416                                 int new_sysid;
8417 
8418                                 /* Encode the cluster nodeid in new sysid */
8419                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8420                                 lm_set_nlmid_flk(&new_sysid);
8421 
8422                                 /*
8423                                  * This PxFS routine removes file locks for a
8424                                  * client over all nodes of a cluster.
8425                                  */
8426                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8427                                     "lm_remove_file_locks(sysid=0x%x)\n",
8428                                     new_sysid));
8429                                 (*lm_remove_file_locks)(new_sysid);
8430                         } else {
8431                                 struct flock64 flk;
8432 
8433                                 /* Release all locks for this client */
8434                                 flk.l_type = F_UNLKSYS;
8435                                 flk.l_whence = 0;
8436                                 flk.l_start = 0;
8437                                 flk.l_len = 0;
8438                                 flk.l_sysid =
8439                                     sp->rs_owner->ro_client->rc_sysidt;
8440                                 flk.l_pid = 0;
8441                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8442                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8443                                     (u_offset_t)0, NULL, CRED(), NULL);
8444                         }
8445 
8446                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8447                 }
8448         }
8449 
8450         /*
8451          * Release all locks on this file by this lock owner or at
8452          * least mark the locks as having been released
8453          */
8454         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8455             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8456                 lsp->rls_locks_cleaned = TRUE;
8457 
8458                 /* Was this already taken care of above? */
8459                 if (!close_of_client &&
8460                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8461                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8462                             lsp->rls_locker->rl_pid,
8463                             lsp->rls_locker->rl_client->rc_sysidt);
8464         }
8465 
8466         /*
8467          * Release any shrlocks associated with this open state ID.
8468          * This must be done before the rfs4_state gets marked closed.
8469          */
8470         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8471                 (void) rfs4_unshare(sp);
8472 
8473         if (sp->rs_open_access) {
8474                 rfs4_dbe_lock(fp->rf_dbe);
8475 
8476                 /*
8477                  * Decrement the count for each access and deny bit that this
8478                  * state has contributed to the file.
8479                  * If the file counts go to zero
8480                  * clear the appropriate bit in the appropriate mask.
8481                  */
8482                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8483                         fp->rf_access_read--;
8484                         fflags |= FREAD;
8485                         if (fp->rf_access_read == 0)
8486                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8487                 }
8488                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8489                         fp->rf_access_write--;
8490                         fflags |= FWRITE;
8491                         if (fp->rf_access_write == 0)
8492                                 fp->rf_share_access &=
8493                                     ~OPEN4_SHARE_ACCESS_WRITE;
8494                 }
8495                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8496                         fp->rf_deny_read--;
8497                         if (fp->rf_deny_read == 0)
8498                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8499                 }
8500                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8501                         fp->rf_deny_write--;
8502                         if (fp->rf_deny_write == 0)
8503                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8504                 }
8505 
8506                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8507 
8508                 rfs4_dbe_unlock(fp->rf_dbe);
8509 
8510                 sp->rs_open_access = 0;
8511                 sp->rs_open_deny = 0;
8512         }
8513 }
8514 
8515 /*
8516  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8517  */
8518 static nfsstat4
8519 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8520 {
8521         rfs4_lockowner_t *lo;
8522         rfs4_client_t *cp;
8523         uint32_t len;
8524 
8525         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8526         if (lo != NULL) {
8527                 cp = lo->rl_client;
8528                 if (rfs4_lease_expired(cp)) {
8529                         rfs4_lockowner_rele(lo);
8530                         rfs4_dbe_hold(cp->rc_dbe);
8531                         rfs4_client_close(cp);
8532                         return (NFS4ERR_EXPIRED);
8533                 }
8534                 dp->owner.clientid = lo->rl_owner.clientid;
8535                 len = lo->rl_owner.owner_len;
8536                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8537                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8538                 dp->owner.owner_len = len;
8539                 rfs4_lockowner_rele(lo);
8540                 goto finish;
8541         }
8542 
8543         /*
8544          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8545          * of the client id contain the boot time for a NFS4 lock. So we
8546          * fabricate and identity by setting clientid to the sysid, and
8547          * the lock owner to the pid.
8548          */
8549         dp->owner.clientid = flk->l_sysid;
8550         len = sizeof (pid_t);
8551         dp->owner.owner_len = len;
8552         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8553         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8554 finish:
8555         dp->offset = flk->l_start;
8556         dp->length = flk->l_len;
8557 
8558         if (flk->l_type == F_RDLCK)
8559                 dp->locktype = READ_LT;
8560         else if (flk->l_type == F_WRLCK)
8561                 dp->locktype = WRITE_LT;
8562         else
8563                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8564 
8565         return (NFS4_OK);
8566 }
8567 
8568 /*
8569  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8570  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8571  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8572  * for that (obviously); they are sending the LOCK requests with some delays
8573  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8574  * locking and delay implementation at the client side.
8575  *
8576  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8577  * fast retries on its own (the for loop below) in a hope the lock will be
8578  * available soon.  And if not, the client won't need to resend the LOCK
8579  * requests so fast to check the lock availability.  This basically saves some
8580  * network traffic and tries to make sure the client gets the lock ASAP.
8581  */
8582 static int
8583 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8584 {
8585         int error;
8586         struct flock64 flk;
8587         int i;
8588         clock_t delaytime;
8589         int cmd;
8590         int spin_cnt = 0;
8591 
8592         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8593 retry:
8594         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8595 
8596         for (i = 0; i < rfs4_maxlock_tries; i++) {
8597                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8598                 error = VOP_FRLOCK(vp, cmd,
8599                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8600 
8601                 if (error != EAGAIN && error != EACCES)
8602                         break;
8603 
8604                 if (i < rfs4_maxlock_tries - 1) {
8605                         delay(delaytime);
8606                         delaytime *= 2;
8607                 }
8608         }
8609 
8610         if (error == EAGAIN || error == EACCES) {
8611                 /* Get the owner of the lock */
8612                 flk = *flock;
8613                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8614                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8615                     NULL) == 0) {
8616                         /*
8617                          * There's a race inherent in the current VOP_FRLOCK
8618                          * design where:
8619                          * a: "other guy" takes a lock that conflicts with a
8620                          * lock we want
8621                          * b: we attempt to take our lock (non-blocking) and
8622                          * the attempt fails.
8623                          * c: "other guy" releases the conflicting lock
8624                          * d: we ask what lock conflicts with the lock we want,
8625                          * getting F_UNLCK (no lock blocks us)
8626                          *
8627                          * If we retry the non-blocking lock attempt in this
8628                          * case (restart at step 'b') there's some possibility
8629                          * that many such attempts might fail.  However a test
8630                          * designed to actually provoke this race shows that
8631                          * the vast majority of cases require no retry, and
8632                          * only a few took as many as three retries.  Here's
8633                          * the test outcome:
8634                          *
8635                          *         number of retries    how many times we needed
8636                          *                              that many retries
8637                          *         0                    79461
8638                          *         1                      862
8639                          *         2                       49
8640                          *         3                        5
8641                          *
8642                          * Given those empirical results, we arbitrarily limit
8643                          * the retry count to ten.
8644                          *
8645                          * If we actually make to ten retries and give up,
8646                          * nothing catastrophic happens, but we're unable to
8647                          * return the information about the conflicting lock to
8648                          * the NFS client.  That's an acceptable trade off vs.
8649                          * letting this retry loop run forever.
8650                          */
8651                         if (flk.l_type == F_UNLCK) {
8652                                 if (spin_cnt++ < 10) {
8653                                         /* No longer locked, retry */
8654                                         goto retry;
8655                                 }
8656                         } else {
8657                                 *flock = flk;
8658                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8659                                     F_GETLK, &flk);
8660                         }
8661                 }
8662         }
8663 
8664         return (error);
8665 }
8666 
8667 /*ARGSUSED*/
8668 static nfsstat4
8669 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8670     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8671 {
8672         nfsstat4 status;
8673         rfs4_lockowner_t *lo = lsp->rls_locker;
8674         rfs4_state_t *sp = lsp->rls_state;
8675         struct flock64 flock;
8676         int16_t ltype;
8677         int flag;
8678         int error;
8679         sysid_t sysid;
8680         LOCK4res *lres;
8681         vnode_t *vp;
8682 
8683         if (rfs4_lease_expired(lo->rl_client)) {
8684                 return (NFS4ERR_EXPIRED);
8685         }
8686 
8687         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8688                 return (status);
8689 
8690         /* Check for zero length. To lock to end of file use all ones for V4 */
8691         if (length == 0)
8692                 return (NFS4ERR_INVAL);
8693         else if (length == (length4)(~0))
8694                 length = 0;             /* Posix to end of file  */
8695 
8696 retry:
8697         rfs4_dbe_lock(sp->rs_dbe);
8698         if (sp->rs_closed == TRUE) {
8699                 rfs4_dbe_unlock(sp->rs_dbe);
8700                 return (NFS4ERR_OLD_STATEID);
8701         }
8702 
8703         if (resop->resop != OP_LOCKU) {
8704                 switch (locktype) {
8705                 case READ_LT:
8706                 case READW_LT:
8707                         if ((sp->rs_share_access
8708                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8709                                 rfs4_dbe_unlock(sp->rs_dbe);
8710 
8711                                 return (NFS4ERR_OPENMODE);
8712                         }
8713                         ltype = F_RDLCK;
8714                         break;
8715                 case WRITE_LT:
8716                 case WRITEW_LT:
8717                         if ((sp->rs_share_access
8718                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8719                                 rfs4_dbe_unlock(sp->rs_dbe);
8720 
8721                                 return (NFS4ERR_OPENMODE);
8722                         }
8723                         ltype = F_WRLCK;
8724                         break;
8725                 }
8726         } else
8727                 ltype = F_UNLCK;
8728 
8729         flock.l_type = ltype;
8730         flock.l_whence = 0;             /* SEEK_SET */
8731         flock.l_start = offset;
8732         flock.l_len = length;
8733         flock.l_sysid = sysid;
8734         flock.l_pid = lsp->rls_locker->rl_pid;
8735 
8736         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8737         if (flock.l_len < 0 || flock.l_start < 0) {
8738                 rfs4_dbe_unlock(sp->rs_dbe);
8739                 return (NFS4ERR_INVAL);
8740         }
8741 
8742         /*
8743          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8744          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8745          */
8746         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8747 
8748         vp = sp->rs_finfo->rf_vp;
8749         VN_HOLD(vp);
8750 
8751         /*
8752          * We need to unlock sp before we call the underlying filesystem to
8753          * acquire the file lock.
8754          */
8755         rfs4_dbe_unlock(sp->rs_dbe);
8756 
8757         error = setlock(vp, &flock, flag, cred);
8758 
8759         /*
8760          * Make sure the file is still open.  In a case the file was closed in
8761          * the meantime, clean the lock we acquired using the setlock() call
8762          * above, and return the appropriate error.
8763          */
8764         rfs4_dbe_lock(sp->rs_dbe);
8765         if (sp->rs_closed == TRUE) {
8766                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8767                 rfs4_dbe_unlock(sp->rs_dbe);
8768 
8769                 VN_RELE(vp);
8770 
8771                 return (NFS4ERR_OLD_STATEID);
8772         }
8773         rfs4_dbe_unlock(sp->rs_dbe);
8774 
8775         VN_RELE(vp);
8776 
8777         if (error == 0) {
8778                 rfs4_dbe_lock(lsp->rls_dbe);
8779                 next_stateid(&lsp->rls_lockid);
8780                 rfs4_dbe_unlock(lsp->rls_dbe);
8781         }
8782 
8783         /*
8784          * N.B. We map error values to nfsv4 errors. This is differrent
8785          * than puterrno4 routine.
8786          */
8787         switch (error) {
8788         case 0:
8789                 status = NFS4_OK;
8790                 break;
8791         case EAGAIN:
8792         case EACCES:            /* Old value */
8793                 /* Can only get here if op is OP_LOCK */
8794                 ASSERT(resop->resop == OP_LOCK);
8795                 lres = &resop->nfs_resop4_u.oplock;
8796                 status = NFS4ERR_DENIED;
8797                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8798                     == NFS4ERR_EXPIRED)
8799                         goto retry;
8800                 break;
8801         case ENOLCK:
8802                 status = NFS4ERR_DELAY;
8803                 break;
8804         case EOVERFLOW:
8805                 status = NFS4ERR_INVAL;
8806                 break;
8807         case EINVAL:
8808                 status = NFS4ERR_NOTSUPP;
8809                 break;
8810         default:
8811                 status = NFS4ERR_SERVERFAULT;
8812                 break;
8813         }
8814 
8815         return (status);
8816 }
8817 
8818 /*ARGSUSED*/
8819 void
8820 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8821     struct svc_req *req, struct compound_state *cs)
8822 {
8823         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8824         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8825         nfsstat4 status;
8826         stateid4 *stateid;
8827         rfs4_lockowner_t *lo;
8828         rfs4_client_t *cp;
8829         rfs4_state_t *sp = NULL;
8830         rfs4_lo_state_t *lsp = NULL;
8831         bool_t ls_sw_held = FALSE;
8832         bool_t create = TRUE;
8833         bool_t lcreate = TRUE;
8834         bool_t dup_lock = FALSE;
8835         int rc;
8836 
8837         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8838             LOCK4args *, args);
8839 
8840         if (cs->vp == NULL) {
8841                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8842                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8843                     cs, LOCK4res *, resp);
8844                 return;
8845         }
8846 
8847         if (args->locker.new_lock_owner) {
8848                 /* Create a new lockowner for this instance */
8849                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8850 
8851                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8852 
8853                 stateid = &olo->open_stateid;
8854                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8855                 if (status != NFS4_OK) {
8856                         NFS4_DEBUG(rfs4_debug,
8857                             (CE_NOTE, "Get state failed in lock %d", status));
8858                         *cs->statusp = resp->status = status;
8859                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8860                             cs, LOCK4res *, resp);
8861                         return;
8862                 }
8863 
8864                 /* Ensure specified filehandle matches */
8865                 if (cs->vp != sp->rs_finfo->rf_vp) {
8866                         rfs4_state_rele(sp);
8867                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8868                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8869                             cs, LOCK4res *, resp);
8870                         return;
8871                 }
8872 
8873                 /* hold off other access to open_owner while we tinker */
8874                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8875 
8876                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8877                 case NFS4_CHECK_STATEID_OLD:
8878                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8879                         goto end;
8880                 case NFS4_CHECK_STATEID_BAD:
8881                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8882                         goto end;
8883                 case NFS4_CHECK_STATEID_EXPIRED:
8884                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8885                         goto end;
8886                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8887                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8888                         goto end;
8889                 case NFS4_CHECK_STATEID_CLOSED:
8890                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8891                         goto end;
8892                 case NFS4_CHECK_STATEID_OKAY:
8893                 case NFS4_CHECK_STATEID_REPLAY:
8894                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8895                             sp->rs_owner, resop)) {
8896                         case NFS4_CHKSEQ_OKAY:
8897                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8898                                         break;
8899                                 /*
8900                                  * This is replayed stateid; if seqid
8901                                  * matches next expected, then client
8902                                  * is using wrong seqid.
8903                                  */
8904                                 /* FALLTHROUGH */
8905                         case NFS4_CHKSEQ_BAD:
8906                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8907                                 goto end;
8908                         case NFS4_CHKSEQ_REPLAY:
8909                                 /* This is a duplicate LOCK request */
8910                                 dup_lock = TRUE;
8911 
8912                                 /*
8913                                  * For a duplicate we do not want to
8914                                  * create a new lockowner as it should
8915                                  * already exist.
8916                                  * Turn off the lockowner create flag.
8917                                  */
8918                                 lcreate = FALSE;
8919                         }
8920                         break;
8921                 }
8922 
8923                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8924                 if (lo == NULL) {
8925                         NFS4_DEBUG(rfs4_debug,
8926                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8927                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8928                         goto end;
8929                 }
8930 
8931                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8932                 if (lsp == NULL) {
8933                         rfs4_update_lease(sp->rs_owner->ro_client);
8934                         /*
8935                          * Only update theh open_seqid if this is not
8936                          * a duplicate request
8937                          */
8938                         if (dup_lock == FALSE) {
8939                                 rfs4_update_open_sequence(sp->rs_owner);
8940                         }
8941 
8942                         NFS4_DEBUG(rfs4_debug,
8943                             (CE_NOTE, "rfs4_op_lock: no state"));
8944                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8945                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8946                         rfs4_lockowner_rele(lo);
8947                         goto end;
8948                 }
8949 
8950                 /*
8951                  * This is the new_lock_owner branch and the client is
8952                  * supposed to be associating a new lock_owner with
8953                  * the open file at this point.  If we find that a
8954                  * lock_owner/state association already exists and a
8955                  * successful LOCK request was returned to the client,
8956                  * an error is returned to the client since this is
8957                  * not appropriate.  The client should be using the
8958                  * existing lock_owner branch.
8959                  */
8960                 if (dup_lock == FALSE && create == FALSE) {
8961                         if (lsp->rls_lock_completed == TRUE) {
8962                                 *cs->statusp =
8963                                     resp->status = NFS4ERR_BAD_SEQID;
8964                                 rfs4_lockowner_rele(lo);
8965                                 goto end;
8966                         }
8967                 }
8968 
8969                 rfs4_update_lease(sp->rs_owner->ro_client);
8970 
8971                 /*
8972                  * Only update theh open_seqid if this is not
8973                  * a duplicate request
8974                  */
8975                 if (dup_lock == FALSE) {
8976                         rfs4_update_open_sequence(sp->rs_owner);
8977                 }
8978 
8979                 /*
8980                  * If this is a duplicate lock request, just copy the
8981                  * previously saved reply and return.
8982                  */
8983                 if (dup_lock == TRUE) {
8984                         /* verify that lock_seqid's match */
8985                         if (lsp->rls_seqid != olo->lock_seqid) {
8986                                 NFS4_DEBUG(rfs4_debug,
8987                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8988                                     "lsp->seqid=%d old->seqid=%d",
8989                                     lsp->rls_seqid, olo->lock_seqid));
8990                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8991                         } else {
8992                                 rfs4_copy_reply(resop, &lsp->rls_reply);
8993                                 /*
8994                                  * Make sure to copy the just
8995                                  * retrieved reply status into the
8996                                  * overall compound status
8997                                  */
8998                                 *cs->statusp = resp->status;
8999                         }
9000                         rfs4_lockowner_rele(lo);
9001                         goto end;
9002                 }
9003 
9004                 rfs4_dbe_lock(lsp->rls_dbe);
9005 
9006                 /* Make sure to update the lock sequence id */
9007                 lsp->rls_seqid = olo->lock_seqid;
9008 
9009                 NFS4_DEBUG(rfs4_debug,
9010                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9011 
9012                 /*
9013                  * This is used to signify the newly created lockowner
9014                  * stateid and its sequence number.  The checks for
9015                  * sequence number and increment don't occur on the
9016                  * very first lock request for a lockowner.
9017                  */
9018                 lsp->rls_skip_seqid_check = TRUE;
9019 
9020                 /* hold off other access to lsp while we tinker */
9021                 rfs4_sw_enter(&lsp->rls_sw);
9022                 ls_sw_held = TRUE;
9023 
9024                 rfs4_dbe_unlock(lsp->rls_dbe);
9025 
9026                 rfs4_lockowner_rele(lo);
9027         } else {
9028                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9029                 /* get lsp and hold the lock on the underlying file struct */
9030                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9031                     != NFS4_OK) {
9032                         *cs->statusp = resp->status = status;
9033                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9034                             cs, LOCK4res *, resp);
9035                         return;
9036                 }
9037                 create = FALSE; /* We didn't create lsp */
9038 
9039                 /* Ensure specified filehandle matches */
9040                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9041                         rfs4_lo_state_rele(lsp, TRUE);
9042                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9043                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9044                             cs, LOCK4res *, resp);
9045                         return;
9046                 }
9047 
9048                 /* hold off other access to lsp while we tinker */
9049                 rfs4_sw_enter(&lsp->rls_sw);
9050                 ls_sw_held = TRUE;
9051 
9052                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9053                 /*
9054                  * The stateid looks like it was okay (expected to be
9055                  * the next one)
9056                  */
9057                 case NFS4_CHECK_STATEID_OKAY:
9058                         /*
9059                          * The sequence id is now checked.  Determine
9060                          * if this is a replay or if it is in the
9061                          * expected (next) sequence.  In the case of a
9062                          * replay, there are two replay conditions
9063                          * that may occur.  The first is the normal
9064                          * condition where a LOCK is done with a
9065                          * NFS4_OK response and the stateid is
9066                          * updated.  That case is handled below when
9067                          * the stateid is identified as a REPLAY.  The
9068                          * second is the case where an error is
9069                          * returned, like NFS4ERR_DENIED, and the
9070                          * sequence number is updated but the stateid
9071                          * is not updated.  This second case is dealt
9072                          * with here.  So it may seem odd that the
9073                          * stateid is okay but the sequence id is a
9074                          * replay but it is okay.
9075                          */
9076                         switch (rfs4_check_lock_seqid(
9077                             args->locker.locker4_u.lock_owner.lock_seqid,
9078                             lsp, resop)) {
9079                         case NFS4_CHKSEQ_REPLAY:
9080                                 if (resp->status != NFS4_OK) {
9081                                         /*
9082                                          * Here is our replay and need
9083                                          * to verify that the last
9084                                          * response was an error.
9085                                          */
9086                                         *cs->statusp = resp->status;
9087                                         goto end;
9088                                 }
9089                                 /*
9090                                  * This is done since the sequence id
9091                                  * looked like a replay but it didn't
9092                                  * pass our check so a BAD_SEQID is
9093                                  * returned as a result.
9094                                  */
9095                                 /*FALLTHROUGH*/
9096                         case NFS4_CHKSEQ_BAD:
9097                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9098                                 goto end;
9099                         case NFS4_CHKSEQ_OKAY:
9100                                 /* Everything looks okay move ahead */
9101                                 break;
9102                         }
9103                         break;
9104                 case NFS4_CHECK_STATEID_OLD:
9105                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9106                         goto end;
9107                 case NFS4_CHECK_STATEID_BAD:
9108                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9109                         goto end;
9110                 case NFS4_CHECK_STATEID_EXPIRED:
9111                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9112                         goto end;
9113                 case NFS4_CHECK_STATEID_CLOSED:
9114                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9115                         goto end;
9116                 case NFS4_CHECK_STATEID_REPLAY:
9117                         switch (rfs4_check_lock_seqid(
9118                             args->locker.locker4_u.lock_owner.lock_seqid,
9119                             lsp, resop)) {
9120                         case NFS4_CHKSEQ_OKAY:
9121                                 /*
9122                                  * This is a replayed stateid; if
9123                                  * seqid matches the next expected,
9124                                  * then client is using wrong seqid.
9125                                  */
9126                         case NFS4_CHKSEQ_BAD:
9127                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9128                                 goto end;
9129                         case NFS4_CHKSEQ_REPLAY:
9130                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9131                                 *cs->statusp = status = resp->status;
9132                                 goto end;
9133                         }
9134                         break;
9135                 default:
9136                         ASSERT(FALSE);
9137                         break;
9138                 }
9139 
9140                 rfs4_update_lock_sequence(lsp);
9141                 rfs4_update_lease(lsp->rls_locker->rl_client);
9142         }
9143 
9144         /*
9145          * NFS4 only allows locking on regular files, so
9146          * verify type of object.
9147          */
9148         if (cs->vp->v_type != VREG) {
9149                 if (cs->vp->v_type == VDIR)
9150                         status = NFS4ERR_ISDIR;
9151                 else
9152                         status = NFS4ERR_INVAL;
9153                 goto out;
9154         }
9155 
9156         cp = lsp->rls_state->rs_owner->ro_client;
9157 
9158         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9159                 status = NFS4ERR_GRACE;
9160                 goto out;
9161         }
9162 
9163         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9164                 status = NFS4ERR_NO_GRACE;
9165                 goto out;
9166         }
9167 
9168         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9169                 status = NFS4ERR_NO_GRACE;
9170                 goto out;
9171         }
9172 
9173         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9174                 cs->deleg = TRUE;
9175 
9176         status = rfs4_do_lock(lsp, args->locktype,
9177             args->offset, args->length, cs->cr, resop);
9178 
9179 out:
9180         lsp->rls_skip_seqid_check = FALSE;
9181 
9182         *cs->statusp = resp->status = status;
9183 
9184         if (status == NFS4_OK) {
9185                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9186                 lsp->rls_lock_completed = TRUE;
9187         }
9188         /*
9189          * Only update the "OPEN" response here if this was a new
9190          * lock_owner
9191          */
9192         if (sp)
9193                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9194 
9195         rfs4_update_lock_resp(lsp, resop);
9196 
9197 end:
9198         if (lsp) {
9199                 if (ls_sw_held)
9200                         rfs4_sw_exit(&lsp->rls_sw);
9201                 /*
9202                  * If an sp obtained, then the lsp does not represent
9203                  * a lock on the file struct.
9204                  */
9205                 if (sp != NULL)
9206                         rfs4_lo_state_rele(lsp, FALSE);
9207                 else
9208                         rfs4_lo_state_rele(lsp, TRUE);
9209         }
9210         if (sp) {
9211                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9212                 rfs4_state_rele(sp);
9213         }
9214 
9215         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9216             LOCK4res *, resp);
9217 }
9218 
9219 /* free function for LOCK/LOCKT */
9220 static void
9221 lock_denied_free(nfs_resop4 *resop)
9222 {
9223         LOCK4denied *dp = NULL;
9224 
9225         switch (resop->resop) {
9226         case OP_LOCK:
9227                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9228                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9229                 break;
9230         case OP_LOCKT:
9231                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9232                         dp = &resop->nfs_resop4_u.oplockt.denied;
9233                 break;
9234         default:
9235                 break;
9236         }
9237 
9238         if (dp)
9239                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9240 }
9241 
/*
 * LOCKU operation handler.
 *
 * Looks up the lock-owner state named by args->lock_stateid, verifies
 * that it belongs to the current filehandle (cs->vp), validates the
 * stateid and the lock seqid, and then hands the request to
 * rfs4_do_lock().  The final status is always stored in both
 * resp->status and *cs->statusp.
 *
 * Exit paths:
 *   - early returns (no filehandle, state lookup failure, filehandle
 *     mismatch) happen before rls_sw is entered;
 *   - "end" is used once rls_sw is held and the status has already been
 *     stored; it exits rls_sw and releases lsp;
 *   - "out" additionally publishes `status', copies the lock stateid
 *     into the reply on success, and records the reply through
 *     rfs4_update_lock_resp() before falling into "end".
 */
/*ARGSUSED*/
void
rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
        LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
        LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
        nfsstat4 status;
        stateid4 *stateid = &args->lock_stateid;
        rfs4_lo_state_t *lsp;

        DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
            LOCKU4args *, args);

        /* LOCKU needs a current filehandle. */
        if (cs->vp == NULL) {
                *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
                    LOCKU4res *, resp);
                return;
        }

        /*
         * Get lsp; the TRUE argument here is paired with the TRUE passed
         * to rfs4_lo_state_rele() on every release below.
         */
        if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
                *cs->statusp = resp->status = status;
                DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
                    LOCKU4res *, resp);
                return;
        }

        /* Ensure specified filehandle matches */
        if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
                rfs4_lo_state_rele(lsp, TRUE);
                *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
                DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
                    LOCKU4res *, resp);
                return;
        }

        /* hold off other access to lsp while we tinker */
        rfs4_sw_enter(&lsp->rls_sw);

        switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
        case NFS4_CHECK_STATEID_OKAY:
                /*
                 * Current stateid: only the next expected seqid is
                 * accepted; anything else is a bad seqid.
                 */
                if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
                    != NFS4_CHKSEQ_OKAY) {
                        *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
                        goto end;
                }
                break;
        case NFS4_CHECK_STATEID_OLD:
                *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
                goto end;
        case NFS4_CHECK_STATEID_BAD:
                *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
                goto end;
        case NFS4_CHECK_STATEID_EXPIRED:
                *cs->statusp = resp->status = NFS4ERR_EXPIRED;
                goto end;
        case NFS4_CHECK_STATEID_CLOSED:
                *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
                goto end;
        case NFS4_CHECK_STATEID_REPLAY:
                switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
                case NFS4_CHKSEQ_OKAY:
                        /*
                         * This is a replayed stateid; if
                         * seqid matches the next expected,
                         * then client is using wrong seqid.
                         */
                        /* FALLTHROUGH */
                case NFS4_CHKSEQ_BAD:
                        *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
                        goto end;
                case NFS4_CHKSEQ_REPLAY:
                        /*
                         * Replayed stateid and replayed seqid: answer
                         * with the status already present in resp.
                         * NOTE(review): presumably rfs4_check_lock_seqid()
                         * copied the saved reply into resop - confirm.
                         */
                        rfs4_update_lease(lsp->rls_locker->rl_client);
                        *cs->statusp = status = resp->status;
                        goto end;
                }
                break;
        default:
                /* No other stateid check result is expected here. */
                ASSERT(FALSE);
                break;
        }

        /* Request accepted: advance the lock seqid and renew the lease. */
        rfs4_update_lock_sequence(lsp);
        rfs4_update_lease(lsp->rls_locker->rl_client);

        /*
         * NFS4 only allows locking on regular files, so
         * verify type of object.
         */
        if (cs->vp->v_type != VREG) {
                if (cs->vp->v_type == VDIR)
                        status = NFS4ERR_ISDIR;
                else
                        status = NFS4ERR_INVAL;
                goto out;
        }

        /* Unlock requests are refused while the client is in grace. */
        if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
                status = NFS4ERR_GRACE;
                goto out;
        }

        status = rfs4_do_lock(lsp, args->locktype,
            args->offset, args->length, cs->cr, resop);

out:
        *cs->statusp = resp->status = status;

        /* Only a successful LOCKU returns the lock stateid. */
        if (status == NFS4_OK)
                resp->lock_stateid = lsp->rls_lockid.stateid;

        /* Record this reply against lsp (errors included). */
        rfs4_update_lock_resp(lsp, resop);

end:
        rfs4_sw_exit(&lsp->rls_sw);
        rfs4_lo_state_rele(lsp, TRUE);

        DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
            LOCKU4res *, resp);
}
9362 
9363 /*
9364  * LOCKT is a best effort routine, the client can not be guaranteed that
9365  * the status return is still in effect by the time the reply is received.
9366  * They are numerous race conditions in this routine, but we are not required
9367  * and can not be accurate.
9368  */
9369 /*ARGSUSED*/
9370 void
9371 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9372     struct svc_req *req, struct compound_state *cs)
9373 {
9374         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9375         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9376         rfs4_lockowner_t *lo;
9377         rfs4_client_t *cp;
9378         bool_t create = FALSE;
9379         struct flock64 flk;
9380         int error;
9381         int flag = FREAD | FWRITE;
9382         int ltype;
9383         length4 posix_length;
9384         sysid_t sysid;
9385         pid_t pid;
9386 
9387         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9388             LOCKT4args *, args);
9389 
9390         if (cs->vp == NULL) {
9391                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9392                 goto out;
9393         }
9394 
9395         /*
9396          * NFS4 only allows locking on regular files, so
9397          * verify type of object.
9398          */
9399         if (cs->vp->v_type != VREG) {
9400                 if (cs->vp->v_type == VDIR)
9401                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9402                 else
9403                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9404                 goto out;
9405         }
9406 
9407         /*
9408          * Check out the clientid to ensure the server knows about it
9409          * so that we correctly inform the client of a server reboot.
9410          */
9411         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9412             == NULL) {
9413                 *cs->statusp = resp->status =
9414                     rfs4_check_clientid(&args->owner.clientid, 0);
9415                 goto out;
9416         }
9417         if (rfs4_lease_expired(cp)) {
9418                 rfs4_client_close(cp);
9419                 /*
9420                  * Protocol doesn't allow returning NFS4ERR_STALE as
9421                  * other operations do on this check so STALE_CLIENTID
9422                  * is returned instead
9423                  */
9424                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9425                 goto out;
9426         }
9427 
9428         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9429                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9430                 rfs4_client_rele(cp);
9431                 goto out;
9432         }
9433         rfs4_client_rele(cp);
9434 
9435         resp->status = NFS4_OK;
9436 
9437         switch (args->locktype) {
9438         case READ_LT:
9439         case READW_LT:
9440                 ltype = F_RDLCK;
9441                 break;
9442         case WRITE_LT:
9443         case WRITEW_LT:
9444                 ltype = F_WRLCK;
9445                 break;
9446         }
9447 
9448         posix_length = args->length;
9449         /* Check for zero length. To lock to end of file use all ones for V4 */
9450         if (posix_length == 0) {
9451                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9452                 goto out;
9453         } else if (posix_length == (length4)(~0)) {
9454                 posix_length = 0;       /* Posix to end of file  */
9455         }
9456 
9457         /* Find or create a lockowner */
9458         lo = rfs4_findlockowner(&args->owner, &create);
9459 
9460         if (lo) {
9461                 pid = lo->rl_pid;
9462                 if ((resp->status =
9463                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9464                         goto err;
9465         } else {
9466                 pid = 0;
9467                 sysid = lockt_sysid;
9468         }
9469 retry:
9470         flk.l_type = ltype;
9471         flk.l_whence = 0;               /* SEEK_SET */
9472         flk.l_start = args->offset;
9473         flk.l_len = posix_length;
9474         flk.l_sysid = sysid;
9475         flk.l_pid = pid;
9476         flag |= F_REMOTELOCK;
9477 
9478         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9479 
9480         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9481         if (flk.l_len < 0 || flk.l_start < 0) {
9482                 resp->status = NFS4ERR_INVAL;
9483                 goto err;
9484         }
9485         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9486             NULL, cs->cr, NULL);
9487 
9488         /*
9489          * N.B. We map error values to nfsv4 errors. This is differrent
9490          * than puterrno4 routine.
9491          */
9492         switch (error) {
9493         case 0:
9494                 if (flk.l_type == F_UNLCK)
9495                         resp->status = NFS4_OK;
9496                 else {
9497                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9498                                 goto retry;
9499                         resp->status = NFS4ERR_DENIED;
9500                 }
9501                 break;
9502         case EOVERFLOW:
9503                 resp->status = NFS4ERR_INVAL;
9504                 break;
9505         case EINVAL:
9506                 resp->status = NFS4ERR_NOTSUPP;
9507                 break;
9508         default:
9509                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9510                     error);
9511                 resp->status = NFS4ERR_SERVERFAULT;
9512                 break;
9513         }
9514 
9515 err:
9516         if (lo)
9517                 rfs4_lockowner_rele(lo);
9518         *cs->statusp = resp->status;
9519 out:
9520         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9521             LOCKT4res *, resp);
9522 }
9523 
9524 int
9525 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9526 {
9527         int err;
9528         int cmd;
9529         vnode_t *vp;
9530         struct shrlock shr;
9531         struct shr_locowner shr_loco;
9532         int fflags = 0;
9533 
9534         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9535         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9536 
9537         if (sp->rs_closed)
9538                 return (NFS4ERR_OLD_STATEID);
9539 
9540         vp = sp->rs_finfo->rf_vp;
9541         ASSERT(vp);
9542 
9543         shr.s_access = shr.s_deny = 0;
9544 
9545         if (access & OPEN4_SHARE_ACCESS_READ) {
9546                 fflags |= FREAD;
9547                 shr.s_access |= F_RDACC;
9548         }
9549         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9550                 fflags |= FWRITE;
9551                 shr.s_access |= F_WRACC;
9552         }
9553         ASSERT(shr.s_access);
9554 
9555         if (deny & OPEN4_SHARE_DENY_READ)
9556                 shr.s_deny |= F_RDDNY;
9557         if (deny & OPEN4_SHARE_DENY_WRITE)
9558                 shr.s_deny |= F_WRDNY;
9559 
9560         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9561         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9562         shr_loco.sl_pid = shr.s_pid;
9563         shr_loco.sl_id = shr.s_sysid;
9564         shr.s_owner = (caddr_t)&shr_loco;
9565         shr.s_own_len = sizeof (shr_loco);
9566 
9567         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9568 
9569         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9570         if (err != 0) {
9571                 if (err == EAGAIN)
9572                         err = NFS4ERR_SHARE_DENIED;
9573                 else
9574                         err = puterrno4(err);
9575                 return (err);
9576         }
9577 
9578         sp->rs_share_access |= access;
9579         sp->rs_share_deny |= deny;
9580 
9581         return (0);
9582 }
9583 
9584 int
9585 rfs4_unshare(rfs4_state_t *sp)
9586 {
9587         int err;
9588         struct shrlock shr;
9589         struct shr_locowner shr_loco;
9590 
9591         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9592 
9593         if (sp->rs_closed || sp->rs_share_access == 0)
9594                 return (0);
9595 
9596         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9597         ASSERT(sp->rs_finfo->rf_vp);
9598 
9599         shr.s_access = shr.s_deny = 0;
9600         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9601         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9602         shr_loco.sl_pid = shr.s_pid;
9603         shr_loco.sl_id = shr.s_sysid;
9604         shr.s_owner = (caddr_t)&shr_loco;
9605         shr.s_own_len = sizeof (shr_loco);
9606 
9607         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9608             NULL);
9609         if (err != 0) {
9610                 err = puterrno4(err);
9611                 return (err);
9612         }
9613 
9614         sp->rs_share_access = 0;
9615         sp->rs_share_deny = 0;
9616 
9617         return (0);
9618 
9619 }
9620 
9621 static int
9622 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9623 {
9624         struct clist    *wcl;
9625         count4          count = rok->data_len;
9626         int             wlist_len;
9627 
9628         wcl = args->wlist;
9629         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9630                 return (FALSE);
9631         }
9632         wcl = args->wlist;
9633         rok->wlist_len = wlist_len;
9634         rok->wlist = wcl;
9635         return (TRUE);
9636 }
9637 
/*
 * Tunable to disable server referrals.  When set to any non-zero value,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9640 
9641 /*
9642  * Find an NFS record in reparse point data.
9643  * Returns 0 for success and <0 or an errno value on failure.
9644  */
9645 int
9646 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9647 {
9648         int err;
9649         char *stype, *val;
9650         nvlist_t *nvl;
9651         nvpair_t *curr;
9652 
9653         if ((nvl = reparse_init()) == NULL)
9654                 return (-1);
9655 
9656         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9657                 reparse_free(nvl);
9658                 return (err);
9659         }
9660 
9661         curr = NULL;
9662         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9663                 if ((stype = nvpair_name(curr)) == NULL) {
9664                         reparse_free(nvl);
9665                         return (-2);
9666                 }
9667                 if (strncasecmp(stype, "NFS", 3) == 0)
9668                         break;
9669         }
9670 
9671         if ((curr == NULL) ||
9672             (nvpair_value_string(curr, &val))) {
9673                 reparse_free(nvl);
9674                 return (-3);
9675         }
9676         *nvlp = nvl;
9677         *svcp = stype;
9678         *datap = val;
9679         return (0);
9680 }
9681 
9682 int
9683 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9684 {
9685         nvlist_t *nvl;
9686         char *s, *d;
9687 
9688         if (rfs4_no_referrals != 0)
9689                 return (B_FALSE);
9690 
9691         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9692                 return (B_FALSE);
9693 
9694         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9695                 return (B_FALSE);
9696 
9697         reparse_free(nvl);
9698 
9699         return (B_TRUE);
9700 }
9701 
9702 /*
9703  * There is a user-level copy of this routine in ref_subr.c.
9704  * Changes should be kept in sync.
9705  */
9706 static int
9707 nfs4_create_components(char *path, component4 *comp4)
9708 {
9709         int slen, plen, ncomp;
9710         char *ori_path, *nxtc, buf[MAXNAMELEN];
9711 
9712         if (path == NULL)
9713                 return (0);
9714 
9715         plen = strlen(path) + 1;        /* include the terminator */
9716         ori_path = path;
9717         ncomp = 0;
9718 
9719         /* count number of components in the path */
9720         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9721                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9722                         if ((slen = nxtc - path) == 0) {
9723                                 path = nxtc + 1;
9724                                 continue;
9725                         }
9726 
9727                         if (comp4 != NULL) {
9728                                 bcopy(path, buf, slen);
9729                                 buf[slen] = '\0';
9730                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9731                         }
9732 
9733                         ncomp++;        /* 1 valid component */
9734                         path = nxtc + 1;
9735                 }
9736                 if (*nxtc == '\0' || *nxtc == '\n')
9737                         break;
9738         }
9739 
9740         return (ncomp);
9741 }
9742 
9743 /*
9744  * There is a user-level copy of this routine in ref_subr.c.
9745  * Changes should be kept in sync.
9746  */
9747 static int
9748 make_pathname4(char *path, pathname4 *pathname)
9749 {
9750         int ncomp;
9751         component4 *comp4;
9752 
9753         if (pathname == NULL)
9754                 return (0);
9755 
9756         if (path == NULL) {
9757                 pathname->pathname4_val = NULL;
9758                 pathname->pathname4_len = 0;
9759                 return (0);
9760         }
9761 
9762         /* count number of components to alloc buffer */
9763         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9764                 pathname->pathname4_val = NULL;
9765                 pathname->pathname4_len = 0;
9766                 return (0);
9767         }
9768         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9769 
9770         /* copy components into allocated buffer */
9771         ncomp = nfs4_create_components(path, comp4);
9772 
9773         pathname->pathname4_val = comp4;
9774         pathname->pathname4_len = ncomp;
9775 
9776         return (ncomp);
9777 }
9778 
/* fetch_referral() decodes its fs_locations4 through this alias */
#define xdr_fs_locations4 xdr_fattr4_fs_locations
9780 
9781 fs_locations4 *
9782 fetch_referral(vnode_t *vp, cred_t *cr)
9783 {
9784         nvlist_t *nvl;
9785         char *stype, *sdata;
9786         fs_locations4 *result;
9787         char buf[1024];
9788         size_t bufsize;
9789         XDR xdr;
9790         int err;
9791 
9792         /*
9793          * Check attrs to ensure it's a reparse point
9794          */
9795         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9796                 return (NULL);
9797 
9798         /*
9799          * Look for an NFS record and get the type and data
9800          */
9801         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9802                 return (NULL);
9803 
9804         /*
9805          * With the type and data, upcall to get the referral
9806          */
9807         bufsize = sizeof (buf);
9808         bzero(buf, sizeof (buf));
9809         err = reparse_kderef((const char *)stype, (const char *)sdata,
9810             buf, &bufsize);
9811         reparse_free(nvl);
9812 
9813         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9814             char *, stype, char *, sdata, char *, buf, int, err);
9815         if (err) {
9816                 cmn_err(CE_NOTE,
9817                     "reparsed daemon not running: unable to get referral (%d)",
9818                     err);
9819                 return (NULL);
9820         }
9821 
9822         /*
9823          * We get an XDR'ed record back from the kderef call
9824          */
9825         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9826         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9827         err = xdr_fs_locations4(&xdr, result);
9828         XDR_DESTROY(&xdr);
9829         if (err != TRUE) {
9830                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9831                     int, err);
9832                 return (NULL);
9833         }
9834 
9835         /*
9836          * Look at path to recover fs_root, ignoring the leading '/'
9837          */
9838         (void) make_pathname4(vp->v_path, &result->fs_root);
9839 
9840         return (result);
9841 }
9842 
9843 char *
9844 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9845 {
9846         fs_locations4 *fsl;
9847         fs_location4 *fs;
9848         char *server, *path, *symbuf;
9849         static char *prefix = "/net/";
9850         int i, size, npaths;
9851         uint_t len;
9852 
9853         /* Get the referral */
9854         if ((fsl = fetch_referral(vp, cr)) == NULL)
9855                 return (NULL);
9856 
9857         /* Deal with only the first location and first server */
9858         fs = &fsl->locations_val[0];
9859         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9860         if (server == NULL) {
9861                 rfs4_free_fs_locations4(fsl);
9862                 kmem_free(fsl, sizeof (fs_locations4));
9863                 return (NULL);
9864         }
9865 
9866         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9867         size = strlen(prefix) + len;
9868         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9869                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9870 
9871         /* Allocate the symlink buffer and fill it */
9872         symbuf = kmem_zalloc(size, KM_SLEEP);
9873         (void) strcat(symbuf, prefix);
9874         (void) strcat(symbuf, server);
9875         kmem_free(server, len);
9876 
9877         npaths = 0;
9878         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9879                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9880                 if (path == NULL)
9881                         continue;
9882                 (void) strcat(symbuf, "/");
9883                 (void) strcat(symbuf, path);
9884                 npaths++;
9885                 kmem_free(path, len);
9886         }
9887 
9888         rfs4_free_fs_locations4(fsl);
9889         kmem_free(fsl, sizeof (fs_locations4));
9890 
9891         if (strsz != NULL)
9892                 *strsz = size;
9893         return (symbuf);
9894 }
9895 
9896 /*
9897  * Check to see if we have a downrev Solaris client, so that we
9898  * can send it a symlink instead of a referral.
9899  */
9900 int
9901 client_is_downrev(struct svc_req *req)
9902 {
9903         struct sockaddr *ca;
9904         rfs4_clntip_t *ci;
9905         bool_t create = FALSE;
9906         int is_downrev;
9907 
9908         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9909         ASSERT(ca);
9910         ci = rfs4_find_clntip(ca, &create);
9911         if (ci == NULL)
9912                 return (0);
9913         is_downrev = ci->ri_no_referrals;
9914         rfs4_dbe_rele(ci->ri_dbe);
9915         return (is_downrev);
9916 }