1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2013 by Delphix. All rights reserved.
  26  */
  27 
  28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /* All Rights Reserved */
  30 
  31 #include <sys/param.h>
  32 #include <sys/types.h>
  33 #include <sys/systm.h>
  34 #include <sys/cred.h>
  35 #include <sys/buf.h>
  36 #include <sys/vfs.h>
  37 #include <sys/vnode.h>
  38 #include <sys/uio.h>
  39 #include <sys/errno.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/statvfs.h>
  42 #include <sys/kmem.h>
  43 #include <sys/dirent.h>
  44 #include <sys/cmn_err.h>
  45 #include <sys/debug.h>
  46 #include <sys/systeminfo.h>
  47 #include <sys/flock.h>
  48 #include <sys/nbmlock.h>
  49 #include <sys/policy.h>
  50 #include <sys/sdt.h>
  51 
  52 #include <rpc/types.h>
  53 #include <rpc/auth.h>
  54 #include <rpc/svc.h>
  55 #include <rpc/rpc_rdma.h>
  56 
  57 #include <nfs/nfs.h>
  58 #include <nfs/export.h>
  59 #include <nfs/nfs_cmd.h>
  60 
  61 #include <sys/strsubr.h>
  62 #include <sys/tsol/label.h>
  63 #include <sys/tsol/tndb.h>
  64 
  65 #include <sys/zone.h>
  66 
  67 #include <inet/ip.h>
  68 #include <inet/ip6.h>
  69 
  70 /*
  71  * These are the interface routines for the server side of the
  72  * Network File System.  See the NFS version 3 protocol specification
  73  * for a description of this interface.
  74  */
  75 
  76 static writeverf3 write3verf;
  77 
  78 static int      sattr3_to_vattr(sattr3 *, struct vattr *);
  79 static int      vattr_to_fattr3(struct vattr *, fattr3 *);
  80 static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
  81 static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
  82 static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
  83 static int      rdma_setup_read_data3(READ3args *, READ3resok *);
  84 
  85 extern int nfs_loaned_buffers;
  86 
  87 u_longlong_t nfs3_srv_caller_id;
  88 
  89 /* ARGSUSED */
  90 void
  91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
  92     struct svc_req *req, cred_t *cr, bool_t ro)
  93 {
  94         int error;
  95         vnode_t *vp;
  96         struct vattr va;
  97 
  98         vp = nfs3_fhtovp(&args->object, exi);
  99 
 100         DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
 101             cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
 102 
 103         if (vp == NULL) {
 104                 error = ESTALE;
 105                 goto out;
 106         }
 107 
 108         va.va_mask = AT_ALL;
 109         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 110 
 111         if (!error) {
 112                 /* Lie about the object type for a referral */
 113                 if (vn_is_nfs_reparse(vp, cr))
 114                         va.va_type = VLNK;
 115 
 116                 /* overflow error if time or size is out of range */
 117                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 118                 if (error)
 119                         goto out;
 120                 resp->status = NFS3_OK;
 121 
 122                 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 123                     cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 124 
 125                 VN_RELE(vp);
 126 
 127                 return;
 128         }
 129 
 130 out:
 131         if (curthread->t_flag & T_WOULDBLOCK) {
 132                 curthread->t_flag &= ~T_WOULDBLOCK;
 133                 resp->status = NFS3ERR_JUKEBOX;
 134         } else
 135                 resp->status = puterrno3(error);
 136 
 137         DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 138             cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 139 
 140         if (vp != NULL)
 141                 VN_RELE(vp);
 142 }
 143 
 144 void *
 145 rfs3_getattr_getfh(GETATTR3args *args)
 146 {
 147 
 148         return (&args->object);
 149 }
 150 
 151 void
 152 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
 153     struct svc_req *req, cred_t *cr, bool_t ro)
 154 {
 155         int error;
 156         vnode_t *vp;
 157         struct vattr *bvap;
 158         struct vattr bva;
 159         struct vattr *avap;
 160         struct vattr ava;
 161         int flag;
 162         int in_crit = 0;
 163         struct flock64 bf;
 164         caller_context_t ct;
 165 
 166         bvap = NULL;
 167         avap = NULL;
 168 
 169         vp = nfs3_fhtovp(&args->object, exi);
 170 
 171         DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
 172             cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
 173 
 174         if (vp == NULL) {
 175                 error = ESTALE;
 176                 goto out;
 177         }
 178 
 179         error = sattr3_to_vattr(&args->new_attributes, &ava);
 180         if (error)
 181                 goto out;
 182 
 183         if (is_system_labeled()) {
 184                 bslabel_t *clabel = req->rq_label;
 185 
 186                 ASSERT(clabel != NULL);
 187                 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
 188                     "got client label from request(1)", struct svc_req *, req);
 189 
 190                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 191                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
 192                             exi)) {
 193                                 resp->status = NFS3ERR_ACCES;
 194                                 goto out1;
 195                         }
 196                 }
 197         }
 198 
 199         /*
 200          * We need to specially handle size changes because of
 201          * possible conflicting NBMAND locks. Get into critical
 202          * region before VOP_GETATTR, so the size attribute is
 203          * valid when checking conflicts.
 204          *
 205          * Also, check to see if the v4 side of the server has
 206          * delegated this file.  If so, then we return JUKEBOX to
 207          * allow the client to retrasmit its request.
 208          */
 209         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 210                 if (nbl_need_check(vp)) {
 211                         nbl_start_crit(vp, RW_READER);
 212                         in_crit = 1;
 213                 }
 214         }
 215 
 216         bva.va_mask = AT_ALL;
 217         error = rfs4_delegated_getattr(vp, &bva, 0, cr);
 218 
 219         /*
 220          * If we can't get the attributes, then we can't do the
 221          * right access checking.  So, we'll fail the request.
 222          */
 223         if (error)
 224                 goto out;
 225 
 226         bvap = &bva;
 227 
 228         if (rdonly(ro, vp)) {
 229                 resp->status = NFS3ERR_ROFS;
 230                 goto out1;
 231         }
 232 
 233         if (args->guard.check &&
 234             (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
 235             args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
 236                 resp->status = NFS3ERR_NOT_SYNC;
 237                 goto out1;
 238         }
 239 
 240         if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
 241                 flag = ATTR_UTIME;
 242         else
 243                 flag = 0;
 244 
 245         /*
 246          * If the filesystem is exported with nosuid, then mask off
 247          * the setuid and setgid bits.
 248          */
 249         if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
 250             (exi->exi_export.ex_flags & EX_NOSUID))
 251                 ava.va_mode &= ~(VSUID | VSGID);
 252 
 253         ct.cc_sysid = 0;
 254         ct.cc_pid = 0;
 255         ct.cc_caller_id = nfs3_srv_caller_id;
 256         ct.cc_flags = CC_DONTBLOCK;
 257 
 258         /*
 259          * We need to specially handle size changes because it is
 260          * possible for the client to create a file with modes
 261          * which indicate read-only, but with the file opened for
 262          * writing.  If the client then tries to set the size of
 263          * the file, then the normal access checking done in
 264          * VOP_SETATTR would prevent the client from doing so,
 265          * although it should be legal for it to do so.  To get
 266          * around this, we do the access checking for ourselves
 267          * and then use VOP_SPACE which doesn't do the access
 268          * checking which VOP_SETATTR does. VOP_SPACE can only
 269          * operate on VREG files, let VOP_SETATTR handle the other
 270          * extremely rare cases.
 271          * Also the client should not be allowed to change the
 272          * size of the file if there is a conflicting non-blocking
 273          * mandatory lock in the region the change.
 274          */
 275         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 276                 if (in_crit) {
 277                         u_offset_t offset;
 278                         ssize_t length;
 279 
 280                         if (ava.va_size < bva.va_size) {
 281                                 offset = ava.va_size;
 282                                 length = bva.va_size - ava.va_size;
 283                         } else {
 284                                 offset = bva.va_size;
 285                                 length = ava.va_size - bva.va_size;
 286                         }
 287                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 288                             NULL)) {
 289                                 error = EACCES;
 290                                 goto out;
 291                         }
 292                 }
 293 
 294                 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
 295                         ava.va_mask &= ~AT_SIZE;
 296                         bf.l_type = F_WRLCK;
 297                         bf.l_whence = 0;
 298                         bf.l_start = (off64_t)ava.va_size;
 299                         bf.l_len = 0;
 300                         bf.l_sysid = 0;
 301                         bf.l_pid = 0;
 302                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 303                             (offset_t)ava.va_size, cr, &ct);
 304                 }
 305         }
 306 
 307         if (!error && ava.va_mask)
 308                 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
 309 
 310         /* check if a monitor detected a delegation conflict */
 311         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 312                 resp->status = NFS3ERR_JUKEBOX;
 313                 goto out1;
 314         }
 315 
 316         ava.va_mask = AT_ALL;
 317         avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 318 
 319         /*
 320          * Force modified metadata out to stable storage.
 321          */
 322         (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 323 
 324         if (error)
 325                 goto out;
 326 
 327         if (in_crit)
 328                 nbl_end_crit(vp);
 329 
 330         resp->status = NFS3_OK;
 331         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 332 
 333         DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 334             cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 335 
 336         VN_RELE(vp);
 337 
 338         return;
 339 
 340 out:
 341         if (curthread->t_flag & T_WOULDBLOCK) {
 342                 curthread->t_flag &= ~T_WOULDBLOCK;
 343                 resp->status = NFS3ERR_JUKEBOX;
 344         } else
 345                 resp->status = puterrno3(error);
 346 out1:
 347         DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 348             cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 349 
 350         if (vp != NULL) {
 351                 if (in_crit)
 352                         nbl_end_crit(vp);
 353                 VN_RELE(vp);
 354         }
 355         vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
 356 }
 357 
 358 void *
 359 rfs3_setattr_getfh(SETATTR3args *args)
 360 {
 361 
 362         return (&args->object);
 363 }
 364 
 365 /* ARGSUSED */
 366 void
 367 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 368     struct svc_req *req, cred_t *cr, bool_t ro)
 369 {
 370         int error;
 371         vnode_t *vp;
 372         vnode_t *dvp;
 373         struct vattr *vap;
 374         struct vattr va;
 375         struct vattr *dvap;
 376         struct vattr dva;
 377         nfs_fh3 *fhp;
 378         struct sec_ol sec = {0, 0};
 379         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 380         struct sockaddr *ca;
 381         char *name = NULL;
 382 
 383         dvap = NULL;
 384 
 385         /*
 386          * Allow lookups from the root - the default
 387          * location of the public filehandle.
 388          */
 389         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 390                 dvp = rootdir;
 391                 VN_HOLD(dvp);
 392 
 393                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 394                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 395         } else {
 396                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 397 
 398                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 399                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 400 
 401                 if (dvp == NULL) {
 402                         error = ESTALE;
 403                         goto out;
 404                 }
 405         }
 406 
 407         dva.va_mask = AT_ALL;
 408         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 409 
 410         if (args->what.name == nfs3nametoolong) {
 411                 resp->status = NFS3ERR_NAMETOOLONG;
 412                 goto out1;
 413         }
 414 
 415         if (args->what.name == NULL || *(args->what.name) == '\0') {
 416                 resp->status = NFS3ERR_ACCES;
 417                 goto out1;
 418         }
 419 
 420         fhp = &args->what.dir;
 421         if (strcmp(args->what.name, "..") == 0 &&
 422             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 423                 resp->status = NFS3ERR_NOENT;
 424                 goto out1;
 425         }
 426 
 427         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 428         name = nfscmd_convname(ca, exi, args->what.name,
 429             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 430 
 431         if (name == NULL) {
 432                 resp->status = NFS3ERR_ACCES;
 433                 goto out1;
 434         }
 435 
 436         /*
 437          * If the public filehandle is used then allow
 438          * a multi-component lookup
 439          */
 440         if (PUBLIC_FH3(&args->what.dir)) {
 441                 publicfh_flag = TRUE;
 442                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 443                     &exi, &sec);
 444                 if (error && exi != NULL)
 445                         exi_rele(exi); /* See comment below Re: publicfh_flag */
 446                 /*
 447                  * Since WebNFS may bypass MOUNT, we need to ensure this
 448                  * request didn't come from an unlabeled admin_low client.
 449                  */
 450                 if (is_system_labeled() && error == 0) {
 451                         int             addr_type;
 452                         void            *ipaddr;
 453                         tsol_tpc_t      *tp;
 454 
 455                         if (ca->sa_family == AF_INET) {
 456                                 addr_type = IPV4_VERSION;
 457                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 458                         } else if (ca->sa_family == AF_INET6) {
 459                                 addr_type = IPV6_VERSION;
 460                                 ipaddr = &((struct sockaddr_in6 *)
 461                                     ca)->sin6_addr;
 462                         }
 463                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 464                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 465                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 466                             SUN_CIPSO) {
 467                                 if (exi != NULL)
 468                                         exi_rele(exi);
 469                                 VN_RELE(vp);
 470                                 error = EACCES;
 471                         }
 472                         if (tp != NULL)
 473                                 TPC_RELE(tp);
 474                 }
 475         } else {
 476                 error = VOP_LOOKUP(dvp, name, &vp,
 477                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 478         }
 479 
 480         if (name != args->what.name)
 481                 kmem_free(name, MAXPATHLEN + 1);
 482 
 483         if (is_system_labeled() && error == 0) {
 484                 bslabel_t *clabel = req->rq_label;
 485 
 486                 ASSERT(clabel != NULL);
 487                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 488                     "got client label from request(1)", struct svc_req *, req);
 489 
 490                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 491                         if (!do_rfs_label_check(clabel, dvp,
 492                             DOMINANCE_CHECK, exi)) {
 493                                 if (publicfh_flag && exi != NULL)
 494                                         exi_rele(exi);
 495                                 VN_RELE(vp);
 496                                 error = EACCES;
 497                         }
 498                 }
 499         }
 500 
 501         dva.va_mask = AT_ALL;
 502         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 503 
 504         if (error)
 505                 goto out;
 506 
 507         if (sec.sec_flags & SEC_QUERY) {
 508                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 509         } else {
 510                 error = makefh3(&resp->resok.object, vp, exi);
 511                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 512                         auth_weak = TRUE;
 513         }
 514 
 515         /*
 516          * If publicfh_flag is true then we have called rfs_publicfh_mclookup
 517          * and have obtained a new exportinfo in exi which needs to be
 518          * released. Note that the original exportinfo pointed to by exi
 519          * will be released by the caller, common_dispatch.
 520          */
 521         if (publicfh_flag)
 522                 exi_rele(exi);
 523 
 524         if (error) {
 525                 VN_RELE(vp);
 526                 goto out;
 527         }
 528 
 529         va.va_mask = AT_ALL;
 530         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 531 
 532         VN_RELE(vp);
 533 
 534         resp->status = NFS3_OK;
 535         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 536         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 537 
 538         /*
 539          * If it's public fh, no 0x81, and client's flavor is
 540          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 541          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 542          */
 543         if (auth_weak)
 544                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 545 
 546         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 547             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 548         VN_RELE(dvp);
 549 
 550         return;
 551 
 552 out:
 553         if (curthread->t_flag & T_WOULDBLOCK) {
 554                 curthread->t_flag &= ~T_WOULDBLOCK;
 555                 resp->status = NFS3ERR_JUKEBOX;
 556         } else
 557                 resp->status = puterrno3(error);
 558 out1:
 559         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 560             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 561 
 562         if (dvp != NULL)
 563                 VN_RELE(dvp);
 564         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 565 
 566 }
 567 
 568 void *
 569 rfs3_lookup_getfh(LOOKUP3args *args)
 570 {
 571 
 572         return (&args->what.dir);
 573 }
 574 
 575 /* ARGSUSED */
 576 void
 577 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 578     struct svc_req *req, cred_t *cr, bool_t ro)
 579 {
 580         int error;
 581         vnode_t *vp;
 582         struct vattr *vap;
 583         struct vattr va;
 584         int checkwriteperm;
 585         boolean_t dominant_label = B_FALSE;
 586         boolean_t equal_label = B_FALSE;
 587         boolean_t admin_low_client;
 588 
 589         vap = NULL;
 590 
 591         vp = nfs3_fhtovp(&args->object, exi);
 592 
 593         DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
 594             cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
 595 
 596         if (vp == NULL) {
 597                 error = ESTALE;
 598                 goto out;
 599         }
 600 
 601         /*
 602          * If the file system is exported read only, it is not appropriate
 603          * to check write permissions for regular files and directories.
 604          * Special files are interpreted by the client, so the underlying
 605          * permissions are sent back to the client for interpretation.
 606          */
 607         if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
 608                 checkwriteperm = 0;
 609         else
 610                 checkwriteperm = 1;
 611 
 612         /*
 613          * We need the mode so that we can correctly determine access
 614          * permissions relative to a mandatory lock file.  Access to
 615          * mandatory lock files is denied on the server, so it might
 616          * as well be reflected to the server during the open.
 617          */
 618         va.va_mask = AT_MODE;
 619         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 620         if (error)
 621                 goto out;
 622 
 623         vap = &va;
 624 
 625         resp->resok.access = 0;
 626 
 627         if (is_system_labeled()) {
 628                 bslabel_t *clabel = req->rq_label;
 629 
 630                 ASSERT(clabel != NULL);
 631                 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 632                     "got client label from request(1)", struct svc_req *, req);
 633 
 634                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 635                         if ((equal_label = do_rfs_label_check(clabel, vp,
 636                             EQUALITY_CHECK, exi)) == B_FALSE) {
 637                                 dominant_label = do_rfs_label_check(clabel,
 638                                     vp, DOMINANCE_CHECK, exi);
 639                         } else
 640                                 dominant_label = B_TRUE;
 641                         admin_low_client = B_FALSE;
 642                 } else
 643                         admin_low_client = B_TRUE;
 644         }
 645 
 646         if (args->access & ACCESS3_READ) {
 647                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 648                 if (error) {
 649                         if (curthread->t_flag & T_WOULDBLOCK)
 650                                 goto out;
 651                 } else if (!MANDLOCK(vp, va.va_mode) &&
 652                     (!is_system_labeled() || admin_low_client ||
 653                     dominant_label))
 654                         resp->resok.access |= ACCESS3_READ;
 655         }
 656         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 657                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 658                 if (error) {
 659                         if (curthread->t_flag & T_WOULDBLOCK)
 660                                 goto out;
 661                 } else if (!is_system_labeled() || admin_low_client ||
 662                     dominant_label)
 663                         resp->resok.access |= ACCESS3_LOOKUP;
 664         }
 665         if (checkwriteperm &&
 666             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 667                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 668                 if (error) {
 669                         if (curthread->t_flag & T_WOULDBLOCK)
 670                                 goto out;
 671                 } else if (!MANDLOCK(vp, va.va_mode) &&
 672                     (!is_system_labeled() || admin_low_client || equal_label)) {
 673                         resp->resok.access |=
 674                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 675                 }
 676         }
 677         if (checkwriteperm &&
 678             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 679                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 680                 if (error) {
 681                         if (curthread->t_flag & T_WOULDBLOCK)
 682                                 goto out;
 683                 } else if (!is_system_labeled() || admin_low_client ||
 684                     equal_label)
 685                         resp->resok.access |= ACCESS3_DELETE;
 686         }
 687         if (args->access & ACCESS3_EXECUTE) {
 688                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 689                 if (error) {
 690                         if (curthread->t_flag & T_WOULDBLOCK)
 691                                 goto out;
 692                 } else if (!MANDLOCK(vp, va.va_mode) &&
 693                     (!is_system_labeled() || admin_low_client ||
 694                     dominant_label))
 695                         resp->resok.access |= ACCESS3_EXECUTE;
 696         }
 697 
 698         va.va_mask = AT_ALL;
 699         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 700 
 701         resp->status = NFS3_OK;
 702         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 703 
 704         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 705             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 706 
 707         VN_RELE(vp);
 708 
 709         return;
 710 
 711 out:
 712         if (curthread->t_flag & T_WOULDBLOCK) {
 713                 curthread->t_flag &= ~T_WOULDBLOCK;
 714                 resp->status = NFS3ERR_JUKEBOX;
 715         } else
 716                 resp->status = puterrno3(error);
 717         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 718             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 719         if (vp != NULL)
 720                 VN_RELE(vp);
 721         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 722 }
 723 
 724 void *
 725 rfs3_access_getfh(ACCESS3args *args)
 726 {
 727 
 728         return (&args->object);
 729 }
 730 
 731 /* ARGSUSED */
 732 void
 733 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 734     struct svc_req *req, cred_t *cr, bool_t ro)
 735 {
 736         int error;
 737         vnode_t *vp;
 738         struct vattr *vap;
 739         struct vattr va;
 740         struct iovec iov;
 741         struct uio uio;
 742         char *data;
 743         struct sockaddr *ca;
 744         char *name = NULL;
 745         int is_referral = 0;
 746 
 747         vap = NULL;
 748 
 749         vp = nfs3_fhtovp(&args->symlink, exi);
 750 
 751         DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
 752             cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
 753 
 754         if (vp == NULL) {
 755                 error = ESTALE;
 756                 goto out;
 757         }
 758 
 759         va.va_mask = AT_ALL;
 760         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 761         if (error)
 762                 goto out;
 763 
 764         vap = &va;
 765 
 766         /* We lied about the object type for a referral */
 767         if (vn_is_nfs_reparse(vp, cr))
 768                 is_referral = 1;
 769 
 770         if (vp->v_type != VLNK && !is_referral) {
 771                 resp->status = NFS3ERR_INVAL;
 772                 goto out1;
 773         }
 774 
 775         if (MANDLOCK(vp, va.va_mode)) {
 776                 resp->status = NFS3ERR_ACCES;
 777                 goto out1;
 778         }
 779 
 780         if (is_system_labeled()) {
 781                 bslabel_t *clabel = req->rq_label;
 782 
 783                 ASSERT(clabel != NULL);
 784                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 785                     "got client label from request(1)", struct svc_req *, req);
 786 
 787                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 788                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 789                             exi)) {
 790                                 resp->status = NFS3ERR_ACCES;
 791                                 goto out1;
 792                         }
 793                 }
 794         }
 795 
 796         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 797 
 798         if (is_referral) {
 799                 char *s;
 800                 size_t strsz;
 801 
 802                 /* Get an artificial symlink based on a referral */
 803                 s = build_symlink(vp, cr, &strsz);
 804                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 805                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 806                     vnode_t *, vp, char *, s);
 807                 if (s == NULL)
 808                         error = EINVAL;
 809                 else {
 810                         error = 0;
 811                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 812                         kmem_free(s, strsz);
 813                 }
 814 
 815         } else {
 816 
 817                 iov.iov_base = data;
 818                 iov.iov_len = MAXPATHLEN;
 819                 uio.uio_iov = &iov;
 820                 uio.uio_iovcnt = 1;
 821                 uio.uio_segflg = UIO_SYSSPACE;
 822                 uio.uio_extflg = UIO_COPY_CACHED;
 823                 uio.uio_loffset = 0;
 824                 uio.uio_resid = MAXPATHLEN;
 825 
 826                 error = VOP_READLINK(vp, &uio, cr, NULL);
 827 
 828                 if (!error)
 829                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 830         }
 831 
 832         va.va_mask = AT_ALL;
 833         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 834 
 835         /* Lie about object type again just to be consistent */
 836         if (is_referral && vap != NULL)
 837                 vap->va_type = VLNK;
 838 
 839 #if 0 /* notyet */
 840         /*
 841          * Don't do this.  It causes local disk writes when just
 842          * reading the file and the overhead is deemed larger
 843          * than the benefit.
 844          */
 845         /*
 846          * Force modified metadata out to stable storage.
 847          */
 848         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 849 #endif
 850 
 851         if (error) {
 852                 kmem_free(data, MAXPATHLEN + 1);
 853                 goto out;
 854         }
 855 
 856         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 857         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 858             MAXPATHLEN + 1);
 859 
 860         if (name == NULL) {
 861                 /*
 862                  * Even though the conversion failed, we return
 863                  * something. We just don't translate it.
 864                  */
 865                 name = data;
 866         }
 867 
 868         resp->status = NFS3_OK;
 869         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 870         resp->resok.data = name;
 871 
 872         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 873             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 874         VN_RELE(vp);
 875 
 876         if (name != data)
 877                 kmem_free(data, MAXPATHLEN + 1);
 878 
 879         return;
 880 
 881 out:
 882         if (curthread->t_flag & T_WOULDBLOCK) {
 883                 curthread->t_flag &= ~T_WOULDBLOCK;
 884                 resp->status = NFS3ERR_JUKEBOX;
 885         } else
 886                 resp->status = puterrno3(error);
 887 out1:
 888         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 889             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 890         if (vp != NULL)
 891                 VN_RELE(vp);
 892         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 893 }
 894
     /*
      * Return the address of the `symlink' member of the READLINK3 arguments
      * (presumably the file handle of the link being read; confirm against
      * the dispatch table that uses this routine).
      */
 895 void *
 896 rfs3_readlink_getfh(READLINK3args *args)
 897 {
 898         void *fhp = &args->symlink;
 899         return (fhp);
 900 }
 901
     /*
      * Release the MAXPATHLEN + 1 byte buffer referenced by resok.data of a
      * READLINK3 reply.  Replies whose status is not NFS3_OK are left alone.
      */
 902 void
 903 rfs3_readlink_free(READLINK3res *resp)
 904 {
 905         if (resp->status != NFS3_OK)
 906                 return;
 907         kmem_free(resp->resok.data, MAXPATHLEN + 1);
 908 }
 909 
 910 /*
 911  * Server routine to handle the NFSv3 READ procedure.
 912  * May handle RDMA data as well as mblks.
      *
      * args->file is mapped to a vnode with nfs3_fhtovp(); up to args->count
      * bytes starting at args->offset are then read with VOP_READ() while
      * holding the reader VOP_RWLOCK() and, when nbl_need_check() asks for
      * it, inside an nbmand critical region.  A non-NULL args->wlist selects
      * the RDMA write-chunk path; otherwise the data is returned in an mblk
      * chain (resok.data.mp), which rfs3_read_free() releases.
      *
      * Exits:
      *   done - success; only the vnode hold and the iovec array remain to
      *          be released.
      *   out  - failure with an errno in `error'; converted with puterrno3(),
      *          or reported as NFS3ERR_JUKEBOX when T_WOULDBLOCK is set on
      *          the current thread.
      *   out1 - failure with resp->status already filled in.
      * Both failure exits drop the rwlock and critical region when held,
      * release the vnode and pass `vap' (possibly NULL) to
      * vattr_to_post_op_attr() for the resfail attributes.
      *
      * The `ro' argument is not used by this routine.
 913  */
 914 /* ARGSUSED */
 915 void
 916 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
 917     struct svc_req *req, cred_t *cr, bool_t ro)
 918 {
 919         int error;
 920         vnode_t *vp;
 921         struct vattr *vap;
 922         struct vattr va;
 923         struct iovec iov, *iovp = NULL;
 924         int iovcnt;
 925         struct uio uio;
 926         u_offset_t offset;
 927         mblk_t *mp = NULL;
 928         int in_crit = 0;
 929         int need_rwunlock = 0;
 930         caller_context_t ct;
 931         int rdma_used = 0;
 932         int loaned_buffers;
 933         struct uio *uiop;
 934
 935         vap = NULL;
 936
 937         vp = nfs3_fhtovp(&args->file, exi);
 938
 939         DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
 940             cred_t *, cr, vnode_t *, vp, READ3args *, args);
 941
 942         if (vp == NULL) {
 943                 error = ESTALE;
 944                 goto out;
 945         }
 946
             /*
              * A write chunk list selects the RDMA path; it must be able to
              * hold the requested byte count.
              */
 947         if (args->wlist) {
 948                 if (args->count > clist_len(args->wlist)) {
 949                         error = EINVAL;
 950                         goto out;
 951                 }
 952                 rdma_used = 1;
 953         }
 954
 955         /* use loaned buffers for TCP */
 956         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
 957
             /*
              * On a labeled system a client whose label is not admin_low must
              * pass the dominance check against the vnode.
              */
 958         if (is_system_labeled()) {
 959                 bslabel_t *clabel = req->rq_label;
 960
 961                 ASSERT(clabel != NULL);
 962                 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
 963                     "got client label from request(1)", struct svc_req *, req);
 964
 965                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 966                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 967                             exi)) {
 968                                 resp->status = NFS3ERR_ACCES;
 969                                 goto out1;
 970                         }
 971                 }
 972         }
 973
             /*
              * Caller context handed to the VOP calls below.  CC_DONTBLOCK is
              * requested, and CC_WOULDBLOCK is looked for in cc_flags when
              * VOP_RWLOCK() or VOP_READ() fails with EAGAIN.
              */
 974         ct.cc_sysid = 0;
 975         ct.cc_pid = 0;
 976         ct.cc_caller_id = nfs3_srv_caller_id;
 977         ct.cc_flags = CC_DONTBLOCK;
 978
 979         /*
 980          * Enter the critical region before calling VOP_RWLOCK
 981          * to avoid a deadlock with write requests.
 982          */
 983         if (nbl_need_check(vp)) {
 984                 nbl_start_crit(vp, RW_READER);
 985                 in_crit = 1;
 986                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
 987                     NULL)) {
 988                         error = EACCES;
 989                         goto out;
 990                 }
 991         }
 992
 993         error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
 994
 995         /* check if a monitor detected a delegation conflict */
 996         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 997                 resp->status = NFS3ERR_JUKEBOX;
 998                 goto out1;
 999         }
1000
             /* From here on the error exits must drop the rwlock. */
1001         need_rwunlock = 1;
1002
1003         va.va_mask = AT_ALL;
1004         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1005
1006         /*
1007          * If we can't get the attributes, then we can't do the
1008          * right access checking.  So, we'll fail the request.
1009          */
1010         if (error)
1011                 goto out;
1012
1013         vap = &va;
1014
             /* Only regular files may be read. */
1015         if (vp->v_type != VREG) {
1016                 resp->status = NFS3ERR_INVAL;
1017                 goto out1;
1018         }
1019
             /*
              * The owner of the file skips the access check.  Anyone else
              * needs read permission or, failing that, execute permission
              * (presumably so that executables can be fetched; confirm).  A
              * failure with T_WOULDBLOCK set is not retried with VEXEC.
              */
1020         if (crgetuid(cr) != va.va_uid) {
1021                 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1022                 if (error) {
1023                         if (curthread->t_flag & T_WOULDBLOCK)
1024                                 goto out;
1025                         error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1026                         if (error)
1027                                 goto out;
1028                 }
1029         }
1030
             /* Refuse the read when MANDLOCK() holds for this vnode and mode. */
1031         if (MANDLOCK(vp, va.va_mode)) {
1032                 resp->status = NFS3ERR_ACCES;
1033                 goto out1;
1034         }
1035
1036         offset = args->offset;
             /*
              * A read starting at or past the end of file succeeds with no
              * data and eof set.  The locks are dropped here because `done'
              * only releases the vnode and the iovec array.
              */
1037         if (offset >= va.va_size) {
1038                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1039                 if (in_crit)
1040                         nbl_end_crit(vp);
1041                 resp->status = NFS3_OK;
1042                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1043                 resp->resok.count = 0;
1044                 resp->resok.eof = TRUE;
1045                 resp->resok.data.data_len = 0;
1046                 resp->resok.data.data_val = NULL;
1047                 resp->resok.data.mp = NULL;
1048                 /* RDMA */
1049                 resp->resok.wlist = args->wlist;
1050                 resp->resok.wlist_len = resp->resok.count;
1051                 if (resp->resok.wlist)
1052                         clist_zero_len(resp->resok.wlist);
1053                 goto done;
1054         }
1055
             /* A zero-length read also succeeds at once, but eof is FALSE. */
1056         if (args->count == 0) {
1057                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1058                 if (in_crit)
1059                         nbl_end_crit(vp);
1060                 resp->status = NFS3_OK;
1061                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1062                 resp->resok.count = 0;
1063                 resp->resok.eof = FALSE;
1064                 resp->resok.data.data_len = 0;
1065                 resp->resok.data.data_val = NULL;
1066                 resp->resok.data.mp = NULL;
1067                 /* RDMA */
1068                 resp->resok.wlist = args->wlist;
1069                 resp->resok.wlist_len = resp->resok.count;
1070                 if (resp->resok.wlist)
1071                         clist_zero_len(resp->resok.wlist);
1072                 goto done;
1073         }
1074
1075         /*
1076          * do not allocate more memory than the max. allowed
1077          * transfer size
1078          */
1079         if (args->count > rfs3_tsize(req))
1080                 args->count = rfs3_tsize(req);
1081
             /*
              * Zero-copy attempt: ask the file system to loan buffers through
              * an xuio.  If VOP_REQZCBUF() fails, the xuio is freed and the
              * copying paths below are used instead.
              */
1082         if (loaned_buffers) {
1083                 uiop = (uio_t *)rfs_setup_xuio(vp);
1084                 ASSERT(uiop != NULL);
1085                 uiop->uio_segflg = UIO_SYSSPACE;
1086                 uiop->uio_loffset = args->offset;
1087                 uiop->uio_resid = args->count;
1088
1089                 /* Jump to do the read if successful */
1090                 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1091                         /*
1092                          * Need to hold the vnode until after VOP_RETZCBUF()
1093                          * is called.
1094                          */
1095                         VN_HOLD(vp);
1096                         goto doio_read;
1097                 }
1098
1099                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1100                     uiop->uio_loffset, int, uiop->uio_resid);
1101
1102                 uiop->uio_extflg = 0;
1103                 /* failure to setup for zero copy */
1104                 rfs_free_xuio((void *)uiop);
1105                 loaned_buffers = 0;
1106         }
1107
1108         /*
1109          * If returning data via RDMA Write, then grab the chunk list.
1110          * If we aren't returning READ data w/RDMA_WRITE, then grab
1111          * a mblk.
1112          */
1113         if (rdma_used) {
1114                 (void) rdma_get_wchunk(req, &iov, args->wlist);
1115                 uio.uio_iov = &iov;
1116                 uio.uio_iovcnt = 1;
1117         } else {
1118                 /*
1119                  * mp will contain the data to be sent out in the read reply.
1120                  * For UDP, this will be freed after the reply has been sent
1121                  * out by the driver.  For TCP, it will be freed after the last
1122                  * segment associated with the reply has been ACKed by the
1123                  * client.
1124                  */
1125                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1126                 uio.uio_iov = iovp;
1127                 uio.uio_iovcnt = iovcnt;
1128         }
1129
1130         uio.uio_segflg = UIO_SYSSPACE;
1131         uio.uio_extflg = UIO_COPY_CACHED;
1132         uio.uio_loffset = args->offset;
1133         uio.uio_resid = args->count;
1134         uiop = &uio;
1135
1136 doio_read:
             /* uiop is either the xuio set up above or the local uio. */
1137         error = VOP_READ(vp, uiop, 0, cr, &ct);
1138
1139         if (error) {
1140                 if (mp)
1141                         freemsg(mp);
1142                 /* check if a monitor detected a delegation conflict */
1143                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1144                         resp->status = NFS3ERR_JUKEBOX;
1145                         goto out1;
1146                 }
1147                 goto out;
1148         }
1149
1150         /* make mblk using zc buffers */
1151         if (loaned_buffers) {
1152                 mp = uio_to_mblk(uiop);
1153                 ASSERT(mp != NULL);
1154         }
1155
             /*
              * Fetch fresh attributes for the reply.  A failure here does not
              * fail the read; it only leaves vap NULL and eof reported as
              * FALSE.
              */
1156         va.va_mask = AT_ALL;
1157         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1158
1159         if (error)
1160                 vap = NULL;
1161         else
1162                 vap = &va;
1163
1164         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1165
1166         if (in_crit)
1167                 nbl_end_crit(vp);
1168
1169         resp->status = NFS3_OK;
1170         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
             /* Bytes actually read: what was asked for minus what is left. */
1171         resp->resok.count = args->count - uiop->uio_resid;
1172         if (!error && offset + resp->resok.count == va.va_size)
1173                 resp->resok.eof = TRUE;
1174         else
1175                 resp->resok.eof = FALSE;
1176         resp->resok.data.data_len = resp->resok.count;
1177
             /* NOTE(review): presumably pads the data to an XDR boundary. */
1178         if (mp)
1179                 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1180
1181         resp->resok.data.mp = mp;
1182         resp->resok.size = (uint_t)args->count;
1183
1184         if (rdma_used) {
1185                 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1186                 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1187                         resp->status = NFS3ERR_INVAL;
1188                 }
1189         } else {
                     /*
                      * NOTE(review): mp is dereferenced unchecked; it is
                      * expected to be non-NULL on the non-RDMA paths.
                      */
1190                 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1191                 (resp->resok).wlist = NULL;
1192         }
1193
1194 done:
             /* Success exit; the rwlock and critical region were already left. */
1195         DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1196             cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1197
1198         VN_RELE(vp);
1199
             /* iovp is only ever set by rfs_read_alloc() above. */
1200         if (iovp != NULL)
1201                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1202
1203         return;
1204
1205 out:
             /* Error exit with an errno in `error'. */
1206         if (curthread->t_flag & T_WOULDBLOCK) {
1207                 curthread->t_flag &= ~T_WOULDBLOCK;
1208                 resp->status = NFS3ERR_JUKEBOX;
1209         } else
1210                 resp->status = puterrno3(error);
1211 out1:
             /* Error exit with resp->status already set. */
1212         DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1213             cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1214
1215         if (vp != NULL) {
1216                 if (need_rwunlock)
1217                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1218                 if (in_crit)
1219                         nbl_end_crit(vp);
1220                 VN_RELE(vp);
1221         }
1222         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1223
1224         if (iovp != NULL)
1225                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1226 }
1227
     /*
      * Free the mblk chain referenced by resok.data.mp of a READ3 reply.
      * Nothing is done unless the reply status is NFS3_OK and the chain
      * pointer is non-NULL.
      */
1228 void
1229 rfs3_read_free(READ3res *resp)
1230 {
1231         /* The resok arm is only looked at for a successful reply. */
1232         if (resp->status != NFS3_OK)
1233                 return;
1234
1235         /* data.mp may be NULL, e.g. rfs3_read() sets it so for empty reads. */
1236         if (resp->resok.data.mp != NULL)
1237                 freemsg(resp->resok.data.mp);
1238 }
1239
     /*
      * Return the address of the file handle (`file') in the READ3 arguments.
      */
1240 void *
1241 rfs3_read_getfh(READ3args *args)
1242 {
1243         void *fhp = &args->file;
1244         return (fhp);
1245 }
1246 
1247 #define MAX_IOVECS      12      /* on-stack iovecs in rfs3_write() */
1248
1249 #ifdef DEBUG
     /*
      * Counts of rfs3_write() mblk requests whose chain did (hits) or did not
      * (misses) fit in the MAX_IOVECS on-stack array.
      */
1250 static int rfs3_write_hits = 0;
1251 static int rfs3_write_misses = 0;
1252 #endif
1253
     /*
      * Server routine to handle the NFSv3 WRITE procedure.
      *
      * args->file is mapped to a vnode with nfs3_fhtovp() and args->count
      * bytes are written at args->offset with VOP_WRITE() while holding the
      * writer VOP_RWLOCK() and, when nbl_need_check() asks for it, inside an
      * nbmand critical region.  The data comes from an mblk chain
      * (args->mblk), an RDMA chunk (args->rlist) or a flat buffer
      * (args->data.data_val), in that order of preference.
      *
      * bvap and avap point at the before and after attributes handed to
      * vattr_to_wcc_data(); either may be NULL when not available.
      *
      * rwlock_ret stays -1 until VOP_RWLOCK() has been called (and is reset
      * to -1 on a delegation conflict); `out' only calls VOP_RWUNLOCK() when
      * it is not -1.
      *
      * Exits:
      *   err  - failure with an errno in `error'; converted with puterrno3(),
      *          or reported as NFS3ERR_JUKEBOX when T_WOULDBLOCK is set on
      *          the current thread.  Falls into err1.
      *   err1 - failure with resp->status already set; fills
      *          resfail.file_wcc.  Falls into out.
      *   out  - common tail, also used on success: drops the rwlock and the
      *          critical region when held and releases the vnode.
      */
1254 void
1255 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1256     struct svc_req *req, cred_t *cr, bool_t ro)
1257 {
1258         int error;
1259         vnode_t *vp;
1260         struct vattr *bvap = NULL;
1261         struct vattr bva;
1262         struct vattr *avap = NULL;
1263         struct vattr ava;
1264         u_offset_t rlimit;
1265         struct uio uio;
1266         struct iovec iov[MAX_IOVECS];
1267         mblk_t *m;
1268         struct iovec *iovp;
1269         int iovcnt;
1270         int ioflag;
1271         cred_t *savecred;
1272         int in_crit = 0;
1273         int rwlock_ret = -1;
1274         caller_context_t ct;
1275
1276         vp = nfs3_fhtovp(&args->file, exi);
1277
1278         DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1279             cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1280
1281         if (vp == NULL) {
1282                 error = ESTALE;
1283                 goto err;
1284         }
1285
             /*
              * On a labeled system a client whose label is not admin_low must
              * pass the equality check against the vnode.
              */
1286         if (is_system_labeled()) {
1287                 bslabel_t *clabel = req->rq_label;
1288
1289                 ASSERT(clabel != NULL);
1290                 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1291                     "got client label from request(1)", struct svc_req *, req);
1292
1293                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1294                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1295                             exi)) {
1296                                 resp->status = NFS3ERR_ACCES;
1297                                 goto err1;
1298                         }
1299                 }
1300         }
1301
             /*
              * Caller context handed to the VOP calls below.  CC_DONTBLOCK is
              * requested, and CC_WOULDBLOCK is looked for in cc_flags when
              * VOP_RWLOCK() or VOP_WRITE() returns EAGAIN.
              */
1302         ct.cc_sysid = 0;
1303         ct.cc_pid = 0;
1304         ct.cc_caller_id = nfs3_srv_caller_id;
1305         ct.cc_flags = CC_DONTBLOCK;
1306
1307         /*
1308          * We have to enter the critical region before calling VOP_RWLOCK
1309          * to avoid a deadlock with ufs.
1310          */
1311         if (nbl_need_check(vp)) {
1312                 nbl_start_crit(vp, RW_READER);
1313                 in_crit = 1;
1314                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1315                     NULL)) {
1316                         error = EACCES;
1317                         goto err;
1318                 }
1319         }
1320
1321         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1322
1323         /* check if a monitor detected a delegation conflict */
1324         if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1325                 resp->status = NFS3ERR_JUKEBOX;
                     /* Keep `out' from calling VOP_RWUNLOCK(). */
1326                 rwlock_ret = -1;
1327                 goto err1;
1328         }
1329
1330
1331         bva.va_mask = AT_ALL;
1332         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1333
1334         /*
1335          * If we can't get the attributes, then we can't do the
1336          * right access checking.  So, we'll fail the request.
1337          */
1338         if (error)
1339                 goto err;
1340
             /* Until the write happens, the after attributes are the before ones. */
1341         bvap = &bva;
1342         avap = bvap;
1343
             /* The count must agree with the length of the data supplied. */
1344         if (args->count != args->data.data_len) {
1345                 resp->status = NFS3ERR_INVAL;
1346                 goto err1;
1347         }
1348
1349         if (rdonly(ro, vp)) {
1350                 resp->status = NFS3ERR_ROFS;
1351                 goto err1;
1352         }
1353
             /* Only regular files may be written. */
1354         if (vp->v_type != VREG) {
1355                 resp->status = NFS3ERR_INVAL;
1356                 goto err1;
1357         }
1358
             /* The owner of the file skips the VWRITE access check. */
1359         if (crgetuid(cr) != bva.va_uid &&
1360             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1361                 goto err;
1362
             /* Refuse the write when MANDLOCK() holds for this vnode and mode. */
1363         if (MANDLOCK(vp, bva.va_mode)) {
1364                 resp->status = NFS3ERR_ACCES;
1365                 goto err1;
1366         }
1367
             /* A zero-length write succeeds without calling VOP_WRITE(). */
1368         if (args->count == 0) {
1369                 resp->status = NFS3_OK;
1370                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1371                 resp->resok.count = 0;
1372                 resp->resok.committed = args->stable;
1373                 resp->resok.verf = write3verf;
1374                 goto out;
1375         }
1376
             /*
              * Describe the data with an iovec array.  For an mblk chain,
              * iovcnt is the number of mblks; the on-stack array is used when
              * that fits in MAX_IOVECS, a kmem_alloc()ed one otherwise (freed
              * again below when iovp != iov).  RDMA (args->rlist) and
              * flat-buffer data use a single iovec.
              */
1377         if (args->mblk != NULL) {
1378                 iovcnt = 0;
1379                 for (m = args->mblk; m != NULL; m = m->b_cont)
1380                         iovcnt++;
1381                 if (iovcnt <= MAX_IOVECS) {
1382 #ifdef DEBUG
1383                         rfs3_write_hits++;
1384 #endif
1385                         iovp = iov;
1386                 } else {
1387 #ifdef DEBUG
1388                         rfs3_write_misses++;
1389 #endif
1390                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1391                 }
1392                 mblk_to_iov(args->mblk, iovcnt, iovp);
1393
1394         } else if (args->rlist != NULL) {
1395                 iovcnt = 1;
1396                 iovp = iov;
1397                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1398                 iovp->iov_len = args->count;
1399         } else {
1400                 iovcnt = 1;
1401                 iovp = iov;
1402                 iovp->iov_base = args->data.data_val;
1403                 iovp->iov_len = args->count;
1404         }
1405
1406         uio.uio_iov = iovp;
1407         uio.uio_iovcnt = iovcnt;
1408
1409         uio.uio_segflg = UIO_SYSSPACE;
1410         uio.uio_extflg = UIO_COPY_DEFAULT;
1411         uio.uio_loffset = args->offset;
1412         uio.uio_resid = args->count;
             /*
              * Honour the file size limit in curproc->p_fsz_ctl: resid is cut
              * back when fewer than args->count bytes remain below the limit.
              * NOTE(review): rlimit is unsigned, so an offset beyond the limit
              * wraps instead of going negative and resid is left alone;
              * presumably VOP_WRITE() rejects that case - confirm.
              */
1413         uio.uio_llimit = curproc->p_fsz_ctl;
1414         rlimit = uio.uio_llimit - args->offset;
1415         if (rlimit < (u_offset_t)uio.uio_resid)
1416                 uio.uio_resid = (int)rlimit;
1417
             /*
              * Translate the requested stability level into VOP_WRITE() flags.
              * An unknown level is rejected, after giving back a heap iovec
              * array if one was allocated.
              */
1418         if (args->stable == UNSTABLE)
1419                 ioflag = 0;
1420         else if (args->stable == FILE_SYNC)
1421                 ioflag = FSYNC;
1422         else if (args->stable == DATA_SYNC)
1423                 ioflag = FDSYNC;
1424         else {
1425                 if (iovp != iov)
1426                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1427                 resp->status = NFS3ERR_INVAL;
1428                 goto err1;
1429         }
1430
1431         /*
1432          * We're changing creds because VM may fault and we need
1433          * the cred of the current thread to be used if quota
1434          * checking is enabled.
1435          */
1436         savecred = curthread->t_cred;
1437         curthread->t_cred = cr;
1438         error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1439         curthread->t_cred = savecred;
1440
1441         if (iovp != iov)
1442                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1443
1444         /* check if a monitor detected a delegation conflict */
1445         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1446                 resp->status = NFS3ERR_JUKEBOX;
1447                 goto err1;
1448         }
1449
             /*
              * The after attributes are fetched before `error' is examined, so
              * that a failed write still reports them through err/err1.
              */
1450         ava.va_mask = AT_ALL;
1451         avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1452
1453         if (error)
1454                 goto err;
1455
1456         /*
1457          * If we were unable to get the V_WRITELOCK_TRUE, then we
1458          * may not have accurate after attrs, so check if
1459          * we have both attributes, they have a non-zero va_seq, and
1460          * va_seq has changed by exactly one,
1461          * if not, turn off the before attr.
1462          */
1463         if (rwlock_ret != V_WRITELOCK_TRUE) {
1464                 if (bvap == NULL || avap == NULL ||
1465                     bvap->va_seq == 0 || avap->va_seq == 0 ||
1466                     avap->va_seq != (bvap->va_seq + 1)) {
1467                         bvap = NULL;
1468                 }
1469         }
1470
1471         resp->status = NFS3_OK;
1472         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
             /* Bytes actually written: what was asked for minus what is left. */
1473         resp->resok.count = args->count - uio.uio_resid;
1474         resp->resok.committed = args->stable;
1475         resp->resok.verf = write3verf;
1476         goto out;
1477
1478 err:
             /* Error exit with an errno in `error'. */
1479         if (curthread->t_flag & T_WOULDBLOCK) {
1480                 curthread->t_flag &= ~T_WOULDBLOCK;
1481                 resp->status = NFS3ERR_JUKEBOX;
1482         } else
1483                 resp->status = puterrno3(error);
1484 err1:
             /* Error exit with resp->status already set. */
1485         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1486 out:
             /* Common tail for success and failure. */
1487         DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1488             cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1489
1490         if (vp != NULL) {
1491                 if (rwlock_ret != -1)
1492                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1493                 if (in_crit)
1494                         nbl_end_crit(vp);
1495                 VN_RELE(vp);
1496         }
1497 }
1498
     /*
      * Return the address of the file handle (`file') in the WRITE3 arguments.
      */
1499 void *
1500 rfs3_write_getfh(WRITE3args *args)
1501 {
1502         void *fhp = &args->file;
1503         return (fhp);
1504 }
1505 
1506 void
1507 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1508     struct svc_req *req, cred_t *cr, bool_t ro)
1509 {
1510         int error;
1511         int in_crit = 0;
1512         vnode_t *vp;
1513         vnode_t *tvp = NULL;
1514         vnode_t *dvp;
1515         struct vattr *vap;
1516         struct vattr va;
1517         struct vattr *dbvap;
1518         struct vattr dbva;
1519         struct vattr *davap;
1520         struct vattr dava;
1521         enum vcexcl excl;
1522         nfstime3 *mtime;
1523         len_t reqsize;
1524         bool_t trunc;
1525         struct sockaddr *ca;
1526         char *name = NULL;
1527 
1528         dbvap = NULL;
1529         davap = NULL;
1530 
1531         dvp = nfs3_fhtovp(&args->where.dir, exi);
1532 
1533         DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1534             cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1535 
1536         if (dvp == NULL) {
1537                 error = ESTALE;
1538                 goto out;
1539         }
1540 
1541         dbva.va_mask = AT_ALL;
1542         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1543         davap = dbvap;
1544 
1545         if (args->where.name == nfs3nametoolong) {
1546                 resp->status = NFS3ERR_NAMETOOLONG;
1547                 goto out1;
1548         }
1549 
1550         if (args->where.name == NULL || *(args->where.name) == '\0') {
1551                 resp->status = NFS3ERR_ACCES;
1552                 goto out1;
1553         }
1554 
1555         if (rdonly(ro, dvp)) {
1556                 resp->status = NFS3ERR_ROFS;
1557                 goto out1;
1558         }
1559 
1560         if (is_system_labeled()) {
1561                 bslabel_t *clabel = req->rq_label;
1562 
1563                 ASSERT(clabel != NULL);
1564                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1565                     "got client label from request(1)", struct svc_req *, req);
1566 
1567                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1568                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1569                             exi)) {
1570                                 resp->status = NFS3ERR_ACCES;
1571                                 goto out1;
1572                         }
1573                 }
1574         }
1575 
1576         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1577         name = nfscmd_convname(ca, exi, args->where.name,
1578             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1579 
1580         if (name == NULL) {
1581                 /* This is really a Solaris EILSEQ */
1582                 resp->status = NFS3ERR_INVAL;
1583                 goto out1;
1584         }
1585 
1586         if (args->how.mode == EXCLUSIVE) {
1587                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1588                 va.va_type = VREG;
1589                 va.va_mode = (mode_t)0;
1590                 /*
1591                  * Ensure no time overflows and that types match
1592                  */
1593                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1594                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1595                 va.va_mtime.tv_nsec = mtime->nseconds;
1596                 excl = EXCL;
1597         } else {
1598                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1599                     &va);
1600                 if (error)
1601                         goto out;
1602                 va.va_mask |= AT_TYPE;
1603                 va.va_type = VREG;
1604                 if (args->how.mode == GUARDED)
1605                         excl = EXCL;
1606                 else {
1607                         excl = NONEXCL;
1608 
1609                         /*
1610                          * During creation of file in non-exclusive mode
1611                          * if size of file is being set then make sure
1612                          * that if the file already exists that no conflicting
1613                          * non-blocking mandatory locks exists in the region
1614                          * being modified. If there are conflicting locks fail
1615                          * the operation with EACCES.
1616                          */
1617                         if (va.va_mask & AT_SIZE) {
1618                                 struct vattr tva;
1619 
1620                                 /*
1621                                  * Does file already exist?
1622                                  */
1623                                 error = VOP_LOOKUP(dvp, name, &tvp,
1624                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1625 
1626                                 /*
1627                                  * Check to see if the file has been delegated
1628                                  * to a v4 client.  If so, then begin recall of
1629                                  * the delegation and return JUKEBOX to allow
1630                                  * the client to retransmit its request.
1631                                  */
1632 
1633                                 trunc = va.va_size == 0;
1634                                 if (!error &&
1635                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1636                                         resp->status = NFS3ERR_JUKEBOX;
1637                                         goto out1;
1638                                 }
1639 
1640                                 /*
1641                                  * Check for NBMAND lock conflicts
1642                                  */
1643                                 if (!error && nbl_need_check(tvp)) {
1644                                         u_offset_t offset;
1645                                         ssize_t len;
1646 
1647                                         nbl_start_crit(tvp, RW_READER);
1648                                         in_crit = 1;
1649 
1650                                         tva.va_mask = AT_SIZE;
1651                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1652                                             NULL);
1653                                         /*
1654                                          * Can't check for conflicts, so return
1655                                          * error.
1656                                          */
1657                                         if (error)
1658                                                 goto out;
1659 
1660                                         offset = tva.va_size < va.va_size ?
1661                                             tva.va_size : va.va_size;
1662                                         len = tva.va_size < va.va_size ?
1663                                             va.va_size - tva.va_size :
1664                                             tva.va_size - va.va_size;
1665                                         if (nbl_conflict(tvp, NBL_WRITE,
1666                                             offset, len, 0, NULL)) {
1667                                                 error = EACCES;
1668                                                 goto out;
1669                                         }
1670                                 } else if (tvp) {
1671                                         VN_RELE(tvp);
1672                                         tvp = NULL;
1673                                 }
1674                         }
1675                 }
1676                 if (va.va_mask & AT_SIZE)
1677                         reqsize = va.va_size;
1678         }
1679 
1680         /*
1681          * Must specify the mode.
1682          */
1683         if (!(va.va_mask & AT_MODE)) {
1684                 resp->status = NFS3ERR_INVAL;
1685                 goto out1;
1686         }
1687 
1688         /*
1689          * If the filesystem is exported with nosuid, then mask off
1690          * the setuid and setgid bits.
1691          */
1692         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1693                 va.va_mode &= ~(VSUID | VSGID);
1694 
1695 tryagain:
1696         /*
1697          * The file open mode used is VWRITE.  If the client needs
1698          * some other semantic, then it should do the access checking
1699          * itself.  It would have been nice to have the file open mode
1700          * passed as part of the arguments.
1701          */
1702         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1703             &vp, cr, 0, NULL, NULL);
1704 
1705         dava.va_mask = AT_ALL;
1706         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1707 
1708         if (error) {
1709                 /*
1710                  * If we got something other than file already exists
1711                  * then just return this error.  Otherwise, we got
1712                  * EEXIST.  If we were doing a GUARDED create, then
1713                  * just return this error.  Otherwise, we need to
1714                  * make sure that this wasn't a duplicate of an
1715                  * exclusive create request.
1716                  *
1717                  * The assumption is made that a non-exclusive create
1718                  * request will never return EEXIST.
1719                  */
1720                 if (error != EEXIST || args->how.mode == GUARDED)
1721                         goto out;
1722                 /*
1723                  * Lookup the file so that we can get a vnode for it.
1724                  */
1725                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1726                     NULL, cr, NULL, NULL, NULL);
1727                 if (error) {
1728                         /*
1729                          * We couldn't find the file that we thought that
1730                          * we just created.  So, we'll just try creating
1731                          * it again.
1732                          */
1733                         if (error == ENOENT)
1734                                 goto tryagain;
1735                         goto out;
1736                 }
1737 
1738                 /*
1739                  * If the file is delegated to a v4 client, go ahead
1740                  * and initiate recall, this create is a hint that a
1741                  * conflicting v3 open has occurred.
1742                  */
1743 
1744                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1745                         VN_RELE(vp);
1746                         resp->status = NFS3ERR_JUKEBOX;
1747                         goto out1;
1748                 }
1749 
1750                 va.va_mask = AT_ALL;
1751                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1752 
1753                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1754                 /* % with INT32_MAX to prevent overflows */
1755                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1756                     vap->va_mtime.tv_sec !=
1757                     (mtime->seconds % INT32_MAX) ||
1758                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1759                         VN_RELE(vp);
1760                         error = EEXIST;
1761                         goto out;
1762                 }
1763         } else {
1764 
1765                 if ((args->how.mode == UNCHECKED ||
1766                     args->how.mode == GUARDED) &&
1767                     args->how.createhow3_u.obj_attributes.size.set_it &&
1768                     va.va_size == 0)
1769                         trunc = TRUE;
1770                 else
1771                         trunc = FALSE;
1772 
1773                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1774                         VN_RELE(vp);
1775                         resp->status = NFS3ERR_JUKEBOX;
1776                         goto out1;
1777                 }
1778 
1779                 va.va_mask = AT_ALL;
1780                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1781 
1782                 /*
1783                  * We need to check to make sure that the file got
1784                  * created to the indicated size.  If not, we do a
1785                  * setattr to try to change the size, but we don't
                 * try too hard.  This shouldn't be a problem as most
                 * clients will only specify a size of zero which
1788                  * local file systems handle.  However, even if
1789                  * the client does specify a non-zero size, it can
1790                  * still recover by checking the size of the file
1791                  * after it has created it and then issue a setattr
1792                  * request of its own to set the size of the file.
1793                  */
1794                 if (vap != NULL &&
1795                     (args->how.mode == UNCHECKED ||
1796                     args->how.mode == GUARDED) &&
1797                     args->how.createhow3_u.obj_attributes.size.set_it &&
1798                     vap->va_size != reqsize) {
1799                         va.va_mask = AT_SIZE;
1800                         va.va_size = reqsize;
1801                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1802                         va.va_mask = AT_ALL;
1803                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1804                 }
1805         }
1806 
1807         if (name != args->where.name)
1808                 kmem_free(name, MAXPATHLEN + 1);
1809 
1810         error = makefh3(&resp->resok.obj.handle, vp, exi);
1811         if (error)
1812                 resp->resok.obj.handle_follows = FALSE;
1813         else
1814                 resp->resok.obj.handle_follows = TRUE;
1815 
1816         /*
1817          * Force modified data and metadata out to stable storage.
1818          */
1819         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1820         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1821 
1822         VN_RELE(vp);
1823         if (tvp != NULL) {
1824                 if (in_crit)
1825                         nbl_end_crit(tvp);
1826                 VN_RELE(tvp);
1827         }
1828 
1829         resp->status = NFS3_OK;
1830         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1831         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1832 
1833         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1834             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1835 
1836         VN_RELE(dvp);
1837         return;
1838 
1839 out:
1840         if (curthread->t_flag & T_WOULDBLOCK) {
1841                 curthread->t_flag &= ~T_WOULDBLOCK;
1842                 resp->status = NFS3ERR_JUKEBOX;
1843         } else
1844                 resp->status = puterrno3(error);
1845 out1:
1846         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1847             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1848 
1849         if (name != NULL && name != args->where.name)
1850                 kmem_free(name, MAXPATHLEN + 1);
1851 
1852         if (tvp != NULL) {
1853                 if (in_crit)
1854                         nbl_end_crit(tvp);
1855                 VN_RELE(tvp);
1856         }
1857         if (dvp != NULL)
1858                 VN_RELE(dvp);
1859         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1860 }
1861 
1862 void *
1863 rfs3_create_getfh(CREATE3args *args)
1864 {
1865 
1866         return (&args->where.dir);
1867 }
1868 
1869 void
1870 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1871     struct svc_req *req, cred_t *cr, bool_t ro)
1872 {
1873         int error;
1874         vnode_t *vp = NULL;
1875         vnode_t *dvp;
1876         struct vattr *vap;
1877         struct vattr va;
1878         struct vattr *dbvap;
1879         struct vattr dbva;
1880         struct vattr *davap;
1881         struct vattr dava;
1882         struct sockaddr *ca;
1883         char *name = NULL;
1884 
1885         dbvap = NULL;
1886         davap = NULL;
1887 
1888         dvp = nfs3_fhtovp(&args->where.dir, exi);
1889 
1890         DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1891             cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1892 
1893         if (dvp == NULL) {
1894                 error = ESTALE;
1895                 goto out;
1896         }
1897 
1898         dbva.va_mask = AT_ALL;
1899         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1900         davap = dbvap;
1901 
1902         if (args->where.name == nfs3nametoolong) {
1903                 resp->status = NFS3ERR_NAMETOOLONG;
1904                 goto out1;
1905         }
1906 
1907         if (args->where.name == NULL || *(args->where.name) == '\0') {
1908                 resp->status = NFS3ERR_ACCES;
1909                 goto out1;
1910         }
1911 
1912         if (rdonly(ro, dvp)) {
1913                 resp->status = NFS3ERR_ROFS;
1914                 goto out1;
1915         }
1916 
1917         if (is_system_labeled()) {
1918                 bslabel_t *clabel = req->rq_label;
1919 
1920                 ASSERT(clabel != NULL);
1921                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1922                     "got client label from request(1)", struct svc_req *, req);
1923 
1924                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1925                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1926                             exi)) {
1927                                 resp->status = NFS3ERR_ACCES;
1928                                 goto out1;
1929                         }
1930                 }
1931         }
1932 
1933         error = sattr3_to_vattr(&args->attributes, &va);
1934         if (error)
1935                 goto out;
1936 
1937         if (!(va.va_mask & AT_MODE)) {
1938                 resp->status = NFS3ERR_INVAL;
1939                 goto out1;
1940         }
1941 
1942         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1943         name = nfscmd_convname(ca, exi, args->where.name,
1944             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1945 
1946         if (name == NULL) {
1947                 resp->status = NFS3ERR_INVAL;
1948                 goto out1;
1949         }
1950 
1951         va.va_mask |= AT_TYPE;
1952         va.va_type = VDIR;
1953 
1954         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1955 
1956         if (name != args->where.name)
1957                 kmem_free(name, MAXPATHLEN + 1);
1958 
1959         dava.va_mask = AT_ALL;
1960         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1961 
1962         /*
1963          * Force modified data and metadata out to stable storage.
1964          */
1965         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1966 
1967         if (error)
1968                 goto out;
1969 
1970         error = makefh3(&resp->resok.obj.handle, vp, exi);
1971         if (error)
1972                 resp->resok.obj.handle_follows = FALSE;
1973         else
1974                 resp->resok.obj.handle_follows = TRUE;
1975 
1976         va.va_mask = AT_ALL;
1977         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1978 
1979         /*
1980          * Force modified data and metadata out to stable storage.
1981          */
1982         (void) VOP_FSYNC(vp, 0, cr, NULL);
1983 
1984         VN_RELE(vp);
1985 
1986         resp->status = NFS3_OK;
1987         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1988         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1989 
1990         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1991             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1992         VN_RELE(dvp);
1993 
1994         return;
1995 
1996 out:
1997         if (curthread->t_flag & T_WOULDBLOCK) {
1998                 curthread->t_flag &= ~T_WOULDBLOCK;
1999                 resp->status = NFS3ERR_JUKEBOX;
2000         } else
2001                 resp->status = puterrno3(error);
2002 out1:
2003         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2004             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2005         if (dvp != NULL)
2006                 VN_RELE(dvp);
2007         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2008 }
2009 
2010 void *
2011 rfs3_mkdir_getfh(MKDIR3args *args)
2012 {
2013 
2014         return (&args->where.dir);
2015 }
2016 
2017 void
2018 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2019     struct svc_req *req, cred_t *cr, bool_t ro)
2020 {
2021         int error;
2022         vnode_t *vp;
2023         vnode_t *dvp;
2024         struct vattr *vap;
2025         struct vattr va;
2026         struct vattr *dbvap;
2027         struct vattr dbva;
2028         struct vattr *davap;
2029         struct vattr dava;
2030         struct sockaddr *ca;
2031         char *name = NULL;
2032         char *symdata = NULL;
2033 
2034         dbvap = NULL;
2035         davap = NULL;
2036 
2037         dvp = nfs3_fhtovp(&args->where.dir, exi);
2038 
2039         DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2040             cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2041 
2042         if (dvp == NULL) {
2043                 error = ESTALE;
2044                 goto err;
2045         }
2046 
2047         dbva.va_mask = AT_ALL;
2048         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2049         davap = dbvap;
2050 
2051         if (args->where.name == nfs3nametoolong) {
2052                 resp->status = NFS3ERR_NAMETOOLONG;
2053                 goto err1;
2054         }
2055 
2056         if (args->where.name == NULL || *(args->where.name) == '\0') {
2057                 resp->status = NFS3ERR_ACCES;
2058                 goto err1;
2059         }
2060 
2061         if (rdonly(ro, dvp)) {
2062                 resp->status = NFS3ERR_ROFS;
2063                 goto err1;
2064         }
2065 
2066         if (is_system_labeled()) {
2067                 bslabel_t *clabel = req->rq_label;
2068 
2069                 ASSERT(clabel != NULL);
2070                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2071                     "got client label from request(1)", struct svc_req *, req);
2072 
2073                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2074                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2075                             exi)) {
2076                                 resp->status = NFS3ERR_ACCES;
2077                                 goto err1;
2078                         }
2079                 }
2080         }
2081 
2082         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2083         if (error)
2084                 goto err;
2085 
2086         if (!(va.va_mask & AT_MODE)) {
2087                 resp->status = NFS3ERR_INVAL;
2088                 goto err1;
2089         }
2090 
2091         if (args->symlink.symlink_data == nfs3nametoolong) {
2092                 resp->status = NFS3ERR_NAMETOOLONG;
2093                 goto err1;
2094         }
2095 
2096         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2097         name = nfscmd_convname(ca, exi, args->where.name,
2098             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2099 
2100         if (name == NULL) {
2101                 /* This is really a Solaris EILSEQ */
2102                 resp->status = NFS3ERR_INVAL;
2103                 goto err1;
2104         }
2105 
2106         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2107             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2108         if (symdata == NULL) {
2109                 /* This is really a Solaris EILSEQ */
2110                 resp->status = NFS3ERR_INVAL;
2111                 goto err1;
2112         }
2113 
2114 
2115         va.va_mask |= AT_TYPE;
2116         va.va_type = VLNK;
2117 
2118         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2119 
2120         dava.va_mask = AT_ALL;
2121         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2122 
2123         if (error)
2124                 goto err;
2125 
2126         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2127             NULL, NULL, NULL);
2128 
2129         /*
2130          * Force modified data and metadata out to stable storage.
2131          */
2132         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2133 
2134 
2135         resp->status = NFS3_OK;
2136         if (error) {
2137                 resp->resok.obj.handle_follows = FALSE;
2138                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2139                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2140                 goto out;
2141         }
2142 
2143         error = makefh3(&resp->resok.obj.handle, vp, exi);
2144         if (error)
2145                 resp->resok.obj.handle_follows = FALSE;
2146         else
2147                 resp->resok.obj.handle_follows = TRUE;
2148 
2149         va.va_mask = AT_ALL;
2150         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2151 
2152         /*
2153          * Force modified data and metadata out to stable storage.
2154          */
2155         (void) VOP_FSYNC(vp, 0, cr, NULL);
2156 
2157         VN_RELE(vp);
2158 
2159         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2160         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2161         goto out;
2162 
2163 err:
2164         if (curthread->t_flag & T_WOULDBLOCK) {
2165                 curthread->t_flag &= ~T_WOULDBLOCK;
2166                 resp->status = NFS3ERR_JUKEBOX;
2167         } else
2168                 resp->status = puterrno3(error);
2169 err1:
2170         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2171 out:
2172         if (name != NULL && name != args->where.name)
2173                 kmem_free(name, MAXPATHLEN + 1);
2174         if (symdata != NULL && symdata != args->symlink.symlink_data)
2175                 kmem_free(symdata, MAXPATHLEN + 1);
2176 
2177         DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2178             cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2179 
2180         if (dvp != NULL)
2181                 VN_RELE(dvp);
2182 }
2183 
2184 void *
2185 rfs3_symlink_getfh(SYMLINK3args *args)
2186 {
2187 
2188         return (&args->where.dir);
2189 }
2190 
2191 void
2192 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2193     struct svc_req *req, cred_t *cr, bool_t ro)
2194 {
2195         int error;
2196         vnode_t *vp;
2197         vnode_t *realvp;
2198         vnode_t *dvp;
2199         struct vattr *vap;
2200         struct vattr va;
2201         struct vattr *dbvap;
2202         struct vattr dbva;
2203         struct vattr *davap;
2204         struct vattr dava;
2205         int mode;
2206         enum vcexcl excl;
2207         struct sockaddr *ca;
2208         char *name = NULL;
2209 
2210         dbvap = NULL;
2211         davap = NULL;
2212 
2213         dvp = nfs3_fhtovp(&args->where.dir, exi);
2214 
2215         DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2216             cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2217 
2218         if (dvp == NULL) {
2219                 error = ESTALE;
2220                 goto out;
2221         }
2222 
2223         dbva.va_mask = AT_ALL;
2224         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2225         davap = dbvap;
2226 
2227         if (args->where.name == nfs3nametoolong) {
2228                 resp->status = NFS3ERR_NAMETOOLONG;
2229                 goto out1;
2230         }
2231 
2232         if (args->where.name == NULL || *(args->where.name) == '\0') {
2233                 resp->status = NFS3ERR_ACCES;
2234                 goto out1;
2235         }
2236 
2237         if (rdonly(ro, dvp)) {
2238                 resp->status = NFS3ERR_ROFS;
2239                 goto out1;
2240         }
2241 
2242         if (is_system_labeled()) {
2243                 bslabel_t *clabel = req->rq_label;
2244 
2245                 ASSERT(clabel != NULL);
2246                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2247                     "got client label from request(1)", struct svc_req *, req);
2248 
2249                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2250                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2251                             exi)) {
2252                                 resp->status = NFS3ERR_ACCES;
2253                                 goto out1;
2254                         }
2255                 }
2256         }
2257 
2258         switch (args->what.type) {
2259         case NF3CHR:
2260         case NF3BLK:
2261                 error = sattr3_to_vattr(
2262                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2263                 if (error)
2264                         goto out;
2265                 if (secpolicy_sys_devices(cr) != 0) {
2266                         resp->status = NFS3ERR_PERM;
2267                         goto out1;
2268                 }
2269                 if (args->what.type == NF3CHR)
2270                         va.va_type = VCHR;
2271                 else
2272                         va.va_type = VBLK;
2273                 va.va_rdev = makedevice(
2274                     args->what.mknoddata3_u.device.spec.specdata1,
2275                     args->what.mknoddata3_u.device.spec.specdata2);
2276                 va.va_mask |= AT_TYPE | AT_RDEV;
2277                 break;
2278         case NF3SOCK:
2279                 error = sattr3_to_vattr(
2280                     &args->what.mknoddata3_u.pipe_attributes, &va);
2281                 if (error)
2282                         goto out;
2283                 va.va_type = VSOCK;
2284                 va.va_mask |= AT_TYPE;
2285                 break;
2286         case NF3FIFO:
2287                 error = sattr3_to_vattr(
2288                     &args->what.mknoddata3_u.pipe_attributes, &va);
2289                 if (error)
2290                         goto out;
2291                 va.va_type = VFIFO;
2292                 va.va_mask |= AT_TYPE;
2293                 break;
2294         default:
2295                 resp->status = NFS3ERR_BADTYPE;
2296                 goto out1;
2297         }
2298 
2299         /*
2300          * Must specify the mode.
2301          */
2302         if (!(va.va_mask & AT_MODE)) {
2303                 resp->status = NFS3ERR_INVAL;
2304                 goto out1;
2305         }
2306 
2307         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2308         name = nfscmd_convname(ca, exi, args->where.name,
2309             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2310 
2311         if (name == NULL) {
2312                 resp->status = NFS3ERR_INVAL;
2313                 goto out1;
2314         }
2315 
2316         excl = EXCL;
2317 
2318         mode = 0;
2319 
2320         error = VOP_CREATE(dvp, name, &va, excl, mode,
2321             &vp, cr, 0, NULL, NULL);
2322 
2323         if (name != args->where.name)
2324                 kmem_free(name, MAXPATHLEN + 1);
2325 
2326         dava.va_mask = AT_ALL;
2327         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2328 
2329         /*
2330          * Force modified data and metadata out to stable storage.
2331          */
2332         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2333 
2334         if (error)
2335                 goto out;
2336 
2337         resp->status = NFS3_OK;
2338 
2339         error = makefh3(&resp->resok.obj.handle, vp, exi);
2340         if (error)
2341                 resp->resok.obj.handle_follows = FALSE;
2342         else
2343                 resp->resok.obj.handle_follows = TRUE;
2344 
2345         va.va_mask = AT_ALL;
2346         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2347 
2348         /*
2349          * Force modified metadata out to stable storage.
2350          *
2351          * if a underlying vp exists, pass it to VOP_FSYNC
2352          */
2353         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2354                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2355         else
2356                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2357 
2358         VN_RELE(vp);
2359 
2360         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2361         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2362         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2363             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2364         VN_RELE(dvp);
2365         return;
2366 
2367 out:
2368         if (curthread->t_flag & T_WOULDBLOCK) {
2369                 curthread->t_flag &= ~T_WOULDBLOCK;
2370                 resp->status = NFS3ERR_JUKEBOX;
2371         } else
2372                 resp->status = puterrno3(error);
2373 out1:
2374         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2375             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2376         if (dvp != NULL)
2377                 VN_RELE(dvp);
2378         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2379 }
2380 
2381 void *
2382 rfs3_mknod_getfh(MKNOD3args *args)
2383 {
2384 
2385         return (&args->where.dir);
2386 }
2387 
2388 void
2389 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2390     struct svc_req *req, cred_t *cr, bool_t ro)
2391 {
2392         int error = 0;
2393         vnode_t *vp;
2394         struct vattr *bvap;
2395         struct vattr bva;
2396         struct vattr *avap;
2397         struct vattr ava;
2398         vnode_t *targvp = NULL;
2399         struct sockaddr *ca;
2400         char *name = NULL;
2401 
2402         bvap = NULL;
2403         avap = NULL;
2404 
2405         vp = nfs3_fhtovp(&args->object.dir, exi);
2406 
2407         DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2408             cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2409 
2410         if (vp == NULL) {
2411                 error = ESTALE;
2412                 goto err;
2413         }
2414 
2415         bva.va_mask = AT_ALL;
2416         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2417         avap = bvap;
2418 
2419         if (vp->v_type != VDIR) {
2420                 resp->status = NFS3ERR_NOTDIR;
2421                 goto err1;
2422         }
2423 
2424         if (args->object.name == nfs3nametoolong) {
2425                 resp->status = NFS3ERR_NAMETOOLONG;
2426                 goto err1;
2427         }
2428 
2429         if (args->object.name == NULL || *(args->object.name) == '\0') {
2430                 resp->status = NFS3ERR_ACCES;
2431                 goto err1;
2432         }
2433 
2434         if (rdonly(ro, vp)) {
2435                 resp->status = NFS3ERR_ROFS;
2436                 goto err1;
2437         }
2438 
2439         if (is_system_labeled()) {
2440                 bslabel_t *clabel = req->rq_label;
2441 
2442                 ASSERT(clabel != NULL);
2443                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2444                     "got client label from request(1)", struct svc_req *, req);
2445 
2446                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2447                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2448                             exi)) {
2449                                 resp->status = NFS3ERR_ACCES;
2450                                 goto err1;
2451                         }
2452                 }
2453         }
2454 
2455         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2456         name = nfscmd_convname(ca, exi, args->object.name,
2457             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2458 
2459         if (name == NULL) {
2460                 resp->status = NFS3ERR_INVAL;
2461                 goto err1;
2462         }
2463 
2464         /*
2465          * Check for a conflict with a non-blocking mandatory share
2466          * reservation and V4 delegations
2467          */
2468         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2469             NULL, cr, NULL, NULL, NULL);
2470         if (error != 0)
2471                 goto err;
2472 
2473         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2474                 resp->status = NFS3ERR_JUKEBOX;
2475                 goto err1;
2476         }
2477 
2478         if (!nbl_need_check(targvp)) {
2479                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2480         } else {
2481                 nbl_start_crit(targvp, RW_READER);
2482                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2483                         error = EACCES;
2484                 } else {
2485                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2486                 }
2487                 nbl_end_crit(targvp);
2488         }
2489         VN_RELE(targvp);
2490         targvp = NULL;
2491 
2492         ava.va_mask = AT_ALL;
2493         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2494 
2495         /*
2496          * Force modified data and metadata out to stable storage.
2497          */
2498         (void) VOP_FSYNC(vp, 0, cr, NULL);
2499 
2500         if (error)
2501                 goto err;
2502 
2503         resp->status = NFS3_OK;
2504         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2505         goto out;
2506 
2507 err:
2508         if (curthread->t_flag & T_WOULDBLOCK) {
2509                 curthread->t_flag &= ~T_WOULDBLOCK;
2510                 resp->status = NFS3ERR_JUKEBOX;
2511         } else
2512                 resp->status = puterrno3(error);
2513 err1:
2514         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2515 out:
2516         DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2517             cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2518 
2519         if (name != NULL && name != args->object.name)
2520                 kmem_free(name, MAXPATHLEN + 1);
2521 
2522         if (vp != NULL)
2523                 VN_RELE(vp);
2524 }
2525 
2526 void *
2527 rfs3_remove_getfh(REMOVE3args *args)
2528 {
2529 
2530         return (&args->object.dir);
2531 }
2532 
2533 void
2534 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2535     struct svc_req *req, cred_t *cr, bool_t ro)
2536 {
2537         int error;
2538         vnode_t *vp;
2539         struct vattr *bvap;
2540         struct vattr bva;
2541         struct vattr *avap;
2542         struct vattr ava;
2543         struct sockaddr *ca;
2544         char *name = NULL;
2545 
2546         bvap = NULL;
2547         avap = NULL;
2548 
2549         vp = nfs3_fhtovp(&args->object.dir, exi);
2550 
2551         DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2552             cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2553 
2554         if (vp == NULL) {
2555                 error = ESTALE;
2556                 goto err;
2557         }
2558 
2559         bva.va_mask = AT_ALL;
2560         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2561         avap = bvap;
2562 
2563         if (vp->v_type != VDIR) {
2564                 resp->status = NFS3ERR_NOTDIR;
2565                 goto err1;
2566         }
2567 
2568         if (args->object.name == nfs3nametoolong) {
2569                 resp->status = NFS3ERR_NAMETOOLONG;
2570                 goto err1;
2571         }
2572 
2573         if (args->object.name == NULL || *(args->object.name) == '\0') {
2574                 resp->status = NFS3ERR_ACCES;
2575                 goto err1;
2576         }
2577 
2578         if (rdonly(ro, vp)) {
2579                 resp->status = NFS3ERR_ROFS;
2580                 goto err1;
2581         }
2582 
2583         if (is_system_labeled()) {
2584                 bslabel_t *clabel = req->rq_label;
2585 
2586                 ASSERT(clabel != NULL);
2587                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2588                     "got client label from request(1)", struct svc_req *, req);
2589 
2590                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2591                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2592                             exi)) {
2593                                 resp->status = NFS3ERR_ACCES;
2594                                 goto err1;
2595                         }
2596                 }
2597         }
2598 
2599         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2600         name = nfscmd_convname(ca, exi, args->object.name,
2601             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2602 
2603         if (name == NULL) {
2604                 resp->status = NFS3ERR_INVAL;
2605                 goto err1;
2606         }
2607 
2608         error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2609 
2610         if (name != args->object.name)
2611                 kmem_free(name, MAXPATHLEN + 1);
2612 
2613         ava.va_mask = AT_ALL;
2614         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2615 
2616         /*
2617          * Force modified data and metadata out to stable storage.
2618          */
2619         (void) VOP_FSYNC(vp, 0, cr, NULL);
2620 
2621         if (error) {
2622                 /*
2623                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2624                  * if the directory is not empty.  A System V NFS server
2625                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2626                  * over the wire.
2627                  */
2628                 if (error == EEXIST)
2629                         error = ENOTEMPTY;
2630                 goto err;
2631         }
2632 
2633         resp->status = NFS3_OK;
2634         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2635         goto out;
2636 
2637 err:
2638         if (curthread->t_flag & T_WOULDBLOCK) {
2639                 curthread->t_flag &= ~T_WOULDBLOCK;
2640                 resp->status = NFS3ERR_JUKEBOX;
2641         } else
2642                 resp->status = puterrno3(error);
2643 err1:
2644         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2645 out:
2646         DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2647             cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2648         if (vp != NULL)
2649                 VN_RELE(vp);
2650 
2651 }
2652 
2653 void *
2654 rfs3_rmdir_getfh(RMDIR3args *args)
2655 {
2656 
2657         return (&args->object.dir);
2658 }
2659 
2660 void
2661 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2662     struct svc_req *req, cred_t *cr, bool_t ro)
2663 {
2664         int error = 0;
2665         vnode_t *fvp;
2666         vnode_t *tvp;
2667         vnode_t *targvp;
2668         struct vattr *fbvap;
2669         struct vattr fbva;
2670         struct vattr *favap;
2671         struct vattr fava;
2672         struct vattr *tbvap;
2673         struct vattr tbva;
2674         struct vattr *tavap;
2675         struct vattr tava;
2676         nfs_fh3 *fh3;
2677         struct exportinfo *to_exi;
2678         vnode_t *srcvp = NULL;
2679         bslabel_t *clabel;
2680         struct sockaddr *ca;
2681         char *name = NULL;
2682         char *toname = NULL;
2683 
2684         fbvap = NULL;
2685         favap = NULL;
2686         tbvap = NULL;
2687         tavap = NULL;
2688         tvp = NULL;
2689 
2690         fvp = nfs3_fhtovp(&args->from.dir, exi);
2691 
2692         DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2693             cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2694 
2695         if (fvp == NULL) {
2696                 error = ESTALE;
2697                 goto err;
2698         }
2699 
2700         if (is_system_labeled()) {
2701                 clabel = req->rq_label;
2702                 ASSERT(clabel != NULL);
2703                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2704                     "got client label from request(1)", struct svc_req *, req);
2705 
2706                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2707                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2708                             exi)) {
2709                                 resp->status = NFS3ERR_ACCES;
2710                                 goto err1;
2711                         }
2712                 }
2713         }
2714 
2715         fbva.va_mask = AT_ALL;
2716         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2717         favap = fbvap;
2718 
2719         fh3 = &args->to.dir;
2720         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2721         if (to_exi == NULL) {
2722                 resp->status = NFS3ERR_ACCES;
2723                 goto err1;
2724         }
2725         exi_rele(to_exi);
2726 
2727         if (to_exi != exi) {
2728                 resp->status = NFS3ERR_XDEV;
2729                 goto err1;
2730         }
2731 
2732         tvp = nfs3_fhtovp(&args->to.dir, exi);
2733         if (tvp == NULL) {
2734                 error = ESTALE;
2735                 goto err;
2736         }
2737 
2738         tbva.va_mask = AT_ALL;
2739         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2740         tavap = tbvap;
2741 
2742         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2743                 resp->status = NFS3ERR_NOTDIR;
2744                 goto err1;
2745         }
2746 
2747         if (args->from.name == nfs3nametoolong ||
2748             args->to.name == nfs3nametoolong) {
2749                 resp->status = NFS3ERR_NAMETOOLONG;
2750                 goto err1;
2751         }
2752         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2753             args->to.name == NULL || *(args->to.name) == '\0') {
2754                 resp->status = NFS3ERR_ACCES;
2755                 goto err1;
2756         }
2757 
2758         if (rdonly(ro, tvp)) {
2759                 resp->status = NFS3ERR_ROFS;
2760                 goto err1;
2761         }
2762 
2763         if (is_system_labeled()) {
2764                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2765                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2766                             exi)) {
2767                                 resp->status = NFS3ERR_ACCES;
2768                                 goto err1;
2769                         }
2770                 }
2771         }
2772 
2773         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2774         name = nfscmd_convname(ca, exi, args->from.name,
2775             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2776 
2777         if (name == NULL) {
2778                 resp->status = NFS3ERR_INVAL;
2779                 goto err1;
2780         }
2781 
2782         toname = nfscmd_convname(ca, exi, args->to.name,
2783             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2784 
2785         if (toname == NULL) {
2786                 resp->status = NFS3ERR_INVAL;
2787                 goto err1;
2788         }
2789 
2790         /*
2791          * Check for a conflict with a non-blocking mandatory share
2792          * reservation or V4 delegations.
2793          */
2794         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2795             NULL, cr, NULL, NULL, NULL);
2796         if (error != 0)
2797                 goto err;
2798 
2799         /*
2800          * If we rename a delegated file we should recall the
2801          * delegation, since future opens should fail or would
2802          * refer to a new file.
2803          */
2804         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2805                 resp->status = NFS3ERR_JUKEBOX;
2806                 goto err1;
2807         }
2808 
2809         /*
2810          * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2811          * first to avoid VOP_LOOKUP if possible.
2812          */
2813         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2814             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2815             NULL, NULL, NULL) == 0) {
2816 
2817                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2818                         VN_RELE(targvp);
2819                         resp->status = NFS3ERR_JUKEBOX;
2820                         goto err1;
2821                 }
2822                 VN_RELE(targvp);
2823         }
2824 
2825         if (!nbl_need_check(srcvp)) {
2826                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2827         } else {
2828                 nbl_start_crit(srcvp, RW_READER);
2829                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2830                         error = EACCES;
2831                 else
2832                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2833                 nbl_end_crit(srcvp);
2834         }
2835         if (error == 0)
2836                 vn_renamepath(tvp, srcvp, args->to.name,
2837                     strlen(args->to.name));
2838         VN_RELE(srcvp);
2839         srcvp = NULL;
2840 
2841         fava.va_mask = AT_ALL;
2842         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2843         tava.va_mask = AT_ALL;
2844         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2845 
2846         /*
2847          * Force modified data and metadata out to stable storage.
2848          */
2849         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2850         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2851 
2852         if (error)
2853                 goto err;
2854 
2855         resp->status = NFS3_OK;
2856         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2857         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2858         goto out;
2859 
2860 err:
2861         if (curthread->t_flag & T_WOULDBLOCK) {
2862                 curthread->t_flag &= ~T_WOULDBLOCK;
2863                 resp->status = NFS3ERR_JUKEBOX;
2864         } else {
2865                 resp->status = puterrno3(error);
2866         }
2867 err1:
2868         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2869         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2870 
2871 out:
2872         if (name != NULL && name != args->from.name)
2873                 kmem_free(name, MAXPATHLEN + 1);
2874         if (toname != NULL && toname != args->to.name)
2875                 kmem_free(toname, MAXPATHLEN + 1);
2876 
2877         DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2878             cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2879         if (fvp != NULL)
2880                 VN_RELE(fvp);
2881         if (tvp != NULL)
2882                 VN_RELE(tvp);
2883 }
2884 
2885 void *
2886 rfs3_rename_getfh(RENAME3args *args)
2887 {
2888 
2889         return (&args->from.dir);
2890 }
2891 
2892 void
2893 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2894     struct svc_req *req, cred_t *cr, bool_t ro)
2895 {
2896         int error;
2897         vnode_t *vp;
2898         vnode_t *dvp;
2899         struct vattr *vap;
2900         struct vattr va;
2901         struct vattr *bvap;
2902         struct vattr bva;
2903         struct vattr *avap;
2904         struct vattr ava;
2905         nfs_fh3 *fh3;
2906         struct exportinfo *to_exi;
2907         bslabel_t *clabel;
2908         struct sockaddr *ca;
2909         char *name = NULL;
2910 
2911         vap = NULL;
2912         bvap = NULL;
2913         avap = NULL;
2914         dvp = NULL;
2915 
2916         vp = nfs3_fhtovp(&args->file, exi);
2917 
2918         DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2919             cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2920 
2921         if (vp == NULL) {
2922                 error = ESTALE;
2923                 goto out;
2924         }
2925 
2926         va.va_mask = AT_ALL;
2927         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2928 
2929         fh3 = &args->link.dir;
2930         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2931         if (to_exi == NULL) {
2932                 resp->status = NFS3ERR_ACCES;
2933                 goto out1;
2934         }
2935         exi_rele(to_exi);
2936 
2937         if (to_exi != exi) {
2938                 resp->status = NFS3ERR_XDEV;
2939                 goto out1;
2940         }
2941 
2942         if (is_system_labeled()) {
2943                 clabel = req->rq_label;
2944 
2945                 ASSERT(clabel != NULL);
2946                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2947                     "got client label from request(1)", struct svc_req *, req);
2948 
2949                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2950                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2951                             exi)) {
2952                                 resp->status = NFS3ERR_ACCES;
2953                                 goto out1;
2954                         }
2955                 }
2956         }
2957 
2958         dvp = nfs3_fhtovp(&args->link.dir, exi);
2959         if (dvp == NULL) {
2960                 error = ESTALE;
2961                 goto out;
2962         }
2963 
2964         bva.va_mask = AT_ALL;
2965         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2966 
2967         if (dvp->v_type != VDIR) {
2968                 resp->status = NFS3ERR_NOTDIR;
2969                 goto out1;
2970         }
2971 
2972         if (args->link.name == nfs3nametoolong) {
2973                 resp->status = NFS3ERR_NAMETOOLONG;
2974                 goto out1;
2975         }
2976 
2977         if (args->link.name == NULL || *(args->link.name) == '\0') {
2978                 resp->status = NFS3ERR_ACCES;
2979                 goto out1;
2980         }
2981 
2982         if (rdonly(ro, dvp)) {
2983                 resp->status = NFS3ERR_ROFS;
2984                 goto out1;
2985         }
2986 
2987         if (is_system_labeled()) {
2988                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2989                     "got client label from request(1)", struct svc_req *, req);
2990 
2991                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2992                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2993                             exi)) {
2994                                 resp->status = NFS3ERR_ACCES;
2995                                 goto out1;
2996                         }
2997                 }
2998         }
2999 
3000         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3001         name = nfscmd_convname(ca, exi, args->link.name,
3002             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3003 
3004         if (name == NULL) {
3005                 resp->status = NFS3ERR_SERVERFAULT;
3006                 goto out1;
3007         }
3008 
3009         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3010 
3011         va.va_mask = AT_ALL;
3012         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3013         ava.va_mask = AT_ALL;
3014         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3015 
3016         /*
3017          * Force modified data and metadata out to stable storage.
3018          */
3019         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3020         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3021 
3022         if (error)
3023                 goto out;
3024 
3025         VN_RELE(dvp);
3026 
3027         resp->status = NFS3_OK;
3028         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3029         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3030 
3031         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3032             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3033 
3034         VN_RELE(vp);
3035 
3036         return;
3037 
3038 out:
3039         if (curthread->t_flag & T_WOULDBLOCK) {
3040                 curthread->t_flag &= ~T_WOULDBLOCK;
3041                 resp->status = NFS3ERR_JUKEBOX;
3042         } else
3043                 resp->status = puterrno3(error);
3044 out1:
3045         if (name != NULL && name != args->link.name)
3046                 kmem_free(name, MAXPATHLEN + 1);
3047 
3048         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3049             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3050 
3051         if (vp != NULL)
3052                 VN_RELE(vp);
3053         if (dvp != NULL)
3054                 VN_RELE(dvp);
3055         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3056         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3057 }
3058 
3059 void *
3060 rfs3_link_getfh(LINK3args *args)
3061 {
3062 
3063         return (&args->file);
3064 }
3065 
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response which contains attribute information and exactly one
 * directory entry whose name is `length' bytes long.  A request whose
 * count is not smaller than this is guaranteed to be able to return at
 * least one directory entry if one exists, so NFS3ERR_TOOSMALL does not
 * have to be considered for it.  For smaller counts the caller has to
 * check whether that error needs to be returned.
 *
 * The response is made up of the following pieces, each counted in
 * units of BYTES_PER_XDR_UNIT:
 *
 *      status                          1
 *      attribute flag                  1
 *      cookie verifier                 2
 *      attributes                      NFS3_SIZEOF_FATTR3
 *      boolean (entry follows)         1
 *      file id                         2
 *      directory name length           1
 *      cookie                          2
 *      end of list                     1
 *      end of file                     1
 *
 * plus the name itself, rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define NFS3_READDIR_MIN_COUNT(length)  \
        (roundup((length), BYTES_PER_XDR_UNIT) + BYTES_PER_XDR_UNIT * \
        (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3094 
3095 /* ARGSUSED */
3096 void
3097 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3098     struct svc_req *req, cred_t *cr, bool_t ro)
3099 {
3100         int error;
3101         vnode_t *vp;
3102         struct vattr *vap;
3103         struct vattr va;
3104         struct iovec iov;
3105         struct uio uio;
3106         char *data;
3107         int iseof;
3108         int bufsize;
3109         int namlen;
3110         uint_t count;
3111         struct sockaddr *ca;
3112 
3113         vap = NULL;
3114 
3115         vp = nfs3_fhtovp(&args->dir, exi);
3116 
3117         DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3118             cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3119 
3120         if (vp == NULL) {
3121                 error = ESTALE;
3122                 goto out;
3123         }
3124 
3125         if (is_system_labeled()) {
3126                 bslabel_t *clabel = req->rq_label;
3127 
3128                 ASSERT(clabel != NULL);
3129                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3130                     "got client label from request(1)", struct svc_req *, req);
3131 
3132                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3133                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3134                             exi)) {
3135                                 resp->status = NFS3ERR_ACCES;
3136                                 goto out1;
3137                         }
3138                 }
3139         }
3140 
3141         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3142 
3143         va.va_mask = AT_ALL;
3144         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3145 
3146         if (vp->v_type != VDIR) {
3147                 resp->status = NFS3ERR_NOTDIR;
3148                 goto out1;
3149         }
3150 
3151         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3152         if (error)
3153                 goto out;
3154 
3155         /*
3156          * Now don't allow arbitrary count to alloc;
3157          * allow the maximum not to exceed rfs3_tsize()
3158          */
3159         if (args->count > rfs3_tsize(req))
3160                 args->count = rfs3_tsize(req);
3161 
3162         /*
3163          * Make sure that there is room to read at least one entry
3164          * if any are available.
3165          */
3166         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3167                 count = DIRENT64_RECLEN(MAXNAMELEN);
3168         else
3169                 count = args->count;
3170 
3171         data = kmem_alloc(count, KM_SLEEP);
3172 
3173         iov.iov_base = data;
3174         iov.iov_len = count;
3175         uio.uio_iov = &iov;
3176         uio.uio_iovcnt = 1;
3177         uio.uio_segflg = UIO_SYSSPACE;
3178         uio.uio_extflg = UIO_COPY_CACHED;
3179         uio.uio_loffset = (offset_t)args->cookie;
3180         uio.uio_resid = count;
3181 
3182         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3183 
3184         va.va_mask = AT_ALL;
3185         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3186 
3187         if (error) {
3188                 kmem_free(data, count);
3189                 goto out;
3190         }
3191 
3192         /*
3193          * If the count was not large enough to be able to guarantee
3194          * to be able to return at least one entry, then need to
3195          * check to see if NFS3ERR_TOOSMALL should be returned.
3196          */
3197         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3198                 /*
3199                  * bufsize is used to keep track of the size of the response.
3200                  * It is primed with:
3201                  *      1 for the status +
3202                  *      1 for the dir_attributes.attributes boolean +
3203                  *      2 for the cookie verifier
3204                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3205                  * to bytes.  If there are directory attributes to be
3206                  * returned, then:
3207                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3208                  * time BYTES_PER_XDR_UNIT is added to account for them.
3209                  */
3210                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3211                 if (vap != NULL)
3212                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3213                 /*
3214                  * An entry is composed of:
3215                  *      1 for the true/false list indicator +
3216                  *      2 for the fileid +
3217                  *      1 for the length of the name +
3218                  *      2 for the cookie +
3219                  * all times BYTES_PER_XDR_UNIT to convert from
3220                  * XDR units to bytes, plus the length of the name
3221                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3222                  */
3223                 if (count != uio.uio_resid) {
3224                         namlen = strlen(((struct dirent64 *)data)->d_name);
3225                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3226                             roundup(namlen, BYTES_PER_XDR_UNIT);
3227                 }
3228                 /*
3229                  * We need to check to see if the number of bytes left
3230                  * to go into the buffer will actually fit into the
3231                  * buffer.  This is calculated as the size of this
3232                  * entry plus:
3233                  *      1 for the true/false list indicator +
3234                  *      1 for the eof indicator
3235                  * times BYTES_PER_XDR_UNIT to convert from from
3236                  * XDR units to bytes.
3237                  */
3238                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3239                 if (bufsize > args->count) {
3240                         kmem_free(data, count);
3241                         resp->status = NFS3ERR_TOOSMALL;
3242                         goto out1;
3243                 }
3244         }
3245 
3246         /*
3247          * Have a valid readir buffer for the native character
3248          * set. Need to check if a conversion is necessary and
3249          * potentially rewrite the whole buffer. Note that if the
3250          * conversion expands names enough, the structure may not
3251          * fit. In this case, we need to drop entries until if fits
3252          * and patch the counts in order that the next readdir will
3253          * get the correct entries.
3254          */
3255         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3256         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3257 
3258 
3259         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3260 
3261 #if 0 /* notyet */
3262         /*
3263          * Don't do this.  It causes local disk writes when just
3264          * reading the file and the overhead is deemed larger
3265          * than the benefit.
3266          */
3267         /*
3268          * Force modified metadata out to stable storage.
3269          */
3270         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3271 #endif
3272 
3273         resp->status = NFS3_OK;
3274         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3275         resp->resok.cookieverf = 0;
3276         resp->resok.reply.entries = (entry3 *)data;
3277         resp->resok.reply.eof = iseof;
3278         resp->resok.size = count - uio.uio_resid;
3279         resp->resok.count = args->count;
3280         resp->resok.freecount = count;
3281 
3282         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3283             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3284 
3285         VN_RELE(vp);
3286 
3287         return;
3288 
3289 out:
3290         if (curthread->t_flag & T_WOULDBLOCK) {
3291                 curthread->t_flag &= ~T_WOULDBLOCK;
3292                 resp->status = NFS3ERR_JUKEBOX;
3293         } else
3294                 resp->status = puterrno3(error);
3295 out1:
3296         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3297             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3298 
3299         if (vp != NULL) {
3300                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3301                 VN_RELE(vp);
3302         }
3303         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3304 }
3305 
3306 void *
3307 rfs3_readdir_getfh(READDIR3args *args)
3308 {
3309 
3310         return (&args->dir);
3311 }
3312 
3313 void
3314 rfs3_readdir_free(READDIR3res *resp)
3315 {
3316 
3317         if (resp->status == NFS3_OK)
3318                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3319 }
3320 
/*
 * nextdp(ent) yields a pointer to the directory entry that follows
 * `ent' in a buffer of packed struct dirent64 records, by stepping
 * ent->d_reclen bytes forward.  Any earlier definition of the macro
 * is discarded first.
 */
#undef nextdp
#define nextdp(ent)     \
        ((struct dirent64 *)((char *)(ent) + (ent)->d_reclen))
3325 
3326 /*
3327  * This macro computes the size of a response which contains
3328  * one directory entry including the attributes as well as file handle.
3329  * If the incoming request is larger than this, then we are guaranteed to be
3330  * able to return at least one more directory entry if one exists.
3331  *
3332  * NFS3_READDIRPLUS_ENTRY is made up of the following:
3333  *
3334  * boolean - 1 * BYTES_PER_XDR_UNIT
3335  * file id - 2 * BYTES_PER_XDR_UNIT
3336  * directory name length - 1 * BYTES_PER_XDR_UNIT
3337  * cookie - 2 * BYTES_PER_XDR_UNIT
3338  * attribute flag - 1 * BYTES_PER_XDR_UNIT
3339  * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
3340  * status byte for file handle - 1 *  BYTES_PER_XDR_UNIT
3341  * length of a file handle - 1 * BYTES_PER_XDR_UNIT
3342  * Maximum length of a file handle (NFS3_MAXFHSIZE)
3343  * name length of the entry to the nearest bytes
3344  */
3345 #define NFS3_READDIRPLUS_ENTRY(namelen) \
3346         ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
3347                 BYTES_PER_XDR_UNIT + \
3348         NFS3_MAXFHSIZE + roundup(namelen, BYTES_PER_XDR_UNIT))
3349 
3350 static int rfs3_readdir_unit = MAXBSIZE;
3351 
3352 /* ARGSUSED */
3353 void
3354 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3355     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3356 {
3357         int error;
3358         vnode_t *vp;
3359         struct vattr *vap;
3360         struct vattr va;
3361         struct iovec iov;
3362         struct uio uio;
3363         char *data;
3364         int iseof;
3365         struct dirent64 *dp;
3366         vnode_t *nvp;
3367         struct vattr *nvap;
3368         struct vattr nva;
3369         entryplus3_info *infop = NULL;
3370         int size = 0;
3371         int nents = 0;
3372         int bufsize = 0;
3373         int entrysize = 0;
3374         int tofit = 0;
3375         int rd_unit = rfs3_readdir_unit;
3376         int prev_len;
3377         int space_left;
3378         int i;
3379         uint_t *namlen = NULL;
3380         char *ndata = NULL;
3381         struct sockaddr *ca;
3382         size_t ret;
3383 
3384         vap = NULL;
3385 
3386         vp = nfs3_fhtovp(&args->dir, exi);
3387 
3388         DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3389             cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3390 
3391         if (vp == NULL) {
3392                 error = ESTALE;
3393                 goto out;
3394         }
3395 
3396         if (is_system_labeled()) {
3397                 bslabel_t *clabel = req->rq_label;
3398 
3399                 ASSERT(clabel != NULL);
3400                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3401                     char *, "got client label from request(1)",
3402                     struct svc_req *, req);
3403 
3404                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3405                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3406                             exi)) {
3407                                 resp->status = NFS3ERR_ACCES;
3408                                 goto out1;
3409                         }
3410                 }
3411         }
3412 
3413         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3414 
3415         va.va_mask = AT_ALL;
3416         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3417 
3418         if (vp->v_type != VDIR) {
3419                 error = ENOTDIR;
3420                 goto out;
3421         }
3422 
3423         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3424         if (error)
3425                 goto out;
3426 
3427         /*
3428          * Don't allow arbitrary counts for allocation
3429          */
3430         if (args->maxcount > rfs3_tsize(req))
3431                 args->maxcount = rfs3_tsize(req);
3432 
3433         /*
3434          * Make sure that there is room to read at least one entry
3435          * if any are available
3436          */
3437         args->dircount = MIN(args->dircount, args->maxcount);
3438 
3439         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3440                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3441 
3442         /*
3443          * This allocation relies on a minimum directory entry
3444          * being roughly 24 bytes.  Therefore, the namlen array
3445          * will have enough space based on the maximum number of
3446          * entries to read.
3447          */
3448         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3449 
3450         space_left = args->dircount;
3451         data = kmem_alloc(args->dircount, KM_SLEEP);
3452         dp = (struct dirent64 *)data;
3453         uio.uio_iov = &iov;
3454         uio.uio_iovcnt = 1;
3455         uio.uio_segflg = UIO_SYSSPACE;
3456         uio.uio_extflg = UIO_COPY_CACHED;
3457         uio.uio_loffset = (offset_t)args->cookie;
3458 
3459         /*
3460          * bufsize is used to keep track of the size of the response as we
3461          * get post op attributes and filehandles for each entry.  This is
3462          * an optimization as the server may have read more entries than will
3463          * fit in the buffer specified by maxcount.  We stop calculating
3464          * post op attributes and filehandles once we have exceeded maxcount.
3465          * This will minimize the effect of truncation.
3466          *
3467          * It is primed with:
3468          *      1 for the status +
3469          *      1 for the dir_attributes.attributes boolean +
3470          *      2 for the cookie verifier
3471          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3472          * to bytes.  If there are directory attributes to be
3473          * returned, then:
3474          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3475          * time BYTES_PER_XDR_UNIT is added to account for them.
3476          */
3477         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3478         if (vap != NULL)
3479                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3480 
3481 getmoredents:
3482         /*
3483          * Here we make a check so that our read unit is not larger than
3484          * the space left in the buffer.
3485          */
3486         rd_unit = MIN(rd_unit, space_left);
3487         iov.iov_base = (char *)dp;
3488         iov.iov_len = rd_unit;
3489         uio.uio_resid = rd_unit;
3490         prev_len = rd_unit;
3491 
3492         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3493 
3494         if (error) {
3495                 kmem_free(data, args->dircount);
3496                 goto out;
3497         }
3498 
3499         if (uio.uio_resid == prev_len && !iseof) {
3500                 if (nents == 0) {
3501                         kmem_free(data, args->dircount);
3502                         resp->status = NFS3ERR_TOOSMALL;
3503                         goto out1;
3504                 }
3505 
3506                 /*
3507                  * We could not get any more entries, so get the attributes
3508                  * and filehandle for the entries already obtained.
3509                  */
3510                 goto good;
3511         }
3512 
3513         /*
3514          * We estimate the size of the response by assuming the
3515          * entry exists and attributes and filehandle are also valid
3516          */
3517         for (size = prev_len - uio.uio_resid;
3518             size > 0;
3519             size -= dp->d_reclen, dp = nextdp(dp)) {
3520 
3521                 if (dp->d_ino == 0) {
3522                         nents++;
3523                         continue;
3524                 }
3525 
3526                 namlen[nents] = strlen(dp->d_name);
3527                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3528 
3529                 /*
3530                  * We need to check to see if the number of bytes left
3531                  * to go into the buffer will actually fit into the
3532                  * buffer.  This is calculated as the size of this
3533                  * entry plus:
3534                  *      1 for the true/false list indicator +
3535                  *      1 for the eof indicator
3536                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3537                  * to bytes.
3538                  *
3539                  * Also check the dircount limit against the first entry read
3540                  *
3541                  */
3542                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3543                 if (bufsize + tofit > args->maxcount) {
3544                         /*
3545                          * We make a check here to see if this was the
3546                          * first entry being measured.  If so, then maxcount
3547                          * was too small to begin with and so we need to
3548                          * return with NFS3ERR_TOOSMALL.
3549                          */
3550                         if (nents == 0) {
3551                                 kmem_free(data, args->dircount);
3552                                 resp->status = NFS3ERR_TOOSMALL;
3553                                 goto out1;
3554                         }
3555                         iseof = FALSE;
3556                         goto good;
3557                 }
3558                 bufsize += entrysize;
3559                 nents++;
3560         }
3561 
3562         /*
3563          * If there is enough room to fit at least 1 more entry including
3564          * post op attributes and filehandle in the buffer AND that we haven't
3565          * exceeded dircount then go back and get some more.
3566          */
3567         if (!iseof &&
3568             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3569                 space_left -= (prev_len - uio.uio_resid);
3570                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3571                         goto getmoredents;
3572 
3573                 /* else, fall through */
3574         }
3575 good:
3576         va.va_mask = AT_ALL;
3577         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3578 
3579         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3580 
3581         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3582         resp->resok.infop = infop;
3583 
3584         dp = (struct dirent64 *)data;
3585         for (i = 0; i < nents; i++) {
3586 
3587                 if (dp->d_ino == 0) {
3588                         infop[i].attr.attributes = FALSE;
3589                         infop[i].fh.handle_follows = FALSE;
3590                         dp = nextdp(dp);
3591                         continue;
3592                 }
3593 
3594                 infop[i].namelen = namlen[i];
3595 
3596                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3597                     NULL, NULL, NULL);
3598                 if (error) {
3599                         infop[i].attr.attributes = FALSE;
3600                         infop[i].fh.handle_follows = FALSE;
3601                         dp = nextdp(dp);
3602                         continue;
3603                 }
3604 
3605                 nva.va_mask = AT_ALL;
3606                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3607 
3608                 /* Lie about the object type for a referral */
3609                 if (vn_is_nfs_reparse(nvp, cr))
3610                         nvap->va_type = VLNK;
3611 
3612                 vattr_to_post_op_attr(nvap, &infop[i].attr);
3613 
3614                 error = makefh3(&infop[i].fh.handle, nvp, exi);
3615                 if (!error)
3616                         infop[i].fh.handle_follows = TRUE;
3617                 else
3618                         infop[i].fh.handle_follows = FALSE;
3619 
3620                 VN_RELE(nvp);
3621                 dp = nextdp(dp);
3622         }
3623 
3624         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3625         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3626         if (ndata == NULL)
3627                 ndata = data;
3628 
3629         if (ret > 0) {
3630                 /*
3631                  * We had to drop one or more entries in order to fit
3632                  * during the character conversion.  We need to patch
3633                  * up the size and eof info.
3634                  */
3635                 if (iseof)
3636                         iseof = FALSE;
3637 
3638                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3639                     nents, ret);
3640         }
3641 
3642 
3643 #if 0 /* notyet */
3644         /*
3645          * Don't do this.  It causes local disk writes when just
3646          * reading the file and the overhead is deemed larger
3647          * than the benefit.
3648          */
3649         /*
3650          * Force modified metadata out to stable storage.
3651          */
3652         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3653 #endif
3654 
3655         kmem_free(namlen, args->dircount);
3656 
3657         resp->status = NFS3_OK;
3658         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3659         resp->resok.cookieverf = 0;
3660         resp->resok.reply.entries = (entryplus3 *)ndata;
3661         resp->resok.reply.eof = iseof;
3662         resp->resok.size = nents;
3663         resp->resok.count = args->dircount - ret;
3664         resp->resok.maxcount = args->maxcount;
3665 
3666         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3667             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3668         if (ndata != data)
3669                 kmem_free(data, args->dircount);
3670 
3671 
3672         VN_RELE(vp);
3673 
3674         return;
3675 
3676 out:
3677         if (curthread->t_flag & T_WOULDBLOCK) {
3678                 curthread->t_flag &= ~T_WOULDBLOCK;
3679                 resp->status = NFS3ERR_JUKEBOX;
3680         } else {
3681                 resp->status = puterrno3(error);
3682         }
3683 out1:
3684         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3685             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3686 
3687         if (vp != NULL) {
3688                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3689                 VN_RELE(vp);
3690         }
3691 
3692         if (namlen != NULL)
3693                 kmem_free(namlen, args->dircount);
3694 
3695         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3696 }
3697 
3698 void *
3699 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3700 {
3701 
3702         return (&args->dir);
3703 }
3704 
3705 void
3706 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3707 {
3708 
3709         if (resp->status == NFS3_OK) {
3710                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3711                 kmem_free(resp->resok.infop,
3712                     resp->resok.size * sizeof (struct entryplus3_info));
3713         }
3714 }
3715 
3716 /* ARGSUSED */
3717 void
3718 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3719     struct svc_req *req, cred_t *cr, bool_t ro)
3720 {
3721         int error;
3722         vnode_t *vp;
3723         struct vattr *vap;
3724         struct vattr va;
3725         struct statvfs64 sb;
3726 
3727         vap = NULL;
3728 
3729         vp = nfs3_fhtovp(&args->fsroot, exi);
3730 
3731         DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3732             cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3733 
3734         if (vp == NULL) {
3735                 error = ESTALE;
3736                 goto out;
3737         }
3738 
3739         if (is_system_labeled()) {
3740                 bslabel_t *clabel = req->rq_label;
3741 
3742                 ASSERT(clabel != NULL);
3743                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3744                     "got client label from request(1)", struct svc_req *, req);
3745 
3746                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3747                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3748                             exi)) {
3749                                 resp->status = NFS3ERR_ACCES;
3750                                 goto out1;
3751                         }
3752                 }
3753         }
3754 
3755         error = VFS_STATVFS(vp->v_vfsp, &sb);
3756 
3757         va.va_mask = AT_ALL;
3758         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3759 
3760         if (error)
3761                 goto out;
3762 
3763         resp->status = NFS3_OK;
3764         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3765         if (sb.f_blocks != (fsblkcnt64_t)-1)
3766                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3767         else
3768                 resp->resok.tbytes = (size3)sb.f_blocks;
3769         if (sb.f_bfree != (fsblkcnt64_t)-1)
3770                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3771         else
3772                 resp->resok.fbytes = (size3)sb.f_bfree;
3773         if (sb.f_bavail != (fsblkcnt64_t)-1)
3774                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3775         else
3776                 resp->resok.abytes = (size3)sb.f_bavail;
3777         resp->resok.tfiles = (size3)sb.f_files;
3778         resp->resok.ffiles = (size3)sb.f_ffree;
3779         resp->resok.afiles = (size3)sb.f_favail;
3780         resp->resok.invarsec = 0;
3781 
3782         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3783             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3784         VN_RELE(vp);
3785 
3786         return;
3787 
3788 out:
3789         if (curthread->t_flag & T_WOULDBLOCK) {
3790                 curthread->t_flag &= ~T_WOULDBLOCK;
3791                 resp->status = NFS3ERR_JUKEBOX;
3792         } else
3793                 resp->status = puterrno3(error);
3794 out1:
3795         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3796             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3797 
3798         if (vp != NULL)
3799                 VN_RELE(vp);
3800         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3801 }
3802 
3803 void *
3804 rfs3_fsstat_getfh(FSSTAT3args *args)
3805 {
3806 
3807         return (&args->fsroot);
3808 }
3809 
3810 /* ARGSUSED */
3811 void
3812 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3813     struct svc_req *req, cred_t *cr, bool_t ro)
3814 {
3815         vnode_t *vp;
3816         struct vattr *vap;
3817         struct vattr va;
3818         uint32_t xfer_size;
3819         ulong_t l = 0;
3820         int error;
3821 
3822         vp = nfs3_fhtovp(&args->fsroot, exi);
3823 
3824         DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3825             cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3826 
3827         if (vp == NULL) {
3828                 if (curthread->t_flag & T_WOULDBLOCK) {
3829                         curthread->t_flag &= ~T_WOULDBLOCK;
3830                         resp->status = NFS3ERR_JUKEBOX;
3831                 } else
3832                         resp->status = NFS3ERR_STALE;
3833                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3834                 goto out;
3835         }
3836 
3837         if (is_system_labeled()) {
3838                 bslabel_t *clabel = req->rq_label;
3839 
3840                 ASSERT(clabel != NULL);
3841                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3842                     "got client label from request(1)", struct svc_req *, req);
3843 
3844                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3845                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3846                             exi)) {
3847                                 resp->status = NFS3ERR_STALE;
3848                                 vattr_to_post_op_attr(NULL,
3849                                     &resp->resfail.obj_attributes);
3850                                 goto out;
3851                         }
3852                 }
3853         }
3854 
3855         va.va_mask = AT_ALL;
3856         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3857 
3858         resp->status = NFS3_OK;
3859         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3860         xfer_size = rfs3_tsize(req);
3861         resp->resok.rtmax = xfer_size;
3862         resp->resok.rtpref = xfer_size;
3863         resp->resok.rtmult = DEV_BSIZE;
3864         resp->resok.wtmax = xfer_size;
3865         resp->resok.wtpref = xfer_size;
3866         resp->resok.wtmult = DEV_BSIZE;
3867         resp->resok.dtpref = MAXBSIZE;
3868 
3869         /*
3870          * Large file spec: want maxfilesize based on limit of
3871          * underlying filesystem.  We can guess 2^31-1 if need be.
3872          */
3873         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3874         if (error) {
3875                 resp->status = puterrno3(error);
3876                 goto out;
3877         }
3878 
3879         /*
3880          * If the underlying file system does not support _PC_FILESIZEBITS,
3881          * return a reasonable default. Note that error code on VOP_PATHCONF
3882          * will be 0, even if the underlying file system does not support
3883          * _PC_FILESIZEBITS.
3884          */
3885         if (l == (ulong_t)-1) {
3886                 resp->resok.maxfilesize = MAXOFF32_T;
3887         } else {
3888                 if (l >= (sizeof (uint64_t) * 8))
3889                         resp->resok.maxfilesize = INT64_MAX;
3890                 else
3891                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3892         }
3893 
3894         resp->resok.time_delta.seconds = 0;
3895         resp->resok.time_delta.nseconds = 1000;
3896         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3897             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3898 
3899         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3900             cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3901 
3902         VN_RELE(vp);
3903 
3904         return;
3905 
3906 out:
3907         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3908             cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3909         if (vp != NULL)
3910                 VN_RELE(vp);
3911 }
3912 
3913 void *
3914 rfs3_fsinfo_getfh(FSINFO3args *args)
3915 {
3916         return (&args->fsroot);
3917 }
3918 
3919 /* ARGSUSED */
3920 void
3921 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3922     struct svc_req *req, cred_t *cr, bool_t ro)
3923 {
3924         int error;
3925         vnode_t *vp;
3926         struct vattr *vap;
3927         struct vattr va;
3928         ulong_t val;
3929 
3930         vap = NULL;
3931 
3932         vp = nfs3_fhtovp(&args->object, exi);
3933 
3934         DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3935             cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3936 
3937         if (vp == NULL) {
3938                 error = ESTALE;
3939                 goto out;
3940         }
3941 
3942         if (is_system_labeled()) {
3943                 bslabel_t *clabel = req->rq_label;
3944 
3945                 ASSERT(clabel != NULL);
3946                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3947                     "got client label from request(1)", struct svc_req *, req);
3948 
3949                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3950                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3951                             exi)) {
3952                                 resp->status = NFS3ERR_ACCES;
3953                                 goto out1;
3954                         }
3955                 }
3956         }
3957 
3958         va.va_mask = AT_ALL;
3959         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3960 
3961         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3962         if (error)
3963                 goto out;
3964         resp->resok.info.link_max = (uint32)val;
3965 
3966         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3967         if (error)
3968                 goto out;
3969         resp->resok.info.name_max = (uint32)val;
3970 
3971         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3972         if (error)
3973                 goto out;
3974         if (val == 1)
3975                 resp->resok.info.no_trunc = TRUE;
3976         else
3977                 resp->resok.info.no_trunc = FALSE;
3978 
3979         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3980         if (error)
3981                 goto out;
3982         if (val == 1)
3983                 resp->resok.info.chown_restricted = TRUE;
3984         else
3985                 resp->resok.info.chown_restricted = FALSE;
3986 
3987         resp->status = NFS3_OK;
3988         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3989         resp->resok.info.case_insensitive = FALSE;
3990         resp->resok.info.case_preserving = TRUE;
3991         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3992             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3993         VN_RELE(vp);
3994         return;
3995 
3996 out:
3997         if (curthread->t_flag & T_WOULDBLOCK) {
3998                 curthread->t_flag &= ~T_WOULDBLOCK;
3999                 resp->status = NFS3ERR_JUKEBOX;
4000         } else
4001                 resp->status = puterrno3(error);
4002 out1:
4003         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4004             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4005         if (vp != NULL)
4006                 VN_RELE(vp);
4007         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4008 }
4009 
4010 void *
4011 rfs3_pathconf_getfh(PATHCONF3args *args)
4012 {
4013 
4014         return (&args->object);
4015 }
4016 
4017 void
4018 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4019     struct svc_req *req, cred_t *cr, bool_t ro)
4020 {
4021         int error;
4022         vnode_t *vp;
4023         struct vattr *bvap;
4024         struct vattr bva;
4025         struct vattr *avap;
4026         struct vattr ava;
4027 
4028         bvap = NULL;
4029         avap = NULL;
4030 
4031         vp = nfs3_fhtovp(&args->file, exi);
4032 
4033         DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4034             cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4035 
4036         if (vp == NULL) {
4037                 error = ESTALE;
4038                 goto out;
4039         }
4040 
4041         bva.va_mask = AT_ALL;
4042         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4043 
4044         /*
4045          * If we can't get the attributes, then we can't do the
4046          * right access checking.  So, we'll fail the request.
4047          */
4048         if (error)
4049                 goto out;
4050 
4051         bvap = &bva;
4052 
4053         if (rdonly(ro, vp)) {
4054                 resp->status = NFS3ERR_ROFS;
4055                 goto out1;
4056         }
4057 
4058         if (vp->v_type != VREG) {
4059                 resp->status = NFS3ERR_INVAL;
4060                 goto out1;
4061         }
4062 
4063         if (is_system_labeled()) {
4064                 bslabel_t *clabel = req->rq_label;
4065 
4066                 ASSERT(clabel != NULL);
4067                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4068                     "got client label from request(1)", struct svc_req *, req);
4069 
4070                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4071                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4072                             exi)) {
4073                                 resp->status = NFS3ERR_ACCES;
4074                                 goto out1;
4075                         }
4076                 }
4077         }
4078 
4079         if (crgetuid(cr) != bva.va_uid &&
4080             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4081                 goto out;
4082 
4083         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4084 
4085         ava.va_mask = AT_ALL;
4086         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4087 
4088         if (error)
4089                 goto out;
4090 
4091         resp->status = NFS3_OK;
4092         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4093         resp->resok.verf = write3verf;
4094 
4095         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4096             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4097 
4098         VN_RELE(vp);
4099 
4100         return;
4101 
4102 out:
4103         if (curthread->t_flag & T_WOULDBLOCK) {
4104                 curthread->t_flag &= ~T_WOULDBLOCK;
4105                 resp->status = NFS3ERR_JUKEBOX;
4106         } else
4107                 resp->status = puterrno3(error);
4108 out1:
4109         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4110             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4111 
4112         if (vp != NULL)
4113                 VN_RELE(vp);
4114         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4115 }
4116 
4117 void *
4118 rfs3_commit_getfh(COMMIT3args *args)
4119 {
4120 
4121         return (&args->file);
4122 }
4123 
4124 static int
4125 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4126 {
4127 
4128         vap->va_mask = 0;
4129 
4130         if (sap->mode.set_it) {
4131                 vap->va_mode = (mode_t)sap->mode.mode;
4132                 vap->va_mask |= AT_MODE;
4133         }
4134         if (sap->uid.set_it) {
4135                 vap->va_uid = (uid_t)sap->uid.uid;
4136                 vap->va_mask |= AT_UID;
4137         }
4138         if (sap->gid.set_it) {
4139                 vap->va_gid = (gid_t)sap->gid.gid;
4140                 vap->va_mask |= AT_GID;
4141         }
4142         if (sap->size.set_it) {
4143                 if (sap->size.size > (size3)((u_longlong_t)-1))
4144                         return (EINVAL);
4145                 vap->va_size = sap->size.size;
4146                 vap->va_mask |= AT_SIZE;
4147         }
4148         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4149 #ifndef _LP64
4150                 /* check time validity */
4151                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4152                         return (EOVERFLOW);
4153 #endif
4154                 /*
4155                  * nfs protocol defines times as unsigned so don't extend sign,
4156                  * unless sysadmin set nfs_allow_preepoch_time.
4157                  */
4158                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4159                     sap->atime.atime.seconds);
4160                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4161                 vap->va_mask |= AT_ATIME;
4162         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4163                 gethrestime(&vap->va_atime);
4164                 vap->va_mask |= AT_ATIME;
4165         }
4166         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4167 #ifndef _LP64
4168                 /* check time validity */
4169                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4170                         return (EOVERFLOW);
4171 #endif
4172                 /*
4173                  * nfs protocol defines times as unsigned so don't extend sign,
4174                  * unless sysadmin set nfs_allow_preepoch_time.
4175                  */
4176                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4177                     sap->mtime.mtime.seconds);
4178                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4179                 vap->va_mask |= AT_MTIME;
4180         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4181                 gethrestime(&vap->va_mtime);
4182                 vap->va_mask |= AT_MTIME;
4183         }
4184 
4185         return (0);
4186 }
4187 
4188 static ftype3 vt_to_nf3[] = {
4189         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4190 };
4191 
4192 static int
4193 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4194 {
4195 
4196         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4197         /* Return error if time or size overflow */
4198         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4199                 return (EOVERFLOW);
4200         }
4201         fap->type = vt_to_nf3[vap->va_type];
4202         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4203         fap->nlink = (uint32)vap->va_nlink;
4204         if (vap->va_uid == UID_NOBODY)
4205                 fap->uid = (uid3)NFS_UID_NOBODY;
4206         else
4207                 fap->uid = (uid3)vap->va_uid;
4208         if (vap->va_gid == GID_NOBODY)
4209                 fap->gid = (gid3)NFS_GID_NOBODY;
4210         else
4211                 fap->gid = (gid3)vap->va_gid;
4212         fap->size = (size3)vap->va_size;
4213         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4214         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4215         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4216         fap->fsid = (uint64)vap->va_fsid;
4217         fap->fileid = (fileid3)vap->va_nodeid;
4218         fap->atime.seconds = vap->va_atime.tv_sec;
4219         fap->atime.nseconds = vap->va_atime.tv_nsec;
4220         fap->mtime.seconds = vap->va_mtime.tv_sec;
4221         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4222         fap->ctime.seconds = vap->va_ctime.tv_sec;
4223         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4224         return (0);
4225 }
4226 
4227 static int
4228 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4229 {
4230 
4231         /* Return error if time or size overflow */
4232         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4233             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4234             NFS3_SIZE_OK(vap->va_size))) {
4235                 return (EOVERFLOW);
4236         }
4237         wccap->size = (size3)vap->va_size;
4238         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4239         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4240         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4241         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4242         return (0);
4243 }
4244 
4245 static void
4246 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4247 {
4248 
4249         /* don't return attrs if time overflow */
4250         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4251                 poap->attributes = TRUE;
4252         } else
4253                 poap->attributes = FALSE;
4254 }
4255 
4256 void
4257 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4258 {
4259 
4260         /* don't return attrs if time overflow */
4261         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4262                 poap->attributes = TRUE;
4263         } else
4264                 poap->attributes = FALSE;
4265 }
4266 
4267 static void
4268 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4269 {
4270 
4271         vattr_to_pre_op_attr(bvap, &wccp->before);
4272         vattr_to_post_op_attr(avap, &wccp->after);
4273 }
4274 
4275 void
4276 rfs3_srvrinit(void)
4277 {
4278         struct rfs3_verf_overlay {
4279                 uint_t id; /* a "unique" identifier */
4280                 int ts; /* a unique timestamp */
4281         } *verfp;
4282         timestruc_t now;
4283 
4284         /*
4285          * The following algorithm attempts to find a unique verifier
4286          * to be used as the write verifier returned from the server
4287          * to the client.  It is important that this verifier change
4288          * whenever the server reboots.  Of secondary importance, it
4289          * is important for the verifier to be unique between two
4290          * different servers.
4291          *
4292          * Thus, an attempt is made to use the system hostid and the
4293          * current time in seconds when the nfssrv kernel module is
4294          * loaded.  It is assumed that an NFS server will not be able
4295          * to boot and then to reboot in less than a second.  If the
4296          * hostid has not been set, then the current high resolution
4297          * time is used.  This will ensure different verifiers each
4298          * time the server reboots and minimize the chances that two
4299          * different servers will have the same verifier.
4300          */
4301 
4302 #ifndef lint
4303         /*
4304          * We ASSERT that this constant logic expression is
4305          * always true because in the past, it wasn't.
4306          */
4307         ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4308 #endif
4309 
4310         gethrestime(&now);
4311         verfp = (struct rfs3_verf_overlay *)&write3verf;
4312         verfp->ts = (int)now.tv_sec;
4313         verfp->id = zone_get_hostid(NULL);
4314 
4315         if (verfp->id == 0)
4316                 verfp->id = (uint_t)now.tv_nsec;
4317 
4318         nfs3_srv_caller_id = fs_new_caller_id();
4319 
4320 }
4321 
4322 static int
4323 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4324 {
4325         struct clist    *wcl;
4326         int             wlist_len;
4327         count3          count = rok->count;
4328 
4329         wcl = args->wlist;
4330         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4331                 return (FALSE);
4332         }
4333 
4334         wcl = args->wlist;
4335         rok->wlist_len = wlist_len;
4336         rok->wlist = wcl;
4337         return (TRUE);
4338 }
4339 
/*
 * Counterpart of rfs3_srvrinit().  There is currently nothing to undo.
 */
void
rfs3_srvrfini(void)
{
        /* intentionally empty */
}