1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  23  *
  24  * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
  25  */
  26 
  27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  28 /* All Rights Reserved */
  29 
  30 #include <sys/param.h>
  31 #include <sys/types.h>
  32 #include <sys/systm.h>
  33 #include <sys/cred.h>
  34 #include <sys/buf.h>
  35 #include <sys/vfs.h>
  36 #include <sys/vnode.h>
  37 #include <sys/uio.h>
  38 #include <sys/errno.h>
  39 #include <sys/sysmacros.h>
  40 #include <sys/statvfs.h>
  41 #include <sys/kmem.h>
  42 #include <sys/dirent.h>
  43 #include <sys/cmn_err.h>
  44 #include <sys/debug.h>
  45 #include <sys/systeminfo.h>
  46 #include <sys/flock.h>
  47 #include <sys/nbmlock.h>
  48 #include <sys/policy.h>
  49 #include <sys/sdt.h>
  50 
  51 #include <rpc/types.h>
  52 #include <rpc/auth.h>
  53 #include <rpc/svc.h>
  54 #include <rpc/rpc_rdma.h>
  55 
  56 #include <nfs/nfs.h>
  57 #include <nfs/export.h>
  58 #include <nfs/nfs_cmd.h>
  59 
  60 #include <sys/strsubr.h>
  61 
  62 #include <sys/tsol/label.h>
  63 #include <sys/tsol/tndb.h>
  64 
  65 #include <sys/zone.h>
  66 
  67 #include <inet/ip.h>
  68 #include <inet/ip6.h>
  69 
  70 /*
  71  * These are the interface routines for the server side of the
  72  * Network File System.  See the NFS version 3 protocol specification
  73  * for a description of this interface.
  74  */
  75 
/*
 * File-scope write verifier object.  It is not referenced by any of the
 * routines visible in this part of the file.
 */
static writeverf3 write3verf;

/*
 * Prototypes for the file-local attribute conversion and RDMA helpers
 * used by the service routines in this file.
 */
static int      sattr3_to_vattr(sattr3 *, struct vattr *);
static int      vattr_to_fattr3(struct vattr *, fattr3 *);
static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
static int      rdma_setup_read_data3(READ3args *, READ3resok *);

/* Defined in another translation unit. */
extern int nfs_loaned_buffers;

/*
 * Caller id that rfs3_setattr() stores in caller_context_t.cc_caller_id
 * before calling into the VOP layer.
 */
u_longlong_t nfs3_srv_caller_id;
  88 
  89 /* ARGSUSED */
  90 void
  91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
  92         struct svc_req *req, cred_t *cr)
  93 {
  94         int error;
  95         vnode_t *vp;
  96         struct vattr va;
  97 
  98         vp = nfs3_fhtovp(&args->object, exi);
  99 
 100         DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
 101             cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
 102 
 103         if (vp == NULL) {
 104                 error = ESTALE;
 105                 goto out;
 106         }
 107 
 108         va.va_mask = AT_ALL;
 109         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 110 
 111         if (!error) {
 112                 /* Lie about the object type for a referral */
 113                 if (vn_is_nfs_reparse(vp, cr))
 114                         va.va_type = VLNK;
 115 
 116                 /* overflow error if time or size is out of range */
 117                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 118                 if (error)
 119                         goto out;
 120                 resp->status = NFS3_OK;
 121 
 122                 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 123                     cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 124 
 125                 VN_RELE(vp);
 126 
 127                 return;
 128         }
 129 
 130 out:
 131         if (curthread->t_flag & T_WOULDBLOCK) {
 132                 curthread->t_flag &= ~T_WOULDBLOCK;
 133                 resp->status = NFS3ERR_JUKEBOX;
 134         } else
 135                 resp->status = puterrno3(error);
 136 
 137         DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 138             cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 139 
 140         if (vp != NULL)
 141                 VN_RELE(vp);
 142 }
 143 
 144 void *
 145 rfs3_getattr_getfh(GETATTR3args *args)
 146 {
 147 
 148         return (&args->object);
 149 }
 150 
 151 void
 152 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
 153         struct svc_req *req, cred_t *cr)
 154 {
 155         int error;
 156         vnode_t *vp;
 157         struct vattr *bvap;
 158         struct vattr bva;
 159         struct vattr *avap;
 160         struct vattr ava;
 161         int flag;
 162         int in_crit = 0;
 163         struct flock64 bf;
 164         caller_context_t ct;
 165 
 166         bvap = NULL;
 167         avap = NULL;
 168 
 169         vp = nfs3_fhtovp(&args->object, exi);
 170 
 171         DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
 172             cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
 173 
 174         if (vp == NULL) {
 175                 error = ESTALE;
 176                 goto out;
 177         }
 178 
 179         error = sattr3_to_vattr(&args->new_attributes, &ava);
 180         if (error)
 181                 goto out;
 182 
 183         if (is_system_labeled()) {
 184                 bslabel_t *clabel = req->rq_label;
 185 
 186                 ASSERT(clabel != NULL);
 187                 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
 188                     "got client label from request(1)", struct svc_req *, req);
 189 
 190                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 191                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
 192                             exi)) {
 193                                 resp->status = NFS3ERR_ACCES;
 194                                 goto out1;
 195                         }
 196                 }
 197         }
 198 
 199         /*
 200          * We need to specially handle size changes because of
 201          * possible conflicting NBMAND locks. Get into critical
 202          * region before VOP_GETATTR, so the size attribute is
 203          * valid when checking conflicts.
 204          *
 205          * Also, check to see if the v4 side of the server has
 206          * delegated this file.  If so, then we return JUKEBOX to
 207          * allow the client to retrasmit its request.
 208          */
 209         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 210                 if (nbl_need_check(vp)) {
 211                         nbl_start_crit(vp, RW_READER);
 212                         in_crit = 1;
 213                 }
 214         }
 215 
 216         bva.va_mask = AT_ALL;
 217         error = rfs4_delegated_getattr(vp, &bva, 0, cr);
 218 
 219         /*
 220          * If we can't get the attributes, then we can't do the
 221          * right access checking.  So, we'll fail the request.
 222          */
 223         if (error)
 224                 goto out;
 225 
 226         bvap = &bva;
 227 
 228         if (rdonly(exi, req) || vn_is_readonly(vp)) {
 229                 resp->status = NFS3ERR_ROFS;
 230                 goto out1;
 231         }
 232 
 233         if (args->guard.check &&
 234             (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
 235             args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
 236                 resp->status = NFS3ERR_NOT_SYNC;
 237                 goto out1;
 238         }
 239 
 240         if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
 241                 flag = ATTR_UTIME;
 242         else
 243                 flag = 0;
 244 
 245         /*
 246          * If the filesystem is exported with nosuid, then mask off
 247          * the setuid and setgid bits.
 248          */
 249         if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
 250             (exi->exi_export.ex_flags & EX_NOSUID))
 251                 ava.va_mode &= ~(VSUID | VSGID);
 252 
 253         ct.cc_sysid = 0;
 254         ct.cc_pid = 0;
 255         ct.cc_caller_id = nfs3_srv_caller_id;
 256         ct.cc_flags = CC_DONTBLOCK;
 257 
 258         /*
 259          * We need to specially handle size changes because it is
 260          * possible for the client to create a file with modes
 261          * which indicate read-only, but with the file opened for
 262          * writing.  If the client then tries to set the size of
 263          * the file, then the normal access checking done in
 264          * VOP_SETATTR would prevent the client from doing so,
 265          * although it should be legal for it to do so.  To get
 266          * around this, we do the access checking for ourselves
 267          * and then use VOP_SPACE which doesn't do the access
 268          * checking which VOP_SETATTR does. VOP_SPACE can only
 269          * operate on VREG files, let VOP_SETATTR handle the other
 270          * extremely rare cases.
 271          * Also the client should not be allowed to change the
 272          * size of the file if there is a conflicting non-blocking
 273          * mandatory lock in the region the change.
 274          */
 275         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 276                 if (in_crit) {
 277                         u_offset_t offset;
 278                         ssize_t length;
 279 
 280                         if (ava.va_size < bva.va_size) {
 281                                 offset = ava.va_size;
 282                                 length = bva.va_size - ava.va_size;
 283                         } else {
 284                                 offset = bva.va_size;
 285                                 length = ava.va_size - bva.va_size;
 286                         }
 287                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 288                             NULL)) {
 289                                 error = EACCES;
 290                                 goto out;
 291                         }
 292                 }
 293 
 294                 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
 295                         ava.va_mask &= ~AT_SIZE;
 296                         bf.l_type = F_WRLCK;
 297                         bf.l_whence = 0;
 298                         bf.l_start = (off64_t)ava.va_size;
 299                         bf.l_len = 0;
 300                         bf.l_sysid = 0;
 301                         bf.l_pid = 0;
 302                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 303                             (offset_t)ava.va_size, cr, &ct);
 304                 }
 305         }
 306 
 307         if (!error && ava.va_mask)
 308                 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
 309 
 310         /* check if a monitor detected a delegation conflict */
 311         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 312                 resp->status = NFS3ERR_JUKEBOX;
 313                 goto out1;
 314         }
 315 
 316         ava.va_mask = AT_ALL;
 317         avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 318 
 319         /*
 320          * Force modified metadata out to stable storage.
 321          */
 322         (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 323 
 324         if (error)
 325                 goto out;
 326 
 327         if (in_crit)
 328                 nbl_end_crit(vp);
 329 
 330         resp->status = NFS3_OK;
 331         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 332 
 333         DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 334             cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 335 
 336         VN_RELE(vp);
 337 
 338         return;
 339 
 340 out:
 341         if (curthread->t_flag & T_WOULDBLOCK) {
 342                 curthread->t_flag &= ~T_WOULDBLOCK;
 343                 resp->status = NFS3ERR_JUKEBOX;
 344         } else
 345                 resp->status = puterrno3(error);
 346 out1:
 347         DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 348             cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 349 
 350         if (vp != NULL) {
 351                 if (in_crit)
 352                         nbl_end_crit(vp);
 353                 VN_RELE(vp);
 354         }
 355         vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
 356 }
 357 
 358 void *
 359 rfs3_setattr_getfh(SETATTR3args *args)
 360 {
 361 
 362         return (&args->object);
 363 }
 364 
 365 /* ARGSUSED */
 366 void
 367 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 368         struct svc_req *req, cred_t *cr)
 369 {
 370         int error;
 371         vnode_t *vp;
 372         vnode_t *dvp;
 373         struct vattr *vap;
 374         struct vattr va;
 375         struct vattr *dvap;
 376         struct vattr dva;
 377         nfs_fh3 *fhp;
 378         struct sec_ol sec = {0, 0};
 379         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 380         struct sockaddr *ca;
 381         char *name = NULL;
 382 
 383         dvap = NULL;
 384 
 385         /*
 386          * Allow lookups from the root - the default
 387          * location of the public filehandle.
 388          */
 389         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 390                 dvp = rootdir;
 391                 VN_HOLD(dvp);
 392 
 393                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 394                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 395         } else {
 396                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 397 
 398                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 399                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 400 
 401                 if (dvp == NULL) {
 402                         error = ESTALE;
 403                         goto out;
 404                 }
 405         }
 406 
 407         dva.va_mask = AT_ALL;
 408         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 409 
 410         if (args->what.name == nfs3nametoolong) {
 411                 resp->status = NFS3ERR_NAMETOOLONG;
 412                 goto out1;
 413         }
 414 
 415         if (args->what.name == NULL || *(args->what.name) == '\0') {
 416                 resp->status = NFS3ERR_ACCES;
 417                 goto out1;
 418         }
 419 
 420         fhp = &args->what.dir;
 421         if (strcmp(args->what.name, "..") == 0 &&
 422             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 423                 resp->status = NFS3ERR_NOENT;
 424                 goto out1;
 425         }
 426 
 427         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 428         name = nfscmd_convname(ca, exi, args->what.name,
 429             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 430 
 431         if (name == NULL) {
 432                 resp->status = NFS3ERR_ACCES;
 433                 goto out1;
 434         }
 435 
 436         exi_hold(exi);
 437 
 438         /*
 439          * If the public filehandle is used then allow
 440          * a multi-component lookup
 441          */
 442         if (PUBLIC_FH3(&args->what.dir)) {
 443                 struct exportinfo *new;
 444 
 445                 publicfh_flag = TRUE;
 446 
 447                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 448                     &new, &sec);
 449 
 450                 if (error == 0) {
 451                         exi_rele(exi);
 452                         exi = new;
 453                 }
 454 
 455                 /*
 456                  * Since WebNFS may bypass MOUNT, we need to ensure this
 457                  * request didn't come from an unlabeled admin_low client.
 458                  */
 459                 if (is_system_labeled() && error == 0) {
 460                         int             addr_type;
 461                         void            *ipaddr;
 462                         tsol_tpc_t      *tp;
 463 
 464                         if (ca->sa_family == AF_INET) {
 465                                 addr_type = IPV4_VERSION;
 466                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 467                         } else if (ca->sa_family == AF_INET6) {
 468                                 addr_type = IPV6_VERSION;
 469                                 ipaddr = &((struct sockaddr_in6 *)
 470                                     ca)->sin6_addr;
 471                         }
 472                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 473                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 474                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 475                             SUN_CIPSO) {
 476                                 VN_RELE(vp);
 477                                 resp->status = NFS3ERR_ACCES;
 478                                 error = 1;
 479                         }
 480                         if (tp != NULL)
 481                                 TPC_RELE(tp);
 482                 }
 483         } else {
 484                 error = VOP_LOOKUP(dvp, name, &vp,
 485                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 486         }
 487 
 488         if (name != args->what.name)
 489                 kmem_free(name, MAXPATHLEN + 1);
 490 
 491         if (is_system_labeled() && error == 0) {
 492                 bslabel_t *clabel = req->rq_label;
 493 
 494                 ASSERT(clabel != NULL);
 495                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 496                     "got client label from request(1)", struct svc_req *, req);
 497 
 498                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 499                         if (!do_rfs_label_check(clabel, dvp,
 500                             DOMINANCE_CHECK, exi)) {
 501                                 VN_RELE(vp);
 502                                 resp->status = NFS3ERR_ACCES;
 503                                 error = 1;
 504                         }
 505                 }
 506         }
 507 
 508         dva.va_mask = AT_ALL;
 509         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 510 
 511         if (error)
 512                 goto out;
 513 
 514         if (sec.sec_flags & SEC_QUERY) {
 515                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 516         } else {
 517                 error = makefh3(&resp->resok.object, vp, exi);
 518                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 519                         auth_weak = TRUE;
 520         }
 521 
 522         if (error) {
 523                 VN_RELE(vp);
 524                 goto out;
 525         }
 526 
 527         va.va_mask = AT_ALL;
 528         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 529 
 530         exi_rele(exi);
 531         VN_RELE(vp);
 532 
 533         resp->status = NFS3_OK;
 534         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 535         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 536 
 537         /*
 538          * If it's public fh, no 0x81, and client's flavor is
 539          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 540          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 541          */
 542         if (auth_weak)
 543                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 544 
 545         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 546             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 547         VN_RELE(dvp);
 548 
 549         return;
 550 
 551 out:
 552         /*
 553          * The passed argument exportinfo is released by the
 554          * caller, common_dispatch
 555          */
 556         exi_rele(exi);
 557 
 558         if (curthread->t_flag & T_WOULDBLOCK) {
 559                 curthread->t_flag &= ~T_WOULDBLOCK;
 560                 resp->status = NFS3ERR_JUKEBOX;
 561         } else
 562                 resp->status = puterrno3(error);
 563 out1:
 564         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 565             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 566 
 567         if (dvp != NULL)
 568                 VN_RELE(dvp);
 569         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 570 
 571 }
 572 
 573 void *
 574 rfs3_lookup_getfh(LOOKUP3args *args)
 575 {
 576 
 577         return (&args->what.dir);
 578 }
 579 
 580 /* ARGSUSED */
 581 void
 582 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 583         struct svc_req *req, cred_t *cr)
 584 {
 585         int error;
 586         vnode_t *vp;
 587         struct vattr *vap;
 588         struct vattr va;
 589         int checkwriteperm;
 590         boolean_t dominant_label = B_FALSE;
 591         boolean_t equal_label = B_FALSE;
 592         boolean_t admin_low_client;
 593 
 594         vap = NULL;
 595 
 596         vp = nfs3_fhtovp(&args->object, exi);
 597 
 598         DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
 599             cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
 600 
 601         if (vp == NULL) {
 602                 error = ESTALE;
 603                 goto out;
 604         }
 605 
 606         /*
 607          * If the file system is exported read only, it is not appropriate
 608          * to check write permissions for regular files and directories.
 609          * Special files are interpreted by the client, so the underlying
 610          * permissions are sent back to the client for interpretation.
 611          */
 612         if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
 613                 checkwriteperm = 0;
 614         else
 615                 checkwriteperm = 1;
 616 
 617         /*
 618          * We need the mode so that we can correctly determine access
 619          * permissions relative to a mandatory lock file.  Access to
 620          * mandatory lock files is denied on the server, so it might
 621          * as well be reflected to the server during the open.
 622          */
 623         va.va_mask = AT_MODE;
 624         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 625         if (error)
 626                 goto out;
 627 
 628         vap = &va;
 629 
 630         resp->resok.access = 0;
 631 
 632         if (is_system_labeled()) {
 633                 bslabel_t *clabel = req->rq_label;
 634 
 635                 ASSERT(clabel != NULL);
 636                 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 637                     "got client label from request(1)", struct svc_req *, req);
 638 
 639                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 640                         if ((equal_label = do_rfs_label_check(clabel, vp,
 641                             EQUALITY_CHECK, exi)) == B_FALSE) {
 642                                 dominant_label = do_rfs_label_check(clabel,
 643                                     vp, DOMINANCE_CHECK, exi);
 644                         } else
 645                                 dominant_label = B_TRUE;
 646                         admin_low_client = B_FALSE;
 647                 } else
 648                         admin_low_client = B_TRUE;
 649         }
 650 
 651         if (args->access & ACCESS3_READ) {
 652                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 653                 if (error) {
 654                         if (curthread->t_flag & T_WOULDBLOCK)
 655                                 goto out;
 656                 } else if (!MANDLOCK(vp, va.va_mode) &&
 657                     (!is_system_labeled() || admin_low_client ||
 658                     dominant_label))
 659                         resp->resok.access |= ACCESS3_READ;
 660         }
 661         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 662                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 663                 if (error) {
 664                         if (curthread->t_flag & T_WOULDBLOCK)
 665                                 goto out;
 666                 } else if (!is_system_labeled() || admin_low_client ||
 667                     dominant_label)
 668                         resp->resok.access |= ACCESS3_LOOKUP;
 669         }
 670         if (checkwriteperm &&
 671             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 672                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 673                 if (error) {
 674                         if (curthread->t_flag & T_WOULDBLOCK)
 675                                 goto out;
 676                 } else if (!MANDLOCK(vp, va.va_mode) &&
 677                     (!is_system_labeled() || admin_low_client || equal_label)) {
 678                         resp->resok.access |=
 679                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 680                 }
 681         }
 682         if (checkwriteperm &&
 683             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 684                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 685                 if (error) {
 686                         if (curthread->t_flag & T_WOULDBLOCK)
 687                                 goto out;
 688                 } else if (!is_system_labeled() || admin_low_client ||
 689                     equal_label)
 690                         resp->resok.access |= ACCESS3_DELETE;
 691         }
 692         if (args->access & ACCESS3_EXECUTE) {
 693                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 694                 if (error) {
 695                         if (curthread->t_flag & T_WOULDBLOCK)
 696                                 goto out;
 697                 } else if (!MANDLOCK(vp, va.va_mode) &&
 698                     (!is_system_labeled() || admin_low_client ||
 699                     dominant_label))
 700                         resp->resok.access |= ACCESS3_EXECUTE;
 701         }
 702 
 703         va.va_mask = AT_ALL;
 704         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 705 
 706         resp->status = NFS3_OK;
 707         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 708 
 709         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 710             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 711 
 712         VN_RELE(vp);
 713 
 714         return;
 715 
 716 out:
 717         if (curthread->t_flag & T_WOULDBLOCK) {
 718                 curthread->t_flag &= ~T_WOULDBLOCK;
 719                 resp->status = NFS3ERR_JUKEBOX;
 720         } else
 721                 resp->status = puterrno3(error);
 722         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 723             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 724         if (vp != NULL)
 725                 VN_RELE(vp);
 726         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 727 }
 728 
 729 void *
 730 rfs3_access_getfh(ACCESS3args *args)
 731 {
 732 
 733         return (&args->object);
 734 }
 735 
 736 /* ARGSUSED */
 737 void
 738 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 739         struct svc_req *req, cred_t *cr)
 740 {
 741         int error;
 742         vnode_t *vp;
 743         struct vattr *vap;
 744         struct vattr va;
 745         struct iovec iov;
 746         struct uio uio;
 747         char *data;
 748         struct sockaddr *ca;
 749         char *name = NULL;
 750         int is_referral = 0;
 751 
 752         vap = NULL;
 753 
 754         vp = nfs3_fhtovp(&args->symlink, exi);
 755 
 756         DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
 757             cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
 758 
 759         if (vp == NULL) {
 760                 error = ESTALE;
 761                 goto out;
 762         }
 763 
 764         va.va_mask = AT_ALL;
 765         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 766         if (error)
 767                 goto out;
 768 
 769         vap = &va;
 770 
 771         /* We lied about the object type for a referral */
 772         if (vn_is_nfs_reparse(vp, cr))
 773                 is_referral = 1;
 774 
 775         if (vp->v_type != VLNK && !is_referral) {
 776                 resp->status = NFS3ERR_INVAL;
 777                 goto out1;
 778         }
 779 
 780         if (MANDLOCK(vp, va.va_mode)) {
 781                 resp->status = NFS3ERR_ACCES;
 782                 goto out1;
 783         }
 784 
 785         if (is_system_labeled()) {
 786                 bslabel_t *clabel = req->rq_label;
 787 
 788                 ASSERT(clabel != NULL);
 789                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 790                     "got client label from request(1)", struct svc_req *, req);
 791 
 792                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 793                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 794                             exi)) {
 795                                 resp->status = NFS3ERR_ACCES;
 796                                 goto out1;
 797                         }
 798                 }
 799         }
 800 
 801         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 802 
 803         if (is_referral) {
 804                 char *s;
 805                 size_t strsz;
 806 
 807                 /* Get an artificial symlink based on a referral */
 808                 s = build_symlink(vp, cr, &strsz);
 809                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 810                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 811                     vnode_t *, vp, char *, s);
 812                 if (s == NULL)
 813                         error = EINVAL;
 814                 else {
 815                         error = 0;
 816                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 817                         kmem_free(s, strsz);
 818                 }
 819 
 820         } else {
 821 
 822                 iov.iov_base = data;
 823                 iov.iov_len = MAXPATHLEN;
 824                 uio.uio_iov = &iov;
 825                 uio.uio_iovcnt = 1;
 826                 uio.uio_segflg = UIO_SYSSPACE;
 827                 uio.uio_extflg = UIO_COPY_CACHED;
 828                 uio.uio_loffset = 0;
 829                 uio.uio_resid = MAXPATHLEN;
 830 
 831                 error = VOP_READLINK(vp, &uio, cr, NULL);
 832 
 833                 if (!error)
 834                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 835         }
 836 
 837         va.va_mask = AT_ALL;
 838         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 839 
 840         /* Lie about object type again just to be consistent */
 841         if (is_referral && vap != NULL)
 842                 vap->va_type = VLNK;
 843 
 844 #if 0 /* notyet */
 845         /*
 846          * Don't do this.  It causes local disk writes when just
 847          * reading the file and the overhead is deemed larger
 848          * than the benefit.
 849          */
 850         /*
 851          * Force modified metadata out to stable storage.
 852          */
 853         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 854 #endif
 855 
 856         if (error) {
 857                 kmem_free(data, MAXPATHLEN + 1);
 858                 goto out;
 859         }
 860 
 861         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 862         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 863             MAXPATHLEN + 1);
 864 
 865         if (name == NULL) {
 866                 /*
 867                  * Even though the conversion failed, we return
 868                  * something. We just don't translate it.
 869                  */
 870                 name = data;
 871         }
 872 
 873         resp->status = NFS3_OK;
 874         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 875         resp->resok.data = name;
 876 
 877         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 878             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 879         VN_RELE(vp);
 880 
 881         if (name != data)
 882                 kmem_free(data, MAXPATHLEN + 1);
 883 
 884         return;
 885 
 886 out:
 887         if (curthread->t_flag & T_WOULDBLOCK) {
 888                 curthread->t_flag &= ~T_WOULDBLOCK;
 889                 resp->status = NFS3ERR_JUKEBOX;
 890         } else
 891                 resp->status = puterrno3(error);
 892 out1:
 893         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 894             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 895         if (vp != NULL)
 896                 VN_RELE(vp);
 897         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 898 }
 899 
 900 void *
 901 rfs3_readlink_getfh(READLINK3args *args)
 902 {
 903 
 904         return (&args->symlink);
 905 }
 906 
 907 void
 908 rfs3_readlink_free(READLINK3res *resp)
 909 {
 910 
 911         if (resp->status == NFS3_OK)
 912                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
 913 }
 914 
 915 /*
 916  * Server routine to handle read
 917  * May handle RDMA data as well as mblks
 918  */
 919 /* ARGSUSED */
 920 void
 921 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
 922         struct svc_req *req, cred_t *cr)
 923 {
 924         int error;
 925         vnode_t *vp;
 926         struct vattr *vap;
 927         struct vattr va;
 928         struct iovec iov;
 929         struct uio uio;
 930         u_offset_t offset;
 931         mblk_t *mp = NULL;
 932         int alloc_err = 0;
 933         int in_crit = 0;
 934         int need_rwunlock = 0;
 935         caller_context_t ct;
 936         int rdma_used = 0;
 937         int loaned_buffers;
 938         struct uio *uiop;
 939 
 940         vap = NULL;
 941 
 942         vp = nfs3_fhtovp(&args->file, exi);
 943 
 944         DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
 945             cred_t *, cr, vnode_t *, vp, READ3args *, args);
 946 
 947         if (vp == NULL) {
 948                 error = ESTALE;
 949                 goto out;
 950         }
 951 
 952         if (args->wlist) {
 953                 if (args->count > clist_len(args->wlist)) {
 954                         error = EINVAL;
 955                         goto out;
 956                 }
 957                 rdma_used = 1;
 958         }
 959 
 960         /* use loaned buffers for TCP */
 961         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
 962 
 963         if (is_system_labeled()) {
 964                 bslabel_t *clabel = req->rq_label;
 965 
 966                 ASSERT(clabel != NULL);
 967                 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
 968                     "got client label from request(1)", struct svc_req *, req);
 969 
 970                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 971                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 972                             exi)) {
 973                                 resp->status = NFS3ERR_ACCES;
 974                                 goto out1;
 975                         }
 976                 }
 977         }
 978 
 979         ct.cc_sysid = 0;
 980         ct.cc_pid = 0;
 981         ct.cc_caller_id = nfs3_srv_caller_id;
 982         ct.cc_flags = CC_DONTBLOCK;
 983 
 984         /*
 985          * Enter the critical region before calling VOP_RWLOCK
 986          * to avoid a deadlock with write requests.
 987          */
 988         if (nbl_need_check(vp)) {
 989                 nbl_start_crit(vp, RW_READER);
 990                 in_crit = 1;
 991                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
 992                     NULL)) {
 993                         error = EACCES;
 994                         goto out;
 995                 }
 996         }
 997 
 998         error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
 999 
1000         /* check if a monitor detected a delegation conflict */
1001         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1002                 resp->status = NFS3ERR_JUKEBOX;
1003                 goto out1;
1004         }
1005 
1006         need_rwunlock = 1;
1007 
1008         va.va_mask = AT_ALL;
1009         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1010 
1011         /*
1012          * If we can't get the attributes, then we can't do the
1013          * right access checking.  So, we'll fail the request.
1014          */
1015         if (error)
1016                 goto out;
1017 
1018         vap = &va;
1019 
1020         if (vp->v_type != VREG) {
1021                 resp->status = NFS3ERR_INVAL;
1022                 goto out1;
1023         }
1024 
1025         if (crgetuid(cr) != va.va_uid) {
1026                 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1027                 if (error) {
1028                         if (curthread->t_flag & T_WOULDBLOCK)
1029                                 goto out;
1030                         error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1031                         if (error)
1032                                 goto out;
1033                 }
1034         }
1035 
1036         if (MANDLOCK(vp, va.va_mode)) {
1037                 resp->status = NFS3ERR_ACCES;
1038                 goto out1;
1039         }
1040 
1041         offset = args->offset;
1042         if (offset >= va.va_size) {
1043                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1044                 if (in_crit)
1045                         nbl_end_crit(vp);
1046                 resp->status = NFS3_OK;
1047                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1048                 resp->resok.count = 0;
1049                 resp->resok.eof = TRUE;
1050                 resp->resok.data.data_len = 0;
1051                 resp->resok.data.data_val = NULL;
1052                 resp->resok.data.mp = NULL;
1053                 /* RDMA */
1054                 resp->resok.wlist = args->wlist;
1055                 resp->resok.wlist_len = resp->resok.count;
1056                 if (resp->resok.wlist)
1057                         clist_zero_len(resp->resok.wlist);
1058                 goto done;
1059         }
1060 
1061         if (args->count == 0) {
1062                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1063                 if (in_crit)
1064                         nbl_end_crit(vp);
1065                 resp->status = NFS3_OK;
1066                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1067                 resp->resok.count = 0;
1068                 resp->resok.eof = FALSE;
1069                 resp->resok.data.data_len = 0;
1070                 resp->resok.data.data_val = NULL;
1071                 resp->resok.data.mp = NULL;
1072                 /* RDMA */
1073                 resp->resok.wlist = args->wlist;
1074                 resp->resok.wlist_len = resp->resok.count;
1075                 if (resp->resok.wlist)
1076                         clist_zero_len(resp->resok.wlist);
1077                 goto done;
1078         }
1079 
1080         /*
1081          * do not allocate memory more the max. allowed
1082          * transfer size
1083          */
1084         if (args->count > rfs3_tsize(req))
1085                 args->count = rfs3_tsize(req);
1086 
1087         if (loaned_buffers) {
1088                 uiop = (uio_t *)rfs_setup_xuio(vp);
1089                 ASSERT(uiop != NULL);
1090                 uiop->uio_segflg = UIO_SYSSPACE;
1091                 uiop->uio_loffset = args->offset;
1092                 uiop->uio_resid = args->count;
1093 
1094                 /* Jump to do the read if successful */
1095                 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1096                         /*
1097                          * Need to hold the vnode until after VOP_RETZCBUF()
1098                          * is called.
1099                          */
1100                         VN_HOLD(vp);
1101                         goto doio_read;
1102                 }
1103 
1104                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1105                     uiop->uio_loffset, int, uiop->uio_resid);
1106 
1107                 uiop->uio_extflg = 0;
1108                 /* failure to setup for zero copy */
1109                 rfs_free_xuio((void *)uiop);
1110                 loaned_buffers = 0;
1111         }
1112 
1113         /*
1114          * If returning data via RDMA Write, then grab the chunk list.
1115          * If we aren't returning READ data w/RDMA_WRITE, then grab
1116          * a mblk.
1117          */
1118         if (rdma_used) {
1119                 (void) rdma_get_wchunk(req, &iov, args->wlist);
1120         } else {
1121                 /*
1122                  * mp will contain the data to be sent out in the read reply.
1123                  * This will be freed after the reply has been sent out (by the
1124                  * driver).
1125                  * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
1126                  * that the call to xdrmblk_putmblk() never fails.
1127                  */
1128                 mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG,
1129                     &alloc_err);
1130                 ASSERT(mp != NULL);
1131                 ASSERT(alloc_err == 0);
1132 
1133                 iov.iov_base = (caddr_t)mp->b_datap->db_base;
1134                 iov.iov_len = args->count;
1135         }
1136 
1137         uio.uio_iov = &iov;
1138         uio.uio_iovcnt = 1;
1139         uio.uio_segflg = UIO_SYSSPACE;
1140         uio.uio_extflg = UIO_COPY_CACHED;
1141         uio.uio_loffset = args->offset;
1142         uio.uio_resid = args->count;
1143         uiop = &uio;
1144 
1145 doio_read:
1146         error = VOP_READ(vp, uiop, 0, cr, &ct);
1147 
1148         if (error) {
1149                 if (mp)
1150                         freemsg(mp);
1151                 /* check if a monitor detected a delegation conflict */
1152                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1153                         resp->status = NFS3ERR_JUKEBOX;
1154                         goto out1;
1155                 }
1156                 goto out;
1157         }
1158 
1159         /* make mblk using zc buffers */
1160         if (loaned_buffers) {
1161                 mp = uio_to_mblk(uiop);
1162                 ASSERT(mp != NULL);
1163         }
1164 
1165         va.va_mask = AT_ALL;
1166         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1167 
1168         if (error)
1169                 vap = NULL;
1170         else
1171                 vap = &va;
1172 
1173         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1174 
1175         if (in_crit)
1176                 nbl_end_crit(vp);
1177 
1178         resp->status = NFS3_OK;
1179         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1180         resp->resok.count = args->count - uiop->uio_resid;
1181         if (!error && offset + resp->resok.count == va.va_size)
1182                 resp->resok.eof = TRUE;
1183         else
1184                 resp->resok.eof = FALSE;
1185         resp->resok.data.data_len = resp->resok.count;
1186 
1187         if (mp)
1188                 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1189 
1190         resp->resok.data.mp = mp;
1191         resp->resok.size = (uint_t)args->count;
1192 
1193         if (rdma_used) {
1194                 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1195                 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1196                         resp->status = NFS3ERR_INVAL;
1197                 }
1198         } else {
1199                 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1200                 (resp->resok).wlist = NULL;
1201         }
1202 
1203 done:
1204         DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1205             cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1206 
1207         VN_RELE(vp);
1208 
1209         return;
1210 
1211 out:
1212         if (curthread->t_flag & T_WOULDBLOCK) {
1213                 curthread->t_flag &= ~T_WOULDBLOCK;
1214                 resp->status = NFS3ERR_JUKEBOX;
1215         } else
1216                 resp->status = puterrno3(error);
1217 out1:
1218         DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1219             cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1220 
1221         if (vp != NULL) {
1222                 if (need_rwunlock)
1223                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1224                 if (in_crit)
1225                         nbl_end_crit(vp);
1226                 VN_RELE(vp);
1227         }
1228         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1229 }
1230 
1231 void
1232 rfs3_read_free(READ3res *resp)
1233 {
1234         mblk_t *mp;
1235 
1236         if (resp->status == NFS3_OK) {
1237                 mp = resp->resok.data.mp;
1238                 if (mp != NULL)
1239                         freemsg(mp);
1240         }
1241 }
1242 
1243 void *
1244 rfs3_read_getfh(READ3args *args)
1245 {
1246 
1247         return (&args->file);
1248 }
1249 
/*
 * Number of iovec entries rfs3_write() keeps on its stack.  A request
 * whose mblk chain is longer than this gets a kmem_alloc()ed array.
 */
#define MAX_IOVECS      12

#ifdef DEBUG
/*
 * How often the on-stack iovec array was big enough (hits) and how
 * often a larger one had to be allocated (misses).
 */
static int rfs3_write_hits;
static int rfs3_write_misses;
#endif
1256 
1257 void
1258 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1259         struct svc_req *req, cred_t *cr)
1260 {
1261         int error;
1262         vnode_t *vp;
1263         struct vattr *bvap = NULL;
1264         struct vattr bva;
1265         struct vattr *avap = NULL;
1266         struct vattr ava;
1267         u_offset_t rlimit;
1268         struct uio uio;
1269         struct iovec iov[MAX_IOVECS];
1270         mblk_t *m;
1271         struct iovec *iovp;
1272         int iovcnt;
1273         int ioflag;
1274         cred_t *savecred;
1275         int in_crit = 0;
1276         int rwlock_ret = -1;
1277         caller_context_t ct;
1278 
1279         vp = nfs3_fhtovp(&args->file, exi);
1280 
1281         DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1282             cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1283 
1284         if (vp == NULL) {
1285                 error = ESTALE;
1286                 goto err;
1287         }
1288 
1289         if (is_system_labeled()) {
1290                 bslabel_t *clabel = req->rq_label;
1291 
1292                 ASSERT(clabel != NULL);
1293                 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1294                     "got client label from request(1)", struct svc_req *, req);
1295 
1296                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1297                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1298                             exi)) {
1299                                 resp->status = NFS3ERR_ACCES;
1300                                 goto err1;
1301                         }
1302                 }
1303         }
1304 
1305         ct.cc_sysid = 0;
1306         ct.cc_pid = 0;
1307         ct.cc_caller_id = nfs3_srv_caller_id;
1308         ct.cc_flags = CC_DONTBLOCK;
1309 
1310         /*
1311          * We have to enter the critical region before calling VOP_RWLOCK
1312          * to avoid a deadlock with ufs.
1313          */
1314         if (nbl_need_check(vp)) {
1315                 nbl_start_crit(vp, RW_READER);
1316                 in_crit = 1;
1317                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1318                     NULL)) {
1319                         error = EACCES;
1320                         goto err;
1321                 }
1322         }
1323 
1324         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1325 
1326         /* check if a monitor detected a delegation conflict */
1327         if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1328                 resp->status = NFS3ERR_JUKEBOX;
1329                 rwlock_ret = -1;
1330                 goto err1;
1331         }
1332 
1333 
1334         bva.va_mask = AT_ALL;
1335         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1336 
1337         /*
1338          * If we can't get the attributes, then we can't do the
1339          * right access checking.  So, we'll fail the request.
1340          */
1341         if (error)
1342                 goto err;
1343 
1344         bvap = &bva;
1345         avap = bvap;
1346 
1347         if (args->count != args->data.data_len) {
1348                 resp->status = NFS3ERR_INVAL;
1349                 goto err1;
1350         }
1351 
1352         if (rdonly(exi, req)) {
1353                 resp->status = NFS3ERR_ROFS;
1354                 goto err1;
1355         }
1356 
1357         if (vp->v_type != VREG) {
1358                 resp->status = NFS3ERR_INVAL;
1359                 goto err1;
1360         }
1361 
1362         if (crgetuid(cr) != bva.va_uid &&
1363             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1364                 goto err;
1365 
1366         if (MANDLOCK(vp, bva.va_mode)) {
1367                 resp->status = NFS3ERR_ACCES;
1368                 goto err1;
1369         }
1370 
1371         if (args->count == 0) {
1372                 resp->status = NFS3_OK;
1373                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1374                 resp->resok.count = 0;
1375                 resp->resok.committed = args->stable;
1376                 resp->resok.verf = write3verf;
1377                 goto out;
1378         }
1379 
1380         if (args->mblk != NULL) {
1381                 iovcnt = 0;
1382                 for (m = args->mblk; m != NULL; m = m->b_cont)
1383                         iovcnt++;
1384                 if (iovcnt <= MAX_IOVECS) {
1385 #ifdef DEBUG
1386                         rfs3_write_hits++;
1387 #endif
1388                         iovp = iov;
1389                 } else {
1390 #ifdef DEBUG
1391                         rfs3_write_misses++;
1392 #endif
1393                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1394                 }
1395                 mblk_to_iov(args->mblk, iovcnt, iovp);
1396 
1397         } else if (args->rlist != NULL) {
1398                 iovcnt = 1;
1399                 iovp = iov;
1400                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1401                 iovp->iov_len = args->count;
1402         } else {
1403                 iovcnt = 1;
1404                 iovp = iov;
1405                 iovp->iov_base = args->data.data_val;
1406                 iovp->iov_len = args->count;
1407         }
1408 
1409         uio.uio_iov = iovp;
1410         uio.uio_iovcnt = iovcnt;
1411 
1412         uio.uio_segflg = UIO_SYSSPACE;
1413         uio.uio_extflg = UIO_COPY_DEFAULT;
1414         uio.uio_loffset = args->offset;
1415         uio.uio_resid = args->count;
1416         uio.uio_llimit = curproc->p_fsz_ctl;
1417         rlimit = uio.uio_llimit - args->offset;
1418         if (rlimit < (u_offset_t)uio.uio_resid)
1419                 uio.uio_resid = (int)rlimit;
1420 
1421         if (args->stable == UNSTABLE)
1422                 ioflag = 0;
1423         else if (args->stable == FILE_SYNC)
1424                 ioflag = FSYNC;
1425         else if (args->stable == DATA_SYNC)
1426                 ioflag = FDSYNC;
1427         else {
1428                 if (iovp != iov)
1429                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1430                 resp->status = NFS3ERR_INVAL;
1431                 goto err1;
1432         }
1433 
1434         /*
1435          * We're changing creds because VM may fault and we need
1436          * the cred of the current thread to be used if quota
1437          * checking is enabled.
1438          */
1439         savecred = curthread->t_cred;
1440         curthread->t_cred = cr;
1441         error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1442         curthread->t_cred = savecred;
1443 
1444         if (iovp != iov)
1445                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1446 
1447         /* check if a monitor detected a delegation conflict */
1448         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1449                 resp->status = NFS3ERR_JUKEBOX;
1450                 goto err1;
1451         }
1452 
1453         ava.va_mask = AT_ALL;
1454         avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1455 
1456         if (error)
1457                 goto err;
1458 
1459         /*
1460          * If we were unable to get the V_WRITELOCK_TRUE, then we
1461          * may not have accurate after attrs, so check if
1462          * we have both attributes, they have a non-zero va_seq, and
1463          * va_seq has changed by exactly one,
1464          * if not, turn off the before attr.
1465          */
1466         if (rwlock_ret != V_WRITELOCK_TRUE) {
1467                 if (bvap == NULL || avap == NULL ||
1468                     bvap->va_seq == 0 || avap->va_seq == 0 ||
1469                     avap->va_seq != (bvap->va_seq + 1)) {
1470                         bvap = NULL;
1471                 }
1472         }
1473 
1474         resp->status = NFS3_OK;
1475         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1476         resp->resok.count = args->count - uio.uio_resid;
1477         resp->resok.committed = args->stable;
1478         resp->resok.verf = write3verf;
1479         goto out;
1480 
1481 err:
1482         if (curthread->t_flag & T_WOULDBLOCK) {
1483                 curthread->t_flag &= ~T_WOULDBLOCK;
1484                 resp->status = NFS3ERR_JUKEBOX;
1485         } else
1486                 resp->status = puterrno3(error);
1487 err1:
1488         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1489 out:
1490         DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1491             cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1492 
1493         if (vp != NULL) {
1494                 if (rwlock_ret != -1)
1495                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1496                 if (in_crit)
1497                         nbl_end_crit(vp);
1498                 VN_RELE(vp);
1499         }
1500 }
1501 
1502 void *
1503 rfs3_write_getfh(WRITE3args *args)
1504 {
1505 
1506         return (&args->file);
1507 }
1508 
1509 void
1510 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1511         struct svc_req *req, cred_t *cr)
1512 {
1513         int error;
1514         int in_crit = 0;
1515         vnode_t *vp;
1516         vnode_t *tvp = NULL;
1517         vnode_t *dvp;
1518         struct vattr *vap;
1519         struct vattr va;
1520         struct vattr *dbvap;
1521         struct vattr dbva;
1522         struct vattr *davap;
1523         struct vattr dava;
1524         enum vcexcl excl;
1525         nfstime3 *mtime;
1526         len_t reqsize;
1527         bool_t trunc;
1528         struct sockaddr *ca;
1529         char *name = NULL;
1530 
1531         dbvap = NULL;
1532         davap = NULL;
1533 
1534         dvp = nfs3_fhtovp(&args->where.dir, exi);
1535 
1536         DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1537             cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1538 
1539         if (dvp == NULL) {
1540                 error = ESTALE;
1541                 goto out;
1542         }
1543 
1544         dbva.va_mask = AT_ALL;
1545         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1546         davap = dbvap;
1547 
1548         if (args->where.name == nfs3nametoolong) {
1549                 resp->status = NFS3ERR_NAMETOOLONG;
1550                 goto out1;
1551         }
1552 
1553         if (args->where.name == NULL || *(args->where.name) == '\0') {
1554                 resp->status = NFS3ERR_ACCES;
1555                 goto out1;
1556         }
1557 
1558         if (rdonly(exi, req)) {
1559                 resp->status = NFS3ERR_ROFS;
1560                 goto out1;
1561         }
1562 
1563         if (is_system_labeled()) {
1564                 bslabel_t *clabel = req->rq_label;
1565 
1566                 ASSERT(clabel != NULL);
1567                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1568                     "got client label from request(1)", struct svc_req *, req);
1569 
1570                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1571                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1572                             exi)) {
1573                                 resp->status = NFS3ERR_ACCES;
1574                                 goto out1;
1575                         }
1576                 }
1577         }
1578 
1579         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1580         name = nfscmd_convname(ca, exi, args->where.name,
1581             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1582 
1583         if (name == NULL) {
1584                 /* This is really a Solaris EILSEQ */
1585                 resp->status = NFS3ERR_INVAL;
1586                 goto out1;
1587         }
1588 
1589         if (args->how.mode == EXCLUSIVE) {
1590                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1591                 va.va_type = VREG;
1592                 va.va_mode = (mode_t)0;
1593                 /*
1594                  * Ensure no time overflows and that types match
1595                  */
1596                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1597                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1598                 va.va_mtime.tv_nsec = mtime->nseconds;
1599                 excl = EXCL;
1600         } else {
1601                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1602                     &va);
1603                 if (error)
1604                         goto out;
1605                 va.va_mask |= AT_TYPE;
1606                 va.va_type = VREG;
1607                 if (args->how.mode == GUARDED)
1608                         excl = EXCL;
1609                 else {
1610                         excl = NONEXCL;
1611 
1612                         /*
1613                          * During creation of file in non-exclusive mode
1614                          * if size of file is being set then make sure
1615                          * that if the file already exists that no conflicting
1616                          * non-blocking mandatory locks exists in the region
1617                          * being modified. If there are conflicting locks fail
1618                          * the operation with EACCES.
1619                          */
1620                         if (va.va_mask & AT_SIZE) {
1621                                 struct vattr tva;
1622 
1623                                 /*
1624                                  * Does file already exist?
1625                                  */
1626                                 error = VOP_LOOKUP(dvp, name, &tvp,
1627                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1628 
1629                                 /*
1630                                  * Check to see if the file has been delegated
1631                                  * to a v4 client.  If so, then begin recall of
1632                                  * the delegation and return JUKEBOX to allow
1633                                  * the client to retrasmit its request.
1634                                  */
1635 
1636                                 trunc = va.va_size == 0;
1637                                 if (!error &&
1638                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1639                                         resp->status = NFS3ERR_JUKEBOX;
1640                                         goto out1;
1641                                 }
1642 
1643                                 /*
1644                                  * Check for NBMAND lock conflicts
1645                                  */
1646                                 if (!error && nbl_need_check(tvp)) {
1647                                         u_offset_t offset;
1648                                         ssize_t len;
1649 
1650                                         nbl_start_crit(tvp, RW_READER);
1651                                         in_crit = 1;
1652 
1653                                         tva.va_mask = AT_SIZE;
1654                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1655                                             NULL);
1656                                         /*
1657                                          * Can't check for conflicts, so return
1658                                          * error.
1659                                          */
1660                                         if (error)
1661                                                 goto out;
1662 
1663                                         offset = tva.va_size < va.va_size ?
1664                                             tva.va_size : va.va_size;
1665                                         len = tva.va_size < va.va_size ?
1666                                             va.va_size - tva.va_size :
1667                                             tva.va_size - va.va_size;
1668                                         if (nbl_conflict(tvp, NBL_WRITE,
1669                                             offset, len, 0, NULL)) {
1670                                                 error = EACCES;
1671                                                 goto out;
1672                                         }
1673                                 } else if (tvp) {
1674                                         VN_RELE(tvp);
1675                                         tvp = NULL;
1676                                 }
1677                         }
1678                 }
1679                 if (va.va_mask & AT_SIZE)
1680                         reqsize = va.va_size;
1681         }
1682 
1683         /*
1684          * Must specify the mode.
1685          */
1686         if (!(va.va_mask & AT_MODE)) {
1687                 resp->status = NFS3ERR_INVAL;
1688                 goto out1;
1689         }
1690 
1691         /*
1692          * If the filesystem is exported with nosuid, then mask off
1693          * the setuid and setgid bits.
1694          */
1695         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1696                 va.va_mode &= ~(VSUID | VSGID);
1697 
1698 tryagain:
1699         /*
1700          * The file open mode used is VWRITE.  If the client needs
1701          * some other semantic, then it should do the access checking
1702          * itself.  It would have been nice to have the file open mode
1703          * passed as part of the arguments.
1704          */
1705         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1706             &vp, cr, 0, NULL, NULL);
1707 
1708         dava.va_mask = AT_ALL;
1709         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1710 
1711         if (error) {
1712                 /*
1713                  * If we got something other than file already exists
1714                  * then just return this error.  Otherwise, we got
1715                  * EEXIST.  If we were doing a GUARDED create, then
1716                  * just return this error.  Otherwise, we need to
1717                  * make sure that this wasn't a duplicate of an
1718                  * exclusive create request.
1719                  *
1720                  * The assumption is made that a non-exclusive create
1721                  * request will never return EEXIST.
1722                  */
1723                 if (error != EEXIST || args->how.mode == GUARDED)
1724                         goto out;
1725                 /*
1726                  * Lookup the file so that we can get a vnode for it.
1727                  */
1728                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1729                     NULL, cr, NULL, NULL, NULL);
1730                 if (error) {
1731                         /*
1732                          * We couldn't find the file that we thought that
1733                          * we just created.  So, we'll just try creating
1734                          * it again.
1735                          */
1736                         if (error == ENOENT)
1737                                 goto tryagain;
1738                         goto out;
1739                 }
1740 
1741                 /*
1742                  * If the file is delegated to a v4 client, go ahead
1743                  * and initiate recall, this create is a hint that a
1744                  * conflicting v3 open has occurred.
1745                  */
1746 
1747                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1748                         VN_RELE(vp);
1749                         resp->status = NFS3ERR_JUKEBOX;
1750                         goto out1;
1751                 }
1752 
1753                 va.va_mask = AT_ALL;
1754                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1755 
1756                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1757                 /* % with INT32_MAX to prevent overflows */
1758                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1759                     vap->va_mtime.tv_sec !=
1760                     (mtime->seconds % INT32_MAX) ||
1761                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1762                         VN_RELE(vp);
1763                         error = EEXIST;
1764                         goto out;
1765                 }
1766         } else {
1767 
1768                 if ((args->how.mode == UNCHECKED ||
1769                     args->how.mode == GUARDED) &&
1770                     args->how.createhow3_u.obj_attributes.size.set_it &&
1771                     va.va_size == 0)
1772                         trunc = TRUE;
1773                 else
1774                         trunc = FALSE;
1775 
1776                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1777                         VN_RELE(vp);
1778                         resp->status = NFS3ERR_JUKEBOX;
1779                         goto out1;
1780                 }
1781 
1782                 va.va_mask = AT_ALL;
1783                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1784 
1785                 /*
1786                  * We need to check to make sure that the file got
1787                  * created to the indicated size.  If not, we do a
1788                  * setattr to try to change the size, but we don't
1789                  * try too hard.  This shouldn't a problem as most
1790                  * clients will only specifiy a size of zero which
1791                  * local file systems handle.  However, even if
1792                  * the client does specify a non-zero size, it can
1793                  * still recover by checking the size of the file
1794                  * after it has created it and then issue a setattr
1795                  * request of its own to set the size of the file.
1796                  */
1797                 if (vap != NULL &&
1798                     (args->how.mode == UNCHECKED ||
1799                     args->how.mode == GUARDED) &&
1800                     args->how.createhow3_u.obj_attributes.size.set_it &&
1801                     vap->va_size != reqsize) {
1802                         va.va_mask = AT_SIZE;
1803                         va.va_size = reqsize;
1804                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1805                         va.va_mask = AT_ALL;
1806                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1807                 }
1808         }
1809 
1810         if (name != args->where.name)
1811                 kmem_free(name, MAXPATHLEN + 1);
1812 
1813         error = makefh3(&resp->resok.obj.handle, vp, exi);
1814         if (error)
1815                 resp->resok.obj.handle_follows = FALSE;
1816         else
1817                 resp->resok.obj.handle_follows = TRUE;
1818 
1819         /*
1820          * Force modified data and metadata out to stable storage.
1821          */
1822         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1823         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1824 
1825         VN_RELE(vp);
1826         if (tvp != NULL) {
1827                 if (in_crit)
1828                         nbl_end_crit(tvp);
1829                 VN_RELE(tvp);
1830         }
1831 
1832         resp->status = NFS3_OK;
1833         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1834         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1835 
1836         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1837             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1838 
1839         VN_RELE(dvp);
1840         return;
1841 
1842 out:
1843         if (curthread->t_flag & T_WOULDBLOCK) {
1844                 curthread->t_flag &= ~T_WOULDBLOCK;
1845                 resp->status = NFS3ERR_JUKEBOX;
1846         } else
1847                 resp->status = puterrno3(error);
1848 out1:
1849         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1850             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1851 
1852         if (name != NULL && name != args->where.name)
1853                 kmem_free(name, MAXPATHLEN + 1);
1854 
1855         if (tvp != NULL) {
1856                 if (in_crit)
1857                         nbl_end_crit(tvp);
1858                 VN_RELE(tvp);
1859         }
1860         if (dvp != NULL)
1861                 VN_RELE(dvp);
1862         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1863 }
1864 
1865 void *
1866 rfs3_create_getfh(CREATE3args *args)
1867 {
1868 
1869         return (&args->where.dir);
1870 }
1871 
1872 void
1873 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1874         struct svc_req *req, cred_t *cr)
1875 {
1876         int error;
1877         vnode_t *vp = NULL;
1878         vnode_t *dvp;
1879         struct vattr *vap;
1880         struct vattr va;
1881         struct vattr *dbvap;
1882         struct vattr dbva;
1883         struct vattr *davap;
1884         struct vattr dava;
1885         struct sockaddr *ca;
1886         char *name = NULL;
1887 
1888         dbvap = NULL;
1889         davap = NULL;
1890 
1891         dvp = nfs3_fhtovp(&args->where.dir, exi);
1892 
1893         DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1894             cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1895 
1896         if (dvp == NULL) {
1897                 error = ESTALE;
1898                 goto out;
1899         }
1900 
1901         dbva.va_mask = AT_ALL;
1902         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1903         davap = dbvap;
1904 
1905         if (args->where.name == nfs3nametoolong) {
1906                 resp->status = NFS3ERR_NAMETOOLONG;
1907                 goto out1;
1908         }
1909 
1910         if (args->where.name == NULL || *(args->where.name) == '\0') {
1911                 resp->status = NFS3ERR_ACCES;
1912                 goto out1;
1913         }
1914 
1915         if (rdonly(exi, req)) {
1916                 resp->status = NFS3ERR_ROFS;
1917                 goto out1;
1918         }
1919 
1920         if (is_system_labeled()) {
1921                 bslabel_t *clabel = req->rq_label;
1922 
1923                 ASSERT(clabel != NULL);
1924                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1925                     "got client label from request(1)", struct svc_req *, req);
1926 
1927                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1928                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1929                             exi)) {
1930                                 resp->status = NFS3ERR_ACCES;
1931                                 goto out1;
1932                         }
1933                 }
1934         }
1935 
1936         error = sattr3_to_vattr(&args->attributes, &va);
1937         if (error)
1938                 goto out;
1939 
1940         if (!(va.va_mask & AT_MODE)) {
1941                 resp->status = NFS3ERR_INVAL;
1942                 goto out1;
1943         }
1944 
1945         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1946         name = nfscmd_convname(ca, exi, args->where.name,
1947             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1948 
1949         if (name == NULL) {
1950                 resp->status = NFS3ERR_INVAL;
1951                 goto out1;
1952         }
1953 
1954         va.va_mask |= AT_TYPE;
1955         va.va_type = VDIR;
1956 
1957         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1958 
1959         if (name != args->where.name)
1960                 kmem_free(name, MAXPATHLEN + 1);
1961 
1962         dava.va_mask = AT_ALL;
1963         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1964 
1965         /*
1966          * Force modified data and metadata out to stable storage.
1967          */
1968         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1969 
1970         if (error)
1971                 goto out;
1972 
1973         error = makefh3(&resp->resok.obj.handle, vp, exi);
1974         if (error)
1975                 resp->resok.obj.handle_follows = FALSE;
1976         else
1977                 resp->resok.obj.handle_follows = TRUE;
1978 
1979         va.va_mask = AT_ALL;
1980         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1981 
1982         /*
1983          * Force modified data and metadata out to stable storage.
1984          */
1985         (void) VOP_FSYNC(vp, 0, cr, NULL);
1986 
1987         VN_RELE(vp);
1988 
1989         resp->status = NFS3_OK;
1990         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1991         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1992 
1993         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1994             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1995         VN_RELE(dvp);
1996 
1997         return;
1998 
1999 out:
2000         if (curthread->t_flag & T_WOULDBLOCK) {
2001                 curthread->t_flag &= ~T_WOULDBLOCK;
2002                 resp->status = NFS3ERR_JUKEBOX;
2003         } else
2004                 resp->status = puterrno3(error);
2005 out1:
2006         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2007             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2008         if (dvp != NULL)
2009                 VN_RELE(dvp);
2010         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2011 }
2012 
2013 void *
2014 rfs3_mkdir_getfh(MKDIR3args *args)
2015 {
2016 
2017         return (&args->where.dir);
2018 }
2019 
2020 void
2021 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2022         struct svc_req *req, cred_t *cr)
2023 {
2024         int error;
2025         vnode_t *vp;
2026         vnode_t *dvp;
2027         struct vattr *vap;
2028         struct vattr va;
2029         struct vattr *dbvap;
2030         struct vattr dbva;
2031         struct vattr *davap;
2032         struct vattr dava;
2033         struct sockaddr *ca;
2034         char *name = NULL;
2035         char *symdata = NULL;
2036 
2037         dbvap = NULL;
2038         davap = NULL;
2039 
2040         dvp = nfs3_fhtovp(&args->where.dir, exi);
2041 
2042         DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2043             cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2044 
2045         if (dvp == NULL) {
2046                 error = ESTALE;
2047                 goto err;
2048         }
2049 
2050         dbva.va_mask = AT_ALL;
2051         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2052         davap = dbvap;
2053 
2054         if (args->where.name == nfs3nametoolong) {
2055                 resp->status = NFS3ERR_NAMETOOLONG;
2056                 goto err1;
2057         }
2058 
2059         if (args->where.name == NULL || *(args->where.name) == '\0') {
2060                 resp->status = NFS3ERR_ACCES;
2061                 goto err1;
2062         }
2063 
2064         if (rdonly(exi, req)) {
2065                 resp->status = NFS3ERR_ROFS;
2066                 goto err1;
2067         }
2068 
2069         if (is_system_labeled()) {
2070                 bslabel_t *clabel = req->rq_label;
2071 
2072                 ASSERT(clabel != NULL);
2073                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2074                     "got client label from request(1)", struct svc_req *, req);
2075 
2076                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2077                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2078                             exi)) {
2079                                 resp->status = NFS3ERR_ACCES;
2080                                 goto err1;
2081                         }
2082                 }
2083         }
2084 
2085         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2086         if (error)
2087                 goto err;
2088 
2089         if (!(va.va_mask & AT_MODE)) {
2090                 resp->status = NFS3ERR_INVAL;
2091                 goto err1;
2092         }
2093 
2094         if (args->symlink.symlink_data == nfs3nametoolong) {
2095                 resp->status = NFS3ERR_NAMETOOLONG;
2096                 goto err1;
2097         }
2098 
2099         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2100         name = nfscmd_convname(ca, exi, args->where.name,
2101             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2102 
2103         if (name == NULL) {
2104                 /* This is really a Solaris EILSEQ */
2105                 resp->status = NFS3ERR_INVAL;
2106                 goto err1;
2107         }
2108 
2109         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2110             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2111         if (symdata == NULL) {
2112                 /* This is really a Solaris EILSEQ */
2113                 resp->status = NFS3ERR_INVAL;
2114                 goto err1;
2115         }
2116 
2117 
2118         va.va_mask |= AT_TYPE;
2119         va.va_type = VLNK;
2120 
2121         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2122 
2123         dava.va_mask = AT_ALL;
2124         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2125 
2126         if (error)
2127                 goto err;
2128 
2129         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2130             NULL, NULL, NULL);
2131 
2132         /*
2133          * Force modified data and metadata out to stable storage.
2134          */
2135         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2136 
2137 
2138         resp->status = NFS3_OK;
2139         if (error) {
2140                 resp->resok.obj.handle_follows = FALSE;
2141                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2142                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2143                 goto out;
2144         }
2145 
2146         error = makefh3(&resp->resok.obj.handle, vp, exi);
2147         if (error)
2148                 resp->resok.obj.handle_follows = FALSE;
2149         else
2150                 resp->resok.obj.handle_follows = TRUE;
2151 
2152         va.va_mask = AT_ALL;
2153         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2154 
2155         /*
2156          * Force modified data and metadata out to stable storage.
2157          */
2158         (void) VOP_FSYNC(vp, 0, cr, NULL);
2159 
2160         VN_RELE(vp);
2161 
2162         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2163         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2164         goto out;
2165 
2166 err:
2167         if (curthread->t_flag & T_WOULDBLOCK) {
2168                 curthread->t_flag &= ~T_WOULDBLOCK;
2169                 resp->status = NFS3ERR_JUKEBOX;
2170         } else
2171                 resp->status = puterrno3(error);
2172 err1:
2173         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2174 out:
2175         if (name != NULL && name != args->where.name)
2176                 kmem_free(name, MAXPATHLEN + 1);
2177         if (symdata != NULL && symdata != args->symlink.symlink_data)
2178                 kmem_free(symdata, MAXPATHLEN + 1);
2179 
2180         DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2181             cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2182 
2183         if (dvp != NULL)
2184                 VN_RELE(dvp);
2185 }
2186 
2187 void *
2188 rfs3_symlink_getfh(SYMLINK3args *args)
2189 {
2190 
2191         return (&args->where.dir);
2192 }
2193 
2194 void
2195 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2196         struct svc_req *req, cred_t *cr)
2197 {
2198         int error;
2199         vnode_t *vp;
2200         vnode_t *realvp;
2201         vnode_t *dvp;
2202         struct vattr *vap;
2203         struct vattr va;
2204         struct vattr *dbvap;
2205         struct vattr dbva;
2206         struct vattr *davap;
2207         struct vattr dava;
2208         int mode;
2209         enum vcexcl excl;
2210         struct sockaddr *ca;
2211         char *name = NULL;
2212 
2213         dbvap = NULL;
2214         davap = NULL;
2215 
2216         dvp = nfs3_fhtovp(&args->where.dir, exi);
2217 
2218         DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2219             cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2220 
2221         if (dvp == NULL) {
2222                 error = ESTALE;
2223                 goto out;
2224         }
2225 
2226         dbva.va_mask = AT_ALL;
2227         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2228         davap = dbvap;
2229 
2230         if (args->where.name == nfs3nametoolong) {
2231                 resp->status = NFS3ERR_NAMETOOLONG;
2232                 goto out1;
2233         }
2234 
2235         if (args->where.name == NULL || *(args->where.name) == '\0') {
2236                 resp->status = NFS3ERR_ACCES;
2237                 goto out1;
2238         }
2239 
2240         if (rdonly(exi, req)) {
2241                 resp->status = NFS3ERR_ROFS;
2242                 goto out1;
2243         }
2244 
2245         if (is_system_labeled()) {
2246                 bslabel_t *clabel = req->rq_label;
2247 
2248                 ASSERT(clabel != NULL);
2249                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2250                     "got client label from request(1)", struct svc_req *, req);
2251 
2252                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2253                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2254                             exi)) {
2255                                 resp->status = NFS3ERR_ACCES;
2256                                 goto out1;
2257                         }
2258                 }
2259         }
2260 
2261         switch (args->what.type) {
2262         case NF3CHR:
2263         case NF3BLK:
2264                 error = sattr3_to_vattr(
2265                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2266                 if (error)
2267                         goto out;
2268                 if (secpolicy_sys_devices(cr) != 0) {
2269                         resp->status = NFS3ERR_PERM;
2270                         goto out1;
2271                 }
2272                 if (args->what.type == NF3CHR)
2273                         va.va_type = VCHR;
2274                 else
2275                         va.va_type = VBLK;
2276                 va.va_rdev = makedevice(
2277                     args->what.mknoddata3_u.device.spec.specdata1,
2278                     args->what.mknoddata3_u.device.spec.specdata2);
2279                 va.va_mask |= AT_TYPE | AT_RDEV;
2280                 break;
2281         case NF3SOCK:
2282                 error = sattr3_to_vattr(
2283                     &args->what.mknoddata3_u.pipe_attributes, &va);
2284                 if (error)
2285                         goto out;
2286                 va.va_type = VSOCK;
2287                 va.va_mask |= AT_TYPE;
2288                 break;
2289         case NF3FIFO:
2290                 error = sattr3_to_vattr(
2291                     &args->what.mknoddata3_u.pipe_attributes, &va);
2292                 if (error)
2293                         goto out;
2294                 va.va_type = VFIFO;
2295                 va.va_mask |= AT_TYPE;
2296                 break;
2297         default:
2298                 resp->status = NFS3ERR_BADTYPE;
2299                 goto out1;
2300         }
2301 
2302         /*
2303          * Must specify the mode.
2304          */
2305         if (!(va.va_mask & AT_MODE)) {
2306                 resp->status = NFS3ERR_INVAL;
2307                 goto out1;
2308         }
2309 
2310         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2311         name = nfscmd_convname(ca, exi, args->where.name,
2312             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2313 
2314         if (name == NULL) {
2315                 resp->status = NFS3ERR_INVAL;
2316                 goto out1;
2317         }
2318 
2319         excl = EXCL;
2320 
2321         mode = 0;
2322 
2323         error = VOP_CREATE(dvp, name, &va, excl, mode,
2324             &vp, cr, 0, NULL, NULL);
2325 
2326         if (name != args->where.name)
2327                 kmem_free(name, MAXPATHLEN + 1);
2328 
2329         dava.va_mask = AT_ALL;
2330         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2331 
2332         /*
2333          * Force modified data and metadata out to stable storage.
2334          */
2335         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2336 
2337         if (error)
2338                 goto out;
2339 
2340         resp->status = NFS3_OK;
2341 
2342         error = makefh3(&resp->resok.obj.handle, vp, exi);
2343         if (error)
2344                 resp->resok.obj.handle_follows = FALSE;
2345         else
2346                 resp->resok.obj.handle_follows = TRUE;
2347 
2348         va.va_mask = AT_ALL;
2349         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2350 
2351         /*
2352          * Force modified metadata out to stable storage.
2353          *
2354          * if a underlying vp exists, pass it to VOP_FSYNC
2355          */
2356         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2357                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2358         else
2359                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2360 
2361         VN_RELE(vp);
2362 
2363         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2364         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2365         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2366             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2367         VN_RELE(dvp);
2368         return;
2369 
2370 out:
2371         if (curthread->t_flag & T_WOULDBLOCK) {
2372                 curthread->t_flag &= ~T_WOULDBLOCK;
2373                 resp->status = NFS3ERR_JUKEBOX;
2374         } else
2375                 resp->status = puterrno3(error);
2376 out1:
2377         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2378             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2379         if (dvp != NULL)
2380                 VN_RELE(dvp);
2381         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2382 }
2383 
2384 void *
2385 rfs3_mknod_getfh(MKNOD3args *args)
2386 {
2387 
2388         return (&args->where.dir);
2389 }
2390 
2391 void
2392 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2393         struct svc_req *req, cred_t *cr)
2394 {
2395         int error = 0;
2396         vnode_t *vp;
2397         struct vattr *bvap;
2398         struct vattr bva;
2399         struct vattr *avap;
2400         struct vattr ava;
2401         vnode_t *targvp = NULL;
2402         struct sockaddr *ca;
2403         char *name = NULL;
2404 
2405         bvap = NULL;
2406         avap = NULL;
2407 
2408         vp = nfs3_fhtovp(&args->object.dir, exi);
2409 
2410         DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2411             cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2412 
2413         if (vp == NULL) {
2414                 error = ESTALE;
2415                 goto err;
2416         }
2417 
2418         bva.va_mask = AT_ALL;
2419         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2420         avap = bvap;
2421 
2422         if (vp->v_type != VDIR) {
2423                 resp->status = NFS3ERR_NOTDIR;
2424                 goto err1;
2425         }
2426 
2427         if (args->object.name == nfs3nametoolong) {
2428                 resp->status = NFS3ERR_NAMETOOLONG;
2429                 goto err1;
2430         }
2431 
2432         if (args->object.name == NULL || *(args->object.name) == '\0') {
2433                 resp->status = NFS3ERR_ACCES;
2434                 goto err1;
2435         }
2436 
2437         if (rdonly(exi, req)) {
2438                 resp->status = NFS3ERR_ROFS;
2439                 goto err1;
2440         }
2441 
2442         if (is_system_labeled()) {
2443                 bslabel_t *clabel = req->rq_label;
2444 
2445                 ASSERT(clabel != NULL);
2446                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2447                     "got client label from request(1)", struct svc_req *, req);
2448 
2449                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2450                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2451                             exi)) {
2452                                 resp->status = NFS3ERR_ACCES;
2453                                 goto err1;
2454                         }
2455                 }
2456         }
2457 
2458         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2459         name = nfscmd_convname(ca, exi, args->object.name,
2460             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2461 
2462         if (name == NULL) {
2463                 resp->status = NFS3ERR_INVAL;
2464                 goto err1;
2465         }
2466 
2467         /*
2468          * Check for a conflict with a non-blocking mandatory share
2469          * reservation and V4 delegations
2470          */
2471         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2472             NULL, cr, NULL, NULL, NULL);
2473         if (error != 0)
2474                 goto err;
2475 
2476         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2477                 resp->status = NFS3ERR_JUKEBOX;
2478                 goto err1;
2479         }
2480 
2481         if (!nbl_need_check(targvp)) {
2482                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2483         } else {
2484                 nbl_start_crit(targvp, RW_READER);
2485                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2486                         error = EACCES;
2487                 } else {
2488                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2489                 }
2490                 nbl_end_crit(targvp);
2491         }
2492         VN_RELE(targvp);
2493         targvp = NULL;
2494 
2495         ava.va_mask = AT_ALL;
2496         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2497 
2498         /*
2499          * Force modified data and metadata out to stable storage.
2500          */
2501         (void) VOP_FSYNC(vp, 0, cr, NULL);
2502 
2503         if (error)
2504                 goto err;
2505 
2506         resp->status = NFS3_OK;
2507         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2508         goto out;
2509 
2510 err:
2511         if (curthread->t_flag & T_WOULDBLOCK) {
2512                 curthread->t_flag &= ~T_WOULDBLOCK;
2513                 resp->status = NFS3ERR_JUKEBOX;
2514         } else
2515                 resp->status = puterrno3(error);
2516 err1:
2517         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2518 out:
2519         DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2520             cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2521 
2522         if (name != NULL && name != args->object.name)
2523                 kmem_free(name, MAXPATHLEN + 1);
2524 
2525         if (vp != NULL)
2526                 VN_RELE(vp);
2527 }
2528 
2529 void *
2530 rfs3_remove_getfh(REMOVE3args *args)
2531 {
2532 
2533         return (&args->object.dir);
2534 }
2535 
2536 void
2537 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2538         struct svc_req *req, cred_t *cr)
2539 {
2540         int error;
2541         vnode_t *vp;
2542         struct vattr *bvap;
2543         struct vattr bva;
2544         struct vattr *avap;
2545         struct vattr ava;
2546         struct sockaddr *ca;
2547         char *name = NULL;
2548 
2549         bvap = NULL;
2550         avap = NULL;
2551 
2552         vp = nfs3_fhtovp(&args->object.dir, exi);
2553 
2554         DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2555             cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2556 
2557         if (vp == NULL) {
2558                 error = ESTALE;
2559                 goto err;
2560         }
2561 
2562         bva.va_mask = AT_ALL;
2563         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2564         avap = bvap;
2565 
2566         if (vp->v_type != VDIR) {
2567                 resp->status = NFS3ERR_NOTDIR;
2568                 goto err1;
2569         }
2570 
2571         if (args->object.name == nfs3nametoolong) {
2572                 resp->status = NFS3ERR_NAMETOOLONG;
2573                 goto err1;
2574         }
2575 
2576         if (args->object.name == NULL || *(args->object.name) == '\0') {
2577                 resp->status = NFS3ERR_ACCES;
2578                 goto err1;
2579         }
2580 
2581         if (rdonly(exi, req)) {
2582                 resp->status = NFS3ERR_ROFS;
2583                 goto err1;
2584         }
2585 
2586         if (is_system_labeled()) {
2587                 bslabel_t *clabel = req->rq_label;
2588 
2589                 ASSERT(clabel != NULL);
2590                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2591                     "got client label from request(1)", struct svc_req *, req);
2592 
2593                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2594                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2595                             exi)) {
2596                                 resp->status = NFS3ERR_ACCES;
2597                                 goto err1;
2598                         }
2599                 }
2600         }
2601 
2602         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2603         name = nfscmd_convname(ca, exi, args->object.name,
2604             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2605 
2606         if (name == NULL) {
2607                 resp->status = NFS3ERR_INVAL;
2608                 goto err1;
2609         }
2610 
2611         error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2612 
2613         if (name != args->object.name)
2614                 kmem_free(name, MAXPATHLEN + 1);
2615 
2616         ava.va_mask = AT_ALL;
2617         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2618 
2619         /*
2620          * Force modified data and metadata out to stable storage.
2621          */
2622         (void) VOP_FSYNC(vp, 0, cr, NULL);
2623 
2624         if (error) {
2625                 /*
2626                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2627                  * if the directory is not empty.  A System V NFS server
2628                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2629                  * over the wire.
2630                  */
2631                 if (error == EEXIST)
2632                         error = ENOTEMPTY;
2633                 goto err;
2634         }
2635 
2636         resp->status = NFS3_OK;
2637         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2638         goto out;
2639 
2640 err:
2641         if (curthread->t_flag & T_WOULDBLOCK) {
2642                 curthread->t_flag &= ~T_WOULDBLOCK;
2643                 resp->status = NFS3ERR_JUKEBOX;
2644         } else
2645                 resp->status = puterrno3(error);
2646 err1:
2647         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2648 out:
2649         DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2650             cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2651         if (vp != NULL)
2652                 VN_RELE(vp);
2653 
2654 }
2655 
2656 void *
2657 rfs3_rmdir_getfh(RMDIR3args *args)
2658 {
2659 
2660         return (&args->object.dir);
2661 }
2662 
2663 void
2664 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2665         struct svc_req *req, cred_t *cr)
2666 {
2667         int error = 0;
2668         vnode_t *fvp;
2669         vnode_t *tvp;
2670         vnode_t *targvp;
2671         struct vattr *fbvap;
2672         struct vattr fbva;
2673         struct vattr *favap;
2674         struct vattr fava;
2675         struct vattr *tbvap;
2676         struct vattr tbva;
2677         struct vattr *tavap;
2678         struct vattr tava;
2679         nfs_fh3 *fh3;
2680         struct exportinfo *to_exi;
2681         vnode_t *srcvp = NULL;
2682         bslabel_t *clabel;
2683         struct sockaddr *ca;
2684         char *name = NULL;
2685         char *toname = NULL;
2686 
2687         fbvap = NULL;
2688         favap = NULL;
2689         tbvap = NULL;
2690         tavap = NULL;
2691         tvp = NULL;
2692 
2693         fvp = nfs3_fhtovp(&args->from.dir, exi);
2694 
2695         DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2696             cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2697 
2698         if (fvp == NULL) {
2699                 error = ESTALE;
2700                 goto err;
2701         }
2702 
2703         if (is_system_labeled()) {
2704                 clabel = req->rq_label;
2705                 ASSERT(clabel != NULL);
2706                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2707                     "got client label from request(1)", struct svc_req *, req);
2708 
2709                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2710                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2711                             exi)) {
2712                                 resp->status = NFS3ERR_ACCES;
2713                                 goto err1;
2714                         }
2715                 }
2716         }
2717 
2718         fbva.va_mask = AT_ALL;
2719         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2720         favap = fbvap;
2721 
2722         fh3 = &args->to.dir;
2723         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2724         if (to_exi == NULL) {
2725                 resp->status = NFS3ERR_ACCES;
2726                 goto err1;
2727         }
2728         exi_rele(to_exi);
2729 
2730         if (to_exi != exi) {
2731                 resp->status = NFS3ERR_XDEV;
2732                 goto err1;
2733         }
2734 
2735         tvp = nfs3_fhtovp(&args->to.dir, exi);
2736         if (tvp == NULL) {
2737                 error = ESTALE;
2738                 goto err;
2739         }
2740 
2741         tbva.va_mask = AT_ALL;
2742         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2743         tavap = tbvap;
2744 
2745         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2746                 resp->status = NFS3ERR_NOTDIR;
2747                 goto err1;
2748         }
2749 
2750         if (args->from.name == nfs3nametoolong ||
2751             args->to.name == nfs3nametoolong) {
2752                 resp->status = NFS3ERR_NAMETOOLONG;
2753                 goto err1;
2754         }
2755         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2756             args->to.name == NULL || *(args->to.name) == '\0') {
2757                 resp->status = NFS3ERR_ACCES;
2758                 goto err1;
2759         }
2760 
2761         if (rdonly(exi, req)) {
2762                 resp->status = NFS3ERR_ROFS;
2763                 goto err1;
2764         }
2765 
2766         if (is_system_labeled()) {
2767                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2768                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2769                             exi)) {
2770                                 resp->status = NFS3ERR_ACCES;
2771                                 goto err1;
2772                         }
2773                 }
2774         }
2775 
2776         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2777         name = nfscmd_convname(ca, exi, args->from.name,
2778             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2779 
2780         if (name == NULL) {
2781                 resp->status = NFS3ERR_INVAL;
2782                 goto err1;
2783         }
2784 
2785         toname = nfscmd_convname(ca, exi, args->to.name,
2786             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2787 
2788         if (toname == NULL) {
2789                 resp->status = NFS3ERR_INVAL;
2790                 goto err1;
2791         }
2792 
2793         /*
2794          * Check for a conflict with a non-blocking mandatory share
2795          * reservation or V4 delegations.
2796          */
2797         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2798             NULL, cr, NULL, NULL, NULL);
2799         if (error != 0)
2800                 goto err;
2801 
2802         /*
2803          * If we rename a delegated file we should recall the
2804          * delegation, since future opens should fail or would
2805          * refer to a new file.
2806          */
2807         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2808                 resp->status = NFS3ERR_JUKEBOX;
2809                 goto err1;
2810         }
2811 
2812         /*
2813          * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2814          * first to avoid VOP_LOOKUP if possible.
2815          */
2816         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2817             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2818             NULL, NULL, NULL) == 0) {
2819 
2820                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2821                         VN_RELE(targvp);
2822                         resp->status = NFS3ERR_JUKEBOX;
2823                         goto err1;
2824                 }
2825                 VN_RELE(targvp);
2826         }
2827 
2828         if (!nbl_need_check(srcvp)) {
2829                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2830         } else {
2831                 nbl_start_crit(srcvp, RW_READER);
2832                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2833                         error = EACCES;
2834                 else
2835                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2836                 nbl_end_crit(srcvp);
2837         }
2838         if (error == 0)
2839                 vn_renamepath(tvp, srcvp, args->to.name,
2840                     strlen(args->to.name));
2841         VN_RELE(srcvp);
2842         srcvp = NULL;
2843 
2844         fava.va_mask = AT_ALL;
2845         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2846         tava.va_mask = AT_ALL;
2847         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2848 
2849         /*
2850          * Force modified data and metadata out to stable storage.
2851          */
2852         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2853         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2854 
2855         if (error)
2856                 goto err;
2857 
2858         resp->status = NFS3_OK;
2859         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2860         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2861         goto out;
2862 
2863 err:
2864         if (curthread->t_flag & T_WOULDBLOCK) {
2865                 curthread->t_flag &= ~T_WOULDBLOCK;
2866                 resp->status = NFS3ERR_JUKEBOX;
2867         } else {
2868                 resp->status = puterrno3(error);
2869         }
2870 err1:
2871         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2872         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2873 
2874 out:
2875         if (name != NULL && name != args->from.name)
2876                 kmem_free(name, MAXPATHLEN + 1);
2877         if (toname != NULL && toname != args->to.name)
2878                 kmem_free(toname, MAXPATHLEN + 1);
2879 
2880         DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2881             cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2882         if (fvp != NULL)
2883                 VN_RELE(fvp);
2884         if (tvp != NULL)
2885                 VN_RELE(tvp);
2886 }
2887 
2888 void *
2889 rfs3_rename_getfh(RENAME3args *args)
2890 {
2891 
2892         return (&args->from.dir);
2893 }
2894 
2895 void
2896 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2897         struct svc_req *req, cred_t *cr)
2898 {
2899         int error;
2900         vnode_t *vp;
2901         vnode_t *dvp;
2902         struct vattr *vap;
2903         struct vattr va;
2904         struct vattr *bvap;
2905         struct vattr bva;
2906         struct vattr *avap;
2907         struct vattr ava;
2908         nfs_fh3 *fh3;
2909         struct exportinfo *to_exi;
2910         bslabel_t *clabel;
2911         struct sockaddr *ca;
2912         char *name = NULL;
2913 
2914         vap = NULL;
2915         bvap = NULL;
2916         avap = NULL;
2917         dvp = NULL;
2918 
2919         vp = nfs3_fhtovp(&args->file, exi);
2920 
2921         DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2922             cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2923 
2924         if (vp == NULL) {
2925                 error = ESTALE;
2926                 goto out;
2927         }
2928 
2929         va.va_mask = AT_ALL;
2930         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2931 
2932         fh3 = &args->link.dir;
2933         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2934         if (to_exi == NULL) {
2935                 resp->status = NFS3ERR_ACCES;
2936                 goto out1;
2937         }
2938         exi_rele(to_exi);
2939 
2940         if (to_exi != exi) {
2941                 resp->status = NFS3ERR_XDEV;
2942                 goto out1;
2943         }
2944 
2945         if (is_system_labeled()) {
2946                 clabel = req->rq_label;
2947 
2948                 ASSERT(clabel != NULL);
2949                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2950                     "got client label from request(1)", struct svc_req *, req);
2951 
2952                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2953                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2954                             exi)) {
2955                                 resp->status = NFS3ERR_ACCES;
2956                                 goto out1;
2957                         }
2958                 }
2959         }
2960 
2961         dvp = nfs3_fhtovp(&args->link.dir, exi);
2962         if (dvp == NULL) {
2963                 error = ESTALE;
2964                 goto out;
2965         }
2966 
2967         bva.va_mask = AT_ALL;
2968         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2969 
2970         if (dvp->v_type != VDIR) {
2971                 resp->status = NFS3ERR_NOTDIR;
2972                 goto out1;
2973         }
2974 
2975         if (args->link.name == nfs3nametoolong) {
2976                 resp->status = NFS3ERR_NAMETOOLONG;
2977                 goto out1;
2978         }
2979 
2980         if (args->link.name == NULL || *(args->link.name) == '\0') {
2981                 resp->status = NFS3ERR_ACCES;
2982                 goto out1;
2983         }
2984 
2985         if (rdonly(exi, req)) {
2986                 resp->status = NFS3ERR_ROFS;
2987                 goto out1;
2988         }
2989 
2990         if (is_system_labeled()) {
2991                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2992                     "got client label from request(1)", struct svc_req *, req);
2993 
2994                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2995                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2996                             exi)) {
2997                                 resp->status = NFS3ERR_ACCES;
2998                                 goto out1;
2999                         }
3000                 }
3001         }
3002 
3003         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3004         name = nfscmd_convname(ca, exi, args->link.name,
3005             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3006 
3007         if (name == NULL) {
3008                 resp->status = NFS3ERR_SERVERFAULT;
3009                 goto out1;
3010         }
3011 
3012         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3013 
3014         va.va_mask = AT_ALL;
3015         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3016         ava.va_mask = AT_ALL;
3017         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3018 
3019         /*
3020          * Force modified data and metadata out to stable storage.
3021          */
3022         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3023         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3024 
3025         if (error)
3026                 goto out;
3027 
3028         VN_RELE(dvp);
3029 
3030         resp->status = NFS3_OK;
3031         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3032         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3033 
3034         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3035             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3036 
3037         VN_RELE(vp);
3038 
3039         return;
3040 
3041 out:
3042         if (curthread->t_flag & T_WOULDBLOCK) {
3043                 curthread->t_flag &= ~T_WOULDBLOCK;
3044                 resp->status = NFS3ERR_JUKEBOX;
3045         } else
3046                 resp->status = puterrno3(error);
3047 out1:
3048         if (name != NULL && name != args->link.name)
3049                 kmem_free(name, MAXPATHLEN + 1);
3050 
3051         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3052             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3053 
3054         if (vp != NULL)
3055                 VN_RELE(vp);
3056         if (dvp != NULL)
3057                 VN_RELE(dvp);
3058         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3059         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3060 }
3061 
3062 void *
3063 rfs3_link_getfh(LINK3args *args)
3064 {
3065 
3066         return (&args->file);
3067 }
3068 
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response that carries directory attributes and exactly one directory
 * entry whose name is "length" bytes long.  If the count in the incoming
 * request is larger than this, then we are guaranteed to be able to
 * return at least one directory entry if one exists, and there is no need
 * to check for NFS3ERR_TOOSMALL.  If it is not larger, then we need to
 * check whether that error has to be returned.
 *
 * The response is made up of the following (in XDR units, each of which
 * is BYTES_PER_XDR_UNIT bytes):
 *
 *      status                  1
 *      attribute flag          1
 *      cookie verifier         2
 *      attributes              NFS3_SIZEOF_FATTR3
 *      boolean                 1
 *      file id                 2
 *      name length             1
 *      cookie                  2
 *      end of list             1
 *      end of file             1
 *
 * plus the name itself, rounded up to a whole number of XDR units.
 */

#define NFS3_READDIR_MIN_COUNT(length)  \
        (BYTES_PER_XDR_UNIT * \
            (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) + \
            roundup((length), BYTES_PER_XDR_UNIT))
3097 
3098 /* ARGSUSED */
3099 void
3100 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3101         struct svc_req *req, cred_t *cr)
3102 {
3103         int error;
3104         vnode_t *vp;
3105         struct vattr *vap;
3106         struct vattr va;
3107         struct iovec iov;
3108         struct uio uio;
3109         char *data;
3110         int iseof;
3111         int bufsize;
3112         int namlen;
3113         uint_t count;
3114         struct sockaddr *ca;
3115 
3116         vap = NULL;
3117 
3118         vp = nfs3_fhtovp(&args->dir, exi);
3119 
3120         DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3121             cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3122 
3123         if (vp == NULL) {
3124                 error = ESTALE;
3125                 goto out;
3126         }
3127 
3128         if (is_system_labeled()) {
3129                 bslabel_t *clabel = req->rq_label;
3130 
3131                 ASSERT(clabel != NULL);
3132                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3133                     "got client label from request(1)", struct svc_req *, req);
3134 
3135                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3136                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3137                             exi)) {
3138                                 resp->status = NFS3ERR_ACCES;
3139                                 goto out1;
3140                         }
3141                 }
3142         }
3143 
3144         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3145 
3146         va.va_mask = AT_ALL;
3147         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3148 
3149         if (vp->v_type != VDIR) {
3150                 resp->status = NFS3ERR_NOTDIR;
3151                 goto out1;
3152         }
3153 
3154         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3155         if (error)
3156                 goto out;
3157 
3158         /*
3159          * Now don't allow arbitrary count to alloc;
3160          * allow the maximum not to exceed rfs3_tsize()
3161          */
3162         if (args->count > rfs3_tsize(req))
3163                 args->count = rfs3_tsize(req);
3164 
3165         /*
3166          * Make sure that there is room to read at least one entry
3167          * if any are available.
3168          */
3169         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3170                 count = DIRENT64_RECLEN(MAXNAMELEN);
3171         else
3172                 count = args->count;
3173 
3174         data = kmem_alloc(count, KM_SLEEP);
3175 
3176         iov.iov_base = data;
3177         iov.iov_len = count;
3178         uio.uio_iov = &iov;
3179         uio.uio_iovcnt = 1;
3180         uio.uio_segflg = UIO_SYSSPACE;
3181         uio.uio_extflg = UIO_COPY_CACHED;
3182         uio.uio_loffset = (offset_t)args->cookie;
3183         uio.uio_resid = count;
3184 
3185         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3186 
3187         va.va_mask = AT_ALL;
3188         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3189 
3190         if (error) {
3191                 kmem_free(data, count);
3192                 goto out;
3193         }
3194 
3195         /*
3196          * If the count was not large enough to be able to guarantee
3197          * to be able to return at least one entry, then need to
3198          * check to see if NFS3ERR_TOOSMALL should be returned.
3199          */
3200         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3201                 /*
3202                  * bufsize is used to keep track of the size of the response.
3203                  * It is primed with:
3204                  *      1 for the status +
3205                  *      1 for the dir_attributes.attributes boolean +
3206                  *      2 for the cookie verifier
3207                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3208                  * to bytes.  If there are directory attributes to be
3209                  * returned, then:
3210                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3211                  * time BYTES_PER_XDR_UNIT is added to account for them.
3212                  */
3213                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3214                 if (vap != NULL)
3215                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3216                 /*
3217                  * An entry is composed of:
3218                  *      1 for the true/false list indicator +
3219                  *      2 for the fileid +
3220                  *      1 for the length of the name +
3221                  *      2 for the cookie +
3222                  * all times BYTES_PER_XDR_UNIT to convert from
3223                  * XDR units to bytes, plus the length of the name
3224                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3225                  */
3226                 if (count != uio.uio_resid) {
3227                         namlen = strlen(((struct dirent64 *)data)->d_name);
3228                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3229                             roundup(namlen, BYTES_PER_XDR_UNIT);
3230                 }
3231                 /*
3232                  * We need to check to see if the number of bytes left
3233                  * to go into the buffer will actually fit into the
3234                  * buffer.  This is calculated as the size of this
3235                  * entry plus:
3236                  *      1 for the true/false list indicator +
3237                  *      1 for the eof indicator
3238                  * times BYTES_PER_XDR_UNIT to convert from from
3239                  * XDR units to bytes.
3240                  */
3241                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3242                 if (bufsize > args->count) {
3243                         kmem_free(data, count);
3244                         resp->status = NFS3ERR_TOOSMALL;
3245                         goto out1;
3246                 }
3247         }
3248 
3249         /*
3250          * Have a valid readir buffer for the native character
3251          * set. Need to check if a conversion is necessary and
3252          * potentially rewrite the whole buffer. Note that if the
3253          * conversion expands names enough, the structure may not
3254          * fit. In this case, we need to drop entries until if fits
3255          * and patch the counts in order that the next readdir will
3256          * get the correct entries.
3257          */
3258         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3259         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3260 
3261 
3262         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3263 
3264 #if 0 /* notyet */
3265         /*
3266          * Don't do this.  It causes local disk writes when just
3267          * reading the file and the overhead is deemed larger
3268          * than the benefit.
3269          */
3270         /*
3271          * Force modified metadata out to stable storage.
3272          */
3273         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3274 #endif
3275 
3276         resp->status = NFS3_OK;
3277         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3278         resp->resok.cookieverf = 0;
3279         resp->resok.reply.entries = (entry3 *)data;
3280         resp->resok.reply.eof = iseof;
3281         resp->resok.size = count - uio.uio_resid;
3282         resp->resok.count = args->count;
3283         resp->resok.freecount = count;
3284 
3285         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3286             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3287 
3288         VN_RELE(vp);
3289 
3290         return;
3291 
3292 out:
3293         if (curthread->t_flag & T_WOULDBLOCK) {
3294                 curthread->t_flag &= ~T_WOULDBLOCK;
3295                 resp->status = NFS3ERR_JUKEBOX;
3296         } else
3297                 resp->status = puterrno3(error);
3298 out1:
3299         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3300             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3301 
3302         if (vp != NULL) {
3303                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3304                 VN_RELE(vp);
3305         }
3306         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3307 }
3308 
3309 void *
3310 rfs3_readdir_getfh(READDIR3args *args)
3311 {
3312 
3313         return (&args->dir);
3314 }
3315 
3316 void
3317 rfs3_readdir_free(READDIR3res *resp)
3318 {
3319 
3320         if (resp->status == NFS3_OK)
3321                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3322 }
3323 
/*
 * nextdp(dp) yields a pointer to the dirent64 record that starts
 * dp->d_reclen bytes after the start of dp.  Any earlier definition of
 * nextdp is discarded first.  Note that dp is evaluated twice.
 */
#undef nextdp
#define nextdp(dp)      \
        ((struct dirent64 *)(&((char *)(dp))[(dp)->d_reclen]))
3328 
/*
 * NFS3_READDIRPLUS_ENTRY(namelen) computes the size, in bytes, of one
 * READDIRPLUS directory entry whose name is "namelen" bytes long,
 * including its attributes and file handle.  If the space remaining for
 * the response is larger than this, then we are guaranteed to be able to
 * return at least one more directory entry if one exists.
 *
 * An entry is made up of the following (in XDR units, each of which is
 * BYTES_PER_XDR_UNIT bytes):
 *
 *      boolean                         1
 *      file id                         2
 *      name length                     1
 *      cookie                          2
 *      attribute flag                  1
 *      attributes                      NFS3_SIZEOF_FATTR3
 *      status flag for file handle     1
 *      length of the file handle       1
 *
 * plus the maximum length of a file handle (NFS3_MAXFHSIZE bytes) and the
 * name itself, rounded up to a whole number of XDR units.
 *
 * The argument is parenthesized so that an expression can be passed
 * safely, matching NFS3_READDIR_MIN_COUNT.
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
                BYTES_PER_XDR_UNIT + \
        NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3352 
/*
 * Initial size, in bytes, of each VOP_READDIR() request issued by
 * rfs3_readdirplus(); every pass further limits it to the space left in
 * the entry buffer.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3354 
3355 /* ARGSUSED */
3356 void
3357 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3358         struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3359 {
3360         int error;
3361         vnode_t *vp;
3362         struct vattr *vap;
3363         struct vattr va;
3364         struct iovec iov;
3365         struct uio uio;
3366         char *data;
3367         int iseof;
3368         struct dirent64 *dp;
3369         vnode_t *nvp;
3370         struct vattr *nvap;
3371         struct vattr nva;
3372         entryplus3_info *infop = NULL;
3373         int size = 0;
3374         int nents = 0;
3375         int bufsize = 0;
3376         int entrysize = 0;
3377         int tofit = 0;
3378         int rd_unit = rfs3_readdir_unit;
3379         int prev_len;
3380         int space_left;
3381         int i;
3382         uint_t *namlen = NULL;
3383         char *ndata = NULL;
3384         struct sockaddr *ca;
3385         size_t ret;
3386 
3387         vap = NULL;
3388 
3389         vp = nfs3_fhtovp(&args->dir, exi);
3390 
3391         DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3392             cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3393 
3394         if (vp == NULL) {
3395                 error = ESTALE;
3396                 goto out;
3397         }
3398 
3399         if (is_system_labeled()) {
3400                 bslabel_t *clabel = req->rq_label;
3401 
3402                 ASSERT(clabel != NULL);
3403                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3404                     char *, "got client label from request(1)",
3405                     struct svc_req *, req);
3406 
3407                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3408                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3409                             exi)) {
3410                                 resp->status = NFS3ERR_ACCES;
3411                                 goto out1;
3412                         }
3413                 }
3414         }
3415 
3416         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3417 
3418         va.va_mask = AT_ALL;
3419         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3420 
3421         if (vp->v_type != VDIR) {
3422                 error = ENOTDIR;
3423                 goto out;
3424         }
3425 
3426         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3427         if (error)
3428                 goto out;
3429 
3430         /*
3431          * Don't allow arbitrary counts for allocation
3432          */
3433         if (args->maxcount > rfs3_tsize(req))
3434                 args->maxcount = rfs3_tsize(req);
3435 
3436         /*
3437          * Make sure that there is room to read at least one entry
3438          * if any are available
3439          */
3440         args->dircount = MIN(args->dircount, args->maxcount);
3441 
3442         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3443                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3444 
3445         /*
3446          * This allocation relies on a minimum directory entry
3447          * being roughly 24 bytes.  Therefore, the namlen array
3448          * will have enough space based on the maximum number of
3449          * entries to read.
3450          */
3451         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3452 
3453         space_left = args->dircount;
3454         data = kmem_alloc(args->dircount, KM_SLEEP);
3455         dp = (struct dirent64 *)data;
3456         uio.uio_iov = &iov;
3457         uio.uio_iovcnt = 1;
3458         uio.uio_segflg = UIO_SYSSPACE;
3459         uio.uio_extflg = UIO_COPY_CACHED;
3460         uio.uio_loffset = (offset_t)args->cookie;
3461 
3462         /*
3463          * bufsize is used to keep track of the size of the response as we
3464          * get post op attributes and filehandles for each entry.  This is
3465          * an optimization as the server may have read more entries than will
3466          * fit in the buffer specified by maxcount.  We stop calculating
3467          * post op attributes and filehandles once we have exceeded maxcount.
3468          * This will minimize the effect of truncation.
3469          *
3470          * It is primed with:
3471          *      1 for the status +
3472          *      1 for the dir_attributes.attributes boolean +
3473          *      2 for the cookie verifier
3474          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3475          * to bytes.  If there are directory attributes to be
3476          * returned, then:
3477          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3478          * times BYTES_PER_XDR_UNIT is added to account for them.
3479          */
3480         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3481         if (vap != NULL)
3482                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3483 
3484 getmoredents:
3485         /*
3486          * Here we make a check so that our read unit is not larger than
3487          * the space left in the buffer.
3488          */
3489         rd_unit = MIN(rd_unit, space_left);
3490         iov.iov_base = (char *)dp;
3491         iov.iov_len = rd_unit;
3492         uio.uio_resid = rd_unit;
3493         prev_len = rd_unit;
3494 
3495         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3496 
3497         if (error) {
3498                 kmem_free(data, args->dircount);
3499                 goto out;
3500         }
3501 
3502         if (uio.uio_resid == prev_len && !iseof) {
3503                 if (nents == 0) {
3504                         kmem_free(data, args->dircount);
3505                         resp->status = NFS3ERR_TOOSMALL;
3506                         goto out1;
3507                 }
3508 
3509                 /*
3510                  * We could not get any more entries, so get the attributes
3511                  * and filehandle for the entries already obtained.
3512                  */
3513                 goto good;
3514         }
3515 
3516         /*
3517          * We estimate the size of the response by assuming the
3518          * entry exists and attributes and filehandle are also valid
3519          */
3520         for (size = prev_len - uio.uio_resid;
3521             size > 0;
3522             size -= dp->d_reclen, dp = nextdp(dp)) {
3523 
3524                 if (dp->d_ino == 0) {
3525                         nents++;
3526                         continue;
3527                 }
3528 
3529                 namlen[nents] = strlen(dp->d_name);
3530                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3531 
3532                 /*
3533                  * We need to check to see if the number of bytes left
3534                  * to go into the buffer will actually fit into the
3535                  * buffer.  This is calculated as the size of this
3536                  * entry plus:
3537                  *      1 for the true/false list indicator +
3538                  *      1 for the eof indicator
3539                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3540                  * to bytes.
3541                  *
3542                  * Also check the dircount limit against the first entry read
3543                  *
3544                  */
3545                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3546                 if (bufsize + tofit > args->maxcount) {
3547                         /*
3548                          * We make a check here to see if this was the
3549                          * first entry being measured.  If so, then maxcount
3550                          * was too small to begin with and so we need to
3551                          * return with NFS3ERR_TOOSMALL.
3552                          */
3553                         if (nents == 0) {
3554                                 kmem_free(data, args->dircount);
3555                                 resp->status = NFS3ERR_TOOSMALL;
3556                                 goto out1;
3557                         }
3558                         iseof = FALSE;
3559                         goto good;
3560                 }
3561                 bufsize += entrysize;
3562                 nents++;
3563         }
3564 
3565         /*
3566          * If there is enough room to fit at least 1 more entry including
3567          * post op attributes and filehandle in the buffer AND that we haven't
3568          * exceeded dircount then go back and get some more.
3569          */
3570         if (!iseof &&
3571             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3572                 space_left -= (prev_len - uio.uio_resid);
3573                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3574                         goto getmoredents;
3575 
3576                 /* else, fall through */
3577         }
3578 good:
3579         va.va_mask = AT_ALL;
3580         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3581 
3582         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3583 
3584         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3585         resp->resok.infop = infop;
3586 
3587         dp = (struct dirent64 *)data;
3588         for (i = 0; i < nents; i++) {
3589 
3590                 if (dp->d_ino == 0) {
3591                         infop[i].attr.attributes = FALSE;
3592                         infop[i].fh.handle_follows = FALSE;
3593                         dp = nextdp(dp);
3594                         continue;
3595                 }
3596 
3597                 infop[i].namelen = namlen[i];
3598 
3599                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3600                     NULL, NULL, NULL);
3601                 if (error) {
3602                         infop[i].attr.attributes = FALSE;
3603                         infop[i].fh.handle_follows = FALSE;
3604                         dp = nextdp(dp);
3605                         continue;
3606                 }
3607 
3608                 nva.va_mask = AT_ALL;
3609                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3610 
3611                 /* Lie about the object type for a referral */
3612                 if (vn_is_nfs_reparse(nvp, cr))
3613                         nvap->va_type = VLNK;
3614 
3615                 vattr_to_post_op_attr(nvap, &infop[i].attr);
3616 
3617                 error = makefh3(&infop[i].fh.handle, nvp, exi);
3618                 if (!error)
3619                         infop[i].fh.handle_follows = TRUE;
3620                 else
3621                         infop[i].fh.handle_follows = FALSE;
3622 
3623                 VN_RELE(nvp);
3624                 dp = nextdp(dp);
3625         }
3626 
3627         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3628         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3629         if (ndata == NULL)
3630                 ndata = data;
3631 
3632         if (ret > 0) {
3633                 /*
3634                  * We had to drop one or more entries in order to fit
3635                  * during the character conversion.  We need to patch
3636                  * up the size and eof info.
3637                  */
3638                 if (iseof)
3639                         iseof = FALSE;
3640 
3641                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3642                     nents, ret);
3643         }
3644 
3645 
3646 #if 0 /* notyet */
3647         /*
3648          * Don't do this.  It causes local disk writes when just
3649          * reading the file and the overhead is deemed larger
3650          * than the benefit.
3651          */
3652         /*
3653          * Force modified metadata out to stable storage.
3654          */
3655         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3656 #endif
3657 
3658         kmem_free(namlen, args->dircount);
3659 
3660         resp->status = NFS3_OK;
3661         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3662         resp->resok.cookieverf = 0;
3663         resp->resok.reply.entries = (entryplus3 *)ndata;
3664         resp->resok.reply.eof = iseof;
3665         resp->resok.size = nents;
3666         resp->resok.count = args->dircount - ret;
3667         resp->resok.maxcount = args->maxcount;
3668 
3669         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3670             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3671         if (ndata != data)
3672                 kmem_free(data, args->dircount);
3673 
3674 
3675         VN_RELE(vp);
3676 
3677         return;
3678 
3679 out:
3680         if (curthread->t_flag & T_WOULDBLOCK) {
3681                 curthread->t_flag &= ~T_WOULDBLOCK;
3682                 resp->status = NFS3ERR_JUKEBOX;
3683         } else {
3684                 resp->status = puterrno3(error);
3685         }
3686 out1:
3687         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3688             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3689 
3690         if (vp != NULL) {
3691                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3692                 VN_RELE(vp);
3693         }
3694 
3695         if (namlen != NULL)
3696                 kmem_free(namlen, args->dircount);
3697 
3698         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3699 }
3700 
     /*
      * Return the address of the directory file handle (args->dir) held in
      * the READDIRPLUS3 arguments.  The address is handed back as an untyped
      * pointer; nothing is copied or modified.
      */
3701 void *
3702 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3703 {
3704 
3705         return (&args->dir);
3706 }
3707 
     /*
      * Release the memory attached to a READDIRPLUS3 reply.
      *
      * Only a reply whose status is NFS3_OK is touched.  For such a reply the
      * entry buffer is freed using resok.count as its size, and the infop
      * array is freed as resok.size elements of struct entryplus3_info.
      */
3708 void
3709 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3710 {
3711         /* Replies other than NFS3_OK are left untouched. */
3712         if (resp->status != NFS3_OK)
3713                 return;
3714         kmem_free(resp->resok.reply.entries, resp->resok.count);
3715         kmem_free(resp->resok.infop,
3716             resp->resok.size * sizeof (struct entryplus3_info));
3717 }
3718 
     /*
      * NFSv3 FSSTAT handler: report space and file-count figures for the
      * file system that holds the object named by args->fsroot.
      *
      * On success resp->status is NFS3_OK and resp->resok carries the post-op
      * attributes together with values derived from VFS_STATVFS().  On
      * failure resp->status holds the NFS3 error and resp->resfail carries
      * whatever post-op attributes had been obtained by then (none if the
      * failure happened before the attribute fetch).  One
      * op__fsstat__done probe fires on every exit path.
      */
3719 /* ARGSUSED */
3720 void
3721 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3722         struct svc_req *req, cred_t *cr)
3723 {
3724         int error;
3725         vnode_t *vp;
3726         struct vattr *vap;
3727         struct vattr va;
3728         struct statvfs64 sb;
3729 
             /* Stays NULL until attributes are fetched successfully. */
3730         vap = NULL;
3731 
3732         vp = nfs3_fhtovp(&args->fsroot, exi);
3733 
3734         DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3735             cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3736 
             /* The file handle did not map to a vnode. */
3737         if (vp == NULL) {
3738                 error = ESTALE;
3739                 goto out;
3740         }
3741 
             /*
              * On a labeled system, a client label other than admin_low
              * must pass the dominance check or the request is refused
              * with NFS3ERR_ACCES.
              */
3742         if (is_system_labeled()) {
3743                 bslabel_t *clabel = req->rq_label;
3744 
3745                 ASSERT(clabel != NULL);
3746                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3747                     "got client label from request(1)", struct svc_req *, req);
3748 
3749                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3750                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3751                             exi)) {
3752                                 resp->status = NFS3ERR_ACCES;
3753                                 goto out1;
3754                         }
3755                 }
3756         }
3757 
             /*
              * The statvfs result is examined only after the attribute
              * fetch below, so a failure reply can still carry post-op
              * attributes.
              */
3758         error = VFS_STATVFS(vp->v_vfsp, &sb);
3759 
3760         va.va_mask = AT_ALL;
3761         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3762 
3763         if (error)
3764                 goto out;
3765 
3766         resp->status = NFS3_OK;
3767         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
             /*
              * Block counts are converted to bytes with f_frsize, except
              * that a count of (fsblkcnt64_t)-1 is passed through unscaled.
              */
3768         if (sb.f_blocks != (fsblkcnt64_t)-1)
3769                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3770         else
3771                 resp->resok.tbytes = (size3)sb.f_blocks;
3772         if (sb.f_bfree != (fsblkcnt64_t)-1)
3773                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3774         else
3775                 resp->resok.fbytes = (size3)sb.f_bfree;
3776         if (sb.f_bavail != (fsblkcnt64_t)-1)
3777                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3778         else
3779                 resp->resok.abytes = (size3)sb.f_bavail;
             /* File counts are copied straight from the statvfs result. */
3780         resp->resok.tfiles = (size3)sb.f_files;
3781         resp->resok.ffiles = (size3)sb.f_ffree;
3782         resp->resok.afiles = (size3)sb.f_favail;
3783         resp->resok.invarsec = 0;
3784 
3785         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3786             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3787         VN_RELE(vp);
3788 
3789         return;
3790 
     /*
      * out:  translate `error' into an NFS3 status.  A pending T_WOULDBLOCK
      *       on the current thread is cleared and reported as
      *       NFS3ERR_JUKEBOX instead.
      * out1: resp->status has already been set by the caller of the goto.
      */
3791 out:
3792         if (curthread->t_flag & T_WOULDBLOCK) {
3793                 curthread->t_flag &= ~T_WOULDBLOCK;
3794                 resp->status = NFS3ERR_JUKEBOX;
3795         } else
3796                 resp->status = puterrno3(error);
3797 out1:
3798         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3799             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3800 
3801         if (vp != NULL)
3802                 VN_RELE(vp);
3803         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3804 }
3805 
     /*
      * Return the address of the fsroot file handle held in the FSSTAT3
      * arguments, as an untyped pointer.
      */
3806 void *
3807 rfs3_fsstat_getfh(FSSTAT3args *args)
3808 {
3809 
3810         return (&args->fsroot);
3811 }
3812 
     /*
      * NFSv3 FSINFO handler: describe the server's transfer limits and
      * preferences for the file system that holds args->fsroot.
      *
      * Transfer sizes come from rfs3_tsize(req), the read/write multiples
      * are DEV_BSIZE, dtpref is MAXBSIZE, and maxfilesize is derived from
      * the _PC_FILESIZEBITS value reported by VOP_PATHCONF().
      *
      * On success resp->status is NFS3_OK and resp->resok is filled in.  On
      * every failure path resp->status holds the error and
      * resp->resfail.obj_attributes is written before returning.
      */
3813 void
3814 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3815         struct svc_req *req, cred_t *cr)
3816 {
3817         vnode_t *vp;
3818         struct vattr *vap;
3819         struct vattr va;
3820         uint32_t xfer_size;
3821         ulong_t l = 0;
3822         int error;
3823 
3824         vp = nfs3_fhtovp(&args->fsroot, exi);
3825 
3826         DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3827             cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3828 
             /*
              * The file handle did not map to a vnode.  A pending
              * T_WOULDBLOCK is cleared and reported as NFS3ERR_JUKEBOX;
              * otherwise the handle is stale.
              */
3829         if (vp == NULL) {
3830                 if (curthread->t_flag & T_WOULDBLOCK) {
3831                         curthread->t_flag &= ~T_WOULDBLOCK;
3832                         resp->status = NFS3ERR_JUKEBOX;
3833                 } else
3834                         resp->status = NFS3ERR_STALE;
3835                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3836                 goto out;
3837         }
3838 
             /*
              * On a labeled system, a client label other than admin_low
              * must pass the dominance check.  Unlike the sibling handlers,
              * a failed check is reported here as NFS3ERR_STALE.
              */
3839         if (is_system_labeled()) {
3840                 bslabel_t *clabel = req->rq_label;
3841 
3842                 ASSERT(clabel != NULL);
3843                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3844                     "got client label from request(1)", struct svc_req *, req);
3845 
3846                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3847                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3848                             exi)) {
3849                                 resp->status = NFS3ERR_STALE;
3850                                 vattr_to_post_op_attr(NULL,
3851                                     &resp->resfail.obj_attributes);
3852                                 goto out;
3853                         }
3854                 }
3855         }
3856 
3857         va.va_mask = AT_ALL;
3858         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3859 
3860         resp->status = NFS3_OK;
3861         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3862         xfer_size = rfs3_tsize(req);
3863         resp->resok.rtmax = xfer_size;
3864         resp->resok.rtpref = xfer_size;
3865         resp->resok.rtmult = DEV_BSIZE;
3866         resp->resok.wtmax = xfer_size;
3867         resp->resok.wtpref = xfer_size;
3868         resp->resok.wtmult = DEV_BSIZE;
3869         resp->resok.dtpref = MAXBSIZE;
3870 
3871         /*
3872          * Large file spec: want maxfilesize based on limit of
3873          * underlying filesystem.  We can guess 2^31-1 if need be.
3874          */
3875         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3876         if (error) {
3877                 resp->status = puterrno3(error);
                     /*
                      * Fill in the failure arm explicitly, as the other
                      * failure paths of this function do, rather than
                      * relying on what was stored through resok above.
                      */
                     vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3878                 goto out;
3879         }
3880 
3881         /*
3882          * If the underlying file system does not support _PC_FILESIZEBITS,
3883          * return a reasonable default. Note that error code on VOP_PATHCONF
3884          * will be 0, even if the underlying file system does not support
3885          * _PC_FILESIZEBITS.
              *
              * A value of 0 gets the same default.  That is also what l still
              * holds if VOP_PATHCONF succeeded without storing anything, and
              * (l - 1) would then wrap to a huge shift count, which is
              * undefined behavior.
3886          */
3887         if (l == (ulong_t)-1 || l == 0) {
3888                 resp->resok.maxfilesize = MAXOFF32_T;
3889         } else {
                     /* Anything of 64 bits or more is capped at INT64_MAX. */
3890                 if (l >= (sizeof (uint64_t) * 8))
3891                         resp->resok.maxfilesize = INT64_MAX;
3892                 else
3893                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3894         }
3895 
3896         resp->resok.time_delta.seconds = 0;
3897         resp->resok.time_delta.nseconds = 1000;
3898         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3899             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3900 
3901         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3902             cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3903 
3904         VN_RELE(vp);
3905 
3906         return;
3907 
     /*
      * Failure exit: resp->status and resp->resfail.obj_attributes have
      * already been set by whoever jumped here.
      * NOTE(review): this probe passes NULL rather than vp, unlike the
      * success path above - confirm whether that is intentional.
      */
3908 out:
3909         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3910             cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3911         if (vp != NULL)
3912                 VN_RELE(vp);
3913 }
3914 
     /*
      * Return the address of the fsroot file handle held in the FSINFO3
      * arguments, as an untyped pointer.
      */
3915 void *
3916 rfs3_fsinfo_getfh(FSINFO3args *args)
3917 {
3918 
3919         return (&args->fsroot);
3920 }
3921 
     /*
      * NFSv3 PATHCONF handler: report pathconf values for the object named
      * by args->object.
      *
      * Four values are queried through VOP_PATHCONF(): _PC_LINK_MAX,
      * _PC_NAME_MAX, _PC_NO_TRUNC and _PC_CHOWN_RESTRICTED.  The
      * case_insensitive and case_preserving answers are fixed at FALSE and
      * TRUE.  The first failing query aborts the request.
      *
      * On success resp->status is NFS3_OK and resp->resok is filled in.  On
      * failure resp->status holds the error and resp->resfail carries the
      * post-op attributes if they had been obtained.
      */
3922 /* ARGSUSED */
3923 void
3924 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3925         struct svc_req *req, cred_t *cr)
3926 {
3927         int error;
3928         vnode_t *vp;
3929         struct vattr *vap;
3930         struct vattr va;
3931         ulong_t val;
3932 
             /* Stays NULL until attributes are fetched successfully. */
3933         vap = NULL;
3934 
3935         vp = nfs3_fhtovp(&args->object, exi);
3936 
3937         DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3938             cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3939 
             /* The file handle did not map to a vnode. */
3940         if (vp == NULL) {
3941                 error = ESTALE;
3942                 goto out;
3943         }
3944 
             /*
              * On a labeled system, a client label other than admin_low
              * must pass the dominance check or the request is refused
              * with NFS3ERR_ACCES.
              */
3945         if (is_system_labeled()) {
3946                 bslabel_t *clabel = req->rq_label;
3947 
3948                 ASSERT(clabel != NULL);
3949                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3950                     "got client label from request(1)", struct svc_req *, req);
3951 
3952                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3953                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3954                             exi)) {
3955                                 resp->status = NFS3ERR_ACCES;
3956                                 goto out1;
3957                         }
3958                 }
3959         }
3960 
             /*
              * Attributes are fetched before the queries so that both the
              * success and the failure reply can include them.
              */
3961         va.va_mask = AT_ALL;
3962         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3963 
3964         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3965         if (error)
3966                 goto out;
3967         resp->resok.info.link_max = (uint32)val;
3968 
3969         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3970         if (error)
3971                 goto out;
3972         resp->resok.info.name_max = (uint32)val;
3973 
             /* Only a value of exactly 1 is reported as TRUE. */
3974         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3975         if (error)
3976                 goto out;
3977         if (val == 1)
3978                 resp->resok.info.no_trunc = TRUE;
3979         else
3980                 resp->resok.info.no_trunc = FALSE;
3981 
             /* Same rule: only a value of exactly 1 is reported as TRUE. */
3982         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3983         if (error)
3984                 goto out;
3985         if (val == 1)
3986                 resp->resok.info.chown_restricted = TRUE;
3987         else
3988                 resp->resok.info.chown_restricted = FALSE;
3989 
3990         resp->status = NFS3_OK;
3991         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
             /* These two answers are not queried from the file system. */
3992         resp->resok.info.case_insensitive = FALSE;
3993         resp->resok.info.case_preserving = TRUE;
3994         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3995             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3996         VN_RELE(vp);
3997         return;
3998 
     /*
      * out:  translate `error' into an NFS3 status.  A pending T_WOULDBLOCK
      *       on the current thread is cleared and reported as
      *       NFS3ERR_JUKEBOX instead.
      * out1: resp->status has already been set by the caller of the goto.
      */
3999 out:
4000         if (curthread->t_flag & T_WOULDBLOCK) {
4001                 curthread->t_flag &= ~T_WOULDBLOCK;
4002                 resp->status = NFS3ERR_JUKEBOX;
4003         } else
4004                 resp->status = puterrno3(error);
4005 out1:
4006         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4007             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4008         if (vp != NULL)
4009                 VN_RELE(vp);
4010         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4011 }
4012 
     /*
      * Return the address of the object file handle held in the PATHCONF3
      * arguments, as an untyped pointer.
      */
4013 void *
4014 rfs3_pathconf_getfh(PATHCONF3args *args)
4015 {
4016 
4017         return (&args->object);
4018 }
4019 
     /*
      * NFSv3 COMMIT handler: flush the file named by args->file with
      * VOP_FSYNC() and return the write verifier.
      *
      * Checks are made in this order: the file handle must map to a vnode,
      * its attributes must be readable, the export must not be read-only
      * for this request, the vnode must be a regular file, the label check
      * must pass on a labeled system, and a caller who is not the file's
      * owner must have write access.
      *
      * On success resp->status is NFS3_OK and resp->resok carries the wcc
      * data plus write3verf.  On failure resp->status holds the error and
      * resp->resfail carries whatever before/after attributes had been
      * obtained.
      */
4020 void
4021 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4022         struct svc_req *req, cred_t *cr)
4023 {
4024         int error;
4025         vnode_t *vp;
4026         struct vattr *bvap;
4027         struct vattr bva;
4028         struct vattr *avap;
4029         struct vattr ava;
4030 
             /* Before/after attributes stay NULL until fetched. */
4031         bvap = NULL;
4032         avap = NULL;
4033 
4034         vp = nfs3_fhtovp(&args->file, exi);
4035 
4036         DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4037             cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4038 
             /* The file handle did not map to a vnode. */
4039         if (vp == NULL) {
4040                 error = ESTALE;
4041                 goto out;
4042         }
4043 
4044         bva.va_mask = AT_ALL;
4045         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4046 
4047         /*
4048          * If we can't get the attributes, then we can't do the
4049          * right access checking.  So, we'll fail the request.
4050          */
4051         if (error)
4052                 goto out;
4053 
4054         bvap = &bva;
4055 
4056         if (rdonly(exi, req)) {
4057                 resp->status = NFS3ERR_ROFS;
4058                 goto out1;
4059         }
4060 
             /* Only regular files may be committed. */
4061         if (vp->v_type != VREG) {
4062                 resp->status = NFS3ERR_INVAL;
4063                 goto out1;
4064         }
4065 
             /*
              * On a labeled system, a client label other than admin_low
              * must pass the equality check or the request is refused with
              * NFS3ERR_ACCES.
              */
4066         if (is_system_labeled()) {
4067                 bslabel_t *clabel = req->rq_label;
4068 
4069                 ASSERT(clabel != NULL);
4070                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4071                     "got client label from request(1)", struct svc_req *, req);
4072 
4073                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4074                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4075                             exi)) {
4076                                 resp->status = NFS3ERR_ACCES;
4077                                 goto out1;
4078                         }
4079                 }
4080         }
4081 
             /*
              * The write-access check is skipped when the caller's uid
              * matches the file's owner as read above.
              */
4082         if (crgetuid(cr) != bva.va_uid &&
4083             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4084                 goto out;
4085 
             /*
              * No byte range from args is passed to the fsync.  Its result
              * is examined only after the attribute fetch below, so a
              * failure reply can still carry the "after" attributes.
              */
4086         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4087 
4088         ava.va_mask = AT_ALL;
4089         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4090 
4091         if (error)
4092                 goto out;
4093 
4094         resp->status = NFS3_OK;
4095         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4096         resp->resok.verf = write3verf;
4097 
4098         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4099             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4100 
4101         VN_RELE(vp);
4102 
4103         return;
4104 
     /*
      * out:  translate `error' into an NFS3 status.  A pending T_WOULDBLOCK
      *       on the current thread is cleared and reported as
      *       NFS3ERR_JUKEBOX instead.
      * out1: resp->status has already been set by the caller of the goto.
      */
4105 out:
4106         if (curthread->t_flag & T_WOULDBLOCK) {
4107                 curthread->t_flag &= ~T_WOULDBLOCK;
4108                 resp->status = NFS3ERR_JUKEBOX;
4109         } else
4110                 resp->status = puterrno3(error);
4111 out1:
4112         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4113             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4114 
4115         if (vp != NULL)
4116                 VN_RELE(vp);
4117         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4118 }
4119 
     /*
      * Return the address of the file handle (args->file) held in the
      * COMMIT3 arguments, as an untyped pointer.
      */
4120 void *
4121 rfs3_commit_getfh(COMMIT3args *args)
4122 {
4123 
4124         return (&args->file);
4125 }
4126 
     /*
      * Translate the settable attributes in *sap into *vap.
      *
      * vap->va_mask starts at 0 and gains one AT_* bit for every attribute
      * that is copied; attributes the client did not ask to set are left
      * untouched in *vap.
      *
      * Returns 0 on success, EINVAL if the size check rejects the value, or
      * (only in builds without _LP64) EOVERFLOW if a client-supplied time
      * fails NFS3_TIME_OK().  On an error return *vap may already hold some
      * of the earlier attributes.
      */
4127 static int
4128 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4129 {
4130 
4131         vap->va_mask = 0;
4132 
4133         if (sap->mode.set_it) {
4134                 vap->va_mode = (mode_t)sap->mode.mode;
4135                 vap->va_mask |= AT_MODE;
4136         }
4137         if (sap->uid.set_it) {
4138                 vap->va_uid = (uid_t)sap->uid.uid;
4139                 vap->va_mask |= AT_UID;
4140         }
4141         if (sap->gid.set_it) {
4142                 vap->va_gid = (gid_t)sap->gid.gid;
4143                 vap->va_mask |= AT_GID;
4144         }
4145         if (sap->size.set_it) {
                     /*
                      * NOTE(review): the limit is an all-ones value cast to
                      * size3; if size3 is a 64-bit unsigned type this
                      * comparison can never be true - confirm the intended
                      * bound.
                      */
4146                 if (sap->size.size > (size3)((u_longlong_t)-1))
4147                         return (EINVAL);
4148                 vap->va_size = sap->size.size;
4149                 vap->va_mask |= AT_SIZE;
4150         }
             /*
              * atime: either the client-supplied value or the server's
              * current time; any other set_it value leaves it unset.
              */
4151         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4152 #ifndef _LP64
4153                 /* check time validity */
4154                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4155                         return (EOVERFLOW);
4156 #endif
4157                 /*
4158                  * nfs protocol defines times as unsigned so don't extend sign,
4159                  * unless sysadmin set nfs_allow_preepoch_time.
4160                  */
4161                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4162                     sap->atime.atime.seconds);
4163                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4164                 vap->va_mask |= AT_ATIME;
4165         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4166                 gethrestime(&vap->va_atime);
4167                 vap->va_mask |= AT_ATIME;
4168         }
             /* mtime: handled exactly like atime above. */
4169         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4170 #ifndef _LP64
4171                 /* check time validity */
4172                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4173                         return (EOVERFLOW);
4174 #endif
4175                 /*
4176                  * nfs protocol defines times as unsigned so don't extend sign,
4177                  * unless sysadmin set nfs_allow_preepoch_time.
4178                  */
4179                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4180                     sap->mtime.mtime.seconds);
4181                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4182                 vap->va_mask |= AT_MTIME;
4183         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4184                 gethrestime(&vap->va_mtime);
4185                 vap->va_mask |= AT_MTIME;
4186         }
4187 
4188         return (0);
4189 }
4190 
     /*
      * Lookup table from vnode type (used as the index) to NFSv3 file type.
      * Slots holding 0 are vnode types given no NFSv3 file type here.
      * Presumably the slots follow the vtype enumeration order starting at
      * VNON - confirm against the vnode type definition.
      */
4191 static ftype3 vt_to_nf3[] = {
4192         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4193 };
4194 
     /*
      * Fill in the NFSv3 attributes *fap from the vnode attributes *vap.
      *
      * Returns 0 on success.  Returns EOVERFLOW, leaving *fap untouched, if
      * the times or the size in *vap fail the NFS_VAP_TIME_OK() or
      * NFS3_SIZE_OK() checks.
      *
      * The uid/gid values UID_NOBODY and GID_NOBODY are replaced by
      * NFS_UID_NOBODY and NFS_GID_NOBODY; `used' is va_nblocks scaled by
      * DEV_BSIZE; rdev is split into major and minor numbers.
      */
4195 static int
4196 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4197 {
4198 
4199         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4200         /* Return error if time or size overflow */
4201         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4202                 return (EOVERFLOW);
4203         }
             /*
              * The ASSERT above is the only range check and may be compiled
              * out; its upper bound is also not tied to the size of the
              * table.  Never index past the table: a type with no slot is
              * reported as 0, like the other unmapped types.
              */
             if ((uint_t)vap->va_type < sizeof (vt_to_nf3) / sizeof (vt_to_nf3[0]))
4204                 fap->type = vt_to_nf3[vap->va_type];
             else
                     fap->type = 0;
4205         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4206         fap->nlink = (uint32)vap->va_nlink;
4207         if (vap->va_uid == UID_NOBODY)
4208                 fap->uid = (uid3)NFS_UID_NOBODY;
4209         else
4210                 fap->uid = (uid3)vap->va_uid;
4211         if (vap->va_gid == GID_NOBODY)
4212                 fap->gid = (gid3)NFS_GID_NOBODY;
4213         else
4214                 fap->gid = (gid3)vap->va_gid;
4215         fap->size = (size3)vap->va_size;
4216         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4217         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4218         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4219         fap->fsid = (uint64)vap->va_fsid;
4220         fap->fileid = (fileid3)vap->va_nodeid;
4221         fap->atime.seconds = vap->va_atime.tv_sec;
4222         fap->atime.nseconds = vap->va_atime.tv_nsec;
4223         fap->mtime.seconds = vap->va_mtime.tv_sec;
4224         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4225         fap->ctime.seconds = vap->va_ctime.tv_sec;
4226         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4227         return (0);
4228 }
4229 
     /*
      * Copy the size, mtime and ctime of *vap into the weak cache
      * consistency attributes *wccap.
      *
      * Returns 0 on success.  Returns EOVERFLOW, leaving *wccap untouched,
      * if the mtime or ctime seconds fail NFS_TIME_T_OK() or the size fails
      * NFS3_SIZE_OK().
      */
4230 static int
4231 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4232 {
4233         /* Every range check must pass before anything is stored. */
4234         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec)))
4235                 return (EOVERFLOW);
4236         if (!(NFS_TIME_T_OK(vap->va_ctime.tv_sec)))
4237                 return (EOVERFLOW);
4238         if (!(NFS3_SIZE_OK(vap->va_size)))
4239                 return (EOVERFLOW);
4240         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4241         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4242         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4243         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4244         wccap->size = (size3)vap->va_size;
4245         return (0);
4246 }
4247 
     /*
      * Build the pre-operation attributes *poap from *vap.
      *
      * poap->attributes is TRUE only when vap is non-NULL and
      * vattr_to_wcc_attr() converts it without error; otherwise it is FALSE.
      */
4248 static void
4249 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4250 {
4251         /* No vattr, or one that fails conversion, means no attributes. */
4252         if (vap == NULL || vattr_to_wcc_attr(vap, &poap->attr) != 0) {
4253                 poap->attributes = FALSE;
4254                 return;
4255         }
4256         poap->attributes = TRUE;
4257 }
4258 
     /*
      * Build the post-operation attributes *poap from *vap.
      *
      * poap->attributes is TRUE only when vap is non-NULL and
      * vattr_to_fattr3() converts it without error; otherwise it is FALSE.
      */
4259 void
4260 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4261 {
4262         /* Attributes are flagged present only after a clean conversion. */
4263         if (vap == NULL || vattr_to_fattr3(vap, &poap->attr) != 0) {
4264                 poap->attributes = FALSE;
4265                 return;
4266         }
4267         poap->attributes = TRUE;
4268 }
4269 
     /*
      * Fill in the weak cache consistency data *wccp: the `before' half
      * from bvap as pre-op attributes and the `after' half from avap as
      * post-op attributes.  Either pointer may be NULL, in which case the
      * corresponding half is marked as carrying no attributes.
      */
4270 static void
4271 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4272 {
4273 
4274         vattr_to_pre_op_attr(bvap, &wccp->before);
4275         vattr_to_post_op_attr(avap, &wccp->after);
4276 }
4277 
     /*
      * Initialize NFSv3 server state: build the write verifier write3verf
      * from the host id and the current time, and obtain
      * nfs3_srv_caller_id from fs_new_caller_id().
      */
4278 void
4279 rfs3_srvrinit(void)
4280 {
             /* Layout written over write3verf; see the ASSERT on its size. */
4281         struct rfs3_verf_overlay {
4282                 uint_t id; /* a "unique" identifier */
4283                 int ts; /* a unique timestamp */
4284         } *verfp;
4285         timestruc_t now;
4286 
4287         /*
4288          * The following algorithm attempts to find a unique verifier
4289          * to be used as the write verifier returned from the server
4290          * to the client.  It is important that this verifier change
4291          * whenever the server reboots.  Of secondary importance, it
4292          * is important for the verifier to be unique between two
4293          * different servers.
4294          *
4295          * Thus, an attempt is made to use the system hostid and the
4296          * current time in seconds when the nfssrv kernel module is
4297          * loaded.  It is assumed that an NFS server will not be able
4298          * to boot and then to reboot in less than a second.  If the
4299          * hostid has not been set, then the current high resolution
4300          * time is used.  This will ensure different verifiers each
4301          * time the server reboots and minimize the chances that two
4302          * different servers will have the same verifier.
4303          */
4304 
4305 #ifndef lint
4306         /*
4307          * We ASSERT that this constant logic expression is
4308          * always true because in the past, it wasn't.
4309          */
4310         ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4311 #endif
4312 
             /* ts is the seconds part of the current time. */
4313         gethrestime(&now);
4314         verfp = (struct rfs3_verf_overlay *)&write3verf;
4315         verfp->ts = (int)now.tv_sec;
4316         verfp->id = zone_get_hostid(NULL);
4317 
             /* A host id of 0 is replaced by the nanoseconds part. */
4318         if (verfp->id == 0)
4319                 verfp->id = (uint_t)now.tv_nsec;
4320 
4321         nfs3_srv_caller_id = fs_new_caller_id();
4322 
4323 }
4324 
     /*
      * Prepare the RDMA write list for a READ3 reply.
      *
      * args->wlist and rok->count are passed to rdma_setup_read_chunks().
      * If that reports FALSE, FALSE is returned and *rok is not modified.
      * Otherwise rok->wlist is set to args->wlist, rok->wlist_len to the
      * length the helper produced, and TRUE is returned.
      */
4325 static int
4326 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4327 {
4328         count3          nbytes = rok->count;
4329         struct clist    *chunks = args->wlist;
4330         int             chunk_len;
4331 
4332         /* Give up at once if the chunk setup helper reports failure. */
4333         if (rdma_setup_read_chunks(chunks, nbytes, &chunk_len) == FALSE)
4334                 return (FALSE);
4335 
4336         /* The list pointer is re-read from args before being published. */
4337         chunks = args->wlist;
4338         rok->wlist = chunks;
4339         rok->wlist_len = chunk_len;
4340         return (TRUE);
4341 }
4342 
     /*
      * Presumably the teardown counterpart of rfs3_srvrinit() - confirm
      * against the callers.  The body is intentionally empty.
      */
4343 void
4344 rfs3_srvrfini(void)
4345 {
4346         /* Nothing to do */
4347 }