1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  26 /* All Rights Reserved */
  27 
  28 #include <sys/param.h>
  29 #include <sys/types.h>
  30 #include <sys/systm.h>
  31 #include <sys/cred.h>
  32 #include <sys/buf.h>
  33 #include <sys/vfs.h>
  34 #include <sys/vnode.h>
  35 #include <sys/uio.h>
  36 #include <sys/errno.h>
  37 #include <sys/sysmacros.h>
  38 #include <sys/statvfs.h>
  39 #include <sys/kmem.h>
  40 #include <sys/dirent.h>
  41 #include <sys/cmn_err.h>
  42 #include <sys/debug.h>
  43 #include <sys/systeminfo.h>
  44 #include <sys/flock.h>
  45 #include <sys/nbmlock.h>
  46 #include <sys/policy.h>
  47 #include <sys/sdt.h>
  48 
  49 #include <rpc/types.h>
  50 #include <rpc/auth.h>
  51 #include <rpc/svc.h>
  52 #include <rpc/rpc_rdma.h>
  53 
  54 #include <nfs/nfs.h>
  55 #include <nfs/export.h>
  56 #include <nfs/nfs_cmd.h>
  57 
  58 #include <sys/strsubr.h>
  59 
  60 #include <sys/tsol/label.h>
  61 #include <sys/tsol/tndb.h>
  62 
  63 #include <sys/zone.h>
  64 
  65 #include <inet/ip.h>
  66 #include <inet/ip6.h>
  67 
  68 /*
  69  * These are the interface routines for the server side of the
  70  * Network File System.  See the NFS version 3 protocol specification
  71  * for a description of this interface.
  72  */
  73 
  74 static writeverf3 write3verf;
  75 
  76 static int      sattr3_to_vattr(sattr3 *, struct vattr *);
  77 static int      vattr_to_fattr3(struct vattr *, fattr3 *);
  78 static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
  79 static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
  80 static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
  81 static int      rdma_setup_read_data3(READ3args *, READ3resok *);
  82 
  83 extern int nfs_loaned_buffers;
  84 
  85 u_longlong_t nfs3_srv_caller_id;
  86 
  87 /* ARGSUSED */
  88 void
  89 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
  90         struct svc_req *req, cred_t *cr)
  91 {
  92         int error;
  93         vnode_t *vp;
  94         struct vattr va;
  95 
  96         vp = nfs3_fhtovp(&args->object, exi);
  97 
  98         DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
  99             cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
 100 
 101         if (vp == NULL) {
 102                 error = ESTALE;
 103                 goto out;
 104         }
 105 
 106         va.va_mask = AT_ALL;
 107         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 108 
 109         if (!error) {
 110                 /* Lie about the object type for a referral */
 111                 if (vn_is_nfs_reparse(vp, cr))
 112                         va.va_type = VLNK;
 113 
 114                 /* overflow error if time or size is out of range */
 115                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 116                 if (error)
 117                         goto out;
 118                 resp->status = NFS3_OK;
 119 
 120                 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 121                     cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 122 
 123                 VN_RELE(vp);
 124 
 125                 return;
 126         }
 127 
 128 out:
 129         if (curthread->t_flag & T_WOULDBLOCK) {
 130                 curthread->t_flag &= ~T_WOULDBLOCK;
 131                 resp->status = NFS3ERR_JUKEBOX;
 132         } else
 133                 resp->status = puterrno3(error);
 134 
 135         DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 136             cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 137 
 138         if (vp != NULL)
 139                 VN_RELE(vp);
 140 }
 141 
 142 void *
 143 rfs3_getattr_getfh(GETATTR3args *args)
 144 {
 145 
 146         return (&args->object);
 147 }
 148 
 149 void
 150 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
 151         struct svc_req *req, cred_t *cr)
 152 {
 153         int error;
 154         vnode_t *vp;
 155         struct vattr *bvap;
 156         struct vattr bva;
 157         struct vattr *avap;
 158         struct vattr ava;
 159         int flag;
 160         int in_crit = 0;
 161         struct flock64 bf;
 162         caller_context_t ct;
 163 
 164         bvap = NULL;
 165         avap = NULL;
 166 
 167         vp = nfs3_fhtovp(&args->object, exi);
 168 
 169         DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
 170             cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
 171 
 172         if (vp == NULL) {
 173                 error = ESTALE;
 174                 goto out;
 175         }
 176 
 177         error = sattr3_to_vattr(&args->new_attributes, &ava);
 178         if (error)
 179                 goto out;
 180 
 181         if (is_system_labeled()) {
 182                 bslabel_t *clabel = req->rq_label;
 183 
 184                 ASSERT(clabel != NULL);
 185                 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
 186                     "got client label from request(1)", struct svc_req *, req);
 187 
 188                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 189                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
 190                             exi)) {
 191                                 resp->status = NFS3ERR_ACCES;
 192                                 goto out1;
 193                         }
 194                 }
 195         }
 196 
 197         /*
 198          * We need to specially handle size changes because of
 199          * possible conflicting NBMAND locks. Get into critical
 200          * region before VOP_GETATTR, so the size attribute is
 201          * valid when checking conflicts.
 202          *
 203          * Also, check to see if the v4 side of the server has
 204          * delegated this file.  If so, then we return JUKEBOX to
 205          * allow the client to retrasmit its request.
 206          */
 207         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 208                 if (nbl_need_check(vp)) {
 209                         nbl_start_crit(vp, RW_READER);
 210                         in_crit = 1;
 211                 }
 212         }
 213 
 214         bva.va_mask = AT_ALL;
 215         error = rfs4_delegated_getattr(vp, &bva, 0, cr);
 216 
 217         /*
 218          * If we can't get the attributes, then we can't do the
 219          * right access checking.  So, we'll fail the request.
 220          */
 221         if (error)
 222                 goto out;
 223 
 224         bvap = &bva;
 225 
 226         if (rdonly(exi, req) || vn_is_readonly(vp)) {
 227                 resp->status = NFS3ERR_ROFS;
 228                 goto out1;
 229         }
 230 
 231         if (args->guard.check &&
 232             (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
 233             args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
 234                 resp->status = NFS3ERR_NOT_SYNC;
 235                 goto out1;
 236         }
 237 
 238         if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
 239                 flag = ATTR_UTIME;
 240         else
 241                 flag = 0;
 242 
 243         /*
 244          * If the filesystem is exported with nosuid, then mask off
 245          * the setuid and setgid bits.
 246          */
 247         if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
 248             (exi->exi_export.ex_flags & EX_NOSUID))
 249                 ava.va_mode &= ~(VSUID | VSGID);
 250 
 251         ct.cc_sysid = 0;
 252         ct.cc_pid = 0;
 253         ct.cc_caller_id = nfs3_srv_caller_id;
 254         ct.cc_flags = CC_DONTBLOCK;
 255 
 256         /*
 257          * We need to specially handle size changes because it is
 258          * possible for the client to create a file with modes
 259          * which indicate read-only, but with the file opened for
 260          * writing.  If the client then tries to set the size of
 261          * the file, then the normal access checking done in
 262          * VOP_SETATTR would prevent the client from doing so,
 263          * although it should be legal for it to do so.  To get
 264          * around this, we do the access checking for ourselves
 265          * and then use VOP_SPACE which doesn't do the access
 266          * checking which VOP_SETATTR does. VOP_SPACE can only
 267          * operate on VREG files, let VOP_SETATTR handle the other
 268          * extremely rare cases.
 269          * Also the client should not be allowed to change the
 270          * size of the file if there is a conflicting non-blocking
 271          * mandatory lock in the region the change.
 272          */
 273         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 274                 if (in_crit) {
 275                         u_offset_t offset;
 276                         ssize_t length;
 277 
 278                         if (ava.va_size < bva.va_size) {
 279                                 offset = ava.va_size;
 280                                 length = bva.va_size - ava.va_size;
 281                         } else {
 282                                 offset = bva.va_size;
 283                                 length = ava.va_size - bva.va_size;
 284                         }
 285                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 286                             NULL)) {
 287                                 error = EACCES;
 288                                 goto out;
 289                         }
 290                 }
 291 
 292                 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
 293                         ava.va_mask &= ~AT_SIZE;
 294                         bf.l_type = F_WRLCK;
 295                         bf.l_whence = 0;
 296                         bf.l_start = (off64_t)ava.va_size;
 297                         bf.l_len = 0;
 298                         bf.l_sysid = 0;
 299                         bf.l_pid = 0;
 300                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 301                             (offset_t)ava.va_size, cr, &ct);
 302                 }
 303         }
 304 
 305         if (!error && ava.va_mask)
 306                 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
 307 
 308         /* check if a monitor detected a delegation conflict */
 309         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 310                 resp->status = NFS3ERR_JUKEBOX;
 311                 goto out1;
 312         }
 313 
 314         ava.va_mask = AT_ALL;
 315         avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 316 
 317         /*
 318          * Force modified metadata out to stable storage.
 319          */
 320         (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 321 
 322         if (error)
 323                 goto out;
 324 
 325         if (in_crit)
 326                 nbl_end_crit(vp);
 327 
 328         resp->status = NFS3_OK;
 329         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 330 
 331         DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 332             cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 333 
 334         VN_RELE(vp);
 335 
 336         return;
 337 
 338 out:
 339         if (curthread->t_flag & T_WOULDBLOCK) {
 340                 curthread->t_flag &= ~T_WOULDBLOCK;
 341                 resp->status = NFS3ERR_JUKEBOX;
 342         } else
 343                 resp->status = puterrno3(error);
 344 out1:
 345         DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 346             cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 347 
 348         if (vp != NULL) {
 349                 if (in_crit)
 350                         nbl_end_crit(vp);
 351                 VN_RELE(vp);
 352         }
 353         vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
 354 }
 355 
 356 void *
 357 rfs3_setattr_getfh(SETATTR3args *args)
 358 {
 359 
 360         return (&args->object);
 361 }
 362 
 363 /* ARGSUSED */
 364 void
 365 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 366         struct svc_req *req, cred_t *cr)
 367 {
 368         int error;
 369         vnode_t *vp;
 370         vnode_t *dvp;
 371         struct vattr *vap;
 372         struct vattr va;
 373         struct vattr *dvap;
 374         struct vattr dva;
 375         nfs_fh3 *fhp;
 376         struct sec_ol sec = {0, 0};
 377         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 378         struct sockaddr *ca;
 379         char *name = NULL;
 380 
 381         dvap = NULL;
 382 
 383         /*
 384          * Allow lookups from the root - the default
 385          * location of the public filehandle.
 386          */
 387         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 388                 dvp = rootdir;
 389                 VN_HOLD(dvp);
 390 
 391                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 392                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 393         } else {
 394                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 395 
 396                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 397                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 398 
 399                 if (dvp == NULL) {
 400                         error = ESTALE;
 401                         goto out;
 402                 }
 403         }
 404 
 405         dva.va_mask = AT_ALL;
 406         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 407 
 408         if (args->what.name == nfs3nametoolong) {
 409                 resp->status = NFS3ERR_NAMETOOLONG;
 410                 goto out1;
 411         }
 412 
 413         if (args->what.name == NULL || *(args->what.name) == '\0') {
 414                 resp->status = NFS3ERR_ACCES;
 415                 goto out1;
 416         }
 417 
 418         fhp = &args->what.dir;
 419         if (strcmp(args->what.name, "..") == 0 &&
 420             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 421                 resp->status = NFS3ERR_NOENT;
 422                 goto out1;
 423         }
 424 
 425         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 426         name = nfscmd_convname(ca, exi, args->what.name,
 427             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 428 
 429         if (name == NULL) {
 430                 resp->status = NFS3ERR_ACCES;
 431                 goto out1;
 432         }
 433 
 434         exi_hold(exi);
 435 
 436         /*
 437          * If the public filehandle is used then allow
 438          * a multi-component lookup
 439          */
 440         if (PUBLIC_FH3(&args->what.dir)) {
 441                 struct exportinfo *new;
 442 
 443                 publicfh_flag = TRUE;
 444 
 445                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 446                     &new, &sec);
 447 
 448                 if (error == 0) {
 449                         exi_rele(exi);
 450                         exi = new;
 451                 }
 452 
 453                 /*
 454                  * Since WebNFS may bypass MOUNT, we need to ensure this
 455                  * request didn't come from an unlabeled admin_low client.
 456                  */
 457                 if (is_system_labeled() && error == 0) {
 458                         int             addr_type;
 459                         void            *ipaddr;
 460                         tsol_tpc_t      *tp;
 461 
 462                         if (ca->sa_family == AF_INET) {
 463                                 addr_type = IPV4_VERSION;
 464                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 465                         } else if (ca->sa_family == AF_INET6) {
 466                                 addr_type = IPV6_VERSION;
 467                                 ipaddr = &((struct sockaddr_in6 *)
 468                                     ca)->sin6_addr;
 469                         }
 470                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 471                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 472                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 473                             SUN_CIPSO) {
 474                                 VN_RELE(vp);
 475                                 resp->status = NFS3ERR_ACCES;
 476                                 error = 1;
 477                         }
 478                         if (tp != NULL)
 479                                 TPC_RELE(tp);
 480                 }
 481         } else {
 482                 error = VOP_LOOKUP(dvp, name, &vp,
 483                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 484         }
 485 
 486         if (name != args->what.name)
 487                 kmem_free(name, MAXPATHLEN + 1);
 488 
 489         if (error == 0 && vn_ismntpt(vp)) {
 490                 error = rfs_cross_mnt(&vp, &exi);
 491                 if (error)
 492                         VN_RELE(vp);
 493         }
 494 
 495         if (is_system_labeled() && error == 0) {
 496                 bslabel_t *clabel = req->rq_label;
 497 
 498                 ASSERT(clabel != NULL);
 499                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 500                     "got client label from request(1)", struct svc_req *, req);
 501 
 502                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 503                         if (!do_rfs_label_check(clabel, dvp,
 504                             DOMINANCE_CHECK, exi)) {
 505                                 VN_RELE(vp);
 506                                 resp->status = NFS3ERR_ACCES;
 507                                 error = 1;
 508                         }
 509                 }
 510         }
 511 
 512         dva.va_mask = AT_ALL;
 513         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 514 
 515         if (error)
 516                 goto out;
 517 
 518         if (sec.sec_flags & SEC_QUERY) {
 519                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 520         } else {
 521                 error = makefh3(&resp->resok.object, vp, exi);
 522                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 523                         auth_weak = TRUE;
 524         }
 525 
 526         if (error) {
 527                 VN_RELE(vp);
 528                 goto out;
 529         }
 530 
 531         va.va_mask = AT_ALL;
 532         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 533 
 534         exi_rele(exi);
 535         VN_RELE(vp);
 536 
 537         resp->status = NFS3_OK;
 538         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 539         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 540 
 541         /*
 542          * If it's public fh, no 0x81, and client's flavor is
 543          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 544          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 545          */
 546         if (auth_weak)
 547                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 548 
 549         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 550             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 551         VN_RELE(dvp);
 552 
 553         return;
 554 
 555 out:
 556         /*
 557          * The passed argument exportinfo is released by the
 558          * caller, common_dispatch
 559          */
 560         exi_rele(exi);
 561 
 562         if (curthread->t_flag & T_WOULDBLOCK) {
 563                 curthread->t_flag &= ~T_WOULDBLOCK;
 564                 resp->status = NFS3ERR_JUKEBOX;
 565         } else
 566                 resp->status = puterrno3(error);
 567 out1:
 568         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 569             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 570 
 571         if (dvp != NULL)
 572                 VN_RELE(dvp);
 573         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 574 
 575 }
 576 
 577 void *
 578 rfs3_lookup_getfh(LOOKUP3args *args)
 579 {
 580 
 581         return (&args->what.dir);
 582 }
 583 
 584 /* ARGSUSED */
 585 void
 586 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 587         struct svc_req *req, cred_t *cr)
 588 {
 589         int error;
 590         vnode_t *vp;
 591         struct vattr *vap;
 592         struct vattr va;
 593         int checkwriteperm;
 594         boolean_t dominant_label = B_FALSE;
 595         boolean_t equal_label = B_FALSE;
 596         boolean_t admin_low_client;
 597 
 598         vap = NULL;
 599 
 600         vp = nfs3_fhtovp(&args->object, exi);
 601 
 602         DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
 603             cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
 604 
 605         if (vp == NULL) {
 606                 error = ESTALE;
 607                 goto out;
 608         }
 609 
 610         /*
 611          * If the file system is exported read only, it is not appropriate
 612          * to check write permissions for regular files and directories.
 613          * Special files are interpreted by the client, so the underlying
 614          * permissions are sent back to the client for interpretation.
 615          */
 616         if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
 617                 checkwriteperm = 0;
 618         else
 619                 checkwriteperm = 1;
 620 
 621         /*
 622          * We need the mode so that we can correctly determine access
 623          * permissions relative to a mandatory lock file.  Access to
 624          * mandatory lock files is denied on the server, so it might
 625          * as well be reflected to the server during the open.
 626          */
 627         va.va_mask = AT_MODE;
 628         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 629         if (error)
 630                 goto out;
 631 
 632         vap = &va;
 633 
 634         resp->resok.access = 0;
 635 
 636         if (is_system_labeled()) {
 637                 bslabel_t *clabel = req->rq_label;
 638 
 639                 ASSERT(clabel != NULL);
 640                 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 641                     "got client label from request(1)", struct svc_req *, req);
 642 
 643                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 644                         if ((equal_label = do_rfs_label_check(clabel, vp,
 645                             EQUALITY_CHECK, exi)) == B_FALSE) {
 646                                 dominant_label = do_rfs_label_check(clabel,
 647                                     vp, DOMINANCE_CHECK, exi);
 648                         } else
 649                                 dominant_label = B_TRUE;
 650                         admin_low_client = B_FALSE;
 651                 } else
 652                         admin_low_client = B_TRUE;
 653         }
 654 
 655         if (args->access & ACCESS3_READ) {
 656                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 657                 if (error) {
 658                         if (curthread->t_flag & T_WOULDBLOCK)
 659                                 goto out;
 660                 } else if (!MANDLOCK(vp, va.va_mode) &&
 661                     (!is_system_labeled() || admin_low_client ||
 662                     dominant_label))
 663                         resp->resok.access |= ACCESS3_READ;
 664         }
 665         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 666                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 667                 if (error) {
 668                         if (curthread->t_flag & T_WOULDBLOCK)
 669                                 goto out;
 670                 } else if (!is_system_labeled() || admin_low_client ||
 671                     dominant_label)
 672                         resp->resok.access |= ACCESS3_LOOKUP;
 673         }
 674         if (checkwriteperm &&
 675             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 676                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 677                 if (error) {
 678                         if (curthread->t_flag & T_WOULDBLOCK)
 679                                 goto out;
 680                 } else if (!MANDLOCK(vp, va.va_mode) &&
 681                     (!is_system_labeled() || admin_low_client || equal_label)) {
 682                         resp->resok.access |=
 683                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 684                 }
 685         }
 686         if (checkwriteperm &&
 687             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 688                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 689                 if (error) {
 690                         if (curthread->t_flag & T_WOULDBLOCK)
 691                                 goto out;
 692                 } else if (!is_system_labeled() || admin_low_client ||
 693                     equal_label)
 694                         resp->resok.access |= ACCESS3_DELETE;
 695         }
 696         if (args->access & ACCESS3_EXECUTE) {
 697                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 698                 if (error) {
 699                         if (curthread->t_flag & T_WOULDBLOCK)
 700                                 goto out;
 701                 } else if (!MANDLOCK(vp, va.va_mode) &&
 702                     (!is_system_labeled() || admin_low_client ||
 703                     dominant_label))
 704                         resp->resok.access |= ACCESS3_EXECUTE;
 705         }
 706 
 707         va.va_mask = AT_ALL;
 708         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 709 
 710         resp->status = NFS3_OK;
 711         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 712 
 713         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 714             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 715 
 716         VN_RELE(vp);
 717 
 718         return;
 719 
 720 out:
 721         if (curthread->t_flag & T_WOULDBLOCK) {
 722                 curthread->t_flag &= ~T_WOULDBLOCK;
 723                 resp->status = NFS3ERR_JUKEBOX;
 724         } else
 725                 resp->status = puterrno3(error);
 726         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 727             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 728         if (vp != NULL)
 729                 VN_RELE(vp);
 730         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 731 }
 732 
 733 void *
 734 rfs3_access_getfh(ACCESS3args *args)
 735 {
 736 
 737         return (&args->object);
 738 }
 739 
 740 /* ARGSUSED */
 741 void
 742 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 743         struct svc_req *req, cred_t *cr)
 744 {
 745         int error;
 746         vnode_t *vp;
 747         struct vattr *vap;
 748         struct vattr va;
 749         struct iovec iov;
 750         struct uio uio;
 751         char *data;
 752         struct sockaddr *ca;
 753         char *name = NULL;
 754         int is_referral = 0;
 755 
 756         vap = NULL;
 757 
 758         vp = nfs3_fhtovp(&args->symlink, exi);
 759 
 760         DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
 761             cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
 762 
 763         if (vp == NULL) {
 764                 error = ESTALE;
 765                 goto out;
 766         }
 767 
 768         va.va_mask = AT_ALL;
 769         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 770         if (error)
 771                 goto out;
 772 
 773         vap = &va;
 774 
 775         /* We lied about the object type for a referral */
 776         if (vn_is_nfs_reparse(vp, cr))
 777                 is_referral = 1;
 778 
 779         if (vp->v_type != VLNK && !is_referral) {
 780                 resp->status = NFS3ERR_INVAL;
 781                 goto out1;
 782         }
 783 
 784         if (MANDLOCK(vp, va.va_mode)) {
 785                 resp->status = NFS3ERR_ACCES;
 786                 goto out1;
 787         }
 788 
 789         if (is_system_labeled()) {
 790                 bslabel_t *clabel = req->rq_label;
 791 
 792                 ASSERT(clabel != NULL);
 793                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 794                     "got client label from request(1)", struct svc_req *, req);
 795 
 796                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 797                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 798                             exi)) {
 799                                 resp->status = NFS3ERR_ACCES;
 800                                 goto out1;
 801                         }
 802                 }
 803         }
 804 
 805         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 806 
 807         if (is_referral) {
 808                 char *s;
 809                 size_t strsz;
 810 
 811                 /* Get an artificial symlink based on a referral */
 812                 s = build_symlink(vp, cr, &strsz);
 813                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 814                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 815                     vnode_t *, vp, char *, s);
 816                 if (s == NULL)
 817                         error = EINVAL;
 818                 else {
 819                         error = 0;
 820                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 821                         kmem_free(s, strsz);
 822                 }
 823 
 824         } else {
 825 
 826                 iov.iov_base = data;
 827                 iov.iov_len = MAXPATHLEN;
 828                 uio.uio_iov = &iov;
 829                 uio.uio_iovcnt = 1;
 830                 uio.uio_segflg = UIO_SYSSPACE;
 831                 uio.uio_extflg = UIO_COPY_CACHED;
 832                 uio.uio_loffset = 0;
 833                 uio.uio_resid = MAXPATHLEN;
 834 
 835                 error = VOP_READLINK(vp, &uio, cr, NULL);
 836 
 837                 if (!error)
 838                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 839         }
 840 
 841         va.va_mask = AT_ALL;
 842         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 843 
 844         /* Lie about object type again just to be consistent */
 845         if (is_referral && vap != NULL)
 846                 vap->va_type = VLNK;
 847 
 848 #if 0 /* notyet */
 849         /*
 850          * Don't do this.  It causes local disk writes when just
 851          * reading the file and the overhead is deemed larger
 852          * than the benefit.
 853          */
 854         /*
 855          * Force modified metadata out to stable storage.
 856          */
 857         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 858 #endif
 859 
 860         if (error) {
 861                 kmem_free(data, MAXPATHLEN + 1);
 862                 goto out;
 863         }
 864 
 865         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 866         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 867             MAXPATHLEN + 1);
 868 
 869         if (name == NULL) {
 870                 /*
 871                  * Even though the conversion failed, we return
 872                  * something. We just don't translate it.
 873                  */
 874                 name = data;
 875         }
 876 
 877         resp->status = NFS3_OK;
 878         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 879         resp->resok.data = name;
 880 
 881         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 882             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 883         VN_RELE(vp);
 884 
 885         if (name != data)
 886                 kmem_free(data, MAXPATHLEN + 1);
 887 
 888         return;
 889 
 890 out:
 891         if (curthread->t_flag & T_WOULDBLOCK) {
 892                 curthread->t_flag &= ~T_WOULDBLOCK;
 893                 resp->status = NFS3ERR_JUKEBOX;
 894         } else
 895                 resp->status = puterrno3(error);
 896 out1:
 897         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 898             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 899         if (vp != NULL)
 900                 VN_RELE(vp);
 901         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 902 }
 903 
 /*
  * Return the address of the symlink file handle held in the READLINK3
  * arguments.
  */
 904 void *
 905 rfs3_readlink_getfh(READLINK3args *args)
 906 {
 907         void *fhp = &args->symlink;
 908         return (fhp);
 909 }
 910 
 /*
  * Release the link text attached to a READLINK3 reply.  Only a reply
  * whose status is NFS3_OK has its resok.data freed, and it is freed as
  * a MAXPATHLEN + 1 byte allocation; any other status frees nothing.
  */
 911 void
 912 rfs3_readlink_free(READLINK3res *resp)
 913 {
 914         if (resp->status != NFS3_OK)
 915                 return;
 916         kmem_free(resp->resok.data, MAXPATHLEN + 1);
 917 }
 918 
 919 /*
 920  * Server routine to handle read
 921  * May handle RDMA data as well as mblks
      *
      * The file handle in args is mapped to a vnode and, after the label,
      * non-blocking mandatory lock, file type, access and MANDLOCK checks,
      * up to args->count bytes are read starting at args->offset.  The data
      * is returned through one of three paths:
      *   - the client's RDMA write chunk list, when args->wlist is set;
      *   - buffers loaned by the filesystem (zero copy), when
      *     nfs_loaned_buffers is set, RDMA is not in use and
      *     VOP_REQZCBUF() succeeds;
      *   - an mblk allocated here with allocb_wait().
      *
      * On success resp->status is NFS3_OK and resp->resok is filled in; an
      * mblk, if any, is left in resp->resok.data.mp and is freed later by
      * rfs3_read_free().  On failure resp->status holds the NFSv3 error and
      * resp->resfail.file_attributes is built from whatever attributes had
      * been fetched (vap may be NULL).
      *
      * Exit labels: "done" is reached once the reply has been built and the
      * rwlock and critical region have been dropped; "out" converts error
      * (or a pending T_WOULDBLOCK) into an NFSv3 status; "out1" expects
      * resp->status to be set already.  "out" and "out1" drop the rwlock and
      * the critical region according to need_rwunlock and in_crit.
 922  */
 923 /* ARGSUSED */
 924 void
 925 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
 926         struct svc_req *req, cred_t *cr)
 927 {
 928         int error;
 929         vnode_t *vp;
 930         struct vattr *vap;
 931         struct vattr va;
 932         struct iovec iov;
 933         struct uio uio;
 934         u_offset_t offset;
 935         mblk_t *mp = NULL;
 936         int alloc_err = 0;
 937         int in_crit = 0;
 938         int need_rwunlock = 0;
 939         caller_context_t ct;
 940         int rdma_used = 0;
 941         int loaned_buffers;
 942         struct uio *uiop;
 943
 944         vap = NULL;
 945
             /*
              * Map the client's file handle to a vnode; NULL is reported as
              * ESTALE.  The vnode is released with VN_RELE() on every exit.
              */
 946         vp = nfs3_fhtovp(&args->file, exi);
 947
 948         DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
 949             cred_t *, cr, vnode_t *, vp, READ3args *, args);
 950
 951         if (vp == NULL) {
 952                 error = ESTALE;
 953                 goto out;
 954         }
 955
             /*
              * A write chunk list selects the RDMA reply path; refuse a
              * count larger than what clist_len() reports for that list.
              */
 956         if (args->wlist) {
 957                 if (args->count > clist_len(args->wlist)) {
 958                         error = EINVAL;
 959                         goto out;
 960                 }
 961                 rdma_used = 1;
 962         }
 963
 964         /* use loaned buffers for TCP */
 965         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
 966
 967         if (is_system_labeled()) {
 968                 bslabel_t *clabel = req->rq_label;
 969
 970                 ASSERT(clabel != NULL);
 971                 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
 972                     "got client label from request(1)", struct svc_req *, req);
 973
 974                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 975                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 976                             exi)) {
 977                                 resp->status = NFS3ERR_ACCES;
 978                                 goto out1;
 979                         }
 980                 }
 981         }
 982
             /*
              * Caller context handed to the VOP calls below.  CC_DONTBLOCK
              * is set here and CC_WOULDBLOCK is tested after VOP_RWLOCK()
              * and VOP_READ() to turn EAGAIN into NFS3ERR_JUKEBOX.
              */
 983         ct.cc_sysid = 0;
 984         ct.cc_pid = 0;
 985         ct.cc_caller_id = nfs3_srv_caller_id;
 986         ct.cc_flags = CC_DONTBLOCK;
 987
 988         /*
 989          * Enter the critical region before calling VOP_RWLOCK
 990          * to avoid a deadlock with write requests.
 991          */
 992         if (nbl_need_check(vp)) {
 993                 nbl_start_crit(vp, RW_READER);
 994                 in_crit = 1;
 995                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
 996                     NULL)) {
 997                         error = EACCES;
 998                         goto out;
 999                 }
1000         }
1001
1002         error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1003
1004         /* check if a monitor detected a delegation conflict */
1005         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1006                 resp->status = NFS3ERR_JUKEBOX;
1007                 goto out1;
1008         }
1009
             /* From here on the error exits must drop the rwlock. */
1010         need_rwunlock = 1;
1011
1012         va.va_mask = AT_ALL;
1013         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1014
1015         /*
1016          * If we can't get the attributes, then we can't do the
1017          * right access checking.  So, we'll fail the request.
1018          */
1019         if (error)
1020                 goto out;
1021
1022         vap = &va;
1023
1024         if (vp->v_type != VREG) {
1025                 resp->status = NFS3ERR_INVAL;
1026                 goto out1;
1027         }
1028
             /*
              * The file's owner is not access-checked.  Anyone else needs
              * VREAD or, failing that, VEXEC; a VREAD failure with
              * T_WOULDBLOCK set goes straight to the error exit.
              */
1029         if (crgetuid(cr) != va.va_uid) {
1030                 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1031                 if (error) {
1032                         if (curthread->t_flag & T_WOULDBLOCK)
1033                                 goto out;
1034                         error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1035                         if (error)
1036                                 goto out;
1037                 }
1038         }
1039
1040         if (MANDLOCK(vp, va.va_mode)) {
1041                 resp->status = NFS3ERR_ACCES;
1042                 goto out1;
1043         }
1044
1045         offset = args->offset;
             /*
              * Reading at or beyond the current file size: reply OK with
              * no data and eof set.
              */
1046         if (offset >= va.va_size) {
1047                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1048                 if (in_crit)
1049                         nbl_end_crit(vp);
1050                 resp->status = NFS3_OK;
1051                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1052                 resp->resok.count = 0;
1053                 resp->resok.eof = TRUE;
1054                 resp->resok.data.data_len = 0;
1055                 resp->resok.data.data_val = NULL;
1056                 resp->resok.data.mp = NULL;
1057                 /* RDMA */
1058                 resp->resok.wlist = args->wlist;
1059                 resp->resok.wlist_len = resp->resok.count;
1060                 if (resp->resok.wlist)
1061                         clist_zero_len(resp->resok.wlist);
1062                 goto done;
1063         }
1064
             /*
              * A zero-length read also replies OK with no data, but eof
              * stays FALSE.
              */
1065         if (args->count == 0) {
1066                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1067                 if (in_crit)
1068                         nbl_end_crit(vp);
1069                 resp->status = NFS3_OK;
1070                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1071                 resp->resok.count = 0;
1072                 resp->resok.eof = FALSE;
1073                 resp->resok.data.data_len = 0;
1074                 resp->resok.data.data_val = NULL;
1075                 resp->resok.data.mp = NULL;
1076                 /* RDMA */
1077                 resp->resok.wlist = args->wlist;
1078                 resp->resok.wlist_len = resp->resok.count;
1079                 if (resp->resok.wlist)
1080                         clist_zero_len(resp->resok.wlist);
1081                 goto done;
1082         }
1083
1084         /*
1085          * do not allocate more memory than the max. allowed
1086          * transfer size
1087          */
1088         if (args->count > rfs3_tsize(req))
1089                 args->count = rfs3_tsize(req);
1090
             /*
              * Try the zero-copy path first: ask the filesystem for loaned
              * buffers with VOP_REQZCBUF().  If that fails, fall back to
              * the copying path below.
              */
1091         if (loaned_buffers) {
1092                 uiop = (uio_t *)rfs_setup_xuio(vp);
1093                 ASSERT(uiop != NULL);
1094                 uiop->uio_segflg = UIO_SYSSPACE;
1095                 uiop->uio_loffset = args->offset;
1096                 uiop->uio_resid = args->count;
1097
1098                 /* Jump to do the read if successful */
1099                 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1100                         /*
1101                          * Need to hold the vnode until after VOP_RETZCBUF()
1102                          * is called.
1103                          */
1104                         VN_HOLD(vp);
1105                         goto doio_read;
1106                 }
1107
1108                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1109                     uiop->uio_loffset, int, uiop->uio_resid);
1110
1111                 uiop->uio_extflg = 0;
1112                 /* failure to setup for zero copy */
1113                 rfs_free_xuio((void *)uiop);
1114                 loaned_buffers = 0;
1115         }
1116
1117         /*
1118          * If returning data via RDMA Write, then grab the chunk list.
1119          * If we aren't returning READ data w/RDMA_WRITE, then grab
1120          * a mblk.
1121          */
1122         if (rdma_used) {
                     /*
                      * NOTE(review): the result of rdma_get_wchunk() is
                      * discarded -- confirm iov is always valid afterwards.
                      */
1123                 (void) rdma_get_wchunk(req, &iov, args->wlist);
1124         } else {
1125                 /*
1126                  * mp will contain the data to be sent out in the read reply.
1127                  * This will be freed after the reply has been sent out (by the
1128                  * driver).
1129                  * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
1130                  * that the call to xdrmblk_putmblk() never fails.
1131                  */
1132                 mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG,
1133                     &alloc_err);
1134                 ASSERT(mp != NULL);
1135                 ASSERT(alloc_err == 0);
1136
1137                 iov.iov_base = (caddr_t)mp->b_datap->db_base;
1138                 iov.iov_len = args->count;
1139         }
1140
             /* Copying uio that describes the single buffer chosen above. */
1141         uio.uio_iov = &iov;
1142         uio.uio_iovcnt = 1;
1143         uio.uio_segflg = UIO_SYSSPACE;
1144         uio.uio_extflg = UIO_COPY_CACHED;
1145         uio.uio_loffset = args->offset;
1146         uio.uio_resid = args->count;
1147         uiop = &uio;
1148
1149 doio_read:
1150         error = VOP_READ(vp, uiop, 0, cr, &ct);
1151
1152         if (error) {
                     /*
                      * mp is only non-NULL here when it came from
                      * allocb_wait().
                      * NOTE(review): on the loaned-buffer path nothing in
                      * this block undoes rfs_setup_xuio() or the VN_HOLD()
                      * above -- confirm that cleanup happens elsewhere.
                      */
1153                 if (mp)
1154                         freemsg(mp);
1155                 /* check if a monitor detected a delegation conflict */
1156                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1157                         resp->status = NFS3ERR_JUKEBOX;
1158                         goto out1;
1159                 }
1160                 goto out;
1161         }
1162
1163         /* make mblk using zc buffers */
1164         if (loaned_buffers) {
1165                 mp = uio_to_mblk(uiop);
1166                 ASSERT(mp != NULL);
1167         }
1168
             /*
              * Re-fetch the attributes for the reply.  A failure here is
              * not fatal: the reply simply carries no attributes.
              */
1169         va.va_mask = AT_ALL;
1170         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1171
1172         if (error)
1173                 vap = NULL;
1174         else
1175                 vap = &va;
1176
1177         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1178
1179         if (in_crit)
1180                 nbl_end_crit(vp);
1181
1182         resp->status = NFS3_OK;
1183         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1184         resp->resok.count = args->count - uiop->uio_resid;
             /*
              * eof is reported only when the post-read attributes are valid
              * and the read ended exactly at the file size.
              */
1185         if (!error && offset + resp->resok.count == va.va_size)
1186                 resp->resok.eof = TRUE;
1187         else
1188                 resp->resok.eof = FALSE;
1189         resp->resok.data.data_len = resp->resok.count;
1190
1191         if (mp)
1192                 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1193
1194         resp->resok.data.mp = mp;
1195         resp->resok.size = (uint_t)args->count;
1196
1197         if (rdma_used) {
1198                 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1199                 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1200                         resp->status = NFS3ERR_INVAL;
1201                 }
1202         } else {
                     /* mp is non-NULL on both non-RDMA paths (see ASSERTs). */
1203                 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1204                 (resp->resok).wlist = NULL;
1205         }
1206
     /*
      * Common exit once the reply has been built; every path that gets
      * here has already dropped the rwlock and the critical region.
      */
1207 done:
1208         DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1209             cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1210
1211         VN_RELE(vp);
1212
1213         return;
1214
     /* Error exit: turn error (or a pending T_WOULDBLOCK) into a status. */
1215 out:
1216         if (curthread->t_flag & T_WOULDBLOCK) {
1217                 curthread->t_flag &= ~T_WOULDBLOCK;
1218                 resp->status = NFS3ERR_JUKEBOX;
1219         } else
1220                 resp->status = puterrno3(error);
     /* Error exit for paths that have already set resp->status. */
1221 out1:
1222         DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1223             cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1224
1225         if (vp != NULL) {
1226                 if (need_rwunlock)
1227                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1228                 if (in_crit)
1229                         nbl_end_crit(vp);
1230                 VN_RELE(vp);
1231         }
1232         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1233 }
1234 
/*
 * Free the data mblk attached to a READ3 reply.  Nothing is freed unless
 * the reply status is NFS3_OK and resok.data.mp is non-NULL.
 */
1235 void
1236 rfs3_read_free(READ3res *resp)
1237 {
1238         mblk_t *reply_mp;
1239
1240         if (resp->status != NFS3_OK)
1241                 return;
1242         reply_mp = resp->resok.data.mp;
1243         if (reply_mp != NULL)
1244                 freemsg(reply_mp);
1245 }
1246 
/*
 * Return the address of the file handle held in the READ3 arguments.
 */
1247 void *
1248 rfs3_read_getfh(READ3args *args)
1249 {
1250         void *fhp = &args->file;
1251         return (fhp);
1252 }
1253 
/*
 * MAX_IOVECS is the size of the on-stack iovec array in rfs3_write(); an
 * mblk chain with more links than this gets a kmem_alloc()ed array
 * instead.  When DEBUG is defined, rfs3_write_hits counts the mblk writes
 * that fit in the on-stack array and rfs3_write_misses counts the ones
 * that had to allocate.
 */
1254 #define MAX_IOVECS      12
1255
1256 #ifdef DEBUG
1257 static int rfs3_write_hits = 0;
1258 static int rfs3_write_misses = 0;
1259 #endif
1260 
/*
 * Server routine to handle write.
 *
 * Maps args->file to a vnode, applies the label, non-blocking mandatory
 * lock, count/data_len consistency, read-only export, file type, access
 * and MANDLOCK checks, and then writes args->count bytes at args->offset
 * with VOP_WRITE().  The data comes from args->mblk, args->rlist or
 * args->data.data_val, tried in that order.  args->stable selects the
 * ioflag given to VOP_WRITE() (UNSTABLE -> 0, FILE_SYNC -> FSYNC,
 * DATA_SYNC -> FDSYNC); any other value fails with NFS3ERR_INVAL.
 *
 * On success resp->resok carries the wcc data, the byte count, the
 * committed level and write3verf.  On failure resp->resfail.file_wcc is
 * built from whatever before/after attributes had been collected.
 *
 * Exit labels: "err" converts error (or a pending T_WOULDBLOCK) into an
 * NFSv3 status, "err1" expects resp->status to be set already, and "out"
 * drops the rwlock (when rwlock_ret != -1), leaves the critical region
 * (when in_crit is set) and releases the vnode.
 */
1261 void
1262 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1263         struct svc_req *req, cred_t *cr)
1264 {
1265         int error;
1266         vnode_t *vp;
1267         struct vattr *bvap = NULL;
1268         struct vattr bva;
1269         struct vattr *avap = NULL;
1270         struct vattr ava;
1271         u_offset_t rlimit;
1272         struct uio uio;
1273         struct iovec iov[MAX_IOVECS];
1274         mblk_t *m;
1275         struct iovec *iovp;
1276         int iovcnt;
1277         int ioflag;
1278         cred_t *savecred;
1279         int in_crit = 0;
1280         int rwlock_ret = -1;
1281         caller_context_t ct;
1282
1283         vp = nfs3_fhtovp(&args->file, exi);
1284
1285         DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1286             cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1287
1288         if (vp == NULL) {
1289                 error = ESTALE;
1290                 goto err;
1291         }
1292
1293         if (is_system_labeled()) {
1294                 bslabel_t *clabel = req->rq_label;
1295
1296                 ASSERT(clabel != NULL);
1297                 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1298                     "got client label from request(1)", struct svc_req *, req);
1299
1300                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1301                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1302                             exi)) {
1303                                 resp->status = NFS3ERR_ACCES;
1304                                 goto err1;
1305                         }
1306                 }
1307         }
1308
             /*
              * Caller context handed to the VOP calls below.  CC_DONTBLOCK
              * is set here and CC_WOULDBLOCK is tested after VOP_RWLOCK()
              * and VOP_WRITE() to turn EAGAIN into NFS3ERR_JUKEBOX.
              */
1309         ct.cc_sysid = 0;
1310         ct.cc_pid = 0;
1311         ct.cc_caller_id = nfs3_srv_caller_id;
1312         ct.cc_flags = CC_DONTBLOCK;
1313
1314         /*
1315          * We have to enter the critical region before calling VOP_RWLOCK
1316          * to avoid a deadlock with ufs.
1317          */
1318         if (nbl_need_check(vp)) {
1319                 nbl_start_crit(vp, RW_READER);
1320                 in_crit = 1;
1321                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1322                     NULL)) {
1323                         error = EACCES;
1324                         goto err;
1325                 }
1326         }
1327
             /*
              * rwlock_ret stays -1 until this call has been made; the exit
              * code uses that to decide whether VOP_RWUNLOCK() is needed.
              */
1328         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1329
1330         /* check if a monitor detected a delegation conflict */
1331         if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1332                 resp->status = NFS3ERR_JUKEBOX;
                     /* reset so that the exit code skips VOP_RWUNLOCK() */
1333                 rwlock_ret = -1;
1334                 goto err1;
1335         }
1336
1337
1338         bva.va_mask = AT_ALL;
1339         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1340
1341         /*
1342          * If we can't get the attributes, then we can't do the
1343          * right access checking.  So, we'll fail the request.
1344          */
1345         if (error)
1346                 goto err;
1347
             /*
              * Until the write has been done the "after" attributes are
              * the same as the "before" attributes.
              */
1348         bvap = &bva;
1349         avap = bvap;
1350
1351         if (args->count != args->data.data_len) {
1352                 resp->status = NFS3ERR_INVAL;
1353                 goto err1;
1354         }
1355
1356         if (rdonly(exi, req)) {
1357                 resp->status = NFS3ERR_ROFS;
1358                 goto err1;
1359         }
1360
1361         if (vp->v_type != VREG) {
1362                 resp->status = NFS3ERR_INVAL;
1363                 goto err1;
1364         }
1365
             /* The file's owner is not access-checked; others need VWRITE. */
1366         if (crgetuid(cr) != bva.va_uid &&
1367             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1368                 goto err;
1369
1370         if (MANDLOCK(vp, bva.va_mode)) {
1371                 resp->status = NFS3ERR_ACCES;
1372                 goto err1;
1373         }
1374
             /* A zero-length write succeeds without calling VOP_WRITE(). */
1375         if (args->count == 0) {
1376                 resp->status = NFS3_OK;
1377                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1378                 resp->resok.count = 0;
1379                 resp->resok.committed = args->stable;
1380                 resp->resok.verf = write3verf;
1381                 goto out;
1382         }
1383
             /*
              * Build the iovec array describing the data to write:
              *   - mblk chain: one iovec per link, using the on-stack array
              *     when the chain has at most MAX_IOVECS links and a
              *     kmem_alloc()ed array otherwise;
              *   - rlist: a single iovec taken from the first rlist entry
              *     (NOTE(review): presumably the RDMA read chunk --
              *     confirm);
              *   - otherwise: a single iovec over args->data.data_val.
              */
1384         if (args->mblk != NULL) {
1385                 iovcnt = 0;
1386                 for (m = args->mblk; m != NULL; m = m->b_cont)
1387                         iovcnt++;
1388                 if (iovcnt <= MAX_IOVECS) {
1389 #ifdef DEBUG
1390                         rfs3_write_hits++;
1391 #endif
1392                         iovp = iov;
1393                 } else {
1394 #ifdef DEBUG
1395                         rfs3_write_misses++;
1396 #endif
1397                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1398                 }
1399                 mblk_to_iov(args->mblk, iovcnt, iovp);
1400
1401         } else if (args->rlist != NULL) {
1402                 iovcnt = 1;
1403                 iovp = iov;
1404                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1405                 iovp->iov_len = args->count;
1406         } else {
1407                 iovcnt = 1;
1408                 iovp = iov;
1409                 iovp->iov_base = args->data.data_val;
1410                 iovp->iov_len = args->count;
1411         }
1412
1413         uio.uio_iov = iovp;
1414         uio.uio_iovcnt = iovcnt;
1415
1416         uio.uio_segflg = UIO_SYSSPACE;
1417         uio.uio_extflg = UIO_COPY_DEFAULT;
1418         uio.uio_loffset = args->offset;
1419         uio.uio_resid = args->count;
             /*
              * Shorten the transfer so that it does not run past the
              * process file size limit.
              * NOTE(review): if args->offset is larger than uio_llimit and
              * the subtraction is unsigned, rlimit wraps and no clamp is
              * applied.  Also, resok.count below is derived from
              * args->count, not from the clamped resid -- confirm both are
              * intended.
              */
1420         uio.uio_llimit = curproc->p_fsz_ctl;
1421         rlimit = uio.uio_llimit - args->offset;
1422         if (rlimit < (u_offset_t)uio.uio_resid)
1423                 uio.uio_resid = (int)rlimit;
1424
1425         if (args->stable == UNSTABLE)
1426                 ioflag = 0;
1427         else if (args->stable == FILE_SYNC)
1428                 ioflag = FSYNC;
1429         else if (args->stable == DATA_SYNC)
1430                 ioflag = FDSYNC;
1431         else {
1432                 if (iovp != iov)
1433                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1434                 resp->status = NFS3ERR_INVAL;
1435                 goto err1;
1436         }
1437
1438         /*
1439          * We're changing creds because VM may fault and we need
1440          * the cred of the current thread to be used if quota
1441          * checking is enabled.
1442          */
1443         savecred = curthread->t_cred;
1444         curthread->t_cred = cr;
1445         error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1446         curthread->t_cred = savecred;
1447
             /* Free the iovec array if it was not the on-stack one. */
1448         if (iovp != iov)
1449                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1450
1451         /* check if a monitor detected a delegation conflict */
1452         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1453                 resp->status = NFS3ERR_JUKEBOX;
1454                 goto err1;
1455         }
1456
             /*
              * Fetch the "after" attributes before looking at the write
              * error so that a failure reply can carry them too.
              */
1457         ava.va_mask = AT_ALL;
1458         avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1459
1460         if (error)
1461                 goto err;
1462
1463         /*
1464          * If we were unable to get the V_WRITELOCK_TRUE, then we
1465          * may not have accurate after attrs, so check if
1466          * we have both attributes, they have a non-zero va_seq, and
1467          * va_seq has changed by exactly one,
1468          * if not, turn off the before attr.
1469          */
1470         if (rwlock_ret != V_WRITELOCK_TRUE) {
1471                 if (bvap == NULL || avap == NULL ||
1472                     bvap->va_seq == 0 || avap->va_seq == 0 ||
1473                     avap->va_seq != (bvap->va_seq + 1)) {
1474                         bvap = NULL;
1475                 }
1476         }
1477
1478         resp->status = NFS3_OK;
1479         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1480         resp->resok.count = args->count - uio.uio_resid;
1481         resp->resok.committed = args->stable;
1482         resp->resok.verf = write3verf;
1483         goto out;
1484
     /* Error exit: turn error (or a pending T_WOULDBLOCK) into a status. */
1485 err:
1486         if (curthread->t_flag & T_WOULDBLOCK) {
1487                 curthread->t_flag &= ~T_WOULDBLOCK;
1488                 resp->status = NFS3ERR_JUKEBOX;
1489         } else
1490                 resp->status = puterrno3(error);
     /* Error exit for paths that have already set resp->status. */
1491 err1:
1492         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
     /* Common exit: fire the done probe and release what is still held. */
1493 out:
1494         DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1495             cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1496
1497         if (vp != NULL) {
1498                 if (rwlock_ret != -1)
1499                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1500                 if (in_crit)
1501                         nbl_end_crit(vp);
1502                 VN_RELE(vp);
1503         }
1504 }
1505 
/*
 * Return the address of the file handle held in the WRITE3 arguments.
 */
1506 void *
1507 rfs3_write_getfh(WRITE3args *args)
1508 {
1509         void *fhp = &args->file;
1510         return (fhp);
1511 }
1512 
1513 void
1514 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1515         struct svc_req *req, cred_t *cr)
1516 {
1517         int error;
1518         int in_crit = 0;
1519         vnode_t *vp;
1520         vnode_t *tvp = NULL;
1521         vnode_t *dvp;
1522         struct vattr *vap;
1523         struct vattr va;
1524         struct vattr *dbvap;
1525         struct vattr dbva;
1526         struct vattr *davap;
1527         struct vattr dava;
1528         enum vcexcl excl;
1529         nfstime3 *mtime;
1530         len_t reqsize;
1531         bool_t trunc;
1532         struct sockaddr *ca;
1533         char *name = NULL;
1534 
1535         dbvap = NULL;
1536         davap = NULL;
1537 
1538         dvp = nfs3_fhtovp(&args->where.dir, exi);
1539 
1540         DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1541             cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1542 
1543         if (dvp == NULL) {
1544                 error = ESTALE;
1545                 goto out;
1546         }
1547 
1548         dbva.va_mask = AT_ALL;
1549         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1550         davap = dbvap;
1551 
1552         if (args->where.name == nfs3nametoolong) {
1553                 resp->status = NFS3ERR_NAMETOOLONG;
1554                 goto out1;
1555         }
1556 
1557         if (args->where.name == NULL || *(args->where.name) == '\0') {
1558                 resp->status = NFS3ERR_ACCES;
1559                 goto out1;
1560         }
1561 
1562         if (rdonly(exi, req)) {
1563                 resp->status = NFS3ERR_ROFS;
1564                 goto out1;
1565         }
1566 
1567         if (is_system_labeled()) {
1568                 bslabel_t *clabel = req->rq_label;
1569 
1570                 ASSERT(clabel != NULL);
1571                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1572                     "got client label from request(1)", struct svc_req *, req);
1573 
1574                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1575                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1576                             exi)) {
1577                                 resp->status = NFS3ERR_ACCES;
1578                                 goto out1;
1579                         }
1580                 }
1581         }
1582 
1583         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1584         name = nfscmd_convname(ca, exi, args->where.name,
1585             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1586 
1587         if (name == NULL) {
1588                 /* This is really a Solaris EILSEQ */
1589                 resp->status = NFS3ERR_INVAL;
1590                 goto out1;
1591         }
1592 
1593         if (args->how.mode == EXCLUSIVE) {
1594                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1595                 va.va_type = VREG;
1596                 va.va_mode = (mode_t)0;
1597                 /*
1598                  * Ensure no time overflows and that types match
1599                  */
1600                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1601                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1602                 va.va_mtime.tv_nsec = mtime->nseconds;
1603                 excl = EXCL;
1604         } else {
1605                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1606                     &va);
1607                 if (error)
1608                         goto out;
1609                 va.va_mask |= AT_TYPE;
1610                 va.va_type = VREG;
1611                 if (args->how.mode == GUARDED)
1612                         excl = EXCL;
1613                 else {
1614                         excl = NONEXCL;
1615 
1616                         /*
1617                          * During creation of file in non-exclusive mode
1618                          * if size of file is being set then make sure
1619                          * that if the file already exists that no conflicting
1620                          * non-blocking mandatory locks exists in the region
1621                          * being modified. If there are conflicting locks fail
1622                          * the operation with EACCES.
1623                          */
1624                         if (va.va_mask & AT_SIZE) {
1625                                 struct vattr tva;
1626 
1627                                 /*
1628                                  * Does file already exist?
1629                                  */
1630                                 error = VOP_LOOKUP(dvp, name, &tvp,
1631                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1632 
1633                                 /*
1634                                  * Check to see if the file has been delegated
1635                                  * to a v4 client.  If so, then begin recall of
1636                                  * the delegation and return JUKEBOX to allow
1637                                  * the client to retransmit its request.
1638                                  */
1639 
1640                                 trunc = va.va_size == 0;
1641                                 if (!error &&
1642                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1643                                         resp->status = NFS3ERR_JUKEBOX;
1644                                         goto out1;
1645                                 }
1646 
1647                                 /*
1648                                  * Check for NBMAND lock conflicts
1649                                  */
1650                                 if (!error && nbl_need_check(tvp)) {
1651                                         u_offset_t offset;
1652                                         ssize_t len;
1653 
1654                                         nbl_start_crit(tvp, RW_READER);
1655                                         in_crit = 1;
1656 
1657                                         tva.va_mask = AT_SIZE;
1658                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1659                                             NULL);
1660                                         /*
1661                                          * Can't check for conflicts, so return
1662                                          * error.
1663                                          */
1664                                         if (error)
1665                                                 goto out;
1666 
1667                                         offset = tva.va_size < va.va_size ?
1668                                             tva.va_size : va.va_size;
1669                                         len = tva.va_size < va.va_size ?
1670                                             va.va_size - tva.va_size :
1671                                             tva.va_size - va.va_size;
1672                                         if (nbl_conflict(tvp, NBL_WRITE,
1673                                             offset, len, 0, NULL)) {
1674                                                 error = EACCES;
1675                                                 goto out;
1676                                         }
1677                                 } else if (tvp) {
1678                                         VN_RELE(tvp);
1679                                         tvp = NULL;
1680                                 }
1681                         }
1682                 }
1683                 if (va.va_mask & AT_SIZE)
1684                         reqsize = va.va_size;
1685         }
1686 
1687         /*
1688          * Must specify the mode.
1689          */
1690         if (!(va.va_mask & AT_MODE)) {
1691                 resp->status = NFS3ERR_INVAL;
1692                 goto out1;
1693         }
1694 
1695         /*
1696          * If the filesystem is exported with nosuid, then mask off
1697          * the setuid and setgid bits.
1698          */
1699         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1700                 va.va_mode &= ~(VSUID | VSGID);
1701 
1702 tryagain:
1703         /*
1704          * The file open mode used is VWRITE.  If the client needs
1705          * some other semantic, then it should do the access checking
1706          * itself.  It would have been nice to have the file open mode
1707          * passed as part of the arguments.
1708          */
1709         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1710             &vp, cr, 0, NULL, NULL);
1711 
1712         dava.va_mask = AT_ALL;
1713         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1714 
1715         if (error) {
1716                 /*
1717                  * If we got something other than file already exists
1718                  * then just return this error.  Otherwise, we got
1719                  * EEXIST.  If we were doing a GUARDED create, then
1720                  * just return this error.  Otherwise, we need to
1721                  * make sure that this wasn't a duplicate of an
1722                  * exclusive create request.
1723                  *
1724                  * The assumption is made that a non-exclusive create
1725                  * request will never return EEXIST.
1726                  */
1727                 if (error != EEXIST || args->how.mode == GUARDED)
1728                         goto out;
1729                 /*
1730                  * Lookup the file so that we can get a vnode for it.
1731                  */
1732                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1733                     NULL, cr, NULL, NULL, NULL);
1734                 if (error) {
1735                         /*
1736                          * We couldn't find the file that we thought that
1737                          * we just created.  So, we'll just try creating
1738                          * it again.
1739                          */
1740                         if (error == ENOENT)
1741                                 goto tryagain;
1742                         goto out;
1743                 }
1744 
1745                 /*
1746                  * If the file is delegated to a v4 client, go ahead
1747                  * and initiate recall, this create is a hint that a
1748                  * conflicting v3 open has occurred.
1749                  */
1750 
1751                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1752                         VN_RELE(vp);
1753                         resp->status = NFS3ERR_JUKEBOX;
1754                         goto out1;
1755                 }
1756 
1757                 va.va_mask = AT_ALL;
1758                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1759 
1760                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1761                 /* % with INT32_MAX to prevent overflows */
1762                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1763                     vap->va_mtime.tv_sec !=
1764                     (mtime->seconds % INT32_MAX) ||
1765                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1766                         VN_RELE(vp);
1767                         error = EEXIST;
1768                         goto out;
1769                 }
1770         } else {
1771 
1772                 if ((args->how.mode == UNCHECKED ||
1773                     args->how.mode == GUARDED) &&
1774                     args->how.createhow3_u.obj_attributes.size.set_it &&
1775                     va.va_size == 0)
1776                         trunc = TRUE;
1777                 else
1778                         trunc = FALSE;
1779 
1780                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1781                         VN_RELE(vp);
1782                         resp->status = NFS3ERR_JUKEBOX;
1783                         goto out1;
1784                 }
1785 
1786                 va.va_mask = AT_ALL;
1787                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1788 
1789                 /*
1790                  * We need to check to make sure that the file got
1791                  * created to the indicated size.  If not, we do a
1792                  * setattr to try to change the size, but we don't
1793                  * try too hard.  This shouldn't a problem as most
1794                  * clients will only specifiy a size of zero which
1795                  * local file systems handle.  However, even if
1796                  * the client does specify a non-zero size, it can
1797                  * still recover by checking the size of the file
1798                  * after it has created it and then issue a setattr
1799                  * request of its own to set the size of the file.
1800                  */
1801                 if (vap != NULL &&
1802                     (args->how.mode == UNCHECKED ||
1803                     args->how.mode == GUARDED) &&
1804                     args->how.createhow3_u.obj_attributes.size.set_it &&
1805                     vap->va_size != reqsize) {
1806                         va.va_mask = AT_SIZE;
1807                         va.va_size = reqsize;
1808                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1809                         va.va_mask = AT_ALL;
1810                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1811                 }
1812         }
1813 
1814         if (name != args->where.name)
1815                 kmem_free(name, MAXPATHLEN + 1);
1816 
1817         error = makefh3(&resp->resok.obj.handle, vp, exi);
1818         if (error)
1819                 resp->resok.obj.handle_follows = FALSE;
1820         else
1821                 resp->resok.obj.handle_follows = TRUE;
1822 
1823         /*
1824          * Force modified data and metadata out to stable storage.
1825          */
1826         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1827         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1828 
1829         VN_RELE(vp);
1830         if (tvp != NULL) {
1831                 if (in_crit)
1832                         nbl_end_crit(tvp);
1833                 VN_RELE(tvp);
1834         }
1835 
1836         resp->status = NFS3_OK;
1837         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1838         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1839 
1840         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1841             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1842 
1843         VN_RELE(dvp);
1844         return;
1845 
1846 out:
1847         if (curthread->t_flag & T_WOULDBLOCK) {
1848                 curthread->t_flag &= ~T_WOULDBLOCK;
1849                 resp->status = NFS3ERR_JUKEBOX;
1850         } else
1851                 resp->status = puterrno3(error);
1852 out1:
1853         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1854             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1855 
1856         if (name != NULL && name != args->where.name)
1857                 kmem_free(name, MAXPATHLEN + 1);
1858 
1859         if (tvp != NULL) {
1860                 if (in_crit)
1861                         nbl_end_crit(tvp);
1862                 VN_RELE(tvp);
1863         }
1864         if (dvp != NULL)
1865                 VN_RELE(dvp);
1866         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1867 }
1868 
1869 void *
1870 rfs3_create_getfh(CREATE3args *args)
1871 {
1872 
1873         return (&args->where.dir);
1874 }
1875 
1876 void
1877 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1878         struct svc_req *req, cred_t *cr)
1879 {
1880         int error;
1881         vnode_t *vp = NULL;
1882         vnode_t *dvp;
1883         struct vattr *vap;
1884         struct vattr va;
1885         struct vattr *dbvap;
1886         struct vattr dbva;
1887         struct vattr *davap;
1888         struct vattr dava;
1889         struct sockaddr *ca;
1890         char *name = NULL;
1891 
1892         dbvap = NULL;
1893         davap = NULL;
1894 
1895         dvp = nfs3_fhtovp(&args->where.dir, exi);
1896 
1897         DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1898             cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1899 
1900         if (dvp == NULL) {
1901                 error = ESTALE;
1902                 goto out;
1903         }
1904 
1905         dbva.va_mask = AT_ALL;
1906         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1907         davap = dbvap;
1908 
1909         if (args->where.name == nfs3nametoolong) {
1910                 resp->status = NFS3ERR_NAMETOOLONG;
1911                 goto out1;
1912         }
1913 
1914         if (args->where.name == NULL || *(args->where.name) == '\0') {
1915                 resp->status = NFS3ERR_ACCES;
1916                 goto out1;
1917         }
1918 
1919         if (rdonly(exi, req)) {
1920                 resp->status = NFS3ERR_ROFS;
1921                 goto out1;
1922         }
1923 
1924         if (is_system_labeled()) {
1925                 bslabel_t *clabel = req->rq_label;
1926 
1927                 ASSERT(clabel != NULL);
1928                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1929                     "got client label from request(1)", struct svc_req *, req);
1930 
1931                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1932                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1933                             exi)) {
1934                                 resp->status = NFS3ERR_ACCES;
1935                                 goto out1;
1936                         }
1937                 }
1938         }
1939 
1940         error = sattr3_to_vattr(&args->attributes, &va);
1941         if (error)
1942                 goto out;
1943 
1944         if (!(va.va_mask & AT_MODE)) {
1945                 resp->status = NFS3ERR_INVAL;
1946                 goto out1;
1947         }
1948 
1949         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1950         name = nfscmd_convname(ca, exi, args->where.name,
1951             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1952 
1953         if (name == NULL) {
1954                 resp->status = NFS3ERR_INVAL;
1955                 goto out1;
1956         }
1957 
1958         va.va_mask |= AT_TYPE;
1959         va.va_type = VDIR;
1960 
1961         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1962 
1963         if (name != args->where.name)
1964                 kmem_free(name, MAXPATHLEN + 1);
1965 
1966         dava.va_mask = AT_ALL;
1967         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1968 
1969         /*
1970          * Force modified data and metadata out to stable storage.
1971          */
1972         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1973 
1974         if (error)
1975                 goto out;
1976 
1977         error = makefh3(&resp->resok.obj.handle, vp, exi);
1978         if (error)
1979                 resp->resok.obj.handle_follows = FALSE;
1980         else
1981                 resp->resok.obj.handle_follows = TRUE;
1982 
1983         va.va_mask = AT_ALL;
1984         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1985 
1986         /*
1987          * Force modified data and metadata out to stable storage.
1988          */
1989         (void) VOP_FSYNC(vp, 0, cr, NULL);
1990 
1991         VN_RELE(vp);
1992 
1993         resp->status = NFS3_OK;
1994         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1995         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1996 
1997         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1998             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1999         VN_RELE(dvp);
2000 
2001         return;
2002 
2003 out:
2004         if (curthread->t_flag & T_WOULDBLOCK) {
2005                 curthread->t_flag &= ~T_WOULDBLOCK;
2006                 resp->status = NFS3ERR_JUKEBOX;
2007         } else
2008                 resp->status = puterrno3(error);
2009 out1:
2010         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2011             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2012         if (dvp != NULL)
2013                 VN_RELE(dvp);
2014         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2015 }
2016 
2017 void *
2018 rfs3_mkdir_getfh(MKDIR3args *args)
2019 {
2020 
2021         return (&args->where.dir);
2022 }
2023 
2024 void
2025 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2026         struct svc_req *req, cred_t *cr)
2027 {
2028         int error;
2029         vnode_t *vp;
2030         vnode_t *dvp;
2031         struct vattr *vap;
2032         struct vattr va;
2033         struct vattr *dbvap;
2034         struct vattr dbva;
2035         struct vattr *davap;
2036         struct vattr dava;
2037         struct sockaddr *ca;
2038         char *name = NULL;
2039         char *symdata = NULL;
2040 
2041         dbvap = NULL;
2042         davap = NULL;
2043 
2044         dvp = nfs3_fhtovp(&args->where.dir, exi);
2045 
2046         DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2047             cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2048 
2049         if (dvp == NULL) {
2050                 error = ESTALE;
2051                 goto err;
2052         }
2053 
2054         dbva.va_mask = AT_ALL;
2055         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2056         davap = dbvap;
2057 
2058         if (args->where.name == nfs3nametoolong) {
2059                 resp->status = NFS3ERR_NAMETOOLONG;
2060                 goto err1;
2061         }
2062 
2063         if (args->where.name == NULL || *(args->where.name) == '\0') {
2064                 resp->status = NFS3ERR_ACCES;
2065                 goto err1;
2066         }
2067 
2068         if (rdonly(exi, req)) {
2069                 resp->status = NFS3ERR_ROFS;
2070                 goto err1;
2071         }
2072 
2073         if (is_system_labeled()) {
2074                 bslabel_t *clabel = req->rq_label;
2075 
2076                 ASSERT(clabel != NULL);
2077                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2078                     "got client label from request(1)", struct svc_req *, req);
2079 
2080                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2081                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2082                             exi)) {
2083                                 resp->status = NFS3ERR_ACCES;
2084                                 goto err1;
2085                         }
2086                 }
2087         }
2088 
2089         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2090         if (error)
2091                 goto err;
2092 
2093         if (!(va.va_mask & AT_MODE)) {
2094                 resp->status = NFS3ERR_INVAL;
2095                 goto err1;
2096         }
2097 
2098         if (args->symlink.symlink_data == nfs3nametoolong) {
2099                 resp->status = NFS3ERR_NAMETOOLONG;
2100                 goto err1;
2101         }
2102 
2103         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2104         name = nfscmd_convname(ca, exi, args->where.name,
2105             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2106 
2107         if (name == NULL) {
2108                 /* This is really a Solaris EILSEQ */
2109                 resp->status = NFS3ERR_INVAL;
2110                 goto err1;
2111         }
2112 
2113         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2114             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2115         if (symdata == NULL) {
2116                 /* This is really a Solaris EILSEQ */
2117                 resp->status = NFS3ERR_INVAL;
2118                 goto err1;
2119         }
2120 
2121 
2122         va.va_mask |= AT_TYPE;
2123         va.va_type = VLNK;
2124 
2125         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2126 
2127         dava.va_mask = AT_ALL;
2128         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2129 
2130         if (error)
2131                 goto err;
2132 
2133         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2134             NULL, NULL, NULL);
2135 
2136         /*
2137          * Force modified data and metadata out to stable storage.
2138          */
2139         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2140 
2141 
2142         resp->status = NFS3_OK;
2143         if (error) {
2144                 resp->resok.obj.handle_follows = FALSE;
2145                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2146                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2147                 goto out;
2148         }
2149 
2150         error = makefh3(&resp->resok.obj.handle, vp, exi);
2151         if (error)
2152                 resp->resok.obj.handle_follows = FALSE;
2153         else
2154                 resp->resok.obj.handle_follows = TRUE;
2155 
2156         va.va_mask = AT_ALL;
2157         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2158 
2159         /*
2160          * Force modified data and metadata out to stable storage.
2161          */
2162         (void) VOP_FSYNC(vp, 0, cr, NULL);
2163 
2164         VN_RELE(vp);
2165 
2166         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2167         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2168         goto out;
2169 
2170 err:
2171         if (curthread->t_flag & T_WOULDBLOCK) {
2172                 curthread->t_flag &= ~T_WOULDBLOCK;
2173                 resp->status = NFS3ERR_JUKEBOX;
2174         } else
2175                 resp->status = puterrno3(error);
2176 err1:
2177         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2178 out:
2179         if (name != NULL && name != args->where.name)
2180                 kmem_free(name, MAXPATHLEN + 1);
2181         if (symdata != NULL && symdata != args->symlink.symlink_data)
2182                 kmem_free(symdata, MAXPATHLEN + 1);
2183 
2184         DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2185             cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2186 
2187         if (dvp != NULL)
2188                 VN_RELE(dvp);
2189 }
2190 
2191 void *
2192 rfs3_symlink_getfh(SYMLINK3args *args)
2193 {
2194 
2195         return (&args->where.dir);
2196 }
2197 
2198 void
2199 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2200         struct svc_req *req, cred_t *cr)
2201 {
2202         int error;
2203         vnode_t *vp;
2204         vnode_t *realvp;
2205         vnode_t *dvp;
2206         struct vattr *vap;
2207         struct vattr va;
2208         struct vattr *dbvap;
2209         struct vattr dbva;
2210         struct vattr *davap;
2211         struct vattr dava;
2212         int mode;
2213         enum vcexcl excl;
2214         struct sockaddr *ca;
2215         char *name = NULL;
2216 
2217         dbvap = NULL;
2218         davap = NULL;
2219 
2220         dvp = nfs3_fhtovp(&args->where.dir, exi);
2221 
2222         DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2223             cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2224 
2225         if (dvp == NULL) {
2226                 error = ESTALE;
2227                 goto out;
2228         }
2229 
2230         dbva.va_mask = AT_ALL;
2231         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2232         davap = dbvap;
2233 
2234         if (args->where.name == nfs3nametoolong) {
2235                 resp->status = NFS3ERR_NAMETOOLONG;
2236                 goto out1;
2237         }
2238 
2239         if (args->where.name == NULL || *(args->where.name) == '\0') {
2240                 resp->status = NFS3ERR_ACCES;
2241                 goto out1;
2242         }
2243 
2244         if (rdonly(exi, req)) {
2245                 resp->status = NFS3ERR_ROFS;
2246                 goto out1;
2247         }
2248 
2249         if (is_system_labeled()) {
2250                 bslabel_t *clabel = req->rq_label;
2251 
2252                 ASSERT(clabel != NULL);
2253                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2254                     "got client label from request(1)", struct svc_req *, req);
2255 
2256                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2257                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2258                             exi)) {
2259                                 resp->status = NFS3ERR_ACCES;
2260                                 goto out1;
2261                         }
2262                 }
2263         }
2264 
2265         switch (args->what.type) {
2266         case NF3CHR:
2267         case NF3BLK:
2268                 error = sattr3_to_vattr(
2269                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2270                 if (error)
2271                         goto out;
2272                 if (secpolicy_sys_devices(cr) != 0) {
2273                         resp->status = NFS3ERR_PERM;
2274                         goto out1;
2275                 }
2276                 if (args->what.type == NF3CHR)
2277                         va.va_type = VCHR;
2278                 else
2279                         va.va_type = VBLK;
2280                 va.va_rdev = makedevice(
2281                     args->what.mknoddata3_u.device.spec.specdata1,
2282                     args->what.mknoddata3_u.device.spec.specdata2);
2283                 va.va_mask |= AT_TYPE | AT_RDEV;
2284                 break;
2285         case NF3SOCK:
2286                 error = sattr3_to_vattr(
2287                     &args->what.mknoddata3_u.pipe_attributes, &va);
2288                 if (error)
2289                         goto out;
2290                 va.va_type = VSOCK;
2291                 va.va_mask |= AT_TYPE;
2292                 break;
2293         case NF3FIFO:
2294                 error = sattr3_to_vattr(
2295                     &args->what.mknoddata3_u.pipe_attributes, &va);
2296                 if (error)
2297                         goto out;
2298                 va.va_type = VFIFO;
2299                 va.va_mask |= AT_TYPE;
2300                 break;
2301         default:
2302                 resp->status = NFS3ERR_BADTYPE;
2303                 goto out1;
2304         }
2305 
2306         /*
2307          * Must specify the mode.
2308          */
2309         if (!(va.va_mask & AT_MODE)) {
2310                 resp->status = NFS3ERR_INVAL;
2311                 goto out1;
2312         }
2313 
2314         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2315         name = nfscmd_convname(ca, exi, args->where.name,
2316             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2317 
2318         if (name == NULL) {
2319                 resp->status = NFS3ERR_INVAL;
2320                 goto out1;
2321         }
2322 
2323         excl = EXCL;
2324 
2325         mode = 0;
2326 
2327         error = VOP_CREATE(dvp, name, &va, excl, mode,
2328             &vp, cr, 0, NULL, NULL);
2329 
2330         if (name != args->where.name)
2331                 kmem_free(name, MAXPATHLEN + 1);
2332 
2333         dava.va_mask = AT_ALL;
2334         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2335 
2336         /*
2337          * Force modified data and metadata out to stable storage.
2338          */
2339         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2340 
2341         if (error)
2342                 goto out;
2343 
2344         resp->status = NFS3_OK;
2345 
2346         error = makefh3(&resp->resok.obj.handle, vp, exi);
2347         if (error)
2348                 resp->resok.obj.handle_follows = FALSE;
2349         else
2350                 resp->resok.obj.handle_follows = TRUE;
2351 
2352         va.va_mask = AT_ALL;
2353         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2354 
2355         /*
2356          * Force modified metadata out to stable storage.
2357          *
2358          * if a underlying vp exists, pass it to VOP_FSYNC
2359          */
2360         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2361                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2362         else
2363                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2364 
2365         VN_RELE(vp);
2366 
2367         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2368         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2369         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2370             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2371         VN_RELE(dvp);
2372         return;
2373 
2374 out:
2375         if (curthread->t_flag & T_WOULDBLOCK) {
2376                 curthread->t_flag &= ~T_WOULDBLOCK;
2377                 resp->status = NFS3ERR_JUKEBOX;
2378         } else
2379                 resp->status = puterrno3(error);
2380 out1:
2381         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2382             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2383         if (dvp != NULL)
2384                 VN_RELE(dvp);
2385         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2386 }
2387 
2388 void *
2389 rfs3_mknod_getfh(MKNOD3args *args)
2390 {
2391 
2392         return (&args->where.dir);
2393 }
2394 
2395 void
2396 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2397         struct svc_req *req, cred_t *cr)
2398 {
2399         int error = 0;
2400         vnode_t *vp;
2401         struct vattr *bvap;
2402         struct vattr bva;
2403         struct vattr *avap;
2404         struct vattr ava;
2405         vnode_t *targvp = NULL;
2406         struct sockaddr *ca;
2407         char *name = NULL;
2408 
2409         bvap = NULL;
2410         avap = NULL;
2411 
2412         vp = nfs3_fhtovp(&args->object.dir, exi);
2413 
2414         DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2415             cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2416 
2417         if (vp == NULL) {
2418                 error = ESTALE;
2419                 goto err;
2420         }
2421 
2422         bva.va_mask = AT_ALL;
2423         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2424         avap = bvap;
2425 
2426         if (vp->v_type != VDIR) {
2427                 resp->status = NFS3ERR_NOTDIR;
2428                 goto err1;
2429         }
2430 
2431         if (args->object.name == nfs3nametoolong) {
2432                 resp->status = NFS3ERR_NAMETOOLONG;
2433                 goto err1;
2434         }
2435 
2436         if (args->object.name == NULL || *(args->object.name) == '\0') {
2437                 resp->status = NFS3ERR_ACCES;
2438                 goto err1;
2439         }
2440 
2441         if (rdonly(exi, req)) {
2442                 resp->status = NFS3ERR_ROFS;
2443                 goto err1;
2444         }
2445 
2446         if (is_system_labeled()) {
2447                 bslabel_t *clabel = req->rq_label;
2448 
2449                 ASSERT(clabel != NULL);
2450                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2451                     "got client label from request(1)", struct svc_req *, req);
2452 
2453                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2454                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2455                             exi)) {
2456                                 resp->status = NFS3ERR_ACCES;
2457                                 goto err1;
2458                         }
2459                 }
2460         }
2461 
2462         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2463         name = nfscmd_convname(ca, exi, args->object.name,
2464             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2465 
2466         if (name == NULL) {
2467                 resp->status = NFS3ERR_INVAL;
2468                 goto err1;
2469         }
2470 
2471         /*
2472          * Check for a conflict with a non-blocking mandatory share
2473          * reservation and V4 delegations
2474          */
2475         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2476             NULL, cr, NULL, NULL, NULL);
2477         if (error != 0)
2478                 goto err;
2479 
2480         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2481                 resp->status = NFS3ERR_JUKEBOX;
2482                 goto err1;
2483         }
2484 
2485         if (!nbl_need_check(targvp)) {
2486                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2487         } else {
2488                 nbl_start_crit(targvp, RW_READER);
2489                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2490                         error = EACCES;
2491                 } else {
2492                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2493                 }
2494                 nbl_end_crit(targvp);
2495         }
2496         VN_RELE(targvp);
2497         targvp = NULL;
2498 
2499         ava.va_mask = AT_ALL;
2500         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2501 
2502         /*
2503          * Force modified data and metadata out to stable storage.
2504          */
2505         (void) VOP_FSYNC(vp, 0, cr, NULL);
2506 
2507         if (error)
2508                 goto err;
2509 
2510         resp->status = NFS3_OK;
2511         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2512         goto out;
2513 
2514 err:
2515         if (curthread->t_flag & T_WOULDBLOCK) {
2516                 curthread->t_flag &= ~T_WOULDBLOCK;
2517                 resp->status = NFS3ERR_JUKEBOX;
2518         } else
2519                 resp->status = puterrno3(error);
2520 err1:
2521         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2522 out:
2523         DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2524             cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2525 
2526         if (name != NULL && name != args->object.name)
2527                 kmem_free(name, MAXPATHLEN + 1);
2528 
2529         if (vp != NULL)
2530                 VN_RELE(vp);
2531 }
2532 
2533 void *
2534 rfs3_remove_getfh(REMOVE3args *args)
2535 {
2536 
2537         return (&args->object.dir);
2538 }
2539 
2540 void
2541 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2542         struct svc_req *req, cred_t *cr)
2543 {
2544         int error;
2545         vnode_t *vp;
2546         struct vattr *bvap;
2547         struct vattr bva;
2548         struct vattr *avap;
2549         struct vattr ava;
2550         struct sockaddr *ca;
2551         char *name = NULL;
2552 
2553         bvap = NULL;
2554         avap = NULL;
2555 
2556         vp = nfs3_fhtovp(&args->object.dir, exi);
2557 
2558         DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2559             cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2560 
2561         if (vp == NULL) {
2562                 error = ESTALE;
2563                 goto err;
2564         }
2565 
2566         bva.va_mask = AT_ALL;
2567         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2568         avap = bvap;
2569 
2570         if (vp->v_type != VDIR) {
2571                 resp->status = NFS3ERR_NOTDIR;
2572                 goto err1;
2573         }
2574 
2575         if (args->object.name == nfs3nametoolong) {
2576                 resp->status = NFS3ERR_NAMETOOLONG;
2577                 goto err1;
2578         }
2579 
2580         if (args->object.name == NULL || *(args->object.name) == '\0') {
2581                 resp->status = NFS3ERR_ACCES;
2582                 goto err1;
2583         }
2584 
2585         if (rdonly(exi, req)) {
2586                 resp->status = NFS3ERR_ROFS;
2587                 goto err1;
2588         }
2589 
2590         if (is_system_labeled()) {
2591                 bslabel_t *clabel = req->rq_label;
2592 
2593                 ASSERT(clabel != NULL);
2594                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2595                     "got client label from request(1)", struct svc_req *, req);
2596 
2597                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2598                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2599                             exi)) {
2600                                 resp->status = NFS3ERR_ACCES;
2601                                 goto err1;
2602                         }
2603                 }
2604         }
2605 
2606         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2607         name = nfscmd_convname(ca, exi, args->object.name,
2608             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2609 
2610         if (name == NULL) {
2611                 resp->status = NFS3ERR_INVAL;
2612                 goto err1;
2613         }
2614 
2615         error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2616 
2617         if (name != args->object.name)
2618                 kmem_free(name, MAXPATHLEN + 1);
2619 
2620         ava.va_mask = AT_ALL;
2621         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2622 
2623         /*
2624          * Force modified data and metadata out to stable storage.
2625          */
2626         (void) VOP_FSYNC(vp, 0, cr, NULL);
2627 
2628         if (error) {
2629                 /*
2630                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2631                  * if the directory is not empty.  A System V NFS server
2632                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2633                  * over the wire.
2634                  */
2635                 if (error == EEXIST)
2636                         error = ENOTEMPTY;
2637                 goto err;
2638         }
2639 
2640         resp->status = NFS3_OK;
2641         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2642         goto out;
2643 
2644 err:
2645         if (curthread->t_flag & T_WOULDBLOCK) {
2646                 curthread->t_flag &= ~T_WOULDBLOCK;
2647                 resp->status = NFS3ERR_JUKEBOX;
2648         } else
2649                 resp->status = puterrno3(error);
2650 err1:
2651         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2652 out:
2653         DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2654             cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2655         if (vp != NULL)
2656                 VN_RELE(vp);
2657 
2658 }
2659 
2660 void *
2661 rfs3_rmdir_getfh(RMDIR3args *args)
2662 {
2663 
2664         return (&args->object.dir);
2665 }
2666 
2667 void
2668 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2669         struct svc_req *req, cred_t *cr)
2670 {
2671         int error = 0;
2672         vnode_t *fvp;
2673         vnode_t *tvp;
2674         vnode_t *targvp;
2675         struct vattr *fbvap;
2676         struct vattr fbva;
2677         struct vattr *favap;
2678         struct vattr fava;
2679         struct vattr *tbvap;
2680         struct vattr tbva;
2681         struct vattr *tavap;
2682         struct vattr tava;
2683         nfs_fh3 *fh3;
2684         struct exportinfo *to_exi;
2685         vnode_t *srcvp = NULL;
2686         bslabel_t *clabel;
2687         struct sockaddr *ca;
2688         char *name = NULL;
2689         char *toname = NULL;
2690 
2691         fbvap = NULL;
2692         favap = NULL;
2693         tbvap = NULL;
2694         tavap = NULL;
2695         tvp = NULL;
2696 
2697         fvp = nfs3_fhtovp(&args->from.dir, exi);
2698 
2699         DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2700             cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2701 
2702         if (fvp == NULL) {
2703                 error = ESTALE;
2704                 goto err;
2705         }
2706 
2707         if (is_system_labeled()) {
2708                 clabel = req->rq_label;
2709                 ASSERT(clabel != NULL);
2710                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2711                     "got client label from request(1)", struct svc_req *, req);
2712 
2713                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2714                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2715                             exi)) {
2716                                 resp->status = NFS3ERR_ACCES;
2717                                 goto err1;
2718                         }
2719                 }
2720         }
2721 
2722         fbva.va_mask = AT_ALL;
2723         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2724         favap = fbvap;
2725 
2726         fh3 = &args->to.dir;
2727         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2728         if (to_exi == NULL) {
2729                 resp->status = NFS3ERR_ACCES;
2730                 goto err1;
2731         }
2732         exi_rele(to_exi);
2733 
2734         if (to_exi != exi) {
2735                 resp->status = NFS3ERR_XDEV;
2736                 goto err1;
2737         }
2738 
2739         tvp = nfs3_fhtovp(&args->to.dir, exi);
2740         if (tvp == NULL) {
2741                 error = ESTALE;
2742                 goto err;
2743         }
2744 
2745         tbva.va_mask = AT_ALL;
2746         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2747         tavap = tbvap;
2748 
2749         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2750                 resp->status = NFS3ERR_NOTDIR;
2751                 goto err1;
2752         }
2753 
2754         if (args->from.name == nfs3nametoolong ||
2755             args->to.name == nfs3nametoolong) {
2756                 resp->status = NFS3ERR_NAMETOOLONG;
2757                 goto err1;
2758         }
2759         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2760             args->to.name == NULL || *(args->to.name) == '\0') {
2761                 resp->status = NFS3ERR_ACCES;
2762                 goto err1;
2763         }
2764 
2765         if (rdonly(exi, req)) {
2766                 resp->status = NFS3ERR_ROFS;
2767                 goto err1;
2768         }
2769 
2770         if (is_system_labeled()) {
2771                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2772                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2773                             exi)) {
2774                                 resp->status = NFS3ERR_ACCES;
2775                                 goto err1;
2776                         }
2777                 }
2778         }
2779 
2780         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2781         name = nfscmd_convname(ca, exi, args->from.name,
2782             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2783 
2784         if (name == NULL) {
2785                 resp->status = NFS3ERR_INVAL;
2786                 goto err1;
2787         }
2788 
2789         toname = nfscmd_convname(ca, exi, args->to.name,
2790             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2791 
2792         if (toname == NULL) {
2793                 resp->status = NFS3ERR_INVAL;
2794                 goto err1;
2795         }
2796 
2797         /*
2798          * Check for a conflict with a non-blocking mandatory share
2799          * reservation or V4 delegations.
2800          */
2801         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2802             NULL, cr, NULL, NULL, NULL);
2803         if (error != 0)
2804                 goto err;
2805 
2806         /*
2807          * If we rename a delegated file we should recall the
2808          * delegation, since future opens should fail or would
2809          * refer to a new file.
2810          */
2811         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2812                 resp->status = NFS3ERR_JUKEBOX;
2813                 goto err1;
2814         }
2815 
2816         /*
2817          * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2818          * first to avoid VOP_LOOKUP if possible.
2819          */
2820         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2821             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2822             NULL, NULL, NULL) == 0) {
2823 
2824                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2825                         VN_RELE(targvp);
2826                         resp->status = NFS3ERR_JUKEBOX;
2827                         goto err1;
2828                 }
2829                 VN_RELE(targvp);
2830         }
2831 
2832         if (!nbl_need_check(srcvp)) {
2833                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2834         } else {
2835                 nbl_start_crit(srcvp, RW_READER);
2836                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2837                         error = EACCES;
2838                 else
2839                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2840                 nbl_end_crit(srcvp);
2841         }
2842         if (error == 0)
2843                 vn_renamepath(tvp, srcvp, args->to.name,
2844                     strlen(args->to.name));
2845         VN_RELE(srcvp);
2846         srcvp = NULL;
2847 
2848         fava.va_mask = AT_ALL;
2849         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2850         tava.va_mask = AT_ALL;
2851         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2852 
2853         /*
2854          * Force modified data and metadata out to stable storage.
2855          */
2856         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2857         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2858 
2859         if (error)
2860                 goto err;
2861 
2862         resp->status = NFS3_OK;
2863         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2864         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2865         goto out;
2866 
2867 err:
2868         if (curthread->t_flag & T_WOULDBLOCK) {
2869                 curthread->t_flag &= ~T_WOULDBLOCK;
2870                 resp->status = NFS3ERR_JUKEBOX;
2871         } else {
2872                 resp->status = puterrno3(error);
2873         }
2874 err1:
2875         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2876         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2877 
2878 out:
2879         if (name != NULL && name != args->from.name)
2880                 kmem_free(name, MAXPATHLEN + 1);
2881         if (toname != NULL && toname != args->to.name)
2882                 kmem_free(toname, MAXPATHLEN + 1);
2883 
2884         DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2885             cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2886         if (fvp != NULL)
2887                 VN_RELE(fvp);
2888         if (tvp != NULL)
2889                 VN_RELE(tvp);
2890 }
2891 
2892 void *
2893 rfs3_rename_getfh(RENAME3args *args)
2894 {
2895 
2896         return (&args->from.dir);
2897 }
2898 
2899 void
2900 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2901         struct svc_req *req, cred_t *cr)
2902 {
2903         int error;
2904         vnode_t *vp;
2905         vnode_t *dvp;
2906         struct vattr *vap;
2907         struct vattr va;
2908         struct vattr *bvap;
2909         struct vattr bva;
2910         struct vattr *avap;
2911         struct vattr ava;
2912         nfs_fh3 *fh3;
2913         struct exportinfo *to_exi;
2914         bslabel_t *clabel;
2915         struct sockaddr *ca;
2916         char *name = NULL;
2917 
2918         vap = NULL;
2919         bvap = NULL;
2920         avap = NULL;
2921         dvp = NULL;
2922 
2923         vp = nfs3_fhtovp(&args->file, exi);
2924 
2925         DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2926             cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2927 
2928         if (vp == NULL) {
2929                 error = ESTALE;
2930                 goto out;
2931         }
2932 
2933         va.va_mask = AT_ALL;
2934         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2935 
2936         fh3 = &args->link.dir;
2937         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2938         if (to_exi == NULL) {
2939                 resp->status = NFS3ERR_ACCES;
2940                 goto out1;
2941         }
2942         exi_rele(to_exi);
2943 
2944         if (to_exi != exi) {
2945                 resp->status = NFS3ERR_XDEV;
2946                 goto out1;
2947         }
2948 
2949         if (is_system_labeled()) {
2950                 clabel = req->rq_label;
2951 
2952                 ASSERT(clabel != NULL);
2953                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2954                     "got client label from request(1)", struct svc_req *, req);
2955 
2956                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2957                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2958                             exi)) {
2959                                 resp->status = NFS3ERR_ACCES;
2960                                 goto out1;
2961                         }
2962                 }
2963         }
2964 
2965         dvp = nfs3_fhtovp(&args->link.dir, exi);
2966         if (dvp == NULL) {
2967                 error = ESTALE;
2968                 goto out;
2969         }
2970 
2971         bva.va_mask = AT_ALL;
2972         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2973 
2974         if (dvp->v_type != VDIR) {
2975                 resp->status = NFS3ERR_NOTDIR;
2976                 goto out1;
2977         }
2978 
2979         if (args->link.name == nfs3nametoolong) {
2980                 resp->status = NFS3ERR_NAMETOOLONG;
2981                 goto out1;
2982         }
2983 
2984         if (args->link.name == NULL || *(args->link.name) == '\0') {
2985                 resp->status = NFS3ERR_ACCES;
2986                 goto out1;
2987         }
2988 
2989         if (rdonly(exi, req)) {
2990                 resp->status = NFS3ERR_ROFS;
2991                 goto out1;
2992         }
2993 
2994         if (is_system_labeled()) {
2995                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2996                     "got client label from request(1)", struct svc_req *, req);
2997 
2998                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2999                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3000                             exi)) {
3001                                 resp->status = NFS3ERR_ACCES;
3002                                 goto out1;
3003                         }
3004                 }
3005         }
3006 
3007         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3008         name = nfscmd_convname(ca, exi, args->link.name,
3009             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3010 
3011         if (name == NULL) {
3012                 resp->status = NFS3ERR_SERVERFAULT;
3013                 goto out1;
3014         }
3015 
3016         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3017 
3018         va.va_mask = AT_ALL;
3019         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3020         ava.va_mask = AT_ALL;
3021         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3022 
3023         /*
3024          * Force modified data and metadata out to stable storage.
3025          */
3026         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3027         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3028 
3029         if (error)
3030                 goto out;
3031 
3032         VN_RELE(dvp);
3033 
3034         resp->status = NFS3_OK;
3035         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3036         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3037 
3038         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3039             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3040 
3041         VN_RELE(vp);
3042 
3043         return;
3044 
3045 out:
3046         if (curthread->t_flag & T_WOULDBLOCK) {
3047                 curthread->t_flag &= ~T_WOULDBLOCK;
3048                 resp->status = NFS3ERR_JUKEBOX;
3049         } else
3050                 resp->status = puterrno3(error);
3051 out1:
3052         if (name != NULL && name != args->link.name)
3053                 kmem_free(name, MAXPATHLEN + 1);
3054 
3055         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3056             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3057 
3058         if (vp != NULL)
3059                 VN_RELE(vp);
3060         if (dvp != NULL)
3061                 VN_RELE(dvp);
3062         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3063         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3064 }
3065 
3066 void *
3067 rfs3_link_getfh(LINK3args *args)
3068 {
3069 
3070         return (&args->file);
3071 }
3072 
/*
 * Size, in bytes, of a READDIR response that carries directory
 * attributes plus exactly one directory entry whose name is `length'
 * bytes long.  A request whose count is larger than this is guaranteed
 * to have room for at least one directory entry if one exists, so
 * NFS3ERR_TOOSMALL need not be considered.  A request whose count is
 * not larger than this must be checked to see whether that error has
 * to be returned.
 *
 * The total is built from the following pieces, each counted in XDR
 * units and then converted to bytes with BYTES_PER_XDR_UNIT:
 *
 * status                  1
 * attribute flag          1
 * cookie verifier         2
 * attributes              NFS3_SIZEOF_FATTR3
 * boolean                 1
 * file id                 2
 * directory name length   1
 * cookie                  2
 * end of list             1
 * end of file             1
 *
 * to which the name length, rounded up to a whole XDR unit, is added.
 */

#define NFS3_READDIR_MIN_COUNT(length)  \
        (roundup((length), BYTES_PER_XDR_UNIT) + \
            (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
            BYTES_PER_XDR_UNIT)
3101 
3102 /* ARGSUSED */
3103 void
3104 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3105         struct svc_req *req, cred_t *cr)
3106 {
3107         int error;
3108         vnode_t *vp;
3109         struct vattr *vap;
3110         struct vattr va;
3111         struct iovec iov;
3112         struct uio uio;
3113         char *data;
3114         int iseof;
3115         int bufsize;
3116         int namlen;
3117         uint_t count;
3118         struct sockaddr *ca;
3119 
3120         vap = NULL;
3121 
3122         vp = nfs3_fhtovp(&args->dir, exi);
3123 
3124         DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3125             cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3126 
3127         if (vp == NULL) {
3128                 error = ESTALE;
3129                 goto out;
3130         }
3131 
3132         if (is_system_labeled()) {
3133                 bslabel_t *clabel = req->rq_label;
3134 
3135                 ASSERT(clabel != NULL);
3136                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3137                     "got client label from request(1)", struct svc_req *, req);
3138 
3139                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3140                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3141                             exi)) {
3142                                 resp->status = NFS3ERR_ACCES;
3143                                 goto out1;
3144                         }
3145                 }
3146         }
3147 
3148         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3149 
3150         va.va_mask = AT_ALL;
3151         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3152 
3153         if (vp->v_type != VDIR) {
3154                 resp->status = NFS3ERR_NOTDIR;
3155                 goto out1;
3156         }
3157 
3158         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3159         if (error)
3160                 goto out;
3161 
3162         /*
3163          * Now don't allow arbitrary count to alloc;
3164          * allow the maximum not to exceed rfs3_tsize()
3165          */
3166         if (args->count > rfs3_tsize(req))
3167                 args->count = rfs3_tsize(req);
3168 
3169         /*
3170          * Make sure that there is room to read at least one entry
3171          * if any are available.
3172          */
3173         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3174                 count = DIRENT64_RECLEN(MAXNAMELEN);
3175         else
3176                 count = args->count;
3177 
3178         data = kmem_alloc(count, KM_SLEEP);
3179 
3180         iov.iov_base = data;
3181         iov.iov_len = count;
3182         uio.uio_iov = &iov;
3183         uio.uio_iovcnt = 1;
3184         uio.uio_segflg = UIO_SYSSPACE;
3185         uio.uio_extflg = UIO_COPY_CACHED;
3186         uio.uio_loffset = (offset_t)args->cookie;
3187         uio.uio_resid = count;
3188 
3189         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3190 
3191         va.va_mask = AT_ALL;
3192         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3193 
3194         if (error) {
3195                 kmem_free(data, count);
3196                 goto out;
3197         }
3198 
3199         /*
3200          * If the count was not large enough to be able to guarantee
3201          * to be able to return at least one entry, then need to
3202          * check to see if NFS3ERR_TOOSMALL should be returned.
3203          */
3204         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3205                 /*
3206                  * bufsize is used to keep track of the size of the response.
3207                  * It is primed with:
3208                  *      1 for the status +
3209                  *      1 for the dir_attributes.attributes boolean +
3210                  *      2 for the cookie verifier
3211                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3212                  * to bytes.  If there are directory attributes to be
3213                  * returned, then:
3214                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3215                  * time BYTES_PER_XDR_UNIT is added to account for them.
3216                  */
3217                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3218                 if (vap != NULL)
3219                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3220                 /*
3221                  * An entry is composed of:
3222                  *      1 for the true/false list indicator +
3223                  *      2 for the fileid +
3224                  *      1 for the length of the name +
3225                  *      2 for the cookie +
3226                  * all times BYTES_PER_XDR_UNIT to convert from
3227                  * XDR units to bytes, plus the length of the name
3228                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3229                  */
3230                 if (count != uio.uio_resid) {
3231                         namlen = strlen(((struct dirent64 *)data)->d_name);
3232                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3233                             roundup(namlen, BYTES_PER_XDR_UNIT);
3234                 }
3235                 /*
3236                  * We need to check to see if the number of bytes left
3237                  * to go into the buffer will actually fit into the
3238                  * buffer.  This is calculated as the size of this
3239                  * entry plus:
3240                  *      1 for the true/false list indicator +
3241                  *      1 for the eof indicator
3242                  * times BYTES_PER_XDR_UNIT to convert from from
3243                  * XDR units to bytes.
3244                  */
3245                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3246                 if (bufsize > args->count) {
3247                         kmem_free(data, count);
3248                         resp->status = NFS3ERR_TOOSMALL;
3249                         goto out1;
3250                 }
3251         }
3252 
3253         /*
3254          * Have a valid readir buffer for the native character
3255          * set. Need to check if a conversion is necessary and
3256          * potentially rewrite the whole buffer. Note that if the
3257          * conversion expands names enough, the structure may not
3258          * fit. In this case, we need to drop entries until if fits
3259          * and patch the counts in order that the next readdir will
3260          * get the correct entries.
3261          */
3262         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3263         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3264 
3265 
3266         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3267 
3268 #if 0 /* notyet */
3269         /*
3270          * Don't do this.  It causes local disk writes when just
3271          * reading the file and the overhead is deemed larger
3272          * than the benefit.
3273          */
3274         /*
3275          * Force modified metadata out to stable storage.
3276          */
3277         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3278 #endif
3279 
3280         resp->status = NFS3_OK;
3281         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3282         resp->resok.cookieverf = 0;
3283         resp->resok.reply.entries = (entry3 *)data;
3284         resp->resok.reply.eof = iseof;
3285         resp->resok.size = count - uio.uio_resid;
3286         resp->resok.count = args->count;
3287         resp->resok.freecount = count;
3288 
3289         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3290             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3291 
3292         VN_RELE(vp);
3293 
3294         return;
3295 
3296 out:
3297         if (curthread->t_flag & T_WOULDBLOCK) {
3298                 curthread->t_flag &= ~T_WOULDBLOCK;
3299                 resp->status = NFS3ERR_JUKEBOX;
3300         } else
3301                 resp->status = puterrno3(error);
3302 out1:
3303         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3304             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3305 
3306         if (vp != NULL) {
3307                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3308                 VN_RELE(vp);
3309         }
3310         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3311 }
3312 
3313 void *
3314 rfs3_readdir_getfh(READDIR3args *args)
3315 {
3316 
3317         return (&args->dir);
3318 }
3319 
3320 void
3321 rfs3_readdir_free(READDIR3res *resp)
3322 {
3323 
3324         if (resp->status == NFS3_OK)
3325                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3326 }
3327 
/*
 * Given a pointer to a dirent64 record, yield a pointer to the location
 * d_reclen bytes past its start.  Any earlier definition of nextdp is
 * discarded first.
 */
#undef nextdp
#define nextdp(dp)      ((struct dirent64 *)&((char *)(dp))[(dp)->d_reclen])
3332 
/*
 * This macro computes the size, in bytes, of a response fragment that
 * holds one directory entry together with its attributes and its file
 * handle.  If the space remaining in a request is larger than this,
 * then we are guaranteed to be able to return at least one more
 * directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status flag for file handle - 1 * BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * name length of the entry, rounded up to a whole XDR unit
 *
 * The namelen argument is parenthesized in the expansion so that an
 * expression argument is handled the same way as in
 * NFS3_READDIR_MIN_COUNT.
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
                BYTES_PER_XDR_UNIT + \
        NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3356 
/*
 * Initial size, in bytes, of each VOP_READDIR request issued by
 * rfs3_readdirplus(); that function clamps it to the space remaining in
 * its buffer before every read.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3358 
3359 /* ARGSUSED */
3360 void
3361 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3362         struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3363 {
3364         int error;
3365         vnode_t *vp;
3366         struct vattr *vap;
3367         struct vattr va;
3368         struct iovec iov;
3369         struct uio uio;
3370         char *data;
3371         int iseof;
3372         struct dirent64 *dp;
3373         vnode_t *nvp;
3374         struct vattr *nvap;
3375         struct vattr nva;
3376         entryplus3_info *infop = NULL;
3377         int size = 0;
3378         int nents = 0;
3379         int bufsize = 0;
3380         int entrysize = 0;
3381         int tofit = 0;
3382         int rd_unit = rfs3_readdir_unit;
3383         int prev_len;
3384         int space_left;
3385         int i;
3386         uint_t *namlen = NULL;
3387         char *ndata = NULL;
3388         struct sockaddr *ca;
3389         size_t ret;
3390 
3391         vap = NULL;
3392 
3393         vp = nfs3_fhtovp(&args->dir, exi);
3394 
3395         DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3396             cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3397 
3398         if (vp == NULL) {
3399                 error = ESTALE;
3400                 goto out;
3401         }
3402 
3403         if (is_system_labeled()) {
3404                 bslabel_t *clabel = req->rq_label;
3405 
3406                 ASSERT(clabel != NULL);
3407                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3408                     char *, "got client label from request(1)",
3409                     struct svc_req *, req);
3410 
3411                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3412                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3413                             exi)) {
3414                                 resp->status = NFS3ERR_ACCES;
3415                                 goto out1;
3416                         }
3417                 }
3418         }
3419 
3420         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3421 
3422         va.va_mask = AT_ALL;
3423         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3424 
3425         if (vp->v_type != VDIR) {
3426                 error = ENOTDIR;
3427                 goto out;
3428         }
3429 
3430         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3431         if (error)
3432                 goto out;
3433 
3434         /*
3435          * Don't allow arbitrary counts for allocation
3436          */
3437         if (args->maxcount > rfs3_tsize(req))
3438                 args->maxcount = rfs3_tsize(req);
3439 
3440         /*
3441          * Make sure that there is room to read at least one entry
3442          * if any are available
3443          */
3444         args->dircount = MIN(args->dircount, args->maxcount);
3445 
3446         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3447                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3448 
3449         /*
3450          * This allocation relies on a minimum directory entry
3451          * being roughly 24 bytes.  Therefore, the namlen array
3452          * will have enough space based on the maximum number of
3453          * entries to read.
3454          */
3455         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3456 
3457         space_left = args->dircount;
3458         data = kmem_alloc(args->dircount, KM_SLEEP);
3459         dp = (struct dirent64 *)data;
3460         uio.uio_iov = &iov;
3461         uio.uio_iovcnt = 1;
3462         uio.uio_segflg = UIO_SYSSPACE;
3463         uio.uio_extflg = UIO_COPY_CACHED;
3464         uio.uio_loffset = (offset_t)args->cookie;
3465 
3466         /*
3467          * bufsize is used to keep track of the size of the response as we
3468          * get post op attributes and filehandles for each entry.  This is
3469          * an optimization as the server may have read more entries than will
3470          * fit in the buffer specified by maxcount.  We stop calculating
3471          * post op attributes and filehandles once we have exceeded maxcount.
3472          * This will minimize the effect of truncation.
3473          *
3474          * It is primed with:
3475          *      1 for the status +
3476          *      1 for the dir_attributes.attributes boolean +
3477          *      2 for the cookie verifier
3478          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3479          * to bytes.  If there are directory attributes to be
3480          * returned, then:
3481          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3482          * time BYTES_PER_XDR_UNIT is added to account for them.
3483          */
3484         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3485         if (vap != NULL)
3486                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3487 
3488 getmoredents:
3489         /*
3490          * Here we make a check so that our read unit is not larger than
3491          * the space left in the buffer.
3492          */
3493         rd_unit = MIN(rd_unit, space_left);
3494         iov.iov_base = (char *)dp;
3495         iov.iov_len = rd_unit;
3496         uio.uio_resid = rd_unit;
3497         prev_len = rd_unit;
3498 
3499         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3500 
3501         if (error) {
3502                 kmem_free(data, args->dircount);
3503                 goto out;
3504         }
3505 
3506         if (uio.uio_resid == prev_len && !iseof) {
3507                 if (nents == 0) {
3508                         kmem_free(data, args->dircount);
3509                         resp->status = NFS3ERR_TOOSMALL;
3510                         goto out1;
3511                 }
3512 
3513                 /*
3514                  * We could not get any more entries, so get the attributes
3515                  * and filehandle for the entries already obtained.
3516                  */
3517                 goto good;
3518         }
3519 
3520         /*
3521          * We estimate the size of the response by assuming the
3522          * entry exists and attributes and filehandle are also valid
3523          */
3524         for (size = prev_len - uio.uio_resid;
3525             size > 0;
3526             size -= dp->d_reclen, dp = nextdp(dp)) {
3527 
3528                 if (dp->d_ino == 0) {
3529                         nents++;
3530                         continue;
3531                 }
3532 
3533                 namlen[nents] = strlen(dp->d_name);
3534                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3535 
3536                 /*
3537                  * We need to check to see if the number of bytes left
3538                  * to go into the buffer will actually fit into the
3539                  * buffer.  This is calculated as the size of this
3540                  * entry plus:
3541                  *      1 for the true/false list indicator +
3542                  *      1 for the eof indicator
3543                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3544                  * to bytes.
3545                  *
3546                  * Also check the dircount limit against the first entry read
3547                  *
3548                  */
3549                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3550                 if (bufsize + tofit > args->maxcount) {
3551                         /*
3552                          * We make a check here to see if this was the
3553                          * first entry being measured.  If so, then maxcount
3554                          * was too small to begin with and so we need to
3555                          * return with NFS3ERR_TOOSMALL.
3556                          */
3557                         if (nents == 0) {
3558                                 kmem_free(data, args->dircount);
3559                                 resp->status = NFS3ERR_TOOSMALL;
3560                                 goto out1;
3561                         }
3562                         iseof = FALSE;
3563                         goto good;
3564                 }
3565                 bufsize += entrysize;
3566                 nents++;
3567         }
3568 
3569         /*
3570          * If there is enough room to fit at least 1 more entry including
3571          * post op attributes and filehandle in the buffer AND that we haven't
3572          * exceeded dircount then go back and get some more.
3573          */
3574         if (!iseof &&
3575             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3576                 space_left -= (prev_len - uio.uio_resid);
3577                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3578                         goto getmoredents;
3579 
3580                 /* else, fall through */
3581         }
3582 good:
3583         va.va_mask = AT_ALL;
3584         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3585 
3586         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3587 
3588         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3589         resp->resok.infop = infop;
3590 
3591         dp = (struct dirent64 *)data;
3592         for (i = 0; i < nents; i++) {
3593 
3594                 if (dp->d_ino == 0) {
3595                         infop[i].attr.attributes = FALSE;
3596                         infop[i].fh.handle_follows = FALSE;
3597                         dp = nextdp(dp);
3598                         continue;
3599                 }
3600 
3601                 infop[i].namelen = namlen[i];
3602 
3603                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3604                     NULL, NULL, NULL);
3605                 if (error) {
3606                         infop[i].attr.attributes = FALSE;
3607                         infop[i].fh.handle_follows = FALSE;
3608                         dp = nextdp(dp);
3609                         continue;
3610                 }
3611 
3612                 nva.va_mask = AT_ALL;
3613                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3614 
3615                 /* Lie about the object type for a referral */
3616                 if (vn_is_nfs_reparse(nvp, cr))
3617                         nvap->va_type = VLNK;
3618 
3619                 if (vn_ismntpt(nvp)) {
3620                         infop[i].attr.attributes = FALSE;
3621                         infop[i].fh.handle_follows = FALSE;
3622                 } else {
3623                         vattr_to_post_op_attr(nvap, &infop[i].attr);
3624 
3625                         error = makefh3(&infop[i].fh.handle, nvp, exi);
3626                         if (!error)
3627                                 infop[i].fh.handle_follows = TRUE;
3628                         else
3629                                 infop[i].fh.handle_follows = FALSE;
3630                 }
3631 
3632                 VN_RELE(nvp);
3633                 dp = nextdp(dp);
3634         }
3635 
3636         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3637         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3638         if (ndata == NULL)
3639                 ndata = data;
3640 
3641         if (ret > 0) {
3642                 /*
3643                  * We had to drop one or more entries in order to fit
3644                  * during the character conversion.  We need to patch
3645                  * up the size and eof info.
3646                  */
3647                 if (iseof)
3648                         iseof = FALSE;
3649 
3650                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3651                     nents, ret);
3652         }
3653 
3654 
3655 #if 0 /* notyet */
3656         /*
3657          * Don't do this.  It causes local disk writes when just
3658          * reading the file and the overhead is deemed larger
3659          * than the benefit.
3660          */
3661         /*
3662          * Force modified metadata out to stable storage.
3663          */
3664         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3665 #endif
3666 
3667         kmem_free(namlen, args->dircount);
3668 
3669         resp->status = NFS3_OK;
3670         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3671         resp->resok.cookieverf = 0;
3672         resp->resok.reply.entries = (entryplus3 *)ndata;
3673         resp->resok.reply.eof = iseof;
3674         resp->resok.size = nents;
3675         resp->resok.count = args->dircount - ret;
3676         resp->resok.maxcount = args->maxcount;
3677 
3678         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3679             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3680         if (ndata != data)
3681                 kmem_free(data, args->dircount);
3682 
3683 
3684         VN_RELE(vp);
3685 
3686         return;
3687 
3688 out:
3689         if (curthread->t_flag & T_WOULDBLOCK) {
3690                 curthread->t_flag &= ~T_WOULDBLOCK;
3691                 resp->status = NFS3ERR_JUKEBOX;
3692         } else {
3693                 resp->status = puterrno3(error);
3694         }
3695 out1:
3696         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3697             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3698 
3699         if (vp != NULL) {
3700                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3701                 VN_RELE(vp);
3702         }
3703 
3704         if (namlen != NULL)
3705                 kmem_free(namlen, args->dircount);
3706 
3707         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3708 }
3709 
3710 void *
3711 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3712 {
3713 
3714         return (&args->dir);
3715 }
3716 
3717 void
3718 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3719 {
3720 
3721         if (resp->status == NFS3_OK) {
3722                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3723                 kmem_free(resp->resok.infop,
3724                     resp->resok.size * sizeof (struct entryplus3_info));
3725         }
3726 }
3727 
3728 /* ARGSUSED */
3729 void
3730 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3731         struct svc_req *req, cred_t *cr)
3732 {
3733         int error;
3734         vnode_t *vp;
3735         struct vattr *vap;
3736         struct vattr va;
3737         struct statvfs64 sb;
3738 
3739         vap = NULL;
3740 
3741         vp = nfs3_fhtovp(&args->fsroot, exi);
3742 
3743         DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3744             cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3745 
3746         if (vp == NULL) {
3747                 error = ESTALE;
3748                 goto out;
3749         }
3750 
3751         if (is_system_labeled()) {
3752                 bslabel_t *clabel = req->rq_label;
3753 
3754                 ASSERT(clabel != NULL);
3755                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3756                     "got client label from request(1)", struct svc_req *, req);
3757 
3758                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3759                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3760                             exi)) {
3761                                 resp->status = NFS3ERR_ACCES;
3762                                 goto out1;
3763                         }
3764                 }
3765         }
3766 
3767         error = VFS_STATVFS(vp->v_vfsp, &sb);
3768 
3769         va.va_mask = AT_ALL;
3770         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3771 
3772         if (error)
3773                 goto out;
3774 
3775         resp->status = NFS3_OK;
3776         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3777         if (sb.f_blocks != (fsblkcnt64_t)-1)
3778                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3779         else
3780                 resp->resok.tbytes = (size3)sb.f_blocks;
3781         if (sb.f_bfree != (fsblkcnt64_t)-1)
3782                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3783         else
3784                 resp->resok.fbytes = (size3)sb.f_bfree;
3785         if (sb.f_bavail != (fsblkcnt64_t)-1)
3786                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3787         else
3788                 resp->resok.abytes = (size3)sb.f_bavail;
3789         resp->resok.tfiles = (size3)sb.f_files;
3790         resp->resok.ffiles = (size3)sb.f_ffree;
3791         resp->resok.afiles = (size3)sb.f_favail;
3792         resp->resok.invarsec = 0;
3793 
3794         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3795             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3796         VN_RELE(vp);
3797 
3798         return;
3799 
3800 out:
3801         if (curthread->t_flag & T_WOULDBLOCK) {
3802                 curthread->t_flag &= ~T_WOULDBLOCK;
3803                 resp->status = NFS3ERR_JUKEBOX;
3804         } else
3805                 resp->status = puterrno3(error);
3806 out1:
3807         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3808             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3809 
3810         if (vp != NULL)
3811                 VN_RELE(vp);
3812         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3813 }
3814 
3815 void *
3816 rfs3_fsstat_getfh(FSSTAT3args *args)
3817 {
3818 
3819         return (&args->fsroot);
3820 }
3821 
3822 void
3823 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3824         struct svc_req *req, cred_t *cr)
3825 {
3826         vnode_t *vp;
3827         struct vattr *vap;
3828         struct vattr va;
3829         uint32_t xfer_size;
3830         ulong_t l = 0;
3831         int error;
3832 
3833         vp = nfs3_fhtovp(&args->fsroot, exi);
3834 
3835         DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3836             cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3837 
3838         if (vp == NULL) {
3839                 if (curthread->t_flag & T_WOULDBLOCK) {
3840                         curthread->t_flag &= ~T_WOULDBLOCK;
3841                         resp->status = NFS3ERR_JUKEBOX;
3842                 } else
3843                         resp->status = NFS3ERR_STALE;
3844                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3845                 goto out;
3846         }
3847 
3848         if (is_system_labeled()) {
3849                 bslabel_t *clabel = req->rq_label;
3850 
3851                 ASSERT(clabel != NULL);
3852                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3853                     "got client label from request(1)", struct svc_req *, req);
3854 
3855                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3856                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3857                             exi)) {
3858                                 resp->status = NFS3ERR_STALE;
3859                                 vattr_to_post_op_attr(NULL,
3860                                     &resp->resfail.obj_attributes);
3861                                 goto out;
3862                         }
3863                 }
3864         }
3865 
3866         va.va_mask = AT_ALL;
3867         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3868 
3869         resp->status = NFS3_OK;
3870         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3871         xfer_size = rfs3_tsize(req);
3872         resp->resok.rtmax = xfer_size;
3873         resp->resok.rtpref = xfer_size;
3874         resp->resok.rtmult = DEV_BSIZE;
3875         resp->resok.wtmax = xfer_size;
3876         resp->resok.wtpref = xfer_size;
3877         resp->resok.wtmult = DEV_BSIZE;
3878         resp->resok.dtpref = MAXBSIZE;
3879 
3880         /*
3881          * Large file spec: want maxfilesize based on limit of
3882          * underlying filesystem.  We can guess 2^31-1 if need be.
3883          */
3884         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3885         if (error) {
3886                 resp->status = puterrno3(error);
3887                 goto out;
3888         }
3889 
3890         /*
3891          * If the underlying file system does not support _PC_FILESIZEBITS,
3892          * return a reasonable default. Note that error code on VOP_PATHCONF
3893          * will be 0, even if the underlying file system does not support
3894          * _PC_FILESIZEBITS.
3895          */
3896         if (l == (ulong_t)-1) {
3897                 resp->resok.maxfilesize = MAXOFF32_T;
3898         } else {
3899                 if (l >= (sizeof (uint64_t) * 8))
3900                         resp->resok.maxfilesize = INT64_MAX;
3901                 else
3902                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3903         }
3904 
3905         resp->resok.time_delta.seconds = 0;
3906         resp->resok.time_delta.nseconds = 1000;
3907         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3908             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3909 
3910         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3911             cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3912 
3913         VN_RELE(vp);
3914 
3915         return;
3916 
3917 out:
3918         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3919             cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3920         if (vp != NULL)
3921                 VN_RELE(vp);
3922 }
3923 
3924 void *
3925 rfs3_fsinfo_getfh(FSINFO3args *args)
3926 {
3927 
3928         return (&args->fsroot);
3929 }
3930 
3931 /* ARGSUSED */
3932 void
3933 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3934         struct svc_req *req, cred_t *cr)
3935 {
3936         int error;
3937         vnode_t *vp;
3938         struct vattr *vap;
3939         struct vattr va;
3940         ulong_t val;
3941 
3942         vap = NULL;
3943 
3944         vp = nfs3_fhtovp(&args->object, exi);
3945 
3946         DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3947             cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3948 
3949         if (vp == NULL) {
3950                 error = ESTALE;
3951                 goto out;
3952         }
3953 
3954         if (is_system_labeled()) {
3955                 bslabel_t *clabel = req->rq_label;
3956 
3957                 ASSERT(clabel != NULL);
3958                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3959                     "got client label from request(1)", struct svc_req *, req);
3960 
3961                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3962                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3963                             exi)) {
3964                                 resp->status = NFS3ERR_ACCES;
3965                                 goto out1;
3966                         }
3967                 }
3968         }
3969 
3970         va.va_mask = AT_ALL;
3971         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3972 
3973         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3974         if (error)
3975                 goto out;
3976         resp->resok.info.link_max = (uint32)val;
3977 
3978         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3979         if (error)
3980                 goto out;
3981         resp->resok.info.name_max = (uint32)val;
3982 
3983         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3984         if (error)
3985                 goto out;
3986         if (val == 1)
3987                 resp->resok.info.no_trunc = TRUE;
3988         else
3989                 resp->resok.info.no_trunc = FALSE;
3990 
3991         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3992         if (error)
3993                 goto out;
3994         if (val == 1)
3995                 resp->resok.info.chown_restricted = TRUE;
3996         else
3997                 resp->resok.info.chown_restricted = FALSE;
3998 
3999         resp->status = NFS3_OK;
4000         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4001         resp->resok.info.case_insensitive = FALSE;
4002         resp->resok.info.case_preserving = TRUE;
4003         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4004             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4005         VN_RELE(vp);
4006         return;
4007 
4008 out:
4009         if (curthread->t_flag & T_WOULDBLOCK) {
4010                 curthread->t_flag &= ~T_WOULDBLOCK;
4011                 resp->status = NFS3ERR_JUKEBOX;
4012         } else
4013                 resp->status = puterrno3(error);
4014 out1:
4015         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4016             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4017         if (vp != NULL)
4018                 VN_RELE(vp);
4019         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4020 }
4021 
4022 void *
4023 rfs3_pathconf_getfh(PATHCONF3args *args)
4024 {
4025 
4026         return (&args->object);
4027 }
4028 
4029 void
4030 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4031         struct svc_req *req, cred_t *cr)
4032 {
4033         int error;
4034         vnode_t *vp;
4035         struct vattr *bvap;
4036         struct vattr bva;
4037         struct vattr *avap;
4038         struct vattr ava;
4039 
4040         bvap = NULL;
4041         avap = NULL;
4042 
4043         vp = nfs3_fhtovp(&args->file, exi);
4044 
4045         DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4046             cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4047 
4048         if (vp == NULL) {
4049                 error = ESTALE;
4050                 goto out;
4051         }
4052 
4053         bva.va_mask = AT_ALL;
4054         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4055 
4056         /*
4057          * If we can't get the attributes, then we can't do the
4058          * right access checking.  So, we'll fail the request.
4059          */
4060         if (error)
4061                 goto out;
4062 
4063         bvap = &bva;
4064 
4065         if (rdonly(exi, req)) {
4066                 resp->status = NFS3ERR_ROFS;
4067                 goto out1;
4068         }
4069 
4070         if (vp->v_type != VREG) {
4071                 resp->status = NFS3ERR_INVAL;
4072                 goto out1;
4073         }
4074 
4075         if (is_system_labeled()) {
4076                 bslabel_t *clabel = req->rq_label;
4077 
4078                 ASSERT(clabel != NULL);
4079                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4080                     "got client label from request(1)", struct svc_req *, req);
4081 
4082                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4083                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4084                             exi)) {
4085                                 resp->status = NFS3ERR_ACCES;
4086                                 goto out1;
4087                         }
4088                 }
4089         }
4090 
4091         if (crgetuid(cr) != bva.va_uid &&
4092             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4093                 goto out;
4094 
4095         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4096 
4097         ava.va_mask = AT_ALL;
4098         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4099 
4100         if (error)
4101                 goto out;
4102 
4103         resp->status = NFS3_OK;
4104         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4105         resp->resok.verf = write3verf;
4106 
4107         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4108             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4109 
4110         VN_RELE(vp);
4111 
4112         return;
4113 
4114 out:
4115         if (curthread->t_flag & T_WOULDBLOCK) {
4116                 curthread->t_flag &= ~T_WOULDBLOCK;
4117                 resp->status = NFS3ERR_JUKEBOX;
4118         } else
4119                 resp->status = puterrno3(error);
4120 out1:
4121         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4122             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4123 
4124         if (vp != NULL)
4125                 VN_RELE(vp);
4126         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4127 }
4128 
4129 void *
4130 rfs3_commit_getfh(COMMIT3args *args)
4131 {
4132 
4133         return (&args->file);
4134 }
4135 
4136 static int
4137 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4138 {
4139 
4140         vap->va_mask = 0;
4141 
4142         if (sap->mode.set_it) {
4143                 vap->va_mode = (mode_t)sap->mode.mode;
4144                 vap->va_mask |= AT_MODE;
4145         }
4146         if (sap->uid.set_it) {
4147                 vap->va_uid = (uid_t)sap->uid.uid;
4148                 vap->va_mask |= AT_UID;
4149         }
4150         if (sap->gid.set_it) {
4151                 vap->va_gid = (gid_t)sap->gid.gid;
4152                 vap->va_mask |= AT_GID;
4153         }
4154         if (sap->size.set_it) {
4155                 if (sap->size.size > (size3)((u_longlong_t)-1))
4156                         return (EINVAL);
4157                 vap->va_size = sap->size.size;
4158                 vap->va_mask |= AT_SIZE;
4159         }
4160         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4161 #ifndef _LP64
4162                 /* check time validity */
4163                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4164                         return (EOVERFLOW);
4165 #endif
4166                 /*
4167                  * nfs protocol defines times as unsigned so don't extend sign,
4168                  * unless sysadmin set nfs_allow_preepoch_time.
4169                  */
4170                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4171                     sap->atime.atime.seconds);
4172                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4173                 vap->va_mask |= AT_ATIME;
4174         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4175                 gethrestime(&vap->va_atime);
4176                 vap->va_mask |= AT_ATIME;
4177         }
4178         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4179 #ifndef _LP64
4180                 /* check time validity */
4181                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4182                         return (EOVERFLOW);
4183 #endif
4184                 /*
4185                  * nfs protocol defines times as unsigned so don't extend sign,
4186                  * unless sysadmin set nfs_allow_preepoch_time.
4187                  */
4188                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4189                     sap->mtime.mtime.seconds);
4190                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4191                 vap->va_mask |= AT_MTIME;
4192         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4193                 gethrestime(&vap->va_mtime);
4194                 vap->va_mask |= AT_MTIME;
4195         }
4196 
4197         return (0);
4198 }
4199 
4200 static ftype3 vt_to_nf3[] = {
4201         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4202 };
4203 
4204 static int
4205 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4206 {
4207 
4208         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4209         /* Return error if time or size overflow */
4210         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4211                 return (EOVERFLOW);
4212         }
4213         fap->type = vt_to_nf3[vap->va_type];
4214         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4215         fap->nlink = (uint32)vap->va_nlink;
4216         if (vap->va_uid == UID_NOBODY)
4217                 fap->uid = (uid3)NFS_UID_NOBODY;
4218         else
4219                 fap->uid = (uid3)vap->va_uid;
4220         if (vap->va_gid == GID_NOBODY)
4221                 fap->gid = (gid3)NFS_GID_NOBODY;
4222         else
4223                 fap->gid = (gid3)vap->va_gid;
4224         fap->size = (size3)vap->va_size;
4225         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4226         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4227         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4228         fap->fsid = (uint64)vap->va_fsid;
4229         fap->fileid = (fileid3)vap->va_nodeid;
4230         fap->atime.seconds = vap->va_atime.tv_sec;
4231         fap->atime.nseconds = vap->va_atime.tv_nsec;
4232         fap->mtime.seconds = vap->va_mtime.tv_sec;
4233         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4234         fap->ctime.seconds = vap->va_ctime.tv_sec;
4235         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4236         return (0);
4237 }
4238 
4239 static int
4240 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4241 {
4242 
4243         /* Return error if time or size overflow */
4244         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4245             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4246             NFS3_SIZE_OK(vap->va_size))) {
4247                 return (EOVERFLOW);
4248         }
4249         wccap->size = (size3)vap->va_size;
4250         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4251         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4252         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4253         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4254         return (0);
4255 }
4256 
4257 static void
4258 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4259 {
4260 
4261         /* don't return attrs if time overflow */
4262         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4263                 poap->attributes = TRUE;
4264         } else
4265                 poap->attributes = FALSE;
4266 }
4267 
4268 void
4269 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4270 {
4271 
4272         /* don't return attrs if time overflow */
4273         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4274                 poap->attributes = TRUE;
4275         } else
4276                 poap->attributes = FALSE;
4277 }
4278 
4279 static void
4280 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4281 {
4282 
4283         vattr_to_pre_op_attr(bvap, &wccp->before);
4284         vattr_to_post_op_attr(avap, &wccp->after);
4285 }
4286 
4287 void
4288 rfs3_srvrinit(void)
4289 {
4290         struct rfs3_verf_overlay {
4291                 uint_t id; /* a "unique" identifier */
4292                 int ts; /* a unique timestamp */
4293         } *verfp;
4294         timestruc_t now;
4295 
4296         /*
4297          * The following algorithm attempts to find a unique verifier
4298          * to be used as the write verifier returned from the server
4299          * to the client.  It is important that this verifier change
4300          * whenever the server reboots.  Of secondary importance, it
4301          * is important for the verifier to be unique between two
4302          * different servers.
4303          *
4304          * Thus, an attempt is made to use the system hostid and the
4305          * current time in seconds when the nfssrv kernel module is
4306          * loaded.  It is assumed that an NFS server will not be able
4307          * to boot and then to reboot in less than a second.  If the
4308          * hostid has not been set, then the current high resolution
4309          * time is used.  This will ensure different verifiers each
4310          * time the server reboots and minimize the chances that two
4311          * different servers will have the same verifier.
4312          */
4313 
4314 #ifndef lint
4315         /*
4316          * We ASSERT that this constant logic expression is
4317          * always true because in the past, it wasn't.
4318          */
4319         ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4320 #endif
4321 
4322         gethrestime(&now);
4323         verfp = (struct rfs3_verf_overlay *)&write3verf;
4324         verfp->ts = (int)now.tv_sec;
4325         verfp->id = zone_get_hostid(NULL);
4326 
4327         if (verfp->id == 0)
4328                 verfp->id = (uint_t)now.tv_nsec;
4329 
4330         nfs3_srv_caller_id = fs_new_caller_id();
4331 
4332 }
4333 
4334 static int
4335 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4336 {
4337         struct clist    *wcl;
4338         int             wlist_len;
4339         count3          count = rok->count;
4340 
4341         wcl = args->wlist;
4342         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4343                 return (FALSE);
4344         }
4345 
4346         wcl = args->wlist;
4347         rok->wlist_len = wlist_len;
4348         rok->wlist = wcl;
4349         return (TRUE);
4350 }
4351 
/*
 * NFSv3 server teardown hook.  This routine holds no state of its own
 * to release, so the body is intentionally empty.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}