1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  26 /* All Rights Reserved */
  27 
  28 #include <sys/param.h>
  29 #include <sys/types.h>
  30 #include <sys/systm.h>
  31 #include <sys/cred.h>
  32 #include <sys/buf.h>
  33 #include <sys/vfs.h>
  34 #include <sys/vnode.h>
  35 #include <sys/uio.h>
  36 #include <sys/errno.h>
  37 #include <sys/sysmacros.h>
  38 #include <sys/statvfs.h>
  39 #include <sys/kmem.h>
  40 #include <sys/dirent.h>
  41 #include <sys/cmn_err.h>
  42 #include <sys/debug.h>
  43 #include <sys/systeminfo.h>
  44 #include <sys/flock.h>
  45 #include <sys/nbmlock.h>
  46 #include <sys/policy.h>
  47 #include <sys/sdt.h>
  48 
  49 #include <rpc/types.h>
  50 #include <rpc/auth.h>
  51 #include <rpc/svc.h>
  52 #include <rpc/rpc_rdma.h>
  53 
  54 #include <nfs/nfs.h>
  55 #include <nfs/export.h>
  56 #include <nfs/nfs_cmd.h>
  57 
  58 #include <sys/strsubr.h>
  59 
  60 #include <sys/tsol/label.h>
  61 #include <sys/tsol/tndb.h>
  62 
  63 #include <sys/zone.h>
  64 
  65 #include <inet/ip.h>
  66 #include <inet/ip6.h>
  67 
  68 /*
  69  * These are the interface routines for the server side of the
  70  * Network File System.  See the NFS version 3 protocol specification
  71  * for a description of this interface.
  72  */
  73 
  74 static writeverf3 write3verf;
  75 
  76 static int      sattr3_to_vattr(sattr3 *, struct vattr *);
  77 static int      vattr_to_fattr3(struct vattr *, fattr3 *);
  78 static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
  79 static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
  80 static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
  81 static int      rdma_setup_read_data3(READ3args *, READ3resok *);
  82 
  83 extern int nfs_loaned_buffers;
  84 
  85 u_longlong_t nfs3_srv_caller_id;
  86 
  87 /* ARGSUSED */
  88 void
  89 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
  90         struct svc_req *req, cred_t *cr)
  91 {
  92         int error;
  93         vnode_t *vp;
  94         struct vattr va;
  95 
  96         vp = nfs3_fhtovp(&args->object, exi);
  97 
  98         DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
  99             cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
 100 
 101         if (vp == NULL) {
 102                 error = ESTALE;
 103                 goto out;
 104         }
 105 
 106         va.va_mask = AT_ALL;
 107         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 108 
 109         if (!error) {
 110                 /* Lie about the object type for a referral */
 111                 if (vn_is_nfs_reparse(vp, cr))
 112                         va.va_type = VLNK;
 113 
 114                 /* overflow error if time or size is out of range */
 115                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 116                 if (error)
 117                         goto out;
 118                 resp->status = NFS3_OK;
 119 
 120                 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 121                     cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 122 
 123                 VN_RELE(vp);
 124 
 125                 return;
 126         }
 127 
 128 out:
 129         if (curthread->t_flag & T_WOULDBLOCK) {
 130                 curthread->t_flag &= ~T_WOULDBLOCK;
 131                 resp->status = NFS3ERR_JUKEBOX;
 132         } else
 133                 resp->status = puterrno3(error);
 134 
 135         DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 136             cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 137 
 138         if (vp != NULL)
 139                 VN_RELE(vp);
 140 }
 141 
 142 void *
 143 rfs3_getattr_getfh(GETATTR3args *args)
 144 {
 145 
 146         return (&args->object);
 147 }
 148 
 149 void
 150 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
 151         struct svc_req *req, cred_t *cr)
 152 {
 153         int error;
 154         vnode_t *vp;
 155         struct vattr *bvap;
 156         struct vattr bva;
 157         struct vattr *avap;
 158         struct vattr ava;
 159         int flag;
 160         int in_crit = 0;
 161         struct flock64 bf;
 162         caller_context_t ct;
 163 
 164         bvap = NULL;
 165         avap = NULL;
 166 
 167         vp = nfs3_fhtovp(&args->object, exi);
 168 
 169         DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
 170             cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
 171 
 172         if (vp == NULL) {
 173                 error = ESTALE;
 174                 goto out;
 175         }
 176 
 177         error = sattr3_to_vattr(&args->new_attributes, &ava);
 178         if (error)
 179                 goto out;
 180 
 181         if (is_system_labeled()) {
 182                 bslabel_t *clabel = req->rq_label;
 183 
 184                 ASSERT(clabel != NULL);
 185                 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
 186                     "got client label from request(1)", struct svc_req *, req);
 187 
 188                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 189                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
 190                             exi)) {
 191                                 resp->status = NFS3ERR_ACCES;
 192                                 goto out1;
 193                         }
 194                 }
 195         }
 196 
 197         /*
 198          * We need to specially handle size changes because of
 199          * possible conflicting NBMAND locks. Get into critical
 200          * region before VOP_GETATTR, so the size attribute is
 201          * valid when checking conflicts.
 202          *
 203          * Also, check to see if the v4 side of the server has
 204          * delegated this file.  If so, then we return JUKEBOX to
 205          * allow the client to retrasmit its request.
 206          */
 207         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 208                 if (nbl_need_check(vp)) {
 209                         nbl_start_crit(vp, RW_READER);
 210                         in_crit = 1;
 211                 }
 212         }
 213 
 214         bva.va_mask = AT_ALL;
 215         error = rfs4_delegated_getattr(vp, &bva, 0, cr);
 216 
 217         /*
 218          * If we can't get the attributes, then we can't do the
 219          * right access checking.  So, we'll fail the request.
 220          */
 221         if (error)
 222                 goto out;
 223 
 224         bvap = &bva;
 225 
 226         if (rdonly(exi, req) || vn_is_readonly(vp)) {
 227                 resp->status = NFS3ERR_ROFS;
 228                 goto out1;
 229         }
 230 
 231         if (args->guard.check &&
 232             (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
 233             args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
 234                 resp->status = NFS3ERR_NOT_SYNC;
 235                 goto out1;
 236         }
 237 
 238         if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
 239                 flag = ATTR_UTIME;
 240         else
 241                 flag = 0;
 242 
 243         /*
 244          * If the filesystem is exported with nosuid, then mask off
 245          * the setuid and setgid bits.
 246          */
 247         if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
 248             (exi->exi_export.ex_flags & EX_NOSUID))
 249                 ava.va_mode &= ~(VSUID | VSGID);
 250 
 251         ct.cc_sysid = 0;
 252         ct.cc_pid = 0;
 253         ct.cc_caller_id = nfs3_srv_caller_id;
 254         ct.cc_flags = CC_DONTBLOCK;
 255 
 256         /*
 257          * We need to specially handle size changes because it is
 258          * possible for the client to create a file with modes
 259          * which indicate read-only, but with the file opened for
 260          * writing.  If the client then tries to set the size of
 261          * the file, then the normal access checking done in
 262          * VOP_SETATTR would prevent the client from doing so,
 263          * although it should be legal for it to do so.  To get
 264          * around this, we do the access checking for ourselves
 265          * and then use VOP_SPACE which doesn't do the access
 266          * checking which VOP_SETATTR does. VOP_SPACE can only
 267          * operate on VREG files, let VOP_SETATTR handle the other
 268          * extremely rare cases.
 269          * Also the client should not be allowed to change the
 270          * size of the file if there is a conflicting non-blocking
 271          * mandatory lock in the region the change.
 272          */
 273         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 274                 if (in_crit) {
 275                         u_offset_t offset;
 276                         ssize_t length;
 277 
 278                         if (ava.va_size < bva.va_size) {
 279                                 offset = ava.va_size;
 280                                 length = bva.va_size - ava.va_size;
 281                         } else {
 282                                 offset = bva.va_size;
 283                                 length = ava.va_size - bva.va_size;
 284                         }
 285                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 286                             NULL)) {
 287                                 error = EACCES;
 288                                 goto out;
 289                         }
 290                 }
 291 
 292                 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
 293                         ava.va_mask &= ~AT_SIZE;
 294                         bf.l_type = F_WRLCK;
 295                         bf.l_whence = 0;
 296                         bf.l_start = (off64_t)ava.va_size;
 297                         bf.l_len = 0;
 298                         bf.l_sysid = 0;
 299                         bf.l_pid = 0;
 300                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 301                             (offset_t)ava.va_size, cr, &ct);
 302                 }
 303         }
 304 
 305         if (!error && ava.va_mask)
 306                 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
 307 
 308         /* check if a monitor detected a delegation conflict */
 309         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 310                 resp->status = NFS3ERR_JUKEBOX;
 311                 goto out1;
 312         }
 313 
 314         ava.va_mask = AT_ALL;
 315         avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 316 
 317         /*
 318          * Force modified metadata out to stable storage.
 319          */
 320         (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 321 
 322         if (error)
 323                 goto out;
 324 
 325         if (in_crit)
 326                 nbl_end_crit(vp);
 327 
 328         resp->status = NFS3_OK;
 329         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 330 
 331         DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 332             cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 333 
 334         VN_RELE(vp);
 335 
 336         return;
 337 
 338 out:
 339         if (curthread->t_flag & T_WOULDBLOCK) {
 340                 curthread->t_flag &= ~T_WOULDBLOCK;
 341                 resp->status = NFS3ERR_JUKEBOX;
 342         } else
 343                 resp->status = puterrno3(error);
 344 out1:
 345         DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 346             cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 347 
 348         if (vp != NULL) {
 349                 if (in_crit)
 350                         nbl_end_crit(vp);
 351                 VN_RELE(vp);
 352         }
 353         vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
 354 }
 355 
 356 void *
 357 rfs3_setattr_getfh(SETATTR3args *args)
 358 {
 359 
 360         return (&args->object);
 361 }
 362 
 363 /* ARGSUSED */
 364 void
 365 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 366         struct svc_req *req, cred_t *cr)
 367 {
 368         int error;
 369         vnode_t *vp;
 370         vnode_t *dvp;
 371         struct vattr *vap;
 372         struct vattr va;
 373         struct vattr *dvap;
 374         struct vattr dva;
 375         nfs_fh3 *fhp;
 376         struct sec_ol sec = {0, 0};
 377         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 378         struct sockaddr *ca;
 379         char *name = NULL;
 380 
 381         dvap = NULL;
 382 
 383         /*
 384          * Allow lookups from the root - the default
 385          * location of the public filehandle.
 386          */
 387         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 388                 dvp = rootdir;
 389                 VN_HOLD(dvp);
 390 
 391                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 392                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 393         } else {
 394                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 395 
 396                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 397                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 398 
 399                 if (dvp == NULL) {
 400                         error = ESTALE;
 401                         goto out;
 402                 }
 403         }
 404 
 405         dva.va_mask = AT_ALL;
 406         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 407 
 408         if (args->what.name == nfs3nametoolong) {
 409                 resp->status = NFS3ERR_NAMETOOLONG;
 410                 goto out1;
 411         }
 412 
 413         if (args->what.name == NULL || *(args->what.name) == '\0') {
 414                 resp->status = NFS3ERR_ACCES;
 415                 goto out1;
 416         }
 417 
 418         fhp = &args->what.dir;
 419         if (strcmp(args->what.name, "..") == 0 &&
 420             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 421                 resp->status = NFS3ERR_NOENT;
 422                 goto out1;
 423         }
 424 
 425         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 426         name = nfscmd_convname(ca, exi, args->what.name,
 427             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 428 
 429         if (name == NULL) {
 430                 resp->status = NFS3ERR_ACCES;
 431                 goto out1;
 432         }
 433 
 434         /*
 435          * If the public filehandle is used then allow
 436          * a multi-component lookup
 437          */
 438         if (PUBLIC_FH3(&args->what.dir)) {
 439                 publicfh_flag = TRUE;
 440                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 441                     &exi, &sec);
 442                 if (error && exi != NULL)
 443                         exi_rele(exi); /* See comment below Re: publicfh_flag */
 444                 /*
 445                  * Since WebNFS may bypass MOUNT, we need to ensure this
 446                  * request didn't come from an unlabeled admin_low client.
 447                  */
 448                 if (is_system_labeled() && error == 0) {
 449                         int             addr_type;
 450                         void            *ipaddr;
 451                         tsol_tpc_t      *tp;
 452 
 453                         if (ca->sa_family == AF_INET) {
 454                                 addr_type = IPV4_VERSION;
 455                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 456                         } else if (ca->sa_family == AF_INET6) {
 457                                 addr_type = IPV6_VERSION;
 458                                 ipaddr = &((struct sockaddr_in6 *)
 459                                     ca)->sin6_addr;
 460                         }
 461                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 462                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 463                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 464                             SUN_CIPSO) {
 465                                 if (exi != NULL)
 466                                         exi_rele(exi);
 467                                 VN_RELE(vp);
 468                                 resp->status = NFS3ERR_ACCES;
 469                                 error = 1;
 470                         }
 471                         if (tp != NULL)
 472                                 TPC_RELE(tp);
 473                 }
 474         } else {
 475                 error = VOP_LOOKUP(dvp, name, &vp,
 476                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 477         }
 478 
 479         if (name != args->what.name)
 480                 kmem_free(name, MAXPATHLEN + 1);
 481 
 482         if (is_system_labeled() && error == 0) {
 483                 bslabel_t *clabel = req->rq_label;
 484 
 485                 ASSERT(clabel != NULL);
 486                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 487                     "got client label from request(1)", struct svc_req *, req);
 488 
 489                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 490                         if (!do_rfs_label_check(clabel, dvp,
 491                             DOMINANCE_CHECK, exi)) {
 492                                 if (publicfh_flag && exi != NULL)
 493                                         exi_rele(exi);
 494                                 VN_RELE(vp);
 495                                 resp->status = NFS3ERR_ACCES;
 496                                 error = 1;
 497                         }
 498                 }
 499         }
 500 
 501         dva.va_mask = AT_ALL;
 502         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 503 
 504         if (error)
 505                 goto out;
 506 
 507         if (sec.sec_flags & SEC_QUERY) {
 508                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 509         } else {
 510                 error = makefh3(&resp->resok.object, vp, exi);
 511                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 512                         auth_weak = TRUE;
 513         }
 514 
 515         if (error) {
 516                 VN_RELE(vp);
 517                 goto out;
 518         }
 519 
 520         /*
 521          * If publicfh_flag is true then we have called rfs_publicfh_mclookup
 522          * and have obtained a new exportinfo in exi which needs to be
 523          * released. Note the the original exportinfo pointed to by exi
 524          * will be released by the caller, common_dispatch.
 525          */
 526         if (publicfh_flag)
 527                 exi_rele(exi);
 528 
 529         va.va_mask = AT_ALL;
 530         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 531 
 532         VN_RELE(vp);
 533 
 534         resp->status = NFS3_OK;
 535         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 536         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 537 
 538         /*
 539          * If it's public fh, no 0x81, and client's flavor is
 540          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 541          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 542          */
 543         if (auth_weak)
 544                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 545 
 546         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 547             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 548         VN_RELE(dvp);
 549 
 550         return;
 551 
 552 out:
 553         if (curthread->t_flag & T_WOULDBLOCK) {
 554                 curthread->t_flag &= ~T_WOULDBLOCK;
 555                 resp->status = NFS3ERR_JUKEBOX;
 556         } else
 557                 resp->status = puterrno3(error);
 558 out1:
 559         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 560             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 561 
 562         if (dvp != NULL)
 563                 VN_RELE(dvp);
 564         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 565 
 566 }
 567 
 568 void *
 569 rfs3_lookup_getfh(LOOKUP3args *args)
 570 {
 571 
 572         return (&args->what.dir);
 573 }
 574 
 575 /* ARGSUSED */
 576 void
 577 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 578         struct svc_req *req, cred_t *cr)
 579 {
 580         int error;
 581         vnode_t *vp;
 582         struct vattr *vap;
 583         struct vattr va;
 584         int checkwriteperm;
 585         boolean_t dominant_label = B_FALSE;
 586         boolean_t equal_label = B_FALSE;
 587         boolean_t admin_low_client;
 588 
 589         vap = NULL;
 590 
 591         vp = nfs3_fhtovp(&args->object, exi);
 592 
 593         DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
 594             cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
 595 
 596         if (vp == NULL) {
 597                 error = ESTALE;
 598                 goto out;
 599         }
 600 
 601         /*
 602          * If the file system is exported read only, it is not appropriate
 603          * to check write permissions for regular files and directories.
 604          * Special files are interpreted by the client, so the underlying
 605          * permissions are sent back to the client for interpretation.
 606          */
 607         if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
 608                 checkwriteperm = 0;
 609         else
 610                 checkwriteperm = 1;
 611 
 612         /*
 613          * We need the mode so that we can correctly determine access
 614          * permissions relative to a mandatory lock file.  Access to
 615          * mandatory lock files is denied on the server, so it might
 616          * as well be reflected to the server during the open.
 617          */
 618         va.va_mask = AT_MODE;
 619         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 620         if (error)
 621                 goto out;
 622 
 623         vap = &va;
 624 
 625         resp->resok.access = 0;
 626 
 627         if (is_system_labeled()) {
 628                 bslabel_t *clabel = req->rq_label;
 629 
 630                 ASSERT(clabel != NULL);
 631                 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 632                     "got client label from request(1)", struct svc_req *, req);
 633 
 634                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 635                         if ((equal_label = do_rfs_label_check(clabel, vp,
 636                             EQUALITY_CHECK, exi)) == B_FALSE) {
 637                                 dominant_label = do_rfs_label_check(clabel,
 638                                     vp, DOMINANCE_CHECK, exi);
 639                         } else
 640                                 dominant_label = B_TRUE;
 641                         admin_low_client = B_FALSE;
 642                 } else
 643                         admin_low_client = B_TRUE;
 644         }
 645 
 646         if (args->access & ACCESS3_READ) {
 647                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 648                 if (error) {
 649                         if (curthread->t_flag & T_WOULDBLOCK)
 650                                 goto out;
 651                 } else if (!MANDLOCK(vp, va.va_mode) &&
 652                     (!is_system_labeled() || admin_low_client ||
 653                     dominant_label))
 654                         resp->resok.access |= ACCESS3_READ;
 655         }
 656         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 657                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 658                 if (error) {
 659                         if (curthread->t_flag & T_WOULDBLOCK)
 660                                 goto out;
 661                 } else if (!is_system_labeled() || admin_low_client ||
 662                     dominant_label)
 663                         resp->resok.access |= ACCESS3_LOOKUP;
 664         }
 665         if (checkwriteperm &&
 666             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 667                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 668                 if (error) {
 669                         if (curthread->t_flag & T_WOULDBLOCK)
 670                                 goto out;
 671                 } else if (!MANDLOCK(vp, va.va_mode) &&
 672                     (!is_system_labeled() || admin_low_client || equal_label)) {
 673                         resp->resok.access |=
 674                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 675                 }
 676         }
 677         if (checkwriteperm &&
 678             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 679                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 680                 if (error) {
 681                         if (curthread->t_flag & T_WOULDBLOCK)
 682                                 goto out;
 683                 } else if (!is_system_labeled() || admin_low_client ||
 684                     equal_label)
 685                         resp->resok.access |= ACCESS3_DELETE;
 686         }
 687         if (args->access & ACCESS3_EXECUTE) {
 688                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 689                 if (error) {
 690                         if (curthread->t_flag & T_WOULDBLOCK)
 691                                 goto out;
 692                 } else if (!MANDLOCK(vp, va.va_mode) &&
 693                     (!is_system_labeled() || admin_low_client ||
 694                     dominant_label))
 695                         resp->resok.access |= ACCESS3_EXECUTE;
 696         }
 697 
 698         va.va_mask = AT_ALL;
 699         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 700 
 701         resp->status = NFS3_OK;
 702         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 703 
 704         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 705             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 706 
 707         VN_RELE(vp);
 708 
 709         return;
 710 
 711 out:
 712         if (curthread->t_flag & T_WOULDBLOCK) {
 713                 curthread->t_flag &= ~T_WOULDBLOCK;
 714                 resp->status = NFS3ERR_JUKEBOX;
 715         } else
 716                 resp->status = puterrno3(error);
 717         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 718             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 719         if (vp != NULL)
 720                 VN_RELE(vp);
 721         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 722 }
 723 
 724 void *
 725 rfs3_access_getfh(ACCESS3args *args)
 726 {
 727 
 728         return (&args->object);
 729 }
 730 
 731 /* ARGSUSED */
 732 void
 733 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 734         struct svc_req *req, cred_t *cr)
 735 {
 736         int error;
 737         vnode_t *vp;
 738         struct vattr *vap;
 739         struct vattr va;
 740         struct iovec iov;
 741         struct uio uio;
 742         char *data;
 743         struct sockaddr *ca;
 744         char *name = NULL;
 745         int is_referral = 0;
 746 
 747         vap = NULL;
 748 
 749         vp = nfs3_fhtovp(&args->symlink, exi);
 750 
 751         DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
 752             cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
 753 
 754         if (vp == NULL) {
 755                 error = ESTALE;
 756                 goto out;
 757         }
 758 
 759         va.va_mask = AT_ALL;
 760         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 761         if (error)
 762                 goto out;
 763 
 764         vap = &va;
 765 
 766         /* We lied about the object type for a referral */
 767         if (vn_is_nfs_reparse(vp, cr))
 768                 is_referral = 1;
 769 
 770         if (vp->v_type != VLNK && !is_referral) {
 771                 resp->status = NFS3ERR_INVAL;
 772                 goto out1;
 773         }
 774 
 775         if (MANDLOCK(vp, va.va_mode)) {
 776                 resp->status = NFS3ERR_ACCES;
 777                 goto out1;
 778         }
 779 
 780         if (is_system_labeled()) {
 781                 bslabel_t *clabel = req->rq_label;
 782 
 783                 ASSERT(clabel != NULL);
 784                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 785                     "got client label from request(1)", struct svc_req *, req);
 786 
 787                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 788                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 789                             exi)) {
 790                                 resp->status = NFS3ERR_ACCES;
 791                                 goto out1;
 792                         }
 793                 }
 794         }
 795 
 796         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 797 
 798         if (is_referral) {
 799                 char *s;
 800                 size_t strsz;
 801 
 802                 /* Get an artificial symlink based on a referral */
 803                 s = build_symlink(vp, cr, &strsz);
 804                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 805                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 806                     vnode_t *, vp, char *, s);
 807                 if (s == NULL)
 808                         error = EINVAL;
 809                 else {
 810                         error = 0;
 811                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 812                         kmem_free(s, strsz);
 813                 }
 814 
 815         } else {
 816 
 817                 iov.iov_base = data;
 818                 iov.iov_len = MAXPATHLEN;
 819                 uio.uio_iov = &iov;
 820                 uio.uio_iovcnt = 1;
 821                 uio.uio_segflg = UIO_SYSSPACE;
 822                 uio.uio_extflg = UIO_COPY_CACHED;
 823                 uio.uio_loffset = 0;
 824                 uio.uio_resid = MAXPATHLEN;
 825 
 826                 error = VOP_READLINK(vp, &uio, cr, NULL);
 827 
 828                 if (!error)
 829                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 830         }
 831 
 832         va.va_mask = AT_ALL;
 833         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 834 
 835         /* Lie about object type again just to be consistent */
 836         if (is_referral && vap != NULL)
 837                 vap->va_type = VLNK;
 838 
 839 #if 0 /* notyet */
 840         /*
 841          * Don't do this.  It causes local disk writes when just
 842          * reading the file and the overhead is deemed larger
 843          * than the benefit.
 844          */
 845         /*
 846          * Force modified metadata out to stable storage.
 847          */
 848         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 849 #endif
 850 
 851         if (error) {
 852                 kmem_free(data, MAXPATHLEN + 1);
 853                 goto out;
 854         }
 855 
 856         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 857         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 858             MAXPATHLEN + 1);
 859 
 860         if (name == NULL) {
 861                 /*
 862                  * Even though the conversion failed, we return
 863                  * something. We just don't translate it.
 864                  */
 865                 name = data;
 866         }
 867 
 868         resp->status = NFS3_OK;
 869         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 870         resp->resok.data = name;
 871 
 872         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 873             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 874         VN_RELE(vp);
 875 
 876         if (name != data)
 877                 kmem_free(data, MAXPATHLEN + 1);
 878 
 879         return;
 880 
 881 out:
 882         if (curthread->t_flag & T_WOULDBLOCK) {
 883                 curthread->t_flag &= ~T_WOULDBLOCK;
 884                 resp->status = NFS3ERR_JUKEBOX;
 885         } else
 886                 resp->status = puterrno3(error);
 887 out1:
 888         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 889             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 890         if (vp != NULL)
 891                 VN_RELE(vp);
 892         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 893 }
 894 
 895 void *
 896 rfs3_readlink_getfh(READLINK3args *args)
 897 {
 898 
 899         return (&args->symlink);
 900 }
 901 
 902 void
 903 rfs3_readlink_free(READLINK3res *resp)
 904 {
 905 
 906         if (resp->status == NFS3_OK)
 907                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
 908 }
 909 
 910 /*
 911  * Server routine to handle read
 912  * May handle RDMA data as well as mblks
 913  */
 914 /* ARGSUSED */
 915 void
 916 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
 917         struct svc_req *req, cred_t *cr)
 918 {
 919         int error;
 920         vnode_t *vp;
 921         struct vattr *vap;
 922         struct vattr va;
 923         struct iovec iov;
 924         struct uio uio;
 925         u_offset_t offset;
 926         mblk_t *mp = NULL;
 927         int alloc_err = 0;
 928         int in_crit = 0;
 929         int need_rwunlock = 0;
 930         caller_context_t ct;
 931         int rdma_used = 0;
 932         int loaned_buffers;
 933         struct uio *uiop;
 934 
 935         vap = NULL;
 936 
 937         vp = nfs3_fhtovp(&args->file, exi);
 938 
 939         DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
 940             cred_t *, cr, vnode_t *, vp, READ3args *, args);
 941 
 942         if (vp == NULL) {
 943                 error = ESTALE;
 944                 goto out;
 945         }
 946 
 947         if (args->wlist) {
 948                 if (args->count > clist_len(args->wlist)) {
 949                         error = EINVAL;
 950                         goto out;
 951                 }
 952                 rdma_used = 1;
 953         }
 954 
 955         /* use loaned buffers for TCP */
 956         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
 957 
 958         if (is_system_labeled()) {
 959                 bslabel_t *clabel = req->rq_label;
 960 
 961                 ASSERT(clabel != NULL);
 962                 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
 963                     "got client label from request(1)", struct svc_req *, req);
 964 
 965                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 966                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 967                             exi)) {
 968                                 resp->status = NFS3ERR_ACCES;
 969                                 goto out1;
 970                         }
 971                 }
 972         }
 973 
 974         ct.cc_sysid = 0;
 975         ct.cc_pid = 0;
 976         ct.cc_caller_id = nfs3_srv_caller_id;
 977         ct.cc_flags = CC_DONTBLOCK;
 978 
 979         /*
 980          * Enter the critical region before calling VOP_RWLOCK
 981          * to avoid a deadlock with write requests.
 982          */
 983         if (nbl_need_check(vp)) {
 984                 nbl_start_crit(vp, RW_READER);
 985                 in_crit = 1;
 986                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
 987                     NULL)) {
 988                         error = EACCES;
 989                         goto out;
 990                 }
 991         }
 992 
 993         error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
 994 
 995         /* check if a monitor detected a delegation conflict */
 996         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 997                 resp->status = NFS3ERR_JUKEBOX;
 998                 goto out1;
 999         }
1000 
1001         need_rwunlock = 1;
1002 
1003         va.va_mask = AT_ALL;
1004         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1005 
1006         /*
1007          * If we can't get the attributes, then we can't do the
1008          * right access checking.  So, we'll fail the request.
1009          */
1010         if (error)
1011                 goto out;
1012 
1013         vap = &va;
1014 
1015         if (vp->v_type != VREG) {
1016                 resp->status = NFS3ERR_INVAL;
1017                 goto out1;
1018         }
1019 
1020         if (crgetuid(cr) != va.va_uid) {
1021                 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1022                 if (error) {
1023                         if (curthread->t_flag & T_WOULDBLOCK)
1024                                 goto out;
1025                         error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1026                         if (error)
1027                                 goto out;
1028                 }
1029         }
1030 
1031         if (MANDLOCK(vp, va.va_mode)) {
1032                 resp->status = NFS3ERR_ACCES;
1033                 goto out1;
1034         }
1035 
1036         offset = args->offset;
1037         if (offset >= va.va_size) {
1038                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1039                 if (in_crit)
1040                         nbl_end_crit(vp);
1041                 resp->status = NFS3_OK;
1042                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1043                 resp->resok.count = 0;
1044                 resp->resok.eof = TRUE;
1045                 resp->resok.data.data_len = 0;
1046                 resp->resok.data.data_val = NULL;
1047                 resp->resok.data.mp = NULL;
1048                 /* RDMA */
1049                 resp->resok.wlist = args->wlist;
1050                 resp->resok.wlist_len = resp->resok.count;
1051                 if (resp->resok.wlist)
1052                         clist_zero_len(resp->resok.wlist);
1053                 goto done;
1054         }
1055 
1056         if (args->count == 0) {
1057                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1058                 if (in_crit)
1059                         nbl_end_crit(vp);
1060                 resp->status = NFS3_OK;
1061                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1062                 resp->resok.count = 0;
1063                 resp->resok.eof = FALSE;
1064                 resp->resok.data.data_len = 0;
1065                 resp->resok.data.data_val = NULL;
1066                 resp->resok.data.mp = NULL;
1067                 /* RDMA */
1068                 resp->resok.wlist = args->wlist;
1069                 resp->resok.wlist_len = resp->resok.count;
1070                 if (resp->resok.wlist)
1071                         clist_zero_len(resp->resok.wlist);
1072                 goto done;
1073         }
1074 
1075         /*
1076          * do not allocate memory more the max. allowed
1077          * transfer size
1078          */
1079         if (args->count > rfs3_tsize(req))
1080                 args->count = rfs3_tsize(req);
1081 
1082         if (loaned_buffers) {
1083                 uiop = (uio_t *)rfs_setup_xuio(vp);
1084                 ASSERT(uiop != NULL);
1085                 uiop->uio_segflg = UIO_SYSSPACE;
1086                 uiop->uio_loffset = args->offset;
1087                 uiop->uio_resid = args->count;
1088 
1089                 /* Jump to do the read if successful */
1090                 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1091                         /*
1092                          * Need to hold the vnode until after VOP_RETZCBUF()
1093                          * is called.
1094                          */
1095                         VN_HOLD(vp);
1096                         goto doio_read;
1097                 }
1098 
1099                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1100                     uiop->uio_loffset, int, uiop->uio_resid);
1101 
1102                 uiop->uio_extflg = 0;
1103                 /* failure to setup for zero copy */
1104                 rfs_free_xuio((void *)uiop);
1105                 loaned_buffers = 0;
1106         }
1107 
1108         /*
1109          * If returning data via RDMA Write, then grab the chunk list.
1110          * If we aren't returning READ data w/RDMA_WRITE, then grab
1111          * a mblk.
1112          */
1113         if (rdma_used) {
1114                 (void) rdma_get_wchunk(req, &iov, args->wlist);
1115         } else {
1116                 /*
1117                  * mp will contain the data to be sent out in the read reply.
1118                  * This will be freed after the reply has been sent out (by the
1119                  * driver).
1120                  * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
1121                  * that the call to xdrmblk_putmblk() never fails.
1122                  */
1123                 mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG,
1124                     &alloc_err);
1125                 ASSERT(mp != NULL);
1126                 ASSERT(alloc_err == 0);
1127 
1128                 iov.iov_base = (caddr_t)mp->b_datap->db_base;
1129                 iov.iov_len = args->count;
1130         }
1131 
1132         uio.uio_iov = &iov;
1133         uio.uio_iovcnt = 1;
1134         uio.uio_segflg = UIO_SYSSPACE;
1135         uio.uio_extflg = UIO_COPY_CACHED;
1136         uio.uio_loffset = args->offset;
1137         uio.uio_resid = args->count;
1138         uiop = &uio;
1139 
1140 doio_read:
1141         error = VOP_READ(vp, uiop, 0, cr, &ct);
1142 
1143         if (error) {
1144                 if (mp)
1145                         freemsg(mp);
1146                 /* check if a monitor detected a delegation conflict */
1147                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1148                         resp->status = NFS3ERR_JUKEBOX;
1149                         goto out1;
1150                 }
1151                 goto out;
1152         }
1153 
1154         /* make mblk using zc buffers */
1155         if (loaned_buffers) {
1156                 mp = uio_to_mblk(uiop);
1157                 ASSERT(mp != NULL);
1158         }
1159 
1160         va.va_mask = AT_ALL;
1161         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1162 
1163         if (error)
1164                 vap = NULL;
1165         else
1166                 vap = &va;
1167 
1168         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1169 
1170         if (in_crit)
1171                 nbl_end_crit(vp);
1172 
1173         resp->status = NFS3_OK;
1174         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1175         resp->resok.count = args->count - uiop->uio_resid;
1176         if (!error && offset + resp->resok.count == va.va_size)
1177                 resp->resok.eof = TRUE;
1178         else
1179                 resp->resok.eof = FALSE;
1180         resp->resok.data.data_len = resp->resok.count;
1181 
1182         if (mp)
1183                 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1184 
1185         resp->resok.data.mp = mp;
1186         resp->resok.size = (uint_t)args->count;
1187 
1188         if (rdma_used) {
1189                 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1190                 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1191                         resp->status = NFS3ERR_INVAL;
1192                 }
1193         } else {
1194                 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1195                 (resp->resok).wlist = NULL;
1196         }
1197 
1198 done:
1199         DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1200             cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1201 
1202         VN_RELE(vp);
1203 
1204         return;
1205 
1206 out:
1207         if (curthread->t_flag & T_WOULDBLOCK) {
1208                 curthread->t_flag &= ~T_WOULDBLOCK;
1209                 resp->status = NFS3ERR_JUKEBOX;
1210         } else
1211                 resp->status = puterrno3(error);
1212 out1:
1213         DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1214             cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1215 
1216         if (vp != NULL) {
1217                 if (need_rwunlock)
1218                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1219                 if (in_crit)
1220                         nbl_end_crit(vp);
1221                 VN_RELE(vp);
1222         }
1223         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1224 }
1225 
1226 void
1227 rfs3_read_free(READ3res *resp)
1228 {
1229         mblk_t *mp;
1230 
1231         if (resp->status == NFS3_OK) {
1232                 mp = resp->resok.data.mp;
1233                 if (mp != NULL)
1234                         freemsg(mp);
1235         }
1236 }
1237 
1238 void *
1239 rfs3_read_getfh(READ3args *args)
1240 {
1241 
1242         return (&args->file);
1243 }
1244 
/*
 * Number of iovec entries rfs3_write() keeps in its on-stack array.
 * An mblk chain with more blocks than this makes rfs3_write() obtain
 * the iovec array from kmem_alloc() instead.
 */
#define MAX_IOVECS      12

#ifdef DEBUG
/*
 * DEBUG-only counters bumped by rfs3_write() for mblk-based requests:
 * a "hit" is a chain that fit in the on-stack iovec array, a "miss"
 * is one that needed a kmem_alloc()ed array.
 */
static int rfs3_write_hits = 0;
static int rfs3_write_misses = 0;
#endif
1251 
1252 void
1253 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1254         struct svc_req *req, cred_t *cr)
1255 {
1256         int error;
1257         vnode_t *vp;
1258         struct vattr *bvap = NULL;
1259         struct vattr bva;
1260         struct vattr *avap = NULL;
1261         struct vattr ava;
1262         u_offset_t rlimit;
1263         struct uio uio;
1264         struct iovec iov[MAX_IOVECS];
1265         mblk_t *m;
1266         struct iovec *iovp;
1267         int iovcnt;
1268         int ioflag;
1269         cred_t *savecred;
1270         int in_crit = 0;
1271         int rwlock_ret = -1;
1272         caller_context_t ct;
1273 
1274         vp = nfs3_fhtovp(&args->file, exi);
1275 
1276         DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1277             cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1278 
1279         if (vp == NULL) {
1280                 error = ESTALE;
1281                 goto err;
1282         }
1283 
1284         if (is_system_labeled()) {
1285                 bslabel_t *clabel = req->rq_label;
1286 
1287                 ASSERT(clabel != NULL);
1288                 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1289                     "got client label from request(1)", struct svc_req *, req);
1290 
1291                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1292                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1293                             exi)) {
1294                                 resp->status = NFS3ERR_ACCES;
1295                                 goto err1;
1296                         }
1297                 }
1298         }
1299 
1300         ct.cc_sysid = 0;
1301         ct.cc_pid = 0;
1302         ct.cc_caller_id = nfs3_srv_caller_id;
1303         ct.cc_flags = CC_DONTBLOCK;
1304 
1305         /*
1306          * We have to enter the critical region before calling VOP_RWLOCK
1307          * to avoid a deadlock with ufs.
1308          */
1309         if (nbl_need_check(vp)) {
1310                 nbl_start_crit(vp, RW_READER);
1311                 in_crit = 1;
1312                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1313                     NULL)) {
1314                         error = EACCES;
1315                         goto err;
1316                 }
1317         }
1318 
1319         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1320 
1321         /* check if a monitor detected a delegation conflict */
1322         if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1323                 resp->status = NFS3ERR_JUKEBOX;
1324                 rwlock_ret = -1;
1325                 goto err1;
1326         }
1327 
1328 
1329         bva.va_mask = AT_ALL;
1330         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1331 
1332         /*
1333          * If we can't get the attributes, then we can't do the
1334          * right access checking.  So, we'll fail the request.
1335          */
1336         if (error)
1337                 goto err;
1338 
1339         bvap = &bva;
1340         avap = bvap;
1341 
1342         if (args->count != args->data.data_len) {
1343                 resp->status = NFS3ERR_INVAL;
1344                 goto err1;
1345         }
1346 
1347         if (rdonly(exi, req)) {
1348                 resp->status = NFS3ERR_ROFS;
1349                 goto err1;
1350         }
1351 
1352         if (vp->v_type != VREG) {
1353                 resp->status = NFS3ERR_INVAL;
1354                 goto err1;
1355         }
1356 
1357         if (crgetuid(cr) != bva.va_uid &&
1358             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1359                 goto err;
1360 
1361         if (MANDLOCK(vp, bva.va_mode)) {
1362                 resp->status = NFS3ERR_ACCES;
1363                 goto err1;
1364         }
1365 
1366         if (args->count == 0) {
1367                 resp->status = NFS3_OK;
1368                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1369                 resp->resok.count = 0;
1370                 resp->resok.committed = args->stable;
1371                 resp->resok.verf = write3verf;
1372                 goto out;
1373         }
1374 
1375         if (args->mblk != NULL) {
1376                 iovcnt = 0;
1377                 for (m = args->mblk; m != NULL; m = m->b_cont)
1378                         iovcnt++;
1379                 if (iovcnt <= MAX_IOVECS) {
1380 #ifdef DEBUG
1381                         rfs3_write_hits++;
1382 #endif
1383                         iovp = iov;
1384                 } else {
1385 #ifdef DEBUG
1386                         rfs3_write_misses++;
1387 #endif
1388                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1389                 }
1390                 mblk_to_iov(args->mblk, iovcnt, iovp);
1391 
1392         } else if (args->rlist != NULL) {
1393                 iovcnt = 1;
1394                 iovp = iov;
1395                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1396                 iovp->iov_len = args->count;
1397         } else {
1398                 iovcnt = 1;
1399                 iovp = iov;
1400                 iovp->iov_base = args->data.data_val;
1401                 iovp->iov_len = args->count;
1402         }
1403 
1404         uio.uio_iov = iovp;
1405         uio.uio_iovcnt = iovcnt;
1406 
1407         uio.uio_segflg = UIO_SYSSPACE;
1408         uio.uio_extflg = UIO_COPY_DEFAULT;
1409         uio.uio_loffset = args->offset;
1410         uio.uio_resid = args->count;
1411         uio.uio_llimit = curproc->p_fsz_ctl;
1412         rlimit = uio.uio_llimit - args->offset;
1413         if (rlimit < (u_offset_t)uio.uio_resid)
1414                 uio.uio_resid = (int)rlimit;
1415 
1416         if (args->stable == UNSTABLE)
1417                 ioflag = 0;
1418         else if (args->stable == FILE_SYNC)
1419                 ioflag = FSYNC;
1420         else if (args->stable == DATA_SYNC)
1421                 ioflag = FDSYNC;
1422         else {
1423                 if (iovp != iov)
1424                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1425                 resp->status = NFS3ERR_INVAL;
1426                 goto err1;
1427         }
1428 
1429         /*
1430          * We're changing creds because VM may fault and we need
1431          * the cred of the current thread to be used if quota
1432          * checking is enabled.
1433          */
1434         savecred = curthread->t_cred;
1435         curthread->t_cred = cr;
1436         error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1437         curthread->t_cred = savecred;
1438 
1439         if (iovp != iov)
1440                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1441 
1442         /* check if a monitor detected a delegation conflict */
1443         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1444                 resp->status = NFS3ERR_JUKEBOX;
1445                 goto err1;
1446         }
1447 
1448         ava.va_mask = AT_ALL;
1449         avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1450 
1451         if (error)
1452                 goto err;
1453 
1454         /*
1455          * If we were unable to get the V_WRITELOCK_TRUE, then we
1456          * may not have accurate after attrs, so check if
1457          * we have both attributes, they have a non-zero va_seq, and
1458          * va_seq has changed by exactly one,
1459          * if not, turn off the before attr.
1460          */
1461         if (rwlock_ret != V_WRITELOCK_TRUE) {
1462                 if (bvap == NULL || avap == NULL ||
1463                     bvap->va_seq == 0 || avap->va_seq == 0 ||
1464                     avap->va_seq != (bvap->va_seq + 1)) {
1465                         bvap = NULL;
1466                 }
1467         }
1468 
1469         resp->status = NFS3_OK;
1470         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1471         resp->resok.count = args->count - uio.uio_resid;
1472         resp->resok.committed = args->stable;
1473         resp->resok.verf = write3verf;
1474         goto out;
1475 
1476 err:
1477         if (curthread->t_flag & T_WOULDBLOCK) {
1478                 curthread->t_flag &= ~T_WOULDBLOCK;
1479                 resp->status = NFS3ERR_JUKEBOX;
1480         } else
1481                 resp->status = puterrno3(error);
1482 err1:
1483         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1484 out:
1485         DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1486             cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1487 
1488         if (vp != NULL) {
1489                 if (rwlock_ret != -1)
1490                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1491                 if (in_crit)
1492                         nbl_end_crit(vp);
1493                 VN_RELE(vp);
1494         }
1495 }
1496 
1497 void *
1498 rfs3_write_getfh(WRITE3args *args)
1499 {
1500 
1501         return (&args->file);
1502 }
1503 
1504 void
1505 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1506         struct svc_req *req, cred_t *cr)
1507 {
1508         int error;
1509         int in_crit = 0;
1510         vnode_t *vp;
1511         vnode_t *tvp = NULL;
1512         vnode_t *dvp;
1513         struct vattr *vap;
1514         struct vattr va;
1515         struct vattr *dbvap;
1516         struct vattr dbva;
1517         struct vattr *davap;
1518         struct vattr dava;
1519         enum vcexcl excl;
1520         nfstime3 *mtime;
1521         len_t reqsize;
1522         bool_t trunc;
1523         struct sockaddr *ca;
1524         char *name = NULL;
1525 
1526         dbvap = NULL;
1527         davap = NULL;
1528 
1529         dvp = nfs3_fhtovp(&args->where.dir, exi);
1530 
1531         DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1532             cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1533 
1534         if (dvp == NULL) {
1535                 error = ESTALE;
1536                 goto out;
1537         }
1538 
1539         dbva.va_mask = AT_ALL;
1540         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1541         davap = dbvap;
1542 
1543         if (args->where.name == nfs3nametoolong) {
1544                 resp->status = NFS3ERR_NAMETOOLONG;
1545                 goto out1;
1546         }
1547 
1548         if (args->where.name == NULL || *(args->where.name) == '\0') {
1549                 resp->status = NFS3ERR_ACCES;
1550                 goto out1;
1551         }
1552 
1553         if (rdonly(exi, req)) {
1554                 resp->status = NFS3ERR_ROFS;
1555                 goto out1;
1556         }
1557 
1558         if (is_system_labeled()) {
1559                 bslabel_t *clabel = req->rq_label;
1560 
1561                 ASSERT(clabel != NULL);
1562                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1563                     "got client label from request(1)", struct svc_req *, req);
1564 
1565                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1566                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1567                             exi)) {
1568                                 resp->status = NFS3ERR_ACCES;
1569                                 goto out1;
1570                         }
1571                 }
1572         }
1573 
1574         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1575         name = nfscmd_convname(ca, exi, args->where.name,
1576             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1577 
1578         if (name == NULL) {
1579                 /* This is really a Solaris EILSEQ */
1580                 resp->status = NFS3ERR_INVAL;
1581                 goto out1;
1582         }
1583 
1584         if (args->how.mode == EXCLUSIVE) {
1585                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1586                 va.va_type = VREG;
1587                 va.va_mode = (mode_t)0;
1588                 /*
1589                  * Ensure no time overflows and that types match
1590                  */
1591                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1592                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1593                 va.va_mtime.tv_nsec = mtime->nseconds;
1594                 excl = EXCL;
1595         } else {
1596                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1597                     &va);
1598                 if (error)
1599                         goto out;
1600                 va.va_mask |= AT_TYPE;
1601                 va.va_type = VREG;
1602                 if (args->how.mode == GUARDED)
1603                         excl = EXCL;
1604                 else {
1605                         excl = NONEXCL;
1606 
1607                         /*
1608                          * During creation of file in non-exclusive mode
1609                          * if size of file is being set then make sure
1610                          * that if the file already exists that no conflicting
1611                          * non-blocking mandatory locks exists in the region
1612                          * being modified. If there are conflicting locks fail
1613                          * the operation with EACCES.
1614                          */
1615                         if (va.va_mask & AT_SIZE) {
1616                                 struct vattr tva;
1617 
1618                                 /*
1619                                  * Does file already exist?
1620                                  */
1621                                 error = VOP_LOOKUP(dvp, name, &tvp,
1622                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1623 
1624                                 /*
1625                                  * Check to see if the file has been delegated
1626                                  * to a v4 client.  If so, then begin recall of
1627                                  * the delegation and return JUKEBOX to allow
                                 * the client to retransmit its request.
1629                                  */
1630 
1631                                 trunc = va.va_size == 0;
1632                                 if (!error &&
1633                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1634                                         resp->status = NFS3ERR_JUKEBOX;
1635                                         goto out1;
1636                                 }
1637 
1638                                 /*
1639                                  * Check for NBMAND lock conflicts
1640                                  */
1641                                 if (!error && nbl_need_check(tvp)) {
1642                                         u_offset_t offset;
1643                                         ssize_t len;
1644 
1645                                         nbl_start_crit(tvp, RW_READER);
1646                                         in_crit = 1;
1647 
1648                                         tva.va_mask = AT_SIZE;
1649                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1650                                             NULL);
1651                                         /*
1652                                          * Can't check for conflicts, so return
1653                                          * error.
1654                                          */
1655                                         if (error)
1656                                                 goto out;
1657 
1658                                         offset = tva.va_size < va.va_size ?
1659                                             tva.va_size : va.va_size;
1660                                         len = tva.va_size < va.va_size ?
1661                                             va.va_size - tva.va_size :
1662                                             tva.va_size - va.va_size;
1663                                         if (nbl_conflict(tvp, NBL_WRITE,
1664                                             offset, len, 0, NULL)) {
1665                                                 error = EACCES;
1666                                                 goto out;
1667                                         }
1668                                 } else if (tvp) {
1669                                         VN_RELE(tvp);
1670                                         tvp = NULL;
1671                                 }
1672                         }
1673                 }
1674                 if (va.va_mask & AT_SIZE)
1675                         reqsize = va.va_size;
1676         }
1677 
1678         /*
1679          * Must specify the mode.
1680          */
1681         if (!(va.va_mask & AT_MODE)) {
1682                 resp->status = NFS3ERR_INVAL;
1683                 goto out1;
1684         }
1685 
1686         /*
1687          * If the filesystem is exported with nosuid, then mask off
1688          * the setuid and setgid bits.
1689          */
1690         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1691                 va.va_mode &= ~(VSUID | VSGID);
1692 
1693 tryagain:
1694         /*
1695          * The file open mode used is VWRITE.  If the client needs
1696          * some other semantic, then it should do the access checking
1697          * itself.  It would have been nice to have the file open mode
1698          * passed as part of the arguments.
1699          */
1700         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1701             &vp, cr, 0, NULL, NULL);
1702 
1703         dava.va_mask = AT_ALL;
1704         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1705 
1706         if (error) {
1707                 /*
1708                  * If we got something other than file already exists
1709                  * then just return this error.  Otherwise, we got
1710                  * EEXIST.  If we were doing a GUARDED create, then
1711                  * just return this error.  Otherwise, we need to
1712                  * make sure that this wasn't a duplicate of an
1713                  * exclusive create request.
1714                  *
1715                  * The assumption is made that a non-exclusive create
1716                  * request will never return EEXIST.
1717                  */
1718                 if (error != EEXIST || args->how.mode == GUARDED)
1719                         goto out;
1720                 /*
1721                  * Lookup the file so that we can get a vnode for it.
1722                  */
1723                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1724                     NULL, cr, NULL, NULL, NULL);
1725                 if (error) {
1726                         /*
1727                          * We couldn't find the file that we thought that
1728                          * we just created.  So, we'll just try creating
1729                          * it again.
1730                          */
1731                         if (error == ENOENT)
1732                                 goto tryagain;
1733                         goto out;
1734                 }
1735 
1736                 /*
1737                  * If the file is delegated to a v4 client, go ahead
1738                  * and initiate recall, this create is a hint that a
1739                  * conflicting v3 open has occurred.
1740                  */
1741 
1742                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1743                         VN_RELE(vp);
1744                         resp->status = NFS3ERR_JUKEBOX;
1745                         goto out1;
1746                 }
1747 
1748                 va.va_mask = AT_ALL;
1749                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1750 
1751                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1752                 /* % with INT32_MAX to prevent overflows */
1753                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1754                     vap->va_mtime.tv_sec !=
1755                     (mtime->seconds % INT32_MAX) ||
1756                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1757                         VN_RELE(vp);
1758                         error = EEXIST;
1759                         goto out;
1760                 }
1761         } else {
1762 
1763                 if ((args->how.mode == UNCHECKED ||
1764                     args->how.mode == GUARDED) &&
1765                     args->how.createhow3_u.obj_attributes.size.set_it &&
1766                     va.va_size == 0)
1767                         trunc = TRUE;
1768                 else
1769                         trunc = FALSE;
1770 
1771                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1772                         VN_RELE(vp);
1773                         resp->status = NFS3ERR_JUKEBOX;
1774                         goto out1;
1775                 }
1776 
1777                 va.va_mask = AT_ALL;
1778                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1779 
1780                 /*
1781                  * We need to check to make sure that the file got
1782                  * created to the indicated size.  If not, we do a
1783                  * setattr to try to change the size, but we don't
                 * try too hard.  This shouldn't be a problem as most
                 * clients will only specify a size of zero which
1786                  * local file systems handle.  However, even if
1787                  * the client does specify a non-zero size, it can
1788                  * still recover by checking the size of the file
1789                  * after it has created it and then issue a setattr
1790                  * request of its own to set the size of the file.
1791                  */
1792                 if (vap != NULL &&
1793                     (args->how.mode == UNCHECKED ||
1794                     args->how.mode == GUARDED) &&
1795                     args->how.createhow3_u.obj_attributes.size.set_it &&
1796                     vap->va_size != reqsize) {
1797                         va.va_mask = AT_SIZE;
1798                         va.va_size = reqsize;
1799                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1800                         va.va_mask = AT_ALL;
1801                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1802                 }
1803         }
1804 
1805         if (name != args->where.name)
1806                 kmem_free(name, MAXPATHLEN + 1);
1807 
1808         error = makefh3(&resp->resok.obj.handle, vp, exi);
1809         if (error)
1810                 resp->resok.obj.handle_follows = FALSE;
1811         else
1812                 resp->resok.obj.handle_follows = TRUE;
1813 
1814         /*
1815          * Force modified data and metadata out to stable storage.
1816          */
1817         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1818         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1819 
1820         VN_RELE(vp);
1821         if (tvp != NULL) {
1822                 if (in_crit)
1823                         nbl_end_crit(tvp);
1824                 VN_RELE(tvp);
1825         }
1826 
1827         resp->status = NFS3_OK;
1828         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1829         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1830 
1831         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1832             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1833 
1834         VN_RELE(dvp);
1835         return;
1836 
1837 out:
1838         if (curthread->t_flag & T_WOULDBLOCK) {
1839                 curthread->t_flag &= ~T_WOULDBLOCK;
1840                 resp->status = NFS3ERR_JUKEBOX;
1841         } else
1842                 resp->status = puterrno3(error);
1843 out1:
1844         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1845             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1846 
1847         if (name != NULL && name != args->where.name)
1848                 kmem_free(name, MAXPATHLEN + 1);
1849 
1850         if (tvp != NULL) {
1851                 if (in_crit)
1852                         nbl_end_crit(tvp);
1853                 VN_RELE(tvp);
1854         }
1855         if (dvp != NULL)
1856                 VN_RELE(dvp);
1857         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1858 }
1859 
1860 void *
1861 rfs3_create_getfh(CREATE3args *args)
1862 {
1863 
1864         return (&args->where.dir);
1865 }
1866 
1867 void
1868 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1869         struct svc_req *req, cred_t *cr)
1870 {
1871         int error;
1872         vnode_t *vp = NULL;
1873         vnode_t *dvp;
1874         struct vattr *vap;
1875         struct vattr va;
1876         struct vattr *dbvap;
1877         struct vattr dbva;
1878         struct vattr *davap;
1879         struct vattr dava;
1880         struct sockaddr *ca;
1881         char *name = NULL;
1882 
1883         dbvap = NULL;
1884         davap = NULL;
1885 
1886         dvp = nfs3_fhtovp(&args->where.dir, exi);
1887 
1888         DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1889             cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1890 
1891         if (dvp == NULL) {
1892                 error = ESTALE;
1893                 goto out;
1894         }
1895 
1896         dbva.va_mask = AT_ALL;
1897         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1898         davap = dbvap;
1899 
1900         if (args->where.name == nfs3nametoolong) {
1901                 resp->status = NFS3ERR_NAMETOOLONG;
1902                 goto out1;
1903         }
1904 
1905         if (args->where.name == NULL || *(args->where.name) == '\0') {
1906                 resp->status = NFS3ERR_ACCES;
1907                 goto out1;
1908         }
1909 
1910         if (rdonly(exi, req)) {
1911                 resp->status = NFS3ERR_ROFS;
1912                 goto out1;
1913         }
1914 
1915         if (is_system_labeled()) {
1916                 bslabel_t *clabel = req->rq_label;
1917 
1918                 ASSERT(clabel != NULL);
1919                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1920                     "got client label from request(1)", struct svc_req *, req);
1921 
1922                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1923                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1924                             exi)) {
1925                                 resp->status = NFS3ERR_ACCES;
1926                                 goto out1;
1927                         }
1928                 }
1929         }
1930 
1931         error = sattr3_to_vattr(&args->attributes, &va);
1932         if (error)
1933                 goto out;
1934 
1935         if (!(va.va_mask & AT_MODE)) {
1936                 resp->status = NFS3ERR_INVAL;
1937                 goto out1;
1938         }
1939 
1940         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1941         name = nfscmd_convname(ca, exi, args->where.name,
1942             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1943 
1944         if (name == NULL) {
1945                 resp->status = NFS3ERR_INVAL;
1946                 goto out1;
1947         }
1948 
1949         va.va_mask |= AT_TYPE;
1950         va.va_type = VDIR;
1951 
1952         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1953 
1954         if (name != args->where.name)
1955                 kmem_free(name, MAXPATHLEN + 1);
1956 
1957         dava.va_mask = AT_ALL;
1958         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1959 
1960         /*
1961          * Force modified data and metadata out to stable storage.
1962          */
1963         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1964 
1965         if (error)
1966                 goto out;
1967 
1968         error = makefh3(&resp->resok.obj.handle, vp, exi);
1969         if (error)
1970                 resp->resok.obj.handle_follows = FALSE;
1971         else
1972                 resp->resok.obj.handle_follows = TRUE;
1973 
1974         va.va_mask = AT_ALL;
1975         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1976 
1977         /*
1978          * Force modified data and metadata out to stable storage.
1979          */
1980         (void) VOP_FSYNC(vp, 0, cr, NULL);
1981 
1982         VN_RELE(vp);
1983 
1984         resp->status = NFS3_OK;
1985         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1986         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1987 
1988         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1989             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1990         VN_RELE(dvp);
1991 
1992         return;
1993 
1994 out:
1995         if (curthread->t_flag & T_WOULDBLOCK) {
1996                 curthread->t_flag &= ~T_WOULDBLOCK;
1997                 resp->status = NFS3ERR_JUKEBOX;
1998         } else
1999                 resp->status = puterrno3(error);
2000 out1:
2001         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2002             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2003         if (dvp != NULL)
2004                 VN_RELE(dvp);
2005         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2006 }
2007 
2008 void *
2009 rfs3_mkdir_getfh(MKDIR3args *args)
2010 {
2011 
2012         return (&args->where.dir);
2013 }
2014 
2015 void
2016 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2017         struct svc_req *req, cred_t *cr)
2018 {
2019         int error;
2020         vnode_t *vp;
2021         vnode_t *dvp;
2022         struct vattr *vap;
2023         struct vattr va;
2024         struct vattr *dbvap;
2025         struct vattr dbva;
2026         struct vattr *davap;
2027         struct vattr dava;
2028         struct sockaddr *ca;
2029         char *name = NULL;
2030         char *symdata = NULL;
2031 
2032         dbvap = NULL;
2033         davap = NULL;
2034 
2035         dvp = nfs3_fhtovp(&args->where.dir, exi);
2036 
2037         DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2038             cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2039 
2040         if (dvp == NULL) {
2041                 error = ESTALE;
2042                 goto err;
2043         }
2044 
2045         dbva.va_mask = AT_ALL;
2046         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2047         davap = dbvap;
2048 
2049         if (args->where.name == nfs3nametoolong) {
2050                 resp->status = NFS3ERR_NAMETOOLONG;
2051                 goto err1;
2052         }
2053 
2054         if (args->where.name == NULL || *(args->where.name) == '\0') {
2055                 resp->status = NFS3ERR_ACCES;
2056                 goto err1;
2057         }
2058 
2059         if (rdonly(exi, req)) {
2060                 resp->status = NFS3ERR_ROFS;
2061                 goto err1;
2062         }
2063 
2064         if (is_system_labeled()) {
2065                 bslabel_t *clabel = req->rq_label;
2066 
2067                 ASSERT(clabel != NULL);
2068                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2069                     "got client label from request(1)", struct svc_req *, req);
2070 
2071                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2072                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2073                             exi)) {
2074                                 resp->status = NFS3ERR_ACCES;
2075                                 goto err1;
2076                         }
2077                 }
2078         }
2079 
2080         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2081         if (error)
2082                 goto err;
2083 
2084         if (!(va.va_mask & AT_MODE)) {
2085                 resp->status = NFS3ERR_INVAL;
2086                 goto err1;
2087         }
2088 
2089         if (args->symlink.symlink_data == nfs3nametoolong) {
2090                 resp->status = NFS3ERR_NAMETOOLONG;
2091                 goto err1;
2092         }
2093 
2094         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2095         name = nfscmd_convname(ca, exi, args->where.name,
2096             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2097 
2098         if (name == NULL) {
2099                 /* This is really a Solaris EILSEQ */
2100                 resp->status = NFS3ERR_INVAL;
2101                 goto err1;
2102         }
2103 
2104         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2105             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2106         if (symdata == NULL) {
2107                 /* This is really a Solaris EILSEQ */
2108                 resp->status = NFS3ERR_INVAL;
2109                 goto err1;
2110         }
2111 
2112 
2113         va.va_mask |= AT_TYPE;
2114         va.va_type = VLNK;
2115 
2116         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2117 
2118         dava.va_mask = AT_ALL;
2119         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2120 
2121         if (error)
2122                 goto err;
2123 
2124         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2125             NULL, NULL, NULL);
2126 
2127         /*
2128          * Force modified data and metadata out to stable storage.
2129          */
2130         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2131 
2132 
2133         resp->status = NFS3_OK;
2134         if (error) {
2135                 resp->resok.obj.handle_follows = FALSE;
2136                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2137                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2138                 goto out;
2139         }
2140 
2141         error = makefh3(&resp->resok.obj.handle, vp, exi);
2142         if (error)
2143                 resp->resok.obj.handle_follows = FALSE;
2144         else
2145                 resp->resok.obj.handle_follows = TRUE;
2146 
2147         va.va_mask = AT_ALL;
2148         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2149 
2150         /*
2151          * Force modified data and metadata out to stable storage.
2152          */
2153         (void) VOP_FSYNC(vp, 0, cr, NULL);
2154 
2155         VN_RELE(vp);
2156 
2157         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2158         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2159         goto out;
2160 
2161 err:
2162         if (curthread->t_flag & T_WOULDBLOCK) {
2163                 curthread->t_flag &= ~T_WOULDBLOCK;
2164                 resp->status = NFS3ERR_JUKEBOX;
2165         } else
2166                 resp->status = puterrno3(error);
2167 err1:
2168         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2169 out:
2170         if (name != NULL && name != args->where.name)
2171                 kmem_free(name, MAXPATHLEN + 1);
2172         if (symdata != NULL && symdata != args->symlink.symlink_data)
2173                 kmem_free(symdata, MAXPATHLEN + 1);
2174 
2175         DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2176             cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2177 
2178         if (dvp != NULL)
2179                 VN_RELE(dvp);
2180 }
2181 
2182 void *
2183 rfs3_symlink_getfh(SYMLINK3args *args)
2184 {
2185 
2186         return (&args->where.dir);
2187 }
2188 
2189 void
2190 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2191         struct svc_req *req, cred_t *cr)
2192 {
2193         int error;
2194         vnode_t *vp;
2195         vnode_t *realvp;
2196         vnode_t *dvp;
2197         struct vattr *vap;
2198         struct vattr va;
2199         struct vattr *dbvap;
2200         struct vattr dbva;
2201         struct vattr *davap;
2202         struct vattr dava;
2203         int mode;
2204         enum vcexcl excl;
2205         struct sockaddr *ca;
2206         char *name = NULL;
2207 
2208         dbvap = NULL;
2209         davap = NULL;
2210 
2211         dvp = nfs3_fhtovp(&args->where.dir, exi);
2212 
2213         DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2214             cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2215 
2216         if (dvp == NULL) {
2217                 error = ESTALE;
2218                 goto out;
2219         }
2220 
2221         dbva.va_mask = AT_ALL;
2222         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2223         davap = dbvap;
2224 
2225         if (args->where.name == nfs3nametoolong) {
2226                 resp->status = NFS3ERR_NAMETOOLONG;
2227                 goto out1;
2228         }
2229 
2230         if (args->where.name == NULL || *(args->where.name) == '\0') {
2231                 resp->status = NFS3ERR_ACCES;
2232                 goto out1;
2233         }
2234 
2235         if (rdonly(exi, req)) {
2236                 resp->status = NFS3ERR_ROFS;
2237                 goto out1;
2238         }
2239 
2240         if (is_system_labeled()) {
2241                 bslabel_t *clabel = req->rq_label;
2242 
2243                 ASSERT(clabel != NULL);
2244                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2245                     "got client label from request(1)", struct svc_req *, req);
2246 
2247                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2248                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2249                             exi)) {
2250                                 resp->status = NFS3ERR_ACCES;
2251                                 goto out1;
2252                         }
2253                 }
2254         }
2255 
2256         switch (args->what.type) {
2257         case NF3CHR:
2258         case NF3BLK:
2259                 error = sattr3_to_vattr(
2260                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2261                 if (error)
2262                         goto out;
2263                 if (secpolicy_sys_devices(cr) != 0) {
2264                         resp->status = NFS3ERR_PERM;
2265                         goto out1;
2266                 }
2267                 if (args->what.type == NF3CHR)
2268                         va.va_type = VCHR;
2269                 else
2270                         va.va_type = VBLK;
2271                 va.va_rdev = makedevice(
2272                     args->what.mknoddata3_u.device.spec.specdata1,
2273                     args->what.mknoddata3_u.device.spec.specdata2);
2274                 va.va_mask |= AT_TYPE | AT_RDEV;
2275                 break;
2276         case NF3SOCK:
2277                 error = sattr3_to_vattr(
2278                     &args->what.mknoddata3_u.pipe_attributes, &va);
2279                 if (error)
2280                         goto out;
2281                 va.va_type = VSOCK;
2282                 va.va_mask |= AT_TYPE;
2283                 break;
2284         case NF3FIFO:
2285                 error = sattr3_to_vattr(
2286                     &args->what.mknoddata3_u.pipe_attributes, &va);
2287                 if (error)
2288                         goto out;
2289                 va.va_type = VFIFO;
2290                 va.va_mask |= AT_TYPE;
2291                 break;
2292         default:
2293                 resp->status = NFS3ERR_BADTYPE;
2294                 goto out1;
2295         }
2296 
2297         /*
2298          * Must specify the mode.
2299          */
2300         if (!(va.va_mask & AT_MODE)) {
2301                 resp->status = NFS3ERR_INVAL;
2302                 goto out1;
2303         }
2304 
2305         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2306         name = nfscmd_convname(ca, exi, args->where.name,
2307             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2308 
2309         if (name == NULL) {
2310                 resp->status = NFS3ERR_INVAL;
2311                 goto out1;
2312         }
2313 
2314         excl = EXCL;
2315 
2316         mode = 0;
2317 
2318         error = VOP_CREATE(dvp, name, &va, excl, mode,
2319             &vp, cr, 0, NULL, NULL);
2320 
2321         if (name != args->where.name)
2322                 kmem_free(name, MAXPATHLEN + 1);
2323 
2324         dava.va_mask = AT_ALL;
2325         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2326 
2327         /*
2328          * Force modified data and metadata out to stable storage.
2329          */
2330         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2331 
2332         if (error)
2333                 goto out;
2334 
2335         resp->status = NFS3_OK;
2336 
2337         error = makefh3(&resp->resok.obj.handle, vp, exi);
2338         if (error)
2339                 resp->resok.obj.handle_follows = FALSE;
2340         else
2341                 resp->resok.obj.handle_follows = TRUE;
2342 
2343         va.va_mask = AT_ALL;
2344         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2345 
2346         /*
2347          * Force modified metadata out to stable storage.
2348          *
2349          * if a underlying vp exists, pass it to VOP_FSYNC
2350          */
2351         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2352                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2353         else
2354                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2355 
2356         VN_RELE(vp);
2357 
2358         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2359         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2360         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2361             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2362         VN_RELE(dvp);
2363         return;
2364 
2365 out:
2366         if (curthread->t_flag & T_WOULDBLOCK) {
2367                 curthread->t_flag &= ~T_WOULDBLOCK;
2368                 resp->status = NFS3ERR_JUKEBOX;
2369         } else
2370                 resp->status = puterrno3(error);
2371 out1:
2372         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2373             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2374         if (dvp != NULL)
2375                 VN_RELE(dvp);
2376         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2377 }
2378 
2379 void *
2380 rfs3_mknod_getfh(MKNOD3args *args)
2381 {
2382 
2383         return (&args->where.dir);
2384 }
2385 
2386 void
2387 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2388         struct svc_req *req, cred_t *cr)
2389 {
2390         int error = 0;
2391         vnode_t *vp;
2392         struct vattr *bvap;
2393         struct vattr bva;
2394         struct vattr *avap;
2395         struct vattr ava;
2396         vnode_t *targvp = NULL;
2397         struct sockaddr *ca;
2398         char *name = NULL;
2399 
2400         bvap = NULL;
2401         avap = NULL;
2402 
2403         vp = nfs3_fhtovp(&args->object.dir, exi);
2404 
2405         DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2406             cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2407 
2408         if (vp == NULL) {
2409                 error = ESTALE;
2410                 goto err;
2411         }
2412 
2413         bva.va_mask = AT_ALL;
2414         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2415         avap = bvap;
2416 
2417         if (vp->v_type != VDIR) {
2418                 resp->status = NFS3ERR_NOTDIR;
2419                 goto err1;
2420         }
2421 
2422         if (args->object.name == nfs3nametoolong) {
2423                 resp->status = NFS3ERR_NAMETOOLONG;
2424                 goto err1;
2425         }
2426 
2427         if (args->object.name == NULL || *(args->object.name) == '\0') {
2428                 resp->status = NFS3ERR_ACCES;
2429                 goto err1;
2430         }
2431 
2432         if (rdonly(exi, req)) {
2433                 resp->status = NFS3ERR_ROFS;
2434                 goto err1;
2435         }
2436 
2437         if (is_system_labeled()) {
2438                 bslabel_t *clabel = req->rq_label;
2439 
2440                 ASSERT(clabel != NULL);
2441                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2442                     "got client label from request(1)", struct svc_req *, req);
2443 
2444                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2445                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2446                             exi)) {
2447                                 resp->status = NFS3ERR_ACCES;
2448                                 goto err1;
2449                         }
2450                 }
2451         }
2452 
2453         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2454         name = nfscmd_convname(ca, exi, args->object.name,
2455             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2456 
2457         if (name == NULL) {
2458                 resp->status = NFS3ERR_INVAL;
2459                 goto err1;
2460         }
2461 
2462         /*
2463          * Check for a conflict with a non-blocking mandatory share
2464          * reservation and V4 delegations
2465          */
2466         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2467             NULL, cr, NULL, NULL, NULL);
2468         if (error != 0)
2469                 goto err;
2470 
2471         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2472                 resp->status = NFS3ERR_JUKEBOX;
2473                 goto err1;
2474         }
2475 
2476         if (!nbl_need_check(targvp)) {
2477                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2478         } else {
2479                 nbl_start_crit(targvp, RW_READER);
2480                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2481                         error = EACCES;
2482                 } else {
2483                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2484                 }
2485                 nbl_end_crit(targvp);
2486         }
2487         VN_RELE(targvp);
2488         targvp = NULL;
2489 
2490         ava.va_mask = AT_ALL;
2491         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2492 
2493         /*
2494          * Force modified data and metadata out to stable storage.
2495          */
2496         (void) VOP_FSYNC(vp, 0, cr, NULL);
2497 
2498         if (error)
2499                 goto err;
2500 
2501         resp->status = NFS3_OK;
2502         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2503         goto out;
2504 
2505 err:
2506         if (curthread->t_flag & T_WOULDBLOCK) {
2507                 curthread->t_flag &= ~T_WOULDBLOCK;
2508                 resp->status = NFS3ERR_JUKEBOX;
2509         } else
2510                 resp->status = puterrno3(error);
2511 err1:
2512         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2513 out:
2514         DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2515             cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2516 
2517         if (name != NULL && name != args->object.name)
2518                 kmem_free(name, MAXPATHLEN + 1);
2519 
2520         if (vp != NULL)
2521                 VN_RELE(vp);
2522 }
2523 
2524 void *
2525 rfs3_remove_getfh(REMOVE3args *args)
2526 {
2527 
2528         return (&args->object.dir);
2529 }
2530 
2531 void
2532 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2533         struct svc_req *req, cred_t *cr)
2534 {
2535         int error;
2536         vnode_t *vp;
2537         struct vattr *bvap;
2538         struct vattr bva;
2539         struct vattr *avap;
2540         struct vattr ava;
2541         struct sockaddr *ca;
2542         char *name = NULL;
2543 
2544         bvap = NULL;
2545         avap = NULL;
2546 
2547         vp = nfs3_fhtovp(&args->object.dir, exi);
2548 
2549         DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2550             cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2551 
2552         if (vp == NULL) {
2553                 error = ESTALE;
2554                 goto err;
2555         }
2556 
2557         bva.va_mask = AT_ALL;
2558         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2559         avap = bvap;
2560 
2561         if (vp->v_type != VDIR) {
2562                 resp->status = NFS3ERR_NOTDIR;
2563                 goto err1;
2564         }
2565 
2566         if (args->object.name == nfs3nametoolong) {
2567                 resp->status = NFS3ERR_NAMETOOLONG;
2568                 goto err1;
2569         }
2570 
2571         if (args->object.name == NULL || *(args->object.name) == '\0') {
2572                 resp->status = NFS3ERR_ACCES;
2573                 goto err1;
2574         }
2575 
2576         if (rdonly(exi, req)) {
2577                 resp->status = NFS3ERR_ROFS;
2578                 goto err1;
2579         }
2580 
2581         if (is_system_labeled()) {
2582                 bslabel_t *clabel = req->rq_label;
2583 
2584                 ASSERT(clabel != NULL);
2585                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2586                     "got client label from request(1)", struct svc_req *, req);
2587 
2588                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2589                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2590                             exi)) {
2591                                 resp->status = NFS3ERR_ACCES;
2592                                 goto err1;
2593                         }
2594                 }
2595         }
2596 
2597         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2598         name = nfscmd_convname(ca, exi, args->object.name,
2599             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2600 
2601         if (name == NULL) {
2602                 resp->status = NFS3ERR_INVAL;
2603                 goto err1;
2604         }
2605 
2606         error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2607 
2608         if (name != args->object.name)
2609                 kmem_free(name, MAXPATHLEN + 1);
2610 
2611         ava.va_mask = AT_ALL;
2612         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2613 
2614         /*
2615          * Force modified data and metadata out to stable storage.
2616          */
2617         (void) VOP_FSYNC(vp, 0, cr, NULL);
2618 
2619         if (error) {
2620                 /*
2621                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2622                  * if the directory is not empty.  A System V NFS server
2623                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2624                  * over the wire.
2625                  */
2626                 if (error == EEXIST)
2627                         error = ENOTEMPTY;
2628                 goto err;
2629         }
2630 
2631         resp->status = NFS3_OK;
2632         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2633         goto out;
2634 
2635 err:
2636         if (curthread->t_flag & T_WOULDBLOCK) {
2637                 curthread->t_flag &= ~T_WOULDBLOCK;
2638                 resp->status = NFS3ERR_JUKEBOX;
2639         } else
2640                 resp->status = puterrno3(error);
2641 err1:
2642         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2643 out:
2644         DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2645             cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2646         if (vp != NULL)
2647                 VN_RELE(vp);
2648 
2649 }
2650 
2651 void *
2652 rfs3_rmdir_getfh(RMDIR3args *args)
2653 {
2654 
2655         return (&args->object.dir);
2656 }
2657 
2658 void
2659 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2660         struct svc_req *req, cred_t *cr)
2661 {
2662         int error = 0;
2663         vnode_t *fvp;
2664         vnode_t *tvp;
2665         vnode_t *targvp;
2666         struct vattr *fbvap;
2667         struct vattr fbva;
2668         struct vattr *favap;
2669         struct vattr fava;
2670         struct vattr *tbvap;
2671         struct vattr tbva;
2672         struct vattr *tavap;
2673         struct vattr tava;
2674         nfs_fh3 *fh3;
2675         struct exportinfo *to_exi;
2676         vnode_t *srcvp = NULL;
2677         bslabel_t *clabel;
2678         struct sockaddr *ca;
2679         char *name = NULL;
2680         char *toname = NULL;
2681 
2682         fbvap = NULL;
2683         favap = NULL;
2684         tbvap = NULL;
2685         tavap = NULL;
2686         tvp = NULL;
2687 
2688         fvp = nfs3_fhtovp(&args->from.dir, exi);
2689 
2690         DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2691             cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2692 
2693         if (fvp == NULL) {
2694                 error = ESTALE;
2695                 goto err;
2696         }
2697 
2698         if (is_system_labeled()) {
2699                 clabel = req->rq_label;
2700                 ASSERT(clabel != NULL);
2701                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2702                     "got client label from request(1)", struct svc_req *, req);
2703 
2704                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2705                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2706                             exi)) {
2707                                 resp->status = NFS3ERR_ACCES;
2708                                 goto err1;
2709                         }
2710                 }
2711         }
2712 
2713         fbva.va_mask = AT_ALL;
2714         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2715         favap = fbvap;
2716 
2717         fh3 = &args->to.dir;
2718         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2719         if (to_exi == NULL) {
2720                 resp->status = NFS3ERR_ACCES;
2721                 goto err1;
2722         }
2723         exi_rele(to_exi);
2724 
2725         if (to_exi != exi) {
2726                 resp->status = NFS3ERR_XDEV;
2727                 goto err1;
2728         }
2729 
2730         tvp = nfs3_fhtovp(&args->to.dir, exi);
2731         if (tvp == NULL) {
2732                 error = ESTALE;
2733                 goto err;
2734         }
2735 
2736         tbva.va_mask = AT_ALL;
2737         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2738         tavap = tbvap;
2739 
2740         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2741                 resp->status = NFS3ERR_NOTDIR;
2742                 goto err1;
2743         }
2744 
2745         if (args->from.name == nfs3nametoolong ||
2746             args->to.name == nfs3nametoolong) {
2747                 resp->status = NFS3ERR_NAMETOOLONG;
2748                 goto err1;
2749         }
2750         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2751             args->to.name == NULL || *(args->to.name) == '\0') {
2752                 resp->status = NFS3ERR_ACCES;
2753                 goto err1;
2754         }
2755 
2756         if (rdonly(exi, req)) {
2757                 resp->status = NFS3ERR_ROFS;
2758                 goto err1;
2759         }
2760 
2761         if (is_system_labeled()) {
2762                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2763                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2764                             exi)) {
2765                                 resp->status = NFS3ERR_ACCES;
2766                                 goto err1;
2767                         }
2768                 }
2769         }
2770 
2771         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2772         name = nfscmd_convname(ca, exi, args->from.name,
2773             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2774 
2775         if (name == NULL) {
2776                 resp->status = NFS3ERR_INVAL;
2777                 goto err1;
2778         }
2779 
2780         toname = nfscmd_convname(ca, exi, args->to.name,
2781             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2782 
2783         if (toname == NULL) {
2784                 resp->status = NFS3ERR_INVAL;
2785                 goto err1;
2786         }
2787 
2788         /*
2789          * Check for a conflict with a non-blocking mandatory share
2790          * reservation or V4 delegations.
2791          */
2792         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2793             NULL, cr, NULL, NULL, NULL);
2794         if (error != 0)
2795                 goto err;
2796 
2797         /*
2798          * If we rename a delegated file we should recall the
2799          * delegation, since future opens should fail or would
2800          * refer to a new file.
2801          */
2802         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2803                 resp->status = NFS3ERR_JUKEBOX;
2804                 goto err1;
2805         }
2806 
2807         /*
2808          * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2809          * first to avoid VOP_LOOKUP if possible.
2810          */
2811         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2812             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2813             NULL, NULL, NULL) == 0) {
2814 
2815                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2816                         VN_RELE(targvp);
2817                         resp->status = NFS3ERR_JUKEBOX;
2818                         goto err1;
2819                 }
2820                 VN_RELE(targvp);
2821         }
2822 
2823         if (!nbl_need_check(srcvp)) {
2824                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2825         } else {
2826                 nbl_start_crit(srcvp, RW_READER);
2827                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2828                         error = EACCES;
2829                 else
2830                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2831                 nbl_end_crit(srcvp);
2832         }
2833         if (error == 0)
2834                 vn_renamepath(tvp, srcvp, args->to.name,
2835                     strlen(args->to.name));
2836         VN_RELE(srcvp);
2837         srcvp = NULL;
2838 
2839         fava.va_mask = AT_ALL;
2840         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2841         tava.va_mask = AT_ALL;
2842         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2843 
2844         /*
2845          * Force modified data and metadata out to stable storage.
2846          */
2847         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2848         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2849 
2850         if (error)
2851                 goto err;
2852 
2853         resp->status = NFS3_OK;
2854         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2855         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2856         goto out;
2857 
2858 err:
2859         if (curthread->t_flag & T_WOULDBLOCK) {
2860                 curthread->t_flag &= ~T_WOULDBLOCK;
2861                 resp->status = NFS3ERR_JUKEBOX;
2862         } else {
2863                 resp->status = puterrno3(error);
2864         }
2865 err1:
2866         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2867         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2868 
2869 out:
2870         if (name != NULL && name != args->from.name)
2871                 kmem_free(name, MAXPATHLEN + 1);
2872         if (toname != NULL && toname != args->to.name)
2873                 kmem_free(toname, MAXPATHLEN + 1);
2874 
2875         DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2876             cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2877         if (fvp != NULL)
2878                 VN_RELE(fvp);
2879         if (tvp != NULL)
2880                 VN_RELE(tvp);
2881 }
2882 
2883 void *
2884 rfs3_rename_getfh(RENAME3args *args)
2885 {
2886 
2887         return (&args->from.dir);
2888 }
2889 
2890 void
2891 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2892         struct svc_req *req, cred_t *cr)
2893 {
2894         int error;
2895         vnode_t *vp;
2896         vnode_t *dvp;
2897         struct vattr *vap;
2898         struct vattr va;
2899         struct vattr *bvap;
2900         struct vattr bva;
2901         struct vattr *avap;
2902         struct vattr ava;
2903         nfs_fh3 *fh3;
2904         struct exportinfo *to_exi;
2905         bslabel_t *clabel;
2906         struct sockaddr *ca;
2907         char *name = NULL;
2908 
2909         vap = NULL;
2910         bvap = NULL;
2911         avap = NULL;
2912         dvp = NULL;
2913 
2914         vp = nfs3_fhtovp(&args->file, exi);
2915 
2916         DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2917             cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2918 
2919         if (vp == NULL) {
2920                 error = ESTALE;
2921                 goto out;
2922         }
2923 
2924         va.va_mask = AT_ALL;
2925         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2926 
2927         fh3 = &args->link.dir;
2928         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2929         if (to_exi == NULL) {
2930                 resp->status = NFS3ERR_ACCES;
2931                 goto out1;
2932         }
2933         exi_rele(to_exi);
2934 
2935         if (to_exi != exi) {
2936                 resp->status = NFS3ERR_XDEV;
2937                 goto out1;
2938         }
2939 
2940         if (is_system_labeled()) {
2941                 clabel = req->rq_label;
2942 
2943                 ASSERT(clabel != NULL);
2944                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2945                     "got client label from request(1)", struct svc_req *, req);
2946 
2947                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2948                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2949                             exi)) {
2950                                 resp->status = NFS3ERR_ACCES;
2951                                 goto out1;
2952                         }
2953                 }
2954         }
2955 
2956         dvp = nfs3_fhtovp(&args->link.dir, exi);
2957         if (dvp == NULL) {
2958                 error = ESTALE;
2959                 goto out;
2960         }
2961 
2962         bva.va_mask = AT_ALL;
2963         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2964 
2965         if (dvp->v_type != VDIR) {
2966                 resp->status = NFS3ERR_NOTDIR;
2967                 goto out1;
2968         }
2969 
2970         if (args->link.name == nfs3nametoolong) {
2971                 resp->status = NFS3ERR_NAMETOOLONG;
2972                 goto out1;
2973         }
2974 
2975         if (args->link.name == NULL || *(args->link.name) == '\0') {
2976                 resp->status = NFS3ERR_ACCES;
2977                 goto out1;
2978         }
2979 
2980         if (rdonly(exi, req)) {
2981                 resp->status = NFS3ERR_ROFS;
2982                 goto out1;
2983         }
2984 
2985         if (is_system_labeled()) {
2986                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2987                     "got client label from request(1)", struct svc_req *, req);
2988 
2989                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2990                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2991                             exi)) {
2992                                 resp->status = NFS3ERR_ACCES;
2993                                 goto out1;
2994                         }
2995                 }
2996         }
2997 
2998         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2999         name = nfscmd_convname(ca, exi, args->link.name,
3000             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3001 
3002         if (name == NULL) {
3003                 resp->status = NFS3ERR_SERVERFAULT;
3004                 goto out1;
3005         }
3006 
3007         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3008 
3009         va.va_mask = AT_ALL;
3010         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3011         ava.va_mask = AT_ALL;
3012         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3013 
3014         /*
3015          * Force modified data and metadata out to stable storage.
3016          */
3017         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3018         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3019 
3020         if (error)
3021                 goto out;
3022 
3023         VN_RELE(dvp);
3024 
3025         resp->status = NFS3_OK;
3026         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3027         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3028 
3029         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3030             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3031 
3032         VN_RELE(vp);
3033 
3034         return;
3035 
3036 out:
3037         if (curthread->t_flag & T_WOULDBLOCK) {
3038                 curthread->t_flag &= ~T_WOULDBLOCK;
3039                 resp->status = NFS3ERR_JUKEBOX;
3040         } else
3041                 resp->status = puterrno3(error);
3042 out1:
3043         if (name != NULL && name != args->link.name)
3044                 kmem_free(name, MAXPATHLEN + 1);
3045 
3046         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3047             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3048 
3049         if (vp != NULL)
3050                 VN_RELE(vp);
3051         if (dvp != NULL)
3052                 VN_RELE(dvp);
3053         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3054         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3055 }
3056 
3057 void *
3058 rfs3_link_getfh(LINK3args *args)
3059 {
3060 
3061         return (&args->file);
3062 }
3063 
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response that carries directory attributes and exactly one entry
 * whose name is `length' bytes long.  A request whose count is larger
 * than this is guaranteed to have room for at least one directory
 * entry if one exists, so NFS3ERR_TOOSMALL need not be considered.
 * For a smaller count the caller has to check whether that error must
 * be returned.
 *
 * The total is made up of the following, in XDR units:
 *
 *      1                       status
 *      1                       attributes flag
 *      2                       cookie verifier
 *      NFS3_SIZEOF_FATTR3      attributes
 *      1                       entry-follows boolean
 *      2                       file id
 *      1                       name length
 *      2                       cookie
 *      1                       end of list
 *      1                       end of file
 *
 * each multiplied by BYTES_PER_XDR_UNIT, plus the name itself rounded
 * up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define NFS3_READDIR_MIN_COUNT(length)  \
        (roundup((length), BYTES_PER_XDR_UNIT) + BYTES_PER_XDR_UNIT * \
                (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3092 
3093 /* ARGSUSED */
3094 void
3095 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3096         struct svc_req *req, cred_t *cr)
3097 {
3098         int error;
3099         vnode_t *vp;
3100         struct vattr *vap;
3101         struct vattr va;
3102         struct iovec iov;
3103         struct uio uio;
3104         char *data;
3105         int iseof;
3106         int bufsize;
3107         int namlen;
3108         uint_t count;
3109         struct sockaddr *ca;
3110 
3111         vap = NULL;
3112 
3113         vp = nfs3_fhtovp(&args->dir, exi);
3114 
3115         DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3116             cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3117 
3118         if (vp == NULL) {
3119                 error = ESTALE;
3120                 goto out;
3121         }
3122 
3123         if (is_system_labeled()) {
3124                 bslabel_t *clabel = req->rq_label;
3125 
3126                 ASSERT(clabel != NULL);
3127                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3128                     "got client label from request(1)", struct svc_req *, req);
3129 
3130                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3131                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3132                             exi)) {
3133                                 resp->status = NFS3ERR_ACCES;
3134                                 goto out1;
3135                         }
3136                 }
3137         }
3138 
3139         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3140 
3141         va.va_mask = AT_ALL;
3142         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3143 
3144         if (vp->v_type != VDIR) {
3145                 resp->status = NFS3ERR_NOTDIR;
3146                 goto out1;
3147         }
3148 
3149         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3150         if (error)
3151                 goto out;
3152 
3153         /*
3154          * Now don't allow arbitrary count to alloc;
3155          * allow the maximum not to exceed rfs3_tsize()
3156          */
3157         if (args->count > rfs3_tsize(req))
3158                 args->count = rfs3_tsize(req);
3159 
3160         /*
3161          * Make sure that there is room to read at least one entry
3162          * if any are available.
3163          */
3164         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3165                 count = DIRENT64_RECLEN(MAXNAMELEN);
3166         else
3167                 count = args->count;
3168 
3169         data = kmem_alloc(count, KM_SLEEP);
3170 
3171         iov.iov_base = data;
3172         iov.iov_len = count;
3173         uio.uio_iov = &iov;
3174         uio.uio_iovcnt = 1;
3175         uio.uio_segflg = UIO_SYSSPACE;
3176         uio.uio_extflg = UIO_COPY_CACHED;
3177         uio.uio_loffset = (offset_t)args->cookie;
3178         uio.uio_resid = count;
3179 
3180         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3181 
3182         va.va_mask = AT_ALL;
3183         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3184 
3185         if (error) {
3186                 kmem_free(data, count);
3187                 goto out;
3188         }
3189 
3190         /*
3191          * If the count was not large enough to be able to guarantee
3192          * to be able to return at least one entry, then need to
3193          * check to see if NFS3ERR_TOOSMALL should be returned.
3194          */
3195         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3196                 /*
3197                  * bufsize is used to keep track of the size of the response.
3198                  * It is primed with:
3199                  *      1 for the status +
3200                  *      1 for the dir_attributes.attributes boolean +
3201                  *      2 for the cookie verifier
3202                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3203                  * to bytes.  If there are directory attributes to be
3204                  * returned, then:
3205                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3206                  * time BYTES_PER_XDR_UNIT is added to account for them.
3207                  */
3208                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3209                 if (vap != NULL)
3210                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3211                 /*
3212                  * An entry is composed of:
3213                  *      1 for the true/false list indicator +
3214                  *      2 for the fileid +
3215                  *      1 for the length of the name +
3216                  *      2 for the cookie +
3217                  * all times BYTES_PER_XDR_UNIT to convert from
3218                  * XDR units to bytes, plus the length of the name
3219                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3220                  */
3221                 if (count != uio.uio_resid) {
3222                         namlen = strlen(((struct dirent64 *)data)->d_name);
3223                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3224                             roundup(namlen, BYTES_PER_XDR_UNIT);
3225                 }
3226                 /*
3227                  * We need to check to see if the number of bytes left
3228                  * to go into the buffer will actually fit into the
3229                  * buffer.  This is calculated as the size of this
3230                  * entry plus:
3231                  *      1 for the true/false list indicator +
3232                  *      1 for the eof indicator
3233                  * times BYTES_PER_XDR_UNIT to convert from from
3234                  * XDR units to bytes.
3235                  */
3236                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3237                 if (bufsize > args->count) {
3238                         kmem_free(data, count);
3239                         resp->status = NFS3ERR_TOOSMALL;
3240                         goto out1;
3241                 }
3242         }
3243 
3244         /*
3245          * Have a valid readir buffer for the native character
3246          * set. Need to check if a conversion is necessary and
3247          * potentially rewrite the whole buffer. Note that if the
3248          * conversion expands names enough, the structure may not
3249          * fit. In this case, we need to drop entries until if fits
3250          * and patch the counts in order that the next readdir will
3251          * get the correct entries.
3252          */
3253         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3254         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3255 
3256 
3257         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3258 
3259 #if 0 /* notyet */
3260         /*
3261          * Don't do this.  It causes local disk writes when just
3262          * reading the file and the overhead is deemed larger
3263          * than the benefit.
3264          */
3265         /*
3266          * Force modified metadata out to stable storage.
3267          */
3268         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3269 #endif
3270 
3271         resp->status = NFS3_OK;
3272         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3273         resp->resok.cookieverf = 0;
3274         resp->resok.reply.entries = (entry3 *)data;
3275         resp->resok.reply.eof = iseof;
3276         resp->resok.size = count - uio.uio_resid;
3277         resp->resok.count = args->count;
3278         resp->resok.freecount = count;
3279 
3280         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3281             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3282 
3283         VN_RELE(vp);
3284 
3285         return;
3286 
3287 out:
3288         if (curthread->t_flag & T_WOULDBLOCK) {
3289                 curthread->t_flag &= ~T_WOULDBLOCK;
3290                 resp->status = NFS3ERR_JUKEBOX;
3291         } else
3292                 resp->status = puterrno3(error);
3293 out1:
3294         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3295             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3296 
3297         if (vp != NULL) {
3298                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3299                 VN_RELE(vp);
3300         }
3301         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3302 }
3303 
3304 void *
3305 rfs3_readdir_getfh(READDIR3args *args)
3306 {
3307 
3308         return (&args->dir);
3309 }
3310 
3311 void
3312 rfs3_readdir_free(READDIR3res *resp)
3313 {
3314 
3315         if (resp->status == NFS3_OK)
3316                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3317 }
3318 
/*
 * nextdp(dp) yields the dirent64 record that starts d_reclen bytes
 * past dp.  Any earlier definition of the macro is discarded first.
 */
#undef nextdp
#define nextdp(dp)      \
        ((struct dirent64 *)&((char *)(dp))[(dp)->d_reclen])
3323 
/*
 * NFS3_READDIRPLUS_ENTRY(namelen) is the size, in bytes, of a single
 * READDIRPLUS directory entry including its attributes and file handle,
 * for a name that is `namelen' bytes long.  If the space remaining in
 * the response is larger than this, then at least one more directory
 * entry is guaranteed to fit if one exists.
 *
 * The total is made up of the following, in XDR units:
 *
 *      1                       entry-follows boolean
 *      2                       file id
 *      1                       name length
 *      2                       cookie
 *      1                       attributes flag
 *      NFS3_SIZEOF_FATTR3      attributes
 *      1                       file handle status flag
 *      1                       file handle length
 *
 * each multiplied by BYTES_PER_XDR_UNIT, plus NFS3_MAXFHSIZE bytes for
 * the largest possible file handle, plus the name itself rounded up to
 * a multiple of BYTES_PER_XDR_UNIT.
 *
 * The argument is parenthesized so that an expression may be passed
 * safely, matching NFS3_READDIR_MIN_COUNT().
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
                BYTES_PER_XDR_UNIT + \
        NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3347 
/*
 * Initial value of the per-call read unit (rd_unit) in
 * rfs3_readdirplus(), which uses it as the upper bound on the number
 * of bytes requested from each VOP_READDIR call.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3349 
3350 /* ARGSUSED */
3351 void
3352 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3353         struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3354 {
3355         int error;
3356         vnode_t *vp;
3357         struct vattr *vap;
3358         struct vattr va;
3359         struct iovec iov;
3360         struct uio uio;
3361         char *data;
3362         int iseof;
3363         struct dirent64 *dp;
3364         vnode_t *nvp;
3365         struct vattr *nvap;
3366         struct vattr nva;
3367         entryplus3_info *infop = NULL;
3368         int size = 0;
3369         int nents = 0;
3370         int bufsize = 0;
3371         int entrysize = 0;
3372         int tofit = 0;
3373         int rd_unit = rfs3_readdir_unit;
3374         int prev_len;
3375         int space_left;
3376         int i;
3377         uint_t *namlen = NULL;
3378         char *ndata = NULL;
3379         struct sockaddr *ca;
3380         size_t ret;
3381 
3382         vap = NULL;
3383 
3384         vp = nfs3_fhtovp(&args->dir, exi);
3385 
3386         DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3387             cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3388 
3389         if (vp == NULL) {
3390                 error = ESTALE;
3391                 goto out;
3392         }
3393 
3394         if (is_system_labeled()) {
3395                 bslabel_t *clabel = req->rq_label;
3396 
3397                 ASSERT(clabel != NULL);
3398                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3399                     char *, "got client label from request(1)",
3400                     struct svc_req *, req);
3401 
3402                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3403                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3404                             exi)) {
3405                                 resp->status = NFS3ERR_ACCES;
3406                                 goto out1;
3407                         }
3408                 }
3409         }
3410 
3411         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3412 
3413         va.va_mask = AT_ALL;
3414         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3415 
3416         if (vp->v_type != VDIR) {
3417                 error = ENOTDIR;
3418                 goto out;
3419         }
3420 
3421         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3422         if (error)
3423                 goto out;
3424 
3425         /*
3426          * Don't allow arbitrary counts for allocation
3427          */
3428         if (args->maxcount > rfs3_tsize(req))
3429                 args->maxcount = rfs3_tsize(req);
3430 
3431         /*
3432          * Make sure that there is room to read at least one entry
3433          * if any are available
3434          */
3435         args->dircount = MIN(args->dircount, args->maxcount);
3436 
3437         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3438                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3439 
3440         /*
3441          * This allocation relies on a minimum directory entry
3442          * being roughly 24 bytes.  Therefore, the namlen array
3443          * will have enough space based on the maximum number of
3444          * entries to read.
3445          */
3446         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3447 
3448         space_left = args->dircount;
3449         data = kmem_alloc(args->dircount, KM_SLEEP);
3450         dp = (struct dirent64 *)data;
3451         uio.uio_iov = &iov;
3452         uio.uio_iovcnt = 1;
3453         uio.uio_segflg = UIO_SYSSPACE;
3454         uio.uio_extflg = UIO_COPY_CACHED;
3455         uio.uio_loffset = (offset_t)args->cookie;
3456 
3457         /*
3458          * bufsize is used to keep track of the size of the response as we
3459          * get post op attributes and filehandles for each entry.  This is
3460          * an optimization as the server may have read more entries than will
3461          * fit in the buffer specified by maxcount.  We stop calculating
3462          * post op attributes and filehandles once we have exceeded maxcount.
3463          * This will minimize the effect of truncation.
3464          *
3465          * It is primed with:
3466          *      1 for the status +
3467          *      1 for the dir_attributes.attributes boolean +
3468          *      2 for the cookie verifier
3469          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3470          * to bytes.  If there are directory attributes to be
3471          * returned, then:
3472          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3473          * time BYTES_PER_XDR_UNIT is added to account for them.
3474          */
3475         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3476         if (vap != NULL)
3477                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3478 
3479 getmoredents:
3480         /*
3481          * Here we make a check so that our read unit is not larger than
3482          * the space left in the buffer.
3483          */
3484         rd_unit = MIN(rd_unit, space_left);
3485         iov.iov_base = (char *)dp;
3486         iov.iov_len = rd_unit;
3487         uio.uio_resid = rd_unit;
3488         prev_len = rd_unit;
3489 
3490         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3491 
3492         if (error) {
3493                 kmem_free(data, args->dircount);
3494                 goto out;
3495         }
3496 
3497         if (uio.uio_resid == prev_len && !iseof) {
3498                 if (nents == 0) {
3499                         kmem_free(data, args->dircount);
3500                         resp->status = NFS3ERR_TOOSMALL;
3501                         goto out1;
3502                 }
3503 
3504                 /*
3505                  * We could not get any more entries, so get the attributes
3506                  * and filehandle for the entries already obtained.
3507                  */
3508                 goto good;
3509         }
3510 
3511         /*
3512          * We estimate the size of the response by assuming the
3513          * entry exists and attributes and filehandle are also valid
3514          */
3515         for (size = prev_len - uio.uio_resid;
3516             size > 0;
3517             size -= dp->d_reclen, dp = nextdp(dp)) {
3518 
3519                 if (dp->d_ino == 0) {
3520                         nents++;
3521                         continue;
3522                 }
3523 
3524                 namlen[nents] = strlen(dp->d_name);
3525                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3526 
3527                 /*
3528                  * We need to check to see if the number of bytes left
3529                  * to go into the buffer will actually fit into the
3530                  * buffer.  This is calculated as the size of this
3531                  * entry plus:
3532                  *      1 for the true/false list indicator +
3533                  *      1 for the eof indicator
3534                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3535                  * to bytes.
3536                  *
3537                  * Also check the dircount limit against the first entry read
3538                  *
3539                  */
3540                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3541                 if (bufsize + tofit > args->maxcount) {
3542                         /*
3543                          * We make a check here to see if this was the
3544                          * first entry being measured.  If so, then maxcount
3545                          * was too small to begin with and so we need to
3546                          * return with NFS3ERR_TOOSMALL.
3547                          */
3548                         if (nents == 0) {
3549                                 kmem_free(data, args->dircount);
3550                                 resp->status = NFS3ERR_TOOSMALL;
3551                                 goto out1;
3552                         }
3553                         iseof = FALSE;
3554                         goto good;
3555                 }
3556                 bufsize += entrysize;
3557                 nents++;
3558         }
3559 
3560         /*
3561          * If there is enough room to fit at least 1 more entry including
3562          * post op attributes and filehandle in the buffer AND that we haven't
3563          * exceeded dircount then go back and get some more.
3564          */
3565         if (!iseof &&
3566             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3567                 space_left -= (prev_len - uio.uio_resid);
3568                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3569                         goto getmoredents;
3570 
3571                 /* else, fall through */
3572         }
3573 good:
3574         va.va_mask = AT_ALL;
3575         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3576 
3577         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3578 
3579         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3580         resp->resok.infop = infop;
3581 
3582         dp = (struct dirent64 *)data;
3583         for (i = 0; i < nents; i++) {
3584 
3585                 if (dp->d_ino == 0) {
3586                         infop[i].attr.attributes = FALSE;
3587                         infop[i].fh.handle_follows = FALSE;
3588                         dp = nextdp(dp);
3589                         continue;
3590                 }
3591 
3592                 infop[i].namelen = namlen[i];
3593 
3594                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3595                     NULL, NULL, NULL);
3596                 if (error) {
3597                         infop[i].attr.attributes = FALSE;
3598                         infop[i].fh.handle_follows = FALSE;
3599                         dp = nextdp(dp);
3600                         continue;
3601                 }
3602 
3603                 nva.va_mask = AT_ALL;
3604                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3605 
3606                 /* Lie about the object type for a referral */
3607                 if (vn_is_nfs_reparse(nvp, cr))
3608                         nvap->va_type = VLNK;
3609 
3610                 vattr_to_post_op_attr(nvap, &infop[i].attr);
3611 
3612                 error = makefh3(&infop[i].fh.handle, nvp, exi);
3613                 if (!error)
3614                         infop[i].fh.handle_follows = TRUE;
3615                 else
3616                         infop[i].fh.handle_follows = FALSE;
3617 
3618                 VN_RELE(nvp);
3619                 dp = nextdp(dp);
3620         }
3621 
3622         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3623         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3624         if (ndata == NULL)
3625                 ndata = data;
3626 
3627         if (ret > 0) {
3628                 /*
3629                  * We had to drop one or more entries in order to fit
3630                  * during the character conversion.  We need to patch
3631                  * up the size and eof info.
3632                  */
3633                 if (iseof)
3634                         iseof = FALSE;
3635 
3636                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3637                     nents, ret);
3638         }
3639 
3640 
3641 #if 0 /* notyet */
3642         /*
3643          * Don't do this.  It causes local disk writes when just
3644          * reading the file and the overhead is deemed larger
3645          * than the benefit.
3646          */
3647         /*
3648          * Force modified metadata out to stable storage.
3649          */
3650         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3651 #endif
3652 
3653         kmem_free(namlen, args->dircount);
3654 
3655         resp->status = NFS3_OK;
3656         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3657         resp->resok.cookieverf = 0;
3658         resp->resok.reply.entries = (entryplus3 *)ndata;
3659         resp->resok.reply.eof = iseof;
3660         resp->resok.size = nents;
3661         resp->resok.count = args->dircount - ret;
3662         resp->resok.maxcount = args->maxcount;
3663 
3664         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3665             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3666         if (ndata != data)
3667                 kmem_free(data, args->dircount);
3668 
3669 
3670         VN_RELE(vp);
3671 
3672         return;
3673 
3674 out:
3675         if (curthread->t_flag & T_WOULDBLOCK) {
3676                 curthread->t_flag &= ~T_WOULDBLOCK;
3677                 resp->status = NFS3ERR_JUKEBOX;
3678         } else {
3679                 resp->status = puterrno3(error);
3680         }
3681 out1:
3682         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3683             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3684 
3685         if (vp != NULL) {
3686                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3687                 VN_RELE(vp);
3688         }
3689 
3690         if (namlen != NULL)
3691                 kmem_free(namlen, args->dircount);
3692 
3693         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3694 }
3695 
3696 void *
3697 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3698 {
3699 
3700         return (&args->dir);
3701 }
3702 
3703 void
3704 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3705 {
3706 
3707         if (resp->status == NFS3_OK) {
3708                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3709                 kmem_free(resp->resok.infop,
3710                     resp->resok.size * sizeof (struct entryplus3_info));
3711         }
3712 }
3713 
3714 /* ARGSUSED */
3715 void
3716 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3717         struct svc_req *req, cred_t *cr)
3718 {
3719         int error;
3720         vnode_t *vp;
3721         struct vattr *vap;
3722         struct vattr va;
3723         struct statvfs64 sb;
3724 
3725         vap = NULL;
3726 
3727         vp = nfs3_fhtovp(&args->fsroot, exi);
3728 
3729         DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3730             cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3731 
3732         if (vp == NULL) {
3733                 error = ESTALE;
3734                 goto out;
3735         }
3736 
3737         if (is_system_labeled()) {
3738                 bslabel_t *clabel = req->rq_label;
3739 
3740                 ASSERT(clabel != NULL);
3741                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3742                     "got client label from request(1)", struct svc_req *, req);
3743 
3744                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3745                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3746                             exi)) {
3747                                 resp->status = NFS3ERR_ACCES;
3748                                 goto out1;
3749                         }
3750                 }
3751         }
3752 
3753         error = VFS_STATVFS(vp->v_vfsp, &sb);
3754 
3755         va.va_mask = AT_ALL;
3756         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3757 
3758         if (error)
3759                 goto out;
3760 
3761         resp->status = NFS3_OK;
3762         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3763         if (sb.f_blocks != (fsblkcnt64_t)-1)
3764                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3765         else
3766                 resp->resok.tbytes = (size3)sb.f_blocks;
3767         if (sb.f_bfree != (fsblkcnt64_t)-1)
3768                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3769         else
3770                 resp->resok.fbytes = (size3)sb.f_bfree;
3771         if (sb.f_bavail != (fsblkcnt64_t)-1)
3772                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3773         else
3774                 resp->resok.abytes = (size3)sb.f_bavail;
3775         resp->resok.tfiles = (size3)sb.f_files;
3776         resp->resok.ffiles = (size3)sb.f_ffree;
3777         resp->resok.afiles = (size3)sb.f_favail;
3778         resp->resok.invarsec = 0;
3779 
3780         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3781             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3782         VN_RELE(vp);
3783 
3784         return;
3785 
3786 out:
3787         if (curthread->t_flag & T_WOULDBLOCK) {
3788                 curthread->t_flag &= ~T_WOULDBLOCK;
3789                 resp->status = NFS3ERR_JUKEBOX;
3790         } else
3791                 resp->status = puterrno3(error);
3792 out1:
3793         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3794             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3795 
3796         if (vp != NULL)
3797                 VN_RELE(vp);
3798         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3799 }
3800 
3801 void *
3802 rfs3_fsstat_getfh(FSSTAT3args *args)
3803 {
3804 
3805         return (&args->fsroot);
3806 }
3807 
3808 void
3809 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3810         struct svc_req *req, cred_t *cr)
3811 {
3812         vnode_t *vp;
3813         struct vattr *vap;
3814         struct vattr va;
3815         uint32_t xfer_size;
3816         ulong_t l = 0;
3817         int error;
3818 
3819         vp = nfs3_fhtovp(&args->fsroot, exi);
3820 
3821         DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3822             cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3823 
3824         if (vp == NULL) {
3825                 if (curthread->t_flag & T_WOULDBLOCK) {
3826                         curthread->t_flag &= ~T_WOULDBLOCK;
3827                         resp->status = NFS3ERR_JUKEBOX;
3828                 } else
3829                         resp->status = NFS3ERR_STALE;
3830                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3831                 goto out;
3832         }
3833 
3834         if (is_system_labeled()) {
3835                 bslabel_t *clabel = req->rq_label;
3836 
3837                 ASSERT(clabel != NULL);
3838                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3839                     "got client label from request(1)", struct svc_req *, req);
3840 
3841                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3842                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3843                             exi)) {
3844                                 resp->status = NFS3ERR_STALE;
3845                                 vattr_to_post_op_attr(NULL,
3846                                     &resp->resfail.obj_attributes);
3847                                 goto out;
3848                         }
3849                 }
3850         }
3851 
3852         va.va_mask = AT_ALL;
3853         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3854 
3855         resp->status = NFS3_OK;
3856         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3857         xfer_size = rfs3_tsize(req);
3858         resp->resok.rtmax = xfer_size;
3859         resp->resok.rtpref = xfer_size;
3860         resp->resok.rtmult = DEV_BSIZE;
3861         resp->resok.wtmax = xfer_size;
3862         resp->resok.wtpref = xfer_size;
3863         resp->resok.wtmult = DEV_BSIZE;
3864         resp->resok.dtpref = MAXBSIZE;
3865 
3866         /*
3867          * Large file spec: want maxfilesize based on limit of
3868          * underlying filesystem.  We can guess 2^31-1 if need be.
3869          */
3870         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3871         if (error) {
3872                 resp->status = puterrno3(error);
3873                 goto out;
3874         }
3875 
3876         /*
3877          * If the underlying file system does not support _PC_FILESIZEBITS,
3878          * return a reasonable default. Note that error code on VOP_PATHCONF
3879          * will be 0, even if the underlying file system does not support
3880          * _PC_FILESIZEBITS.
3881          */
3882         if (l == (ulong_t)-1) {
3883                 resp->resok.maxfilesize = MAXOFF32_T;
3884         } else {
3885                 if (l >= (sizeof (uint64_t) * 8))
3886                         resp->resok.maxfilesize = INT64_MAX;
3887                 else
3888                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3889         }
3890 
3891         resp->resok.time_delta.seconds = 0;
3892         resp->resok.time_delta.nseconds = 1000;
3893         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3894             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3895 
3896         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3897             cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3898 
3899         VN_RELE(vp);
3900 
3901         return;
3902 
3903 out:
3904         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3905             cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3906         if (vp != NULL)
3907                 VN_RELE(vp);
3908 }
3909 
3910 void *
3911 rfs3_fsinfo_getfh(FSINFO3args *args)
3912 {
3913 
3914         return (&args->fsroot);
3915 }
3916 
3917 /* ARGSUSED */
3918 void
3919 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3920         struct svc_req *req, cred_t *cr)
3921 {
3922         int error;
3923         vnode_t *vp;
3924         struct vattr *vap;
3925         struct vattr va;
3926         ulong_t val;
3927 
3928         vap = NULL;
3929 
3930         vp = nfs3_fhtovp(&args->object, exi);
3931 
3932         DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3933             cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3934 
3935         if (vp == NULL) {
3936                 error = ESTALE;
3937                 goto out;
3938         }
3939 
3940         if (is_system_labeled()) {
3941                 bslabel_t *clabel = req->rq_label;
3942 
3943                 ASSERT(clabel != NULL);
3944                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3945                     "got client label from request(1)", struct svc_req *, req);
3946 
3947                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3948                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3949                             exi)) {
3950                                 resp->status = NFS3ERR_ACCES;
3951                                 goto out1;
3952                         }
3953                 }
3954         }
3955 
3956         va.va_mask = AT_ALL;
3957         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3958 
3959         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3960         if (error)
3961                 goto out;
3962         resp->resok.info.link_max = (uint32)val;
3963 
3964         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3965         if (error)
3966                 goto out;
3967         resp->resok.info.name_max = (uint32)val;
3968 
3969         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3970         if (error)
3971                 goto out;
3972         if (val == 1)
3973                 resp->resok.info.no_trunc = TRUE;
3974         else
3975                 resp->resok.info.no_trunc = FALSE;
3976 
3977         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3978         if (error)
3979                 goto out;
3980         if (val == 1)
3981                 resp->resok.info.chown_restricted = TRUE;
3982         else
3983                 resp->resok.info.chown_restricted = FALSE;
3984 
3985         resp->status = NFS3_OK;
3986         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3987         resp->resok.info.case_insensitive = FALSE;
3988         resp->resok.info.case_preserving = TRUE;
3989         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3990             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3991         VN_RELE(vp);
3992         return;
3993 
3994 out:
3995         if (curthread->t_flag & T_WOULDBLOCK) {
3996                 curthread->t_flag &= ~T_WOULDBLOCK;
3997                 resp->status = NFS3ERR_JUKEBOX;
3998         } else
3999                 resp->status = puterrno3(error);
4000 out1:
4001         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4002             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4003         if (vp != NULL)
4004                 VN_RELE(vp);
4005         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4006 }
4007 
4008 void *
4009 rfs3_pathconf_getfh(PATHCONF3args *args)
4010 {
4011 
4012         return (&args->object);
4013 }
4014 
4015 void
4016 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4017         struct svc_req *req, cred_t *cr)
4018 {
4019         int error;
4020         vnode_t *vp;
4021         struct vattr *bvap;
4022         struct vattr bva;
4023         struct vattr *avap;
4024         struct vattr ava;
4025 
4026         bvap = NULL;
4027         avap = NULL;
4028 
4029         vp = nfs3_fhtovp(&args->file, exi);
4030 
4031         DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4032             cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4033 
4034         if (vp == NULL) {
4035                 error = ESTALE;
4036                 goto out;
4037         }
4038 
4039         bva.va_mask = AT_ALL;
4040         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4041 
4042         /*
4043          * If we can't get the attributes, then we can't do the
4044          * right access checking.  So, we'll fail the request.
4045          */
4046         if (error)
4047                 goto out;
4048 
4049         bvap = &bva;
4050 
4051         if (rdonly(exi, req)) {
4052                 resp->status = NFS3ERR_ROFS;
4053                 goto out1;
4054         }
4055 
4056         if (vp->v_type != VREG) {
4057                 resp->status = NFS3ERR_INVAL;
4058                 goto out1;
4059         }
4060 
4061         if (is_system_labeled()) {
4062                 bslabel_t *clabel = req->rq_label;
4063 
4064                 ASSERT(clabel != NULL);
4065                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4066                     "got client label from request(1)", struct svc_req *, req);
4067 
4068                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4069                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4070                             exi)) {
4071                                 resp->status = NFS3ERR_ACCES;
4072                                 goto out1;
4073                         }
4074                 }
4075         }
4076 
4077         if (crgetuid(cr) != bva.va_uid &&
4078             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4079                 goto out;
4080 
4081         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4082 
4083         ava.va_mask = AT_ALL;
4084         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4085 
4086         if (error)
4087                 goto out;
4088 
4089         resp->status = NFS3_OK;
4090         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4091         resp->resok.verf = write3verf;
4092 
4093         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4094             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4095 
4096         VN_RELE(vp);
4097 
4098         return;
4099 
4100 out:
4101         if (curthread->t_flag & T_WOULDBLOCK) {
4102                 curthread->t_flag &= ~T_WOULDBLOCK;
4103                 resp->status = NFS3ERR_JUKEBOX;
4104         } else
4105                 resp->status = puterrno3(error);
4106 out1:
4107         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4108             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4109 
4110         if (vp != NULL)
4111                 VN_RELE(vp);
4112         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4113 }
4114 
4115 void *
4116 rfs3_commit_getfh(COMMIT3args *args)
4117 {
4118 
4119         return (&args->file);
4120 }
4121 
4122 static int
4123 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4124 {
4125 
4126         vap->va_mask = 0;
4127 
4128         if (sap->mode.set_it) {
4129                 vap->va_mode = (mode_t)sap->mode.mode;
4130                 vap->va_mask |= AT_MODE;
4131         }
4132         if (sap->uid.set_it) {
4133                 vap->va_uid = (uid_t)sap->uid.uid;
4134                 vap->va_mask |= AT_UID;
4135         }
4136         if (sap->gid.set_it) {
4137                 vap->va_gid = (gid_t)sap->gid.gid;
4138                 vap->va_mask |= AT_GID;
4139         }
4140         if (sap->size.set_it) {
4141                 if (sap->size.size > (size3)((u_longlong_t)-1))
4142                         return (EINVAL);
4143                 vap->va_size = sap->size.size;
4144                 vap->va_mask |= AT_SIZE;
4145         }
4146         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4147 #ifndef _LP64
4148                 /* check time validity */
4149                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4150                         return (EOVERFLOW);
4151 #endif
4152                 /*
4153                  * nfs protocol defines times as unsigned so don't extend sign,
4154                  * unless sysadmin set nfs_allow_preepoch_time.
4155                  */
4156                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4157                     sap->atime.atime.seconds);
4158                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4159                 vap->va_mask |= AT_ATIME;
4160         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4161                 gethrestime(&vap->va_atime);
4162                 vap->va_mask |= AT_ATIME;
4163         }
4164         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4165 #ifndef _LP64
4166                 /* check time validity */
4167                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4168                         return (EOVERFLOW);
4169 #endif
4170                 /*
4171                  * nfs protocol defines times as unsigned so don't extend sign,
4172                  * unless sysadmin set nfs_allow_preepoch_time.
4173                  */
4174                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4175                     sap->mtime.mtime.seconds);
4176                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4177                 vap->va_mask |= AT_MTIME;
4178         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4179                 gethrestime(&vap->va_mtime);
4180                 vap->va_mask |= AT_MTIME;
4181         }
4182 
4183         return (0);
4184 }
4185 
4186 static ftype3 vt_to_nf3[] = {
4187         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4188 };
4189 
4190 static int
4191 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4192 {
4193 
4194         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4195         /* Return error if time or size overflow */
4196         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4197                 return (EOVERFLOW);
4198         }
4199         fap->type = vt_to_nf3[vap->va_type];
4200         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4201         fap->nlink = (uint32)vap->va_nlink;
4202         if (vap->va_uid == UID_NOBODY)
4203                 fap->uid = (uid3)NFS_UID_NOBODY;
4204         else
4205                 fap->uid = (uid3)vap->va_uid;
4206         if (vap->va_gid == GID_NOBODY)
4207                 fap->gid = (gid3)NFS_GID_NOBODY;
4208         else
4209                 fap->gid = (gid3)vap->va_gid;
4210         fap->size = (size3)vap->va_size;
4211         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4212         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4213         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4214         fap->fsid = (uint64)vap->va_fsid;
4215         fap->fileid = (fileid3)vap->va_nodeid;
4216         fap->atime.seconds = vap->va_atime.tv_sec;
4217         fap->atime.nseconds = vap->va_atime.tv_nsec;
4218         fap->mtime.seconds = vap->va_mtime.tv_sec;
4219         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4220         fap->ctime.seconds = vap->va_ctime.tv_sec;
4221         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4222         return (0);
4223 }
4224 
4225 static int
4226 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4227 {
4228 
4229         /* Return error if time or size overflow */
4230         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4231             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4232             NFS3_SIZE_OK(vap->va_size))) {
4233                 return (EOVERFLOW);
4234         }
4235         wccap->size = (size3)vap->va_size;
4236         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4237         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4238         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4239         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4240         return (0);
4241 }
4242 
4243 static void
4244 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4245 {
4246 
4247         /* don't return attrs if time overflow */
4248         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4249                 poap->attributes = TRUE;
4250         } else
4251                 poap->attributes = FALSE;
4252 }
4253 
4254 void
4255 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4256 {
4257 
4258         /* don't return attrs if time overflow */
4259         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4260                 poap->attributes = TRUE;
4261         } else
4262                 poap->attributes = FALSE;
4263 }
4264 
4265 static void
4266 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4267 {
4268 
4269         vattr_to_pre_op_attr(bvap, &wccp->before);
4270         vattr_to_post_op_attr(avap, &wccp->after);
4271 }
4272 
4273 void
4274 rfs3_srvrinit(void)
4275 {
4276         struct rfs3_verf_overlay {
4277                 uint_t id; /* a "unique" identifier */
4278                 int ts; /* a unique timestamp */
4279         } *verfp;
4280         timestruc_t now;
4281 
4282         /*
4283          * The following algorithm attempts to find a unique verifier
4284          * to be used as the write verifier returned from the server
4285          * to the client.  It is important that this verifier change
4286          * whenever the server reboots.  Of secondary importance, it
4287          * is important for the verifier to be unique between two
4288          * different servers.
4289          *
4290          * Thus, an attempt is made to use the system hostid and the
4291          * current time in seconds when the nfssrv kernel module is
4292          * loaded.  It is assumed that an NFS server will not be able
4293          * to boot and then to reboot in less than a second.  If the
4294          * hostid has not been set, then the current high resolution
4295          * time is used.  This will ensure different verifiers each
4296          * time the server reboots and minimize the chances that two
4297          * different servers will have the same verifier.
4298          */
4299 
4300 #ifndef lint
4301         /*
4302          * We ASSERT that this constant logic expression is
4303          * always true because in the past, it wasn't.
4304          */
4305         ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4306 #endif
4307 
4308         gethrestime(&now);
4309         verfp = (struct rfs3_verf_overlay *)&write3verf;
4310         verfp->ts = (int)now.tv_sec;
4311         verfp->id = zone_get_hostid(NULL);
4312 
4313         if (verfp->id == 0)
4314                 verfp->id = (uint_t)now.tv_nsec;
4315 
4316         nfs3_srv_caller_id = fs_new_caller_id();
4317 
4318 }
4319 
4320 static int
4321 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4322 {
4323         struct clist    *wcl;
4324         int             wlist_len;
4325         count3          count = rok->count;
4326 
4327         wcl = args->wlist;
4328         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4329                 return (FALSE);
4330         }
4331 
4332         wcl = args->wlist;
4333         rok->wlist_len = wlist_len;
4334         rok->wlist = wcl;
4335         return (TRUE);
4336 }
4337 
/*
 * Counterpart of rfs3_srvrinit().  Intentionally empty.
 */
void
rfs3_srvrfini(void)
{
        /* There is nothing to undo here. */
}