1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  23  *
  24  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  25  */
  26 
  27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  28 /* All Rights Reserved */
  29 
  30 #include <sys/param.h>
  31 #include <sys/types.h>
  32 #include <sys/systm.h>
  33 #include <sys/cred.h>
  34 #include <sys/buf.h>
  35 #include <sys/vfs.h>
  36 #include <sys/vnode.h>
  37 #include <sys/uio.h>
  38 #include <sys/errno.h>
  39 #include <sys/sysmacros.h>
  40 #include <sys/statvfs.h>
  41 #include <sys/kmem.h>
  42 #include <sys/dirent.h>
  43 #include <sys/cmn_err.h>
  44 #include <sys/debug.h>
  45 #include <sys/systeminfo.h>
  46 #include <sys/flock.h>
  47 #include <sys/nbmlock.h>
  48 #include <sys/policy.h>
  49 #include <sys/sdt.h>
  50 
  51 #include <rpc/types.h>
  52 #include <rpc/auth.h>
  53 #include <rpc/svc.h>
  54 #include <rpc/rpc_rdma.h>
  55 
  56 #include <nfs/nfs.h>
  57 #include <nfs/export.h>
  58 #include <nfs/nfs_cmd.h>
  59 
  60 #include <sys/strsubr.h>
  61 
  62 #include <sys/tsol/label.h>
  63 #include <sys/tsol/tndb.h>
  64 
  65 #include <sys/zone.h>
  66 
  67 #include <inet/ip.h>
  68 #include <inet/ip6.h>
  69 
  70 /*
  71  * These are the interface routines for the server side of the
  72  * Network File System.  See the NFS version 3 protocol specification
  73  * for a description of this interface.
  74  */
  75 
/*
 * File-scope write verifier.  It is not referenced by the routines in this
 * part of the file; presumably it is initialized and used by the WRITE and
 * COMMIT handlers -- TODO confirm.
 */
static writeverf3 write3verf;

/*
 * Forward declarations of the attribute-conversion and RDMA helpers that
 * are private to this file.
 */
static int      sattr3_to_vattr(sattr3 *, struct vattr *);
static int      vattr_to_fattr3(struct vattr *, fattr3 *);
static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
static int      rdma_setup_read_data3(READ3args *, READ3resok *);

/* Defined in another file; not referenced by the routines in this part. */
extern int nfs_loaned_buffers;

/*
 * Caller id that rfs3_setattr() stores in caller_context_t.cc_caller_id
 * before calling VOP_SPACE(), VOP_SETATTR() and VOP_FSYNC().  The code that
 * assigns it is not in this part of the file.
 */
u_longlong_t nfs3_srv_caller_id;
  88 
  89 /* ARGSUSED */
  90 void
  91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
  92         struct svc_req *req, cred_t *cr)
  93 {
  94         int error;
  95         vnode_t *vp;
  96         struct vattr va;
  97 
  98         vp = nfs3_fhtovp(&args->object, exi);
  99 
 100         DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
 101             cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
 102 
 103         if (vp == NULL) {
 104                 error = ESTALE;
 105                 goto out;
 106         }
 107 
 108         va.va_mask = AT_ALL;
 109         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 110 
 111         if (!error) {
 112                 /* Lie about the object type for a referral */
 113                 if (vn_is_nfs_reparse(vp, cr))
 114                         va.va_type = VLNK;
 115 
 116                 /* overflow error if time or size is out of range */
 117                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 118                 if (error)
 119                         goto out;
 120                 resp->status = NFS3_OK;
 121 
 122                 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 123                     cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 124 
 125                 VN_RELE(vp);
 126 
 127                 return;
 128         }
 129 
 130 out:
 131         if (curthread->t_flag & T_WOULDBLOCK) {
 132                 curthread->t_flag &= ~T_WOULDBLOCK;
 133                 resp->status = NFS3ERR_JUKEBOX;
 134         } else
 135                 resp->status = puterrno3(error);
 136 
 137         DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 138             cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 139 
 140         if (vp != NULL)
 141                 VN_RELE(vp);
 142 }
 143 
 144 void *
 145 rfs3_getattr_getfh(GETATTR3args *args)
 146 {
 147 
 148         return (&args->object);
 149 }
 150 
/*
 * NFSv3 SETATTR service routine.
 *
 * Maps the file handle in args->object to a vnode, converts the requested
 * sattr3 into a vattr and applies it.  A size change on a regular file that
 * is requested by the file's owner is carried out with VOP_SPACE(); whatever
 * remains in the attribute mask is applied with VOP_SETATTR().  The
 * attributes fetched before and after the operation are returned as
 * wcc_data in both the success and the failure reply.
 *
 * resp->status is NFS3_OK on success.  On failure it is either set directly
 * below (NFS3ERR_ACCES, NFS3ERR_ROFS, NFS3ERR_NOT_SYNC, NFS3ERR_JUKEBOX) or
 * derived from the errno with puterrno3().
 */
void
rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
        struct svc_req *req, cred_t *cr)
{
        int error;
        vnode_t *vp;
        struct vattr *bvap;     /* attributes before the change, or NULL */
        struct vattr bva;
        struct vattr *avap;     /* attributes after the change, or NULL */
        struct vattr ava;       /* requested attributes, reused for "after" */
        int flag;
        int in_crit = 0;        /* set once nbl_start_crit() has been called */
        struct flock64 bf;
        caller_context_t ct;

        bvap = NULL;
        avap = NULL;

        vp = nfs3_fhtovp(&args->object, exi);

        DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);

        if (vp == NULL) {
                error = ESTALE;
                goto out;
        }

        error = sattr3_to_vattr(&args->new_attributes, &ava);
        if (error)
                goto out;

        if (is_system_labeled()) {
                bslabel_t *clabel = req->rq_label;

                ASSERT(clabel != NULL);
                DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
                    "got client label from request(1)", struct svc_req *, req);

                /*
                 * A client whose label is not admin_low must pass an
                 * equality check against the object before modifying it.
                 */
                if (!blequal(&l_admin_low->tsl_label, clabel)) {
                        if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
                            exi)) {
                                resp->status = NFS3ERR_ACCES;
                                goto out1;
                        }
                }
        }

        /*
         * We need to specially handle size changes because of
         * possible conflicting NBMAND locks. Get into critical
         * region before VOP_GETATTR, so the size attribute is
         * valid when checking conflicts.
         *
         * Also, check to see if the v4 side of the server has
         * delegated this file.  If so, then we return JUKEBOX to
         * allow the client to retransmit its request.
         */
        if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
                if (nbl_need_check(vp)) {
                        nbl_start_crit(vp, RW_READER);
                        in_crit = 1;
                }
        }

        bva.va_mask = AT_ALL;
        error = rfs4_delegated_getattr(vp, &bva, 0, cr);

        /*
         * If we can't get the attributes, then we can't do the
         * right access checking.  So, we'll fail the request.
         */
        if (error)
                goto out;

        bvap = &bva;

        if (rdonly(exi, req) || vn_is_readonly(vp)) {
                resp->status = NFS3ERR_ROFS;
                goto out1;
        }

        /*
         * Guarded update: the ctime supplied by the client must match
         * the object's current ctime exactly.
         */
        if (args->guard.check &&
            (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
            args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
                resp->status = NFS3ERR_NOT_SYNC;
                goto out1;
        }

        /* ATTR_UTIME is passed only when the client supplied the mtime. */
        if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
                flag = ATTR_UTIME;
        else
                flag = 0;

        /*
         * If the filesystem is exported with nosuid, then mask off
         * the setuid and setgid bits.
         */
        if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
            (exi->exi_export.ex_flags & EX_NOSUID))
                ava.va_mode &= ~(VSUID | VSGID);

        /*
         * Caller context handed to the VOP calls below.  CC_DONTBLOCK is
         * requested; a resulting CC_WOULDBLOCK is checked after the
         * VOP_SETATTR() call.
         */
        ct.cc_sysid = 0;
        ct.cc_pid = 0;
        ct.cc_caller_id = nfs3_srv_caller_id;
        ct.cc_flags = CC_DONTBLOCK;

        /*
         * We need to specially handle size changes because it is
         * possible for the client to create a file with modes
         * which indicate read-only, but with the file opened for
         * writing.  If the client then tries to set the size of
         * the file, then the normal access checking done in
         * VOP_SETATTR would prevent the client from doing so,
         * although it should be legal for it to do so.  To get
         * around this, we do the access checking for ourselves
         * and then use VOP_SPACE which doesn't do the access
         * checking which VOP_SETATTR does. VOP_SPACE can only
         * operate on VREG files, let VOP_SETATTR handle the other
         * extremely rare cases.
         * Also the client should not be allowed to change the
         * size of the file if there is a conflicting non-blocking
         * mandatory lock in the region of the change.
         */
        if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
                if (in_crit) {
                        u_offset_t offset;
                        ssize_t length;

                        /*
                         * The affected region is the span between the
                         * old and the new end of file, whichever way
                         * the size is moving.
                         */
                        if (ava.va_size < bva.va_size) {
                                offset = ava.va_size;
                                length = bva.va_size - ava.va_size;
                        } else {
                                offset = bva.va_size;
                                length = ava.va_size - bva.va_size;
                        }
                        if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
                            NULL)) {
                                error = EACCES;
                                goto out;
                        }
                }

                /*
                 * Owner changing the size: do it with VOP_SPACE() and
                 * drop AT_SIZE from the mask so VOP_SETATTR() does not
                 * repeat it.
                 */
                if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
                        ava.va_mask &= ~AT_SIZE;
                        bf.l_type = F_WRLCK;
                        bf.l_whence = 0;
                        bf.l_start = (off64_t)ava.va_size;
                        bf.l_len = 0;
                        bf.l_sysid = 0;
                        bf.l_pid = 0;
                        error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
                            (offset_t)ava.va_size, cr, &ct);
                }
        }

        /* Apply whatever is still requested, unless VOP_SPACE() failed. */
        if (!error && ava.va_mask)
                error = VOP_SETATTR(vp, &ava, flag, cr, &ct);

        /* check if a monitor detected a delegation conflict */
        if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
                resp->status = NFS3ERR_JUKEBOX;
                goto out1;
        }

        /*
         * Fetch the post-operation attributes even if the update failed;
         * ava is reused as the destination.
         */
        ava.va_mask = AT_ALL;
        avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;

        /*
         * Force modified metadata out to stable storage.
         */
        (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

        if (error)
                goto out;

        if (in_crit)
                nbl_end_crit(vp);

        resp->status = NFS3_OK;
        vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);

        DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);

        VN_RELE(vp);

        return;

out:
        /* Failure with an errno in 'error': translate it to an NFS status. */
        if (curthread->t_flag & T_WOULDBLOCK) {
                curthread->t_flag &= ~T_WOULDBLOCK;
                resp->status = NFS3ERR_JUKEBOX;
        } else
                resp->status = puterrno3(error);
out1:
        /* Failure with resp->status already set. */
        DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);

        if (vp != NULL) {
                if (in_crit)
                        nbl_end_crit(vp);
                VN_RELE(vp);
        }
        /* bvap and avap are NULL or point at locals, so vp is not needed. */
        vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
}
 357 
 358 void *
 359 rfs3_setattr_getfh(SETATTR3args *args)
 360 {
 361 
 362         return (&args->object);
 363 }
 364 
 365 /* ARGSUSED */
 366 void
 367 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 368         struct svc_req *req, cred_t *cr)
 369 {
 370         int error;
 371         vnode_t *vp;
 372         vnode_t *dvp;
 373         struct vattr *vap;
 374         struct vattr va;
 375         struct vattr *dvap;
 376         struct vattr dva;
 377         nfs_fh3 *fhp;
 378         struct sec_ol sec = {0, 0};
 379         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 380         struct sockaddr *ca;
 381         char *name = NULL;
 382 
 383         dvap = NULL;
 384 
 385         /* Take an extra hold here in case of 'exi' switching */
 386         if (exi != NULL)
 387                 exi_hold(exi);
 388 
 389         /*
 390          * Allow lookups from the root - the default
 391          * location of the public filehandle.
 392          */
 393         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 394                 dvp = rootdir;
 395                 VN_HOLD(dvp);
 396 
 397                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 398                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 399         } else {
 400                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 401 
 402                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 403                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 404 
 405                 if (dvp == NULL) {
 406                         error = ESTALE;
 407                         goto out;
 408                 }
 409         }
 410 
 411         dva.va_mask = AT_ALL;
 412         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 413 
 414         if (args->what.name == nfs3nametoolong) {
 415                 resp->status = NFS3ERR_NAMETOOLONG;
 416                 goto out1;
 417         }
 418 
 419         if (args->what.name == NULL || *(args->what.name) == '\0') {
 420                 resp->status = NFS3ERR_ACCES;
 421                 goto out1;
 422         }
 423 
 424         fhp = &args->what.dir;
 425         if (strcmp(args->what.name, "..") == 0 &&
 426             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 427                 resp->status = NFS3ERR_NOENT;
 428                 goto out1;
 429         }
 430 
 431         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 432         name = nfscmd_convname(ca, exi, args->what.name,
 433             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 434 
 435         if (name == NULL) {
 436                 resp->status = NFS3ERR_ACCES;
 437                 goto out1;
 438         }
 439 
 440         /*
 441          * If the public filehandle is used then allow
 442          * a multi-component lookup
 443          */
 444         if (PUBLIC_FH3(&args->what.dir)) {
 445                 struct exportinfo *new;
 446 
 447                 publicfh_flag = TRUE;
 448 
 449                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 450                     &new, &sec);
 451 
 452                 if (error == 0) {
 453                         exi_rele(exi);
 454                         exi = new;
 455                 }
 456 
 457                 /*
 458                  * Since WebNFS may bypass MOUNT, we need to ensure this
 459                  * request didn't come from an unlabeled admin_low client.
 460                  */
 461                 if (is_system_labeled() && error == 0) {
 462                         int             addr_type;
 463                         void            *ipaddr;
 464                         tsol_tpc_t      *tp;
 465 
 466                         if (ca->sa_family == AF_INET) {
 467                                 addr_type = IPV4_VERSION;
 468                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 469                         } else if (ca->sa_family == AF_INET6) {
 470                                 addr_type = IPV6_VERSION;
 471                                 ipaddr = &((struct sockaddr_in6 *)
 472                                     ca)->sin6_addr;
 473                         }
 474                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 475                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 476                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 477                             SUN_CIPSO) {
 478                                 VN_RELE(vp);
 479                                 resp->status = NFS3ERR_ACCES;
 480                                 error = 1;
 481                         }
 482                         if (tp != NULL)
 483                                 TPC_RELE(tp);
 484                 }
 485         } else {
 486                 error = VOP_LOOKUP(dvp, name, &vp,
 487                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 488         }
 489 
 490         if (name != args->what.name)
 491                 kmem_free(name, MAXPATHLEN + 1);
 492 
 493         if (is_system_labeled() && error == 0) {
 494                 bslabel_t *clabel = req->rq_label;
 495 
 496                 ASSERT(clabel != NULL);
 497                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 498                     "got client label from request(1)", struct svc_req *, req);
 499 
 500                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 501                         if (!do_rfs_label_check(clabel, dvp,
 502                             DOMINANCE_CHECK, exi)) {
 503                                 VN_RELE(vp);
 504                                 resp->status = NFS3ERR_ACCES;
 505                                 error = 1;
 506                         }
 507                 }
 508         }
 509 
 510         dva.va_mask = AT_ALL;
 511         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 512 
 513         if (error)
 514                 goto out;
 515 
 516         if (sec.sec_flags & SEC_QUERY) {
 517                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 518         } else {
 519                 error = makefh3(&resp->resok.object, vp, exi);
 520                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 521                         auth_weak = TRUE;
 522         }
 523 
 524         if (error) {
 525                 VN_RELE(vp);
 526                 goto out;
 527         }
 528 
 529         va.va_mask = AT_ALL;
 530         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 531 
 532         exi_rele(exi);
 533         VN_RELE(vp);
 534 
 535         resp->status = NFS3_OK;
 536         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 537         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 538 
 539         /*
 540          * If it's public fh, no 0x81, and client's flavor is
 541          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 542          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 543          */
 544         if (auth_weak)
 545                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 546 
 547         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 548             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 549         VN_RELE(dvp);
 550 
 551         return;
 552 
 553 out:
 554         if (curthread->t_flag & T_WOULDBLOCK) {
 555                 curthread->t_flag &= ~T_WOULDBLOCK;
 556                 resp->status = NFS3ERR_JUKEBOX;
 557         } else
 558                 resp->status = puterrno3(error);
 559 out1:
 560         if (exi != NULL)
 561                 exi_rele(exi);
 562 
 563         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 564             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 565 
 566         if (dvp != NULL)
 567                 VN_RELE(dvp);
 568         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 569 
 570 }
 571 
 572 void *
 573 rfs3_lookup_getfh(LOOKUP3args *args)
 574 {
 575 
 576         return (&args->what.dir);
 577 }
 578 
 579 /* ARGSUSED */
 580 void
 581 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 582         struct svc_req *req, cred_t *cr)
 583 {
 584         int error;
 585         vnode_t *vp;
 586         struct vattr *vap;
 587         struct vattr va;
 588         int checkwriteperm;
 589         boolean_t dominant_label = B_FALSE;
 590         boolean_t equal_label = B_FALSE;
 591         boolean_t admin_low_client;
 592 
 593         vap = NULL;
 594 
 595         vp = nfs3_fhtovp(&args->object, exi);
 596 
 597         DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
 598             cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
 599 
 600         if (vp == NULL) {
 601                 error = ESTALE;
 602                 goto out;
 603         }
 604 
 605         /*
 606          * If the file system is exported read only, it is not appropriate
 607          * to check write permissions for regular files and directories.
 608          * Special files are interpreted by the client, so the underlying
 609          * permissions are sent back to the client for interpretation.
 610          */
 611         if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
 612                 checkwriteperm = 0;
 613         else
 614                 checkwriteperm = 1;
 615 
 616         /*
 617          * We need the mode so that we can correctly determine access
 618          * permissions relative to a mandatory lock file.  Access to
 619          * mandatory lock files is denied on the server, so it might
 620          * as well be reflected to the server during the open.
 621          */
 622         va.va_mask = AT_MODE;
 623         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 624         if (error)
 625                 goto out;
 626 
 627         vap = &va;
 628 
 629         resp->resok.access = 0;
 630 
 631         if (is_system_labeled()) {
 632                 bslabel_t *clabel = req->rq_label;
 633 
 634                 ASSERT(clabel != NULL);
 635                 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 636                     "got client label from request(1)", struct svc_req *, req);
 637 
 638                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 639                         if ((equal_label = do_rfs_label_check(clabel, vp,
 640                             EQUALITY_CHECK, exi)) == B_FALSE) {
 641                                 dominant_label = do_rfs_label_check(clabel,
 642                                     vp, DOMINANCE_CHECK, exi);
 643                         } else
 644                                 dominant_label = B_TRUE;
 645                         admin_low_client = B_FALSE;
 646                 } else
 647                         admin_low_client = B_TRUE;
 648         }
 649 
 650         if (args->access & ACCESS3_READ) {
 651                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 652                 if (error) {
 653                         if (curthread->t_flag & T_WOULDBLOCK)
 654                                 goto out;
 655                 } else if (!MANDLOCK(vp, va.va_mode) &&
 656                     (!is_system_labeled() || admin_low_client ||
 657                     dominant_label))
 658                         resp->resok.access |= ACCESS3_READ;
 659         }
 660         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 661                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 662                 if (error) {
 663                         if (curthread->t_flag & T_WOULDBLOCK)
 664                                 goto out;
 665                 } else if (!is_system_labeled() || admin_low_client ||
 666                     dominant_label)
 667                         resp->resok.access |= ACCESS3_LOOKUP;
 668         }
 669         if (checkwriteperm &&
 670             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 671                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 672                 if (error) {
 673                         if (curthread->t_flag & T_WOULDBLOCK)
 674                                 goto out;
 675                 } else if (!MANDLOCK(vp, va.va_mode) &&
 676                     (!is_system_labeled() || admin_low_client || equal_label)) {
 677                         resp->resok.access |=
 678                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 679                 }
 680         }
 681         if (checkwriteperm &&
 682             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 683                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 684                 if (error) {
 685                         if (curthread->t_flag & T_WOULDBLOCK)
 686                                 goto out;
 687                 } else if (!is_system_labeled() || admin_low_client ||
 688                     equal_label)
 689                         resp->resok.access |= ACCESS3_DELETE;
 690         }
 691         if (args->access & ACCESS3_EXECUTE) {
 692                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 693                 if (error) {
 694                         if (curthread->t_flag & T_WOULDBLOCK)
 695                                 goto out;
 696                 } else if (!MANDLOCK(vp, va.va_mode) &&
 697                     (!is_system_labeled() || admin_low_client ||
 698                     dominant_label))
 699                         resp->resok.access |= ACCESS3_EXECUTE;
 700         }
 701 
 702         va.va_mask = AT_ALL;
 703         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 704 
 705         resp->status = NFS3_OK;
 706         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 707 
 708         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 709             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 710 
 711         VN_RELE(vp);
 712 
 713         return;
 714 
 715 out:
 716         if (curthread->t_flag & T_WOULDBLOCK) {
 717                 curthread->t_flag &= ~T_WOULDBLOCK;
 718                 resp->status = NFS3ERR_JUKEBOX;
 719         } else
 720                 resp->status = puterrno3(error);
 721         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 722             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 723         if (vp != NULL)
 724                 VN_RELE(vp);
 725         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 726 }
 727 
 728 void *
 729 rfs3_access_getfh(ACCESS3args *args)
 730 {
 731 
 732         return (&args->object);
 733 }
 734 
 735 /* ARGSUSED */
 736 void
 737 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 738         struct svc_req *req, cred_t *cr)
 739 {
 740         int error;
 741         vnode_t *vp;
 742         struct vattr *vap;
 743         struct vattr va;
 744         struct iovec iov;
 745         struct uio uio;
 746         char *data;
 747         struct sockaddr *ca;
 748         char *name = NULL;
 749         int is_referral = 0;
 750 
 751         vap = NULL;
 752 
 753         vp = nfs3_fhtovp(&args->symlink, exi);
 754 
 755         DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
 756             cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
 757 
 758         if (vp == NULL) {
 759                 error = ESTALE;
 760                 goto out;
 761         }
 762 
 763         va.va_mask = AT_ALL;
 764         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 765         if (error)
 766                 goto out;
 767 
 768         vap = &va;
 769 
 770         /* We lied about the object type for a referral */
 771         if (vn_is_nfs_reparse(vp, cr))
 772                 is_referral = 1;
 773 
 774         if (vp->v_type != VLNK && !is_referral) {
 775                 resp->status = NFS3ERR_INVAL;
 776                 goto out1;
 777         }
 778 
 779         if (MANDLOCK(vp, va.va_mode)) {
 780                 resp->status = NFS3ERR_ACCES;
 781                 goto out1;
 782         }
 783 
 784         if (is_system_labeled()) {
 785                 bslabel_t *clabel = req->rq_label;
 786 
 787                 ASSERT(clabel != NULL);
 788                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 789                     "got client label from request(1)", struct svc_req *, req);
 790 
 791                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 792                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 793                             exi)) {
 794                                 resp->status = NFS3ERR_ACCES;
 795                                 goto out1;
 796                         }
 797                 }
 798         }
 799 
 800         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 801 
 802         if (is_referral) {
 803                 char *s;
 804                 size_t strsz;
 805 
 806                 /* Get an artificial symlink based on a referral */
 807                 s = build_symlink(vp, cr, &strsz);
 808                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 809                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 810                     vnode_t *, vp, char *, s);
 811                 if (s == NULL)
 812                         error = EINVAL;
 813                 else {
 814                         error = 0;
 815                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 816                         kmem_free(s, strsz);
 817                 }
 818 
 819         } else {
 820 
 821                 iov.iov_base = data;
 822                 iov.iov_len = MAXPATHLEN;
 823                 uio.uio_iov = &iov;
 824                 uio.uio_iovcnt = 1;
 825                 uio.uio_segflg = UIO_SYSSPACE;
 826                 uio.uio_extflg = UIO_COPY_CACHED;
 827                 uio.uio_loffset = 0;
 828                 uio.uio_resid = MAXPATHLEN;
 829 
 830                 error = VOP_READLINK(vp, &uio, cr, NULL);
 831 
 832                 if (!error)
 833                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 834         }
 835 
 836         va.va_mask = AT_ALL;
 837         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 838 
 839         /* Lie about object type again just to be consistent */
 840         if (is_referral && vap != NULL)
 841                 vap->va_type = VLNK;
 842 
 843 #if 0 /* notyet */
 844         /*
 845          * Don't do this.  It causes local disk writes when just
 846          * reading the file and the overhead is deemed larger
 847          * than the benefit.
 848          */
 849         /*
 850          * Force modified metadata out to stable storage.
 851          */
 852         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 853 #endif
 854 
 855         if (error) {
 856                 kmem_free(data, MAXPATHLEN + 1);
 857                 goto out;
 858         }
 859 
 860         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 861         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 862             MAXPATHLEN + 1);
 863 
 864         if (name == NULL) {
 865                 /*
 866                  * Even though the conversion failed, we return
 867                  * something. We just don't translate it.
 868                  */
 869                 name = data;
 870         }
 871 
 872         resp->status = NFS3_OK;
 873         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 874         resp->resok.data = name;
 875 
 876         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 877             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 878         VN_RELE(vp);
 879 
 880         if (name != data)
 881                 kmem_free(data, MAXPATHLEN + 1);
 882 
 883         return;
 884 
 885 out:
 886         if (curthread->t_flag & T_WOULDBLOCK) {
 887                 curthread->t_flag &= ~T_WOULDBLOCK;
 888                 resp->status = NFS3ERR_JUKEBOX;
 889         } else
 890                 resp->status = puterrno3(error);
 891 out1:
 892         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 893             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 894         if (vp != NULL)
 895                 VN_RELE(vp);
 896         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 897 }
 898 
 899 void *
 900 rfs3_readlink_getfh(READLINK3args *args)
 901 {
 902 
 903         return (&args->symlink);
 904 }
 905 
 906 void
 907 rfs3_readlink_free(READLINK3res *resp)
 908 {
 909 
 910         if (resp->status == NFS3_OK)
 911                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
 912 }
 913 
 914 /*
 915  * Server routine to handle read
 916  * May handle RDMA data as well as mblks
      *
      * Looks up the vnode for args->file, runs the label, non-blocking
      * mandatory lock, file type and access checks, and then reads up to
      * args->count bytes starting at args->offset.  The data goes back to
      * the client through one of three buffers:
      *   - the client's RDMA write list, when args->wlist is set;
      *   - loaned (zero-copy) buffers, when nfs_loaned_buffers is set, RDMA
      *     is not in use and VOP_REQZCBUF() succeeds;
      *   - a newly allocated mblk otherwise.
      *
      * On success resp->status is NFS3_OK and resp->resok is filled in.  On
      * failure resp->status holds the NFS3 error and
      * resp->resfail.file_attributes is built from whatever attributes were
      * fetched (possibly none).  The vnode returned by nfs3_fhtovp() is
      * released with VN_RELE() on every exit path.
 917  */
 918 /* ARGSUSED */
 919 void
 920 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
 921         struct svc_req *req, cred_t *cr)
 922 {
 923         int error;
 924         vnode_t *vp;
 925         struct vattr *vap;
 926         struct vattr va;
 927         struct iovec iov;
 928         struct uio uio;
 929         u_offset_t offset;
 930         mblk_t *mp = NULL;
 931         int alloc_err = 0;
 932         int in_crit = 0;        /* inside nbl_start_crit() region */
 933         int need_rwunlock = 0;  /* VOP_RWLOCK() must be undone on exit */
 934         caller_context_t ct;
 935         int rdma_used = 0;      /* reply data goes out via args->wlist */
 936         int loaned_buffers;     /* attempt/use zero-copy loaned buffers */
 937         struct uio *uiop;
 938 
 939         vap = NULL;
 940 
 941         vp = nfs3_fhtovp(&args->file, exi);
 942 
 943         DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
 944             cred_t *, cr, vnode_t *, vp, READ3args *, args);
 945 
 946         if (vp == NULL) {
 947                 error = ESTALE;
 948                 goto out;
 949         }
 950 
             /*
              * RDMA reply: the requested count may not exceed what
              * clist_len() reports for the client-supplied write list.
              */
 951         if (args->wlist) {
 952                 if (args->count > clist_len(args->wlist)) {
 953                         error = EINVAL;
 954                         goto out;
 955                 }
 956                 rdma_used = 1;
 957         }
 958 
 959         /* use loaned buffers for TCP */
 960         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
 961 
             /*
              * On labeled systems a client whose label is not admin_low
              * must pass the dominance check against the file.
              */
 962         if (is_system_labeled()) {
 963                 bslabel_t *clabel = req->rq_label;
 964 
 965                 ASSERT(clabel != NULL);
 966                 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
 967                     "got client label from request(1)", struct svc_req *, req);
 968 
 969                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 970                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 971                             exi)) {
 972                                 resp->status = NFS3ERR_ACCES;
 973                                 goto out1;
 974                         }
 975                 }
 976         }
 977 
             /*
              * Caller context handed to every VOP below.  CC_DONTBLOCK is
              * set here; CC_WOULDBLOCK is tested after VOP_RWLOCK() and
              * VOP_READ() to turn EAGAIN into NFS3ERR_JUKEBOX.
              */
 978         ct.cc_sysid = 0;
 979         ct.cc_pid = 0;
 980         ct.cc_caller_id = nfs3_srv_caller_id;
 981         ct.cc_flags = CC_DONTBLOCK;
 982 
 983         /*
 984          * Enter the critical region before calling VOP_RWLOCK
 985          * to avoid a deadlock with write requests.
 986          */
 987         if (nbl_need_check(vp)) {
 988                 nbl_start_crit(vp, RW_READER);
 989                 in_crit = 1;
 990                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
 991                     NULL)) {
 992                         error = EACCES;
 993                         goto out;
 994                 }
 995         }
 996 
 997         error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
 998 
 999         /* check if a monitor detected a delegation conflict */
1000         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1001                 resp->status = NFS3ERR_JUKEBOX;
1002                 goto out1;
1003         }
1004 
             /* From here on, out/out1 must call VOP_RWUNLOCK(). */
1005         need_rwunlock = 1;
1006 
1007         va.va_mask = AT_ALL;
1008         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1009 
1010         /*
1011          * If we can't get the attributes, then we can't do the
1012          * right access checking.  So, we'll fail the request.
1013          */
1014         if (error)
1015                 goto out;
1016 
1017         vap = &va;
1018 
1019         if (vp->v_type != VREG) {
1020                 resp->status = NFS3ERR_INVAL;
1021                 goto out1;
1022         }
1023 
             /*
              * The owner is not access-checked.  Anyone else needs VREAD;
              * if that is denied (and the denial was not a would-block),
              * VEXEC is accepted as a fallback.
              */
1024         if (crgetuid(cr) != va.va_uid) {
1025                 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1026                 if (error) {
1027                         if (curthread->t_flag & T_WOULDBLOCK)
1028                                 goto out;
1029                         error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1030                         if (error)
1031                                 goto out;
1032                 }
1033         }
1034 
1035         if (MANDLOCK(vp, va.va_mode)) {
1036                 resp->status = NFS3ERR_ACCES;
1037                 goto out1;
1038         }
1039 
             /*
              * Read starting at or beyond the file size: reply NFS3_OK with
              * no data and eof set.  Locks are dropped here because the
              * "done" exit does not drop them.
              */
1040         offset = args->offset;
1041         if (offset >= va.va_size) {
1042                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1043                 if (in_crit)
1044                         nbl_end_crit(vp);
1045                 resp->status = NFS3_OK;
1046                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1047                 resp->resok.count = 0;
1048                 resp->resok.eof = TRUE;
1049                 resp->resok.data.data_len = 0;
1050                 resp->resok.data.data_val = NULL;
1051                 resp->resok.data.mp = NULL;
1052                 /* RDMA */
1053                 resp->resok.wlist = args->wlist;
1054                 resp->resok.wlist_len = resp->resok.count;
1055                 if (resp->resok.wlist)
1056                         clist_zero_len(resp->resok.wlist);
1057                 goto done;
1058         }
1059 
             /*
              * Zero-length read inside the file: same empty reply as above,
              * but eof is FALSE because offset < va.va_size here.
              */
1060         if (args->count == 0) {
1061                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1062                 if (in_crit)
1063                         nbl_end_crit(vp);
1064                 resp->status = NFS3_OK;
1065                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1066                 resp->resok.count = 0;
1067                 resp->resok.eof = FALSE;
1068                 resp->resok.data.data_len = 0;
1069                 resp->resok.data.data_val = NULL;
1070                 resp->resok.data.mp = NULL;
1071                 /* RDMA */
1072                 resp->resok.wlist = args->wlist;
1073                 resp->resok.wlist_len = resp->resok.count;
1074                 if (resp->resok.wlist)
1075                         clist_zero_len(resp->resok.wlist);
1076                 goto done;
1077         }
1078 
1079         /*
1080          * Do not allocate more memory than the maximum allowed
1081          * transfer size.
1082          */
1083         if (args->count > rfs3_tsize(req))
1084                 args->count = rfs3_tsize(req);
1085 
             /*
              * Zero-copy attempt.  If the file system cannot loan buffers
              * the xuio is discarded and the regular mblk path below is
              * used instead.
              */
1086         if (loaned_buffers) {
1087                 uiop = (uio_t *)rfs_setup_xuio(vp);
1088                 ASSERT(uiop != NULL);
1089                 uiop->uio_segflg = UIO_SYSSPACE;
1090                 uiop->uio_loffset = args->offset;
1091                 uiop->uio_resid = args->count;
1092 
1093                 /* Jump to do the read if successful */
1094                 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1095                         /*
1096                          * Need to hold the vnode until after VOP_RETZCBUF()
1097                          * is called.
1098                          */
1099                         VN_HOLD(vp);
1100                         goto doio_read;
1101                 }
1102 
1103                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1104                     uiop->uio_loffset, int, uiop->uio_resid);
1105 
                     /*
                      * NOTE(review): uio_extflg is cleared before the free,
                      * presumably so rfs_free_xuio() does not try to return
                      * buffers that were never loaned -- confirm against
                      * rfs_free_xuio().
                      */
1106                 uiop->uio_extflg = 0;
1107                 /* failure to setup for zero copy */
1108                 rfs_free_xuio((void *)uiop);
1109                 loaned_buffers = 0;
1110         }
1111 
1112         /*
1113          * If returning data via RDMA Write, then grab the chunk list.
1114          * If we aren't returning READ data w/RDMA_WRITE, then grab
1115          * a mblk.
1116          */
1117         if (rdma_used) {
                     /*
                      * NOTE(review): the return value is discarded; iov is
                      * only initialized if rdma_get_wchunk() fills it in.
                      */
1118                 (void) rdma_get_wchunk(req, &iov, args->wlist);
1119         } else {
1120                 /*
1121                  * mp will contain the data to be sent out in the read reply.
1122                  * This will be freed after the reply has been sent out (by the
1123                  * driver).
1124                  * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
1125                  * that the call to xdrmblk_putmblk() never fails.
1126                  */
1127                 mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG,
1128                     &alloc_err);
1129                 ASSERT(mp != NULL);
1130                 ASSERT(alloc_err == 0);
1131 
1132                 iov.iov_base = (caddr_t)mp->b_datap->db_base;
1133                 iov.iov_len = args->count;
1134         }
1135 
             /* Plain (non-loaned) uio describing the single target buffer. */
1136         uio.uio_iov = &iov;
1137         uio.uio_iovcnt = 1;
1138         uio.uio_segflg = UIO_SYSSPACE;
1139         uio.uio_extflg = UIO_COPY_CACHED;
1140         uio.uio_loffset = args->offset;
1141         uio.uio_resid = args->count;
1142         uiop = &uio;
1143 
1144 doio_read:
1145         error = VOP_READ(vp, uiop, 0, cr, &ct);
1146 
             /*
              * NOTE(review): when loaned_buffers is set, mp is still NULL
              * here and this path leaves without any visible release of the
              * xuio from rfs_setup_xuio() or of the extra VN_HOLD() taken
              * above.  Confirm whether a failed VOP_READ() cleans these up;
              * if not, this looks like a leak.
              */
1147         if (error) {
1148                 if (mp)
1149                         freemsg(mp);
1150                 /* check if a monitor detected a delegation conflict */
1151                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1152                         resp->status = NFS3ERR_JUKEBOX;
1153                         goto out1;
1154                 }
1155                 goto out;
1156         }
1157 
1158         /* make mblk using zc buffers */
1159         if (loaned_buffers) {
1160                 mp = uio_to_mblk(uiop);
1161                 ASSERT(mp != NULL);
1162         }
1163 
             /*
              * Fetch fresh attributes for the reply.  A failure here does
              * not fail the read; the reply simply carries no attributes
              * and eof is reported as FALSE.
              */
1164         va.va_mask = AT_ALL;
1165         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1166 
1167         if (error)
1168                 vap = NULL;
1169         else
1170                 vap = &va;
1171 
1172         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1173 
1174         if (in_crit)
1175                 nbl_end_crit(vp);
1176 
1177         resp->status = NFS3_OK;
1178         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
             /* Bytes actually read = requested minus what VOP_READ left. */
1179         resp->resok.count = args->count - uiop->uio_resid;
             /* eof only when the read ended exactly at the current size. */
1180         if (!error && offset + resp->resok.count == va.va_size)
1181                 resp->resok.eof = TRUE;
1182         else
1183                 resp->resok.eof = FALSE;
1184         resp->resok.data.data_len = resp->resok.count;
1185 
1186         if (mp)
1187                 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1188 
1189         resp->resok.data.mp = mp;
1190         resp->resok.size = (uint_t)args->count;
1191 
             /*
              * RDMA replies point data_val at the write chunk; mblk replies
              * point it at the start of the mblk's data buffer and carry no
              * write list.
              */
1192         if (rdma_used) {
1193                 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1194                 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1195                         resp->status = NFS3ERR_INVAL;
1196                 }
1197         } else {
1198                 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1199                 (resp->resok).wlist = NULL;
1200         }
1201 
             /* Success exit: locks have already been dropped by the caller. */
1202 done:
1203         DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1204             cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1205 
1206         VN_RELE(vp);
1207 
1208         return;
1209 
             /*
              * Failure exits.  "out" derives resp->status from error (or
              * from a pending T_WOULDBLOCK, which is cleared); "out1" is
              * entered with resp->status already set.
              */
1210 out:
1211         if (curthread->t_flag & T_WOULDBLOCK) {
1212                 curthread->t_flag &= ~T_WOULDBLOCK;
1213                 resp->status = NFS3ERR_JUKEBOX;
1214         } else
1215                 resp->status = puterrno3(error);
1216 out1:
1217         DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1218             cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1219 
1220         if (vp != NULL) {
1221                 if (need_rwunlock)
1222                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1223                 if (in_crit)
1224                         nbl_end_crit(vp);
1225                 VN_RELE(vp);
1226         }
1227         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1228 }
1229 
1230 void
1231 rfs3_read_free(READ3res *resp)
1232 {
1233         mblk_t *mp;
1234 
1235         if (resp->status == NFS3_OK) {
1236                 mp = resp->resok.data.mp;
1237                 if (mp != NULL)
1238                         freemsg(mp);
1239         }
1240 }
1241 
1242 void *
1243 rfs3_read_getfh(READ3args *args)
1244 {
1245 
1246         return (&args->file);
1247 }
1248 
     /*
      * Number of iovec entries rfs3_write() keeps on its stack.  A request
      * whose mblk chain has more links than this gets a kmem_alloc()ed
      * iovec array instead.
      */
1249 #define MAX_IOVECS      12
1250 
     /*
      * DEBUG-only counters maintained by rfs3_write(): a "hit" is an mblk
      * chain that fit in the on-stack iovec array, a "miss" is one that
      * needed a separately allocated array.
      */
1251 #ifdef DEBUG
1252 static int rfs3_write_hits = 0;
1253 static int rfs3_write_misses = 0;
1254 #endif
1255 
     /*
      * Server routine to handle write.
      *
      * Looks up the vnode for args->file, runs the label and non-blocking
      * mandatory lock checks, takes the vnode rwlock as a writer and fetches
      * the pre-operation attributes.  After validating the request (count
      * matches the data length, export is writable, regular file, write
      * access, no mandatory locking) it builds an iovec over the request
      * data, taken from the mblk chain, the RDMA read list or the flat
      * data buffer, and issues VOP_WRITE() with the sync mode derived from
      * args->stable.
      *
      * On success resp->status is NFS3_OK and resp->resok carries the wcc
      * data, the byte count written, the commit level and the write
      * verifier.  On failure resp->status holds the NFS3 error and
      * resp->resfail.file_wcc is built from whatever attributes were
      * fetched.  The rwlock, the nbmand critical region and the vnode are
      * all released at the common "out" exit.
      */
1256 void
1257 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1258         struct svc_req *req, cred_t *cr)
1259 {
1260         int error;
1261         vnode_t *vp;
1262         struct vattr *bvap = NULL;      /* pre-op ("before") attributes */
1263         struct vattr bva;
1264         struct vattr *avap = NULL;      /* post-op ("after") attributes */
1265         struct vattr ava;
1266         u_offset_t rlimit;
1267         struct uio uio;
1268         struct iovec iov[MAX_IOVECS];
1269         mblk_t *m;
1270         struct iovec *iovp;
1271         int iovcnt;
1272         int ioflag;
1273         cred_t *savecred;
1274         int in_crit = 0;        /* inside nbl_start_crit() region */
1275         int rwlock_ret = -1;    /* -1: rwlock not held, see "out" */
1276         caller_context_t ct;
1277 
1278         vp = nfs3_fhtovp(&args->file, exi);
1279 
1280         DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1281             cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1282 
1283         if (vp == NULL) {
1284                 error = ESTALE;
1285                 goto err;
1286         }
1287 
             /*
              * On labeled systems a client whose label is not admin_low
              * must pass the equality check against the file.
              */
1288         if (is_system_labeled()) {
1289                 bslabel_t *clabel = req->rq_label;
1290 
1291                 ASSERT(clabel != NULL);
1292                 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1293                     "got client label from request(1)", struct svc_req *, req);
1294 
1295                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1296                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1297                             exi)) {
1298                                 resp->status = NFS3ERR_ACCES;
1299                                 goto err1;
1300                         }
1301                 }
1302         }
1303 
             /*
              * Caller context handed to every VOP below.  CC_DONTBLOCK is
              * set here; CC_WOULDBLOCK is tested after VOP_RWLOCK() and
              * VOP_WRITE() to turn EAGAIN into NFS3ERR_JUKEBOX.
              */
1304         ct.cc_sysid = 0;
1305         ct.cc_pid = 0;
1306         ct.cc_caller_id = nfs3_srv_caller_id;
1307         ct.cc_flags = CC_DONTBLOCK;
1308 
1309         /*
1310          * We have to enter the critical region before calling VOP_RWLOCK
1311          * to avoid a deadlock with ufs.
1312          */
1313         if (nbl_need_check(vp)) {
1314                 nbl_start_crit(vp, RW_READER);
1315                 in_crit = 1;
1316                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1317                     NULL)) {
1318                         error = EACCES;
1319                         goto err;
1320                 }
1321         }
1322 
1323         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1324 
1325         /* check if a monitor detected a delegation conflict */
1326         if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1327                 resp->status = NFS3ERR_JUKEBOX;
                     /* Back to "not held" so "out" skips VOP_RWUNLOCK(). */
1328                 rwlock_ret = -1;
1329                 goto err1;
1330         }
1331 
1332 
1333         bva.va_mask = AT_ALL;
1334         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1335 
1336         /*
1337          * If we can't get the attributes, then we can't do the
1338          * right access checking.  So, we'll fail the request.
1339          */
1340         if (error)
1341                 goto err;
1342 
             /*
              * Until post-write attributes are fetched, the pre-op
              * attributes double as the "after" attributes in any reply.
              */
1343         bvap = &bva;
1344         avap = bvap;
1345 
             /* The stated count must match the length of the data sent. */
1346         if (args->count != args->data.data_len) {
1347                 resp->status = NFS3ERR_INVAL;
1348                 goto err1;
1349         }
1350 
1351         if (rdonly(exi, req)) {
1352                 resp->status = NFS3ERR_ROFS;
1353                 goto err1;
1354         }
1355 
1356         if (vp->v_type != VREG) {
1357                 resp->status = NFS3ERR_INVAL;
1358                 goto err1;
1359         }
1360 
             /* The owner is not access-checked; anyone else needs VWRITE. */
1361         if (crgetuid(cr) != bva.va_uid &&
1362             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1363                 goto err;
1364 
1365         if (MANDLOCK(vp, bva.va_mode)) {
1366                 resp->status = NFS3ERR_ACCES;
1367                 goto err1;
1368         }
1369 
             /*
              * Zero-length write: succeed without calling VOP_WRITE().
              * Note that args->stable is echoed back unvalidated on this
              * path; the validity check below is never reached.
              */
1370         if (args->count == 0) {
1371                 resp->status = NFS3_OK;
1372                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1373                 resp->resok.count = 0;
1374                 resp->resok.committed = args->stable;
1375                 resp->resok.verf = write3verf;
1376                 goto out;
1377         }
1378 
             /*
              * Describe the request data with an iovec array.  An mblk
              * chain gets one entry per link (heap-allocated when it does
              * not fit in iov[]); an RDMA read list or a flat data buffer
              * gets a single entry.
              */
1379         if (args->mblk != NULL) {
1380                 iovcnt = 0;
1381                 for (m = args->mblk; m != NULL; m = m->b_cont)
1382                         iovcnt++;
1383                 if (iovcnt <= MAX_IOVECS) {
1384 #ifdef DEBUG
1385                         rfs3_write_hits++;
1386 #endif
1387                         iovp = iov;
1388                 } else {
1389 #ifdef DEBUG
1390                         rfs3_write_misses++;
1391 #endif
1392                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1393                 }
1394                 mblk_to_iov(args->mblk, iovcnt, iovp);
1395 
1396         } else if (args->rlist != NULL) {
1397                 iovcnt = 1;
1398                 iovp = iov;
1399                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1400                 iovp->iov_len = args->count;
1401         } else {
1402                 iovcnt = 1;
1403                 iovp = iov;
1404                 iovp->iov_base = args->data.data_val;
1405                 iovp->iov_len = args->count;
1406         }
1407 
1408         uio.uio_iov = iovp;
1409         uio.uio_iovcnt = iovcnt;
1410 
1411         uio.uio_segflg = UIO_SYSSPACE;
1412         uio.uio_extflg = UIO_COPY_DEFAULT;
1413         uio.uio_loffset = args->offset;
1414         uio.uio_resid = args->count;
             /*
              * Clamp the transfer so it does not run past the process file
              * size limit.  NOTE(review): rlimit is a u_offset_t, so if
              * args->offset is already beyond uio_llimit the subtraction
              * wraps and no clamping happens -- presumably VOP_WRITE()
              * rejects that case itself; confirm.
              */
1415         uio.uio_llimit = curproc->p_fsz_ctl;
1416         rlimit = uio.uio_llimit - args->offset;
1417         if (rlimit < (u_offset_t)uio.uio_resid)
1418                 uio.uio_resid = (int)rlimit;
1419 
             /*
              * Map the requested stability level to VOP_WRITE() flags; an
              * unknown level is rejected after releasing any heap iovec.
              */
1420         if (args->stable == UNSTABLE)
1421                 ioflag = 0;
1422         else if (args->stable == FILE_SYNC)
1423                 ioflag = FSYNC;
1424         else if (args->stable == DATA_SYNC)
1425                 ioflag = FDSYNC;
1426         else {
1427                 if (iovp != iov)
1428                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1429                 resp->status = NFS3ERR_INVAL;
1430                 goto err1;
1431         }
1432 
1433         /*
1434          * We're changing creds because VM may fault and we need
1435          * the cred of the current thread to be used if quota
1436          * checking is enabled.
1437          */
1438         savecred = curthread->t_cred;
1439         curthread->t_cred = cr;
1440         error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1441         curthread->t_cred = savecred;
1442 
             /* The iovec array is no longer needed once the write returns. */
1443         if (iovp != iov)
1444                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1445 
1446         /* check if a monitor detected a delegation conflict */
1447         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1448                 resp->status = NFS3ERR_JUKEBOX;
1449                 goto err1;
1450         }
1451 
             /*
              * Post-op attributes are fetched before the write error is
              * examined, so a failed write still reports them in its wcc
              * data.
              */
1452         ava.va_mask = AT_ALL;
1453         avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1454 
1455         if (error)
1456                 goto err;
1457 
1458         /*
1459          * If we were unable to get the V_WRITELOCK_TRUE, then we
1460          * may not have accurate after attrs, so check if
1461          * we have both attributes, they have a non-zero va_seq, and
1462          * va_seq has changed by exactly one,
1463          * if not, turn off the before attr.
1464          */
1465         if (rwlock_ret != V_WRITELOCK_TRUE) {
1466                 if (bvap == NULL || avap == NULL ||
1467                     bvap->va_seq == 0 || avap->va_seq == 0 ||
1468                     avap->va_seq != (bvap->va_seq + 1)) {
1469                         bvap = NULL;
1470                 }
1471         }
1472 
1473         resp->status = NFS3_OK;
1474         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
             /* Bytes actually written = requested minus what was left. */
1475         resp->resok.count = args->count - uio.uio_resid;
1476         resp->resok.committed = args->stable;
1477         resp->resok.verf = write3verf;
1478         goto out;
1479 
             /*
              * Failure exits.  "err" derives resp->status from error (or
              * from a pending T_WOULDBLOCK, which is cleared); "err1" is
              * entered with resp->status already set.  Both fall through
              * to the common cleanup at "out".
              */
1480 err:
1481         if (curthread->t_flag & T_WOULDBLOCK) {
1482                 curthread->t_flag &= ~T_WOULDBLOCK;
1483                 resp->status = NFS3ERR_JUKEBOX;
1484         } else
1485                 resp->status = puterrno3(error);
1486 err1:
1487         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1488 out:
1489         DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1490             cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1491 
1492         if (vp != NULL) {
                     /*
                      * NOTE(review): any rwlock_ret other than -1 is
                      * treated as "lock held", which assumes VOP_RWLOCK()
                      * only fails in the EAGAIN/CC_WOULDBLOCK case handled
                      * above -- confirm.
                      */
1493                 if (rwlock_ret != -1)
1494                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1495                 if (in_crit)
1496                         nbl_end_crit(vp);
1497                 VN_RELE(vp);
1498         }
1499 }
1500 
1501 void *
1502 rfs3_write_getfh(WRITE3args *args)
1503 {
1504 
1505         return (&args->file);
1506 }
1507 
1508 void
1509 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1510         struct svc_req *req, cred_t *cr)
1511 {
1512         int error;
1513         int in_crit = 0;
1514         vnode_t *vp;
1515         vnode_t *tvp = NULL;
1516         vnode_t *dvp;
1517         struct vattr *vap;
1518         struct vattr va;
1519         struct vattr *dbvap;
1520         struct vattr dbva;
1521         struct vattr *davap;
1522         struct vattr dava;
1523         enum vcexcl excl;
1524         nfstime3 *mtime;
1525         len_t reqsize;
1526         bool_t trunc;
1527         struct sockaddr *ca;
1528         char *name = NULL;
1529 
1530         dbvap = NULL;
1531         davap = NULL;
1532 
1533         dvp = nfs3_fhtovp(&args->where.dir, exi);
1534 
1535         DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1536             cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1537 
1538         if (dvp == NULL) {
1539                 error = ESTALE;
1540                 goto out;
1541         }
1542 
1543         dbva.va_mask = AT_ALL;
1544         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1545         davap = dbvap;
1546 
1547         if (args->where.name == nfs3nametoolong) {
1548                 resp->status = NFS3ERR_NAMETOOLONG;
1549                 goto out1;
1550         }
1551 
1552         if (args->where.name == NULL || *(args->where.name) == '\0') {
1553                 resp->status = NFS3ERR_ACCES;
1554                 goto out1;
1555         }
1556 
1557         if (rdonly(exi, req)) {
1558                 resp->status = NFS3ERR_ROFS;
1559                 goto out1;
1560         }
1561 
1562         if (is_system_labeled()) {
1563                 bslabel_t *clabel = req->rq_label;
1564 
1565                 ASSERT(clabel != NULL);
1566                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1567                     "got client label from request(1)", struct svc_req *, req);
1568 
1569                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1570                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1571                             exi)) {
1572                                 resp->status = NFS3ERR_ACCES;
1573                                 goto out1;
1574                         }
1575                 }
1576         }
1577 
1578         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1579         name = nfscmd_convname(ca, exi, args->where.name,
1580             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1581 
1582         if (name == NULL) {
1583                 /* This is really a Solaris EILSEQ */
1584                 resp->status = NFS3ERR_INVAL;
1585                 goto out1;
1586         }
1587 
1588         if (args->how.mode == EXCLUSIVE) {
1589                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1590                 va.va_type = VREG;
1591                 va.va_mode = (mode_t)0;
1592                 /*
1593                  * Ensure no time overflows and that types match
1594                  */
1595                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1596                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1597                 va.va_mtime.tv_nsec = mtime->nseconds;
1598                 excl = EXCL;
1599         } else {
1600                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1601                     &va);
1602                 if (error)
1603                         goto out;
1604                 va.va_mask |= AT_TYPE;
1605                 va.va_type = VREG;
1606                 if (args->how.mode == GUARDED)
1607                         excl = EXCL;
1608                 else {
1609                         excl = NONEXCL;
1610 
1611                         /*
1612                          * During creation of file in non-exclusive mode
1613                          * if size of file is being set then make sure
1614                          * that if the file already exists that no conflicting
1615                          * non-blocking mandatory locks exists in the region
1616                          * being modified. If there are conflicting locks fail
1617                          * the operation with EACCES.
1618                          */
1619                         if (va.va_mask & AT_SIZE) {
1620                                 struct vattr tva;
1621 
1622                                 /*
1623                                  * Does file already exist?
1624                                  */
1625                                 error = VOP_LOOKUP(dvp, name, &tvp,
1626                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1627 
1628                                 /*
1629                                  * Check to see if the file has been delegated
1630                                  * to a v4 client.  If so, then begin recall of
1631                                  * the delegation and return JUKEBOX to allow
1632                                  * the client to retransmit its request.
1633                                  */
1634 
1635                                 trunc = va.va_size == 0;
1636                                 if (!error &&
1637                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1638                                         resp->status = NFS3ERR_JUKEBOX;
1639                                         goto out1;
1640                                 }
1641 
1642                                 /*
1643                                  * Check for NBMAND lock conflicts
1644                                  */
1645                                 if (!error && nbl_need_check(tvp)) {
1646                                         u_offset_t offset;
1647                                         ssize_t len;
1648 
1649                                         nbl_start_crit(tvp, RW_READER);
1650                                         in_crit = 1;
1651 
1652                                         tva.va_mask = AT_SIZE;
1653                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1654                                             NULL);
1655                                         /*
1656                                          * Can't check for conflicts, so return
1657                                          * error.
1658                                          */
1659                                         if (error)
1660                                                 goto out;
1661 
1662                                         offset = tva.va_size < va.va_size ?
1663                                             tva.va_size : va.va_size;
1664                                         len = tva.va_size < va.va_size ?
1665                                             va.va_size - tva.va_size :
1666                                             tva.va_size - va.va_size;
1667                                         if (nbl_conflict(tvp, NBL_WRITE,
1668                                             offset, len, 0, NULL)) {
1669                                                 error = EACCES;
1670                                                 goto out;
1671                                         }
1672                                 } else if (tvp) {
1673                                         VN_RELE(tvp);
1674                                         tvp = NULL;
1675                                 }
1676                         }
1677                 }
1678                 if (va.va_mask & AT_SIZE)
1679                         reqsize = va.va_size;
1680         }
1681 
1682         /*
1683          * Must specify the mode.
1684          */
1685         if (!(va.va_mask & AT_MODE)) {
1686                 resp->status = NFS3ERR_INVAL;
1687                 goto out1;
1688         }
1689 
1690         /*
1691          * If the filesystem is exported with nosuid, then mask off
1692          * the setuid and setgid bits.
1693          */
1694         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1695                 va.va_mode &= ~(VSUID | VSGID);
1696 
1697 tryagain:
1698         /*
1699          * The file open mode used is VWRITE.  If the client needs
1700          * some other semantic, then it should do the access checking
1701          * itself.  It would have been nice to have the file open mode
1702          * passed as part of the arguments.
1703          */
1704         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1705             &vp, cr, 0, NULL, NULL);
1706 
1707         dava.va_mask = AT_ALL;
1708         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1709 
1710         if (error) {
1711                 /*
1712                  * If we got something other than file already exists
1713                  * then just return this error.  Otherwise, we got
1714                  * EEXIST.  If we were doing a GUARDED create, then
1715                  * just return this error.  Otherwise, we need to
1716                  * make sure that this wasn't a duplicate of an
1717                  * exclusive create request.
1718                  *
1719                  * The assumption is made that a non-exclusive create
1720                  * request will never return EEXIST.
1721                  */
1722                 if (error != EEXIST || args->how.mode == GUARDED)
1723                         goto out;
1724                 /*
1725                  * Lookup the file so that we can get a vnode for it.
1726                  */
1727                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1728                     NULL, cr, NULL, NULL, NULL);
1729                 if (error) {
1730                         /*
1731                          * We couldn't find the file that we thought that
1732                          * we just created.  So, we'll just try creating
1733                          * it again.
1734                          */
1735                         if (error == ENOENT)
1736                                 goto tryagain;
1737                         goto out;
1738                 }
1739 
1740                 /*
1741                  * If the file is delegated to a v4 client, go ahead
1742                  * and initiate recall, this create is a hint that a
1743                  * conflicting v3 open has occurred.
1744                  */
1745 
1746                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1747                         VN_RELE(vp);
1748                         resp->status = NFS3ERR_JUKEBOX;
1749                         goto out1;
1750                 }
1751 
1752                 va.va_mask = AT_ALL;
1753                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1754 
1755                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1756                 /* % with INT32_MAX to prevent overflows */
1757                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1758                     vap->va_mtime.tv_sec !=
1759                     (mtime->seconds % INT32_MAX) ||
1760                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1761                         VN_RELE(vp);
1762                         error = EEXIST;
1763                         goto out;
1764                 }
1765         } else {
1766 
1767                 if ((args->how.mode == UNCHECKED ||
1768                     args->how.mode == GUARDED) &&
1769                     args->how.createhow3_u.obj_attributes.size.set_it &&
1770                     va.va_size == 0)
1771                         trunc = TRUE;
1772                 else
1773                         trunc = FALSE;
1774 
1775                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1776                         VN_RELE(vp);
1777                         resp->status = NFS3ERR_JUKEBOX;
1778                         goto out1;
1779                 }
1780 
1781                 va.va_mask = AT_ALL;
1782                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1783 
1784                 /*
1785                  * We need to check to make sure that the file got
1786                  * created to the indicated size.  If not, we do a
1787                  * setattr to try to change the size, but we don't
1788                  * try too hard.  This shouldn't be a problem as most
1789                  * clients will only specify a size of zero which
1790                  * local file systems handle.  However, even if
1791                  * the client does specify a non-zero size, it can
1792                  * still recover by checking the size of the file
1793                  * after it has created it and then issue a setattr
1794                  * request of its own to set the size of the file.
1795                  */
1796                 if (vap != NULL &&
1797                     (args->how.mode == UNCHECKED ||
1798                     args->how.mode == GUARDED) &&
1799                     args->how.createhow3_u.obj_attributes.size.set_it &&
1800                     vap->va_size != reqsize) {
1801                         va.va_mask = AT_SIZE;
1802                         va.va_size = reqsize;
1803                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1804                         va.va_mask = AT_ALL;
1805                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1806                 }
1807         }
1808 
1809         if (name != args->where.name)
1810                 kmem_free(name, MAXPATHLEN + 1);
1811 
1812         error = makefh3(&resp->resok.obj.handle, vp, exi);
1813         if (error)
1814                 resp->resok.obj.handle_follows = FALSE;
1815         else
1816                 resp->resok.obj.handle_follows = TRUE;
1817 
1818         /*
1819          * Force modified data and metadata out to stable storage.
1820          */
1821         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1822         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1823 
1824         VN_RELE(vp);
1825         if (tvp != NULL) {
1826                 if (in_crit)
1827                         nbl_end_crit(tvp);
1828                 VN_RELE(tvp);
1829         }
1830 
1831         resp->status = NFS3_OK;
1832         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1833         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1834 
1835         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1836             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1837 
1838         VN_RELE(dvp);
1839         return;
1840 
1841 out:
1842         if (curthread->t_flag & T_WOULDBLOCK) {
1843                 curthread->t_flag &= ~T_WOULDBLOCK;
1844                 resp->status = NFS3ERR_JUKEBOX;
1845         } else
1846                 resp->status = puterrno3(error);
1847 out1:
1848         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1849             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1850 
1851         if (name != NULL && name != args->where.name)
1852                 kmem_free(name, MAXPATHLEN + 1);
1853 
1854         if (tvp != NULL) {
1855                 if (in_crit)
1856                         nbl_end_crit(tvp);
1857                 VN_RELE(tvp);
1858         }
1859         if (dvp != NULL)
1860                 VN_RELE(dvp);
1861         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1862 }
1863 
1864 void *
1865 rfs3_create_getfh(CREATE3args *args)
1866 {
1867 
1868         return (&args->where.dir);
1869 }
1870 
1871 void
1872 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1873         struct svc_req *req, cred_t *cr)
1874 {
1875         int error;
1876         vnode_t *vp = NULL;
1877         vnode_t *dvp;
1878         struct vattr *vap;
1879         struct vattr va;
1880         struct vattr *dbvap;
1881         struct vattr dbva;
1882         struct vattr *davap;
1883         struct vattr dava;
1884         struct sockaddr *ca;
1885         char *name = NULL;
1886 
1887         dbvap = NULL;
1888         davap = NULL;
1889 
1890         dvp = nfs3_fhtovp(&args->where.dir, exi);
1891 
1892         DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1893             cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1894 
1895         if (dvp == NULL) {
1896                 error = ESTALE;
1897                 goto out;
1898         }
1899 
1900         dbva.va_mask = AT_ALL;
1901         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1902         davap = dbvap;
1903 
1904         if (args->where.name == nfs3nametoolong) {
1905                 resp->status = NFS3ERR_NAMETOOLONG;
1906                 goto out1;
1907         }
1908 
1909         if (args->where.name == NULL || *(args->where.name) == '\0') {
1910                 resp->status = NFS3ERR_ACCES;
1911                 goto out1;
1912         }
1913 
1914         if (rdonly(exi, req)) {
1915                 resp->status = NFS3ERR_ROFS;
1916                 goto out1;
1917         }
1918 
1919         if (is_system_labeled()) {
1920                 bslabel_t *clabel = req->rq_label;
1921 
1922                 ASSERT(clabel != NULL);
1923                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1924                     "got client label from request(1)", struct svc_req *, req);
1925 
1926                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1927                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1928                             exi)) {
1929                                 resp->status = NFS3ERR_ACCES;
1930                                 goto out1;
1931                         }
1932                 }
1933         }
1934 
1935         error = sattr3_to_vattr(&args->attributes, &va);
1936         if (error)
1937                 goto out;
1938 
1939         if (!(va.va_mask & AT_MODE)) {
1940                 resp->status = NFS3ERR_INVAL;
1941                 goto out1;
1942         }
1943 
1944         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1945         name = nfscmd_convname(ca, exi, args->where.name,
1946             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1947 
1948         if (name == NULL) {
1949                 resp->status = NFS3ERR_INVAL;
1950                 goto out1;
1951         }
1952 
1953         va.va_mask |= AT_TYPE;
1954         va.va_type = VDIR;
1955 
1956         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1957 
1958         if (name != args->where.name)
1959                 kmem_free(name, MAXPATHLEN + 1);
1960 
1961         dava.va_mask = AT_ALL;
1962         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1963 
1964         /*
1965          * Force modified data and metadata out to stable storage.
1966          */
1967         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1968 
1969         if (error)
1970                 goto out;
1971 
1972         error = makefh3(&resp->resok.obj.handle, vp, exi);
1973         if (error)
1974                 resp->resok.obj.handle_follows = FALSE;
1975         else
1976                 resp->resok.obj.handle_follows = TRUE;
1977 
1978         va.va_mask = AT_ALL;
1979         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1980 
1981         /*
1982          * Force modified data and metadata out to stable storage.
1983          */
1984         (void) VOP_FSYNC(vp, 0, cr, NULL);
1985 
1986         VN_RELE(vp);
1987 
1988         resp->status = NFS3_OK;
1989         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1990         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1991 
1992         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1993             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1994         VN_RELE(dvp);
1995 
1996         return;
1997 
1998 out:
1999         if (curthread->t_flag & T_WOULDBLOCK) {
2000                 curthread->t_flag &= ~T_WOULDBLOCK;
2001                 resp->status = NFS3ERR_JUKEBOX;
2002         } else
2003                 resp->status = puterrno3(error);
2004 out1:
2005         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2006             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2007         if (dvp != NULL)
2008                 VN_RELE(dvp);
2009         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2010 }
2011 
2012 void *
2013 rfs3_mkdir_getfh(MKDIR3args *args)
2014 {
2015 
2016         return (&args->where.dir);
2017 }
2018 
2019 void
2020 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2021         struct svc_req *req, cred_t *cr)
2022 {
2023         int error;
2024         vnode_t *vp;
2025         vnode_t *dvp;
2026         struct vattr *vap;
2027         struct vattr va;
2028         struct vattr *dbvap;
2029         struct vattr dbva;
2030         struct vattr *davap;
2031         struct vattr dava;
2032         struct sockaddr *ca;
2033         char *name = NULL;
2034         char *symdata = NULL;
2035 
2036         dbvap = NULL;
2037         davap = NULL;
2038 
2039         dvp = nfs3_fhtovp(&args->where.dir, exi);
2040 
2041         DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2042             cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2043 
2044         if (dvp == NULL) {
2045                 error = ESTALE;
2046                 goto err;
2047         }
2048 
2049         dbva.va_mask = AT_ALL;
2050         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2051         davap = dbvap;
2052 
2053         if (args->where.name == nfs3nametoolong) {
2054                 resp->status = NFS3ERR_NAMETOOLONG;
2055                 goto err1;
2056         }
2057 
2058         if (args->where.name == NULL || *(args->where.name) == '\0') {
2059                 resp->status = NFS3ERR_ACCES;
2060                 goto err1;
2061         }
2062 
2063         if (rdonly(exi, req)) {
2064                 resp->status = NFS3ERR_ROFS;
2065                 goto err1;
2066         }
2067 
2068         if (is_system_labeled()) {
2069                 bslabel_t *clabel = req->rq_label;
2070 
2071                 ASSERT(clabel != NULL);
2072                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2073                     "got client label from request(1)", struct svc_req *, req);
2074 
2075                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2076                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2077                             exi)) {
2078                                 resp->status = NFS3ERR_ACCES;
2079                                 goto err1;
2080                         }
2081                 }
2082         }
2083 
2084         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2085         if (error)
2086                 goto err;
2087 
2088         if (!(va.va_mask & AT_MODE)) {
2089                 resp->status = NFS3ERR_INVAL;
2090                 goto err1;
2091         }
2092 
2093         if (args->symlink.symlink_data == nfs3nametoolong) {
2094                 resp->status = NFS3ERR_NAMETOOLONG;
2095                 goto err1;
2096         }
2097 
2098         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2099         name = nfscmd_convname(ca, exi, args->where.name,
2100             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2101 
2102         if (name == NULL) {
2103                 /* This is really a Solaris EILSEQ */
2104                 resp->status = NFS3ERR_INVAL;
2105                 goto err1;
2106         }
2107 
2108         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2109             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2110         if (symdata == NULL) {
2111                 /* This is really a Solaris EILSEQ */
2112                 resp->status = NFS3ERR_INVAL;
2113                 goto err1;
2114         }
2115 
2116 
2117         va.va_mask |= AT_TYPE;
2118         va.va_type = VLNK;
2119 
2120         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2121 
2122         dava.va_mask = AT_ALL;
2123         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2124 
2125         if (error)
2126                 goto err;
2127 
2128         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2129             NULL, NULL, NULL);
2130 
2131         /*
2132          * Force modified data and metadata out to stable storage.
2133          */
2134         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2135 
2136 
2137         resp->status = NFS3_OK;
2138         if (error) {
2139                 resp->resok.obj.handle_follows = FALSE;
2140                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2141                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2142                 goto out;
2143         }
2144 
2145         error = makefh3(&resp->resok.obj.handle, vp, exi);
2146         if (error)
2147                 resp->resok.obj.handle_follows = FALSE;
2148         else
2149                 resp->resok.obj.handle_follows = TRUE;
2150 
2151         va.va_mask = AT_ALL;
2152         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2153 
2154         /*
2155          * Force modified data and metadata out to stable storage.
2156          */
2157         (void) VOP_FSYNC(vp, 0, cr, NULL);
2158 
2159         VN_RELE(vp);
2160 
2161         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2162         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2163         goto out;
2164 
2165 err:
2166         if (curthread->t_flag & T_WOULDBLOCK) {
2167                 curthread->t_flag &= ~T_WOULDBLOCK;
2168                 resp->status = NFS3ERR_JUKEBOX;
2169         } else
2170                 resp->status = puterrno3(error);
2171 err1:
2172         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2173 out:
2174         if (name != NULL && name != args->where.name)
2175                 kmem_free(name, MAXPATHLEN + 1);
2176         if (symdata != NULL && symdata != args->symlink.symlink_data)
2177                 kmem_free(symdata, MAXPATHLEN + 1);
2178 
2179         DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2180             cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2181 
2182         if (dvp != NULL)
2183                 VN_RELE(dvp);
2184 }
2185 
2186 void *
2187 rfs3_symlink_getfh(SYMLINK3args *args)
2188 {
2189 
2190         return (&args->where.dir);
2191 }
2192 
2193 void
2194 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2195         struct svc_req *req, cred_t *cr)
2196 {
2197         int error;
2198         vnode_t *vp;
2199         vnode_t *realvp;
2200         vnode_t *dvp;
2201         struct vattr *vap;
2202         struct vattr va;
2203         struct vattr *dbvap;
2204         struct vattr dbva;
2205         struct vattr *davap;
2206         struct vattr dava;
2207         int mode;
2208         enum vcexcl excl;
2209         struct sockaddr *ca;
2210         char *name = NULL;
2211 
2212         dbvap = NULL;
2213         davap = NULL;
2214 
2215         dvp = nfs3_fhtovp(&args->where.dir, exi);
2216 
2217         DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2218             cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2219 
2220         if (dvp == NULL) {
2221                 error = ESTALE;
2222                 goto out;
2223         }
2224 
2225         dbva.va_mask = AT_ALL;
2226         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2227         davap = dbvap;
2228 
2229         if (args->where.name == nfs3nametoolong) {
2230                 resp->status = NFS3ERR_NAMETOOLONG;
2231                 goto out1;
2232         }
2233 
2234         if (args->where.name == NULL || *(args->where.name) == '\0') {
2235                 resp->status = NFS3ERR_ACCES;
2236                 goto out1;
2237         }
2238 
2239         if (rdonly(exi, req)) {
2240                 resp->status = NFS3ERR_ROFS;
2241                 goto out1;
2242         }
2243 
2244         if (is_system_labeled()) {
2245                 bslabel_t *clabel = req->rq_label;
2246 
2247                 ASSERT(clabel != NULL);
2248                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2249                     "got client label from request(1)", struct svc_req *, req);
2250 
2251                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2252                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2253                             exi)) {
2254                                 resp->status = NFS3ERR_ACCES;
2255                                 goto out1;
2256                         }
2257                 }
2258         }
2259 
2260         switch (args->what.type) {
2261         case NF3CHR:
2262         case NF3BLK:
2263                 error = sattr3_to_vattr(
2264                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2265                 if (error)
2266                         goto out;
2267                 if (secpolicy_sys_devices(cr) != 0) {
2268                         resp->status = NFS3ERR_PERM;
2269                         goto out1;
2270                 }
2271                 if (args->what.type == NF3CHR)
2272                         va.va_type = VCHR;
2273                 else
2274                         va.va_type = VBLK;
2275                 va.va_rdev = makedevice(
2276                     args->what.mknoddata3_u.device.spec.specdata1,
2277                     args->what.mknoddata3_u.device.spec.specdata2);
2278                 va.va_mask |= AT_TYPE | AT_RDEV;
2279                 break;
2280         case NF3SOCK:
2281                 error = sattr3_to_vattr(
2282                     &args->what.mknoddata3_u.pipe_attributes, &va);
2283                 if (error)
2284                         goto out;
2285                 va.va_type = VSOCK;
2286                 va.va_mask |= AT_TYPE;
2287                 break;
2288         case NF3FIFO:
2289                 error = sattr3_to_vattr(
2290                     &args->what.mknoddata3_u.pipe_attributes, &va);
2291                 if (error)
2292                         goto out;
2293                 va.va_type = VFIFO;
2294                 va.va_mask |= AT_TYPE;
2295                 break;
2296         default:
2297                 resp->status = NFS3ERR_BADTYPE;
2298                 goto out1;
2299         }
2300 
2301         /*
2302          * Must specify the mode.
2303          */
2304         if (!(va.va_mask & AT_MODE)) {
2305                 resp->status = NFS3ERR_INVAL;
2306                 goto out1;
2307         }
2308 
2309         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2310         name = nfscmd_convname(ca, exi, args->where.name,
2311             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2312 
2313         if (name == NULL) {
2314                 resp->status = NFS3ERR_INVAL;
2315                 goto out1;
2316         }
2317 
2318         excl = EXCL;
2319 
2320         mode = 0;
2321 
2322         error = VOP_CREATE(dvp, name, &va, excl, mode,
2323             &vp, cr, 0, NULL, NULL);
2324 
2325         if (name != args->where.name)
2326                 kmem_free(name, MAXPATHLEN + 1);
2327 
2328         dava.va_mask = AT_ALL;
2329         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2330 
2331         /*
2332          * Force modified data and metadata out to stable storage.
2333          */
2334         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2335 
2336         if (error)
2337                 goto out;
2338 
2339         resp->status = NFS3_OK;
2340 
2341         error = makefh3(&resp->resok.obj.handle, vp, exi);
2342         if (error)
2343                 resp->resok.obj.handle_follows = FALSE;
2344         else
2345                 resp->resok.obj.handle_follows = TRUE;
2346 
2347         va.va_mask = AT_ALL;
2348         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2349 
2350         /*
2351          * Force modified metadata out to stable storage.
2352          *
2353          * if a underlying vp exists, pass it to VOP_FSYNC
2354          */
2355         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2356                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2357         else
2358                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2359 
2360         VN_RELE(vp);
2361 
2362         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2363         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2364         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2365             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2366         VN_RELE(dvp);
2367         return;
2368 
2369 out:
2370         if (curthread->t_flag & T_WOULDBLOCK) {
2371                 curthread->t_flag &= ~T_WOULDBLOCK;
2372                 resp->status = NFS3ERR_JUKEBOX;
2373         } else
2374                 resp->status = puterrno3(error);
2375 out1:
2376         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2377             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2378         if (dvp != NULL)
2379                 VN_RELE(dvp);
2380         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2381 }
2382 
2383 void *
2384 rfs3_mknod_getfh(MKNOD3args *args)
2385 {
2386 
2387         return (&args->where.dir);
2388 }
2389 
2390 void
2391 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2392         struct svc_req *req, cred_t *cr)
2393 {
2394         int error = 0;
2395         vnode_t *vp;
2396         struct vattr *bvap;
2397         struct vattr bva;
2398         struct vattr *avap;
2399         struct vattr ava;
2400         vnode_t *targvp = NULL;
2401         struct sockaddr *ca;
2402         char *name = NULL;
2403 
2404         bvap = NULL;
2405         avap = NULL;
2406 
2407         vp = nfs3_fhtovp(&args->object.dir, exi);
2408 
2409         DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2410             cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2411 
2412         if (vp == NULL) {
2413                 error = ESTALE;
2414                 goto err;
2415         }
2416 
2417         bva.va_mask = AT_ALL;
2418         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2419         avap = bvap;
2420 
2421         if (vp->v_type != VDIR) {
2422                 resp->status = NFS3ERR_NOTDIR;
2423                 goto err1;
2424         }
2425 
2426         if (args->object.name == nfs3nametoolong) {
2427                 resp->status = NFS3ERR_NAMETOOLONG;
2428                 goto err1;
2429         }
2430 
2431         if (args->object.name == NULL || *(args->object.name) == '\0') {
2432                 resp->status = NFS3ERR_ACCES;
2433                 goto err1;
2434         }
2435 
2436         if (rdonly(exi, req)) {
2437                 resp->status = NFS3ERR_ROFS;
2438                 goto err1;
2439         }
2440 
2441         if (is_system_labeled()) {
2442                 bslabel_t *clabel = req->rq_label;
2443 
2444                 ASSERT(clabel != NULL);
2445                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2446                     "got client label from request(1)", struct svc_req *, req);
2447 
2448                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2449                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2450                             exi)) {
2451                                 resp->status = NFS3ERR_ACCES;
2452                                 goto err1;
2453                         }
2454                 }
2455         }
2456 
2457         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2458         name = nfscmd_convname(ca, exi, args->object.name,
2459             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2460 
2461         if (name == NULL) {
2462                 resp->status = NFS3ERR_INVAL;
2463                 goto err1;
2464         }
2465 
2466         /*
2467          * Check for a conflict with a non-blocking mandatory share
2468          * reservation and V4 delegations
2469          */
2470         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2471             NULL, cr, NULL, NULL, NULL);
2472         if (error != 0)
2473                 goto err;
2474 
2475         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2476                 resp->status = NFS3ERR_JUKEBOX;
2477                 goto err1;
2478         }
2479 
2480         if (!nbl_need_check(targvp)) {
2481                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2482         } else {
2483                 nbl_start_crit(targvp, RW_READER);
2484                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2485                         error = EACCES;
2486                 } else {
2487                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2488                 }
2489                 nbl_end_crit(targvp);
2490         }
2491         VN_RELE(targvp);
2492         targvp = NULL;
2493 
2494         ava.va_mask = AT_ALL;
2495         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2496 
2497         /*
2498          * Force modified data and metadata out to stable storage.
2499          */
2500         (void) VOP_FSYNC(vp, 0, cr, NULL);
2501 
2502         if (error)
2503                 goto err;
2504 
2505         resp->status = NFS3_OK;
2506         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2507         goto out;
2508 
2509 err:
2510         if (curthread->t_flag & T_WOULDBLOCK) {
2511                 curthread->t_flag &= ~T_WOULDBLOCK;
2512                 resp->status = NFS3ERR_JUKEBOX;
2513         } else
2514                 resp->status = puterrno3(error);
2515 err1:
2516         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2517 out:
2518         DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2519             cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2520 
2521         if (name != NULL && name != args->object.name)
2522                 kmem_free(name, MAXPATHLEN + 1);
2523 
2524         if (vp != NULL)
2525                 VN_RELE(vp);
2526 }
2527 
2528 void *
2529 rfs3_remove_getfh(REMOVE3args *args)
2530 {
2531 
2532         return (&args->object.dir);
2533 }
2534 
2535 void
2536 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2537         struct svc_req *req, cred_t *cr)
2538 {
2539         int error;
2540         vnode_t *vp;
2541         struct vattr *bvap;
2542         struct vattr bva;
2543         struct vattr *avap;
2544         struct vattr ava;
2545         struct sockaddr *ca;
2546         char *name = NULL;
2547 
2548         bvap = NULL;
2549         avap = NULL;
2550 
2551         vp = nfs3_fhtovp(&args->object.dir, exi);
2552 
2553         DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2554             cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2555 
2556         if (vp == NULL) {
2557                 error = ESTALE;
2558                 goto err;
2559         }
2560 
2561         bva.va_mask = AT_ALL;
2562         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2563         avap = bvap;
2564 
2565         if (vp->v_type != VDIR) {
2566                 resp->status = NFS3ERR_NOTDIR;
2567                 goto err1;
2568         }
2569 
2570         if (args->object.name == nfs3nametoolong) {
2571                 resp->status = NFS3ERR_NAMETOOLONG;
2572                 goto err1;
2573         }
2574 
2575         if (args->object.name == NULL || *(args->object.name) == '\0') {
2576                 resp->status = NFS3ERR_ACCES;
2577                 goto err1;
2578         }
2579 
2580         if (rdonly(exi, req)) {
2581                 resp->status = NFS3ERR_ROFS;
2582                 goto err1;
2583         }
2584 
2585         if (is_system_labeled()) {
2586                 bslabel_t *clabel = req->rq_label;
2587 
2588                 ASSERT(clabel != NULL);
2589                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2590                     "got client label from request(1)", struct svc_req *, req);
2591 
2592                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2593                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2594                             exi)) {
2595                                 resp->status = NFS3ERR_ACCES;
2596                                 goto err1;
2597                         }
2598                 }
2599         }
2600 
2601         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2602         name = nfscmd_convname(ca, exi, args->object.name,
2603             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2604 
2605         if (name == NULL) {
2606                 resp->status = NFS3ERR_INVAL;
2607                 goto err1;
2608         }
2609 
2610         error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2611 
2612         if (name != args->object.name)
2613                 kmem_free(name, MAXPATHLEN + 1);
2614 
2615         ava.va_mask = AT_ALL;
2616         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2617 
2618         /*
2619          * Force modified data and metadata out to stable storage.
2620          */
2621         (void) VOP_FSYNC(vp, 0, cr, NULL);
2622 
2623         if (error) {
2624                 /*
2625                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2626                  * if the directory is not empty.  A System V NFS server
2627                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2628                  * over the wire.
2629                  */
2630                 if (error == EEXIST)
2631                         error = ENOTEMPTY;
2632                 goto err;
2633         }
2634 
2635         resp->status = NFS3_OK;
2636         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2637         goto out;
2638 
2639 err:
2640         if (curthread->t_flag & T_WOULDBLOCK) {
2641                 curthread->t_flag &= ~T_WOULDBLOCK;
2642                 resp->status = NFS3ERR_JUKEBOX;
2643         } else
2644                 resp->status = puterrno3(error);
2645 err1:
2646         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2647 out:
2648         DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2649             cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2650         if (vp != NULL)
2651                 VN_RELE(vp);
2652 
2653 }
2654 
2655 void *
2656 rfs3_rmdir_getfh(RMDIR3args *args)
2657 {
2658 
2659         return (&args->object.dir);
2660 }
2661 
2662 void
2663 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2664         struct svc_req *req, cred_t *cr)
2665 {
2666         int error = 0;
2667         vnode_t *fvp;
2668         vnode_t *tvp;
2669         vnode_t *targvp;
2670         struct vattr *fbvap;
2671         struct vattr fbva;
2672         struct vattr *favap;
2673         struct vattr fava;
2674         struct vattr *tbvap;
2675         struct vattr tbva;
2676         struct vattr *tavap;
2677         struct vattr tava;
2678         nfs_fh3 *fh3;
2679         struct exportinfo *to_exi;
2680         vnode_t *srcvp = NULL;
2681         bslabel_t *clabel;
2682         struct sockaddr *ca;
2683         char *name = NULL;
2684         char *toname = NULL;
2685 
2686         fbvap = NULL;
2687         favap = NULL;
2688         tbvap = NULL;
2689         tavap = NULL;
2690         tvp = NULL;
2691 
2692         fvp = nfs3_fhtovp(&args->from.dir, exi);
2693 
2694         DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2695             cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2696 
2697         if (fvp == NULL) {
2698                 error = ESTALE;
2699                 goto err;
2700         }
2701 
2702         if (is_system_labeled()) {
2703                 clabel = req->rq_label;
2704                 ASSERT(clabel != NULL);
2705                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2706                     "got client label from request(1)", struct svc_req *, req);
2707 
2708                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2709                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2710                             exi)) {
2711                                 resp->status = NFS3ERR_ACCES;
2712                                 goto err1;
2713                         }
2714                 }
2715         }
2716 
2717         fbva.va_mask = AT_ALL;
2718         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2719         favap = fbvap;
2720 
2721         fh3 = &args->to.dir;
2722         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2723         if (to_exi == NULL) {
2724                 resp->status = NFS3ERR_ACCES;
2725                 goto err1;
2726         }
2727         exi_rele(to_exi);
2728 
2729         if (to_exi != exi) {
2730                 resp->status = NFS3ERR_XDEV;
2731                 goto err1;
2732         }
2733 
2734         tvp = nfs3_fhtovp(&args->to.dir, exi);
2735         if (tvp == NULL) {
2736                 error = ESTALE;
2737                 goto err;
2738         }
2739 
2740         tbva.va_mask = AT_ALL;
2741         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2742         tavap = tbvap;
2743 
2744         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2745                 resp->status = NFS3ERR_NOTDIR;
2746                 goto err1;
2747         }
2748 
2749         if (args->from.name == nfs3nametoolong ||
2750             args->to.name == nfs3nametoolong) {
2751                 resp->status = NFS3ERR_NAMETOOLONG;
2752                 goto err1;
2753         }
2754         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2755             args->to.name == NULL || *(args->to.name) == '\0') {
2756                 resp->status = NFS3ERR_ACCES;
2757                 goto err1;
2758         }
2759 
2760         if (rdonly(exi, req)) {
2761                 resp->status = NFS3ERR_ROFS;
2762                 goto err1;
2763         }
2764 
2765         if (is_system_labeled()) {
2766                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2767                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2768                             exi)) {
2769                                 resp->status = NFS3ERR_ACCES;
2770                                 goto err1;
2771                         }
2772                 }
2773         }
2774 
2775         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2776         name = nfscmd_convname(ca, exi, args->from.name,
2777             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2778 
2779         if (name == NULL) {
2780                 resp->status = NFS3ERR_INVAL;
2781                 goto err1;
2782         }
2783 
2784         toname = nfscmd_convname(ca, exi, args->to.name,
2785             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2786 
2787         if (toname == NULL) {
2788                 resp->status = NFS3ERR_INVAL;
2789                 goto err1;
2790         }
2791 
2792         /*
2793          * Check for a conflict with a non-blocking mandatory share
2794          * reservation or V4 delegations.
2795          */
2796         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2797             NULL, cr, NULL, NULL, NULL);
2798         if (error != 0)
2799                 goto err;
2800 
2801         /*
2802          * If we rename a delegated file we should recall the
2803          * delegation, since future opens should fail or would
2804          * refer to a new file.
2805          */
2806         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2807                 resp->status = NFS3ERR_JUKEBOX;
2808                 goto err1;
2809         }
2810 
2811         /*
2812          * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2813          * first to avoid VOP_LOOKUP if possible.
2814          */
2815         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2816             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2817             NULL, NULL, NULL) == 0) {
2818 
2819                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2820                         VN_RELE(targvp);
2821                         resp->status = NFS3ERR_JUKEBOX;
2822                         goto err1;
2823                 }
2824                 VN_RELE(targvp);
2825         }
2826 
2827         if (!nbl_need_check(srcvp)) {
2828                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2829         } else {
2830                 nbl_start_crit(srcvp, RW_READER);
2831                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2832                         error = EACCES;
2833                 else
2834                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2835                 nbl_end_crit(srcvp);
2836         }
2837         if (error == 0)
2838                 vn_renamepath(tvp, srcvp, args->to.name,
2839                     strlen(args->to.name));
2840         VN_RELE(srcvp);
2841         srcvp = NULL;
2842 
2843         fava.va_mask = AT_ALL;
2844         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2845         tava.va_mask = AT_ALL;
2846         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2847 
2848         /*
2849          * Force modified data and metadata out to stable storage.
2850          */
2851         (void) VOP_FSYNC(fvp, 0, cr, NULL);
2852         (void) VOP_FSYNC(tvp, 0, cr, NULL);
2853 
2854         if (error)
2855                 goto err;
2856 
2857         resp->status = NFS3_OK;
2858         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2859         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2860         goto out;
2861 
2862 err:
2863         if (curthread->t_flag & T_WOULDBLOCK) {
2864                 curthread->t_flag &= ~T_WOULDBLOCK;
2865                 resp->status = NFS3ERR_JUKEBOX;
2866         } else {
2867                 resp->status = puterrno3(error);
2868         }
2869 err1:
2870         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2871         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2872 
2873 out:
2874         if (name != NULL && name != args->from.name)
2875                 kmem_free(name, MAXPATHLEN + 1);
2876         if (toname != NULL && toname != args->to.name)
2877                 kmem_free(toname, MAXPATHLEN + 1);
2878 
2879         DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2880             cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2881         if (fvp != NULL)
2882                 VN_RELE(fvp);
2883         if (tvp != NULL)
2884                 VN_RELE(tvp);
2885 }
2886 
2887 void *
2888 rfs3_rename_getfh(RENAME3args *args)
2889 {
2890 
2891         return (&args->from.dir);
2892 }
2893 
2894 void
2895 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2896         struct svc_req *req, cred_t *cr)
2897 {
2898         int error;
2899         vnode_t *vp;
2900         vnode_t *dvp;
2901         struct vattr *vap;
2902         struct vattr va;
2903         struct vattr *bvap;
2904         struct vattr bva;
2905         struct vattr *avap;
2906         struct vattr ava;
2907         nfs_fh3 *fh3;
2908         struct exportinfo *to_exi;
2909         bslabel_t *clabel;
2910         struct sockaddr *ca;
2911         char *name = NULL;
2912 
2913         vap = NULL;
2914         bvap = NULL;
2915         avap = NULL;
2916         dvp = NULL;
2917 
2918         vp = nfs3_fhtovp(&args->file, exi);
2919 
2920         DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2921             cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2922 
2923         if (vp == NULL) {
2924                 error = ESTALE;
2925                 goto out;
2926         }
2927 
2928         va.va_mask = AT_ALL;
2929         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2930 
2931         fh3 = &args->link.dir;
2932         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2933         if (to_exi == NULL) {
2934                 resp->status = NFS3ERR_ACCES;
2935                 goto out1;
2936         }
2937         exi_rele(to_exi);
2938 
2939         if (to_exi != exi) {
2940                 resp->status = NFS3ERR_XDEV;
2941                 goto out1;
2942         }
2943 
2944         if (is_system_labeled()) {
2945                 clabel = req->rq_label;
2946 
2947                 ASSERT(clabel != NULL);
2948                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2949                     "got client label from request(1)", struct svc_req *, req);
2950 
2951                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2952                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2953                             exi)) {
2954                                 resp->status = NFS3ERR_ACCES;
2955                                 goto out1;
2956                         }
2957                 }
2958         }
2959 
2960         dvp = nfs3_fhtovp(&args->link.dir, exi);
2961         if (dvp == NULL) {
2962                 error = ESTALE;
2963                 goto out;
2964         }
2965 
2966         bva.va_mask = AT_ALL;
2967         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2968 
2969         if (dvp->v_type != VDIR) {
2970                 resp->status = NFS3ERR_NOTDIR;
2971                 goto out1;
2972         }
2973 
2974         if (args->link.name == nfs3nametoolong) {
2975                 resp->status = NFS3ERR_NAMETOOLONG;
2976                 goto out1;
2977         }
2978 
2979         if (args->link.name == NULL || *(args->link.name) == '\0') {
2980                 resp->status = NFS3ERR_ACCES;
2981                 goto out1;
2982         }
2983 
2984         if (rdonly(exi, req)) {
2985                 resp->status = NFS3ERR_ROFS;
2986                 goto out1;
2987         }
2988 
2989         if (is_system_labeled()) {
2990                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2991                     "got client label from request(1)", struct svc_req *, req);
2992 
2993                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2994                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2995                             exi)) {
2996                                 resp->status = NFS3ERR_ACCES;
2997                                 goto out1;
2998                         }
2999                 }
3000         }
3001 
3002         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3003         name = nfscmd_convname(ca, exi, args->link.name,
3004             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3005 
3006         if (name == NULL) {
3007                 resp->status = NFS3ERR_SERVERFAULT;
3008                 goto out1;
3009         }
3010 
3011         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3012 
3013         va.va_mask = AT_ALL;
3014         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3015         ava.va_mask = AT_ALL;
3016         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3017 
3018         /*
3019          * Force modified data and metadata out to stable storage.
3020          */
3021         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3022         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3023 
3024         if (error)
3025                 goto out;
3026 
3027         VN_RELE(dvp);
3028 
3029         resp->status = NFS3_OK;
3030         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3031         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3032 
3033         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3034             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3035 
3036         VN_RELE(vp);
3037 
3038         return;
3039 
3040 out:
3041         if (curthread->t_flag & T_WOULDBLOCK) {
3042                 curthread->t_flag &= ~T_WOULDBLOCK;
3043                 resp->status = NFS3ERR_JUKEBOX;
3044         } else
3045                 resp->status = puterrno3(error);
3046 out1:
3047         if (name != NULL && name != args->link.name)
3048                 kmem_free(name, MAXPATHLEN + 1);
3049 
3050         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3051             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3052 
3053         if (vp != NULL)
3054                 VN_RELE(vp);
3055         if (dvp != NULL)
3056                 VN_RELE(dvp);
3057         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3058         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3059 }
3060 
3061 void *
3062 rfs3_link_getfh(LINK3args *args)
3063 {
3064 
3065         return (&args->file);
3066 }
3067 
/*
 * Size, in bytes, of a READDIR response that carries the directory
 * attributes plus exactly one entry whose name is `length' bytes long.
 * A request whose count is at least this large is guaranteed room for
 * at least one directory entry if one exists, so NFS3ERR_TOOSMALL need
 * not be considered.  A request with a smaller count must be checked
 * to see whether that error has to be returned.
 *
 * The fixed part, in XDR units (each BYTES_PER_XDR_UNIT bytes):
 *
 *      status                  1
 *      attributes flag         1
 *      cookie verifier         2
 *      attributes              NFS3_SIZEOF_FATTR3
 *      entry-follows boolean   1
 *      file id                 2
 *      name length             1
 *      cookie                  2
 *      end of list             1
 *      end of file             1
 *
 * to which the name itself is added, rounded up to a whole XDR unit.
 */

#define NFS3_READDIR_MIN_COUNT(length)  \
        (roundup((length), BYTES_PER_XDR_UNIT) + \
            (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
            BYTES_PER_XDR_UNIT)
3096 
3097 /* ARGSUSED */
3098 void
3099 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3100         struct svc_req *req, cred_t *cr)
3101 {
3102         int error;
3103         vnode_t *vp;
3104         struct vattr *vap;
3105         struct vattr va;
3106         struct iovec iov;
3107         struct uio uio;
3108         char *data;
3109         int iseof;
3110         int bufsize;
3111         int namlen;
3112         uint_t count;
3113         struct sockaddr *ca;
3114 
3115         vap = NULL;
3116 
3117         vp = nfs3_fhtovp(&args->dir, exi);
3118 
3119         DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3120             cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3121 
3122         if (vp == NULL) {
3123                 error = ESTALE;
3124                 goto out;
3125         }
3126 
3127         if (is_system_labeled()) {
3128                 bslabel_t *clabel = req->rq_label;
3129 
3130                 ASSERT(clabel != NULL);
3131                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3132                     "got client label from request(1)", struct svc_req *, req);
3133 
3134                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3135                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3136                             exi)) {
3137                                 resp->status = NFS3ERR_ACCES;
3138                                 goto out1;
3139                         }
3140                 }
3141         }
3142 
3143         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3144 
3145         va.va_mask = AT_ALL;
3146         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3147 
3148         if (vp->v_type != VDIR) {
3149                 resp->status = NFS3ERR_NOTDIR;
3150                 goto out1;
3151         }
3152 
3153         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3154         if (error)
3155                 goto out;
3156 
3157         /*
3158          * Now don't allow arbitrary count to alloc;
3159          * allow the maximum not to exceed rfs3_tsize()
3160          */
3161         if (args->count > rfs3_tsize(req))
3162                 args->count = rfs3_tsize(req);
3163 
3164         /*
3165          * Make sure that there is room to read at least one entry
3166          * if any are available.
3167          */
3168         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3169                 count = DIRENT64_RECLEN(MAXNAMELEN);
3170         else
3171                 count = args->count;
3172 
3173         data = kmem_alloc(count, KM_SLEEP);
3174 
3175         iov.iov_base = data;
3176         iov.iov_len = count;
3177         uio.uio_iov = &iov;
3178         uio.uio_iovcnt = 1;
3179         uio.uio_segflg = UIO_SYSSPACE;
3180         uio.uio_extflg = UIO_COPY_CACHED;
3181         uio.uio_loffset = (offset_t)args->cookie;
3182         uio.uio_resid = count;
3183 
3184         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3185 
3186         va.va_mask = AT_ALL;
3187         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3188 
3189         if (error) {
3190                 kmem_free(data, count);
3191                 goto out;
3192         }
3193 
3194         /*
3195          * If the count was not large enough to be able to guarantee
3196          * to be able to return at least one entry, then need to
3197          * check to see if NFS3ERR_TOOSMALL should be returned.
3198          */
3199         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3200                 /*
3201                  * bufsize is used to keep track of the size of the response.
3202                  * It is primed with:
3203                  *      1 for the status +
3204                  *      1 for the dir_attributes.attributes boolean +
3205                  *      2 for the cookie verifier
3206                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3207                  * to bytes.  If there are directory attributes to be
3208                  * returned, then:
3209                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3210                  * time BYTES_PER_XDR_UNIT is added to account for them.
3211                  */
3212                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3213                 if (vap != NULL)
3214                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3215                 /*
3216                  * An entry is composed of:
3217                  *      1 for the true/false list indicator +
3218                  *      2 for the fileid +
3219                  *      1 for the length of the name +
3220                  *      2 for the cookie +
3221                  * all times BYTES_PER_XDR_UNIT to convert from
3222                  * XDR units to bytes, plus the length of the name
3223                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3224                  */
3225                 if (count != uio.uio_resid) {
3226                         namlen = strlen(((struct dirent64 *)data)->d_name);
3227                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3228                             roundup(namlen, BYTES_PER_XDR_UNIT);
3229                 }
3230                 /*
3231                  * We need to check to see if the number of bytes left
3232                  * to go into the buffer will actually fit into the
3233                  * buffer.  This is calculated as the size of this
3234                  * entry plus:
3235                  *      1 for the true/false list indicator +
3236                  *      1 for the eof indicator
3237                  * times BYTES_PER_XDR_UNIT to convert from from
3238                  * XDR units to bytes.
3239                  */
3240                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3241                 if (bufsize > args->count) {
3242                         kmem_free(data, count);
3243                         resp->status = NFS3ERR_TOOSMALL;
3244                         goto out1;
3245                 }
3246         }
3247 
3248         /*
3249          * Have a valid readir buffer for the native character
3250          * set. Need to check if a conversion is necessary and
3251          * potentially rewrite the whole buffer. Note that if the
3252          * conversion expands names enough, the structure may not
3253          * fit. In this case, we need to drop entries until if fits
3254          * and patch the counts in order that the next readdir will
3255          * get the correct entries.
3256          */
3257         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3258         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3259 
3260 
3261         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3262 
3263 #if 0 /* notyet */
3264         /*
3265          * Don't do this.  It causes local disk writes when just
3266          * reading the file and the overhead is deemed larger
3267          * than the benefit.
3268          */
3269         /*
3270          * Force modified metadata out to stable storage.
3271          */
3272         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3273 #endif
3274 
3275         resp->status = NFS3_OK;
3276         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3277         resp->resok.cookieverf = 0;
3278         resp->resok.reply.entries = (entry3 *)data;
3279         resp->resok.reply.eof = iseof;
3280         resp->resok.size = count - uio.uio_resid;
3281         resp->resok.count = args->count;
3282         resp->resok.freecount = count;
3283 
3284         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3285             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3286 
3287         VN_RELE(vp);
3288 
3289         return;
3290 
3291 out:
3292         if (curthread->t_flag & T_WOULDBLOCK) {
3293                 curthread->t_flag &= ~T_WOULDBLOCK;
3294                 resp->status = NFS3ERR_JUKEBOX;
3295         } else
3296                 resp->status = puterrno3(error);
3297 out1:
3298         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3299             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3300 
3301         if (vp != NULL) {
3302                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3303                 VN_RELE(vp);
3304         }
3305         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3306 }
3307 
3308 void *
3309 rfs3_readdir_getfh(READDIR3args *args)
3310 {
3311 
3312         return (&args->dir);
3313 }
3314 
3315 void
3316 rfs3_readdir_free(READDIR3res *resp)
3317 {
3318 
3319         if (resp->status == NFS3_OK)
3320                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3321 }
3322 
/*
 * Step from dirent64 entry dp to the entry that follows it in a packed
 * buffer, using dp's own record length.  Any earlier definition of
 * nextdp is discarded first (#undef of an undefined name is a no-op).
 */
#undef nextdp
#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3327 
/*
 * Size, in bytes, of one READDIRPLUS directory entry including its
 * attributes and file handle, for a name that is `namelen' bytes long.
 * If the space remaining in the reply is larger than this, we are
 * guaranteed to be able to return at least one more directory entry if
 * one exists.
 *
 * The fixed part, in XDR units (each BYTES_PER_XDR_UNIT bytes):
 *
 *      entry-follows boolean   1
 *      file id                 2
 *      name length             1
 *      cookie                  2
 *      attributes flag         1
 *      attributes              NFS3_SIZEOF_FATTR3
 *      file handle flag        1
 *      file handle length      1
 *
 * to which are added the maximum file handle size (NFS3_MAXFHSIZE bytes)
 * and the name itself, rounded up to a whole XDR unit.
 *
 * The macro argument is parenthesized so that an expression argument is
 * passed to roundup() as a single operand.
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
                BYTES_PER_XDR_UNIT + \
        NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3351 
3352 static int rfs3_readdir_unit = MAXBSIZE;
3353 
3354 /* ARGSUSED */
3355 void
3356 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3357         struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3358 {
3359         int error;
3360         vnode_t *vp;
3361         struct vattr *vap;
3362         struct vattr va;
3363         struct iovec iov;
3364         struct uio uio;
3365         char *data;
3366         int iseof;
3367         struct dirent64 *dp;
3368         vnode_t *nvp;
3369         struct vattr *nvap;
3370         struct vattr nva;
3371         entryplus3_info *infop = NULL;
3372         int size = 0;
3373         int nents = 0;
3374         int bufsize = 0;
3375         int entrysize = 0;
3376         int tofit = 0;
3377         int rd_unit = rfs3_readdir_unit;
3378         int prev_len;
3379         int space_left;
3380         int i;
3381         uint_t *namlen = NULL;
3382         char *ndata = NULL;
3383         struct sockaddr *ca;
3384         size_t ret;
3385 
3386         vap = NULL;
3387 
3388         vp = nfs3_fhtovp(&args->dir, exi);
3389 
3390         DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3391             cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3392 
3393         if (vp == NULL) {
3394                 error = ESTALE;
3395                 goto out;
3396         }
3397 
3398         if (is_system_labeled()) {
3399                 bslabel_t *clabel = req->rq_label;
3400 
3401                 ASSERT(clabel != NULL);
3402                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3403                     char *, "got client label from request(1)",
3404                     struct svc_req *, req);
3405 
3406                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3407                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3408                             exi)) {
3409                                 resp->status = NFS3ERR_ACCES;
3410                                 goto out1;
3411                         }
3412                 }
3413         }
3414 
3415         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3416 
3417         va.va_mask = AT_ALL;
3418         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3419 
3420         if (vp->v_type != VDIR) {
3421                 error = ENOTDIR;
3422                 goto out;
3423         }
3424 
3425         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3426         if (error)
3427                 goto out;
3428 
3429         /*
3430          * Don't allow arbitrary counts for allocation
3431          */
3432         if (args->maxcount > rfs3_tsize(req))
3433                 args->maxcount = rfs3_tsize(req);
3434 
3435         /*
3436          * Make sure that there is room to read at least one entry
3437          * if any are available
3438          */
3439         args->dircount = MIN(args->dircount, args->maxcount);
3440 
3441         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3442                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3443 
3444         /*
3445          * This allocation relies on a minimum directory entry
3446          * being roughly 24 bytes.  Therefore, the namlen array
3447          * will have enough space based on the maximum number of
3448          * entries to read.
3449          */
3450         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3451 
3452         space_left = args->dircount;
3453         data = kmem_alloc(args->dircount, KM_SLEEP);
3454         dp = (struct dirent64 *)data;
3455         uio.uio_iov = &iov;
3456         uio.uio_iovcnt = 1;
3457         uio.uio_segflg = UIO_SYSSPACE;
3458         uio.uio_extflg = UIO_COPY_CACHED;
3459         uio.uio_loffset = (offset_t)args->cookie;
3460 
3461         /*
3462          * bufsize is used to keep track of the size of the response as we
3463          * get post op attributes and filehandles for each entry.  This is
3464          * an optimization as the server may have read more entries than will
3465          * fit in the buffer specified by maxcount.  We stop calculating
3466          * post op attributes and filehandles once we have exceeded maxcount.
3467          * This will minimize the effect of truncation.
3468          *
3469          * It is primed with:
3470          *      1 for the status +
3471          *      1 for the dir_attributes.attributes boolean +
3472          *      2 for the cookie verifier
3473          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3474          * to bytes.  If there are directory attributes to be
3475          * returned, then:
3476          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3477          * time BYTES_PER_XDR_UNIT is added to account for them.
3478          */
3479         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3480         if (vap != NULL)
3481                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3482 
3483 getmoredents:
3484         /*
3485          * Here we make a check so that our read unit is not larger than
3486          * the space left in the buffer.
3487          */
3488         rd_unit = MIN(rd_unit, space_left);
3489         iov.iov_base = (char *)dp;
3490         iov.iov_len = rd_unit;
3491         uio.uio_resid = rd_unit;
3492         prev_len = rd_unit;
3493 
3494         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3495 
3496         if (error) {
3497                 kmem_free(data, args->dircount);
3498                 goto out;
3499         }
3500 
3501         if (uio.uio_resid == prev_len && !iseof) {
3502                 if (nents == 0) {
3503                         kmem_free(data, args->dircount);
3504                         resp->status = NFS3ERR_TOOSMALL;
3505                         goto out1;
3506                 }
3507 
3508                 /*
3509                  * We could not get any more entries, so get the attributes
3510                  * and filehandle for the entries already obtained.
3511                  */
3512                 goto good;
3513         }
3514 
3515         /*
3516          * We estimate the size of the response by assuming the
3517          * entry exists and attributes and filehandle are also valid
3518          */
3519         for (size = prev_len - uio.uio_resid;
3520             size > 0;
3521             size -= dp->d_reclen, dp = nextdp(dp)) {
3522 
3523                 if (dp->d_ino == 0) {
3524                         nents++;
3525                         continue;
3526                 }
3527 
3528                 namlen[nents] = strlen(dp->d_name);
3529                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3530 
3531                 /*
3532                  * We need to check to see if the number of bytes left
3533                  * to go into the buffer will actually fit into the
3534                  * buffer.  This is calculated as the size of this
3535                  * entry plus:
3536                  *      1 for the true/false list indicator +
3537                  *      1 for the eof indicator
3538                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3539                  * to bytes.
3540                  *
3541                  * Also check the dircount limit against the first entry read
3542                  *
3543                  */
3544                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3545                 if (bufsize + tofit > args->maxcount) {
3546                         /*
3547                          * We make a check here to see if this was the
3548                          * first entry being measured.  If so, then maxcount
3549                          * was too small to begin with and so we need to
3550                          * return with NFS3ERR_TOOSMALL.
3551                          */
3552                         if (nents == 0) {
3553                                 kmem_free(data, args->dircount);
3554                                 resp->status = NFS3ERR_TOOSMALL;
3555                                 goto out1;
3556                         }
3557                         iseof = FALSE;
3558                         goto good;
3559                 }
3560                 bufsize += entrysize;
3561                 nents++;
3562         }
3563 
3564         /*
3565          * If there is enough room to fit at least 1 more entry including
3566          * post op attributes and filehandle in the buffer AND that we haven't
3567          * exceeded dircount then go back and get some more.
3568          */
3569         if (!iseof &&
3570             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3571                 space_left -= (prev_len - uio.uio_resid);
3572                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3573                         goto getmoredents;
3574 
3575                 /* else, fall through */
3576         }
3577 good:
3578         va.va_mask = AT_ALL;
3579         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3580 
3581         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3582 
3583         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3584         resp->resok.infop = infop;
3585 
3586         dp = (struct dirent64 *)data;
3587         for (i = 0; i < nents; i++) {
3588 
3589                 if (dp->d_ino == 0) {
3590                         infop[i].attr.attributes = FALSE;
3591                         infop[i].fh.handle_follows = FALSE;
3592                         dp = nextdp(dp);
3593                         continue;
3594                 }
3595 
3596                 infop[i].namelen = namlen[i];
3597 
3598                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3599                     NULL, NULL, NULL);
3600                 if (error) {
3601                         infop[i].attr.attributes = FALSE;
3602                         infop[i].fh.handle_follows = FALSE;
3603                         dp = nextdp(dp);
3604                         continue;
3605                 }
3606 
3607                 nva.va_mask = AT_ALL;
3608                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3609 
3610                 /* Lie about the object type for a referral */
3611                 if (vn_is_nfs_reparse(nvp, cr))
3612                         nvap->va_type = VLNK;
3613 
3614                 vattr_to_post_op_attr(nvap, &infop[i].attr);
3615 
3616                 error = makefh3(&infop[i].fh.handle, nvp, exi);
3617                 if (!error)
3618                         infop[i].fh.handle_follows = TRUE;
3619                 else
3620                         infop[i].fh.handle_follows = FALSE;
3621 
3622                 VN_RELE(nvp);
3623                 dp = nextdp(dp);
3624         }
3625 
3626         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3627         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3628         if (ndata == NULL)
3629                 ndata = data;
3630 
3631         if (ret > 0) {
3632                 /*
3633                  * We had to drop one or more entries in order to fit
3634                  * during the character conversion.  We need to patch
3635                  * up the size and eof info.
3636                  */
3637                 if (iseof)
3638                         iseof = FALSE;
3639 
3640                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3641                     nents, ret);
3642         }
3643 
3644 
3645 #if 0 /* notyet */
3646         /*
3647          * Don't do this.  It causes local disk writes when just
3648          * reading the file and the overhead is deemed larger
3649          * than the benefit.
3650          */
3651         /*
3652          * Force modified metadata out to stable storage.
3653          */
3654         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3655 #endif
3656 
3657         kmem_free(namlen, args->dircount);
3658 
3659         resp->status = NFS3_OK;
3660         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3661         resp->resok.cookieverf = 0;
3662         resp->resok.reply.entries = (entryplus3 *)ndata;
3663         resp->resok.reply.eof = iseof;
3664         resp->resok.size = nents;
3665         resp->resok.count = args->dircount - ret;
3666         resp->resok.maxcount = args->maxcount;
3667 
3668         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3669             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3670         if (ndata != data)
3671                 kmem_free(data, args->dircount);
3672 
3673 
3674         VN_RELE(vp);
3675 
3676         return;
3677 
3678 out:
3679         if (curthread->t_flag & T_WOULDBLOCK) {
3680                 curthread->t_flag &= ~T_WOULDBLOCK;
3681                 resp->status = NFS3ERR_JUKEBOX;
3682         } else {
3683                 resp->status = puterrno3(error);
3684         }
3685 out1:
3686         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3687             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3688 
3689         if (vp != NULL) {
3690                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3691                 VN_RELE(vp);
3692         }
3693 
3694         if (namlen != NULL)
3695                 kmem_free(namlen, args->dircount);
3696 
3697         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3698 }
3699 
3700 void *
3701 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3702 {
3703 
3704         return (&args->dir);
3705 }
3706 
3707 void
3708 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3709 {
3710 
3711         if (resp->status == NFS3_OK) {
3712                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3713                 kmem_free(resp->resok.infop,
3714                     resp->resok.size * sizeof (struct entryplus3_info));
3715         }
3716 }
3717 
3718 /* ARGSUSED */
3719 void
3720 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3721         struct svc_req *req, cred_t *cr)
3722 {
3723         int error;
3724         vnode_t *vp;
3725         struct vattr *vap;
3726         struct vattr va;
3727         struct statvfs64 sb;
3728 
3729         vap = NULL;
3730 
3731         vp = nfs3_fhtovp(&args->fsroot, exi);
3732 
3733         DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3734             cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3735 
3736         if (vp == NULL) {
3737                 error = ESTALE;
3738                 goto out;
3739         }
3740 
3741         if (is_system_labeled()) {
3742                 bslabel_t *clabel = req->rq_label;
3743 
3744                 ASSERT(clabel != NULL);
3745                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3746                     "got client label from request(1)", struct svc_req *, req);
3747 
3748                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3749                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3750                             exi)) {
3751                                 resp->status = NFS3ERR_ACCES;
3752                                 goto out1;
3753                         }
3754                 }
3755         }
3756 
3757         error = VFS_STATVFS(vp->v_vfsp, &sb);
3758 
3759         va.va_mask = AT_ALL;
3760         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3761 
3762         if (error)
3763                 goto out;
3764 
3765         resp->status = NFS3_OK;
3766         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3767         if (sb.f_blocks != (fsblkcnt64_t)-1)
3768                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3769         else
3770                 resp->resok.tbytes = (size3)sb.f_blocks;
3771         if (sb.f_bfree != (fsblkcnt64_t)-1)
3772                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3773         else
3774                 resp->resok.fbytes = (size3)sb.f_bfree;
3775         if (sb.f_bavail != (fsblkcnt64_t)-1)
3776                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3777         else
3778                 resp->resok.abytes = (size3)sb.f_bavail;
3779         resp->resok.tfiles = (size3)sb.f_files;
3780         resp->resok.ffiles = (size3)sb.f_ffree;
3781         resp->resok.afiles = (size3)sb.f_favail;
3782         resp->resok.invarsec = 0;
3783 
3784         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3785             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3786         VN_RELE(vp);
3787 
3788         return;
3789 
3790 out:
3791         if (curthread->t_flag & T_WOULDBLOCK) {
3792                 curthread->t_flag &= ~T_WOULDBLOCK;
3793                 resp->status = NFS3ERR_JUKEBOX;
3794         } else
3795                 resp->status = puterrno3(error);
3796 out1:
3797         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3798             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3799 
3800         if (vp != NULL)
3801                 VN_RELE(vp);
3802         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3803 }
3804 
3805 void *
3806 rfs3_fsstat_getfh(FSSTAT3args *args)
3807 {
3808 
3809         return (&args->fsroot);
3810 }
3811 
3812 void
3813 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3814         struct svc_req *req, cred_t *cr)
3815 {
3816         vnode_t *vp;
3817         struct vattr *vap;
3818         struct vattr va;
3819         uint32_t xfer_size;
3820         ulong_t l = 0;
3821         int error;
3822 
3823         vp = nfs3_fhtovp(&args->fsroot, exi);
3824 
3825         DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3826             cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3827 
3828         if (vp == NULL) {
3829                 if (curthread->t_flag & T_WOULDBLOCK) {
3830                         curthread->t_flag &= ~T_WOULDBLOCK;
3831                         resp->status = NFS3ERR_JUKEBOX;
3832                 } else
3833                         resp->status = NFS3ERR_STALE;
3834                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3835                 goto out;
3836         }
3837 
3838         if (is_system_labeled()) {
3839                 bslabel_t *clabel = req->rq_label;
3840 
3841                 ASSERT(clabel != NULL);
3842                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3843                     "got client label from request(1)", struct svc_req *, req);
3844 
3845                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3846                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3847                             exi)) {
3848                                 resp->status = NFS3ERR_STALE;
3849                                 vattr_to_post_op_attr(NULL,
3850                                     &resp->resfail.obj_attributes);
3851                                 goto out;
3852                         }
3853                 }
3854         }
3855 
3856         va.va_mask = AT_ALL;
3857         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3858 
3859         resp->status = NFS3_OK;
3860         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3861         xfer_size = rfs3_tsize(req);
3862         resp->resok.rtmax = xfer_size;
3863         resp->resok.rtpref = xfer_size;
3864         resp->resok.rtmult = DEV_BSIZE;
3865         resp->resok.wtmax = xfer_size;
3866         resp->resok.wtpref = xfer_size;
3867         resp->resok.wtmult = DEV_BSIZE;
3868         resp->resok.dtpref = MAXBSIZE;
3869 
3870         /*
3871          * Large file spec: want maxfilesize based on limit of
3872          * underlying filesystem.  We can guess 2^31-1 if need be.
3873          */
3874         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3875         if (error) {
3876                 resp->status = puterrno3(error);
3877                 goto out;
3878         }
3879 
3880         /*
3881          * If the underlying file system does not support _PC_FILESIZEBITS,
3882          * return a reasonable default. Note that error code on VOP_PATHCONF
3883          * will be 0, even if the underlying file system does not support
3884          * _PC_FILESIZEBITS.
3885          */
3886         if (l == (ulong_t)-1) {
3887                 resp->resok.maxfilesize = MAXOFF32_T;
3888         } else {
3889                 if (l >= (sizeof (uint64_t) * 8))
3890                         resp->resok.maxfilesize = INT64_MAX;
3891                 else
3892                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3893         }
3894 
3895         resp->resok.time_delta.seconds = 0;
3896         resp->resok.time_delta.nseconds = 1000;
3897         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3898             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3899 
3900         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3901             cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3902 
3903         VN_RELE(vp);
3904 
3905         return;
3906 
3907 out:
3908         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3909             cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3910         if (vp != NULL)
3911                 VN_RELE(vp);
3912 }
3913 
3914 void *
3915 rfs3_fsinfo_getfh(FSINFO3args *args)
3916 {
3917 
3918         return (&args->fsroot);
3919 }
3920 
3921 /* ARGSUSED */
3922 void
3923 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3924         struct svc_req *req, cred_t *cr)
3925 {
3926         int error;
3927         vnode_t *vp;
3928         struct vattr *vap;
3929         struct vattr va;
3930         ulong_t val;
3931 
3932         vap = NULL;
3933 
3934         vp = nfs3_fhtovp(&args->object, exi);
3935 
3936         DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3937             cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3938 
3939         if (vp == NULL) {
3940                 error = ESTALE;
3941                 goto out;
3942         }
3943 
3944         if (is_system_labeled()) {
3945                 bslabel_t *clabel = req->rq_label;
3946 
3947                 ASSERT(clabel != NULL);
3948                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3949                     "got client label from request(1)", struct svc_req *, req);
3950 
3951                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3952                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3953                             exi)) {
3954                                 resp->status = NFS3ERR_ACCES;
3955                                 goto out1;
3956                         }
3957                 }
3958         }
3959 
3960         va.va_mask = AT_ALL;
3961         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3962 
3963         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3964         if (error)
3965                 goto out;
3966         resp->resok.info.link_max = (uint32)val;
3967 
3968         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3969         if (error)
3970                 goto out;
3971         resp->resok.info.name_max = (uint32)val;
3972 
3973         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3974         if (error)
3975                 goto out;
3976         if (val == 1)
3977                 resp->resok.info.no_trunc = TRUE;
3978         else
3979                 resp->resok.info.no_trunc = FALSE;
3980 
3981         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3982         if (error)
3983                 goto out;
3984         if (val == 1)
3985                 resp->resok.info.chown_restricted = TRUE;
3986         else
3987                 resp->resok.info.chown_restricted = FALSE;
3988 
3989         resp->status = NFS3_OK;
3990         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3991         resp->resok.info.case_insensitive = FALSE;
3992         resp->resok.info.case_preserving = TRUE;
3993         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3994             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3995         VN_RELE(vp);
3996         return;
3997 
3998 out:
3999         if (curthread->t_flag & T_WOULDBLOCK) {
4000                 curthread->t_flag &= ~T_WOULDBLOCK;
4001                 resp->status = NFS3ERR_JUKEBOX;
4002         } else
4003                 resp->status = puterrno3(error);
4004 out1:
4005         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4006             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4007         if (vp != NULL)
4008                 VN_RELE(vp);
4009         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4010 }
4011 
4012 void *
4013 rfs3_pathconf_getfh(PATHCONF3args *args)
4014 {
4015 
4016         return (&args->object);
4017 }
4018 
4019 void
4020 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4021         struct svc_req *req, cred_t *cr)
4022 {
4023         int error;
4024         vnode_t *vp;
4025         struct vattr *bvap;
4026         struct vattr bva;
4027         struct vattr *avap;
4028         struct vattr ava;
4029 
4030         bvap = NULL;
4031         avap = NULL;
4032 
4033         vp = nfs3_fhtovp(&args->file, exi);
4034 
4035         DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4036             cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4037 
4038         if (vp == NULL) {
4039                 error = ESTALE;
4040                 goto out;
4041         }
4042 
4043         bva.va_mask = AT_ALL;
4044         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4045 
4046         /*
4047          * If we can't get the attributes, then we can't do the
4048          * right access checking.  So, we'll fail the request.
4049          */
4050         if (error)
4051                 goto out;
4052 
4053         bvap = &bva;
4054 
4055         if (rdonly(exi, req)) {
4056                 resp->status = NFS3ERR_ROFS;
4057                 goto out1;
4058         }
4059 
4060         if (vp->v_type != VREG) {
4061                 resp->status = NFS3ERR_INVAL;
4062                 goto out1;
4063         }
4064 
4065         if (is_system_labeled()) {
4066                 bslabel_t *clabel = req->rq_label;
4067 
4068                 ASSERT(clabel != NULL);
4069                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4070                     "got client label from request(1)", struct svc_req *, req);
4071 
4072                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4073                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4074                             exi)) {
4075                                 resp->status = NFS3ERR_ACCES;
4076                                 goto out1;
4077                         }
4078                 }
4079         }
4080 
4081         if (crgetuid(cr) != bva.va_uid &&
4082             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4083                 goto out;
4084 
4085         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4086 
4087         ava.va_mask = AT_ALL;
4088         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4089 
4090         if (error)
4091                 goto out;
4092 
4093         resp->status = NFS3_OK;
4094         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4095         resp->resok.verf = write3verf;
4096 
4097         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4098             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4099 
4100         VN_RELE(vp);
4101 
4102         return;
4103 
4104 out:
4105         if (curthread->t_flag & T_WOULDBLOCK) {
4106                 curthread->t_flag &= ~T_WOULDBLOCK;
4107                 resp->status = NFS3ERR_JUKEBOX;
4108         } else
4109                 resp->status = puterrno3(error);
4110 out1:
4111         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4112             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4113 
4114         if (vp != NULL)
4115                 VN_RELE(vp);
4116         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4117 }
4118 
4119 void *
4120 rfs3_commit_getfh(COMMIT3args *args)
4121 {
4122 
4123         return (&args->file);
4124 }
4125 
4126 static int
4127 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4128 {
4129 
4130         vap->va_mask = 0;
4131 
4132         if (sap->mode.set_it) {
4133                 vap->va_mode = (mode_t)sap->mode.mode;
4134                 vap->va_mask |= AT_MODE;
4135         }
4136         if (sap->uid.set_it) {
4137                 vap->va_uid = (uid_t)sap->uid.uid;
4138                 vap->va_mask |= AT_UID;
4139         }
4140         if (sap->gid.set_it) {
4141                 vap->va_gid = (gid_t)sap->gid.gid;
4142                 vap->va_mask |= AT_GID;
4143         }
4144         if (sap->size.set_it) {
4145                 if (sap->size.size > (size3)((u_longlong_t)-1))
4146                         return (EINVAL);
4147                 vap->va_size = sap->size.size;
4148                 vap->va_mask |= AT_SIZE;
4149         }
4150         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4151 #ifndef _LP64
4152                 /* check time validity */
4153                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4154                         return (EOVERFLOW);
4155 #endif
4156                 /*
4157                  * nfs protocol defines times as unsigned so don't extend sign,
4158                  * unless sysadmin set nfs_allow_preepoch_time.
4159                  */
4160                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4161                     sap->atime.atime.seconds);
4162                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4163                 vap->va_mask |= AT_ATIME;
4164         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4165                 gethrestime(&vap->va_atime);
4166                 vap->va_mask |= AT_ATIME;
4167         }
4168         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4169 #ifndef _LP64
4170                 /* check time validity */
4171                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4172                         return (EOVERFLOW);
4173 #endif
4174                 /*
4175                  * nfs protocol defines times as unsigned so don't extend sign,
4176                  * unless sysadmin set nfs_allow_preepoch_time.
4177                  */
4178                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4179                     sap->mtime.mtime.seconds);
4180                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4181                 vap->va_mask |= AT_MTIME;
4182         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4183                 gethrestime(&vap->va_mtime);
4184                 vap->va_mask |= AT_MTIME;
4185         }
4186 
4187         return (0);
4188 }
4189 
4190 static ftype3 vt_to_nf3[] = {
4191         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4192 };
4193 
4194 static int
4195 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4196 {
4197 
4198         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4199         /* Return error if time or size overflow */
4200         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4201                 return (EOVERFLOW);
4202         }
4203         fap->type = vt_to_nf3[vap->va_type];
4204         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4205         fap->nlink = (uint32)vap->va_nlink;
4206         if (vap->va_uid == UID_NOBODY)
4207                 fap->uid = (uid3)NFS_UID_NOBODY;
4208         else
4209                 fap->uid = (uid3)vap->va_uid;
4210         if (vap->va_gid == GID_NOBODY)
4211                 fap->gid = (gid3)NFS_GID_NOBODY;
4212         else
4213                 fap->gid = (gid3)vap->va_gid;
4214         fap->size = (size3)vap->va_size;
4215         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4216         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4217         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4218         fap->fsid = (uint64)vap->va_fsid;
4219         fap->fileid = (fileid3)vap->va_nodeid;
4220         fap->atime.seconds = vap->va_atime.tv_sec;
4221         fap->atime.nseconds = vap->va_atime.tv_nsec;
4222         fap->mtime.seconds = vap->va_mtime.tv_sec;
4223         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4224         fap->ctime.seconds = vap->va_ctime.tv_sec;
4225         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4226         return (0);
4227 }
4228 
4229 static int
4230 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4231 {
4232 
4233         /* Return error if time or size overflow */
4234         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4235             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4236             NFS3_SIZE_OK(vap->va_size))) {
4237                 return (EOVERFLOW);
4238         }
4239         wccap->size = (size3)vap->va_size;
4240         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4241         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4242         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4243         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4244         return (0);
4245 }
4246 
4247 static void
4248 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4249 {
4250 
4251         /* don't return attrs if time overflow */
4252         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4253                 poap->attributes = TRUE;
4254         } else
4255                 poap->attributes = FALSE;
4256 }
4257 
4258 void
4259 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4260 {
4261 
4262         /* don't return attrs if time overflow */
4263         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4264                 poap->attributes = TRUE;
4265         } else
4266                 poap->attributes = FALSE;
4267 }
4268 
4269 static void
4270 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4271 {
4272 
4273         vattr_to_pre_op_attr(bvap, &wccp->before);
4274         vattr_to_post_op_attr(avap, &wccp->after);
4275 }
4276 
4277 void
4278 rfs3_srvrinit(void)
4279 {
4280         struct rfs3_verf_overlay {
4281                 uint_t id; /* a "unique" identifier */
4282                 int ts; /* a unique timestamp */
4283         } *verfp;
4284         timestruc_t now;
4285 
4286         /*
4287          * The following algorithm attempts to find a unique verifier
4288          * to be used as the write verifier returned from the server
4289          * to the client.  It is important that this verifier change
4290          * whenever the server reboots.  Of secondary importance, it
4291          * is important for the verifier to be unique between two
4292          * different servers.
4293          *
4294          * Thus, an attempt is made to use the system hostid and the
4295          * current time in seconds when the nfssrv kernel module is
4296          * loaded.  It is assumed that an NFS server will not be able
4297          * to boot and then to reboot in less than a second.  If the
4298          * hostid has not been set, then the current high resolution
4299          * time is used.  This will ensure different verifiers each
4300          * time the server reboots and minimize the chances that two
4301          * different servers will have the same verifier.
4302          */
4303 
4304 #ifndef lint
4305         /*
4306          * We ASSERT that this constant logic expression is
4307          * always true because in the past, it wasn't.
4308          */
4309         ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4310 #endif
4311 
4312         gethrestime(&now);
4313         verfp = (struct rfs3_verf_overlay *)&write3verf;
4314         verfp->ts = (int)now.tv_sec;
4315         verfp->id = zone_get_hostid(NULL);
4316 
4317         if (verfp->id == 0)
4318                 verfp->id = (uint_t)now.tv_nsec;
4319 
4320         nfs3_srv_caller_id = fs_new_caller_id();
4321 
4322 }
4323 
4324 static int
4325 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4326 {
4327         struct clist    *wcl;
4328         int             wlist_len;
4329         count3          count = rok->count;
4330 
4331         wcl = args->wlist;
4332         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4333                 return (FALSE);
4334         }
4335 
4336         wcl = args->wlist;
4337         rok->wlist_len = wlist_len;
4338         rok->wlist = wcl;
4339         return (TRUE);
4340 }
4341 
/*
 * NFSv3 server teardown entry point.  Intentionally empty: no work is
 * performed here.
 */
void
rfs3_srvrfini(void)
{
}