1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  26 /* All Rights Reserved */
  27 
  28 #include <sys/param.h>
  29 #include <sys/types.h>
  30 #include <sys/systm.h>
  31 #include <sys/cred.h>
  32 #include <sys/buf.h>
  33 #include <sys/vfs.h>
  34 #include <sys/vnode.h>
  35 #include <sys/uio.h>
  36 #include <sys/errno.h>
  37 #include <sys/sysmacros.h>
  38 #include <sys/statvfs.h>
  39 #include <sys/kmem.h>
  40 #include <sys/dirent.h>
  41 #include <sys/cmn_err.h>
  42 #include <sys/debug.h>
  43 #include <sys/systeminfo.h>
  44 #include <sys/flock.h>
  45 #include <sys/nbmlock.h>
  46 #include <sys/policy.h>
  47 #include <sys/sdt.h>
  48 
  49 #include <rpc/types.h>
  50 #include <rpc/auth.h>
  51 #include <rpc/svc.h>
  52 #include <rpc/rpc_rdma.h>
  53 
  54 #include <nfs/nfs.h>
  55 #include <nfs/export.h>
  56 #include <nfs/nfs_cmd.h>
  57 
  58 #include <sys/strsubr.h>
  59 
  60 #include <sys/tsol/label.h>
  61 #include <sys/tsol/tndb.h>
  62 
  63 #include <sys/zone.h>
  64 
  65 #include <inet/ip.h>
  66 #include <inet/ip6.h>
  67 
  68 /*
  69  * These are the interface routines for the server side of the
  70  * Network File System.  See the NFS version 3 protocol specification
  71  * for a description of this interface.
  72  */
  73 
/*
 * DEBUG-only switches, all enabled by default.
 *
 * rfs3_do_pre_op_attr and rfs3_do_post_op_attr are consulted by the
 * request handlers in this file: when a switch is zero the handler leaves
 * the corresponding pre-operation or post-operation attribute pointer
 * NULL instead of pointing it at freshly fetched attributes.  In
 * non-DEBUG builds the attributes are always gathered.
 *
 * NOTE(review): rfs3_do_post_op_fh3 is not referenced in this part of the
 * file; presumably it gates returning post-operation file handles --
 * confirm against its users.
 */
#ifdef DEBUG
int rfs3_do_pre_op_attr = 1;
int rfs3_do_post_op_attr = 1;
int rfs3_do_post_op_fh3 = 1;
#endif
  79 
/*
 * NOTE(review): not referenced in this part of the file; presumably the
 * write verifier handed back by the write-type operations -- confirm.
 */
static writeverf3 write3verf;

/*
 * File-local helpers that convert between the kernel's struct vattr and
 * the NFSv3 attribute structures.  The int-returning converters report an
 * error to the caller (rfs3_getattr treats a non-zero return from
 * vattr_to_fattr3() as a time/size out-of-range failure).
 */
static int      sattr3_to_vattr(sattr3 *, struct vattr *);
static int      vattr_to_fattr3(struct vattr *, fattr3 *);
static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
static int      rdma_setup_read_data3(READ3args *, READ3resok *);

/* Defined outside this file. */
extern int nfs_loaned_buffers;

/*
 * Caller id stored in the caller_context_t that rfs3_setattr() passes to
 * VOP_SPACE(), VOP_SETATTR() and VOP_FSYNC().
 */
u_longlong_t nfs3_srv_caller_id;
  92 
  93 /* ARGSUSED */
  94 void
  95 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
  96         struct svc_req *req, cred_t *cr)
  97 {
  98         int error;
  99         vnode_t *vp;
 100         struct vattr va;
 101 
 102         vp = nfs3_fhtovp(&args->object, exi);
 103 
 104         DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
 105             cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
 106 
 107         if (vp == NULL) {
 108                 error = ESTALE;
 109                 goto out;
 110         }
 111 
 112         va.va_mask = AT_ALL;
 113         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 114 
 115         if (!error) {
 116                 /* Lie about the object type for a referral */
 117                 if (vn_is_nfs_reparse(vp, cr))
 118                         va.va_type = VLNK;
 119 
 120                 /* overflow error if time or size is out of range */
 121                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 122                 if (error)
 123                         goto out;
 124                 resp->status = NFS3_OK;
 125 
 126                 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 127                     cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 128 
 129                 VN_RELE(vp);
 130 
 131                 return;
 132         }
 133 
 134 out:
 135         if (curthread->t_flag & T_WOULDBLOCK) {
 136                 curthread->t_flag &= ~T_WOULDBLOCK;
 137                 resp->status = NFS3ERR_JUKEBOX;
 138         } else
 139                 resp->status = puterrno3(error);
 140 
 141         DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 142             cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 143 
 144         if (vp != NULL)
 145                 VN_RELE(vp);
 146 }
 147 
 148 void *
 149 rfs3_getattr_getfh(GETATTR3args *args)
 150 {
 151 
 152         return (&args->object);
 153 }
 154 
 155 void
 156 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
 157         struct svc_req *req, cred_t *cr)
 158 {
 159         int error;
 160         vnode_t *vp;
 161         struct vattr *bvap;
 162         struct vattr bva;
 163         struct vattr *avap;
 164         struct vattr ava;
 165         int flag;
 166         int in_crit = 0;
 167         struct flock64 bf;
 168         caller_context_t ct;
 169 
 170         bvap = NULL;
 171         avap = NULL;
 172 
 173         vp = nfs3_fhtovp(&args->object, exi);
 174 
 175         DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
 176             cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
 177 
 178         if (vp == NULL) {
 179                 error = ESTALE;
 180                 goto out;
 181         }
 182 
 183         error = sattr3_to_vattr(&args->new_attributes, &ava);
 184         if (error)
 185                 goto out;
 186 
 187         if (is_system_labeled()) {
 188                 bslabel_t *clabel = req->rq_label;
 189 
 190                 ASSERT(clabel != NULL);
 191                 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
 192                     "got client label from request(1)", struct svc_req *, req);
 193 
 194                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 195                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
 196                             exi)) {
 197                                 resp->status = NFS3ERR_ACCES;
 198                                 goto out1;
 199                         }
 200                 }
 201         }
 202 
 203         /*
 204          * We need to specially handle size changes because of
 205          * possible conflicting NBMAND locks. Get into critical
 206          * region before VOP_GETATTR, so the size attribute is
 207          * valid when checking conflicts.
 208          *
 209          * Also, check to see if the v4 side of the server has
 210          * delegated this file.  If so, then we return JUKEBOX to
 211          * allow the client to retrasmit its request.
 212          */
 213         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 214                 if (nbl_need_check(vp)) {
 215                         nbl_start_crit(vp, RW_READER);
 216                         in_crit = 1;
 217                 }
 218         }
 219 
 220         bva.va_mask = AT_ALL;
 221         error = rfs4_delegated_getattr(vp, &bva, 0, cr);
 222 
 223         /*
 224          * If we can't get the attributes, then we can't do the
 225          * right access checking.  So, we'll fail the request.
 226          */
 227         if (error)
 228                 goto out;
 229 
 230 #ifdef DEBUG
 231         if (rfs3_do_pre_op_attr)
 232                 bvap = &bva;
 233 #else
 234         bvap = &bva;
 235 #endif
 236 
 237         if (rdonly(exi, req) || vn_is_readonly(vp)) {
 238                 resp->status = NFS3ERR_ROFS;
 239                 goto out1;
 240         }
 241 
 242         if (args->guard.check &&
 243             (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
 244             args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
 245                 resp->status = NFS3ERR_NOT_SYNC;
 246                 goto out1;
 247         }
 248 
 249         if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
 250                 flag = ATTR_UTIME;
 251         else
 252                 flag = 0;
 253 
 254         /*
 255          * If the filesystem is exported with nosuid, then mask off
 256          * the setuid and setgid bits.
 257          */
 258         if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
 259             (exi->exi_export.ex_flags & EX_NOSUID))
 260                 ava.va_mode &= ~(VSUID | VSGID);
 261 
 262         ct.cc_sysid = 0;
 263         ct.cc_pid = 0;
 264         ct.cc_caller_id = nfs3_srv_caller_id;
 265         ct.cc_flags = CC_DONTBLOCK;
 266 
 267         /*
 268          * We need to specially handle size changes because it is
 269          * possible for the client to create a file with modes
 270          * which indicate read-only, but with the file opened for
 271          * writing.  If the client then tries to set the size of
 272          * the file, then the normal access checking done in
 273          * VOP_SETATTR would prevent the client from doing so,
 274          * although it should be legal for it to do so.  To get
 275          * around this, we do the access checking for ourselves
 276          * and then use VOP_SPACE which doesn't do the access
 277          * checking which VOP_SETATTR does. VOP_SPACE can only
 278          * operate on VREG files, let VOP_SETATTR handle the other
 279          * extremely rare cases.
 280          * Also the client should not be allowed to change the
 281          * size of the file if there is a conflicting non-blocking
 282          * mandatory lock in the region the change.
 283          */
 284         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 285                 if (in_crit) {
 286                         u_offset_t offset;
 287                         ssize_t length;
 288 
 289                         if (ava.va_size < bva.va_size) {
 290                                 offset = ava.va_size;
 291                                 length = bva.va_size - ava.va_size;
 292                         } else {
 293                                 offset = bva.va_size;
 294                                 length = ava.va_size - bva.va_size;
 295                         }
 296                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 297                             NULL)) {
 298                                 error = EACCES;
 299                                 goto out;
 300                         }
 301                 }
 302 
 303                 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
 304                         ava.va_mask &= ~AT_SIZE;
 305                         bf.l_type = F_WRLCK;
 306                         bf.l_whence = 0;
 307                         bf.l_start = (off64_t)ava.va_size;
 308                         bf.l_len = 0;
 309                         bf.l_sysid = 0;
 310                         bf.l_pid = 0;
 311                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 312                             (offset_t)ava.va_size, cr, &ct);
 313                 }
 314         }
 315 
 316         if (!error && ava.va_mask)
 317                 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
 318 
 319         /* check if a monitor detected a delegation conflict */
 320         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 321                 resp->status = NFS3ERR_JUKEBOX;
 322                 goto out1;
 323         }
 324 
 325 #ifdef DEBUG
 326         if (rfs3_do_post_op_attr) {
 327                 ava.va_mask = AT_ALL;
 328                 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 329         } else
 330                 avap = NULL;
 331 #else
 332         ava.va_mask = AT_ALL;
 333         avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 334 #endif
 335 
 336         /*
 337          * Force modified metadata out to stable storage.
 338          */
 339         (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 340 
 341         if (error)
 342                 goto out;
 343 
 344         if (in_crit)
 345                 nbl_end_crit(vp);
 346 
 347         resp->status = NFS3_OK;
 348         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 349 
 350         DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 351             cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 352 
 353         VN_RELE(vp);
 354 
 355         return;
 356 
 357 out:
 358         if (curthread->t_flag & T_WOULDBLOCK) {
 359                 curthread->t_flag &= ~T_WOULDBLOCK;
 360                 resp->status = NFS3ERR_JUKEBOX;
 361         } else
 362                 resp->status = puterrno3(error);
 363 out1:
 364         DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 365             cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 366 
 367         if (vp != NULL) {
 368                 if (in_crit)
 369                         nbl_end_crit(vp);
 370                 VN_RELE(vp);
 371         }
 372         vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
 373 }
 374 
 375 void *
 376 rfs3_setattr_getfh(SETATTR3args *args)
 377 {
 378 
 379         return (&args->object);
 380 }
 381 
 382 /* ARGSUSED */
 383 void
 384 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 385         struct svc_req *req, cred_t *cr)
 386 {
 387         int error;
 388         vnode_t *vp;
 389         vnode_t *dvp;
 390         struct vattr *vap;
 391         struct vattr va;
 392         struct vattr *dvap;
 393         struct vattr dva;
 394         nfs_fh3 *fhp;
 395         struct sec_ol sec = {0, 0};
 396         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 397         struct sockaddr *ca;
 398         char *name = NULL;
 399 
 400         dvap = NULL;
 401 
 402         /*
 403          * Allow lookups from the root - the default
 404          * location of the public filehandle.
 405          */
 406         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 407                 dvp = rootdir;
 408                 VN_HOLD(dvp);
 409 
 410                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 411                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 412         } else {
 413                 dvp = nfs3_fhtovp(&args->what.dir, exi);
 414 
 415                 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 416                     cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 417 
 418                 if (dvp == NULL) {
 419                         error = ESTALE;
 420                         goto out;
 421                 }
 422         }
 423 
 424 #ifdef DEBUG
 425         if (rfs3_do_pre_op_attr) {
 426                 dva.va_mask = AT_ALL;
 427                 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 428         }
 429 #else
 430         dva.va_mask = AT_ALL;
 431         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 432 #endif
 433 
 434         if (args->what.name == nfs3nametoolong) {
 435                 resp->status = NFS3ERR_NAMETOOLONG;
 436                 goto out1;
 437         }
 438 
 439         if (args->what.name == NULL || *(args->what.name) == '\0') {
 440                 resp->status = NFS3ERR_ACCES;
 441                 goto out1;
 442         }
 443 
 444         fhp = &args->what.dir;
 445         if (strcmp(args->what.name, "..") == 0 &&
 446             EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 447                 resp->status = NFS3ERR_NOENT;
 448                 goto out1;
 449         }
 450 
 451         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 452         name = nfscmd_convname(ca, exi, args->what.name,
 453             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 454 
 455         if (name == NULL) {
 456                 resp->status = NFS3ERR_ACCES;
 457                 goto out1;
 458         }
 459 
 460         /*
 461          * If the public filehandle is used then allow
 462          * a multi-component lookup
 463          */
 464         if (PUBLIC_FH3(&args->what.dir)) {
 465                 publicfh_flag = TRUE;
 466                 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 467                     &exi, &sec);
 468                 if (error && exi != NULL)
 469                         exi_rele(exi); /* See comment below Re: publicfh_flag */
 470                 /*
 471                  * Since WebNFS may bypass MOUNT, we need to ensure this
 472                  * request didn't come from an unlabeled admin_low client.
 473                  */
 474                 if (is_system_labeled() && error == 0) {
 475                         int             addr_type;
 476                         void            *ipaddr;
 477                         tsol_tpc_t      *tp;
 478 
 479                         if (ca->sa_family == AF_INET) {
 480                                 addr_type = IPV4_VERSION;
 481                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 482                         } else if (ca->sa_family == AF_INET6) {
 483                                 addr_type = IPV6_VERSION;
 484                                 ipaddr = &((struct sockaddr_in6 *)
 485                                     ca)->sin6_addr;
 486                         }
 487                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 488                         if (tp == NULL || tp->tpc_tp.tp_doi !=
 489                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 490                             SUN_CIPSO) {
 491                                 if (exi != NULL)
 492                                         exi_rele(exi);
 493                                 VN_RELE(vp);
 494                                 resp->status = NFS3ERR_ACCES;
 495                                 error = 1;
 496                         }
 497                         if (tp != NULL)
 498                                 TPC_RELE(tp);
 499                 }
 500         } else {
 501                 error = VOP_LOOKUP(dvp, name, &vp,
 502                     NULL, 0, NULL, cr, NULL, NULL, NULL);
 503         }
 504 
 505         if (name != args->what.name)
 506                 kmem_free(name, MAXPATHLEN + 1);
 507 
 508         if (is_system_labeled() && error == 0) {
 509                 bslabel_t *clabel = req->rq_label;
 510 
 511                 ASSERT(clabel != NULL);
 512                 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 513                     "got client label from request(1)", struct svc_req *, req);
 514 
 515                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 516                         if (!do_rfs_label_check(clabel, dvp,
 517                             DOMINANCE_CHECK, exi)) {
 518                                 if (publicfh_flag && exi != NULL)
 519                                         exi_rele(exi);
 520                                 VN_RELE(vp);
 521                                 resp->status = NFS3ERR_ACCES;
 522                                 error = 1;
 523                         }
 524                 }
 525         }
 526 
 527 #ifdef DEBUG
 528         if (rfs3_do_post_op_attr) {
 529                 dva.va_mask = AT_ALL;
 530                 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 531         } else
 532                 dvap = NULL;
 533 #else
 534         dva.va_mask = AT_ALL;
 535         dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 536 #endif
 537 
 538         if (error)
 539                 goto out;
 540 
 541         if (sec.sec_flags & SEC_QUERY) {
 542                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 543         } else {
 544                 error = makefh3(&resp->resok.object, vp, exi);
 545                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 546                         auth_weak = TRUE;
 547         }
 548 
 549         if (error) {
 550                 VN_RELE(vp);
 551                 goto out;
 552         }
 553 
 554         /*
 555          * If publicfh_flag is true then we have called rfs_publicfh_mclookup
 556          * and have obtained a new exportinfo in exi which needs to be
 557          * released. Note the the original exportinfo pointed to by exi
 558          * will be released by the caller, common_dispatch.
 559          */
 560         if (publicfh_flag)
 561                 exi_rele(exi);
 562 
 563 #ifdef DEBUG
 564         if (rfs3_do_post_op_attr) {
 565                 va.va_mask = AT_ALL;
 566                 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 567         } else
 568                 vap = NULL;
 569 #else
 570         va.va_mask = AT_ALL;
 571         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 572 #endif
 573 
 574         VN_RELE(vp);
 575 
 576         resp->status = NFS3_OK;
 577         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 578         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 579 
 580         /*
 581          * If it's public fh, no 0x81, and client's flavor is
 582          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 583          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 584          */
 585         if (auth_weak)
 586                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 587 
 588         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 589             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 590         VN_RELE(dvp);
 591 
 592         return;
 593 
 594 out:
 595         if (curthread->t_flag & T_WOULDBLOCK) {
 596                 curthread->t_flag &= ~T_WOULDBLOCK;
 597                 resp->status = NFS3ERR_JUKEBOX;
 598         } else
 599                 resp->status = puterrno3(error);
 600 out1:
 601         DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 602             cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 603 
 604         if (dvp != NULL)
 605                 VN_RELE(dvp);
 606         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 607 
 608 }
 609 
 610 void *
 611 rfs3_lookup_getfh(LOOKUP3args *args)
 612 {
 613 
 614         return (&args->what.dir);
 615 }
 616 
 617 /* ARGSUSED */
 618 void
 619 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 620         struct svc_req *req, cred_t *cr)
 621 {
 622         int error;
 623         vnode_t *vp;
 624         struct vattr *vap;
 625         struct vattr va;
 626         int checkwriteperm;
 627         boolean_t dominant_label = B_FALSE;
 628         boolean_t equal_label = B_FALSE;
 629         boolean_t admin_low_client;
 630 
 631         vap = NULL;
 632 
 633         vp = nfs3_fhtovp(&args->object, exi);
 634 
 635         DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
 636             cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
 637 
 638         if (vp == NULL) {
 639                 error = ESTALE;
 640                 goto out;
 641         }
 642 
 643         /*
 644          * If the file system is exported read only, it is not appropriate
 645          * to check write permissions for regular files and directories.
 646          * Special files are interpreted by the client, so the underlying
 647          * permissions are sent back to the client for interpretation.
 648          */
 649         if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
 650                 checkwriteperm = 0;
 651         else
 652                 checkwriteperm = 1;
 653 
 654         /*
 655          * We need the mode so that we can correctly determine access
 656          * permissions relative to a mandatory lock file.  Access to
 657          * mandatory lock files is denied on the server, so it might
 658          * as well be reflected to the server during the open.
 659          */
 660         va.va_mask = AT_MODE;
 661         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 662         if (error)
 663                 goto out;
 664 
 665 #ifdef DEBUG
 666         if (rfs3_do_post_op_attr)
 667                 vap = &va;
 668 #else
 669         vap = &va;
 670 #endif
 671 
 672         resp->resok.access = 0;
 673 
 674         if (is_system_labeled()) {
 675                 bslabel_t *clabel = req->rq_label;
 676 
 677                 ASSERT(clabel != NULL);
 678                 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 679                     "got client label from request(1)", struct svc_req *, req);
 680 
 681                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 682                         if ((equal_label = do_rfs_label_check(clabel, vp,
 683                             EQUALITY_CHECK, exi)) == B_FALSE) {
 684                                 dominant_label = do_rfs_label_check(clabel,
 685                                     vp, DOMINANCE_CHECK, exi);
 686                         } else
 687                                 dominant_label = B_TRUE;
 688                         admin_low_client = B_FALSE;
 689                 } else
 690                         admin_low_client = B_TRUE;
 691         }
 692 
 693         if (args->access & ACCESS3_READ) {
 694                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 695                 if (error) {
 696                         if (curthread->t_flag & T_WOULDBLOCK)
 697                                 goto out;
 698                 } else if (!MANDLOCK(vp, va.va_mode) &&
 699                     (!is_system_labeled() || admin_low_client ||
 700                     dominant_label))
 701                         resp->resok.access |= ACCESS3_READ;
 702         }
 703         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 704                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 705                 if (error) {
 706                         if (curthread->t_flag & T_WOULDBLOCK)
 707                                 goto out;
 708                 } else if (!is_system_labeled() || admin_low_client ||
 709                     dominant_label)
 710                         resp->resok.access |= ACCESS3_LOOKUP;
 711         }
 712         if (checkwriteperm &&
 713             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 714                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 715                 if (error) {
 716                         if (curthread->t_flag & T_WOULDBLOCK)
 717                                 goto out;
 718                 } else if (!MANDLOCK(vp, va.va_mode) &&
 719                     (!is_system_labeled() || admin_low_client || equal_label)) {
 720                         resp->resok.access |=
 721                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 722                 }
 723         }
 724         if (checkwriteperm &&
 725             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 726                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 727                 if (error) {
 728                         if (curthread->t_flag & T_WOULDBLOCK)
 729                                 goto out;
 730                 } else if (!is_system_labeled() || admin_low_client ||
 731                     equal_label)
 732                         resp->resok.access |= ACCESS3_DELETE;
 733         }
 734         if (args->access & ACCESS3_EXECUTE) {
 735                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 736                 if (error) {
 737                         if (curthread->t_flag & T_WOULDBLOCK)
 738                                 goto out;
 739                 } else if (!MANDLOCK(vp, va.va_mode) &&
 740                     (!is_system_labeled() || admin_low_client ||
 741                     dominant_label))
 742                         resp->resok.access |= ACCESS3_EXECUTE;
 743         }
 744 
 745 #ifdef DEBUG
 746         if (rfs3_do_post_op_attr) {
 747                 va.va_mask = AT_ALL;
 748                 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 749         } else
 750                 vap = NULL;
 751 #else
 752         va.va_mask = AT_ALL;
 753         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 754 #endif
 755 
 756         resp->status = NFS3_OK;
 757         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 758 
 759         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 760             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 761 
 762         VN_RELE(vp);
 763 
 764         return;
 765 
 766 out:
 767         if (curthread->t_flag & T_WOULDBLOCK) {
 768                 curthread->t_flag &= ~T_WOULDBLOCK;
 769                 resp->status = NFS3ERR_JUKEBOX;
 770         } else
 771                 resp->status = puterrno3(error);
 772         DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 773             cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 774         if (vp != NULL)
 775                 VN_RELE(vp);
 776         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 777 }
 778 
 779 void *
 780 rfs3_access_getfh(ACCESS3args *args)
 781 {
 782 
 783         return (&args->object);
 784 }
 785 
 786 /* ARGSUSED */
 787 void
 788 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 789         struct svc_req *req, cred_t *cr)
 790 {
 791         int error;
 792         vnode_t *vp;
 793         struct vattr *vap;
 794         struct vattr va;
 795         struct iovec iov;
 796         struct uio uio;
 797         char *data;
 798         struct sockaddr *ca;
 799         char *name = NULL;
 800         int is_referral = 0;
 801 
 802         vap = NULL;
 803 
 804         vp = nfs3_fhtovp(&args->symlink, exi);
 805 
 806         DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
 807             cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
 808 
 809         if (vp == NULL) {
 810                 error = ESTALE;
 811                 goto out;
 812         }
 813 
 814         va.va_mask = AT_ALL;
 815         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 816         if (error)
 817                 goto out;
 818 
 819 #ifdef DEBUG
 820         if (rfs3_do_post_op_attr)
 821                 vap = &va;
 822 #else
 823         vap = &va;
 824 #endif
 825 
 826         /* We lied about the object type for a referral */
 827         if (vn_is_nfs_reparse(vp, cr))
 828                 is_referral = 1;
 829 
 830         if (vp->v_type != VLNK && !is_referral) {
 831                 resp->status = NFS3ERR_INVAL;
 832                 goto out1;
 833         }
 834 
 835         if (MANDLOCK(vp, va.va_mode)) {
 836                 resp->status = NFS3ERR_ACCES;
 837                 goto out1;
 838         }
 839 
 840         if (is_system_labeled()) {
 841                 bslabel_t *clabel = req->rq_label;
 842 
 843                 ASSERT(clabel != NULL);
 844                 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 845                     "got client label from request(1)", struct svc_req *, req);
 846 
 847                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
 848                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 849                             exi)) {
 850                                 resp->status = NFS3ERR_ACCES;
 851                                 goto out1;
 852                         }
 853                 }
 854         }
 855 
 856         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 857 
 858         if (is_referral) {
 859                 char *s;
 860                 size_t strsz;
 861 
 862                 /* Get an artificial symlink based on a referral */
 863                 s = build_symlink(vp, cr, &strsz);
 864                 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 865                 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 866                     vnode_t *, vp, char *, s);
 867                 if (s == NULL)
 868                         error = EINVAL;
 869                 else {
 870                         error = 0;
 871                         (void) strlcpy(data, s, MAXPATHLEN + 1);
 872                         kmem_free(s, strsz);
 873                 }
 874 
 875         } else {
 876 
 877                 iov.iov_base = data;
 878                 iov.iov_len = MAXPATHLEN;
 879                 uio.uio_iov = &iov;
 880                 uio.uio_iovcnt = 1;
 881                 uio.uio_segflg = UIO_SYSSPACE;
 882                 uio.uio_extflg = UIO_COPY_CACHED;
 883                 uio.uio_loffset = 0;
 884                 uio.uio_resid = MAXPATHLEN;
 885 
 886                 error = VOP_READLINK(vp, &uio, cr, NULL);
 887 
 888                 if (!error)
 889                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 890         }
 891 
 892 #ifdef DEBUG
 893         if (rfs3_do_post_op_attr) {
 894                 va.va_mask = AT_ALL;
 895                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 896         } else
 897                 vap = NULL;
 898 #else
 899         va.va_mask = AT_ALL;
 900         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 901 #endif
 902         /* Lie about object type again just to be consistent */
 903         if (is_referral && vap != NULL)
 904                 vap->va_type = VLNK;
 905 
 906 #if 0 /* notyet */
 907         /*
 908          * Don't do this.  It causes local disk writes when just
 909          * reading the file and the overhead is deemed larger
 910          * than the benefit.
 911          */
 912         /*
 913          * Force modified metadata out to stable storage.
 914          */
 915         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 916 #endif
 917 
 918         if (error) {
 919                 kmem_free(data, MAXPATHLEN + 1);
 920                 goto out;
 921         }
 922 
 923         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 924         name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 925             MAXPATHLEN + 1);
 926 
 927         if (name == NULL) {
 928                 /*
 929                  * Even though the conversion failed, we return
 930                  * something. We just don't translate it.
 931                  */
 932                 name = data;
 933         }
 934 
 935         resp->status = NFS3_OK;
 936         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 937         resp->resok.data = name;
 938 
 939         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 940             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 941         VN_RELE(vp);
 942 
 943         if (name != data)
 944                 kmem_free(data, MAXPATHLEN + 1);
 945 
 946         return;
 947 
 948 out:
 949         if (curthread->t_flag & T_WOULDBLOCK) {
 950                 curthread->t_flag &= ~T_WOULDBLOCK;
 951                 resp->status = NFS3ERR_JUKEBOX;
 952         } else
 953                 resp->status = puterrno3(error);
 954 out1:
 955         DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 956             cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 957         if (vp != NULL)
 958                 VN_RELE(vp);
 959         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 960 }
 961 
 962 void *
 963 rfs3_readlink_getfh(READLINK3args *args)
 964 {
 965 
 966         return (&args->symlink);
 967 }
 968 
 969 void
 970 rfs3_readlink_free(READLINK3res *resp)
 971 {
 972 
 973         if (resp->status == NFS3_OK)
 974                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
 975 }
 976 
 977 /*
 978  * Server routine to handle read
 979  * May handle RDMA data as well as mblks
      *
      * Maps args->file to a vnode, takes the vnode read lock, verifies that
      * the object is a regular file the caller may read, and then reads up
      * to args->count bytes starting at args->offset.  args->count is
      * clamped in place to rfs3_tsize(req).  When args->wlist is set the
      * data is returned through that chunk list; otherwise it is returned
      * in an mblk, built from loaned buffers when nfs_loaned_buffers is set
      * and VOP_REQZCBUF() succeeds.
      *
      * On success resp->status is NFS3_OK and resp->resok is filled in.
      * On failure resp->status carries the NFS3 error and resp->resfail
      * receives whatever post-op attributes were obtained.
 980  */
 981 /* ARGSUSED */
 982 void
 983 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
 984         struct svc_req *req, cred_t *cr)
 985 {
 986         int error;
 987         vnode_t *vp;
 988         struct vattr *vap;
 989         struct vattr va;
 990         struct iovec iov;
 991         struct uio uio;
 992         u_offset_t offset;
 993         mblk_t *mp = NULL;
 994         int alloc_err = 0;
 995         int in_crit = 0;        /* set once nbl_start_crit() was called */
 996         int need_rwunlock = 0;  /* set once the error paths must unlock */
 997         caller_context_t ct;
 998         int rdma_used = 0;
 999         int loaned_buffers;
1000         struct uio *uiop;
1001
1002         vap = NULL;
1003
1004         vp = nfs3_fhtovp(&args->file, exi);
1005
1006         DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
1007             cred_t *, cr, vnode_t *, vp, READ3args *, args);
1008
1009         if (vp == NULL) {
1010                 error = ESTALE;
1011                 goto out;
1012         }
1013
             /*
              * A write chunk list selects the RDMA reply path.  Reject the
              * request if it asks for more than clist_len() reports for
              * that list.
              */
1014         if (args->wlist) {
1015                 if (args->count > clist_len(args->wlist)) {
1016                         error = EINVAL;
1017                         goto out;
1018                 }
1019                 rdma_used = 1;
1020         }
1021
1022         /* use loaned buffers for TCP */
1023         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
1024
1025         if (is_system_labeled()) {
1026                 bslabel_t *clabel = req->rq_label;
1027
1028                 ASSERT(clabel != NULL);
1029                 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1030                     "got client label from request(1)", struct svc_req *, req);
1031
                     /*
                      * The label check is skipped when the client label
                      * equals l_admin_low; otherwise a failed check is
                      * reported as an access error.
                      */
1032                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1033                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1034                             exi)) {
1035                                 resp->status = NFS3ERR_ACCES;
1036                                 goto out1;
1037                         }
1038                 }
1039         }
1040
             /*
              * Caller context handed to the VOP calls below.  The EAGAIN
              * checks further down look for CC_WOULDBLOCK coming back in
              * cc_flags.
              */
1041         ct.cc_sysid = 0;
1042         ct.cc_pid = 0;
1043         ct.cc_caller_id = nfs3_srv_caller_id;
1044         ct.cc_flags = CC_DONTBLOCK;
1045
1046         /*
1047          * Enter the critical region before calling VOP_RWLOCK
1048          * to avoid a deadlock with write requests.
1049          */
1050         if (nbl_need_check(vp)) {
1051                 nbl_start_crit(vp, RW_READER);
1052                 in_crit = 1;
1053                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1054                     NULL)) {
1055                         error = EACCES;
1056                         goto out;
1057                 }
1058         }
1059
1060         error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1061
1062         /* check if a monitor detected a delegation conflict */
1063         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1064                 resp->status = NFS3ERR_JUKEBOX;
1065                 goto out1;
1066         }
1067
             /* From here on the out/out1 paths call VOP_RWUNLOCK(). */
1068         need_rwunlock = 1;
1069
1070         va.va_mask = AT_ALL;
1071         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1072
1073         /*
1074          * If we can't get the attributes, then we can't do the
1075          * right access checking.  So, we'll fail the request.
1076          */
1077         if (error)
1078                 goto out;
1079
1080 #ifdef DEBUG
1081         if (rfs3_do_post_op_attr)
1082                 vap = &va;
1083 #else
1084         vap = &va;
1085 #endif
1086
1087         if (vp->v_type != VREG) {
1088                 resp->status = NFS3ERR_INVAL;
1089                 goto out1;
1090         }
1091
             /*
              * The file's owner is not access-checked.  Anyone else needs
              * read permission or, failing that, execute permission.  A
              * pending T_WOULDBLOCK skips the execute retry so that the
              * out path can report it.
              */
1092         if (crgetuid(cr) != va.va_uid) {
1093                 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1094                 if (error) {
1095                         if (curthread->t_flag & T_WOULDBLOCK)
1096                                 goto out;
1097                         error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1098                         if (error)
1099                                 goto out;
1100                 }
1101         }
1102
1103         if (MANDLOCK(vp, va.va_mode)) {
1104                 resp->status = NFS3ERR_ACCES;
1105                 goto out1;
1106         }
1107
             /*
              * Read starting at or beyond the file size: succeed with no
              * data and eof set.  The locks are dropped here because the
              * done path does not drop them.
              */
1108         offset = args->offset;
1109         if (offset >= va.va_size) {
1110                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1111                 if (in_crit)
1112                         nbl_end_crit(vp);
1113                 resp->status = NFS3_OK;
1114                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1115                 resp->resok.count = 0;
1116                 resp->resok.eof = TRUE;
1117                 resp->resok.data.data_len = 0;
1118                 resp->resok.data.data_val = NULL;
1119                 resp->resok.data.mp = NULL;
1120                 /* RDMA */
1121                 resp->resok.wlist = args->wlist;
1122                 resp->resok.wlist_len = resp->resok.count;
1123                 if (resp->resok.wlist)
1124                         clist_zero_len(resp->resok.wlist);
1125                 goto done;
1126         }
1127
             /*
              * Zero-length read: same empty reply as above, but with eof
              * clear.
              */
1128         if (args->count == 0) {
1129                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1130                 if (in_crit)
1131                         nbl_end_crit(vp);
1132                 resp->status = NFS3_OK;
1133                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1134                 resp->resok.count = 0;
1135                 resp->resok.eof = FALSE;
1136                 resp->resok.data.data_len = 0;
1137                 resp->resok.data.data_val = NULL;
1138                 resp->resok.data.mp = NULL;
1139                 /* RDMA */
1140                 resp->resok.wlist = args->wlist;
1141                 resp->resok.wlist_len = resp->resok.count;
1142                 if (resp->resok.wlist)
1143                         clist_zero_len(resp->resok.wlist);
1144                 goto done;
1145         }
1146
1147         /*
1148          * do not allocate more memory than the max. allowed
1149          * transfer size
1150          */
1151         if (args->count > rfs3_tsize(req))
1152                 args->count = rfs3_tsize(req);
1153
             /*
              * Try the loaned-buffer path first.  If VOP_REQZCBUF() fails,
              * free the xuio, clear loaned_buffers and fall through to the
              * ordinary mblk setup below.
              */
1154         if (loaned_buffers) {
1155                 uiop = (uio_t *)rfs_setup_xuio(vp);
1156                 ASSERT(uiop != NULL);
1157                 uiop->uio_segflg = UIO_SYSSPACE;
1158                 uiop->uio_loffset = args->offset;
1159                 uiop->uio_resid = args->count;
1160
1161                 /* Jump to do the read if successful */
1162                 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1163                         /*
1164                          * Need to hold the vnode until after VOP_RETZCBUF()
1165                          * is called.
1166                          */
1167                         VN_HOLD(vp);
1168                         goto doio_read;
1169                 }
1170
1171                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1172                     uiop->uio_loffset, int, uiop->uio_resid);
1173
1174                 uiop->uio_extflg = 0;
1175                 /* failure to setup for zero copy */
1176                 rfs_free_xuio((void *)uiop);
1177                 loaned_buffers = 0;
1178         }
1179
1180         /*
1181          * If returning data via RDMA Write, then grab the chunk list.
1182          * If we aren't returning READ data w/RDMA_WRITE, then grab
1183          * a mblk.
1184          */
1185         if (rdma_used) {
                     /*
                      * NOTE(review): the return value is ignored; iov is
                      * presumably always filled in here -- confirm against
                      * rdma_get_wchunk().
                      */
1186                 (void) rdma_get_wchunk(req, &iov, args->wlist);
1187         } else {
1188                 /*
1189                  * mp will contain the data to be sent out in the read reply.
1190                  * This will be freed after the reply has been sent out (by the
1191                  * driver).
1192                  * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
1193                  * that the call to xdrmblk_putmblk() never fails.
1194                  */
1195                 mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG,
1196                     &alloc_err);
1197                 ASSERT(mp != NULL);
1198                 ASSERT(alloc_err == 0);
1199
1200                 iov.iov_base = (caddr_t)mp->b_datap->db_base;
1201                 iov.iov_len = args->count;
1202         }
1203
1204         uio.uio_iov = &iov;
1205         uio.uio_iovcnt = 1;
1206         uio.uio_segflg = UIO_SYSSPACE;
1207         uio.uio_extflg = UIO_COPY_CACHED;
1208         uio.uio_loffset = args->offset;
1209         uio.uio_resid = args->count;
1210         uiop = &uio;
1211
             /*
              * uiop is either the loaned xuio (jumped here from above) or
              * the local uio that was just set up.
              */
1212 doio_read:
1213         error = VOP_READ(vp, uiop, 0, cr, &ct);
1214
1215         if (error) {
                     /*
                      * NOTE(review): when this is reached via the
                      * loaned-buffer path, nothing visible here releases
                      * uiop or the extra VN_HOLD() taken before the jump to
                      * doio_read -- confirm that cleanup happens elsewhere.
                      */
1216                 if (mp)
1217                         freemsg(mp);
1218                 /* check if a monitor detected a delegation conflict */
1219                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1220                         resp->status = NFS3ERR_JUKEBOX;
1221                         goto out1;
1222                 }
1223                 goto out;
1224         }
1225
1226         /* make mblk using zc buffers */
1227         if (loaned_buffers) {
1228                 mp = uio_to_mblk(uiop);
1229                 ASSERT(mp != NULL);
1230         }
1231
             /*
              * Re-fetch the attributes for the reply.  A failure here does
              * not fail the read; it only suppresses the post-op attributes
              * and the eof indication.
              */
1232         va.va_mask = AT_ALL;
1233         error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1234
1235 #ifdef DEBUG
1236         if (rfs3_do_post_op_attr) {
1237                 if (error)
1238                         vap = NULL;
1239                 else
1240                         vap = &va;
1241         } else
1242                 vap = NULL;
1243 #else
1244         if (error)
1245                 vap = NULL;
1246         else
1247                 vap = &va;
1248 #endif
1249
1250         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1251
1252         if (in_crit)
1253                 nbl_end_crit(vp);
1254
1255         resp->status = NFS3_OK;
1256         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
             /* Bytes actually read = requested minus what VOP_READ left. */
1257         resp->resok.count = args->count - uiop->uio_resid;
             /*
              * eof is reported only when the fresh attributes were obtained
              * and the read ended exactly at the file size.
              */
1258         if (!error && offset + resp->resok.count == va.va_size)
1259                 resp->resok.eof = TRUE;
1260         else
1261                 resp->resok.eof = FALSE;
1262         resp->resok.data.data_len = resp->resok.count;
1263
1264         if (mp)
1265                 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1266
1267         resp->resok.data.mp = mp;
1268         resp->resok.size = (uint_t)args->count;
1269
1270         if (rdma_used) {
1271                 resp->resok.data.data_val = (caddr_t)iov.iov_base;
                     /*
                      * NOTE(review): on failure only the status is changed;
                      * control still reaches done with resok filled in --
                      * confirm this is what the reply encoder expects.
                      */
1272                 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1273                         resp->status = NFS3ERR_INVAL;
1274                 }
1275         } else {
1276                 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1277                 (resp->resok).wlist = NULL;
1278         }
1279
             /* Success exit: the locks have already been dropped. */
1280 done:
1281         DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1282             cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1283
1284         VN_RELE(vp);
1285
1286         return;
1287
             /*
              * Failure exit with an errno in "error".  A pending
              * T_WOULDBLOCK on the thread is cleared and reported as
              * NFS3ERR_JUKEBOX instead of the translated errno.
              */
1288 out:
1289         if (curthread->t_flag & T_WOULDBLOCK) {
1290                 curthread->t_flag &= ~T_WOULDBLOCK;
1291                 resp->status = NFS3ERR_JUKEBOX;
1292         } else
1293                 resp->status = puterrno3(error);
             /*
              * Failure exit with resp->status already set: release whatever
              * was acquired and attach the post-op attributes, if any.
              */
1294 out1:
1295         DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1296             cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1297
1298         if (vp != NULL) {
1299                 if (need_rwunlock)
1300                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1301                 if (in_crit)
1302                         nbl_end_crit(vp);
1303                 VN_RELE(vp);
1304         }
1305         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1306 }
1307 
1308 void
1309 rfs3_read_free(READ3res *resp)
1310 {
1311         mblk_t *mp;
1312 
1313         if (resp->status == NFS3_OK) {
1314                 mp = resp->resok.data.mp;
1315                 if (mp != NULL)
1316                         freemsg(mp);
1317         }
1318 }
1319 
1320 void *
1321 rfs3_read_getfh(READ3args *args)
1322 {
1323 
1324         return (&args->file);
1325 }
1326 
1327 #define MAX_IOVECS      12      /* size of rfs3_write()'s stack iov[] */
1328
1329 #ifdef DEBUG
     /*
      * Counts of mblk-based writes whose chain did (hits) or did not
      * (misses) fit in the MAX_IOVECS on-stack iovec array.
      */
1330 static int rfs3_write_hits = 0;
1331 static int rfs3_write_misses = 0;
1332 #endif
1333
     /*
      * Server routine to handle write.
      *
      * Maps args->file to a vnode, takes the vnode write lock, verifies
      * that the export is writable and that the object is a regular file
      * the caller may write, and then writes up to args->count bytes at
      * args->offset.  The data comes from args->mblk, args->rlist or
      * args->data.data_val, checked in that order.  args->stable selects
      * the sync flag passed to VOP_WRITE().
      *
      * On success resp->status is NFS3_OK and resp->resok holds the wcc
      * data, byte count, commit level and write verifier.  On failure
      * resp->status carries the NFS3 error and resp->resfail.file_wcc is
      * built from whatever before/after attributes were obtained.
      */
1334 void
1335 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1336         struct svc_req *req, cred_t *cr)
1337 {
1338         int error;
1339         vnode_t *vp;
1340         struct vattr *bvap = NULL;
1341         struct vattr bva;
1342         struct vattr *avap = NULL;
1343         struct vattr ava;
1344         u_offset_t rlimit;
1345         struct uio uio;
1346         struct iovec iov[MAX_IOVECS];
1347         mblk_t *m;
1348         struct iovec *iovp;
1349         int iovcnt;
1350         int ioflag;
1351         cred_t *savecred;
1352         int in_crit = 0;        /* set once nbl_start_crit() was called */
1353         int rwlock_ret = -1;    /* -1: no VOP_RWUNLOCK() needed on exit */
1354         caller_context_t ct;
1355
1356         vp = nfs3_fhtovp(&args->file, exi);
1357
1358         DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1359             cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1360
1361         if (vp == NULL) {
1362                 error = ESTALE;
1363                 goto err;
1364         }
1365
1366         if (is_system_labeled()) {
1367                 bslabel_t *clabel = req->rq_label;
1368
1369                 ASSERT(clabel != NULL);
1370                 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1371                     "got client label from request(1)", struct svc_req *, req);
1372
                     /*
                      * The label check is skipped when the client label
                      * equals l_admin_low; otherwise a failed check is
                      * reported as an access error.
                      */
1373                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1374                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1375                             exi)) {
1376                                 resp->status = NFS3ERR_ACCES;
1377                                 goto err1;
1378                         }
1379                 }
1380         }
1381
             /*
              * Caller context handed to the VOP calls below.  The EAGAIN
              * checks further down look for CC_WOULDBLOCK coming back in
              * cc_flags.
              */
1382         ct.cc_sysid = 0;
1383         ct.cc_pid = 0;
1384         ct.cc_caller_id = nfs3_srv_caller_id;
1385         ct.cc_flags = CC_DONTBLOCK;
1386
1387         /*
1388          * We have to enter the critical region before calling VOP_RWLOCK
1389          * to avoid a deadlock with ufs.
1390          */
1391         if (nbl_need_check(vp)) {
1392                 nbl_start_crit(vp, RW_READER);
1393                 in_crit = 1;
1394                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1395                     NULL)) {
1396                         error = EACCES;
1397                         goto err;
1398                 }
1399         }
1400
1401         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1402
1403         /* check if a monitor detected a delegation conflict */
1404         if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1405                 resp->status = NFS3ERR_JUKEBOX;
                     /* Reset so the exit path does not call VOP_RWUNLOCK(). */
1406                 rwlock_ret = -1;
1407                 goto err1;
1408         }
1409
1410
1411         bva.va_mask = AT_ALL;
1412         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1413
1414         /*
1415          * If we can't get the attributes, then we can't do the
1416          * right access checking.  So, we'll fail the request.
1417          */
1418         if (error)
1419                 goto err;
1420
1421         bvap = &bva;
1422 #ifdef DEBUG
1423         if (!rfs3_do_pre_op_attr)
1424                 bvap = NULL;
1425 #endif
             /*
              * Until the write has happened the "after" attributes are the
              * same as the "before" attributes.
              */
1426         avap = bvap;
1427
1428         if (args->count != args->data.data_len) {
1429                 resp->status = NFS3ERR_INVAL;
1430                 goto err1;
1431         }
1432
1433         if (rdonly(exi, req)) {
1434                 resp->status = NFS3ERR_ROFS;
1435                 goto err1;
1436         }
1437
1438         if (vp->v_type != VREG) {
1439                 resp->status = NFS3ERR_INVAL;
1440                 goto err1;
1441         }
1442
             /* The file's owner is not access-checked. */
1443         if (crgetuid(cr) != bva.va_uid &&
1444             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1445                 goto err;
1446
1447         if (MANDLOCK(vp, bva.va_mode)) {
1448                 resp->status = NFS3ERR_ACCES;
1449                 goto err1;
1450         }
1451
             /*
              * Zero-length write: succeed without calling VOP_WRITE() and
              * echo the requested stability level back as committed.
              */
1452         if (args->count == 0) {
1453                 resp->status = NFS3_OK;
1454                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1455                 resp->resok.count = 0;
1456                 resp->resok.committed = args->stable;
1457                 resp->resok.verf = write3verf;
1458                 goto out;
1459         }
1460
             /*
              * Build the iovec list.  An mblk chain gets one iovec per
              * mblk, using the on-stack array when the chain fits and a
              * kmem_alloc()ed array otherwise.  The rlist and plain-buffer
              * cases use a single iovec.
              */
1461         if (args->mblk != NULL) {
1462                 iovcnt = 0;
1463                 for (m = args->mblk; m != NULL; m = m->b_cont)
1464                         iovcnt++;
1465                 if (iovcnt <= MAX_IOVECS) {
1466 #ifdef DEBUG
1467                         rfs3_write_hits++;
1468 #endif
1469                         iovp = iov;
1470                 } else {
1471 #ifdef DEBUG
1472                         rfs3_write_misses++;
1473 #endif
1474                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1475                 }
1476                 mblk_to_iov(args->mblk, iovcnt, iovp);
1477
1478         } else if (args->rlist != NULL) {
1479                 iovcnt = 1;
1480                 iovp = iov;
1481                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1482                 iovp->iov_len = args->count;
1483         } else {
1484                 iovcnt = 1;
1485                 iovp = iov;
1486                 iovp->iov_base = args->data.data_val;
1487                 iovp->iov_len = args->count;
1488         }
1489
1490         uio.uio_iov = iovp;
1491         uio.uio_iovcnt = iovcnt;
1492
1493         uio.uio_segflg = UIO_SYSSPACE;
1494         uio.uio_extflg = UIO_COPY_DEFAULT;
1495         uio.uio_loffset = args->offset;
1496         uio.uio_resid = args->count;
             /*
              * Shrink the transfer so that it ends no later than uio_llimit,
              * which is taken from curproc->p_fsz_ctl.
              * NOTE(review): if args->offset is already past the limit the
              * subtraction presumably wraps and no clamping happens --
              * confirm the file system rejects that case.
              */
1497         uio.uio_llimit = curproc->p_fsz_ctl;
1498         rlimit = uio.uio_llimit - args->offset;
1499         if (rlimit < (u_offset_t)uio.uio_resid)
1500                 uio.uio_resid = (int)rlimit;
1501
             /*
              * Translate the requested stability level into the VOP_WRITE()
              * ioflag.  An unrecognized level is rejected, after freeing
              * the iovec array if it was allocated.
              */
1502         if (args->stable == UNSTABLE)
1503                 ioflag = 0;
1504         else if (args->stable == FILE_SYNC)
1505                 ioflag = FSYNC;
1506         else if (args->stable == DATA_SYNC)
1507                 ioflag = FDSYNC;
1508         else {
1509                 if (iovp != iov)
1510                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1511                 resp->status = NFS3ERR_INVAL;
1512                 goto err1;
1513         }
1514
1515         /*
1516          * We're changing creds because VM may fault and we need
1517          * the cred of the current thread to be used if quota
1518          * checking is enabled.
1519          */
1520         savecred = curthread->t_cred;
1521         curthread->t_cred = cr;
1522         error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1523         curthread->t_cred = savecred;
1524
1525         if (iovp != iov)
1526                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1527
1528         /* check if a monitor detected a delegation conflict */
1529         if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1530                 resp->status = NFS3ERR_JUKEBOX;
1531                 goto err1;
1532         }
1533
             /*
              * Fetch the after attributes before looking at the write
              * error, so that a failure reply can carry them as well.
              */
1534         ava.va_mask = AT_ALL;
1535         avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1536
1537 #ifdef DEBUG
1538         if (!rfs3_do_post_op_attr)
1539                 avap = NULL;
1540 #endif
1541
1542         if (error)
1543                 goto err;
1544
1545         /*
1546          * If we were unable to get the V_WRITELOCK_TRUE, then we
1547          * may not have accurate after attrs, so check if
1548          * we have both attributes, they have a non-zero va_seq, and
1549          * va_seq has changed by exactly one,
1550          * if not, turn off the before attr.
1551          */
1552         if (rwlock_ret != V_WRITELOCK_TRUE) {
1553                 if (bvap == NULL || avap == NULL ||
1554                     bvap->va_seq == 0 || avap->va_seq == 0 ||
1555                     avap->va_seq != (bvap->va_seq + 1)) {
1556                         bvap = NULL;
1557                 }
1558         }
1559
1560         resp->status = NFS3_OK;
1561         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
             /* Bytes actually written = requested minus what was left. */
1562         resp->resok.count = args->count - uio.uio_resid;
1563         resp->resok.committed = args->stable;
1564         resp->resok.verf = write3verf;
1565         goto out;
1566
             /*
              * Failure exit with an errno in "error".  A pending
              * T_WOULDBLOCK on the thread is cleared and reported as
              * NFS3ERR_JUKEBOX instead of the translated errno.
              */
1567 err:
1568         if (curthread->t_flag & T_WOULDBLOCK) {
1569                 curthread->t_flag &= ~T_WOULDBLOCK;
1570                 resp->status = NFS3ERR_JUKEBOX;
1571         } else
1572                 resp->status = puterrno3(error);
             /* Failure exit with resp->status already set. */
1573 err1:
1574         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
             /* Common exit: release whatever was acquired. */
1575 out:
1576         DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1577             cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1578
1579         if (vp != NULL) {
1580                 if (rwlock_ret != -1)
1581                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1582                 if (in_crit)
1583                         nbl_end_crit(vp);
1584                 VN_RELE(vp);
1585         }
1586 }
1587 
1588 void *
1589 rfs3_write_getfh(WRITE3args *args)
1590 {
1591 
1592         return (&args->file);
1593 }
1594 
1595 void
1596 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1597         struct svc_req *req, cred_t *cr)
1598 {
1599         int error;
1600         int in_crit = 0;
1601         vnode_t *vp;
1602         vnode_t *tvp = NULL;
1603         vnode_t *dvp;
1604         struct vattr *vap;
1605         struct vattr va;
1606         struct vattr *dbvap;
1607         struct vattr dbva;
1608         struct vattr *davap;
1609         struct vattr dava;
1610         enum vcexcl excl;
1611         nfstime3 *mtime;
1612         len_t reqsize;
1613         bool_t trunc;
1614         struct sockaddr *ca;
1615         char *name = NULL;
1616 
1617         dbvap = NULL;
1618         davap = NULL;
1619 
1620         dvp = nfs3_fhtovp(&args->where.dir, exi);
1621 
1622         DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1623             cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1624 
1625         if (dvp == NULL) {
1626                 error = ESTALE;
1627                 goto out;
1628         }
1629 
1630 #ifdef DEBUG
1631         if (rfs3_do_pre_op_attr) {
1632                 dbva.va_mask = AT_ALL;
1633                 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1634         } else
1635                 dbvap = NULL;
1636 #else
1637         dbva.va_mask = AT_ALL;
1638         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1639 #endif
1640         davap = dbvap;
1641 
1642         if (args->where.name == nfs3nametoolong) {
1643                 resp->status = NFS3ERR_NAMETOOLONG;
1644                 goto out1;
1645         }
1646 
1647         if (args->where.name == NULL || *(args->where.name) == '\0') {
1648                 resp->status = NFS3ERR_ACCES;
1649                 goto out1;
1650         }
1651 
1652         if (rdonly(exi, req)) {
1653                 resp->status = NFS3ERR_ROFS;
1654                 goto out1;
1655         }
1656 
1657         if (is_system_labeled()) {
1658                 bslabel_t *clabel = req->rq_label;
1659 
1660                 ASSERT(clabel != NULL);
1661                 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1662                     "got client label from request(1)", struct svc_req *, req);
1663 
1664                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1665                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1666                             exi)) {
1667                                 resp->status = NFS3ERR_ACCES;
1668                                 goto out1;
1669                         }
1670                 }
1671         }
1672 
1673         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1674         name = nfscmd_convname(ca, exi, args->where.name,
1675             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1676 
1677         if (name == NULL) {
1678                 /* This is really a Solaris EILSEQ */
1679                 resp->status = NFS3ERR_INVAL;
1680                 goto out1;
1681         }
1682 
1683         if (args->how.mode == EXCLUSIVE) {
1684                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1685                 va.va_type = VREG;
1686                 va.va_mode = (mode_t)0;
1687                 /*
1688                  * Ensure no time overflows and that types match
1689                  */
1690                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1691                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1692                 va.va_mtime.tv_nsec = mtime->nseconds;
1693                 excl = EXCL;
1694         } else {
1695                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1696                     &va);
1697                 if (error)
1698                         goto out;
1699                 va.va_mask |= AT_TYPE;
1700                 va.va_type = VREG;
1701                 if (args->how.mode == GUARDED)
1702                         excl = EXCL;
1703                 else {
1704                         excl = NONEXCL;
1705 
1706                         /*
1707                          * During creation of file in non-exclusive mode
1708                          * if size of file is being set then make sure
1709                          * that if the file already exists that no conflicting
1710                          * non-blocking mandatory locks exists in the region
1711                          * being modified. If there are conflicting locks fail
1712                          * the operation with EACCES.
1713                          */
1714                         if (va.va_mask & AT_SIZE) {
1715                                 struct vattr tva;
1716 
1717                                 /*
1718                                  * Does file already exist?
1719                                  */
1720                                 error = VOP_LOOKUP(dvp, name, &tvp,
1721                                     NULL, 0, NULL, cr, NULL, NULL, NULL);
1722 
1723                                 /*
1724                                  * Check to see if the file has been delegated
1725                                  * to a v4 client.  If so, then begin recall of
1726                                  * the delegation and return JUKEBOX to allow
1727                                  * the client to retransmit its request.
1728                                  */
1729 
1730                                 trunc = va.va_size == 0;
1731                                 if (!error &&
1732                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1733                                         resp->status = NFS3ERR_JUKEBOX;
1734                                         goto out1;
1735                                 }
1736 
1737                                 /*
1738                                  * Check for NBMAND lock conflicts
1739                                  */
1740                                 if (!error && nbl_need_check(tvp)) {
1741                                         u_offset_t offset;
1742                                         ssize_t len;
1743 
1744                                         nbl_start_crit(tvp, RW_READER);
1745                                         in_crit = 1;
1746 
1747                                         tva.va_mask = AT_SIZE;
1748                                         error = VOP_GETATTR(tvp, &tva, 0, cr,
1749                                             NULL);
1750                                         /*
1751                                          * Can't check for conflicts, so return
1752                                          * error.
1753                                          */
1754                                         if (error)
1755                                                 goto out;
1756 
1757                                         offset = tva.va_size < va.va_size ?
1758                                             tva.va_size : va.va_size;
1759                                         len = tva.va_size < va.va_size ?
1760                                             va.va_size - tva.va_size :
1761                                             tva.va_size - va.va_size;
1762                                         if (nbl_conflict(tvp, NBL_WRITE,
1763                                             offset, len, 0, NULL)) {
1764                                                 error = EACCES;
1765                                                 goto out;
1766                                         }
1767                                 } else if (tvp) {
1768                                         VN_RELE(tvp);
1769                                         tvp = NULL;
1770                                 }
1771                         }
1772                 }
1773                 if (va.va_mask & AT_SIZE)
1774                         reqsize = va.va_size;
1775         }
1776 
1777         /*
1778          * Must specify the mode.
1779          */
1780         if (!(va.va_mask & AT_MODE)) {
1781                 resp->status = NFS3ERR_INVAL;
1782                 goto out1;
1783         }
1784 
1785         /*
1786          * If the filesystem is exported with nosuid, then mask off
1787          * the setuid and setgid bits.
1788          */
1789         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1790                 va.va_mode &= ~(VSUID | VSGID);
1791 
1792 tryagain:
1793         /*
1794          * The file open mode used is VWRITE.  If the client needs
1795          * some other semantic, then it should do the access checking
1796          * itself.  It would have been nice to have the file open mode
1797          * passed as part of the arguments.
1798          */
1799         error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1800             &vp, cr, 0, NULL, NULL);
1801 
1802 #ifdef DEBUG
1803         if (rfs3_do_post_op_attr) {
1804                 dava.va_mask = AT_ALL;
1805                 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1806         } else
1807                 davap = NULL;
1808 #else
1809         dava.va_mask = AT_ALL;
1810         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1811 #endif
1812 
1813         if (error) {
1814                 /*
1815                  * If we got something other than file already exists
1816                  * then just return this error.  Otherwise, we got
1817                  * EEXIST.  If we were doing a GUARDED create, then
1818                  * just return this error.  Otherwise, we need to
1819                  * make sure that this wasn't a duplicate of an
1820                  * exclusive create request.
1821                  *
1822                  * The assumption is made that a non-exclusive create
1823                  * request will never return EEXIST.
1824                  */
1825                 if (error != EEXIST || args->how.mode == GUARDED)
1826                         goto out;
1827                 /*
1828                  * Lookup the file so that we can get a vnode for it.
1829                  */
1830                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1831                     NULL, cr, NULL, NULL, NULL);
1832                 if (error) {
1833                         /*
1834                          * We couldn't find the file that we thought that
1835                          * we just created.  So, we'll just try creating
1836                          * it again.
1837                          */
1838                         if (error == ENOENT)
1839                                 goto tryagain;
1840                         goto out;
1841                 }
1842 
1843                 /*
1844                  * If the file is delegated to a v4 client, go ahead
1845                  * and initiate recall, this create is a hint that a
1846                  * conflicting v3 open has occurred.
1847                  */
1848 
1849                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1850                         VN_RELE(vp);
1851                         resp->status = NFS3ERR_JUKEBOX;
1852                         goto out1;
1853                 }
1854 
1855                 va.va_mask = AT_ALL;
1856                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1857 
1858                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1859                 /* % with INT32_MAX to prevent overflows */
1860                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1861                     vap->va_mtime.tv_sec !=
1862                     (mtime->seconds % INT32_MAX) ||
1863                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1864                         VN_RELE(vp);
1865                         error = EEXIST;
1866                         goto out;
1867                 }
1868         } else {
1869 
1870                 if ((args->how.mode == UNCHECKED ||
1871                     args->how.mode == GUARDED) &&
1872                     args->how.createhow3_u.obj_attributes.size.set_it &&
1873                     va.va_size == 0)
1874                         trunc = TRUE;
1875                 else
1876                         trunc = FALSE;
1877 
1878                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1879                         VN_RELE(vp);
1880                         resp->status = NFS3ERR_JUKEBOX;
1881                         goto out1;
1882                 }
1883 
1884                 va.va_mask = AT_ALL;
1885                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1886 
1887                 /*
1888                  * We need to check to make sure that the file got
1889                  * created to the indicated size.  If not, we do a
1890                  * setattr to try to change the size, but we don't
1891                  * try too hard.  This shouldn't a problem as most
1892                  * clients will only specifiy a size of zero which
1893                  * local file systems handle.  However, even if
1894                  * the client does specify a non-zero size, it can
1895                  * still recover by checking the size of the file
1896                  * after it has created it and then issue a setattr
1897                  * request of its own to set the size of the file.
1898                  */
1899                 if (vap != NULL &&
1900                     (args->how.mode == UNCHECKED ||
1901                     args->how.mode == GUARDED) &&
1902                     args->how.createhow3_u.obj_attributes.size.set_it &&
1903                     vap->va_size != reqsize) {
1904                         va.va_mask = AT_SIZE;
1905                         va.va_size = reqsize;
1906                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1907                         va.va_mask = AT_ALL;
1908                         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1909                 }
1910         }
1911 
1912         if (name != args->where.name)
1913                 kmem_free(name, MAXPATHLEN + 1);
1914 
1915 #ifdef DEBUG
1916         if (!rfs3_do_post_op_attr)
1917                 vap = NULL;
1918 #endif
1919 
1920 #ifdef DEBUG
1921         if (!rfs3_do_post_op_fh3)
1922                 resp->resok.obj.handle_follows = FALSE;
1923         else {
1924 #endif
1925         error = makefh3(&resp->resok.obj.handle, vp, exi);
1926         if (error)
1927                 resp->resok.obj.handle_follows = FALSE;
1928         else
1929                 resp->resok.obj.handle_follows = TRUE;
1930 #ifdef DEBUG
1931         }
1932 #endif
1933 
1934         /*
1935          * Force modified data and metadata out to stable storage.
1936          */
1937         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1938         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1939 
1940         VN_RELE(vp);
1941         if (tvp != NULL) {
1942                 if (in_crit)
1943                         nbl_end_crit(tvp);
1944                 VN_RELE(tvp);
1945         }
1946 
1947         resp->status = NFS3_OK;
1948         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1949         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1950 
1951         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1952             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1953 
1954         VN_RELE(dvp);
1955         return;
1956 
1957 out:
1958         if (curthread->t_flag & T_WOULDBLOCK) {
1959                 curthread->t_flag &= ~T_WOULDBLOCK;
1960                 resp->status = NFS3ERR_JUKEBOX;
1961         } else
1962                 resp->status = puterrno3(error);
1963 out1:
1964         DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1965             cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1966 
1967         if (name != NULL && name != args->where.name)
1968                 kmem_free(name, MAXPATHLEN + 1);
1969 
1970         if (tvp != NULL) {
1971                 if (in_crit)
1972                         nbl_end_crit(tvp);
1973                 VN_RELE(tvp);
1974         }
1975         if (dvp != NULL)
1976                 VN_RELE(dvp);
1977         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1978 }
1979 
1980 void *
1981 rfs3_create_getfh(CREATE3args *args)
1982 {
1983 
1984         return (&args->where.dir);
1985 }
1986 
1987 void
1988 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1989         struct svc_req *req, cred_t *cr)
1990 {
1991         int error;
1992         vnode_t *vp = NULL;
1993         vnode_t *dvp;
1994         struct vattr *vap;
1995         struct vattr va;
1996         struct vattr *dbvap;
1997         struct vattr dbva;
1998         struct vattr *davap;
1999         struct vattr dava;
2000         struct sockaddr *ca;
2001         char *name = NULL;
2002 
2003         dbvap = NULL;
2004         davap = NULL;
2005 
2006         dvp = nfs3_fhtovp(&args->where.dir, exi);
2007 
2008         DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
2009             cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
2010 
2011         if (dvp == NULL) {
2012                 error = ESTALE;
2013                 goto out;
2014         }
2015 
2016 #ifdef DEBUG
2017         if (rfs3_do_pre_op_attr) {
2018                 dbva.va_mask = AT_ALL;
2019                 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2020         } else
2021                 dbvap = NULL;
2022 #else
2023         dbva.va_mask = AT_ALL;
2024         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2025 #endif
2026         davap = dbvap;
2027 
2028         if (args->where.name == nfs3nametoolong) {
2029                 resp->status = NFS3ERR_NAMETOOLONG;
2030                 goto out1;
2031         }
2032 
2033         if (args->where.name == NULL || *(args->where.name) == '\0') {
2034                 resp->status = NFS3ERR_ACCES;
2035                 goto out1;
2036         }
2037 
2038         if (rdonly(exi, req)) {
2039                 resp->status = NFS3ERR_ROFS;
2040                 goto out1;
2041         }
2042 
2043         if (is_system_labeled()) {
2044                 bslabel_t *clabel = req->rq_label;
2045 
2046                 ASSERT(clabel != NULL);
2047                 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
2048                     "got client label from request(1)", struct svc_req *, req);
2049 
2050                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2051                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2052                             exi)) {
2053                                 resp->status = NFS3ERR_ACCES;
2054                                 goto out1;
2055                         }
2056                 }
2057         }
2058 
2059         error = sattr3_to_vattr(&args->attributes, &va);
2060         if (error)
2061                 goto out;
2062 
2063         if (!(va.va_mask & AT_MODE)) {
2064                 resp->status = NFS3ERR_INVAL;
2065                 goto out1;
2066         }
2067 
2068         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2069         name = nfscmd_convname(ca, exi, args->where.name,
2070             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2071 
2072         if (name == NULL) {
2073                 resp->status = NFS3ERR_INVAL;
2074                 goto out1;
2075         }
2076 
2077         va.va_mask |= AT_TYPE;
2078         va.va_type = VDIR;
2079 
2080         error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2081 
2082         if (name != args->where.name)
2083                 kmem_free(name, MAXPATHLEN + 1);
2084 
2085 #ifdef DEBUG
2086         if (rfs3_do_post_op_attr) {
2087                 dava.va_mask = AT_ALL;
2088                 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2089         } else
2090                 davap = NULL;
2091 #else
2092         dava.va_mask = AT_ALL;
2093         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2094 #endif
2095 
2096         /*
2097          * Force modified data and metadata out to stable storage.
2098          */
2099         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2100 
2101         if (error)
2102                 goto out;
2103 
2104 #ifdef DEBUG
2105         if (!rfs3_do_post_op_fh3)
2106                 resp->resok.obj.handle_follows = FALSE;
2107         else {
2108 #endif
2109         error = makefh3(&resp->resok.obj.handle, vp, exi);
2110         if (error)
2111                 resp->resok.obj.handle_follows = FALSE;
2112         else
2113                 resp->resok.obj.handle_follows = TRUE;
2114 #ifdef DEBUG
2115         }
2116 #endif
2117 
2118 #ifdef DEBUG
2119         if (rfs3_do_post_op_attr) {
2120                 va.va_mask = AT_ALL;
2121                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2122         } else
2123                 vap = NULL;
2124 #else
2125         va.va_mask = AT_ALL;
2126         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2127 #endif
2128 
2129         /*
2130          * Force modified data and metadata out to stable storage.
2131          */
2132         (void) VOP_FSYNC(vp, 0, cr, NULL);
2133 
2134         VN_RELE(vp);
2135 
2136         resp->status = NFS3_OK;
2137         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2138         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2139 
2140         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2141             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2142         VN_RELE(dvp);
2143 
2144         return;
2145 
2146 out:
2147         if (curthread->t_flag & T_WOULDBLOCK) {
2148                 curthread->t_flag &= ~T_WOULDBLOCK;
2149                 resp->status = NFS3ERR_JUKEBOX;
2150         } else
2151                 resp->status = puterrno3(error);
2152 out1:
2153         DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2154             cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2155         if (dvp != NULL)
2156                 VN_RELE(dvp);
2157         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2158 }
2159 
2160 void *
2161 rfs3_mkdir_getfh(MKDIR3args *args)
2162 {
2163 
2164         return (&args->where.dir);
2165 }
2166 
2167 void
2168 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2169         struct svc_req *req, cred_t *cr)
2170 {
2171         int error;
2172         vnode_t *vp;
2173         vnode_t *dvp;
2174         struct vattr *vap;
2175         struct vattr va;
2176         struct vattr *dbvap;
2177         struct vattr dbva;
2178         struct vattr *davap;
2179         struct vattr dava;
2180         struct sockaddr *ca;
2181         char *name = NULL;
2182         char *symdata = NULL;
2183 
2184         dbvap = NULL;
2185         davap = NULL;
2186 
2187         dvp = nfs3_fhtovp(&args->where.dir, exi);
2188 
2189         DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2190             cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2191 
2192         if (dvp == NULL) {
2193                 error = ESTALE;
2194                 goto err;
2195         }
2196 
2197 #ifdef DEBUG
2198         if (rfs3_do_pre_op_attr) {
2199                 dbva.va_mask = AT_ALL;
2200                 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2201         } else
2202                 dbvap = NULL;
2203 #else
2204         dbva.va_mask = AT_ALL;
2205         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2206 #endif
2207         davap = dbvap;
2208 
2209         if (args->where.name == nfs3nametoolong) {
2210                 resp->status = NFS3ERR_NAMETOOLONG;
2211                 goto err1;
2212         }
2213 
2214         if (args->where.name == NULL || *(args->where.name) == '\0') {
2215                 resp->status = NFS3ERR_ACCES;
2216                 goto err1;
2217         }
2218 
2219         if (rdonly(exi, req)) {
2220                 resp->status = NFS3ERR_ROFS;
2221                 goto err1;
2222         }
2223 
2224         if (is_system_labeled()) {
2225                 bslabel_t *clabel = req->rq_label;
2226 
2227                 ASSERT(clabel != NULL);
2228                 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2229                     "got client label from request(1)", struct svc_req *, req);
2230 
2231                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2232                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2233                             exi)) {
2234                                 resp->status = NFS3ERR_ACCES;
2235                                 goto err1;
2236                         }
2237                 }
2238         }
2239 
2240         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2241         if (error)
2242                 goto err;
2243 
2244         if (!(va.va_mask & AT_MODE)) {
2245                 resp->status = NFS3ERR_INVAL;
2246                 goto err1;
2247         }
2248 
2249         if (args->symlink.symlink_data == nfs3nametoolong) {
2250                 resp->status = NFS3ERR_NAMETOOLONG;
2251                 goto err1;
2252         }
2253 
2254         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2255         name = nfscmd_convname(ca, exi, args->where.name,
2256             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2257 
2258         if (name == NULL) {
2259                 /* This is really a Solaris EILSEQ */
2260                 resp->status = NFS3ERR_INVAL;
2261                 goto err1;
2262         }
2263 
2264         symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2265             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2266         if (symdata == NULL) {
2267                 /* This is really a Solaris EILSEQ */
2268                 resp->status = NFS3ERR_INVAL;
2269                 goto err1;
2270         }
2271 
2272 
2273         va.va_mask |= AT_TYPE;
2274         va.va_type = VLNK;
2275 
2276         error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2277 
2278 #ifdef DEBUG
2279         if (rfs3_do_post_op_attr) {
2280                 dava.va_mask = AT_ALL;
2281                 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2282         } else
2283                 davap = NULL;
2284 #else
2285         dava.va_mask = AT_ALL;
2286         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2287 #endif
2288 
2289         if (error)
2290                 goto err;
2291 
2292         error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2293             NULL, NULL, NULL);
2294 
2295         /*
2296          * Force modified data and metadata out to stable storage.
2297          */
2298         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2299 
2300 
2301         resp->status = NFS3_OK;
2302         if (error) {
2303                 resp->resok.obj.handle_follows = FALSE;
2304                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2305                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2306                 goto out;
2307         }
2308 
2309 #ifdef DEBUG
2310         if (!rfs3_do_post_op_fh3)
2311                 resp->resok.obj.handle_follows = FALSE;
2312         else {
2313 #endif
2314         error = makefh3(&resp->resok.obj.handle, vp, exi);
2315         if (error)
2316                 resp->resok.obj.handle_follows = FALSE;
2317         else
2318                 resp->resok.obj.handle_follows = TRUE;
2319 #ifdef DEBUG
2320         }
2321 #endif
2322 
2323 #ifdef DEBUG
2324         if (rfs3_do_post_op_attr) {
2325                 va.va_mask = AT_ALL;
2326                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2327         } else
2328                 vap = NULL;
2329 #else
2330         va.va_mask = AT_ALL;
2331         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2332 #endif
2333 
2334         /*
2335          * Force modified data and metadata out to stable storage.
2336          */
2337         (void) VOP_FSYNC(vp, 0, cr, NULL);
2338 
2339         VN_RELE(vp);
2340 
2341         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2342         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2343         goto out;
2344 
2345 err:
2346         if (curthread->t_flag & T_WOULDBLOCK) {
2347                 curthread->t_flag &= ~T_WOULDBLOCK;
2348                 resp->status = NFS3ERR_JUKEBOX;
2349         } else
2350                 resp->status = puterrno3(error);
2351 err1:
2352         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2353 out:
2354         if (name != NULL && name != args->where.name)
2355                 kmem_free(name, MAXPATHLEN + 1);
2356         if (symdata != NULL && symdata != args->symlink.symlink_data)
2357                 kmem_free(symdata, MAXPATHLEN + 1);
2358 
2359         DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2360             cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2361 
2362         if (dvp != NULL)
2363                 VN_RELE(dvp);
2364 }
2365 
2366 void *
2367 rfs3_symlink_getfh(SYMLINK3args *args)
2368 {
2369 
2370         return (&args->where.dir);
2371 }
2372 
2373 void
2374 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2375         struct svc_req *req, cred_t *cr)
2376 {
2377         int error;
2378         vnode_t *vp;
2379         vnode_t *realvp;
2380         vnode_t *dvp;
2381         struct vattr *vap;
2382         struct vattr va;
2383         struct vattr *dbvap;
2384         struct vattr dbva;
2385         struct vattr *davap;
2386         struct vattr dava;
2387         int mode;
2388         enum vcexcl excl;
2389         struct sockaddr *ca;
2390         char *name = NULL;
2391 
2392         dbvap = NULL;
2393         davap = NULL;
2394 
2395         dvp = nfs3_fhtovp(&args->where.dir, exi);
2396 
2397         DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2398             cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2399 
2400         if (dvp == NULL) {
2401                 error = ESTALE;
2402                 goto out;
2403         }
2404 
2405 #ifdef DEBUG
2406         if (rfs3_do_pre_op_attr) {
2407                 dbva.va_mask = AT_ALL;
2408                 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2409         } else
2410                 dbvap = NULL;
2411 #else
2412         dbva.va_mask = AT_ALL;
2413         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2414 #endif
2415         davap = dbvap;
2416 
2417         if (args->where.name == nfs3nametoolong) {
2418                 resp->status = NFS3ERR_NAMETOOLONG;
2419                 goto out1;
2420         }
2421 
2422         if (args->where.name == NULL || *(args->where.name) == '\0') {
2423                 resp->status = NFS3ERR_ACCES;
2424                 goto out1;
2425         }
2426 
2427         if (rdonly(exi, req)) {
2428                 resp->status = NFS3ERR_ROFS;
2429                 goto out1;
2430         }
2431 
2432         if (is_system_labeled()) {
2433                 bslabel_t *clabel = req->rq_label;
2434 
2435                 ASSERT(clabel != NULL);
2436                 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2437                     "got client label from request(1)", struct svc_req *, req);
2438 
2439                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2440                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2441                             exi)) {
2442                                 resp->status = NFS3ERR_ACCES;
2443                                 goto out1;
2444                         }
2445                 }
2446         }
2447 
2448         switch (args->what.type) {
2449         case NF3CHR:
2450         case NF3BLK:
2451                 error = sattr3_to_vattr(
2452                     &args->what.mknoddata3_u.device.dev_attributes, &va);
2453                 if (error)
2454                         goto out;
2455                 if (secpolicy_sys_devices(cr) != 0) {
2456                         resp->status = NFS3ERR_PERM;
2457                         goto out1;
2458                 }
2459                 if (args->what.type == NF3CHR)
2460                         va.va_type = VCHR;
2461                 else
2462                         va.va_type = VBLK;
2463                 va.va_rdev = makedevice(
2464                     args->what.mknoddata3_u.device.spec.specdata1,
2465                     args->what.mknoddata3_u.device.spec.specdata2);
2466                 va.va_mask |= AT_TYPE | AT_RDEV;
2467                 break;
2468         case NF3SOCK:
2469                 error = sattr3_to_vattr(
2470                     &args->what.mknoddata3_u.pipe_attributes, &va);
2471                 if (error)
2472                         goto out;
2473                 va.va_type = VSOCK;
2474                 va.va_mask |= AT_TYPE;
2475                 break;
2476         case NF3FIFO:
2477                 error = sattr3_to_vattr(
2478                     &args->what.mknoddata3_u.pipe_attributes, &va);
2479                 if (error)
2480                         goto out;
2481                 va.va_type = VFIFO;
2482                 va.va_mask |= AT_TYPE;
2483                 break;
2484         default:
2485                 resp->status = NFS3ERR_BADTYPE;
2486                 goto out1;
2487         }
2488 
2489         /*
2490          * Must specify the mode.
2491          */
2492         if (!(va.va_mask & AT_MODE)) {
2493                 resp->status = NFS3ERR_INVAL;
2494                 goto out1;
2495         }
2496 
2497         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2498         name = nfscmd_convname(ca, exi, args->where.name,
2499             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2500 
2501         if (name == NULL) {
2502                 resp->status = NFS3ERR_INVAL;
2503                 goto out1;
2504         }
2505 
2506         excl = EXCL;
2507 
2508         mode = 0;
2509 
2510         error = VOP_CREATE(dvp, name, &va, excl, mode,
2511             &vp, cr, 0, NULL, NULL);
2512 
2513         if (name != args->where.name)
2514                 kmem_free(name, MAXPATHLEN + 1);
2515 
2516 #ifdef DEBUG
2517         if (rfs3_do_post_op_attr) {
2518                 dava.va_mask = AT_ALL;
2519                 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2520         } else
2521                 davap = NULL;
2522 #else
2523         dava.va_mask = AT_ALL;
2524         davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2525 #endif
2526 
2527         /*
2528          * Force modified data and metadata out to stable storage.
2529          */
2530         (void) VOP_FSYNC(dvp, 0, cr, NULL);
2531 
2532         if (error)
2533                 goto out;
2534 
2535         resp->status = NFS3_OK;
2536 
2537 #ifdef DEBUG
2538         if (!rfs3_do_post_op_fh3)
2539                 resp->resok.obj.handle_follows = FALSE;
2540         else {
2541 #endif
2542         error = makefh3(&resp->resok.obj.handle, vp, exi);
2543         if (error)
2544                 resp->resok.obj.handle_follows = FALSE;
2545         else
2546                 resp->resok.obj.handle_follows = TRUE;
2547 #ifdef DEBUG
2548         }
2549 #endif
2550 
2551 #ifdef DEBUG
2552         if (rfs3_do_post_op_attr) {
2553                 va.va_mask = AT_ALL;
2554                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2555         } else
2556                 vap = NULL;
2557 #else
2558         va.va_mask = AT_ALL;
2559         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2560 #endif
2561 
2562         /*
2563          * Force modified metadata out to stable storage.
2564          *
2565          * if a underlying vp exists, pass it to VOP_FSYNC
2566          */
2567         if (VOP_REALVP(vp, &realvp, NULL) == 0)
2568                 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2569         else
2570                 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2571 
2572         VN_RELE(vp);
2573 
2574         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2575         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2576         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2577             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2578         VN_RELE(dvp);
2579         return;
2580 
2581 out:
2582         if (curthread->t_flag & T_WOULDBLOCK) {
2583                 curthread->t_flag &= ~T_WOULDBLOCK;
2584                 resp->status = NFS3ERR_JUKEBOX;
2585         } else
2586                 resp->status = puterrno3(error);
2587 out1:
2588         DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2589             cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2590         if (dvp != NULL)
2591                 VN_RELE(dvp);
2592         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2593 }
2594 
2595 void *
2596 rfs3_mknod_getfh(MKNOD3args *args)
2597 {
2598 
2599         return (&args->where.dir);
2600 }
2601 
2602 void
2603 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2604         struct svc_req *req, cred_t *cr)
2605 {
2606         int error = 0;
2607         vnode_t *vp;
2608         struct vattr *bvap;
2609         struct vattr bva;
2610         struct vattr *avap;
2611         struct vattr ava;
2612         vnode_t *targvp = NULL;
2613         struct sockaddr *ca;
2614         char *name = NULL;
2615 
2616         bvap = NULL;
2617         avap = NULL;
2618 
2619         vp = nfs3_fhtovp(&args->object.dir, exi);
2620 
2621         DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2622             cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2623 
2624         if (vp == NULL) {
2625                 error = ESTALE;
2626                 goto err;
2627         }
2628 
2629 #ifdef DEBUG
2630         if (rfs3_do_pre_op_attr) {
2631                 bva.va_mask = AT_ALL;
2632                 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2633         } else
2634                 bvap = NULL;
2635 #else
2636         bva.va_mask = AT_ALL;
2637         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2638 #endif
2639         avap = bvap;
2640 
2641         if (vp->v_type != VDIR) {
2642                 resp->status = NFS3ERR_NOTDIR;
2643                 goto err1;
2644         }
2645 
2646         if (args->object.name == nfs3nametoolong) {
2647                 resp->status = NFS3ERR_NAMETOOLONG;
2648                 goto err1;
2649         }
2650 
2651         if (args->object.name == NULL || *(args->object.name) == '\0') {
2652                 resp->status = NFS3ERR_ACCES;
2653                 goto err1;
2654         }
2655 
2656         if (rdonly(exi, req)) {
2657                 resp->status = NFS3ERR_ROFS;
2658                 goto err1;
2659         }
2660 
2661         if (is_system_labeled()) {
2662                 bslabel_t *clabel = req->rq_label;
2663 
2664                 ASSERT(clabel != NULL);
2665                 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2666                     "got client label from request(1)", struct svc_req *, req);
2667 
2668                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2669                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2670                             exi)) {
2671                                 resp->status = NFS3ERR_ACCES;
2672                                 goto err1;
2673                         }
2674                 }
2675         }
2676 
2677         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2678         name = nfscmd_convname(ca, exi, args->object.name,
2679             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2680 
2681         if (name == NULL) {
2682                 resp->status = NFS3ERR_INVAL;
2683                 goto err1;
2684         }
2685 
2686         /*
2687          * Check for a conflict with a non-blocking mandatory share
2688          * reservation and V4 delegations
2689          */
2690         error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2691             NULL, cr, NULL, NULL, NULL);
2692         if (error != 0)
2693                 goto err;
2694 
2695         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2696                 resp->status = NFS3ERR_JUKEBOX;
2697                 goto err1;
2698         }
2699 
2700         if (!nbl_need_check(targvp)) {
2701                 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2702         } else {
2703                 nbl_start_crit(targvp, RW_READER);
2704                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2705                         error = EACCES;
2706                 } else {
2707                         error = VOP_REMOVE(vp, name, cr, NULL, 0);
2708                 }
2709                 nbl_end_crit(targvp);
2710         }
2711         VN_RELE(targvp);
2712         targvp = NULL;
2713 
2714 #ifdef DEBUG
2715         if (rfs3_do_post_op_attr) {
2716                 ava.va_mask = AT_ALL;
2717                 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2718         } else
2719                 avap = NULL;
2720 #else
2721         ava.va_mask = AT_ALL;
2722         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2723 #endif
2724 
2725         /*
2726          * Force modified data and metadata out to stable storage.
2727          */
2728         (void) VOP_FSYNC(vp, 0, cr, NULL);
2729 
2730         if (error)
2731                 goto err;
2732 
2733         resp->status = NFS3_OK;
2734         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2735         goto out;
2736 
2737 err:
2738         if (curthread->t_flag & T_WOULDBLOCK) {
2739                 curthread->t_flag &= ~T_WOULDBLOCK;
2740                 resp->status = NFS3ERR_JUKEBOX;
2741         } else
2742                 resp->status = puterrno3(error);
2743 err1:
2744         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2745 out:
2746         DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2747             cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2748 
2749         if (name != NULL && name != args->object.name)
2750                 kmem_free(name, MAXPATHLEN + 1);
2751 
2752         if (vp != NULL)
2753                 VN_RELE(vp);
2754 }
2755 
2756 void *
2757 rfs3_remove_getfh(REMOVE3args *args)
2758 {
2759 
2760         return (&args->object.dir);
2761 }
2762 
2763 void
2764 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2765         struct svc_req *req, cred_t *cr)
2766 {
2767         int error;
2768         vnode_t *vp;
2769         struct vattr *bvap;
2770         struct vattr bva;
2771         struct vattr *avap;
2772         struct vattr ava;
2773         struct sockaddr *ca;
2774         char *name = NULL;
2775 
2776         bvap = NULL;
2777         avap = NULL;
2778 
2779         vp = nfs3_fhtovp(&args->object.dir, exi);
2780 
2781         DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2782             cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2783 
2784         if (vp == NULL) {
2785                 error = ESTALE;
2786                 goto err;
2787         }
2788 
2789 #ifdef DEBUG
2790         if (rfs3_do_pre_op_attr) {
2791                 bva.va_mask = AT_ALL;
2792                 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2793         } else
2794                 bvap = NULL;
2795 #else
2796         bva.va_mask = AT_ALL;
2797         bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2798 #endif
2799         avap = bvap;
2800 
2801         if (vp->v_type != VDIR) {
2802                 resp->status = NFS3ERR_NOTDIR;
2803                 goto err1;
2804         }
2805 
2806         if (args->object.name == nfs3nametoolong) {
2807                 resp->status = NFS3ERR_NAMETOOLONG;
2808                 goto err1;
2809         }
2810 
2811         if (args->object.name == NULL || *(args->object.name) == '\0') {
2812                 resp->status = NFS3ERR_ACCES;
2813                 goto err1;
2814         }
2815 
2816         if (rdonly(exi, req)) {
2817                 resp->status = NFS3ERR_ROFS;
2818                 goto err1;
2819         }
2820 
2821         if (is_system_labeled()) {
2822                 bslabel_t *clabel = req->rq_label;
2823 
2824                 ASSERT(clabel != NULL);
2825                 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2826                     "got client label from request(1)", struct svc_req *, req);
2827 
2828                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2829                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2830                             exi)) {
2831                                 resp->status = NFS3ERR_ACCES;
2832                                 goto err1;
2833                         }
2834                 }
2835         }
2836 
2837         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2838         name = nfscmd_convname(ca, exi, args->object.name,
2839             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2840 
2841         if (name == NULL) {
2842                 resp->status = NFS3ERR_INVAL;
2843                 goto err1;
2844         }
2845 
2846         error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2847 
2848         if (name != args->object.name)
2849                 kmem_free(name, MAXPATHLEN + 1);
2850 
2851 #ifdef DEBUG
2852         if (rfs3_do_post_op_attr) {
2853                 ava.va_mask = AT_ALL;
2854                 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2855         } else
2856                 avap = NULL;
2857 #else
2858         ava.va_mask = AT_ALL;
2859         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2860 #endif
2861 
2862         /*
2863          * Force modified data and metadata out to stable storage.
2864          */
2865         (void) VOP_FSYNC(vp, 0, cr, NULL);
2866 
2867         if (error) {
2868                 /*
2869                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2870                  * if the directory is not empty.  A System V NFS server
2871                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2872                  * over the wire.
2873                  */
2874                 if (error == EEXIST)
2875                         error = ENOTEMPTY;
2876                 goto err;
2877         }
2878 
2879         resp->status = NFS3_OK;
2880         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2881         goto out;
2882 
2883 err:
2884         if (curthread->t_flag & T_WOULDBLOCK) {
2885                 curthread->t_flag &= ~T_WOULDBLOCK;
2886                 resp->status = NFS3ERR_JUKEBOX;
2887         } else
2888                 resp->status = puterrno3(error);
2889 err1:
2890         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2891 out:
2892         DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2893             cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2894         if (vp != NULL)
2895                 VN_RELE(vp);
2896 
2897 }
2898 
2899 void *
2900 rfs3_rmdir_getfh(RMDIR3args *args)
2901 {
2902 
2903         return (&args->object.dir);
2904 }
2905 
2906 void
2907 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2908         struct svc_req *req, cred_t *cr)
2909 {
2910         int error = 0;
2911         vnode_t *fvp;
2912         vnode_t *tvp;
2913         vnode_t *targvp;
2914         struct vattr *fbvap;
2915         struct vattr fbva;
2916         struct vattr *favap;
2917         struct vattr fava;
2918         struct vattr *tbvap;
2919         struct vattr tbva;
2920         struct vattr *tavap;
2921         struct vattr tava;
2922         nfs_fh3 *fh3;
2923         struct exportinfo *to_exi;
2924         vnode_t *srcvp = NULL;
2925         bslabel_t *clabel;
2926         struct sockaddr *ca;
2927         char *name = NULL;
2928         char *toname = NULL;
2929 
2930         fbvap = NULL;
2931         favap = NULL;
2932         tbvap = NULL;
2933         tavap = NULL;
2934         tvp = NULL;
2935 
2936         fvp = nfs3_fhtovp(&args->from.dir, exi);
2937 
2938         DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2939             cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2940 
2941         if (fvp == NULL) {
2942                 error = ESTALE;
2943                 goto err;
2944         }
2945 
2946         if (is_system_labeled()) {
2947                 clabel = req->rq_label;
2948                 ASSERT(clabel != NULL);
2949                 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2950                     "got client label from request(1)", struct svc_req *, req);
2951 
2952                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2953                         if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2954                             exi)) {
2955                                 resp->status = NFS3ERR_ACCES;
2956                                 goto err1;
2957                         }
2958                 }
2959         }
2960 
2961 #ifdef DEBUG
2962         if (rfs3_do_pre_op_attr) {
2963                 fbva.va_mask = AT_ALL;
2964                 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2965         } else
2966                 fbvap = NULL;
2967 #else
2968         fbva.va_mask = AT_ALL;
2969         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2970 #endif
2971         favap = fbvap;
2972 
2973         fh3 = &args->to.dir;
2974         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2975         if (to_exi == NULL) {
2976                 resp->status = NFS3ERR_ACCES;
2977                 goto err1;
2978         }
2979         exi_rele(to_exi);
2980 
2981         if (to_exi != exi) {
2982                 resp->status = NFS3ERR_XDEV;
2983                 goto err1;
2984         }
2985 
2986         tvp = nfs3_fhtovp(&args->to.dir, exi);
2987         if (tvp == NULL) {
2988                 error = ESTALE;
2989                 goto err;
2990         }
2991 
2992 #ifdef DEBUG
2993         if (rfs3_do_pre_op_attr) {
2994                 tbva.va_mask = AT_ALL;
2995                 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2996         } else
2997                 tbvap = NULL;
2998 #else
2999         tbva.va_mask = AT_ALL;
3000         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
3001 #endif
3002         tavap = tbvap;
3003 
3004         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
3005                 resp->status = NFS3ERR_NOTDIR;
3006                 goto err1;
3007         }
3008 
3009         if (args->from.name == nfs3nametoolong ||
3010             args->to.name == nfs3nametoolong) {
3011                 resp->status = NFS3ERR_NAMETOOLONG;
3012                 goto err1;
3013         }
3014         if (args->from.name == NULL || *(args->from.name) == '\0' ||
3015             args->to.name == NULL || *(args->to.name) == '\0') {
3016                 resp->status = NFS3ERR_ACCES;
3017                 goto err1;
3018         }
3019 
3020         if (rdonly(exi, req)) {
3021                 resp->status = NFS3ERR_ROFS;
3022                 goto err1;
3023         }
3024 
3025         if (is_system_labeled()) {
3026                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3027                         if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
3028                             exi)) {
3029                                 resp->status = NFS3ERR_ACCES;
3030                                 goto err1;
3031                         }
3032                 }
3033         }
3034 
3035         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3036         name = nfscmd_convname(ca, exi, args->from.name,
3037             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3038 
3039         if (name == NULL) {
3040                 resp->status = NFS3ERR_INVAL;
3041                 goto err1;
3042         }
3043 
3044         toname = nfscmd_convname(ca, exi, args->to.name,
3045             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3046 
3047         if (toname == NULL) {
3048                 resp->status = NFS3ERR_INVAL;
3049                 goto err1;
3050         }
3051 
3052         /*
3053          * Check for a conflict with a non-blocking mandatory share
3054          * reservation or V4 delegations.
3055          */
3056         error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
3057             NULL, cr, NULL, NULL, NULL);
3058         if (error != 0)
3059                 goto err;
3060 
3061         /*
3062          * If we rename a delegated file we should recall the
3063          * delegation, since future opens should fail or would
3064          * refer to a new file.
3065          */
3066         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
3067                 resp->status = NFS3ERR_JUKEBOX;
3068                 goto err1;
3069         }
3070 
3071         /*
3072          * Check for renaming over a delegated file.  Check rfs4_deleg_policy
3073          * first to avoid VOP_LOOKUP if possible.
3074          */
3075         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
3076             VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
3077             NULL, NULL, NULL) == 0) {
3078 
3079                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
3080                         VN_RELE(targvp);
3081                         resp->status = NFS3ERR_JUKEBOX;
3082                         goto err1;
3083                 }
3084                 VN_RELE(targvp);
3085         }
3086 
3087         if (!nbl_need_check(srcvp)) {
3088                 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
3089         } else {
3090                 nbl_start_crit(srcvp, RW_READER);
3091                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
3092                         error = EACCES;
3093                 else
3094                         error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
3095                 nbl_end_crit(srcvp);
3096         }
3097         if (error == 0)
3098                 vn_renamepath(tvp, srcvp, args->to.name,
3099                     strlen(args->to.name));
3100         VN_RELE(srcvp);
3101         srcvp = NULL;
3102 
3103 #ifdef DEBUG
3104         if (rfs3_do_post_op_attr) {
3105                 fava.va_mask = AT_ALL;
3106                 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
3107                 tava.va_mask = AT_ALL;
3108                 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
3109         } else {
3110                 favap = NULL;
3111                 tavap = NULL;
3112         }
3113 #else
3114         fava.va_mask = AT_ALL;
3115         favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
3116         tava.va_mask = AT_ALL;
3117         tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
3118 #endif
3119 
3120         /*
3121          * Force modified data and metadata out to stable storage.
3122          */
3123         (void) VOP_FSYNC(fvp, 0, cr, NULL);
3124         (void) VOP_FSYNC(tvp, 0, cr, NULL);
3125 
3126         if (error)
3127                 goto err;
3128 
3129         resp->status = NFS3_OK;
3130         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
3131         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
3132         goto out;
3133 
3134 err:
3135         if (curthread->t_flag & T_WOULDBLOCK) {
3136                 curthread->t_flag &= ~T_WOULDBLOCK;
3137                 resp->status = NFS3ERR_JUKEBOX;
3138         } else {
3139                 resp->status = puterrno3(error);
3140         }
3141 err1:
3142         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
3143         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
3144 
3145 out:
3146         if (name != NULL && name != args->from.name)
3147                 kmem_free(name, MAXPATHLEN + 1);
3148         if (toname != NULL && toname != args->to.name)
3149                 kmem_free(toname, MAXPATHLEN + 1);
3150 
3151         DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
3152             cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
3153         if (fvp != NULL)
3154                 VN_RELE(fvp);
3155         if (tvp != NULL)
3156                 VN_RELE(tvp);
3157 }
3158 
3159 void *
3160 rfs3_rename_getfh(RENAME3args *args)
3161 {
3162 
3163         return (&args->from.dir);
3164 }
3165 
3166 void
3167 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
3168         struct svc_req *req, cred_t *cr)
3169 {
3170         int error;
3171         vnode_t *vp;
3172         vnode_t *dvp;
3173         struct vattr *vap;
3174         struct vattr va;
3175         struct vattr *bvap;
3176         struct vattr bva;
3177         struct vattr *avap;
3178         struct vattr ava;
3179         nfs_fh3 *fh3;
3180         struct exportinfo *to_exi;
3181         bslabel_t *clabel;
3182         struct sockaddr *ca;
3183         char *name = NULL;
3184 
3185         vap = NULL;
3186         bvap = NULL;
3187         avap = NULL;
3188         dvp = NULL;
3189 
3190         vp = nfs3_fhtovp(&args->file, exi);
3191 
3192         DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
3193             cred_t *, cr, vnode_t *, vp, LINK3args *, args);
3194 
3195         if (vp == NULL) {
3196                 error = ESTALE;
3197                 goto out;
3198         }
3199 
3200 #ifdef DEBUG
3201         if (rfs3_do_pre_op_attr) {
3202                 va.va_mask = AT_ALL;
3203                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3204         } else
3205                 vap = NULL;
3206 #else
3207         va.va_mask = AT_ALL;
3208         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3209 #endif
3210 
3211         fh3 = &args->link.dir;
3212         to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3213         if (to_exi == NULL) {
3214                 resp->status = NFS3ERR_ACCES;
3215                 goto out1;
3216         }
3217         exi_rele(to_exi);
3218 
3219         if (to_exi != exi) {
3220                 resp->status = NFS3ERR_XDEV;
3221                 goto out1;
3222         }
3223 
3224         if (is_system_labeled()) {
3225                 clabel = req->rq_label;
3226 
3227                 ASSERT(clabel != NULL);
3228                 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3229                     "got client label from request(1)", struct svc_req *, req);
3230 
3231                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3232                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3233                             exi)) {
3234                                 resp->status = NFS3ERR_ACCES;
3235                                 goto out1;
3236                         }
3237                 }
3238         }
3239 
3240         dvp = nfs3_fhtovp(&args->link.dir, exi);
3241         if (dvp == NULL) {
3242                 error = ESTALE;
3243                 goto out;
3244         }
3245 
3246 #ifdef DEBUG
3247         if (rfs3_do_pre_op_attr) {
3248                 bva.va_mask = AT_ALL;
3249                 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3250         } else
3251                 bvap = NULL;
3252 #else
3253         bva.va_mask = AT_ALL;
3254         bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3255 #endif
3256 
3257         if (dvp->v_type != VDIR) {
3258                 resp->status = NFS3ERR_NOTDIR;
3259                 goto out1;
3260         }
3261 
3262         if (args->link.name == nfs3nametoolong) {
3263                 resp->status = NFS3ERR_NAMETOOLONG;
3264                 goto out1;
3265         }
3266 
3267         if (args->link.name == NULL || *(args->link.name) == '\0') {
3268                 resp->status = NFS3ERR_ACCES;
3269                 goto out1;
3270         }
3271 
3272         if (rdonly(exi, req)) {
3273                 resp->status = NFS3ERR_ROFS;
3274                 goto out1;
3275         }
3276 
3277         if (is_system_labeled()) {
3278                 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3279                     "got client label from request(1)", struct svc_req *, req);
3280 
3281                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3282                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3283                             exi)) {
3284                                 resp->status = NFS3ERR_ACCES;
3285                                 goto out1;
3286                         }
3287                 }
3288         }
3289 
3290         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3291         name = nfscmd_convname(ca, exi, args->link.name,
3292             NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3293 
3294         if (name == NULL) {
3295                 resp->status = NFS3ERR_SERVERFAULT;
3296                 goto out1;
3297         }
3298 
3299         error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3300 
3301 #ifdef DEBUG
3302         if (rfs3_do_post_op_attr) {
3303                 va.va_mask = AT_ALL;
3304                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3305                 ava.va_mask = AT_ALL;
3306                 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3307         } else {
3308                 vap = NULL;
3309                 avap = NULL;
3310         }
3311 #else
3312         va.va_mask = AT_ALL;
3313         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3314         ava.va_mask = AT_ALL;
3315         avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3316 #endif
3317 
3318         /*
3319          * Force modified data and metadata out to stable storage.
3320          */
3321         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3322         (void) VOP_FSYNC(dvp, 0, cr, NULL);
3323 
3324         if (error)
3325                 goto out;
3326 
3327         VN_RELE(dvp);
3328 
3329         resp->status = NFS3_OK;
3330         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3331         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3332 
3333         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3334             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3335 
3336         VN_RELE(vp);
3337 
3338         return;
3339 
3340 out:
3341         if (curthread->t_flag & T_WOULDBLOCK) {
3342                 curthread->t_flag &= ~T_WOULDBLOCK;
3343                 resp->status = NFS3ERR_JUKEBOX;
3344         } else
3345                 resp->status = puterrno3(error);
3346 out1:
3347         if (name != NULL && name != args->link.name)
3348                 kmem_free(name, MAXPATHLEN + 1);
3349 
3350         DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3351             cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3352 
3353         if (vp != NULL)
3354                 VN_RELE(vp);
3355         if (dvp != NULL)
3356                 VN_RELE(dvp);
3357         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3358         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3359 }
3360 
3361 void *
3362 rfs3_link_getfh(LINK3args *args)
3363 {
3364 
3365         return (&args->file);
3366 }
3367 
3368 /*
3369  * This macro defines the size of a response which contains attribute
3370  * information and one directory entry (whose length is specified by
3371  * the macro parameter).  If the incoming request is larger than this,
 * then we are guaranteed to be able to return at least one directory entry
 * if one exists.  Therefore, we do not need to check for
 * NFS3ERR_TOOSMALL if the requested size is larger than this.  If it
3375  * is not, then we need to check to make sure that this error does not
3376  * need to be returned.
3377  *
 * NFS3_READDIR_MIN_COUNT is composed of the following:
3379  *
3380  * status - 1 * BYTES_PER_XDR_UNIT
3381  * attr. flag - 1 * BYTES_PER_XDR_UNIT
3382  * cookie verifier - 2 * BYTES_PER_XDR_UNIT
3383  * attributes  - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
3384  * boolean - 1 * BYTES_PER_XDR_UNIT
3385  * file id - 2 * BYTES_PER_XDR_UNIT
3386  * directory name length - 1 * BYTES_PER_XDR_UNIT
3387  * cookie - 2 * BYTES_PER_XDR_UNIT
3388  * end of list - 1 * BYTES_PER_XDR_UNIT
3389  * end of file - 1 * BYTES_PER_XDR_UNIT
3390  * Name length of directory to the nearest byte
3391  */
3392 
/*
 * XDR words are grouped as: reply header (status, attr flag, cookie
 * verifier, attributes), one entry's fixed part (boolean, file id, name
 * length, cookie) and the trailer (end of list, end of file); the padded
 * name length is added in bytes.
 */
#define NFS3_READDIR_MIN_COUNT(length)  \
        (roundup((length), BYTES_PER_XDR_UNIT) + \
            BYTES_PER_XDR_UNIT * \
            ((1 + 1 + 2 + NFS3_SIZEOF_FATTR3) + (1 + 2 + 1 + 2) + (1 + 1)))
3396 
3397 /* ARGSUSED */
3398 void
3399 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3400         struct svc_req *req, cred_t *cr)
3401 {
3402         int error;
3403         vnode_t *vp;
3404         struct vattr *vap;
3405         struct vattr va;
3406         struct iovec iov;
3407         struct uio uio;
3408         char *data;
3409         int iseof;
3410         int bufsize;
3411         int namlen;
3412         uint_t count;
3413         struct sockaddr *ca;
3414 
3415         vap = NULL;
3416 
3417         vp = nfs3_fhtovp(&args->dir, exi);
3418 
3419         DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3420             cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3421 
3422         if (vp == NULL) {
3423                 error = ESTALE;
3424                 goto out;
3425         }
3426 
3427         if (is_system_labeled()) {
3428                 bslabel_t *clabel = req->rq_label;
3429 
3430                 ASSERT(clabel != NULL);
3431                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3432                     "got client label from request(1)", struct svc_req *, req);
3433 
3434                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3435                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3436                             exi)) {
3437                                 resp->status = NFS3ERR_ACCES;
3438                                 goto out1;
3439                         }
3440                 }
3441         }
3442 
3443         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3444 
3445 #ifdef DEBUG
3446         if (rfs3_do_pre_op_attr) {
3447                 va.va_mask = AT_ALL;
3448                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3449         } else
3450                 vap = NULL;
3451 #else
3452         va.va_mask = AT_ALL;
3453         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3454 #endif
3455 
3456         if (vp->v_type != VDIR) {
3457                 resp->status = NFS3ERR_NOTDIR;
3458                 goto out1;
3459         }
3460 
3461         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3462         if (error)
3463                 goto out;
3464 
3465         /*
3466          * Now don't allow arbitrary count to alloc;
3467          * allow the maximum not to exceed rfs3_tsize()
3468          */
3469         if (args->count > rfs3_tsize(req))
3470                 args->count = rfs3_tsize(req);
3471 
3472         /*
3473          * Make sure that there is room to read at least one entry
3474          * if any are available.
3475          */
3476         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3477                 count = DIRENT64_RECLEN(MAXNAMELEN);
3478         else
3479                 count = args->count;
3480 
3481         data = kmem_alloc(count, KM_SLEEP);
3482 
3483         iov.iov_base = data;
3484         iov.iov_len = count;
3485         uio.uio_iov = &iov;
3486         uio.uio_iovcnt = 1;
3487         uio.uio_segflg = UIO_SYSSPACE;
3488         uio.uio_extflg = UIO_COPY_CACHED;
3489         uio.uio_loffset = (offset_t)args->cookie;
3490         uio.uio_resid = count;
3491 
3492         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3493 
3494 #ifdef DEBUG
3495         if (rfs3_do_post_op_attr) {
3496                 va.va_mask = AT_ALL;
3497                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3498         } else
3499                 vap = NULL;
3500 #else
3501         va.va_mask = AT_ALL;
3502         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3503 #endif
3504 
3505         if (error) {
3506                 kmem_free(data, count);
3507                 goto out;
3508         }
3509 
3510         /*
3511          * If the count was not large enough to be able to guarantee
3512          * to be able to return at least one entry, then need to
3513          * check to see if NFS3ERR_TOOSMALL should be returned.
3514          */
3515         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3516                 /*
3517                  * bufsize is used to keep track of the size of the response.
3518                  * It is primed with:
3519                  *      1 for the status +
3520                  *      1 for the dir_attributes.attributes boolean +
3521                  *      2 for the cookie verifier
3522                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
3523                  * to bytes.  If there are directory attributes to be
3524                  * returned, then:
3525                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3526                  * time BYTES_PER_XDR_UNIT is added to account for them.
3527                  */
3528                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3529                 if (vap != NULL)
3530                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3531                 /*
3532                  * An entry is composed of:
3533                  *      1 for the true/false list indicator +
3534                  *      2 for the fileid +
3535                  *      1 for the length of the name +
3536                  *      2 for the cookie +
3537                  * all times BYTES_PER_XDR_UNIT to convert from
3538                  * XDR units to bytes, plus the length of the name
3539                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
3540                  */
3541                 if (count != uio.uio_resid) {
3542                         namlen = strlen(((struct dirent64 *)data)->d_name);
3543                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3544                             roundup(namlen, BYTES_PER_XDR_UNIT);
3545                 }
3546                 /*
3547                  * We need to check to see if the number of bytes left
3548                  * to go into the buffer will actually fit into the
3549                  * buffer.  This is calculated as the size of this
3550                  * entry plus:
3551                  *      1 for the true/false list indicator +
3552                  *      1 for the eof indicator
3553                  * times BYTES_PER_XDR_UNIT to convert from from
3554                  * XDR units to bytes.
3555                  */
3556                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3557                 if (bufsize > args->count) {
3558                         kmem_free(data, count);
3559                         resp->status = NFS3ERR_TOOSMALL;
3560                         goto out1;
3561                 }
3562         }
3563 
3564         /*
3565          * Have a valid readir buffer for the native character
3566          * set. Need to check if a conversion is necessary and
3567          * potentially rewrite the whole buffer. Note that if the
3568          * conversion expands names enough, the structure may not
3569          * fit. In this case, we need to drop entries until if fits
3570          * and patch the counts in order that the next readdir will
3571          * get the correct entries.
3572          */
3573         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3574         data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3575 
3576 
3577         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3578 
3579 #if 0 /* notyet */
3580         /*
3581          * Don't do this.  It causes local disk writes when just
3582          * reading the file and the overhead is deemed larger
3583          * than the benefit.
3584          */
3585         /*
3586          * Force modified metadata out to stable storage.
3587          */
3588         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3589 #endif
3590 
3591         resp->status = NFS3_OK;
3592         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3593         resp->resok.cookieverf = 0;
3594         resp->resok.reply.entries = (entry3 *)data;
3595         resp->resok.reply.eof = iseof;
3596         resp->resok.size = count - uio.uio_resid;
3597         resp->resok.count = args->count;
3598         resp->resok.freecount = count;
3599 
3600         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3601             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3602 
3603         VN_RELE(vp);
3604 
3605         return;
3606 
3607 out:
3608         if (curthread->t_flag & T_WOULDBLOCK) {
3609                 curthread->t_flag &= ~T_WOULDBLOCK;
3610                 resp->status = NFS3ERR_JUKEBOX;
3611         } else
3612                 resp->status = puterrno3(error);
3613 out1:
3614         DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3615             cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3616 
3617         if (vp != NULL) {
3618                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3619                 VN_RELE(vp);
3620         }
3621         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3622 }
3623 
3624 void *
3625 rfs3_readdir_getfh(READDIR3args *args)
3626 {
3627 
3628         return (&args->dir);
3629 }
3630 
3631 void
3632 rfs3_readdir_free(READDIR3res *resp)
3633 {
3634 
3635         if (resp->status == NFS3_OK)
3636                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3637 }
3638 
/*
 * Step from one dirent64 record to the record that follows it in the
 * same buffer, i.e. d_reclen bytes further on.  Any earlier definition
 * of nextdp is discarded first.
 */
#undef nextdp
#define nextdp(dp) \
        ((struct dirent64 *)(&((char *)(dp))[(dp)->d_reclen]))
3643 
/*
 * NFS3_READDIRPLUS_ENTRY(namelen) is the number of bytes budgeted for a
 * response holding one directory entry, counting both its attributes
 * and its file handle.  If the incoming request is larger than this,
 * then at least one more directory entry can be returned if one exists.
 *
 * The fixed part is counted in XDR units:
 *
 *      1                       entry-follows boolean
 *      2                       file id
 *      1                       length of the name
 *      2                       cookie
 *      1                       attributes-follow flag
 *      NFS3_SIZEOF_FATTR3      the attributes
 *      1                       handle-follows flag
 *      1                       length of the file handle
 *
 * and is converted to bytes with BYTES_PER_XDR_UNIT.  Added to that are
 * NFS3_MAXFHSIZE bytes for the largest file handle and the name length
 * rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        (BYTES_PER_XDR_UNIT * \
            (1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) + \
        NFS3_MAXFHSIZE + roundup(namelen, BYTES_PER_XDR_UNIT))
3667 
/*
 * Initial value of rd_unit in rfs3_readdirplus(): the number of bytes
 * requested from each VOP_READDIR call there, before it is clamped to
 * the space left in the dirent buffer.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3669 
3670 /* ARGSUSED */
3671 void
3672 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3673         struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3674 {
3675         int error;
3676         vnode_t *vp;
3677         struct vattr *vap;
3678         struct vattr va;
3679         struct iovec iov;
3680         struct uio uio;
3681         char *data;
3682         int iseof;
3683         struct dirent64 *dp;
3684         vnode_t *nvp;
3685         struct vattr *nvap;
3686         struct vattr nva;
3687         entryplus3_info *infop = NULL;
3688         int size = 0;
3689         int nents = 0;
3690         int bufsize = 0;
3691         int entrysize = 0;
3692         int tofit = 0;
3693         int rd_unit = rfs3_readdir_unit;
3694         int prev_len;
3695         int space_left;
3696         int i;
3697         uint_t *namlen = NULL;
3698         char *ndata = NULL;
3699         struct sockaddr *ca;
3700         size_t ret;
3701 
3702         vap = NULL;
3703 
3704         vp = nfs3_fhtovp(&args->dir, exi);
3705 
3706         DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3707             cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3708 
3709         if (vp == NULL) {
3710                 error = ESTALE;
3711                 goto out;
3712         }
3713 
3714         if (is_system_labeled()) {
3715                 bslabel_t *clabel = req->rq_label;
3716 
3717                 ASSERT(clabel != NULL);
3718                 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3719                     char *, "got client label from request(1)",
3720                     struct svc_req *, req);
3721 
3722                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3723                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3724                             exi)) {
3725                                 resp->status = NFS3ERR_ACCES;
3726                                 goto out1;
3727                         }
3728                 }
3729         }
3730 
3731         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3732 
3733 #ifdef DEBUG
3734         if (rfs3_do_pre_op_attr) {
3735                 va.va_mask = AT_ALL;
3736                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3737         } else
3738                 vap = NULL;
3739 #else
3740         va.va_mask = AT_ALL;
3741         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3742 #endif
3743 
3744         if (vp->v_type != VDIR) {
3745                 error = ENOTDIR;
3746                 goto out;
3747         }
3748 
3749         error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3750         if (error)
3751                 goto out;
3752 
3753         /*
3754          * Don't allow arbitrary counts for allocation
3755          */
3756         if (args->maxcount > rfs3_tsize(req))
3757                 args->maxcount = rfs3_tsize(req);
3758 
3759         /*
3760          * Make sure that there is room to read at least one entry
3761          * if any are available
3762          */
3763         args->dircount = MIN(args->dircount, args->maxcount);
3764 
3765         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3766                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3767 
3768         /*
3769          * This allocation relies on a minimum directory entry
3770          * being roughly 24 bytes.  Therefore, the namlen array
3771          * will have enough space based on the maximum number of
3772          * entries to read.
3773          */
3774         namlen = kmem_alloc(args->dircount, KM_SLEEP);
3775 
3776         space_left = args->dircount;
3777         data = kmem_alloc(args->dircount, KM_SLEEP);
3778         dp = (struct dirent64 *)data;
3779         uio.uio_iov = &iov;
3780         uio.uio_iovcnt = 1;
3781         uio.uio_segflg = UIO_SYSSPACE;
3782         uio.uio_extflg = UIO_COPY_CACHED;
3783         uio.uio_loffset = (offset_t)args->cookie;
3784 
3785         /*
3786          * bufsize is used to keep track of the size of the response as we
3787          * get post op attributes and filehandles for each entry.  This is
3788          * an optimization as the server may have read more entries than will
3789          * fit in the buffer specified by maxcount.  We stop calculating
3790          * post op attributes and filehandles once we have exceeded maxcount.
3791          * This will minimize the effect of truncation.
3792          *
3793          * It is primed with:
3794          *      1 for the status +
3795          *      1 for the dir_attributes.attributes boolean +
3796          *      2 for the cookie verifier
3797          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3798          * to bytes.  If there are directory attributes to be
3799          * returned, then:
3800          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3801          * time BYTES_PER_XDR_UNIT is added to account for them.
3802          */
3803         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3804         if (vap != NULL)
3805                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3806 
3807 getmoredents:
3808         /*
3809          * Here we make a check so that our read unit is not larger than
3810          * the space left in the buffer.
3811          */
3812         rd_unit = MIN(rd_unit, space_left);
3813         iov.iov_base = (char *)dp;
3814         iov.iov_len = rd_unit;
3815         uio.uio_resid = rd_unit;
3816         prev_len = rd_unit;
3817 
3818         error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3819 
3820         if (error) {
3821                 kmem_free(data, args->dircount);
3822                 goto out;
3823         }
3824 
3825         if (uio.uio_resid == prev_len && !iseof) {
3826                 if (nents == 0) {
3827                         kmem_free(data, args->dircount);
3828                         resp->status = NFS3ERR_TOOSMALL;
3829                         goto out1;
3830                 }
3831 
3832                 /*
3833                  * We could not get any more entries, so get the attributes
3834                  * and filehandle for the entries already obtained.
3835                  */
3836                 goto good;
3837         }
3838 
3839         /*
3840          * We estimate the size of the response by assuming the
3841          * entry exists and attributes and filehandle are also valid
3842          */
3843         for (size = prev_len - uio.uio_resid;
3844             size > 0;
3845             size -= dp->d_reclen, dp = nextdp(dp)) {
3846 
3847                 if (dp->d_ino == 0) {
3848                         nents++;
3849                         continue;
3850                 }
3851 
3852                 namlen[nents] = strlen(dp->d_name);
3853                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3854 
3855                 /*
3856                  * We need to check to see if the number of bytes left
3857                  * to go into the buffer will actually fit into the
3858                  * buffer.  This is calculated as the size of this
3859                  * entry plus:
3860                  *      1 for the true/false list indicator +
3861                  *      1 for the eof indicator
3862                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3863                  * to bytes.
3864                  *
3865                  * Also check the dircount limit against the first entry read
3866                  *
3867                  */
3868                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3869                 if (bufsize + tofit > args->maxcount) {
3870                         /*
3871                          * We make a check here to see if this was the
3872                          * first entry being measured.  If so, then maxcount
3873                          * was too small to begin with and so we need to
3874                          * return with NFS3ERR_TOOSMALL.
3875                          */
3876                         if (nents == 0) {
3877                                 kmem_free(data, args->dircount);
3878                                 resp->status = NFS3ERR_TOOSMALL;
3879                                 goto out1;
3880                         }
3881                         iseof = FALSE;
3882                         goto good;
3883                 }
3884                 bufsize += entrysize;
3885                 nents++;
3886         }
3887 
3888         /*
3889          * If there is enough room to fit at least 1 more entry including
3890          * post op attributes and filehandle in the buffer AND that we haven't
3891          * exceeded dircount then go back and get some more.
3892          */
3893         if (!iseof &&
3894             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3895                 space_left -= (prev_len - uio.uio_resid);
3896                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3897                         goto getmoredents;
3898 
3899                 /* else, fall through */
3900         }
3901 
3902 good:
3903 
3904 #ifdef DEBUG
3905         if (rfs3_do_post_op_attr) {
3906                 va.va_mask = AT_ALL;
3907                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3908         } else
3909                 vap = NULL;
3910 #else
3911         va.va_mask = AT_ALL;
3912         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3913 #endif
3914 
3915         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3916 
3917         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3918         resp->resok.infop = infop;
3919 
3920         dp = (struct dirent64 *)data;
3921         for (i = 0; i < nents; i++) {
3922 
3923                 if (dp->d_ino == 0) {
3924                         infop[i].attr.attributes = FALSE;
3925                         infop[i].fh.handle_follows = FALSE;
3926                         dp = nextdp(dp);
3927                         continue;
3928                 }
3929 
3930                 infop[i].namelen = namlen[i];
3931 
3932                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3933                     NULL, NULL, NULL);
3934                 if (error) {
3935                         infop[i].attr.attributes = FALSE;
3936                         infop[i].fh.handle_follows = FALSE;
3937                         dp = nextdp(dp);
3938                         continue;
3939                 }
3940 
3941 #ifdef DEBUG
3942                 if (rfs3_do_post_op_attr) {
3943                         nva.va_mask = AT_ALL;
3944                         nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ?
3945                             NULL : &nva;
3946                 } else
3947                         nvap = NULL;
3948 #else
3949                 nva.va_mask = AT_ALL;
3950                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3951 #endif
3952                 /* Lie about the object type for a referral */
3953                 if (vn_is_nfs_reparse(nvp, cr))
3954                         nvap->va_type = VLNK;
3955 
3956                 vattr_to_post_op_attr(nvap, &infop[i].attr);
3957 
3958 #ifdef DEBUG
3959                 if (!rfs3_do_post_op_fh3)
3960                         infop[i].fh.handle_follows = FALSE;
3961                 else {
3962 #endif
3963                 error = makefh3(&infop[i].fh.handle, nvp, exi);
3964                 if (!error)
3965                         infop[i].fh.handle_follows = TRUE;
3966                 else
3967                         infop[i].fh.handle_follows = FALSE;
3968 #ifdef DEBUG
3969                 }
3970 #endif
3971 
3972                 VN_RELE(nvp);
3973                 dp = nextdp(dp);
3974         }
3975 
3976         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3977         ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3978         if (ndata == NULL)
3979                 ndata = data;
3980 
3981         if (ret > 0) {
3982                 /*
3983                  * We had to drop one or more entries in order to fit
3984                  * during the character conversion.  We need to patch
3985                  * up the size and eof info.
3986                  */
3987                 if (iseof)
3988                         iseof = FALSE;
3989 
3990                 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3991                     nents, ret);
3992         }
3993 
3994 
3995 #if 0 /* notyet */
3996         /*
3997          * Don't do this.  It causes local disk writes when just
3998          * reading the file and the overhead is deemed larger
3999          * than the benefit.
4000          */
4001         /*
4002          * Force modified metadata out to stable storage.
4003          */
4004         (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
4005 #endif
4006 
4007         kmem_free(namlen, args->dircount);
4008 
4009         resp->status = NFS3_OK;
4010         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
4011         resp->resok.cookieverf = 0;
4012         resp->resok.reply.entries = (entryplus3 *)ndata;
4013         resp->resok.reply.eof = iseof;
4014         resp->resok.size = nents;
4015         resp->resok.count = args->dircount - ret;
4016         resp->resok.maxcount = args->maxcount;
4017 
4018         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
4019             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
4020         if (ndata != data)
4021                 kmem_free(data, args->dircount);
4022 
4023 
4024         VN_RELE(vp);
4025 
4026         return;
4027 
4028 out:
4029         if (curthread->t_flag & T_WOULDBLOCK) {
4030                 curthread->t_flag &= ~T_WOULDBLOCK;
4031                 resp->status = NFS3ERR_JUKEBOX;
4032         } else {
4033                 resp->status = puterrno3(error);
4034         }
4035 out1:
4036         DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
4037             cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
4038 
4039         if (vp != NULL) {
4040                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
4041                 VN_RELE(vp);
4042         }
4043 
4044         if (namlen != NULL)
4045                 kmem_free(namlen, args->dircount);
4046 
4047         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
4048 }
4049 
4050 void *
4051 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
4052 {
4053 
4054         return (&args->dir);
4055 }
4056 
4057 void
4058 rfs3_readdirplus_free(READDIRPLUS3res *resp)
4059 {
4060 
4061         if (resp->status == NFS3_OK) {
4062                 kmem_free(resp->resok.reply.entries, resp->resok.count);
4063                 kmem_free(resp->resok.infop,
4064                     resp->resok.size * sizeof (struct entryplus3_info));
4065         }
4066 }
4067 
4068 /* ARGSUSED */
4069 void
4070 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
4071         struct svc_req *req, cred_t *cr)
4072 {
4073         int error;
4074         vnode_t *vp;
4075         struct vattr *vap;
4076         struct vattr va;
4077         struct statvfs64 sb;
4078 
4079         vap = NULL;
4080 
4081         vp = nfs3_fhtovp(&args->fsroot, exi);
4082 
4083         DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
4084             cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
4085 
4086         if (vp == NULL) {
4087                 error = ESTALE;
4088                 goto out;
4089         }
4090 
4091         if (is_system_labeled()) {
4092                 bslabel_t *clabel = req->rq_label;
4093 
4094                 ASSERT(clabel != NULL);
4095                 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
4096                     "got client label from request(1)", struct svc_req *, req);
4097 
4098                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4099                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4100                             exi)) {
4101                                 resp->status = NFS3ERR_ACCES;
4102                                 goto out1;
4103                         }
4104                 }
4105         }
4106 
4107         error = VFS_STATVFS(vp->v_vfsp, &sb);
4108 
4109 #ifdef DEBUG
4110         if (rfs3_do_post_op_attr) {
4111                 va.va_mask = AT_ALL;
4112                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4113         } else
4114                 vap = NULL;
4115 #else
4116         va.va_mask = AT_ALL;
4117         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4118 #endif
4119 
4120         if (error)
4121                 goto out;
4122 
4123         resp->status = NFS3_OK;
4124         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4125         if (sb.f_blocks != (fsblkcnt64_t)-1)
4126                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
4127         else
4128                 resp->resok.tbytes = (size3)sb.f_blocks;
4129         if (sb.f_bfree != (fsblkcnt64_t)-1)
4130                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
4131         else
4132                 resp->resok.fbytes = (size3)sb.f_bfree;
4133         if (sb.f_bavail != (fsblkcnt64_t)-1)
4134                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
4135         else
4136                 resp->resok.abytes = (size3)sb.f_bavail;
4137         resp->resok.tfiles = (size3)sb.f_files;
4138         resp->resok.ffiles = (size3)sb.f_ffree;
4139         resp->resok.afiles = (size3)sb.f_favail;
4140         resp->resok.invarsec = 0;
4141 
4142         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
4143             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
4144         VN_RELE(vp);
4145 
4146         return;
4147 
4148 out:
4149         if (curthread->t_flag & T_WOULDBLOCK) {
4150                 curthread->t_flag &= ~T_WOULDBLOCK;
4151                 resp->status = NFS3ERR_JUKEBOX;
4152         } else
4153                 resp->status = puterrno3(error);
4154 out1:
4155         DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
4156             cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
4157 
4158         if (vp != NULL)
4159                 VN_RELE(vp);
4160         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4161 }
4162 
4163 void *
4164 rfs3_fsstat_getfh(FSSTAT3args *args)
4165 {
4166 
4167         return (&args->fsroot);
4168 }
4169 
4170 void
4171 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
4172         struct svc_req *req, cred_t *cr)
4173 {
4174         vnode_t *vp;
4175         struct vattr *vap;
4176         struct vattr va;
4177         uint32_t xfer_size;
4178         ulong_t l = 0;
4179         int error;
4180 
4181         vp = nfs3_fhtovp(&args->fsroot, exi);
4182 
4183         DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
4184             cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
4185 
4186         if (vp == NULL) {
4187                 if (curthread->t_flag & T_WOULDBLOCK) {
4188                         curthread->t_flag &= ~T_WOULDBLOCK;
4189                         resp->status = NFS3ERR_JUKEBOX;
4190                 } else
4191                         resp->status = NFS3ERR_STALE;
4192                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
4193                 goto out;
4194         }
4195 
4196         if (is_system_labeled()) {
4197                 bslabel_t *clabel = req->rq_label;
4198 
4199                 ASSERT(clabel != NULL);
4200                 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
4201                     "got client label from request(1)", struct svc_req *, req);
4202 
4203                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4204                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4205                             exi)) {
4206                                 resp->status = NFS3ERR_STALE;
4207                                 vattr_to_post_op_attr(NULL,
4208                                     &resp->resfail.obj_attributes);
4209                                 goto out;
4210                         }
4211                 }
4212         }
4213 
4214 #ifdef DEBUG
4215         if (rfs3_do_post_op_attr) {
4216                 va.va_mask = AT_ALL;
4217                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4218         } else
4219                 vap = NULL;
4220 #else
4221         va.va_mask = AT_ALL;
4222         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4223 #endif
4224 
4225         resp->status = NFS3_OK;
4226         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4227         xfer_size = rfs3_tsize(req);
4228         resp->resok.rtmax = xfer_size;
4229         resp->resok.rtpref = xfer_size;
4230         resp->resok.rtmult = DEV_BSIZE;
4231         resp->resok.wtmax = xfer_size;
4232         resp->resok.wtpref = xfer_size;
4233         resp->resok.wtmult = DEV_BSIZE;
4234         resp->resok.dtpref = MAXBSIZE;
4235 
4236         /*
4237          * Large file spec: want maxfilesize based on limit of
4238          * underlying filesystem.  We can guess 2^31-1 if need be.
4239          */
4240         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
4241         if (error) {
4242                 resp->status = puterrno3(error);
4243                 goto out;
4244         }
4245 
4246         /*
4247          * If the underlying file system does not support _PC_FILESIZEBITS,
4248          * return a reasonable default. Note that error code on VOP_PATHCONF
4249          * will be 0, even if the underlying file system does not support
4250          * _PC_FILESIZEBITS.
4251          */
4252         if (l == (ulong_t)-1) {
4253                 resp->resok.maxfilesize = MAXOFF32_T;
4254         } else {
4255                 if (l >= (sizeof (uint64_t) * 8))
4256                         resp->resok.maxfilesize = INT64_MAX;
4257                 else
4258                         resp->resok.maxfilesize = (1LL << (l-1)) - 1;
4259         }
4260 
4261         resp->resok.time_delta.seconds = 0;
4262         resp->resok.time_delta.nseconds = 1000;
4263         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
4264             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
4265 
4266         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
4267             cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
4268 
4269         VN_RELE(vp);
4270 
4271         return;
4272 
4273 out:
4274         DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
4275             cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
4276         if (vp != NULL)
4277                 VN_RELE(vp);
4278 }
4279 
4280 void *
4281 rfs3_fsinfo_getfh(FSINFO3args *args)
4282 {
4283 
4284         return (&args->fsroot);
4285 }
4286 
4287 /* ARGSUSED */
4288 void
4289 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4290         struct svc_req *req, cred_t *cr)
4291 {
4292         int error;
4293         vnode_t *vp;
4294         struct vattr *vap;
4295         struct vattr va;
4296         ulong_t val;
4297 
4298         vap = NULL;
4299 
4300         vp = nfs3_fhtovp(&args->object, exi);
4301 
4302         DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
4303             cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
4304 
4305         if (vp == NULL) {
4306                 error = ESTALE;
4307                 goto out;
4308         }
4309 
4310         if (is_system_labeled()) {
4311                 bslabel_t *clabel = req->rq_label;
4312 
4313                 ASSERT(clabel != NULL);
4314                 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4315                     "got client label from request(1)", struct svc_req *, req);
4316 
4317                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4318                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4319                             exi)) {
4320                                 resp->status = NFS3ERR_ACCES;
4321                                 goto out1;
4322                         }
4323                 }
4324         }
4325 
4326 #ifdef DEBUG
4327         if (rfs3_do_post_op_attr) {
4328                 va.va_mask = AT_ALL;
4329                 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4330         } else
4331                 vap = NULL;
4332 #else
4333         va.va_mask = AT_ALL;
4334         vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4335 #endif
4336 
4337         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4338         if (error)
4339                 goto out;
4340         resp->resok.info.link_max = (uint32)val;
4341 
4342         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4343         if (error)
4344                 goto out;
4345         resp->resok.info.name_max = (uint32)val;
4346 
4347         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4348         if (error)
4349                 goto out;
4350         if (val == 1)
4351                 resp->resok.info.no_trunc = TRUE;
4352         else
4353                 resp->resok.info.no_trunc = FALSE;
4354 
4355         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4356         if (error)
4357                 goto out;
4358         if (val == 1)
4359                 resp->resok.info.chown_restricted = TRUE;
4360         else
4361                 resp->resok.info.chown_restricted = FALSE;
4362 
4363         resp->status = NFS3_OK;
4364         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4365         resp->resok.info.case_insensitive = FALSE;
4366         resp->resok.info.case_preserving = TRUE;
4367         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4368             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4369         VN_RELE(vp);
4370         return;
4371 
4372 out:
4373         if (curthread->t_flag & T_WOULDBLOCK) {
4374                 curthread->t_flag &= ~T_WOULDBLOCK;
4375                 resp->status = NFS3ERR_JUKEBOX;
4376         } else
4377                 resp->status = puterrno3(error);
4378 out1:
4379         DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4380             cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4381         if (vp != NULL)
4382                 VN_RELE(vp);
4383         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4384 }
4385 
4386 void *
4387 rfs3_pathconf_getfh(PATHCONF3args *args)
4388 {
4389 
4390         return (&args->object);
4391 }
4392 
4393 void
4394 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4395         struct svc_req *req, cred_t *cr)
4396 {
4397         int error;
4398         vnode_t *vp;
4399         struct vattr *bvap;
4400         struct vattr bva;
4401         struct vattr *avap;
4402         struct vattr ava;
4403 
4404         bvap = NULL;
4405         avap = NULL;
4406 
4407         vp = nfs3_fhtovp(&args->file, exi);
4408 
4409         DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4410             cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4411 
4412         if (vp == NULL) {
4413                 error = ESTALE;
4414                 goto out;
4415         }
4416 
4417         bva.va_mask = AT_ALL;
4418         error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4419 
4420         /*
4421          * If we can't get the attributes, then we can't do the
4422          * right access checking.  So, we'll fail the request.
4423          */
4424         if (error)
4425                 goto out;
4426 
4427 #ifdef DEBUG
4428         if (rfs3_do_pre_op_attr)
4429                 bvap = &bva;
4430         else
4431                 bvap = NULL;
4432 #else
4433         bvap = &bva;
4434 #endif
4435 
4436         if (rdonly(exi, req)) {
4437                 resp->status = NFS3ERR_ROFS;
4438                 goto out1;
4439         }
4440 
4441         if (vp->v_type != VREG) {
4442                 resp->status = NFS3ERR_INVAL;
4443                 goto out1;
4444         }
4445 
4446         if (is_system_labeled()) {
4447                 bslabel_t *clabel = req->rq_label;
4448 
4449                 ASSERT(clabel != NULL);
4450                 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4451                     "got client label from request(1)", struct svc_req *, req);
4452 
4453                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4454                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4455                             exi)) {
4456                                 resp->status = NFS3ERR_ACCES;
4457                                 goto out1;
4458                         }
4459                 }
4460         }
4461 
4462         if (crgetuid(cr) != bva.va_uid &&
4463             (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4464                 goto out;
4465 
4466         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4467 
4468 #ifdef DEBUG
4469         if (rfs3_do_post_op_attr) {
4470                 ava.va_mask = AT_ALL;
4471                 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4472         } else
4473                 avap = NULL;
4474 #else
4475         ava.va_mask = AT_ALL;
4476         avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4477 #endif
4478 
4479         if (error)
4480                 goto out;
4481 
4482         resp->status = NFS3_OK;
4483         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4484         resp->resok.verf = write3verf;
4485 
4486         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4487             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4488 
4489         VN_RELE(vp);
4490 
4491         return;
4492 
4493 out:
4494         if (curthread->t_flag & T_WOULDBLOCK) {
4495                 curthread->t_flag &= ~T_WOULDBLOCK;
4496                 resp->status = NFS3ERR_JUKEBOX;
4497         } else
4498                 resp->status = puterrno3(error);
4499 out1:
4500         DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4501             cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4502 
4503         if (vp != NULL)
4504                 VN_RELE(vp);
4505         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4506 }
4507 
4508 void *
4509 rfs3_commit_getfh(COMMIT3args *args)
4510 {
4511 
4512         return (&args->file);
4513 }
4514 
4515 static int
4516 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4517 {
4518 
4519         vap->va_mask = 0;
4520 
4521         if (sap->mode.set_it) {
4522                 vap->va_mode = (mode_t)sap->mode.mode;
4523                 vap->va_mask |= AT_MODE;
4524         }
4525         if (sap->uid.set_it) {
4526                 vap->va_uid = (uid_t)sap->uid.uid;
4527                 vap->va_mask |= AT_UID;
4528         }
4529         if (sap->gid.set_it) {
4530                 vap->va_gid = (gid_t)sap->gid.gid;
4531                 vap->va_mask |= AT_GID;
4532         }
4533         if (sap->size.set_it) {
4534                 if (sap->size.size > (size3)((u_longlong_t)-1))
4535                         return (EINVAL);
4536                 vap->va_size = sap->size.size;
4537                 vap->va_mask |= AT_SIZE;
4538         }
4539         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4540 #ifndef _LP64
4541                 /* check time validity */
4542                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4543                         return (EOVERFLOW);
4544 #endif
4545                 /*
4546                  * nfs protocol defines times as unsigned so don't extend sign,
4547                  * unless sysadmin set nfs_allow_preepoch_time.
4548                  */
4549                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4550                     sap->atime.atime.seconds);
4551                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4552                 vap->va_mask |= AT_ATIME;
4553         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4554                 gethrestime(&vap->va_atime);
4555                 vap->va_mask |= AT_ATIME;
4556         }
4557         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4558 #ifndef _LP64
4559                 /* check time validity */
4560                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4561                         return (EOVERFLOW);
4562 #endif
4563                 /*
4564                  * nfs protocol defines times as unsigned so don't extend sign,
4565                  * unless sysadmin set nfs_allow_preepoch_time.
4566                  */
4567                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4568                     sap->mtime.mtime.seconds);
4569                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4570                 vap->va_mask |= AT_MTIME;
4571         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4572                 gethrestime(&vap->va_mtime);
4573                 vap->va_mask |= AT_MTIME;
4574         }
4575 
4576         return (0);
4577 }
4578 
4579 static ftype3 vt_to_nf3[] = {
4580         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4581 };
4582 
4583 static int
4584 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4585 {
4586 
4587         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4588         /* Return error if time or size overflow */
4589         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4590                 return (EOVERFLOW);
4591         }
4592         fap->type = vt_to_nf3[vap->va_type];
4593         fap->mode = (mode3)(vap->va_mode & MODEMASK);
4594         fap->nlink = (uint32)vap->va_nlink;
4595         if (vap->va_uid == UID_NOBODY)
4596                 fap->uid = (uid3)NFS_UID_NOBODY;
4597         else
4598                 fap->uid = (uid3)vap->va_uid;
4599         if (vap->va_gid == GID_NOBODY)
4600                 fap->gid = (gid3)NFS_GID_NOBODY;
4601         else
4602                 fap->gid = (gid3)vap->va_gid;
4603         fap->size = (size3)vap->va_size;
4604         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4605         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4606         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4607         fap->fsid = (uint64)vap->va_fsid;
4608         fap->fileid = (fileid3)vap->va_nodeid;
4609         fap->atime.seconds = vap->va_atime.tv_sec;
4610         fap->atime.nseconds = vap->va_atime.tv_nsec;
4611         fap->mtime.seconds = vap->va_mtime.tv_sec;
4612         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4613         fap->ctime.seconds = vap->va_ctime.tv_sec;
4614         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4615         return (0);
4616 }
4617 
4618 static int
4619 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4620 {
4621 
4622         /* Return error if time or size overflow */
4623         if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4624             NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4625             NFS3_SIZE_OK(vap->va_size))) {
4626                 return (EOVERFLOW);
4627         }
4628         wccap->size = (size3)vap->va_size;
4629         wccap->mtime.seconds = vap->va_mtime.tv_sec;
4630         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4631         wccap->ctime.seconds = vap->va_ctime.tv_sec;
4632         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4633         return (0);
4634 }
4635 
4636 static void
4637 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4638 {
4639 
4640         /* don't return attrs if time overflow */
4641         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4642                 poap->attributes = TRUE;
4643         } else
4644                 poap->attributes = FALSE;
4645 }
4646 
4647 void
4648 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4649 {
4650 
4651         /* don't return attrs if time overflow */
4652         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4653                 poap->attributes = TRUE;
4654         } else
4655                 poap->attributes = FALSE;
4656 }
4657 
4658 static void
4659 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4660 {
4661 
4662         vattr_to_pre_op_attr(bvap, &wccp->before);
4663         vattr_to_post_op_attr(avap, &wccp->after);
4664 }
4665 
4666 void
4667 rfs3_srvrinit(void)
4668 {
4669         struct rfs3_verf_overlay {
4670                 uint_t id; /* a "unique" identifier */
4671                 int ts; /* a unique timestamp */
4672         } *verfp;
4673         timestruc_t now;
4674 
4675         /*
4676          * The following algorithm attempts to find a unique verifier
4677          * to be used as the write verifier returned from the server
4678          * to the client.  It is important that this verifier change
4679          * whenever the server reboots.  Of secondary importance, it
4680          * is important for the verifier to be unique between two
4681          * different servers.
4682          *
4683          * Thus, an attempt is made to use the system hostid and the
4684          * current time in seconds when the nfssrv kernel module is
4685          * loaded.  It is assumed that an NFS server will not be able
4686          * to boot and then to reboot in less than a second.  If the
4687          * hostid has not been set, then the current high resolution
4688          * time is used.  This will ensure different verifiers each
4689          * time the server reboots and minimize the chances that two
4690          * different servers will have the same verifier.
4691          */
4692 
4693 #ifndef lint
4694         /*
4695          * We ASSERT that this constant logic expression is
4696          * always true because in the past, it wasn't.
4697          */
4698         ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4699 #endif
4700 
4701         gethrestime(&now);
4702         verfp = (struct rfs3_verf_overlay *)&write3verf;
4703         verfp->ts = (int)now.tv_sec;
4704         verfp->id = zone_get_hostid(NULL);
4705 
4706         if (verfp->id == 0)
4707                 verfp->id = (uint_t)now.tv_nsec;
4708 
4709         nfs3_srv_caller_id = fs_new_caller_id();
4710 
4711 }
4712 
4713 static int
4714 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4715 {
4716         struct clist    *wcl;
4717         int             wlist_len;
4718         count3          count = rok->count;
4719 
4720         wcl = args->wlist;
4721         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4722                 return (FALSE);
4723         }
4724 
4725         wcl = args->wlist;
4726         rok->wlist_len = wlist_len;
4727         rok->wlist = wcl;
4728         return (TRUE);
4729 }
4730 
/*
 * NFSv3 server fini routine.  Intentionally empty: there is nothing
 * to do here.
 */
void
rfs3_srvrfini(void)
{
}