Print this page
    
7378 exported_lock held during nfs4 compound processing
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/nfs/nfs3_srv.c
          +++ new/usr/src/uts/common/fs/nfs/nfs3_srv.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  24   24   * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  25   25   * Copyright (c) 2013 by Delphix. All rights reserved.
  26   26   */
  27   27  
  28   28  /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29   29  /* All Rights Reserved */
  30   30  
  31   31  #include <sys/param.h>
  32   32  #include <sys/types.h>
  33   33  #include <sys/systm.h>
  34   34  #include <sys/cred.h>
  35   35  #include <sys/buf.h>
  36   36  #include <sys/vfs.h>
  37   37  #include <sys/vnode.h>
  38   38  #include <sys/uio.h>
  39   39  #include <sys/errno.h>
  40   40  #include <sys/sysmacros.h>
  41   41  #include <sys/statvfs.h>
  42   42  #include <sys/kmem.h>
  43   43  #include <sys/dirent.h>
  44   44  #include <sys/cmn_err.h>
  45   45  #include <sys/debug.h>
  46   46  #include <sys/systeminfo.h>
  47   47  #include <sys/flock.h>
  48   48  #include <sys/nbmlock.h>
  49   49  #include <sys/policy.h>
  50   50  #include <sys/sdt.h>
  51   51  
  52   52  #include <rpc/types.h>
  53   53  #include <rpc/auth.h>
  54   54  #include <rpc/svc.h>
  55   55  #include <rpc/rpc_rdma.h>
  56   56  
  57   57  #include <nfs/nfs.h>
  58   58  #include <nfs/export.h>
  59   59  #include <nfs/nfs_cmd.h>
  60   60  
  61   61  #include <sys/strsubr.h>
  62   62  #include <sys/tsol/label.h>
  63   63  #include <sys/tsol/tndb.h>
  64   64  
  65   65  #include <sys/zone.h>
  66   66  
  67   67  #include <inet/ip.h>
  68   68  #include <inet/ip6.h>
  69   69  
  70   70  /*
  71   71   * These are the interface routines for the server side of the
  72   72   * Network File System.  See the NFS version 3 protocol specification
  73   73   * for a description of this interface.
  74   74   */
  75   75  
           /*
            * File-scope write verifier.
            * NOTE(review): presumably the verifier handed back by the WRITE/COMMIT
            * handlers; it is not referenced in this part of the file -- confirm.
            */
  76   76  static writeverf3 write3verf;
  77   77  
           /*
            * Prototypes for file-local conversion helpers between the VFS
            * "struct vattr" and the NFSv3 wire attribute types.  The int-returning
            * ones report an error to the caller (e.g. rfs3_getattr() treats a
            * non-zero return from vattr_to_fattr3() as a failure).
            */
  78   78  static int      sattr3_to_vattr(sattr3 *, struct vattr *);
  79   79  static int      vattr_to_fattr3(struct vattr *, fattr3 *);
  80   80  static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
  81   81  static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
  82   82  static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
  83   83  static int      rdma_setup_read_data3(READ3args *, READ3resok *);
  84   84  
           /* Defined elsewhere; not referenced in this part of the file. */
  85   85  extern int nfs_loaned_buffers;
  86   86  
           /*
            * Caller id stored in caller_context_t.cc_caller_id for VOP calls made
            * on behalf of NFSv3 clients (see rfs3_setattr()).
            */
  87   87  u_longlong_t nfs3_srv_caller_id;
  88   88  
           /*
            * rfs3_getattr: service routine for the NFSv3 GETATTR procedure.
            *
            * Maps the file handle in args->object to a vnode with nfs3_fhtovp(),
            * fetches all attributes with rfs4_delegated_getattr() and converts them
            * into resp->resok.obj_attributes.
            *
            * On success resp->status is NFS3_OK.  On failure resp->status is
            * NFS3ERR_JUKEBOX when the current thread has T_WOULDBLOCK set (the flag
            * is cleared here), otherwise puterrno3(error).  The vnode hold, if one
            * was obtained, is dropped on every path.  The "ro" argument is unused.
            */
  89   89  /* ARGSUSED */
  90   90  void
  91   91  rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
  92   92      struct svc_req *req, cred_t *cr, bool_t ro)
  93   93  {
  94   94          int error;
  95   95          vnode_t *vp;
  96   96          struct vattr va;
  97   97  
  98   98          vp = nfs3_fhtovp(&args->object, exi);
  99   99  
                   /* The start probe fires even when the handle is stale (vp NULL). */
 100  100          DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
 101  101              cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
 102  102  
 103  103          if (vp == NULL) {
 104  104                  error = ESTALE;
 105  105                  goto out;
 106  106          }
 107  107  
 108  108          va.va_mask = AT_ALL;
 109  109          error = rfs4_delegated_getattr(vp, &va, 0, cr);
 110  110  
 111  111          if (!error) {
 112  112                  /* Lie about the object type for a referral */
 113  113                  if (vn_is_nfs_reparse(vp, cr))
 114  114                          va.va_type = VLNK;
 115  115  
 116  116                  /* overflow error if time or size is out of range */
 117  117                  error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
 118  118                  if (error)
 119  119                          goto out;
 120  120                  resp->status = NFS3_OK;
 121  121  
 122  122                  DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 123  123                      cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 124  124  
 125  125                  VN_RELE(vp);
 126  126  
 127  127                  return;
 128  128          }
 129  129  
                   /*
                    * Failure exit.  Reached with vp == NULL (stale handle) or with a
                    * held vp (attribute fetch or conversion failed).
                    */
 130  130  out:
 131  131          if (curthread->t_flag & T_WOULDBLOCK) {
 132  132                  curthread->t_flag &= ~T_WOULDBLOCK;
 133  133                  resp->status = NFS3ERR_JUKEBOX;
 134  134          } else
 135  135                  resp->status = puterrno3(error);
 136  136  
 137  137          DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
 138  138              cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
 139  139  
 140  140          if (vp != NULL)
 141  141                  VN_RELE(vp);
 142  142  }
 143  143  
           /*
            * Return a pointer to the file handle embedded in the GETATTR arguments
            * (args->object).
            * NOTE(review): presumably called by the dispatcher to locate the export
            * before rfs3_getattr() runs -- confirm against the dispatch table.
            */
 144  144  void *
 145  145  rfs3_getattr_getfh(GETATTR3args *args)
 146  146  {
 147  147  
 148  148          return (&args->object);
 149  149  }
 150  150  
           /*
            * rfs3_setattr: service routine for the NFSv3 SETATTR procedure.
            *
            * Converts args->new_attributes to a vattr and applies it to the object
            * named by args->object.  A size change on a regular file made by the
            * file's owner is carried out with VOP_SPACE(F_FREESP); everything that
            * remains in the mask is applied with VOP_SETATTR().
            *
            * bvap/avap point at the pre- and post-operation attributes (bva/ava on
            * the stack) or are NULL when those attributes are not available; they
            * are reported through vattr_to_wcc_data() in both the success and the
            * failure reply.
            *
            * Exit labels:
            *   out  - translate "error" into resp->status (NFS3ERR_JUKEBOX when the
            *          thread has T_WOULDBLOCK set, else puterrno3(error)).
            *   out1 - resp->status has already been set by the caller of the goto.
            * Both drop the NBMAND critical region (if entered) and the vnode hold.
            */
 151  151  void
 152  152  rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
 153  153      struct svc_req *req, cred_t *cr, bool_t ro)
 154  154  {
 155  155          int error;
 156  156          vnode_t *vp;
 157  157          struct vattr *bvap;
 158  158          struct vattr bva;
 159  159          struct vattr *avap;
 160  160          struct vattr ava;
 161  161          int flag;
 162  162          int in_crit = 0;
 163  163          struct flock64 bf;
 164  164          caller_context_t ct;
 165  165  
 166  166          bvap = NULL;
 167  167          avap = NULL;
 168  168  
 169  169          vp = nfs3_fhtovp(&args->object, exi);
 170  170  
 171  171          DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
 172  172              cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
 173  173  
 174  174          if (vp == NULL) {
 175  175                  error = ESTALE;
 176  176                  goto out;
 177  177          }
 178  178  
 179  179          error = sattr3_to_vattr(&args->new_attributes, &ava);
 180  180          if (error)
 181  181                  goto out;
 182  182  
                   /*
                    * On a labeled system a client that is not admin_low must pass
                    * an equality label check against the object, else NFS3ERR_ACCES.
                    */
 183  183          if (is_system_labeled()) {
 184  184                  bslabel_t *clabel = req->rq_label;
 185  185  
 186  186                  ASSERT(clabel != NULL);
 187  187                  DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
 188  188                      "got client label from request(1)", struct svc_req *, req);
 189  189  
 190  190                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
 191  191                          if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
 192  192                              exi)) {
 193  193                                  resp->status = NFS3ERR_ACCES;
 194  194                                  goto out1;
 195  195                          }
 196  196                  }
 197  197          }
 198  198  
 199  199          /*
 200  200           * We need to specially handle size changes because of
 201  201           * possible conflicting NBMAND locks. Get into critical
 202  202           * region before VOP_GETATTR, so the size attribute is
 203  203           * valid when checking conflicts.
 204  204           *
 205  205           * Also, check to see if the v4 side of the server has
 206  206           * delegated this file.  If so, then we return JUKEBOX to
 207  207           * allow the client to retransmit its request.
 208  208           */
 209  209          if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 210  210                  if (nbl_need_check(vp)) {
 211  211                          nbl_start_crit(vp, RW_READER);
 212  212                          in_crit = 1;
 213  213                  }
 214  214          }
 215  215  
 216  216          bva.va_mask = AT_ALL;
 217  217          error = rfs4_delegated_getattr(vp, &bva, 0, cr);
 218  218  
 219  219          /*
 220  220           * If we can't get the attributes, then we can't do the
 221  221           * right access checking.  So, we'll fail the request.
 222  222           */
 223  223          if (error)
 224  224                  goto out;
 225  225  
 226  226          bvap = &bva;
 227  227  
 228  228          if (rdonly(ro, vp)) {
 229  229                  resp->status = NFS3ERR_ROFS;
 230  230                  goto out1;
 231  231          }
 232  232  
                   /*
                    * Guarded SETATTR: the client-supplied ctime must match the
                    * object's current ctime exactly, else NFS3ERR_NOT_SYNC.
                    */
 233  233          if (args->guard.check &&
 234  234              (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
 235  235              args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
 236  236                  resp->status = NFS3ERR_NOT_SYNC;
 237  237                  goto out1;
 238  238          }
 239  239  
 240  240          if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
 241  241                  flag = ATTR_UTIME;
 242  242          else
 243  243                  flag = 0;
 244  244  
 245  245          /*
 246  246           * If the filesystem is exported with nosuid, then mask off
 247  247           * the setuid and setgid bits.
 248  248           */
 249  249          if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
 250  250              (exi->exi_export.ex_flags & EX_NOSUID))
 251  251                  ava.va_mode &= ~(VSUID | VSGID);
 252  252  
                   /*
                    * Caller context for the VOP calls below.  CC_DONTBLOCK is set on
                    * the way in; CC_WOULDBLOCK is tested after the calls to detect a
                    * delegation conflict.
                    */
 253  253          ct.cc_sysid = 0;
 254  254          ct.cc_pid = 0;
 255  255          ct.cc_caller_id = nfs3_srv_caller_id;
 256  256          ct.cc_flags = CC_DONTBLOCK;
 257  257  
 258  258          /*
 259  259           * We need to specially handle size changes because it is
 260  260           * possible for the client to create a file with modes
 261  261           * which indicate read-only, but with the file opened for
 262  262           * writing.  If the client then tries to set the size of
 263  263           * the file, then the normal access checking done in
 264  264           * VOP_SETATTR would prevent the client from doing so,
 265  265           * although it should be legal for it to do so.  To get
 266  266           * around this, we do the access checking for ourselves
 267  267           * and then use VOP_SPACE which doesn't do the access
 268  268           * checking which VOP_SETATTR does. VOP_SPACE can only
 269  269           * operate on VREG files, let VOP_SETATTR handle the other
 270  270           * extremely rare cases.
 271  271           * Also the client should not be allowed to change the
 272  272           * size of the file if there is a conflicting non-blocking
 273  273           * mandatory lock in the region of the change.
 274  274           */
 275  275          if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
 276  276                  if (in_crit) {
 277  277                          u_offset_t offset;
 278  278                          ssize_t length;
 279  279  
                                   /*
                                    * The affected region is the span between the old
                                    * and the new size, whichever direction it goes.
                                    */
 280  280                          if (ava.va_size < bva.va_size) {
 281  281                                  offset = ava.va_size;
 282  282                                  length = bva.va_size - ava.va_size;
 283  283                          } else {
 284  284                                  offset = bva.va_size;
 285  285                                  length = ava.va_size - bva.va_size;
 286  286                          }
 287  287                          if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 288  288                              NULL)) {
 289  289                                  error = EACCES;
 290  290                                  goto out;
 291  291                          }
 292  292                  }
 293  293  
                           /*
                            * Only the owner gets the VOP_SPACE shortcut; AT_SIZE is
                            * removed from the mask so VOP_SETATTR does not redo it.
                            */
 294  294                  if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
 295  295                          ava.va_mask &= ~AT_SIZE;
 296  296                          bf.l_type = F_WRLCK;
 297  297                          bf.l_whence = 0;
 298  298                          bf.l_start = (off64_t)ava.va_size;
 299  299                          bf.l_len = 0;
 300  300                          bf.l_sysid = 0;
 301  301                          bf.l_pid = 0;
 302  302                          error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 303  303                              (offset_t)ava.va_size, cr, &ct);
 304  304                  }
 305  305          }
 306  306  
                   /* Apply whatever is left in the mask, unless VOP_SPACE failed. */
 307  307          if (!error && ava.va_mask)
 308  308                  error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
 309  309  
 310  310          /* check if a monitor detected a delegation conflict */
 311  311          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 312  312                  resp->status = NFS3ERR_JUKEBOX;
 313  313                  goto out1;
 314  314          }
 315  315  
                   /*
                    * ava is reused for the post-operation attributes; they are
                    * collected even when the set failed so the failure reply can
                    * carry them.
                    */
 316  316          ava.va_mask = AT_ALL;
 317  317          avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
 318  318  
 319  319          /*
 320  320           * Force modified metadata out to stable storage.
 321  321           */
 322  322          (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 323  323  
 324  324          if (error)
 325  325                  goto out;
 326  326  
 327  327          if (in_crit)
 328  328                  nbl_end_crit(vp);
 329  329  
 330  330          resp->status = NFS3_OK;
 331  331          vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
 332  332  
 333  333          DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 334  334              cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 335  335  
 336  336          VN_RELE(vp);
 337  337  
 338  338          return;
 339  339  
 340  340  out:
 341  341          if (curthread->t_flag & T_WOULDBLOCK) {
 342  342                  curthread->t_flag &= ~T_WOULDBLOCK;
 343  343                  resp->status = NFS3ERR_JUKEBOX;
 344  344          } else
 345  345                  resp->status = puterrno3(error);
 346  346  out1:
 347  347          DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
 348  348              cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
 349  349  
 350  350          if (vp != NULL) {
 351  351                  if (in_crit)
 352  352                          nbl_end_crit(vp);
 353  353                  VN_RELE(vp);
 354  354          }
                   /*
                    * bvap/avap reference the on-stack bva/ava copies, so they are
                    * still valid after the vnode has been released.
                    */
 355  355          vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
 356  356  }
 357  357  
           /*
            * Return a pointer to the file handle embedded in the SETATTR arguments
            * (args->object).
            * NOTE(review): presumably called by the dispatcher to locate the export
            * before rfs3_setattr() runs -- confirm against the dispatch table.
            */
 358  358  void *
 359  359  rfs3_setattr_getfh(SETATTR3args *args)
 360  360  {
 361  361  
 362  362          return (&args->object);
 363  363  }
 364  364  
 365  365  /* ARGSUSED */
 366  366  void
 367  367  rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
 368  368      struct svc_req *req, cred_t *cr, bool_t ro)
 369  369  {
 370  370          int error;
 371  371          vnode_t *vp;
 372  372          vnode_t *dvp;
 373  373          struct vattr *vap;
 374  374          struct vattr va;
 375  375          struct vattr *dvap;
 376  376          struct vattr dva;
 377  377          nfs_fh3 *fhp;
 378  378          struct sec_ol sec = {0, 0};
 379  379          bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 380  380          struct sockaddr *ca;
 381  381          char *name = NULL;
 382  382  
 383  383          dvap = NULL;
 384  384  
 385  385          /*
 386  386           * Allow lookups from the root - the default
 387  387           * location of the public filehandle.
 388  388           */
 389  389          if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 390  390                  dvp = rootdir;
 391  391                  VN_HOLD(dvp);
 392  392  
 393  393                  DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 394  394                      cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 395  395          } else {
 396  396                  dvp = nfs3_fhtovp(&args->what.dir, exi);
 397  397  
 398  398                  DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
 399  399                      cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
 400  400  
 401  401                  if (dvp == NULL) {
 402  402                          error = ESTALE;
 403  403                          goto out;
 404  404                  }
 405  405          }
 406  406  
 407  407          dva.va_mask = AT_ALL;
 408  408          dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 409  409  
 410  410          if (args->what.name == nfs3nametoolong) {
 411  411                  resp->status = NFS3ERR_NAMETOOLONG;
 412  412                  goto out1;
 413  413          }
 414  414  
 415  415          if (args->what.name == NULL || *(args->what.name) == '\0') {
 416  416                  resp->status = NFS3ERR_ACCES;
 417  417                  goto out1;
 418  418          }
 419  419  
 420  420          fhp = &args->what.dir;
 421  421          if (strcmp(args->what.name, "..") == 0 &&
 422  422              EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
 423  423                  resp->status = NFS3ERR_NOENT;
 424  424                  goto out1;
 425  425          }
 426  426  
 427  427          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 428  428          name = nfscmd_convname(ca, exi, args->what.name,
 429  429              NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
 430  430  
 431  431          if (name == NULL) {
 432  432                  resp->status = NFS3ERR_ACCES;
 433  433                  goto out1;
 434  434          }
 435  435  
 436  436          /*
 437  437           * If the public filehandle is used then allow
 438  438           * a multi-component lookup
 439  439           */
 440  440          if (PUBLIC_FH3(&args->what.dir)) {
 441  441                  publicfh_flag = TRUE;
 442  442                  error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
 443  443                      &exi, &sec);
 444  444                  if (error && exi != NULL)
 445  445                          exi_rele(exi); /* See comment below Re: publicfh_flag */
 446  446                  /*
 447  447                   * Since WebNFS may bypass MOUNT, we need to ensure this
 448  448                   * request didn't come from an unlabeled admin_low client.
 449  449                   */
 450  450                  if (is_system_labeled() && error == 0) {
 451  451                          int             addr_type;
 452  452                          void            *ipaddr;
 453  453                          tsol_tpc_t      *tp;
 454  454  
 455  455                          if (ca->sa_family == AF_INET) {
 456  456                                  addr_type = IPV4_VERSION;
 457  457                                  ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
 458  458                          } else if (ca->sa_family == AF_INET6) {
 459  459                                  addr_type = IPV6_VERSION;
 460  460                                  ipaddr = &((struct sockaddr_in6 *)
 461  461                                      ca)->sin6_addr;
 462  462                          }
 463  463                          tp = find_tpc(ipaddr, addr_type, B_FALSE);
 464  464                          if (tp == NULL || tp->tpc_tp.tp_doi !=
 465  465                              l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
 466  466                              SUN_CIPSO) {
 467  467                                  if (exi != NULL)
 468  468                                          exi_rele(exi);
 469  469                                  VN_RELE(vp);
 470  470                                  error = EACCES;
 471  471                          }
 472  472                          if (tp != NULL)
 473  473                                  TPC_RELE(tp);
 474  474                  }
 475  475          } else {
 476  476                  error = VOP_LOOKUP(dvp, name, &vp,
 477  477                      NULL, 0, NULL, cr, NULL, NULL, NULL);
 478  478          }
 479  479  
 480  480          if (name != args->what.name)
 481  481                  kmem_free(name, MAXPATHLEN + 1);
 482  482  
 483  483          if (is_system_labeled() && error == 0) {
 484  484                  bslabel_t *clabel = req->rq_label;
 485  485  
 486  486                  ASSERT(clabel != NULL);
 487  487                  DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
 488  488                      "got client label from request(1)", struct svc_req *, req);
 489  489  
 490  490                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
 491  491                          if (!do_rfs_label_check(clabel, dvp,
 492  492                              DOMINANCE_CHECK, exi)) {
 493  493                                  if (publicfh_flag && exi != NULL)
 494  494                                          exi_rele(exi);
 495  495                                  VN_RELE(vp);
 496  496                                  error = EACCES;
 497  497                          }
 498  498                  }
 499  499          }
 500  500  
 501  501          dva.va_mask = AT_ALL;
 502  502          dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
 503  503  
 504  504          if (error)
 505  505                  goto out;
 506  506  
 507  507          if (sec.sec_flags & SEC_QUERY) {
 508  508                  error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
 509  509          } else {
 510  510                  error = makefh3(&resp->resok.object, vp, exi);
 511  511                  if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
 512  512                          auth_weak = TRUE;
 513  513          }
 514  514  
 515  515          /*
 516  516           * If publicfh_flag is true then we have called rfs_publicfh_mclookup
 517  517           * and have obtained a new exportinfo in exi which needs to be
 518  518           * released. Note that the original exportinfo pointed to by exi
 519  519           * will be released by the caller, common_dispatch.
 520  520           */
 521  521          if (publicfh_flag)
 522  522                  exi_rele(exi);
 523  523  
 524  524          if (error) {
 525  525                  VN_RELE(vp);
 526  526                  goto out;
 527  527          }
 528  528  
 529  529          va.va_mask = AT_ALL;
 530  530          vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 531  531  
 532  532          VN_RELE(vp);
 533  533  
 534  534          resp->status = NFS3_OK;
 535  535          vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 536  536          vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
 537  537  
 538  538          /*
 539  539           * If it's public fh, no 0x81, and client's flavor is
 540  540           * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 541  541           * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 542  542           */
 543  543          if (auth_weak)
 544  544                  resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
 545  545  
 546  546          DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 547  547              cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 548  548          VN_RELE(dvp);
 549  549  
 550  550          return;
 551  551  
 552  552  out:
 553  553          if (curthread->t_flag & T_WOULDBLOCK) {
 554  554                  curthread->t_flag &= ~T_WOULDBLOCK;
 555  555                  resp->status = NFS3ERR_JUKEBOX;
 556  556          } else
 557  557                  resp->status = puterrno3(error);
 558  558  out1:
 559  559          DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
 560  560              cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
 561  561  
 562  562          if (dvp != NULL)
 563  563                  VN_RELE(dvp);
 564  564          vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
 565  565  
 566  566  }
 567  567  
           /*
            * Return a pointer to the directory file handle embedded in the LOOKUP
            * arguments (args->what.dir).
            * NOTE(review): presumably called by the dispatcher to locate the export
            * before rfs3_lookup() runs -- confirm against the dispatch table.
            */
 568  568  void *
 569  569  rfs3_lookup_getfh(LOOKUP3args *args)
 570  570  {
 571  571  
 572  572          return (&args->what.dir);
 573  573  }
 574  574  
           /*
            * rfs3_access: service routine for the NFSv3 ACCESS procedure.
            *
            * For each access bit requested in args->access, asks VOP_ACCESS()
            * whether "cr" has the corresponding permission on the object named by
            * args->object and, if so, sets that bit in resp->resok.access.  A
            * denied permission is not an error: the bit is simply left clear.  Only
            * a would-block condition (T_WOULDBLOCK set on the thread) aborts the
            * request, which is then answered with NFS3ERR_JUKEBOX.
            *
            * Additional restrictions applied here:
            *   - write-type bits are not checked for regular files and directories
            *     when rdonly(ro, vp) is true;
            *   - READ, MODIFY/EXTEND and EXECUTE are withheld when MANDLOCK() is
            *     true for the object;
            *   - on a labeled system a non-admin_low client needs a dominating
            *     label for READ/LOOKUP/EXECUTE and an equal label for
            *     MODIFY/EXTEND/DELETE.
            */
 575  575  /* ARGSUSED */
 576  576  void
 577  577  rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
 578  578      struct svc_req *req, cred_t *cr, bool_t ro)
 579  579  {
 580  580          int error;
 581  581          vnode_t *vp;
 582  582          struct vattr *vap;
 583  583          struct vattr va;
 584  584          int checkwriteperm;
 585  585          boolean_t dominant_label = B_FALSE;
 586  586          boolean_t equal_label = B_FALSE;
                   /*
                    * Assigned only when is_system_labeled() is true; every read
                    * below is guarded by "!is_system_labeled() ||".
                    */
 587  587          boolean_t admin_low_client;
 588  588  
 589  589          vap = NULL;
 590  590  
 591  591          vp = nfs3_fhtovp(&args->object, exi);
 592  592  
 593  593          DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
 594  594              cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
 595  595  
 596  596          if (vp == NULL) {
 597  597                  error = ESTALE;
 598  598                  goto out;
 599  599          }
 600  600  
 601  601          /*
 602  602           * If the file system is exported read only, it is not appropriate
 603  603           * to check write permissions for regular files and directories.
 604  604           * Special files are interpreted by the client, so the underlying
 605  605           * permissions are sent back to the client for interpretation.
 606  606           */
 607  607          if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
 608  608                  checkwriteperm = 0;
 609  609          else
 610  610                  checkwriteperm = 1;
 611  611  
 612  612          /*
 613  613           * We need the mode so that we can correctly determine access
 614  614           * permissions relative to a mandatory lock file.  Access to
 615  615           * mandatory lock files is denied on the server, so it might
 616  616           * as well be reflected to the server during the open.
 617  617           * NOTE(review): the last "server" above presumably should read
 618  618           * "client" -- confirm.
 617  617           */
 618  618          va.va_mask = AT_MODE;
 619  619          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 620  620          if (error)
 621  621                  goto out;
 622  622  
 623  623          vap = &va;
 624  624  
 625  625          resp->resok.access = 0;
 626  626  
                   /*
                    * Work out how the client's label relates to the object's:
                    * equal implies dominant; an admin_low client skips both checks.
                    */
 627  627          if (is_system_labeled()) {
 628  628                  bslabel_t *clabel = req->rq_label;
 629  629  
 630  630                  ASSERT(clabel != NULL);
 631  631                  DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
 632  632                      "got client label from request(1)", struct svc_req *, req);
 633  633  
 634  634                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
 635  635                          if ((equal_label = do_rfs_label_check(clabel, vp,
 636  636                              EQUALITY_CHECK, exi)) == B_FALSE) {
 637  637                                  dominant_label = do_rfs_label_check(clabel,
 638  638                                      vp, DOMINANCE_CHECK, exi);
 639  639                          } else
 640  640                                  dominant_label = B_TRUE;
 641  641                          admin_low_client = B_FALSE;
 642  642                  } else
 643  643                          admin_low_client = B_TRUE;
 644  644          }
 645  645  
                   /*
                    * In each check below a VOP_ACCESS() failure only aborts the
                    * request when the thread would block; any other failure just
                    * leaves the corresponding bit clear.
                    */
 646  646          if (args->access & ACCESS3_READ) {
 647  647                  error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
 648  648                  if (error) {
 649  649                          if (curthread->t_flag & T_WOULDBLOCK)
 650  650                                  goto out;
 651  651                  } else if (!MANDLOCK(vp, va.va_mode) &&
 652  652                      (!is_system_labeled() || admin_low_client ||
 653  653                      dominant_label))
 654  654                          resp->resok.access |= ACCESS3_READ;
 655  655          }
 656  656          if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
 657  657                  error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 658  658                  if (error) {
 659  659                          if (curthread->t_flag & T_WOULDBLOCK)
 660  660                                  goto out;
 661  661                  } else if (!is_system_labeled() || admin_low_client ||
 662  662                      dominant_label)
 663  663                          resp->resok.access |= ACCESS3_LOOKUP;
 664  664          }
 665  665          if (checkwriteperm &&
 666  666              (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
 667  667                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 668  668                  if (error) {
 669  669                          if (curthread->t_flag & T_WOULDBLOCK)
 670  670                                  goto out;
 671  671                  } else if (!MANDLOCK(vp, va.va_mode) &&
 672  672                      (!is_system_labeled() || admin_low_client || equal_label)) {
 673  673                          resp->resok.access |=
 674  674                              (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
 675  675                  }
 676  676          }
 677  677          if (checkwriteperm &&
 678  678              (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
 679  679                  error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
 680  680                  if (error) {
 681  681                          if (curthread->t_flag & T_WOULDBLOCK)
 682  682                                  goto out;
 683  683                  } else if (!is_system_labeled() || admin_low_client ||
 684  684                      equal_label)
 685  685                          resp->resok.access |= ACCESS3_DELETE;
 686  686          }
 687  687          if (args->access & ACCESS3_EXECUTE) {
 688  688                  error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
 689  689                  if (error) {
 690  690                          if (curthread->t_flag & T_WOULDBLOCK)
 691  691                                  goto out;
 692  692                  } else if (!MANDLOCK(vp, va.va_mode) &&
 693  693                      (!is_system_labeled() || admin_low_client ||
 694  694                      dominant_label))
 695  695                          resp->resok.access |= ACCESS3_EXECUTE;
 696  696          }
 697  697  
                   /* va is reused: fetch the full attribute set for the reply. */
 698  698          va.va_mask = AT_ALL;
 699  699          vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
 700  700  
 701  701          resp->status = NFS3_OK;
 702  702          vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
 703  703  
 704  704          DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 705  705              cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 706  706  
 707  707          VN_RELE(vp);
 708  708  
 709  709          return;
 710  710  
                   /*
                    * NOTE(review): when this label is reached from one of the
                    * would-block exits above, vap points at a va that was fetched
                    * with only AT_MODE requested, yet it is handed to
                    * vattr_to_post_op_attr() below -- confirm that helper copes
                    * with a partially filled vattr.
                    */
 711  711  out:
 712  712          if (curthread->t_flag & T_WOULDBLOCK) {
 713  713                  curthread->t_flag &= ~T_WOULDBLOCK;
 714  714                  resp->status = NFS3ERR_JUKEBOX;
 715  715          } else
 716  716                  resp->status = puterrno3(error);
 717  717          DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
 718  718              cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
 719  719          if (vp != NULL)
 720  720                  VN_RELE(vp);
 721  721          vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
 722  722  }
 723  723  
           /*
            * Return a pointer to the file handle (args->object) carried in the
            * ACCESS3 arguments.
            */
 724  724  void *
 725  725  rfs3_access_getfh(ACCESS3args *args)
 726  726  {
 727  727  
 728  728          return (&args->object);
 729  729  }
 730  730  
           /*
            * Server routine to handle the NFSv3 READLINK request.
            *
            * Maps args->symlink to a vnode, checks that it is a symbolic link (or a
            * referral, as reported by vn_is_nfs_reparse()), reads the link text into
            * a MAXPATHLEN + 1 byte buffer and fills in resp.  On success
            * resp->resok.data points either at that buffer or at the buffer returned
            * by nfscmd_convname(); that buffer is not freed in this function.
            */
 731  731  /* ARGSUSED */
 732  732  void
 733  733  rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
 734  734      struct svc_req *req, cred_t *cr, bool_t ro)
 735  735  {
 736  736          int error;
 737  737          vnode_t *vp;
 738  738          struct vattr *vap;
 739  739          struct vattr va;
 740  740          struct iovec iov;
 741  741          struct uio uio;
 742  742          char *data;
 743  743          struct sockaddr *ca;
 744  744          char *name = NULL;
 745  745          int is_referral = 0;
 746  746  
 747  747          vap = NULL;
 748  748  
 749  749          vp = nfs3_fhtovp(&args->symlink, exi);
 750  750  
 751  751          DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
 752  752              cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
 753  753  
 754  754          if (vp == NULL) {
 755  755                  error = ESTALE;
 756  756                  goto out;
 757  757          }
 758  758  
 759  759          va.va_mask = AT_ALL;
 760  760          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 761  761          if (error)
 762  762                  goto out;
 763  763  
 764  764          vap = &va;
 765  765  
 766  766          /* We lied about the object type for a referral */
 767  767          if (vn_is_nfs_reparse(vp, cr))
 768  768                  is_referral = 1;
 769  769  
 770  770          if (vp->v_type != VLNK && !is_referral) {
 771  771                  resp->status = NFS3ERR_INVAL;
 772  772                  goto out1;
 773  773          }
 774  774  
 775  775          if (MANDLOCK(vp, va.va_mode)) {
 776  776                  resp->status = NFS3ERR_ACCES;
 777  777                  goto out1;
 778  778          }
 779  779  
 780  780          if (is_system_labeled()) {
 781  781                  bslabel_t *clabel = req->rq_label;
 782  782  
 783  783                  ASSERT(clabel != NULL);
 784  784                  DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
 785  785                      "got client label from request(1)", struct svc_req *, req);
 786  786  
 787  787                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
 788  788                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 789  789                              exi)) {
 790  790                                  resp->status = NFS3ERR_ACCES;
 791  791                                  goto out1;
 792  792                          }
 793  793                  }
 794  794          }
 795  795  
                   /* Room for MAXPATHLEN bytes of link text plus a terminating NUL. */
 796  796          data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
 797  797  
 798  798          if (is_referral) {
 799  799                  char *s;
 800  800                  size_t strsz;
 801  801  
 802  802                  /* Get an artificial symlink based on a referral */
 803  803                  s = build_symlink(vp, cr, &strsz);
 804  804                  global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
 805  805                  DTRACE_PROBE2(nfs3serv__func__referral__reflink,
 806  806                      vnode_t *, vp, char *, s);
 807  807                  if (s == NULL)
 808  808                          error = EINVAL;
 809  809                  else {
 810  810                          error = 0;
 811  811                          (void) strlcpy(data, s, MAXPATHLEN + 1);
 812  812                          kmem_free(s, strsz);
 813  813                  }
 814  814  
 815  815          } else {
 816  816  
 817  817                  iov.iov_base = data;
 818  818                  iov.iov_len = MAXPATHLEN;
 819  819                  uio.uio_iov = &iov;
 820  820                  uio.uio_iovcnt = 1;
 821  821                  uio.uio_segflg = UIO_SYSSPACE;
 822  822                  uio.uio_extflg = UIO_COPY_CACHED;
 823  823                  uio.uio_loffset = 0;
 824  824                  uio.uio_resid = MAXPATHLEN;
 825  825  
 826  826                  error = VOP_READLINK(vp, &uio, cr, NULL);
 827  827  
                           /* Terminate the text after the bytes actually read. */
 828  828                  if (!error)
 829  829                          *(data + MAXPATHLEN - uio.uio_resid) = '\0';
 830  830          }
 831  831  
                   /*
                    * Fetch the attributes again for the reply; if this fails the
                    * reply simply carries no attributes (vap is NULL).
                    */
 832  832          va.va_mask = AT_ALL;
 833  833          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
 834  834  
 835  835          /* Lie about object type again just to be consistent */
 836  836          if (is_referral && vap != NULL)
 837  837                  vap->va_type = VLNK;
 838  838  
 839  839  #if 0 /* notyet */
 840  840          /*
 841  841           * Don't do this.  It causes local disk writes when just
 842  842           * reading the file and the overhead is deemed larger
 843  843           * than the benefit.
 844  844           */
 845  845          /*
 846  846           * Force modified metadata out to stable storage.
 847  847           */
 848  848          (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
 849  849  #endif
 850  850  
                   /* error is still the result of build_symlink()/VOP_READLINK(). */
 851  851          if (error) {
 852  852                  kmem_free(data, MAXPATHLEN + 1);
 853  853                  goto out;
 854  854          }
 855  855  
 856  856          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 857  857          name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
 858  858              MAXPATHLEN + 1);
 859  859  
 860  860          if (name == NULL) {
 861  861                  /*
 862  862                   * Even though the conversion failed, we return
 863  863                   * something. We just don't translate it.
 864  864                   */
 865  865                  name = data;
 866  866          }
 867  867  
 868  868          resp->status = NFS3_OK;
 869  869          vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
 870  870          resp->resok.data = name;
 871  871  
 872  872          DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 873  873              cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 874  874          VN_RELE(vp);
 875  875  
                   /*
                    * The reply references name.  The raw buffer is released here
                    * only when name is a different buffer.
                    */
 876  876          if (name != data)
 877  877                  kmem_free(data, MAXPATHLEN + 1);
 878  878  
 879  879          return;
 880  880  
                   /*
                    * out:  error holds an errno value; it is mapped to an NFS status
                    *       unless T_WOULDBLOCK is set on the thread.
                    * out1: entered directly when resp->status has already been set.
                    */
 881  881  out:
 882  882          if (curthread->t_flag & T_WOULDBLOCK) {
 883  883                  curthread->t_flag &= ~T_WOULDBLOCK;
 884  884                  resp->status = NFS3ERR_JUKEBOX;
 885  885          } else
 886  886                  resp->status = puterrno3(error);
 887  887  out1:
 888  888          DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
 889  889              cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
 890  890          if (vp != NULL)
 891  891                  VN_RELE(vp);
 892  892          vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
 893  893  }
 894  894  
           /*
            * Return a pointer to the file handle (args->symlink) carried in the
            * READLINK3 arguments.
            */
 895  895  void *
 896  896  rfs3_readlink_getfh(READLINK3args *args)
 897  897  {
 898  898  
 899  899          return (&args->symlink);
 900  900  }
 901  901  
           /*
            * Free the MAXPATHLEN + 1 byte link-text buffer referenced by a
            * successful READLINK reply.  Nothing is freed for any other status.
            */
 902  902  void
 903  903  rfs3_readlink_free(READLINK3res *resp)
 904  904  {
 905  905  
 906  906          if (resp->status == NFS3_OK)
 907  907                  kmem_free(resp->resok.data, MAXPATHLEN + 1);
 908  908  }
 909  909  
 910  910  /*
 911  911   * Server routine to handle read
 912  912   * May handle RDMA data as well as mblks
            *
            * The data is read into one of three places, chosen below:
            *   - the chunk obtained from args->wlist via rdma_get_wchunk() when a
            *     write chunk list is present (rdma_used);
            *   - buffers obtained through VOP_REQZCBUF() when nfs_loaned_buffers is
            *     set and RDMA is not in use (loaned_buffers);
            *   - otherwise an mblk allocated by rfs_read_alloc().
 913  913   */
 914  914  /* ARGSUSED */
 915  915  void
 916  916  rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
 917  917      struct svc_req *req, cred_t *cr, bool_t ro)
 918  918  {
 919  919          int error;
 920  920          vnode_t *vp;
 921  921          struct vattr *vap;
 922  922          struct vattr va;
 923  923          struct iovec iov, *iovp = NULL;
 924  924          int iovcnt;
 925  925          struct uio uio;
 926  926          u_offset_t offset;
 927  927          mblk_t *mp = NULL;
 928  928          int in_crit = 0;
 929  929          int need_rwunlock = 0;
 930  930          caller_context_t ct;
 931  931          int rdma_used = 0;
 932  932          int loaned_buffers;
 933  933          struct uio *uiop;
 934  934  
 935  935          vap = NULL;
 936  936  
 937  937          vp = nfs3_fhtovp(&args->file, exi);
 938  938  
 939  939          DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
 940  940              cred_t *, cr, vnode_t *, vp, READ3args *, args);
 941  941  
 942  942          if (vp == NULL) {
 943  943                  error = ESTALE;
 944  944                  goto out;
 945  945          }
 946  946  
                   /* A write chunk list must be long enough for the whole request. */
 947  947          if (args->wlist) {
 948  948                  if (args->count > clist_len(args->wlist)) {
 949  949                          error = EINVAL;
 950  950                          goto out;
 951  951                  }
 952  952                  rdma_used = 1;
 953  953          }
 954  954  
 955  955          /* use loaned buffers for TCP */
 956  956          loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
 957  957  
 958  958          if (is_system_labeled()) {
 959  959                  bslabel_t *clabel = req->rq_label;
 960  960  
 961  961                  ASSERT(clabel != NULL);
 962  962                  DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
 963  963                      "got client label from request(1)", struct svc_req *, req);
 964  964  
 965  965                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
 966  966                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
 967  967                              exi)) {
 968  968                                  resp->status = NFS3ERR_ACCES;
 969  969                                  goto out1;
 970  970                          }
 971  971                  }
 972  972          }
 973  973  
 974  974          ct.cc_sysid = 0;
 975  975          ct.cc_pid = 0;
 976  976          ct.cc_caller_id = nfs3_srv_caller_id;
 977  977          ct.cc_flags = CC_DONTBLOCK;
 978  978  
 979  979          /*
 980  980           * Enter the critical region before calling VOP_RWLOCK
 981  981           * to avoid a deadlock with write requests.
 982  982           */
 983  983          if (nbl_need_check(vp)) {
 984  984                  nbl_start_crit(vp, RW_READER);
 985  985                  in_crit = 1;
 986  986                  if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
 987  987                      NULL)) {
 988  988                          error = EACCES;
 989  989                          goto out;
 990  990                  }
 991  991          }
 992  992  
 993  993          error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
 994  994  
 995  995          /* check if a monitor detected a delegation conflict */
 996  996          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 997  997                  resp->status = NFS3ERR_JUKEBOX;
 998  998                  goto out1;
 999  999          }
1000 1000  
                   /* From here on the out/out1 paths call VOP_RWUNLOCK(). */
1001 1001          need_rwunlock = 1;
1002 1002  
1003 1003          va.va_mask = AT_ALL;
1004 1004          error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1005 1005  
1006 1006          /*
1007 1007           * If we can't get the attributes, then we can't do the
1008 1008           * right access checking.  So, we'll fail the request.
1009 1009           */
1010 1010          if (error)
1011 1011                  goto out;
1012 1012  
1013 1013          vap = &va;
1014 1014  
1015 1015          if (vp->v_type != VREG) {
1016 1016                  resp->status = NFS3ERR_INVAL;
1017 1017                  goto out1;
1018 1018          }
1019 1019  
                   /*
                    * A caller that is not the file's owner needs read permission or,
                    * failing that, execute permission.
                    */
1020 1020          if (crgetuid(cr) != va.va_uid) {
1021 1021                  error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1022 1022                  if (error) {
1023 1023                          if (curthread->t_flag & T_WOULDBLOCK)
1024 1024                                  goto out;
1025 1025                          error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1026 1026                          if (error)
1027 1027                                  goto out;
1028 1028                  }
1029 1029          }
1030 1030  
1031 1031          if (MANDLOCK(vp, va.va_mode)) {
1032 1032                  resp->status = NFS3ERR_ACCES;
1033 1033                  goto out1;
1034 1034          }
1035 1035  
                   /* Read starting at or beyond end of file: empty reply, eof set. */
1036 1036          offset = args->offset;
1037 1037          if (offset >= va.va_size) {
1038 1038                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1039 1039                  if (in_crit)
1040 1040                          nbl_end_crit(vp);
1041 1041                  resp->status = NFS3_OK;
1042 1042                  vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1043 1043                  resp->resok.count = 0;
1044 1044                  resp->resok.eof = TRUE;
1045 1045                  resp->resok.data.data_len = 0;
1046 1046                  resp->resok.data.data_val = NULL;
1047 1047                  resp->resok.data.mp = NULL;
1048 1048                  /* RDMA */
1049 1049                  resp->resok.wlist = args->wlist;
1050 1050                  resp->resok.wlist_len = resp->resok.count;
1051 1051                  if (resp->resok.wlist)
1052 1052                          clist_zero_len(resp->resok.wlist);
1053 1053                  goto done;
1054 1054          }
1055 1055  
                   /* Zero-length read: empty reply, eof not set. */
1056 1056          if (args->count == 0) {
1057 1057                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1058 1058                  if (in_crit)
1059 1059                          nbl_end_crit(vp);
1060 1060                  resp->status = NFS3_OK;
1061 1061                  vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1062 1062                  resp->resok.count = 0;
1063 1063                  resp->resok.eof = FALSE;
1064 1064                  resp->resok.data.data_len = 0;
1065 1065                  resp->resok.data.data_val = NULL;
1066 1066                  resp->resok.data.mp = NULL;
1067 1067                  /* RDMA */
1068 1068                  resp->resok.wlist = args->wlist;
1069 1069                  resp->resok.wlist_len = resp->resok.count;
1070 1070                  if (resp->resok.wlist)
1071 1071                          clist_zero_len(resp->resok.wlist);
1072 1072                  goto done;
1073 1073          }
1074 1074  
1075 1075          /*
1076 1076           * Do not allocate more memory than the maximum allowed
1077 1077           * transfer size.
1078 1078           */
1079 1079          if (args->count > rfs3_tsize(req))
1080 1080                  args->count = rfs3_tsize(req);
1081 1081  
1082 1082          if (loaned_buffers) {
1083 1083                  uiop = (uio_t *)rfs_setup_xuio(vp);
1084 1084                  ASSERT(uiop != NULL);
1085 1085                  uiop->uio_segflg = UIO_SYSSPACE;
1086 1086                  uiop->uio_loffset = args->offset;
1087 1087                  uiop->uio_resid = args->count;
1088 1088  
1089 1089                  /* Jump to do the read if successful */
1090 1090                  if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1091 1091                          /*
1092 1092                           * Need to hold the vnode until after VOP_RETZCBUF()
1093 1093                           * is called.
1094 1094                           */
1095 1095                          VN_HOLD(vp);
1096 1096                          goto doio_read;
1097 1097                  }
1098 1098  
1099 1099                  DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1100 1100                      uiop->uio_loffset, int, uiop->uio_resid);
1101 1101  
1102 1102                  uiop->uio_extflg = 0;
1103 1103                  /* failure to setup for zero copy */
1104 1104                  rfs_free_xuio((void *)uiop);
1105 1105                  loaned_buffers = 0;
1106 1106          }
1107 1107  
1108 1108          /*
1109 1109           * If returning data via RDMA Write, then grab the chunk list.
1110 1110           * If we aren't returning READ data w/RDMA_WRITE, then grab
1111 1111           * a mblk.
1112 1112           */
1113 1113          if (rdma_used) {
1114 1114                  (void) rdma_get_wchunk(req, &iov, args->wlist);
1115 1115                  uio.uio_iov = &iov;
1116 1116                  uio.uio_iovcnt = 1;
1117 1117          } else {
1118 1118                  /*
1119 1119                   * mp will contain the data to be sent out in the read reply.
1120 1120                   * For UDP, this will be freed after the reply has been sent
1121 1121                   * out by the driver.  For TCP, it will be freed after the last
1122 1122                   * segment associated with the reply has been ACKed by the
1123 1123                   * client.
1124 1124                   */
1125 1125                  mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1126 1126                  uio.uio_iov = iovp;
1127 1127                  uio.uio_iovcnt = iovcnt;
1128 1128          }
1129 1129  
1130 1130          uio.uio_segflg = UIO_SYSSPACE;
1131 1131          uio.uio_extflg = UIO_COPY_CACHED;
1132 1132          uio.uio_loffset = args->offset;
1133 1133          uio.uio_resid = args->count;
1134 1134          uiop = &uio;
1135 1135  
                   /* uiop is either the loaned-buffer uio or the local uio set up above. */
1136 1136  doio_read:
1137 1137          error = VOP_READ(vp, uiop, 0, cr, &ct);
1138 1138  
1139 1139          if (error) {
1140 1140                  if (mp)
1141 1141                          freemsg(mp);
1142 1142                  /* check if a monitor detected a delegation conflict */
1143 1143                  if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1144 1144                          resp->status = NFS3ERR_JUKEBOX;
1145 1145                          goto out1;
1146 1146                  }
1147 1147                  goto out;
1148 1148          }
1149 1149  
1150 1150          /* make mblk using zc buffers */
1151 1151          if (loaned_buffers) {
1152 1152                  mp = uio_to_mblk(uiop);
1153 1153                  ASSERT(mp != NULL);
1154 1154          }
1155 1155  
                   /*
                    * Fetch the attributes again for the reply.  A failure here does
                    * not fail the read; it drops the attributes and the eof hint.
                    */
1156 1156          va.va_mask = AT_ALL;
1157 1157          error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1158 1158  
1159 1159          if (error)
1160 1160                  vap = NULL;
1161 1161          else
1162 1162                  vap = &va;
1163 1163  
1164 1164          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1165 1165  
1166 1166          if (in_crit)
1167 1167                  nbl_end_crit(vp);
1168 1168  
1169 1169          resp->status = NFS3_OK;
1170 1170          vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1171 1171          resp->resok.count = args->count - uiop->uio_resid;
                   /* eof only when the read ended exactly at the size just fetched. */
1172 1172          if (!error && offset + resp->resok.count == va.va_size)
1173 1173                  resp->resok.eof = TRUE;
1174 1174          else
1175 1175                  resp->resok.eof = FALSE;
1176 1176          resp->resok.data.data_len = resp->resok.count;
1177 1177  
1178 1178          if (mp)
1179 1179                  rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1180 1180  
1181 1181          resp->resok.data.mp = mp;
1182 1182          resp->resok.size = (uint_t)args->count;
1183 1183  
1184 1184          if (rdma_used) {
1185 1185                  resp->resok.data.data_val = (caddr_t)iov.iov_base;
1186 1186                  if (!rdma_setup_read_data3(args, &(resp->resok))) {
1187 1187                          resp->status = NFS3ERR_INVAL;
1188 1188                  }
1189 1189          } else {
1190 1190                  resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1191 1191                  (resp->resok).wlist = NULL;
1192 1192          }
1193 1193  
                   /*
                    * done: every path reaching here has already called VOP_RWUNLOCK()
                    * and, if in_crit was set, nbl_end_crit().
                    */
1194 1194  done:
1195 1195          DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1196 1196              cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1197 1197  
1198 1198          VN_RELE(vp);
1199 1199  
1200 1200          if (iovp != NULL)
1201 1201                  kmem_free(iovp, iovcnt * sizeof (struct iovec));
1202 1202  
1203 1203          return;
1204 1204  
                   /*
                    * out:  error holds an errno value; it is mapped to an NFS status
                    *       unless T_WOULDBLOCK is set on the thread.
                    * out1: entered directly when resp->status has already been set.
                    * Both release whatever need_rwunlock and in_crit say is held.
                    */
1205 1205  out:
1206 1206          if (curthread->t_flag & T_WOULDBLOCK) {
1207 1207                  curthread->t_flag &= ~T_WOULDBLOCK;
1208 1208                  resp->status = NFS3ERR_JUKEBOX;
1209 1209          } else
1210 1210                  resp->status = puterrno3(error);
1211 1211  out1:
1212 1212          DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1213 1213              cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1214 1214  
1215 1215          if (vp != NULL) {
1216 1216                  if (need_rwunlock)
1217 1217                          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1218 1218                  if (in_crit)
1219 1219                          nbl_end_crit(vp);
1220 1220                  VN_RELE(vp);
1221 1221          }
1222 1222          vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1223 1223  
1224 1224          if (iovp != NULL)
1225 1225                  kmem_free(iovp, iovcnt * sizeof (struct iovec));
1226 1226  }
1227 1227  
           /*
            * Free the mblk chain (resp->resok.data.mp), if any, attached to a
            * successful READ reply.  Nothing is freed for any other status.
            */
1228 1228  void
1229 1229  rfs3_read_free(READ3res *resp)
1230 1230  {
1231 1231          mblk_t *mp;
1232 1232  
1233 1233          if (resp->status == NFS3_OK) {
1234 1234                  mp = resp->resok.data.mp;
1235 1235                  if (mp != NULL)
1236 1236                          freemsg(mp);
1237 1237          }
1238 1238  }
1239 1239  
           /*
            * Return a pointer to the file handle (args->file) carried in the
            * READ3 arguments.
            */
1240 1240  void *
1241 1241  rfs3_read_getfh(READ3args *args)
1242 1242  {
1243 1243  
1244 1244          return (&args->file);
1245 1245  }
1246 1246  
           /*
            * Size of the on-stack iovec array in rfs3_write().  An mblk chain with
            * more entries than this gets a kmem_alloc()ed iovec array instead.
            */
1247 1247  #define MAX_IOVECS      12
1248 1248  
           /*
            * DEBUG-only counters maintained by rfs3_write(): hits counts mblk chains
            * that fit in the on-stack array, misses counts those that did not.
            */
1249 1249  #ifdef DEBUG
1250 1250  static int rfs3_write_hits = 0;
1251 1251  static int rfs3_write_misses = 0;
1252 1252  #endif
1253 1253  
           /*
            * Server routine to handle the NFSv3 WRITE request.
            *
            * The data is taken from args->mblk (one iovec per mblk in the chain)
            * when present, else from args->rlist, else from args->data.data_val.
            * Attributes fetched before and after the write are returned as wcc_data.
            */
1254 1254  void
1255 1255  rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1256 1256      struct svc_req *req, cred_t *cr, bool_t ro)
1257 1257  {
1258 1258          int error;
1259 1259          vnode_t *vp;
1260 1260          struct vattr *bvap = NULL;
1261 1261          struct vattr bva;
1262 1262          struct vattr *avap = NULL;
1263 1263          struct vattr ava;
1264 1264          u_offset_t rlimit;
1265 1265          struct uio uio;
1266 1266          struct iovec iov[MAX_IOVECS];
1267 1267          mblk_t *m;
1268 1268          struct iovec *iovp;
1269 1269          int iovcnt;
1270 1270          int ioflag;
1271 1271          cred_t *savecred;
1272 1272          int in_crit = 0;
                   /* -1 tells the exit path below not to call VOP_RWUNLOCK(). */
1273 1273          int rwlock_ret = -1;
1274 1274          caller_context_t ct;
1275 1275  
1276 1276          vp = nfs3_fhtovp(&args->file, exi);
1277 1277  
1278 1278          DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1279 1279              cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1280 1280  
1281 1281          if (vp == NULL) {
1282 1282                  error = ESTALE;
1283 1283                  goto err;
1284 1284          }
1285 1285  
1286 1286          if (is_system_labeled()) {
1287 1287                  bslabel_t *clabel = req->rq_label;
1288 1288  
1289 1289                  ASSERT(clabel != NULL);
1290 1290                  DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1291 1291                      "got client label from request(1)", struct svc_req *, req);
1292 1292  
1293 1293                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
1294 1294                          if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1295 1295                              exi)) {
1296 1296                                  resp->status = NFS3ERR_ACCES;
1297 1297                                  goto err1;
1298 1298                          }
1299 1299                  }
1300 1300          }
1301 1301  
1302 1302          ct.cc_sysid = 0;
1303 1303          ct.cc_pid = 0;
1304 1304          ct.cc_caller_id = nfs3_srv_caller_id;
1305 1305          ct.cc_flags = CC_DONTBLOCK;
1306 1306  
1307 1307          /*
1308 1308           * We have to enter the critical region before calling VOP_RWLOCK
1309 1309           * to avoid a deadlock with ufs.
1310 1310           */
1311 1311          if (nbl_need_check(vp)) {
1312 1312                  nbl_start_crit(vp, RW_READER);
1313 1313                  in_crit = 1;
1314 1314                  if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1315 1315                      NULL)) {
1316 1316                          error = EACCES;
1317 1317                          goto err;
1318 1318                  }
1319 1319          }
1320 1320  
1321 1321          rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1322 1322  
1323 1323          /* check if a monitor detected a delegation conflict */
1324 1324          if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1325 1325                  resp->status = NFS3ERR_JUKEBOX;
                           /* reset so that the exit path skips VOP_RWUNLOCK() */
1326 1326                  rwlock_ret = -1;
1327 1327                  goto err1;
1328 1328          }
1329 1329  
1330 1330  
1331 1331          bva.va_mask = AT_ALL;
1332 1332          error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1333 1333  
1334 1334          /*
1335 1335           * If we can't get the attributes, then we can't do the
1336 1336           * right access checking.  So, we'll fail the request.
1337 1337           */
1338 1338          if (error)
1339 1339                  goto err;
1340 1340  
                   /*
                    * Until the write has been attempted, the "after" attributes
                    * reported in wcc_data are the same as the "before" ones.
                    */
1341 1341          bvap = &bva;
1342 1342          avap = bvap;
1343 1343  
1344 1344          if (args->count != args->data.data_len) {
1345 1345                  resp->status = NFS3ERR_INVAL;
1346 1346                  goto err1;
1347 1347          }
1348 1348  
1349 1349          if (rdonly(ro, vp)) {
1350 1350                  resp->status = NFS3ERR_ROFS;
1351 1351                  goto err1;
1352 1352          }
1353 1353  
1354 1354          if (vp->v_type != VREG) {
1355 1355                  resp->status = NFS3ERR_INVAL;
1356 1356                  goto err1;
1357 1357          }
1358 1358  
                   /* The write-access check is skipped when the caller owns the file. */
1359 1359          if (crgetuid(cr) != bva.va_uid &&
1360 1360              (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1361 1361                  goto err;
1362 1362  
1363 1363          if (MANDLOCK(vp, bva.va_mode)) {
1364 1364                  resp->status = NFS3ERR_ACCES;
1365 1365                  goto err1;
1366 1366          }
1367 1367  
                   /* Zero-length write: successful reply without calling VOP_WRITE(). */
1368 1368          if (args->count == 0) {
1369 1369                  resp->status = NFS3_OK;
1370 1370                  vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1371 1371                  resp->resok.count = 0;
1372 1372                  resp->resok.committed = args->stable;
1373 1373                  resp->resok.verf = write3verf;
1374 1374                  goto out;
1375 1375          }
1376 1376  
                   /*
                    * Build the iovec array describing the data.  iovp points at the
                    * on-stack iov[] unless the mblk chain has more than MAX_IOVECS
                    * entries, in which case it is allocated and must be freed.
                    */
1377 1377          if (args->mblk != NULL) {
1378 1378                  iovcnt = 0;
1379 1379                  for (m = args->mblk; m != NULL; m = m->b_cont)
1380 1380                          iovcnt++;
1381 1381                  if (iovcnt <= MAX_IOVECS) {
1382 1382  #ifdef DEBUG
1383 1383                          rfs3_write_hits++;
1384 1384  #endif
1385 1385                          iovp = iov;
1386 1386                  } else {
1387 1387  #ifdef DEBUG
1388 1388                          rfs3_write_misses++;
1389 1389  #endif
1390 1390                          iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1391 1391                  }
1392 1392                  mblk_to_iov(args->mblk, iovcnt, iovp);
1393 1393  
1394 1394          } else if (args->rlist != NULL) {
1395 1395                  iovcnt = 1;
1396 1396                  iovp = iov;
1397 1397                  iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1398 1398                  iovp->iov_len = args->count;
1399 1399          } else {
1400 1400                  iovcnt = 1;
1401 1401                  iovp = iov;
1402 1402                  iovp->iov_base = args->data.data_val;
1403 1403                  iovp->iov_len = args->count;
1404 1404          }
1405 1405  
1406 1406          uio.uio_iov = iovp;
1407 1407          uio.uio_iovcnt = iovcnt;
1408 1408  
1409 1409          uio.uio_segflg = UIO_SYSSPACE;
1410 1410          uio.uio_extflg = UIO_COPY_DEFAULT;
1411 1411          uio.uio_loffset = args->offset;
1412 1412          uio.uio_resid = args->count;
                   /*
                    * Shorten the transfer when fewer than uio_resid bytes lie between
                    * the offset and the limit taken from curproc->p_fsz_ctl.
                    */
1413 1413          uio.uio_llimit = curproc->p_fsz_ctl;
1414 1414          rlimit = uio.uio_llimit - args->offset;
1415 1415          if (rlimit < (u_offset_t)uio.uio_resid)
1416 1416                  uio.uio_resid = (int)rlimit;
1417 1417  
                   /*
                    * Translate the requested stability level into VOP_WRITE() flags;
                    * any other value is rejected.
                    */
1418 1418          if (args->stable == UNSTABLE)
1419 1419                  ioflag = 0;
1420 1420          else if (args->stable == FILE_SYNC)
1421 1421                  ioflag = FSYNC;
1422 1422          else if (args->stable == DATA_SYNC)
1423 1423                  ioflag = FDSYNC;
1424 1424          else {
1425 1425                  if (iovp != iov)
1426 1426                          kmem_free(iovp, sizeof (*iovp) * iovcnt);
1427 1427                  resp->status = NFS3ERR_INVAL;
1428 1428                  goto err1;
1429 1429          }
1430 1430  
1431 1431          /*
1432 1432           * We're changing creds because VM may fault and we need
1433 1433           * the cred of the current thread to be used if quota
1434 1434           * checking is enabled.
1435 1435           */
1436 1436          savecred = curthread->t_cred;
1437 1437          curthread->t_cred = cr;
1438 1438          error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1439 1439          curthread->t_cred = savecred;
1440 1440  
1441 1441          if (iovp != iov)
1442 1442                  kmem_free(iovp, sizeof (*iovp) * iovcnt);
1443 1443  
1444 1444          /* check if a monitor detected a delegation conflict */
1445 1445          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1446 1446                  resp->status = NFS3ERR_JUKEBOX;
1447 1447                  goto err1;
1448 1448          }
1449 1449  
                   /*
                    * The after attributes are fetched even when the write failed, so
                    * that the failure reply can carry them.
                    */
1450 1450          ava.va_mask = AT_ALL;
1451 1451          avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1452 1452  
1453 1453          if (error)
1454 1454                  goto err;
1455 1455  
1456 1456          /*
1457 1457           * If we were unable to get the V_WRITELOCK_TRUE, then we
1458 1458           * may not have accurate after attrs, so check if
1459 1459           * we have both attributes, they have a non-zero va_seq, and
1460 1460           * va_seq has changed by exactly one,
1461 1461           * if not, turn off the before attr.
1462 1462           */
1463 1463          if (rwlock_ret != V_WRITELOCK_TRUE) {
1464 1464                  if (bvap == NULL || avap == NULL ||
1465 1465                      bvap->va_seq == 0 || avap->va_seq == 0 ||
1466 1466                      avap->va_seq != (bvap->va_seq + 1)) {
1467 1467                          bvap = NULL;
1468 1468                  }
1469 1469          }
1470 1470  
1471 1471          resp->status = NFS3_OK;
1472 1472          vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1473 1473          resp->resok.count = args->count - uio.uio_resid;
1474 1474          resp->resok.committed = args->stable;
1475 1475          resp->resok.verf = write3verf;
1476 1476          goto out;
1477 1477  
                   /*
                    * err:  error holds an errno value; it is mapped to an NFS status
                    *       unless T_WOULDBLOCK is set on the thread.
                    * err1: entered directly when resp->status has already been set.
                    * out:  common exit for success and failure; drops the rwlock, the
                    *       nbl critical region and the vnode hold as applicable.
                    */
1478 1478  err:
1479 1479          if (curthread->t_flag & T_WOULDBLOCK) {
1480 1480                  curthread->t_flag &= ~T_WOULDBLOCK;
1481 1481                  resp->status = NFS3ERR_JUKEBOX;
1482 1482          } else
1483 1483                  resp->status = puterrno3(error);
1484 1484  err1:
1485 1485          vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1486 1486  out:
1487 1487          DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1488 1488              cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1489 1489  
1490 1490          if (vp != NULL) {
1491 1491                  if (rwlock_ret != -1)
1492 1492                          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1493 1493                  if (in_crit)
1494 1494                          nbl_end_crit(vp);
1495 1495                  VN_RELE(vp);
1496 1496          }
1497 1497  }
1498 1498  
           /*
            * Return a pointer to the file handle (args->file) carried in the
            * WRITE3 arguments.
            */
1499 1499  void *
1500 1500  rfs3_write_getfh(WRITE3args *args)
1501 1501  {
1502 1502  
1503 1503          return (&args->file);
1504 1504  }
1505 1505  
           /*
            * NFSv3 CREATE: create a regular file named args->where.name in the
            * directory identified by the file handle args->where.dir, and fill in
            * *resp.
            *
            * args->how.mode selects the semantics:
            *   EXCLUSIVE  - create with EXCL and store the client's verifier in the
            *                file's mtime; on EEXIST the existing file is accepted
            *                only if its mtime matches the verifier.
            *   GUARDED    - create with EXCL; EEXIST is returned to the client.
            *   otherwise  - create with NONEXCL.
            *
            * On success resp->status is NFS3_OK and resp->resok holds the object's
            * file handle (handle_follows is FALSE if makefh3() fails), its
            * attributes and the directory's before/after attributes.  On failure
            * resp->status holds the NFSv3 error and resp->resfail.dir_wcc the
            * directory's before/after attributes.
            *
            * Error exits: "out" expects an errno in 'error' and converts it to an
            * NFSv3 status; "out1" expects resp->status to be set already.  Both
            * release the converted name, tvp (leaving the nbmand critical region if
            * it was entered) and dvp.
            */
1506 1506  void
1507 1507  rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1508 1508      struct svc_req *req, cred_t *cr, bool_t ro)
1509 1509  {
1510 1510          int error;
1511 1511          int in_crit = 0;
1512 1512          vnode_t *vp;
1513 1513          vnode_t *tvp = NULL;
1514 1514          vnode_t *dvp;
1515 1515          struct vattr *vap;
1516 1516          struct vattr va;
1517 1517          struct vattr *dbvap;
1518 1518          struct vattr dbva;
1519 1519          struct vattr *davap;
1520 1520          struct vattr dava;
1521 1521          enum vcexcl excl;
1522 1522          nfstime3 *mtime;
1523 1523          len_t reqsize;
1524 1524          bool_t trunc;
1525 1525          struct sockaddr *ca;
1526 1526          char *name = NULL;
1527 1527  
1528 1528          dbvap = NULL;
1529 1529          davap = NULL;
1530 1530  
1531 1531          dvp = nfs3_fhtovp(&args->where.dir, exi);
1532 1532  
1533 1533          DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1534 1534              cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1535 1535  
1536 1536          if (dvp == NULL) {
1537 1537                  error = ESTALE;
1538 1538                  goto out;
1539 1539          }
1540 1540  
                   /*
                    * Capture the directory's pre-operation attributes.  Until the
                    * directory is modified they double as the post-operation
                    * attributes; davap is refreshed after VOP_CREATE() below.
                    */
1541 1541          dbva.va_mask = AT_ALL;
1542 1542          dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1543 1543          davap = dbvap;
1544 1544  
1545 1545          if (args->where.name == nfs3nametoolong) {
1546 1546                  resp->status = NFS3ERR_NAMETOOLONG;
1547 1547                  goto out1;
1548 1548          }
1549 1549  
1550 1550          if (args->where.name == NULL || *(args->where.name) == '\0') {
1551 1551                  resp->status = NFS3ERR_ACCES;
1552 1552                  goto out1;
1553 1553          }
1554 1554  
1555 1555          if (rdonly(ro, dvp)) {
1556 1556                  resp->status = NFS3ERR_ROFS;
1557 1557                  goto out1;
1558 1558          }
1559 1559  
1560 1560          if (is_system_labeled()) {
1561 1561                  bslabel_t *clabel = req->rq_label;
1562 1562  
1563 1563                  ASSERT(clabel != NULL);
1564 1564                  DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1565 1565                      "got client label from request(1)", struct svc_req *, req);
1566 1566  
1567 1567                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
1568 1568                          if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1569 1569                              exi)) {
1570 1570                                  resp->status = NFS3ERR_ACCES;
1571 1571                                  goto out1;
1572 1572                          }
1573 1573                  }
1574 1574          }
1575 1575  
                   /*
                    * Convert the client-supplied name.  The result is either
                    * args->where.name itself or a separate buffer, which is
                    * released with kmem_free() once it is no longer needed.
                    */
1576 1576          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1577 1577          name = nfscmd_convname(ca, exi, args->where.name,
1578 1578              NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1579 1579  
1580 1580          if (name == NULL) {
1581 1581                  /* This is really a Solaris EILSEQ */
1582 1582                  resp->status = NFS3ERR_INVAL;
1583 1583                  goto out1;
1584 1584          }
1585 1585  
                   /*
                    * Build the attributes for the new file.  An EXCLUSIVE create
                    * carries a verifier rather than attributes; it is stored in the
                    * mtime so that a duplicate of the request can be recognized in
                    * the EEXIST handling below.
                    */
1586 1586          if (args->how.mode == EXCLUSIVE) {
1587 1587                  va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1588 1588                  va.va_type = VREG;
1589 1589                  va.va_mode = (mode_t)0;
1590 1590                  /*
1591 1591                   * Ensure no time overflows and that types match
1592 1592                   */
1593 1593                  mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1594 1594                  va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1595 1595                  va.va_mtime.tv_nsec = mtime->nseconds;
1596 1596                  excl = EXCL;
1597 1597          } else {
1598 1598                  error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1599 1599                      &va);
1600 1600                  if (error)
1601 1601                          goto out;
1602 1602                  va.va_mask |= AT_TYPE;
1603 1603                  va.va_type = VREG;
1604 1604                  if (args->how.mode == GUARDED)
1605 1605                          excl = EXCL;
1606 1606                  else {
1607 1607                          excl = NONEXCL;
1608 1608  
1609 1609                          /*
1610 1610                           * During creation of file in non-exclusive mode
1611 1611                           * if size of file is being set then make sure
1612 1612                           * that if the file already exists that no conflicting
1613 1613                           * non-blocking mandatory locks exists in the region
1614 1614                           * being modified. If there are conflicting locks fail
1615 1615                           * the operation with EACCES.
1616 1616                           */
1617 1617                          if (va.va_mask & AT_SIZE) {
1618 1618                                  struct vattr tva;
1619 1619  
1620 1620                                  /*
1621 1621                                   * Does file already exist?
1622 1622                                   */
1623 1623                                  error = VOP_LOOKUP(dvp, name, &tvp,
1624 1624                                      NULL, 0, NULL, cr, NULL, NULL, NULL);
1625 1625  
1626 1626                                  /*
1627 1627                                   * Check to see if the file has been delegated
1628 1628                                   * to a v4 client.  If so, then begin recall of
1629 1629                                   * the delegation and return JUKEBOX to allow
1630 1630                                   * the client to retransmit its request.
1631 1631                                   */
1632 1632  
1633 1633                                  trunc = va.va_size == 0;
1634 1634                                  if (!error &&
1635 1635                                      rfs4_check_delegated(FWRITE, tvp, trunc)) {
1636 1636                                          resp->status = NFS3ERR_JUKEBOX;
1637 1637                                          goto out1;
1638 1638                                  }
1639 1639  
1640 1640                                  /*
1641 1641                                   * Check for NBMAND lock conflicts
1642 1642                                   */
1643 1643                                  if (!error && nbl_need_check(tvp)) {
1644 1644                                          u_offset_t offset;
1645 1645                                          ssize_t len;
1646 1646  
                                                   /*
                                                    * in_crit tells the exit paths to
                                                    * call nbl_end_crit() on tvp.
                                                    */
1647 1647                                          nbl_start_crit(tvp, RW_READER);
1648 1648                                          in_crit = 1;
1649 1649  
1650 1650                                          tva.va_mask = AT_SIZE;
1651 1651                                          error = VOP_GETATTR(tvp, &tva, 0, cr,
1652 1652                                              NULL);
1653 1653                                          /*
1654 1654                                           * Can't check for conflicts, so return
1655 1655                                           * error.
1656 1656                                           */
1657 1657                                          if (error)
1658 1658                                                  goto out;
1659 1659  
                                                   /*
                                                    * The affected region lies between
                                                    * the current size and the requested
                                                    * size: it starts at the smaller of
                                                    * the two and spans their difference.
                                                    */
1660 1660                                          offset = tva.va_size < va.va_size ?
1661 1661                                              tva.va_size : va.va_size;
1662 1662                                          len = tva.va_size < va.va_size ?
1663 1663                                              va.va_size - tva.va_size :
1664 1664                                              tva.va_size - va.va_size;
1665 1665                                          if (nbl_conflict(tvp, NBL_WRITE,
1666 1666                                              offset, len, 0, NULL)) {
1667 1667                                                  error = EACCES;
1668 1668                                                  goto out;
1669 1669                                          }
1670 1670                                  } else if (tvp) {
1671 1671                                          VN_RELE(tvp);
1672 1672                                          tvp = NULL;
1673 1673                                  }
1674 1674                          }
1675 1675                  }
                           /*
                            * Remember the requested size; it is compared against the
                            * size of the created file after VOP_CREATE().
                            */
1676 1676                  if (va.va_mask & AT_SIZE)
1677 1677                          reqsize = va.va_size;
1678 1678          }
1679 1679  
1680 1680          /*
1681 1681           * Must specify the mode.
1682 1682           */
1683 1683          if (!(va.va_mask & AT_MODE)) {
1684 1684                  resp->status = NFS3ERR_INVAL;
1685 1685                  goto out1;
1686 1686          }
1687 1687  
1688 1688          /*
1689 1689           * If the filesystem is exported with nosuid, then mask off
1690 1690           * the setuid and setgid bits.
1691 1691           */
1692 1692          if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1693 1693                  va.va_mode &= ~(VSUID | VSGID);
1694 1694  
1695 1695  tryagain:
1696 1696          /*
1697 1697           * The file open mode used is VWRITE.  If the client needs
1698 1698           * some other semantic, then it should do the access checking
1699 1699           * itself.  It would have been nice to have the file open mode
1700 1700           * passed as part of the arguments.
1701 1701           */
1702 1702          error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1703 1703              &vp, cr, 0, NULL, NULL);
1704 1704  
                   /*
                    * Refresh the directory's post-operation attributes whether or
                    * not the create succeeded.
                    */
1705 1705          dava.va_mask = AT_ALL;
1706 1706          davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1707 1707  
1708 1708          if (error) {
1709 1709                  /*
1710 1710                   * If we got something other than file already exists
1711 1711                   * then just return this error.  Otherwise, we got
1712 1712                   * EEXIST.  If we were doing a GUARDED create, then
1713 1713                   * just return this error.  Otherwise, we need to
1714 1714                   * make sure that this wasn't a duplicate of an
1715 1715                   * exclusive create request.
1716 1716                   *
1717 1717                   * The assumption is made that a non-exclusive create
1718 1718                   * request will never return EEXIST.
1719 1719                   */
1720 1720                  if (error != EEXIST || args->how.mode == GUARDED)
1721 1721                          goto out;
1722 1722                  /*
1723 1723                   * Lookup the file so that we can get a vnode for it.
1724 1724                   */
1725 1725                  error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1726 1726                      NULL, cr, NULL, NULL, NULL);
1727 1727                  if (error) {
1728 1728                          /*
1729 1729                           * We couldn't find the file that we thought that
1730 1730                           * we just created.  So, we'll just try creating
1731 1731                           * it again.
1732 1732                           */
1733 1733                          if (error == ENOENT)
1734 1734                                  goto tryagain;
1735 1735                          goto out;
1736 1736                  }
1737 1737  
1738 1738                  /*
1739 1739                   * If the file is delegated to a v4 client, go ahead
1740 1740                   * and initiate recall, this create is a hint that a
1741 1741                   * conflicting v3 open has occurred.
1742 1742                   */
1743 1743  
1744 1744                  if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1745 1745                          VN_RELE(vp);
1746 1746                          resp->status = NFS3ERR_JUKEBOX;
1747 1747                          goto out1;
1748 1748                  }
1749 1749  
1750 1750                  va.va_mask = AT_ALL;
1751 1751                  vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1752 1752  
                           /*
                            * For an EXCLUSIVE create the existing file is accepted only
                            * if its mtime equals the verifier from this request;
                            * otherwise the request fails with EEXIST.
                            */
1753 1753                  mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1754 1754                  /* % with INT32_MAX to prevent overflows */
1755 1755                  if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1756 1756                      vap->va_mtime.tv_sec !=
1757 1757                      (mtime->seconds % INT32_MAX) ||
1758 1758                      vap->va_mtime.tv_nsec != mtime->nseconds)) {
1759 1759                          VN_RELE(vp);
1760 1760                          error = EEXIST;
1761 1761                          goto out;
1762 1762                  }
1763 1763          } else {
1764 1764  
                           /*
                            * The create succeeded.  Treat it as a truncation for the
                            * delegation check when the client asked for a size of zero.
                            */
1765 1765                  if ((args->how.mode == UNCHECKED ||
1766 1766                      args->how.mode == GUARDED) &&
1767 1767                      args->how.createhow3_u.obj_attributes.size.set_it &&
1768 1768                      va.va_size == 0)
1769 1769                          trunc = TRUE;
1770 1770                  else
1771 1771                          trunc = FALSE;
1772 1772  
1773 1773                  if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1774 1774                          VN_RELE(vp);
1775 1775                          resp->status = NFS3ERR_JUKEBOX;
1776 1776                          goto out1;
1777 1777                  }
1778 1778  
1779 1779                  va.va_mask = AT_ALL;
1780 1780                  vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1781 1781  
1782 1782                  /*
1783 1783                   * We need to check to make sure that the file got
1784 1784                   * created to the indicated size.  If not, we do a
1785 1785                   * setattr to try to change the size, but we don't
1786 1786                   * try too hard.  This shouldn't be a problem as most
1787 1787                   * clients will only specify a size of zero which
1788 1788                   * local file systems handle.  However, even if
1789 1789                   * the client does specify a non-zero size, it can
1790 1790                   * still recover by checking the size of the file
1791 1791                   * after it has created it and then issue a setattr
1792 1792                   * request of its own to set the size of the file.
1793 1793                   */
1794 1794                  if (vap != NULL &&
1795 1795                      (args->how.mode == UNCHECKED ||
1796 1796                      args->how.mode == GUARDED) &&
1797 1797                      args->how.createhow3_u.obj_attributes.size.set_it &&
1798 1798                      vap->va_size != reqsize) {
1799 1799                          va.va_mask = AT_SIZE;
1800 1800                          va.va_size = reqsize;
1801 1801                          (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1802 1802                          va.va_mask = AT_ALL;
1803 1803                          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1804 1804                  }
1805 1805          }
1806 1806  
                   /*
                    * The converted name is not used past this point; free it if it
                    * is a separate buffer.
                    */
1807 1807          if (name != args->where.name)
1808 1808                  kmem_free(name, MAXPATHLEN + 1);
1809 1809  
                   /*
                    * A makefh3() failure does not fail the request; the reply is
                    * simply sent without a file handle.
                    */
1810 1810          error = makefh3(&resp->resok.obj.handle, vp, exi);
1811 1811          if (error)
1812 1812                  resp->resok.obj.handle_follows = FALSE;
1813 1813          else
1814 1814                  resp->resok.obj.handle_follows = TRUE;
1815 1815  
1816 1816          /*
1817 1817           * Force modified data and metadata out to stable storage.
1818 1818           */
1819 1819          (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1820 1820          (void) VOP_FSYNC(dvp, 0, cr, NULL);
1821 1821  
                   /*
                    * Release the new file and, if the pre-create lookup kept a hold
                    * on an existing file, leave its critical region and release it.
                    */
1822 1822          VN_RELE(vp);
1823 1823          if (tvp != NULL) {
1824 1824                  if (in_crit)
1825 1825                          nbl_end_crit(tvp);
1826 1826                  VN_RELE(tvp);
1827 1827          }
1828 1828  
1829 1829          resp->status = NFS3_OK;
1830 1830          vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1831 1831          vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1832 1832  
1833 1833          DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1834 1834              cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1835 1835  
1836 1836          VN_RELE(dvp);
1837 1837          return;
1838 1838  
                   /*
                    * Failure with an errno in 'error': report JUKEBOX if the thread
                    * was flagged T_WOULDBLOCK (clearing the flag), otherwise map the
                    * errno to an NFSv3 status.
                    */
1839 1839  out:
1840 1840          if (curthread->t_flag & T_WOULDBLOCK) {
1841 1841                  curthread->t_flag &= ~T_WOULDBLOCK;
1842 1842                  resp->status = NFS3ERR_JUKEBOX;
1843 1843          } else
1844 1844                  resp->status = puterrno3(error);
                   /*
                    * Failure with resp->status already set: release whatever is
                    * still held and report the directory attributes.
                    */
1845 1845  out1:
1846 1846          DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1847 1847              cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1848 1848  
1849 1849          if (name != NULL && name != args->where.name)
1850 1850                  kmem_free(name, MAXPATHLEN + 1);
1851 1851  
1852 1852          if (tvp != NULL) {
1853 1853                  if (in_crit)
1854 1854                          nbl_end_crit(tvp);
1855 1855                  VN_RELE(tvp);
1856 1856          }
1857 1857          if (dvp != NULL)
1858 1858                  VN_RELE(dvp);
1859 1859          vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1860 1860  }
1861 1861  
           /*
            * Return a pointer to the directory file handle (args->where.dir)
            * carried in the CREATE3 arguments.
            */
1862 1862  void *
1863 1863  rfs3_create_getfh(CREATE3args *args)
1864 1864  {
1865 1865  
1866 1866          return (&args->where.dir);
1867 1867  }
1868 1868  
           /*
            * NFSv3 MKDIR: create a directory named args->where.name in the
            * directory identified by the file handle args->where.dir, using the
            * attributes in args->attributes (a mode is required), and fill in *resp.
            *
            * On success resp->status is NFS3_OK and resp->resok holds the new
            * directory's file handle (handle_follows is FALSE if makefh3() fails),
            * its attributes and the parent's before/after attributes.  On failure
            * resp->status holds the NFSv3 error and resp->resfail.dir_wcc the
            * parent's before/after attributes.
            *
            * Error exits: "out" expects an errno in 'error' and converts it to an
            * NFSv3 status; "out1" expects resp->status to be set already.
            */
1869 1869  void
1870 1870  rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1871 1871      struct svc_req *req, cred_t *cr, bool_t ro)
1872 1872  {
1873 1873          int error;
1874 1874          vnode_t *vp = NULL;
1875 1875          vnode_t *dvp;
1876 1876          struct vattr *vap;
1877 1877          struct vattr va;
1878 1878          struct vattr *dbvap;
1879 1879          struct vattr dbva;
1880 1880          struct vattr *davap;
1881 1881          struct vattr dava;
1882 1882          struct sockaddr *ca;
1883 1883          char *name = NULL;
1884 1884  
1885 1885          dbvap = NULL;
1886 1886          davap = NULL;
1887 1887  
1888 1888          dvp = nfs3_fhtovp(&args->where.dir, exi);
1889 1889  
1890 1890          DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1891 1891              cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1892 1892  
1893 1893          if (dvp == NULL) {
1894 1894                  error = ESTALE;
1895 1895                  goto out;
1896 1896          }
1897 1897  
                   /*
                    * Capture the parent's pre-operation attributes.  Until the
                    * parent is modified they double as the post-operation
                    * attributes; davap is refreshed after VOP_MKDIR() below.
                    */
1898 1898          dbva.va_mask = AT_ALL;
1899 1899          dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1900 1900          davap = dbvap;
1901 1901  
1902 1902          if (args->where.name == nfs3nametoolong) {
1903 1903                  resp->status = NFS3ERR_NAMETOOLONG;
1904 1904                  goto out1;
1905 1905          }
1906 1906  
1907 1907          if (args->where.name == NULL || *(args->where.name) == '\0') {
1908 1908                  resp->status = NFS3ERR_ACCES;
1909 1909                  goto out1;
1910 1910          }
1911 1911  
1912 1912          if (rdonly(ro, dvp)) {
1913 1913                  resp->status = NFS3ERR_ROFS;
1914 1914                  goto out1;
1915 1915          }
1916 1916  
1917 1917          if (is_system_labeled()) {
1918 1918                  bslabel_t *clabel = req->rq_label;
1919 1919  
1920 1920                  ASSERT(clabel != NULL);
1921 1921                  DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1922 1922                      "got client label from request(1)", struct svc_req *, req);
1923 1923  
1924 1924                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
1925 1925                          if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1926 1926                              exi)) {
1927 1927                                  resp->status = NFS3ERR_ACCES;
1928 1928                                  goto out1;
1929 1929                          }
1930 1930                  }
1931 1931          }
1932 1932  
1933 1933          error = sattr3_to_vattr(&args->attributes, &va);
1934 1934          if (error)
1935 1935                  goto out;
1936 1936  
                   /* The client must supply a mode for the new directory. */
1937 1937          if (!(va.va_mask & AT_MODE)) {
1938 1938                  resp->status = NFS3ERR_INVAL;
1939 1939                  goto out1;
1940 1940          }
1941 1941  
                   /*
                    * Convert the client-supplied name.  The result is either
                    * args->where.name itself or a separate buffer, which is freed
                    * right after VOP_MKDIR(); the error exits therefore never need
                    * to free it.
                    */
1942 1942          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1943 1943          name = nfscmd_convname(ca, exi, args->where.name,
1944 1944              NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1945 1945  
1946 1946          if (name == NULL) {
1947 1947                  resp->status = NFS3ERR_INVAL;
1948 1948                  goto out1;
1949 1949          }
1950 1950  
1951 1951          va.va_mask |= AT_TYPE;
1952 1952          va.va_type = VDIR;
1953 1953  
1954 1954          error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1955 1955  
1956 1956          if (name != args->where.name)
1957 1957                  kmem_free(name, MAXPATHLEN + 1);
1958 1958  
                   /*
                    * Refresh the parent's post-operation attributes and sync the
                    * parent before looking at the VOP_MKDIR() result, so both
                    * happen on failure as well.
                    */
1959 1959          dava.va_mask = AT_ALL;
1960 1960          davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1961 1961  
1962 1962          /*
1963 1963           * Force modified data and metadata out to stable storage.
1964 1964           */
1965 1965          (void) VOP_FSYNC(dvp, 0, cr, NULL);
1966 1966  
1967 1967          if (error)
1968 1968                  goto out;
1969 1969  
                   /*
                    * A makefh3() failure does not fail the request; the reply is
                    * simply sent without a file handle.
                    */
1970 1970          error = makefh3(&resp->resok.obj.handle, vp, exi);
1971 1971          if (error)
1972 1972                  resp->resok.obj.handle_follows = FALSE;
1973 1973          else
1974 1974                  resp->resok.obj.handle_follows = TRUE;
1975 1975  
1976 1976          va.va_mask = AT_ALL;
1977 1977          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1978 1978  
1979 1979          /*
1980 1980           * Force modified data and metadata out to stable storage.
1981 1981           */
1982 1982          (void) VOP_FSYNC(vp, 0, cr, NULL);
1983 1983  
1984 1984          VN_RELE(vp);
1985 1985  
1986 1986          resp->status = NFS3_OK;
1987 1987          vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1988 1988          vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1989 1989  
1990 1990          DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1991 1991              cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1992 1992          VN_RELE(dvp);
1993 1993  
1994 1994          return;
1995 1995  
                   /*
                    * Failure with an errno in 'error': report JUKEBOX if the thread
                    * was flagged T_WOULDBLOCK (clearing the flag), otherwise map the
                    * errno to an NFSv3 status.
                    */
1996 1996  out:
1997 1997          if (curthread->t_flag & T_WOULDBLOCK) {
1998 1998                  curthread->t_flag &= ~T_WOULDBLOCK;
1999 1999                  resp->status = NFS3ERR_JUKEBOX;
2000 2000          } else
2001 2001                  resp->status = puterrno3(error);
                   /*
                    * Failure with resp->status already set: release the parent and
                    * report its attributes.
                    */
2002 2002  out1:
2003 2003          DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2004 2004              cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2005 2005          if (dvp != NULL)
2006 2006                  VN_RELE(dvp);
2007 2007          vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2008 2008  }
2009 2009  
           /*
            * Return a pointer to the directory file handle (args->where.dir)
            * carried in the MKDIR3 arguments.
            */
2010 2010  void *
2011 2011  rfs3_mkdir_getfh(MKDIR3args *args)
2012 2012  {
2013 2013  
2014 2014          return (&args->where.dir);
2015 2015  }
2016 2016  
           /*
            * NFSv3 SYMLINK: create a symbolic link named args->where.name in the
            * directory identified by the file handle args->where.dir, pointing at
            * args->symlink.symlink_data, and fill in *resp.
            *
            * On success resp->status is NFS3_OK and resp->resok holds the link's
            * file handle and attributes when they could be obtained, plus the
            * directory's before/after attributes.  On failure resp->status holds
            * the NFSv3 error and resp->resfail.dir_wcc the directory's before/after
            * attributes.
            *
            * Exits: "err" expects an errno in 'error' and converts it to an NFSv3
            * status; "err1" expects resp->status to be set already; "out" is the
            * common cleanup shared by the success and failure paths.
            */
2017 2017  void
2018 2018  rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2019 2019      struct svc_req *req, cred_t *cr, bool_t ro)
2020 2020  {
2021 2021          int error;
2022 2022          vnode_t *vp;
2023 2023          vnode_t *dvp;
2024 2024          struct vattr *vap;
2025 2025          struct vattr va;
2026 2026          struct vattr *dbvap;
2027 2027          struct vattr dbva;
2028 2028          struct vattr *davap;
2029 2029          struct vattr dava;
2030 2030          struct sockaddr *ca;
2031 2031          char *name = NULL;
2032 2032          char *symdata = NULL;
2033 2033  
2034 2034          dbvap = NULL;
2035 2035          davap = NULL;
2036 2036  
2037 2037          dvp = nfs3_fhtovp(&args->where.dir, exi);
2038 2038  
2039 2039          DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2040 2040              cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2041 2041  
2042 2042          if (dvp == NULL) {
2043 2043                  error = ESTALE;
2044 2044                  goto err;
2045 2045          }
2046 2046  
                   /*
                    * Capture the directory's pre-operation attributes.  Until the
                    * directory is modified they double as the post-operation
                    * attributes; davap is refreshed after VOP_SYMLINK() below.
                    */
2047 2047          dbva.va_mask = AT_ALL;
2048 2048          dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2049 2049          davap = dbvap;
2050 2050  
2051 2051          if (args->where.name == nfs3nametoolong) {
2052 2052                  resp->status = NFS3ERR_NAMETOOLONG;
2053 2053                  goto err1;
2054 2054          }
2055 2055  
2056 2056          if (args->where.name == NULL || *(args->where.name) == '\0') {
2057 2057                  resp->status = NFS3ERR_ACCES;
2058 2058                  goto err1;
2059 2059          }
2060 2060  
2061 2061          if (rdonly(ro, dvp)) {
2062 2062                  resp->status = NFS3ERR_ROFS;
2063 2063                  goto err1;
2064 2064          }
2065 2065  
2066 2066          if (is_system_labeled()) {
2067 2067                  bslabel_t *clabel = req->rq_label;
2068 2068  
2069 2069                  ASSERT(clabel != NULL);
2070 2070                  DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2071 2071                      "got client label from request(1)", struct svc_req *, req);
2072 2072  
2073 2073                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2074 2074                          if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2075 2075                              exi)) {
2076 2076                                  resp->status = NFS3ERR_ACCES;
2077 2077                                  goto err1;
2078 2078                          }
2079 2079                  }
2080 2080          }
2081 2081  
2082 2082          error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2083 2083          if (error)
2084 2084                  goto err;
2085 2085  
                   /* The client must supply a mode for the new link. */
2086 2086          if (!(va.va_mask & AT_MODE)) {
2087 2087                  resp->status = NFS3ERR_INVAL;
2088 2088                  goto err1;
2089 2089          }
2090 2090  
2091 2091          if (args->symlink.symlink_data == nfs3nametoolong) {
2092 2092                  resp->status = NFS3ERR_NAMETOOLONG;
2093 2093                  goto err1;
2094 2094          }
2095 2095  
                   /*
                    * Convert both the link name and the link contents.  Each result
                    * is either the original string or a separate buffer; separate
                    * buffers are freed at "out".
                    */
2096 2096          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2097 2097          name = nfscmd_convname(ca, exi, args->where.name,
2098 2098              NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2099 2099  
2100 2100          if (name == NULL) {
2101 2101                  /* This is really a Solaris EILSEQ */
2102 2102                  resp->status = NFS3ERR_INVAL;
2103 2103                  goto err1;
2104 2104          }
2105 2105  
2106 2106          symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2107 2107              NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2108 2108          if (symdata == NULL) {
2109 2109                  /* This is really a Solaris EILSEQ */
2110 2110                  resp->status = NFS3ERR_INVAL;
2111 2111                  goto err1;
2112 2112          }
2113 2113  
2114 2114  
2115 2115          va.va_mask |= AT_TYPE;
2116 2116          va.va_type = VLNK;
2117 2117  
2118 2118          error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2119 2119  
                   /*
                    * Refresh the directory's post-operation attributes whether or
                    * not the symlink was created.
                    */
2120 2120          dava.va_mask = AT_ALL;
2121 2121          davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2122 2122  
2123 2123          if (error)
2124 2124                  goto err;
2125 2125  
                   /*
                    * VOP_SYMLINK() as called above hands back no vnode, so look the
                    * new link up to build its file handle and attributes.
                    */
2126 2126          error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2127 2127              NULL, NULL, NULL);
2128 2128  
2129 2129          /*
2130 2130           * Force modified data and metadata out to stable storage.
2131 2131           */
2132 2132          (void) VOP_FSYNC(dvp, 0, cr, NULL);
2133 2133  
2134 2134  
                   /*
                    * The link exists at this point, so the request succeeds even if
                    * the lookup failed; in that case the reply carries neither a
                    * file handle nor object attributes.
                    */
2135 2135          resp->status = NFS3_OK;
2136 2136          if (error) {
2137 2137                  resp->resok.obj.handle_follows = FALSE;
2138 2138                  vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2139 2139                  vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2140 2140                  goto out;
2141 2141          }
2142 2142  
2143 2143          error = makefh3(&resp->resok.obj.handle, vp, exi);
2144 2144          if (error)
2145 2145                  resp->resok.obj.handle_follows = FALSE;
2146 2146          else
2147 2147                  resp->resok.obj.handle_follows = TRUE;
2148 2148  
2149 2149          va.va_mask = AT_ALL;
2150 2150          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2151 2151  
2152 2152          /*
2153 2153           * Force modified data and metadata out to stable storage.
2154 2154           */
2155 2155          (void) VOP_FSYNC(vp, 0, cr, NULL);
2156 2156  
2157 2157          VN_RELE(vp);
2158 2158  
2159 2159          vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2160 2160          vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2161 2161          goto out;
2162 2162  
                   /*
                    * Failure with an errno in 'error': report JUKEBOX if the thread
                    * was flagged T_WOULDBLOCK (clearing the flag), otherwise map the
                    * errno to an NFSv3 status.
                    */
2163 2163  err:
2164 2164          if (curthread->t_flag & T_WOULDBLOCK) {
2165 2165                  curthread->t_flag &= ~T_WOULDBLOCK;
2166 2166                  resp->status = NFS3ERR_JUKEBOX;
2167 2167          } else
2168 2168                  resp->status = puterrno3(error);
                   /* Failure with resp->status already set. */
2169 2169  err1:
2170 2170          vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
                   /* Common cleanup for the success and failure paths. */
2171 2171  out:
2172 2172          if (name != NULL && name != args->where.name)
2173 2173                  kmem_free(name, MAXPATHLEN + 1);
2174 2174          if (symdata != NULL && symdata != args->symlink.symlink_data)
2175 2175                  kmem_free(symdata, MAXPATHLEN + 1);
2176 2176  
2177 2177          DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2178 2178              cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2179 2179  
2180 2180          if (dvp != NULL)
2181 2181                  VN_RELE(dvp);
2182 2182  }
2183 2183  
           /*
            * Return a pointer to the directory file handle (args->where.dir)
            * carried in the SYMLINK3 arguments.
            */
2184 2184  void *
2185 2185  rfs3_symlink_getfh(SYMLINK3args *args)
2186 2186  {
2187 2187  
2188 2188          return (&args->where.dir);
2189 2189  }
2190 2190  
2191 2191  void
2192 2192  rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2193 2193      struct svc_req *req, cred_t *cr, bool_t ro)
2194 2194  {
2195 2195          int error;
2196 2196          vnode_t *vp;
2197 2197          vnode_t *realvp;
2198 2198          vnode_t *dvp;
2199 2199          struct vattr *vap;
2200 2200          struct vattr va;
2201 2201          struct vattr *dbvap;
2202 2202          struct vattr dbva;
2203 2203          struct vattr *davap;
2204 2204          struct vattr dava;
2205 2205          int mode;
2206 2206          enum vcexcl excl;
2207 2207          struct sockaddr *ca;
2208 2208          char *name = NULL;
2209 2209  
2210 2210          dbvap = NULL;
2211 2211          davap = NULL;
2212 2212  
2213 2213          dvp = nfs3_fhtovp(&args->where.dir, exi);
2214 2214  
2215 2215          DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2216 2216              cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2217 2217  
2218 2218          if (dvp == NULL) {
2219 2219                  error = ESTALE;
2220 2220                  goto out;
2221 2221          }
2222 2222  
2223 2223          dbva.va_mask = AT_ALL;
2224 2224          dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2225 2225          davap = dbvap;
2226 2226  
2227 2227          if (args->where.name == nfs3nametoolong) {
2228 2228                  resp->status = NFS3ERR_NAMETOOLONG;
2229 2229                  goto out1;
2230 2230          }
2231 2231  
2232 2232          if (args->where.name == NULL || *(args->where.name) == '\0') {
2233 2233                  resp->status = NFS3ERR_ACCES;
2234 2234                  goto out1;
2235 2235          }
2236 2236  
2237 2237          if (rdonly(ro, dvp)) {
2238 2238                  resp->status = NFS3ERR_ROFS;
2239 2239                  goto out1;
2240 2240          }
2241 2241  
2242 2242          if (is_system_labeled()) {
2243 2243                  bslabel_t *clabel = req->rq_label;
2244 2244  
2245 2245                  ASSERT(clabel != NULL);
2246 2246                  DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2247 2247                      "got client label from request(1)", struct svc_req *, req);
2248 2248  
2249 2249                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2250 2250                          if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2251 2251                              exi)) {
2252 2252                                  resp->status = NFS3ERR_ACCES;
2253 2253                                  goto out1;
2254 2254                          }
2255 2255                  }
2256 2256          }
2257 2257  
2258 2258          switch (args->what.type) {
2259 2259          case NF3CHR:
2260 2260          case NF3BLK:
2261 2261                  error = sattr3_to_vattr(
2262 2262                      &args->what.mknoddata3_u.device.dev_attributes, &va);
2263 2263                  if (error)
2264 2264                          goto out;
2265 2265                  if (secpolicy_sys_devices(cr) != 0) {
2266 2266                          resp->status = NFS3ERR_PERM;
2267 2267                          goto out1;
2268 2268                  }
2269 2269                  if (args->what.type == NF3CHR)
2270 2270                          va.va_type = VCHR;
2271 2271                  else
2272 2272                          va.va_type = VBLK;
2273 2273                  va.va_rdev = makedevice(
2274 2274                      args->what.mknoddata3_u.device.spec.specdata1,
2275 2275                      args->what.mknoddata3_u.device.spec.specdata2);
2276 2276                  va.va_mask |= AT_TYPE | AT_RDEV;
2277 2277                  break;
2278 2278          case NF3SOCK:
2279 2279                  error = sattr3_to_vattr(
2280 2280                      &args->what.mknoddata3_u.pipe_attributes, &va);
2281 2281                  if (error)
2282 2282                          goto out;
2283 2283                  va.va_type = VSOCK;
2284 2284                  va.va_mask |= AT_TYPE;
2285 2285                  break;
2286 2286          case NF3FIFO:
2287 2287                  error = sattr3_to_vattr(
2288 2288                      &args->what.mknoddata3_u.pipe_attributes, &va);
2289 2289                  if (error)
2290 2290                          goto out;
2291 2291                  va.va_type = VFIFO;
2292 2292                  va.va_mask |= AT_TYPE;
2293 2293                  break;
2294 2294          default:
2295 2295                  resp->status = NFS3ERR_BADTYPE;
2296 2296                  goto out1;
2297 2297          }
2298 2298  
2299 2299          /*
2300 2300           * Must specify the mode.
2301 2301           */
2302 2302          if (!(va.va_mask & AT_MODE)) {
2303 2303                  resp->status = NFS3ERR_INVAL;
2304 2304                  goto out1;
2305 2305          }
2306 2306  
2307 2307          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2308 2308          name = nfscmd_convname(ca, exi, args->where.name,
2309 2309              NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2310 2310  
2311 2311          if (name == NULL) {
2312 2312                  resp->status = NFS3ERR_INVAL;
2313 2313                  goto out1;
2314 2314          }
2315 2315  
2316 2316          excl = EXCL;
2317 2317  
2318 2318          mode = 0;
2319 2319  
2320 2320          error = VOP_CREATE(dvp, name, &va, excl, mode,
2321 2321              &vp, cr, 0, NULL, NULL);
2322 2322  
2323 2323          if (name != args->where.name)
2324 2324                  kmem_free(name, MAXPATHLEN + 1);
2325 2325  
2326 2326          dava.va_mask = AT_ALL;
2327 2327          davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2328 2328  
2329 2329          /*
2330 2330           * Force modified data and metadata out to stable storage.
2331 2331           */
2332 2332          (void) VOP_FSYNC(dvp, 0, cr, NULL);
2333 2333  
2334 2334          if (error)
2335 2335                  goto out;
2336 2336  
2337 2337          resp->status = NFS3_OK;
2338 2338  
2339 2339          error = makefh3(&resp->resok.obj.handle, vp, exi);
2340 2340          if (error)
2341 2341                  resp->resok.obj.handle_follows = FALSE;
2342 2342          else
2343 2343                  resp->resok.obj.handle_follows = TRUE;
2344 2344  
2345 2345          va.va_mask = AT_ALL;
2346 2346          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2347 2347  
2348 2348          /*
2349 2349           * Force modified metadata out to stable storage.
2350 2350           *
2351 2351           * if a underlying vp exists, pass it to VOP_FSYNC
2352 2352           */
2353 2353          if (VOP_REALVP(vp, &realvp, NULL) == 0)
2354 2354                  (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2355 2355          else
2356 2356                  (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2357 2357  
2358 2358          VN_RELE(vp);
2359 2359  
2360 2360          vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2361 2361          vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2362 2362          DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2363 2363              cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2364 2364          VN_RELE(dvp);
2365 2365          return;
2366 2366  
2367 2367  out:
2368 2368          if (curthread->t_flag & T_WOULDBLOCK) {
2369 2369                  curthread->t_flag &= ~T_WOULDBLOCK;
2370 2370                  resp->status = NFS3ERR_JUKEBOX;
2371 2371          } else
2372 2372                  resp->status = puterrno3(error);
2373 2373  out1:
2374 2374          DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2375 2375              cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2376 2376          if (dvp != NULL)
2377 2377                  VN_RELE(dvp);
2378 2378          vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2379 2379  }
2380 2380  
2381 2381  void *
2382 2382  rfs3_mknod_getfh(MKNOD3args *args)
2383 2383  {
2384 2384  
2385 2385          return (&args->where.dir);
2386 2386  }
2387 2387  
2388 2388  void
2389 2389  rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2390 2390      struct svc_req *req, cred_t *cr, bool_t ro)
2391 2391  {
2392 2392          int error = 0;
2393 2393          vnode_t *vp;
2394 2394          struct vattr *bvap;
2395 2395          struct vattr bva;
2396 2396          struct vattr *avap;
2397 2397          struct vattr ava;
2398 2398          vnode_t *targvp = NULL;
2399 2399          struct sockaddr *ca;
2400 2400          char *name = NULL;
2401 2401  
2402 2402          bvap = NULL;
2403 2403          avap = NULL;
2404 2404  
2405 2405          vp = nfs3_fhtovp(&args->object.dir, exi);
2406 2406  
2407 2407          DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2408 2408              cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2409 2409  
2410 2410          if (vp == NULL) {
2411 2411                  error = ESTALE;
2412 2412                  goto err;
2413 2413          }
2414 2414  
2415 2415          bva.va_mask = AT_ALL;
2416 2416          bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2417 2417          avap = bvap;
2418 2418  
2419 2419          if (vp->v_type != VDIR) {
2420 2420                  resp->status = NFS3ERR_NOTDIR;
2421 2421                  goto err1;
2422 2422          }
2423 2423  
2424 2424          if (args->object.name == nfs3nametoolong) {
2425 2425                  resp->status = NFS3ERR_NAMETOOLONG;
2426 2426                  goto err1;
2427 2427          }
2428 2428  
2429 2429          if (args->object.name == NULL || *(args->object.name) == '\0') {
2430 2430                  resp->status = NFS3ERR_ACCES;
2431 2431                  goto err1;
2432 2432          }
2433 2433  
2434 2434          if (rdonly(ro, vp)) {
2435 2435                  resp->status = NFS3ERR_ROFS;
2436 2436                  goto err1;
2437 2437          }
2438 2438  
2439 2439          if (is_system_labeled()) {
2440 2440                  bslabel_t *clabel = req->rq_label;
2441 2441  
2442 2442                  ASSERT(clabel != NULL);
2443 2443                  DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2444 2444                      "got client label from request(1)", struct svc_req *, req);
2445 2445  
2446 2446                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2447 2447                          if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2448 2448                              exi)) {
2449 2449                                  resp->status = NFS3ERR_ACCES;
2450 2450                                  goto err1;
2451 2451                          }
2452 2452                  }
2453 2453          }
2454 2454  
2455 2455          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2456 2456          name = nfscmd_convname(ca, exi, args->object.name,
2457 2457              NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2458 2458  
2459 2459          if (name == NULL) {
2460 2460                  resp->status = NFS3ERR_INVAL;
2461 2461                  goto err1;
2462 2462          }
2463 2463  
2464 2464          /*
2465 2465           * Check for a conflict with a non-blocking mandatory share
2466 2466           * reservation and V4 delegations
2467 2467           */
2468 2468          error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2469 2469              NULL, cr, NULL, NULL, NULL);
2470 2470          if (error != 0)
2471 2471                  goto err;
2472 2472  
2473 2473          if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2474 2474                  resp->status = NFS3ERR_JUKEBOX;
2475 2475                  goto err1;
2476 2476          }
2477 2477  
2478 2478          if (!nbl_need_check(targvp)) {
2479 2479                  error = VOP_REMOVE(vp, name, cr, NULL, 0);
2480 2480          } else {
2481 2481                  nbl_start_crit(targvp, RW_READER);
2482 2482                  if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2483 2483                          error = EACCES;
2484 2484                  } else {
2485 2485                          error = VOP_REMOVE(vp, name, cr, NULL, 0);
2486 2486                  }
2487 2487                  nbl_end_crit(targvp);
2488 2488          }
2489 2489          VN_RELE(targvp);
2490 2490          targvp = NULL;
2491 2491  
2492 2492          ava.va_mask = AT_ALL;
2493 2493          avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2494 2494  
2495 2495          /*
2496 2496           * Force modified data and metadata out to stable storage.
2497 2497           */
2498 2498          (void) VOP_FSYNC(vp, 0, cr, NULL);
2499 2499  
2500 2500          if (error)
2501 2501                  goto err;
2502 2502  
2503 2503          resp->status = NFS3_OK;
2504 2504          vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2505 2505          goto out;
2506 2506  
2507 2507  err:
2508 2508          if (curthread->t_flag & T_WOULDBLOCK) {
2509 2509                  curthread->t_flag &= ~T_WOULDBLOCK;
2510 2510                  resp->status = NFS3ERR_JUKEBOX;
2511 2511          } else
2512 2512                  resp->status = puterrno3(error);
2513 2513  err1:
2514 2514          vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2515 2515  out:
2516 2516          DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2517 2517              cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2518 2518  
2519 2519          if (name != NULL && name != args->object.name)
2520 2520                  kmem_free(name, MAXPATHLEN + 1);
2521 2521  
2522 2522          if (vp != NULL)
2523 2523                  VN_RELE(vp);
2524 2524  }
2525 2525  
2526 2526  void *
2527 2527  rfs3_remove_getfh(REMOVE3args *args)
2528 2528  {
2529 2529  
2530 2530          return (&args->object.dir);
2531 2531  }
2532 2532  
2533 2533  void
2534 2534  rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2535 2535      struct svc_req *req, cred_t *cr, bool_t ro)
2536 2536  {
2537 2537          int error;
2538 2538          vnode_t *vp;
2539 2539          struct vattr *bvap;
2540 2540          struct vattr bva;
2541 2541          struct vattr *avap;
2542 2542          struct vattr ava;
2543 2543          struct sockaddr *ca;
2544 2544          char *name = NULL;
2545 2545  
2546 2546          bvap = NULL;
2547 2547          avap = NULL;
2548 2548  
2549 2549          vp = nfs3_fhtovp(&args->object.dir, exi);
2550 2550  
2551 2551          DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2552 2552              cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2553 2553  
2554 2554          if (vp == NULL) {
2555 2555                  error = ESTALE;
2556 2556                  goto err;
2557 2557          }
2558 2558  
2559 2559          bva.va_mask = AT_ALL;
2560 2560          bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2561 2561          avap = bvap;
2562 2562  
2563 2563          if (vp->v_type != VDIR) {
2564 2564                  resp->status = NFS3ERR_NOTDIR;
2565 2565                  goto err1;
2566 2566          }
2567 2567  
2568 2568          if (args->object.name == nfs3nametoolong) {
2569 2569                  resp->status = NFS3ERR_NAMETOOLONG;
2570 2570                  goto err1;
2571 2571          }
2572 2572  
2573 2573          if (args->object.name == NULL || *(args->object.name) == '\0') {
2574 2574                  resp->status = NFS3ERR_ACCES;
2575 2575                  goto err1;
2576 2576          }
2577 2577  
2578 2578          if (rdonly(ro, vp)) {
2579 2579                  resp->status = NFS3ERR_ROFS;
2580 2580                  goto err1;
2581 2581          }
2582 2582  
2583 2583          if (is_system_labeled()) {
2584 2584                  bslabel_t *clabel = req->rq_label;
2585 2585  
2586 2586                  ASSERT(clabel != NULL);
2587 2587                  DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2588 2588                      "got client label from request(1)", struct svc_req *, req);
2589 2589  
2590 2590                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2591 2591                          if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2592 2592                              exi)) {
2593 2593                                  resp->status = NFS3ERR_ACCES;
2594 2594                                  goto err1;
2595 2595                          }
2596 2596                  }
2597 2597          }
2598 2598  
2599 2599          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2600 2600          name = nfscmd_convname(ca, exi, args->object.name,
2601 2601              NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2602 2602  
2603 2603          if (name == NULL) {
2604 2604                  resp->status = NFS3ERR_INVAL;
2605 2605                  goto err1;
2606 2606          }
2607 2607  
2608 2608          error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2609 2609  
2610 2610          if (name != args->object.name)
2611 2611                  kmem_free(name, MAXPATHLEN + 1);
2612 2612  
2613 2613          ava.va_mask = AT_ALL;
2614 2614          avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2615 2615  
2616 2616          /*
2617 2617           * Force modified data and metadata out to stable storage.
2618 2618           */
2619 2619          (void) VOP_FSYNC(vp, 0, cr, NULL);
2620 2620  
2621 2621          if (error) {
2622 2622                  /*
2623 2623                   * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2624 2624                   * if the directory is not empty.  A System V NFS server
2625 2625                   * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2626 2626                   * over the wire.
2627 2627                   */
2628 2628                  if (error == EEXIST)
2629 2629                          error = ENOTEMPTY;
2630 2630                  goto err;
2631 2631          }
2632 2632  
2633 2633          resp->status = NFS3_OK;
2634 2634          vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2635 2635          goto out;
2636 2636  
2637 2637  err:
2638 2638          if (curthread->t_flag & T_WOULDBLOCK) {
2639 2639                  curthread->t_flag &= ~T_WOULDBLOCK;
2640 2640                  resp->status = NFS3ERR_JUKEBOX;
2641 2641          } else
2642 2642                  resp->status = puterrno3(error);
2643 2643  err1:
2644 2644          vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2645 2645  out:
2646 2646          DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2647 2647              cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2648 2648          if (vp != NULL)
2649 2649                  VN_RELE(vp);
2650 2650  
2651 2651  }
2652 2652  
2653 2653  void *
2654 2654  rfs3_rmdir_getfh(RMDIR3args *args)
2655 2655  {
2656 2656  
2657 2657          return (&args->object.dir);
2658 2658  }
2659 2659  
2660 2660  void
2661 2661  rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2662 2662      struct svc_req *req, cred_t *cr, bool_t ro)
2663 2663  {
2664 2664          int error = 0;
2665 2665          vnode_t *fvp;
2666 2666          vnode_t *tvp;
2667 2667          vnode_t *targvp;
2668 2668          struct vattr *fbvap;
2669 2669          struct vattr fbva;
2670 2670          struct vattr *favap;
2671 2671          struct vattr fava;
2672 2672          struct vattr *tbvap;
2673 2673          struct vattr tbva;
2674 2674          struct vattr *tavap;
2675 2675          struct vattr tava;
2676 2676          nfs_fh3 *fh3;
2677 2677          struct exportinfo *to_exi;
2678 2678          vnode_t *srcvp = NULL;
2679 2679          bslabel_t *clabel;
2680 2680          struct sockaddr *ca;
2681 2681          char *name = NULL;
2682 2682          char *toname = NULL;
2683 2683  
2684 2684          fbvap = NULL;
2685 2685          favap = NULL;
2686 2686          tbvap = NULL;
2687 2687          tavap = NULL;
2688 2688          tvp = NULL;
2689 2689  
2690 2690          fvp = nfs3_fhtovp(&args->from.dir, exi);
2691 2691  
2692 2692          DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2693 2693              cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2694 2694  
2695 2695          if (fvp == NULL) {
2696 2696                  error = ESTALE;
2697 2697                  goto err;
2698 2698          }
2699 2699  
2700 2700          if (is_system_labeled()) {
2701 2701                  clabel = req->rq_label;
2702 2702                  ASSERT(clabel != NULL);
2703 2703                  DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2704 2704                      "got client label from request(1)", struct svc_req *, req);
2705 2705  
2706 2706                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2707 2707                          if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2708 2708                              exi)) {
2709 2709                                  resp->status = NFS3ERR_ACCES;
  
    | 
      ↓ open down ↓ | 
    2709 lines elided | 
    
      ↑ open up ↑ | 
  
2710 2710                                  goto err1;
2711 2711                          }
2712 2712                  }
2713 2713          }
2714 2714  
2715 2715          fbva.va_mask = AT_ALL;
2716 2716          fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2717 2717          favap = fbvap;
2718 2718  
2719 2719          fh3 = &args->to.dir;
2720      -        to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
     2720 +        to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3), NULL);
2721 2721          if (to_exi == NULL) {
2722 2722                  resp->status = NFS3ERR_ACCES;
2723 2723                  goto err1;
2724 2724          }
2725 2725          exi_rele(to_exi);
2726 2726  
2727 2727          if (to_exi != exi) {
2728 2728                  resp->status = NFS3ERR_XDEV;
2729 2729                  goto err1;
2730 2730          }
2731 2731  
2732 2732          tvp = nfs3_fhtovp(&args->to.dir, exi);
2733 2733          if (tvp == NULL) {
2734 2734                  error = ESTALE;
2735 2735                  goto err;
2736 2736          }
2737 2737  
2738 2738          tbva.va_mask = AT_ALL;
2739 2739          tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2740 2740          tavap = tbvap;
2741 2741  
2742 2742          if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2743 2743                  resp->status = NFS3ERR_NOTDIR;
2744 2744                  goto err1;
2745 2745          }
2746 2746  
2747 2747          if (args->from.name == nfs3nametoolong ||
2748 2748              args->to.name == nfs3nametoolong) {
2749 2749                  resp->status = NFS3ERR_NAMETOOLONG;
2750 2750                  goto err1;
2751 2751          }
2752 2752          if (args->from.name == NULL || *(args->from.name) == '\0' ||
2753 2753              args->to.name == NULL || *(args->to.name) == '\0') {
2754 2754                  resp->status = NFS3ERR_ACCES;
2755 2755                  goto err1;
2756 2756          }
2757 2757  
2758 2758          if (rdonly(ro, tvp)) {
2759 2759                  resp->status = NFS3ERR_ROFS;
2760 2760                  goto err1;
2761 2761          }
2762 2762  
2763 2763          if (is_system_labeled()) {
2764 2764                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2765 2765                          if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2766 2766                              exi)) {
2767 2767                                  resp->status = NFS3ERR_ACCES;
2768 2768                                  goto err1;
2769 2769                          }
2770 2770                  }
2771 2771          }
2772 2772  
2773 2773          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2774 2774          name = nfscmd_convname(ca, exi, args->from.name,
2775 2775              NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2776 2776  
2777 2777          if (name == NULL) {
2778 2778                  resp->status = NFS3ERR_INVAL;
2779 2779                  goto err1;
2780 2780          }
2781 2781  
2782 2782          toname = nfscmd_convname(ca, exi, args->to.name,
2783 2783              NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2784 2784  
2785 2785          if (toname == NULL) {
2786 2786                  resp->status = NFS3ERR_INVAL;
2787 2787                  goto err1;
2788 2788          }
2789 2789  
2790 2790          /*
2791 2791           * Check for a conflict with a non-blocking mandatory share
2792 2792           * reservation or V4 delegations.
2793 2793           */
2794 2794          error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2795 2795              NULL, cr, NULL, NULL, NULL);
2796 2796          if (error != 0)
2797 2797                  goto err;
2798 2798  
2799 2799          /*
2800 2800           * If we rename a delegated file we should recall the
2801 2801           * delegation, since future opens should fail or would
2802 2802           * refer to a new file.
2803 2803           */
2804 2804          if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2805 2805                  resp->status = NFS3ERR_JUKEBOX;
2806 2806                  goto err1;
2807 2807          }
2808 2808  
2809 2809          /*
2810 2810           * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2811 2811           * first to avoid VOP_LOOKUP if possible.
2812 2812           */
2813 2813          if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2814 2814              VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2815 2815              NULL, NULL, NULL) == 0) {
2816 2816  
2817 2817                  if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2818 2818                          VN_RELE(targvp);
2819 2819                          resp->status = NFS3ERR_JUKEBOX;
2820 2820                          goto err1;
2821 2821                  }
2822 2822                  VN_RELE(targvp);
2823 2823          }
2824 2824  
2825 2825          if (!nbl_need_check(srcvp)) {
2826 2826                  error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2827 2827          } else {
2828 2828                  nbl_start_crit(srcvp, RW_READER);
2829 2829                  if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2830 2830                          error = EACCES;
2831 2831                  else
2832 2832                          error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2833 2833                  nbl_end_crit(srcvp);
2834 2834          }
2835 2835          if (error == 0)
2836 2836                  vn_renamepath(tvp, srcvp, args->to.name,
2837 2837                      strlen(args->to.name));
2838 2838          VN_RELE(srcvp);
2839 2839          srcvp = NULL;
2840 2840  
2841 2841          fava.va_mask = AT_ALL;
2842 2842          favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2843 2843          tava.va_mask = AT_ALL;
2844 2844          tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2845 2845  
2846 2846          /*
2847 2847           * Force modified data and metadata out to stable storage.
2848 2848           */
2849 2849          (void) VOP_FSYNC(fvp, 0, cr, NULL);
2850 2850          (void) VOP_FSYNC(tvp, 0, cr, NULL);
2851 2851  
2852 2852          if (error)
2853 2853                  goto err;
2854 2854  
2855 2855          resp->status = NFS3_OK;
2856 2856          vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2857 2857          vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2858 2858          goto out;
2859 2859  
2860 2860  err:
2861 2861          if (curthread->t_flag & T_WOULDBLOCK) {
2862 2862                  curthread->t_flag &= ~T_WOULDBLOCK;
2863 2863                  resp->status = NFS3ERR_JUKEBOX;
2864 2864          } else {
2865 2865                  resp->status = puterrno3(error);
2866 2866          }
2867 2867  err1:
2868 2868          vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2869 2869          vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2870 2870  
2871 2871  out:
2872 2872          if (name != NULL && name != args->from.name)
2873 2873                  kmem_free(name, MAXPATHLEN + 1);
2874 2874          if (toname != NULL && toname != args->to.name)
2875 2875                  kmem_free(toname, MAXPATHLEN + 1);
2876 2876  
2877 2877          DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2878 2878              cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2879 2879          if (fvp != NULL)
2880 2880                  VN_RELE(fvp);
2881 2881          if (tvp != NULL)
2882 2882                  VN_RELE(tvp);
2883 2883  }
2884 2884  
2885 2885  void *
2886 2886  rfs3_rename_getfh(RENAME3args *args)
2887 2887  {
2888 2888  
2889 2889          return (&args->from.dir);
2890 2890  }
2891 2891  
2892 2892  void
2893 2893  rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2894 2894      struct svc_req *req, cred_t *cr, bool_t ro)
2895 2895  {
2896 2896          int error;
2897 2897          vnode_t *vp;
2898 2898          vnode_t *dvp;
2899 2899          struct vattr *vap;
2900 2900          struct vattr va;
2901 2901          struct vattr *bvap;
2902 2902          struct vattr bva;
2903 2903          struct vattr *avap;
2904 2904          struct vattr ava;
2905 2905          nfs_fh3 *fh3;
2906 2906          struct exportinfo *to_exi;
2907 2907          bslabel_t *clabel;
2908 2908          struct sockaddr *ca;
2909 2909          char *name = NULL;
2910 2910  
2911 2911          vap = NULL;
2912 2912          bvap = NULL;
2913 2913          avap = NULL;
2914 2914          dvp = NULL;
2915 2915  
2916 2916          vp = nfs3_fhtovp(&args->file, exi);
2917 2917  
2918 2918          DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2919 2919              cred_t *, cr, vnode_t *, vp, LINK3args *, args);
  
    | 
      ↓ open down ↓ | 
    189 lines elided | 
    
      ↑ open up ↑ | 
  
2920 2920  
2921 2921          if (vp == NULL) {
2922 2922                  error = ESTALE;
2923 2923                  goto out;
2924 2924          }
2925 2925  
2926 2926          va.va_mask = AT_ALL;
2927 2927          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2928 2928  
2929 2929          fh3 = &args->link.dir;
2930      -        to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
     2930 +        to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3), NULL);
2931 2931          if (to_exi == NULL) {
2932 2932                  resp->status = NFS3ERR_ACCES;
2933 2933                  goto out1;
2934 2934          }
2935 2935          exi_rele(to_exi);
2936 2936  
2937 2937          if (to_exi != exi) {
2938 2938                  resp->status = NFS3ERR_XDEV;
2939 2939                  goto out1;
2940 2940          }
2941 2941  
2942 2942          if (is_system_labeled()) {
2943 2943                  clabel = req->rq_label;
2944 2944  
2945 2945                  ASSERT(clabel != NULL);
2946 2946                  DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2947 2947                      "got client label from request(1)", struct svc_req *, req);
2948 2948  
2949 2949                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2950 2950                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2951 2951                              exi)) {
2952 2952                                  resp->status = NFS3ERR_ACCES;
2953 2953                                  goto out1;
2954 2954                          }
2955 2955                  }
2956 2956          }
2957 2957  
2958 2958          dvp = nfs3_fhtovp(&args->link.dir, exi);
2959 2959          if (dvp == NULL) {
2960 2960                  error = ESTALE;
2961 2961                  goto out;
2962 2962          }
2963 2963  
2964 2964          bva.va_mask = AT_ALL;
2965 2965          bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2966 2966  
2967 2967          if (dvp->v_type != VDIR) {
2968 2968                  resp->status = NFS3ERR_NOTDIR;
2969 2969                  goto out1;
2970 2970          }
2971 2971  
2972 2972          if (args->link.name == nfs3nametoolong) {
2973 2973                  resp->status = NFS3ERR_NAMETOOLONG;
2974 2974                  goto out1;
2975 2975          }
2976 2976  
2977 2977          if (args->link.name == NULL || *(args->link.name) == '\0') {
2978 2978                  resp->status = NFS3ERR_ACCES;
2979 2979                  goto out1;
2980 2980          }
2981 2981  
2982 2982          if (rdonly(ro, dvp)) {
2983 2983                  resp->status = NFS3ERR_ROFS;
2984 2984                  goto out1;
2985 2985          }
2986 2986  
2987 2987          if (is_system_labeled()) {
2988 2988                  DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2989 2989                      "got client label from request(1)", struct svc_req *, req);
2990 2990  
2991 2991                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
2992 2992                          if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2993 2993                              exi)) {
2994 2994                                  resp->status = NFS3ERR_ACCES;
2995 2995                                  goto out1;
2996 2996                          }
2997 2997                  }
2998 2998          }
2999 2999  
3000 3000          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3001 3001          name = nfscmd_convname(ca, exi, args->link.name,
3002 3002              NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3003 3003  
3004 3004          if (name == NULL) {
3005 3005                  resp->status = NFS3ERR_SERVERFAULT;
3006 3006                  goto out1;
3007 3007          }
3008 3008  
3009 3009          error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3010 3010  
3011 3011          va.va_mask = AT_ALL;
3012 3012          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3013 3013          ava.va_mask = AT_ALL;
3014 3014          avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3015 3015  
3016 3016          /*
3017 3017           * Force modified data and metadata out to stable storage.
3018 3018           */
3019 3019          (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3020 3020          (void) VOP_FSYNC(dvp, 0, cr, NULL);
3021 3021  
3022 3022          if (error)
3023 3023                  goto out;
3024 3024  
3025 3025          VN_RELE(dvp);
3026 3026  
3027 3027          resp->status = NFS3_OK;
3028 3028          vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3029 3029          vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3030 3030  
3031 3031          DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3032 3032              cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3033 3033  
3034 3034          VN_RELE(vp);
3035 3035  
3036 3036          return;
3037 3037  
3038 3038  out:
3039 3039          if (curthread->t_flag & T_WOULDBLOCK) {
3040 3040                  curthread->t_flag &= ~T_WOULDBLOCK;
3041 3041                  resp->status = NFS3ERR_JUKEBOX;
3042 3042          } else
3043 3043                  resp->status = puterrno3(error);
3044 3044  out1:
3045 3045          if (name != NULL && name != args->link.name)
3046 3046                  kmem_free(name, MAXPATHLEN + 1);
3047 3047  
3048 3048          DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3049 3049              cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3050 3050  
3051 3051          if (vp != NULL)
3052 3052                  VN_RELE(vp);
3053 3053          if (dvp != NULL)
3054 3054                  VN_RELE(dvp);
3055 3055          vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3056 3056          vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3057 3057  }
3058 3058  
3059 3059  void *
3060 3060  rfs3_link_getfh(LINK3args *args)
3061 3061  {
3062 3062  
3063 3063          return (&args->file);
3064 3064  }
3065 3065  
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response that carries the directory attributes plus a single directory
 * entry whose name is `length' bytes long.  If the count in the incoming
 * request is larger than this, then we are guaranteed to be able to
 * return at least one directory entry if one exists, and there is no
 * need to check for NFS3ERR_TOOSMALL.  If the requested count is not
 * larger than this, the caller must check whether that error needs to
 * be returned.
 *
 * The response is made up of the following, in XDR units:
 *
 *      status                          1
 *      attr. flag                      1
 *      cookie verifier                 2
 *      attributes                      NFS3_SIZEOF_FATTR3
 *      boolean                         1
 *      file id                         2
 *      directory name length           1
 *      cookie                          2
 *      end of list                     1
 *      end of file                     1
 *
 * each multiplied by BYTES_PER_XDR_UNIT, plus the length of the name
 * rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define NFS3_READDIR_MIN_COUNT(length)                                  \
        (roundup((length), BYTES_PER_XDR_UNIT) +                        \
            BYTES_PER_XDR_UNIT *                                        \
            (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3094 3094  
3095 3095  /* ARGSUSED */
3096 3096  void
3097 3097  rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3098 3098      struct svc_req *req, cred_t *cr, bool_t ro)
3099 3099  {
3100 3100          int error;
3101 3101          vnode_t *vp;
3102 3102          struct vattr *vap;
3103 3103          struct vattr va;
3104 3104          struct iovec iov;
3105 3105          struct uio uio;
3106 3106          char *data;
3107 3107          int iseof;
3108 3108          int bufsize;
3109 3109          int namlen;
3110 3110          uint_t count;
3111 3111          struct sockaddr *ca;
3112 3112  
3113 3113          vap = NULL;
3114 3114  
3115 3115          vp = nfs3_fhtovp(&args->dir, exi);
3116 3116  
3117 3117          DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3118 3118              cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3119 3119  
3120 3120          if (vp == NULL) {
3121 3121                  error = ESTALE;
3122 3122                  goto out;
3123 3123          }
3124 3124  
3125 3125          if (is_system_labeled()) {
3126 3126                  bslabel_t *clabel = req->rq_label;
3127 3127  
3128 3128                  ASSERT(clabel != NULL);
3129 3129                  DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3130 3130                      "got client label from request(1)", struct svc_req *, req);
3131 3131  
3132 3132                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
3133 3133                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3134 3134                              exi)) {
3135 3135                                  resp->status = NFS3ERR_ACCES;
3136 3136                                  goto out1;
3137 3137                          }
3138 3138                  }
3139 3139          }
3140 3140  
3141 3141          (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3142 3142  
3143 3143          va.va_mask = AT_ALL;
3144 3144          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3145 3145  
3146 3146          if (vp->v_type != VDIR) {
3147 3147                  resp->status = NFS3ERR_NOTDIR;
3148 3148                  goto out1;
3149 3149          }
3150 3150  
3151 3151          error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3152 3152          if (error)
3153 3153                  goto out;
3154 3154  
3155 3155          /*
3156 3156           * Now don't allow arbitrary count to alloc;
3157 3157           * allow the maximum not to exceed rfs3_tsize()
3158 3158           */
3159 3159          if (args->count > rfs3_tsize(req))
3160 3160                  args->count = rfs3_tsize(req);
3161 3161  
3162 3162          /*
3163 3163           * Make sure that there is room to read at least one entry
3164 3164           * if any are available.
3165 3165           */
3166 3166          if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3167 3167                  count = DIRENT64_RECLEN(MAXNAMELEN);
3168 3168          else
3169 3169                  count = args->count;
3170 3170  
3171 3171          data = kmem_alloc(count, KM_SLEEP);
3172 3172  
3173 3173          iov.iov_base = data;
3174 3174          iov.iov_len = count;
3175 3175          uio.uio_iov = &iov;
3176 3176          uio.uio_iovcnt = 1;
3177 3177          uio.uio_segflg = UIO_SYSSPACE;
3178 3178          uio.uio_extflg = UIO_COPY_CACHED;
3179 3179          uio.uio_loffset = (offset_t)args->cookie;
3180 3180          uio.uio_resid = count;
3181 3181  
3182 3182          error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3183 3183  
3184 3184          va.va_mask = AT_ALL;
3185 3185          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3186 3186  
3187 3187          if (error) {
3188 3188                  kmem_free(data, count);
3189 3189                  goto out;
3190 3190          }
3191 3191  
3192 3192          /*
3193 3193           * If the count was not large enough to be able to guarantee
3194 3194           * to be able to return at least one entry, then need to
3195 3195           * check to see if NFS3ERR_TOOSMALL should be returned.
3196 3196           */
3197 3197          if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3198 3198                  /*
3199 3199                   * bufsize is used to keep track of the size of the response.
3200 3200                   * It is primed with:
3201 3201                   *      1 for the status +
3202 3202                   *      1 for the dir_attributes.attributes boolean +
3203 3203                   *      2 for the cookie verifier
3204 3204                   * all times BYTES_PER_XDR_UNIT to convert from XDR units
3205 3205                   * to bytes.  If there are directory attributes to be
3206 3206                   * returned, then:
3207 3207                   *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3208 3208                   * time BYTES_PER_XDR_UNIT is added to account for them.
3209 3209                   */
3210 3210                  bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3211 3211                  if (vap != NULL)
3212 3212                          bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3213 3213                  /*
3214 3214                   * An entry is composed of:
3215 3215                   *      1 for the true/false list indicator +
3216 3216                   *      2 for the fileid +
3217 3217                   *      1 for the length of the name +
3218 3218                   *      2 for the cookie +
3219 3219                   * all times BYTES_PER_XDR_UNIT to convert from
3220 3220                   * XDR units to bytes, plus the length of the name
3221 3221                   * rounded up to the nearest BYTES_PER_XDR_UNIT.
3222 3222                   */
3223 3223                  if (count != uio.uio_resid) {
3224 3224                          namlen = strlen(((struct dirent64 *)data)->d_name);
3225 3225                          bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3226 3226                              roundup(namlen, BYTES_PER_XDR_UNIT);
3227 3227                  }
3228 3228                  /*
3229 3229                   * We need to check to see if the number of bytes left
3230 3230                   * to go into the buffer will actually fit into the
3231 3231                   * buffer.  This is calculated as the size of this
3232 3232                   * entry plus:
3233 3233                   *      1 for the true/false list indicator +
3234 3234                   *      1 for the eof indicator
3235 3235                   * times BYTES_PER_XDR_UNIT to convert from from
3236 3236                   * XDR units to bytes.
3237 3237                   */
3238 3238                  bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3239 3239                  if (bufsize > args->count) {
3240 3240                          kmem_free(data, count);
3241 3241                          resp->status = NFS3ERR_TOOSMALL;
3242 3242                          goto out1;
3243 3243                  }
3244 3244          }
3245 3245  
3246 3246          /*
3247 3247           * Have a valid readir buffer for the native character
3248 3248           * set. Need to check if a conversion is necessary and
3249 3249           * potentially rewrite the whole buffer. Note that if the
3250 3250           * conversion expands names enough, the structure may not
3251 3251           * fit. In this case, we need to drop entries until if fits
3252 3252           * and patch the counts in order that the next readdir will
3253 3253           * get the correct entries.
3254 3254           */
3255 3255          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3256 3256          data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3257 3257  
3258 3258  
3259 3259          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3260 3260  
3261 3261  #if 0 /* notyet */
3262 3262          /*
3263 3263           * Don't do this.  It causes local disk writes when just
3264 3264           * reading the file and the overhead is deemed larger
3265 3265           * than the benefit.
3266 3266           */
3267 3267          /*
3268 3268           * Force modified metadata out to stable storage.
3269 3269           */
3270 3270          (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3271 3271  #endif
3272 3272  
3273 3273          resp->status = NFS3_OK;
3274 3274          vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3275 3275          resp->resok.cookieverf = 0;
3276 3276          resp->resok.reply.entries = (entry3 *)data;
3277 3277          resp->resok.reply.eof = iseof;
3278 3278          resp->resok.size = count - uio.uio_resid;
3279 3279          resp->resok.count = args->count;
3280 3280          resp->resok.freecount = count;
3281 3281  
3282 3282          DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3283 3283              cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3284 3284  
3285 3285          VN_RELE(vp);
3286 3286  
3287 3287          return;
3288 3288  
3289 3289  out:
3290 3290          if (curthread->t_flag & T_WOULDBLOCK) {
3291 3291                  curthread->t_flag &= ~T_WOULDBLOCK;
3292 3292                  resp->status = NFS3ERR_JUKEBOX;
3293 3293          } else
3294 3294                  resp->status = puterrno3(error);
3295 3295  out1:
3296 3296          DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3297 3297              cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3298 3298  
3299 3299          if (vp != NULL) {
3300 3300                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3301 3301                  VN_RELE(vp);
3302 3302          }
3303 3303          vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3304 3304  }
3305 3305  
3306 3306  void *
3307 3307  rfs3_readdir_getfh(READDIR3args *args)
3308 3308  {
3309 3309  
3310 3310          return (&args->dir);
3311 3311  }
3312 3312  
3313 3313  void
3314 3314  rfs3_readdir_free(READDIR3res *resp)
3315 3315  {
3316 3316  
3317 3317          if (resp->status == NFS3_OK)
3318 3318                  kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3319 3319  }
3320 3320  
/*
 * nextdp(dp): step from one dirent64 to the one that follows it in the
 * same buffer, i.e. the address d_reclen bytes past dp.  Any earlier
 * definition of the macro is discarded first.
 */
#undef nextdp
#define nextdp(dp)      \
        ((struct dirent64 *)(&((char *)(dp))[(dp)->d_reclen]))
3325 3325  
3326 3326  /*
3327 3327   * This macro computes the size of a response which contains
3328 3328   * one directory entry including the attributes as well as file handle.
3329 3329   * If the incoming request is larger than this, then we are guaranteed to be
3330 3330   * able to return at least one more directory entry if one exists.
3331 3331   *
3332 3332   * NFS3_READDIRPLUS_ENTRY is made up of the following:
3333 3333   *
3334 3334   * boolean - 1 * BYTES_PER_XDR_UNIT
3335 3335   * file id - 2 * BYTES_PER_XDR_UNIT
3336 3336   * directory name length - 1 * BYTES_PER_XDR_UNIT
3337 3337   * cookie - 2 * BYTES_PER_XDR_UNIT
3338 3338   * attribute flag - 1 * BYTES_PER_XDR_UNIT
3339 3339   * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
3340 3340   * status byte for file handle - 1 *  BYTES_PER_XDR_UNIT
3341 3341   * length of a file handle - 1 * BYTES_PER_XDR_UNIT
3342 3342   * Maximum length of a file handle (NFS3_MAXFHSIZE)
3343 3343   * name length of the entry to the nearest bytes
3344 3344   */
3345 3345  #define NFS3_READDIRPLUS_ENTRY(namelen) \
3346 3346          ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
3347 3347                  BYTES_PER_XDR_UNIT + \
3348 3348          NFS3_MAXFHSIZE + roundup(namelen, BYTES_PER_XDR_UNIT))
3349 3349  
3350 3350  static int rfs3_readdir_unit = MAXBSIZE;
3351 3351  
3352 3352  /* ARGSUSED */
3353 3353  void
3354 3354  rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3355 3355      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3356 3356  {
3357 3357          int error;
3358 3358          vnode_t *vp;
3359 3359          struct vattr *vap;
3360 3360          struct vattr va;
3361 3361          struct iovec iov;
3362 3362          struct uio uio;
3363 3363          char *data;
3364 3364          int iseof;
3365 3365          struct dirent64 *dp;
3366 3366          vnode_t *nvp;
3367 3367          struct vattr *nvap;
3368 3368          struct vattr nva;
3369 3369          entryplus3_info *infop = NULL;
3370 3370          int size = 0;
3371 3371          int nents = 0;
3372 3372          int bufsize = 0;
3373 3373          int entrysize = 0;
3374 3374          int tofit = 0;
3375 3375          int rd_unit = rfs3_readdir_unit;
3376 3376          int prev_len;
3377 3377          int space_left;
3378 3378          int i;
3379 3379          uint_t *namlen = NULL;
3380 3380          char *ndata = NULL;
3381 3381          struct sockaddr *ca;
3382 3382          size_t ret;
3383 3383  
3384 3384          vap = NULL;
3385 3385  
3386 3386          vp = nfs3_fhtovp(&args->dir, exi);
3387 3387  
3388 3388          DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3389 3389              cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3390 3390  
3391 3391          if (vp == NULL) {
3392 3392                  error = ESTALE;
3393 3393                  goto out;
3394 3394          }
3395 3395  
3396 3396          if (is_system_labeled()) {
3397 3397                  bslabel_t *clabel = req->rq_label;
3398 3398  
3399 3399                  ASSERT(clabel != NULL);
3400 3400                  DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3401 3401                      char *, "got client label from request(1)",
3402 3402                      struct svc_req *, req);
3403 3403  
3404 3404                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
3405 3405                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3406 3406                              exi)) {
3407 3407                                  resp->status = NFS3ERR_ACCES;
3408 3408                                  goto out1;
3409 3409                          }
3410 3410                  }
3411 3411          }
3412 3412  
3413 3413          (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3414 3414  
3415 3415          va.va_mask = AT_ALL;
3416 3416          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3417 3417  
3418 3418          if (vp->v_type != VDIR) {
3419 3419                  error = ENOTDIR;
3420 3420                  goto out;
3421 3421          }
3422 3422  
3423 3423          error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3424 3424          if (error)
3425 3425                  goto out;
3426 3426  
3427 3427          /*
3428 3428           * Don't allow arbitrary counts for allocation
3429 3429           */
3430 3430          if (args->maxcount > rfs3_tsize(req))
3431 3431                  args->maxcount = rfs3_tsize(req);
3432 3432  
3433 3433          /*
3434 3434           * Make sure that there is room to read at least one entry
3435 3435           * if any are available
3436 3436           */
3437 3437          args->dircount = MIN(args->dircount, args->maxcount);
3438 3438  
3439 3439          if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3440 3440                  args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3441 3441  
3442 3442          /*
3443 3443           * This allocation relies on a minimum directory entry
3444 3444           * being roughly 24 bytes.  Therefore, the namlen array
3445 3445           * will have enough space based on the maximum number of
3446 3446           * entries to read.
3447 3447           */
3448 3448          namlen = kmem_alloc(args->dircount, KM_SLEEP);
3449 3449  
3450 3450          space_left = args->dircount;
3451 3451          data = kmem_alloc(args->dircount, KM_SLEEP);
3452 3452          dp = (struct dirent64 *)data;
3453 3453          uio.uio_iov = &iov;
3454 3454          uio.uio_iovcnt = 1;
3455 3455          uio.uio_segflg = UIO_SYSSPACE;
3456 3456          uio.uio_extflg = UIO_COPY_CACHED;
3457 3457          uio.uio_loffset = (offset_t)args->cookie;
3458 3458  
3459 3459          /*
3460 3460           * bufsize is used to keep track of the size of the response as we
3461 3461           * get post op attributes and filehandles for each entry.  This is
3462 3462           * an optimization as the server may have read more entries than will
3463 3463           * fit in the buffer specified by maxcount.  We stop calculating
3464 3464           * post op attributes and filehandles once we have exceeded maxcount.
3465 3465           * This will minimize the effect of truncation.
3466 3466           *
3467 3467           * It is primed with:
3468 3468           *      1 for the status +
3469 3469           *      1 for the dir_attributes.attributes boolean +
3470 3470           *      2 for the cookie verifier
3471 3471           * all times BYTES_PER_XDR_UNIT to convert from XDR units
3472 3472           * to bytes.  If there are directory attributes to be
3473 3473           * returned, then:
3474 3474           *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3475 3475           * time BYTES_PER_XDR_UNIT is added to account for them.
3476 3476           */
3477 3477          bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3478 3478          if (vap != NULL)
3479 3479                  bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3480 3480  
3481 3481  getmoredents:
3482 3482          /*
3483 3483           * Here we make a check so that our read unit is not larger than
3484 3484           * the space left in the buffer.
3485 3485           */
3486 3486          rd_unit = MIN(rd_unit, space_left);
3487 3487          iov.iov_base = (char *)dp;
3488 3488          iov.iov_len = rd_unit;
3489 3489          uio.uio_resid = rd_unit;
3490 3490          prev_len = rd_unit;
3491 3491  
3492 3492          error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3493 3493  
3494 3494          if (error) {
3495 3495                  kmem_free(data, args->dircount);
3496 3496                  goto out;
3497 3497          }
3498 3498  
3499 3499          if (uio.uio_resid == prev_len && !iseof) {
3500 3500                  if (nents == 0) {
3501 3501                          kmem_free(data, args->dircount);
3502 3502                          resp->status = NFS3ERR_TOOSMALL;
3503 3503                          goto out1;
3504 3504                  }
3505 3505  
3506 3506                  /*
3507 3507                   * We could not get any more entries, so get the attributes
3508 3508                   * and filehandle for the entries already obtained.
3509 3509                   */
3510 3510                  goto good;
3511 3511          }
3512 3512  
3513 3513          /*
3514 3514           * We estimate the size of the response by assuming the
3515 3515           * entry exists and attributes and filehandle are also valid
3516 3516           */
3517 3517          for (size = prev_len - uio.uio_resid;
3518 3518              size > 0;
3519 3519              size -= dp->d_reclen, dp = nextdp(dp)) {
3520 3520  
3521 3521                  if (dp->d_ino == 0) {
3522 3522                          nents++;
3523 3523                          continue;
3524 3524                  }
3525 3525  
3526 3526                  namlen[nents] = strlen(dp->d_name);
3527 3527                  entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3528 3528  
3529 3529                  /*
3530 3530                   * We need to check to see if the number of bytes left
3531 3531                   * to go into the buffer will actually fit into the
3532 3532                   * buffer.  This is calculated as the size of this
3533 3533                   * entry plus:
3534 3534                   *      1 for the true/false list indicator +
3535 3535                   *      1 for the eof indicator
3536 3536                   * times BYTES_PER_XDR_UNIT to convert from XDR units
3537 3537                   * to bytes.
3538 3538                   *
3539 3539                   * Also check the dircount limit against the first entry read
3540 3540                   *
3541 3541                   */
3542 3542                  tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3543 3543                  if (bufsize + tofit > args->maxcount) {
3544 3544                          /*
3545 3545                           * We make a check here to see if this was the
3546 3546                           * first entry being measured.  If so, then maxcount
3547 3547                           * was too small to begin with and so we need to
3548 3548                           * return with NFS3ERR_TOOSMALL.
3549 3549                           */
3550 3550                          if (nents == 0) {
3551 3551                                  kmem_free(data, args->dircount);
3552 3552                                  resp->status = NFS3ERR_TOOSMALL;
3553 3553                                  goto out1;
3554 3554                          }
3555 3555                          iseof = FALSE;
3556 3556                          goto good;
3557 3557                  }
3558 3558                  bufsize += entrysize;
3559 3559                  nents++;
3560 3560          }
3561 3561  
3562 3562          /*
3563 3563           * If there is enough room to fit at least 1 more entry including
3564 3564           * post op attributes and filehandle in the buffer AND that we haven't
3565 3565           * exceeded dircount then go back and get some more.
3566 3566           */
3567 3567          if (!iseof &&
3568 3568              (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3569 3569                  space_left -= (prev_len - uio.uio_resid);
3570 3570                  if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3571 3571                          goto getmoredents;
3572 3572  
3573 3573                  /* else, fall through */
3574 3574          }
3575 3575  good:
3576 3576          va.va_mask = AT_ALL;
3577 3577          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3578 3578  
3579 3579          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3580 3580  
3581 3581          infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3582 3582          resp->resok.infop = infop;
3583 3583  
3584 3584          dp = (struct dirent64 *)data;
3585 3585          for (i = 0; i < nents; i++) {
3586 3586  
3587 3587                  if (dp->d_ino == 0) {
3588 3588                          infop[i].attr.attributes = FALSE;
3589 3589                          infop[i].fh.handle_follows = FALSE;
3590 3590                          dp = nextdp(dp);
3591 3591                          continue;
3592 3592                  }
3593 3593  
3594 3594                  infop[i].namelen = namlen[i];
3595 3595  
3596 3596                  error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3597 3597                      NULL, NULL, NULL);
3598 3598                  if (error) {
3599 3599                          infop[i].attr.attributes = FALSE;
3600 3600                          infop[i].fh.handle_follows = FALSE;
3601 3601                          dp = nextdp(dp);
3602 3602                          continue;
3603 3603                  }
3604 3604  
3605 3605                  nva.va_mask = AT_ALL;
3606 3606                  nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3607 3607  
3608 3608                  /* Lie about the object type for a referral */
3609 3609                  if (vn_is_nfs_reparse(nvp, cr))
3610 3610                          nvap->va_type = VLNK;
3611 3611  
3612 3612                  vattr_to_post_op_attr(nvap, &infop[i].attr);
3613 3613  
3614 3614                  error = makefh3(&infop[i].fh.handle, nvp, exi);
3615 3615                  if (!error)
3616 3616                          infop[i].fh.handle_follows = TRUE;
3617 3617                  else
3618 3618                          infop[i].fh.handle_follows = FALSE;
3619 3619  
3620 3620                  VN_RELE(nvp);
3621 3621                  dp = nextdp(dp);
3622 3622          }
3623 3623  
3624 3624          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3625 3625          ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3626 3626          if (ndata == NULL)
3627 3627                  ndata = data;
3628 3628  
3629 3629          if (ret > 0) {
3630 3630                  /*
3631 3631                   * We had to drop one or more entries in order to fit
3632 3632                   * during the character conversion.  We need to patch
3633 3633                   * up the size and eof info.
3634 3634                   */
3635 3635                  if (iseof)
3636 3636                          iseof = FALSE;
3637 3637  
3638 3638                  ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3639 3639                      nents, ret);
3640 3640          }
3641 3641  
3642 3642  
3643 3643  #if 0 /* notyet */
3644 3644          /*
3645 3645           * Don't do this.  It causes local disk writes when just
3646 3646           * reading the file and the overhead is deemed larger
3647 3647           * than the benefit.
3648 3648           */
3649 3649          /*
3650 3650           * Force modified metadata out to stable storage.
3651 3651           */
3652 3652          (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3653 3653  #endif
3654 3654  
3655 3655          kmem_free(namlen, args->dircount);
3656 3656  
3657 3657          resp->status = NFS3_OK;
3658 3658          vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3659 3659          resp->resok.cookieverf = 0;
3660 3660          resp->resok.reply.entries = (entryplus3 *)ndata;
3661 3661          resp->resok.reply.eof = iseof;
3662 3662          resp->resok.size = nents;
3663 3663          resp->resok.count = args->dircount - ret;
3664 3664          resp->resok.maxcount = args->maxcount;
3665 3665  
3666 3666          DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3667 3667              cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3668 3668          if (ndata != data)
3669 3669                  kmem_free(data, args->dircount);
3670 3670  
3671 3671  
3672 3672          VN_RELE(vp);
3673 3673  
3674 3674          return;
3675 3675  
3676 3676  out:
3677 3677          if (curthread->t_flag & T_WOULDBLOCK) {
3678 3678                  curthread->t_flag &= ~T_WOULDBLOCK;
3679 3679                  resp->status = NFS3ERR_JUKEBOX;
3680 3680          } else {
3681 3681                  resp->status = puterrno3(error);
3682 3682          }
3683 3683  out1:
3684 3684          DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3685 3685              cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3686 3686  
3687 3687          if (vp != NULL) {
3688 3688                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3689 3689                  VN_RELE(vp);
3690 3690          }
3691 3691  
3692 3692          if (namlen != NULL)
3693 3693                  kmem_free(namlen, args->dircount);
3694 3694  
3695 3695          vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3696 3696  }
3697 3697  
3698 3698  void *
3699 3699  rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3700 3700  {
3701 3701  
3702 3702          return (&args->dir);
3703 3703  }
3704 3704  
3705 3705  void
3706 3706  rfs3_readdirplus_free(READDIRPLUS3res *resp)
3707 3707  {
3708 3708  
3709 3709          if (resp->status == NFS3_OK) {
3710 3710                  kmem_free(resp->resok.reply.entries, resp->resok.count);
3711 3711                  kmem_free(resp->resok.infop,
3712 3712                      resp->resok.size * sizeof (struct entryplus3_info));
3713 3713          }
3714 3714  }
3715 3715  
3716 3716  /* ARGSUSED */
3717 3717  void
3718 3718  rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3719 3719      struct svc_req *req, cred_t *cr, bool_t ro)
3720 3720  {
3721 3721          int error;
3722 3722          vnode_t *vp;
3723 3723          struct vattr *vap;
3724 3724          struct vattr va;
3725 3725          struct statvfs64 sb;
3726 3726  
3727 3727          vap = NULL;
3728 3728  
3729 3729          vp = nfs3_fhtovp(&args->fsroot, exi);
3730 3730  
3731 3731          DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3732 3732              cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3733 3733  
3734 3734          if (vp == NULL) {
3735 3735                  error = ESTALE;
3736 3736                  goto out;
3737 3737          }
3738 3738  
3739 3739          if (is_system_labeled()) {
3740 3740                  bslabel_t *clabel = req->rq_label;
3741 3741  
3742 3742                  ASSERT(clabel != NULL);
3743 3743                  DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3744 3744                      "got client label from request(1)", struct svc_req *, req);
3745 3745  
3746 3746                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
3747 3747                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3748 3748                              exi)) {
3749 3749                                  resp->status = NFS3ERR_ACCES;
3750 3750                                  goto out1;
3751 3751                          }
3752 3752                  }
3753 3753          }
3754 3754  
3755 3755          error = VFS_STATVFS(vp->v_vfsp, &sb);
3756 3756  
3757 3757          va.va_mask = AT_ALL;
3758 3758          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3759 3759  
3760 3760          if (error)
3761 3761                  goto out;
3762 3762  
3763 3763          resp->status = NFS3_OK;
3764 3764          vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3765 3765          if (sb.f_blocks != (fsblkcnt64_t)-1)
3766 3766                  resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3767 3767          else
3768 3768                  resp->resok.tbytes = (size3)sb.f_blocks;
3769 3769          if (sb.f_bfree != (fsblkcnt64_t)-1)
3770 3770                  resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3771 3771          else
3772 3772                  resp->resok.fbytes = (size3)sb.f_bfree;
3773 3773          if (sb.f_bavail != (fsblkcnt64_t)-1)
3774 3774                  resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3775 3775          else
3776 3776                  resp->resok.abytes = (size3)sb.f_bavail;
3777 3777          resp->resok.tfiles = (size3)sb.f_files;
3778 3778          resp->resok.ffiles = (size3)sb.f_ffree;
3779 3779          resp->resok.afiles = (size3)sb.f_favail;
3780 3780          resp->resok.invarsec = 0;
3781 3781  
3782 3782          DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3783 3783              cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3784 3784          VN_RELE(vp);
3785 3785  
3786 3786          return;
3787 3787  
3788 3788  out:
3789 3789          if (curthread->t_flag & T_WOULDBLOCK) {
3790 3790                  curthread->t_flag &= ~T_WOULDBLOCK;
3791 3791                  resp->status = NFS3ERR_JUKEBOX;
3792 3792          } else
3793 3793                  resp->status = puterrno3(error);
3794 3794  out1:
3795 3795          DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3796 3796              cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3797 3797  
3798 3798          if (vp != NULL)
3799 3799                  VN_RELE(vp);
3800 3800          vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3801 3801  }
3802 3802  
3803 3803  void *
3804 3804  rfs3_fsstat_getfh(FSSTAT3args *args)
3805 3805  {
3806 3806  
3807 3807          return (&args->fsroot);
3808 3808  }
3809 3809  
3810 3810  /* ARGSUSED */
3811 3811  void
3812 3812  rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3813 3813      struct svc_req *req, cred_t *cr, bool_t ro)
3814 3814  {
3815 3815          vnode_t *vp;
3816 3816          struct vattr *vap;
3817 3817          struct vattr va;
3818 3818          uint32_t xfer_size;
3819 3819          ulong_t l = 0;
3820 3820          int error;
3821 3821  
3822 3822          vp = nfs3_fhtovp(&args->fsroot, exi);
3823 3823  
3824 3824          DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3825 3825              cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3826 3826  
3827 3827          if (vp == NULL) {
3828 3828                  if (curthread->t_flag & T_WOULDBLOCK) {
3829 3829                          curthread->t_flag &= ~T_WOULDBLOCK;
3830 3830                          resp->status = NFS3ERR_JUKEBOX;
3831 3831                  } else
3832 3832                          resp->status = NFS3ERR_STALE;
3833 3833                  vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3834 3834                  goto out;
3835 3835          }
3836 3836  
3837 3837          if (is_system_labeled()) {
3838 3838                  bslabel_t *clabel = req->rq_label;
3839 3839  
3840 3840                  ASSERT(clabel != NULL);
3841 3841                  DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3842 3842                      "got client label from request(1)", struct svc_req *, req);
3843 3843  
3844 3844                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
3845 3845                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3846 3846                              exi)) {
3847 3847                                  resp->status = NFS3ERR_STALE;
3848 3848                                  vattr_to_post_op_attr(NULL,
3849 3849                                      &resp->resfail.obj_attributes);
3850 3850                                  goto out;
3851 3851                          }
3852 3852                  }
3853 3853          }
3854 3854  
3855 3855          va.va_mask = AT_ALL;
3856 3856          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3857 3857  
3858 3858          resp->status = NFS3_OK;
3859 3859          vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3860 3860          xfer_size = rfs3_tsize(req);
3861 3861          resp->resok.rtmax = xfer_size;
3862 3862          resp->resok.rtpref = xfer_size;
3863 3863          resp->resok.rtmult = DEV_BSIZE;
3864 3864          resp->resok.wtmax = xfer_size;
3865 3865          resp->resok.wtpref = xfer_size;
3866 3866          resp->resok.wtmult = DEV_BSIZE;
3867 3867          resp->resok.dtpref = MAXBSIZE;
3868 3868  
3869 3869          /*
3870 3870           * Large file spec: want maxfilesize based on limit of
3871 3871           * underlying filesystem.  We can guess 2^31-1 if need be.
3872 3872           */
3873 3873          error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3874 3874          if (error) {
3875 3875                  resp->status = puterrno3(error);
3876 3876                  goto out;
3877 3877          }
3878 3878  
3879 3879          /*
3880 3880           * If the underlying file system does not support _PC_FILESIZEBITS,
3881 3881           * return a reasonable default. Note that error code on VOP_PATHCONF
3882 3882           * will be 0, even if the underlying file system does not support
3883 3883           * _PC_FILESIZEBITS.
3884 3884           */
3885 3885          if (l == (ulong_t)-1) {
3886 3886                  resp->resok.maxfilesize = MAXOFF32_T;
3887 3887          } else {
3888 3888                  if (l >= (sizeof (uint64_t) * 8))
3889 3889                          resp->resok.maxfilesize = INT64_MAX;
3890 3890                  else
3891 3891                          resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3892 3892          }
3893 3893  
3894 3894          resp->resok.time_delta.seconds = 0;
3895 3895          resp->resok.time_delta.nseconds = 1000;
3896 3896          resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3897 3897              FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3898 3898  
3899 3899          DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3900 3900              cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3901 3901  
3902 3902          VN_RELE(vp);
3903 3903  
3904 3904          return;
3905 3905  
3906 3906  out:
3907 3907          DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3908 3908              cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3909 3909          if (vp != NULL)
3910 3910                  VN_RELE(vp);
3911 3911  }
3912 3912  
3913 3913  void *
3914 3914  rfs3_fsinfo_getfh(FSINFO3args *args)
3915 3915  {
3916 3916          return (&args->fsroot);
3917 3917  }
3918 3918  
3919 3919  /* ARGSUSED */
3920 3920  void
3921 3921  rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3922 3922      struct svc_req *req, cred_t *cr, bool_t ro)
3923 3923  {
3924 3924          int error;
3925 3925          vnode_t *vp;
3926 3926          struct vattr *vap;
3927 3927          struct vattr va;
3928 3928          ulong_t val;
3929 3929  
3930 3930          vap = NULL;
3931 3931  
3932 3932          vp = nfs3_fhtovp(&args->object, exi);
3933 3933  
3934 3934          DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3935 3935              cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3936 3936  
3937 3937          if (vp == NULL) {
3938 3938                  error = ESTALE;
3939 3939                  goto out;
3940 3940          }
3941 3941  
3942 3942          if (is_system_labeled()) {
3943 3943                  bslabel_t *clabel = req->rq_label;
3944 3944  
3945 3945                  ASSERT(clabel != NULL);
3946 3946                  DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3947 3947                      "got client label from request(1)", struct svc_req *, req);
3948 3948  
3949 3949                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
3950 3950                          if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3951 3951                              exi)) {
3952 3952                                  resp->status = NFS3ERR_ACCES;
3953 3953                                  goto out1;
3954 3954                          }
3955 3955                  }
3956 3956          }
3957 3957  
3958 3958          va.va_mask = AT_ALL;
3959 3959          vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3960 3960  
3961 3961          error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3962 3962          if (error)
3963 3963                  goto out;
3964 3964          resp->resok.info.link_max = (uint32)val;
3965 3965  
3966 3966          error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3967 3967          if (error)
3968 3968                  goto out;
3969 3969          resp->resok.info.name_max = (uint32)val;
3970 3970  
3971 3971          error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3972 3972          if (error)
3973 3973                  goto out;
3974 3974          if (val == 1)
3975 3975                  resp->resok.info.no_trunc = TRUE;
3976 3976          else
3977 3977                  resp->resok.info.no_trunc = FALSE;
3978 3978  
3979 3979          error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3980 3980          if (error)
3981 3981                  goto out;
3982 3982          if (val == 1)
3983 3983                  resp->resok.info.chown_restricted = TRUE;
3984 3984          else
3985 3985                  resp->resok.info.chown_restricted = FALSE;
3986 3986  
3987 3987          resp->status = NFS3_OK;
3988 3988          vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3989 3989          resp->resok.info.case_insensitive = FALSE;
3990 3990          resp->resok.info.case_preserving = TRUE;
3991 3991          DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3992 3992              cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3993 3993          VN_RELE(vp);
3994 3994          return;
3995 3995  
3996 3996  out:
3997 3997          if (curthread->t_flag & T_WOULDBLOCK) {
3998 3998                  curthread->t_flag &= ~T_WOULDBLOCK;
3999 3999                  resp->status = NFS3ERR_JUKEBOX;
4000 4000          } else
4001 4001                  resp->status = puterrno3(error);
4002 4002  out1:
4003 4003          DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4004 4004              cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4005 4005          if (vp != NULL)
4006 4006                  VN_RELE(vp);
4007 4007          vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4008 4008  }
4009 4009  
4010 4010  void *
4011 4011  rfs3_pathconf_getfh(PATHCONF3args *args)
4012 4012  {
4013 4013  
4014 4014          return (&args->object);
4015 4015  }
4016 4016  
4017 4017  void
4018 4018  rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4019 4019      struct svc_req *req, cred_t *cr, bool_t ro)
4020 4020  {
4021 4021          int error;
4022 4022          vnode_t *vp;
4023 4023          struct vattr *bvap;
4024 4024          struct vattr bva;
4025 4025          struct vattr *avap;
4026 4026          struct vattr ava;
4027 4027  
4028 4028          bvap = NULL;
4029 4029          avap = NULL;
4030 4030  
4031 4031          vp = nfs3_fhtovp(&args->file, exi);
4032 4032  
4033 4033          DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4034 4034              cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4035 4035  
4036 4036          if (vp == NULL) {
4037 4037                  error = ESTALE;
4038 4038                  goto out;
4039 4039          }
4040 4040  
4041 4041          bva.va_mask = AT_ALL;
4042 4042          error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4043 4043  
4044 4044          /*
4045 4045           * If we can't get the attributes, then we can't do the
4046 4046           * right access checking.  So, we'll fail the request.
4047 4047           */
4048 4048          if (error)
4049 4049                  goto out;
4050 4050  
4051 4051          bvap = &bva;
4052 4052  
4053 4053          if (rdonly(ro, vp)) {
4054 4054                  resp->status = NFS3ERR_ROFS;
4055 4055                  goto out1;
4056 4056          }
4057 4057  
4058 4058          if (vp->v_type != VREG) {
4059 4059                  resp->status = NFS3ERR_INVAL;
4060 4060                  goto out1;
4061 4061          }
4062 4062  
4063 4063          if (is_system_labeled()) {
4064 4064                  bslabel_t *clabel = req->rq_label;
4065 4065  
4066 4066                  ASSERT(clabel != NULL);
4067 4067                  DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4068 4068                      "got client label from request(1)", struct svc_req *, req);
4069 4069  
4070 4070                  if (!blequal(&l_admin_low->tsl_label, clabel)) {
4071 4071                          if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4072 4072                              exi)) {
4073 4073                                  resp->status = NFS3ERR_ACCES;
4074 4074                                  goto out1;
4075 4075                          }
4076 4076                  }
4077 4077          }
4078 4078  
4079 4079          if (crgetuid(cr) != bva.va_uid &&
4080 4080              (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4081 4081                  goto out;
4082 4082  
4083 4083          error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4084 4084  
4085 4085          ava.va_mask = AT_ALL;
4086 4086          avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4087 4087  
4088 4088          if (error)
4089 4089                  goto out;
4090 4090  
4091 4091          resp->status = NFS3_OK;
4092 4092          vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4093 4093          resp->resok.verf = write3verf;
4094 4094  
4095 4095          DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4096 4096              cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4097 4097  
4098 4098          VN_RELE(vp);
4099 4099  
4100 4100          return;
4101 4101  
4102 4102  out:
4103 4103          if (curthread->t_flag & T_WOULDBLOCK) {
4104 4104                  curthread->t_flag &= ~T_WOULDBLOCK;
4105 4105                  resp->status = NFS3ERR_JUKEBOX;
4106 4106          } else
4107 4107                  resp->status = puterrno3(error);
4108 4108  out1:
4109 4109          DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4110 4110              cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4111 4111  
4112 4112          if (vp != NULL)
4113 4113                  VN_RELE(vp);
4114 4114          vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4115 4115  }
4116 4116  
4117 4117  void *
4118 4118  rfs3_commit_getfh(COMMIT3args *args)
4119 4119  {
4120 4120  
4121 4121          return (&args->file);
4122 4122  }
4123 4123  
4124 4124  static int
4125 4125  sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4126 4126  {
4127 4127  
4128 4128          vap->va_mask = 0;
4129 4129  
4130 4130          if (sap->mode.set_it) {
4131 4131                  vap->va_mode = (mode_t)sap->mode.mode;
4132 4132                  vap->va_mask |= AT_MODE;
4133 4133          }
4134 4134          if (sap->uid.set_it) {
4135 4135                  vap->va_uid = (uid_t)sap->uid.uid;
4136 4136                  vap->va_mask |= AT_UID;
4137 4137          }
4138 4138          if (sap->gid.set_it) {
4139 4139                  vap->va_gid = (gid_t)sap->gid.gid;
4140 4140                  vap->va_mask |= AT_GID;
4141 4141          }
4142 4142          if (sap->size.set_it) {
4143 4143                  if (sap->size.size > (size3)((u_longlong_t)-1))
4144 4144                          return (EINVAL);
4145 4145                  vap->va_size = sap->size.size;
4146 4146                  vap->va_mask |= AT_SIZE;
4147 4147          }
4148 4148          if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4149 4149  #ifndef _LP64
4150 4150                  /* check time validity */
4151 4151                  if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4152 4152                          return (EOVERFLOW);
4153 4153  #endif
4154 4154                  /*
4155 4155                   * nfs protocol defines times as unsigned so don't extend sign,
4156 4156                   * unless sysadmin set nfs_allow_preepoch_time.
4157 4157                   */
4158 4158                  NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4159 4159                      sap->atime.atime.seconds);
4160 4160                  vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4161 4161                  vap->va_mask |= AT_ATIME;
4162 4162          } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4163 4163                  gethrestime(&vap->va_atime);
4164 4164                  vap->va_mask |= AT_ATIME;
4165 4165          }
4166 4166          if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4167 4167  #ifndef _LP64
4168 4168                  /* check time validity */
4169 4169                  if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4170 4170                          return (EOVERFLOW);
4171 4171  #endif
4172 4172                  /*
4173 4173                   * nfs protocol defines times as unsigned so don't extend sign,
4174 4174                   * unless sysadmin set nfs_allow_preepoch_time.
4175 4175                   */
4176 4176                  NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4177 4177                      sap->mtime.mtime.seconds);
4178 4178                  vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4179 4179                  vap->va_mask |= AT_MTIME;
4180 4180          } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4181 4181                  gethrestime(&vap->va_mtime);
4182 4182                  vap->va_mask |= AT_MTIME;
4183 4183          }
4184 4184  
4185 4185          return (0);
4186 4186  }
4187 4187  
4188 4188  static ftype3 vt_to_nf3[] = {
4189 4189          0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4190 4190  };
4191 4191  
4192 4192  static int
4193 4193  vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4194 4194  {
4195 4195  
4196 4196          ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4197 4197          /* Return error if time or size overflow */
4198 4198          if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4199 4199                  return (EOVERFLOW);
4200 4200          }
4201 4201          fap->type = vt_to_nf3[vap->va_type];
4202 4202          fap->mode = (mode3)(vap->va_mode & MODEMASK);
4203 4203          fap->nlink = (uint32)vap->va_nlink;
4204 4204          if (vap->va_uid == UID_NOBODY)
4205 4205                  fap->uid = (uid3)NFS_UID_NOBODY;
4206 4206          else
4207 4207                  fap->uid = (uid3)vap->va_uid;
4208 4208          if (vap->va_gid == GID_NOBODY)
4209 4209                  fap->gid = (gid3)NFS_GID_NOBODY;
4210 4210          else
4211 4211                  fap->gid = (gid3)vap->va_gid;
4212 4212          fap->size = (size3)vap->va_size;
4213 4213          fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4214 4214          fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4215 4215          fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4216 4216          fap->fsid = (uint64)vap->va_fsid;
4217 4217          fap->fileid = (fileid3)vap->va_nodeid;
4218 4218          fap->atime.seconds = vap->va_atime.tv_sec;
4219 4219          fap->atime.nseconds = vap->va_atime.tv_nsec;
4220 4220          fap->mtime.seconds = vap->va_mtime.tv_sec;
4221 4221          fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4222 4222          fap->ctime.seconds = vap->va_ctime.tv_sec;
4223 4223          fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4224 4224          return (0);
4225 4225  }
4226 4226  
4227 4227  static int
4228 4228  vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4229 4229  {
4230 4230  
4231 4231          /* Return error if time or size overflow */
4232 4232          if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4233 4233              NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4234 4234              NFS3_SIZE_OK(vap->va_size))) {
4235 4235                  return (EOVERFLOW);
4236 4236          }
4237 4237          wccap->size = (size3)vap->va_size;
4238 4238          wccap->mtime.seconds = vap->va_mtime.tv_sec;
4239 4239          wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4240 4240          wccap->ctime.seconds = vap->va_ctime.tv_sec;
4241 4241          wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4242 4242          return (0);
4243 4243  }
4244 4244  
4245 4245  static void
4246 4246  vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4247 4247  {
4248 4248  
4249 4249          /* don't return attrs if time overflow */
4250 4250          if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4251 4251                  poap->attributes = TRUE;
4252 4252          } else
4253 4253                  poap->attributes = FALSE;
4254 4254  }
4255 4255  
4256 4256  void
4257 4257  vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4258 4258  {
4259 4259  
4260 4260          /* don't return attrs if time overflow */
4261 4261          if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4262 4262                  poap->attributes = TRUE;
4263 4263          } else
4264 4264                  poap->attributes = FALSE;
4265 4265  }
4266 4266  
4267 4267  static void
4268 4268  vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4269 4269  {
4270 4270  
4271 4271          vattr_to_pre_op_attr(bvap, &wccp->before);
4272 4272          vattr_to_post_op_attr(avap, &wccp->after);
4273 4273  }
4274 4274  
4275 4275  void
4276 4276  rfs3_srvrinit(void)
4277 4277  {
4278 4278          struct rfs3_verf_overlay {
4279 4279                  uint_t id; /* a "unique" identifier */
4280 4280                  int ts; /* a unique timestamp */
4281 4281          } *verfp;
4282 4282          timestruc_t now;
4283 4283  
4284 4284          /*
4285 4285           * The following algorithm attempts to find a unique verifier
4286 4286           * to be used as the write verifier returned from the server
4287 4287           * to the client.  It is important that this verifier change
4288 4288           * whenever the server reboots.  Of secondary importance, it
4289 4289           * is important for the verifier to be unique between two
4290 4290           * different servers.
4291 4291           *
4292 4292           * Thus, an attempt is made to use the system hostid and the
4293 4293           * current time in seconds when the nfssrv kernel module is
4294 4294           * loaded.  It is assumed that an NFS server will not be able
4295 4295           * to boot and then to reboot in less than a second.  If the
4296 4296           * hostid has not been set, then the current high resolution
4297 4297           * time is used.  This will ensure different verifiers each
4298 4298           * time the server reboots and minimize the chances that two
4299 4299           * different servers will have the same verifier.
4300 4300           */
4301 4301  
4302 4302  #ifndef lint
4303 4303          /*
4304 4304           * We ASSERT that this constant logic expression is
4305 4305           * always true because in the past, it wasn't.
4306 4306           */
4307 4307          ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4308 4308  #endif
4309 4309  
4310 4310          gethrestime(&now);
4311 4311          verfp = (struct rfs3_verf_overlay *)&write3verf;
4312 4312          verfp->ts = (int)now.tv_sec;
4313 4313          verfp->id = zone_get_hostid(NULL);
4314 4314  
4315 4315          if (verfp->id == 0)
4316 4316                  verfp->id = (uint_t)now.tv_nsec;
4317 4317  
4318 4318          nfs3_srv_caller_id = fs_new_caller_id();
4319 4319  
4320 4320  }
4321 4321  
4322 4322  static int
4323 4323  rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4324 4324  {
4325 4325          struct clist    *wcl;
4326 4326          int             wlist_len;
4327 4327          count3          count = rok->count;
4328 4328  
4329 4329          wcl = args->wlist;
4330 4330          if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4331 4331                  return (FALSE);
4332 4332          }
4333 4333  
4334 4334          wcl = args->wlist;
4335 4335          rok->wlist_len = wlist_len;
4336 4336          rok->wlist = wcl;
4337 4337          return (TRUE);
4338 4338  }
4339 4339  
/*
 * NFSv3 server teardown hook.  There is currently nothing to release.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}
  
    | 
      ↓ open down ↓ | 
    1404 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX