Print this page
*** NO COMMENTS ***
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
27 27 * All rights reserved.
28 28 */
29 29
30 30 #include <sys/param.h>
31 31 #include <sys/types.h>
32 32 #include <sys/systm.h>
33 33 #include <sys/cred.h>
34 34 #include <sys/buf.h>
35 35 #include <sys/vfs.h>
36 36 #include <sys/vnode.h>
37 37 #include <sys/uio.h>
38 38 #include <sys/stat.h>
39 39 #include <sys/errno.h>
40 40 #include <sys/sysmacros.h>
41 41 #include <sys/statvfs.h>
42 42 #include <sys/kmem.h>
43 43 #include <sys/kstat.h>
44 44 #include <sys/dirent.h>
45 45 #include <sys/cmn_err.h>
46 46 #include <sys/debug.h>
47 47 #include <sys/vtrace.h>
48 48 #include <sys/mode.h>
49 49 #include <sys/acl.h>
50 50 #include <sys/nbmlock.h>
51 51 #include <sys/policy.h>
52 52 #include <sys/sdt.h>
53 53
54 54 #include <rpc/types.h>
55 55 #include <rpc/auth.h>
56 56 #include <rpc/svc.h>
57 57
58 58 #include <nfs/nfs.h>
59 59 #include <nfs/export.h>
60 60 #include <nfs/nfs_cmd.h>
61 61
62 62 #include <vm/hat.h>
63 63 #include <vm/as.h>
64 64 #include <vm/seg.h>
65 65 #include <vm/seg_map.h>
66 66 #include <vm/seg_kmem.h>
67 67
68 68 #include <sys/strsubr.h>
69 69
70 70 /*
71 71 * These are the interface routines for the server side of the
72 72 * Network File System. See the NFS version 2 protocol specification
73 73 * for a description of this interface.
74 74 */
75 75
76 76 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
77 77 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
78 78 cred_t *);
79 79
80 80 /*
81 81 * Some "over the wire" UNIX file types. These are encoded
82 82 * into the mode. This needs to be fixed in the next rev.
83 83 */
84 84 #define IFMT 0170000 /* type of file */
85 85 #define IFCHR 0020000 /* character special */
86 86 #define IFBLK 0060000 /* block special */
87 87 #define IFSOCK 0140000 /* socket */
88 88
89 89 u_longlong_t nfs2_srv_caller_id;
90 90
91 91 /*
92 92 * Get file attributes.
93 93 * Returns the current attributes of the file with the given fhandle.
94 94 */
95 95 /* ARGSUSED */
96 96 void
97 97 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
98 98 struct svc_req *req, cred_t *cr)
99 99 {
100 100 int error;
101 101 vnode_t *vp;
102 102 struct vattr va;
103 103
104 104 vp = nfs_fhtovp(fhp, exi);
105 105 if (vp == NULL) {
106 106 ns->ns_status = NFSERR_STALE;
107 107 return;
108 108 }
109 109
110 110 /*
111 111 * Do the getattr.
112 112 */
113 113 va.va_mask = AT_ALL; /* we want all the attributes */
114 114
115 115 error = rfs4_delegated_getattr(vp, &va, 0, cr);
116 116
117 117 /* check for overflows */
118 118 if (!error) {
119 119 /* Lie about the object type for a referral */
120 120 if (vn_is_nfs_reparse(vp, cr))
121 121 va.va_type = VLNK;
122 122
123 123 acl_perm(vp, exi, &va, cr);
124 124 error = vattr_to_nattr(&va, &ns->ns_attr);
125 125 }
126 126
127 127 VN_RELE(vp);
128 128
129 129 ns->ns_status = puterrno(error);
130 130 }
131 131 void *
132 132 rfs_getattr_getfh(fhandle_t *fhp)
133 133 {
134 134 return (fhp);
135 135 }
136 136
137 137 /*
138 138 * Set file attributes.
139 139 * Sets the attributes of the file with the given fhandle. Returns
140 140 * the new attributes.
141 141 */
142 142 void
143 143 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
144 144 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
145 145 {
146 146 int error;
147 147 int flag;
148 148 int in_crit = 0;
149 149 vnode_t *vp;
150 150 struct vattr va;
151 151 struct vattr bva;
152 152 struct flock64 bf;
153 153 caller_context_t ct;
154 154
155 155
156 156 vp = nfs_fhtovp(&args->saa_fh, exi);
157 157 if (vp == NULL) {
158 158 ns->ns_status = NFSERR_STALE;
159 159 return;
160 160 }
161 161
162 162 if (rdonly(exi, req) || vn_is_readonly(vp)) {
163 163 VN_RELE(vp);
164 164 ns->ns_status = NFSERR_ROFS;
165 165 return;
166 166 }
167 167
168 168 error = sattr_to_vattr(&args->saa_sa, &va);
169 169 if (error) {
170 170 VN_RELE(vp);
171 171 ns->ns_status = puterrno(error);
172 172 return;
173 173 }
174 174
175 175 /*
176 176 * If the client is requesting a change to the mtime,
177 177 * but the nanosecond field is set to 1 billion, then
178 178 * this is a flag to the server that it should set the
179 179 * atime and mtime fields to the server's current time.
180 180 * The 1 billion number actually came from the client
181 181 * as 1 million, but the units in the over the wire
182 182 * request are microseconds instead of nanoseconds.
183 183 *
184 184 * This is an overload of the protocol and should be
185 185 * documented in the NFS Version 2 protocol specification.
186 186 */
187 187 if (va.va_mask & AT_MTIME) {
188 188 if (va.va_mtime.tv_nsec == 1000000000) {
189 189 gethrestime(&va.va_mtime);
190 190 va.va_atime = va.va_mtime;
191 191 va.va_mask |= AT_ATIME;
192 192 flag = 0;
193 193 } else
194 194 flag = ATTR_UTIME;
195 195 } else
196 196 flag = 0;
197 197
198 198 /*
199 199 * If the filesystem is exported with nosuid, then mask off
200 200 * the setuid and setgid bits.
201 201 */
202 202 if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
203 203 (exi->exi_export.ex_flags & EX_NOSUID))
204 204 va.va_mode &= ~(VSUID | VSGID);
205 205
206 206 ct.cc_sysid = 0;
207 207 ct.cc_pid = 0;
208 208 ct.cc_caller_id = nfs2_srv_caller_id;
209 209 ct.cc_flags = CC_DONTBLOCK;
210 210
211 211 /*
212 212 * We need to specially handle size changes because it is
213 213 * possible for the client to create a file with modes
214 214 * which indicate read-only, but with the file opened for
215 215 * writing. If the client then tries to set the size of
216 216 * the file, then the normal access checking done in
217 217 * VOP_SETATTR would prevent the client from doing so,
218 218 * although it should be legal for it to do so. To get
219 219 * around this, we do the access checking for ourselves
220 220 * and then use VOP_SPACE which doesn't do the access
221 221 * checking which VOP_SETATTR does. VOP_SPACE can only
222 222 * operate on VREG files, let VOP_SETATTR handle the other
223 223 * extremely rare cases.
224 224 * Also the client should not be allowed to change the
225 225 * size of the file if there is a conflicting non-blocking
226 226 * mandatory lock in the region of change.
227 227 */
228 228 if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
229 229 if (nbl_need_check(vp)) {
230 230 nbl_start_crit(vp, RW_READER);
231 231 in_crit = 1;
232 232 }
233 233
234 234 bva.va_mask = AT_UID | AT_SIZE;
235 235
236 236 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
237 237
238 238 if (error) {
239 239 if (in_crit)
240 240 nbl_end_crit(vp);
241 241 VN_RELE(vp);
242 242 ns->ns_status = puterrno(error);
243 243 return;
244 244 }
245 245
246 246 if (in_crit) {
247 247 u_offset_t offset;
248 248 ssize_t length;
249 249
250 250 if (va.va_size < bva.va_size) {
251 251 offset = va.va_size;
252 252 length = bva.va_size - va.va_size;
253 253 } else {
254 254 offset = bva.va_size;
255 255 length = va.va_size - bva.va_size;
256 256 }
257 257 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
258 258 NULL)) {
259 259 error = EACCES;
260 260 }
261 261 }
262 262
263 263 if (crgetuid(cr) == bva.va_uid && !error &&
264 264 va.va_size != bva.va_size) {
265 265 va.va_mask &= ~AT_SIZE;
266 266 bf.l_type = F_WRLCK;
267 267 bf.l_whence = 0;
268 268 bf.l_start = (off64_t)va.va_size;
269 269 bf.l_len = 0;
270 270 bf.l_sysid = 0;
271 271 bf.l_pid = 0;
272 272
273 273 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
274 274 (offset_t)va.va_size, cr, &ct);
275 275 }
276 276 if (in_crit)
277 277 nbl_end_crit(vp);
278 278 } else
279 279 error = 0;
280 280
281 281 /*
282 282 * Do the setattr.
283 283 */
284 284 if (!error && va.va_mask) {
285 285 error = VOP_SETATTR(vp, &va, flag, cr, &ct);
286 286 }
287 287
288 288 /*
289 289 * check if the monitor on either vop_space or vop_setattr detected
290 290 * a delegation conflict and if so, mark the thread flag as
291 291 * wouldblock so that the response is dropped and the client will
292 292 * try again.
293 293 */
294 294 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
295 295 VN_RELE(vp);
296 296 curthread->t_flag |= T_WOULDBLOCK;
297 297 return;
298 298 }
299 299
300 300 if (!error) {
301 301 va.va_mask = AT_ALL; /* get everything */
302 302
303 303 error = rfs4_delegated_getattr(vp, &va, 0, cr);
304 304
305 305 /* check for overflows */
306 306 if (!error) {
307 307 acl_perm(vp, exi, &va, cr);
308 308 error = vattr_to_nattr(&va, &ns->ns_attr);
309 309 }
310 310 }
311 311
312 312 ct.cc_flags = 0;
313 313
314 314 /*
315 315 * Force modified metadata out to stable storage.
316 316 */
317 317 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
318 318
↓ open down ↓ |
318 lines elided |
↑ open up ↑ |
319 319 VN_RELE(vp);
320 320
321 321 ns->ns_status = puterrno(error);
322 322 }
323 323 void *
324 324 rfs_setattr_getfh(struct nfssaargs *args)
325 325 {
326 326 return (&args->saa_fh);
327 327 }
328 328
329 +/* Change and release @exip and @vpp only on success */
330 +int
331 +rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
332 +{
333 + struct exportinfo *exi;
334 + vnode_t *vp;
335 + fid_t fid;
336 + int error;
337 +
338 + vp = *vpp;
339 +
340 +	/* traverse() releases its vnode argument on success */
341 + VN_HOLD(*vpp);
342 +
343 + if ((error = traverse(&vp)) != 0) {
344 + VN_RELE(*vpp);
345 + return (error);
346 + }
347 +
348 + bzero(&fid, sizeof (fid));
349 + fid.fid_len = MAXFIDSZ;
350 + error = VOP_FID(vp, &fid, NULL);
351 + if (error) {
352 + VN_RELE(vp);
353 + return (error);
354 + }
355 +
356 + exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
357 + if (exi == NULL ||
358 + (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
359 +		/* Not an error: the subdir is just not exported,
360 +		 * or "nohide" is not set
361 + */
362 + VN_RELE(vp);
363 + } else {
364 + /* go to submount */
365 + exi_rele(*exip);
366 + *exip = exi;
367 +
368 + VN_RELE(*vpp);
369 + *vpp = vp;
370 + }
371 + return (0);
372 +}
373 +
329 374 /*
330 375 * Directory lookup.
331 376 * Returns an fhandle and file attributes for file name in a directory.
332 377 */
333 378 /* ARGSUSED */
334 379 void
335 380 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
336 381 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
337 382 {
338 383 int error;
339 384 vnode_t *dvp;
340 385 vnode_t *vp;
341 386 struct vattr va;
342 387 fhandle_t *fhp = da->da_fhandle;
343 388 struct sec_ol sec = {0, 0};
344 389 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
345 390 char *name;
346 391 struct sockaddr *ca;
347 392
348 393 /*
349 394 * Trusted Extension doesn't support NFSv2. MOUNT
350 395 * will reject v2 clients. Need to prevent v2 client
351 396 * access via WebNFS here.
352 397 */
353 398 if (is_system_labeled() && req->rq_vers == 2) {
354 399 dr->dr_status = NFSERR_ACCES;
355 400 return;
356 401 }
357 402
358 403 /*
359 404 * Disallow NULL paths
360 405 */
361 406 if (da->da_name == NULL || *da->da_name == '\0') {
362 407 dr->dr_status = NFSERR_ACCES;
363 408 return;
364 409 }
365 410
366 411 /*
367 412 * Allow lookups from the root - the default
368 413 * location of the public filehandle.
369 414 */
370 415 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
371 416 dvp = rootdir;
372 417 VN_HOLD(dvp);
373 418 } else {
374 419 dvp = nfs_fhtovp(fhp, exi);
375 420 if (dvp == NULL) {
376 421 dr->dr_status = NFSERR_STALE;
377 422 return;
378 423 }
379 424 }
380 425
381 426 /*
382 427 * Not allow lookup beyond root.
383 428 * If the filehandle matches a filehandle of the exi,
384 429 * then the ".." refers beyond the root of an exported filesystem.
385 430 */
386 431 if (strcmp(da->da_name, "..") == 0 &&
387 432 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
388 433 VN_RELE(dvp);
389 434 dr->dr_status = NFSERR_NOENT;
390 435 return;
391 436 }
↓ open down ↓ |
53 lines elided |
↑ open up ↑ |
392 437
393 438 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
394 439 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
395 440 MAXPATHLEN);
396 441
397 442 if (name == NULL) {
398 443 dr->dr_status = NFSERR_ACCES;
399 444 return;
400 445 }
401 446
447 + exi_hold(exi);
448 +
402 449 /*
403 450 * If the public filehandle is used then allow
404 451 * a multi-component lookup, i.e. evaluate
405 452 * a pathname and follow symbolic links if
406 453 * necessary.
407 454 *
408 455 * This may result in a vnode in another filesystem
409 456 * which is OK as long as the filesystem is exported.
410 457 */
411 458 if (PUBLIC_FH2(fhp)) {
459 + struct exportinfo *new;
460 +
412 461 publicfh_flag = TRUE;
413 - error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
462 + error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &new,
414 463 &sec);
464 +
465 + if (error == 0) {
466 + exi_rele(exi);
467 + exi = new;
468 + }
415 469 } else {
416 470 /*
417 471 * Do a normal single component lookup.
418 472 */
419 473 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
420 474 NULL, NULL, NULL);
421 475 }
422 476
423 477 if (name != da->da_name)
424 478 kmem_free(name, MAXPATHLEN);
425 479
480 + if (error == 0 && vn_ismntpt(vp)) {
481 + error = rfs_cross_mnt(&vp, &exi);
482 + if (error)
483 + VN_RELE(vp);
484 + }
426 485
427 486 if (!error) {
428 487 va.va_mask = AT_ALL; /* we want everything */
429 488
430 489 error = rfs4_delegated_getattr(vp, &va, 0, cr);
431 490
432 491 /* check for overflows */
433 492 if (!error) {
434 493 acl_perm(vp, exi, &va, cr);
435 494 error = vattr_to_nattr(&va, &dr->dr_attr);
436 495 if (!error) {
437 496 if (sec.sec_flags & SEC_QUERY)
438 497 error = makefh_ol(&dr->dr_fhandle, exi,
439 498 sec.sec_index);
440 499 else {
441 500 error = makefh(&dr->dr_fhandle, vp,
442 501 exi);
443 502 if (!error && publicfh_flag &&
↓ open down ↓ |
8 lines elided |
↑ open up ↑ |
444 503 !chk_clnt_sec(exi, req))
445 504 auth_weak = TRUE;
446 505 }
447 506 }
448 507 }
449 508 VN_RELE(vp);
450 509 }
451 510
452 511 VN_RELE(dvp);
453 512
454 - /*
455 - * If publicfh_flag is true then we have called rfs_publicfh_mclookup
456 - * and have obtained a new exportinfo in exi which needs to be
457 - * released. Note the the original exportinfo pointed to by exi
458 - * will be released by the caller, comon_dispatch.
513 +	/* The passed exportinfo argument is released by the
514 +	 * caller, common_dispatch
459 515 */
460 - if (publicfh_flag && exi != NULL)
461 - exi_rele(exi);
516 + exi_rele(exi);
462 517
463 518 /*
464 519 * If it's public fh, no 0x81, and client's flavor is
465 520 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
466 521 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
467 522 */
468 523 if (auth_weak)
469 524 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
470 525 else
471 526 dr->dr_status = puterrno(error);
472 527 }
473 528 void *
474 529 rfs_lookup_getfh(struct nfsdiropargs *da)
475 530 {
476 531 return (da->da_fhandle);
477 532 }
478 533
479 534 /*
480 535 * Read symbolic link.
481 536 * Returns the string in the symbolic link at the given fhandle.
482 537 */
483 538 /* ARGSUSED */
484 539 void
485 540 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
486 541 struct svc_req *req, cred_t *cr)
487 542 {
488 543 int error;
489 544 struct iovec iov;
490 545 struct uio uio;
491 546 vnode_t *vp;
492 547 struct vattr va;
493 548 struct sockaddr *ca;
494 549 char *name = NULL;
495 550 int is_referral = 0;
496 551
497 552 vp = nfs_fhtovp(fhp, exi);
498 553 if (vp == NULL) {
499 554 rl->rl_data = NULL;
500 555 rl->rl_status = NFSERR_STALE;
501 556 return;
502 557 }
503 558
504 559 va.va_mask = AT_MODE;
505 560
506 561 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
507 562
508 563 if (error) {
509 564 VN_RELE(vp);
510 565 rl->rl_data = NULL;
511 566 rl->rl_status = puterrno(error);
512 567 return;
513 568 }
514 569
515 570 if (MANDLOCK(vp, va.va_mode)) {
516 571 VN_RELE(vp);
517 572 rl->rl_data = NULL;
518 573 rl->rl_status = NFSERR_ACCES;
519 574 return;
520 575 }
521 576
522 577 /* We lied about the object type for a referral */
523 578 if (vn_is_nfs_reparse(vp, cr))
524 579 is_referral = 1;
525 580
526 581 /*
527 582 * XNFS and RFC1094 require us to return ENXIO if argument
528 583 * is not a link. BUGID 1138002.
529 584 */
530 585 if (vp->v_type != VLNK && !is_referral) {
531 586 VN_RELE(vp);
532 587 rl->rl_data = NULL;
533 588 rl->rl_status = NFSERR_NXIO;
534 589 return;
535 590 }
536 591
537 592 /*
538 593 * Allocate data for pathname. This will be freed by rfs_rlfree.
539 594 */
540 595 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
541 596
542 597 if (is_referral) {
543 598 char *s;
544 599 size_t strsz;
545 600
546 601 /* Get an artificial symlink based on a referral */
547 602 s = build_symlink(vp, cr, &strsz);
548 603 global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
549 604 DTRACE_PROBE2(nfs2serv__func__referral__reflink,
550 605 vnode_t *, vp, char *, s);
551 606 if (s == NULL)
552 607 error = EINVAL;
553 608 else {
554 609 error = 0;
555 610 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
556 611 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
557 612 kmem_free(s, strsz);
558 613 }
559 614
560 615 } else {
561 616
562 617 /*
563 618 * Set up io vector to read sym link data
564 619 */
565 620 iov.iov_base = rl->rl_data;
566 621 iov.iov_len = NFS_MAXPATHLEN;
567 622 uio.uio_iov = &iov;
568 623 uio.uio_iovcnt = 1;
569 624 uio.uio_segflg = UIO_SYSSPACE;
570 625 uio.uio_extflg = UIO_COPY_CACHED;
571 626 uio.uio_loffset = (offset_t)0;
572 627 uio.uio_resid = NFS_MAXPATHLEN;
573 628
574 629 /*
575 630 * Do the readlink.
576 631 */
577 632 error = VOP_READLINK(vp, &uio, cr, NULL);
578 633
579 634 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);
580 635
581 636 if (!error)
582 637 rl->rl_data[rl->rl_count] = '\0';
583 638
584 639 }
585 640
586 641
587 642 VN_RELE(vp);
588 643
589 644 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
590 645 name = nfscmd_convname(ca, exi, rl->rl_data,
591 646 NFSCMD_CONV_OUTBOUND, MAXPATHLEN);
592 647
593 648 if (name != NULL && name != rl->rl_data) {
594 649 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
595 650 rl->rl_data = name;
596 651 }
597 652
598 653 /*
599 654 * XNFS and RFC1094 require us to return ENXIO if argument
600 655 * is not a link. UFS returns EINVAL if this is the case,
601 656 * so we do the mapping here. BUGID 1138002.
602 657 */
603 658 if (error == EINVAL)
604 659 rl->rl_status = NFSERR_NXIO;
605 660 else
606 661 rl->rl_status = puterrno(error);
607 662
608 663 }
609 664 void *
610 665 rfs_readlink_getfh(fhandle_t *fhp)
611 666 {
612 667 return (fhp);
613 668 }
614 669 /*
615 670 * Free data allocated by rfs_readlink
616 671 */
617 672 void
618 673 rfs_rlfree(struct nfsrdlnres *rl)
619 674 {
620 675 if (rl->rl_data != NULL)
621 676 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
622 677 }
623 678
624 679 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
625 680
626 681 /*
627 682 * Read data.
628 683 * Returns some data read from the file at the given fhandle.
629 684 */
630 685 /* ARGSUSED */
631 686 void
632 687 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
633 688 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
634 689 {
635 690 vnode_t *vp;
636 691 int error;
637 692 struct vattr va;
638 693 struct iovec iov;
639 694 struct uio uio;
640 695 mblk_t *mp;
641 696 int alloc_err = 0;
642 697 int in_crit = 0;
643 698 caller_context_t ct;
644 699
645 700 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
646 701 if (vp == NULL) {
647 702 rr->rr_data = NULL;
648 703 rr->rr_status = NFSERR_STALE;
649 704 return;
650 705 }
651 706
652 707 if (vp->v_type != VREG) {
653 708 VN_RELE(vp);
654 709 rr->rr_data = NULL;
655 710 rr->rr_status = NFSERR_ISDIR;
656 711 return;
657 712 }
658 713
659 714 ct.cc_sysid = 0;
660 715 ct.cc_pid = 0;
661 716 ct.cc_caller_id = nfs2_srv_caller_id;
662 717 ct.cc_flags = CC_DONTBLOCK;
663 718
664 719 /*
665 720 * Enter the critical region before calling VOP_RWLOCK
666 721 * to avoid a deadlock with write requests.
667 722 */
668 723 if (nbl_need_check(vp)) {
669 724 nbl_start_crit(vp, RW_READER);
670 725 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
671 726 0, NULL)) {
672 727 nbl_end_crit(vp);
673 728 VN_RELE(vp);
674 729 rr->rr_data = NULL;
675 730 rr->rr_status = NFSERR_ACCES;
676 731 return;
677 732 }
678 733 in_crit = 1;
679 734 }
680 735
681 736 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
682 737
683 738 /* check if a monitor detected a delegation conflict */
684 739 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
685 740 VN_RELE(vp);
686 741 /* mark as wouldblock so response is dropped */
687 742 curthread->t_flag |= T_WOULDBLOCK;
688 743
689 744 rr->rr_data = NULL;
690 745 return;
691 746 }
692 747
693 748 va.va_mask = AT_ALL;
694 749
695 750 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
696 751
697 752 if (error) {
698 753 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
699 754 if (in_crit)
700 755 nbl_end_crit(vp);
701 756
702 757 VN_RELE(vp);
703 758 rr->rr_data = NULL;
704 759 rr->rr_status = puterrno(error);
705 760
706 761 return;
707 762 }
708 763
709 764 /*
710 765 * This is a kludge to allow reading of files created
711 766 * with no read permission. The owner of the file
712 767 * is always allowed to read it.
713 768 */
714 769 if (crgetuid(cr) != va.va_uid) {
715 770 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
716 771
717 772 if (error) {
718 773 /*
719 774 * Exec is the same as read over the net because
720 775 * of demand loading.
721 776 */
722 777 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
723 778 }
724 779 if (error) {
725 780 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
726 781 if (in_crit)
727 782 nbl_end_crit(vp);
728 783 VN_RELE(vp);
729 784 rr->rr_data = NULL;
730 785 rr->rr_status = puterrno(error);
731 786
732 787 return;
733 788 }
734 789 }
735 790
736 791 if (MANDLOCK(vp, va.va_mode)) {
737 792 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
738 793 if (in_crit)
739 794 nbl_end_crit(vp);
740 795
741 796 VN_RELE(vp);
742 797 rr->rr_data = NULL;
743 798 rr->rr_status = NFSERR_ACCES;
744 799
745 800 return;
746 801 }
747 802
748 803 rr->rr_ok.rrok_wlist_len = 0;
749 804 rr->rr_ok.rrok_wlist = NULL;
750 805
751 806 if ((u_offset_t)ra->ra_offset >= va.va_size) {
752 807 rr->rr_count = 0;
753 808 rr->rr_data = NULL;
754 809 /*
755 810 * In this case, status is NFS_OK, but there is no data
756 811 * to encode. So set rr_mp to NULL.
757 812 */
758 813 rr->rr_mp = NULL;
759 814 rr->rr_ok.rrok_wlist = ra->ra_wlist;
760 815 if (rr->rr_ok.rrok_wlist)
761 816 clist_zero_len(rr->rr_ok.rrok_wlist);
762 817 goto done;
763 818 }
764 819
765 820 if (ra->ra_wlist) {
766 821 mp = NULL;
767 822 rr->rr_mp = NULL;
768 823 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
769 824 if (ra->ra_count > iov.iov_len) {
770 825 rr->rr_data = NULL;
771 826 rr->rr_status = NFSERR_INVAL;
772 827 goto done;
773 828 }
774 829 } else {
775 830 /*
776 831 * mp will contain the data to be sent out in the read reply.
777 832 * This will be freed after the reply has been sent out (by the
778 833 * driver).
779 834 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
780 835 * that the call to xdrmblk_putmblk() never fails.
781 836 */
782 837 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
783 838 &alloc_err);
784 839 ASSERT(mp != NULL);
785 840 ASSERT(alloc_err == 0);
786 841
787 842 rr->rr_mp = mp;
788 843
789 844 /*
790 845 * Set up io vector
791 846 */
792 847 iov.iov_base = (caddr_t)mp->b_datap->db_base;
793 848 iov.iov_len = ra->ra_count;
794 849 }
795 850
796 851 uio.uio_iov = &iov;
797 852 uio.uio_iovcnt = 1;
798 853 uio.uio_segflg = UIO_SYSSPACE;
799 854 uio.uio_extflg = UIO_COPY_CACHED;
800 855 uio.uio_loffset = (offset_t)ra->ra_offset;
801 856 uio.uio_resid = ra->ra_count;
802 857
803 858 error = VOP_READ(vp, &uio, 0, cr, &ct);
804 859
805 860 if (error) {
806 861 if (mp)
807 862 freeb(mp);
808 863
809 864 /*
810 865 * check if a monitor detected a delegation conflict and
811 866 * mark as wouldblock so response is dropped
812 867 */
813 868 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
814 869 curthread->t_flag |= T_WOULDBLOCK;
815 870 else
816 871 rr->rr_status = puterrno(error);
817 872
818 873 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
819 874 if (in_crit)
820 875 nbl_end_crit(vp);
821 876
822 877 VN_RELE(vp);
823 878 rr->rr_data = NULL;
824 879
825 880 return;
826 881 }
827 882
828 883 /*
829 884 * Get attributes again so we can send the latest access
830 885 * time to the client side for his cache.
831 886 */
832 887 va.va_mask = AT_ALL;
833 888
834 889 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
835 890
836 891 if (error) {
837 892 if (mp)
838 893 freeb(mp);
839 894
840 895 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
841 896 if (in_crit)
842 897 nbl_end_crit(vp);
843 898
844 899 VN_RELE(vp);
845 900 rr->rr_data = NULL;
846 901 rr->rr_status = puterrno(error);
847 902
848 903 return;
849 904 }
850 905
851 906 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
852 907
853 908 if (mp) {
854 909 rr->rr_data = (char *)mp->b_datap->db_base;
855 910 } else {
856 911 if (ra->ra_wlist) {
857 912 rr->rr_data = (caddr_t)iov.iov_base;
858 913 if (!rdma_setup_read_data2(ra, rr)) {
859 914 rr->rr_data = NULL;
860 915 rr->rr_status = puterrno(NFSERR_INVAL);
861 916 }
862 917 }
863 918 }
864 919 done:
865 920 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
866 921 if (in_crit)
867 922 nbl_end_crit(vp);
868 923
869 924 acl_perm(vp, exi, &va, cr);
870 925
871 926 /* check for overflows */
872 927 error = vattr_to_nattr(&va, &rr->rr_attr);
873 928
874 929 VN_RELE(vp);
875 930
876 931 rr->rr_status = puterrno(error);
877 932 }
878 933
879 934 /*
880 935 * Free data allocated by rfs_read
881 936 */
882 937 void
883 938 rfs_rdfree(struct nfsrdresult *rr)
884 939 {
885 940 mblk_t *mp;
886 941
887 942 if (rr->rr_status == NFS_OK) {
888 943 mp = rr->rr_mp;
889 944 if (mp != NULL)
890 945 freeb(mp);
891 946 }
892 947 }
893 948
894 949 void *
895 950 rfs_read_getfh(struct nfsreadargs *ra)
896 951 {
897 952 return (&ra->ra_fhandle);
898 953 }
899 954
900 955 #define MAX_IOVECS 12
901 956
902 957 #ifdef DEBUG
903 958 static int rfs_write_sync_hits = 0;
904 959 static int rfs_write_sync_misses = 0;
905 960 #endif
906 961
907 962 /*
908 963 * Write data to file.
909 964 * Returns attributes of a file after writing some data to it.
910 965 *
911 966 * Any changes made here, especially in error handling might have
912 967 * to also be done in rfs_write (which clusters write requests).
913 968 */
914 969 void
915 970 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
916 971 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
917 972 {
918 973 int error;
919 974 vnode_t *vp;
920 975 rlim64_t rlimit;
921 976 struct vattr va;
922 977 struct uio uio;
923 978 struct iovec iov[MAX_IOVECS];
924 979 mblk_t *m;
925 980 struct iovec *iovp;
926 981 int iovcnt;
927 982 cred_t *savecred;
928 983 int in_crit = 0;
929 984 caller_context_t ct;
930 985
931 986 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
932 987 if (vp == NULL) {
933 988 ns->ns_status = NFSERR_STALE;
934 989 return;
935 990 }
936 991
937 992 if (rdonly(exi, req)) {
938 993 VN_RELE(vp);
939 994 ns->ns_status = NFSERR_ROFS;
940 995 return;
941 996 }
942 997
943 998 if (vp->v_type != VREG) {
944 999 VN_RELE(vp);
945 1000 ns->ns_status = NFSERR_ISDIR;
946 1001 return;
947 1002 }
948 1003
949 1004 ct.cc_sysid = 0;
950 1005 ct.cc_pid = 0;
951 1006 ct.cc_caller_id = nfs2_srv_caller_id;
952 1007 ct.cc_flags = CC_DONTBLOCK;
953 1008
954 1009 va.va_mask = AT_UID|AT_MODE;
955 1010
956 1011 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
957 1012
958 1013 if (error) {
959 1014 VN_RELE(vp);
960 1015 ns->ns_status = puterrno(error);
961 1016
962 1017 return;
963 1018 }
964 1019
965 1020 if (crgetuid(cr) != va.va_uid) {
966 1021 /*
967 1022 * This is a kludge to allow writes of files created
968 1023 * with read only permission. The owner of the file
969 1024 * is always allowed to write it.
970 1025 */
971 1026 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
972 1027
973 1028 if (error) {
974 1029 VN_RELE(vp);
975 1030 ns->ns_status = puterrno(error);
976 1031 return;
977 1032 }
978 1033 }
979 1034
980 1035 /*
981 1036 * Can't access a mandatory lock file. This might cause
982 1037 * the NFS service thread to block forever waiting for a
983 1038 * lock to be released that will never be released.
984 1039 */
985 1040 if (MANDLOCK(vp, va.va_mode)) {
986 1041 VN_RELE(vp);
987 1042 ns->ns_status = NFSERR_ACCES;
988 1043 return;
989 1044 }
990 1045
991 1046 /*
992 1047 * We have to enter the critical region before calling VOP_RWLOCK
993 1048 * to avoid a deadlock with ufs.
994 1049 */
995 1050 if (nbl_need_check(vp)) {
996 1051 nbl_start_crit(vp, RW_READER);
997 1052 in_crit = 1;
998 1053 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
999 1054 wa->wa_count, 0, NULL)) {
1000 1055 error = EACCES;
1001 1056 goto out;
1002 1057 }
1003 1058 }
1004 1059
1005 1060 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1006 1061
1007 1062 /* check if a monitor detected a delegation conflict */
1008 1063 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1009 1064 VN_RELE(vp);
1010 1065 /* mark as wouldblock so response is dropped */
1011 1066 curthread->t_flag |= T_WOULDBLOCK;
1012 1067 return;
1013 1068 }
1014 1069
1015 1070 if (wa->wa_data || wa->wa_rlist) {
1016 1071 /* Do the RDMA thing if necessary */
1017 1072 if (wa->wa_rlist) {
1018 1073 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1019 1074 iov[0].iov_len = wa->wa_count;
1020 1075 } else {
1021 1076 iov[0].iov_base = wa->wa_data;
1022 1077 iov[0].iov_len = wa->wa_count;
1023 1078 }
1024 1079 uio.uio_iov = iov;
1025 1080 uio.uio_iovcnt = 1;
1026 1081 uio.uio_segflg = UIO_SYSSPACE;
1027 1082 uio.uio_extflg = UIO_COPY_DEFAULT;
1028 1083 uio.uio_loffset = (offset_t)wa->wa_offset;
1029 1084 uio.uio_resid = wa->wa_count;
1030 1085 /*
1031 1086 * The limit is checked on the client. We
1032 1087 * should allow any size writes here.
1033 1088 */
1034 1089 uio.uio_llimit = curproc->p_fsz_ctl;
1035 1090 rlimit = uio.uio_llimit - wa->wa_offset;
1036 1091 if (rlimit < (rlim64_t)uio.uio_resid)
1037 1092 uio.uio_resid = (uint_t)rlimit;
1038 1093
1039 1094 /*
1040 1095 * for now we assume no append mode
1041 1096 */
1042 1097 /*
1043 1098 * We're changing creds because VM may fault and we need
1044 1099 * the cred of the current thread to be used if quota
1045 1100 * checking is enabled.
1046 1101 */
1047 1102 savecred = curthread->t_cred;
1048 1103 curthread->t_cred = cr;
1049 1104 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1050 1105 curthread->t_cred = savecred;
1051 1106 } else {
1052 1107 iovcnt = 0;
1053 1108 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1054 1109 iovcnt++;
1055 1110 if (iovcnt <= MAX_IOVECS) {
1056 1111 #ifdef DEBUG
1057 1112 rfs_write_sync_hits++;
1058 1113 #endif
1059 1114 iovp = iov;
1060 1115 } else {
1061 1116 #ifdef DEBUG
1062 1117 rfs_write_sync_misses++;
1063 1118 #endif
1064 1119 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1065 1120 }
1066 1121 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1067 1122 uio.uio_iov = iovp;
1068 1123 uio.uio_iovcnt = iovcnt;
1069 1124 uio.uio_segflg = UIO_SYSSPACE;
1070 1125 uio.uio_extflg = UIO_COPY_DEFAULT;
1071 1126 uio.uio_loffset = (offset_t)wa->wa_offset;
1072 1127 uio.uio_resid = wa->wa_count;
1073 1128 /*
1074 1129 * The limit is checked on the client. We
1075 1130 * should allow any size writes here.
1076 1131 */
1077 1132 uio.uio_llimit = curproc->p_fsz_ctl;
1078 1133 rlimit = uio.uio_llimit - wa->wa_offset;
1079 1134 if (rlimit < (rlim64_t)uio.uio_resid)
1080 1135 uio.uio_resid = (uint_t)rlimit;
1081 1136
1082 1137 /*
1083 1138 * For now we assume no append mode.
1084 1139 */
1085 1140 /*
1086 1141 * We're changing creds because VM may fault and we need
1087 1142 * the cred of the current thread to be used if quota
1088 1143 * checking is enabled.
1089 1144 */
1090 1145 savecred = curthread->t_cred;
1091 1146 curthread->t_cred = cr;
1092 1147 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1093 1148 curthread->t_cred = savecred;
1094 1149
1095 1150 if (iovp != iov)
1096 1151 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1097 1152 }
1098 1153
1099 1154 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1100 1155
1101 1156 if (!error) {
1102 1157 /*
1103 1158 * Get attributes again so we send the latest mod
1104 1159 * time to the client side for his cache.
1105 1160 */
1106 1161 va.va_mask = AT_ALL; /* now we want everything */
1107 1162
1108 1163 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1109 1164
1110 1165 /* check for overflows */
1111 1166 if (!error) {
1112 1167 acl_perm(vp, exi, &va, cr);
1113 1168 error = vattr_to_nattr(&va, &ns->ns_attr);
1114 1169 }
1115 1170 }
1116 1171
1117 1172 out:
1118 1173 if (in_crit)
1119 1174 nbl_end_crit(vp);
1120 1175 VN_RELE(vp);
1121 1176
1122 1177 /* check if a monitor detected a delegation conflict */
1123 1178 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1124 1179 /* mark as wouldblock so response is dropped */
1125 1180 curthread->t_flag |= T_WOULDBLOCK;
1126 1181 else
1127 1182 ns->ns_status = puterrno(error);
1128 1183
1129 1184 }
1130 1185
/*
 * One queued NFSv2 WRITE request, waiting to be processed as part of a
 * write "cluster".  Each instance lives on the stack of the service
 * thread that owns the request (see nrpsp in rfs_write()).
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* decoded WRITE arguments */
	struct nfsattrstat *ns;		/* response to be filled in */
	struct svc_req *req;		/* RPC request handle */
	cred_t *cr;			/* credentials of the requester */
	kthread_t *thread;		/* service thread blocked on this entry */
	struct rfs_async_write *list;	/* next request in this cluster, by offset */
};
1139 1194
/*
 * A write cluster: the set of pending WRITE requests against a single
 * file handle.  Clusters are linked on rfs_async_write_head and are
 * protected by rfs_async_write_lock.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle shared by all requests */
	kcondvar_t cv;			/* broadcast when the cluster completes */
	struct rfs_async_write *list;	/* requests, sorted by starting offset */
	struct rfs_async_write_list *next;	/* next active cluster */
};
1146 1201
/*
 * Global write-clustering state: the list of active clusters (one per
 * file handle) and the mutex protecting both the list and the per-
 * request status fields while threads wait on a cluster.
 */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Max iovecs held in rfs_write()'s on-stack array before kmem_alloc is used. */
#define	MAXCLIOVECS	42
/* Sentinel meaning "request not yet processed"; 0 would read as NFS_OK. */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
/* Counts of clusters that fit (hits) or did not fit (misses) on the stack. */
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif
1158 1213
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * This is the write-clustering entry point: concurrent WRITE requests
 * against the same file handle are gathered into a "cluster" while the
 * first request holds (or waits for) the file's write lock, and
 * contiguous requests in a cluster are then issued with a single
 * VOP_WRITE.  If clustering is disabled (rfs_write_async == 0) the
 * request is handed straight to rfs_write_sync().
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;		/* this request's cluster entry */
	struct rfs_async_write_list nlpsp;	/* cluster header, if we start one */
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	if (!rfs_write_async) {
		rfs_write_sync(wa, ns, exi, req, cr);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->thread = curthread;

	/*
	 * The cluster entries live on thread stacks; those stacks must not
	 * be swapped out while other threads hold pointers into them.
	 */
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&rfs_async_write_lock);
	for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		/* Insertion sort by starting offset. */
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/*
		 * Sleep until the thread processing the cluster fills in
		 * our status; it broadcasts on lp->cv when done.
		 */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &rfs_async_write_lock);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (rfs_async_write_head == NULL) {
		rfs_async_write_head = nlp;
	} else {
		lp = rfs_async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/*
		 * Stale handle: unlink our cluster from the global list and
		 * fail every request that joined it with NFSERR_STALE.
		 */
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		/* Unlink the cluster and fail all queued requests. */
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		/*
		 * Only fail requests still awaiting processing; any
		 * request already given a status keeps it.
		 */
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&rfs_async_write_lock);
	if (rfs_async_write_head == nlp)
		rfs_async_write_head = nlp->next;
	else {
		lp = rfs_async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(exi, rp->req)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			/* continue advances rp via the while condition */
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		/* Grow the cluster's byte range to cover this request. */
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * Stop extending the run when the next request is
			 * missing, already errored, or not byte-contiguous
			 * with this one.  lrp is left pointing one past the
			 * last request in the run.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				/* Do the RDMA thing if necessary */
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				/*
				 * Walk the mblk chain, clamping the total
				 * to wa_count in case the chain carries
				 * more bytes than the request claims.
				 */
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for his cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/*
	 * Any request still unprocessed (e.g. skipped by the clustering
	 * loop) picks up the final error status; then wake all waiters.
	 */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&rfs_async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&rfs_async_write_lock);

}
1656 1711
1657 1712 void *
1658 1713 rfs_write_getfh(struct nfswriteargs *wa)
1659 1714 {
1660 1715 return (&wa->wa_fhandle);
1661 1716 }
1662 1717
1663 1718 /*
1664 1719 * Create a file.
1665 1720 * Creates a file with given attributes and returns those attributes
1666 1721 * and an fhandle for the new file.
1667 1722 */
1668 1723 void
1669 1724 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1670 1725 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
1671 1726 {
1672 1727 int error;
1673 1728 int lookuperr;
1674 1729 int in_crit = 0;
1675 1730 struct vattr va;
1676 1731 vnode_t *vp;
1677 1732 vnode_t *realvp;
1678 1733 vnode_t *dvp;
1679 1734 char *name = args->ca_da.da_name;
1680 1735 vnode_t *tvp = NULL;
1681 1736 int mode;
1682 1737 int lookup_ok;
1683 1738 bool_t trunc;
1684 1739 struct sockaddr *ca;
1685 1740
1686 1741 /*
1687 1742 * Disallow NULL paths
1688 1743 */
1689 1744 if (name == NULL || *name == '\0') {
1690 1745 dr->dr_status = NFSERR_ACCES;
1691 1746 return;
1692 1747 }
1693 1748
1694 1749 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1695 1750 if (dvp == NULL) {
1696 1751 dr->dr_status = NFSERR_STALE;
1697 1752 return;
1698 1753 }
1699 1754
1700 1755 error = sattr_to_vattr(args->ca_sa, &va);
1701 1756 if (error) {
1702 1757 dr->dr_status = puterrno(error);
1703 1758 return;
1704 1759 }
1705 1760
1706 1761 /*
1707 1762 * Must specify the mode.
1708 1763 */
1709 1764 if (!(va.va_mask & AT_MODE)) {
1710 1765 VN_RELE(dvp);
1711 1766 dr->dr_status = NFSERR_INVAL;
1712 1767 return;
1713 1768 }
1714 1769
1715 1770 /*
1716 1771 * This is a completely gross hack to make mknod
1717 1772 * work over the wire until we can wack the protocol
1718 1773 */
1719 1774 if ((va.va_mode & IFMT) == IFCHR) {
1720 1775 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1721 1776 va.va_type = VFIFO; /* xtra kludge for named pipe */
1722 1777 else {
1723 1778 va.va_type = VCHR;
1724 1779 /*
1725 1780 * uncompress the received dev_t
1726 1781 * if the top half is zero indicating a request
1727 1782 * from an `older style' OS.
1728 1783 */
1729 1784 if ((va.va_size & 0xffff0000) == 0)
1730 1785 va.va_rdev = nfsv2_expdev(va.va_size);
1731 1786 else
1732 1787 va.va_rdev = (dev_t)va.va_size;
1733 1788 }
1734 1789 va.va_mask &= ~AT_SIZE;
1735 1790 } else if ((va.va_mode & IFMT) == IFBLK) {
1736 1791 va.va_type = VBLK;
1737 1792 /*
1738 1793 * uncompress the received dev_t
1739 1794 * if the top half is zero indicating a request
1740 1795 * from an `older style' OS.
1741 1796 */
1742 1797 if ((va.va_size & 0xffff0000) == 0)
1743 1798 va.va_rdev = nfsv2_expdev(va.va_size);
1744 1799 else
1745 1800 va.va_rdev = (dev_t)va.va_size;
1746 1801 va.va_mask &= ~AT_SIZE;
1747 1802 } else if ((va.va_mode & IFMT) == IFSOCK) {
1748 1803 va.va_type = VSOCK;
1749 1804 } else {
1750 1805 va.va_type = VREG;
1751 1806 }
1752 1807 va.va_mode &= ~IFMT;
1753 1808 va.va_mask |= AT_TYPE;
1754 1809
1755 1810 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1756 1811 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1757 1812 MAXPATHLEN);
1758 1813 if (name == NULL) {
1759 1814 dr->dr_status = puterrno(EINVAL);
1760 1815 return;
1761 1816 }
1762 1817
1763 1818 /*
1764 1819 * Why was the choice made to use VWRITE as the mode to the
1765 1820 * call to VOP_CREATE ? This results in a bug. When a client
1766 1821 * opens a file that already exists and is RDONLY, the second
1767 1822 * open fails with an EACESS because of the mode.
1768 1823 * bug ID 1054648.
1769 1824 */
1770 1825 lookup_ok = 0;
1771 1826 mode = VWRITE;
1772 1827 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1773 1828 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1774 1829 NULL, NULL, NULL);
1775 1830 if (!error) {
1776 1831 struct vattr at;
1777 1832
1778 1833 lookup_ok = 1;
1779 1834 at.va_mask = AT_MODE;
1780 1835 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1781 1836 if (!error)
1782 1837 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1783 1838 VN_RELE(tvp);
1784 1839 tvp = NULL;
1785 1840 }
1786 1841 }
1787 1842
1788 1843 if (!lookup_ok) {
1789 1844 if (rdonly(exi, req)) {
1790 1845 error = EROFS;
1791 1846 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1792 1847 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1793 1848 error = EPERM;
1794 1849 } else {
1795 1850 error = 0;
1796 1851 }
1797 1852 }
1798 1853
1799 1854 /*
1800 1855 * If file size is being modified on an already existing file
1801 1856 * make sure that there are no conflicting non-blocking mandatory
1802 1857 * locks in the region being manipulated. Return EACCES if there
1803 1858 * are conflicting locks.
1804 1859 */
1805 1860 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1806 1861 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1807 1862 NULL, NULL, NULL);
1808 1863
1809 1864 if (!lookuperr &&
1810 1865 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1811 1866 VN_RELE(tvp);
1812 1867 curthread->t_flag |= T_WOULDBLOCK;
1813 1868 goto out;
1814 1869 }
1815 1870
1816 1871 if (!lookuperr && nbl_need_check(tvp)) {
1817 1872 /*
1818 1873 * The file exists. Now check if it has any
1819 1874 * conflicting non-blocking mandatory locks
1820 1875 * in the region being changed.
1821 1876 */
1822 1877 struct vattr bva;
1823 1878 u_offset_t offset;
1824 1879 ssize_t length;
1825 1880
1826 1881 nbl_start_crit(tvp, RW_READER);
1827 1882 in_crit = 1;
1828 1883
1829 1884 bva.va_mask = AT_SIZE;
1830 1885 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1831 1886 if (!error) {
1832 1887 if (va.va_size < bva.va_size) {
1833 1888 offset = va.va_size;
1834 1889 length = bva.va_size - va.va_size;
1835 1890 } else {
1836 1891 offset = bva.va_size;
1837 1892 length = va.va_size - bva.va_size;
1838 1893 }
1839 1894 if (length) {
1840 1895 if (nbl_conflict(tvp, NBL_WRITE,
1841 1896 offset, length, 0, NULL)) {
1842 1897 error = EACCES;
1843 1898 }
1844 1899 }
1845 1900 }
1846 1901 if (error) {
1847 1902 nbl_end_crit(tvp);
1848 1903 VN_RELE(tvp);
1849 1904 in_crit = 0;
1850 1905 }
1851 1906 } else if (tvp != NULL) {
1852 1907 VN_RELE(tvp);
1853 1908 }
1854 1909 }
1855 1910
1856 1911 if (!error) {
1857 1912 /*
1858 1913 * If filesystem is shared with nosuid the remove any
1859 1914 * setuid/setgid bits on create.
1860 1915 */
1861 1916 if (va.va_type == VREG &&
1862 1917 exi->exi_export.ex_flags & EX_NOSUID)
1863 1918 va.va_mode &= ~(VSUID | VSGID);
1864 1919
1865 1920 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1866 1921 NULL, NULL);
1867 1922
1868 1923 if (!error) {
1869 1924
1870 1925 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1871 1926 trunc = TRUE;
1872 1927 else
1873 1928 trunc = FALSE;
1874 1929
1875 1930 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1876 1931 VN_RELE(vp);
1877 1932 curthread->t_flag |= T_WOULDBLOCK;
1878 1933 goto out;
1879 1934 }
1880 1935 va.va_mask = AT_ALL;
1881 1936
1882 1937 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1883 1938
1884 1939 /* check for overflows */
1885 1940 if (!error) {
1886 1941 acl_perm(vp, exi, &va, cr);
1887 1942 error = vattr_to_nattr(&va, &dr->dr_attr);
1888 1943 if (!error) {
1889 1944 error = makefh(&dr->dr_fhandle, vp,
1890 1945 exi);
1891 1946 }
1892 1947 }
1893 1948 /*
1894 1949 * Force modified metadata out to stable storage.
1895 1950 *
1896 1951 * if a underlying vp exists, pass it to VOP_FSYNC
1897 1952 */
1898 1953 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1899 1954 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1900 1955 else
1901 1956 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1902 1957 VN_RELE(vp);
1903 1958 }
1904 1959
1905 1960 if (in_crit) {
1906 1961 nbl_end_crit(tvp);
1907 1962 VN_RELE(tvp);
1908 1963 }
1909 1964 }
1910 1965
1911 1966 /*
1912 1967 * Force modified data and metadata out to stable storage.
1913 1968 */
1914 1969 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1915 1970
1916 1971 out:
1917 1972
1918 1973 VN_RELE(dvp);
1919 1974
1920 1975 dr->dr_status = puterrno(error);
1921 1976
1922 1977 if (name != args->ca_da.da_name)
1923 1978 kmem_free(name, MAXPATHLEN);
1924 1979 }
1925 1980 void *
1926 1981 rfs_create_getfh(struct nfscreatargs *args)
1927 1982 {
1928 1983 return (args->ca_da.da_fhandle);
1929 1984 }
1930 1985
1931 1986 /*
1932 1987 * Remove a file.
1933 1988 * Remove named file from parent directory.
1934 1989 */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error = 0;
	vnode_t *vp;		/* directory containing the entry */
	vnode_t *targvp;	/* the entry being removed */
	int in_crit = 0;

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 * Look up the target first so we can inspect it below.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to an v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Hold the target in a critical region while checking nbmand locks. */
	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}
2015 2070
2016 2071 void *
2017 2072 rfs_remove_getfh(struct nfsdiropargs *da)
2018 2073 {
2019 2074 return (da->da_fhandle);
2020 2075 }
2021 2076
2022 2077 /*
2023 2078 * rename a file
2024 2079 * Give a file (from) a new name (to).
2025 2080 */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error = 0;
	vnode_t *fromvp;	/* source directory */
	vnode_t *tovp;		/* target directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* the file being renamed */
	vnode_t *targvp;	/* existing file being renamed over, if any */
	int in_crit = 0;	/* nonzero once inside srcvp's nbmand crit region */

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target handle must belong to the same export this request
	 * was authenticated against: renames may not cross filesystem
	 * (export) boundaries.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/* Only the pointer value is compared after the release above. */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both handles must name directories. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		/*
		 * An NFSv4 delegation is outstanding.  *status is left
		 * unset; T_WOULDBLOCK presumably tells the dispatcher to
		 * drop the reply so the client retries after the recall
		 * -- confirm against the common dispatch code.
		 */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}

	/*
	 * Serialize against non-blocking mandatory locking activity on
	 * the source vnode for the duration of the rename.
	 */
	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* On success, update the renamed vnode's cached pathname. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2164 2219 void *
2165 2220 rfs_rename_getfh(struct nfsrnmargs *args)
2166 2221 {
2167 2222 return (args->rna_from.da_fhandle);
2168 2223 }
2169 2224
2170 2225 /*
2171 2226 * Link to a file.
2172 2227 * Create a file (to) which is a hard link to the given file (from).
2173 2228 */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *fromvp;	/* the existing file being linked to */
	vnode_t *tovp;		/* directory that will contain the new name */
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory must be in the same export the request
	 * was authenticated against; hard links may not cross
	 * filesystem (export) boundaries.
	 */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/* Only the pointer value is compared after the release above. */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 * FNODSYNC on the source: only its data, not its attribute
	 * times, needs to be committed here.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2249 2304 void *
2250 2305 rfs_link_getfh(struct nfslinkargs *args)
2251 2306 {
2252 2307 return (args->la_from);
2253 2308 }
2254 2309
2255 2310 /*
2256 2311 * Symbolicly link to a file.
2257 2312 * Create a file (to) with the given attributes which is a symbolic link
2258 2313 * to the given path name (to).
2259 2314 */
2260 2315 void
2261 2316 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2262 2317 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2263 2318 {
2264 2319 int error;
2265 2320 struct vattr va;
2266 2321 vnode_t *vp;
2267 2322 vnode_t *svp;
2268 2323 int lerror;
2269 2324 struct sockaddr *ca;
2270 2325 char *name = NULL;
2271 2326
2272 2327 /*
2273 2328 * Disallow NULL paths
2274 2329 */
2275 2330 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2276 2331 *status = NFSERR_ACCES;
2277 2332 return;
2278 2333 }
2279 2334
2280 2335 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2281 2336 if (vp == NULL) {
2282 2337 *status = NFSERR_STALE;
2283 2338 return;
2284 2339 }
2285 2340
2286 2341 if (rdonly(exi, req)) {
2287 2342 VN_RELE(vp);
2288 2343 *status = NFSERR_ROFS;
2289 2344 return;
2290 2345 }
2291 2346
2292 2347 error = sattr_to_vattr(args->sla_sa, &va);
2293 2348 if (error) {
2294 2349 VN_RELE(vp);
2295 2350 *status = puterrno(error);
2296 2351 return;
2297 2352 }
2298 2353
2299 2354 if (!(va.va_mask & AT_MODE)) {
2300 2355 VN_RELE(vp);
2301 2356 *status = NFSERR_INVAL;
2302 2357 return;
2303 2358 }
2304 2359
2305 2360 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2306 2361 name = nfscmd_convname(ca, exi, args->sla_tnm,
2307 2362 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2308 2363
2309 2364 if (name == NULL) {
2310 2365 *status = NFSERR_ACCES;
2311 2366 return;
2312 2367 }
2313 2368
2314 2369 va.va_type = VLNK;
2315 2370 va.va_mask |= AT_TYPE;
2316 2371
2317 2372 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2318 2373
2319 2374 /*
2320 2375 * Force new data and metadata out to stable storage.
2321 2376 */
2322 2377 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2323 2378 NULL, cr, NULL, NULL, NULL);
2324 2379
2325 2380 if (!lerror) {
2326 2381 (void) VOP_FSYNC(svp, 0, cr, NULL);
2327 2382 VN_RELE(svp);
2328 2383 }
2329 2384
2330 2385 /*
2331 2386 * Force modified data and metadata out to stable storage.
2332 2387 */
2333 2388 (void) VOP_FSYNC(vp, 0, cr, NULL);
2334 2389
2335 2390 VN_RELE(vp);
2336 2391
2337 2392 *status = puterrno(error);
2338 2393 if (name != args->sla_tnm)
2339 2394 kmem_free(name, MAXPATHLEN);
2340 2395
2341 2396 }
2342 2397 void *
2343 2398 rfs_symlink_getfh(struct nfsslargs *args)
2344 2399 {
2345 2400 return (args->sla_from.da_fhandle);
2346 2401 }
2347 2402
2348 2403 /*
2349 2404 * Make a directory.
2350 2405 * Create a directory with the given name, parent directory, and attributes.
2351 2406 * Returns a file handle and attributes for the new directory.
2352 2407 */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;	/* the newly created directory */
	vnode_t *vp;		/* the parent directory */
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	/* The client must supply a mode for the new directory. */
	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attribtutes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			/*
			 * NOTE(review): acl_perm() is handed the parent
			 * directory (vp) while the attributes being
			 * adjusted (va) are those of the new directory
			 * (dvp) fetched just above -- confirm that the
			 * parent, not dvp, is intended here.
			 */
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}
2434 2489 void *
2435 2490 rfs_mkdir_getfh(struct nfscreatargs *args)
2436 2491 {
2437 2492 return (args->ca_da.da_fhandle);
2438 2493 }
2439 2494
2440 2495 /*
2441 2496 * Remove a directory.
2442 2497 * Remove the given directory name from the given parent directory.
2443 2498 */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;	/* the parent directory */


	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR now takes a new third argument (the current
	 * directory of the process).  That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are.  We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove.
	 */
	error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty.  A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}
2502 2557 void *
2503 2558 rfs_rmdir_getfh(struct nfsdiropargs *da)
2504 2559 {
2505 2560 return (da->da_fhandle);
2506 2561 }
2507 2562
2508 2563 /* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	int iseof;		/* set by VOP_READDIR when end of dir hit */
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;	/* character-set converted entries, if any */
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	/* Hold the directory as a reader while it is scanned. */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte request yields an empty, non-EOF reply. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	/* Never return more than the protocol maximum in one reply. */
	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries.  This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			/* nothing was read: report EOF */
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/*
	 * Convert entry names to the client's character set where the
	 * export requires it.  NOTE(review): if VOP_READDIR failed,
	 * rd_size is consulted below without having been assigned on
	 * this path -- presumably *rd arrives zero-initialized from
	 * the dispatcher; confirm.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	/* Swap in the converted buffer, if conversion produced one. */
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}
2641 2696 void *
2642 2697 rfs_readdir_getfh(struct nfsrddirargs *rda)
2643 2698 {
2644 2699 return (&rda->rda_fh);
2645 2700 }
2646 2701 void
2647 2702 rfs_rddirfree(struct nfsrddirres *rd)
2648 2703 {
2649 2704 if (rd->rd_entries != NULL)
2650 2705 kmem_free(rd->rd_entries, rd->rd_bufsize);
2651 2706 }
2652 2707
2653 2708 /* ARGSUSED */
2654 2709 void
2655 2710 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2656 2711 struct svc_req *req, cred_t *cr)
2657 2712 {
2658 2713 int error;
2659 2714 struct statvfs64 sb;
2660 2715 vnode_t *vp;
2661 2716
2662 2717 vp = nfs_fhtovp(fh, exi);
2663 2718 if (vp == NULL) {
2664 2719 fs->fs_status = NFSERR_STALE;
2665 2720 return;
2666 2721 }
2667 2722
2668 2723 error = VFS_STATVFS(vp->v_vfsp, &sb);
2669 2724
2670 2725 if (!error) {
2671 2726 fs->fs_tsize = nfstsize();
2672 2727 fs->fs_bsize = sb.f_frsize;
2673 2728 fs->fs_blocks = sb.f_blocks;
2674 2729 fs->fs_bfree = sb.f_bfree;
2675 2730 fs->fs_bavail = sb.f_bavail;
2676 2731 }
2677 2732
2678 2733 VN_RELE(vp);
2679 2734
2680 2735 fs->fs_status = puterrno(error);
2681 2736
2682 2737 }
2683 2738 void *
2684 2739 rfs_statfs_getfh(fhandle_t *fh)
2685 2740 {
2686 2741 return (fh);
2687 2742 }
2688 2743
/*
 * Convert NFSv2 over-the-wire settable attributes (nfssattr) into a
 * vattr for the VOP layer.  An all-ones field is the wire encoding for
 * "do not set"; every other value is copied into *vap and the matching
 * AT_* bit is raised in va_mask.  Returns 0 on success, or EOVERFLOW
 * (32-bit kernels only) when a time value cannot be represented.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	/* Both halves of a time must be set for the time to count. */
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		/* microseconds on the wire, nanoseconds in the vattr */
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2755 2810
/*
 * Map vnode types (vtype_t) to NFSv2 over-the-wire file types; indexed
 * by vap->va_type in vattr_to_nattr().  Types with no NFSv2 equivalent
 * map to 0.  (VFIFO is special-cased via NA_SETFIFO() in
 * vattr_to_nattr() rather than here.)
 */
static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2759 2814
2760 2815 /*
2761 2816 * check the following fields for overflow: nodeid, size, and time.
2762 2817 * There could be a problem when converting 64-bit LP64 fields
2763 2818 * into 32-bit ones. Return an error if there is an overflow.
2764 2819 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/* (unsigned short)-1 means "unknown" -- pass through as all-ones */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	/* nanoseconds in the vattr, microseconds on the wire */
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}
2866 2921
2867 2922 /*
2868 2923 * acl v2 support: returns approximate permission.
2869 2924 * default: returns minimal permission (more restrictive)
2870 2925 * aclok: returns maximal permission (less restrictive)
2871 2926 * This routine changes the permissions that are alaredy in *va.
2872 2927 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
2873 2928 * CLASS_OBJ is always the same as GROUP_OBJ entry.
2874 2929 */
static void
acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
{
	vsecattr_t vsa;
	int aclcnt;
	aclent_t *aclentp;
	mode_t mask_perm;	/* CLASS_OBJ (mask) permission bits */
	mode_t grp_perm;	/* synthesized group permission bits */
	mode_t other_perm;	/* synthesized other permission bits */
	mode_t other_orig;	/* OTHER_OBJ bits as found in the ACL */
	int error;

	/* dont care default acl */
	vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
	error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);

	/* On error, *va is left untouched. */
	if (!error) {
		aclcnt = vsa.vsa_aclcnt;
		if (aclcnt > MIN_ACL_ENTRIES) {
			/* non-trivial ACL */
			aclentp = vsa.vsa_aclentp;
			if (exi->exi_export.ex_flags & EX_ACLOK) {
				/* maximal permissions */
				grp_perm = 0;
				other_perm = 0;
				/*
				 * NOTE(review): mask_perm and other_orig are
				 * only assigned when CLASS_OBJ and OTHER_OBJ
				 * entries are encountered.  A well-formed
				 * Solaris ACL always contains both, but
				 * nothing here enforces that -- confirm.
				 */
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
						grp_perm |=
						    aclentp->a_perm << 3;
						other_perm |= aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm |=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm |= aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_orig = aclentp->a_perm;
						break;
					case CLASS_OBJ:
						mask_perm = aclentp->a_perm;
						break;
					default:
						break;
					}
				}
				/* apply the mask, then restore other bits */
				grp_perm &= mask_perm << 3;
				other_perm &= mask_perm;
				other_perm |= other_orig;

			} else {
				/* minimal permissions */
				grp_perm = 070;
				other_perm = 07;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
					case CLASS_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						other_perm &=
						    aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm &=
						    aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_perm &=
						    aclentp->a_perm;
						break;
					default:
						break;
					}
				}
			}
			/* copy to va */
			va->va_mode &= ~077;
			va->va_mode |= grp_perm | other_perm;
		}
		/* VOP_GETSECATTR allocated the entry array; free it */
		if (vsa.vsa_aclcnt)
			kmem_free(vsa.vsa_aclentp,
			    vsa.vsa_aclcnt * sizeof (aclent_t));
	}
}
2971 3026
2972 3027 void
2973 3028 rfs_srvrinit(void)
2974 3029 {
2975 3030 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
2976 3031 nfs2_srv_caller_id = fs_new_caller_id();
2977 3032 }
2978 3033
2979 3034 void
2980 3035 rfs_srvrfini(void)
2981 3036 {
2982 3037 mutex_destroy(&rfs_async_write_lock);
2983 3038 }
2984 3039
2985 3040 static int
2986 3041 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
2987 3042 {
2988 3043 struct clist *wcl;
2989 3044 int wlist_len;
2990 3045 uint32_t count = rr->rr_count;
2991 3046
2992 3047 wcl = ra->ra_wlist;
2993 3048
2994 3049 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
2995 3050 return (FALSE);
2996 3051 }
2997 3052
2998 3053 wcl = ra->ra_wlist;
2999 3054 rr->rr_ok.rrok_wlist_len = wlist_len;
3000 3055 rr->rr_ok.rrok_wlist = wcl;
3001 3056
3002 3057 return (TRUE);
3003 3058 }
↓ open down ↓ |
2532 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX