illumos-gate Wdiff usr/src/uts/common/fs/nfs/nfs_srv.c

Print this page

7378 exported_lock held during nfs4 compound processing

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/nfs/nfs_srv.c
          +++ new/usr/src/uts/common/fs/nfs/nfs_srv.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27   27   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28   28   *      All rights reserved.
  29   29   */
  30   30  
  31   31  #include <sys/param.h>
  32   32  #include <sys/types.h>
  33   33  #include <sys/systm.h>
  34   34  #include <sys/cred.h>
  35   35  #include <sys/buf.h>
  36   36  #include <sys/vfs.h>
  37   37  #include <sys/vnode.h>
  38   38  #include <sys/uio.h>
  39   39  #include <sys/stat.h>
  40   40  #include <sys/errno.h>
  41   41  #include <sys/sysmacros.h>
  42   42  #include <sys/statvfs.h>
  43   43  #include <sys/kmem.h>
  44   44  #include <sys/kstat.h>
  45   45  #include <sys/dirent.h>
  46   46  #include <sys/cmn_err.h>
  47   47  #include <sys/debug.h>
  48   48  #include <sys/vtrace.h>
  49   49  #include <sys/mode.h>
  50   50  #include <sys/acl.h>
  51   51  #include <sys/nbmlock.h>
  52   52  #include <sys/policy.h>
  53   53  #include <sys/sdt.h>
  54   54  
  55   55  #include <rpc/types.h>
  56   56  #include <rpc/auth.h>
  57   57  #include <rpc/svc.h>
  58   58  
  59   59  #include <nfs/nfs.h>
  60   60  #include <nfs/export.h>
  61   61  #include <nfs/nfs_cmd.h>
  62   62  
  63   63  #include <vm/hat.h>
  64   64  #include <vm/as.h>
  65   65  #include <vm/seg.h>
  66   66  #include <vm/seg_map.h>
  67   67  #include <vm/seg_kmem.h>
  68   68  
  69   69  #include <sys/strsubr.h>
  70   70  
  71   71  /*
  72   72   * These are the interface routines for the server side of the
  73   73   * Network File System.  See the NFS version 2 protocol specification
  74   74   * for a description of this interface.
  75   75   */
  76   76  
/*
 * Helpers whose definitions are outside the part of the file shown here.
 * In the visible code, sattr_to_vattr() is used by rfs_setattr() to turn
 * the settable attributes from the request into a struct vattr (a non-zero
 * return is passed to puterrno()), and acl_perm() is called on freshly
 * fetched attributes just before they are converted with vattr_to_nattr().
 */
static int      sattr_to_vattr(struct nfssattr *, struct vattr *);
static void     acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
                        cred_t *);

/*
 * Some "over the wire" UNIX file types.  These are encoded
 * into the mode.  This needs to be fixed in the next rev.
 */
#define IFMT            0170000         /* type of file */
#define IFCHR           0020000         /* character special */
#define IFBLK           0060000         /* block special */
#define IFSOCK          0140000         /* socket */

/*
 * Caller id stored in the cc_caller_id field of the caller_context_t that
 * the NFSv2 handlers pass to VOP calls.  Where the value is assigned is
 * not visible in this part of the file.
 */
u_longlong_t nfs2_srv_caller_id;
  91   91  
  92   92  /*
  93   93   * Get file attributes.
  94   94   * Returns the current attributes of the file with the given fhandle.
  95   95   */
  96   96  /* ARGSUSED */
  97   97  void
  98   98  rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
  99   99      struct svc_req *req, cred_t *cr, bool_t ro)
 100  100  {
 101  101          int error;
 102  102          vnode_t *vp;
 103  103          struct vattr va;
 104  104  
 105  105          vp = nfs_fhtovp(fhp, exi);
 106  106          if (vp == NULL) {
 107  107                  ns->ns_status = NFSERR_STALE;
 108  108                  return;
 109  109          }
 110  110  
 111  111          /*
 112  112           * Do the getattr.
 113  113           */
 114  114          va.va_mask = AT_ALL;    /* we want all the attributes */
 115  115  
 116  116          error = rfs4_delegated_getattr(vp, &va, 0, cr);
 117  117  
 118  118          /* check for overflows */
 119  119          if (!error) {
 120  120                  /* Lie about the object type for a referral */
 121  121                  if (vn_is_nfs_reparse(vp, cr))
 122  122                          va.va_type = VLNK;
 123  123  
 124  124                  acl_perm(vp, exi, &va, cr);
 125  125                  error = vattr_to_nattr(&va, &ns->ns_attr);
 126  126          }
 127  127  
 128  128          VN_RELE(vp);
 129  129  
 130  130          ns->ns_status = puterrno(error);
 131  131  }
 132  132  void *
 133  133  rfs_getattr_getfh(fhandle_t *fhp)
 134  134  {
 135  135          return (fhp);
 136  136  }
 137  137  
 138  138  /*
 139  139   * Set file attributes.
 140  140   * Sets the attributes of the file with the given fhandle.  Returns
 141  141   * the new attributes.
 142  142   */
 143  143  /* ARGSUSED */
 144  144  void
 145  145  rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
 146  146      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
 147  147  {
 148  148          int error;
 149  149          int flag;
 150  150          int in_crit = 0;
 151  151          vnode_t *vp;
 152  152          struct vattr va;
 153  153          struct vattr bva;
 154  154          struct flock64 bf;
 155  155          caller_context_t ct;
 156  156  
 157  157  
 158  158          vp = nfs_fhtovp(&args->saa_fh, exi);
 159  159          if (vp == NULL) {
 160  160                  ns->ns_status = NFSERR_STALE;
 161  161                  return;
 162  162          }
 163  163  
 164  164          if (rdonly(ro, vp)) {
 165  165                  VN_RELE(vp);
 166  166                  ns->ns_status = NFSERR_ROFS;
 167  167                  return;
 168  168          }
 169  169  
 170  170          error = sattr_to_vattr(&args->saa_sa, &va);
 171  171          if (error) {
 172  172                  VN_RELE(vp);
 173  173                  ns->ns_status = puterrno(error);
 174  174                  return;
 175  175          }
 176  176  
 177  177          /*
 178  178           * If the client is requesting a change to the mtime,
 179  179           * but the nanosecond field is set to 1 billion, then
 180  180           * this is a flag to the server that it should set the
 181  181           * atime and mtime fields to the server's current time.
 182  182           * The 1 billion number actually came from the client
 183  183           * as 1 million, but the units in the over the wire
 184  184           * request are microseconds instead of nanoseconds.
 185  185           *
 186  186           * This is an overload of the protocol and should be
 187  187           * documented in the NFS Version 2 protocol specification.
 188  188           */
 189  189          if (va.va_mask & AT_MTIME) {
 190  190                  if (va.va_mtime.tv_nsec == 1000000000) {
 191  191                          gethrestime(&va.va_mtime);
 192  192                          va.va_atime = va.va_mtime;
 193  193                          va.va_mask |= AT_ATIME;
 194  194                          flag = 0;
 195  195                  } else
 196  196                          flag = ATTR_UTIME;
 197  197          } else
 198  198                  flag = 0;
 199  199  
 200  200          /*
 201  201           * If the filesystem is exported with nosuid, then mask off
 202  202           * the setuid and setgid bits.
 203  203           */
 204  204          if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
 205  205              (exi->exi_export.ex_flags & EX_NOSUID))
 206  206                  va.va_mode &= ~(VSUID | VSGID);
 207  207  
 208  208          ct.cc_sysid = 0;
 209  209          ct.cc_pid = 0;
 210  210          ct.cc_caller_id = nfs2_srv_caller_id;
 211  211          ct.cc_flags = CC_DONTBLOCK;
 212  212  
 213  213          /*
 214  214           * We need to specially handle size changes because it is
 215  215           * possible for the client to create a file with modes
 216  216           * which indicate read-only, but with the file opened for
 217  217           * writing.  If the client then tries to set the size of
 218  218           * the file, then the normal access checking done in
 219  219           * VOP_SETATTR would prevent the client from doing so,
 220  220           * although it should be legal for it to do so.  To get
 221  221           * around this, we do the access checking for ourselves
 222  222           * and then use VOP_SPACE which doesn't do the access
 223  223           * checking which VOP_SETATTR does. VOP_SPACE can only
 224  224           * operate on VREG files, let VOP_SETATTR handle the other
 225  225           * extremely rare cases.
 226  226           * Also the client should not be allowed to change the
 227  227           * size of the file if there is a conflicting non-blocking
 228  228           * mandatory lock in the region of change.
 229  229           */
 230  230          if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
 231  231                  if (nbl_need_check(vp)) {
 232  232                          nbl_start_crit(vp, RW_READER);
 233  233                          in_crit = 1;
 234  234                  }
 235  235  
 236  236                  bva.va_mask = AT_UID | AT_SIZE;
 237  237  
 238  238                  error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
 239  239  
 240  240                  if (error) {
 241  241                          if (in_crit)
 242  242                                  nbl_end_crit(vp);
 243  243                          VN_RELE(vp);
 244  244                          ns->ns_status = puterrno(error);
 245  245                          return;
 246  246                  }
 247  247  
 248  248                  if (in_crit) {
 249  249                          u_offset_t offset;
 250  250                          ssize_t length;
 251  251  
 252  252                          if (va.va_size < bva.va_size) {
 253  253                                  offset = va.va_size;
 254  254                                  length = bva.va_size - va.va_size;
 255  255                          } else {
 256  256                                  offset = bva.va_size;
 257  257                                  length = va.va_size - bva.va_size;
 258  258                          }
 259  259                          if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
 260  260                              NULL)) {
 261  261                                  error = EACCES;
 262  262                          }
 263  263                  }
 264  264  
 265  265                  if (crgetuid(cr) == bva.va_uid && !error &&
 266  266                      va.va_size != bva.va_size) {
 267  267                          va.va_mask &= ~AT_SIZE;
 268  268                          bf.l_type = F_WRLCK;
 269  269                          bf.l_whence = 0;
 270  270                          bf.l_start = (off64_t)va.va_size;
 271  271                          bf.l_len = 0;
 272  272                          bf.l_sysid = 0;
 273  273                          bf.l_pid = 0;
 274  274  
 275  275                          error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
 276  276                              (offset_t)va.va_size, cr, &ct);
 277  277                  }
 278  278                  if (in_crit)
 279  279                          nbl_end_crit(vp);
 280  280          } else
 281  281                  error = 0;
 282  282  
 283  283          /*
 284  284           * Do the setattr.
 285  285           */
 286  286          if (!error && va.va_mask) {
 287  287                  error = VOP_SETATTR(vp, &va, flag, cr, &ct);
 288  288          }
 289  289  
 290  290          /*
 291  291           * check if the monitor on either vop_space or vop_setattr detected
 292  292           * a delegation conflict and if so, mark the thread flag as
 293  293           * wouldblock so that the response is dropped and the client will
 294  294           * try again.
 295  295           */
 296  296          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 297  297                  VN_RELE(vp);
 298  298                  curthread->t_flag |= T_WOULDBLOCK;
 299  299                  return;
 300  300          }
 301  301  
 302  302          if (!error) {
 303  303                  va.va_mask = AT_ALL;    /* get everything */
 304  304  
 305  305                  error = rfs4_delegated_getattr(vp, &va, 0, cr);
 306  306  
 307  307                  /* check for overflows */
 308  308                  if (!error) {
 309  309                          acl_perm(vp, exi, &va, cr);
 310  310                          error = vattr_to_nattr(&va, &ns->ns_attr);
 311  311                  }
 312  312          }
 313  313  
 314  314          ct.cc_flags = 0;
 315  315  
 316  316          /*
 317  317           * Force modified metadata out to stable storage.
 318  318           */
 319  319          (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
 320  320  
 321  321          VN_RELE(vp);
 322  322  
 323  323          ns->ns_status = puterrno(error);
 324  324  }
 325  325  void *
 326  326  rfs_setattr_getfh(struct nfssaargs *args)
 327  327  {
 328  328          return (&args->saa_fh);
 329  329  }
 330  330  
 331  331  /*
 332  332   * Directory lookup.
 333  333   * Returns an fhandle and file attributes for file name in a directory.
 334  334   */
 335  335  /* ARGSUSED */
 336  336  void
 337  337  rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
 338  338      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
 339  339  {
 340  340          int error;
 341  341          vnode_t *dvp;
 342  342          vnode_t *vp;
 343  343          struct vattr va;
 344  344          fhandle_t *fhp = da->da_fhandle;
 345  345          struct sec_ol sec = {0, 0};
 346  346          bool_t publicfh_flag = FALSE, auth_weak = FALSE;
 347  347          char *name;
 348  348          struct sockaddr *ca;
 349  349  
 350  350          /*
 351  351           * Trusted Extension doesn't support NFSv2. MOUNT
 352  352           * will reject v2 clients. Need to prevent v2 client
 353  353           * access via WebNFS here.
 354  354           */
 355  355          if (is_system_labeled() && req->rq_vers == 2) {
 356  356                  dr->dr_status = NFSERR_ACCES;
 357  357                  return;
 358  358          }
 359  359  
 360  360          /*
 361  361           * Disallow NULL paths
 362  362           */
 363  363          if (da->da_name == NULL || *da->da_name == '\0') {
 364  364                  dr->dr_status = NFSERR_ACCES;
 365  365                  return;
 366  366          }
 367  367  
 368  368          /*
 369  369           * Allow lookups from the root - the default
 370  370           * location of the public filehandle.
 371  371           */
 372  372          if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 373  373                  dvp = rootdir;
 374  374                  VN_HOLD(dvp);
 375  375          } else {
 376  376                  dvp = nfs_fhtovp(fhp, exi);
 377  377                  if (dvp == NULL) {
 378  378                          dr->dr_status = NFSERR_STALE;
 379  379                          return;
 380  380                  }
 381  381          }
 382  382  
 383  383          /*
 384  384           * Not allow lookup beyond root.
 385  385           * If the filehandle matches a filehandle of the exi,
 386  386           * then the ".." refers beyond the root of an exported filesystem.
 387  387           */
 388  388          if (strcmp(da->da_name, "..") == 0 &&
 389  389              EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
 390  390                  VN_RELE(dvp);
 391  391                  dr->dr_status = NFSERR_NOENT;
 392  392                  return;
 393  393          }
 394  394  
 395  395          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 396  396          name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
 397  397              MAXPATHLEN);
 398  398  
 399  399          if (name == NULL) {
 400  400                  dr->dr_status = NFSERR_ACCES;
 401  401                  return;
 402  402          }
 403  403  
 404  404          /*
 405  405           * If the public filehandle is used then allow
 406  406           * a multi-component lookup, i.e. evaluate
 407  407           * a pathname and follow symbolic links if
 408  408           * necessary.
 409  409           *
 410  410           * This may result in a vnode in another filesystem
 411  411           * which is OK as long as the filesystem is exported.
 412  412           */
 413  413          if (PUBLIC_FH2(fhp)) {
 414  414                  publicfh_flag = TRUE;
 415  415                  error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
 416  416                      &sec);
 417  417          } else {
 418  418                  /*
 419  419                   * Do a normal single component lookup.
 420  420                   */
 421  421                  error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
 422  422                      NULL, NULL, NULL);
 423  423          }
 424  424  
 425  425          if (name != da->da_name)
 426  426                  kmem_free(name, MAXPATHLEN);
 427  427  
 428  428  
 429  429          if (!error) {
 430  430                  va.va_mask = AT_ALL;    /* we want everything */
 431  431  
 432  432                  error = rfs4_delegated_getattr(vp, &va, 0, cr);
 433  433  
 434  434                  /* check for overflows */
 435  435                  if (!error) {
 436  436                          acl_perm(vp, exi, &va, cr);
 437  437                          error = vattr_to_nattr(&va, &dr->dr_attr);
 438  438                          if (!error) {
 439  439                                  if (sec.sec_flags & SEC_QUERY)
 440  440                                          error = makefh_ol(&dr->dr_fhandle, exi,
 441  441                                              sec.sec_index);
 442  442                                  else {
 443  443                                          error = makefh(&dr->dr_fhandle, vp,
 444  444                                              exi);
 445  445                                          if (!error && publicfh_flag &&
 446  446                                              !chk_clnt_sec(exi, req))
 447  447                                                  auth_weak = TRUE;
 448  448                                  }
 449  449                          }
 450  450                  }
 451  451                  VN_RELE(vp);
 452  452          }
 453  453  
 454  454          VN_RELE(dvp);
 455  455  
 456  456          /*
 457  457           * If publicfh_flag is true then we have called rfs_publicfh_mclookup
 458  458           * and have obtained a new exportinfo in exi which needs to be
 459  459           * released. Note the the original exportinfo pointed to by exi
 460  460           * will be released by the caller, comon_dispatch.
 461  461           */
 462  462          if (publicfh_flag && exi != NULL)
 463  463                  exi_rele(exi);
 464  464  
 465  465          /*
 466  466           * If it's public fh, no 0x81, and client's flavor is
 467  467           * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
 468  468           * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
 469  469           */
 470  470          if (auth_weak)
 471  471                  dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
 472  472          else
 473  473                  dr->dr_status = puterrno(error);
 474  474  }
 475  475  void *
 476  476  rfs_lookup_getfh(struct nfsdiropargs *da)
 477  477  {
 478  478          return (da->da_fhandle);
 479  479  }
 480  480  
 481  481  /*
 482  482   * Read symbolic link.
 483  483   * Returns the string in the symbolic link at the given fhandle.
 484  484   */
 485  485  /* ARGSUSED */
 486  486  void
 487  487  rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
 488  488      struct svc_req *req, cred_t *cr, bool_t ro)
 489  489  {
 490  490          int error;
 491  491          struct iovec iov;
 492  492          struct uio uio;
 493  493          vnode_t *vp;
 494  494          struct vattr va;
 495  495          struct sockaddr *ca;
 496  496          char *name = NULL;
 497  497          int is_referral = 0;
 498  498  
 499  499          vp = nfs_fhtovp(fhp, exi);
 500  500          if (vp == NULL) {
 501  501                  rl->rl_data = NULL;
 502  502                  rl->rl_status = NFSERR_STALE;
 503  503                  return;
 504  504          }
 505  505  
 506  506          va.va_mask = AT_MODE;
 507  507  
 508  508          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
 509  509  
 510  510          if (error) {
 511  511                  VN_RELE(vp);
 512  512                  rl->rl_data = NULL;
 513  513                  rl->rl_status = puterrno(error);
 514  514                  return;
 515  515          }
 516  516  
 517  517          if (MANDLOCK(vp, va.va_mode)) {
 518  518                  VN_RELE(vp);
 519  519                  rl->rl_data = NULL;
 520  520                  rl->rl_status = NFSERR_ACCES;
 521  521                  return;
 522  522          }
 523  523  
 524  524          /* We lied about the object type for a referral */
 525  525          if (vn_is_nfs_reparse(vp, cr))
 526  526                  is_referral = 1;
 527  527  
 528  528          /*
 529  529           * XNFS and RFC1094 require us to return ENXIO if argument
 530  530           * is not a link. BUGID 1138002.
 531  531           */
 532  532          if (vp->v_type != VLNK && !is_referral) {
 533  533                  VN_RELE(vp);
 534  534                  rl->rl_data = NULL;
 535  535                  rl->rl_status = NFSERR_NXIO;
 536  536                  return;
 537  537          }
 538  538  
 539  539          /*
 540  540           * Allocate data for pathname.  This will be freed by rfs_rlfree.
 541  541           */
 542  542          rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
 543  543  
 544  544          if (is_referral) {
 545  545                  char *s;
 546  546                  size_t strsz;
 547  547  
 548  548                  /* Get an artificial symlink based on a referral */
 549  549                  s = build_symlink(vp, cr, &strsz);
 550  550                  global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
 551  551                  DTRACE_PROBE2(nfs2serv__func__referral__reflink,
 552  552                      vnode_t *, vp, char *, s);
 553  553                  if (s == NULL)
 554  554                          error = EINVAL;
 555  555                  else {
 556  556                          error = 0;
 557  557                          (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
 558  558                          rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
 559  559                          kmem_free(s, strsz);
 560  560                  }
 561  561  
 562  562          } else {
 563  563  
 564  564                  /*
 565  565                   * Set up io vector to read sym link data
 566  566                   */
 567  567                  iov.iov_base = rl->rl_data;
 568  568                  iov.iov_len = NFS_MAXPATHLEN;
 569  569                  uio.uio_iov = &iov;
 570  570                  uio.uio_iovcnt = 1;
 571  571                  uio.uio_segflg = UIO_SYSSPACE;
 572  572                  uio.uio_extflg = UIO_COPY_CACHED;
 573  573                  uio.uio_loffset = (offset_t)0;
 574  574                  uio.uio_resid = NFS_MAXPATHLEN;
 575  575  
 576  576                  /*
 577  577                   * Do the readlink.
 578  578                   */
 579  579                  error = VOP_READLINK(vp, &uio, cr, NULL);
 580  580  
 581  581                  rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);
 582  582  
 583  583                  if (!error)
 584  584                          rl->rl_data[rl->rl_count] = '\0';
 585  585  
 586  586          }
 587  587  
 588  588  
 589  589          VN_RELE(vp);
 590  590  
 591  591          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
 592  592          name = nfscmd_convname(ca, exi, rl->rl_data,
 593  593              NFSCMD_CONV_OUTBOUND, MAXPATHLEN);
 594  594  
 595  595          if (name != NULL && name != rl->rl_data) {
 596  596                  kmem_free(rl->rl_data, NFS_MAXPATHLEN);
 597  597                  rl->rl_data = name;
 598  598          }
 599  599  
 600  600          /*
 601  601           * XNFS and RFC1094 require us to return ENXIO if argument
 602  602           * is not a link. UFS returns EINVAL if this is the case,
 603  603           * so we do the mapping here. BUGID 1138002.
 604  604           */
 605  605          if (error == EINVAL)
 606  606                  rl->rl_status = NFSERR_NXIO;
 607  607          else
 608  608                  rl->rl_status = puterrno(error);
 609  609  
 610  610  }
 611  611  void *
 612  612  rfs_readlink_getfh(fhandle_t *fhp)
 613  613  {
 614  614          return (fhp);
 615  615  }
 616  616  /*
 617  617   * Free data allocated by rfs_readlink
 618  618   */
 619  619  void
 620  620  rfs_rlfree(struct nfsrdlnres *rl)
 621  621  {
 622  622          if (rl->rl_data != NULL)
 623  623                  kmem_free(rl->rl_data, NFS_MAXPATHLEN);
 624  624  }
 625  625  
/*
 * Forward declaration; the definition and its callers are outside the
 * part of the file shown here.
 */
static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
 627  627  
 628  628  /*
 629  629   * Read data.
 630  630   * Returns some data read from the file at the given fhandle.
 631  631   */
 632  632  /* ARGSUSED */
 633  633  void
 634  634  rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
 635  635      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
 636  636  {
 637  637          vnode_t *vp;
 638  638          int error;
 639  639          struct vattr va;
 640  640          struct iovec iov;
 641  641          struct uio uio;
 642  642          mblk_t *mp;
 643  643          int alloc_err = 0;
 644  644          int in_crit = 0;
 645  645          caller_context_t ct;
 646  646  
 647  647          vp = nfs_fhtovp(&ra->ra_fhandle, exi);
 648  648          if (vp == NULL) {
 649  649                  rr->rr_data = NULL;
 650  650                  rr->rr_status = NFSERR_STALE;
 651  651                  return;
 652  652          }
 653  653  
 654  654          if (vp->v_type != VREG) {
 655  655                  VN_RELE(vp);
 656  656                  rr->rr_data = NULL;
 657  657                  rr->rr_status = NFSERR_ISDIR;
 658  658                  return;
 659  659          }
 660  660  
 661  661          ct.cc_sysid = 0;
 662  662          ct.cc_pid = 0;
 663  663          ct.cc_caller_id = nfs2_srv_caller_id;
 664  664          ct.cc_flags = CC_DONTBLOCK;
 665  665  
 666  666          /*
 667  667           * Enter the critical region before calling VOP_RWLOCK
 668  668           * to avoid a deadlock with write requests.
 669  669           */
 670  670          if (nbl_need_check(vp)) {
 671  671                  nbl_start_crit(vp, RW_READER);
 672  672                  if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
 673  673                      0, NULL)) {
 674  674                          nbl_end_crit(vp);
 675  675                          VN_RELE(vp);
 676  676                          rr->rr_data = NULL;
 677  677                          rr->rr_status = NFSERR_ACCES;
 678  678                          return;
 679  679                  }
 680  680                  in_crit = 1;
 681  681          }
 682  682  
 683  683          error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
 684  684  
 685  685          /* check if a monitor detected a delegation conflict */
 686  686          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
 687  687                  VN_RELE(vp);
 688  688                  /* mark as wouldblock so response is dropped */
 689  689                  curthread->t_flag |= T_WOULDBLOCK;
 690  690  
 691  691                  rr->rr_data = NULL;
 692  692                  return;
 693  693          }
 694  694  
 695  695          va.va_mask = AT_ALL;
 696  696  
 697  697          error = VOP_GETATTR(vp, &va, 0, cr, &ct);
 698  698  
 699  699          if (error) {
 700  700                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 701  701                  if (in_crit)
 702  702                          nbl_end_crit(vp);
 703  703  
 704  704                  VN_RELE(vp);
 705  705                  rr->rr_data = NULL;
 706  706                  rr->rr_status = puterrno(error);
 707  707  
 708  708                  return;
 709  709          }
 710  710  
 711  711          /*
 712  712           * This is a kludge to allow reading of files created
 713  713           * with no read permission.  The owner of the file
 714  714           * is always allowed to read it.
 715  715           */
 716  716          if (crgetuid(cr) != va.va_uid) {
 717  717                  error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
 718  718  
 719  719                  if (error) {
 720  720                          /*
 721  721                           * Exec is the same as read over the net because
 722  722                           * of demand loading.
 723  723                           */
 724  724                          error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
 725  725                  }
 726  726                  if (error) {
 727  727                          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 728  728                          if (in_crit)
 729  729                                  nbl_end_crit(vp);
 730  730                          VN_RELE(vp);
 731  731                          rr->rr_data = NULL;
 732  732                          rr->rr_status = puterrno(error);
 733  733  
 734  734                          return;
 735  735                  }
 736  736          }
 737  737  
 738  738          if (MANDLOCK(vp, va.va_mode)) {
 739  739                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 740  740                  if (in_crit)
 741  741                          nbl_end_crit(vp);
 742  742  
 743  743                  VN_RELE(vp);
 744  744                  rr->rr_data = NULL;
 745  745                  rr->rr_status = NFSERR_ACCES;
 746  746  
 747  747                  return;
 748  748          }
 749  749  
 750  750          rr->rr_ok.rrok_wlist_len = 0;
 751  751          rr->rr_ok.rrok_wlist = NULL;
 752  752  
 753  753          if ((u_offset_t)ra->ra_offset >= va.va_size) {
 754  754                  rr->rr_count = 0;
 755  755                  rr->rr_data = NULL;
 756  756                  /*
 757  757                   * In this case, status is NFS_OK, but there is no data
 758  758                   * to encode. So set rr_mp to NULL.
 759  759                   */
 760  760                  rr->rr_mp = NULL;
 761  761                  rr->rr_ok.rrok_wlist = ra->ra_wlist;
 762  762                  if (rr->rr_ok.rrok_wlist)
 763  763                          clist_zero_len(rr->rr_ok.rrok_wlist);
 764  764                  goto done;
 765  765          }
 766  766  
 767  767          if (ra->ra_wlist) {
 768  768                  mp = NULL;
 769  769                  rr->rr_mp = NULL;
 770  770                  (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
 771  771                  if (ra->ra_count > iov.iov_len) {
 772  772                          rr->rr_data = NULL;
 773  773                          rr->rr_status = NFSERR_INVAL;
 774  774                          goto done;
 775  775                  }
 776  776          } else {
 777  777                  /*
 778  778                   * mp will contain the data to be sent out in the read reply.
 779  779                   * This will be freed after the reply has been sent out (by the
 780  780                   * driver).
 781  781                   * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
 782  782                   * that the call to xdrmblk_putmblk() never fails.
 783  783                   */
 784  784                  mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
 785  785                      &alloc_err);
 786  786                  ASSERT(mp != NULL);
 787  787                  ASSERT(alloc_err == 0);
 788  788  
 789  789                  rr->rr_mp = mp;
 790  790  
 791  791                  /*
 792  792                   * Set up io vector
 793  793                   */
 794  794                  iov.iov_base = (caddr_t)mp->b_datap->db_base;
 795  795                  iov.iov_len = ra->ra_count;
 796  796          }
 797  797  
 798  798          uio.uio_iov = &iov;
 799  799          uio.uio_iovcnt = 1;
 800  800          uio.uio_segflg = UIO_SYSSPACE;
 801  801          uio.uio_extflg = UIO_COPY_CACHED;
 802  802          uio.uio_loffset = (offset_t)ra->ra_offset;
 803  803          uio.uio_resid = ra->ra_count;
 804  804  
 805  805          error = VOP_READ(vp, &uio, 0, cr, &ct);
 806  806  
 807  807          if (error) {
 808  808                  if (mp)
 809  809                          freeb(mp);
 810  810  
 811  811                  /*
 812  812                   * check if a monitor detected a delegation conflict and
 813  813                   * mark as wouldblock so response is dropped
 814  814                   */
 815  815                  if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
 816  816                          curthread->t_flag |= T_WOULDBLOCK;
 817  817                  else
 818  818                          rr->rr_status = puterrno(error);
 819  819  
 820  820                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 821  821                  if (in_crit)
 822  822                          nbl_end_crit(vp);
 823  823  
 824  824                  VN_RELE(vp);
 825  825                  rr->rr_data = NULL;
 826  826  
 827  827                  return;
 828  828          }
 829  829  
 830  830          /*
 831  831           * Get attributes again so we can send the latest access
 832  832           * time to the client side for his cache.
 833  833           */
 834  834          va.va_mask = AT_ALL;
 835  835  
 836  836          error = VOP_GETATTR(vp, &va, 0, cr, &ct);
 837  837  
 838  838          if (error) {
 839  839                  if (mp)
 840  840                          freeb(mp);
 841  841  
 842  842                  VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 843  843                  if (in_crit)
 844  844                          nbl_end_crit(vp);
 845  845  
 846  846                  VN_RELE(vp);
 847  847                  rr->rr_data = NULL;
 848  848                  rr->rr_status = puterrno(error);
 849  849  
 850  850                  return;
 851  851          }
 852  852  
 853  853          rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
 854  854  
 855  855          if (mp) {
 856  856                  rr->rr_data = (char *)mp->b_datap->db_base;
 857  857          } else {
 858  858                  if (ra->ra_wlist) {
 859  859                          rr->rr_data = (caddr_t)iov.iov_base;
 860  860                          if (!rdma_setup_read_data2(ra, rr)) {
 861  861                                  rr->rr_data = NULL;
 862  862                                  rr->rr_status = puterrno(NFSERR_INVAL);
 863  863                          }
 864  864                  }
 865  865          }
 866  866  done:
 867  867          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
 868  868          if (in_crit)
 869  869                  nbl_end_crit(vp);
 870  870  
 871  871          acl_perm(vp, exi, &va, cr);
 872  872  
 873  873          /* check for overflows */
 874  874          error = vattr_to_nattr(&va, &rr->rr_attr);
 875  875  
 876  876          VN_RELE(vp);
 877  877  
 878  878          rr->rr_status = puterrno(error);
 879  879  }
 880  880  
 881  881  /*
 882  882   * Free data allocated by rfs_read
 883  883   */
 884  884  void
 885  885  rfs_rdfree(struct nfsrdresult *rr)
 886  886  {
 887  887          mblk_t *mp;
 888  888  
 889  889          if (rr->rr_status == NFS_OK) {
 890  890                  mp = rr->rr_mp;
 891  891                  if (mp != NULL)
 892  892                          freeb(mp);
 893  893          }
 894  894  }
 895  895  
 896  896  void *
 897  897  rfs_read_getfh(struct nfsreadargs *ra)
 898  898  {
 899  899          return (&ra->ra_fhandle);
 900  900  }
 901  901  
 902  902  #define MAX_IOVECS      12      /* size of the on-stack iovec array in rfs_write_sync */
 903  903  
 904  904  #ifdef DEBUG
 905  905  static int rfs_write_sync_hits = 0;     /* mblk chain fit in the on-stack iovec array */
 906  906  static int rfs_write_sync_misses = 0;   /* iovec array had to be kmem_alloc'ed */
 907  907  #endif
 908  908  
 909  909  /*
 910  910   * Write data to file.
 911  911   * Returns attributes of a file after writing some data to it.
 912  912   *
 913  913   * Any changes made here, especially in error handling might have
 914  914   * to also be done in rfs_write (which clusters write requests).
 915  915   */
 916  916  /* ARGSUSED */
 917  917  void
 918  918  rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
 919  919      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
 920  920  {
 921  921          int error;
 922  922          vnode_t *vp;
 923  923          rlim64_t rlimit;
 924  924          struct vattr va;
 925  925          struct uio uio;
 926  926          struct iovec iov[MAX_IOVECS];
 927  927          mblk_t *m;
 928  928          struct iovec *iovp;
 929  929          int iovcnt;
 930  930          cred_t *savecred;
 931  931          int in_crit = 0;
 932  932          caller_context_t ct;
 933  933  
 934  934          vp = nfs_fhtovp(&wa->wa_fhandle, exi);
 935  935          if (vp == NULL) {
 936  936                  ns->ns_status = NFSERR_STALE;
 937  937                  return;
 938  938          }
 939  939  
 940  940          if (rdonly(ro, vp)) {
 941  941                  VN_RELE(vp);
 942  942                  ns->ns_status = NFSERR_ROFS;
 943  943                  return;
 944  944          }
 945  945  
 946  946          if (vp->v_type != VREG) {
 947  947                  VN_RELE(vp);
 948  948                  ns->ns_status = NFSERR_ISDIR;
 949  949                  return;
 950  950          }
 951  951  
 952  952          ct.cc_sysid = 0;
 953  953          ct.cc_pid = 0;
 954  954          ct.cc_caller_id = nfs2_srv_caller_id;
 955  955          ct.cc_flags = CC_DONTBLOCK;
 956  956  
 957  957          va.va_mask = AT_UID|AT_MODE;
 958  958  
 959  959          error = VOP_GETATTR(vp, &va, 0, cr, &ct);
 960  960  
 961  961          if (error) {
 962  962                  VN_RELE(vp);
 963  963                  ns->ns_status = puterrno(error);
 964  964  
 965  965                  return;
 966  966          }
 967  967  
 968  968          if (crgetuid(cr) != va.va_uid) {
 969  969                  /*
 970  970                   * This is a kludge to allow writes of files created
 971  971                   * with read only permission.  The owner of the file
 972  972                   * is always allowed to write it.
 973  973                   */
 974  974                  error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
 975  975  
 976  976                  if (error) {
 977  977                          VN_RELE(vp);
 978  978                          ns->ns_status = puterrno(error);
 979  979                          return;
 980  980                  }
 981  981          }
 982  982  
 983  983          /*
 984  984           * Can't access a mandatory lock file.  This might cause
 985  985           * the NFS service thread to block forever waiting for a
 986  986           * lock to be released that will never be released.
 987  987           */
 988  988          if (MANDLOCK(vp, va.va_mode)) {
 989  989                  VN_RELE(vp);
 990  990                  ns->ns_status = NFSERR_ACCES;
 991  991                  return;
 992  992          }
 993  993  
 994  994          /*
 995  995           * We have to enter the critical region before calling VOP_RWLOCK
 996  996           * to avoid a deadlock with ufs.
 997  997           */
 998  998          if (nbl_need_check(vp)) {
 999  999                  nbl_start_crit(vp, RW_READER);
1000 1000                  in_crit = 1;
1001 1001                  if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1002 1002                      wa->wa_count, 0, NULL)) {
1003 1003                          error = EACCES;
1004 1004                          goto out;
1005 1005                  }
1006 1006          }
1007 1007  
1008 1008          error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1009 1009  
1010 1010          /* check if a monitor detected a delegation conflict */
1011 1011          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1012 1012                  VN_RELE(vp);
1013 1013                  /* mark as wouldblock so response is dropped */
1014 1014                  curthread->t_flag |= T_WOULDBLOCK;
1015 1015                  return;
1016 1016          }
1017 1017  
1018 1018          if (wa->wa_data || wa->wa_rlist) {
1019 1019                  /* Do the RDMA thing if necessary */
1020 1020                  if (wa->wa_rlist) {
1021 1021                          iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1022 1022                          iov[0].iov_len = wa->wa_count;
1023 1023                  } else  {
1024 1024                          iov[0].iov_base = wa->wa_data;
1025 1025                          iov[0].iov_len = wa->wa_count;
1026 1026                  }
1027 1027                  uio.uio_iov = iov;
1028 1028                  uio.uio_iovcnt = 1;
1029 1029                  uio.uio_segflg = UIO_SYSSPACE;
1030 1030                  uio.uio_extflg = UIO_COPY_DEFAULT;
1031 1031                  uio.uio_loffset = (offset_t)wa->wa_offset;
1032 1032                  uio.uio_resid = wa->wa_count;
1033 1033                  /*
1034 1034                   * The limit is checked on the client. We
1035 1035                   * should allow any size writes here.
1036 1036                   */
1037 1037                  uio.uio_llimit = curproc->p_fsz_ctl;
1038 1038                  rlimit = uio.uio_llimit - wa->wa_offset;
1039 1039                  if (rlimit < (rlim64_t)uio.uio_resid)
1040 1040                          uio.uio_resid = (uint_t)rlimit;
1041 1041  
1042 1042                  /*
1043 1043                   * for now we assume no append mode
1044 1044                   */
1045 1045                  /*
1046 1046                   * We're changing creds because VM may fault and we need
1047 1047                   * the cred of the current thread to be used if quota
1048 1048                   * checking is enabled.
1049 1049                   */
1050 1050                  savecred = curthread->t_cred;
1051 1051                  curthread->t_cred = cr;
1052 1052                  error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1053 1053                  curthread->t_cred = savecred;
1054 1054          } else {
1055 1055                  iovcnt = 0;
1056 1056                  for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1057 1057                          iovcnt++;
1058 1058                  if (iovcnt <= MAX_IOVECS) {
1059 1059  #ifdef DEBUG
1060 1060                          rfs_write_sync_hits++;
1061 1061  #endif
1062 1062                          iovp = iov;
1063 1063                  } else {
1064 1064  #ifdef DEBUG
1065 1065                          rfs_write_sync_misses++;
1066 1066  #endif
1067 1067                          iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1068 1068                  }
1069 1069                  mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1070 1070                  uio.uio_iov = iovp;
1071 1071                  uio.uio_iovcnt = iovcnt;
1072 1072                  uio.uio_segflg = UIO_SYSSPACE;
1073 1073                  uio.uio_extflg = UIO_COPY_DEFAULT;
1074 1074                  uio.uio_loffset = (offset_t)wa->wa_offset;
1075 1075                  uio.uio_resid = wa->wa_count;
1076 1076                  /*
1077 1077                   * The limit is checked on the client. We
1078 1078                   * should allow any size writes here.
1079 1079                   */
1080 1080                  uio.uio_llimit = curproc->p_fsz_ctl;
1081 1081                  rlimit = uio.uio_llimit - wa->wa_offset;
1082 1082                  if (rlimit < (rlim64_t)uio.uio_resid)
1083 1083                          uio.uio_resid = (uint_t)rlimit;
1084 1084  
1085 1085                  /*
1086 1086                   * For now we assume no append mode.
1087 1087                   */
1088 1088                  /*
1089 1089                   * We're changing creds because VM may fault and we need
1090 1090                   * the cred of the current thread to be used if quota
1091 1091                   * checking is enabled.
1092 1092                   */
1093 1093                  savecred = curthread->t_cred;
1094 1094                  curthread->t_cred = cr;
1095 1095                  error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1096 1096                  curthread->t_cred = savecred;
1097 1097  
1098 1098                  if (iovp != iov)
1099 1099                          kmem_free(iovp, sizeof (*iovp) * iovcnt);
1100 1100          }
1101 1101  
1102 1102          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1103 1103  
1104 1104          if (!error) {
1105 1105                  /*
1106 1106                   * Get attributes again so we send the latest mod
1107 1107                   * time to the client side for his cache.
1108 1108                   */
1109 1109                  va.va_mask = AT_ALL;    /* now we want everything */
1110 1110  
1111 1111                  error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1112 1112  
1113 1113                  /* check for overflows */
1114 1114                  if (!error) {
1115 1115                          acl_perm(vp, exi, &va, cr);
1116 1116                          error = vattr_to_nattr(&va, &ns->ns_attr);
1117 1117                  }
1118 1118          }
1119 1119  
1120 1120  out:
1121 1121          if (in_crit)
1122 1122                  nbl_end_crit(vp);
1123 1123          VN_RELE(vp);
1124 1124  
1125 1125          /* check if a monitor detected a delegation conflict */
1126 1126          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1127 1127                  /* mark as wouldblock so response is dropped */
1128 1128                  curthread->t_flag |= T_WOULDBLOCK;
1129 1129          else
1130 1130                  ns->ns_status = puterrno(error);
1131 1131  
1132 1132  }
1133 1133  
1134 1134  struct rfs_async_write {     /* one WRITE request queued in a cluster */
1135 1135          struct nfswriteargs *wa;        /* the request's write arguments */
1136 1136          struct nfsattrstat *ns;         /* where the request's result is stored */
1137 1137          struct svc_req *req;            /* the request's svc_req */
1138 1138          cred_t *cr;                     /* credentials of the requester */
1139 1139          bool_t ro;                      /* read-only flag passed in with the request */
1140 1140          kthread_t *thread;              /* thread that queued the request */
1141 1141          struct rfs_async_write *list;   /* next request in the same cluster */
1142 1142  };
1143 1143  
1144 1144  struct rfs_async_write_list {        /* one cluster of WRITE requests for a file handle */
1145 1145          fhandle_t *fhp;                 /* file handle the cluster is keyed on */
1146 1146          kcondvar_t cv;                  /* queued requesters wait here for their status */
1147 1147          struct rfs_async_write *list;   /* requests in this cluster */
1148 1148          struct rfs_async_write_list *next;      /* next cluster on the global list */
1149 1149  };
1150 1150  
1151 1151  static struct rfs_async_write_list *rfs_async_write_head = NULL;
1152 1152  static kmutex_t rfs_async_write_lock;
1153 1153  static int rfs_write_async = 1; /* enables write clustering if == 1 */
1154 1154  
1155 1155  #define MAXCLIOVECS     42
1156 1156  #define RFSWRITE_INITVAL (enum nfsstat) -1
1157 1157  
1158 1158  #ifdef DEBUG
1159 1159  static int rfs_write_hits = 0;
1160 1160  static int rfs_write_misses = 0;
1161 1161  #endif
1162 1162  
1163 1163  /*
1164 1164   * Write data to file.
1165 1165   * Returns attributes of a file after writing some data to it.
1166 1166   */
1167 1167  void
1168 1168  rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
1169 1169      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1170 1170  {
1171 1171          int error;
1172 1172          vnode_t *vp;
1173 1173          rlim64_t rlimit;
1174 1174          struct vattr va;
1175 1175          struct uio uio;
1176 1176          struct rfs_async_write_list *lp;
1177 1177          struct rfs_async_write_list *nlp;
1178 1178          struct rfs_async_write *rp;
1179 1179          struct rfs_async_write *nrp;
1180 1180          struct rfs_async_write *trp;
1181 1181          struct rfs_async_write *lrp;
1182 1182          int data_written;
1183 1183          int iovcnt;
1184 1184          mblk_t *m;
1185 1185          struct iovec *iovp;
1186 1186          struct iovec *niovp;
1187 1187          struct iovec iov[MAXCLIOVECS];
1188 1188          int count;
1189 1189          int rcount;
1190 1190          uint_t off;
1191 1191          uint_t len;
1192 1192          struct rfs_async_write nrpsp;
1193 1193          struct rfs_async_write_list nlpsp;
1194 1194          ushort_t t_flag;
1195 1195          cred_t *savecred;
1196 1196          int in_crit = 0;
1197 1197          caller_context_t ct;
1198 1198  
1199 1199          if (!rfs_write_async) {
1200 1200                  rfs_write_sync(wa, ns, exi, req, cr, ro);
1201 1201                  return;
1202 1202          }
1203 1203  
1204 1204          /*
1205 1205           * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
1206 1206           * is considered an OK.
1207 1207           */
1208 1208          ns->ns_status = RFSWRITE_INITVAL;
1209 1209  
1210 1210          nrp = &nrpsp;
1211 1211          nrp->wa = wa;
1212 1212          nrp->ns = ns;
1213 1213          nrp->req = req;
1214 1214          nrp->cr = cr;
1215 1215          nrp->ro = ro;
1216 1216          nrp->thread = curthread;
1217 1217  
1218 1218          ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
1219 1219  
1220 1220          /*
1221 1221           * Look to see if there is already a cluster started
1222 1222           * for this file.
1223 1223           */
1224 1224          mutex_enter(&rfs_async_write_lock);
1225 1225          for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
1226 1226                  if (bcmp(&wa->wa_fhandle, lp->fhp,
1227 1227                      sizeof (fhandle_t)) == 0)
1228 1228                          break;
1229 1229          }
1230 1230  
1231 1231          /*
1232 1232           * If lp is non-NULL, then there is already a cluster
1233 1233           * started.  We need to place ourselves in the cluster
1234 1234           * list in the right place as determined by starting
1235 1235           * offset.  Conflicts with non-blocking mandatory locked
1236 1236           * regions will be checked when the cluster is processed.
1237 1237           */
1238 1238          if (lp != NULL) {
1239 1239                  rp = lp->list;
1240 1240                  trp = NULL;
1241 1241                  while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1242 1242                          trp = rp;
1243 1243                          rp = rp->list;
1244 1244                  }
1245 1245                  nrp->list = rp;
1246 1246                  if (trp == NULL)
1247 1247                          lp->list = nrp;
1248 1248                  else
1249 1249                          trp->list = nrp;
1250 1250                  while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1251 1251                          cv_wait(&lp->cv, &rfs_async_write_lock);
1252 1252                  mutex_exit(&rfs_async_write_lock);
1253 1253  
1254 1254                  return;
1255 1255          }
1256 1256  
1257 1257          /*
1258 1258           * No cluster started yet, start one and add ourselves
1259 1259           * to the list of clusters.
1260 1260           */
1261 1261          nrp->list = NULL;
1262 1262  
1263 1263          nlp = &nlpsp;
1264 1264          nlp->fhp = &wa->wa_fhandle;
1265 1265          cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1266 1266          nlp->list = nrp;
1267 1267          nlp->next = NULL;
1268 1268  
1269 1269          if (rfs_async_write_head == NULL) {
1270 1270                  rfs_async_write_head = nlp;
1271 1271          } else {
1272 1272                  lp = rfs_async_write_head;
1273 1273                  while (lp->next != NULL)
1274 1274                          lp = lp->next;
1275 1275                  lp->next = nlp;
1276 1276          }
1277 1277          mutex_exit(&rfs_async_write_lock);
1278 1278  
1279 1279          /*
1280 1280           * Convert the file handle common to all of the requests
1281 1281           * in this cluster to a vnode.
1282 1282           */
1283 1283          vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1284 1284          if (vp == NULL) {
1285 1285                  mutex_enter(&rfs_async_write_lock);
1286 1286                  if (rfs_async_write_head == nlp)
1287 1287                          rfs_async_write_head = nlp->next;
1288 1288                  else {
1289 1289                          lp = rfs_async_write_head;
1290 1290                          while (lp->next != nlp)
1291 1291                                  lp = lp->next;
1292 1292                          lp->next = nlp->next;
1293 1293                  }
1294 1294                  t_flag = curthread->t_flag & T_WOULDBLOCK;
1295 1295                  for (rp = nlp->list; rp != NULL; rp = rp->list) {
1296 1296                          rp->ns->ns_status = NFSERR_STALE;
1297 1297                          rp->thread->t_flag |= t_flag;
1298 1298                  }
1299 1299                  cv_broadcast(&nlp->cv);
1300 1300                  mutex_exit(&rfs_async_write_lock);
1301 1301  
1302 1302                  return;
1303 1303          }
1304 1304  
1305 1305          /*
1306 1306           * Can only write regular files.  Attempts to write any
1307 1307           * other file types fail with EISDIR.
1308 1308           */
1309 1309          if (vp->v_type != VREG) {
1310 1310                  VN_RELE(vp);
1311 1311                  mutex_enter(&rfs_async_write_lock);
1312 1312                  if (rfs_async_write_head == nlp)
1313 1313                          rfs_async_write_head = nlp->next;
1314 1314                  else {
1315 1315                          lp = rfs_async_write_head;
1316 1316                          while (lp->next != nlp)
1317 1317                                  lp = lp->next;
1318 1318                          lp->next = nlp->next;
1319 1319                  }
1320 1320                  t_flag = curthread->t_flag & T_WOULDBLOCK;
1321 1321                  for (rp = nlp->list; rp != NULL; rp = rp->list) {
1322 1322                          rp->ns->ns_status = NFSERR_ISDIR;
1323 1323                          rp->thread->t_flag |= t_flag;
1324 1324                  }
1325 1325                  cv_broadcast(&nlp->cv);
1326 1326                  mutex_exit(&rfs_async_write_lock);
1327 1327  
1328 1328                  return;
1329 1329          }
1330 1330  
1331 1331          /*
1332 1332           * Enter the critical region before calling VOP_RWLOCK, to avoid a
1333 1333           * deadlock with ufs.
1334 1334           */
1335 1335          if (nbl_need_check(vp)) {
1336 1336                  nbl_start_crit(vp, RW_READER);
1337 1337                  in_crit = 1;
1338 1338          }
1339 1339  
1340 1340          ct.cc_sysid = 0;
1341 1341          ct.cc_pid = 0;
1342 1342          ct.cc_caller_id = nfs2_srv_caller_id;
1343 1343          ct.cc_flags = CC_DONTBLOCK;
1344 1344  
1345 1345          /*
1346 1346           * Lock the file for writing.  This operation provides
1347 1347           * the delay which allows clusters to grow.
1348 1348           */
1349 1349          error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1350 1350  
1351 1351          /* check if a monitor detected a delegation conflict */
1352 1352          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1353 1353                  if (in_crit)
1354 1354                          nbl_end_crit(vp);
1355 1355                  VN_RELE(vp);
1356 1356                  /* mark as wouldblock so response is dropped */
1357 1357                  curthread->t_flag |= T_WOULDBLOCK;
1358 1358                  mutex_enter(&rfs_async_write_lock);
1359 1359                  if (rfs_async_write_head == nlp)
1360 1360                          rfs_async_write_head = nlp->next;
1361 1361                  else {
1362 1362                          lp = rfs_async_write_head;
1363 1363                          while (lp->next != nlp)
1364 1364                                  lp = lp->next;
1365 1365                          lp->next = nlp->next;
1366 1366                  }
1367 1367                  for (rp = nlp->list; rp != NULL; rp = rp->list) {
1368 1368                          if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1369 1369                                  rp->ns->ns_status = puterrno(error);
1370 1370                                  rp->thread->t_flag |= T_WOULDBLOCK;
1371 1371                          }
1372 1372                  }
1373 1373                  cv_broadcast(&nlp->cv);
1374 1374                  mutex_exit(&rfs_async_write_lock);
1375 1375  
1376 1376                  return;
1377 1377          }
1378 1378  
1379 1379          /*
1380 1380           * Disconnect this cluster from the list of clusters.
1381 1381           * The cluster that is being dealt with must be fixed
1382 1382           * in size after this point, so there is no reason
1383 1383           * to leave it on the list so that new requests can
1384 1384           * find it.
1385 1385           *
1386 1386           * The algorithm is that the first write request will
1387 1387           * create a cluster, convert the file handle to a
1388 1388           * vnode pointer, and then lock the file for writing.
1389 1389           * This request is not likely to be clustered with
1390 1390           * any others.  However, the next request will create
1391 1391           * a new cluster and be blocked in VOP_RWLOCK while
1392 1392           * the first request is being processed.  This delay
1393 1393           * will allow more requests to be clustered in this
1394 1394           * second cluster.
1395 1395           */
1396 1396          mutex_enter(&rfs_async_write_lock);
1397 1397          if (rfs_async_write_head == nlp)
1398 1398                  rfs_async_write_head = nlp->next;
1399 1399          else {
1400 1400                  lp = rfs_async_write_head;
1401 1401                  while (lp->next != nlp)
1402 1402                          lp = lp->next;
1403 1403                  lp->next = nlp->next;
1404 1404          }
1405 1405          mutex_exit(&rfs_async_write_lock);
1406 1406  
1407 1407          /*
1408 1408           * Step through the list of requests in this cluster.
1409 1409           * We need to check permissions to make sure that all
1410 1410           * of the requests have sufficient permission to write
1411 1411           * the file.  A cluster can be composed of requests
1412 1412           * from different clients and different users on each
1413 1413           * client.
1414 1414           *
1415 1415           * As a side effect, we also calculate the size of the
1416 1416           * byte range that this cluster encompasses.
1417 1417           */
1418 1418          rp = nlp->list;
1419 1419          off = rp->wa->wa_offset;
1420 1420          len = (uint_t)0;
1421 1421          do {
1422 1422                  if (rdonly(rp->ro, vp)) {
1423 1423                          rp->ns->ns_status = NFSERR_ROFS;
1424 1424                          t_flag = curthread->t_flag & T_WOULDBLOCK;
1425 1425                          rp->thread->t_flag |= t_flag;
1426 1426                          continue;
1427 1427                  }
1428 1428  
1429 1429                  va.va_mask = AT_UID|AT_MODE;
1430 1430  
1431 1431                  error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1432 1432  
1433 1433                  if (!error) {
1434 1434                          if (crgetuid(rp->cr) != va.va_uid) {
1435 1435                                  /*
1436 1436                                   * This is a kludge to allow writes of files
1437 1437                                   * created with read only permission.  The
1438 1438                                   * owner of the file is always allowed to
1439 1439                                   * write it.
1440 1440                                   */
1441 1441                                  error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
1442 1442                          }
1443 1443                          if (!error && MANDLOCK(vp, va.va_mode))
1444 1444                                  error = EACCES;
1445 1445                  }
1446 1446  
1447 1447                  /*
1448 1448                   * Check for a conflict with a nbmand-locked region.
1449 1449                   */
1450 1450                  if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
1451 1451                      rp->wa->wa_count, 0, NULL)) {
1452 1452                          error = EACCES;
1453 1453                  }
1454 1454  
1455 1455                  if (error) {
1456 1456                          rp->ns->ns_status = puterrno(error);
1457 1457                          t_flag = curthread->t_flag & T_WOULDBLOCK;
1458 1458                          rp->thread->t_flag |= t_flag;
1459 1459                          continue;
1460 1460                  }
1461 1461                  if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
1462 1462                          len = rp->wa->wa_offset + rp->wa->wa_count - off;
1463 1463          } while ((rp = rp->list) != NULL);
1464 1464  
1465 1465          /*
1466 1466           * Step through the cluster attempting to gather as many
1467 1467           * requests which are contiguous as possible.  These
1468 1468           * contiguous requests are handled via one call to VOP_WRITE
1469 1469           * instead of different calls to VOP_WRITE.  We also keep
1470 1470           * track of the fact that any data was written.
1471 1471           */
1472 1472          rp = nlp->list;
1473 1473          data_written = 0;
1474 1474          do {
1475 1475                  /*
1476 1476                   * Skip any requests which are already marked as having an
1477 1477                   * error.
1478 1478                   */
1479 1479                  if (rp->ns->ns_status != RFSWRITE_INITVAL) {
1480 1480                          rp = rp->list;
1481 1481                          continue;
1482 1482                  }
1483 1483  
1484 1484                  /*
1485 1485                   * Count the number of iovec's which are required
1486 1486                   * to handle this set of requests.  One iovec is
1487 1487                   * needed for each data buffer, whether addressed
1488 1488                   * by wa_data or by the b_rptr pointers in the
1489 1489                   * mblk chains.
1490 1490                   */
1491 1491                  iovcnt = 0;
1492 1492                  lrp = rp;
1493 1493                  for (;;) {
1494 1494                          if (lrp->wa->wa_data || lrp->wa->wa_rlist)
1495 1495                                  iovcnt++;
1496 1496                          else {
1497 1497                                  m = lrp->wa->wa_mblk;
1498 1498                                  while (m != NULL) {
1499 1499                                          iovcnt++;
1500 1500                                          m = m->b_cont;
1501 1501                                  }
1502 1502                          }
1503 1503                          if (lrp->list == NULL ||
1504 1504                              lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
1505 1505                              lrp->wa->wa_offset + lrp->wa->wa_count !=
1506 1506                              lrp->list->wa->wa_offset) {
1507 1507                                  lrp = lrp->list;
1508 1508                                  break;
1509 1509                          }
1510 1510                          lrp = lrp->list;
1511 1511                  }
1512 1512  
1513 1513                  if (iovcnt <= MAXCLIOVECS) {
1514 1514  #ifdef DEBUG
1515 1515                          rfs_write_hits++;
1516 1516  #endif
1517 1517                          niovp = iov;
1518 1518                  } else {
1519 1519  #ifdef DEBUG
1520 1520                          rfs_write_misses++;
1521 1521  #endif
1522 1522                          niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
1523 1523                  }
1524 1524                  /*
1525 1525                   * Put together the scatter/gather iovecs.
1526 1526                   */
1527 1527                  iovp = niovp;
1528 1528                  trp = rp;
1529 1529                  count = 0;
1530 1530                  do {
1531 1531                          if (trp->wa->wa_data || trp->wa->wa_rlist) {
1532 1532                                  if (trp->wa->wa_rlist) {
1533 1533                                          iovp->iov_base =
1534 1534                                              (char *)((trp->wa->wa_rlist)->
1535 1535                                              u.c_daddr3);
1536 1536                                          iovp->iov_len = trp->wa->wa_count;
1537 1537                                  } else  {
1538 1538                                          iovp->iov_base = trp->wa->wa_data;
1539 1539                                          iovp->iov_len = trp->wa->wa_count;
1540 1540                                  }
1541 1541                                  iovp++;
1542 1542                          } else {
1543 1543                                  m = trp->wa->wa_mblk;
1544 1544                                  rcount = trp->wa->wa_count;
1545 1545                                  while (m != NULL) {
1546 1546                                          iovp->iov_base = (caddr_t)m->b_rptr;
1547 1547                                          iovp->iov_len = (m->b_wptr - m->b_rptr);
1548 1548                                          rcount -= iovp->iov_len;
1549 1549                                          if (rcount < 0)
1550 1550                                                  iovp->iov_len += rcount;
1551 1551                                          iovp++;
1552 1552                                          if (rcount <= 0)
1553 1553                                                  break;
1554 1554                                          m = m->b_cont;
1555 1555                                  }
1556 1556                          }
1557 1557                          count += trp->wa->wa_count;
1558 1558                          trp = trp->list;
1559 1559                  } while (trp != lrp);
1560 1560  
1561 1561                  uio.uio_iov = niovp;
1562 1562                  uio.uio_iovcnt = iovcnt;
1563 1563                  uio.uio_segflg = UIO_SYSSPACE;
1564 1564                  uio.uio_extflg = UIO_COPY_DEFAULT;
1565 1565                  uio.uio_loffset = (offset_t)rp->wa->wa_offset;
1566 1566                  uio.uio_resid = count;
1567 1567                  /*
1568 1568                   * The limit is checked on the client. We
1569 1569                   * should allow any size writes here.
1570 1570                   */
1571 1571                  uio.uio_llimit = curproc->p_fsz_ctl;
1572 1572                  rlimit = uio.uio_llimit - rp->wa->wa_offset;
1573 1573                  if (rlimit < (rlim64_t)uio.uio_resid)
1574 1574                          uio.uio_resid = (uint_t)rlimit;
1575 1575  
1576 1576                  /*
1577 1577                   * For now we assume no append mode.
1578 1578                   */
1579 1579  
1580 1580                  /*
1581 1581                   * We're changing creds because VM may fault
1582 1582                   * and we need the cred of the current
1583 1583                   * thread to be used if quota checking is
1584 1584                   * enabled.
1585 1585                   */
1586 1586                  savecred = curthread->t_cred;
1587 1587                  curthread->t_cred = cr;
1588 1588                  error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
1589 1589                  curthread->t_cred = savecred;
1590 1590  
1591 1591                  /* check if a monitor detected a delegation conflict */
1592 1592                  if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1593 1593                          /* mark as wouldblock so response is dropped */
1594 1594                          curthread->t_flag |= T_WOULDBLOCK;
1595 1595  
1596 1596                  if (niovp != iov)
1597 1597                          kmem_free(niovp, sizeof (*niovp) * iovcnt);
1598 1598  
1599 1599                  if (!error) {
1600 1600                          data_written = 1;
1601 1601                          /*
1602 1602                           * Get attributes again so we send the latest mod
1603 1603                           * time to the client side for his cache.
1604 1604                           */
1605 1605                          va.va_mask = AT_ALL;    /* now we want everything */
1606 1606  
1607 1607                          error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1608 1608  
1609 1609                          if (!error)
1610 1610                                  acl_perm(vp, exi, &va, rp->cr);
1611 1611                  }
1612 1612  
1613 1613                  /*
1614 1614                   * Fill in the status responses for each request
1615 1615                   * which was just handled.  Also, copy the latest
1616 1616                   * attributes in to the attribute responses if
1617 1617                   * appropriate.
1618 1618                   */
1619 1619                  t_flag = curthread->t_flag & T_WOULDBLOCK;
1620 1620                  do {
1621 1621                          rp->thread->t_flag |= t_flag;
1622 1622                          /* check for overflows */
1623 1623                          if (!error) {
1624 1624                                  error  = vattr_to_nattr(&va, &rp->ns->ns_attr);
1625 1625                          }
1626 1626                          rp->ns->ns_status = puterrno(error);
1627 1627                          rp = rp->list;
1628 1628                  } while (rp != lrp);
1629 1629          } while (rp != NULL);
1630 1630  
1631 1631          /*
1632 1632           * If any data was written at all, then we need to flush
1633 1633           * the data and metadata to stable storage.
1634 1634           */
1635 1635          if (data_written) {
1636 1636                  error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);
1637 1637  
1638 1638                  if (!error) {
1639 1639                          error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
1640 1640                  }
1641 1641          }
1642 1642  
1643 1643          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1644 1644  
1645 1645          if (in_crit)
1646 1646                  nbl_end_crit(vp);
1647 1647          VN_RELE(vp);
1648 1648  
1649 1649          t_flag = curthread->t_flag & T_WOULDBLOCK;
1650 1650          mutex_enter(&rfs_async_write_lock);
1651 1651          for (rp = nlp->list; rp != NULL; rp = rp->list) {
1652 1652                  if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1653 1653                          rp->ns->ns_status = puterrno(error);
1654 1654                          rp->thread->t_flag |= t_flag;
1655 1655                  }
1656 1656          }
1657 1657          cv_broadcast(&nlp->cv);
1658 1658          mutex_exit(&rfs_async_write_lock);
1659 1659  
1660 1660  }
1661 1661  
1662 1662  void *
1663 1663  rfs_write_getfh(struct nfswriteargs *wa)
1664 1664  {
1665 1665          return (&wa->wa_fhandle);
1666 1666  }
1667 1667  
1668 1668  /*
1669 1669   * Create a file.
1670 1670   * Creates a file with given attributes and returns those attributes
1671 1671   * and an fhandle for the new file.
1672 1672   */
1673 1673  void
1674 1674  rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1675 1675      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1676 1676  {
1677 1677          int error;
1678 1678          int lookuperr;
1679 1679          int in_crit = 0;
1680 1680          struct vattr va;
1681 1681          vnode_t *vp;
1682 1682          vnode_t *realvp;
1683 1683          vnode_t *dvp;
1684 1684          char *name = args->ca_da.da_name;
1685 1685          vnode_t *tvp = NULL;
1686 1686          int mode;
1687 1687          int lookup_ok;
1688 1688          bool_t trunc;
1689 1689          struct sockaddr *ca;
1690 1690  
1691 1691          /*
1692 1692           * Disallow NULL paths
1693 1693           */
1694 1694          if (name == NULL || *name == '\0') {
1695 1695                  dr->dr_status = NFSERR_ACCES;
1696 1696                  return;
1697 1697          }
1698 1698  
1699 1699          dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1700 1700          if (dvp == NULL) {
1701 1701                  dr->dr_status = NFSERR_STALE;
1702 1702                  return;
1703 1703          }
1704 1704  
1705 1705          error = sattr_to_vattr(args->ca_sa, &va);
1706 1706          if (error) {
1707 1707                  dr->dr_status = puterrno(error);
1708 1708                  return;
1709 1709          }
1710 1710  
1711 1711          /*
1712 1712           * Must specify the mode.
1713 1713           */
1714 1714          if (!(va.va_mask & AT_MODE)) {
1715 1715                  VN_RELE(dvp);
1716 1716                  dr->dr_status = NFSERR_INVAL;
1717 1717                  return;
1718 1718          }
1719 1719  
1720 1720          /*
1721 1721           * This is a completely gross hack to make mknod
1722 1722           * work over the wire until we can wack the protocol
1723 1723           */
1724 1724          if ((va.va_mode & IFMT) == IFCHR) {
1725 1725                  if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1726 1726                          va.va_type = VFIFO;     /* xtra kludge for named pipe */
1727 1727                  else {
1728 1728                          va.va_type = VCHR;
1729 1729                          /*
1730 1730                           * uncompress the received dev_t
1731 1731                           * if the top half is zero indicating a request
1732 1732                           * from an `older style' OS.
1733 1733                           */
1734 1734                          if ((va.va_size & 0xffff0000) == 0)
1735 1735                                  va.va_rdev = nfsv2_expdev(va.va_size);
1736 1736                          else
1737 1737                                  va.va_rdev = (dev_t)va.va_size;
1738 1738                  }
1739 1739                  va.va_mask &= ~AT_SIZE;
1740 1740          } else if ((va.va_mode & IFMT) == IFBLK) {
1741 1741                  va.va_type = VBLK;
1742 1742                  /*
1743 1743                   * uncompress the received dev_t
1744 1744                   * if the top half is zero indicating a request
1745 1745                   * from an `older style' OS.
1746 1746                   */
1747 1747                  if ((va.va_size & 0xffff0000) == 0)
1748 1748                          va.va_rdev = nfsv2_expdev(va.va_size);
1749 1749                  else
1750 1750                          va.va_rdev = (dev_t)va.va_size;
1751 1751                  va.va_mask &= ~AT_SIZE;
1752 1752          } else if ((va.va_mode & IFMT) == IFSOCK) {
1753 1753                  va.va_type = VSOCK;
1754 1754          } else {
1755 1755                  va.va_type = VREG;
1756 1756          }
1757 1757          va.va_mode &= ~IFMT;
1758 1758          va.va_mask |= AT_TYPE;
1759 1759  
1760 1760          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1761 1761          name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1762 1762              MAXPATHLEN);
1763 1763          if (name == NULL) {
1764 1764                  dr->dr_status = puterrno(EINVAL);
1765 1765                  return;
1766 1766          }
1767 1767  
1768 1768          /*
1769 1769           * Why was the choice made to use VWRITE as the mode to the
1770 1770           * call to VOP_CREATE ? This results in a bug.  When a client
1771 1771           * opens a file that already exists and is RDONLY, the second
1772 1772           * open fails with an EACESS because of the mode.
1773 1773           * bug ID 1054648.
1774 1774           */
1775 1775          lookup_ok = 0;
1776 1776          mode = VWRITE;
1777 1777          if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1778 1778                  error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1779 1779                      NULL, NULL, NULL);
1780 1780                  if (!error) {
1781 1781                          struct vattr at;
1782 1782  
1783 1783                          lookup_ok = 1;
1784 1784                          at.va_mask = AT_MODE;
1785 1785                          error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1786 1786                          if (!error)
1787 1787                                  mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1788 1788                          VN_RELE(tvp);
1789 1789                          tvp = NULL;
1790 1790                  }
1791 1791          }
1792 1792  
1793 1793          if (!lookup_ok) {
1794 1794                  if (rdonly(ro, dvp)) {
1795 1795                          error = EROFS;
1796 1796                  } else if (va.va_type != VREG && va.va_type != VFIFO &&
1797 1797                      va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1798 1798                          error = EPERM;
1799 1799                  } else {
1800 1800                          error = 0;
1801 1801                  }
1802 1802          }
1803 1803  
1804 1804          /*
1805 1805           * If file size is being modified on an already existing file
1806 1806           * make sure that there are no conflicting non-blocking mandatory
1807 1807           * locks in the region being manipulated. Return EACCES if there
1808 1808           * are conflicting locks.
1809 1809           */
1810 1810          if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1811 1811                  lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1812 1812                      NULL, NULL, NULL);
1813 1813  
1814 1814                  if (!lookuperr &&
1815 1815                      rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1816 1816                          VN_RELE(tvp);
1817 1817                          curthread->t_flag |= T_WOULDBLOCK;
1818 1818                          goto out;
1819 1819                  }
1820 1820  
1821 1821                  if (!lookuperr && nbl_need_check(tvp)) {
1822 1822                          /*
1823 1823                           * The file exists. Now check if it has any
1824 1824                           * conflicting non-blocking mandatory locks
1825 1825                           * in the region being changed.
1826 1826                           */
1827 1827                          struct vattr bva;
1828 1828                          u_offset_t offset;
1829 1829                          ssize_t length;
1830 1830  
1831 1831                          nbl_start_crit(tvp, RW_READER);
1832 1832                          in_crit = 1;
1833 1833  
1834 1834                          bva.va_mask = AT_SIZE;
1835 1835                          error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1836 1836                          if (!error) {
1837 1837                                  if (va.va_size < bva.va_size) {
1838 1838                                          offset = va.va_size;
1839 1839                                          length = bva.va_size - va.va_size;
1840 1840                                  } else {
1841 1841                                          offset = bva.va_size;
1842 1842                                          length = va.va_size - bva.va_size;
1843 1843                                  }
1844 1844                                  if (length) {
1845 1845                                          if (nbl_conflict(tvp, NBL_WRITE,
1846 1846                                              offset, length, 0, NULL)) {
1847 1847                                                  error = EACCES;
1848 1848                                          }
1849 1849                                  }
1850 1850                          }
1851 1851                          if (error) {
1852 1852                                  nbl_end_crit(tvp);
1853 1853                                  VN_RELE(tvp);
1854 1854                                  in_crit = 0;
1855 1855                          }
1856 1856                  } else if (tvp != NULL) {
1857 1857                          VN_RELE(tvp);
1858 1858                  }
1859 1859          }
1860 1860  
1861 1861          if (!error) {
1862 1862                  /*
1863 1863                   * If filesystem is shared with nosuid the remove any
1864 1864                   * setuid/setgid bits on create.
1865 1865                   */
1866 1866                  if (va.va_type == VREG &&
1867 1867                      exi->exi_export.ex_flags & EX_NOSUID)
1868 1868                          va.va_mode &= ~(VSUID | VSGID);
1869 1869  
1870 1870                  error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1871 1871                      NULL, NULL);
1872 1872  
1873 1873                  if (!error) {
1874 1874  
1875 1875                          if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1876 1876                                  trunc = TRUE;
1877 1877                          else
1878 1878                                  trunc = FALSE;
1879 1879  
1880 1880                          if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1881 1881                                  VN_RELE(vp);
1882 1882                                  curthread->t_flag |= T_WOULDBLOCK;
1883 1883                                  goto out;
1884 1884                          }
1885 1885                          va.va_mask = AT_ALL;
1886 1886  
1887 1887                          error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1888 1888  
1889 1889                          /* check for overflows */
1890 1890                          if (!error) {
1891 1891                                  acl_perm(vp, exi, &va, cr);
1892 1892                                  error = vattr_to_nattr(&va, &dr->dr_attr);
1893 1893                                  if (!error) {
1894 1894                                          error = makefh(&dr->dr_fhandle, vp,
1895 1895                                              exi);
1896 1896                                  }
1897 1897                          }
1898 1898                          /*
1899 1899                           * Force modified metadata out to stable storage.
1900 1900                           *
1901 1901                           * if a underlying vp exists, pass it to VOP_FSYNC
1902 1902                           */
1903 1903                          if (VOP_REALVP(vp, &realvp, NULL) == 0)
1904 1904                                  (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1905 1905                          else
1906 1906                                  (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1907 1907                          VN_RELE(vp);
1908 1908                  }
1909 1909  
1910 1910                  if (in_crit) {
1911 1911                          nbl_end_crit(tvp);
1912 1912                          VN_RELE(tvp);
1913 1913                  }
1914 1914          }
1915 1915  
1916 1916          /*
1917 1917           * Force modified data and metadata out to stable storage.
1918 1918           */
1919 1919          (void) VOP_FSYNC(dvp, 0, cr, NULL);
1920 1920  
1921 1921  out:
1922 1922  
1923 1923          VN_RELE(dvp);
1924 1924  
1925 1925          dr->dr_status = puterrno(error);
1926 1926  
1927 1927          if (name != args->ca_da.da_name)
1928 1928                  kmem_free(name, MAXPATHLEN);
1929 1929  }
1930 1930  void *
1931 1931  rfs_create_getfh(struct nfscreatargs *args)
1932 1932  {
1933 1933          return (args->ca_da.da_fhandle);
1934 1934  }
1935 1935  
1936 1936  /*
1937 1937   * Remove a file.
1938 1938   * Remove named file from parent directory.
1939 1939   */
1940 1940  /* ARGSUSED */
1941 1941  void
1942 1942  rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
1943 1943      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1944 1944  {
1945 1945          int error = 0;
1946 1946          vnode_t *vp;
1947 1947          vnode_t *targvp;
1948 1948          int in_crit = 0;
1949 1949  
1950 1950          /*
1951 1951           * Disallow NULL paths
1952 1952           */
1953 1953          if (da->da_name == NULL || *da->da_name == '\0') {
1954 1954                  *status = NFSERR_ACCES;
1955 1955                  return;
1956 1956          }
1957 1957  
1958 1958          vp = nfs_fhtovp(da->da_fhandle, exi);
1959 1959          if (vp == NULL) {
1960 1960                  *status = NFSERR_STALE;
1961 1961                  return;
1962 1962          }
1963 1963  
1964 1964          if (rdonly(ro, vp)) {
1965 1965                  VN_RELE(vp);
1966 1966                  *status = NFSERR_ROFS;
1967 1967                  return;
1968 1968          }
1969 1969  
1970 1970          /*
1971 1971           * Check for a conflict with a non-blocking mandatory share reservation.
1972 1972           */
1973 1973          error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
1974 1974              NULL, cr, NULL, NULL, NULL);
1975 1975          if (error != 0) {
1976 1976                  VN_RELE(vp);
1977 1977                  *status = puterrno(error);
1978 1978                  return;
1979 1979          }
1980 1980  
1981 1981          /*
1982 1982           * If the file is delegated to an v4 client, then initiate
1983 1983           * recall and drop this request (by setting T_WOULDBLOCK).
1984 1984           * The client will eventually re-transmit the request and
1985 1985           * (hopefully), by then, the v4 client will have returned
1986 1986           * the delegation.
1987 1987           */
1988 1988  
1989 1989          if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
1990 1990                  VN_RELE(vp);
1991 1991                  VN_RELE(targvp);
1992 1992                  curthread->t_flag |= T_WOULDBLOCK;
1993 1993                  return;
1994 1994          }
1995 1995  
1996 1996          if (nbl_need_check(targvp)) {
1997 1997                  nbl_start_crit(targvp, RW_READER);
1998 1998                  in_crit = 1;
1999 1999                  if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2000 2000                          error = EACCES;
2001 2001                          goto out;
2002 2002                  }
2003 2003          }
2004 2004  
2005 2005          error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);
2006 2006  
2007 2007          /*
2008 2008           * Force modified data and metadata out to stable storage.
2009 2009           */
2010 2010          (void) VOP_FSYNC(vp, 0, cr, NULL);
2011 2011  
2012 2012  out:
2013 2013          if (in_crit)
2014 2014                  nbl_end_crit(targvp);
2015 2015          VN_RELE(targvp);
2016 2016          VN_RELE(vp);
2017 2017  
2018 2018          *status = puterrno(error);
2019 2019  
2020 2020  }
2021 2021  
2022 2022  void *
2023 2023  rfs_remove_getfh(struct nfsdiropargs *da)
2024 2024  {
2025 2025          return (da->da_fhandle);
2026 2026  }
2027 2027  
2028 2028  /*
2029 2029   * rename a file
2030 2030   * Give a file (from) a new name (to).
2031 2031   */
2032 2032  /* ARGSUSED */
2033 2033  void
2034 2034  rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
2035 2035      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2036 2036  {
2037 2037          int error = 0;
2038 2038          vnode_t *fromvp;
2039 2039          vnode_t *tovp;
2040 2040          struct exportinfo *to_exi;
2041 2041          fhandle_t *fh;
2042 2042          vnode_t *srcvp;

↓ open down ↓

2042 lines elided

↑ open up ↑

2043 2043          vnode_t *targvp;
2044 2044          int in_crit = 0;
2045 2045  
2046 2046          fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
2047 2047          if (fromvp == NULL) {
2048 2048                  *status = NFSERR_STALE;
2049 2049                  return;
2050 2050          }
2051 2051  
2052 2052          fh = args->rna_to.da_fhandle;
2053      -        to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
     2053 +        to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen, NULL);
2054 2054          if (to_exi == NULL) {
2055 2055                  VN_RELE(fromvp);
2056 2056                  *status = NFSERR_ACCES;
2057 2057                  return;
2058 2058          }
2059 2059          exi_rele(to_exi);
2060 2060  
2061 2061          if (to_exi != exi) {
2062 2062                  VN_RELE(fromvp);
2063 2063                  *status = NFSERR_XDEV;

2064 2064                  return;
2065 2065          }
2066 2066  
2067 2067          tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
2068 2068          if (tovp == NULL) {
2069 2069                  VN_RELE(fromvp);
2070 2070                  *status = NFSERR_STALE;
2071 2071                  return;
2072 2072          }
2073 2073  
2074 2074          if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
2075 2075                  VN_RELE(tovp);
2076 2076                  VN_RELE(fromvp);
2077 2077                  *status = NFSERR_NOTDIR;
2078 2078                  return;
2079 2079          }
2080 2080  
2081 2081          /*
2082 2082           * Disallow NULL paths
2083 2083           */
2084 2084          if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
2085 2085              args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
2086 2086                  VN_RELE(tovp);
2087 2087                  VN_RELE(fromvp);
2088 2088                  *status = NFSERR_ACCES;
2089 2089                  return;
2090 2090          }
2091 2091  
2092 2092          if (rdonly(ro, tovp)) {
2093 2093                  VN_RELE(tovp);
2094 2094                  VN_RELE(fromvp);
2095 2095                  *status = NFSERR_ROFS;
2096 2096                  return;
2097 2097          }
2098 2098  
2099 2099          /*
2100 2100           * Check for a conflict with a non-blocking mandatory share reservation.
2101 2101           */
2102 2102          error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
2103 2103              NULL, cr, NULL, NULL, NULL);
2104 2104          if (error != 0) {
2105 2105                  VN_RELE(tovp);
2106 2106                  VN_RELE(fromvp);
2107 2107                  *status = puterrno(error);
2108 2108                  return;
2109 2109          }
2110 2110  
2111 2111          /* Check for delegations on the source file */
2112 2112  
2113 2113          if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2114 2114                  VN_RELE(tovp);
2115 2115                  VN_RELE(fromvp);
2116 2116                  VN_RELE(srcvp);
2117 2117                  curthread->t_flag |= T_WOULDBLOCK;
2118 2118                  return;
2119 2119          }
2120 2120  
2121 2121          /* Check for delegation on the file being renamed over, if it exists */
2122 2122  
2123 2123          if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2124 2124              VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
2125 2125              NULL, NULL, NULL) == 0) {
2126 2126  
2127 2127                  if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2128 2128                          VN_RELE(tovp);
2129 2129                          VN_RELE(fromvp);
2130 2130                          VN_RELE(srcvp);
2131 2131                          VN_RELE(targvp);
2132 2132                          curthread->t_flag |= T_WOULDBLOCK;
2133 2133                          return;
2134 2134                  }
2135 2135                  VN_RELE(targvp);
2136 2136          }
2137 2137  
2138 2138  
2139 2139          if (nbl_need_check(srcvp)) {
2140 2140                  nbl_start_crit(srcvp, RW_READER);
2141 2141                  in_crit = 1;
2142 2142                  if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2143 2143                          error = EACCES;
2144 2144                          goto out;
2145 2145                  }
2146 2146          }
2147 2147  
2148 2148          error = VOP_RENAME(fromvp, args->rna_from.da_name,
2149 2149              tovp, args->rna_to.da_name, cr, NULL, 0);
2150 2150  
2151 2151          if (error == 0)
2152 2152                  vn_renamepath(tovp, srcvp, args->rna_to.da_name,
2153 2153                      strlen(args->rna_to.da_name));
2154 2154  
2155 2155          /*
2156 2156           * Force modified data and metadata out to stable storage.
2157 2157           */
2158 2158          (void) VOP_FSYNC(tovp, 0, cr, NULL);
2159 2159          (void) VOP_FSYNC(fromvp, 0, cr, NULL);
2160 2160  
2161 2161  out:
2162 2162          if (in_crit)
2163 2163                  nbl_end_crit(srcvp);
2164 2164          VN_RELE(srcvp);
2165 2165          VN_RELE(tovp);
2166 2166          VN_RELE(fromvp);
2167 2167  
2168 2168          *status = puterrno(error);
2169 2169  
2170 2170  }
2171 2171  void *
2172 2172  rfs_rename_getfh(struct nfsrnmargs *args)
2173 2173  {
2174 2174          return (args->rna_from.da_fhandle);
2175 2175  }
2176 2176  
2177 2177  /*
2178 2178   * Link to a file.
2179 2179   * Create a file (to) which is a hard link to the given file (from).
2180 2180   */
2181 2181  /* ARGSUSED */
2182 2182  void
2183 2183  rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2184 2184      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2185 2185  {
2186 2186          int error;
2187 2187          vnode_t *fromvp;
2188 2188          vnode_t *tovp;

↓ open down ↓

125 lines elided

↑ open up ↑

2189 2189          struct exportinfo *to_exi;
2190 2190          fhandle_t *fh;
2191 2191  
2192 2192          fromvp = nfs_fhtovp(args->la_from, exi);
2193 2193          if (fromvp == NULL) {
2194 2194                  *status = NFSERR_STALE;
2195 2195                  return;
2196 2196          }
2197 2197  
2198 2198          fh = args->la_to.da_fhandle;
2199      -        to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
     2199 +        to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen, NULL);
2200 2200          if (to_exi == NULL) {
2201 2201                  VN_RELE(fromvp);
2202 2202                  *status = NFSERR_ACCES;
2203 2203                  return;
2204 2204          }
2205 2205          exi_rele(to_exi);
2206 2206  
2207 2207          if (to_exi != exi) {
2208 2208                  VN_RELE(fromvp);
2209 2209                  *status = NFSERR_XDEV;

2210 2210                  return;
2211 2211          }
2212 2212  
2213 2213          tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2214 2214          if (tovp == NULL) {
2215 2215                  VN_RELE(fromvp);
2216 2216                  *status = NFSERR_STALE;
2217 2217                  return;
2218 2218          }
2219 2219  
2220 2220          if (tovp->v_type != VDIR) {
2221 2221                  VN_RELE(tovp);
2222 2222                  VN_RELE(fromvp);
2223 2223                  *status = NFSERR_NOTDIR;
2224 2224                  return;
2225 2225          }
2226 2226          /*
2227 2227           * Disallow NULL paths
2228 2228           */
2229 2229          if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2230 2230                  VN_RELE(tovp);
2231 2231                  VN_RELE(fromvp);
2232 2232                  *status = NFSERR_ACCES;
2233 2233                  return;
2234 2234          }
2235 2235  
2236 2236          if (rdonly(ro, tovp)) {
2237 2237                  VN_RELE(tovp);
2238 2238                  VN_RELE(fromvp);
2239 2239                  *status = NFSERR_ROFS;
2240 2240                  return;
2241 2241          }
2242 2242  
2243 2243          error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2244 2244  
2245 2245          /*
2246 2246           * Force modified data and metadata out to stable storage.
2247 2247           */
2248 2248          (void) VOP_FSYNC(tovp, 0, cr, NULL);
2249 2249          (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2250 2250  
2251 2251          VN_RELE(tovp);
2252 2252          VN_RELE(fromvp);
2253 2253  
2254 2254          *status = puterrno(error);
2255 2255  
2256 2256  }
2257 2257  void *
2258 2258  rfs_link_getfh(struct nfslinkargs *args)
2259 2259  {
2260 2260          return (args->la_from);
2261 2261  }
2262 2262  
2263 2263  /*
2264 2264   * Symbolicly link to a file.
2265 2265   * Create a file (to) with the given attributes which is a symbolic link
2266 2266   * to the given path name (to).
2267 2267   */
2268 2268  void
2269 2269  rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2270 2270      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2271 2271  {
2272 2272          int error;
2273 2273          struct vattr va;
2274 2274          vnode_t *vp;
2275 2275          vnode_t *svp;
2276 2276          int lerror;
2277 2277          struct sockaddr *ca;
2278 2278          char *name = NULL;
2279 2279  
2280 2280          /*
2281 2281           * Disallow NULL paths
2282 2282           */
2283 2283          if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2284 2284                  *status = NFSERR_ACCES;
2285 2285                  return;
2286 2286          }
2287 2287  
2288 2288          vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2289 2289          if (vp == NULL) {
2290 2290                  *status = NFSERR_STALE;
2291 2291                  return;
2292 2292          }
2293 2293  
2294 2294          if (rdonly(ro, vp)) {
2295 2295                  VN_RELE(vp);
2296 2296                  *status = NFSERR_ROFS;
2297 2297                  return;
2298 2298          }
2299 2299  
2300 2300          error = sattr_to_vattr(args->sla_sa, &va);
2301 2301          if (error) {
2302 2302                  VN_RELE(vp);
2303 2303                  *status = puterrno(error);
2304 2304                  return;
2305 2305          }
2306 2306  
2307 2307          if (!(va.va_mask & AT_MODE)) {
2308 2308                  VN_RELE(vp);
2309 2309                  *status = NFSERR_INVAL;
2310 2310                  return;
2311 2311          }
2312 2312  
2313 2313          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2314 2314          name = nfscmd_convname(ca, exi, args->sla_tnm,
2315 2315              NFSCMD_CONV_INBOUND, MAXPATHLEN);
2316 2316  
2317 2317          if (name == NULL) {
2318 2318                  *status = NFSERR_ACCES;
2319 2319                  return;
2320 2320          }
2321 2321  
2322 2322          va.va_type = VLNK;
2323 2323          va.va_mask |= AT_TYPE;
2324 2324  
2325 2325          error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2326 2326  
2327 2327          /*
2328 2328           * Force new data and metadata out to stable storage.
2329 2329           */
2330 2330          lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2331 2331              NULL, cr, NULL, NULL, NULL);
2332 2332  
2333 2333          if (!lerror) {
2334 2334                  (void) VOP_FSYNC(svp, 0, cr, NULL);
2335 2335                  VN_RELE(svp);
2336 2336          }
2337 2337  
2338 2338          /*
2339 2339           * Force modified data and metadata out to stable storage.
2340 2340           */
2341 2341          (void) VOP_FSYNC(vp, 0, cr, NULL);
2342 2342  
2343 2343          VN_RELE(vp);
2344 2344  
2345 2345          *status = puterrno(error);
2346 2346          if (name != args->sla_tnm)
2347 2347                  kmem_free(name, MAXPATHLEN);
2348 2348  
2349 2349  }
2350 2350  void *
2351 2351  rfs_symlink_getfh(struct nfsslargs *args)
2352 2352  {
2353 2353          return (args->sla_from.da_fhandle);
2354 2354  }
2355 2355  
2356 2356  /*
2357 2357   * Make a directory.
2358 2358   * Create a directory with the given name, parent directory, and attributes.
2359 2359   * Returns a file handle and attributes for the new directory.
2360 2360   */
2361 2361  /* ARGSUSED */
2362 2362  void
2363 2363  rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
2364 2364      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2365 2365  {
2366 2366          int error;
2367 2367          struct vattr va;
2368 2368          vnode_t *dvp = NULL;
2369 2369          vnode_t *vp;
2370 2370          char *name = args->ca_da.da_name;
2371 2371  
2372 2372          /*
2373 2373           * Disallow NULL paths
2374 2374           */
2375 2375          if (name == NULL || *name == '\0') {
2376 2376                  dr->dr_status = NFSERR_ACCES;
2377 2377                  return;
2378 2378          }
2379 2379  
2380 2380          vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
2381 2381          if (vp == NULL) {
2382 2382                  dr->dr_status = NFSERR_STALE;
2383 2383                  return;
2384 2384          }
2385 2385  
2386 2386          if (rdonly(ro, vp)) {
2387 2387                  VN_RELE(vp);
2388 2388                  dr->dr_status = NFSERR_ROFS;
2389 2389                  return;
2390 2390          }
2391 2391  
2392 2392          error = sattr_to_vattr(args->ca_sa, &va);
2393 2393          if (error) {
2394 2394                  VN_RELE(vp);
2395 2395                  dr->dr_status = puterrno(error);
2396 2396                  return;
2397 2397          }
2398 2398  
2399 2399          if (!(va.va_mask & AT_MODE)) {
2400 2400                  VN_RELE(vp);
2401 2401                  dr->dr_status = NFSERR_INVAL;
2402 2402                  return;
2403 2403          }
2404 2404  
2405 2405          va.va_type = VDIR;
2406 2406          va.va_mask |= AT_TYPE;
2407 2407  
2408 2408          error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2409 2409  
2410 2410          if (!error) {
2411 2411                  /*
2412 2412                   * Attribtutes of the newly created directory should
2413 2413                   * be returned to the client.
2414 2414                   */
2415 2415                  va.va_mask = AT_ALL; /* We want everything */
2416 2416                  error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2417 2417  
2418 2418                  /* check for overflows */
2419 2419                  if (!error) {
2420 2420                          acl_perm(vp, exi, &va, cr);
2421 2421                          error = vattr_to_nattr(&va, &dr->dr_attr);
2422 2422                          if (!error) {
2423 2423                                  error = makefh(&dr->dr_fhandle, dvp, exi);
2424 2424                          }
2425 2425                  }
2426 2426                  /*
2427 2427                   * Force new data and metadata out to stable storage.
2428 2428                   */
2429 2429                  (void) VOP_FSYNC(dvp, 0, cr, NULL);
2430 2430                  VN_RELE(dvp);
2431 2431          }
2432 2432  
2433 2433          /*
2434 2434           * Force modified data and metadata out to stable storage.
2435 2435           */
2436 2436          (void) VOP_FSYNC(vp, 0, cr, NULL);
2437 2437  
2438 2438          VN_RELE(vp);
2439 2439  
2440 2440          dr->dr_status = puterrno(error);
2441 2441  
2442 2442  }
2443 2443  void *
2444 2444  rfs_mkdir_getfh(struct nfscreatargs *args)
2445 2445  {
2446 2446          return (args->ca_da.da_fhandle);
2447 2447  }
2448 2448  
2449 2449  /*
2450 2450   * Remove a directory.
2451 2451   * Remove the given directory name from the given parent directory.
2452 2452   */
2453 2453  /* ARGSUSED */
2454 2454  void
2455 2455  rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2456 2456      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2457 2457  {
2458 2458          int error;
2459 2459          vnode_t *vp;
2460 2460  
2461 2461          /*
2462 2462           * Disallow NULL paths
2463 2463           */
2464 2464          if (da->da_name == NULL || *da->da_name == '\0') {
2465 2465                  *status = NFSERR_ACCES;
2466 2466                  return;
2467 2467          }
2468 2468  
2469 2469          vp = nfs_fhtovp(da->da_fhandle, exi);
2470 2470          if (vp == NULL) {
2471 2471                  *status = NFSERR_STALE;
2472 2472                  return;
2473 2473          }
2474 2474  
2475 2475          if (rdonly(ro, vp)) {
2476 2476                  VN_RELE(vp);
2477 2477                  *status = NFSERR_ROFS;
2478 2478                  return;
2479 2479          }
2480 2480  
2481 2481          /*
2482 2482           * VOP_RMDIR takes a third argument (the current
2483 2483           * directory of the process).  That's because someone
2484 2484           * wants to return EINVAL if one tries to remove ".".
2485 2485           * Of course, NFS servers have no idea what their
2486 2486           * clients' current directories are.  We fake it by
2487 2487           * supplying a vnode known to exist and illegal to
2488 2488           * remove.
2489 2489           */
2490 2490          error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);
2491 2491  
2492 2492          /*
2493 2493           * Force modified data and metadata out to stable storage.
2494 2494           */
2495 2495          (void) VOP_FSYNC(vp, 0, cr, NULL);
2496 2496  
2497 2497          VN_RELE(vp);
2498 2498  
2499 2499          /*
2500 2500           * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2501 2501           * if the directory is not empty.  A System V NFS server
2502 2502           * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2503 2503           * over the wire.
2504 2504           */
2505 2505          if (error == EEXIST)
2506 2506                  *status = NFSERR_NOTEMPTY;
2507 2507          else
2508 2508                  *status = puterrno(error);
2509 2509  
2510 2510  }
2511 2511  void *
2512 2512  rfs_rmdir_getfh(struct nfsdiropargs *da)
2513 2513  {
2514 2514          return (da->da_fhandle);
2515 2515  }
2516 2516  
2517 2517  /* ARGSUSED */
2518 2518  void
2519 2519  rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2520 2520      struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2521 2521  {
2522 2522          int error;
2523 2523          int iseof;
2524 2524          struct iovec iov;
2525 2525          struct uio uio;
2526 2526          vnode_t *vp;
2527 2527          char *ndata = NULL;
2528 2528          struct sockaddr *ca;
2529 2529          size_t nents;
2530 2530          int ret;
2531 2531  
2532 2532          vp = nfs_fhtovp(&rda->rda_fh, exi);
2533 2533          if (vp == NULL) {
2534 2534                  rd->rd_entries = NULL;
2535 2535                  rd->rd_status = NFSERR_STALE;
2536 2536                  return;
2537 2537          }
2538 2538  
2539 2539          if (vp->v_type != VDIR) {
2540 2540                  VN_RELE(vp);
2541 2541                  rd->rd_entries = NULL;
2542 2542                  rd->rd_status = NFSERR_NOTDIR;
2543 2543                  return;
2544 2544          }
2545 2545  
2546 2546          (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2547 2547  
2548 2548          error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2549 2549  
2550 2550          if (error) {
2551 2551                  rd->rd_entries = NULL;
2552 2552                  goto bad;
2553 2553          }
2554 2554  
2555 2555          if (rda->rda_count == 0) {
2556 2556                  rd->rd_entries = NULL;
2557 2557                  rd->rd_size = 0;
2558 2558                  rd->rd_eof = FALSE;
2559 2559                  goto bad;
2560 2560          }
2561 2561  
2562 2562          rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2563 2563  
2564 2564          /*
2565 2565           * Allocate data for entries.  This will be freed by rfs_rddirfree.
2566 2566           */
2567 2567          rd->rd_bufsize = (uint_t)rda->rda_count;
2568 2568          rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2569 2569  
2570 2570          /*
2571 2571           * Set up io vector to read directory data
2572 2572           */
2573 2573          iov.iov_base = (caddr_t)rd->rd_entries;
2574 2574          iov.iov_len = rda->rda_count;
2575 2575          uio.uio_iov = &iov;
2576 2576          uio.uio_iovcnt = 1;
2577 2577          uio.uio_segflg = UIO_SYSSPACE;
2578 2578          uio.uio_extflg = UIO_COPY_CACHED;
2579 2579          uio.uio_loffset = (offset_t)rda->rda_offset;
2580 2580          uio.uio_resid = rda->rda_count;
2581 2581  
2582 2582          /*
2583 2583           * read directory
2584 2584           */
2585 2585          error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2586 2586  
2587 2587          /*
2588 2588           * Clean up
2589 2589           */
2590 2590          if (!error) {
2591 2591                  /*
2592 2592                   * set size and eof
2593 2593                   */
2594 2594                  if (uio.uio_resid == rda->rda_count) {
2595 2595                          rd->rd_size = 0;
2596 2596                          rd->rd_eof = TRUE;
2597 2597                  } else {
2598 2598                          rd->rd_size = (uint32_t)(rda->rda_count -
2599 2599                              uio.uio_resid);
2600 2600                          rd->rd_eof = iseof ? TRUE : FALSE;
2601 2601                  }
2602 2602          }
2603 2603  
2604 2604          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2605 2605          nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2606 2606          ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2607 2607              rda->rda_count, &ndata);
2608 2608  
2609 2609          if (ret != 0) {
2610 2610                  size_t dropbytes;
2611 2611                  /*
2612 2612                   * We had to drop one or more entries in order to fit
2613 2613                   * during the character conversion.  We need to patch
2614 2614                   * up the size and eof info.
2615 2615                   */
2616 2616                  if (rd->rd_eof)
2617 2617                          rd->rd_eof = FALSE;
2618 2618                  dropbytes = nfscmd_dropped_entrysize(
2619 2619                      (struct dirent64 *)rd->rd_entries, nents, ret);
2620 2620                  rd->rd_size -= dropbytes;
2621 2621          }
2622 2622          if (ndata == NULL) {
2623 2623                  ndata = (char *)rd->rd_entries;
2624 2624          } else if (ndata != (char *)rd->rd_entries) {
2625 2625                  kmem_free(rd->rd_entries, rd->rd_bufsize);
2626 2626                  rd->rd_entries = (void *)ndata;
2627 2627                  rd->rd_bufsize = rda->rda_count;
2628 2628          }
2629 2629  
2630 2630  bad:
2631 2631          VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2632 2632  
2633 2633  #if 0 /* notyet */
2634 2634          /*
2635 2635           * Don't do this.  It causes local disk writes when just
2636 2636           * reading the file and the overhead is deemed larger
2637 2637           * than the benefit.
2638 2638           */
2639 2639          /*
2640 2640           * Force modified metadata out to stable storage.
2641 2641           */
2642 2642          (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2643 2643  #endif
2644 2644  
2645 2645          VN_RELE(vp);
2646 2646  
2647 2647          rd->rd_status = puterrno(error);
2648 2648  
2649 2649  }
2650 2650  void *
2651 2651  rfs_readdir_getfh(struct nfsrddirargs *rda)
2652 2652  {
2653 2653          return (&rda->rda_fh);
2654 2654  }
2655 2655  void
2656 2656  rfs_rddirfree(struct nfsrddirres *rd)
2657 2657  {
2658 2658          if (rd->rd_entries != NULL)
2659 2659                  kmem_free(rd->rd_entries, rd->rd_bufsize);
2660 2660  }
2661 2661  
2662 2662  /* ARGSUSED */
2663 2663  void
2664 2664  rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2665 2665      struct svc_req *req, cred_t *cr, bool_t ro)
2666 2666  {
2667 2667          int error;
2668 2668          struct statvfs64 sb;
2669 2669          vnode_t *vp;
2670 2670  
2671 2671          vp = nfs_fhtovp(fh, exi);
2672 2672          if (vp == NULL) {
2673 2673                  fs->fs_status = NFSERR_STALE;
2674 2674                  return;
2675 2675          }
2676 2676  
2677 2677          error = VFS_STATVFS(vp->v_vfsp, &sb);
2678 2678  
2679 2679          if (!error) {
2680 2680                  fs->fs_tsize = nfstsize();
2681 2681                  fs->fs_bsize = sb.f_frsize;
2682 2682                  fs->fs_blocks = sb.f_blocks;
2683 2683                  fs->fs_bfree = sb.f_bfree;
2684 2684                  fs->fs_bavail = sb.f_bavail;
2685 2685          }
2686 2686  
2687 2687          VN_RELE(vp);
2688 2688  
2689 2689          fs->fs_status = puterrno(error);
2690 2690  
2691 2691  }
2692 2692  void *
2693 2693  rfs_statfs_getfh(fhandle_t *fh)
2694 2694  {
2695 2695          return (fh);
2696 2696  }
2697 2697  
2698 2698  static int
2699 2699  sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
2700 2700  {
2701 2701          vap->va_mask = 0;
2702 2702  
2703 2703          /*
2704 2704           * There was a sign extension bug in some VFS based systems
2705 2705           * which stored the mode as a short.  When it would get
2706 2706           * assigned to a u_long, no sign extension would occur.
2707 2707           * It needed to, but this wasn't noticed because sa_mode
2708 2708           * would then get assigned back to the short, thus ignoring
2709 2709           * the upper 16 bits of sa_mode.
2710 2710           *
2711 2711           * To make this implementation work for both broken
2712 2712           * clients and good clients, we check for both versions
2713 2713           * of the mode.
2714 2714           */
2715 2715          if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
2716 2716              sa->sa_mode != (uint32_t)-1) {
2717 2717                  vap->va_mask |= AT_MODE;
2718 2718                  vap->va_mode = sa->sa_mode;
2719 2719          }
2720 2720          if (sa->sa_uid != (uint32_t)-1) {
2721 2721                  vap->va_mask |= AT_UID;
2722 2722                  vap->va_uid = sa->sa_uid;
2723 2723          }
2724 2724          if (sa->sa_gid != (uint32_t)-1) {
2725 2725                  vap->va_mask |= AT_GID;
2726 2726                  vap->va_gid = sa->sa_gid;
2727 2727          }
2728 2728          if (sa->sa_size != (uint32_t)-1) {
2729 2729                  vap->va_mask |= AT_SIZE;
2730 2730                  vap->va_size = sa->sa_size;
2731 2731          }
2732 2732          if (sa->sa_atime.tv_sec != (int32_t)-1 &&
2733 2733              sa->sa_atime.tv_usec != (int32_t)-1) {
2734 2734  #ifndef _LP64
2735 2735                  /* return error if time overflow */
2736 2736                  if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
2737 2737                          return (EOVERFLOW);
2738 2738  #endif
2739 2739                  vap->va_mask |= AT_ATIME;
2740 2740                  /*
2741 2741                   * nfs protocol defines times as unsigned so don't extend sign,
2742 2742                   * unless sysadmin set nfs_allow_preepoch_time.
2743 2743                   */
2744 2744                  NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
2745 2745                  vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
2746 2746          }
2747 2747          if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
2748 2748              sa->sa_mtime.tv_usec != (int32_t)-1) {
2749 2749  #ifndef _LP64
2750 2750                  /* return error if time overflow */
2751 2751                  if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
2752 2752                          return (EOVERFLOW);
2753 2753  #endif
2754 2754                  vap->va_mask |= AT_MTIME;
2755 2755                  /*
2756 2756                   * nfs protocol defines times as unsigned so don't extend sign,
2757 2757                   * unless sysadmin set nfs_allow_preepoch_time.
2758 2758                   */
2759 2759                  NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
2760 2760                  vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
2761 2761          }
2762 2762          return (0);
2763 2763  }
2764 2764  
2765 2765  static enum nfsftype vt_to_nf[] = {
2766 2766          0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
2767 2767  };
2768 2768  
2769 2769  /*
2770 2770   * check the following fields for overflow: nodeid, size, and time.
2771 2771   * There could be a problem when converting 64-bit LP64 fields
2772 2772   * into 32-bit ones.  Return an error if there is an overflow.
2773 2773   */
2774 2774  int
2775 2775  vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2776 2776  {
2777 2777          ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2778 2778          na->na_type = vt_to_nf[vap->va_type];
2779 2779  
2780 2780          if (vap->va_mode == (unsigned short) -1)
2781 2781                  na->na_mode = (uint32_t)-1;
2782 2782          else
2783 2783                  na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2784 2784  
2785 2785          if (vap->va_uid == (unsigned short)(-1))
2786 2786                  na->na_uid = (uint32_t)(-1);
2787 2787          else if (vap->va_uid == UID_NOBODY)
2788 2788                  na->na_uid = (uint32_t)NFS_UID_NOBODY;
2789 2789          else
2790 2790                  na->na_uid = vap->va_uid;
2791 2791  
2792 2792          if (vap->va_gid == (unsigned short)(-1))
2793 2793                  na->na_gid = (uint32_t)-1;
2794 2794          else if (vap->va_gid == GID_NOBODY)
2795 2795                  na->na_gid = (uint32_t)NFS_GID_NOBODY;
2796 2796          else
2797 2797                  na->na_gid = vap->va_gid;
2798 2798  
2799 2799          /*
2800 2800           * Do we need to check fsid for overflow?  It is 64-bit in the
2801 2801           * vattr, but are bigger than 32 bit values supported?
2802 2802           */
2803 2803          na->na_fsid = vap->va_fsid;
2804 2804  
2805 2805          na->na_nodeid = vap->va_nodeid;
2806 2806  
2807 2807          /*
2808 2808           * Check to make sure that the nodeid is representable over the
2809 2809           * wire without losing bits.
2810 2810           */
2811 2811          if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
2812 2812                  return (EFBIG);
2813 2813          na->na_nlink = vap->va_nlink;
2814 2814  
2815 2815          /*
2816 2816           * Check for big files here, instead of at the caller.  See
2817 2817           * comments in cstat for large special file explanation.
2818 2818           */
2819 2819          if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2820 2820                  if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2821 2821                          return (EFBIG);
2822 2822                  if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2823 2823                          /* UNKNOWN_SIZE | OVERFLOW */
2824 2824                          na->na_size = MAXOFF32_T;
2825 2825                  } else
2826 2826                          na->na_size = vap->va_size;
2827 2827          } else
2828 2828                  na->na_size = vap->va_size;
2829 2829  
2830 2830          /*
2831 2831           * If the vnode times overflow the 32-bit times that NFS2
2832 2832           * uses on the wire then return an error.
2833 2833           */
2834 2834          if (!NFS_VAP_TIME_OK(vap)) {
2835 2835                  return (EOVERFLOW);
2836 2836          }
2837 2837          na->na_atime.tv_sec = vap->va_atime.tv_sec;
2838 2838          na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2839 2839  
2840 2840          na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2841 2841          na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2842 2842  
2843 2843          na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2844 2844          na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2845 2845  
2846 2846          /*
2847 2847           * If the dev_t will fit into 16 bits then compress
2848 2848           * it, otherwise leave it alone. See comments in
2849 2849           * nfs_client.c.
2850 2850           */
2851 2851          if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2852 2852              getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2853 2853                  na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2854 2854          else
2855 2855                  (void) cmpldev(&na->na_rdev, vap->va_rdev);
2856 2856  
2857 2857          na->na_blocks = vap->va_nblocks;
2858 2858          na->na_blocksize = vap->va_blksize;
2859 2859  
2860 2860          /*
2861 2861           * This bit of ugliness is a *TEMPORARY* hack to preserve the
2862 2862           * over-the-wire protocols for named-pipe vnodes.  It remaps the
2863 2863           * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2864 2864           *
2865 2865           * BUYER BEWARE:
2866 2866           *  If you are porting the NFS to a non-Sun server, you probably
2867 2867           *  don't want to include the following block of code.  The
2868 2868           *  over-the-wire special file types will be changing with the
2869 2869           *  NFS Protocol Revision.
2870 2870           */
2871 2871          if (vap->va_type == VFIFO)
2872 2872                  NA_SETFIFO(na);
2873 2873          return (0);
2874 2874  }
2875 2875  
2876 2876  /*
2877 2877   * acl v2 support: returns approximate permission.
2878 2878   *      default: returns minimal permission (more restrictive)
2879 2879   *      aclok: returns maximal permission (less restrictive)
2880 2880   *      This routine changes the permissions that are already in *va.
2881 2881   *      If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
2882 2882   *      CLASS_OBJ is always the same as GROUP_OBJ entry.
2883 2883   */
2884 2884  static void
2885 2885  acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
2886 2886  {
2887 2887          vsecattr_t      vsa;
2888 2888          int             aclcnt;
2889 2889          aclent_t        *aclentp;
2890 2890          mode_t          mask_perm;
2891 2891          mode_t          grp_perm;
2892 2892          mode_t          other_perm;
2893 2893          mode_t          other_orig;
2894 2894          int             error;
2895 2895  
2896 2896          /* dont care default acl */
2897 2897          vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
2898 2898          error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
2899 2899  
2900 2900          if (!error) {
2901 2901                  aclcnt = vsa.vsa_aclcnt;
2902 2902                  if (aclcnt > MIN_ACL_ENTRIES) {
2903 2903                          /* non-trivial ACL */
2904 2904                          aclentp = vsa.vsa_aclentp;
2905 2905                          if (exi->exi_export.ex_flags & EX_ACLOK) {
2906 2906                                  /* maximal permissions */
2907 2907                                  grp_perm = 0;
2908 2908                                  other_perm = 0;
2909 2909                                  for (; aclcnt > 0; aclcnt--, aclentp++) {
2910 2910                                          switch (aclentp->a_type) {
2911 2911                                          case USER_OBJ:
2912 2912                                                  break;
2913 2913                                          case USER:
2914 2914                                                  grp_perm |=
2915 2915                                                      aclentp->a_perm << 3;
2916 2916                                                  other_perm |= aclentp->a_perm;
2917 2917                                                  break;
2918 2918                                          case GROUP_OBJ:
2919 2919                                                  grp_perm |=
2920 2920                                                      aclentp->a_perm << 3;
2921 2921                                                  break;
2922 2922                                          case GROUP:
2923 2923                                                  other_perm |= aclentp->a_perm;
2924 2924                                                  break;
2925 2925                                          case OTHER_OBJ:
2926 2926                                                  other_orig = aclentp->a_perm;
2927 2927                                                  break;
2928 2928                                          case CLASS_OBJ:
2929 2929                                                  mask_perm = aclentp->a_perm;
2930 2930                                                  break;
2931 2931                                          default:
2932 2932                                                  break;
2933 2933                                          }
2934 2934                                  }
2935 2935                                  grp_perm &= mask_perm << 3;
2936 2936                                  other_perm &= mask_perm;
2937 2937                                  other_perm |= other_orig;
2938 2938  
2939 2939                          } else {
2940 2940                                  /* minimal permissions */
2941 2941                                  grp_perm = 070;
2942 2942                                  other_perm = 07;
2943 2943                                  for (; aclcnt > 0; aclcnt--, aclentp++) {
2944 2944                                          switch (aclentp->a_type) {
2945 2945                                          case USER_OBJ:
2946 2946                                                  break;
2947 2947                                          case USER:
2948 2948                                          case CLASS_OBJ:
2949 2949                                                  grp_perm &=
2950 2950                                                      aclentp->a_perm << 3;
2951 2951                                                  other_perm &=
2952 2952                                                      aclentp->a_perm;
2953 2953                                                  break;
2954 2954                                          case GROUP_OBJ:
2955 2955                                                  grp_perm &=
2956 2956                                                      aclentp->a_perm << 3;
2957 2957                                                  break;
2958 2958                                          case GROUP:
2959 2959                                                  other_perm &=
2960 2960                                                      aclentp->a_perm;
2961 2961                                                  break;
2962 2962                                          case OTHER_OBJ:
2963 2963                                                  other_perm &=
2964 2964                                                      aclentp->a_perm;
2965 2965                                                  break;
2966 2966                                          default:
2967 2967                                                  break;
2968 2968                                          }
2969 2969                                  }
2970 2970                          }
2971 2971                          /* copy to va */
2972 2972                          va->va_mode &= ~077;
2973 2973                          va->va_mode |= grp_perm | other_perm;
2974 2974                  }
2975 2975                  if (vsa.vsa_aclcnt)
2976 2976                          kmem_free(vsa.vsa_aclentp,
2977 2977                              vsa.vsa_aclcnt * sizeof (aclent_t));
2978 2978          }
2979 2979  }
2980 2980  
/*
 * Set up the global state used by the server code in this file:
 * initialize rfs_async_write_lock as a MUTEX_DEFAULT mutex and store a
 * newly obtained caller id from fs_new_caller_id() in nfs2_srv_caller_id.
 *
 * NOTE(review): presumably called once when the NFS server is brought
 * up, and paired with rfs_srvrfini() -- confirm against the callers.
 */
2981 2981  void
2982 2982  rfs_srvrinit(void)
2983 2983  {
2984 2984          mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
2985 2985          nfs2_srv_caller_id = fs_new_caller_id();
2986 2986  }
2987 2987  
/*
 * Tear down what rfs_srvrinit() set up: destroy rfs_async_write_lock.
 * Nothing is done here for nfs2_srv_caller_id.
 *
 * NOTE(review): assumes no thread can still hold or wait on the lock
 * when this runs -- confirm against the callers.
 */
2988 2988  void
2989 2989  rfs_srvrfini(void)
2990 2990  {
2991 2991          mutex_destroy(&rfs_async_write_lock);
2992 2992  }
2993 2993  
2994 2994  static int
2995 2995  rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
2996 2996  {
2997 2997          struct clist    *wcl;
2998 2998          int             wlist_len;
2999 2999          uint32_t        count = rr->rr_count;
3000 3000  
3001 3001          wcl = ra->ra_wlist;
3002 3002  
3003 3003          if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3004 3004                  return (FALSE);
3005 3005          }
3006 3006  
3007 3007          wcl = ra->ra_wlist;
3008 3008          rr->rr_ok.rrok_wlist_len = wlist_len;
3009 3009          rr->rr_ok.rrok_wlist = wcl;
3010 3010  
3011 3011          return (TRUE);
3012 3012  }

↓ open down ↓

803 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX