basic fsh prototype (no comments yet)
--- old/usr/src/uts/common/fs/vnode.c
+++ new/usr/src/uts/common/fs/vnode.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28
29 29 /*
30 30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 31 * The Regents of the University of California
32 32 * All Rights Reserved
33 33 *
34 34 * University Acknowledgment- Portions of this document are derived from
35 35 * software developed by the University of California, Berkeley, and its
36 36 * contributors.
37 37 */
38 38
39 39 #include <sys/types.h>
40 40 #include <sys/param.h>
41 41 #include <sys/t_lock.h>
42 42 #include <sys/errno.h>
43 43 #include <sys/cred.h>
44 44 #include <sys/user.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/file.h>
47 47 #include <sys/pathname.h>
48 48 #include <sys/vfs.h>
49 49 #include <sys/vfs_opreg.h>
50 50 #include <sys/vnode.h>
51 51 #include <sys/rwstlock.h>
52 52 #include <sys/fem.h>
53 53 #include <sys/stat.h>
54 54 #include <sys/mode.h>
55 55 #include <sys/conf.h>
56 56 #include <sys/sysmacros.h>
57 57 #include <sys/cmn_err.h>
58 58 #include <sys/systm.h>
59 59 #include <sys/kmem.h>
60 60 #include <sys/debug.h>
61 61 #include <c2/audit.h>
62 62 #include <sys/acl.h>
63 63 #include <sys/nbmlock.h>
64 64 #include <sys/fcntl.h>
65 65 #include <fs/fs_subr.h>
66 66 #include <sys/taskq.h>
67 67 #include <fs/fs_reparse.h>
68 +#include <sys/fsh_impl.h>
68 69
69 70 /* Determine if this vnode is a file that is read-only */
70 71 #define ISROFILE(vp) \
71 72 ((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
72 73 (vp)->v_type != VFIFO && vn_is_readonly(vp))
73 74
74 75 /* Tunable via /etc/system; used only by admin/install */
75 76 int nfs_global_client_only;
76 77
77 78 /*
78 79 * Array of vopstats_t for per-FS-type vopstats. This array has the same
79 80 * number of entries as and parallel to the vfssw table. (Arguably, it could
80 81 * be part of the vfssw table.) Once it's initialized, it's accessed using
81 82 * the same fstype index that is used to index into the vfssw table.
82 83 */
83 84 vopstats_t **vopstats_fstype;
84 85
85 86 /* vopstats initialization template used for fast initialization via bcopy() */
86 87 static vopstats_t *vs_templatep;
87 88
88 89 /* Kmem cache handle for vsk_anchor_t allocations */
89 90 kmem_cache_t *vsk_anchor_cache;
90 91
91 92 /* file events cleanup routine */
92 93 extern void free_fopdata(vnode_t *);
93 94
94 95 /*
95 96 * Root of AVL tree for the kstats associated with vopstats. Lock protects
96 97 * updates to vskstat_tree.
97 98 */
98 99 avl_tree_t vskstat_tree;
99 100 kmutex_t vskstat_tree_lock;
100 101
101 102 /* Global variable which enables/disables the vopstats collection */
102 103 int vopstats_enabled = 1;
103 104
104 105 /*
105 106 * forward declarations for internal vnode specific data (vsd)
106 107 */
107 108 static void *vsd_realloc(void *, size_t, size_t);
108 109
109 110 /*
110 111 * forward declarations for reparse point functions
111 112 */
112 113 static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);
113 114
114 115 /*
115 116 * VSD -- VNODE SPECIFIC DATA
116 117 * The v_data pointer is typically used by a file system to store a
117 118 * pointer to the file system's private node (e.g. ufs inode, nfs rnode).
118 119 * However, there are times when additional project private data needs
119 120 * to be stored separately from the data (node) pointed to by v_data.
120 121 * This additional data could be stored by the file system itself or
121 122 * by a completely different kernel entity. VSD provides a way for
122 123 * callers to obtain a key and store a pointer to private data associated
123 124 * with a vnode.
124 125 *
125 126 * Callers are responsible for protecting the vsd by holding v_vsd_lock
126 127 * for calls to vsd_set() and vsd_get().
127 128 */
128 129
129 130 /*
130 131 * vsd_lock protects:
131 132 * vsd_nkeys - creation and deletion of vsd keys
132 133 * vsd_list - insertion and deletion of vsd_node in the vsd_list
133 134 * vsd_destructor - adding and removing destructors to the list
134 135 */
135 136 static kmutex_t vsd_lock;
136 137 static uint_t vsd_nkeys; /* size of destructor array */
137 138 /* list of vsd_node's */
138 139 static list_t *vsd_list = NULL;
139 140 /* per-key destructor funcs */
140 141 static void (**vsd_destructor)(void *);
141 142
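
[Editorial illustration, not part of this diff.] As a quick sketch of the VSD interface described above (my_vsd_key, my_dtor, and struct my_data are hypothetical names), a kernel component obtains a key once with vsd_create(), then attaches and retrieves per-vnode data while holding v_vsd_lock, as the contract above requires:

	static uint_t my_vsd_key;		/* hypothetical */

	static void
	my_dtor(void *data)			/* per-key destructor */
	{
		kmem_free(data, sizeof (struct my_data));
	}

	...
	vsd_create(&my_vsd_key, my_dtor);	/* once, e.g. at module load */

	mutex_enter(&vp->v_vsd_lock);
	(void) vsd_set(vp, my_vsd_key, data);	/* attach private data */
	mutex_exit(&vp->v_vsd_lock);

	mutex_enter(&vp->v_vsd_lock);
	data = vsd_get(vp, my_vsd_key);		/* fetch it back later */
	mutex_exit(&vp->v_vsd_lock);
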
142 143 /*
143 144 * The following is the common set of actions needed to update the
144 145 * vopstats structure from a vnode op. Both VOPSTATS_UPDATE() and
145 146 * VOPSTATS_UPDATE_IO() do almost the same thing, except for the
146 147 * recording of the bytes transferred. Since the code is similar
147 148 * but small, it is nearly a duplicate. Consequently any changes
148 149 * to one may need to be reflected in the other.
149 150 * Rundown of the variables:
150 151 * vp - Pointer to the vnode
151 152 * counter - Partial name structure member to update in vopstats for counts
152 153 * bytecounter - Partial name structure member to update in vopstats for bytes
153 154 * bytesval - Value to update in vopstats for bytes
154 155 * fstype - Index into vsanchor_fstype[], same as index into vfssw[]
155 156 * vsp - Pointer to vopstats structure (either in vfs or vsanchor_fstype[i])
156 157 */
157 158
158 159 #define VOPSTATS_UPDATE(vp, counter) { \
159 160 vfs_t *vfsp = (vp)->v_vfsp; \
160 161 if (vfsp && vfsp->vfs_implp && \
161 162 (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) { \
162 163 vopstats_t *vsp = &vfsp->vfs_vopstats; \
163 164 uint64_t *stataddr = &(vsp->n##counter.value.ui64); \
164 165 extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
165 166 size_t, uint64_t *); \
166 167 __dtrace_probe___fsinfo_##counter(vp, 0, stataddr); \
167 168 (*stataddr)++; \
168 169 if ((vsp = vfsp->vfs_fstypevsp) != NULL) { \
169 170 vsp->n##counter.value.ui64++; \
170 171 } \
171 172 } \
172 173 }
173 174
174 175 #define VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) { \
175 176 vfs_t *vfsp = (vp)->v_vfsp; \
176 177 if (vfsp && vfsp->vfs_implp && \
177 178 (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) { \
178 179 vopstats_t *vsp = &vfsp->vfs_vopstats; \
179 180 uint64_t *stataddr = &(vsp->n##counter.value.ui64); \
180 181 extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
181 182 size_t, uint64_t *); \
182 183 __dtrace_probe___fsinfo_##counter(vp, bytesval, stataddr); \
183 184 (*stataddr)++; \
184 185 vsp->bytecounter.value.ui64 += bytesval; \
185 186 if ((vsp = vfsp->vfs_fstypevsp) != NULL) { \
186 187 vsp->n##counter.value.ui64++; \
187 188 vsp->bytecounter.value.ui64 += bytesval; \
188 189 } \
189 190 } \
190 191 }
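
[Editorial illustration, not part of this diff.] For context, the FOP wrappers later in this file (outside this hunk) are the consumers of these macros; a read is accounted for roughly as follows, with the byte count taken from the change in uio_resid around the underlying VOP (a sketch, not the verbatim wrapper):

	ssize_t start_resid = uiop->uio_resid;

	err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
	VOPSTATS_UPDATE_IO(vp, read, read_bytes,
	    (start_resid - uiop->uio_resid));
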
191 192
192 193 /*
193 194 * If the filesystem does not support XIDs, map the credential.
194 195 * If the vfsp is NULL, perhaps we should also map?
195 196 */
196 197 #define VOPXID_MAP_CR(vp, cr) { \
197 198 vfs_t *vfsp = (vp)->v_vfsp; \
198 199 if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0) \
199 200 cr = crgetmapped(cr); \
200 201 }
201 202
202 203 /*
203 204 * Convert stat(2) formats to vnode types and vice versa. (Knows about
204 205 * numerical order of S_IFMT and vnode types.)
205 206 */
206 207 enum vtype iftovt_tab[] = {
207 208 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
208 209 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
209 210 };
210 211
211 212 ushort_t vttoif_tab[] = {
212 213 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
213 214 S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
214 215 };
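
[Editorial illustration, not part of this diff.] These tables are normally indexed through the IFTOVT()/VTTOIF() macros from sys/mode.h, which encode the numerical-order assumption mentioned above:

	/* #define IFTOVT(M) (iftovt_tab[((M) & S_IFMT) >> 12]) */
	/* #define VTTOIF(T) (vttoif_tab[(int)(T)]) */

	enum vtype t = IFTOVT(S_IFDIR);		/* -> VDIR */
	mode_t m = VTTOIF(VREG);		/* -> S_IFREG */
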
215 216
216 217 /*
217 218 * The system vnode cache.
218 219 */
219 220
220 221 kmem_cache_t *vn_cache;
221 222
222 223
223 224 /*
224 225 * Vnode operations vector.
225 226 */
226 227
227 228 static const fs_operation_trans_def_t vn_ops_table[] = {
228 229 VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
229 230 fs_nosys, fs_nosys,
230 231
231 232 VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
232 233 fs_nosys, fs_nosys,
233 234
234 235 VOPNAME_READ, offsetof(struct vnodeops, vop_read),
235 236 fs_nosys, fs_nosys,
236 237
237 238 VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
238 239 fs_nosys, fs_nosys,
239 240
240 241 VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
241 242 fs_nosys, fs_nosys,
242 243
243 244 VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
244 245 fs_setfl, fs_nosys,
245 246
246 247 VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
247 248 fs_nosys, fs_nosys,
248 249
249 250 VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
250 251 fs_nosys, fs_nosys,
251 252
252 253 VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
253 254 fs_nosys, fs_nosys,
254 255
255 256 VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
256 257 fs_nosys, fs_nosys,
257 258
258 259 VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
259 260 fs_nosys, fs_nosys,
260 261
261 262 VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
262 263 fs_nosys, fs_nosys,
263 264
264 265 VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
265 266 fs_nosys, fs_nosys,
266 267
267 268 VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
268 269 fs_nosys, fs_nosys,
269 270
270 271 VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
271 272 fs_nosys, fs_nosys,
272 273
273 274 VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
274 275 fs_nosys, fs_nosys,
275 276
276 277 VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
277 278 fs_nosys, fs_nosys,
278 279
279 280 VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
280 281 fs_nosys, fs_nosys,
281 282
282 283 VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
283 284 fs_nosys, fs_nosys,
284 285
285 286 VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
286 287 fs_nosys, fs_nosys,
287 288
288 289 VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
289 290 fs_nosys, fs_nosys,
290 291
291 292 VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
292 293 fs_nosys, fs_nosys,
293 294
294 295 VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
295 296 fs_rwlock, fs_rwlock,
296 297
297 298 VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
298 299 (fs_generic_func_p) fs_rwunlock,
299 300 (fs_generic_func_p) fs_rwunlock, /* no errors allowed */
300 301
301 302 VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
302 303 fs_nosys, fs_nosys,
303 304
304 305 VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
305 306 fs_cmp, fs_cmp, /* no errors allowed */
306 307
307 308 VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
308 309 fs_frlock, fs_nosys,
309 310
310 311 VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
311 312 fs_nosys, fs_nosys,
312 313
313 314 VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
314 315 fs_nosys, fs_nosys,
315 316
316 317 VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
317 318 fs_nosys, fs_nosys,
318 319
319 320 VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
320 321 fs_nosys, fs_nosys,
321 322
322 323 VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
323 324 (fs_generic_func_p) fs_nosys_map,
324 325 (fs_generic_func_p) fs_nosys_map,
325 326
326 327 VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
327 328 (fs_generic_func_p) fs_nosys_addmap,
328 329 (fs_generic_func_p) fs_nosys_addmap,
329 330
330 331 VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
331 332 fs_nosys, fs_nosys,
332 333
333 334 VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
334 335 (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll,
335 336
336 337 VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
337 338 fs_nosys, fs_nosys,
338 339
339 340 VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
340 341 fs_pathconf, fs_nosys,
341 342
342 343 VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
343 344 fs_nosys, fs_nosys,
344 345
345 346 VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
346 347 fs_nosys, fs_nosys,
347 348
348 349 VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
349 350 (fs_generic_func_p) fs_dispose,
350 351 (fs_generic_func_p) fs_nodispose,
351 352
352 353 VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
353 354 fs_nosys, fs_nosys,
354 355
355 356 VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
356 357 fs_fab_acl, fs_nosys,
357 358
358 359 VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
359 360 fs_shrlock, fs_nosys,
360 361
361 362 VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
362 363 (fs_generic_func_p) fs_vnevent_nosupport,
363 364 (fs_generic_func_p) fs_vnevent_nosupport,
364 365
365 366 VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
366 367 fs_nosys, fs_nosys,
367 368
368 369 VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
369 370 fs_nosys, fs_nosys,
370 371
371 372 NULL, 0, NULL, NULL
372 373 };
373 374
374 375 /* Extensible attribute (xva) routines. */
375 376
376 377 /*
377 378 * Zero out the structure, set the size of the requested/returned bitmaps,
378 379 * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer
379 380 * to the returned attributes array.
380 381 */
381 382 void
382 383 xva_init(xvattr_t *xvap)
383 384 {
384 385 bzero(xvap, sizeof (xvattr_t));
385 386 xvap->xva_mapsize = XVA_MAPSIZE;
386 387 xvap->xva_magic = XVA_MAGIC;
387 388 xvap->xva_vattr.va_mask = AT_XVATTR;
388 389 xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0];
389 390 }
390 391
391 392 /*
392 393 * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t
393 394 * structure. Otherwise, returns NULL.
394 395 */
395 396 xoptattr_t *
396 397 xva_getxoptattr(xvattr_t *xvap)
397 398 {
398 399 xoptattr_t *xoap = NULL;
399 400 if (xvap->xva_vattr.va_mask & AT_XVATTR)
400 401 xoap = &xvap->xva_xoptattrs;
401 402 return (xoap);
402 403 }
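
[Editorial illustration, not part of this diff.] A typical caller pairs the two helpers above: initialize the xvattr_t, mark the extended attributes being requested, issue the VOP, then pull out the optional-attribute block. A minimal sketch (XVA_SET_REQ(), XVA_ISSET_RTN(), and XAT_READONLY come from sys/vnode.h):

	xvattr_t xva;
	xoptattr_t *xoap;

	xva_init(&xva);
	XVA_SET_REQ(&xva, XAT_READONLY);
	if (VOP_GETATTR(vp, &xva.xva_vattr, 0, CRED(), NULL) == 0 &&
	    (xoap = xva_getxoptattr(&xva)) != NULL &&
	    XVA_ISSET_RTN(&xva, XAT_READONLY)) {
		/* xoap->xoa_readonly is now valid */
	}
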
403 404
404 405 /*
405 406 * Used by the AVL routines to compare two vsk_anchor_t structures in the tree.
406 407 * We use the f_fsid reported by VFS_STATVFS() since we use that for the
407 408 * kstat name.
408 409 */
409 410 static int
410 411 vska_compar(const void *n1, const void *n2)
411 412 {
412 413 int ret;
413 414 ulong_t p1 = ((vsk_anchor_t *)n1)->vsk_fsid;
414 415 ulong_t p2 = ((vsk_anchor_t *)n2)->vsk_fsid;
415 416
416 417 if (p1 < p2) {
417 418 ret = -1;
418 419 } else if (p1 > p2) {
419 420 ret = 1;
420 421 } else {
421 422 ret = 0;
422 423 }
423 424
424 425 return (ret);
425 426 }
426 427
427 428 /*
428 429 * Used to create a single template which will be bcopy()ed to a newly
429 430 * allocated vsanchor_combo_t structure in new_vsanchor(), below.
430 431 */
431 432 static vopstats_t *
432 433 create_vopstats_template()
433 434 {
434 435 vopstats_t *vsp;
435 436
436 437 vsp = kmem_alloc(sizeof (vopstats_t), KM_SLEEP);
437 438 bzero(vsp, sizeof (*vsp)); /* Start fresh */
438 439
439 440 /* VOP_OPEN */
440 441 kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
441 442 /* VOP_CLOSE */
442 443 kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
443 444 /* VOP_READ I/O */
444 445 kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
445 446 kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
446 447 /* VOP_WRITE I/O */
447 448 kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
448 449 kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
449 450 /* VOP_IOCTL */
450 451 kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
451 452 /* VOP_SETFL */
452 453 kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
453 454 /* VOP_GETATTR */
454 455 kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
455 456 /* VOP_SETATTR */
456 457 kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
457 458 /* VOP_ACCESS */
458 459 kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
459 460 /* VOP_LOOKUP */
460 461 kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
461 462 /* VOP_CREATE */
462 463 kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
463 464 /* VOP_REMOVE */
464 465 kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
465 466 /* VOP_LINK */
466 467 kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
467 468 /* VOP_RENAME */
468 469 kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
469 470 /* VOP_MKDIR */
470 471 kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
471 472 /* VOP_RMDIR */
472 473 kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
473 474 /* VOP_READDIR I/O */
474 475 kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
475 476 kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
476 477 KSTAT_DATA_UINT64);
477 478 /* VOP_SYMLINK */
478 479 kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
479 480 /* VOP_READLINK */
480 481 kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
481 482 /* VOP_FSYNC */
482 483 kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
483 484 /* VOP_INACTIVE */
484 485 kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
485 486 /* VOP_FID */
486 487 kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
487 488 /* VOP_RWLOCK */
488 489 kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
489 490 /* VOP_RWUNLOCK */
490 491 kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
491 492 /* VOP_SEEK */
492 493 kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
493 494 /* VOP_CMP */
494 495 kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
495 496 /* VOP_FRLOCK */
496 497 kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
497 498 /* VOP_SPACE */
498 499 kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
499 500 /* VOP_REALVP */
500 501 kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
501 502 /* VOP_GETPAGE */
502 503 kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
503 504 /* VOP_PUTPAGE */
504 505 kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
505 506 /* VOP_MAP */
506 507 kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
507 508 /* VOP_ADDMAP */
508 509 kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
509 510 /* VOP_DELMAP */
510 511 kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
511 512 /* VOP_POLL */
512 513 kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
513 514 /* VOP_DUMP */
514 515 kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
515 516 /* VOP_PATHCONF */
516 517 kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
517 518 /* VOP_PAGEIO */
518 519 kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
519 520 /* VOP_DUMPCTL */
520 521 kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
521 522 /* VOP_DISPOSE */
522 523 kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
523 524 /* VOP_SETSECATTR */
524 525 kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
525 526 /* VOP_GETSECATTR */
526 527 kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
527 528 /* VOP_SHRLOCK */
528 529 kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
529 530 /* VOP_VNEVENT */
530 531 kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
531 532 /* VOP_REQZCBUF */
532 533 kstat_named_init(&vsp->nreqzcbuf, "nreqzcbuf", KSTAT_DATA_UINT64);
533 534 /* VOP_RETZCBUF */
534 535 kstat_named_init(&vsp->nretzcbuf, "nretzcbuf", KSTAT_DATA_UINT64);
535 536
536 537 return (vsp);
537 538 }
538 539
539 540 /*
540 541 * Creates a kstat structure associated with a vopstats structure.
541 542 */
542 543 kstat_t *
543 544 new_vskstat(char *ksname, vopstats_t *vsp)
544 545 {
545 546 kstat_t *ksp;
546 547
547 548 if (!vopstats_enabled) {
548 549 return (NULL);
549 550 }
550 551
551 552 ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
552 553 sizeof (vopstats_t)/sizeof (kstat_named_t),
553 554 KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
554 555 if (ksp) {
555 556 ksp->ks_data = vsp;
556 557 kstat_install(ksp);
557 558 }
558 559
559 560 return (ksp);
560 561 }
561 562
562 563 /*
563 564 * Called from vfsinit() to initialize the support mechanisms for vopstats
564 565 */
565 566 void
566 567 vopstats_startup()
567 568 {
568 569 if (!vopstats_enabled)
569 570 return;
570 571
571 572 /*
572 573 * Creates the AVL tree which holds per-vfs vopstat anchors. This
573 574 * is necessary since we need to check if a kstat exists before we
574 575 * attempt to create it. Also, initialize its lock.
575 576 */
576 577 avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
577 578 offsetof(vsk_anchor_t, vsk_node));
578 579 mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);
579 580
580 581 vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
581 582 sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
582 583 NULL, NULL, 0);
583 584
584 585 /*
585 586 * Set up the array of pointers for the vopstats-by-FS-type.
586 587 * The entries will be allocated/initialized as each file system
587 588 * goes through modload/mod_installfs.
588 589 */
589 590 vopstats_fstype = (vopstats_t **)kmem_zalloc(
590 591 (sizeof (vopstats_t *) * nfstype), KM_SLEEP);
591 592
592 593 /* Set up the global vopstats initialization template */
593 594 vs_templatep = create_vopstats_template();
594 595 }
595 596
596 597 /*
597 598 * We need to have all of the counters zeroed.
598 599 * The initialization of the vopstats_t includes on the order of
599 600 * 50 calls to kstat_named_init(). Rather than do that on every call,
600 601 * we do it once in a template (vs_templatep) then bcopy it over.
601 602 */
602 603 void
603 604 initialize_vopstats(vopstats_t *vsp)
604 605 {
605 606 if (vsp == NULL)
606 607 return;
607 608
608 609 bcopy(vs_templatep, vsp, sizeof (vopstats_t));
609 610 }
610 611
611 612 /*
612 613 * If possible, determine which vopstats by fstype to use and
613 614 * return a pointer to the caller.
614 615 */
615 616 vopstats_t *
616 617 get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
617 618 {
618 619 int fstype = 0; /* Index into vfssw[] */
619 620 vopstats_t *vsp = NULL;
620 621
621 622 if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
622 623 !vopstats_enabled)
623 624 return (NULL);
624 625 /*
625 626 * Set up the fstype. We go to so much trouble because all versions
626 627 * of NFS use the same fstype in their vfs even though they have
627 628 * distinct entries in the vfssw[] table.
628 629 * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
629 630 */
630 631 if (vswp) {
631 632 fstype = vswp - vfssw; /* Gets us the index */
632 633 } else {
633 634 fstype = vfsp->vfs_fstype;
634 635 }
635 636
636 637 /*
637 638 * Point to the per-fstype vopstats. The only valid values are
638 639 * non-zero positive values less than the number of vfssw[] table
639 640 * entries.
640 641 */
641 642 if (fstype > 0 && fstype < nfstype) {
642 643 vsp = vopstats_fstype[fstype];
643 644 }
644 645
645 646 return (vsp);
646 647 }
647 648
648 649 /*
649 650 * Generate a kstat name, create the kstat structure, and allocate a
650 651 * vsk_anchor_t to hold it together. Return the pointer to the vsk_anchor_t
651 652 * to the caller. This must only be called from a mount.
652 653 */
653 654 vsk_anchor_t *
654 655 get_vskstat_anchor(vfs_t *vfsp)
655 656 {
656 657 char kstatstr[KSTAT_STRLEN]; /* kstat name for vopstats */
657 658 statvfs64_t statvfsbuf; /* Needed to find f_fsid */
658 659 vsk_anchor_t *vskp = NULL; /* vfs <--> kstat anchor */
659 660 kstat_t *ksp; /* Ptr to new kstat */
660 661 avl_index_t where; /* Location in the AVL tree */
661 662
662 663 if (vfsp == NULL || vfsp->vfs_implp == NULL ||
663 664 (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
664 665 return (NULL);
665 666
666 667 /* Need to get the fsid to build a kstat name */
667 668 if (VFS_STATVFS(vfsp, &statvfsbuf) == 0) {
668 669 /* Create a name for our kstats based on fsid */
669 670 (void) snprintf(kstatstr, KSTAT_STRLEN, "%s%lx",
670 671 VOPSTATS_STR, statvfsbuf.f_fsid);
671 672
672 673 /* Allocate and initialize the vsk_anchor_t */
673 674 vskp = kmem_cache_alloc(vsk_anchor_cache, KM_SLEEP);
674 675 bzero(vskp, sizeof (*vskp));
675 676 vskp->vsk_fsid = statvfsbuf.f_fsid;
676 677
677 678 mutex_enter(&vskstat_tree_lock);
678 679 if (avl_find(&vskstat_tree, vskp, &where) == NULL) {
679 680 avl_insert(&vskstat_tree, vskp, where);
680 681 mutex_exit(&vskstat_tree_lock);
681 682
682 683 /*
683 684 * Now that we've got the anchor in the AVL
684 685 * tree, we can create the kstat.
685 686 */
686 687 ksp = new_vskstat(kstatstr, &vfsp->vfs_vopstats);
687 688 if (ksp) {
688 689 vskp->vsk_ksp = ksp;
689 690 }
690 691 } else {
691 692 /* Oops, found one! Release memory and lock. */
692 693 mutex_exit(&vskstat_tree_lock);
693 694 kmem_cache_free(vsk_anchor_cache, vskp);
694 695 vskp = NULL;
695 696 }
696 697 }
697 698 return (vskp);
698 699 }
699 700
700 701 /*
701 702 * We're in the process of tearing down the vfs and need to clean up
702 703 * the data structures associated with the vopstats. Must only be called
703 704 * from dounmount().
704 705 */
705 706 void
706 707 teardown_vopstats(vfs_t *vfsp)
707 708 {
708 709 vsk_anchor_t *vskap;
709 710 avl_index_t where;
710 711
711 712 if (vfsp == NULL || vfsp->vfs_implp == NULL ||
712 713 (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
713 714 return;
714 715
715 716 /* This is a safe check since VFS_STATS must be set (see above) */
716 717 if ((vskap = vfsp->vfs_vskap) == NULL)
717 718 return;
718 719
719 720 /* Whack the pointer right away */
720 721 vfsp->vfs_vskap = NULL;
721 722
722 723 /* Lock the tree, remove the node, and delete the kstat */
723 724 mutex_enter(&vskstat_tree_lock);
724 725 if (avl_find(&vskstat_tree, vskap, &where)) {
725 726 avl_remove(&vskstat_tree, vskap);
726 727 }
727 728
728 729 if (vskap->vsk_ksp) {
729 730 kstat_delete(vskap->vsk_ksp);
730 731 }
731 732 mutex_exit(&vskstat_tree_lock);
732 733
733 734 kmem_cache_free(vsk_anchor_cache, vskap);
734 735 }
735 736
736 737 /*
737 738 * Read or write a vnode. Called from kernel code.
738 739 */
739 740 int
740 741 vn_rdwr(
741 742 enum uio_rw rw,
742 743 struct vnode *vp,
743 744 caddr_t base,
744 745 ssize_t len,
745 746 offset_t offset,
746 747 enum uio_seg seg,
747 748 int ioflag,
748 749 rlim64_t ulimit, /* meaningful only if rw is UIO_WRITE */
749 750 cred_t *cr,
750 751 ssize_t *residp)
751 752 {
752 753 struct uio uio;
753 754 struct iovec iov;
754 755 int error;
755 756 int in_crit = 0;
756 757
757 758 if (rw == UIO_WRITE && ISROFILE(vp))
758 759 return (EROFS);
759 760
760 761 if (len < 0)
761 762 return (EIO);
762 763
763 764 VOPXID_MAP_CR(vp, cr);
764 765
765 766 iov.iov_base = base;
766 767 iov.iov_len = len;
767 768 uio.uio_iov = &iov;
768 769 uio.uio_iovcnt = 1;
769 770 uio.uio_loffset = offset;
770 771 uio.uio_segflg = (short)seg;
771 772 uio.uio_resid = len;
772 773 uio.uio_llimit = ulimit;
773 774
774 775 /*
775 776 * We have to enter the critical region before calling VOP_RWLOCK
776 777 * to avoid a deadlock with ufs.
777 778 */
778 779 if (nbl_need_check(vp)) {
779 780 int svmand;
780 781
781 782 nbl_start_crit(vp, RW_READER);
782 783 in_crit = 1;
783 784 error = nbl_svmand(vp, cr, &svmand);
784 785 if (error != 0)
785 786 goto done;
786 787 if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
787 788 uio.uio_offset, uio.uio_resid, svmand, NULL)) {
788 789 error = EACCES;
789 790 goto done;
790 791 }
791 792 }
792 793
793 794 (void) VOP_RWLOCK(vp,
794 795 rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
795 796 if (rw == UIO_WRITE) {
796 797 uio.uio_fmode = FWRITE;
797 798 uio.uio_extflg = UIO_COPY_DEFAULT;
798 799 error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
799 800 } else {
800 801 uio.uio_fmode = FREAD;
801 802 uio.uio_extflg = UIO_COPY_CACHED;
802 803 error = VOP_READ(vp, &uio, ioflag, cr, NULL);
803 804 }
804 805 VOP_RWUNLOCK(vp,
805 806 rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
806 807 if (residp)
807 808 *residp = uio.uio_resid;
808 809 else if (uio.uio_resid)
809 810 error = EIO;
810 811
811 812 done:
812 813 if (in_crit)
813 814 nbl_end_crit(vp);
814 815 return (error);
815 816 }
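
[Editorial illustration, not part of this diff.] Together with vn_open() and VOP_CLOSE() (both further below), this is the usual in-kernel file I/O idiom. A hedged sketch of a one-shot read from kernel context (the path and buffer are illustrative; error handling abbreviated):

	vnode_t *vp;
	char buf[128];
	ssize_t resid;
	int err;

	err = vn_open("/etc/release", UIO_SYSSPACE, FREAD, 0, &vp, 0, 0);
	if (err == 0) {
		err = vn_rdwr(UIO_READ, vp, buf, sizeof (buf), 0,
		    UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid);
		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
		VN_RELE(vp);
	}
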
816 817
817 818 /*
818 819 * Release a vnode. Call VOP_INACTIVE on last reference or
819 820 * decrement reference count.
820 821 *
821 822 * To avoid race conditions, the v_count is left at 1 for
822 823 * the call to VOP_INACTIVE. This prevents another thread
823 824 * from reclaiming and releasing the vnode *before* the
824 825 * VOP_INACTIVE routine has a chance to destroy the vnode.
825 826 * We can't have more than 1 thread calling VOP_INACTIVE
826 827 * on a vnode.
827 828 */
828 829 void
829 830 vn_rele(vnode_t *vp)
830 831 {
831 832 VERIFY(vp->v_count > 0);
832 833 mutex_enter(&vp->v_lock);
833 834 if (vp->v_count == 1) {
834 835 mutex_exit(&vp->v_lock);
835 836 VOP_INACTIVE(vp, CRED(), NULL);
836 837 return;
837 838 }
838 839 vp->v_count--;
839 840 mutex_exit(&vp->v_lock);
840 841 }
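
[Editorial illustration, not part of this diff.] Every vn_rele() balances either a VN_HOLD() or a held reference handed back by a lookup; the canonical pairing is simply:

	VN_HOLD(vp);		/* take a reference */
	/* ... use vp ... */
	vn_rele(vp);		/* drop it; last ref triggers VOP_INACTIVE */
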
841 842
842 843 /*
843 844 * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
844 845 * as a single reference, so v_count is not decremented until the last DNLC hold
845 846 * is released. This makes it possible to distinguish vnodes that are referenced
846 847 * only by the DNLC.
847 848 */
848 849 void
849 850 vn_rele_dnlc(vnode_t *vp)
850 851 {
851 852 VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
852 853 mutex_enter(&vp->v_lock);
853 854 if (--vp->v_count_dnlc == 0) {
854 855 if (vp->v_count == 1) {
855 856 mutex_exit(&vp->v_lock);
856 857 VOP_INACTIVE(vp, CRED(), NULL);
857 858 return;
858 859 }
859 860 vp->v_count--;
860 861 }
861 862 mutex_exit(&vp->v_lock);
862 863 }
863 864
864 865 /*
865 866 * Like vn_rele() except that it clears v_stream under v_lock.
866 867 * This is used by sockfs when it dismantles the association between
867 868 * the sockfs node and the vnode in the underlying file system.
868 869 * v_lock has to be held to prevent a thread coming through the lookupname
869 870 * path from accessing a stream head that is going away.
870 871 */
871 872 void
872 873 vn_rele_stream(vnode_t *vp)
873 874 {
874 875 VERIFY(vp->v_count > 0);
875 876 mutex_enter(&vp->v_lock);
876 877 vp->v_stream = NULL;
877 878 if (vp->v_count == 1) {
878 879 mutex_exit(&vp->v_lock);
879 880 VOP_INACTIVE(vp, CRED(), NULL);
880 881 return;
881 882 }
882 883 vp->v_count--;
883 884 mutex_exit(&vp->v_lock);
884 885 }
885 886
886 887 static void
887 888 vn_rele_inactive(vnode_t *vp)
888 889 {
889 890 VOP_INACTIVE(vp, CRED(), NULL);
890 891 }
891 892
892 893 /*
893 894 * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
894 895 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
895 896 * the file system as a result of releasing the vnode. Note, file systems
896 897 * already have to handle the race where the vnode is incremented before the
897 898 * inactive routine is called and does its locking.
898 899 *
899 900 * Warning: Excessive use of this routine can lead to performance problems.
900 901 * This is because taskqs throttle back allocation if too many are created.
901 902 */
902 903 void
903 904 vn_rele_async(vnode_t *vp, taskq_t *taskq)
904 905 {
905 906 VERIFY(vp->v_count > 0);
906 907 mutex_enter(&vp->v_lock);
907 908 if (vp->v_count == 1) {
908 909 mutex_exit(&vp->v_lock);
909 910 VERIFY(taskq_dispatch(taskq, (task_func_t *)vn_rele_inactive,
910 911 vp, TQ_SLEEP) != NULL);
911 912 return;
912 913 }
913 914 vp->v_count--;
914 915 mutex_exit(&vp->v_lock);
915 916 }
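
[Editorial illustration, not part of this diff.] A caller that cannot risk re-entering the file system just supplies a suitable taskq; for example (system_taskq is used here purely for illustration, subject to the throttling caveat above):

	vn_rele_async(vp, system_taskq);
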
916 917
917 918 int
918 919 vn_open(
919 920 char *pnamep,
920 921 enum uio_seg seg,
921 922 int filemode,
922 923 int createmode,
923 924 struct vnode **vpp,
924 925 enum create crwhy,
925 926 mode_t umask)
926 927 {
927 928 return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
928 929 umask, NULL, -1));
929 930 }
930 931
931 932
932 933 /*
933 934 * Open/create a vnode.
934 935 * This may be callable by the kernel, the only known use
935 936 * of user context being that the current user credentials
936 937 * are used for permissions. crwhy is defined iff filemode & FCREAT.
937 938 */
938 939 int
939 940 vn_openat(
940 941 char *pnamep,
941 942 enum uio_seg seg,
942 943 int filemode,
943 944 int createmode,
944 945 struct vnode **vpp,
945 946 enum create crwhy,
946 947 mode_t umask,
947 948 struct vnode *startvp,
948 949 int fd)
949 950 {
950 951 struct vnode *vp;
951 952 int mode;
952 953 int accessflags;
953 954 int error;
954 955 int in_crit = 0;
955 956 int open_done = 0;
956 957 int shrlock_done = 0;
957 958 struct vattr vattr;
958 959 enum symfollow follow;
959 960 int estale_retry = 0;
960 961 struct shrlock shr;
961 962 struct shr_locowner shr_own;
962 963
963 964 mode = 0;
964 965 accessflags = 0;
965 966 if (filemode & FREAD)
966 967 mode |= VREAD;
967 968 if (filemode & (FWRITE|FTRUNC))
968 969 mode |= VWRITE;
969 970 if (filemode & (FSEARCH|FEXEC|FXATTRDIROPEN))
970 971 mode |= VEXEC;
971 972
972 973 /* symlink interpretation */
973 974 if (filemode & FNOFOLLOW)
974 975 follow = NO_FOLLOW;
975 976 else
976 977 follow = FOLLOW;
977 978
978 979 if (filemode & FAPPEND)
979 980 accessflags |= V_APPEND;
980 981
981 982 top:
982 983 if (filemode & FCREAT) {
983 984 enum vcexcl excl;
984 985
985 986 /*
986 987 * Wish to create a file.
987 988 */
988 989 vattr.va_type = VREG;
989 990 vattr.va_mode = createmode;
990 991 vattr.va_mask = AT_TYPE|AT_MODE;
991 992 if (filemode & FTRUNC) {
992 993 vattr.va_size = 0;
993 994 vattr.va_mask |= AT_SIZE;
994 995 }
995 996 if (filemode & FEXCL)
996 997 excl = EXCL;
997 998 else
998 999 excl = NONEXCL;
999 1000
1000 1001 if (error =
1001 1002 vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
1002 1003 (filemode & ~(FTRUNC|FEXCL)), umask, startvp))
1003 1004 return (error);
1004 1005 } else {
1005 1006 /*
1006 1007 * Wish to open a file. Just look it up.
1007 1008 */
1008 1009 if (error = lookupnameat(pnamep, seg, follow,
1009 1010 NULLVPP, &vp, startvp)) {
1010 1011 if ((error == ESTALE) &&
1011 1012 fs_need_estale_retry(estale_retry++))
1012 1013 goto top;
1013 1014 return (error);
1014 1015 }
1015 1016
1016 1017 /*
1017 1018 * Get the attributes to check whether file is large.
1018 1019 * We do this only if the FOFFMAX flag is not set and
1019 1020 * only for regular files.
1020 1021 */
1021 1022
1022 1023 if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
1023 1024 vattr.va_mask = AT_SIZE;
1024 1025 if ((error = VOP_GETATTR(vp, &vattr, 0,
1025 1026 CRED(), NULL))) {
1026 1027 goto out;
1027 1028 }
1028 1029 if (vattr.va_size > (u_offset_t)MAXOFF32_T) {
1029 1030 /*
1030 1031 * Large File API - regular open fails
1031 1032 * if FOFFMAX flag is set in file mode
1032 1033 */
1033 1034 error = EOVERFLOW;
1034 1035 goto out;
1035 1036 }
1036 1037 }
1037 1038 /*
1038 1039 * Can't write directories, active texts, or
1039 1040 * read-only filesystems. Can't truncate files
1040 1041 * on which mandatory locking is in effect.
1041 1042 */
1042 1043 if (filemode & (FWRITE|FTRUNC)) {
1043 1044 /*
1044 1045 * Allow writable directory if VDIROPEN flag is set.
1045 1046 */
1046 1047 if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
1047 1048 error = EISDIR;
1048 1049 goto out;
1049 1050 }
1050 1051 if (ISROFILE(vp)) {
1051 1052 error = EROFS;
1052 1053 goto out;
1053 1054 }
1054 1055 /*
1055 1056 * Can't truncate files on which
1056 1057 * sysv mandatory locking is in effect.
1057 1058 */
1058 1059 if (filemode & FTRUNC) {
1059 1060 vnode_t *rvp;
1060 1061
1061 1062 if (VOP_REALVP(vp, &rvp, NULL) != 0)
1062 1063 rvp = vp;
1063 1064 if (rvp->v_filocks != NULL) {
1064 1065 vattr.va_mask = AT_MODE;
1065 1066 if ((error = VOP_GETATTR(vp,
1066 1067 &vattr, 0, CRED(), NULL)) == 0 &&
1067 1068 MANDLOCK(vp, vattr.va_mode))
1068 1069 error = EAGAIN;
1069 1070 }
1070 1071 }
1071 1072 if (error)
1072 1073 goto out;
1073 1074 }
1074 1075 /*
1075 1076 * Check permissions.
1076 1077 */
1077 1078 if (error = VOP_ACCESS(vp, mode, accessflags, CRED(), NULL))
1078 1079 goto out;
1079 1080 /*
1080 1081 * Require FSEARCH to return a directory.
1081 1082 * Require FEXEC to return a regular file.
1082 1083 */
1083 1084 if ((filemode & FSEARCH) && vp->v_type != VDIR) {
1084 1085 error = ENOTDIR;
1085 1086 goto out;
1086 1087 }
1087 1088 if ((filemode & FEXEC) && vp->v_type != VREG) {
1088 1089 error = ENOEXEC; /* XXX: error code? */
1089 1090 goto out;
1090 1091 }
1091 1092 }
1092 1093
1093 1094 /*
1094 1095 * Do remaining checks for FNOFOLLOW and FNOLINKS.
1095 1096 */
1096 1097 if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
1097 1098 error = ELOOP;
1098 1099 goto out;
1099 1100 }
1100 1101 if (filemode & FNOLINKS) {
1101 1102 vattr.va_mask = AT_NLINK;
1102 1103 if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))) {
1103 1104 goto out;
1104 1105 }
1105 1106 if (vattr.va_nlink != 1) {
1106 1107 error = EMLINK;
1107 1108 goto out;
1108 1109 }
1109 1110 }
1110 1111
1111 1112 /*
1112 1113 * Opening a socket corresponding to the AF_UNIX pathname
1113 1114 * in the filesystem name space is not supported.
1114 1115 * However, VSOCK nodes in namefs are supported in order
1115 1116 * to make fattach work for sockets.
1116 1117 *
1117 1118 * XXX This uses VOP_REALVP to distinguish between
1118 1119 * an unopened namefs node (where VOP_REALVP returns a
1119 1120 * different VSOCK vnode) and a VSOCK created by vn_create
1120 1121 * in some file system (where VOP_REALVP would never return
1121 1122 * a different vnode).
1122 1123 */
1123 1124 if (vp->v_type == VSOCK) {
1124 1125 struct vnode *nvp;
1125 1126
1126 1127 error = VOP_REALVP(vp, &nvp, NULL);
1127 1128 if (error != 0 || nvp == NULL || nvp == vp ||
1128 1129 nvp->v_type != VSOCK) {
1129 1130 error = EOPNOTSUPP;
1130 1131 goto out;
1131 1132 }
1132 1133 }
1133 1134
1134 1135 if ((vp->v_type == VREG) && nbl_need_check(vp)) {
1135 1136 /* get share reservation */
1136 1137 shr.s_access = 0;
1137 1138 if (filemode & FWRITE)
1138 1139 shr.s_access |= F_WRACC;
1139 1140 if (filemode & FREAD)
1140 1141 shr.s_access |= F_RDACC;
1141 1142 shr.s_deny = 0;
1142 1143 shr.s_sysid = 0;
1143 1144 shr.s_pid = ttoproc(curthread)->p_pid;
1144 1145 shr_own.sl_pid = shr.s_pid;
1145 1146 shr_own.sl_id = fd;
1146 1147 shr.s_own_len = sizeof (shr_own);
1147 1148 shr.s_owner = (caddr_t)&shr_own;
1148 1149 error = VOP_SHRLOCK(vp, F_SHARE_NBMAND, &shr, filemode, CRED(),
1149 1150 NULL);
1150 1151 if (error)
1151 1152 goto out;
1152 1153 shrlock_done = 1;
1153 1154
1154 1155 /* nbmand conflict check if truncating file */
1155 1156 if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
1156 1157 nbl_start_crit(vp, RW_READER);
1157 1158 in_crit = 1;
1158 1159
1159 1160 vattr.va_mask = AT_SIZE;
1160 1161 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
1161 1162 goto out;
1162 1163 if (nbl_conflict(vp, NBL_WRITE, 0, vattr.va_size, 0,
1163 1164 NULL)) {
1164 1165 error = EACCES;
1165 1166 goto out;
1166 1167 }
1167 1168 }
1168 1169 }
1169 1170
1170 1171 /*
1171 1172 * Do opening protocol.
1172 1173 */
1173 1174 error = VOP_OPEN(&vp, filemode, CRED(), NULL);
1174 1175 if (error)
1175 1176 goto out;
1176 1177 open_done = 1;
1177 1178
1178 1179 /*
1179 1180 * Truncate if required.
1180 1181 */
1181 1182 if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
1182 1183 vattr.va_size = 0;
1183 1184 vattr.va_mask = AT_SIZE;
1184 1185 if ((error = VOP_SETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
1185 1186 goto out;
1186 1187 }
1187 1188 out:
1188 1189 ASSERT(vp->v_count > 0);
1189 1190
1190 1191 if (in_crit) {
1191 1192 nbl_end_crit(vp);
1192 1193 in_crit = 0;
1193 1194 }
1194 1195 if (error) {
1195 1196 if (open_done) {
1196 1197 (void) VOP_CLOSE(vp, filemode, 1, (offset_t)0, CRED(),
1197 1198 NULL);
1198 1199 open_done = 0;
1199 1200 shrlock_done = 0;
1200 1201 }
1201 1202 if (shrlock_done) {
1202 1203 (void) VOP_SHRLOCK(vp, F_UNSHARE, &shr, 0, CRED(),
1203 1204 NULL);
1204 1205 shrlock_done = 0;
1205 1206 }
1206 1207
1207 1208 /*
1208 1209 * The following clause was added to handle a problem
1209 1210 * with NFS consistency. It is possible that a lookup
1210 1211 * of the file to be opened succeeded, but the file
1211 1212 * itself doesn't actually exist on the server. This
1212 1213 * is chiefly due to the DNLC containing an entry for
1213 1214 * the file which has been removed on the server. In
1214 1215 * this case, we just start over. If there was some
1215 1216 * other cause for the ESTALE error, then the lookup
1216 1217 * of the file will fail and the error will be returned
1217 1218 * above instead of looping around from here.
1218 1219 */
1219 1220 VN_RELE(vp);
1220 1221 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1221 1222 goto top;
1222 1223 } else
1223 1224 *vpp = vp;
1224 1225 return (error);
1225 1226 }
1226 1227
1227 1228 /*
1228 1229 * The following two accessor functions are for the NFSv4 server. Since there
1229 1230 * is no VOP_OPEN_UP/DOWNGRADE we need a way for the NFS server to keep the
1230 1231 * vnode open counts correct when a client "upgrades" an open or does an
1231 1232 * open_downgrade. In NFS, an upgrade or downgrade can not only change the
1232 1233 * open mode (add or subtract read or write), but also change the share/deny
1233 1234 * modes. However, share reservations are not integrated with OPEN, yet, so
1234 1235 * we need to handle each separately. These functions are cleaner than having
1235 1236 * the NFS server manipulate the counts directly, however, nobody else should
1236 1237 * use these functions.
1237 1238 */
1238 1239 void
1239 1240 vn_open_upgrade(
1240 1241 vnode_t *vp,
1241 1242 int filemode)
1242 1243 {
1243 1244 ASSERT(vp->v_type == VREG);
1244 1245
1245 1246 if (filemode & FREAD)
1246 1247 atomic_add_32(&(vp->v_rdcnt), 1);
1247 1248 if (filemode & FWRITE)
1248 1249 atomic_add_32(&(vp->v_wrcnt), 1);
1249 1250
1250 1251 }
1251 1252
1252 1253 void
1253 1254 vn_open_downgrade(
1254 1255 vnode_t *vp,
1255 1256 int filemode)
1256 1257 {
1257 1258 ASSERT(vp->v_type == VREG);
1258 1259
1259 1260 if (filemode & FREAD) {
1260 1261 ASSERT(vp->v_rdcnt > 0);
1261 1262 atomic_add_32(&(vp->v_rdcnt), -1);
1262 1263 }
1263 1264 if (filemode & FWRITE) {
1264 1265 ASSERT(vp->v_wrcnt > 0);
1265 1266 atomic_add_32(&(vp->v_wrcnt), -1);
1266 1267 }
1267 1268
1268 1269 }
1269 1270
1270 1271 int
1271 1272 vn_create(
1272 1273 char *pnamep,
1273 1274 enum uio_seg seg,
1274 1275 struct vattr *vap,
1275 1276 enum vcexcl excl,
1276 1277 int mode,
1277 1278 struct vnode **vpp,
1278 1279 enum create why,
1279 1280 int flag,
1280 1281 mode_t umask)
1281 1282 {
1282 1283 return (vn_createat(pnamep, seg, vap, excl, mode, vpp, why, flag,
1283 1284 umask, NULL));
1284 1285 }
1285 1286
1286 1287 /*
1287 1288 * Create a vnode (makenode).
1288 1289 */
1289 1290 int
1290 1291 vn_createat(
1291 1292 char *pnamep,
1292 1293 enum uio_seg seg,
1293 1294 struct vattr *vap,
1294 1295 enum vcexcl excl,
1295 1296 int mode,
1296 1297 struct vnode **vpp,
1297 1298 enum create why,
1298 1299 int flag,
1299 1300 mode_t umask,
1300 1301 struct vnode *startvp)
1301 1302 {
1302 1303 struct vnode *dvp; /* ptr to parent dir vnode */
1303 1304 struct vnode *vp = NULL;
1304 1305 struct pathname pn;
1305 1306 int error;
1306 1307 int in_crit = 0;
1307 1308 struct vattr vattr;
1308 1309 enum symfollow follow;
1309 1310 int estale_retry = 0;
1310 1311 uint32_t auditing = AU_AUDITING();
1311 1312
1312 1313 ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1313 1314
1314 1315 /* symlink interpretation */
1315 1316 if ((flag & FNOFOLLOW) || excl == EXCL)
1316 1317 follow = NO_FOLLOW;
1317 1318 else
1318 1319 follow = FOLLOW;
1319 1320 flag &= ~(FNOFOLLOW|FNOLINKS);
1320 1321
1321 1322 top:
1322 1323 /*
1323 1324 * Lookup directory.
1324 1325 * If new object is a file, call lower level to create it.
1325 1326 * Note that it is up to the lower level to enforce exclusive
1326 1327 * creation, if the file is already there.
1327 1328 * This allows the lower level to do whatever
1328 1329 * locking or protocol that is needed to prevent races.
1329 1330 * If the new object is directory call lower level to make
1330 1331 * the new directory, with "." and "..".
1331 1332 */
1332 1333 if (error = pn_get(pnamep, seg, &pn))
1333 1334 return (error);
1334 1335 if (auditing)
1335 1336 audit_vncreate_start();
1336 1337 dvp = NULL;
1337 1338 *vpp = NULL;
1338 1339 /*
1339 1340 * lookup will find the parent directory for the vnode.
1340 1341 * When it is done the pn holds the name of the entry
1341 1342 * in the directory.
1342 1343 * If this is a non-exclusive create we also find the node itself.
1343 1344 */
1344 1345 error = lookuppnat(&pn, NULL, follow, &dvp,
1345 1346 (excl == EXCL) ? NULLVPP : vpp, startvp);
1346 1347 if (error) {
1347 1348 pn_free(&pn);
1348 1349 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1349 1350 goto top;
1350 1351 if (why == CRMKDIR && error == EINVAL)
1351 1352 error = EEXIST; /* SVID */
1352 1353 return (error);
1353 1354 }
1354 1355
1355 1356 if (why != CRMKNOD)
1356 1357 vap->va_mode &= ~VSVTX;
1357 1358
1358 1359 /*
1359 1360 * If default ACLs are defined for the directory don't apply the
1360 1361 * umask if umask is passed.
1361 1362 */
1362 1363
1363 1364 if (umask) {
1364 1365
1365 1366 vsecattr_t vsec;
1366 1367
1367 1368 vsec.vsa_aclcnt = 0;
1368 1369 vsec.vsa_aclentp = NULL;
1369 1370 vsec.vsa_dfaclcnt = 0;
1370 1371 vsec.vsa_dfaclentp = NULL;
1371 1372 vsec.vsa_mask = VSA_DFACLCNT;
1372 1373 error = VOP_GETSECATTR(dvp, &vsec, 0, CRED(), NULL);
1373 1374 /*
1374 1375 * If error is ENOSYS then treat it as no error
1375 1376 * Don't want to force all file systems to support
1376 1377 * aclent_t style of ACL's.
1377 1378 */
1378 1379 if (error == ENOSYS)
1379 1380 error = 0;
1380 1381 if (error) {
1381 1382 if (*vpp != NULL)
1382 1383 VN_RELE(*vpp);
1383 1384 goto out;
1384 1385 } else {
1385 1386 /*
1386 1387 * Apply the umask if no default ACLs.
1387 1388 */
1388 1389 if (vsec.vsa_dfaclcnt == 0)
1389 1390 vap->va_mode &= ~umask;
1390 1391
1391 1392 /*
1392 1393 * VOP_GETSECATTR() may have allocated memory for
1393 1394 * ACLs we didn't request, so double-check and
1394 1395 * free it if necessary.
1395 1396 */
1396 1397 if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
1397 1398 kmem_free((caddr_t)vsec.vsa_aclentp,
1398 1399 vsec.vsa_aclcnt * sizeof (aclent_t));
1399 1400 if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
1400 1401 kmem_free((caddr_t)vsec.vsa_dfaclentp,
1401 1402 vsec.vsa_dfaclcnt * sizeof (aclent_t));
1402 1403 }
1403 1404 }
1404 1405
1405 1406 /*
1406 1407 * In general we want to generate EROFS if the file system is
1407 1408 * readonly. However, POSIX (IEEE Std. 1003.1) section 5.3.1
1408 1409 * documents the open system call, and it says that O_CREAT has no
1409 1410 * effect if the file already exists. Bug 1119649 states
1410 1411 * that open(path, O_CREAT, ...) fails when attempting to open an
1411 1412 * existing file on a read only file system. Thus, the first part
1412 1413 * of the following if statement has 3 checks:
1413 1414 * if the file exists &&
1414 1415 * it is being open with write access &&
1415 1416 * the file system is read only
1416 1417 * then generate EROFS
1417 1418 */
1418 1419 if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
1419 1420 (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
1420 1421 if (*vpp)
1421 1422 VN_RELE(*vpp);
1422 1423 error = EROFS;
1423 1424 } else if (excl == NONEXCL && *vpp != NULL) {
1424 1425 vnode_t *rvp;
1425 1426
1426 1427 /*
1427 1428 * File already exists. If a mandatory lock has been
1428 1429 * applied, return error.
1429 1430 */
1430 1431 vp = *vpp;
1431 1432 if (VOP_REALVP(vp, &rvp, NULL) != 0)
1432 1433 rvp = vp;
1433 1434 if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
1434 1435 nbl_start_crit(vp, RW_READER);
1435 1436 in_crit = 1;
1436 1437 }
1437 1438 if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
1438 1439 vattr.va_mask = AT_MODE|AT_SIZE;
1439 1440 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) {
1440 1441 goto out;
1441 1442 }
1442 1443 if (MANDLOCK(vp, vattr.va_mode)) {
1443 1444 error = EAGAIN;
1444 1445 goto out;
1445 1446 }
1446 1447 /*
1447 1448 * File cannot be truncated if non-blocking mandatory
1448 1449 * locks are currently on the file.
1449 1450 */
1450 1451 if ((vap->va_mask & AT_SIZE) && in_crit) {
1451 1452 u_offset_t offset;
1452 1453 ssize_t length;
1453 1454
1454 1455 offset = vap->va_size > vattr.va_size ?
1455 1456 vattr.va_size : vap->va_size;
1456 1457 length = vap->va_size > vattr.va_size ?
1457 1458 vap->va_size - vattr.va_size :
1458 1459 vattr.va_size - vap->va_size;
1459 1460 if (nbl_conflict(vp, NBL_WRITE, offset,
1460 1461 length, 0, NULL)) {
1461 1462 error = EACCES;
1462 1463 goto out;
1463 1464 }
1464 1465 }
1465 1466 }
1466 1467
1467 1468 /*
1468 1469 * If the file is the root of a VFS, we've crossed a
1469 1470 * mount point and the "containing" directory that we
1470 1471 * acquired above (dvp) is irrelevant because it's in
1471 1472 * a different file system. We apply VOP_CREATE to the
1472 1473 * target itself instead of to the containing directory
1473 1474 * and supply a null path name to indicate (conventionally)
1474 1475 * the node itself as the "component" of interest.
1475 1476 *
1476 1477 * The intercession of the file system is necessary to
1477 1478 * ensure that the appropriate permission checks are
1478 1479 * done.
1479 1480 */
1480 1481 if (vp->v_flag & VROOT) {
1481 1482 ASSERT(why != CRMKDIR);
1482 1483 error = VOP_CREATE(vp, "", vap, excl, mode, vpp,
1483 1484 CRED(), flag, NULL, NULL);
1484 1485 /*
1485 1486 * If the create succeeded, it will have created
1486 1487 * a new reference to the vnode. Give up the
1487 1488 * original reference. The assertion should not
1488 1489 * get triggered because NBMAND locks only apply to
1489 1490 * VREG files. And if in_crit is non-zero for some
1490 1491 * reason, detect that here, rather than when we
1491 1492 * deference a null vp.
1492 1493 */
1493 1494 ASSERT(in_crit == 0);
1494 1495 VN_RELE(vp);
1495 1496 vp = NULL;
1496 1497 goto out;
1497 1498 }
1498 1499
1499 1500 /*
1500 1501 * Large File API - non-large open (FOFFMAX flag not set)
1501 1502 * of regular file fails if the file size exceeds MAXOFF32_T.
1502 1503 */
1503 1504 if (why != CRMKDIR &&
1504 1505 !(flag & FOFFMAX) &&
1505 1506 (vp->v_type == VREG)) {
1506 1507 vattr.va_mask = AT_SIZE;
1507 1508 if ((error = VOP_GETATTR(vp, &vattr, 0,
1508 1509 CRED(), NULL))) {
1509 1510 goto out;
1510 1511 }
1511 1512 if ((vattr.va_size > (u_offset_t)MAXOFF32_T)) {
1512 1513 error = EOVERFLOW;
1513 1514 goto out;
1514 1515 }
1515 1516 }
1516 1517 }
1517 1518
1518 1519 if (error == 0) {
1519 1520 /*
1520 1521 * Call mkdir() if specified, otherwise create().
1521 1522 */
1522 1523 int must_be_dir = pn_fixslash(&pn); /* trailing '/'? */
1523 1524
1524 1525 if (why == CRMKDIR)
1525 1526 /*
1526 1527 * N.B., if vn_createat() ever requests
1527 1528 * case-insensitive behavior then it will need
1528 1529 * to be passed to VOP_MKDIR(). VOP_CREATE()
1529 1530 * will already get it via "flag"
1530 1531 */
1531 1532 error = VOP_MKDIR(dvp, pn.pn_path, vap, vpp, CRED(),
1532 1533 NULL, 0, NULL);
1533 1534 else if (!must_be_dir)
1534 1535 error = VOP_CREATE(dvp, pn.pn_path, vap,
1535 1536 excl, mode, vpp, CRED(), flag, NULL, NULL);
1536 1537 else
1537 1538 error = ENOTDIR;
1538 1539 }
1539 1540
1540 1541 out:
1541 1542
1542 1543 if (auditing)
1543 1544 audit_vncreate_finish(*vpp, error);
1544 1545 if (in_crit) {
1545 1546 nbl_end_crit(vp);
1546 1547 in_crit = 0;
1547 1548 }
1548 1549 if (vp != NULL) {
1549 1550 VN_RELE(vp);
1550 1551 vp = NULL;
1551 1552 }
1552 1553 pn_free(&pn);
1553 1554 VN_RELE(dvp);
1554 1555 /*
1555 1556 * The following clause was added to handle a problem
1556 1557 * with NFS consistency. It is possible that a lookup
1557 1558 * of the file to be created succeeded, but the file
1558 1559 * itself doesn't actually exist on the server. This
1559 1560 * is chiefly due to the DNLC containing an entry for
1560 1561 * the file which has been removed on the server. In
1561 1562 * this case, we just start over. If there was some
1562 1563 * other cause for the ESTALE error, then the lookup
1563 1564 * of the file will fail and the error will be returned
1564 1565 * above instead of looping around from here.
1565 1566 */
1566 1567 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1567 1568 goto top;
1568 1569 return (error);
1569 1570 }
1570 1571
1571 1572 int
1572 1573 vn_link(char *from, char *to, enum uio_seg seg)
1573 1574 {
1574 1575 return (vn_linkat(NULL, from, NO_FOLLOW, NULL, to, seg));
1575 1576 }
1576 1577
1577 1578 int
1578 1579 vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
1579 1580 vnode_t *tstartvp, char *to, enum uio_seg seg)
1580 1581 {
1581 1582 struct vnode *fvp; /* from vnode ptr */
1582 1583 struct vnode *tdvp; /* to directory vnode ptr */
1583 1584 struct pathname pn;
1584 1585 int error;
1585 1586 struct vattr vattr;
1586 1587 dev_t fsid;
1587 1588 int estale_retry = 0;
1588 1589 uint32_t auditing = AU_AUDITING();
1589 1590
1590 1591 top:
1591 1592 fvp = tdvp = NULL;
1592 1593 if (error = pn_get(to, seg, &pn))
1593 1594 return (error);
1594 1595 if (auditing && fstartvp != NULL)
1595 1596 audit_setfsat_path(1);
1596 1597 if (error = lookupnameat(from, seg, follow, NULLVPP, &fvp, fstartvp))
1597 1598 goto out;
1598 1599 if (auditing && tstartvp != NULL)
1599 1600 audit_setfsat_path(3);
1600 1601 if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP, tstartvp))
1601 1602 goto out;
1602 1603 /*
1603 1604 * Make sure both source vnode and target directory vnode are
1604 1605 * in the same vfs and that it is writeable.
1605 1606 */
1606 1607 vattr.va_mask = AT_FSID;
1607 1608 if (error = VOP_GETATTR(fvp, &vattr, 0, CRED(), NULL))
1608 1609 goto out;
1609 1610 fsid = vattr.va_fsid;
1610 1611 vattr.va_mask = AT_FSID;
1611 1612 if (error = VOP_GETATTR(tdvp, &vattr, 0, CRED(), NULL))
1612 1613 goto out;
1613 1614 if (fsid != vattr.va_fsid) {
1614 1615 error = EXDEV;
1615 1616 goto out;
1616 1617 }
1617 1618 if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
1618 1619 error = EROFS;
1619 1620 goto out;
1620 1621 }
1621 1622 /*
1622 1623 * Do the link.
1623 1624 */
1624 1625 (void) pn_fixslash(&pn);
1625 1626 error = VOP_LINK(tdvp, fvp, pn.pn_path, CRED(), NULL, 0);
1626 1627 out:
1627 1628 pn_free(&pn);
1628 1629 if (fvp)
1629 1630 VN_RELE(fvp);
1630 1631 if (tdvp)
1631 1632 VN_RELE(tdvp);
1632 1633 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1633 1634 goto top;
1634 1635 return (error);
1635 1636 }
1636 1637
1637 1638 int
1638 1639 vn_rename(char *from, char *to, enum uio_seg seg)
1639 1640 {
1640 1641 return (vn_renameat(NULL, from, NULL, to, seg));
1641 1642 }
1642 1643
1643 1644 int
1644 1645 vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
1645 1646 char *tname, enum uio_seg seg)
1646 1647 {
1647 1648 int error;
1648 1649 struct vattr vattr;
1649 1650 struct pathname fpn; /* from pathname */
1650 1651 struct pathname tpn; /* to pathname */
1651 1652 dev_t fsid;
1652 1653 int in_crit_src, in_crit_targ;
1653 1654 vnode_t *fromvp, *fvp;
1654 1655 vnode_t *tovp, *targvp;
1655 1656 int estale_retry = 0;
1656 1657 uint32_t auditing = AU_AUDITING();
1657 1658
1658 1659 top:
1659 1660 fvp = fromvp = tovp = targvp = NULL;
1660 1661 in_crit_src = in_crit_targ = 0;
1661 1662 /*
1662 1663 * Get to and from pathnames.
1663 1664 */
1664 1665 if (error = pn_get(fname, seg, &fpn))
1665 1666 return (error);
1666 1667 if (error = pn_get(tname, seg, &tpn)) {
1667 1668 pn_free(&fpn);
1668 1669 return (error);
1669 1670 }
1670 1671
1671 1672 /*
1672 1673 	 * First we need to resolve the correct directories.
1673 1674 	 * The passed-in directories may only be a starting point,
1674 1675 * but we need the real directories the file(s) live in.
1675 1676 * For example the fname may be something like usr/lib/sparc
1676 1677 * and we were passed in the / directory, but we need to
1677 1678 * use the lib directory for the rename.
1678 1679 */
1679 1680
1680 1681 if (auditing && fdvp != NULL)
1681 1682 audit_setfsat_path(1);
1682 1683 /*
1683 1684 * Lookup to and from directories.
1684 1685 */
1685 1686 if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
1686 1687 goto out;
1687 1688 }
1688 1689
1689 1690 /*
1690 1691 * Make sure there is an entry.
1691 1692 */
1692 1693 if (fvp == NULL) {
1693 1694 error = ENOENT;
1694 1695 goto out;
1695 1696 }
1696 1697
1697 1698 if (auditing && tdvp != NULL)
1698 1699 audit_setfsat_path(3);
1699 1700 if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, &targvp, tdvp)) {
1700 1701 goto out;
1701 1702 }
1702 1703
1703 1704 /*
1704 1705 * Make sure both the from vnode directory and the to directory
1705 1706 * are in the same vfs and the to directory is writable.
1706 1707 * We check fsid's, not vfs pointers, so loopback fs works.
1707 1708 */
1708 1709 if (fromvp != tovp) {
1709 1710 vattr.va_mask = AT_FSID;
1710 1711 if (error = VOP_GETATTR(fromvp, &vattr, 0, CRED(), NULL))
1711 1712 goto out;
1712 1713 fsid = vattr.va_fsid;
1713 1714 vattr.va_mask = AT_FSID;
1714 1715 if (error = VOP_GETATTR(tovp, &vattr, 0, CRED(), NULL))
1715 1716 goto out;
1716 1717 if (fsid != vattr.va_fsid) {
1717 1718 error = EXDEV;
1718 1719 goto out;
1719 1720 }
1720 1721 }
1721 1722
1722 1723 if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
1723 1724 error = EROFS;
1724 1725 goto out;
1725 1726 }
1726 1727
1727 1728 if (targvp && (fvp != targvp)) {
1728 1729 nbl_start_crit(targvp, RW_READER);
1729 1730 in_crit_targ = 1;
1730 1731 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
1731 1732 error = EACCES;
1732 1733 goto out;
1733 1734 }
1734 1735 }
1735 1736
1736 1737 if (nbl_need_check(fvp)) {
1737 1738 nbl_start_crit(fvp, RW_READER);
1738 1739 in_crit_src = 1;
1739 1740 if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0, NULL)) {
1740 1741 error = EACCES;
1741 1742 goto out;
1742 1743 }
1743 1744 }
1744 1745
1745 1746 /*
1746 1747 * Do the rename.
1747 1748 */
1748 1749 (void) pn_fixslash(&tpn);
1749 1750 error = VOP_RENAME(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED(),
1750 1751 NULL, 0);
1751 1752
1752 1753 out:
1753 1754 pn_free(&fpn);
1754 1755 pn_free(&tpn);
1755 1756 if (in_crit_src)
1756 1757 nbl_end_crit(fvp);
1757 1758 if (in_crit_targ)
1758 1759 nbl_end_crit(targvp);
1759 1760 if (fromvp)
1760 1761 VN_RELE(fromvp);
1761 1762 if (tovp)
1762 1763 VN_RELE(tovp);
1763 1764 if (targvp)
1764 1765 VN_RELE(targvp);
1765 1766 if (fvp)
1766 1767 VN_RELE(fvp);
1767 1768 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1768 1769 goto top;
1769 1770 return (error);
1770 1771 }
1771 1772
1772 1773 /*
1773 1774 * Remove a file or directory.
1774 1775 */
1775 1776 int
1776 1777 vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
1777 1778 {
1778 1779 return (vn_removeat(NULL, fnamep, seg, dirflag));
1779 1780 }
1780 1781
1781 1782 int
1782 1783 vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
1783 1784 {
1784 1785 struct vnode *vp; /* entry vnode */
1785 1786 struct vnode *dvp; /* ptr to parent dir vnode */
1786 1787 struct vnode *coveredvp;
1787 1788 struct pathname pn; /* name of entry */
1788 1789 enum vtype vtype;
1789 1790 int error;
1790 1791 struct vfs *vfsp;
1791 1792 struct vfs *dvfsp; /* ptr to parent dir vfs */
1792 1793 int in_crit = 0;
1793 1794 int estale_retry = 0;
1794 1795
1795 1796 top:
1796 1797 if (error = pn_get(fnamep, seg, &pn))
1797 1798 return (error);
1798 1799 dvp = vp = NULL;
1799 1800 if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
1800 1801 pn_free(&pn);
1801 1802 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1802 1803 goto top;
1803 1804 return (error);
1804 1805 }
1805 1806
1806 1807 /*
1807 1808 * Make sure there is an entry.
1808 1809 */
1809 1810 if (vp == NULL) {
1810 1811 error = ENOENT;
1811 1812 goto out;
1812 1813 }
1813 1814
1814 1815 vfsp = vp->v_vfsp;
1815 1816 dvfsp = dvp->v_vfsp;
1816 1817
1817 1818 /*
1818 1819 * If the named file is the root of a mounted filesystem, fail,
1819 1820 * unless it's marked unlinkable. In that case, unmount the
1820 1821 * filesystem and proceed to unlink the covered vnode. (If the
1821 1822 * covered vnode is a directory, use rmdir instead of unlink,
1822 1823 * to avoid file system corruption.)
1823 1824 */
1824 1825 if (vp->v_flag & VROOT) {
1825 1826 if ((vfsp->vfs_flag & VFS_UNLINKABLE) == 0) {
1826 1827 error = EBUSY;
1827 1828 goto out;
1828 1829 }
1829 1830
1830 1831 /*
1831 1832 * Namefs specific code starts here.
1832 1833 */
1833 1834
1834 1835 if (dirflag == RMDIRECTORY) {
1835 1836 /*
1836 1837 			 * User called rmdir(2) on a file that has
1837 1838 			 * been namefs mounted on top of it. Since
1838 1839 			 * namefs doesn't allow directories to
1839 1840 			 * be mounted on other files, we know
1840 1841 			 * vp is not of type VDIR, so fail the operation.
1841 1842 */
1842 1843 error = ENOTDIR;
1843 1844 goto out;
1844 1845 }
1845 1846
1846 1847 /*
1847 1848 * If VROOT is still set after grabbing vp->v_lock,
1848 1849 		 * no one has finished nm_unmount yet and coveredvp
1849 1850 		 * is still valid.
1850 1851 * If we manage to grab vn_vfswlock(coveredvp) before releasing
1851 1852 * vp->v_lock, any race window is eliminated.
1852 1853 */
1853 1854
1854 1855 mutex_enter(&vp->v_lock);
1855 1856 if ((vp->v_flag & VROOT) == 0) {
1856 1857 /* Someone beat us to the unmount */
1857 1858 mutex_exit(&vp->v_lock);
1858 1859 error = EBUSY;
1859 1860 goto out;
1860 1861 }
1861 1862 vfsp = vp->v_vfsp;
1862 1863 coveredvp = vfsp->vfs_vnodecovered;
1863 1864 ASSERT(coveredvp);
1864 1865 /*
1865 1866 * Note: Implementation of vn_vfswlock shows that ordering of
1866 1867 * v_lock / vn_vfswlock is not an issue here.
1867 1868 */
1868 1869 error = vn_vfswlock(coveredvp);
1869 1870 mutex_exit(&vp->v_lock);
1870 1871
1871 1872 if (error)
1872 1873 goto out;
1873 1874
1874 1875 VN_HOLD(coveredvp);
1875 1876 VN_RELE(vp);
1876 1877 error = dounmount(vfsp, 0, CRED());
1877 1878
1878 1879 /*
1879 1880 * Unmounted the namefs file system; now get
1880 1881 * the object it was mounted over.
1881 1882 */
1882 1883 vp = coveredvp;
1883 1884 /*
1884 1885 * If namefs was mounted over a directory, then
1885 1886 * we want to use rmdir() instead of unlink().
1886 1887 */
1887 1888 if (vp->v_type == VDIR)
1888 1889 dirflag = RMDIRECTORY;
1889 1890
1890 1891 if (error)
1891 1892 goto out;
1892 1893 }
1893 1894
1894 1895 /*
1895 1896 	 * Make sure the filesystem is writable.
1896 1897 * We check the parent directory's vfs in case this is an lofs vnode.
1897 1898 */
1898 1899 if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
1899 1900 error = EROFS;
1900 1901 goto out;
1901 1902 }
1902 1903
1903 1904 vtype = vp->v_type;
1904 1905
1905 1906 /*
1906 1907 * If there is the possibility of an nbmand share reservation, make
1907 1908 * sure it's okay to remove the file. Keep a reference to the
1908 1909 * vnode, so that we can exit the nbl critical region after
1909 1910 * calling VOP_REMOVE.
1910 1911 * If there is no possibility of an nbmand share reservation,
1911 1912 * release the vnode reference now. Filesystems like NFS may
1912 1913 * behave differently if there is an extra reference, so get rid of
1913 1914 * this one. Fortunately, we can't have nbmand mounts on NFS
1914 1915 * filesystems.
1915 1916 */
1916 1917 if (nbl_need_check(vp)) {
1917 1918 nbl_start_crit(vp, RW_READER);
1918 1919 in_crit = 1;
1919 1920 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
1920 1921 error = EACCES;
1921 1922 goto out;
1922 1923 }
1923 1924 } else {
1924 1925 VN_RELE(vp);
1925 1926 vp = NULL;
1926 1927 }
1927 1928
1928 1929 if (dirflag == RMDIRECTORY) {
1929 1930 /*
1930 1931 * Caller is using rmdir(2), which can only be applied to
1931 1932 * directories.
1932 1933 */
1933 1934 if (vtype != VDIR) {
1934 1935 error = ENOTDIR;
1935 1936 } else {
1936 1937 vnode_t *cwd;
1937 1938 proc_t *pp = curproc;
1938 1939
1939 1940 mutex_enter(&pp->p_lock);
1940 1941 cwd = PTOU(pp)->u_cdir;
1941 1942 VN_HOLD(cwd);
1942 1943 mutex_exit(&pp->p_lock);
1943 1944 error = VOP_RMDIR(dvp, pn.pn_path, cwd, CRED(),
1944 1945 NULL, 0);
1945 1946 VN_RELE(cwd);
1946 1947 }
1947 1948 } else {
1948 1949 /*
1949 1950 * Unlink(2) can be applied to anything.
1950 1951 */
1951 1952 error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
1952 1953 }
1953 1954
1954 1955 out:
1955 1956 pn_free(&pn);
1956 1957 if (in_crit) {
1957 1958 nbl_end_crit(vp);
1958 1959 in_crit = 0;
1959 1960 }
1960 1961 if (vp != NULL)
1961 1962 VN_RELE(vp);
1962 1963 if (dvp != NULL)
1963 1964 VN_RELE(dvp);
1964 1965 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1965 1966 goto top;
1966 1967 return (error);
1967 1968 }
1968 1969
1969 1970 /*
1970 1971 * Utility function to compare equality of vnodes.
1971 1972 * Compare the underlying real vnodes, if there are underlying vnodes.
1972 1973 * This is a more thorough comparison than the VN_CMP() macro provides.
1973 1974 */
1974 1975 int
1975 1976 vn_compare(vnode_t *vp1, vnode_t *vp2)
1976 1977 {
1977 1978 vnode_t *realvp;
1978 1979
1979 1980 if (vp1 != NULL && VOP_REALVP(vp1, &realvp, NULL) == 0)
1980 1981 vp1 = realvp;
1981 1982 if (vp2 != NULL && VOP_REALVP(vp2, &realvp, NULL) == 0)
1982 1983 vp2 = realvp;
1983 1984 return (VN_CMP(vp1, vp2));
1984 1985 }
1985 1986
1986 1987 /*
1987 1988 * The number of locks to hash into. This value must be a power
1988 1989 * of 2 minus 1 and should probably also be prime.
1989 1990 */
1990 1991 #define NUM_BUCKETS 1023
1991 1992
1992 1993 struct vn_vfslocks_bucket {
1993 1994 kmutex_t vb_lock;
1994 1995 vn_vfslocks_entry_t *vb_list;
1995 1996 char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
1996 1997 };
1997 1998
1998 1999 /*
1999 2000 * Total number of buckets will be NUM_BUCKETS + 1 .
2000 2001 */
2001 2002
2002 2003 #pragma align 64(vn_vfslocks_buckets)
2003 2004 static struct vn_vfslocks_bucket vn_vfslocks_buckets[NUM_BUCKETS + 1];
2004 2005
2005 2006 #define VN_VFSLOCKS_SHIFT 9
2006 2007
2007 2008 #define VN_VFSLOCKS_HASH(vfsvpptr) \
2008 2009 ((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
2009 2010
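
As a worked illustration of the hash (values hypothetical): shifting the pointer right by VN_VFSLOCKS_SHIFT discards low-order bits, which alignment makes identical for most kmem-allocated objects, and masking with NUM_BUCKETS (a power of two minus one) keeps the index in range.

/*
 * Illustration only, for a hypothetical pointer value:
 *
 *	0xffffff0123456780 >> 9	  == 0x7fffff8091a2b3
 *	0x7fffff8091a2b3 & 1023	  == 691	(bucket 691)
 */
static struct vn_vfslocks_bucket *
example_bucket_for(void *vfsvpptr)
{
	return (&vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)]);
}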
2010 2011 /*
2011 2012  * vn_vfslocks_getlock() uses a hash scheme to look up the
2012 2013  * rwstlock for the vfs/vnode pointer passed to it.
2013 2014  *
2014 2015  * vn_vfslocks_rele() releases a reference in the
2015 2016  * hash table, which allows the entry allocated by
2016 2017  * vn_vfslocks_getlock() to be freed at a later
2017 2018  * stage, when the refcount drops to zero.
2018 2019 */
2019 2020
2020 2021 vn_vfslocks_entry_t *
2021 2022 vn_vfslocks_getlock(void *vfsvpptr)
2022 2023 {
2023 2024 struct vn_vfslocks_bucket *bp;
2024 2025 vn_vfslocks_entry_t *vep;
2025 2026 vn_vfslocks_entry_t *tvep;
2026 2027
2027 2028 ASSERT(vfsvpptr != NULL);
2028 2029 bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];
2029 2030
2030 2031 mutex_enter(&bp->vb_lock);
2031 2032 for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2032 2033 if (vep->ve_vpvfs == vfsvpptr) {
2033 2034 vep->ve_refcnt++;
2034 2035 mutex_exit(&bp->vb_lock);
2035 2036 return (vep);
2036 2037 }
2037 2038 }
2038 2039 mutex_exit(&bp->vb_lock);
2039 2040 vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
2040 2041 rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
2041 2042 vep->ve_vpvfs = (char *)vfsvpptr;
2042 2043 vep->ve_refcnt = 1;
2043 2044 mutex_enter(&bp->vb_lock);
2044 2045 for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
2045 2046 if (tvep->ve_vpvfs == vfsvpptr) {
2046 2047 tvep->ve_refcnt++;
2047 2048 mutex_exit(&bp->vb_lock);
2048 2049
2049 2050 /*
2050 2051 				 * There is already an entry in the hash;
2051 2052 				 * destroy what we just allocated.
2052 2053 */
2053 2054 rwst_destroy(&vep->ve_lock);
2054 2055 kmem_free(vep, sizeof (*vep));
2055 2056 return (tvep);
2056 2057 }
2057 2058 }
2058 2059 vep->ve_next = bp->vb_list;
2059 2060 bp->vb_list = vep;
2060 2061 mutex_exit(&bp->vb_lock);
2061 2062 return (vep);
2062 2063 }
2063 2064
2064 2065 void
2065 2066 vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
2066 2067 {
2067 2068 struct vn_vfslocks_bucket *bp;
2068 2069 vn_vfslocks_entry_t *vep;
2069 2070 vn_vfslocks_entry_t *pvep;
2070 2071
2071 2072 ASSERT(vepent != NULL);
2072 2073 ASSERT(vepent->ve_vpvfs != NULL);
2073 2074
2074 2075 bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];
2075 2076
2076 2077 mutex_enter(&bp->vb_lock);
2077 2078 vepent->ve_refcnt--;
2078 2079
2079 2080 if ((int32_t)vepent->ve_refcnt < 0)
2080 2081 cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");
2081 2082
2082 2083 if (vepent->ve_refcnt == 0) {
2083 2084 for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2084 2085 if (vep->ve_vpvfs == vepent->ve_vpvfs) {
2085 2086 if (bp->vb_list == vep)
2086 2087 bp->vb_list = vep->ve_next;
2087 2088 else {
2088 2089 /* LINTED */
2089 2090 pvep->ve_next = vep->ve_next;
2090 2091 }
2091 2092 mutex_exit(&bp->vb_lock);
2092 2093 rwst_destroy(&vep->ve_lock);
2093 2094 kmem_free(vep, sizeof (*vep));
2094 2095 return;
2095 2096 }
2096 2097 pvep = vep;
2097 2098 }
2098 2099 cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
2099 2100 }
2100 2101 mutex_exit(&bp->vb_lock);
2101 2102 }
2102 2103
2103 2104 /*
2104 2105 * vn_vfswlock_wait is used to implement a lock which is logically a writers
2105 2106 * lock protecting the v_vfsmountedhere field.
2106 2107 * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
2107 2108 * except that it blocks to acquire the lock VVFSLOCK.
2108 2109 *
2109 2110 * traverse() and routines re-implementing part of traverse (e.g. autofs)
2110 2111 * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
2111 2112  * need the non-blocking version of the writers lock, i.e. vn_vfswlock().
2112 2113 */
2113 2114 int
2114 2115 vn_vfswlock_wait(vnode_t *vp)
2115 2116 {
2116 2117 int retval;
2117 2118 vn_vfslocks_entry_t *vpvfsentry;
2118 2119 ASSERT(vp != NULL);
2119 2120
2120 2121 vpvfsentry = vn_vfslocks_getlock(vp);
2121 2122 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER);
2122 2123
2123 2124 if (retval == EINTR) {
2124 2125 vn_vfslocks_rele(vpvfsentry);
2125 2126 return (EINTR);
2126 2127 }
2127 2128 return (retval);
2128 2129 }
2129 2130
2130 2131 int
2131 2132 vn_vfsrlock_wait(vnode_t *vp)
2132 2133 {
2133 2134 int retval;
2134 2135 vn_vfslocks_entry_t *vpvfsentry;
2135 2136 ASSERT(vp != NULL);
2136 2137
2137 2138 vpvfsentry = vn_vfslocks_getlock(vp);
2138 2139 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER);
2139 2140
2140 2141 if (retval == EINTR) {
2141 2142 vn_vfslocks_rele(vpvfsentry);
2142 2143 return (EINTR);
2143 2144 }
2144 2145
2145 2146 return (retval);
2146 2147 }
2147 2148
2148 2149
2149 2150 /*
2150 2151 * vn_vfswlock is used to implement a lock which is logically a writers lock
2151 2152 * protecting the v_vfsmountedhere field.
2152 2153 */
2153 2154 int
2154 2155 vn_vfswlock(vnode_t *vp)
2155 2156 {
2156 2157 vn_vfslocks_entry_t *vpvfsentry;
2157 2158
2158 2159 /*
2159 2160 * If vp is NULL then somebody is trying to lock the covered vnode
2160 2161 * of /. (vfs_vnodecovered is NULL for /). This situation will
2161 2162 * only happen when unmounting /. Since that operation will fail
2162 2163 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2163 2164 */
2164 2165 if (vp == NULL)
2165 2166 return (EBUSY);
2166 2167
2167 2168 vpvfsentry = vn_vfslocks_getlock(vp);
2168 2169
2169 2170 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
2170 2171 return (0);
2171 2172
2172 2173 vn_vfslocks_rele(vpvfsentry);
2173 2174 return (EBUSY);
2174 2175 }
2175 2176
2176 2177 int
2177 2178 vn_vfsrlock(vnode_t *vp)
2178 2179 {
2179 2180 vn_vfslocks_entry_t *vpvfsentry;
2180 2181
2181 2182 /*
2182 2183 * If vp is NULL then somebody is trying to lock the covered vnode
2183 2184 * of /. (vfs_vnodecovered is NULL for /). This situation will
2184 2185 * only happen when unmounting /. Since that operation will fail
2185 2186 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2186 2187 */
2187 2188 if (vp == NULL)
2188 2189 return (EBUSY);
2189 2190
2190 2191 vpvfsentry = vn_vfslocks_getlock(vp);
2191 2192
2192 2193 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER))
2193 2194 return (0);
2194 2195
2195 2196 vn_vfslocks_rele(vpvfsentry);
2196 2197 return (EBUSY);
2197 2198 }
2198 2199
2199 2200 void
2200 2201 vn_vfsunlock(vnode_t *vp)
2201 2202 {
2202 2203 vn_vfslocks_entry_t *vpvfsentry;
2203 2204
2204 2205 /*
2205 2206 * ve_refcnt needs to be decremented twice.
2206 2207 	 * 1. To release the reference after a call to vn_vfslocks_getlock()
2207 2208 	 * 2. To release the reference from the locking routines like
2208 2209 	 *    vn_vfsrlock/vn_vfswlock etc.
2209 2210 */
2210 2211 vpvfsentry = vn_vfslocks_getlock(vp);
2211 2212 vn_vfslocks_rele(vpvfsentry);
2212 2213
2213 2214 rwst_exit(&vpvfsentry->ve_lock);
2214 2215 vn_vfslocks_rele(vpvfsentry);
2215 2216 }
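
Putting the pieces together, a minimal sketch (illustrative only) of the pairing callers rely on: every successful vn_vfswlock()/vn_vfsrlock() must eventually be matched by vn_vfsunlock(), which both drops the rwstlock and releases the hash-table reference the locking routine took.

static int
example_protect_mount_field(vnode_t *vp)
{
	int error;

	if ((error = vn_vfswlock(vp)) != 0)
		return (error);	/* EBUSY: another thread holds it */
	/* ... examine or update vp->v_vfsmountedhere here ... */
	vn_vfsunlock(vp);
	return (0);
}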
2216 2217
2217 2218 int
2218 2219 vn_vfswlock_held(vnode_t *vp)
2219 2220 {
2220 2221 int held;
2221 2222 vn_vfslocks_entry_t *vpvfsentry;
2222 2223
2223 2224 ASSERT(vp != NULL);
2224 2225
2225 2226 vpvfsentry = vn_vfslocks_getlock(vp);
2226 2227 held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER);
2227 2228
2228 2229 vn_vfslocks_rele(vpvfsentry);
2229 2230 return (held);
2230 2231 }
2231 2232
2232 2233
2233 2234 int
2234 2235 vn_make_ops(
2235 2236 const char *name, /* Name of file system */
2236 2237 const fs_operation_def_t *templ, /* Operation specification */
2237 2238 vnodeops_t **actual) /* Return the vnodeops */
2238 2239 {
2239 2240 int unused_ops;
2240 2241 int error;
2241 2242
2242 2243 *actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP);
2243 2244
2244 2245 (*actual)->vnop_name = name;
2245 2246
2246 2247 error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ);
2247 2248 if (error) {
2248 2249 kmem_free(*actual, sizeof (vnodeops_t));
2249 2250 }
2250 2251
2251 2252 #if DEBUG
2252 2253 if (unused_ops != 0)
2253 2254 cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied "
2254 2255 "but not used", name, unused_ops);
2255 2256 #endif
2256 2257
2257 2258 return (error);
2258 2259 }
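
For illustration, this is roughly how a filesystem uses vn_make_ops() at initialization time. "examplefs" and the template contents are hypothetical; fs_nosys() is the standard stub from fs_subr.c, and unsupplied operations get defaults from vn_ops_table.

static vnodeops_t *example_vnodeops;

static const fs_operation_def_t example_vnodeops_template[] = {
	VOPNAME_OPEN,	{ .error = fs_nosys },	/* stub entry */
	VOPNAME_CLOSE,	{ .error = fs_nosys },	/* stub entry */
	NULL,		NULL
};

static int
example_init_vnodeops(void)
{
	return (vn_make_ops("examplefs", example_vnodeops_template,
	    &example_vnodeops));
}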
2259 2260
2260 2261 /*
2261 2262 * Free the vnodeops created as a result of vn_make_ops()
2262 2263 */
2263 2264 void
2264 2265 vn_freevnodeops(vnodeops_t *vnops)
2265 2266 {
2266 2267 kmem_free(vnops, sizeof (vnodeops_t));
2267 2268 }
2268 2269
2269 2270 /*
2270 2271 * Vnode cache.
2271 2272 */
2272 2273
2273 2274 /* ARGSUSED */
2274 2275 static int
2275 2276 vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
2276 2277 {
2277 2278 struct vnode *vp;
2278 2279
2279 2280 vp = buf;
2280 2281
2281 2282 mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
2282 2283 mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL);
2283 2284 cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
2284 2285 rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
2285 2286 vp->v_femhead = NULL; /* Must be done before vn_reinit() */
2286 2287 vp->v_path = NULL;
2287 2288 vp->v_mpssdata = NULL;
2288 2289 vp->v_vsd = NULL;
2289 2290 vp->v_fopdata = NULL;
2290 2291
2291 2292 return (0);
2292 2293 }
2293 2294
2294 2295 /* ARGSUSED */
2295 2296 static void
2296 2297 vn_cache_destructor(void *buf, void *cdrarg)
2297 2298 {
2298 2299 struct vnode *vp;
2299 2300
2300 2301 vp = buf;
2301 2302
2302 2303 rw_destroy(&vp->v_nbllock);
2303 2304 cv_destroy(&vp->v_cv);
2304 2305 mutex_destroy(&vp->v_vsd_lock);
2305 2306 mutex_destroy(&vp->v_lock);
2306 2307 }
2307 2308
2308 2309 void
2309 2310 vn_create_cache(void)
2310 2311 {
2311 2312 /* LINTED */
2312 2313 ASSERT((1 << VNODE_ALIGN_LOG2) ==
2313 2314 P2ROUNDUP(sizeof (struct vnode), VNODE_ALIGN));
2314 2315 vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode),
2315 2316 VNODE_ALIGN, vn_cache_constructor, vn_cache_destructor, NULL, NULL,
2316 2317 NULL, 0);
2317 2318 }
2318 2319
2319 2320 void
2320 2321 vn_destroy_cache(void)
2321 2322 {
2322 2323 kmem_cache_destroy(vn_cache);
2323 2324 }
2324 2325
2325 2326 /*
2326 2327 * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
2327 2328 * cached by the file system and vnodes remain associated.
2328 2329 */
2329 2330 void
2330 2331 vn_recycle(vnode_t *vp)
2331 2332 {
2332 2333 ASSERT(vp->v_pages == NULL);
2333 2334
2334 2335 /*
2335 2336 * XXX - This really belongs in vn_reinit(), but we have some issues
2336 2337 * with the counts. Best to have it here for clean initialization.
2337 2338 */
2338 2339 vp->v_rdcnt = 0;
2339 2340 vp->v_wrcnt = 0;
2340 2341 vp->v_mmap_read = 0;
2341 2342 vp->v_mmap_write = 0;
2342 2343
2343 2344 /*
2344 2345 * If FEM was in use, make sure everything gets cleaned up
2345 2346 * NOTE: vp->v_femhead is initialized to NULL in the vnode
2346 2347 * constructor.
2347 2348 */
2348 2349 if (vp->v_femhead) {
2349 2350 /* XXX - There should be a free_femhead() that does all this */
2350 2351 ASSERT(vp->v_femhead->femh_list == NULL);
2351 2352 mutex_destroy(&vp->v_femhead->femh_lock);
2352 2353 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2353 2354 vp->v_femhead = NULL;
2354 2355 }
2355 2356 if (vp->v_path) {
2356 2357 kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2357 2358 vp->v_path = NULL;
2358 2359 }
2359 2360
2360 2361 if (vp->v_fopdata != NULL) {
2361 2362 free_fopdata(vp);
2362 2363 }
2363 2364 vp->v_mpssdata = NULL;
2364 2365 vsd_free(vp);
2365 2366 }
2366 2367
2367 2368 /*
2368 2369 * Used to reset the vnode fields including those that are directly accessible
2369 2370 * as well as those which require an accessor function.
2370 2371 *
2371 2372 * Does not initialize:
2372 2373 * synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv
2373 2374 * v_data (since FS-nodes and vnodes point to each other and should
2374 2375 * be updated simultaneously)
2375 2376 * v_op (in case someone needs to make a VOP call on this object)
2376 2377 */
2377 2378 void
2378 2379 vn_reinit(vnode_t *vp)
2379 2380 {
2380 2381 vp->v_count = 1;
2381 2382 vp->v_count_dnlc = 0;
2382 2383 vp->v_vfsp = NULL;
2383 2384 vp->v_stream = NULL;
2384 2385 vp->v_vfsmountedhere = NULL;
2385 2386 vp->v_flag = 0;
2386 2387 vp->v_type = VNON;
2387 2388 vp->v_rdev = NODEV;
2388 2389
2389 2390 vp->v_filocks = NULL;
2390 2391 vp->v_shrlocks = NULL;
2391 2392 vp->v_pages = NULL;
2392 2393
2393 2394 vp->v_locality = NULL;
2394 2395 vp->v_xattrdir = NULL;
2395 2396
2396 2397 /* Handles v_femhead, v_path, and the r/w/map counts */
2397 2398 vn_recycle(vp);
2398 2399 }
2399 2400
2400 2401 vnode_t *
2401 2402 vn_alloc(int kmflag)
2402 2403 {
2403 2404 vnode_t *vp;
2404 2405
2405 2406 vp = kmem_cache_alloc(vn_cache, kmflag);
2406 2407
2407 2408 if (vp != NULL) {
2408 2409 vp->v_femhead = NULL; /* Must be done before vn_reinit() */
2409 2410 vp->v_fopdata = NULL;
2410 2411 vn_reinit(vp);
2411 2412 }
2412 2413
2413 2414 return (vp);
2414 2415 }
2415 2416
2416 2417 void
2417 2418 vn_free(vnode_t *vp)
2418 2419 {
2419 2420 ASSERT(vp->v_shrlocks == NULL);
2420 2421 ASSERT(vp->v_filocks == NULL);
2421 2422
2422 2423 /*
2423 2424 * Some file systems call vn_free() with v_count of zero,
2424 2425 * some with v_count of 1. In any case, the value should
2425 2426 * never be anything else.
2426 2427 */
2427 2428 ASSERT((vp->v_count == 0) || (vp->v_count == 1));
2428 2429 ASSERT(vp->v_count_dnlc == 0);
2429 2430 if (vp->v_path != NULL) {
2430 2431 kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2431 2432 vp->v_path = NULL;
2432 2433 }
2433 2434
2434 2435 /* If FEM was in use, make sure everything gets cleaned up */
2435 2436 if (vp->v_femhead) {
2436 2437 /* XXX - There should be a free_femhead() that does all this */
2437 2438 ASSERT(vp->v_femhead->femh_list == NULL);
2438 2439 mutex_destroy(&vp->v_femhead->femh_lock);
2439 2440 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2440 2441 vp->v_femhead = NULL;
2441 2442 }
2442 2443
2443 2444 if (vp->v_fopdata != NULL) {
2444 2445 free_fopdata(vp);
2445 2446 }
2446 2447 vp->v_mpssdata = NULL;
2447 2448 vsd_free(vp);
2448 2449 kmem_cache_free(vn_cache, vp);
2449 2450 }
2450 2451
2451 2452 /*
2452 2453  * Vnode status changes; we should define better states than 1 and 0.
2453 2454 */
2454 2455 void
2455 2456 vn_reclaim(vnode_t *vp)
2456 2457 {
2457 2458 vfs_t *vfsp = vp->v_vfsp;
2458 2459
2459 2460 if (vfsp == NULL ||
2460 2461 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2461 2462 return;
2462 2463 }
2463 2464 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED);
2464 2465 }
2465 2466
2466 2467 void
2467 2468 vn_idle(vnode_t *vp)
2468 2469 {
2469 2470 vfs_t *vfsp = vp->v_vfsp;
2470 2471
2471 2472 if (vfsp == NULL ||
2472 2473 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2473 2474 return;
2474 2475 }
2475 2476 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED);
2476 2477 }
2477 2478 void
2478 2479 vn_exists(vnode_t *vp)
2479 2480 {
2480 2481 vfs_t *vfsp = vp->v_vfsp;
2481 2482
2482 2483 if (vfsp == NULL ||
2483 2484 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2484 2485 return;
2485 2486 }
2486 2487 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS);
2487 2488 }
2488 2489
2489 2490 void
2490 2491 vn_invalid(vnode_t *vp)
2491 2492 {
2492 2493 vfs_t *vfsp = vp->v_vfsp;
2493 2494
2494 2495 if (vfsp == NULL ||
2495 2496 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2496 2497 return;
2497 2498 }
2498 2499 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED);
2499 2500 }
2500 2501
2501 2502 /* Vnode event notification */
2502 2503
2503 2504 int
2504 2505 vnevent_support(vnode_t *vp, caller_context_t *ct)
2505 2506 {
2506 2507 if (vp == NULL)
2507 2508 return (EINVAL);
2508 2509
2509 2510 return (VOP_VNEVENT(vp, VE_SUPPORT, NULL, NULL, ct));
2510 2511 }
2511 2512
2512 2513 void
2513 2514 vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2514 2515 {
2515 2516 if (vp == NULL || vp->v_femhead == NULL) {
2516 2517 return;
2517 2518 }
2518 2519 (void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
2519 2520 }
2520 2521
2521 2522 void
2522 2523 vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2523 2524 caller_context_t *ct)
2524 2525 {
2525 2526 if (vp == NULL || vp->v_femhead == NULL) {
2526 2527 return;
2527 2528 }
2528 2529 (void) VOP_VNEVENT(vp, VE_RENAME_DEST, dvp, name, ct);
2529 2530 }
2530 2531
2531 2532 void
2532 2533 vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
2533 2534 {
2534 2535 if (vp == NULL || vp->v_femhead == NULL) {
2535 2536 return;
2536 2537 }
2537 2538 (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
2538 2539 }
2539 2540
2540 2541 void
2541 2542 vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2542 2543 {
2543 2544 if (vp == NULL || vp->v_femhead == NULL) {
2544 2545 return;
2545 2546 }
2546 2547 (void) VOP_VNEVENT(vp, VE_REMOVE, dvp, name, ct);
2547 2548 }
2548 2549
2549 2550 void
2550 2551 vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2551 2552 {
2552 2553 if (vp == NULL || vp->v_femhead == NULL) {
2553 2554 return;
2554 2555 }
2555 2556 (void) VOP_VNEVENT(vp, VE_RMDIR, dvp, name, ct);
2556 2557 }
2557 2558
2558 2559 void
2559 2560 vnevent_create(vnode_t *vp, caller_context_t *ct)
2560 2561 {
2561 2562 if (vp == NULL || vp->v_femhead == NULL) {
2562 2563 return;
2563 2564 }
2564 2565 (void) VOP_VNEVENT(vp, VE_CREATE, NULL, NULL, ct);
2565 2566 }
2566 2567
2567 2568 void
2568 2569 vnevent_link(vnode_t *vp, caller_context_t *ct)
2569 2570 {
2570 2571 if (vp == NULL || vp->v_femhead == NULL) {
2571 2572 return;
2572 2573 }
2573 2574 (void) VOP_VNEVENT(vp, VE_LINK, NULL, NULL, ct);
2574 2575 }
2575 2576
2576 2577 void
2577 2578 vnevent_mountedover(vnode_t *vp, caller_context_t *ct)
2578 2579 {
2579 2580 if (vp == NULL || vp->v_femhead == NULL) {
2580 2581 return;
2581 2582 }
2582 2583 (void) VOP_VNEVENT(vp, VE_MOUNTEDOVER, NULL, NULL, ct);
2583 2584 }
2584 2585
2585 2586 /*
2586 2587 * Vnode accessors.
2587 2588 */
2588 2589
2589 2590 int
2590 2591 vn_is_readonly(vnode_t *vp)
2591 2592 {
2592 2593 return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
2593 2594 }
2594 2595
2595 2596 int
2596 2597 vn_has_flocks(vnode_t *vp)
2597 2598 {
2598 2599 return (vp->v_filocks != NULL);
2599 2600 }
2600 2601
2601 2602 int
2602 2603 vn_has_mandatory_locks(vnode_t *vp, int mode)
2603 2604 {
2604 2605 return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode)));
2605 2606 }
2606 2607
2607 2608 int
2608 2609 vn_has_cached_data(vnode_t *vp)
2609 2610 {
2610 2611 return (vp->v_pages != NULL);
2611 2612 }
2612 2613
2613 2614 /*
2614 2615 * Return 0 if the vnode in question shouldn't be permitted into a zone via
2615 2616 * zone_enter(2).
2616 2617 */
2617 2618 int
2618 2619 vn_can_change_zones(vnode_t *vp)
2619 2620 {
2620 2621 struct vfssw *vswp;
2621 2622 int allow = 1;
2622 2623 vnode_t *rvp;
2623 2624
2624 2625 if (nfs_global_client_only != 0)
2625 2626 return (1);
2626 2627
2627 2628 /*
2628 2629 * We always want to look at the underlying vnode if there is one.
2629 2630 */
2630 2631 if (VOP_REALVP(vp, &rvp, NULL) != 0)
2631 2632 rvp = vp;
2632 2633 /*
2633 2634 * Some pseudo filesystems (including doorfs) don't actually register
2634 2635 * their vfsops_t, so the following may return NULL; we happily let
2635 2636 * such vnodes switch zones.
2636 2637 */
2637 2638 vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp));
2638 2639 if (vswp != NULL) {
2639 2640 if (vswp->vsw_flag & VSW_NOTZONESAFE)
2640 2641 allow = 0;
2641 2642 vfs_unrefvfssw(vswp);
2642 2643 }
2643 2644 return (allow);
2644 2645 }
2645 2646
2646 2647 /*
2647 2648 * Return nonzero if the vnode is a mount point, zero if not.
2648 2649 */
2649 2650 int
2650 2651 vn_ismntpt(vnode_t *vp)
2651 2652 {
2652 2653 return (vp->v_vfsmountedhere != NULL);
2653 2654 }
2654 2655
2655 2656 /* Retrieve the vfs (if any) mounted on this vnode */
2656 2657 vfs_t *
2657 2658 vn_mountedvfs(vnode_t *vp)
2658 2659 {
2659 2660 return (vp->v_vfsmountedhere);
2660 2661 }
2661 2662
2662 2663 /*
2663 2664 * Return nonzero if the vnode is referenced by the dnlc, zero if not.
2664 2665 */
2665 2666 int
2666 2667 vn_in_dnlc(vnode_t *vp)
2667 2668 {
2668 2669 return (vp->v_count_dnlc > 0);
2669 2670 }
2670 2671
2671 2672 /*
2672 2673 * vn_has_other_opens() checks whether a particular file is opened by more than
2673 2674 * just the caller and whether the open is for read and/or write.
2674 2675 * This routine is for calling after the caller has already called VOP_OPEN()
2675 2676 * and the caller wishes to know if they are the only one with it open for
2676 2677 * the mode(s) specified.
2677 2678 *
2678 2679 * Vnode counts are only kept on regular files (v_type=VREG).
2679 2680 */
2680 2681 int
2681 2682 vn_has_other_opens(
2682 2683 vnode_t *vp,
2683 2684 v_mode_t mode)
2684 2685 {
2685 2686
2686 2687 ASSERT(vp != NULL);
2687 2688
2688 2689 switch (mode) {
2689 2690 case V_WRITE:
2690 2691 if (vp->v_wrcnt > 1)
2691 2692 return (V_TRUE);
2692 2693 break;
2693 2694 case V_RDORWR:
2694 2695 if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1))
2695 2696 return (V_TRUE);
2696 2697 break;
2697 2698 case V_RDANDWR:
2698 2699 if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1))
2699 2700 return (V_TRUE);
2700 2701 break;
2701 2702 case V_READ:
2702 2703 if (vp->v_rdcnt > 1)
2703 2704 return (V_TRUE);
2704 2705 break;
2705 2706 }
2706 2707
2707 2708 return (V_FALSE);
2708 2709 }
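
An illustrative (hypothetical) caller: an NFS-style server that already holds its own open could use this to decide whether a read delegation is safe, since any additional writer makes it dangerous.

static int
example_safe_to_delegate_read(vnode_t *vp)
{
	/* The caller's own open is counted, so "> 1" means others. */
	return (vn_has_other_opens(vp, V_WRITE) == V_FALSE);
}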
2709 2710
2710 2711 /*
2711 2712 * vn_is_opened() checks whether a particular file is opened and
2712 2713 * whether the open is for read and/or write.
2713 2714 *
2714 2715 * Vnode counts are only kept on regular files (v_type=VREG).
2715 2716 */
2716 2717 int
2717 2718 vn_is_opened(
2718 2719 vnode_t *vp,
2719 2720 v_mode_t mode)
2720 2721 {
2721 2722
2722 2723 ASSERT(vp != NULL);
2723 2724
2724 2725 switch (mode) {
2725 2726 case V_WRITE:
2726 2727 if (vp->v_wrcnt)
2727 2728 return (V_TRUE);
2728 2729 break;
2729 2730 case V_RDANDWR:
2730 2731 if (vp->v_rdcnt && vp->v_wrcnt)
2731 2732 return (V_TRUE);
2732 2733 break;
2733 2734 case V_RDORWR:
2734 2735 if (vp->v_rdcnt || vp->v_wrcnt)
2735 2736 return (V_TRUE);
2736 2737 break;
2737 2738 case V_READ:
2738 2739 if (vp->v_rdcnt)
2739 2740 return (V_TRUE);
2740 2741 break;
2741 2742 }
2742 2743
2743 2744 return (V_FALSE);
2744 2745 }
2745 2746
2746 2747 /*
2747 2748 * vn_is_mapped() checks whether a particular file is mapped and whether
2748 2749 * the file is mapped read and/or write.
2749 2750 */
2750 2751 int
2751 2752 vn_is_mapped(
2752 2753 vnode_t *vp,
2753 2754 v_mode_t mode)
2754 2755 {
2755 2756
2756 2757 ASSERT(vp != NULL);
2757 2758
2758 2759 #if !defined(_LP64)
2759 2760 switch (mode) {
2760 2761 /*
2761 2762 * The atomic_add_64_nv functions force atomicity in the
2762 2763 * case of 32 bit architectures. Otherwise the 64 bit values
2763 2764 * require two fetches. The value of the fields may be
2764 2765 * (potentially) changed between the first fetch and the
2765 2766 * second
2766 2767 */
2767 2768 case V_WRITE:
2768 2769 if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
2769 2770 return (V_TRUE);
2770 2771 break;
2771 2772 case V_RDANDWR:
2772 2773 if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
2773 2774 (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2774 2775 return (V_TRUE);
2775 2776 break;
2776 2777 case V_RDORWR:
2777 2778 if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
2778 2779 (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2779 2780 return (V_TRUE);
2780 2781 break;
2781 2782 case V_READ:
2782 2783 if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
2783 2784 return (V_TRUE);
2784 2785 break;
2785 2786 }
2786 2787 #else
2787 2788 switch (mode) {
2788 2789 case V_WRITE:
2789 2790 if (vp->v_mmap_write)
2790 2791 return (V_TRUE);
2791 2792 break;
2792 2793 case V_RDANDWR:
2793 2794 if (vp->v_mmap_read && vp->v_mmap_write)
2794 2795 return (V_TRUE);
2795 2796 break;
2796 2797 case V_RDORWR:
2797 2798 if (vp->v_mmap_read || vp->v_mmap_write)
2798 2799 return (V_TRUE);
2799 2800 break;
2800 2801 case V_READ:
2801 2802 if (vp->v_mmap_read)
2802 2803 return (V_TRUE);
2803 2804 break;
2804 2805 }
2805 2806 #endif
2806 2807
2807 2808 return (V_FALSE);
2808 2809 }
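
The atomic_add_64_nv(ptr, 0) trick above deserves a note: adding zero and taking the new value is simply an atomic 64-bit read, which a plain load cannot guarantee when it compiles to two 32-bit fetches. A minimal sketch of the idiom in isolation:

static uint64_t
example_atomic_read64(volatile uint64_t *p)
{
#if !defined(_LP64)
	/* One atomic op yields a consistent snapshot on 32-bit. */
	return (atomic_add_64_nv(p, 0));
#else
	return (*p);		/* a 64-bit load is already atomic */
#endif
}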
2809 2810
2810 2811 /*
2811 2812 * Set the operations vector for a vnode.
2812 2813 *
2813 2814 * FEM ensures that the v_femhead pointer is filled in before the
2814 2815 * v_op pointer is changed. This means that if the v_femhead pointer
2815 2816  * is NULL, and the v_op field hasn't changed since we checked
2816 2817  * the v_femhead pointer, then our update is OK - we are not racing
2817 2818  * with FEM.
2818 2819 */
2819 2820 void
2820 2821 vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
2821 2822 {
2822 2823 vnodeops_t *op;
2823 2824
2824 2825 ASSERT(vp != NULL);
2825 2826 ASSERT(vnodeops != NULL);
2826 2827
2827 2828 op = vp->v_op;
2828 2829 membar_consumer();
2829 2830 /*
2830 2831 * If vp->v_femhead == NULL, then we'll call casptr() to do the
2831 2832 * compare-and-swap on vp->v_op. If either fails, then FEM is
2832 2833 * in effect on the vnode and we need to have FEM deal with it.
2833 2834 */
2834 2835 if (vp->v_femhead != NULL || casptr(&vp->v_op, op, vnodeops) != op) {
2835 2836 fem_setvnops(vp, vnodeops);
2836 2837 }
2837 2838 }
2838 2839
2839 2840 /*
2840 2841 * Retrieve the operations vector for a vnode
2841 2842  * As with vn_setops() above, make sure we aren't racing with FEM.
2842 2843 * FEM sets the v_op to a special, internal, vnodeops that wouldn't
2843 2844 * make sense to the callers of this routine.
2844 2845 */
2845 2846 vnodeops_t *
2846 2847 vn_getops(vnode_t *vp)
2847 2848 {
2848 2849 vnodeops_t *op;
2849 2850
2850 2851 ASSERT(vp != NULL);
2851 2852
2852 2853 op = vp->v_op;
2853 2854 membar_consumer();
2854 2855 if (vp->v_femhead == NULL && op == vp->v_op) {
2855 2856 return (op);
2856 2857 } else {
2857 2858 return (fem_getvnops(vp));
2858 2859 }
2859 2860 }
2860 2861
2861 2862 /*
2862 2863 * Returns non-zero (1) if the vnodeops matches that of the vnode.
2863 2864 * Returns zero (0) if not.
2864 2865 */
2865 2866 int
2866 2867 vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
2867 2868 {
2868 2869 return (vn_getops(vp) == vnodeops);
2869 2870 }
2870 2871
2871 2872 /*
2872 2873 * Returns non-zero (1) if the specified operation matches the
2873 2874  * corresponding operation for the vnode.
2874 2875 * Returns zero (0) if not.
2875 2876 */
2876 2877
2877 2878 #define MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
2878 2879
2879 2880 int
2880 2881 vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
2881 2882 {
2882 2883 const fs_operation_trans_def_t *otdp;
2883 2884 fs_generic_func_p *loc = NULL;
2884 2885 vnodeops_t *vop = vn_getops(vp);
2885 2886
2886 2887 ASSERT(vopname != NULL);
2887 2888
2888 2889 for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
2889 2890 if (MATCHNAME(otdp->name, vopname)) {
2890 2891 loc = (fs_generic_func_p *)
2891 2892 ((char *)(vop) + otdp->offset);
2892 2893 break;
2893 2894 }
2894 2895 }
2895 2896
2896 2897 return ((loc != NULL) && (*loc == funcp));
2897 2898 }
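
A sketch of how these predicates might be used by a hypothetical filesystem; example_ops and example_read are assumptions, while VOPNAME_READ is the real operation-table name.

static int
example_ops_are_ours(vnode_t *vp, vnodeops_t *example_ops,
    fs_generic_func_p example_read)
{
	/*
	 * vn_getops() (used by both predicates) resolves the
	 * underlying vector even when FEM has interposed.
	 */
	return (vn_matchops(vp, example_ops) &&
	    vn_matchopval(vp, VOPNAME_READ, example_read));
}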
2898 2899
2899 2900 /*
2900 2901 * fs_new_caller_id() needs to return a unique ID on a given local system.
2901 2902 * The IDs do not need to survive across reboots. These are primarily
2902 2903 * used so that (FEM) monitors can detect particular callers (such as
2903 2904 * the NFS server) to a given vnode/vfs operation.
2904 2905 */
2905 2906 u_longlong_t
2906 2907 fs_new_caller_id()
2907 2908 {
2908 2909 static uint64_t next_caller_id = 0LL; /* First call returns 1 */
2909 2910
2910 2911 return ((u_longlong_t)atomic_add_64_nv(&next_caller_id, 1));
2911 2912 }
2912 2913
2913 2914 /*
2914 2915 * Given a starting vnode and a path, updates the path in the target vnode in
2915 2916 * a safe manner. If the vnode already has path information embedded, then the
2916 2917 * cached path is left untouched.
2917 2918 */
2918 2919
2919 2920 size_t max_vnode_path = 4 * MAXPATHLEN;
2920 2921
2921 2922 void
2922 2923 vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
2923 2924 const char *path, size_t plen)
2924 2925 {
2925 2926 char *rpath;
2926 2927 vnode_t *base;
2927 2928 size_t rpathlen, rpathalloc;
2928 2929 int doslash = 1;
2929 2930
2930 2931 if (*path == '/') {
2931 2932 base = rootvp;
2932 2933 path++;
2933 2934 plen--;
2934 2935 } else {
2935 2936 base = startvp;
2936 2937 }
2937 2938
2938 2939 /*
2939 2940 * We cannot grab base->v_lock while we hold vp->v_lock because of
2940 2941 * the potential for deadlock.
2941 2942 */
2942 2943 mutex_enter(&base->v_lock);
2943 2944 if (base->v_path == NULL) {
2944 2945 mutex_exit(&base->v_lock);
2945 2946 return;
2946 2947 }
2947 2948
2948 2949 rpathlen = strlen(base->v_path);
2949 2950 rpathalloc = rpathlen + plen + 1;
2950 2951 /* Avoid adding a slash if there's already one there */
2951 2952 if (base->v_path[rpathlen-1] == '/')
2952 2953 doslash = 0;
2953 2954 else
2954 2955 rpathalloc++;
2955 2956
2956 2957 /*
2957 2958 * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
2958 2959 * so we must do this dance. If, by chance, something changes the path,
2959 2960 * just give up since there is no real harm.
2960 2961 */
2961 2962 mutex_exit(&base->v_lock);
2962 2963
2963 2964 /* Paths should stay within reason */
2964 2965 if (rpathalloc > max_vnode_path)
2965 2966 return;
2966 2967
2967 2968 rpath = kmem_alloc(rpathalloc, KM_SLEEP);
2968 2969
2969 2970 mutex_enter(&base->v_lock);
2970 2971 if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
2971 2972 mutex_exit(&base->v_lock);
2972 2973 kmem_free(rpath, rpathalloc);
2973 2974 return;
2974 2975 }
2975 2976 bcopy(base->v_path, rpath, rpathlen);
2976 2977 mutex_exit(&base->v_lock);
2977 2978
2978 2979 if (doslash)
2979 2980 rpath[rpathlen++] = '/';
2980 2981 bcopy(path, rpath + rpathlen, plen);
2981 2982 rpath[rpathlen + plen] = '\0';
2982 2983
2983 2984 mutex_enter(&vp->v_lock);
2984 2985 if (vp->v_path != NULL) {
2985 2986 mutex_exit(&vp->v_lock);
2986 2987 kmem_free(rpath, rpathalloc);
2987 2988 } else {
2988 2989 vp->v_path = rpath;
2989 2990 mutex_exit(&vp->v_lock);
2990 2991 }
2991 2992 }
2992 2993
2993 2994 /*
2994 2995 * Sets the path to the vnode to be the given string, regardless of current
2995 2996 * context. The string must be a complete path from rootdir. This is only used
2996 2997 * by fsop_root() for setting the path based on the mountpoint.
2997 2998 */
2998 2999 void
2999 3000 vn_setpath_str(struct vnode *vp, const char *str, size_t len)
3000 3001 {
3001 3002 char *buf = kmem_alloc(len + 1, KM_SLEEP);
3002 3003
3003 3004 mutex_enter(&vp->v_lock);
3004 3005 if (vp->v_path != NULL) {
3005 3006 mutex_exit(&vp->v_lock);
3006 3007 kmem_free(buf, len + 1);
3007 3008 return;
3008 3009 }
3009 3010
3010 3011 vp->v_path = buf;
3011 3012 bcopy(str, vp->v_path, len);
3012 3013 vp->v_path[len] = '\0';
3013 3014
3014 3015 mutex_exit(&vp->v_lock);
3015 3016 }
3016 3017
3017 3018 /*
3018 3019 * Called from within filesystem's vop_rename() to handle renames once the
3019 3020 * target vnode is available.
3020 3021 */
3021 3022 void
3022 3023 vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
3023 3024 {
3024 3025 char *tmp;
3025 3026
3026 3027 mutex_enter(&vp->v_lock);
3027 3028 tmp = vp->v_path;
3028 3029 vp->v_path = NULL;
3029 3030 mutex_exit(&vp->v_lock);
3030 3031 vn_setpath(rootdir, dvp, vp, nm, len);
3031 3032 if (tmp != NULL)
3032 3033 kmem_free(tmp, strlen(tmp) + 1);
3033 3034 }
3034 3035
3035 3036 /*
3036 3037 * Similar to vn_setpath_str(), this function sets the path of the destination
3037 3038 * vnode to the be the same as the source vnode.
3038 3039 */
3039 3040 void
3040 3041 vn_copypath(struct vnode *src, struct vnode *dst)
3041 3042 {
3042 3043 char *buf;
3043 3044 int alloc;
3044 3045
3045 3046 mutex_enter(&src->v_lock);
3046 3047 if (src->v_path == NULL) {
3047 3048 mutex_exit(&src->v_lock);
3048 3049 return;
3049 3050 }
3050 3051 alloc = strlen(src->v_path) + 1;
3051 3052
3052 3053 /* avoid kmem_alloc() with lock held */
3053 3054 mutex_exit(&src->v_lock);
3054 3055 buf = kmem_alloc(alloc, KM_SLEEP);
3055 3056 mutex_enter(&src->v_lock);
3056 3057 if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
3057 3058 mutex_exit(&src->v_lock);
3058 3059 kmem_free(buf, alloc);
3059 3060 return;
3060 3061 }
3061 3062 bcopy(src->v_path, buf, alloc);
3062 3063 mutex_exit(&src->v_lock);
3063 3064
3064 3065 mutex_enter(&dst->v_lock);
3065 3066 if (dst->v_path != NULL) {
3066 3067 mutex_exit(&dst->v_lock);
3067 3068 kmem_free(buf, alloc);
3068 3069 return;
3069 3070 }
3070 3071 dst->v_path = buf;
3071 3072 mutex_exit(&dst->v_lock);
3072 3073 }
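
vn_setpath() and vn_copypath() both follow the same sample/drop/allocate/revalidate dance, because kmem_alloc(KM_SLEEP) may block and so must not be called with v_lock held. Distilled into a minimal sketch (the caller frees the result with kmem_free(buf, strlen(buf) + 1)):

static char *
example_copy_locked_string(vnode_t *vp)
{
	char *copy;
	size_t len;

	mutex_enter(&vp->v_lock);
	if (vp->v_path == NULL) {
		mutex_exit(&vp->v_lock);
		return (NULL);
	}
	len = strlen(vp->v_path) + 1;
	mutex_exit(&vp->v_lock);	/* drop the lock before sleeping */

	copy = kmem_alloc(len, KM_SLEEP);

	mutex_enter(&vp->v_lock);
	/* Revalidate: the path may have changed while unlocked. */
	if (vp->v_path == NULL || strlen(vp->v_path) + 1 != len) {
		mutex_exit(&vp->v_lock);
		kmem_free(copy, len);
		return (NULL);
	}
	bcopy(vp->v_path, copy, len);
	mutex_exit(&vp->v_lock);
	return (copy);
}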
3073 3074
3074 3075 /*
3075 3076 * XXX Private interface for segvn routines that handle vnode
3076 3077 * large page segments.
3077 3078 *
3078 3079 * return 1 if vp's file system VOP_PAGEIO() implementation
3079 3080 * can be safely used instead of VOP_GETPAGE() for handling
3080 3081  * pagefaults against regular non-swap files. The VOP_PAGEIO()
3081 3082  * interface is considered safe here if its implementation
3082 3083  * is very close to the VOP_GETPAGE() implementation.
3083 3084  * e.g. it zeros out the part of the page beyond EOF, doesn't
3084 3085  * panic if there are file holes but instead returns an error,
3085 3086  * and doesn't assume the file won't be changed by user writes, etc.
3086 3087 *
3087 3088 * return 0 otherwise.
3088 3089 *
3089 3090 * For now allow segvn to only use VOP_PAGEIO() with ufs and nfs.
3090 3091 */
3091 3092 int
3092 3093 vn_vmpss_usepageio(vnode_t *vp)
3093 3094 {
3094 3095 vfs_t *vfsp = vp->v_vfsp;
3095 3096 char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
3096 3097 char *pageio_ok_fss[] = {"ufs", "nfs", NULL};
3097 3098 char **fsok = pageio_ok_fss;
3098 3099
3099 3100 if (fsname == NULL) {
3100 3101 return (0);
3101 3102 }
3102 3103
3103 3104 for (; *fsok; fsok++) {
3104 3105 if (strcmp(*fsok, fsname) == 0) {
3105 3106 return (1);
3106 3107 }
3107 3108 }
3108 3109 return (0);
3109 3110 }
3110 3111
3111 3112 /* VOP_XXX() macros call the corresponding fop_xxx() function */
3112 3113
3113 3114 int
3114 3115 fop_open(
3115 3116 vnode_t **vpp,
3116 3117 int mode,
3117 3118 cred_t *cr,
3118 3119 caller_context_t *ct)
3119 3120 {
3120 3121 int ret;
3121 3122 vnode_t *vp = *vpp;
3122 3123
3123 3124 VN_HOLD(vp);
3124 3125 /*
3125 3126 * Adding to the vnode counts before calling open
3126 3127 * avoids the need for a mutex. It circumvents a race
3127 3128 * condition where a query made on the vnode counts results in a
3128 3129 * false negative. The inquirer goes away believing the file is
3129 3130 * not open when there is an open on the file already under way.
3130 3131 *
3131 3132 * The counts are meant to prevent NFS from granting a delegation
3132 3133 * when it would be dangerous to do so.
3133 3134 *
3134 3135 * The vnode counts are only kept on regular files
3135 3136 */
3136 3137 if ((*vpp)->v_type == VREG) {
3137 3138 if (mode & FREAD)
3138 3139 atomic_add_32(&((*vpp)->v_rdcnt), 1);
3139 3140 if (mode & FWRITE)
3140 3141 atomic_add_32(&((*vpp)->v_wrcnt), 1);
3141 3142 }
3142 3143
3143 3144 VOPXID_MAP_CR(vp, cr);
3145 +
3146 + /*
3147 +	 * Control is passed to fsh. In the end, the underlying
3148 +	 * vop_open() is called.
3149 + */
3150 + ret = fsh_open(vpp, mode, cr, ct);
3144 3151
3145 - ret = (*(*(vpp))->v_op->vop_open)(vpp, mode, cr, ct);
3146 -
3147 3152 if (ret) {
3148 3153 /*
3149 3154 * Use the saved vp just in case the vnode ptr got trashed
3150 3155 * by the error.
3151 3156 */
3152 3157 VOPSTATS_UPDATE(vp, open);
3153 3158 if ((vp->v_type == VREG) && (mode & FREAD))
3154 3159 atomic_add_32(&(vp->v_rdcnt), -1);
3155 3160 if ((vp->v_type == VREG) && (mode & FWRITE))
3156 3161 atomic_add_32(&(vp->v_wrcnt), -1);
3157 3162 } else {
3158 3163 /*
3159 3164 * Some filesystems will return a different vnode,
3160 3165 * but the same path was still used to open it.
3161 3166 * So if we do change the vnode and need to
3162 3167 * copy over the path, do so here, rather than special
3163 3168 * casing each filesystem. Adjust the vnode counts to
3164 3169 * reflect the vnode switch.
3165 3170 */
3166 3171 VOPSTATS_UPDATE(*vpp, open);
3167 3172 if (*vpp != vp && *vpp != NULL) {
3168 3173 vn_copypath(vp, *vpp);
3169 3174 if (((*vpp)->v_type == VREG) && (mode & FREAD))
3170 3175 atomic_add_32(&((*vpp)->v_rdcnt), 1);
3171 3176 if ((vp->v_type == VREG) && (mode & FREAD))
3172 3177 atomic_add_32(&(vp->v_rdcnt), -1);
3173 3178 if (((*vpp)->v_type == VREG) && (mode & FWRITE))
3174 3179 atomic_add_32(&((*vpp)->v_wrcnt), 1);
3175 3180 if ((vp->v_type == VREG) && (mode & FWRITE))
3176 3181 atomic_add_32(&(vp->v_wrcnt), -1);
3177 3182 }
3178 3183 }
3179 3184 VN_RELE(vp);
3180 3185 return (ret);
3181 3186 }
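
The fsh layer itself is not shown in this webrev (it lives behind sys/fsh_impl.h), but conceptually fsh_open() and its siblings wrap the direct v_op call that used to sit here. A speculative sketch of the shape, assuming a hook-dispatch design; the real interface may differ:

int
example_fsh_open(vnode_t **vpp, int mode, cred_t *cr, caller_context_t *ct)
{
	int err;

	/* ... run any pre-open hooks installed on (*vpp)->v_vfsp ... */

	/* The call that fop_open() used to make directly: */
	err = (*(*vpp)->v_op->vop_open)(vpp, mode, cr, ct);

	/* ... run any post-open hooks before returning ... */
	return (err);
}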
3182 3187
3183 3188 int
3184 3189 fop_close(
3185 3190 vnode_t *vp,
3186 3191 int flag,
3187 3192 int count,
3188 3193 offset_t offset,
3189 3194 cred_t *cr,
3190 3195 caller_context_t *ct)
3191 3196 {
3192 3197 int err;
3193 3198
3194 3199 VOPXID_MAP_CR(vp, cr);
3195 -
3196 - err = (*(vp)->v_op->vop_close)(vp, flag, count, offset, cr, ct);
3200 +
3201 + err = fsh_close(vp, flag, count, offset, cr, ct);
3197 3202 VOPSTATS_UPDATE(vp, close);
3198 3203 /*
3199 3204 * Check passed in count to handle possible dups. Vnode counts are only
3200 3205 * kept on regular files
3201 3206 */
3202 3207 if ((vp->v_type == VREG) && (count == 1)) {
3203 3208 if (flag & FREAD) {
3204 3209 ASSERT(vp->v_rdcnt > 0);
3205 3210 atomic_add_32(&(vp->v_rdcnt), -1);
3206 3211 }
3207 3212 if (flag & FWRITE) {
3208 3213 ASSERT(vp->v_wrcnt > 0);
3209 3214 atomic_add_32(&(vp->v_wrcnt), -1);
3210 3215 }
3211 3216 }
3212 3217 return (err);
3213 3218 }
3214 3219
3215 3220 int
3216 3221 fop_read(
3217 3222 vnode_t *vp,
3218 3223 uio_t *uiop,
3219 3224 int ioflag,
3220 3225 cred_t *cr,
3221 3226 caller_context_t *ct)
3222 3227 {
3223 3228 int err;
3224 3229 ssize_t resid_start = uiop->uio_resid;
3225 3230
3226 3231 VOPXID_MAP_CR(vp, cr);
3227 -
3228 - err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
3232 +
3233 + err = fsh_read(vp, uiop, ioflag, cr, ct);
3229 3234 VOPSTATS_UPDATE_IO(vp, read,
3230 3235 read_bytes, (resid_start - uiop->uio_resid));
3231 3236 return (err);
3232 3237 }
3233 3238
3234 3239 int
3235 3240 fop_write(
3236 3241 vnode_t *vp,
3237 3242 uio_t *uiop,
3238 3243 int ioflag,
3239 3244 cred_t *cr,
3240 3245 caller_context_t *ct)
3241 3246 {
3242 3247 int err;
3243 3248 ssize_t resid_start = uiop->uio_resid;
3244 3249
3245 3250 VOPXID_MAP_CR(vp, cr);
3246 3251
3247 - err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
3252 + err = fsh_write(vp, uiop, ioflag, cr, ct);
3248 3253 VOPSTATS_UPDATE_IO(vp, write,
3249 3254 write_bytes, (resid_start - uiop->uio_resid));
3250 3255 return (err);
3251 3256 }
3252 3257
3253 3258 int
3254 3259 fop_ioctl(
3255 3260 vnode_t *vp,
3256 3261 int cmd,
3257 3262 intptr_t arg,
3258 3263 int flag,
3259 3264 cred_t *cr,
3260 3265 int *rvalp,
3261 3266 caller_context_t *ct)
3262 3267 {
3263 3268 int err;
3264 3269
3265 3270 VOPXID_MAP_CR(vp, cr);
3266 3271
3267 3272 err = (*(vp)->v_op->vop_ioctl)(vp, cmd, arg, flag, cr, rvalp, ct);
3268 3273 VOPSTATS_UPDATE(vp, ioctl);
3269 3274 return (err);
3270 3275 }
3271 3276
3272 3277 int
3273 3278 fop_setfl(
3274 3279 vnode_t *vp,
3275 3280 int oflags,
3276 3281 int nflags,
3277 3282 cred_t *cr,
3278 3283 caller_context_t *ct)
3279 3284 {
3280 3285 int err;
3281 3286
3282 3287 VOPXID_MAP_CR(vp, cr);
3283 3288
3284 3289 err = (*(vp)->v_op->vop_setfl)(vp, oflags, nflags, cr, ct);
3285 3290 VOPSTATS_UPDATE(vp, setfl);
3286 3291 return (err);
3287 3292 }
3288 3293
3289 3294 int
3290 3295 fop_getattr(
3291 3296 vnode_t *vp,
3292 3297 vattr_t *vap,
3293 3298 int flags,
3294 3299 cred_t *cr,
3295 3300 caller_context_t *ct)
3296 3301 {
3297 3302 int err;
3298 3303
3299 3304 VOPXID_MAP_CR(vp, cr);
3300 3305
3301 3306 /*
3302 3307 * If this file system doesn't understand the xvattr extensions
3303 3308 * then turn off the xvattr bit.
3304 3309 */
3305 3310 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3306 3311 vap->va_mask &= ~AT_XVATTR;
3307 3312 }
3308 3313
3309 3314 /*
3310 3315 * We're only allowed to skip the ACL check iff we used a 32 bit
3311 3316 * ACE mask with VOP_ACCESS() to determine permissions.
3312 3317 */
3313 3318 if ((flags & ATTR_NOACLCHECK) &&
3314 3319 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3315 3320 return (EINVAL);
3316 3321 }
3317 3322 err = (*(vp)->v_op->vop_getattr)(vp, vap, flags, cr, ct);
3318 3323 VOPSTATS_UPDATE(vp, getattr);
3319 3324 return (err);
3320 3325 }
3321 3326
3322 3327 int
3323 3328 fop_setattr(
3324 3329 vnode_t *vp,
3325 3330 vattr_t *vap,
3326 3331 int flags,
3327 3332 cred_t *cr,
3328 3333 caller_context_t *ct)
3329 3334 {
3330 3335 int err;
3331 3336
3332 3337 VOPXID_MAP_CR(vp, cr);
3333 3338
3334 3339 /*
3335 3340 * If this file system doesn't understand the xvattr extensions
3336 3341 * then turn off the xvattr bit.
3337 3342 */
3338 3343 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3339 3344 vap->va_mask &= ~AT_XVATTR;
3340 3345 }
3341 3346
3342 3347 /*
3343 3348 * We're only allowed to skip the ACL check iff we used a 32 bit
3344 3349 * ACE mask with VOP_ACCESS() to determine permissions.
3345 3350 */
3346 3351 if ((flags & ATTR_NOACLCHECK) &&
3347 3352 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3348 3353 return (EINVAL);
3349 3354 }
3350 3355 err = (*(vp)->v_op->vop_setattr)(vp, vap, flags, cr, ct);
3351 3356 VOPSTATS_UPDATE(vp, setattr);
3352 3357 return (err);
3353 3358 }
3354 3359
3355 3360 int
3356 3361 fop_access(
3357 3362 vnode_t *vp,
3358 3363 int mode,
3359 3364 int flags,
3360 3365 cred_t *cr,
3361 3366 caller_context_t *ct)
3362 3367 {
3363 3368 int err;
3364 3369
3365 3370 if ((flags & V_ACE_MASK) &&
3366 3371 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3367 3372 return (EINVAL);
3368 3373 }
3369 3374
3370 3375 VOPXID_MAP_CR(vp, cr);
3371 3376
3372 3377 err = (*(vp)->v_op->vop_access)(vp, mode, flags, cr, ct);
3373 3378 VOPSTATS_UPDATE(vp, access);
3374 3379 return (err);
3375 3380 }
3376 3381
3377 3382 int
3378 3383 fop_lookup(
3379 3384 vnode_t *dvp,
3380 3385 char *nm,
3381 3386 vnode_t **vpp,
3382 3387 pathname_t *pnp,
3383 3388 int flags,
3384 3389 vnode_t *rdir,
3385 3390 cred_t *cr,
3386 3391 caller_context_t *ct,
3387 3392 int *deflags, /* Returned per-dirent flags */
3388 3393 pathname_t *ppnp) /* Returned case-preserved name in directory */
3389 3394 {
3390 3395 int ret;
3391 3396
3392 3397 /*
3393 3398 * If this file system doesn't support case-insensitive access
3394 3399 * and said access is requested, fail quickly. It is required
3395 3400 * that if the vfs supports case-insensitive lookup, it also
3396 3401 * supports extended dirent flags.
3397 3402 */
3398 3403 if (flags & FIGNORECASE &&
3399 3404 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3400 3405 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3401 3406 return (EINVAL);
3402 3407
3403 3408 VOPXID_MAP_CR(dvp, cr);
3404 3409
3405 3410 if ((flags & LOOKUP_XATTR) && (flags & LOOKUP_HAVE_SYSATTR_DIR) == 0) {
3406 3411 ret = xattr_dir_lookup(dvp, vpp, flags, cr);
3407 3412 } else {
3408 3413 ret = (*(dvp)->v_op->vop_lookup)
3409 3414 (dvp, nm, vpp, pnp, flags, rdir, cr, ct, deflags, ppnp);
3410 3415 }
3411 3416 if (ret == 0 && *vpp) {
3412 3417 VOPSTATS_UPDATE(*vpp, lookup);
3413 3418 if ((*vpp)->v_path == NULL) {
3414 3419 vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
3415 3420 }
3416 3421 }
3417 3422
3418 3423 return (ret);
3419 3424 }
3420 3425
3421 3426 int
3422 3427 fop_create(
3423 3428 vnode_t *dvp,
3424 3429 char *name,
3425 3430 vattr_t *vap,
3426 3431 vcexcl_t excl,
3427 3432 int mode,
3428 3433 vnode_t **vpp,
3429 3434 cred_t *cr,
3430 3435 int flags,
3431 3436 caller_context_t *ct,
3432 3437 vsecattr_t *vsecp) /* ACL to set during create */
3433 3438 {
3434 3439 int ret;
3435 3440
3436 3441 if (vsecp != NULL &&
3437 3442 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3438 3443 return (EINVAL);
3439 3444 }
3440 3445 /*
3441 3446 * If this file system doesn't support case-insensitive access
3442 3447 * and said access is requested, fail quickly.
3443 3448 */
3444 3449 if (flags & FIGNORECASE &&
3445 3450 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3446 3451 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3447 3452 return (EINVAL);
3448 3453
3449 3454 VOPXID_MAP_CR(dvp, cr);
3450 3455
3451 3456 ret = (*(dvp)->v_op->vop_create)
3452 3457 (dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp);
3453 3458 if (ret == 0 && *vpp) {
3454 3459 VOPSTATS_UPDATE(*vpp, create);
3455 3460 if ((*vpp)->v_path == NULL) {
3456 3461 vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
3457 3462 }
3458 3463 }
3459 3464
3460 3465 return (ret);
3461 3466 }
3462 3467
3463 3468 int
3464 3469 fop_remove(
3465 3470 vnode_t *dvp,
3466 3471 char *nm,
3467 3472 cred_t *cr,
3468 3473 caller_context_t *ct,
3469 3474 int flags)
3470 3475 {
3471 3476 int err;
3472 3477
3473 3478 /*
3474 3479 * If this file system doesn't support case-insensitive access
3475 3480 * and said access is requested, fail quickly.
3476 3481 */
3477 3482 if (flags & FIGNORECASE &&
3478 3483 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3479 3484 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3480 3485 return (EINVAL);
3481 3486
3482 3487 VOPXID_MAP_CR(dvp, cr);
3483 3488
3484 3489 err = (*(dvp)->v_op->vop_remove)(dvp, nm, cr, ct, flags);
3485 3490 VOPSTATS_UPDATE(dvp, remove);
3486 3491 return (err);
3487 3492 }
3488 3493
3489 3494 int
3490 3495 fop_link(
3491 3496 vnode_t *tdvp,
3492 3497 vnode_t *svp,
3493 3498 char *tnm,
3494 3499 cred_t *cr,
3495 3500 caller_context_t *ct,
3496 3501 int flags)
3497 3502 {
3498 3503 int err;
3499 3504
3500 3505 /*
3501 3506 * If the target file system doesn't support case-insensitive access
3502 3507 * and said access is requested, fail quickly.
3503 3508 */
3504 3509 if (flags & FIGNORECASE &&
3505 3510 (vfs_has_feature(tdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3506 3511 vfs_has_feature(tdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3507 3512 return (EINVAL);
3508 3513
3509 3514 VOPXID_MAP_CR(tdvp, cr);
3510 3515
3511 3516 err = (*(tdvp)->v_op->vop_link)(tdvp, svp, tnm, cr, ct, flags);
3512 3517 VOPSTATS_UPDATE(tdvp, link);
3513 3518 return (err);
3514 3519 }
3515 3520
3516 3521 int
3517 3522 fop_rename(
3518 3523 vnode_t *sdvp,
3519 3524 char *snm,
3520 3525 vnode_t *tdvp,
3521 3526 char *tnm,
3522 3527 cred_t *cr,
3523 3528 caller_context_t *ct,
3524 3529 int flags)
3525 3530 {
3526 3531 int err;
3527 3532
3528 3533 /*
3529 3534 * If the file system involved does not support
3530 3535 * case-insensitive access and said access is requested, fail
3531 3536 * quickly.
3532 3537 */
3533 3538 if (flags & FIGNORECASE &&
3534 3539 ((vfs_has_feature(sdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3535 3540 vfs_has_feature(sdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)))
3536 3541 return (EINVAL);
3537 3542
3538 3543 VOPXID_MAP_CR(tdvp, cr);
3539 3544
3540 3545 err = (*(sdvp)->v_op->vop_rename)(sdvp, snm, tdvp, tnm, cr, ct, flags);
3541 3546 VOPSTATS_UPDATE(sdvp, rename);
3542 3547 return (err);
3543 3548 }
3544 3549
3545 3550 int
3546 3551 fop_mkdir(
3547 3552 vnode_t *dvp,
3548 3553 char *dirname,
3549 3554 vattr_t *vap,
3550 3555 vnode_t **vpp,
3551 3556 cred_t *cr,
3552 3557 caller_context_t *ct,
3553 3558 int flags,
3554 3559 vsecattr_t *vsecp) /* ACL to set during create */
3555 3560 {
3556 3561 int ret;
3557 3562
3558 3563 if (vsecp != NULL &&
3559 3564 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3560 3565 return (EINVAL);
3561 3566 }
3562 3567 /*
3563 3568 * If this file system doesn't support case-insensitive access
3564 3569 * and said access is requested, fail quickly.
3565 3570 */
3566 3571 if (flags & FIGNORECASE &&
3567 3572 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3568 3573 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3569 3574 return (EINVAL);
3570 3575
3571 3576 VOPXID_MAP_CR(dvp, cr);
3572 3577
3573 3578 ret = (*(dvp)->v_op->vop_mkdir)
3574 3579 (dvp, dirname, vap, vpp, cr, ct, flags, vsecp);
3575 3580 if (ret == 0 && *vpp) {
3576 3581 VOPSTATS_UPDATE(*vpp, mkdir);
3577 3582 if ((*vpp)->v_path == NULL) {
3578 3583 vn_setpath(rootdir, dvp, *vpp, dirname,
3579 3584 strlen(dirname));
3580 3585 }
3581 3586 }
3582 3587
3583 3588 return (ret);
3584 3589 }
3585 3590
3586 3591 int
3587 3592 fop_rmdir(
3588 3593 vnode_t *dvp,
3589 3594 char *nm,
3590 3595 vnode_t *cdir,
3591 3596 cred_t *cr,
3592 3597 caller_context_t *ct,
3593 3598 int flags)
3594 3599 {
3595 3600 int err;
3596 3601
3597 3602 /*
3598 3603 * If this file system doesn't support case-insensitive access
3599 3604 * and said access is requested, fail quickly.
3600 3605 */
3601 3606 if (flags & FIGNORECASE &&
3602 3607 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3603 3608 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3604 3609 return (EINVAL);
3605 3610
3606 3611 VOPXID_MAP_CR(dvp, cr);
3607 3612
3608 3613 err = (*(dvp)->v_op->vop_rmdir)(dvp, nm, cdir, cr, ct, flags);
3609 3614 VOPSTATS_UPDATE(dvp, rmdir);
3610 3615 return (err);
3611 3616 }
3612 3617
3613 3618 int
3614 3619 fop_readdir(
3615 3620 vnode_t *vp,
3616 3621 uio_t *uiop,
3617 3622 cred_t *cr,
3618 3623 int *eofp,
3619 3624 caller_context_t *ct,
3620 3625 int flags)
3621 3626 {
3622 3627 int err;
3623 3628 ssize_t resid_start = uiop->uio_resid;
3624 3629
3625 3630 /*
3626 3631 * If this file system doesn't support retrieving directory
3627 3632 * entry flags and said access is requested, fail quickly.
3628 3633 */
3629 3634 if (flags & V_RDDIR_ENTFLAGS &&
3630 3635 vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS) == 0)
3631 3636 return (EINVAL);
3632 3637
3633 3638 VOPXID_MAP_CR(vp, cr);
3634 3639
3635 3640 err = (*(vp)->v_op->vop_readdir)(vp, uiop, cr, eofp, ct, flags);
3636 3641 VOPSTATS_UPDATE_IO(vp, readdir,
3637 3642 readdir_bytes, (resid_start - uiop->uio_resid));
3638 3643 return (err);
3639 3644 }
3640 3645
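/*
 * Worked example of the byte accounting above: if uio_resid is 8192 on
 * entry and the file system fills 5120 bytes of directory entries, then
 * uio_resid is 3072 on return and VOPSTATS_UPDATE_IO() records
 * 8192 - 3072 = 5120 bytes in readdir_bytes.
 */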
3641 3646 int
3642 3647 fop_symlink(
3643 3648 vnode_t *dvp,
3644 3649 char *linkname,
3645 3650 vattr_t *vap,
3646 3651 char *target,
3647 3652 cred_t *cr,
3648 3653 caller_context_t *ct,
3649 3654 int flags)
3650 3655 {
3651 3656 int err;
3652 3657 xvattr_t xvattr;
3653 3658
3654 3659 /*
3655 3660 * If this file system doesn't support case-insensitive access
3656 3661 * and said access is requested, fail quickly.
3657 3662 */
3658 3663 if (flags & FIGNORECASE &&
3659 3664 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3660 3665 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3661 3666 return (EINVAL);
3662 3667
3663 3668 VOPXID_MAP_CR(dvp, cr);
3664 3669
3665 3670 /* check for reparse point */
3666 3671 if ((vfs_has_feature(dvp->v_vfsp, VFSFT_REPARSE)) &&
3667 3672 (strncmp(target, FS_REPARSE_TAG_STR,
3668 3673 strlen(FS_REPARSE_TAG_STR)) == 0)) {
3669 3674 if (!fs_reparse_mark(target, vap, &xvattr))
3670 3675 vap = (vattr_t *)&xvattr;
3671 3676 }
3672 3677
3673 3678 err = (*(dvp)->v_op->vop_symlink)
3674 3679 (dvp, linkname, vap, target, cr, ct, flags);
3675 3680 VOPSTATS_UPDATE(dvp, symlink);
3676 3681 return (err);
3677 3682 }
3678 3683
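/*
 * Sketch of the reparse handling below: a reparse point is created as a
 * symlink whose target begins with FS_REPARSE_TAG_STR (fs/fs_reparse.h).
 * When the file system advertises VFSFT_REPARSE and the target carries
 * the tag, fs_reparse_mark() replaces the caller's vattr with an xvattr
 * that has XAT_REPARSE requested, so the link is created already marked
 * as a reparse point; if marking fails, the plain vattr is used as-is.
 */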
3679 3684 int
3680 3685 fop_readlink(
3681 3686 vnode_t *vp,
3682 3687 uio_t *uiop,
3683 3688 cred_t *cr,
3684 3689 caller_context_t *ct)
3685 3690 {
3686 3691 int err;
3687 3692
3688 3693 VOPXID_MAP_CR(vp, cr);
3689 3694
3690 3695 err = (*(vp)->v_op->vop_readlink)(vp, uiop, cr, ct);
3691 3696 VOPSTATS_UPDATE(vp, readlink);
3692 3697 return (err);
3693 3698 }
3694 3699
3695 3700 int
3696 3701 fop_fsync(
3697 3702 vnode_t *vp,
3698 3703 int syncflag,
3699 3704 cred_t *cr,
3700 3705 caller_context_t *ct)
3701 3706 {
3702 3707 int err;
3703 3708
3704 3709 VOPXID_MAP_CR(vp, cr);
3705 3710
3706 3711 err = (*(vp)->v_op->vop_fsync)(vp, syncflag, cr, ct);
3707 3712 VOPSTATS_UPDATE(vp, fsync);
3708 3713 return (err);
3709 3714 }
3710 3715
3711 3716 void
3712 3717 fop_inactive(
3713 3718 vnode_t *vp,
3714 3719 cred_t *cr,
3715 3720 caller_context_t *ct)
3716 3721 {
3717 3722 /* Need to update stats before vop call since we may lose the vnode */
3718 3723 VOPSTATS_UPDATE(vp, inactive);
3719 3724
3720 3725 VOPXID_MAP_CR(vp, cr);
3721 3726
3722 3727 (*(vp)->v_op->vop_inactive)(vp, cr, ct);
3723 3728 }
3724 3729
3725 3730 int
3726 3731 fop_fid(
3727 3732 vnode_t *vp,
3728 3733 fid_t *fidp,
3729 3734 caller_context_t *ct)
3730 3735 {
3731 3736 int err;
3732 3737
3733 3738 err = (*(vp)->v_op->vop_fid)(vp, fidp, ct);
3734 3739 VOPSTATS_UPDATE(vp, fid);
3735 3740 return (err);
3736 3741 }
3737 3742
3738 3743 int
3739 3744 fop_rwlock(
3740 3745 vnode_t *vp,
3741 3746 int write_lock,
3742 3747 caller_context_t *ct)
3743 3748 {
3744 3749 int ret;
3745 3750
3746 3751 ret = ((*(vp)->v_op->vop_rwlock)(vp, write_lock, ct));
3747 3752 VOPSTATS_UPDATE(vp, rwlock);
3748 3753 return (ret);
3749 3754 }
3750 3755
3751 3756 void
3752 3757 fop_rwunlock(
3753 3758 vnode_t *vp,
3754 3759 int write_lock,
3755 3760 caller_context_t *ct)
3756 3761 {
3757 3762 (*(vp)->v_op->vop_rwunlock)(vp, write_lock, ct);
3758 3763 VOPSTATS_UPDATE(vp, rwunlock);
3759 3764 }
3760 3765
3761 3766 int
3762 3767 fop_seek(
3763 3768 vnode_t *vp,
3764 3769 offset_t ooff,
3765 3770 offset_t *noffp,
3766 3771 caller_context_t *ct)
3767 3772 {
3768 3773 int err;
3769 3774
3770 3775 err = (*(vp)->v_op->vop_seek)(vp, ooff, noffp, ct);
3771 3776 VOPSTATS_UPDATE(vp, seek);
3772 3777 return (err);
3773 3778 }
3774 3779
3775 3780 int
3776 3781 fop_cmp(
3777 3782 vnode_t *vp1,
3778 3783 vnode_t *vp2,
3779 3784 caller_context_t *ct)
3780 3785 {
3781 3786 int err;
3782 3787
3783 3788 err = (*(vp1)->v_op->vop_cmp)(vp1, vp2, ct);
3784 3789 VOPSTATS_UPDATE(vp1, cmp);
3785 3790 return (err);
3786 3791 }
3787 3792
3788 3793 int
3789 3794 fop_frlock(
3790 3795 vnode_t *vp,
3791 3796 int cmd,
3792 3797 flock64_t *bfp,
3793 3798 int flag,
3794 3799 offset_t offset,
3795 3800 struct flk_callback *flk_cbp,
3796 3801 cred_t *cr,
3797 3802 caller_context_t *ct)
3798 3803 {
3799 3804 int err;
3800 3805
3801 3806 VOPXID_MAP_CR(vp, cr);
3802 3807
3803 3808 err = (*(vp)->v_op->vop_frlock)
3804 3809 (vp, cmd, bfp, flag, offset, flk_cbp, cr, ct);
3805 3810 VOPSTATS_UPDATE(vp, frlock);
3806 3811 return (err);
3807 3812 }
3808 3813
3809 3814 int
3810 3815 fop_space(
3811 3816 vnode_t *vp,
3812 3817 int cmd,
3813 3818 flock64_t *bfp,
3814 3819 int flag,
3815 3820 offset_t offset,
3816 3821 cred_t *cr,
3817 3822 caller_context_t *ct)
3818 3823 {
3819 3824 int err;
3820 3825
3821 3826 VOPXID_MAP_CR(vp, cr);
3822 3827
3823 3828 err = (*(vp)->v_op->vop_space)(vp, cmd, bfp, flag, offset, cr, ct);
3824 3829 VOPSTATS_UPDATE(vp, space);
3825 3830 return (err);
3826 3831 }
3827 3832
3828 3833 int
3829 3834 fop_realvp(
3830 3835 vnode_t *vp,
3831 3836 vnode_t **vpp,
3832 3837 caller_context_t *ct)
3833 3838 {
3834 3839 int err;
3835 3840
3836 3841 err = (*(vp)->v_op->vop_realvp)(vp, vpp, ct);
3837 3842 VOPSTATS_UPDATE(vp, realvp);
3838 3843 return (err);
3839 3844 }
3840 3845
3841 3846 int
3842 3847 fop_getpage(
3843 3848 vnode_t *vp,
3844 3849 offset_t off,
3845 3850 size_t len,
3846 3851 uint_t *protp,
3847 3852 page_t **plarr,
3848 3853 size_t plsz,
3849 3854 struct seg *seg,
3850 3855 caddr_t addr,
3851 3856 enum seg_rw rw,
3852 3857 cred_t *cr,
3853 3858 caller_context_t *ct)
3854 3859 {
3855 3860 int err;
3856 3861
3857 3862 VOPXID_MAP_CR(vp, cr);
3858 3863
3859 3864 err = (*(vp)->v_op->vop_getpage)
3860 3865 (vp, off, len, protp, plarr, plsz, seg, addr, rw, cr, ct);
3861 3866 VOPSTATS_UPDATE(vp, getpage);
3862 3867 return (err);
3863 3868 }
3864 3869
3865 3870 int
3866 3871 fop_putpage(
3867 3872 vnode_t *vp,
3868 3873 offset_t off,
3869 3874 size_t len,
3870 3875 int flags,
3871 3876 cred_t *cr,
3872 3877 caller_context_t *ct)
3873 3878 {
3874 3879 int err;
3875 3880
3876 3881 VOPXID_MAP_CR(vp, cr);
3877 3882
3878 3883 err = (*(vp)->v_op->vop_putpage)(vp, off, len, flags, cr, ct);
3879 3884 VOPSTATS_UPDATE(vp, putpage);
3880 3885 return (err);
3881 3886 }
3882 3887
3883 3888 int
3884 3889 fop_map(
3885 3890 vnode_t *vp,
3886 3891 offset_t off,
3887 3892 struct as *as,
3888 3893 caddr_t *addrp,
3889 3894 size_t len,
3890 3895 uchar_t prot,
3891 3896 uchar_t maxprot,
3892 3897 uint_t flags,
3893 3898 cred_t *cr,
3894 3899 caller_context_t *ct)
3895 3900 {
3896 3901 int err;
3897 3902
3898 3903 VOPXID_MAP_CR(vp, cr);
3899 3904
3900 3905 err = (*(vp)->v_op->vop_map)
3901 3906 (vp, off, as, addrp, len, prot, maxprot, flags, cr, ct);
3902 3907 VOPSTATS_UPDATE(vp, map);
3903 3908 return (err);
3904 3909 }
3905 3910
3906 3911 int
3907 3912 fop_addmap(
3908 3913 vnode_t *vp,
3909 3914 offset_t off,
3910 3915 struct as *as,
3911 3916 caddr_t addr,
3912 3917 size_t len,
3913 3918 uchar_t prot,
3914 3919 uchar_t maxprot,
3915 3920 uint_t flags,
3916 3921 cred_t *cr,
3917 3922 caller_context_t *ct)
3918 3923 {
3919 3924 int error;
3920 3925 u_longlong_t delta;
3921 3926
3922 3927 VOPXID_MAP_CR(vp, cr);
3923 3928
3924 3929 error = (*(vp)->v_op->vop_addmap)
3925 3930 (vp, off, as, addr, len, prot, maxprot, flags, cr, ct);
3926 3931
3927 3932 if ((!error) && (vp->v_type == VREG)) {
3928 3933 delta = (u_longlong_t)btopr(len);
3929 3934 /*
 3930 3935 		 * A MAP_PRIVATE mapping can't be written back even if the
 3931 3936 		 * file is open for write, so account for it as a read.
3932 3937 */
3933 3938 if (flags & MAP_PRIVATE) {
3934 3939 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3935 3940 (int64_t)delta);
3936 3941 } else {
3937 3942 /*
3938 3943 * atomic_add_64 forces the fetch of a 64 bit value to
3939 3944 * be atomic on 32 bit machines
3940 3945 */
3941 3946 if (maxprot & PROT_WRITE)
3942 3947 atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
3943 3948 (int64_t)delta);
3944 3949 if (maxprot & PROT_READ)
3945 3950 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3946 3951 (int64_t)delta);
3947 3952 if (maxprot & PROT_EXEC)
3948 3953 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3949 3954 (int64_t)delta);
3950 3955 }
3951 3956 }
3952 3957 VOPSTATS_UPDATE(vp, addmap);
3953 3958 return (error);
3954 3959 }
3955 3960
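/*
 * Worked example of the mapping accounting above, assuming 4K pages: an
 * addmap of len = 10000 bytes yields delta = btopr(10000) = 3 pages. A
 * MAP_SHARED mapping with maxprot = PROT_READ|PROT_WRITE then adds 3 to
 * both v_mmap_read and v_mmap_write, while a MAP_PRIVATE mapping adds 3
 * to v_mmap_read only, since its pages are never written back.
 */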
3956 3961 int
3957 3962 fop_delmap(
3958 3963 vnode_t *vp,
3959 3964 offset_t off,
3960 3965 struct as *as,
3961 3966 caddr_t addr,
3962 3967 size_t len,
3963 3968 uint_t prot,
3964 3969 uint_t maxprot,
3965 3970 uint_t flags,
3966 3971 cred_t *cr,
3967 3972 caller_context_t *ct)
3968 3973 {
3969 3974 int error;
3970 3975 u_longlong_t delta;
3971 3976
3972 3977 VOPXID_MAP_CR(vp, cr);
3973 3978
3974 3979 error = (*(vp)->v_op->vop_delmap)
3975 3980 (vp, off, as, addr, len, prot, maxprot, flags, cr, ct);
3976 3981
3977 3982 /*
 3978 3983 	 * NFS calls into delmap twice: the first call merely
 3979 3984 	 * establishes a callback mechanism and returns EAGAIN,
 3980 3985 	 * while the real work is done on the second invocation.
3981 3986 * We have to detect this here and only decrement the counts upon
3982 3987 * the second delmap request.
3983 3988 */
3984 3989 if ((error != EAGAIN) && (vp->v_type == VREG)) {
3985 3990
3986 3991 delta = (u_longlong_t)btopr(len);
3987 3992
3988 3993 if (flags & MAP_PRIVATE) {
3989 3994 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3990 3995 (int64_t)(-delta));
3991 3996 } else {
3992 3997 /*
3993 3998 * atomic_add_64 forces the fetch of a 64 bit value
3994 3999 * to be atomic on 32 bit machines
3995 4000 */
3996 4001 if (maxprot & PROT_WRITE)
3997 4002 atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
3998 4003 (int64_t)(-delta));
3999 4004 if (maxprot & PROT_READ)
4000 4005 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4001 4006 (int64_t)(-delta));
4002 4007 if (maxprot & PROT_EXEC)
4003 4008 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4004 4009 (int64_t)(-delta));
4005 4010 }
4006 4011 }
4007 4012 VOPSTATS_UPDATE(vp, delmap);
4008 4013 return (error);
4009 4014 }
4010 4015
4011 4016
4012 4017 int
4013 4018 fop_poll(
4014 4019 vnode_t *vp,
4015 4020 short events,
4016 4021 int anyyet,
4017 4022 short *reventsp,
4018 4023 struct pollhead **phpp,
4019 4024 caller_context_t *ct)
4020 4025 {
4021 4026 int err;
4022 4027
4023 4028 err = (*(vp)->v_op->vop_poll)(vp, events, anyyet, reventsp, phpp, ct);
4024 4029 VOPSTATS_UPDATE(vp, poll);
4025 4030 return (err);
4026 4031 }
4027 4032
4028 4033 int
4029 4034 fop_dump(
4030 4035 vnode_t *vp,
4031 4036 caddr_t addr,
4032 4037 offset_t lbdn,
4033 4038 offset_t dblks,
4034 4039 caller_context_t *ct)
4035 4040 {
4036 4041 int err;
4037 4042
4038 4043 /* ensure lbdn and dblks can be passed safely to bdev_dump */
4039 4044 if ((lbdn != (daddr_t)lbdn) || (dblks != (int)dblks))
4040 4045 return (EIO);
4041 4046
4042 4047 err = (*(vp)->v_op->vop_dump)(vp, addr, lbdn, dblks, ct);
4043 4048 VOPSTATS_UPDATE(vp, dump);
4044 4049 return (err);
4045 4050 }
4046 4051
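/*
 * Worked example of the range check above, assuming a 32-bit daddr_t:
 *
 *	offset_t lbdn = 0x100000000LL;	... 2^32
 *	(daddr_t)lbdn truncates to 0, so lbdn != (daddr_t)lbdn
 *
 * and the request is rejected with EIO rather than handing a silently
 * truncated block number to bdev_dump().
 */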
4047 4052 int
4048 4053 fop_pathconf(
4049 4054 vnode_t *vp,
4050 4055 int cmd,
4051 4056 ulong_t *valp,
4052 4057 cred_t *cr,
4053 4058 caller_context_t *ct)
4054 4059 {
4055 4060 int err;
4056 4061
4057 4062 VOPXID_MAP_CR(vp, cr);
4058 4063
4059 4064 err = (*(vp)->v_op->vop_pathconf)(vp, cmd, valp, cr, ct);
4060 4065 VOPSTATS_UPDATE(vp, pathconf);
4061 4066 return (err);
4062 4067 }
4063 4068
4064 4069 int
4065 4070 fop_pageio(
4066 4071 vnode_t *vp,
4067 4072 struct page *pp,
4068 4073 u_offset_t io_off,
4069 4074 size_t io_len,
4070 4075 int flags,
4071 4076 cred_t *cr,
4072 4077 caller_context_t *ct)
4073 4078 {
4074 4079 int err;
4075 4080
4076 4081 VOPXID_MAP_CR(vp, cr);
4077 4082
4078 4083 err = (*(vp)->v_op->vop_pageio)(vp, pp, io_off, io_len, flags, cr, ct);
4079 4084 VOPSTATS_UPDATE(vp, pageio);
4080 4085 return (err);
4081 4086 }
4082 4087
4083 4088 int
4084 4089 fop_dumpctl(
4085 4090 vnode_t *vp,
4086 4091 int action,
4087 4092 offset_t *blkp,
4088 4093 caller_context_t *ct)
4089 4094 {
4090 4095 int err;
4091 4096 err = (*(vp)->v_op->vop_dumpctl)(vp, action, blkp, ct);
4092 4097 VOPSTATS_UPDATE(vp, dumpctl);
4093 4098 return (err);
4094 4099 }
4095 4100
4096 4101 void
4097 4102 fop_dispose(
4098 4103 vnode_t *vp,
4099 4104 page_t *pp,
4100 4105 int flag,
4101 4106 int dn,
4102 4107 cred_t *cr,
4103 4108 caller_context_t *ct)
4104 4109 {
4105 4110 /* Must do stats first since it's possible to lose the vnode */
4106 4111 VOPSTATS_UPDATE(vp, dispose);
4107 4112
4108 4113 VOPXID_MAP_CR(vp, cr);
4109 4114
4110 4115 (*(vp)->v_op->vop_dispose)(vp, pp, flag, dn, cr, ct);
4111 4116 }
4112 4117
4113 4118 int
4114 4119 fop_setsecattr(
4115 4120 vnode_t *vp,
4116 4121 vsecattr_t *vsap,
4117 4122 int flag,
4118 4123 cred_t *cr,
4119 4124 caller_context_t *ct)
4120 4125 {
4121 4126 int err;
4122 4127
4123 4128 VOPXID_MAP_CR(vp, cr);
4124 4129
4125 4130 /*
4126 4131 * We're only allowed to skip the ACL check iff we used a 32 bit
4127 4132 * ACE mask with VOP_ACCESS() to determine permissions.
4128 4133 */
4129 4134 if ((flag & ATTR_NOACLCHECK) &&
4130 4135 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4131 4136 return (EINVAL);
4132 4137 }
4133 4138 err = (*(vp)->v_op->vop_setsecattr) (vp, vsap, flag, cr, ct);
4134 4139 VOPSTATS_UPDATE(vp, setsecattr);
4135 4140 return (err);
4136 4141 }
4137 4142
4138 4143 int
4139 4144 fop_getsecattr(
4140 4145 vnode_t *vp,
4141 4146 vsecattr_t *vsap,
4142 4147 int flag,
4143 4148 cred_t *cr,
4144 4149 caller_context_t *ct)
4145 4150 {
4146 4151 int err;
4147 4152
4148 4153 /*
4149 4154 * We're only allowed to skip the ACL check iff we used a 32 bit
4150 4155 * ACE mask with VOP_ACCESS() to determine permissions.
4151 4156 */
4152 4157 if ((flag & ATTR_NOACLCHECK) &&
4153 4158 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4154 4159 return (EINVAL);
4155 4160 }
4156 4161
4157 4162 VOPXID_MAP_CR(vp, cr);
4158 4163
4159 4164 err = (*(vp)->v_op->vop_getsecattr) (vp, vsap, flag, cr, ct);
4160 4165 VOPSTATS_UPDATE(vp, getsecattr);
4161 4166 return (err);
4162 4167 }
4163 4168
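/*
 * Sketch of the ATTR_NOACLCHECK gate used here and in fop_setsecattr():
 * skipping the ACL check is only legal when the file system evaluated
 * access with a 32-bit ACE mask (VFSFT_ACEMASKONACCESS); otherwise both
 * entry points reject the flag with EINVAL before calling down.
 */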
4164 4169 int
4165 4170 fop_shrlock(
4166 4171 vnode_t *vp,
4167 4172 int cmd,
4168 4173 struct shrlock *shr,
4169 4174 int flag,
4170 4175 cred_t *cr,
4171 4176 caller_context_t *ct)
4172 4177 {
4173 4178 int err;
4174 4179
4175 4180 VOPXID_MAP_CR(vp, cr);
4176 4181
4177 4182 err = (*(vp)->v_op->vop_shrlock)(vp, cmd, shr, flag, cr, ct);
4178 4183 VOPSTATS_UPDATE(vp, shrlock);
4179 4184 return (err);
4180 4185 }
4181 4186
4182 4187 int
4183 4188 fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm,
4184 4189 caller_context_t *ct)
4185 4190 {
4186 4191 int err;
4187 4192
4188 4193 err = (*(vp)->v_op->vop_vnevent)(vp, vnevent, dvp, fnm, ct);
4189 4194 VOPSTATS_UPDATE(vp, vnevent);
4190 4195 return (err);
4191 4196 }
4192 4197
4193 4198 int
4194 4199 fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr,
4195 4200 caller_context_t *ct)
4196 4201 {
4197 4202 int err;
4198 4203
4199 4204 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4200 4205 return (ENOTSUP);
4201 4206 err = (*(vp)->v_op->vop_reqzcbuf)(vp, ioflag, uiop, cr, ct);
4202 4207 VOPSTATS_UPDATE(vp, reqzcbuf);
4203 4208 return (err);
4204 4209 }
4205 4210
4206 4211 int
4207 4212 fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct)
4208 4213 {
4209 4214 int err;
4210 4215
4211 4216 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4212 4217 return (ENOTSUP);
4213 4218 err = (*(vp)->v_op->vop_retzcbuf)(vp, uiop, cr, ct);
4214 4219 VOPSTATS_UPDATE(vp, retzcbuf);
4215 4220 return (err);
4216 4221 }
4217 4222
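/*
 * Sketch of the zero-copy gate above: both fop_reqzcbuf() and
 * fop_retzcbuf() fail with ENOTSUP unless the file system declared
 * VFSFT_ZEROCOPY_SUPPORTED (roughly, vfs_set_feature(vfsp,
 * VFSFT_ZEROCOPY_SUPPORTED) at mount time), letting callers fall back
 * to ordinary buffered I/O.
 */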
4218 4223 /*
4219 4224 * Default destructor
4220 4225 * Needed because NULL destructor means that the key is unused
4221 4226 */
4222 4227 /* ARGSUSED */
4223 4228 void
4224 4229 vsd_defaultdestructor(void *value)
4225 4230 {}
4226 4231
4227 4232 /*
4228 4233 * Create a key (index into per vnode array)
4229 4234 * Locks out vsd_create, vsd_destroy, and vsd_free
4230 4235 * May allocate memory with lock held
4231 4236 */
4232 4237 void
4233 4238 vsd_create(uint_t *keyp, void (*destructor)(void *))
4234 4239 {
4235 4240 int i;
4236 4241 uint_t nkeys;
4237 4242
4238 4243 /*
4239 4244 * if key is allocated, do nothing
4240 4245 */
4241 4246 mutex_enter(&vsd_lock);
4242 4247 if (*keyp) {
4243 4248 mutex_exit(&vsd_lock);
4244 4249 return;
4245 4250 }
4246 4251 /*
4247 4252 * find an unused key
4248 4253 */
4249 4254 if (destructor == NULL)
4250 4255 destructor = vsd_defaultdestructor;
4251 4256
4252 4257 for (i = 0; i < vsd_nkeys; ++i)
4253 4258 if (vsd_destructor[i] == NULL)
4254 4259 break;
4255 4260
4256 4261 /*
4257 4262 * if no unused keys, increase the size of the destructor array
4258 4263 */
4259 4264 if (i == vsd_nkeys) {
4260 4265 if ((nkeys = (vsd_nkeys << 1)) == 0)
4261 4266 nkeys = 1;
4262 4267 vsd_destructor =
4263 4268 (void (**)(void *))vsd_realloc((void *)vsd_destructor,
4264 4269 (size_t)(vsd_nkeys * sizeof (void (*)(void *))),
4265 4270 (size_t)(nkeys * sizeof (void (*)(void *))));
4266 4271 vsd_nkeys = nkeys;
4267 4272 }
4268 4273
4269 4274 /*
4270 4275 * allocate the next available unused key
4271 4276 */
4272 4277 vsd_destructor[i] = destructor;
4273 4278 *keyp = i + 1;
4274 4279
4275 4280 /* create vsd_list, if it doesn't exist */
4276 4281 if (vsd_list == NULL) {
4277 4282 vsd_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
4278 4283 list_create(vsd_list, sizeof (struct vsd_node),
4279 4284 offsetof(struct vsd_node, vs_nodes));
4280 4285 }
4281 4286
4282 4287 mutex_exit(&vsd_lock);
4283 4288 }
4284 4289
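/*
 * Usage sketch for the VSD interfaces (hypothetical key and payload
 * names): the API deliberately mirrors thread-specific data
 * (tsd_create/tsd_set/tsd_get), but values hang off a vnode and access
 * must hold v_vsd_lock. Roughly:
 *
 *	static uint_t my_key;			... hypothetical
 *
 *	vsd_create(&my_key, my_destructor);	... once, at module init
 *
 *	mutex_enter(&vp->v_vsd_lock);
 *	(void) vsd_set(vp, my_key, payload);
 *	payload = vsd_get(vp, my_key);
 *	mutex_exit(&vp->v_vsd_lock);
 */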
4285 4290 /*
4286 4291 * Destroy a key
4287 4292 *
4288 4293 * Assumes that the caller is preventing vsd_set and vsd_get
4289 4294 * Locks out vsd_create, vsd_destroy, and vsd_free
4290 4295 * May free memory with lock held
4291 4296 */
4292 4297 void
4293 4298 vsd_destroy(uint_t *keyp)
4294 4299 {
4295 4300 uint_t key;
4296 4301 struct vsd_node *vsd;
4297 4302
4298 4303 /*
4299 4304 * protect the key namespace and our destructor lists
4300 4305 */
4301 4306 mutex_enter(&vsd_lock);
4302 4307 key = *keyp;
4303 4308 *keyp = 0;
4304 4309
4305 4310 ASSERT(key <= vsd_nkeys);
4306 4311
4307 4312 /*
4308 4313 * if the key is valid
4309 4314 */
4310 4315 if (key != 0) {
4311 4316 uint_t k = key - 1;
4312 4317 /*
4313 4318 * for every vnode with VSD, call key's destructor
4314 4319 */
4315 4320 for (vsd = list_head(vsd_list); vsd != NULL;
4316 4321 vsd = list_next(vsd_list, vsd)) {
4317 4322 /*
4318 4323 * no VSD for key in this vnode
4319 4324 */
4320 4325 if (key > vsd->vs_nkeys)
4321 4326 continue;
4322 4327 /*
4323 4328 * call destructor for key
4324 4329 */
4325 4330 if (vsd->vs_value[k] && vsd_destructor[k])
4326 4331 (*vsd_destructor[k])(vsd->vs_value[k]);
4327 4332 /*
4328 4333 * reset value for key
4329 4334 */
4330 4335 vsd->vs_value[k] = NULL;
4331 4336 }
4332 4337 /*
4333 4338 * actually free the key (NULL destructor == unused)
4334 4339 */
4335 4340 vsd_destructor[k] = NULL;
4336 4341 }
4337 4342
4338 4343 mutex_exit(&vsd_lock);
4339 4344 }
4340 4345
4341 4346 /*
4342 4347 * Quickly return the per vnode value that was stored with the specified key
4343 4348 * Assumes the caller is protecting key from vsd_create and vsd_destroy
4344 4349 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4345 4350 */
4346 4351 void *
4347 4352 vsd_get(vnode_t *vp, uint_t key)
4348 4353 {
4349 4354 struct vsd_node *vsd;
4350 4355
4351 4356 ASSERT(vp != NULL);
4352 4357 ASSERT(mutex_owned(&vp->v_vsd_lock));
4353 4358
4354 4359 vsd = vp->v_vsd;
4355 4360
4356 4361 if (key && vsd != NULL && key <= vsd->vs_nkeys)
4357 4362 return (vsd->vs_value[key - 1]);
4358 4363 return (NULL);
4359 4364 }
4360 4365
4361 4366 /*
4362 4367 * Set a per vnode value indexed with the specified key
4363 4368 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4364 4369 */
4365 4370 int
4366 4371 vsd_set(vnode_t *vp, uint_t key, void *value)
4367 4372 {
4368 4373 struct vsd_node *vsd;
4369 4374
4370 4375 ASSERT(vp != NULL);
4371 4376 ASSERT(mutex_owned(&vp->v_vsd_lock));
4372 4377
4373 4378 if (key == 0)
4374 4379 return (EINVAL);
4375 4380
4376 4381 vsd = vp->v_vsd;
4377 4382 if (vsd == NULL)
4378 4383 vsd = vp->v_vsd = kmem_zalloc(sizeof (*vsd), KM_SLEEP);
4379 4384
4380 4385 /*
4381 4386 * If the vsd was just allocated, vs_nkeys will be 0, so the following
4382 4387 * code won't happen and we will continue down and allocate space for
4383 4388 * the vs_value array.
4384 4389 * If the caller is replacing one value with another, then it is up
4385 4390 * to the caller to free/rele/destroy the previous value (if needed).
4386 4391 */
4387 4392 if (key <= vsd->vs_nkeys) {
4388 4393 vsd->vs_value[key - 1] = value;
4389 4394 return (0);
4390 4395 }
4391 4396
4392 4397 ASSERT(key <= vsd_nkeys);
4393 4398
4394 4399 if (vsd->vs_nkeys == 0) {
4395 4400 mutex_enter(&vsd_lock); /* lock out vsd_destroy() */
4396 4401 /*
4397 4402 * Link onto list of all VSD nodes.
4398 4403 */
4399 4404 list_insert_head(vsd_list, vsd);
4400 4405 mutex_exit(&vsd_lock);
4401 4406 }
4402 4407
4403 4408 /*
4404 4409 * Allocate vnode local storage and set the value for key
4405 4410 */
4406 4411 vsd->vs_value = vsd_realloc(vsd->vs_value,
4407 4412 vsd->vs_nkeys * sizeof (void *),
4408 4413 key * sizeof (void *));
4409 4414 vsd->vs_nkeys = key;
4410 4415 vsd->vs_value[key - 1] = value;
4411 4416
4412 4417 return (0);
4413 4418 }
4414 4419
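/*
 * Worked example of the growth path above: on a fresh vsd, vs_nkeys is
 * 0, so a vsd_set() with key 3 first links the node onto vsd_list, then
 * grows vs_value from 0 to 3 pointer slots via vsd_realloc() and stores
 * the value in vs_value[2]. A later vsd_set() with key 2 takes the fast
 * path, since 2 <= vs_nkeys.
 */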
4415 4420 /*
4416 4421 * Called from vn_free() to run the destructor function for each vsd
4417 4422 * Locks out vsd_create and vsd_destroy
4418 4423 * Assumes that the destructor *DOES NOT* use vsd
4419 4424 */
4420 4425 void
4421 4426 vsd_free(vnode_t *vp)
4422 4427 {
4423 4428 int i;
4424 4429 struct vsd_node *vsd = vp->v_vsd;
4425 4430
4426 4431 if (vsd == NULL)
4427 4432 return;
4428 4433
4429 4434 if (vsd->vs_nkeys == 0) {
4430 4435 kmem_free(vsd, sizeof (*vsd));
4431 4436 vp->v_vsd = NULL;
4432 4437 return;
4433 4438 }
4434 4439
4435 4440 /*
4436 4441 * lock out vsd_create and vsd_destroy, call
4437 4442 * the destructor, and mark the value as destroyed.
4438 4443 */
4439 4444 mutex_enter(&vsd_lock);
4440 4445
4441 4446 for (i = 0; i < vsd->vs_nkeys; i++) {
4442 4447 if (vsd->vs_value[i] && vsd_destructor[i])
4443 4448 (*vsd_destructor[i])(vsd->vs_value[i]);
4444 4449 vsd->vs_value[i] = NULL;
4445 4450 }
4446 4451
4447 4452 /*
4448 4453 * remove from linked list of VSD nodes
4449 4454 */
4450 4455 list_remove(vsd_list, vsd);
4451 4456
4452 4457 mutex_exit(&vsd_lock);
4453 4458
4454 4459 /*
4455 4460 * free up the VSD
4456 4461 */
4457 4462 kmem_free(vsd->vs_value, vsd->vs_nkeys * sizeof (void *));
4458 4463 kmem_free(vsd, sizeof (struct vsd_node));
4459 4464 vp->v_vsd = NULL;
4460 4465 }
4461 4466
4462 4467 /*
4463 4468  * Grow-only realloc: zero-allocate nsize bytes, copy the old osize
4464 4468  * bytes, and free the old buffer. Assumes nsize >= osize.
4464 4469 */
4465 4470 static void *
4466 4471 vsd_realloc(void *old, size_t osize, size_t nsize)
4467 4472 {
4468 4473 void *new;
4469 4474
4470 4475 new = kmem_zalloc(nsize, KM_SLEEP);
4471 4476 if (old) {
4472 4477 bcopy(old, new, osize);
4473 4478 kmem_free(old, osize);
4474 4479 }
4475 4480 return (new);
4476 4481 }
4477 4482
4478 4483 /*
4479 4484  * Set up the extensible system attribute for creating a reparse point.
4480 4485  * The symlink data 'target' is validated for proper reparse-string
4481 4486  * format, and a check is also made to ensure the symlink data does not
4482 4487 * point to an existing file.
4483 4488 *
4484 4489  * Returns 0 on success, -1 on failure.
4485 4490 */
4486 4491 static int
4487 4492 fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr)
4488 4493 {
4489 4494 xoptattr_t *xoap;
4490 4495
4491 4496 if ((!target) || (!vap) || (!xvattr))
4492 4497 return (-1);
4493 4498
4494 4499 /* validate reparse string */
4495 4500 if (reparse_validate((const char *)target))
4496 4501 return (-1);
4497 4502
4498 4503 xva_init(xvattr);
4499 4504 xvattr->xva_vattr = *vap;
4500 4505 xvattr->xva_vattr.va_mask |= AT_XVATTR;
4501 4506 xoap = xva_getxoptattr(xvattr);
4502 4507 ASSERT(xoap);
4503 4508 XVA_SET_REQ(xvattr, XAT_REPARSE);
4504 4509 xoap->xoa_reparse = 1;
4505 4510
4506 4511 return (0);
4507 4512 }
4508 4513
4509 4514 /*
4510 4515 * Function to check whether a symlink is a reparse point.
4511 4516  * Returns B_TRUE if it is a reparse point, else B_FALSE.
4512 4517 */
4513 4518 boolean_t
4514 4519 vn_is_reparse(vnode_t *vp, cred_t *cr, caller_context_t *ct)
4515 4520 {
4516 4521 xvattr_t xvattr;
4517 4522 xoptattr_t *xoap;
4518 4523
4519 4524 if ((vp->v_type != VLNK) ||
4520 4525 !(vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR)))
4521 4526 return (B_FALSE);
4522 4527
4523 4528 xva_init(&xvattr);
4524 4529 xoap = xva_getxoptattr(&xvattr);
4525 4530 ASSERT(xoap);
4526 4531 XVA_SET_REQ(&xvattr, XAT_REPARSE);
4527 4532
4528 4533 if (VOP_GETATTR(vp, &xvattr.xva_vattr, 0, cr, ct))
4529 4534 return (B_FALSE);
4530 4535
4531 4536 if ((!(xvattr.xva_vattr.va_mask & AT_XVATTR)) ||
4532 4537 (!(XVA_ISSET_RTN(&xvattr, XAT_REPARSE))))
4533 4538 return (B_FALSE);
4534 4539
4535 4540 return (xoap->xoa_reparse ? B_TRUE : B_FALSE);
4536 4541 }
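/*
 * Usage sketch (illustrative only): a caller that must treat reparse
 * points specially can probe before following the link, roughly:
 *
 *	if (vp->v_type == VLNK && vn_is_reparse(vp, CRED(), NULL)) {
 *		... hand vp to the reparse service instead of
 *		... following it as an ordinary symlink
 *	}
 */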
1279 lines elided