illumos-gate Wdiff usr/src/uts/common/fs/vnode.c

Print this page

cstyle sort of updates
7127  remove -Wno-missing-braces from Makefile.uts

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/vnode.c
          +++ new/usr/src/uts/common/fs/vnode.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  25   25   */
  26   26  
  27   27  /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  28   28  /*        All Rights Reserved   */
  29   29  
  30   30  /*
  31   31   * University Copyright- Copyright (c) 1982, 1986, 1988
  32   32   * The Regents of the University of California
  33   33   * All Rights Reserved
  34   34   *
  35   35   * University Acknowledgment- Portions of this document are derived from
  36   36   * software developed by the University of California, Berkeley, and its
  37   37   * contributors.
  38   38   */
  39   39  
  40   40  #include <sys/types.h>
  41   41  #include <sys/param.h>
  42   42  #include <sys/t_lock.h>
  43   43  #include <sys/errno.h>
  44   44  #include <sys/cred.h>
  45   45  #include <sys/user.h>
  46   46  #include <sys/uio.h>
  47   47  #include <sys/file.h>
  48   48  #include <sys/pathname.h>
  49   49  #include <sys/vfs.h>
  50   50  #include <sys/vfs_opreg.h>
  51   51  #include <sys/vnode.h>
  52   52  #include <sys/rwstlock.h>
  53   53  #include <sys/fem.h>
  54   54  #include <sys/stat.h>
  55   55  #include <sys/mode.h>
  56   56  #include <sys/conf.h>
  57   57  #include <sys/sysmacros.h>
  58   58  #include <sys/cmn_err.h>
  59   59  #include <sys/systm.h>
  60   60  #include <sys/kmem.h>
  61   61  #include <sys/debug.h>
  62   62  #include <c2/audit.h>
  63   63  #include <sys/acl.h>
  64   64  #include <sys/nbmlock.h>
  65   65  #include <sys/fcntl.h>
  66   66  #include <fs/fs_subr.h>
  67   67  #include <sys/taskq.h>
  68   68  #include <fs/fs_reparse.h>
  69   69  
  70   70  /* Determine if this vnode is a file that is read-only */
  71   71  #define ISROFILE(vp)    \
  72   72          ((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
  73   73              (vp)->v_type != VFIFO && vn_is_readonly(vp))
  74   74  
  75   75  /* Tunable via /etc/system; used only by admin/install */
  76   76  int nfs_global_client_only;
  77   77  
  78   78  /*
  79   79   * Array of vopstats_t for per-FS-type vopstats.  This array has the same
  80   80   * number of entries as, and is parallel to, the vfssw table.  (Arguably, it
  81   81   * could be part of the vfssw table.)  Once it's initialized, it's accessed
  82   82   * using the same fstype index that is used to index into the vfssw table.
  83   83   */
  84   84  vopstats_t **vopstats_fstype;
  85   85  
  86   86  /* vopstats initialization template used for fast initialization via bcopy() */
  87   87  static vopstats_t *vs_templatep;
  88   88  
  89   89  /* Kmem cache handle for vsk_anchor_t allocations */
  90   90  kmem_cache_t *vsk_anchor_cache;
  91   91  
  92   92  /* file events cleanup routine */
  93   93  extern void free_fopdata(vnode_t *);
  94   94  
  95   95  /*
  96   96   * Root of AVL tree for the kstats associated with vopstats.  Lock protects
  97   97   * updates to vskstat_tree.
  98   98   */
  99   99  avl_tree_t      vskstat_tree;
 100  100  kmutex_t        vskstat_tree_lock;
 101  101  
 102  102  /* Global variable which enables/disables the vopstats collection */
 103  103  int vopstats_enabled = 1;
 104  104  
 105  105  /*
 106  106   * forward declarations for internal vnode specific data (vsd)
 107  107   */
 108  108  static void *vsd_realloc(void *, size_t, size_t);
 109  109  
 110  110  /*
 111  111   * forward declarations for reparse point functions
 112  112   */
 113  113  static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);
 114  114  
 115  115  /*
 116  116   * VSD -- VNODE SPECIFIC DATA
 117  117   * The v_data pointer is typically used by a file system to store a
 118  118   * pointer to the file system's private node (e.g. ufs inode, nfs rnode).
 119  119   * However, there are times when additional project private data needs
 120  120   * to be stored separately from the data (node) pointed to by v_data.
 121  121   * This additional data could be stored by the file system itself or
 122  122   * by a completely different kernel entity.  VSD provides a way for
 123  123   * callers to obtain a key and store a pointer to private data associated
 124  124   * with a vnode.
 125  125   *
 126  126   * Callers are responsible for protecting the vsd by holding v_vsd_lock
 127  127   * for calls to vsd_set() and vsd_get().
 128  128   */
 129  129  
 130  130  /*
 131  131   * vsd_lock protects:
 132  132   *   vsd_nkeys - creation and deletion of vsd keys
 133  133   *   vsd_list - insertion and deletion of vsd_node in the vsd_list
 134  134   *   vsd_destructor - adding and removing destructors to the list
 135  135   */
 136  136  static kmutex_t         vsd_lock;
 137  137  static uint_t           vsd_nkeys;       /* size of destructor array */
 138  138  /* list of vsd_node's */
 139  139  static list_t *vsd_list = NULL;
 140  140  /* per-key destructor funcs */
 141  141  static void             (**vsd_destructor)(void *);
 142  142  
 143  143  /*
 144  144   * The following is the common set of actions needed to update the
 145  145   * vopstats structure from a vnode op.  Both VOPSTATS_UPDATE() and
 146  146   * VOPSTATS_UPDATE_IO() do almost the same thing, except for the
 147  147   * recording of the bytes transferred.  Since the code is similar
 148  148   * but small, it is nearly a duplicate.  Consequently any changes
 149  149   * to one may need to be reflected in the other.
 150  150   * Rundown of the variables:
 151  151   * vp - Pointer to the vnode
 152  152   * counter - Partial name structure member to update in vopstats for counts
 153  153   * bytecounter - Partial name structure member to update in vopstats for bytes
 154  154   * bytesval - Value to update in vopstats for bytes
 155  155   * fstype - Index into vsanchor_fstype[], same as index into vfssw[]
 156  156   * vsp - Pointer to vopstats structure (either in vfs or vsanchor_fstype[i])
 157  157   */
 158  158  
 159  159  #define VOPSTATS_UPDATE(vp, counter) {                                  \
 160  160          vfs_t *vfsp = (vp)->v_vfsp;                                     \
 161  161          if (vfsp && vfsp->vfs_implp &&                                  \
 162  162              (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {     \
 163  163                  vopstats_t *vsp = &vfsp->vfs_vopstats;                  \
 164  164                  uint64_t *stataddr = &(vsp->n##counter.value.ui64);     \
 165  165                  extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
 166  166                      size_t, uint64_t *);                                \
 167  167                  __dtrace_probe___fsinfo_##counter(vp, 0, stataddr);     \
 168  168                  (*stataddr)++;                                          \
 169  169                  if ((vsp = vfsp->vfs_fstypevsp) != NULL) {              \
 170  170                          vsp->n##counter.value.ui64++;                   \
 171  171                  }                                                       \
 172  172          }                                                               \
 173  173  }
 174  174  
 175  175  #define VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) {        \
 176  176          vfs_t *vfsp = (vp)->v_vfsp;                                     \
 177  177          if (vfsp && vfsp->vfs_implp &&                                  \
 178  178              (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {     \
 179  179                  vopstats_t *vsp = &vfsp->vfs_vopstats;                  \
 180  180                  uint64_t *stataddr = &(vsp->n##counter.value.ui64);     \
 181  181                  extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
 182  182                      size_t, uint64_t *);                                \
 183  183                  __dtrace_probe___fsinfo_##counter(vp, bytesval, stataddr); \
 184  184                  (*stataddr)++;                                          \
 185  185                  vsp->bytecounter.value.ui64 += bytesval;                \
 186  186                  if ((vsp = vfsp->vfs_fstypevsp) != NULL) {              \
 187  187                          vsp->n##counter.value.ui64++;                   \
 188  188                          vsp->bytecounter.value.ui64 += bytesval;        \
 189  189                  }                                                       \
 190  190          }                                                               \
 191  191  }
 192  192  
 193  193  /*
 194  194   * If the filesystem does not support XIDs (VFS_XID clear in vfs_flag), replace
 195  195   * cr with crgetmapped(cr).  A NULL vfsp leaves cr as is; perhaps map then too?
 196  196   */
 197  197  #define VOPXID_MAP_CR(vp, cr)   {                                       \
 198  198          vfs_t *vfsp = (vp)->v_vfsp;                                     \
 199  199          if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0)            \
 200  200                  cr = crgetmapped(cr);                                   \
 201  201          }
 202  202  
 203  203  /*
 204  204   * Convert stat(2) formats to vnode types and vice versa.  (Knows about
 205  205   * numerical order of S_IFMT and vnode types.)
 206  206   */
 207  207  enum vtype iftovt_tab[] = {
 208  208          VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
 209  209          VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
 210  210  };
 211  211  
 212  212  ushort_t vttoif_tab[] = {
 213  213          0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
 214  214          S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
 215  215  };
 216  216  
 217  217  /*
 218  218   * The system vnode cache.

↓ open down ↓

218 lines elided

↑ open up ↑

 219  219   */
 220  220  
 221  221  kmem_cache_t *vn_cache;
 222  222  
 223  223  
 224  224  /*
 225  225   * Vnode operations vector.
 226  226   */
 227  227  
 228  228  static const fs_operation_trans_def_t vn_ops_table[] = {
 229      -        VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
 230      -            fs_nosys, fs_nosys,
      229 +        { VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
      230 +            fs_nosys, fs_nosys },
 231  231  
 232      -        VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
 233      -            fs_nosys, fs_nosys,
      232 +        { VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
      233 +            fs_nosys, fs_nosys },
 234  234  
 235      -        VOPNAME_READ, offsetof(struct vnodeops, vop_read),
 236      -            fs_nosys, fs_nosys,
      235 +        { VOPNAME_READ, offsetof(struct vnodeops, vop_read),
      236 +            fs_nosys, fs_nosys },
 237  237  
 238      -        VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
 239      -            fs_nosys, fs_nosys,
      238 +        { VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
      239 +            fs_nosys, fs_nosys },
 240  240  
 241      -        VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
 242      -            fs_nosys, fs_nosys,
      241 +        { VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
      242 +            fs_nosys, fs_nosys },
 243  243  
 244      -        VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
 245      -            fs_setfl, fs_nosys,
      244 +        { VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
      245 +            fs_setfl, fs_nosys },
 246  246  
 247      -        VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
 248      -            fs_nosys, fs_nosys,
      247 +        { VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
      248 +            fs_nosys, fs_nosys },
 249  249  
 250      -        VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
 251      -            fs_nosys, fs_nosys,
      250 +        { VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
      251 +            fs_nosys, fs_nosys },
 252  252  
 253      -        VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
 254      -            fs_nosys, fs_nosys,
      253 +        { VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
      254 +            fs_nosys, fs_nosys },
 255  255  
 256      -        VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
 257      -            fs_nosys, fs_nosys,
      256 +        { VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
      257 +            fs_nosys, fs_nosys },
 258  258  
 259      -        VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
 260      -            fs_nosys, fs_nosys,
      259 +        { VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
      260 +            fs_nosys, fs_nosys },
 261  261  
 262      -        VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
 263      -            fs_nosys, fs_nosys,
      262 +        { VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
      263 +            fs_nosys, fs_nosys },
 264  264  
 265      -        VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
 266      -            fs_nosys, fs_nosys,
      265 +        { VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
      266 +            fs_nosys, fs_nosys },
 267  267  
 268      -        VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
 269      -            fs_nosys, fs_nosys,
      268 +        { VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
      269 +            fs_nosys, fs_nosys },
 270  270  
 271      -        VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
 272      -            fs_nosys, fs_nosys,
      271 +        { VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
      272 +            fs_nosys, fs_nosys },
 273  273  
 274      -        VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
 275      -            fs_nosys, fs_nosys,
      274 +        { VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
      275 +            fs_nosys, fs_nosys },
 276  276  
 277      -        VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
 278      -            fs_nosys, fs_nosys,
      277 +        { VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
      278 +            fs_nosys, fs_nosys },
 279  279  
 280      -        VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
 281      -            fs_nosys, fs_nosys,
      280 +        { VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
      281 +            fs_nosys, fs_nosys },
 282  282  
 283      -        VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
 284      -            fs_nosys, fs_nosys,
      283 +        { VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
      284 +            fs_nosys, fs_nosys },
 285  285  
 286      -        VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
 287      -            fs_nosys, fs_nosys,
      286 +        { VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
      287 +            fs_nosys, fs_nosys },
 288  288  
 289      -        VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
 290      -            fs_nosys, fs_nosys,
      289 +        { VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
      290 +            fs_nosys, fs_nosys },
 291  291  
 292      -        VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
 293      -            fs_nosys, fs_nosys,
      292 +        { VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
      293 +            fs_nosys, fs_nosys },
 294  294  
 295      -        VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
 296      -            fs_rwlock, fs_rwlock,
      295 +        { VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
      296 +            fs_rwlock, fs_rwlock },
 297  297  
 298      -        VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
      298 +        { VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
 299  299              (fs_generic_func_p) fs_rwunlock,
 300      -            (fs_generic_func_p) fs_rwunlock,    /* no errors allowed */
      300 +            (fs_generic_func_p) fs_rwunlock },  /* no errors allowed */
 301  301  
 302      -        VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
 303      -            fs_nosys, fs_nosys,
      302 +        { VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
      303 +            fs_nosys, fs_nosys },
 304  304  
 305      -        VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
 306      -            fs_cmp, fs_cmp,             /* no errors allowed */
      305 +        { VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
      306 +            fs_cmp, fs_cmp },           /* no errors allowed */
 307  307  
 308      -        VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
 309      -            fs_frlock, fs_nosys,
      308 +        { VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
      309 +            fs_frlock, fs_nosys },
 310  310  
 311      -        VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
 312      -            fs_nosys, fs_nosys,
      311 +        { VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
      312 +            fs_nosys, fs_nosys },
 313  313  
 314      -        VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
 315      -            fs_nosys, fs_nosys,
      314 +        { VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
      315 +            fs_nosys, fs_nosys },
 316  316  
 317      -        VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
 318      -            fs_nosys, fs_nosys,
      317 +        { VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
      318 +            fs_nosys, fs_nosys },
 319  319  
 320      -        VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
 321      -            fs_nosys, fs_nosys,
      320 +        { VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
      321 +            fs_nosys, fs_nosys },
 322  322  
 323      -        VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
      323 +        { VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
 324  324              (fs_generic_func_p) fs_nosys_map,
 325      -            (fs_generic_func_p) fs_nosys_map,
      325 +            (fs_generic_func_p) fs_nosys_map },
 326  326  
 327      -        VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
      327 +        { VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
 328  328              (fs_generic_func_p) fs_nosys_addmap,
 329      -            (fs_generic_func_p) fs_nosys_addmap,
      329 +            (fs_generic_func_p) fs_nosys_addmap },
 330  330  
 331      -        VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
 332      -            fs_nosys, fs_nosys,
      331 +        { VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
      332 +            fs_nosys, fs_nosys },
 333  333  
 334      -        VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
 335      -            (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll,
      334 +        { VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
      335 +            (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll },
 336  336  
 337      -        VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
 338      -            fs_nosys, fs_nosys,
      337 +        { VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
      338 +            fs_nosys, fs_nosys },
 339  339  
 340      -        VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
 341      -            fs_pathconf, fs_nosys,
      340 +        { VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
      341 +            fs_pathconf, fs_nosys },
 342  342  
 343      -        VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
 344      -            fs_nosys, fs_nosys,
      343 +        { VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
      344 +            fs_nosys, fs_nosys },
 345  345  
 346      -        VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
 347      -            fs_nosys, fs_nosys,
      346 +        { VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
      347 +            fs_nosys, fs_nosys },
 348  348  
 349      -        VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
      349 +        { VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
 350  350              (fs_generic_func_p) fs_dispose,
 351      -            (fs_generic_func_p) fs_nodispose,
      351 +            (fs_generic_func_p) fs_nodispose },
 352  352  
 353      -        VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
 354      -            fs_nosys, fs_nosys,
      353 +        { VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
      354 +            fs_nosys, fs_nosys },
 355  355  
 356      -        VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
 357      -            fs_fab_acl, fs_nosys,
      356 +        { VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
      357 +            fs_fab_acl, fs_nosys },
 358  358  
 359      -        VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
 360      -            fs_shrlock, fs_nosys,
      359 +        { VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
      360 +            fs_shrlock, fs_nosys },
 361  361  
 362      -        VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
      362 +        { VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
 363  363              (fs_generic_func_p) fs_vnevent_nosupport,
 364      -            (fs_generic_func_p) fs_vnevent_nosupport,
      364 +            (fs_generic_func_p) fs_vnevent_nosupport },
 365  365  
 366      -        VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
 367      -            fs_nosys, fs_nosys,
      366 +        { VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
      367 +            fs_nosys, fs_nosys },
 368  368  
 369      -        VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
 370      -            fs_nosys, fs_nosys,
      369 +        { VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
      370 +            fs_nosys, fs_nosys },
 371  371  
 372      -        NULL, 0, NULL, NULL
      372 +        { NULL, 0, NULL, NULL }
 373  373  };
 374  374  
 375  375  /* Extensible attribute (xva) routines. */
 376  376  
 377  377  /*
 378  378   * Zero the xvattr_t, record the size of the requested/returned bitmaps and the
 379  379   * magic number, set AT_XVATTR in the embedded vattr_t's va_mask, and point
 380  380   * xva_rtnattrmapp at the first element of the embedded xva_rtnattrmap array.
 381  381   */
 382  382  void

 383  383  xva_init(xvattr_t *xvap)
 384  384  {
 385  385          bzero(xvap, sizeof (xvattr_t)); /* wipe first; fields are set below */
 386  386          xvap->xva_mapsize = XVA_MAPSIZE;
 387  387          xvap->xva_magic = XVA_MAGIC;
 388  388          xvap->xva_vattr.va_mask = AT_XVATTR;
 389  389          xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0];
 390  390  }
 391  391  
 392  392  /*
 393  393   * If AT_XVATTR is set in the embedded vattr_t's va_mask, returns a pointer to
 394  394   * the embedded xoptattr_t (xva_xoptattrs).  Otherwise, returns NULL.
 395  395   */
 396  396  xoptattr_t *
 397  397  xva_getxoptattr(xvattr_t *xvap)
 398  398  {
 399  399          xoptattr_t *xoap = NULL;
 400  400          if (xvap->xva_vattr.va_mask & AT_XVATTR)
 401  401                  xoap = &xvap->xva_xoptattrs;
 402  402          return (xoap);
 403  403  }
 404  404  
 405  405  /*
 406  406   * AVL comparator for vsk_anchor_t nodes, keyed on vsk_fsid (the f_fsid
 407  407   * reported by VFS_STATVFS(), which is also used for the kstat name).  Returns
 408  408   * -1, 0, or 1 as n1's vsk_fsid is less than, equal to, or greater than n2's.
 409  409   */
 410  410  static int
 411  411  vska_compar(const void *n1, const void *n2)
 412  412  {
 413  413          int ret;
 414  414          ulong_t p1 = ((vsk_anchor_t *)n1)->vsk_fsid;
 415  415          ulong_t p2 = ((vsk_anchor_t *)n2)->vsk_fsid;
 416  416  
 417  417          if (p1 < p2) {
 418  418                  ret = -1;
 419  419          } else if (p1 > p2) {
 420  420                  ret = 1;
 421  421          } else {
 422  422                  ret = 0;
 423  423          }
 424  424  
 425  425          return (ret);
 426  426  }
 427  427  
 428  428  /*
 429  429   * Build the one vopstats_t template (all counters named UINT64 kstats) that is
 430  430   * bcopy()ed to a newly allocated vsanchor_combo_t in new_vsanchor(), below.
 431  431   */
 432  432  static vopstats_t *
 433  433  create_vopstats_template()
 434  434  {
 435  435          vopstats_t              *vsp;
 436  436  
 437  437          vsp = kmem_alloc(sizeof (vopstats_t), KM_SLEEP);
 438  438          bzero(vsp, sizeof (*vsp));      /* Start fresh */
 439  439  
 440  440          /* VOP_OPEN */
 441  441          kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
 442  442          /* VOP_CLOSE */
 443  443          kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
 444  444          /* VOP_READ I/O */
 445  445          kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
 446  446          kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
 447  447          /* VOP_WRITE I/O */
 448  448          kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
 449  449          kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
 450  450          /* VOP_IOCTL */
 451  451          kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
 452  452          /* VOP_SETFL */
 453  453          kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
 454  454          /* VOP_GETATTR */
 455  455          kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
 456  456          /* VOP_SETATTR */
 457  457          kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
 458  458          /* VOP_ACCESS */
 459  459          kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
 460  460          /* VOP_LOOKUP */
 461  461          kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
 462  462          /* VOP_CREATE */
 463  463          kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
 464  464          /* VOP_REMOVE */
 465  465          kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
 466  466          /* VOP_LINK */
 467  467          kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
 468  468          /* VOP_RENAME */
 469  469          kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
 470  470          /* VOP_MKDIR */
 471  471          kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
 472  472          /* VOP_RMDIR */
 473  473          kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
 474  474          /* VOP_READDIR I/O */
 475  475          kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
 476  476          kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
 477  477              KSTAT_DATA_UINT64);
 478  478          /* VOP_SYMLINK */
 479  479          kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
 480  480          /* VOP_READLINK */
 481  481          kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
 482  482          /* VOP_FSYNC */
 483  483          kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
 484  484          /* VOP_INACTIVE */
 485  485          kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
 486  486          /* VOP_FID */
 487  487          kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
 488  488          /* VOP_RWLOCK */
 489  489          kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
 490  490          /* VOP_RWUNLOCK */
 491  491          kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
 492  492          /* VOP_SEEK */
 493  493          kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
 494  494          /* VOP_CMP */
 495  495          kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
 496  496          /* VOP_FRLOCK */
 497  497          kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
 498  498          /* VOP_SPACE */
 499  499          kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
 500  500          /* VOP_REALVP */
 501  501          kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
 502  502          /* VOP_GETPAGE */
 503  503          kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
 504  504          /* VOP_PUTPAGE */
 505  505          kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
 506  506          /* VOP_MAP */
 507  507          kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
 508  508          /* VOP_ADDMAP */
 509  509          kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
 510  510          /* VOP_DELMAP */
 511  511          kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
 512  512          /* VOP_POLL */
 513  513          kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
 514  514          /* VOP_DUMP */
 515  515          kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
 516  516          /* VOP_PATHCONF */
 517  517          kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
 518  518          /* VOP_PAGEIO */
 519  519          kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
 520  520          /* VOP_DUMPCTL */
 521  521          kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
 522  522          /* VOP_DISPOSE */
 523  523          kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
 524  524          /* VOP_SETSECATTR */
 525  525          kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
 526  526          /* VOP_GETSECATTR */
 527  527          kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
 528  528          /* VOP_SHRLOCK */
 529  529          kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
 530  530          /* VOP_VNEVENT */
 531  531          kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
 532  532          /* VOP_REQZCBUF */
 533  533          kstat_named_init(&vsp->nreqzcbuf, "nreqzcbuf", KSTAT_DATA_UINT64);
 534  534          /* VOP_RETZCBUF */
 535  535          kstat_named_init(&vsp->nretzcbuf, "nretzcbuf", KSTAT_DATA_UINT64);
 536  536  
 537  537          return (vsp);   /* fully initialized template */
 538  538  }
 539  539  
 540  540  /*
 541  541   * Create and install a named kstat over vsp; NULL if disabled or on failure.
 542  542   */
 543  543  kstat_t *
 544  544  new_vskstat(char *ksname, vopstats_t *vsp)
 545  545  {
 546  546          kstat_t         *ksp;
 547  547  
 548  548          if (!vopstats_enabled) {        /* vopstats collection is disabled */
 549  549                  return (NULL);
 550  550          }
 551  551  
 552  552          ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
 553  553              sizeof (vopstats_t)/sizeof (kstat_named_t),
 554  554              KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
 555  555          if (ksp) {
 556  556                  ksp->ks_data = vsp;     /* kstat data is the caller's vopstats_t */
 557  557                  kstat_install(ksp);
 558  558          }
 559  559  
 560  560          return (ksp);
 561  561  }
 562  562  
 563  563  /*
 564  564   * Called from vfsinit() to initialize the support mechanisms for vopstats
 565  565   */
 566  566  void
 567  567  vopstats_startup()
 568  568  {
 569  569          if (!vopstats_enabled)
 570  570                  return;
 571  571  
 572  572          /*
 573  573           * Creates the AVL tree which holds per-vfs vopstat anchors.  This
 574  574           * is necessary since we need to check if a kstat exists before we
 575  575           * attempt to create it.  Also, initialize its lock.
 576  576           */
 577  577          avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
 578  578              offsetof(vsk_anchor_t, vsk_node));
 579  579          mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);
 580  580  
 581  581          vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
 582  582              sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
 583  583              NULL, NULL, 0);
 584  584  
 585  585          /*
 586  586           * Set up the array of pointers for the vopstats-by-FS-type.
 587  587           * The entries will be allocated/initialized as each file system
 588  588           * goes through modload/mod_installfs.
 589  589           */
 590  590          vopstats_fstype = (vopstats_t **)kmem_zalloc(
 591  591              (sizeof (vopstats_t *) * nfstype), KM_SLEEP);
 592  592  
 593  593          /* Set up the global vopstats initialization template */
 594  594          vs_templatep = create_vopstats_template();
 595  595  }
 596  596  
 597  597  /*
 598  598   * We need to have the all of the counters zeroed.
 599  599   * The initialization of the vopstats_t includes on the order of
 600  600   * 50 calls to kstat_named_init().  Rather that do that on every call,
 601  601   * we do it once in a template (vs_templatep) then bcopy it over.
 602  602   */
 603  603  void
 604  604  initialize_vopstats(vopstats_t *vsp)
 605  605  {
 606  606          if (vsp == NULL)
 607  607                  return;
 608  608  
 609  609          bcopy(vs_templatep, vsp, sizeof (vopstats_t));
 610  610  }
 611  611  
 612  612  /*
 613  613   * If possible, determine which vopstats by fstype to use and
 614  614   * return a pointer to the caller.
 615  615   */
 616  616  vopstats_t *
 617  617  get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
 618  618  {
 619  619          int             fstype = 0;     /* Index into vfssw[] */
 620  620          vopstats_t      *vsp = NULL;
 621  621  
 622  622          if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
 623  623              !vopstats_enabled)
 624  624                  return (NULL);
 625  625          /*
 626  626           * Set up the fstype.  We go to so much trouble because all versions
 627  627           * of NFS use the same fstype in their vfs even though they have
 628  628           * distinct entries in the vfssw[] table.
 629  629           * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
 630  630           */
 631  631          if (vswp) {
 632  632                  fstype = vswp - vfssw;  /* Gets us the index */
 633  633          } else {
 634  634                  fstype = vfsp->vfs_fstype;
 635  635          }
 636  636  
 637  637          /*
 638  638           * Point to the per-fstype vopstats. The only valid values are
 639  639           * non-zero positive values less than the number of vfssw[] table
 640  640           * entries.
 641  641           */
 642  642          if (fstype > 0 && fstype < nfstype) {
 643  643                  vsp = vopstats_fstype[fstype];
 644  644          }
 645  645  
 646  646          return (vsp);
 647  647  }
 648  648  
 649  649  /*
 650  650   * Generate a kstat name, create the kstat structure, and allocate a
 651  651   * vsk_anchor_t to hold it together.  Return the pointer to the vsk_anchor_t
 652  652   * to the caller.  This must only be called from a mount.
 653  653   */
 654  654  vsk_anchor_t *
 655  655  get_vskstat_anchor(vfs_t *vfsp)
 656  656  {
 657  657          char            kstatstr[KSTAT_STRLEN]; /* kstat name for vopstats */
 658  658          statvfs64_t     statvfsbuf;             /* Needed to find f_fsid */
 659  659          vsk_anchor_t    *vskp = NULL;           /* vfs <--> kstat anchor */
 660  660          kstat_t         *ksp;                   /* Ptr to new kstat */
 661  661          avl_index_t     where;                  /* Location in the AVL tree */
 662  662  
 663  663          if (vfsp == NULL || vfsp->vfs_implp == NULL ||
 664  664              (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
 665  665                  return (NULL);
 666  666  
 667  667          /* Need to get the fsid to build a kstat name */
 668  668          if (VFS_STATVFS(vfsp, &statvfsbuf) == 0) {
 669  669                  /* Create a name for our kstats based on fsid */
 670  670                  (void) snprintf(kstatstr, KSTAT_STRLEN, "%s%lx",
 671  671                      VOPSTATS_STR, statvfsbuf.f_fsid);
 672  672  
 673  673                  /* Allocate and initialize the vsk_anchor_t */
 674  674                  vskp = kmem_cache_alloc(vsk_anchor_cache, KM_SLEEP);
 675  675                  bzero(vskp, sizeof (*vskp));
 676  676                  vskp->vsk_fsid = statvfsbuf.f_fsid;
 677  677  
 678  678                  mutex_enter(&vskstat_tree_lock);
 679  679                  if (avl_find(&vskstat_tree, vskp, &where) == NULL) {
 680  680                          avl_insert(&vskstat_tree, vskp, where);
 681  681                          mutex_exit(&vskstat_tree_lock);
 682  682  
 683  683                          /*
 684  684                           * Now that we've got the anchor in the AVL
 685  685                           * tree, we can create the kstat.
 686  686                           */
 687  687                          ksp = new_vskstat(kstatstr, &vfsp->vfs_vopstats);
 688  688                          if (ksp) {
 689  689                                  vskp->vsk_ksp = ksp;
 690  690                          }
 691  691                  } else {
 692  692                          /* Oops, found one! Release memory and lock. */
 693  693                          mutex_exit(&vskstat_tree_lock);
 694  694                          kmem_cache_free(vsk_anchor_cache, vskp);
 695  695                          vskp = NULL;
 696  696                  }
 697  697          }
 698  698          return (vskp);
 699  699  }
 700  700  
 701  701  /*
 702  702   * We're in the process of tearing down the vfs and need to cleanup
 703  703   * the data structures associated with the vopstats. Must only be called
 704  704   * from dounmount().
 705  705   */
 706  706  void
 707  707  teardown_vopstats(vfs_t *vfsp)
 708  708  {
 709  709          vsk_anchor_t    *vskap;
 710  710          avl_index_t     where;
 711  711  
 712  712          if (vfsp == NULL || vfsp->vfs_implp == NULL ||
 713  713              (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
 714  714                  return;
 715  715  
 716  716          /* This is a safe check since VFS_STATS must be set (see above) */
 717  717          if ((vskap = vfsp->vfs_vskap) == NULL)
 718  718                  return;
 719  719  
 720  720          /* Whack the pointer right away */
 721  721          vfsp->vfs_vskap = NULL;
 722  722  
 723  723          /* Lock the tree, remove the node, and delete the kstat */
 724  724          mutex_enter(&vskstat_tree_lock);
 725  725          if (avl_find(&vskstat_tree, vskap, &where)) {
 726  726                  avl_remove(&vskstat_tree, vskap);
 727  727          }
 728  728  
 729  729          if (vskap->vsk_ksp) {
 730  730                  kstat_delete(vskap->vsk_ksp);
 731  731          }
 732  732          mutex_exit(&vskstat_tree_lock);
 733  733  
 734  734          kmem_cache_free(vsk_anchor_cache, vskap);
 735  735  }
 736  736  
 737  737  /*
 738  738   * Read or write a vnode.  Called from kernel code.
 739  739   */
 740  740  int
 741  741  vn_rdwr(
 742  742          enum uio_rw rw,
 743  743          struct vnode *vp,
 744  744          caddr_t base,
 745  745          ssize_t len,
 746  746          offset_t offset,
 747  747          enum uio_seg seg,
 748  748          int ioflag,
 749  749          rlim64_t ulimit,        /* meaningful only if rw is UIO_WRITE */
 750  750          cred_t *cr,
 751  751          ssize_t *residp)
 752  752  {
 753  753          struct uio uio;
 754  754          struct iovec iov;
 755  755          int error;
 756  756          int in_crit = 0;
 757  757  
 758  758          if (rw == UIO_WRITE && ISROFILE(vp))
 759  759                  return (EROFS);
 760  760  
 761  761          if (len < 0)
 762  762                  return (EIO);
 763  763  
 764  764          VOPXID_MAP_CR(vp, cr);
 765  765  
 766  766          iov.iov_base = base;
 767  767          iov.iov_len = len;
 768  768          uio.uio_iov = &iov;
 769  769          uio.uio_iovcnt = 1;
 770  770          uio.uio_loffset = offset;
 771  771          uio.uio_segflg = (short)seg;
 772  772          uio.uio_resid = len;
 773  773          uio.uio_llimit = ulimit;
 774  774  
 775  775          /*
 776  776           * We have to enter the critical region before calling VOP_RWLOCK
 777  777           * to avoid a deadlock with ufs.
 778  778           */
 779  779          if (nbl_need_check(vp)) {
 780  780                  int svmand;
 781  781  
 782  782                  nbl_start_crit(vp, RW_READER);
 783  783                  in_crit = 1;
 784  784                  error = nbl_svmand(vp, cr, &svmand);
 785  785                  if (error != 0)
 786  786                          goto done;
 787  787                  if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
 788  788                      uio.uio_offset, uio.uio_resid, svmand, NULL)) {
 789  789                          error = EACCES;
 790  790                          goto done;
 791  791                  }
 792  792          }
 793  793  
 794  794          (void) VOP_RWLOCK(vp,
 795  795              rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
 796  796          if (rw == UIO_WRITE) {
 797  797                  uio.uio_fmode = FWRITE;
 798  798                  uio.uio_extflg = UIO_COPY_DEFAULT;
 799  799                  error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
 800  800          } else {
 801  801                  uio.uio_fmode = FREAD;
 802  802                  uio.uio_extflg = UIO_COPY_CACHED;
 803  803                  error = VOP_READ(vp, &uio, ioflag, cr, NULL);
 804  804          }
 805  805          VOP_RWUNLOCK(vp,
 806  806              rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
 807  807          if (residp)
 808  808                  *residp = uio.uio_resid;
 809  809          else if (uio.uio_resid)
 810  810                  error = EIO;
 811  811  
 812  812  done:
 813  813          if (in_crit)
 814  814                  nbl_end_crit(vp);
 815  815          return (error);
 816  816  }
 817  817  
 818  818  /*
 819  819   * Release a vnode.  Call VOP_INACTIVE on last reference or
 820  820   * decrement reference count.
 821  821   *
 822  822   * To avoid race conditions, the v_count is left at 1 for
 823  823   * the call to VOP_INACTIVE. This prevents another thread
 824  824   * from reclaiming and releasing the vnode *before* the
 825  825   * VOP_INACTIVE routine has a chance to destroy the vnode.
 826  826   * We can't have more than 1 thread calling VOP_INACTIVE
 827  827   * on a vnode.
 828  828   */
 829  829  void
 830  830  vn_rele(vnode_t *vp)
 831  831  {
 832  832          VERIFY(vp->v_count > 0);
 833  833          mutex_enter(&vp->v_lock);
 834  834          if (vp->v_count == 1) {
 835  835                  mutex_exit(&vp->v_lock);
 836  836                  VOP_INACTIVE(vp, CRED(), NULL);
 837  837                  return;
 838  838          }
 839  839          vp->v_count--;
 840  840          mutex_exit(&vp->v_lock);
 841  841  }
 842  842  
 843  843  /*
 844  844   * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
 845  845   * as a single reference, so v_count is not decremented until the last DNLC hold
 846  846   * is released. This makes it possible to distinguish vnodes that are referenced
 847  847   * only by the DNLC.
 848  848   */
 849  849  void
 850  850  vn_rele_dnlc(vnode_t *vp)
 851  851  {
 852  852          VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
 853  853          mutex_enter(&vp->v_lock);
 854  854          if (--vp->v_count_dnlc == 0) {
 855  855                  if (vp->v_count == 1) {
 856  856                          mutex_exit(&vp->v_lock);
 857  857                          VOP_INACTIVE(vp, CRED(), NULL);
 858  858                          return;
 859  859                  }
 860  860                  vp->v_count--;
 861  861          }
 862  862          mutex_exit(&vp->v_lock);
 863  863  }
 864  864  
 865  865  /*
 866  866   * Like vn_rele() except that it clears v_stream under v_lock.
 867  867   * This is used by sockfs when it dismantels the association between
 868  868   * the sockfs node and the vnode in the underlaying file system.
 869  869   * v_lock has to be held to prevent a thread coming through the lookupname
 870  870   * path from accessing a stream head that is going away.
 871  871   */
 872  872  void
 873  873  vn_rele_stream(vnode_t *vp)
 874  874  {
 875  875          VERIFY(vp->v_count > 0);
 876  876          mutex_enter(&vp->v_lock);
 877  877          vp->v_stream = NULL;
 878  878          if (vp->v_count == 1) {
 879  879                  mutex_exit(&vp->v_lock);
 880  880                  VOP_INACTIVE(vp, CRED(), NULL);
 881  881                  return;
 882  882          }
 883  883          vp->v_count--;
 884  884          mutex_exit(&vp->v_lock);
 885  885  }
 886  886  
 887  887  static void
 888  888  vn_rele_inactive(vnode_t *vp)
 889  889  {
 890  890          VOP_INACTIVE(vp, CRED(), NULL);
 891  891  }
 892  892  
 893  893  /*
 894  894   * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
 895  895   * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
 896  896   * the file system as a result of releasing the vnode. Note, file systems
 897  897   * already have to handle the race where the vnode is incremented before the
 898  898   * inactive routine is called and does its locking.
 899  899   *
 900  900   * Warning: Excessive use of this routine can lead to performance problems.
 901  901   * This is because taskqs throttle back allocation if too many are created.
 902  902   */
 903  903  void
 904  904  vn_rele_async(vnode_t *vp, taskq_t *taskq)
 905  905  {
 906  906          VERIFY(vp->v_count > 0);
 907  907          mutex_enter(&vp->v_lock);
 908  908          if (vp->v_count == 1) {
 909  909                  mutex_exit(&vp->v_lock);
 910  910                  VERIFY(taskq_dispatch(taskq, (task_func_t *)vn_rele_inactive,
 911  911                      vp, TQ_SLEEP) != NULL);
 912  912                  return;
 913  913          }
 914  914          vp->v_count--;
 915  915          mutex_exit(&vp->v_lock);
 916  916  }
 917  917  
 918  918  int
 919  919  vn_open(
 920  920          char *pnamep,
 921  921          enum uio_seg seg,
 922  922          int filemode,
 923  923          int createmode,
 924  924          struct vnode **vpp,
 925  925          enum create crwhy,
 926  926          mode_t umask)
 927  927  {
 928  928          return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
 929  929              umask, NULL, -1));
 930  930  }
 931  931  
 932  932  
 933  933  /*
 934  934   * Open/create a vnode.
 935  935   * This may be callable by the kernel, the only known use
 936  936   * of user context being that the current user credentials
 937  937   * are used for permissions.  crwhy is defined iff filemode & FCREAT.
 938  938   */
 939  939  int
 940  940  vn_openat(
 941  941          char *pnamep,
 942  942          enum uio_seg seg,
 943  943          int filemode,
 944  944          int createmode,
 945  945          struct vnode **vpp,
 946  946          enum create crwhy,
 947  947          mode_t umask,
 948  948          struct vnode *startvp,
 949  949          int fd)
 950  950  {
 951  951          struct vnode *vp;
 952  952          int mode;
 953  953          int accessflags;
 954  954          int error;
 955  955          int in_crit = 0;
 956  956          int open_done = 0;
 957  957          int shrlock_done = 0;
 958  958          struct vattr vattr;
 959  959          enum symfollow follow;
 960  960          int estale_retry = 0;
 961  961          struct shrlock shr;
 962  962          struct shr_locowner shr_own;
 963  963  
 964  964          mode = 0;
 965  965          accessflags = 0;
 966  966          if (filemode & FREAD)
 967  967                  mode |= VREAD;
 968  968          if (filemode & (FWRITE|FTRUNC))
 969  969                  mode |= VWRITE;
 970  970          if (filemode & (FSEARCH|FEXEC|FXATTRDIROPEN))
 971  971                  mode |= VEXEC;
 972  972  
 973  973          /* symlink interpretation */
 974  974          if (filemode & FNOFOLLOW)
 975  975                  follow = NO_FOLLOW;
 976  976          else
 977  977                  follow = FOLLOW;
 978  978  
 979  979          if (filemode & FAPPEND)
 980  980                  accessflags |= V_APPEND;
 981  981  
 982  982  top:
 983  983          if (filemode & FCREAT) {
 984  984                  enum vcexcl excl;
 985  985  
 986  986                  /*
 987  987                   * Wish to create a file.
 988  988                   */
 989  989                  vattr.va_type = VREG;
 990  990                  vattr.va_mode = createmode;
 991  991                  vattr.va_mask = AT_TYPE|AT_MODE;
 992  992                  if (filemode & FTRUNC) {
 993  993                          vattr.va_size = 0;
 994  994                          vattr.va_mask |= AT_SIZE;
 995  995                  }
 996  996                  if (filemode & FEXCL)
 997  997                          excl = EXCL;
 998  998                  else
 999  999                          excl = NONEXCL;
1000 1000  
1001 1001                  if (error =
1002 1002                      vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
1003 1003                      (filemode & ~(FTRUNC|FEXCL)), umask, startvp))
1004 1004                          return (error);
1005 1005          } else {
1006 1006                  /*
1007 1007                   * Wish to open a file.  Just look it up.
1008 1008                   */
1009 1009                  if (error = lookupnameat(pnamep, seg, follow,
1010 1010                      NULLVPP, &vp, startvp)) {
1011 1011                          if ((error == ESTALE) &&
1012 1012                              fs_need_estale_retry(estale_retry++))
1013 1013                                  goto top;
1014 1014                          return (error);
1015 1015                  }
1016 1016  
1017 1017                  /*
1018 1018                   * Get the attributes to check whether file is large.
1019 1019                   * We do this only if the FOFFMAX flag is not set and
1020 1020                   * only for regular files.
1021 1021                   */
1022 1022  
1023 1023                  if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
1024 1024                          vattr.va_mask = AT_SIZE;
1025 1025                          if ((error = VOP_GETATTR(vp, &vattr, 0,
1026 1026                              CRED(), NULL))) {
1027 1027                                  goto out;
1028 1028                          }
1029 1029                          if (vattr.va_size > (u_offset_t)MAXOFF32_T) {
1030 1030                                  /*
1031 1031                                   * Large File API - regular open fails
1032 1032                                   * if FOFFMAX flag is set in file mode
1033 1033                                   */
1034 1034                                  error = EOVERFLOW;
1035 1035                                  goto out;
1036 1036                          }
1037 1037                  }
1038 1038                  /*
1039 1039                   * Can't write directories, active texts, or
1040 1040                   * read-only filesystems.  Can't truncate files
1041 1041                   * on which mandatory locking is in effect.
1042 1042                   */
1043 1043                  if (filemode & (FWRITE|FTRUNC)) {
1044 1044                          /*
1045 1045                           * Allow writable directory if VDIROPEN flag is set.
1046 1046                           */
1047 1047                          if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
1048 1048                                  error = EISDIR;
1049 1049                                  goto out;
1050 1050                          }
1051 1051                          if (ISROFILE(vp)) {
1052 1052                                  error = EROFS;
1053 1053                                  goto out;
1054 1054                          }
1055 1055                          /*
1056 1056                           * Can't truncate files on which
1057 1057                           * sysv mandatory locking is in effect.
1058 1058                           */
1059 1059                          if (filemode & FTRUNC) {
1060 1060                                  vnode_t *rvp;
1061 1061  
1062 1062                                  if (VOP_REALVP(vp, &rvp, NULL) != 0)
1063 1063                                          rvp = vp;
1064 1064                                  if (rvp->v_filocks != NULL) {
1065 1065                                          vattr.va_mask = AT_MODE;
1066 1066                                          if ((error = VOP_GETATTR(vp,
1067 1067                                              &vattr, 0, CRED(), NULL)) == 0 &&
1068 1068                                              MANDLOCK(vp, vattr.va_mode))
1069 1069                                                  error = EAGAIN;
1070 1070                                  }
1071 1071                          }
1072 1072                          if (error)
1073 1073                                  goto out;
1074 1074                  }
1075 1075                  /*
1076 1076                   * Check permissions.
1077 1077                   */
1078 1078                  if (error = VOP_ACCESS(vp, mode, accessflags, CRED(), NULL))
1079 1079                          goto out;
1080 1080                  /*
1081 1081                   * Require FSEARCH to return a directory.
1082 1082                   * Require FEXEC to return a regular file.
1083 1083                   */
1084 1084                  if ((filemode & FSEARCH) && vp->v_type != VDIR) {
1085 1085                          error = ENOTDIR;
1086 1086                          goto out;
1087 1087                  }
1088 1088                  if ((filemode & FEXEC) && vp->v_type != VREG) {
1089 1089                          error = ENOEXEC;        /* XXX: error code? */
1090 1090                          goto out;
1091 1091                  }
1092 1092          }
1093 1093  
1094 1094          /*
1095 1095           * Do remaining checks for FNOFOLLOW and FNOLINKS.
1096 1096           */
1097 1097          if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
1098 1098                  error = ELOOP;
1099 1099                  goto out;
1100 1100          }
1101 1101          if (filemode & FNOLINKS) {
1102 1102                  vattr.va_mask = AT_NLINK;
1103 1103                  if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))) {
1104 1104                          goto out;
1105 1105                  }
1106 1106                  if (vattr.va_nlink != 1) {
1107 1107                          error = EMLINK;
1108 1108                          goto out;
1109 1109                  }
1110 1110          }
1111 1111  
1112 1112          /*
1113 1113           * Opening a socket corresponding to the AF_UNIX pathname
1114 1114           * in the filesystem name space is not supported.
1115 1115           * However, VSOCK nodes in namefs are supported in order
1116 1116           * to make fattach work for sockets.
1117 1117           *
1118 1118           * XXX This uses VOP_REALVP to distinguish between
1119 1119           * an unopened namefs node (where VOP_REALVP returns a
1120 1120           * different VSOCK vnode) and a VSOCK created by vn_create
1121 1121           * in some file system (where VOP_REALVP would never return
1122 1122           * a different vnode).
1123 1123           */
1124 1124          if (vp->v_type == VSOCK) {
1125 1125                  struct vnode *nvp;
1126 1126  
1127 1127                  error = VOP_REALVP(vp, &nvp, NULL);
1128 1128                  if (error != 0 || nvp == NULL || nvp == vp ||
1129 1129                      nvp->v_type != VSOCK) {
1130 1130                          error = EOPNOTSUPP;
1131 1131                          goto out;
1132 1132                  }
1133 1133          }
1134 1134  
1135 1135          if ((vp->v_type == VREG) && nbl_need_check(vp)) {
1136 1136                  /* get share reservation */
1137 1137                  shr.s_access = 0;
1138 1138                  if (filemode & FWRITE)
1139 1139                          shr.s_access |= F_WRACC;
1140 1140                  if (filemode & FREAD)
1141 1141                          shr.s_access |= F_RDACC;
1142 1142                  shr.s_deny = 0;
1143 1143                  shr.s_sysid = 0;
1144 1144                  shr.s_pid = ttoproc(curthread)->p_pid;
1145 1145                  shr_own.sl_pid = shr.s_pid;
1146 1146                  shr_own.sl_id = fd;
1147 1147                  shr.s_own_len = sizeof (shr_own);
1148 1148                  shr.s_owner = (caddr_t)&shr_own;
1149 1149                  error = VOP_SHRLOCK(vp, F_SHARE_NBMAND, &shr, filemode, CRED(),
1150 1150                      NULL);
1151 1151                  if (error)
1152 1152                          goto out;
1153 1153                  shrlock_done = 1;
1154 1154  
1155 1155                  /* nbmand conflict check if truncating file */
1156 1156                  if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
1157 1157                          nbl_start_crit(vp, RW_READER);
1158 1158                          in_crit = 1;
1159 1159  
1160 1160                          vattr.va_mask = AT_SIZE;
1161 1161                          if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
1162 1162                                  goto out;
1163 1163                          if (nbl_conflict(vp, NBL_WRITE, 0, vattr.va_size, 0,
1164 1164                              NULL)) {
1165 1165                                  error = EACCES;
1166 1166                                  goto out;
1167 1167                          }
1168 1168                  }
1169 1169          }
1170 1170  
1171 1171          /*
1172 1172           * Do opening protocol.
1173 1173           */
1174 1174          error = VOP_OPEN(&vp, filemode, CRED(), NULL);
1175 1175          if (error)
1176 1176                  goto out;
1177 1177          open_done = 1;
1178 1178  
1179 1179          /*
1180 1180           * Truncate if required.
1181 1181           */
1182 1182          if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
1183 1183                  vattr.va_size = 0;
1184 1184                  vattr.va_mask = AT_SIZE;
1185 1185                  if ((error = VOP_SETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
1186 1186                          goto out;
1187 1187          }
1188 1188  out:
1189 1189          ASSERT(vp->v_count > 0);
1190 1190  
1191 1191          if (in_crit) {
1192 1192                  nbl_end_crit(vp);
1193 1193                  in_crit = 0;
1194 1194          }
1195 1195          if (error) {
1196 1196                  if (open_done) {
1197 1197                          (void) VOP_CLOSE(vp, filemode, 1, (offset_t)0, CRED(),
1198 1198                              NULL);
1199 1199                          open_done = 0;
1200 1200                          shrlock_done = 0;
1201 1201                  }
1202 1202                  if (shrlock_done) {
1203 1203                          (void) VOP_SHRLOCK(vp, F_UNSHARE, &shr, 0, CRED(),
1204 1204                              NULL);
1205 1205                          shrlock_done = 0;
1206 1206                  }
1207 1207  
1208 1208                  /*
1209 1209                   * The following clause was added to handle a problem
1210 1210                   * with NFS consistency.  It is possible that a lookup
1211 1211                   * of the file to be opened succeeded, but the file
1212 1212                   * itself doesn't actually exist on the server.  This
1213 1213                   * is chiefly due to the DNLC containing an entry for
1214 1214                   * the file which has been removed on the server.  In
1215 1215                   * this case, we just start over.  If there was some
1216 1216                   * other cause for the ESTALE error, then the lookup
1217 1217                   * of the file will fail and the error will be returned
1218 1218                   * above instead of looping around from here.
1219 1219                   */
1220 1220                  VN_RELE(vp);
1221 1221                  if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1222 1222                          goto top;
1223 1223          } else
1224 1224                  *vpp = vp;
1225 1225          return (error);
1226 1226  }
1227 1227  
1228 1228  /*
1229 1229   * The following two accessor functions are for the NFSv4 server.  Since there
1230 1230   * is no VOP_OPEN_UP/DOWNGRADE we need a way for the NFS server to keep the
1231 1231   * vnode open counts correct when a client "upgrades" an open or does an
1232 1232   * open_downgrade.  In NFS, an upgrade or downgrade can not only change the
1233 1233   * open mode (add or subtract read or write), but also change the share/deny
1234 1234   * modes.  However, share reservations are not integrated with OPEN, yet, so
1235 1235   * we need to handle each separately.  These functions are cleaner than having
1236 1236   * the NFS server manipulate the counts directly, however, nobody else should
1237 1237   * use these functions.
1238 1238   */
1239 1239  void
1240 1240  vn_open_upgrade(
1241 1241          vnode_t *vp,
1242 1242          int filemode)
1243 1243  {
1244 1244          ASSERT(vp->v_type == VREG);
1245 1245  
1246 1246          if (filemode & FREAD)
1247 1247                  atomic_inc_32(&vp->v_rdcnt);
1248 1248          if (filemode & FWRITE)
1249 1249                  atomic_inc_32(&vp->v_wrcnt);
1250 1250  
1251 1251  }
1252 1252  
1253 1253  void
1254 1254  vn_open_downgrade(
1255 1255          vnode_t *vp,
1256 1256          int filemode)
1257 1257  {
1258 1258          ASSERT(vp->v_type == VREG);
1259 1259  
1260 1260          if (filemode & FREAD) {
1261 1261                  ASSERT(vp->v_rdcnt > 0);
1262 1262                  atomic_dec_32(&vp->v_rdcnt);
1263 1263          }
1264 1264          if (filemode & FWRITE) {
1265 1265                  ASSERT(vp->v_wrcnt > 0);
1266 1266                  atomic_dec_32(&vp->v_wrcnt);
1267 1267          }
1268 1268  
1269 1269  }
1270 1270  
1271 1271  int
1272 1272  vn_create(
1273 1273          char *pnamep,
1274 1274          enum uio_seg seg,
1275 1275          struct vattr *vap,
1276 1276          enum vcexcl excl,
1277 1277          int mode,
1278 1278          struct vnode **vpp,
1279 1279          enum create why,
1280 1280          int flag,
1281 1281          mode_t umask)
1282 1282  {
1283 1283          return (vn_createat(pnamep, seg, vap, excl, mode, vpp, why, flag,
1284 1284              umask, NULL));
1285 1285  }
1286 1286  
1287 1287  /*
1288 1288   * Create a vnode (makenode).
1289 1289   */
1290 1290  int
1291 1291  vn_createat(
1292 1292          char *pnamep,
1293 1293          enum uio_seg seg,
1294 1294          struct vattr *vap,
1295 1295          enum vcexcl excl,
1296 1296          int mode,
1297 1297          struct vnode **vpp,
1298 1298          enum create why,
1299 1299          int flag,
1300 1300          mode_t umask,
1301 1301          struct vnode *startvp)
1302 1302  {
1303 1303          struct vnode *dvp;      /* ptr to parent dir vnode */
1304 1304          struct vnode *vp = NULL;
1305 1305          struct pathname pn;
1306 1306          int error;
1307 1307          int in_crit = 0;
1308 1308          struct vattr vattr;
1309 1309          enum symfollow follow;
1310 1310          int estale_retry = 0;
1311 1311          uint32_t auditing = AU_AUDITING();
1312 1312  
1313 1313          ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1314 1314  
1315 1315          /* symlink interpretation */
1316 1316          if ((flag & FNOFOLLOW) || excl == EXCL)
1317 1317                  follow = NO_FOLLOW;
1318 1318          else
1319 1319                  follow = FOLLOW;
1320 1320          flag &= ~(FNOFOLLOW|FNOLINKS);
1321 1321  
1322 1322  top:
1323 1323          /*
1324 1324           * Lookup directory.
1325 1325           * If new object is a file, call lower level to create it.
1326 1326           * Note that it is up to the lower level to enforce exclusive
1327 1327           * creation, if the file is already there.
1328 1328           * This allows the lower level to do whatever
1329 1329           * locking or protocol that is needed to prevent races.
1330 1330           * If the new object is directory call lower level to make
1331 1331           * the new directory, with "." and "..".
1332 1332           */
1333 1333          if (error = pn_get(pnamep, seg, &pn))
1334 1334                  return (error);
1335 1335          if (auditing)
1336 1336                  audit_vncreate_start();
1337 1337          dvp = NULL;
1338 1338          *vpp = NULL;
1339 1339          /*
1340 1340           * lookup will find the parent directory for the vnode.
1341 1341           * When it is done the pn holds the name of the entry
1342 1342           * in the directory.
1343 1343           * If this is a non-exclusive create we also find the node itself.
1344 1344           */
1345 1345          error = lookuppnat(&pn, NULL, follow, &dvp,
1346 1346              (excl == EXCL) ? NULLVPP : vpp, startvp);
1347 1347          if (error) {
1348 1348                  pn_free(&pn);
1349 1349                  if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1350 1350                          goto top;
1351 1351                  if (why == CRMKDIR && error == EINVAL)
1352 1352                          error = EEXIST;         /* SVID */
1353 1353                  return (error);
1354 1354          }
1355 1355  
1356 1356          if (why != CRMKNOD)
1357 1357                  vap->va_mode &= ~VSVTX;
1358 1358  
1359 1359          /*
1360 1360           * If default ACLs are defined for the directory don't apply the
1361 1361           * umask if umask is passed.
1362 1362           */
1363 1363  
1364 1364          if (umask) {
1365 1365  
1366 1366                  vsecattr_t vsec;
1367 1367  
1368 1368                  vsec.vsa_aclcnt = 0;
1369 1369                  vsec.vsa_aclentp = NULL;
1370 1370                  vsec.vsa_dfaclcnt = 0;
1371 1371                  vsec.vsa_dfaclentp = NULL;
1372 1372                  vsec.vsa_mask = VSA_DFACLCNT;
1373 1373                  error = VOP_GETSECATTR(dvp, &vsec, 0, CRED(), NULL);
1374 1374                  /*
1375 1375                   * If error is ENOSYS then treat it as no error
1376 1376                   * Don't want to force all file systems to support
1377 1377                   * aclent_t style of ACL's.
1378 1378                   */
1379 1379                  if (error == ENOSYS)
1380 1380                          error = 0;
1381 1381                  if (error) {
1382 1382                          if (*vpp != NULL)
1383 1383                                  VN_RELE(*vpp);
1384 1384                          goto out;
1385 1385                  } else {
1386 1386                          /*
1387 1387                           * Apply the umask if no default ACLs.
1388 1388                           */
1389 1389                          if (vsec.vsa_dfaclcnt == 0)
1390 1390                                  vap->va_mode &= ~umask;
1391 1391  
1392 1392                          /*
1393 1393                           * VOP_GETSECATTR() may have allocated memory for
1394 1394                           * ACLs we didn't request, so double-check and
1395 1395                           * free it if necessary.
1396 1396                           */
1397 1397                          if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
1398 1398                                  kmem_free((caddr_t)vsec.vsa_aclentp,
1399 1399                                      vsec.vsa_aclcnt * sizeof (aclent_t));
1400 1400                          if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
1401 1401                                  kmem_free((caddr_t)vsec.vsa_dfaclentp,
1402 1402                                      vsec.vsa_dfaclcnt * sizeof (aclent_t));
1403 1403                  }
1404 1404          }
1405 1405  
1406 1406          /*
1407 1407           * In general we want to generate EROFS if the file system is
1408 1408           * readonly.  However, POSIX (IEEE Std. 1003.1) section 5.3.1
1409 1409           * documents the open system call, and it says that O_CREAT has no
1410 1410           * effect if the file already exists.  Bug 1119649 states
1411 1411           * that open(path, O_CREAT, ...) fails when attempting to open an
1412 1412           * existing file on a read only file system.  Thus, the first part
1413 1413           * of the following if statement has 3 checks:
1414 1414           *      if the file exists &&
1415 1415           *              it is being open with write access &&
1416 1416           *              the file system is read only
1417 1417           *      then generate EROFS
1418 1418           */
1419 1419          if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
1420 1420              (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
1421 1421                  if (*vpp)
1422 1422                          VN_RELE(*vpp);
1423 1423                  error = EROFS;
1424 1424          } else if (excl == NONEXCL && *vpp != NULL) {
1425 1425                  vnode_t *rvp;
1426 1426  
1427 1427                  /*
1428 1428                   * File already exists.  If a mandatory lock has been
1429 1429                   * applied, return error.
1430 1430                   */
1431 1431                  vp = *vpp;
1432 1432                  if (VOP_REALVP(vp, &rvp, NULL) != 0)
1433 1433                          rvp = vp;
1434 1434                  if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
1435 1435                          nbl_start_crit(vp, RW_READER);
1436 1436                          in_crit = 1;
1437 1437                  }
1438 1438                  if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
1439 1439                          vattr.va_mask = AT_MODE|AT_SIZE;
1440 1440                          if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) {
1441 1441                                  goto out;
1442 1442                          }
1443 1443                          if (MANDLOCK(vp, vattr.va_mode)) {
1444 1444                                  error = EAGAIN;
1445 1445                                  goto out;
1446 1446                          }
1447 1447                          /*
1448 1448                           * File cannot be truncated if non-blocking mandatory
1449 1449                           * locks are currently on the file.
1450 1450                           */
1451 1451                          if ((vap->va_mask & AT_SIZE) && in_crit) {
1452 1452                                  u_offset_t offset;
1453 1453                                  ssize_t length;
1454 1454  
1455 1455                                  offset = vap->va_size > vattr.va_size ?
1456 1456                                      vattr.va_size : vap->va_size;
1457 1457                                  length = vap->va_size > vattr.va_size ?
1458 1458                                      vap->va_size - vattr.va_size :
1459 1459                                      vattr.va_size - vap->va_size;
1460 1460                                  if (nbl_conflict(vp, NBL_WRITE, offset,
1461 1461                                      length, 0, NULL)) {
1462 1462                                          error = EACCES;
1463 1463                                          goto out;
1464 1464                                  }
1465 1465                          }
1466 1466                  }
1467 1467  
1468 1468                  /*
1469 1469                   * If the file is the root of a VFS, we've crossed a
1470 1470                   * mount point and the "containing" directory that we
1471 1471                   * acquired above (dvp) is irrelevant because it's in
1472 1472                   * a different file system.  We apply VOP_CREATE to the
1473 1473                   * target itself instead of to the containing directory
1474 1474                   * and supply a null path name to indicate (conventionally)
1475 1475                   * the node itself as the "component" of interest.
1476 1476                   *
1477 1477                   * The intercession of the file system is necessary to
1478 1478                   * ensure that the appropriate permission checks are
1479 1479                   * done.
1480 1480                   */
1481 1481                  if (vp->v_flag & VROOT) {
1482 1482                          ASSERT(why != CRMKDIR);
1483 1483                          error = VOP_CREATE(vp, "", vap, excl, mode, vpp,
1484 1484                              CRED(), flag, NULL, NULL);
1485 1485                          /*
1486 1486                           * If the create succeeded, it will have created
1487 1487                           * a new reference to the vnode.  Give up the
1488 1488                           * original reference.  The assertion should not
1489 1489                           * get triggered because NBMAND locks only apply to
1490 1490                           * VREG files.  And if in_crit is non-zero for some
1491 1491                           * reason, detect that here, rather than when we
1492 1492                           * deference a null vp.
1493 1493                           */
1494 1494                          ASSERT(in_crit == 0);
1495 1495                          VN_RELE(vp);
1496 1496                          vp = NULL;
1497 1497                          goto out;
1498 1498                  }
1499 1499  
1500 1500                  /*
1501 1501                   * Large File API - non-large open (FOFFMAX flag not set)
1502 1502                   * of regular file fails if the file size exceeds MAXOFF32_T.
1503 1503                   */
1504 1504                  if (why != CRMKDIR &&
1505 1505                      !(flag & FOFFMAX) &&
1506 1506                      (vp->v_type == VREG)) {
1507 1507                          vattr.va_mask = AT_SIZE;
1508 1508                          if ((error = VOP_GETATTR(vp, &vattr, 0,
1509 1509                              CRED(), NULL))) {
1510 1510                                  goto out;
1511 1511                          }
1512 1512                          if ((vattr.va_size > (u_offset_t)MAXOFF32_T)) {
1513 1513                                  error = EOVERFLOW;
1514 1514                                  goto out;
1515 1515                          }
1516 1516                  }
1517 1517          }
1518 1518  
1519 1519          if (error == 0) {
1520 1520                  /*
1521 1521                   * Call mkdir() if specified, otherwise create().
1522 1522                   */
1523 1523                  int must_be_dir = pn_fixslash(&pn);     /* trailing '/'? */
1524 1524  
1525 1525                  if (why == CRMKDIR)
1526 1526                          /*
1527 1527                           * N.B., if vn_createat() ever requests
1528 1528                           * case-insensitive behavior then it will need
1529 1529                           * to be passed to VOP_MKDIR().  VOP_CREATE()
1530 1530                           * will already get it via "flag"
1531 1531                           */
1532 1532                          error = VOP_MKDIR(dvp, pn.pn_path, vap, vpp, CRED(),
1533 1533                              NULL, 0, NULL);
1534 1534                  else if (!must_be_dir)
1535 1535                          error = VOP_CREATE(dvp, pn.pn_path, vap,
1536 1536                              excl, mode, vpp, CRED(), flag, NULL, NULL);
1537 1537                  else
1538 1538                          error = ENOTDIR;
1539 1539          }
1540 1540  
1541 1541  out:
1542 1542  
1543 1543          if (auditing)
1544 1544                  audit_vncreate_finish(*vpp, error);
1545 1545          if (in_crit) {
1546 1546                  nbl_end_crit(vp);
1547 1547                  in_crit = 0;
1548 1548          }
1549 1549          if (vp != NULL) {
1550 1550                  VN_RELE(vp);
1551 1551                  vp = NULL;
1552 1552          }
1553 1553          pn_free(&pn);
1554 1554          VN_RELE(dvp);
1555 1555          /*
1556 1556           * The following clause was added to handle a problem
1557 1557           * with NFS consistency.  It is possible that a lookup
1558 1558           * of the file to be created succeeded, but the file
1559 1559           * itself doesn't actually exist on the server.  This
1560 1560           * is chiefly due to the DNLC containing an entry for
1561 1561           * the file which has been removed on the server.  In
1562 1562           * this case, we just start over.  If there was some
1563 1563           * other cause for the ESTALE error, then the lookup
1564 1564           * of the file will fail and the error will be returned
1565 1565           * above instead of looping around from here.
1566 1566           */
1567 1567          if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1568 1568                  goto top;
1569 1569          return (error);
1570 1570  }
1571 1571  
1572 1572  int
1573 1573  vn_link(char *from, char *to, enum uio_seg seg)
1574 1574  {
1575 1575          return (vn_linkat(NULL, from, NO_FOLLOW, NULL, to, seg));
1576 1576  }
1577 1577  
1578 1578  int
1579 1579  vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
1580 1580      vnode_t *tstartvp, char *to, enum uio_seg seg)
1581 1581  {
1582 1582          struct vnode *fvp;              /* from vnode ptr */
1583 1583          struct vnode *tdvp;             /* to directory vnode ptr */
1584 1584          struct pathname pn;
1585 1585          int error;
1586 1586          struct vattr vattr;
1587 1587          dev_t fsid;
1588 1588          int estale_retry = 0;
1589 1589          uint32_t auditing = AU_AUDITING();
1590 1590  
1591 1591  top:
1592 1592          fvp = tdvp = NULL;
1593 1593          if (error = pn_get(to, seg, &pn))
1594 1594                  return (error);
1595 1595          if (auditing && fstartvp != NULL)
1596 1596                  audit_setfsat_path(1);
1597 1597          if (error = lookupnameat(from, seg, follow, NULLVPP, &fvp, fstartvp))
1598 1598                  goto out;
1599 1599          if (auditing && tstartvp != NULL)
1600 1600                  audit_setfsat_path(3);
1601 1601          if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP, tstartvp))
1602 1602                  goto out;
1603 1603          /*
1604 1604           * Make sure both source vnode and target directory vnode are
1605 1605           * in the same vfs and that it is writeable.
1606 1606           */
1607 1607          vattr.va_mask = AT_FSID;
1608 1608          if (error = VOP_GETATTR(fvp, &vattr, 0, CRED(), NULL))
1609 1609                  goto out;
1610 1610          fsid = vattr.va_fsid;
1611 1611          vattr.va_mask = AT_FSID;
1612 1612          if (error = VOP_GETATTR(tdvp, &vattr, 0, CRED(), NULL))
1613 1613                  goto out;
1614 1614          if (fsid != vattr.va_fsid) {
1615 1615                  error = EXDEV;
1616 1616                  goto out;
1617 1617          }
1618 1618          if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
1619 1619                  error = EROFS;
1620 1620                  goto out;
1621 1621          }
1622 1622          /*
1623 1623           * Do the link.
1624 1624           */
1625 1625          (void) pn_fixslash(&pn);
1626 1626          error = VOP_LINK(tdvp, fvp, pn.pn_path, CRED(), NULL, 0);
1627 1627  out:
1628 1628          pn_free(&pn);
1629 1629          if (fvp)
1630 1630                  VN_RELE(fvp);
1631 1631          if (tdvp)
1632 1632                  VN_RELE(tdvp);
1633 1633          if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1634 1634                  goto top;
1635 1635          return (error);
1636 1636  }
1637 1637  
1638 1638  int
1639 1639  vn_rename(char *from, char *to, enum uio_seg seg)
1640 1640  {
1641 1641          return (vn_renameat(NULL, from, NULL, to, seg));
1642 1642  }
1643 1643  
1644 1644  int
1645 1645  vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
1646 1646                  char *tname, enum uio_seg seg)
1647 1647  {
1648 1648          int error;
1649 1649          struct vattr vattr;
1650 1650          struct pathname fpn;            /* from pathname */
1651 1651          struct pathname tpn;            /* to pathname */
1652 1652          dev_t fsid;
1653 1653          int in_crit_src, in_crit_targ;
1654 1654          vnode_t *fromvp, *fvp;
1655 1655          vnode_t *tovp, *targvp;
1656 1656          int estale_retry = 0;
1657 1657          uint32_t auditing = AU_AUDITING();
1658 1658  
1659 1659  top:
1660 1660          fvp = fromvp = tovp = targvp = NULL;
1661 1661          in_crit_src = in_crit_targ = 0;
1662 1662          /*
1663 1663           * Get to and from pathnames.
1664 1664           */
1665 1665          if (error = pn_get(fname, seg, &fpn))
1666 1666                  return (error);
1667 1667          if (error = pn_get(tname, seg, &tpn)) {
1668 1668                  pn_free(&fpn);
1669 1669                  return (error);
1670 1670          }
1671 1671  
1672 1672          /*
1673 1673           * First we need to resolve the correct directories
1674 1674           * The passed in directories may only be a starting point,
1675 1675           * but we need the real directories the file(s) live in.
1676 1676           * For example the fname may be something like usr/lib/sparc
1677 1677           * and we were passed in the / directory, but we need to
1678 1678           * use the lib directory for the rename.
1679 1679           */
1680 1680  
1681 1681          if (auditing && fdvp != NULL)
1682 1682                  audit_setfsat_path(1);
1683 1683          /*
1684 1684           * Lookup to and from directories.
1685 1685           */
1686 1686          if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
1687 1687                  goto out;
1688 1688          }
1689 1689  
1690 1690          /*
1691 1691           * Make sure there is an entry.
1692 1692           */
1693 1693          if (fvp == NULL) {
1694 1694                  error = ENOENT;
1695 1695                  goto out;
1696 1696          }
1697 1697  
1698 1698          if (auditing && tdvp != NULL)
1699 1699                  audit_setfsat_path(3);
1700 1700          if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, &targvp, tdvp)) {
1701 1701                  goto out;
1702 1702          }
1703 1703  
1704 1704          /*
1705 1705           * Make sure both the from vnode directory and the to directory
1706 1706           * are in the same vfs and the to directory is writable.
1707 1707           * We check fsid's, not vfs pointers, so loopback fs works.
1708 1708           */
1709 1709          if (fromvp != tovp) {
1710 1710                  vattr.va_mask = AT_FSID;
1711 1711                  if (error = VOP_GETATTR(fromvp, &vattr, 0, CRED(), NULL))
1712 1712                          goto out;
1713 1713                  fsid = vattr.va_fsid;
1714 1714                  vattr.va_mask = AT_FSID;
1715 1715                  if (error = VOP_GETATTR(tovp, &vattr, 0, CRED(), NULL))
1716 1716                          goto out;
1717 1717                  if (fsid != vattr.va_fsid) {
1718 1718                          error = EXDEV;
1719 1719                          goto out;
1720 1720                  }
1721 1721          }
1722 1722  
1723 1723          if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
1724 1724                  error = EROFS;
1725 1725                  goto out;
1726 1726          }
1727 1727  
1728 1728          if (targvp && (fvp != targvp)) {
1729 1729                  nbl_start_crit(targvp, RW_READER);
1730 1730                  in_crit_targ = 1;
1731 1731                  if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
1732 1732                          error = EACCES;
1733 1733                          goto out;
1734 1734                  }
1735 1735          }
1736 1736  
1737 1737          if (nbl_need_check(fvp)) {
1738 1738                  nbl_start_crit(fvp, RW_READER);
1739 1739                  in_crit_src = 1;
1740 1740                  if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0, NULL)) {
1741 1741                          error = EACCES;
1742 1742                          goto out;
1743 1743                  }
1744 1744          }
1745 1745  
1746 1746          /*
1747 1747           * Do the rename.
1748 1748           */
1749 1749          (void) pn_fixslash(&tpn);
1750 1750          error = VOP_RENAME(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED(),
1751 1751              NULL, 0);
1752 1752  
1753 1753  out:
1754 1754          pn_free(&fpn);
1755 1755          pn_free(&tpn);
1756 1756          if (in_crit_src)
1757 1757                  nbl_end_crit(fvp);
1758 1758          if (in_crit_targ)
1759 1759                  nbl_end_crit(targvp);
1760 1760          if (fromvp)
1761 1761                  VN_RELE(fromvp);
1762 1762          if (tovp)
1763 1763                  VN_RELE(tovp);
1764 1764          if (targvp)
1765 1765                  VN_RELE(targvp);
1766 1766          if (fvp)
1767 1767                  VN_RELE(fvp);
1768 1768          if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1769 1769                  goto top;
1770 1770          return (error);
1771 1771  }
1772 1772  
1773 1773  /*
1774 1774   * Remove a file or directory.
1775 1775   */
1776 1776  int
1777 1777  vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
1778 1778  {
1779 1779          return (vn_removeat(NULL, fnamep, seg, dirflag));
1780 1780  }
1781 1781  
1782 1782  int
1783 1783  vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
1784 1784  {
1785 1785          struct vnode *vp;               /* entry vnode */
1786 1786          struct vnode *dvp;              /* ptr to parent dir vnode */
1787 1787          struct vnode *coveredvp;
1788 1788          struct pathname pn;             /* name of entry */
1789 1789          enum vtype vtype;
1790 1790          int error;
1791 1791          struct vfs *vfsp;
1792 1792          struct vfs *dvfsp;      /* ptr to parent dir vfs */
1793 1793          int in_crit = 0;
1794 1794          int estale_retry = 0;
1795 1795  
1796 1796  top:
1797 1797          if (error = pn_get(fnamep, seg, &pn))
1798 1798                  return (error);
1799 1799          dvp = vp = NULL;
1800 1800          if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
1801 1801                  pn_free(&pn);
1802 1802                  if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1803 1803                          goto top;
1804 1804                  return (error);
1805 1805          }
1806 1806  
1807 1807          /*
1808 1808           * Make sure there is an entry.
1809 1809           */
1810 1810          if (vp == NULL) {
1811 1811                  error = ENOENT;
1812 1812                  goto out;
1813 1813          }
1814 1814  
1815 1815          vfsp = vp->v_vfsp;
1816 1816          dvfsp = dvp->v_vfsp;
1817 1817  
1818 1818          /*
1819 1819           * If the named file is the root of a mounted filesystem, fail,
1820 1820           * unless it's marked unlinkable.  In that case, unmount the
1821 1821           * filesystem and proceed to unlink the covered vnode.  (If the
1822 1822           * covered vnode is a directory, use rmdir instead of unlink,
1823 1823           * to avoid file system corruption.)
1824 1824           */
1825 1825          if (vp->v_flag & VROOT) {
1826 1826                  if ((vfsp->vfs_flag & VFS_UNLINKABLE) == 0) {
1827 1827                          error = EBUSY;
1828 1828                          goto out;
1829 1829                  }
1830 1830  
1831 1831                  /*
1832 1832                   * Namefs specific code starts here.
1833 1833                   */
1834 1834  
1835 1835                  if (dirflag == RMDIRECTORY) {
1836 1836                          /*
1837 1837                           * User called rmdir(2) on a file that has
1838 1838                           * been namefs mounted on top of.  Since
1839 1839                           * namefs doesn't allow directories to
1840 1840                           * be mounted on other files we know
1841 1841                           * vp is not of type VDIR so fail to operation.
1842 1842                           */
1843 1843                          error = ENOTDIR;
1844 1844                          goto out;
1845 1845                  }
1846 1846  
1847 1847                  /*
1848 1848                   * If VROOT is still set after grabbing vp->v_lock,
1849 1849                   * noone has finished nm_unmount so far and coveredvp
1850 1850                   * is valid.
1851 1851                   * If we manage to grab vn_vfswlock(coveredvp) before releasing
1852 1852                   * vp->v_lock, any race window is eliminated.
1853 1853                   */
1854 1854  
1855 1855                  mutex_enter(&vp->v_lock);
1856 1856                  if ((vp->v_flag & VROOT) == 0) {
1857 1857                          /* Someone beat us to the unmount */
1858 1858                          mutex_exit(&vp->v_lock);
1859 1859                          error = EBUSY;
1860 1860                          goto out;
1861 1861                  }
1862 1862                  vfsp = vp->v_vfsp;
1863 1863                  coveredvp = vfsp->vfs_vnodecovered;
1864 1864                  ASSERT(coveredvp);
1865 1865                  /*
1866 1866                   * Note: Implementation of vn_vfswlock shows that ordering of
1867 1867                   * v_lock / vn_vfswlock is not an issue here.
1868 1868                   */
1869 1869                  error = vn_vfswlock(coveredvp);
1870 1870                  mutex_exit(&vp->v_lock);
1871 1871  
1872 1872                  if (error)
1873 1873                          goto out;
1874 1874  
1875 1875                  VN_HOLD(coveredvp);
1876 1876                  VN_RELE(vp);
1877 1877                  error = dounmount(vfsp, 0, CRED());
1878 1878  
1879 1879                  /*
1880 1880                   * Unmounted the namefs file system; now get
1881 1881                   * the object it was mounted over.
1882 1882                   */
1883 1883                  vp = coveredvp;
1884 1884                  /*
1885 1885                   * If namefs was mounted over a directory, then
1886 1886                   * we want to use rmdir() instead of unlink().
1887 1887                   */
1888 1888                  if (vp->v_type == VDIR)
1889 1889                          dirflag = RMDIRECTORY;
1890 1890  
1891 1891                  if (error)
1892 1892                          goto out;
1893 1893          }
1894 1894  
1895 1895          /*
1896 1896           * Make sure filesystem is writeable.
1897 1897           * We check the parent directory's vfs in case this is an lofs vnode.
1898 1898           */
1899 1899          if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
1900 1900                  error = EROFS;
1901 1901                  goto out;
1902 1902          }
1903 1903  
1904 1904          vtype = vp->v_type;
1905 1905  
1906 1906          /*
1907 1907           * If there is the possibility of an nbmand share reservation, make
1908 1908           * sure it's okay to remove the file.  Keep a reference to the
1909 1909           * vnode, so that we can exit the nbl critical region after
1910 1910           * calling VOP_REMOVE.
1911 1911           * If there is no possibility of an nbmand share reservation,
1912 1912           * release the vnode reference now.  Filesystems like NFS may
1913 1913           * behave differently if there is an extra reference, so get rid of
1914 1914           * this one.  Fortunately, we can't have nbmand mounts on NFS
1915 1915           * filesystems.
1916 1916           */
1917 1917          if (nbl_need_check(vp)) {
1918 1918                  nbl_start_crit(vp, RW_READER);
1919 1919                  in_crit = 1;
1920 1920                  if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
1921 1921                          error = EACCES;
1922 1922                          goto out;
1923 1923                  }
1924 1924          } else {
1925 1925                  VN_RELE(vp);
1926 1926                  vp = NULL;
1927 1927          }
1928 1928  
1929 1929          if (dirflag == RMDIRECTORY) {
1930 1930                  /*
1931 1931                   * Caller is using rmdir(2), which can only be applied to
1932 1932                   * directories.
1933 1933                   */
1934 1934                  if (vtype != VDIR) {
1935 1935                          error = ENOTDIR;
1936 1936                  } else {
1937 1937                          vnode_t *cwd;
1938 1938                          proc_t *pp = curproc;
1939 1939  
1940 1940                          mutex_enter(&pp->p_lock);
1941 1941                          cwd = PTOU(pp)->u_cdir;
1942 1942                          VN_HOLD(cwd);
1943 1943                          mutex_exit(&pp->p_lock);
1944 1944                          error = VOP_RMDIR(dvp, pn.pn_path, cwd, CRED(),
1945 1945                              NULL, 0);
1946 1946                          VN_RELE(cwd);
1947 1947                  }
1948 1948          } else {
1949 1949                  /*
1950 1950                   * Unlink(2) can be applied to anything.
1951 1951                   */
1952 1952                  error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
1953 1953          }
1954 1954  
1955 1955  out:
1956 1956          pn_free(&pn);
1957 1957          if (in_crit) {
1958 1958                  nbl_end_crit(vp);
1959 1959                  in_crit = 0;
1960 1960          }
1961 1961          if (vp != NULL)
1962 1962                  VN_RELE(vp);
1963 1963          if (dvp != NULL)
1964 1964                  VN_RELE(dvp);
1965 1965          if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1966 1966                  goto top;
1967 1967          return (error);
1968 1968  }
1969 1969  
1970 1970  /*
1971 1971   * Utility function to compare equality of vnodes.
1972 1972   * Compare the underlying real vnodes, if there are underlying vnodes.
1973 1973   * This is a more thorough comparison than the VN_CMP() macro provides.
1974 1974   */
1975 1975  int
1976 1976  vn_compare(vnode_t *vp1, vnode_t *vp2)
1977 1977  {
1978 1978          vnode_t *realvp;
1979 1979  
1980 1980          if (vp1 != NULL && VOP_REALVP(vp1, &realvp, NULL) == 0)
1981 1981                  vp1 = realvp;
1982 1982          if (vp2 != NULL && VOP_REALVP(vp2, &realvp, NULL) == 0)
1983 1983                  vp2 = realvp;
1984 1984          return (VN_CMP(vp1, vp2));
1985 1985  }
1986 1986  
1987 1987  /*
1988 1988   * The number of locks to hash into.  This value must be a power
1989 1989   * of 2 minus 1 and should probably also be prime.
1990 1990   */
1991 1991  #define NUM_BUCKETS     1023
1992 1992  
1993 1993  struct  vn_vfslocks_bucket {
1994 1994          kmutex_t vb_lock;
1995 1995          vn_vfslocks_entry_t *vb_list;
1996 1996          char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
1997 1997  };
1998 1998  
1999 1999  /*
2000 2000   * Total number of buckets will be NUM_BUCKETS + 1 .
2001 2001   */
2002 2002  
2003 2003  #pragma align   64(vn_vfslocks_buckets)
2004 2004  static  struct vn_vfslocks_bucket       vn_vfslocks_buckets[NUM_BUCKETS + 1];
2005 2005  
/*
 * Number of low-order pointer bits discarded before hashing.
 */
#define	VN_VFSLOCKS_SHIFT	9

/*
 * Map a vfs/vnode pointer to a bucket index in the range [0, NUM_BUCKETS]:
 * drop the low VN_VFSLOCKS_SHIFT bits and mask the result with NUM_BUCKETS.
 *
 * The pointer is converted to uintptr_t rather than intptr_t so that the
 * shift is done on an unsigned value; right-shifting a negative signed
 * value is implementation-defined in C.  The masked result is unchanged.
 */
#define	VN_VFSLOCKS_HASH(vfsvpptr)	\
	((((uintptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
2010 2010  
2011 2011  /*
2012 2012   * vn_vfslocks_getlock() uses an HASH scheme to generate
2013 2013   * rwstlock using vfs/vnode pointer passed to it.
2014 2014   *
2015 2015   * vn_vfslocks_rele() releases a reference in the
2016 2016   * HASH table which allows the entry allocated by
2017 2017   * vn_vfslocks_getlock() to be freed at a later
2018 2018   * stage when the refcount drops to zero.
2019 2019   */
2020 2020  
2021 2021  vn_vfslocks_entry_t *
2022 2022  vn_vfslocks_getlock(void *vfsvpptr)
2023 2023  {
2024 2024          struct vn_vfslocks_bucket *bp;
2025 2025          vn_vfslocks_entry_t *vep;
2026 2026          vn_vfslocks_entry_t *tvep;
2027 2027  
2028 2028          ASSERT(vfsvpptr != NULL);
2029 2029          bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];
2030 2030  
2031 2031          mutex_enter(&bp->vb_lock);
2032 2032          for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2033 2033                  if (vep->ve_vpvfs == vfsvpptr) {
2034 2034                          vep->ve_refcnt++;
2035 2035                          mutex_exit(&bp->vb_lock);
2036 2036                          return (vep);
2037 2037                  }
2038 2038          }
2039 2039          mutex_exit(&bp->vb_lock);
2040 2040          vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
2041 2041          rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
2042 2042          vep->ve_vpvfs = (char *)vfsvpptr;
2043 2043          vep->ve_refcnt = 1;
2044 2044          mutex_enter(&bp->vb_lock);
2045 2045          for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
2046 2046                  if (tvep->ve_vpvfs == vfsvpptr) {
2047 2047                          tvep->ve_refcnt++;
2048 2048                          mutex_exit(&bp->vb_lock);
2049 2049  
2050 2050                          /*
2051 2051                           * There is already an entry in the hash
2052 2052                           * destroy what we just allocated.
2053 2053                           */
2054 2054                          rwst_destroy(&vep->ve_lock);
2055 2055                          kmem_free(vep, sizeof (*vep));
2056 2056                          return (tvep);
2057 2057                  }
2058 2058          }
2059 2059          vep->ve_next = bp->vb_list;
2060 2060          bp->vb_list = vep;
2061 2061          mutex_exit(&bp->vb_lock);
2062 2062          return (vep);
2063 2063  }
2064 2064  
2065 2065  void
2066 2066  vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
2067 2067  {
2068 2068          struct vn_vfslocks_bucket *bp;
2069 2069          vn_vfslocks_entry_t *vep;
2070 2070          vn_vfslocks_entry_t *pvep;
2071 2071  
2072 2072          ASSERT(vepent != NULL);
2073 2073          ASSERT(vepent->ve_vpvfs != NULL);
2074 2074  
2075 2075          bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];
2076 2076  
2077 2077          mutex_enter(&bp->vb_lock);
2078 2078          vepent->ve_refcnt--;
2079 2079  
2080 2080          if ((int32_t)vepent->ve_refcnt < 0)
2081 2081                  cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");
2082 2082  
2083 2083          if (vepent->ve_refcnt == 0) {
2084 2084                  for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2085 2085                          if (vep->ve_vpvfs == vepent->ve_vpvfs) {
2086 2086                                  if (bp->vb_list == vep)
2087 2087                                          bp->vb_list = vep->ve_next;
2088 2088                                  else {
2089 2089                                          /* LINTED */
2090 2090                                          pvep->ve_next = vep->ve_next;
2091 2091                                  }
2092 2092                                  mutex_exit(&bp->vb_lock);
2093 2093                                  rwst_destroy(&vep->ve_lock);
2094 2094                                  kmem_free(vep, sizeof (*vep));
2095 2095                                  return;
2096 2096                          }
2097 2097                          pvep = vep;
2098 2098                  }
2099 2099                  cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
2100 2100          }
2101 2101          mutex_exit(&bp->vb_lock);
2102 2102  }
2103 2103  
2104 2104  /*
2105 2105   * vn_vfswlock_wait is used to implement a lock which is logically a writers
2106 2106   * lock protecting the v_vfsmountedhere field.
2107 2107   * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
2108 2108   * except that it blocks to acquire the lock VVFSLOCK.
2109 2109   *
2110 2110   * traverse() and routines re-implementing part of traverse (e.g. autofs)
2111 2111   * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
2112 2112   * need the non-blocking version of the writers lock i.e. vn_vfswlock
2113 2113   */
2114 2114  int
2115 2115  vn_vfswlock_wait(vnode_t *vp)
2116 2116  {
2117 2117          int retval;
2118 2118          vn_vfslocks_entry_t *vpvfsentry;
2119 2119          ASSERT(vp != NULL);
2120 2120  
2121 2121          vpvfsentry = vn_vfslocks_getlock(vp);
2122 2122          retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER);
2123 2123  
2124 2124          if (retval == EINTR) {
2125 2125                  vn_vfslocks_rele(vpvfsentry);
2126 2126                  return (EINTR);
2127 2127          }
2128 2128          return (retval);
2129 2129  }
2130 2130  
2131 2131  int
2132 2132  vn_vfsrlock_wait(vnode_t *vp)
2133 2133  {
2134 2134          int retval;
2135 2135          vn_vfslocks_entry_t *vpvfsentry;
2136 2136          ASSERT(vp != NULL);
2137 2137  
2138 2138          vpvfsentry = vn_vfslocks_getlock(vp);
2139 2139          retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER);
2140 2140  
2141 2141          if (retval == EINTR) {
2142 2142                  vn_vfslocks_rele(vpvfsentry);
2143 2143                  return (EINTR);
2144 2144          }
2145 2145  
2146 2146          return (retval);
2147 2147  }
2148 2148  
2149 2149  
2150 2150  /*
2151 2151   * vn_vfswlock is used to implement a lock which is logically a writers lock
2152 2152   * protecting the v_vfsmountedhere field.
2153 2153   */
2154 2154  int
2155 2155  vn_vfswlock(vnode_t *vp)
2156 2156  {
2157 2157          vn_vfslocks_entry_t *vpvfsentry;
2158 2158  
2159 2159          /*
2160 2160           * If vp is NULL then somebody is trying to lock the covered vnode
2161 2161           * of /.  (vfs_vnodecovered is NULL for /).  This situation will
2162 2162           * only happen when unmounting /.  Since that operation will fail
2163 2163           * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2164 2164           */
2165 2165          if (vp == NULL)
2166 2166                  return (EBUSY);
2167 2167  
2168 2168          vpvfsentry = vn_vfslocks_getlock(vp);
2169 2169  
2170 2170          if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
2171 2171                  return (0);
2172 2172  
2173 2173          vn_vfslocks_rele(vpvfsentry);
2174 2174          return (EBUSY);
2175 2175  }
2176 2176  
2177 2177  int
2178 2178  vn_vfsrlock(vnode_t *vp)
2179 2179  {
2180 2180          vn_vfslocks_entry_t *vpvfsentry;
2181 2181  
2182 2182          /*
2183 2183           * If vp is NULL then somebody is trying to lock the covered vnode
2184 2184           * of /.  (vfs_vnodecovered is NULL for /).  This situation will
2185 2185           * only happen when unmounting /.  Since that operation will fail
2186 2186           * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2187 2187           */
2188 2188          if (vp == NULL)
2189 2189                  return (EBUSY);
2190 2190  
2191 2191          vpvfsentry = vn_vfslocks_getlock(vp);
2192 2192  
2193 2193          if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER))
2194 2194                  return (0);
2195 2195  
2196 2196          vn_vfslocks_rele(vpvfsentry);
2197 2197          return (EBUSY);
2198 2198  }
2199 2199  
2200 2200  void
2201 2201  vn_vfsunlock(vnode_t *vp)
2202 2202  {
2203 2203          vn_vfslocks_entry_t *vpvfsentry;
2204 2204  
2205 2205          /*
2206 2206           * ve_refcnt needs to be decremented twice.
2207 2207           * 1. To release refernce after a call to vn_vfslocks_getlock()
2208 2208           * 2. To release the reference from the locking routines like
2209 2209           *    vn_vfsrlock/vn_vfswlock etc,.
2210 2210           */
2211 2211          vpvfsentry = vn_vfslocks_getlock(vp);
2212 2212          vn_vfslocks_rele(vpvfsentry);
2213 2213  
2214 2214          rwst_exit(&vpvfsentry->ve_lock);
2215 2215          vn_vfslocks_rele(vpvfsentry);
2216 2216  }
2217 2217  
2218 2218  int
2219 2219  vn_vfswlock_held(vnode_t *vp)
2220 2220  {
2221 2221          int held;
2222 2222          vn_vfslocks_entry_t *vpvfsentry;
2223 2223  
2224 2224          ASSERT(vp != NULL);
2225 2225  
2226 2226          vpvfsentry = vn_vfslocks_getlock(vp);
2227 2227          held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER);
2228 2228  
2229 2229          vn_vfslocks_rele(vpvfsentry);
2230 2230          return (held);
2231 2231  }
2232 2232  
2233 2233  
2234 2234  int
2235 2235  vn_make_ops(
2236 2236          const char *name,                       /* Name of file system */
2237 2237          const fs_operation_def_t *templ,        /* Operation specification */
2238 2238          vnodeops_t **actual)                    /* Return the vnodeops */
2239 2239  {
2240 2240          int unused_ops;
2241 2241          int error;
2242 2242  
2243 2243          *actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP);
2244 2244  
2245 2245          (*actual)->vnop_name = name;
2246 2246  
2247 2247          error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ);
2248 2248          if (error) {
2249 2249                  kmem_free(*actual, sizeof (vnodeops_t));
2250 2250          }
2251 2251  
2252 2252  #if DEBUG
2253 2253          if (unused_ops != 0)
2254 2254                  cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied "
2255 2255                      "but not used", name, unused_ops);
2256 2256  #endif
2257 2257  
2258 2258          return (error);
2259 2259  }
2260 2260  
2261 2261  /*
2262 2262   * Free the vnodeops created as a result of vn_make_ops()
2263 2263   */
2264 2264  void
2265 2265  vn_freevnodeops(vnodeops_t *vnops)
2266 2266  {
2267 2267          kmem_free(vnops, sizeof (vnodeops_t));
2268 2268  }
2269 2269  
2270 2270  /*
2271 2271   * Vnode cache.
2272 2272   */
2273 2273  
2274 2274  /* ARGSUSED */
2275 2275  static int
2276 2276  vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
2277 2277  {
2278 2278          struct vnode *vp;
2279 2279  
2280 2280          vp = buf;
2281 2281  
2282 2282          mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
2283 2283          mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL);
2284 2284          cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
2285 2285          rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
2286 2286          vp->v_femhead = NULL;   /* Must be done before vn_reinit() */
2287 2287          vp->v_path = NULL;
2288 2288          vp->v_mpssdata = NULL;
2289 2289          vp->v_vsd = NULL;
2290 2290          vp->v_fopdata = NULL;
2291 2291  
2292 2292          return (0);
2293 2293  }
2294 2294  
2295 2295  /* ARGSUSED */
2296 2296  static void
2297 2297  vn_cache_destructor(void *buf, void *cdrarg)
2298 2298  {
2299 2299          struct vnode *vp;
2300 2300  
2301 2301          vp = buf;
2302 2302  
2303 2303          rw_destroy(&vp->v_nbllock);
2304 2304          cv_destroy(&vp->v_cv);
2305 2305          mutex_destroy(&vp->v_vsd_lock);
2306 2306          mutex_destroy(&vp->v_lock);
2307 2307  }
2308 2308  
2309 2309  void
2310 2310  vn_create_cache(void)
2311 2311  {
2312 2312          /* LINTED */
2313 2313          ASSERT((1 << VNODE_ALIGN_LOG2) ==
2314 2314              P2ROUNDUP(sizeof (struct vnode), VNODE_ALIGN));
2315 2315          vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode),
2316 2316              VNODE_ALIGN, vn_cache_constructor, vn_cache_destructor, NULL, NULL,
2317 2317              NULL, 0);
2318 2318  }
2319 2319  
2320 2320  void
2321 2321  vn_destroy_cache(void)
2322 2322  {
2323 2323          kmem_cache_destroy(vn_cache);
2324 2324  }
2325 2325  
2326 2326  /*
2327 2327   * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
2328 2328   * cached by the file system and vnodes remain associated.
2329 2329   */
2330 2330  void
2331 2331  vn_recycle(vnode_t *vp)
2332 2332  {
2333 2333          ASSERT(vp->v_pages == NULL);
2334 2334  
2335 2335          /*
2336 2336           * XXX - This really belongs in vn_reinit(), but we have some issues
2337 2337           * with the counts.  Best to have it here for clean initialization.
2338 2338           */
2339 2339          vp->v_rdcnt = 0;
2340 2340          vp->v_wrcnt = 0;
2341 2341          vp->v_mmap_read = 0;
2342 2342          vp->v_mmap_write = 0;
2343 2343  
2344 2344          /*
2345 2345           * If FEM was in use, make sure everything gets cleaned up
2346 2346           * NOTE: vp->v_femhead is initialized to NULL in the vnode
2347 2347           * constructor.
2348 2348           */
2349 2349          if (vp->v_femhead) {
2350 2350                  /* XXX - There should be a free_femhead() that does all this */
2351 2351                  ASSERT(vp->v_femhead->femh_list == NULL);
2352 2352                  mutex_destroy(&vp->v_femhead->femh_lock);
2353 2353                  kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2354 2354                  vp->v_femhead = NULL;
2355 2355          }
2356 2356          if (vp->v_path) {
2357 2357                  kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2358 2358                  vp->v_path = NULL;
2359 2359          }
2360 2360  
2361 2361          if (vp->v_fopdata != NULL) {
2362 2362                  free_fopdata(vp);
2363 2363          }
2364 2364          vp->v_mpssdata = NULL;
2365 2365          vsd_free(vp);
2366 2366  }
2367 2367  
2368 2368  /*
2369 2369   * Used to reset the vnode fields including those that are directly accessible
2370 2370   * as well as those which require an accessor function.
2371 2371   *
2372 2372   * Does not initialize:
2373 2373   *      synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv
2374 2374   *      v_data (since FS-nodes and vnodes point to each other and should
2375 2375   *              be updated simultaneously)
2376 2376   *      v_op (in case someone needs to make a VOP call on this object)
2377 2377   */
2378 2378  void
2379 2379  vn_reinit(vnode_t *vp)
2380 2380  {
2381 2381          vp->v_count = 1;
2382 2382          vp->v_count_dnlc = 0;
2383 2383          vp->v_vfsp = NULL;
2384 2384          vp->v_stream = NULL;
2385 2385          vp->v_vfsmountedhere = NULL;
2386 2386          vp->v_flag = 0;
2387 2387          vp->v_type = VNON;
2388 2388          vp->v_rdev = NODEV;
2389 2389  
2390 2390          vp->v_filocks = NULL;
2391 2391          vp->v_shrlocks = NULL;
2392 2392          vp->v_pages = NULL;
2393 2393  
2394 2394          vp->v_locality = NULL;
2395 2395          vp->v_xattrdir = NULL;
2396 2396  
2397 2397          /* Handles v_femhead, v_path, and the r/w/map counts */
2398 2398          vn_recycle(vp);
2399 2399  }
2400 2400  
2401 2401  vnode_t *
2402 2402  vn_alloc(int kmflag)
2403 2403  {
2404 2404          vnode_t *vp;
2405 2405  
2406 2406          vp = kmem_cache_alloc(vn_cache, kmflag);
2407 2407  
2408 2408          if (vp != NULL) {
2409 2409                  vp->v_femhead = NULL;   /* Must be done before vn_reinit() */
2410 2410                  vp->v_fopdata = NULL;
2411 2411                  vn_reinit(vp);
2412 2412          }
2413 2413  
2414 2414          return (vp);
2415 2415  }
2416 2416  
2417 2417  void
2418 2418  vn_free(vnode_t *vp)
2419 2419  {
2420 2420          ASSERT(vp->v_shrlocks == NULL);
2421 2421          ASSERT(vp->v_filocks == NULL);
2422 2422  
2423 2423          /*
2424 2424           * Some file systems call vn_free() with v_count of zero,
2425 2425           * some with v_count of 1.  In any case, the value should
2426 2426           * never be anything else.
2427 2427           */
2428 2428          ASSERT((vp->v_count == 0) || (vp->v_count == 1));
2429 2429          ASSERT(vp->v_count_dnlc == 0);
2430 2430          if (vp->v_path != NULL) {
2431 2431                  kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2432 2432                  vp->v_path = NULL;
2433 2433          }
2434 2434  
2435 2435          /* If FEM was in use, make sure everything gets cleaned up */
2436 2436          if (vp->v_femhead) {
2437 2437                  /* XXX - There should be a free_femhead() that does all this */
2438 2438                  ASSERT(vp->v_femhead->femh_list == NULL);
2439 2439                  mutex_destroy(&vp->v_femhead->femh_lock);
2440 2440                  kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2441 2441                  vp->v_femhead = NULL;
2442 2442          }
2443 2443  
2444 2444          if (vp->v_fopdata != NULL) {
2445 2445                  free_fopdata(vp);
2446 2446          }
2447 2447          vp->v_mpssdata = NULL;
2448 2448          vsd_free(vp);
2449 2449          kmem_cache_free(vn_cache, vp);
2450 2450  }
2451 2451  
2452 2452  /*
2453 2453   * vnode status changes, should define better states than 1, 0.
2454 2454   */
2455 2455  void
2456 2456  vn_reclaim(vnode_t *vp)
2457 2457  {
2458 2458          vfs_t   *vfsp = vp->v_vfsp;
2459 2459  
2460 2460          if (vfsp == NULL ||
2461 2461              vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2462 2462                  return;
2463 2463          }
2464 2464          (void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED);
2465 2465  }
2466 2466  
2467 2467  void
2468 2468  vn_idle(vnode_t *vp)
2469 2469  {
2470 2470          vfs_t   *vfsp = vp->v_vfsp;
2471 2471  
2472 2472          if (vfsp == NULL ||
2473 2473              vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2474 2474                  return;
2475 2475          }
2476 2476          (void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED);
2477 2477  }
2478 2478  void
2479 2479  vn_exists(vnode_t *vp)
2480 2480  {
2481 2481          vfs_t   *vfsp = vp->v_vfsp;
2482 2482  
2483 2483          if (vfsp == NULL ||
2484 2484              vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2485 2485                  return;
2486 2486          }
2487 2487          (void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS);
2488 2488  }
2489 2489  
2490 2490  void
2491 2491  vn_invalid(vnode_t *vp)
2492 2492  {
2493 2493          vfs_t   *vfsp = vp->v_vfsp;
2494 2494  
2495 2495          if (vfsp == NULL ||
2496 2496              vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2497 2497                  return;
2498 2498          }
2499 2499          (void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED);
2500 2500  }
2501 2501  
2502 2502  /* Vnode event notification */
2503 2503  
2504 2504  int
2505 2505  vnevent_support(vnode_t *vp, caller_context_t *ct)
2506 2506  {
2507 2507          if (vp == NULL)
2508 2508                  return (EINVAL);
2509 2509  
2510 2510          return (VOP_VNEVENT(vp, VE_SUPPORT, NULL, NULL, ct));
2511 2511  }
2512 2512  
2513 2513  void
2514 2514  vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2515 2515  {
2516 2516          if (vp == NULL || vp->v_femhead == NULL) {
2517 2517                  return;
2518 2518          }
2519 2519          (void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
2520 2520  }
2521 2521  
2522 2522  void
2523 2523  vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2524 2524      caller_context_t *ct)
2525 2525  {
2526 2526          if (vp == NULL || vp->v_femhead == NULL) {
2527 2527                  return;
2528 2528          }
2529 2529          (void) VOP_VNEVENT(vp, VE_RENAME_DEST, dvp, name, ct);
2530 2530  }
2531 2531  
2532 2532  void
2533 2533  vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
2534 2534  {
2535 2535          if (vp == NULL || vp->v_femhead == NULL) {
2536 2536                  return;
2537 2537          }
2538 2538          (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
2539 2539  }
2540 2540  
2541 2541  void
2542 2542  vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2543 2543  {
2544 2544          if (vp == NULL || vp->v_femhead == NULL) {
2545 2545                  return;
2546 2546          }
2547 2547          (void) VOP_VNEVENT(vp, VE_REMOVE, dvp, name, ct);
2548 2548  }
2549 2549  
2550 2550  void
2551 2551  vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2552 2552  {
2553 2553          if (vp == NULL || vp->v_femhead == NULL) {
2554 2554                  return;
2555 2555          }
2556 2556          (void) VOP_VNEVENT(vp, VE_RMDIR, dvp, name, ct);
2557 2557  }
2558 2558  
2559 2559  void
2560 2560  vnevent_pre_rename_src(vnode_t *vp, vnode_t *dvp, char *name,
2561 2561      caller_context_t *ct)
2562 2562  {
2563 2563          if (vp == NULL || vp->v_femhead == NULL) {
2564 2564                  return;
2565 2565          }
2566 2566          (void) VOP_VNEVENT(vp, VE_PRE_RENAME_SRC, dvp, name, ct);
2567 2567  }
2568 2568  
2569 2569  void
2570 2570  vnevent_pre_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2571 2571      caller_context_t *ct)
2572 2572  {
2573 2573          if (vp == NULL || vp->v_femhead == NULL) {
2574 2574                  return;
2575 2575          }
2576 2576          (void) VOP_VNEVENT(vp, VE_PRE_RENAME_DEST, dvp, name, ct);
2577 2577  }
2578 2578  
2579 2579  void
2580 2580  vnevent_pre_rename_dest_dir(vnode_t *vp, vnode_t *nvp, char *name,
2581 2581      caller_context_t *ct)
2582 2582  {
2583 2583          if (vp == NULL || vp->v_femhead == NULL) {
2584 2584                  return;
2585 2585          }
2586 2586          (void) VOP_VNEVENT(vp, VE_PRE_RENAME_DEST_DIR, nvp, name, ct);
2587 2587  }
2588 2588  
2589 2589  void
2590 2590  vnevent_create(vnode_t *vp, caller_context_t *ct)
2591 2591  {
2592 2592          if (vp == NULL || vp->v_femhead == NULL) {
2593 2593                  return;
2594 2594          }
2595 2595          (void) VOP_VNEVENT(vp, VE_CREATE, NULL, NULL, ct);
2596 2596  }
2597 2597  
2598 2598  void
2599 2599  vnevent_link(vnode_t *vp, caller_context_t *ct)
2600 2600  {
2601 2601          if (vp == NULL || vp->v_femhead == NULL) {
2602 2602                  return;
2603 2603          }
2604 2604          (void) VOP_VNEVENT(vp, VE_LINK, NULL, NULL, ct);
2605 2605  }
2606 2606  
2607 2607  void
2608 2608  vnevent_mountedover(vnode_t *vp, caller_context_t *ct)
2609 2609  {
2610 2610          if (vp == NULL || vp->v_femhead == NULL) {
2611 2611                  return;
2612 2612          }
2613 2613          (void) VOP_VNEVENT(vp, VE_MOUNTEDOVER, NULL, NULL, ct);
2614 2614  }
2615 2615  
2616 2616  void
2617 2617  vnevent_truncate(vnode_t *vp, caller_context_t *ct)
2618 2618  {
2619 2619          if (vp == NULL || vp->v_femhead == NULL) {
2620 2620                  return;
2621 2621          }
2622 2622          (void) VOP_VNEVENT(vp, VE_TRUNCATE, NULL, NULL, ct);
2623 2623  }
2624 2624  
2625 2625  /*
2626 2626   * Vnode accessors.
2627 2627   */
2628 2628  
2629 2629  int
2630 2630  vn_is_readonly(vnode_t *vp)
2631 2631  {
2632 2632          return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
2633 2633  }
2634 2634  
2635 2635  int
2636 2636  vn_has_flocks(vnode_t *vp)
2637 2637  {
2638 2638          return (vp->v_filocks != NULL);
2639 2639  }
2640 2640  
2641 2641  int
2642 2642  vn_has_mandatory_locks(vnode_t *vp, int mode)
2643 2643  {
2644 2644          return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode)));
2645 2645  }
2646 2646  
2647 2647  int
2648 2648  vn_has_cached_data(vnode_t *vp)
2649 2649  {
2650 2650          return (vp->v_pages != NULL);
2651 2651  }
2652 2652  
2653 2653  /*
2654 2654   * Return 0 if the vnode in question shouldn't be permitted into a zone via
2655 2655   * zone_enter(2).
2656 2656   */
2657 2657  int
2658 2658  vn_can_change_zones(vnode_t *vp)
2659 2659  {
2660 2660          struct vfssw *vswp;
2661 2661          int allow = 1;
2662 2662          vnode_t *rvp;
2663 2663  
2664 2664          if (nfs_global_client_only != 0)
2665 2665                  return (1);
2666 2666  
2667 2667          /*
2668 2668           * We always want to look at the underlying vnode if there is one.
2669 2669           */
2670 2670          if (VOP_REALVP(vp, &rvp, NULL) != 0)
2671 2671                  rvp = vp;
2672 2672          /*
2673 2673           * Some pseudo filesystems (including doorfs) don't actually register
2674 2674           * their vfsops_t, so the following may return NULL; we happily let
2675 2675           * such vnodes switch zones.
2676 2676           */
2677 2677          vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp));
2678 2678          if (vswp != NULL) {
2679 2679                  if (vswp->vsw_flag & VSW_NOTZONESAFE)
2680 2680                          allow = 0;
2681 2681                  vfs_unrefvfssw(vswp);
2682 2682          }
2683 2683          return (allow);
2684 2684  }
2685 2685  
2686 2686  /*
2687 2687   * Return nonzero if the vnode is a mount point, zero if not.
2688 2688   */
2689 2689  int
2690 2690  vn_ismntpt(vnode_t *vp)
2691 2691  {
2692 2692          return (vp->v_vfsmountedhere != NULL);
2693 2693  }
2694 2694  
2695 2695  /* Retrieve the vfs (if any) mounted on this vnode */
2696 2696  vfs_t *
2697 2697  vn_mountedvfs(vnode_t *vp)
2698 2698  {
2699 2699          return (vp->v_vfsmountedhere);
2700 2700  }
2701 2701  
2702 2702  /*
2703 2703   * Return nonzero if the vnode is referenced by the dnlc, zero if not.
2704 2704   */
2705 2705  int
2706 2706  vn_in_dnlc(vnode_t *vp)
2707 2707  {
2708 2708          return (vp->v_count_dnlc > 0);
2709 2709  }
2710 2710  
2711 2711  /*
2712 2712   * vn_has_other_opens() checks whether a particular file is opened by more than
2713 2713   * just the caller and whether the open is for read and/or write.
2714 2714   * This routine is for calling after the caller has already called VOP_OPEN()
2715 2715   * and the caller wishes to know if they are the only one with it open for
2716 2716   * the mode(s) specified.
2717 2717   *
2718 2718   * Vnode counts are only kept on regular files (v_type=VREG).
2719 2719   */
2720 2720  int
2721 2721  vn_has_other_opens(
2722 2722          vnode_t *vp,
2723 2723          v_mode_t mode)
2724 2724  {
2725 2725  
2726 2726          ASSERT(vp != NULL);
2727 2727  
2728 2728          switch (mode) {
2729 2729          case V_WRITE:
2730 2730                  if (vp->v_wrcnt > 1)
2731 2731                          return (V_TRUE);
2732 2732                  break;
2733 2733          case V_RDORWR:
2734 2734                  if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1))
2735 2735                          return (V_TRUE);
2736 2736                  break;
2737 2737          case V_RDANDWR:
2738 2738                  if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1))
2739 2739                          return (V_TRUE);
2740 2740                  break;
2741 2741          case V_READ:
2742 2742                  if (vp->v_rdcnt > 1)
2743 2743                          return (V_TRUE);
2744 2744                  break;
2745 2745          }
2746 2746  
2747 2747          return (V_FALSE);
2748 2748  }
2749 2749  
2750 2750  /*
2751 2751   * vn_is_opened() checks whether a particular file is opened and
2752 2752   * whether the open is for read and/or write.
2753 2753   *
2754 2754   * Vnode counts are only kept on regular files (v_type=VREG).
2755 2755   */
2756 2756  int
2757 2757  vn_is_opened(
2758 2758          vnode_t *vp,
2759 2759          v_mode_t mode)
2760 2760  {
2761 2761  
2762 2762          ASSERT(vp != NULL);
2763 2763  
2764 2764          switch (mode) {
2765 2765          case V_WRITE:
2766 2766                  if (vp->v_wrcnt)
2767 2767                          return (V_TRUE);
2768 2768                  break;
2769 2769          case V_RDANDWR:
2770 2770                  if (vp->v_rdcnt && vp->v_wrcnt)
2771 2771                          return (V_TRUE);
2772 2772                  break;
2773 2773          case V_RDORWR:
2774 2774                  if (vp->v_rdcnt || vp->v_wrcnt)
2775 2775                          return (V_TRUE);
2776 2776                  break;
2777 2777          case V_READ:
2778 2778                  if (vp->v_rdcnt)
2779 2779                          return (V_TRUE);
2780 2780                  break;
2781 2781          }
2782 2782  
2783 2783          return (V_FALSE);
2784 2784  }
2785 2785  
2786 2786  /*
2787 2787   * vn_is_mapped() checks whether a particular file is mapped and whether
2788 2788   * the file is mapped read and/or write.
2789 2789   */
2790 2790  int
2791 2791  vn_is_mapped(
2792 2792          vnode_t *vp,
2793 2793          v_mode_t mode)
2794 2794  {
2795 2795  
2796 2796          ASSERT(vp != NULL);
2797 2797  
2798 2798  #if !defined(_LP64)
2799 2799          switch (mode) {
2800 2800          /*
2801 2801           * The atomic_add_64_nv functions force atomicity in the
2802 2802           * case of 32 bit architectures. Otherwise the 64 bit values
2803 2803           * require two fetches. The value of the fields may be
2804 2804           * (potentially) changed between the first fetch and the
2805 2805           * second
2806 2806           */
2807 2807          case V_WRITE:
2808 2808                  if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
2809 2809                          return (V_TRUE);
2810 2810                  break;
2811 2811          case V_RDANDWR:
2812 2812                  if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
2813 2813                      (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2814 2814                          return (V_TRUE);
2815 2815                  break;
2816 2816          case V_RDORWR:
2817 2817                  if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
2818 2818                      (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2819 2819                          return (V_TRUE);
2820 2820                  break;
2821 2821          case V_READ:
2822 2822                  if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
2823 2823                          return (V_TRUE);
2824 2824                  break;
2825 2825          }
2826 2826  #else
2827 2827          switch (mode) {
2828 2828          case V_WRITE:
2829 2829                  if (vp->v_mmap_write)
2830 2830                          return (V_TRUE);
2831 2831                  break;
2832 2832          case V_RDANDWR:
2833 2833                  if (vp->v_mmap_read && vp->v_mmap_write)
2834 2834                          return (V_TRUE);
2835 2835                  break;
2836 2836          case V_RDORWR:
2837 2837                  if (vp->v_mmap_read || vp->v_mmap_write)
2838 2838                          return (V_TRUE);
2839 2839                  break;
2840 2840          case V_READ:
2841 2841                  if (vp->v_mmap_read)
2842 2842                          return (V_TRUE);
2843 2843                  break;
2844 2844          }
2845 2845  #endif
2846 2846  
2847 2847          return (V_FALSE);
2848 2848  }
2849 2849  
2850 2850  /*
2851 2851   * Set the operations vector for a vnode.
2852 2852   *
2853 2853   * FEM ensures that the v_femhead pointer is filled in before the
2854 2854   * v_op pointer is changed.  This means that if the v_femhead pointer
2855 2855   * is NULL, and the v_op field hasn't changed since before which checked
2856 2856   * the v_femhead pointer; then our update is ok - we are not racing with
2857 2857   * FEM.
2858 2858   */
2859 2859  void
2860 2860  vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
2861 2861  {
2862 2862          vnodeops_t      *op;
2863 2863  
2864 2864          ASSERT(vp != NULL);
2865 2865          ASSERT(vnodeops != NULL);
2866 2866  
2867 2867          op = vp->v_op;
2868 2868          membar_consumer();
2869 2869          /*
2870 2870           * If vp->v_femhead == NULL, then we'll call atomic_cas_ptr() to do
2871 2871           * the compare-and-swap on vp->v_op.  If either fails, then FEM is
2872 2872           * in effect on the vnode and we need to have FEM deal with it.
2873 2873           */
2874 2874          if (vp->v_femhead != NULL || atomic_cas_ptr(&vp->v_op, op, vnodeops) !=
2875 2875              op) {
2876 2876                  fem_setvnops(vp, vnodeops);
2877 2877          }
2878 2878  }
2879 2879  
2880 2880  /*
2881 2881   * Retrieve the operations vector for a vnode
2882 2882   * As with vn_setops(above); make sure we aren't racing with FEM.
2883 2883   * FEM sets the v_op to a special, internal, vnodeops that wouldn't
2884 2884   * make sense to the callers of this routine.
2885 2885   */
2886 2886  vnodeops_t *
2887 2887  vn_getops(vnode_t *vp)
2888 2888  {
2889 2889          vnodeops_t      *op;
2890 2890  
2891 2891          ASSERT(vp != NULL);
2892 2892  
2893 2893          op = vp->v_op;
2894 2894          membar_consumer();
2895 2895          if (vp->v_femhead == NULL && op == vp->v_op) {
2896 2896                  return (op);
2897 2897          } else {
2898 2898                  return (fem_getvnops(vp));
2899 2899          }
2900 2900  }
2901 2901  
2902 2902  /*
2903 2903   * Returns non-zero (1) if the vnodeops matches that of the vnode.
2904 2904   * Returns zero (0) if not.
2905 2905   */
2906 2906  int
2907 2907  vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
2908 2908  {
2909 2909          return (vn_getops(vp) == vnodeops);
2910 2910  }
2911 2911  
2912 2912  /*
2913 2913   * Returns non-zero (1) if the specified operation matches the
2914 2914   * corresponding operation for that the vnode.
2915 2915   * Returns zero (0) if not.
2916 2916   */
2917 2917  
2918 2918  #define MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
2919 2919  
2920 2920  int
2921 2921  vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
2922 2922  {
2923 2923          const fs_operation_trans_def_t *otdp;
2924 2924          fs_generic_func_p *loc = NULL;
2925 2925          vnodeops_t      *vop = vn_getops(vp);
2926 2926  
2927 2927          ASSERT(vopname != NULL);
2928 2928  
2929 2929          for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
2930 2930                  if (MATCHNAME(otdp->name, vopname)) {
2931 2931                          loc = (fs_generic_func_p *)
2932 2932                              ((char *)(vop) + otdp->offset);
2933 2933                          break;
2934 2934                  }
2935 2935          }
2936 2936  
2937 2937          return ((loc != NULL) && (*loc == funcp));
2938 2938  }
2939 2939  
2940 2940  /*
2941 2941   * fs_new_caller_id() needs to return a unique ID on a given local system.
2942 2942   * The IDs do not need to survive across reboots.  These are primarily
2943 2943   * used so that (FEM) monitors can detect particular callers (such as
2944 2944   * the NFS server) to a given vnode/vfs operation.
2945 2945   */
2946 2946  u_longlong_t
2947 2947  fs_new_caller_id()
2948 2948  {
2949 2949          static uint64_t next_caller_id = 0LL; /* First call returns 1 */
2950 2950  
2951 2951          return ((u_longlong_t)atomic_inc_64_nv(&next_caller_id));
2952 2952  }
2953 2953  
2954 2954  /*
2955 2955   * Given a starting vnode and a path, updates the path in the target vnode in
2956 2956   * a safe manner.  If the vnode already has path information embedded, then the
2957 2957   * cached path is left untouched.
2958 2958   */
2959 2959  
2960 2960  size_t max_vnode_path = 4 * MAXPATHLEN;
2961 2961  
2962 2962  void
2963 2963  vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
2964 2964      const char *path, size_t plen)
2965 2965  {
2966 2966          char    *rpath;
2967 2967          vnode_t *base;
2968 2968          size_t  rpathlen, rpathalloc;
2969 2969          int     doslash = 1;
2970 2970  
2971 2971          if (*path == '/') {
2972 2972                  base = rootvp;
2973 2973                  path++;
2974 2974                  plen--;
2975 2975          } else {
2976 2976                  base = startvp;
2977 2977          }
2978 2978  
2979 2979          /*
2980 2980           * We cannot grab base->v_lock while we hold vp->v_lock because of
2981 2981           * the potential for deadlock.
2982 2982           */
2983 2983          mutex_enter(&base->v_lock);
2984 2984          if (base->v_path == NULL) {
2985 2985                  mutex_exit(&base->v_lock);
2986 2986                  return;
2987 2987          }
2988 2988  
2989 2989          rpathlen = strlen(base->v_path);
2990 2990          rpathalloc = rpathlen + plen + 1;
2991 2991          /* Avoid adding a slash if there's already one there */
2992 2992          if (base->v_path[rpathlen-1] == '/')
2993 2993                  doslash = 0;
2994 2994          else
2995 2995                  rpathalloc++;
2996 2996  
2997 2997          /*
2998 2998           * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
2999 2999           * so we must do this dance.  If, by chance, something changes the path,
3000 3000           * just give up since there is no real harm.
3001 3001           */
3002 3002          mutex_exit(&base->v_lock);
3003 3003  
3004 3004          /* Paths should stay within reason */
3005 3005          if (rpathalloc > max_vnode_path)
3006 3006                  return;
3007 3007  
3008 3008          rpath = kmem_alloc(rpathalloc, KM_SLEEP);
3009 3009  
3010 3010          mutex_enter(&base->v_lock);
3011 3011          if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
3012 3012                  mutex_exit(&base->v_lock);
3013 3013                  kmem_free(rpath, rpathalloc);
3014 3014                  return;
3015 3015          }
3016 3016          bcopy(base->v_path, rpath, rpathlen);
3017 3017          mutex_exit(&base->v_lock);
3018 3018  
3019 3019          if (doslash)
3020 3020                  rpath[rpathlen++] = '/';
3021 3021          bcopy(path, rpath + rpathlen, plen);
3022 3022          rpath[rpathlen + plen] = '\0';
3023 3023  
3024 3024          mutex_enter(&vp->v_lock);
3025 3025          if (vp->v_path != NULL) {
3026 3026                  mutex_exit(&vp->v_lock);
3027 3027                  kmem_free(rpath, rpathalloc);
3028 3028          } else {
3029 3029                  vp->v_path = rpath;
3030 3030                  mutex_exit(&vp->v_lock);
3031 3031          }
3032 3032  }
3033 3033  
3034 3034  /*
3035 3035   * Sets the path to the vnode to be the given string, regardless of current
3036 3036   * context.  The string must be a complete path from rootdir.  This is only used
3037 3037   * by fsop_root() for setting the path based on the mountpoint.
3038 3038   */
3039 3039  void
3040 3040  vn_setpath_str(struct vnode *vp, const char *str, size_t len)
3041 3041  {
3042 3042          char *buf = kmem_alloc(len + 1, KM_SLEEP);
3043 3043  
3044 3044          mutex_enter(&vp->v_lock);
3045 3045          if (vp->v_path != NULL) {
3046 3046                  mutex_exit(&vp->v_lock);
3047 3047                  kmem_free(buf, len + 1);
3048 3048                  return;
3049 3049          }
3050 3050  
3051 3051          vp->v_path = buf;
3052 3052          bcopy(str, vp->v_path, len);
3053 3053          vp->v_path[len] = '\0';
3054 3054  
3055 3055          mutex_exit(&vp->v_lock);
3056 3056  }
3057 3057  
3058 3058  /*
3059 3059   * Called from within filesystem's vop_rename() to handle renames once the
3060 3060   * target vnode is available.
3061 3061   */
3062 3062  void
3063 3063  vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
3064 3064  {
3065 3065          char *tmp;
3066 3066  
3067 3067          mutex_enter(&vp->v_lock);
3068 3068          tmp = vp->v_path;
3069 3069          vp->v_path = NULL;
3070 3070          mutex_exit(&vp->v_lock);
3071 3071          vn_setpath(rootdir, dvp, vp, nm, len);
3072 3072          if (tmp != NULL)
3073 3073                  kmem_free(tmp, strlen(tmp) + 1);
3074 3074  }
3075 3075  
3076 3076  /*
3077 3077   * Similar to vn_setpath_str(), this function sets the path of the destination
3078 3078   * vnode to the be the same as the source vnode.
3079 3079   */
3080 3080  void
3081 3081  vn_copypath(struct vnode *src, struct vnode *dst)
3082 3082  {
3083 3083          char *buf;
3084 3084          int alloc;
3085 3085  
3086 3086          mutex_enter(&src->v_lock);
3087 3087          if (src->v_path == NULL) {
3088 3088                  mutex_exit(&src->v_lock);
3089 3089                  return;
3090 3090          }
3091 3091          alloc = strlen(src->v_path) + 1;
3092 3092  
3093 3093          /* avoid kmem_alloc() with lock held */
3094 3094          mutex_exit(&src->v_lock);
3095 3095          buf = kmem_alloc(alloc, KM_SLEEP);
3096 3096          mutex_enter(&src->v_lock);
3097 3097          if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
3098 3098                  mutex_exit(&src->v_lock);
3099 3099                  kmem_free(buf, alloc);
3100 3100                  return;
3101 3101          }
3102 3102          bcopy(src->v_path, buf, alloc);
3103 3103          mutex_exit(&src->v_lock);
3104 3104  
3105 3105          mutex_enter(&dst->v_lock);
3106 3106          if (dst->v_path != NULL) {
3107 3107                  mutex_exit(&dst->v_lock);
3108 3108                  kmem_free(buf, alloc);
3109 3109                  return;
3110 3110          }
3111 3111          dst->v_path = buf;
3112 3112          mutex_exit(&dst->v_lock);
3113 3113  }
3114 3114  
3115 3115  /*
3116 3116   * XXX Private interface for segvn routines that handle vnode
3117 3117   * large page segments.
3118 3118   *
3119 3119   * return 1 if vp's file system VOP_PAGEIO() implementation
3120 3120   * can be safely used instead of VOP_GETPAGE() for handling
3121 3121   * pagefaults against regular non swap files. VOP_PAGEIO()
3122 3122   * interface is considered safe here if its implementation
3123 3123   * is very close to VOP_GETPAGE() implementation.
3124 3124   * e.g. It zero's out the part of the page beyond EOF. Doesn't
3125 3125   * panic if there're file holes but instead returns an error.
3126 3126   * Doesn't assume file won't be changed by user writes, etc.
3127 3127   *
3128 3128   * return 0 otherwise.
3129 3129   *
3130 3130   * For now allow segvn to only use VOP_PAGEIO() with ufs and nfs.
3131 3131   */
3132 3132  int
3133 3133  vn_vmpss_usepageio(vnode_t *vp)
3134 3134  {
3135 3135          vfs_t   *vfsp = vp->v_vfsp;
3136 3136          char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
3137 3137          char *pageio_ok_fss[] = {"ufs", "nfs", NULL};
3138 3138          char **fsok = pageio_ok_fss;
3139 3139  
3140 3140          if (fsname == NULL) {
3141 3141                  return (0);
3142 3142          }
3143 3143  
3144 3144          for (; *fsok; fsok++) {
3145 3145                  if (strcmp(*fsok, fsname) == 0) {
3146 3146                          return (1);
3147 3147                  }
3148 3148          }
3149 3149          return (0);
3150 3150  }
3151 3151  
3152 3152  /* VOP_XXX() macros call the corresponding fop_xxx() function */
3153 3153  
3154 3154  int
3155 3155  fop_open(
3156 3156          vnode_t **vpp,
3157 3157          int mode,
3158 3158          cred_t *cr,
3159 3159          caller_context_t *ct)
3160 3160  {
3161 3161          int ret;
3162 3162          vnode_t *vp = *vpp;
3163 3163  
3164 3164          VN_HOLD(vp);
3165 3165          /*
3166 3166           * Adding to the vnode counts before calling open
3167 3167           * avoids the need for a mutex. It circumvents a race
3168 3168           * condition where a query made on the vnode counts results in a
3169 3169           * false negative. The inquirer goes away believing the file is
3170 3170           * not open when there is an open on the file already under way.
3171 3171           *
3172 3172           * The counts are meant to prevent NFS from granting a delegation
3173 3173           * when it would be dangerous to do so.
3174 3174           *
3175 3175           * The vnode counts are only kept on regular files
3176 3176           */
3177 3177          if ((*vpp)->v_type == VREG) {
3178 3178                  if (mode & FREAD)
3179 3179                          atomic_inc_32(&(*vpp)->v_rdcnt);
3180 3180                  if (mode & FWRITE)
3181 3181                          atomic_inc_32(&(*vpp)->v_wrcnt);
3182 3182          }
3183 3183  
3184 3184          VOPXID_MAP_CR(vp, cr);
3185 3185  
3186 3186          ret = (*(*(vpp))->v_op->vop_open)(vpp, mode, cr, ct);
3187 3187  
3188 3188          if (ret) {
3189 3189                  /*
3190 3190                   * Use the saved vp just in case the vnode ptr got trashed
3191 3191                   * by the error.
3192 3192                   */
3193 3193                  VOPSTATS_UPDATE(vp, open);
3194 3194                  if ((vp->v_type == VREG) && (mode & FREAD))
3195 3195                          atomic_dec_32(&vp->v_rdcnt);
3196 3196                  if ((vp->v_type == VREG) && (mode & FWRITE))
3197 3197                          atomic_dec_32(&vp->v_wrcnt);
3198 3198          } else {
3199 3199                  /*
3200 3200                   * Some filesystems will return a different vnode,
3201 3201                   * but the same path was still used to open it.
3202 3202                   * So if we do change the vnode and need to
3203 3203                   * copy over the path, do so here, rather than special
3204 3204                   * casing each filesystem. Adjust the vnode counts to
3205 3205                   * reflect the vnode switch.
3206 3206                   */
3207 3207                  VOPSTATS_UPDATE(*vpp, open);
3208 3208                  if (*vpp != vp && *vpp != NULL) {
3209 3209                          vn_copypath(vp, *vpp);
3210 3210                          if (((*vpp)->v_type == VREG) && (mode & FREAD))
3211 3211                                  atomic_inc_32(&(*vpp)->v_rdcnt);
3212 3212                          if ((vp->v_type == VREG) && (mode & FREAD))
3213 3213                                  atomic_dec_32(&vp->v_rdcnt);
3214 3214                          if (((*vpp)->v_type == VREG) && (mode & FWRITE))
3215 3215                                  atomic_inc_32(&(*vpp)->v_wrcnt);
3216 3216                          if ((vp->v_type == VREG) && (mode & FWRITE))
3217 3217                                  atomic_dec_32(&vp->v_wrcnt);
3218 3218                  }
3219 3219          }
3220 3220          VN_RELE(vp);
3221 3221          return (ret);
3222 3222  }
3223 3223  
3224 3224  int
3225 3225  fop_close(
3226 3226          vnode_t *vp,
3227 3227          int flag,
3228 3228          int count,
3229 3229          offset_t offset,
3230 3230          cred_t *cr,
3231 3231          caller_context_t *ct)
3232 3232  {
3233 3233          int err;
3234 3234  
3235 3235          VOPXID_MAP_CR(vp, cr);
3236 3236  
3237 3237          err = (*(vp)->v_op->vop_close)(vp, flag, count, offset, cr, ct);
3238 3238          VOPSTATS_UPDATE(vp, close);
3239 3239          /*
3240 3240           * Check passed in count to handle possible dups. Vnode counts are only
3241 3241           * kept on regular files
3242 3242           */
3243 3243          if ((vp->v_type == VREG) && (count == 1))  {
3244 3244                  if (flag & FREAD) {
3245 3245                          ASSERT(vp->v_rdcnt > 0);
3246 3246                          atomic_dec_32(&vp->v_rdcnt);
3247 3247                  }
3248 3248                  if (flag & FWRITE) {
3249 3249                          ASSERT(vp->v_wrcnt > 0);
3250 3250                          atomic_dec_32(&vp->v_wrcnt);
3251 3251                  }
3252 3252          }
3253 3253          return (err);
3254 3254  }
3255 3255  
3256 3256  int
3257 3257  fop_read(
3258 3258          vnode_t *vp,
3259 3259          uio_t *uiop,
3260 3260          int ioflag,
3261 3261          cred_t *cr,
3262 3262          caller_context_t *ct)
3263 3263  {
3264 3264          int     err;
3265 3265          ssize_t resid_start = uiop->uio_resid;
3266 3266  
3267 3267          VOPXID_MAP_CR(vp, cr);
3268 3268  
3269 3269          err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
3270 3270          VOPSTATS_UPDATE_IO(vp, read,
3271 3271              read_bytes, (resid_start - uiop->uio_resid));
3272 3272          return (err);
3273 3273  }
3274 3274  
3275 3275  int
3276 3276  fop_write(
3277 3277          vnode_t *vp,
3278 3278          uio_t *uiop,
3279 3279          int ioflag,
3280 3280          cred_t *cr,
3281 3281          caller_context_t *ct)
3282 3282  {
3283 3283          int     err;
3284 3284          ssize_t resid_start = uiop->uio_resid;
3285 3285  
3286 3286          VOPXID_MAP_CR(vp, cr);
3287 3287  
3288 3288          err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
3289 3289          VOPSTATS_UPDATE_IO(vp, write,
3290 3290              write_bytes, (resid_start - uiop->uio_resid));
3291 3291          return (err);
3292 3292  }
3293 3293  
3294 3294  int
3295 3295  fop_ioctl(
3296 3296          vnode_t *vp,
3297 3297          int cmd,
3298 3298          intptr_t arg,
3299 3299          int flag,
3300 3300          cred_t *cr,
3301 3301          int *rvalp,
3302 3302          caller_context_t *ct)
3303 3303  {
3304 3304          int     err;
3305 3305  
3306 3306          VOPXID_MAP_CR(vp, cr);
3307 3307  
3308 3308          err = (*(vp)->v_op->vop_ioctl)(vp, cmd, arg, flag, cr, rvalp, ct);
3309 3309          VOPSTATS_UPDATE(vp, ioctl);
3310 3310          return (err);
3311 3311  }
3312 3312  
3313 3313  int
3314 3314  fop_setfl(
3315 3315          vnode_t *vp,
3316 3316          int oflags,
3317 3317          int nflags,
3318 3318          cred_t *cr,
3319 3319          caller_context_t *ct)
3320 3320  {
3321 3321          int     err;
3322 3322  
3323 3323          VOPXID_MAP_CR(vp, cr);
3324 3324  
3325 3325          err = (*(vp)->v_op->vop_setfl)(vp, oflags, nflags, cr, ct);
3326 3326          VOPSTATS_UPDATE(vp, setfl);
3327 3327          return (err);
3328 3328  }
3329 3329  
3330 3330  int
3331 3331  fop_getattr(
3332 3332          vnode_t *vp,
3333 3333          vattr_t *vap,
3334 3334          int flags,
3335 3335          cred_t *cr,
3336 3336          caller_context_t *ct)
3337 3337  {
3338 3338          int     err;
3339 3339  
3340 3340          VOPXID_MAP_CR(vp, cr);
3341 3341  
3342 3342          /*
3343 3343           * If this file system doesn't understand the xvattr extensions
3344 3344           * then turn off the xvattr bit.
3345 3345           */
3346 3346          if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3347 3347                  vap->va_mask &= ~AT_XVATTR;
3348 3348          }
3349 3349  
3350 3350          /*
3351 3351           * We're only allowed to skip the ACL check iff we used a 32 bit
3352 3352           * ACE mask with VOP_ACCESS() to determine permissions.
3353 3353           */
3354 3354          if ((flags & ATTR_NOACLCHECK) &&
3355 3355              vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3356 3356                  return (EINVAL);
3357 3357          }
3358 3358          err = (*(vp)->v_op->vop_getattr)(vp, vap, flags, cr, ct);
3359 3359          VOPSTATS_UPDATE(vp, getattr);
3360 3360          return (err);
3361 3361  }
3362 3362  
3363 3363  int
3364 3364  fop_setattr(
3365 3365          vnode_t *vp,
3366 3366          vattr_t *vap,
3367 3367          int flags,
3368 3368          cred_t *cr,
3369 3369          caller_context_t *ct)
3370 3370  {
3371 3371          int     err;
3372 3372  
3373 3373          VOPXID_MAP_CR(vp, cr);
3374 3374  
3375 3375          /*
3376 3376           * If this file system doesn't understand the xvattr extensions
3377 3377           * then turn off the xvattr bit.
3378 3378           */
3379 3379          if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3380 3380                  vap->va_mask &= ~AT_XVATTR;
3381 3381          }
3382 3382  
3383 3383          /*
3384 3384           * We're only allowed to skip the ACL check iff we used a 32 bit
3385 3385           * ACE mask with VOP_ACCESS() to determine permissions.
3386 3386           */
3387 3387          if ((flags & ATTR_NOACLCHECK) &&
3388 3388              vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3389 3389                  return (EINVAL);
3390 3390          }
3391 3391          err = (*(vp)->v_op->vop_setattr)(vp, vap, flags, cr, ct);
3392 3392          VOPSTATS_UPDATE(vp, setattr);
3393 3393          return (err);
3394 3394  }
3395 3395  
3396 3396  int
3397 3397  fop_access(
3398 3398          vnode_t *vp,
3399 3399          int mode,
3400 3400          int flags,
3401 3401          cred_t *cr,
3402 3402          caller_context_t *ct)
3403 3403  {
3404 3404          int     err;
3405 3405  
3406 3406          if ((flags & V_ACE_MASK) &&
3407 3407              vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3408 3408                  return (EINVAL);
3409 3409          }
3410 3410  
3411 3411          VOPXID_MAP_CR(vp, cr);
3412 3412  
3413 3413          err = (*(vp)->v_op->vop_access)(vp, mode, flags, cr, ct);
3414 3414          VOPSTATS_UPDATE(vp, access);
3415 3415          return (err);
3416 3416  }
3417 3417  
3418 3418  int
3419 3419  fop_lookup(
3420 3420          vnode_t *dvp,
3421 3421          char *nm,
3422 3422          vnode_t **vpp,
3423 3423          pathname_t *pnp,
3424 3424          int flags,
3425 3425          vnode_t *rdir,
3426 3426          cred_t *cr,
3427 3427          caller_context_t *ct,
3428 3428          int *deflags,           /* Returned per-dirent flags */
3429 3429          pathname_t *ppnp)       /* Returned case-preserved name in directory */
3430 3430  {
3431 3431          int ret;
3432 3432  
3433 3433          /*
3434 3434           * If this file system doesn't support case-insensitive access
3435 3435           * and said access is requested, fail quickly.  It is required
3436 3436           * that if the vfs supports case-insensitive lookup, it also
3437 3437           * supports extended dirent flags.
3438 3438           */
3439 3439          if (flags & FIGNORECASE &&
3440 3440              (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3441 3441              vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3442 3442                  return (EINVAL);
3443 3443  
3444 3444          VOPXID_MAP_CR(dvp, cr);
3445 3445  
3446 3446          if ((flags & LOOKUP_XATTR) && (flags & LOOKUP_HAVE_SYSATTR_DIR) == 0) {
3447 3447                  ret = xattr_dir_lookup(dvp, vpp, flags, cr);
3448 3448          } else {
3449 3449                  ret = (*(dvp)->v_op->vop_lookup)
3450 3450                      (dvp, nm, vpp, pnp, flags, rdir, cr, ct, deflags, ppnp);
3451 3451          }
3452 3452          if (ret == 0 && *vpp) {
3453 3453                  VOPSTATS_UPDATE(*vpp, lookup);
3454 3454                  if ((*vpp)->v_path == NULL) {
3455 3455                          vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
3456 3456                  }
3457 3457          }
3458 3458  
3459 3459          return (ret);
3460 3460  }
3461 3461  
3462 3462  int
3463 3463  fop_create(
3464 3464          vnode_t *dvp,
3465 3465          char *name,
3466 3466          vattr_t *vap,
3467 3467          vcexcl_t excl,
3468 3468          int mode,
3469 3469          vnode_t **vpp,
3470 3470          cred_t *cr,
3471 3471          int flags,
3472 3472          caller_context_t *ct,
3473 3473          vsecattr_t *vsecp)      /* ACL to set during create */
3474 3474  {
3475 3475          int ret;
3476 3476  
3477 3477          if (vsecp != NULL &&
3478 3478              vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3479 3479                  return (EINVAL);
3480 3480          }
3481 3481          /*
3482 3482           * If this file system doesn't support case-insensitive access
3483 3483           * and said access is requested, fail quickly.
3484 3484           */
3485 3485          if (flags & FIGNORECASE &&
3486 3486              (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3487 3487              vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3488 3488                  return (EINVAL);
3489 3489  
3490 3490          VOPXID_MAP_CR(dvp, cr);
3491 3491  
3492 3492          ret = (*(dvp)->v_op->vop_create)
3493 3493              (dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp);
3494 3494          if (ret == 0 && *vpp) {
3495 3495                  VOPSTATS_UPDATE(*vpp, create);
3496 3496                  if ((*vpp)->v_path == NULL) {
3497 3497                          vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
3498 3498                  }
3499 3499          }
3500 3500  
3501 3501          return (ret);
3502 3502  }
3503 3503  
3504 3504  int
3505 3505  fop_remove(
3506 3506          vnode_t *dvp,
3507 3507          char *nm,
3508 3508          cred_t *cr,
3509 3509          caller_context_t *ct,
3510 3510          int flags)
3511 3511  {
3512 3512          int     err;
3513 3513  
3514 3514          /*
3515 3515           * If this file system doesn't support case-insensitive access
3516 3516           * and said access is requested, fail quickly.
3517 3517           */
3518 3518          if (flags & FIGNORECASE &&
3519 3519              (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3520 3520              vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3521 3521                  return (EINVAL);
3522 3522  
3523 3523          VOPXID_MAP_CR(dvp, cr);
3524 3524  
3525 3525          err = (*(dvp)->v_op->vop_remove)(dvp, nm, cr, ct, flags);
3526 3526          VOPSTATS_UPDATE(dvp, remove);
3527 3527          return (err);
3528 3528  }
3529 3529  
3530 3530  int
3531 3531  fop_link(
3532 3532          vnode_t *tdvp,
3533 3533          vnode_t *svp,
3534 3534          char *tnm,
3535 3535          cred_t *cr,
3536 3536          caller_context_t *ct,
3537 3537          int flags)
3538 3538  {
3539 3539          int     err;
3540 3540  
3541 3541          /*
3542 3542           * If the target file system doesn't support case-insensitive access
3543 3543           * and said access is requested, fail quickly.
3544 3544           */
3545 3545          if (flags & FIGNORECASE &&
3546 3546              (vfs_has_feature(tdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3547 3547              vfs_has_feature(tdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3548 3548                  return (EINVAL);
3549 3549  
3550 3550          VOPXID_MAP_CR(tdvp, cr);
3551 3551  
3552 3552          err = (*(tdvp)->v_op->vop_link)(tdvp, svp, tnm, cr, ct, flags);
3553 3553          VOPSTATS_UPDATE(tdvp, link);
3554 3554          return (err);
3555 3555  }
3556 3556  
3557 3557  int
3558 3558  fop_rename(
3559 3559          vnode_t *sdvp,
3560 3560          char *snm,
3561 3561          vnode_t *tdvp,
3562 3562          char *tnm,
3563 3563          cred_t *cr,
3564 3564          caller_context_t *ct,
3565 3565          int flags)
3566 3566  {
3567 3567          int     err;
3568 3568  
3569 3569          /*
3570 3570           * If the file system involved does not support
3571 3571           * case-insensitive access and said access is requested, fail
3572 3572           * quickly.
3573 3573           */
3574 3574          if (flags & FIGNORECASE &&
3575 3575              ((vfs_has_feature(sdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3576 3576              vfs_has_feature(sdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)))
3577 3577                  return (EINVAL);
3578 3578  
3579 3579          VOPXID_MAP_CR(tdvp, cr);
3580 3580  
3581 3581          err = (*(sdvp)->v_op->vop_rename)(sdvp, snm, tdvp, tnm, cr, ct, flags);
3582 3582          VOPSTATS_UPDATE(sdvp, rename);
3583 3583          return (err);
3584 3584  }
3585 3585  
3586 3586  int
3587 3587  fop_mkdir(
3588 3588          vnode_t *dvp,
3589 3589          char *dirname,
3590 3590          vattr_t *vap,
3591 3591          vnode_t **vpp,
3592 3592          cred_t *cr,
3593 3593          caller_context_t *ct,
3594 3594          int flags,
3595 3595          vsecattr_t *vsecp)      /* ACL to set during create */
3596 3596  {
3597 3597          int ret;
3598 3598  
3599 3599          if (vsecp != NULL &&
3600 3600              vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3601 3601                  return (EINVAL);
3602 3602          }
3603 3603          /*
3604 3604           * If this file system doesn't support case-insensitive access
3605 3605           * and said access is requested, fail quickly.
3606 3606           */
3607 3607          if (flags & FIGNORECASE &&
3608 3608              (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3609 3609              vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3610 3610                  return (EINVAL);
3611 3611  
3612 3612          VOPXID_MAP_CR(dvp, cr);
3613 3613  
3614 3614          ret = (*(dvp)->v_op->vop_mkdir)
3615 3615              (dvp, dirname, vap, vpp, cr, ct, flags, vsecp);
3616 3616          if (ret == 0 && *vpp) {
3617 3617                  VOPSTATS_UPDATE(*vpp, mkdir);
3618 3618                  if ((*vpp)->v_path == NULL) {
3619 3619                          vn_setpath(rootdir, dvp, *vpp, dirname,
3620 3620                              strlen(dirname));
3621 3621                  }
3622 3622          }
3623 3623  
3624 3624          return (ret);
3625 3625  }
3626 3626  
3627 3627  int
3628 3628  fop_rmdir(
3629 3629          vnode_t *dvp,
3630 3630          char *nm,
3631 3631          vnode_t *cdir,
3632 3632          cred_t *cr,
3633 3633          caller_context_t *ct,
3634 3634          int flags)
3635 3635  {
3636 3636          int     err;
3637 3637  
3638 3638          /*
3639 3639           * If this file system doesn't support case-insensitive access
3640 3640           * and said access is requested, fail quickly.
3641 3641           */
3642 3642          if (flags & FIGNORECASE &&
3643 3643              (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3644 3644              vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3645 3645                  return (EINVAL);
3646 3646  
3647 3647          VOPXID_MAP_CR(dvp, cr);
3648 3648  
3649 3649          err = (*(dvp)->v_op->vop_rmdir)(dvp, nm, cdir, cr, ct, flags);
3650 3650          VOPSTATS_UPDATE(dvp, rmdir);
3651 3651          return (err);
3652 3652  }
3653 3653  
3654 3654  int
3655 3655  fop_readdir(
3656 3656          vnode_t *vp,
3657 3657          uio_t *uiop,
3658 3658          cred_t *cr,
3659 3659          int *eofp,
3660 3660          caller_context_t *ct,
3661 3661          int flags)
3662 3662  {
3663 3663          int     err;
3664 3664          ssize_t resid_start = uiop->uio_resid;
3665 3665  
3666 3666          /*
3667 3667           * If this file system doesn't support retrieving directory
3668 3668           * entry flags and said access is requested, fail quickly.
3669 3669           */
3670 3670          if (flags & V_RDDIR_ENTFLAGS &&
3671 3671              vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS) == 0)
3672 3672                  return (EINVAL);
3673 3673  
3674 3674          VOPXID_MAP_CR(vp, cr);
3675 3675  
3676 3676          err = (*(vp)->v_op->vop_readdir)(vp, uiop, cr, eofp, ct, flags);
3677 3677          VOPSTATS_UPDATE_IO(vp, readdir,
3678 3678              readdir_bytes, (resid_start - uiop->uio_resid));
3679 3679          return (err);
3680 3680  }
3681 3681  
3682 3682  int
3683 3683  fop_symlink(
3684 3684          vnode_t *dvp,
3685 3685          char *linkname,
3686 3686          vattr_t *vap,
3687 3687          char *target,
3688 3688          cred_t *cr,
3689 3689          caller_context_t *ct,
3690 3690          int flags)
3691 3691  {
3692 3692          int     err;
3693 3693          xvattr_t xvattr;
3694 3694  
3695 3695          /*
3696 3696           * If this file system doesn't support case-insensitive access
3697 3697           * and said access is requested, fail quickly.
3698 3698           */
3699 3699          if (flags & FIGNORECASE &&
3700 3700              (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3701 3701              vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3702 3702                  return (EINVAL);
3703 3703  
3704 3704          VOPXID_MAP_CR(dvp, cr);
3705 3705  
3706 3706          /* check for reparse point */
3707 3707          if ((vfs_has_feature(dvp->v_vfsp, VFSFT_REPARSE)) &&
3708 3708              (strncmp(target, FS_REPARSE_TAG_STR,
3709 3709              strlen(FS_REPARSE_TAG_STR)) == 0)) {
3710 3710                  if (!fs_reparse_mark(target, vap, &xvattr))
3711 3711                          vap = (vattr_t *)&xvattr;
3712 3712          }
3713 3713  
3714 3714          err = (*(dvp)->v_op->vop_symlink)
3715 3715              (dvp, linkname, vap, target, cr, ct, flags);
3716 3716          VOPSTATS_UPDATE(dvp, symlink);
3717 3717          return (err);
3718 3718  }
3719 3719  
3720 3720  int
3721 3721  fop_readlink(
3722 3722          vnode_t *vp,
3723 3723          uio_t *uiop,
3724 3724          cred_t *cr,
3725 3725          caller_context_t *ct)
3726 3726  {
3727 3727          int     err;
3728 3728  
3729 3729          VOPXID_MAP_CR(vp, cr);
3730 3730  
3731 3731          err = (*(vp)->v_op->vop_readlink)(vp, uiop, cr, ct);
3732 3732          VOPSTATS_UPDATE(vp, readlink);
3733 3733          return (err);
3734 3734  }
3735 3735  
3736 3736  int
3737 3737  fop_fsync(
3738 3738          vnode_t *vp,
3739 3739          int syncflag,
3740 3740          cred_t *cr,
3741 3741          caller_context_t *ct)
3742 3742  {
3743 3743          int     err;
3744 3744  
3745 3745          VOPXID_MAP_CR(vp, cr);
3746 3746  
3747 3747          err = (*(vp)->v_op->vop_fsync)(vp, syncflag, cr, ct);
3748 3748          VOPSTATS_UPDATE(vp, fsync);
3749 3749          return (err);
3750 3750  }
3751 3751  
3752 3752  void
3753 3753  fop_inactive(
3754 3754          vnode_t *vp,
3755 3755          cred_t *cr,
3756 3756          caller_context_t *ct)
3757 3757  {
3758 3758          /* Need to update stats before vop call since we may lose the vnode */
3759 3759          VOPSTATS_UPDATE(vp, inactive);
3760 3760  
3761 3761          VOPXID_MAP_CR(vp, cr);
3762 3762  
3763 3763          (*(vp)->v_op->vop_inactive)(vp, cr, ct);
3764 3764  }
3765 3765  
3766 3766  int
3767 3767  fop_fid(
3768 3768          vnode_t *vp,
3769 3769          fid_t *fidp,
3770 3770          caller_context_t *ct)
3771 3771  {
3772 3772          int     err;
3773 3773  
3774 3774          err = (*(vp)->v_op->vop_fid)(vp, fidp, ct);
3775 3775          VOPSTATS_UPDATE(vp, fid);
3776 3776          return (err);
3777 3777  }
3778 3778  
3779 3779  int
3780 3780  fop_rwlock(
3781 3781          vnode_t *vp,
3782 3782          int write_lock,
3783 3783          caller_context_t *ct)
3784 3784  {
3785 3785          int     ret;
3786 3786  
3787 3787          ret = ((*(vp)->v_op->vop_rwlock)(vp, write_lock, ct));
3788 3788          VOPSTATS_UPDATE(vp, rwlock);
3789 3789          return (ret);
3790 3790  }
3791 3791  
3792 3792  void
3793 3793  fop_rwunlock(
3794 3794          vnode_t *vp,
3795 3795          int write_lock,
3796 3796          caller_context_t *ct)
3797 3797  {
3798 3798          (*(vp)->v_op->vop_rwunlock)(vp, write_lock, ct);
3799 3799          VOPSTATS_UPDATE(vp, rwunlock);
3800 3800  }
3801 3801  
3802 3802  int
3803 3803  fop_seek(
3804 3804          vnode_t *vp,
3805 3805          offset_t ooff,
3806 3806          offset_t *noffp,
3807 3807          caller_context_t *ct)
3808 3808  {
3809 3809          int     err;
3810 3810  
3811 3811          err = (*(vp)->v_op->vop_seek)(vp, ooff, noffp, ct);
3812 3812          VOPSTATS_UPDATE(vp, seek);
3813 3813          return (err);
3814 3814  }
3815 3815  
3816 3816  int
3817 3817  fop_cmp(
3818 3818          vnode_t *vp1,
3819 3819          vnode_t *vp2,
3820 3820          caller_context_t *ct)
3821 3821  {
3822 3822          int     err;
3823 3823  
3824 3824          err = (*(vp1)->v_op->vop_cmp)(vp1, vp2, ct);
3825 3825          VOPSTATS_UPDATE(vp1, cmp);
3826 3826          return (err);
3827 3827  }
3828 3828  
3829 3829  int
3830 3830  fop_frlock(
3831 3831          vnode_t *vp,
3832 3832          int cmd,
3833 3833          flock64_t *bfp,
3834 3834          int flag,
3835 3835          offset_t offset,
3836 3836          struct flk_callback *flk_cbp,
3837 3837          cred_t *cr,
3838 3838          caller_context_t *ct)
3839 3839  {
3840 3840          int     err;
3841 3841  
3842 3842          VOPXID_MAP_CR(vp, cr);
3843 3843  
3844 3844          err = (*(vp)->v_op->vop_frlock)
3845 3845              (vp, cmd, bfp, flag, offset, flk_cbp, cr, ct);
3846 3846          VOPSTATS_UPDATE(vp, frlock);
3847 3847          return (err);
3848 3848  }
3849 3849  
3850 3850  int
3851 3851  fop_space(
3852 3852          vnode_t *vp,
3853 3853          int cmd,
3854 3854          flock64_t *bfp,
3855 3855          int flag,
3856 3856          offset_t offset,
3857 3857          cred_t *cr,
3858 3858          caller_context_t *ct)
3859 3859  {
3860 3860          int     err;
3861 3861  
3862 3862          VOPXID_MAP_CR(vp, cr);
3863 3863  
3864 3864          err = (*(vp)->v_op->vop_space)(vp, cmd, bfp, flag, offset, cr, ct);
3865 3865          VOPSTATS_UPDATE(vp, space);
3866 3866          return (err);
3867 3867  }
3868 3868  
3869 3869  int
3870 3870  fop_realvp(
3871 3871          vnode_t *vp,
3872 3872          vnode_t **vpp,
3873 3873          caller_context_t *ct)
3874 3874  {
3875 3875          int     err;
3876 3876  
3877 3877          err = (*(vp)->v_op->vop_realvp)(vp, vpp, ct);
3878 3878          VOPSTATS_UPDATE(vp, realvp);
3879 3879          return (err);
3880 3880  }
3881 3881  
3882 3882  int
3883 3883  fop_getpage(
3884 3884          vnode_t *vp,
3885 3885          offset_t off,
3886 3886          size_t len,
3887 3887          uint_t *protp,
3888 3888          page_t **plarr,
3889 3889          size_t plsz,
3890 3890          struct seg *seg,
3891 3891          caddr_t addr,
3892 3892          enum seg_rw rw,
3893 3893          cred_t *cr,
3894 3894          caller_context_t *ct)
3895 3895  {
3896 3896          int     err;
3897 3897  
3898 3898          VOPXID_MAP_CR(vp, cr);
3899 3899  
3900 3900          err = (*(vp)->v_op->vop_getpage)
3901 3901              (vp, off, len, protp, plarr, plsz, seg, addr, rw, cr, ct);
3902 3902          VOPSTATS_UPDATE(vp, getpage);
3903 3903          return (err);
3904 3904  }
3905 3905  
3906 3906  int
3907 3907  fop_putpage(
3908 3908          vnode_t *vp,
3909 3909          offset_t off,
3910 3910          size_t len,
3911 3911          int flags,
3912 3912          cred_t *cr,
3913 3913          caller_context_t *ct)
3914 3914  {
3915 3915          int     err;
3916 3916  
3917 3917          VOPXID_MAP_CR(vp, cr);
3918 3918  
3919 3919          err = (*(vp)->v_op->vop_putpage)(vp, off, len, flags, cr, ct);
3920 3920          VOPSTATS_UPDATE(vp, putpage);
3921 3921          return (err);
3922 3922  }
3923 3923  
3924 3924  int
3925 3925  fop_map(
3926 3926          vnode_t *vp,
3927 3927          offset_t off,
3928 3928          struct as *as,
3929 3929          caddr_t *addrp,
3930 3930          size_t len,
3931 3931          uchar_t prot,
3932 3932          uchar_t maxprot,
3933 3933          uint_t flags,
3934 3934          cred_t *cr,
3935 3935          caller_context_t *ct)
3936 3936  {
3937 3937          int     err;
3938 3938  
3939 3939          VOPXID_MAP_CR(vp, cr);
3940 3940  
3941 3941          err = (*(vp)->v_op->vop_map)
3942 3942              (vp, off, as, addrp, len, prot, maxprot, flags, cr, ct);
3943 3943          VOPSTATS_UPDATE(vp, map);
3944 3944          return (err);
3945 3945  }
3946 3946  
3947 3947  int
3948 3948  fop_addmap(
3949 3949          vnode_t *vp,
3950 3950          offset_t off,
3951 3951          struct as *as,
3952 3952          caddr_t addr,
3953 3953          size_t len,
3954 3954          uchar_t prot,
3955 3955          uchar_t maxprot,
3956 3956          uint_t flags,
3957 3957          cred_t *cr,
3958 3958          caller_context_t *ct)
3959 3959  {
3960 3960          int error;
3961 3961          u_longlong_t delta;
3962 3962  
3963 3963          VOPXID_MAP_CR(vp, cr);
3964 3964  
3965 3965          error = (*(vp)->v_op->vop_addmap)
3966 3966              (vp, off, as, addr, len, prot, maxprot, flags, cr, ct);
3967 3967  
3968 3968          if ((!error) && (vp->v_type == VREG)) {
3969 3969                  delta = (u_longlong_t)btopr(len);
3970 3970                  /*
3971 3971                   * If file is declared MAP_PRIVATE, it can't be written back
3972 3972                   * even if open for write. Handle as read.
3973 3973                   */
3974 3974                  if (flags & MAP_PRIVATE) {
3975 3975                          atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3976 3976                              (int64_t)delta);
3977 3977                  } else {
3978 3978                          /*
3979 3979                           * atomic_add_64 forces the fetch of a 64 bit value to
3980 3980                           * be atomic on 32 bit machines
3981 3981                           */
3982 3982                          if (maxprot & PROT_WRITE)
3983 3983                                  atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
3984 3984                                      (int64_t)delta);
3985 3985                          if (maxprot & PROT_READ)
3986 3986                                  atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3987 3987                                      (int64_t)delta);
3988 3988                          if (maxprot & PROT_EXEC)
3989 3989                                  atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3990 3990                                      (int64_t)delta);
3991 3991                  }
3992 3992          }
3993 3993          VOPSTATS_UPDATE(vp, addmap);
3994 3994          return (error);
3995 3995  }
3996 3996  
3997 3997  int
3998 3998  fop_delmap(
3999 3999          vnode_t *vp,
4000 4000          offset_t off,
4001 4001          struct as *as,
4002 4002          caddr_t addr,
4003 4003          size_t len,
4004 4004          uint_t prot,
4005 4005          uint_t maxprot,
4006 4006          uint_t flags,
4007 4007          cred_t *cr,
4008 4008          caller_context_t *ct)
4009 4009  {
4010 4010          int error;
4011 4011          u_longlong_t delta;
4012 4012  
4013 4013          VOPXID_MAP_CR(vp, cr);
4014 4014  
4015 4015          error = (*(vp)->v_op->vop_delmap)
4016 4016              (vp, off, as, addr, len, prot, maxprot, flags, cr, ct);
4017 4017  
4018 4018          /*
4019 4019           * NFS calls into delmap twice, the first time
4020 4020           * it simply establishes a callback mechanism and returns EAGAIN
4021 4021           * while the real work is being done upon the second invocation.
4022 4022           * We have to detect this here and only decrement the counts upon
4023 4023           * the second delmap request.
4024 4024           */
4025 4025          if ((error != EAGAIN) && (vp->v_type == VREG)) {
4026 4026  
4027 4027                  delta = (u_longlong_t)btopr(len);
4028 4028  
4029 4029                  if (flags & MAP_PRIVATE) {
4030 4030                          atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4031 4031                              (int64_t)(-delta));
4032 4032                  } else {
4033 4033                          /*
4034 4034                           * atomic_add_64 forces the fetch of a 64 bit value
4035 4035                           * to be atomic on 32 bit machines
4036 4036                           */
4037 4037                          if (maxprot & PROT_WRITE)
4038 4038                                  atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
4039 4039                                      (int64_t)(-delta));
4040 4040                          if (maxprot & PROT_READ)
4041 4041                                  atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4042 4042                                      (int64_t)(-delta));
4043 4043                          if (maxprot & PROT_EXEC)
4044 4044                                  atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4045 4045                                      (int64_t)(-delta));
4046 4046                  }
4047 4047          }
4048 4048          VOPSTATS_UPDATE(vp, delmap);
4049 4049          return (error);
4050 4050  }
4051 4051  
4052 4052  
4053 4053  int
4054 4054  fop_poll(
4055 4055          vnode_t *vp,
4056 4056          short events,
4057 4057          int anyyet,
4058 4058          short *reventsp,
4059 4059          struct pollhead **phpp,
4060 4060          caller_context_t *ct)
4061 4061  {
4062 4062          int     err;
4063 4063  
4064 4064          err = (*(vp)->v_op->vop_poll)(vp, events, anyyet, reventsp, phpp, ct);
4065 4065          VOPSTATS_UPDATE(vp, poll);
4066 4066          return (err);
4067 4067  }
4068 4068  
4069 4069  int
4070 4070  fop_dump(
4071 4071          vnode_t *vp,
4072 4072          caddr_t addr,
4073 4073          offset_t lbdn,
4074 4074          offset_t dblks,
4075 4075          caller_context_t *ct)
4076 4076  {
4077 4077          int     err;
4078 4078  
4079 4079          /* ensure lbdn and dblks can be passed safely to bdev_dump */
4080 4080          if ((lbdn != (daddr_t)lbdn) || (dblks != (int)dblks))
4081 4081                  return (EIO);
4082 4082  
4083 4083          err = (*(vp)->v_op->vop_dump)(vp, addr, lbdn, dblks, ct);
4084 4084          VOPSTATS_UPDATE(vp, dump);
4085 4085          return (err);
4086 4086  }
4087 4087  
4088 4088  int
4089 4089  fop_pathconf(
4090 4090          vnode_t *vp,
4091 4091          int cmd,
4092 4092          ulong_t *valp,
4093 4093          cred_t *cr,
4094 4094          caller_context_t *ct)
4095 4095  {
4096 4096          int     err;
4097 4097  
4098 4098          VOPXID_MAP_CR(vp, cr);
4099 4099  
4100 4100          err = (*(vp)->v_op->vop_pathconf)(vp, cmd, valp, cr, ct);
4101 4101          VOPSTATS_UPDATE(vp, pathconf);
4102 4102          return (err);
4103 4103  }
4104 4104  
4105 4105  int
4106 4106  fop_pageio(
4107 4107          vnode_t *vp,
4108 4108          struct page *pp,
4109 4109          u_offset_t io_off,
4110 4110          size_t io_len,
4111 4111          int flags,
4112 4112          cred_t *cr,
4113 4113          caller_context_t *ct)
4114 4114  {
4115 4115          int     err;
4116 4116  
4117 4117          VOPXID_MAP_CR(vp, cr);
4118 4118  
4119 4119          err = (*(vp)->v_op->vop_pageio)(vp, pp, io_off, io_len, flags, cr, ct);
4120 4120          VOPSTATS_UPDATE(vp, pageio);
4121 4121          return (err);
4122 4122  }
4123 4123  
4124 4124  int
4125 4125  fop_dumpctl(
4126 4126          vnode_t *vp,
4127 4127          int action,
4128 4128          offset_t *blkp,
4129 4129          caller_context_t *ct)
4130 4130  {
4131 4131          int     err;
4132 4132          err = (*(vp)->v_op->vop_dumpctl)(vp, action, blkp, ct);
4133 4133          VOPSTATS_UPDATE(vp, dumpctl);
4134 4134          return (err);
4135 4135  }
4136 4136  
4137 4137  void
4138 4138  fop_dispose(
4139 4139          vnode_t *vp,
4140 4140          page_t *pp,
4141 4141          int flag,
4142 4142          int dn,
4143 4143          cred_t *cr,
4144 4144          caller_context_t *ct)
4145 4145  {
4146 4146          /* Must do stats first since it's possible to lose the vnode */
4147 4147          VOPSTATS_UPDATE(vp, dispose);
4148 4148  
4149 4149          VOPXID_MAP_CR(vp, cr);
4150 4150  
4151 4151          (*(vp)->v_op->vop_dispose)(vp, pp, flag, dn, cr, ct);
4152 4152  }
4153 4153  
4154 4154  int
4155 4155  fop_setsecattr(
4156 4156          vnode_t *vp,
4157 4157          vsecattr_t *vsap,
4158 4158          int flag,
4159 4159          cred_t *cr,
4160 4160          caller_context_t *ct)
4161 4161  {
4162 4162          int     err;
4163 4163  
4164 4164          VOPXID_MAP_CR(vp, cr);
4165 4165  
4166 4166          /*
4167 4167           * We're only allowed to skip the ACL check iff we used a 32 bit
4168 4168           * ACE mask with VOP_ACCESS() to determine permissions.
4169 4169           */
4170 4170          if ((flag & ATTR_NOACLCHECK) &&
4171 4171              vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4172 4172                  return (EINVAL);
4173 4173          }
4174 4174          err = (*(vp)->v_op->vop_setsecattr) (vp, vsap, flag, cr, ct);
4175 4175          VOPSTATS_UPDATE(vp, setsecattr);
4176 4176          return (err);
4177 4177  }
4178 4178  
4179 4179  int
4180 4180  fop_getsecattr(
4181 4181          vnode_t *vp,
4182 4182          vsecattr_t *vsap,
4183 4183          int flag,
4184 4184          cred_t *cr,
4185 4185          caller_context_t *ct)
4186 4186  {
4187 4187          int     err;
4188 4188  
4189 4189          /*
4190 4190           * We're only allowed to skip the ACL check iff we used a 32 bit
4191 4191           * ACE mask with VOP_ACCESS() to determine permissions.
4192 4192           */
4193 4193          if ((flag & ATTR_NOACLCHECK) &&
4194 4194              vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4195 4195                  return (EINVAL);
4196 4196          }
4197 4197  
4198 4198          VOPXID_MAP_CR(vp, cr);
4199 4199  
4200 4200          err = (*(vp)->v_op->vop_getsecattr) (vp, vsap, flag, cr, ct);
4201 4201          VOPSTATS_UPDATE(vp, getsecattr);
4202 4202          return (err);
4203 4203  }
4204 4204  
4205 4205  int
4206 4206  fop_shrlock(
4207 4207          vnode_t *vp,
4208 4208          int cmd,
4209 4209          struct shrlock *shr,
4210 4210          int flag,
4211 4211          cred_t *cr,
4212 4212          caller_context_t *ct)
4213 4213  {
4214 4214          int     err;
4215 4215  
4216 4216          VOPXID_MAP_CR(vp, cr);
4217 4217  
4218 4218          err = (*(vp)->v_op->vop_shrlock)(vp, cmd, shr, flag, cr, ct);
4219 4219          VOPSTATS_UPDATE(vp, shrlock);
4220 4220          return (err);
4221 4221  }
4222 4222  
4223 4223  int
4224 4224  fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm,
4225 4225      caller_context_t *ct)
4226 4226  {
4227 4227          int     err;
4228 4228  
4229 4229          err = (*(vp)->v_op->vop_vnevent)(vp, vnevent, dvp, fnm, ct);
4230 4230          VOPSTATS_UPDATE(vp, vnevent);
4231 4231          return (err);
4232 4232  }
4233 4233  
4234 4234  int
4235 4235  fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr,
4236 4236      caller_context_t *ct)
4237 4237  {
4238 4238          int err;
4239 4239  
4240 4240          if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4241 4241                  return (ENOTSUP);
4242 4242          err = (*(vp)->v_op->vop_reqzcbuf)(vp, ioflag, uiop, cr, ct);
4243 4243          VOPSTATS_UPDATE(vp, reqzcbuf);
4244 4244          return (err);
4245 4245  }
4246 4246  
4247 4247  int
4248 4248  fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct)
4249 4249  {
4250 4250          int err;
4251 4251  
4252 4252          if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4253 4253                  return (ENOTSUP);
4254 4254          err = (*(vp)->v_op->vop_retzcbuf)(vp, uiop, cr, ct);
4255 4255          VOPSTATS_UPDATE(vp, retzcbuf);
4256 4256          return (err);
4257 4257  }
4258 4258  
/*
 * Default (no-op) destructor.
 *	A NULL entry in the destructor table means the key is unused, so
 *	keys created without a destructor are given this one instead.
 */
/* ARGSUSED */
void
vsd_defaultdestructor(void *value)
{
}
4267 4267  
4268 4268  /*
4269 4269   * Create a key (index into per vnode array)
4270 4270   *      Locks out vsd_create, vsd_destroy, and vsd_free
4271 4271   *      May allocate memory with lock held
4272 4272   */
4273 4273  void
4274 4274  vsd_create(uint_t *keyp, void (*destructor)(void *))
4275 4275  {
4276 4276          int     i;
4277 4277          uint_t  nkeys;
4278 4278  
4279 4279          /*
4280 4280           * if key is allocated, do nothing
4281 4281           */
4282 4282          mutex_enter(&vsd_lock);
4283 4283          if (*keyp) {
4284 4284                  mutex_exit(&vsd_lock);
4285 4285                  return;
4286 4286          }
4287 4287          /*
4288 4288           * find an unused key
4289 4289           */
4290 4290          if (destructor == NULL)
4291 4291                  destructor = vsd_defaultdestructor;
4292 4292  
4293 4293          for (i = 0; i < vsd_nkeys; ++i)
4294 4294                  if (vsd_destructor[i] == NULL)
4295 4295                          break;
4296 4296  
4297 4297          /*
4298 4298           * if no unused keys, increase the size of the destructor array
4299 4299           */
4300 4300          if (i == vsd_nkeys) {
4301 4301                  if ((nkeys = (vsd_nkeys << 1)) == 0)
4302 4302                          nkeys = 1;
4303 4303                  vsd_destructor =
4304 4304                      (void (**)(void *))vsd_realloc((void *)vsd_destructor,
4305 4305                      (size_t)(vsd_nkeys * sizeof (void (*)(void *))),
4306 4306                      (size_t)(nkeys * sizeof (void (*)(void *))));
4307 4307                  vsd_nkeys = nkeys;
4308 4308          }
4309 4309  
4310 4310          /*
4311 4311           * allocate the next available unused key
4312 4312           */
4313 4313          vsd_destructor[i] = destructor;
4314 4314          *keyp = i + 1;
4315 4315  
4316 4316          /* create vsd_list, if it doesn't exist */
4317 4317          if (vsd_list == NULL) {
4318 4318                  vsd_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
4319 4319                  list_create(vsd_list, sizeof (struct vsd_node),
4320 4320                      offsetof(struct vsd_node, vs_nodes));
4321 4321          }
4322 4322  
4323 4323          mutex_exit(&vsd_lock);
4324 4324  }
4325 4325  
4326 4326  /*
4327 4327   * Destroy a key
4328 4328   *
4329 4329   * Assumes that the caller is preventing vsd_set and vsd_get
4330 4330   * Locks out vsd_create, vsd_destroy, and vsd_free
4331 4331   * May free memory with lock held
4332 4332   */
4333 4333  void
4334 4334  vsd_destroy(uint_t *keyp)
4335 4335  {
4336 4336          uint_t key;
4337 4337          struct vsd_node *vsd;
4338 4338  
4339 4339          /*
4340 4340           * protect the key namespace and our destructor lists
4341 4341           */
4342 4342          mutex_enter(&vsd_lock);
4343 4343          key = *keyp;
4344 4344          *keyp = 0;
4345 4345  
4346 4346          ASSERT(key <= vsd_nkeys);
4347 4347  
4348 4348          /*
4349 4349           * if the key is valid
4350 4350           */
4351 4351          if (key != 0) {
4352 4352                  uint_t k = key - 1;
4353 4353                  /*
4354 4354                   * for every vnode with VSD, call key's destructor
4355 4355                   */
4356 4356                  for (vsd = list_head(vsd_list); vsd != NULL;
4357 4357                      vsd = list_next(vsd_list, vsd)) {
4358 4358                          /*
4359 4359                           * no VSD for key in this vnode
4360 4360                           */
4361 4361                          if (key > vsd->vs_nkeys)
4362 4362                                  continue;
4363 4363                          /*
4364 4364                           * call destructor for key
4365 4365                           */
4366 4366                          if (vsd->vs_value[k] && vsd_destructor[k])
4367 4367                                  (*vsd_destructor[k])(vsd->vs_value[k]);
4368 4368                          /*
4369 4369                           * reset value for key
4370 4370                           */
4371 4371                          vsd->vs_value[k] = NULL;
4372 4372                  }
4373 4373                  /*
4374 4374                   * actually free the key (NULL destructor == unused)
4375 4375                   */
4376 4376                  vsd_destructor[k] = NULL;
4377 4377          }
4378 4378  
4379 4379          mutex_exit(&vsd_lock);
4380 4380  }
4381 4381  
4382 4382  /*
4383 4383   * Quickly return the per vnode value that was stored with the specified key
4384 4384   * Assumes the caller is protecting key from vsd_create and vsd_destroy
4385 4385   * Assumes the caller is holding v_vsd_lock to protect the vsd.
4386 4386   */
4387 4387  void *
4388 4388  vsd_get(vnode_t *vp, uint_t key)
4389 4389  {
4390 4390          struct vsd_node *vsd;
4391 4391  
4392 4392          ASSERT(vp != NULL);
4393 4393          ASSERT(mutex_owned(&vp->v_vsd_lock));
4394 4394  
4395 4395          vsd = vp->v_vsd;
4396 4396  
4397 4397          if (key && vsd != NULL && key <= vsd->vs_nkeys)
4398 4398                  return (vsd->vs_value[key - 1]);
4399 4399          return (NULL);
4400 4400  }
4401 4401  
4402 4402  /*
4403 4403   * Set a per vnode value indexed with the specified key
4404 4404   * Assumes the caller is holding v_vsd_lock to protect the vsd.
4405 4405   */
4406 4406  int
4407 4407  vsd_set(vnode_t *vp, uint_t key, void *value)
4408 4408  {
4409 4409          struct vsd_node *vsd;
4410 4410  
4411 4411          ASSERT(vp != NULL);
4412 4412          ASSERT(mutex_owned(&vp->v_vsd_lock));
4413 4413  
4414 4414          if (key == 0)
4415 4415                  return (EINVAL);
4416 4416  
4417 4417          vsd = vp->v_vsd;
4418 4418          if (vsd == NULL)
4419 4419                  vsd = vp->v_vsd = kmem_zalloc(sizeof (*vsd), KM_SLEEP);
4420 4420  
4421 4421          /*
4422 4422           * If the vsd was just allocated, vs_nkeys will be 0, so the following
4423 4423           * code won't happen and we will continue down and allocate space for
4424 4424           * the vs_value array.
4425 4425           * If the caller is replacing one value with another, then it is up
4426 4426           * to the caller to free/rele/destroy the previous value (if needed).
4427 4427           */
4428 4428          if (key <= vsd->vs_nkeys) {
4429 4429                  vsd->vs_value[key - 1] = value;
4430 4430                  return (0);
4431 4431          }
4432 4432  
4433 4433          ASSERT(key <= vsd_nkeys);
4434 4434  
4435 4435          if (vsd->vs_nkeys == 0) {
4436 4436                  mutex_enter(&vsd_lock); /* lock out vsd_destroy() */
4437 4437                  /*
4438 4438                   * Link onto list of all VSD nodes.
4439 4439                   */
4440 4440                  list_insert_head(vsd_list, vsd);
4441 4441                  mutex_exit(&vsd_lock);
4442 4442          }
4443 4443  
4444 4444          /*
4445 4445           * Allocate vnode local storage and set the value for key
4446 4446           */
4447 4447          vsd->vs_value = vsd_realloc(vsd->vs_value,
4448 4448              vsd->vs_nkeys * sizeof (void *),
4449 4449              key * sizeof (void *));
4450 4450          vsd->vs_nkeys = key;
4451 4451          vsd->vs_value[key - 1] = value;
4452 4452  
4453 4453          return (0);
4454 4454  }
4455 4455  
4456 4456  /*
4457 4457   * Called from vn_free() to run the destructor function for each vsd
4458 4458   *      Locks out vsd_create and vsd_destroy
4459 4459   *      Assumes that the destructor *DOES NOT* use vsd
4460 4460   */
4461 4461  void
4462 4462  vsd_free(vnode_t *vp)
4463 4463  {
4464 4464          int i;
4465 4465          struct vsd_node *vsd = vp->v_vsd;
4466 4466  
4467 4467          if (vsd == NULL)
4468 4468                  return;
4469 4469  
4470 4470          if (vsd->vs_nkeys == 0) {
4471 4471                  kmem_free(vsd, sizeof (*vsd));
4472 4472                  vp->v_vsd = NULL;
4473 4473                  return;
4474 4474          }
4475 4475  
4476 4476          /*
4477 4477           * lock out vsd_create and vsd_destroy, call
4478 4478           * the destructor, and mark the value as destroyed.
4479 4479           */
4480 4480          mutex_enter(&vsd_lock);
4481 4481  
4482 4482          for (i = 0; i < vsd->vs_nkeys; i++) {
4483 4483                  if (vsd->vs_value[i] && vsd_destructor[i])
4484 4484                          (*vsd_destructor[i])(vsd->vs_value[i]);
4485 4485                  vsd->vs_value[i] = NULL;
4486 4486          }
4487 4487  
4488 4488          /*
4489 4489           * remove from linked list of VSD nodes
4490 4490           */
4491 4491          list_remove(vsd_list, vsd);
4492 4492  
4493 4493          mutex_exit(&vsd_lock);
4494 4494  
4495 4495          /*
4496 4496           * free up the VSD
4497 4497           */
4498 4498          kmem_free(vsd->vs_value, vsd->vs_nkeys * sizeof (void *));
4499 4499          kmem_free(vsd, sizeof (struct vsd_node));
4500 4500          vp->v_vsd = NULL;
4501 4501  }
4502 4502  
4503 4503  /*
4504 4504   * realloc
4505 4505   */
4506 4506  static void *
4507 4507  vsd_realloc(void *old, size_t osize, size_t nsize)
4508 4508  {
4509 4509          void *new;
4510 4510  
4511 4511          new = kmem_zalloc(nsize, KM_SLEEP);
4512 4512          if (old) {
4513 4513                  bcopy(old, new, osize);
4514 4514                  kmem_free(old, osize);
4515 4515          }
4516 4516          return (new);
4517 4517  }
4518 4518  
4519 4519  /*
4520 4520   * Setup the extensible system attribute for creating a reparse point.
4521 4521   * The symlink data 'target' is validated for proper format of a reparse
4522 4522   * string and a check also made to make sure the symlink data does not
4523 4523   * point to an existing file.
4524 4524   *
4525 4525   * return 0 if ok else -1.
4526 4526   */
4527 4527  static int
4528 4528  fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr)
4529 4529  {
4530 4530          xoptattr_t *xoap;
4531 4531  
4532 4532          if ((!target) || (!vap) || (!xvattr))
4533 4533                  return (-1);
4534 4534  
4535 4535          /* validate reparse string */
4536 4536          if (reparse_validate((const char *)target))
4537 4537                  return (-1);
4538 4538  
4539 4539          xva_init(xvattr);
4540 4540          xvattr->xva_vattr = *vap;
4541 4541          xvattr->xva_vattr.va_mask |= AT_XVATTR;
4542 4542          xoap = xva_getxoptattr(xvattr);
4543 4543          ASSERT(xoap);
4544 4544          XVA_SET_REQ(xvattr, XAT_REPARSE);
4545 4545          xoap->xoa_reparse = 1;
4546 4546  
4547 4547          return (0);
4548 4548  }
4549 4549  
4550 4550  /*
4551 4551   * Function to check whether a symlink is a reparse point.
4552 4552   * Return B_TRUE if it is a reparse point, else return B_FALSE
4553 4553   */
4554 4554  boolean_t
4555 4555  vn_is_reparse(vnode_t *vp, cred_t *cr, caller_context_t *ct)
4556 4556  {
4557 4557          xvattr_t xvattr;
4558 4558          xoptattr_t *xoap;
4559 4559  
4560 4560          if ((vp->v_type != VLNK) ||
4561 4561              !(vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR)))
4562 4562                  return (B_FALSE);
4563 4563  
4564 4564          xva_init(&xvattr);
4565 4565          xoap = xva_getxoptattr(&xvattr);
4566 4566          ASSERT(xoap);
4567 4567          XVA_SET_REQ(&xvattr, XAT_REPARSE);
4568 4568  
4569 4569          if (VOP_GETATTR(vp, &xvattr.xva_vattr, 0, cr, ct))
4570 4570                  return (B_FALSE);
4571 4571  
4572 4572          if ((!(xvattr.xva_vattr.va_mask & AT_XVATTR)) ||
4573 4573              (!(XVA_ISSET_RTN(&xvattr, XAT_REPARSE))))
4574 4574                  return (B_FALSE);
4575 4575  
4576 4576          return (xoap->xoa_reparse ? B_TRUE : B_FALSE);
4577 4577  }

↓ open down ↓

4195 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX