Print this page
Update from fsd_sep3 webrev to fsd_sep9

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/fsh.c
          +++ new/usr/src/uts/common/fs/fsh.c
↓ open down ↓ 27 lines elided ↑ open up ↑
  28   28  #include <sys/vnode.h>
  29   29  
  30   30  /*
  31   31   * Filesystem hook framework (fsh)
  32   32   *
  33   33   * 1. Abstract.
  34   34   * The main goal of the filesystem hook framework is to provide an easy way to
  35   35   * inject client-defined behaviour into vfs/vnode calls. fsh works on
  36   36   * vfs_t granularity.
  37   37   *
       38 + * Note: In this document, both an fsh_t structure and a hooking function for
       39 + * a vnodeop/vfsop are referred to as a *hook*.
  38   40   *
       41 + *
  39   42   * 2. Overview.
  40   43   * fsh_t is the main object in the fsh. An fsh_t is a structure containing:
  41      - *      - pointers to hooking functions (named after corresponding
  42      - *      vnodeops/vfsops)
  43      - *      - a pointer to an argument to pass (this is shared for all the
  44      - *      hooks in a given fsh_t)
  45      - *      - a pointer to the *hook remove callback* - it's being fired after a
  46      - *      hook is removed and the hook has stopped executing. It's safe to destroy
  47      - *      any data associated with this hook.
       44 + *      - pointers to hooking functions
       45 + *      - an argument to pass (this is shared for all the hooks in a given
       46 + *      fsh_t)
       47 + *      - a pointer to the *hook remove callback*
  48   48   *
  49   49   * The information from fsh_t is copied by the fsh and an fsh_handle_t
  50   50   * is returned. It should be used for further removing.
  51   51   *
  52   52   *
  53   53   * 3. Usage.
  54      - * It is expected that vfs_t/vnode_t that are passed to fsh_foo() functions
  55      - * are held by the caller when needed. fsh does no vfs_t/vnode_t locking.
       54 + * It is expected that vfs_t/vnode_t passed to fsh_foo() functions are held by
       55 + * the caller when needed. fsh does no vfs_t/vnode_t locking.
  56   56   *
  57      - * fsh_t is a structure filled out by the client. If a client does not want
  58      - * to add/remove a hook for function foo(), he should fill the foo field of
  59      - * fsh_t with NULL. Every hook has a type of corresponding vfsop/vnodeop with
  60      - * two additional arguments:
  61      - *      - fsh_int_t *fsh_int - this argument MUST be passed to
  62      - *      hook_next_foo(). fsh wouldn't know which hook to execute next
  63      - *      without it
  64      - *      - void *arg - this is the argument passed with fsh_t during
  65      - *      installation
  66      - *      - void (*remove_cb)(void *, fsh_handle_t) - hook remove callback
  67      - *      (mentioned earlier); it's first argument is arg, the second is the
  68      - *      handle
       57 + * fsh_t is a structure filled out by the client. It contains:
       58 + *      - pointers to hooking functions
       59 + *      - the argument passed to the hooks
       60 + *      - the *hook remove callback*
  69   61   *
       62 + * If a client does not want to add a hook for function foo(), he should fill
       63 + * corresponding fields with NULLs. For every vfsop/vnodeop there are two
       64 + * fields: pre_foo() and post_foo(). These are the functions called before and
       65 + * after the next hook or underlying vfsop/vnodeop.
       66 + *
       67 + * Pre hooks take:
       68 + *      - arg
       69 + *      - pointer to a field containing void* - it should be filled whenever
       70 + *      the client wants to have some data shared by the pre and post hooks in
       71 + *      the same syscall execution. This is called the *instance data*.
       72 + *      - pointers to the arguments passed to the underlying vfsop/vnodeop
       73 + * Pre hooks return void.
       74 + *
       75 + * Post hooks take:
       76 + *      - value returned by the previous post hook or underlying vfsop/vnodeop
       77 + *      - arg
       78 + *      - pointer to the *instance data*
       79 + *      - arguments passed to the underlying vfsop/vnodeop
       80 + * Post hooks return an int, which should be treated as the vfsop/vnodeop
       81 + * return value.
       82 + * Memory allocated by pre hook must be deallocated by the post hook.
       83 + *
       84 + * Execution path of hooks A, B, C is as follows:
       85 + * foo()
       86 + *      preA(argA, &instancepA, ...);
       87 + *      preB(argB, &instancepB, ...);
       88 + *      preC(argC, &instancepC, ...);
       89 + *      ret = VOP_FOO();
       90 + *      ret = postC(ret, argC, instancepC, ...);
       91 + *      ret = postB(ret, argB, instancepB, ...);
       92 + *      ret = postA(ret, argA, instancepA, ...);
       93 + *      return (ret);
       94 + *
  70   95   * After installation, an fsh_handle_t is returned to the caller.
  71   96   *
  72      - * Every hook function is responsible for passing the control to the next
  73      - * hook associated with a particular call. In order to provide an easy way to
  74      - * modify the behaviour of a function call both before and after the
  75      - * underlying vfsop/vnodeop (or next hook) execution, a hook has to call
  76      - * fsh_next_foo() at some point. This function does necessary internal
  77      - * operations and calls the next hook, until there's no hook left, then it
  78      - * calls the underlying vfsop/vnodeop.
  79      - * Example:
  80      - * my_freefs(fsh_int_t *fsh_int, void *arg, vfs_t *vfsp) {
  81      - *      cmn_err(CE_NOTE, "freefs called!\n");
  82      - *      return (fsh_next_freefs(fsh_int, vfsp));
  83      - * }
       97 + * Hook remove callback - it's a function being fired after a hook is removed
       98 + * and no thread is going to execute it anymore. It's safe to destroy all the
       99 + * data associated with this hook inside it.
  84  100   *
      101 + * It is guaranteed that whenever a pre_hook() is called, the corresponding
      102 + * post_hook() will also be called within the same syscall.
  85  103   *
  86      - * A client might want to fire callbacks when vfs_t's are being mounted
      104 + * If a hook (HNew) is installed/removed on/from a vfs_t within execution of
      105 + * another hook (HExec) installed on this vfs_t, the syscall that executes
      106 + * HExec won't fire HNew.
      107 + *
      108 + * A client might want to fire callbacks when vfs_ts are being mounted
  87  109   * or freed. There's an fsh_callback_t structure provided to install such
  88  110   * callbacks along with the API.
  89  111   * It is legal to call fsh_hook_{install,remove}() inside a mount callback
  90  112   * WITHOUT holding the vfs_t.
  91  113   *
  92  114   * After vfs_t's free callback returns, all the handles associated with the
  93  115   * hooks installed on this vfs_t are invalid and must not be used.
  94  116   *
  95      - *
  96  117   * 4. API
  97  118   * None of the APIs should be called during interrupt context above lock
  98      - * level. The only exceptions are fsh_next_foo() functions, which do not use
  99      - * locks.
      119 + * level.
 100  120   *
 101  121   * a) fsh.h
 102      - * Any of these functions could be called inside a hook or a hook remove
 103      - * callback.
 104      - * fsh_callback_{install,remove}() must not be called inside a {mount,free}
 105      - * callback. Doing so will cause a deadlock. Other functions can be called
 106      - * inside {mount,free} callbacks.
      122 + * Any of these functions could be called in a hook or a hook remove callback.
      123 + * The only functions that must not be called inside a {mount,free} callback are
      124 + * fsh_callback_{install,remove}. Using them will cause a deadlock.
 107  125   *
      126 + *
 108  127   * fsh_fs_enable(vfs_t *vfsp)
 109  128   * fsh_fs_disable(vfs_t *vfsp)
 110  129   *      Enables/disables fsh for a given vfs_t.
 111  130   *
 112  131   * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 113  132   *      Installs hooks on vfsp filesystem.
 114  133   *      It's important that hooks are executed in LIFO installation order,
 115  134   *      which means that if there are hooks A and B installed in this order, B
 116  135   *      is going to be executed before A.
 117  136   *      It returns a correct handle, or (-1) if hook/callback limit exceeded.
↓ open down ↓ 3 lines elided ↑ open up ↑
 121  140   * fsh_hook_remove(fsh_handle_t handle)
 122  141   *      Removes a hook and invalidates the handle.
 123  142   *      It is guaranteed that after this function returns, calls to
 124  143   *      vnodeops/vfsops won't go through this hook, although there might be
 125  144   *      some threads still executing this hook. When hook remove callback is
 126  145   *      fired, it is guaranteed that the hook won't be executed anymore. It is
 127  146   *      safe to remove all the internal data associated with this hook inside
 128  147   *      the hook remove callback. The hook remove callback could be called
 129  148   *      inside fsh_hook_remove().
 130  149   *
 131      - * fsh_next_foo(fsh_int_t *fsh_int, void *arg, ARGUMENTS)
 132      - *      This is the function which should be called once in every hook. It
 133      - *      does the necessary internal operations and passes control to the
 134      - *      next hook or, if there's no hook left, to the underlying
 135      - *      vfsop/vnodeop.
 136  150   *
 137  151   * fsh_callback_install(fsh_callback_t *callback)
 138  152   * fsh_callback_remove(fsh_callback_handle_t handle)
 139  153   *      Installs/removes callbacks for vfs_t mount/free. The mount callback
 140  154   *      is executed right before domount() returns. The free callback is
 141  155   *      called right before VFS_FREEVFS() is called.
 142  156   *      The fsh_callback_install() returns a correct handle, or (-1) if
 143  157   *      hook/callback limit exceeded.
 144  158   *
      159 + *
 145  160   * b) fsh_impl.h (for vfs.c and vnode.c only)
 146  161   * fsh_init()
 147  162   *      This call has to be done in vfsinit(). It initialises the fsh. It
 148  163   *      is absolutely necessary that this call is made before any other fsh
 149  164   *      operation.
 150  165   *
 151  166   * fsh_exec_mount_callbacks(vfs_t *vfsp)
 152  167   * fsh_exec_free_callbacks(vfs_t *vfsp)
 153  168   *      Used to execute all fsh callbacks for {mount,free} of a vfs_t.
 154  169   *
 155  170   * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
 156  171   *      Destroys an fsh_fsrecord structure. All the hooks installed on this
 157  172   *      vfs_t are then destroyed. free callback is called before this function.
 158  173   *
 159  174   * fsh_foo(ARGUMENTS)
 160      - *      Function used to start executing the hook chain for a given call.
      175 + *      Function used to execute the hook chain for a given syscall.
 161  176   *
 162  177   *
 163  178   * 5. Internals.
 164  179   * fsh_int_t is an internal hook structure. It is reference counted.
 165  180   * fshi_hold() and fshi_rele() should be used whenever needed.
 166  181   * fsh_int_t entries are elements of both fsh_map (global) and fshfsr_list
 167  182   * (local to vfs_t). All entries are unique and are identified by fshi_handle.
 168  183   *
 169  184   * fsh_int_t properties:
 170  185   *      - fsh_hook_install() sets the ref. counter to 1 and adds it to both
↓ open down ↓ 11 lines elided ↑ open up ↑
 182  197   *      that there is no thread executing the hook
 183  198   *
 184  199   *
 185  200   * fsh_fsrecord_t is a structure which lives inside a vfs_t.
 186  201   * fsh_fsrecord_t contains:
 187  202   *      - an rw-lock that protects the structure
 188  203   *      - a list of hooks installed on this vfs_t
 189  204   *      - a flag which tells whether fsh is enabled on this vfs_t
 190  205   *
 191  206   *
 192      - * fsh_prepare_fsrec rule:
      207 + * fsh_fsrec_prepare rule:
 193  208   * Every function that needs vfsp->vfs_fshrecord has to call
 194      - * fsh_prepare_fsrec() first. If and only if the call is made, it is safe to
      209 + * fsh_fsrec_prepare() first. If and only if the call is made, it is safe to
 195  210   * use vfsp->vfs_fshrecord.
 196  211   *
 197  212   * Unfortunately, because of unexpected behaviour of some filesystems (no use
 198  213   * of vfs_alloc()/vfs_init()) there's no good place to initialise the
 199  214   * fsh_fshrecord_t structure. The approach being used here is to check if it's
 200  215   * initialised in every call. Because of the fact that no lock could be used
 201  216   * here (the same problem with initialisation), a spinlock is used.  This is
 202      - * explained in more detail in a comment before fsh_prepare_fsrec(). After
      217 + * explained in more detail in a comment before fsh_fsrec_prepare(). After
 203  218   * calling fsh_fsrec_prepare() it's completely safe to keep the vfs_fshrecord
 204  219   * pointer locally, because it won't be changed until vfs_free() is called.
 205  220   *
 206      - * The only exception from the fsh_prepare_fsrec() rule is vfs_free(),
 207      - * where there is expected that no other fsh calls would be made for the
      221 + * Exceptions from this rule:
      222 + * - vfs_free() - it is expected that no other fsh calls would be made for the
 208  223   * vfs_t that's being freed. That's why vfs_fshrecord could be only NULL or a
 209  224   * valid pointer and could not be concurrently accessed.
      225 + * - fshi_rele() - fsh_hook_install() comes before first fshi_rele() call;
      226 + * the fsh_fsrecord_t has been initialised there
 210  227   *
      228 + *
 211  229   * When there are no fsh functions (that use a particular fsh_fsrecord_t)
 212  230   * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
 213  231   * would be NULL or a pointer to an initialised fsh_fsrecord_t.
 214  232   *
      233 + * It is required and sufficient to check if fsh_fsrecord_t is not NULL before
      234 + * passing it to fsh_fsrec_destroy. We don't have to check if it is not equal
      235 + * to fsh_res_ptr, because all the fsh API calls involving this vfs_t should
      236 + * end before vfs_free() is called (outside the fsh, fsh_fsrecord is never
      237 + * equal to fsh_res_ptr). That is guaranteed by the explicit requirement that
      238 + * the caller of fsh API holds the vfs_t when needed. fsh_hook_remove() must not
      239 + * be called either, because the handles are invalidated after free callback has
      240 + * fired.
 215  241   *
      242 + *
 216  243   * Callbacks:
 217  244   * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
 218  245   * before returning from domount()@vfs.c.
 219  246   *
 220  247   * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
 221  248   * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
 222  249   *
 223  250   *
 224      - * fsh_next_foo(fsh_int_t *fshi, ARGUMENTS)
 225      - *      This function is quite simple. It takes the fsh_int_t and passes control
 226      - *      to the next hook or to the underlying vnodeop/vfsop.
 227      - *
 228      - *
 229  251   * 6. Locking
 230  252   * a) public
 231  253   * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
 232  254   * needed, the client does that.
 233  255   *
 234      - * fsh_callback_{install,remove} must not be called inside a callback, because
 235      - * it will cause a deadlock.
      256 + * No locks are held across hooks or hook remove callbacks execution. It is
      257 + * safe to use fsh API inside hooks and hook remove callbacks.
 236  258   *
 237      - * b) internal
      259 + * fsh_cb_lock is held across {mount,free} callbacks. Calling
      260 + * fsh_callback_{install,remove} inside of a callback will cause a deadlock.
      261 + *
      262 + * b) internals
 238  263   * Locking diagram:
 239  264   *
 240      - *     fsh_hook_install()    fsh_hook_remove()   fsh_fsrec_destroy()
 241      - *           |                     |                |
 242      - *           |                     |                |
 243      - *           +------------------+  |   +------------+
 244      - *                              |  |   |
 245      - *                              V  V   V
 246      - *                              fsh_lock
 247      - *                                 |   |
 248      - *                                 |   +----- fshfsr_lock, RW_WRITER ---+
 249      - *                                 |                                    |
 250      - *                                 V                                    |
 251      - *               +---------------------------------------+              |
 252      - *               |               fsh_map                 |              |
 253      - *               |                                       |              |
 254      - *          +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|--------------+
      265 + *     fsh_hook_remove()          fsh_hook_install()   fsh_fsrec_destroy()
      266 + *           |                            |                |
      267 + *           |                            |                |
      268 + *           +------------------+         |   +------------+
      269 + *           |                  |         |   |
      270 + *           |                  V         |   |
      271 + *           V               +------------|---|-+
      272 + *      fshi_rele()          |  fsh_lock  |   | |
      273 + *      (sometimes)          +------------|---|-+
      274 + *                                 |      |   |
      275 + *                                 |      +---+-- fshfsr_lock, RW_WRITER -+
      276 + *                                 |                                      |
      277 + *                                 V                                      |
      278 + *               +---------------------------------------+                |
      279 + *               |               fsh_map                 |                |
      280 + *               |                                       |                |
      281 + *          +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|----------------+
 255  282   *          |    +------------------------------^--------+
 256  283   *          |                                   |
 257  284   *          |                                   |
 258  285   * fshfsr_lock, RW_READER              fshfsr_lock, RW_WRITER
 259  286   *          |                                   |
 260  287   *          |                                   |
 261  288   *   fsh_read(),                            fshi_rele()
 262  289   *   fsh_write(),
 263      - *   ...,                               Might be called from:
 264      - *   fsh_next_read(),                    fsh_hook_remove()
 265      - *   fsh_next_write(),                   fsh_read(), fsh_write(), ...
 266      - *   ...                                 fsh_next_read(), fsh_next_write(), ...
      290 + *   ...                                Might be called from:
      291 + *                                        fsh_hook_remove()
      292 + *                                        fsh_read(), fsh_write(), ...
 267  293   *
      294 + *
 268  295   * fsh_lock is a global lock for administrative path (fsh_hook_install,
 269  296   * fsh_hook_remove) and fsh_fsrec_destroy() (which is semi-administrative, since
 270  297   * it destroys the unremoved hooks). It is used only when fsh_map needs to be
 271  298   * locked. The usage of this lock guarantees that the data in fsh_map and
 272  299   * fshfsr_lists is consistent.
      300 + *
      301 + * In order to make calling callbacks inside callbacks possible, fsh_cb_owner is
      302 + * set by fsh_exec_{mount,free}_callbacks() to the thread that owns the
      303 + * fsh_cb_lock.  It's always checked if we are owners of the mutex before
      304 + * entering it.
      305 + *
 273  306   */
 274  307  
 275  308  
 276  309  /* Internals */
 277      -struct fsh_int {
      310 +typedef struct fsh_int {
 278  311          fsh_handle_t    fshi_handle;
 279  312          fsh_t           fshi_hooks;
 280  313          vfs_t           *fshi_vfsp;
 281  314  
 282  315          kmutex_t        fshi_lock;
 283  316          uint64_t        fshi_ref;
 284  317          uint64_t        fshi_doomed;    /* changed inside fsh_lock */
 285  318  
 286  319          /* next node in fshfsr_list */
 287      -        list_node_t     fshi_next;
      320 +        list_node_t     fshi_node;
 288  321  
 289  322          /* next node in fsh_map */
 290  323          list_node_t     fshi_global;
 291      -};
      324 +} fsh_int_t;
 292  325  
 293  326  typedef struct fsh_callback_int {
 294  327          fsh_callback_t  fshci_cb;
 295  328          fsh_callback_handle_t fshci_handle;
 296      -        list_node_t     fshci_next;
      329 +        list_node_t     fshci_node;
 297  330  } fsh_callback_int_t;
 298  331  
 299  332  
      333 +typedef struct fsh_exec {
      334 +        fsh_int_t       *fshe_fshi;
      335 +        void            *fshe_instance;
      336 +        list_node_t     fshe_node;
      337 +} fsh_exec_t;
      338 +
      339 +
 300  340  static kmutex_t fsh_lock;
 301  341  
 302  342  /*
 303  343   * fsh_fsrecord_t is the main internal structure. Its content is protected
 304  344   * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for
 305  345   * the vfs_t that contains the fsh_fsrecord_t.
 306  346   */
 307  347  struct fsh_fsrecord {
 308  348          krwlock_t       fshfsr_lock;
 309  349          int             fshfsr_enabled;
↓ open down ↓ 1 lines elided ↑ open up ↑
 311  351  };
 312  352  
 313  353  /*
 314  354   * Global list of fsh_int_t. Protected by fsh_lock.
 315  355   */
 316  356  static list_t fsh_map;
 317  357  
 318  358  /*
 319  359   * Global list of fsh_callback_int_t.
 320  360   */
 321      -static krwlock_t fsh_cblist_lock;
      361 +static kmutex_t fsh_cb_lock;
      362 +static kmutex_t fsh_cb_owner_lock;
      363 +static kthread_t *fsh_cb_owner;
 322  364  static list_t fsh_cblist;
 323  365  
 324  366  /*
 325  367   * A reserved pointer for fsh purposes. It is used because of the method
 326  368   * chosen for solving concurrency issues with vfs_fshrecord. The full
 327  369   * explanation is in the big theory statement at the beginning of this
 328  370   * file and above fsh_fsrec_prepare(). It is initialised in fsh_init().
 329  371   */
 330  372  static void *fsh_res_ptr;
 331  373  
 332  374  static fsh_fsrecord_t *fsh_fsrec_create();
 333  375  
 334  376  int fsh_limit = INT_MAX;
 335  377  static id_space_t *fsh_idspace;
 336  378  
 337  379  /*
 338      - * fsh_prepare_fsrec()
      380 + * fsh_fsrec_prepare()
 339  381   *
 340  382   * Important note:
 341  383   * Before using this function, fsh_init() MUST be called. We do that in
 342  384   * vfsinit()@vfs.c.
 343  385   *
 344  386   * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
 345  387   * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not
 346  388   * call vfs_init() or even vfs_alloc(). It's possible that some unbundled
 347  389   * filesystems could do the same thing. That's why this solution is
 348  390   * introduced. It should be called before any code that needs access to
↓ open down ↓ 10 lines elided ↑ open up ↑
 359  401   *      fsh_res_ptr. That's a signal for other threads, that the structure
 360  402   *      is being initialised.
 361  403   * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait,
 362  404   *      because vfs_fshrecord is being initialised by another call.
 363  405   * c) other cases:
 364  406   *      vfs_fshrecord is already initialised, so we can use it. It won't change
 365  407   *      until vfs_free() is called. It can't happen when someone is holding
 366  408   *      the vfs_t, which is expected from the caller of fsh API.
 367  409   */
 368  410  static void
 369      -fsh_prepare_fsrec(vfs_t *vfsp)
      411 +fsh_fsrec_prepare(vfs_t *vfsp)
 370  412  {
 371  413          fsh_fsrecord_t *fsrec;
 372  414  
 373  415          while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL,
 374  416              fsh_res_ptr)) == fsh_res_ptr)
 375  417                  ;
 376  418  
 377  419          if (fsrec == NULL)
 378  420                  atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create());
 379  421  }
↓ open down ↓ 4 lines elided ↑ open up ↑
 384  426   * A newly created vfs_t has fsh enabled by default. If one would want to change
 385  427   * this behaviour, mount callbacks could be used.
 386  428   *
 387  429   * The caller is expected to hold the vfs_t.
 388  430   *
 389  431   * These functions must NOT be called in a hook.
 390  432   */
 391  433  void
 392  434  fsh_fs_enable(vfs_t *vfsp)
 393  435  {
 394      -        fsh_prepare_fsrec(vfsp);
      436 +        fsh_fsrec_prepare(vfsp);
 395  437  
 396  438          rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 397  439          vfsp->vfs_fshrecord->fshfsr_enabled = 1;
 398  440          rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 399  441  }
 400  442  
 401  443  void
 402  444  fsh_fs_disable(vfs_t *vfsp)
 403  445  {
 404      -        fsh_prepare_fsrec(vfsp);
      446 +        fsh_fsrec_prepare(vfsp);
 405  447  
 406  448          rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 407  449          vfsp->vfs_fshrecord->fshfsr_enabled = 0;
 408  450          rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 409  451  }
 410  452  
 411  453  /*
 412  454   * API used for installing hooks. fsh_handle_t is returned for further
 413  455   * actions (currently just removing) on this set of hooks.
 414  456   *
 415      - * fsh_t fields:
 416      - * - arg - argument passed to every hook
 417      - * - remove_cb - remove callback, called after a hook is removed and all the
 418      - *      threads stops executing it
 419      - * - read, write, ... - pointers to hooks for corresponding vnodeops/vfsops;
 420      - *      if there is no hook desired for an operation, it should be set to
 421      - *      NULL
 422      - *
 423  457   * It's important that the hooks are executed in LIFO installation order (they
 424  458   * are added to the head of the hook list).
 425  459   *
 426  460   * The caller is expected to hold the vfs_t.
 427  461   *
 428  462   * Returns (-1) if hook/callback limit exceeded, handle otherwise.
 429  463   */
 430  464  fsh_handle_t
 431  465  fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 432  466  {
 433  467          fsh_handle_t    handle;
 434  468          fsh_int_t       *fshi;
 435  469  
 436      -        fsh_prepare_fsrec(vfsp);
      470 +        fsh_fsrec_prepare(vfsp);
 437  471  
 438  472          if ((handle = id_alloc(fsh_idspace)) == -1)
 439  473                  return (-1);
 440  474  
 441  475          fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP);
 442  476          mutex_init(&fshi->fshi_lock, NULL, MUTEX_DRIVER, NULL);
 443  477          (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks));
 444  478          fshi->fshi_handle = handle;
 445  479          fshi->fshi_doomed = 0;
 446  480          fshi->fshi_ref = 1;
↓ open down ↓ 45 lines elided ↑ open up ↑
 492  526          } else {
 493  527                  destroy = 0;
 494  528          }
 495  529          mutex_exit(&fshi->fshi_lock);
 496  530  
 497  531          if (destroy) {
 498  532                  /*
 499  533                   * At this point, we are sure that fsh_hook_remove() has been
 500  534                   * called, that's why we don't remove the fshi from fsh_map.
 501  535                   * fsh_hook_remove() did that already.
      536 +                 * There is also no need to call fsh_fsrec_prepare() here.
 502  537                   */
 503  538                  fsh_fsrecord_t *fsrecp;
 504  539  
 505      -                if (fshi->fshi_hooks.remove_cb != NULL)
 506      -                        (*fshi->fshi_hooks.remove_cb)(
 507      -                            fshi->fshi_hooks.arg, fshi->fshi_handle);
 508  540                  /*
 509      -                 * We don't have to call fsh_prepare_fsrec() here.
      541 +                 * We don't have to call fsh_fsrec_prepare() here.
 510  542                   * fsh_fsrecord_t is already initialised, because we've found a
 511  543                   * mapping for the given handle.
 512  544                   */
 513  545                  fsrecp = fshi->fshi_vfsp->vfs_fshrecord;
 514  546                  ASSERT(fsrecp != NULL);
 515  547                  ASSERT(fsrecp != fsh_res_ptr);
 516  548  
 517  549                  rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
 518  550                  list_remove(&fsrecp->fshfsr_list, fshi);
 519  551                  rw_exit(&fsrecp->fshfsr_lock);
 520  552  
      553 +                if (fshi->fshi_hooks.remove_cb != NULL)
      554 +                        (*fshi->fshi_hooks.remove_cb)(
      555 +                            fshi->fshi_hooks.arg, fshi->fshi_handle);
      556 +
 521  557                  id_free(fsh_idspace, fshi->fshi_handle);
 522  558                  mutex_destroy(&fshi->fshi_lock);
 523  559                  kmem_free(fshi, sizeof (*fshi));
 524  560          }
 525  561  }
 526  562  
 527  563  /*
 528  564   * Used for removing a hook set.
 529  565   *
 530  566   * fsh_hook_remove() invalidates the given handle.
↓ open down ↓ 47 lines elided ↑ open up ↑
 578  614   * fsh_callback_t fields:
 579  615   * fshc_arg - argument passed to the callbacks
 580  616   * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count
 581  617   *      drops to 0
 582  618   * fshc_mount - callback fired right before returning from domount()
 583  619   * The first argument of these callbacks is the vfs_t that is mounted/freed.
 584  620   * The second one is the fshc_arg.
 585  621   *
 586  622   * fsh_callback_handle_t is filled out by this function.
 587  623   *
 588      - * This function must NOT be called in a callback, because it will cause
 589      - * a deadlock.
 590      - *
 591  624   * Returns (-1) if hook/callback limit exceeded.
      625 + *
      626 + * Calling this function in a {mount,free} callback will cause a deadlock.
 592  627   */
 593  628  fsh_callback_handle_t
 594  629  fsh_callback_install(fsh_callback_t *callback)
 595  630  {
 596  631          fsh_callback_int_t *fshci;
 597  632          fsh_callback_handle_t handle;
 598  633  
 599  634          if ((handle = id_alloc(fsh_idspace)) == -1)
 600  635                  return (-1);
 601  636  
 602  637          fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
 603  638          (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb));
 604  639          fshci->fshci_handle = handle;
 605  640  
 606      -        /* If it is called in a {mount,free} callback, causes deadlock. */
 607      -        rw_enter(&fsh_cblist_lock, RW_WRITER);
      641 +        mutex_enter(&fsh_cb_lock);
 608  642          list_insert_head(&fsh_cblist, fshci);
 609      -        rw_exit(&fsh_cblist_lock);
      643 +        mutex_exit(&fsh_cb_lock);
 610  644  
 611  645          return (handle);
 612  646  }
 613  647  
 614  648  /*
 615  649   * API for removing global mount/free callbacks.
 616  650   *
 617      - * This function must NOT be called in a callback, because it will cause
 618      - * a deadlock.
 619      - *
 620  651   * Returns (-1) if callback wasn't found, 0 otherwise.
      652 + *
      653 + * Calling this function in a {mount,free} callback will cause a deadlock.
 621  654   */
 622  655  int
 623  656  fsh_callback_remove(fsh_callback_handle_t handle)
 624  657  {
 625  658          fsh_callback_int_t *fshci;
 626  659  
 627      -        /* If it is called in a {mount,free} callback, causes deadlock. */
 628      -        rw_enter(&fsh_cblist_lock, RW_WRITER);
      660 +        mutex_enter(&fsh_cb_lock);
      661 +
 629  662          for (fshci = list_head(&fsh_cblist); fshci != NULL;
 630  663              fshci = list_next(&fsh_cblist, fshci)) {
 631  664                  if (fshci->fshci_handle == handle) {
 632  665                          list_remove(&fsh_cblist, fshci);
 633  666                          break;
 634  667                  }
 635  668          }
 636      -        rw_exit(&fsh_cblist_lock);
 637  669  
      670 +        mutex_exit(&fsh_cb_lock);
      671 +
 638  672          if (fshci == NULL)
 639  673                  return (-1);
 640  674  
 641  675          kmem_free(fshci, sizeof (*fshci));
 642  676          id_free(fsh_idspace, handle);
 643  677  
 644  678          return (0);
 645  679  }
 646  680  
 647  681  /*
↓ open down ↓ 4 lines elided ↑ open up ↑
 652  686   * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal
 653  687   * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding
 654  688   * this vfs_t. This guarantee should be preserved, because it's in the "Usage"
 655  689   * section in the big theory statement at the top of this file.
 656  690   */
 657  691  void
 658  692  fsh_exec_mount_callbacks(vfs_t *vfsp)
 659  693  {
 660  694          fsh_callback_int_t *fshci;
 661  695          fsh_callback_t *cb;
      696 +        int fsh_context;
 662  697  
 663      -        rw_enter(&fsh_cblist_lock, RW_READER);
      698 +        mutex_enter(&fsh_cb_owner_lock);
      699 +        fsh_context = fsh_cb_owner == curthread;
      700 +        mutex_exit(&fsh_cb_owner_lock);
      701 +
      702 +        if (!fsh_context) {
      703 +                mutex_enter(&fsh_cb_lock);
      704 +                mutex_enter(&fsh_cb_owner_lock);
      705 +                fsh_cb_owner = curthread;
      706 +                mutex_exit(&fsh_cb_owner_lock);
      707 +        }
      708 +
      709 +        ASSERT(MUTEX_HELD(&fsh_cb_lock));
      710 +
 664  711          for (fshci = list_head(&fsh_cblist); fshci != NULL;
 665  712              fshci = list_next(&fsh_cblist, fshci)) {
 666  713                  cb = &fshci->fshci_cb;
 667  714                  if (cb->fshc_mount != NULL)
 668  715                          (*(cb->fshc_mount))(vfsp, cb->fshc_arg);
 669  716          }
 670      -        rw_exit(&fsh_cblist_lock);
      717 +
      718 +        if (!fsh_context) {
      719 +                mutex_enter(&fsh_cb_owner_lock);
      720 +                fsh_cb_owner = NULL;
      721 +                mutex_exit(&fsh_cb_owner_lock);
      722 +                mutex_exit(&fsh_cb_lock);
      723 +        }
 671  724  }
 672  725  
 673  726  /*
 674  727   * This function is executed right before VFS_FREEVFS() is called in
 675  728   * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
 676  729   * It executes all the free callbacks installed in the fsh.
 677  730   *
 678  731   * free() callback is the point after which the handles associated with the
 679  732   * hooks installed on this vfs_t become invalid.
 680  733   */
 681  734  void
 682  735  fsh_exec_free_callbacks(vfs_t *vfsp)
 683  736  {
 684  737          fsh_callback_int_t *fshci;
 685  738          fsh_callback_t *cb;
      739 +        int fsh_context;
 686  740  
 687      -        rw_enter(&fsh_cblist_lock, RW_READER);
      741 +        mutex_enter(&fsh_cb_owner_lock);
      742 +        fsh_context = fsh_cb_owner == curthread;
      743 +        mutex_exit(&fsh_cb_owner_lock);
      744 +
      745 +        if (!fsh_context) {
      746 +                mutex_enter(&fsh_cb_lock);
      747 +                mutex_enter(&fsh_cb_owner_lock);
      748 +                fsh_cb_owner = curthread;
      749 +                mutex_exit(&fsh_cb_owner_lock);
      750 +        }
      751 +
      752 +        ASSERT(MUTEX_HELD(&fsh_cb_lock));
      753 +
 688  754          for (fshci = list_head(&fsh_cblist); fshci != NULL;
 689  755              fshci = list_next(&fsh_cblist, fshci)) {
 690  756                  cb = &fshci->fshci_cb;
 691  757                  if (cb->fshc_free != NULL)
 692  758                          (*(cb->fshc_free))(vfsp, cb->fshc_arg);
 693  759          }
 694      -        rw_exit(&fsh_cblist_lock);
      760 +
      761 +        if (!fsh_context) {
      762 +                mutex_enter(&fsh_cb_owner_lock);
      763 +                fsh_cb_owner = NULL;
      764 +                mutex_exit(&fsh_cb_owner_lock);
      765 +                mutex_exit(&fsh_cb_lock);
      766 +        }
 695  767  }
 696  768  
 697  769  /*
 698  770   * API for vnode.c/vfs.c to start executing the fsh for a given operation.
 699  771   *
 700  772   * fsh_xxx() tries to find the first non-NULL xxx hook on the fshfsr_list. If it
 701  773   * does, it executes it. If not, underlying vnodeop/vfsop is called.
 702  774   *
 703      - * These interfaces are using fsh_res_ptr (in fsh_prepare_fsrec()), so it's
      775 + * These interfaces are using fsh_res_ptr (in fsh_fsrec_prepare()), so it's
 704  776   * absolutely necessary to call fsh_init() before using them. That's done in
 705  777   * vfsinit().
 706  778   *
 707  779   * While these functions are executing, it's expected that necessary vfs_t's
 708  780   * are held so that vfs_free() isn't called. vfs_free() expects that no one
 709  781   * accesses vfs_fshrecord of a given vfs_t.
 710  782   * It's also the caller's responsibility to keep vnode_t passed to fsh_foo()
 711  783   * alive and valid.
 712  784   * All these expectations are met because these functions are used only in
 713  785   * corresponding {fop,fsop}_foo() functions.
 714  786   */
 715  787  int
 716  788  fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 717  789          caller_context_t *ct)
 718  790  {
 719  791          int ret;
 720  792          fsh_fsrecord_t *fsrecp;
 721  793          fsh_int_t *fshi;
      794 +        fsh_exec_t *fshe;
      795 +        list_t exec_list;
 722  796  
 723      -        fsh_prepare_fsrec(vp->v_vfsp);
      797 +        fsh_fsrec_prepare(vp->v_vfsp);
 724  798          fsrecp = vp->v_vfsp->vfs_fshrecord;
 725  799  
 726  800          rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 727  801          if (!(fsrecp->fshfsr_enabled)) {
 728  802                  rw_exit(&fsrecp->fshfsr_lock);
 729      -                return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
      803 +                return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct));
 730  804          }
 731  805  
      806 +        list_create(&exec_list, sizeof (fsh_exec_t),
      807 +            offsetof(fsh_exec_t, fshe_node));
      808 +
 732  809          for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
 733  810              fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 734      -                if (fshi->fshi_hooks.read != NULL)
 735      -                        if (fshi_hold(fshi))
 736      -                                break;
      811 +                if (fshi->fshi_hooks.pre_read != NULL ||
      812 +                    fshi->fshi_hooks.post_read != NULL) {
      813 +                        if (fshi_hold(fshi)) {
      814 +                                fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
      815 +                                fshe->fshe_fshi = fshi;
      816 +                                list_insert_tail(&exec_list, fshe);
      817 +                        }
      818 +                }
 737  819          }
 738  820          rw_exit(&fsrecp->fshfsr_lock);
 739  821  
 740      -        if (fshi == NULL)
 741      -                return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
      822 +        /* Execute pre hooks */
      823 +        for (fshe = list_head(&exec_list); fshe != NULL;
      824 +            fshe = list_next(&exec_list, fshe)) {
      825 +                if (fshe->fshe_fshi->fshi_hooks.pre_read != NULL)
      826 +                        (*fshe->fshe_fshi->fshi_hooks.pre_read)(
      827 +                            fshe->fshe_fshi->fshi_hooks.arg,
      828 +                            &fshe->fshe_instance,
      829 +                            &vp, &uiop, &ioflag, &cr, &ct);
      830 +        }
 742  831  
 743      -        ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg,
 744      -            vp, uiop, ioflag, cr, ct);
 745      -        fshi_rele(fshi);
      832 +        ret = (*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
      833 +
      834 +        /* Execute post hooks */
      835 +        while ((fshe = list_remove_tail(&exec_list)) != NULL) {
      836 +                if (fshe->fshe_fshi->fshi_hooks.post_read != NULL)
      837 +                        ret = (*fshe->fshe_fshi->fshi_hooks.post_read)(
      838 +                            ret, fshe->fshe_fshi->fshi_hooks.arg,
      839 +                            fshe->fshe_instance,
      840 +                            vp, uiop, ioflag, cr, ct);
      841 +                fshi_rele(fshe->fshe_fshi);
      842 +                kmem_free(fshe, sizeof (*fshe));
      843 +        }
      844 +        list_destroy(&exec_list);
      845 +
 746  846          return (ret);
 747  847  }
 748  848  
 749  849  int
 750  850  fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 751  851          caller_context_t *ct)
 752  852  {
 753      -        fsh_int_t *fshi;
 754  853          int ret;
 755  854          fsh_fsrecord_t *fsrecp;
      855 +        fsh_int_t *fshi;
      856 +        fsh_exec_t *fshe;
      857 +        list_t exec_list;
 756  858  
 757      -        fsh_prepare_fsrec(vp->v_vfsp);
      859 +        fsh_fsrec_prepare(vp->v_vfsp);
 758  860          fsrecp = vp->v_vfsp->vfs_fshrecord;
 759  861  
 760  862          rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 761      -        if (!(vp->v_vfsp->vfs_fshrecord->fshfsr_enabled)) {
      863 +        if (!(fsrecp->fshfsr_enabled)) {
 762  864                  rw_exit(&fsrecp->fshfsr_lock);
 763      -                return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
      865 +                return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct));
 764  866          }
 765  867  
      868 +        list_create(&exec_list, sizeof (fsh_exec_t),
      869 +            offsetof(fsh_exec_t, fshe_node));
      870 +
 766  871          for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
 767  872              fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 768      -                if (fshi->fshi_hooks.write != NULL)
 769      -                        if (fshi_hold(fshi))
 770      -                                break;
      873 +                if (fshi->fshi_hooks.pre_write != NULL ||
      874 +                    fshi->fshi_hooks.post_write != NULL) {
      875 +                        if (fshi_hold(fshi)) {
      876 +                                fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
      877 +                                fshe->fshe_fshi = fshi;
      878 +                                list_insert_tail(&exec_list, fshe);
      879 +                        }
      880 +                }
 771  881          }
 772  882          rw_exit(&fsrecp->fshfsr_lock);
 773  883  
 774      -        if (fshi == NULL)
 775      -                return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
      884 +        /* Execute pre hooks */
      885 +        for (fshe = list_head(&exec_list); fshe != NULL;
      886 +            fshe = list_next(&exec_list, fshe)) {
      887 +                if (fshe->fshe_fshi->fshi_hooks.pre_write != NULL)
      888 +                        (*fshe->fshe_fshi->fshi_hooks.pre_write)(
      889 +                            fshe->fshe_fshi->fshi_hooks.arg,
      890 +                            &fshe->fshe_instance,
      891 +                            &vp, &uiop, &ioflag, &cr, &ct);
      892 +        }
 776  893  
 777      -        ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg,
 778      -            vp, uiop, ioflag, cr, ct);
 779      -        fshi_rele(fshi);
      894 +        ret = (*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
      895 +
      896 +        /* Execute post hooks */
      897 +        while ((fshe = list_remove_tail(&exec_list)) != NULL) {
      898 +                if (fshe->fshe_fshi->fshi_hooks.post_write != NULL)
      899 +                        ret = (*fshe->fshe_fshi->fshi_hooks.post_write)(
      900 +                            ret, fshe->fshe_fshi->fshi_hooks.arg,
      901 +                            fshe->fshe_instance,
      902 +                            vp, uiop, ioflag, cr, ct);
      903 +                fshi_rele(fshe->fshe_fshi);
      904 +                kmem_free(fshe, sizeof (*fshe));
      905 +        }
      906 +        list_destroy(&exec_list);
      907 +
 780  908          return (ret);
 781  909  }
 782  910  
 783  911  int
 784  912  fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 785  913  {
      914 +        int ret;
 786  915          fsh_fsrecord_t *fsrecp;
 787  916          fsh_int_t *fshi;
 788      -        int ret;
      917 +        fsh_exec_t *fshe;
      918 +        list_t exec_list;
 789  919  
 790      -        fsh_prepare_fsrec(vfsp);
      920 +        fsh_fsrec_prepare(vfsp);
 791  921          fsrecp = vfsp->vfs_fshrecord;
 792  922  
 793  923          rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 794  924          if (!(fsrecp->fshfsr_enabled)) {
 795  925                  rw_exit(&fsrecp->fshfsr_lock);
 796      -                return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
      926 +                return ((*vfsp->vfs_op->vfs_mount)(vfsp, mvp, uap, cr));
 797  927          }
 798  928  
      929 +        list_create(&exec_list, sizeof (fsh_exec_t),
      930 +            offsetof(fsh_exec_t, fshe_node));
      931 +
 799  932          for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
 800  933              fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 801      -                if (fshi->fshi_hooks.mount != NULL)
 802      -                        if (fshi_hold(fshi))
 803      -                                break;
      934 +                if (fshi->fshi_hooks.pre_mount != NULL ||
      935 +                    fshi->fshi_hooks.post_mount != NULL) {
      936 +                        if (fshi_hold(fshi)) {
      937 +                                fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
      938 +                                fshe->fshe_fshi = fshi;
      939 +                                list_insert_tail(&exec_list, fshe);
      940 +                        }
      941 +                }
 804  942          }
 805  943          rw_exit(&fsrecp->fshfsr_lock);
 806  944  
 807      -        if (fshi == NULL)
 808      -                return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
      945 +        /* Execute pre hooks */
      946 +        for (fshe = list_head(&exec_list); fshe != NULL;
      947 +            fshe = list_next(&exec_list, fshe)) {
      948 +                if (fshe->fshe_fshi->fshi_hooks.pre_mount != NULL)
      949 +                        (*fshe->fshe_fshi->fshi_hooks.pre_mount)(
      950 +                            &fshe->fshe_fshi->fshi_hooks.arg,
      951 +                            &fshe->fshe_instance,
      952 +                            &vfsp, &mvp, &uap, &cr);
      953 +        }
 809  954  
 810      -        ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg,
 811      -            vfsp, mvp, uap, cr);
 812      -        fshi_rele(fshi);
      955 +        ret = (*vfsp->vfs_op->vfs_mount)(vfsp, mvp, uap, cr);
      956 +
      957 +        /* Execute post hooks */
      958 +        while ((fshe = list_remove_tail(&exec_list)) != NULL) {
      959 +                if (fshe->fshe_fshi->fshi_hooks.post_mount != NULL)
      960 +                        ret = (*fshe->fshe_fshi->fshi_hooks.post_mount)(
      961 +                            ret, fshe->fshe_fshi->fshi_hooks.arg,
      962 +                            fshe->fshe_instance,
      963 +                            vfsp, mvp, uap, cr);
      964 +                fshi_rele(fshe->fshe_fshi);
      965 +                kmem_free(fshe, sizeof (*fshe));
      966 +        }
      967 +        list_destroy(&exec_list);
      968 +
 813  969          return (ret);
 814  970  }
 815  971  
 816  972  int
 817  973  fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
 818  974  {
      975 +        int ret;
 819  976          fsh_fsrecord_t *fsrecp;
 820  977          fsh_int_t *fshi;
 821      -        int ret;
      978 +        fsh_exec_t *fshe;
      979 +        list_t exec_list;
 822  980  
 823      -        fsh_prepare_fsrec(vfsp);
      981 +        fsh_fsrec_prepare(vfsp);
 824  982          fsrecp = vfsp->vfs_fshrecord;
 825  983  
 826  984          rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 827  985          if (!(fsrecp->fshfsr_enabled)) {
 828  986                  rw_exit(&fsrecp->fshfsr_lock);
 829      -                return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
      987 +                return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr));
 830  988          }
 831  989  
      990 +        list_create(&exec_list, sizeof (fsh_exec_t),
      991 +            offsetof(fsh_exec_t, fshe_node));
      992 +
 832  993          for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
 833  994              fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 834      -                if (fshi->fshi_hooks.unmount != NULL)
 835      -                        if (fshi_hold(fshi))
 836      -                                break;
      995 +                if (fshi->fshi_hooks.pre_unmount != NULL ||
      996 +                    fshi->fshi_hooks.post_unmount != NULL) {
      997 +                        if (fshi_hold(fshi)) {
      998 +                                fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
      999 +                                fshe->fshe_fshi = fshi;
     1000 +                                list_insert_tail(&exec_list, fshe);
     1001 +                        }
     1002 +                }
 837 1003          }
 838 1004          rw_exit(&fsrecp->fshfsr_lock);
 839 1005  
 840      -        if (fshi == NULL)
 841      -                return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
     1006 +        /* Execute pre hooks */
     1007 +        for (fshe = list_head(&exec_list); fshe != NULL;
     1008 +            fshe = list_next(&exec_list, fshe)) {
     1009 +                if (fshe->fshe_fshi->fshi_hooks.pre_unmount != NULL)
     1010 +                        (*fshe->fshe_fshi->fshi_hooks.pre_unmount)(
     1011 +                            fshe->fshe_fshi->fshi_hooks.arg,
     1012 +                            &fshe->fshe_instance,
     1013 +                            &vfsp, &flag, &cr);
     1014 +        }
 842 1015  
 843      -        ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg,
 844      -            vfsp, flag, cr);
 845      -        fshi_rele(fshi);
     1016 +        ret = (*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr);
     1017 +
     1018 +        /* Execute post hooks */
     1019 +        while ((fshe = list_remove_tail(&exec_list)) != NULL) {
     1020 +                if (fshe->fshe_fshi->fshi_hooks.post_unmount != NULL)
     1021 +                        ret = (*fshe->fshe_fshi->fshi_hooks.post_unmount)(
     1022 +                            ret, fshe->fshe_fshi->fshi_hooks.arg,
     1023 +                            fshe->fshe_instance,
     1024 +                            vfsp, flag, cr);
     1025 +                fshi_rele(fshe->fshe_fshi);
     1026 +                kmem_free(fshe, sizeof (*fshe));
     1027 +        }
     1028 +        list_destroy(&exec_list);
     1029 +
 846 1030          return (ret);
 847 1031  }
 848 1032  
 849 1033  /*
 850      - * This is the funtion used by fsh_prepare_fsrec() to allocate a new
     1034 + * This is the function used by fsh_fsrec_prepare() to allocate a new
 851 1035   * fsh_fsrecord. This function is called by the first function which
 852 1036   * accesses the vfs_fshrecord and finds out it's NULL.
 853 1037   */
 854 1038  static fsh_fsrecord_t *
 855 1039  fsh_fsrec_create()
 856 1040  {
 857 1041          fsh_fsrecord_t *fsrecp;
 858 1042  
 859 1043          fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP);
 860 1044          list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t),
 861      -            offsetof(fsh_int_t, fshi_next));
     1045 +            offsetof(fsh_int_t, fshi_node));
 862 1046          rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL);
 863 1047          fsrecp->fshfsr_enabled = 1;
 864 1048          return (fsrecp);
 865 1049  }
 866 1050  
 867 1051  
 868 1052  /*
 869      - * This call can be used ONLY in vfs_free(). It's assumed that no other
 870      - * fsh calls using the vfs_t that owns the fsh_fsrecord to be destroyed
 871      - * are executing while a call to fsh_fsrec_destroy() is made. With this
 872      - * assumptions, no concurrency issues occur.
     1053 + * This call must be used ONLY in vfs_free().
 873 1054   *
 874      - * Before calling this function outside the fsh, it's sufficient and
 875      - * required to check if the passed fsh_fsrecord * is not NULL. We don't
 876      - * have to check if it is not equal to fsh_res_ptr, because all the fsh API
 877      - * calls involving this vfs_t should end before vfs_free() is called
 878      - * (outside the fsh, fsh_fsrecord is never equal to fsh_res_ptr). That is
 879      - * guaranteed by the explicit requirement that the caller of fsh API holds
 880      - * the vfs_t when needed.
     1055 + * It is required and sufficient to check if fsh_fsrecord_t is not NULL before
     1056 + * passing it to fsh_fsrec_destroy.
 881 1057   *
 882      - * All the remaining hooks are being removed.
     1058 + * All the remaining hooks are being removed here.
 883 1059   */
 884 1060  void
 885 1061  fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
 886 1062  {
 887 1063          fsh_int_t *fshi;
 888 1064  
 889 1065          VERIFY(fsrecp != NULL);
 890 1066  
 891 1067          _NOTE(CONSTCOND)
 892 1068          while (1) {
 893 1069                  mutex_enter(&fsh_lock);
 894      -                /* No need here to hold fshfsr_lock */
     1070 +                rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
 895 1071                  fshi = list_remove_head(&fsrecp->fshfsr_list);
     1072 +                rw_exit(&fsrecp->fshfsr_lock);
 896 1073                  if (fshi == NULL) {
 897 1074                          mutex_exit(&fsh_lock);
 898 1075                          break;
 899 1076                  }
 900 1077                  ASSERT(fshi->fshi_doomed == 0);
 901 1078                  list_remove(&fsh_map, fshi);
 902 1079                  mutex_exit(&fsh_lock);
 903 1080  
 904 1081                  if (fshi->fshi_hooks.remove_cb != NULL)
 905 1082                          (*fshi->fshi_hooks.remove_cb)(fshi->fshi_hooks.arg,
 906 1083                              fshi->fshi_handle);
     1084 +
 907 1085                  id_free(fsh_idspace, fshi->fshi_handle);
 908 1086                  mutex_destroy(&fshi->fshi_lock);
 909 1087                  kmem_free(fshi, sizeof (*fshi));
 910 1088  
 911 1089          }
 912 1090  
 913 1091          list_destroy(&fsrecp->fshfsr_list);
 914 1092          rw_destroy(&fsrecp->fshfsr_lock);
 915 1093          kmem_free(fsrecp, sizeof (*fsrecp));
 916 1094  }
 917 1095  
 918 1096  /*
 919 1097   * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
 920 1098   * before any other fsh call.
 921 1099   */
 922 1100  void
 923 1101  fsh_init(void)
 924 1102  {
 925      -        rw_init(&fsh_cblist_lock, NULL, RW_DRIVER, NULL);
     1103 +        mutex_init(&fsh_cb_lock, NULL, MUTEX_DRIVER, NULL);
     1104 +        mutex_init(&fsh_cb_owner_lock, NULL, MUTEX_DRIVER, NULL);
 926 1105          list_create(&fsh_cblist, sizeof (fsh_callback_int_t),
 927      -            offsetof(fsh_callback_int_t, fshci_next));
     1106 +            offsetof(fsh_callback_int_t, fshci_node));
 928 1107  
 929 1108          mutex_init(&fsh_lock, NULL, MUTEX_DRIVER, NULL);
 930 1109  
 931 1110          list_create(&fsh_map, sizeof (fsh_int_t), offsetof(fsh_int_t,
 932 1111              fshi_global));
 933 1112  
 934      -        /* See comment above fsh_prepare_fsrec() */
     1113 +        /* See comment above fsh_fsrec_prepare() */
 935 1114          fsh_res_ptr = (void *)-1;
 936 1115  
 937 1116          fsh_idspace = id_space_create("fsh", 0, fsh_limit);
 938      -}
 939      -
 940      -/*
 941      - * These functions are used to pass control to the next hook or underlying
 942      - * vop or vfsop. It's client doesn't have to worry about any locking.
 943      - */
 944      -int
 945      -fsh_next_read(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
 946      -        cred_t *cr, caller_context_t *ct)
 947      -{
 948      -        int ret;
 949      -        fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord;
 950      -
 951      -        /*
 952      -         * The passed fshi is the previous hook (the one from which we've been
 953      -         * called). We need to find the next one.
 954      -         */
 955      -        rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 956      -        for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
 957      -            fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 958      -                if (fshi->fshi_hooks.read != NULL)
 959      -                        if (fshi_hold(fshi))
 960      -                                break;
 961      -        }
 962      -        rw_exit(&fsrecp->fshfsr_lock);
 963      -
 964      -        if (fshi == NULL)
 965      -                return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct));
 966      -
 967      -        ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg,
 968      -            vp, uiop, ioflag, cr, ct);
 969      -        fshi_rele(fshi);
 970      -        return (ret);
 971      -}
 972      -
 973      -int
 974      -fsh_next_write(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
 975      -        cred_t *cr, caller_context_t *ct)
 976      -{
 977      -        fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord;
 978      -        int ret;
 979      -
 980      -        /*
 981      -         * The passed fshi is the previous hook (the one from which we've been
 982      -         * called). We need to find the next one.
 983      -         */
 984      -        rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 985      -        for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
 986      -            fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 987      -                if (fshi->fshi_hooks.write != NULL)
 988      -                        if (fshi_hold(fshi))
 989      -                                break;
 990      -        }
 991      -        rw_exit(&fsrecp->fshfsr_lock);
 992      -
 993      -        if (fshi == NULL)
 994      -                return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct));
 995      -
 996      -        ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg,
 997      -            vp, uiop, ioflag, cr, ct);
 998      -        fshi_rele(fshi);
 999      -        return (ret);
1000      -}
1001      -
1002      -int
1003      -fsh_next_mount(fsh_int_t *fshi, vfs_t *vfsp, vnode_t *mvp, struct mounta *uap,
1004      -        cred_t *cr)
1005      -{
1006      -        fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord;
1007      -        int ret;
1008      -
1009      -        /*
1010      -         * The passed fshi is the previous hook (the one from which we've been
1011      -         * called). We need to find the next one.
1012      -         */
1013      -        rw_enter(&fsrecp->fshfsr_lock, RW_READER);
1014      -        for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
1015      -            fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
1016      -                if (fshi->fshi_hooks.mount != NULL)
1017      -                        if (fshi_hold(fshi))
1018      -                                break;
1019      -        }
1020      -        rw_exit(&fsrecp->fshfsr_lock);
1021      -
1022      -        if (fshi == NULL)
1023      -                return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
1024      -
1025      -        ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg,
1026      -            vfsp, mvp, uap, cr);
1027      -        fshi_rele(fshi);
1028      -        return (ret);
1029      -}
1030      -
1031      -int
1032      -fsh_next_unmount(fsh_int_t *fshi, vfs_t *vfsp, int flag, cred_t *cr)
1033      -{
1034      -        fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord;
1035      -        int ret;
1036      -
1037      -        /*
1038      -         * The passed fshi is the previous hook (the one from which we've been
1039      -         * called). We need to find the next one.
1040      -         */
1041      -        rw_enter(&fsrecp->fshfsr_lock, RW_READER);
1042      -        for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
1043      -            fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
1044      -                if (fshi->fshi_hooks.unmount != NULL)
1045      -                        if (fshi_hold(fshi))
1046      -                                break;
1047      -        }
1048      -        rw_exit(&fsrecp->fshfsr_lock);
1049      -
1050      -        if (fshi == NULL)
1051      -                return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr));
1052      -
1053      -        ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg,
1054      -            vfsp, flag, cr);
1055      -        fshi_rele(fshi);
1056      -        return (ret);
1057 1117  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX